# llm/model.py
from tinygrad import Tensor, nn, TinyJit
class MultiHeadAttention:
    def __init__(self):
        pass  # TODO
    def __call__(self, x):
        pass  # TODO
    def cast(self, dtype):
        pass  # TODO
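# Hedged sketch of one way the MultiHeadAttention TODO could be filled in; the
# _AttentionSketch name, the (embedding_size, n_heads) signature, and the
# pre-norm wiring are assumptions chosen to mirror FeedForwardNetwork below,
# not the author's design. Positional encoding (e.g. RoPE) is omitted.
class _AttentionSketch:
    def __init__(self, embedding_size, n_heads):
        assert embedding_size % n_heads == 0, "heads must evenly split the embedding"
        self.n_heads, self.head_dim = n_heads, embedding_size // n_heads
        self.norm = nn.RMSNorm(embedding_size)
        self.q = nn.Linear(embedding_size, embedding_size, bias=False)
        self.k = nn.Linear(embedding_size, embedding_size, bias=False)
        self.v = nn.Linear(embedding_size, embedding_size, bias=False)
        self.out = nn.Linear(embedding_size, embedding_size, bias=False)
    def __call__(self, x):
        batch, seqlen, dim = x.shape
        x = self.norm(x)
        # project, then split channels into (n_heads, head_dim) and put heads first
        q = self.q(x).reshape(batch, seqlen, self.n_heads, self.head_dim).transpose(1, 2)
        k = self.k(x).reshape(batch, seqlen, self.n_heads, self.head_dim).transpose(1, 2)
        v = self.v(x).reshape(batch, seqlen, self.n_heads, self.head_dim).transpose(1, 2)
        # tinygrad's fused softmax(q @ k.T / sqrt(head_dim)) @ v, with a causal mask
        attn = q.scaled_dot_product_attention(k, v, is_causal=True)
        return self.out(attn.transpose(1, 2).reshape(batch, seqlen, dim))
    def cast(self, dtype):
        for layer in (self.q, self.k, self.v, self.out):
            layer.weight = layer.weight.cast(dtype)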
class FeedForwardNetwork:
    def __init__(self, embedding_size, ratio=(8/3)):
        # 8/3 is the Llama-style SwiGLU hidden ratio
        hidden_size = int(embedding_size * ratio)
        self.norm = nn.RMSNorm(embedding_size)
        self.gate = nn.Linear(embedding_size, hidden_size, bias=False)
        self.up = nn.Linear(embedding_size, hidden_size, bias=False)
        self.down = nn.Linear(hidden_size, embedding_size, bias=False)
    def __call__(self, x):
        # pre-norm SwiGLU: down(silu(gate(x)) * up(x))
        x = self.norm(x)
        return self.down(self.gate(x).silu() * self.up(x))
    def cast(self, dtype):
        # cast only the projection weights; the RMSNorm weight stays in full precision
        self.gate.weight = self.gate.weight.cast(dtype)
        self.up.weight = self.up.weight.cast(dtype)
        self.down.weight = self.down.weight.cast(dtype)
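# Quick shape check for FeedForwardNetwork (sizes are illustrative, not from
# the source file):
#   ffn = FeedForwardNetwork(512)        # hidden_size = int(512 * 8/3) = 1365
#   y = ffn(Tensor.randn(2, 16, 512))    # -> (2, 16, 512); shape is preserved
#   ffn.cast(dtypes.half)                # needs: from tinygrad import dtypes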
class Block:
    def __init__(self):
        pass  # TODO
    def __call__(self, x):
        pass  # TODO
    def cast(self, dtype):
        pass  # TODO
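# Hedged sketch of the Block TODO: a standard pre-norm residual block chaining
# the attention sketch above with FeedForwardNetwork. The wiring is an
# assumption; since both sublayers normalize their own input, the residual
# connections are plain adds.
class _BlockSketch:
    def __init__(self, embedding_size, n_heads):
        self.attn = _AttentionSketch(embedding_size, n_heads)
        self.ffn = FeedForwardNetwork(embedding_size)
    def __call__(self, x):
        x = x + self.attn(x)
        return x + self.ffn(x)
    def cast(self, dtype):
        self.attn.cast(dtype)
        self.ffn.cast(dtype)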
class Transformer:
    def __init__(self):
        pass  # TODO
    def __call__(self, tokens):
        pass  # TODO
    def cast(self, dtype):
        pass  # TODO
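# Hedged sketch of the Transformer TODO: a minimal decoder-only stack.
# vocab_size, n_layers, the final RMSNorm, and the untied output head are
# illustrative assumptions, not the author's plan.
class _TransformerSketch:
    def __init__(self, vocab_size, embedding_size, n_heads, n_layers):
        self.embed = nn.Embedding(vocab_size, embedding_size)
        self.blocks = [_BlockSketch(embedding_size, n_heads) for _ in range(n_layers)]
        self.norm = nn.RMSNorm(embedding_size)
        self.head = nn.Linear(embedding_size, vocab_size, bias=False)
    def __call__(self, tokens):
        x = self.embed(tokens)  # (batch, seqlen) -> (batch, seqlen, embedding_size)
        for block in self.blocks:
            x = block(x)
        return self.head(self.norm(x))  # logits over the vocabulary
    def cast(self, dtype):
        for block in self.blocks:
            block.cast(dtype)

if __name__ == "__main__":
    # smoke test with made-up sizes; wrapping __call__ in TinyJit (imported
    # above) would be the usual next step once shapes are stable
    model = _TransformerSketch(vocab_size=256, embedding_size=64, n_heads=4, n_layers=2)
    print(model(Tensor([[1, 2, 3, 4]])).shape)  # (1, 4, 256)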