From d6b9f45fcc2e7e53262d0f032cef1999a76d0c7f Mon Sep 17 00:00:00 2001
From: k
Date: Tue, 6 Jan 2026 18:31:04 -0500
Subject: [PATCH] Implemented Feed Forward Network

---
 model.py | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 model.py

diff --git a/model.py b/model.py
new file mode 100644
index 0000000..4e82f51
--- /dev/null
+++ b/model.py
@@ -0,0 +1,43 @@
+from tinygrad import Tensor, nn, TinyJit
+
+class MultiHeadAttention:
+    def __init__(self):
+        pass #TODO
+    def __call__(self):
+        pass #TODO
+    def cast(self):
+        pass #TODO
+
+
+class FeedForwardNetwork:
+    def __init__(self, embedding_size, ratio=(8/3)):
+        # SwiGLU-style FFN; the 8/3 ratio keeps the parameter count equal to a classic 4x MLP
+        hidden_size = int(embedding_size * ratio)
+        self.norm = nn.RMSNorm(embedding_size)
+        self.gate = nn.Linear(embedding_size, hidden_size, bias=False)
+        self.up = nn.Linear(embedding_size, hidden_size, bias=False)
+        self.down = nn.Linear(hidden_size, embedding_size, bias=False)
+    def __call__(self, x):
+        x = self.norm(x)  # pre-norm before the gated projection
+        return self.down(self.gate(x).silu() * self.up(x))
+    def cast(self, dtype):
+        # note: the RMSNorm weight is left in its original dtype
+        self.gate.weight = self.gate.weight.cast(dtype)
+        self.up.weight = self.up.weight.cast(dtype)
+        self.down.weight = self.down.weight.cast(dtype)
+
+class Block:
+    def __init__(self):
+        pass #TODO
+    def __call__(self):
+        pass #TODO
+    def cast(self):
+        pass #TODO
+
+class Transformer:
+    def __init__(self):
+        pass #TODO
+    def __call__(self):
+        pass #TODO
+    def cast(self):
+        pass #TODO
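
Usage sketch (reviewer note, not part of the patch): a minimal smoke test of the
new FeedForwardNetwork. It assumes tinygrad is installed and that model.py is on
the import path; the batch, sequence, and embedding sizes below are arbitrary.

    from tinygrad import Tensor, dtypes
    from model import FeedForwardNetwork

    ffn = FeedForwardNetwork(embedding_size=64)  # hidden_size = int(64 * 8/3) = 170
    x = Tensor.randn(2, 16, 64)                  # (batch, seq_len, embedding_size)
    print(ffn(x).shape)                          # (2, 16, 64): the FFN preserves shape

    ffn.cast(dtypes.half)                        # cast() now updates the layer weights in place
    print(ffn.gate.weight.dtype)                 # dtypes.half

The shape check works because gate and up both map embedding_size to hidden_size,
their elementwise product stays at hidden_size, and down maps back to
embedding_size, so the block can be dropped into a residual path unchanged.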