added fine-tuning

k
2026-01-07 13:01:06 -05:00
parent 121640bab6
commit 496916f428
2 changed files with 33 additions and 12 deletions


@@ -1,13 +1,14 @@
+from tinygrad.nn.state import get_state_dict, safe_load, load_state_dict
 from concurrent.futures import ThreadPoolExecutor
 from tinygrad import Tensor, TinyJit, Device, nn
-from tinygrad.nn.state import get_state_dict
 from model import Transformer
 from transformers import AutoTokenizer
 from datasets import load_dataset
 from tqdm import tqdm
 import optm
 import data
 import log
+import sys
 hypr = {
 "embed_size": 512,
@@ -22,9 +23,15 @@ hypr = {
"encoding": "gpt2",
"dataset": "HuggingFaceTB/smollm-corpus",
"subset": "cosmopedia-v2",
"chat_dataset": "yahma/alpaca-cleaned",
"chat_subset": None,
}
print(Device.DEFAULT)
chat = len(sys.argv) > 1
if(chat):
hypr["dataset"] = hypr["chat_dataset"]
hypr["subset"] = hypr["chat_subset"]
#for loging
loger = ThreadPoolExecutor(max_workers=2)
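
Note: the chat flag above swaps the pretraining corpus for yahma/alpaca-cleaned, whose rows are instruction/input/output triples rather than raw text. data.py is not part of this diff, so the formatter below is only a sketch (format_example is a hypothetical name) of the flattening the data worker presumably has to do before tokenizing:

    def format_example(row, chat):
        # cosmopedia-v2 rows expose a plain "text" field
        if not chat:
            return row["text"]
        # alpaca-cleaned rows: instruction plus optional input, then the answer
        prompt = row["instruction"]
        if row["input"]:
            prompt += "\n" + row["input"]
        return prompt + "\n" + row["output"]
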
@@ -34,10 +41,14 @@ dataset = load_dataset(hypr["dataset"],
split="train",
streaming=True)
encoding = AutoTokenizer.from_pretrained(hypr["encoding"])
if encoding.pad_token_id == None:
encoding.pad_token_id=encoding.eos_token_id
hypr["vocab_size"] = encoding.vocab_size
model = Transformer(hypr["vocab_size"],hypr["embed_size"],hypr["n_heads"],hypr["n_blocks"],hypr["block_size"])
batch = data.startDataWorker(dataset,encoding,hypr["batch_size"],hypr["block_size"])
batch = data.startDataWorker(dataset,encoding,hypr["batch_size"],hypr["block_size"],chat)
model = Transformer(hypr["vocab_size"],hypr["embed_size"],hypr["n_heads"],hypr["n_blocks"],hypr["block_size"])
if (chat):
load_state_dict(model,safe_load(sys.argv[1]))
params = nn.state.get_parameters(model)
optimizer = optm.llmOptimizer(params,hypr["steps"],hypr["starting_lr"],hypr["minimum_lr"])
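
Note: the chat branch assumes argv[1] is a pretrained checkpoint in safetensors format; the pad-token fallback is needed because GPT-2's tokenizer ships without one. A minimal round trip with tinygrad's own state helpers (the file name is just an example):

    from tinygrad.nn.state import safe_save, safe_load, get_state_dict, load_state_dict

    # after pretraining: serialize every model tensor to disk
    safe_save(get_state_dict(model), "pretrained.safetensors")

    # before fine-tuning: rebuild the Transformer, then restore the weights,
    # which is what the "if chat" branch above does with sys.argv[1]
    load_state_dict(model, safe_load("pretrained.safetensors"))

Invocation would then look like "python train.py pretrained.safetensors" for fine-tuning (the script name is assumed here); running with no argument keeps the pretraining path.
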
@@ -74,4 +85,6 @@ for steps in bar:
+    #TODO: non-synchronous safetensors logging
+    #logger.submit(log.logModel, steps, m)
 m = get_state_dict(model)
 log.logModel("final", m)
 logger.shutdown(wait=True)
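
Note: the TODO wants per-step checkpoint writes off the training thread. One way to get there with the ThreadPoolExecutor already created above (a sketch, not the author's log.logModel, whose signature this diff does not show): realize a host-side copy of the weights first, so the slow disk write cannot race the optimizer.

    from tinygrad import Tensor
    from tinygrad.nn.state import get_state_dict, safe_save

    def log_async(executor, step, model):
        # .numpy() realizes each tensor and copies it to host memory,
        # so later weight updates cannot corrupt the file being written
        snapshot = {k: Tensor(v.numpy()) for k, v in get_state_dict(model).items()}
        # the checkpoint file name is illustrative
        executor.submit(safe_save, snapshot, f"step_{step}.safetensors")
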