More training with fewer heads
This commit is contained in:
parent
dc231ae703
commit
89c9d01cb8
1 changed file with 5 additions and 5 deletions
10
train.py
10
train.py
|
|
@@ -12,20 +12,20 @@ import sys
|
|||
|
||||
hypr = {
|
||||
"embed_size": 768,
|
||||
"n_heads": 12,
|
||||
"n_heads": 8,
|
||||
"n_blocks": 12,
|
||||
"block_size": 512,
|
||||
"batch_size": 8,
|
||||
"starting_lr": 6e-4,
|
||||
"minimum_lr": 6e-5,
|
||||
"warmup": 1_000,
|
||||
"steps": 20_000,
|
||||
"encoding": "gpt2",
|
||||
"warmup": 5_000,
|
||||
"steps": 535_000,
|
||||
"encoding": "TinyLlama/TinyLlama_v1.1",
|
||||
"dataset": "HuggingFaceTB/smollm-corpus",
|
||||
"subset": "cosmopedia-v2",
|
||||
"chat_dataset": "HuggingFaceTB/smoltalk",
|
||||
"chat_subset": "all",
|
||||
"half": False,
|
||||
"half": True,
|
||||
}
|
||||
|
||||
print(Device.DEFAULT)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue