More training with less heads
This commit is contained in:
10
train.py
10
train.py
@@ -12,20 +12,20 @@ import sys
 
 hypr = {
     "embed_size": 768,
-    "n_heads": 12,
+    "n_heads": 8,
     "n_blocks": 12,
     "block_size": 512,
     "batch_size": 8,
     "starting_lr": 6e-4,
     "minimum_lr": 6e-5,
-    "warmup": 1_000,
-    "steps": 20_000,
-    "encoding": "gpt2",
+    "warmup": 5_000,
+    "steps": 535_000,
+    "encoding": "TinyLlama/TinyLlama_v1.1",
     "dataset": "HuggingFaceTB/smollm-corpus",
     "subset": "cosmopedia-v2",
     "chat_dataset": "HuggingFaceTB/smoltalk",
     "chat_subset": "all",
-    "half": False,
+    "half": True,
 }
 
 print(Device.DEFAULT)
|
|||||||
Reference in New Issue
Block a user