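Changed chat dataset format: chat samples are now built from a `messages` list (each with `role` and `content`) instead of `instruction`/`input`/`output` fields.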
This commit is contained in:
9
data.py
9
data.py
@@ -20,10 +20,11 @@ def dataWorker(q, dataset, encoding, batch_size, block_size,chat):
|
||||
for text in dataset:
|
||||
tokens = None
|
||||
if(chat):
|
||||
txt = f"<|user|>{text['instruction']}"
|
||||
if(text["input"] != None):
|
||||
txt += f"\n{text['input']}"
|
||||
txt = txt + f"<|end|>\n<|assistant|>{text['output']}<|end|>"
|
||||
txt=""
|
||||
for msg in text['messages']:
|
||||
role = msg['role']
|
||||
content = msg['content']
|
||||
txt = txt + f"<|{role}|>{content}<|end|>"
|
||||
tokens = [encoding.bos_token_id]+encoding.encode(txt)
|
||||
else:
|
||||
tokens = [encoding.bos_token_id]+encoding.encode(text["text"])
|
||||
|
||||
Reference in New Issue
Block a user