mirror of https://github.com/hpcaitech/ColossalAI
[FIX BUG] UnboundLocalError: cannot access local variable 'default_conversation' where it is not associated with a value (#5931)
* Fix "cannot access local variable 'default_conversation' where it is not associated with a value": set a default value for 'default_conversation'.
* [pre-commit.ci] auto fixes from pre-commit.com hooks. For more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>

pull/5934/head
parent
f585d4e38e
commit
4ec17a7cdf
|
@ -10,7 +10,7 @@ import math
|
|||
import os
|
||||
from multiprocessing import cpu_count
|
||||
|
||||
from colossal_llama.dataset.conversation import LLaMA2_Conv
|
||||
from colossal_llama.dataset.conversation import LLaMA2_Conv, LLaMA3_Conv
|
||||
from colossal_llama.dataset.spliced_and_tokenized_dataset import supervised_tokenize_sft
|
||||
from datasets import dataset_dict, load_dataset
|
||||
from transformers import AddedToken, AutoTokenizer
|
||||
|
@ -75,6 +75,8 @@ def main():
|
|||
# Prepare to the tokenizer.
|
||||
tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_dir)
|
||||
|
||||
default_conversation = LLaMA3_Conv
|
||||
|
||||
# Fix </s> split issue: https://github.com/huggingface/transformers/issues/23833
|
||||
if args.llama_version == 2:
|
||||
tokenizer.add_tokens(AddedToken("</s>", normalized=False, special=True), special_tokens=True)
|
||||
|
|
Loading…
Reference in New Issue