fix(tools): set add_eos_token=True in tokenizer.py (#555)

2023-12-22 21:57:14 +08:00 · 2023-12-22 21:57:14 +08:00 · ac7509389b
parent cb922d44e2
commit ac7509389b
1 changed file with 3 additions and 1 deletion
--- a/tools/tokenizer.py
+++ b/tools/tokenizer.py
@@ -10,7 +10,9 @@ model_path = os.path.join(current_dir, "V7_sft.model")
 sys.path.append(os.path.join(current_dir, "transformers"))
 from tokenization_internlm import InternLMTokenizer
-tokenizer = InternLMTokenizer(vocab_file=model_path)
+tokenizer = InternLMTokenizer(
+    vocab_file=model_path, add_bos_token=True, add_eos_token=True
+)
 def write_bin(context: str, bin_file) -> None: