fix(tools): set add_eos_token=True in tokenizer.py (#555)

pull/582/head
x54-729 2023-12-22 21:57:14 +08:00 committed by GitHub
parent cb922d44e2
commit ac7509389b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 3 additions and 1 deletion

View File

@@ -10,7 +10,9 @@ model_path = os.path.join(current_dir, "V7_sft.model")
sys.path.append(os.path.join(current_dir, "transformers")) sys.path.append(os.path.join(current_dir, "transformers"))
from tokenization_internlm import InternLMTokenizer from tokenization_internlm import InternLMTokenizer
tokenizer = InternLMTokenizer(vocab_file=model_path) tokenizer = InternLMTokenizer(
vocab_file=model_path, add_bos_token=True, add_eos_token=True
)
def write_bin(context: str, bin_file) -> None: def write_bin(context: str, bin_file) -> None: