Add BOS and EOS tokens in tools/tokenizer

pull/555/head
x54-729 2023-12-22 21:08:26 +08:00
parent cb922d44e2
commit 8c40539f6f
1 changed file with 3 additions and 1 deletion

View File

@@ -10,7 +10,9 @@ model_path = os.path.join(current_dir, "V7_sft.model")
sys.path.append(os.path.join(current_dir, "transformers"))
from tokenization_internlm import InternLMTokenizer
tokenizer = InternLMTokenizer(vocab_file=model_path)
tokenizer = InternLMTokenizer(
vocab_file=model_path, add_bos_token=True, add_eos_token=True
)
def write_bin(context: str, bin_file) -> None: