mirror of https://github.com/InternLM/InternLM
fix(tools): set add_eos_token=True in tokenizer.py (#555)
parent
cb922d44e2
commit
ac7509389b
|
@@ -10,7 +10,9 @@ model_path = os.path.join(current_dir, "V7_sft.model")
|
||||||
sys.path.append(os.path.join(current_dir, "transformers"))
|
sys.path.append(os.path.join(current_dir, "transformers"))
|
||||||
from tokenization_internlm import InternLMTokenizer
|
from tokenization_internlm import InternLMTokenizer
|
||||||
|
|
||||||
tokenizer = InternLMTokenizer(vocab_file=model_path)
|
tokenizer = InternLMTokenizer(
|
||||||
|
vocab_file=model_path, add_bos_token=True, add_eos_token=True
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def write_bin(context: str, bin_file) -> None:
|
def write_bin(context: str, bin_file) -> None:
|
||||||
|
|
Loading…
Reference in New Issue