From 43ad0d9ef031c3c102ae5eee2378ee7fc3910090 Mon Sep 17 00:00:00 2001
From: Orion-Zheng
Date: Tue, 14 Nov 2023 09:58:00 +0800
Subject: [PATCH] fix wrong EOS token in ColossalChat

---
 applications/Chat/examples/community/peft/train_peft_prompts.py | 2 +-
 applications/Chat/examples/community/peft/train_peft_sft.py     | 2 +-
 applications/Chat/examples/inference.py                         | 2 +-
 applications/Chat/examples/train_prompts.py                     | 2 +-
 applications/Chat/examples/train_reward_model.py                | 2 +-
 applications/Chat/examples/train_sft.py                         | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/applications/Chat/examples/community/peft/train_peft_prompts.py b/applications/Chat/examples/community/peft/train_peft_prompts.py
index 99a024f14..1dd9ffcdf 100644
--- a/applications/Chat/examples/community/peft/train_peft_prompts.py
+++ b/applications/Chat/examples/community/peft/train_peft_prompts.py
@@ -118,7 +118,7 @@ def main(args):
         tokenizer.pad_token = tokenizer.eos_token
     elif args.model == "llama":
         tokenizer = LlamaTokenizer.from_pretrained(args.pretrain)
-        tokenizer.eos_token = "<\s>"
+        tokenizer.eos_token = "</s>"
         tokenizer.pad_token = tokenizer.unk_token
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
diff --git a/applications/Chat/examples/community/peft/train_peft_sft.py b/applications/Chat/examples/community/peft/train_peft_sft.py
index 3bbef7208..6d395dead 100644
--- a/applications/Chat/examples/community/peft/train_peft_sft.py
+++ b/applications/Chat/examples/community/peft/train_peft_sft.py
@@ -68,7 +68,7 @@ def train(args):
             padding_side="right",
             use_fast=False,
         )
-        tokenizer.eos_token = "<\s>"
+        tokenizer.eos_token = "</s>"
         tokenizer.pad_token = tokenizer.unk_token
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
diff --git a/applications/Chat/examples/inference.py b/applications/Chat/examples/inference.py
index 62e06bf7b..9df8649d9 100644
--- a/applications/Chat/examples/inference.py
+++ b/applications/Chat/examples/inference.py
@@ -39,7 +39,7 @@ def eval(args):
         tokenizer.pad_token = tokenizer.eos_token
     elif args.model == "llama":
         tokenizer = LlamaTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
-        tokenizer.eos_token = "<\s>"
+        tokenizer.eos_token = "</s>"
         tokenizer.pad_token = tokenizer.unk_token
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
diff --git a/applications/Chat/examples/train_prompts.py b/applications/Chat/examples/train_prompts.py
index 8868e278d..40e06043a 100644
--- a/applications/Chat/examples/train_prompts.py
+++ b/applications/Chat/examples/train_prompts.py
@@ -125,7 +125,7 @@ def main(args):
         tokenizer = LlamaTokenizer.from_pretrained(
             "hf-internal-testing/llama-tokenizer" if args.tokenizer is None else args.tokenizer
         )
-        tokenizer.eos_token = "<\s>"
+        tokenizer.eos_token = "</s>"
         tokenizer.pad_token = tokenizer.unk_token
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
diff --git a/applications/Chat/examples/train_reward_model.py b/applications/Chat/examples/train_reward_model.py
index df6e8b6bd..fcdd29b29 100644
--- a/applications/Chat/examples/train_reward_model.py
+++ b/applications/Chat/examples/train_reward_model.py
@@ -72,7 +72,7 @@ def train(args):
         tokenizer = LlamaTokenizer.from_pretrained(
             "hf-internal-testing/llama-tokenizer" if args.tokenizer is None else args.tokenizer
         )
-        tokenizer.eos_token = "<\s>"
+        tokenizer.eos_token = "</s>"
         tokenizer.pad_token = tokenizer.unk_token
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
diff --git a/applications/Chat/examples/train_sft.py b/applications/Chat/examples/train_sft.py
index 66d08da30..d00c04809 100644
--- a/applications/Chat/examples/train_sft.py
+++ b/applications/Chat/examples/train_sft.py
@@ -75,7 +75,7 @@ def train(args):
         tokenizer = LlamaTokenizer.from_pretrained(
             "hf-internal-testing/llama-tokenizer" if args.tokenizer is None else args.tokenizer
         )
-        tokenizer.eos_token = "<\s>"
+        tokenizer.eos_token = "</s>"
         tokenizer.pad_token = tokenizer.unk_token
     elif args.model == "chatglm":
         tokenizer = ChatGLMTokenizer.from_pretrained(
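
Below is a minimal standalone sketch (not part of the patch) illustrating why this one-character change matters. It assumes the `transformers` package is installed and uses the same public `hf-internal-testing/llama-tokenizer` asset as the example scripts above; with the slow LlamaTokenizer, an out-of-vocabulary EOS string resolves to the unk id, which is the symptom the patch fixes.

    from transformers import LlamaTokenizer

    tokenizer = LlamaTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")

    # Old value: the scripts wrote "<\s>" (a backslash, not a slash). That string
    # is not a token in LLaMA's SentencePiece vocabulary, so eos_token_id
    # silently falls back to the unk id and generation never sees a real EOS.
    tokenizer.eos_token = "<\\s>"
    print(tokenizer.eos_token_id)  # 0 (unk) -- the bug

    # Fixed value: "</s>" is the EOS token LLaMA was actually trained with.
    tokenizer.eos_token = "</s>"
    print(tokenizer.eos_token_id)  # 2 -- the correct EOS id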