diff --git a/applications/ColossalChat/benchmarks/prepare_dummy_test_dataset.py b/applications/ColossalChat/benchmarks/prepare_dummy_test_dataset.py index 70a422208..f501c5358 100644 --- a/applications/ColossalChat/benchmarks/prepare_dummy_test_dataset.py +++ b/applications/ColossalChat/benchmarks/prepare_dummy_test_dataset.py @@ -40,7 +40,7 @@ if __name__ == "__main__": type=str, required=True, default=None, - help="The type of data", + help="The type of data, choose one from ['sft', 'prompt', 'preference', 'kto']", ) args = parser.parse_args() if args.data_type == "sft": diff --git a/applications/ColossalChat/coati/dataset/tokenization_utils.py b/applications/ColossalChat/coati/dataset/tokenization_utils.py index cf69b6fe7..d573853d6 100755 --- a/applications/ColossalChat/coati/dataset/tokenization_utils.py +++ b/applications/ColossalChat/coati/dataset/tokenization_utils.py @@ -46,8 +46,7 @@ def supervised_tokenize_sft( max_length: the maximum context length """ - if ignore_index is None: - ignore_index = IGNORE_INDEX + ignore_index = IGNORE_INDEX messages = data_point["messages"] template = deepcopy(conversation_template) @@ -146,8 +145,6 @@ def tokenize_prompt_dataset( ignore_index: the ignore index when calculate loss during training max_length: the maximum context length """ - if ignore_index is None: - ignore_index = IGNORE_INDEX messages = data_point["messages"] template = deepcopy(conversation_template) @@ -226,8 +223,6 @@ def tokenize_rlhf( {"context": [{"from": "user", "content": "xxx"}, {"from": "assistant", "content": "xxx"}], "chosen": {"from": "assistant", "content": "xxx"}, "rejected": {"from": "assistant", "content": "xxx"}} """ - if ignore_index is None: - ignore_index = IGNORE_INDEX context = data_point["context"] template = deepcopy(conversation_template) diff --git a/applications/ColossalChat/coati/trainer/dpo.py b/applications/ColossalChat/coati/trainer/dpo.py index 3daab54f6..c7ef2be8f 100755 --- a/applications/ColossalChat/coati/trainer/dpo.py +++ b/applications/ColossalChat/coati/trainer/dpo.py @@ -26,7 +26,7 @@ from .utils import is_rank_0, to_device class DPOTrainer(SLTrainer): """ - Trainer for PPO algorithm. + Trainer for DPO algorithm. Args: actor (Actor): the actor model in ppo algorithm diff --git a/applications/ColossalChat/coati/trainer/kto.py b/applications/ColossalChat/coati/trainer/kto.py index 83bff3bbe..8ab0bc66b 100755 --- a/applications/ColossalChat/coati/trainer/kto.py +++ b/applications/ColossalChat/coati/trainer/kto.py @@ -27,7 +27,7 @@ from .utils import is_rank_0, to_device class KTOTrainer(SLTrainer): """ - Trainer for PPO algorithm. + Trainer for KTO algorithm. Args: actor (Actor): the actor model in ppo algorithm diff --git a/applications/ColossalChat/coati/trainer/orpo.py b/applications/ColossalChat/coati/trainer/orpo.py index 495bb332b..b039da4af 100644 --- a/applications/ColossalChat/coati/trainer/orpo.py +++ b/applications/ColossalChat/coati/trainer/orpo.py @@ -26,7 +26,7 @@ from .utils import is_rank_0, to_device class ORPOTrainer(SLTrainer): """ - Trainer for PPO algorithm. + Trainer for ORPO algorithm. Args: actor (Actor): the actor model in ppo algorithm diff --git a/applications/ColossalChat/tests/generate_dummy_datasets_for_testing.py b/applications/ColossalChat/tests/generate_dummy_datasets_for_testing.py index 6cce6563c..e50b20b6b 100644 --- a/applications/ColossalChat/tests/generate_dummy_datasets_for_testing.py +++ b/applications/ColossalChat/tests/generate_dummy_datasets_for_testing.py @@ -39,15 +39,15 @@ preference_seed = { } kto_seed = { "prompt": [ - {"from": "user", "content": "What are some cuss words in english?"}, + {"from": "user", "content": "What are some praise words in english?"}, { "from": "assistant", - "content": "Here's an incomplete list.\n\nAss, dick, bugger, crap, fuck, shit, bitch, turd, shithead, shitbag, scrotum, cunt, whore, fucker, shit-eating, cum, cumbucket, fucknugget, butthole, poop, jackass, cocksucker, asshole, goddamn, piss, sperm, blow, wank, jism, cum-sucking, masturbate, faggot, queer, jizz, jizz-licking, prostitute, slut, cheater, fornicator, floozy, wetback, Mexican, Hispanic, sodomite, midget, mama's boy, faggot, pervert, queer, scumbag, bitch,", + "content": "Here's an incomplete list.\n\nexcellent, fantastic, impressive ...", }, {"from": "user", "content": "What's your favorite one?"}, ], - "completion": {"from": "assistant", "content": "Ass."}, - "label": False, + "completion": {"from": "assistant", "content": "Impressive."}, + "label": True, } diff --git a/applications/ColossalChat/tests/test_data/kto/test_kto_data.jsonl b/applications/ColossalChat/tests/test_data/kto/test_kto_data.jsonl index f0e78b1c6..4f4fce83d 100644 --- a/applications/ColossalChat/tests/test_data/kto/test_kto_data.jsonl +++ b/applications/ColossalChat/tests/test_data/kto/test_kto_data.jsonl @@ -1 +1 @@ -{"prompt": [{"from": "user", "content": "What are some cuss words in english?"}, {"from": "assistant", "content": "Here's an incomplete list.\n\nAss, dick, bugger, crap, ..."},{"from": "user", "content": "What's your favorite one?"}],"completion": {"from": "assistant", "content": "Ass."},"label": false} +{"prompt": [{"from": "user", "content": "What are some praise words in english?"}, {"from": "assistant", "content": "Here's an incomplete list.\n\nexcellent, fantastic, impressive ..."},{"from": "user", "content": "What's your favorite one?"}],"completion": {"from": "assistant", "content": "impressive."},"label": true}