mirror of https://github.com/hpcaitech/ColossalAI
fix style
parent
9688e19b32
commit
8a3ff4f315
|
@ -40,7 +40,7 @@ if __name__ == "__main__":
|
|||
type=str,
|
||||
required=True,
|
||||
default=None,
|
||||
help="The type of data",
|
||||
help="The type of data, choose one from ['sft', 'prompt', 'preference', 'kto']",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
if args.data_type == "sft":
|
||||
|
|
|
@ -46,8 +46,7 @@ def supervised_tokenize_sft(
|
|||
max_length: the maximum context length
|
||||
"""
|
||||
|
||||
if ignore_index is None:
|
||||
ignore_index = IGNORE_INDEX
|
||||
ignore_index = IGNORE_INDEX
|
||||
|
||||
messages = data_point["messages"]
|
||||
template = deepcopy(conversation_template)
|
||||
|
@ -146,8 +145,6 @@ def tokenize_prompt_dataset(
|
|||
ignore_index: the ignore index when calculate loss during training
|
||||
max_length: the maximum context length
|
||||
"""
|
||||
if ignore_index is None:
|
||||
ignore_index = IGNORE_INDEX
|
||||
|
||||
messages = data_point["messages"]
|
||||
template = deepcopy(conversation_template)
|
||||
|
@ -226,8 +223,6 @@ def tokenize_rlhf(
|
|||
{"context": [{"from": "user", "content": "xxx"}, {"from": "assistant", "content": "xxx"}],
|
||||
"chosen": {"from": "assistant", "content": "xxx"}, "rejected": {"from": "assistant", "content": "xxx"}}
|
||||
"""
|
||||
if ignore_index is None:
|
||||
ignore_index = IGNORE_INDEX
|
||||
|
||||
context = data_point["context"]
|
||||
template = deepcopy(conversation_template)
|
||||
|
|
|
@ -26,7 +26,7 @@ from .utils import is_rank_0, to_device
|
|||
|
||||
class DPOTrainer(SLTrainer):
|
||||
"""
|
||||
Trainer for PPO algorithm.
|
||||
Trainer for DPO algorithm.
|
||||
|
||||
Args:
|
||||
actor (Actor): the actor model in ppo algorithm
|
||||
|
|
|
@ -27,7 +27,7 @@ from .utils import is_rank_0, to_device
|
|||
|
||||
class KTOTrainer(SLTrainer):
|
||||
"""
|
||||
Trainer for PPO algorithm.
|
||||
Trainer for KTO algorithm.
|
||||
|
||||
Args:
|
||||
actor (Actor): the actor model in ppo algorithm
|
||||
|
|
|
@ -26,7 +26,7 @@ from .utils import is_rank_0, to_device
|
|||
|
||||
class ORPOTrainer(SLTrainer):
|
||||
"""
|
||||
Trainer for PPO algorithm.
|
||||
Trainer for ORPO algorithm.
|
||||
|
||||
Args:
|
||||
actor (Actor): the actor model in ppo algorithm
|
||||
|
|
|
@ -39,15 +39,15 @@ preference_seed = {
|
|||
}
|
||||
kto_seed = {
|
||||
"prompt": [
|
||||
{"from": "user", "content": "What are some cuss words in english?"},
|
||||
{"from": "user", "content": "What are some praise words in english?"},
|
||||
{
|
||||
"from": "assistant",
|
||||
"content": "Here's an incomplete list.\n\nAss, dick, bugger, crap, fuck, shit, bitch, turd, shithead, shitbag, scrotum, cunt, whore, fucker, shit-eating, cum, cumbucket, fucknugget, butthole, poop, jackass, cocksucker, asshole, goddamn, piss, sperm, blow, wank, jism, cum-sucking, masturbate, faggot, queer, jizz, jizz-licking, prostitute, slut, cheater, fornicator, floozy, wetback, Mexican, Hispanic, sodomite, midget, mama's boy, faggot, pervert, queer, scumbag, bitch,",
|
||||
"content": "Here's an incomplete list.\n\nexcellent, fantastic, impressive ...",
|
||||
},
|
||||
{"from": "user", "content": "What's your favorite one?"},
|
||||
],
|
||||
"completion": {"from": "assistant", "content": "Ass."},
|
||||
"label": False,
|
||||
"completion": {"from": "assistant", "content": "Impressive."},
|
||||
"label": True,
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -1 +1 @@
|
|||
{"prompt": [{"from": "user", "content": "What are some cuss words in english?"}, {"from": "assistant", "content": "Here's an incomplete list.\n\nAss, dick, bugger, crap, ..."},{"from": "user", "content": "What's your favorite one?"}],"completion": {"from": "assistant", "content": "Ass."},"label": false}
|
||||
{"prompt": [{"from": "user", "content": "What are some praise words in english?"}, {"from": "assistant", "content": "Here's an incomplete list.\n\nexcellent, fantastic, impressive ..."},{"from": "user", "content": "What's your favorite one?"}],"completion": {"from": "assistant", "content": "impressive."},"label": true}
|
||||
|
|
Loading…
Reference in New Issue