mirror of https://github.com/hpcaitech/ColossalAI
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
12 lines
465 B
12 lines
465 B
# Convert a JSON Lines file (one JSON value per line) into a single file
# holding one JSON array, with one tab-indented element per line.
#
# Edit the two path constants below, then run this file as a script.

jsonl_file = 'seed_prompts_xx.jsonl'  # seed_prompts_en.jsonl or seed_prompts_ch.json from InstructionWild
reformat_file = 'prompts_xx.jsonl'  # reformat jsonl file used as Prompt dataset in Stage3


def jsonl_to_json_array(lines):
    """Return the text of a JSON array built from JSON Lines records.

    Args:
        lines: Iterable of strings, each holding one JSON value, with or
            without a trailing newline. Whitespace-only lines are skipped.

    Returns:
        A string of the form ``'[\\n\\t<rec1>,\\n\\t<rec2>\\n]'``. Records are
        copied through as text (not re-serialized); an empty input yields
        ``'[\\n]'``.
    """
    # Strip surrounding whitespace and drop blank lines so they cannot turn
    # into empty array elements.
    records = [stripped for stripped in (line.strip() for line in lines) if stripped]
    # Joining with ',\n' puts a comma *between* records only; a comma after
    # the last record would make the array invalid JSON.
    body = ',\n'.join('\t' + record for record in records)
    if body:
        body += '\n'
    return '[\n' + body + ']'


def main():
    """Read ``jsonl_file`` and write the reformatted array to ``reformat_file``."""
    with open(jsonl_file, 'r', encoding="utf-8") as f1:
        data = jsonl_to_json_array(f1)

    # Write with the same explicit encoding used for reading so non-ASCII
    # prompts do not depend on the platform's default encoding.
    with open(reformat_file, 'w', encoding="utf-8") as f2:
        f2.write(data)


if __name__ == "__main__":
    main()