remove debug code

pull/5850/head
YeAnbang 2024-06-24 05:16:29 +00:00
parent 0b2d6275c4
commit f3de5a025c
2 changed files with 10 additions and 27 deletions

View File

@ -207,23 +207,6 @@ class DataCollatorForPreferenceDataset(object):
chuncate_sequence([ins["rejected_loss_mask"] for ins in instances], self.max_length, torch.bool),
)
for i in range(len(chosen_loss_mask)):
if sum(chosen_loss_mask[i][1:]) == 0:
print(
"After truncated",
chosen_loss_mask[i],
len(chosen_loss_mask[i]),
len(instances[i]["chosen_input_ids"]),
)
for i in range(len(reject_loss_mask)):
if sum(reject_loss_mask[i][1:]) == 0:
print(
"After truncated",
reject_loss_mask[i],
len(reject_loss_mask[i]),
len(instances[i]["rejected_input_ids"]),
)
padding_side = self.tokenizer.padding_side
chosen_attention_mask = [torch.ones_like(seq).bool() for seq in chosen_input_ids]
reject_attention_mask = [torch.ones_like(seq).bool() for seq in reject_input_ids]

View File

@ -23,16 +23,16 @@ PARENT_CONFIG_FILE="" # Path to a folder to save training config logs
PRETRAINED_MODEL_PATH="" # huggingface or local model path
PRETRAINED_TOKENIZER_PATH="" # huggingface or local tokenizer path
declare -a dataset=(
/Your/Preference/Data/arrow/part-00000
/Your/Preference/Data/arrow/part-00001
/Your/Preference/Data/arrow/part-00002
/Your/Preference/Data/arrow/part-00003
/Your/Preference/Data/arrow/part-00004
/Your/Preference/Data/arrow/part-00005
/Your/Preference/Data/arrow/part-00006
/Your/Preference/Data/arrow/part-00007
/Your/Preference/Data/arrow/part-00008
/Your/Preference/Data/arrow/part-00009
/Your/SFT/Data/arrow/part-00000
/Your/SFT/Data/arrow/part-00001
/Your/SFT/Data/arrow/part-00002
/Your/SFT/Data/arrow/part-00003
/Your/SFT/Data/arrow/part-00004
/Your/SFT/Data/arrow/part-00005
/Your/SFT/Data/arrow/part-00006
/Your/SFT/Data/arrow/part-00007
/Your/SFT/Data/arrow/part-00008
/Your/SFT/Data/arrow/part-00009
)
TIMESTAMP=$(date +%Y-%m-%d-%H-%M-%S)