Update main.py

Update padding to be applied on the left, matching padding_side='left' in the tokenizer settings. This just makes the code consistent with the tokenizer setting.
pull/328/head
dumpmemory 2023-04-02 00:00:01 +08:00 committed by GitHub
parent 4371f7a572
commit 5507f607be
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 2 additions and 2 deletions

View File

@@ -185,8 +185,8 @@ def main():
labels = [-100] * context_length + input_ids[mask_position+1:] labels = [-100] * context_length + input_ids[mask_position+1:]
pad_len = max_seq_length - len(input_ids) pad_len = max_seq_length - len(input_ids)
input_ids = input_ids + [tokenizer.pad_token_id] * pad_len input_ids = [tokenizer.pad_token_id] * pad_len + input_ids
labels = labels + [tokenizer.pad_token_id] * pad_len labels = [tokenizer.pad_token_id] * pad_len + labels
model_inputs["input_ids"].append(input_ids) model_inputs["input_ids"].append(input_ids)
model_inputs["labels"].append(labels) model_inputs["labels"].append(labels)