Update default hyperparameters

Remove hardcoded token IDs
pull/427/head
duzx16 2023-04-06 22:42:20 +08:00
parent a1ecafd91f
commit ea682a6f51
3 changed files with 7 additions and 7 deletions

View File

@@ -1,5 +1,5 @@
PRE_SEQ_LEN=8
CHECKPOINT=adgen-chatglm-6b-pt-8-1e-2
PRE_SEQ_LEN=128
CHECKPOINT=adgen-chatglm-6b-pt-128-2e-2
STEP=3000
CUDA_VISIBLE_DEVICES=0 python3 main.py \

View File

@@ -198,9 +198,9 @@ def main():
if len(b_ids) > data_args.max_target_length - 2:
b_ids = b_ids[: data_args.max_target_length - 2]
input_ids = a_ids + [150001, 150004] + b_ids + [150005]
input_ids = tokenizer.build_inputs_with_special_tokens(a_ids, b_ids)
context_length = input_ids.index(150004)
context_length = input_ids.index(tokenizer.bos_token_id)
mask_position = context_length - 1
labels = [-100] * context_length + input_ids[mask_position+1:]

View File

@@ -1,5 +1,5 @@
PRE_SEQ_LEN=8
LR=1e-2
PRE_SEQ_LEN=128
LR=2e-2
CUDA_VISIBLE_DEVICES=0 python3 main.py \
--do_train \