#!/bin/bash
#
# CI check: launch train.py with ./ci_scripts/train/ci_7B_sft.py through
# srun/torchrun, then verify that the expected number of *.pt files exists
# under $GITHUB_WORKSPACE/llm_ckpts/20.
#
# Required environment:
#   GITHUB_WORKSPACE  directory that contains llm_ckpts/.
#
# Exit status:
#   0  the training command succeeded and the file count matched.
#   1  GITHUB_WORKSPACE is unset, the training command failed, or the file
#      count was missing, non-numeric, or different from the expected value.
#      On a training or count failure, $GITHUB_WORKSPACE/llm_ckpts is removed.

# Refuse to run without a workspace: the rm -rf calls below would otherwise
# resolve to /llm_ckpts.
if [[ -z "${GITHUB_WORKSPACE:-}" ]]; then
  echo "GITHUB_WORKSPACE must be set" >&2
  exit 1
fi

ckpt_root="${GITHUB_WORKSPACE}/llm_ckpts"
ckpt_dir="${ckpt_root}/20"
expected_file_num=21
failed=0

# Remove any output from a previous run so stale files cannot satisfy the
# count check below.
rm -rf -- "${ckpt_dir}"

srun -p llm -N 1 torchrun --nnodes=1 --nproc_per_node=8 --master_port=29501 train.py --config ./ci_scripts/train/ci_7B_sft.py --launcher "torch"
train_status=$?
if (( train_status != 0 )); then
  echo "training command exited with status ${train_status}" >&2
  failed=1
fi

# The glob is stored unexpanded on purpose: it is expanded where it is used.
# file_dir is assigned before sourcing the helpers, as in the original order.
file_dir="${ckpt_dir}/*.pt"

source ./ci_scripts/common/basic_func.sh

# num_files comes from basic_func.sh, which is not visible from this script.
# NOTE(review): it presumably stores its result in the global file_num (read
# below); confirm against the helper. The argument is left unquoted exactly as
# before so the helper receives the same words it always has.
# shellcheck disable=SC2086,SC2154
num_files ${file_dir}

# Strip whitespace, then require a plain decimal number. An empty or garbage
# value must count as a failure instead of making the comparison itself error
# out and fall through as "ok". 10# forces base 10 so a leading zero is not
# read as octal.
actual_file_num="${file_num//[[:space:]]/}"
if ! [[ "${actual_file_num}" =~ ^[0-9]+$ ]] \
  || (( 10#${actual_file_num} != expected_file_num )); then
  echo "The num of files is not right"
  # Unquoted on purpose: the glob has to expand to list the files found.
  # shellcheck disable=SC2086
  ls -l $file_dir
  failed=1
fi

if (( failed )); then
  rm -rf -- "${ckpt_root}"
  exit 1
fi