mirror of https://github.com/InternLM/InternLM
commit 30b21075e8 ("merge main")
@@ -9,11 +9,11 @@ on:
       - "**.md"
 env:
   WORKSPACE_PREFIX: $(echo $GITHUB_WORKSPACE |cut -d '/' -f 1-4)
-  SLURM_PARTITION: llm
+  SLURM_PARTITION: llm_s

 jobs:
   check-requirements:
-    runs-on: [lmtest]
+    runs-on: [t_cluster]
     steps:
       - name: mask env
         run: |

@@ -37,7 +37,7 @@ jobs:
   dataset-preparation:
     if: ${{ always() }}
     needs: check-requirements
-    runs-on: [lmtest]
+    runs-on: [t_cluster]
     steps:
       - name: mask env
         run: |

@@ -57,7 +57,7 @@ jobs:
   train:
     if: ${{ always() }}
     needs: check-requirements
-    runs-on: [lmtest]
+    runs-on: [t_cluster]
     timeout-minutes: 30
     steps:
       - name: mask env

@@ -83,18 +83,19 @@ jobs:
           source activate internlm-env-test
           export PYTHONPATH=$PWD:$PYTHONPATH
           sh ./ci_scripts/train/load_ckpt.sh 7B_load_new_ckpt ${GITHUB_RUN_ID}-${GITHUB_JOB}
-          rm -rf $GITHUB_WORKSPACE/llm_ckpts
+          rsync -av --remove-source-files $GITHUB_WORKSPACE/llm_ckpts ${{env.WORKSPACE_PREFIX}}/ci_clean_bak

       - name: torchrun-train
         run: |
           source activate internlm-env-test
           sh ./ci_scripts/train/torchrun.sh ${GITHUB_RUN_ID}-${GITHUB_JOB}
-          rm -rf $GITHUB_WORKSPACE/llm_ckpts
+          rsync -av --remove-source-files $GITHUB_WORKSPACE/llm_ckpts ${{env.WORKSPACE_PREFIX}}/ci_clean_bak

   convert-model-then-load:
     if: ${{ always() }}
     needs: check-requirements
-    runs-on: [lmtest]
+    runs-on: [t_cluster]
     timeout-minutes: 15
     steps:
       - name: mask env
         run: |

@@ -107,13 +108,14 @@ jobs:
           export PYTHONPATH=$PWD:$PYTHONPATH
           sh ./ci_scripts/model/convert_to_hf.sh
           cd ./hf_ckpt
-          srun -p ${SLURM_PARTITION} --job-name=${GITHUB_RUN_ID}-${GITHUB_JOB} --gpus-per-task=2 python ../ci_scripts/model/loaded_as_transformer.py
+          srun -p ${SLURM_PARTITION} --quotatype=spot --job-name=${GITHUB_RUN_ID}-${GITHUB_JOB} --gpus-per-task=2 python ../ci_scripts/model/loaded_as_transformer.py
           cd ..
-          rm -rf $GITHUB_WORKSPACE/hf_ckpt
+          rsync -av --remove-source-files $GITHUB_WORKSPACE/hf_ckpt ${{env.WORKSPACE_PREFIX}}/ci_clean_bak
   load-chat-model-in-hf:
     if: ${{ always() }}
     needs: check-requirements
-    runs-on: [lmtest]
+    runs-on: [t_cluster]
     timeout-minutes: 15
     steps:
       - name: mask env
         run: |

@@ -123,4 +125,4 @@ jobs:
       - name: chat-model-in-hf
         run: |
           source activate internlm-env-test
-          srun -p ${SLURM_PARTITION} --job-name=${GITHUB_RUN_ID}-${GITHUB_JOB} --gpus-per-task=2 python ./ci_scripts/model/demo_load_7B_chat_model.py
+          srun -p ${SLURM_PARTITION} --quotatype=spot --job-name=${GITHUB_RUN_ID}-${GITHUB_JOB} --gpus-per-task=2 python ./ci_scripts/model/demo_load_7B_chat_model.py

@@ -1,3 +1,4 @@
 #!/bin/bash

 readonly DATA_VOLUME=$(echo $GITHUB_WORKSPACE | cut -d '/' -f 1-4)/data
+readonly CLEAN_PATH=$(echo $GITHUB_WORKSPACE | cut -d '/' -f 1-4)/ci_clean_bak
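
Both DATA_VOLUME and CLEAN_PATH here, like WORKSPACE_PREFIX in the workflow above, are derived by keeping only the first few components of $GITHUB_WORKSPACE. A minimal sketch of what that cut expression evaluates to, using a made-up workspace path (the real runner path will differ):

#!/bin/bash
# Hypothetical example path; not the actual CI runner layout.
GITHUB_WORKSPACE=/mnt/cache/ci-user/actions-runner/InternLM

# Splitting on '/' makes field 1 the empty string before the leading slash,
# so fields 1-4 keep the first three directory components.
prefix=$(echo $GITHUB_WORKSPACE | cut -d '/' -f 1-4)

echo "$prefix"               # /mnt/cache/ci-user
echo "$prefix/data"          # what DATA_VOLUME would resolve to
echo "$prefix/ci_clean_bak"  # what CLEAN_PATH would resolve to
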
@@ -3,6 +3,7 @@ set -x

 source ./ci_scripts/common/variables.sh
 [[ -n ${DATA_VOLUME} ]] || { echo "should set DATA_VOLUME first before ci, exit."; exit 1; }
+[[ -n ${CLEAN_PATH} ]] || { echo "should set CLEAN_PATH first before ci, exit."; exit 1; }

 readonly SRC_DATASET_META=${DATA_VOLUME}/lm_data/alpaca_data/alpaca_data.json
 readonly RESULTS=${DATA_VOLUME}/lm_data/alpaca_data/result

@@ -19,7 +20,7 @@ source ./ci_scripts/common/basic_func.sh
 echo "start to test alpaca_tokenizer.py."

 if [[ -d ${RESULTS} ]]; then
-    if ! rm -rf ${RESULTS}/*; then
+    if ! rsync -av --remove-source-files ${RESULTS} ${CLEAN_PATH}; then
         echo "cleaning test data in ${RESULTS} failed, exit."
         exit 1
     fi

@@ -41,8 +42,8 @@ for file in ${file_list[@]}; do
     fi
 done

-# clean the test files.
-if ! rm -rf ${RESULTS}/*; then
+# move the test files.
+if ! rsync -av --remove-source-files ${RESULTS} ${CLEAN_PATH}; then
     echo "cleaning test data in ${RESULTS} failed."
     exit_code=$(($exit_code + 1))
 fi
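
The recurring edit in these scripts replaces deletion with a move: instead of rm -rf, test artifacts are rsync'ed into ${CLEAN_PATH} and removed from the source, so they stay available for inspection after the run. A small standalone sketch of that behaviour with throwaway paths (not the CI ones):

#!/bin/bash
set -e

src=/tmp/demo_results     # stand-in for ${RESULTS}
bak=/tmp/demo_clean_bak   # stand-in for ${CLEAN_PATH}
mkdir -p "${src}" "${bak}"
touch "${src}/a.bin" "${src}/b.bin"

# -a copies recursively with attributes, -v is verbose, and
# --remove-source-files deletes each file once it has been transferred
# (empty source directories are left behind).
rsync -av --remove-source-files "${src}" "${bak}"

ls "${bak}/demo_results"  # a.bin b.bin
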
@@ -2,7 +2,8 @@
 set -x

 source ./ci_scripts/common/variables.sh
-[[ -n ${DATA_VOLUME} ]] || { echo "should set DATA_VOLUME first before ci."; exit 1; }
+[[ -n ${DATA_VOLUME} ]] || { echo "should set DATA_VOLUME first before ci, exit."; exit 1; }
+[[ -n ${CLEAN_PATH} ]] || { echo "should set CLEAN_PATH first before ci, exit."; exit 1; }

 readonly DATA=${DATA_VOLUME}/lm_data/cn_data/raw_data.txt
 readonly RESULT=${DATA_VOLUME}/lm_data/cn_data/result.bin

@ -16,13 +17,13 @@ echo "start to test tokenizer.py."
|
|||
|
||||
num=$(num_files "${RESULTS}")
|
||||
if [[ ${num} -gt 0 ]]; then
|
||||
if ! rm -rf ${RESULTS}; then
|
||||
if ! rsync -av --remove-source-files ${RESULTS} ${CLEAN_PATH}; then
|
||||
echo "cleaning test data ${RESULTS} failed, exit."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
srun -p ${SLURM_PARTITION} --job-name=$1 --gpus-per-task=1 python tools/tokenizer.py --text_input_path ${DATA} --bin_output_path ${RESULT}
|
||||
srun -p ${SLURM_PARTITION} --quotatype=spot --job-name=$1 --gpus-per-task=1 python tools/tokenizer.py --text_input_path ${DATA} --bin_output_path ${RESULT}
|
||||
[[ $? -ne 0 ]] && { echo "test tokenizer.py failed."; exit_code=$(($exit_code + 1)); }
|
||||
|
||||
file_list=($RESULT $RESULT_META)
|
||||
|
@@ -33,8 +34,8 @@ for file in ${file_list[@]}; do
     fi
 done

-# clean the test files.
-if ! rm -rf ${RESULTS}/*; then
+# move the test files.
+if ! rsync -av --remove-source-files ${RESULTS} ${CLEAN_PATH}; then
     echo "cleaning cached file in ${RESULTS} failed."
     exit_code=$(($exit_code + 1))
 fi

@@ -4,6 +4,7 @@ set -x
 source ./ci_scripts/common/variables.sh
 [[ -n ${DATA_VOLUME} ]] || { echo "should set DATA_VOLUME first before ci, exit."; exit 1; }
 [[ -n ${GITHUB_WORKSPACE} ]] || { echo "should set GITHUB_WORKSPACE first before ci, exit."; exit 1; }
+[[ -n ${CLEAN_PATH} ]] || { echo "should set CLEAN_PATH first before ci, exit."; exit 1; }

 readonly CKPTS_INPUT="${DATA_VOLUME}/lm_data/alpaca_data/llm_ckpts/20"
 readonly CKPTS_OUTPUT="${GITHUB_WORKSPACE}/hf_ckpt"

@@ -18,7 +19,7 @@ source ./ci_scripts/common/basic_func.sh
 echo "start to test convert2hf.py."

 if [[ -d ${CKPTS_OUTPUT} ]]; then
-    if ! rm -rf ${CKPTS_OUTPUT}/*; then
+    if ! rsync -av --remove-source-files ${CKPTS_OUTPUT}/* ${CLEAN_PATH}; then
         echo "cleaning cached file in ${CKPTS_OUTPUT} failed, exit."
         exit 1
     fi

@@ -1,7 +1,10 @@
 #!/bin/bash
 set -x

+source ./ci_scripts/common/variables.sh
+[[ -n ${GITHUB_WORKSPACE} ]] || { echo "should set GITHUB_WORKSPACE first before ci, exit."; exit 1; }
+[[ -n ${CLEAN_PATH} ]] || { echo "should set CLEAN_PATH first before ci, exit."; exit 1; }

 readonly CKPTS_PATH="$GITHUB_WORKSPACE/llm_ckpts"
 readonly CKPTS40_PATH="$GITHUB_WORKSPACE/llm_ckpts/40"
 readonly CKPTS40_OUTPUT="${CKPTS40_PATH}/*.pt"

@@ -19,7 +22,7 @@ if [[ ! -f ${file} ]]; then
     exit_code=$(($exit_code + 1))
 fi

-srun -p ${SLURM_PARTITION} --exclusive --job-name=$2 -n 8 --ntasks-per-node=8 --gpus-per-task=1 python train.py --config ${file}
+srun -p ${SLURM_PARTITION} --exclusive --quotatype=spot --job-name=$2 -n 8 --ntasks-per-node=8 --gpus-per-task=1 python train.py --config ${file}
 [[ $? -ne 0 ]] && { echo "test slurm training failed."; exit_code=$(($exit_code + 1)); }

@@ -29,10 +32,12 @@ if [[ ${num} -ne ${expected_num} ]]; then
     exit_code=$(($exit_code + 1))
 fi

-# clean the test files.
-if ! rm -rf ${CKPTS_PATH}/*; then
-    echo "cleaning cached file in ${CKPTS_PATH} failed."
-    exit_code=$(($exit_code + 1))
+# move the test files.
+if [[ -d ${CKPTS_PATH} ]]; then
+    if ! rsync -av --remove-source-files ${CKPTS_PATH} ${CLEAN_PATH}; then
+        echo "cleaning cached file in ${CKPTS_PATH} failed."
+        exit_code=$(($exit_code + 1))
+    fi
 fi

 exit $exit_code

@@ -1,7 +1,10 @@
 #!/bin/bash
 set -x

+source ./ci_scripts/common/variables.sh
+[[ -n ${GITHUB_WORKSPACE} ]] || { echo "should set GITHUB_WORKSPACE first before ci, exit."; exit 1; }
+[[ -n ${CLEAN_PATH} ]] || { echo "should set CLEAN_PATH first before ci, exit."; exit 1; }

 readonly CKPTS_PATH="$GITHUB_WORKSPACE/llm_ckpts"
 readonly CKPTS20_PATH="$GITHUB_WORKSPACE/llm_ckpts/20"
 readonly CKPTS20_OUTPUT="${CKPTS20_PATH}/*.pt"

@@ -13,13 +16,13 @@ source ./ci_scripts/common/basic_func.sh
 echo "start to test slurm training."

 if [[ -d ${CKPTS20_PATH} ]]; then
-    if ! rm -rf ${CKPTS20_PATH}/*; then
+    if ! rsync -av --remove-source-files ${CKPTS20_PATH} ${CLEAN_PATH}; then
         echo "cleaning cached file in ${CKPTS20_PATH} failed, exit."
         exit 1
     fi
 fi

-srun -p ${SLURM_PARTITION} --exclusive --job-name=$1 -n 8 --ntasks-per-node=8 --gpus-per-task=1 python train.py --config ./ci_scripts/train/ci_7B_sft.py
+srun -p ${SLURM_PARTITION} --exclusive --quotatype=spot --job-name=$1 -n 8 --ntasks-per-node=8 --gpus-per-task=1 python train.py --config ./ci_scripts/train/ci_7B_sft.py
 [[ $? -ne 0 ]] && { echo "test slurm training failed."; exit_code=$(($exit_code + 1)); }

 num=$(num_files "${CKPTS20_OUTPUT}")
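
The checkpoint count checks in these training scripts rely on a num_files helper that comes from ci_scripts/common/basic_func.sh and is not part of this diff. A rough sketch of what a glob-counting helper of that shape could look like, purely as an assumption (the real implementation may differ):

#!/bin/bash
# Count regular files matching a glob such as "/path/llm_ckpts/20/*.pt".
num_files() {
    local pattern=$1
    # The pattern is left unquoted on purpose so the shell expands the glob.
    ls -l ${pattern} 2>/dev/null | grep -c '^-'
}

num=$(num_files "/tmp/llm_ckpts/20/*.pt")
echo "${num}"  # number of matching *.pt files, 0 if none
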
@@ -1,7 +1,10 @@
 #!/bin/bash
 set -x

+source ./ci_scripts/common/variables.sh
+[[ -n ${GITHUB_WORKSPACE} ]] || { echo "should set GITHUB_WORKSPACE first before ci, exit."; exit 1; }
+[[ -n ${CLEAN_PATH} ]] || { echo "should set CLEAN_PATH first before ci, exit."; exit 1; }

 readonly CKPTS_PATH="$GITHUB_WORKSPACE/llm_ckpts"
 readonly CKPTS20_PATH="$GITHUB_WORKSPACE/llm_ckpts/20"
 readonly CKPTS_OUTPUT="${CKPTS20_PATH}/*.pt"

@@ -13,13 +16,13 @@ source ./ci_scripts/common/basic_func.sh
 echo "start to test torch training."

 if [[ -d ${CKPTS20_PATH} ]]; then
-    if ! rm -rf ${CKPTS20_PATH}/*; then
+    if ! rsync -av --remove-source-files ${CKPTS20_PATH} ${CLEAN_PATH}; then
         echo "cleaning cached file in ${CKPTS20_PATH} failed, exit."
         exit 1
     fi
 fi

-srun -p ${SLURM_PARTITION} --exclusive --job-name=$1 -N 1 torchrun --nnodes=1 --nproc_per_node=8 --master_port=29501 train.py --config ./ci_scripts/train/ci_7B_sft.py --launcher torch
+srun -p ${SLURM_PARTITION} --exclusive --quotatype=spot --job-name=$1 -N 1 torchrun --nnodes=1 --nproc_per_node=8 --master_port=29501 train.py --config ./ci_scripts/train/ci_7B_sft.py --launcher torch
 [[ $? -ne 0 ]] && { echo "test torch training failed."; exit_code=$(($exit_code + 1)); }

 num=$(num_files "${CKPTS_OUTPUT}")

@@ -28,8 +31,8 @@ if [[ ${num} -ne ${expected_num} ]]; then
     exit_code=$(($exit_code + 1))
 fi

-# clean the test files.
-if ! rm -rf ${CKPTS_PATH}/*; then
+# move the test files.
+if ! rsync -av --remove-source-files ${CKPTS_PATH}/* ${CLEAN_PATH}; then
     echo "cleaning cached file in ${CKPTS_PATH} failed."
     exit_code=$(($exit_code + 1))
 fi

@@ -28,10 +28,19 @@ from torch import nn
 from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

 from transformers.activations import ACT2FN
-from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast, SequenceClassifierOutputWithPast
+from transformers.modeling_outputs import (
+    BaseModelOutputWithPast,
+    CausalLMOutputWithPast,
+    SequenceClassifierOutputWithPast,
+)
 from transformers.modeling_utils import PreTrainedModel
 from transformers.generation.streamers import BaseStreamer
-from transformers.utils import add_start_docstrings, add_start_docstrings_to_model_forward, logging, replace_return_docstrings
+from transformers.utils import (
+    add_start_docstrings,
+    add_start_docstrings_to_model_forward,
+    logging,
+    replace_return_docstrings,
+)
 from configuration_internlm import InternLMConfig

@@ -39,6 +48,7 @@ logger = logging.get_logger(__name__)

 _CONFIG_FOR_DOC = "InternLMConfig"

+
 # Copied from transformers.models.bart.modeling_bart._make_causal_mask
 def _make_causal_mask(
     input_ids_shape: torch.Size, dtype: torch.dtype, device: torch.device, past_key_values_length: int = 0

@@ -437,6 +447,7 @@ class InternLMModel(InternLMPreTrainedModel):
     Args:
         config: InternLMConfig
     """
+
     _auto_class = "AutoModel"

     def __init__(self, config: InternLMConfig):

@@ -765,7 +776,7 @@ class InternLMForCausalLM(InternLMPreTrainedModel):
         for layer_past in past_key_values:
             reordered_past += (tuple(past_state.index_select(0, beam_idx) for past_state in layer_past),)
         return reordered_past

-
     def build_inputs(self, tokenizer, query: str, history: List[Tuple[str, str]] = []):
         prompt = ""
         for record in history:

@@ -774,43 +785,49 @@ class InternLMForCausalLM(InternLMPreTrainedModel):
             prompt += "<s>"
         prompt += f"""<|User|>:{query}<eoh>\n<|Bot|>:"""
         return tokenizer([prompt], return_tensors="pt")

     @torch.no_grad()
-    def chat(self,
-             tokenizer,
-             query: str,
-             history: List[Tuple[str, str]] = [],
-             streamer: Optional[BaseStreamer] = None,
-             max_new_tokens: int = 1024,
-             do_sample: bool = True,
-             temperature: float = 0.8,
-             top_p: float = 0.8,
-             **kwargs):
+    def chat(
+        self,
+        tokenizer,
+        query: str,
+        history: List[Tuple[str, str]] = [],
+        streamer: Optional[BaseStreamer] = None,
+        max_new_tokens: int = 1024,
+        do_sample: bool = True,
+        temperature: float = 0.8,
+        top_p: float = 0.8,
+        **kwargs,
+    ):
         inputs = self.build_inputs(tokenizer, query, history)
         inputs = {k: v.to(self.device) for k, v in inputs.items() if torch.is_tensor(v)}
-        outputs = self.generate(**inputs,
-                                streamer=streamer,
-                                max_new_tokens=max_new_tokens,
-                                do_sample=do_sample,
-                                temperature=temperature,
-                                top_p=top_p,
-                                **kwargs)
-        outputs = outputs[0].cpu().tolist()[len(inputs["input_ids"][0]):]
+        outputs = self.generate(
+            **inputs,
+            streamer=streamer,
+            max_new_tokens=max_new_tokens,
+            do_sample=do_sample,
+            temperature=temperature,
+            top_p=top_p,
+            **kwargs,
+        )
+        outputs = outputs[0].cpu().tolist()[len(inputs["input_ids"][0]) :]
         response = tokenizer.decode(outputs, skip_special_tokens=True)
         response = response.split("<eoa>")[0]
         history = history + [(query, response)]
         return response, history

     @torch.no_grad()
-    def stream_chat(self,
-                    tokenizer,
-                    query: str,
-                    history: List[Tuple[str, str]] = [],
-                    max_new_tokens: int = 1024,
-                    do_sample: bool = True,
-                    temperature: float = 0.8,
-                    top_p: float = 0.8,
-                    **kwargs):
+    def stream_chat(
+        self,
+        tokenizer,
+        query: str,
+        history: List[Tuple[str, str]] = [],
+        max_new_tokens: int = 1024,
+        do_sample: bool = True,
+        temperature: float = 0.8,
+        top_p: float = 0.8,
+        **kwargs,
+    ):
         """
         Return a generator in format: (response, history)
         Eg.

@@ -856,12 +873,12 @@ class InternLMForCausalLM(InternLMPreTrainedModel):
                 tokenizer=tokenizer,
                 query=query,
                 streamer=ChatStreamer(tokenizer=tokenizer),
-                history=history,
+                history=history,
                 max_new_tokens=max_new_tokens,
                 do_sample=do_sample,
                 temperature=temperature,
                 top_p=top_p,
-                **kwargs
+                **kwargs,
             )

         def consumer():

@@ -869,7 +886,7 @@ class InternLMForCausalLM(InternLMPreTrainedModel):
             producer.start()
             while True:
                 res = response_queue.get()
-                if res is None:
+                if res is not None:
                     return
                 yield res
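
For context, chat() and stream_chat() are the user-facing entry points of this modeling file. A minimal usage sketch, assuming a locally converted chat checkpoint; the path and prompts below are placeholders, not part of this change:

from transformers import AutoModelForCausalLM, AutoTokenizer

ckpt = "./hf_ckpt"  # hypothetical path to a converted InternLM chat checkpoint
tokenizer = AutoTokenizer.from_pretrained(ckpt, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(ckpt, trust_remote_code=True).cuda().eval()

# Single-turn call: returns the reply plus the updated history.
response, history = model.chat(tokenizer, "hello", history=[])
print(response)

# Streaming variant: yields (partial_response, history) tuples as decoding progresses.
for partial_response, history in model.stream_chat(tokenizer, "tell me a joke", history=history):
    print(partial_response)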