mirror of https://github.com/InternLM/InternLM
fix(model/embedding.py): ci lint check error (#345)
* fix(ci): fix ci lint error
* fix(ci): fix ci lint error
parent 8464425a7b
commit 3b0eff0c8a
@@ -2,7 +2,6 @@ name: e2e-tests
 on:
   pull_request:
     branches:
-      - "main"
       - "develop"
     paths-ignore:
       - "doc/**"
@@ -12,45 +11,16 @@ env:
   SLURM_PARTITION: llm_s
 
 jobs:
-  check-requirements:
+  training_8GPU:
     runs-on: [t_cluster]
-    steps:
-      - name: mask env
-        run: |
-          echo "::add-mask::${{env.WORKSPACE_PREFIX}}"
-      - uses: actions/checkout@v3
-        with:
-          fetch-depth: 2
-      - name: check-requirements
-        run: |
-          source activate internlm-env-test
-          changed_files=$(git diff --name-only -r HEAD^1 HEAD)
-          echo $changed_files
-          if [[ $changed_files =~ "runtime.txt" ]]; then
-            pip install -r requirements/runtime.txt
-          fi
-
-          if [[ $changed_files =~ "torch.txt" ]]; then
-            pip install -r requirements/torch.txt
-          fi
-
-
-  e2e_tests:
-    if: ${{ always() }}
-    needs: check-requirements
-    runs-on: [t_cluster]
-    timeout-minutes: 30
+    timeout-minutes: 5
     steps:
       - name: mask env
         run: |
           echo "::add-mask::${{env.WORKSPACE_PREFIX}}"
       - uses: actions/checkout@v3
 
-      - name: e2e-test
+      - name: training_8GPU
         run: |
-          source activate internlm-env-test
+          source /mnt/petrelfs/share_data/llm_env/env/llm-flash2.0
           srun -p ${SLURM_PARTITION} --job-name=${GITHUB_RUN_ID}-${GITHUB_JOB} -n8 --ntasks-per-node=8 --cpus-per-task=4 --gpus-per-task=1 pytest -s -v --color=yes -m "training_8GPU" ./tests/test_training
-          srun -p ${SLURM_PARTITION} --job-name=${GITHUB_RUN_ID}-${GITHUB_JOB} -n16 --ntasks-per-node=8 --cpus-per-task=4 --gpus-per-task=1 pytest -s -v --color=yes -m "training_16GPU_8DP2TP" ./tests/test_training
-          srun -p ${SLURM_PARTITION} --job-name=${GITHUB_RUN_ID}-${GITHUB_JOB} -n16 --ntasks-per-node=8 --cpus-per-task=4 --gpus-per-task=1 pytest -s -v --color=yes -m "training_16GPU_8DP2TPSP" ./tests/test_training
-          srun -p ${SLURM_PARTITION} --job-name=${GITHUB_RUN_ID}-${GITHUB_JOB} -n16 --ntasks-per-node=8 --cpus-per-task=4 --gpus-per-task=1 pytest -s -v --color=yes -m "training_16GPU_8DP2PP" ./tests/test_training
-          srun -p ${SLURM_PARTITION} --job-name=${GITHUB_RUN_ID}-${GITHUB_JOB} -n16 --ntasks-per-node=8 --cpus-per-task=4 --gpus-per-task=1 pytest -s -v --color=yes -m "training_16GPU_8DP2PP_InterleavedOverlap" ./tests/test_training
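For context, the retained job selects tests by the pytest marker training_8GPU and launches them through Slurm with eight tasks and one GPU per task. A minimal sketch of how such a marked test could look; the module path and test body are illustrative assumptions, not taken from the repository:

# tests/test_training/test_example.py (hypothetical file name, for illustration only)
import pytest
import torch


@pytest.mark.training_8GPU  # selected by: pytest -m "training_8GPU" ./tests/test_training
def test_training_8GPU_smoke():
    # Placeholder check; a real test would run the distributed training loop
    # across the 8 GPUs that srun allocates (one GPU per task).
    assert torch.cuda.is_available()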
@@ -241,7 +241,10 @@ class DynamicNTKScalingRotaryEmbedding(RotaryEmbedding):
     https://github.com/huggingface/transformers/blob/eb8489971ac1415f67b0abdd1584fde8 \
         b659ced9/src/transformers/models/llama/modeling_llama.py#L147
     """
-    def __init__(self, dim: int, base=10000, scale_base=0, device=None, max_position_embeddings=2048, scaling_factor=1.0):
+    def __init__(
+        self, dim: int, base=10000, scale_base=0, device=None, max_position_embeddings=2048, scaling_factor=1.0
+    ):
         super().__init__(dim=dim, base=base, scale_base=scale_base, device=device)
         self.max_position_embeddings = max_position_embeddings
         self.scaling_factor = scaling_factor
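The reformatted constructor keeps the original signature and only splits it across lines to satisfy the line-length lint rule, so call sites are unaffected. A hypothetical construction, with illustrative argument values only:

# Illustrative values; dim would normally be the per-head dimension of the model.
rotary_emb = DynamicNTKScalingRotaryEmbedding(
    dim=128,
    base=10000,
    scale_base=0,
    device=None,
    max_position_embeddings=2048,
    scaling_factor=1.0,
)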
@@ -279,11 +282,14 @@ class DynamicNTKScalingRotaryEmbedding(RotaryEmbedding):
         else:
             seqlen = indexes + 1  # eval_forward
         if seqlen <= self.max_position_embeddings:
             # Reset the tables if the sequence length has changed,
             # or if we're on a new device (possibly due to tracing for instance)
-            if self._seq_len_cached > self.max_position_embeddings or seqlen > self._seq_len_cached \
-                or self._cos_cached.device != x.device or self._cos_cached.dtype != x.dtype:
+            if (
+                self._seq_len_cached > self.max_position_embeddings
+                or seqlen > self._seq_len_cached
+                or self._cos_cached.device != x.device
+                or self._cos_cached.dtype != x.dtype
+            ):
                 self._update(seqlen, x)
         else:
             self._update(seqlen, x)
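The second change replaces backslash line continuations with a parenthesized multi-line condition; PEP 8 recommends parentheses over backslashes for wrapping long expressions, and this form is presumably what the CI lint check was flagging. A generic sketch of the same pattern, with made-up names purely for illustration:

import torch


# Made-up helper mirroring the shape of the reformatted condition above;
# the name and signature are illustrative only.
def cache_is_stale(cached: torch.Tensor, cached_len: int, seqlen: int, max_len: int, x: torch.Tensor) -> bool:
    return (
        cached_len > max_len
        or seqlen > cached_len
        or cached.device != x.device
        or cached.dtype != x.dtype
    )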