diff --git a/.github/workflows/e2e_test.yaml b/.github/workflows/e2e_test.yaml
index 7610b07..c70e69d 100644
--- a/.github/workflows/e2e_test.yaml
+++ b/.github/workflows/e2e_test.yaml
@@ -2,7 +2,6 @@ name: e2e-tests
 on:
   pull_request:
     branches:
-      - "main"
       - "develop"
     paths-ignore:
       - "doc/**"
@@ -12,45 +11,16 @@ env:
   SLURM_PARTITION: llm_s
 
 jobs:
-  check-requirements:
+  training_8GPU:
     runs-on: [t_cluster]
-    steps:
-      - name: mask env
-        run: |
-          echo "::add-mask::${{env.WORKSPACE_PREFIX}}"
-      - uses: actions/checkout@v3
-        with:
-          fetch-depth: 2
-      - name: check-requirements
-        run: |
-          source activate internlm-env-test
-          changed_files=$(git diff --name-only -r HEAD^1 HEAD)
-          echo $changed_files
-          if [[ $changed_files =~ "runtime.txt" ]]; then
-            pip install -r requirements/runtime.txt
-          fi
-
-          if [[ $changed_files =~ "torch.txt" ]]; then
-            pip install -r requirements/torch.txt
-          fi
-
-
-  e2e_tests:
-    if: ${{ always() }}
-    needs: check-requirements
-    runs-on: [t_cluster]
-    timeout-minutes: 30
+    timeout-minutes: 5
     steps:
       - name: mask env
         run: |
           echo "::add-mask::${{env.WORKSPACE_PREFIX}}"
       - uses: actions/checkout@v3
-      - name: e2e-test
+      - name: training_8GPU
        run: |
-          source activate internlm-env-test
+          source /mnt/petrelfs/share_data/llm_env/env/llm-flash2.0
           srun -p ${SLURM_PARTITION} --job-name=${GITHUB_RUN_ID}-${GITHUB_JOB} -n8 --ntasks-per-node=8 --cpus-per-task=4 --gpus-per-task=1 pytest -s -v --color=yes -m "training_8GPU" ./tests/test_training
-          srun -p ${SLURM_PARTITION} --job-name=${GITHUB_RUN_ID}-${GITHUB_JOB} -n16 --ntasks-per-node=8 --cpus-per-task=4 --gpus-per-task=1 pytest -s -v --color=yes -m "training_16GPU_8DP2TP" ./tests/test_training
-          srun -p ${SLURM_PARTITION} --job-name=${GITHUB_RUN_ID}-${GITHUB_JOB} -n16 --ntasks-per-node=8 --cpus-per-task=4 --gpus-per-task=1 pytest -s -v --color=yes -m "training_16GPU_8DP2TPSP" ./tests/test_training
-          srun -p ${SLURM_PARTITION} --job-name=${GITHUB_RUN_ID}-${GITHUB_JOB} -n16 --ntasks-per-node=8 --cpus-per-task=4 --gpus-per-task=1 pytest -s -v --color=yes -m "training_16GPU_8DP2PP" ./tests/test_training
-          srun -p ${SLURM_PARTITION} --job-name=${GITHUB_RUN_ID}-${GITHUB_JOB} -n16 --ntasks-per-node=8 --cpus-per-task=4 --gpus-per-task=1 pytest -s -v --color=yes -m "training_16GPU_8DP2PP_InterleavedOverlap" ./tests/test_training
 
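Note: the workflow above selects tests by pytest marker (pytest -m "training_8GPU"). As a minimal sketch, such a marker would be attached inside ./tests/test_training roughly as follows; the test name and body here are hypothetical, only the marker name comes from the workflow:

    import pytest

    @pytest.mark.training_8GPU
    def test_training_8gpu():
        # Each of the 8 ranks launched by srun (-n8) runs this test; the
        # body (not shown in this diff) would build the model on its rank
        # and assert on training behaviour (e.g. loss convergence).
        ...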
diff --git a/internlm/model/embedding.py b/internlm/model/embedding.py
index 01a9d56..d177053 100644
--- a/internlm/model/embedding.py
+++ b/internlm/model/embedding.py
@@ -241,11 +241,14 @@ class DynamicNTKScalingRotaryEmbedding(RotaryEmbedding):
         https://github.com/huggingface/transformers/blob/eb8489971ac1415f67b0abdd1584fde8 \
             b659ced9/src/transformers/models/llama/modeling_llama.py#L147
     """
-    def __init__(self, dim: int, base=10000, scale_base=0, device=None, max_position_embeddings=2048, scaling_factor=1.0):
+
+    def __init__(
+        self, dim: int, base=10000, scale_base=0, device=None, max_position_embeddings=2048, scaling_factor=1.0
+    ):
         super().__init__(dim=dim, base=base, scale_base=scale_base, device=device)
         self.max_position_embeddings = max_position_embeddings
         self.scaling_factor = scaling_factor
-    
+
     def _update(self, seqlen, x):
         self._seq_len_cached = seqlen
         if seqlen > self.max_position_embeddings:
@@ -271,7 +274,7 @@ class DynamicNTKScalingRotaryEmbedding(RotaryEmbedding):
         self._sin_cached = (torch.sin(freqs) * scale).to(x.dtype)
         self._cos_k_cached = (torch.cos(freqs) / scale).to(x.dtype)
         self._sin_k_cached = (torch.sin(freqs) / scale).to(x.dtype)
-    
+
     def _update_cos_sin_cache(self, x, indexes):
         """x: (batch, seqlen, nheads, headdim) or (batch, seqlen, 3, nheads, headdim)"""
         if not isinstance(indexes, int):
@@ -279,11 +282,14 @@ class DynamicNTKScalingRotaryEmbedding(RotaryEmbedding):
         else:
             seqlen = indexes + 1  # eval_forward
         if seqlen <= self.max_position_embeddings:
-            # Reset the tables if the sequence length has changed, 
+            # Reset the tables if the sequence length has changed,
             # or if we're on a new device (possibly due to tracing for instance)
-            if self._seq_len_cached > self.max_position_embeddings or seqlen > self._seq_len_cached \
-                or self._cos_cached.device != x.device or self._cos_cached.dtype != x.dtype:
+            if (
+                self._seq_len_cached > self.max_position_embeddings
+                or seqlen > self._seq_len_cached
+                or self._cos_cached.device != x.device
+                or self._cos_cached.dtype != x.dtype
+            ):
                 self._update(seqlen, x)
         else:
             self._update(seqlen, x)
-    
\ No newline at end of file
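Note: the diff context truncates the body of _update. For orientation, the dynamic NTK trick it implements (per the HuggingFace modeling_llama.py link in the class docstring) rescales the rotary base once seqlen exceeds max_position_embeddings. A minimal sketch assuming the HF formula; the standalone function is illustrative, not this file's actual code:

    import torch

    def ntk_scaled_inv_freq(dim, base, seqlen, max_position_embeddings, scaling_factor, device=None):
        # Grow the rotary base with the requested sequence length so that
        # positions beyond the trained context are interpolated smoothly
        # (NTK-aware scaling).
        base = base * (
            (scaling_factor * seqlen / max_position_embeddings) - (scaling_factor - 1)
        ) ** (dim / (dim - 2))
        # Standard RoPE inverse frequencies, computed from the rescaled base.
        return 1.0 / (base ** (torch.arange(0, dim, 2, device=device).float() / dim))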
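On the reformatted cache-reset condition in _update_cos_sin_cache: with max_position_embeddings=2048, a request for seqlen=4096 always recomputes the NTK-scaled tables (the outer else branch), and a later request for seqlen=512 also recomputes, because _seq_len_cached (now 4096) exceeds 2048 and the unscaled tables must be restored. The recompute is skipped only when the cache already covers an unscaled seqlen on the same device and dtype, which is exactly what the multi-line boolean expression checks.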