fix based on comment

pull/538/head
877825076@qq.com 2023-12-13 12:15:22 +08:00
parent 39a2fb5677
commit 7cc343dafc
2 changed files with 4 additions and 1 deletion


@@ -186,7 +186,7 @@ class Engine:
         if to_gpu:
             batch_data = move_to_device(batch_data)
-        # For packed-dataset, batch_data is (micro_num, micro_num*micro_bsz),
+        # For packed-dataset, batch_data is (micro_num, micro_bsz*seq_len),
         # therefore 'batch_size' is equal to 'micro_num'
         # For nopacked-dataset, batch_data is (micro_num*micro_bsz, seq_len),
         # therefore 'batch_size' is equal to 'micro_num*micro_bsz'
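For reference, a minimal sketch of the shape convention the corrected comment describes (the sizes below are hypothetical, not values taken from InternLM):

import torch

micro_num, micro_bsz, seq_len = 4, 2, 8  # hypothetical sizes

# Packed dataset: each of the micro_num rows concatenates micro_bsz samples
# along the sequence dimension, so dim 0 ('batch_size') equals micro_num.
packed = torch.zeros(micro_num, micro_bsz * seq_len)
assert packed.shape == (micro_num, micro_bsz * seq_len)

# Non-packed dataset: one row per sample, so dim 0 ('batch_size')
# equals micro_num * micro_bsz.
nopacked = torch.zeros(micro_num * micro_bsz, seq_len)
assert nopacked.shape == (micro_num * micro_bsz, seq_len)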


@@ -193,6 +193,9 @@ class PipelineScheduler(BaseScheduler):
         # Pipeline schedule just puts data in memory,
         batch_data, actual_batch_size = engine.load_batch(data_iter, to_gpu=False)
+        # Even if 'use_flash_attn' is False, the data seen when 'load_batch' is called is still packed,
+        # because InternLM's current training dataset is packed, even when using dummy data.
+        # The unpack operation is performed in load_micro_batch().
         if check_data_is_packed(batch_data):
             micro_num = actual_batch_size
         else:
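For context, one plausible sketch of the packed-data check used above; this is an assumption about its behavior, and the repository's actual check_data_is_packed may key on different fields:

def check_data_is_packed(batch_data):
    # Hedged sketch: assumes batch_data is a (data, label) pair whose data
    # dict carries a 'cu_seqlens' entry marking sample boundaries when packed.
    data = batch_data[0] if isinstance(batch_data, (tuple, list)) else batch_data
    return isinstance(data, dict) and "cu_seqlens" in data

Under this reading, a packed batch already has micro_num rows, which is why the packed branch above can take micro_num = actual_batch_size directly.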