fix based on comment

pull/538/head
877825076@qq.com 2023-12-13 12:15:22 +08:00
parent 39a2fb5677
commit 7cc343dafc
2 changed files with 4 additions and 1 deletion


@@ -186,7 +186,7 @@ class Engine:
         if to_gpu:
             batch_data = move_to_device(batch_data)
-        # For packed-dataset, batch_data is (micro_num, micro_num*micro_bsz),
+        # For packed-dataset, batch_data is (micro_num, micro_bsz*seq_len),
         # therefore 'batch_size' is equal to 'micro_num'
         # For nopacked-dataset, batch_data is (micro_num*micro_bsz, seq_len),
         # therefore 'batch_size' is equal to 'micro_num*micro_bsz'
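For reference, a minimal sketch of the shape convention the corrected comment describes (the sizes below are hypothetical, not values taken from InternLM):

import torch

micro_num, micro_bsz, seq_len = 4, 2, 8  # hypothetical sizes

# Packed dataset: each of the micro_num rows concatenates micro_bsz samples
# along the sequence dimension, so dim 0 ('batch_size') equals micro_num.
packed = torch.zeros(micro_num, micro_bsz * seq_len)
assert packed.shape == (micro_num, micro_bsz * seq_len)

# Non-packed dataset: one row per sample, so dim 0 ('batch_size')
# equals micro_num * micro_bsz.
nopacked = torch.zeros(micro_num * micro_bsz, seq_len)
assert nopacked.shape == (micro_num * micro_bsz, seq_len)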


@@ -193,6 +193,9 @@ class PipelineScheduler(BaseScheduler):
         # Pipeline schedule just puts data in memory,
         batch_data, actual_batch_size = engine.load_batch(data_iter, to_gpu=False)
+        # Even if 'use_flash_attn' is False, the data seen when 'load_batch' is called is still packed,
+        # because InternLM's current training dataset is packed, even when using dummy data.
+        # The unpack operation is performed in load_micro_batch().
         if check_data_is_packed(batch_data):
             micro_num = actual_batch_size
         else:
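For context, one plausible sketch of the packed-data check used above; this is an assumption about its behavior, and the repository's actual check_data_is_packed may key on different fields:

def check_data_is_packed(batch_data):
    # Hedged sketch: assumes batch_data is a (data, label) pair whose data
    # dict carries a 'cu_seqlens' entry marking sample boundaries when packed.
    data = batch_data[0] if isinstance(batch_data, (tuple, list)) else batch_data
    return isinstance(data, dict) and "cu_seqlens" in data

Under this reading, a packed batch already has micro_num rows, which is why the packed branch above can take micro_num = actual_batch_size directly.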