mirror of https://github.com/InternLM/InternLM
fix based on comment
parent 39a2fb5677
commit 7cc343dafc
@@ -186,7 +186,7 @@ class Engine:
         if to_gpu:
             batch_data = move_to_device(batch_data)

-        # For packed-dataset, batch_data is (micro_num, micro_num*micro_bsz),
+        # For packed-dataset, batch_data is (micro_num, micro_bsz*seq_len),
         # therefore 'batch_size' is equal to 'micro_num'
         # For nopacked-dataset, batch_data is (micro_num*micro_bsz, seq_len),
         # therefore 'batch_size' is equal to 'micro_num*micro_bsz'
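The shape convention described by the corrected comment can be checked with a small standalone sketch. This is only an illustration of the (micro_num, micro_bsz*seq_len) versus (micro_num*micro_bsz, seq_len) layouts; the variable names and concrete sizes below are made up for the example and are not taken from InternLM's code.

import torch

micro_num, micro_bsz, seq_len = 4, 2, 8

# Packed dataset: each row holds one micro batch, i.e. micro_bsz samples
# concatenated along the sequence dimension, so the leading dimension
# (what load_batch reports as batch_size) equals micro_num.
packed_tokens = torch.randint(0, 100, (micro_num, micro_bsz * seq_len))
assert packed_tokens.shape[0] == micro_num

# Non-packed dataset: each row holds a single sample of length seq_len,
# so the leading dimension equals micro_num * micro_bsz.
unpacked_tokens = torch.randint(0, 100, (micro_num * micro_bsz, seq_len))
assert unpacked_tokens.shape[0] == micro_num * micro_bsz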
@@ -193,6 +193,9 @@ class PipelineScheduler(BaseScheduler):
         # Pipeline schedule just puts data in memory,
         batch_data, actual_batch_size = engine.load_batch(data_iter, to_gpu=False)

+        # Even if 'use_flash_attn' is False, the data seen when 'load_batch' is called is still packed,
+        # because InternLM's current train dataset is packed, even when using dummy data.
+        # The unpack operation is performed in load_micro_batch().
         if check_data_is_packed(batch_data):
             micro_num = actual_batch_size
         else:
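The added comment explains why the pipeline scheduler must branch on whether the loaded batch is packed. Below is a minimal sketch of that kind of shape-based branch; check_data_is_packed and load_micro_batch are the names used in InternLM, but the helper looks_packed, the 'input_ids' key, and the division in the else branch are assumptions made for this illustration, not the repository's actual implementation.

import torch

def looks_packed(tokens, micro_bsz, seq_len):
    # Hypothetical test: a packed batch keeps micro_bsz * seq_len tokens per
    # row, while a non-packed batch keeps only seq_len tokens per row.
    return tokens.shape[-1] == micro_bsz * seq_len

micro_num, micro_bsz, seq_len = 4, 2, 8

# Pretend this came from load_batch(..., to_gpu=False); the dict layout and
# the 'input_ids' key are assumptions for this sketch.
batch_data = {"input_ids": torch.zeros(micro_num, micro_bsz * seq_len, dtype=torch.long)}
actual_batch_size = batch_data["input_ids"].shape[0]

if looks_packed(batch_data["input_ids"], micro_bsz, seq_len):
    found_micro_num = actual_batch_size               # packed: rows == micro_num
else:
    found_micro_num = actual_batch_size // micro_bsz  # non-packed: rows == micro_num * micro_bsz

assert found_micro_num == micro_num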