mirror of https://github.com/InternLM/InternLM
fix dataset types when using random dataset (#489)
parent
5d3242027a
commit
07026d1821
|
@ -145,3 +145,4 @@ core.*
|
|||
llm_ckpts
|
||||
events.*
|
||||
memory_trace
|
||||
RUN*/
|
||||
|
|
|
@ -226,6 +226,7 @@ def get_train_data_loader(
|
|||
dataset_types = list(get_dataset_type_ids_map(train_folder).keys())
|
||||
|
||||
if not train_folder:
|
||||
dataset_types = ["en", "cn", "code"]
|
||||
train_ds = RandomDataset(num_samples=1000000, max_len=data_cfg.seq_len)
|
||||
if data_cfg.pack_sample_into_one:
|
||||
train_ds = PackedDatasetWithoutCuSeqlen(
|
||||
|
|
Loading…
Reference in New Issue