mirror of https://github.com/InternLM/InternLM
fix dataset types when using random dataset (#489)
parent
5d3242027a
commit
07026d1821
|
@@ -145,3 +145,4 @@ core.*
|
||||||
llm_ckpts
|
llm_ckpts
|
||||||
events.*
|
events.*
|
||||||
memory_trace
|
memory_trace
|
||||||
|
RUN*/
|
||||||
|
|
|
@@ -226,6 +226,7 @@ def get_train_data_loader(
|
||||||
dataset_types = list(get_dataset_type_ids_map(train_folder).keys())
|
dataset_types = list(get_dataset_type_ids_map(train_folder).keys())
|
||||||
|
|
||||||
if not train_folder:
|
if not train_folder:
|
||||||
|
dataset_types = ["en", "cn", "code"]
|
||||||
train_ds = RandomDataset(num_samples=1000000, max_len=data_cfg.seq_len)
|
train_ds = RandomDataset(num_samples=1000000, max_len=data_cfg.seq_len)
|
||||||
if data_cfg.pack_sample_into_one:
|
if data_cfg.pack_sample_into_one:
|
||||||
train_ds = PackedDatasetWithoutCuSeqlen(
|
train_ds = PackedDatasetWithoutCuSeqlen(
|
||||||
|
|
Loading…
Reference in New Issue