fix dataset types when using random dataset

pull/489/head
gaoyang07 2023-11-10 14:59:52 +08:00
parent 5d3242027a
commit a399d74363
2 changed files with 2 additions and 0 deletions

1
.gitignore vendored
View File

@@ -145,3 +145,4 @@ core.*
llm_ckpts
events.*
memory_trace
RUN*/

View File

@@ -226,6 +226,7 @@ def get_train_data_loader(
dataset_types = list(get_dataset_type_ids_map(train_folder).keys())
if not train_folder:
dataset_types = ["en", "cn", "code"]
train_ds = RandomDataset(num_samples=1000000, max_len=data_cfg.seq_len)
if data_cfg.pack_sample_into_one:
train_ds = PackedDatasetWithoutCuSeqlen(