From fd28bcab58f83b1b0f0d987ae14a897b9ff80c69 Mon Sep 17 00:00:00 2001 From: huangting4201 <1538303371@qq.com> Date: Thu, 24 Aug 2023 13:46:18 +0800 Subject: [PATCH] feat(data/utils.py): add new dataset type code for streaming dataset (#225) --- internlm/data/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internlm/data/utils.py b/internlm/data/utils.py index 3eee9d9..724fb9f 100644 --- a/internlm/data/utils.py +++ b/internlm/data/utils.py @@ -5,7 +5,7 @@ import torch from internlm.core.context import global_context as gpc -DATASET_TYPE_IDS_MAP = {"en": 0, "cn": 1} +DATASET_TYPE_IDS_MAP = {"en": 0, "cn": 1, "code": 2} def get_dataset_type_id(path):