mirror of https://github.com/InternLM/InternLM
modifications by pre-commit hook
parent 4187bfbfe8
commit d4a81fad5d

@@ -103,4 +103,3 @@ msgstr ""

#~ msgid "traning dataloader object"
#~ msgstr ""

@@ -47,4 +47,3 @@ msgstr "Training Results"
#: ../../source/example/30B_demo.rst:175 615a3481b0aa49729b7219b1365519aa
msgid "基于以上训练配置和启动命令,两节点 16GPU 下的模型训练部分日志展示如下:"
msgstr "Taking the configuration of the demo training on two nodes with 16 GPUs on slurm as an example, the training result log is shown below:"

@@ -47,4 +47,3 @@ msgstr "Training Results"
#: ../../source/example/7B_demo.rst:173 33ec81f34e3c4340beacdb5254069d08
msgid "基于以上训练配置和启动命令,单节点 8GPU 下的模型训练部分日志展示如下:"
msgstr "Taking the configuration of the demo training on a single machine with 8 GPUs on slurm as an example, the training result log is shown below:"

@@ -30,4 +30,3 @@ msgstr ""
#: ../../source/example/index.rst:13 b095e27dfc924a7a943b7cba5361700a
msgid "30B Demo"
msgstr ""

@@ -78,4 +78,3 @@ msgstr ""
#: ../../source/index.rst:95 a164b772960f4ab8b18c7e8820f69f55
msgid ":ref:`search`"
msgstr ""

@@ -245,4 +245,3 @@ msgid ""
"A tuple of ``(trainer, train_dataloader, test_dataloader, lr_scheduler)``"
" where only ``trainer`` could not be None."
msgstr ""

@@ -137,4 +137,3 @@ msgstr "For the local standard image built with dockerfile or pulled, use the fo
#: ../../../install.md:87 66613606256e4094a6be5ab2af1269ae
msgid "容器内默认目录即 `/InternLM`,根据[使用文档](./usage.md)即可启动训练。"
msgstr "The default directory in the container is `/InternLM`, please start training according to the [Usage](./usage.md)."

@@ -195,4 +195,3 @@ msgstr ""
#: internlm.monitor.alert.send_feishu_msg_with_webhook:12 of
msgid "An exception rasied by the HTTP post request."
msgstr ""

@@ -454,4 +454,3 @@ msgstr ""
#: internlm.solver.optimizer.hybrid_zero_optim.HybridZeroOptimizer.step:7 of
msgid "Whether the gradient is success updated, and the gradient."
msgstr ""

@@ -172,4 +172,3 @@ msgstr ""
#: internlm.utils.simple_memory_profiler.SimpleMemoryProfiler.step:1 of
msgid "Update the memory state of the optimizer state."
msgstr ""

@@ -22,4 +22,3 @@ msgstr ""
#: ../../source/qa.rst:2 e3b22a39640a40cfb527068a7f4bbfc9
msgid "问&答"
msgstr "Q&A"

@@ -159,4 +159,3 @@ msgstr ""

#~ msgid "InternLM训练流程图"
#~ msgstr "InternLM training process"

@@ -364,4 +364,3 @@ msgstr ""
#~ msgstr ""
#~ "`load_model_only_folder` and `load_ckpt_folder` "
#~ "cannot be set at the same time."

@@ -90,4 +90,3 @@ When `Activation Ckpt` is turned off, the test results are as shown in the table
<div align="left">
<img src="../imgs/flops.png" width="580"/>
</div>

@@ -87,4 +87,3 @@ InternLM中`zero1`的配置决定了优化器状态的分配范围。
<div align="left">
<img src="../doc/imgs/flops.png" width="580"/>
</div>

@@ -1,3 +1,5 @@
# flake8: noqa

# This file is modified from:
# hhttps://github.com/reasoning-machines/pal/blob/main/pal/core/interface.py
#

@@ -27,8 +29,8 @@ import tqdm
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer

from tools.transformers.interface import GenerationConfig, generate_interactive
from internlm.utils.timeout import Timeout
from tools.transformers.interface import GenerationConfig, generate_interactive


def parse_args():

@@ -19,9 +19,8 @@
# limitations under the License.
""" InternLM model configuration"""

from transformers.utils import logging
from transformers.configuration_utils import PretrainedConfig

from transformers.utils import logging

logger = logging.get_logger(__name__)

@@ -30,9 +29,9 @@ INTERNLM_PRETRAINED_CONFIG_ARCHIVE_MAP = {}

class InternLMConfig(PretrainedConfig):
r"""
This is the configuration class to store the configuration of a [`InternLMModel`]. It is used to instantiate an InternLM
model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
defaults will yield a similar configuration to that of the InternLM-7B.
This is the configuration class to store the configuration of a [`InternLMModel`]. It is used to instantiate an
InternLM model according to the specified arguments, defining the model architecture. Instantiating a
configuration with the defaults will yield a similar configuration to that of the InternLM-7B.

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.

@@ -1,6 +1,6 @@
import argparse
import math
import json
import math
import os
import re
import tempfile

@@ -110,7 +110,7 @@ def merge_pp(states_tp_pp):
states = states_tp_pp[tp][pp]
keys = list(states.keys())
for key in keys:
match = re.search("\.\d+\.", key)
match = re.search("\.\d+\.", key) # noqa: W605
if match is not None:
s, e = match.span()
layer_idx = int(key[s + 1 : e - 1]) + layer_shift

@@ -126,9 +126,9 @@ def merge_pp(states_tp_pp):

if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--src_folder', type=str, default='~/test/') # 需要转换为hf格式的checkpoint文件夹
parser.add_argument('--tgt_folder', type=str, default='~/output/') # 存放转换后checkpoint的目标文件夹
parser.add_argument('--tokenizer', type=str, default='~/test/tokenizer.model') # Tokenizer 文件的路径
parser.add_argument("--src_folder", type=str, default="~/test/") # 需要转换为hf格式的checkpoint文件夹
parser.add_argument("--tgt_folder", type=str, default="~/output/") # 存放转换后checkpoint的目标文件夹
parser.add_argument("--tokenizer", type=str, default="~/test/tokenizer.model") # Tokenizer 文件的路径
args = parser.parse_args()

def load(fp):

@@ -5,7 +5,6 @@ from typing import Callable, List, Optional

import torch
from torch import nn
from transformers import AutoModel, AutoTokenizer
from transformers.generation.utils import LogitsProcessorList, StoppingCriteriaList
from transformers.utils import logging

@@ -38,12 +37,12 @@ def generate_interactive(
for k, v in inputs.items():
inputs[k] = v.cuda()
input_ids = inputs["input_ids"]
batch_size, input_ids_seq_length = input_ids.shape[0], input_ids.shape[-1]
batch_size, input_ids_seq_length = input_ids.shape[0], input_ids.shape[-1] # noqa: F841
if generation_config is None:
generation_config = model.generation_config
generation_config = copy.deepcopy(generation_config)
model_kwargs = generation_config.update(**kwargs)
bos_token_id, eos_token_id = generation_config.bos_token_id, generation_config.eos_token_id
bos_token_id, eos_token_id = generation_config.bos_token_id, generation_config.eos_token_id # noqa: F841
if isinstance(eos_token_id, int):
eos_token_id = [eos_token_id]
if additional_eos_token_id is not None:

@@ -119,9 +118,7 @@ def generate_interactive(

# update generated ids, model inputs, and length for next step
input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
model_kwargs = model._update_model_kwargs_for_generation(
outputs, model_kwargs, is_encoder_decoder=False
)
model_kwargs = model._update_model_kwargs_for_generation(outputs, model_kwargs, is_encoder_decoder=False)
unfinished_sequences = unfinished_sequences.mul((min(next_tokens != i for i in eos_token_id)).long())

output_token_ids = input_ids[0].cpu().tolist()

@@ -1,11 +1,13 @@
import torch
from moss_002_sft import collate_fn, get_dataset
from peft import LoraConfig, TaskType, get_peft_model
from torch.utils.data import DataLoader
from peft import get_peft_model, LoraConfig, TaskType
from transformers import get_linear_schedule_with_warmup
from transformers import AutoModelForCausalLM, AutoTokenizer
from tqdm import tqdm

from moss_002_sft import get_dataset, collate_fn
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
get_linear_schedule_with_warmup,
)

model_path = "model_path"
data_dir = "moss_002_sft"

@@ -16,8 +18,11 @@ epochs = 5
val_per_steps = 1000
lr = 9e-6
peft_config = LoraConfig(
task_type=TaskType.CAUSAL_LM, r=32, lora_alpha=32, lora_dropout=0.1,
target_modules=["gate_proj", "down_proj", "up_proj", "q_proj", "k_proj", "v_proj", "o_proj"]
task_type=TaskType.CAUSAL_LM,
r=32,
lora_alpha=32,
lora_dropout=0.1,
target_modules=["gate_proj", "down_proj", "up_proj", "q_proj", "k_proj", "v_proj", "o_proj"],
)

@@ -29,12 +34,12 @@ model.cuda()

# dataset
train_dataset, val_dataset = get_dataset(tokenizer, data_dir, num=data_num, test_size=test_size)
train_dataloader = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True, collate_fn=lambda x: collate_fn(x, tokenizer))
train_dataloader = DataLoader(
train_dataset, batch_size=train_batch_size, shuffle=True, collate_fn=lambda x: collate_fn(x, tokenizer)
)

optimizer = torch.optim.AdamW(model.parameters(), lr)
scheduler = get_linear_schedule_with_warmup(
optimizer, 1000, epochs * len(train_dataloader)
)
scheduler = get_linear_schedule_with_warmup(optimizer, 1000, epochs * len(train_dataloader))

# train
fp = open("output", "w")

@@ -42,7 +47,7 @@ model.train()
for epoch in tqdm(range(epochs), desc="Traning Epoch"):
batch_bar = tqdm(train_dataloader, desc="Training Batch")
for step, batch in enumerate(batch_bar):
batch = {k:v.cuda() for k, v in batch.items()}
batch = {k: v.cuda() for k, v in batch.items()}
with torch.amp.autocast(device_type="cuda", dtype=torch.bfloat16):
output = model(**batch)

@@ -58,7 +63,15 @@ for epoch in tqdm(range(epochs), desc="Traning Epoch"):
data, label = val_dataset[i]
prefix = tokenizer.decode(data.tolist(), skip_special_tokens=True)
try:
generate = model.generate(input_ids=data.unsqueeze(0).cuda(), temperature=0.7, top_k=50, do_sample=True, repetition_penalty=1.02, max_new_tokens=100, top_p=0.9)
generate = model.generate(
input_ids=data.unsqueeze(0).cuda(),
temperature=0.7,
top_k=50,
do_sample=True,
repetition_penalty=1.02,
max_new_tokens=100,
top_p=0.9,
)
text = tokenizer.decode(generate[0].tolist(), skip_special_tokens=True)
text = text.replace(prefix, "")
fp.write(f"Prefix: {prefix}\nGenerated: {text}" + "\n---------------------------------\n")

@@ -1,9 +1,11 @@
import os
import copy
import os

import torch
from datasets import Dataset as HFDataset
from datasets import load_dataset
from torch.utils.data import Dataset
from datasets import load_dataset, Dataset as HFDataset


class SFTDataset(Dataset):
# https://github.com/OpenLMLab/MOSS/blob/main/finetune_moss.py

@@ -26,21 +28,25 @@ class SFTDataset(Dataset):

return data, label


def collate_fn(batch, tokenizer):
batch_input_ids, batch_labels = [], []
for input_ids, label in batch:
batch_input_ids.append(input_ids)
batch_labels.append(label)

batch_input_ids = torch.nn.utils.rnn.pad_sequence(batch_input_ids, batch_first=True, padding_value=tokenizer.eos_token_id)
batch_input_ids = torch.nn.utils.rnn.pad_sequence(
batch_input_ids, batch_first=True, padding_value=tokenizer.eos_token_id
)
batch_labels = torch.nn.utils.rnn.pad_sequence(batch_labels, batch_first=True, padding_value=-100)

return {
"input_ids": batch_input_ids,
"attention_mask": (batch_input_ids == tokenizer.eos_token_id).long(),
"labels": batch_labels
"labels": batch_labels,
}


def process(sample, tokenizer, max_len):
chat = sample["plain_text"].split("<eoa>")[:-1]
num_turns = sample["num_turns"]

@@ -81,20 +87,20 @@ def load_data(save_dir, tokenizer, max_len, num=-1) -> HFDataset:
if os.path.exists(save_dir):
print(f"Loading moss-002-sft from {save_dir}")
else:
print(f"Loading moss-002-sft from datasets")
print("Loading moss-002-sft from datasets")
moss_sft = load_dataset("fnlp/moss-002-sft-data", split="train")
moss_sft = moss_sft.map(lambda x:process(x, tokenizer, max_len), num_proc=10)
moss_sft = moss_sft.filter(lambda x:len(x["input_ids"]) != 0)
moss_sft = moss_sft.map(lambda x: process(x, tokenizer, max_len), num_proc=10)
moss_sft = moss_sft.filter(lambda x: len(x["input_ids"]) != 0)
moss_sft.save_to_disk(save_dir)

moss_sft = HFDataset.load_from_disk(save_dir)
if num != -1:
moss_sft = moss_sft.select(range(num))
print(
f"Load successfully, total {len(moss_sft)} samples.")
print(f"Load successfully, total {len(moss_sft)} samples.")

return moss_sft


def get_dataset(tokenizer, save_dir, max_len=1024, num=-1, test_size=0.1):
moss_sft_data = load_data(save_dir, tokenizer, max_len, num)
moss_sft_split = moss_sft_data.train_test_split(test_size=test_size)

@@ -102,4 +108,3 @@ def get_dataset(tokenizer, save_dir, max_len=1024, num=-1, test_size=0.1):
val_dataset = SFTDataset(moss_sft_split["test"])

return train_dataset, val_dataset

@@ -19,26 +19,35 @@
# limitations under the License.
""" PyTorch InternLM model."""
import math
import queue
import threading
from typing import List, Optional, Tuple, Union
import threading, queue

import torch
import torch.utils.checkpoint
from configuration_internlm import InternLMConfig
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

from transformers.activations import ACT2FN
from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast, SequenceClassifierOutputWithPast
from transformers.modeling_utils import PreTrainedModel
from transformers.generation.streamers import BaseStreamer
from transformers.utils import add_start_docstrings, add_start_docstrings_to_model_forward, logging, replace_return_docstrings
from configuration_internlm import InternLMConfig

from transformers.modeling_outputs import (
BaseModelOutputWithPast,
CausalLMOutputWithPast,
SequenceClassifierOutputWithPast,
)
from transformers.modeling_utils import PreTrainedModel
from transformers.utils import (
add_start_docstrings,
add_start_docstrings_to_model_forward,
logging,
replace_return_docstrings,
)

logger = logging.get_logger(__name__)

_CONFIG_FOR_DOC = "InternLMConfig"


# Copied from transformers.models.bart.modeling_bart._make_causal_mask
def _make_causal_mask(
input_ids_shape: torch.Size, dtype: torch.dtype, device: torch.device, past_key_values_length: int = 0

@@ -423,7 +432,7 @@ INTERNLM_INPUTS_DOCSTRING = r"""
more detail.
return_dict (`bool`, *optional*):
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
""" # noqa: E501


@add_start_docstrings(

@@ -437,6 +446,7 @@ class InternLMModel(InternLMPreTrainedModel):
Args:
config: InternLMConfig
"""

_auto_class = "AutoModel"

def __init__(self, config: InternLMConfig):

@@ -776,41 +786,47 @@ class InternLMForCausalLM(InternLMPreTrainedModel):
return tokenizer([prompt], return_tensors="pt")

@torch.no_grad()
def chat(self,
tokenizer,
query: str,
history: List[Tuple[str, str]] = [],
streamer: Optional[BaseStreamer] = None,
max_new_tokens: int = 1024,
do_sample: bool = True,
temperature: float = 0.8,
top_p: float = 0.8,
**kwargs):
def chat(
self,
tokenizer,
query: str,
history: List[Tuple[str, str]] = [],
streamer: Optional[BaseStreamer] = None,
max_new_tokens: int = 1024,
do_sample: bool = True,
temperature: float = 0.8,
top_p: float = 0.8,
**kwargs,
):
inputs = self.build_inputs(tokenizer, query, history)
inputs = {k: v.to(self.device) for k, v in inputs.items() if torch.is_tensor(v)}
outputs = self.generate(**inputs,
streamer=streamer,
max_new_tokens=max_new_tokens,
do_sample=do_sample,
temperature=temperature,
top_p=top_p,
**kwargs)
outputs = outputs[0].cpu().tolist()[len(inputs["input_ids"][0]):]
outputs = self.generate(
**inputs,
streamer=streamer,
max_new_tokens=max_new_tokens,
do_sample=do_sample,
temperature=temperature,
top_p=top_p,
**kwargs,
)
outputs = outputs[0].cpu().tolist()[len(inputs["input_ids"][0]) :]
response = tokenizer.decode(outputs, skip_special_tokens=True)
response = response.split("<eoa>")[0]
history = history + [(query, response)]
return response, history

@torch.no_grad()
def stream_chat(self,
tokenizer,
query: str,
history: List[Tuple[str, str]] = [],
max_new_tokens: int = 1024,
do_sample: bool = True,
temperature: float = 0.8,
top_p: float = 0.8,
**kwargs):
def stream_chat(
self,
tokenizer,
query: str,
history: List[Tuple[str, str]] = [],
max_new_tokens: int = 1024,
do_sample: bool = True,
temperature: float = 0.8,
top_p: float = 0.8,
**kwargs,
):
"""
Return a generator in format: (response, history)
Eg.

@@ -861,7 +877,7 @@ class InternLMForCausalLM(InternLMPreTrainedModel):
do_sample=do_sample,
temperature=temperature,
top_p=top_p,
**kwargs
**kwargs,
)

def consumer():

@@ -24,11 +24,9 @@ from shutil import copyfile
from typing import Any, Dict, List, Optional, Tuple

import sentencepiece as spm

from transformers.tokenization_utils import PreTrainedTokenizer
from transformers.utils import logging


logger = logging.get_logger(__name__)

VOCAB_FILES_NAMES = {"vocab_file": "./tokenizer.model"}