mirror of https://github.com/InternLM/InternLM
fix(tools): fix InternLMTokenizer to fit transformers==4.34.0
parent
e611817442
commit
7b1b892084
|
@ -65,6 +65,13 @@ class InternLMTokenizer(PreTrainedTokenizer):
|
|||
**kwargs,
|
||||
):
|
||||
self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
|
||||
self.vocab_file = vocab_file
|
||||
self.add_bos_token = add_bos_token
|
||||
self.add_eos_token = add_eos_token
|
||||
self.decode_with_prefix_space = decode_with_prefix_space
|
||||
self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
|
||||
self.sp_model.Load(vocab_file)
|
||||
self._no_prefix_space_tokens = None
|
||||
super().__init__(
|
||||
bos_token=bos_token,
|
||||
eos_token=eos_token,
|
||||
|
@ -73,15 +80,8 @@ class InternLMTokenizer(PreTrainedTokenizer):
|
|||
clean_up_tokenization_spaces=clean_up_tokenization_spaces,
|
||||
**kwargs,
|
||||
)
|
||||
self.vocab_file = vocab_file
|
||||
self.add_bos_token = add_bos_token
|
||||
self.add_eos_token = add_eos_token
|
||||
self.decode_with_prefix_space = decode_with_prefix_space
|
||||
self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
|
||||
self.sp_model.Load(vocab_file)
|
||||
self._no_prefix_space_tokens = None
|
||||
|
||||
""" Initialisation"""
|
||||
""" Initialization"""
|
||||
|
||||
@property
|
||||
def no_prefix_space_tokens(self):
|
||||
|
|
Loading…
Reference in New Issue