mirror of https://github.com/InternLM/InternLM
fix(tools): fix InternLMTokenizer to fit transformers==4.34.0
parent
e611817442
commit
7b1b892084
|
@ -65,6 +65,13 @@ class InternLMTokenizer(PreTrainedTokenizer):
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
|
self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
|
||||||
|
self.vocab_file = vocab_file
|
||||||
|
self.add_bos_token = add_bos_token
|
||||||
|
self.add_eos_token = add_eos_token
|
||||||
|
self.decode_with_prefix_space = decode_with_prefix_space
|
||||||
|
self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
|
||||||
|
self.sp_model.Load(vocab_file)
|
||||||
|
self._no_prefix_space_tokens = None
|
||||||
super().__init__(
|
super().__init__(
|
||||||
bos_token=bos_token,
|
bos_token=bos_token,
|
||||||
eos_token=eos_token,
|
eos_token=eos_token,
|
||||||
|
@ -73,15 +80,8 @@ class InternLMTokenizer(PreTrainedTokenizer):
|
||||||
clean_up_tokenization_spaces=clean_up_tokenization_spaces,
|
clean_up_tokenization_spaces=clean_up_tokenization_spaces,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
)
|
||||||
self.vocab_file = vocab_file
|
|
||||||
self.add_bos_token = add_bos_token
|
|
||||||
self.add_eos_token = add_eos_token
|
|
||||||
self.decode_with_prefix_space = decode_with_prefix_space
|
|
||||||
self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
|
|
||||||
self.sp_model.Load(vocab_file)
|
|
||||||
self._no_prefix_space_tokens = None
|
|
||||||
|
|
||||||
""" Initialisation"""
|
""" Initialization"""
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def no_prefix_space_tokens(self):
|
def no_prefix_space_tokens(self):
|
||||||
|
|
Loading…
Reference in New Issue