fix(tools): fix InternLMTokenizer to fit transformers==4.34.0

pull/440/head
x54-729 2023-10-23 18:35:30 +08:00 committed by GitHub
parent e611817442
commit 7b1b892084
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 8 additions and 8 deletions

View File

@@ -65,6 +65,13 @@ class InternLMTokenizer(PreTrainedTokenizer):
**kwargs,
):
self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
self.vocab_file = vocab_file
self.add_bos_token = add_bos_token
self.add_eos_token = add_eos_token
self.decode_with_prefix_space = decode_with_prefix_space
self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
self.sp_model.Load(vocab_file)
self._no_prefix_space_tokens = None
super().__init__(
bos_token=bos_token,
eos_token=eos_token,
@@ -73,15 +80,8 @@ class InternLMTokenizer(PreTrainedTokenizer):
clean_up_tokenization_spaces=clean_up_tokenization_spaces,
**kwargs,
)
self.vocab_file = vocab_file
self.add_bos_token = add_bos_token
self.add_eos_token = add_eos_token
self.decode_with_prefix_space = decode_with_prefix_space
self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
self.sp_model.Load(vocab_file)
self._no_prefix_space_tokens = None
""" Initialisation"""
""" Initialization"""
@property
def no_prefix_space_tokens(self):