Commit 90ef28d

Make the spec token map lazy (#2715)

Parent: b37585e
1 file changed (+6, -1)

fastdeploy/input/ernie_tokenizer.py

@@ -83,7 +83,6 @@ def __init__(
         self.sp_model = spm.SentencePieceProcessor()
         self.sp_model.Load(vocab_file)
         # Pre-build the set of all special tokens to accelerate decoding.
-        self.all_spec_tok = set(self.all_special_tokens)
 
     @property
     def space_token(self):
@@ -138,8 +137,13 @@ def _convert_id_to_token(self, id):
         """doc"""
         return self.sp_model.id_to_piece(id)
 
+    def spec_init(self):
+        if not hasattr(self, "all_spec_tok"):
+            self.all_spec_tok = set(self.all_special_tokens)
+
     def convert_tokens_to_string(self, tokens):
         """Converts a sequence of tokens (string) in a single string."""
+        self.spec_init()
         current_sub_tokens = []
         out_string = ""
         # prev_is_special = False
@@ -212,6 +216,7 @@ def tokenize(self, text: TextInput, **kwargs) -> List[str]:
         # if isinstance(t, AddedToken)
         # )
 
+        self.spec_init()
         text, kwargs = self.prepare_for_tokenization(text, **kwargs)
 
         # TODO: should this be in the base class?
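
For context, below is a minimal, self-contained sketch of the lazy-initialization pattern this commit applies. Only `spec_init`, `all_spec_tok`, and `all_special_tokens` come from the diff above; the `LazyTokenizer` class, its constructor argument, and the token-joining logic are hypothetical stand-ins for illustration, not the real ErnieBotTokenizer.

    # Sketch of lazy special-token-set initialization, assuming a
    # simplified stand-in tokenizer rather than the real class.
    class LazyTokenizer:
        def __init__(self, special_tokens):
            # Stand-in for the tokenizer's `all_special_tokens` list.
            self.all_special_tokens = list(special_tokens)
            # Note: `all_spec_tok` is deliberately NOT built here.

        def spec_init(self):
            # Build the special-token set on first use; later calls are no-ops.
            if not hasattr(self, "all_spec_tok"):
                self.all_spec_tok = set(self.all_special_tokens)

        def convert_tokens_to_string(self, tokens):
            self.spec_init()  # ensure the set exists before membership tests
            pieces, current = [], []
            for tok in tokens:
                if tok in self.all_spec_tok:
                    pieces.append("".join(current))
                    pieces.append(tok)
                    current = []
                else:
                    current.append(tok)
            pieces.append("".join(current))
            return "".join(pieces)


    if __name__ == "__main__":
        tk = LazyTokenizer(["<bos>", "<eos>"])
        print(tk.convert_tokens_to_string(["<bos>", "he", "llo", "<eos>"]))
        # -> "<bos>hello<eos>"

The `hasattr` guard makes repeated calls cheap, so any method that needs the set can call `spec_init()` unconditionally; the set is built only when tokenization or decoding actually happens, rather than in `__init__` on every construction.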
