@@ -181,26 +181,34 @@ def _add_to_cache(self, value, cache):
181
181
):
182
182
cache .pop (list (cache .keys ())[0 ])
183
183
184
def _split_by_known_words(self, string: str, keep_formatting: bool):
    """Split ``string`` into a flat list of tokens around known words.

    The regex returned by ``self._get_split_regex_cache()`` is matched
    against the text still to be processed, one pass per loop iteration
    (no recursion). A successful match is expected to yield three groups,
    used here as: the text before the known word, the known word itself,
    and the text after it.

    Args:
        string: Text to split. A falsy value yields an empty list.
        keep_formatting: Passed through unchanged to
            ``self._should_capture`` and ``self._split_by_numerals``.

    Returns:
        A list of the captured tokens, in the order they occur in
        ``string``.
    """
    pattern = self._get_split_regex_cache()
    tokens = []
    remaining = string  # text handed to the regex on the next pass
    trailing = string  # drives the loop; a falsy value ends it

    while trailing:
        found = pattern.match(remaining)

        if not found:
            # No known word is left: the rest goes to the numeral
            # splitter (when it should be captured) and we are done.
            if self._should_capture(remaining, keep_formatting):
                tokens.extend(
                    self._split_by_numerals(remaining, keep_formatting)
                )
            break

        leading, word, trailing = found.groups()

        # The known word is checked first, then the text in front of it,
        # but the leading text's tokens are emitted ahead of the word.
        keep_word = self._should_capture(word, keep_formatting)
        if leading and self._should_capture(leading, keep_formatting):
            tokens.extend(self._split_by_numerals(leading, keep_formatting))
        if keep_word:
            tokens.append(word)

        if trailing:
            # If the match consumed nothing (the tail equals the input),
            # feed the regex an empty string so the loop cannot spin on
            # the same text forever.
            remaining = "" if trailing == remaining else trailing

    return tokens
205
213
206
214
def _split_by_numerals (self , string , keep_formatting ):
0 commit comments