Skip to content

Commit a4032c3

Browse files
Fixed a RecursionError discovered by OSS-Fuzz by converting _split_by_known_words from a recursive into an iterative function (#1201)
Co-authored-by: bcapuano <[email protected]>
1 parent c54d7e4 commit a4032c3

File tree

1 file changed: 27 additions (+27) and 19 deletions (-19).

dateparser/languages/dictionary.py

Lines changed: 27 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -181,26 +181,34 @@ def _add_to_cache(self, value, cache):
181181
):
182182
cache.pop(list(cache.keys())[0])
183183

184-
def _split_by_known_words(self, string, keep_formatting):
185-
if not string:
186-
return string
187-
184+
def _split_by_known_words(self, string: str, keep_formatting: bool):
188185
regex = self._get_split_regex_cache()
189-
match = regex.match(string)
190-
if not match:
191-
return (
192-
self._split_by_numerals(string, keep_formatting)
193-
if self._should_capture(string, keep_formatting)
194-
else []
195-
)
196-
197-
unparsed, known, unknown = match.groups()
198-
splitted = [known] if self._should_capture(known, keep_formatting) else []
199-
if unparsed and self._should_capture(unparsed, keep_formatting):
200-
splitted = self._split_by_numerals(unparsed, keep_formatting) + splitted
201-
if unknown:
202-
splitted.extend(self._split_by_known_words(unknown, keep_formatting))
203-
186+
splitted = []
187+
unknown = string
188+
189+
while unknown:
190+
match = regex.match(string)
191+
192+
if not match:
193+
curr_split = (
194+
self._split_by_numerals(string, keep_formatting)
195+
if self._should_capture(string, keep_formatting)
196+
else []
197+
)
198+
unknown = ""
199+
else:
200+
unparsed, known, unknown = match.groups()
201+
curr_split = (
202+
[known] if self._should_capture(known, keep_formatting) else []
203+
)
204+
if unparsed and self._should_capture(unparsed, keep_formatting):
205+
curr_split = (
206+
self._split_by_numerals(unparsed, keep_formatting) + curr_split
207+
)
208+
if unknown:
209+
string = unknown if string != unknown else ""
210+
211+
splitted.extend(curr_split)
204212
return splitted
205213

206214
def _split_by_numerals(self, string, keep_formatting):

0 commit comments

Comments
 (0)