Skip to content

Commit bf95a94

Browse files
committed
Merge branch 'language-data-loader' into feature-iso-date-format-with-non-english-language
2 parents 2f90553 + fe67b6a commit bf95a94

File tree

3 files changed

+29
-12
lines changed

3 files changed

+29
-12
lines changed

dateparser/date.py

Lines changed: 19 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,9 @@
1111
from dateparser.date_parser import date_parser
1212
from dateparser.freshness_date_parser import freshness_date_parser
1313
from dateparser.languages import default_language_loader
14+
from dateparser.languages.loader import LanguageDataLoader
1415
from dateparser.languages.detection import AutoDetectLanguage, ExactLanguages
16+
from dateparser.conf import settings
1517

1618

1719
def sanitize_spaces(html_string):
@@ -244,10 +246,16 @@ class DateDataParser(object):
244246
:raises:
245247
ValueError - Unknown Language, TypeError - Languages argument must be a list
246248
"""
249+
_default_args = None
250+
_skip_tokens = None
247251

248252
def __init__(self, languages=None, allow_redetect_language=False):
253+
available_language_map = default_language_loader.get_language_map()
254+
255+
self._default_args = [languages, allow_redetect_language]
256+
self._skip_tokens = settings.SKIP_TOKENS
257+
249258
if isinstance(languages, (list, tuple, collections.Set)):
250-
available_language_map = default_language_loader.get_language_map()
251259

252260
if all([language in available_language_map for language in languages]):
253261
languages = [available_language_map[language] for language in languages]
@@ -258,12 +266,14 @@ def __init__(self, languages=None, allow_redetect_language=False):
258266
raise TypeError("languages argument must be a list (%r given)" % type(languages))
259267

260268
if allow_redetect_language:
261-
self.language_detector = AutoDetectLanguage(languages=languages if languages else None,
262-
allow_redetection=True)
269+
self.language_detector = AutoDetectLanguage(
270+
languages if languages else available_language_map.values(),
271+
allow_redetection=True)
263272
elif languages:
264273
self.language_detector = ExactLanguages(languages=languages)
265274
else:
266-
self.language_detector = AutoDetectLanguage(languages=None, allow_redetection=False)
275+
self.language_detector = AutoDetectLanguage(
276+
available_language_map.values(), allow_redetection=False)
267277

268278
def get_date_data(self, date_string, date_formats=None):
269279
"""
@@ -304,6 +314,11 @@ def get_date_data(self, date_string, date_formats=None):
304314
TODO: Timezone issues
305315
306316
"""
317+
global default_language_loader
318+
if settings.SKIP_TOKENS != self._skip_tokens:
319+
default_language_loader = LanguageDataLoader()
320+
self = DateDataParser(*self._default_args)
321+
307322
date_string = date_string.strip()
308323
date_string = sanitize_date(date_string)
309324

dateparser/languages/detection.py

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,6 @@
11
# -*- coding: utf-8 -*-
22
from functools import wraps
33

4-
from dateparser.languages import default_language_loader
5-
64

75
def _restore_languages_on_generator_exit(method):
86
@wraps(method)
@@ -39,9 +37,7 @@ def _filter_languages(date_string, languages):
3937

4038

4139
class AutoDetectLanguage(BaseLanguageDetector):
42-
def __init__(self, languages=None, allow_redetection=False):
43-
if languages is None:
44-
languages = default_language_loader.get_languages()
40+
def __init__(self, languages, allow_redetection=False):
4541
super(AutoDetectLanguage, self).__init__(languages=languages[:])
4642
self.language_pool = languages[:]
4743
self.allow_redetection = allow_redetection

dateparser/languages/loader.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,10 +4,12 @@
44
from yaml import load as load_yaml
55

66
from .language import Language
7+
from ..conf import settings
78

89

910
class LanguageDataLoader(object):
1011
_data = None
12+
_raw_data = None
1113

1214
def __init__(self, file=None):
1315
if isinstance(file, basestring):
@@ -34,10 +36,14 @@ def _load_data(self):
3436
data = get_data('data', 'languages.yaml')
3537
else:
3638
data = self.file.read()
37-
data = load_yaml(data)
38-
base_data = data.pop('base', {})
39+
40+
if not self._raw_data:
41+
self._raw_data = load_yaml(data)
42+
43+
base_data = self._raw_data.pop('base', {'skip': []})
44+
base_data['skip'] += settings.SKIP_TOKENS
3945
known_languages = {}
40-
for shortname, language_info in data.iteritems():
46+
for shortname, language_info in self._raw_data.iteritems():
4147
self._update_language_info_with_base_info(language_info, base_data)
4248
language = Language(shortname, language_info)
4349
if language.validate_info():

0 commit comments

Comments
 (0)