99
1010from .dictionary import Dictionary , ALWAYS_KEEP_TOKENS
1111from .validation import LanguageValidator
12- from ..conf import settings
1312
1413
1514class Language (object ):
1615 _dictionary = None
1716 _splitters = None
1817 _wordchars = None
19- _cached = None
2018
2119 def __init__ (self , shortname , language_info ):
2220 self .shortname = shortname
@@ -26,51 +24,40 @@ def __init__(self, shortname, language_info):
2624 if isinstance (value , int ):
2725 simplification [key ] = str (value )
2826
29- self ._cached = self
30-
3127 def validate_info (self , validator = None ):
3228 if validator is None :
3329 validator = LanguageValidator
3430
35- return validator .validate_info (language_id = self .shortname , info = self ._cached . info )
31+ return validator .validate_info (language_id = self .shortname , info = self .info )
3632
3733 def is_applicable (self , date_string , strip_timezone = False ):
3834 if strip_timezone :
3935 date_string , timezone = pop_tz_offset_from_string (date_string , as_offset = False )
4036
41- date_string = self ._cached . _simplify (date_string )
42- tokens = self ._cached . _split (date_string , keep_formatting = False )
43- if self ._cached . _is_date_consists_of_digits_only (tokens ):
37+ date_string = self ._simplify (date_string )
38+ tokens = self ._split (date_string , keep_formatting = False )
39+ if self ._is_date_consists_of_digits_only (tokens ):
4440 return True
4541 else :
46- return self ._cached . _are_all_words_in_the_dictionary (tokens )
42+ return self ._are_all_words_in_the_dictionary (tokens )
4743
4844 def translate (self , date_string , keep_formatting = False ):
49- tokens = self ._cached ._get_new_skip_tokens (settings .SKIP_TOKENS )
50-
51- if tokens :
52- self ._cached .info ['skip' ] += tokens
53- self ._cached = Language (self ._language_object .shortname , self ._language_object .info )
54-
55- date_string = self ._cached ._simplify (date_string )
56- words = self ._cached ._split (date_string , keep_formatting )
45+ date_string = self ._simplify (date_string )
46+ words = self ._split (date_string , keep_formatting )
5747
58- dictionary = self ._cached . _get_dictionary ()
48+ dictionary = self ._get_dictionary ()
5949 for i , word in enumerate (words ):
6050 word = word .lower ()
6151 if word in dictionary :
6252 words [i ] = dictionary [word ] or ''
6353
64- return self ._cached ._join (filter (bool , words ), separator = "" if keep_formatting else " " )
65-
66- def _get_new_skip_tokens (self , tokens ):
67- return [token for token in tokens if token not in self ._cached .info .get ('skip' , [])]
54+ return self ._join (filter (bool , words ), separator = "" if keep_formatting else " " )
6855
6956 def _simplify (self , date_string ):
7057 date_string = date_string .lower ()
71- for simplification in self ._cached . info .get ('simplifications' , []):
58+ for simplification in self .info .get ('simplifications' , []):
7259 pattern , replacement = simplification .items ()[0 ]
73- if not self ._cached . info .get ('no_word_spacing' , False ):
60+ if not self .info .get ('no_word_spacing' , False ):
7461 replacement = wrap_replacement_for_regex (replacement , pattern )
7562 pattern = ur'(\A|\d|_|\W)%s(\d|_|\W|\Z)' % pattern
7663 date_string = re .sub (pattern , replacement , date_string , flags = re .IGNORECASE | re .UNICODE ).lower ()
@@ -84,7 +71,7 @@ def _is_date_consists_of_digits_only(self, tokens):
8471 return True
8572
8673 def _are_all_words_in_the_dictionary (self , words ):
87- dictionary = self ._cached . _get_dictionary ()
74+ dictionary = self ._get_dictionary ()
8875 for word in words :
8976 word = word .lower ()
9077 if word .isdigit () or word in dictionary :
@@ -96,8 +83,8 @@ def _are_all_words_in_the_dictionary(self, words):
9683
9784 def _split (self , date_string , keep_formatting ):
9885 tokens = [date_string ]
99- tokens = self ._cached . _split_tokens_with_regex (tokens , "(\d+)" )
100- tokens = self ._cached . _split_tokens_by_known_words (tokens , keep_formatting )
86+ tokens = self ._split_tokens_with_regex (tokens , "(\d+)" )
87+ tokens = self ._split_tokens_by_known_words (tokens , keep_formatting )
10188 return tokens
10289
10390 def _split_tokens_with_regex (self , tokens , regex ):
@@ -107,7 +94,7 @@ def _split_tokens_with_regex(self, tokens, regex):
10794 return filter (bool , chain (* tokens ))
10895
10996 def _split_tokens_by_known_words (self , tokens , keep_formatting ):
110- dictionary = self ._cached . _get_dictionary ()
97+ dictionary = self ._get_dictionary ()
11198 for i , token in enumerate (tokens ):
11299 tokens [i ] = dictionary .split (token , keep_formatting )
113100 return list (chain (* tokens ))
@@ -116,7 +103,7 @@ def _join(self, tokens, separator=" "):
116103 if not tokens :
117104 return ""
118105
119- capturing_splitters = self ._cached . _get_splitters ()['capturing' ]
106+ capturing_splitters = self ._get_splitters ()['capturing' ]
120107 joined = tokens [0 ]
121108 for i in range (1 , len (tokens )):
122109 left , right = tokens [i - 1 ], tokens [i ]
@@ -127,19 +114,19 @@ def _join(self, tokens, separator=" "):
127114 return joined
128115
129116 def _get_dictionary (self ):
130- if self ._cached . _dictionary is None :
131- self ._cached . _generate_dictionary ()
132- return self ._cached . _dictionary
117+ if self ._dictionary is None :
118+ self ._generate_dictionary ()
119+ return self ._dictionary
133120
134121 def _get_wordchars (self ):
135- if self ._cached . _wordchars is None :
136- self ._cached . _set_wordchars ()
137- return self ._cached . _wordchars
122+ if self ._wordchars is None :
123+ self ._set_wordchars ()
124+ return self ._wordchars
138125
139126 def _get_splitters (self ):
140- if self ._cached . _splitters is None :
141- self ._cached . _set_splitters ()
142- return self ._cached . _splitters
127+ if self ._splitters is None :
128+ self ._set_splitters ()
129+ return self ._splitters
143130
144131 def _set_splitters (self ):
145132 splitters = {
@@ -148,28 +135,28 @@ def _set_splitters(self):
148135 }
149136 splitters ['capturing' ] |= set (ALWAYS_KEEP_TOKENS )
150137
151- wordchars = self ._cached . _get_wordchars ()
152- skip = set (self ._cached . info .get ('skip' , [])) | splitters ['capturing' ]
138+ wordchars = self ._get_wordchars ()
139+ skip = set (self .info .get ('skip' , [])) | splitters ['capturing' ]
153140 for token in skip :
154141 if not re .match ('^\W+$' , token , re .UNICODE ):
155142 continue
156143 if token in wordchars :
157144 splitters ['wordchars' ].add (token )
158145
159- self ._cached . _splitters = splitters
146+ self ._splitters = splitters
160147
161148 def _set_wordchars (self ):
162149 wordchars = set ()
163- for word in self ._cached . _get_dictionary ():
150+ for word in self ._get_dictionary ():
164151 if re .match ('^[\W\d_]+$' , word , re .UNICODE ):
165152 continue
166153 for char in word :
167154 wordchars .add (char .lower ())
168155
169- self ._cached . _wordchars = wordchars - {" " } | {"0" , "1" , "2" , "3" , "4" , "5" , "6" , "7" , "8" , "9" }
156+ self ._wordchars = wordchars - {" " } | {"0" , "1" , "2" , "3" , "4" , "5" , "6" , "7" , "8" , "9" }
170157
171158 def _generate_dictionary (self ):
172- self ._cached . _dictionary = Dictionary (self .info )
159+ self ._dictionary = Dictionary (self .info )
173160
174161 def to_parserinfo (self , base_cls = parser .parserinfo ):
175162 attributes = {
0 commit comments