1414import locale
1515import calendar
1616from re import compile as re_compile
17+ from re import sub as re_sub
1718from re import IGNORECASE
1819from re import escape as re_escape
1920from datetime import (date as datetime_date ,
@@ -27,6 +28,18 @@ def _getlang():
2728 # Figure out what the current language is set to.
2829 return locale .getlocale (locale .LC_TIME )
2930
def _findall(haystack, needle):
    """Yield the start index of every non-overlapping match of needle.

    The scan runs left to right with str.find(); after a match at index i
    it resumes at i + len(needle), so reported matches never overlap.

    An empty needle yields nothing: str.find() would report a match at the
    current position without ever advancing, so it is rejected up front.
    """
    if not needle:
        return
    i = 0
    while True:
        i = haystack.find(needle, i)
        if i < 0:
            break
        yield i
        # Skip past the whole match so the next hit cannot overlap it.
        i += len(needle)
42+
3043class LocaleTime (object ):
3144 """Stores and handles locale-specific information related to time.
3245
@@ -101,7 +114,8 @@ def __calc_am_pm(self):
101114 am_pm = []
102115 for hour in (1 , 22 ):
103116 time_tuple = time .struct_time ((1999 ,3 ,17 ,hour ,44 ,55 ,2 ,76 ,0 ))
104- am_pm .append (time .strftime ("%p" , time_tuple ).lower ())
117+ # br_FR has AM/PM info (' ',' ').
118+ am_pm .append (time .strftime ("%p" , time_tuple ).lower ().strip ())
105119 self .am_pm = am_pm
106120
107121 def __calc_date_time (self ):
@@ -113,42 +127,130 @@ def __calc_date_time(self):
113127 # values within the format string is very important; it eliminates
114128 # possible ambiguity for what something represents.
115129 time_tuple = time .struct_time ((1999 ,3 ,17 ,22 ,44 ,55 ,2 ,76 ,0 ))
116- date_time = [None , None , None ]
117- date_time [0 ] = time .strftime ("%c" , time_tuple ).lower ()
118- date_time [1 ] = time .strftime ("%x" , time_tuple ).lower ()
119- date_time [2 ] = time .strftime ("%X" , time_tuple ).lower ()
120- replacement_pairs = [('%' , '%%' ), (self .f_weekday [2 ], '%A' ),
121- (self .f_month [3 ], '%B' ), (self .a_weekday [2 ], '%a' ),
122- (self .a_month [3 ], '%b' ), (self .am_pm [1 ], '%p' ),
123- ('1999' , '%Y' ), ('99' , '%y' ), ('22' , '%H' ),
124- ('44' , '%M' ), ('55' , '%S' ), ('76' , '%j' ),
125- ('17' , '%d' ), ('03' , '%m' ), ('3' , '%m' ),
126- # '3' needed for when no leading zero.
127- ('2' , '%w' ), ('10' , '%I' )]
128- replacement_pairs .extend ([(tz , "%Z" ) for tz_values in self .timezone
129- for tz in tz_values ])
130- for offset ,directive in ((0 ,'%c' ), (1 ,'%x' ), (2 ,'%X' )):
131- current_format = date_time [offset ]
132- for old , new in replacement_pairs :
130+ time_tuple2 = time .struct_time ((1999 ,1 ,3 ,1 ,1 ,1 ,6 ,3 ,0 ))
131+ replacement_pairs = [
132+ ('1999' , '%Y' ), ('99' , '%y' ), ('22' , '%H' ),
133+ ('44' , '%M' ), ('55' , '%S' ), ('76' , '%j' ),
134+ ('17' , '%d' ), ('03' , '%m' ), ('3' , '%m' ),
135+ # '3' needed for when no leading zero.
136+ ('2' , '%w' ), ('10' , '%I' ),
137+ # Non-ASCII digits
138+ ('\u0661 \u0669 \u0669 \u0669 ' , '%Y' ),
139+ ('\u0669 \u0669 ' , '%Oy' ),
140+ ('\u0662 \u0662 ' , '%OH' ),
141+ ('\u0664 \u0664 ' , '%OM' ),
142+ ('\u0665 \u0665 ' , '%OS' ),
143+ ('\u0661 \u0667 ' , '%Od' ),
144+ ('\u0660 \u0663 ' , '%Om' ),
145+ ('\u0663 ' , '%Om' ),
146+ ('\u0662 ' , '%Ow' ),
147+ ('\u0661 \u0660 ' , '%OI' ),
148+ ]
149+ date_time = []
150+ for directive in ('%c' , '%x' , '%X' ):
151+ current_format = time .strftime (directive , time_tuple ).lower ()
152+ current_format = current_format .replace ('%' , '%%' )
153+ # The month and the day of the week formats are treated specially
154+ # because of a possible ambiguity in some locales where the full
155+ # and abbreviated names are equal or names of different types
156+ # are equal. See doc of __find_month_format for more details.
157+ lst , fmt = self .__find_weekday_format (directive )
158+ if lst :
159+ current_format = current_format .replace (lst [2 ], fmt , 1 )
160+ lst , fmt = self .__find_month_format (directive )
161+ if lst :
162+ current_format = current_format .replace (lst [3 ], fmt , 1 )
163+ if self .am_pm [1 ]:
133164 # Must deal with possible lack of locale info
134165 # manifesting itself as the empty string (e.g., Swedish's
135166 # lack of AM/PM info) or a platform returning a tuple of empty
136167 # strings (e.g., MacOS 9 having timezone as ('','')).
137- if old :
138- current_format = current_format .replace (old , new )
168+ current_format = current_format .replace (self .am_pm [1 ], '%p' )
169+ for tz_values in self .timezone :
170+ for tz in tz_values :
171+ if tz :
172+ current_format = current_format .replace (tz , "%Z" )
173+ # Transform all non-ASCII digits to digits in range U+0660 to U+0669.
174+ current_format = re_sub (r'\d(?<![0-9])' ,
175+ lambda m : chr (0x0660 + int (m [0 ])),
176+ current_format )
177+ for old , new in replacement_pairs :
178+ current_format = current_format .replace (old , new )
139179 # If %W is used, then Sunday, 2005-01-03 will fall on week 0 since
140180 # 2005-01-03 occurs before the first Monday of the year. Otherwise
141181 # %U is used.
142- time_tuple = time .struct_time ((1999 ,1 ,3 ,1 ,1 ,1 ,6 ,3 ,0 ))
143- if '00' in time .strftime (directive , time_tuple ):
182+ if '00' in time .strftime (directive , time_tuple2 ):
144183 U_W = '%W'
145184 else :
146185 U_W = '%U'
147- date_time [offset ] = current_format .replace ('11' , U_W )
186+ current_format = current_format .replace ('11' , U_W )
187+ date_time .append (current_format )
148188 self .LC_date_time = date_time [0 ]
149189 self .LC_date = date_time [1 ]
150190 self .LC_time = date_time [2 ]
151191
def __find_month_format(self, directive):
    """Pick the month directive that matches this locale's output.

    In some locales (for example French and Hebrew) the sample month used
    by __calc_date_time has identical full and abbreviated names.  The
    month name can also coincide with another part of the representation:
    the day of the week name (for example in Morisyen) or the month number
    (for example in Japanese).  To sidestep both problems, format the same
    date in each of the twelve months and keep only the positions at which
    the month name is found for every month.

    Returns (self.f_month, '%B') if the full name has a common position,
    otherwise (self.a_month, '%b') if the abbreviated name has one,
    otherwise (None, None) -- the representation does not use the month
    name.
    """
    common_full = None
    common_abbr = None
    for month in range(1, 13):
        sample = time.strftime(
            directive,
            time.struct_time((1999, month, 17, 22, 44, 55, 2, 76, 0)),
        ).lower()
        full_hits = set(_findall(sample, self.f_month[month]))
        abbr_hits = set(_findall(sample, self.a_month[month]))
        # The first month seeds each running intersection; later months
        # narrow it.
        common_full = (full_hits if common_full is None
                       else common_full & full_hits)
        common_abbr = (abbr_hits if common_abbr is None
                       else common_abbr & abbr_hits)
        if not (common_full or common_abbr):
            # Neither form of the name has a stable position any more.
            return None, None
    if common_full:
        return self.f_month, '%B'
    if common_abbr:
        return self.a_month, '%b'
    return None, None
225+
def __find_weekday_format(self, directive):
    """Find the day of the week format appropriate for the current locale.

    Similar to __find_month_format(): format the same date with each of
    the seven weekday values and keep only the positions at which the
    weekday name is found for every one of them.

    Returns (self.f_weekday, '%A') if the full name has a common position,
    otherwise (self.a_weekday, '%a') if the abbreviated name has one,
    otherwise (None, None).
    """
    full_indices = abbr_indices = None
    for wd in range(7):
        time_tuple = time.struct_time((1999, 3, 17, 22, 44, 55, wd, 76, 0))
        datetime = time.strftime(directive, time_tuple).lower()
        indices = set(_findall(datetime, self.f_weekday[wd]))
        if full_indices is None:
            full_indices = indices
        else:
            full_indices &= indices
        # When both names are equal the full-name matches are reused
        # instead of searching the string a second time.
        if self.f_weekday[wd] != self.a_weekday[wd]:
            indices = set(_findall(datetime, self.a_weekday[wd]))
        if abbr_indices is None:
            # Copy: if the names were equal, `indices` is the very set
            # bound to full_indices, and the in-place `&=` updates below
            # would otherwise mutate both running intersections at once.
            abbr_indices = set(indices)
        else:
            abbr_indices &= indices
        if not full_indices and not abbr_indices:
            # Neither form of the name has a stable position any more.
            return None, None
    if full_indices:
        return self.f_weekday, '%A'
    if abbr_indices:
        return self.a_weekday, '%a'
    return None, None
253+
152254 def __calc_timezone (self ):
153255 # Set self.timezone by using time.tzname.
154256 # Do not worry about possibility of time.tzname[0] == time.tzname[1]
@@ -181,12 +283,12 @@ def __init__(self, locale_time=None):
181283 else :
182284 self .locale_time = LocaleTime ()
183285 base = super ()
184- base . __init__ ( {
286+ mapping = {
185287 # The " [1-9]" part of the regex is to make %c from ANSI C work
186288 'd' : r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])" ,
187289 'f' : r"(?P<f>[0-9]{1,6})" ,
188290 'H' : r"(?P<H>2[0-3]|[0-1]\d|\d)" ,
189- 'I' : r"(?P<I>1[0-2]|0[1-9]|[1-9])" ,
291+ 'I' : r"(?P<I>1[0-2]|0[1-9]|[1-9]| [1-9] )" ,
190292 'G' : r"(?P<G>\d\d\d\d)" ,
191293 'j' : r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])" ,
192294 'm' : r"(?P<m>1[0-2]|0[1-9]|[1-9])" ,
@@ -210,11 +312,15 @@ def __init__(self, locale_time=None):
210312 'Z' : self .__seqToRE ((tz for tz_names in self .locale_time .timezone
211313 for tz in tz_names ),
212314 'Z' ),
213- '%' : '%' })
214- base .__setitem__ ('W' , base .__getitem__ ('U' ).replace ('U' , 'W' ))
215- base .__setitem__ ('c' , self .pattern (self .locale_time .LC_date_time ))
216- base .__setitem__ ('x' , self .pattern (self .locale_time .LC_date ))
315+ '%' : '%' }
316+ for d in 'dmyHIMS' :
317+ mapping ['O' + d ] = r'(?P<%s>\d\d|\d| \d)' % d
318+ mapping ['Ow' ] = r'(?P<w>\d)'
319+ mapping ['W' ] = mapping ['U' ].replace ('U' , 'W' )
320+ base .__init__ (mapping )
217321 base .__setitem__ ('X' , self .pattern (self .locale_time .LC_time ))
322+ base .__setitem__ ('x' , self .pattern (self .locale_time .LC_date ))
323+ base .__setitem__ ('c' , self .pattern (self .locale_time .LC_date_time ))
218324
219325 def __seqToRE (self , to_convert , directive ):
220326 """Convert a list to a regex string for matching a directive.
@@ -242,21 +348,16 @@ def pattern(self, format):
242348 regex syntax are escaped.
243349
244350 """
245- processed_format = ''
246351 # The sub() call escapes all characters that might be misconstrued
247352 # as regex syntax. Cannot use re.escape since we have to deal with
248353 # format directives (%m, etc.).
249- regex_chars = re_compile (r"([\\.^$*+?\(\){}\[\]|])" )
250- format = regex_chars .sub (r"\\\1" , format )
251- whitespace_replacement = re_compile (r'\s+' )
252- format = whitespace_replacement .sub (r'\\s+' , format )
253- while '%' in format :
254- directive_index = format .index ('%' )+ 1
255- processed_format = "%s%s%s" % (processed_format ,
256- format [:directive_index - 1 ],
257- self [format [directive_index ]])
258- format = format [directive_index + 1 :]
259- return "%s%s" % (processed_format , format )
354+ format = re_sub (r"([\\.^$*+?\(\){}\[\]|])" , r"\\\1" , format )
355+ format = re_sub (r'\s+' , r'\\s+' , format )
356+ format = re_sub (r"'" , "['\u02bc ]" , format ) # needed for br_FR
357+ def repl (m ):
358+ return self [m [1 ]]
359+ format = re_sub (r'%(O?.)' , repl , format )
360+ return format
260361
261362 def compile (self , format ):
262363 """Return a compiled re object for the format string."""
0 commit comments