Initial commit of OpenSPARC T2 architecture model.
[OpenSPARC-T2-SAM] / sam-t2 / devtools / v8plus / lib / python2.4 / _strptime.py
CommitLineData
920dae64
AT
1"""Strptime-related classes and functions.
2
3CLASSES:
4 LocaleTime -- Discovers and stores locale-specific time information
5 TimeRE -- Creates regexes for pattern matching a string of text containing
6 time information
7
8FUNCTIONS:
9 _getlang -- Figure out what language is being used for the locale
10 strptime -- Calculates the time struct represented by the passed-in string
11
12"""
13import time
14import locale
15import calendar
16from re import compile as re_compile
17from re import IGNORECASE
18from re import escape as re_escape
19from datetime import date as datetime_date
20try:
21 from thread import allocate_lock as _thread_allocate_lock
22except:
23 from dummy_thread import allocate_lock as _thread_allocate_lock
24
25__author__ = "Brett Cannon"
26__email__ = "brett@python.org"
27
28__all__ = ['strptime']
29
def _getlang():
    """Return the locale currently configured for the LC_TIME category.

    The value is exactly what locale.getlocale() reports for LC_TIME; it is
    used elsewhere in this module to detect that the locale has changed.

    """
    current_setting = locale.getlocale(locale.LC_TIME)
    return current_setting
33
class LocaleTime(object):
    """Snapshot of the locale-specific strings needed to parse times.

    All names are stored lower-cased.

    ATTRIBUTES:
        f_weekday -- full weekday names (7-item list)
        a_weekday -- abbreviated weekday names (7-item list)
        f_month -- full month names (13-item list; [0] is a dummy value so
                    that index N corresponds to month N)
        a_month -- abbreviated month names (13-item list; [0] is a dummy
                    value so that index N corresponds to month N)
        am_pm -- AM/PM representation (2-item list: AM first, then PM)
        LC_date_time -- format string for date/time representation (string)
        LC_date -- format string for date representation (string)
        LC_time -- format string for time representation (string)
        timezone -- non-daylight- and daylight-savings timezone names
                    (2-item tuple of frozensets, in that order)
        lang -- value of _getlang() when the instance was created

    """

    def __init__(self):
        """Populate every attribute from the current locale.

        The helpers run in a fixed order because the format-string
        calculation reads the weekday, month, AM/PM and timezone values
        computed before it.

        The locale is recorded at the outset and compared again at the end.
        If it changed in between (for example another thread switched the
        locale while this constructor was running) the attributes may be a
        mix of two locales, so ValueError is raised instead of returning an
        inconsistent object.

        A timezone change that was not followed by time.tzset() is not
        detected here; that is left to the programmer.

        """
        self.lang = _getlang()
        # Order matters: __calc_date_time depends on everything before it.
        for compute in (self.__calc_weekday, self.__calc_month,
                        self.__calc_am_pm, self.__calc_timezone,
                        self.__calc_date_time):
            compute()
        if self.lang != _getlang():
            raise ValueError("locale changed during initialization")

    def __pad(self, seq, front):
        # Return seq as a list with '' added at the front when 'front' is
        # true, otherwise at the back.
        if front:
            return [''] + list(seq)
        return list(seq) + ['']

    def __calc_weekday(self):
        # Fill self.a_weekday and self.f_weekday from the calendar module,
        # index 0 being the first entry calendar provides.
        day_numbers = range(7)
        short_names = [calendar.day_abbr[n].lower() for n in day_numbers]
        long_names = [calendar.day_name[n].lower() for n in day_numbers]
        self.a_weekday = short_names
        self.f_weekday = long_names

    def __calc_month(self):
        # Fill self.a_month and self.f_month from the calendar module.
        # Thirteen entries are read so that index 0 holds calendar's dummy
        # value and index N is month N.
        month_numbers = range(13)
        short_names = [calendar.month_abbr[n].lower() for n in month_numbers]
        long_names = [calendar.month_name[n].lower() for n in month_numbers]
        self.a_month = short_names
        self.f_month = long_names

    def __calc_am_pm(self):
        # Fill self.am_pm by asking time.strftime() for "%p" at a morning
        # hour (1) and an evening hour (22).  The rest of the tuple is the
        # same fixed date used elsewhere in this class.
        self.am_pm = [
            time.strftime(
                "%p",
                time.struct_time((1999, 3, 17, hour, 44, 55, 2, 76, 0))
            ).lower()
            for hour in (1, 22)]

    def __calc_date_time(self):
        # Fill self.LC_date_time, self.LC_date and self.LC_time.
        #
        # A known date is rendered with %c, %x and %X and then every
        # recognisable value in the output is swapped for the directive
        # that produced it.  The sample date (1999,3,17,22,44,55,2,76,0)
        # keeps the number of fields sharing the same digits small, and the
        # order of the substitutions below is significant: it resolves what
        # would otherwise be ambiguous matches.
        sample = time.struct_time((1999, 3, 17, 22, 44, 55, 2, 76, 0))
        directives = ('%c', '%x', '%X')
        rendered = [time.strftime(code, sample).lower()
                    for code in directives]
        substitutions = [('%', '%%'), (self.f_weekday[2], '%A'),
                         (self.f_month[3], '%B'), (self.a_weekday[2], '%a'),
                         (self.a_month[3], '%b'), (self.am_pm[1], '%p'),
                         ('1999', '%Y'), ('99', '%y'), ('22', '%H'),
                         ('44', '%M'), ('55', '%S'), ('76', '%j'),
                         ('17', '%d'), ('03', '%m'), ('3', '%m'),
                         # '3' needed for when no leading zero.
                         ('2', '%w'), ('10', '%I')]
        for zone_names in self.timezone:
            for zone in zone_names:
                substitutions.append((zone, "%Z"))
        # Second sample date used only to tell %U and %W apart: a '00' in
        # its rendering selects %W, anything else selects %U.
        week_probe = time.struct_time((1999, 1, 3, 1, 1, 1, 6, 3, 0))
        formats = []
        for code, text in zip(directives, rendered):
            for literal, replacement in substitutions:
                # Locale information can be missing and show up as an empty
                # string (e.g. no AM/PM names, or a platform reporting the
                # timezone as ('', '')); replacing '' would corrupt the
                # format, so skip it.
                if literal:
                    text = text.replace(literal, replacement)
            if '00' in time.strftime(code, week_probe):
                week_directive = '%W'
            else:
                week_directive = '%U'
            formats.append(text.replace('11', week_directive))
        self.LC_date_time, self.LC_date, self.LC_time = formats

    def __calc_timezone(self):
        # Fill self.timezone from time.tzname.
        # The case of time.tzname[0] == time.tzname[1] while time.daylight
        # is true is not handled here; strptime() deals with it.
        try:
            time.tzset()
        except AttributeError:
            # time.tzset() is not available on every platform.
            pass
        standard_names = frozenset(["utc", "gmt", time.tzname[0].lower()])
        daylight_names = []
        if time.daylight:
            daylight_names.append(time.tzname[1].lower())
        self.timezone = (standard_names, frozenset(daylight_names))
174
175
class TimeRE(dict):
    """Handle conversion from format directives to regexes.

    The instance is a dict keyed by the single directive character (the
    'Y' of '%Y') whose values are regex fragments, each wrapping its match
    in a named group of the same name.  The locale information used to
    build the name-based directives is kept in self.locale_time.

    """

    def __init__(self, locale_time=None):
        """Create keys/values.

        locale_time -- object supplying f_weekday, a_weekday, f_month,
            a_month, am_pm, timezone, LC_date_time, LC_date and LC_time;
            when omitted (or false) a new LocaleTime() is created.

        Order of execution is important for dependency reasons: 'W' is
        derived from 'U', and 'c', 'x' and 'X' are expanded through
        pattern(), which looks up the directives defined before them.

        """
        if locale_time:
            self.locale_time = locale_time
        else:
            self.locale_time = LocaleTime()
        base = super(TimeRE, self)
        base.__init__({
            # The " \d" part of the regex is to make %c from ANSI C work
            'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
            'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
            'I': r"(?P<I>1[0-2]|0[1-9]|[1-9])",
            'j': r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])",
            'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])",
            'M': r"(?P<M>[0-5]\d|\d)",
            'S': r"(?P<S>6[0-1]|[0-5]\d|\d)",
            'U': r"(?P<U>5[0-3]|[0-4]\d|\d)",
            'w': r"(?P<w>[0-6])",
            # W is set below by using 'U'
            'y': r"(?P<y>\d\d)",
            #XXX: Does 'Y' need to worry about having less or more than
            #     4 digits?
            'Y': r"(?P<Y>\d\d\d\d)",
            'A': self.__seqToRE(self.locale_time.f_weekday, 'A'),
            'a': self.__seqToRE(self.locale_time.a_weekday, 'a'),
            # [1:] drops the dummy entry stored at index 0 of the month lists
            'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'),
            'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'),
            'p': self.__seqToRE(self.locale_time.am_pm, 'p'),
            'Z': self.__seqToRE((tz for tz_names in self.locale_time.timezone
                                        for tz in tz_names),
                                'Z'),
            '%': '%'})
        base.__setitem__('W', base.__getitem__('U').replace('U', 'W'))
        base.__setitem__('c', self.pattern(self.locale_time.LC_date_time))
        base.__setitem__('x', self.pattern(self.locale_time.LC_date))
        base.__setitem__('X', self.pattern(self.locale_time.LC_time))

    def __seqToRE(self, to_convert, directive):
        """Convert a sequence of strings to a regex matching a directive.

        Returns '(?P<directive>alt1|alt2|...)' with every alternative
        escaped, or '' when the sequence holds nothing but empty strings.

        Want possible matching values to be from longest to shortest.  This
        prevents the possibility of a match occurring for a value that is
        also a substring of a larger value that should have matched (e.g.,
        'abc' matching when 'abcdef' should have been the match).

        """
        to_convert = sorted(to_convert, key=len, reverse=True)
        for value in to_convert:
            if value != '':
                break
        else:
            # Nothing usable (no values, or only empty strings).
            return ''
        regex = '|'.join(re_escape(stuff) for stuff in to_convert)
        return '(?P<%s>%s)' % (directive, regex)

    def pattern(self, format):
        """Return regex pattern for the format string.

        Characters that could be read as regex syntax are escaped, each run
        of whitespace becomes '\\s*', and every '%x' directive is replaced
        by the regex stored under key 'x'.

        Raises KeyError when a directive is not a key of this dict and
        IndexError when the format ends with a lone '%'.

        """
        # The sub() call escapes all characters that might be misconstrued
        # as regex syntax.  Cannot use re.escape since we have to deal with
        # format directives (%m, etc.).
        regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])")
        format = regex_chars.sub(r"\\\1", format)
        whitespace_replacement = re_compile(r'\s+')
        # The replacement is a template, so the backslash must itself be
        # escaped to emit a literal '\s*'; an unescaped '\s' depends on
        # unknown template escapes being passed through unchanged.
        format = whitespace_replacement.sub(r'\\s*', format)
        pieces = []
        while '%' in format:
            directive_index = format.index('%') + 1
            # Literal text before the '%', then the directive's regex.
            pieces.append(format[:directive_index - 1])
            pieces.append(self[format[directive_index]])
            format = format[directive_index + 1:]
        pieces.append(format)
        return ''.join(pieces)

    def compile(self, format):
        """Return a compiled, case-insensitive re object for the format string."""
        return re_compile(self.pattern(format), IGNORECASE)
265
# Lock that strptime() holds while it reads or updates the two caches below.
_cache_lock = _thread_allocate_lock()
# DO NOT modify _TimeRE_cache or _regex_cache without acquiring the cache lock
# first!
# TimeRE built for the locale that was active when it was created; strptime()
# compares its locale_time.lang against _getlang() to notice locale changes.
_TimeRE_cache = TimeRE()
# _regex_cache is emptied by strptime() once it holds more than this many
# compiled regexes.
_CACHE_MAX_SIZE = 5 # Max number of regexes stored in _regex_cache
# Maps a format string to the regex object compiled for it.
_regex_cache = {}
272
def _calc_julian_from_U_or_W(year, week_of_year, day_of_week, week_starts_Mon):
    """Return the day of the year for a week number plus a weekday.

    year -- the year the week number refers to
    week_of_year -- value of %U or %W (week 0 holds the days before the
        first full week)
    day_of_week -- weekday with Monday == 0 ... Sunday == 6
    week_starts_Mon -- true for %W (weeks start on Monday), false for %U
        (weeks start on Sunday)

    The result is 1 for January 1 and can be zero or negative when the
    requested weekday of week 0 falls before January 1.

    """
    first_weekday = datetime_date(year, 1, 1).weekday()
    # For %U, renumber the days so that Sunday is 0; the arithmetic below
    # is then the same for both directives.
    if not week_starts_Mon:
        first_weekday = (first_weekday + 1) % 7
        day_of_week = (day_of_week + 1) % 7
    # Number of days of the year that come before week 1 starts.
    week_0_length = (7 - first_weekday) % 7
    if week_of_year == 0:
        return 1 + day_of_week - first_weekday
    days_to_week = week_0_length + (7 * (week_of_year - 1))
    return 1 + days_to_week + day_of_week


def strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
    """Return a time struct based on the input string and the format string.

    data_string -- the text to parse
    format -- strptime-style format string made of literal text and
        %-directives

    Fields the format does not provide default to year 1900, month 1,
    day 1, 00:00:00 and a DST flag of -1.  The weekday and the day of the
    year are calculated from the date when they are not given.

    Raises ValueError when data_string does not match format or when text
    is left over after the match.

    """
    global _TimeRE_cache, _regex_cache
    _cache_lock.acquire()
    try:
        if _getlang() != _TimeRE_cache.locale_time.lang:
            # Locale changed since the cache was built: rebuild it and
            # drop the regexes compiled for the previous locale.
            _TimeRE_cache = TimeRE()
            _regex_cache = {}
        if len(_regex_cache) > _CACHE_MAX_SIZE:
            _regex_cache.clear()
        # Read the cache only after the possible rebuild above so that this
        # call never parses with the previous locale's data.
        locale_time = _TimeRE_cache.locale_time
        format_regex = _regex_cache.get(format)
        if not format_regex:
            format_regex = _TimeRE_cache.compile(format)
            _regex_cache[format] = format_regex
    finally:
        _cache_lock.release()
    found = format_regex.match(data_string)
    if not found:
        raise ValueError("time data did not match format:  data=%s  fmt=%s" %
                         (data_string, format))
    if len(data_string) != found.end():
        raise ValueError("unconverted data remains: %s" %
                          data_string[found.end():])
    year = 1900
    month = day = 1
    hour = minute = second = 0
    tz = -1
    # Default to -1 to signify that values not known; not critical to have,
    # though
    week_of_year = -1
    week_of_year_start = -1
    # weekday and julian defaulted to -1 so as to signal need to calculate
    # values
    weekday = julian = -1
    found_dict = found.groupdict()
    for group_key in found_dict:
        # Directives not explicitly handled below:
        #   c, x, X
        #      handled by making out of other directives
        #   U, W
        #      worthless without day of the week
        if group_key == 'y':
            year = int(found_dict['y'])
            # Open Group specification for strptime() states that a %y
            #value in the range of [00, 68] is in the century 2000, while
            #[69,99] is in the century 1900
            if year <= 68:
                year += 2000
            else:
                year += 1900
        elif group_key == 'Y':
            year = int(found_dict['Y'])
        elif group_key == 'm':
            month = int(found_dict['m'])
        elif group_key == 'B':
            month = locale_time.f_month.index(found_dict['B'].lower())
        elif group_key == 'b':
            month = locale_time.a_month.index(found_dict['b'].lower())
        elif group_key == 'd':
            day = int(found_dict['d'])
        elif group_key == 'H':
            hour = int(found_dict['H'])
        elif group_key == 'I':
            hour = int(found_dict['I'])
            ampm = found_dict.get('p', '').lower()
            # If there was no AM/PM indicator, we'll treat this like AM
            if ampm in ('', locale_time.am_pm[0]):
                # We're in AM so the hour is correct unless we're
                # looking at 12 midnight.
                # 12 midnight == 12 AM == hour 0
                if hour == 12:
                    hour = 0
            elif ampm == locale_time.am_pm[1]:
                # We're in PM so we need to add 12 to the hour unless
                # we're looking at 12 noon.
                # 12 noon == 12 PM == hour 12
                if hour != 12:
                    hour += 12
        elif group_key == 'M':
            minute = int(found_dict['M'])
        elif group_key == 'S':
            second = int(found_dict['S'])
        elif group_key == 'A':
            weekday = locale_time.f_weekday.index(found_dict['A'].lower())
        elif group_key == 'a':
            weekday = locale_time.a_weekday.index(found_dict['a'].lower())
        elif group_key == 'w':
            # %w counts from Sunday == 0; convert to Monday == 0.
            weekday = int(found_dict['w'])
            if weekday == 0:
                weekday = 6
            else:
                weekday -= 1
        elif group_key == 'j':
            julian = int(found_dict['j'])
        elif group_key in ('U', 'W'):
            week_of_year = int(found_dict[group_key])
            if group_key == 'U':
                # U starts week on Sunday
                week_of_year_start = 6
            else:
                # W starts week on Monday
                week_of_year_start = 0
        elif group_key == 'Z':
            # Since -1 is default value only need to worry about setting tz if
            # it can be something other than -1.
            found_zone = found_dict['Z'].lower()
            for value, tz_values in enumerate(locale_time.timezone):
                if found_zone in tz_values:
                    # Deal with bad locale setup where timezone names are the
                    # same and yet time.daylight is true; too ambiguous to
                    # be able to tell what timezone has daylight savings
                    if (time.tzname[0] == time.tzname[1] and
                       time.daylight and found_zone not in ("utc", "gmt")):
                        break
                    else:
                        tz = value
                        break
    # If we know the week of the year and what day of that week, we can figure
    # out the Julian day of the year
    if julian == -1 and week_of_year != -1 and weekday != -1:
        week_starts_Mon = (week_of_year_start == 0)
        julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
                                          week_starts_Mon)
        if julian <= 0:
            # The requested day of week 0 lies before January 1, i.e. in
            # the previous year; express it as a day of that year so that
            # the result is valid and cannot be mistaken for the -1
            # "unknown" marker below.
            year -= 1
            if calendar.isleap(year):
                julian += 366
            else:
                julian += 365
    # Cannot pre-calculate datetime_date() since can change in Julian
    #calculation and thus could have different value for the day of the week
    #calculation
    if julian == -1:
        # Need to add 1 to result since first day of the year is 1, not 0.
        julian = datetime_date(year, month, day).toordinal() - \
                  datetime_date(year, 1, 1).toordinal() + 1
    else:  # Assume that if they bothered to include Julian day it will
           #be accurate
        datetime_result = datetime_date.fromordinal((julian - 1) + datetime_date(year, 1, 1).toordinal())
        year = datetime_result.year
        month = datetime_result.month
        day = datetime_result.day
    if weekday == -1:
        weekday = datetime_date(year, month, day).weekday()
    return time.struct_time((year, month, day,
                             hour, minute, second,
                             weekday, julian, tz))