Implemented zone abbreviations (DST, etc.) and abbreviation-plus-offset zones (e.g. 'CET+0100' is now accepted); for the list of supported abbreviations see strptime.TZ_STR;

Tokens `%z` and `%Z` are more precise now;
Introduced new tokens `%Exz` and `%ExZ` that fully support zone abbreviations and/or offset-based zones;

# TODO: because Python currently does not support mixing case-sensitive with case-insensitive matching,
#       check how TZ (in uppercase) can be combined with %a/%b etc. (which are currently case-insensitive),
#       to avoid invalid date-time recognition in strings like '11-Aug-2013 03:36:11.372 error ...'
#       where "error" is wrongly taken as the TZ, which at the very least is not backwards compatible.
#       Hence %z currently matches only the literals Z|UTC|GMT (and offset-based zones), whereas %Exz matches all zone abbreviations.
pull/1792/head
sebres 2017-06-09 20:29:34 +02:00
parent 39c4acf6bd
commit 030f89bf7a
2 changed files with 116 additions and 23 deletions

View File

@ -26,8 +26,9 @@ from _strptime import LocaleTime, TimeRE, _calc_julian_from_U_or_W
from .mytime import MyTime
locale_time = LocaleTime()
timeRE = TimeRE()
FIXED_OFFSET_TZ_RE = re.compile(r'(?:Z|UTC|GMT)?([+-]\d{2}(?::?\d{2})?)?$')
TZ_ABBR_RE = r"[A-Z](?:[A-Z]{2,4})?"
FIXED_OFFSET_TZ_RE = re.compile(r"(%s)?([+-][01]\d(?::?\d{2})?)?$" % (TZ_ABBR_RE,))
def _getYearCentRE(cent=(0,3), distance=3, now=(MyTime.now(), MyTime.alternateNow)):
""" Build century regex for last year and the next years (distance).
@ -40,10 +41,20 @@ def _getYearCentRE(cent=(0,3), distance=3, now=(MyTime.now(), MyTime.alternateNo
exprset |= set( cent(now[1].year + i) for i in (-1, distance) )
return "(?:%s)" % "|".join(exprset) if len(exprset) > 1 else "".join(exprset)
#todo: implement literal time zone support like CET, PST, PDT, etc (via pytz):
#timeRE['z'] = r"%s?(?P<z>Z|[+-]\d{2}(?::?[0-5]\d)?|[A-Z]{3})?" % timeRE['Z']
timeRE['Z'] = r"(?P<Z>[A-Z]{3,5})"
timeRE['z'] = r"(?P<z>Z|UTC|GMT|[+-]\d{2}(?::?[0-5]\d)?)"
timeRE = TimeRE()
# TODO: because Python currently does not support mixing case-sensitive with case-insensitive matching,
# check how TZ (in uppercase) can be combined with %a/%b etc. (which are currently case-insensitive),
# to avoid invalid date-time recognition in strings like '11-Aug-2013 03:36:11.372 error ...'
# where "error" is wrongly taken as the TZ, which at the very least is not backwards compatible.
# Hence %z currently matches only the literals Z|UTC|GMT (and offset-based zones), whereas %Exz matches all zone abbreviations.
timeRE['Z'] = r"(?P<Z>Z|[A-Z]{3,5})"
timeRE['z'] = r"(?P<z>Z|UTC|GMT|[+-][01]\d(?::?\d{2})?)"
# Note: these extended tokens support zone abbreviations, but they can also match 1 or 3-5 characters in lowercase,
# see the TODO above. Don't use them in default date patterns (if the pattern is not anchored, not precise enough, or the token is optional).
timeRE['ExZ'] = r"(?P<Z>%s)" % (TZ_ABBR_RE,)
timeRE['Exz'] = r"(?P<z>(?:%s)?[+-][01]\d(?::?\d{2})?|%s)" % (TZ_ABBR_RE, TZ_ABBR_RE)
# Extend the built-in TimeRE with some exact patterns
# exact two-digit patterns:
@ -82,20 +93,22 @@ def getTimePatternRE():
def validateTimeZone(tz):
"""Validate a timezone.
"""Validate a timezone and convert it to offset if it can (offset-based TZ).
For now this accepts only the UTC[+-]hhmm format (UTC has aliases GMT/Z and optional).
For now this accepts the UTC[+-]hhmm format (UTC has aliases GMT/Z and optional).
Additionally it accepts all zone abbreviations mentioned below in TZ_STR.
Note that currently these zone abbreviations are offset-based and use a fixed
offset without automatic DST switching (if CET is used, there is no automatic switch to CEST).
In the future, it may be extended for named time zones (such as Europe/Paris)
present on the system, if a suitable tz library is present.
present on the system, if a suitable tz library is present (pytz).
"""
if tz is None:
return None
m = FIXED_OFFSET_TZ_RE.match(tz)
if m is None:
raise ValueError("Unknown or unsupported time zone: %r" % tz)
tz = m.group(1)
if tz is None or tz == '': # UTC/GMT
return 0; # fixed zero offzet
tz = m.groups()
return zone2offset(tz, 0)
def zone2offset(tz, dt):
@ -103,21 +116,29 @@ def zone2offset(tz, dt):
Parameters
----------
tz: symbolic timezone or offset (for now only [+-]hhmm is supported, and it's assumed to have
been validated already)
dt: datetime instance for offset computation
tz: symbolic timezone or offset (for now only TZA?([+-]hh:?mm?)? is supported);
the following values are accepted:
int offset;
string in form like 'CET+0100' or 'UTC' or '-0400';
tuple (or list) in form (zone name, zone offset);
dt: datetime instance for offset computation (currently unused)
"""
if isinstance(tz, int):
return tz
if len(tz) <= 3: # short tz (hh only)
if isinstance(tz, basestring):
return validateTimeZone(tz)
tz, tzo = tz
if tzo is None or tzo == '': # without offset
return TZ_ABBR_OFFS[tz]
if len(tzo) <= 3: # short tzo (hh only)
# [+-]hh --> [+-]hh*60
return int(tz)*60
if tz[3] != ':':
return TZ_ABBR_OFFS[tz] + int(tzo)*60
if tzo[3] != ':':
# [+-]hhmm --> [+-]1 * (hh*60 + mm)
return (-1 if tz[0] == '-' else 1) * (int(tz[1:3])*60 + int(tz[3:5]))
return TZ_ABBR_OFFS[tz] + (-1 if tzo[0] == '-' else 1) * (int(tzo[1:3])*60 + int(tzo[3:5]))
else:
# [+-]hh:mm --> [+-]1 * (hh*60 + mm)
return (-1 if tz[0] == '-' else 1) * (int(tz[1:3])*60 + int(tz[4:6]))
return TZ_ABBR_OFFS[tz] + (-1 if tzo[0] == '-' else 1) * (int(tzo[1:3])*60 + int(tzo[4:6]))
def reGroupDictStrptime(found_dict, msec=False, default_tz=None):
"""Return time from dictionary of strptime fields
@ -275,3 +296,56 @@ def reGroupDictStrptime(found_dict, msec=False, default_tz=None):
if msec: # pragma: no cover - currently unused
tm += fraction/1000000.0
return tm
TZ_ABBR_OFFS = {'':0, None:0}
TZ_STR = '''
-12 Y
-11 X NUT SST
-10 W CKT HAST HST TAHT TKT
-9 V AKST GAMT GIT HADT HNY
-8 U AKDT CIST HAY HNP PST PT
-7 T HAP HNR MST PDT
-6 S CST EAST GALT HAR HNC MDT
-5 R CDT COT EASST ECT EST ET HAC HNE PET
-4 Q AST BOT CLT COST EDT FKT GYT HAE HNA PYT
-3 P ADT ART BRT CLST FKST GFT HAA PMST PYST SRT UYT WGT
-2 O BRST FNT PMDT UYST WGST
-1 N AZOT CVT EGT
0 Z EGST GMT UTC WET WT
1 A CET DFT WAT WEDT WEST
2 B CAT CEDT CEST EET SAST WAST
3 C EAT EEDT EEST IDT MSK
4 D AMT AZT GET GST KUYT MSD MUT RET SAMT SCT
5 E AMST AQTT AZST HMT MAWT MVT PKT TFT TJT TMT UZT YEKT
6 F ALMT BIOT BTT IOT KGT NOVT OMST YEKST
7 G CXT DAVT HOVT ICT KRAT NOVST OMSST THA WIB
8 H ACT AWST BDT BNT CAST HKT IRKT KRAST MYT PHT SGT ULAT WITA WST
9 I AWDT IRKST JST KST PWT TLT WDT WIT YAKT
10 K AEST ChST PGT VLAT YAKST YAPT
11 L AEDT LHDT MAGT NCT PONT SBT VLAST VUT
12 M ANAST ANAT FJT GILT MAGST MHT NZST PETST PETT TVT WFT
13 FJST NZDT
11.5 NFT
10.5 ACDT LHST
9.5 ACST
6.5 CCT MMT
5.75 NPT
5.5 SLT
4.5 AFT IRDT
3.5 IRST
-2.5 HAT NDT
-3.5 HNT NST NT
-4.5 HLV VET
-9.5 MART MIT
'''
def _init_TZ_ABBR():
    """Initialize the TZ_ABBR_OFFS dictionary (TZ abbreviation -> offset in minutes).

    Each non-blank line of TZ_STR holds an offset in hours (possibly
    fractional, e.g. 5.75) followed by the whitespace-separated zone
    abbreviations that share this offset. Every abbreviation is stored in
    TZ_ABBR_OFFS with the offset converted to whole minutes.
    """
    for line in TZ_STR.splitlines():
        fields = line.split()
        if not fields:
            # skip blank lines (e.g. those framing the triple-quoted table)
            continue
        # hours (possibly fractional) -> minutes
        offset = int(float(fields[0]) * 60)
        for abbr in fields[1:]:
            TZ_ABBR_OFFS[abbr] = offset
_init_TZ_ABBR()

View File

@ -90,16 +90,32 @@ class DateDetectorTest(LogCaptureTestCase):
self.assertEqual(matchlog.group(1), 'Jan 23 21:59:59')
def testDefaultTimeZone(self):
dd = self.datedetector
# use a special date pattern (with %Exz), because %z currently does not support
# zone abbreviations except Z|UTC|GMT.
dd = DateDetector()
dd.appendTemplate('^%ExY-%Exm-%Exd %H:%M:%S(?: ?%Exz)?')
dt = datetime.datetime
logdt = "2017-01-23 15:00:00"
dtUTC = dt(2017, 1, 23, 15, 0)
for tz, log, desired in (
# no TZ in input-string:
('UTC+0300', logdt, dt(2017, 1, 23, 12, 0)), # so in UTC, it was noon
('UTC', logdt, dtUTC), # UTC
('UTC-0430', logdt, dt(2017, 1, 23, 19, 30)),
('GMT+12', logdt, dt(2017, 1, 23, 3, 0)),
(None, logdt, dt(2017, 1, 23, 14, 0)), # default CET in our test-framework
# CET:
('CET', logdt, dt(2017, 1, 23, 14, 0)),
('+0100', logdt, dt(2017, 1, 23, 14, 0)),
('CEST-01', logdt, dt(2017, 1, 23, 14, 0)),
# CEST:
('CEST', logdt, dt(2017, 1, 23, 13, 0)),
('+0200', logdt, dt(2017, 1, 23, 13, 0)),
('CET+01', logdt, dt(2017, 1, 23, 13, 0)),
('CET+0100', logdt, dt(2017, 1, 23, 13, 0)),
# check offset in minutes:
('CET+0130', logdt, dt(2017, 1, 23, 12, 30)),
# a TZ in the input string takes precedence:
('UTC+0300', logdt+' GMT', dtUTC), # GMT wins
('UTC', logdt+' GMT', dtUTC), # GMT wins
('UTC-0430', logdt+' GMT', dtUTC), # GMT wins
@ -108,13 +124,16 @@ class DateDetectorTest(LogCaptureTestCase):
(None, logdt+' -10:45', dt(2017, 1, 24, 1, 45)), # -1045 wins
('UTC', logdt+' +0945', dt(2017, 1, 23, 5, 15)), # +0945 wins
(None, logdt+' +09:45', dt(2017, 1, 23, 5, 15)), # +0945 wins
(None, logdt+' Z', dtUTC), # Z wins (UTC)
('UTC+0300', logdt+' Z', dtUTC), # Z wins (UTC)
('GMT+12', logdt+' CET', dt(2017, 1, 23, 14, 0)), # CET wins
('GMT+12', logdt+' CEST', dt(2017, 1, 23, 13, 0)), # CEST wins
('GMT+12', logdt+' CET+0130', dt(2017, 1, 23, 12, 30)), # CET+0130 wins
):
logSys.debug('== test %r with TZ %r', log, tz)
dd.default_tz=tz; datelog, _ = dd.getTime(log)
val = dt.utcfromtimestamp(datelog)
self.assertEqual(val, desired,
"wrong offset %r != %r by %r with TZ %r (%r)" % (val, desired, log, tz, dd.default_tz))
"wrong offset %r != %r by %r with default TZ %r (%r)" % (val, desired, log, tz, dd.default_tz))
self.assertRaises(ValueError, setattr, dd, 'default_tz', 'WRONG-TZ')
dd.default_tz = None