From 030f89bf7a7877224e7095fdf34f898354b25f63 Mon Sep 17 00:00:00 2001 From: sebres Date: Fri, 9 Jun 2017 20:29:34 +0200 Subject: [PATCH] Implemented zone abbreviations (DST, etc.) and abbr+-offset functionality (accept zones like 'CET+0100'), for the list of abbreviations see strptime.TZ_STR; Tokens `%z` and `%Z` are more precise now; Introduced new tokens `%Exz` and `%ExZ` that fully support zone abbreviations and/or offset-based zones; # TODO: because python currently does not support mixing of case-sensitive with case-insensitive matching, # check how TZ (in uppercase) can be combined with %a/%b etc. (that are currently case-insensitive), # to avoid invalid date-time recognition in strings like '11-Aug-2013 03:36:11.372 error ...' # with wrong TZ "error", which is at least not backwards compatible. # Hence %z currently match literal Z|UTC|GMT only (and offset-based), and %Exz - all zone abbreviations. --- fail2ban/server/strptime.py | 114 ++++++++++++++++++++----- fail2ban/tests/datedetectortestcase.py | 25 +++++- 2 files changed, 116 insertions(+), 23 deletions(-) diff --git a/fail2ban/server/strptime.py b/fail2ban/server/strptime.py index 2da27c58..dbf75d21 100644 --- a/fail2ban/server/strptime.py +++ b/fail2ban/server/strptime.py @@ -26,8 +26,9 @@ from _strptime import LocaleTime, TimeRE, _calc_julian_from_U_or_W from .mytime import MyTime locale_time = LocaleTime() -timeRE = TimeRE() -FIXED_OFFSET_TZ_RE = re.compile(r'(?:Z|UTC|GMT)?([+-]\d{2}(?::?\d{2})?)?$') + +TZ_ABBR_RE = r"[A-Z](?:[A-Z]{2,4})?" +FIXED_OFFSET_TZ_RE = re.compile(r"(%s)?([+-][01]\d(?::?\d{2})?)?$" % (TZ_ABBR_RE,)) def _getYearCentRE(cent=(0,3), distance=3, now=(MyTime.now(), MyTime.alternateNow)): """ Build century regex for last year and the next years (distance). @@ -40,10 +41,20 @@ def _getYearCentRE(cent=(0,3), distance=3, now=(MyTime.now(), MyTime.alternateNo exprset |= set( cent(now[1].year + i) for i in (-1, distance) ) return "(?:%s)" % "|".join(exprset) if len(exprset) > 1 else "".join(exprset) -#todo: implement literal time zone support like CET, PST, PDT, etc (via pytz): -#timeRE['z'] = r"%s?(?PZ|[+-]\d{2}(?::?[0-5]\d)?|[A-Z]{3})?" % timeRE['Z'] -timeRE['Z'] = r"(?P[A-Z]{3,5})" -timeRE['z'] = r"(?PZ|UTC|GMT|[+-]\d{2}(?::?[0-5]\d)?)" +timeRE = TimeRE() + +# TODO: because python currently does not support mixing of case-sensitive with case-insensitive matching, +# check how TZ (in uppercase) can be combined with %a/%b etc. (that are currently case-insensitive), +# to avoid invalid date-time recognition in strings like '11-Aug-2013 03:36:11.372 error ...' +# with wrong TZ "error", which is at least not backwards compatible. +# Hence %z currently match literal Z|UTC|GMT only (and offset-based), and %Exz - all zone abbreviations. +timeRE['Z'] = r"(?PZ|[A-Z]{3,5})" +timeRE['z'] = r"(?PZ|UTC|GMT|[+-][01]\d(?::?\d{2})?)" + +# Note: this extended tokens supported zone abbreviations, but it can parse 1 or 3-5 char(s) in lowercase, +# see todo above. Don't use them in default date-patterns (if not anchored, few precise resp. optional). +timeRE['ExZ'] = r"(?P%s)" % (TZ_ABBR_RE,) +timeRE['Exz'] = r"(?P(?:%s)?[+-][01]\d(?::?\d{2})?|%s)" % (TZ_ABBR_RE, TZ_ABBR_RE) # Extend build-in TimeRE with some exact patterns # exact two-digit patterns: @@ -82,20 +93,22 @@ def getTimePatternRE(): def validateTimeZone(tz): - """Validate a timezone. + """Validate a timezone and convert it to offset if it can (offset-based TZ). - For now this accepts only the UTC[+-]hhmm format (UTC has aliases GMT/Z and optional). + For now this accepts the UTC[+-]hhmm format (UTC has aliases GMT/Z and optional). + Additionally it accepts all zone abbreviations mentioned below in TZ_STR. + Note that currently this zone abbreviations are offset-based and used fixed + offset without automatically DST-switch (if CET used then no automatically CEST-switch). + In the future, it may be extended for named time zones (such as Europe/Paris) - present on the system, if a suitable tz library is present. + present on the system, if a suitable tz library is present (pytz). """ if tz is None: return None m = FIXED_OFFSET_TZ_RE.match(tz) if m is None: raise ValueError("Unknown or unsupported time zone: %r" % tz) - tz = m.group(1) - if tz is None or tz == '': # UTC/GMT - return 0; # fixed zero offzet + tz = m.groups() return zone2offset(tz, 0) def zone2offset(tz, dt): @@ -103,21 +116,29 @@ def zone2offset(tz, dt): Parameters ---------- - tz: symbolic timezone or offset (for now only [+-]hhmm is supported, and it's assumed to have - been validated already) - dt: datetime instance for offset computation + tz: symbolic timezone or offset (for now only TZA?([+-]hh:?mm?)? is supported, + as value are accepted: + int offset; + string in form like 'CET+0100' or 'UTC' or '-0400'; + tuple (or list) in form (zone name, zone offset); + dt: datetime instance for offset computation (currently unused) """ if isinstance(tz, int): return tz - if len(tz) <= 3: # short tz (hh only) + if isinstance(tz, basestring): + return validateTimeZone(tz) + tz, tzo = tz + if tzo is None or tzo == '': # without offset + return TZ_ABBR_OFFS[tz] + if len(tzo) <= 3: # short tzo (hh only) # [+-]hh --> [+-]hh*60 - return int(tz)*60 - if tz[3] != ':': + return TZ_ABBR_OFFS[tz] + int(tzo)*60 + if tzo[3] != ':': # [+-]hhmm --> [+-]1 * (hh*60 + mm) - return (-1 if tz[0] == '-' else 1) * (int(tz[1:3])*60 + int(tz[3:5])) + return TZ_ABBR_OFFS[tz] + (-1 if tzo[0] == '-' else 1) * (int(tzo[1:3])*60 + int(tzo[3:5])) else: # [+-]hh:mm --> [+-]1 * (hh*60 + mm) - return (-1 if tz[0] == '-' else 1) * (int(tz[1:3])*60 + int(tz[4:6])) + return TZ_ABBR_OFFS[tz] + (-1 if tzo[0] == '-' else 1) * (int(tzo[1:3])*60 + int(tzo[4:6])) def reGroupDictStrptime(found_dict, msec=False, default_tz=None): """Return time from dictionary of strptime fields @@ -275,3 +296,56 @@ def reGroupDictStrptime(found_dict, msec=False, default_tz=None): if msec: # pragma: no cover - currently unused tm += fraction/1000000.0 return tm + + +TZ_ABBR_OFFS = {'':0, None:0} +TZ_STR = ''' + -12 Y + -11 X NUT SST + -10 W CKT HAST HST TAHT TKT + -9 V AKST GAMT GIT HADT HNY + -8 U AKDT CIST HAY HNP PST PT + -7 T HAP HNR MST PDT + -6 S CST EAST GALT HAR HNC MDT + -5 R CDT COT EASST ECT EST ET HAC HNE PET + -4 Q AST BOT CLT COST EDT FKT GYT HAE HNA PYT + -3 P ADT ART BRT CLST FKST GFT HAA PMST PYST SRT UYT WGT + -2 O BRST FNT PMDT UYST WGST + -1 N AZOT CVT EGT + 0 Z EGST GMT UTC WET WT + 1 A CET DFT WAT WEDT WEST + 2 B CAT CEDT CEST EET SAST WAST + 3 C EAT EEDT EEST IDT MSK + 4 D AMT AZT GET GST KUYT MSD MUT RET SAMT SCT + 5 E AMST AQTT AZST HMT MAWT MVT PKT TFT TJT TMT UZT YEKT + 6 F ALMT BIOT BTT IOT KGT NOVT OMST YEKST + 7 G CXT DAVT HOVT ICT KRAT NOVST OMSST THA WIB + 8 H ACT AWST BDT BNT CAST HKT IRKT KRAST MYT PHT SGT ULAT WITA WST + 9 I AWDT IRKST JST KST PWT TLT WDT WIT YAKT + 10 K AEST ChST PGT VLAT YAKST YAPT + 11 L AEDT LHDT MAGT NCT PONT SBT VLAST VUT + 12 M ANAST ANAT FJT GILT MAGST MHT NZST PETST PETT TVT WFT + 13 FJST NZDT + 11.5 NFT + 10.5 ACDT LHST + 9.5 ACST + 6.5 CCT MMT + 5.75 NPT + 5.5 SLT + 4.5 AFT IRDT + 3.5 IRST + -2.5 HAT NDT + -3.5 HNT NST NT + -4.5 HLV VET + -9.5 MART MIT +''' + +def _init_TZ_ABBR(): + """Initialized TZ_ABBR_OFFS dictionary (TZ -> offset in minutes)""" + for tzline in map(str.split, TZ_STR.split('\n')): + if not len(tzline): continue + tzoffset = int(float(tzline[0]) * 60) + for tz in tzline[1:]: + TZ_ABBR_OFFS[tz] = tzoffset + +_init_TZ_ABBR() diff --git a/fail2ban/tests/datedetectortestcase.py b/fail2ban/tests/datedetectortestcase.py index 56970ac5..02facf30 100644 --- a/fail2ban/tests/datedetectortestcase.py +++ b/fail2ban/tests/datedetectortestcase.py @@ -90,16 +90,32 @@ class DateDetectorTest(LogCaptureTestCase): self.assertEqual(matchlog.group(1), 'Jan 23 21:59:59') def testDefaultTimeZone(self): - dd = self.datedetector + # use special date-pattern (with %Exz), because %z currently does not supported + # zone abbreviations except Z|UTC|GMT. + dd = DateDetector() + dd.appendTemplate('^%ExY-%Exm-%Exd %H:%M:%S(?: ?%Exz)?') dt = datetime.datetime logdt = "2017-01-23 15:00:00" dtUTC = dt(2017, 1, 23, 15, 0) for tz, log, desired in ( + # no TZ in input-string: ('UTC+0300', logdt, dt(2017, 1, 23, 12, 0)), # so in UTC, it was noon ('UTC', logdt, dtUTC), # UTC ('UTC-0430', logdt, dt(2017, 1, 23, 19, 30)), ('GMT+12', logdt, dt(2017, 1, 23, 3, 0)), (None, logdt, dt(2017, 1, 23, 14, 0)), # default CET in our test-framework + # CET: + ('CET', logdt, dt(2017, 1, 23, 14, 0)), + ('+0100', logdt, dt(2017, 1, 23, 14, 0)), + ('CEST-01', logdt, dt(2017, 1, 23, 14, 0)), + # CEST: + ('CEST', logdt, dt(2017, 1, 23, 13, 0)), + ('+0200', logdt, dt(2017, 1, 23, 13, 0)), + ('CET+01', logdt, dt(2017, 1, 23, 13, 0)), + ('CET+0100', logdt, dt(2017, 1, 23, 13, 0)), + # check offset in minutes: + ('CET+0130', logdt, dt(2017, 1, 23, 12, 30)), + # TZ in input-string have precedence: ('UTC+0300', logdt+' GMT', dtUTC), # GMT wins ('UTC', logdt+' GMT', dtUTC), # GMT wins ('UTC-0430', logdt+' GMT', dtUTC), # GMT wins @@ -108,13 +124,16 @@ class DateDetectorTest(LogCaptureTestCase): (None, logdt+' -10:45', dt(2017, 1, 24, 1, 45)), # -1045 wins ('UTC', logdt+' +0945', dt(2017, 1, 23, 5, 15)), # +0945 wins (None, logdt+' +09:45', dt(2017, 1, 23, 5, 15)), # +0945 wins - (None, logdt+' Z', dtUTC), # Z wins (UTC) + ('UTC+0300', logdt+' Z', dtUTC), # Z wins (UTC) + ('GMT+12', logdt+' CET', dt(2017, 1, 23, 14, 0)), # CET wins + ('GMT+12', logdt+' CEST', dt(2017, 1, 23, 13, 0)), # CEST wins + ('GMT+12', logdt+' CET+0130', dt(2017, 1, 23, 12, 30)), # CET+0130 wins ): logSys.debug('== test %r with TZ %r', log, tz) dd.default_tz=tz; datelog, _ = dd.getTime(log) val = dt.utcfromtimestamp(datelog) self.assertEqual(val, desired, - "wrong offset %r != %r by %r with TZ %r (%r)" % (val, desired, log, tz, dd.default_tz)) + "wrong offset %r != %r by %r with default TZ %r (%r)" % (val, desired, log, tz, dd.default_tz)) self.assertRaises(ValueError, setattr, dd, 'default_tz', 'WRONG-TZ') dd.default_tz = None