From 030f89bf7a7877224e7095fdf34f898354b25f63 Mon Sep 17 00:00:00 2001
From: sebres <serg.brester@sebres.de>
Date: Fri, 9 Jun 2017 20:29:34 +0200
Subject: [PATCH] Implemented zone abbreviations (DST, etc.) and abbr+-offset
 functionality (accept zones like 'CET+0100'), for the list of abbreviations
 see strptime.TZ_STR; Tokens `%z` and `%Z` are more precise now; Introduced
 new tokens `%Exz` and `%ExZ` that fully support zone abbreviations and/or
 offset-based zones;

# TODO: because Python currently does not support mixing case-sensitive with case-insensitive matching,
#       check how TZ (in uppercase) can be combined with %a/%b etc. (which are currently case-insensitive),
#       to avoid invalid date-time recognition in strings like '11-Aug-2013 03:36:11.372 error ...'
#       with the wrong TZ "error", which is at least not backwards compatible.
#       Hence %z currently matches only the literals Z|UTC|GMT (and offset-based zones), and %Exz matches all zone abbreviations.
---
 fail2ban/server/strptime.py            | 114 ++++++++++++++++++++-----
 fail2ban/tests/datedetectortestcase.py |  25 +++++-
 2 files changed, 116 insertions(+), 23 deletions(-)
diff --git a/fail2ban/server/strptime.py b/fail2ban/server/strptime.py
index 2da27c58..dbf75d21 100644
--- a/fail2ban/server/strptime.py
+++ b/fail2ban/server/strptime.py
@@ -26,8 +26,9 @@ from _strptime import LocaleTime, TimeRE, _calc_julian_from_U_or_W
 from .mytime import MyTime
 
 locale_time = LocaleTime()
-timeRE = TimeRE()
-FIXED_OFFSET_TZ_RE = re.compile(r'(?:Z|UTC|GMT)?([+-]\d{2}(?::?\d{2})?)?$')
+
+TZ_ABBR_RE = r"[A-Z](?:[A-Z]{2,4})?"
+FIXED_OFFSET_TZ_RE = re.compile(r"(%s)?([+-][01]\d(?::?\d{2})?)?$" % (TZ_ABBR_RE,))
 
 def _getYearCentRE(cent=(0,3), distance=3, now=(MyTime.now(), MyTime.alternateNow)):
 	""" Build century regex for last year and the next years (distance).
@@ -40,10 +41,20 @@ def _getYearCentRE(cent=(0,3), distance=3, now=(MyTime.now(), MyTime.alternateNo
 		exprset |= set( cent(now[1].year + i) for i in (-1, distance) )
 	return "(?:%s)" % "|".join(exprset) if len(exprset) > 1 else "".join(exprset)
 
-#todo: implement literal time zone support like CET, PST, PDT, etc (via pytz):
-#timeRE['z'] = r"%s?(?P<z>Z|[+-]\d{2}(?::?[0-5]\d)?|[A-Z]{3})?" % timeRE['Z']
-timeRE['Z'] = r"(?P<Z>[A-Z]{3,5})"
-timeRE['z'] = r"(?P<z>Z|UTC|GMT|[+-]\d{2}(?::?[0-5]\d)?)"
+timeRE = TimeRE()
+
+# TODO: because Python currently does not support mixing case-sensitive with case-insensitive matching,
+#       check how TZ (in uppercase) can be combined with %a/%b etc. (which are currently case-insensitive),
+#       to avoid invalid date-time recognition in strings like '11-Aug-2013 03:36:11.372 error ...'
+#       with the wrong TZ "error", which is at least not backwards compatible.
+#       Hence %z currently matches only the literals Z|UTC|GMT (and offset-based zones), and %Exz matches all zone abbreviations.
+timeRE['Z'] = r"(?P<Z>Z|[A-Z]{3,5})"
+timeRE['z'] = r"(?P<z>Z|UTC|GMT|[+-][01]\d(?::?\d{2})?)"
+
+# Note: these extended tokens support zone abbreviations, but they can parse 1 or 3-5 char(s) in lowercase,
+#       see the TODO above. Don't use them in default date-patterns (if not anchored, not very precise or optional).
+timeRE['ExZ'] = r"(?P<Z>%s)" % (TZ_ABBR_RE,)
+timeRE['Exz'] = r"(?P<z>(?:%s)?[+-][01]\d(?::?\d{2})?|%s)" % (TZ_ABBR_RE, TZ_ABBR_RE)
 
 # Extend build-in TimeRE with some exact patterns
 # exact two-digit patterns:
@@ -82,20 +93,22 @@ def getTimePatternRE():
 
 
 def validateTimeZone(tz):
-	"""Validate a timezone.
+	"""Validate a timezone and convert it to offset if it can (offset-based TZ).
 
-	For now this accepts only the UTC[+-]hhmm format (UTC has aliases GMT/Z and optional).
+	For now this accepts the UTC[+-]hhmm format (UTC has aliases GMT/Z and optional).
+	Additionally it accepts all zone abbreviations mentioned below in TZ_STR.
+	Note that currently these zone abbreviations are offset-based and use a fixed
+	offset without an automatic DST switch (if CET is used, there is no automatic switch to CEST).
+	
 	In the future, it may be extended for named time zones (such as Europe/Paris)
-        present on the system, if a suitable tz library is present.
+	present on the system, if a suitable tz library is present (pytz).
 	"""
 	if tz is None:
 		return None
 	m = FIXED_OFFSET_TZ_RE.match(tz)
 	if m is None:
 		raise ValueError("Unknown or unsupported time zone: %r" % tz)
-	tz = m.group(1)
-	if tz is None or tz == '': # UTC/GMT
-		return 0; # fixed zero offzet
+	tz = m.groups()
 	return zone2offset(tz, 0)
 
 def zone2offset(tz, dt):
@@ -103,21 +116,29 @@ def zone2offset(tz, dt):
 
 	Parameters
 	----------
-	tz: symbolic timezone or offset (for now only [+-]hhmm is supported, and it's assumed to have
-		been validated already)
-	dt: datetime instance for offset computation
+	tz: symbolic timezone or offset (for now only TZA?([+-]hh:?mm?)? is supported),
+		the following values are accepted:
+		  int offset;
+		  string in form like 'CET+0100' or 'UTC' or '-0400';
+		  tuple (or list) in form (zone name, zone offset);
+	dt: datetime instance for offset computation (currently unused)
 	"""
 	if isinstance(tz, int):
 		return tz
-	if len(tz) <= 3: # short tz (hh only)
+	if isinstance(tz, basestring):
+		return validateTimeZone(tz)
+	tz, tzo = tz
+	if tzo is None or tzo == '': # without offset
+		return TZ_ABBR_OFFS[tz]
+	if len(tzo) <= 3: # short tzo (hh only)
 		# [+-]hh --> [+-]hh*60
-		return int(tz)*60
-	if tz[3] != ':':
+		return TZ_ABBR_OFFS[tz] + int(tzo)*60
+	if tzo[3] != ':':
 		# [+-]hhmm --> [+-]1 * (hh*60 + mm)
-		return (-1 if tz[0] == '-' else 1) * (int(tz[1:3])*60 + int(tz[3:5]))
+		return TZ_ABBR_OFFS[tz] + (-1 if tzo[0] == '-' else 1) * (int(tzo[1:3])*60 + int(tzo[3:5]))
 	else:
 		# [+-]hh:mm --> [+-]1 * (hh*60 + mm)
-		return (-1 if tz[0] == '-' else 1) * (int(tz[1:3])*60 + int(tz[4:6]))
+		return TZ_ABBR_OFFS[tz] + (-1 if tzo[0] == '-' else 1) * (int(tzo[1:3])*60 + int(tzo[4:6]))
 
 def reGroupDictStrptime(found_dict, msec=False, default_tz=None):
 	"""Return time from dictionary of strptime fields
@@ -275,3 +296,56 @@ def reGroupDictStrptime(found_dict, msec=False, default_tz=None):
 	if msec: # pragma: no cover - currently unused
 		tm += fraction/1000000.0
 	return tm
+
+
+TZ_ABBR_OFFS = {'':0, None:0}
+TZ_STR = '''
+	-12 Y
+	-11 X NUT SST
+	-10 W CKT HAST HST TAHT TKT
+	-9 V AKST GAMT GIT HADT HNY
+	-8 U AKDT CIST HAY HNP PST PT
+	-7 T HAP HNR MST PDT
+	-6 S CST EAST GALT HAR HNC MDT
+	-5 R CDT COT EASST ECT EST ET HAC HNE PET
+	-4 Q AST BOT CLT COST EDT FKT GYT HAE HNA PYT
+	-3 P ADT ART BRT CLST FKST GFT HAA PMST PYST SRT UYT WGT
+	-2 O BRST FNT PMDT UYST WGST
+	-1 N AZOT CVT EGT
+	0 Z EGST GMT UTC WET WT
+	1 A CET DFT WAT WEDT WEST
+	2 B CAT CEDT CEST EET SAST WAST
+	3 C EAT EEDT EEST IDT MSK
+	4 D AMT AZT GET GST KUYT MSD MUT RET SAMT SCT
+	5 E AMST AQTT AZST HMT MAWT MVT PKT TFT TJT TMT UZT YEKT
+	6 F ALMT BIOT BTT IOT KGT NOVT OMST YEKST
+	7 G CXT DAVT HOVT ICT KRAT NOVST OMSST THA WIB
+	8 H ACT AWST BDT BNT CAST HKT IRKT KRAST MYT PHT SGT ULAT WITA WST
+	9 I AWDT IRKST JST KST PWT TLT WDT WIT YAKT
+	10 K AEST ChST PGT VLAT YAKST YAPT
+	11 L AEDT LHDT MAGT NCT PONT SBT VLAST VUT
+	12 M ANAST ANAT FJT GILT MAGST MHT NZST PETST PETT TVT WFT
+	13 FJST NZDT
+	11.5 NFT
+	10.5 ACDT LHST
+	9.5 ACST
+	6.5 CCT MMT
+	5.75 NPT
+	5.5 SLT
+	4.5 AFT IRDT
+	3.5 IRST
+	-2.5 HAT NDT
+	-3.5 HNT NST NT
+	-4.5 HLV VET
+	-9.5 MART MIT
+'''
+
+def _init_TZ_ABBR():
+	"""Initialize the TZ_ABBR_OFFS dictionary (TZ -> offset in minutes)"""
+	for tzline in map(str.split, TZ_STR.split('\n')):
+		if not len(tzline): continue
+		tzoffset = int(float(tzline[0]) * 60)
+		for tz in tzline[1:]:
+			TZ_ABBR_OFFS[tz] = tzoffset 
+
+_init_TZ_ABBR()
diff --git a/fail2ban/tests/datedetectortestcase.py b/fail2ban/tests/datedetectortestcase.py
index 56970ac5..02facf30 100644
--- a/fail2ban/tests/datedetectortestcase.py
+++ b/fail2ban/tests/datedetectortestcase.py
@@ -90,16 +90,32 @@ class DateDetectorTest(LogCaptureTestCase):
 		self.assertEqual(matchlog.group(1), 'Jan 23 21:59:59')
 
 	def testDefaultTimeZone(self):
-		dd = self.datedetector
+		# use a special date-pattern (with %Exz), because %z currently does not support
+		# zone abbreviations except Z|UTC|GMT.
+		dd = DateDetector()
+		dd.appendTemplate('^%ExY-%Exm-%Exd %H:%M:%S(?: ?%Exz)?')
 		dt = datetime.datetime
 		logdt = "2017-01-23 15:00:00"
 		dtUTC = dt(2017, 1, 23, 15, 0)
 		for tz, log, desired in (
+			# no TZ in input-string:
 			('UTC+0300', logdt, dt(2017, 1, 23, 12, 0)), # so in UTC, it was noon
 			('UTC',      logdt, dtUTC), # UTC
 			('UTC-0430', logdt, dt(2017, 1, 23, 19, 30)),
 			('GMT+12',   logdt, dt(2017, 1, 23, 3, 0)),
 			(None,       logdt, dt(2017, 1, 23, 14, 0)), # default CET in our test-framework
+			# CET:
+			('CET',      logdt, dt(2017, 1, 23, 14, 0)),
+			('+0100',    logdt, dt(2017, 1, 23, 14, 0)),
+			('CEST-01',  logdt, dt(2017, 1, 23, 14, 0)),
+			# CEST:
+			('CEST',     logdt, dt(2017, 1, 23, 13, 0)),
+			('+0200',    logdt, dt(2017, 1, 23, 13, 0)),
+			('CET+01',   logdt, dt(2017, 1, 23, 13, 0)),
+			('CET+0100', logdt, dt(2017, 1, 23, 13, 0)),
+			# check offset in minutes:
+			('CET+0130', logdt, dt(2017, 1, 23, 12, 30)),
+			# TZ in input-string have precedence:
 			('UTC+0300', logdt+' GMT', dtUTC), # GMT wins
 			('UTC',      logdt+' GMT', dtUTC), # GMT wins
 			('UTC-0430', logdt+' GMT', dtUTC), # GMT wins
@@ -108,13 +124,16 @@ class DateDetectorTest(LogCaptureTestCase):
 			(None,       logdt+' -10:45', dt(2017, 1, 24, 1, 45)), # -1045 wins
 			('UTC',      logdt+' +0945', dt(2017, 1, 23, 5, 15)), # +0945 wins
 			(None,       logdt+' +09:45', dt(2017, 1, 23, 5, 15)), # +0945 wins
-			(None,       logdt+' Z', dtUTC), # Z wins (UTC)
+			('UTC+0300', logdt+' Z', dtUTC), # Z wins (UTC)
+			('GMT+12',   logdt+' CET',  dt(2017, 1, 23, 14, 0)), # CET wins
+			('GMT+12',   logdt+' CEST', dt(2017, 1, 23, 13, 0)), # CEST wins
+			('GMT+12',   logdt+' CET+0130', dt(2017, 1, 23, 12, 30)), # CET+0130 wins
 		):
 			logSys.debug('== test %r with TZ %r', log, tz)
 			dd.default_tz=tz; datelog, _ = dd.getTime(log)
 			val = dt.utcfromtimestamp(datelog)
 			self.assertEqual(val, desired,
-					 "wrong offset %r != %r by %r with TZ %r (%r)" % (val, desired, log, tz, dd.default_tz))
+					 "wrong offset %r != %r by %r with default TZ %r (%r)" % (val, desired, log, tz, dd.default_tz))
 
 		self.assertRaises(ValueError, setattr, dd, 'default_tz', 'WRONG-TZ')
 		dd.default_tz = None