ENH: iso8601 - remove default_timezone set to UTC, isn't valid any more - calculate from local timezone. Enforce T date/time separator in iso8601 regex. Make minutes in timezone optional as per ISO8601. Use consistant regex for ISO8601

pull/368/head
Daniel Black 2013-09-21 11:47:24 +10:00
parent a0676cdd1e
commit 37de5462be
2 changed files with 23 additions and 22 deletions

View File

@ -300,10 +300,7 @@ class DateISO8601(DateTemplate):
def __init__(self): def __init__(self):
DateTemplate.__init__(self) DateTemplate.__init__(self)
date_re = "[0-9]{4}-[0-9]{1,2}-[0-9]{1,2}" \ self.setRegex(iso8601.ISO8601_REGEX_RAW)
".[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+)?" \
"(Z|(([-+])([0-9]{2}):([0-9]{2})))?"
self.setRegex(date_re)
def getDate(self, line): def getDate(self, line):
dateMatch = self.matchDate(line) dateMatch = self.matchDate(line)

View File

@ -32,17 +32,17 @@ datetime.datetime(2007, 1, 25, 12, 0, tzinfo=<iso8601.iso8601.Utc ...>)
""" """
from datetime import datetime, timedelta, tzinfo from datetime import datetime, timedelta, tzinfo, time
import re import re
__all__ = ["parse_date", "ParseError"] __all__ = ["parse_date", "ParseError"]
# Adapted from http://delete.me.uk/2005/03/iso8601.html # Adapted from http://delete.me.uk/2005/03/iso8601.html
ISO8601_REGEX = re.compile(r"(?P<year>[0-9]{4})(-(?P<month>[0-9]{1,2})(-(?P<day>[0-9]{1,2})" ISO8601_REGEX_RAW = "(?P<year>[0-9]{4})-(?P<month>[0-9]{1,2})-(?P<day>[0-9]{1,2})" \
r"((?P<separator>.)(?P<hour>[0-9]{2}):(?P<minute>[0-9]{2})(:(?P<second>[0-9]{2})(\.(?P<fraction>[0-9]+))?)?" "T(?P<hour>[0-9]{2}):(?P<minute>[0-9]{2})(:(?P<second>[0-9]{2})(\.(?P<fraction>[0-9]+))?)?" \
r"(?P<timezone>Z|(([-+])([0-9]{2}):([0-9]{2})))?)?)?)?" "(?P<timezone>Z|(([-+])([0-9]{2}):([0-9]{2})))?"
) ISO8601_REGEX = re.compile(ISO8601_REGEX_RAW)
TIMEZONE_REGEX = re.compile("(?P<prefix>[+-])(?P<hours>[0-9]{2}).(?P<minutes>[0-9]{2})") TIMEZONE_REGEX = re.compile("(?P<prefix>[+-])(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})?")
class ParseError(Exception): class ParseError(Exception):
"""Raised when there is a problem parsing a date string""" """Raised when there is a problem parsing a date string"""
@ -67,8 +67,8 @@ class FixedOffset(tzinfo):
"""Fixed offset in hours and minutes from UTC """Fixed offset in hours and minutes from UTC
""" """
def __init__(self, offset_hours, offset_minutes, name): def __init__(self, name, offset_hours, offset_minutes, offset_seconds=0):
self.__offset = timedelta(hours=offset_hours, minutes=offset_minutes) self.__offset = timedelta(hours=offset_hours, minutes=offset_minutes, seconds=offset_seconds)
self.__name = name self.__name = name
def utcoffset(self, dt): def utcoffset(self, dt):
@ -83,26 +83,30 @@ class FixedOffset(tzinfo):
def __repr__(self): def __repr__(self):
return "<FixedOffset %r>" % self.__name return "<FixedOffset %r>" % self.__name
def parse_timezone(tzstring, default_timezone=UTC): def parse_timezone(tzstring):
"""Parses ISO 8601 time zone specs into tzinfo offsets """Parses ISO 8601 time zone specs into tzinfo offsets
""" """
if tzstring == "Z": if tzstring == "Z":
return default_timezone return UTC
# This isn't strictly correct, but it's common to encounter dates without
# timezones so I'll assume the default (which defaults to UTC).
# Addresses issue 4.
if tzstring is None: if tzstring is None:
return default_timezone zone_sec = -time.timezone
return FixedOffset(name=time.tzname[0],hours=(zone_sec / 3600),minutes=(zone_sec % 3600)/60,seconds=zone_sec % 60)
m = TIMEZONE_REGEX.match(tzstring) m = TIMEZONE_REGEX.match(tzstring)
prefix, hours, minutes = m.groups() prefix, hours, minutes = m.groups()
hours, minutes = int(hours), int(minutes) if minutes is None:
minutes = 0
else:
minutes = int(minutes)
hours = int(hours)
if prefix == "-": if prefix == "-":
hours = -hours hours = -hours
minutes = -minutes minutes = -minutes
return FixedOffset(hours, minutes, tzstring) return FixedOffset(tzstring, hours, minutes)
def parse_date(datestring, default_timezone=UTC): def parse_date(datestring):
"""Parses ISO 8601 dates into datetime objects """Parses ISO 8601 dates into datetime objects
The timezone is parsed from the date string. However it is quite common to The timezone is parsed from the date string. However it is quite common to
@ -116,7 +120,7 @@ def parse_date(datestring, default_timezone=UTC):
if not m: if not m:
raise ParseError("Unable to parse date string %r" % datestring) raise ParseError("Unable to parse date string %r" % datestring)
groups = m.groupdict() groups = m.groupdict()
tz = parse_timezone(groups["timezone"], default_timezone=default_timezone) tz = parse_timezone(groups["timezone"])
if groups["fraction"] is None: if groups["fraction"] is None:
groups["fraction"] = 0 groups["fraction"] = 0
else: else: