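Fix invalid date recognition, which occurred irregularly because of the sorting of the template list; the word-begin anchor is now added via setRegex instead of being written into each template's regex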

pull/130/head
sebres 2013-03-11 13:52:31 +01:00
parent d17b415371
commit b6bb2f88c1
2 changed files with 18 additions and 15 deletions

View File

@ -55,80 +55,80 @@ class DateDetector:
# standard
template = DateStrptime()
template.setName("MONTH Day Hour:Minute:Second")
template.setRegex("(?<!\w)\S{3}\s{1,2}\d{1,2} \d{2}:\d{2}:\d{2}")
template.setRegex("\S{3}\s{1,2}\d{1,2} \d{2}:\d{2}:\d{2}")
template.setPattern("%b %d %H:%M:%S")
self._appendTemplate(template)
# asctime
template = DateStrptime()
template.setName("WEEKDAY MONTH Day Hour:Minute:Second Year")
template.setRegex("(?<!\w)\S{3} \S{3}\s{1,2}\d{1,2} \d{2}:\d{2}:\d{2} \d{4}")
template.setRegex("\S{3} \S{3}\s{1,2}\d{1,2} \d{2}:\d{2}:\d{2} \d{4}")
template.setPattern("%a %b %d %H:%M:%S %Y")
self._appendTemplate(template)
# asctime without year
template = DateStrptime()
template.setName("WEEKDAY MONTH Day Hour:Minute:Second")
template.setRegex("(?<!\w)\S{3} \S{3}\s{1,2}\d{1,2} \d{2}:\d{2}:\d{2}")
template.setRegex("\S{3} \S{3}\s{1,2}\d{1,2} \d{2}:\d{2}:\d{2}")
template.setPattern("%a %b %d %H:%M:%S")
self._appendTemplate(template)
# simple date
template = DateStrptime()
template.setName("Year/Month/Day Hour:Minute:Second")
template.setRegex("(?<!\w)\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}")
template.setRegex("\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}")
template.setPattern("%Y/%m/%d %H:%M:%S")
self._appendTemplate(template)
# simple date too (from x11vnc)
template = DateStrptime()
template.setName("Day/Month/Year Hour:Minute:Second")
template.setRegex("(?<!\w)\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2}")
template.setRegex("\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2}")
template.setPattern("%d/%m/%Y %H:%M:%S")
self._appendTemplate(template)
# previous one but with year given by 2 digits
# (See http://bugs.debian.org/537610)
template = DateStrptime()
template.setName("Day/Month/Year2 Hour:Minute:Second")
template.setRegex("(?<!\w)\d{2}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}")
template.setRegex("\d{2}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}")
template.setPattern("%d/%m/%y %H:%M:%S")
self._appendTemplate(template)
# Apache format [31/Oct/2006:09:22:55 -0000]
template = DateStrptime()
template.setName("Day/MONTH/Year:Hour:Minute:Second")
template.setRegex("(?<!\w)\d{2}/\S{3}/\d{4}:\d{2}:\d{2}:\d{2}")
template.setRegex("\d{2}/\S{3}/\d{4}:\d{2}:\d{2}:\d{2}")
template.setPattern("%d/%b/%Y:%H:%M:%S")
self._appendTemplate(template)
# CPanel 05/20/2008:01:57:39
template = DateStrptime()
template.setName("Month/Day/Year:Hour:Minute:Second")
template.setRegex("(?<!\w)\d{2}/\d{2}/\d{4}:\d{2}:\d{2}:\d{2}")
template.setRegex("\d{2}/\d{2}/\d{4}:\d{2}:\d{2}:\d{2}")
template.setPattern("%m/%d/%Y:%H:%M:%S")
self._appendTemplate(template)
# Exim 2006-12-21 06:43:20
template = DateStrptime()
template.setName("Year-Month-Day Hour:Minute:Second")
template.setRegex("(?<!\w)\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}")
template.setRegex("\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}")
template.setPattern("%Y-%m-%d %H:%M:%S")
self._appendTemplate(template)
# custom for syslog-ng 2006.12.21 06:43:20
template = DateStrptime()
template.setName("Year.Month.Day Hour:Minute:Second")
template.setRegex("(?<!\w)\d{4}.\d{2}.\d{2} \d{2}:\d{2}:\d{2}")
template.setRegex("\d{4}.\d{2}.\d{2} \d{2}:\d{2}:\d{2}")
template.setPattern("%Y.%m.%d %H:%M:%S")
self._appendTemplate(template)
# named 26-Jul-2007 15:20:52.252
template = DateStrptime()
template.setName("Day-MONTH-Year Hour:Minute:Second[.Millisecond]")
template.setRegex("(?<!\w)\d{2}-\S{3}-\d{4} \d{2}:\d{2}:\d{2}")
template.setRegex("\d{2}-\S{3}-\d{4} \d{2}:\d{2}:\d{2}")
template.setPattern("%d-%b-%Y %H:%M:%S")
self._appendTemplate(template)
# 17-07-2008 17:23:25
template = DateStrptime()
template.setName("Day-Month-Year Hour:Minute:Second")
template.setRegex("(?<!\w)\d{2}-\d{2}-\d{4} \d{2}:\d{2}:\d{2}")
template.setRegex("\d{2}-\d{2}-\d{4} \d{2}:\d{2}:\d{2}")
template.setPattern("%d-%m-%Y %H:%M:%S")
self._appendTemplate(template)
# 01-27-2012 16:22:44.252
template = DateStrptime()
template.setName("Month-Day-Year Hour:Minute:Second[.Millisecond]")
template.setRegex("(?<!\w)\d{2}-\d{2}-\d{4} \d{2}:\d{2}:\d{2}")
template.setRegex("\d{2}-\d{2}-\d{4} \d{2}:\d{2}:\d{2}")
template.setPattern("%m-%d-%Y %H:%M:%S")
self._appendTemplate(template)
# TAI64N

View File

@ -50,8 +50,11 @@ class DateTemplate:
# Return the name stored in this object's private __name attribute.
def getName(self):
return self.__name
def setRegex(self, regex):
self.__regex = regex.strip()
def setRegex(self, regex, wordBegin=True):
    r"""Store the template's regular expression and its compiled form.

    The pattern is stripped of surrounding whitespace.  When ``wordBegin``
    is true and the pattern does not already start with ``^``, a ``\b``
    word-boundary assertion is prepended so that a match cannot begin in
    the middle of a word.

    Parameters:
      regex     -- the regular expression source string.
      wordBegin -- prepend ``\b`` unless the pattern starts with ``^``
                   (default True).  Pass False for patterns that must be
                   able to start on a non-word character.

    Raises ``re.error`` if the resulting pattern does not compile.
    """
    regex = regex.strip()
    if wordBegin and not regex.startswith('^'):
        # A bare prefix would bind only to the first branch of a top-level
        # alternation ("\ba|b" is "(\ba)|(b)"), so group the pattern first.
        # The group is non-capturing, hence existing group numbers are
        # unchanged; patterns without "|" keep their previous exact form.
        if '|' in regex:
            regex = '(?:' + regex + ')'
        regex = r'\b' + regex
    self.__regex = regex
    self.__cRegex = re.compile(regex)
def getRegex(self):