mirror of https://github.com/fail2ban/fail2ban
Merge pull request #423 from yarikoptic/enh/gen_badbots

badbots filter: adding the script which was used + updated filter

commit a169badb95

--- a/ChangeLog
+++ b/ChangeLog
@@ -149,6 +149,9 @@ some obscure corner of the Internet.
 * filter.d/roundcube-auth.conf -- anchored version
 * date matching - for standard asctime formats prefer more detailed
   first (thus use year if available)
+* files/gen_badbots was added and filter.d/apache-badbots.conf was
+  regenerated to get an updated (although still an old) list of
+  "bad" bots
 Alexander Dietrich
 * action.d/sendmail-common.conf -- added common sendmail settings file
   and made the sender display name configurable

--- a/config/filter.d/apache-badbots.conf
+++ b/config/filter.d/apache-badbots.conf
@@ -3,12 +3,12 @@
 # Regexp to catch known spambots and software alike. Please verify
 # that it is your intent to block IPs which were driven by
 # above mentioned bots.
 
 
 [Definition]
 
 badbotscustom = EmailCollector|WebEMailExtrac|TrackBack/1\.02|sogou music spider
-badbots = atSpider/1\.0|autoemailspider|China Local Browse 2\.6|ContentSmartz|DataCha0s/2\.0|DBrowse 1\.4b|DBrowse 1\.4d|Demo Bot DOT 16b|Demo Bot Z 16b|DSurf15a 01|DSurf15a 71|DSurf15a 81|DSurf15a VA|EBrowse 1\.4b|Educate Search VxB|EmailSiphon|EmailWolf 1\.00|ESurf15a 15|ExtractorPro|Franklin Locator 1\.8|FSurf15a 01|Full Web Bot 0416B|Full Web Bot 0516B|Full Web Bot 2816B|Industry Program 1\.0\.x|ISC Systems iRc Search 2\.1|IUPUI Research Bot v 1\.9a|LARBIN-EXPERIMENTAL \(efp@gmx\.net\)|LetsCrawl\.com/1\.0 +http\://letscrawl\.com/|Lincoln State Web Browser|LWP\:\:Simple/5\.803|Mac Finder 1\.0\.xx|MFC Foundation Class Library 4\.0|Microsoft URL Control - 6\.00\.8xxx|Missauga Locate 1\.0\.0|Missigua Locator 1\.9|Missouri College Browse|Mizzu Labs 2\.2|Mo College 1\.9|Mozilla/2\.0 \(compatible; NEWT ActiveX; Win32\)|Mozilla/3\.0 \(compatible; Indy Library\)|Mozilla/4\.0 \(compatible; Advanced Email Extractor v2\.xx\)|Mozilla/4\.0 \(compatible; Iplexx Spider/1\.0 http\://www\.iplexx\.at\)|Mozilla/4\.0 \(compatible; MSIE 5\.0; Windows NT; DigExt; DTS Agent|Mozilla/4\.0 efp@gmx\.net|Mozilla/5\.0 \(Version\: xxxx Type\:xx\)|MVAClient|NASA Search 1\.0|Nsauditor/1\.x|PBrowse 1\.4b|PEval 1\.4b|Poirot|Port Huron Labs|Production Bot 0116B|Production Bot 2016B|Production Bot DOT 3016B|Program Shareware 1\.0\.2|PSurf15a 11|PSurf15a 51|PSurf15a VA|psycheclone|RSurf15a 41|RSurf15a 51|RSurf15a 81|searchbot admin@google\.com|sogou spider|sohu agent|SSurf15a 11 |TSurf15a 11|Under the Rainbow 2\.2|User-Agent\: Mozilla/4\.0 \(compatible; MSIE 6\.0; Windows NT 5\.1\)|WebVulnCrawl\.blogspot\.com/1\.0 libwww-perl/5\.803|Wells Search II|WEP Search 00
+badbots = Atomic_Email_Hunter/4\.0|atSpider/1\.0|autoemailspider|bwh3_user_agent|China Local Browse 2\.6|ContactBot/0\.2|ContentSmartz|DataCha0s/2\.0|DBrowse 1\.4b|DBrowse 1\.4d|Demo Bot DOT 16b|Demo Bot Z 16b|DSurf15a 01|DSurf15a 71|DSurf15a 81|DSurf15a VA|EBrowse 1\.4b|Educate Search VxB|EmailSiphon|EmailSpider|EmailWolf 1\.00|ESurf15a 15|ExtractorPro|Franklin Locator 1\.8|FSurf15a 01|Full Web Bot 0416B|Full Web Bot 0516B|Full Web Bot 2816B|Guestbook Auto Submitter|Industry Program 1\.0\.x|ISC Systems iRc Search 2\.1|IUPUI Research Bot v 1\.9a|LARBIN-EXPERIMENTAL \(efp@gmx\.net\)|LetsCrawl\.com/1\.0 +http\://letscrawl\.com/|Lincoln State Web Browser|LMQueueBot/0\.2|LWP\:\:Simple/5\.803|Mac Finder 1\.0\.xx|MFC Foundation Class Library 4\.0|Microsoft URL Control - 6\.00\.8xxx|Missauga Locate 1\.0\.0|Missigua Locator 1\.9|Missouri College Browse|Mizzu Labs 2\.2|Mo College 1\.9|MVAClient|Mozilla/2\.0 \(compatible; NEWT ActiveX; Win32\)|Mozilla/3\.0 \(compatible; Indy Library\)|Mozilla/3\.0 \(compatible; scan4mail \(advanced version\) http\://www\.peterspages\.net/?scan4mail\)|Mozilla/4\.0 \(compatible; Advanced Email Extractor v2\.xx\)|Mozilla/4\.0 \(compatible; Iplexx Spider/1\.0 http\://www\.iplexx\.at\)|Mozilla/4\.0 \(compatible; MSIE 5\.0; Windows NT; DigExt; DTS Agent|Mozilla/4\.0 efp@gmx\.net|Mozilla/5\.0 \(Version\: xxxx Type\:xx\)|NameOfAgent \(CMS Spider\)|NASA Search 1\.0|Nsauditor/1\.x|PBrowse 1\.4b|PEval 1\.4b|Poirot|Port Huron Labs|Production Bot 0116B|Production Bot 2016B|Production Bot DOT 3016B|Program Shareware 1\.0\.2|PSurf15a 11|PSurf15a 51|PSurf15a VA|psycheclone|RSurf15a 41|RSurf15a 51|RSurf15a 81|searchbot admin@google\.com|ShablastBot 1\.0|snap\.com beta crawler v0|Snapbot/1\.0|Snapbot/1\.0 \(Snap Shots, +http\://www\.snap\.com\)|sogou develop spider|Sogou Orion spider/3\.0\(+http\://www\.sogou\.com/docs/help/webmasters\.htm#07\)|sogou spider|Sogou web spider/3\.0\(+http\://www\.sogou\.com/docs/help/webmasters\.htm#07\)|sohu agent|SSurf15a 11 |TSurf15a 11|Under the Rainbow 2\.2|User-Agent\: Mozilla/4\.0 \(compatible; MSIE 6\.0; Windows NT 5\.1\)|VadixBot|WebVulnCrawl\.unknown/1\.0 libwww-perl/5\.803|Wells Search II|WEP Search 00
 
 failregex = ^<HOST> -.*"(GET|POST).*HTTP.*"(?:%(badbots)s|%(badbotscustom)s)"$
 
@@ -16,6 +16,6 @@ ignoreregex =
 
 # DEV Notes:
 # List of bad bots fetched from http://www.user-agents.org
-# Generated on Sun Feb 11 01:09:15 EST 2007 by ./badbots.sh
+# Generated on Thu Nov 7 14:23:35 PST 2013 by files/gen_badbots.
 #
 # Author: Yaroslav Halchenko
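
The failregex above anchors on the final quoted field of an Apache combined-format access-log line (the User-Agent), substituting both lists via %(badbots)s and %(badbotscustom)s. A quick way to sanity-check a regenerated filter is fail2ban-regex, which ships with fail2ban and accepts a log file plus a filter file; the log line below is a made-up sample for illustration, not taken from the commit:

# Hypothetical combined-format log line whose User-Agent ("EmailSiphon")
# appears in both the old and the regenerated badbots list.
echo '192.0.2.1 - - [07/Nov/2013:14:23:35 -0800] "GET / HTTP/1.1" 200 512 "-" "EmailSiphon"' \
    > /tmp/badbots-sample.log

# fail2ban-regex reports how many lines the filter's failregex matched.
fail2ban-regex /tmp/badbots-sample.log config/filter.d/apache-badbots.conf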

--- /dev/null
+++ b/files/gen_badbots
@@ -0,0 +1,75 @@
#!/bin/bash
#-------------------------- =+- Shell script -+= --------------------------
#
# Yaroslav Halchenko                                       CS@UNM, CS@NJIT
# web:     http://www.onerussian.com                       & PSYCH@RUTGERS
# e-mail:  yoh@onerussian.com                              ICQ#: 60653192
#
# DESCRIPTION (NOTES):
#
#   Script to fetch the list of agent strings from http://www.user-agents.org
#   which are known to belong to malicious bots, and to generate the
#   apache-badbots.conf filter for fail2ban.
#
# COPYRIGHT: Yaroslav Halchenko 2007-2013
#
# LICENSE:
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the
#   Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
#   MA 02110-1301, USA.
#
#   On Debian systems see /usr/share/common-licenses/GPL for the full license.
#
#-----------------\____________________________________/------------------

url=http://www.user-agents.org/index.shtml

# Fetch every page of the list (the site splits it alphabetically), keep only
# the rows flagged "S" (spam bot), extract the agent strings, escape regex
# metacharacters, and join everything into one |-separated alternation.
badbots=$(
    for f in "" "?g_m" "?moz" "?n_s" "?t_z"; do
        wget -q -O- "$url$f"
    done \
    | grep -h -B4 '<td class="smallcell" nowrap>S&nbsp;</td>' \
    | sed -e 's/&nbsp;//g' \
    | awk '/^--/{getline; gsub("&nbsp;",""); print $0}' \
    | sed -e 's/\([.\:|()]\)/\\\1/g' \
    | uniq \
    | tr '\n' '|' \
    | sed -e 's/|$//g'
)

# Keep a copy of the raw alternation around for inspection.
echo "$badbots" >| /tmp/badbots.tmp

# Regenerate the filter, substituting the freshly fetched list for badbots.
cat >| config/filter.d/apache-badbots.conf <<EOF
# Fail2Ban configuration file
#
# Regexp to catch known spambots and software alike. Please verify
# that it is your intent to block IPs which were driven by
# above mentioned bots.


[Definition]

badbotscustom = EmailCollector|WebEMailExtrac|TrackBack/1\.02|sogou music spider
badbots = $badbots

failregex = ^<HOST> -.*"(GET|POST).*HTTP.*"(?:%(badbots)s|%(badbotscustom)s)"$

ignoreregex =

# DEV Notes:
# List of bad bots fetched from http://www.user-agents.org
# Generated on `date` by $0.
#
# Author: Yaroslav Halchenko
EOF
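
The script writes config/filter.d/apache-badbots.conf through a relative path, so, judging from the paths above, it is meant to be run from the top of a fail2ban checkout. The invocation below is an assumption based on those paths, not something documented in the commit:

# Run from the root of the fail2ban source tree; needs wget on PATH.
./files/gen_badbots

# Review what the refreshed list changed before committing it.
git diff config/filter.d/apache-badbots.conf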
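To actually ban anything, a jail must reference the filter. A minimal sketch in the same heredoc style as the script, assuming an Apache combined-format log at /var/log/apache2/access.log; the log path and ban settings are illustrative assumptions, not part of this commit:

# Append a jail for the badbots filter to the local overrides file.
cat >> /etc/fail2ban/jail.local <<EOF
[apache-badbots]
enabled  = true
filter   = apache-badbots
# Point at your Apache access log (combined format, User-Agent last).
logpath  = /var/log/apache2/access.log
# One hit from a listed bot is enough; ban for two days.
maxretry = 1
bantime  = 172800
EOF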