ENH: adding gen_badbots script to generate apache-badbots.conf

git-svn-id: https://fail2ban.svn.sourceforge.net/svnroot/fail2ban/trunk@760 a942ae1a-1317-0410-a47c-b1dcaea8d605
0.x
Yaroslav Halchenko 2010-06-22 14:33:05 +00:00
parent 4f4f0399ef
commit 24d8e29ace
2 changed files with 84 additions and 1 deletions

View File

@ -1,7 +1,7 @@
# Fail2Ban configuration file
#
# List of bad bots fetched from http://www.user-agents.org
# Generated on Sun Feb 11 01:09:15 EST 2007 by ./badbots.sh
# Generated on Sun Feb 11 01:09:15 EST 2007 by gen_badbots
#
# Author: Yaroslav Halchenko
#

83
files/gen_badbots Executable file
View File

@ -0,0 +1,83 @@
#!/bin/bash
#-------------------------- =+- Shell script -+= --------------------------
#
# @file gen_badbots
# @date Sun Feb 11 00:49:53 2007
# @brief Generate the apache-badbots.conf filter for fail2ban
#
#
# Yaroslav Halchenko CS@UNM, CS@NJIT
# web: http://www.onerussian.com & PSYCH@RUTGERS
# e-mail: yoh@onerussian.com ICQ#: 60653192
#
# DESCRIPTION (NOTES):
#
# Script to fetch the list of agent strings from http://www.user-agents.org
# which are known to be from malicious bots, and create the
# apache-badbots.conf filter for fail2ban
#
# COPYRIGHT: Yaroslav Halchenko 2007-2010
#
# LICENSE:
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
# On Debian system see /usr/share/common-licenses/GPL for the full license.
#
#-----------------\____________________________________/------------------
# Base URL of the user-agents.org index; the remaining index pages are
# reached by appending a query string ("?g_l", "?m", ...) to it.
url=http://www.user-agents.org/index.shtml

# Build one '|'-separated regex alternation out of the agents whose table
# cell is marked "S" on the site:
#   1. download every index page (a failed download aborts the run);
#   2. keep each "S" cell together with the 4 lines preceding it;
#   3. drop the HTML non-breaking spaces;
#   4. take the first line of every group (presumably the agent string --
#      verify against the page markup) and strip the blanks from it;
#   5. backslash-escape regex metacharacters so the names match literally;
#   6. join the lines with '|' and drop the trailing separator.
# pipefail is enabled only inside the substitution, so a failed download or
# an empty grep result surfaces as a non-zero status of the assignment.
badbots=$(
  set -o pipefail
  for page in "" "?g_l" "?m" "?n_s" "?t_z"; do
    # Quoted: the suffixes contain '?', which is a glob character.
    wget -q -O- "${url}${page}" || exit 1
  done \
    | grep -h -B4 '<td class="smallcell" nowrap>S&nbsp;</td>' \
    | sed -e 's/&nbsp;//g' \
    | awk 'NR == 1 || (/^--/ && (getline) > 0) { gsub(" ", ""); print }' \
    | sed -e 's/[].\:|()[+*?{}^$]/\\&/g' \
    | tr '\n' '|' \
    | sed -e 's/|$//g'
) || {
  printf '%s: failed to fetch or parse the bot list from %s\n' "$0" "$url" >&2
  exit 1
}
# Why "NR == 1" above: grep prints the "--" separator only BETWEEN groups, so
# the first group has no separator in front of it and would be skipped.

# An empty alternation would make the generated failregex match requests
# with an empty user-agent, so refuse to continue with it.
if [[ -z "$badbots" ]]; then
  printf '%s: no bad bots extracted from %s\n' "$0" "$url" >&2
  exit 1
fi

# Dump of the raw alternation.  Quoted so the value is written verbatim
# instead of being word-split and glob-expanded.
# NOTE(review): fixed, predictable path in /tmp that nothing in this script
# reads back -- consider mktemp or dropping the dump altogether.
printf '%s\n' "$badbots" >| /tmp/badbots.tmp
# Write the fail2ban filter into the current directory.  The here-document
# delimiter is left unquoted on purpose: the shell fills in the generation
# time, the script name and the bot list, while "\$" keeps a literal "$"
# anchor at the end of failregex.  ">|" overwrites the target even when the
# noclobber option is set.
cat <<EOF >| apache-badbots.conf
# Fail2Ban configuration file
#
# List of bad bots fetched from http://www.user-agents.org
# Generated on $(date) by ${0}
#
# Author: Yaroslav Halchenko
#
#
[Definition]
badbotscustom = EmailCollector|WebEMailExtrac
badbots = ${badbots}
# Option: failregex
# Notes.: Regexp to catch known spambots and software alike. Please verify that
# it is your intent to block IPs which were driven by abovementioned bots
# Values: TEXT
#
failregex = ^(?P<host>\S*) -.*"GET.*HTTP.*"(?:%(badbots)s|%(badbotscustom)s)"\$
# Option: ignoreregex
# Notes.: regex to ignore. If this regex matches, the line is ignored.
# Values: TEXT
#
ignoreregex =
EOF