SandakovMM 2025-06-23 12:43:51 +00:00 committed by GitHub
commit 5f8546873b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 17 additions and 14 deletions

View File

@ -6,7 +6,7 @@ check-hidden = true
# Try to identify incomplete words which are part of a regex, hence having [] at the beginning
# Ignore all URLs, i.e. anything with :// in it
# Ignore all lines with codespell-ignore in them for pragma annotation
ignore-regex = (\b([A-Z][A-Z][A-Z]+|gir\.st)\b)|\[[a-zA-Z]+\][a-z]+\b|[a-z]+://\S+|.*codespell-ignore.*
ignore-regex = (\b([A-Z][A-Z][A-Z]+|gir\.st)\b)|\[[a-zA-Z]+\][a-z]+\b|[a-z]+://\S+|^\w*bots\w*\s*=.*|.*codespell-ignore.*
# some oddly named variables, some names, etc
# wee -- occurs in regexes etc. when matching "weeks"
ignore-words-list = assertIn,theis,timere,alls,wee,wight,ans,re-use,pre-emptive

File diff suppressed because one or more lines are too long

View File

@ -34,16 +34,12 @@
#
#-----------------\____________________________________/------------------
url=http://www.user-agents.org/index.shtml
url=https://raw.githubusercontent.com/mitchellkrogza/nginx-ultimate-bad-bot-blocker/master/_generator_lists/bad-user-agents.list
badbots=$(
for f in "" "?g_m" "?moz" "?n_s" "?t_z"; do
wget -q -O- $url$f;
done \
| grep -h -B4 '<td class="smallcell" nowrap>S&nbsp;</td>'\
| sed -e 's/&nbsp;//g' \
| awk '/^--/{getline; gsub(" ",""); print $0}' \
| sed -e 's/\([.\:|()+]\)/\\\1/g' \
curl -sS $url \
| uniq \
| sed -e 's/\\ / /g' \
| sed -e 's/\([.\:|()+]\)/\\\1/g' \
| tr '\n' '|' \
| sed -e 's/|$//g'
)
@ -60,13 +56,18 @@ cat >| config/filter.d/apache-badbots.conf <<EOF
[Definition]
badbotscustom = EmailCollector|WebEMailExtrac|TrackBack/1\.02|sogou music spider
badbotscustom = EmailCollector|WebEMailExtrac|TrackBack/1\.02|sogou music spider|(?:Mozilla/\d+\.\d+ )?Jorgee
badbots = $badbots
failregex = ^<HOST> -.*"(GET|POST).*HTTP.*"(?:%(badbots)s|%(badbotscustom)s)"$
requri = /\S*
rescode = \d+
failregex = ^<ADDR> [^"]*"[A-Z]+\s+%(requri)s\s+[^"]*" %(rescode)s \d+ "[^"]*" "(?:%(badbots)s|%(badbotscustom)s)"$
ignoreregex =
datepattern = ^[^\[]*\[({DATE})
{^LN-BEG}
# DEV Notes:
# List of bad bots fetched from https://raw.githubusercontent.com/mitchellkrogza/nginx-ultimate-bad-bot-blocker/master/_generator_lists/bad-user-agents.list
# Generated on `date` by $0.