summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author bill-auger <mr.j.spam.me@gmail.com> 2018-08-13 01:55:17 -0400
committer bill-auger <mr.j.spam.me@gmail.com> 2018-09-28 23:30:24 -0400
commit 7652409698a535acea66a74ef31c88e200ba4a0c (patch)
tree cebb84b85022c34881f4f72297907cd32c822b29
parent 46b29f9ce18383d45a81d2b048321bf6044adea8 (diff)
filter nonsense messages
-rw-r--r-- modules/m_spamfilter.sh 32
1 files changed, 24 insertions, 8 deletions
diff --git a/modules/m_spamfilter.sh b/modules/m_spamfilter.sh
index be7a76f..2471b5e 100644
--- a/modules/m_spamfilter.sh
+++ b/modules/m_spamfilter.sh
@@ -34,9 +34,13 @@ readonly RELAY_NICK='a-user'
readonly FILTER_CHANNELS="${config_module_spamfilter_channels}"
readonly II_DIR=/home/pbot/irc/${config_server}
readonly II_LOG_REGEX='[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2} '
-readonly SPAMFILTER_CHANNELS="${config_module_spamfilter_channels}"
readonly BOT_PASS="${config_server_passwd}"
+# generic spam-like regexes
+readonly ASCII_SET_REGEX='[^ -~]'
+readonly ASCII_ART_REGEX='[^A-Za-z0-9]{4}'
+readonly GIBBERISH_REGEX='[^\ ]{12}'
+
# known spam trigger constants
readonly SPAM=(
'blog by freenode staff'
@@ -96,9 +100,9 @@ module_spamfilter_on_JOIN()
DBG_JOIN "${whojoined}" "${channel}"
- if [[ "${whojoined}" == "${bot_nick}" ]] && \
- [[ " ${SPAMFILTER_CHANNELS} " =~ " ${channel} " ]] && \
- [[ " ${OP_CHANNELS} " =~ " ${channel} " ]] && (( ${BECOME_OP_ON_JOIN} ))
+ if [[ "${whojoined}" == "${bot_nick}" ]] && \
+ [[ " ${FILTER_CHANNELS} " =~ " ${channel} " ]] && \
+ [[ " ${OP_CHANNELS} " =~ " ${channel} " ]] && (( ${BECOME_OP_ON_JOIN} ))
then send_modes "${channel}" '+qz $~a'
# launch a second bot so we can compare the chat logs
@@ -145,7 +149,7 @@ DBG_CRITERIA "${sender}" "${target}" "${query}"
DBG_UNREGISTERED "${sender}"
# ignore chat that is known spam or otherwise nonsense
- if is_spam "${query}"
+ if is_nonsense "${query}" || is_spam "${query}"
then was_handled=1
DBG_SPAM ${sender}
@@ -190,6 +194,16 @@ is_public_chat() # (sender , query)
[[ "${chat_log}" =~ ${II_LOG_REGEX}"${log_line}" ]]
}
+is_nonsense() # (chat_msg) - returns 0 if the message is blank or matches a generic spam-like regex
+{
+  local chat_msg=$1
+  # blank check uses parameter expansion: no subshell, no globbing, no echo option parsing
+  [[ -z "${chat_msg//[[:space:]]/}"       ]] || # empty or whitespace-only
+  [[ "${chat_msg}" =~ ${ASCII_SET_REGEX} ]] || # has a character outside the range ' ' to '~'
+  [[ "${chat_msg}" =~ ${ASCII_ART_REGEX} ]] || # has 4 consecutive non-alphanumeric characters
+  [[ "${chat_msg}" =~ ${GIBBERISH_REGEX} ]]    # has a run of 12 characters matched by [^\ ]
+}
+
is_spam() # (chat_msg)
{
local needle=$1
@@ -216,9 +230,11 @@ DBG_CRITERIA()
local target=$2
local query=$3
- echo -n "[SPAMFILTER]: target='${target}'" ; ! is_filtered_channel "${target}" && echo -n " => wrong channel - returning" ; echo
- echo -n "[SPAMFILTER]: sender='${sender}'" ; is_internal_user "${sender}" && echo -n " => from internal user - returning" ; echo
- echo -n "[SPAMFILTER]: query='${query}'" ; is_public_chat "${sender}" "${query}" && echo -n " => from registered user - returning" ; echo
+ echo -n "[SPAMFILTER]: target='${target}'" ; ! is_filtered_channel "${target}" && echo -n " => wrong channel - returning" ; echo ;
+ echo -n "[SPAMFILTER]: sender='${sender}'" ; is_internal_user "${sender}" && echo -n " => from internal user - returning" ; echo ;
+ echo -n "[SPAMFILTER]: query='${query}'" ; is_public_chat "${sender}" "${query}" && echo -n " => from registered user - returning" ;
+ is_nonsense "${query}" && echo -n " => is nonsense - returning" ;
+ is_spam "${query}" && echo -n " => is known spam - returning" ; echo ;
}
DBG_UNREGISTERED() { (( ${DEBUG} )) || return ; local sender=$1 ; echo "[SPAMFILTER]: unregistered user sender=${sender}" ; }
DBG_SPAM() { (( ${DEBUG} )) || return ; local sender=$1 ; echo "[SPAMFILTER]: !!!triggered!!! spambot=${sender}" ; }