diff options
author | bill-auger <mr.j.spam.me@gmail.com> | 2018-08-13 01:55:17 -0400 |
---|---|---|
committer | bill-auger <mr.j.spam.me@gmail.com> | 2018-09-28 23:30:24 -0400 |
commit | 7652409698a535acea66a74ef31c88e200ba4a0c (patch) | |
tree | cebb84b85022c34881f4f72297907cd32c822b29 | |
parent | 46b29f9ce18383d45a81d2b048321bf6044adea8 (diff) |
filter nonsense messages
-rw-r--r-- | modules/m_spamfilter.sh | 32 |
1 file changed, 24 insertions, 8 deletions
diff --git a/modules/m_spamfilter.sh b/modules/m_spamfilter.sh index be7a76f..2471b5e 100644 --- a/modules/m_spamfilter.sh +++ b/modules/m_spamfilter.sh @@ -34,9 +34,13 @@ readonly RELAY_NICK='a-user' readonly FILTER_CHANNELS="${config_module_spamfilter_channels}" readonly II_DIR=/home/pbot/irc/${config_server} readonly II_LOG_REGEX='[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2} ' -readonly SPAMFILTER_CHANNELS="${config_module_spamfilter_channels}" readonly BOT_PASS="${config_server_passwd}" +# generic spam-like regexes +readonly ASCII_SET_REGEX='[^ -~]' +readonly ASCII_ART_REGEX='[^A-Za-z0-9]{4}' +readonly GIBBERISH_REGEX='[^\ ]{12}' + # known spam trigger constants readonly SPAM=( 'blog by freenode staff' @@ -96,9 +100,9 @@ module_spamfilter_on_JOIN() DBG_JOIN "${whojoined}" "${channel}" - if [[ "${whojoined}" == "${bot_nick}" ]] && \ - [[ " ${SPAMFILTER_CHANNELS} " =~ " ${channel} " ]] && \ - [[ " ${OP_CHANNELS} " =~ " ${channel} " ]] && (( ${BECOME_OP_ON_JOIN} )) + if [[ "${whojoined}" == "${bot_nick}" ]] && \ + [[ " ${FILTER_CHANNELS} " =~ " ${channel} " ]] && \ + [[ " ${OP_CHANNELS} " =~ " ${channel} " ]] && (( ${BECOME_OP_ON_JOIN} )) then send_modes "${channel}" '+qz $~a' # launch a second bot so we can compare the chat logs @@ -145,7 +149,7 @@ DBG_CRITERIA "${sender}" "${target}" "${query}" DBG_UNREGISTERED "${sender}" # ignore chat that is known spam or otherwise nonsense - if is_spam "${query}" + if is_nonsense "${query}" || is_spam "${query}" then was_handled=1 DBG_SPAM ${sender} @@ -190,6 +194,16 @@ is_public_chat() # (sender , query) [[ "${chat_log}" =~ ${II_LOG_REGEX}"${log_line}" ]] } +is_nonsense() # (chat_msg) +{ + local chat_msg=$1 + + [[ -z "$(echo ${chat_msg})" ]] || \ + [[ "${chat_msg}" =~ ${ASCII_SET_REGEX} ]] || \ + [[ "${chat_msg}" =~ ${ASCII_ART_REGEX} ]] || \ + [[ "${chat_msg}" =~ ${GIBBERISH_REGEX} ]] +} + is_spam() # (chat_msg) { local needle=$1 @@ -216,9 +230,11 @@ DBG_CRITERIA() local target=$2 local query=$3 - echo -n "[SPAMFILTER]: 
target='${target}'" ; ! is_filtered_channel "${target}" && echo -n " => wrong channel - returning" ; echo - echo -n "[SPAMFILTER]: sender='${sender}'" ; is_internal_user "${sender}" && echo -n " => from internal user - returning" ; echo - echo -n "[SPAMFILTER]: query='${query}'" ; is_public_chat "${sender}" "${query}" && echo -n " => from registered user - returning" ; echo + echo -n "[SPAMFILTER]: target='${target}'" ; ! is_filtered_channel "${target}" && echo -n " => wrong channel - returning" ; echo ; + echo -n "[SPAMFILTER]: sender='${sender}'" ; is_internal_user "${sender}" && echo -n " => from internal user - returning" ; echo ; + echo -n "[SPAMFILTER]: query='${query}'" ; is_public_chat "${sender}" "${query}" && echo -n " => from registered user - returning" ; + is_nonsense "${query}" && echo -n " => is nonsense - returning" ; + is_spam "${query}" && echo -n " => is known spam - returning" ; echo ; } DBG_UNREGISTERED() { (( ${DEBUG} )) || return ; local sender=$1 ; echo "[SPAMFILTER]: unregistered user sender=${sender}" ; } DBG_SPAM() { (( ${DEBUG} )) || return ; local sender=$1 ; echo "[SPAMFILTER]: !!!triggered!!! spambot=${sender}" ; } |