summary refs log tree commit diff
path: root/scratch.rb
diff options
context:
space:
mode:
Diffstat (limited to 'scratch.rb')
-rwxr-xr-x scratch.rb 149
1 files changed, 149 insertions, 0 deletions
diff --git a/scratch.rb b/scratch.rb
new file mode 100755
index 0000000..2c3bcf6
--- /dev/null
+++ b/scratch.rb
@@ -0,0 +1,149 @@
+#!/usr/bin/env ruby
+# -*- coding: utf-8 -*-
+
+load 'mwapi.rb'
+require 'yaml'
+require 'pp'
+
+# Create the wiki API client and log in with the 'username' and 'password'
+# entries read from credentials.yml.
+mw = MWApi.new('https://wiki.parabolagnulinux.org/api.php')
+username, password = YAML.load_file('credentials.yml').values_at('username', 'password')
+mw.login(username, password)
+
+# apcontinue = ''
+# while not apcontinue.nil? do
+# print "Searching...\n"
+# data = mw.query(:list => :allpages,:aplimit => 200, :apcontinue => apcontinue)
+# titles = data['query']['allpages'].select{|page| page['title'].split(' ').length > 9 }.map{|page| page['title']}
+# print "Deleting #{titles.length} articles...\n"
+# if (titles.length > 0)
+# mw.delete_by_title(titles, { :reason => 'Spam' })
+# end
+# if data['query-continue'].nil?
+# apcontinue = nil
+# else
+# apcontinue = data['query-continue']['allpages']['apcontinue']
+# end
+# print "apcontinue = #{apcontinue.inspect}\n"
+# end
+
+# continue = ''
+# while not continue.nil? do
+# print "Searching...\n"
+# data = mw.query(:list => :allimages,:ailimit => 200, :aicontinue => continue)
+# titles = data['query']['allimages'].select{|page| /^File:[A-Z].* [0-9]?[0-9][0-9][0-9]\.jpg$/ =~ page['title'] }.map{|page| page['title']}
+# print "Deleting #{titles.length} articles...\n"
+# if (titles.length > 0)
+# mw.delete_by_title(titles, { :reason => 'Spam' })
+# end
+# if data['query-continue'].nil?
+# continue = nil
+# else
+# continue = data['query-continue']['allimages']['aicontinue']
+# end
+# print "continue = #{continue.inspect}\n"
+# end
+
+
+# Regular expressions tested against page titles by kw; a title matching any
+# one of them is treated as spam. Every pattern is case-insensitive.
+@keywords = [
+  # brand names
+  /(Crimson|Pink|Purple|Green|Orange) Dye/i,
+  /Air Jordan/i,
+  /Andrew Ting/i,
+  /Beats by Dre/i,
+  /Buccaneers/i,
+  /Canada Goose/i,
+  /Club Penguin/i,
+  /Diablo 3/i,
+  /Doudoune/i,
+  /Gamma Blue/i,
+  /Garcinia/i,
+  /Jeffraham/i,
+  /Jordan Fusion/i,
+  /Jordan Retro/i,
+  /Kate Spade/i,
+  /Michael[ _]Kors/i,
+  /\b49ers\b/i,
+  /\bCisco 200-120\b/i,
+  /\bDr\.? Dre\b/i,
+  /\bGucci\b/i,
+  /\bretro 11 /i,
+  /\buggs?\b/i,
+  /officialnflprostore/i,
+  # script kiddie topics
+  / on Hack Wi-Fi$/i,
+  /Cracked Steam/i,
+  /Psn code generator/i,
+  /Steam Key Generator/i,
+  /\bpc games? (free|crack)/i,
+  /crack pc/i,
+  # health topics
+  /Resistance Band/i,
+  /diabetes/i,
+  /elliptical (equipment|machines?)/i,
+  /fat burning/i,
+  /health care/i,
+  /heart (disease|attack)/i,
+  /more wellness/i,
+  /pilates/i,
+  /skin care/i,
+  /weight loss/i,
+  /(body|excess) weight/i,
+  /Arrhythmia/i,
+  /Cardiovascular/i,
+  # sex topics
+  /\b(sex|adult) cam/i,
+  /\b(male|breast) enhancement\b/i,
+  /\bpenis\b/i,
+  # other topics
+  /\b(coffee|tea) extract\b/i,
+  /\b(good|quality) social media\b/i,
+  /\b(green|ginseng) (coffee|tea)\b/i,
+  /\b(world|globe|planet) cup\b/i,
+  /\bbaby shower\b/i,
+  /\bcash loan\b/i,
+  /\bclick here\b/i,
+  /\bcredit (check|repair)\b/i,
+  /\bcredit card\b/i,
+  /\bdiy l[ue]x[ue]ry\b/i,
+  /\bgreen pan\b/i,
+  /\bipage (web)?host/i,
+  /\bmen.?s (fashion|casual wear|health)\b/i,
+  /\brap beats\b/i,
+  /\bsearch engine marketing\b/i,
+  /\bsocial media strategy\b/i,
+  /\bvigorous motivators\b/i,
+  # formats
+  /^(aid|assist) on (where|the place)/i,
+  /^A Background In/i,
+  # The optional adjective carries its own trailing space, so both
+  # "An analysis of ..." and "A simple analysis of ..." match; " +" still
+  # accepts runs of several spaces.
+  /^An? +((simple|informative) +)?analysis of /i,
+  /1st Impressions in/i,
+  /The (Selection|Choice|Decision) of the .* Is Your/i,
+  # unsorted
+  /\b(jerseys?|vegan|shit|marketing|finance|footwear|shoes|muscle|lesbian|islamist|bodybuilding|nfl|nba|ejaculation|wholesale|nike|Hypertension|sherbet|bankrupt|stairlifts?|outfits|surcharges?)\b/i,
+]
+
+# Report whether a page title matches any spam pattern.
+#
+# title - the page title to test against each regexp in @keywords.
+#
+# Returns true if at least one pattern matches, false otherwise.
+def kw(title)
+  @keywords.any? { |re| re =~ title }
+end
+
+# Page through the wiki's allpages list (requesting :aplimit => 200 per call)
+# and delete every page whose title kw flags as spam.
+apcontinue = ''
+until apcontinue.nil?
+  print "Searching...\n"
+  data = mw.query(:list => :allpages, :aplimit => 200, :apcontinue => apcontinue)
+  spam = []
+  data['query']['allpages'].each do |page|
+    spam << page['title'] if kw(page['title'])
+  end
+  print "Deleting #{spam.length} articles...\n"
+  mw.delete_by_title(spam, { :reason => 'Spam' }) unless spam.length == 0
+  # A response without 'query-continue' ends the loop; otherwise resume from
+  # the apcontinue token it carries.
+  # NOTE(review): 'query-continue' looks like MediaWiki's legacy continuation
+  # format -- confirm that MWApi#query still receives it from this wiki.
+  more = data['query-continue']
+  apcontinue = more.nil? ? nil : more['allpages']['apcontinue']
+  print "apcontinue = #{apcontinue.inspect}\n"
+end