#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
#
# Spam cleanup script: logs in to the wiki API, walks the list of all pages
# in batches, and deletes every page whose title matches one of the
# patterns in KEYWORDS.
#
# Reads the account's 'username' and 'password' from credentials.yml in the
# current directory.

load 'mwapi.rb'
require 'yaml'

# Title patterns that mark a page as spam. A page is deleted as soon as its
# title matches any one of them.
KEYWORDS = [
  # brand names
  /(Crimson|Pink|Purple|Green|Orange) Dye/i,
  /Air Jordan/i,
  /Andrew Ting/i,
  /Beats by Dre/i,
  /Braun 5270/i,
  /Buccaneers/i,
  /Canada Goose/i,
  /Club Penguin/i,
  /Diablo 3/i,
  /Doudoune/i,
  /Focus T25/i,
  /Gamma Blue/i,
  /Google Pagerank/i,
  /Jeffraham/i,
  /Jillian Michaels/i,
  /Jordan Fusion/i,
  /Jordan Retro/i,
  /Kate Spade/i,
  /Michael[ _]Kors/i,
  /\b(NBA|NFL)\b/i,
  /\b(world|globe|planet) cup\b/i,
  /\b49ers\b/i,
  /\bCisco 200-120\b/i,
  /\bDr\.? Dre\b/i,
  /\bGucci\b/i,
  /\bNike\b/i,
  /\bretro 11 /i,
  /\buggs?\b/i,
  /officialnflprostore/i,

  # script kiddie topics
  / on Hack Wi-Fi$/i,
  /Cracked Steam/i,
  /Psn code generator/i,
  /Steam Key Generator/i,
  /\bpc games? (free|crack)/i,
  /crack pc/i,

  # health topics
  /(body|excess) weight/i,
  /Arrhythmia/i,
  /Cardiovascular/,
  /Garcinia/i,
  /P90X/,
  /Resistance Band/i,
  /\bbodybuilding\b/i,
  /\bhypertension\b/i,
  /\bmuscle\b/i,
  /\bstairlifts?\b/i,
  /\bvegan\b/i,
  /diabetes/i,
  /dr oz/i,
  /elliptical (equipment|machines?)/i,
  /fat (burning|loss)/i,
  /health care/i,
  /heart (disease|attack)/i,
  /hemorrhoids/i,
  /herbalife/i,
  /more wellness/i,
  /pilates/i,
  /prescription/i,
  /skin care/i,
  /weight[ -](loss|reduction)/i,

  # sex topics
  /\b(male|breast) enhancement\b/i,
  /\b(sex|adult) cam/i,
  /\b(ejaculation|lesbian|penis|viagra)\b/i,

  # consumer topics
  /Possum (Removalist|Infestation)/i,
  /\b(coffee|tea) extract\b/i,
  /\b(green|ginseng) (coffee|tea)\b/i,
  /\bbaby shower\b/i,
  /\bdiy l[ue]x[ue]ry\b/i,
  /\bgreen pan\b/i,
  /\bipage (web)?host/i,
  /\bjerseys?\b/i,
  /\bmen.?s (fashion|casual wear|health)\b/i,
  /\brap beats\b/i,
  /\breal estate\b/i,
  /\bsearch engine (marketing|optimization)\b/i,
  /\bvigorous motivators\b/i,
  /apartment moves/i,
  /auto insurance/i,
  /furniture removal/i,
  /goji berr(ies|y)/i,
  /jewellery/i,

  # internet topics
  /Twitter follower/i,
  /\b(good|quality) social media\b/i,
  /\bclick here\b/i,
  /\bsocial media strategy\b/i,
  /affiliate advertising/i,
  /pay[ -]per[ -]click/i,

  # formats
  /^(aid|assist) on (where|the place)/i,
  /^A Background In/i,
  # The adjective and its trailing space are optional as a unit, so both
  # "An analysis of ..." and "A simple analysis of ..." match with ordinary
  # single spacing.
  /^An? ((simple|informative) )?analysis of /i,
  /1st Impressions in/i,
  /The (Selection|Choice|Decision) of the .* Is Your/i,

  # unsorted
  /\b(chinchilla|shit|marketing|finance|footwear|shoes|islamist|wholesale|sherbet|bankrupt|outfits|casinos?|surcharges?)\b/i,
  /(Plombier|Serrurier) paris/i,
  /\bcash (online|loan)/i,
  /\bcredit (card|check|repair)\b/i,
].freeze

# Tells whether a page title looks like spam.
#
# @param title [String] the page title to check
# @return [Boolean] true if the title matches at least one KEYWORDS pattern
def kw(title)
  KEYWORDS.any? { |re| re =~ title }
end

# Extracts the token for the next batch of pages from a query response.
#
# The legacy 'query-continue' => 'allpages' => 'apcontinue' layout is read
# first. If it is absent, the 'continue' => 'apcontinue' layout described in
# the MediaWiki continuation documentation is tried, in case the server
# answers in that format.
#
# @param data [Hash] response returned by the allpages query
# @return [String, nil] the continuation token, or nil when there is none
def next_apcontinue(data)
  legacy = data['query-continue']
  return legacy['allpages']['apcontinue'] if legacy

  current = data['continue']
  current && current['apcontinue']
end

mw = MWApi.new('https://wiki.parabolagnulinux.org/api.php')
credentials = YAML.load_file('credentials.yml')
mw.login(credentials['username'], credentials['password'])

# An empty token requests the first batch; nil means there is nothing left.
apcontinue = ''
until apcontinue.nil?
  puts 'Searching...'
  data = mw.query(:list => :allpages, :aplimit => 200, :apcontinue => apcontinue)
  titles = data['query']['allpages'].map { |page| page['title'] }.select { |title| kw(title) }

  puts "Deleting #{titles.length} articles..."
  mw.delete_by_title(titles, { :reason => 'Spam' }) unless titles.empty?

  apcontinue = next_apcontinue(data)
  puts "apcontinue = #{apcontinue.inspect}"
end