Benutzer:Drahflow's Bot/Source
Zur Navigation springen
Zur Suche springen
#!/usr/bin/perl use strict; use warnings; use MediaWiki::Bot; use Data::Dumper; use LWP; use LWP::UserAgent; use Encode; use URI::Escape; use Storable qw(nstore retrieve); use Carp; $SIG{__DIE__} = sub { confess; }; use encoding 'utf8'; my $wiki; my $lwp; my $WIKINAME = $ARGV[0] or die "usage: ./bot.pl <Wiki>"; my $conf; if($WIKINAME eq "AK") { $conf = { 'wiki' => { 'host' => 'wiki.vorratsdatenspeicherung.de', 'path' => '/' }, 'bot' => { 'user' => 'Drahflow\'s Bot', 'pass' => 'geheim' }, }; } elsif($WIKINAME eq "Piraten") { $conf = { 'wiki' => { 'host' => 'wiki.piratenpartei.de', 'path' => 'wiki/' }, 'bot' => { 'user' => 'Drahflow\'s Bot', 'pass' => 'geheim' }, }; } else { die "Unknown wiki: $WIKINAME"; } sub cycleConnection { $wiki->logout() if($wiki); $wiki = MediaWiki::Bot->new({ 'assert' => 'user', 'agent' => "Drahflow's Wiki Bot", 'protocol' => 'http', 'host' => $conf->{'wiki'}->{'host'}, 'path' => $conf->{'wiki'}->{'path'}, 'login_data' => { 'username' => $conf->{'bot'}->{'user'}, 'password' => $conf->{'bot'}->{'pass'} }, 'debug' => 1000, }) or die "Wiki init failed"; $lwp = LWP::UserAgent->new(); $lwp->agent("Drahflow's Wiki Bot"); } cycleConnection(); while(my $command = <STDIN>) { chomp $command; last if($command eq "q" or $command eq "quit"); print "len: " . length($command) . "\n"; my $force = 0; if ($command =~ /^!(.*)/) { $force = 1; $command = $1; } my $error; do { print "executing: $command\n"; eval { dumpContent($1) if($command =~ /^DUMP ([^|]*)$/); execTest() if($command eq 'TEST'); uploadSource() if($command eq 'CUPLOAD'); cleanupRedirect($1, $2, $3? 1: 0) if($command =~ /^CREDIR ([^|]*)\|?((?:del)?)\|?((?:auto)?)$/); cleanupDoubleRedirect() if($command =~ /^CDBLREDIR$/); checkout($1) if($command =~ /^MVOUT ([^|]*)$/); checkin($1, $2) if($command =~ /^MVIN ([^|]*)\|?([^|]*)$/); syncin($1, $2, $3) if($command =~ /^MVSYN ([^|]*)\|?([^|]*)\|?([^|]*)$/); storein($1, $2) if($command =~ /^MVSTORE ([^|]*)\|([^|]*)$/); copyout($1) if($command =~ /^GET ([^|]*)$/); masscopyout($1,$2) if($command =~ /^MGET ([^|]*)\|?((?:follow)?)$/); fetchCategoryMatrix($1, $2) if($command =~ /^GETC ([^|]*)\|(.*)$/); checkToDoUsage() if($command =~ /^QTODO$/); checkLanguageSync() if($command =~ /^QLANG$/); moveCategory($1, $2) if($command =~ /^CMV ([^|]*)\|?([^|]*)$/); addCategories($1, $2) if($command =~ /^CADD (.*)\|\|(.*)$/); putCategoryMatrix($1, $2, $3) if($command =~ /^CPUT ([^|]*)(?:\|([^|]+))?(?:\|(auto|ask))?$/); pirateBoardUpdate() if($command =~ /^CPP$/); }; $error = $@; print $error if $error } while ($error and $force); } $wiki->logout(); sub loadSure { my ($name, $mode) = @_; die "no mode given" unless $mode; my $content = $wiki->get_text($name); unless(defined $content) { die "could not load $name"; } print "Page $name loaded.\n"; return $content; } sub loadCategorySure { my ($name) = @_; unless($name =~ /Kategorie:|Category:/) { die "category name must be given with prefix"; } my $req = HTTP::Request->new( 'GET' => 'http://' . $conf->{'wiki'}->{'host'} . '/' . $name . '?redirect=no'); my $res = $lwp->request($req); if(not $res->is_success()) { die "could not load $name"; } my ($subcatsPart) = $res->content() =~ /\n<h2>Unterkategorien(.*?)\n<h2/s; my ($articlesPart) = $res->content() =~ /\n<h2>Seiten in der Kategorie(.*?)\nVon/s; my $subcats = []; my $articles = []; while(defined $subcatsPart and $subcatsPart =~ s/.*?<a href="\/([^"]+)" title="([^"]+)">//) { push @$subcats, $2; } while(defined $articlesPart and $articlesPart =~ s/.*?<a href="\/([^"]+)" title="([^"]+)">//) { push @$articles, $2; } print "Category $name loaded.\n"; return $articles, $subcats; } sub looksOffLimit { my ($name) = @_; my $result = ( $name =~ /Ortsgruppe/ or $name =~ /^Benutzer/ or $name =~ /^Presse:/ ); } sub saveSure { my ($name, $text, $summary, $minor) = @_; if(looksOffLimit($name)) { askConfirmation("Page " . $name . " looks like it should be left alone"); } die "no summary given" unless $summary; $wiki->edit({ 'page' => $name, 'text' => $text, 'summary' => $summary, 'is_minor' => $minor? 1: 0, }) or die "could not save " . $name; print "Page " . $name . " saved.\n"; } sub askConfirmation { my ($message) = @_; while(1) { print "==> $message, continue [N/y]\n"; my $answer = <STDIN>; chomp $answer; if($answer eq '' or $answer eq 'n') { die "User confirmation failed."; } if($answer eq 'y') { last; } } } sub dumpContent { my ($name) = @_; die "no name given" unless $name; my $text = loadSure($name, "r"); print Dumper($text); } sub execTest { my $text = loadSure('Benutzer:Drahflow/Sandkasten', "rw"); print Dumper($text); saveSure( 'Benutzer:Drahflow/Sandkasten', $text . 'Minimaler Testlauf', 'Testing [[Benutzer:Drahflow]]\'s Bot', ); } sub uploadSource { open SRC, '<', 'bot.pl' or die "cannot open bot.pl: $!"; my $text = ' <no' . 'wiki>' . (join '', map { s/'pass' => 'geheim']*'/'pass' => 'geheim'/; $_ } <SRC>) . '</no' . 'wiki>'; close SRC; saveSure( 'Benutzer:Drahflow\'s Bot/Source', $text, '[[Benutzer:Drahflow\'s Bot|Mich selbst]] hochladen' ); } sub fetchIncoming { my ($name) = @_; my @incoming = map { $_->{'title'} } $wiki->what_links_here($name); print Dumper(\@incoming); return @incoming; } sub cleanupRedirect { my ($name, $del, $auto) = @_; die "no name given" unless $name; my $content = loadSure($name, "r"); $content =~ m!#(?:redirect|weiterleitung):? ?\[\[([^\]|]+)( ?\|([^\]]+))?\]\]!i or die "could not find redirect"; my ($redirect, undef, $redirectDisplay) = ($1, $2, $3); $redirectDisplay = $name unless defined $redirectDisplay; $redirectDisplay =~ s/_/ /g; print "Redirect to: $redirect, Display: $redirectDisplay\n"; my $fix = sub { my ($in) = @_; return if($auto and looksOffLimit($in)); my $content = loadSure($in, "rw"); my $any = 0; my $mask = $name; $mask =~ s/[ _]/[ _]/g; $mask =~ s/\(/\\(/g; $mask =~ s/\)/\\)/g; $mask =~ s/\\/\\\\/g; while($content =~ m!\[\[$mask(#[^ ]*)?( ?\|([^\]]+))?\]\]!s) { my ($anchor, undef, $display) = ($1, $2, $3); if(not defined $anchor) { $anchor = ''; } if(not defined $display) { $display = $redirectDisplay; } print "Displayname: $display\n"; $content =~ s!\[\[$mask(#[^ ]*)?( ?(\|[^\]]+)?)\]\]![[$redirect$anchor\|$display]]!; print "Link on $in fixed.\n"; ++$any; } if($any) { unless($auto) { askConfirmation("Page $in will be saved"); } saveSure( $in, $content, "Weiterleitungs-Cleanup, Link von $name auf $redirect verbogen" ); } else { if($auto) { warn "incoming link not found" if(not $any); } else { die "incoming link not found" if(not $any); } } }; my @incoming = fetchIncoming($name); my $templateFixed = 0; foreach my $in ( sort { looksOffLimit($a) <=> looksOffLimit($b) } grep { $_ =~ /^Vorlage:/ } @incoming) { &$fix($in); $templateFixed = 1; } if($templateFixed) { cycleConnection(); @incoming = fetchIncoming($name); } foreach my $in ( sort { looksOffLimit($a) <=> looksOffLimit($b) } grep { $_ !~ /^Vorlage:/ } @incoming) { &$fix($in); } if($del) { $content = loadSure($name, "rw"); if($content =~ m!^#redirect ?\[\[$redirect\]\]$!si) { $content .= "\n{{Vorlage:Drahflow/Löschen/Weiterleitung}}"; print "Inserted deletion remark.\n"; } askConfirmation("Page $name will be saved"); saveSure( $name, $content, "Weiterleitungs-Cleanup, Weiterleitung zum Löschen eingetragen"); } print "Done.\n"; } sub cleanupDoubleRedirect { my $req = HTTP::Request->new( 'GET' => 'http://' . $conf->{'wiki'}->{'host'} . '/Spezial:Doppelte_Weiterleitungen'); my $res = $lwp->request($req); if(not $res->is_success()) { die "could not load Spezial:Doppelte_Weiterleitungen"; } my @names = map { uri_unescape($_) } map { $_ =~ qr{.*<li><a href="/index\.php\?title=([^"]+)&redirect=no".*?→.*?<a href="/index\.php\?title=([^"]+)&redirect=no".*}; ($1, $2) } grep { /^<li>/ } split /\n/, $res->content(); for(my $i = 0; $i < @names; ++$i) { eval { cleanupRedirect($names[$i], 0, 1); }; warn if($@); print "($i / " . scalar(@names) . ")\n"; } print "Done.\n"; } sub sanitizeFilename { my ($name) = @_; $name =~ s/-/--/g; $name =~ s!/!-+!g; return "checkout/$name"; } sub checkout { my ($name) = @_; die "no name given" unless $name; my $content = loadSure($name, "rw"); if($content =~ /{{ *InArbeit/ or $content =~ /{{ *Vorlage: *InArbeit/) { askConfirmation("Page $name is tagged with {{Vorlage:InArbeit}}"); } my $origContent = $content; $content = "{{Vorlage:InArbeit|[[Benutzer:Drahflow]]}}\n" . $content; saveSure($name, $content, "{{:Vorlage:InArbeit}} gesetzt", 1); my $filename = sanitizeFilename($name); open PAGE, '>:utf8', $filename or die "cannot open $filename: $!"; print PAGE $origContent; close PAGE; print "Done.\n"; } sub copyout { my ($name) = @_; die "no name given" unless $name; my $content = loadSure($name, "rw"); my $filename = sanitizeFilename($name); open PAGE, '>:utf8', $filename or die "cannot open $filename: $!"; print PAGE $content; close PAGE; print "Done.\n"; } sub masscopyout { my ($filename,$follow) = @_; die "no filename given" unless $filename; my @failures; open LIST, '<:utf8', $filename or die "cannot open $filename: $!"; my $i = 0; while(my $name = <LIST>) { chomp $name; if($name =~ /%/) { $name = decode('utf8', encode('utf8', uri_unescape(encode('utf8', $name)))); } eval { my $content = loadSure($name, "r"); if($follow and $content =~ m!#(?:redirect|weiterleitung):? ?\[\[([^\]|]+)( ?\|([^\]]+))?\]\]!i) { $name = $1; if($name =~ /%/) { $name = decode('utf8', encode('utf8', uri_unescape(encode('utf8', $name)))); } $content = loadSure($name, "r"); } my $filename = sanitizeFilename(sprintf("%06d", $i)); open PAGE, '>:utf8', $filename or die "cannot open $filename: $!"; print PAGE $content; close PAGE; }; if($@) { push @failures, [$i, $name]; } $i++; sleep 3; } close LIST; print "Failures:\n"; foreach my $fail (@failures) { printf "%06d: %s\n", @$fail; } print "Done.\n"; } sub checkin { my ($name, $reason) = @_; die "no name given" unless $name; die "no reason given" unless $reason; my $filename = sanitizeFilename($name); open PAGE, '<:utf8', $filename or die "cannot open $filename: $!"; my $origContent = join('', <PAGE>); close PAGE; my $content = loadSure($name, "rw"); if($content !~ /^{{Vorlage:InArbeit|\[\[Benutzer:Drahflow\]\]}}/s) { askConfirmation("Page $name is not tagged as being edited by you"); } saveSure($name, $origContent, $reason); unlink $filename; print "Done.\n"; } sub syncin { my ($filename, $name, $reason) = @_; die "no file given" unless $filename; die "no name given" unless $name; die "no reason given" unless $reason; while(1) { eval { open PAGE, '<:utf8', $filename or die "cannot open $filename: $!"; my $origContent = join('', <PAGE>); close PAGE; saveSure($name, $origContent, $reason); print "Synced.\n"; }; if($@) { print $@; } sleep 15; } } sub storein { my ($filename, $reason) = @_; die "no file given" unless $filename; die "no reason given" unless $reason; my $data = retrieve($filename) or die "could not retrieve from $filename: $!"; foreach my $d (@$data) { eval { saveSure($d->{'wiki_location'}, $d->{'wiki_content'}, $reason); print "Stored " . $d->{'wiki_location'} . "\n"; }; if($@) { print $@; } } } sub getTemplateUsers { my ($name) = @_; return map { $_->{'title'} } $wiki->list_transclusions($name); } sub checkToDoUsage { my @users = getTemplateUsers("Vorlage:ToDo"); my @problems; foreach my $user (@users) { my $content = loadSure($user, "r"); if($content =~ /{{Vorlage: *ToDo/) { push @problems, $user; } } foreach my $user (@problems) { print "Problematic usage: $user\n"; } print "Done.\n"; } sub moveCategory { my ($from, $to) = @_; die "no from category given" unless $from; die "no to category given" unless $to; my ($articles, $subcats) = loadCategorySure($from); my @problems; foreach my $entry (@$articles, @$subcats) { my $content = loadSure($entry, "rw"); my $success = 0; if($content =~ /\[\[$to(?:\|[^\]|]*)?\]\]/) { if($content =~ s/\[\[$from(?:\|[^\]|]*)?\]\]//) { $success = 1; } } else { if($content =~ s/\[\[$from((?:\|[^\]|]*)?)\]\]/[[$to$1]]/) { $success = 1; } } if($success) { saveSure( $entry, $content, "Kategorie-Umbenennung, von $from nach $to"); } else { push @problems, $entry; } } foreach my $entry (@problems) { print "Problematic usage: $entry\n"; } print "Done.\n"; } sub fetchCategoryMatrix { my ($name, $categories) = @_; my @categories = split /\|/, $categories; die "no category given" unless @categories; my %articles; my $filename = sanitizeFilename($name); open OUTPUT, '>:utf8', $filename or die "cannot open $filename: $!"; foreach my $cat (@categories) { my ($articles, $subcats) = loadCategorySure($cat); print OUTPUT "$cat\n"; foreach my $article (@$articles, @$subcats) { $articles{$article}->{$cat} = 1; } } print OUTPUT "\n"; foreach my $article (sort keys %articles) { foreach my $cat (@categories) { print OUTPUT $articles{$article}->{$cat}? "x ": " "; } print OUTPUT " " . $article . "\n"; } close OUTPUT; print "Done.\n"; } sub putCategoryMatrix { my ($name, $reason, $mode) = @_; die "no reason has been given" unless $reason; my $filename = sanitizeFilename($name); my @categories; open INPUT, '<:utf8', $filename or die "cannot open $filename: $!"; while(my $line = <INPUT>) { chomp $line; last if($line eq ""); push @categories, $line; } my %articlesNew; while(my $line = <INPUT>) { chomp $line; my $article = substr($line, 1 + 2 * @categories); my $i = 0; foreach my $cat (@categories) { $articlesNew{$article}->{$cat} = (substr($line, $i, 1) eq 'x'); $i += 2; } } close INPUT; my %articlesCurrent; foreach my $cat (@categories) { my ($articles, $subcats) = loadCategorySure($cat); foreach my $article (@$articles, @$subcats) { $articlesCurrent{$article}->{$cat} = 1; } } my @problems; # only articles mentioned in the file are touched foreach my $article (sort keys %articlesNew) { my @toInsert; my @toDelete; # only categories mentioned in the file are touched foreach my $newCat (@categories) { next unless $articlesNew{$article}->{$newCat}; push @toInsert, $newCat unless $articlesCurrent{$article}->{$newCat}; } # only categories mentioned in the file are touched foreach my $curCat (@categories) { unless($articlesCurrent{$article}->{$curCat}) { next; } push @toDelete, $curCat unless $articlesNew{$article}->{$curCat}; } if(@toInsert or @toDelete) { print "Modifying $article\n" . " Insert: @toInsert\n" . " Delete: @toDelete\n"; if(defined $mode) { if($mode eq 'ask') { askConfirmation("$article will be changed as above"); } my $content = loadSure($article, "rw"); my $success = 1; my $catDisplay = undef; foreach my $cat (@toDelete) { my $catRegex = $cat; $catRegex =~ s/ /[ _]/g; if($content =~ s/\n\[\[$catRegex((?:\|[^\]|]*)?)\]\]\n/\n/s) { $catDisplay = substr($1, 1); } elsif ($content =~ s/\[\[$catRegex((?:\|[^\]|]*)?)\]\]//) { $catDisplay = substr($1, 1); } else { push @problems, "$article <-> $cat\n"; $success = 0; } } foreach my $cat (@toInsert) { unless($content =~ /\[\[$cat((?:\|[^\]|]*)?)\]\]/) { unless($content =~ /\n$/s) { $content .= "\n"; } if(defined $catDisplay) { $content .= "[[$cat|$catDisplay]]"; } else { $content .= "[[$cat]]"; } } } if($success) { saveSure($article, $content, "Kategorie-Zuordnungs-Upload, $reason"); } } } } foreach my $entry (@problems) { print "Problematic usage: $entry\n"; } } sub checkLanguageSync { my %users = map { ($_, $_) } getTemplateUsers("Vorlage:Mehrsprachig"); my @problems; while(%users) { my ($first) = keys %users; my $content = loadSure($first, "r"); unless($content =~ /{{(Vorlage:)? *Mehrsprachig\b(.*)}}/s) { delete $users{$first}; push @problems, "Could not find template call: $first"; next; } my $parameters = $2; unless($parameters =~ /\bsynchronisiert *= *1/) { delete $users{$first}; next; } my @otherPages; push @otherPages, $1 if($parameters =~ /\bde *= *([^|{}]*)/s); push @otherPages, $1 if($parameters =~ /\ben *= *([^|{}]*)/s); @otherPages = grep { $_ ne $first } map { chomp; $_ } @otherPages; if(@otherPages < 1) { delete $users{$first}; push @problems, "Synchronization group of less than 2 on $first"; next; } OTHERS: foreach my $other (@otherPages) { my @firstLines = split /\n/, $content; my $otherContent = loadSure($other, "r"); unless($otherContent =~ /{{(Vorlage:)? *Mehrsprachig\b(.*)}}/s) { push @problems, "Could not find template call: $other"; next; } my $parameters = $2; unless($parameters =~ /\bsynchronisiert *= *1/) { next; } my @otherLines = split /\n/, $otherContent; if(@firstLines != @otherLines) { push @problems, "Line counts differ between $first and $other"; last OTHERS; } @firstLines = map { length($_)? 1: 0; } @firstLines; @otherLines = map { length($_)? 1: 0; } @otherLines; for(my $i = 0; $i < @firstLines; ++$i) { if($firstLines[$i] ne $otherLines[$i]) { push @problems, "Line " . ($i + 1) . " differs between $first and $other"; last OTHERS; } } } delete $users{$first}; foreach my $name (@otherPages) { delete $users{$name}; } } foreach my $entry (@problems) { print "$entry\n"; } print "Done.\n"; } sub addCategories { my ($categories, $names) = @_; my @categories = split(/\|/, $categories); my @names = split(/\|/, $names); die "no categories given" unless @categories; die "no pages given" unless @names; my %pagesInCat; foreach my $cat (@categories) { my $correctPages = loadCategorySure($cat); $pagesInCat{$cat} = $correctPages; } foreach my $name (@names) { my $content = loadSure($name, "rw"); my $changes = 0; foreach my $cat (@categories) { next if(grep { $_ eq $name } @{$pagesInCat{$cat}}); $content .= "\n[[$cat]]"; print "$cat added.\n"; $changes = 1; } if($changes) { saveSure( $name, $content, "Kategorie hinzugefügt"); } } print "Done.\n"; } sub pirateBoardUpdate { my @files = glob("/home/drahflow/piraten/vorstand/*-protokoll"); my %decisions; foreach my $file (@files) { open FILE, '<:utf8', $file or die "cannot read $file: $!"; my $lastLine; while(my $line = <FILE>) { chomp $line; if($line =~ /(#\d\d\d\d-\d\d-\d\d.\d+)/) { my $decision = $1; if(length($line) < 80) { $line = $lastLine . $line; } $line =~ s/$decision//g; $decisions{$decision} = $line; print $line, "\n"; } $lastLine = $line; } close FILE; } my $content = loadSure("Landesverband_Niedersachsen/Vorstand/Beschlüsse", "rw"); foreach my $decision (sort keys %decisions) { next if($content =~ $decision); $content .= <<EOWIKI; '''$decision''': ''Unkategorisiert'': $decisions{$decision} EOWIKI } saveSure( "Landesverband_Niedersachsen/Vorstand/Beschlüsse", $content, "Update der Vorstandsbeschlüsse"); $content = <<EOWIKI; [[Kategorie:Landesverband Niedersachsen]] '''Achtung''': Die Inhalte dieser Seite halte [[Benutzer:Drahflow|ich]] auf meinem eigenen Rechner autorativ. Änderungen werden nach der nächsten Vorstandstelko unbesehen gelöscht. EOWIKI my $name; open FILE, '<:utf8', '/home/drahflow/piraten/vorstand/todo' or die "open fail: ~/piraten/vorstand/todo: $!"; while(my $line = <FILE>) { chomp $line; if($line =~ /^([A-Z].*)$/) { if(defined $name) { $content .= "}}\n"; } $name = $1; $content .= "{{Kasten grau|$name|\n"; } elsif($line =~ /^ +(.*)$/) { my $task = $1; $content .= "* $task\n"; } } if(defined $name) { $content .= "}}\n"; } close FILE; saveSure( "Landesverband_Niedersachsen/Vorstand/TODO", $content, "Update der VorstandsTODOs"); print "Done.\n"; } #TODO: Something within a directory which is also a name of a category should # belong to said category #TODO: Nothing should belong to a category and also directly to some category # above it. #TODO: Everything should have a category #TODO: Categories should not be cyclic