# User:AnomieBOT/source/tasks/TemplateSubster/Base.pm
package tasks::TemplateSubster::Base;
use utf8;
use strict;
use Data::Dumper;
use AnomieBOT::Task qw/:time/;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;
# Constructor. Builds the object via the parent class and initialises the
# state that process() uses to resume between calls:
# - 'deferred': set by process() when it moves on from a template early
# - 'curtitle': template currently being worked on (undef = start of list)
# - 'ei iter':  iterator over pages embedding 'curtitle' (undef = none open)
sub new {
    my ($class) = @_;

    my $self = $class->SUPER::new();
    @{$self}{ 'deferred', 'curtitle', 'ei iter' } = ( 0, undef, undef );

    return bless $self, $class;
}
# Process a set of templates: for each one, walk every page that embeds it and
# subst the template there.
#
# Progress is kept in $self ('curtitle', 'ei iter', 'deferred'), so a later
# call picks up where this one stopped.
#
# Params:
# - $api: AnomieBOT::API
# - $process: Hash mapping templates to process to their status bitmaps.
#   Values may be modified during processing. Bits set by this method:
#     0x0001  an embedding page cannot be edited (user JS/CSS, MediaWiki
#             namespace, or protected)
#     0x0002  the bot is excluded from an embedding page
#     0x0004  an invocation carries a "nosubst" or "demo" parameter
#     0x0008  the template is still transcluded after the page was handled
#     0x4000  an error occurred (edit token, expansion, write or parse)
#     0x8000  the list of embedding pages was walked to its end
# - $r: Hash mapping redirect names to template names
# - $endtime: Timestamp at which to return to allow other tasks a chance
# Returns the value to return from run() (one of 0, 60, 300 or 3600).
sub process {
    my ($self, $api, $process, $r, $endtime) = @_;

    my @process = sort keys %$process;
    if(defined($self->{'curtitle'})) {
        # Resuming: drop every template that sorts before the saved position.
        $api->debug( 2, "Skipping templates before $self->{'curtitle'}" );
        @process = grep { $_ ge $self->{'curtitle'} } @process;
        if ( !@process || $self->{'curtitle'} ne $process[0] ) {
            # The saved template is no longer in the list, so the saved
            # iterator does not belong to the new head of the list either.
            $self->{'curtitle'} = $process[0] // undef;
            $self->{'ei iter'} = undef;
            if ( !defined( $self->{'curtitle'} ) ) { # Err...
                $api->debug( 2, "Nothing? Will continue." );
                $self->{'deferred'} = 0;
                return 0;
            }
        }
    } else {
        $self->{'curtitle'} = $process[0];
        $self->{'ei iter'} = undef;
    }

    my $checkEnd = 0;
    while(defined($self->{'curtitle'})){
        if(!defined($self->{'ei iter'})){
            $api->debug( 2, "Starting processing of $self->{'curtitle'}" );
            $self->{'ei iter'}=$api->iterator(
                generator => 'embeddedin',
                geititle  => $self->{'curtitle'},
                geilimit  => '100',
                prop      => 'info',
            );
            # A fresh walk starts from a clean status.
            $process->{$self->{'curtitle'}} = 0;
        } else {
            $api->debug( 2, "Continuing processing of $self->{'curtitle'}" );
        }

        # A lexical is used for the current page (rather than the global $_)
        # so that nothing called from the loop body can clobber it.
        while(my $page=$self->{'ei iter'}->next){
            return 0 if $api->halting;

            if(!$page->{'_ok_'}){
                $api->warn("Failed to retrieve transclusions for $self->{curtitle}: ".$page->{'error'}."\n");
                return 60;
            }

            my $title=$page->{'title'};

            # Can't edit user js or css
            if($page->{'ns'}==2 && $title=~/\.(?:js|css)$/){
                $process->{$self->{'curtitle'}} |= 0x01;
                next;
            }

            # Can't edit Mediawiki namespace either
            if($page->{'ns'}==8){
                $process->{$self->{'curtitle'}} |= 0x01;
                next;
            }

            # Skip if we checked this revision already
            my $revid=$page->{'lastrevid'};
            my $key=$self->{'curtitle'}."|$title";
            my $tried = $api->store->{$key} // [ 0, 0 ];
            if ( ref($tried) eq 'ARRAY' && $tried->[0] == $revid ) {
                $process->{$self->{'curtitle'}} |= $tried->[1];
                next;
            }

            # Did we run out of time? We only get here when there is still a
            # page that needs work, so this template is left unfinished and
            # the next call starts on the following one.
            if ( $checkEnd ) {
                shift @process;
                $self->{'curtitle'} = $process[0] // undef;
                $self->{'ei iter'} = undef;
                $self->{'deferred'} = defined( $self->{'curtitle'} ) ? 1 : 0;
                $api->debug( 2, "Ran out of time, will continue with the following template." );
                return 0;
            }

            # Ok, check the page
            my $tok=$api->edittoken($title, EditRedir=>1);
            $revid=$tok->{'lastrevid'} // $revid; # In case MW somehow returned an older revision than it did earlier, use the rev in the actual token.
            if($tok->{'code'} eq 'shutoff'){
                $api->warn("Task disabled: ".$tok->{'content'}."\n");
                # Clear iterators so a restart actually restarts
                $self->{'ei iter'} = undef;
                $self->{'curtitle'} = undef;
                return 300;
            }
            if($tok->{'code'} eq 'pageprotected'){
                # Don't worry about protected pages, just mark them and continue
                $process->{$self->{'curtitle'}} |= 0x01;
                $api->store->{$key} = [ $revid, 0x01 ];
                next;
            }
            if($tok->{'code'} eq 'botexcluded'){
                # Don't retry on bot exclusion either
                $api->warn("TemplateSubster excluded from $title: ".$tok->{'error'}."\n");
                $process->{$self->{'curtitle'}} |= 0x02;
                $api->store->{$key} = [ $revid, 0x02 ];
                next;
            }
            if($tok->{'code'} ne 'success'){
                $api->warn("Failed to get edit token for $title: ".$tok->{'error'}."\n");
                $process->{$self->{'curtitle'}} |= 0x4000;
                next;
            }
            next if exists($tok->{'missing'});

            # Get page text
            my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'};

            # Perform the removal
            my %remv=();
            my $fail=0;
            my $outtxt=$api->process_templates($intxt, sub {
                # After a fatal error, leave every remaining template alone.
                return undef if $fail;
                my $name=shift;
                my $params=shift;
                my $wikitext=shift;
                my $data=shift;
                my $oname=shift;
                my $nl=shift;

                # Only touch templates listed in $r.
                return undef unless exists($r->{"Template:$name"}) || exists($r->{$name});

                # An invocation with a "nosubst" or "demo" parameter is kept as is.
                foreach ($api->process_paramlist(@$params)){
                    if ($_->{'name'}=~/^\s*(?:nosubst|demo)\s*$/) {
                        $process->{$self->{'curtitle'}} |= 0x04;
                        return undef;
                    }
                }

                my ($ret, $fatal) = $self->do_subst($api, $title, $oname, $name, $wikitext, $nl);
                $fail = 1 if $fatal;
                $remv{$name}=1 if defined( $ret );
                return $ret;
            });
            if($fail) {
                $process->{$self->{'curtitle'}} |= 0x4000;
                return 60;
            }

            # Need to edit?
            if(%remv){
                my @remv=sort keys %remv;
                my $summary=$self->summary( $api, @remv );
                $api->log( "$summary in $title" );
                my $res2=$api->edit($tok, $outtxt, $summary, 1, 1);
                if($res2->{'code'} ne 'success'){
                    $api->warn("Write failed on $title: ".$res2->{'error'}."\n");
                    $process->{$self->{'curtitle'}} |= 0x4000;
                    next;
                }
                # A successful edit that changed nothing reports no new
                # revision; keep the revision we already have in that case
                # rather than carrying an undefined id forward.
                $revid=$res2->{'edit'}{'newrevid'} // $revid;
            }

            # Check whether the edit (or lack thereof) actually removed all transclusions of the template
            my $res2=$api->query( action => 'parse', oldid => $revid, prop => 'templates', formatversion => 2 );
            if($res2->{'code'} eq 'success') {
                my $flag = ( grep { $_->{'title'} eq $self->{'curtitle'} } @{$res2->{'parse'}{'templates'}} ) ? 0x08 : 0;
                $process->{$self->{'curtitle'}} |= $flag;
                $api->store->{$key} = [ $revid, $flag ];
            } else {
                # Err? Just re-check it later.
                $api->warn( "Failed to parse $title (rev $revid): " . $res2->{'error'} . "\n" );
                $process->{$self->{'curtitle'}} |= 0x4000;
                $api->store->{$key} = [ 0, 0 ];
            }

            # If we've been at it long enough, let another task have a go. Set
            # a flag here and exit once we know if we need to set the
            # 'deferred' flag or not.
            if ( time()>=$endtime ) {
                $checkEnd = 1;
            }
        }

        $api->debug( 2, "Finished processing of $self->{'curtitle'}" );
        $process->{$self->{'curtitle'}} |= 0x8000;
        shift @process;
        $self->{'curtitle'} = $process[0] // undef;
        $self->{'ei iter'} = undef;

        if ( $checkEnd ) {
            $self->{'deferred'} = 0 if !defined( $self->{'curtitle'} );
            $api->debug( 2, "Ran out of time, will continue." );
            return 0;
        }
    }

    # If we deferred any during this go-round, do another right away.
    if ( $self->{'deferred'} ) {
        $self->{'deferred'} = 0;
        $api->debug( 2, "Finished list, but deferred. Will continue." );
        return 0;
    }

    # No more pages to check.
    $api->debug( 2, "No more pages to check, sleeping" );
    return 3600;
}
# Build the edit summary for substing the templates named in @remv.
# process() passes the result to $api->edit and logs it.
#
# This base implementation always dies: subclasses have to override it.
sub summary {
    my $self = shift;
    my $api  = shift;
    my @remv = @_;

    die("You must override summary()");
}
# Call this somewhere near the start of run().
#
# Loads the wikitext that the bot's signature expands to and caches it in
# $self->{'sig'}; do_subst() reads it from there. Once the key exists no
# further query is made.
#
# Returns undef on success (or when already cached), or 60 if the API query
# failed, after logging a warning.
sub fetchSig {
    my ($self, $api) = @_;

    # Already cached by an earlier call.
    return undef if exists($self->{'sig'});

    # Pre-save-transform three tildes. One tilde is written as an escape so
    # the literal itself is not a signature marker.
    my $res = $api->query(
        action         => 'parse',
        text           => "~\x7e~",
        pst            => 1,
        onlypst        => 1,
        'contentmodel' => 'wikitext',
    );
    unless ($res->{'code'} eq 'success') {
        $api->warn("Failed to load bot sig: ".$res->{'error'}."\n");
        return 60;
    }

    $self->{'sig'} = $res->{'parse'}{'text'}{'*'};
    return undef;
}
# Expand a single template invocation through the API so that the result can
# replace the invocation in the page text.
#
# Params:
# - $api: AnomieBOT::API
# - $title: Title of the page the invocation is on
# - $oname: Template name as it appears at the start of $txt
# - $name: Template name to put after "subst:"
# - $txt: Wikitext of the invocation, expected to begin with "{{$oname"
# - $nl: If true, expand with a dummy first line to work around T14974
# Returns a two-element list ($wikitext, $fatal):
# - ($text, 0)  the expansion to use in place of $txt
# - (undef, 0)  leave the invocation alone ($txt not recognised, or the
#               template did not subst)
# - (undef, 1)  an API query failed
sub do_subst {
    my ($self,$api,$title,$oname,$name,$txt,$nl)=@_;

    my $bot=$api->user;
    my $sig=$self->{'sig'};

    # Placeholder for the bot's name: every character as a numeric entity.
    # Occurrences of the name in the input are masked with it, so that any
    # occurrence in the output must have been produced by the expansion.
    my $botr=$bot;
    $botr=~s/(.)/ sprintf("&#%d;",ord($1)) /ge;

    my $itxt=$txt;
    $itxt=~s/^\{\{\Q$oname\E/{{subst:$name/;
    if ( $itxt eq $txt ) {
        $api->warn("Huh, \$txt doesn't begin with {{\$oname?\noname = $oname\ntxt = $txt\n");
        return (undef, 0);
    }
    $itxt=~s/\}\}$/|subst=subst:}}/;
    $itxt=~s/\Q$bot\E/$botr/g;
    $itxt="T14974\n$itxt" if $nl; # Work around T14974

    my $res=$api->query(action=>"parse", text=>$itxt, title=>$title, pst=>1, onlypst=>1);
    if($res->{'code'} ne 'success'){
        $api->warn("Failed to expand template: ".$res->{'error'}."\n");
        return (undef, 1);
    }
    my $otxt=$res->{'parse'}{'text'}{'*'};
    $otxt=substr($otxt,7) if $nl; # Drop the "T14974\n" line added above
    if($otxt =~ /^\{\{subst:/ ) {
        my $err = $otxt;
        $err =~ s/\|.*/|.../s;
        $api->warn("Template didn't subst: $err\n");
        return (undef, 0);
    }

    if($otxt=~/\Q$bot\E/){
        # The expansion named the bot. Find the user to name instead: walk
        # back through the page history while the revision text still
        # contains the invocation, keeping the user of the oldest such
        # revision.
        my %q=(
            titles  => $title,
            prop    => 'revisions',
            rvprop  => 'user',
            rvlimit => 1,
        );
        my $u='';
        do {
            $res=$api->query(%q);
            if($res->{'code'} ne 'success'){
                $api->warn("Failed to fetch revisions for $title: ".$res->{'error'}."\n");
                return (undef, 1);
            }
            if(exists($res->{'query-continue'}{'revisions'}{'rvcontinue'})){
                # Older revisions are fetched with their content so they can
                # be checked for the invocation.
                $q{'rvcontinue'}=$res->{'query-continue'}{'revisions'}{'rvcontinue'};
                $q{'rvprop'}='user|content';
                $q{'rvslots'}='main';
            } else {
                delete $q{'rvcontinue'};
            }
            $res=(values %{$res->{'query'}{'pages'}})[0]{'revisions'}[0];
            if(!exists($res->{'slots'}{'main'}{'*'}) || $res->{'slots'}{'main'}{'*'}=~/\Q$txt\E/){
                $u=$res->{'user'};
            } else {
                # This revision predates the invocation: stop here.
                delete $q{'rvcontinue'};
            }
        } while(exists($q{'rvcontinue'}));

        # Signatures. Skipped when no signature is loaded: an empty pattern
        # would make Perl reuse the last successful pattern instead.
        if ( defined($sig) && length($sig) ) {
            $otxt=~s/\Q$sig\E/[[User:$u]] ([[User talk:$u|talk]])/g;
        }

        # Try to handle User links inside URLs. Not perfect, but the best we
        # can do in the situation.
        my $eu = $u;
        $eu =~ s/ /_/g;
        $eu =~ s/([%"&])/ sprintf("%%%02X", ord($1)) /ge;
        # The greedy match replaces only the last occurrence in each URL per
        # pass, so repeat until nothing changes.
        my $tmp;
        do {
            $tmp = $otxt;
            $otxt=~s!((?:\[|https?:)//[^][<>"\x00-\x20\x7F\p{Zs}]+)\Q$bot\E!$1$eu!g;
        } while ( $tmp ne $otxt );

        # Other usename mentions
        $otxt=~s/\Q$bot\E/$u/g;
    }

    # Unmask the bot's name where the input mentioned it...
    $otxt=~s/\Q$botr\E/$bot/g;
    # ...including placeholders whose "&", "#" and ";" came back
    # percent-encoded.
    $botr=~s/([&#;])/ sprintf("%%%02X", ord($1)) /ge;
    $otxt=~s/\Q$botr\E/$bot/g;

    return ($otxt, 0);
}
1;