mDescription = "Does something with a dump";
		$this->addOption( 'file', 'File with text to run.', false, true );
		$this->addOption( 'dump', 'XML dump to execute all revisions.', false, true );
		$this->addOption( 'from', 'Article from XML dump to start from.', false, true );
	}

	/**
	 * Script entry point.
	 *
	 * Exactly one of --file or --dump must be supplied:
	 *  - --file: the file contents become the text of a single revision whose
	 *    title is derived from the file name (URL-decoded, ".txt" suffix
	 *    stripped), and that revision is handed to handleRevision().
	 *  - --dump: only "-" (read the XML dump from stdin) is supported; every
	 *    revision in the dump is passed to handleRevision(), then timing and
	 *    peak-memory statistics are written through error().
	 */
	public function execute() {
		// XOR: reject both "neither option given" and "both options given".
		if ( !( $this->hasOption( 'file' ) ^ $this->hasOption( 'dump' ) ) ) {
			$this->error( "You must provide a file or dump", true );
		}

		$this->checkOptions();

		if ( $this->hasOption( 'file' ) ) {
			$fileName = $this->getOption( 'file' );

			// file_get_contents() returns false when the file cannot be read;
			// stop here instead of feeding `false` to the revision as its text.
			// NOTE(review): like the other fatal paths in this method, this
			// relies on error( ..., true ) aborting the script.
			$text = file_get_contents( $fileName );
			if ( $text === false ) {
				$this->error( "Unable to read file {$fileName}", true );
			}

			$revision = new WikiRevision;
			$revision->setText( $text );
			$revision->setTitle( Title::newFromText( rawurldecode( basename( $fileName, '.txt' ) ) ) );
			$this->handleRevision( $revision );
			return;
		}

		$this->startTime = wfTime();

		if ( $this->getOption( 'dump' ) == '-' ) {
			$source = new ImportStreamSource( $this->getStdin() );
		} else {
			// $source stays undefined on this path, so the code below depends
			// on this call not returning.
			$this->error( "Sorry, I don't support dump filenames yet. Use - and provide it on stdin on the meantime.", true );
		}
		$importer = new WikiImporter( $source );

		// Objects are passed by handle, so no reference to $this is needed
		// to build the callback.
		$importer->setRevisionCallback( array( $this, 'handleRevision' ) );

		$this->from = $this->getOption( 'from', null );
		$this->count = 0;
		$importer->doImport();

		$this->conclusions();

		$delta = wfTime() - $this->startTime;
		$this->error( "Done {$this->count} revisions in " . round( $delta, 2 ) . " seconds " );
		// Guard against division by zero on a zero elapsed time.
		if ( $delta > 0 ) {
			$this->error( round( $this->count / $delta, 2 ) . " pages/sec" );
		}

		# Perform the memory_get_peak_usage() when all the other data has been output so there's no damage if it dies.
		# It is only available since 5.2.0 (since 5.2.1 if you haven't compiled with --enable-memory-limit)
		$this->error( "Memory peak usage of " . memory_get_peak_usage() . " bytes\n" );
	}

	/**
	 * Runs the parent's final setup, then, when this script reports
	 * Maintenance::DB_NONE as its database type, switches off database
	 * messages, sets the localisation cache store class to LCStore_Null and
	 * registers disableInterwikis() on the InterwikiLoadPrefix hook.
	 */
	public function finalSetup() {
		parent::finalSetup();

		if ( $this->getDbType() == Maintenance::DB_NONE ) {
			global $wgUseDatabaseMessages, $wgLocalisationCacheConf, $wgHooks;
			$wgUseDatabaseMessages = false;
			$wgLocalisationCacheConf['storeClass'] = 'LCStore_Null';
			$wgHooks['InterwikiLoadPrefix'][] = 'DumpIterator::disableInterwikis';
		}
	}

	/**
	 * Handler registered on the InterwikiLoadPrefix hook by finalSetup().
	 *
	 * @param $prefix Unused.
	 * @param $data Unused; left untouched.
	 * @return bool Always false.
	 */
	public static function disableInterwikis( $prefix, &$data ) {
		# Title::newFromText will check on each namespaced article if it's an interwiki.
		# We always answer that it is not.
		return false;
	}

	/**
	 * Callback function for each revision, child classes should override
	 * processRevision instead.
	 *
	 * Revisions without a title are reported and dropped. While a --from
	 * title is pending, revisions are counted but not processed until one
	 * whose title equals it is seen; the counter then restarts at 1 and
	 * processing continues normally from that revision on.
	 *
	 * @param $rev Revision
	 */
	public function handleRevision( $rev ) {
		$title = $rev->getTitle();
		if ( !$title ) {
			$this->error( "Got bogus revision with null title!" );
			return;
		}

		$this->count++;
		if ( isset( $this->from ) ) {
			// Loose comparison between the --from option value and the title.
			// NOTE(review): presumably relies on the title's string
			// conversion - confirm.
			if ( $this->from != $title ) {
				return;
			}
			$this->output( "Skipped " . ( $this->count - 1 ) . " pages\n" );

			$this->count = 1;
			// Clear the marker so later revisions are no longer filtered.
			$this->from = null;
		}

		$this->processRevision( $rev );
	}

	/**
	 * Stub function for processing additional options.
	 * Called by execute() right after the --file/--dump check.
	 */
	public function checkOptions() {
		return;
	}

	/**
	 * Stub function for giving data about what was computed.
	 * Called by execute() once the dump import has finished.
	 */
	public function conclusions() {
		return;
	}

	/**
	 * Core function which does whatever the maintenance script is designed to do.
	 *
	 * @param $rev Revision
	 */
	abstract public function processRevision( $rev );
}

/**
 * Maintenance script that prints the title and timestamp of every revision
 * whose text matches the regular expression given with --regex.
 */
class SearchDump extends DumpIterator {

	public function __construct() {
		parent::__construct();
		$this->mDescription = "Runs a regex in the revisions from a dump";
		$this->addOption( 'regex', 'Searching regex', true, true );
	}

	/**
	 * @return int Maintenance::DB_NONE
	 */
	public function getDbType() {
		return Maintenance::DB_NONE;
	}

	/**
	 * Tests the revision text against the --regex pattern and prints a line
	 * for each match.
	 *
	 * @param $rev Revision
	 */
	public function processRevision( $rev ) {
		$matched = preg_match( $this->getOption( 'regex' ), $rev->getText() );

		// preg_match() returns false on failure (e.g. a malformed pattern or
		// a PCRE runtime limit being hit). Report it rather than silently
		// treating it as "no match", but keep going with later revisions.
		if ( $matched === false ) {
			$this->error( "Regex failed on " . $rev->getTitle() . " (preg error code " . preg_last_error() . ")" );
			return;
		}

		if ( $matched ) {
			$this->output( $rev->getTitle() . " matches at edit from " . $rev->getTimestamp() . "\n" );
		}
	}
}

$maintClass = "SearchDump";
require_once( RUN_MAINTENANCE_IF_MAIN );