summary | refs | log | tree | commit | diff
path: root/maintenance/importDump.php
diff options
context:
space:
mode:
Diffstat (limited to 'maintenance/importDump.php')
-rw-r--r-- maintenance/importDump.php | 162
1 files changed, 121 insertions, 41 deletions
diff --git a/maintenance/importDump.php b/maintenance/importDump.php
index 5f47635e..c160b036 100644
--- a/maintenance/importDump.php
+++ b/maintenance/importDump.php
@@ -22,24 +22,108 @@
* @ingroup Maintenance
*/
-$optionsWithArgs = array( 'report' );
-
-require_once( dirname( __FILE__ ) . '/commandLine.inc' );
+require_once( dirname( __FILE__ ) . '/Maintenance.php' );
/**
* @ingroup Maintenance
*/
-class BackupReader {
+class BackupReader extends Maintenance {
var $reportingInterval = 100;
- var $reporting = true;
var $pageCount = 0;
var $revCount = 0;
var $dryRun = false;
- var $debug = false;
var $uploads = false;
+ var $imageBasePath = false;
+ var $nsFilter = false;
+ /**
+  * Set up the script: build the help description and declare the
+  * command-line options and the optional dump-file argument.
+  */
function __construct() {
+ parent::__construct();
+ // Look for the zlib/bzip2 stream wrappers so the help text can state
+ // whether .gz and .bz2 input is readable on this PHP build.
+ $gz = in_array('compress.zlib', stream_get_wrappers()) ? 'ok' : '(disabled; requires PHP zlib module)';
+ $bz2 = in_array('compress.bzip2', stream_get_wrappers()) ? 'ok' : '(disabled; requires PHP bzip2 module)';
+
+ $this->mDescription = <<<TEXT
+This script reads pages from an XML file as produced from Special:Export or
+dumpBackup.php, and saves them into the current wiki.
+
+Compressed XML files may be read directly:
+ .gz $gz
+ .bz2 $bz2
+ .7z (if 7za executable is in PATH)
+
+Note that for very large data sets, importDump.php may be slow; there are
+alternate methods which can be much faster for full site restoration:
+<http://www.mediawiki.org/wiki/Manual:Importing_XML_dumps>
+TEXT;
+ // Keep a write handle on stderr in $this->stderr.
$this->stderr = fopen( "php://stderr", "wt" );
+ // NOTE(review): the trailing "false, true" arguments presumably mean
+ // "not required" and "takes a value" - confirm against Maintenance::addOption.
+ $this->addOption( 'report',
+ 'Report position and speed after every n pages processed', false, true );
+ $this->addOption( 'namespaces',
+ 'Import only the pages from namespaces belonging to the list of ' .
+ 'pipe-separated namespace names or namespace indexes', false, true );
+ $this->addOption( 'dry-run', 'Parse dump without actually importing pages' );
+ $this->addOption( 'debug', 'Output extra verbose debug information' );
+ $this->addOption( 'uploads', 'Process file upload data if included (experimental)' );
+ $this->addOption( 'image-base-path', 'Import files from a specified path', false, true );
+ $this->addArg( 'file', 'Dump file to import [else use stdin]', false );
+ }
+
+ /**
+  * Script entry point: copy the parsed command-line options into this
+  * object's state, import from the given file (or from stdin when no
+  * file argument is present), then print a completion notice.
+  */
+ public function execute() {
+     if ( wfReadOnly() ) {
+         $this->error( "Wiki is in read-only mode; you'll need to disable it for import to work.", true );
+     }
+
+     $interval = $this->getOption( 'report', 100 );
+     $this->reportingInterval = intval( $interval );
+     $this->dryRun = $this->hasOption( 'dry-run' );
+     // Upload processing is described as experimental by its option text
+     $this->uploads = $this->hasOption( 'uploads' );
+
+     if ( $this->hasOption( 'image-base-path' ) ) {
+         $basePath = $this->getOption( 'image-base-path' );
+         $this->imageBasePath = $basePath;
+     }
+     if ( $this->hasOption( 'namespaces' ) ) {
+         // The option value is a pipe-separated list of names and/or indexes
+         $names = explode( '|', $this->getOption( 'namespaces' ) );
+         $this->setNsfilter( $names );
+     }
+
+     if ( !$this->hasArg() ) {
+         $this->importFromStdin();
+     } else {
+         $dumpFile = $this->getArg();
+         $this->importFromFile( $dumpFile );
+     }
+
+     $this->output( "Done!\n" );
+     $this->output( "You might want to run rebuildrecentchanges.php to regenerate RecentChanges\n" );
+ }
+
+ /**
+  * Set the namespace filter from a list of namespace names and/or
+  * numeric index strings. An empty list switches filtering off.
+  *
+  * @param $namespaces Array of namespace names or index strings
+  */
+ function setNsfilter( array $namespaces ) {
+     if ( count( $namespaces ) != 0 ) {
+         // Resolve every entry to a namespace index, then drop duplicates
+         $indexes = array_map( array( $this, 'getNsIndex' ), $namespaces );
+         $this->nsFilter = array_unique( $indexes );
+     } else {
+         $this->nsFilter = false;
+     }
+ }
+
+ /**
+  * Resolve one user-supplied namespace token to a namespace index.
+  *
+  * The token is first looked up as a namespace name; failing that it is
+  * accepted as a number if it is a plain integer string for which the
+  * content language returns namespace text. Anything else is reported
+  * through error() with the die flag set.
+  *
+  * @param $namespace String namespace name or index as typed by the user
+  * @return the value returned by the language lookup, or the parsed integer
+  */
+ private function getNsIndex( $namespace ) {
+     global $wgContLang;
+
+     $byName = $wgContLang->getNsIndex( $namespace );
+     if ( $byName !== false ) {
+         return $byName;
+     }
+
+     $numeric = intval( $namespace );
+     // Only accept strings that round-trip exactly, e.g. "4" but not "4x"
+     $isPlainInteger = ( strval( $numeric ) === $namespace );
+     if ( $isPlainInteger && $wgContLang->getNsText( $numeric ) !== false ) {
+         return $numeric;
+     }
+
+     $this->error( "Unknown namespace text / index specified: $namespace", true );
+ }
+
+ /**
+  * Tell whether an object lies outside the active namespace filter.
+  *
+  * @param $obj Title, Revision or WikiRevision; any other type prints a
+  *   backtrace and is reported through error() with the die flag set
+  * @return bool true when a filter is set and the object's namespace is
+  *   not in it; false when no filter is set or the namespace is listed
+  */
+ private function skippedNamespace( $obj ) {
+     if ( $obj instanceof Title ) {
+         $nsIndex = $obj->getNamespace();
+     } elseif ( $obj instanceof Revision ) {
+         $nsIndex = $obj->getTitle()->getNamespace();
+     } elseif ( $obj instanceof WikiRevision ) {
+         $nsIndex = $obj->title->getNamespace();
+     } else {
+         echo wfBacktrace();
+         $this->error( "Cannot get namespace of object in " . __METHOD__, true );
+     }
+
+     // nsFilter stays false until setNsfilter() stores an array in it
+     if ( !is_array( $this->nsFilter ) ) {
+         return false;
+     }
+     return !in_array( $nsIndex, $this->nsFilter );
}
function reportPage( $page ) {
@@ -53,6 +137,10 @@ class BackupReader {
return;
}
+ if ( $this->skippedNamespace( $title ) ) {
+ return;
+ }
+
$this->revCount++;
$this->report();
@@ -63,6 +151,9 @@ class BackupReader {
function handleUpload( $revision ) {
if ( $this->uploads ) {
+ if ( $this->skippedNamespace( $revision ) ) {
+ return;
+ }
$this->uploadCount++;
// $this->report();
$this->progress( "upload: " . $revision->getFilename() );
@@ -77,6 +168,9 @@ class BackupReader {
}
function handleLogItem( $rev ) {
+ if ( $this->skippedNamespace( $rev ) ) {
+ return;
+ }
$this->revCount++;
$this->report();
@@ -92,7 +186,7 @@ class BackupReader {
}
+ /**
+  * Print the running page/revision tally with pages-per-second and
+  * revisions-per-second figures unless quiet mode is on, then call
+  * wfWaitForSlaves() and wfDoUpdates( 'commit' ).
+  */
function showReport() {
- if ( $this->reporting ) {
+ // --quiet used to clear $reporting, so the quiet flag must be negated here
+ if ( !$this->mQuiet ) {
$delta = wfTime() - $this->startTime;
if ( $delta ) {
$rate = sprintf( "%.2f", $this->pageCount / $delta );
@@ -102,12 +196,15 @@ class BackupReader {
$revrate = '-';
}
# Logs dumps don't have page tallies
- if ( $this->pageCount )
+ if ( $this->pageCount ) {
$this->progress( "$this->pageCount ($rate pages/sec $revrate revs/sec)" );
- else
+ } else {
$this->progress( "$this->revCount ($revrate revs/sec)" );
+ }
}
- wfWaitForSlaves( 5 );
+ wfWaitForSlaves();
+ // XXX: Don't let deferred jobs array get absurdly large (bug 24375)
+ wfDoUpdates( 'commit' );
}
function progress( $string ) {
@@ -131,6 +228,9 @@ class BackupReader {
+ /**
+  * Import a dump read from standard input.
+  *
+  * When stdin is an interactive terminal, maybeHelp( true ) is called
+  * before any reading starts.
+  *
+  * @return whatever importFromHandle() returns for the stdin handle
+  */
function importFromStdin() {
$file = fopen( 'php://stdin', 'rt' );
+ // posix_isatty() comes from the optional POSIX extension; where it is
+ // missing, skip the terminal check instead of dying on an undefined function.
+ if( function_exists( 'posix_isatty' ) && posix_isatty( $file ) ) {
+ $this->maybeHelp( true );
+ }
return $this->importFromHandle( $file );
}
@@ -140,7 +240,9 @@ class BackupReader {
$source = new ImportStreamSource( $handle );
$importer = new WikiImporter( $source );
- $importer->setDebug( $this->debug );
+ if( $this->hasOption( 'debug' ) ) {
+ $importer->setDebug( true );
+ }
$importer->setPageCallback( array( &$this, 'reportPage' ) );
$this->importCallback = $importer->setRevisionCallback(
array( &$this, 'handleRevision' ) );
@@ -148,6 +250,12 @@ class BackupReader {
array( &$this, 'handleUpload' ) );
$this->logItemCallback = $importer->setLogItemCallback(
array( &$this, 'handleLogItem' ) );
+ if ( $this->uploads ) {
+ $importer->setImportUploads( true );
+ }
+ if ( $this->imageBasePath ) {
+ $importer->setImageBasePath( $this->imageBasePath );
+ }
if ( $this->dryRun ) {
$importer->setPageOutCallback( null );
@@ -157,33 +265,5 @@ class BackupReader {
}
}
-if ( wfReadOnly() ) {
- wfDie( "Wiki is in read-only mode; you'll need to disable it for import to work.\n" );
-}
-
-$reader = new BackupReader();
-if ( isset( $options['quiet'] ) ) {
- $reader->reporting = false;
-}
-if ( isset( $options['report'] ) ) {
- $reader->reportingInterval = intval( $options['report'] );
-}
-if ( isset( $options['dry-run'] ) ) {
- $reader->dryRun = true;
-}
-if ( isset( $options['debug'] ) ) {
- $reader->debug = true;
-}
-if ( isset( $options['uploads'] ) ) {
- $reader->uploads = true; // experimental!
-}
-
-if ( isset( $args[0] ) ) {
- $result = $reader->importFromFile( $args[0] );
-} else {
- $result = $reader->importFromStdin();
-}
-
-echo "Done!\n";
-echo "You might want to run rebuildrecentchanges.php to regenerate\n";
-echo "the recentchanges page.\n";
+// NOTE(review): presumably these two lines hand BackupReader to the shared
+// Maintenance runner, replacing the inline driver code removed above -
+// confirm against Maintenance.php.
+$maintClass = 'BackupReader';
+require_once( RUN_MAINTENANCE_IF_MAIN );