summaryrefslogtreecommitdiff
path: root/includes/Import.php
diff options
context:
space:
mode:
Diffstat (limited to 'includes/Import.php')
-rw-r--r--includes/Import.php290
1 files changed, 187 insertions, 103 deletions
diff --git a/includes/Import.php b/includes/Import.php
index c76a6834..b874462e 100644
--- a/includes/Import.php
+++ b/includes/Import.php
@@ -35,16 +35,22 @@ class WikiImporter {
private $mLogItemCallback, $mUploadCallback, $mRevisionCallback, $mPageCallback;
private $mSiteInfoCallback, $mTargetNamespace, $mPageOutCallback;
private $mDebug;
+ private $mImportUploads, $mImageBasePath;
/**
* Creates an ImportXMLReader drawing from the source provided
- */
+ */
function __construct( $source ) {
$this->reader = new XMLReader();
stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
$id = UploadSourceAdapter::registerSource( $source );
- $this->reader->open( "uploadsource://$id" );
+ if (defined( 'LIBXML_PARSEHUGE' ) ) {
+ $this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
+ }
+ else {
+ $this->reader->open( "uploadsource://$id" );
+ }
// Default callbacks
$this->setRevisionCallback( array( $this, "importRevision" ) );
@@ -163,12 +169,22 @@ class WikiImporter {
// Don't override namespaces
$this->mTargetNamespace = null;
} elseif( $namespace >= 0 ) {
- // FIXME: Check for validity
+ // @todo FIXME: Check for validity
$this->mTargetNamespace = intval( $namespace );
} else {
return false;
}
}
+
+ /**
+ *
+ */
+ public function setImageBasePath( $dir ) {
+ $this->mImageBasePath = $dir;
+ }
+ public function setImportUploads( $import ) {
+ $this->mImportUploads = $import;
+ }
/**
* Default per-revision callback, performs the import.
@@ -192,9 +208,8 @@ class WikiImporter {
* Dummy for now...
*/
public function importUpload( $revision ) {
- //$dbw = wfGetDB( DB_MASTER );
- //return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
- return false;
+ $dbw = wfGetDB( DB_MASTER );
+ return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
}
/**
@@ -295,7 +310,7 @@ class WikiImporter {
return $buffer;
}
}
-
+
$this->reader->close();
return '';
}
@@ -545,18 +560,27 @@ class WikiImporter {
private function processRevision( $pageInfo, $revisionInfo ) {
$revision = new WikiRevision;
- $revision->setID( $revisionInfo['id'] );
- $revision->setText( $revisionInfo['text'] );
+ if( isset( $revisionInfo['id'] ) ) {
+ $revision->setID( $revisionInfo['id'] );
+ }
+ if ( isset( $revisionInfo['text'] ) ) {
+ $revision->setText( $revisionInfo['text'] );
+ }
$revision->setTitle( $pageInfo['_title'] );
- $revision->setTimestamp( $revisionInfo['timestamp'] );
+
+ if ( isset( $revisionInfo['timestamp'] ) ) {
+ $revision->setTimestamp( $revisionInfo['timestamp'] );
+ } else {
+ $revision->setTimestamp( wfTimestampNow() );
+ }
if ( isset( $revisionInfo['comment'] ) ) {
$revision->setComment( $revisionInfo['comment'] );
}
- if ( isset( $revisionInfo['minor'] ) )
+ if ( isset( $revisionInfo['minor'] ) ) {
$revision->setMinor( true );
-
+ }
if ( isset( $revisionInfo['contributor']['ip'] ) ) {
$revision->setUserIP( $revisionInfo['contributor']['ip'] );
}
@@ -572,7 +596,7 @@ class WikiImporter {
$uploadInfo = array();
$normalFields = array( 'timestamp', 'comment', 'filename', 'text',
- 'src', 'size' );
+ 'src', 'size', 'sha1base36', 'archivename', 'rel' );
$skip = false;
@@ -591,24 +615,59 @@ class WikiImporter {
$uploadInfo[$tag] = $this->nodeContents();
} elseif ( $tag == 'contributor' ) {
$uploadInfo['contributor'] = $this->handleContributor();
+ } elseif ( $tag == 'contents' ) {
+ $contents = $this->nodeContents();
+ $encoding = $this->reader->getAttribute( 'encoding' );
+ if ( $encoding === 'base64' ) {
+ $uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
+ $uploadInfo['isTempSrc'] = true;
+ }
} elseif ( $tag != '#text' ) {
$this->warn( "Unhandled upload XML tag $tag" );
$skip = true;
}
}
+
+ if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
+ $path = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
+ if ( file_exists( $path ) ) {
+ $uploadInfo['fileSrc'] = $path;
+ $uploadInfo['isTempSrc'] = false;
+ }
+ }
- return $this->processUpload( $pageInfo, $uploadInfo );
+ if ( $this->mImportUploads ) {
+ return $this->processUpload( $pageInfo, $uploadInfo );
+ }
+ }
+
+ private function dumpTemp( $contents ) {
+ $filename = tempnam( wfTempDir(), 'importupload' );
+ file_put_contents( $filename, $contents );
+ return $filename;
}
+
private function processUpload( $pageInfo, $uploadInfo ) {
$revision = new WikiRevision;
+ $text = isset( $uploadInfo['text'] ) ? $uploadInfo['text'] : '';
$revision->setTitle( $pageInfo['_title'] );
- $revision->setID( $uploadInfo['id'] );
+ $revision->setID( $pageInfo['id'] );
$revision->setTimestamp( $uploadInfo['timestamp'] );
- $revision->setText( $uploadInfo['text'] );
+ $revision->setText( $text );
$revision->setFilename( $uploadInfo['filename'] );
+ if ( isset( $uploadInfo['archivename'] ) ) {
+ $revision->setArchiveName( $uploadInfo['archivename'] );
+ }
$revision->setSrc( $uploadInfo['src'] );
+ if ( isset( $uploadInfo['fileSrc'] ) ) {
+ $revision->setFileSrc( $uploadInfo['fileSrc'],
+ !empty( $uploadInfo['isTempSrc'] ) );
+ }
+ if ( isset( $uploadInfo['sha1base36'] ) ) {
+ $revision->setSha1Base36( $uploadInfo['sha1base36'] );
+ }
$revision->setSize( intval( $uploadInfo['size'] ) );
$revision->setComment( $uploadInfo['comment'] );
@@ -619,7 +678,7 @@ class WikiImporter {
$revision->setUserName( $uploadInfo['contributor']['username'] );
}
- return $this->uploadCallback( $revision );
+ return call_user_func( $this->mUploadCallback, $revision );
}
private function handleContributor() {
@@ -778,6 +837,7 @@ class XMLReader2 extends XMLReader {
* @ingroup SpecialPage
*/
class WikiRevision {
+ var $importer = null;
var $title = null;
var $id = 0;
var $timestamp = "20010115000000";
@@ -789,6 +849,10 @@ class WikiRevision {
var $type = "";
var $action = "";
var $params = "";
+ var $fileSrc = '';
+ var $sha1base36 = false;
+ var $isTemp = false;
+ var $archiveName = '';
function setTitle( $title ) {
if( is_object( $title ) ) {
@@ -832,27 +896,40 @@ class WikiRevision {
function setSrc( $src ) {
$this->src = $src;
}
+ function setFileSrc( $src, $isTemp ) {
+ $this->fileSrc = $src;
+ $this->fileIsTemp = $isTemp;
+ }
+ function setSha1Base36( $sha1base36 ) {
+ $this->sha1base36 = $sha1base36;
+ }
function setFilename( $filename ) {
$this->filename = $filename;
}
+ function setArchiveName( $archiveName ) {
+ $this->archiveName = $archiveName;
+ }
function setSize( $size ) {
$this->size = intval( $size );
}
-
+
function setType( $type ) {
$this->type = $type;
}
-
+
function setAction( $action ) {
$this->action = $action;
}
-
+
function setParams( $params ) {
$this->params = $params;
}
+ /**
+ * @return Title
+ */
function getTitle() {
return $this->title;
}
@@ -884,23 +961,38 @@ class WikiRevision {
function getSrc() {
return $this->src;
}
+ function getSha1() {
+ if ( $this->sha1base36 ) {
+ return wfBaseConvert( $this->sha1base36, 36, 16 );
+ }
+ return false;
+ }
+ function getFileSrc() {
+ return $this->fileSrc;
+ }
+ function isTempSrc() {
+ return $this->isTemp;
+ }
function getFilename() {
return $this->filename;
}
+ function getArchiveName() {
+ return $this->archiveName;
+ }
function getSize() {
return $this->size;
}
-
+
function getType() {
return $this->type;
}
-
+
function getAction() {
return $this->action;
}
-
+
function getParams() {
return $this->params;
}
@@ -913,9 +1005,11 @@ class WikiRevision {
if( $user ) {
$userId = intval( $user->getId() );
$userText = $user->getName();
+ $userObj = $user;
} else {
$userId = 0;
$userText = $this->getUser();
+ $userObj = new User;
}
// avoid memory leak...?
@@ -928,6 +1022,7 @@ class WikiRevision {
# must create the page...
$pageId = $article->insertOn( $dbw );
$created = true;
+ $oldcountable = null;
} else {
$created = false;
@@ -939,14 +1034,15 @@ class WikiRevision {
__METHOD__
);
if( $prior ) {
- // FIXME: this could fail slightly for multiple matches :P
+ // @todo FIXME: This could fail slightly for multiple matches :P
wfDebug( __METHOD__ . ": skipping existing revision for [[" .
$this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" );
return false;
}
+ $oldcountable = $article->isCountable();
}
- # FIXME: Use original rev_id optionally (better for backups)
+ # @todo FIXME: Use original rev_id optionally (better for backups)
# Insert the row
$revision = new Revision( array(
'page' => $pageId,
@@ -957,47 +1053,27 @@ class WikiRevision {
'timestamp' => $this->timestamp,
'minor_edit' => $this->minor,
) );
- $revId = $revision->insertOn( $dbw );
+ $revision->insertOn( $dbw );
$changed = $article->updateIfNewerOn( $dbw, $revision );
-
- # To be on the safe side...
- $tempTitle = $GLOBALS['wgTitle'];
- $GLOBALS['wgTitle'] = $this->title;
- if( $created ) {
- wfDebug( __METHOD__ . ": running onArticleCreate\n" );
- Article::onArticleCreate( $this->title );
-
- wfDebug( __METHOD__ . ": running create updates\n" );
- $article->createUpdates( $revision );
-
- } elseif( $changed ) {
- wfDebug( __METHOD__ . ": running onArticleEdit\n" );
- Article::onArticleEdit( $this->title );
-
- wfDebug( __METHOD__ . ": running edit updates\n" );
- $article->editUpdates(
- $this->getText(),
- $this->getComment(),
- $this->minor,
- $this->timestamp,
- $revId );
+ if ( $changed !== false ) {
+ wfDebug( __METHOD__ . ": running updates\n" );
+ $article->doEditUpdates( $revision, $userObj, array( 'created' => $created, 'oldcountable' => $oldcountable ) );
}
- $GLOBALS['wgTitle'] = $tempTitle;
return true;
}
-
+
function importLogItem() {
$dbw = wfGetDB( DB_MASTER );
- # FIXME: this will not record autoblocks
+ # @todo FIXME: This will not record autoblocks
if( !$this->getTitle() ) {
- wfDebug( __METHOD__ . ": skipping invalid {$this->type}/{$this->action} log time, timestamp " .
+ wfDebug( __METHOD__ . ": skipping invalid {$this->type}/{$this->action} log time, timestamp " .
$this->timestamp . "\n" );
return;
}
# Check if it exists already
- // FIXME: use original log ID (better for backups)
+ // @todo FIXME: Use original log ID (better for backups)
$prior = $dbw->selectField( 'logging', '1',
array( 'log_type' => $this->getType(),
'log_action' => $this->getAction(),
@@ -1009,9 +1085,9 @@ class WikiRevision {
'log_params' => $this->params ),
__METHOD__
);
- // FIXME: this could fail slightly for multiple matches :P
+ // @todo FIXME: This could fail slightly for multiple matches :P
if( $prior ) {
- wfDebug( __METHOD__ . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp " .
+ wfDebug( __METHOD__ . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp " .
$this->timestamp . "\n" );
return false;
}
@@ -1032,60 +1108,66 @@ class WikiRevision {
}
function importUpload() {
- wfDebug( __METHOD__ . ": STUB\n" );
-
- /**
- // from file revert...
- $source = $this->file->getArchiveVirtualUrl( $this->oldimage );
- $comment = $wgRequest->getText( 'wpComment' );
- // TODO: Preserve file properties from database instead of reloading from file
- $status = $this->file->upload( $source, $comment, $comment );
- if( $status->isGood() ) {
- */
-
- /**
- // from file upload...
- $this->mLocalFile = wfLocalFile( $nt );
- $this->mDestName = $this->mLocalFile->getName();
- //....
- $status = $this->mLocalFile->upload( $this->mTempPath, $this->mComment, $pageText,
- File::DELETE_SOURCE, $this->mFileProps );
- if ( !$status->isGood() ) {
- $resultDetails = array( 'internal' => $status->getWikiText() );
- */
-
- // @todo Fixme: upload() uses $wgUser, which is wrong here
- // it may also create a page without our desire, also wrong potentially.
- // and, it will record a *current* upload, but we might want an archive version here
-
- $file = wfLocalFile( $this->getTitle() );
+ # Construct a file
+ $archiveName = $this->getArchiveName();
+ if ( $archiveName ) {
+ wfDebug( __METHOD__ . "Importing archived file as $archiveName\n" );
+ $file = OldLocalFile::newFromArchiveName( $this->getTitle(),
+ RepoGroup::singleton()->getLocalRepo(), $archiveName );
+ } else {
+ $file = wfLocalFile( $this->getTitle() );
+ wfDebug( __METHOD__ . 'Importing new file as ' . $file->getName() . "\n" );
+ if ( $file->exists() && $file->getTimestamp() > $this->getTimestamp() ) {
+ $archiveName = $file->getTimestamp() . '!' . $file->getName();
+ $file = OldLocalFile::newFromArchiveName( $this->getTitle(),
+ RepoGroup::singleton()->getLocalRepo(), $archiveName );
+ wfDebug( __METHOD__ . "File already exists; importing as $archiveName\n" );
+ }
+ }
if( !$file ) {
- wfDebug( "IMPORT: Bad file. :(\n" );
+ wfDebug( __METHOD__ . ': Bad file for ' . $this->getTitle() . "\n" );
return false;
}
-
- $source = $this->downloadSource();
+
+ # Get the file source or download if necessary
+ $source = $this->getFileSrc();
+ $flags = $this->isTempSrc() ? File::DELETE_SOURCE : 0;
+ if ( !$source ) {
+ $source = $this->downloadSource();
+ $flags |= File::DELETE_SOURCE;
+ }
if( !$source ) {
- wfDebug( "IMPORT: Could not fetch remote file. :(\n" );
+ wfDebug( __METHOD__ . ": Could not fetch remote file.\n" );
+ return false;
+ }
+ $sha1 = $this->getSha1();
+ if ( $sha1 && ( $sha1 !== sha1_file( $source ) ) ) {
+ if ( $flags & File::DELETE_SOURCE ) {
+ # Broken file; delete it if it is a temporary file
+ unlink( $source );
+ }
+ wfDebug( __METHOD__ . ": Corrupt file $source.\n" );
return false;
}
- $status = $file->upload( $source,
- $this->getComment(),
- $this->getComment(), // Initial page, if none present...
- File::DELETE_SOURCE,
- false, // props...
- $this->getTimestamp() );
-
- if( $status->isGood() ) {
- // yay?
- wfDebug( "IMPORT: is ok?\n" );
+ $user = User::newFromName( $this->user_text );
+
+ # Do the actual upload
+ if ( $archiveName ) {
+ $status = $file->uploadOld( $source, $archiveName,
+ $this->getTimestamp(), $this->getComment(), $user, $flags );
+ } else {
+ $status = $file->upload( $source, $this->getComment(), $this->getComment(),
+ $flags, false, $this->getTimestamp(), $user );
+ }
+
+ if ( $status->isGood() ) {
+ wfDebug( __METHOD__ . ": Succesful\n" );
return true;
+ } else {
+ wfDebug( __METHOD__ . ': failed: ' . $status->getXml() . "\n" );
+ return false;
}
-
- wfDebug( "IMPORT: is bad? " . $status->getXml() . "\n" );
- return false;
-
}
function downloadSource() {
@@ -1101,7 +1183,7 @@ class WikiRevision {
return false;
}
- // @todo Fixme!
+ // @todo FIXME!
$src = $this->getSrc();
$data = Http::get( $src );
if( !$data ) {
@@ -1161,7 +1243,9 @@ class ImportStreamSource {
}
static function newFromFile( $filename ) {
- $file = @fopen( $filename, 'rt' );
+ wfSuppressWarnings();
+ $file = fopen( $filename, 'rt' );
+ wfRestoreWarnings();
if( !$file ) {
return Status::newFatal( "importcantopen" );
}
@@ -1202,7 +1286,7 @@ class ImportStreamSource {
# quicker and sorts out user-agent problems which might
# otherwise prevent importing from large sites, such
# as the Wikimedia cluster, etc.
- $data = Http::request( $method, $url );
+ $data = Http::request( $method, $url, array( 'followRedirects' => true ) );
if( $data !== false ) {
$file = tmpfile();
fwrite( $file, $data );