summaryrefslogtreecommitdiff
path: root/maintenance/importImages.php
diff options
context:
space:
mode:
Diffstat (limited to 'maintenance/importImages.php')
-rw-r--r--maintenance/importImages.php168
1 files changed, 133 insertions, 35 deletions
diff --git a/maintenance/importImages.php b/maintenance/importImages.php
index 7997b0d5..f0dd388a 100644
--- a/maintenance/importImages.php
+++ b/maintenance/importImages.php
@@ -2,17 +2,25 @@
/**
* Maintenance script to import one or more images from the local file system into
- * the wiki without using the web-based interface
+ * the wiki without using the web-based interface.
+ *
+ * "Smart import" additions:
+ * - aim: preserve the essential metadata (user, description) when importing media from an existing wiki
+ * - process:
+ * - interface with the source wiki, don't use bare files only (see --source-wiki-url).
+ * - fetch metadata from source wiki for each file to import.
+ * - commit the fetched metadata to the destination wiki while submitting.
*
* @file
* @ingroup Maintenance
* @author Rob Church <robchur@gmail.com>
+ * @author Mij <mij@bitchx.it>
*/
-$optionsWithArgs = array( 'extensions', 'comment', 'comment-file', 'comment-ext', 'user', 'license' );
-require_once( 'commandLine.inc' );
-require_once( 'importImages.inc.php' );
-$added = $skipped = $overwritten = 0;
+$optionsWithArgs = array( 'extensions', 'comment', 'comment-file', 'comment-ext', 'user', 'license', 'sleep', 'limit', 'from', 'source-wiki-url' );
+require_once( dirname(__FILE__) . '/commandLine.inc' );
+require_once( dirname(__FILE__) . '/importImages.inc' );
+$processed = $added = $ignored = $skipped = $overwritten = $failed = 0;
echo( "Import Images\n\n" );
@@ -25,7 +33,7 @@ if( count( $args ) > 0 ) {
if (isset($options['protect']) && isset($options['unprotect']))
die("Cannot specify both protect and unprotect. Only 1 is allowed.\n");
- if ($options['protect'] == 1)
+if (isset($options['protect']) && $options['protect'] == 1)
die("You must specify a protection option.\n");
# Prepare the list of allowed extensions
@@ -45,8 +53,27 @@ if( count( $args ) > 0 ) {
$user = User::newFromName( 'Maintenance script' );
$wgUser = $user;
+ # Get block check. If a value is given, this specifies how often the check is performed
+ if ( isset( $options['check-userblock'] ) ) {
+ if ( !$options['check-userblock'] ) $checkUserBlock = 1;
+ else $checkUserBlock = (int)$options['check-userblock'];
+ } else {
+ $checkUserBlock = false;
+ }
+
+ # Get --from
+ $from = @$options['from'];
+
+ # Get sleep time.
+ $sleep = @$options['sleep'];
+ if ( $sleep ) $sleep = (int)$sleep;
+
+ # Get limit number
+ $limit = @$options['limit'];
+ if ( $limit ) $limit = (int)$limit;
+
# Get the upload comment
- $comment = 'Importing image file';
+ $comment = NULL;
if ( isset( $options['comment-file'] ) ) {
$comment = file_get_contents( $options['comment-file'] );
@@ -76,6 +103,23 @@ if( count( $args ) > 0 ) {
continue;
}
+ if ( $from ) {
+ if ( $from == $title->getDBkey() ) {
+ $from = NULL;
+ } else {
+ $ignored++;
+ continue;
+ }
+ }
+
+ if ( $checkUserBlock && ( ( $processed % $checkUserBlock ) == 0 ) ) {
+ $user->clearInstanceCache( 'name' ); //reload from DB!
+ if ( $user->isBlocked() ) {
+ echo( $user->getName() . " was blocked! Aborting.\n" );
+ break;
+ }
+ }
+
# Check existence
$image = wfLocalFile( $title );
if( $image->exists() ) {
@@ -88,36 +132,73 @@ if( count( $args ) > 0 ) {
continue;
}
} else {
- echo( "Importing {$base}..." );
- $svar = 'added';
- }
+ if ( isset( $options['skip-dupes'] ) ) {
+ $repo = $image->getRepo();
+ $sha1 = File::sha1Base36( $file ); #XXX: we end up calculating this again when actually uploading. that sucks.
- # Find comment text
- $commentText = false;
+ $dupes = $repo->findBySha1( $sha1 );
- if ( $commentExt ) {
- $f = findAuxFile( $file, $commentExt );
- if ( !$f ) {
- echo( " No comment file with extension {$commentExt} found for {$file}, using default comment. " );
- } else {
- $commentText = file_get_contents( $f );
- if ( !$f ) {
- echo( " Failed to load comment file {$f}, using default comment. " );
+ if ( $dupes ) {
+ echo( "{$base} already exists as " . $dupes[0]->getName() . ", skipping\n" );
+ $skipped++;
+ continue;
}
}
- }
- if ( !$commentText ) {
- $commentText = $comment;
+ echo( "Importing {$base}..." );
+ $svar = 'added';
}
+ if (isset( $options['source-wiki-url'])) {
+ /* find comment text directly from source wiki, through MW's API */
+ $real_comment = getFileCommentFromSourceWiki($options['source-wiki-url'], $base);
+ if ($real_comment === false)
+ $commentText = $comment;
+ else
+ $commentText = $real_comment;
+
+ /* find user directly from source wiki, through MW's API */
+ $real_user = getFileUserFromSourceWiki($options['source-wiki-url'], $base);
+ if ($real_user === false) {
+ $wgUser = $user;
+ } else {
+ $wgUser = User::newFromName($real_user);
+ if ($wgUser === false) {
+ # user does not exist in target wiki
+ echo ("failed: user '$real_user' does not exist in target wiki.");
+ continue;
+ }
+ }
+ } else {
+ # Find comment text
+ $commentText = false;
+
+ if ( $commentExt ) {
+ $f = findAuxFile( $file, $commentExt );
+ if ( !$f ) {
+ echo( " No comment file with extension {$commentExt} found for {$file}, using default comment. " );
+ } else {
+ $commentText = file_get_contents( $f );
+ if ( !$f ) {
+ echo( " Failed to load comment file {$f}, using default comment. " );
+ }
+ }
+ }
+
+ if ( !$commentText ) {
+ $commentText = $comment;
+ }
+ }
+
+
# Import the file
if ( isset( $options['dry'] ) ) {
- echo( " publishing {$file}... " );
+ echo( " publishing {$file} by '" . $wgUser->getName() . "', comment '$commentText'... " );
} else {
$archive = $image->publish( $file );
if( WikiError::isError( $archive ) || !$archive->isGood() ) {
echo( "failed.\n" );
+ $failed++;
continue;
}
}
@@ -141,7 +222,6 @@ if( count( $args ) > 0 ) {
}
- $$svar++;
if ( isset( $options['dry'] ) ) {
echo( "done.\n" );
} else if ( $image->recordUpload( $archive->value, $commentText, $license ) ) {
@@ -164,14 +244,24 @@ if( count( $args ) > 0 ) {
} else {
echo( "failed.\n" );
+ $svar = 'failed';
}
+ $$svar++;
+ $processed++;
+
+ if ( $limit && $processed >= $limit )
+ break;
+
+ if ( $sleep )
+ sleep( $sleep );
}
# Print out some statistics
echo( "\n" );
- foreach( array( 'count' => 'Found', 'added' => 'Added',
- 'skipped' => 'Skipped', 'overwritten' => 'Overwritten' ) as $var => $desc ) {
+ foreach( array( 'count' => 'Found', 'limit' => 'Limit', 'ignored' => 'Ignored',
+ 'added' => 'Added', 'skipped' => 'Skipped', 'overwritten' => 'Overwritten',
+ 'failed' => 'Failed' ) as $var => $desc ) {
if( $$var > 0 )
echo( "{$desc}: {$$var}\n" );
}
@@ -184,14 +274,14 @@ if( count( $args ) > 0 ) {
showUsage();
}
-exit();
+exit(0);
function showUsage( $reason = false ) {
if( $reason ) {
echo( $reason . "\n" );
}
- echo <<<END
+ echo <<<TEXT
Imports images and other media files into the wiki
USAGE: php importImages.php [options] <dir>
@@ -199,17 +289,25 @@ USAGE: php importImages.php [options] <dir>
Options:
--extensions=<exts> Comma-separated list of allowable extensions, defaults to \$wgFileExtensions
---overwrite Overwrite existing images if a conflicting-named image is found
+--overwrite Overwrite existing images with the same name (default is to skip them)
+--limit=<num> Limit the number of images to process. Ignored or skipped images are not counted.
+--from=<name> Ignore all files until the one with the given name. Useful for resuming
+ aborted imports. <name> should be the file's canonical database form.
+--skip-dupes Skip images that were already uploaded under a different name (check SHA1)
+--sleep=<sec> Sleep between files. Useful mostly for debugging.
--user=<username> Set username of uploader, default 'Maintenance script'
---comment=<text> Set upload summary comment, default 'Importing image file'
+--check-userblock Check if the user got blocked during import.
+--comment=<text> Set upload summary comment, default 'Importing image file'.
--comment-file=<file> Set upload summary comment the the content of <file>.
--comment-ext=<ext> Causes the comment for each file to be loaded from a file with the same name
- but the extension <ext>.
+ but the extension <ext>. If a global comment is also given, it is appended.
--license=<code> Use an optional license template
--dry Dry run, don't import anything
--protect=<protect> Specify the protect value (autoconfirmed,sysop)
--unprotect Unprotects all uploaded images
+--source-wiki-url if specified, take User and Comment data for each imported file from this URL.
+ For example, --source-wiki-url="http://en.wikipedia.org/"
-END;
- exit();
-} \ No newline at end of file
+TEXT;
+ exit(1);
+}