summaryrefslogtreecommitdiff
path: root/maintenance/namespaceDupes.php
diff options
context:
space:
mode:
Diffstat (limited to 'maintenance/namespaceDupes.php')
-rw-r--r--maintenance/namespaceDupes.php390
1 files changed, 260 insertions, 130 deletions
diff --git a/maintenance/namespaceDupes.php b/maintenance/namespaceDupes.php
index cbc389be..96e01fe4 100644
--- a/maintenance/namespaceDupes.php
+++ b/maintenance/namespaceDupes.php
@@ -39,28 +39,46 @@ class NamespaceConflictChecker extends Maintenance {
*/
protected $db;
+ private $resolvableCount = 0;
+ private $totalPages = 0;
+
public function __construct() {
parent::__construct();
$this->mDescription = "";
$this->addOption( 'fix', 'Attempt to automatically fix errors' );
- $this->addOption( 'suffix', "Dupes will be renamed with correct namespace with " .
+ $this->addOption( 'merge', "Instead of renaming conflicts, do a history merge with " .
+ "the correct title" );
+ $this->addOption( 'add-suffix', "Dupes will be renamed with correct namespace with " .
"<text> appended after the article name", false, true );
- $this->addOption( 'prefix', "Do an explicit check for the given title prefix " .
- "appended after the article name", false, true );
+ $this->addOption( 'add-prefix', "Dupes will be renamed with correct namespace with " .
+ "<text> prepended before the article name", false, true );
+ $this->addOption( 'source-pseudo-namespace', "Move all pages with the given source " .
+ "prefix (with an implied colon following it). If --dest-namespace is not specified, " .
+ "the colon will be replaced with a hyphen.",
+ false, true );
+ $this->addOption( 'dest-namespace', "In combination with --source-pseudo-namespace, " .
+ "specify the namespace ID of the destination.", false, true );
+ $this->addOption( 'move-talk', "If this is specified, pages in the Talk namespace that " .
+ "begin with a conflicting prefix will be renamed, for example " .
+ "Talk:File:Foo -> File_Talk:Foo" );
}
public function execute() {
$this->db = wfGetDB( DB_MASTER );
- $fix = $this->hasOption( 'fix' );
- $suffix = $this->getOption( 'suffix', '' );
- $prefix = $this->getOption( 'prefix', '' );
- $key = intval( $this->getOption( 'key', 0 ) );
-
- if ( $prefix ) {
- $retval = $this->checkPrefix( $key, $prefix, $fix, $suffix );
+ $options = array(
+ 'fix' => $this->hasOption( 'fix' ),
+ 'merge' => $this->hasOption( 'merge' ),
+ 'add-suffix' => $this->getOption( 'add-suffix', '' ),
+ 'add-prefix' => $this->getOption( 'add-prefix', '' ),
+ 'move-talk' => $this->hasOption( 'move-talk' ),
+ 'source-pseudo-namespace' => $this->getOption( 'source-pseudo-namespace', '' ),
+ 'dest-namespace' => intval( $this->getOption( 'dest-namespace', 0 ) ) );
+
+ if ( $options['source-pseudo-namespace'] !== '' ) {
+ $retval = $this->checkPrefix( $options );
} else {
- $retval = $this->checkAll( $fix, $suffix );
+ $retval = $this->checkAll( $options );
}
if ( $retval ) {
@@ -71,13 +89,13 @@ class NamespaceConflictChecker extends Maintenance {
}
/**
- * @todo Document
- * @param bool $fix Whether or not to fix broken entries
- * @param string $suffix Suffix to append to renamed articles
+ * Check all namespaces
+ *
+ * @param array $options Associative array of validated command-line options
*
* @return bool
*/
- private function checkAll( $fix, $suffix = '' ) {
+ private function checkAll( $options ) {
global $wgContLang, $wgNamespaceAliases, $wgCapitalLinks;
$spaces = array();
@@ -131,14 +149,31 @@ class NamespaceConflictChecker extends Maintenance {
}
}
- ksort( $spaces );
- asort( $spaces );
+ // Sort by namespace index, and if there are two with the same index,
+ // break the tie by sorting by name
+ $origSpaces = $spaces;
+ uksort( $spaces, function ( $a, $b ) use ( $origSpaces ) {
+ if ( $origSpaces[$a] < $origSpaces[$b] ) {
+ return -1;
+ } elseif ( $origSpaces[$a] > $origSpaces[$b] ) {
+ return 1;
+ } elseif ( $a < $b ) {
+ return -1;
+ } elseif ( $a > $b ) {
+ return 1;
+ } else {
+ return 0;
+ }
+ } );
$ok = true;
foreach ( $spaces as $name => $ns ) {
- $ok = $this->checkNamespace( $ns, $name, $fix, $suffix ) && $ok;
+ $ok = $this->checkNamespace( $ns, $name, $options ) && $ok;
}
+ $this->output( "{$this->totalPages} pages to fix, " .
+ "{$this->resolvableCount} were resolvable.\n" );
+
return $ok;
}
@@ -158,178 +193,273 @@ class NamespaceConflictChecker extends Maintenance {
}
/**
- * @todo Document
- * @param int $ns A namespace id
+ * Check a given prefix and try to move it into the given destination namespace
+ *
+ * @param int $ns Destination namespace id
* @param string $name
- * @param bool $fix Whether to fix broken entries
- * @param string $suffix Suffix to append to renamed articles
+ * @param array $options Associative array of validated command-line options
* @return bool
*/
- private function checkNamespace( $ns, $name, $fix, $suffix = '' ) {
- $conflicts = $this->getConflicts( $ns, $name );
- $count = count( $conflicts );
+ private function checkNamespace( $ns, $name, $options ) {
+ $targets = $this->getTargetList( $ns, $name, $options );
+ $count = $targets->numRows();
+ $this->totalPages += $count;
if ( $count == 0 ) {
return true;
}
+ $dryRunNote = $options['fix'] ? '' : ' DRY RUN ONLY';
+
$ok = true;
- foreach ( $conflicts as $row ) {
- $resolvable = $this->reportConflict( $row, $suffix );
- $ok = $ok && $resolvable;
- if ( $fix && ( $resolvable || $suffix != '' ) ) {
- $ok = $this->resolveConflict( $row, $resolvable, $suffix ) && $ok;
+ foreach ( $targets as $row ) {
+
+ // Find the new title and determine the action to take
+
+ $newTitle = $this->getDestinationTitle( $ns, $name, $row, $options );
+ $logStatus = false;
+ if ( !$newTitle ) {
+ $logStatus = 'invalid title';
+ $action = 'abort';
+ } elseif ( $newTitle->exists() ) {
+ if ( $options['merge'] ) {
+ if ( $this->canMerge( $row->page_id, $newTitle, $logStatus ) ) {
+ $action = 'merge';
+ } else {
+ $action = 'abort';
+ }
+ } elseif ( $options['add-prefix'] == '' && $options['add-suffix'] == '' ) {
+ $action = 'abort';
+ $logStatus = 'dest title exists and --add-prefix not specified';
+ } else {
+ $newTitle = $this->getAlternateTitle( $newTitle, $options );
+ if ( !$newTitle ) {
+ $action = 'abort';
+ $logStatus = 'alternate title is invalid';
+ } elseif ( $newTitle->exists() ) {
+ $action = 'abort';
+ $logStatus = 'title conflict';
+ } else {
+ $action = 'move';
+ $logStatus = 'alternate';
+ }
+ }
+ } else {
+ $action = 'move';
+ $logStatus = 'no conflict';
+ }
+
+ // Take the action or log a dry run message
+
+ $logTitle = "id={$row->page_id} ns={$row->page_namespace} dbk={$row->page_title}";
+ $pageOK = true;
+
+ switch ( $action ) {
+ case 'abort':
+ $this->output( "$logTitle *** $logStatus\n" );
+ $pageOK = false;
+ break;
+ case 'move':
+ $this->output( "$logTitle -> " .
+ $newTitle->getPrefixedDBkey() . " ($logStatus)$dryRunNote\n" );
+
+ if ( $options['fix'] ) {
+ $pageOK = $this->movePage( $row->page_id, $newTitle );
+ }
+ break;
+ case 'merge':
+ $this->output( "$logTitle => " .
+ $newTitle->getPrefixedDBkey() . " (merge)$dryRunNote\n" );
+
+ if ( $options['fix'] ) {
+ $pageOK = $this->mergePage( $row->page_id, $newTitle );
+ }
+ break;
+ }
+
+ if ( $pageOK ) {
+ $this->resolvableCount++;
+ } else {
+ $ok = false;
}
}
+ // @fixme Also needs to do like self::getTargetList() on the
+ // *_namespace and *_title fields of pagelinks, templatelinks, and
+ // redirects, and schedule a LinksUpdate job or similar for each found
+ // *_from.
+
return $ok;
}
/**
- * @todo Do this for real
- * @param int $key
- * @param string $prefix
- * @param bool $fix
- * @param string $suffix
+ * Move the given pseudo-namespace, either replacing the colon with a hyphen
+ * (useful for pseudo-namespaces that conflict with interwiki links) or move
+ * them to another namespace if specified.
+ * @param array $options Associative array of validated command-line options
* @return bool
*/
- private function checkPrefix( $key, $prefix, $fix, $suffix = '' ) {
- $this->output( "Checking prefix \"$prefix\" vs namespace $key\n" );
+ private function checkPrefix( $options ) {
+ $prefix = $options['source-pseudo-namespace'];
+ $ns = $options['dest-namespace'];
+ $this->output( "Checking prefix \"$prefix\" vs namespace $ns\n" );
- return $this->checkNamespace( $key, $prefix, $fix, $suffix );
+ return $this->checkNamespace( $ns, $prefix, $options );
}
/**
- * Find pages in mainspace that have a prefix of the new namespace
- * so we know titles that will need migrating
+ * Find pages in main and talk namespaces that have a prefix of the new
+ * namespace so we know titles that will need migrating
*
- * @param int $ns Namespace id (id for new namespace?)
+ * @param int $ns Destination namespace id
* @param string $name Prefix that is being made a namespace
+ * @param array $options Associative array of validated command-line options
*
- * @return array
+ * @return ResultWrapper
*/
- private function getConflicts( $ns, $name ) {
- $titleSql = "TRIM(LEADING {$this->db->addQuotes( "$name:" )} FROM page_title)";
- if ( $ns == 0 ) {
- // An interwiki; try an alternate encoding with '-' for ':'
- $titleSql = $this->db->buildConcat( array(
- $this->db->addQuotes( "$name-" ),
- $titleSql,
- ) );
+ private function getTargetList( $ns, $name, $options ) {
+ if ( $options['move-talk'] && MWNamespace::isSubject( $ns ) ) {
+ $checkNamespaces = array( NS_MAIN, NS_TALK );
+ } else {
+ $checkNamespaces = NS_MAIN;
}
- return iterator_to_array( $this->db->select( 'page',
+ return $this->db->select( 'page',
array(
- 'id' => 'page_id',
- 'oldtitle' => 'page_title',
- 'namespace' => $this->db->addQuotes( $ns ) . ' + page_namespace',
- 'title' => $titleSql,
- 'oldnamespace' => 'page_namespace',
+ 'page_id',
+ 'page_title',
+ 'page_namespace',
),
array(
- 'page_namespace' => array( 0, 1 ),
+ 'page_namespace' => $checkNamespaces,
'page_title' . $this->db->buildLike( "$name:", $this->db->anyString() ),
),
__METHOD__
- ) );
+ );
}
/**
- * Report any conflicts we find
- *
+ * Get the preferred destination title for a given target page row.
+ * @param integer $ns The destination namespace ID
+ * @param string $name The conflicting prefix
* @param stdClass $row
- * @param string $suffix
- * @return bool
+ * @param array $options Associative array of validated command-line options
+ * @return Title|false
*/
- private function reportConflict( $row, $suffix ) {
- $newTitle = Title::makeTitleSafe( $row->namespace, $row->title );
- if ( is_null( $newTitle ) || !$newTitle->canExist() ) {
- // Title is also an illegal title...
- // For the moment we'll let these slide to cleanupTitles or whoever.
- $this->output( sprintf( "... %d (%d,\"%s\")\n",
- $row->id,
- $row->oldnamespace,
- $row->oldtitle ) );
- $this->output( "... *** cannot resolve automatically; illegal title ***\n" );
-
- return false;
+ private function getDestinationTitle( $ns, $name, $row, $options ) {
+ $dbk = substr( $row->page_title, strlen( "$name:" ) );
+ if ( $ns == 0 ) {
+ // An interwiki; try an alternate encoding with '-' for ':'
+ $dbk = "$name-" . $dbk;
}
-
- $this->output( sprintf( "... %d (%d,\"%s\") -> (%d,\"%s\") [[%s]]\n",
- $row->id,
- $row->oldnamespace,
- $row->oldtitle,
- $newTitle->getNamespace(),
- $newTitle->getDBkey(),
- $newTitle->getPrefixedText() ) );
-
- $id = $newTitle->getArticleID();
- if ( $id ) {
- $this->output( "... *** cannot resolve automatically; page exists with ID $id ***\n" );
-
+ $destNS = $ns;
+ if ( $row->page_namespace == NS_TALK && MWNamespace::isSubject( $ns ) ) {
+ // This is an associated talk page moved with the --move-talk feature.
+ $destNS = MWNamespace::getTalk( $destNS );
+ }
+ $newTitle = Title::makeTitleSafe( $destNS, $dbk );
+ if ( !$newTitle || !$newTitle->canExist() ) {
return false;
- } else {
- return true;
}
+ return $newTitle;
}
/**
- * Resolve any conflicts
+ * Get an alternative title to move a page to. This is used if the
+ * preferred destination title already exists.
*
- * @param stClass $row Row from the page table to fix
- * @param bool $resolvable
- * @param string $suffix Suffix to append to the fixed page
- * @return bool
+ * @param Title $title
+ * @param array $options Associative array of validated command-line options
+ * @return Title|bool
*/
- private function resolveConflict( $row, $resolvable, $suffix ) {
- if ( !$resolvable ) {
- $this->output( "... *** old title {$row->title}\n" );
- while ( true ) {
- $row->title .= $suffix;
- $this->output( "... *** new title {$row->title}\n" );
- $title = Title::makeTitleSafe( $row->namespace, $row->title );
- if ( !$title ) {
- $this->output( "... !!! invalid title\n" );
-
- return false;
- }
- $id = $title->getArticleID();
- if ( $id ) {
- $this->output( "... *** page exists with ID $id ***\n" );
- } else {
- break;
- }
+ private function getAlternateTitle( $title, $options ) {
+ $prefix = $options['add-prefix'];
+ $suffix = $options['add-suffix'];
+ if ( $prefix == '' && $suffix == '' ) {
+ return false;
+ }
+ while ( true ) {
+ $dbk = $prefix . $title->getDBkey() . $suffix;
+ $title = Title::makeTitleSafe( $title->getNamespace(), $dbk );
+ if ( !$title ) {
+ return false;
+ }
+ if ( !$title->exists() ) {
+ return $title;
}
- $this->output( "... *** using suffixed form [[" . $title->getPrefixedText() . "]] ***\n" );
}
- $this->resolveConflictOn( $row, 'page', 'page' );
-
- return true;
}
/**
- * Resolve a given conflict
+ * Move a page
+ *
+ * @fixme Update pl_from_namespace etc.
*
- * @param stdClass $row Row from the old broken entry
- * @param string $table Table to update
- * @param string $prefix Prefix for column name, like page or ar
+ * @param integer $id The page_id
+ * @param Title $newTitle The new title
* @return bool
*/
- private function resolveConflictOn( $row, $table, $prefix ) {
- $this->output( "... resolving on $table... " );
- $newTitle = Title::makeTitleSafe( $row->namespace, $row->title );
- $this->db->update( $table,
+ private function movePage( $id, Title $newTitle ) {
+ $this->db->update( 'page',
array(
- "{$prefix}_namespace" => $newTitle->getNamespace(),
- "{$prefix}_title" => $newTitle->getDBkey(),
+ "page_namespace" => $newTitle->getNamespace(),
+ "page_title" => $newTitle->getDBkey(),
),
array(
- // "{$prefix}_namespace" => 0,
- // "{$prefix}_title" => $row->oldtitle,
- "{$prefix}_id" => $row->id,
+ "page_id" => $id,
),
__METHOD__ );
- $this->output( "ok.\n" );
+ // @fixme Needs updating the *_from_namespace fields in categorylinks,
+ // pagelinks, templatelinks and imagelinks.
+
+ return true;
+ }
+
+ /**
+ * Determine if we can merge a page.
+ * We check if an inaccessible revision would become the latest and
+ * deny the merge if so -- it's theoretically possible to update the
+ * latest revision, but opens a can of worms -- search engine updates,
+ * recentchanges review, etc.
+ *
+ * @param integer $id The page_id
+ * @param Title $newTitle The new title
+ * @param string $logStatus This is set to the log status message on failure
+ * @return bool
+ */
+ private function canMerge( $id, Title $newTitle, &$logStatus ) {
+ $latestDest = Revision::newFromTitle( $newTitle, 0, Revision::READ_LATEST );
+ $latestSource = Revision::newFromPageId( $id, 0, Revision::READ_LATEST );
+ if ( $latestSource->getTimestamp() > $latestDest->getTimestamp() ) {
+ $logStatus = 'cannot merge since source is later';
+ return false;
+ } else {
+ return true;
+ }
+ }
+
+ /**
+ * Merge page histories
+ *
+ * @param integer $id The page_id
+ * @param Title $newTitle The new title
+ */
+ private function mergePage( $id, Title $newTitle ) {
+ $destId = $newTitle->getArticleId();
+ $this->db->begin( __METHOD__ );
+ $this->db->update( 'revision',
+ // SET
+ array( 'rev_page' => $destId ),
+ // WHERE
+ array( 'rev_page' => $id ),
+ __METHOD__ );
+
+ $this->db->delete( 'page', array( 'page_id' => $id ), __METHOD__ );
+
+ // @fixme Need WikiPage::doDeleteUpdates() or similar to avoid orphan
+ // rows in the links tables.
+
+ $this->db->commit( __METHOD__ );
return true;
}
}