path: root/maintenance/storage
Diffstat (limited to 'maintenance/storage')
-rw-r--r--  maintenance/storage/checkStorage.php      |  74
-rw-r--r--  maintenance/storage/compressOld.inc       |  21
-rw-r--r--  maintenance/storage/compressOld.php       |  20
-rw-r--r--  maintenance/storage/dumpRev.php           |  16
-rw-r--r--  maintenance/storage/fixBug20757.php       |  43
-rw-r--r--  maintenance/storage/moveToExternal.php    |  30
-rw-r--r--  maintenance/storage/orphanStats.php       |  13
-rw-r--r--  maintenance/storage/recompressTracked.php |  70
-rw-r--r--  maintenance/storage/resolveStubs.php      |  21
-rw-r--r--  maintenance/storage/storageTypeStats.php  |   7
-rw-r--r--  maintenance/storage/testCompression.php   |  12
-rw-r--r--  maintenance/storage/trackBlobs.php        |  56
12 files changed, 211 insertions(+), 172 deletions(-)
diff --git a/maintenance/storage/checkStorage.php b/maintenance/storage/checkStorage.php
index 245c2fec..c288d682 100644
--- a/maintenance/storage/checkStorage.php
+++ b/maintenance/storage/checkStorage.php
@@ -9,7 +9,7 @@
define( 'CONCAT_HEADER', 'O:27:"concatenatedgziphistoryblob"' );
if ( !defined( 'MEDIAWIKI' ) ) {
- require_once( dirname(__FILE__) . '/../commandLine.inc' );
+ require_once( dirname( __FILE__ ) . '/../commandLine.inc' );
$cs = new CheckStorage;
$fix = isset( $options['fix'] );
@@ -22,7 +22,7 @@ if ( !defined( 'MEDIAWIKI' ) ) {
}
-//----------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------
/**
* @ingroup Maintenance ExternalStorage
@@ -37,7 +37,7 @@ class CheckStorage {
'unfixable' => 'Unexpected errors with no automated fixing method',
'fixed' => 'Errors already fixed',
'fixable' => 'Errors which would already be fixed if --fix was specified',
- );
+ );
function check( $fix = false, $xml = '' ) {
$fname = 'checkStorage';
@@ -63,14 +63,14 @@ class CheckStorage {
for ( $chunkStart = 1 ; $chunkStart < $maxRevId; $chunkStart += $chunkSize ) {
$chunkEnd = $chunkStart + $chunkSize - 1;
- //print "$chunkStart of $maxRevId\n";
+ // print "$chunkStart of $maxRevId\n";
// Fetch revision rows
$this->oldIdMap = array();
- $dbr->ping();
- $res = $dbr->select( 'revision', array( 'rev_id', 'rev_text_id' ),
+ $dbr->ping();
+ $res = $dbr->select( 'revision', array( 'rev_id', 'rev_text_id' ),
array( "rev_id BETWEEN $chunkStart AND $chunkEnd" ), $fname );
- while ( $row = $dbr->fetchObject( $res ) ) {
+ foreach ( $res as $row ) {
$this->oldIdMap[$row->rev_id] = $row->rev_text_id;
}
$dbr->freeResult( $res );
@@ -83,9 +83,9 @@ class CheckStorage {
$missingTextRows = array_flip( $this->oldIdMap );
$externalRevs = array();
$objectRevs = array();
- $res = $dbr->select( 'text', array( 'old_id', 'old_flags' ),
+ $res = $dbr->select( 'text', array( 'old_id', 'old_flags' ),
'old_id IN (' . implode( ',', $this->oldIdMap ) . ')', $fname );
- while ( $row = $dbr->fetchObject( $res ) ) {
+ foreach ( $res as $row ) {
$flags = $row->old_flags;
$id = $row->old_id;
@@ -116,7 +116,7 @@ class CheckStorage {
if ( $fix ) {
$this->error( 'fixed', "Warning: old_flags set to 0", $id );
$dbw->ping();
- $dbw->update( 'text', array( 'old_flags' => '' ),
+ $dbw->update( 'text', array( 'old_flags' => '' ),
array( 'old_id' => $id ), $fname );
echo "Fixed\n";
} else {
@@ -137,15 +137,15 @@ class CheckStorage {
$externalConcatBlobs = array();
$externalNormalBlobs = array();
if ( count( $externalRevs ) ) {
- $res = $dbr->select( 'text', array( 'old_id', 'old_flags', 'old_text' ),
+ $res = $dbr->select( 'text', array( 'old_id', 'old_flags', 'old_text' ),
array( 'old_id IN (' . implode( ',', $externalRevs ) . ')' ), $fname );
- while ( $row = $dbr->fetchObject( $res ) ) {
+ foreach ( $res as $row ) {
$urlParts = explode( '://', $row->old_text, 2 );
if ( count( $urlParts ) !== 2 || $urlParts[1] == '' ) {
$this->error( 'restore text', "Error: invalid URL \"{$row->old_text}\"", $row->old_id );
continue;
}
- list( $proto, $path ) = $urlParts;
+ list( $proto, ) = $urlParts;
if ( $proto != 'DB' ) {
$this->error( 'restore text', "Error: invalid external protocol \"$proto\"", $row->old_id );
continue;
@@ -164,7 +164,7 @@ class CheckStorage {
// Check external concat blobs for the right header
$this->checkExternalConcatBlobs( $externalConcatBlobs );
-
+
// Check external normal blobs for existence
if ( count( $externalNormalBlobs ) ) {
if ( is_null( $this->dbStore ) ) {
@@ -174,10 +174,10 @@ class CheckStorage {
$blobIds = array_keys( $xBlobIds );
$extDb =& $this->dbStore->getSlave( $cluster );
$blobsTable = $this->dbStore->getTable( $extDb );
- $res = $extDb->select( $blobsTable,
- array( 'blob_id' ),
+ $res = $extDb->select( $blobsTable,
+ array( 'blob_id' ),
array( 'blob_id IN( ' . implode( ',', $blobIds ) . ')' ), $fname );
- while ( $row = $extDb->fetchObject( $res ) ) {
+ foreach ( $res as $row ) {
unset( $xBlobIds[$row->blob_id] );
}
$extDb->freeResult( $res );
@@ -194,9 +194,9 @@ class CheckStorage {
$curIds = array();
if ( count( $objectRevs ) ) {
$headerLength = 300;
- $res = $dbr->select( 'text', array( 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ),
+ $res = $dbr->select( 'text', array( 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ),
array( 'old_id IN (' . implode( ',', $objectRevs ) . ')' ), $fname );
- while ( $row = $dbr->fetchObject( $res ) ) {
+ foreach ( $res as $row ) {
$oldId = $row->old_id;
$matches = array();
if ( !preg_match( '/^O:(\d+):"(\w+)"/', $row->header, $matches ) ) {
@@ -245,9 +245,9 @@ class CheckStorage {
$externalConcatBlobs = array();
if ( count( $concatBlobs ) ) {
$headerLength = 300;
- $res = $dbr->select( 'text', array( 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ),
+ $res = $dbr->select( 'text', array( 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ),
array( 'old_id IN (' . implode( ',', array_keys( $concatBlobs ) ) . ')' ), $fname );
- while ( $row = $dbr->fetchObject( $res ) ) {
+ foreach ( $res as $row ) {
$flags = explode( ',', $row->old_flags );
if ( in_array( 'external', $flags ) ) {
// Concat blob is in external storage?
@@ -261,7 +261,7 @@ class CheckStorage {
if ( !isset( $externalConcatBlobs[$cluster][$id] ) ) {
$externalConcatBlobs[$cluster][$id] = array();
}
- $externalConcatBlobs[$cluster][$id] = array_merge(
+ $externalConcatBlobs[$cluster][$id] = array_merge(
$externalConcatBlobs[$cluster][$id], $concatBlobs[$row->old_id]
);
}
@@ -270,7 +270,7 @@ class CheckStorage {
$concatBlobs[$row->old_id] );
}
} elseif ( strcasecmp( substr( $row->header, 0, strlen( CONCAT_HEADER ) ), CONCAT_HEADER ) ) {
- $this->error( 'restore text', "Error: Incorrect object header for concat bulk row {$row->old_id}",
+ $this->error( 'restore text', "Error: Incorrect object header for concat bulk row {$row->old_id}",
$concatBlobs[$row->old_id] );
} # else good
@@ -286,7 +286,7 @@ class CheckStorage {
}
print "\n\nErrors:\n";
- foreach( $this->errors as $name => $errors ) {
+ foreach ( $this->errors as $name => $errors ) {
if ( count( $errors ) ) {
$description = $this->errorDescriptions[$name];
echo "$description: " . implode( ',', array_keys( $errors ) ) . "\n";
@@ -323,7 +323,7 @@ class CheckStorage {
foreach ( $ids as $id ) {
$revIds = array_merge( $revIds, array_keys( $this->oldIdMap, $id ) );
}
- print "$msg in text rows " . implode( ', ', $ids ) .
+ print "$msg in text rows " . implode( ', ', $ids ) .
", revisions " . implode( ', ', $revIds ) . "\n";
} else {
$id = $ids;
@@ -346,18 +346,18 @@ class CheckStorage {
if ( is_null( $this->dbStore ) ) {
$this->dbStore = new ExternalStoreDB;
}
-
+
foreach ( $externalConcatBlobs as $cluster => $oldIds ) {
$blobIds = array_keys( $oldIds );
$extDb =& $this->dbStore->getSlave( $cluster );
$blobsTable = $this->dbStore->getTable( $extDb );
$headerLength = strlen( CONCAT_HEADER );
- $res = $extDb->select( $blobsTable,
- array( 'blob_id', "LEFT(blob_text, $headerLength) AS header" ),
+ $res = $extDb->select( $blobsTable,
+ array( 'blob_id', "LEFT(blob_text, $headerLength) AS header" ),
array( 'blob_id IN( ' . implode( ',', $blobIds ) . ')' ), $fname );
- while ( $row = $extDb->fetchObject( $res ) ) {
+ foreach ( $res as $row ) {
if ( strcasecmp( $row->header, CONCAT_HEADER ) ) {
- $this->error( 'restore text', "Error: invalid header on target $cluster/{$row->blob_id} of two-part ES URL",
+ $this->error( 'restore text', "Error: invalid header on target $cluster/{$row->blob_id} of two-part ES URL",
$oldIds[$row->blob_id] );
}
unset( $oldIds[$row->blob_id] );
@@ -383,7 +383,7 @@ class CheckStorage {
$revFileName = "$wgTmpDirectory/broken-revlist-$wgDBname";
$filteredXmlFileName = "$wgTmpDirectory/filtered-$wgDBname.xml";
-
+
// Write revision list
if ( !file_put_contents( $revFileName, implode( "\n", $revIds ) ) ) {
echo "Error writing revision list, can't restore text\n";
@@ -393,8 +393,8 @@ class CheckStorage {
// Run mwdumper
echo "Filtering XML dump...\n";
$exitStatus = 0;
- passthru( 'mwdumper ' .
- wfEscapeShellArg(
+ passthru( 'mwdumper ' .
+ wfEscapeShellArg(
"--output=file:$filteredXmlFileName",
"--filter=revlist:$revFileName",
$xml
@@ -416,7 +416,7 @@ class CheckStorage {
$dbw = wfGetDB( DB_MASTER );
$dbr->ping();
$dbw->ping();
-
+
$source = new ImportStreamSource( $file );
$importer = new WikiImporter( $source );
$importer->setRevisionCallback( array( &$this, 'importRevision' ) );
@@ -429,8 +429,8 @@ class CheckStorage {
$id = $revision->getID();
$text = $revision->getText();
if ( $text === '' ) {
- // This is what happens if the revision was broken at the time the
- // dump was made. Unfortunately, it also happens if the revision was
+ // This is what happens if the revision was broken at the time the
+ // dump was made. Unfortunately, it also happens if the revision was
// legitimately blank, so there's no way to tell the difference. To
// be safe, we'll skip it and leave it broken
$id = $id ? $id : '';
@@ -457,7 +457,7 @@ class CheckStorage {
// Update the text row
$dbw = wfGetDB( DB_MASTER );
- $dbw->update( 'text',
+ $dbw->update( 'text',
array( 'old_flags' => $flags, 'old_text' => $text ),
array( 'old_id' => $oldId ),
$fname, array( 'LIMIT' => 1 )
diff --git a/maintenance/storage/compressOld.inc b/maintenance/storage/compressOld.inc
index 981cfda5..93be5f75 100644
--- a/maintenance/storage/compressOld.inc
+++ b/maintenance/storage/compressOld.inc
@@ -18,12 +18,11 @@ function compressOldPages( $start = 0, $extdb = '' ) {
break;
}
$last = $start;
- while( $row = $dbw->fetchObject( $res ) ) {
+ foreach ( $res as $row ) {
# print " {$row->old_id} - {$row->old_namespace}:{$row->old_title}\n";
compressPage( $row, $extdb );
$last = $row->old_id;
}
- $dbw->freeResult( $res );
$start = $last + 1; # Deletion may leave long empty stretches
print "$start...\n";
} while( true );
@@ -67,7 +66,7 @@ define( 'LS_INDIVIDUAL', 0 );
define( 'LS_CHUNKED', 1 );
/** @todo document */
-function compressWithConcat( $startId, $maxChunkSize, $beginDate,
+function compressWithConcat( $startId, $maxChunkSize, $beginDate,
$endDate, $extdb="", $maxPageId = false )
{
$fname = 'compressWithConcat';
@@ -94,12 +93,12 @@ function compressWithConcat( $startId, $maxChunkSize, $beginDate,
$pageConds[] = 'page_namespace<>0';
}
if ( $queryExtra ) {
- $pageConds[] = $queryExtra;
+ $pageConds[] = $queryExtra;
}
*/
# For each article, get a list of revisions which fit the criteria
-
+
# No recompression, use a condition on old_flags
# Don't compress object type entities, because that might produce data loss when
# overwriting bulk storage concat rows. Don't compress external references, because
@@ -142,10 +141,10 @@ function compressWithConcat( $startId, $maxChunkSize, $beginDate,
wfWaitForSlaves( 5 );
# Wake up
- $dbr->ping();
+ $dbr->ping();
# Get the page row
- $pageRes = $dbr->select( 'page',
+ $pageRes = $dbr->select( 'page',
array('page_id', 'page_namespace', 'page_title','page_latest'),
$pageConds + array('page_id' => $pageId), $fname );
if ( $dbr->numRows( $pageRes ) == 0 ) {
@@ -159,10 +158,10 @@ function compressWithConcat( $startId, $maxChunkSize, $beginDate,
# Load revisions
$revRes = $dbw->select( $tables, $fields,
- array_merge( array(
- 'rev_page' => $pageRow->page_id,
+ array_merge( array(
+ 'rev_page' => $pageRow->page_id,
# Don't operate on the current revision
- # Use < instead of <> in case the current revision has changed
+ # Use < instead of <> in case the current revision has changed
# since the page select, which wasn't locking
'rev_id < ' . $pageRow->page_latest
), $conds ),
@@ -170,7 +169,7 @@ function compressWithConcat( $startId, $maxChunkSize, $beginDate,
$revLoadOptions
);
$revs = array();
- while ( $revRow = $dbw->fetchObject( $revRes ) ) {
+ foreach ( $revRes as $revRow ) {
$revs[] = $revRow;
}
diff --git a/maintenance/storage/compressOld.php b/maintenance/storage/compressOld.php
index 7ff102a5..bc05b340 100644
--- a/maintenance/storage/compressOld.php
+++ b/maintenance/storage/compressOld.php
@@ -25,10 +25,10 @@
*/
$optionsWithArgs = array( 't', 'c', 's', 'f', 'h', 'extdb', 'endid', 'e' );
-require_once( dirname(__FILE__) . '/../commandLine.inc' );
+require_once( dirname( __FILE__ ) . '/../commandLine.inc' );
require_once( "compressOld.inc" );
-if( !function_exists( "gzdeflate" ) ) {
+if ( !function_exists( "gzdeflate" ) ) {
print "You must enable zlib support in PHP to compress old revisions!\n";
print "Please see http://www.php.net/manual/en/ref.zlib.php\n\n";
wfDie();
@@ -39,9 +39,9 @@ $defaults = array(
'c' => 20,
's' => 0,
'b' => '',
- 'e' => '',
- 'extdb' => '',
- 'endid' => false,
+ 'e' => '',
+ 'extdb' => '',
+ 'endid' => false,
);
$options = $options + $defaults;
@@ -51,15 +51,15 @@ if ( $options['t'] != 'concat' && $options['t'] != 'gzip' ) {
}
if ( $options['extdb'] != '' ) {
- print "Compressing database $wgDBname to external cluster {$options['extdb']}\n" . str_repeat('-', 76) . "\n\n";
+ print "Compressing database $wgDBname to external cluster {$options['extdb']}\n" . str_repeat( '-', 76 ) . "\n\n";
} else {
- print "Compressing database $wgDBname\n" . str_repeat('-', 76) . "\n\n";
+ print "Compressing database $wgDBname\n" . str_repeat( '-', 76 ) . "\n\n";
}
$success = true;
if ( $options['t'] == 'concat' ) {
- $success = compressWithConcat( $options['s'], $options['c'], $options['b'],
- $options['e'], $options['extdb'], $options['endid'] );
+ $success = compressWithConcat( $options['s'], $options['c'], $options['b'],
+ $options['e'], $options['extdb'], $options['endid'] );
} else {
compressOldPages( $options['s'], $options['extdb'] );
}
@@ -68,6 +68,6 @@ if ( $success ) {
print "Done.\n";
}
-exit(0);
+exit( 0 );
diff --git a/maintenance/storage/dumpRev.php b/maintenance/storage/dumpRev.php
index 95404244..b200d8af 100644
--- a/maintenance/storage/dumpRev.php
+++ b/maintenance/storage/dumpRev.php
@@ -18,7 +18,7 @@
* @ingroup Maintenance ExternalStorage
*/
-require_once( dirname(__FILE__) . '/../Maintenance.php' );
+require_once( dirname( __FILE__ ) . '/../Maintenance.php' );
class DumpRev extends Maintenance {
public function __construct() {
@@ -28,15 +28,15 @@ class DumpRev extends Maintenance {
public function execute() {
$dbr = wfGetDB( DB_SLAVE );
- $row = $dbr->selectRow(
- array( 'text', 'revision' ),
- array( 'old_flags', 'old_text' ),
+ $row = $dbr->selectRow(
+ array( 'text', 'revision' ),
+ array( 'old_flags', 'old_text' ),
array( 'old_id=rev_text_id', 'rev_id' => $this->getArg() )
);
if ( !$row ) {
$this->error( "Row not found", true );
}
-
+
$flags = explode( ',', $row->old_flags );
$text = $row->old_text;
if ( in_array( 'external', $flags ) ) {
@@ -65,15 +65,15 @@ class DumpRev extends Maintenance {
$obj = unserialize( $text );
$text = $obj->getText();
}
-
+
if ( is_object( $text ) ) {
$this->error( "Unexpectedly got object of type: " . get_class( $text ) );
} else {
- $this->output( "Text length: " . strlen( $text ) ."\n" );
+ $this->output( "Text length: " . strlen( $text ) . "\n" );
$this->output( substr( $text, 0, 100 ) . "\n" );
}
}
}
$maintClass = "DumpRev";
-require_once( DO_MAINTENANCE );
+require_once( RUN_MAINTENANCE_IF_MAIN );
diff --git a/maintenance/storage/fixBug20757.php b/maintenance/storage/fixBug20757.php
index 922d4725..4aac1202 100644
--- a/maintenance/storage/fixBug20757.php
+++ b/maintenance/storage/fixBug20757.php
@@ -14,7 +14,7 @@ class FixBug20757 extends Maintenance {
$this->addOption( 'dry-run', 'Report only' );
$this->addOption( 'start', 'old_id to start at', false, true );
}
-
+
function execute() {
$dbr = wfGetDB( DB_SLAVE );
$dbw = wfGetDB( DB_MASTER );
@@ -31,19 +31,29 @@ class FixBug20757 extends Maintenance {
$totalRevs = $dbr->selectField( 'text', 'MAX(old_id)', false, __METHOD__ );
+ if ( $dbr->getType() == 'mysql'
+ && version_compare( $dbr->getServerVersion(), '4.1.0', '>=' ) )
+ {
+ // In MySQL 4.1+, the binary field old_text has a non-working LOWER() function
+ $lowerLeft = 'LOWER(CONVERT(LEFT(old_text,22) USING latin1))';
+ } else {
+ // No CONVERT() in MySQL 4.0
+ $lowerLeft = 'LOWER(LEFT(old_text,22))';
+ }
+
while ( true ) {
print "ID: $startId / $totalRevs\r";
$res = $dbr->select(
'text',
array( 'old_id', 'old_flags', 'old_text' ),
- array(
+ array(
'old_id > ' . intval( $startId ),
'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\'',
- 'LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'',
+ "$lowerLeft = 'o:15:\"historyblobstub\"'",
),
__METHOD__,
- array(
+ array(
'ORDER BY' => 'old_id',
'LIMIT' => $this->batchSize,
)
@@ -68,7 +78,7 @@ class FixBug20757 extends Maintenance {
}
if ( !is_object( $obj ) ) {
- print "{$row->old_id}: unrecoverable: unserialized to type " .
+ print "{$row->old_id}: unrecoverable: unserialized to type " .
gettype( $obj ) . ", possible double-serialization\n";
++$numBad;
continue;
@@ -120,22 +130,21 @@ class FixBug20757 extends Maintenance {
}
// Process the stubs
- $stubsToFix = array();
foreach ( $stubs as $primaryId => $stub ) {
$secondaryId = $stub['secondaryId'];
if ( !isset( $trackedBlobs[$secondaryId] ) ) {
// No tracked blob. Work out what went wrong
- $secondaryRow = $dbr->selectRow(
- 'text',
+ $secondaryRow = $dbr->selectRow(
+ 'text',
array( 'old_flags', 'old_text' ),
- array( 'old_id' => $secondaryId ),
+ array( 'old_id' => $secondaryId ),
__METHOD__
);
if ( !$secondaryRow ) {
print "$primaryId: unrecoverable: secondary row is missing\n";
++$numBad;
} elseif ( $this->isUnbrokenStub( $stub, $secondaryRow ) ) {
- // Not broken yet, and not in the tracked clusters so it won't get
+ // Not broken yet, and not in the tracked clusters so it won't get
// broken by the current RCT run.
++$numGood;
} elseif ( strpos( $secondaryRow->old_flags, 'external' ) !== false ) {
@@ -196,7 +205,7 @@ class FixBug20757 extends Maintenance {
__METHOD__
);
- // Add a blob_tracking row so that the new reference can be recompressed
+ // Add a blob_tracking row so that the new reference can be recompressed
// without needing to run trackBlobs.php again
$dbw->insert( 'blob_tracking',
array(
@@ -255,7 +264,7 @@ class FixBug20757 extends Maintenance {
$dbr = wfGetDB( DB_SLAVE );
$map = array();
- $res = $dbr->select( 'revision',
+ $res = $dbr->select( 'revision',
array( 'rev_id', 'rev_text_id' ),
array( 'rev_page' => $pageId ),
__METHOD__
@@ -276,7 +285,7 @@ class FixBug20757 extends Maintenance {
function isUnbrokenStub( $stub, $secondaryRow ) {
$flags = explode( ',', $secondaryRow->old_flags );
$text = $secondaryRow->old_text;
- if( in_array( 'external', $flags ) ) {
+ if ( in_array( 'external', $flags ) ) {
$url = $text;
@list( /* $proto */ , $path ) = explode( '://', $url, 2 );
if ( $path == "" ) {
@@ -284,17 +293,17 @@ class FixBug20757 extends Maintenance {
}
$text = ExternalStore::fetchFromUrl( $url );
}
- if( !in_array( 'object', $flags ) ) {
+ if ( !in_array( 'object', $flags ) ) {
return false;
}
- if( in_array( 'gzip', $flags ) ) {
+ if ( in_array( 'gzip', $flags ) ) {
$obj = unserialize( gzinflate( $text ) );
} else {
$obj = unserialize( $text );
}
- if( !is_object( $obj ) ) {
+ if ( !is_object( $obj ) ) {
// Correct for old double-serialization bug.
$obj = unserialize( $obj );
}
@@ -310,5 +319,5 @@ class FixBug20757 extends Maintenance {
}
$maintClass = 'FixBug20757';
-require_once( DO_MAINTENANCE );
+require_once( RUN_MAINTENANCE_IF_MAIN );
diff --git a/maintenance/storage/moveToExternal.php b/maintenance/storage/moveToExternal.php
index dc11856a..928cbf97 100644
--- a/maintenance/storage/moveToExternal.php
+++ b/maintenance/storage/moveToExternal.php
@@ -9,10 +9,8 @@
define( 'REPORTING_INTERVAL', 1 );
if ( !defined( 'MEDIAWIKI' ) ) {
- $optionsWithArgs = array( 'e', 's' );
-
- require_once( dirname(__FILE__) . '/../commandLine.inc' );
- require_once( 'ExternalStoreDB.php' );
+ require_once( dirname( __FILE__ ) . '/../commandLine.inc' );
+ require_once( dirname( __FILE__ ) . '/../../includes/ExternalStoreDB.php' );
require_once( 'resolveStubs.php' );
$fname = 'moveToExternal';
@@ -35,8 +33,6 @@ if ( !defined( 'MEDIAWIKI' ) ) {
moveToExternal( $cluster, $maxID, $minID );
}
-
-
function moveToExternal( $cluster, $maxID, $minID = 1 ) {
$fname = 'moveToExternal';
$dbw = wfGetDB( DB_MASTER );
@@ -48,23 +44,22 @@ function moveToExternal( $cluster, $maxID, $minID = 1 ) {
print "Moving text rows from $minID to $maxID to external storage\n";
$ext = new ExternalStoreDB;
$numMoved = 0;
- $numStubs = 0;
-
+
for ( $block = 0; $block < $numBlocks; $block++ ) {
$blockStart = $block * $blockSize + $minID;
$blockEnd = $blockStart + $blockSize - 1;
-
- if ( !($block % REPORTING_INTERVAL) ) {
+
+ if ( !( $block % REPORTING_INTERVAL ) ) {
print "oldid=$blockStart, moved=$numMoved\n";
wfWaitForSlaves( 2 );
}
-
+
$res = $dbr->select( 'text', array( 'old_id', 'old_flags', 'old_text' ),
array(
"old_id BETWEEN $blockStart AND $blockEnd",
'old_flags NOT ' . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ),
), $fname );
- while ( $row = $dbr->fetchObject( $res ) ) {
+ foreach ( $res as $row ) {
# Resolve stubs
$text = $row->old_text;
$id = $row->old_id;
@@ -73,13 +68,13 @@ function moveToExternal( $cluster, $maxID, $minID = 1 ) {
} else {
$flags = "{$row->old_flags},external";
}
-
+
if ( strpos( $flags, 'object' ) !== false ) {
$obj = unserialize( $text );
$className = strtolower( get_class( $obj ) );
if ( $className == 'historyblobstub' ) {
- #resolveStub( $id, $row->old_text, $row->old_flags );
- #$numStubs++;
+ # resolveStub( $id, $row->old_text, $row->old_flags );
+ # $numStubs++;
continue;
} elseif ( $className == 'historyblobcurstub' ) {
$text = gzdeflate( $obj->getText() );
@@ -99,8 +94,8 @@ function moveToExternal( $cluster, $maxID, $minID = 1 ) {
continue;
}
- #print "Storing " . strlen( $text ) . " bytes to $url\n";
- #print "old_id=$id\n";
+ # print "Storing " . strlen( $text ) . " bytes to $url\n";
+ # print "old_id=$id\n";
$url = $ext->store( $cluster, $text );
if ( !$url ) {
@@ -112,7 +107,6 @@ function moveToExternal( $cluster, $maxID, $minID = 1 ) {
array( 'old_id' => $id ), $fname );
$numMoved++;
}
- $dbr->freeResult( $res );
}
}
diff --git a/maintenance/storage/orphanStats.php b/maintenance/storage/orphanStats.php
index 63f9025b..f30f07e4 100644
--- a/maintenance/storage/orphanStats.php
+++ b/maintenance/storage/orphanStats.php
@@ -20,7 +20,7 @@
*
* @ingroup Maintenance ExternalStorage
*/
-require_once( dirname(__FILE__) . '/../Maintenance.php' );
+require_once( dirname( __FILE__ ) . '/../Maintenance.php' );
class OrphanStats extends Maintenance {
public function __construct() {
@@ -34,13 +34,12 @@ class OrphanStats extends Maintenance {
}
public function execute() {
- $extDBs = array();
$dbr = wfGetDB( DB_SLAVE );
- if( !$dbr->tableExists( 'blob_orphans' ) ) {
+ if ( !$dbr->tableExists( 'blob_orphans' ) ) {
$this->error( "blob_orphans doesn't seem to exist, need to run trackBlobs.php first", true );
}
$res = $dbr->select( 'blob_orphans', '*', false, __METHOD__ );
-
+
$num = 0;
$totalSize = 0;
$hashes = array();
@@ -49,7 +48,7 @@ class OrphanStats extends Maintenance {
foreach ( $res as $boRow ) {
$extDB = $this->getDB( $boRow->bo_cluster );
$blobRow = $extDB->selectRow( 'blobs', '*', array( 'blob_id' => $boRow->bo_blob_id ), __METHOD__ );
-
+
$num++;
$size = strlen( $blobRow->blob_text );
$totalSize += $size;
@@ -61,11 +60,11 @@ class OrphanStats extends Maintenance {
$this->output( "Number of orphans: $num\n" );
if ( $num > 0 ) {
$this->output( "Average size: " . round( $totalSize / $num, 0 ) . " bytes\n" .
- "Max size: $maxSize\n" .
+ "Max size: $maxSize\n" .
"Number of unique texts: " . count( $hashes ) . "\n" );
}
}
}
$maintClass = "OrphanStats";
-require_once( DO_MAINTENANCE );
+require_once( RUN_MAINTENANCE_IF_MAIN );
diff --git a/maintenance/storage/recompressTracked.php b/maintenance/storage/recompressTracked.php
index e43dbe5c..8974a74d 100644
--- a/maintenance/storage/recompressTracked.php
+++ b/maintenance/storage/recompressTracked.php
@@ -1,14 +1,14 @@
<?php
$optionsWithArgs = RecompressTracked::getOptionsWithArgs();
-require( dirname( __FILE__ ) .'/../commandLine.inc' );
+require( dirname( __FILE__ ) . '/../commandLine.inc' );
if ( count( $args ) < 1 ) {
echo "Usage: php recompressTracked.php [options] <cluster> [... <cluster>...]
Moves blobs indexed by trackBlobs.php to a specified list of destination clusters, and recompresses them in the process. Restartable.
-Options:
- --procs <procs> Set the number of child processes (default 1)
+Options:
+ --procs <procs> Set the number of child processes (default 1)
--copy-only Copy only, do not update the text table. Restart without this option to complete.
--debug-log <file> Log debugging data to the specified file
--info-log <file> Log progress messages to the specified file
@@ -99,7 +99,7 @@ class RecompressTracked {
}
function logToFile( $msg, $file ) {
- $header = '[' . date('d\TH:i:s') . '] ' . wfHostname() . ' ' . posix_getpid();
+ $header = '[' . date( 'd\TH:i:s' ) . '] ' . wfHostname() . ' ' . posix_getpid();
if ( $this->slaveId !== false ) {
$header .= "({$this->slaveId})";
}
@@ -109,8 +109,8 @@ class RecompressTracked {
/**
* Wait until the selected slave has caught up to the master.
- * This allows us to use the slave for things that were committed in a
- * previous part of this batch process.
+ * This allows us to use the slave for things that were committed in a
+ * previous part of this batch process.
*/
function syncDBs() {
$dbw = wfGetDB( DB_MASTER );
@@ -179,14 +179,14 @@ class RecompressTracked {
$cmd .= " --$cmdOption";
}
}
- $cmd .= ' --child' .
+ $cmd .= ' --child' .
' --wiki ' . wfEscapeShellArg( wfWikiID() ) .
' ' . call_user_func_array( 'wfEscapeShellArg', $this->destClusters );
$this->slavePipes = $this->slaveProcs = array();
for ( $i = 0; $i < $this->numProcs; $i++ ) {
$pipes = false;
- $spec = array(
+ $spec = array(
array( 'pipe', 'r' ),
array( 'file', 'php://stdout', 'w' ),
array( 'file', 'php://stderr', 'w' )
@@ -228,7 +228,7 @@ class RecompressTracked {
function dispatch( /*...*/ ) {
$args = func_get_args();
$pipes = $this->slavePipes;
- $numPipes = stream_select( $x=array(), $pipes, $y=array(), 3600 );
+ $numPipes = stream_select( $x = array(), $pipes, $y = array(), 3600 );
if ( !$numPipes ) {
$this->critical( "Error waiting to write to slaves. Aborting" );
exit( 1 );
@@ -264,8 +264,8 @@ class RecompressTracked {
if ( $this->noCount ) {
$numPages = '[unknown]';
} else {
- $numPages = $dbr->selectField( 'blob_tracking',
- 'COUNT(DISTINCT bt_page)',
+ $numPages = $dbr->selectField( 'blob_tracking',
+ 'COUNT(DISTINCT bt_page)',
# A condition is required so that this query uses the index
array( 'bt_moved' => 0 ),
__METHOD__
@@ -277,15 +277,15 @@ class RecompressTracked {
$this->info( "Moving pages..." );
}
while ( true ) {
- $res = $dbr->select( 'blob_tracking',
+ $res = $dbr->select( 'blob_tracking',
array( 'bt_page' ),
- array(
+ array(
'bt_moved' => 0,
'bt_page > ' . $dbr->addQuotes( $startId )
),
__METHOD__,
- array(
- 'DISTINCT',
+ array(
+ 'DISTINCT',
'ORDER BY' => 'bt_page',
'LIMIT' => $this->batchSize,
)
@@ -330,8 +330,8 @@ class RecompressTracked {
if ( $this->noCount ) {
$numOrphans = '[unknown]';
} else {
- $numOrphans = $dbr->selectField( 'blob_tracking',
- 'COUNT(DISTINCT bt_text_id)',
+ $numOrphans = $dbr->selectField( 'blob_tracking',
+ 'COUNT(DISTINCT bt_text_id)',
array( 'bt_moved' => 0, 'bt_page' => 0 ),
__METHOD__ );
if ( !$numOrphans ) {
@@ -440,8 +440,8 @@ class RecompressTracked {
$trx = new CgzCopyTransaction( $this, $this->pageBlobClass );
while ( true ) {
- $res = $dbr->select(
- array( 'blob_tracking', 'text' ),
+ $res = $dbr->select(
+ array( 'blob_tracking', 'text' ),
'*',
array(
'bt_page' => $pageId,
@@ -451,7 +451,7 @@ class RecompressTracked {
'bt_text_id=old_id',
),
__METHOD__,
- array(
+ array(
'ORDER BY' => 'bt_text_id',
'LIMIT' => $this->batchSize
)
@@ -496,7 +496,7 @@ class RecompressTracked {
*
* This is done in a single transaction to provide restartable behaviour
* without data loss.
- *
+ *
* The transaction is kept short to reduce locking.
*/
function moveTextRow( $textId, $url ) {
@@ -536,16 +536,16 @@ class RecompressTracked {
$dbr = wfGetDB( DB_SLAVE );
$startId = 0;
- $conds = array_merge( $conds, array(
+ $conds = array_merge( $conds, array(
'bt_moved' => 0,
'bt_new_url IS NOT NULL'
- ));
+ ) );
while ( true ) {
$res = $dbr->select( 'blob_tracking',
'*',
array_merge( $conds, array( 'bt_text_id > ' . $dbr->addQuotes( $startId ) ) ),
__METHOD__,
- array(
+ array(
'ORDER BY' => 'bt_text_id',
'LIMIT' => $this->batchSize,
)
@@ -592,17 +592,17 @@ class RecompressTracked {
$this->finishIncompleteMoves( array( 'bt_text_id' => $textIds ) );
$this->syncDBs();
}
-
+
$trx = new CgzCopyTransaction( $this, $this->orphanBlobClass );
$res = wfGetDB( DB_SLAVE )->select(
- array( 'text', 'blob_tracking' ),
- array( 'old_id', 'old_text', 'old_flags' ),
- array(
+ array( 'text', 'blob_tracking' ),
+ array( 'old_id', 'old_text', 'old_flags' ),
+ array(
'old_id' => $textIds,
'bt_text_id=old_id',
'bt_moved' => 0,
- ),
+ ),
__METHOD__,
array( 'DISTINCT' )
);
@@ -610,10 +610,10 @@ class RecompressTracked {
foreach ( $res as $row ) {
$text = Revision::getRevisionText( $row );
if ( $text === false ) {
- $this->critical( "Error: cannot load revision text for old_id=$textId" );
+ $this->critical( "Error: cannot load revision text for old_id={$row->old_id}" );
continue;
}
-
+
if ( !$trx->addItem( $text, $row->old_id ) ) {
$this->debug( "[orphan]: committing blob with " . $trx->getSize() . " rows" );
$trx->commit();
@@ -625,7 +625,7 @@ class RecompressTracked {
$trx->commit();
}
- /**
+ /**
* Wait for slaves (quietly)
*/
function waitForSlaves() {
@@ -704,14 +704,14 @@ class CgzCopyTransaction {
// Check to see if the target text_ids have been moved already.
//
- // We originally read from the slave, so this can happen when a single
- // text_id is shared between multiple pages. It's rare, but possible
+ // We originally read from the slave, so this can happen when a single
+ // text_id is shared between multiple pages. It's rare, but possible
// if a delete/move/undelete cycle splits up a null edit.
//
// We do a locking read to prevent closer-run race conditions.
$dbw = wfGetDB( DB_MASTER );
$dbw->begin();
- $res = $dbw->select( 'blob_tracking',
+ $res = $dbw->select( 'blob_tracking',
array( 'bt_text_id', 'bt_moved' ),
array( 'bt_text_id' => array_keys( $this->referrers ) ),
__METHOD__, array( 'FOR UPDATE' ) );
diff --git a/maintenance/storage/resolveStubs.php b/maintenance/storage/resolveStubs.php
index 346151e9..2269e37f 100644
--- a/maintenance/storage/resolveStubs.php
+++ b/maintenance/storage/resolveStubs.php
@@ -9,7 +9,7 @@ define( 'REPORTING_INTERVAL', 100 );
if ( !defined( 'MEDIAWIKI' ) ) {
$optionsWithArgs = array( 'm' );
- require_once( dirname(__FILE__) . '/../commandLine.inc' );
+ require_once( dirname( __FILE__ ) . '/../commandLine.inc' );
resolveStubs();
}
@@ -28,22 +28,19 @@ function resolveStubs() {
for ( $b = 0; $b < $numBlocks; $b++ ) {
wfWaitForSlaves( 2 );
-
+
printf( "%5.2f%%\n", $b / $numBlocks * 100 );
- $start = intval($maxID / $numBlocks) * $b + 1;
- $end = intval($maxID / $numBlocks) * ($b + 1);
-
+ $start = intval( $maxID / $numBlocks ) * $b + 1;
+ $end = intval( $maxID / $numBlocks ) * ( $b + 1 );
+
$res = $dbr->select( 'text', array( 'old_id', 'old_text', 'old_flags' ),
"old_id>=$start AND old_id<=$end " .
- "AND old_flags LIKE '%object%' AND old_flags NOT LIKE '%external%' ".
- 'AND LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'',
+ "AND old_flags LIKE '%object%' AND old_flags NOT LIKE '%external%' " .
+ 'AND LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'',
$fname );
- while ( $row = $dbr->fetchObject( $res ) ) {
+ foreach ( $res as $row ) {
resolveStub( $row->old_id, $row->old_text, $row->old_flags );
}
- $dbr->freeResult( $res );
-
-
}
print "100%\n";
}
@@ -84,7 +81,7 @@ function resolveStub( $id, $stubText, $flags ) {
}
# Update the row
- #print "oldid=$id\n";
+ # print "oldid=$id\n";
$dbw->update( 'text',
array( /* SET */
'old_flags' => $newFlags,
diff --git a/maintenance/storage/storageTypeStats.php b/maintenance/storage/storageTypeStats.php
index 85858620..be86c531 100644
--- a/maintenance/storage/storageTypeStats.php
+++ b/maintenance/storage/storageTypeStats.php
@@ -1,6 +1,6 @@
<?php
-require_once( dirname(__FILE__).'/../Maintenance.php' );
+require_once( dirname( __FILE__ ) . '/../Maintenance.php' );
class StorageTypeStats extends Maintenance {
function execute() {
@@ -12,7 +12,6 @@ class StorageTypeStats extends Maintenance {
exit( 1 );
}
- $rangeStart = 0;
$binSize = intval( pow( 10, floor( log10( $endId ) ) - 3 ) );
if ( $binSize < 100 ) {
$binSize = 100;
@@ -86,7 +85,7 @@ SQL;
echo str_repeat( '-', 120 ) . "\n";
foreach ( $stats as $flags => $flagStats ) {
foreach ( $flagStats as $class => $entry ) {
- printf( $format, $flags, $class, $entry['count'],
+ printf( $format, $flags, $class, $entry['count'],
sprintf( "%-13d - %-13d", $entry['first'], $entry['last'] ) );
}
}
@@ -94,5 +93,5 @@ SQL;
}
$maintClass = 'StorageTypeStats';
-require_once( DO_MAINTENANCE );
+require_once( RUN_MAINTENANCE_IF_MAIN );
diff --git a/maintenance/storage/testCompression.php b/maintenance/storage/testCompression.php
index 9c96c9f8..e2718325 100644
--- a/maintenance/storage/testCompression.php
+++ b/maintenance/storage/testCompression.php
@@ -1,7 +1,7 @@
<?php
$optionsWithArgs = array( 'start', 'limit', 'type' );
-require( dirname(__FILE__).'/../commandLine.inc' );
+require( dirname( __FILE__ ) . '/../commandLine.inc' );
if ( !isset( $args[0] ) ) {
echo "Usage: php testCompression.php [--type=<type>] [--start=<start-date>] [--limit=<num-revs>] <page-title>\n";
@@ -26,10 +26,10 @@ $type = isset( $options['type'] ) ? $options['type'] : 'ConcatenatedGzipHistoryB
$dbr = wfGetDB( DB_SLAVE );
-$res = $dbr->select(
+$res = $dbr->select(
array( 'page', 'revision', 'text' ),
'*',
- array(
+ array(
'page_namespace' => $title->getNamespace(),
'page_title' => $title->getDBkey(),
'page_id=rev_page',
@@ -56,9 +56,9 @@ foreach ( $res as $row ) {
$serialized = serialize( $blob );
$t += microtime( true );
-#print_r( $blob->mDiffMap );
+# print_r( $blob->mDiffMap );
-printf( "%s\nCompression ratio for %d revisions: %5.2f, %s -> %d\n",
+printf( "%s\nCompression ratio for %d revisions: %5.2f, %s -> %d\n",
$type,
count( $hashes ),
$uncompressedSize / strlen( $serialized ),
@@ -73,7 +73,7 @@ foreach ( $keys as $id => $key ) {
$text = $blob->getItem( $key );
if ( md5( $text ) != $hashes[$id] ) {
echo "Content hash mismatch for rev_id $id\n";
- #var_dump( $text );
+ # var_dump( $text );
}
}
$t += microtime( true );
diff --git a/maintenance/storage/trackBlobs.php b/maintenance/storage/trackBlobs.php
index 63327d53..15aeec3b 100644
--- a/maintenance/storage/trackBlobs.php
+++ b/maintenance/storage/trackBlobs.php
@@ -1,6 +1,6 @@
<?php
-require( dirname( __FILE__ ) .'/../commandLine.inc' );
+require( dirname( __FILE__ ) . '/../commandLine.inc' );
if ( count( $args ) < 1 ) {
@@ -35,6 +35,7 @@ class TrackBlobs {
}
function run() {
+ $this->checkIntegrity();
$this->initTrackingTable();
$this->trackRevisions();
$this->trackOrphanText();
@@ -43,6 +44,47 @@ class TrackBlobs {
}
}
+ function checkIntegrity() {
+ echo "Doing integrity check...\n";
+ $dbr = wfGetDB( DB_SLAVE );
+
+ // Scan for HistoryBlobStub objects in the text table (bug 20757)
+
+ $exists = $dbr->selectField( 'text', 1,
+ 'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\' ' .
+ 'AND LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'',
+ __METHOD__
+ );
+
+ if ( $exists ) {
+ echo "Integrity check failed: found HistoryBlobStub objects in your text table.\n" .
+ "This script could destroy these objects if it continued. Run resolveStubs.php\n" .
+ "to fix this.\n";
+ exit( 1 );
+ }
+
+ // Scan the archive table for HistoryBlobStub objects or external flags (bug 22624)
+ $flags = $dbr->selectField( 'archive', 'ar_flags',
+ 'ar_flags LIKE \'%external%\' OR (' .
+ 'ar_flags LIKE \'%object%\' ' .
+ 'AND LOWER(CONVERT(LEFT(ar_text,22) USING latin1)) = \'o:15:"historyblobstub"\' )',
+ __METHOD__
+ );
+
+ if ( strpos( $flags, 'external' ) !== false ) {
+ echo "Integrity check failed: found external storage pointers in your archive table.\n" .
+ "Run normaliseArchiveTable.php to fix this.\n";
+ exit( 1 );
+ } elseif ( $flags ) {
+ echo "Integrity check failed: found HistoryBlobStub objects in your archive table.\n" .
+ "These objects are probably already broken, continuing would make them\n" .
+ "unrecoverable. Run \"normaliseArchiveTable.php --fix-cgz-bug\" to fix this.\n";
+ exit( 1 );
+ }
+
+ echo "Integrity check OK\n";
+ }
+
function initTrackingTable() {
$dbw = wfGetDB( DB_MASTER );
if ( $dbw->tableExists( 'blob_tracking' ) ) {
@@ -170,9 +212,9 @@ class TrackBlobs {
# Scan the text table for orphan text
while ( true ) {
- $res = $dbr->select( array( 'text', 'blob_tracking' ),
+ $res = $dbr->select( array( 'text', 'blob_tracking' ),
array( 'old_id', 'old_flags', 'old_text' ),
- array(
+ array(
'old_id>' . $dbr->addQuotes( $startId ),
$textClause,
'old_flags ' . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ),
@@ -181,7 +223,7 @@ class TrackBlobs {
__METHOD__,
array(
'ORDER BY' => 'old_id',
- 'LIMIT' => $this->batchSize
+ 'LIMIT' => $this->batchSize
),
array( 'blob_tracking' => array( 'LEFT JOIN', 'bt_text_id=old_id' ) )
);
@@ -275,8 +317,8 @@ class TrackBlobs {
// Build a bitmap of actual blob rows
while ( true ) {
- $res = $extDB->select( $table,
- array( 'blob_id' ),
+ $res = $extDB->select( $table,
+ array( 'blob_id' ),
array( 'blob_id > ' . $extDB->addQuotes( $startId ) ),
__METHOD__,
array( 'LIMIT' => $this->batchSize, 'ORDER BY' => 'blob_id' )
@@ -301,7 +343,7 @@ class TrackBlobs {
// Find actual blobs that weren't tracked by the previous passes
// This is a set-theoretic difference A \ B, or in bitwise terms, A & ~B
$orphans = gmp_and( $actualBlobs, gmp_com( $this->trackedBlobs[$cluster] ) );
-
+
// Traverse the orphan list
$insertBatch = array();
$id = 0;