summaryrefslogtreecommitdiff
path: root/maintenance/storage
diff options
context:
space:
mode:
Diffstat (limited to 'maintenance/storage')
-rw-r--r--maintenance/storage/compressOld.inc6
-rw-r--r--maintenance/storage/compressOld.php2
-rw-r--r--maintenance/storage/dumpRev.php111
-rw-r--r--maintenance/storage/fixBug20757.php314
-rwxr-xr-xmaintenance/storage/make-blobs11
-rw-r--r--maintenance/storage/moveToExternal.php2
-rw-r--r--maintenance/storage/orphanStats.php43
-rw-r--r--maintenance/storage/recompressTracked.php57
-rw-r--r--maintenance/storage/resolveStubs.php10
-rw-r--r--maintenance/storage/storageTypeStats.php98
-rw-r--r--maintenance/storage/trackBlobs.php28
11 files changed, 594 insertions, 88 deletions
diff --git a/maintenance/storage/compressOld.inc b/maintenance/storage/compressOld.inc
index fb8cc422..981cfda5 100644
--- a/maintenance/storage/compressOld.inc
+++ b/maintenance/storage/compressOld.inc
@@ -57,7 +57,8 @@ function compressPage( $row, $extdb ) {
'old_text' => $compress
), array( /* WHERE */
'old_id' => $row->old_id
- ), $fname, 'LIMIT 1'
+ ), $fname,
+ array( 'LIMIT' => 1 )
);
return true;
}
@@ -104,7 +105,8 @@ function compressWithConcat( $startId, $maxChunkSize, $beginDate,
# overwriting bulk storage concat rows. Don't compress external references, because
# the script doesn't yet delete rows from external storage.
$conds = array(
- "old_flags NOT LIKE '%object%' AND old_flags NOT LIKE '%external%'");
+ 'old_flags NOT ' . $dbr->buildLike( $dbr->anyString(), 'object', $dbr->anyString() ) . ' AND old_flags NOT '
+ . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ) );
if ( $beginDate ) {
if ( !preg_match( '/^\d{14}$/', $beginDate ) ) {
diff --git a/maintenance/storage/compressOld.php b/maintenance/storage/compressOld.php
index 6f8b48eb..7ff102a5 100644
--- a/maintenance/storage/compressOld.php
+++ b/maintenance/storage/compressOld.php
@@ -68,6 +68,6 @@ if ( $success ) {
print "Done.\n";
}
-exit();
+exit(0);
diff --git a/maintenance/storage/dumpRev.php b/maintenance/storage/dumpRev.php
index c84d8aa5..95404244 100644
--- a/maintenance/storage/dumpRev.php
+++ b/maintenance/storage/dumpRev.php
@@ -1,56 +1,79 @@
<?php
/**
- * @file
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
* @ingroup Maintenance ExternalStorage
*/
-require_once( dirname(__FILE__) . '/../commandLine.inc' );
+require_once( dirname(__FILE__) . '/../Maintenance.php' );
-$wgDebugLogFile = '/dev/stdout';
-
-
-$dbr = wfGetDB( DB_SLAVE );
-$row = $dbr->selectRow(
- array( 'text', 'revision' ),
- array( 'old_flags', 'old_text' ),
- array( 'old_id=rev_text_id', 'rev_id' => $args[0] )
-);
-if ( !$row ) {
- print "Row not found\n";
- exit;
-}
+class DumpRev extends Maintenance {
+ public function __construct() { // Declare the script's command-line interface.
+ parent::__construct();
+ $this->addArg( 'rev-id', 'Revision ID', true ); // the revision to dump; execute() reads it back via getArg()
+ }
-$flags = explode( ',', $row->old_flags );
-$text = $row->old_text;
-if ( in_array( 'external', $flags ) ) {
- print "External $text\n";
- if ( preg_match( '!^DB://(\w+)/(\w+)/(\w+)$!', $text, $m ) ) {
- $es = ExternalStore::getStoreObject( 'DB' );
- $blob = $es->fetchBlob( $m[1], $m[2], $m[3] );
- if ( strtolower( get_class( $blob ) ) == 'concatenatedgziphistoryblob' ) {
- print "Found external CGZ\n";
- $blob->uncompress();
- print "Items: (" . implode( ', ', array_keys( $blob->mItems ) ) . ")\n";
- $text = $blob->getItem( $m[3] );
+ public function execute() { // Load one revision's text row, decode it according to its flags, print a short preview.
+ $dbr = wfGetDB( DB_SLAVE );
+ $row = $dbr->selectRow(
+ array( 'text', 'revision' ),
+ array( 'old_flags', 'old_text' ),
+ array( 'old_id=rev_text_id', 'rev_id' => $this->getArg() )
+ );
+ if ( !$row ) {
+ $this->error( "Row not found", true ); // NOTE(review): presumably the second argument aborts the script - confirm
+ }
+
+ $flags = explode( ',', $row->old_flags ); // old_flags is a comma-separated list
+ $text = $row->old_text;
+ if ( in_array( 'external', $flags ) ) {
+ $this->output( "External $text\n" );
+ if ( preg_match( '!^DB://(\w+)/(\w+)/(\w+)$!', $text, $m ) ) { // three-part DB:// address
+ $es = ExternalStore::getStoreObject( 'DB' );
+ $blob = $es->fetchBlob( $m[1], $m[2], $m[3] );
+ if ( strtolower( get_class( $blob ) ) == 'concatenatedgziphistoryblob' ) {
+ $this->output( "Found external CGZ\n" );
+ $blob->uncompress();
+ $this->output( "Items: (" . implode( ', ', array_keys( $blob->mItems ) ) . ")\n" );
+ $text = $blob->getItem( $m[3] ); // third address component selects the item inside the blob
+ } else {
+ $this->output( "CGZ expected at $text, got " . gettype( $blob ) . "\n" );
+ $text = $blob;
+ }
+ } else {
+ $this->output( "External plain $text\n" );
+ $text = ExternalStore::fetchFromURL( $text );
+ }
+ }
+ if ( in_array( 'gzip', $flags ) ) {
+ $text = gzinflate( $text );
+ }
+ if ( in_array( 'object', $flags ) ) {
+ $obj = unserialize( $text );
+ $text = $obj->getText(); // NOTE(review): no guard if unserialize() did not yield an object
+ }
+
+ if ( is_object( $text ) ) {
+ $this->error( "Unexpectedly got object of type: " . get_class( $text ) );
} else {
- print "CGZ expected at $text, got " . gettype( $blob ) . "\n";
- $text = $blob;
+ $this->output( "Text length: " . strlen( $text ) ."\n" );
+ $this->output( substr( $text, 0, 100 ) . "\n" ); // preview: first 100 bytes only
}
- } else {
- print "External plain $text\n";
- $text = ExternalStore::fetchFromURL( $text );
}
}
-if ( in_array( 'gzip', $flags ) ) {
- $text = gzinflate( $text );
-}
-if ( in_array( 'object', $flags ) ) {
- $text = unserialize( $text );
-}
-if ( is_object( $text ) ) {
- print "Unexpectedly got object of type: " . get_class( $text ) . "\n";
-} else {
- print "Text length: " . strlen( $text ) ."\n";
- print substr( $text, 0, 100 ) . "\n";
-}
+$maintClass = "DumpRev";
+require_once( DO_MAINTENANCE );
diff --git a/maintenance/storage/fixBug20757.php b/maintenance/storage/fixBug20757.php
new file mode 100644
index 00000000..922d4725
--- /dev/null
+++ b/maintenance/storage/fixBug20757.php
@@ -0,0 +1,314 @@
+<?php
+
+require_once( dirname( __FILE__ ) . '/../Maintenance.php' );
+
+class FixBug20757 extends Maintenance {
+ var $batchSize = 10000;
+ var $mapCache = array();
+ var $mapCacheSize = 0;
+ var $maxMapCacheSize = 1000000;
+
+ function __construct() { // Register the script's options and its description.
+ parent::__construct();
+ $this->addOption( 'dry-run', 'Report only' );
+ $this->addOption( 'start', 'old_id to start at', false, true );
+ $this->mDescription = 'Script to fix bug 20757 assuming that blob_tracking is intact';
+ }
+
+ function execute() { // Scan text rows holding serialized HistoryBlobStub objects and repoint recoverable ones at their tracked blob.
+ $dbr = wfGetDB( DB_SLAVE );
+ $dbw = wfGetDB( DB_MASTER );
+
+ $dryRun = $this->getOption( 'dry-run' );
+ if ( $dryRun ) {
+ print "Dry run only.\n";
+ }
+
+ $startId = $this->getOption( 'start', 0 );
+ $numGood = 0;
+ $numFixed = 0;
+ $numBad = 0;
+
+ $totalRevs = $dbr->selectField( 'text', 'MAX(old_id)', false, __METHOD__ ); // highest old_id; only used in the progress line
+
+ while ( true ) {
+ print "ID: $startId / $totalRevs\r";
+
+ $res = $dbr->select(
+ 'text',
+ array( 'old_id', 'old_flags', 'old_text' ),
+ array(
+ 'old_id > ' . intval( $startId ),
+ 'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\'',
+ 'LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'',
+ ),
+ __METHOD__,
+ array(
+ 'ORDER BY' => 'old_id',
+ 'LIMIT' => $this->batchSize,
+ )
+ );
+
+ if ( !$res->numRows() ) {
+ break; // no candidate rows left after $startId
+ }
+
+ $secondaryIds = array();
+ $stubs = array();
+
+ foreach ( $res as $row ) {
+ $startId = $row->old_id; // advance the batch cursor even when the row is rejected below
+
+ // Basic sanity checks
+ $obj = unserialize( $row->old_text );
+ if ( $obj === false ) {
+ print "{$row->old_id}: unrecoverable: cannot unserialize\n";
+ ++$numBad;
+ continue;
+ }
+
+ if ( !is_object( $obj ) ) {
+ print "{$row->old_id}: unrecoverable: unserialized to type " .
+ gettype( $obj ) . ", possible double-serialization\n";
+ ++$numBad;
+ continue;
+ }
+
+ if ( strtolower( get_class( $obj ) ) !== 'historyblobstub' ) {
+ print "{$row->old_id}: unrecoverable: unexpected object class " .
+ get_class( $obj ) . "\n";
+ ++$numBad;
+ continue;
+ }
+
+ // Process flags: rows without a utf-8/utf8 flag are treated as legacy-encoded
+ $flags = explode( ',', $row->old_flags );
+ if ( in_array( 'utf-8', $flags ) || in_array( 'utf8', $flags ) ) {
+ $legacyEncoding = false;
+ } else {
+ $legacyEncoding = true;
+ }
+
+ // Queue the stub for future batch processing
+ $id = intval( $obj->mOldId ); // text row the stub points at (the "secondary" row)
+ $secondaryIds[] = $id;
+ $stubs[$row->old_id] = array(
+ 'legacyEncoding' => $legacyEncoding,
+ 'secondaryId' => $id,
+ 'hash' => $obj->mHash,
+ );
+ }
+
+ $secondaryIds = array_unique( $secondaryIds );
+
+ if ( !count( $secondaryIds ) ) {
+ continue; // every row in this batch was rejected; fetch the next batch
+ }
+
+ // Run the batch query on blob_tracking
+ $res = $dbr->select(
+ 'blob_tracking',
+ '*',
+ array(
+ 'bt_text_id' => $secondaryIds,
+ ),
+ __METHOD__
+ );
+ $trackedBlobs = array();
+ foreach ( $res as $row ) {
+ $trackedBlobs[$row->bt_text_id] = $row; // a later row for the same bt_text_id replaces an earlier one
+ }
+
+ // Process the stubs
+ $stubsToFix = array();
+ foreach ( $stubs as $primaryId => $stub ) {
+ $secondaryId = $stub['secondaryId'];
+ if ( !isset( $trackedBlobs[$secondaryId] ) ) {
+ // No tracked blob. Work out what went wrong
+ $secondaryRow = $dbr->selectRow(
+ 'text',
+ array( 'old_flags', 'old_text' ),
+ array( 'old_id' => $secondaryId ),
+ __METHOD__
+ );
+ if ( !$secondaryRow ) {
+ print "$primaryId: unrecoverable: secondary row is missing\n";
+ ++$numBad;
+ } elseif ( $this->isUnbrokenStub( $stub, $secondaryRow ) ) {
+ // Not broken yet, and not in the tracked clusters so it won't get
+ // broken by the current RCT run.
+ ++$numGood;
+ } elseif ( strpos( $secondaryRow->old_flags, 'external' ) !== false ) {
+ print "$primaryId: unrecoverable: secondary gone to {$secondaryRow->old_text}\n";
+ ++$numBad;
+ } else {
+ print "$primaryId: unrecoverable: miscellaneous corruption of secondary row\n";
+ ++$numBad;
+ }
+ unset( $stubs[$primaryId] );
+ continue;
+ }
+ $trackRow = $trackedBlobs[$secondaryId];
+
+ // Check that the specified text really is available in the tracked source row
+ $url = "DB://{$trackRow->bt_cluster}/{$trackRow->bt_blob_id}/{$stub['hash']}";
+ $text = ExternalStore::fetchFromURL( $url );
+ if ( $text === false ) {
+ print "$primaryId: unrecoverable: source text missing\n";
+ ++$numBad;
+ unset( $stubs[$primaryId] );
+ continue;
+ }
+ if ( md5( $text ) !== $stub['hash'] ) {
+ print "$primaryId: unrecoverable: content hashes do not match\n";
+ ++$numBad;
+ unset( $stubs[$primaryId] );
+ continue;
+ }
+
+ // Find the page_id and rev_id
+ // The page is probably the same as the page of the secondary row
+ $pageId = intval( $trackRow->bt_page );
+ if ( !$pageId ) {
+ $revId = $pageId = 0;
+ } else {
+ $revId = $this->findTextIdInPage( $pageId, $primaryId );
+ if ( !$revId ) {
+ // Actually an orphan
+ $pageId = $revId = 0;
+ }
+ }
+
+ $newFlags = $stub['legacyEncoding'] ? 'external' : 'external,utf-8';
+
+ if ( !$dryRun ) {
+ // Reset the text row to point to the original copy
+ $dbw->begin();
+ $dbw->update(
+ 'text',
+ // SET
+ array(
+ 'old_flags' => $newFlags,
+ 'old_text' => $url
+ ),
+ // WHERE
+ array( 'old_id' => $primaryId ),
+ __METHOD__
+ );
+
+ // Add a blob_tracking row so that the new reference can be recompressed
+ // without needing to run trackBlobs.php again
+ $dbw->insert( 'blob_tracking',
+ array(
+ 'bt_page' => $pageId,
+ 'bt_rev_id' => $revId,
+ 'bt_text_id' => $primaryId,
+ 'bt_cluster' => $trackRow->bt_cluster,
+ 'bt_blob_id' => $trackRow->bt_blob_id,
+ 'bt_cgz_hash' => $stub['hash'],
+ 'bt_new_url' => null,
+ 'bt_moved' => 0,
+ ),
+ __METHOD__
+ );
+ $dbw->commit();
+ $this->waitForSlaves();
+ }
+
+ print "$primaryId: resolved to $url\n"; // printed and counted in dry-run mode too
+ ++$numFixed;
+ }
+ }
+
+ print "\n";
+ print "Fixed: $numFixed\n";
+ print "Unrecoverable: $numBad\n";
+ print "Good stubs: $numGood\n";
+ }
+
+ function waitForSlaves() { // Throttle: call wfWaitForSlaves() on every 50th invocation, not on every one.
+ static $iteration = 0; // invocations since the last wait; persists across calls
+ ++$iteration;
+ if ( $iteration >= 50 ) { // was "++$iteration > 50 == 0", i.e. ( ++$iteration > 50 ) == 0, true on every call
+ wfWaitForSlaves( 5 ); // NOTE(review): argument kept from the original; presumably a lag limit - confirm
+ $iteration = 0;
+ }
+ }
+
+ function findTextIdInPage( $pageId, $textId ) {
+ // Look $textId up in the page's rev_text_id => rev_id map; null when the page has no such text row.
+ $revIdsByTextId = $this->getRevTextMap( $pageId );
+ if ( isset( $revIdsByTextId[$textId] ) ) {
+ return $revIdsByTextId[$textId];
+ }
+ return null;
+ }
+
+ function getRevTextMap( $pageId ) {
+ // Repeat lookups for the same page are answered from the in-memory cache.
+ if ( isset( $this->mapCache[$pageId] ) ) {
+ return $this->mapCache[$pageId];
+ }
+
+ // Drop cached maps until the cached entry count is back within the limit.
+ while ( $this->mapCacheSize > $this->maxMapCacheSize ) {
+ $evictKey = key( $this->mapCache );
+ $this->mapCacheSize -= count( $this->mapCache[$evictKey] );
+ unset( $this->mapCache[$evictKey] );
+ }
+ // Build rev_text_id => rev_id for every revision row of the page.
+ $revIdsByTextId = array();
+ $revisionRows = wfGetDB( DB_SLAVE )->select( 'revision',
+ array( 'rev_id', 'rev_text_id' ),
+ array( 'rev_page' => $pageId ),
+ __METHOD__
+ );
+ foreach ( $revisionRows as $revisionRow ) {
+ $revIdsByTextId[$revisionRow->rev_text_id] = $revisionRow->rev_id;
+ }
+ $this->mapCacheSize += count( $revIdsByTextId );
+ return $this->mapCache[$pageId] = $revIdsByTextId;
+ }
+
+ /**
+ * Modelled on part of HistoryBlobStub::getText().
+ * Check whether the stub's text can still be loaded from the secondary row in the usual way.
+ */
+ function isUnbrokenStub( $stub, $secondaryRow ) {
+ $rowFlags = explode( ',', $secondaryRow->old_flags );
+ $payload = $secondaryRow->old_text;
+
+ // For external rows old_text is a URL: reject it if nothing follows "://", otherwise fetch it.
+ if ( in_array( 'external', $rowFlags ) ) {
+ $url = $payload;
+ @list( /* $proto */ , $path ) = explode( '://', $url, 2 );
+ if ( $path == "" ) {
+ return false;
+ }
+ $payload = ExternalStore::fetchFromUrl( $url );
+ }
+
+ // Rows without the 'object' flag are reported as broken.
+ if ( !in_array( 'object', $rowFlags ) ) {
+ return false;
+ }
+
+ $serialized = in_array( 'gzip', $rowFlags ) ? gzinflate( $payload ) : $payload;
+ $blob = unserialize( $serialized );
+ if ( !is_object( $blob ) ) {
+ // Correct for old double-serialization bug.
+ $blob = unserialize( $blob );
+ }
+ if ( !is_object( $blob ) ) {
+ return false;
+ }
+
+ // Unbroken means getItem() returns something other than false for the stub's hash.
+ $blob->uncompress();
+ $item = $blob->getItem( $stub['hash'] );
+ return $item !== false;
+ }
+}
+
+$maintClass = 'FixBug20757';
+require_once( DO_MAINTENANCE );
+
diff --git a/maintenance/storage/make-blobs b/maintenance/storage/make-blobs
index 9eb7e83e..36cf9ced 100755
--- a/maintenance/storage/make-blobs
+++ b/maintenance/storage/make-blobs
@@ -1,11 +1,16 @@
#!/bin/bash
-if [ X$2 == X ];then
- echo 'Usage: make-blobs <server> <db>'
+if [ -z "$2" ];then # <server> and <db> are both required; quoted so empty or spaced values test correctly
+ echo 'Usage: make-blobs <server> <db> [<table name>]'
exit 1
fi
+if [ -z "$3" ]; then # optional third argument names the table; falls back to "blobs"
+ table=blobs
+else
+ table=$3
+fi
echo "CREATE DATABASE $2" | mysql -u wikiadmin -p`wikiadmin_pass` -h $1 && \
-mysql -u wikiadmin -p`wikiadmin_pass` -h $1 $2 < blobs.sql
+sed "s/blobs\>/$table/" blobs.sql | mysql -u wikiadmin -p`wikiadmin_pass` -h $1 $2
diff --git a/maintenance/storage/moveToExternal.php b/maintenance/storage/moveToExternal.php
index a8b2f93b..dc11856a 100644
--- a/maintenance/storage/moveToExternal.php
+++ b/maintenance/storage/moveToExternal.php
@@ -62,7 +62,7 @@ function moveToExternal( $cluster, $maxID, $minID = 1 ) {
$res = $dbr->select( 'text', array( 'old_id', 'old_flags', 'old_text' ),
array(
"old_id BETWEEN $blockStart AND $blockEnd",
- "old_flags NOT LIKE '%external%'",
+ 'old_flags NOT ' . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ),
), $fname );
while ( $row = $dbr->fetchObject( $res ) ) {
# Resolve stubs
diff --git a/maintenance/storage/orphanStats.php b/maintenance/storage/orphanStats.php
index afea815e..63f9025b 100644
--- a/maintenance/storage/orphanStats.php
+++ b/maintenance/storage/orphanStats.php
@@ -2,21 +2,43 @@
/**
* Show some statistics on the blob_orphans table, created with trackBlobs.php
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @ingroup Maintenance ExternalStorage
*/
-require_once( dirname(__FILE__).'/../commandLine.inc' );
+require_once( dirname(__FILE__) . '/../Maintenance.php' );
-$stats = new OrphanStats;
-$stats->execute();
+class OrphanStats extends Maintenance {
+ public function __construct() { // Set the script description (same wording as the file header comment).
+ parent::__construct();
+ $this->mDescription = "Show some statistics on the blob_orphans table, created with trackBlobs.php";
+ }
-class OrphanStats {
- function getDB( $cluster ) {
+ private function getDB( $cluster ) {
$lb = wfGetLBFactory()->getExternalLB( $cluster );
return $lb->getConnection( DB_SLAVE );
}
- function execute() {
+ public function execute() {
$extDBs = array();
$dbr = wfGetDB( DB_SLAVE );
+ if( !$dbr->tableExists( 'blob_orphans' ) ) {
+ $this->error( "blob_orphans doesn't seem to exist, need to run trackBlobs.php first", true );
+ }
$res = $dbr->select( 'blob_orphans', '*', false, __METHOD__ );
$num = 0;
@@ -36,11 +58,14 @@ class OrphanStats {
}
unset( $res );
- echo "Number of orphans: $num\n";
+ $this->output( "Number of orphans: $num\n" );
if ( $num > 0 ) {
- echo "Average size: " . round( $totalSize / $num, 0 ) . " bytes\n" .
+ $this->output( "Average size: " . round( $totalSize / $num, 0 ) . " bytes\n" .
"Max size: $maxSize\n" .
- "Number of unique texts: " . count( $hashes ) . "\n";
+ "Number of unique texts: " . count( $hashes ) . "\n" );
}
}
}
+
+$maintClass = "OrphanStats";
+require_once( DO_MAINTENANCE );
diff --git a/maintenance/storage/recompressTracked.php b/maintenance/storage/recompressTracked.php
index d8d2e4ef..e43dbe5c 100644
--- a/maintenance/storage/recompressTracked.php
+++ b/maintenance/storage/recompressTracked.php
@@ -31,11 +31,13 @@ class RecompressTracked {
var $copyOnly = false;
var $isChild = false;
var $slaveId = false;
+ var $noCount = false;
var $debugLog, $infoLog, $criticalLog;
var $store;
static $optionsWithArgs = array( 'procs', 'slave-id', 'debug-log', 'info-log', 'critical-log' );
static $cmdLineOptionMap = array(
+ 'no-count' => 'noCount',
'procs' => 'numProcs',
'copy-only' => 'copyOnly',
'child' => 'isChild',
@@ -259,12 +261,16 @@ class RecompressTracked {
$dbr = wfGetDB( DB_SLAVE );
$i = 0;
$startId = 0;
- $numPages = $dbr->selectField( 'blob_tracking',
- 'COUNT(DISTINCT bt_page)',
- # A condition is required so that this query uses the index
- array( 'bt_moved' => 0 ),
- __METHOD__
- );
+ if ( $this->noCount ) {
+ $numPages = '[unknown]';
+ } else {
+ $numPages = $dbr->selectField( 'blob_tracking',
+ 'COUNT(DISTINCT bt_page)',
+ # A condition is required so that this query uses the index
+ array( 'bt_moved' => 0 ),
+ __METHOD__
+ );
+ }
if ( $this->copyOnly ) {
$this->info( "Copying pages..." );
} else {
@@ -310,7 +316,7 @@ class RecompressTracked {
if ( $current == $end || $this->numBatches >= $this->reportingInterval ) {
$this->numBatches = 0;
$this->info( "$label: $current / $end" );
- wfWaitForSlaves( 5 );
+ $this->waitForSlaves();
}
}
@@ -321,12 +327,16 @@ class RecompressTracked {
$dbr = wfGetDB( DB_SLAVE );
$startId = 0;
$i = 0;
- $numOrphans = $dbr->selectField( 'blob_tracking',
- 'COUNT(DISTINCT bt_text_id)',
- array( 'bt_moved' => 0, 'bt_page' => 0 ),
- __METHOD__ );
- if ( !$numOrphans ) {
- return;
+ if ( $this->noCount ) {
+ $numOrphans = '[unknown]';
+ } else {
+ $numOrphans = $dbr->selectField( 'blob_tracking',
+ 'COUNT(DISTINCT bt_text_id)',
+ array( 'bt_moved' => 0, 'bt_page' => 0 ),
+ __METHOD__ );
+ if ( !$numOrphans ) {
+ return;
+ }
}
if ( $this->copyOnly ) {
$this->info( "Copying orphans..." );
@@ -404,7 +414,7 @@ class RecompressTracked {
case 'quit':
return;
}
- wfWaitForSlaves( 5 );
+ $this->waitForSlaves();
}
}
@@ -469,6 +479,7 @@ class RecompressTracked {
$this->debug( "$titleText: committing blob with " . $trx->getSize() . " items" );
$trx->commit();
$trx = new CgzCopyTransaction( $this, $this->pageBlobClass );
+ $this->waitForSlaves();
}
}
$startId = $row->bt_text_id;
@@ -545,6 +556,9 @@ class RecompressTracked {
$this->debug( 'Incomplete: ' . $res->numRows() . ' rows' );
foreach ( $res as $row ) {
$this->moveTextRow( $row->bt_text_id, $row->bt_new_url );
+ if ( $row->bt_text_id % 10 == 0 ) {
+ $this->waitForSlaves();
+ }
}
$startId = $row->bt_text_id;
}
@@ -604,11 +618,26 @@ class RecompressTracked {
$this->debug( "[orphan]: committing blob with " . $trx->getSize() . " rows" );
$trx->commit();
$trx = new CgzCopyTransaction( $this, $this->orphanBlobClass );
+ $this->waitForSlaves();
}
}
$this->debug( "[orphan]: committing blob with " . $trx->getSize() . " rows" );
$trx->commit();
}
+
+ /**
+ * Block, without printing anything, until the lag reported by wfGetLB()->getMaxLag() is below 2.
+ */
+ function waitForSlaves() {
+ $loadBalancer = wfGetLB();
+ list( $laggedHost, $lag ) = $loadBalancer->getMaxLag();
+ while ( !( $lag < 2 ) ) {
+ // Still lagged: sleep for 5 seconds, then ask again.
+ sleep( 5 );
+ list( $laggedHost, $lag ) = $loadBalancer->getMaxLag();
+ }
+ // NOTE(review): $laggedHost is never read; only the lag value drives the loop.
+ }
}
/**
diff --git a/maintenance/storage/resolveStubs.php b/maintenance/storage/resolveStubs.php
index 3db9e480..346151e9 100644
--- a/maintenance/storage/resolveStubs.php
+++ b/maintenance/storage/resolveStubs.php
@@ -35,11 +35,9 @@ function resolveStubs() {
$res = $dbr->select( 'text', array( 'old_id', 'old_text', 'old_flags' ),
"old_id>=$start AND old_id<=$end " .
- # Using a more restrictive flag set for now, until I do some more analysis -- TS
- #"AND old_flags LIKE '%object%' AND old_flags NOT LIKE '%external%' ".
-
- "AND old_flags='object' " .
- "AND LOWER(LEFT(old_text,22)) = 'O:15:\"historyblobstub\"'", $fname );
+ "AND old_flags LIKE '%object%' AND old_flags NOT LIKE '%external%' ".
+ 'AND LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'',
+ $fname );
while ( $row = $dbr->fetchObject( $res ) ) {
resolveStub( $row->old_id, $row->old_text, $row->old_flags );
}
@@ -69,7 +67,7 @@ function resolveStub( $id, $stubText, $flags ) {
# Get the (maybe) external row
$externalRow = $dbr->selectRow( 'text', array( 'old_text' ),
- array( 'old_id' => $stub->mOldId, "old_flags LIKE '%external%'" ),
+ array( 'old_id' => $stub->mOldId, 'old_flags' . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ) ),
$fname
);
diff --git a/maintenance/storage/storageTypeStats.php b/maintenance/storage/storageTypeStats.php
new file mode 100644
index 00000000..85858620
--- /dev/null
+++ b/maintenance/storage/storageTypeStats.php
@@ -0,0 +1,98 @@
+<?php
+
+require_once( dirname(__FILE__).'/../Maintenance.php' );
+
+class StorageTypeStats extends Maintenance {
+ function execute() { // Count text rows per (old_flags, storage class) in fixed-size old_id bins and print a summary table.
+ $dbr = wfGetDB( DB_SLAVE );
+
+ $endId = $dbr->selectField( 'text', 'MAX(old_id)', false, __METHOD__ );
+ if ( !$endId ) {
+ echo "No text rows!\n";
+ exit( 1 );
+ }
+
+ $rangeStart = 0;
+ $binSize = intval( pow( 10, floor( log10( $endId ) ) - 3 ) ); // 10^(digits-4) of the highest id
+ if ( $binSize < 100 ) {
+ $binSize = 100; // never scan in bins smaller than 100 ids
+ }
+ echo "Using bin size of $binSize\n";
+
+ $stats = array();
+
+ $classSql = <<<SQL
+ IF(old_flags LIKE '%external%',
+ IF(old_text REGEXP '^DB://[[:alnum:]]+/[0-9]+/[0-9a-f]{32}$',
+ 'CGZ pointer',
+ IF(old_text REGEXP '^DB://[[:alnum:]]+/[0-9]+/[0-9]{1,6}$',
+ 'DHB pointer',
+ IF(old_text REGEXP '^DB://[[:alnum:]]+/[0-9]+$',
+ 'simple pointer',
+ 'UNKNOWN pointer'
+ )
+ )
+ ),
+ IF(old_flags LIKE '%object%',
+ TRIM('"' FROM SUBSTRING_INDEX(SUBSTRING_INDEX(old_text, ':', 3), ':', -1)),
+ '[none]'
+ )
+ )
+SQL;
+
+ for ( $rangeStart = 0; $rangeStart <= $endId; $rangeStart += $binSize ) { // "<=": bins are [start, start+binSize), so "<" skipped MAX(old_id) when it is a multiple of $binSize
+ if ( $rangeStart / $binSize % 10 == 0 ) {
+ echo "$rangeStart\r"; // progress line, every tenth bin
+ }
+ $res = $dbr->select(
+ 'text',
+ array(
+ 'old_flags',
+ "$classSql AS class",
+ 'COUNT(*) as count',
+ ),
+ array(
+ 'old_id >= ' . intval( $rangeStart ),
+ 'old_id < ' . intval( $rangeStart + $binSize )
+ ),
+ __METHOD__,
+ array( 'GROUP BY' => 'old_flags, class' )
+ );
+
+ foreach ( $res as $row ) {
+ $flags = $row->old_flags;
+ if ( $flags === '' ) {
+ $flags = '[none]';
+ }
+ $class = $row->class;
+ $count = $row->count;
+ if ( !isset( $stats[$flags][$class] ) ) {
+ $stats[$flags][$class] = array(
+ 'count' => 0,
+ 'first' => $rangeStart, // start of the first bin in which this combination was seen
+ 'last' => 0
+ );
+ }
+ $entry =& $stats[$flags][$class];
+ $entry['count'] += $count;
+ $entry['last'] = max( $entry['last'], $rangeStart + $binSize ); // exclusive end of the latest bin seen
+ unset( $entry ); // break the reference before the next iteration reuses the name
+ }
+ }
+ echo "\n\n";
+
+ $format = "%-29s %-39s %-19s %-29s\n";
+ printf( $format, "Flags", "Class", "Count", "old_id range" );
+ echo str_repeat( '-', 120 ) . "\n";
+ foreach ( $stats as $flags => $flagStats ) {
+ foreach ( $flagStats as $class => $entry ) {
+ printf( $format, $flags, $class, $entry['count'],
+ sprintf( "%-13d - %-13d", $entry['first'], $entry['last'] ) );
+ }
+ }
+ }
+}
+
+$maintClass = 'StorageTypeStats';
+require_once( DO_MAINTENANCE );
+
diff --git a/maintenance/storage/trackBlobs.php b/maintenance/storage/trackBlobs.php
index 0f25fb94..63327d53 100644
--- a/maintenance/storage/trackBlobs.php
+++ b/maintenance/storage/trackBlobs.php
@@ -12,6 +12,7 @@ if ( count( $args ) < 1 ) {
}
$tracker = new TrackBlobs( $args );
$tracker->run();
+echo "All done.\n";
class TrackBlobs {
var $clusters, $textClause;
@@ -59,7 +60,7 @@ class TrackBlobs {
if ( $this->textClause != '' ) {
$this->textClause .= ' OR ';
}
- $this->textClause .= 'old_text LIKE ' . $dbr->addQuotes( $dbr->escapeLike( "DB://$cluster/" ) . '%' );
+ $this->textClause .= 'old_text' . $dbr->buildLike( "DB://$cluster/", $dbr->anyString() );
}
}
return $this->textClause;
@@ -72,7 +73,7 @@ class TrackBlobs {
return array(
'cluster' => $m[1],
'id' => intval( $m[2] ),
- 'hash' => isset( $m[3] ) ? $m[2] : null
+ 'hash' => isset( $m[3] ) ? $m[3] : null
);
}
@@ -98,7 +99,7 @@ class TrackBlobs {
'rev_id > ' . $dbr->addQuotes( $startId ),
'rev_text_id=old_id',
$textClause,
- "old_flags LIKE '%external%'",
+ 'old_flags ' . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ),
),
__METHOD__,
array(
@@ -174,7 +175,7 @@ class TrackBlobs {
array(
'old_id>' . $dbr->addQuotes( $startId ),
$textClause,
- "old_flags LIKE '%external%'",
+ 'old_flags ' . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ),
'bt_text_id IS NULL'
),
__METHOD__,
@@ -263,6 +264,10 @@ class TrackBlobs {
if ( is_null( $table ) ) {
$table = 'blobs';
}
+ if ( !$extDB->tableExists( $table ) ) {
+ echo "No blobs table on cluster $cluster\n";
+ continue;
+ }
$startId = 0;
$batchesDone = 0;
$actualBlobs = gmp_init( 0 );
@@ -300,6 +305,7 @@ class TrackBlobs {
// Traverse the orphan list
$insertBatch = array();
$id = 0;
+ $numOrphans = 0;
while ( true ) {
$id = gmp_scan1( $orphans, $id );
if ( $id == -1 ) {
@@ -309,12 +315,18 @@ class TrackBlobs {
'bo_cluster' => $cluster,
'bo_blob_id' => $id
);
+ if ( count( $insertBatch ) > $this->batchSize ) {
+ $dbw->insert( 'blob_orphans', $insertBatch, __METHOD__ );
+ $insertBatch = array();
+ }
+
++$id;
+ ++$numOrphans;
}
-
- // Insert the batch
- echo "Found " . count( $insertBatch ) . " orphan(s) in $cluster\n";
- $dbw->insert( 'blob_orphans', $insertBatch, __METHOD__ );
+ if ( $insertBatch ) {
+ $dbw->insert( 'blob_orphans', $insertBatch, __METHOD__ );
+ }
+ echo "Found $numOrphans orphan(s) in $cluster\n";
}
}
}