From 222b01f5169f1c7e69762e0e8904c24f78f71882 Mon Sep 17 00:00:00 2001 From: Pierre Schmitz Date: Wed, 28 Jul 2010 11:52:48 +0200 Subject: update to MediaWiki 1.16.0 --- maintenance/storage/compressOld.inc | 6 +- maintenance/storage/compressOld.php | 2 +- maintenance/storage/dumpRev.php | 111 ++++++----- maintenance/storage/fixBug20757.php | 314 ++++++++++++++++++++++++++++++ maintenance/storage/make-blobs | 11 +- maintenance/storage/moveToExternal.php | 2 +- maintenance/storage/orphanStats.php | 43 +++- maintenance/storage/recompressTracked.php | 57 ++++-- maintenance/storage/resolveStubs.php | 10 +- maintenance/storage/storageTypeStats.php | 98 ++++++++++ maintenance/storage/trackBlobs.php | 28 ++- 11 files changed, 594 insertions(+), 88 deletions(-) create mode 100644 maintenance/storage/fixBug20757.php create mode 100644 maintenance/storage/storageTypeStats.php (limited to 'maintenance/storage') diff --git a/maintenance/storage/compressOld.inc b/maintenance/storage/compressOld.inc index fb8cc422..981cfda5 100644 --- a/maintenance/storage/compressOld.inc +++ b/maintenance/storage/compressOld.inc @@ -57,7 +57,8 @@ function compressPage( $row, $extdb ) { 'old_text' => $compress ), array( /* WHERE */ 'old_id' => $row->old_id - ), $fname, 'LIMIT 1' + ), $fname, + array( 'LIMIT' => 1 ) ); return true; } @@ -104,7 +105,8 @@ function compressWithConcat( $startId, $maxChunkSize, $beginDate, # overwriting bulk storage concat rows. Don't compress external references, because # the script doesn't yet delete rows from external storage. $conds = array( - "old_flags NOT LIKE '%object%' AND old_flags NOT LIKE '%external%'"); + 'old_flags NOT ' . $dbr->buildLike( $dbr->anyString(), 'object', $dbr->anyString() ) . ' AND old_flags NOT ' + . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ) ); if ( $beginDate ) { if ( !preg_match( '/^\d{14}$/', $beginDate ) ) { diff --git a/maintenance/storage/compressOld.php b/maintenance/storage/compressOld.php index 6f8b48eb..7ff102a5 100644 --- a/maintenance/storage/compressOld.php +++ b/maintenance/storage/compressOld.php @@ -68,6 +68,6 @@ if ( $success ) { print "Done.\n"; } -exit(); +exit(0); diff --git a/maintenance/storage/dumpRev.php b/maintenance/storage/dumpRev.php index c84d8aa5..95404244 100644 --- a/maintenance/storage/dumpRev.php +++ b/maintenance/storage/dumpRev.php @@ -1,56 +1,79 @@ selectRow( - array( 'text', 'revision' ), - array( 'old_flags', 'old_text' ), - array( 'old_id=rev_text_id', 'rev_id' => $args[0] ) -); -if ( !$row ) { - print "Row not found\n"; - exit; -} +class DumpRev extends Maintenance { + public function __construct() { + parent::__construct(); + $this->addArg( 'rev-id', 'Revision ID', true ); + } -$flags = explode( ',', $row->old_flags ); -$text = $row->old_text; -if ( in_array( 'external', $flags ) ) { - print "External $text\n"; - if ( preg_match( '!^DB://(\w+)/(\w+)/(\w+)$!', $text, $m ) ) { - $es = ExternalStore::getStoreObject( 'DB' ); - $blob = $es->fetchBlob( $m[1], $m[2], $m[3] ); - if ( strtolower( get_class( $blob ) ) == 'concatenatedgziphistoryblob' ) { - print "Found external CGZ\n"; - $blob->uncompress(); - print "Items: (" . implode( ', ', array_keys( $blob->mItems ) ) . ")\n"; - $text = $blob->getItem( $m[3] ); + public function execute() { + $dbr = wfGetDB( DB_SLAVE ); + $row = $dbr->selectRow( + array( 'text', 'revision' ), + array( 'old_flags', 'old_text' ), + array( 'old_id=rev_text_id', 'rev_id' => $this->getArg() ) + ); + if ( !$row ) { + $this->error( "Row not found", true ); + } + + $flags = explode( ',', $row->old_flags ); + $text = $row->old_text; + if ( in_array( 'external', $flags ) ) { + $this->output( "External $text\n" ); + if ( preg_match( '!^DB://(\w+)/(\w+)/(\w+)$!', $text, $m ) ) { + $es = ExternalStore::getStoreObject( 'DB' ); + $blob = $es->fetchBlob( $m[1], $m[2], $m[3] ); + if ( strtolower( get_class( $blob ) ) == 'concatenatedgziphistoryblob' ) { + $this->output( "Found external CGZ\n" ); + $blob->uncompress(); + $this->output( "Items: (" . implode( ', ', array_keys( $blob->mItems ) ) . ")\n" ); + $text = $blob->getItem( $m[3] ); + } else { + $this->output( "CGZ expected at $text, got " . gettype( $blob ) . "\n" ); + $text = $blob; + } + } else { + $this->output( "External plain $text\n" ); + $text = ExternalStore::fetchFromURL( $text ); + } + } + if ( in_array( 'gzip', $flags ) ) { + $text = gzinflate( $text ); + } + if ( in_array( 'object', $flags ) ) { + $obj = unserialize( $text ); + $text = $obj->getText(); + } + + if ( is_object( $text ) ) { + $this->error( "Unexpectedly got object of type: " . get_class( $text ) ); } else { - print "CGZ expected at $text, got " . gettype( $blob ) . "\n"; - $text = $blob; + $this->output( "Text length: " . strlen( $text ) ."\n" ); + $this->output( substr( $text, 0, 100 ) . "\n" ); } - } else { - print "External plain $text\n"; - $text = ExternalStore::fetchFromURL( $text ); } } -if ( in_array( 'gzip', $flags ) ) { - $text = gzinflate( $text ); -} -if ( in_array( 'object', $flags ) ) { - $text = unserialize( $text ); -} -if ( is_object( $text ) ) { - print "Unexpectedly got object of type: " . get_class( $text ) . "\n"; -} else { - print "Text length: " . strlen( $text ) ."\n"; - print substr( $text, 0, 100 ) . "\n"; -} +$maintClass = "DumpRev"; +require_once( DO_MAINTENANCE ); diff --git a/maintenance/storage/fixBug20757.php b/maintenance/storage/fixBug20757.php new file mode 100644 index 00000000..922d4725 --- /dev/null +++ b/maintenance/storage/fixBug20757.php @@ -0,0 +1,314 @@ +mDescription = 'Script to fix bug 20757 assuming that blob_tracking is intact'; + $this->addOption( 'dry-run', 'Report only' ); + $this->addOption( 'start', 'old_id to start at', false, true ); + } + + function execute() { + $dbr = wfGetDB( DB_SLAVE ); + $dbw = wfGetDB( DB_MASTER ); + + $dryRun = $this->getOption( 'dry-run' ); + if ( $dryRun ) { + print "Dry run only.\n"; + } + + $startId = $this->getOption( 'start', 0 ); + $numGood = 0; + $numFixed = 0; + $numBad = 0; + + $totalRevs = $dbr->selectField( 'text', 'MAX(old_id)', false, __METHOD__ ); + + while ( true ) { + print "ID: $startId / $totalRevs\r"; + + $res = $dbr->select( + 'text', + array( 'old_id', 'old_flags', 'old_text' ), + array( + 'old_id > ' . intval( $startId ), + 'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\'', + 'LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'', + ), + __METHOD__, + array( + 'ORDER BY' => 'old_id', + 'LIMIT' => $this->batchSize, + ) + ); + + if ( !$res->numRows() ) { + break; + } + + $secondaryIds = array(); + $stubs = array(); + + foreach ( $res as $row ) { + $startId = $row->old_id; + + // Basic sanity checks + $obj = unserialize( $row->old_text ); + if ( $obj === false ) { + print "{$row->old_id}: unrecoverable: cannot unserialize\n"; + ++$numBad; + continue; + } + + if ( !is_object( $obj ) ) { + print "{$row->old_id}: unrecoverable: unserialized to type " . + gettype( $obj ) . ", possible double-serialization\n"; + ++$numBad; + continue; + } + + if ( strtolower( get_class( $obj ) ) !== 'historyblobstub' ) { + print "{$row->old_id}: unrecoverable: unexpected object class " . + get_class( $obj ) . "\n"; + ++$numBad; + continue; + } + + // Process flags + $flags = explode( ',', $row->old_flags ); + if ( in_array( 'utf-8', $flags ) || in_array( 'utf8', $flags ) ) { + $legacyEncoding = false; + } else { + $legacyEncoding = true; + } + + // Queue the stub for future batch processing + $id = intval( $obj->mOldId ); + $secondaryIds[] = $id; + $stubs[$row->old_id] = array( + 'legacyEncoding' => $legacyEncoding, + 'secondaryId' => $id, + 'hash' => $obj->mHash, + ); + } + + $secondaryIds = array_unique( $secondaryIds ); + + if ( !count( $secondaryIds ) ) { + continue; + } + + // Run the batch query on blob_tracking + $res = $dbr->select( + 'blob_tracking', + '*', + array( + 'bt_text_id' => $secondaryIds, + ), + __METHOD__ + ); + $trackedBlobs = array(); + foreach ( $res as $row ) { + $trackedBlobs[$row->bt_text_id] = $row; + } + + // Process the stubs + $stubsToFix = array(); + foreach ( $stubs as $primaryId => $stub ) { + $secondaryId = $stub['secondaryId']; + if ( !isset( $trackedBlobs[$secondaryId] ) ) { + // No tracked blob. Work out what went wrong + $secondaryRow = $dbr->selectRow( + 'text', + array( 'old_flags', 'old_text' ), + array( 'old_id' => $secondaryId ), + __METHOD__ + ); + if ( !$secondaryRow ) { + print "$primaryId: unrecoverable: secondary row is missing\n"; + ++$numBad; + } elseif ( $this->isUnbrokenStub( $stub, $secondaryRow ) ) { + // Not broken yet, and not in the tracked clusters so it won't get + // broken by the current RCT run. + ++$numGood; + } elseif ( strpos( $secondaryRow->old_flags, 'external' ) !== false ) { + print "$primaryId: unrecoverable: secondary gone to {$secondaryRow->old_text}\n"; + ++$numBad; + } else { + print "$primaryId: unrecoverable: miscellaneous corruption of secondary row\n"; + ++$numBad; + } + unset( $stubs[$primaryId] ); + continue; + } + $trackRow = $trackedBlobs[$secondaryId]; + + // Check that the specified text really is available in the tracked source row + $url = "DB://{$trackRow->bt_cluster}/{$trackRow->bt_blob_id}/{$stub['hash']}"; + $text = ExternalStore::fetchFromURL( $url ); + if ( $text === false ) { + print "$primaryId: unrecoverable: source text missing\n"; + ++$numBad; + unset( $stubs[$primaryId] ); + continue; + } + if ( md5( $text ) !== $stub['hash'] ) { + print "$primaryId: unrecoverable: content hashes do not match\n"; + ++$numBad; + unset( $stubs[$primaryId] ); + continue; + } + + // Find the page_id and rev_id + // The page is probably the same as the page of the secondary row + $pageId = intval( $trackRow->bt_page ); + if ( !$pageId ) { + $revId = $pageId = 0; + } else { + $revId = $this->findTextIdInPage( $pageId, $primaryId ); + if ( !$revId ) { + // Actually an orphan + $pageId = $revId = 0; + } + } + + $newFlags = $stub['legacyEncoding'] ? 'external' : 'external,utf-8'; + + if ( !$dryRun ) { + // Reset the text row to point to the original copy + $dbw->begin(); + $dbw->update( + 'text', + // SET + array( + 'old_flags' => $newFlags, + 'old_text' => $url + ), + // WHERE + array( 'old_id' => $primaryId ), + __METHOD__ + ); + + // Add a blob_tracking row so that the new reference can be recompressed + // without needing to run trackBlobs.php again + $dbw->insert( 'blob_tracking', + array( + 'bt_page' => $pageId, + 'bt_rev_id' => $revId, + 'bt_text_id' => $primaryId, + 'bt_cluster' => $trackRow->bt_cluster, + 'bt_blob_id' => $trackRow->bt_blob_id, + 'bt_cgz_hash' => $stub['hash'], + 'bt_new_url' => null, + 'bt_moved' => 0, + ), + __METHOD__ + ); + $dbw->commit(); + $this->waitForSlaves(); + } + + print "$primaryId: resolved to $url\n"; + ++$numFixed; + } + } + + print "\n"; + print "Fixed: $numFixed\n"; + print "Unrecoverable: $numBad\n"; + print "Good stubs: $numGood\n"; + } + + function waitForSlaves() { + static $iteration = 0; + ++$iteration; + if ( ++$iteration > 50 == 0 ) { + wfWaitForSlaves( 5 ); + $iteration = 0; + } + } + + function findTextIdInPage( $pageId, $textId ) { + $ids = $this->getRevTextMap( $pageId ); + if ( !isset( $ids[$textId] ) ) { + return null; + } else { + return $ids[$textId]; + } + } + + function getRevTextMap( $pageId ) { + if ( !isset( $this->mapCache[$pageId] ) ) { + // Limit cache size + while ( $this->mapCacheSize > $this->maxMapCacheSize ) { + $key = key( $this->mapCache ); + $this->mapCacheSize -= count( $this->mapCache[$key] ); + unset( $this->mapCache[$key] ); + } + + $dbr = wfGetDB( DB_SLAVE ); + $map = array(); + $res = $dbr->select( 'revision', + array( 'rev_id', 'rev_text_id' ), + array( 'rev_page' => $pageId ), + __METHOD__ + ); + foreach ( $res as $row ) { + $map[$row->rev_text_id] = $row->rev_id; + } + $this->mapCache[$pageId] = $map; + $this->mapCacheSize += count( $map ); + } + return $this->mapCache[$pageId]; + } + + /** + * This is based on part of HistoryBlobStub::getText(). + * Determine if the text can be retrieved from the row in the normal way. + */ + function isUnbrokenStub( $stub, $secondaryRow ) { + $flags = explode( ',', $secondaryRow->old_flags ); + $text = $secondaryRow->old_text; + if( in_array( 'external', $flags ) ) { + $url = $text; + @list( /* $proto */ , $path ) = explode( '://', $url, 2 ); + if ( $path == "" ) { + return false; + } + $text = ExternalStore::fetchFromUrl( $url ); + } + if( !in_array( 'object', $flags ) ) { + return false; + } + + if( in_array( 'gzip', $flags ) ) { + $obj = unserialize( gzinflate( $text ) ); + } else { + $obj = unserialize( $text ); + } + + if( !is_object( $obj ) ) { + // Correct for old double-serialization bug. + $obj = unserialize( $obj ); + } + + if ( !is_object( $obj ) ) { + return false; + } + + $obj->uncompress(); + $text = $obj->getItem( $stub['hash'] ); + return $text !== false; + } +} + +$maintClass = 'FixBug20757'; +require_once( DO_MAINTENANCE ); + diff --git a/maintenance/storage/make-blobs b/maintenance/storage/make-blobs index 9eb7e83e..36cf9ced 100755 --- a/maintenance/storage/make-blobs +++ b/maintenance/storage/make-blobs @@ -1,11 +1,16 @@ #!/bin/bash -if [ X$2 == X ];then - echo 'Usage: make-blobs ' +if [ -z $2 ];then + echo 'Usage: make-blobs []' exit 1 fi +if [ -z $3 ]; then + table=blobs +else + table=$3 +fi echo "CREATE DATABASE $2" | mysql -u wikiadmin -p`wikiadmin_pass` -h $1 && \ -mysql -u wikiadmin -p`wikiadmin_pass` -h $1 $2 < blobs.sql +sed "s/blobs\>/$table/" blobs.sql | mysql -u wikiadmin -p`wikiadmin_pass` -h $1 $2 diff --git a/maintenance/storage/moveToExternal.php b/maintenance/storage/moveToExternal.php index a8b2f93b..dc11856a 100644 --- a/maintenance/storage/moveToExternal.php +++ b/maintenance/storage/moveToExternal.php @@ -62,7 +62,7 @@ function moveToExternal( $cluster, $maxID, $minID = 1 ) { $res = $dbr->select( 'text', array( 'old_id', 'old_flags', 'old_text' ), array( "old_id BETWEEN $blockStart AND $blockEnd", - "old_flags NOT LIKE '%external%'", + 'old_flags NOT ' . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ), ), $fname ); while ( $row = $dbr->fetchObject( $res ) ) { # Resolve stubs diff --git a/maintenance/storage/orphanStats.php b/maintenance/storage/orphanStats.php index afea815e..63f9025b 100644 --- a/maintenance/storage/orphanStats.php +++ b/maintenance/storage/orphanStats.php @@ -2,21 +2,43 @@ /** * Show some statistics on the blob_orphans table, created with trackBlobs.php + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @ingroup Maintenance ExternalStorage */ -require_once( dirname(__FILE__).'/../commandLine.inc' ); +require_once( dirname(__FILE__) . '/../Maintenance.php' ); -$stats = new OrphanStats; -$stats->execute(); +class OrphanStats extends Maintenance { + public function __construct() { + parent::__construct(); + $this->mDescription = "how some statistics on the blob_orphans table, created with trackBlobs.php"; + } -class OrphanStats { - function getDB( $cluster ) { + private function getDB( $cluster ) { $lb = wfGetLBFactory()->getExternalLB( $cluster ); return $lb->getConnection( DB_SLAVE ); } - function execute() { + public function execute() { $extDBs = array(); $dbr = wfGetDB( DB_SLAVE ); + if( !$dbr->tableExists( 'blob_orphans' ) ) { + $this->error( "blob_orphans doesn't seem to exist, need to run trackBlobs.php first", true ); + } $res = $dbr->select( 'blob_orphans', '*', false, __METHOD__ ); $num = 0; @@ -36,11 +58,14 @@ class OrphanStats { } unset( $res ); - echo "Number of orphans: $num\n"; + $this->output( "Number of orphans: $num\n" ); if ( $num > 0 ) { - echo "Average size: " . round( $totalSize / $num, 0 ) . " bytes\n" . + $this->output( "Average size: " . round( $totalSize / $num, 0 ) . " bytes\n" . "Max size: $maxSize\n" . - "Number of unique texts: " . count( $hashes ) . "\n"; + "Number of unique texts: " . count( $hashes ) . "\n" ); } } } + +$maintClass = "OrphanStats"; +require_once( DO_MAINTENANCE ); diff --git a/maintenance/storage/recompressTracked.php b/maintenance/storage/recompressTracked.php index d8d2e4ef..e43dbe5c 100644 --- a/maintenance/storage/recompressTracked.php +++ b/maintenance/storage/recompressTracked.php @@ -31,11 +31,13 @@ class RecompressTracked { var $copyOnly = false; var $isChild = false; var $slaveId = false; + var $noCount = false; var $debugLog, $infoLog, $criticalLog; var $store; static $optionsWithArgs = array( 'procs', 'slave-id', 'debug-log', 'info-log', 'critical-log' ); static $cmdLineOptionMap = array( + 'no-count' => 'noCount', 'procs' => 'numProcs', 'copy-only' => 'copyOnly', 'child' => 'isChild', @@ -259,12 +261,16 @@ class RecompressTracked { $dbr = wfGetDB( DB_SLAVE ); $i = 0; $startId = 0; - $numPages = $dbr->selectField( 'blob_tracking', - 'COUNT(DISTINCT bt_page)', - # A condition is required so that this query uses the index - array( 'bt_moved' => 0 ), - __METHOD__ - ); + if ( $this->noCount ) { + $numPages = '[unknown]'; + } else { + $numPages = $dbr->selectField( 'blob_tracking', + 'COUNT(DISTINCT bt_page)', + # A condition is required so that this query uses the index + array( 'bt_moved' => 0 ), + __METHOD__ + ); + } if ( $this->copyOnly ) { $this->info( "Copying pages..." ); } else { @@ -310,7 +316,7 @@ class RecompressTracked { if ( $current == $end || $this->numBatches >= $this->reportingInterval ) { $this->numBatches = 0; $this->info( "$label: $current / $end" ); - wfWaitForSlaves( 5 ); + $this->waitForSlaves(); } } @@ -321,12 +327,16 @@ class RecompressTracked { $dbr = wfGetDB( DB_SLAVE ); $startId = 0; $i = 0; - $numOrphans = $dbr->selectField( 'blob_tracking', - 'COUNT(DISTINCT bt_text_id)', - array( 'bt_moved' => 0, 'bt_page' => 0 ), - __METHOD__ ); - if ( !$numOrphans ) { - return; + if ( $this->noCount ) { + $numOrphans = '[unknown]'; + } else { + $numOrphans = $dbr->selectField( 'blob_tracking', + 'COUNT(DISTINCT bt_text_id)', + array( 'bt_moved' => 0, 'bt_page' => 0 ), + __METHOD__ ); + if ( !$numOrphans ) { + return; + } } if ( $this->copyOnly ) { $this->info( "Copying orphans..." ); @@ -404,7 +414,7 @@ class RecompressTracked { case 'quit': return; } - wfWaitForSlaves( 5 ); + $this->waitForSlaves(); } } @@ -469,6 +479,7 @@ class RecompressTracked { $this->debug( "$titleText: committing blob with " . $trx->getSize() . " items" ); $trx->commit(); $trx = new CgzCopyTransaction( $this, $this->pageBlobClass ); + $this->waitForSlaves(); } } $startId = $row->bt_text_id; @@ -545,6 +556,9 @@ class RecompressTracked { $this->debug( 'Incomplete: ' . $res->numRows() . ' rows' ); foreach ( $res as $row ) { $this->moveTextRow( $row->bt_text_id, $row->bt_new_url ); + if ( $row->bt_text_id % 10 == 0 ) { + $this->waitForSlaves(); + } } $startId = $row->bt_text_id; } @@ -604,11 +618,26 @@ class RecompressTracked { $this->debug( "[orphan]: committing blob with " . $trx->getSize() . " rows" ); $trx->commit(); $trx = new CgzCopyTransaction( $this, $this->orphanBlobClass ); + $this->waitForSlaves(); } } $this->debug( "[orphan]: committing blob with " . $trx->getSize() . " rows" ); $trx->commit(); } + + /** + * Wait for slaves (quietly) + */ + function waitForSlaves() { + $lb = wfGetLB(); + while ( true ) { + list( $host, $maxLag ) = $lb->getMaxLag(); + if ( $maxLag < 2 ) { + break; + } + sleep( 5 ); + } + } } /** diff --git a/maintenance/storage/resolveStubs.php b/maintenance/storage/resolveStubs.php index 3db9e480..346151e9 100644 --- a/maintenance/storage/resolveStubs.php +++ b/maintenance/storage/resolveStubs.php @@ -35,11 +35,9 @@ function resolveStubs() { $res = $dbr->select( 'text', array( 'old_id', 'old_text', 'old_flags' ), "old_id>=$start AND old_id<=$end " . - # Using a more restrictive flag set for now, until I do some more analysis -- TS - #"AND old_flags LIKE '%object%' AND old_flags NOT LIKE '%external%' ". - - "AND old_flags='object' " . - "AND LOWER(LEFT(old_text,22)) = 'O:15:\"historyblobstub\"'", $fname ); + "AND old_flags LIKE '%object%' AND old_flags NOT LIKE '%external%' ". + 'AND LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'', + $fname ); while ( $row = $dbr->fetchObject( $res ) ) { resolveStub( $row->old_id, $row->old_text, $row->old_flags ); } @@ -69,7 +67,7 @@ function resolveStub( $id, $stubText, $flags ) { # Get the (maybe) external row $externalRow = $dbr->selectRow( 'text', array( 'old_text' ), - array( 'old_id' => $stub->mOldId, "old_flags LIKE '%external%'" ), + array( 'old_id' => $stub->mOldId, 'old_flags' . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ) ), $fname ); diff --git a/maintenance/storage/storageTypeStats.php b/maintenance/storage/storageTypeStats.php new file mode 100644 index 00000000..85858620 --- /dev/null +++ b/maintenance/storage/storageTypeStats.php @@ -0,0 +1,98 @@ +selectField( 'text', 'MAX(old_id)', false, __METHOD__ ); + if ( !$endId ) { + echo "No text rows!\n"; + exit( 1 ); + } + + $rangeStart = 0; + $binSize = intval( pow( 10, floor( log10( $endId ) ) - 3 ) ); + if ( $binSize < 100 ) { + $binSize = 100; + } + echo "Using bin size of $binSize\n"; + + $stats = array(); + + $classSql = <<select( + 'text', + array( + 'old_flags', + "$classSql AS class", + 'COUNT(*) as count', + ), + array( + 'old_id >= ' . intval( $rangeStart ), + 'old_id < ' . intval( $rangeStart + $binSize ) + ), + __METHOD__, + array( 'GROUP BY' => 'old_flags, class' ) + ); + + foreach ( $res as $row ) { + $flags = $row->old_flags; + if ( $flags === '' ) { + $flags = '[none]'; + } + $class = $row->class; + $count = $row->count; + if ( !isset( $stats[$flags][$class] ) ) { + $stats[$flags][$class] = array( + 'count' => 0, + 'first' => $rangeStart, + 'last' => 0 + ); + } + $entry =& $stats[$flags][$class]; + $entry['count'] += $count; + $entry['last'] = max( $entry['last'], $rangeStart + $binSize ); + unset( $entry ); + } + } + echo "\n\n"; + + $format = "%-29s %-39s %-19s %-29s\n"; + printf( $format, "Flags", "Class", "Count", "old_id range" ); + echo str_repeat( '-', 120 ) . "\n"; + foreach ( $stats as $flags => $flagStats ) { + foreach ( $flagStats as $class => $entry ) { + printf( $format, $flags, $class, $entry['count'], + sprintf( "%-13d - %-13d", $entry['first'], $entry['last'] ) ); + } + } + } +} + +$maintClass = 'StorageTypeStats'; +require_once( DO_MAINTENANCE ); + diff --git a/maintenance/storage/trackBlobs.php b/maintenance/storage/trackBlobs.php index 0f25fb94..63327d53 100644 --- a/maintenance/storage/trackBlobs.php +++ b/maintenance/storage/trackBlobs.php @@ -12,6 +12,7 @@ if ( count( $args ) < 1 ) { } $tracker = new TrackBlobs( $args ); $tracker->run(); +echo "All done.\n"; class TrackBlobs { var $clusters, $textClause; @@ -59,7 +60,7 @@ class TrackBlobs { if ( $this->textClause != '' ) { $this->textClause .= ' OR '; } - $this->textClause .= 'old_text LIKE ' . $dbr->addQuotes( $dbr->escapeLike( "DB://$cluster/" ) . '%' ); + $this->textClause .= 'old_text' . $dbr->buildLike( "DB://$cluster/", $dbr->anyString() ); } } return $this->textClause; @@ -72,7 +73,7 @@ class TrackBlobs { return array( 'cluster' => $m[1], 'id' => intval( $m[2] ), - 'hash' => isset( $m[3] ) ? $m[2] : null + 'hash' => isset( $m[3] ) ? $m[3] : null ); } @@ -98,7 +99,7 @@ class TrackBlobs { 'rev_id > ' . $dbr->addQuotes( $startId ), 'rev_text_id=old_id', $textClause, - "old_flags LIKE '%external%'", + 'old_flags ' . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ), ), __METHOD__, array( @@ -174,7 +175,7 @@ class TrackBlobs { array( 'old_id>' . $dbr->addQuotes( $startId ), $textClause, - "old_flags LIKE '%external%'", + 'old_flags ' . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ), 'bt_text_id IS NULL' ), __METHOD__, @@ -263,6 +264,10 @@ class TrackBlobs { if ( is_null( $table ) ) { $table = 'blobs'; } + if ( !$extDB->tableExists( $table ) ) { + echo "No blobs table on cluster $cluster\n"; + continue; + } $startId = 0; $batchesDone = 0; $actualBlobs = gmp_init( 0 ); @@ -300,6 +305,7 @@ class TrackBlobs { // Traverse the orphan list $insertBatch = array(); $id = 0; + $numOrphans = 0; while ( true ) { $id = gmp_scan1( $orphans, $id ); if ( $id == -1 ) { @@ -309,12 +315,18 @@ class TrackBlobs { 'bo_cluster' => $cluster, 'bo_blob_id' => $id ); + if ( count( $insertBatch ) > $this->batchSize ) { + $dbw->insert( 'blob_orphans', $insertBatch, __METHOD__ ); + $insertBatch = array(); + } + ++$id; + ++$numOrphans; } - - // Insert the batch - echo "Found " . count( $insertBatch ) . " orphan(s) in $cluster\n"; - $dbw->insert( 'blob_orphans', $insertBatch, __METHOD__ ); + if ( $insertBatch ) { + $dbw->insert( 'blob_orphans', $insertBatch, __METHOD__ ); + } + echo "Found $numOrphans orphan(s) in $cluster\n"; } } } -- cgit v1.2.2