From d81f562b712f2387fa02290bf2ca86392ab356f2 Mon Sep 17 00:00:00 2001 From: Pierre Schmitz Date: Wed, 11 Oct 2006 20:21:25 +0000 Subject: Aktualisierung auf Version 1.8.1 --- maintenance/FiveUpgrade.inc | 7 +- maintenance/InitialiseMessages.inc | 67 +- maintenance/addwiki.php | 56 +- maintenance/archives/patch-ipb_anon_only.sql | 44 + .../archives/patch-page_no_title_convert.sql | 0 maintenance/archives/patch-profiling.sql | 10 +- .../archives/patch-recentchanges-utindex.sql | 4 + maintenance/attachLatest.php | 6 +- maintenance/backup.inc | 16 +- maintenance/checkUsernames.php | 4 +- maintenance/cleanupDupes.inc | 9 +- maintenance/cleanupImages.php | 168 ++ maintenance/cleanupTable.inc | 86 + maintenance/cleanupTitles.php | 83 +- maintenance/cleanupWatchlist.php | 3 +- maintenance/commandLine.inc | 14 +- maintenance/convertLinks.inc | 4 +- maintenance/createAndPromote.php | 5 +- maintenance/deleteBatch.php | 8 +- maintenance/deleteImageMemcached.php | 6 +- maintenance/deleteRevision.php | 2 +- maintenance/dumpBackup.php | 6 +- maintenance/dumpHTML.inc | 539 ++++- maintenance/dumpHTML.php | 99 +- maintenance/dumpInterwiki.inc | 3 +- maintenance/dumpSisterSites.php | 49 + maintenance/dumpTextPass.php | 34 +- maintenance/dumpUploads.php | 116 + maintenance/fixSlaveDesync.php | 133 +- maintenance/fuzz-tester.php | 2458 ++++++++++++++++++++ maintenance/generateSitemap.php | 11 +- maintenance/importImages.php | 35 +- maintenance/installExtension.php | 642 +++++ maintenance/language/alltrans.php | 16 + maintenance/language/checkLanguage.php | 177 ++ maintenance/language/checktrans.php | 44 + maintenance/language/date-formats.php | 45 + maintenance/language/diffLanguage.php | 159 ++ maintenance/language/dumpMessages.php | 20 + maintenance/language/duplicatetrans.php | 43 + maintenance/language/function-list.php | 44 + maintenance/language/lang2po.php | 154 ++ maintenance/language/langmemusage.php | 30 + maintenance/language/languages.inc | 686 ++++++ 
maintenance/language/splitLanguageFiles.inc | 1168 ++++++++++ maintenance/language/splitLanguageFiles.php | 13 + maintenance/language/transstat.php | 211 ++ maintenance/language/unusedMessages.php | 42 + maintenance/language/validate.php | 40 + maintenance/mctest.php | 15 +- maintenance/mysql5/tables.sql | 23 +- maintenance/namespaceDupes.php | 13 +- maintenance/ourusers.php | 43 +- maintenance/parserTests.inc | 12 +- maintenance/parserTests.php | 2 +- maintenance/parserTests.txt | 413 +++- maintenance/postgres/compare_schemas.pl | 181 ++ maintenance/postgres/tables.sql | 156 +- maintenance/postgres/wp_mysql2postgres.pl | 400 ++++ maintenance/rebuildImages.php | 4 +- maintenance/refreshImageCount.php | 4 +- maintenance/runJobs.php | 12 + maintenance/stats.php | 26 +- maintenance/storage/checkStorage.php | 936 ++++---- maintenance/storage/compressOld.inc | 11 + maintenance/tables.sql | 121 +- maintenance/update.php | 12 +- maintenance/updateSpecialPages.php | 4 +- maintenance/updaters.inc | 144 +- maintenance/userDupes.inc | 12 +- 70 files changed, 9108 insertions(+), 1055 deletions(-) create mode 100644 maintenance/archives/patch-ipb_anon_only.sql create mode 100644 maintenance/archives/patch-page_no_title_convert.sql create mode 100644 maintenance/archives/patch-recentchanges-utindex.sql create mode 100644 maintenance/cleanupImages.php create mode 100644 maintenance/cleanupTable.inc create mode 100644 maintenance/dumpSisterSites.php create mode 100644 maintenance/dumpUploads.php create mode 100644 maintenance/fuzz-tester.php create mode 100644 maintenance/installExtension.php create mode 100644 maintenance/language/alltrans.php create mode 100644 maintenance/language/checkLanguage.php create mode 100644 maintenance/language/checktrans.php create mode 100644 maintenance/language/date-formats.php create mode 100644 maintenance/language/diffLanguage.php create mode 100644 maintenance/language/dumpMessages.php create mode 100644 maintenance/language/duplicatetrans.php 
create mode 100644 maintenance/language/function-list.php create mode 100644 maintenance/language/lang2po.php create mode 100644 maintenance/language/langmemusage.php create mode 100644 maintenance/language/languages.inc create mode 100644 maintenance/language/splitLanguageFiles.inc create mode 100644 maintenance/language/splitLanguageFiles.php create mode 100644 maintenance/language/transstat.php create mode 100644 maintenance/language/unusedMessages.php create mode 100644 maintenance/language/validate.php create mode 100644 maintenance/postgres/compare_schemas.pl create mode 100644 maintenance/postgres/wp_mysql2postgres.pl (limited to 'maintenance') diff --git a/maintenance/FiveUpgrade.inc b/maintenance/FiveUpgrade.inc index 7caf6810..4bbf0733 100644 --- a/maintenance/FiveUpgrade.inc +++ b/maintenance/FiveUpgrade.inc @@ -64,7 +64,7 @@ class FiveUpgrade { function &newConnection() { global $wgDBadminuser, $wgDBadminpassword; global $wgDBserver, $wgDBname; - $db =& new Database( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname ); + $db = new Database( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname ); return $db; } @@ -159,8 +159,7 @@ class FiveUpgrade { * @access private */ function log( $message ) { - global $wgDBname; - echo $wgDBname . ' ' . wfTimestamp( TS_DB ) . ': ' . $message . "\n"; + echo wfWikiID() . ' ' . wfTimestamp( TS_DB ) . ': ' . $message . 
"\n"; flush(); } @@ -804,7 +803,7 @@ END; array_shift( $against ); } - array_push( $pieces, basename( $path ) ); + array_push( $pieces, wfBaseName( $path ) ); return implode( '/', $pieces ); } diff --git a/maintenance/InitialiseMessages.inc b/maintenance/InitialiseMessages.inc index 189fbd25..22e26b94 100644 --- a/maintenance/InitialiseMessages.inc +++ b/maintenance/InitialiseMessages.inc @@ -11,9 +11,9 @@ */ /** */ -function initialiseMessages( $overwrite = false, $messageArray = false ) { +function initialiseMessages( $overwrite = false, $messageArray = false, $outputCallback = false ) { global $wgContLang, $wgContLanguageCode; - global $wgContLangClass, $wgAllMessagesEn; + global $wgContLangClass; global $wgDisableLangConversion; global $wgForceUIMsgAsContentMsg; global $wgLanguageNames; @@ -26,7 +26,7 @@ function initialiseMessages( $overwrite = false, $messageArray = false ) { if ( $messageArray ) { $sortedArray = $messageArray; } else { - $sortedArray = $wgAllMessagesEn; + $sortedArray = Language::getMessagesFor( 'en' ); } ksort( $sortedArray ); @@ -37,11 +37,7 @@ function initialiseMessages( $overwrite = false, $messageArray = false ) { $variants[]=$wgContLanguageCode; foreach ($variants as $v) { - $langclass = 'Language'. str_replace( '-', '_', ucfirst( $v ) ); - if( !class_exists($langclass) ) { - wfDie( "class $langclass not defined. perhaps you need to include the file $langclass.php in $wgContLangClass.php?" 
); - } - $lang = new $langclass; + $lang = Language::factory( $v ); if($v==$wgContLanguageCode) $suffix=''; @@ -69,14 +65,14 @@ function initialiseMessages( $overwrite = false, $messageArray = false ) { } } } - initialiseMessagesReal( $overwrite, $messages ); + initialiseMessagesReal( $overwrite, $messages, $outputCallback ); } /** */ -function initialiseMessagesReal( $overwrite = false, $messageArray = false ) { - global $wgContLang, $wgScript, $wgServer, $wgAllMessagesEn; +function initialiseMessagesReal( $overwrite = false, $messageArray = false, $outputCallback = false ) { + global $wgContLang, $wgScript, $wgServer, $wgLanguageCode; global $wgOut, $wgArticle, $wgUser; - global $wgMessageCache, $wgMemc, $wgDBname, $wgUseMemCached; + global $wgMessageCache, $wgMemc, $wgUseMemCached; # Initialise $wgOut and $wgUser for a command line script $wgOut->disable(); @@ -91,14 +87,24 @@ function initialiseMessagesReal( $overwrite = false, $messageArray = false ) { $fname = 'initialiseMessages'; $ns = NS_MEDIAWIKI; - # cur_user_text responsible for the modifications + # username responsible for the modifications # Don't change it unless you're prepared to update the DBs accordingly, otherwise the - # default messages won't be overwritte + # default messages won't be overwritten $username = 'MediaWiki default'; + if ( !$outputCallback ) { + # Print is not a function, and there doesn't appear to be any built-in + # workalikes, so let's just make our own anonymous function to do the + # same thing. 
+ $outputCallback = create_function( '$s', 'print $s;' ); + } - print "Initialising \"MediaWiki\" namespace...\n"; + $outputCallback( "Initialising \"MediaWiki\" namespace for language code $wgLanguageCode...\n" ); + # Check that the serialized data files are OK + if ( Language::isLocalisationOutOfDate( $wgLanguageCode ) ) { + $outputCallback( "Warning: serialized data file may be out of date.\n" ); + } $dbr =& wfGetDB( DB_SLAVE ); $dbw =& wfGetDB( DB_MASTER ); @@ -107,13 +113,11 @@ function initialiseMessagesReal( $overwrite = false, $messageArray = false ) { $timestamp = wfTimestampNow(); - #$sql = "SELECT cur_title,cur_is_new,cur_user_text FROM $cur WHERE cur_namespace=$ns AND cur_title IN("; - # Get keys from $wgAllMessagesEn, which is more complete than the local language $first = true; if ( $messageArray ) { $sortedArray = $messageArray; } else { - $sortedArray = $wgAllMessagesEn; + $sortedArray = $wgContLang->getAllMessages(); } ksort( $sortedArray ); @@ -132,7 +136,7 @@ function initialiseMessagesReal( $overwrite = false, $messageArray = false ) { foreach ($chunks as $chunk) { $first = true; $sql = "SELECT page_title,page_is_new,rev_user_text FROM $page, $revision WHERE - page_namespace=$ns AND rev_page=page_id AND page_title IN("; + page_namespace=$ns AND rev_id=page_latest AND page_title IN("; foreach ( $chunk as $key => $enMsg ) { if ( $key == '' ) { @@ -171,20 +175,28 @@ function initialiseMessagesReal( $overwrite = false, $messageArray = false ) { $talk = $wgContLang->getNsText( NS_TALK ); $mwtalk = $wgContLang->getNsText( NS_MEDIAWIKI_TALK ); + $numUpdated = 0; + $numKept = 0; + $numInserted = 0; + # Merge these into a single transaction for speed $dbw->begin(); # Process each message - foreach ( $sortedArray as $key => $enMsg ) { + foreach ( $sortedArray as $key => $message ) { if ( $key == '' ) { continue; // Skip odd members } # Get message text - if ( $messageArray ) { - $message = $enMsg; - } else { + if ( !$messageArray ) { $message = 
wfMsgNoDBForContent( $key ); } + if ( is_null( $message ) ) { + # This happens sometimes with out of date serialized data files + $outputCallback( "Warning: Skipping null message $key\n" ); + continue; + } + $titleObj = Title::newFromText( $wgContLang->ucfirst( $key ), NS_MEDIAWIKI ); $title = $titleObj->getDBkey(); @@ -197,7 +209,12 @@ function initialiseMessagesReal( $overwrite = false, $messageArray = false ) { if( is_null( $revision ) || $revision->getText() != $message ) { $article = new Article( $titleObj ); $article->quickEdit( $message ); + ++$numUpdated; + } else { + ++$numKept; } + } else { + ++$numKept; } } else { $article = new Article( $titleObj ); @@ -212,14 +229,14 @@ function initialiseMessagesReal( $overwrite = false, $messageArray = false ) { ) ); $revid = $revision->insertOn( $dbw ); $article->updateRevisionOn( $dbw, $revision ); + ++$numInserted; } } $dbw->commit(); # Clear the relevant memcached key - print 'Clearing message cache...'; $wgMessageCache->clear(); - print "Done.\n"; + $outputCallback( "Done. 
Updated: $numUpdated, inserted: $numInserted, kept: $numKept.\n" ); } /** */ diff --git a/maintenance/addwiki.php b/maintenance/addwiki.php index 253033a3..b7843632 100644 --- a/maintenance/addwiki.php +++ b/maintenance/addwiki.php @@ -33,26 +33,40 @@ function addWiki( $lang, $site, $dbName ) print "Initialising tables\n"; dbsource( "$maintenance/tables.sql", $dbw ); dbsource( "$IP/extensions/OAI/update_table.sql", $dbw ); + dbsource( "$IP/extensions/AntiSpoof/mysql/patch-antispoof.sql", $dbw ); $dbw->query( "INSERT INTO site_stats(ss_row_id) VALUES (1)" ); # Initialise external storage - if ( $wgDefaultExternalStore && preg_match( '!^DB://(.*)$!', $wgDefaultExternalStore, $m ) ) { - print "Initialising external storage...\n"; + if ( is_array( $wgDefaultExternalStore ) ) { + $stores = $wgDefaultExternalStore; + } elseif ( $stores ) { + $stores = array( $wgDefaultExternalStore ); + } else { + $stores = array(); + } + if ( count( $stores ) ) { require_once( 'ExternalStoreDB.php' ); + print "Initialising external storage $store...\n"; global $wgDBuser, $wgDBpassword, $wgExternalServers; - $cluster = $m[1]; - - # Hack - $wgExternalServers[$cluster][0]['user'] = $wgDBuser; - $wgExternalServers[$cluster][0]['password'] = $wgDBpassword; - - $store = new ExternalStoreDB; - $extdb =& $store->getMaster( $cluster ); - $extdb->query( "SET table_type=InnoDB" ); - $extdb->query( "CREATE DATABASE $dbName" ); - $extdb->selectDB( $dbName ); - dbsource( "$maintenance/storage/blobs.sql", $extdb ); - $extdb->immediateCommit(); + foreach ( $stores as $storeURL ) { + if ( !preg_match( '!^DB://(.*)$!', $storeURL, $m ) ) { + continue; + } + + $cluster = $m[1]; + + # Hack + $wgExternalServers[$cluster][0]['user'] = $wgDBuser; + $wgExternalServers[$cluster][0]['password'] = $wgDBpassword; + + $store = new ExternalStoreDB; + $extdb =& $store->getMaster( $cluster ); + $extdb->query( "SET table_type=InnoDB" ); + $extdb->query( "CREATE DATABASE $dbName" ); + $extdb->selectDB( $dbName ); + 
dbsource( "$maintenance/storage/blobs.sql", $extdb ); + $extdb->immediateCommit(); + } } $wgTitle = Title::newMainPage(); @@ -203,7 +217,17 @@ See the [http://www.wikipedia.org Wikipedia portal] for other language Wikipedia fclose( $file ); print "Sourcing interwiki SQL\n"; dbsource( $tempname, $dbw ); - unlink( $tempname ); + #unlink( $tempname ); + + # Create the upload dir + global $wgUploadDirectory; + if( file_exists( $wgUploadDirectory ) ) { + echo "$wgUploadDirectory already exists.\n"; + } else { + echo "Creating $wgUploadDirectory...\n"; + mkdir( $wgUploadDirectory, 0777 ); + chmod( $wgUploadDirectory, 0777 ); + } print "Script ended. You now want to run sync-common-all to publish *dblist files (check them for duplicates first)\n"; } diff --git a/maintenance/archives/patch-ipb_anon_only.sql b/maintenance/archives/patch-ipb_anon_only.sql new file mode 100644 index 00000000..709308a2 --- /dev/null +++ b/maintenance/archives/patch-ipb_anon_only.sql @@ -0,0 +1,44 @@ +-- Add extra option fields to the ipblocks table, add some extra indexes, +-- convert infinity values in ipb_expiry to something that sorts better, +-- extend ipb_address and range fields, add a unique index for block conflict +-- detection. + +-- Conflicts in the new unique index can be handled by creating a new +-- table and inserting into it instead of doing an ALTER TABLE. 
+ + +DROP TABLE IF EXISTS /*$wgDBprefix*/ipblocks_newunique; + +CREATE TABLE /*$wgDBprefix*/ipblocks_newunique ( + ipb_id int(8) NOT NULL auto_increment, + ipb_address tinyblob NOT NULL default '', + ipb_user int(8) unsigned NOT NULL default '0', + ipb_by int(8) unsigned NOT NULL default '0', + ipb_reason tinyblob NOT NULL default '', + ipb_timestamp char(14) binary NOT NULL default '', + ipb_auto bool NOT NULL default 0, + ipb_anon_only bool NOT NULL default 0, + ipb_create_account bool NOT NULL default 1, + ipb_expiry char(14) binary NOT NULL default '', + ipb_range_start tinyblob NOT NULL default '', + ipb_range_end tinyblob NOT NULL default '', + + PRIMARY KEY ipb_id (ipb_id), + UNIQUE INDEX ipb_address_unique (ipb_address(255), ipb_user, ipb_auto), + INDEX ipb_user (ipb_user), + INDEX ipb_range (ipb_range_start(8), ipb_range_end(8)), + INDEX ipb_timestamp (ipb_timestamp), + INDEX ipb_expiry (ipb_expiry) + +) TYPE=InnoDB; + +INSERT IGNORE INTO /*$wgDBprefix*/ipblocks_newunique + (ipb_id, ipb_address, ipb_user, ipb_by, ipb_reason, ipb_timestamp, ipb_auto, ipb_expiry, ipb_range_start, ipb_range_end, ipb_anon_only, ipb_create_account) + SELECT ipb_id, ipb_address, ipb_user, ipb_by, ipb_reason, ipb_timestamp, ipb_auto, ipb_expiry, ipb_range_start, ipb_range_end, 0 , ipb_user=0 + FROM /*$wgDBprefix*/ipblocks; + +DROP TABLE IF EXISTS /*$wgDBprefix*/ipblocks_old; +RENAME TABLE /*$wgDBprefix*/ipblocks TO /*$wgDBprefix*/ipblocks_old; +RENAME TABLE /*$wgDBprefix*/ipblocks_newunique TO /*$wgDBprefix*/ipblocks; + +UPDATE /*$wgDBprefix*/ipblocks SET ipb_expiry='infinity' WHERE ipb_expiry=''; diff --git a/maintenance/archives/patch-page_no_title_convert.sql b/maintenance/archives/patch-page_no_title_convert.sql new file mode 100644 index 00000000..e69de29b diff --git a/maintenance/archives/patch-profiling.sql b/maintenance/archives/patch-profiling.sql index 49b488e9..bafd2b67 100644 --- a/maintenance/archives/patch-profiling.sql +++ b/maintenance/archives/patch-profiling.sql 
@@ -2,9 +2,9 @@ -- This is optional CREATE TABLE /*$wgDBprefix*/profiling ( - pf_count integer not null default 0, - pf_time float not null default 0, - pf_name varchar(255) not null default '', - pf_server varchar(30) not null default '', - UNIQUE KEY pf_name_server (pf_name, pf_server) + pf_count int NOT NULL default 0, + pf_time float NOT NULL default 0, + pf_name varchar(255) NOT NULL default '', + pf_server varchar(30) NOT NULL default '', + UNIQUE KEY pf_name_server (pf_name, pf_server) ) TYPE=HEAP; diff --git a/maintenance/archives/patch-recentchanges-utindex.sql b/maintenance/archives/patch-recentchanges-utindex.sql new file mode 100644 index 00000000..4ebe3165 --- /dev/null +++ b/maintenance/archives/patch-recentchanges-utindex.sql @@ -0,0 +1,4 @@ +--- July 2006 +--- Index on recentchanges.( rc_namespace, rc_user_text ) +--- Helps the username filtering in Special:Newpages +ALTER TABLE /*$wgDBprefix*/recentchanges ADD INDEX `rc_ns_usertext` ( `rc_namespace` , `rc_user_text` ); \ No newline at end of file diff --git a/maintenance/attachLatest.php b/maintenance/attachLatest.php index 024a4fac..f4c11c01 100644 --- a/maintenance/attachLatest.php +++ b/maintenance/attachLatest.php @@ -47,17 +47,17 @@ while( $row = $dbw->fetchObject( $result ) ) { array( 'rev_page' => $pageId ), $fname ); if( !$latestTime ) { - echo "$wgDBname $pageId [[$name]] can't find latest rev time?!\n"; + echo wfWikiID()." $pageId [[$name]] can't find latest rev time?!\n"; continue; } $revision = Revision::loadFromTimestamp( $dbw, $title, $latestTime ); if( is_null( $revision ) ) { - echo "$wgDBname $pageId [[$name]] latest time $latestTime, can't find revision id\n"; + echo wfWikiID()." $pageId [[$name]] latest time $latestTime, can't find revision id\n"; continue; } $id = $revision->getId(); - echo "$wgDBname $pageId [[$name]] latest time $latestTime, rev id $id\n"; + echo wfWikiID()." 
$pageId [[$name]] latest time $latestTime, rev id $id\n"; if( $fixit ) { $article = new Article( $title ); $article->updateRevisionOn( $dbw, $revision ); diff --git a/maintenance/backup.inc b/maintenance/backup.inc index d3603bd1..8b4b6726 100644 --- a/maintenance/backup.inc +++ b/maintenance/backup.inc @@ -168,9 +168,6 @@ class BackupDumper { } function dump( $history, $text = MW_EXPORT_TEXT ) { - # This shouldn't happen if on console... ;) - header( 'Content-type: text/html; charset=UTF-8' ); - # Notice messages will foul up your XML output even if they're # relatively harmless. ini_set( 'display_errors', false ); @@ -206,11 +203,11 @@ class BackupDumper { * Initialise starting time and maximum revision count. * We'll make ETA calculations based an progress, assuming relatively * constant per-revision rate. - * @param int $history MW_EXPORT_CURRENT or MW_EXPORT_FULL + * @param int $history WikiExporter::CURRENT or WikiExporter::FULL */ - function initProgress( $history = MW_EXPORT_FULL ) { - $table = ($history == MW_EXPORT_CURRENT) ? 'page' : 'revision'; - $field = ($history == MW_EXPORT_CURRENT) ? 'page_id' : 'rev_id'; + function initProgress( $history = WikiExporter::FULL ) { + $table = ($history == WikiExporter::CURRENT) ? 'page' : 'revision'; + $field = ($history == WikiExporter::CURRENT) ? 'page_id' : 'rev_id'; $dbr =& wfGetDB( DB_SLAVE ); $this->maxCount = $dbr->selectField( $table, "MAX($field)", '', 'BackupDumper::dump' ); @@ -221,7 +218,7 @@ class BackupDumper { global $wgDBadminuser, $wgDBadminpassword; global $wgDBname, $wgDebugDumpSql; $flags = ($wgDebugDumpSql ? 
DBO_DEBUG : 0) | DBO_DEFAULT; // god-damn hack - $db =& new Database( $this->backupServer(), $wgDBadminuser, $wgDBadminpassword, $wgDBname, false, $flags ); + $db = new Database( $this->backupServer(), $wgDBadminuser, $wgDBadminpassword, $wgDBname, false, $flags ); $timeout = 3600 * 24; $db->query( "SET net_read_timeout=$timeout" ); $db->query( "SET net_write_timeout=$timeout" ); @@ -265,9 +262,8 @@ class BackupDumper { $revrate = '-'; $etats = '-'; } - global $wgDBname; $this->progress( sprintf( "%s: %s %d pages (%0.3f/sec), %d revs (%0.3f/sec), ETA %s [max %d]", - $now, $wgDBname, $this->pageCount, $rate, $this->revCount, $revrate, $etats, $this->maxCount ) ); + $now, wfWikiID(), $this->pageCount, $rate, $this->revCount, $revrate, $etats, $this->maxCount ) ); } } diff --git a/maintenance/checkUsernames.php b/maintenance/checkUsernames.php index b577ebc6..4c0ecdce 100644 --- a/maintenance/checkUsernames.php +++ b/maintenance/checkUsernames.php @@ -10,7 +10,6 @@ class checkUsernames { $this->log = fopen( '/home/wikipedia/logs/checkUsernames.log', 'at' ); } function main() { - global $wgDBname; $fname = 'checkUsernames::main'; $dbr =& wfGetDB( DB_SLAVE ); @@ -21,10 +20,9 @@ class checkUsernames { $fname ); - #fwrite( $this->stderr, "Checking $wgDBname\n" ); while ( $row = $dbr->fetchObject( $res ) ) { if ( ! 
User::isValidUserName( $row->user_name ) ) { - $out = sprintf( "%s: %6d: '%s'\n", $wgDBname, $row->user_id, $row->user_name ); + $out = sprintf( "%s: %6d: '%s'\n", wfWikiID(), $row->user_id, $row->user_name ); fwrite( $this->stderr, $out ); fwrite( $this->log, $out ); } diff --git a/maintenance/cleanupDupes.inc b/maintenance/cleanupDupes.inc index 18daab08..5db6bb39 100644 --- a/maintenance/cleanupDupes.inc +++ b/maintenance/cleanupDupes.inc @@ -113,19 +113,18 @@ END } function checkDupes( $fixthem = false, $indexonly = false ) { - global $wgDBname; $dbw =& wfGetDB( DB_MASTER ); if( $dbw->indexExists( 'cur', 'name_title' ) && $dbw->indexUnique( 'cur', 'name_title' ) ) { - echo "$wgDBname: cur table has the current unique index; no duplicate entries.\n"; + echo wfWikiID().": cur table has the current unique index; no duplicate entries.\n"; } elseif( $dbw->indexExists( 'cur', 'name_title_dup_prevention' ) ) { - echo "$wgDBname: cur table has a temporary name_title_dup_prevention unique index; no duplicate entries.\n"; + echo wfWikiID().": cur table has a temporary name_title_dup_prevention unique index; no duplicate entries.\n"; } else { - echo "$wgDBname: cur table has the old non-unique index and may have duplicate entries.\n"; + echo wfWikiID().": cur table has the old non-unique index and may have duplicate entries.\n"; if( !$indexonly ) { fixDupes( $fixthem ); } } } -?> \ No newline at end of file +?> diff --git a/maintenance/cleanupImages.php b/maintenance/cleanupImages.php new file mode 100644 index 00000000..8ae5561a --- /dev/null +++ b/maintenance/cleanupImages.php @@ -0,0 +1,168 @@ + + * http://www.mediawiki.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @author Brion Vibber + * @package MediaWiki + * @subpackage maintenance + */ + +require_once( 'commandLine.inc' ); +require_once( 'cleanupTable.inc' ); + +class ImageCleanup extends TableCleanup { + function __construct( $dryrun = false ) { + parent::__construct( 'image', $dryrun ); + } + + function processPage( $row ) { + global $wgContLang; + + $source = $row->img_name; + if( $source == '' ) { + // Ye olde empty rows. Just kill them. + $this->killRow( $source ); + return $this->progress( 1 ); + } + + $cleaned = $source; + + // About half of old bad image names have percent-codes + $cleaned = rawurldecode( $cleaned ); + + // Some are old latin-1 + $cleaned = $wgContLang->checkTitleEncoding( $cleaned ); + + // Many of remainder look like non-normalized unicode + $cleaned = UtfNormal::cleanUp( $cleaned ); + + $title = Title::makeTitleSafe( NS_IMAGE, $cleaned ); + + if( is_null( $title ) ) { + $this->log( "page $source ($cleaned) is illegal." ); + $safe = $this->buildSafeTitle( $cleaned ); + $this->pokeFile( $source, $safe ); + return $this->progress( 1 ); + } + + if( $title->getDbKey() !== $source ) { + $munged = $title->getDbKey(); + $this->log( "page $source ($munged) doesn't match self." 
); + $this->pokeFile( $source, $munged ); + return $this->progress( 1 ); + } + + $this->progress( 0 ); + } + + function killRow( $name ) { + if( $this->dryrun ) { + $this->log( "DRY RUN: would delete bogus row '$name'" ); + } else { + $this->log( "deleting bogus row '$name'" ); + $db = wfGetDB( DB_MASTER ); + $db->delete( 'image', + array( 'img_name' => $name ), + __METHOD__ ); + } + } + + function filePath( $name ) { + return wfImageDir( $name ) . "/$name"; + } + + function pokeFile( $orig, $new ) { + $path = $this->filePath( $orig ); + if( !file_exists( $path ) ) { + $this->log( "missing file: $path" ); + return $this->killRow( $orig ); + } + + $db = wfGetDB( DB_MASTER ); + $version = 0; + $final = $new; + + while( $db->selectField( 'image', 'img_name', + array( 'img_name' => $final ), __METHOD__ ) ) { + $this->log( "Rename conflicts with '$final'..." ); + $version++; + $final = $this->appendTitle( $new, "_$version" ); + } + + $finalPath = $this->filePath( $final ); + + if( $this->dryrun ) { + $this->log( "DRY RUN: would rename $path to $finalPath" ); + } else { + $this->log( "renaming $path to $finalPath" ); + $db->begin(); + $db->update( 'image', + array( 'img_name' => $final ), + array( 'img_name' => $orig ), + __METHOD__ ); + $dir = dirname( $finalPath ); + if( !file_exists( $dir ) ) { + if( !mkdir( $dir, 0777, true ) ) { + $this->log( "RENAME FAILED, COULD NOT CREATE $dir" ); + $db->rollback(); + return; + } + } + if( rename( $path, $finalPath ) ) { + $db->commit(); + } else { + $this->log( "RENAME FAILED" ); + $db->rollback(); + } + } + } + + function appendTitle( $name, $suffix ) { + return preg_replace( '/^(.*)(\..*?)$/', + "\\1$suffix\\2", $name ); + } + + function buildSafeTitle( $name ) { + global $wgLegalTitleChars; + $x = preg_replace_callback( + "/([^$wgLegalTitleChars])/", + array( $this, 'hexChar' ), + $name ); + + $test = Title::makeTitleSafe( NS_IMAGE, $x ); + if( is_null( $test ) || $test->getDbKey() !== $x ) { + $this->log( "Unable to generate 
safe title from '$name', got '$x'" ); + return false; + } + + return $x; + } +} + +$wgUser->setName( 'Conversion script' ); +$caps = new ImageCleanup( !isset( $options['fix'] ) ); +$caps->cleanup(); + +?> diff --git a/maintenance/cleanupTable.inc b/maintenance/cleanupTable.inc new file mode 100644 index 00000000..cc551bce --- /dev/null +++ b/maintenance/cleanupTable.inc @@ -0,0 +1,86 @@ +targetTable = $table; + $this->maxLag = 10; # if slaves are lagged more than 10 secs, wait + $this->dryrun = $dryrun; + } + + function cleanup() { + if( $this->dryrun ) { + echo "Checking for bad titles...\n"; + } else { + echo "Checking and fixing bad titles...\n"; + } + $this->runTable( $this->targetTable, + '', //'WHERE page_namespace=0', + array( $this, 'processPage' ) ); + } + + function init( $count, $table ) { + $this->processed = 0; + $this->updated = 0; + $this->count = $count; + $this->startTime = wfTime(); + $this->table = $table; + } + + function progress( $updated ) { + $this->updated += $updated; + $this->processed++; + if( $this->processed % 100 != 0 ) { + return; + } + $portion = $this->processed / $this->count; + $updateRate = $this->updated / $this->processed; + + $now = wfTime(); + $delta = $now - $this->startTime; + $estimatedTotalTime = $delta / $portion; + $eta = $this->startTime + $estimatedTotalTime; + + printf( "%s %s: %6.2f%% done on %s; ETA %s [%d/%d] %.2f/sec <%.2f%% updated>\n", + wfWikiID(), + wfTimestamp( TS_DB, intval( $now ) ), + $portion * 100.0, + $this->table, + wfTimestamp( TS_DB, intval( $eta ) ), + $this->processed, + $this->count, + $this->processed / $delta, + $updateRate * 100.0 ); + flush(); + } + + function runTable( $table, $where, $callback ) { + $fname = 'CapsCleanup::buildTable'; + + $count = $this->dbw->selectField( $table, 'count(*)', '', $fname ); + $this->init( $count, $table ); + $this->log( "Processing $table..." 
); + + $tableName = $this->dbr->tableName( $table ); + $sql = "SELECT * FROM $tableName $where"; + $result = $this->dbr->query( $sql, $fname ); + + while( $row = $this->dbr->fetchObject( $result ) ) { + $updated = call_user_func( $callback, $row ); + } + $this->log( "Finished $table... $this->updated of $this->processed rows updated" ); + $this->dbr->freeResult( $result ); + } + + function hexChar( $matches ) { + return sprintf( "\\x%02x", ord( $matches[1] ) ); + } + + abstract function processPage( $row ); + +} + +?> diff --git a/maintenance/cleanupTitles.php b/maintenance/cleanupTitles.php index 930072de..12e07b67 100644 --- a/maintenance/cleanupTitles.php +++ b/maintenance/cleanupTitles.php @@ -2,9 +2,9 @@ /* * Script to clean up broken, unparseable titles. * - * Usage: php cleanupTitles.php [--dry-run] + * Usage: php cleanupTitles.php [--fix] * Options: - * --dry-run don't actually try moving them + * --fix Actually clean up titles; otherwise just checks for them * * Copyright (C) 2005 Brion Vibber * http://www.mediawiki.org/ @@ -29,77 +29,12 @@ * @subpackage maintenance */ -$options = array( 'dry-run' ); - require_once( 'commandLine.inc' ); -require_once( 'FiveUpgrade.inc' ); - -class TitleCleanup extends FiveUpgrade { - function TitleCleanup( $dryrun = false ) { - parent::FiveUpgrade(); - - $this->maxLag = 10; # if slaves are lagged more than 10 secs, wait - $this->dryrun = $dryrun; - } - - function cleanup() { - $this->runTable( 'page', - '', //'WHERE page_namespace=0', - array( &$this, 'processPage' ) ); - } +require_once( 'cleanupTable.inc' ); - function init( $count, $table ) { - $this->processed = 0; - $this->updated = 0; - $this->count = $count; - $this->startTime = wfTime(); - $this->table = $table; - } - - function progress( $updated ) { - $this->updated += $updated; - $this->processed++; - if( $this->processed % 100 != 0 ) { - return; - } - $portion = $this->processed / $this->count; - $updateRate = $this->updated / $this->processed; - - $now = 
wfTime(); - $delta = $now - $this->startTime; - $estimatedTotalTime = $delta / $portion; - $eta = $this->startTime + $estimatedTotalTime; - - global $wgDBname; - printf( "%s %s: %6.2f%% done on %s; ETA %s [%d/%d] %.2f/sec <%.2f%% updated>\n", - $wgDBname, - wfTimestamp( TS_DB, intval( $now ) ), - $portion * 100.0, - $this->table, - wfTimestamp( TS_DB, intval( $eta ) ), - $this->processed, - $this->count, - $this->processed / $delta, - $updateRate * 100.0 ); - flush(); - } - - function runTable( $table, $where, $callback ) { - $fname = 'CapsCleanup::buildTable'; - - $count = $this->dbw->selectField( $table, 'count(*)', '', $fname ); - $this->init( $count, 'page' ); - $this->log( "Processing $table..." ); - - $tableName = $this->dbr->tableName( $table ); - $sql = "SELECT * FROM $tableName $where"; - $result = $this->dbr->query( $sql, $fname ); - - while( $row = $this->dbr->fetchObject( $result ) ) { - $updated = call_user_func( $callback, $row ); - } - $this->log( "Finished $table... $this->updated of $this->processed rows updated" ); - $this->dbr->freeResult( $result ); +class TitleCleanup extends TableCleanup { + function __construct( $dryrun = false ) { + parent::__construct( 'page', $dryrun ); } function processPage( $row ) { @@ -197,14 +132,10 @@ class TitleCleanup extends FiveUpgrade { $linkCache->clear(); } } - - function hexChar( $matches ) { - return sprintf( "\\x%02x", ord( $matches[1] ) ); - } } $wgUser->setName( 'Conversion script' ); -$caps = new TitleCleanup( isset( $options['dry-run'] ) ); +$caps = new TitleCleanup( !isset( $options['fix'] ) ); $caps->cleanup(); ?> diff --git a/maintenance/cleanupWatchlist.php b/maintenance/cleanupWatchlist.php index d2925db3..027859a4 100644 --- a/maintenance/cleanupWatchlist.php +++ b/maintenance/cleanupWatchlist.php @@ -70,9 +70,8 @@ class WatchlistCleanup extends FiveUpgrade { $estimatedTotalTime = $delta / $portion; $eta = $this->startTime + $estimatedTotalTime; - global $wgDBname; printf( "%s %s: %6.2f%% done on 
%s; ETA %s [%d/%d] %.2f/sec <%.2f%% updated>\n", - $wgDBname, + wfWikiID(), wfTimestamp( TS_DB, intval( $now ) ), $portion * 100.0, $this->table, diff --git a/maintenance/commandLine.inc b/maintenance/commandLine.inc index 2bb5389e..2549057e 100644 --- a/maintenance/commandLine.inc +++ b/maintenance/commandLine.inc @@ -28,16 +28,15 @@ if ( !isset( $optionsWithArgs ) ) { $optionsWithArgs[] = 'conf'; # For specifying the location of LocalSettings.php $self = array_shift( $argv ); -$self = __FILE__; -$IP = realpath( dirname( $self ) . '/..' ); +$IP = realpath( dirname( __FILE__ ) . '/..' ); #chdir( $IP ); +require_once( "$IP/StartProfiler.php" ); $options = array(); $args = array(); # Parse arguments - for( $arg = reset( $argv ); $arg !== false; $arg = next( $argv ) ) { if ( $arg == '--' ) { # End of options, remainder should be considered arguments @@ -133,6 +132,7 @@ if ( file_exists( '/home/wikipedia/common/langlist' ) ) { # This is for the IRC scripts, which now run as the apache user # The apache user doesn't have access to the wikiadmin_pass command if ( $_ENV['USER'] == 'apache' ) { + #if ( posix_geteuid() == 48 ) { $wgUseNormalUser = true; } @@ -141,7 +141,7 @@ if ( file_exists( '/home/wikipedia/common/langlist' ) ) { $DP = $IP; ini_set( 'include_path', ".:$IP:$IP/includes:$IP/languages:$IP/maintenance" ); - require_once( $IP.'/includes/ProfilerStub.php' ); + #require_once( $IP.'/includes/ProfilerStub.php' ); require_once( $IP.'/includes/Defines.php' ); require_once( $IP.'/CommonSettings.php' ); @@ -168,7 +168,7 @@ if ( file_exists( '/home/wikipedia/common/langlist' ) ) { } $wgCommandLineMode = true; $DP = $IP; - require_once( $IP.'/includes/ProfilerStub.php' ); + #require_once( $IP.'/includes/ProfilerStub.php' ); require_once( $IP.'/includes/Defines.php' ); require_once( $settingsFile ); ini_set( 'include_path', ".$sep$IP$sep$IP/includes$sep$IP/languages$sep$IP/maintenance" ); @@ -202,9 +202,11 @@ if ( defined( 'MW_CMDLINE_CALLBACK' ) ) { ini_set( 
'memory_limit', -1 ); +$wgShowSQLErrors = true; + require_once( 'Setup.php' ); require_once( 'install-utils.inc' ); -$wgTitle = Title::newFromText( 'Command line script' ); +$wgTitle = null; # Much much faster startup than creating a title object set_time_limit(0); // -------------------------------------------------------------------- diff --git a/maintenance/convertLinks.inc b/maintenance/convertLinks.inc index f0d2c439..5f8c27a5 100644 --- a/maintenance/convertLinks.inc +++ b/maintenance/convertLinks.inc @@ -8,8 +8,8 @@ /** */ function convertLinks() { global $wgDBtype; - if( $wgDBtype == 'PostgreSQL' ) { - print "Links table already ok on PostgreSQL.\n"; + if( $wgDBtype == 'postgres' ) { + print "Links table already ok on Postgres.\n"; return; } diff --git a/maintenance/createAndPromote.php b/maintenance/createAndPromote.php index df29c114..43ddcdd1 100644 --- a/maintenance/createAndPromote.php +++ b/maintenance/createAndPromote.php @@ -18,8 +18,7 @@ if( !count( $args ) == 2 ) { $username = $args[0]; $password = $args[1]; -global $wgDBname; -echo( "{$wgDBname}: Creating and promoting User:{$username}..." ); +echo( wfWikiID() . ": Creating and promoting User:{$username}..." 
); # Validate username and check it doesn't exist $user = User::newFromName( $username ); @@ -45,4 +44,4 @@ $ssu->doUpdate(); echo( "done.\n" ); -?> \ No newline at end of file +?> diff --git a/maintenance/deleteBatch.php b/maintenance/deleteBatch.php index 697dffd7..234744c3 100644 --- a/maintenance/deleteBatch.php +++ b/maintenance/deleteBatch.php @@ -71,9 +71,13 @@ for ( $linenum = 1; !feof( $file ); $linenum++ ) { } else { $art = new Article( $page ); } - $art->doDelete( $reason ); + $success = $art->doDeleteArticle( $reason ); $dbw->immediateCommit(); - print "\n"; + if ( $success ) { + print "\n"; + } else { + print " FAILED\n"; + } if ( $interval ) { sleep( $interval ); diff --git a/maintenance/deleteImageMemcached.php b/maintenance/deleteImageMemcached.php index 4e17d21e..6af0e3a9 100644 --- a/maintenance/deleteImageMemcached.php +++ b/maintenance/deleteImageMemcached.php @@ -14,7 +14,7 @@ class DeleteImageCache { } function main() { - global $wgMemc, $wgDBname; + global $wgMemc; $fname = 'DeleteImageCache::main'; ini_set( 'display_errors', false ); @@ -32,9 +32,9 @@ class DeleteImageCache { while ( $row = $dbr->fetchObject( $res ) ) { if ($i % $this->report == 0) - printf("%s: %13s done (%s)\n", $wgDBname, "$i/$total", wfPercent( $i / $total * 100 )); + printf("%s: %13s done (%s)\n", wfWikiID(), "$i/$total", wfPercent( $i / $total * 100 )); $md5 = md5( $row->img_name ); - $wgMemc->delete( "$wgDBname:Image:$md5" ); + $wgMemc->delete( wfMemcKey( 'Image', $md5 ) ); if ($this->sleep != 0) usleep( $this->sleep ); diff --git a/maintenance/deleteRevision.php b/maintenance/deleteRevision.php index e7d005b6..eb65e234 100644 --- a/maintenance/deleteRevision.php +++ b/maintenance/deleteRevision.php @@ -8,7 +8,7 @@ if ( count( $args ) == 0 ) { exit(1); } -echo "Deleting revision(s) " . implode( ',', $args ) . " from $wgDBname...\n"; +echo "Deleting revision(s) " . implode( ',', $args ) . 
" from ".wfWikiID()."...\n"; $affected = 0; foreach ( $args as $revID ) { diff --git a/maintenance/dumpBackup.php b/maintenance/dumpBackup.php index 1735422d..ef5d47c9 100644 --- a/maintenance/dumpBackup.php +++ b/maintenance/dumpBackup.php @@ -57,12 +57,12 @@ if( isset( $options['end'] ) ) { $dumper->skipHeader = isset( $options['skip-header'] ); $dumper->skipFooter = isset( $options['skip-footer'] ); -$textMode = isset( $options['stub'] ) ? MW_EXPORT_STUB : MW_EXPORT_TEXT; +$textMode = isset( $options['stub'] ) ? WikiExporter::STUB : WikiExporter::TEXT; if( isset( $options['full'] ) ) { - $dumper->dump( MW_EXPORT_FULL, $textMode ); + $dumper->dump( WikiExporter::FULL, $textMode ); } elseif( isset( $options['current'] ) ) { - $dumper->dump( MW_EXPORT_CURRENT, $textMode ); + $dumper->dump( WikiExporter::CURRENT, $textMode ); } else { $dumper->progress( << $value ) { $this->$var = $value; } } + function loadCheckpoints() { + if ( $this->checkpoints !== false ) { + return true; + } elseif ( !$this->checkpointFile ) { + return false; + } else { + $lines = @file( $this->checkpointFile ); + if ( $lines === false ) { + print "Starting new checkpoint file \"{$this->checkpointFile}\"\n"; + $this->checkpoints = array(); + } else { + $lines = array_map( 'trim', $lines ); + $this->checkpoints = array(); + foreach ( $lines as $line ) { + list( $name, $value ) = explode( '=', $line, 2 ); + $this->checkpoints[$name] = $value; + } + } + return true; + } + } + + function getCheckpoint( $type, $defValue = false ) { + if ( !$this->loadCheckpoints() ) { + return false; + } + if ( !isset( $this->checkpoints[$type] ) ) { + return false; + } else { + return $this->checkpoints[$type]; + } + } + + function setCheckpoint( $type, $value ) { + if ( !$this->checkpointFile ) { + return; + } + $this->checkpoints[$type] = $value; + $blob = ''; + foreach ( $this->checkpoints as $type => $value ) { + $blob .= "$type=$value\n"; + } + file_put_contents( $this->checkpointFile, $blob ); + } + + 
function doEverything() { + if ( $this->getCheckpoint( 'everything' ) == 'done' ) { + print "Checkpoint says everything is already done\n"; + return; + } + $this->doArticles(); + $this->doLocalImageDescriptions(); + $this->doSharedImageDescriptions(); + $this->doCategories(); + $this->doRedirects(); + if ( $this->sliceNumerator == 1 ) { + $this->doSpecials(); + } + + $this->setCheckpoint( 'everything', 'done' ); + } + /** * Write a set of articles specified by start and end page_id * Skip categories and images, they will be done separately */ - function doArticles( $start, $end = false ) { - $fname = 'DumpHTML::doArticles'; + function doArticles() { + if ( $this->endID === false ) { + $end = $this->getMaxPageID(); + } else { + $end = $this->endID; + } + $start = $this->startID; + + # Start from the checkpoint + $cp = $this->getCheckpoint( 'article' ); + if ( $cp == 'done' ) { + print "Articles already done\n"; + return; + } elseif ( $cp !== false ) { + $start = $cp; + print "Resuming article dump from checkpoint at page_id $start of $end\n"; + } else { + print "Starting from page_id $start of $end\n"; + } - $this->setupGlobals(); + # Move the start point to the correct slice if it isn't there already + $start = $this->modSliceStart( $start ); - if ( $end === false ) { - $dbr =& wfGetDB( DB_SLAVE ); - $end = $dbr->selectField( 'page', 'max(page_id)', false, $fname ); - } + $this->setupGlobals(); $mainPageObj = Title::newMainPage(); $mainPage = $mainPageObj->getPrefixedDBkey(); - - for ($id = $start; $id <= $end; $id++) { + for ( $id = $start, $i = 0; $id <= $end; $id += $this->sliceDenominator, $i++ ) { wfWaitForSlaves( 20 ); - if ( !($id % REPORTING_INTERVAL) ) { + if ( !( $i % REPORTING_INTERVAL) ) { print "Processing ID: $id\r"; + $this->setCheckpoint( 'article', $id ); } - if ( !($id % (REPORTING_INTERVAL*10) ) ) { + if ( !($i % (REPORTING_INTERVAL*10) ) ) { print "\n"; } $title = Title::newFromID( $id ); @@ -85,6 +184,7 @@ class DumpHTML { } } } + 
$this->setCheckpoint( 'article', 'done' ); print "\n"; } @@ -107,6 +207,11 @@ class DumpHTML { $title = Title::newMainPage(); $text = $this->getArticleHTML( $title ); + + # Parse the XHTML to find the images + $images = $this->findImages( $text ); + $this->copyImages( $images ); + $file = fopen( "{$this->dest}/index.html", "w" ); if ( !$file ) { print "\nCan't open index.html for writing\n"; @@ -118,49 +223,98 @@ class DumpHTML { } function doImageDescriptions() { + $this->doLocalImageDescriptions(); + $this->doSharedImageDescriptions(); + } + + /** + * Dump image description pages that don't have an associated article, but do + * have a local image + */ + function doLocalImageDescriptions() { global $wgSharedUploadDirectory; + $chunkSize = 1000; - $fname = 'DumpHTML::doImageDescriptions'; + $dbr =& wfGetDB( DB_SLAVE ); + + $cp = $this->getCheckpoint( 'local image' ); + if ( $cp == 'done' ) { + print "Local image descriptions already done\n"; + return; + } elseif ( $cp !== false ) { + print "Writing image description pages starting from $cp\n"; + $conds = array( 'img_name >= ' . $dbr->addQuotes( $cp ) ); + } else { + print "Writing image description pages for local images\n"; + $conds = false; + } $this->setupGlobals(); + $i = 0; - /** - * Dump image description pages that don't have an associated article, but do - * have a local image - */ - $dbr =& wfGetDB( DB_SLAVE ); - extract( $dbr->tableNames( 'image', 'page' ) ); - $res = $dbr->select( 'image', array( 'img_name' ), false, $fname ); + do { + $res = $dbr->select( 'image', array( 'img_name' ), $conds, __METHOD__, + array( 'ORDER BY' => 'img_name', 'LIMIT' => $chunkSize ) ); + $numRows = $dbr->numRows( $res ); + + while ( $row = $dbr->fetchObject( $res ) ) { + # Update conds for the next chunk query + $conds = array( 'img_name > ' . 
$dbr->addQuotes( $row->img_name ) ); + + // Slice the result set with a filter + if ( !$this->sliceFilter( $row->img_name ) ) { + continue; + } - $i = 0; - print "Writing image description pages for local images\n"; - $num = $dbr->numRows( $res ); - while ( $row = $dbr->fetchObject( $res ) ) { - wfWaitForSlaves( 10 ); - if ( !( ++$i % REPORTING_INTERVAL ) ) { - print "Done $i of $num\r"; - } - $title = Title::makeTitle( NS_IMAGE, $row->img_name ); - if ( $title->getArticleID() ) { - // Already done by dumpHTML - continue; + wfWaitForSlaves( 10 ); + if ( !( ++$i % REPORTING_INTERVAL ) ) { + print "{$row->img_name}\n"; + if ( $row->img_name !== 'done' ) { + $this->setCheckpoint( 'local image', $row->img_name ); + } + } + $title = Title::makeTitle( NS_IMAGE, $row->img_name ); + if ( $title->getArticleID() ) { + // Already done by dumpHTML + continue; + } + $this->doArticle( $title ); } - $this->doArticle( $title ); - } + $dbr->freeResult( $res ); + } while ( $numRows ); + + $this->setCheckpoint( 'local image', 'done' ); print "\n"; + } + + /** + * Dump images which only have a real description page on commons + */ + function doSharedImageDescriptions() { + list( $start, $end ) = $this->sliceRange( 0, 255 ); + + $cp = $this->getCheckpoint( 'shared image' ); + if ( $cp == 'done' ) { + print "Shared description pages already done\n"; + return; + } elseif ( $cp !== false ) { + print "Writing description pages for commons images starting from directory $cp/255\n"; + $start = $cp; + } else { + print "Writing description pages for commons images\n"; + } - /** - * Dump images which only have a real description page on commons - */ - print "Writing description pages for commons images\n"; + $this->setupGlobals(); $i = 0; - for ( $hash = 0; $hash < 256; $hash++ ) { + for ( $hash = $start; $hash <= $end; $hash++ ) { + $this->setCheckpoint( 'shared image', $hash ); + $dir = sprintf( "%01x/%02x", intval( $hash / 16 ), $hash ); - $paths = array_merge( glob( 
"{$this->sharedStaticPath}/$dir/*" ), - glob( "{$this->sharedStaticPath}/thumb/$dir/*" ) ); + $paths = array_merge( glob( "{$this->sharedStaticDirectory}/$dir/*" ), + glob( "{$this->sharedStaticDirectory}/thumb/$dir/*" ) ); foreach ( $paths as $path ) { - $file = basename( $path ); + $file = wfBaseName( $path ); if ( !(++$i % REPORTING_INTERVAL ) ) { print "$i\r"; } @@ -169,49 +323,106 @@ class DumpHTML { $this->doArticle( $title ); } } + $this->setCheckpoint( 'shared image', 'done' ); print "\n"; } function doCategories() { - $fname = 'DumpHTML::doCategories'; + $chunkSize = 1000; + $this->setupGlobals(); - $dbr =& wfGetDB( DB_SLAVE ); - print "Selecting categories..."; - $sql = 'SELECT DISTINCT cl_to FROM ' . $dbr->tableName( 'categorylinks' ); - $res = $dbr->query( $sql, $fname ); + + $cp = $this->getCheckpoint( 'category' ); + if ( $cp == 'done' ) { + print "Category pages already done\n"; + return; + } elseif ( $cp !== false ) { + print "Resuming category page dump from $cp\n"; + $conds = array( 'cl_to >= ' . $dbr->addQuotes( $cp ) ); + } else { + print "Starting category pages\n"; + $conds = false; + } - print "\nWriting " . $dbr->numRows( $res ). " category pages\n"; $i = 0; - while ( $row = $dbr->fetchObject( $res ) ) { - wfWaitForSlaves( 10 ); - if ( !(++$i % REPORTING_INTERVAL ) ) { - print "$i\r"; + do { + $res = $dbr->select( 'categorylinks', 'DISTINCT cl_to', $conds, __METHOD__, + array( 'ORDER BY' => 'cl_to', 'LIMIT' => $chunkSize ) ); + $numRows = $dbr->numRows( $res ); + + while ( $row = $dbr->fetchObject( $res ) ) { + // Set conditions for next chunk + $conds = array( 'cl_to > ' . 
$dbr->addQuotes( $row->cl_to ) ); + + // Filter pages from other slices + if ( !$this->sliceFilter( $row->cl_to ) ) { + continue; + } + + wfWaitForSlaves( 10 ); + if ( !(++$i % REPORTING_INTERVAL ) ) { + print "{$row->cl_to}\n"; + if ( $row->cl_to != 'done' ) { + $this->setCheckpoint( 'category', $row->cl_to ); + } + } + $title = Title::makeTitle( NS_CATEGORY, $row->cl_to ); + $this->doArticle( $title ); } - $title = Title::makeTitle( NS_CATEGORY, $row->cl_to ); - $this->doArticle( $title ); - } + $dbr->freeResult( $res ); + } while ( $numRows ); + + $this->setCheckpoint( 'category', 'done' ); print "\n"; } function doRedirects() { print "Doing redirects...\n"; - $fname = 'DumpHTML::doRedirects'; + + $chunkSize = 10000; + $end = $this->getMaxPageID(); + $cp = $this->getCheckpoint( 'redirect' ); + if ( $cp == 'done' ) { + print "Redirects already done\n"; + return; + } elseif ( $cp !== false ) { + print "Resuming redirect generation from page_id $cp\n"; + $start = intval( $cp ); + } else { + $start = 1; + } + $this->setupGlobals(); $dbr =& wfGetDB( DB_SLAVE ); - - $res = $dbr->select( 'page', array( 'page_namespace', 'page_title' ), - array( 'page_is_redirect' => 1 ), $fname ); - $num = $dbr->numRows( $res ); - print "$num redirects to do...\n"; $i = 0; - while ( $row = $dbr->fetchObject( $res ) ) { - $title = Title::makeTitle( $row->page_namespace, $row->page_title ); - if ( !(++$i % (REPORTING_INTERVAL*10) ) ) { - print "Done $i of $num\n"; - } - $this->doArticle( $title ); + + for ( $chunkStart = $start; $chunkStart <= $end; $chunkStart += $chunkSize ) { + $chunkEnd = min( $end, $chunkStart + $chunkSize - 1 ); + $conds = array( + 'page_is_redirect' => 1, + "page_id BETWEEN $chunkStart AND $chunkEnd" + ); + # Modulo slicing in SQL + if ( $this->sliceDenominator != 1 ) { + $n = intval( $this->sliceNumerator ); + $m = intval( $this->sliceDenominator ); + $conds[] = "page_id % $m = $n"; + } + $res = $dbr->select( 'page', array( 'page_id', 'page_namespace', 
'page_title' ), + $conds, __METHOD__ ); + + while ( $row = $dbr->fetchObject( $res ) ) { + $title = Title::makeTitle( $row->page_namespace, $row->page_title ); + if ( !(++$i % (REPORTING_INTERVAL*10) ) ) { + printf( "Done %d redirects (%2.3f%%)\n", $i, $row->page_id / $end * 100 ); + $this->setCheckpoint( 'redirect', $row->page_id ); + } + $this->doArticle( $title ); + } + $dbr->freeResult( $res ); } + $this->setCheckpoint( 'redirect', 'done' ); } /** Write an article specified by title */ @@ -219,6 +430,13 @@ class DumpHTML { global $wgTitle, $wgSharedUploadPath, $wgSharedUploadDirectory; global $wgUploadDirectory; + if ( $this->noOverwrite ) { + $fileName = $this->dest.'/'.$this->getHashedFilename( $title ); + if ( file_exists( $fileName ) ) { + return; + } + } + $this->rawPages = array(); $text = $this->getArticleHTML( $title ); @@ -263,11 +481,19 @@ class DumpHTML { $fullName = "{$this->dest}/$filename"; $fullDir = dirname( $fullName ); + if ( $this->compress ) { + $fullName .= ".gz"; + $text = gzencode( $text, 9 ); + } + wfMkdirParents( $fullDir, 0755 ); + wfSuppressWarnings(); $file = fopen( $fullName, 'w' ); + wfRestoreWarnings(); + if ( !$file ) { - print("Can't open file $fullName for writing\n"); + die("Can't open file '$fullName' for writing.\nCheck permissions or use another destination (-d).\n"); return; } @@ -281,13 +507,16 @@ class DumpHTML { global $wgUploadPath, $wgLogo, $wgMaxCredits, $wgSharedUploadPath; global $wgHideInterlanguageLinks, $wgUploadDirectory, $wgThumbnailScriptPath; global $wgSharedThumbnailScriptPath, $wgEnableParserCache, $wgHooks, $wgServer; - global $wgRightsUrl, $wgRightsText, $wgCopyrightIcon; + global $wgRightsUrl, $wgRightsText, $wgCopyrightIcon, $wgEnableSidebarCache; + global $wgGenerateThumbnailOnParse; static $oldLogo = NULL; if ( !$this->setupDone ) { $wgHooks['GetLocalURL'][] =& $this; $wgHooks['GetFullURL'][] =& $this; + $wgHooks['SiteNoticeBefore'][] =& $this; + $wgHooks['SiteNoticeAfter'][] =& $this; 
$this->oldArticlePath = $wgServer . $wgArticlePath; } @@ -331,8 +560,6 @@ class DumpHTML { $wgCopyrightIcon = str_replace( 'src="/images', 'src="' . htmlspecialchars( $wgScriptPath ) . '/images', $this->oldCopyrightIcon ); - - $wgStylePath = "$wgScriptPath/skins"; $wgUploadPath = "$wgScriptPath/{$this->imageRel}"; $wgSharedUploadPath = "$wgUploadPath/shared"; @@ -341,6 +568,8 @@ class DumpHTML { $wgThumbnailScriptPath = $wgSharedThumbnailScriptPath = false; $wgEnableParserCache = false; $wgMathPath = "$wgScriptPath/math"; + $wgEnableSidebarCache = false; + $wgGenerateThumbnailOnParse = true; if ( !empty( $wgRightsText ) ) { $wgRightsUrl = "$wgScriptPath/COPYING.html"; @@ -350,7 +579,14 @@ class DumpHTML { $wgUser->setOption( 'skin', $this->skin ); $wgUser->setOption( 'editsection', 0 ); - $this->sharedStaticPath = "$wgUploadDirectory/shared"; + if ( $this->makeSnapshot ) { + $this->destUploadDirectory = "{$this->dest}/{$this->imageRel}"; + if ( realpath( $this->destUploadDirectory == $wgUploadDirectory ) ) { + $this->makeSnapshot = false; + } + } + + $this->sharedStaticDirectory = "{$this->destUploadDirectory}/shared"; $this->setupDone = true; } @@ -391,6 +627,7 @@ class DumpHTML { } } + $sk =& $wgUser->getSkin(); ob_start(); $sk->outputPage( $wgOut ); @@ -430,67 +667,72 @@ ENDTEXT; return $wgDumpImages; } + /** + * Copy a file specified by a URL to a given directory + * + * @param string $srcPath The source URL + * @param string $srcPathBase The base directory of the source URL + * @param string $srcDirBase The base filesystem directory of the source URL + * @param string $destDirBase The base filesystem directory of the destination URL + */ + function relativeCopy( $srcPath, $srcPathBase, $srcDirBase, $destDirBase ) { + $rel = substr( $srcPath, strlen( $srcPathBase ) + 1 ); // +1 for slash + $sourceLoc = "$srcDirBase/$rel"; + $destLoc = "$destDirBase/$rel"; + #print "Copying $sourceLoc to $destLoc\n"; + if ( !file_exists( $destLoc ) ) { + wfMkdirParents( dirname( 
$destLoc ), 0755 ); + if ( function_exists( 'symlink' ) && !$this->forceCopy ) { + symlink( $sourceLoc, $destLoc ); + } else { + copy( $sourceLoc, $destLoc ); + } + } + } + + /** + * Copy an image, and if it is a thumbnail, copy its parent image too + */ + function copyImage( $srcPath, $srcPathBase, $srcDirBase, $destDirBase ) { + global $wgUploadPath, $wgUploadDirectory, $wgSharedUploadPath; + $this->relativeCopy( $srcPath, $srcPathBase, $srcDirBase, $destDirBase ); + if ( substr( $srcPath, strlen( $srcPathBase ) + 1, 6 ) == 'thumb/' ) { + # The image was a thumbnail + # Copy the source image as well + $rel = substr( $srcPath, strlen( $srcPathBase ) + 1 ); + $parts = explode( '/', $rel ); + $rel = "{$parts[1]}/{$parts[2]}/{$parts[3]}"; + $newSrc = "$srcPathBase/$rel"; + $this->relativeCopy( $newSrc, $srcPathBase, $srcDirBase, $destDirBase ); + } + } + /** * Copy images (or create symlinks) from commons to a static directory. * This is necessary even if you intend to distribute all of commons, because * the directory contents is used to work out which image description pages * are needed. * - * Also copies math images + * Also copies math images, and full-sized images if the makeSnapshot option + * is specified. * */ function copyImages( $images ) { - global $wgSharedUploadPath, $wgSharedUploadDirectory, $wgMathPath, $wgMathDirectory; + global $wgUploadPath, $wgUploadDirectory, $wgSharedUploadPath, $wgSharedUploadDirectory, + $wgMathPath, $wgMathDirectory; # Find shared uploads and copy them into the static directory $sharedPathLength = strlen( $wgSharedUploadPath ); $mathPathLength = strlen( $wgMathPath ); + $uploadPathLength = strlen( $wgUploadPath ); foreach ( $images as $escapedImage => $dummy ) { $image = urldecode( $escapedImage ); - # Is it shared? 
if ( substr( $image, 0, $sharedPathLength ) == $wgSharedUploadPath ) { - # Reconstruct full filename - $rel = substr( $image, $sharedPathLength + 1 ); // +1 for slash - $sourceLoc = "$wgSharedUploadDirectory/$rel"; - $staticLoc = "{$this->sharedStaticPath}/$rel"; - #print "Copying $sourceLoc to $staticLoc\n"; - # Copy to static directory - if ( !file_exists( $staticLoc ) ) { - wfMkdirParents( dirname( $staticLoc ), 0755 ); - if ( function_exists( 'symlink' ) && !$this->forceCopy ) { - symlink( $sourceLoc, $staticLoc ); - } else { - copy( $sourceLoc, $staticLoc ); - } - } - - if ( substr( $rel, 0, 6 ) == 'thumb/' ) { - # That was a thumbnail - # We will also copy the real image - $parts = explode( '/', $rel ); - $rel = "{$parts[1]}/{$parts[2]}/{$parts[3]}"; - $sourceLoc = "$wgSharedUploadDirectory/$rel"; - $staticLoc = "{$this->sharedStaticPath}/$rel"; - #print "Copying $sourceLoc to $staticLoc\n"; - if ( !file_exists( $staticLoc ) ) { - wfMkdirParents( dirname( $staticLoc ), 0755 ); - if ( function_exists( 'symlink' ) && !$this->forceCopy ) { - symlink( $sourceLoc, $staticLoc ); - } else { - copy( $sourceLoc, $staticLoc ); - } - } - } - } else - # Is it math? 
- if ( substr( $image, 0, $mathPathLength ) == $wgMathPath ) { - $rel = substr( $image, $mathPathLength + 1 ); // +1 for slash - $source = "$wgMathDirectory/$rel"; - $dest = "{$this->dest}/math/$rel"; - @mkdir( "{$this->dest}/math", 0755 ); - if ( !file_exists( $dest ) ) { - copy( $source, $dest ); - } + $this->copyImage( $image, $wgSharedUploadPath, $wgSharedUploadDirectory, $this->sharedStaticDirectory ); + } elseif ( substr( $image, 0, $mathPathLength ) == $wgMathPath ) { + $this->relativeCopy( $image, $wgMathPath, $wgMathDirectory, "{$this->dest}/math" ); + } elseif ( $this->makeSnapshot && substr( $image, 0, $uploadPathLength ) == $wgUploadPath ) { + $this->copyImage( $image, $wgUploadPath, $wgUploadDirectory, $this->destUploadDirectory ); } } } @@ -506,6 +748,7 @@ ENDTEXT; $url = str_replace( '$1', "../$iw/" . wfUrlencode( $this->getHashedFilename( $title ) ), $wgArticlePath ); } + $url .= $this->compress ? ".gz" : ""; return false; } else { return true; @@ -540,7 +783,7 @@ ENDTEXT; if ( $url === false ) { $url = str_replace( '$1', wfUrlencode( $this->getHashedFilename( $title ) ), $wgArticlePath ); } - + $url .= $this->compress ? 
".gz" : ""; return false; } @@ -632,6 +875,60 @@ ENDTEXT; return $dir; } + /** + * Calculate the start end end of a job based on the current slice + * @param integer $start + * @param integer $end + * @return array of integers + */ + function sliceRange( $start, $end ) { + $count = $end - $start + 1; + $each = $count / $this->sliceDenominator; + $sliceStart = $start + intval( $each * ( $this->sliceNumerator - 1 ) ); + if ( $this->sliceNumerator == $this->sliceDenominator ) { + $sliceEnd = $end; + } else { + $sliceEnd = $start + intval( $each * $this->sliceNumerator ) - 1; + } + return array( $sliceStart, $sliceEnd ); + } + + /** + * Adjust a start point so that it belongs to the current slice, where slices are defined by integer modulo + * @param integer $start + * @param integer $base The true start of the range; the minimum start + */ + function modSliceStart( $start, $base = 1 ) { + return $start - ( $start % $this->sliceDenominator ) + $this->sliceNumerator - 1 + $base; + } + + /** + * Determine whether a string belongs to the current slice, based on hash + */ + function sliceFilter( $s ) { + return crc32( $s ) % $this->sliceDenominator == $this->sliceNumerator - 1; + } + + /** + * No site notice + */ + function onSiteNoticeBefore( &$text ) { + $text = ''; + return false; + } + function onSiteNoticeAfter( &$text ) { + $text = ''; + return false; + } + + function getMaxPageID() { + if ( $this->maxPageID === false ) { + $dbr =& wfGetDB( DB_SLAVE ); + $this->maxPageID = $dbr->selectField( 'page', 'max(page_id)', false, __METHOD__ ); + } + return $this->maxPageID; + } + } /** XML parser callback */ diff --git a/maintenance/dumpHTML.php b/maintenance/dumpHTML.php index 37a46465..5e347e4b 100644 --- a/maintenance/dumpHTML.php +++ b/maintenance/dumpHTML.php @@ -9,20 +9,25 @@ * Usage: * php dumpHTML.php [options...] 
* - * -d destination directory - * -s start ID - * -e end ID - * -k skin to use (defaults to dumphtml) - * --images only do image description pages - * --categories only do category pages - * --redirects only do redirects - * --special only do miscellaneous stuff - * --force-copy copy commons instead of symlink, needed for Wikimedia - * --interlang allow interlanguage links + * -d destination directory + * -s start ID + * -e end ID + * -k skin to use (defaults to htmldump) + * --no-overwrite skip existing HTML files + * --checkpoint use a checkpoint file to allow restarting of interrupted dumps + * --slice split the job into m segments and do the n'th one + * --images only do image description pages + * --categories only do category pages + * --redirects only do redirects + * --special only do miscellaneous stuff + * --force-copy copy commons instead of symlink, needed for Wikimedia + * --interlang allow interlanguage links + * --image-snapshot copy all images used to the destination directory + * --compress generate compressed version of the html pages */ -$optionsWithArgs = array( 's', 'd', 'e', 'k' ); +$optionsWithArgs = array( 's', 'd', 'e', 'k', 'checkpoint', 'slice' ); $profiling = false; @@ -40,7 +45,6 @@ require_once( "commandLine.inc" ); require_once( "dumpHTML.inc" ); error_reporting( E_ALL & (~E_NOTICE) ); -define( 'CHUNK_SIZE', 50 ); if ( !empty( $options['s'] ) ) { $start = $options['s']; @@ -58,10 +62,22 @@ if ( !empty( $options['e'] ) ) { if ( !empty( $options['d'] ) ) { $dest = $options['d']; } else { - $dest = 'static'; + $dest = "$IP/static"; } -$skin = isset( $options['k'] ) ? $options['k'] : 'dumphtml'; +$skin = isset( $options['k'] ) ? 
$options['k'] : 'htmldump'; + +if ( $options['slice'] ) { + $bits = explode( '/', $options['slice'] ); + if ( count( $bits ) != 2 || $bits[0] < 1 || $bits[0] > $bits[1] ) { + print "Invalid slice specification"; + exit; + } + $sliceNumerator = $bits[0]; + $sliceDenominator = $bits[1]; +} else { + $sliceNumerator = $sliceDenominator = 1; +} $wgHTMLDump = new DumpHTML( array( 'dest' => $dest, @@ -69,6 +85,14 @@ $wgHTMLDump = new DumpHTML( array( 'alternateScriptPath' => $options['interlang'], 'interwiki' => $options['interlang'], 'skin' => $skin, + 'makeSnapshot' => $options['image-snapshot'], + 'checkpointFile' => $options['checkpoint'], + 'startID' => $start, + 'endID' => $end, + 'sliceNumerator' => $sliceNumerator, + 'sliceDenominator' => $sliceDenominator, + 'noOverwrite' => $options['no-overwrite'], + 'compress' => $options['compress'], )); @@ -81,47 +105,32 @@ if ( $options['special'] ) { } elseif ( $options['redirects'] ) { $wgHTMLDump->doRedirects(); } else { - print("Creating static HTML dump in directory $dest. \n". - "Starting from page_id $start of $end.\n"); - + print "Creating static HTML dump in directory $dest. \n"; $dbr =& wfGetDB( DB_SLAVE ); $server = $dbr->getProperty( 'mServer' ); print "Using database {$server}\n"; - $wgHTMLDump->doArticles( $start, $end ); if ( !isset( $options['e'] ) ) { - $wgHTMLDump->doImageDescriptions(); - $wgHTMLDump->doCategories(); - $wgHTMLDump->doSpecials(); - } - - /* - if ( $end - $start > CHUNK_SIZE * 2 ) { - // Split the problem into smaller chunks, run them in different PHP instances - // This is a memory/resource leak workaround - print("Creating static HTML dump in directory $dest. \n". - "Starting from page_id $start of $end.\n"); - - chdir( "maintenance" ); - for ( $chunkStart = $start; $chunkStart < $end; $chunkStart += CHUNK_SIZE ) { - $chunkEnd = $chunkStart + CHUNK_SIZE - 1; - if ( $chunkEnd > $end ) { - $chunkEnd = $end; - } - passthru( "php dumpHTML.php -d " . wfEscapeShellArg( $dest ) . 
" -s $chunkStart -e $chunkEnd" ); - } - chdir( ".." ); - $d->doImageDescriptions(); - $d->doCategories(); - $d->doMainPage( $dest ); + $wgHTMLDump->doEverything(); } else { - $d->doArticles( $start, $end ); + $wgHTMLDump->doArticles(); } - */ } if ( isset( $options['debug'] ) ) { - print_r($GLOBALS); + #print_r($GLOBALS); + # Workaround for bug #36957 + $globals = array_keys( $GLOBALS ); + #sort( $globals ); + $sizes = array(); + foreach ( $globals as $name ) { + $sizes[$name] = strlen( serialize( $GLOBALS[$name] ) ); + } + arsort($sizes); + $sizes = array_slice( $sizes, 0, 20 ); + foreach ( $sizes as $name => $size ) { + printf( "%9d %s\n", $size, $name ); + } } if ( $profiling ) { diff --git a/maintenance/dumpInterwiki.inc b/maintenance/dumpInterwiki.inc index 3cca1e02..2039f2df 100644 --- a/maintenance/dumpInterwiki.inc +++ b/maintenance/dumpInterwiki.inc @@ -31,7 +31,7 @@ class Site { } function getRebuildInterwikiDump() { - global $langlist, $languageAliases, $prefixRewrites, $wgDBname; + global $langlist, $languageAliases, $prefixRewrites; # Multi-language sites # db suffix => db suffix, iw prefix, hostname @@ -43,6 +43,7 @@ function getRebuildInterwikiDump() { 'wikinews' => new Site( 'wikinews', 'n', 'wikinews.org' ), 'wikisource' => new Site( 'wikisource', 's', 'wikisource.org' ), 'wikimedia' => new Site( 'wikimedia', 'chapter', 'wikimedia.org' ), + 'wikiversity' => new Site( 'wikiversity', 'v', 'wikiversity.org' ), ); # List of language prefixes likely to be found in multi-language sites diff --git a/maintenance/dumpSisterSites.php b/maintenance/dumpSisterSites.php new file mode 100644 index 00000000..50e121e6 --- /dev/null +++ b/maintenance/dumpSisterSites.php @@ -0,0 +1,49 @@ + + * http://www.mediawiki.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later 
version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @package MediaWiki + * @subpackage SpecialPage + */ + +require_once( 'commandLine.inc' ); + +$dbr = wfGetDB( DB_SLAVE ); +$dbr->bufferResults( false ); +$result = $dbr->select( 'page', + array( 'page_namespace', 'page_title' ), + array( + 'page_namespace' => NS_MAIN, + 'page_is_redirect' => 0, + ), + 'dumpSisterSites' ); + +while( $row = $dbr->fetchObject( $result ) ) { + $title = Title::makeTitle( $row->page_namespace, $row->page_title ); + $url = $title->getFullUrl(); + $text = $title->getPrefixedText(); + echo "$url $text\n"; +} + +$dbr->freeResult( $result ); + +?> diff --git a/maintenance/dumpTextPass.php b/maintenance/dumpTextPass.php index 78367c0b..8c1563ad 100644 --- a/maintenance/dumpTextPass.php +++ b/maintenance/dumpTextPass.php @@ -99,9 +99,13 @@ stream_wrapper_register( 'mediawiki.compress.7z', 'SevenZipStream' ); class TextPassDumper extends BackupDumper { var $prefetch = null; var $input = "php://stdin"; - var $history = MW_EXPORT_FULL; + var $history = WikiExporter::FULL; var $fetchCount = 0; var $prefetchCount = 0; + + var $failures = 0; + var $maxFailures = 200; + var $failureTimeout = 5; // Seconds to sleep after db failure function dump() { # This shouldn't happen if on console... 
;) @@ -139,10 +143,10 @@ class TextPassDumper extends BackupDumper { $this->input = $url; break; case 'current': - $this->history = MW_EXPORT_CURRENT; + $this->history = WikiExporter::CURRENT; break; case 'full': - $this->history = MW_EXPORT_FULL; + $this->history = WikiExporter::FULL; break; } } @@ -186,9 +190,8 @@ class TextPassDumper extends BackupDumper { $etats = '-'; $fetchrate = '-'; } - global $wgDBname; $this->progress( sprintf( "%s: %s %d pages (%0.3f/sec), %d revs (%0.3f/sec), %0.1f%% prefetched, ETA %s [max %d]", - $now, $wgDBname, $this->pageCount, $rate, $this->revCount, $revrate, $fetchrate, $etats, $this->maxCount ) ); + $now, wfWikiID(), $this->pageCount, $rate, $this->revCount, $revrate, $fetchrate, $etats, $this->maxCount ) ); } } @@ -236,6 +239,27 @@ class TextPassDumper extends BackupDumper { return $text; } } + while( true ) { + try { + return $this->doGetText( $id ); + } catch (DBQueryError $ex) { + $this->failures++; + if( $this->failures > $this->maxFailures ) { + throw $ex; + } else { + $this->progress( "Database failure $this->failures " . + "of allowed $this->maxFailures! " . + "Pausing $this->failureTimeout seconds..." ); + sleep( $this->failureTimeout ); + } + } + } + } + + /** + * May throw a database error if, say, the server dies during query. 
+ */ + private function doGetText( $id ) { $id = intval( $id ); $row = $this->db->selectRow( 'text', array( 'old_text', 'old_flags' ), diff --git a/maintenance/dumpUploads.php b/maintenance/dumpUploads.php new file mode 100644 index 00000000..8ba4e87b --- /dev/null +++ b/maintenance/dumpUploads.php @@ -0,0 +1,116 @@ +mAction = 'fetchUsed'; + $this->mBasePath = $IP; + $this->mShared = $wgUseSharedUploads; + + if( isset( $args['help'] ) ) { + $this->mAction = 'help'; + } + + if( isset( $args['base'] ) ) { + $this->mBasePath = $args['base']; + } + } + + function run() { + $this->{$this->mAction}(); + } + + function help() { + echo << list-o-files.txt + +Options: +--base= Set base relative path instead of wiki include root + +FIXME: other options not implemented yet ;) + +--local List all local files, used or not. No shared files included. +--used Skip local images that are not used +--shared Include images used from shared repository + +END; + } + + /** + * Fetch a list of all or used images from a particular image source. 
+ * @param string $table + * @param string $directory Base directory where files are located + * @param bool $shared true to pass shared-dir settings to hash func + */ + function fetchUsed() { + $dbr = wfGetDB( DB_SLAVE ); + $image = $dbr->tableName( 'image' ); + $imagelinks = $dbr->tableName( 'imagelinks' ); + + $sql = "SELECT DISTINCT il_to, img_name + FROM $imagelinks + LEFT OUTER JOIN $image + ON il_to=img_name"; + $result = $dbr->query( $sql ); + + while( $row = $dbr->fetchObject( $result ) ) { + if( is_null( $row->img_name ) ) { + if( $this->mShared ) { + $this->outputShared( $row->il_to ); + } + } else { + $this->outputLocal( $row->il_to ); + } + } + $dbr->freeResult( $result ); + } + + function outputLocal( $name ) { + global $wgUploadDirectory; + return $this->outputItem( $name, $wgUploadDirectory, false ); + } + + function outputShared( $name ) { + global $wgSharedUploadDirectory; + return $this->outputItem( $name, $wgSharedUploadDirectory, true ); + } + + function outputItem( $name, $directory, $shared ) { + $filename = $directory . + wfGetHashPath( $name, $shared ) . + $name; + $rel = $this->relativePath( $filename, $this->mBasePath ); + echo "$rel\n"; + } + + /** + * Return a relative path to $path from the base directory $base + * For instance relativePath( '/foo/bar/baz', '/foo' ) should return + * 'bar/baz'. + */ + function relativePath( $path, $base) { + $path = explode( DIRECTORY_SEPARATOR, $path ); + $base = explode( DIRECTORY_SEPARATOR, $base ); + while( count( $base ) && $path[0] == $base[0] ) { + array_shift( $path ); + array_shift( $base ); + } + foreach( $base as $prefix ) { + array_unshift( $path, '..' 
); + } + return implode( DIRECTORY_SEPARATOR, $path ); + } +} + +$dumper = new UploadDumper( $options ); +$dumper->run(); + +?> \ No newline at end of file diff --git a/maintenance/fixSlaveDesync.php b/maintenance/fixSlaveDesync.php index e97f96c9..d2dffe54 100644 --- a/maintenance/fixSlaveDesync.php +++ b/maintenance/fixSlaveDesync.php @@ -22,12 +22,50 @@ if ( isset( $args[0] ) ) { } else { $dbw =& wfGetDB( DB_MASTER ); $maxPage = $dbw->selectField( 'page', 'MAX(page_id)', false, 'fixDesync.php' ); + $corrupt = findPageLatestCorruption(); + foreach ( $corrupt as $id => $dummy ) { + desyncFixPage( $id ); + } + /* for ( $i=1; $i <= $maxPage; $i++ ) { desyncFixPage( $i ); if ( !($i % $reportingInterval) ) { print "$i\n"; } + }*/ +} + +function findPageLatestCorruption() { + $desync = array(); + $n = 0; + $dbw =& wfGetDB( DB_MASTER ); + $masterIDs = array(); + $res = $dbw->select( 'page', array( 'page_id', 'page_latest' ), array( 'page_id<6054123' ), __METHOD__ ); + print "Number of pages: " . $dbw->numRows( $res ) . "\n"; + while ( $row = $dbw->fetchObject( $res ) ) { + $masterIDs[$row->page_id] = $row->page_latest; + if ( !( ++$n % 10000 ) ) { + print "$n\r"; + } } + print "\n"; + $dbw->freeResult( $res ); + + global $slaveIndexes; + foreach ( $slaveIndexes as $i ) { + $slaveIDs = array(); + $db =& wfGetDB( $i ); + $res = $db->select( 'page', array( 'page_id', 'page_latest' ), array( 'page_id<6054123' ), __METHOD__ ); + while ( $row = $db->fetchObject( $res ) ) { + if ( isset( $masterIDs[$row->page_id] ) && $masterIDs[$row->page_id] != $row->page_latest ) { + $desync[$row->page_id] = true; + print $row->page_id . 
"\t"; + } + } + $db->freeResult( $res ); + } + print "\n"; + return $desync; } function desyncFixPage( $pageID ) { @@ -36,10 +74,20 @@ function desyncFixPage( $pageID ) { # Check for a corrupted page_latest $dbw =& wfGetDB( DB_MASTER ); - $realLatest = $dbw->selectField( 'page', 'page_latest', array( 'page_id' => $pageID ), $fname ); + $dbw->begin(); + $realLatest = $dbw->selectField( 'page', 'page_latest', array( 'page_id' => $pageID ), + $fname, 'FOR UPDATE' ); + #list( $masterFile, $masterPos ) = $dbw->getMasterPos(); $found = false; foreach ( $slaveIndexes as $i ) { $db =& wfGetDB( $i ); + /* + if ( !$db->masterPosWait( $masterFile, $masterPos, 10 ) ) { + echo "Slave is too lagged, aborting\n"; + $dbw->commit(); + sleep(10); + return; + }*/ $latest = $db->selectField( 'page', 'page_latest', array( 'page_id' => $pageID ), $fname ); $max = $db->selectField( 'revision', 'MAX(rev_id)', false, $fname ); if ( $latest != $realLatest && $realLatest < $max ) { @@ -49,11 +97,14 @@ function desyncFixPage( $pageID ) { } } if ( !$found ) { + print "page_id $pageID seems fine\n"; + $dbw->commit(); return; } - # Find the missing revision - $res = $dbw->select( 'revision', array( 'rev_id' ), array( 'rev_page' => $pageID ), $fname ); + # Find the missing revisions + $res = $dbw->select( 'revision', array( 'rev_id' ), array( 'rev_page' => $pageID ), + $fname, 'FOR UPDATE' ); $masterIDs = array(); while ( $row = $dbw->fetchObject( $res ) ) { $masterIDs[] = $row->rev_id; @@ -66,35 +117,79 @@ function desyncFixPage( $pageID ) { $slaveIDs[] = $row->rev_id; } $db->freeResult( $res ); - $missingIDs = array_diff( $masterIDs, $slaveIDs ); + if ( count( $masterIDs ) < count( $slaveIDs ) ) { + $missingIDs = array_diff( $slaveIDs, $masterIDs ); + if ( count( $missingIDs ) ) { + print "Found " . count( $missingIDs ) . " lost in master, copying from slave... 
"; + $dbFrom = $db; + $dbTo = $dbw; + $found = true; + $toMaster = true; + } else { + $found = false; + } + } else { + $missingIDs = array_diff( $masterIDs, $slaveIDs ); + if ( count( $missingIDs ) ) { + print "Found " . count( $missingIDs ) . " missing revision(s), copying from master... "; + $dbFrom = $dbw; + $dbTo = $db; + $found = true; + $toMaster = false; + } else { + $found = false; + } + } - if ( count( $missingIDs ) ) { - print "Found " . count( $missingIDs ) . " missing revision(s), copying from master... "; + if ( $found ) { foreach ( $missingIDs as $rid ) { print "$rid "; # Revision - $row = $dbw->selectRow( 'revision', '*', array( 'rev_id' => $rid ), $fname ); - foreach ( $slaveIndexes as $i ) { - $db =& wfGetDB( $i ); - $db->insert( 'revision', get_object_vars( $row ), $fname, 'IGNORE' ); + $row = $dbFrom->selectRow( 'revision', '*', array( 'rev_id' => $rid ), $fname ); + if ( $toMaster ) { + $id = $dbw->selectField( 'revision', 'rev_id', array( 'rev_id' => $rid ), + $fname, 'FOR UPDATE' ); + if ( $id ) { + echo "Revision already exists\n"; + $found = false; + break; + } else { + $dbw->insert( 'revision', get_object_vars( $row ), $fname, 'IGNORE' ); + } + } else { + foreach ( $slaveIndexes as $i ) { + $db =& wfGetDB( $i ); + $db->insert( 'revision', get_object_vars( $row ), $fname, 'IGNORE' ); + } } # Text - $row = $dbw->selectRow( 'text', '*', array( 'old_id' => $row->rev_text_id ), $fname ); - foreach ( $slaveIndexes as $i ) { - $db =& wfGetDB( $i ); - $db->insert( 'text', get_object_vars( $row ), $fname, 'IGNORE' ); + $row = $dbFrom->selectRow( 'text', '*', array( 'old_id' => $row->rev_text_id ), $fname ); + if ( $toMaster ) { + $dbw->insert( 'text', get_object_vars( $row ), $fname, 'IGNORE' ); + } else { + foreach ( $slaveIndexes as $i ) { + $db =& wfGetDB( $i ); + $db->insert( 'text', get_object_vars( $row ), $fname, 'IGNORE' ); + } } } print "done\n"; } - print "Fixing page_latest... 
"; - foreach ( $slaveIndexes as $i ) { - $db =& wfGetDB( $i ); - $db->update( 'page', array( 'page_latest' => $realLatest ), array( 'page_id' => $pageID ), $fname ); + if ( $found ) { + print "Fixing page_latest... "; + if ( $toMaster ) { + #$dbw->update( 'page', array( 'page_latest' => $realLatest ), array( 'page_id' => $pageID ), $fname ); + } else { + foreach ( $slaveIndexes as $i ) { + $db =& wfGetDB( $i ); + $db->update( 'page', array( 'page_latest' => $realLatest ), array( 'page_id' => $pageID ), $fname ); + } + } + print "done\n"; } - print "done\n"; + $dbw->commit(); } ?> diff --git a/maintenance/fuzz-tester.php b/maintenance/fuzz-tester.php new file mode 100644 index 00000000..23c3cd7c --- /dev/null +++ b/maintenance/fuzz-tester.php @@ -0,0 +1,2458 @@ +] + [--directory=] [--include-binary] + [--w3c-validate] [--delete-passed-retests] [--help] + [--user=] [--password=] + [--rerun-failed-tests] [--max-errors=] + [--max-runtime=] + [--specific-test=] + +Options: + --quiet : Hides passed tests, shows only failed tests. + --base-url : URL to a wiki on which to run the tests. + The "http://" is optional and can be omitted. + --directory : Full path to directory for storing failed tests. + Will be created if it does not exist. + --include-binary : Includes non-alphanumeric characters in the tests. + --w3c-validate : Validates pages using the W3C's web validator. + Slow. Currently many pages fail validation. + --user : Login name of a valid user on your test wiki. + --password : Password for the valid user on your test wiki. + --delete-passed-retests : Will delete retests that now pass. + Requires --rerun-failed-tests to be meaningful. + --rerun-failed-tests : Whether to rerun any previously failed tests. + --max-errors : Maximum number of errors to report before exiting. + Does not include errors from --rerun-failed-tests + --max-runtime : Maximum runtime, in minutes, to run before exiting. 
+ Only applies to new tests, not --rerun-failed-tests + --specific-test : Runs only the specified fuzz test. + Only applies to new tests, not --rerun-failed-tests + --help : Show this help message. + +Example: + If you wanted to fuzz test a nightly MediaWiki checkout using cron for 1 hour, + and only wanted to be informed of errors, and did not want to redo previously + failed tests, and wanted a maximum of 100 errors, then you could do: + php {$_SERVER["SCRIPT_NAME"]} --quiet --max-errors=100 --max-runtime=60 + + +ENDS; + + exit( 0 ); +} + + +// if we got command line options, check they look valid. +$validOptions = array ("quiet", "base-url", "directory", "include-binary", + "w3c-validate", "user", "password", "delete-passed-retests", + "rerun-failed-tests", "max-errors", + "max-runtime", "specific-test", "help" ); +if (!empty($options)) { + $unknownArgs = array_diff (array_keys($options), $validOptions); + foreach ($unknownArgs as $invalidArg) { + print "Ignoring invalid command-line option: --$invalidArg\n"; + } +} + + +///////////////////////////// CONFIGURATION //////////////////////////////////// + +// URL to some wiki on which we can run our tests. +if (!empty($options["base-url"])) { + define("WIKI_BASE_URL", $options["base-url"]); +} else { + define("WIKI_BASE_URL", $wgServer . $wgScriptPath . '/'); +} + +// The directory name where we store the output. +// Example for Windows: "c:\\temp\\wiki-fuzz" +if (!empty($options["directory"])) { + define("DIRECTORY", $options["directory"] ); +} else { + define("DIRECTORY", "{$wgUploadDirectory}/fuzz-tests"); +} + +// Should our test fuzz data include binary strings? +define("INCLUDE_BINARY", isset($options["include-binary"]) ); + +// Whether we want to validate HTML output on the web. +// At the moment very few generated pages will validate, so not recommended. 
+define("VALIDATE_ON_WEB", isset($options["w3c-validate"]) ); +// URL to use to validate our output: +define("VALIDATOR_URL", "http://validator.w3.org/check"); + +// Location of Tidy standalone executable. +define("PATH_TO_TIDY", "/usr/bin/tidy"); + +// The name of a user who has edited on your wiki. Used +// when testing the Special:Contributions and Special:Userlogin page. +if (!empty($options["user"])) { + define("USER_ON_WIKI", $options["user"] ); +} else { + define("USER_ON_WIKI", "nickj"); +} + +// The password of the above user. Used when testing the login page, +// and to do this we sometimes need to login successfully. +if (!empty($options["password"])) { + define("USER_PASSWORD", $options["password"] ); +} else { + // And no, this is not a valid password on any public wiki. + define("USER_PASSWORD", "nickj"); +} + +// If we have a test that failed, and then we run it again, and it passes, +// do you want to delete it or keep it? +define("DELETE_PASSED_RETESTS", isset($options["delete-passed-retests"]) ); + +// Do we want to rerun old saved tests at script startup? +// Set to true to help catch regressions, or false if you only want new stuff. +define("RERUN_OLD_TESTS", isset($options["rerun-failed-tests"]) ); + +// File where the database errors are logged. Should be defined in LocalSettings.php. +define("DB_ERROR_LOG_FILE", $wgDBerrorLog ); + +// Run in chatty mode (all output, default), or run in quiet mode (only prints out details of failed tests)? +define("QUIET", isset($options["quiet"]) ); + +// The maximum runtime, if specified. +if (!empty($options["max-runtime"]) && intval($options["max-runtime"])>0) { + define("MAX_RUNTIME", intval($options["max-runtime"]) ); +} + +// The maximum number of problems to find, if specified. Excludes retest errors. 
+if (!empty($options["max-errors"]) && intval($options["max-errors"])>0) { + define("MAX_ERRORS", intval($options["max-errors"]) ); +} + +// if the user has requested a specific test (instead of all tests), and the test they asked for looks valid. +if (!empty($options["specific-test"])) { + if (class_exists($options["specific-test"]) && get_parent_class($options["specific-test"])=="pageTest") { + define("SPECIFIC_TEST", $options["specific-test"] ); + } + else { + print "Ignoring invalid --specific-test\n"; + } +} + +// Define the file extensions we'll use: +define("PHP_TEST" , ".test.php"); +define("CURL_TEST", ".curl.sh" ); +define("DATA_FILE", ".data.bin"); +define("INFO_FILE", ".info.txt"); +define("HTML_FILE", ".wiki_preview.html"); + +// If it goes wrong, we want to know about it. +error_reporting(E_ALL | E_STRICT); + +//////////////// A CLASS THAT GENERATES RANDOM NASTY WIKI & HTML STRINGS ////////////////////// + +class wikiFuzz { + + // Only some HTML tags are understood with params by MediaWiki, the rest are ignored. + // List the tags that accept params below, as well as what those params are. 
+ public static $data = array( + "B" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"), + "CAPTION" => array("CLASS", "ID", "STYLE", "align", "lang", "dir", "title"), + "CENTER" => array("CLASS", "STYLE", "ID", "lang", "dir", "title"), + "DIV" => array("CLASS", "STYLE", "ID", "align", "lang", "dir", "title"), + "FONT" => array("CLASS", "STYLE", "ID", "lang", "dir", "title", "face", "size", "color"), + "H1" => array("STYLE", "CLASS", "ID", "align", "lang", "dir", "title"), + "H2" => array("STYLE", "CLASS", "ID", "align", "lang", "dir", "title"), + "HR" => array("STYLE", "CLASS", "ID", "WIDTH", "lang", "dir", "title", "size", "noshade"), + "LI" => array("CLASS", "ID", "STYLE", "lang", "dir", "title", "type", "value"), + "TABLE" => array("STYLE", "CLASS", "ID", "BGCOLOR", "WIDTH", "ALIGN", "BORDER", "CELLPADDING", + "CELLSPACING", "lang", "dir", "title", "summary", "frame", "rules"), + "TD" => array("STYLE", "CLASS", "ID", "BGCOLOR", "WIDTH", "ALIGN", "COLSPAN", "ROWSPAN", + "VALIGN", "abbr", "axis", "headers", "scope", "nowrap", "height", "lang", + "dir", "title", "char", "charoff"), + "TH" => array("STYLE", "CLASS", "ID", "BGCOLOR", "WIDTH", "ALIGN", "COLSPAN", "ROWSPAN", + "VALIGN", "abbr", "axis", "headers", "scope", "nowrap", "height", "lang", + "dir", "title", "char", "charoff"), + "TR" => array("CLASS", "STYLE", "ID", "BGCOLOR", "ALIGN", "VALIGN", "lang", "dir", "title", "char", "charoff"), + "UL" => array("CLASS", "STYLE", "ID", "lang", "dir", "title", "type"), + "P" => array("style", "class", "id", "align", "lang", "dir", "title"), + "blockquote" => array("CLASS", "ID", "STYLE", "lang", "dir", "title", "cite"), + "span" => array("CLASS", "ID", "STYLE", "align", "lang", "dir", "title"), + "code" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"), + "tt" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"), + "small" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"), + "big" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"), + "s" 
=> array("CLASS", "ID", "STYLE", "lang", "dir", "title"), + "u" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"), + "del" => array("CLASS", "ID", "STYLE", "lang", "dir", "title", "datetime", "cite"), + "ins" => array("CLASS", "ID", "STYLE", "lang", "dir", "title", "datetime", "cite"), + "sub" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"), + "sup" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"), + "ol" => array("CLASS", "ID", "STYLE", "lang", "dir", "title", "type", "start"), + "br" => array("CLASS", "ID", "STYLE", "title", "clear"), + "cite" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"), + "var" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"), + "dl" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"), + "ruby" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"), + "rt" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"), + "rp" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"), + "dt" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"), + "dl" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"), + "em" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"), + "strong" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"), + "i" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"), + "thead" => array("CLASS", "ID", "STYLE", "lang", "dir", "title", 'align', 'char', 'charoff', 'valign'), + "tfoot" => array("CLASS", "ID", "STYLE", "lang", "dir", "title", 'align', 'char', 'charoff', 'valign'), + "tbody" => array("CLASS", "ID", "STYLE", "lang", "dir", "title", 'align', 'char', 'charoff', 'valign'), + "colgroup" => array("CLASS", "ID", "STYLE", "lang", "dir", "title", 'align', 'char', 'charoff', 'valign', 'span', 'width'), + "col" => array("CLASS", "ID", "STYLE", "lang", "dir", "title", 'align', 'char', 'charoff', 'valign', 'span', 'width'), + "pre" => array("CLASS", "ID", "STYLE", "lang", "dir", "title", "width"), + + // extension tags that accept parameters: + "sort" => 
array("order", "class"), + "ref" => array("name"), + "categorytree" => array("hideroot", "mode", "style"), + ); + + // The types of the HTML that we will be testing were defined above + // Note: this needs to be initialized later to be equal to: array_keys(wikiFuzz::$data); + // as such, it also needs to also be publicly modifiable. + public static $types; + + + // Some attribute values. + static private $other = array("&","=",":","?","\"","\n","%n%n%n%n%n%n%n%n%n%n%n%n","\\"); + static private $ints = array( + // various numbers + "0","-1","127","-7897","89000","808080","90928345", + "0xfffffff","ffff", + + // Different ways of saying: ' + "'", // Long UTF-8 Unicode encoding + "'", // dec version. + "'", // hex version. + "§", // malformed hex variant, MSB not zero. + + // Different ways of saying: " + """, // Long UTF-8 Unicode encoding + """, + """, // hex version. + "¢", // malformed hex variant, MSB not zero. + + // Different ways of saying: < + "<", + "<", // Long UTF-8 Unicode encoding without semicolon (Mediawiki wants the colon) + "<", // Long UTF-8 Unicode encoding with semicolon + "<", + "<", // hex version. + "¼", // malformed hex variant, MSB not zero. + "<", // mid-length hex version + "<", // slightly longer hex version, with capital "X" + + // Different ways of saying: > + ">", + ">", // Long UTF-8 Unicode encoding + ">", + ">", // hex version. + "¾", // malformed variant, MSB not zero. + + // Different ways of saying: [ + "[", // Long UTF-8 Unicode encoding + "[", + "[", // hex version. + + // Different ways of saying: {{ + "{{", // Long UTF-8 Unicode encoding + "{{", + "{{", // hex version. + + // Different ways of saying: | + "|", // Long UTF-8 Unicode encoding + "|", + "|", // hex version. + "ü", // malformed hex variant, MSB not zero. 
+ + // a "lignature" - http://www.robinlionheart.com/stds/html4/spchars#ligature + "‌" + ); + + // Defines various wiki-related bits of syntax, that can potentially cause + // MediaWiki to do something other than just print that literal text. + static private $ext = array( + // links, templates, parameters. + "[[", "]]", "{{", "}}", "|", "[", "]", "{{{", "}}}", "|]]", + + // wiki tables. + "\n{|", "\n|}", + "!", + "\n!", + "!!", + "||", + "\n|-", "| ", "\n|", + + // section headings. + "=", "==", "===", "====", "=====", "======", + + // lists (ordered and unordered) and indentation. + "\n*", "*", "\n:", ":", + "\n#", "#", + + // definition lists (dl, dt, dd), newline, and newline with pre, and a tab. + "\n;", ";", "\n ", + + // Whitespace: newline, tab, space. + "\n", "\t", " ", + + // Some XSS attack vectors from http://ha.ckers.org/xss.html + " ", // tab + " ", // newline + " ", // carriage return + "\0", // null character + "  ", // spaces and meta characters + "'';!--\"=&{()}", // compact injection of XSS & SQL tester + + // various NULL fields + "%00", + "�", + "\0", + + // horizontal rule. + "-----", "\n-----", + + // signature, redirect, bold, italics. + "~~~~", "#REDIRECT [[", "'''", "''", + + // comments. + "", + + // quotes. + "\"", "'", + + // tag start and tag end. + "<", ">", + + // implicit link creation on URIs. + "http://", + "https://", + "ftp://", + "irc://", + "news:", + 'gopher://', + 'telnet://', + 'nntp://', + 'worldwind://', + 'mailto:', + + // images. + "[[image:", + ".gif", + ".png", + ".jpg", + ".jpeg", + 'thumbnail=', + 'thumbnail', + 'thumb=', + 'thumb', + 'right', + 'none', + 'left', + 'framed', + 'frame', + 'enframed', + 'centre', + 'center', + "Image:", + "[[:Image", + 'px', + + // misc stuff to throw at the Parser. 
+ '%08X', + '/', + ":x{|", + "\n|+", + "", + "", + " \302\273", + " :", + " !", + " ;", + "\302\253", + "[[category:", + "?=", + "(", + ")", + "]]]", + "../", + "{{{{", + "}}}}", + "[[Special:", + "", + "", + "', + + // implicit link creation on booknum, RFC, and PubMed ID usage (both with and without IDs) + "ISBN 2", + "RFC 000", + "PMID 000", + "ISBN ", + "RFC ", + "PMID ", + + // magic words: + '__NOTOC__', + '__FORCETOC__', + '__NOEDITSECTION__', + '__START__', + '__NOTITLECONVERT__', + '__NOCONTENTCONVERT__', + '__END__', + '__TOC__', + '__NOTC__', + '__NOCC__', + "__FORCETOC__", + "__NEWSECTIONLINK__", + "__NOGALLERY__", + + // more magic words / internal templates. + '{{PAGENAME}}', + '{{PAGENAMEE}}', + '{{NAMESPACE}}', + "{{MSG:", + "}}", + "{{MSGNW:", + "}}", + "{{INT:", + "}}", + '{{SITENAME}}', + "{{NS:", + "}}", + "{{LOCALURL:", + "}}", + "{{LOCALURLE:", + "}}", + "{{SCRIPTPATH}}", + "{{GRAMMAR:gentiv|", + "}}", + "{{REVISIONID}}", + "{{SUBPAGENAME}}", + "{{SUBPAGENAMEE}}", + "{{ns:0}}", + "{{fullurle:", + "}}", + "{{subst:", + "}}", + "{{UCFIRST:", + "}}", + "{{UC:", + '{{SERVERNAME}}', + '{{SERVER}}', + "{{RAW:", + "}}", + "{{PLURAL:", + "}}", + "{{LCFIRST:", + "}}", + "{{LC:", + "}}", + '{{CURRENTWEEK}}', + '{{CURRENTDOW}}', + "{{INT:{{LC:contribs-showhideminor}}|", + "}}", + "{{INT:googlesearch|", + "}}", + "{{BASEPAGENAME}}", + "{{CONTENTLANGUAGE}}", + "{{PAGESINNAMESPACE:}}", + "{{#language:", + "}}", + + // Some raw link for magic words. + "{{NUMBEROFPAGES:R", + "}}", + "{{NUMBEROFUSERS:R", + "}}", + "{{NUMBEROFARTICLES:R", + "}}", + "{{NUMBEROFFILES:R", + "}}", + "{{NUMBEROFADMINS:R", + "}}", + "{{padleft:", + "}}", + "{{padright:", + "}}", + + // internal Math "extension": + "", + "", + + // Parser extension functions: + "{{#expr:", + "{{#if:", + "{{#ifeq:", + "{{#ifexist:", + "{{#ifexpr:", + "{{#switch:", + "{{#time:", + "}}", + + // references table for the Cite extension. + "", + + // Internal Parser tokens - try inserting some of these. 
+ "UNIQ25f46b0524f13e67NOPARSE", + "UNIQ17197916557e7cd6-HTMLCommentStrip46238afc3bb0cf5f00000002", + "\x07UNIQ17197916557e7cd6-HTMLCommentStrip46238afc3bb0cf5f00000002-QINU", + + // Inputbox extension: + "\ntype=search\nsearchbuttonlabel=\n", + "", + + // charInsert extension: + "", + "", + + // wikiHiero extension: + "", + "", + + // Image gallery: + "", + "", + + // FixedImage: + "", + + // Timeline extension: currently untested. + + // Nowiki: + "", + "", + + // an external image to test the external image displaying code + "http://debian.org/Pics/debian.png", + ); + + /** + ** @desc: Randomly returns one element of the input array. + */ + static public function chooseInput(array $input) { + $randindex = wikiFuzz::randnum(count($input) - 1); + return $input[$randindex]; + } + + // Max number of parameters for HTML attributes. + static private $maxparams = 10; + + /** + ** @desc: Returns random number between finish and start. + */ + static public function randnum($finish,$start=0) { + return mt_rand($start,$finish); + } + + /** + ** @desc: Returns a mix of random text and random wiki syntax. + */ + static private function randstring() { + $thestring = ""; + + for ($i=0; $i<40; $i++) { + $what = wikiFuzz::randnum(1); + + if ($what == 0) { // include some random wiki syntax + $which = wikiFuzz::randnum(count(wikiFuzz::$ext) - 1); + $thestring .= wikiFuzz::$ext[$which]; + } + else { // include some random text + $char = INCLUDE_BINARY + // Decimal version: + // "&#" . wikiFuzz::randnum(255) . ";" + // Hex version: + ? "&#x" . str_pad(dechex(wikiFuzz::randnum(255)), wikiFuzz::randnum(2, 7), "0", STR_PAD_LEFT) . ";" + : chr(wikiFuzz::randnum(126,32)); + + $length = wikiFuzz::randnum(8); + $thestring .= str_repeat ($char, $length); + } + } + return $thestring; + } + + /** + ** @desc: Returns either random text, or random wiki syntax, or random data from "ints", + ** or random data from "other". 
+ */ + static private function makestring() { + $what = wikiFuzz::randnum(2); + if ($what == 0) { + return wikiFuzz::randstring(); + } + elseif ($what == 1) { + return wikiFuzz::$ints[wikiFuzz::randnum(count(wikiFuzz::$ints) - 1)]; + } + else { + return wikiFuzz::$other[wikiFuzz::randnum(count(wikiFuzz::$other) - 1)]; + } + } + + + /** + ** @desc: Strips out the stuff that Mediawiki balks at in a page's title. + ** Implementation copied/pasted from cleanupTable.inc & cleanupImages.php + */ + static public function makeTitleSafe($str) { + $legalTitleChars = " %!\"$&'()*,\\-.\\/0-9:;=?@A-Z\\\\^_`a-z~\\x80-\\xFF"; + return preg_replace_callback( + "/([^$legalTitleChars])/", + create_function( + // single quotes are essential here, + // or alternative escape all $ as \$ + '$matches', + 'return sprintf( "\\x%02x", ord( $matches[1] ) );' + ), + $str ); + } + + /** + ** @desc: Returns a string of fuzz text. + */ + static private function loop() { + switch ( wikiFuzz::randnum(3) ) { + case 1: // an opening tag, with parameters. + $string = ""; + $i = wikiFuzz::randnum(count(wikiFuzz::$types) - 1); + $t = wikiFuzz::$types[$i]; + $arr = wikiFuzz::$data[$t]; + $string .= "<" . $t . " "; + $num_params = min(wikiFuzz::$maxparams, count($arr)); + for ($z=0; $z<$num_params; $z++) { + $badparam = $arr[wikiFuzz::randnum(count($arr) - 1)]; + $badstring = wikiFuzz::makestring(); + $string .= $badparam . "=" . wikiFuzz::getRandQuote() . $badstring . wikiFuzz::getRandQuote() . " "; + } + $string .= ">\n"; + return $string; + case 2: // a closing tag. + $i = wikiFuzz::randnum(count(wikiFuzz::$types) - 1); + return ""; + case 3: // a random string, between tags. + return wikiFuzz::makeString(); + } + return ""; // catch-all, should never be called. + } + + /** + ** @desc: Returns one of the three styles of random quote: ', ", and nothing. 
+ */ + static private function getRandQuote() { + switch ( wikiFuzz::randnum(3) ) { + case 1 : return "'"; + case 2 : return "\""; + default: return ""; + } + } + + /** + ** @desc: Returns fuzz text, with the parameter indicating approximately how many lines of text you want. + */ + static public function makeFuzz($maxtypes = 2) { + $page = ""; + for ($k=0; $k<$maxtypes; $k++) { + $page .= wikiFuzz::loop(); + } + return $page; + } +} + + +//////// MEDIAWIKI PAGES TO TEST, AND HOW TO TEST THEM /////// + +/** + ** @desc: A page test has just these things: + ** 1) Form parameters. + ** 2) the URL we are going to test those parameters on. + ** 3) Any cookies required for the test. + ** Declared abstract because it should be extended by a class + ** that supplies these parameters. + */ +abstract class pageTest { + protected $params; + protected $pagePath; + protected $cookie = ""; + + public function getParams() { + return $this->params; + } + + public function getPagePath() { + return $this->pagePath; + } + + public function getCookie() { + return $this->cookie; + } +} + + +/** + ** @desc: a page test for the "Edit" page. Tests Parser.php and Sanitizer.php. + */ +class editPageTest extends pageTest { + function __construct() { + $this->pagePath = "index.php?title=WIKIFUZZ"; + + $this->params = array ( + "action" => "submit", + "wpMinoredit" => wikiFuzz::makeFuzz(2), + "wpPreview" => wikiFuzz::makeFuzz(2), + "wpSection" => wikiFuzz::makeFuzz(2), + "wpEdittime" => wikiFuzz::makeFuzz(2), + "wpSummary" => wikiFuzz::makeFuzz(2), + "wpScrolltop" => wikiFuzz::makeFuzz(2), + "wpStarttime" => wikiFuzz::makeFuzz(2), + "wpAutoSummary" => wikiFuzz::makeFuzz(2), + "wpTextbox1" => wikiFuzz::makeFuzz(40) // the main wiki text, need lots of this. + ); + + // sometimes we don't want to specify certain parameters. 
+ if (wikiFuzz::randnum(6) == 0) unset($this->params["wpSection"]); + if (wikiFuzz::randnum(6) == 0) unset($this->params["wpEdittime"]); + if (wikiFuzz::randnum(6) == 0) unset($this->params["wpSummary"]); + if (wikiFuzz::randnum(6) == 0) unset($this->params["wpScrolltop"]); + if (wikiFuzz::randnum(6) == 0) unset($this->params["wpStarttime"]); + if (wikiFuzz::randnum(6) == 0) unset($this->params["wpAutoSummary"]); + if (wikiFuzz::randnum(6) == 0) unset($this->params["wpTextbox1"]); + } +} + + +/** + ** @desc: a page test for "Special:Listusers". + */ +class listusersTest extends pageTest { + function __construct() { + $this->pagePath = "index.php/Special:Listusers"; + + $this->params = array ( + "title" => wikiFuzz::makeFuzz(2), + "group" => wikiFuzz::makeFuzz(2), + "username" => wikiFuzz::makeFuzz(2), + "Go" => wikiFuzz::makeFuzz(2), + "limit" => wikiFuzz::chooseInput( array("0", "-1", "---'----------0", "+1", "8134", wikiFuzz::makeFuzz(2)) ), + "offset" => wikiFuzz::chooseInput( array("0", "-1", "--------'-----0", "+1", "81343242346234234", wikiFuzz::makeFuzz(2)) ) + ); + } +} + + +/** + ** @desc: a page test for "Special:Search". 
+ */ +class searchTest extends pageTest { + function __construct() { + $this->pagePath = "index.php/Special:Search"; + + $this->params = array ( + "action" => "index.php/Special:Search", + "ns0" => wikiFuzz::makeFuzz(2), + "ns1" => wikiFuzz::makeFuzz(2), + "ns2" => wikiFuzz::makeFuzz(2), + "ns3" => wikiFuzz::makeFuzz(2), + "ns4" => wikiFuzz::makeFuzz(2), + "ns5" => wikiFuzz::makeFuzz(2), + "ns6" => wikiFuzz::makeFuzz(2), + "ns7" => wikiFuzz::makeFuzz(2), + "ns8" => wikiFuzz::makeFuzz(2), + "ns9" => wikiFuzz::makeFuzz(2), + "ns10" => wikiFuzz::makeFuzz(2), + "ns11" => wikiFuzz::makeFuzz(2), + "ns12" => wikiFuzz::makeFuzz(2), + "ns13" => wikiFuzz::makeFuzz(2), + "ns14" => wikiFuzz::makeFuzz(2), + "ns15" => wikiFuzz::makeFuzz(2), + "redirs" => wikiFuzz::makeFuzz(2), + "search" => wikiFuzz::makeFuzz(2), + "offset" => wikiFuzz::chooseInput( array("", "0", "-1", "--------'-----0", "+1", "81343242346234234", wikiFuzz::makeFuzz(2)) ), + "fulltext" => wikiFuzz::chooseInput( array("", "0", "1", "--------'-----0", "+1", wikiFuzz::makeFuzz(2)) ), + "searchx" => wikiFuzz::chooseInput( array("", "0", "1", "--------'-----0", "+1", wikiFuzz::makeFuzz(2)) ) + ); + } +} + + +/** + ** @desc: a page test for "Special:Recentchanges". 
+ */ +class recentchangesTest extends pageTest { + function __construct() { + $this->pagePath = "index.php/Special:Recentchanges"; + + $this->params = array ( + "action" => wikiFuzz::makeFuzz(2), + "title" => wikiFuzz::makeFuzz(2), + "namespace" => wikiFuzz::chooseInput( range(-1, 15) ), + "Go" => wikiFuzz::makeFuzz(2), + "invert" => wikiFuzz::chooseInput( array("-1", "---'----------0", "+1", "8134", wikiFuzz::makeFuzz(2)) ), + "hideanons" => wikiFuzz::chooseInput( array("-1", "------'-------0", "+1", "8134", wikiFuzz::makeFuzz(2)) ), + 'limit' => wikiFuzz::chooseInput( array("0", "-1", "---------'----0", "+1", "81340909772349234", wikiFuzz::makeFuzz(2)) ), + "days" => wikiFuzz::chooseInput( array("-1", "----------'---0", "+1", "8134", wikiFuzz::makeFuzz(2)) ), + "hideminor" => wikiFuzz::chooseInput( array("-1", "-----------'--0", "+1", "8134", wikiFuzz::makeFuzz(2)) ), + "hidebots" => wikiFuzz::chooseInput( array("-1", "---------'----0", "+1", "8134", wikiFuzz::makeFuzz(2)) ), + "hideliu" => wikiFuzz::chooseInput( array("-1", "-------'------0", "+1", "8134", wikiFuzz::makeFuzz(2)) ), + "hidepatrolled" => wikiFuzz::chooseInput( array("-1", "-----'--------0", "+1", "8134", wikiFuzz::makeFuzz(2)) ), + "hidemyself" => wikiFuzz::chooseInput( array("-1", "--'-----------0", "+1", "8134", wikiFuzz::makeFuzz(2)) ), + 'categories_any'=> wikiFuzz::chooseInput( array("-1", "--'-----------0", "+1", "8134", wikiFuzz::makeFuzz(2)) ), + 'categories' => wikiFuzz::chooseInput( array("-1", "--'-----------0", "+1", "8134", wikiFuzz::makeFuzz(2)) ), + 'feed' => wikiFuzz::chooseInput( array("-1", "--'-----------0", "+1", "8134", wikiFuzz::makeFuzz(2)) ) + ); + } +} + + +/** + ** @desc: a page test for "Special:Prefixindex". 
+ */ +class prefixindexTest extends pageTest { + function __construct() { + $this->pagePath = "index.php/Special:Prefixindex"; + + $this->params = array ( + "title" => "Special:Prefixindex", + "namespace" => wikiFuzz::randnum(-10,101), + "Go" => wikiFuzz::makeFuzz(2) + ); + + // sometimes we want 'prefix', sometimes we want 'from', and sometimes we want nothing. + if (wikiFuzz::randnum(3) == 0) { + $this->params["prefix"] = wikiFuzz::chooseInput( array("-1", "-----'--------0", "+++--+1", + wikiFuzz::randnum(-10,8134), wikiFuzz::makeFuzz(2)) ); + } + if (wikiFuzz::randnum(3) == 0) { + $this->params["from"] = wikiFuzz::chooseInput( array("-1", "-----'--------0", "+++--+1", + wikiFuzz::randnum(-10,8134), wikiFuzz::makeFuzz(2)) ); + } + } +} + + +/** + ** @desc: a page test for "Special:MIMEsearch". + */ +class mimeSearchTest extends pageTest { + function __construct() { + $this->pagePath = "index.php/Special:MIMEsearch"; + + $this->params = array ( + "action" => "/wiki/index.php/Special:MIMEsearch", + "mime" => wikiFuzz::makeFuzz(3), + 'limit' => wikiFuzz::chooseInput( array("0", "-1", "-------'------0", "+1", "81342321351235325", wikiFuzz::makeFuzz(2)) ), + 'offset' => wikiFuzz::chooseInput( array("0", "-1", "-----'--------0", "+1", "81341231235365252234324", wikiFuzz::makeFuzz(2)) ) + ); + } +} + + +/** + ** @desc: a page test for "Special:Log". + */ +class specialLogTest extends pageTest { + function __construct() { + $this->pagePath = "index.php/Special:Log"; + + $this->params = array ( + "type" => wikiFuzz::chooseInput( array("", wikiFuzz::makeFuzz(2)) ), + "par" => wikiFuzz::makeFuzz(2), + "user" => wikiFuzz::makeFuzz(2), + "page" => wikiFuzz::makeFuzz(2), + "from" => wikiFuzz::makeFuzz(2), + "until" => wikiFuzz::makeFuzz(2), + "title" => wikiFuzz::makeFuzz(2) + ); + } +} + + +/** + ** @desc: a page test for "Special:Userlogin", with a successful login. 
+ */ +class successfulUserLoginTest extends pageTest { + function __construct() { + $this->pagePath = "index.php?title=Special:Userlogin&action=submitlogin&type=login&returnto=" . wikiFuzz::makeFuzz(2); + + $this->params = array ( + "wpName" => USER_ON_WIKI, + // sometimes real password, sometimes not: + 'wpPassword' => wikiFuzz::chooseInput( array( wikiFuzz::makeFuzz(2), USER_PASSWORD ) ), + 'wpRemember' => wikiFuzz::makeFuzz(2) + ); + + $this->cookie = "wikidb_session=" . wikiFuzz::chooseInput( array("1" , wikiFuzz::makeFuzz(2) ) ); + } +} + + +/** + ** @desc: a page test for "Special:Userlogin". + */ +class userLoginTest extends pageTest { + function __construct() { + + $this->pagePath = "index.php/Special:Userlogin"; + + $this->params = array ( + 'wpRetype' => wikiFuzz::makeFuzz(2), + 'wpRemember' => wikiFuzz::makeFuzz(2), + 'wpRealName' => wikiFuzz::makeFuzz(2), + 'wpPassword' => wikiFuzz::makeFuzz(2), + 'wpName' => wikiFuzz::makeFuzz(2), + 'wpMailmypassword'=> wikiFuzz::makeFuzz(2), + 'wpLoginattempt' => wikiFuzz::makeFuzz(2), + 'wpEmail' => wikiFuzz::makeFuzz(2), + 'wpDomain' => wikiFuzz::chooseInput( array("", "local", wikiFuzz::makeFuzz(2)) ), + 'wpCreateaccountMail' => wikiFuzz::chooseInput( array("", wikiFuzz::makeFuzz(2)) ), + 'wpCreateaccount' => wikiFuzz::chooseInput( array("", wikiFuzz::makeFuzz(2)) ), + 'wpCookieCheck' => wikiFuzz::chooseInput( array("", wikiFuzz::makeFuzz(2)) ), + 'type' => wikiFuzz::chooseInput( array("signup", "login", "", wikiFuzz::makeFuzz(2)) ), + 'returnto' => wikiFuzz::makeFuzz(2), + 'action' => wikiFuzz::chooseInput( array("", "submitlogin", wikiFuzz::makeFuzz(2)) ) + ); + + $this->cookie = "wikidb_session=" . 
wikiFuzz::chooseInput( array("1" , wikiFuzz::makeFuzz(2) ) ); + } +} + + +/** + ** @desc: a page test for "Special:Ipblocklist" (also includes unblocking) + */ +class ipblocklistTest extends pageTest { + function __construct() { + $this->pagePath = "index.php/Special:Ipblocklist"; + + $this->params = array ( + 'wpUnblockAddress'=> wikiFuzz::makeFuzz(2), + 'ip' => wikiFuzz::chooseInput( array("20398702394", "", "Nickj2", wikiFuzz::makeFuzz(2), + // something like an IP address, sometimes invalid: + ( wikiFuzz::randnum(300,-20) . "." . wikiFuzz::randnum(300,-20) . "." + . wikiFuzz::randnum(300,-20) . "." .wikiFuzz::randnum(300,-20) ) ) ), + 'id' => wikiFuzz::makeFuzz(2), + 'wpUnblockReason' => wikiFuzz::makeFuzz(2), + 'action' => wikiFuzz::chooseInput( array(wikiFuzz::makeFuzz(2), "success", "submit", "unblock") ), + 'wpEditToken' => wikiFuzz::makeFuzz(2), + 'wpBlock' => wikiFuzz::chooseInput( array(wikiFuzz::makeFuzz(2), "") ), + 'limit' => wikiFuzz::chooseInput( array("0", "-1", "--------'-----0", "+1", + "09700982312351132098234", wikiFuzz::makeFuzz(2)) ), + 'offset' => wikiFuzz::chooseInput( array("0", "-1", "------'-------0", "+1", + "09700980982341535324234234", wikiFuzz::makeFuzz(2)) ) + ); + + // sometimes we don't want to specify certain parameters. + if (wikiFuzz::randnum(4) == 0) unset($this->params["action"]); + if (wikiFuzz::randnum(3) == 0) unset($this->params["ip"]); + if (wikiFuzz::randnum(2) == 0) unset($this->params["id"]); + if (wikiFuzz::randnum(3) == 0) unset($this->params["wpUnblockAddress"]); + } +} + + +/** + ** @desc: a page test for "Special:Newimages". 
+ */ +class newImagesTest extends pageTest { + function __construct() { + $this->pagePath = "index.php/Special:Newimages"; + + $this->params = array ( + 'hidebots' => wikiFuzz::chooseInput( array(wikiFuzz::makeFuzz(2), "1", "", "-1") ), + 'wpIlMatch' => wikiFuzz::makeFuzz(2), + 'until' => wikiFuzz::makeFuzz(2), + 'from' => wikiFuzz::makeFuzz(2) + ); + + // sometimes we don't want to specify certain parameters. + if (wikiFuzz::randnum(6) == 0) unset($this->params["until"]); + if (wikiFuzz::randnum(6) == 0) unset($this->params["from"]); + } +} + + +/** + ** @desc: a page test for the "Special:Imagelist" page. + */ +class imagelistTest extends pageTest { + function __construct() { + $this->pagePath = "index.php/Special:Imagelist"; + + $this->params = array ( + 'sort' => wikiFuzz::chooseInput( array("bysize", "byname" , "bydate", wikiFuzz::makeFuzz(2)) ), + 'limit' => wikiFuzz::chooseInput( array("0", "-1", "--------'-----0", "+1", "09700982312351132098234", wikiFuzz::makeFuzz(2)) ), + 'offset' => wikiFuzz::chooseInput( array("0", "-1", "------'-------0", "+1", "09700980982341535324234234", wikiFuzz::makeFuzz(2)) ), + 'wpIlMatch' => wikiFuzz::makeFuzz(2) + ); + } +} + + +/** + ** @desc: a page test for "Special:Export". + */ +class specialExportTest extends pageTest { + function __construct() { + $this->pagePath = "index.php/Special:Export"; + + $this->params = array ( + 'action' => wikiFuzz::chooseInput( array("submit", "", wikiFuzz::makeFuzz(2)) ), + 'pages' => wikiFuzz::makeFuzz(2), + 'curonly' => wikiFuzz::chooseInput( array("", "0", "-1", wikiFuzz::makeFuzz(2)) ), + 'listauthors' => wikiFuzz::chooseInput( array("", "0", "-1", wikiFuzz::makeFuzz(2)) ), + 'history' => wikiFuzz::chooseInput( array("0", "-1", "------'-------0", "+1", "09700980982341535324234234", wikiFuzz::makeFuzz(2)) ), + + ); + + // For the time being, need to disable "submit" action as Tidy barfs on MediaWiki's XML export. 
+ if ($this->params['action'] == 'submit') $this->params['action'] = ''; + + // Sometimes remove the history field. + if (wikiFuzz::randnum(2) == 0) unset($this->params["history"]); + } +} + + +/** + ** @desc: a page test for "Special:Booksources". + */ +class specialBooksourcesTest extends pageTest { + function __construct() { + $this->pagePath = "index.php/Special:Booksources"; + + $this->params = array ( + 'go' => wikiFuzz::makeFuzz(2), + // ISBN codes have to contain some semi-numeric stuff or will be ignored: + 'isbn' => "0X0" . wikiFuzz::makeFuzz(2) + ); + } +} + + +/** + ** @desc: a page test for "Special:Allpages". + */ +class specialAllpagesTest extends pageTest { + function __construct() { + $this->pagePath = "index.php?title=Special%3AAllpages"; + + $this->params = array ( + 'from' => wikiFuzz::makeFuzz(2), + 'namespace' => wikiFuzz::chooseInput( range(-1, 15) ), + 'go' => wikiFuzz::makeFuzz(2) + ); + } +} + + +/** + ** @desc: a page test for the page History. + */ +class pageHistoryTest extends pageTest { + function __construct() { + $this->pagePath = "index.php?title=Main_Page&action=history"; + + $this->params = array ( + 'limit' => wikiFuzz::chooseInput( array("-1", "0", "-------'------0", "+1", "8134", wikiFuzz::makeFuzz(2)) ), + 'offset' => wikiFuzz::chooseInput( array("-1", "0", "------'-------0", "+1", "9823412312312412435", wikiFuzz::makeFuzz(2)) ), + "go" => wikiFuzz::chooseInput( array("first", "last", wikiFuzz::makeFuzz(2)) ), + "dir" => wikiFuzz::chooseInput( array("prev", "next", wikiFuzz::makeFuzz(2)) ), + "diff" => wikiFuzz::chooseInput( array("-1", "--------'-----0", "+1", "8134", wikiFuzz::makeFuzz(2)) ), + "oldid" => wikiFuzz::chooseInput( array("prev", "-1", "+1", "8134", wikiFuzz::makeFuzz(2)) ), + "feed" => wikiFuzz::makeFuzz(2) + ); + } +} + + +/** + ** @desc: a page test for the Special:Contributions". 
+ */ +class contributionsTest extends pageTest { + function __construct() { + $this->pagePath = "index.php/Special:Contributions/" . USER_ON_WIKI; + + $this->params = array ( + 'target' => wikiFuzz::chooseInput( array(wikiFuzz::makeFuzz(2), "newbies") ), + 'namespace' => wikiFuzz::chooseInput( array(-1, 15, 1, wikiFuzz::makeFuzz(2)) ), + 'offset' => wikiFuzz::chooseInput( array("0", "-1", "------'-------0", "+1", "982342131232131231241", wikiFuzz::makeFuzz(2)) ), + 'bot' => wikiFuzz::chooseInput( array("", "-1", "0", "1", wikiFuzz::makeFuzz(2)) ), + 'go' => wikiFuzz::chooseInput( array("-1", 'prev', 'next', wikiFuzz::makeFuzz(2)) ) + ); + } +} + + +/** + ** @desc: a page test for viewing a normal page, whilst posting various params. + */ +class viewPageTest extends pageTest { + function __construct() { + $this->pagePath = "index.php/Main_Page"; + + $this->params = array ( + "useskin" => wikiFuzz::chooseInput( array("chick", "cologneblue", "myskin", + "nostalgia", "simple", "standard", wikiFuzz::makeFuzz(2)) ), + "uselang" => wikiFuzz::chooseInput( array( wikiFuzz::makeFuzz(2), + "ab", "af", "an", "ar", "arc", "as", "ast", "av", "ay", "az", "ba", + "bat-smg", "be", "bg", "bm", "bn", "bo", "bpy", "br", "bs", "ca", + "ce", "cs", "csb", "cv", "cy", "da", "de", "dv", "dz", "el", "en", + "eo", "es", "et", "eu", "fa", "fi", "fo", "fr", "fur", "fy", "ga", + "gn", "gsw", "gu", "he", "hi", "hr", "hu", "ia", "id", "ii", "is", + "it", "ja", "jv", "ka", "km", "kn", "ko", "ks", "ku", "kv", "la", + "li", "lo", "lt", "lv", "mk", "ml", "ms", "nah", "nap", "nds", + "nds-nl", "nl", "nn", "no", "non", "nv", "oc", "or", "os", "pa", + "pl", "pms", "ps", "pt", "pt-br", "qu", "rmy", "ro", "ru", "sc", + "sd", "sk", "sl", "sq", "sr", "sr-ec", "sr-el", "sr-jc", "sr-jl", + "su", "sv", "ta", "te", "th", "tlh", "tr", "tt", "ty", "tyv", "udm", + "ug", "uk", "ur", "utf8", "vec", "vi", "wa", "xal", "yi", "za", + "zh", "zh-cn", "zh-hk", "zh-sg", "zh-tw", "zh-tw") ), + "returnto" => 
wikiFuzz::makeFuzz(2), + "feed" => wikiFuzz::chooseInput( array("atom", "rss", wikiFuzz::makeFuzz(2)) ), + "rcid" => wikiFuzz::makeFuzz(2), + "action" => wikiFuzz::chooseInput( array("view", "raw", "render", wikiFuzz::makeFuzz(2), "markpatrolled") ), + "printable" => wikiFuzz::makeFuzz(2), + "oldid" => wikiFuzz::makeFuzz(2), + "redirect" => wikiFuzz::makeFuzz(2), + "diff" => wikiFuzz::makeFuzz(2), + "search" => wikiFuzz::makeFuzz(2), + "rdfrom" => wikiFuzz::makeFuzz(2), // things from Article.php from here on: + "token" => wikiFuzz::makeFuzz(2), + "tbid" => wikiFuzz::makeFuzz(2), + "action" => wikiFuzz::chooseInput( array("purge", wikiFuzz::makeFuzz(2)) ), + "wpReason" => wikiFuzz::makeFuzz(2), + "wpEditToken" => wikiFuzz::makeFuzz(2), + "from" => wikiFuzz::makeFuzz(2), + "bot" => wikiFuzz::makeFuzz(2), + "summary" => wikiFuzz::makeFuzz(2), + "direction" => wikiFuzz::chooseInput( array("next", "prev", wikiFuzz::makeFuzz(2)) ), + "section" => wikiFuzz::makeFuzz(2), + "preload" => wikiFuzz::makeFuzz(2), + + ); + + // Tidy does not know how to valid atom or rss, so exclude from testing for the time being. + if ($this->params["feed"] == "atom") unset($this->params["feed"]); + else if ($this->params["feed"] == "rss") unset($this->params["feed"]); + + // Raw pages cannot really be validated + if ($this->params["action"] == "raw") unset($this->params["action"]); + + // sometimes we don't want to specify certain parameters. + if (wikiFuzz::randnum(6) == 0) unset($this->params["rcid"]); + if (wikiFuzz::randnum(6) == 0) unset($this->params["diff"]); + if (wikiFuzz::randnum(6) == 0) unset($this->params["rdfrom"]); + if (wikiFuzz::randnum(3) == 0) unset($this->params["oldid"]); + + // usually don't want action == purge. + if (wikiFuzz::randnum(6) > 1) unset($this->params["action"]); + } +} + + +/** + ** @desc: a page test for "Special:Allmessages". 
+ */ +class specialAllmessagesTest extends pageTest { + function __construct() { + $this->pagePath = "index.php?title=Special:Allmessages"; + + // only really has one parameter + $this->params = array ( + "ot" => wikiFuzz::chooseInput( array("php", "html", wikiFuzz::makeFuzz(2)) ) + ); + } +} + +/** + ** @desc: a page test for "Special:Newpages". + */ +class specialNewpages extends pageTest { + function __construct() { + $this->pagePath = "index.php/Special:Newpages"; + + $this->params = array ( + "namespace" => wikiFuzz::chooseInput( range(-1, 15) ), + "feed" => wikiFuzz::chooseInput( array("atom", "rss", wikiFuzz::makeFuzz(2)) ), + 'limit' => wikiFuzz::chooseInput( array("-1", "0", "-------'------0", "+1", "8134", wikiFuzz::makeFuzz(2)) ), + 'offset' => wikiFuzz::chooseInput( array("-1", "0", "------'-------0", "+1", "9823412312312412435", wikiFuzz::makeFuzz(2)) ) + ); + + // Tidy does not know how to valid atom or rss, so exclude from testing for the time being. + if ($this->params["feed"] == "atom") unset($this->params["feed"]); + else if ($this->params["feed"] == "rss") unset($this->params["feed"]); + } +} + +/** + ** @desc: a page test for "redirect.php" + */ +class redirectTest extends pageTest { + function __construct() { + $this->pagePath = "redirect.php"; + + $this->params = array ( + "wpDropdown" => wikiFuzz::makeFuzz(2) + ); + + // sometimes we don't want to specify certain parameters. + if (wikiFuzz::randnum(6) == 0) unset($this->params["wpDropdown"]); + } +} + + +/** + ** @desc: a page test for "Special:Confirmemail" + */ +class confirmEmail extends pageTest { + function __construct() { + // sometimes we send a bogus confirmation code, and sometimes we don't. + $this->pagePath = "index.php?title=Special:Confirmemail" . wikiFuzz::chooseInput( array("", "/" . 
wikiFuzz::makeTitleSafe(wikiFuzz::makeFuzz(1)) ) ); + + $this->params = array ( + "token" => wikiFuzz::makeFuzz(2) + ); + } +} + + +/** + ** @desc: a page test for "Special:Watchlist" + ** Note: this test would be better if we were logged in. + */ +class watchlistTest extends pageTest { + function __construct() { + $this->pagePath = "index.php?title=Special:Watchlist"; + + $this->params = array ( + "remove" => wikiFuzz::chooseInput( array("Remove checked items from watchlist", wikiFuzz::makeFuzz(2))), + 'days' => wikiFuzz::chooseInput( array(0, -1, -230, "--", 3, 9, wikiFuzz::makeFuzz(2)) ), + 'hideOwn' => wikiFuzz::chooseInput( array("", "0", "1", wikiFuzz::makeFuzz(2)) ), + 'hideBots' => wikiFuzz::chooseInput( array("", "0", "1", wikiFuzz::makeFuzz(2)) ), + 'namespace'=> wikiFuzz::chooseInput( array("", "0", "1", wikiFuzz::makeFuzz(2)) ), + 'action' => wikiFuzz::chooseInput( array("submit", "clear", wikiFuzz::makeFuzz(2)) ), + 'id[]' => wikiFuzz::makeFuzz(2), + 'edit' => wikiFuzz::makeFuzz(2), + 'token' => wikiFuzz::chooseInput( array("", "1243213", wikiFuzz::makeFuzz(2)) ) + ); + + // sometimes we specifiy "reset", and sometimes we don't. + if (wikiFuzz::randnum(3) == 0) $this->params["reset"] = wikiFuzz::chooseInput( array("", "0", "1", wikiFuzz::makeFuzz(2)) ); + } +} + + +/** + ** @desc: a page test for "Special:Blockme" + */ +class specialBlockmeTest extends pageTest { + function __construct() { + $this->pagePath = "index.php?title=Special:Blockme"; + + $this->params = array ( ); + + // sometimes we specify "ip", and sometimes we don't. 
+ if (wikiFuzz::randnum(1) == 0) { + $this->params["ip"] = wikiFuzz::chooseInput( array("10.12.41.213", wikiFuzz::randnum(-10,8134), wikiFuzz::makeFuzz(2)) ); + } + } +} + + +/** + ** @desc: a page test for "Special:Movepage" + */ +class specialMovePage extends pageTest { + function __construct() { + $this->pagePath = "index.php?title=Special:Movepage"; + + $this->params = array ( + "action" => wikiFuzz::chooseInput( array("success", "submit", "", wikiFuzz::makeFuzz(2)) ), + 'wpEditToken' => wikiFuzz::chooseInput( array('', 0, 34987987, wikiFuzz::makeFuzz(2)) ), + 'target' => wikiFuzz::chooseInput( array("x", wikiFuzz::makeTitleSafe(wikiFuzz::makeFuzz(2)) ) ), + 'wpOldTitle' => wikiFuzz::chooseInput( array("z", wikiFuzz::makeTitleSafe(wikiFuzz::makeFuzz(2)), wikiFuzz::makeFuzz(2) ) ), + 'wpNewTitle' => wikiFuzz::chooseInput( array("y", wikiFuzz::makeTitleSafe(wikiFuzz::makeFuzz(2)), wikiFuzz::makeFuzz(2) ) ), + 'wpReason' => wikiFuzz::chooseInput( array(wikiFuzz::makeFuzz(2)) ), + 'wpMovetalk' => wikiFuzz::chooseInput( array("0", "1", "++--34234", wikiFuzz::makeFuzz(2)) ), + 'wpDeleteAndMove' => wikiFuzz::chooseInput( array("0", "1", "++--34234", wikiFuzz::makeFuzz(2)) ), + 'wpConfirm' => wikiFuzz::chooseInput( array("0", "1", "++--34234", wikiFuzz::makeFuzz(2)) ), + 'talkmoved' => wikiFuzz::chooseInput( array("1", wikiFuzz::makeFuzz(2), "articleexists", 'notalkpage') ), + 'oldtitle' => wikiFuzz::makeFuzz(2), + 'newtitle' => wikiFuzz::makeFuzz(2), + 'wpMovetalk' => wikiFuzz::chooseInput( array("1", "0", wikiFuzz::makeFuzz(2)) ) + ); + + // sometimes we don't want to specify certain parameters. 
+ if (wikiFuzz::randnum(2) == 0) unset($this->params["wpEditToken"]); + if (wikiFuzz::randnum(3) == 0) unset($this->params["target"]); + if (wikiFuzz::randnum(3) == 0) unset($this->params["wpNewTitle"]); + if (wikiFuzz::randnum(4) == 0) unset($this->params["wpReason"]); + if (wikiFuzz::randnum(4) == 0) unset($this->params["wpOldTitle"]); + } +} + + +/** + ** @desc: a page test for "Special:Undelete" + */ +class specialUndelete extends pageTest { + function __construct() { + $this->pagePath = "index.php?title=Special:Undelete"; + + $this->params = array ( + "action" => wikiFuzz::chooseInput( array("submit", "", wikiFuzz::makeFuzz(2)) ), + 'wpEditToken' => wikiFuzz::chooseInput( array('', 0, 34987987, wikiFuzz::makeFuzz(2)) ), + 'target' => wikiFuzz::chooseInput( array("x", wikiFuzz::makeTitleSafe(wikiFuzz::makeFuzz(2)) ) ), + 'timestamp' => wikiFuzz::chooseInput( array("125223", wikiFuzz::makeFuzz(2) ) ), + 'file' => wikiFuzz::chooseInput( array("0", "1", "++--34234", wikiFuzz::makeFuzz(2)) ), + 'restore' => wikiFuzz::chooseInput( array("0", "1", wikiFuzz::makeFuzz(2)) ), + 'preview' => wikiFuzz::chooseInput( array("0", "1", wikiFuzz::makeFuzz(2)) ), + 'wpComment' => wikiFuzz::makeFuzz(2) + ); + + // sometimes we don't want to specify certain parameters. 
+ if (wikiFuzz::randnum(2) == 0) unset($this->params["wpEditToken"]); + if (wikiFuzz::randnum(4) == 0) unset($this->params["target"]); + if (wikiFuzz::randnum(1) == 0) unset($this->params["restore"]); + if (wikiFuzz::randnum(1) == 0) unset($this->params["preview"]); + } +} + + +/** + ** @desc: a page test for "Special:Unlockdb" + */ +class specialUnlockdb extends pageTest { + function __construct() { + $this->pagePath = "index.php?title=Special:Unlockdb"; + + $this->params = array ( + "action" => wikiFuzz::chooseInput( array("submit", "success", "", wikiFuzz::makeFuzz(2)) ), + 'wpEditToken' => wikiFuzz::chooseInput( array("20398702394", "", wikiFuzz::makeFuzz(2)) ), + 'wpLockConfirm' => wikiFuzz::chooseInput( array("0", "1", wikiFuzz::makeFuzz(2)) ) + ); + + // sometimes we don't want to specify certain parameters. + if (wikiFuzz::randnum(4) == 0) unset($this->params["wpEditToken"]); + if (wikiFuzz::randnum(4) == 0) unset($this->params["action"]); + if (wikiFuzz::randnum(4) == 0) unset($this->params["wpLockConfirm"]); + } +} + + +/** + ** @desc: a page test for "Special:Lockdb" + */ +class specialLockdb extends pageTest { + function __construct() { + $this->pagePath = "index.php?title=Special:Lockdb"; + + $this->params = array ( + "action" => wikiFuzz::chooseInput( array("submit", "success", "", wikiFuzz::makeFuzz(2)) ), + 'wpEditToken' => wikiFuzz::chooseInput( array("20398702394", "", wikiFuzz::makeFuzz(2)) ), + 'wpLockReason' => wikiFuzz::makeFuzz(2), + 'wpLockConfirm'=> wikiFuzz::chooseInput( array("0", "1", "++--34234", wikiFuzz::makeFuzz(2)) ) + ); + + // sometimes we don't want to specify certain parameters. 
+ if (wikiFuzz::randnum(4) == 0) unset($this->params["wpEditToken"]); + if (wikiFuzz::randnum(4) == 0) unset($this->params["action"]); + if (wikiFuzz::randnum(4) == 0) unset($this->params["wpLockConfirm"]); + } +} + + +/** + ** @desc: a page test for "Special:Userrights" + */ +class specialUserrights extends pageTest { + function __construct() { + $this->pagePath = "index.php/Special:Userrights"; + + $this->params = array ( + 'wpEditToken' => wikiFuzz::chooseInput( array("20398702394", "", wikiFuzz::makeFuzz(2)) ), + 'user-editname' => wikiFuzz::chooseInput( array("Nickj2", "Nickj2\n", wikiFuzz::makeFuzz(2)) ), + 'ssearchuser' => wikiFuzz::chooseInput( array("0", "1", "++--34234", wikiFuzz::makeFuzz(2)) ), + 'saveusergroups'=> wikiFuzz::chooseInput( array("0", "1", "++--34234", wikiFuzz::makeFuzz(2)), "Save User Groups"), + 'member[]' => wikiFuzz::chooseInput( array("0", "bot", "1", "++--34234", wikiFuzz::makeFuzz(2)) ), + "available[]" => wikiFuzz::chooseInput( array("0", "sysop", "bureaucrat", "1", "++--34234", wikiFuzz::makeFuzz(2)) ) + ); + + // sometimes we don't want to specify certain parameters. + if (wikiFuzz::randnum(3) == 0) unset($this->params['ssearchuser']); + if (wikiFuzz::randnum(3) == 0) unset($this->params['saveusergroups']); + } +} + + +/** + ** @desc: a test for page protection and unprotection. 
+ */ +class pageProtectionForm extends pageTest { + function __construct() { + $this->pagePath = "index.php?title=Main_Page"; + + $this->params = array ( + "action" => "protect", + 'wpEditToken' => wikiFuzz::chooseInput( array("20398702394", "", wikiFuzz::makeFuzz(2)) ), + "mwProtect-level-edit" => wikiFuzz::chooseInput( array('', 'autoconfirmed', 'sysop', wikifuzz::makeFuzz(2)) ), + "mwProtect-level-move" => wikiFuzz::chooseInput( array('', 'autoconfirmed', 'sysop', wikifuzz::makeFuzz(2)) ), + "mwProtectUnchained" => wikiFuzz::chooseInput( array("0", "1", "++--34234", wikiFuzz::makeFuzz(2)) ), + 'mwProtect-reason' => wikiFuzz::chooseInput( array("because it was there", wikifuzz::makeFuzz(2)) ) + ); + + + // sometimes we don't want to specify certain parameters. + if (wikiFuzz::randnum(3) == 0) unset($this->params["mwProtectUnchained"]); + if (wikiFuzz::randnum(3) == 0) unset($this->params['mwProtect-reason']); + } +} + + +/** + ** @desc: a page test for "Special:Blockip". + */ +class specialBlockip extends pageTest { + function __construct() { + $this->pagePath = "index.php/Special:Blockip"; + + $this->params = array ( + "action" => wikiFuzz::chooseInput( array("submit", "", wikiFuzz::makeFuzz(2)) ), + 'wpEditToken' => wikiFuzz::chooseInput( array("20398702394", "", wikiFuzz::makeFuzz(2)) ), + "wpBlockAddress" => wikiFuzz::chooseInput( array("20398702394", "", "Nickj2", wikiFuzz::makeFuzz(2), + // something like an IP address, sometimes invalid: + ( wikiFuzz::randnum(300,-20) . "." . wikiFuzz::randnum(300,-20) . "." + . wikiFuzz::randnum(300,-20) . "." .wikiFuzz::randnum(300,-20) ) ) ), + "ip" => wikiFuzz::chooseInput( array("20398702394", "", "Nickj2", wikiFuzz::makeFuzz(2), + // something like an IP address, sometimes invalid: + ( wikiFuzz::randnum(300,-20) . "." . wikiFuzz::randnum(300,-20) . "." + . wikiFuzz::randnum(300,-20) . "." 
.wikiFuzz::randnum(300,-20) ) ) ), + "wpBlockOther" => wikiFuzz::chooseInput( array('', 'Nickj2', wikifuzz::makeFuzz(2)) ), + "wpBlockExpiry" => wikiFuzz::chooseInput( array("other", "2 hours", "1 day", "3 days", "1 week", "2 weeks", + "1 month", "3 months", "6 months", "1 year", "infinite", wikiFuzz::makeFuzz(2)) ), + "wpBlockReason" => wikiFuzz::chooseInput( array("because it was there", wikifuzz::makeFuzz(2)) ), + "wpAnonOnly" => wikiFuzz::chooseInput( array("0", "1", "++--34234", wikiFuzz::makeFuzz(2)) ), + "wpCreateAccount" => wikiFuzz::chooseInput( array("0", "1", "++--34234", wikiFuzz::makeFuzz(2)) ), + "wpBlock" => wikiFuzz::chooseInput( array("0", "1", "++--34234", wikiFuzz::makeFuzz(2)) ) + ); + + // sometimes we don't want to specify certain parameters. + if (wikiFuzz::randnum(4) == 0) unset($this->params["wpBlockOther"]); + if (wikiFuzz::randnum(4) == 0) unset($this->params["wpBlockExpiry"]); + if (wikiFuzz::randnum(4) == 0) unset($this->params["wpBlockReason"]); + if (wikiFuzz::randnum(4) == 0) unset($this->params["wpAnonOnly"]); + if (wikiFuzz::randnum(4) == 0) unset($this->params["wpCreateAccount"]); + if (wikiFuzz::randnum(4) == 0) unset($this->params["wpBlockAddress"]); + if (wikiFuzz::randnum(4) == 0) unset($this->params["ip"]); + } +} + + +/** + ** @desc: a test for the imagepage. + */ +class imagepageTest extends pageTest { + function __construct() { + $this->pagePath = "index.php/Image:Small-email.png"; + + $this->params = array ( + "image" => wikiFuzz::chooseInput( array("Small-email.png", wikifuzz::makeFuzz(2)) ), + "wpReason" => wikifuzz::makeFuzz(2), + "oldimage" => wikiFuzz::chooseInput( array("Small-email.png", wikifuzz::makeFuzz(2)) ), + "wpEditToken" => wikiFuzz::chooseInput( array("20398702394", "", wikiFuzz::makeFuzz(2)) ), + ); + + // sometimes we don't want to specify certain parameters. 
+ if (wikiFuzz::randnum(6) == 0) unset($this->params["image"]); + if (wikiFuzz::randnum(6) == 0) unset($this->params["wpReason"]); + if (wikiFuzz::randnum(6) == 0) unset($this->params["oldimage"]); + if (wikiFuzz::randnum(6) == 0) unset($this->params["wpEditToken"]); + } +} + + +/** + ** @desc: a test for page deletion form. + */ +class pageDeletion extends pageTest { + function __construct() { + $this->pagePath = "index.php?title=Main_Page&action=delete"; + + $this->params = array ( + "wpEditToken" => wikiFuzz::chooseInput( array("20398702394", "", wikiFuzz::makeFuzz(2)) ), + "wpReason" => wikiFuzz::chooseInput( array("0", "1", "++--34234", wikiFuzz::makeFuzz(2)) ), + "wpConfirm" => wikiFuzz::chooseInput( array("0", "1", "++--34234", wikiFuzz::makeFuzz(2)) ), + ); + + // sometimes we don't want to specify certain parameters. + if (wikiFuzz::randnum(5) == 0) unset($this->params["wpReason"]); + if (wikiFuzz::randnum(5) == 0) unset($this->params["wpEditToken"]); + if (wikiFuzz::randnum(5) == 0) unset($this->params["wpConfirm"]); + } +} + + + +/** + ** @desc: a test for Revision Deletion. 
+ */ +class specialRevisionDelete extends pageTest { + function __construct() { + $this->pagePath = "index.php?title=Special:Revisiondelete"; + + $this->params = array ( + "target" => wikiFuzz::chooseInput( array("Main Page", wikifuzz::makeFuzz(2)) ), + "oldid" => wikifuzz::makeFuzz(2), + "oldid[]" => wikifuzz::makeFuzz(2), + "wpReason" => wikiFuzz::chooseInput( array("0", "1", "++--34234", wikiFuzz::makeFuzz(2)) ), + "revdelete-hide-text" => wikiFuzz::chooseInput( array("0", "1", "++--34234", wikiFuzz::makeFuzz(2)) ), + "revdelete-hide-comment" => wikiFuzz::chooseInput( array("0", "1", "++--34234", wikiFuzz::makeFuzz(2)) ), + "revdelete-hide-user" => wikiFuzz::chooseInput( array("0", "1", "++--34234", wikiFuzz::makeFuzz(2)) ), + "revdelete-hide-restricted" => wikiFuzz::chooseInput( array("0", "1", "++--34234", wikiFuzz::makeFuzz(2)) ), + ); + + // sometimes we don't want to specify certain parameters. + if (wikiFuzz::randnum(3) == 0) unset($this->params["target"]); + if (wikiFuzz::randnum(6) == 0) unset($this->params["oldid"]); + if (wikiFuzz::randnum(6) == 0) unset($this->params["oldid[]"]); + if (wikiFuzz::randnum(6) == 0) unset($this->params["wpReason"]); + if (wikiFuzz::randnum(6) == 0) unset($this->params["revdelete-hide-text"]); + if (wikiFuzz::randnum(6) == 0) unset($this->params["revdelete-hide-comment"]); + if (wikiFuzz::randnum(6) == 0) unset($this->params["revdelete-hide-user"]); + if (wikiFuzz::randnum(6) == 0) unset($this->params["revdelete-hide-restricted"]); + } +} + + +/** + ** @desc: a test for Special:Import. 
+ */ +class specialImport extends pageTest { + function __construct() { + $this->pagePath = "index.php/Special:Import"; + + $this->params = array ( + "action" => "submit", + "source" => wikiFuzz::chooseInput( array("upload", "interwiki", wikifuzz::makeFuzz(2)) ), + "MAX_FILE_SIZE" => wikiFuzz::chooseInput( array("0", "1", "++--34234", wikifuzz::makeFuzz(2)) ), + "xmlimport" => wikiFuzz::chooseInput( array("/var/www/hosts/mediawiki/wiki/AdminSettings.php", "1", "++--34234", wikiFuzz::makeFuzz(2)) ), + "namespace" => wikiFuzz::chooseInput( array(wikiFuzz::randnum(30,-6), wikiFuzz::makeFuzz(2)) ), + "interwiki" => wikiFuzz::makeFuzz(2), + "interwikiHistory" => wikiFuzz::makeFuzz(2), + "frompage" => wikiFuzz::makeFuzz(2), + ); + + // sometimes we don't want to specify certain parameters. + if (wikiFuzz::randnum(6) == 0) unset($this->params["action"]); + if (wikiFuzz::randnum(6) == 0) unset($this->params["source"]); + if (wikiFuzz::randnum(6) == 0) unset($this->params["MAX_FILE_SIZE"]); + if (wikiFuzz::randnum(6) == 0) unset($this->params["xmlimport"]); + if (wikiFuzz::randnum(6) == 0) unset($this->params["interwiki"]); + if (wikiFuzz::randnum(6) == 0) unset($this->params["interwikiHistory"]); + if (wikiFuzz::randnum(6) == 0) unset($this->params["frompage"]); + + // Note: Need to do a file upload to fully test this Special page. + } +} + + + +/** + ** @desc: a test for thumb.php + */ +class thumbTest extends pageTest { + function __construct() { + $this->pagePath = "thumb.php"; + + $this->params = array ( + "f" => wikiFuzz::chooseInput( array("..", "\\", "small-email.png", wikifuzz::makeFuzz(2)) ), + "w" => wikiFuzz::chooseInput( array("80", wikiFuzz::randnum(6000,-200), wikifuzz::makeFuzz(2)) ), + "r" => wikiFuzz::chooseInput( array("0", wikifuzz::makeFuzz(2)) ), + ); + + // sometimes we don't want to specify certain parameters. 
+ if (wikiFuzz::randnum(6) == 0) unset($this->params["f"]); + if (wikiFuzz::randnum(6) == 0) unset($this->params["w"]); + if (wikiFuzz::randnum(6) == 0) unset($this->params["r"]); + } +} + + +/** + ** @desc: a test for trackback.php + */ +class trackbackTest extends pageTest { + function __construct() { + $this->pagePath = "trackback.php"; + + $this->params = array ( + "url" => wikifuzz::makeFuzz(2), + "blog_name" => wikiFuzz::chooseInput( array("80", wikiFuzz::randnum(6000,-200), wikifuzz::makeFuzz(2)) ), + "article" => wikiFuzz::chooseInput( array("Main Page", wikifuzz::makeFuzz(2)) ), + "title" => wikiFuzz::chooseInput( array("Main Page", wikifuzz::makeFuzz(2)) ), + "excerpt" => wikifuzz::makeFuzz(2), + ); + + // sometimes we don't want to specify certain parameters. + if (wikiFuzz::randnum(3) == 0) unset($this->params["title"]); + if (wikiFuzz::randnum(3) == 0) unset($this->params["excerpt"]); + } +} + + +/** + ** @desc: a test for profileinfo.php + */ +class profileInfo extends pageTest { + function __construct() { + $this->pagePath = "profileinfo.php"; + + $this->params = array ( + "expand" => wikifuzz::makeFuzz(2), + "sort" => wikiFuzz::chooseInput( array("time", "count", "name", wikifuzz::makeFuzz(2)) ), + "filter" => wikiFuzz::chooseInput( array("Main Page", wikifuzz::makeFuzz(2)) ), + ); + + // sometimes we don't want to specify certain parameters. + if (wikiFuzz::randnum(3) == 0) unset($this->params["sort"]); + if (wikiFuzz::randnum(3) == 0) unset($this->params["filter"]); + } +} + + +/** + ** @desc: a test for Special:Cite (extension Special page). 
+ */ +class specialCite extends pageTest { + function __construct() { + $this->pagePath = "index.php?title=Special:Cite"; + + $this->params = array ( + "page" => wikiFuzz::chooseInput( array("\" onmouseover=\"alert(1);\"", "Main Page", wikifuzz::makeFuzz(2)) ), + "id" => wikiFuzz::chooseInput( array("-1", "0", "------'-------0", "+1", "-9823412312312412435", wikiFuzz::makeFuzz(2)) ), + ); + + // sometimes we don't want to specify certain parameters. + if (wikiFuzz::randnum(6) == 0) unset($this->params["page"]); + if (wikiFuzz::randnum(6) == 0) unset($this->params["id"]); + } +} + + +/** + ** @desc: a test for Special:Filepath (extension Special page). + */ +class specialFilepath extends pageTest { + function __construct() { + $this->pagePath = "index.php/Special:Filepath"; + + $this->params = array ( + "file" => wikiFuzz::chooseInput( array("Small-email.png", "Small-email.png" . wikifuzz::makeFuzz(1), wikiFuzz::makeFuzz(2)) ), + ); + } +} + + +/** + ** @desc: a test for Special:Makebot (extension Special page). + */ +class specialMakebot extends pageTest { + function __construct() { + $this->pagePath = "index.php/Special:Makebot"; + + $this->params = array ( + "username" => wikiFuzz::chooseInput( array("Nickj2", "192.168.0.2", wikifuzz::makeFuzz(1) ) ), + "dosearch" => wikiFuzz::chooseInput( array("0", "1", "++--34234", wikifuzz::makeFuzz(2)) ), + "grant" => wikiFuzz::chooseInput( array("0", "1", "++--34234", wikifuzz::makeFuzz(2)) ), + "comment" => wikiFuzz::chooseInput( array("20398702394", "", wikiFuzz::makeFuzz(2)) ), + "token" => wikiFuzz::chooseInput( array("20398702394", "", wikiFuzz::makeFuzz(2)) ), + ); + + // sometimes we don't want to specify certain parameters. + if (wikiFuzz::randnum(2) == 0) unset($this->params["dosearch"]); + if (wikiFuzz::randnum(2) == 0) unset($this->params["grant"]); + if (wikiFuzz::randnum(5) == 0) unset($this->params["token"]); + } +} + + +/** + ** @desc: a test for Special:Makesysop (extension Special page). 
+ */ +class specialMakesysop extends pageTest { + function __construct() { + $this->pagePath = "index.php/Special:Makesysop"; + + $this->params = array ( + "wpMakesysopUser" => wikiFuzz::chooseInput( array("Nickj2", "192.168.0.2", wikifuzz::makeFuzz(1) ) ), + "action" => wikiFuzz::chooseInput( array("0", "1", "++--34234", wikifuzz::makeFuzz(2)) ), + "wpMakesysopSubmit" => wikiFuzz::chooseInput( array("0", "1", "++--34234", wikifuzz::makeFuzz(2)) ), + "wpEditToken" => wikiFuzz::chooseInput( array("20398702394", "", wikiFuzz::makeFuzz(2)) ), + "wpSetBureaucrat" => wikiFuzz::chooseInput( array("20398702394", "", wikiFuzz::makeFuzz(2)) ), + ); + + // sometimes we don't want to specify certain parameters. + if (wikiFuzz::randnum(3) == 0) unset($this->params["wpMakesysopSubmit"]); + if (wikiFuzz::randnum(3) == 0) unset($this->params["wpEditToken"]); + if (wikiFuzz::randnum(3) == 0) unset($this->params["wpSetBureaucrat"]); + } +} + + +/** + ** @desc: a test for Special:Renameuser (extension Special page). + */ +class specialRenameuser extends pageTest { + function __construct() { + $this->pagePath = "index.php/Special:Renameuser"; + + $this->params = array ( + "oldusername" => wikiFuzz::chooseInput( array("Nickj2", "192.168.0.2", wikifuzz::makeFuzz(1) ) ), + "newusername" => wikiFuzz::chooseInput( array("Nickj2", "192.168.0.2", wikifuzz::makeFuzz(1) ) ), + "token" => wikiFuzz::chooseInput( array("20398702394", "", wikiFuzz::makeFuzz(2)) ), + ); + } +} + + +/** + ** @desc: a test for Special:Linksearch (extension Special page). + */ +class specialLinksearch extends pageTest { + function __construct() { + $this->pagePath = "index.php?title=Special%3ALinksearch"; + + $this->params = array ( + "target" => wikifuzz::makeFuzz(2), + ); + + // sometimes we don't want to specify certain parameters. + if (wikiFuzz::randnum(10) == 0) unset($this->params["target"]); + } +} + + +/** + ** @desc: a test for Special:CategoryTree (extension Special page). 
+ */ +class specialCategoryTree extends pageTest { + function __construct() { + $this->pagePath = "index.php?title=Special:CategoryTree"; + + $this->params = array ( + "target" => wikifuzz::makeFuzz(2), + "from" => wikifuzz::makeFuzz(2), + "until" => wikifuzz::makeFuzz(2), + "showas" => wikifuzz::makeFuzz(2), + "mode" => wikiFuzz::chooseInput( array("pages", "categories", "all", wikifuzz::makeFuzz(2)) ), + ); + + // sometimes we do want to specify certain parameters. + if (wikiFuzz::randnum(5) == 0) $this->params["notree"] = wikiFuzz::chooseInput( array("1", 0, "", wikiFuzz::makeFuzz(2)) ); + } +} + + + +/** + ** @desc: selects a page test to run. + */ +function selectPageTest($count) { + + // if the user only wants a specific test, then only ever give them that. + if (defined("SPECIFIC_TEST")) { + $testType = SPECIFIC_TEST; + return new $testType (); + } + + // Some of the time we test Special pages, the remaining + // time we test using the standard edit page. + switch ($count % 100) { + case 0 : return new successfulUserLoginTest(); + case 1 : return new listusersTest(); + case 2 : return new searchTest(); + case 3 : return new recentchangesTest(); + case 4 : return new prefixindexTest(); + case 5 : return new mimeSearchTest(); + case 6 : return new specialLogTest(); + case 7 : return new userLoginTest(); + case 8 : return new ipblocklistTest(); + case 9 : return new newImagesTest(); + case 10: return new imagelistTest(); + case 11: return new specialExportTest(); + case 12: return new specialBooksourcesTest(); + case 13: return new specialAllpagesTest(); + case 14: return new pageHistoryTest(); + case 15: return new contributionsTest(); + case 16: return new viewPageTest(); + case 17: return new specialAllmessagesTest(); + case 18: return new specialNewpages(); + case 19: return new searchTest(); + case 20: return new redirectTest(); + case 21: return new confirmEmail(); + case 22: return new watchlistTest(); + case 23: return new specialBlockmeTest(); + case 
24: return new specialUndelete(); + case 25: return new specialMovePage(); + case 26: return new specialUnlockdb(); + case 27: return new specialLockdb(); + case 28: return new specialUserrights(); + case 29: return new pageProtectionForm(); + case 30: return new specialBlockip(); + case 31: return new imagepageTest(); + case 32: return new pageDeletion(); + case 33: return new specialRevisionDelete(); + case 34: return new specialImport(); + case 35: return new thumbTest(); + case 36: return new trackbackTest(); + case 37: return new profileInfo(); + case 38: return new specialCite(); + case 39: return new specialFilepath(); + case 40: return new specialMakebot(); + case 41: return new specialMakesysop(); + case 42: return new specialRenameuser(); + case 43: return new specialLinksearch(); + case 44: return new specialCategoryTree(); + default: return new editPageTest(); + } +} + + +/////////////////////// SAVING OUTPUT ///////////////////////// + +/** + ** @desc: Utility function for saving a file. Currently has no error checking. + */ +function saveFile($data, $name) { + file_put_contents($name, $data); +} + + +/** + ** @desc: Returns a test as an experimental GET-to-POST URL. + ** This doesn't seem to always work though, and sometimes the output is too long + ** to be a valid GET URL, so we also save in other formats. + */ +function getAsURL(pageTest $test) { + $used_question_mark = (strpos($test->getPagePath(), "?") !== false); + $retval = "http://get-to-post.nickj.org/?http://" . WIKI_BASE_URL . $test->getPagePath(); + foreach ($test->getParams() as $param => $value) { + if (!$used_question_mark) { + $retval .= "?"; + $used_question_mark = true; + } + else { + $retval .= "&"; + } + $retval .= $param . "=" . urlencode($value); + } + return $retval; +} + + +/** + ** @desc: Saves a plain-text human-readable version of a test. + */ +function saveTestAsText(pageTest $test, $filename) { + $str = "Test: " . 
$test->getPagePath(); + foreach ($test->getParams() as $param => $value) { + $str .= "\n$param: $value"; + } + $str .= "\nGet-to-post URL: " . getAsURL($test) . "\n"; + saveFile($str, $filename); +} + + +/** + ** @desc: Saves a test as a standalone basic PHP script that shows this one problem. + ** Resulting script requires PHP-Curl be installed in order to work. + */ +function saveTestAsPHP(pageTest $test, $filename) { + $str = "getParams()), true) . ";\n" + . "\$ch = curl_init();\n" + . "curl_setopt(\$ch, CURLOPT_POST, 1);\n" + . "curl_setopt(\$ch, CURLOPT_POSTFIELDS, \$params );\n" + . "curl_setopt(\$ch, CURLOPT_URL, " . var_export(WIKI_BASE_URL . $test->getPagePath(), true) . ");\n" + . "curl_setopt(\$ch, CURLOPT_RETURNTRANSFER,1);\n" + . ($test->getCookie() ? "curl_setopt(\$ch, CURLOPT_COOKIE, " . var_export($test->getCookie(), true) . ");\n" : "") + . "\$result=curl_exec(\$ch);\n" + . "curl_close (\$ch);\n" + . "print \$result;\n" + . "?>\n"; + saveFile($str, $filename); +} + + +/** + ** @desc: Escapes a value so that it can be used on the command line by Curl. + ** Specifically, "<" and "@" need to be escaped if they are the first character, + ** otherwise curl interprets these as meaning that we want to insert a file. + */ +function escapeForCurl(array $input_params) { + $output_params = array(); + foreach ($input_params as $param => $value) { + if (strlen($value) > 0 && ( $value[0] == "@" || $value[0] == "<")) { + $value = "\\" . $value; + } + $output_params[$param] = $value; + } + return $output_params; +} + + +/** + ** @desc: Saves a test as a standalone CURL shell script that shows this one problem. + ** Resulting script requires standalone Curl be installed in order to work. + */ +function saveTestAsCurl(pageTest $test, $filename) { + $str = "#!/bin/bash\n" + . "curl --silent --include --globoff \\\n" + . ($test->getCookie() ? " --cookie " . escapeshellarg($test->getCookie()) . 
" \\\n" : ""); + foreach (escapeForCurl($test->getParams()) as $param => $value) { + $str .= " -F " . escapeshellarg($param) . "=" . escapeshellarg($value) . " \\\n"; + } + $str .= " " . escapeshellarg(WIKI_BASE_URL . $test->getPagePath()); // beginning space matters. + $str .= "\n"; + saveFile($str, $filename); + chmod($filename, 0755); // make executable +} + + +/** + ** @desc: Saves the internal data structure to file. + */ +function saveTestData (pageTest $test, $filename) { + saveFile(serialize($test), $filename); +} + + +/** + ** @desc: saves a test in the various formats. + */ +function saveTest(pageTest $test, $testname) { + $base_name = DIRECTORY . "/" . $testname; + saveTestAsText($test, $base_name . INFO_FILE); + saveTestAsPHP ($test, $base_name . PHP_TEST ); + saveTestAsCurl($test, $base_name . CURL_TEST); + saveTestData ($test, $base_name . DATA_FILE); +} + + +//////////////////// MEDIAWIKI OUTPUT ///////////////////////// + +/** + ** @desc: Asks MediaWiki for the HTML output of a test. + */ +function wikiTestOutput(pageTest $test) { + + $ch = curl_init(); + + // specify the cookie, if required. + if ($test->getCookie()) curl_setopt($ch, CURLOPT_COOKIE, $test->getCookie()); + curl_setopt($ch, CURLOPT_POST, 1); // save form using a POST + + $params = escapeForCurl($test->getParams()); + curl_setopt($ch, CURLOPT_POSTFIELDS, $params ); // load the POST variables + + curl_setopt($ch, CURLOPT_URL, WIKI_BASE_URL . $test->getPagePath() ); // set url to post to + curl_setopt($ch, CURLOPT_RETURNTRANSFER,1); // return into a variable + + $result=curl_exec ($ch); + + // if we encountered an error, then say so, and return an empty string. + if (curl_error($ch)) { + print "\nCurl error #: " . curl_errno($ch) . " - " . curl_error ($ch); + $result = ""; + } + + curl_close ($ch); + + return $result; +} + + +//////////////////// HTML VALIDATION ///////////////////////// + +/* + ** @desc: Asks the validator whether this is valid HTML, or not. 
+ */ +function validateHTML($text) { + + $params = array ("fragment" => $text); + + $ch = curl_init(); + + curl_setopt($ch, CURLOPT_POST, 1); // save form using a POST + curl_setopt($ch, CURLOPT_POSTFIELDS, $params); // load the POST variables + curl_setopt($ch, CURLOPT_URL, VALIDATOR_URL); // set url to post to + curl_setopt($ch, CURLOPT_RETURNTRANSFER,1); // return into a variable + + $result=curl_exec ($ch); + + // if we encountered an error, then log it, and exit. + if (curl_error($ch)) { + trigger_error("Curl error #: " . curl_errno($ch) . " - " . curl_error ($ch) ); + print "Curl error #: " . curl_errno($ch) . " - " . curl_error ($ch) . " - exiting.\n"; + exit(); + } + + curl_close ($ch); + + $valid = (strpos($result, "Failed validation") === false ? true : false); + + return array($valid, $result); +} + + +/** + ** @desc: Get tidy to check for no HTML errors in the output file (e.g. unescaped strings). + */ +function tidyCheckFile($name) { + $file = DIRECTORY . "/" . $name; + $command = PATH_TO_TIDY . " -output /tmp/out.html -quiet $file 2>&1"; + $x = `$command`; + + // Look for the most interesting Tidy errors and warnings. + if ( strpos($x,"end of file while parsing attributes") !== false + || strpos($x,"attribute with missing trailing quote mark") !== false + || strpos($x,"missing '>' for end of tag") !== false + || strpos($x,"Error:") !== false) { + print "\nTidy found something - view details with: $command"; + return false; + } else { + return true; + } +} + + +/** + ** @desc: Returns whether or not an database error log file has changed in size since + ** the last time this was run. This is used to tell if a test caused a DB error. 
+ */ +function dbErrorLogged() { + static $filesize; + + // first time running this function + if (!isset($filesize)) { + // create log if it does not exist + if (!file_exists(DB_ERROR_LOG_FILE)) { + saveFile("", DB_ERROR_LOG_FILE); + } + $filesize = filesize(DB_ERROR_LOG_FILE); + return false; + } + + $newsize = filesize(DB_ERROR_LOG_FILE); + // if the log has grown, then assume the current test caused it. + if ($newsize != $filesize) { + $filesize = $newsize; + return true; + } + + return false; +} + +////////////////// TOP-LEVEL PROBLEM-FINDING FUNCTION //////////////////////// + +/** + ** @desc: takes a page test, and runs it and tests it for problems in the output. + ** Returns: False on finding a problem, or True on no problems being found. + */ +function runWikiTest(pageTest $test, &$testname, $can_overwrite = false) { + + // by default don't overwrite a previous test of the same name. + while ( ! $can_overwrite && file_exists(DIRECTORY . "/" . $testname . DATA_FILE)) { + $testname .= "-" . mt_rand(0,9); + } + + $filename = DIRECTORY . "/" . $testname . DATA_FILE; + + // Store the time before and after, to find slow pages. + $before = microtime(true); + + // Get MediaWiki to give us the output of this test. + $wiki_preview = wikiTestOutput($test); + + $after = microtime(true); + + // if we received no response, then that's interesting. + if ($wiki_preview == "") { + print "\nNo response received for: $filename"; + return false; + } + + // save output HTML to file. + $html_file = DIRECTORY . "/" . $testname . HTML_FILE; + saveFile($wiki_preview, $html_file); + + // if there were PHP errors in the output, then that's interesting too. 
+ if ( strpos($wiki_preview, "Warning: " ) !== false + || strpos($wiki_preview, "Fatal error: ") !== false + || strpos($wiki_preview, "Notice: " ) !== false + || strpos($wiki_preview, "Error: " ) !== false ) { + $error = substr($wiki_preview, strpos($wiki_preview, ":") + 7, 50); + // Avoid probable PHP bug with bad session ids; http://bugs.php.net/bug.php?id=38224 + if ($error != "Unknown: The session id contains illegal character") { + print "\nPHP error/warning/notice in HTML output: $html_file ; $error"; + return false; + } + } + + // if there was a MediaWiki Backtrace message in the output, then that's also interesting. + if (strpos($wiki_preview, "Backtrace:") !== false) { + print "\nInternal MediaWiki error in HTML output: $html_file"; + return false; + } + + // if there was a Parser error comment in the output, then that's potentially interesting. + if (strpos($wiki_preview, "!-- ERR") !== false) { + print "\nParser Error comment in HTML output: $html_file"; + return false; + } + + // if a database error was logged, then that's definitely interesting. + if (dbErrorLogged()) { + print "\nDatabase Error logged for: $filename"; + return false; + } + + // validate result + $valid = true; + if (VALIDATE_ON_WEB) { + list ($valid, $validator_output) = validateHTML($wiki_preview); + if (!$valid) print "\nW3C web validation failed - view details with: html2text " . DIRECTORY . "/" . $testname . ".validator_output.html"; + } + + // Get tidy to check the page, unless it is a test which produces XML. + if (!$test instanceof trackbackTest && !$test instanceof specialExportTest) { + $valid = tidyCheckFile( $testname . HTML_FILE ) && $valid; + } + + // if it took more than 2 seconds to render, then it may be interesting too. (Possible DoS attack?) + if (($after - $before) >= 2) { + print "\nParticularly slow to render (" . round($after - $before, 2) . 
" seconds): $filename"; + return false; + } + + if( $valid ) { + // Remove temp HTML file if test was valid: + unlink( $html_file ); + } elseif( VALIDATE_ON_WEB ) { + saveFile($validator_output, DIRECTORY . "/" . $testname . ".validator_output.html"); + } + + return $valid; +} + + +/////////////////// RERUNNING OLD TESTS /////////////////// + +/** + ** @desc: We keep our failed tests so that they can be rerun. + ** This function does that retesting. + */ +function rerunPreviousTests() { + print "Retesting previously found problems.\n"; + + $dir_contents = scandir (DIRECTORY); + + // sort file into the order a normal person would use. + natsort ($dir_contents); + + foreach ($dir_contents as $file) { + + // if file is not a test, then skip it. + // Note we need to escape any periods or will be treated as "any character". + $matches = array(); + if (!ereg("(.*)" . str_replace(".", "\.", DATA_FILE) . "$", $file, $matches)) continue; + + // reload the test. + $full_path = DIRECTORY . "/" . $file; + $test = unserialize(file_get_contents($full_path)); + + // if this is not a valid test, then skip it. + if (! $test instanceof pageTest) { + print "\nSkipping invalid test - $full_path"; + continue; + } + + // The date format is in Apache log format, which makes it easier to locate + // which retest caused which error in the Apache logs (only happens usually if + // apache segfaults). + if (!QUIET) print "[" . date ("D M d H:i:s Y") . "] Retesting $file (" . get_class($test) . ")"; + + // run test + $testname = $matches[1]; + $valid = runWikiTest($test, $testname, true); + + if (!$valid) { + saveTest($test, $testname); + if (QUIET) { + print "\nTest: " . get_class($test) . " ; Testname: $testname\n------"; + } else { + print "\n"; + } + } + else { + if (!QUIET) print "\r"; + if (DELETE_PASSED_RETESTS) { + $prefix = DIRECTORY . "/" . $testname; + if (is_file($prefix . DATA_FILE)) unlink($prefix . DATA_FILE); + if (is_file($prefix . PHP_TEST )) unlink($prefix . 
PHP_TEST ); + if (is_file($prefix . CURL_TEST)) unlink($prefix . CURL_TEST); + if (is_file($prefix . INFO_FILE)) unlink($prefix . INFO_FILE); + } + } + } + + print "\nDone retesting.\n"; +} + + +////////////////////// MAIN LOOP //////////////////////// + + +// first check whether CURL is installed, because sometimes it's not. +if( ! function_exists('curl_init') ) { + die("Could not find 'curl_init' function. Is the curl extension compiled into PHP?\n"); +} + +// Initialization of types. wikiFuzz doesn't have a constructor because we want to +// access it staticly and not have any globals. +wikiFuzz::$types = array_keys(wikiFuzz::$data); + +// Make directory if doesn't exist +if (!is_dir(DIRECTORY)) { + mkdir (DIRECTORY, 0700 ); +} +// otherwise, we first retest the things that we have found in previous runs +else if (RERUN_OLD_TESTS) { + rerunPreviousTests(); +} + +// seed the random number generator +mt_srand(crc32(microtime())); + +// main loop. +$start_time = date("U"); +$num_errors = 0; +if (!QUIET) print "Beginning main loop. Results are stored in the " . DIRECTORY . " directory.\n"; +if (!QUIET) print "Press CTRL+C to stop testing.\n"; + +for ($count=0; true; $count++) { + if (!QUIET) { + // spinning progress indicator. + switch( $count % 4 ) { + case '0': print "\r/"; break; + case '1': print "\r-"; break; + case '2': print "\r\\"; break; + case '3': print "\r|"; break; + } + print " $count"; + } + + // generate a page test to run. + $test = selectPageTest($count); + + $mins = ( date("U") - $start_time ) / 60; + if (!QUIET && $mins > 0) { + print ". $num_errors poss errors. " + . floor($mins) . " mins. " + . round ($count / $mins, 0) . " tests/min. " + . get_class($test); // includes the current test name. + } + + // run this test against MediaWiki, and see if the output was valid. + $testname = $count; + $valid = runWikiTest($test, $testname, false); + + // save the failed test + if (!$valid) { + if (QUIET) { + print "\nTest: " . get_class($test) . 
" ; Testname: $testname\n------"; + } else { + print "\n"; + } + saveTest($test, $testname); + $num_errors += 1; + } + + // stop if we have reached max number of errors. + if (defined("MAX_ERRORS") && $num_errors>=MAX_ERRORS) { + break; + } + + // stop if we have reached max number of mins runtime. + if (defined("MAX_RUNTIME") && $mins>=MAX_RUNTIME) { + break; + } +} + +?> diff --git a/maintenance/generateSitemap.php b/maintenance/generateSitemap.php index 2cf8312a..a0b6979d 100644 --- a/maintenance/generateSitemap.php +++ b/maintenance/generateSitemap.php @@ -145,7 +145,7 @@ class GenerateSitemap { * @param bool $compress Whether to compress the sitemap files */ function GenerateSitemap( $fspath, $path, $compress ) { - global $wgDBname, $wgScriptPath; + global $wgScriptPath; $this->url_limit = 50000; $this->size_limit = pow( 2, 20 ) * 10; @@ -157,7 +157,7 @@ class GenerateSitemap { $this->dbr =& wfGetDB( DB_SLAVE ); $this->generateNamespaces(); $this->timestamp = wfTimestamp( TS_ISO_8601, wfTimestampNow() ); - $this->findex = fopen( "{$this->fspath}sitemap-index-$wgDBname.xml", 'wb' ); + $this->findex = fopen( "{$this->fspath}sitemap-index-" . wfWikiID() . ".xml", 'wb' ); } /** @@ -232,7 +232,7 @@ class GenerateSitemap { * @access public */ function main() { - global $wgDBname, $wgContLang; + global $wgContLang; fwrite( $this->findex, $this->openIndex() ); @@ -314,11 +314,8 @@ class GenerateSitemap { * @return string */ function sitemapFilename( $namespace, $count ) { - global $wgDBname; - $ext = $this->compress ? 
'.gz' : ''; - - return "sitemap-$wgDBname-NS_$namespace-$count.xml$ext"; + return "sitemap-".wfWikiID()."-NS_$namespace-$count.xml$ext"; } /** diff --git a/maintenance/importImages.php b/maintenance/importImages.php index 925c64b7..2cf8bd19 100644 --- a/maintenance/importImages.php +++ b/maintenance/importImages.php @@ -26,13 +26,25 @@ if( count( $args ) > 1 ) { $files = findFiles( $dir, $exts ); # Set up a fake user for this operation - $wgUser = User::newFromName( 'Image import script' ); - $wgUser->setLoaded( true ); + if( isset( $options['user'] ) ) { + $wgUser = User::newFromName( $options['user'] ); + } else { + $wgUser = User::newFromName( 'Image import script' ); + $wgUser->setLoaded( true ); + } + + # Get the upload comment + $comment = isset( $options['comment'] ) + ? $options['comment'] + : 'Importing image file'; + + # Get the license specifier + $license = isset( $options['license'] ) ? $options['license'] : ''; # Batch "upload" operation foreach( $files as $file ) { - $base = basename( $file ); + $base = wfBaseName( $file ); # Validate a title $title = Title::makeTitleSafe( NS_IMAGE, $base ); @@ -59,7 +71,7 @@ if( count( $args ) > 1 ) { $image->loadFromFile(); # Record the upload - if( $image->recordUpload( '', 'Importing image file' ) ) { + if( $image->recordUpload( '', $comment, $license ) ) { # We're done! echo( "done.\n" ); @@ -92,9 +104,18 @@ exit(); function showUsage( $reason = false ) { if( $reason ) echo( $reason . "\n" ); - echo( "USAGE: php importImages.php \n\n" ); - echo( " : Path to the directory containing images to be imported\n" ); - echo( " File extensions to import\n\n" ); + echo << ... 
+ + : Path to the directory containing images to be imported + File extensions to import + +Options: +--user= Set username of uploader, default 'Image import script' +--comment= Set upload summary comment, default 'Importing image file' +--license= Use an optional license template + +END; exit(); } diff --git a/maintenance/installExtension.php b/maintenance/installExtension.php new file mode 100644 index 00000000..f6b2dff4 --- /dev/null +++ b/maintenance/installExtension.php @@ -0,0 +1,642 @@ +path = $path; + } + + function printListing( ) { + trigger_error( 'override InstallerRepository::printListing()', E_USER_ERROR ); + } + + function getResource( $name ) { + trigger_error( 'override InstallerRepository::getResource()', E_USER_ERROR ); + } + + /*static*/ function makeRepository( $path, $type = NULL ) { + if ( !$type ) { + preg_match( '!(([-+\w]+)://)?.*?(\.[-\w\d.]+)?$!', $path, $m ); + $proto = @$m[2]; + + if( !$proto ) $type = 'dir'; + else if ( ( $proto == 'http' || $proto == 'https' ) + && preg_match( '!([^\w]svn|svn[^\w])!i', $path) ) $type = 'svn'; #HACK! + else $type = $proto; + } + + if ( $type == 'dir' || $type == 'file' ) return new LocalInstallerRepository( $path ); + else if ( $type == 'http' || $type == 'http' ) return new WebInstallerRepository( $path ); + else return new SVNInstallerRepository( $path ); + } +} + +class LocalInstallerRepository extends InstallerRepository { + + function LocalInstallerRepository ( $path ) { + InstallerRepository::InstallerRepository( $path ); + } + + function printListing( ) { + $ff = glob( "{$this->path}/*" ); + if ( $ff === false || $ff === NULL ) { + ExtensionInstaller::error( "listing directory $repos failed!" ); + return false; + } + + foreach ( $ff as $f ) { + $n = basename($f); + + if ( !is_dir( $f ) ) { + if ( !preg_match( '/(.*)\.(tgz|tar\.gz|zip)/', $n, $m ) ) continue; + $n = $m[1]; + } + + print "\t$n\n"; + } + } + + function getResource( $name ) { + $path = $this->path . '/' . 
$name; + + if ( !file_exists( $path ) || !is_dir( $path ) ) $path = $this->path . '/' . $name . '.tgz'; + if ( !file_exists( $path ) ) $path = $this->path . '/' . $name . '.tar.gz'; + if ( !file_exists( $path ) ) $path = $this->path . '/' . $name . '.zip'; + + return new LocalInstallerResource( $path ); + } +} + +class WebInstallerRepository extends InstallerRepository { + + function WebInstallerRepository ( $path ) { + InstallerRepository::InstallerRepository( $path ); + } + + function printListing( ) { + ExtensionInstaller::note( "listing index from {$this->path}..." ); + + $txt = @file_get_contents( $this->path . '/index.txt' ); + if ( $txt ) { + print $txt; + print "\n"; + } + else { + $txt = file_get_contents( $this->path ); + if ( !$txt ) { + ExtensionInstaller::error( "listing index from {$this->path} failed!" ); + print ( $txt ); + return false; + } + + $ok = preg_match_all( '!]*href\s*=\s*['."'".'"]([^/'."'".'"]+)\.tgz['."'".'"][^>]*>.*?!si', $txt, $m, PREG_SET_ORDER ); + if ( !$ok ) { + ExtensionInstaller::error( "listing index from {$this->path} does not match!" ); + print ( $txt ); + return false; + } + + foreach ( $m as $l ) { + $n = $l[1]; + print "\t$n\n"; + } + } + } + + function getResource( $name ) { + $path = $this->path . '/' . $name . '.tgz'; + return new WebInstallerResource( $path ); + } +} + +class SVNInstallerRepository extends InstallerRepository { + + function SVNInstallerRepository ( $path ) { + InstallerRepository::InstallerRepository( $path ); + } + + function printListing( ) { + ExtensionInstaller::note( "SVN list {$this->path}..." ); + $txt = wfShellExec( 'svn ls ' . escapeshellarg( $this->path ), $code ); + if ( $code !== 0 ) { + ExtensionInstaller::error( "svn list for {$this->path} failed!" 
); + return false; + } + + $ll = preg_split('/(\s*[\r\n]\s*)+/', $txt); + + foreach ( $ll as $line ) { + if ( !preg_match('!^(.*)/$!', $line, $m) ) continue; + $n = $m[1]; + + print "\t$n\n"; + } + } + + function getResource( $name ) { + $path = $this->path . '/' . $name; + return new SVNInstallerResource( $path ); + } +} + +class InstallerResource { + var $path; + var $isdir; + var $islocal; + + function InstallerResource( $path, $isdir, $islocal ) { + $this->path = $path; + + $this->isdir= $isdir; + $this->islocal = $islocal; + + preg_match( '!([-+\w]+://)?.*?(\.[-\w\d.]+)?$!', $path, $m ); + + $this->protocol = @$m[1]; + $this->extensions = @$m[2]; + + if ( $this->extensions ) $this->extensions = strtolower( $this->extensions ); + } + + function fetch( $target ) { + trigger_error( 'override InstallerResource::fetch()', E_USER_ERROR ); + } + + function extract( $file, $target ) { + + if ( $this->extensions == '.tgz' || $this->extensions == '.tar.gz' ) { #tgz file + ExtensionInstaller::note( "extracting $file..." ); + wfShellExec( 'tar zxvf ' . escapeshellarg( $file ) . ' -C ' . escapeshellarg( $target ), $code ); + + if ( $code !== 0 ) { + ExtensionInstaller::error( "failed to extract $file!" ); + return false; + } + } + else if ( $this->extensions == '.zip' ) { #zip file + ExtensionInstaller::note( "extracting $file..." ); + wfShellExec( 'unzip ' . escapeshellarg( $file ) . ' -d ' . escapeshellarg( $target ) , $code ); + + if ( $code !== 0 ) { + ExtensionInstaller::error( "failed to extract $file!" ); + return false; + } + } + else { + ExtensionInstaller::error( "unknown extension {$this->extensions}!" ); + return false; + } + + return true; + } + + /*static*/ function makeResource( $url ) { + preg_match( '!(([-+\w]+)://)?.*?(\.[-\w\d.]+)?$!', $url, $m ); + $proto = @$m[2]; + $ext = @$m[3]; + if ( $ext ) $ext = strtolower( $ext ); + + if ( !$proto ) return new LocalInstallerResource( $url, $ext ? 
false : true ); + else if ( $ext && ( $proto == 'http' || $proto == 'http' || $proto == 'ftp' ) ) return new WebInstallerResource( $url ); + else return new SVNInstallerResource( $url ); + } +} + +class LocalInstallerResource extends InstallerResource { + function LocalInstallerResource( $path ) { + InstallerResource::InstallerResource( $path, is_dir( $path ), true ); + } + + function fetch( $target ) { + if ( $this->isdir ) return ExtensionInstaller::copyDir( $this->path, dirname( $target ) ); + else return $this->extract( $this->path, dirname( $target ) ); + } + +} + +class WebInstallerResource extends InstallerResource { + function WebInstallerResource( $path ) { + InstallerResource::InstallerResource( $path, false, false ); + } + + function fetch( $target ) { + $tmp = wfTempDir() . '/' . basename( $this->path ); + + ExtensionInstaller::note( "downloading {$this->path}..." ); + $ok = copy( $this->path, $tmp ); + + if ( !$ok ) { + ExtensionInstaller::error( "failed to download {$this->path}" ); + return false; + } + + $this->extract( $tmp, dirname( $target ) ); + unlink($tmp); + + return true; + } +} + +class SVNInstallerResource extends InstallerResource { + function SVNInstallerResource( $path ) { + InstallerResource::InstallerResource( $path, true, false ); + } + + function fetch( $target ) { + ExtensionInstaller::note( "SVN checkout of {$this->path}..." ); + wfShellExec( 'svn co ' . escapeshellarg( $this->path ) . ' ' . escapeshellarg( $target ), $code ); + + if ( $code !== 0 ) { + ExtensionInstaller::error( "checkout failed for {$this->path}!" 
); + return false; + } + + return true; + } +} + +class ExtensionInstaller { + var $source; + var $target; + var $name; + var $dir; + var $tasks; + + function ExtensionInstaller( $name, $source, $target ) { + if ( !is_object( $source ) ) $source = InstallerResource::makeResource( $source ); + + $this->name = $name; + $this->source = $source; + $this->target = realpath( $target ); + $this->extdir = "$target/extensions"; + $this->dir = "{$this->extdir}/$name"; + $this->incpath = "extensions/$name"; + $this->tasks = array(); + + #TODO: allow a subdir different from "extensions" + #TODO: allow a config file different from "LocalSettings.php" + } + + function note( $msg ) { + print "$msg\n"; + } + + function warn( $msg ) { + print "WARNING: $msg\n"; + } + + function error( $msg ) { + print "ERROR: $msg\n"; + } + + function prompt( $msg ) { + if ( function_exists( 'readline' ) ) { + $s = readline( $msg ); + } + else { + if ( !@$this->stdin ) $this->stdin = fopen( 'php://stdin', 'r' ); + if ( !$this->stdin ) die( "Failed to open stdin for user interaction!\n" ); + + print $msg; + flush(); + + $s = fgets( $this->stdin ); + } + + $s = trim( $s ); + return $s; + } + + function confirm( $msg ) { + while ( true ) { + $s = $this->prompt( $msg . " [yes/no]: "); + $s = strtolower( trim($s) ); + + if ( $s == 'yes' || $s == 'y' ) return true; + else if ( $s == 'no' || $s == 'n' ) return false; + else print "bad response: $s\n"; + } + } + + function deleteContents( $dir ) { + $ff = glob( $dir . "/*" ); + if ( !$ff ) return; + + foreach ( $ff as $f ) { + if ( is_dir( $f ) && !is_link( $f ) ) $this->deleteContents( $f ); + unlink( $f ); + } + } + + function copyDir( $dir, $tgt ) { + $d = $tgt . '/' . basename( $dir ); + + if ( !file_exists( $d ) ) { + $ok = mkdir( $d ); + if ( !$ok ) { + ExtensionInstaller::error( "failed to create director $d" ); + return false; + } + } + + $ff = glob( $dir . 
"/*" ); + if ( $ff === false || $ff === NULL ) return false; + + foreach ( $ff as $f ) { + if ( is_dir( $f ) && !is_link( $f ) ) { + $ok = ExtensionInstaller::copyDir( $f, $d ); + if ( !$ok ) return false; + } + else { + $t = $d . '/' . basename( $f ); + $ok = copy( $f, $t ); + + if ( !$ok ) { + ExtensionInstaller::error( "failed to copy $f to $t" ); + return false; + } + } + } + + return true; + } + + function setPermissions( $dir, $dirbits, $filebits ) { + if ( !chmod( $dir, $dirbits ) ) ExtensionInstaller::warn( "faield to set permissions for $dir" ); + + $ff = glob( $dir . "/*" ); + if ( $ff === false || $ff === NULL ) return false; + + foreach ( $ff as $f ) { + $n= basename( $f ); + if ( $n{0} == '.' ) continue; #HACK: skip dot files + + if ( is_link( $f ) ) continue; #skip link + + if ( is_dir( $f ) ) { + ExtensionInstaller::setPermissions( $f, $dirbits, $filebits ); + } + else { + if ( !chmod( $f, $filebits ) ) ExtensionInstaller::warn( "faield to set permissions for $f" ); + } + } + + return true; + } + + function fetchExtension( ) { + if ( $this->source->islocal && $this->source->isdir && realpath( $this->source->path ) === $this->dir ) { + $this->note( "files are already in the extension dir" ); + return true; + } + + if ( file_exists( $this->dir ) && glob( $this->dir . "/*" ) ) { + if ( $this->confirm( "{$this->dir} exists and is not empty.\nDelete all files in that directory?" ) ) { + $this->deleteContents( $this->dir ); + } + else { + return false; + } + } + + $ok = $this->source->fetch( $this->dir ); + if ( !$ok ) return false; + + if ( !file_exists( $this->dir ) && glob( $this->dir . "/*" ) ) { + $this->error( "{$this->dir} does not exist or is empty. Something went wrong, sorry." ); + return false; + } + + if ( file_exists( $this->dir . '/README' ) ) $this->tasks[] = "read the README file in {$this->dir}"; + if ( file_exists( $this->dir . '/INSTALL' ) ) $this->tasks[] = "read the INSTALL file in {$this->dir}"; + if ( file_exists( $this->dir . 
'/RELEASE-NOTES' ) ) $this->tasks[] = "read the RELEASE-NOTES file in {$this->dir}"; + + #TODO: configure this smartly...? + $this->setPermissions( $this->dir, 0755, 0644 ); + + $this->note( "fetched extension to {$this->dir}" ); + return true; + } + + function patchLocalSettings( $mode ) { + #NOTE: if we get a better way to hook up extensions, that should be used instead. + + $f = $this->dir . '/install.settings'; + $t = $this->target . '/LocalSettings.php'; + + #TODO: assert version ?! + #TODO: allow custom installer scripts + sql patches + + if ( !file_exists( $f ) ) { + $this->warn( "No install.settings file provided!" ); + $this->tasks[] = "Please read the instructions and edit LocalSettings.php manually to activate the extension."; + return '?'; + } + else { + $this->note( "applying settings patch..." ); + } + + $settings = file_get_contents( $f ); + + if ( !$settings ) { + $this->error( "failed to read settings from $f!" ); + return false; + } + + $settings = str_replace( '{{path}}', $this->incpath, $settings ); + + if ( $mode == EXTINST_NOPATCH ) { + $this->tasks[] = "Please put the following into your LocalSettings.php:" . "\n$settings\n"; + $this->note( "Skipping patch phase, automatic patching is off." ); + return true; + } + + if ( $mode == EXTINST_HOTPATCH ) { + #NOTE: keep php extension for backup file! + $bak = $this->target . '/LocalSettings.install-' . $this->name . '-' . wfTimestamp(TS_MW) . '.bak.php'; + + $ok = copy( $t, $bak ); + + if ( !$ok ) { + $this->warn( "failed to create backup of LocalSettings.php!" ); + return false; + } + else { + $this->note( "created backup of LocalSettings.php at $bak" ); + } + } + + $localsettings = file_get_contents( $t ); + + if ( !$settings ) { + $this->error( "failed to read $t for patching!" 
); + return false; + } + + $marker = "<@< extension {$this->name} >@>"; + $blockpattern = "/\n\s*#\s*BEGIN\s*$marker.*END\s*$marker\s*/smi"; + + if ( preg_match( $blockpattern, $localsettings ) ) { + $localsettings = preg_replace( $blockpattern, "\n", $localsettings ); + $this->warn( "removed old configuration block for extension {$this->name}!" ); + } + + $newblock= "\n# BEGIN $marker\n$settings\n# END $marker\n"; + + $localsettings = preg_replace( "/\?>\s*$/si", "$newblock?>", $localsettings ); + + if ( $mode != EXTINST_HOTPATCH ) { + $t = $this->target . '/LocalSettings.install-' . $this->name . '-' . wfTimestamp(TS_MW) . '.php'; + } + + $ok = file_put_contents( $t, $localsettings ); + + if ( !$ok ) { + $this->error( "failed to patch $t!" ); + return false; + } + else if ( $mode == EXTINST_HOTPATCH ) { + $this->note( "successfully patched $t" ); + } + else { + $this->note( "created patched settings file $t" ); + $this->tasks[] = "Replace your current LocalSettings.php with ".basename($t); + } + + return true; + } + + function printNotices( ) { + if ( !$this->tasks ) { + $this->note( "Installation is complete, no pending tasks" ); + } + else { + $this->note( "" ); + $this->note( "PENDING TASKS:" ); + $this->note( "" ); + + foreach ( $this->tasks as $t ) { + $this->note ( "* " . $t ); + } + + $this->note( "" ); + } + + return true; + } + +} + +$tgt = isset ( $options['target'] ) ? $options['target'] : $IP; + +$repos = @$options['repository']; +if ( !$repos ) $repos = @$options['repos']; +if ( !$repos ) $repos = @$wgExtensionInstallerRepository; + +if ( !$repos && file_exists("$tgt/.svn") && is_dir("$tgt/.svn") ) { + $svn = file_get_contents( "$tgt/.svn/entries" ); + + if ( preg_match( '!url="(.*?)"!', $svn, $m ) ) { + $repos = dirname( $m[1] ) . 
'/extensions'; + } +} + +if ( !$repos ) $repos = 'http://svn.wikimedia.org/svnroot/mediawiki/trunk/extensions'; + +if( !isset( $args[0] ) && !@$options['list'] ) { + die( "USAGE: installExtension.php [options] [source]\n" . + "OPTIONS: \n" . + " --list list available extensions. is ignored / may be omitted.\n" . + " --repository repository to fetch extensions from. May be a local directoy,\n" . + " an SVN repository or a HTTP directory\n" . + " --target mediawiki installation directory to use\n" . + " --nopatch don't create a patched LocalSettings.php\n" . + " --hotpatch patched LocalSettings.php directly (creates a backup)\n" . + "SOURCE: specifies the package source directly. If given, the repository is ignored.\n" . + " The source my be a local file (tgz or zip) or directory, the URL of a\n" . + " remote file (tgz or zip), or a SVN path.\n" + ); +} + +$repository = InstallerRepository::makeRepository( $repos ); + +if ( isset( $options['list'] ) ) { + $repository->printListing(); + exit(0); +} + +$name = $args[0]; + +$src = isset( $args[1] ) ? $args[1] : $repository->getResource( $name ); + +#TODO: detect $source mismatching $name !! 
+ +$mode = EXTINST_WRITEPATCH; +if ( isset( $options['nopatch'] ) || @$wgExtensionInstallerNoPatch ) $mode = EXTINST_NOPATCH; +else if ( isset( $options['hotpatch'] ) || @$wgExtensionInstallerHotPatch ) $mode = EXTINST_HOTPATCH; + +if ( !file_exists( "$tgt/LocalSettings.php" ) ) { + die("can't find $tgt/LocalSettings.php\n"); +} + +if ( $mode == EXTINST_HOTPATCH && !is_writable( "$tgt/LocalSettings.php" ) ) { + die("can't write to $tgt/LocalSettings.php\n"); +} + +if ( !file_exists( "$tgt/extensions" ) ) { + die("can't find $tgt/extensions\n"); +} + +if ( !is_writable( "$tgt/extensions" ) ) { + die("can't write to $tgt/extensions\n"); +} + +$installer = new ExtensionInstaller( $name, $src, $tgt ); + +$installer->note( "Installing extension {$installer->name} from {$installer->source->path} to {$installer->dir}" ); + +print "\n"; +print "\tTHIS TOOL IS EXPERIMENTAL!\n"; +print "\tEXPECT THE UNEXPECTED!\n"; +print "\n"; + +if ( !$installer->confirm("continue") ) die("aborted\n"); + +$ok = $installer->fetchExtension(); + +if ( $ok ) $ok = $installer->patchLocalSettings( $mode ); + +if ( $ok ) $ok = $installer->printNotices(); + +if ( $ok ) $installer->note( "$name extension installed." 
); +?> diff --git a/maintenance/language/alltrans.php b/maintenance/language/alltrans.php new file mode 100644 index 00000000..f8db9c0d --- /dev/null +++ b/maintenance/language/alltrans.php @@ -0,0 +1,16 @@ + diff --git a/maintenance/language/checkLanguage.php b/maintenance/language/checkLanguage.php new file mode 100644 index 00000000..11c8ec92 --- /dev/null +++ b/maintenance/language/checkLanguage.php @@ -0,0 +1,177 @@ +getMessages( $code ); + $messagesNumber = count( $messages['translated'] ); + + # Skip the checks if specified + if ( $wgDisplayLevel == 0 ) { + return; + } + + # Untranslated messages + if ( in_array( 'untranslated', $wgChecks ) ) { + $untranslatedMessages = $wgLanguages->getUntranslatedMessages( $code ); + $untranslatedMessagesNumber = count( $untranslatedMessages ); + $wgLanguages->outputMessagesList( $untranslatedMessages, $code, "\n$untranslatedMessagesNumber messages of $wgRequiredMessagesNumber are not translated to $code, but exist in en:", $wgDisplayLevel, $wgLinks, $wgWikiLanguage ); + } + + # Duplicate messages + if ( in_array( 'duplicate', $wgChecks ) ) { + $duplicateMessages = $wgLanguages->getDuplicateMessages( $code ); + $duplicateMessagesNumber = count( $duplicateMessages ); + $wgLanguages->outputMessagesList( $duplicateMessages, $code, "\n$duplicateMessagesNumber messages of $messagesNumber are translated the same in en and $code:", $wgDisplayLevel, $wgLinks, $wgWikiLanguage ); + } + + # Obsolete messages + if ( in_array( 'obsolete', $wgChecks ) ) { + $obsoleteMessages = $messages['obsolete']; + $obsoleteMessagesNumber = count( $obsoleteMessages ); + $wgLanguages->outputMessagesList( $obsoleteMessages, $code, "\n$obsoleteMessagesNumber messages of $messagesNumber are not exist in en (or are in the ignored list), but still exist in $code:", $wgDisplayLevel, $wgLinks, $wgWikiLanguage ); + } + + # Messages without variables + if ( in_array( 'variables', $wgChecks ) ) { + $messagesWithoutVariables = 
$wgLanguages->getMessagesWithoutVariables( $code ); + $messagesWithoutVariablesNumber = count( $messagesWithoutVariables ); + $wgLanguages->outputMessagesList( $messagesWithoutVariables, $code, "\n$messagesWithoutVariablesNumber messages of $messagesNumber in $code don't use some variables while en uses them:", $wgDisplayLevel, $wgLinks, $wgWikiLanguage ); + } + + # Empty messages + if ( in_array( 'empty', $wgChecks ) ) { + $emptyMessages = $wgLanguages->getEmptyMessages( $code ); + $emptyMessagesNumber = count( $emptyMessages ); + $wgLanguages->outputMessagesList( $emptyMessages, $code, "\n$emptyMessagesNumber messages of $messagesNumber in $code are empty or -:", $wgDisplayLevel, $wgLinks, $wgWikiLanguage ); + } + + # Messages with whitespace + if ( in_array( 'whitespace', $wgChecks ) ) { + $messagesWithWhitespace = $wgLanguages->getMessagesWithWhitespace( $code ); + $messagesWithWhitespaceNumber = count( $messagesWithWhitespace ); + $wgLanguages->outputMessagesList( $messagesWithWhitespace, $code, "\n$messagesWithWhitespaceNumber messages of $messagesNumber in $code have a trailing whitespace:", $wgDisplayLevel, $wgLinks, $wgWikiLanguage ); + } + + # Non-XHTML messages + if ( in_array( 'xhtml', $wgChecks ) ) { + $nonXHTMLMessages = $wgLanguages->getNonXHTMLMessages( $code ); + $nonXHTMLMessagesNumber = count( $nonXHTMLMessages ); + $wgLanguages->outputMessagesList( $nonXHTMLMessages, $code, "\n$nonXHTMLMessagesNumber messages of $messagesNumber in $code are not well-formed XHTML:", $wgDisplayLevel, $wgLinks, $wgWikiLanguage ); + } + + # Messages with wrong characters + if ( in_array( 'chars', $wgChecks ) ) { + $messagesWithWrongChars = $wgLanguages->getMessagesWithWrongChars( $code ); + $messagesWithWrongCharsNumber = count( $messagesWithWrongChars ); + $wgLanguages->outputMessagesList( $messagesWithWrongChars, $code, "\n$messagesWithWrongCharsNumber messages of $messagesNumber in $code include hidden chars which should not be used in the messages:", 
$wgDisplayLevel, $wgLinks, $wgWikiLanguage ); + } +} + +# Show help +if ( isset( $options['help'] ) ) { + echo <<\n\n"; + echo "'''Note:''' These statistics can be generated by running php maintenance/language/transstat.php.\n\n"; + echo "For additional information on specific languages (the message names, the actual problems, etc.), run php maintenance/language/checkLanguage.php --lang=foo.\n\n"; + echo '{| border="2" cellpadding="4" cellspacing="0" style="background-color: #F9F9F9; border: 1px #AAAAAA solid; border-collapse: collapse;" width="100%"'."\n"; + } + function footer() { + echo "|}\n"; + } + function blockstart() { + echo "|-\n"; + } + function blockend() { + echo ''; + } + function element( $in, $heading = false ) { + echo ($heading ? '!' : '|') . " $in\n"; + } + function formatPercent( $subset, $total, $revert = false, $accuracy = 2 ) { + $v = @round(255 * $subset / $total); + if ( $revert ) { + $v = 255 - $v; + } + if ( $v < 128 ) { + # Red to Yellow + $red = 'FF'; + $green = sprintf( '%02X', 2 * $v ); + } else { + # Yellow to Green + $red = sprintf('%02X', 2 * ( 255 - $v ) ); + $green = 'FF'; + } + $blue = '00'; + $color = $red . $green . $blue; + + $percent = statsOutput::formatPercent( $subset, $total, $revert, $accuracy ); + return 'bgcolor="#'. $color .'" | '. $percent; + } +} + +/** Outputs WikiText and appends category and text only used for Meta-Wiki */ +class metawikiStatsOutput extends wikiStatsOutput { + function heading() { + echo "See [[MediaWiki localisation]] to learn how you can help translating MediaWiki.\n\n"; + parent::heading(); + } + function footer() { + parent::footer(); + echo "\n[[Category:Localisation|Statistics]]\n"; + } +} + +/** Output text. To be used on a terminal for example. */ +class textStatsOutput extends statsOutput { + function element( $in, $heading = false ) { + echo $in."\t"; + } + function blockend() { + echo "\n"; + } +} + +/** csv output. 
Some people love excel */ +class csvStatsOutput extends statsOutput { + function element( $in, $heading = false ) { + echo $in . ";"; + } + function blockend() { + echo "\n"; + } +} + +# Select an output engine +switch ( $options['output'] ) { + case 'wiki': + $wgOut = new wikiStatsOutput(); + break; + case 'metawiki': + $wgOut = new metawikiStatsOutput(); + break; + case 'text': + $wgOut = new textStatsOutput(); + break; + case 'csv': + $wgOut = new csvStatsOutput(); + break; + default: + showUsage(); +} + +# Languages +$wgLanguages = new languages(); + +# Header +$wgOut->heading(); +$wgOut->blockstart(); +$wgOut->element( 'Language', true ); +$wgOut->element( 'Translated', true ); +$wgOut->element( '%', true ); +$wgOut->element( 'Obsolete', true ); +$wgOut->element( '%', true ); +$wgOut->element( 'Problematic', true ); +$wgOut->element( '%', true ); +$wgOut->blockend(); + +$wgGeneralMessages = $wgLanguages->getGeneralMessages(); +$wgRequiredMessagesNumber = count( $wgGeneralMessages['required'] ); + +foreach ( $wgLanguages->getLanguages() as $code ) { + # Don't check English or RTL English + if ( $code == 'en' || $code == 'enRTL' ) { + continue; + } + + # Calculate the numbers + $language = $wgContLang->getLanguageName( $code ); + $messages = $wgLanguages->getMessages( $code ); + $messagesNumber = count( $messages['translated'] ); + $requiredMessagesNumber = count( $messages['required'] ); + $requiredMessagesPercent = $wgOut->formatPercent( $requiredMessagesNumber, $wgRequiredMessagesNumber ); + $obsoleteMessagesNumber = count( $messages['obsolete'] ); + $obsoleteMessagesPercent = $wgOut->formatPercent( $obsoleteMessagesNumber, $messagesNumber, true ); + $messagesWithoutVariables = $wgLanguages->getMessagesWithoutVariables( $code ); + $emptyMessages = $wgLanguages->getEmptyMessages( $code ); + $messagesWithWhitespace = $wgLanguages->getMessagesWithWhitespace( $code ); + $nonXHTMLMessages = $wgLanguages->getNonXHTMLMessages( $code ); + $messagesWithWrongChars = 
$wgLanguages->getMessagesWithWrongChars( $code ); + $problematicMessagesNumber = count( array_unique( array_merge( $messagesWithoutVariables, $emptyMessages, $messagesWithWhitespace, $nonXHTMLMessages, $messagesWithWrongChars ) ) ); + $problematicMessagesPercent = $wgOut->formatPercent( $problematicMessagesNumber, $messagesNumber, true ); + + # Output them + $wgOut->blockstart(); + $wgOut->element( "$language ($code)" ); + $wgOut->element( "$requiredMessagesNumber/$wgRequiredMessagesNumber" ); + $wgOut->element( $requiredMessagesPercent ); + $wgOut->element( "$obsoleteMessagesNumber/$messagesNumber" ); + $wgOut->element( $obsoleteMessagesPercent ); + $wgOut->element( "$problematicMessagesNumber/$messagesNumber" ); + $wgOut->element( $problematicMessagesPercent ); + $wgOut->blockend(); +} + +# Footer +$wgOut->footer(); + +?> diff --git a/maintenance/language/unusedMessages.php b/maintenance/language/unusedMessages.php new file mode 100644 index 00000000..8b117eca --- /dev/null +++ b/maintenance/language/unusedMessages.php @@ -0,0 +1,42 @@ +getCode(); +} + +if ( $code == 'en' ) { + print "Current selected language is English. 
Cannot check translations.\n"; + exit(); +} + +$filename = Language::getMessagesFileName( $code ); +if ( file_exists( $filename ) ) { + require( $filename ); +} else { + $messages = array(); +} + +$count = $total = 0; +$wgEnglishMessages = Language::getMessagesFor( 'en' ); +$wgLocalMessages = $messages; + +foreach ( $wgLocalMessages as $key => $msg ) { + ++$total; + if ( !isset( $wgEnglishMessages[$key] ) ) { + print "* $key\n"; + ++$count; + } +} + +print "{$count} messages of {$total} are unused in the language {$code}\n"; +?> diff --git a/maintenance/language/validate.php b/maintenance/language/validate.php new file mode 100644 index 00000000..10d98d37 --- /dev/null +++ b/maintenance/language/validate.php @@ -0,0 +1,40 @@ +\n"; + exit( 1 ); +} +array_shift( $argv ); + +define( 'MEDIAWIKI', 1 ); +define( 'NOT_REALLY_MEDIAWIKI', 1 ); + +$IP = dirname( __FILE__ ) . '/../..'; + +require_once( "$IP/includes/Defines.php" ); +require_once( "$IP/languages/Language.php" ); + +$files = array(); +foreach ( $argv as $arg ) { + $files = array_merge( $files, glob( $arg ) ); +} + +foreach ( $files as $filename ) { + print "$filename..."; + $vars = getVars( $filename ); + $keys = array_keys( $vars ); + $diff = array_diff( $keys, Language::$mLocalisationKeys ); + if ( $diff ) { + print "\nWarning: unrecognised variable(s): " . 
implode( ', ', $diff ) ."\n"; + } else { + print " ok\n"; + } +} + +function getVars( $filename ) { + require( $filename ); + $vars = get_defined_vars(); + unset( $vars['filename'] ); + return $vars; +} +?> diff --git a/maintenance/mctest.php b/maintenance/mctest.php index 95249b29..f8f4b965 100644 --- a/maintenance/mctest.php +++ b/maintenance/mctest.php @@ -1,10 +1,17 @@ diff --git a/maintenance/mysql5/tables.sql b/maintenance/mysql5/tables.sql index cc6818d3..81a4690a 100644 --- a/maintenance/mysql5/tables.sql +++ b/maintenance/mysql5/tables.sql @@ -583,8 +583,14 @@ CREATE TABLE /*$wgDBprefix*/ipblocks ( -- Indicates that the IP address was banned because a banned -- user accessed a page through it. If this is 1, ipb_address -- will be hidden, and the block identified by block ID number. - ipb_auto tinyint(1) NOT NULL default '0', + ipb_auto bool NOT NULL default '0', + -- If set to 1, block applies only to logged-out users + ipb_anon_only bool NOT NULL default 0, + + -- Block prevents account creation from matching IP addresses + ipb_create_account bool NOT NULL default 1, + -- Time at which the block will expire. 
ipb_expiry char(14) binary NOT NULL default '', @@ -594,9 +600,15 @@ CREATE TABLE /*$wgDBprefix*/ipblocks ( ipb_range_end varchar(32) NOT NULL default '', PRIMARY KEY ipb_id (ipb_id), - INDEX ipb_address (ipb_address), + + -- Unique index to support "user already blocked" messages + -- Any new options which prevent collisions should be included + UNIQUE INDEX ipb_address (ipb_address(255), ipb_user, ipb_auto, ipb_anon_only), + INDEX ipb_user (ipb_user), - INDEX ipb_range (ipb_range_start(8), ipb_range_end(8)) + INDEX ipb_range (ipb_range_start(8), ipb_range_end(8)), + INDEX ipb_timestamp (ipb_timestamp), + INDEX ipb_expiry (ipb_expiry) ) TYPE=InnoDB, DEFAULT CHARSET=utf8; @@ -797,7 +809,8 @@ CREATE TABLE /*$wgDBprefix*/recentchanges ( INDEX rc_namespace_title (rc_namespace, rc_title), INDEX rc_cur_id (rc_cur_id), INDEX new_name_timestamp(rc_new,rc_namespace,rc_timestamp), - INDEX rc_ip (rc_ip) + INDEX rc_ip (rc_ip), + INDEX rc_ns_usertext ( rc_namespace, rc_user_text ) ) TYPE=InnoDB, DEFAULT CHARSET=utf8; @@ -1006,4 +1019,4 @@ CREATE TABLE /*$wgDBprefix*/querycache_info ( UNIQUE KEY ( qci_type ) -) TYPE=InnoDB; \ No newline at end of file +) TYPE=InnoDB; diff --git a/maintenance/namespaceDupes.php b/maintenance/namespaceDupes.php index ad56eee7..acd3a708 100644 --- a/maintenance/namespaceDupes.php +++ b/maintenance/namespaceDupes.php @@ -111,12 +111,12 @@ class NamespaceConflictChecker { } function reportConflict( $row, $suffix ) { - $newTitle = Title::makeTitle( $row->namespace, $row->title ); + $newTitle = Title::makeTitleSafe( $row->namespace, $row->title ); printf( "... 
%d (0,\"%s\") -> (%d,\"%s\") [[%s]]\n", $row->id, $row->oldtitle, - $row->namespace, - $row->title, + $newTitle->getNamespace(), + $newTitle->getDbKey(), $newTitle->getPrefixedText() ); $id = $newTitle->getArticleId(); @@ -131,7 +131,7 @@ class NamespaceConflictChecker { function resolveConflict( $row, $resolvable, $suffix ) { if( !$resolvable ) { $row->title .= $suffix; - $title = Title::makeTitle( $row->namespace, $row->title ); + $title = Title::makeTitleSafe( $row->namespace, $row->title ); echo "... *** using suffixed form [[" . $title->getPrefixedText() . "]] ***\n"; } $tables = $this->newSchema() @@ -146,10 +146,11 @@ class NamespaceConflictChecker { function resolveConflictOn( $row, $table ) { $fname = 'NamespaceConflictChecker::resolveConflictOn'; echo "... resolving on $table... "; + $newTitle = Title::makeTitleSafe( $row->namespace, $row->title ); $this->db->update( $table, array( - "{$table}_namespace" => $row->namespace, - "{$table}_title" => $row->title, + "{$table}_namespace" => $newTitle->getNamespace(), + "{$table}_title" => $newTitle->getDbKey(), ), array( "{$table}_namespace" => 0, diff --git a/maintenance/ourusers.php b/maintenance/ourusers.php index 0d625571..b50519d2 100644 --- a/maintenance/ourusers.php +++ b/maintenance/ourusers.php @@ -39,50 +39,22 @@ if ( @$argv[1] == 'yaseo' ) { } else { $hosts = array( 'localhost', - '207.142.131.194', - '207.142.131.195', - '207.142.131.196', - '207.142.131.197', - '207.142.131.198', - '207.142.131.199', - '207.142.131.221', - '207.142.131.226', - '207.142.131.227', - '207.142.131.228', - '207.142.131.229', - '207.142.131.230', - '207.142.131.231', - '207.142.131.232', - '207.142.131.233', - '207.142.131.234', - '207.142.131.237', - '207.142.131.238', - '207.142.131.239', - '207.142.131.243', - '207.142.131.244', - '207.142.131.249', - '207.142.131.250', - '207.142.131.216', '10.0.%', + '66.230.200.%', ); } $databases = array( - '%wikibooks', - '%wiki', - '%wikiquote', - '%wiktionary', - '%wikisource', 
- '%wikinews', - '%wikiversity', - '%wikimedia', + '%wik%', ); +print "/*!40100 set old_passwords=1 */;"; +print "/*!40100 set global old_passwords=1 */;"; + foreach( $hosts as $host ) { print "--\n-- $host\n--\n\n-- wikiuser\n\n"; print "GRANT REPLICATION CLIENT,PROCESS ON *.* TO 'wikiuser'@'$host' IDENTIFIED BY '$wikiuser_pass';\n"; - print "GRANT ALL PRIVILEGES ON `boardvote`.* TO 'wikiuser'@'$host' IDENTIFIED BY '$wikiuser_pass';\n"; - print "GRANT ALL PRIVILEGES ON `boardvote2005`.* TO 'wikiuser'@'$host' IDENTIFIED BY '$wikiuser_pass';\n"; + print "GRANT ALL PRIVILEGES ON `boardvote%`.* TO 'wikiuser'@'$host' IDENTIFIED BY '$wikiuser_pass';\n"; foreach( $databases as $db ) { print "GRANT SELECT, INSERT, UPDATE, DELETE ON `$db`.* TO 'wikiuser'@'$host' IDENTIFIED BY '$wikiuser_pass';\n"; } @@ -111,8 +83,7 @@ EOS; print "\n-- wikiadmin\n\n"; print "GRANT PROCESS, REPLICATION CLIENT ON *.* TO 'wikiadmin'@'$host' IDENTIFIED BY '$wikiadmin_pass';\n"; - print "GRANT ALL PRIVILEGES ON `boardvote`.* TO wikiadmin@'$host' IDENTIFIED BY '$wikiadmin_pass';\n"; - print "GRANT ALL PRIVILEGES ON `boardvote2005`.* TO wikiadmin@'$host' IDENTIFIED BY '$wikiadmin_pass';\n"; + print "GRANT ALL PRIVILEGES ON `boardvote%`.* TO wikiadmin@'$host' IDENTIFIED BY '$wikiadmin_pass';\n"; foreach ( $databases as $db ) { print "GRANT ALL PRIVILEGES ON `$db`.* TO wikiadmin@'$host' IDENTIFIED BY '$wikiadmin_pass';\n"; } diff --git a/maintenance/parserTests.inc b/maintenance/parserTests.inc index 9f93c4ac..0aabd27b 100644 --- a/maintenance/parserTests.inc +++ b/maintenance/parserTests.inc @@ -31,7 +31,6 @@ $optionsWithArgs = array( 'regex' ); require_once( 'commandLine.inc' ); require_once( "$IP/includes/ObjectCache.php" ); require_once( "$IP/includes/BagOStuff.php" ); -require_once( "$IP/languages/LanguageUtf8.php" ); require_once( "$IP/includes/Hooks.php" ); require_once( "$IP/maintenance/parserTestsParserHook.php" ); require_once( "$IP/maintenance/parserTestsStaticParserHook.php" ); @@ -238,7 
+237,7 @@ class ParserTest { $this->setupGlobals($opts); - $user =& new User(); + $user = new User(); $options = ParserOptions::newFromUser( $user ); if (preg_match('/\\bmath\\b/i', $opts)) { @@ -255,7 +254,7 @@ class ParserTest { $noxml = (bool)preg_match( '~\\b noxml \\b~x', $opts ); - $parser =& new Parser(); + $parser = new Parser(); foreach( $this->hooks as $tag => $callback ) { $parser->setHook( $tag, $callback ); } @@ -335,14 +334,12 @@ class ParserTest { 'wgLanguageCode' => $lang, 'wgContLanguageCode' => $lang, 'wgDBprefix' => 'parsertest_', - 'wgDefaultUserOptions' => array(), 'wgLang' => null, 'wgContLang' => null, 'wgNamespacesWithSubpages' => array( 0 => preg_match('/\\bsubpage\\b/i', $opts)), 'wgMaxTocLevel' => 999, 'wgCapitalLinks' => true, - 'wgDefaultUserOptions' => array(), 'wgNoFollowLinks' => true, 'wgThumbnailScriptPath' => false, 'wgUseTeX' => false, @@ -354,13 +351,12 @@ class ParserTest { $this->savedGlobals[$var] = $GLOBALS[$var]; $GLOBALS[$var] = $val; } - $langClass = 'Language' . str_replace( '-', '_', ucfirst( $lang ) ); - $langObj = setupLangObj( $langClass ); + $langObj = Language::factory( $lang ); $GLOBALS['wgLang'] = $langObj; $GLOBALS['wgContLang'] = $langObj; $GLOBALS['wgLoadBalancer']->loadMasterPos(); - $GLOBALS['wgMessageCache'] = new MessageCache( new BagOStuff(), false, 0, $GLOBALS['wgDBname'] ); + //$GLOBALS['wgMessageCache'] = new MessageCache( new BagOStuff(), false, 0, $GLOBALS['wgDBname'] ); $this->setupDatabase(); global $wgUser; diff --git a/maintenance/parserTests.php b/maintenance/parserTests.php index eac7adb0..309bf2e0 100644 --- a/maintenance/parserTests.php +++ b/maintenance/parserTests.php @@ -49,7 +49,7 @@ END; # refer to $wgTitle directly, but instead use the title # passed to it. 
$wgTitle = Title::newFromText( 'Parser test script do not use' ); -$tester =& new ParserTest(); +$tester = new ParserTest(); if( isset( $options['file'] ) ) { $file = $options['file']; diff --git a/maintenance/parserTests.txt b/maintenance/parserTests.txt index 0238051c..66b46a53 100644 --- a/maintenance/parserTests.txt +++ b/maintenance/parserTests.txt @@ -713,6 +713,24 @@ External links: [encoded equals] (bug 6102)

!! end +!! test +External links: [IDN ignored character reference in hostname; strip it right off] +!! input +[http://e‌xample.com/] +!! result +

[1] +

+!! end + +!! test +External links: IDN ignored character reference in hostname; strip it right off +!! input +http://e‌xample.com/ +!! result +

http://example.com/ +

+!! end + !! test External links: www.jpeg.org (bug 554) !! input @@ -1192,7 +1210,7 @@ Invalid attributes in table cell (bug 1830) # FIXME: this one has incorrect tag nesting still. !! test -Table security: embedded pipes (http://mail.wikipedia.org/pipermail/wikitech-l/2006-April/034637.html) +TODO: Table security: embedded pipes (http://mail.wikipedia.org/pipermail/wikitech-l/2006-April/034637.html) !! input {| | |[ftp://|x||]" onmouseover="alert(document.cookie)">test @@ -1365,7 +1383,7 @@ Link containing "<#" and ">#" as a hex sequences !! end !! test -Link containing double-single-quotes '' (bug 4598) +TODO: Link containing double-single-quotes '' (bug 4598) !! input [[Lista d''e paise d''o munno]] !! result @@ -2152,7 +2170,7 @@ Template with complex template as argument !! end !! test -Template with thumb image (wiht link in description) +TODO: Template with thumb image (with link in description) !! input {{paramtest| param =[[Image:noimage.png|thumb|[[no link|link]] [[no link|caption]]]]}} @@ -2624,6 +2642,146 @@ pst Foo !! end +!! test +pre-save transform: context links ("pipe trick") +!! options +pst +!! input +[[Article (context)|]] +[[Bar:Article|]] +[[:Bar:Article|]] +[[Bar:Article (context)|]] +[[:Bar:Article (context)|]] +[[|Article]] +[[|Article (context)]] +[[Bar:X (Y) Z|]] +[[:Bar:X (Y) Z|]] +!! result +[[Article (context)|Article]] +[[Bar:Article|Article]] +[[:Bar:Article|Article]] +[[Bar:Article (context)|Article]] +[[:Bar:Article (context)|Article]] +[[Article]] +[[Article (context)]] +[[Bar:X (Y) Z|X (Y) Z]] +[[:Bar:X (Y) Z|X (Y) Z]] +!! end + +!! test +pre-save transform: context links ("pipe trick") with interwiki prefix +!! options +pst +!! input +[[interwiki:Article|]] +[[:interwiki:Article|]] +[[interwiki:Bar:Article|]] +[[:interwiki:Bar:Article|]] +!! result +[[interwiki:Article|Article]] +[[:interwiki:Article|Article]] +[[interwiki:Bar:Article|Bar:Article]] +[[:interwiki:Bar:Article|Bar:Article]] +!! end + +!! 
test +pre-save transform: context links ("pipe trick") with parens in title +!! options +pst title=[[Somearticle (context)]] +!! input +[[|Article]] +!! result +[[Article (context)|Article]] +!! end + +!! test +pre-save transform: context links ("pipe trick") with comma in title +!! options +pst title=[[Someplace, Somewhere]] +!! input +[[|Otherplace]] +[[Otherplace, Elsewhere|]] +[[Otherplace, Elsewhere, Anywhere|]] +!! result +[[Otherplace, Somewhere|Otherplace]] +[[Otherplace, Elsewhere|Otherplace]] +[[Otherplace, Elsewhere, Anywhere|Otherplace]] +!! end + +!! test +pre-save transform: context links ("pipe trick") with parens and comma +!! options +pst title=[[Someplace (IGNORED), Somewhere]] +!! input +[[|Otherplace]] +[[Otherplace (place), Elsewhere|]] +!! result +[[Otherplace, Somewhere|Otherplace]] +[[Otherplace (place), Elsewhere|Otherplace]] +!! end + +!! test +pre-save transform: context links ("pipe trick") with comma and parens +!! options +pst title=[[Who, me? (context)]] +!! input +[[|Yes, you.]] +[[Me, Myself, and I (1937 song)|]] +!! result +[[Yes, you. (context)|Yes, you.]] +[[Me, Myself, and I (1937 song)|Me, Myself, and I]] +!! end + +!! test +pre-save transform: context links ("pipe trick") with namespace +!! options +pst title=[[Ns:Somearticle]] +!! input +[[|Article]] +!! result +[[Ns:Article|Article]] +!! end + +!! test +pre-save transform: context links ("pipe trick") with namespace and parens +!! options +pst title=[[Ns:Somearticle (context)]] +!! input +[[|Article]] +!! result +[[Ns:Article (context)|Article]] +!! end + +!! test +pre-save transform: context links ("pipe trick") with namespace and comma +!! options +pst title=[[Ns:Somearticle, Context, Whatever]] +!! input +[[|Article]] +!! result +[[Ns:Article, Context, Whatever|Article]] +!! end + +!! test +pre-save transform: context links ("pipe trick") with namespace, comma and parens +!! options +pst title=[[Ns:Somearticle, Context (context)]] +!! input +[[|Article]] +!! 
result +[[Ns:Article (context)|Article]] +!! end + +!! test +pre-save transform: context links ("pipe trick") with namespace, parens and comma +!! options +pst title=[[Ns:Somearticle (IGNORED), Context]] +!! input +[[|Article]] +!! result +[[Ns:Article, Context|Article]] +!! end + ### ### Message transform tests @@ -2649,7 +2807,7 @@ msg !! end !! test -message transform: in transcluded template (bug 4926) +TODO: message transform: in transcluded template (bug 4926) !! options msg !! input @@ -2659,7 +2817,7 @@ Foobar !! end !! test -message transform: in transcluded template (bug 4926) +TODO: message transform: in transcluded template (bug 4926) !! options msg !! input @@ -4042,6 +4200,16 @@ disabled Something, but defenetly not
... !! end +!! test +Sanitizer: Validating id attribute uniqueness (bug 4515, bug 6301) +!! options +disabled +!! input +

+!! result +Something need to be done. foo-2 ? +!! end + !! test Language converter: output gets cut off unexpectedly (bug 5757) !! options @@ -4094,7 +4262,7 @@ HTML bullet list, closed tags (bug 5497) !! end !! test -HTML bullet list, unclosed tags (bug 5497) +TODO: HTML bullet list, unclosed tags (bug 5497) !! input
  • One @@ -4124,7 +4292,7 @@ HTML ordered list, closed tags (bug 5497) !! end !! test -HTML ordered list, unclosed tags (bug 5497) +TODO: HTML ordered list, unclosed tags (bug 5497) !! input
    1. One @@ -4164,7 +4332,7 @@ HTML nested bullet list, closed tags (bug 5497) !! end !! test -HTML nested bullet list, open tags (bug 5497) +TODO: HTML nested bullet list, open tags (bug 5497) !! input
      • One @@ -4212,7 +4380,7 @@ HTML nested ordered list, closed tags (bug 5497) !! end !! test -HTML nested ordered list, open tags (bug 5497) +TODO: HTML nested ordered list, open tags (bug 5497) !! input
        1. One @@ -4458,7 +4626,7 @@ Fuzz testing: encoded newline in generated HTML replacements (bug 6577) !! end !! test -Parsing optional HTML elements (Bug 6171) +TODO: Parsing optional HTML elements (Bug 6171) !! options !! input @@ -4524,7 +4692,7 @@ New wiki paragraph !! end !! test -Inline HTML vs wiki block nesting +TODO: Inline HTML vs wiki block nesting !! input Bold paragraph @@ -4537,7 +4705,7 @@ New wiki paragraph !!test -Mixing markup for italics and bold +TODO: Mixing markup for italics and bold !! options !! input '''bold''''''bold''bolditalics''''' @@ -5463,6 +5631,226 @@ Handling of in URLs !!end + +!! test +TODO: 5 quotes, code coverage +1 line +!! input +''''' +!! result +!! end + +!! test +Special:Search page linking. +!! input +{{Special:search}} +!! result +

          Special:Search +

          +!! end + +!! test +Say the magic word +!! input +* {{PAGENAME}} +* {{BASEPAGENAME}} +* {{SUBPAGENAME}} +* {{SUBPAGENAMEE}} +* {{BASEPAGENAME}} +* {{BASEPAGENAMEE}} +* {{TALKPAGENAME}} +* {{TALKPAGENAMEE}} +* {{SUBJECTPAGENAME}} +* {{SUBJECTPAGENAMEE}} +* {{NAMESPACEE}} +* {{NAMESPACE}} +* {{TALKSPACE}} +* {{TALKSPACEE}} +* {{SUBJECTSPACE}} +* {{SUBJECTSPACEE}} +* {{Dynamic|{{NUMBEROFUSERS}}|{{NUMBEROFPAGES}}|{{CURRENTVERSION}}|{{CONTENTLANGUAGE}}|{{DIRECTIONMARK}}|{{CURRENTTIMESTAMP}}|{{NUMBEROFARTICLES}}}} +!! result +
          • Parser test +
          • Parser test +
          • Parser test +
          • Parser_test +
          • Parser test +
          • Parser_test +
          • Talk:Parser test +
          • Talk:Parser_test +
          • Parser test +
          • Parser_test +
          • +
          • +
          • Talk +
          • Talk +
          • +
          • +
          • Template:Dynamic +
          + +!! end +### Note: Above tests exclude the "{{NUMBEROFADMINS}}" magic word because it generates a MySQL error when included. + +!! test +Gallery +!! input +<gallery> +image1.png | +image2.gif||||| + +image3| +image4 |300px| centre + image5.svg| http:///////// +[[x|xx]]]] +* image6 +</gallery> +!! result +
          + + + + + + + + +!! end + +!! test +TODO: HTML Hex character encoding. +!! input +JavaScript +!! result +

          JavaScript +

          +!! end + +!! test +__FORCETOC__ override +!! input +__NEWSECTIONLINK__ +__FORCETOC__ +!! result +


          +

          +!! end + +!! test +ISBN code coverage +!! input +ISBN 983 987 +!! result +

          ISBN 983 987 +

          +!! end + +!! test +ISBN followed by 5 spaces +!! input +ISBN +!! result +

          ISBN +

          +!! end + +!! test +Double ISBN +!! options +disabled # Disabled until Bug 6560 resolved +!! input +ISBN ISBN 1234 +!! result +

          ISBN ISBN 1234 +

          +!! end + +!! test +Double RFC +!! input +RFC RFC 1234 +!! result +

          RFC RFC 1234 +

          +!! end + +!! test +Double RFC with a wiki link +!! input +RFC [[RFC 1234]] +!! result +

          RFC RFC 1234 +

          +!! end + +!! test +RFC code coverage +!! input +RFC 983 987 +!! result +

          RFC 983 987 +

          +!! end + +!! test +Centre-aligned image +!! input +[[Image:foobar.jpg|centre]] +!! result +
          + +!!end + +!! test +None-aligned image +!! input +[[Image:foobar.jpg|none]] +!! result +
          + +!!end + +!! test +Width + Height sized image (using px) (height is ignored) +!! input +[[Image:foobar.jpg|640x480px]] +!! result +

          +

          +!!end + +!! test +Another italics / bold test +!! input + ''' ''x' +!! result +
          ' x'
          +
          +!!end + +# Note the results may be incorrect, as parserTest output included this: +# XML error: Mismatched tag at byte 6120: +# ...
          +
          +
          + + + + + +!!end + # # # @@ -5473,3 +5861,4 @@ more tables math character entities and much more +Try for 100% code coverage diff --git a/maintenance/postgres/compare_schemas.pl b/maintenance/postgres/compare_schemas.pl new file mode 100644 index 00000000..4a76b270 --- /dev/null +++ b/maintenance/postgres/compare_schemas.pl @@ -0,0 +1,181 @@ +#!/usr/bin/perl + +## Rough check that the base and postgres "tables.sql" are in sync +## Should be run from maintenance/postgres + +use strict; +use warnings; +use Data::Dumper; + +my @old = ("../tables.sql"); +my $new = "tables.sql"; + +## Read in exceptions and other metadata +my %ok; +while () { + next unless /^(\w+)\s*:\s*([^#]+)/; + my ($name,$val) = ($1,$2); + chomp $val; + if ($name eq 'RENAME') { + die "Invalid rename\n" unless $val =~ /(\w+)\s+(\w+)/; + $ok{OLD}{$1} = $2; + $ok{NEW}{$2} = $1; + next; + } + if ($name eq 'XFILE') { + push @old, $val; + next; + } + for (split(/\s+/ => $val)) { + $ok{$name}{$_} = 0; + } +} + +open my $newfh, "<", $new or die qq{Could not open $new: $!\n}; + +my $datatype = join '|' => qw( +bool +tinyint int bigint real float +tinytext mediumtext text char varchar +timestamp datetime +tinyblob mediumblob blob +); +$datatype .= q{|ENUM\([\"\w, ]+\)}; +$datatype = qr{($datatype)}; + +my $typeval = qr{(\(\d+\))?}; + +my $typeval2 = qr{ unsigned| binary| NOT NULL| NULL| auto_increment| default ['\-\d\w"]+| REFERENCES .+CASCADE}; + +my $indextype = join '|' => qw(INDEX KEY FULLTEXT), "PRIMARY KEY", "UNIQUE INDEX", "UNIQUE KEY"; +$indextype = qr{$indextype}; + +my $tabletype = qr{InnoDB|MyISAM|HEAP|HEAP MAX_ROWS=\d+}; + +my ($table,%old); +for my $old (@old) { + open my $oldfh, "<", $old or die qq{Could not open $old: $!\n}; + + while (<$oldfh>) { + next if /^\s*\-\-/ or /^\s+$/; + s/\s*\-\- [\w ]+$//; + chomp; + + if (/CREATE\s*TABLE/i) { + m{^CREATE TABLE /\*\$wgDBprefix\*/(\w+) \($} + or die qq{Invalid CREATE TABLE at line $. 
of $old\n}; + $table = $1; + $old{$table}{name}=$table; + } + elsif (/^\) TYPE=($tabletype);$/) { + $old{$table}{type}=$1; + } + elsif (/^ (\w+) $datatype$typeval$typeval2{0,3},?$/) { + $old{$table}{column}{$1} = $2; + } + elsif (/^ ($indextype)(?: (\w+))? \(([\w, \(\)]+)\),?$/) { + $old{$table}{lc $1."_name"} = $2 ? $2 : ""; + $old{$table}{lc $1."pk_target"} = $3; + } + else { + die "Cannot parse line $. of $old:\n$_\n"; + } + } + close $oldfh; +} + +$datatype = join '|' => qw( +SMALLINT INTEGER BIGINT NUMERIC SERIAL +TEXT CHAR VARCHAR +BYTEA +TIMESTAMPTZ +CIDR +); +$datatype = qr{($datatype)}; +my %new; +my ($infunction,$inview,$inrule) = (0,0,0); +while (<$newfh>) { + next if /^\s*\-\-/ or /^\s*$/; + s/\s*\-\- [\w ']+$//; + next if /^BEGIN;/ or /^SET / or /^COMMIT;/; + next if /^CREATE SEQUENCE/; + next if /^CREATE(?: UNIQUE)? INDEX/; + next if /^CREATE FUNCTION/; + next if /^CREATE TRIGGER/ or /^ FOR EACH ROW/; + next if /^INSERT INTO/ or /^ VALUES \(/; + next if /^ALTER TABLE/; + chomp; + + if (/^\$mw\$;?$/) { + $infunction = $infunction ? 0 : 1; + next; + } + next if $infunction; + + next if /^CREATE VIEW/ and $inview = 1; + if ($inview) { + /;$/ and $inview = 0; + next; + } + + next if /^CREATE RULE/ and $inrule = 1; + if ($inrule) { + /;$/ and $inrule = 0; + next; + } + + if (/^CREATE TABLE "?(\w+)"? \($/) { + $table = $1; + $new{$table}{name}=$table; + } + elsif (/^\);$/) { + } + elsif (/^ (\w+) +$datatype/) { + $new{$table}{column}{$1} = $2; + } + else { + die "Cannot parse line $. of $new:\n$_\n"; + } +} +close $newfh; + +## Old but not new +for my $t (sort keys %old) { + if (!exists $new{$t} and !exists $ok{OLD}{$t}) { + print "Table not in $new: $t\n"; + next; + } + next if exists $ok{OLD}{$t} and !$ok{OLD}{$t}; + my $newt = exists $ok{OLD}{$t} ? 
$ok{OLD}{$t} : $t; + my $oldcol = $old{$t}{column}; + my $newcol = $new{$newt}{column}; + for my $c (keys %$oldcol) { + if (!exists $newcol->{$c}) { + print "Column $t.$c not in new\n"; + next; + } + } + for my $c (keys %$newcol) { + if (!exists $oldcol->{$c}) { + print "Column $t.$c not in old\n"; + next; + } + } +} +## New but not old: +for (sort keys %new) { + if (!exists $old{$_} and !exists $ok{NEW}{$_}) { + print "Not in old: $_\n"; + next; + } +} + +__DATA__ +## Known exceptions +OLD: searchindex ## We use tsearch2 directly on the page table instead +OLD: archive ## This is a view due to the char(14) timestamp hack +RENAME: user mwuser ## Reserved word causing lots of problems +RENAME: text pagecontent ## Reserved word +NEW: archive2 ## The real archive table +NEW: mediawiki_version ## Just us, for now +XFILE: ../archives/patch-profiling.sql diff --git a/maintenance/postgres/tables.sql b/maintenance/postgres/tables.sql index 5481a394..9ac329d8 100644 --- a/maintenance/postgres/tables.sql +++ b/maintenance/postgres/tables.sql @@ -11,7 +11,7 @@ BEGIN; SET client_min_messages = 'ERROR'; CREATE SEQUENCE user_user_id_seq MINVALUE 0 START WITH 0; -CREATE TABLE "user" ( +CREATE TABLE mwuser ( -- replace reserved word 'user' user_id INTEGER NOT NULL PRIMARY KEY DEFAULT nextval('user_user_id_seq'), user_name TEXT NOT NULL UNIQUE, user_real_name TEXT, @@ -26,20 +26,20 @@ CREATE TABLE "user" ( user_touched TIMESTAMPTZ, user_registration TIMESTAMPTZ ); -CREATE INDEX user_email_token_idx ON "user" (user_email_token); +CREATE INDEX user_email_token_idx ON mwuser (user_email_token); -- Create a dummy user to satisfy fk contraints especially with revisions -INSERT INTO "user" VALUES - (DEFAULT,'Anonymous','',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,now(),now()); +INSERT INTO mwuser + VALUES (DEFAULT,'Anonymous','',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,now(),now()); CREATE TABLE user_groups ( - ug_user INTEGER NULL REFERENCES "user"(user_id) ON DELETE CASCADE, + ug_user 
INTEGER NULL REFERENCES mwuser(user_id) ON DELETE CASCADE, ug_group TEXT NOT NULL ); CREATE UNIQUE INDEX user_groups_unique ON user_groups (ug_user, ug_group); CREATE TABLE user_newtalk ( - user_id INTEGER NOT NULL REFERENCES "user"(user_id) ON DELETE CASCADE, + user_id INTEGER NOT NULL REFERENCES mwuser(user_id) ON DELETE CASCADE, user_ip CIDR NULL ); CREATE INDEX user_newtalk_id_idx ON user_newtalk (user_id); @@ -69,18 +69,24 @@ CREATE INDEX page_project_title ON page (page_title) WHERE page_namespace = CREATE INDEX page_random_idx ON page (page_random); CREATE INDEX page_len_idx ON page (page_len); --- Create a dummy page to satisfy fk contraints where a page_id of "0" is added -INSERT INTO page (page_id,page_namespace,page_title,page_random,page_latest,page_len) -VALUES (0,0,'',0.0,0,0); +CREATE FUNCTION page_deleted() RETURNS TRIGGER LANGUAGE plpgsql AS +$mw$ +BEGIN +DELETE FROM recentchanges WHERE rc_namespace = OLD.page_namespace AND rc_title = OLD.page_title; +RETURN NULL; +END; +$mw$; +CREATE TRIGGER page_deleted AFTER DELETE ON page + FOR EACH ROW EXECUTE PROCEDURE page_deleted(); CREATE SEQUENCE rev_rev_id_val; CREATE TABLE revision ( rev_id INTEGER NOT NULL UNIQUE DEFAULT nextval('rev_rev_id_val'), - rev_page INTEGER NULL REFERENCES page (page_id) ON DELETE SET NULL, + rev_page INTEGER NULL REFERENCES page (page_id) ON DELETE CASCADE, rev_text_id INTEGER NULL, -- FK rev_comment TEXT, - rev_user INTEGER NOT NULL REFERENCES "user"(user_id), + rev_user INTEGER NOT NULL REFERENCES mwuser(user_id), rev_user_text TEXT NOT NULL, rev_timestamp TIMESTAMPTZ NOT NULL, rev_minor_edit CHAR NOT NULL DEFAULT '0', @@ -93,19 +99,19 @@ CREATE INDEX rev_user_text_idx ON revision (rev_user_text); CREATE SEQUENCE text_old_id_val; -CREATE TABLE "text" ( +CREATE TABLE pagecontent ( -- replaces reserved word 'text' old_id INTEGER NOT NULL PRIMARY KEY DEFAULT nextval('text_old_id_val'), old_text TEXT, old_flags TEXT ); -CREATE TABLE archive ( +CREATE TABLE archive2 ( 
ar_namespace SMALLINT NOT NULL, ar_title TEXT NOT NULL, ar_text TEXT, ar_comment TEXT, - ar_user INTEGER NULL REFERENCES "user"(user_id) ON DELETE SET NULL, + ar_user INTEGER NULL REFERENCES mwuser(user_id) ON DELETE SET NULL, ar_user_text TEXT NOT NULL, ar_timestamp TIMESTAMPTZ NOT NULL, ar_minor_edit CHAR NOT NULL DEFAULT '0', @@ -113,7 +119,22 @@ CREATE TABLE archive ( ar_rev_id INTEGER, ar_text_id INTEGER ); -CREATE INDEX archive_name_title_timestamp ON archive (ar_namespace,ar_title,ar_timestamp); +CREATE INDEX archive_name_title_timestamp ON archive2 (ar_namespace,ar_title,ar_timestamp); + +-- This is the easiest way to work around the char(15) timestamp hack without modifying PHP code +CREATE VIEW archive AS +SELECT + ar_namespace, ar_title, ar_text, ar_comment, ar_user, ar_user_text, + ar_minor_edit, ar_flags, ar_rev_id, ar_text_id, + TO_CHAR(ar_timestamp, 'YYYYMMDDHH24MISS') AS ar_timestamp +FROM archive2; + +CREATE RULE archive_insert AS ON INSERT TO archive +DO INSTEAD INSERT INTO archive2 VALUES ( + NEW.ar_namespace, NEW.ar_title, NEW.ar_text, NEW.ar_comment, NEW.ar_user, NEW.ar_user_text, + TO_DATE(NEW.ar_timestamp, 'YYYYMMDDHH24MISS'), + NEW.ar_minor_edit, NEW.ar_flags, NEW.ar_rev_id, NEW.ar_text_id +); CREATE TABLE pagelinks ( @@ -121,7 +142,7 @@ CREATE TABLE pagelinks ( pl_namespace SMALLINT NOT NULL, pl_title TEXT NOT NULL ); -CREATE UNIQUE INDEX pagelink_unique ON pagelinks (pl_namespace,pl_title,pl_from); +CREATE UNIQUE INDEX pagelink_unique ON pagelinks (pl_from,pl_namespace,pl_title); CREATE TABLE templatelinks ( tl_from INTEGER NOT NULL REFERENCES page(page_id) ON DELETE CASCADE, @@ -180,16 +201,18 @@ CREATE TABLE hitcounter ( CREATE SEQUENCE ipblocks_ipb_id_val; CREATE TABLE ipblocks ( - ipb_id INTEGER NOT NULL PRIMARY KEY DEFAULT nextval('ipblocks_ipb_id_val'), - ipb_address CIDR NULL, - ipb_user INTEGER NULL REFERENCES "user"(user_id) ON DELETE SET NULL, - ipb_by INTEGER NOT NULL REFERENCES "user"(user_id) ON DELETE CASCADE, - ipb_reason 
TEXT NOT NULL, - ipb_timestamp TIMESTAMPTZ NOT NULL, - ipb_auto CHAR NOT NULL DEFAULT '0', - ipb_expiry TIMESTAMPTZ NOT NULL, - ipb_range_start TEXT, - ipb_range_end TEXT + ipb_id INTEGER NOT NULL PRIMARY KEY DEFAULT nextval('ipblocks_ipb_id_val'), + ipb_address CIDR NULL, + ipb_user INTEGER NULL REFERENCES mwuser(user_id) ON DELETE SET NULL, + ipb_by INTEGER NOT NULL REFERENCES mwuser(user_id) ON DELETE CASCADE, + ipb_reason TEXT NOT NULL, + ipb_timestamp TIMESTAMPTZ NOT NULL, + ipb_auto CHAR NOT NULL DEFAULT '0', + ipb_anon_only CHAR NOT NULL DEFAULT '0', + ipb_create_account CHAR NOT NULL DEFAULT '1', + ipb_expiry TIMESTAMPTZ NOT NULL, + ipb_range_start TEXT, + ipb_range_end TEXT ); CREATE INDEX ipb_address ON ipblocks (ipb_address); CREATE INDEX ipb_user ON ipblocks (ipb_user); @@ -198,16 +221,16 @@ CREATE INDEX ipb_range ON ipblocks (ipb_range_start,ipb_range_end); CREATE TABLE image ( img_name TEXT NOT NULL PRIMARY KEY, - img_size SMALLINT NOT NULL, - img_width SMALLINT NOT NULL, - img_height SMALLINT NOT NULL, + img_size INTEGER NOT NULL, + img_width INTEGER NOT NULL, + img_height INTEGER NOT NULL, img_metadata TEXT, img_bits SMALLINT, img_media_type TEXT, img_major_mime TEXT DEFAULT 'unknown', img_minor_mime TEXT DEFAULT 'unknown', img_description TEXT NOT NULL, - img_user INTEGER NULL REFERENCES "user"(user_id) ON DELETE SET NULL, + img_user INTEGER NULL REFERENCES mwuser(user_id) ON DELETE SET NULL, img_user_text TEXT NOT NULL, img_timestamp TIMESTAMPTZ ); @@ -217,12 +240,12 @@ CREATE INDEX img_timestamp_idx ON image (img_timestamp); CREATE TABLE oldimage ( oi_name TEXT NOT NULL REFERENCES image(img_name), oi_archive_name TEXT NOT NULL, - oi_size SMALLINT NOT NULL, - oi_width SMALLINT NOT NULL, - oi_height SMALLINT NOT NULL, + oi_size INTEGER NOT NULL, + oi_width INTEGER NOT NULL, + oi_height INTEGER NOT NULL, oi_bits SMALLINT NOT NULL, oi_description TEXT, - oi_user INTEGER NULL REFERENCES "user"(user_id) ON DELETE SET NULL, + oi_user INTEGER NULL 
REFERENCES mwuser(user_id) ON DELETE SET NULL, oi_user_text TEXT NOT NULL, oi_timestamp TIMESTAMPTZ NOT NULL ); @@ -235,7 +258,7 @@ CREATE TABLE filearchive ( fa_archive_name TEXT, fa_storage_group VARCHAR(16), fa_storage_key CHAR(64), - fa_deleted_user INTEGER NULL REFERENCES "user"(user_id) ON DELETE SET NULL, + fa_deleted_user INTEGER NULL REFERENCES mwuser(user_id) ON DELETE SET NULL, fa_deleted_timestamp TIMESTAMPTZ NOT NULL, fa_deleted_reason TEXT, fa_size SMALLINT NOT NULL, @@ -247,7 +270,7 @@ CREATE TABLE filearchive ( fa_major_mime TEXT DEFAULT 'unknown', fa_minor_mime TEXT DEFAULT 'unknown', fa_description TEXT NOT NULL, - fa_user INTEGER NULL REFERENCES "user"(user_id) ON DELETE SET NULL, + fa_user INTEGER NULL REFERENCES mwuser(user_id) ON DELETE SET NULL, fa_user_text TEXT NOT NULL, fa_timestamp TIMESTAMPTZ ); @@ -262,7 +285,7 @@ CREATE TABLE recentchanges ( rc_id INTEGER NOT NULL PRIMARY KEY DEFAULT nextval('rc_rc_id_seq'), rc_timestamp TIMESTAMPTZ NOT NULL, rc_cur_time TIMESTAMPTZ NOT NULL, - rc_user INTEGER NULL REFERENCES "user"(user_id) ON DELETE SET NULL, + rc_user INTEGER NULL REFERENCES mwuser(user_id) ON DELETE SET NULL, rc_user_text TEXT NOT NULL, rc_namespace SMALLINT NOT NULL, rc_title TEXT NOT NULL, @@ -270,7 +293,7 @@ CREATE TABLE recentchanges ( rc_minor CHAR NOT NULL DEFAULT '0', rc_bot CHAR NOT NULL DEFAULT '0', rc_new CHAR NOT NULL DEFAULT '0', - rc_cur_id INTEGER NOT NULL REFERENCES page(page_id), + rc_cur_id INTEGER NULL REFERENCES page(page_id) ON DELETE SET NULL, rc_this_oldid INTEGER NOT NULL, rc_last_oldid INTEGER NOT NULL, rc_type CHAR NOT NULL DEFAULT '0', @@ -287,7 +310,7 @@ CREATE INDEX rc_ip ON recentchanges (rc_ip); CREATE TABLE watchlist ( - wl_user INTEGER NOT NULL REFERENCES "user"(user_id) ON DELETE CASCADE, + wl_user INTEGER NOT NULL REFERENCES mwuser(user_id) ON DELETE CASCADE, wl_namespace SMALLINT NOT NULL DEFAULT 0, wl_title TEXT NOT NULL, wl_notificationtimestamp TIMESTAMPTZ @@ -343,7 +366,7 @@ CREATE TABLE 
logging ( log_type TEXT NOT NULL, log_action TEXT NOT NULL, log_timestamp TIMESTAMPTZ NOT NULL, - log_user INTEGER REFERENCES "user"(user_id) ON DELETE SET NULL, + log_user INTEGER REFERENCES mwuser(user_id) ON DELETE SET NULL, log_namespace SMALLINT NOT NULL, log_title TEXT NOT NULL, log_comment TEXT, @@ -383,38 +406,71 @@ CREATE FUNCTION ts2_page_title() RETURNS TRIGGER LANGUAGE plpgsql AS $mw$ BEGIN IF TG_OP = 'INSERT' THEN - NEW.titlevector = to_tsvector(NEW.page_title); + NEW.titlevector = to_tsvector('default',NEW.page_title); ELSIF NEW.page_title != OLD.page_title THEN - NEW.titlevector := to_tsvector(NEW.page_title); + NEW.titlevector := to_tsvector('default',NEW.page_title); END IF; RETURN NEW; END; $mw$; CREATE TRIGGER ts2_page_title BEFORE INSERT OR UPDATE ON page -FOR EACH ROW EXECUTE PROCEDURE ts2_page_title(); + FOR EACH ROW EXECUTE PROCEDURE ts2_page_title(); -ALTER TABLE text ADD textvector tsvector; -CREATE INDEX ts2_page_text ON text USING gist(textvector); +ALTER TABLE pagecontent ADD textvector tsvector; +CREATE INDEX ts2_page_text ON pagecontent USING gist(textvector); CREATE FUNCTION ts2_page_text() RETURNS TRIGGER LANGUAGE plpgsql AS $mw$ BEGIN IF TG_OP = 'INSERT' THEN - NEW.textvector = to_tsvector(NEW.old_text); + NEW.textvector = to_tsvector('default',NEW.old_text); ELSIF NEW.old_text != OLD.old_text THEN - NEW.textvector := to_tsvector(NEW.old_text); + NEW.textvector := to_tsvector('default',NEW.old_text); END IF; RETURN NEW; END; $mw$; -CREATE TRIGGER ts2_page_text BEFORE INSERT OR UPDATE ON text -FOR EACH ROW EXECUTE PROCEDURE ts2_page_text(); +CREATE TRIGGER ts2_page_text BEFORE INSERT OR UPDATE ON pagecontent + FOR EACH ROW EXECUTE PROCEDURE ts2_page_text(); -CREATE OR REPLACE FUNCTION add_interwiki (TEXT,INT,CHAR) RETURNS INT LANGUAGE SQL AS +CREATE FUNCTION add_interwiki (TEXT,INT,CHAR) RETURNS INT LANGUAGE SQL AS $mw$ INSERT INTO interwiki (iw_prefix, iw_url, iw_local) VALUES ($1,$2,$3); SELECT 1; $mw$; + +-- This table is not used 
unless profiling is turned on +CREATE TABLE profiling ( + pf_count INTEGER NOT NULL DEFAULT 0, + pf_time NUMERIC(18,10) NOT NULL DEFAULT 0, + pf_name TEXT NOT NULL, + pf_server TEXT NULL +); +CREATE UNIQUE INDEX pf_name_server ON profiling (pf_name, pf_server); + + +CREATE TABLE mediawiki_version ( + type TEXT NOT NULL, + mw_version TEXT NOT NULL, + notes TEXT NULL, + + pg_version TEXT NULL, + pg_dbname TEXT NULL, + pg_user TEXT NULL, + pg_port TEXT NULL, + mw_schema TEXT NULL, + ts2_schema TEXT NULL, + ctype TEXT NULL, + + sql_version TEXT NULL, + sql_date TEXT NULL, + cdate TIMESTAMPTZ NOT NULL DEFAULT now() +); + +INSERT INTO mediawiki_version (type,mw_version,sql_version,sql_date) + VALUES ('Creation','??','$LastChangedRevision: 16747 $','$LastChangedDate: 2006-10-02 17:55:26 -0700 (Mon, 02 Oct 2006) $'); + + COMMIT; diff --git a/maintenance/postgres/wp_mysql2postgres.pl b/maintenance/postgres/wp_mysql2postgres.pl new file mode 100644 index 00000000..788d9e0b --- /dev/null +++ b/maintenance/postgres/wp_mysql2postgres.pl @@ -0,0 +1,400 @@ +#!/usr/bin/perl + +## Convert data from a MySQL mediawiki database into a Postgres mediawiki database +## svn: $Id: wp_mysql2postgres.pl 16088 2006-08-16 01:12:20Z greg $ + +use strict; +use warnings; +use Data::Dumper; +use Getopt::Long; + +use vars qw(%table %tz %special @torder $COM); +my $VERSION = "1.0"; + +## The following options can be changed via command line arguments: +my $MYSQLDB = 'wikidb'; +my $MYSQLUSER = 'wikiuser'; + +## If the following are zero-length, we omit their arguments entirely: +my $MYSQLHOST = ''; +my $MYSQLPASSWORD = ''; +my $MYSQLSOCKET = ''; + +## Name of the dump file created +my $MYSQLDUMPFILE = "mediawiki_upgrade.pg"; + +## How verbose should this script be (0, 1, or 2) +my $verbose = 0; + +my $USAGE = " +Usage: $0 [OPTION]... 
+Convert a MediaWiki schema from MySQL to Postgres +Example: $0 --db=wikidb --user=wikiuser --pass=sushi +Options: + db Name of the MySQL database + user MySQL database username + pass MySQL database password + host MySQL database host + socket MySQL database socket + verbose Verbosity, increases with multiple uses +"; + +GetOptions + ( + "db=s" => \$MYSQLDB, + "user=s" => \$MYSQLUSER, + "pass=s" => \$MYSQLPASSWORD, + "host=s" => \$MYSQLHOST, + "socket=s" => \$MYSQLSOCKET, + "verbose+" => \$verbose + ); + +## The Postgres schema file: should not be changed +my $PG_SCHEMA = "tables.sql"; + +## What version we default to when we can't parse the old schema +my $MW_DEFAULT_VERSION = '1.8'; + +## Try and find a working version of mysqldump +$verbose and warn "Locating the mysqldump executable\n"; +my @MYSQLDUMP = ("/usr/local/bin/mysqldump", "/usr/bin/mysqldump"); +my $MYSQLDUMP; +for my $mytry (@MYSQLDUMP) { + next if ! -e $mytry; + -x $mytry or die qq{Not an executable file: "$mytry"\n}; + my $version = qx{$mytry -V}; + $version =~ /^mysqldump\s+Ver\s+\d+/ or die qq{Program at "$mytry" does not act like mysqldump\n}; + $MYSQLDUMP = $mytry; +} +$MYSQLDUMP or die qq{Could not find the mysqldump program\n}; + +## Flags we use for mysqldump +my @MYSQLDUMPARGS = qw( +--skip-lock-tables +--complete-insert +--skip-extended-insert +--skip-add-drop-table +--skip-add-locks +--skip-disable-keys +--skip-set-charset +--skip-comments +--skip-quote-names +); + + +$verbose and warn "Checking that mysqldump can handle our flags\n"; +## Make sure this version can handle all the flags we want. +## Combine with user dump below +my $MYSQLDUMPARGS = join " " => @MYSQLDUMPARGS; +## Argh. Any way to make this work on Win32? 
+my $version = qx{$MYSQLDUMP $MYSQLDUMPARGS 2>&1}; +if ($version =~ /unknown option/) { + die qq{Sorry, you need to use a newer version of the mysqldump program than the one at "$MYSQLDUMP"\n}; +} + +push @MYSQLDUMPARGS, "--user=$MYSQLUSER"; +length $MYSQLPASSWORD and push @MYSQLDUMPARGS, "--password=$MYSQLPASSWORD"; +length $MYSQLHOST and push @MYSQLDUMPARGS, "--host=$MYSQLHOST"; + +## Open the dump file to hold the mysqldump output +open my $mdump, "+>", $MYSQLDUMPFILE or die qq{Could not open "$MYSQLDUMPFILE": $!\n}; +$verbose and warn qq{Writing file "$MYSQLDUMPFILE"\n}; + +open my $mfork2, "-|" or exec $MYSQLDUMP, @MYSQLDUMPARGS, "--no-data", $MYSQLDB; +my $oldselect = select $mdump; + +print while <$mfork2>; + +## Slurp in the current schema +my $current_schema; +seek $mdump, 0, 0; +{ + local $/; + $current_schema = <$mdump>; +} +seek $mdump, 0, 0; +truncate $mdump, 0; + +warn qq{Trying to determine database version...\n} if $verbose; + +my $current_version = 0; +if ($current_schema =~ /CREATE TABLE \S+cur /) { + $current_version = '1.3'; +} +elsif ($current_schema =~ /CREATE TABLE \S+brokenlinks /) { + $current_version = '1.4'; +} +elsif ($current_schema !~ /CREATE TABLE \S+templatelinks /) { + $current_version = '1.5'; +} +elsif ($current_schema !~ /CREATE TABLE \S+validate /) { + $current_version = '1.6'; +} +elsif ($current_schema !~ /ipb_auto tinyint/) { + $current_version = '1.7'; +} +else { + $current_version = '1.8'; +} + +if (!$current_version) { + warn qq{WARNING! 
Could not figure out the old version, assuming MediaWiki $MW_DEFAULT_VERSION\n}; + $current_version = $MW_DEFAULT_VERSION; +} + +## Check for a table prefix: +my $table_prefix = ''; +if ($current_schema =~ /CREATE TABLE (\S+)archive /) { + $table_prefix = $1; +} + +warn qq{Old schema is from MediaWiki version $current_version\n} if $verbose; +warn qq{Table prefix is "$table_prefix"\n} if $verbose and length $table_prefix; + +$verbose and warn qq{Writing file "$MYSQLDUMPFILE"\n}; +my $now = scalar localtime(); +my $conninfo = ''; +$MYSQLHOST and $conninfo .= "\n-- host $MYSQLHOST"; +$MYSQLSOCKET and $conninfo .= "\n-- socket $MYSQLSOCKET"; + +print qq{ +-- Dump of MySQL Mediawiki tables for import into a Postgres Mediawiki schema +-- Performed by the program: $0 +-- Version: $VERSION (subversion }.q{$LastChangedRevision: 16088 $}.qq{) +-- Author: Greg Sabino Mullane Comments welcome +-- +-- This file was created: $now +-- Executable used: $MYSQLDUMP +-- Connection information: +-- database: $MYSQLDB +-- user: $MYSQLUSER$conninfo + +-- This file can be imported manually with psql like so: +-- psql -p port# -h hostname -U username -f $MYSQLDUMPFILE databasename +-- This will overwrite any existing MediaWiki information, so be careful + + +}; + +warn qq{Reading in the Postgres schema information\n} if $verbose; +open my $schema, "<", $PG_SCHEMA + or die qq{Could not open "$PG_SCHEMA": make sure this script is run from maintenance/postgres/\n}; +my $t; +while (<$schema>) { + if (/CREATE TABLE\s+(\S+)/) { + $t = $1; + $table{$t}={}; + } + elsif (/^ +(\w+)\s+TIMESTAMP/) { + $tz{$t}{$1}++; + } + elsif (/REFERENCES\s*([^( ]+)/) { + my $ref = $1; + exists $table{$ref} or die qq{No parent table $ref found for $t\n}; + $table{$t}{$ref}++; + } +} +close $schema; + +## Read in special cases and table/version information +$verbose and warn qq{Reading in schema exception information\n}; +my %version_tables; +while (<DATA>) { + if (/^VERSION\s+(\d+\.\d+):\s+(.+)/) { + my $list = join 
'|' => split /\s+/ => $2; + $version_tables{$1} = qr{\b$list\b}; + next; + } + next unless /^(\w+)\s*(.*)/; + $special{$1} = $2||''; + $special{$2} = $1 if length $2; +} + +## Determine the order of tables based on foreign key constraints +$verbose and warn qq{Figuring out order of tables to dump\n}; +my %dumped; +my $bail = 0; +{ + my $found=0; + T: for my $t (sort keys %table) { + next if exists $dumped{$t} and $dumped{$t} >= 1; + $found=1; + for my $dep (sort keys %{$table{$t}}) { + next T if ! exists $dumped{$dep} or $dumped{$dep} < 0; + } + $dumped{$t} = -1 if ! exists $dumped{$t}; + ## Skip certain tables that are not imported + next if exists $special{$t} and !$special{$t}; + push @torder, $special{$t} || $t; + } + last if !$found; + push @torder, "---"; + for (values %dumped) { $_+=2; } + die "Too many loops!\n" if $bail++ > 1000; + redo; +} + +## Prepare the Postgres database for the move +$verbose and warn qq{Writing Postgres transformation information\n}; + +print "\n-- Empty out all existing tables\n"; +$verbose and warn qq{Writing truncates to empty existing tables\n}; +for my $t (@torder) { + next if $t eq '---'; + my $tname = $special{$t}||$t; + printf qq{TRUNCATE TABLE %-18s CASCADE;\n}, qq{"$tname"}; +} +print "\n\n"; + +print qq{-- Rename the "text" table\n}; +print qq{ALTER TABLE pagecontent RENAME TO "text";\n\n}; + +print qq{-- Allow rc_ip to contain empty string, will convert at end\n}; +print qq{ALTER TABLE recentchanges ALTER rc_ip TYPE text USING host(rc_ip);\n\n}; + +print "-- Changing all timestamp fields to handle raw integers\n"; +for my $t (sort keys %tz) { + next if $t eq "archive2"; + for my $c (sort keys %{$tz{$t}}) { + printf "ALTER TABLE %-18s ALTER %-25s TYPE TEXT;\n", $t, $c; + } +} +print "\n"; + +print qq{ +INSERT INTO page VALUES (0,-1,'Dummy Page','',0,0,0,default,now(),0,10); +}; + +## If we have a table _prefix, we need to temporarily rename all of our Postgres +## tables temporarily for the import. 
Perhaps consider making this an auto-schema +## thing in the future. +if (length $table_prefix) { + print qq{\n\n-- Temporarily renaming tables to accomodate the table_prefix "$table_prefix"\n\n}; + for my $t (@torder) { + next if $t eq '---'; + my $tname = $special{$t}||$t; + printf qq{ALTER TABLE %-18s RENAME TO "${table_prefix}$tname"\n}, qq{"$tname"}; + } +} + + +## Try and dump the ill-named "user" table: +## We do this table alone because "user" is a reserved word. +print qq{ + +SET escape_string_warning TO 'off'; +\\o /dev/null + +-- Postgres uses a table name of "mwuser" instead of "user" + +-- Create a dummy user to satisfy fk contraints especially with revisions +SELECT setval('user_user_id_seq',0,'false'); +INSERT INTO mwuser + VALUES (DEFAULT,'Anonymous','',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,now(),now()); + +}; + +push @MYSQLDUMPARGS, "--no-create-info"; + +$verbose and warn qq{Dumping "user" table\n}; +$verbose > 2 and warn Dumper \@MYSQLDUMPARGS; +my $usertable = "${table_prefix}user"; +open my $mfork, "-|" or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, $usertable; +## Unfortunately, there is no easy way to catch errors +my $numusers = 0; +while (<$mfork>) { + ++$numusers and print if s/INSERT INTO $usertable/INSERT INTO mwuser/; +} +close $mfork; +if ($numusers < 1) { + warn qq{No users found, probably a connection error.\n}; + print qq{ERROR: No users found, connection failed, or table "$usertable" does not exist. 
Dump aborted.\n}; + close $mdump; + exit; +} +print "\n-- Users loaded: $numusers\n\n-- Loading rest of the mediawiki schema:\n"; + +warn qq{Dumping all other tables from the MySQL schema\n} if $verbose; + +## Dump the rest of the tables, in chunks based on constraints +## We do not need the user table: +my @dumplist = grep { $_ ne 'user'} @torder; +my @alist; +{ + undef @alist; + PICKATABLE: { + my $tname = shift @dumplist; + ## XXX Make this dynamic below + for my $ver (sort {$b <=> $a } keys %version_tables) { + redo PICKATABLE if $tname =~ $version_tables{$ver}; + } + $tname = "${table_prefix}$tname" if length $table_prefix; + push @alist, $tname; + pop @alist and last if index($alist[-1],'---') >= 0; + redo if @dumplist; + } + + ## Dump everything else + open my $mfork2, "-|" or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, @alist; + print while <$mfork2>; + close $mfork2; + warn qq{Finished dumping from MySQL\n} if $verbose; + + redo if @dumplist; +} + +warn qq{Writing information to return Postgres database to normal\n} if $verbose; +print qq{ALTER TABLE "${table_prefix}text" RENAME TO pagecontent;\n}; +print qq{ALTER TABLE ${table_prefix}recentchanges ALTER rc_ip TYPE cidr USING\n}; +print qq{ CASE WHEN rc_ip = '' THEN NULL ELSE rc_ip::cidr END;\n}; + +## Return tables to their original names if a table prefix was used. +if (length $table_prefix) { + print qq{\n\n-- Renaming tables by removing table prefix "$table_prefix"\n\n}; + my $maxsize = 18; + for (@torder) { + $maxsize = length "$_$table_prefix" if length "$_$table_prefix" > $maxsize; + } + for my $t (@torder) { + next if $t eq '---' or $t eq 'text'; + my $tname = $special{$t}||$t; + printf qq{ALTER TABLE %*s RENAME TO "$tname"\n}, $maxsize+1, qq{"${table_prefix}$tname"}; + } +} + +print qq{\n\n--Returning timestamps to normal\n}; +for my $t (sort keys %tz) { + next if $t eq "archive2"; + for my $c (sort keys %{$tz{$t}}) { + printf "ALTER TABLE %-18s ALTER %-25s TYPE timestamptz\n". 
+ " USING TO_TIMESTAMP($c,'YYYYMMDDHHMISS');\n", $t, $c; + } +} + +## Finally, make a record in the mediawiki_version table about this import +print qq{ +INSERT INTO mediawiki_version (type,mw_version,notes) VALUES ('MySQL import','??', +'Imported from file created on $now. Old version: $current_version'); +}; + + +print "\\o\n\n-- End of dump\n\n"; +select $oldselect; +close $mdump; +exit; + + +__DATA__ +## Known remappings: either indicate the MySQL name, +## or leave blank if it should be skipped +pagecontent text +mwuser user +mediawiki_version +archive2 +profiling +objectcache + +## Which tables to ignore depending on the version +VERSION 1.5: trackback +VERSION 1.6: externallinks job templatelinks transcache +VERSION 1.7: filearchive langlinks querycache_info diff --git a/maintenance/rebuildImages.php b/maintenance/rebuildImages.php index 45477097..38b89a48 100644 --- a/maintenance/rebuildImages.php +++ b/maintenance/rebuildImages.php @@ -125,8 +125,8 @@ class ImageBuilder extends FiveUpgrade { // Fill in the new image info fields $info = $this->imageInfo( $row->img_name ); - global $wgMemc, $wgDBname; - $key = $wgDBname . ":Image:" . 
md5( $row->img_name ); + global $wgMemc; + $key = wfMemcKey( "Image", md5( $row->img_name ) ); $wgMemc->delete( $key ); return array( diff --git a/maintenance/refreshImageCount.php b/maintenance/refreshImageCount.php index 15ce2b91..88ac3c52 100644 --- a/maintenance/refreshImageCount.php +++ b/maintenance/refreshImageCount.php @@ -10,7 +10,7 @@ $dbw =& wfGetDB( DB_MASTER ); // Load the current value from the master $count = $dbw->selectField( 'site_stats', 'ss_images' ); -echo "$wgDBname: forcing ss_images to $count\n"; +echo wfWikiID().": forcing ss_images to $count\n"; // First set to NULL so that it changes on the master $dbw->update( 'site_stats', @@ -22,4 +22,4 @@ $dbw->update( 'site_stats', array( 'ss_images' => $count ), array( 'ss_row_id' => 1 ) ); -?> \ No newline at end of file +?> diff --git a/maintenance/runJobs.php b/maintenance/runJobs.php index d72addc7..343cda8a 100644 --- a/maintenance/runJobs.php +++ b/maintenance/runJobs.php @@ -1,13 +1,22 @@ selectField( 'job', 'count(*)', '', 'runJobs.php' ) ) { while ( false != ($job = Job::pop()) ) { wfWaitForSlaves( 5 ); @@ -15,6 +24,9 @@ while ( $dbw->selectField( 'job', 'count(*)', '', 'runJobs.php' ) ) { if ( !$job->run() ) { print "Error: {$job->error}\n"; } + if ( $maxJobs && ++$n > $maxJobs ) { + break 2; + } } } ?> diff --git a/maintenance/stats.php b/maintenance/stats.php index 8ebc3823..bb19e671 100644 --- a/maintenance/stats.php +++ b/maintenance/stats.php @@ -2,8 +2,8 @@ require_once('commandLine.inc'); print "Requests\n"; -$session = intval($wgMemc->get("$wgDBname:stats:request_with_session")); -$noSession = intval($wgMemc->get("$wgDBname:stats:request_without_session")); +$session = intval($wgMemc->get(wfMemcKey('stats','request_with_session'))); +$noSession = intval($wgMemc->get(wfMemcKey('stats','request_without_session'))); $total = $session + $noSession; printf( "with session: %-10d %6.2f%%\n", $session, $session/$total*100 ); printf( "without session: %-10d %6.2f%%\n", $noSession, 
$noSession/$total*100 ); @@ -11,11 +11,11 @@ printf( "total: %-10d %6.2f%%\n", $total, 100 ); print "\nParser cache\n"; -$hits = intval($wgMemc->get("$wgDBname:stats:pcache_hit")); -$invalid = intval($wgMemc->get("$wgDBname:stats:pcache_miss_invalid")); -$expired = intval($wgMemc->get("$wgDBname:stats:pcache_miss_expired")); -$absent = intval($wgMemc->get("$wgDBname:stats:pcache_miss_absent")); -$stub = intval($wgMemc->get("$wgDBname:stats:pcache_miss_stub")); +$hits = intval($wgMemc->get(wfMemcKey('stats','pcache_hit'))); +$invalid = intval($wgMemc->get(wfMemcKey('stats','pcache_miss_invalid'))); +$expired = intval($wgMemc->get(wfMemcKey('stats','pcache_miss_expired'))); +$absent = intval($wgMemc->get(wfMemcKey('stats','pcache_miss_absent'))); +$stub = intval($wgMemc->get(wfMemcKey('stats','pcache_miss_stub'))); $total = $hits + $invalid + $expired + $absent + $stub; printf( "hits: %-10d %6.2f%%\n", $hits, $hits/$total*100 ); printf( "invalid: %-10d %6.2f%%\n", $invalid, $invalid/$total*100 ); @@ -24,18 +24,18 @@ printf( "absent: %-10d %6.2f%%\n", $absent, $absent/$total*100 ); printf( "stub threshold: %-10d %6.2f%%\n", $stub, $stub/$total*100 ); printf( "total: %-10d %6.2f%%\n", $total, 100 ); -$hits = intval($wgMemc->get("$wgDBname:stats:image_cache_hit")); -$misses = intval($wgMemc->get("$wgDBname:stats:image_cache_miss")); -$updates = intval($wgMemc->get("$wgDBname:stats:image_cache_update")); +$hits = intval($wgMemc->get(wfMemcKey('stats','image_cache_hit'))); +$misses = intval($wgMemc->get(wfMemcKey('stats','image_cache_miss'))); +$updates = intval($wgMemc->get(wfMemcKey('stats','image_cache_update'))); $total = $hits + $misses; print("\nImage cache\n"); printf( "hits: %-10d %6.2f%%\n", $hits, $hits/$total*100 ); printf( "misses: %-10d %6.2f%%\n", $misses, $misses/$total*100 ); printf( "updates: %-10d\n", $updates ); -$hits = intval($wgMemc->get("$wgDBname:stats:diff_cache_hit")); -$misses = intval($wgMemc->get("$wgDBname:stats:diff_cache_miss")); 
-$uncacheable = intval($wgMemc->get("$wgDBname:stats:diff_uncacheable")); +$hits = intval($wgMemc->get(wfMemcKey('stats','diff_cache_hit'))); +$misses = intval($wgMemc->get(wfMemcKey('stats','diff_cache_miss'))); +$uncacheable = intval($wgMemc->get(wfMemcKey('stats','diff_uncacheable'))); $total = $hits + $misses + $uncacheable; print("\nDiff cache\n"); printf( "hits: %-10d %6.2f%%\n", $hits, $hits/$total*100 ); diff --git a/maintenance/storage/checkStorage.php b/maintenance/storage/checkStorage.php index a83d2744..579954d5 100644 --- a/maintenance/storage/checkStorage.php +++ b/maintenance/storage/checkStorage.php @@ -1,468 +1,468 @@ -check( $fix, $xml ); -} - - -//---------------------------------------------------------------------------------- - -class CheckStorage -{ - var $oldIdMap, $errors; - var $dbStore = null; - - var $errorDescriptions = array( - 'restore text' => 'Damaged text, need to be restored from a backup', - 'restore revision' => 'Damaged revision row, need to be restored from a backup', - 'unfixable' => 'Unexpected errors with no automated fixing method', - 'fixed' => 'Errors already fixed', - 'fixable' => 'Errors which would already be fixed if --fix was specified', - ); - - function check( $fix = false, $xml = '' ) { - $fname = 'checkStorage'; - $dbr =& wfGetDB( DB_SLAVE ); - if ( $fix ) { - $dbw =& wfGetDB( DB_MASTER ); - print "Checking, will fix errors if possible...\n"; - } else { - print "Checking...\n"; - } - $maxRevId = $dbr->selectField( 'revision', 'MAX(rev_id)', false, $fname ); - $chunkSize = 1000; - $flagStats = array(); - $objectStats = array(); - $knownFlags = array( 'external', 'gzip', 'object', 'utf-8' ); - $this->errors = array( - 'restore text' => array(), - 'restore revision' => array(), - 'unfixable' => array(), - 'fixed' => array(), - 'fixable' => array(), - ); - - for ( $chunkStart = 1 ; $chunkStart < $maxRevId; $chunkStart += $chunkSize ) { - $chunkEnd = $chunkStart + $chunkSize - 1; - //print "$chunkStart of 
$maxRevId\n"; - - // Fetch revision rows - $this->oldIdMap = array(); - $dbr->ping(); - $res = $dbr->select( 'revision', array( 'rev_id', 'rev_text_id' ), - array( "rev_id BETWEEN $chunkStart AND $chunkEnd" ), $fname ); - while ( $row = $dbr->fetchObject( $res ) ) { - $this->oldIdMap[$row->rev_id] = $row->rev_text_id; - } - $dbr->freeResult( $res ); - - if ( !count( $this->oldIdMap ) ) { - continue; - } - - // Fetch old_flags - $missingTextRows = array_flip( $this->oldIdMap ); - $externalRevs = array(); - $objectRevs = array(); - $res = $dbr->select( 'text', array( 'old_id', 'old_flags' ), - 'old_id IN (' . implode( ',', $this->oldIdMap ) . ')', $fname ); - while ( $row = $dbr->fetchObject( $res ) ) { - $flags = $row->old_flags; - $id = $row->old_id; - - // Create flagStats row if it doesn't exist - $flagStats = $flagStats + array( $flags => 0 ); - // Increment counter - $flagStats[$flags]++; - - // Not missing - unset( $missingTextRows[$row->old_id] ); - - // Check for external or object - if ( $flags == '' ) { - $flagArray = array(); - } else { - $flagArray = explode( ',', $flags ); - } - if ( in_array( 'external', $flagArray ) ) { - $externalRevs[] = $id; - } elseif ( in_array( 'object', $flagArray ) ) { - $objectRevs[] = $id; - } - - // Check for unrecognised flags - if ( $flags == '0' ) { - // This is a known bug from 2004 - // It's safe to just erase the old_flags field - if ( $fix ) { - $this->error( 'fixed', "Warning: old_flags set to 0", $id ); - $dbw->ping(); - $dbw->update( 'text', array( 'old_flags' => '' ), - array( 'old_id' => $id ), $fname ); - echo "Fixed\n"; - } else { - $this->error( 'fixable', "Warning: old_flags set to 0", $id ); - } - } elseif ( count( array_diff( $flagArray, $knownFlags ) ) ) { - $this->error( 'unfixable', "Error: invalid flags field \"$flags\"", $id ); - } - } - $dbr->freeResult( $res ); - - // Output errors for any missing text rows - foreach ( $missingTextRows as $oldId => $revId ) { - $this->error( 'restore revision', 
"Error: missing text row", $oldId ); - } - - // Verify external revisions - $externalConcatBlobs = array(); - $externalNormalBlobs = array(); - if ( count( $externalRevs ) ) { - $res = $dbr->select( 'text', array( 'old_id', 'old_flags', 'old_text' ), - array( 'old_id IN (' . implode( ',', $externalRevs ) . ')' ), $fname ); - while ( $row = $dbr->fetchObject( $res ) ) { - $urlParts = explode( '://', $row->old_text, 2 ); - if ( count( $urlParts ) !== 2 || $urlParts[1] == '' ) { - $this->error( 'restore text', "Error: invalid URL \"{$row->old_text}\"", $row->old_id ); - continue; - } - list( $proto, $path ) = $urlParts; - if ( $proto != 'DB' ) { - $this->error( 'restore text', "Error: invalid external protocol \"$proto\"", $row->old_id ); - continue; - } - $path = explode( '/', $row->old_text ); - $cluster = $path[2]; - $id = $path[3]; - if ( isset( $path[4] ) ) { - $externalConcatBlobs[$cluster][$id][] = $row->old_id; - } else { - $externalNormalBlobs[$cluster][$id][] = $row->old_id; - } - } - $dbr->freeResult( $res ); - } - - // Check external concat blobs for the right header - $this->checkExternalConcatBlobs( $externalConcatBlobs ); - - // Check external normal blobs for existence - if ( count( $externalNormalBlobs ) ) { - if ( is_null( $this->dbStore ) ) { - $this->dbStore = new ExternalStoreDB; - } - foreach ( $externalConcatBlobs as $cluster => $xBlobIds ) { - $blobIds = array_keys( $xBlobIds ); - $extDb =& $this->dbStore->getSlave( $cluster ); - $blobsTable = $this->dbStore->getTable( $extDb ); - $res = $extDb->select( $blobsTable, - array( 'blob_id' ), - array( 'blob_id IN( ' . implode( ',', $blobIds ) . 
')' ), $fname ); - while ( $row = $extDb->fetchObject( $res ) ) { - unset( $xBlobIds[$row->blob_id] ); - } - $extDb->freeResult( $res ); - // Print errors for missing blobs rows - foreach ( $xBlobIds as $blobId => $oldId ) { - $this->error( 'restore text', "Error: missing target $blobId for one-part ES URL", $oldId ); - } - } - } - - // Check local objects - $dbr->ping(); - $concatBlobs = array(); - $curIds = array(); - if ( count( $objectRevs ) ) { - $headerLength = 300; - $res = $dbr->select( 'text', array( 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ), - array( 'old_id IN (' . implode( ',', $objectRevs ) . ')' ), $fname ); - while ( $row = $dbr->fetchObject( $res ) ) { - $oldId = $row->old_id; - if ( !preg_match( '/^O:(\d+):"(\w+)"/', $row->header, $matches ) ) { - $this->error( 'restore text', "Error: invalid object header", $oldId ); - continue; - } - - $className = strtolower( $matches[2] ); - if ( strlen( $className ) != $matches[1] ) { - $this->error( 'restore text', "Error: invalid object header, wrong class name length", $oldId ); - continue; - } - - $objectStats = $objectStats + array( $className => 0 ); - $objectStats[$className]++; - - switch ( $className ) { - case 'concatenatedgziphistoryblob': - // Good - break; - case 'historyblobstub': - case 'historyblobcurstub': - if ( strlen( $row->header ) == $headerLength ) { - $this->error( 'unfixable', "Error: overlong stub header", $oldId ); - continue; - } - $stubObj = unserialize( $row->header ); - if ( !is_object( $stubObj ) ) { - $this->error( 'restore text', "Error: unable to unserialize stub object", $oldId ); - continue; - } - if ( $className == 'historyblobstub' ) { - $concatBlobs[$stubObj->mOldId][] = $oldId; - } else { - $curIds[$stubObj->mCurId][] = $oldId; - } - break; - default: - $this->error( 'unfixable', "Error: unrecognised object class \"$className\"", $oldId ); - } - } - $dbr->freeResult( $res ); - } - - // Check local concat blob validity - $externalConcatBlobs = 
array(); - if ( count( $concatBlobs ) ) { - $headerLength = 300; - $res = $dbr->select( 'text', array( 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ), - array( 'old_id IN (' . implode( ',', array_keys( $concatBlobs ) ) . ')' ), $fname ); - while ( $row = $dbr->fetchObject( $res ) ) { - $flags = explode( ',', $row->old_flags ); - if ( in_array( 'external', $flags ) ) { - // Concat blob is in external storage? - if ( in_array( 'object', $flags ) ) { - $urlParts = explode( '/', $row->header ); - if ( $urlParts[0] != 'DB:' ) { - $this->error( 'unfixable', "Error: unrecognised external storage type \"{$urlParts[0]}", $row->old_id ); - } else { - $cluster = $urlParts[2]; - $id = $urlParts[3]; - if ( !isset( $externalConcatBlobs[$cluster][$id] ) ) { - $externalConcatBlobs[$cluster][$id] = array(); - } - $externalConcatBlobs[$cluster][$id] = array_merge( - $externalConcatBlobs[$cluster][$id], $concatBlobs[$row->old_id] - ); - } - } else { - $this->error( 'unfixable', "Error: invalid flags \"{$row->old_flags}\" on concat bulk row {$row->old_id}", - $concatBlobs[$row->old_id] ); - } - } elseif ( strcasecmp( substr( $row->header, 0, strlen( CONCAT_HEADER ) ), CONCAT_HEADER ) ) { - $this->error( 'restore text', "Error: Incorrect object header for concat bulk row {$row->old_id}", - $concatBlobs[$row->old_id] ); - } # else good - - unset( $concatBlobs[$row->old_id] ); - } - $dbr->freeResult( $res ); - } - - // Check targets of unresolved stubs - $this->checkExternalConcatBlobs( $externalConcatBlobs ); - - // next chunk - } - - print "\n\nErrors:\n"; - foreach( $this->errors as $name => $errors ) { - if ( count( $errors ) ) { - $description = $this->errorDescriptions[$name]; - echo "$description: " . implode( ',', array_keys( $errors ) ) . 
"\n"; - } - } - - if ( count( $this->errors['restore text'] ) && $fix ) { - if ( (string)$xml !== '' ) { - $this->restoreText( array_keys( $this->errors['restore text'] ), $xml ); - } else { - echo "Can't fix text, no XML backup specified\n"; - } - } - - print "\nFlag statistics:\n"; - $total = array_sum( $flagStats ); - foreach ( $flagStats as $flag => $count ) { - printf( "%-30s %10d %5.2f%%\n", $flag, $count, $count / $total * 100 ); - } - print "\nLocal object statistics:\n"; - $total = array_sum( $objectStats ); - foreach ( $objectStats as $className => $count ) { - printf( "%-30s %10d %5.2f%%\n", $className, $count, $count / $total * 100 ); - } - } - - - function error( $type, $msg, $ids ) { - if ( is_array( $ids ) && count( $ids ) == 1 ) { - $ids = reset( $ids ); - } - if ( is_array( $ids ) ) { - $revIds = array(); - foreach ( $ids as $id ) { - $revIds = array_merge( $revIds, array_keys( $this->oldIdMap, $id ) ); - } - print "$msg in text rows " . implode( ', ', $ids ) . - ", revisions " . implode( ', ', $revIds ) . "\n"; - } else { - $id = $ids; - $revIds = array_keys( $this->oldIdMap, $id ); - if ( count( $revIds ) == 1 ) { - print "$msg in old_id $id, rev_id {$revIds[0]}\n"; - } else { - print "$msg in old_id $id, revisions " . implode( ', ', $revIds ) . 
"\n"; - } - } - $this->errors[$type] = $this->errors[$type] + array_flip( $revIds ); - } - - function checkExternalConcatBlobs( $externalConcatBlobs ) { - $fname = 'CheckStorage::checkExternalConcatBlobs'; - if ( !count( $externalConcatBlobs ) ) { - return; - } - - if ( is_null( $this->dbStore ) ) { - $this->dbStore = new ExternalStoreDB; - } - - foreach ( $externalConcatBlobs as $cluster => $oldIds ) { - $blobIds = array_keys( $oldIds ); - $extDb =& $this->dbStore->getSlave( $cluster ); - $blobsTable = $this->dbStore->getTable( $extDb ); - $headerLength = strlen( CONCAT_HEADER ); - $res = $extDb->select( $blobsTable, - array( 'blob_id', "LEFT(blob_text, $headerLength) AS header" ), - array( 'blob_id IN( ' . implode( ',', $blobIds ) . ')' ), $fname ); - while ( $row = $extDb->fetchObject( $res ) ) { - if ( strcasecmp( $row->header, CONCAT_HEADER ) ) { - $this->error( 'restore text', "Error: invalid header on target $cluster/{$row->blob_id} of two-part ES URL", - $oldIds[$row->blob_id] ); - } - unset( $oldIds[$row->blob_id] ); - - } - $extDb->freeResult( $res ); - - // Print errors for missing blobs rows - foreach ( $oldIds as $blobId => $oldIds ) { - $this->error( 'restore text', "Error: missing target $cluster/$blobId for two-part ES URL", $oldIds ); - } - } - } - - function restoreText( $revIds, $xml ) { - global $wgTmpDirectory, $wgDBname; - - if ( !count( $revIds ) ) { - return; - } - - print "Restoring text from XML backup...\n"; - - $revFileName = "$wgTmpDirectory/broken-revlist-$wgDBname"; - $filteredXmlFileName = "$wgTmpDirectory/filtered-$wgDBname.xml"; - - // Write revision list - if ( !file_put_contents( $revFileName, implode( "\n", $revIds ) ) ) { - echo "Error writing revision list, can't restore text\n"; - return; - } - - // Run mwdumper - echo "Filtering XML dump...\n"; - $exitStatus = 0; - passthru( 'mwdumper ' . 
- wfEscapeShellArg( - "--output=file:$filteredXmlFileName", - "--filter=revlist:$revFileName", - $xml - ), $exitStatus - ); - - if ( $exitStatus ) { - echo "mwdumper died with exit status $exitStatus\n"; - return; - } - - $file = fopen( $filteredXmlFileName, 'r' ); - if ( !$file ) { - echo "Unable to open filtered XML file\n"; - return; - } - - $dbr =& wfGetDB( DB_SLAVE ); - $dbw =& wfGetDB( DB_MASTER ); - $dbr->ping(); - $dbw->ping(); - - $source = new ImportStreamSource( $file ); - $importer = new WikiImporter( $source ); - $importer->setRevisionCallback( array( &$this, 'importRevision' ) ); - $importer->doImport(); - } - - function importRevision( &$revision, &$importer ) { - $fname = 'CheckStorage::importRevision'; - - $id = $revision->getID(); - $text = $revision->getText(); - if ( $text === '' ) { - // This is what happens if the revision was broken at the time the - // dump was made. Unfortunately, it also happens if the revision was - // legitimately blank, so there's no way to tell the difference. To - // be safe, we'll skip it and leave it broken - $id = $id ? 
$id : ''; - echo "Revision $id is blank in the dump, may have been broken before export\n"; - return; - } - - if ( !$id ) { - // No ID, can't import - echo "No id tag in revision, can't import\n"; - return; - } - - // Find text row again - $dbr =& wfGetDB( DB_SLAVE ); - $oldId = $dbr->selectField( 'revision', 'rev_text_id', array( 'rev_id' => $id ), $fname ); - if ( !$oldId ) { - echo "Missing revision row for rev_id $id\n"; - return; - } - - // Compress the text - $flags = Revision::compressRevisionText( $text ); - - // Update the text row - $dbw->update( 'text', - array( 'old_flags' => $flags, 'old_text' => $text ), - array( 'old_id' => $oldId ), - $fname, array( 'LIMIT' => 1 ) - ); - - // Remove it from the unfixed list and add it to the fixed list - unset( $this->errors['restore text'][$id] ); - $this->errors['fixed'][$id] = true; - } -} -?> +check( $fix, $xml ); +} + + +//---------------------------------------------------------------------------------- + +class CheckStorage +{ + var $oldIdMap, $errors; + var $dbStore = null; + + var $errorDescriptions = array( + 'restore text' => 'Damaged text, need to be restored from a backup', + 'restore revision' => 'Damaged revision row, need to be restored from a backup', + 'unfixable' => 'Unexpected errors with no automated fixing method', + 'fixed' => 'Errors already fixed', + 'fixable' => 'Errors which would already be fixed if --fix was specified', + ); + + function check( $fix = false, $xml = '' ) { + $fname = 'checkStorage'; + $dbr =& wfGetDB( DB_SLAVE ); + if ( $fix ) { + $dbw =& wfGetDB( DB_MASTER ); + print "Checking, will fix errors if possible...\n"; + } else { + print "Checking...\n"; + } + $maxRevId = $dbr->selectField( 'revision', 'MAX(rev_id)', false, $fname ); + $chunkSize = 1000; + $flagStats = array(); + $objectStats = array(); + $knownFlags = array( 'external', 'gzip', 'object', 'utf-8' ); + $this->errors = array( + 'restore text' => array(), + 'restore revision' => array(), + 'unfixable' => array(), 
+ 'fixed' => array(), + 'fixable' => array(), + ); + + for ( $chunkStart = 1 ; $chunkStart < $maxRevId; $chunkStart += $chunkSize ) { + $chunkEnd = $chunkStart + $chunkSize - 1; + //print "$chunkStart of $maxRevId\n"; + + // Fetch revision rows + $this->oldIdMap = array(); + $dbr->ping(); + $res = $dbr->select( 'revision', array( 'rev_id', 'rev_text_id' ), + array( "rev_id BETWEEN $chunkStart AND $chunkEnd" ), $fname ); + while ( $row = $dbr->fetchObject( $res ) ) { + $this->oldIdMap[$row->rev_id] = $row->rev_text_id; + } + $dbr->freeResult( $res ); + + if ( !count( $this->oldIdMap ) ) { + continue; + } + + // Fetch old_flags + $missingTextRows = array_flip( $this->oldIdMap ); + $externalRevs = array(); + $objectRevs = array(); + $res = $dbr->select( 'text', array( 'old_id', 'old_flags' ), + 'old_id IN (' . implode( ',', $this->oldIdMap ) . ')', $fname ); + while ( $row = $dbr->fetchObject( $res ) ) { + $flags = $row->old_flags; + $id = $row->old_id; + + // Create flagStats row if it doesn't exist + $flagStats = $flagStats + array( $flags => 0 ); + // Increment counter + $flagStats[$flags]++; + + // Not missing + unset( $missingTextRows[$row->old_id] ); + + // Check for external or object + if ( $flags == '' ) { + $flagArray = array(); + } else { + $flagArray = explode( ',', $flags ); + } + if ( in_array( 'external', $flagArray ) ) { + $externalRevs[] = $id; + } elseif ( in_array( 'object', $flagArray ) ) { + $objectRevs[] = $id; + } + + // Check for unrecognised flags + if ( $flags == '0' ) { + // This is a known bug from 2004 + // It's safe to just erase the old_flags field + if ( $fix ) { + $this->error( 'fixed', "Warning: old_flags set to 0", $id ); + $dbw->ping(); + $dbw->update( 'text', array( 'old_flags' => '' ), + array( 'old_id' => $id ), $fname ); + echo "Fixed\n"; + } else { + $this->error( 'fixable', "Warning: old_flags set to 0", $id ); + } + } elseif ( count( array_diff( $flagArray, $knownFlags ) ) ) { + $this->error( 'unfixable', "Error: invalid 
flags field \"$flags\"", $id ); + } + } + $dbr->freeResult( $res ); + + // Output errors for any missing text rows + foreach ( $missingTextRows as $oldId => $revId ) { + $this->error( 'restore revision', "Error: missing text row", $oldId ); + } + + // Verify external revisions + $externalConcatBlobs = array(); + $externalNormalBlobs = array(); + if ( count( $externalRevs ) ) { + $res = $dbr->select( 'text', array( 'old_id', 'old_flags', 'old_text' ), + array( 'old_id IN (' . implode( ',', $externalRevs ) . ')' ), $fname ); + while ( $row = $dbr->fetchObject( $res ) ) { + $urlParts = explode( '://', $row->old_text, 2 ); + if ( count( $urlParts ) !== 2 || $urlParts[1] == '' ) { + $this->error( 'restore text', "Error: invalid URL \"{$row->old_text}\"", $row->old_id ); + continue; + } + list( $proto, $path ) = $urlParts; + if ( $proto != 'DB' ) { + $this->error( 'restore text', "Error: invalid external protocol \"$proto\"", $row->old_id ); + continue; + } + $path = explode( '/', $row->old_text ); + $cluster = $path[2]; + $id = $path[3]; + if ( isset( $path[4] ) ) { + $externalConcatBlobs[$cluster][$id][] = $row->old_id; + } else { + $externalNormalBlobs[$cluster][$id][] = $row->old_id; + } + } + $dbr->freeResult( $res ); + } + + // Check external concat blobs for the right header + $this->checkExternalConcatBlobs( $externalConcatBlobs ); + + // Check external normal blobs for existence + if ( count( $externalNormalBlobs ) ) { + if ( is_null( $this->dbStore ) ) { + $this->dbStore = new ExternalStoreDB; + } + foreach ( $externalConcatBlobs as $cluster => $xBlobIds ) { + $blobIds = array_keys( $xBlobIds ); + $extDb =& $this->dbStore->getSlave( $cluster ); + $blobsTable = $this->dbStore->getTable( $extDb ); + $res = $extDb->select( $blobsTable, + array( 'blob_id' ), + array( 'blob_id IN( ' . implode( ',', $blobIds ) . 
')' ), $fname ); + while ( $row = $extDb->fetchObject( $res ) ) { + unset( $xBlobIds[$row->blob_id] ); + } + $extDb->freeResult( $res ); + // Print errors for missing blobs rows + foreach ( $xBlobIds as $blobId => $oldId ) { + $this->error( 'restore text', "Error: missing target $blobId for one-part ES URL", $oldId ); + } + } + } + + // Check local objects + $dbr->ping(); + $concatBlobs = array(); + $curIds = array(); + if ( count( $objectRevs ) ) { + $headerLength = 300; + $res = $dbr->select( 'text', array( 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ), + array( 'old_id IN (' . implode( ',', $objectRevs ) . ')' ), $fname ); + while ( $row = $dbr->fetchObject( $res ) ) { + $oldId = $row->old_id; + if ( !preg_match( '/^O:(\d+):"(\w+)"/', $row->header, $matches ) ) { + $this->error( 'restore text', "Error: invalid object header", $oldId ); + continue; + } + + $className = strtolower( $matches[2] ); + if ( strlen( $className ) != $matches[1] ) { + $this->error( 'restore text', "Error: invalid object header, wrong class name length", $oldId ); + continue; + } + + $objectStats = $objectStats + array( $className => 0 ); + $objectStats[$className]++; + + switch ( $className ) { + case 'concatenatedgziphistoryblob': + // Good + break; + case 'historyblobstub': + case 'historyblobcurstub': + if ( strlen( $row->header ) == $headerLength ) { + $this->error( 'unfixable', "Error: overlong stub header", $oldId ); + continue; + } + $stubObj = unserialize( $row->header ); + if ( !is_object( $stubObj ) ) { + $this->error( 'restore text', "Error: unable to unserialize stub object", $oldId ); + continue; + } + if ( $className == 'historyblobstub' ) { + $concatBlobs[$stubObj->mOldId][] = $oldId; + } else { + $curIds[$stubObj->mCurId][] = $oldId; + } + break; + default: + $this->error( 'unfixable', "Error: unrecognised object class \"$className\"", $oldId ); + } + } + $dbr->freeResult( $res ); + } + + // Check local concat blob validity + $externalConcatBlobs = 
array(); + if ( count( $concatBlobs ) ) { + $headerLength = 300; + $res = $dbr->select( 'text', array( 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ), + array( 'old_id IN (' . implode( ',', array_keys( $concatBlobs ) ) . ')' ), $fname ); + while ( $row = $dbr->fetchObject( $res ) ) { + $flags = explode( ',', $row->old_flags ); + if ( in_array( 'external', $flags ) ) { + // Concat blob is in external storage? + if ( in_array( 'object', $flags ) ) { + $urlParts = explode( '/', $row->header ); + if ( $urlParts[0] != 'DB:' ) { + $this->error( 'unfixable', "Error: unrecognised external storage type \"{$urlParts[0]}", $row->old_id ); + } else { + $cluster = $urlParts[2]; + $id = $urlParts[3]; + if ( !isset( $externalConcatBlobs[$cluster][$id] ) ) { + $externalConcatBlobs[$cluster][$id] = array(); + } + $externalConcatBlobs[$cluster][$id] = array_merge( + $externalConcatBlobs[$cluster][$id], $concatBlobs[$row->old_id] + ); + } + } else { + $this->error( 'unfixable', "Error: invalid flags \"{$row->old_flags}\" on concat bulk row {$row->old_id}", + $concatBlobs[$row->old_id] ); + } + } elseif ( strcasecmp( substr( $row->header, 0, strlen( CONCAT_HEADER ) ), CONCAT_HEADER ) ) { + $this->error( 'restore text', "Error: Incorrect object header for concat bulk row {$row->old_id}", + $concatBlobs[$row->old_id] ); + } # else good + + unset( $concatBlobs[$row->old_id] ); + } + $dbr->freeResult( $res ); + } + + // Check targets of unresolved stubs + $this->checkExternalConcatBlobs( $externalConcatBlobs ); + + // next chunk + } + + print "\n\nErrors:\n"; + foreach( $this->errors as $name => $errors ) { + if ( count( $errors ) ) { + $description = $this->errorDescriptions[$name]; + echo "$description: " . implode( ',', array_keys( $errors ) ) . 
"\n"; + } + } + + if ( count( $this->errors['restore text'] ) && $fix ) { + if ( (string)$xml !== '' ) { + $this->restoreText( array_keys( $this->errors['restore text'] ), $xml ); + } else { + echo "Can't fix text, no XML backup specified\n"; + } + } + + print "\nFlag statistics:\n"; + $total = array_sum( $flagStats ); + foreach ( $flagStats as $flag => $count ) { + printf( "%-30s %10d %5.2f%%\n", $flag, $count, $count / $total * 100 ); + } + print "\nLocal object statistics:\n"; + $total = array_sum( $objectStats ); + foreach ( $objectStats as $className => $count ) { + printf( "%-30s %10d %5.2f%%\n", $className, $count, $count / $total * 100 ); + } + } + + + function error( $type, $msg, $ids ) { + if ( is_array( $ids ) && count( $ids ) == 1 ) { + $ids = reset( $ids ); + } + if ( is_array( $ids ) ) { + $revIds = array(); + foreach ( $ids as $id ) { + $revIds = array_merge( $revIds, array_keys( $this->oldIdMap, $id ) ); + } + print "$msg in text rows " . implode( ', ', $ids ) . + ", revisions " . implode( ', ', $revIds ) . "\n"; + } else { + $id = $ids; + $revIds = array_keys( $this->oldIdMap, $id ); + if ( count( $revIds ) == 1 ) { + print "$msg in old_id $id, rev_id {$revIds[0]}\n"; + } else { + print "$msg in old_id $id, revisions " . implode( ', ', $revIds ) . 
"\n"; + } + } + $this->errors[$type] = $this->errors[$type] + array_flip( $revIds ); + } + + function checkExternalConcatBlobs( $externalConcatBlobs ) { + $fname = 'CheckStorage::checkExternalConcatBlobs'; + if ( !count( $externalConcatBlobs ) ) { + return; + } + + if ( is_null( $this->dbStore ) ) { + $this->dbStore = new ExternalStoreDB; + } + + foreach ( $externalConcatBlobs as $cluster => $oldIds ) { + $blobIds = array_keys( $oldIds ); + $extDb =& $this->dbStore->getSlave( $cluster ); + $blobsTable = $this->dbStore->getTable( $extDb ); + $headerLength = strlen( CONCAT_HEADER ); + $res = $extDb->select( $blobsTable, + array( 'blob_id', "LEFT(blob_text, $headerLength) AS header" ), + array( 'blob_id IN( ' . implode( ',', $blobIds ) . ')' ), $fname ); + while ( $row = $extDb->fetchObject( $res ) ) { + if ( strcasecmp( $row->header, CONCAT_HEADER ) ) { + $this->error( 'restore text', "Error: invalid header on target $cluster/{$row->blob_id} of two-part ES URL", + $oldIds[$row->blob_id] ); + } + unset( $oldIds[$row->blob_id] ); + + } + $extDb->freeResult( $res ); + + // Print errors for missing blobs rows + foreach ( $oldIds as $blobId => $oldIds ) { + $this->error( 'restore text', "Error: missing target $cluster/$blobId for two-part ES URL", $oldIds ); + } + } + } + + function restoreText( $revIds, $xml ) { + global $wgTmpDirectory, $wgDBname; + + if ( !count( $revIds ) ) { + return; + } + + print "Restoring text from XML backup...\n"; + + $revFileName = "$wgTmpDirectory/broken-revlist-$wgDBname"; + $filteredXmlFileName = "$wgTmpDirectory/filtered-$wgDBname.xml"; + + // Write revision list + if ( !file_put_contents( $revFileName, implode( "\n", $revIds ) ) ) { + echo "Error writing revision list, can't restore text\n"; + return; + } + + // Run mwdumper + echo "Filtering XML dump...\n"; + $exitStatus = 0; + passthru( 'mwdumper ' . 
+ wfEscapeShellArg( + "--output=file:$filteredXmlFileName", + "--filter=revlist:$revFileName", + $xml + ), $exitStatus + ); + + if ( $exitStatus ) { + echo "mwdumper died with exit status $exitStatus\n"; + return; + } + + $file = fopen( $filteredXmlFileName, 'r' ); + if ( !$file ) { + echo "Unable to open filtered XML file\n"; + return; + } + + $dbr =& wfGetDB( DB_SLAVE ); + $dbw =& wfGetDB( DB_MASTER ); + $dbr->ping(); + $dbw->ping(); + + $source = new ImportStreamSource( $file ); + $importer = new WikiImporter( $source ); + $importer->setRevisionCallback( array( &$this, 'importRevision' ) ); + $importer->doImport(); + } + + function importRevision( &$revision, &$importer ) { + $fname = 'CheckStorage::importRevision'; + + $id = $revision->getID(); + $text = $revision->getText(); + if ( $text === '' ) { + // This is what happens if the revision was broken at the time the + // dump was made. Unfortunately, it also happens if the revision was + // legitimately blank, so there's no way to tell the difference. To + // be safe, we'll skip it and leave it broken + $id = $id ? 
$id : ''; + echo "Revision $id is blank in the dump, may have been broken before export\n"; + return; + } + + if ( !$id ) { + // No ID, can't import + echo "No id tag in revision, can't import\n"; + return; + } + + // Find text row again + $dbr =& wfGetDB( DB_SLAVE ); + $oldId = $dbr->selectField( 'revision', 'rev_text_id', array( 'rev_id' => $id ), $fname ); + if ( !$oldId ) { + echo "Missing revision row for rev_id $id\n"; + return; + } + + // Compress the text + $flags = Revision::compressRevisionText( $text ); + + // Update the text row + $dbw->update( 'text', + array( 'old_flags' => $flags, 'old_text' => $text ), + array( 'old_id' => $oldId ), + $fname, array( 'LIMIT' => 1 ) + ); + + // Remove it from the unfixed list and add it to the fixed list + unset( $this->errors['restore text'][$id] ); + $this->errors['fixed'][$id] = true; + } +} +?> diff --git a/maintenance/storage/compressOld.inc b/maintenance/storage/compressOld.inc index b7d7094f..3c426841 100644 --- a/maintenance/storage/compressOld.inc +++ b/maintenance/storage/compressOld.inc @@ -155,6 +155,17 @@ function compressWithConcat( $startId, $maxChunkSize, $maxChunkFactor, $factorTh $titleObj = Title::makeTitle( $pageRow->page_namespace, $pageRow->page_title ); print "$pageId\t" . $titleObj->getPrefixedDBkey() . " "; + print_r( + array( + 'rev_page' => $pageRow->page_id, + # Don't operate on the current revision + # Use < instead of <> in case the current revision has changed + # since the page select, which wasn't locking + 'rev_id < ' . $pageRow->page_latest + ) + $conds + ); + exit; + # Load revisions $revRes = $dbw->select( $tables, $fields, array( diff --git a/maintenance/tables.sql b/maintenance/tables.sql index 288d4a06..3ffa5e5f 100644 --- a/maintenance/tables.sql +++ b/maintenance/tables.sql @@ -23,7 +23,7 @@ -- in early 2002 after a lot of trouble with the fields -- auto-updating. 
-- --- The PostgreSQL backend uses DATETIME fields for timestamps, +-- The Postgres backend uses DATETIME fields for timestamps, -- and we will migrate the MySQL definitions at some point as -- well. -- @@ -97,18 +97,18 @@ CREATE TABLE /*$wgDBprefix*/user ( -- Initially NULL; when a user's e-mail address has been -- validated by returning with a mailed token, this is -- set to the current timestamp. - user_email_authenticated CHAR(14) BINARY, + user_email_authenticated char(14) binary, -- Randomly generated token created when the e-mail address -- is set and a confirmation test mail sent. - user_email_token CHAR(32) BINARY, + user_email_token char(32) binary, -- Expiration date for the user_email_token - user_email_token_expires CHAR(14) BINARY, + user_email_token_expires char(14) binary, -- Timestamp of account registration. -- Accounts predating this schema addition may contain NULL. - user_registration CHAR(14) BINARY, + user_registration char(14) binary, PRIMARY KEY user_id (user_id), UNIQUE INDEX user_name (user_name), @@ -152,7 +152,8 @@ CREATE TABLE /*$wgDBprefix*/user_newtalk ( user_ip varchar(40) NOT NULL default '', INDEX user_id (user_id), INDEX user_ip (user_ip) -); + +) TYPE=InnoDB; -- @@ -365,7 +366,7 @@ CREATE TABLE /*$wgDBprefix*/pagelinks ( pl_namespace int NOT NULL default '0', pl_title varchar(255) binary NOT NULL default '', - UNIQUE KEY pl_from(pl_from,pl_namespace,pl_title), + UNIQUE KEY pl_from (pl_from,pl_namespace,pl_title), KEY (pl_namespace,pl_title) ) TYPE=InnoDB; @@ -385,7 +386,7 @@ CREATE TABLE /*$wgDBprefix*/templatelinks ( tl_namespace int NOT NULL default '0', tl_title varchar(255) binary NOT NULL default '', - UNIQUE KEY tl_from(tl_from,tl_namespace,tl_title), + UNIQUE KEY tl_from (tl_from,tl_namespace,tl_title), KEY (tl_namespace,tl_title) ) TYPE=InnoDB; @@ -404,7 +405,7 @@ CREATE TABLE /*$wgDBprefix*/imagelinks ( -- all such pages are in namespace 6 (NS_IMAGE). 
il_to varchar(255) binary NOT NULL default '', - UNIQUE KEY il_from(il_from,il_to), + UNIQUE KEY il_from (il_from,il_to), KEY (il_to) ) TYPE=InnoDB; @@ -439,13 +440,13 @@ CREATE TABLE /*$wgDBprefix*/categorylinks ( -- sorting method by approximate addition time. cl_timestamp timestamp NOT NULL, - UNIQUE KEY cl_from(cl_from,cl_to), + UNIQUE KEY cl_from (cl_from,cl_to), -- We always sort within a given category... - KEY cl_sortkey(cl_to,cl_sortkey), + KEY cl_sortkey (cl_to,cl_sortkey), -- Not really used? - KEY cl_timestamp(cl_to,cl_timestamp) + KEY cl_timestamp (cl_to,cl_timestamp) ) TYPE=InnoDB; @@ -539,7 +540,7 @@ CREATE TABLE /*$wgDBprefix*/site_stats ( -- that have been visited.) -- CREATE TABLE /*$wgDBprefix*/hitcounter ( - hc_id INTEGER UNSIGNED NOT NULL + hc_id int unsigned NOT NULL ) TYPE=HEAP MAX_ROWS=25000; @@ -552,7 +553,7 @@ CREATE TABLE /*$wgDBprefix*/ipblocks ( ipb_id int(8) NOT NULL auto_increment, -- Blocked IP address in dotted-quad form or user name. - ipb_address varchar(40) binary NOT NULL default '', + ipb_address tinyblob NOT NULL default '', -- Blocked user ID or 0 for IP blocks. ipb_user int(8) unsigned NOT NULL default '0', @@ -570,20 +571,32 @@ CREATE TABLE /*$wgDBprefix*/ipblocks ( -- Indicates that the IP address was banned because a banned -- user accessed a page through it. If this is 1, ipb_address -- will be hidden, and the block identified by block ID number. - ipb_auto tinyint(1) NOT NULL default '0', + ipb_auto bool NOT NULL default 0, + + -- If set to 1, block applies only to logged-out users + ipb_anon_only bool NOT NULL default 0, + + -- Block prevents account creation from matching IP addresses + ipb_create_account bool NOT NULL default 1, -- Time at which the block will expire. 
ipb_expiry char(14) binary NOT NULL default '', -- Start and end of an address range, in hexadecimal -- Size chosen to allow IPv6 - ipb_range_start varchar(32) NOT NULL default '', - ipb_range_end varchar(32) NOT NULL default '', + ipb_range_start tinyblob NOT NULL default '', + ipb_range_end tinyblob NOT NULL default '', PRIMARY KEY ipb_id (ipb_id), - INDEX ipb_address (ipb_address), + + -- Unique index to support "user already blocked" messages + -- Any new options which prevent collisions should be included + UNIQUE INDEX ipb_address (ipb_address(255), ipb_user, ipb_auto, ipb_anon_only), + INDEX ipb_user (ipb_user), - INDEX ipb_range (ipb_range_start(8), ipb_range_end(8)) + INDEX ipb_range (ipb_range_start(8), ipb_range_end(8)), + INDEX ipb_timestamp (ipb_timestamp), + INDEX ipb_expiry (ipb_expiry) ) TYPE=InnoDB; @@ -601,14 +614,14 @@ CREATE TABLE /*$wgDBprefix*/image ( img_size int(8) unsigned NOT NULL default '0', -- For images, size in pixels. - img_width int(5) NOT NULL default '0', - img_height int(5) NOT NULL default '0', + img_width int(5) NOT NULL default '0', + img_height int(5) NOT NULL default '0', -- Extracted EXIF metadata stored as a serialized PHP array. img_metadata mediumblob NOT NULL, -- For images, bits per pixel if known. 
- img_bits int(3) NOT NULL default '0', + img_bits int(3) NOT NULL default '0', -- Media type as defined by the MEDIATYPE_xxx constants img_media_type ENUM("UNKNOWN", "BITMAP", "DRAWING", "AUDIO", "VIDEO", "MULTIMEDIA", "OFFICE", "TEXT", "EXECUTABLE", "ARCHIVE") default NULL, @@ -676,7 +689,7 @@ CREATE TABLE /*$wgDBprefix*/oldimage ( -- CREATE TABLE /*$wgDBprefix*/filearchive ( -- Unique row id - fa_id int not null auto_increment, + fa_id int NOT NULL auto_increment, -- Original base filename; key to image.img_name, page.page_title, etc fa_name varchar(255) binary NOT NULL default '', @@ -703,10 +716,10 @@ CREATE TABLE /*$wgDBprefix*/filearchive ( -- Duped fields from image fa_size int(8) unsigned default '0', - fa_width int(5) default '0', - fa_height int(5) default '0', + fa_width int(5) default '0', + fa_height int(5) default '0', fa_metadata mediumblob, - fa_bits int(3) default '0', + fa_bits int(3) default '0', fa_media_type ENUM("UNKNOWN", "BITMAP", "DRAWING", "AUDIO", "VIDEO", "MULTIMEDIA", "OFFICE", "TEXT", "EXECUTABLE", "ARCHIVE") default NULL, fa_major_mime ENUM("unknown", "application", "audio", "image", "text", "video", "message", "model", "multipart") default "unknown", fa_minor_mime varchar(32) default "unknown", @@ -782,8 +795,9 @@ CREATE TABLE /*$wgDBprefix*/recentchanges ( INDEX rc_timestamp (rc_timestamp), INDEX rc_namespace_title (rc_namespace, rc_title), INDEX rc_cur_id (rc_cur_id), - INDEX new_name_timestamp(rc_new,rc_namespace,rc_timestamp), - INDEX rc_ip (rc_ip) + INDEX new_name_timestamp (rc_new,rc_namespace,rc_timestamp), + INDEX rc_ip (rc_ip), + INDEX rc_ns_usertext (rc_namespace, rc_user_text) ) TYPE=InnoDB; @@ -802,7 +816,7 @@ CREATE TABLE /*$wgDBprefix*/watchlist ( wl_notificationtimestamp varchar(14) binary, UNIQUE KEY (wl_user, wl_namespace, wl_title), - KEY namespace_title (wl_namespace,wl_title) + KEY namespace_title (wl_namespace, wl_title) ) TYPE=InnoDB; @@ -870,10 +884,10 @@ CREATE TABLE /*$wgDBprefix*/interwiki ( -- A boolean 
value indicating whether the wiki is in this project -- (used, for example, to detect redirect loops) - iw_local BOOL NOT NULL, + iw_local bool NOT NULL, -- Boolean value indicating whether interwiki transclusions are allowed. - iw_trans TINYINT(1) NOT NULL DEFAULT 0, + iw_trans tinyint(1) NOT NULL default 0, UNIQUE KEY iw_prefix (iw_prefix) @@ -901,11 +915,11 @@ CREATE TABLE /*$wgDBprefix*/querycache ( -- For a few generic cache operations if not using Memcached -- CREATE TABLE /*$wgDBprefix*/objectcache ( - keyname char(255) binary not null default '', + keyname char(255) binary NOT NULL default '', value mediumblob, exptime datetime, - unique key (keyname), - key (exptime) + UNIQUE KEY (keyname), + KEY (exptime) ) TYPE=InnoDB; @@ -913,10 +927,10 @@ CREATE TABLE /*$wgDBprefix*/objectcache ( -- Cache of interwiki transclusion -- CREATE TABLE /*$wgDBprefix*/transcache ( - tc_url VARCHAR(255) NOT NULL, - tc_contents TEXT, - tc_time INT NOT NULL, - UNIQUE INDEX tc_url_idx(tc_url) + tc_url varchar(255) NOT NULL, + tc_contents text, + tc_time int NOT NULL, + UNIQUE INDEX tc_url_idx (tc_url) ) TYPE=InnoDB; CREATE TABLE /*$wgDBprefix*/logging ( @@ -951,14 +965,15 @@ CREATE TABLE /*$wgDBprefix*/logging ( ) TYPE=InnoDB; CREATE TABLE /*$wgDBprefix*/trackbacks ( - tb_id integer AUTO_INCREMENT PRIMARY KEY, - tb_page integer REFERENCES page(page_id) ON DELETE CASCADE, - tb_title varchar(255) NOT NULL, - tb_url varchar(255) NOT NULL, - tb_ex text, - tb_name varchar(255), - - INDEX (tb_page) + tb_id int auto_increment, + tb_page int REFERENCES page(page_id) ON DELETE CASCADE, + tb_title varchar(255) NOT NULL, + tb_url varchar(255) NOT NULL, + tb_ex text, + tb_name varchar(255), + + PRIMARY KEY (tb_id), + INDEX (tb_page) ) TYPE=InnoDB; @@ -986,13 +1001,15 @@ CREATE TABLE /*$wgDBprefix*/job ( -- Details of updates to cached special pages CREATE TABLE /*$wgDBprefix*/querycache_info ( - -- Special page name - -- Corresponds to a qc_type value - qci_type varchar(32) NOT NULL default 
'', + -- Special page name + -- Corresponds to a qc_type value + qci_type varchar(32) NOT NULL default '', - -- Timestamp of last update - qci_timestamp char(14) NOT NULL default '19700101000000', + -- Timestamp of last update + qci_timestamp char(14) NOT NULL default '19700101000000', - UNIQUE KEY ( qci_type ) + UNIQUE KEY ( qci_type ) ) TYPE=InnoDB; + +-- vim: sw=2 sts=2 et diff --git a/maintenance/update.php b/maintenance/update.php index 8643aa79..d2dcbf92 100644 --- a/maintenance/update.php +++ b/maintenance/update.php @@ -15,8 +15,6 @@ require_once( "commandLine.inc" ); require_once( "updaters.inc" ); $wgTitle = Title::newFromText( "MediaWiki database updater" ); $dbclass = 'Database' . ucfirst( $wgDBtype ) ; -require_once("$dbclass.php"); -$dbc = new $dbclass; echo( "MediaWiki {$wgVersion} Updater\n\n" ); @@ -32,20 +30,16 @@ if( !isset( $wgDBadminuser ) || !isset( $wgDBadminpassword ) ) { # Attempt to connect to the database as a privileged user # This will vomit up an error if there are permissions problems -$wgDatabase = $dbc->newFromParams( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname, 1 ); +$wgDatabase = new $dbclass( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname, 1 ); if( !$wgDatabase->isOpen() ) { # Appears to have failed echo( "A connection to the database could not be established. Check the\n" ); - # Let's be a bit clever and guess at what's wrong - if( isset( $wgDBadminuser ) && isset( $wgDBadminpassword ) ) { - # Tell the user the value(s) are wrong - echo( 'values of $wgDBadminuser and $wgDBadminpassword.' . 
"\n" ); - } + echo( "values of \$wgDBadminuser and \$wgDBadminpassword.\n" ); exit(); } -print "Going to run database updates for $wgDBname\n"; +print "Going to run database updates for ".wfWikiID()."\n"; print "Depending on the size of your database this may take a while!\n"; if( !isset( $options['quick'] ) ) { diff --git a/maintenance/updateSpecialPages.php b/maintenance/updateSpecialPages.php index 71c688fc..a7a72b58 100644 --- a/maintenance/updateSpecialPages.php +++ b/maintenance/updateSpecialPages.php @@ -33,8 +33,8 @@ foreach ( $wgQueryPages as $page ) { print "No such special page: $special\n"; exit; } - $file = $specialObj->getFile(); - if ( $file ) { + if ( !class_exists( $class ) ) { + $file = $specialObj->getFile(); require_once( $file ); } $queryPage = new $class; diff --git a/maintenance/updaters.inc b/maintenance/updaters.inc index 164a00cf..d334660e 100644 --- a/maintenance/updaters.inc +++ b/maintenance/updaters.inc @@ -56,6 +56,7 @@ $wgNewFields = array( array( 'interwiki', 'iw_trans', 'patch-interwiki-trans.sql' ), array( 'ipblocks', 'ipb_range_start', 'patch-ipb_range_start.sql' ), array( 'site_stats', 'ss_images', 'patch-ss_images.sql' ), + array( 'ipblocks', 'ipb_anon_only', 'patch-ipb_anon_only.sql' ), ); function rename_table( $from, $to, $patch ) { @@ -761,11 +762,33 @@ function do_templatelinks_update() { echo "Done. Please run maintenance/refreshLinks.php for a more thorough templatelinks update.\n"; } +# July 2006 +# Add ( rc_namespace, rc_user_text ) index [R. 
Church] +function do_rc_indices_update() { + global $wgDatabase; + echo( "Checking for additional recent changes indices...\n" ); + # See if we can find the index we want + $info = $wgDatabase->indexInfo( 'recentchanges', 'rc_ns_usertext', __METHOD__ ); + if( !$info ) { + # None, so create + echo( "...index on ( rc_namespace, rc_user_text ) not found; creating\n" ); + dbsource( archive( 'patch-recentchanges-utindex.sql' ) ); + } else { + # Index seems to exist + echo( "...seems to be ok\n" ); + } +} + function do_all_updates( $doShared = false ) { - global $wgNewTables, $wgNewFields, $wgRenamedTables, $wgSharedDB, $wgDatabase; + global $wgNewTables, $wgNewFields, $wgRenamedTables, $wgSharedDB, $wgDatabase, $wgDBtype; $doUser = !$wgSharedDB || $doShared; + if ($wgDBtype === 'postgres') { + do_postgres_updates(); + return; + } + # Rename tables foreach ( $wgRenamedTables as $tableRecord ) { rename_table( $tableRecord[0], $tableRecord[1], $tableRecord[2] ); @@ -819,6 +842,8 @@ function do_all_updates( $doShared = false ) { do_logging_timestamp_index(); flush(); do_page_random_update(); flush(); + + do_rc_indices_update(); flush(); initialiseMessages(); flush(); } @@ -832,4 +857,121 @@ function archive($name) { return "$IP/maintenance/archives/$name"; } } + +function do_postgres_updates() { + global $wgDatabase, $wgVersion, $wgDBmwschema; + + $version = "1.7.1"; + + # Just in case their LocalSetings.php does not have this: + if ( !isset( $wgDBmwschema )) + $wgDBmwschema = 'mediawiki'; + + if ($wgDatabase->tableExists("mediawiki_version")) { + $version = "1.8"; + } + + if ($version == '1.7.1') { + $upgrade = <<query($upgrade); + + } ## end version 1.7.1 upgrade + + else { + print "No updates needed\n"; + } + + return; +} + ?> diff --git a/maintenance/userDupes.inc b/maintenance/userDupes.inc index f66051d4..e632f737 100644 --- a/maintenance/userDupes.inc +++ b/maintenance/userDupes.inc @@ -79,10 +79,8 @@ class UserDupes { * @return bool */ function checkDupes( $doDelete 
= false ) { - global $wgDBname; - if( $this->hasUniqueIndex() ) { - echo "$wgDBname already has a unique index on its user table.\n"; + echo wfWikiID()." already has a unique index on its user table.\n"; return true; } @@ -92,7 +90,7 @@ class UserDupes { $dupes = $this->getDupes(); $count = count( $dupes ); - echo "Found $count accounts with duplicate records on $wgDBname.\n"; + echo "Found $count accounts with duplicate records on ".wfWikiID().".\n"; $this->trimmed = 0; $this->reassigned = 0; $this->failed = 0; @@ -114,9 +112,9 @@ class UserDupes { if( $this->trimmed > 0 ) { if( $doDelete ) { - echo "$this->trimmed duplicate user records were deleted from $wgDBname.\n"; + echo "$this->trimmed duplicate user records were deleted from ".wfWikiID().".\n"; } else { - echo "$this->trimmed duplicate user accounts were found on $wgDBname which can be removed safely.\n"; + echo "$this->trimmed duplicate user accounts were found on ".wfWikiID()." which can be removed safely.\n"; } } @@ -325,4 +323,4 @@ class UserDupes { } -?> \ No newline at end of file +?> -- cgit v1.2.2