summaryrefslogtreecommitdiff
path: root/maintenance/updateCollation.php
blob: e890bce7818873f599e6162326f390eafca6c41b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
<?php
/**
 * @file
 * @ingroup Maintenance
 * @author Aryeh Gregor (Simetrical)
 */

#$optionsWithArgs = array( 'begin', 'max-slave-lag' );

require_once( dirname( __FILE__ ) . '/Maintenance.php' );

class UpdateCollation extends Maintenance {
	const BATCH_SIZE = 50;

	public function __construct() {
		parent::__construct();

		global $wgCategoryCollation;
		$this->mDescription = <<<TEXT
This script will find all rows in the categorylinks table whose collation is
out-of-date (cl_collation != '$wgCategoryCollation') and repopulate cl_sortkey
using the page title and cl_sortkey_prefix.  If everything's collation is
up-to-date, it will do nothing.
TEXT;

		$this->addOption( 'force', 'Run on all rows, even if the collation is ' . 
			'supposed to be up-to-date.' );
	}
	
	public function syncDBs() {
		$lb = wfGetLB();
		// bug 27975 - Don't try to wait for slaves if there are none
		// Prevents permission error when getting master position
		if ( $lb->getServerCount() > 1 ) {
			$dbw = $lb->getConnection( DB_MASTER );
			$pos = $dbw->getMasterPos();
			$lb->waitForAll( $pos );
		}
	}

	public function execute() {
		global $wgCategoryCollation, $wgMiserMode;

		$dbw = wfGetDB( DB_MASTER );
		$force = $this->getOption( 'force' );

		$options = array( 'LIMIT' => self::BATCH_SIZE );

		if ( $force ) {
			$options['ORDER BY'] = 'cl_from, cl_to';
			$collationConds = array();
		} else {
			$collationConds = array( 0 => 
				'cl_collation != ' . $dbw->addQuotes( $wgCategoryCollation ) );

			if ( !$wgMiserMode ) {
				$count = $dbw->selectField(
					'categorylinks',
					'COUNT(*)',
					$collationConds,
					__METHOD__
				);

				if ( $count == 0 ) {
					$this->output( "Collations up-to-date.\n" );
					return;
				}
				$this->output( "Fixing collation for $count rows.\n" );
			}
		}

		$count = 0;
		$row = false;
		$batchConds = array();
		do {
			$this->output( 'Processing next ' . self::BATCH_SIZE . ' rows... ');
			$res = $dbw->select(
				array( 'categorylinks', 'page' ),
				array( 'cl_from', 'cl_to', 'cl_sortkey_prefix', 'cl_collation',
					'cl_sortkey', 'page_namespace', 'page_title'
				),
				array_merge( $collationConds, $batchConds, array( 'cl_from = page_id' ) ),
				__METHOD__,
				$options
			);

			$dbw->begin();
			foreach ( $res as $row ) {
				$title = Title::newFromRow( $row );
				if ( !$row->cl_collation ) {
					# This is an old-style row, so the sortkey needs to be
					# converted.
					if ( $row->cl_sortkey == $title->getText()
					|| $row->cl_sortkey == $title->getPrefixedText() ) {
						$prefix = '';
					} else {
						# Custom sortkey, use it as a prefix
						$prefix = $row->cl_sortkey;
					}
				} else {
					$prefix = $row->cl_sortkey_prefix;
				}
				# cl_type will be wrong for lots of pages if cl_collation is 0,
				# so let's update it while we're here.
				if ( $title->getNamespace() == NS_CATEGORY ) {
					$type = 'subcat';
				} elseif ( $title->getNamespace() == NS_FILE ) {
					$type = 'file';
				} else {
					$type = 'page';
				}
				$dbw->update(
					'categorylinks',
					array(
						'cl_sortkey' => Collation::singleton()->getSortKey(
							$title->getCategorySortkey( $prefix ) ),
						'cl_sortkey_prefix' => $prefix,
						'cl_collation' => $wgCategoryCollation,
						'cl_type' => $type,
						'cl_timestamp = cl_timestamp',
					),
					array( 'cl_from' => $row->cl_from, 'cl_to' => $row->cl_to ),
					__METHOD__
				);
			}
			$dbw->commit();

			if ( $force && $row ) {
				$encFrom = $dbw->addQuotes( $row->cl_from );
				$encTo = $dbw->addQuotes( $row->cl_to );
				$batchConds = array( 
					"(cl_from = $encFrom AND cl_to > $encTo) " .
					" OR cl_from > $encFrom" );
			}

			$count += $res->numRows();
			$this->output( "$count done.\n" );
			
			$this->syncDBs();
		} while ( $res->numRows() == self::BATCH_SIZE );
	}
}

$maintClass = "UpdateCollation";
require_once( RUN_MAINTENANCE_IF_MAIN );