summaryrefslogtreecommitdiff
path: root/maintenance/populateParentId.php
blob: 686d9f2b7f00772e3a147695318b175447cfc9dc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
<?php
/**
 * Makes the required database updates for rev_parent_id
 * to be of any use. It can be used for some simple tracking
 * and to find new page edits by users.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Maintenance
 */

require_once __DIR__ . '/Maintenance.php';

/**
 * Maintenance script that makes the required database updates for rev_parent_id
 * to be of any use.
 *
 * @ingroup Maintenance
 */
class PopulateParentId extends LoggedUpdateMaintenance {
	/**
	 * Set the human-readable description of this script.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Populates rev_parent_id";
	}

	/**
	 * @return string Key identifying this update
	 *   (presumably consumed by the parent class to record completion -- confirm)
	 */
	protected function getUpdateKey() {
		return 'populate rev_parent_id';
	}

	/**
	 * @return string Message for the case where the update is skipped
	 */
	protected function updateSkippedMessage() {
		return 'rev_parent_id column of revision table already populated.';
	}

	/**
	 * Walk the revision table in rev_id ranges of the configured batch size
	 * and fill in rev_parent_id for every row where it is still unset.
	 *
	 * @return bool False if the revision table is missing or the batch size
	 *   is not a positive integer, true otherwise (including an empty table)
	 */
	protected function doDBUpdates() {
		$db = wfGetDB( DB_MASTER );
		if ( !$db->tableExists( 'revision' ) ) {
			$this->error( "revision table does not exist" );

			return false;
		}
		$this->output( "Populating rev_parent_id column\n" );
		$start = $db->selectField( 'revision', 'MIN(rev_id)', false, __METHOD__ );
		$end = $db->selectField( 'revision', 'MAX(rev_id)', false, __METHOD__ );
		if ( is_null( $start ) || is_null( $end ) ) {
			$this->output( "...revision table seems to be empty, nothing to do.\n" );

			return true;
		}
		# The block window is advanced by the batch size on every pass, so a
		# zero, negative or unset batch size would keep the loop below from
		# ever terminating. Refuse to run instead of spinning forever.
		$batchSize = intval( $this->mBatchSize );
		if ( $batchSize < 1 ) {
			$this->error( "batch size must be a positive integer" );

			return false;
		}
		# Process the whole rev_id range in consecutive, inclusive blocks
		$blockStart = intval( $start );
		$blockEnd = $blockStart + $batchSize - 1;
		$count = 0;
		$changed = 0;
		while ( $blockStart <= $end ) {
			$this->output( "...doing rev_id from $blockStart to $blockEnd\n" );
			$cond = "rev_id BETWEEN $blockStart AND $blockEnd";
			# Only rows in this block whose rev_parent_id is unset
			# (presumably rendered as an IS NULL condition -- confirm)
			$res = $db->select( 'revision',
				array( 'rev_id', 'rev_page', 'rev_timestamp', 'rev_parent_id' ),
				array( $cond, 'rev_parent_id' => null ), __METHOD__ );
			# Go through and update rev_parent_id from these rows.
			# Assume that the previous revision of the title was
			# the original previous revision of the title when the
			# edit was made...
			foreach ( $res as $row ) {
				$previousID = $this->findParentId( $db, $row );
				# Loose comparison on purpose: a computed parent of 0 compared
				# against an unset rev_parent_id is not counted as a change.
				if ( $previousID != $row->rev_parent_id ) {
					$changed++;
				}
				# Update the row...
				$db->update( 'revision',
					array( 'rev_parent_id' => $previousID ),
					array( 'rev_id' => $row->rev_id ),
					__METHOD__ );
				$count++;
			}
			$blockStart += $batchSize;
			$blockEnd += $batchSize;
			wfWaitForSlaves();
		}
		$this->output( "rev_parent_id population complete ... {$count} rows [{$changed} changed]\n" );

		return true;
	}

	/**
	 * Work out which revision should be recorded as the parent of a row.
	 *
	 * @param object $db Database connection as returned by wfGetDB()
	 * @param object $row Revision row providing rev_id, rev_page and rev_timestamp
	 * @return int ID of the parent revision, or 0 if no earlier revision was found
	 */
	private function findParentId( $db, $row ) {
		# First, check rows with the same timestamp other than this one
		# with a smaller rev ID. The highest ID "wins". This avoids loops
		# as timestamp can only decrease and never loops with IDs (from parent to parent)
		$previousID = $db->selectField( 'revision', 'rev_id',
			array( 'rev_page' => $row->rev_page, 'rev_timestamp' => $row->rev_timestamp,
				"rev_id < " . intval( $row->rev_id ) ),
			__METHOD__,
			array( 'ORDER BY' => 'rev_id DESC' ) );
		# If there are none, check the highest ID with a lower timestamp
		if ( !$previousID ) {
			# Get the highest older timestamp
			$lastTimestamp = $db->selectField(
				'revision',
				'rev_timestamp',
				array(
					'rev_page' => $row->rev_page,
					"rev_timestamp < " . $db->addQuotes( $row->rev_timestamp )
				),
				__METHOD__,
				array( 'ORDER BY' => 'rev_timestamp DESC' )
			);
			# If there is one, let the highest rev ID win
			if ( $lastTimestamp ) {
				$previousID = $db->selectField( 'revision', 'rev_id',
					array( 'rev_page' => $row->rev_page, 'rev_timestamp' => $lastTimestamp ),
					__METHOD__,
					array( 'ORDER BY' => 'rev_id DESC' ) );
			}
		}

		# A failed lookup (falsy result) becomes 0, i.e. "no parent"
		return intval( $previousID );
	}
}

# Name of the maintenance class defined in this file; presumably read by the
# file included on the next line to decide which class to run -- confirm.
$maintClass = "PopulateParentId";
# NOTE(review): RUN_MAINTENANCE_IF_MAIN is not defined in this file; presumably
# it is provided by Maintenance.php, required at the top -- confirm.
require_once RUN_MAINTENANCE_IF_MAIN;