summaryrefslogtreecommitdiff
path: root/maintenance/benchmarks/benchmarkParse.php
blob: ce38dad6a0100bf5e5285b8bb4efdc1365e02bfe (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
<?php
/**
 * Benchmark script for parse operations
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @author Tim Starling <tstarling@wikimedia.org>
 * @ingroup Benchmark
 */

require __DIR__ . '/../Maintenance.php';

/**
 * Maintenance script to benchmark how long it takes to parse a given title at an optionally
 * specified timestamp
 *
 * @since 1.23
 */
class BenchmarkParse extends Maintenance {
	/**
	 * @var string|null MediaWiki concatenated string timestamp (YYYYMMDDHHMMSS) used to
	 *  pick template revisions, or null when --tpl-time was not given
	 */
	private $templateTimestamp = null;

	/**
	 * @var array Cache that maps a prefixed Title DB key to the revision ID found for the
	 *  requested template timestamp (false when no matching revision was found)
	 */
	private $idCache = array();

	/**
	 * Declare the description, the positional "title" argument and the
	 * --warmup, --loops, --page-time and --tpl-time options.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Benchmark parse operation' );
		$this->addArg( 'title', 'The name of the page to parse' );
		$this->addOption( 'warmup', 'Repeat the parse operation this number of times to warm the cache',
			false, true );
		$this->addOption( 'loops', 'Number of times to repeat parse operation post-warmup',
			false, true );
		$this->addOption( 'page-time',
			'Use the version of the page which was current at the given time',
			false, true );
		$this->addOption( 'tpl-time',
			'Use templates which were current at the given time (except that moves and ' .
			'deletes are not handled properly)',
			false, true );
	}

	/**
	 * Resolve the revision to benchmark, run the warmup parses, then time the
	 * requested number of parses and print the per-parse CPU and wall clock time.
	 *
	 * Exits with status 1 when the title, a time option, the revision or the
	 * loop count is invalid.
	 */
	public function execute() {
		if ( $this->hasOption( 'tpl-time' ) ) {
			$this->templateTimestamp = $this->parseTimeOption( 'tpl-time' );
			// NOTE: the lowercase "t" in "Andtitle" is the hook's actual name in
			// MediaWiki core; do not "correct" the spelling.
			Hooks::register( 'BeforeParserFetchTemplateAndtitle', array( $this, 'onFetchTemplate' ) );
		}

		$title = Title::newFromText( $this->getArg() );
		if ( !$title ) {
			$this->error( "Invalid title" );
			exit( 1 );
		}

		if ( $this->hasOption( 'page-time' ) ) {
			$pageTimestamp = $this->parseTimeOption( 'page-time' );
			$id = $this->getRevIdForTime( $title, $pageTimestamp );
			if ( !$id ) {
				$this->error( "The page did not exist at that time" );
				exit( 1 );
			}

			$revision = Revision::newFromId( $id );
		} else {
			$revision = Revision::newFromTitle( $title );
		}

		if ( !$revision ) {
			$this->error( "Unable to load revision, incorrect title?" );
			exit( 1 );
		}

		// Normalize both counts to integers so that the loop bounds and the
		// division below never operate on a non-numeric option value.
		$warmup = (int)$this->getOption( 'warmup', 1 );
		$loops = (int)$this->getOption( 'loops', 1 );

		// Validate before the warmup so a bad --loops value does not cost a
		// full round of (potentially slow) warmup parses first.
		if ( $loops < 1 ) {
			$this->error( 'Invalid number of loops specified' );
			exit( 1 );
		}

		for ( $i = 0; $i < $warmup; $i++ ) {
			$this->runParser( $revision );
		}

		$startUsage = getrusage();
		$startTime = microtime( true );
		for ( $i = 0; $i < $loops; $i++ ) {
			$this->runParser( $revision );
		}
		$endUsage = getrusage();
		$endTime = microtime( true );

		printf( "CPU time = %.3f s, wall clock time = %.3f s\n",
			// CPU time: user-mode time only (ru_utime), averaged per loop
			( $endUsage['ru_utime.tv_sec'] + $endUsage['ru_utime.tv_usec'] * 1e-6
			- $startUsage['ru_utime.tv_sec'] - $startUsage['ru_utime.tv_usec'] * 1e-6 ) / $loops,
			// Wall clock time, averaged per loop
			( $endTime - $startTime ) / $loops
		);
	}

	/**
	 * Read a time-valued option and convert it to a TS_MW timestamp.
	 *
	 * strtotime() returns false for input it cannot parse; that is reported as
	 * an error here instead of being handed on to wfTimestamp().
	 *
	 * @param string $name Option name without the leading dashes
	 * @return string Result of wfTimestamp( TS_MW, ... ) for the parsed time
	 */
	private function parseTimeOption( $name ) {
		$unixTime = strtotime( $this->getOption( $name ) );
		if ( $unixTime === false ) {
			$this->error( "Invalid time specified for --$name" );
			exit( 1 );
		}

		return wfTimestamp( TS_MW, $unixTime );
	}

	/**
	 * Fetch the ID of the newest revision of a Title whose timestamp is at or
	 * before the given timestamp.
	 *
	 * @param Title $title
	 * @param string $timestamp Upper bound compared against rev_timestamp
	 * @return bool|string Revision ID, or false if not found or error
	 */
	public function getRevIdForTime( Title $title, $timestamp ) {
		$dbr = wfGetDB( DB_SLAVE );

		return $dbr->selectField(
			array( 'revision', 'page' ),
			'rev_id',
			array(
				'page_namespace' => $title->getNamespace(),
				'page_title' => $title->getDBkey(),
				'rev_timestamp <= ' . $dbr->addQuotes( $timestamp )
			),
			__METHOD__,
			// Newest matching revision first; only that one is needed
			array( 'ORDER BY' => 'rev_timestamp DESC', 'LIMIT' => 1 ),
			array( 'revision' => array( 'INNER JOIN', 'rev_page=page_id' ) )
		);
	}

	/**
	 * Parse the content of a given Revision; the parser output is discarded.
	 *
	 * @param Revision $revision
	 */
	public function runParser( Revision $revision ) {
		$content = $revision->getContent();
		$content->getParserOutput( $revision->getTitle(), $revision->getId() );
	}

	/**
	 * Hook into the parser's revision ID fetcher. Make sure that the parser only
	 * uses template revisions at or before the --tpl-time timestamp.
	 *
	 * Lookups are cached per prefixed DB key. When no revision is found for a
	 * title, $id is left untouched.
	 *
	 * @param Parser $parser
	 * @param Title $title
	 * @param bool &$skip Not modified by this handler
	 * @param string|bool &$id Set to the cached revision ID when one was found
	 * @return bool Always true
	 */
	public function onFetchTemplate( Parser $parser, Title $title, &$skip, &$id ) {
		$pdbk = $title->getPrefixedDBkey();
		// isset() is true for a cached false, so failed lookups are not repeated
		if ( !isset( $this->idCache[$pdbk] ) ) {
			$this->idCache[$pdbk] = $this->getRevIdForTime( $title, $this->templateTimestamp );
		}

		$cachedId = $this->idCache[$pdbk];
		if ( $cachedId !== false ) {
			$id = $cachedId;
		}

		return true;
	}
}

// Name of the maintenance class defined in this file. Presumably read by the
// script included below to decide which class to run — confirm in Maintenance.php.
$maintClass = 'BenchmarkParse';
// RUN_MAINTENANCE_IF_MAIN is not defined in this file (presumably by the
// Maintenance.php required at the top); it holds the path of the script to include.
require RUN_MAINTENANCE_IF_MAIN;