summary | refs | log | tree | commit | diff
path: root/includes/job/RefreshLinksJob.php
diff options
context:
space:
mode:
Diffstat (limited to 'includes/job/RefreshLinksJob.php')
-rw-r--r-- includes/job/RefreshLinksJob.php 162
1 files changed, 117 insertions, 45 deletions
diff --git a/includes/job/RefreshLinksJob.php b/includes/job/RefreshLinksJob.php
index 1aa206f0..b23951c6 100644
--- a/includes/job/RefreshLinksJob.php
+++ b/includes/job/RefreshLinksJob.php
@@ -2,6 +2,21 @@
/**
* Job to update links for a given title.
*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
* @file
* @ingroup JobQueue
*/
@@ -22,7 +37,6 @@ class RefreshLinksJob extends Job {
* @return boolean success
*/
function run() {
- global $wgParser, $wgContLang;
wfProfileIn( __METHOD__ );
$linkCache = LinkCache::singleton();
@@ -34,24 +48,41 @@ class RefreshLinksJob extends Job {
return false;
}
- $revision = Revision::newFromTitle( $this->title );
+ # Wait for the DB of the current/next slave DB handle to catch up to the master.
+ # This way, we get the correct page_latest for templates or files that just changed
+ # milliseconds ago, having triggered this job to begin with.
+ if ( isset( $this->params['masterPos'] ) ) {
+ wfGetLB()->waitFor( $this->params['masterPos'] );
+ }
+
+ $revision = Revision::newFromTitle( $this->title, false, Revision::READ_NORMAL );
if ( !$revision ) {
- $this->error = 'refreshLinks: Article not found "' . $this->title->getPrefixedDBkey() . '"';
+ $this->error = 'refreshLinks: Article not found "' .
+ $this->title->getPrefixedDBkey() . '"';
wfProfileOut( __METHOD__ );
- return false;
+ return false; // XXX: what if it was just deleted?
}
- wfProfileIn( __METHOD__.'-parse' );
- $options = ParserOptions::newFromUserAndLang( new User, $wgContLang );
- $parserOutput = $wgParser->parse( $revision->getText(), $this->title, $options, true, true, $revision->getId() );
- wfProfileOut( __METHOD__.'-parse' );
- wfProfileIn( __METHOD__.'-update' );
- $update = new LinksUpdate( $this->title, $parserOutput, false );
- $update->doUpdate();
- wfProfileOut( __METHOD__.'-update' );
+ self::runForTitleInternal( $this->title, $revision, __METHOD__ );
+
wfProfileOut( __METHOD__ );
return true;
}
+
+ public static function runForTitleInternal( Title $title, Revision $revision, $fname ) {
+ global $wgParser, $wgContLang;
+
+ wfProfileIn( $fname . '-parse' );
+ $options = ParserOptions::newFromUserAndLang( new User, $wgContLang );
+ $parserOutput = $wgParser->parse(
+ $revision->getText(), $title, $options, true, true, $revision->getId() );
+ wfProfileOut( $fname . '-parse' );
+
+ wfProfileIn( $fname . '-update' );
+ $updates = $parserOutput->getSecondaryDataUpdates( $title, false );
+ DataUpdate::runUpdates( $updates );
+ wfProfileOut( $fname . '-update' );
+ }
}
/**
@@ -61,6 +92,7 @@ class RefreshLinksJob extends Job {
* @ingroup JobQueue
*/
class RefreshLinksJob2 extends Job {
+ const MAX_TITLES_RUN = 10;
function __construct( $title, $params, $id = 0 ) {
parent::__construct( 'refreshLinks2', $title, $params, $id );
@@ -71,60 +103,100 @@ class RefreshLinksJob2 extends Job {
* @return boolean success
*/
function run() {
- global $wgParser, $wgContLang;
-
wfProfileIn( __METHOD__ );
$linkCache = LinkCache::singleton();
$linkCache->clear();
- if( is_null( $this->title ) ) {
+ if ( is_null( $this->title ) ) {
$this->error = "refreshLinks2: Invalid title";
wfProfileOut( __METHOD__ );
return false;
- }
- if( !isset($this->params['start']) || !isset($this->params['end']) ) {
+ } elseif ( !isset( $this->params['start'] ) || !isset( $this->params['end'] ) ) {
$this->error = "refreshLinks2: Invalid params";
wfProfileOut( __METHOD__ );
return false;
}
+
// Back compat for pre-r94435 jobs
$table = isset( $this->params['table'] ) ? $this->params['table'] : 'templatelinks';
- $titles = $this->title->getBacklinkCache()->getLinks(
- $table, $this->params['start'], $this->params['end']);
-
- # Not suitable for page load triggered job running!
- # Gracefully switch to refreshLinks jobs if this happens.
- if( php_sapi_name() != 'cli' ) {
+
+ // Avoid slave lag when fetching templates
+ if ( isset( $this->params['masterPos'] ) ) {
+ $masterPos = $this->params['masterPos'];
+ } elseif ( wfGetLB()->getServerCount() > 1 ) {
+ $masterPos = wfGetLB()->getMasterPos();
+ } else {
+ $masterPos = false;
+ }
+
+ $titles = $this->title->getBacklinkCache()->getLinks(
+ $table, $this->params['start'], $this->params['end'] );
+
+ if ( $titles->count() > self::MAX_TITLES_RUN ) {
+ # We don't want to parse too many pages per job as it can starve other jobs.
+ # If there are too many pages to parse, break this up into smaller jobs. By passing
+ # in the master position here we can cut down on the time spent waiting for slaves to
+ # catch up by the runners handling these jobs since time will have passed between now
+ # and when they pop these jobs off the queue.
+ $start = 0; // batch start
+ $end = 0; // batch end
+ $bsize = 0; // batch size
+ $first = true; // first of batch
+ $jobs = array();
+ foreach ( $titles as $title ) {
+ $start = $first ? $title->getArticleId() : $start;
+ $end = $title->getArticleId();
+ $first = false;
+ if ( ++$bsize >= self::MAX_TITLES_RUN ) {
+ $jobs[] = new RefreshLinksJob2( $this->title, array(
+ 'table' => $table,
+ 'start' => $start,
+ 'end' => $end,
+ 'masterPos' => $masterPos
+ ) );
+ $first = true;
+ $start = $end = $bsize = 0;
+ }
+ }
+ if ( $bsize > 0 ) { // group remaining pages into a job
+ $jobs[] = new RefreshLinksJob2( $this->title, array(
+ 'table' => $table,
+ 'start' => $start,
+ 'end' => $end,
+ 'masterPos' => $masterPos
+ ) );
+ }
+ Job::batchInsert( $jobs );
+ } elseif ( php_sapi_name() != 'cli' ) {
+ # Not suitable for page load triggered job running!
+ # Gracefully switch to refreshLinks jobs if this happens.
$jobs = array();
foreach ( $titles as $title ) {
- $jobs[] = new RefreshLinksJob( $title, '' );
+ $jobs[] = new RefreshLinksJob( $title, array( 'masterPos' => $masterPos ) );
}
Job::batchInsert( $jobs );
-
- wfProfileOut( __METHOD__ );
- return true;
- }
- $options = ParserOptions::newFromUserAndLang( new User, $wgContLang );
- # Re-parse each page that transcludes this page and update their tracking links...
- foreach ( $titles as $title ) {
- $revision = Revision::newFromTitle( $title );
- if ( !$revision ) {
- $this->error = 'refreshLinks: Article not found "' . $title->getPrefixedDBkey() . '"';
- wfProfileOut( __METHOD__ );
- return false;
+ } else {
+ # Wait for the DB of the current/next slave DB handle to catch up to the master.
+ # This way, we get the correct page_latest for templates or files that just changed
+ # milliseconds ago, having triggered this job to begin with.
+ if ( $masterPos ) {
+ wfGetLB()->waitFor( $masterPos );
+ }
+ # Re-parse each page that transcludes this page and update their tracking links...
+ foreach ( $titles as $title ) {
+ $revision = Revision::newFromTitle( $title, false, Revision::READ_NORMAL );
+ if ( !$revision ) {
+ $this->error = 'refreshLinks: Article not found "' .
+ $title->getPrefixedDBkey() . '"';
+ continue; // skip this page
+ }
+ RefreshLinksJob::runForTitleInternal( $title, $revision, __METHOD__ );
+ wfWaitForSlaves();
}
- wfProfileIn( __METHOD__.'-parse' );
- $parserOutput = $wgParser->parse( $revision->getText(), $title, $options, true, true, $revision->getId() );
- wfProfileOut( __METHOD__.'-parse' );
- wfProfileIn( __METHOD__.'-update' );
- $update = new LinksUpdate( $title, $parserOutput, false );
- $update->doUpdate();
- wfProfileOut( __METHOD__.'-update' );
- wfWaitForSlaves();
}
- wfProfileOut( __METHOD__ );
+ wfProfileOut( __METHOD__ );
return true;
}
}