From c1f9b1f7b1b77776192048005dcc66dcf3df2bfb Mon Sep 17 00:00:00 2001 From: Pierre Schmitz Date: Sat, 27 Dec 2014 15:41:37 +0100 Subject: Update to MediaWiki 1.24.1 --- includes/jobqueue/utils/BacklinkJobUtils.php | 122 +++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 includes/jobqueue/utils/BacklinkJobUtils.php (limited to 'includes/jobqueue/utils') diff --git a/includes/jobqueue/utils/BacklinkJobUtils.php b/includes/jobqueue/utils/BacklinkJobUtils.php new file mode 100644 index 00000000..c8e5df66 --- /dev/null +++ b/includes/jobqueue/utils/BacklinkJobUtils.php @@ -0,0 +1,122 @@ +:(,),...) + * map so that the run() function knows what pages to act on. The leaf jobs will keep + * the same job title as the parent job (e.g. $job). + * + * The partition jobs have the 'range' parameter set to a map of the format + * (start:, end:, batchSize:, subranges:((,),...)), + * the 'table' parameter set to that of $job, and the 'recursive' parameter set to true. + * This method can be called on the resulting job to repeat the process again. + * + * The job provided ($job) must have the 'recursive' parameter set to true and the 'table' + * parameter must be set to a backlink table. The job title will be used as the title to + * find backlinks for. Any 'range' parameter must follow the same format as mentioned above. + * This should be managed by recursive calls to this method. + * + * The first jobs return are always the leaf jobs. This lets the caller use push() to + * put them directly into the queue and works well if the queue is FIFO. In such a queue, + * the leaf jobs have to get finished first before anything can resolve the next partition + * job, which keeps the queue very small. + * + * $opts includes: + * - params : extra job parameters to include in each job + * + * @param Job $job + * @param int $bSize BacklinkCache partition size; usually $wgUpdateRowsPerJob + * @param int $cSize Max titles per leaf job; Usually 1 or a modest value + * @param array $opts Optional parameter map + * @return Job[] List of Job objects + */ + public static function partitionBacklinkJob( Job $job, $bSize, $cSize, $opts = array() ) { + $class = get_class( $job ); + $title = $job->getTitle(); + $params = $job->getParams(); + + if ( isset( $params['pages'] ) || empty( $params['recursive'] ) ) { + $ranges = array(); // sanity; this is a leaf node + wfWarn( __METHOD__ . " called on {$job->getType()} leaf job (explosive recursion)." ); + } elseif ( isset( $params['range'] ) ) { + // This is a range job to trigger the insertion of partitioned/title jobs... + $ranges = $params['range']['subranges']; + $realBSize = $params['range']['batchSize']; + } else { + // This is a base job to trigger the insertion of partitioned jobs... + $ranges = $title->getBacklinkCache()->partition( $params['table'], $bSize ); + $realBSize = $bSize; + } + + $extraParams = isset( $opts['params'] ) ? $opts['params'] : array(); + + $jobs = array(); + // Combine the first range (of size $bSize) backlinks into leaf jobs + if ( isset( $ranges[0] ) ) { + list( $start, $end ) = $ranges[0]; + $titles = $title->getBacklinkCache()->getLinks( $params['table'], $start, $end ); + foreach ( array_chunk( iterator_to_array( $titles ), $cSize ) as $titleBatch ) { + $pages = array(); + foreach ( $titleBatch as $tl ) { + $pages[$tl->getArticleId()] = array( $tl->getNamespace(), $tl->getDBKey() ); + } + $jobs[] = new $class( + $title, // maintain parent job title + array( 'pages' => $pages ) + $extraParams + ); + } + } + // Take all of the remaining ranges and build a partition job from it + if ( isset( $ranges[1] ) ) { + $jobs[] = new $class( + $title, // maintain parent job title + array( + 'recursive' => true, + 'table' => $params['table'], + 'range' => array( + 'start' => $ranges[1][0], + 'end' => $ranges[count( $ranges ) - 1][1], + 'batchSize' => $realBSize, + 'subranges' => array_slice( $ranges, 1 ) + ), + ) + $extraParams + ); + } + + return $jobs; + } +} -- cgit v1.2.2