From 4ac9fa081a7c045f6a9f1cfc529d82423f485b2e Mon Sep 17 00:00:00 2001
From: Pierre Schmitz <pierre@archlinux.de>
Date: Sun, 8 Dec 2013 09:55:49 +0100
Subject: Update to MediaWiki 1.22.0

---
 includes/search/SearchEngine.php | 169 +++++++++++++++++++++++++++++----------
 1 file changed, 125 insertions(+), 44 deletions(-)

(limited to 'includes/search/SearchEngine.php')

diff --git a/includes/search/SearchEngine.php b/includes/search/SearchEngine.php
index 6b3e62b1..71c05d8b 100644
--- a/includes/search/SearchEngine.php
+++ b/includes/search/SearchEngine.php
@@ -59,7 +59,7 @@ class SearchEngine {
 	 * STUB
 	 *
 	 * @param string $term raw search term
-	 * @return SearchResultSet
+	 * @return SearchResultSet|Status|null
 	 */
 	function searchText( $term ) {
 		return null;
@@ -71,7 +71,7 @@ class SearchEngine {
 	 * STUB
 	 *
 	 * @param string $term raw search term
-	 * @return SearchResultSet
+	 * @return SearchResultSet|null
 	 */
 	function searchTitle( $term ) {
 		return null;
@@ -93,8 +93,9 @@ class SearchEngine {
 	 * @return Boolean
 	 */
 	public function supports( $feature ) {
-		switch( $feature ) {
+		switch ( $feature ) {
 		case 'list-redirects':
+		case 'search-update':
 			return true;
 		case 'title-suffix-filter':
 		default:
@@ -331,8 +332,9 @@ class SearchEngine {
 				$parsed = substr( $query, strlen( $prefix ) + 1 );
 			}
 		}
-		if ( trim( $parsed ) == '' )
+		if ( trim( $parsed ) == '' ) {
 			$parsed = $query; // prefix was the whole query
+		}
 
 		wfRunHooks( 'SearchEngineReplacePrefixesComplete', array( $this, $query, &$parsed ) );
 
@@ -420,8 +422,9 @@ class SearchEngine {
 
 		$formatted = array_map( array( $wgContLang, 'getFormattedNsText' ), $namespaces );
 		foreach ( $formatted as $key => $ns ) {
-			if ( empty( $ns ) )
+			if ( empty( $ns ) ) {
 				$formatted[$key] = wfMessage( 'blanknamespace' )->text();
+			}
 		}
 		return $formatted;
 	}
@@ -451,22 +454,45 @@ class SearchEngine {
 	 * Load up the appropriate search engine class for the currently
 	 * active database backend, and return a configured instance.
 	 *
+	 * @param String $type Type of search backend, if not the default
 	 * @return SearchEngine
 	 */
-	public static function create() {
+	public static function create( $type = null ) {
 		global $wgSearchType;
 		$dbr = null;
-		if ( $wgSearchType ) {
+
+		$alternatives = self::getSearchTypes();
+
+		if ( $type && in_array( $type, $alternatives ) ) {
+			$class = $type;
+		} elseif ( $wgSearchType !== null ) {
 			$class = $wgSearchType;
 		} else {
 			$dbr = wfGetDB( DB_SLAVE );
 			$class = $dbr->getSearchEngine();
 		}
+
 		$search = new $class( $dbr );
 		$search->setLimitOffset( 0, 0 );
 		return $search;
 	}
 
+	/**
+	 * Return the search engines we support. If only $wgSearchType
+	 * is set, it'll be an array of just that one item.
+	 *
+	 * @return array
+	 */
+	public static function getSearchTypes() {
+		global $wgSearchType, $wgSearchTypeAlternatives;
+		static $alternatives = null;
+		if ( $alternatives === null ) {
+			$alternatives = $wgSearchTypeAlternatives ?: array();
+			array_unshift( $alternatives, $wgSearchType );
+		}
+		return $alternatives;
+	}
+
 	/**
 	 * Create or update the search index record for the given page.
 	 * Title and text should be pre-processed.
@@ -492,6 +518,18 @@ class SearchEngine {
 		// no-op
 	}
 
+	/**
+	 * Delete an indexed page
+	 * Title should be pre-processed.
+	 * STUB
+	 *
+	 * @param Integer $id Page id that was deleted
+	 * @param String $title Title of page that was deleted
+	 */
+	function delete( $id, $title ) {
+		// no-op
+	}
+
 	/**
 	 * Get OpenSearch suggestion template
 	 *
@@ -509,6 +547,31 @@ class SearchEngine {
 			return $wgCanonicalServer . wfScript( 'api' ) . '?action=opensearch&search={searchTerms}&namespace=' . $ns;
 		}
 	}
+
+	/**
+	 * Get the raw text for updating the index from a content object
+	 * Nicer search backends could possibly do something cooler than
+	 * just returning raw text
+	 *
+	 * @todo This isn't ideal, we'd really like to have content-specific handling here
+	 * @param Title $t Title we're indexing
+	 * @param Content $c Content of the page to index
+	 * @return string
+	 */
+	public function getTextFromContent( Title $t, Content $c = null ) {
+		return $c ? $c->getTextForSearchIndex() : '';
+	}
+
+	/**
+	 * If an implementation of SearchEngine handles all of its own text processing
+	 * in getTextFromContent() and doesn't require SearchUpdate::updateText()'s
+	 * rather silly handling, it should return true here instead.
+	 *
+	 * @return bool
+	 */
+	public function textAlreadyUpdatedForIndex() {
+		return false;
+	}
 }
 
 /**
@@ -641,26 +704,30 @@ class SqlSearchResultSet extends SearchResultSet {
 	}
 
 	function numRows() {
-		if ( $this->mResultSet === false )
+		if ( $this->mResultSet === false ) {
 			return false;
+		}
 
 		return $this->mResultSet->numRows();
 	}
 
 	function next() {
-		if ( $this->mResultSet === false )
+		if ( $this->mResultSet === false ) {
 			return false;
+		}
 
 		$row = $this->mResultSet->fetchObject();
-		if ( $row === false )
+		if ( $row === false ) {
 			return false;
+		}
 
 		return SearchResult::newFromRow( $row );
 	}
 
 	function free() {
-		if ( $this->mResultSet === false )
+		if ( $this->mResultSet === false ) {
 			return false;
+		}
 
 		$this->mResultSet->free();
 	}
@@ -750,8 +817,9 @@ class SearchResult {
 			wfRunHooks( 'SearchResultInitFromTitle', array( $title, &$id ) );
 			$this->mRevision = Revision::newFromTitle(
 				$this->mTitle, $id, Revision::READ_NORMAL );
-			if ( $this->mTitle->getNamespace() === NS_FILE )
+			if ( $this->mTitle->getNamespace() === NS_FILE ) {
 				$this->mImage = wfFindFile( $this->mTitle );
+			}
 		}
 	}
 
@@ -761,8 +829,9 @@ class SearchResult {
 	 * @return Boolean
 	 */
 	function isBrokenTitle() {
-		if ( is_null( $this->mTitle ) )
+		if ( is_null( $this->mTitle ) ) {
 			return true;
+		}
 		return false;
 	}
 
@@ -795,10 +864,8 @@ class SearchResult {
 	protected function initText() {
 		if ( !isset( $this->mText ) ) {
 			if ( $this->mRevision != null ) {
-				//TODO: if we could plug in some code that knows about special content models *and* about
-				//      special features of the search engine, the search could benefit.
-				$content = $this->mRevision->getContent();
-				$this->mText = $content ? $content->getTextForSearchIndex() : '';
+				$this->mText = SearchEngine::create()
+					->getTextFromContent( $this->mTitle, $this->mRevision->getContent() );
 			} else { // TODO: can we fetch raw wikitext for commons images?
 				$this->mText = '';
 			}
@@ -810,16 +877,17 @@ class SearchResult {
 	 * @return String: highlighted text snippet, null (and not '') if not supported
 	 */
 	function getTextSnippet( $terms ) {
-		global $wgUser, $wgAdvancedSearchHighlighting;
+		global $wgAdvancedSearchHighlighting;
 		$this->initText();
 
 		// TODO: make highliter take a content object. Make ContentHandler a factory for SearchHighliter.
-		list( $contextlines, $contextchars ) = SearchEngine::userHighlightPrefs( $wgUser );
+		list( $contextlines, $contextchars ) = SearchEngine::userHighlightPrefs();
 		$h = new SearchHighlighter();
-		if ( $wgAdvancedSearchHighlighting )
+		if ( $wgAdvancedSearchHighlighting ) {
 			return $h->highlightText( $this->mText, $terms, $contextlines, $contextchars );
-		else
+		} else {
 			return $h->highlightSimple( $this->mText, $terms, $contextlines, $contextchars );
+		}
 	}
 
 	/**
@@ -863,10 +931,11 @@ class SearchResult {
 	 * @return String: timestamp
 	 */
 	function getTimestamp() {
-		if ( $this->mRevision )
+		if ( $this->mRevision ) {
 			return $this->mRevision->getTimestamp();
-		elseif ( $this->mImage )
+		} elseif ( $this->mImage ) {
 			return $this->mImage->getTimestamp();
+		}
 		return '';
 	}
 
@@ -952,8 +1021,9 @@ class SearchHighlighter {
 		global $wgSearchHighlightBoundaries;
 		$fname = __METHOD__;
 
-		if ( $text == '' )
+		if ( $text == '' ) {
 			return '';
+		}
 
 		// spli text into text + templates/links/tables
 		$spat = "/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)";
@@ -984,8 +1054,9 @@ class SearchHighlighter {
 						if ( $key == 2 ) {
 							// see if this is an image link
 							$ns = substr( $val[0], 2, - 1 );
-							if ( $wgContLang->getNsIndex( $ns ) != NS_FILE )
+							if ( $wgContLang->getNsIndex( $ns ) != NS_FILE ) {
 								break;
+							}
 
 						}
 						$epat = $endPatterns[$key];
@@ -1006,7 +1077,7 @@ class SearchHighlighter {
 								$len = strlen( $endMatches[2][0] );
 								$off = $endMatches[2][1];
 								$this->splitAndAdd( $otherExt, $count,
-									substr( $text, $start, $off + $len  - $start ) );
+									substr( $text, $start, $off + $len - $start ) );
 								$start = $off + $len;
 								$found = true;
 								break;
@@ -1119,7 +1190,7 @@ class SearchHighlighter {
 			// if begin of the article contains the whole phrase, show only that !!
 			if ( array_key_exists( $first, $snippets ) && preg_match( $pat1, $snippets[$first] )
 				&& $offsets[$first] < $contextchars * 2 ) {
-				$snippets = array ( $first => $snippets[$first] );
+				$snippets = array( $first => $snippets[$first] );
 			}
 
 			// calc by how much to extend existing snippets
@@ -1155,17 +1226,19 @@ class SearchHighlighter {
 		$last = - 1;
 		$extract = '';
 		foreach ( $snippets as $index => $line ) {
-			if ( $last == - 1 )
+			if ( $last == - 1 ) {
 				$extract .= $line; // first line
-			elseif ( $last + 1 == $index && $offsets[$last] + strlen( $snippets[$last] ) >= strlen( $all[$last] ) )
+			} elseif ( $last + 1 == $index && $offsets[$last] + strlen( $snippets[$last] ) >= strlen( $all[$last] ) ) {
 				$extract .= " " . $line; // continous lines
-			else
+			} else {
 				$extract .= '<b> ... </b>' . $line;
+			}
 
 			$last = $index;
 		}
-		if ( $extract )
+		if ( $extract ) {
 			$extract .= '<b> ... </b>';
+		}
 
 		$processed = array();
 		foreach ( $terms as $term ) {
@@ -1193,8 +1266,9 @@ class SearchHighlighter {
 		$split = explode( "\n", $this->mCleanWikitext ? $this->removeWiki( $text ) : $text );
 		foreach ( $split as $line ) {
 			$tt = trim( $line );
-			if ( $tt )
+			if ( $tt ) {
 				$extracts[$count++] = $tt;
+			}
 		}
 	}
 
@@ -1268,8 +1342,9 @@ class SearchHighlighter {
 			while ( $char >= 0x80 && $char < 0xc0 ) {
 				// skip trailing bytes
 				$point++;
-				if ( $point >= strlen( $text ) )
+				if ( $point >= strlen( $text ) ) {
 					return strlen( $text );
+				}
 				$char = ord( $text[$point] );
 			}
 			return $point;
@@ -1289,24 +1364,28 @@ class SearchHighlighter {
 	 * @protected
 	 */
 	function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ) {
-		if ( $linesleft == 0 )
+		if ( $linesleft == 0 ) {
 			return; // nothing to do
+		}
 		foreach ( $extracts as $index => $line ) {
-			if ( array_key_exists( $index, $out ) )
+			if ( array_key_exists( $index, $out ) ) {
 				continue; // this line already highlighted
+			}
 
 			$m = array();
-			if ( !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE ) )
+			if ( !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE ) ) {
 				continue;
+			}
 
 			$offset = $m[0][1];
 			$len = strlen( $m[0][0] );
-			if ( $offset + $len < $contextchars )
+			if ( $offset + $len < $contextchars ) {
 				$begin = 0;
-			elseif ( $len > $contextchars )
+			} elseif ( $len > $contextchars ) {
 				$begin = $offset;
-			else
+			} else {
 				$begin = $offset + intval( ( $len - $contextchars ) / 2 );
+			}
 
 			$end = $begin + $contextchars;
 
@@ -1315,8 +1394,9 @@ class SearchHighlighter {
 			$out[$index] = $this->extract( $line, $begin, $end, $posBegin );
 			$offsets[$index] = $posBegin;
 			$linesleft--;
-			if ( $linesleft == 0 )
+			if ( $linesleft == 0 ) {
 				return;
+			}
 		}
 	}
 
@@ -1357,16 +1437,17 @@ class SearchHighlighter {
 	 */
 	function linkReplace( $matches ) {
 		$colon = strpos( $matches[1], ':' );
-		if ( $colon === false )
+		if ( $colon === false ) {
 			return $matches[2]; // replace with caption
+		}
 		global $wgContLang;
 		$ns = substr( $matches[1], 0, $colon );
 		$index = $wgContLang->getNsIndex( $ns );
-		if ( $index !== false && ( $index == NS_FILE || $index == NS_CATEGORY ) )
+		if ( $index !== false && ( $index == NS_FILE || $index == NS_CATEGORY ) ) {
 			return $matches[0]; // return the whole thing
-		else
+		} else {
 			return $matches[2];
-
+		}
 	}
 
 	/**
-- 
cgit v1.2.2