summary | refs | log | tree | commit | diff
path: root/includes/search
diff options
context:
space:
mode:
Diffstat (limited to 'includes/search')
-rw-r--r-- includes/search/SearchEngine.php 169
-rw-r--r-- includes/search/SearchMssql.php 6
-rw-r--r-- includes/search/SearchMySQL.php 66
-rw-r--r-- includes/search/SearchOracle.php 86
-rw-r--r-- includes/search/SearchPostgres.php 43
-rw-r--r-- includes/search/SearchSqlite.php 39
-rw-r--r-- includes/search/SearchUpdate.php 123
7 files changed, 342 insertions, 190 deletions
diff --git a/includes/search/SearchEngine.php b/includes/search/SearchEngine.php
index 6b3e62b1..71c05d8b 100644
--- a/includes/search/SearchEngine.php
+++ b/includes/search/SearchEngine.php
@@ -59,7 +59,7 @@ class SearchEngine {
* STUB
*
* @param string $term raw search term
- * @return SearchResultSet
+ * @return SearchResultSet|Status|null
*/
function searchText( $term ) {
return null;
@@ -71,7 +71,7 @@ class SearchEngine {
* STUB
*
* @param string $term raw search term
- * @return SearchResultSet
+ * @return SearchResultSet|null
*/
function searchTitle( $term ) {
return null;
@@ -93,8 +93,9 @@ class SearchEngine {
* @return Boolean
*/
public function supports( $feature ) {
- switch( $feature ) {
+ switch ( $feature ) {
case 'list-redirects':
+ case 'search-update':
return true;
case 'title-suffix-filter':
default:
@@ -331,8 +332,9 @@ class SearchEngine {
$parsed = substr( $query, strlen( $prefix ) + 1 );
}
}
- if ( trim( $parsed ) == '' )
+ if ( trim( $parsed ) == '' ) {
$parsed = $query; // prefix was the whole query
+ }
wfRunHooks( 'SearchEngineReplacePrefixesComplete', array( $this, $query, &$parsed ) );
@@ -420,8 +422,9 @@ class SearchEngine {
$formatted = array_map( array( $wgContLang, 'getFormattedNsText' ), $namespaces );
foreach ( $formatted as $key => $ns ) {
- if ( empty( $ns ) )
+ if ( empty( $ns ) ) {
$formatted[$key] = wfMessage( 'blanknamespace' )->text();
+ }
}
return $formatted;
}
@@ -451,23 +454,46 @@ class SearchEngine {
* Load up the appropriate search engine class for the currently
* active database backend, and return a configured instance.
*
+ * @param String $type Type of search backend, if not the default
* @return SearchEngine
*/
- public static function create() {
+ public static function create( $type = null ) {
global $wgSearchType;
$dbr = null;
- if ( $wgSearchType ) {
+
+ $alternatives = self::getSearchTypes();
+
+ if ( $type && in_array( $type, $alternatives ) ) {
+ $class = $type;
+ } elseif ( $wgSearchType !== null ) {
$class = $wgSearchType;
} else {
$dbr = wfGetDB( DB_SLAVE );
$class = $dbr->getSearchEngine();
}
+
$search = new $class( $dbr );
$search->setLimitOffset( 0, 0 );
return $search;
}
/**
+ * Return the search engines we support. If only $wgSearchType
+ * is set, it'll be an array of just that one item.
+ *
+ * The list is built on the first call and memoized in a function-level
+ * static, so later changes to the globals are not picked up by
+ * subsequent calls. $wgSearchType is always placed first, ahead of any
+ * entries from $wgSearchTypeAlternatives.
+ *
+ * NOTE(review): $wgSearchType is prepended as-is, so the first entry is
+ * null when that global is unset -- confirm callers tolerate this.
+ *
+ * @return array Backend types, with $wgSearchType as the first element
+ */
+ public static function getSearchTypes() {
+ global $wgSearchType, $wgSearchTypeAlternatives;
+ static $alternatives = null;
+ if ( $alternatives === null ) {
+ $alternatives = $wgSearchTypeAlternatives ?: array();
+ array_unshift( $alternatives, $wgSearchType );
+ }
+ return $alternatives;
+ }
+
+ /**
* Create or update the search index record for the given page.
* Title and text should be pre-processed.
* STUB
@@ -493,6 +519,18 @@ class SearchEngine {
}
/**
+ * Delete an indexed page
+ * Title should be pre-processed.
+ * STUB
+ *
+ * The base implementation does nothing and returns no value; both
+ * arguments are ignored here.
+ *
+ * @param Integer $id Page id that was deleted
+ * @param String $title Title of page that was deleted
+ */
+ function delete( $id, $title ) {
+ // no-op
+ }
+
+ /**
* Get OpenSearch suggestion template
*
* @return String
@@ -509,6 +547,31 @@ class SearchEngine {
return $wgCanonicalServer . wfScript( 'api' ) . '?action=opensearch&search={searchTerms}&namespace=' . $ns;
}
}
+
+ /**
+ * Get the raw text for updating the index from a content object
+ * Nicer search backends could possibly do something cooler than
+ * just returning raw text
+ *
+ * This base implementation does not use $t; it only delegates to
+ * Content::getTextForSearchIndex() when a content object is given.
+ *
+ * @todo This isn't ideal, we'd really like to have content-specific handling here
+ * @param Title $t Title we're indexing
+ * @param Content $c Content of the page to index; may be null
+ * @return string Text to index, or '' when $c is null
+ */
+ public function getTextFromContent( Title $t, Content $c = null ) {
+ return $c ? $c->getTextForSearchIndex() : '';
+ }
+
+ /**
+ * If an implementation of SearchEngine handles all of its own text processing
+ * in getTextFromContent() and doesn't require SearchUpdate::updateText()'s
+ * rather silly handling, it should return true here instead.
+ *
+ * The base implementation always returns false.
+ *
+ * @return bool False in this base class; implementations may return true
+ */
+ public function textAlreadyUpdatedForIndex() {
+ return false;
+ }
}
/**
@@ -641,26 +704,30 @@ class SqlSearchResultSet extends SearchResultSet {
}
function numRows() {
- if ( $this->mResultSet === false )
+ if ( $this->mResultSet === false ) {
return false;
+ }
return $this->mResultSet->numRows();
}
function next() {
- if ( $this->mResultSet === false )
+ if ( $this->mResultSet === false ) {
return false;
+ }
$row = $this->mResultSet->fetchObject();
- if ( $row === false )
+ if ( $row === false ) {
return false;
+ }
return SearchResult::newFromRow( $row );
}
function free() {
- if ( $this->mResultSet === false )
+ if ( $this->mResultSet === false ) {
return false;
+ }
$this->mResultSet->free();
}
@@ -750,8 +817,9 @@ class SearchResult {
wfRunHooks( 'SearchResultInitFromTitle', array( $title, &$id ) );
$this->mRevision = Revision::newFromTitle(
$this->mTitle, $id, Revision::READ_NORMAL );
- if ( $this->mTitle->getNamespace() === NS_FILE )
+ if ( $this->mTitle->getNamespace() === NS_FILE ) {
$this->mImage = wfFindFile( $this->mTitle );
+ }
}
}
@@ -761,8 +829,9 @@ class SearchResult {
* @return Boolean
*/
function isBrokenTitle() {
- if ( is_null( $this->mTitle ) )
+ if ( is_null( $this->mTitle ) ) {
return true;
+ }
return false;
}
@@ -795,10 +864,8 @@ class SearchResult {
protected function initText() {
if ( !isset( $this->mText ) ) {
if ( $this->mRevision != null ) {
- //TODO: if we could plug in some code that knows about special content models *and* about
- // special features of the search engine, the search could benefit.
- $content = $this->mRevision->getContent();
- $this->mText = $content ? $content->getTextForSearchIndex() : '';
+ $this->mText = SearchEngine::create()
+ ->getTextFromContent( $this->mTitle, $this->mRevision->getContent() );
} else { // TODO: can we fetch raw wikitext for commons images?
$this->mText = '';
}
@@ -810,16 +877,17 @@ class SearchResult {
* @return String: highlighted text snippet, null (and not '') if not supported
*/
function getTextSnippet( $terms ) {
- global $wgUser, $wgAdvancedSearchHighlighting;
+ global $wgAdvancedSearchHighlighting;
$this->initText();
// TODO: make highlighter take a content object. Make ContentHandler a factory for SearchHighlighter.
- list( $contextlines, $contextchars ) = SearchEngine::userHighlightPrefs( $wgUser );
+ list( $contextlines, $contextchars ) = SearchEngine::userHighlightPrefs();
$h = new SearchHighlighter();
- if ( $wgAdvancedSearchHighlighting )
+ if ( $wgAdvancedSearchHighlighting ) {
return $h->highlightText( $this->mText, $terms, $contextlines, $contextchars );
- else
+ } else {
return $h->highlightSimple( $this->mText, $terms, $contextlines, $contextchars );
+ }
}
/**
@@ -863,10 +931,11 @@ class SearchResult {
* @return String: timestamp
*/
function getTimestamp() {
- if ( $this->mRevision )
+ if ( $this->mRevision ) {
return $this->mRevision->getTimestamp();
- elseif ( $this->mImage )
+ } elseif ( $this->mImage ) {
return $this->mImage->getTimestamp();
+ }
return '';
}
@@ -952,8 +1021,9 @@ class SearchHighlighter {
global $wgSearchHighlightBoundaries;
$fname = __METHOD__;
- if ( $text == '' )
+ if ( $text == '' ) {
return '';
+ }
// split text into text + templates/links/tables
$spat = "/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)";
@@ -984,8 +1054,9 @@ class SearchHighlighter {
if ( $key == 2 ) {
// see if this is an image link
$ns = substr( $val[0], 2, - 1 );
- if ( $wgContLang->getNsIndex( $ns ) != NS_FILE )
+ if ( $wgContLang->getNsIndex( $ns ) != NS_FILE ) {
break;
+ }
}
$epat = $endPatterns[$key];
@@ -1006,7 +1077,7 @@ class SearchHighlighter {
$len = strlen( $endMatches[2][0] );
$off = $endMatches[2][1];
$this->splitAndAdd( $otherExt, $count,
- substr( $text, $start, $off + $len - $start ) );
+ substr( $text, $start, $off + $len - $start ) );
$start = $off + $len;
$found = true;
break;
@@ -1119,7 +1190,7 @@ class SearchHighlighter {
// if begin of the article contains the whole phrase, show only that !!
if ( array_key_exists( $first, $snippets ) && preg_match( $pat1, $snippets[$first] )
&& $offsets[$first] < $contextchars * 2 ) {
- $snippets = array ( $first => $snippets[$first] );
+ $snippets = array( $first => $snippets[$first] );
}
// calc by how much to extend existing snippets
@@ -1155,17 +1226,19 @@ class SearchHighlighter {
$last = - 1;
$extract = '';
foreach ( $snippets as $index => $line ) {
- if ( $last == - 1 )
+ if ( $last == - 1 ) {
$extract .= $line; // first line
- elseif ( $last + 1 == $index && $offsets[$last] + strlen( $snippets[$last] ) >= strlen( $all[$last] ) )
+ } elseif ( $last + 1 == $index && $offsets[$last] + strlen( $snippets[$last] ) >= strlen( $all[$last] ) ) {
$extract .= " " . $line; // continuous lines
- else
+ } else {
$extract .= '<b> ... </b>' . $line;
+ }
$last = $index;
}
- if ( $extract )
+ if ( $extract ) {
$extract .= '<b> ... </b>';
+ }
$processed = array();
foreach ( $terms as $term ) {
@@ -1193,8 +1266,9 @@ class SearchHighlighter {
$split = explode( "\n", $this->mCleanWikitext ? $this->removeWiki( $text ) : $text );
foreach ( $split as $line ) {
$tt = trim( $line );
- if ( $tt )
+ if ( $tt ) {
$extracts[$count++] = $tt;
+ }
}
}
@@ -1268,8 +1342,9 @@ class SearchHighlighter {
while ( $char >= 0x80 && $char < 0xc0 ) {
// skip trailing bytes
$point++;
- if ( $point >= strlen( $text ) )
+ if ( $point >= strlen( $text ) ) {
return strlen( $text );
+ }
$char = ord( $text[$point] );
}
return $point;
@@ -1289,24 +1364,28 @@ class SearchHighlighter {
* @protected
*/
function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ) {
- if ( $linesleft == 0 )
+ if ( $linesleft == 0 ) {
return; // nothing to do
+ }
foreach ( $extracts as $index => $line ) {
- if ( array_key_exists( $index, $out ) )
+ if ( array_key_exists( $index, $out ) ) {
continue; // this line already highlighted
+ }
$m = array();
- if ( !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE ) )
+ if ( !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE ) ) {
continue;
+ }
$offset = $m[0][1];
$len = strlen( $m[0][0] );
- if ( $offset + $len < $contextchars )
+ if ( $offset + $len < $contextchars ) {
$begin = 0;
- elseif ( $len > $contextchars )
+ } elseif ( $len > $contextchars ) {
$begin = $offset;
- else
+ } else {
$begin = $offset + intval( ( $len - $contextchars ) / 2 );
+ }
$end = $begin + $contextchars;
@@ -1315,8 +1394,9 @@ class SearchHighlighter {
$out[$index] = $this->extract( $line, $begin, $end, $posBegin );
$offsets[$index] = $posBegin;
$linesleft--;
- if ( $linesleft == 0 )
+ if ( $linesleft == 0 ) {
return;
+ }
}
}
@@ -1357,16 +1437,17 @@ class SearchHighlighter {
*/
function linkReplace( $matches ) {
$colon = strpos( $matches[1], ':' );
- if ( $colon === false )
+ if ( $colon === false ) {
return $matches[2]; // replace with caption
+ }
global $wgContLang;
$ns = substr( $matches[1], 0, $colon );
$index = $wgContLang->getNsIndex( $ns );
- if ( $index !== false && ( $index == NS_FILE || $index == NS_CATEGORY ) )
+ if ( $index !== false && ( $index == NS_FILE || $index == NS_CATEGORY ) ) {
return $matches[0]; // return the whole thing
- else
+ } else {
return $matches[2];
-
+ }
}
/**
diff --git a/includes/search/SearchMssql.php b/includes/search/SearchMssql.php
index 163d9dc3..cbc1a7a7 100644
--- a/includes/search/SearchMssql.php
+++ b/includes/search/SearchMssql.php
@@ -170,8 +170,9 @@ class SearchMssql extends SearchEngine {
if ( !empty( $terms[3] ) ) {
$regexp = preg_quote( $terms[3], '/' );
- if ( $terms[4] )
+ if ( $terms[4] ) {
$regexp .= "[0-9A-Za-z_]+";
+ }
} else {
$regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' );
}
@@ -247,8 +248,9 @@ class MssqlSearchResultSet extends SearchResultSet {
function next() {
$row = $this->mResultSet->fetchObject();
- if ( $row === false )
+ if ( $row === false ) {
return false;
+ }
return new SearchResult( $row );
}
}
diff --git a/includes/search/SearchMySQL.php b/includes/search/SearchMySQL.php
index 4a501fd0..b2bc1c26 100644
--- a/includes/search/SearchMySQL.php
+++ b/includes/search/SearchMySQL.php
@@ -57,12 +57,12 @@ class SearchMySQL extends SearchEngine {
# @todo FIXME: This doesn't handle parenthetical expressions.
$m = array();
- if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
+ if ( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
$filteredText, $m, PREG_SET_ORDER ) ) {
- foreach( $m as $bits ) {
+ foreach ( $m as $bits ) {
@list( /* all */, $modifier, $term, $nonQuoted, $wildcard ) = $bits;
- if( $nonQuoted != '' ) {
+ if ( $nonQuoted != '' ) {
$term = $nonQuoted;
$quote = '';
} else {
@@ -70,8 +70,10 @@ class SearchMySQL extends SearchEngine {
$quote = '"';
}
- if( $searchon !== '' ) $searchon .= ' ';
- if( $this->strictMatching && ($modifier == '') ) {
+ if ( $searchon !== '' ) {
+ $searchon .= ' ';
+ }
+ if ( $this->strictMatching && ( $modifier == '' ) ) {
// If we leave this out, boolean op defaults to OR which is rarely helpful.
$modifier = '+';
}
@@ -79,7 +81,7 @@ class SearchMySQL extends SearchEngine {
// Some languages such as Serbian store the input form in the search index,
// so we may need to search for matches in multiple writing system variants.
$convertedVariants = $wgContLang->autoConvertToAllVariants( $term );
- if( is_array( $convertedVariants ) ) {
+ if ( is_array( $convertedVariants ) ) {
$variants = array_unique( array_values( $convertedVariants ) );
} else {
$variants = array( $term );
@@ -99,11 +101,12 @@ class SearchMySQL extends SearchEngine {
$strippedVariants = array_unique( $strippedVariants );
$searchon .= $modifier;
- if( count( $strippedVariants) > 1 )
+ if ( count( $strippedVariants ) > 1 ) {
$searchon .= '(';
- foreach( $strippedVariants as $stripped ) {
+ }
+ foreach ( $strippedVariants as $stripped ) {
$stripped = $this->normalizeText( $stripped );
- if( $nonQuoted && strpos( $stripped, ' ' ) !== false ) {
+ if ( $nonQuoted && strpos( $stripped, ' ' ) !== false ) {
// Hack for Chinese: we need to toss in quotes for
// multiple-character phrases since normalizeForSearch()
// added spaces between them to make word breaks.
@@ -111,8 +114,9 @@ class SearchMySQL extends SearchEngine {
}
$searchon .= "$quote$stripped$quote$wildcard ";
}
- if( count( $strippedVariants) > 1 )
+ if ( count( $strippedVariants ) > 1 ) {
$searchon .= ')';
+ }
// Match individual terms or quoted phrase in result highlighting...
// Note that variants will be introduced in a later stage for highlighting!
@@ -134,8 +138,8 @@ class SearchMySQL extends SearchEngine {
global $wgContLang;
$regex = preg_quote( $string, '/' );
- if( $wgContLang->hasWordBreaks() ) {
- if( $wildcard ) {
+ if ( $wgContLang->hasWordBreaks() ) {
+ if ( $wildcard ) {
// Don't cut off the final bit!
$regex = "\b$regex";
} else {
@@ -177,7 +181,9 @@ class SearchMySQL extends SearchEngine {
global $wgCountTotalSearchHits;
// This seems out of place, why is this called with empty term?
- if ( trim( $term ) === '' ) return null;
+ if ( trim( $term ) === '' ) {
+ return null;
+ }
$filteredTerm = $this->filter( $term );
$query = $this->getQuery( $filteredTerm, $fulltext );
@@ -187,7 +193,7 @@ class SearchMySQL extends SearchEngine {
);
$total = null;
- if( $wgCountTotalSearchHits ) {
+ if ( $wgCountTotalSearchHits ) {
$query = $this->getCountQuery( $filteredTerm, $fulltext );
$totalResult = $this->db->select(
$query['tables'], $query['fields'], $query['conds'],
@@ -195,7 +201,7 @@ class SearchMySQL extends SearchEngine {
);
$row = $totalResult->fetchObject();
- if( $row ) {
+ if ( $row ) {
$total = intval( $row->c );
}
$totalResult->free();
@@ -205,12 +211,11 @@ class SearchMySQL extends SearchEngine {
}
public function supports( $feature ) {
- switch( $feature ) {
- case 'list-redirects':
+ switch ( $feature ) {
case 'title-suffix-filter':
return true;
default:
- return false;
+ return parent::supports( $feature );
}
}
@@ -223,7 +228,7 @@ class SearchMySQL extends SearchEngine {
foreach ( $this->features as $feature => $value ) {
if ( $feature === 'list-redirects' && !$value ) {
$query['conds']['page_is_redirect'] = 0;
- } elseif( $feature === 'title-suffix-filter' && $value ) {
+ } elseif ( $feature === 'title-suffix-filter' && $value ) {
$query['conds'][] = 'page_title' . $this->db->buildLike( $this->db->anyString(), $value );
}
}
@@ -358,12 +363,25 @@ class SearchMySQL extends SearchEngine {
$dbw->update( 'searchindex',
array( 'si_title' => $this->normalizeText( $title ) ),
- array( 'si_page' => $id ),
+ array( 'si_page' => $id ),
__METHOD__,
array( $dbw->lowPriorityOption() ) );
}
/**
+ * Delete an indexed page
+ * Title should be pre-processed.
+ *
+ * Removes the rows of the searchindex table whose si_page matches $id,
+ * using the DB_MASTER connection. $title is not used by this
+ * implementation.
+ *
+ * @param Integer $id Page id that was deleted
+ * @param String $title Title of page that was deleted
+ */
+ function delete( $id, $title ) {
+ $dbw = wfGetDB( DB_MASTER );
+
+ $dbw->delete( 'searchindex', array( 'si_page' => $id ), __METHOD__ );
+ }
+
+ /**
* Converts some characters for MySQL's indexing to grok it correctly,
* and pads short words to overcome limitations.
* @return mixed|string
@@ -386,7 +404,7 @@ class SearchMySQL extends SearchEngine {
// ignores short words... Pad them so we can pass them
// through without reconfiguring the server...
$minLength = $this->minSearchLength();
- if( $minLength > 1 ) {
+ if ( $minLength > 1 ) {
$n = $minLength - 1;
$out = preg_replace(
"/\b(\w{1,$n})\b/",
@@ -427,7 +445,7 @@ class SearchMySQL extends SearchEngine {
* @return int
*/
protected function minSearchLength() {
- if( is_null( self::$mMinSearchLength ) ) {
+ if ( is_null( self::$mMinSearchLength ) ) {
$sql = "SHOW GLOBAL VARIABLES LIKE 'ft\\_min\\_word\\_len'";
$dbr = wfGetDB( DB_SLAVE );
@@ -435,7 +453,7 @@ class SearchMySQL extends SearchEngine {
$row = $result->fetchObject();
$result->free();
- if( $row && $row->Variable_name == 'ft_min_word_len' ) {
+ if ( $row && $row->Variable_name == 'ft_min_word_len' ) {
self::$mMinSearchLength = intval( $row->Value );
} else {
self::$mMinSearchLength = 0;
@@ -449,7 +467,7 @@ class SearchMySQL extends SearchEngine {
* @ingroup Search
*/
class MySQLSearchResultSet extends SqlSearchResultSet {
- function __construct( $resultSet, $terms, $totalHits=null ) {
+ function __construct( $resultSet, $terms, $totalHits = null ) {
parent::__construct( $resultSet, $terms );
$this->mTotalHits = $totalHits;
}
diff --git a/includes/search/SearchOracle.php b/includes/search/SearchOracle.php
index b0ea97fe..a8479654 100644
--- a/includes/search/SearchOracle.php
+++ b/includes/search/SearchOracle.php
@@ -30,32 +30,34 @@
*/
class SearchOracle extends SearchEngine {
- private $reservedWords = array ('ABOUT' => 1,
- 'ACCUM' => 1,
- 'AND' => 1,
- 'BT' => 1,
- 'BTG' => 1,
- 'BTI' => 1,
- 'BTP' => 1,
- 'FUZZY' => 1,
- 'HASPATH' => 1,
- 'INPATH' => 1,
- 'MINUS' => 1,
- 'NEAR' => 1,
- 'NOT' => 1,
- 'NT' => 1,
- 'NTG' => 1,
- 'NTI' => 1,
- 'NTP' => 1,
- 'OR' => 1,
- 'PT' => 1,
- 'RT' => 1,
- 'SQE' => 1,
- 'SYN' => 1,
- 'TR' => 1,
- 'TRSYN' => 1,
- 'TT' => 1,
- 'WITHIN' => 1);
+ private $reservedWords = array(
+ 'ABOUT' => 1,
+ 'ACCUM' => 1,
+ 'AND' => 1,
+ 'BT' => 1,
+ 'BTG' => 1,
+ 'BTI' => 1,
+ 'BTP' => 1,
+ 'FUZZY' => 1,
+ 'HASPATH' => 1,
+ 'INPATH' => 1,
+ 'MINUS' => 1,
+ 'NEAR' => 1,
+ 'NOT' => 1,
+ 'NT' => 1,
+ 'NTG' => 1,
+ 'NTI' => 1,
+ 'NTP' => 1,
+ 'OR' => 1,
+ 'PT' => 1,
+ 'RT' => 1,
+ 'SQE' => 1,
+ 'SYN' => 1,
+ 'TR' => 1,
+ 'TRSYN' => 1,
+ 'TT' => 1,
+ 'WITHIN' => 1,
+ );
/**
* Creates an instance of this class
@@ -72,8 +74,9 @@ class SearchOracle extends SearchEngine {
* @return SqlSearchResultSet
*/
function searchText( $term ) {
- if ( $term == '' )
+ if ( $term == '' ) {
return new SqlSearchResultSet( false, '' );
+ }
$resultSet = $this->db->resultObject( $this->db->query( $this->getQuery( $this->filter( $term ), true ) ) );
return new SqlSearchResultSet( $resultSet, $this->searchTerms );
@@ -86,8 +89,9 @@ class SearchOracle extends SearchEngine {
* @return SqlSearchResultSet
*/
function searchTitle( $term ) {
- if ( $term == '' )
+ if ( $term == '' ) {
return new SqlSearchResultSet( false, '' );
+ }
$resultSet = $this->db->resultObject( $this->db->query( $this->getQuery( $this->filter( $term ), false ) ) );
return new MySQLSearchResultSet( $resultSet, $this->searchTerms );
@@ -110,8 +114,9 @@ class SearchOracle extends SearchEngine {
* @return String
*/
function queryNamespaces() {
- if( is_null( $this->namespaces ) )
+ if ( is_null( $this->namespaces ) ) {
return '';
+ }
if ( !count( $this->namespaces ) ) {
$namespaces = '0';
} else {
@@ -195,23 +200,24 @@ class SearchOracle extends SearchEngine {
$searchon = '';
if ( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
$filteredText, $m, PREG_SET_ORDER ) ) {
- foreach( $m as $terms ) {
+ foreach ( $m as $terms ) {
// Search terms in all variant forms, only
// apply on wiki with LanguageConverter
$temp_terms = $wgContLang->autoConvertToAllVariants( $terms[2] );
- if( is_array( $temp_terms )) {
- $temp_terms = array_unique( array_values( $temp_terms ));
- foreach( $temp_terms as $t ) {
- $searchon .= ($terms[1] == '-' ? ' ~' : ' & ') . $this->escapeTerm( $t );
+ if ( is_array( $temp_terms ) ) {
+ $temp_terms = array_unique( array_values( $temp_terms ) );
+ foreach ( $temp_terms as $t ) {
+ $searchon .= ( $terms[1] == '-' ? ' ~' : ' & ' ) . $this->escapeTerm( $t );
}
}
else {
- $searchon .= ($terms[1] == '-' ? ' ~' : ' & ') . $this->escapeTerm( $terms[2] );
+ $searchon .= ( $terms[1] == '-' ? ' ~' : ' & ' ) . $this->escapeTerm( $terms[2] );
}
if ( !empty( $terms[3] ) ) {
$regexp = preg_quote( $terms[3], '/' );
- if ( $terms[4] )
+ if ( $terms[4] ) {
$regexp .= "[0-9A-Za-z_]+";
+ }
} else {
$regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' );
}
@@ -227,9 +233,9 @@ class SearchOracle extends SearchEngine {
private function escapeTerm( $t ) {
global $wgContLang;
$t = $wgContLang->normalizeForSearch( $t );
- $t = isset( $this->reservedWords[strtoupper( $t )] ) ? '{'.$t.'}' : $t;
- $t = preg_replace('/^"(.*)"$/', '($1)', $t);
- $t = preg_replace('/([-&|])/', '\\\\$1', $t);
+ $t = isset( $this->reservedWords[strtoupper( $t )] ) ? '{' . $t . '}' : $t;
+ $t = preg_replace( '/^"(.*)"$/', '($1)', $t );
+ $t = preg_replace( '/([-&|])/', '\\\\$1', $t );
return $t;
}
/**
@@ -273,7 +279,7 @@ class SearchOracle extends SearchEngine {
$dbw->update( 'searchindex',
array( 'si_title' => $title ),
- array( 'si_page' => $id ),
+ array( 'si_page' => $id ),
'SearchOracle::updateTitle',
array() );
}
diff --git a/includes/search/SearchPostgres.php b/includes/search/SearchPostgres.php
index 56464e98..7f19ed13 100644
--- a/includes/search/SearchPostgres.php
+++ b/includes/search/SearchPostgres.php
@@ -64,7 +64,7 @@ class SearchPostgres extends SearchEngine {
function searchText( $term ) {
$q = $this->searchQuery( $term, 'textvector', 'old_text' );
- $olderror = error_reporting(E_ERROR);
+ $olderror = error_reporting( E_ERROR );
$resultSet = $this->db->resultObject( $this->db->query( $q, 'SearchPostgres', true ) );
error_reporting( $olderror );
if ( !$resultSet ) {
@@ -86,19 +86,19 @@ class SearchPostgres extends SearchEngine {
wfDebug( "parseQuery received: $term \n" );
## No backslashes allowed
- $term = preg_replace('/\\\/', '', $term);
+ $term = preg_replace( '/\\\/', '', $term );
## Collapse parens into nearby words:
- $term = preg_replace('/\s*\(\s*/', ' (', $term);
- $term = preg_replace('/\s*\)\s*/', ') ', $term);
+ $term = preg_replace( '/\s*\(\s*/', ' (', $term );
+ $term = preg_replace( '/\s*\)\s*/', ') ', $term );
## Treat colons as word separators:
- $term = preg_replace('/:/', ' ', $term);
+ $term = preg_replace( '/:/', ' ', $term );
$searchstring = '';
$m = array();
- if( preg_match_all('/([-!]?)(\S+)\s*/', $term, $m, PREG_SET_ORDER ) ) {
- foreach( $m as $terms ) {
+ if ( preg_match_all( '/([-!]?)(\S+)\s*/', $term, $m, PREG_SET_ORDER ) ) {
+ foreach ( $m as $terms ) {
if ( strlen( $terms[1] ) ) {
$searchstring .= ' & !';
}
@@ -118,19 +118,19 @@ class SearchPostgres extends SearchEngine {
}
## Strip out leading junk
- $searchstring = preg_replace('/^[\s\&\|]+/', '', $searchstring);
+ $searchstring = preg_replace( '/^[\s\&\|]+/', '', $searchstring );
## Remove any doubled-up operators
- $searchstring = preg_replace('/([\!\&\|]) +(?:[\&\|] +)+/', "$1 ", $searchstring);
+ $searchstring = preg_replace( '/([\!\&\|]) +(?:[\&\|] +)+/', "$1 ", $searchstring );
## Remove any non-spaced operators (e.g. "Zounds!")
- $searchstring = preg_replace('/([^ ])[\!\&\|]/', "$1", $searchstring);
+ $searchstring = preg_replace( '/([^ ])[\!\&\|]/', "$1", $searchstring );
## Remove any trailing whitespace or operators
- $searchstring = preg_replace('/[\s\!\&\|]+$/', '', $searchstring);
+ $searchstring = preg_replace( '/[\s\!\&\|]+$/', '', $searchstring );
## Remove unnecessary quotes around everything
- $searchstring = preg_replace('/^[\'"](.*)[\'"]$/', "$1", $searchstring);
+ $searchstring = preg_replace( '/^[\'"](.*)[\'"]$/', "$1", $searchstring );
## Quote the whole thing
$searchstring = $this->db->addQuotes( $searchstring );
@@ -163,30 +163,31 @@ class SearchPostgres extends SearchEngine {
$top = $top[0];
if ( $top === "" ) { ## e.g. if only stopwords are used XXX return something better
- $query = "SELECT page_id, page_namespace, page_title, 0 AS score ".
+ $query = "SELECT page_id, page_namespace, page_title, 0 AS score " .
"FROM page p, revision r, pagecontent c WHERE p.page_latest = r.rev_id " .
"AND r.rev_text_id = c.old_id AND 1=0";
}
else {
$m = array();
- if( preg_match_all("/'([^']+)'/", $top, $m, PREG_SET_ORDER ) ) {
- foreach( $m as $terms ) {
+ if ( preg_match_all( "/'([^']+)'/", $top, $m, PREG_SET_ORDER ) ) {
+ foreach ( $m as $terms ) {
$this->searchTerms[$terms[1]] = $terms[1];
}
}
- $query = "SELECT page_id, page_namespace, page_title, ".
- "ts_rank($fulltext, to_tsquery($searchstring), 5) AS score ".
+ $query = "SELECT page_id, page_namespace, page_title, " .
+ "ts_rank($fulltext, to_tsquery($searchstring), 5) AS score " .
"FROM page p, revision r, pagecontent c WHERE p.page_latest = r.rev_id " .
"AND r.rev_text_id = c.old_id AND $fulltext @@ to_tsquery($searchstring)";
}
## Redirects
- if ( !$this->showRedirects )
+ if ( !$this->showRedirects ) {
$query .= ' AND page_is_redirect = 0';
+ }
## Namespaces - defaults to 0
- if( !is_null( $this->namespaces ) ) { // null -> search all
+ if ( !is_null( $this->namespaces ) ) { // null -> search all
if ( count( $this->namespaces ) < 1 ) {
$query .= ' AND page_namespace = 0';
} else {
@@ -208,7 +209,7 @@ class SearchPostgres extends SearchEngine {
function update( $pageid, $title, $text ) {
## We don't want to index older revisions
- $SQL = "UPDATE pagecontent SET textvector = NULL WHERE old_id IN ".
+ $SQL = "UPDATE pagecontent SET textvector = NULL WHERE old_id IN " .
"(SELECT rev_text_id FROM revision WHERE rev_page = " . intval( $pageid ) .
" ORDER BY rev_text_id DESC OFFSET 1)";
$this->db->query( $SQL );
@@ -244,7 +245,7 @@ class PostgresSearchResultSet extends SqlSearchResultSet {
function next() {
$row = $this->mResultSet->fetchObject();
- if( $row === false ) {
+ if ( $row === false ) {
return false;
} else {
return new PostgresSearchResult( $row );
diff --git a/includes/search/SearchSqlite.php b/includes/search/SearchSqlite.php
index f3f4788c..554181f6 100644
--- a/includes/search/SearchSqlite.php
+++ b/includes/search/SearchSqlite.php
@@ -61,12 +61,12 @@ class SearchSqlite extends SearchEngine {
$this->searchTerms = array();
$m = array();
- if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
+ if ( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
$filteredText, $m, PREG_SET_ORDER ) ) {
- foreach( $m as $bits ) {
+ foreach ( $m as $bits ) {
@list( /* all */, $modifier, $term, $nonQuoted, $wildcard ) = $bits;
- if( $nonQuoted != '' ) {
+ if ( $nonQuoted != '' ) {
$term = $nonQuoted;
$quote = '';
} else {
@@ -74,14 +74,14 @@ class SearchSqlite extends SearchEngine {
$quote = '"';
}
- if( $searchon !== '' ) {
+ if ( $searchon !== '' ) {
$searchon .= ' ';
}
// Some languages such as Serbian store the input form in the search index,
// so we may need to search for matches in multiple writing system variants.
$convertedVariants = $wgContLang->autoConvertToAllVariants( $term );
- if( is_array( $convertedVariants ) ) {
+ if ( is_array( $convertedVariants ) ) {
$variants = array_unique( array_values( $convertedVariants ) );
} else {
$variants = array( $term );
@@ -101,10 +101,11 @@ class SearchSqlite extends SearchEngine {
$strippedVariants = array_unique( $strippedVariants );
$searchon .= $modifier;
- if( count( $strippedVariants) > 1 )
+ if ( count( $strippedVariants ) > 1 ) {
$searchon .= '(';
- foreach( $strippedVariants as $stripped ) {
- if( $nonQuoted && strpos( $stripped, ' ' ) !== false ) {
+ }
+ foreach ( $strippedVariants as $stripped ) {
+ if ( $nonQuoted && strpos( $stripped, ' ' ) !== false ) {
// Hack for Chinese: we need to toss in quotes for
// multiple-character phrases since normalizeForSearch()
// added spaces between them to make word breaks.
@@ -112,8 +113,9 @@ class SearchSqlite extends SearchEngine {
}
$searchon .= "$quote$stripped$quote$wildcard ";
}
- if( count( $strippedVariants) > 1 )
+ if ( count( $strippedVariants ) > 1 ) {
$searchon .= ')';
+ }
// Match individual terms or quoted phrase in result highlighting...
// Note that variants will be introduced in a later stage for highlighting!
@@ -134,8 +136,8 @@ class SearchSqlite extends SearchEngine {
global $wgContLang;
$regex = preg_quote( $string, '/' );
- if( $wgContLang->hasWordBreaks() ) {
- if( $wildcard ) {
+ if ( $wgContLang->hasWordBreaks() ) {
+ if ( $wildcard ) {
// Don't cut off the final bit!
$regex = "\b$regex";
} else {
@@ -184,10 +186,10 @@ class SearchSqlite extends SearchEngine {
$resultSet = $this->db->query( $this->getQuery( $filteredTerm, $fulltext ) );
$total = null;
- if( $wgCountTotalSearchHits ) {
+ if ( $wgCountTotalSearchHits ) {
$totalResult = $this->db->query( $this->getCountQuery( $filteredTerm, $fulltext ) );
$row = $totalResult->fetchObject();
- if( $row ) {
+ if ( $row ) {
$total = intval( $row->c );
}
$totalResult->free();
@@ -201,7 +203,7 @@ class SearchSqlite extends SearchEngine {
* @return String
*/
function queryRedirect() {
- if( $this->showRedirects ) {
+ if ( $this->showRedirects ) {
return '';
} else {
return 'AND page_is_redirect=0';
@@ -213,8 +215,9 @@ class SearchSqlite extends SearchEngine {
* @return String
*/
function queryNamespaces() {
- if( is_null( $this->namespaces ) )
+ if ( is_null( $this->namespaces ) ) {
return ''; # search all
+ }
if ( !count( $this->namespaces ) ) {
$namespaces = '0';
} else {
@@ -295,7 +298,7 @@ class SearchSqlite extends SearchEngine {
if ( !$this->fulltextSearchSupported() ) {
return;
}
- // @todo: find a method to do it in a single request,
+ // @todo find a method to do it in a single request,
// couldn't do it so far due to typelessness of FTS3 tables.
$dbw = wfGetDB( DB_MASTER );
@@ -324,7 +327,7 @@ class SearchSqlite extends SearchEngine {
$dbw->update( 'searchindex',
array( 'si_title' => $title ),
- array( 'rowid' => $id ),
+ array( 'rowid' => $id ),
__METHOD__ );
}
}
@@ -333,7 +336,7 @@ class SearchSqlite extends SearchEngine {
* @ingroup Search
*/
class SqliteSearchResultSet extends SqlSearchResultSet {
- function __construct( $resultSet, $terms, $totalHits=null ) {
+ function __construct( $resultSet, $terms, $totalHits = null ) {
parent::__construct( $resultSet, $terms );
$this->mTotalHits = $totalHits;
}
diff --git a/includes/search/SearchUpdate.php b/includes/search/SearchUpdate.php
index eabcda3e..82a413e9 100644
--- a/includes/search/SearchUpdate.php
+++ b/includes/search/SearchUpdate.php
@@ -29,51 +29,108 @@
* @ingroup Search
*/
class SearchUpdate implements DeferrableUpdate {
-
- private $mId = 0, $mNamespace, $mTitle, $mText;
- private $mTitleWords;
-
- function __construct( $id, $title, $text = false ) {
+ /**
+ * Page id being updated
+ * @var int
+ */
+ private $id = 0;
+
+ /**
+ * Title we're updating
+ * @var Title
+ */
+ private $title;
+
+ /**
+ * Content of the page (not text)
+ * @var Content|false
+ */
+ private $content;
+
+ /**
+ * Constructor
+ *
+ * @param int $id Page id to update
+ * @param Title|string $title Title of page to update
+ * @param Content|string|false $c Content of the page to update.
+ * If a Content object, its text is extracted from it. A string is accepted for back-compat.
+ * Passing false tells the backend to update only the title, not the content.
+ */
+ public function __construct( $id, $title, $c = false ) {
if ( is_string( $title ) ) {
$nt = Title::newFromText( $title );
} else {
$nt = $title;
}
- if( $nt ) {
- $this->mId = $id;
- $this->mText = $text;
-
- $this->mNamespace = $nt->getNamespace();
- $this->mTitle = $nt->getText(); # Discard namespace
-
- $this->mTitleWords = $this->mTextWords = array();
+ if ( $nt ) {
+ $this->id = $id;
+ // is_string() check is back-compat for ApprovedRevs
+ if ( is_string( $c ) ) {
+ $this->content = new TextContent( $c );
+ } else {
+ $this->content = $c ?: false;
+ }
+ $this->title = $nt;
} else {
wfDebug( "SearchUpdate object created with invalid title '$title'\n" );
}
}
- function doUpdate() {
- global $wgContLang, $wgDisableSearchUpdate;
+ /**
+ * Perform actual update for the entry
+ */
+ public function doUpdate() {
+ global $wgDisableSearchUpdate;
- if( $wgDisableSearchUpdate || !$this->mId ) {
+ if ( $wgDisableSearchUpdate || !$this->id ) {
return;
}
wfProfileIn( __METHOD__ );
- $search = SearchEngine::create();
- $lc = SearchEngine::legalSearchChars() . '&#;';
+ $page = WikiPage::newFromId( $this->id, WikiPage::READ_LATEST );
+ $indexTitle = Title::indexTitle( $this->title->getNamespace(), $this->title->getText() );
- if( $this->mText === false ) {
- $search->updateTitle($this->mId,
- $search->normalizeText( Title::indexTitle( $this->mNamespace, $this->mTitle ) ) );
- wfProfileOut( __METHOD__ );
- return;
+ foreach ( SearchEngine::getSearchTypes() as $type ) {
+ $search = SearchEngine::create( $type );
+ if ( !$search->supports( 'search-update' ) ) {
+ continue;
+ }
+
+ $normalTitle = $search->normalizeText( $indexTitle );
+
+ if ( $page === null ) {
+ $search->delete( $this->id, $normalTitle );
+ continue;
+ } elseif ( $this->content === false ) {
+ $search->updateTitle( $this->id, $normalTitle );
+ continue;
+ }
+
+ $text = $search->getTextFromContent( $this->title, $this->content );
+ if ( !$search->textAlreadyUpdatedForIndex() ) {
+ $text = self::updateText( $text );
+ }
+
+ # Perform the actual update
+ $search->update( $this->id, $normalTitle, $search->normalizeText( $text ) );
}
+ wfProfileOut( __METHOD__ );
+ }
+
+ /**
+ * Clean text for indexing. Only really suitable for indexing in databases.
+ * If you're using a real search engine, you'll probably want to override
+ * this behavior and do something nicer with the original wikitext.
+ */
+ public static function updateText( $text ) {
+ global $wgContLang;
+
# Language-specific strip/conversion
- $text = $wgContLang->normalizeForSearch( $this->mText );
+ $text = $wgContLang->normalizeForSearch( $text );
+ $lc = SearchEngine::legalSearchChars() . '&#;';
wfProfileIn( __METHOD__ . '-regexps' );
$text = preg_replace( "/<\\/?\\s*[A-Za-z][^>]*?>/",
@@ -123,22 +180,6 @@ class SearchUpdate implements DeferrableUpdate {
# Strip wiki '' and '''
$text = preg_replace( "/''[']*/", " ", $text );
wfProfileOut( __METHOD__ . '-regexps' );
-
- wfRunHooks( 'SearchUpdate', array( $this->mId, $this->mNamespace, $this->mTitle, &$text ) );
-
- # Perform the actual update
- $search->update( $this->mId, $search->normalizeText( Title::indexTitle( $this->mNamespace, $this->mTitle ) ),
- $search->normalizeText( $text ) );
-
- wfProfileOut( __METHOD__ );
+ return $text;
}
}
-
-/**
- * Placeholder class
- *
- * @ingroup Search
- */
-class SearchUpdateMyISAM extends SearchUpdate {
- # Inherits everything
-}