summary | refs | log | tree | commit | diff
path: root/languages/classes/LanguageZh_hans.php
diff options
context:
space:
mode:
Diffstat (limited to 'languages/classes/LanguageZh_hans.php')
-rw-r--r-- languages/classes/LanguageZh_hans.php 43
1 files changed, 26 insertions, 17 deletions
diff --git a/languages/classes/LanguageZh_hans.php b/languages/classes/LanguageZh_hans.php
index 983dd485..5b03d731 100644
--- a/languages/classes/LanguageZh_hans.php
+++ b/languages/classes/LanguageZh_hans.php
@@ -4,21 +4,30 @@
* @ingroup Language
*/
class LanguageZh_hans extends Language {
- function stripForSearch( $string ) {
- # MySQL fulltext index doesn't grok utf-8, so we
- # need to fold cases and convert to hex
- # we also separate characters as "words"
- if( function_exists( 'mb_strtolower' ) ) {
- return preg_replace(
- "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
- "' U8' . bin2hex( \"$1\" )",
- mb_strtolower( $string ) );
- } else {
- list( , $wikiLowerChars ) = Language::getCaseMaps();
- return preg_replace(
- "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
- "' U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )",
- $string );
- }
+ function hasWordBreaks() {
+ return false;
}
-}
+
+ /**
+ * Eventually this should be a word segmentation;
+ * for now just treat each character as a word.
+ * @todo Fixme: only do this for Han characters...
+ */
+ function wordSegmentation( $string ) {
+ $reg = "/([\\xc0-\\xff][\\x80-\\xbf]*)/";
+ $s = self::insertSpace( $string, $reg );
+ return $s;
+ }
+
+ function normalizeForSearch( $string ) {
+ wfProfileIn( __METHOD__ );
+
+ // Double-width roman characters
+ $s = self::convertDoubleWidth( $string );
+ $s = trim( $s );
+ $s = parent::normalizeForSearch( $s );
+
+ wfProfileOut( __METHOD__ );
+ return $s;
+ }
+}
\ No newline at end of file