summaryrefslogtreecommitdiff
path: root/languages/Language.php
diff options
context:
space:
mode:
author Pierre Schmitz <pierre@archlinux.de> 2013-08-12 09:28:15 +0200
committer Pierre Schmitz <pierre@archlinux.de> 2013-08-12 09:28:15 +0200
commit 08aa4418c30cfc18ccc69a0f0f9cb9e17be6c196 (patch)
tree 577a29fb579188d16003a209ce2a2e9c5b0aa2bd /languages/Language.php
parent cacc939b34e315b85e2d72997811eb6677996cc1 (diff)
Update to MediaWiki 1.21.1
Diffstat (limited to 'languages/Language.php')
-rw-r--r-- languages/Language.php 367
1 files changed, 267 insertions, 100 deletions
diff --git a/languages/Language.php b/languages/Language.php
index 40d1f36c..1d2e7164 100644
--- a/languages/Language.php
+++ b/languages/Language.php
@@ -48,12 +48,13 @@ class FakeConverter {
/**
* @var Language
*/
- var $mLang;
+ public $mLang;
function __construct( $langobj ) { $this->mLang = $langobj; }
function autoConvertToAllVariants( $text ) { return array( $this->mLang->getCode() => $text ); }
function convert( $t ) { return $t; }
function convertTo( $text, $variant ) { return $text; }
function convertTitle( $t ) { return $t->getPrefixedText(); }
+ function convertNamespace( $ns ) { $lang = $this->mLang; return $lang->getFormattedNsText( $ns ); } // no variant conversion here: just the language's formatted namespace text
function getVariants() { return array( $this->mLang->getCode() ); }
function getPreferredVariant() { return $this->mLang->getCode(); }
function getDefaultVariant() { return $this->mLang->getCode(); }
@@ -77,21 +78,21 @@ class Language {
/**
* @var LanguageConverter
*/
- var $mConverter;
+ public $mConverter;
- var $mVariants, $mCode, $mLoaded = false;
- var $mMagicExtensions = array(), $mMagicHookDone = false;
+ public $mVariants, $mCode, $mLoaded = false;
+ public $mMagicExtensions = array(), $mMagicHookDone = false;
private $mHtmlCode = null;
- var $dateFormatStrings = array();
- var $mExtendedSpecialPageAliases;
+ public $dateFormatStrings = array();
+ public $mExtendedSpecialPageAliases;
protected $namespaceNames, $mNamespaceIds, $namespaceAliases;
/**
* ReplacementArray object caches
*/
- var $transformData = array();
+ public $transformData = array();
/**
* @var LocalisationCache
@@ -159,10 +160,10 @@ class Language {
* @var array
*/
static public $durationIntervals = array(
- 'millennia' => 31557600000,
- 'centuries' => 3155760000,
- 'decades' => 315576000,
- 'years' => 31557600, // 86400 * 365.25
+ 'millennia' => 31556952000,
+ 'centuries' => 3155695200,
+ 'decades' => 315569520,
+ 'years' => 31556952, // 86400 * ( 365 + ( 24 * 3 + 25 ) / 400 )
'weeks' => 604800,
'days' => 86400,
'hours' => 3600,
@@ -171,19 +172,28 @@ class Language {
);
/**
- * Get a cached language object for a given language code
+ * Get a cached or new language object for a given language code
* @param $code String
* @return Language
*/
static function factory( $code ) {
- if ( !isset( self::$mLangObjCache[$code] ) ) {
- if ( count( self::$mLangObjCache ) > 10 ) {
- // Don't keep a billion objects around, that's stupid.
- self::$mLangObjCache = array();
- }
- self::$mLangObjCache[$code] = self::newFromCode( $code );
+ global $wgDummyLanguageCodes, $wgLangObjCacheSize;
+
+ if ( isset( $wgDummyLanguageCodes[$code] ) ) {
+ $code = $wgDummyLanguageCodes[$code]; // replace a dummy code with the code it is mapped to
}
- return self::$mLangObjCache[$code];
+
+ // Reuse the cached object for this code if there is one, otherwise create a new one.
+ $langObj = isset( self::$mLangObjCache[$code] )
+ ? self::$mLangObjCache[$code]
+ : self::newFromCode( $code );
+
+ // Merge with the requested object listed first, so that it sits at the front of the cache.
+ self::$mLangObjCache = array_merge( array( $code => $langObj ), self::$mLangObjCache );
+ // Keep only the first $wgLangObjCacheSize entries (keys preserved); the ones at the end are dropped.
+ self::$mLangObjCache = array_slice( self::$mLangObjCache, 0, $wgLangObjCacheSize, true );
+
+ return $langObj;
}
/**
@@ -236,6 +246,78 @@ class Language {
}
/**
+ * Checks whether any localisation is available for that language tag
+ * in MediaWiki (MessagesXx.php exists).
+ *
+ * @param string $code Language tag (in lower case)
+ * @return bool Whether language is supported
+ * @since 1.21
+ */
+ public static function isSupportedLanguage( $code ) {
+ // Codes that are not already lower case are rejected before the messages file is looked at.
+ return strtolower( $code ) === $code ? is_readable( self::getMessagesFileName( $code ) ) : false;
+ }
+
+ /**
+ * Returns true if a language code string is a well-formed language tag
+ * according to RFC 5646.
+ * This function only checks well-formedness; it doesn't check that
+ * language, script or variant codes actually exist in the repositories.
+ *
+ * Based on regexes by Mark Davis of the Unicode Consortium:
+ * http://unicode.org/repos/cldr/trunk/tools/java/org/unicode/cldr/util/data/langtagRegex.txt
+ *
+ * @param $code string
+ * @param $lenient boolean Whether to allow '_' as separator. The default is only '-'.
+ *
+ * @return bool
+ * @since 1.21
+ */
+ public static function isWellFormedLanguageTag( $code, $lenient = false ) {
+ // Building blocks. All character classes are lower case; see the modifiers on the final match.
+ $alpha = '[a-z]';
+ $digit = '[0-9]';
+ $alphanum = '[a-z0-9]';
+ $x = 'x' ; # private use singleton
+ $singleton = '[a-wy-z]'; # other singleton
+ $s = $lenient ? '[-_]' : '-'; # subtag separator
+
+ $language = "$alpha{2,8}|$alpha{2,3}$s$alpha{3}";
+ $script = "$alpha{4}"; # ISO 15924
+ $region = "(?:$alpha{2}|$digit{3})"; # ISO 3166-1 alpha-2 or UN M.49
+ $variant = "(?:$alphanum{5,8}|$digit$alphanum{3})";
+ $extension = "$singleton(?:$s$alphanum{2,8})+";
+ $privateUse = "$x(?:$s$alphanum{1,8})+";
+
+ # Define certain grandfathered codes, since otherwise the regex is pretty useless.
+ # Since these are limited, this is safe even with later changes to the registry --
+ # the only oddity is that it might change the type of the tag, and thus
+ # the results from the capturing groups.
+ # http://www.iana.org/assignments/language-subtag-registry
+ #
+ # Note that these are written in their registered mixed-case form (GB, BE, CH).
+
+ $grandfathered = "en{$s}GB{$s}oed"
+ . "|i{$s}(?:ami|bnn|default|enochian|hak|klingon|lux|mingo|navajo|pwn|tao|tay|tsu)"
+ . "|no{$s}(?:bok|nyn)"
+ . "|sgn{$s}(?:BE{$s}(?:fr|nl)|CH{$s}de)"
+ . "|zh{$s}min{$s}nan";
+
+ $variantList = "$variant(?:$s$variant)*";
+ $extensionList = "$extension(?:$s$extension)*";
+
+ $langtag = "(?:($language)"
+ . "(?:$s$script)?"
+ . "(?:$s$region)?"
+ . "(?:$s$variantList)?"
+ . "(?:$s$extensionList)?"
+ . "(?:$s$privateUse)?)";
+
+ # The final breakdown, with capturing groups for each of these components
+ # The variants, extensions, grandfathered, and private-use may have interior '-'
+
+ $root = "^(?:$langtag|$privateUse|$grandfathered)$";
+
+ # Modifiers:
+ # i - the subject is lower-cased, but the grandfathered alternatives above contain
+ # upper-case subtags; without this, tags such as en-GB-oed could never match.
+ # D - make "$" match only at the very end, so a trailing newline is not accepted.
+ return (bool)preg_match( "/$root/iD", strtolower( $code ) );
+ }
+
+ /**
* Returns true if a language code string is of a valid form, whether or
* not it exists. This includes codes which are used solely for
* customisation via the MediaWiki namespace.
@@ -276,7 +358,31 @@ class Language {
throw new MWException( __METHOD__ . " must be passed a string, $type given$addmsg" );
}
- return preg_match( '/^[a-z0-9-]+$/i', $code );
+ return (bool)preg_match( '/^[a-z0-9-]+$/i', $code );
+ }
+
+ /**
+ * Returns true if a language code is an IETF tag known to MediaWiki.
+ *
+ * @param $tag string
+ *
+ * @since 1.21
+ * @return bool
+ */
+ public static function isKnownLanguageTag( $tag ) {
+ static $coreLanguageNames;
+
+ // Names.php presumably assigns $coreLanguageNames; once set, the file is not included again.
+ if ( is_null( $coreLanguageNames ) ) {
+ include( MWInit::compiledPath( 'languages/Names.php' ) );
+ }
+
+ // A hit in the core name list is enough; only otherwise ask fetchLanguageName().
+ if ( isset( $coreLanguageNames[$tag] ) ) {
+ return true;
+ }
+
+ return self::fetchLanguageName( $tag, $tag ) !== '';
}
/**
@@ -356,7 +462,7 @@ class Language {
* @deprecated in 1.19
*/
function getFallbackLanguageCode() {
- wfDeprecated( __METHOD__ );
+ wfDeprecated( __METHOD__, '1.19' ); // pass the version named in this method's @deprecated tag
return self::getFallbackFor( $this->mCode );
}
@@ -419,6 +525,16 @@ class Language {
*/
public function setNamespaces( array $namespaces ) {
$this->namespaceNames = $namespaces;
+ $this->mNamespaceIds = null; // also clear mNamespaceIds -- presumably a cache derived from the names; confirm against its getter
+ }
+
+ /**
+ * Resets all of the namespace caches. Mainly used for testing
+ */
+ public function resetNamespaces() {
+ // Chained assignment runs right to left: names first, then IDs, then aliases.
+ $this->namespaceAliases = $this->mNamespaceIds = $this->namespaceNames = null;
}
/**
@@ -457,9 +573,13 @@ class Language {
* getNsText() except with '_' changed to ' ', useful for
* producing output.
*
- * @param $index string
+ * <code>
+ * $mw_ns = $wgContLang->getFormattedNsText( NS_MEDIAWIKI_TALK );
+ * echo $mw_ns; // prints 'MediaWiki talk'
+ * </code>
*
- * @return array
+ * @param int $index The array key of the namespace to return
+ * @return string Namespace name without underscores (empty string if namespace does not exist)
*/
function getFormattedNsText( $index ) {
$ns = $this->getNsText( $index );
@@ -1965,7 +2085,7 @@ class Language {
$segments = array();
foreach ( $intervals as $intervalName => $intervalValue ) {
- $message = new Message( 'duration-' . $intervalName, array( $intervalValue ) );
+ $message = wfMessage( 'duration-' . $intervalName )->numParams( $intervalValue );
$segments[] = $message->inLanguage( $this )->escaped();
}
@@ -2405,19 +2525,7 @@ class Language {
if ( is_array( $s ) ) {
wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' );
}
- # Check for non-UTF-8 URLs
- $ishigh = preg_match( '/[\x80-\xff]/', $s );
- if ( !$ishigh ) {
- return $s;
- }
-
- if ( function_exists( 'mb_check_encoding' ) ) {
- $isutf8 = mb_check_encoding( $s, 'UTF-8' );
- } else {
- $isutf8 = preg_match( '/^(?>[\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
- '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
- }
- if ( $isutf8 ) {
+ if ( StringUtils::isUtf8( $s ) ) {
return $s;
}
@@ -2855,30 +2963,30 @@ class Language {
return "<em>$text</em>";
}
- /**
- * Normally we output all numbers in plain en_US style, that is
- * 293,291.235 for twohundredninetythreethousand-twohundredninetyone
- * point twohundredthirtyfive. However this is not suitable for all
- * languages, some such as Pakaran want ੨੯੩,੨੯੫.੨੩੫ and others such as
- * Icelandic just want to use commas instead of dots, and dots instead
- * of commas like "293.291,235".
- *
- * An example of this function being called:
- * <code>
- * wfMessage( 'message' )->numParams( $num )->text()
- * </code>
- *
- * See LanguageGu.php for the Gujarati implementation and
- * $separatorTransformTable on MessageIs.php for
- * the , => . and . => , implementation.
- *
- * @todo check if it's viable to use localeconv() for the decimal
- * separator thing.
- * @param $number Mixed: the string to be formatted, should be an integer
- * or a floating point number.
- * @param $nocommafy Bool: set to true for special numbers like dates
- * @return string
- */
+ /**
+ * Normally we output all numbers in plain en_US style, that is
+ * 293,291.235 for twohundredninetythreethousand-twohundredninetyone
+ * point twohundredthirtyfive. However this is not suitable for all
+ * languages, some such as Pakaran want ੨੯੩,੨੯੫.੨੩੫ and others such as
+ * Icelandic just want to use commas instead of dots, and dots instead
+ * of commas like "293.291,235".
+ *
+ * An example of this function being called:
+ * <code>
+ * wfMessage( 'message' )->numParams( $num )->text()
+ * </code>
+ *
+ * See LanguageGu.php for the Gujarati implementation and
+ * $separatorTransformTable on MessageIs.php for
+ * the , => . and . => , implementation.
+ *
+ * @todo check if it's viable to use localeconv() for the decimal
+ * separator thing.
+ * @param $number Mixed: the string to be formatted, should be an integer
+ * or a floating point number.
+ * @param $nocommafy Bool: set to true for special numbers like dates
+ * @return string
+ */
public function formatNum( $number, $nocommafy = false ) {
global $wgTranslateNumerals;
if ( !$nocommafy ) {
@@ -2900,6 +3008,18 @@ class Language {
}
/**
+ * Front-end for non-commafied formatNum
+ *
+ * @param mixed $number the string to be formatted, should be an integer
+ * or a floating point number.
+ * @since 1.21
+ * @return string
+ */
+ public function formatNumNoSeparators( $number ) {
+ $nocommafy = true; // second argument of formatNum()
+ return $this->formatNum( $number, $nocommafy );
+ }
+
+ /**
* @param $number string
* @return string
*/
@@ -2921,37 +3041,37 @@ class Language {
/**
* Adds commas to a given number
* @since 1.19
- * @param $_ mixed
+ * @param $number mixed
* @return string
*/
- function commafy( $_ ) {
+ function commafy( $number ) {
$digitGroupingPattern = $this->digitGroupingPattern();
- if ( $_ === null ) {
+ if ( $number === null ) {
return '';
}
if ( !$digitGroupingPattern || $digitGroupingPattern === "###,###,###" ) {
// default grouping is at thousands, use the same for ###,###,### pattern too.
- return strrev( (string)preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', strrev( $_ ) ) );
+ return strrev( (string)preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', strrev( $number ) ) );
} else {
// Ref: http://cldr.unicode.org/translation/number-patterns
$sign = "";
- if ( intval( $_ ) < 0 ) {
+ if ( intval( $number ) < 0 ) {
// For negative numbers apply the algorithm like positive number and add sign.
$sign = "-";
- $_ = substr( $_, 1 );
+ $number = substr( $number, 1 );
}
- $numberpart = array();
- $decimalpart = array();
+ $integerPart = array();
+ $decimalPart = array();
$numMatches = preg_match_all( "/(#+)/", $digitGroupingPattern, $matches );
- preg_match( "/\d+/", $_, $numberpart );
- preg_match( "/\.\d*/", $_, $decimalpart );
- $groupedNumber = ( count( $decimalpart ) > 0 ) ? $decimalpart[0]:"";
- if ( $groupedNumber === $_ ) {
+ preg_match( "/\d+/", $number, $integerPart );
+ preg_match( "/\.\d*/", $number, $decimalPart );
+ $groupedNumber = ( count( $decimalPart ) > 0 ) ? $decimalPart[0]:"";
+ if ( $groupedNumber === $number ) {
// the string does not have any number part. Eg: .12345
return $sign . $groupedNumber;
}
- $start = $end = strlen( $numberpart[0] );
+ $start = $end = strlen( $integerPart[0] );
while ( $start > 0 ) {
$match = $matches[0][$numMatches -1] ;
$matchLen = strlen( $match );
@@ -2959,7 +3079,7 @@ class Language {
if ( $start < 0 ) {
$start = 0;
}
- $groupedNumber = substr( $_ , $start, $end -$start ) . $groupedNumber ;
+ $groupedNumber = substr( $number, $start, $end -$start ) . $groupedNumber ;
$end = $start;
if ( $numMatches > 1 ) {
// use the last pattern for the rest of the number
@@ -2972,6 +3092,7 @@ class Language {
return $sign . $groupedNumber;
}
}
+
/**
* @return String
*/
@@ -3003,25 +3124,26 @@ class Language {
* @return string
*/
function listToText( array $l ) {
- $s = '';
$m = count( $l ) - 1;
-
- if ( $m === 0 ) {
- return $l[0];
- } elseif ( $m === 1 ) {
- return $l[0] . $this->getMessageFromDB( 'and' ) . $this->getMessageFromDB( 'word-separator' ) . $l[1];
- } else {
- for ( $i = $m; $i >= 0; $i-- ) {
- if ( $i == $m ) {
- $s = $l[$i];
- } elseif ( $i == $m - 1 ) {
- $s = $l[$i] . $this->getMessageFromDB( 'and' ) . $this->getMessageFromDB( 'word-separator' ) . $s;
- } else {
- $s = $l[$i] . $this->getMessageFromDB( 'comma-separator' ) . $s;
- }
+ if ( $m < 0 ) {
+ return ''; // empty list
+ }
+ if ( $m > 0 ) {
+ $and = $this->getMessageFromDB( 'and' ); // messages are fetched once, before the loop
+ $space = $this->getMessageFromDB( 'word-separator' );
+ if ( $m > 1 ) {
+ $comma = $this->getMessageFromDB( 'comma-separator' ); // only needed for three or more items
+ }
+ }
+ $s = $l[$m]; // start with the last item; assumes $l is indexed 0..$m -- TODO confirm callers never pass other keys
+ for ( $i = $m - 1; $i >= 0; $i-- ) {
+ if ( $i == $m - 1 ) {
+ $s = $l[$i] . $and . $space . $s; // the last two items are joined with 'and' + 'word-separator'
+ } else {
+ $s = $l[$i] . $comma . $s; // every earlier item is prepended with 'comma-separator'
}
- return $s;
}
+ return $s;
}
/**
@@ -3409,6 +3531,19 @@ class Language {
if ( !count( $forms ) ) {
return '';
}
+
+ // Handle explicit n=pluralform cases
+ foreach ( $forms as $index => $form ) {
+ if ( preg_match( '/\d+=/i', $form ) ) {
+ $pos = strpos( $form, '=' );
+ if ( substr( $form, 0, $pos ) === (string) $count ) {
+ return substr( $form, $pos + 1 );
+ }
+ unset( $forms[$index] );
+ }
+ }
+ $forms = array_values( $forms );
+
$pluralForm = $this->getPluralForm( $count );
$pluralForm = min( $pluralForm, count( $forms ) - 1 );
return $forms[$pluralForm];
@@ -3459,8 +3594,22 @@ class Language {
}
}
}
- // If all else fails, return the original string.
- return $str;
+
+ // If all else fails, return a standard duration or timestamp description.
+ $time = strtotime( $str, 0 );
+ if ( $time === false ) { // Unknown format. Return it as-is in case.
+ return $str;
+ } elseif ( $time !== strtotime( $str, 1 ) ) { // It's a relative timestamp.
+ // $time is relative to 0 so it's a duration length.
+ return $this->formatDuration( $time );
+ } else { // It's an absolute timestamp.
+ if ( $time === 0 ) {
+ // wfTimestamp() handles 0 as current time instead of epoch.
+ return $this->timeanddate( '19700101000000' );
+ } else {
+ return $this->timeanddate( $time );
+ }
+ }
}
/**
@@ -3525,12 +3674,22 @@ class Language {
}
/**
+ * Convert a namespace index to a string in the preferred variant
+ *
+ * @param $ns int
+ * @return string
+ */
+ public function convertNamespace( $ns ) {
+ $converter = $this->mConverter; // the converter object does the actual work
+ return $converter->convertNamespace( $ns );
+ }
+
+ /**
* Check if this is a language with variants
*
* @return bool
*/
public function hasVariants() {
- return sizeof( $this->getVariants() ) > 1;
+ return count( $this->getVariants() ) > 1; // true only when getVariants() lists more than one variant
}
/**
@@ -3657,15 +3816,24 @@ class Language {
}
/**
- * Enclose a string with the "no conversion" tag. This is used by
- * various functions in the Parser
+ * Prepare external link text for conversion. When the text is
+ * a URL, it shouldn't be converted, and it'll be wrapped in
+ * the "raw" tag (-{R| }-) to prevent conversion.
+ *
+ * This function is called "markNoConversion" for historical
+ * reasons.
*
- * @param $text String: text to be tagged for no conversion
- * @param $noParse bool
+ * @param $text String: text to be used for external link
+ * @param $noParse bool: wrap it without confirming it's a real URL first
* @return string the tagged text
*/
public function markNoConversion( $text, $noParse = false ) {
- return $this->mConverter->markNoConversion( $text, $noParse );
+ // Excluding protocol-relative URLs may avoid many false positives.
+ if ( $noParse || preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $text ) ) {
+ return $this->mConverter->markNoConversion( $text ); // $noParse is no longer forwarded to the converter
+ } else {
+ return $text; // anything else is returned untouched
+ }
}
/**
@@ -4229,5 +4397,4 @@ class Language {
$form = CLDRPluralRuleEvaluator::evaluateCompiled( $number, $pluralRules );
return $form;
}
-
}