diff options
Diffstat (limited to 'languages/utils/CLDRPluralRuleEvaluator.php')
-rw-r--r-- | languages/utils/CLDRPluralRuleEvaluator.php | 182 |
1 files changed, 151 insertions, 31 deletions
diff --git a/languages/utils/CLDRPluralRuleEvaluator.php b/languages/utils/CLDRPluralRuleEvaluator.php index 6b117043..afe88a5b 100644 --- a/languages/utils/CLDRPluralRuleEvaluator.php +++ b/languages/utils/CLDRPluralRuleEvaluator.php @@ -2,12 +2,31 @@ /** * Parse and evaluate a plural rule. * - * http://unicode.org/reports/tr35/#Language_Plural_Rules + * UTS #35 Revision 33 + * http://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Language_Plural_Rules * * @author Niklas Laxstrom, Tim Starling * * @copyright Copyright © 2010-2012, Niklas Laxström - * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later + * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 + * or later + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * * @file * @since 1.20 */ @@ -45,18 +64,52 @@ class CLDRPluralRuleEvaluator { /** * Evaluate a compiled set of rules returned by compile(). Do not allow * the user to edit the compiled form, or else PHP errors may result. + * + * @param string The number to be evaluated against the rules, in English, or it + * may be a type convertible to string. + * @param array The associative array of plural rules in pluralform => rule format. 
+ * @return int The index of the plural form which passed the evaluation */ public static function evaluateCompiled( $number, array $rules ) { + // Calculate the values of the operand symbols + $number = strval( $number ); + if ( !preg_match( '/^ -? ( ([0-9]+) (?: \. ([0-9]+) )? )$/x', $number, $m ) ) { + wfDebug( __METHOD__.': invalid number input, returning "other"' ); + return count( $rules ); + } + if ( !isset( $m[3] ) ) { + $operandSymbols = array( + 'n' => intval( $m[1] ), + 'i' => intval( $m[1] ), + 'v' => 0, + 'w' => 0, + 'f' => 0, + 't' => 0 + ); + } else { + $absValStr = $m[1]; + $intStr = $m[2]; + $fracStr = $m[3]; + $operandSymbols = array( + 'n' => floatval( $absValStr ), + 'i' => intval( $intStr ), + 'v' => strlen( $fracStr ), + 'w' => strlen( rtrim( $fracStr, '0' ) ), + 'f' => intval( $fracStr ), + 't' => intval( rtrim( $fracStr, '0' ) ), + ); + } + // The compiled form is RPN, with tokens strictly delimited by // spaces, so this is a simple RPN evaluator. - foreach ( $rules as $i => $rule ) { + foreach ( $rules as $i => $rule ) { $stack = array(); $zero = ord( '0' ); $nine = ord( '9' ); foreach ( StringUtils::explode( ' ', $rule ) as $token ) { $ord = ord( $token ); - if ( $token === 'n' ) { - $stack[] = $number; + if ( isset( $operandSymbols[$token] ) ) { + $stack[] = $operandSymbols[$token]; } elseif ( $ord >= $zero && $ord <= $nine ) { $stack[] = intval( $token ); } else { @@ -70,8 +123,8 @@ class CLDRPluralRuleEvaluator { return $i; } } - // None of the provided rules match. The number belongs to caregory - // 'other' which comes last. + // None of the provided rules match. The number belongs to category + // 'other', which comes last. return count( $rules ); } @@ -81,11 +134,12 @@ class CLDRPluralRuleEvaluator { * @param $token string The token string * @param $left The left operand. If it is an object, its state may be destroyed. 
* @param $right The right operand + * @throws CLDRPluralRuleError * @return mixed */ private static function doOperation( $token, $left, $right ) { if ( in_array( $token, array( 'in', 'not-in', 'within', 'not-within' ) ) ) { - if ( !($right instanceof CLDRPluralRuleEvaluator_Range ) ) { + if ( !( $right instanceof CLDRPluralRuleEvaluator_Range ) ) { $right = new CLDRPluralRuleEvaluator_Range( $right ); } } @@ -108,7 +162,7 @@ class CLDRPluralRuleEvaluator { return !$right->isNumberWithin( $left ); case 'mod': if ( is_int( $left ) ) { - return (int) fmod( $left, $right ); + return (int)fmod( $left, $right ); } return fmod( $left, $right ); case ',': @@ -131,7 +185,7 @@ class CLDRPluralRuleEvaluator { * Evaluator helper class representing a range list. */ class CLDRPluralRuleEvaluator_Range { - var $parts = array(); + public $parts = array(); function __construct( $start, $end = false ) { if ( $end === false ) { @@ -208,9 +262,40 @@ class CLDRPluralRuleEvaluator_Range { * Helper class for converting rules to reverse polish notation (RPN). */ class CLDRPluralRuleConverter { - var $rule, $pos, $end; - var $operators = array(); - var $operands = array(); + /** + * The input string + * + * @var string + */ + public $rule; + + /** + * The current position + * + * @var int + */ + public $pos; + + /** + * The past-the-end position + * + * @var int + */ + public $end; + + /** + * The operator stack + * + * @var array + */ + public $operators = array(); + + /** + * The operand stack + * + * @var array + */ + public $operands = array(); /** * Precedence levels. Note that there's no need to worry about associativity @@ -238,14 +323,19 @@ class CLDRPluralRuleConverter { /** * Same for digits. Note that the grammar given in UTS #35 doesn't allow - * negative numbers or decimals. + * negative numbers or decimal separators. */ const NUMBER_CLASS = '0123456789'; /** + * A character list of symbolic operands. 
+ */ + const OPERAND_SYMBOLS = 'nivwft'; + + /** * An anchored regular expression which matches a word at the current offset. */ - const WORD_REGEX = '/[a-zA-Z]+/A'; + const WORD_REGEX = '/[a-zA-Z@]+/A'; /** * Convert a rule to RPN. This is the only public entry point. @@ -286,7 +376,7 @@ class CLDRPluralRuleConverter { continue; } else { // Operator - if ( !$expectOperator ) { + if ( !$expectOperator ) { $token->error( 'unexpected operator' ); } // Resolve higher precedence levels @@ -346,23 +436,25 @@ class CLDRPluralRuleConverter { return $token; } - // Comma - if ( $this->rule[$this->pos] === ',' ) { - $token = $this->newOperator( ',', $this->pos, 1 ); - $this->pos ++; + // Two-character operators + $op2 = substr( $this->rule, $this->pos, 2 ); + if ( $op2 === '..' || $op2 === '!=' ) { + $token = $this->newOperator( $op2, $this->pos, 2 ); + $this->pos += 2; return $token; } - // Dot dot - if ( substr( $this->rule, $this->pos, 2 ) === '..' ) { - $token = $this->newOperator( '..', $this->pos, 2 ); - $this->pos += 2; + // Single-character operators + $op1 = $this->rule[$this->pos]; + if ( $op1 === ',' || $op1 === '=' || $op1 === '%' ) { + $token = $this->newOperator( $op1, $this->pos, 1 ); + $this->pos ++; return $token; } // Word if ( !preg_match( self::WORD_REGEX, $this->rule, $m, 0, $this->pos ) ) { - $this->error( 'unexpected character "' . $this->rule[$this->pos] . '"' ); + $this->error( 'unexpected character "' . $this->rule[$this->pos] . 
'"' ); } $word1 = strtolower( $m[0] ); $word2 = ''; @@ -395,13 +487,21 @@ class CLDRPluralRuleConverter { return $token; } - // The special numerical keyword "n" - if ( $word1 === 'n' ) { - $token = $this->newNumber( 'n', $this->pos ); + // The single-character operand symbols + if ( strpos( self::OPERAND_SYMBOLS, $word1 ) !== false ) { + $token = $this->newNumber( $word1, $this->pos ); $this->pos ++; return $token; } + // Samples + if ( $word1 === '@integer' || $word1 === '@decimal' ) { + // Samples are like comments, they have no effect on rule evaluation. + // They run from the first sample indicator to the end of the string. + $this->pos = $this->end; + return false; + } + $this->error( 'unrecognised word' ); } @@ -447,7 +547,7 @@ class CLDRPluralRuleConverter { * The base class for operators and expressions, describing a region of the input string. */ class CLDRPluralRuleConverter_Fragment { - var $parser, $pos, $length, $end; + public $parser, $pos, $length, $end; function __construct( $parser, $pos, $length ) { $this->parser = $parser; @@ -473,7 +573,7 @@ class CLDRPluralRuleConverter_Fragment { * validation. */ class CLDRPluralRuleConverter_Expression extends CLDRPluralRuleConverter_Fragment { - var $type, $rpn; + public $type, $rpn; function __construct( $parser, $type, $rpn, $pos, $length ) { parent::__construct( $parser, $pos, $length ); @@ -498,7 +598,7 @@ class CLDRPluralRuleConverter_Expression extends CLDRPluralRuleConverter_Fragmen * messages), and the binary operator at that location. 
*/ class CLDRPluralRuleConverter_Operator extends CLDRPluralRuleConverter_Fragment { - var $name; + public $name; /** * Each op type has three characters: left operand type, right operand type and result type @@ -532,8 +632,28 @@ class CLDRPluralRuleConverter_Operator extends CLDRPluralRuleConverter_Fragment 'r' => 'range', ); + /** + * Map for converting the new operators introduced in Rev 33 to the old forms + */ + static $aliasMap = array( + '%' => 'mod', + '!=' => 'not-in', + '=' => 'in' + ); + + /** + * Initialize a new instance of a CLDRPluralRuleConverter_Operator object + * + * @param CLDRPluralRuleConverter $parser The parser + * @param string $name The operator name + * @param int $pos The position + * @param int $length The length + */ function __construct( $parser, $name, $pos, $length ) { parent::__construct( $parser, $pos, $length ); + if ( isset( self::$aliasMap[$name] ) ) { + $name = self::$aliasMap[$name]; + } $this->name = $name; } |