summaryrefslogtreecommitdiff
path: root/languages/utils
diff options
context:
space:
mode:
authorPierre Schmitz <pierre@archlinux.de>2014-12-27 15:41:37 +0100
committerPierre Schmitz <pierre@archlinux.de>2014-12-31 11:43:28 +0100
commitc1f9b1f7b1b77776192048005dcc66dcf3df2bfb (patch)
tree2b38796e738dd74cb42ecd9bfd151803108386bc /languages/utils
parentb88ab0086858470dd1f644e64cb4e4f62bb2be9b (diff)
Update to MediaWiki 1.24.1
Diffstat (limited to 'languages/utils')
-rw-r--r--languages/utils/CLDRPluralRuleConverter.php322
-rw-r--r--languages/utils/CLDRPluralRuleConverterExpression.php41
-rw-r--r--languages/utils/CLDRPluralRuleConverterFragment.php34
-rw-r--r--languages/utils/CLDRPluralRuleConverterOperator.php114
-rw-r--r--languages/utils/CLDRPluralRuleError.php20
-rw-r--r--languages/utils/CLDRPluralRuleEvaluator.php555
-rw-r--r--languages/utils/CLDRPluralRuleEvaluatorRange.php110
7 files changed, 665 insertions, 531 deletions
diff --git a/languages/utils/CLDRPluralRuleConverter.php b/languages/utils/CLDRPluralRuleConverter.php
new file mode 100644
index 00000000..2eabcab1
--- /dev/null
+++ b/languages/utils/CLDRPluralRuleConverter.php
@@ -0,0 +1,322 @@
+<?php
+/**
+ * @author Niklas Laxström, Tim Starling
+ *
+ * @copyright Copyright © 2010-2012, Niklas Laxström
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
+ *
+ * @file
+ * @since 1.20
+ */
+
+/**
+ * Helper class for converting rules to reverse polish notation (RPN).
+ */
+class CLDRPluralRuleConverter {
+ /**
+ * The input string
+ *
+ * @var string
+ */
+ public $rule;
+
+ /**
+ * The current position
+ *
+ * @var int
+ */
+ public $pos;
+
+ /**
+ * The past-the-end position
+ *
+ * @var int
+ */
+ public $end;
+
+ /**
+ * The operator stack
+ *
+ * @var array
+ */
+ public $operators = array();
+
+ /**
+ * The operand stack
+ *
+ * @var array
+ */
+ public $operands = array();
+
+ /**
+ * Precedence levels. Note that there's no need to worry about associativity
+ * for the level 4 operators, since they return boolean and don't accept
+ * boolean inputs.
+ */
+ private static $precedence = array(
+ 'or' => 2,
+ 'and' => 3,
+ 'is' => 4,
+ 'is-not' => 4,
+ 'in' => 4,
+ 'not-in' => 4,
+ 'within' => 4,
+ 'not-within' => 4,
+ 'mod' => 5,
+ ',' => 6,
+ '..' => 7,
+ );
+
+ /**
+ * A character list defining whitespace, for use in strspn() etc.
+ */
+ const WHITESPACE_CLASS = " \t\r\n";
+
+ /**
+ * Same for digits. Note that the grammar given in UTS #35 doesn't allow
+ * negative numbers or decimal separators.
+ */
+ const NUMBER_CLASS = '0123456789';
+
+ /**
+ * A character list of symbolic operands.
+ */
+ const OPERAND_SYMBOLS = 'nivwft';
+
+ /**
+ * An anchored regular expression which matches a word at the current offset.
+ */
+ const WORD_REGEX = '/[a-zA-Z@]+/A';
+
+ /**
+  * Translate a plural rule into reverse polish notation. This static
+  * method is the only public entry point of the class.
+  *
+  * @param string $rule The rule to convert
+  * @return string The rule in RPN form
+  * @throws CLDRPluralRuleError If the rule fails validation
+  */
+ public static function convert( $rule ) {
+     $converter = new self( $rule );
+     $rpn = $converter->doConvert();
+
+     return $rpn;
+ }
+
+ /**
+  * Protected constructor; convert() is the factory that instantiates the
+  * converter.
+  *
+  * @param string $rule The rule text to parse
+  */
+ protected function __construct( $rule ) {
+     // Parsing starts at offset 0 and stops at the byte length of the input.
+     $this->pos = 0;
+     $this->end = strlen( $rule );
+     $this->rule = $rule;
+ }
+
+ /**
+  * Do the operation: read all tokens of the rule and reduce them to a
+  * single expression whose RPN text is returned.
+  *
+  * @return string The RPN representation of the rule (e.g. "5 3 mod n is")
+  * @throws CLDRPluralRuleError If operands and operators do not alternate,
+  *   an operand is missing, the rule is empty, or the result is not boolean
+  */
+ protected function doConvert() {
+     $expectOperator = true;
+
+     // Iterate through all tokens, saving the operators and operands to a
+     // stack per Dijkstra's shunting yard algorithm.
+     /** @var CLDRPluralRuleConverterFragment $token */
+     while ( false !== ( $token = $this->nextToken() ) ) {
+         // In this grammar, there are only binary operators, so every valid
+         // rule string will alternate between operator and operand tokens.
+         $expectOperator = !$expectOperator;
+
+         if ( $token instanceof CLDRPluralRuleConverterExpression ) {
+             // Operand
+             if ( $expectOperator ) {
+                 $token->error( 'unexpected operand' );
+             }
+             $this->operands[] = $token;
+             continue;
+         }
+
+         // Operator
+         if ( !$expectOperator ) {
+             $token->error( 'unexpected operator' );
+         }
+         // Resolve higher precedence levels. Operators of equal precedence
+         // are resolved too, which makes them left-associative.
+         $lastOp = end( $this->operators );
+         while ( $lastOp && self::$precedence[$token->name] <= self::$precedence[$lastOp->name] ) {
+             // doOperation() works on $this->operands itself and takes no
+             // second argument.
+             $this->doOperation( $lastOp );
+             array_pop( $this->operators );
+             $lastOp = end( $this->operators );
+         }
+         $this->operators[] = $token;
+     }
+
+     // Finish off the stack
+     while ( $op = array_pop( $this->operators ) ) {
+         $this->doOperation( $op );
+     }
+
+     // Make sure the result is sane. The first case is possible for an empty
+     // string input, the second should be unreachable.
+     if ( !count( $this->operands ) ) {
+         $this->error( 'condition expected' );
+     } elseif ( count( $this->operands ) > 1 ) {
+         $this->error( 'missing operator or too many operands' );
+     }
+
+     $value = $this->operands[0];
+     if ( $value->type !== 'boolean' ) {
+         $this->error( 'the result must have a boolean type' );
+     }
+
+     return $value->rpn;
+ }
+
+ /**
+  * Fetch the next token from the input string.
+  *
+  * Leading whitespace is skipped. A sample section ("@integer" or
+  * "@decimal") is treated like a comment and ends the input.
+  *
+  * @return CLDRPluralRuleConverterFragment|bool The next token, or false
+  *   when the end of the rule has been reached
+  * @throws CLDRPluralRuleError On an unexpected character or an
+  *   unrecognised word
+  */
+ protected function nextToken() {
+     if ( $this->pos >= $this->end ) {
+         return false;
+     }
+
+     // Whitespace
+     $length = strspn( $this->rule, self::WHITESPACE_CLASS, $this->pos );
+     $this->pos += $length;
+
+     if ( $this->pos >= $this->end ) {
+         return false;
+     }
+
+     // Number
+     $length = strspn( $this->rule, self::NUMBER_CLASS, $this->pos );
+     if ( $length !== 0 ) {
+         $token = $this->newNumber( substr( $this->rule, $this->pos, $length ), $this->pos );
+         $this->pos += $length;
+
+         return $token;
+     }
+
+     // Two-character operators
+     $op2 = substr( $this->rule, $this->pos, 2 );
+     if ( $op2 === '..' || $op2 === '!=' ) {
+         $token = $this->newOperator( $op2, $this->pos, 2 );
+         $this->pos += 2;
+
+         return $token;
+     }
+
+     // Single-character operators
+     $op1 = $this->rule[$this->pos];
+     if ( $op1 === ',' || $op1 === '=' || $op1 === '%' ) {
+         $token = $this->newOperator( $op1, $this->pos, 1 );
+         $this->pos++;
+
+         return $token;
+     }
+
+     // Word
+     if ( !preg_match( self::WORD_REGEX, $this->rule, $m, 0, $this->pos ) ) {
+         $this->error( 'unexpected character "' . $this->rule[$this->pos] . '"' );
+     }
+     $word1 = strtolower( $m[0] );
+     $word2 = '';
+     $nextTokenPos = $this->pos + strlen( $word1 );
+     if ( $word1 === 'not' || $word1 === 'is' ) {
+         // Look ahead one word
+         $nextTokenPos += strspn( $this->rule, self::WHITESPACE_CLASS, $nextTokenPos );
+         if ( $nextTokenPos < $this->end
+             && preg_match( self::WORD_REGEX, $this->rule, $m, 0, $nextTokenPos )
+         ) {
+             $word2 = strtolower( $m[0] );
+             $nextTokenPos += strlen( $word2 );
+         }
+     }
+
+     // Two-word operators like "is not" take precedence over single-word operators like "is"
+     if ( $word2 !== '' ) {
+         $bothWords = "{$word1}-{$word2}";
+         if ( isset( self::$precedence[$bothWords] ) ) {
+             $token = $this->newOperator( $bothWords, $this->pos, $nextTokenPos - $this->pos );
+             $this->pos = $nextTokenPos;
+
+             return $token;
+         }
+     }
+
+     // Single-word operators
+     if ( isset( self::$precedence[$word1] ) ) {
+         $token = $this->newOperator( $word1, $this->pos, strlen( $word1 ) );
+         $this->pos += strlen( $word1 );
+
+         return $token;
+     }
+
+     // The single-character operand symbols. The word has to be exactly one
+     // character long: strpos() alone would also accept multi-character
+     // substrings of the symbol list, such as "ni" or "ft", as an operand.
+     if ( strlen( $word1 ) === 1 && strpos( self::OPERAND_SYMBOLS, $word1 ) !== false ) {
+         $token = $this->newNumber( $word1, $this->pos );
+         $this->pos++;
+
+         return $token;
+     }
+
+     // Samples
+     if ( $word1 === '@integer' || $word1 === '@decimal' ) {
+         // Samples are like comments, they have no effect on rule evaluation.
+         // They run from the first sample indicator to the end of the string.
+         $this->pos = $this->end;
+
+         return false;
+     }
+
+     $this->error( 'unrecognised word' );
+ }
+
+ /**
+  * Apply the binary operator $op to the two topmost entries of the operand
+  * stack: both are popped and replaced by the single fragment (carrying rpn
+  * and type members) that the operator returns for them.
+  *
+  * @param CLDRPluralRuleConverterOperator $op The operator to apply
+  */
+ protected function doOperation( $op ) {
+     $available = count( $this->operands );
+     if ( $available < 2 ) {
+         $op->error( 'missing operand' );
+     }
+
+     // The right-hand operand was pushed last, so it comes off first.
+     $rhs = array_pop( $this->operands );
+     $lhs = array_pop( $this->operands );
+     array_push( $this->operands, $op->operate( $lhs, $rhs ) );
+ }
+
+ /**
+  * Build an expression object of type 'number' for the given text.
+  *
+  * @param string $text The token text, which is also used as its RPN form
+  * @param int $pos The offset of the token in the rule
+  * @return CLDRPluralRuleConverterExpression The numerical expression
+  */
+ protected function newNumber( $text, $pos ) {
+     $length = strlen( $text );
+
+     return new CLDRPluralRuleConverterExpression( $this, 'number', $text, $pos, $length );
+ }
+
+ /**
+  * Build a binary operator object.
+  *
+  * @param string $type The operator name or symbol
+  * @param int $pos The offset of the operator in the rule
+  * @param int $length The length of the operator text
+  * @return CLDRPluralRuleConverterOperator The operator
+  */
+ protected function newOperator( $type, $pos, $length ) {
+     $operator = new CLDRPluralRuleConverterOperator( $this, $type, $pos, $length );
+
+     return $operator;
+ }
+
+ /**
+ * Throw an error carrying only the given message, without any position
+ * information.
+ *
+ * @param string $message The error description
+ * @throws CLDRPluralRuleError Always
+ */
+ protected function error( $message ) {
+ throw new CLDRPluralRuleError( $message );
+ }
+}
diff --git a/languages/utils/CLDRPluralRuleConverterExpression.php b/languages/utils/CLDRPluralRuleConverterExpression.php
new file mode 100644
index 00000000..1ee6b4c5
--- /dev/null
+++ b/languages/utils/CLDRPluralRuleConverterExpression.php
@@ -0,0 +1,41 @@
+<?php
+/**
+ * @author Niklas Laxström, Tim Starling
+ *
+ * @copyright Copyright © 2010-2012, Niklas Laxström
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
+ *
+ * @file
+ * @since 1.20
+ */
+
+/**
+ * Helper for CLDRPluralRuleConverter.
+ * An expression: a region of the input string (kept for error messages)
+ * together with the RPN text used to evaluate it and the type of its result,
+ * which is used for validation.
+ */
+class CLDRPluralRuleConverterExpression extends CLDRPluralRuleConverterFragment {
+ /** @var string The result type of the expression, e.g. 'number' or 'range' */
+ public $type;
+
+ /** @var string The expression in reverse polish notation */
+ public $rpn;
+
+ /**
+  * @param CLDRPluralRuleConverter $parser The parser
+  * @param string $type The result type of the expression
+  * @param string $rpn The RPN text of the expression
+  * @param int $pos The offset of the region in the rule
+  * @param int $length The length of the region
+  */
+ public function __construct( $parser, $type, $rpn, $pos, $length ) {
+     $this->type = $type;
+     $this->rpn = $rpn;
+     parent::__construct( $parser, $pos, $length );
+ }
+
+ /**
+  * Check whether this expression can be used where $type is expected.
+  * A number is also accepted where a range is expected.
+  *
+  * @param string $type The expected type
+  * @return bool True if the expression is compatible with $type
+  */
+ public function isType( $type ) {
+     return $type === $this->type
+         || ( $type === 'range' && $this->type === 'number' );
+ }
+}
diff --git a/languages/utils/CLDRPluralRuleConverterFragment.php b/languages/utils/CLDRPluralRuleConverterFragment.php
new file mode 100644
index 00000000..df299cbd
--- /dev/null
+++ b/languages/utils/CLDRPluralRuleConverterFragment.php
@@ -0,0 +1,34 @@
+<?php
+/**
+ * @author Niklas Laxström, Tim Starling
+ *
+ * @copyright Copyright © 2010-2012, Niklas Laxström
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
+ *
+ * @file
+ * @since 1.20
+ */
+
+/**
+ * Helper for CLDRPluralRuleConverter.
+ * Base class of operators and expressions; it describes a region of the
+ * input string.
+ */
+class CLDRPluralRuleConverterFragment {
+ /** @var object The parser; its $rule property is read by getText() */
+ public $parser;
+
+ /** @var int The offset of the region */
+ public $pos;
+
+ /** @var int The length of the region */
+ public $length;
+
+ /** @var int The past-the-end offset of the region ($pos + $length) */
+ public $end;
+
+ /**
+  * @param object $parser The parser
+  * @param int $pos The offset of the region
+  * @param int $length The length of the region
+  */
+ public function __construct( $parser, $pos, $length ) {
+     $this->parser = $parser;
+     $this->length = $length;
+     $this->pos = $pos;
+     $this->end = $pos + $length;
+ }
+
+ /**
+  * Throw an error that mentions the 1-based position and the text of this
+  * fragment.
+  *
+  * @param string $message The error description
+  * @throws CLDRPluralRuleError Always
+  */
+ public function error( $message ) {
+     $position = $this->pos + 1;
+     $text = $this->getText();
+     throw new CLDRPluralRuleError( "$message at position " . $position . ": \"$text\"" );
+ }
+
+ /**
+  * Get the part of the parser's rule string covered by this fragment.
+  *
+  * @return string The fragment text
+  */
+ public function getText() {
+     $rule = $this->parser->rule;
+
+     return substr( $rule, $this->pos, $this->length );
+ }
+}
diff --git a/languages/utils/CLDRPluralRuleConverterOperator.php b/languages/utils/CLDRPluralRuleConverterOperator.php
new file mode 100644
index 00000000..de17f291
--- /dev/null
+++ b/languages/utils/CLDRPluralRuleConverterOperator.php
@@ -0,0 +1,114 @@
+<?php
+/**
+ * @author Niklas Laxström, Tim Starling
+ *
+ * @copyright Copyright © 2010-2012, Niklas Laxström
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
+ *
+ * @file
+ * @since 1.20
+ */
+
+/**
+ * Helper for CLDRPluralRuleConverter.
+ * An operator: a region of the input string (kept for error messages) plus
+ * the name of the binary operator found at that location.
+ */
+class CLDRPluralRuleConverterOperator extends CLDRPluralRuleConverterFragment {
+ /** @var string The operator name, after alias resolution */
+ public $name;
+
+ /**
+  * Type signature of each operator, written as three characters: the left
+  * operand type, the right operand type and the result type.
+  *
+  * b = boolean
+  * n = number
+  * r = range
+  *
+  * A number is a kind of range.
+  *
+  * @var array
+  */
+ private static $opTypes = array(
+     'or' => 'bbb',
+     'and' => 'bbb',
+     'is' => 'nnb',
+     'is-not' => 'nnb',
+     'in' => 'nrb',
+     'not-in' => 'nrb',
+     'within' => 'nrb',
+     'not-within' => 'nrb',
+     'mod' => 'nnn',
+     ',' => 'rrr',
+     '..' => 'nnr',
+ );
+
+ /**
+  * Map converting from the abbreviation to the full form.
+  *
+  * @var array
+  */
+ private static $typeSpecMap = array(
+     'b' => 'boolean',
+     'n' => 'number',
+     'r' => 'range',
+ );
+
+ /**
+  * Map for converting the new operators introduced in Rev 33 to the old forms
+  *
+  * @var array
+  */
+ private static $aliasMap = array(
+     '%' => 'mod',
+     '!=' => 'not-in',
+     '=' => 'in'
+ );
+
+ /**
+  * Initialize a new instance of a CLDRPluralRuleConverterOperator object
+  *
+  * @param CLDRPluralRuleConverter $parser The parser
+  * @param string $name The operator name; an alias is replaced by the name it maps to
+  * @param int $pos The position
+  * @param int $length The length
+  */
+ public function __construct( $parser, $name, $pos, $length ) {
+     parent::__construct( $parser, $pos, $length );
+     $this->name = isset( self::$aliasMap[$name] ) ? self::$aliasMap[$name] : $name;
+ }
+
+ /**
+  * Compute the operation: combine both operands into a new expression and
+  * validate their types against the signature of this operator.
+  *
+  * @param CLDRPluralRuleConverterExpression $left The left part of the expression
+  * @param CLDRPluralRuleConverterExpression $right The right part of the expression
+  * @return CLDRPluralRuleConverterExpression The result of the operation
+  * @throws CLDRPluralRuleError If an operand has the wrong type
+  */
+ public function operate( $left, $right ) {
+     $signature = self::$opTypes[$this->name];
+     $leftType = self::$typeSpecMap[$signature[0]];
+     $rightType = self::$typeSpecMap[$signature[1]];
+     $resultType = self::$typeSpecMap[$signature[2]];
+
+     // The combined expression spans the operator and both operands.
+     $start = min( $this->pos, $left->pos, $right->pos );
+     $length = max( $this->end, $left->end, $right->end ) - $start;
+     $rpn = "{$left->rpn} {$right->rpn} {$this->name}";
+
+     // Built before the type checks so that errors report the whole span.
+     $combined = new CLDRPluralRuleConverterExpression(
+         $this->parser, $resultType, $rpn, $start, $length
+     );
+
+     if ( !$left->isType( $leftType ) ) {
+         $combined->error( "invalid type for left operand: expected $leftType, got {$left->type}" );
+     }
+
+     if ( !$right->isType( $rightType ) ) {
+         $combined->error( "invalid type for right operand: expected $rightType, got {$right->type}" );
+     }
+
+     return $combined;
+ }
+}
diff --git a/languages/utils/CLDRPluralRuleError.php b/languages/utils/CLDRPluralRuleError.php
new file mode 100644
index 00000000..cc0b5d2f
--- /dev/null
+++ b/languages/utils/CLDRPluralRuleError.php
@@ -0,0 +1,20 @@
+<?php
+/**
+ * @author Niklas Laxström, Tim Starling
+ *
+ * @copyright Copyright © 2010-2012, Niklas Laxström
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
+ *
+ * @file
+ * @since 1.20
+ */
+
+/**
+ * The exception class of the CLDR plural rule classes. It is thrown back to
+ * the caller if there is any validation error.
+ */
+class CLDRPluralRuleError extends MWException {
+ /**
+  * @param string $message The error description; a fixed prefix is put in front of it
+  */
+ public function __construct( $message ) {
+     $prefixed = 'CLDR plural rule error: ' . $message;
+     parent::__construct( $prefixed );
+ }
+}
diff --git a/languages/utils/CLDRPluralRuleEvaluator.php b/languages/utils/CLDRPluralRuleEvaluator.php
index afe88a5b..7e7208aa 100644
--- a/languages/utils/CLDRPluralRuleEvaluator.php
+++ b/languages/utils/CLDRPluralRuleEvaluator.php
@@ -1,11 +1,12 @@
<?php
+
/**
* Parse and evaluate a plural rule.
*
* UTS #35 Revision 33
* http://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Language_Plural_Rules
*
- * @author Niklas Laxstrom, Tim Starling
+ * @author Niklas Laxström, Tim Starling
*
* @copyright Copyright © 2010-2012, Niklas Laxström
* @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0
@@ -30,18 +31,18 @@
* @file
* @since 1.20
*/
-
class CLDRPluralRuleEvaluator {
/**
* Evaluate a number against a set of plural rules. If a rule passes,
* return the index of plural rule.
*
- * @param int The number to be evaluated against the rules
- * @param array The associative array of plural rules in pluralform => rule format.
+ * @param int $number The number to be evaluated against the rules
+ * @param array $rules The associative array of plural rules in pluralform => rule format.
* @return int The index of the plural form which passed the evaluation
*/
public static function evaluate( $number, array $rules ) {
$rules = self::compile( $rules );
+
return self::evaluateCompiled( $number, $rules );
}
@@ -49,8 +50,8 @@ class CLDRPluralRuleEvaluator {
* Convert a set of rules to a compiled form which is optimised for
* fast evaluation. The result will be an array of strings, and may be cached.
*
- * @param $rules The rules to compile
- * @return An array of compile rules.
+ * @param array $rules The rules to compile
+ * @return array An array of compiled rules.
*/
public static function compile( array $rules ) {
// We can't use array_map() for this because it generates a warning if
@@ -58,6 +59,7 @@ class CLDRPluralRuleEvaluator {
foreach ( $rules as &$rule ) {
$rule = CLDRPluralRuleConverter::convert( $rule );
}
+
return $rules;
}
@@ -65,16 +67,17 @@ class CLDRPluralRuleEvaluator {
* Evaluate a compiled set of rules returned by compile(). Do not allow
* the user to edit the compiled form, or else PHP errors may result.
*
- * @param string The number to be evaluated against the rules, in English, or it
+ * @param string $number The number to be evaluated against the rules, in English, or it
* may be a type convertible to string.
- * @param array The associative array of plural rules in pluralform => rule format.
+ * @param array $rules The associative array of plural rules in pluralform => rule format.
* @return int The index of the plural form which passed the evaluation
*/
public static function evaluateCompiled( $number, array $rules ) {
// Calculate the values of the operand symbols
$number = strval( $number );
- if ( !preg_match( '/^ -? ( ([0-9]+) (?: \. ([0-9]+) )? )$/x', $number, $m ) ) {
- wfDebug( __METHOD__.': invalid number input, returning "other"' );
+ if ( !preg_match( '/^ -? ( ([0-9]+) (?: \. ([0-9]+) )? )$/x', $number, $m ) ) {
+ wfDebug( __METHOD__ . ": invalid number input, returning 'other'\n" );
+
return count( $rules );
}
if ( !isset( $m[3] ) ) {
@@ -131,16 +134,16 @@ class CLDRPluralRuleEvaluator {
/**
* Do a single operation
*
- * @param $token string The token string
- * @param $left The left operand. If it is an object, its state may be destroyed.
- * @param $right The right operand
+ * @param string $token The token string
+ * @param mixed $left The left operand. If it is an object, its state may be destroyed.
+ * @param mixed $right The right operand
* @throws CLDRPluralRuleError
- * @return mixed
+ * @return mixed The operation result
*/
private static function doOperation( $token, $left, $right ) {
if ( in_array( $token, array( 'in', 'not-in', 'within', 'not-within' ) ) ) {
- if ( !( $right instanceof CLDRPluralRuleEvaluator_Range ) ) {
- $right = new CLDRPluralRuleEvaluator_Range( $right );
+ if ( !( $right instanceof CLDRPluralRuleEvaluatorRange ) ) {
+ $right = new CLDRPluralRuleEvaluatorRange( $right );
}
}
switch ( $token ) {
@@ -164,531 +167,21 @@ class CLDRPluralRuleEvaluator {
if ( is_int( $left ) ) {
return (int)fmod( $left, $right );
}
+
return fmod( $left, $right );
case ',':
- if ( $left instanceof CLDRPluralRuleEvaluator_Range ) {
+ if ( $left instanceof CLDRPluralRuleEvaluatorRange ) {
$range = $left;
} else {
- $range = new CLDRPluralRuleEvaluator_Range( $left );
+ $range = new CLDRPluralRuleEvaluatorRange( $left );
}
$range->add( $right );
+
return $range;
case '..':
- return new CLDRPluralRuleEvaluator_Range( $left, $right );
+ return new CLDRPluralRuleEvaluatorRange( $left, $right );
default:
throw new CLDRPluralRuleError( "Invalid RPN token" );
}
}
}
-
-/**
- * Evaluator helper class representing a range list.
- */
-class CLDRPluralRuleEvaluator_Range {
- public $parts = array();
-
- function __construct( $start, $end = false ) {
- if ( $end === false ) {
- $this->parts[] = $start;
- } else {
- $this->parts[] = array( $start, $end );
- }
- }
-
- /**
- * Determine if the given number is inside the range. If $integerConstraint
- * is true, the number must additionally be an integer if it is to match
- * any interval part.
- */
- function isNumberIn( $number, $integerConstraint = true ) {
- foreach ( $this->parts as $part ) {
- if ( is_array( $part ) ) {
- if ( ( !$integerConstraint || floor( $number ) === (float)$number )
- && $number >= $part[0] && $number <= $part[1] )
- {
- return true;
- }
- } else {
- if ( $number == $part ) {
- return true;
- }
- }
- }
- return false;
- }
-
- /**
- * Readable alias for isNumberIn( $number, false ), and the implementation
- * of the "within" operator.
- */
- function isNumberWithin( $number ) {
- return $this->isNumberIn( $number, false );
- }
-
- /**
- * Add another part to this range. The supplied new part may either be a
- * range object itself, or a single number.
- */
- function add( $other ) {
- if ( $other instanceof self ) {
- $this->parts = array_merge( $this->parts, $other->parts );
- } else {
- $this->parts[] = $other;
- }
- }
-
- /**
- * For debugging
- */
- function __toString() {
- $s = 'Range(';
- foreach ( $this->parts as $i => $part ) {
- if ( $i ) {
- $s .= ', ';
- }
- if ( is_array( $part ) ) {
- $s .= $part[0] . '..' . $part[1];
- } else {
- $s .= $part;
- }
- }
- $s .= ')';
- return $s;
- }
-
-}
-
-/**
- * Helper class for converting rules to reverse polish notation (RPN).
- */
-class CLDRPluralRuleConverter {
- /**
- * The input string
- *
- * @var string
- */
- public $rule;
-
- /**
- * The current position
- *
- * @var int
- */
- public $pos;
-
- /**
- * The past-the-end position
- *
- * @var int
- */
- public $end;
-
- /**
- * The operator stack
- *
- * @var array
- */
- public $operators = array();
-
- /**
- * The operand stack
- *
- * @var array
- */
- public $operands = array();
-
- /**
- * Precedence levels. Note that there's no need to worry about associativity
- * for the level 4 operators, since they return boolean and don't accept
- * boolean inputs.
- */
- static $precedence = array(
- 'or' => 2,
- 'and' => 3,
- 'is' => 4,
- 'is-not' => 4,
- 'in' => 4,
- 'not-in' => 4,
- 'within' => 4,
- 'not-within' => 4,
- 'mod' => 5,
- ',' => 6,
- '..' => 7,
- );
-
- /**
- * A character list defining whitespace, for use in strspn() etc.
- */
- const WHITESPACE_CLASS = " \t\r\n";
-
- /**
- * Same for digits. Note that the grammar given in UTS #35 doesn't allow
- * negative numbers or decimal separators.
- */
- const NUMBER_CLASS = '0123456789';
-
- /**
- * A character list of symbolic operands.
- */
- const OPERAND_SYMBOLS = 'nivwft';
-
- /**
- * An anchored regular expression which matches a word at the current offset.
- */
- const WORD_REGEX = '/[a-zA-Z@]+/A';
-
- /**
- * Convert a rule to RPN. This is the only public entry point.
- */
- public static function convert( $rule ) {
- $parser = new self( $rule );
- return $parser->doConvert();
- }
-
- /**
- * Private constructor.
- */
- protected function __construct( $rule ) {
- $this->rule = $rule;
- $this->pos = 0;
- $this->end = strlen( $rule );
- }
-
- /**
- * Do the operation.
- */
- protected function doConvert() {
- $expectOperator = true;
-
- // Iterate through all tokens, saving the operators and operands to a
- // stack per Dijkstra's shunting yard algorithm.
- while ( false !== ( $token = $this->nextToken() ) ) {
- // In this grammar, there are only binary operators, so every valid
- // rule string will alternate between operator and operand tokens.
- $expectOperator = !$expectOperator;
-
- if ( $token instanceof CLDRPluralRuleConverter_Expression ) {
- // Operand
- if ( $expectOperator ) {
- $token->error( 'unexpected operand' );
- }
- $this->operands[] = $token;
- continue;
- } else {
- // Operator
- if ( !$expectOperator ) {
- $token->error( 'unexpected operator' );
- }
- // Resolve higher precedence levels
- $lastOp = end( $this->operators );
- while ( $lastOp && self::$precedence[$token->name] <= self::$precedence[$lastOp->name] ) {
- $this->doOperation( $lastOp, $this->operands );
- array_pop( $this->operators );
- $lastOp = end( $this->operators );
- }
- $this->operators[] = $token;
- }
- }
-
- // Finish off the stack
- while ( $op = array_pop( $this->operators ) ) {
- $this->doOperation( $op, $this->operands );
- }
-
- // Make sure the result is sane. The first case is possible for an empty
- // string input, the second should be unreachable.
- if ( !count( $this->operands ) ) {
- $this->error( 'condition expected' );
- } elseif ( count( $this->operands ) > 1 ) {
- $this->error( 'missing operator or too many operands' );
- }
-
- $value = $this->operands[0];
- if ( $value->type !== 'boolean' ) {
- $this->error( 'the result must have a boolean type' );
- }
-
- return $this->operands[0]->rpn;
- }
-
- /**
- * Fetch the next token from the input string. Return it as a
- * CLDRPluralRuleConverter_Fragment object.
- */
- protected function nextToken() {
- if ( $this->pos >= $this->end ) {
- return false;
- }
-
- // Whitespace
- $length = strspn( $this->rule, self::WHITESPACE_CLASS, $this->pos );
- $this->pos += $length;
-
- if ( $this->pos >= $this->end ) {
- return false;
- }
-
- // Number
- $length = strspn( $this->rule, self::NUMBER_CLASS, $this->pos );
- if ( $length !== 0 ) {
- $token = $this->newNumber( substr( $this->rule, $this->pos, $length ), $this->pos );
- $this->pos += $length;
- return $token;
- }
-
- // Two-character operators
- $op2 = substr( $this->rule, $this->pos, 2 );
- if ( $op2 === '..' || $op2 === '!=' ) {
- $token = $this->newOperator( $op2, $this->pos, 2 );
- $this->pos += 2;
- return $token;
- }
-
- // Single-character operators
- $op1 = $this->rule[$this->pos];
- if ( $op1 === ',' || $op1 === '=' || $op1 === '%' ) {
- $token = $this->newOperator( $op1, $this->pos, 1 );
- $this->pos ++;
- return $token;
- }
-
- // Word
- if ( !preg_match( self::WORD_REGEX, $this->rule, $m, 0, $this->pos ) ) {
- $this->error( 'unexpected character "' . $this->rule[$this->pos] . '"' );
- }
- $word1 = strtolower( $m[0] );
- $word2 = '';
- $nextTokenPos = $this->pos + strlen( $word1 );
- if ( $word1 === 'not' || $word1 === 'is' ) {
- // Look ahead one word
- $nextTokenPos += strspn( $this->rule, self::WHITESPACE_CLASS, $nextTokenPos );
- if ( $nextTokenPos < $this->end
- && preg_match( self::WORD_REGEX, $this->rule, $m, 0, $nextTokenPos ) )
- {
- $word2 = strtolower( $m[0] );
- $nextTokenPos += strlen( $word2 );
- }
- }
-
- // Two-word operators like "is not" take precedence over single-word operators like "is"
- if ( $word2 !== '' ) {
- $bothWords = "{$word1}-{$word2}";
- if ( isset( self::$precedence[$bothWords] ) ) {
- $token = $this->newOperator( $bothWords, $this->pos, $nextTokenPos - $this->pos );
- $this->pos = $nextTokenPos;
- return $token;
- }
- }
-
- // Single-word operators
- if ( isset( self::$precedence[$word1] ) ) {
- $token = $this->newOperator( $word1, $this->pos, strlen( $word1 ) );
- $this->pos += strlen( $word1 );
- return $token;
- }
-
- // The single-character operand symbols
- if ( strpos( self::OPERAND_SYMBOLS, $word1 ) !== false ) {
- $token = $this->newNumber( $word1, $this->pos );
- $this->pos ++;
- return $token;
- }
-
- // Samples
- if ( $word1 === '@integer' || $word1 === '@decimal' ) {
- // Samples are like comments, they have no effect on rule evaluation.
- // They run from the first sample indicator to the end of the string.
- $this->pos = $this->end;
- return false;
- }
-
- $this->error( 'unrecognised word' );
- }
-
- /**
- * For the binary operator $op, pop its operands off the stack and push
- * a fragment with rpn and type members describing the result of that
- * operation.
- */
- protected function doOperation( $op ) {
- if ( count( $this->operands ) < 2 ) {
- $op->error( 'missing operand' );
- }
- $right = array_pop( $this->operands );
- $left = array_pop( $this->operands );
- $result = $op->operate( $left, $right );
- $this->operands[] = $result;
- }
-
- /**
- * Create a numerical expression object
- */
- protected function newNumber( $text, $pos ) {
- return new CLDRPluralRuleConverter_Expression( $this, 'number', $text, $pos, strlen( $text ) );
- }
-
- /**
- * Create a binary operator
- */
- protected function newOperator( $type, $pos, $length ) {
- return new CLDRPluralRuleConverter_Operator( $this, $type, $pos, $length );
- }
-
- /**
- * Throw an error
- */
- protected function error( $message ) {
- throw new CLDRPluralRuleError( $message );
- }
-}
-
-/**
- * Helper for CLDRPluralRuleConverter.
- * The base class for operators and expressions, describing a region of the input string.
- */
-class CLDRPluralRuleConverter_Fragment {
- public $parser, $pos, $length, $end;
-
- function __construct( $parser, $pos, $length ) {
- $this->parser = $parser;
- $this->pos = $pos;
- $this->length = $length;
- $this->end = $pos + $length;
- }
-
- public function error( $message ) {
- $text = $this->getText();
- throw new CLDRPluralRuleError( "$message at position " . ( $this->pos + 1 ) . ": \"$text\"" );
- }
-
- public function getText() {
- return substr( $this->parser->rule, $this->pos, $this->length );
- }
-}
-
-/**
- * Helper for CLDRPluralRuleConverter.
- * An expression object, representing a region of the input string (for error
- * messages), the RPN notation used to evaluate it, and the result type for
- * validation.
- */
-class CLDRPluralRuleConverter_Expression extends CLDRPluralRuleConverter_Fragment {
- public $type, $rpn;
-
- function __construct( $parser, $type, $rpn, $pos, $length ) {
- parent::__construct( $parser, $pos, $length );
- $this->type = $type;
- $this->rpn = $rpn;
- }
-
- public function isType( $type ) {
- if ( $type === 'range' && ( $this->type === 'range' || $this->type === 'number' ) ) {
- return true;
- }
- if ( $type === $this->type ) {
- return true;
- }
- return false;
- }
-}
-
-/**
- * Helper for CLDRPluralRuleConverter.
- * An operator object, representing a region of the input string (for error
- * messages), and the binary operator at that location.
- */
-class CLDRPluralRuleConverter_Operator extends CLDRPluralRuleConverter_Fragment {
- public $name;
-
- /**
- * Each op type has three characters: left operand type, right operand type and result type
- *
- * b = boolean
- * n = number
- * r = range
- *
- * A number is a kind of range.
- */
- static $opTypes = array(
- 'or' => 'bbb',
- 'and' => 'bbb',
- 'is' => 'nnb',
- 'is-not' => 'nnb',
- 'in' => 'nrb',
- 'not-in' => 'nrb',
- 'within' => 'nrb',
- 'not-within' => 'nrb',
- 'mod' => 'nnn',
- ',' => 'rrr',
- '..' => 'nnr',
- );
-
- /**
- * Map converting from the abbrevation to the full form.
- */
- static $typeSpecMap = array(
- 'b' => 'boolean',
- 'n' => 'number',
- 'r' => 'range',
- );
-
- /**
- * Map for converting the new operators introduced in Rev 33 to the old forms
- */
- static $aliasMap = array(
- '%' => 'mod',
- '!=' => 'not-in',
- '=' => 'in'
- );
-
- /**
- * Initialize a new instance of a CLDRPluralRuleConverter_Operator object
- *
- * @param CLDRPluralRuleConverter $parser The parser
- * @param string $name The operator name
- * @param int $pos The position
- * @param int $pos The length
- */
- function __construct( $parser, $name, $pos, $length ) {
- parent::__construct( $parser, $pos, $length );
- if ( isset( self::$aliasMap[$name] ) ) {
- $name = self::$aliasMap[$name];
- }
- $this->name = $name;
- }
-
- public function operate( $left, $right ) {
- $typeSpec = self::$opTypes[$this->name];
-
- $leftType = self::$typeSpecMap[$typeSpec[0]];
- $rightType = self::$typeSpecMap[$typeSpec[1]];
- $resultType = self::$typeSpecMap[$typeSpec[2]];
-
- $start = min( $this->pos, $left->pos, $right->pos );
- $end = max( $this->end, $left->end, $right->end );
- $length = $end - $start;
-
- $newExpr = new CLDRPluralRuleConverter_Expression( $this->parser, $resultType,
- "{$left->rpn} {$right->rpn} {$this->name}",
- $start, $length );
-
- if ( !$left->isType( $leftType ) ) {
- $newExpr->error( "invalid type for left operand: expected $leftType, got {$left->type}" );
- }
-
- if ( !$right->isType( $rightType ) ) {
- $newExpr->error( "invalid type for right operand: expected $rightType, got {$right->type}" );
- }
- return $newExpr;
- }
-}
-
-/**
- * The exception class for all the classes in this file. This will be thrown
- * back to the caller if there is any validation error.
- */
-class CLDRPluralRuleError extends MWException {
- function __construct( $message ) {
- parent::__construct( 'CLDR plural rule error: ' . $message );
- }
-}
diff --git a/languages/utils/CLDRPluralRuleEvaluatorRange.php b/languages/utils/CLDRPluralRuleEvaluatorRange.php
new file mode 100644
index 00000000..996c22e3
--- /dev/null
+++ b/languages/utils/CLDRPluralRuleEvaluatorRange.php
@@ -0,0 +1,110 @@
+<?php
+/**
+ * @author Niklas Laxström, Tim Starling
+ *
+ * @copyright Copyright © 2010-2012, Niklas Laxström
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
+ *
+ * @file
+ * @since 1.20
+ */
+
+/**
+ * Evaluator helper class representing a range list, i.e. a list whose
+ * entries are either single numbers or start/end intervals.
+ */
+class CLDRPluralRuleEvaluatorRange {
+ /**
+  * The parts: each entry is either a single number or an
+  * array( start, end ) interval.
+  *
+  * @var array
+  */
+ public $parts = array();
+
+ /**
+  * Initialize a new instance of CLDRPluralRuleEvaluatorRange
+  *
+  * @param int|float $start The start of the interval, or the single number
+  * @param int|float|bool $end The end of the interval, or false to store
+  *   $start as a single number
+  */
+ public function __construct( $start, $end = false ) {
+     $this->parts[] = ( $end === false ) ? $start : array( $start, $end );
+ }
+
+ /**
+  * Determine if the given number is inside the range.
+  *
+  * @param int|float $number The number to check
+  * @param bool $integerConstraint If true, only an integer value can match
+  *   an interval part; single-number parts are compared as they are.
+  * @return bool True if the number is inside the range; otherwise, false.
+  */
+ public function isNumberIn( $number, $integerConstraint = true ) {
+     foreach ( $this->parts as $part ) {
+         if ( !is_array( $part ) ) {
+             // Single number: loose comparison
+             if ( $number == $part ) {
+                 return true;
+             }
+             continue;
+         }
+
+         // Interval: optionally reject values with a fractional part
+         if ( $integerConstraint && floor( $number ) !== (float)$number ) {
+             continue;
+         }
+         list( $low, $high ) = $part;
+         if ( $number >= $low && $number <= $high ) {
+             return true;
+         }
+     }
+
+     return false;
+ }
+
+ /**
+  * Readable alias for isNumberIn( $number, false ), and the implementation
+  * of the "within" operator.
+  *
+  * @param int|float $number The number to check
+  * @return bool True if the number is inside the range; otherwise, false.
+  */
+ public function isNumberWithin( $number ) {
+     return $this->isNumberIn( $number, false );
+ }
+
+ /**
+  * Add another part to this range.
+  *
+  * @param CLDRPluralRuleEvaluatorRange|int $other The part to add, either
+  *   a range object itself (all of its parts are appended) or a single number.
+  */
+ public function add( $other ) {
+     $extra = ( $other instanceof self ) ? $other->parts : array( $other );
+     $this->parts = array_merge( $this->parts, $extra );
+ }
+
+ /**
+  * Returns the string representation of the rule evaluator range, e.g.
+  * "Range(1..3, 7)". The purpose of this method is to help debugging.
+  *
+  * @return string The string representation of the rule evaluator range
+  */
+ public function __toString() {
+     $pieces = array();
+     foreach ( $this->parts as $part ) {
+         $pieces[] = is_array( $part ) ? ( $part[0] . '..' . $part[1] ) : $part;
+     }
+
+     return 'Range(' . implode( ', ', $pieces ) . ')';
+ }
+}