summary | refs | log | tree | commit | diff
path: root/includes/utils
diff options
context:
space:
mode:
Diffstat (limited to 'includes/utils')
-rw-r--r-- includes/utils/ArrayUtils.php 187
-rw-r--r-- includes/utils/Cdb.php 163
-rw-r--r-- includes/utils/CdbDBA.php 75
-rw-r--r-- includes/utils/CdbPHP.php 494
-rw-r--r-- includes/utils/IP.php 738
-rw-r--r-- includes/utils/MWCryptHKDF.php 332
-rw-r--r-- includes/utils/MWCryptRand.php 516
-rw-r--r-- includes/utils/MWFunction.php 63
-rw-r--r-- includes/utils/README 9
-rw-r--r-- includes/utils/StringUtils.php 612
-rw-r--r-- includes/utils/UIDGenerator.php 507
-rw-r--r-- includes/utils/ZipDirectoryReader.php 732
12 files changed, 4428 insertions, 0 deletions
diff --git a/includes/utils/ArrayUtils.php b/includes/utils/ArrayUtils.php
new file mode 100644
index 00000000..1e521cb8
--- /dev/null
+++ b/includes/utils/ArrayUtils.php
@@ -0,0 +1,187 @@
+<?php
+/**
+ * Methods to play with arrays.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * A collection of static methods to play with arrays.
+ *
+ * @since 1.21
+ */
+class ArrayUtils {
+	/**
+	 * Sort the given array in a pseudo-random order which depends only on the
+	 * given key and each element value. This is typically used for load
+	 * balancing between servers each with a local cache.
+	 *
+	 * Keys are preserved. The input array is modified in place.
+	 *
+	 * Each element is used as an array key internally, so the elements must
+	 * be strings or integers.
+	 *
+	 * Note: Benchmarking on PHP 5.3 and 5.4 indicates that for small
+	 * strings, md5() is only 10% slower than hash('joaat',...) etc.,
+	 * since the function call overhead dominates. So there's not much
+	 * justification for breaking compatibility with installations
+	 * compiled with ./configure --disable-hash.
+	 *
+	 * @param array $array Array to sort
+	 * @param string $key
+	 * @param string $separator A separator used to delimit the array elements and the
+	 *     key. This can be chosen to provide backwards compatibility with
+	 *     various consistent hash implementations that existed before this
+	 *     function was introduced.
+	 */
+	public static function consistentHashSort( &$array, $key, $separator = "\000" ) {
+		// Hash each element once up front so the comparator only does lookups.
+		$hashes = array();
+		foreach ( $array as $elt ) {
+			$hashes[$elt] = md5( $elt . $separator . $key );
+		}
+		uasort( $array, function ( $a, $b ) use ( $hashes ) {
+			return strcmp( $hashes[$a], $hashes[$b] );
+		} );
+	}
+
+	/**
+	 * Given an array of non-normalised probabilities, this function will select
+	 * an element and return the appropriate key
+	 *
+	 * @param array $weights Map of key => non-negative weight
+	 * @return bool|int|string The selected key, or false if $weights is not a
+	 *     non-empty array or if all weights sum to zero.
+	 */
+	public static function pickRandom( $weights ) {
+		if ( !is_array( $weights ) || count( $weights ) == 0 ) {
+			return false;
+		}
+
+		$sum = array_sum( $weights );
+		if ( $sum == 0 ) {
+			# No loads on any of them
+			# In previous versions, this triggered an unweighted random selection,
+			# but this feature has been removed as of April 2006 to allow for strict
+			# separation of query groups.
+			return false;
+		}
+		// Pick a point in [0, $sum] and walk the cumulative weights until we pass it.
+		$max = mt_getrandmax();
+		$rand = mt_rand( 0, $max ) / $max * $sum;
+
+		$sum = 0;
+		foreach ( $weights as $i => $w ) {
+			$sum += $w;
+			# Do not return keys if they have 0 weight.
+			# Note that the "all 0 weight" case is handled above
+			if ( $w > 0 && $sum >= $rand ) {
+				break;
+			}
+		}
+
+		return $i;
+	}
+
+	/**
+	 * Do a binary search, and return the index of the largest item that sorts
+	 * less than or equal to the target value.
+	 *
+	 * @since 1.23
+	 *
+	 * @param callable $valueCallback A function to call to get the value with
+	 *     a given array index.
+	 * @param int $valueCount The number of items accessible via $valueCallback,
+	 *     indexed from 0 to $valueCount - 1
+	 * @param callable $comparisonCallback A callback to compare two values, returning
+	 *     -1, 0 or 1 in the style of strcmp(). It is called as ( $target, $item ).
+	 * @param string $target The target value to find.
+	 *
+	 * @return int|bool The item index of the lower bound, or false if the target value
+	 *     sorts before all items.
+	 */
+	public static function findLowerBound( $valueCallback, $valueCount,
+		$comparisonCallback, $target
+	) {
+		if ( $valueCount === 0 ) {
+			return false;
+		}
+
+		// Invariant: the answer, if any, lies in the half-open range [$min, $max).
+		$min = 0;
+		$max = $valueCount;
+		do {
+			$mid = $min + ( ( $max - $min ) >> 1 );
+			$item = call_user_func( $valueCallback, $mid );
+			$comparison = call_user_func( $comparisonCallback, $target, $item );
+			if ( $comparison > 0 ) {
+				$min = $mid;
+			} elseif ( $comparison == 0 ) {
+				$min = $mid;
+				break;
+			} else {
+				$max = $mid;
+			}
+		} while ( $min < $max - 1 );
+
+		// Index 0 is never ruled out by the loop, so check it explicitly.
+		if ( $min == 0 ) {
+			$item = call_user_func( $valueCallback, $min );
+			$comparison = call_user_func( $comparisonCallback, $target, $item );
+			if ( $comparison < 0 ) {
+				// Before the first item
+				return false;
+			}
+		}
+		return $min;
+	}
+
+	/**
+	 * Do array_diff_assoc() on multi-dimensional arrays.
+	 *
+	 * Scalar values are compared with ===. A nested array in $array1 is only
+	 * compared against counterparts that are themselves arrays; a scalar stored
+	 * under the same key in another array never cancels it out.
+	 *
+	 * Note: empty arrays are removed.
+	 *
+	 * @since 1.23
+	 *
+	 * @param array $array1 The array to compare from
+	 * @param array $array2,... More arrays to compare against
+	 * @return array An array containing all the values from array1
+	 *     that are not present in any of the other arrays.
+	 */
+	public static function arrayDiffAssocRecursive( $array1 ) {
+		$arrays = func_get_args();
+		array_shift( $arrays );
+		$ret = array();
+
+		foreach ( $array1 as $key => $value ) {
+			if ( is_array( $value ) ) {
+				$args = array( $value );
+				foreach ( $arrays as $array ) {
+					// Only recurse into real arrays: indexing into a string here
+					// would compare against single characters of that string.
+					if ( isset( $array[$key] ) && is_array( $array[$key] ) ) {
+						$args[] = $array[$key];
+					}
+				}
+				$valueret = call_user_func_array( __METHOD__, $args );
+				if ( count( $valueret ) ) {
+					$ret[$key] = $valueret;
+				}
+			} else {
+				foreach ( $arrays as $array ) {
+					// array_key_exists() rather than isset(), so that a null
+					// present on both sides counts as a match.
+					if ( is_array( $array )
+						&& array_key_exists( $key, $array )
+						&& $array[$key] === $value
+					) {
+						continue 2;
+					}
+				}
+				$ret[$key] = $value;
+			}
+		}
+
+		return $ret;
+	}
+}
diff --git a/includes/utils/Cdb.php b/includes/utils/Cdb.php
new file mode 100644
index 00000000..3ceb620f
--- /dev/null
+++ b/includes/utils/Cdb.php
@@ -0,0 +1,163 @@
+<?php
+/**
+ * Native CDB file reader and writer.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * Read from a CDB file.
+ * Native and pure PHP implementations are provided.
+ * http://cr.yp.to/cdb.html
+ */
+abstract class CdbReader {
+	/**
+	 * The file handle
+	 */
+	protected $handle;
+
+	/**
+	 * Open a file and return a reader for it: the DBA-backed reader when the
+	 * native extension is usable, the pure PHP reader otherwise.
+	 *
+	 * @param string $fileName
+	 *
+	 * @return CdbReader
+	 */
+	public static function open( $fileName ) {
+		if ( self::haveExtension() ) {
+			return new CdbReaderDBA( $fileName );
+		}
+
+		return new CdbReaderPHP( $fileName );
+	}
+
+	/**
+	 * Returns true if the native extension is available, i.e. the DBA
+	 * functions exist and both the "cdb" and "cdb_make" handlers are listed.
+	 *
+	 * @return bool
+	 */
+	public static function haveExtension() {
+		if ( function_exists( 'dba_handlers' ) ) {
+			$available = dba_handlers();
+
+			return in_array( 'cdb', $available ) && in_array( 'cdb_make', $available );
+		}
+
+		return false;
+	}
+
+	/**
+	 * Create the object and open the file
+	 *
+	 * @param string $fileName
+	 */
+	abstract public function __construct( $fileName );
+
+	/**
+	 * Close the file. Optional, you can just let the variable go out of scope.
+	 */
+	abstract public function close();
+
+	/**
+	 * Get a value with a given key. Only string values are supported.
+	 *
+	 * @param string $key
+	 */
+	abstract public function get( $key );
+}
+
+/**
+ * Write to a CDB file.
+ * Native and pure PHP implementations are provided.
+ */
+abstract class CdbWriter {
+	/**
+	 * The file handle
+	 */
+	protected $handle;
+
+	/**
+	 * File we'll be writing to when we're done
+	 * @var string
+	 */
+	protected $realFileName;
+
+	/**
+	 * File we write to temporarily until we're done
+	 * @var string
+	 */
+	protected $tmpFileName;
+
+	/**
+	 * Open a writer and return a subclass instance: the DBA-backed writer
+	 * when CdbReader reports the native extension, the pure PHP one otherwise.
+	 * The user must have write access to the directory, for temporary file creation.
+	 *
+	 * @param string $fileName
+	 *
+	 * @return CdbWriterDBA|CdbWriterPHP
+	 */
+	public static function open( $fileName ) {
+		if ( CdbReader::haveExtension() ) {
+			return new CdbWriterDBA( $fileName );
+		}
+
+		return new CdbWriterPHP( $fileName );
+	}
+
+	/**
+	 * Create the object and open the file
+	 *
+	 * @param string $fileName
+	 */
+	abstract public function __construct( $fileName );
+
+	/**
+	 * Set a key to a given value. The value will be converted to string.
+	 * @param string $key
+	 * @param string $value
+	 */
+	abstract public function set( $key, $value );
+
+	/**
+	 * Close the writer object. You should call this function before the object
+	 * goes out of scope, to write out the final hashtables.
+	 */
+	abstract public function close();
+
+	/**
+	 * If the object goes out of scope while its handle is still set,
+	 * close it for sanity
+	 */
+	public function __destruct() {
+		if ( !isset( $this->handle ) ) {
+			return;
+		}
+		$this->close();
+	}
+
+	/**
+	 * Are we running on Windows? Decided by whether php_uname() starts
+	 * with "Windows".
+	 * @return bool
+	 */
+	protected function isWindows() {
+		return strncmp( php_uname(), 'Windows', 7 ) === 0;
+	}
+}
+
+/**
+ * Exception for Cdb errors, thrown by the CDB reader and writer classes
+ * when a file cannot be opened, read, written or moved into place.
+ *
+ * This explicitly extends the built-in Exception rather than MWException,
+ * to encourage reuse.
+ */
+class CdbException extends Exception {
+}
diff --git a/includes/utils/CdbDBA.php b/includes/utils/CdbDBA.php
new file mode 100644
index 00000000..efcaf21f
--- /dev/null
+++ b/includes/utils/CdbDBA.php
@@ -0,0 +1,75 @@
+<?php
+/**
+ * DBA-based CDB reader/writer
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * Reader class which uses the DBA extension
+ */
+class CdbReaderDBA extends CdbReader {
+	/**
+	 * Open the given file read-only through the DBA "cdb" handler.
+	 *
+	 * @param string $fileName
+	 * @throws CdbException If dba_open() does not return a handle
+	 */
+	public function __construct( $fileName ) {
+		$opened = dba_open( $fileName, 'r-', 'cdb' );
+		$this->handle = $opened;
+		if ( $opened ) {
+			return;
+		}
+
+		throw new CdbException( 'Unable to open CDB file "' . $fileName . '"' );
+	}
+
+	/**
+	 * Close the DBA handle if one is set, then drop it from the object.
+	 */
+	public function close() {
+		$hasHandle = isset( $this->handle );
+		if ( $hasHandle ) {
+			dba_close( $this->handle );
+		}
+		unset( $this->handle );
+	}
+
+	/**
+	 * Fetch the value stored under $key via dba_fetch().
+	 *
+	 * @param string $key
+	 * @return string|bool Whatever dba_fetch() returns for the key
+	 */
+	public function get( $key ) {
+		$value = dba_fetch( $key, $this->handle );
+
+		return $value;
+	}
+}
+
+/**
+ * Writer class which uses the DBA extension
+ */
+class CdbWriterDBA extends CdbWriter {
+	/**
+	 * Open a temporary file next to $fileName for writing through the DBA
+	 * "cdb_make" handler. The data only replaces $fileName on close().
+	 *
+	 * @param string $fileName
+	 * @throws CdbException If the temporary file cannot be opened
+	 */
+	public function __construct( $fileName ) {
+		$this->realFileName = $fileName;
+		$this->tmpFileName = $fileName . '.tmp.' . mt_rand( 0, 0x7fffffff );
+		$this->handle = dba_open( $this->tmpFileName, 'n', 'cdb_make' );
+		if ( !$this->handle ) {
+			// Drop the failed (false) handle so that the inherited destructor,
+			// which tests isset( $this->handle ), does not call close().
+			unset( $this->handle );
+			throw new CdbException( 'Unable to open CDB file for write "' . $fileName . '"' );
+		}
+	}
+
+	/**
+	 * Insert a key/value pair via dba_insert().
+	 *
+	 * @param string $key
+	 * @param string $value
+	 * @return bool Whatever dba_insert() returns
+	 */
+	public function set( $key, $value ) {
+		return dba_insert( $key, $value, $this->handle );
+	}
+
+	/**
+	 * Close the DBA handle and move the temporary file over the real one.
+	 *
+	 * @throws CdbException If the temporary file cannot be renamed into place
+	 */
+	public function close() {
+		if ( isset( $this->handle ) ) {
+			dba_close( $this->handle );
+		}
+		// The handle is finished whether or not the rename below succeeds;
+		// unsetting it first keeps the destructor from closing it a second time.
+		unset( $this->handle );
+
+		// The target is removed first on Windows. Only do so if it exists,
+		// matching CdbWriterPHP::close(), to avoid a warning on first write.
+		if ( $this->isWindows() && file_exists( $this->realFileName ) ) {
+			unlink( $this->realFileName );
+		}
+		if ( !rename( $this->tmpFileName, $this->realFileName ) ) {
+			throw new CdbException( 'Unable to move the new CDB file into place.' );
+		}
+	}
+}
diff --git a/includes/utils/CdbPHP.php b/includes/utils/CdbPHP.php
new file mode 100644
index 00000000..19d747a7
--- /dev/null
+++ b/includes/utils/CdbPHP.php
@@ -0,0 +1,494 @@
+<?php
+/**
+ * This is a port of D.J. Bernstein's CDB to PHP. It's based on the copy that
+ * appears in PHP 5.3. Changes are:
+ * * Error returns replaced with exceptions
+ * * Exception thrown if sizes or offsets are between 2GB and 4GB
+ * * Some variables renamed
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * Common functions for readers and writers
+ */
+class CdbFunctions {
+ /**
+ * Take a modulo of a signed integer as if it were an unsigned integer.
+ * $b must be less than 0x40000000 and greater than 0
+ *
+ * @param int $a
+ * @param int $b
+ *
+ * @return int
+ */
+ public static function unsignedMod( $a, $b ) {
+ if ( $a & 0x80000000 ) {
+ // Bit 31 is set: reduce the low 31 bits and add the contribution of
+ // bit 31 separately, written as 2 * ( 2^30 mod $b ).
+ $m = ( $a & 0x7fffffff ) % $b + 2 * ( 0x40000000 % $b );
+
+ return $m % $b;
+ } else {
+ return $a % $b;
+ }
+ }
+
+ /**
+ * Shift a signed integer right as if it were unsigned
+ * @param int $a
+ * @param int $b
+ * @return int
+ */
+ public static function unsignedShiftRight( $a, $b ) {
+ if ( $b == 0 ) {
+ return $a;
+ }
+ if ( $a & 0x80000000 ) {
+ // Shift the low 31 bits, then OR bit 31 back in at its shifted
+ // position: 0x40000000 >> ( $b - 1 ) is bit ( 31 - $b ).
+ return ( ( $a & 0x7fffffff ) >> $b ) | ( 0x40000000 >> ( $b - 1 ) );
+ } else {
+ return $a >> $b;
+ }
+ }
+
+ /**
+ * The CDB hash function.
+ *
+ * Starts from 5381 and, for each byte of $s, replaces $h with
+ * ( $h + ( $h << 5 ) ) XOR byte, keeping only the low 32 bits.
+ *
+ * @param string $s
+ *
+ * @return int
+ */
+ public static function hash( $s ) {
+ $h = 5381;
+ $len = strlen( $s );
+ for ( $i = 0; $i < $len; $i++ ) {
+ $h5 = ( $h << 5 ) & 0xffffffff;
+ // Do a 32-bit sum
+ // Inlined here for speed
+ // The low 30 bits are added directly; the top two bits are rebuilt
+ // from the carry out of that sum plus bits 30 and 31 of both operands.
+ $sum = ( $h & 0x3fffffff ) + ( $h5 & 0x3fffffff );
+ $h =
+ (
+ ( $sum & 0x40000000 ? 1 : 0 )
+ + ( $h & 0x80000000 ? 2 : 0 )
+ + ( $h & 0x40000000 ? 1 : 0 )
+ + ( $h5 & 0x80000000 ? 2 : 0 )
+ + ( $h5 & 0x40000000 ? 1 : 0 )
+ ) << 30
+ | ( $sum & 0x3fffffff );
+ $h ^= ord( $s[$i] );
+ // Discard anything above bit 31
+ $h &= 0xffffffff;
+ }
+
+ return $h;
+ }
+}
+
+/**
+ * CDB reader class
+ */
+class CdbReaderPHP extends CdbReader {
+	/** @var string The filename */
+	protected $fileName;
+
+	/** @var int Number of hash slots searched under this key */
+	protected $loop;
+
+	/** @var int Hash of the key being looked up; initialized if loop is nonzero */
+	protected $khash;
+
+	/** @var int File offset of the next slot to probe; initialized if loop is nonzero */
+	protected $kpos;
+
+	/** @var int File offset of the key's hash table; initialized if loop is nonzero */
+	protected $hpos;
+
+	/** @var int Number of slots in the key's hash table; initialized if loop is nonzero */
+	protected $hslots;
+
+	/** @var int File offset of the found value; initialized if findNext() returns true */
+	protected $dpos;
+
+	/** @var int Length of the found value; initialized if findNext() returns true */
+	protected $dlen;
+
+	/**
+	 * Open the file for binary reading and reset the search state.
+	 *
+	 * @param string $fileName
+	 * @throws CdbException If the file cannot be opened
+	 */
+	public function __construct( $fileName ) {
+		$this->fileName = $fileName;
+		$this->handle = fopen( $fileName, 'rb' );
+		if ( !$this->handle ) {
+			throw new CdbException( 'Unable to open CDB file "' . $this->fileName . '".' );
+		}
+		$this->findStart();
+	}
+
+	/**
+	 * Close the file handle if one is set, then drop it from the object.
+	 */
+	public function close() {
+		if ( isset( $this->handle ) ) {
+			fclose( $this->handle );
+		}
+		unset( $this->handle );
+	}
+
+	/**
+	 * Get the value stored under the given key.
+	 *
+	 * @param mixed $key Converted to a string before lookup
+	 * @return bool|string The value, or false if the key was not found
+	 * @throws CdbException If the file cannot be read as expected
+	 */
+	public function get( $key ) {
+		// strval is required
+		if ( $this->find( strval( $key ) ) ) {
+			return $this->read( $this->dlen, $this->dpos );
+		} else {
+			return false;
+		}
+	}
+
+	/**
+	 * Check whether the bytes stored at $pos are exactly $key.
+	 *
+	 * @param string $key
+	 * @param int $pos File offset to compare at
+	 * @return bool
+	 */
+	protected function match( $key, $pos ) {
+		$buf = $this->read( strlen( $key ), $pos );
+
+		return $buf === $key;
+	}
+
+	/**
+	 * Reset the search state so the next findNext() starts a fresh lookup.
+	 */
+	protected function findStart() {
+		$this->loop = 0;
+	}
+
+	/**
+	 * Seek to $pos and read exactly $length bytes.
+	 *
+	 * @throws CdbException If the seek fails or fewer bytes than requested are read
+	 * @param int $length
+	 * @param int $pos
+	 * @return string
+	 */
+	protected function read( $length, $pos ) {
+		if ( fseek( $this->handle, $pos ) == -1 ) {
+			// This can easily happen if the internal pointers are incorrect
+			throw new CdbException(
+				'Seek failed, file "' . $this->fileName . '" may be corrupted.' );
+		}
+
+		if ( $length == 0 ) {
+			return '';
+		}
+
+		$buf = fread( $this->handle, $length );
+		if ( $buf === false || strlen( $buf ) !== $length ) {
+			throw new CdbException(
+				'Read from CDB file failed, file "' . $this->fileName . '" may be corrupted.' );
+		}
+
+		return $buf;
+	}
+
+	/**
+	 * Unpack an unsigned integer and throw an exception if it needs more than 31 bits
+	 * @param string $s
+	 * @throws CdbException
+	 * @return mixed
+	 */
+	protected function unpack31( $s ) {
+		$data = unpack( 'V', $s );
+		// Where PHP integers are 32 bits wide, unpack() hands back values with
+		// the top bit set as negative numbers, so the upper-bound comparison
+		// alone would never trigger there. Treat negatives as out of range too.
+		if ( $data[1] < 0 || $data[1] > 0x7fffffff ) {
+			throw new CdbException(
+				'Error in CDB file "' . $this->fileName . '", integer too big.' );
+		}
+
+		return $data[1];
+	}
+
+	/**
+	 * Unpack a 32-bit signed integer
+	 *
+	 * The value is assembled from two little-endian 16-bit halves.
+	 *
+	 * @param string $s
+	 * @return int
+	 */
+	protected function unpackSigned( $s ) {
+		$data = unpack( 'va/vb', $s );
+
+		return $data['a'] | ( $data['b'] << 16 );
+	}
+
+	/**
+	 * Continue (or, when loop is zero, begin) probing the hash table for $key.
+	 * On success, dpos and dlen describe the value.
+	 *
+	 * @param string $key
+	 * @return bool True if an entry for $key was found
+	 * @throws CdbException If the file cannot be read as expected
+	 */
+	protected function findNext( $key ) {
+		if ( !$this->loop ) {
+			$u = CdbFunctions::hash( $key );
+			// The low 8 bits of the hash select one of the 8-byte pointer
+			// entries in the first 2048 bytes of the file.
+			$buf = $this->read( 8, ( $u << 3 ) & 2047 );
+			$this->hslots = $this->unpack31( substr( $buf, 4 ) );
+			if ( !$this->hslots ) {
+				return false;
+			}
+			$this->hpos = $this->unpack31( substr( $buf, 0, 4 ) );
+			$this->khash = $u;
+			// The remaining hash bits pick the starting slot within the table.
+			$u = CdbFunctions::unsignedShiftRight( $u, 8 );
+			$u = CdbFunctions::unsignedMod( $u, $this->hslots );
+			$u <<= 3;
+			$this->kpos = $this->hpos + $u;
+		}
+
+		while ( $this->loop < $this->hslots ) {
+			$buf = $this->read( 8, $this->kpos );
+			$pos = $this->unpack31( substr( $buf, 4 ) );
+			if ( !$pos ) {
+				// Empty slot: the key is not in the table
+				return false;
+			}
+			$this->loop += 1;
+			$this->kpos += 8;
+			// Wrap around to the first slot at the end of the table
+			if ( $this->kpos == $this->hpos + ( $this->hslots << 3 ) ) {
+				$this->kpos = $this->hpos;
+			}
+			$u = $this->unpackSigned( substr( $buf, 0, 4 ) );
+			if ( $u === $this->khash ) {
+				// Same hash; compare key length and key bytes in the record
+				$buf = $this->read( 8, $pos );
+				$keyLen = $this->unpack31( substr( $buf, 0, 4 ) );
+				if ( $keyLen == strlen( $key ) && $this->match( $key, $pos + 8 ) ) {
+					// Found
+					$this->dlen = $this->unpack31( substr( $buf, 4 ) );
+					$this->dpos = $pos + 8 + $keyLen;
+
+					return true;
+				}
+			}
+		}
+
+		return false;
+	}
+
+	/**
+	 * Look up $key from scratch.
+	 *
+	 * @param mixed $key
+	 * @return bool True if an entry for $key was found
+	 */
+	protected function find( $key ) {
+		$this->findStart();
+
+		return $this->findNext( $key );
+	}
+}
+
+/**
+ * CDB writer class
+ */
+class CdbWriterPHP extends CdbWriter {
+	/** @var array List of array( 'h' => hash, 'p' => record offset ), one per record */
+	protected $hplist;
+
+	/** @var int Number of records written so far */
+	protected $numentries;
+
+	/** @var int Current write offset in the temporary file */
+	protected $pos;
+
+	/**
+	 * Open a temporary file next to $fileName and position the write
+	 * pointer after the space reserved for the pointer array.
+	 *
+	 * @param string $fileName
+	 * @throws CdbException If the temporary file cannot be opened or seeked
+	 */
+	public function __construct( $fileName ) {
+		$this->realFileName = $fileName;
+		$this->tmpFileName = $fileName . '.tmp.' . mt_rand( 0, 0x7fffffff );
+		$this->handle = fopen( $this->tmpFileName, 'wb' );
+		if ( !$this->handle ) {
+			$this->throwException(
+				'Unable to open CDB file "' . $this->tmpFileName . '" for write.' );
+		}
+		$this->hplist = array();
+		$this->numentries = 0;
+		$this->pos = 2048; // leaving space for the pointer array, 256 * 8
+		if ( fseek( $this->handle, $this->pos ) == -1 ) {
+			$this->throwException( 'fseek failed in file "' . $this->tmpFileName . '".' );
+		}
+	}
+
+	/**
+	 * Append a record. Both key and value are converted to strings first,
+	 * so that the stored bytes, the recorded lengths and the hash all
+	 * describe the same string (the reader looks keys up by strval()).
+	 *
+	 * @param string $key
+	 * @param string $value
+	 * @throws CdbException If a length is too large or a write fails
+	 */
+	public function set( $key, $value ) {
+		$key = strval( $key );
+		if ( $key === '' ) {
+			// DBA cross-check hack
+			return;
+		}
+		$value = strval( $value );
+
+		$keylen = strlen( $key );
+		$datalen = strlen( $value );
+		$this->addbegin( $keylen, $datalen );
+		$this->write( $key );
+		$this->write( $value );
+		$this->addend( $keylen, $datalen, CdbFunctions::hash( $key ) );
+	}
+
+	/**
+	 * Write out the hash tables, close the temporary file and move it
+	 * over the real file.
+	 *
+	 * @throws CdbException
+	 */
+	public function close() {
+		$this->finish();
+		if ( isset( $this->handle ) ) {
+			fclose( $this->handle );
+		}
+		if ( $this->isWindows() && file_exists( $this->realFileName ) ) {
+			unlink( $this->realFileName );
+		}
+		if ( !rename( $this->tmpFileName, $this->realFileName ) ) {
+			$this->throwException( 'Unable to move the new CDB file into place.' );
+		}
+		unset( $this->handle );
+	}
+
+	/**
+	 * Write a buffer at the current file position.
+	 *
+	 * @throws CdbException If fewer bytes than expected were written
+	 * @param string $buf
+	 */
+	protected function write( $buf ) {
+		$len = fwrite( $this->handle, $buf );
+		if ( $len !== strlen( $buf ) ) {
+			$this->throwException( 'Error writing to CDB file "' . $this->tmpFileName . '".' );
+		}
+	}
+
+	/**
+	 * Advance the tracked file position by $len bytes.
+	 *
+	 * @throws CdbException If the new position would not fit in 31 bits
+	 * @param int $len
+	 */
+	protected function posplus( $len ) {
+		$newpos = $this->pos + $len;
+		if ( $newpos > 0x7fffffff ) {
+			$this->throwException(
+				'A value in the CDB file "' . $this->tmpFileName . '" is too large.' );
+		}
+		$this->pos = $newpos;
+	}
+
+	/**
+	 * Register a record that has just been written: remember its hash and
+	 * starting offset, then advance the position past header, key and data.
+	 *
+	 * @param int $keylen
+	 * @param int $datalen
+	 * @param int $h Hash of the key
+	 */
+	protected function addend( $keylen, $datalen, $h ) {
+		$this->hplist[] = array(
+			'h' => $h,
+			'p' => $this->pos
+		);
+
+		$this->numentries++;
+		$this->posplus( 8 );
+		$this->posplus( $keylen );
+		$this->posplus( $datalen );
+	}
+
+	/**
+	 * Write the 8-byte record header (key length, data length).
+	 *
+	 * @throws CdbException If either length does not fit in 31 bits
+	 * @param int $keylen
+	 * @param int $datalen
+	 */
+	protected function addbegin( $keylen, $datalen ) {
+		if ( $keylen > 0x7fffffff ) {
+			$this->throwException( 'Key length too long in file "' . $this->tmpFileName . '".' );
+		}
+		if ( $datalen > 0x7fffffff ) {
+			$this->throwException( 'Data length too long in file "' . $this->tmpFileName . '".' );
+		}
+		$buf = pack( 'VV', $keylen, $datalen );
+		$this->write( $buf );
+	}
+
+	/**
+	 * Write the 256 hash tables after the records, then the pointer array
+	 * at the start of the file.
+	 *
+	 * @throws CdbException
+	 */
+	protected function finish() {
+		// Hack for DBA cross-check
+		$this->hplist = array_reverse( $this->hplist );
+
+		// Calculate the number of items that will be in each hashtable
+		$counts = array_fill( 0, 256, 0 );
+		foreach ( $this->hplist as $item ) {
+			++$counts[255 & $item['h']];
+		}
+
+		// Fill in $starts with the *end* indexes
+		$starts = array();
+		$pos = 0;
+		for ( $i = 0; $i < 256; ++$i ) {
+			$pos += $counts[$i];
+			$starts[$i] = $pos;
+		}
+
+		// Excessively clever and indulgent code to simultaneously fill $packedTables
+		// with the packed hashtables, and adjust the elements of $starts
+		// to actually point to the starts instead of the ends.
+		$packedTables = array_fill( 0, $this->numentries, false );
+		foreach ( $this->hplist as $item ) {
+			$packedTables[--$starts[255 & $item['h']]] = $item;
+		}
+
+		$final = '';
+		for ( $i = 0; $i < 256; ++$i ) {
+			$count = $counts[$i];
+
+			// The size of the hashtable will be double the item count.
+			// The rest of the slots will be empty.
+			$len = $count + $count;
+			$final .= pack( 'VV', $this->pos, $len );
+
+			$hashtable = array();
+			for ( $u = 0; $u < $len; ++$u ) {
+				$hashtable[$u] = array( 'h' => 0, 'p' => 0 );
+			}
+
+			// Fill the hashtable, using the next empty slot if the hashed slot
+			// is taken.
+			for ( $u = 0; $u < $count; ++$u ) {
+				$hp = $packedTables[$starts[$i] + $u];
+				$where = CdbFunctions::unsignedMod(
+					CdbFunctions::unsignedShiftRight( $hp['h'], 8 ), $len );
+				while ( $hashtable[$where]['p'] ) {
+					if ( ++$where == $len ) {
+						$where = 0;
+					}
+				}
+				$hashtable[$where] = $hp;
+			}
+
+			// Write the hashtable
+			for ( $u = 0; $u < $len; ++$u ) {
+				// The hash is written as two 16-bit halves, then the offset
+				$buf = pack( 'vvV',
+					$hashtable[$u]['h'] & 0xffff,
+					CdbFunctions::unsignedShiftRight( $hashtable[$u]['h'], 16 ),
+					$hashtable[$u]['p'] );
+				$this->write( $buf );
+				$this->posplus( 8 );
+			}
+		}
+
+		// Write the pointer array at the start of the file
+		rewind( $this->handle );
+		if ( ftell( $this->handle ) != 0 ) {
+			$this->throwException( 'Error rewinding to start of file "' . $this->tmpFileName . '".' );
+		}
+		$this->write( $final );
+	}
+
+	/**
+	 * Clean up the temp file and throw an exception
+	 *
+	 * The handle is always dropped from the object afterwards, so the
+	 * inherited destructor does not run close() on a writer that has
+	 * already failed.
+	 *
+	 * @param string $msg
+	 * @throws CdbException
+	 */
+	protected function throwException( $msg ) {
+		if ( isset( $this->handle ) && $this->handle ) {
+			// close() may already have closed the file before calling us;
+			// is_resource() is false for a closed handle.
+			if ( is_resource( $this->handle ) ) {
+				fclose( $this->handle );
+			}
+			unlink( $this->tmpFileName );
+		}
+		unset( $this->handle );
+		throw new CdbException( $msg );
+	}
+}
diff --git a/includes/utils/IP.php b/includes/utils/IP.php
new file mode 100644
index 00000000..0e2db8cc
--- /dev/null
+++ b/includes/utils/IP.php
@@ -0,0 +1,738 @@
+<?php
+/**
+ * Functions and constants to play with IP addresses and ranges
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @author Antoine Musso "<hashar at free dot fr>", Aaron Schulz
+ */
+
+// Regular expression fragments used to recognise IP addresses and IP address blocks.
+// They carry no delimiters and no anchors; callers wrap them, e.g. '/^' . RE_IP_ADD . '$/'.
+
+// An IPv4 address is made of 4 bytes from x00 to xFF which is d0 to d255.
+// Note that one leading zero per byte is tolerated (e.g. "08").
+define( 'RE_IP_BYTE', '(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|0?[0-9]?[0-9])' );
+define( 'RE_IP_ADD', RE_IP_BYTE . '\.' . RE_IP_BYTE . '\.' . RE_IP_BYTE . '\.' . RE_IP_BYTE );
+// An IPv4 block is an IP address and a prefix length (the pattern accepts d0 to d32)
+define( 'RE_IP_PREFIX', '(3[0-2]|[12]?\d)' );
+define( 'RE_IP_BLOCK', RE_IP_ADD . '\/' . RE_IP_PREFIX );
+
+// An IPv6 address is made up of 8 words (each x0000 to xFFFF).
+// However, the "::" abbreviation can be used on consecutive x0000 words.
+define( 'RE_IPV6_WORD', '([0-9A-Fa-f]{1,4})' );
+// Prefix length of an IPv6 block (the pattern accepts d0 to d128)
+define( 'RE_IPV6_PREFIX', '(12[0-8]|1[01][0-9]|[1-9]?\d)' );
+define( 'RE_IPV6_ADD',
+ '(?:' . // starts with "::" (including the bare "::" address)
+ ':(?::|(?::' . RE_IPV6_WORD . '){1,7})' .
+ '|' . // ends with "::" (except the bare "::" address)
+ RE_IPV6_WORD . '(?::' . RE_IPV6_WORD . '){0,6}::' .
+ '|' . // contains one "::" in the middle (the ^ makes the test fail if none found)
+ RE_IPV6_WORD . '(?::((?(-1)|:))?' . RE_IPV6_WORD . '){1,6}(?(-2)|^)' .
+ '|' . // contains no "::", i.e. all 8 words are spelled out
+ RE_IPV6_WORD . '(?::' . RE_IPV6_WORD . '){7}' .
+ ')'
+);
+// An IPv6 block is an IP address and a prefix length (see RE_IPV6_PREFIX)
+define( 'RE_IPV6_BLOCK', RE_IPV6_ADD . '\/' . RE_IPV6_PREFIX );
+// For IPv6 canonicalization (NOT for strict validation; these are quite lax!)
+define( 'RE_IPV6_GAP', ':(?:0+:)*(?::(?:0+:)*)?' );
+define( 'RE_IPV6_V4_PREFIX', '0*' . RE_IPV6_GAP . '(?:ffff:)?' );
+
+// This might be useful for regexps used elsewhere; it matches any IPv4 or IPv6
+// address, optionally followed by a "/prefix" network suffix
+define( 'IP_ADDRESS_STRING',
+ '(?:' .
+ RE_IP_ADD . '(?:\/' . RE_IP_PREFIX . ')?' . // IPv4
+ '|' .
+ RE_IPV6_ADD . '(?:\/' . RE_IPV6_PREFIX . ')?' . // IPv6
+ ')'
+);
+
+/**
+ * A collection of public static functions to play with IP address
+ * and IP blocks.
+ */
+class IP {
+ /** @var IPSet|null Built on first use by isConfiguredProxy(); reset by clearCaches() */
+ private static $proxyIpSet = null;
+
+	/**
+	 * Check whether a string is a valid IP address or network (address
+	 * plus CIDR prefix), in either IPv4 or IPv6 notation.
+	 * SIIT IPv4-translated addresses are rejected.
+	 * Note: canonicalize() tries to convert translated addresses to IPv4.
+	 *
+	 * @param string $ip Possible IP address
+	 * @return bool
+	 */
+	public static function isIPAddress( $ip ) {
+		$pattern = '/^' . IP_ADDRESS_STRING . '$/';
+
+		return preg_match( $pattern, $ip ) === 1;
+	}
+
+	/**
+	 * Check whether a string is a valid IPv6 address or IPv6 network.
+	 * Note: Unlike isValid(), this accepts a trailing "/prefix" too.
+	 *
+	 * @param string $ip Possible IP address
+	 * @return bool
+	 */
+	public static function isIPv6( $ip ) {
+		$pattern = '/^' . RE_IPV6_ADD . '(?:\/' . RE_IPV6_PREFIX . ')?$/';
+
+		return preg_match( $pattern, $ip ) === 1;
+	}
+
+	/**
+	 * Check whether a string is a valid IPv4 address or IPv4 network.
+	 * Note: Unlike isValid(), this accepts a trailing "/prefix" too.
+	 *
+	 * @param string $ip Possible IP address
+	 * @return bool
+	 */
+	public static function isIPv4( $ip ) {
+		$pattern = '/^' . RE_IP_ADD . '(?:\/' . RE_IP_PREFIX . ')?$/';
+
+		return preg_match( $pattern, $ip ) === 1;
+	}
+
+	/**
+	 * Validate a single IP address (IPv4 or IPv6). Ranges are NOT
+	 * considered valid.
+	 * SIIT IPv4-translated addresses are rejected.
+	 * Note: canonicalize() tries to convert translated addresses to IPv4.
+	 *
+	 * @param string $ip
+	 * @return bool True if it is valid
+	 */
+	public static function isValid( $ip ) {
+		// IPv4 is tried first; IPv6 is only tested when that fails
+		if ( preg_match( '/^' . RE_IP_ADD . '$/', $ip ) ) {
+			return true;
+		}
+
+		return (bool)preg_match( '/^' . RE_IPV6_ADD . '$/', $ip );
+	}
+
+	/**
+	 * Validate an IP block, i.e. a valid address WITH a valid prefix length.
+	 * SIIT IPv4-translated addresses are rejected.
+	 * Note: canonicalize() tries to convert translated addresses to IPv4.
+	 *
+	 * @param string $ipblock
+	 * @return bool True if it is valid
+	 */
+	public static function isValidBlock( $ipblock ) {
+		// IPv6 is tried first; IPv4 is only tested when that fails
+		if ( preg_match( '/^' . RE_IPV6_BLOCK . '$/', $ipblock ) ) {
+			return true;
+		}
+
+		return (bool)preg_match( '/^' . RE_IP_BLOCK . '$/', $ipblock );
+	}
+
+	/**
+	 * Convert an IP into a verbose, uppercase, normalized form.
+	 * IPv6 addresses in octet notation are expanded to 8 words.
+	 * IPv4 addresses and strings that are not valid IPs are just trimmed.
+	 *
+	 * @param string $ip IP address in quad or octet form (CIDR or not).
+	 * @return string|null Normalized address; null if $ip is empty after trimming
+	 */
+	public static function sanitizeIP( $ip ) {
+		$ip = trim( $ip );
+		if ( $ip === '' ) {
+			return null;
+		}
+		if ( self::isIPv4( $ip ) || !self::isIPv6( $ip ) ) {
+			return $ip; // nothing else to do for IPv4 addresses or invalid ones
+		}
+		// Convert to upper case (surrounding whitespace was trimmed above)
+		$ip = strtoupper( $ip );
+		// Expand zero abbreviations
+		$abbrevPos = strpos( $ip, '::' );
+		if ( $abbrevPos !== false ) {
+			// We know this is valid IPv6. Find the last index of the
+			// address before any CIDR number (e.g. "a:b:c::/24").
+			$CIDRStart = strpos( $ip, '/' );
+			$addressEnd = ( $CIDRStart !== false )
+				? $CIDRStart - 1
+				: strlen( $ip ) - 1;
+			// If the '::' is at the beginning...
+			if ( $abbrevPos == 0 ) {
+				$repeat = '0:';
+				// The address part is exactly '::' when it ends at index 1.
+				// Testing the index rather than the whole string also covers
+				// blocks such as '::/0', which otherwise lose their last word.
+				$extra = ( $addressEnd == 1 ) ? '0' : '';
+				$pad = 9; // 7+2 (due to '::')
+			// If the '::' is at the end...
+			} elseif ( $abbrevPos == ( $addressEnd - 1 ) ) {
+				$repeat = ':0';
+				$extra = '';
+				$pad = 9; // 7+2 (due to '::')
+			// If the '::' is in the middle...
+			} else {
+				$repeat = ':0';
+				$extra = ':';
+				$pad = 8; // 6+2 (due to '::')
+			}
+			$ip = str_replace( '::',
+				str_repeat( $repeat, $pad - substr_count( $ip, ':' ) ) . $extra,
+				$ip
+			);
+		}
+		// Remove leading zeros from each bloc as needed
+		$ip = preg_replace( '/(^|:)0+(' . RE_IPV6_WORD . ')/', '$1$2', $ip );
+
+		return $ip;
+	}
+
+	/**
+	 * Prettify an IP for display to end users.
+	 * IPv6 addresses are made more compact (the longest run of zero words
+	 * becomes '::') and lower-case; anything else is returned as
+	 * sanitizeIP() produced it.
+	 *
+	 * @param string $ip
+	 * @return string
+	 */
+	public static function prettifyIP( $ip ) {
+		$ip = self::sanitizeIP( $ip ); // normalize (removes '::')
+		if ( !self::isIPv6( $ip ) ) {
+			return $ip;
+		}
+
+		// Split IP into an address and a CIDR ('' when there is none)
+		$pieces = explode( '/', $ip, 2 );
+		$address = $pieces[0];
+		$cidr = isset( $pieces[1] ) ? $pieces[1] : '';
+
+		// Scan for the largest slice of words with multiple zeros
+		$bestRun = '';
+		$bestPos = 0;
+		$cursor = 0;
+		while ( preg_match(
+			'!(?:^|:)0(?::0)+(?:$|:)!', $address, $found, PREG_OFFSET_CAPTURE, $cursor
+		) ) {
+			list( $run, $runPos ) = $found[0]; // full match
+			if ( strlen( $run ) > strlen( $bestRun ) ) {
+				$bestRun = $run;
+				$bestPos = $runPos;
+			}
+			$cursor = $runPos + strlen( $run ); // advance
+		}
+		if ( $bestRun !== '' ) {
+			// Replace this portion of the string with the '::' abbreviation
+			$address = substr_replace( $address, '::', $bestPos, strlen( $bestRun ) );
+		}
+
+		// Add any CIDR back on
+		if ( $cidr !== '' ) {
+			$address = "{$address}/{$cidr}";
+		}
+
+		// Lower case is more readable
+		return strtolower( $address );
+	}
+
+	/**
+	 * Given a host/port string, like one might find in the host part of a URL
+	 * per RFC 2732, split the hostname part and the port part and return an
+	 * array with an element for each. If there is no port part, the array will
+	 * have false in place of the port. If the string was invalid in some way,
+	 * false is returned.
+	 *
+	 * This was easy with IPv4 and was generally done in an ad-hoc way, but
+	 * with IPv6 it's somewhat more complicated due to the need to parse the
+	 * square brackets and colons.
+	 *
+	 * A bare IPv6 address is accepted despite the lack of square brackets.
+	 *
+	 * @param string $both The string with the host and port
+	 * @return array|bool array( host, port or false ), or false if $both is invalid
+	 */
+	public static function splitHostAndPort( $both ) {
+		if ( substr( $both, 0, 1 ) === '[' ) {
+			if ( preg_match( '/^\[(' . RE_IPV6_ADD . ')\](?::(?P<port>\d+))?$/', $both, $m ) ) {
+				if ( isset( $m['port'] ) ) {
+					return array( $m[1], intval( $m['port'] ) );
+				} else {
+					return array( $m[1], false );
+				}
+			} else {
+				// Square bracket found but no IPv6
+				return false;
+			}
+		}
+		$numColons = substr_count( $both, ':' );
+		if ( $numColons >= 2 ) {
+			// Is it a bare IPv6 address?
+			if ( preg_match( '/^' . RE_IPV6_ADD . '$/', $both ) ) {
+				return array( $both, false );
+			} else {
+				// Not valid IPv6, but too many colons for anything else
+				return false;
+			}
+		}
+		if ( $numColons >= 1 ) {
+			// Host:port?
+			$bits = explode( ':', $both );
+			// The whole port must be digits. Anchoring the end rejects input
+			// such as "host:80abc" instead of silently reading it as port 80.
+			if ( preg_match( '/^\d+\z/', $bits[1] ) ) {
+				return array( $bits[0], intval( $bits[1] ) );
+			} else {
+				// Not a valid port
+				return false;
+			}
+		}
+
+		// Plain hostname
+		return array( $both, false );
+	}
+
+	/**
+	 * Build a host/port string like the one found in a URL from its two
+	 * parts. A host containing a colon is wrapped in square brackets, as
+	 * in RFC 2732. When the port equals the default port, the port
+	 * specification is left out.
+	 *
+	 * @param string $host
+	 * @param int $port
+	 * @param bool|int $defaultPort Port to omit, or false to always print the port
+	 * @return string
+	 */
+	public static function combineHostAndPort( $host, $port, $defaultPort = false ) {
+		$needsBrackets = strpos( $host, ':' ) !== false;
+		$hostPart = $needsBrackets ? "[$host]" : $host;
+
+		$isDefaultPort = $defaultPort !== false && $port == $defaultPort;
+
+		return $isDefaultPort ? $hostPart : "$hostPart:$port";
+	}
+
+	/**
+	 * Turn the hexadecimal representation of an IPv4 or IPv6 address back
+	 * into readable notation.
+	 *
+	 * @param string $hex Number, with "v6-" prefix if it is IPv6
+	 * @return string Quad-dotted (IPv4) or octet notation (IPv6)
+	 */
+	public static function formatHex( $hex ) {
+		// The "v6-" marker selects the IPv6 converter
+		$isV6 = substr( $hex, 0, 3 ) == 'v6-';
+
+		return $isV6
+			? self::hexToOctet( substr( $hex, 3 ) )
+			: self::hexToQuad( $hex );
+	}
+
+	/**
+	 * Convert a hexadecimal number into an IPv6 address in octet notation.
+	 *
+	 * @param string $ip_hex Pure hex (no v6- prefix)
+	 * @return string (of format a:b:c:d:e:f:g:h)
+	 */
+	public static function hexToOctet( $ip_hex ) {
+		// Left-pad to 32 hex chars (128 bits); only the first 32 are used
+		$padded = str_pad( strtoupper( $ip_hex ), 32, '0', STR_PAD_LEFT );
+		// Cut into 8 words of 4 chars and join them with colons
+		$words = str_split( substr( $padded, 0, 32 ), 4 );
+		$joined = implode( ':', $words );
+
+		// Drop the leading zeroes of every word
+		return preg_replace( '/(^|:)0+(' . RE_IPV6_WORD . ')/', '$1$2', $joined );
+	}
+
+	/**
+	 * Convert a hexadecimal number into an IPv4 address in quad-dotted notation.
+	 *
+	 * @param string $ip_hex Pure hex
+	 * @return string (of format a.b.c.d)
+	 */
+	public static function hexToQuad( $ip_hex ) {
+		// Left-pad to 8 hex chars (32 bits); only the first 8 are used
+		$padded = str_pad( strtoupper( $ip_hex ), 8, '0', STR_PAD_LEFT );
+
+		// Each pair of hex chars is one decimal quad
+		$quads = array();
+		foreach ( str_split( substr( $padded, 0, 8 ), 2 ) as $pair ) {
+			$quads[] = base_convert( $pair, 16, 10 );
+		}
+
+		return implode( '.', $quads );
+	}
+
+	/**
+	 * Report whether an address lies outside the private, local and
+	 * loopback ranges listed below (RFC 1918 and similar).
+	 *
+	 * NOTE(review): the result is simply the negation of IPSet::match(),
+	 * which is defined elsewhere; if that returns false for strings that
+	 * are not IP addresses, such input is reported as public. Confirm.
+	 *
+	 * @param string $ip
+	 * @return bool
+	 */
+	public static function isPublic( $ip ) {
+		// Built once per request and reused by later calls
+		static $privateSet = null;
+		if ( $privateSet === null ) {
+			$privateRanges = array(
+				'10.0.0.0/8', # RFC 1918 (private)
+				'172.16.0.0/12', # RFC 1918 (private)
+				'192.168.0.0/16', # RFC 1918 (private)
+				'0.0.0.0/8', # this network
+				'127.0.0.0/8', # loopback
+				'fc00::/7', # RFC 4193 (local)
+				'0:0:0:0:0:0:0:1', # loopback
+			);
+			$privateSet = new IPSet( $privateRanges );
+		}
+
+		$isPrivate = $privateSet->match( $ip );
+
+		return !$isPrivate;
+	}
+
+	/**
+	 * Return a zero-padded upper case hexadecimal representation of an IP address.
+	 *
+	 * Hexadecimal addresses are used because they can easily be extended to
+	 * IPv6 support. To separate the ranges, the return value from this
+	 * function for an IPv6 address will be prefixed with "v6-", a non-
+	 * hexadecimal string which sorts after the IPv4 addresses.
+	 *
+	 * @param string $ip Quad dotted/octet IP address.
+	 * @return string|bool False on failure
+	 */
+	public static function toHex( $ip ) {
+		if ( self::isIPv6( $ip ) ) {
+			$n = 'v6-' . self::IPv6ToRawHex( $ip );
+		} elseif ( self::isIPv4( $ip ) ) {
+			// Bug 60035: an IP with leading 0's fails in ip2long sometimes (e.g. *.08).
+			// Strip the redundant zeros of every byte, including the first one
+			// ("08.1.2.3") and all-zero bytes written with several digits ("1.2.3.00").
+			$ip = preg_replace( '/(?:^|(?<=\.))0+(?=\d)/', '', $ip );
+			$n = ip2long( $ip );
+			if ( $n < 0 ) {
+				$n += pow( 2, 32 );
+				# On 32-bit platforms (and on Windows), 2^32 does not fit into an int,
+				# so $n becomes a float. We convert it to string instead.
+				if ( is_float( $n ) ) {
+					$n = (string)$n;
+				}
+			}
+			if ( $n !== false ) {
+				# Floating points can handle the conversion; faster than wfBaseConvert()
+				$n = strtoupper( str_pad( base_convert( $n, 10, 16 ), 8, '0', STR_PAD_LEFT ) );
+			}
+		} else {
+			$n = false;
+		}
+
+		return $n;
+	}
+
+	/**
+	 * Turn an IPv6 address in octet notation into a pure hex string by
+	 * zero-padding every colon-separated word to 4 characters.
+	 *
+	 * @param string $ip Octet ipv6 IP address.
+	 * @return string|bool Pure hex (uppercase); false on failure
+	 */
+	private static function IPv6ToRawHex( $ip ) {
+		$expanded = self::sanitizeIP( $ip );
+		if ( !$expanded ) {
+			return false;
+		}
+
+		$padWord = function ( $word ) {
+			return str_pad( $word, 4, 0, STR_PAD_LEFT );
+		};
+
+		return implode( '', array_map( $padWord, explode( ':', $expanded ) ) );
+	}
+
+	/**
+	 * Convert a network specification in CIDR notation
+	 * to an integer network and a number of bits.
+	 * IPv6 input is handed over to parseCIDR6().
+	 *
+	 * @param string $range IP with CIDR prefix
+	 * @return array(int or string, int) array( false, false ) on failure
+	 */
+	public static function parseCIDR( $range ) {
+		if ( self::isIPv6( $range ) ) {
+			return self::parseCIDR6( $range );
+		}
+
+		$pieces = explode( '/', $range, 2 );
+		if ( count( $pieces ) != 2 ) {
+			return array( false, false );
+		}
+
+		$network = ip2long( $pieces[0] );
+		$bits = $pieces[1];
+		$usable = $network !== false && is_numeric( $bits ) && $bits >= 0 && $bits <= 32;
+		if ( !$usable ) {
+			return array( false, false );
+		}
+
+		// Clear the host part of the address; /0 covers everything
+		$network = ( $bits == 0 )
+			? 0
+			: $network & ~( ( 1 << ( 32 - $bits ) ) - 1 );
+
+		# Convert to unsigned
+		if ( $network < 0 ) {
+			$network += pow( 2, 32 );
+		}
+
+		return array( $network, $bits );
+	}
+
+	/**
+	 * Given a string range in a number of formats,
+	 * return the start and end of the range in hexadecimal.
+	 *
+	 * Formats are:
+	 *     1.2.3.4/24          CIDR
+	 *     1.2.3.4 - 1.2.3.5   Explicit range
+	 *     1.2.3.4             Single IP
+	 *
+	 *     2001:0db8:85a3::7344/96                       CIDR
+	 *     2001:0db8:85a3::7344 - 2001:0db8:85a3::7344   Explicit range
+	 *     2001:0db8:85a3::7344                          Single IP
+	 * @param string $range IP range
+	 * @return array(string, string) array( false, false ) if the range is invalid
+	 */
+	public static function parseRange( $range ) {
+		// CIDR notation
+		if ( strpos( $range, '/' ) !== false ) {
+			if ( self::isIPv6( $range ) ) {
+				return self::parseRange6( $range );
+			}
+			list( $network, $bits ) = self::parseCIDR( $range );
+			if ( $network === false ) {
+				$start = $end = false;
+			} else {
+				$start = sprintf( '%08X', $network );
+				$end = sprintf( '%08X', $network + pow( 2, ( 32 - $bits ) ) - 1 );
+			}
+		// Explicit range
+		} elseif ( strpos( $range, '-' ) !== false ) {
+			list( $start, $end ) = array_map( 'trim', explode( '-', $range, 2 ) );
+			if ( self::isIPv6( $start ) && self::isIPv6( $end ) ) {
+				return self::parseRange6( $range );
+			}
+			if ( self::isIPv4( $start ) && self::isIPv4( $end ) ) {
+				$start = self::toHex( $start );
+				$end = self::toHex( $end );
+				// Compare as strings, never with ">": hex values such as
+				// "1E000009" are numeric strings in E-notation, so a loose
+				// comparison would order them as floats and reject valid ranges.
+				if ( $start === false || $end === false || strcmp( $start, $end ) > 0 ) {
+					$start = $end = false;
+				}
+			} else {
+				$start = $end = false;
+			}
+		} else {
+			# Single IP
+			$start = $end = self::toHex( $range );
+		}
+		if ( $start === false || $end === false ) {
+			return array( false, false );
+		} else {
+			return array( $start, $end );
+		}
+	}
+
+	/**
+	 * Convert a network specification in IPv6 CIDR notation to an
+	 * integer network and a number of bits
+	 *
+	 * @param string $range
+	 *
+	 * @return array(string, int) Network as a decimal string and the prefix
+	 *  length; array( false, false ) on failure
+	 */
+	private static function parseCIDR6( $range ) {
+		# Explode into <expanded IP,range>
+		$parts = explode( '/', self::sanitizeIP( $range ), 2 );
+		if ( count( $parts ) != 2 ) {
+			return array( false, false );
+		}
+		list( $network, $bits ) = $parts;
+		$network = self::IPv6ToRawHex( $network );
+		if ( $network === false || !is_numeric( $bits ) || $bits < 0 || $bits > 128 ) {
+			// Report failure the same way as the early return above and as
+			// parseCIDR() does; casting the bits to int here would yield 0,
+			// which callers cannot tell apart from a real /0 prefix.
+			return array( false, false );
+		}
+
+		if ( $bits == 0 ) {
+			$network = "0";
+		} else {
+			# Native 32 bit functions WONT work here!!!
+			# Convert to a padded binary number
+			$network = wfBaseConvert( $network, 16, 2, 128 );
+			# Truncate the last (128-$bits) bits and replace them with zeros
+			$network = str_pad( substr( $network, 0, $bits ), 128, '0', STR_PAD_RIGHT );
+			# Convert back to an integer
+			$network = wfBaseConvert( $network, 2, 10 );
+		}
+
+		return array( $network, (int)$bits );
+	}
+
+	/**
+	 * IPv6 counterpart of parseRange(): given a string range in a number
+	 * of formats, return the start and end of the range in hexadecimal,
+	 * each prefixed with "v6-".
+	 *
+	 * Formats are:
+	 *     2001:0db8:85a3::7344/96                       CIDR
+	 *     2001:0db8:85a3::7344 - 2001:0db8:85a3::7344   Explicit range
+	 *     2001:0db8:85a3::7344                          Single IP
+	 *
+	 * @param string $range
+	 *
+	 * @return array(string, string) array( false, false ) on failure
+	 */
+	private static function parseRange6( $range ) {
+		# Expand any IPv6 IP
+		$range = self::sanitizeIP( $range );
+
+		if ( strpos( $range, '/' ) !== false ) {
+			// CIDR notation...
+			list( $network, $bits ) = self::parseCIDR6( $range );
+			if ( $network === false ) {
+				return array( false, false );
+			}
+			$lowHex = wfBaseConvert( $network, 10, 16, 32, false );
+			# Turn network to binary (again)
+			$highBin = wfBaseConvert( $network, 10, 2, 128 );
+			# Truncate the last (128-$bits) bits and replace them with ones
+			$highBin = str_pad( substr( $highBin, 0, $bits ), 128, 1, STR_PAD_RIGHT );
+			# Convert to hex
+			$highHex = wfBaseConvert( $highBin, 2, 16, 32, false );
+
+			# see toHex() comment
+			return array( "v6-$lowHex", "v6-$highHex" );
+		}
+
+		if ( strpos( $range, '-' ) !== false ) {
+			// Explicit range notation...
+			list( $low, $high ) = array_map( 'trim', explode( '-', $range, 2 ) );
+			$start = self::toHex( $low );
+			$end = self::toHex( $high );
+			if ( $start > $end ) {
+				$start = $end = false;
+			}
+		} else {
+			# Single IP
+			$start = $end = self::toHex( $range );
+		}
+
+		if ( $start === false || $end === false ) {
+			return array( false, false );
+		}
+
+		return array( $start, $end );
+	}
+
+	/**
+	 * Determine if a given IPv4/IPv6 address is in a given CIDR network
+	 *
+	 * @param string $addr The address to check against the given range.
+	 * @param string $range The range to check the given address against.
+	 * @return bool Whether or not the given address is in the given range.
+	 *  Always false when the address or the range cannot be parsed.
+	 */
+	public static function isInRange( $addr, $range ) {
+		$hexIP = self::toHex( $addr );
+		list( $start, $end ) = self::parseRange( $range );
+
+		// A parse failure yields false, which strcmp() would treat as an
+		// empty string; an invalid address would then "match" an invalid range.
+		if ( $hexIP === false || $start === false || $end === false ) {
+			return false;
+		}
+
+		return ( strcmp( $hexIP, $start ) >= 0 &&
+			strcmp( $hexIP, $end ) <= 0 );
+	}
+
+	/**
+	 * Convert some unusual representations of IPv4 addresses to their
+	 * canonical dotted quad representation.
+	 *
+	 * This currently only checks a few IPV4-to-IPv6 related cases. More
+	 * unusual representations may be added later.
+	 *
+	 * An address that already passes isValid() (IPv4 or IPv6) is returned
+	 * unchanged, apart from having any "%zone" suffix removed.
+	 *
+	 * @param string $addr Something that might be an IP address
+	 * @return string|null The address as described above, or null if no
+	 *  conversion applies
+	 */
+	public static function canonicalize( $addr ) {
+		// remove zone info (bug 35738)
+		$addr = preg_replace( '/\%.*/', '', $addr );
+
+		if ( self::isValid( $addr ) ) {
+			return $addr;
+		}
+
+		// Turn mapped addresses from ::ce:ffff:1.2.3.4 to 1.2.3.4
+		$hasColon = strpos( $addr, ':' ) !== false;
+		if ( $hasColon && strpos( $addr, '.' ) !== false ) {
+			// Only what follows the last colon is kept from here on
+			$addr = substr( $addr, strrpos( $addr, ':' ) + 1 );
+			if ( self::isIPv4( $addr ) ) {
+				return $addr;
+			}
+		}
+
+		// IPv6 loopback address
+		if ( preg_match( '/^0*' . RE_IPV6_GAP . '1$/', $addr, $parts ) ) {
+			return '127.0.0.1';
+		}
+
+		// IPv4-mapped and IPv4-compatible IPv6 addresses, dotted tail
+		$dottedTail = '/^' . RE_IPV6_V4_PREFIX . '(' . RE_IP_ADD . ')$/i';
+		if ( preg_match( $dottedTail, $addr, $parts ) ) {
+			return $parts[1];
+		}
+
+		// Same, with the IPv4 part written as two hexadecimal words
+		$hexTail = '/^' . RE_IPV6_V4_PREFIX . RE_IPV6_WORD .
+			':' . RE_IPV6_WORD . '$/i';
+		if ( preg_match( $hexTail, $addr, $parts ) ) {
+			return long2ip( ( hexdec( $parts[1] ) << 16 ) + hexdec( $parts[2] ) );
+		}
+
+		return null; // give up
+	}
+
+	/**
+	 * Gets rid of unneeded numbers in quad-dotted/octet IP strings
+	 * For example, 127.111.113.151/24 -> 127.111.113.0/24
+	 *
+	 * @param string $range IP address to normalize
+	 * @return string The formatted start of the range, followed by "/bits"
+	 *  when parseCIDR() reports a prefix length
+	 */
+	public static function sanitizeRange( $range ) {
+		$cidr = self::parseCIDR( $range );
+		$bounds = self::parseRange( $range );
+
+		$bits = $cidr[1];
+		$start = self::formatHex( $bounds[0] );
+
+		// No prefix length means this wasn't actually a range
+		return ( $bits === false ) ? $start : "$start/$bits";
+	}
+
+	/**
+	 * Checks if an IP is a trusted proxy provider.
+	 * Useful to tell if X-Forwarded-For data is possibly bogus.
+	 * The answer starts from isConfiguredProxy() and is then passed by
+	 * reference to the 'IsTrustedProxy' hook together with the IP.
+	 * @since 1.24
+	 *
+	 * @param string $ip
+	 * @return bool
+	 */
+	public static function isTrustedProxy( $ip ) {
+		$isTrusted = self::isConfiguredProxy( $ip );
+
+		wfRunHooks( 'IsTrustedProxy', array( &$ip, &$isTrusted ) );
+
+		return $isTrusted;
+	}
+
+	/**
+	 * Checks if an IP matches a proxy we've configured, either as an exact
+	 * entry of $wgSquidServers or through the address/CIDR list in
+	 * $wgSquidServersNoPurge.
+	 * @since 1.24
+	 *
+	 * @param string $ip
+	 * @return bool
+	 */
+	public static function isConfiguredProxy( $ip ) {
+		global $wgSquidServers, $wgSquidServersNoPurge;
+
+		wfProfileIn( __METHOD__ );
+		if ( in_array( $ip, $wgSquidServers ) ) {
+			// Quick hit on the list of known singular proxy servers
+			$isProxy = true;
+		} else {
+			// Fall back to the addresses and CIDR nets in the NoPurge list;
+			// the set is built once and kept for later calls
+			if ( self::$proxyIpSet === null ) {
+				self::$proxyIpSet = new IPSet( $wgSquidServersNoPurge );
+			}
+			$isProxy = self::$proxyIpSet->match( $ip );
+		}
+		wfProfileOut( __METHOD__ );
+
+		return $isProxy;
+	}
+
+ /**
+ * Clears precomputed data used for proxy support, so that the next
+ * isConfiguredProxy() call rebuilds its IPSet from the current
+ * $wgSquidServersNoPurge value.
+ * Use this only for unit tests.
+ */
+ public static function clearCaches() {
+ self::$proxyIpSet = null;
+ }
+}
diff --git a/includes/utils/MWCryptHKDF.php b/includes/utils/MWCryptHKDF.php
new file mode 100644
index 00000000..cc136793
--- /dev/null
+++ b/includes/utils/MWCryptHKDF.php
@@ -0,0 +1,332 @@
+<?php
+/**
+ * Extract-and-Expand Key Derivation Function (HKDF). A cryptographically
+ * secure key expansion function based on RFC 5869.
+ *
+ * This relies on the secrecy of $wgSecretKey (by default), or $wgHKDFSecret.
+ * By default, sha256 is used as the underlying hashing algorithm, but any other
+ * algorithm can be used. Finding the secret key from the output would require
+ * an attacker to discover the input key (the PRK) to the hmac that generated
+ * the output, and discover the particular data, hmac'ed with an evolving key
+ * (salt), to produce the PRK. Even with md5, no publicly known attacks make
+ * this currently feasible.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @author Chris Steipp
+ * @file
+ */
+
+class MWCryptHKDF {
+
+ /**
+ * Singleton instance for public use; created on demand by singleton()
+ */
+ protected static $singleton = null;
+
+ /**
+ * The persistent cache; the last generated block is stored in it on
+ * destruction and read back as salt input by getSaltUsingCache()
+ */
+ protected $cache = null;
+
+ /**
+ * Cache key we'll use for our salt
+ */
+ protected $cacheKey = null;
+
+ /**
+ * The hash algorithm being used
+ */
+ protected $algorithm = null;
+
+ /**
+ * binary string, the salt for the HKDF; starts out empty and is filled
+ * in by getSaltUsingCache()
+ */
+ protected $salt;
+
+ /**
+ * The pseudorandom key; starts out empty and is extracted on the first
+ * realGenerate() call
+ */
+ private $prk;
+
+ /**
+ * The secret key material. This must be kept secret to preserve
+ * the security properties of this RNG.
+ */
+ private $skm;
+
+ /**
+ * The last block (K(i)) of the most recent expanded key
+ */
+ protected $lastK;
+
+ /**
+ * Parts of the "context information" string CTXinfo; realGenerate()
+ * joins them with ':' together with its own context argument.
+ * See http://eprint.iacr.org/2010/264.pdf Section 4.1
+ */
+ protected $context = array();
+
+ /**
+ * Output length in bytes of each supported hash algorithm.
+ * Round count is computed based on the hash's output length,
+ * which neither php nor openssl seem to provide easily.
+ */
+ public static $hashLength = array(
+ 'md5' => 16,
+ 'sha1' => 20,
+ 'sha224' => 28,
+ 'sha256' => 32,
+ 'sha384' => 48,
+ 'sha512' => 64,
+ 'ripemd128' => 16,
+ 'ripemd160' => 20,
+ 'ripemd256' => 32,
+ 'ripemd320' => 40,
+ 'whirlpool' => 64,
+ );
+
+
+	/**
+	 * @param string $secretKeyMaterial Secret of at least 16 bytes
+	 * @param string $algorithm Name of hashing algorithm
+	 * @param BagOStuff $cache
+	 * @param string|array $context Context to mix into HKDF context
+	 * @throws MWException If the secret is shorter than 16 bytes
+	 */
+	public function __construct( $secretKeyMaterial, $algorithm, $cache, $context ) {
+		if ( strlen( $secretKeyMaterial ) < 16 ) {
+			throw new MWException( "MWCryptHKDF secret was too short." );
+		}
+
+		// A single context value is stored as a one-element list
+		if ( !is_array( $context ) ) {
+			$context = array( $context );
+		}
+
+		$this->skm = $secretKeyMaterial;
+		$this->algorithm = $algorithm;
+		$this->cache = $cache;
+		$this->context = $context;
+		// Salt and PRK start out blank; see getSaltUsingCache() and realGenerate()
+		$this->salt = '';
+		$this->prk = '';
+
+		// Spread the load over several cache keys so that not every call
+		// hits the same memcache server. mt_rand only selects the key here
+		// and has no bearing on the security of the process.
+		$this->cacheKey = wfMemcKey( 'HKDF', mt_rand( 0, 16 ) );
+	}
+
+	/**
+	 * Store the last generated block in the cache, so the next user will
+	 * compute a different PRK from the same SKM. This should keep things
+	 * unpredictable even if an attacker is able to influence CTXinfo.
+	 */
+	function __destruct() {
+		if ( !$this->lastK ) {
+			// Nothing was generated, so there is nothing to save
+			return;
+		}
+
+		$this->cache->set( $this->cacheKey, $this->lastK );
+	}
+
+	/**
+	 * MW specific salt, derived from the value cached by the last run.
+	 * The salt is computed once per instance and then reused.
+	 *
+	 * @return string Binary string
+	 */
+	protected function getSaltUsingCache() {
+		if ( $this->salt != '' ) {
+			return $this->salt;
+		}
+
+		$seed = $this->cache->get( $this->cacheKey );
+		if ( $seed === false ) {
+			// No previous value is available to act as salt, so take
+			// 16 bytes from MWCryptRand, which will use a small amount of
+			// entropy from our pool. The HKDF paper
+			// (http://eprint.iacr.org/2010/264.pdf) allows the salt to be
+			// a non-secret random value, but we use a strongly random one
+			// since we can.
+			$seed = MWCryptRand::generate( 16 );
+		}
+
+		// Hashing yields a binary string that is hashLen long
+		$this->salt = hash( $this->algorithm, $seed, true );
+
+		return $this->salt;
+	}
+
+	/**
+	 * Return a singleton instance, based on the global configs.
+	 * The instance is built on the first call and reused afterwards.
+	 *
+	 * @return MWCryptHKDF
+	 * @throws MWException If neither $wgHKDFSecret nor $wgSecretKey is set
+	 */
+	protected static function singleton() {
+		global $wgHKDFAlgorithm, $wgHKDFSecret, $wgSecretKey;
+
+		$secret = $wgHKDFSecret ?: $wgSecretKey;
+		if ( !$secret ) {
+			throw new MWException( "Cannot use MWCryptHKDF without a secret." );
+		}
+
+		// The context and the cache are only needed to construct the
+		// instance, so they are not rebuilt on every later call.
+		if ( self::$singleton === null ) {
+			// In HKDF, the context can be known to the attacker, but this will
+			// keep simultaneous runs from producing the same output.
+			$context = array(
+				microtime(),
+				getmypid(),
+				gethostname(),
+			);
+
+			// Setup salt cache. Use APC, or fallback to the main cache if it isn't setup
+			try {
+				$cache = ObjectCache::newAccelerator( array() );
+			} catch ( Exception $e ) {
+				$cache = wfGetMainCache();
+			}
+
+			self::$singleton = new self( $secret, $wgHKDFAlgorithm, $cache, $context );
+		}
+
+		return self::$singleton;
+	}
+
+	/**
+	 * Produce $bytes of secure random data. As a side-effect,
+	 * $this->lastK is set to the last hashLen block of key material.
+	 * The PRK is extracted on the first call and reused afterwards.
+	 *
+	 * @param int $bytes Number of bytes of data
+	 * @param string $context Context to mix into CTXinfo
+	 * @return string Binary string of length $bytes
+	 */
+	protected function realGenerate( $bytes, $context = '' ) {
+		if ( $this->prk === '' ) {
+			$this->prk = self::HKDFExtract(
+				$this->algorithm,
+				$this->getSaltUsingCache(),
+				$this->skm
+			);
+		}
+
+		// CTXinfo is the instance context followed by the per-call context
+		$infoParts = $this->context;
+		$infoParts[] = $context;
+		$CTXinfo = implode( ':', $infoParts );
+
+		// $this->lastK is passed by reference and updated by the expansion
+		return self::HKDFExpand(
+			$this->algorithm,
+			$this->prk,
+			$CTXinfo,
+			$bytes,
+			$this->lastK
+		);
+	}
+
+
+ /**
+ * RFC5869 defines HKDF in 2 steps, extraction and expansion.
+ * From http://eprint.iacr.org/2010/264.pdf:
+ *
+ * The scheme HKDF is specifed as:
+ * HKDF(XTS, SKM, CTXinfo, L) = K(1) || K(2) || ... || K(t)
+ * where the values K(i) are defined as follows:
+ * PRK = HMAC(XTS, SKM)
+ * K(1) = HMAC(PRK, CTXinfo || 0);
+ * K(i+1) = HMAC(PRK, K(i) || CTXinfo || i), 1 <= i < t;
+ * where t = [L/k] and the value K(t) is truncated to its first d = L mod k bits;
+ * the counter i is non-wrapping and of a given fixed size, e.g., a single byte.
+ * Note that the length of the HMAC output is the same as its key length and therefore
+ * the scheme is well defined.
+ *
+ * XTS is the "extractor salt"
+ * SKM is the "secret keying material"
+ *
+ * N.B. http://eprint.iacr.org/2010/264.pdf seems to differ from RFC 5869 in that the test
+ * vectors from RFC 5869 only work if K(0) = '' and K(1) = HMAC(PRK, K(0) || CTXinfo || 1)
+ *
+ * @param string $hash The hashing function to use (e.g., sha256)
+ * @param string $ikm The input keying material
+ * @param string $salt The salt to add to the ikm, to get the prk
+ * @param string $info Optional context (change the output without affecting
+ * the randomness properties of the output)
+ * @param int $L Number of bytes to return
+ * @return string Cryptographically secure pseudorandom binary string
+ */
+ public static function HKDF( $hash, $ikm, $salt, $info, $L ) {
+ $prk = self::HKDFExtract( $hash, $salt, $ikm );
+ $okm = self::HKDFExpand( $hash, $prk, $info, $L );
+ return $okm;
+ }
+
+	/**
+	 * Extraction step: PRK = HMAC(XTS, SKM).
+	 * The HMAC key is XTS (the salt), while the SKM (source key
+	 * material) is the message being authenticated.
+	 *
+	 * @param string $hash The hashing function to use (e.g., sha256)
+	 * @param string $salt The salt to add to the ikm, to get the prk
+	 * @param string $ikm The input keying material
+	 * @return string Binary string (pseudorandom key) used as input to HKDFExpand
+	 */
+	private static function HKDFExtract( $hash, $salt, $ikm ) {
+		// hash_hmac() takes the data before the key
+		$prk = hash_hmac( $hash, $ikm, $salt, true );
+
+		return $prk;
+	}
+
+	/**
+	 * Expand the key with the given context
+	 *
+	 * @param string $hash Hashing Algorithm
+	 * @param string $prk A pseudorandom key of at least HashLen octets
+	 *  (usually, the output from the extract step)
+	 * @param string $info Optional context and application specific information
+	 *  (can be a zero-length string)
+	 * @param int $bytes Length of output keying material in bytes
+	 *  (<= 255*HashLen)
+	 * @param string &$lastK Used as K(0) for the first round and set by this
+	 *  function to the last block of the expansion.
+	 *  In MediaWiki, this is used to seed future Extractions.
+	 * @return string Cryptographically secure random string $bytes long
+	 * @throws MWException If the algorithm is unusable or too many bytes are requested
+	 */
+	private static function HKDFExpand( $hash, $prk, $info, $bytes, &$lastK = '' ) {
+		if ( isset( self::$hashLength[$hash] ) ) {
+			$hashLen = self::$hashLength[$hash];
+		} else {
+			// Not in the lookup table: measure the raw digest size instead,
+			// so that any algorithm known to hash() can be used.
+			$hashLen = strlen( hash( $hash, '', true ) );
+		}
+		if ( $hashLen < 1 ) {
+			// Guards the division below against an unknown algorithm
+			throw new MWException( "Unsupported hash algorithm for HKDFExpand" );
+		}
+
+		if ( $bytes > 255 * $hashLen ) {
+			throw new MWException( "Too many bytes requested from HDKFExpand" );
+		}
+
+		$rounds = ceil( $bytes / $hashLen );
+		$output = '';
+
+		// K(1) = HMAC(PRK, CTXinfo || 1);
+		// K(i) = HMAC(PRK, K(i-1) || CTXinfo || i); 1 < i <= t;
+		for ( $counter = 1; $counter <= $rounds; ++$counter ) {
+			$lastK = hash_hmac(
+				$hash,
+				$lastK . $info . chr( $counter ),
+				$prk,
+				true
+			);
+			$output .= $lastK;
+		}
+
+		// The final block is truncated so that exactly $bytes are returned
+		return substr( $output, 0, $bytes );
+	}
+
+	/**
+	 * Generate cryptographically random data and return it in raw binary form.
+	 *
+	 * @param int $bytes The number of bytes of random data to generate
+	 * @param string $context String to mix into HMAC context; optional, as
+	 *  in generateHex()
+	 * @return string Binary string of length $bytes
+	 */
+	public static function generate( $bytes, $context = '' ) {
+		return self::singleton()->realGenerate( $bytes, $context );
+	}
+
+	/**
+	 * Generate cryptographically random data and return it in hexadecimal string format.
+	 * See MWCryptRand::realGenerateHex for details of the char-to-byte conversion logic.
+	 *
+	 * @param int $chars The number of hex chars of random data to generate
+	 * @param string $context String to mix into HMAC context
+	 * @return string Random hex characters, $chars long
+	 */
+	public static function generateHex( $chars, $context = '' ) {
+		// Two hex chars per byte, rounded up for an odd $chars
+		$byteCount = ceil( $chars / 2 );
+		$raw = self::singleton()->realGenerate( $byteCount, $context );
+
+		// Trim the surplus char produced by the rounding
+		return substr( bin2hex( $raw ), 0, $chars );
+	}
+
+}
diff --git a/includes/utils/MWCryptRand.php b/includes/utils/MWCryptRand.php
new file mode 100644
index 00000000..b602f78e
--- /dev/null
+++ b/includes/utils/MWCryptRand.php
@@ -0,0 +1,516 @@
+<?php
+/**
+ * A cryptographic random generator class used for generating secret keys
+ *
+ * This is based in part on Drupal code as well as what we used in our own code
+ * prior to introduction of this class.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @author Daniel Friesen
+ * @file
+ */
+
+class MWCryptRand {
+ /**
+ * Minimum number of iterations we want to make in our drift calculations.
+ * Used by driftHash() as the lower bound on its loop count.
+ */
+ const MIN_ITERATIONS = 1000;
+
+ /**
+ * Number of milliseconds we want to spend generating each separate byte
+ * of the final generated bytes.
+ * This is used in combination with the hash length to determine the duration
+ * we should spend doing drift calculations.
+ */
+ const MSEC_PER_BYTE = 0.5;
+
+ /**
+ * Singleton instance for public use
+ * @var MWCryptRand|null Created lazily by singleton()
+ */
+ protected static $singleton = null;
+
+ /**
+ * The hash algorithm being used
+ * @var string|null Null until hashAlgo() has picked an algorithm
+ */
+ protected $algo = null;
+
+ /**
+ * The number of bytes outputted by the hash algorithm
+ * @var int|null Null until hashLength() has measured it
+ */
+ protected $hashLength = null;
+
+ /**
+ * A boolean indicating whether the previous random generation was done using
+ * cryptographically strong random number generator or not.
+ * @var bool|null Null until realGenerate() has run at least once
+ */
+ protected $strong = null;
+
+	/**
+	 * Initialize an initial random state based off of whatever we can find.
+	 *
+	 * The returned string is a plain concatenation of request, host,
+	 * filesystem, process and configuration data. It is NOT hashed here, so
+	 * callers must hash it before letting anything derived from it escape.
+	 *
+	 * @return string
+	 */
+	protected function initialRandomState() {
+		global $wgSecretKey;
+
+		// $_SERVER contains a variety of unstable user and system specific information
+		// It'll vary a little with each page, and vary even more with separate users
+		// It'll also vary slightly across different machines
+		$state = serialize( $_SERVER );
+
+		// To try vary the system information of the state a bit more
+		// by including the system's hostname into the state
+		$state .= wfHostname();
+
+		// Try to gather a little entropy from the different php rand sources
+		$state .= rand() . uniqid( mt_rand(), true );
+
+		// Include some information about the filesystem's current state in the random state.
+		// We know this file is here, and it must have a parent folder and, with the
+		// usual file structure, a grandparent.
+		$files = array( __FILE__, __DIR__, dirname( __DIR__ ) );
+
+		// The config file is likely the most often edited file we know should
+		// be around so include its stat info into the state.
+		// The constant with its location will almost always be defined, as
+		// WebStart.php defines MW_CONFIG_FILE to $IP/LocalSettings.php unless
+		// being configured with MW_CONFIG_CALLBACK (e.g. the installer).
+		if ( defined( 'MW_CONFIG_FILE' ) ) {
+			$files[] = MW_CONFIG_FILE;
+		}
+
+		foreach ( $files as $file ) {
+			wfSuppressWarnings();
+			$stat = stat( $file );
+			wfRestoreWarnings();
+
+			if ( !$stat ) {
+				// The fact that the file isn't there is worth at least a
+				// minuscule amount of entropy.
+				$state .= '0';
+				continue;
+			}
+
+			// stat() duplicates data into numeric and string keys so kill off all
+			// the numeric ones. The entry has to be removed from $stat itself:
+			// unsetting the loop variable would leave the array untouched.
+			foreach ( array_keys( $stat ) as $k ) {
+				if ( is_numeric( $k ) ) {
+					unset( $stat[$k] );
+				}
+			}
+
+			// The absolute filename itself will differ from install to install so don't leave it out
+			$path = realpath( $file );
+			$state .= ( $path !== false ) ? $path : $file;
+			$state .= implode( '', $stat );
+		}
+
+		// Try and make this a little more unstable by including the varying process
+		// id of the php process we are running inside of if we are able to access it
+		if ( function_exists( 'getmypid' ) ) {
+			$state .= getmypid();
+		}
+
+		// If available try to increase the instability of the data by throwing in
+		// the precise amount of memory that we happen to be using at the moment.
+		if ( function_exists( 'memory_get_usage' ) ) {
+			$state .= memory_get_usage( true );
+		}
+
+		// It's mostly worthless but throw the wiki's id into the data for a little more variance
+		$state .= wfWikiID();
+
+		// If we have a secret key set then throw it into the state as well
+		if ( $wgSecretKey ) {
+			$state .= $wgSecretKey;
+		}
+
+		return $state;
+	}
+
+ /**
+ * Randomly hash data while mixing in clock drift data for randomness
+ *
+ * Runs a busy loop that measures the time between iterations with
+ * microtime(), appends each delta (in whole microseconds) to the data and
+ * periodically hashes the accumulated string. The loop runs for at least
+ * MIN_ITERATIONS iterations and at least MSEC_PER_BYTE milliseconds per
+ * byte of hash output.
+ *
+ * @param string $data The data to randomly hash.
+ * @return string The hashed bytes
+ * @author Tim Starling
+ */
+ protected function driftHash( $data ) {
+ // Minimum number of iterations (to avoid slow operations causing the
+ // loop to gather little entropy)
+ $minIterations = self::MIN_ITERATIONS;
+ // Duration of time to spend doing calculations (in seconds)
+ $duration = ( self::MSEC_PER_BYTE / 1000 ) * $this->hashLength();
+ // Create a buffer to use to trigger memory operations
+ $bufLength = 10000000;
+ $buffer = str_repeat( ' ', $bufLength );
+ $bufPos = 0;
+
+ // Iterate for $duration seconds or at least $minIterations number of iterations
+ $iterations = 0;
+ $startTime = microtime( true );
+ $currentTime = $startTime;
+ while ( $iterations < $minIterations || $currentTime - $startTime < $duration ) {
+ // Trigger some memory writing to trigger some bus activity
+ // This may create variance in the time between iterations
+ $bufPos = ( $bufPos + 13 ) % $bufLength;
+ $buffer[$bufPos] = ' ';
+ // Add the drift between this iteration and the last in as entropy
+ $nextTime = microtime( true );
+ // Elapsed time since the previous iteration, truncated to whole microseconds
+ $delta = (int)( ( $nextTime - $currentTime ) * 1000000 );
+ $data .= $delta;
+ // Every 100 iterations hash the data and entropy
+ // (sha1() without the raw flag yields a 40 character hex digest, so this
+ // also stops $data from growing without bound)
+ if ( $iterations % 100 === 0 ) {
+ $data = sha1( $data );
+ }
+ $currentTime = $nextTime;
+ $iterations++;
+ }
+ $timeTaken = $currentTime - $startTime;
+ // Final pass through the preferred algorithm; hash() returns raw binary
+ $data = $this->hash( $data );
+
+ wfDebug( __METHOD__ . ": Clock drift calculation " .
+ "(time-taken=" . ( $timeTaken * 1000 ) . "ms, " .
+ "iterations=$iterations, " .
+ "time-per-iteration=" . ( $timeTaken / $iterations * 1e6 ) . "us)\n" );
+
+ return $data;
+ }
+
+	/**
+	 * Advance and return the rolling weak random state.
+	 *
+	 * The state lives in a function-static variable. On first use it is seeded
+	 * from initialRandomState(); on every call it is replaced by the result of
+	 * running it through driftHash().
+	 *
+	 * @return string A new weak random state
+	 */
+	protected function randomState() {
+		static $state = null;
+
+		if ( $state === null ) {
+			// Seed from whatever unstable data we can find. The seed is hashed
+			// straight away so that none of the raw data can leak into the
+			// output stream.
+			$seed = $this->initialRandomState();
+			$state = $this->hash( $seed );
+		}
+
+		// Derive the next state from the seed or the previous state by
+		// combining it with clock drift
+		$state = $this->driftHash( $state );
+
+		return $state;
+	}
+
+	/**
+	 * Pick the most preferred hash algorithm that hash() supports here.
+	 *
+	 * The choice is cached in $this->algo, so the lookup and the debug log
+	 * entry only happen on the first call.
+	 *
+	 * @throws MWException If none of the candidate algorithms is available
+	 * @return string A hash algorithm
+	 */
+	protected function hashAlgo() {
+		if ( $this->algo !== null ) {
+			return $this->algo;
+		}
+
+		$available = hash_algos();
+
+		// Candidates in order of preference, most preferred first
+		foreach ( array( 'whirlpool', 'sha256', 'sha1', 'md5' ) as $candidate ) {
+			if ( !in_array( $candidate, $available ) ) {
+				continue;
+			}
+
+			$this->algo = $candidate;
+			wfDebug( __METHOD__ . ": Using the {$this->algo} hash algorithm.\n" );
+
+			return $this->algo;
+		}
+
+		// We only reach here if no acceptable hash is found in the list, this should
+		// be a technical impossibility since most of php's hash list is fixed and
+		// some of the ones we list are available as their own native functions
+		// But since we already require at least 5.2 and hash() was default in
+		// 5.1.2 we don't bother falling back to methods like sha1 and md5.
+		throw new MWException( "Could not find an acceptable hashing function in hash_algos()" );
+	}
+
+	/**
+	 * Return the byte-length output of the hash algorithm we are
+	 * using in self::hash and self::hmac.
+	 *
+	 * The length is measured once by hashing the empty string and is then
+	 * cached in $this->hashLength.
+	 *
+	 * @return int Number of bytes the hash outputs
+	 */
+	protected function hashLength() {
+		if ( $this->hashLength === null ) {
+			$emptyDigest = $this->hash( '' );
+			$this->hashLength = strlen( $emptyDigest );
+		}
+
+		return $this->hashLength;
+	}
+
+	/**
+	 * Hash some text with the algorithm chosen by hashAlgo().
+	 *
+	 * @param string $data
+	 * @return string A raw (binary) hash of the data
+	 */
+	protected function hash( $data ) {
+		$algorithm = $this->hashAlgo();
+
+		return hash( $algorithm, $data, true );
+	}
+
+	/**
+	 * Compute a keyed HMAC of some text with the algorithm chosen by hashAlgo().
+	 *
+	 * @param string $data
+	 * @param string $key
+	 * @return string A raw (binary) HMAC of the data
+	 */
+	protected function hmac( $data, $key ) {
+		$algorithm = $this->hashAlgo();
+
+		return hash_hmac( $algorithm, $data, $key, true );
+	}
+
+	/**
+	 * Instance implementation behind the static wasStrong() method.
+	 *
+	 * @see self::wasStrong()
+	 * @throws MWException If no random data has been generated yet
+	 * @return bool
+	 */
+	public function realWasStrong() {
+		if ( $this->strong !== null ) {
+			return $this->strong;
+		}
+
+		throw new MWException( __METHOD__ . ' called before generation of random data' );
+	}
+
+ /**
+ * Instance implementation behind the static generate() method.
+ *
+ * Keeps a function-static buffer of random bytes. While the buffer holds
+ * fewer than $bytes bytes, each source is tried in turn to top it up:
+ * mcrypt_create_iv(), openssl_random_pseudo_bytes(), /dev/urandom and
+ * finally the hash-based fallback loop. The first $bytes bytes are then
+ * returned and any surplus stays in the buffer for the next call.
+ *
+ * @see self::generate()
+ * @param int $bytes Number of bytes to return; passed through floor()
+ * @param bool $forceStrong Allow reading /dev/urandom even when
+ *   stream_set_read_buffer() is not available
+ * @return string Raw binary random data
+ */
+ public function realGenerate( $bytes, $forceStrong = false ) {
+ wfProfileIn( __METHOD__ );
+
+ wfDebug( __METHOD__ . ": Generating cryptographic random bytes for " .
+ wfGetAllCallers( 5 ) . "\n" );
+
+ $bytes = floor( $bytes );
+ static $buffer = '';
+ if ( is_null( $this->strong ) ) {
+ // Assume the data is strong initially; the sources below downgrade this
+ // when they report weak randomness or when the fallback has to be used
+ $this->strong = true;
+ }
+
+ if ( strlen( $buffer ) < $bytes ) {
+ // If available make use of mcrypt_create_iv URANDOM source to generate randomness
+ // On unix-like systems this reads from /dev/urandom but does it without any buffering
+ // and bypasses open_basedir restrictions, so it's preferable to reading directly
+ // On Windows starting in PHP 5.3.0 Windows' native CryptGenRandom is used to generate
+ // entropy so this is also preferable to just trying to read urandom because it may work
+ // on Windows systems as well.
+ if ( function_exists( 'mcrypt_create_iv' ) ) {
+ wfProfileIn( __METHOD__ . '-mcrypt' );
+ $rem = $bytes - strlen( $buffer );
+ $iv = mcrypt_create_iv( $rem, MCRYPT_DEV_URANDOM );
+ if ( $iv === false ) {
+ wfDebug( __METHOD__ . ": mcrypt_create_iv returned false.\n" );
+ } else {
+ $buffer .= $iv;
+ wfDebug( __METHOD__ . ": mcrypt_create_iv generated " . strlen( $iv ) .
+ " bytes of randomness.\n" );
+ }
+ wfProfileOut( __METHOD__ . '-mcrypt' );
+ }
+ }
+
+ if ( strlen( $buffer ) < $bytes ) {
+ // If available make use of openssl's random_pseudo_bytes method to
+ // attempt to generate randomness. However don't do this on Windows
+ // with PHP < 5.3.4 due to a bug:
+ // http://stackoverflow.com/questions/1940168/openssl-random-pseudo-bytes-is-slow-php
+ // http://git.php.net/?p=php-src.git;a=commitdiff;h=cd62a70863c261b07f6dadedad9464f7e213cad5
+ if ( function_exists( 'openssl_random_pseudo_bytes' )
+ && ( !wfIsWindows() || version_compare( PHP_VERSION, '5.3.4', '>=' ) )
+ ) {
+ wfProfileIn( __METHOD__ . '-openssl' );
+ $rem = $bytes - strlen( $buffer );
+ $openssl_bytes = openssl_random_pseudo_bytes( $rem, $openssl_strong );
+ if ( $openssl_bytes === false ) {
+ wfDebug( __METHOD__ . ": openssl_random_pseudo_bytes returned false.\n" );
+ } else {
+ $buffer .= $openssl_bytes;
+ wfDebug( __METHOD__ . ": openssl_random_pseudo_bytes generated " .
+ strlen( $openssl_bytes ) . " bytes of " .
+ ( $openssl_strong ? "strong" : "weak" ) . " randomness.\n" );
+ }
+ if ( strlen( $buffer ) >= $bytes ) {
+ // openssl tells us if the random source was strong; if some of our data was
+ // generated using it, use its say on whether the randomness is strong
+ $this->strong = !!$openssl_strong;
+ }
+ wfProfileOut( __METHOD__ . '-openssl' );
+ }
+ }
+
+ // Only read from urandom if we can control the buffer size or were passed forceStrong
+ if ( strlen( $buffer ) < $bytes &&
+ ( function_exists( 'stream_set_read_buffer' ) || $forceStrong )
+ ) {
+ wfProfileIn( __METHOD__ . '-fopen-urandom' );
+ $rem = $bytes - strlen( $buffer );
+ if ( !function_exists( 'stream_set_read_buffer' ) && $forceStrong ) {
+ wfDebug( __METHOD__ . ": Was forced to read from /dev/urandom " .
+ "without control over the buffer size.\n" );
+ }
+ // /dev/urandom is generally considered the best possible commonly
+ // available random source, and is available on most *nix systems.
+ wfSuppressWarnings();
+ $urandom = fopen( "/dev/urandom", "rb" );
+ wfRestoreWarnings();
+
+ // Attempt to read all our random data from urandom
+ // php's fread always does buffered reads based on the stream's chunk_size
+ // so in reality it will usually read more than the amount of data we're
+ // asked for and not storing that risks depleting the system's random pool.
+ // If stream_set_read_buffer is available set the chunk_size to the amount
+ // of data we need. Otherwise read 8k, php's default chunk_size.
+ if ( $urandom ) {
+ // php's default chunk_size is 8k
+ $chunk_size = 1024 * 8;
+ if ( function_exists( 'stream_set_read_buffer' ) ) {
+ // If possible set the chunk_size to the amount of data we need
+ stream_set_read_buffer( $urandom, $rem );
+ $chunk_size = $rem;
+ }
+ // Everything that is read is kept in the buffer, including any surplus
+ $random_bytes = fread( $urandom, max( $chunk_size, $rem ) );
+ $buffer .= $random_bytes;
+ fclose( $urandom );
+ wfDebug( __METHOD__ . ": /dev/urandom generated " . strlen( $random_bytes ) .
+ " bytes of randomness.\n" );
+
+ if ( strlen( $buffer ) >= $bytes ) {
+ // urandom is always strong, set to true if all our data was generated using it
+ $this->strong = true;
+ }
+ } else {
+ wfDebug( __METHOD__ . ": /dev/urandom could not be opened.\n" );
+ }
+ wfProfileOut( __METHOD__ . '-fopen-urandom' );
+ }
+
+ // If we cannot use or generate enough data from a secure source
+ // use this loop to generate a good set of pseudo random data.
+ // This works by initializing a random state using a pile of unstable data
+ // and continually shoving it through a hash along with a variable salt.
+ // We hash the random state with more salt to avoid the state from leaking
+ // out and being used to predict the /randomness/ that follows.
+ if ( strlen( $buffer ) < $bytes ) {
+ wfDebug( __METHOD__ .
+ ": Falling back to using a pseudo random state to generate randomness.\n" );
+ }
+ while ( strlen( $buffer ) < $bytes ) {
+ wfProfileIn( __METHOD__ . '-fallback' );
+ // Each pass appends one HMAC output: the rolling state keyed with mt_rand()
+ $buffer .= $this->hmac( $this->randomState(), mt_rand() );
+ // This code is never really cryptographically strong, if we use it
+ // at all, then set strong to false.
+ $this->strong = false;
+ wfProfileOut( __METHOD__ . '-fallback' );
+ }
+
+ // Once the buffer has been filled up with enough random data to fulfill
+ // the request shift off enough data to handle the request and leave the
+ // unused portion left inside the buffer for the next request for random data
+ $generated = substr( $buffer, 0, $bytes );
+ $buffer = substr( $buffer, $bytes );
+
+ wfDebug( __METHOD__ . ": " . strlen( $buffer ) .
+ " bytes of randomness leftover in the buffer.\n" );
+
+ wfProfileOut( __METHOD__ );
+
+ return $generated;
+ }
+
+	/**
+	 * Instance implementation behind the static generateHex() method.
+	 *
+	 * @see self::generateHex()
+	 * @param int $chars The number of hex chars of random data to generate
+	 * @param bool $forceStrong Passed through to realGenerate()
+	 * @return string Hexadecimal random data, $chars characters long
+	 */
+	public function realGenerateHex( $chars, $forceStrong = false ) {
+		// hex strings are 2x the length of raw binary so we divide the length in half
+		// odd numbers will result in a .5 that leads the generate() being 1 character
+		// short, so we use ceil() to ensure that we always have enough bytes
+		$bytes = ceil( $chars / 2 );
+
+		// Generate the data and then convert it to a hex string.
+		// realGenerate() is called directly: $this->generate() would resolve to the
+		// static generate() method, which runs on the singleton rather than on
+		// this instance and so would leave this instance's $strong flag unset.
+		$hex = bin2hex( $this->realGenerate( $bytes, $forceStrong ) );
+
+		// A bit of paranoia here, the caller asked for a specific length of string
+		// here, and it's possible (eg when given an odd number) that we may actually
+		// have at least 1 char more than they asked for. Just in case they made this
+		// call intending to insert it into a database that does truncation we don't
+		// want to give them too much and end up with their database and their live
+		// code having two different values because part of what we gave them is truncated
+		// hence, we strip out any run of characters longer than what we were asked for.
+		return substr( $hex, 0, $chars );
+	}
+
+ /** Publicly exposed static methods **/
+
+	/**
+	 * Return the shared MWCryptRand instance, creating it on first use.
+	 *
+	 * @return MWCryptRand
+	 */
+	protected static function singleton() {
+		if ( self::$singleton === null ) {
+			self::$singleton = new self();
+		}
+
+		return self::$singleton;
+	}
+
+	/**
+	 * Tell whether the source used by the most recent generate* call
+	 * was cryptographically strong.
+	 *
+	 * Delegates to realWasStrong() on the singleton instance.
+	 *
+	 * @return bool Returns true if the source was strong, false if not.
+	 */
+	public static function wasStrong() {
+		$instance = self::singleton();
+
+		return $instance->realWasStrong();
+	}
+
+	/**
+	 * Generate a run of (ideally) cryptographically random data and return
+	 * it in raw binary form.
+	 * Call MWCryptRand::wasStrong() afterwards to find out whether the source
+	 * used was cryptographically strong.
+	 *
+	 * Delegates to realGenerate() on the singleton instance.
+	 *
+	 * @param int $bytes The number of bytes of random data to generate
+	 * @param bool $forceStrong Pass true if you want generate to prefer cryptographically
+	 *   strong sources of entropy even if reading from them may steal
+	 *   more entropy from the system than optimal.
+	 * @return string Raw binary random data
+	 */
+	public static function generate( $bytes, $forceStrong = false ) {
+		$instance = self::singleton();
+
+		return $instance->realGenerate( $bytes, $forceStrong );
+	}
+
+	/**
+	 * Generate a run of (ideally) cryptographically random data and return
+	 * it in hexadecimal string format.
+	 * Call MWCryptRand::wasStrong() afterwards to find out whether the source
+	 * used was cryptographically strong.
+	 *
+	 * Delegates to realGenerateHex() on the singleton instance.
+	 *
+	 * @param int $chars The number of hex chars of random data to generate
+	 * @param bool $forceStrong Pass true if you want generate to prefer cryptographically
+	 *   strong sources of entropy even if reading from them may steal
+	 *   more entropy from the system than optimal.
+	 * @return string Hexadecimal random data
+	 */
+	public static function generateHex( $chars, $forceStrong = false ) {
+		$instance = self::singleton();
+
+		return $instance->realGenerateHex( $chars, $forceStrong );
+	}
+}
diff --git a/includes/utils/MWFunction.php b/includes/utils/MWFunction.php
new file mode 100644
index 00000000..3a0492dc
--- /dev/null
+++ b/includes/utils/MWFunction.php
@@ -0,0 +1,63 @@
+<?php
+/**
+ * Helper methods to call functions and instance objects.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+class MWFunction {
+
+	/**
+	 * Invoke a callback, forwarding any extra arguments given to this method.
+	 *
+	 * @deprecated since 1.22; use call_user_func()
+	 * @param callable $callback
+	 * @return mixed Whatever the callback returns
+	 */
+	public static function call( $callback ) {
+		wfDeprecated( __METHOD__, '1.22' );
+
+		// The full argument list (callback first) is handed to call_user_func()
+		$allArgs = func_get_args();
+
+		return call_user_func_array( 'call_user_func', $allArgs );
+	}
+
+	/**
+	 * Invoke a callback with its arguments supplied as an array.
+	 *
+	 * @deprecated since 1.22; use call_user_func_array()
+	 * @param callable $callback
+	 * @param array $argsarams Arguments to pass to the callback
+	 * @return mixed Whatever the callback returns
+	 */
+	public static function callArray( $callback, $argsarams ) {
+		wfDeprecated( __METHOD__, '1.22' );
+		$result = call_user_func_array( $callback, $argsarams );
+
+		return $result;
+	}
+
+	/**
+	 * Instantiate a class by name, optionally with constructor arguments.
+	 *
+	 * @param string $class Name of the class to instantiate
+	 * @param array $args Constructor arguments; when empty the class is
+	 *   instantiated without going through reflection
+	 * @return object
+	 */
+	public static function newObj( $class, $args = array() ) {
+		if ( count( $args ) ) {
+			$reflection = new ReflectionClass( $class );
+
+			return $reflection->newInstanceArgs( $args );
+		}
+
+		return new $class;
+	}
+}
diff --git a/includes/utils/README b/includes/utils/README
new file mode 100644
index 00000000..b5b8ec88
--- /dev/null
+++ b/includes/utils/README
@@ -0,0 +1,9 @@
+The classes in this directory are general utilities for use by any part of
+MediaWiki. They do not favour any particular user interface and are not
+constrained to serve any particular feature. This is similar to includes/libs,
+except that some dependency on the MediaWiki framework (such as the use of
+MWException, Status or wfDebug()) disqualifies them from use outside of
+MediaWiki without modification.
+
+Utilities should not use global configuration variables, rather they should rely
+on the caller to configure their behaviour.
diff --git a/includes/utils/StringUtils.php b/includes/utils/StringUtils.php
new file mode 100644
index 00000000..86f45122
--- /dev/null
+++ b/includes/utils/StringUtils.php
@@ -0,0 +1,612 @@
+<?php
+/**
+ * Methods to play with strings.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * A collection of static methods to play with strings.
+ */
+class StringUtils {
+	/**
+	 * Test whether a string is valid UTF-8.
+	 *
+	 * Invalid byte sequences and overlong encodings are rejected; differing
+	 * normalisations are not looked at.
+	 *
+	 * When the mbstring extension is available, mb_check_encoding() (fixed to
+	 * UTF-8) does the work. Otherwise, or when $disableMbstring is true, a pure
+	 * PHP implementation based on a set of regexes is used. The latter switch
+	 * is mostly intended for unit testing our internal implementation.
+	 *
+	 * @since 1.21
+	 * @note In MediaWiki 1.21, this function did not provide proper UTF-8 validation.
+	 * In particular, the pure PHP code path did not in fact check for overlong forms.
+	 * Beware of this when backporting code to that version of MediaWiki.
+	 *
+	 * @param string $value String to check
+	 * @param bool $disableMbstring Whether to use the pure PHP
+	 *   implementation instead of trying mb_check_encoding. Intended for unit
+	 *   testing. Default: false
+	 *
+	 * @return bool Whether the given $value is a valid UTF-8 encoded string
+	 */
+	static function isUtf8( $value, $disableMbstring = false ) {
+		$value = (string)$value;
+
+		// If the mbstring extension is loaded, use it. However, before PHP 5.4, values above
+		// U+10FFFF are incorrectly allowed, so we have to check for them separately.
+		if ( !$disableMbstring && function_exists( 'mb_check_encoding' ) ) {
+			// Probe once whether this mbstring already rejects a code point above U+10FFFF
+			static $mbRejectsOutOfRange;
+			if ( $mbRejectsOutOfRange === null ) {
+				$mbRejectsOutOfRange = !mb_check_encoding( "\xf4\x90\x80\x80", 'UTF-8' );
+			}
+
+			if ( !mb_check_encoding( $value, 'UTF-8' ) ) {
+				return false;
+			}
+			if ( $mbRejectsOutOfRange ) {
+				return true;
+			}
+
+			// Older mbstring: look for the out-of-range lead bytes ourselves
+			return preg_match( "/\xf4[\x90-\xbf]|[\xf5-\xff]/S", $value ) === 0;
+		}
+
+		if ( preg_match( "/[\x80-\xff]/S", $value ) === 0 ) {
+			// String contains only ASCII characters, has to be valid
+			return true;
+		}
+
+		// PCRE implements repetition using recursion; to avoid a stack overflow (and segfault)
+		// for large input, we check for invalid sequences (<= 5 bytes) rather than valid
+		// sequences, which can be as long as the input string is. Multiple short regexes are
+		// used rather than a single long regex for performance.
+		static $invalidPatterns;
+		if ( $invalidPatterns === null ) {
+			$cont = "[\x80-\xbf]";
+			$after = "(?!$cont)"; // "(?:[^\x80-\xbf]|$)" would work here
+			$invalidPatterns = array(
+				// Continuation byte at the start
+				"/^$cont/",
+
+				// ASCII byte followed by a continuation byte
+				"/[\\x00-\x7f]$cont/S",
+
+				// Illegal byte
+				"/[\xc0\xc1\xf5-\xff]/S",
+
+				// Invalid 2-byte sequence, or valid one then an extra continuation byte
+				"/[\xc2-\xdf](?!$cont$after)/S",
+
+				// Invalid 3-byte sequence, or valid one then an extra continuation byte
+				"/\xe0(?![\xa0-\xbf]$cont$after)/",
+				"/[\xe1-\xec\xee\xef](?!$cont{2}$after)/S",
+				"/\xed(?![\x80-\x9f]$cont$after)/",
+
+				// Invalid 4-byte sequence, or valid one then an extra continuation byte
+				"/\xf0(?![\x90-\xbf]$cont{2}$after)/",
+				"/[\xf1-\xf3](?!$cont{3}$after)/S",
+				"/\xf4(?![\x80-\x8f]$cont{2}$after)/",
+			);
+		}
+
+		// A single hit on any "invalid sequence" pattern disqualifies the string
+		foreach ( $invalidPatterns as $pattern ) {
+			if ( preg_match( $pattern, $value ) !== 0 ) {
+				return false;
+			}
+		}
+
+		return true;
+	}
+
+	/**
+	 * Perform an operation equivalent to
+	 *
+	 *     preg_replace( "!$startDelim(.*?)$endDelim!", $replace, $subject );
+	 *
+	 * except that it's worst-case O(N) instead of O(N^2)
+	 *
+	 * The subject is split on the start delimiter and each piece is then
+	 * searched for the first end delimiter. Compared to delimiterReplace(),
+	 * this implementation is fast but memory-hungry and inflexible, so it is
+	 * only recommended for guaranteed small chunks of text.
+	 *
+	 * @param string $startDelim
+	 * @param string $endDelim
+	 * @param string $replace
+	 * @param string $subject
+	 *
+	 * @return string
+	 */
+	static function hungryDelimiterReplace( $startDelim, $endDelim, $replace, $subject ) {
+		$pieces = explode( $startDelim, $subject );
+
+		// Everything before the first start delimiter is copied verbatim
+		$result = array_shift( $pieces );
+
+		foreach ( $pieces as $piece ) {
+			$closeAt = strpos( $piece, $endDelim );
+			$result .= ( $closeAt === false )
+				// Unterminated start delimiter: put it back untouched
+				? $startDelim . $piece
+				// Drop everything up to and including the end delimiter
+				: $replace . substr( $piece, $closeAt + strlen( $endDelim ) );
+		}
+
+		return $result;
+	}
+
+ /**
+ * Perform an operation equivalent to
+ *
+ * preg_replace_callback( "!$startDelim(.*)$endDelim!s$flags", $callback, $subject )
+ *
+ * This implementation is slower than hungryDelimiterReplace but uses far less
+ * memory. The delimiters are literal strings, not regular expressions.
+ *
+ * If the start delimiter ends with an initial substring of the end delimiter,
+ * e.g. in the case of C-style comments, the behavior differs from the model
+ * regex. In this implementation, the end must share no characters with the
+ * start, so e.g. /*\/ is not considered to be both the start and end of a
+ * comment. /*\/xy/*\/ is considered to be a single comment with contents /xy/.
+ *
+ * @param string $startDelim Start delimiter
+ * @param string $endDelim End delimiter
+ * @param callable $callback Function to call on each match
+ * @param string $subject
+ * @param string $flags Regular expression flags
+ * @throws MWException
+ * @return string
+ */
+ static function delimiterReplaceCallback( $startDelim, $endDelim, $callback,
+ $subject, $flags = ''
+ ) {
+ // $inputPos: offset at which the next token search starts
+ // $outputPos: offset of the first byte of $subject not yet copied to $output
+ $inputPos = 0;
+ $outputPos = 0;
+ $output = '';
+ // True while a start delimiter has been seen and its end is still pending
+ $foundStart = false;
+ // The delimiters are literal strings, so quote them for use in the regex
+ $encStart = preg_quote( $startDelim, '!' );
+ $encEnd = preg_quote( $endDelim, '!' );
+ // Comparison function matching the case sensitivity requested through $flags
+ $strcmp = strpos( $flags, 'i' ) === false ? 'strcmp' : 'strcasecmp';
+ $endLength = strlen( $endDelim );
+ $m = array();
+
+ // Scan for the next start or end delimiter, whichever comes first
+ while ( $inputPos < strlen( $subject ) &&
+ preg_match( "!($encStart)|($encEnd)!S$flags", $subject, $m, PREG_OFFSET_CAPTURE, $inputPos )
+ ) {
+ $tokenOffset = $m[0][1];
+ if ( $m[1][0] != '' ) {
+ if ( $foundStart &&
+ $strcmp( $endDelim, substr( $subject, $tokenOffset, $endLength ) ) == 0
+ ) {
+ # An end match is present at the same location
+ $tokenType = 'end';
+ $tokenLength = $endLength;
+ } else {
+ $tokenType = 'start';
+ $tokenLength = strlen( $m[0][0] );
+ }
+ } elseif ( $m[2][0] != '' ) {
+ $tokenType = 'end';
+ $tokenLength = strlen( $m[0][0] );
+ } else {
+ # Neither group captured anything, i.e. the match was empty
+ throw new MWException( 'Invalid delimiter given to ' . __METHOD__ );
+ }
+
+ if ( $tokenType == 'start' ) {
+ # Only move the start position if we haven't already found a start
+ # This means that START START END matches outer pair
+ if ( !$foundStart ) {
+ # Found start
+ $inputPos = $tokenOffset + $tokenLength;
+ # Write out the non-matching section
+ $output .= substr( $subject, $outputPos, $tokenOffset - $outputPos );
+ $outputPos = $tokenOffset;
+ # $contentPos marks where the text between the delimiters begins
+ $contentPos = $inputPos;
+ $foundStart = true;
+ } else {
+ # Move the input position past the *first character* of START,
+ # to protect against missing END when it overlaps with START
+ $inputPos = $tokenOffset + 1;
+ }
+ } elseif ( $tokenType == 'end' ) {
+ if ( $foundStart ) {
+ # Found match
+ # The callback receives array( full match including delimiters, content )
+ $output .= call_user_func( $callback, array(
+ substr( $subject, $outputPos, $tokenOffset + $tokenLength - $outputPos ),
+ substr( $subject, $contentPos, $tokenOffset - $contentPos )
+ ) );
+ $foundStart = false;
+ } else {
+ # Non-matching end, write it out
+ # NOTE(review): $inputPos and $outputPos appear to always be equal on
+ # this path, so mixing them in the call below looks harmless - confirm
+ $output .= substr( $subject, $inputPos, $tokenOffset + $tokenLength - $outputPos );
+ }
+ $inputPos = $outputPos = $tokenOffset + $tokenLength;
+ } else {
+ throw new MWException( 'Invalid delimiter given to ' . __METHOD__ );
+ }
+ }
+ # Copy whatever is left, including an unterminated START and its content
+ if ( $outputPos < strlen( $subject ) ) {
+ $output .= substr( $subject, $outputPos );
+ }
+
+ return $output;
+ }
+
+	/**
+	 * Perform an operation equivalent to
+	 *
+	 *     preg_replace( "!$startDelim(.*)$endDelim!$flags", $replace, $subject )
+	 *
+	 * The work is done by delimiterReplaceCallback() with a RegexlikeReplacer
+	 * as the callback. Both delimiters are handed to that method unchanged,
+	 * where they are escaped with preg_quote(), i.e. treated as literal strings.
+	 *
+	 * @param string $startDelim Start delimiter
+	 * @param string $endDelim End delimiter
+	 * @param string $replace Replacement string. May contain $1, which will be
+	 *   replaced by the text between the delimiters
+	 * @param string $subject String to search
+	 * @param string $flags Regular expression flags
+	 * @return string The string with the matches replaced
+	 */
+	static function delimiterReplace( $startDelim, $endDelim, $replace, $subject, $flags = '' ) {
+		$regexlike = new RegexlikeReplacer( $replace );
+		$callback = $regexlike->cb();
+
+		return self::delimiterReplaceCallback( $startDelim, $endDelim, $callback, $subject, $flags );
+	}
+
+	/**
+	 * More or less "markup-safe" explode()
+	 * Ignores any instances of the separator inside <...>
+	 *
+	 * Separators found between '<' and '>' are temporarily swapped for a NUL
+	 * byte so that explode() does not split on them. NUL bytes already
+	 * present in $text are removed first.
+	 *
+	 * @param string $separator
+	 * @param string $text
+	 * @return array
+	 */
+	static function explodeMarkup( $separator, $text ) {
+		$placeholder = "\x00";
+
+		// Remove placeholder instances
+		$withoutNul = str_replace( $placeholder, '', $text );
+
+		// Replace instances of the separator inside HTML-like tags with the placeholder
+		$masker = new DoubleReplacer( $separator, $placeholder );
+		$masked = self::delimiterReplaceCallback( '<', '>', $masker->cb(), $withoutNul );
+
+		// Explode, then put the replaced separators back in. str_replace()
+		// applied to an array processes every element and keeps the keys.
+		$parts = explode( $separator, $masked );
+
+		return str_replace( $placeholder, $separator, $parts );
+	}
+
+	/**
+	 * Escape a string to make it suitable for inclusion in a preg_replace()
+	 * replacement parameter.
+	 *
+	 * Backslashes are doubled first, then every dollar sign is prefixed with
+	 * a backslash.
+	 *
+	 * @param string $string
+	 * @return string
+	 */
+	static function escapeRegexReplacement( $string ) {
+		// str_replace() works through the search array in order, so the
+		// backslashes added for '$' are not doubled again.
+		return str_replace(
+			array( '\\', '$' ),
+			array( '\\\\', '\\$' ),
+			$string
+		);
+	}
+
+	/**
+	 * Workalike for explode() with limited memory usage.
+	 *
+	 * Subjects containing more than 1000 separators are wrapped in an
+	 * ExplodeIterator; anything smaller is split with explode() and wrapped
+	 * in an ArrayIterator.
+	 *
+	 * @param string $separator
+	 * @param string $subject
+	 * @return ArrayIterator|ExplodeIterator
+	 */
+	static function explode( $separator, $subject ) {
+		if ( substr_count( $subject, $separator ) <= 1000 ) {
+			return new ArrayIterator( explode( $separator, $subject ) );
+		}
+
+		return new ExplodeIterator( $separator, $subject );
+	}
+}
+
+/**
+ * Base class for "replacers", objects used in preg_replace_callback() and
+ * StringUtils::delimiterReplaceCallback()
+ *
+ * This class only packages a replace() method as a callable; it does not define
+ * replace() itself, so the object it is called on has to provide one.
+ */
+class Replacer {
+	/**
+	 * Get a callable that invokes replace() on this object.
+	 *
+	 * @return array Callable of the form array( $object, 'replace' )
+	 */
+	function cb() {
+		// Objects are handles, so taking a reference to $this is not needed
+		return array( $this, 'replace' );
+	}
+}
+
+/**
+ * Replacer whose replacement text is a template in the style of the
+ * preg_replace() replacement argument: "$0", "$1", ... stand for the
+ * corresponding entries of the match array.
+ */
+class RegexlikeReplacer extends Replacer {
+	/** Replacement template */
+	private $r;
+
+	/**
+	 * @param string $r Replacement template
+	 */
+	function __construct( $r ) {
+		$this->r = $r;
+	}
+
+	/**
+	 * Fill the template with the given matches.
+	 *
+	 * @param array $matches Match array; each key K is substituted for "$K"
+	 * @return string
+	 */
+	function replace( $matches ) {
+		$placeholders = array();
+		foreach ( array_keys( $matches ) as $group ) {
+			$placeholders['$' . $group] = $matches[$group];
+		}
+
+		return strtr( $this->r, $placeholders );
+	}
+}
+
+/**
+ * Class to perform secondary replacement within each replacement string
+ *
+ * The selected element of the match array is returned with every occurrence
+ * of $from replaced by $to.
+ */
+class DoubleReplacer extends Replacer {
+	/** Value to search for, as accepted by str_replace() */
+	private $from;
+
+	/** Replacement value, as accepted by str_replace() */
+	private $to;
+
+	/** Key of the match array element to operate on */
+	private $index;
+
+	/**
+	 * @param mixed $from
+	 * @param mixed $to
+	 * @param int $index
+	 */
+	function __construct( $from, $to, $index = 0 ) {
+		$this->from = $from;
+		$this->to = $to;
+		$this->index = $index;
+	}
+
+	/**
+	 * @param array $matches
+	 * @return mixed Result of str_replace() on $matches[$index]
+	 */
+	function replace( $matches ) {
+		return str_replace( $this->from, $this->to, $matches[$this->index] );
+	}
+}
+
+/**
+ * Replacer that looks the matched text up in an associative array and
+ * returns the value stored under it.
+ */
+class HashtableReplacer extends Replacer {
+	/** Lookup table: matched text => replacement */
+	private $table;
+
+	/** Key of the match array element used as lookup key */
+	private $index;
+
+	/**
+	 * @param array $table
+	 * @param int $index
+	 */
+	function __construct( $table, $index = 0 ) {
+		$this->index = $index;
+		$this->table = $table;
+	}
+
+	/**
+	 * @param array $matches
+	 * @return mixed The table entry for $matches[$index]
+	 */
+	function replace( $matches ) {
+		$lookupKey = $matches[$this->index];
+
+		return $this->table[$lookupKey];
+	}
+}
+
+/**
+ * Replacement array for FSS with fallback to strtr()
+ * Supports lazy initialisation of FSS resource
+ *
+ * The prepared FSS resource is discarded whenever the replacement pairs
+ * change and is rebuilt on the next call to replace().
+ */
+class ReplacementArray {
+	/** Replacement pairs, in the form accepted by strtr() */
+	private $data = false;
+
+	/** Prepared FSS resource, or false if it has to be (re)built */
+	private $fss = false;
+
+	/**
+	 * Create an object with the specified replacement array
+	 * The array should have the same form as the replacement array for strtr()
+	 * @param array $data
+	 */
+	function __construct( $data = array() ) {
+		$this->data = $data;
+	}
+
+	/**
+	 * Only the replacement pairs are serialized, not the FSS resource.
+	 *
+	 * @return array
+	 */
+	function __sleep() {
+		return array( 'data' );
+	}
+
+	function __wakeup() {
+		$this->fss = false;
+	}
+
+	/**
+	 * Set the whole replacement array at once
+	 * @param array $data
+	 */
+	function setArray( $data ) {
+		$this->data = $data;
+		$this->fss = false;
+	}
+
+	/**
+	 * @return array|bool
+	 */
+	function getArray() {
+		return $this->data;
+	}
+
+	/**
+	 * Set an element of the replacement array
+	 * @param string $from
+	 * @param string $to
+	 */
+	function setPair( $from, $to ) {
+		$this->data[$from] = $to;
+		$this->fss = false;
+	}
+
+	/**
+	 * Add the given pairs; a pair whose "from" string is already present
+	 * replaces the existing one.
+	 *
+	 * @param array $data
+	 */
+	function mergeArray( $data ) {
+		// Not array_merge(): PHP stores numeric "from" strings such as '10' as
+		// integer keys, and array_merge() would renumber those keys
+		$this->data = array_replace( $this->data, $data );
+		$this->fss = false;
+	}
+
+	/**
+	 * Add the pairs of another ReplacementArray; on conflict the pairs of
+	 * $other win.
+	 *
+	 * @param ReplacementArray $other
+	 */
+	function merge( $other ) {
+		// See mergeArray() for why array_merge() is not used
+		$this->data = array_replace( $this->data, $other->data );
+		$this->fss = false;
+	}
+
+	/**
+	 * @param string $from
+	 */
+	function removePair( $from ) {
+		unset( $this->data[$from] );
+		$this->fss = false;
+	}
+
+	/**
+	 * Remove every pair whose "from" string is a key of $data; the values of
+	 * $data are ignored.
+	 *
+	 * @param array $data
+	 */
+	function removeArray( $data ) {
+		foreach ( array_keys( $data ) as $from ) {
+			unset( $this->data[$from] );
+		}
+		$this->fss = false;
+	}
+
+	/**
+	 * Apply the replacement pairs to $subject, using FSS if the extension
+	 * function is available and strtr() otherwise.
+	 *
+	 * @param string $subject
+	 * @return string
+	 */
+	function replace( $subject ) {
+		if ( function_exists( 'fss_prep_replace' ) ) {
+			wfProfileIn( __METHOD__ . '-fss' );
+			if ( $this->fss === false ) {
+				// Lazily build the FSS resource for the current pairs
+				$this->fss = fss_prep_replace( $this->data );
+			}
+			$result = fss_exec_replace( $this->fss, $subject );
+			wfProfileOut( __METHOD__ . '-fss' );
+		} else {
+			wfProfileIn( __METHOD__ . '-strtr' );
+			$result = strtr( $subject, $this->data );
+			wfProfileOut( __METHOD__ . '-strtr' );
+		}
+
+		return $result;
+	}
+}
+
+/**
+ * An iterator which works exactly like:
+ *
+ * foreach ( explode( $delim, $s ) as $element ) {
+ * ...
+ * }
+ *
+ * Except it doesn't use 193 bytes per element. The key of each element is the
+ * byte offset at which it starts in the subject.
+ */
+class ExplodeIterator implements Iterator {
+	// The subject string and its cached length
+	private $subject, $subjectLength;
+
+	// The delimiter and its cached length
+	private $delim, $delimLength;
+
+	// Offset at which the current token starts, or false past the last token
+	private $curPos;
+
+	// Offset of the delimiter that ends the current token, or false if there
+	// is no further delimiter
+	private $endPos;
+
+	// The current token
+	private $current;
+
+	/**
+	 * @param string $delim
+	 * @param string $subject
+	 */
+	function __construct( $delim, $subject ) {
+		$this->delim = $delim;
+		$this->subject = $subject;
+
+		// Cache both lengths once instead of recomputing them on every step
+		$this->delimLength = strlen( $delim );
+		$this->subjectLength = strlen( $subject );
+
+		$this->rewind();
+	}
+
+	/**
+	 * Position the iterator on the first token.
+	 */
+	function rewind() {
+		$this->curPos = 0;
+		$this->endPos = strpos( $this->subject, $this->delim );
+		$this->refreshCurrent();
+	}
+
+	/**
+	 * Recompute the current token from the current and end offsets.
+	 */
+	function refreshCurrent() {
+		if ( $this->curPos === false ) {
+			// Iteration is finished
+			$this->current = false;
+
+			return;
+		}
+		if ( $this->curPos >= $this->subjectLength ) {
+			// Subject is empty or ends with a delimiter: one last empty token
+			$this->current = '';
+
+			return;
+		}
+
+		$this->current = ( $this->endPos === false )
+			? substr( $this->subject, $this->curPos )
+			: substr( $this->subject, $this->curPos, $this->endPos - $this->curPos );
+	}
+
+	function current() {
+		return $this->current;
+	}
+
+	/**
+	 * @return int|bool Current position or boolean false if invalid
+	 */
+	function key() {
+		return $this->curPos;
+	}
+
+	/**
+	 * Advance to the next token.
+	 *
+	 * @return string|bool The new current token, or false if there is none
+	 */
+	function next() {
+		if ( $this->endPos === false ) {
+			// The token just consumed was the last one
+			$this->curPos = false;
+		} else {
+			$this->curPos = $this->endPos + $this->delimLength;
+			$this->endPos = ( $this->curPos >= $this->subjectLength )
+				? false
+				: strpos( $this->subject, $this->delim, $this->curPos );
+		}
+		$this->refreshCurrent();
+
+		return $this->current;
+	}
+
+	/**
+	 * @return bool
+	 */
+	function valid() {
+		return $this->curPos !== false;
+	}
+}
diff --git a/includes/utils/UIDGenerator.php b/includes/utils/UIDGenerator.php
new file mode 100644
index 00000000..5346afa6
--- /dev/null
+++ b/includes/utils/UIDGenerator.php
@@ -0,0 +1,507 @@
+<?php
+/**
+ * This file deals with UID generation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @author Aaron Schulz
+ */
+
+/**
+ * Class for getting statistically unique IDs
+ *
+ * @since 1.21
+ */
+class UIDGenerator {
+ /** @var UIDGenerator */
+ protected static $instance = null;
+
+ protected $nodeIdFile; // string; local file path
+ protected $nodeId32; // string; node ID in binary (32 bits)
+ protected $nodeId48; // string; node ID in binary (48 bits)
+
+ protected $lockFile88; // string; local file path
+ protected $lockFile128; // string; local file path
+
+ /** @var array */
+ protected $fileHandles = array(); // cache file handles
+
+ const QUICK_RAND = 1; // get randomness from fast and insecure sources
+ const QUICK_VOLATILE = 2; // use an APC like in-memory counter if available
+
+ /**
+ * Determine the node ID of this machine and set up the lock file paths.
+ *
+ * A node ID cached in a file under wfTempDir() is used if it consists of
+ * 12 hex digits. Otherwise the ID is taken from the output of "getmac"
+ * (Windows) or "/sbin/ifconfig -a", falling back to 12 random hex digits,
+ * and the result is written to the cache file.
+ */
+ protected function __construct() {
+ $this->nodeIdFile = wfTempDir() . '/mw-' . __CLASS__ . '-UID-nodeid';
+ $nodeId = '';
+ if ( is_file( $this->nodeIdFile ) ) {
+ $nodeId = file_get_contents( $this->nodeIdFile );
+ }
+ // Try to get some ID that uniquely identifies this machine (RFC 4122)...
+ if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
+ wfSuppressWarnings();
+ if ( wfIsWindows() ) {
+ // http://technet.microsoft.com/en-us/library/bb490913.aspx
+ // Only the first field of the first CSV line is used, with dashes removed
+ $csv = trim( wfShellExec( 'getmac /NH /FO CSV' ) );
+ $line = substr( $csv, 0, strcspn( $csv, "\n" ) );
+ $info = str_getcsv( $line );
+ $nodeId = isset( $info[0] ) ? str_replace( '-', '', $info[0] ) : '';
+ } elseif ( is_executable( '/sbin/ifconfig' ) ) { // Linux/BSD/Solaris/OS X
+ // See http://linux.die.net/man/8/ifconfig
+ // Take the first whitespace-delimited xx:xx:xx:xx:xx:xx group from the
+ // output; this pattern has no "i" flag, so it only matches lower-case hex
+ $m = array();
+ preg_match( '/\s([0-9a-f]{2}(:[0-9a-f]{2}){5})\s/',
+ wfShellExec( '/sbin/ifconfig -a' ), $m );
+ $nodeId = isset( $m[1] ) ? str_replace( ':', '', $m[1] ) : '';
+ }
+ wfRestoreWarnings();
+ // Nothing usable was found: fall back to a random node ID
+ if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
+ $nodeId = MWCryptRand::generateHex( 12, true );
+ $nodeId[1] = dechex( hexdec( $nodeId[1] ) | 0x1 ); // set multicast bit
+ }
+ file_put_contents( $this->nodeIdFile, $nodeId ); // cache
+ }
+ // The 32 bit form is derived from the first 8 hex digits of the SHA-1 of the
+ // node ID, the 48 bit form from the node ID itself; both are base 2 strings
+ $this->nodeId32 = wfBaseConvert( substr( sha1( $nodeId ), 0, 8 ), 16, 2, 32 );
+ $this->nodeId48 = wfBaseConvert( $nodeId, 16, 2, 48 );
+ // If different processes run as different users, they may have different temp dirs.
+ // This is dealt with by initializing the clock sequence number and counters randomly.
+ $this->lockFile88 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-88';
+ $this->lockFile128 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-128';
+ }
+
+	/**
+	 * Get the shared generator, creating it on first use.
+	 *
+	 * @return UIDGenerator
+	 */
+	protected static function singleton() {
+		if ( self::$instance !== null ) {
+			return self::$instance;
+		}
+		self::$instance = new self();
+
+		return self::$instance;
+	}
+
+	/**
+	 * Get a statistically unique 88-bit unsigned integer ID string.
+	 * The bits of the UID are prefixed with the time (down to the millisecond).
+	 *
+	 * These IDs are suitable as values for the shard key of distributed data.
+	 * If a column uses these as values, it should be declared UNIQUE to handle collisions.
+	 * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast.
+	 * They can also be stored "DECIMAL(27) UNSIGNED" or BINARY(11) in MySQL.
+	 *
+	 * UID generation is serialized on each server (as the node ID is for the whole machine).
+	 *
+	 * @param int $base Specifies a base other than 10
+	 * @return string Number
+	 * @throws MWException If $base is not an integer between 2 and 36
+	 */
+	public static function newTimestampedUID88( $base = 10 ) {
+		if ( !is_integer( $base ) || $base > 36 || $base < 2 ) {
+			throw new MWException( "Base must be an integer between 2 and 36" );
+		}
+		$gen = self::singleton();
+		// A clock sequence size of 1 is passed since the 88-bit layout has no
+		// clock sequence field; the counter has 1024 possible values (10 bits)
+		$time = $gen->getTimestampAndDelay( 'lockFile88', 1, 1024 );
+
+		return wfBaseConvert( $gen->getTimestampedID88( $time ), 2, $base );
+	}
+
+	/**
+	 * Assemble the 88 bit ID: 46 bits of timestamp, a 10 bit counter and the
+	 * 32 bit node ID.
+	 *
+	 * @param array $info (UIDGenerator::millitime(), counter, clock sequence);
+	 *   only the first two entries are used
+	 * @return string 88 bits
+	 * @throws MWException If the assembled string is not exactly 88 bits long
+	 */
+	protected function getTimestampedID88( array $info ) {
+		list( $time, $counter ) = $info;
+
+		$bits =
+			// The 46 MSBs of "milliseconds since epoch"
+			$this->millisecondsSinceEpochBinary( $time ) .
+			// A 10 bit counter resulting in 56 bits total
+			str_pad( decbin( $counter ), 10, '0', STR_PAD_LEFT ) .
+			// The 32 bit node ID resulting in 88 bits total
+			$this->nodeId32;
+
+		if ( strlen( $bits ) !== 88 ) {
+			throw new MWException( "Detected overflow for millisecond timestamp." );
+		}
+
+		return $bits;
+	}
+
+	/**
+	 * Get a statistically unique 128-bit unsigned integer ID string.
+	 * The bits of the UID are prefixed with the time (down to the millisecond).
+	 *
+	 * These IDs are suitable as globally unique IDs, without any enforced uniqueness.
+	 * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast.
+	 * They can also be stored as "DECIMAL(39) UNSIGNED" or BINARY(16) in MySQL.
+	 *
+	 * UID generation is serialized on each server (as the node ID is for the whole machine).
+	 *
+	 * @param int $base Specifies a base other than 10
+	 * @return string Number
+	 * @throws MWException If $base is not an integer between 2 and 36
+	 */
+	public static function newTimestampedUID128( $base = 10 ) {
+		$baseIsValid = is_integer( $base ) && $base <= 36 && $base >= 2;
+		if ( !$baseIsValid ) {
+			throw new MWException( "Base must be an integer between 2 and 36" );
+		}
+
+		$generator = self::singleton();
+		// 16384 clock sequence values (14 bits), 1048576 counter values (20 bits)
+		$info = $generator->getTimestampAndDelay( 'lockFile128', 16384, 1048576 );
+		$binaryId = $generator->getTimestampedID128( $info );
+
+		return wfBaseConvert( $binaryId, 2, $base );
+	}
+
+	/**
+	 * Assemble the 128 bit ID: 46 bits of timestamp, a 20 bit counter, a 14 bit
+	 * clock sequence number and the 48 bit node ID.
+	 *
+	 * @param array $info (UIDGenerator::millitime(), counter, clock sequence)
+	 * @return string 128 bits
+	 * @throws MWException If the assembled string is not exactly 128 bits long
+	 */
+	protected function getTimestampedID128( array $info ) {
+		list( $time, $counter, $clkSeq ) = $info;
+
+		$bits =
+			// The 46 MSBs of "milliseconds since epoch"
+			$this->millisecondsSinceEpochBinary( $time ) .
+			// A 20 bit counter resulting in 66 bits total
+			str_pad( decbin( $counter ), 20, '0', STR_PAD_LEFT ) .
+			// A 14 bit clock sequence number resulting in 80 bits total
+			str_pad( decbin( $clkSeq ), 14, '0', STR_PAD_LEFT ) .
+			// The 48 bit node ID resulting in 128 bits total
+			$this->nodeId48;
+
+		if ( strlen( $bits ) !== 128 ) {
+			throw new MWException( "Detected overflow for millisecond timestamp." );
+		}
+
+		return $bits;
+	}
+
+	/**
+	 * Return an RFC4122 compliant v4 UUID
+	 *
+	 * 31 random hex digits are drawn; the version digit is fixed to 4 and the
+	 * first digit of the clock sequence group is forced into the range 8-b.
+	 *
+	 * @param int $flags Bitfield (supports UIDGenerator::QUICK_RAND)
+	 * @return string
+	 * @throws MWException
+	 */
+	public static function newUUIDv4( $flags = 0 ) {
+		if ( $flags & self::QUICK_RAND ) {
+			$hex = wfRandomString( 31 );
+		} else {
+			$hex = MWCryptRand::generateHex( 31 );
+		}
+
+		// "time_low" (32 bits)
+		$timeLow = substr( $hex, 0, 8 );
+		// "time_mid" (16 bits)
+		$timeMid = substr( $hex, 8, 4 );
+		// "time_hi_and_version" (16 bits)
+		$timeHiAndVersion = '4' . substr( $hex, 12, 3 );
+		// "clk_seq_hi_res" (8 bits, variant is binary 10x) and "clk_seq_low" (8 bits)
+		$variantDigit = dechex( 0x8 | ( hexdec( $hex[15] ) & 0x3 ) );
+		$clockSeq = $variantDigit . $hex[16] . substr( $hex, 17, 2 );
+		// "node" (48 bits)
+		$node = substr( $hex, 19, 12 );
+
+		return sprintf( '%s-%s-%s-%s-%s', $timeLow, $timeMid, $timeHiAndVersion, $clockSeq, $node );
+	}
+
+	/**
+	 * Return an RFC4122 compliant v4 UUID with the hyphens removed
+	 *
+	 * @param int $flags Bitfield (supports UIDGenerator::QUICK_RAND)
+	 * @return string 32 hex characters with no hyphens
+	 * @throws MWException
+	 */
+	public static function newRawUUIDv4( $flags = 0 ) {
+		$hyphenated = self::newUUIDv4( $flags );
+
+		return str_replace( '-', '', $hyphenated );
+	}
+
+	/**
+	 * Return an ID that is sequential *only* for this node and bucket
+	 *
+	 * These IDs are suitable for per-host sequence numbers, e.g. for some packet protocols.
+	 * If UIDGenerator::QUICK_VOLATILE is used the counter might reset on server restart.
+	 *
+	 * This is the single-ID form of UIDGenerator::newSequentialPerNodeIDs().
+	 *
+	 * @param string $bucket Arbitrary bucket name (should be ASCII)
+	 * @param int $bits Bit size (<=48) of resulting numbers before wrap-around
+	 * @param int $flags (supports UIDGenerator::QUICK_VOLATILE)
+	 * @return float Integer value as float
+	 * @since 1.23
+	 */
+	public static function newSequentialPerNodeID( $bucket, $bits = 48, $flags = 0 ) {
+		$ids = self::newSequentialPerNodeIDs( $bucket, $bits, 1, $flags );
+
+		return current( $ids );
+	}
+
+	/**
+	 * Return IDs that are sequential *only* for this node and bucket
+	 *
+	 * Static entry point; the work is done by the shared generator instance.
+	 *
+	 * @see UIDGenerator::newSequentialPerNodeID()
+	 * @param string $bucket Arbitrary bucket name (should be ASCII)
+	 * @param int $bits Bit size (16 to 48) of resulting numbers before wrap-around
+	 * @param int $count Number of IDs to return (1 to 10000)
+	 * @param int $flags (supports UIDGenerator::QUICK_VOLATILE)
+	 * @return array Ordered list of float integer values
+	 * @since 1.23
+	 */
+	public static function newSequentialPerNodeIDs( $bucket, $bits, $count, $flags = 0 ) {
+		return self::singleton()->getSequentialPerNodeIDs( $bucket, $bits, $count, $flags );
+	}
+
+	/**
+	 * Return IDs that are sequential *only* for this node and bucket
+	 *
+	 * The persistent counter lives in an accelerator cache when
+	 * UIDGenerator::QUICK_VOLATILE is requested, such a cache is available and
+	 * PHP is not running in CLI mode. Otherwise it is kept in a flock()-protected
+	 * file under wfTempDir().
+	 *
+	 * @see UIDGenerator::newSequentialPerNodeID()
+	 * @param string $bucket Arbitrary bucket name (should be ASCII)
+	 * @param int $bits Bit size (16 to 48) of resulting numbers before wrap-around
+	 * @param int $count Number of IDs to return (1 to 10000)
+	 * @param int $flags (supports UIDGenerator::QUICK_VOLATILE)
+	 * @return array Ordered list of float integer values
+	 * @throws MWException
+	 */
+	protected function getSequentialPerNodeIDs( $bucket, $bits, $count, $flags ) {
+		if ( $count <= 0 ) {
+			return array(); // nothing to do
+		} elseif ( $count > 10000 ) {
+			throw new MWException( "Number of requested IDs ($count) is too high." );
+		} elseif ( $bits < 16 || $bits > 48 ) {
+			throw new MWException( "Requested bit size ($bits) is out of range." );
+		}
+
+		$counter = null; // post-increment persistent counter value
+
+		// Use APC/eAccelerator/xcache if requested, available, and not in CLI mode;
+		// Counter values would not survive across script instances in CLI mode.
+		$cache = null;
+		if ( ( $flags & self::QUICK_VOLATILE ) && PHP_SAPI !== 'cli' ) {
+			try {
+				$cache = ObjectCache::newAccelerator( array() );
+			} catch ( MWException $e ) {
+				// not supported
+			}
+		}
+		if ( $cache ) {
+			$counter = $cache->incr( $bucket, $count );
+			if ( $counter === false ) {
+				if ( !$cache->add( $bucket, (int)$count ) ) {
+					throw new MWException( 'Unable to set value to ' . get_class( $cache ) );
+				}
+				$counter = $count;
+			}
+		}
+
+		// Note: use of fmod() avoids "division by zero" on 32 bit machines
+		if ( $counter === null ) {
+			$path = wfTempDir() . '/mw-' . __CLASS__ . '-' . rawurlencode( $bucket ) . '-48';
+			// Get the UID lock file handle
+			if ( isset( $this->fileHandles[$path] ) ) {
+				$handle = $this->fileHandles[$path];
+			} else {
+				$handle = fopen( $path, 'cb+' );
+				$this->fileHandles[$path] = $handle ?: null; // cache
+			}
+			// Acquire the UID lock file
+			if ( $handle === false ) {
+				throw new MWException( "Could not open '{$path}'." );
+			} elseif ( !flock( $handle, LOCK_EX ) ) {
+				// Forget the handle before closing it, so that neither a later
+				// call nor __destruct() touches a closed resource
+				unset( $this->fileHandles[$path] );
+				fclose( $handle );
+				throw new MWException( "Could not acquire '{$path}'." );
+			}
+			// Fetch the counter value and increment it...
+			rewind( $handle );
+			// The explicit casts make a new (empty) file count as 0 instead of
+			// handing a non-numeric string to floor()
+			$counter = floor( (float)trim( (string)fgets( $handle ) ) ) + $count; // fetch as float
+			// Write back the new counter value
+			ftruncate( $handle, 0 );
+			rewind( $handle );
+			// Format explicitly: the default float to string conversion switches
+			// to exponent notation for large values, which would lose digits
+			fwrite( $handle, sprintf( '%.0F', fmod( $counter, pow( 2, 48 ) ) ) ); // wrap-around as needed
+			fflush( $handle );
+			// Release the UID lock file
+			flock( $handle, LOCK_UN );
+		}
+
+		$ids = array();
+		$divisor = pow( 2, $bits );
+		$currentId = floor( $counter - $count ); // pre-increment counter value
+		for ( $i = 0; $i < $count; ++$i ) {
+			$ids[] = fmod( ++$currentId, $divisor );
+		}
+
+		return $ids;
+	}
+
+	/**
+	 * Get a (time,counter,clock sequence) where (time,counter) is higher
+	 * than any previous (time,counter) value for the given clock sequence.
+	 * This is useful for making UIDs sequential on a per-node basis.
+	 *
+	 * The state of the last generated UID is kept in the lock file as a single
+	 * line of the form "<clk seq> <sec> <msec> <counter> <offset>" and is read
+	 * and rewritten while an exclusive flock() is held.
+	 *
+	 * @param string $lockFile Name of the property holding the local lock file path
+	 * @param int $clockSeqSize The number of possible clock sequence values
+	 * @param int $counterSize The number of possible counter values
+	 * @return array (result of UIDGenerator::millitime(), counter, clock sequence)
+	 * @throws MWException
+	 */
+	protected function getTimestampAndDelay( $lockFile, $clockSeqSize, $counterSize ) {
+		// Get the UID lock file handle
+		$path = $this->$lockFile;
+		if ( isset( $this->fileHandles[$path] ) ) {
+			$handle = $this->fileHandles[$path];
+		} else {
+			$handle = fopen( $path, 'cb+' );
+			$this->fileHandles[$path] = $handle ?: null; // cache
+		}
+		// Acquire the UID lock file
+		if ( $handle === false ) {
+			throw new MWException( "Could not open '{$this->$lockFile}'." );
+		} elseif ( !flock( $handle, LOCK_EX ) ) {
+			// Forget the handle before closing it, so that neither a later
+			// call nor __destruct() touches a closed resource
+			unset( $this->fileHandles[$path] );
+			fclose( $handle );
+			throw new MWException( "Could not acquire '{$this->$lockFile}'." );
+		}
+		// Get the current timestamp, clock sequence number, last time, and counter
+		rewind( $handle );
+		$data = explode( ' ', fgets( $handle ) ); // "<clk seq> <sec> <msec> <counter> <offset>"
+		$clockChanged = false; // clock set back significantly?
+		if ( count( $data ) == 5 ) { // last UID info already initialized
+			$clkSeq = (int)$data[0] % $clockSeqSize;
+			$prevTime = array( (int)$data[1], (int)$data[2] );
+			$offset = (int)$data[4] % $counterSize; // random counter offset
+			$counter = 0; // counter for UIDs with the same timestamp
+			// Delay until the clock reaches the time of the last ID.
+			// This detects any microtime() drift among processes.
+			$time = $this->timeWaitUntil( $prevTime );
+			if ( !$time ) { // too long to delay?
+				$clockChanged = true; // bump clock sequence number
+				$time = self::millitime();
+			} elseif ( $time == $prevTime ) {
+				// Bump the counter if there are timestamp collisions
+				$counter = (int)$data[3] % $counterSize;
+				if ( ++$counter >= $counterSize ) { // sanity (starts at 0)
+					flock( $handle, LOCK_UN ); // abort
+					throw new MWException( "Counter overflow for timestamp value." );
+				}
+			}
+		} else { // last UID info not initialized
+			$clkSeq = mt_rand( 0, $clockSeqSize - 1 );
+			$counter = 0;
+			$offset = mt_rand( 0, $counterSize - 1 );
+			$time = self::millitime();
+		}
+		// microtime() and gettimeofday() can drift from time() at least on Windows.
+		// The drift is immediate for processes running while the system clock changes.
+		// time() does not have this problem. See https://bugs.php.net/bug.php?id=42659.
+		if ( abs( time() - $time[0] ) >= 2 ) {
+			// We don't want processes using too high or low timestamps to avoid duplicate
+			// UIDs and clock sequence number churn. This process should just be restarted.
+			flock( $handle, LOCK_UN ); // abort
+			throw new MWException( "Process clock is outdated or drifted." );
+		}
+		// If microtime() is synced and a clock change was detected, then the clock went back
+		if ( $clockChanged ) {
+			// Bump the clock sequence number and also randomize the counter offset,
+			// which is useful for UIDs that do not include the clock sequence number.
+			$clkSeq = ( $clkSeq + 1 ) % $clockSeqSize;
+			$offset = mt_rand( 0, $counterSize - 1 );
+			trigger_error( "Clock was set back; sequence number incremented." );
+		}
+		// Update the (clock sequence number, timestamp, counter)
+		ftruncate( $handle, 0 );
+		rewind( $handle );
+		fwrite( $handle, "{$clkSeq} {$time[0]} {$time[1]} {$counter} {$offset}" );
+		fflush( $handle );
+		// Release the UID lock file
+		flock( $handle, LOCK_UN );
+
+		// The stored counter is the raw one; the random offset is only applied
+		// to the value handed back to the caller
+		return array( $time, ( $counter + $offset ) % $counterSize, $clkSeq );
+	}
+
+	/**
+	 * Busy-wait until the clock has reached $time and return the timestamp at
+	 * which that was observed. Gives up and returns false as soon as the
+	 * remaining wait is found to be more than 10ms.
+	 *
+	 * @param array $time Result of UIDGenerator::millitime()
+	 * @return array|bool UIDGenerator::millitime() result or false
+	 */
+	protected function timeWaitUntil( array $time ) {
+		while ( true ) {
+			$now = self::millitime();
+			// Arrays are compared element by element, see
+			// http://php.net/manual/en/language.operators.comparison.php
+			if ( $now >= $time ) {
+				return $now; // the clock has caught up with $time
+			}
+
+			$remainingMs = ( $time[0] - $now[0] ) * 1000 + ( $time[1] - $now[1] );
+			if ( $remainingMs > 10 ) {
+				return false;
+			}
+		}
+	}
+
+	/**
+	 * Convert a (seconds, milliseconds) pair into a base 2 string of the
+	 * milliseconds elapsed since the epoch.
+	 *
+	 * @param array $time Result of UIDGenerator::millitime()
+	 * @return string 46 MSBs of "milliseconds since epoch" in binary (rolls over in 4201)
+	 * @throws MWException If the millisecond count exceeds 2^52
+	 */
+	protected function millisecondsSinceEpochBinary( array $time ) {
+		list( $sec, $msec ) = $time;
+		$totalMs = 1000 * $sec + $msec;
+
+		if ( $totalMs <= pow( 2, 52 ) ) {
+			// Keep only the last 46 characters of the converted value
+			return substr( wfBaseConvert( $totalMs, 10, 2, 46 ), -46 );
+		}
+
+		throw new MWException( __METHOD__ .
+			': sorry, this function doesn\'t work after the year 144680' );
+	}
+
+	/**
+	 * Read the clock via microtime() and split it into whole seconds and the
+	 * milliseconds elapsed within that second.
+	 *
+	 * @return array (current time in seconds, milliseconds since then)
+	 */
+	protected static function millitime() {
+		// microtime() yields "<fraction of a second> <seconds>"
+		$parts = explode( ' ', microtime() );
+		$fraction = $parts[0];
+		$seconds = $parts[1];
+
+		return array( (int)$seconds, (int)( $fraction * 1000 ) );
+	}
+
+	/**
+	 * Delete all cache files that have been created.
+	 *
+	 * Every cached file handle is closed and its file removed, and the node ID
+	 * file is removed as well.
+	 *
+	 * This is a cleanup method primarily meant to be used from unit tests to
+	 * avoid polluting the local filesystem. If used outside of a unit test
+	 * environment it should be used with caution as it may destroy state saved
+	 * in the files.
+	 *
+	 * @see unitTestTearDown
+	 * @since 1.23
+	 */
+	protected function deleteCacheFiles() {
+		// Bug: 44850
+		foreach ( array_keys( $this->fileHandles ) as $path ) {
+			$handle = $this->fileHandles[$path];
+			// A null entry marks a file that could not be opened
+			if ( $handle !== null ) {
+				fclose( $handle );
+			}
+			if ( is_file( $path ) ) {
+				unlink( $path );
+			}
+			unset( $this->fileHandles[$path] );
+		}
+
+		$nodeIdPath = $this->nodeIdFile;
+		if ( is_file( $nodeIdPath ) ) {
+			unlink( $nodeIdPath );
+		}
+	}
+
+	/**
+	 * Cleanup resources when tearing down after a unit test.
+	 *
+	 * This is a cleanup method primarily meant to be used from unit tests to
+	 * avoid polluting the local filesystem. If used outside of a unit test
+	 * environment it should be used with caution as it may destroy state saved
+	 * in the files.
+	 *
+	 * @see deleteCacheFiles
+	 * @since 1.23
+	 */
+	public static function unitTestTearDown() {
+		// Bug: 44850
+		self::singleton()->deleteCacheFiles();
+	}
+
+	function __destruct() {
+		// Close every handle that is still cached; empty entries are skipped
+		foreach ( $this->fileHandles as $handle ) {
+			if ( $handle ) {
+				fclose( $handle );
+			}
+		}
+	}
+}
diff --git a/includes/utils/ZipDirectoryReader.php b/includes/utils/ZipDirectoryReader.php
new file mode 100644
index 00000000..bc849766
--- /dev/null
+++ b/includes/utils/ZipDirectoryReader.php
@@ -0,0 +1,732 @@
+<?php
+/**
+ * ZIP file directories reader, for the purposes of upload verification.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * A class for reading ZIP file directories, for the purposes of upload
+ * verification.
+ *
+ * Only a functional interface is provided: ZipDirectoryReader::read(). No access
+ * is given to object instances.
+ *
+ */
+class ZipDirectoryReader {
+ /**
+ * Read a ZIP file and call a function for each file discovered in it.
+ *
+ * Because this class is aimed at verification, an error is raised on
+ * suspicious or ambiguous input, instead of emulating some standard
+ * behavior.
+ *
+ * @param string $fileName The archive file name
+ * @param array $callback The callback function. It will be called for each file
+ * with a single associative array each time, with members:
+ *
+ * - name: The file name. Directories conventionally have a trailing
+ * slash.
+ *
+ * - mtime: The file modification time, in MediaWiki 14-char format
+ *
+ * - size: The uncompressed file size
+ *
+ * @param array $options An associative array of read options, with the option
+ * name in the key. This may currently contain:
+ *
+ * - zip64: If this is set to true, then we will emulate a
+ * library with ZIP64 support, like OpenJDK 7. If it is set to
+ * false, then we will emulate a library with no knowledge of
+ * ZIP64.
+ *
+ * NOTE: The ZIP64 code is untested and probably doesn't work. It
+ * turned out to be easier to just reject ZIP64 archive uploads,
+ * since they are likely to be very rare. Confirming safety of a
+ * ZIP64 file is fairly complex. What do you do with a file that is
+ * ambiguous and broken when read with a non-ZIP64 reader, but valid
+ * when read with a ZIP64 reader? This situation is normal for a
+ * valid ZIP64 file, and working out what non-ZIP64 readers will make
+ * of such a file is not trivial.
+ *
+ * @return Status A Status object. The following fatal errors are defined:
+ *
+ * - zip-file-open-error: The file could not be opened.
+ *
+ * - zip-wrong-format: The file does not appear to be a ZIP file.
+ *
+ * - zip-bad: There was something wrong or ambiguous about the file
+ * data.
+ *
+ * - zip-unsupported: The ZIP file uses features which
+ * ZipDirectoryReader does not support.
+ *
+ * The default messages for those fatal errors are written in a way that
+ * makes sense for upload verification.
+ *
+ * If a fatal error is returned, more information about the error will be
+ * available in the debug log.
+ *
+ * Note that the callback function may be called any number of times before
+ * a fatal error is returned. If this occurs, the data sent to the callback
+ * function should be discarded.
+ */
+	public static function read( $fileName, $callback, $options = array() ) {
+		// The reader object never leaves this method; only the Status does
+		$reader = new self( $fileName, $callback, $options );
+		$status = $reader->execute();
+
+		return $status;
+	}
+
+ /** The file name */
+ protected $fileName;
+
+ /** The opened file resource */
+ protected $file;
+
+ /** The cached length of the file, or null if it has not been loaded yet. */
+ protected $fileLength;
+
+ /** A segmented cache of the file contents */
+ protected $buffer;
+
+ /** The file data callback */
+ protected $callback;
+
+ /** The ZIP64 mode */
+ protected $zip64 = false;
+
+ /** Stored headers */
+ protected $eocdr, $eocdr64, $eocdr64Locator;
+
+ protected $data;
+
+ /** The "extra field" ID for ZIP64 central directory entries */
+ const ZIP64_EXTRA_HEADER = 0x0001;
+
+ /** The segment size for the file contents cache */
+ const SEGSIZE = 16384;
+
+ /** The index of the "general field" bit for UTF-8 file names */
+ const GENERAL_UTF8 = 11;
+
+ /** The index of the "general field" bit for central directory encryption */
+ const GENERAL_CD_ENCRYPTED = 13;
+
+	/**
+	 * Private constructor; instances are only created by read().
+	 *
+	 * @param string $fileName
+	 * @param callable $callback
+	 * @param array $options Only the 'zip64' key is read here
+	 */
+	protected function __construct( $fileName, $callback, $options ) {
+		$this->callback = $callback;
+		$this->fileName = $fileName;
+
+		// Keep the default ZIP64 mode unless the option is given
+		$this->zip64 = isset( $options['zip64'] ) ? $options['zip64'] : $this->zip64;
+	}
+
+	/**
+	 * Read the directory according to settings in $this.
+	 *
+	 * Opens the file, reads the end of central directory record, locates the
+	 * central directory (ZIP64 or legacy, depending on the mode) and reads it.
+	 * A ZipDirectoryReaderError raised on the way is turned into a fatal Status.
+	 *
+	 * @return Status
+	 */
+	function execute() {
+		$this->file = fopen( $this->fileName, 'r' );
+		$this->data = array();
+		if ( !$this->file ) {
+			return Status::newFatal( 'zip-file-open-error' );
+		}
+
+		$status = Status::newGood();
+		try {
+			$this->readEndOfCentralDirectoryRecord();
+			if ( !$this->zip64 ) {
+				// In legacy mode these all-ones values are treated as ZIP64 markers
+				$looksLikeZip64 = $this->eocdr['CD size'] == 0xffffffff
+					|| $this->eocdr['CD offset'] == 0xffffffff
+					|| $this->eocdr['CD entries total'] == 0xffff;
+				if ( $looksLikeZip64 ) {
+					$this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
+						'but we are in legacy mode. Rejecting this upload is necessary to avoid ' .
+						'opening vulnerabilities on clients using OpenJDK 7 or later.' );
+				}
+
+				$location = $this->findOldCentralDirectory();
+			} else {
+				$location = $this->findZip64CentralDirectory();
+			}
+			list( $offset, $size ) = $location;
+			$this->readCentralDirectory( $offset, $size );
+		} catch ( ZipDirectoryReaderError $e ) {
+			$status->fatal( $e->getErrorCode() );
+		}
+
+		fclose( $this->file );
+
+		return $status;
+	}
+
+	/**
+	 * Write $debugMessage to the debug log, then abort by throwing a
+	 * ZipDirectoryReaderError carrying $code. This method never returns.
+	 *
+	 * @param mixed $code
+	 * @param string $debugMessage
+	 */
+	function error( $code, $debugMessage ) {
+		$logLine = __CLASS__ . ": Fatal error: $debugMessage\n";
+		wfDebug( $logLine );
+
+		throw new ZipDirectoryReaderError( $code );
+	}
+
+ /**
+ * Read the header which is at the end of the central directory,
+ * unimaginatively called the "end of central directory record" by the ZIP
+ * spec.
+ *
+ * The result is stored in $this->eocdr, together with the derived entries
+ * 'EOCDR size', 'file comment' and 'position'. Problems are reported through
+ * error(), i.e. by throwing.
+ */
+ function readEndOfCentralDirectoryRecord() {
+ // Field name => size, in the order the fields are laid out
+ // (presumably sizes in bytes; see getStructSize() and unpack())
+ $info = array(
+ 'signature' => 4,
+ 'disk' => 2,
+ 'CD start disk' => 2,
+ 'CD entries this disk' => 2,
+ 'CD entries total' => 2,
+ 'CD size' => 4,
+ 'CD offset' => 4,
+ 'file comment length' => 2,
+ );
+ $structSize = $this->getStructSize( $info );
+ // Only the tail of the file is searched: 65536 plus the struct size,
+ // or the whole file if it is shorter than that
+ $startPos = $this->getFileLength() - 65536 - $structSize;
+ if ( $startPos < 0 ) {
+ $startPos = 0;
+ }
+
+ // NOTE(review): getBlock() with a single argument presumably reads up to
+ // the end of the file - confirm against getBlock()
+ $block = $this->getBlock( $startPos );
+ // strrpos() picks the last occurrence of the signature in the block
+ $sigPos = strrpos( $block, "PK\x05\x06" );
+ if ( $sigPos === false ) {
+ $this->error( 'zip-wrong-format',
+ "zip file lacks EOCDR signature. It probably isn't a zip file." );
+ }
+
+ $this->eocdr = $this->unpack( substr( $block, $sigPos ), $info );
+ $this->eocdr['EOCDR size'] = $structSize + $this->eocdr['file comment length'];
+
+ // The record plus its comment must extend exactly to the end of the block
+ if ( $structSize + $this->eocdr['file comment length'] != strlen( $block ) - $sigPos ) {
+ $this->error( 'zip-bad', 'trailing bytes after the end of the file comment' );
+ }
+ if ( $this->eocdr['disk'] !== 0
+ || $this->eocdr['CD start disk'] !== 0
+ ) {
+ $this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
+ }
+ // Add the comment, which follows the fixed-size part of the record
+ $this->eocdr += $this->unpack(
+ $block,
+ array( 'file comment' => array( 'string', $this->eocdr['file comment length'] ) ),
+ $sigPos + $structSize );
+ // Offset of the signature, counted from the start of the file
+ $this->eocdr['position'] = $startPos + $sigPos;
+ }
+
+ /**
+  * Decode the "ZIP64 end of central directory locator", which is expected
+  * to sit directly in front of the regular end of central directory record.
+  * The decoded fields are stored in $this->eocdr64Locator.
+  *
+  * @throws ZipDirectoryReaderError Via error(), if the locator signature
+  *   is not found at the expected position.
+  */
+ function readZip64EndOfCentralDirectoryLocator() {
+     $info = array(
+         'signature' => array( 'string', 4 ),
+         'eocdr64 start disk' => 4,
+         'eocdr64 offset' => 8,
+         'number of disks' => 4,
+     );
+     $structSize = $this->getStructSize( $info );
+
+     // Step back over the EOCDR (including its comment) and over the
+     // locator itself, counting from the end of the file.
+     $start = $this->getFileLength() - $this->eocdr['EOCDR size'] - $structSize;
+     $locator = $this->unpack( $this->getBlock( $start, $structSize ), $info );
+     $this->eocdr64Locator = $locator;
+
+     if ( $locator['signature'] !== "PK\x06\x07" ) {
+         // Note: Java will allow this and continue to read the
+         // EOCDR64, so we have to reject the upload, we can't
+         // just use the EOCDR header instead.
+         $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
+     }
+ }
+
+ /**
+  * Decode the "ZIP64 end of central directory record" found at the offset
+  * given by the previously read locator, and store it in $this->eocdr64.
+  * In ZIP64 files it may replace the regular end of central directory
+  * record.
+  *
+  * @throws ZipDirectoryReaderError Via error(), if the locator or the
+  *   record refers to more than one disk, or if the signature is wrong.
+  */
+ function readZip64EndOfCentralDirectoryRecord() {
+     $locator = $this->eocdr64Locator;
+     // NOTE(review): 'number of disks' is required to be 0 here; a writer
+     // that records a total of 1 for a single-disk archive would be
+     // rejected -- confirm against the ZIP specification.
+     $locatorIsSingleDisk = $locator['eocdr64 start disk'] == 0
+         && $locator['number of disks'] == 0;
+     if ( !$locatorIsSingleDisk ) {
+         $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
+     }
+
+     $info = array(
+         'signature' => array( 'string', 4 ),
+         'EOCDR64 size' => 8,
+         'version made by' => 2,
+         'version needed' => 2,
+         'disk' => 4,
+         'CD start disk' => 4,
+         'CD entries this disk' => 8,
+         'CD entries total' => 8,
+         'CD size' => 8,
+         'CD offset' => 8
+     );
+     $rawRecord = $this->getBlock(
+         $locator['eocdr64 offset'],
+         $this->getStructSize( $info )
+     );
+     $record = $this->unpack( $rawRecord, $info );
+     $this->eocdr64 = $record;
+
+     if ( $record['signature'] !== "PK\x06\x06" ) {
+         $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
+     }
+
+     $recordIsSingleDisk = $record['disk'] === 0 && $record['CD start disk'] === 0;
+     if ( !$recordIsSingleDisk ) {
+         $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
+     }
+ }
+
+ /**
+  * Determine where the central directory is, the way a reader without
+  * ZIP64 support would, using only the fields of $this->eocdr.
+  *
+  * @throws ZipDirectoryReaderError Via error(), if the directory does not
+  *   end exactly where the end of central directory record begins.
+  * @return array Two-element list: the directory offset and its size.
+  */
+ function findOldCentralDirectory() {
+     $record = $this->eocdr;
+     $cdOffset = $record['CD offset'];
+     $cdSize = $record['CD size'];
+
+     // Some readers use the EOCDR position instead of the offset field
+     // to find the directory, so to be safe, we check if they both agree.
+     if ( $cdOffset + $cdSize != $record['position'] ) {
+         $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
+             'of central directory record' );
+     }
+
+     return array( $cdOffset, $cdSize );
+ }
+
+ /**
+  * Determine where the central directory is, the way a ZIP64-compliant
+  * reader would. When any of the size, offset or entry-count fields of the
+  * regular record holds its all-ones marker value, the ZIP64 locator and
+  * record are read and their values take precedence.
+  *
+  * @throws ZipDirectoryReaderError Via error(), if the directory does not
+  *   end exactly where the applicable end-of-directory record begins.
+  * @return array Two-element list: the directory offset and its size.
+  */
+ function findZip64CentralDirectory() {
+     // The spec is ambiguous about the exact rules of precedence between the
+     // ZIP64 headers and the original headers. Here we follow zip_util.c
+     // from OpenJDK 7.
+     $cdSize = $this->eocdr['CD size'];
+     $cdOffset = $this->eocdr['CD offset'];
+     $expectedEnd = $this->eocdr['position'];
+
+     $defersToZip64 = $cdSize == 0xffffffff
+         || $cdOffset == 0xffffffff
+         || $this->eocdr['CD entries total'] == 0xffff;
+
+     if ( $defersToZip64 ) {
+         $this->readZip64EndOfCentralDirectoryLocator();
+
+         if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
+             $this->readZip64EndOfCentralDirectoryRecord();
+
+             if ( isset( $this->eocdr64['CD offset'] ) ) {
+                 $cdSize = $this->eocdr64['CD size'];
+                 $cdOffset = $this->eocdr64['CD offset'];
+                 // In this case the directory has to end where the ZIP64
+                 // record starts.
+                 $expectedEnd = $this->eocdr64Locator['eocdr64 offset'];
+             }
+         }
+     }
+
+     // Some readers use the EOCDR position instead of the offset field
+     // to find the directory, so to be safe, we check if they both agree.
+     if ( $cdOffset + $cdSize != $expectedEnd ) {
+         $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
+             'of central directory record' );
+     }
+
+     return array( $cdOffset, $cdSize );
+ }
+
+ /**
+  * Walk through the central directory stored at the given location and
+  * invoke $this->callback once per entry with an array holding the keys
+  * 'name' (UTF-8), 'mtime' (14-digit timestamp string) and 'size'
+  * (uncompressed size).
+  *
+  * @param int $offset File offset of the central directory
+  * @param int $size Size of the central directory in bytes
+  * @throws ZipDirectoryReaderError Via error(), on a bad entry signature or
+  *   when the central directory is flagged as encrypted.
+  */
+ function readCentralDirectory( $offset, $size ) {
+     $block = $this->getBlock( $offset, $size );
+
+     $fixedInfo = array(
+         'signature' => array( 'string', 4 ),
+         'version made by' => 2,
+         'version needed' => 2,
+         'general bits' => 2,
+         'compression method' => 2,
+         'mod time' => 2,
+         'mod date' => 2,
+         'crc-32' => 4,
+         'compressed size' => 4,
+         'uncompressed size' => 4,
+         'name length' => 2,
+         'extra field length' => 2,
+         'comment length' => 2,
+         'disk number start' => 2,
+         'internal attrs' => 2,
+         'external attrs' => 4,
+         'local header offset' => 4,
+     );
+     $fixedSize = $this->getStructSize( $fixedInfo );
+
+     for ( $pos = 0; $pos < $size; ) {
+         // Fixed-length part of the entry header
+         $entry = $this->unpack( $block, $fixedInfo, $pos );
+         $pos += $fixedSize;
+
+         if ( $entry['signature'] !== "PK\x01\x02" ) {
+             $this->error( 'zip-bad', 'Invalid signature found in directory entry' );
+         }
+
+         // Variable-length part; its lengths come from the fixed part
+         $variableInfo = array(
+             'name' => array( 'string', $entry['name length'] ),
+             'extra field' => array( 'string', $entry['extra field length'] ),
+             'comment' => array( 'string', $entry['comment length'] ),
+         );
+         $entry += $this->unpack( $block, $variableInfo, $pos );
+         $pos += $this->getStructSize( $variableInfo );
+
+         if ( $this->zip64 ) {
+             $hasMarkerValue = $entry['compressed size'] == 0xffffffff
+                 || $entry['uncompressed size'] == 0xffffffff
+                 || $entry['local header offset'] == 0xffffffff;
+             if ( $hasMarkerValue ) {
+                 $zip64Data = $this->unpackZip64Extra( $entry['extra field'] );
+                 if ( $zip64Data ) {
+                     // Array union: values from the ZIP64 extra field win
+                     $entry = $zip64Data + $entry;
+                 }
+             }
+         }
+
+         if ( $this->testBit( $entry['general bits'], self::GENERAL_CD_ENCRYPTED ) ) {
+             $this->error( 'zip-unsupported', 'central directory encryption is not supported' );
+         }
+
+         // Convert the timestamp into MediaWiki format
+         // For the format, please see the MS-DOS 2.0 Programmer's Reference,
+         // pages 3-5 and 3-6.
+         $dosTime = $entry['mod time'];
+         $dosDate = $entry['mod date'];
+         $timestamp = sprintf( "%04d%02d%02d%02d%02d%02d",
+             1980 + ( $dosDate >> 9 ),
+             ( $dosDate >> 5 ) & 15,
+             $dosDate & 31,
+             ( $dosTime >> 11 ) & 31,
+             ( $dosTime >> 5 ) & 63,
+             ( $dosTime & 31 ) * 2 );
+
+         // Convert the character set in the file name
+         $nameIsUtf8 = $this->testBit( $entry['general bits'], self::GENERAL_UTF8 );
+         $name = $nameIsUtf8
+             ? $entry['name']
+             : iconv( 'CP437', 'UTF-8', $entry['name'] );
+
+         // Hand a data array with a sensible format to the user
+         call_user_func( $this->callback, array(
+             'name' => $name,
+             'mtime' => $timestamp,
+             'size' => $entry['uncompressed size'],
+         ) );
+     }
+ }
+
+ /**
+  * Scan the records of an "extra field" for the ZIP64 one and decode it.
+  *
+  * @param string $extraField Raw extra field data of a directory entry
+  * @throws ZipDirectoryReaderError Via unpack(), if a record extends past
+  *   the end of the data it is read from.
+  * @return array|bool Associative array with the keys 'uncompressed size',
+  *   'compressed size', 'local header offset' and 'disk number start', or
+  *   false if the extra field contains no ZIP64 record.
+  */
+ function unpackZip64Extra( $extraField ) {
+     $extraHeaderInfo = array(
+         'id' => 2,
+         'size' => 2,
+     );
+     $extraHeaderSize = $this->getStructSize( $extraHeaderInfo );
+
+     $zip64ExtraInfo = array(
+         'uncompressed size' => 8,
+         'compressed size' => 8,
+         'local header offset' => 8,
+         'disk number start' => 4,
+     );
+
+     $totalLength = strlen( $extraField );
+     for ( $cursor = 0; $cursor < $totalLength; ) {
+         // Each record is a 4-byte header followed by 'size' bytes of data
+         $header = $this->unpack( $extraField, $extraHeaderInfo, $cursor );
+         $cursor += $extraHeaderSize;
+
+         $payloadInfo = array( 'data' => array( 'string', $header['size'] ) );
+         $payload = $this->unpack( $extraField, $payloadInfo, $cursor );
+         $cursor += $header['size'];
+
+         if ( $header['id'] == self::ZIP64_EXTRA_HEADER ) {
+             return $this->unpack( $payload['data'], $zip64ExtraInfo );
+         }
+     }
+
+     return false;
+ }
+
+ /**
+  * Get the length of the file. The value is determined with fstat() on
+  * first use and cached in $this->fileLength afterwards.
+  *
+  * @throws ZipDirectoryReaderError Via error(), if fstat() fails. Without
+  *   this check the failure value would be indexed like an array and the
+  *   length would silently be treated as null.
+  * @return int
+  */
+ function getFileLength() {
+     if ( $this->fileLength === null ) {
+         $stat = fstat( $this->file );
+         if ( $stat === false ) {
+             // Reported with the same code as the seek/read failures in
+             // getSegment().
+             $this->error( 'zip-bad', 'fstat() failed, unable to determine the file length' );
+         }
+         $this->fileLength = $stat['size'];
+     }
+
+     return $this->fileLength;
+ }
+
+ /**
+  * Get the file contents from a given offset. If there are not enough bytes
+  * in the file to satisfy the request, an exception will be thrown.
+  *
+  * The data is assembled from the cached segments served by getSegment().
+  *
+  * @param int $start The byte offset of the start of the block.
+  * @param int $length The number of bytes to return. If omitted, the remainder
+  *   of the file will be returned.
+  *
+  * @throws ZipDirectoryReaderError Via error(), if the start position or the
+  *   length is negative, or if the requested range is not entirely inside
+  *   the file.
+  * @return string
+  */
+ function getBlock( $start, $length = null ) {
+     $fileLength = $this->getFileLength();
+     // A negative start can result from offset arithmetic on a very small
+     // file; reject it here instead of relying on a failing seek later.
+     if ( $start < 0 || $start >= $fileLength ) {
+         $this->error( 'zip-bad', "getBlock() requested position $start, " .
+             "file length is $fileLength" );
+     }
+     if ( $length === null ) {
+         $length = $fileLength - $start;
+     } elseif ( $length < 0 ) {
+         $this->error( 'zip-bad', "getBlock() requested negative length $length" );
+     }
+     $end = $start + $length;
+     if ( $end > $fileLength ) {
+         $this->error( 'zip-bad', "getBlock() requested end position $end, " .
+             "file length is $fileLength" );
+     }
+     if ( $length == 0 ) {
+         // Nothing to read, so no segment needs to be loaded.
+         return '';
+     }
+
+     // Segments $startSeg .. $endSeg - 1 cover the byte range [$start, $end).
+     // $endSeg itself is exclusive: loading it too would fetch a segment
+     // which can never contribute to the result.
+     $startSeg = (int)floor( $start / self::SEGSIZE );
+     $endSeg = (int)ceil( $end / self::SEGSIZE );
+
+     $block = '';
+     for ( $segIndex = $startSeg; $segIndex < $endSeg; $segIndex++ ) {
+         $block .= $this->getSegment( $segIndex );
+     }
+
+     // Cut away the bytes in front of $start and behind $end.
+     $block = substr( $block,
+         $start - $startSeg * self::SEGSIZE,
+         $length );
+
+     // getSegment() may return truncated data, e.g. after a short read.
+     if ( strlen( $block ) < $length ) {
+         $this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
+     }
+
+     return $block;
+ }
+
+ /**
+  * Helper for getBlock(): return the part of the file which starts at
+  * position $segIndex * self::SEGSIZE and is up to self::SEGSIZE bytes
+  * long. Results are cached in $this->buffer.
+  *
+  * Fewer bytes are returned when the file ends inside the segment, and an
+  * empty string when the segment lies entirely beyond the end of the file.
+  *
+  * @param int $segIndex
+  *
+  * @throws ZipDirectoryReaderError Via error(), if seeking or reading fails.
+  * @return string
+  */
+ function getSegment( $segIndex ) {
+     if ( isset( $this->buffer[$segIndex] ) ) {
+         // Already loaded earlier
+         return $this->buffer[$segIndex];
+     }
+
+     $bytePos = $segIndex * self::SEGSIZE;
+     if ( $bytePos >= $this->getFileLength() ) {
+         $this->buffer[$segIndex] = '';
+
+         return '';
+     }
+
+     // fseek() returns 0 on success
+     if ( fseek( $this->file, $bytePos ) ) {
+         $this->error( 'zip-bad', "seek to $bytePos failed" );
+     }
+
+     $contents = fread( $this->file, self::SEGSIZE );
+     if ( $contents === false ) {
+         $this->error( 'zip-bad', "read from $bytePos failed" );
+     }
+     $this->buffer[$segIndex] = $contents;
+
+     return $contents;
+ }
+
+ /**
+  * Compute the number of bytes occupied by a structure. The format of
+  * $struct is described at unpack().
+  *
+  * @param array $struct
+  * @return int Sum of all field sizes
+  */
+ function getStructSize( $struct ) {
+     $total = 0;
+     foreach ( $struct as $fieldType ) {
+         // For array types, the size is the second element
+         $total += is_array( $fieldType ) ? $fieldType[1] : $fieldType;
+     }
+
+     return $total;
+ }
+
+ /**
+  * Decode a binary structure into an associative array. A friendlier
+  * alternative to the built-in unpack() function.
+  *
+  * @param string $string The binary data input
+  *
+  * @param array $struct Associative array mapping each field name to its
+  *   type. A type is either an integer, in which case the field is a
+  *   little-endian unsigned integer encoded in the given number of bytes,
+  *   or an array whose first element is the type name, followed by
+  *   type-dependent parameters. Only one such type is defined:
+  *    - "string": The second array element gives the length of string.
+  *       Not null terminated.
+  *
+  * @param int $offset The offset into the string at which to start unpacking.
+  *
+  * @throws MWException If $struct contains an unknown type name
+  * @throws ZipDirectoryReaderError Via error(), if the structure does not
+  *   fit into the string, or if an integer exceeds the accepted range.
+  * @return array Unpacked associative array. Note that large integers in the input
+  *   may be represented as floating point numbers in the return value, so
+  *   the use of weak comparison is advised.
+  */
+ function unpack( $string, $struct, $offset = 0 ) {
+     $bytesNeeded = $this->getStructSize( $struct );
+     if ( $offset + $bytesNeeded > strlen( $string ) ) {
+         $this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
+     }
+
+     $fields = array();
+     $cursor = $offset;
+     foreach ( $struct as $fieldName => $fieldType ) {
+         if ( !is_array( $fieldType ) ) {
+             // Unsigned little-endian integer
+             $byteCount = intval( $fieldType );
+
+             // Accumulate from the most significant byte downwards. PHP
+             // upgrades the value to floating point by itself if it
+             // grows beyond the integer range.
+             $number = 0;
+             for ( $byte = $byteCount - 1; $byte >= 0; $byte-- ) {
+                 $number = $number * 256 + ord( $string[$cursor + $byte] );
+             }
+
+             // Throw an exception if there was loss of precision
+             if ( $number > pow( 2, 52 ) ) {
+                 $this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
+                     'This could happen if we tried to unpack a 64-bit structure ' .
+                     'at an invalid location.' );
+             }
+
+             $fields[$fieldName] = $number;
+             $cursor += $byteCount;
+             continue;
+         }
+
+         list( $typeName, $fieldSize ) = $fieldType;
+         switch ( $typeName ) {
+             case 'string':
+                 $fields[$fieldName] = substr( $string, $cursor, $fieldSize );
+                 $cursor += $fieldSize;
+                 break;
+             default:
+                 throw new MWException( __METHOD__ . ": invalid type \"$typeName\"" );
+         }
+     }
+
+     return $fields;
+ }
+
+ /**
+  * Tell whether the bit at the given position of an integer value is set.
+  *
+  * @param int $value
+  * @param int $bitIndex The index of the bit, where 0 is the LSB.
+  * @return bool True if the bit is 1, false if it is 0
+  */
+ function testBit( $value, $bitIndex ) {
+     // Move the requested bit into the lowest position, then isolate it
+     $shifted = $value >> $bitIndex;
+
+     return ( $shifted & 1 ) === 1;
+ }
+
+ /**
+  * Debugging helper which prints a string in the style of hexdump -C:
+  * one row per 16 input bytes, consisting of the row offset, the bytes in
+  * hexadecimal and the printable characters between two "|" marks.
+  *
+  * @param string $s
+  */
+ function hexDump( $s ) {
+     $total = strlen( $s );
+     for ( $rowStart = 0; $rowStart < $total; $rowStart += 16 ) {
+         $hexColumn = '';
+         $textColumn = '';
+
+         for ( $col = 0; $col < 16; $col++ ) {
+             $index = $rowStart + $col;
+             $isPastEnd = $index >= $total;
+
+             // Hexadecimal column, with an extra gap in the middle
+             $hexColumn .= " ";
+             if ( $col == 8 ) {
+                 $hexColumn .= " ";
+             }
+             $hexColumn .= $isPastEnd ? " " : sprintf( "%02X", ord( $s[$index] ) );
+
+             // Text column; non-printable bytes are shown as dots
+             if ( $isPastEnd ) {
+                 $textColumn .= " ";
+             } elseif ( ctype_print( $s[$index] ) ) {
+                 $textColumn .= $s[$index];
+             } else {
+                 $textColumn .= '.';
+             }
+         }
+
+         printf( "%08X ", $rowStart );
+         print $hexColumn . " |" . $textColumn . "|\n";
+     }
+ }
+}
+
+/**
+ * Exception type used internally to abort reading. It carries the error
+ * code which is later reported to the caller, and is meant to be caught by
+ * private code only.
+ */
+class ZipDirectoryReaderError extends Exception {
+    /** @var mixed Error code passed to the constructor */
+    protected $errorCode;
+
+    /**
+     * @param mixed $code Error code; it is also embedded in the message
+     */
+    function __construct( $code ) {
+        parent::__construct( 'ZipDirectoryReader error: ' . $code );
+        $this->errorCode = $code;
+    }
+
+    /**
+     * Get the error code this exception was created with.
+     *
+     * @return mixed
+     */
+    function getErrorCode() {
+        return $this->errorCode;
+    }
+}