summary | refs | log | tree | commit | diff
path: root/includes/normal
diff options
context:
space:
mode:
Diffstat (limited to 'includes/normal')
-rw-r--r-- includes/normal/README 10
-rw-r--r-- includes/normal/RandomTest.php 6
-rw-r--r-- includes/normal/UtfNormal.php 4
-rw-r--r-- includes/normal/UtfNormalTest.php 8
-rw-r--r-- includes/normal/UtfNormalTest2.php 2
-rw-r--r-- includes/normal/UtfNormalUtil.php 8
6 files changed, 18 insertions, 20 deletions
diff --git a/includes/normal/README b/includes/normal/README
index a17aa7da..0f718d2c 100644
--- a/includes/normal/README
+++ b/includes/normal/README
@@ -48,12 +48,12 @@ grains of salt.
There's an experimental PHP extension module which wraps the ICU library's
normalization functions. This is *MUCH* faster than doing this work in pure
-PHP code. This is in the 'normal' directory in MediaWiki's CVS extensions
-module. It is known to work with PHP 4.3.8 and 5.0.2 on Linux/x86 but hasn't
-been thoroughly tested on other configurations.
+PHP code. This is at https://git.wikimedia.org/summary/mediawiki%2Fextensions%2Fnormal.git.
+It is used by the WMF, which currently runs PHP 5.3.10 on Linux. It hasn't been
+thoroughly tested on other configurations, but may work.
If the php_normal.so module is loaded in php.ini, the normalization functions
will automatically use it. If you can't (or don't want to) load it in php.ini,
-you may be able to load it using the dl() function before include()ing or
-require()ing UtfNormal.php, and it will be picked up.
+you may be able to load it using the dl() function before the inclusion of
+UtfNormal.php, and it will be picked up.
diff --git a/includes/normal/RandomTest.php b/includes/normal/RandomTest.php
index 9dc1c861..06029868 100644
--- a/includes/normal/RandomTest.php
+++ b/includes/normal/RandomTest.php
@@ -31,10 +31,10 @@ if( PHP_SAPI != 'cli' ) {
}
/** */
-require_once( 'UtfNormal.php' );
-require_once( '../diff/DifferenceEngine.php' );
+require_once 'UtfNormal.php';
+require_once '../diff/DifferenceEngine.php';
-dl('php_utfnormal.so' );
+dl( 'php_utfnormal.so' );
# mt_srand( 99999 );
diff --git a/includes/normal/UtfNormal.php b/includes/normal/UtfNormal.php
index 77ddb79b..5a091afc 100644
--- a/includes/normal/UtfNormal.php
+++ b/includes/normal/UtfNormal.php
@@ -190,7 +190,7 @@ class UtfNormal {
*/
static function loadData() {
if( !isset( self::$utfCombiningClass ) ) {
- require_once( __DIR__ . '/UtfNormalData.inc' );
+ require_once __DIR__ . '/UtfNormalData.inc';
}
}
@@ -491,7 +491,7 @@ class UtfNormal {
*/
static function NFKD( $string ) {
if( !isset( self::$utfCompatibilityDecomp ) ) {
- require_once( 'UtfNormalDataK.inc' );
+ require_once 'UtfNormalDataK.inc';
}
return self::fastCombiningSort(
self::fastDecompose( $string, self::$utfCompatibilityDecomp ) );
diff --git a/includes/normal/UtfNormalTest.php b/includes/normal/UtfNormalTest.php
index 661e53fd..51183666 100644
--- a/includes/normal/UtfNormalTest.php
+++ b/includes/normal/UtfNormalTest.php
@@ -34,9 +34,7 @@ $verbose = true;
if( defined( 'PRETTY_UTF8' ) ) {
function pretty( $string ) {
- return preg_replace( '/([\x00-\xff])/e',
- 'sprintf("%02X", ord("$1"))',
- $string );
+ return strtoupper( bin2hex( $string ) );
}
} else {
/**
@@ -44,9 +42,7 @@ if( defined( 'PRETTY_UTF8' ) ) {
* @return string
*/
function pretty( $string ) {
- return trim( preg_replace( '/(.)/use',
- 'sprintf("%04X ", utf8ToCodepoint("$1"))',
- $string ) );
+ return strtoupper( utf8ToHexSequence( $string ) );
}
}
diff --git a/includes/normal/UtfNormalTest2.php b/includes/normal/UtfNormalTest2.php
index 2266696e..750c0099 100644
--- a/includes/normal/UtfNormalTest2.php
+++ b/includes/normal/UtfNormalTest2.php
@@ -65,7 +65,7 @@ $f = fopen($file, "r");
later and slow down the runtime.
*/
-require_once("./UtfNormal.php");
+require_once './UtfNormal.php';
function normalize_form_c($c) { return UtfNormal::toNFC($c); }
function normalize_form_d($c) { return UtfNormal::toNFD($c); }
function normalize_form_kc($c) { return UtfNormal::toNFKC($c); }
diff --git a/includes/normal/UtfNormalUtil.php b/includes/normal/UtfNormalUtil.php
index 9b96a073..e8fec936 100644
--- a/includes/normal/UtfNormalUtil.php
+++ b/includes/normal/UtfNormalUtil.php
@@ -76,9 +76,11 @@ function hexSequenceToUtf8( $sequence ) {
* @private
*/
function utf8ToHexSequence( $str ) {
- return rtrim( preg_replace( '/(.)/uSe',
- 'sprintf("%04x ", utf8ToCodepoint("$1"))',
- $str ) );
+ $buf = '';
+ foreach ( preg_split( '//u', $str, -1, PREG_SPLIT_NO_EMPTY ) as $cp ) {
+ $buf .= sprintf( '%04x ', utf8ToCodepoint( $cp ) );
+ }
+ return rtrim( $buf );
}
/**