summaryrefslogtreecommitdiff
path: root/includes/normal/UtfNormalUtil.php
diff options
context:
space:
mode:
Diffstat (limited to 'includes/normal/UtfNormalUtil.php')
-rw-r--r--includes/normal/UtfNormalUtil.php42
1 files changed, 28 insertions, 14 deletions
diff --git a/includes/normal/UtfNormalUtil.php b/includes/normal/UtfNormalUtil.php
index e8fec936..6c925dfa 100644
--- a/includes/normal/UtfNormalUtil.php
+++ b/includes/normal/UtfNormalUtil.php
@@ -4,7 +4,7 @@
* Should probably merge them for consistency.
*
* Copyright © 2004 Brion Vibber <brion@pobox.com>
- * http://www.mediawiki.org/
+ * https://www.mediawiki.org/
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -34,16 +34,27 @@
* @public
*/
function codepointToUtf8( $codepoint ) {
- if($codepoint < 0x80) return chr($codepoint);
- if($codepoint < 0x800) return chr($codepoint >> 6 & 0x3f | 0xc0) .
- chr($codepoint & 0x3f | 0x80);
- if($codepoint < 0x10000) return chr($codepoint >> 12 & 0x0f | 0xe0) .
- chr($codepoint >> 6 & 0x3f | 0x80) .
- chr($codepoint & 0x3f | 0x80);
- if($codepoint < 0x110000) return chr($codepoint >> 18 & 0x07 | 0xf0) .
- chr($codepoint >> 12 & 0x3f | 0x80) .
- chr($codepoint >> 6 & 0x3f | 0x80) .
- chr($codepoint & 0x3f | 0x80);
+ if ( $codepoint < 0x80 ) {
+ return chr( $codepoint );
+ }
+
+ if ( $codepoint < 0x800 ) {
+ return chr( $codepoint >> 6 & 0x3f | 0xc0 ) .
+ chr( $codepoint & 0x3f | 0x80 );
+ }
+
+ if ( $codepoint < 0x10000 ) {
+ return chr( $codepoint >> 12 & 0x0f | 0xe0 ) .
+ chr( $codepoint >> 6 & 0x3f | 0x80 ) .
+ chr( $codepoint & 0x3f | 0x80 );
+ }
+
+ if ( $codepoint < 0x110000 ) {
+ return chr( $codepoint >> 18 & 0x07 | 0xf0 ) .
+ chr( $codepoint >> 12 & 0x3f | 0x80 ) .
+ chr( $codepoint >> 6 & 0x3f | 0x80 ) .
+ chr( $codepoint & 0x3f | 0x80 );
+ }
echo "Asked for code outside of range ($codepoint)\n";
die( -1 );
@@ -60,10 +71,11 @@ function codepointToUtf8( $codepoint ) {
*/
function hexSequenceToUtf8( $sequence ) {
$utf = '';
- foreach( explode( ' ', $sequence ) as $hex ) {
+ foreach ( explode( ' ', $sequence ) as $hex ) {
$n = hexdec( $hex );
$utf .= codepointToUtf8( $n );
}
+
return $utf;
}
@@ -80,6 +92,7 @@ function utf8ToHexSequence( $str ) {
foreach ( preg_split( '//u', $str, -1, PREG_SPLIT_NO_EMPTY ) as $cp ) {
$buf .= sprintf( '%04x ', utf8ToCodepoint( $cp ) );
}
+
return rtrim( $buf );
}
@@ -107,6 +120,7 @@ function utf8ToCodepoint( $char ) {
if ( $length != strlen( $char ) ) {
return false;
}
+
if ( $length == 1 ) {
return ord( $char );
}
@@ -116,7 +130,7 @@ function utf8ToCodepoint( $char ) {
$z >>= $length;
# Add in the free bits from subsequent bytes
- for ( $i=1; $i < $length; $i++ ) {
+ for ( $i = 1; $i < $length; $i++ ) {
$z <<= 6;
$z |= ord( $char[$i] ) & 0x3f;
}
@@ -136,5 +150,5 @@ function escapeSingleString( $string ) {
array(
'\\' => '\\\\',
'\'' => '\\\''
- ));
+ ) );
}