diff options
Diffstat (limited to 'includes/libs/normal')
-rw-r--r-- | includes/libs/normal/UtfNormal.php | 129 | ||||
-rw-r--r-- | includes/libs/normal/UtfNormalDefines.php | 186 | ||||
-rw-r--r-- | includes/libs/normal/UtfNormalUtil.php | 99 |
3 files changed, 0 insertions, 414 deletions
diff --git a/includes/libs/normal/UtfNormal.php b/includes/libs/normal/UtfNormal.php deleted file mode 100644 index c9c05a07..00000000 --- a/includes/libs/normal/UtfNormal.php +++ /dev/null @@ -1,129 +0,0 @@ -<?php -/** - * Unicode normalization routines - * - * Copyright © 2004 Brion Vibber <brion@pobox.com> - * https://www.mediawiki.org/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * http://www.gnu.org/copyleft/gpl.html - * - * @file - * @ingroup UtfNormal - */ - -/** - * @defgroup UtfNormal UtfNormal - */ - -use UtfNormal\Validator; - -/** - * Unicode normalization routines for working with UTF-8 strings. - * Currently assumes that input strings are valid UTF-8! - * - * Not as fast as I'd like, but should be usable for most purposes. - * UtfNormal::toNFC() will bail early if given ASCII text or text - * it can quickly determine is already normalized. - * - * All functions can be called static. - * - * See description of forms at http://www.unicode.org/reports/tr15/ - * - * @deprecated since 1.25, use UtfNormal\Validator directly - * @ingroup UtfNormal - */ -class UtfNormal { - /** - * The ultimate convenience function! Clean up invalid UTF-8 sequences, - * and convert to normal form C, canonical composition. - * - * Fast return for pure ASCII strings; some lesser optimizations for - * strings containing only known-good characters. Not as fast as toNFC(). - * - * @param string $string a UTF-8 string - * @return string a clean, shiny, normalized UTF-8 string - */ - static function cleanUp( $string ) { - return Validator::cleanUp( $string ); - } - - /** - * Convert a UTF-8 string to normal form C, canonical composition. - * Fast return for pure ASCII strings; some lesser optimizations for - * strings containing only known-good characters. - * - * @param string $string a valid UTF-8 string. Input is not validated. - * @return string a UTF-8 string in normal form C - */ - static function toNFC( $string ) { - return Validator::toNFC( $string ); - } - - /** - * Convert a UTF-8 string to normal form D, canonical decomposition. - * Fast return for pure ASCII strings. - * - * @param string $string a valid UTF-8 string. Input is not validated. - * @return string a UTF-8 string in normal form D - */ - static function toNFD( $string ) { - return Validator::toNFD( $string ); - } - - /** - * Convert a UTF-8 string to normal form KC, compatibility composition. - * This may cause irreversible information loss, use judiciously. - * Fast return for pure ASCII strings. - * - * @param string $string a valid UTF-8 string. Input is not validated. - * @return string a UTF-8 string in normal form KC - */ - static function toNFKC( $string ) { - return Validator::toNFKC( $string ); - } - - /** - * Convert a UTF-8 string to normal form KD, compatibility decomposition. - * This may cause irreversible information loss, use judiciously. - * Fast return for pure ASCII strings. - * - * @param string $string a valid UTF-8 string. Input is not validated. - * @return string a UTF-8 string in normal form KD - */ - static function toNFKD( $string ) { - return Validator::toNFKD( $string ); - } - - /** - * Returns true if the string is _definitely_ in NFC. - * Returns false if not or uncertain. - * @param string $string a valid UTF-8 string. Input is not validated. - * @return bool - */ - static function quickIsNFC( $string ) { - return Validator::quickIsNFC( $string ); - } - - /** - * Returns true if the string is _definitely_ in NFC. - * Returns false if not or uncertain. - * @param string $string a UTF-8 string, altered on output to be valid UTF-8 safe for XML. - * @return bool - */ - static function quickIsNFCVerify( &$string ) { - return Validator::quickIsNFCVerify( $string ); - } -} diff --git a/includes/libs/normal/UtfNormalDefines.php b/includes/libs/normal/UtfNormalDefines.php deleted file mode 100644 index b8e44c77..00000000 --- a/includes/libs/normal/UtfNormalDefines.php +++ /dev/null @@ -1,186 +0,0 @@ -<?php -/** - * Backwards-compatability constants which are now provided by the - * UtfNormal library. They are hardcoded here since they are needed - * before the composer autoloader is initialized. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * http://www.gnu.org/copyleft/gpl.html - * - * @file - * @ingroup UtfNormal - */ - -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_HANGUL_FIRST', 0xac00 ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_HANGUL_LAST', 0xd7a3 ); - -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_HANGUL_LBASE', 0x1100 ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_HANGUL_VBASE', 0x1161 ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_HANGUL_TBASE', 0x11a7 ); - -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_HANGUL_LCOUNT', 19 ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_HANGUL_VCOUNT', 21 ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_HANGUL_TCOUNT', 28 ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_HANGUL_NCOUNT', UNICODE_HANGUL_VCOUNT * UNICODE_HANGUL_TCOUNT ); - -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_HANGUL_LEND', UNICODE_HANGUL_LBASE + UNICODE_HANGUL_LCOUNT - 1 ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_HANGUL_VEND', UNICODE_HANGUL_VBASE + UNICODE_HANGUL_VCOUNT - 1 ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_HANGUL_TEND', UNICODE_HANGUL_TBASE + UNICODE_HANGUL_TCOUNT - 1 ); - -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_SURROGATE_FIRST', 0xd800 ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_SURROGATE_LAST', 0xdfff ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_MAX', 0x10ffff ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_REPLACEMENT', 0xfffd ); - -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_HANGUL_FIRST', "\xea\xb0\x80" /*codepointToUtf8( UNICODE_HANGUL_FIRST )*/ ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_HANGUL_LAST', "\xed\x9e\xa3" /*codepointToUtf8( UNICODE_HANGUL_LAST )*/ ); - -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_HANGUL_LBASE', "\xe1\x84\x80" /*codepointToUtf8( UNICODE_HANGUL_LBASE )*/ ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_HANGUL_VBASE', "\xe1\x85\xa1" /*codepointToUtf8( UNICODE_HANGUL_VBASE )*/ ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_HANGUL_TBASE', "\xe1\x86\xa7" /*codepointToUtf8( UNICODE_HANGUL_TBASE )*/ ); - -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_HANGUL_LEND', "\xe1\x84\x92" /*codepointToUtf8( UNICODE_HANGUL_LEND )*/ ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_HANGUL_VEND', "\xe1\x85\xb5" /*codepointToUtf8( UNICODE_HANGUL_VEND )*/ ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_HANGUL_TEND', "\xe1\x87\x82" /*codepointToUtf8( UNICODE_HANGUL_TEND )*/ ); - -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_SURROGATE_FIRST', "\xed\xa0\x80" /*codepointToUtf8( UNICODE_SURROGATE_FIRST )*/ ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_SURROGATE_LAST', "\xed\xbf\xbf" /*codepointToUtf8( UNICODE_SURROGATE_LAST )*/ ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_MAX', "\xf4\x8f\xbf\xbf" /*codepointToUtf8( UNICODE_MAX )*/ ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_REPLACEMENT', "\xef\xbf\xbd" /*codepointToUtf8( UNICODE_REPLACEMENT )*/ ); -#define( 'UTF8_REPLACEMENT', '!' ); - -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_OVERLONG_A', "\xc1\xbf" ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_OVERLONG_B', "\xe0\x9f\xbf" ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_OVERLONG_C', "\xf0\x8f\xbf\xbf" ); - -# These two ranges are illegal -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_FDD0', "\xef\xb7\x90" /*codepointToUtf8( 0xfdd0 )*/ ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_FDEF', "\xef\xb7\xaf" /*codepointToUtf8( 0xfdef )*/ ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_FFFE', "\xef\xbf\xbe" /*codepointToUtf8( 0xfffe )*/ ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_FFFF', "\xef\xbf\xbf" /*codepointToUtf8( 0xffff )*/ ); - -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_HEAD', false ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_TAIL', true ); diff --git a/includes/libs/normal/UtfNormalUtil.php b/includes/libs/normal/UtfNormalUtil.php deleted file mode 100644 index ad9a2b9a..00000000 --- a/includes/libs/normal/UtfNormalUtil.php +++ /dev/null @@ -1,99 +0,0 @@ -<?php -/** - * Some of these functions are adapted from places in MediaWiki. - * Should probably merge them for consistency. - * - * Copyright © 2004 Brion Vibber <brion@pobox.com> - * https://www.mediawiki.org/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * http://www.gnu.org/copyleft/gpl.html - * - * @file - * @ingroup UtfNormal - */ - - -use UtfNormal\Utils; -/** - * Return UTF-8 sequence for a given Unicode code point. - * - * @param $codepoint Integer: - * @return String - * @throws InvalidArgumentException if fed out of range data. - * @public - * @deprecated since 1.25, use UtfNormal\Utils directly - */ -function codepointToUtf8( $codepoint ) { - return Utils::codepointToUtf8( $codepoint ); -} - -/** - * Take a series of space-separated hexadecimal numbers representing - * Unicode code points and return a UTF-8 string composed of those - * characters. Used by UTF-8 data generation and testing routines. - * - * @param $sequence String - * @return String - * @throws InvalidArgumentException if fed out of range data. - * @private - * @deprecated since 1.25, use UtfNormal\Utils directly - */ -function hexSequenceToUtf8( $sequence ) { - return Utils::hexSequenceToUtf8( $sequence ); -} - -/** - * Take a UTF-8 string and return a space-separated series of hex - * numbers representing Unicode code points. For debugging. - * - * @fixme this is private but extensions + maint scripts are using it - * @param string $str UTF-8 string. - * @return string - * @private - */ -function utf8ToHexSequence( $str ) { - $buf = ''; - foreach ( preg_split( '//u', $str, -1, PREG_SPLIT_NO_EMPTY ) as $cp ) { - $buf .= sprintf( '%04x ', UtfNormal\Utils::utf8ToCodepoint( $cp ) ); - } - - return rtrim( $buf ); -} - -/** - * Determine the Unicode codepoint of a single-character UTF-8 sequence. - * Does not check for invalid input data. - * - * @param $char String - * @return Integer - * @public - * @deprecated since 1.25, use UtfNormal\Utils directly - */ -function utf8ToCodepoint( $char ) { - return Utils::utf8ToCodepoint( $char ); -} - -/** - * Escape a string for inclusion in a PHP single-quoted string literal. - * - * @param string $string string to be escaped. - * @return String: escaped string. - * @public - * @deprecated since 1.25, use UtfNormal\Utils directly - */ -function escapeSingleString( $string ) { - return Utils::escapeSingleString( $string ); -} |