summaryrefslogtreecommitdiff
path: root/includes/title/MediaWikiTitleCodec.php
diff options
context:
space:
mode:
Diffstat (limited to 'includes/title/MediaWikiTitleCodec.php')
-rw-r--r--includes/title/MediaWikiTitleCodec.php34
1 files changed, 32 insertions, 2 deletions
diff --git a/includes/title/MediaWikiTitleCodec.php b/includes/title/MediaWikiTitleCodec.php
index 6ca0799c..20034b74 100644
--- a/includes/title/MediaWikiTitleCodec.php
+++ b/includes/title/MediaWikiTitleCodec.php
@@ -31,6 +31,7 @@
* via parseTitle() or from a (semi)trusted source, such as the database.
*
* @see https://www.mediawiki.org/wiki/Requests_for_comment/TitleValue
+ * @since 1.23
*/
class MediaWikiTitleCodec implements TitleFormatter, TitleParser {
/**
@@ -229,7 +230,7 @@ class MediaWikiTitleCodec implements TitleFormatter, TitleParser {
);
$dbkey = trim( $dbkey, '_' );
- if ( strpos( $dbkey, UTF8_REPLACEMENT ) !== false ) {
+ if ( strpos( $dbkey, UtfNormal\Constants::UTF8_REPLACEMENT ) !== false ) {
# Contained illegal UTF-8 sequences or forbidden Unicode chars.
throw new MalformedTitleException( 'Bad UTF-8 sequences found in title: ' . $text );
}
@@ -322,7 +323,7 @@ class MediaWikiTitleCodec implements TitleFormatter, TitleParser {
}
# Reject illegal characters.
- $rxTc = Title::getTitleInvalidRegex();
+ $rxTc = self::getTitleInvalidRegex();
if ( preg_match( $rxTc, $dbkey ) ) {
throw new MalformedTitleException( 'Illegal characters found in title: ' . $text );
}
@@ -397,4 +398,33 @@ class MediaWikiTitleCodec implements TitleFormatter, TitleParser {
return $parts;
}
+
+ /**
+ * Returns a simple regex that will match on characters and sequences invalid in titles.
+ * Note that this doesn't pick up many things that could be wrong with titles, but that
+ * replacing this regex with something valid will make many titles valid.
+ * Previously Title::getTitleInvalidRegex()
+ *
+ * @return string Regex string
+ * @since 1.25
+ */
+ public static function getTitleInvalidRegex() {
+ static $rxTc = false;
+ if ( !$rxTc ) {
+ # Matching titles will be held as illegal.
+ $rxTc = '/' .
+ # Any character not allowed is forbidden...
+ '[^' . Title::legalChars() . ']' .
+ # URL percent encoding sequences interfere with the ability
+ # to round-trip titles -- you can't link to them consistently.
+ '|%[0-9A-Fa-f]{2}' .
+ # XML/HTML character references produce similar issues.
+ '|&[A-Za-z0-9\x80-\xff]+;' .
+ '|&#[0-9]+;' .
+ '|&#x[0-9A-Fa-f]+;' .
+ '/S';
+ }
+
+ return $rxTc;
+ }
}