summary refs log tree commit diff
path: root/includes/Sanitizer.php
diff options
context:
space:
mode:
Diffstat (limited to 'includes/Sanitizer.php')
-rw-r--r-- includes/Sanitizer.php 47
1 files changed, 34 insertions, 13 deletions
diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php
index b5725ce8..26837b3c 100644
--- a/includes/Sanitizer.php
+++ b/includes/Sanitizer.php
@@ -726,29 +726,35 @@ class Sanitizer {
/**
* Pick apart some CSS and check it for forbidden or unsafe structures.
- * Returns a sanitized string, or false if it was just too evil.
+ * Returns a sanitized string. This sanitized string will have
+ * character references and escape sequences decoded, and comments
+ * stripped. If the input is just too evil, only a comment complaining
+ * about evilness will be returned.
*
* Currently URL references, 'expression', 'tps' are forbidden.
*
+ * NOTE: Despite the fact that character references are decoded, the
+ * returned string may contain character references given certain
+ * clever input strings. These character references must
+ * be escaped before the return value is embedded in HTML.
+ *
* @param $value String
- * @return Mixed
+ * @return String
*/
static function checkCss( $value ) {
+ // Decode character references like &#123;
$value = Sanitizer::decodeCharReferences( $value );
- // Remove any comments; IE gets token splitting wrong
- $value = StringUtils::delimiterReplace( '/*', '*/', ' ', $value );
-
- // Remove anything after a comment-start token, to guard against
- // incorrect client implementations.
- $commentPos = strpos( $value, '/*' );
- if ( $commentPos !== false ) {
- $value = substr( $value, 0, $commentPos );
- }
-
// Decode escape sequences and line continuation
// See the grammar in the CSS 2 spec, appendix D.
- static $decodeRegex, $reencodeTable;
+ // This has to be done AFTER decoding character references.
+ // This means it isn't possible for this function to return
+ // unsanitized escape sequences. It is possible to manufacture
+ // input that contains character references that decode to
+ // escape sequences that decode to character references, but
+ // it's OK for the return value to contain character references
+ // because the caller is supposed to escape those anyway.
+ static $decodeRegex;
if ( !$decodeRegex ) {
$space = '[\\x20\\t\\r\\n\\f]';
$nl = '(?:\\n|\\r\\n|\\r|\\f)';
@@ -763,6 +769,21 @@ class Sanitizer {
}
$value = preg_replace_callback( $decodeRegex,
array( __CLASS__, 'cssDecodeCallback' ), $value );
+
+ // Remove any comments; IE gets token splitting wrong
+ // This must be done AFTER decoding character references and
+ // escape sequences, because those steps can introduce comments.
+ // This step cannot introduce character references or escape
+ // sequences, because it replaces comments with spaces rather
+ // than removing them completely.
+ $value = StringUtils::delimiterReplace( '/*', '*/', ' ', $value );
+
+ // Remove anything after a comment-start token, to guard against
+ // incorrect client implementations.
+ $commentPos = strpos( $value, '/*' );
+ if ( $commentPos !== false ) {
+ $value = substr( $value, 0, $commentPos );
+ }
// Reject problematic keywords and control characters
if ( preg_match( '/[\000-\010\016-\037\177]/', $value ) ) {