From cdafed9759bbff5952f09e5a3d866f24fba57104 Mon Sep 17 00:00:00 2001 From: Pierre Schmitz Date: Wed, 1 May 2013 08:56:50 +0200 Subject: Update to MediaWiki 1.20.5 --- includes/upload/UploadBase.php | 67 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'includes/upload') diff --git a/includes/upload/UploadBase.php b/includes/upload/UploadBase.php index d40b53d3..3a5733ca 100644 --- a/includes/upload/UploadBase.php +++ b/includes/upload/UploadBase.php @@ -46,6 +46,8 @@ abstract class UploadBase { protected $mBlackListedExtensions; protected $mJavaDetected; + protected static $safeXmlEncodings = array( 'UTF-8', 'ISO-8859-1', 'ISO-8859-2', 'UTF-16', 'UTF-32' ); + const SUCCESS = 0; const OK = 0; const EMPTY_FILE = 3; @@ -966,6 +968,15 @@ abstract class UploadBase { return true; } + // Some browsers will interpret obscure xml encodings as UTF-8, while + // PHP/expat will interpret the given encoding in the xml declaration (bug 47304) + if ( $extension == 'svg' || strpos( $mime, 'image/svg' ) === 0 ) { + if ( self::checkXMLEncodingMissmatch( $file ) ) { + wfProfileOut( __METHOD__ ); + return true; + } + } + /** * Internet Explorer for Windows performs some really stupid file type * autodetection which can cause it to interpret valid image files as HTML @@ -1037,6 +1048,62 @@ abstract class UploadBase { return false; } + + /** + * Check a whitelist of xml encodings that are known not to be interpreted differently + * by the server's xml parser (expat) and some common browsers. + * + * @param string $file pathname to the temporary upload file + * @return Boolean: true if the file contains an encoding that could be misinterpreted + */ + public static function checkXMLEncodingMissmatch( $file ) { + global $wgSVGMetadataCutoff; + $contents = file_get_contents( $file, false, null, -1, $wgSVGMetadataCutoff ); + $encodingRegex = '!encoding[ \t\n\r]*=[ \t\n\r]*[\'"](.*?)[\'"]!si'; + + if ( preg_match( "!<\?xml\b(.*?)\?>!si", $contents, $matches ) ) { + if ( preg_match( $encodingRegex, $matches[1], $encMatch ) + && !in_array( strtoupper( $encMatch[1] ), self::$safeXmlEncodings ) + ) { + wfDebug( __METHOD__ . ": Found unsafe XML encoding '{$encMatch[1]}'\n" ); + return true; + } + } elseif ( preg_match( "!<\?xml\b!si", $contents ) ) { + // Start of XML declaration without an end in the first $wgSVGMetadataCutoff + // bytes. There shouldn't be a legitimate reason for this to happen. + wfDebug( __METHOD__ . ": Unmatched XML declaration start\n" ); + return true; + } elseif ( substr( $contents, 0, 4) == "\x4C\x6F\xA7\x94" ) { + // EBCDIC encoded XML + wfDebug( __METHOD__ . ": EBCDIC Encoded XML\n" ); + return true; + } + + // It's possible the file is encoded with multi-byte encoding, so re-encode attempt to + // detect the encoding in case is specifies an encoding not whitelisted in self::$safeXmlEncodings + $attemptEncodings = array( 'UTF-16', 'UTF-16BE', 'UTF-32', 'UTF-32BE' ); + foreach ( $attemptEncodings as $encoding ) { + wfSuppressWarnings(); + $str = iconv( $encoding, 'UTF-8', $contents ); + wfRestoreWarnings(); + if ( $str != '' && preg_match( "!<\?xml\b(.*?)\?>!si", $str, $matches ) ) { + if ( preg_match( $encodingRegex, $matches[1], $encMatch ) + && !in_array( strtoupper( $encMatch[1] ), self::$safeXmlEncodings ) + ) { + wfDebug( __METHOD__ . ": Found unsafe XML encoding '{$encMatch[1]}'\n" ); + return true; + } + } elseif ( $str != '' && preg_match( "!<\?xml\b!si", $str ) ) { + // Start of XML declaration without an end in the first $wgSVGMetadataCutoff + // bytes. There shouldn't be a legitimate reason for this to happen. + wfDebug( __METHOD__ . ": Unmatched XML declaration start\n" ); + return true; + } + } + + return false; + } + /** * @param $filename string * @return bool -- cgit v1.2.2