Update to MediaWiki 1.20.5

author: Pierre Schmitz <pierre@archlinux.de> 2013-05-01 08:56:50 +0200
committer: Pierre Schmitz <pierre@archlinux.de> 2013-05-01 08:56:50 +0200
commit: cdafed9759bbff5952f09e5a3d866f24fba57104 (patch)
tree: 009e2e480b45e5cfd52051b964a60d52a2a7c6df /includes/upload
parent: f7253921201bcf43d385440317ab279fb83a4658 (diff)
1 files changed, 67 insertions, 0 deletions
diff --git a/includes/upload/UploadBase.php b/includes/upload/UploadBase.php
index d40b53d3..3a5733ca 100644
--- a/includes/upload/UploadBase.php
+++ b/includes/upload/UploadBase.php
@@ -46,6 +46,8 @@ abstract class UploadBase {
 	protected $mBlackListedExtensions;
 	protected $mJavaDetected;
 
+	protected static $safeXmlEncodings = array( 'UTF-8', 'ISO-8859-1', 'ISO-8859-2', 'UTF-16', 'UTF-32' );
+
 	const SUCCESS = 0;
 	const OK = 0;
 	const EMPTY_FILE = 3;
@@ -966,6 +968,15 @@ abstract class UploadBase {
 			return true;
 		}
 
+		// Some browsers will interpret obscure xml encodings as UTF-8, while
+		// PHP/expat will interpret the given encoding in the xml declaration (bug 47304)
+		if ( $extension == 'svg' || strpos( $mime, 'image/svg' ) === 0 ) {
+			if ( self::checkXMLEncodingMissmatch( $file ) ) {
+				wfProfileOut( __METHOD__ );
+				return true;
+			}
+		}
+
 		/**
 		 * Internet Explorer for Windows performs some really stupid file type
 		 * autodetection which can cause it to interpret valid image files as HTML
@@ -1037,6 +1048,62 @@ abstract class UploadBase {
 		return false;
 	}
 
+
+	/**
+	 * Check the XML encoding declared at the start of a file against self::$safeXmlEncodings,
+	 * a whitelist of encodings that are known not to be interpreted differently
+	 * by the server's xml parser (expat) and some common browsers.
+	 * @param string $file pathname to the temporary upload file
+	 * @return Boolean: true if the file contains an encoding that could be misinterpreted
+	 */
+	public static function checkXMLEncodingMissmatch( $file ) {
+		global $wgSVGMetadataCutoff;
+		// Read from offset 0; a negative offset seeks from the end of the file on PHP >= 7.1
+		$contents = file_get_contents( $file, false, null, 0, $wgSVGMetadataCutoff );
+		$encodingRegex = '!encoding[ \t\n\r]*=[ \t\n\r]*[\'"](.*?)[\'"]!si';
+		if ( preg_match( "!<\?xml\b(.*?)\?>!si", $contents, $matches ) ) {
+			if ( preg_match( $encodingRegex, $matches[1], $encMatch )
+				&& !in_array( strtoupper( $encMatch[1] ), self::$safeXmlEncodings, true )
+			) {
+				wfDebug( __METHOD__ . ": Found unsafe XML encoding '{$encMatch[1]}'\n" );
+				return true;
+			}
+		} elseif ( preg_match( "!<\?xml\b!si", $contents ) ) {
+			// Start of XML declaration without an end in the first $wgSVGMetadataCutoff
+			// bytes. There shouldn't be a legitimate reason for this to happen.
+			wfDebug( __METHOD__ . ": Unmatched XML declaration start\n" );
+			return true;
+		} elseif ( substr( $contents, 0, 4 ) === "\x4C\x6F\xA7\x94" ) {
+			// EBCDIC encoded XML (these four bytes are "<?xm" in EBCDIC)
+			wfDebug( __METHOD__ . ": EBCDIC Encoded XML\n" );
+			return true;
+		}
+
+		// The file may use a multi-byte encoding, so attempt to re-encode it as UTF-8 and repeat
+		// the check in case it specifies an encoding not whitelisted in self::$safeXmlEncodings
+		$attemptEncodings = array( 'UTF-16', 'UTF-16BE', 'UTF-32', 'UTF-32BE' );
+		foreach ( $attemptEncodings as $encoding ) {
+			wfSuppressWarnings();
+			$str = iconv( $encoding, 'UTF-8', $contents );
+			wfRestoreWarnings();
+			if ( $str !== false && $str !== '' && preg_match( "!<\?xml\b(.*?)\?>!si", $str, $matches ) ) {
+				if ( preg_match( $encodingRegex, $matches[1], $encMatch )
+					&& !in_array( strtoupper( $encMatch[1] ), self::$safeXmlEncodings, true )
+				) {
+					wfDebug( __METHOD__ . ": Found unsafe XML encoding '{$encMatch[1]}'\n" );
+					return true;
+				}
+			} elseif ( $str !== false && $str !== '' && preg_match( "!<\?xml\b!si", $str ) ) {
+				// Start of XML declaration without an end in the first $wgSVGMetadataCutoff
+				// bytes. There shouldn't be a legitimate reason for this to happen.
+				wfDebug( __METHOD__ . ": Unmatched XML declaration start\n" );
+				return true;
+			}
+		}
+
+		return false;
+	}
+
 	/**
 	 * @param $filename string
 	 * @return bool
author	Pierre Schmitz <pierre@archlinux.de>	2013-05-01 08:56:50 +0200
committer	Pierre Schmitz <pierre@archlinux.de>	2013-05-01 08:56:50 +0200
commit	cdafed9759bbff5952f09e5a3d866f24fba57104 (patch)
tree	009e2e480b45e5cfd52051b964a60d52a2a7c6df /includes/upload
parent	f7253921201bcf43d385440317ab279fb83a4658 (diff)