From 086ae52d12011746a75f5588e877347bc0457352 Mon Sep 17 00:00:00 2001 From: Pierre Schmitz Date: Fri, 21 Mar 2008 11:49:34 +0100 Subject: Update auf MediaWiki 1.12.0 --- includes/MimeMagic.php | 318 ++++++++++++++++++++++--------------------------- 1 file changed, 145 insertions(+), 173 deletions(-) (limited to 'includes/MimeMagic.php') diff --git a/includes/MimeMagic.php b/includes/MimeMagic.php index 264a3595..2ca5892f 100644 --- a/includes/MimeMagic.php +++ b/includes/MimeMagic.php @@ -24,8 +24,9 @@ image/jpeg jpeg jpg jpe image/png png image/svg+xml image/svg svg image/tiff tiff tif -image/vnd.djvu djvu +image/vnd.djvu image/x.djvu image/x-djvu djvu image/x-portable-pixmap ppm +image/x-xcf xcf text/plain txt text/html html htm video/ogg ogm ogg @@ -54,6 +55,7 @@ image/png [BITMAP] image/svg+xml [DRAWING] image/tiff [BITMAP] image/vnd.djvu [BITMAP] +image/x-xcf [BITMAP] image/x-portable-pixmap [BITMAP] text/plain [TEXT] text/html [TEXT] @@ -351,10 +353,17 @@ class MimeMagic { */ function isRecognizableExtension( $extension ) { static $types = array( + // Types recognized by getimagesize() 'gif', 'jpeg', 'jpg', 'png', 'swf', 'psd', 'bmp', 'tiff', 'tif', 'jpc', 'jp2', 'jpx', 'jb2', 'swc', 'iff', 'wbmp', - 'xbm', 'djvu' + 'xbm', + + // Formats we recognize magic numbers for + 'djvu', 'ogg', 'mid', 'pdf', 'wmf', 'xcf', + + // XML formats we sure hope we recognize reliably + 'svg', ); return in_array( strtolower( $extension ), $types ); } @@ -371,8 +380,22 @@ class MimeMagic { * @return string the mime type of $file */ function guessMimeType( $file, $ext = true ) { - $mime = $this->detectMimeType( $file, $ext ); + $mime = $this->doGuessMimeType( $file, $ext ); + + if( !$mime ) { + wfDebug( __METHOD__.": internal type detection failed for $file (.$ext)...\n" ); + $mime = $this->detectMimeType( $file, $ext ); + } + + if ( isset( $this->mMimeTypeAliases[$mime] ) ) { + $mime = $this->mMimeTypeAliases[$mime]; + } + wfDebug(__METHOD__.": final mime type of $file: $mime\n"); + return $mime; + } + + function doGuessMimeType( $file, $ext = true ) { // Read a chunk of the file wfSuppressWarnings(); $f = fopen( $file, "rt" ); @@ -381,128 +404,132 @@ class MimeMagic { $head = fread( $f, 1024 ); fclose( $f ); - $sub4 = substr( $head, 0, 4 ); - if ( $sub4 == "\x01\x00\x09\x00" || $sub4 == "\xd7\xcd\xc6\x9a" ) { - // WMF kill kill kill + // Hardcode a few magic number checks... + $headers = array( + // Multimedia... + 'MThd' => 'audio/midi', + 'OggS' => 'application/ogg', + + // Image formats... // Note that WMF may have a bare header, no magic number. - // The former of the above two checks is theoretically prone to false positives - $mime = "application/x-msmetafile"; + "\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives? + "\xd7\xcd\xc6\x9a" => 'application/x-msmetafile', + '%PDF' => 'application/pdf', + 'gimp xcf' => 'image/x-xcf', + + // Some forbidden fruit... + 'MZ' => 'application/octet-stream', // DOS/Windows executable + "\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary + "\x7fELF" => 'application/octet-stream', // ELF binary + ); + + foreach( $headers as $magic => $candidate ) { + if( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) { + wfDebug( __METHOD__ . ": magic header in $file recognized as $candidate\n" ); + return $candidate; + } } - if ( strpos( $mime, "text/" ) === 0 || $mime === "application/xml" ) { - - $xml_type = NULL; - $script_type = NULL; - - /* - * look for XML formats (XHTML and SVG) - */ - if ($mime === "text/sgml" || - $mime === "text/plain" || - $mime === "text/html" || - $mime === "text/xml" || - $mime === "application/xml") { - - if ( substr( $head, 0, 5 ) == "%sim', - $head, $match ) ) { - $doctype = $match[1]; - } - if ( preg_match( '%<(\w+).*>%sim', $head, $match ) ) { - $tag = $match[1]; - } - - #print "
ANALYSING $file ($mime): doctype= $doctype; tag= $tag
"; - - if ( strpos( $doctype, "-//W3C//DTD SVG" ) === 0 ) { - $mime = "image/svg+xml"; - } elseif ( $tag === "svg" ) { - $mime = "image/svg+xml"; - } elseif ( strpos( $doctype, "-//W3C//DTD XHTML" ) === 0 ) { - $mime = "text/html"; - } elseif ( $tag === "html" ) { - $mime = "text/html"; - } - } + /* + * look for PHP + * Check for this before HTML/XML... + * Warning: this is a heuristic, and won't match a file with a lot of non-PHP before. + * It will also match text files which could be PHP. :) + */ + if( ( strpos( $head, 'wellFormed ) { + $types = array( + 'http://www.w3.org/2000/svg:svg' => 'image/svg+xml', + 'svg' => 'image/svg+xml', + 'http://www.w3.org/1999/xhtml:html' => 'text/html', // application/xhtml+xml? + 'html' => 'text/html', // application/xhtml+xml? + ); + if( isset( $types[$xml->rootElement] ) ) { + $mime = $types[$xml->rootElement]; + return $mime; + } else { + /// Fixme -- this would be the place to allow additional XML type checks + return 'application/xml'; } + } - /* - * look for shell scripts - */ - if ( !$xml_type ) { - $script_type = NULL; - - # detect by shebang - if ( substr( $head, 0, 2) == "#!" ) { - $script_type = "ASCII"; - } elseif ( substr( $head, 0, 5) == "\xef\xbb\xbf#!" ) { - $script_type = "UTF-8"; - } elseif ( substr( $head, 0, 7) == "\xfe\xff\x00#\x00!" ) { - $script_type = "UTF-16BE"; - } elseif ( substr( $head, 0, 7 ) == "\xff\xfe#\x00!" ) { - $script_type= "UTF-16LE"; - } - - if ( $script_type ) { - if ( $script_type !== "UTF-8" && $script_type !== "ASCII") { - $head = iconv( $script_type, "ASCII//IGNORE", $head); - } - - $match = array(); + /* + * look for shell scripts + */ + $script_type = NULL; + + # detect by shebang + if ( substr( $head, 0, 2) == "#!" ) { + $script_type = "ASCII"; + } elseif ( substr( $head, 0, 5) == "\xef\xbb\xbf#!" ) { + $script_type = "UTF-8"; + } elseif ( substr( $head, 0, 7) == "\xfe\xff\x00#\x00!" ) { + $script_type = "UTF-16BE"; + } elseif ( substr( $head, 0, 7 ) == "\xff\xfe#\x00!" ) { + $script_type= "UTF-16LE"; + } - if ( preg_match( '%/?([^\s]+/)(\w+)%', $head, $match ) ) { - $mime = "application/x-{$match[2]}"; + if ( $script_type ) { + if ( $script_type !== "UTF-8" && $script_type !== "ASCII") { + // Quick and dirty fold down to ASCII! + $pack = array( 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' ); + $chars = unpack( $pack[$script_type], substr( $head, 2 ) ); + $head = ''; + foreach( $chars as $codepoint ) { + if( $codepoint < 128 ) { + $head .= chr( $codepoint ); + } else { + $head .= '?'; } } } - /* - * look for PHP - */ - if( !$xml_type && !$script_type ) { - - if( ( strpos( $head, 'mMimeTypeAliases[$mime] ) ) { - $mime = $this->mMimeTypeAliases[$mime]; + + wfSuppressWarnings(); + $gis = getimagesize( $file ); + wfRestoreWarnings(); + + if( $gis && isset( $gis['mime'] ) ) { + $mime = $gis['mime']; + wfDebug( __METHOD__.": getimagesize detected $file as $mime\n" ); + return $mime; + } else { + return false; } - wfDebug(__METHOD__.": final mime type of $file: $mime\n"); - return $mime; + // Also test DjVu + $deja = new DjVuImage( $file ); + if( $deja->isValid() ) { + wfDebug( __METHOD__.": detected $file as image/vnd.djvu\n" ); + return 'image/vnd.djvu'; + } } /** Internal mime type detection, please use guessMimeType() for application code instead. @@ -559,15 +586,6 @@ class MimeMagic { # see http://www.php.net/manual/en/ref.mime-magic.php for details. $m = mime_content_type($file); - - if ( $m == 'text/plain' ) { - // mime_content_type sometimes considers DJVU files to be text/plain. - $deja = new DjVuImage( $file ); - if( $deja->isValid() ) { - wfDebug( __METHOD__.": (re)detected $file as image/vnd.djvu\n" ); - $m = 'image/vnd.djvu'; - } - } } else { wfDebug( __METHOD__.": no magic mime detector found!\n" ); } @@ -586,66 +604,20 @@ class MimeMagic { } } - # if still not known, use getimagesize to find out the type of image - # TODO: skip things that do not have a well-known image extension? Would that be safe? - wfSuppressWarnings(); - $gis = getimagesize( $file ); - wfRestoreWarnings(); - - $notAnImage = false; - - if ( $gis && is_array($gis) && $gis[2] ) { - - switch ( $gis[2] ) { - case IMAGETYPE_GIF: $m = "image/gif"; break; - case IMAGETYPE_JPEG: $m = "image/jpeg"; break; - case IMAGETYPE_PNG: $m = "image/png"; break; - case IMAGETYPE_SWF: $m = "application/x-shockwave-flash"; break; - case IMAGETYPE_PSD: $m = "application/photoshop"; break; - case IMAGETYPE_BMP: $m = "image/bmp"; break; - case IMAGETYPE_TIFF_II: $m = "image/tiff"; break; - case IMAGETYPE_TIFF_MM: $m = "image/tiff"; break; - case IMAGETYPE_JPC: $m = "image"; break; - case IMAGETYPE_JP2: $m = "image/jpeg2000"; break; - case IMAGETYPE_JPX: $m = "image/jpeg2000"; break; - case IMAGETYPE_JB2: $m = "image"; break; - case IMAGETYPE_SWC: $m = "application/x-shockwave-flash"; break; - case IMAGETYPE_IFF: $m = "image/vnd.xiff"; break; - case IMAGETYPE_WBMP: $m = "image/vnd.wap.wbmp"; break; - case IMAGETYPE_XBM: $m = "image/x-xbitmap"; break; - } - - if ( $m ) { - wfDebug( __METHOD__.": image mime type of $file: $m\n" ); - return $m; - } - else { - $notAnImage = true; - } - } else { - // Also test DjVu - $deja = new DjVuImage( $file ); - if( $deja->isValid() ) { - wfDebug( __METHOD__.": detected $file as image/vnd.djvu\n" ); - return 'image/vnd.djvu'; - } - } - # if desired, look at extension as a fallback. if ( $ext === true ) { $i = strrpos( $file, '.' ); $ext = strtolower( $i ? substr( $file, $i + 1 ) : '' ); } if ( $ext ) { - $m = $this->guessTypesForExtension( $ext ); - - # TODO: if $notAnImage is set, do not trust the file extension if - # the results is one of the image types that should have been recognized - # by getimagesize - - if ( $m ) { - wfDebug( __METHOD__.": extension mime type of $file: $m\n" ); - return $m; + if( $this->isRecognizableExtension( $ext ) ) { + wfDebug( __METHOD__. ": refusing to guess mime type for .$ext file, we should have recognized it\n" ); + } else { + $m = $this->guessTypesForExtension( $ext ); + if ( $m ) { + wfDebug( __METHOD__.": extension mime type of $file: $m\n" ); + return $m; + } } } -- cgit v1.2.2