summaryrefslogtreecommitdiff
path: root/includes/media
diff options
context:
space:
mode:
Diffstat (limited to 'includes/media')
-rw-r--r--includes/media/BitmapMetadataHandler.php6
-rw-r--r--includes/media/JpegMetadataExtractor.php2
-rw-r--r--includes/media/XMP.php96
3 files changed, 100 insertions, 4 deletions
diff --git a/includes/media/BitmapMetadataHandler.php b/includes/media/BitmapMetadataHandler.php
index dd41c388..1d790155 100644
--- a/includes/media/BitmapMetadataHandler.php
+++ b/includes/media/BitmapMetadataHandler.php
@@ -154,7 +154,7 @@ class BitmapMetadataHandler {
* @throws MWException On invalid file.
*/
static function Jpeg( $filename ) {
- $showXMP = function_exists( 'xml_parser_create_ns' );
+ $showXMP = XMPReader::isSupported();
$meta = new self();
$seg = JpegMetadataExtractor::segmentSplitter( $filename );
@@ -196,7 +196,7 @@ class BitmapMetadataHandler {
* @return array Array for storage in img_metadata.
*/
public static function PNG( $filename ) {
- $showXMP = function_exists( 'xml_parser_create_ns' );
+ $showXMP = XMPReader::isSupported();
$meta = new self();
$array = PNGMetadataExtractor::getMetadata( $filename );
@@ -236,7 +236,7 @@ class BitmapMetadataHandler {
$meta->addMetadata( array( 'GIFFileComment' => $baseArray['comment'] ), 'native' );
}
- if ( $baseArray['xmp'] !== '' && function_exists( 'xml_parser_create_ns' ) ) {
+ if ( $baseArray['xmp'] !== '' && XMPReader::isSupported() ) {
$xmp = new XMPReader();
$xmp->parse( $baseArray['xmp'] );
$xmpRes = $xmp->getResults();
diff --git a/includes/media/JpegMetadataExtractor.php b/includes/media/JpegMetadataExtractor.php
index 8c5b46bb..aaa9930a 100644
--- a/includes/media/JpegMetadataExtractor.php
+++ b/includes/media/JpegMetadataExtractor.php
@@ -48,7 +48,7 @@ class JpegMetadataExtractor {
* @throws MWException If given invalid file.
*/
static function segmentSplitter( $filename ) {
- $showXMP = function_exists( 'xml_parser_create_ns' );
+ $showXMP = XMPReader::isSupported();
$segmentCount = 0;
diff --git a/includes/media/XMP.php b/includes/media/XMP.php
index cdbd5ab2..a3f45e6c 100644
--- a/includes/media/XMP.php
+++ b/includes/media/XMP.php
@@ -80,6 +80,12 @@ class XMPReader {
/** @var int */
private $extendedXMPOffset = 0;
+ /** @var int Flag determining if the XMP is safe to parse **/
+ private $parsable = 0;
+
+ /** @var string Buffer of XML to parse **/
+ private $xmlParsableBuffer = '';
+
/**
* These are various mode constants.
* they are used to figure out what to do
@@ -108,6 +114,12 @@ class XMPReader {
const NS_RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
const NS_XML = 'http://www.w3.org/XML/1998/namespace';
+ // States used while determining if XML is safe to parse
+ const PARSABLE_UNKNOWN = 0;
+ const PARSABLE_OK = 1;
+ const PARSABLE_BUFFERING = 2;
+ const PARSABLE_NO = 3;
+
/**
* Constructor.
*
@@ -145,6 +157,9 @@ class XMPReader {
array( $this, 'endElement' ) );
xml_set_character_data_handler( $this->xmlParser, array( $this, 'char' ) );
+
+ $this->parsable = self::PARSABLE_UNKNOWN;
+ $this->xmlParsableBuffer = '';
}
/** Destroy the xml parser
@@ -156,6 +171,13 @@ class XMPReader {
xml_parser_free( $this->xmlParser );
}
+ /**
+ * Check if this instance supports using this class
+ */
+ public static function isSupported() {
+ return function_exists( 'xml_parser_create_ns' ) && class_exists( 'XMLReader' );
+ }
+
/** Get the result array. Do some post-processing before returning
* the array, and transform any metadata that is special-cased.
*
@@ -305,6 +327,27 @@ class XMPReader {
wfRestoreWarnings();
}
+ // Ensure the XMP block does not have an xml doctype declaration, which
+ // could declare entities unsafe to parse with xml_parse (T85848/T71210).
+ if ( $this->parsable !== self::PARSABLE_OK ) {
+ if ( $this->parsable === self::PARSABLE_NO ) {
+ throw new MWException( 'Unsafe doctype declaration in XML.' );
+ }
+
+ $content = $this->xmlParsableBuffer . $content;
+ if ( !$this->checkParseSafety( $content ) ) {
+ if ( !$allOfIt && $this->parsable !== self::PARSABLE_NO ) {
+ // parse wasn't Unsuccessful yet, so return true
+ // in this case.
+ return true;
+ }
+ $msg = ( $this->parsable === self::PARSABLE_NO ) ?
+ 'Unsafe doctype declaration in XML.' :
+ 'No root element found in XML.';
+ throw new MWException( $msg );
+ }
+ }
+
$ok = xml_parse( $this->xmlParser, $content, $allOfIt );
if ( !$ok ) {
$error = xml_error_string( xml_get_error_code( $this->xmlParser ) );
@@ -437,6 +480,59 @@ class XMPReader {
}
}
+ /**
+ * Check if a block of XML is safe to pass to xml_parse, i.e. doesn't
+ * contain a doctype declaration which could contain a dos attack if we
+ * parse it and expand internal entities (T85848).
+ *
+ * @param string $content xml string to check for parse safety
+ * @return bool true if the xml is safe to parse, false otherwise
+ */
+ private function checkParseSafety( $content ) {
+ $reader = new XMLReader();
+ $result = null;
+
+ // For XMLReader to parse incomplete/invalid XML, it has to be open()'ed
+ // instead of using XML().
+ $reader->open(
+ 'data://text/plain,' . urlencode( $content ),
+ null,
+ LIBXML_NOERROR | LIBXML_NOWARNING | LIBXML_NONET
+ );
+
+ $oldDisable = libxml_disable_entity_loader( true );
+ $reader->setParserProperty( XMLReader::SUBST_ENTITIES, false );
+
+ // Even with LIBXML_NOWARNING set, XMLReader::read gives a warning
+ // when parsing truncated XML, which causes unit tests to fail.
+ wfSuppressWarnings();
+ while ( $reader->read() ) {
+ if ( $reader->nodeType === XMLReader::ELEMENT ) {
+ // Reached the first element without hitting a doctype declaration
+ $this->parsable = self::PARSABLE_OK;
+ $result = true;
+ break;
+ }
+ if ( $reader->nodeType === XMLReader::DOC_TYPE ) {
+ $this->parsable = self::PARSABLE_NO;
+ $result = false;
+ break;
+ }
+ }
+ wfRestoreWarnings();
+ libxml_disable_entity_loader( $oldDisable );
+
+ if ( !is_null( $result ) ) {
+ return $result;
+ }
+
+ // Reached the end of the parsable xml without finding an element
+ // or doctype. Buffer and try again.
+ $this->parsable = self::PARSABLE_BUFFERING;
+ $this->xmlParsableBuffer = $content;
+ return false;
+ }
+
/** When we hit a closing element in MODE_IGNORE
* Check to see if this is the element we started to ignore,
* in which case we get out of MODE_IGNORE