summaryrefslogtreecommitdiff
path: root/includes/libs/XmlTypeCheck.php
diff options
context:
space:
mode:
Diffstat (limited to 'includes/libs/XmlTypeCheck.php')
-rw-r--r--includes/libs/XmlTypeCheck.php251
1 files changed, 161 insertions, 90 deletions
diff --git a/includes/libs/XmlTypeCheck.php b/includes/libs/XmlTypeCheck.php
index aca857e9..31a4e28a 100644
--- a/includes/libs/XmlTypeCheck.php
+++ b/includes/libs/XmlTypeCheck.php
@@ -2,6 +2,11 @@
/**
* XML syntax and type checker.
*
+ * Since 1.24.2, it uses XMLReader instead of xml_parse, which gives us
+ * more control over the expansion of XML entities. When passed to the
+ * callback, entities will be fully expanded, but may report the XML is
+ * invalid if expanding the entities are likely to cause a DoS.
+ *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@@ -25,7 +30,7 @@ class XmlTypeCheck {
* Will be set to true or false to indicate whether the file is
* well-formed XML. Note that this doesn't check schema validity.
*/
- public $wellFormed = false;
+ public $wellFormed = null;
/**
* Will be set to true if the optional element filter returned
@@ -78,12 +83,7 @@ class XmlTypeCheck {
function __construct( $input, $filterCallback = null, $isFile = true, $options = array() ) {
$this->filterCallback = $filterCallback;
$this->parserOptions = array_merge( $this->parserOptions, $options );
-
- if ( $isFile ) {
- $this->validateFromFile( $input );
- } else {
- $this->validateFromString( $input );
- }
+ $this->validateFromInput( $input, $isFile );
}
/**
@@ -125,140 +125,211 @@ class XmlTypeCheck {
return $this->rootElement;
}
+
/**
- * Get an XML parser with the root element handler.
- * @see XmlTypeCheck::rootElementOpen()
- * @return resource a resource handle for the XML parser
+ * @param string $fname the filename
*/
- private function getParser() {
- $parser = xml_parser_create_ns( 'UTF-8' );
- // case folding violates XML standard, turn it off
- xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );
- xml_set_element_handler( $parser, array( $this, 'rootElementOpen' ), false );
- if ( $this->parserOptions['processing_instruction_handler'] ) {
- xml_set_processing_instruction_handler(
- $parser,
- array( $this, 'processingInstructionHandler' )
- );
+ private function validateFromInput( $xml, $isFile ) {
+ $reader = new XMLReader();
+ if ( $isFile ) {
+ $s = $reader->open( $xml, null, LIBXML_NOERROR | LIBXML_NOWARNING );
+ } else {
+ $s = $reader->XML( $xml, null, LIBXML_NOERROR | LIBXML_NOWARNING );
+ }
+ if ( $s !== true ) {
+ // Couldn't open the XML
+ $this->wellFormed = false;
+ } else {
+ $oldDisable = libxml_disable_entity_loader( true );
+ $reader->setParserProperty( XMLReader::SUBST_ENTITIES, true );
+ try {
+ $this->validate( $reader );
+ } catch ( Exception $e ) {
+ // Calling this malformed, because we didn't parse the whole
+ // thing. Maybe just an external entity refernce.
+ $this->wellFormed = false;
+ $reader->close();
+ libxml_disable_entity_loader( $oldDisable );
+ throw $e;
+ }
+ $reader->close();
+ libxml_disable_entity_loader( $oldDisable );
}
- return $parser;
}
- /**
- * @param string $fname the filename
- */
- private function validateFromFile( $fname ) {
- $parser = $this->getParser();
-
- if ( file_exists( $fname ) ) {
- $file = fopen( $fname, "rb" );
- if ( $file ) {
- do {
- $chunk = fread( $file, 32768 );
- $ret = xml_parse( $parser, $chunk, feof( $file ) );
- if ( $ret == 0 ) {
- $this->wellFormed = false;
- fclose( $file );
- xml_parser_free( $parser );
- return;
+ private function readNext( XMLReader $reader ) {
+ set_error_handler( array( $this, 'XmlErrorHandler' ) );
+ $ret = $reader->read();
+ restore_error_handler();
+ return $ret;
+ }
+
+ public function XmlErrorHandler( $errno, $errstr ) {
+ $this->wellFormed = false;
+ }
+
+ private function validate( $reader ) {
+
+ // First, move through anything that isn't an element, and
+ // handle any processing instructions with the callback
+ do {
+ if( !$this->readNext( $reader ) ) {
+ // Hit the end of the document before any elements
+ $this->wellFormed = false;
+ return;
+ }
+ if ( $reader->nodeType === XMLReader::PI ) {
+ $this->processingInstructionHandler( $reader->name, $reader->value );
+ }
+ } while ( $reader->nodeType != XMLReader::ELEMENT );
+
+ // Process the rest of the document
+ do {
+ switch ( $reader->nodeType ) {
+ case XMLReader::ELEMENT:
+ $name = $this->expandNS(
+ $reader->name,
+ $reader->namespaceURI
+ );
+ if ( $this->rootElement === '' ) {
+ $this->rootElement = $name;
}
- } while ( !feof( $file ) );
+ $empty = $reader->isEmptyElement;
+ $attrs = $this->getAttributesArray( $reader );
+ $this->elementOpen( $name, $attrs );
+ if ( $empty ) {
+ $this->elementClose();
+ }
+ break;
+
+ case XMLReader::END_ELEMENT:
+ $this->elementClose();
+ break;
+
+ case XMLReader::WHITESPACE:
+ case XMLReader::SIGNIFICANT_WHITESPACE:
+ case XMLReader::CDATA:
+ case XMLReader::TEXT:
+ $this->elementData( $reader->value );
+ break;
- fclose( $file );
+ case XMLReader::ENTITY_REF:
+ // Unexpanded entity (maybe external?),
+ // don't send to the filter (xml_parse didn't)
+ break;
+
+ case XMLReader::COMMENT:
+ // Don't send to the filter (xml_parse didn't)
+ break;
+
+ case XMLReader::PI:
+ // Processing instructions can happen after the header too
+ $this->processingInstructionHandler(
+ $reader->name,
+ $reader->value
+ );
+ break;
+ default:
+ // One of DOC, DOC_TYPE, ENTITY, END_ENTITY,
+ // NOTATION, or XML_DECLARATION
+ // xml_parse didn't send these to the filter, so we won't.
}
+
+ } while ( $this->readNext( $reader ) );
+
+ if ( $this->stackDepth !== 0 ) {
+ $this->wellFormed = false;
+ } elseif ( $this->wellFormed === null ) {
+ $this->wellFormed = true;
}
- $this->wellFormed = true;
- xml_parser_free( $parser );
}
/**
- *
- * @param string $string the XML-input-string to be checked.
+ * Get all of the attributes for an XMLReader's current node
+ * @param $r XMLReader
+ * @return array of attributes
*/
- private function validateFromString( $string ) {
- $parser = $this->getParser();
- $ret = xml_parse( $parser, $string, true );
- xml_parser_free( $parser );
- if ( $ret == 0 ) {
- $this->wellFormed = false;
- return;
+ private function getAttributesArray( XMLReader $r ) {
+ $attrs = array();
+ while ( $r->moveToNextAttribute() ) {
+ if ( $r->namespaceURI === 'http://www.w3.org/2000/xmlns/' ) {
+ // XMLReader treats xmlns attributes as normal
+ // attributes, while xml_parse doesn't
+ continue;
+ }
+ $name = $this->expandNS( $r->name, $r->namespaceURI );
+ $attrs[$name] = $r->value;
}
- $this->wellFormed = true;
+ return $attrs;
}
/**
- * @param $parser
- * @param $name
- * @param $attribs
+ * @param $name element or attribute name, maybe with a full or short prefix
+ * @param $namespaceURI the namespaceURI
+ * @return string the name prefixed with namespaceURI
*/
- private function rootElementOpen( $parser, $name, $attribs ) {
- $this->rootElement = $name;
-
- if ( is_callable( $this->filterCallback ) ) {
- xml_set_element_handler(
- $parser,
- array( $this, 'elementOpen' ),
- array( $this, 'elementClose' )
- );
- xml_set_character_data_handler( $parser, array( $this, 'elementData' ) );
- $this->elementOpen( $parser, $name, $attribs );
- } else {
- // We only need the first open element
- xml_set_element_handler( $parser, false, false );
+ private function expandNS( $name, $namespaceURI ) {
+ if ( $namespaceURI ) {
+ $parts = explode( ':', $name );
+ $localname = array_pop( $parts );
+ return "$namespaceURI:$localname";
}
+ return $name;
}
/**
- * @param $parser
* @param $name
* @param $attribs
*/
- private function elementOpen( $parser, $name, $attribs ) {
+ private function elementOpen( $name, $attribs ) {
$this->elementDataContext[] = array( $name, $attribs );
$this->elementData[] = '';
$this->stackDepth++;
}
/**
- * @param $parser
- * @param $name
*/
- private function elementClose( $parser, $name ) {
+ private function elementClose() {
list( $name, $attribs ) = array_pop( $this->elementDataContext );
$data = array_pop( $this->elementData );
$this->stackDepth--;
- if ( call_user_func(
- $this->filterCallback,
- $name,
- $attribs,
- $data
- ) ) {
- // Filter hit!
+ if ( is_callable( $this->filterCallback )
+ && call_user_func(
+ $this->filterCallback,
+ $name,
+ $attribs,
+ $data
+ )
+ ) {
+ // Filter hit
$this->filterMatch = true;
}
}
/**
- * @param $parser
* @param $data
*/
- private function elementData( $parser, $data ) {
- // xml_set_character_data_handler breaks the data on & characters, so
- // we collect any data here, and we'll run the callback in elementClose
+ private function elementData( $data ) {
+ // Collect any data here, and we'll run the callback in elementClose
$this->elementData[ $this->stackDepth - 1 ] .= trim( $data );
}
/**
- * @param $parser
* @param $target
* @param $data
*/
- private function processingInstructionHandler( $parser, $target, $data ) {
- if ( call_user_func( $this->parserOptions['processing_instruction_handler'], $target, $data ) ) {
- // Filter hit!
- $this->filterMatch = true;
+ private function processingInstructionHandler( $target, $data ) {
+ if ( $this->parserOptions['processing_instruction_handler'] ) {
+ if ( call_user_func(
+ $this->parserOptions['processing_instruction_handler'],
+ $target,
+ $data
+ ) ) {
+ // Filter hit!
+ $this->filterMatch = true;
+ }
}
}
}