summaryrefslogtreecommitdiff
path: root/includes/XmlTypeCheck.php
blob: 639d1f85313a030b81cd417a9888b212a5d6b176 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
<?php

/**
 * Checks whether a file is well-formed XML and records the name of its
 * root element.
 *
 * All work happens in the constructor; afterwards read $wellFormed and
 * $rootElement. A file that cannot be opened or read is reported as not
 * well-formed rather than raising an error.
 */
class XmlTypeCheck {
	/**
	 * Will be set to true or false to indicate whether the file is
	 * well-formed XML. Note that this doesn't check schema validity.
	 */
	public $wellFormed = false;

	/**
	 * Name of the document's root element, including any namespace
	 * as an expanded URL. Stays '' if no element was ever opened.
	 */
	public $rootElement = '';

	/** Whether undeclared namespace prefixes are tolerated (see constructor). */
	private $softNamespaces;

	/**
	 * Prefix => namespace URI map collected from the root element's
	 * attributes in soft mode; the default namespace uses the key ''.
	 */
	private $namespaces = array();

	/**
	 * @param $file string filename
	 * @param $softNamespaces bool
	 *        If set to true, use of undeclared XML namespaces will be ignored.
	 *        This matches the behavior of rsvg, but more compliant consumers
	 *        such as Firefox will reject such files.
	 *        Leave off for the default, stricter checks.
	 */
	public function __construct( $file, $softNamespaces=false ) {
		$this->softNamespaces = $softNamespaces;
		$this->run( $file );
	}

	/**
	 * Parse the file in 32 KiB chunks and set $wellFormed accordingly.
	 *
	 * @param $fname string path handed to fopen()
	 */
	private function run( $fname ) {
		$file = fopen( $fname, 'rb' );
		if( $file === false ) {
			// Nothing to parse: without this guard fread()/feof() would be
			// called on a non-resource. $wellFormed keeps its false default.
			return;
		}

		// In soft mode the parser is created without namespace support;
		// elementOpen() then resolves prefixes itself.
		if( $this->softNamespaces ) {
			$parser = xml_parser_create( 'UTF-8' );
		} else {
			$parser = xml_parser_create_ns( 'UTF-8' );
		}

		// case folding violates XML standard, turn it off
		xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );

		xml_set_element_handler( $parser, array( $this, 'elementOpen' ), false );

		$ok = true;
		do {
			$chunk = fread( $file, 32768 );
			if( $chunk === false ) {
				// Read error: the document can't be verified.
				$ok = false;
				break;
			}

			// Evaluate EOF once so the "final chunk" flag given to the
			// parser and the loop condition always agree.
			$isFinal = feof( $file );
			if( xml_parse( $parser, $chunk, $isFinal ) == 0 ) {
				// XML isn't well-formed!
				$ok = false;
				break;
			}
		} while( !$isFinal );

		$this->wellFormed = $ok;

		// Single cleanup path for both success and failure.
		fclose( $file );
		xml_parser_free( $parser );
	}

	/**
	 * Start-element callback: records the root element's name and then
	 * unregisters itself so later elements are not inspected.
	 *
	 * @param $parser resource the XML parser invoking the callback
	 * @param $name string element name as reported by the parser
	 * @param $attribs array attribute name => value
	 */
	private function elementOpen( $parser, $name, $attribs ) {
		if( $this->softNamespaces ) {
			// Check namespaces manually, so expat doesn't throw
			// errors on use of undeclared namespaces.
			$prefixMarker = 'xmlns:';
			$markerLen = strlen( $prefixMarker );
			foreach( $attribs as $attrib => $val ) {
				if( $attrib === 'xmlns' ) {
					$this->namespaces[''] = $val;
				} elseif( substr( $attrib, 0, $markerLen ) === $prefixMarker ) {
					$this->namespaces[substr( $attrib, $markerLen )] = $val;
				}
			}

			if( strpos( $name, ':' ) === false ) {
				$ns = '';
				$subname = $name;
			} else {
				list( $ns, $subname ) = explode( ':', $name, 2 );
			}

			// An undeclared prefix is technically invalid for XML with
			// Namespaces, but in soft mode we let it slide and keep the
			// name exactly as written.
			if( isset( $this->namespaces[$ns] ) ) {
				$name = $this->namespaces[$ns] . ':' . $subname;
			}
		}

		// We only need the first open element
		$this->rootElement = $name;
		xml_set_element_handler( $parser, false, false );
	}
}