summaryrefslogtreecommitdiff
path: root/includes/parser/Preprocessor_DOM.php
diff options
context:
space:
mode:
Diffstat (limited to 'includes/parser/Preprocessor_DOM.php')
-rw-r--r--includes/parser/Preprocessor_DOM.php249
1 files changed, 193 insertions, 56 deletions
diff --git a/includes/parser/Preprocessor_DOM.php b/includes/parser/Preprocessor_DOM.php
index 2b635f7c..755563a0 100644
--- a/includes/parser/Preprocessor_DOM.php
+++ b/includes/parser/Preprocessor_DOM.php
@@ -5,12 +5,18 @@
* @file
* @ingroup Parser
*/
-
+
/**
* @ingroup Parser
*/
class Preprocessor_DOM implements Preprocessor {
- var $parser, $memoryLimit;
+
+ /**
+ * @var Parser
+ */
+ var $parser;
+
+ var $memoryLimit;
const CACHE_VERSION = 1;
@@ -27,21 +33,31 @@ class Preprocessor_DOM implements Preprocessor {
}
}
+ /**
+ * @return PPFrame_DOM
+ */
function newFrame() {
return new PPFrame_DOM( $this );
}
+ /**
+ * @param $args
+ * @return PPCustomFrame_DOM
+ */
function newCustomFrame( $args ) {
return new PPCustomFrame_DOM( $this, $args );
}
+ /**
+ * @param $values
+ * @return PPNode_DOM
+ */
function newPartNodeArray( $values ) {
//NOTE: DOM manipulation is slower than building & parsing XML! (or so Tim sais)
- $xml = "";
- $xml .= "<list>";
+ $xml = "<list>";
foreach ( $values as $k => $val ) {
-
+
if ( is_int( $k ) ) {
$xml .= "<part><name index=\"$k\"/><value>" . htmlspecialchars( $val ) ."</value></part>";
} else {
@@ -59,6 +75,10 @@ class Preprocessor_DOM implements Preprocessor {
return $node;
}
+ /**
+ * @throws MWException
+ * @return bool
+ */
function memCheck() {
if ( $this->memoryLimit === false ) {
return;
@@ -91,14 +111,15 @@ class Preprocessor_DOM implements Preprocessor {
* cache may be implemented at a later date which takes further advantage of these strict
* dependency requirements.
*
- * @private
+ * @return PPNode_DOM
*/
function preprocessToObj( $text, $flags = 0 ) {
wfProfileIn( __METHOD__ );
global $wgMemc, $wgPreprocessorCacheThreshold;
-
+
$xml = false;
- $cacheable = strlen( $text ) > $wgPreprocessorCacheThreshold;
+ $cacheable = ( $wgPreprocessorCacheThreshold !== false
+ && strlen( $text ) > $wgPreprocessorCacheThreshold );
if ( $cacheable ) {
wfProfileIn( __METHOD__.'-cacheable' );
@@ -134,7 +155,8 @@ class Preprocessor_DOM implements Preprocessor {
if ( !$result ) {
// Try running the XML through UtfNormal to get rid of invalid characters
$xml = UtfNormal::cleanUp( $xml );
- $result = $dom->loadXML( $xml );
+ // 1 << 19 == XML_PARSE_HUGE, needed so newer versions of libxml2 don't barf when the XML is >256 levels deep
+ $result = $dom->loadXML( $xml, 1 << 19 );
if ( !$result ) {
throw new MWException( __METHOD__.' generated invalid XML' );
}
@@ -147,7 +169,12 @@ class Preprocessor_DOM implements Preprocessor {
wfProfileOut( __METHOD__ );
return $obj;
}
-
+
+ /**
+ * @param $text string
+ * @param $flags int
+ * @return string
+ */
function preprocessToXml( $text, $flags = 0 ) {
wfProfileIn( __METHOD__ );
$rules = array(
@@ -317,7 +344,7 @@ class Preprocessor_DOM implements Preprocessor {
// Search backwards for leading whitespace
$wsStart = $i ? ( $i - strspn( $revText, ' ', strlen( $text ) - $i ) ) : 0;
// Search forwards for trailing whitespace
- // $wsEnd will be the position of the last space
+ // $wsEnd will be the position of the last space (or the '>' if there's none)
$wsEnd = $endPos + 2 + strspn( $text, ' ', $endPos + 3 );
// Eat the line if possible
// TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at
@@ -344,13 +371,11 @@ class Preprocessor_DOM implements Preprocessor {
if ( $stack->top ) {
$part = $stack->top->getCurrentPart();
- if ( isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 ) {
- // Comments abutting, no change in visual end
- $part->commentEnd = $wsEnd;
- } else {
+ if ( ! (isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 )) {
$part->visualEnd = $wsStart;
- $part->commentEnd = $endPos;
}
+ // Else comments abutting, no change in visual end
+ $part->commentEnd = $endPos;
}
$i = $endPos + 1;
$inner = substr( $text, $startPos, $endPos - $startPos + 1 );
@@ -389,8 +414,8 @@ class Preprocessor_DOM implements Preprocessor {
} else {
$attrEnd = $tagEndPos;
// Find closing tag
- if ( preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",
- $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) )
+ if ( preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",
+ $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) )
{
$inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 );
$i = $matches[0][1] + strlen( $matches[0][0] );
@@ -423,9 +448,7 @@ class Preprocessor_DOM implements Preprocessor {
$accum .= '<inner>' . htmlspecialchars( $inner ) . '</inner>';
}
$accum .= $close . '</ext>';
- }
-
- elseif ( $found == 'line-start' ) {
+ } elseif ( $found == 'line-start' ) {
// Is this the start of a heading?
// Line break belongs before the heading element in any case
if ( $fakeLineStart ) {
@@ -453,9 +476,7 @@ class Preprocessor_DOM implements Preprocessor {
extract( $flags );
$i += $count;
}
- }
-
- elseif ( $found == 'line-end' ) {
+ } elseif ( $found == 'line-end' ) {
$piece = $stack->top;
// A heading must be open, otherwise \n wouldn't have been in the search list
assert( $piece->open == "\n" );
@@ -522,7 +543,7 @@ class Preprocessor_DOM implements Preprocessor {
'open' => $curChar,
'close' => $rule['end'],
'count' => $count,
- 'lineStart' => ($i > 0 && $text[$i-1] == "\n"),
+ 'lineStart' => ($i == 0 || $text[$i-1] == "\n"),
);
$stack->push( $piece );
@@ -557,7 +578,7 @@ class Preprocessor_DOM implements Preprocessor {
}
}
- if ($matchingCount <= 0) {
+ if ( $matchingCount <= 0 ) {
# No matching element found in callback array
# Output a literal closing brace and continue
$accum .= htmlspecialchars( str_repeat( $curChar, $count ) );
@@ -607,7 +628,7 @@ class Preprocessor_DOM implements Preprocessor {
$accum =& $stack->getAccum();
# Re-add the old stack element if it still has unmatched opening characters remaining
- if ($matchingCount < $piece->count) {
+ if ( $matchingCount < $piece->count ) {
$piece->parts = array( new PPDPart );
$piece->count -= $matchingCount;
# do we still qualify for any callback with remaining count?
@@ -630,16 +651,12 @@ class Preprocessor_DOM implements Preprocessor {
# Add XML element to the enclosing accumulator
$accum .= $element;
- }
-
- elseif ( $found == 'pipe' ) {
+ } elseif ( $found == 'pipe' ) {
$findEquals = true; // shortcut for getFlags()
$stack->addPart();
$accum =& $stack->getAccum();
++$i;
- }
-
- elseif ( $found == 'equals' ) {
+ } elseif ( $found == 'equals' ) {
$findEquals = false; // shortcut for getFlags()
$stack->getCurrentPart()->eqpos = strlen( $accum );
$accum .= '=';
@@ -655,7 +672,7 @@ class Preprocessor_DOM implements Preprocessor {
$xml = $stack->rootAccum;
wfProfileOut( __METHOD__ );
-
+
return $xml;
}
}
@@ -665,7 +682,12 @@ class Preprocessor_DOM implements Preprocessor {
* @ingroup Parser
*/
class PPDStack {
- var $stack, $rootAccum, $top;
+ var $stack, $rootAccum;
+
+ /**
+ * @var PPDStack
+ */
+ var $top;
var $out;
var $elementClass = 'PPDStackElement';
@@ -678,6 +700,9 @@ class PPDStack {
$this->accum =& $this->rootAccum;
}
+ /**
+ * @return int
+ */
function count() {
return count( $this->stack );
}
@@ -726,6 +751,9 @@ class PPDStack {
$this->accum =& $this->top->getAccum();
}
+ /**
+ * @return array
+ */
function getFlags() {
if ( !count( $this->stack ) ) {
return array(
@@ -773,6 +801,9 @@ class PPDStackElement {
return $this->parts[count($this->parts) - 1];
}
+ /**
+ * @return array
+ */
function getFlags() {
$partCount = count( $this->parts );
$findPipe = $this->open != "\n" && $this->open != '[';
@@ -785,6 +816,8 @@ class PPDStackElement {
/**
* Get the output string that would result if the close is not found.
+ *
+ * @return string
*/
function breakSyntax( $openingCount = false ) {
if ( $this->open == "\n" ) {
@@ -829,7 +862,21 @@ class PPDPart {
* @ingroup Parser
*/
class PPFrame_DOM implements PPFrame {
- var $preprocessor, $parser, $title;
+
+ /**
+ * @var Preprocessor
+ */
+ var $preprocessor;
+
+ /**
+ * @var Parser
+ */
+ var $parser;
+
+ /**
+ * @var Title
+ */
+ var $title;
var $titleCache;
/**
@@ -847,7 +894,7 @@ class PPFrame_DOM implements PPFrame {
/**
* Construct a new preprocessor frame.
- * @param $preprocessor Preprocessor: The parent preprocessor
+ * @param $preprocessor Preprocessor The parent preprocessor
*/
function __construct( $preprocessor ) {
$this->preprocessor = $preprocessor;
@@ -861,6 +908,8 @@ class PPFrame_DOM implements PPFrame {
/**
* Create a new child frame
* $args is optionally a multi-root PPNode or array containing the template arguments
+ *
+ * @return PPTemplateFrame_DOM
*/
function newChild( $args = false, $title = false ) {
$namedArgs = array();
@@ -896,6 +945,12 @@ class PPFrame_DOM implements PPFrame {
return new PPTemplateFrame_DOM( $this->preprocessor, $this, $numberedArgs, $namedArgs, $title );
}
+ /**
+ * @throws MWException
+ * @param $root
+ * @param $flags int
+ * @return string
+ */
function expand( $root, $flags = 0 ) {
static $expansionDepth = 0;
if ( is_string( $root ) ) {
@@ -1058,11 +1113,11 @@ class PPFrame_DOM implements PPFrame {
# Heading
$s = $this->expand( $contextNode->childNodes, $flags );
- # Insert a heading marker only for <h> children of <root>
- # This is to stop extractSections from going over multiple tree levels
- if ( $contextNode->parentNode->nodeName == 'root'
- && $this->parser->ot['html'] )
- {
+ # Insert a heading marker only for <h> children of <root>
+ # This is to stop extractSections from going over multiple tree levels
+ if ( $contextNode->parentNode->nodeName == 'root'
+ && $this->parser->ot['html'] )
+ {
# Insert heading index marker
$headingIndex = $contextNode->getAttribute( 'i' );
$titleText = $this->title->getPrefixedDBkey();
@@ -1071,7 +1126,7 @@ class PPFrame_DOM implements PPFrame {
$marker = "{$this->parser->mUniqPrefix}-h-$serial-" . Parser::MARKER_SUFFIX;
$count = $contextNode->getAttribute( 'level' );
$s = substr( $s, 0, $count ) . $marker . substr( $s, $count );
- $this->parser->mStripState->general->setPair( $marker, '' );
+ $this->parser->mStripState->addGeneral( $marker, '' );
}
$out .= $s;
} else {
@@ -1107,6 +1162,11 @@ class PPFrame_DOM implements PPFrame {
return $outStack[0];
}
+ /**
+ * @param $sep
+ * @param $flags
+ * @return string
+ */
function implodeWithFlags( $sep, $flags /*, ... */ ) {
$args = array_slice( func_get_args(), 2 );
@@ -1132,6 +1192,8 @@ class PPFrame_DOM implements PPFrame {
/**
* Implode with no flags specified
* This previously called implodeWithFlags but has now been inlined to reduce stack depth
+ *
+ * @return string
*/
function implode( $sep /*, ... */ ) {
$args = array_slice( func_get_args(), 1 );
@@ -1160,6 +1222,8 @@ class PPFrame_DOM implements PPFrame {
/**
* Makes an object that, when expand()ed, will be the same as one obtained
* with implode()
+ *
+ * @return array
*/
function virtualImplode( $sep /*, ... */ ) {
$args = array_slice( func_get_args(), 1 );
@@ -1225,20 +1289,31 @@ class PPFrame_DOM implements PPFrame {
}
}
+ /**
+ * @return array
+ */
function getArguments() {
return array();
}
+ /**
+ * @return array
+ */
function getNumberedArguments() {
return array();
}
+ /**
+ * @return array
+ */
function getNamedArguments() {
return array();
}
/**
* Returns true if there are no arguments in this frame
+ *
+ * @return bool
*/
function isEmpty() {
return true;
@@ -1250,6 +1325,8 @@ class PPFrame_DOM implements PPFrame {
/**
* Returns true if the infinite loop check is OK, false if a loop is detected
+ *
+ * @return bool
*/
function loopCheck( $title ) {
return !isset( $this->loopCheckHash[$title->getPrefixedDBkey()] );
@@ -1257,6 +1334,8 @@ class PPFrame_DOM implements PPFrame {
/**
* Return true if the frame is a template frame
+ *
+ * @return bool
*/
function isTemplate() {
return false;
@@ -1268,9 +1347,21 @@ class PPFrame_DOM implements PPFrame {
* @ingroup Parser
*/
class PPTemplateFrame_DOM extends PPFrame_DOM {
- var $numberedArgs, $namedArgs, $parent;
+ var $numberedArgs, $namedArgs;
+
+ /**
+ * @var PPFrame_DOM
+ */
+ var $parent;
var $numberedExpansionCache, $namedExpansionCache;
+ /**
+ * @param $preprocessor
+ * @param $parent PPFrame_DOM
+ * @param $numberedArgs array
+ * @param $namedArgs array
+ * @param $title Title
+ */
function __construct( $preprocessor, $parent = false, $numberedArgs = array(), $namedArgs = array(), $title = false ) {
parent::__construct( $preprocessor );
@@ -1305,8 +1396,11 @@ class PPTemplateFrame_DOM extends PPFrame_DOM {
$s .= '}';
return $s;
}
+
/**
* Returns true if there are no arguments in this frame
+ *
+ * @return bool
*/
function isEmpty() {
return !count( $this->numberedArgs ) && !count( $this->namedArgs );
@@ -1321,7 +1415,7 @@ class PPTemplateFrame_DOM extends PPFrame_DOM {
}
return $arguments;
}
-
+
function getNumberedArguments() {
$arguments = array();
foreach ( array_keys($this->numberedArgs) as $key ) {
@@ -1329,7 +1423,7 @@ class PPTemplateFrame_DOM extends PPFrame_DOM {
}
return $arguments;
}
-
+
function getNamedArguments() {
$arguments = array();
foreach ( array_keys($this->namedArgs) as $key ) {
@@ -1371,6 +1465,8 @@ class PPTemplateFrame_DOM extends PPFrame_DOM {
/**
* Return true if the frame is a template frame
+ *
+ * @return bool
*/
function isTemplate() {
return true;
@@ -1405,6 +1501,9 @@ class PPCustomFrame_DOM extends PPFrame_DOM {
return $s;
}
+ /**
+ * @return bool
+ */
function isEmpty() {
return !count( $this->args );
}
@@ -1421,14 +1520,22 @@ class PPCustomFrame_DOM extends PPFrame_DOM {
* @ingroup Parser
*/
class PPNode_DOM implements PPNode {
+
+ /**
+ * @var DOMElement
+ */
var $node;
+ var $xpath;
function __construct( $node, $xpath = false ) {
$this->node = $node;
}
- function __get( $name ) {
- if ( $name == 'xpath' ) {
+ /**
+ * @return DOMXPath
+ */
+ function getXPath() {
+ if ( $this->xpath === null ) {
$this->xpath = new DOMXPath( $this->node->ownerDocument );
}
return $this->xpath;
@@ -1446,22 +1553,39 @@ class PPNode_DOM implements PPNode {
return $s;
}
+ /**
+ * @return bool|PPNode_DOM
+ */
function getChildren() {
return $this->node->childNodes ? new self( $this->node->childNodes ) : false;
}
+ /**
+ * @return bool|PPNode_DOM
+ */
function getFirstChild() {
return $this->node->firstChild ? new self( $this->node->firstChild ) : false;
}
+ /**
+ * @return bool|PPNode_DOM
+ */
function getNextSibling() {
return $this->node->nextSibling ? new self( $this->node->nextSibling ) : false;
}
+ /**
+ * @param $type
+ *
+ * @return bool|PPNode_DOM
+ */
function getChildrenOfType( $type ) {
- return new self( $this->xpath->query( $type, $this->node ) );
+ return new self( $this->getXPath()->query( $type, $this->node ) );
}
+ /**
+ * @return int
+ */
function getLength() {
if ( $this->node instanceof DOMNodeList ) {
return $this->node->length;
@@ -1470,11 +1594,18 @@ class PPNode_DOM implements PPNode {
}
}
+ /**
+ * @param $i
+ * @return bool|PPNode_DOM
+ */
function item( $i ) {
$item = $this->node->item( $i );
return $item ? new self( $item ) : false;
}
+ /**
+ * @return string
+ */
function getName() {
if ( $this->node instanceof DOMNodeList ) {
return '#nodelist';
@@ -1488,10 +1619,13 @@ class PPNode_DOM implements PPNode {
* name PPNode name
* index String index
* value PPNode value
+ *
+ * @return array
*/
function splitArg() {
- $names = $this->xpath->query( 'name', $this->node );
- $values = $this->xpath->query( 'value', $this->node );
+ $xpath = $this->getXPath();
+ $names = $xpath->query( 'name', $this->node );
+ $values = $xpath->query( 'value', $this->node );
if ( !$names->length || !$values->length ) {
throw new MWException( 'Invalid brace node passed to ' . __METHOD__ );
}
@@ -1506,12 +1640,15 @@ class PPNode_DOM implements PPNode {
/**
* Split an <ext> node into an associative array containing name, attr, inner and close
* All values in the resulting array are PPNodes. Inner and close are optional.
+ *
+ * @return array
*/
function splitExt() {
- $names = $this->xpath->query( 'name', $this->node );
- $attrs = $this->xpath->query( 'attr', $this->node );
- $inners = $this->xpath->query( 'inner', $this->node );
- $closes = $this->xpath->query( 'close', $this->node );
+ $xpath = $this->getXPath();
+ $names = $xpath->query( 'name', $this->node );
+ $attrs = $xpath->query( 'attr', $this->node );
+ $inners = $xpath->query( 'inner', $this->node );
+ $closes = $xpath->query( 'close', $this->node );
if ( !$names->length || !$attrs->length ) {
throw new MWException( 'Invalid ext node passed to ' . __METHOD__ );
}
@@ -1531,7 +1668,7 @@ class PPNode_DOM implements PPNode {
* Split a <h> node
*/
function splitHeading() {
- if ( !$this->nodeName == 'h' ) {
+ if ( $this->getName() !== 'h' ) {
throw new MWException( 'Invalid h node passed to ' . __METHOD__ );
}
return array(