summaryrefslogtreecommitdiff
path: root/includes/parser
diff options
context:
space:
mode:
authorPierre Schmitz <pierre@archlinux.de>2011-12-03 13:29:22 +0100
committerPierre Schmitz <pierre@archlinux.de>2011-12-03 13:29:22 +0100
commitca32f08966f1b51fcb19460f0996bb0c4048e6fe (patch)
treeec04cc15b867bc21eedca904cea9af0254531a11 /includes/parser
parenta22fbfc60f36f5f7ee10d5ae6fe347340c2ee67c (diff)
Update to MediaWiki 1.18.0
* also update ArchLinux skin to chagnes in MonoBook * Use only css to hide our menu bar when printing
Diffstat (limited to 'includes/parser')
-rw-r--r--includes/parser/CoreLinkFunctions.php30
-rw-r--r--includes/parser/CoreParserFunctions.php173
-rw-r--r--includes/parser/CoreTagHooks.php65
-rw-r--r--includes/parser/DateFormatter.php8
-rw-r--r--includes/parser/LinkHolderArray.php252
-rw-r--r--includes/parser/Parser.php1224
-rw-r--r--includes/parser/ParserCache.php44
-rw-r--r--includes/parser/ParserOptions.php192
-rw-r--r--includes/parser/ParserOutput.php133
-rw-r--r--includes/parser/Parser_DiffTest.php4
-rw-r--r--includes/parser/Parser_LinkHooks.php21
-rw-r--r--includes/parser/Preprocessor.php48
-rw-r--r--includes/parser/Preprocessor_DOM.php249
-rw-r--r--includes/parser/Preprocessor_Hash.php214
-rw-r--r--includes/parser/Preprocessor_HipHop.hphp1941
-rw-r--r--includes/parser/StripState.php175
-rw-r--r--includes/parser/Tidy.php163
17 files changed, 4012 insertions, 924 deletions
diff --git a/includes/parser/CoreLinkFunctions.php b/includes/parser/CoreLinkFunctions.php
index 913ec22b..8de13278 100644
--- a/includes/parser/CoreLinkFunctions.php
+++ b/includes/parser/CoreLinkFunctions.php
@@ -10,11 +10,25 @@
* @ingroup Parser
*/
class CoreLinkFunctions {
+ /**
+ * @param $parser Parser_LinkHooks
+ * @return bool
+ */
static function register( $parser ) {
$parser->setLinkHook( NS_CATEGORY, array( __CLASS__, 'categoryLinkHook' ) );
return true;
}
+ /**
+ * @param $parser Parser
+ * @param $holders LinkHolderArray
+ * @param $markers LinkMarkerReplacer
+ * @param Title $title
+ * @param $titleText
+ * @param null $displayText
+ * @param bool $leadingColon
+ * @return bool
+ */
static function defaultLinkHook( $parser, $holders, $markers,
Title $title, $titleText, &$displayText = null, &$leadingColon = false ) {
if( isset($displayText) && $markers->findMarker( $displayText ) ) {
@@ -25,9 +39,19 @@ class CoreLinkFunctions {
# Return false so that this link is reverted back to WikiText
return false;
}
- return $holders->makeHolder( $title, isset($displayText) ? $displayText : $titleText, '', '', '' );
+ return $holders->makeHolder( $title, isset($displayText) ? $displayText : $titleText, array(), '', '' );
}
-
+
+ /**
+ * @param $parser Parser
+ * @param $holders LinkHolderArray
+ * @param $markers LinkMarkerReplacer
+ * @param Title $title
+ * @param $titleText
+ * @param null $sortText
+ * @param bool $leadingColon
+ * @return bool|string
+ */
static function categoryLinkHook( $parser, $holders, $markers,
Title $title, $titleText, &$sortText = null, &$leadingColon = false ) {
global $wgContLang;
@@ -48,5 +72,5 @@ class CoreLinkFunctions {
$parser->mOutput->addCategory( $title->getDBkey(), $sortText );
return '';
}
-
+
}
diff --git a/includes/parser/CoreParserFunctions.php b/includes/parser/CoreParserFunctions.php
index 94949221..eebed44c 100644
--- a/includes/parser/CoreParserFunctions.php
+++ b/includes/parser/CoreParserFunctions.php
@@ -10,6 +10,10 @@
* @ingroup Parser
*/
class CoreParserFunctions {
+ /**
+ * @param $parser Parser
+ * @return void
+ */
static function register( $parser ) {
global $wgAllowDisplayTitle, $wgAllowSlowParserFunctions;
@@ -31,6 +35,8 @@ class CoreParserFunctions {
$parser->setFunctionHook( 'localurle', array( __CLASS__, 'localurle' ), SFH_NO_HASH );
$parser->setFunctionHook( 'fullurl', array( __CLASS__, 'fullurl' ), SFH_NO_HASH );
$parser->setFunctionHook( 'fullurle', array( __CLASS__, 'fullurle' ), SFH_NO_HASH );
+ $parser->setFunctionHook( 'canonicalurl', array( __CLASS__, 'canonicalurl' ), SFH_NO_HASH );
+ $parser->setFunctionHook( 'canonicalurle', array( __CLASS__, 'canonicalurle' ), SFH_NO_HASH );
$parser->setFunctionHook( 'formatnum', array( __CLASS__, 'formatnum' ), SFH_NO_HASH );
$parser->setFunctionHook( 'grammar', array( __CLASS__, 'grammar' ), SFH_NO_HASH );
$parser->setFunctionHook( 'gender', array( __CLASS__, 'gender' ), SFH_NO_HASH );
@@ -83,18 +89,27 @@ class CoreParserFunctions {
}
}
+ /**
+ * @param $parser Parser
+ * @param string $part1
+ * @return array
+ */
static function intFunction( $parser, $part1 = '' /*, ... */ ) {
if ( strval( $part1 ) !== '' ) {
$args = array_slice( func_get_args(), 2 );
- $message = wfMsgGetKey( $part1, true, $parser->getOptions()->getUserLang(), false );
- $message = wfMsgReplaceArgs( $message, $args );
- $message = $parser->replaceVariables( $message ); // like $wgMessageCache->transform()
- return $message;
+ $message = wfMessage( $part1, $args )->inLanguage( $parser->getOptions()->getUserLang() )->plain();
+ return array( $message, 'noparse' => false );
} else {
return array( 'found' => false );
}
}
+ /**
+ * @param $parser Parser
+ * @param $date
+ * @param null $defaultPref
+ * @return mixed|string
+ */
static function formatDate( $parser, $date, $defaultPref = null ) {
$df = DateFormatter::getInstance();
@@ -172,6 +187,11 @@ class CoreParserFunctions {
return $wgContLang->ucfirst( $s );
}
+ /**
+ * @param $parser Parser
+ * @param string $s
+ * @return
+ */
static function lc( $parser, $s = '' ) {
global $wgContLang;
if ( is_callable( array( $parser, 'markerSkipCallback' ) ) ) {
@@ -181,6 +201,11 @@ class CoreParserFunctions {
}
}
+ /**
+ * @param $parser Parser
+ * @param string $s
+ * @return
+ */
static function uc( $parser, $s = '' ) {
global $wgContLang;
if ( is_callable( array( $parser, 'markerSkipCallback' ) ) ) {
@@ -194,6 +219,8 @@ class CoreParserFunctions {
static function localurle( $parser, $s = '', $arg = null ) { return self::urlFunction( 'escapeLocalURL', $s, $arg ); }
static function fullurl( $parser, $s = '', $arg = null ) { return self::urlFunction( 'getFullURL', $s, $arg ); }
static function fullurle( $parser, $s = '', $arg = null ) { return self::urlFunction( 'escapeFullURL', $s, $arg ); }
+ static function canonicalurl( $parser, $s = '', $arg = null ) { return self::urlFunction( 'getCanonicalURL', $s, $arg ); }
+ static function canonicalurle( $parser, $s = '', $arg = null ) { return self::urlFunction( 'escapeCanonicalURL', $s, $arg ); }
static function urlFunction( $func, $s = '', $arg = null ) {
$title = Title::newFromText( $s );
@@ -219,6 +246,12 @@ class CoreParserFunctions {
}
}
+ /**
+ * @param $parser Parser
+ * @param string $num
+ * @param null $raw
+ * @return
+ */
static function formatNum( $parser, $num = '', $raw = null) {
if ( self::israw( $raw ) ) {
return $parser->getFunctionLang()->parseFormattedNumber( $num );
@@ -227,35 +260,54 @@ class CoreParserFunctions {
}
}
+ /**
+ * @param $parser Parser
+ * @param string $case
+ * @param string $word
+ * @return
+ */
static function grammar( $parser, $case = '', $word = '' ) {
return $parser->getFunctionLang()->convertGrammar( $word, $case );
}
- static function gender( $parser, $user ) {
+ /**
+ * @param $parser Parser
+ * @param $username string
+ * @return
+ */
+ static function gender( $parser, $username ) {
wfProfileIn( __METHOD__ );
$forms = array_slice( func_get_args(), 2);
+ $username = trim( $username );
+
// default
$gender = User::getDefaultOption( 'gender' );
// allow prefix.
- $title = Title::newFromText( $user );
+ $title = Title::newFromText( $username );
- if ( is_object( $title ) && $title->getNamespace() == NS_USER )
- $user = $title->getText();
+ if ( $title && $title->getNamespace() == NS_USER ) {
+ $username = $title->getText();
+ }
- // check parameter, or use $wgUser if in interface message
- $user = User::newFromName( $user );
+ // check parameter, or use the ParserOptions if in interface message
+ $user = User::newFromName( $username );
if ( $user ) {
$gender = $user->getOption( 'gender' );
- } elseif ( $parser->getOptions()->getInterfaceMessage() ) {
- global $wgUser;
- $gender = $wgUser->getOption( 'gender' );
+ } elseif ( $username === '' && $parser->getOptions()->getInterfaceMessage() ) {
+ $gender = $parser->getOptions()->getUser()->getOption( 'gender' );
}
$ret = $parser->getFunctionLang()->gender( $gender, $forms );
wfProfileOut( __METHOD__ );
return $ret;
}
+
+ /**
+ * @param $parser Parser
+ * @param string $text
+ * @return
+ */
static function plural( $parser, $text = '' ) {
$forms = array_slice( func_get_args(), 2 );
$text = $parser->getFunctionLang()->parseFormattedNumber( $text );
@@ -396,10 +448,11 @@ class CoreParserFunctions {
return '';
return wfUrlencode( $t->getSubjectNsText() );
}
- /*
+
+ /**
* Functions to get and normalize pagenames, corresponding to the magic words
* of the same names
- */
+ */
static function pagename( $parser, $title = null ) {
$t = Title::newFromText( $title );
if ( is_null( $t ) )
@@ -410,7 +463,7 @@ class CoreParserFunctions {
$t = Title::newFromText( $title );
if ( is_null( $t ) )
return '';
- return $t->getPartialURL();
+ return wfEscapeWikiText( $t->getPartialURL() );
}
static function fullpagename( $parser, $title = null ) {
$t = Title::newFromText( $title );
@@ -422,31 +475,31 @@ class CoreParserFunctions {
$t = Title::newFromText( $title );
if ( is_null( $t ) || !$t->canTalk() )
return '';
- return $t->getPrefixedURL();
+ return wfEscapeWikiText( $t->getPrefixedURL() );
}
static function subpagename( $parser, $title = null ) {
$t = Title::newFromText( $title );
if ( is_null( $t ) )
return '';
- return $t->getSubpageText();
+ return wfEscapeWikiText( $t->getSubpageText() );
}
static function subpagenamee( $parser, $title = null ) {
$t = Title::newFromText( $title );
if ( is_null( $t ) )
return '';
- return $t->getSubpageUrlForm();
+ return wfEscapeWikiText( $t->getSubpageUrlForm() );
}
static function basepagename( $parser, $title = null ) {
$t = Title::newFromText( $title );
if ( is_null( $t ) )
return '';
- return $t->getBaseText();
+ return wfEscapeWikiText( $t->getBaseText() );
}
static function basepagenamee( $parser, $title = null ) {
$t = Title::newFromText( $title );
if ( is_null( $t ) )
return '';
- return wfUrlEncode( str_replace( ' ', '_', $t->getBaseText() ) );
+ return wfEscapeWikiText( wfUrlEncode( str_replace( ' ', '_', $t->getBaseText() ) ) );
}
static function talkpagename( $parser, $title = null ) {
$t = Title::newFromText( $title );
@@ -458,7 +511,7 @@ class CoreParserFunctions {
$t = Title::newFromText( $title );
if ( is_null( $t ) || !$t->canTalk() )
return '';
- return $t->getTalkPage()->getPrefixedUrl();
+ return wfEscapeWikiText( $t->getTalkPage()->getPrefixedUrl() );
}
static function subjectpagename( $parser, $title = null ) {
$t = Title::newFromText( $title );
@@ -470,7 +523,7 @@ class CoreParserFunctions {
$t = Title::newFromText( $title );
if ( is_null( $t ) )
return '';
- return $t->getSubjectPage()->getPrefixedUrl();
+ return wfEscapeWikiText( $t->getSubjectPage()->getPrefixedUrl() );
}
/**
@@ -503,12 +556,13 @@ class CoreParserFunctions {
* Return the size of the given page, or 0 if it's nonexistent. This is an
* expensive parser function and can't be called too many times per page.
*
- * @todo Fixme: This doesn't work correctly on preview for getting the size
+ * @todo FIXME: This doesn't work correctly on preview for getting the size
* of the current page.
- * @todo Fixme: Title::getLength() documentation claims that it adds things
+ * @todo FIXME: Title::getLength() documentation claims that it adds things
* to the link cache, so the local cache here should be unnecessary, but
* in fact calling getLength() repeatedly for the same $page does seem to
* run one query for each call?
+ * @param $parser Parser
*/
static function pagesize( $parser, $page = '', $raw = null ) {
static $cache = array();
@@ -546,10 +600,25 @@ class CoreParserFunctions {
return implode( $restrictions, ',' );
}
- static function language( $parser, $arg = '' ) {
+ /**
+ * Gives language names.
+ * @param $parser Parser
+ * @param $code String Language code
+ * @param $language String Language code
+ * @return String
+ */
+ static function language( $parser, $code = '', $language = '' ) {
global $wgContLang;
- $lang = $wgContLang->getLanguageName( strtolower( $arg ) );
- return $lang != '' ? $lang : $arg;
+ $code = strtolower( $code );
+ $language = strtolower( $language );
+
+ if ( $language !== '' ) {
+ $names = Language::getTranslatedLanguageNames( $language );
+ return isset( $names[$code] ) ? $names[$code] : wfBCP47( $code );
+ }
+
+ $lang = $wgContLang->getLanguageName( $code );
+ return $lang !== '' ? $lang : wfBCP47( $code );
}
/**
@@ -586,12 +655,17 @@ class CoreParserFunctions {
return self::pad( $string, $length, $padding );
}
+ /**
+ * @param $parser Parser
+ * @param $text
+ * @return string
+ */
static function anchorencode( $parser, $text ) {
return substr( $parser->guessSectionNameFromWikiText( $text ), 1);
}
static function special( $parser, $text ) {
- list( $page, $subpage ) = SpecialPage::resolveAliasWithSubpage( $text );
+ list( $page, $subpage ) = SpecialPageFactory::resolveAlias( $text );
if ( $page ) {
$title = SpecialPage::getTitleFor( $page, $subpage );
return $title;
@@ -600,6 +674,11 @@ class CoreParserFunctions {
}
}
+ /**
+ * @param $parser Parser
+ * @param $text
+ * @return string
+ */
public static function defaultsort( $parser, $text ) {
$text = trim( $text );
if( strlen( $text ) == 0 )
@@ -616,11 +695,41 @@ class CoreParserFunctions {
'</span>' );
}
- public static function filepath( $parser, $name='', $option='' ) {
+ // Usage {{filepath|300}}, {{filepath|nowiki}}, {{filepath|nowiki|300}} or {{filepath|300|nowiki}}
+ public static function filepath( $parser, $name='', $argA='', $argB='' ) {
$file = wfFindFile( $name );
- if( $file ) {
+ $size = '';
+ $argA_int = intval( $argA );
+ $argB_int = intval( $argB );
+
+ if ( $argB_int > 0 ) {
+ // {{filepath: | option | size }}
+ $size = $argB_int;
+ $option = $argA;
+
+ } elseif ( $argA_int > 0 ) {
+ // {{filepath: | size [|option] }}
+ $size = $argA_int;
+ $option = $argB;
+
+ } else {
+ // {{filepath: [|option] }}
+ $option = $argA;
+ }
+
+ if ( $file ) {
$url = $file->getFullUrl();
- if( $option == 'nowiki' ) {
+
+ // If a size is requested...
+ if ( is_integer( $size ) ) {
+ $mto = $file->transform( array( 'width' => $size ) );
+ // ... and we can
+ if ( $mto && !$mto->isError() ) {
+ // ... change the URL to point to a thumbnail.
+ $url = wfExpandUrl( $mto->getUrl(), PROTO_RELATIVE );
+ }
+ }
+ if ( $option == 'nowiki' ) {
return array( $url, 'nowiki' => true );
}
return $url;
diff --git a/includes/parser/CoreTagHooks.php b/includes/parser/CoreTagHooks.php
index 33f3c824..7d488c4b 100644
--- a/includes/parser/CoreTagHooks.php
+++ b/includes/parser/CoreTagHooks.php
@@ -10,19 +10,30 @@
* @ingroup Parser
*/
class CoreTagHooks {
+ /**
+ * @param $parser Parser
+ * @return void
+ */
static function register( $parser ) {
- global $wgRawHtml, $wgUseTeX;
+ global $wgRawHtml;
$parser->setHook( 'pre', array( __CLASS__, 'pre' ) );
$parser->setHook( 'nowiki', array( __CLASS__, 'nowiki' ) );
$parser->setHook( 'gallery', array( __CLASS__, 'gallery' ) );
if ( $wgRawHtml ) {
$parser->setHook( 'html', array( __CLASS__, 'html' ) );
}
- if ( $wgUseTeX ) {
- $parser->setHook( 'math', array( __CLASS__, 'math' ) );
- }
}
+ /**
+ * Core parser tag hook function for 'pre'.
+ * Text is treated roughly as 'nowiki' wrapped in an HTML 'pre' tag;
+ * valid HTML attributes are passed on.
+ *
+ * @param string $text
+ * @param array $attribs
+ * @param Parser $parser
+ * @return string HTML
+ */
static function pre( $text, $attribs, $parser ) {
// Backwards-compatibility hack
$content = StringUtils::delimiterReplace( '<nowiki>', '</nowiki>', '$1', $text, 'i' );
@@ -33,6 +44,20 @@ class CoreTagHooks {
'</pre>';
}
+ /**
+ * Core parser tag hook function for 'html', used only when
+ * $wgRawHtml is enabled.
+ *
+ * This is potentially unsafe and should be used only in very careful
+ * circumstances, as the contents are emitted as raw HTML.
+ *
+ * Uses undocumented extended tag hook return values, introduced in r61913.
+ *
+ * @param $content string
+ * @param $attributes array
+ * @param $parser Parser
+ * @return array
+ */
static function html( $content, $attributes, $parser ) {
global $wgRawHtml;
if( $wgRawHtml ) {
@@ -42,16 +67,38 @@ class CoreTagHooks {
}
}
+ /**
+ * Core parser tag hook function for 'nowiki'. Text within this section
+ * gets interpreted as a string of text with HTML-compatible character
+ * references, and wiki markup within it will not be expanded.
+ *
+ * Uses undocumented extended tag hook return values, introduced in r61913.
+ *
+ * @param $content string
+ * @param $attributes array
+ * @param $parser Parser
+ * @return array
+ */
static function nowiki( $content, $attributes, $parser ) {
$content = strtr( $content, array( '-{' => '-&#123;', '}-' => '&#125;-' ) );
return array( Xml::escapeTagsOnly( $content ), 'markerType' => 'nowiki' );
}
- static function math( $content, $attributes, $parser ) {
- global $wgContLang;
- return $wgContLang->armourMath( MathRenderer::renderMath( $content, $attributes, $parser->getOptions() ) );
- }
-
+ /**
+ * Core parser tag hook function for 'gallery'.
+ *
+ * Renders a thumbnail list of the given images, with optional captions.
+ * Full syntax documented on the wiki:
+ *
+ * http://www.mediawiki.org/wiki/Help:Images#Gallery_syntax
+ *
+ * @todo break Parser::renderImageGallery out here too.
+ *
+ * @param string $content
+ * @param array $attributes
+ * @param Parser $parser
+ * @return string HTML
+ */
static function gallery( $content, $attributes, $parser ) {
return $parser->renderImageGallery( $content, $attributes );
}
diff --git a/includes/parser/DateFormatter.php b/includes/parser/DateFormatter.php
index cf510171..6559e886 100644
--- a/includes/parser/DateFormatter.php
+++ b/includes/parser/DateFormatter.php
@@ -182,8 +182,8 @@ class DateFormatter
$bits = array();
$key = $this->keys[$this->mSource];
for ( $p=0; $p < strlen($key); $p++ ) {
- if ( $key{$p} != ' ' ) {
- $bits[$key{$p}] = $matches[$p+1];
+ if ( $key[$p] != ' ' ) {
+ $bits[$key[$p]] = $matches[$p+1];
}
}
@@ -224,7 +224,7 @@ class DateFormatter
}
for ( $p=0; $p < strlen( $format ); $p++ ) {
- $char = $format{$p};
+ $char = $format[$p];
switch ( $char ) {
case 'd': # ISO day of month
$text .= $bits['d'];
@@ -327,7 +327,7 @@ class DateFormatter
* @todo document
*/
function makeNormalYear( $iso ) {
- if ( $iso{0} == '-' ) {
+ if ( $iso[0] == '-' ) {
$text = (intval( substr( $iso, 1 ) ) + 1) . ' BC';
} else {
$text = intval( $iso );
diff --git a/includes/parser/LinkHolderArray.php b/includes/parser/LinkHolderArray.php
index 19313b80..5418b6e5 100644
--- a/includes/parser/LinkHolderArray.php
+++ b/includes/parser/LinkHolderArray.php
@@ -12,6 +12,7 @@ class LinkHolderArray {
var $internals = array(), $interwikis = array();
var $size = 0;
var $parent;
+ protected $tempIdOffset;
function __construct( $parent ) {
$this->parent = $parent;
@@ -26,8 +27,51 @@ class LinkHolderArray {
}
}
+ /**
+ * Don't serialize the parent object, it is big, and not needed when it is
+ * a parameter to mergeForeign(), which is the only application of
+ * serializing at present.
+ *
+ * Compact the titles, only serialize the text form.
+ */
+ function __sleep() {
+ foreach ( $this->internals as &$nsLinks ) {
+ foreach ( $nsLinks as &$entry ) {
+ unset( $entry['title'] );
+ }
+ }
+ unset( $nsLinks );
+ unset( $entry );
+
+ foreach ( $this->interwikis as &$entry ) {
+ unset( $entry['title'] );
+ }
+ unset( $entry );
+
+ return array( 'internals', 'interwikis', 'size' );
+ }
+
+ /**
+ * Recreate the Title objects
+ */
+ function __wakeup() {
+ foreach ( $this->internals as &$nsLinks ) {
+ foreach ( $nsLinks as &$entry ) {
+ $entry['title'] = Title::newFromText( $entry['pdbk'] );
+ }
+ }
+ unset( $nsLinks );
+ unset( $entry );
+
+ foreach ( $this->interwikis as &$entry ) {
+ $entry['title'] = Title::newFromText( $entry['pdbk'] );
+ }
+ unset( $entry );
+ }
+
/**
* Merge another LinkHolderArray into this one
+ * @param $other LinkHolderArray
*/
function merge( $other ) {
foreach ( $other->internals as $ns => $entries ) {
@@ -42,6 +86,86 @@ class LinkHolderArray {
}
/**
+ * Merge a LinkHolderArray from another parser instance into this one. The
+ * keys will not be preserved. Any text which went with the old
+ * LinkHolderArray and needs to work with the new one should be passed in
+ * the $texts array. The strings in this array will have their link holders
+ * converted for use in the destination link holder. The resulting array of
+ * strings will be returned.
+ *
+ * @param $other LinkHolderArray
+ * @param $texts Array of strings
+ * @return Array
+ */
+ function mergeForeign( $other, $texts ) {
+ $this->tempIdOffset = $idOffset = $this->parent->nextLinkID();
+ $maxId = 0;
+
+ # Renumber internal links
+ foreach ( $other->internals as $ns => $nsLinks ) {
+ foreach ( $nsLinks as $key => $entry ) {
+ $newKey = $idOffset + $key;
+ $this->internals[$ns][$newKey] = $entry;
+ $maxId = $newKey > $maxId ? $newKey : $maxId;
+ }
+ }
+ $texts = preg_replace_callback( '/(<!--LINK \d+:)(\d+)(-->)/',
+ array( $this, 'mergeForeignCallback' ), $texts );
+
+ # Renumber interwiki links
+ foreach ( $other->interwikis as $key => $entry ) {
+ $newKey = $idOffset + $key;
+ $this->interwikis[$newKey] = $entry;
+ $maxId = $newKey > $maxId ? $newKey : $maxId;
+ }
+ $texts = preg_replace_callback( '/(<!--IWLINK )(\d+)(-->)/',
+ array( $this, 'mergeForeignCallback' ), $texts );
+
+ # Set the parent link ID to be beyond the highest used ID
+ $this->parent->setLinkID( $maxId + 1 );
+ $this->tempIdOffset = null;
+ return $texts;
+ }
+
+ protected function mergeForeignCallback( $m ) {
+ return $m[1] . ( $m[2] + $this->tempIdOffset ) . $m[3];
+ }
+
+ /**
+ * Get a subset of the current LinkHolderArray which is sufficient to
+ * interpret the given text.
+ */
+ function getSubArray( $text ) {
+ $sub = new LinkHolderArray( $this->parent );
+
+ # Internal links
+ $pos = 0;
+ while ( $pos < strlen( $text ) ) {
+ if ( !preg_match( '/<!--LINK (\d+):(\d+)-->/',
+ $text, $m, PREG_OFFSET_CAPTURE, $pos ) )
+ {
+ break;
+ }
+ $ns = $m[1][0];
+ $key = $m[2][0];
+ $sub->internals[$ns][$key] = $this->internals[$ns][$key];
+ $pos = $m[0][1] + strlen( $m[0][0] );
+ }
+
+ # Interwiki links
+ $pos = 0;
+ while ( $pos < strlen( $text ) ) {
+ if ( !preg_match( '/<!--IWLINK (\d+)-->/', $text, $m, PREG_OFFSET_CAPTURE, $pos ) ) {
+ break;
+ }
+ $key = $m[1][0];
+ $sub->interwikis[$key] = $this->interwikis[$key];
+ $pos = $m[0][1] + strlen( $m[0][0] );
+ }
+ return $sub;
+ }
+
+ /**
* Returns true if the memory requirements of this object are getting large
*/
function isBig() {
@@ -65,8 +189,9 @@ class LinkHolderArray {
* parsing of interwiki links, and secondly to allow all existence checks and
* article length checks (for stub links) to be bundled into a single query.
*
+ * @param $nt Title
*/
- function makeHolder( $nt, $text = '', $query = '', $trail = '', $prefix = '' ) {
+ function makeHolder( $nt, $text = '', $query = array(), $trail = '', $prefix = '' ) {
wfProfileIn( __METHOD__ );
if ( ! is_object($nt) ) {
# Fail gracefully
@@ -80,7 +205,7 @@ class LinkHolderArray {
'text' => $prefix.$text.$inside,
'pdbk' => $nt->getPrefixedDBkey(),
);
- if ( $query !== '' ) {
+ if ( $query !== array() ) {
$entry['query'] = $query;
}
@@ -102,18 +227,7 @@ class LinkHolderArray {
}
/**
- * Get the stub threshold
- */
- function getStubThreshold() {
- global $wgUser;
- if ( !isset( $this->stubThreshold ) ) {
- $this->stubThreshold = $wgUser->getStubThreshold();
- }
- return $this->stubThreshold;
- }
-
- /**
- * FIXME: update documentation. makeLinkObj() is deprecated.
+ * @todo FIXME: Update documentation. makeLinkObj() is deprecated.
* Replace <!--LINK--> link placeholders with actual links, in the buffer
* Placeholders created in Skin::makeLinkObj()
* Returns an array of link CSS classes, indexed by PDBK.
@@ -140,14 +254,12 @@ class LinkHolderArray {
global $wgContLang;
$colours = array();
- $sk = $this->parent->getOptions()->getSkin( $this->parent->mTitle );
$linkCache = LinkCache::singleton();
$output = $this->parent->getOutput();
wfProfileIn( __METHOD__.'-check' );
$dbr = wfGetDB( DB_SLAVE );
- $page = $dbr->tableName( 'page' );
- $threshold = $this->getStubThreshold();
+ $threshold = $this->parent->getOptions()->getStubThreshold();
# Sort by namespace
ksort( $this->internals );
@@ -155,8 +267,7 @@ class LinkHolderArray {
$linkcolour_ids = array();
# Generate query
- $query = false;
- $current = null;
+ $queries = array();
foreach ( $this->internals as $ns => $entries ) {
foreach ( $entries as $entry ) {
$title = $entry['title'];
@@ -174,32 +285,35 @@ class LinkHolderArray {
} elseif ( $ns == NS_SPECIAL ) {
$colours[$pdbk] = 'new';
} elseif ( ( $id = $linkCache->getGoodLinkID( $pdbk ) ) != 0 ) {
- $colours[$pdbk] = $sk->getLinkColour( $title, $threshold );
+ $colours[$pdbk] = Linker::getLinkColour( $title, $threshold );
$output->addLink( $title, $id );
$linkcolour_ids[$id] = $pdbk;
} elseif ( $linkCache->isBadLink( $pdbk ) ) {
$colours[$pdbk] = 'new';
} else {
# Not in the link cache, add it to the query
- if ( !isset( $current ) ) {
- $current = $ns;
- $query = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len, page_latest";
- $query .= " FROM $page WHERE (page_namespace=$ns AND page_title IN(";
- } elseif ( $current != $ns ) {
- $current = $ns;
- $query .= ")) OR (page_namespace=$ns AND page_title IN(";
- } else {
- $query .= ', ';
- }
-
- $query .= $dbr->addQuotes( $title->getDBkey() );
+ $queries[$ns][] = $title->getDBkey();
}
}
}
- if ( $query ) {
- $query .= '))';
+ if ( $queries ) {
+ $where = array();
+ foreach( $queries as $ns => $pages ){
+ $where[] = $dbr->makeList(
+ array(
+ 'page_namespace' => $ns,
+ 'page_title' => $pages,
+ ),
+ LIST_AND
+ );
+ }
- $res = $dbr->query( $query, __METHOD__ );
+ $res = $dbr->select(
+ 'page',
+ array( 'page_id', 'page_namespace', 'page_title', 'page_is_redirect', 'page_len', 'page_latest' ),
+ $dbr->makeList( $where, LIST_OR ),
+ __METHOD__
+ );
# Fetch data and form into an associative array
# non-existent = broken
@@ -208,10 +322,10 @@ class LinkHolderArray {
$pdbk = $title->getPrefixedDBkey();
$linkCache->addGoodLinkObj( $s->page_id, $title, $s->page_len, $s->page_is_redirect, $s->page_latest );
$output->addLink( $title, $s->page_id );
- # FIXME: convoluted data flow
+ # @todo FIXME: Convoluted data flow
# The redirect status and length is passed to getLinkColour via the LinkCache
# Use formal parameters instead
- $colours[$pdbk] = $sk->getLinkColour( $title, $threshold );
+ $colours[$pdbk] = Linker::getLinkColour( $title, $threshold );
//add id to the extension todolist
$linkcolour_ids[$s->page_id] = $pdbk;
}
@@ -235,23 +349,29 @@ class LinkHolderArray {
foreach ( $entries as $index => $entry ) {
$pdbk = $entry['pdbk'];
$title = $entry['title'];
- $query = isset( $entry['query'] ) ? $entry['query'] : '';
+ $query = isset( $entry['query'] ) ? $entry['query'] : array();
$key = "$ns:$index";
$searchkey = "<!--LINK $key-->";
- if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] == 'new' ) {
- $linkCache->addBadLinkObj( $title );
+ $displayText = $entry['text'];
+ if ( $displayText === '' ) {
+ $displayText = null;
+ }
+ if ( !isset( $colours[$pdbk] ) ) {
$colours[$pdbk] = 'new';
+ }
+ $attribs = array();
+ if ( $colours[$pdbk] == 'new' ) {
+ $linkCache->addBadLinkObj( $title );
$output->addLink( $title, 0 );
- // FIXME: replace deprecated makeBrokenLinkObj() by link()
- $replacePairs[$searchkey] = $sk->makeBrokenLinkObj( $title,
- $entry['text'],
- $query );
+ $type = array( 'broken' );
} else {
- // FIXME: replace deprecated makeColouredLinkObj() by link()
- $replacePairs[$searchkey] = $sk->makeColouredLinkObj( $title, $colours[$pdbk],
- $entry['text'],
- $query );
+ if ( $colours[$pdbk] != '' ) {
+ $attribs['class'] = $colours[$pdbk];
+ }
+ $type = array( 'known', 'noclasses' );
}
+ $replacePairs[$searchkey] = Linker::link( $title, $displayText,
+ $attribs, $query, $type );
}
}
$replacer = new HashtableReplacer( $replacePairs, 1 );
@@ -278,11 +398,10 @@ class LinkHolderArray {
wfProfileIn( __METHOD__ );
# Make interwiki link HTML
- $sk = $this->parent->getOptions()->getSkin( $this->parent->mTitle );
$output = $this->parent->getOutput();
$replacePairs = array();
foreach( $this->interwikis as $key => $link ) {
- $replacePairs[$key] = $sk->link( $link['title'], $link['text'] );
+ $replacePairs[$key] = Linker::link( $link['title'], $link['text'] );
$output->addInterwikiLink( $link['title'] );
}
$replacer = new HashtableReplacer( $replacePairs, 1 );
@@ -303,11 +422,10 @@ class LinkHolderArray {
$variantMap = array(); // maps $pdbkey_Variant => $keys (of link holders)
$output = $this->parent->getOutput();
$linkCache = LinkCache::singleton();
- $sk = $this->parent->getOptions()->getSkin( $this->parent->mTitle );
- $threshold = $this->getStubThreshold();
+ $threshold = $this->parent->getOptions()->getStubThreshold();
$titlesToBeConverted = '';
$titlesAttrs = array();
-
+
// Concatenate titles to a single string, thus we only need auto convert the
// single string to all variants. This would improve parser's performance
// significantly.
@@ -322,14 +440,14 @@ class LinkHolderArray {
'ns' => $ns,
'key' => "$ns:$index",
'titleText' => $titleText,
- );
+ );
// separate titles with \0 because it would never appears
// in a valid title
$titlesToBeConverted .= $titleText . "\0";
}
}
}
-
+
// Now do the conversion and explode string to text of titles
$titlesAllVariants = $wgContLang->autoConvertToAllVariants( $titlesToBeConverted );
$allVariantsName = array_keys( $titlesAllVariants );
@@ -341,9 +459,8 @@ class LinkHolderArray {
for ( $i = 0; $i < $l; $i ++ ) {
foreach ( $allVariantsName as $variantName ) {
$textVariant = $titlesAllVariants[$variantName][$i];
- extract( $titlesAttrs[$i] );
- if($textVariant != $titleText){
- $variantTitle = Title::makeTitle( $ns, $textVariant );
+ if ( $textVariant != $titlesAttrs[$i]['titleText'] ) {
+ $variantTitle = Title::makeTitle( $titlesAttrs[$i]['ns'], $textVariant );
if( is_null( $variantTitle ) ) {
continue;
}
@@ -372,11 +489,12 @@ class LinkHolderArray {
if(!$linkBatch->isEmpty()){
// construct query
$dbr = wfGetDB( DB_SLAVE );
- $page = $dbr->tableName( 'page' );
- $titleClause = $linkBatch->constructSet('page', $dbr);
- $variantQuery = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len";
- $variantQuery .= " FROM $page WHERE $titleClause";
- $varRes = $dbr->query( $variantQuery, __METHOD__ );
+ $varRes = $dbr->select( 'page',
+ array( 'page_id', 'page_namespace', 'page_title', 'page_is_redirect', 'page_len' ),
+ $linkBatch->constructSet( 'page', $dbr ),
+ __METHOD__
+ );
+
$linkcolour_ids = array();
// for each found variants, figure out link holders and replace
@@ -387,14 +505,14 @@ class LinkHolderArray {
$vardbk = $variantTitle->getDBkey();
$holderKeys = array();
- if(isset($variantMap[$varPdbk])){
+ if( isset( $variantMap[$varPdbk] ) ) {
$holderKeys = $variantMap[$varPdbk];
$linkCache->addGoodLinkObj( $s->page_id, $variantTitle, $s->page_len, $s->page_is_redirect );
$output->addLink( $variantTitle, $s->page_id );
}
// loop over link holders
- foreach($holderKeys as $key){
+ foreach( $holderKeys as $key ) {
list( $ns, $index ) = explode( ':', $key, 2 );
$entry =& $this->internals[$ns][$index];
$pdbk = $entry['pdbk'];
@@ -405,10 +523,10 @@ class LinkHolderArray {
$entry['pdbk'] = $varPdbk;
// set pdbk and colour
- # FIXME: convoluted data flow
+ # @todo FIXME: Convoluted data flow
# The redirect status and length is passed to getLinkColour via the LinkCache
# Use formal parameters instead
- $colours[$varPdbk] = $sk->getLinkColour( $variantTitle, $threshold );
+ $colours[$varPdbk] = Linker::getLinkColour( $variantTitle, $threshold );
$linkcolour_ids[$s->page_id] = $pdbk;
}
}
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index 4a3aa03b..8d4c60df 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -24,18 +24,20 @@
* removes HTML comments and expands templates
* cleanSig() / cleanSigInSig()
* Cleans a signature before saving it to preferences
- * extractSections()
- * Extracts sections from an article for section editing
+ * getSection()
+ * Return the content of a section from an article for section editing
+ * replaceSection()
+ * Replaces a section by number inside an article
* getPreloadText()
* Removes <noinclude> sections, and <includeonly> tags.
*
* Globals used:
* objects: $wgLang, $wgContLang
*
- * NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
+ * NOT $wgUser or $wgTitle. Keep them away!
*
* settings:
- * $wgUseTex*, $wgUseDynamicDates*, $wgInterwikiMagic*,
+ * $wgUseDynamicDates*, $wgInterwikiMagic*,
* $wgNamespacesWithSubpages, $wgAllowExternalImages*,
* $wgLocaltimezone, $wgAllowSpecialInclusion*,
* $wgMaxArticleSize*
@@ -53,6 +55,12 @@ class Parser {
*/
const VERSION = '1.6.4';
+ /**
+ * Update this version number when the output of serialiseHalfParsedText()
+ * changes in an incompatible way
+ */
+ const HALF_PARSED_VERSION = 2;
+
# Flags for Parser::setFunctionHook
# Also available as global constants from Defines.php
const SFH_NO_HASH = 1;
@@ -89,50 +97,99 @@ class Parser {
const MARKER_SUFFIX = "-QINU\x7f";
# Persistent:
- var $mTagHooks, $mTransparentTagHooks, $mFunctionHooks, $mFunctionSynonyms, $mVariables;
- var $mSubstWords, $mImageParams, $mImageParamsMagicArray, $mStripList, $mMarkerIndex;
- var $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols, $mDefaultStripList;
- var $mVarCache, $mConf, $mFunctionTagHooks;
+ var $mTagHooks = array();
+ var $mTransparentTagHooks = array();
+ var $mFunctionHooks = array();
+ var $mFunctionSynonyms = array( 0 => array(), 1 => array() );
+ var $mFunctionTagHooks = array();
+ var $mStripList = array();
+ var $mDefaultStripList = array();
+ var $mVarCache = array();
+ var $mImageParams = array();
+ var $mImageParamsMagicArray = array();
+ var $mMarkerIndex = 0;
+ var $mFirstCall = true;
+
+ # Initialised by initialiseVariables()
+
+ /**
+ * @var MagicWordArray
+ */
+ var $mVariables;
+ /**
+ * @var MagicWordArray
+ */
+ var $mSubstWords;
+ var $mConf, $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols; # Initialised in constructor
# Cleared with clearState():
- var $mOutput, $mAutonumber, $mDTopen, $mStripState;
+ /**
+ * @var ParserOutput
+ */
+ var $mOutput;
+ var $mAutonumber, $mDTopen;
+
+ /**
+ * @var StripState
+ */
+ var $mStripState;
+
var $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
- var $mLinkHolders, $mLinkID;
+ /**
+ * @var LinkHolderArray
+ */
+ var $mLinkHolders;
+
+ var $mLinkID;
var $mIncludeSizes, $mPPNodeCount, $mDefaultSort;
var $mTplExpandCache; # empty-frame expansion cache
var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores;
var $mExpensiveFunctionCount; # number of expensive parser function calls
+ /**
+ * @var User
+ */
+ var $mUser; # User object; only used when doing pre-save transform
+
# Temporary
# These are variables reset at least once per parse regardless of $clearState
- var $mOptions; # ParserOptions object
+
+ /**
+ * @var ParserOptions
+ */
+ var $mOptions;
+
+ /**
+ * @var Title
+ */
var $mTitle; # Title context, used for self-link rendering and similar things
var $mOutputType; # Output type, one of the OT_xxx constants
var $ot; # Shortcut alias, see setOutputType()
+ var $mRevisionObject; # The revision object of the specified revision ID
var $mRevisionId; # ID to display in {{REVISIONID}} tags
var $mRevisionTimestamp; # The timestamp of the specified revision ID
+ var $mRevisionUser; # User to display in {{REVISIONUSER}} tag
var $mRevIdForTs; # The revision ID which was used to fetch the timestamp
/**
+ * @var string
+ */
+ var $mUniqPrefix;
+
+ /**
* Constructor
- *
- * @public
*/
- function __construct( $conf = array() ) {
+ public function __construct( $conf = array() ) {
$this->mConf = $conf;
- $this->mTagHooks = array();
- $this->mTransparentTagHooks = array();
- $this->mFunctionHooks = array();
- $this->mFunctionTagHooks = array();
- $this->mFunctionSynonyms = array( 0 => array(), 1 => array() );
- $this->mDefaultStripList = $this->mStripList = array();
$this->mUrlProtocols = wfUrlProtocols();
- $this->mExtLinkBracketedRegex = '/\[(\b(' . wfUrlProtocols() . ')'.
+ $this->mExtLinkBracketedRegex = '/\[((' . wfUrlProtocols() . ')'.
'[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/S';
- $this->mVarCache = array();
if ( isset( $conf['preprocessorClass'] ) ) {
$this->mPreprocessorClass = $conf['preprocessorClass'];
+ } elseif ( defined( 'MW_COMPILED' ) ) {
+ # Preprocessor_Hash is much faster than Preprocessor_DOM in compiled mode
+ $this->mPreprocessorClass = 'Preprocessor_Hash';
} elseif ( extension_loaded( 'domxml' ) ) {
# PECL extension that conflicts with the core DOM extension (bug 13770)
wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" );
@@ -142,8 +199,7 @@ class Parser {
} else {
$this->mPreprocessorClass = 'Preprocessor_Hash';
}
- $this->mMarkerIndex = 0;
- $this->mFirstCall = true;
+ wfDebug( __CLASS__ . ": using preprocessor: {$this->mPreprocessorClass}\n" );
}
/**
@@ -151,7 +207,7 @@ class Parser {
*/
function __destruct() {
if ( isset( $this->mLinkHolders ) ) {
- $this->mLinkHolders->__destruct();
+ unset( $this->mLinkHolders );
}
foreach ( $this as $name => $value ) {
unset( $this->$name );
@@ -193,13 +249,14 @@ class Parser {
$this->mLastSection = '';
$this->mDTopen = false;
$this->mIncludeCount = array();
- $this->mStripState = new StripState;
$this->mArgStack = false;
$this->mInPre = false;
$this->mLinkHolders = new LinkHolderArray( $this );
$this->mLinkID = 0;
- $this->mRevisionTimestamp = $this->mRevisionId = null;
+ $this->mRevisionObject = $this->mRevisionTimestamp =
+ $this->mRevisionId = $this->mRevisionUser = null;
$this->mVarCache = array();
+ $this->mUser = null;
/**
* Prefix for temporary replacement strings for the multipass parser.
@@ -214,6 +271,7 @@ class Parser {
# $this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString();
# Changed to \x7f to allow XML double-parsing -- TS
$this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString();
+ $this->mStripState = new StripState( $this->mUniqPrefix );
# Clear these on every parse, bug 4549
@@ -245,7 +303,7 @@ class Parser {
* Do not call this function recursively.
*
* @param $text String: text we want to parse
- * @param $title A title object
+ * @param $title Title object
* @param $options ParserOptions
* @param $linestart boolean
* @param $clearState boolean
@@ -263,20 +321,19 @@ class Parser {
wfProfileIn( __METHOD__ );
wfProfileIn( $fname );
- $this->mOptions = $options;
- if ( $clearState ) {
- $this->clearState();
- }
-
- $this->setTitle( $title ); # Page title has to be set for the pre-processor
+ $this->startParse( $title, $options, self::OT_HTML, $clearState );
$oldRevisionId = $this->mRevisionId;
+ $oldRevisionObject = $this->mRevisionObject;
$oldRevisionTimestamp = $this->mRevisionTimestamp;
+ $oldRevisionUser = $this->mRevisionUser;
if ( $revid !== null ) {
$this->mRevisionId = $revid;
+ $this->mRevisionObject = null;
$this->mRevisionTimestamp = null;
+ $this->mRevisionUser = null;
}
- $this->setOutputType( self::OT_HTML );
+
wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
# No more strip!
wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
@@ -327,7 +384,7 @@ class Parser {
|| $wgDisableTitleConversion
|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
|| isset( $this->mDoubleUnderscores['notitleconvert'] )
- || $this->mOutput->getDisplayTitle() !== false ) )
+ || $this->mOutput->getDisplayTitle() !== false ) )
{
$convruletitle = $wgContLang->getConvRuleTitle();
if ( $convruletitle ) {
@@ -342,23 +399,7 @@ class Parser {
wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) );
-//!JF Move to its own function
-
- $uniq_prefix = $this->mUniqPrefix;
- $matches = array();
- $elements = array_keys( $this->mTransparentTagHooks );
- $text = $this->extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );
-
- foreach ( $matches as $marker => $data ) {
- list( $element, $content, $params, $tag ) = $data;
- $tagName = strtolower( $element );
- if ( isset( $this->mTransparentTagHooks[$tagName] ) ) {
- $output = call_user_func_array( $this->mTransparentTagHooks[$tagName], array( $content, $params, $this ) );
- } else {
- $output = $tag;
- }
- $this->mStripState->general->setPair( $marker, $output );
- }
+ $text = $this->replaceTransparentTags( $text );
$text = $this->mStripState->unstripGeneral( $text );
$text = Sanitizer::normalizeCharReferences( $text );
@@ -415,7 +456,9 @@ class Parser {
$this->mOutput->setText( $text );
$this->mRevisionId = $oldRevisionId;
+ $this->mRevisionObject = $oldRevisionObject;
$this->mRevisionTimestamp = $oldRevisionTimestamp;
+ $this->mRevisionUser = $oldRevisionUser;
wfProfileOut( $fname );
wfProfileOut( __METHOD__ );
@@ -430,6 +473,8 @@ class Parser {
*
* @param $text String: text extension wants to have parsed
* @param $frame PPFrame: The frame to use for expanding any template variables
+ *
+ * @return string
*/
function recursiveTagParse( $text, $frame=false ) {
wfProfileIn( __METHOD__ );
@@ -446,10 +491,7 @@ class Parser {
*/
function preprocess( $text, Title $title, ParserOptions $options, $revid = null ) {
wfProfileIn( __METHOD__ );
- $this->mOptions = $options;
- $this->clearState();
- $this->setOutputType( self::OT_PREPROCESS );
- $this->setTitle( $title );
+ $this->startParse( $title, $options, self::OT_PREPROCESS, true );
if ( $revid !== null ) {
$this->mRevisionId = $revid;
}
@@ -469,10 +511,7 @@ class Parser {
*/
public function getPreloadText( $text, Title $title, ParserOptions $options ) {
# Parser (re)initialisation
- $this->mOptions = $options;
- $this->clearState();
- $this->setOutputType( self::OT_PLAIN );
- $this->setTitle( $title );
+ $this->startParse( $title, $options, self::OT_PLAIN, true );
$flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
@@ -484,21 +523,30 @@ class Parser {
/**
* Get a random string
*
- * @private
- * @static
+ * @return string
*/
- static private function getRandomString() {
+ static public function getRandomString() {
return dechex( mt_rand( 0, 0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) );
}
/**
+ * Set the current user.
+ * Should only be used when doing pre-save transform.
+ *
+ * @param $user Mixed: User object or null (to reset)
+ */
+ function setUser( $user ) {
+ $this->mUser = $user;
+ }
+
+ /**
* Accessor for mUniqPrefix.
*
* @return String
*/
public function uniqPrefix() {
if ( !isset( $this->mUniqPrefix ) ) {
- # @todo Fixme: this is probably *horribly wrong*
+ # @todo FIXME: This is probably *horribly wrong*
# LanguageConverter seems to want $wgParser's uniqPrefix, however
# if this is called for a parser cache hit, the parser may not
# have ever been initialized in the first place.
@@ -511,11 +559,13 @@ class Parser {
/**
* Set the context title
+ *
+ * @param $t Title
*/
function setTitle( $t ) {
- if ( !$t || $t instanceof FakeTitle ) {
- $t = Title::newFromText( 'NO TITLE' );
- }
+ if ( !$t || $t instanceof FakeTitle ) {
+ $t = Title::newFromText( 'NO TITLE' );
+ }
if ( strval( $t->getFragment() ) !== '' ) {
# Strip the fragment to avoid various odd effects
@@ -599,19 +649,47 @@ class Parser {
return wfSetVar( $this->mOptions, $x );
}
+ /**
+ * @return int
+ */
function nextLinkID() {
return $this->mLinkID++;
}
- function getFunctionLang() {
- global $wgLang, $wgContLang;
+ /**
+ * @param $id int
+ */
+ function setLinkID( $id ) {
+ $this->mLinkID = $id;
+ }
+ /**
+ * @return Language
+ */
+ function getFunctionLang() {
$target = $this->mOptions->getTargetLanguage();
if ( $target !== null ) {
return $target;
- } else {
- return $this->mOptions->getInterfaceMessage() ? $wgLang : $wgContLang;
+ } elseif( $this->mOptions->getInterfaceMessage() ) {
+ global $wgLang;
+ return $wgLang;
+ } elseif( is_null( $this->mTitle ) ) {
+ throw new MWException( __METHOD__.': $this->mTitle is null' );
}
+ return $this->mTitle->getPageLanguage();
+ }
+
+ /**
+ * Get a User object either from $this->mUser, if set, or from the
+ * ParserOptions object otherwise
+ *
+ * @return User object
+ */
+ function getUser() {
+ if ( !is_null( $this->mUser ) ) {
+ return $this->mUser;
+ }
+ return $this->mOptions->getUser();
}
/**
@@ -638,15 +716,13 @@ class Parser {
* array( 'param' => 'x' ),
* '<element param="x">tag content</element>' ) )
*
- * @param $elements list of element names. Comments are always extracted.
- * @param $text Source text string.
- * @param $matches Out parameter, Array: extracted tags
- * @param $uniq_prefix
+ * @param $elements array list of element names. Comments are always extracted.
+ * @param $text string Source text string.
+ * @param $matches array Out parameter, Array: extracted tags
+ * @param $uniq_prefix string
* @return String: stripped text
- *
- * @static
*/
- public function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = '' ) {
+ public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = '' ) {
static $n = 1;
$stripped = '';
$matches = array();
@@ -710,77 +786,33 @@ class Parser {
/**
* Get a list of strippable XML-like elements
+ *
+ * @return array
*/
function getStripList() {
return $this->mStripList;
}
/**
- * @deprecated use replaceVariables
- */
- function strip( $text, $state, $stripcomments = false , $dontstrip = array() ) {
- return $text;
- }
-
- /**
- * Restores pre, math, and other extensions removed by strip()
- *
- * always call unstripNoWiki() after this one
- * @private
- * @deprecated use $this->mStripState->unstrip()
- */
- function unstrip( $text, $state ) {
- return $state->unstripGeneral( $text );
- }
-
- /**
- * Always call this after unstrip() to preserve the order
- *
- * @private
- * @deprecated use $this->mStripState->unstrip()
- */
- function unstripNoWiki( $text, $state ) {
- return $state->unstripNoWiki( $text );
- }
-
- /**
- * @deprecated use $this->mStripState->unstripBoth()
- */
- function unstripForHTML( $text ) {
- return $this->mStripState->unstripBoth( $text );
- }
-
- /**
* Add an item to the strip state
* Returns the unique tag which must be inserted into the stripped text
* The tag will be replaced with the original text in unstrip()
- *
- * @private
*/
function insertStripItem( $text ) {
$rnd = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}-" . self::MARKER_SUFFIX;
$this->mMarkerIndex++;
- $this->mStripState->general->setPair( $rnd, $text );
+ $this->mStripState->addGeneral( $rnd, $text );
return $rnd;
}
/**
- * Interface with html tidy
- * @deprecated Use MWTidy::tidy()
- */
- public static function tidy( $text ) {
- wfDeprecated( __METHOD__ );
- return MWTidy::tidy( $text );
- }
-
- /**
* parse the wiki syntax used to render tables
*
* @private
*/
function doTableStuff( $text ) {
wfProfileIn( __METHOD__ );
-
+
$lines = StringUtils::explode( "\n", $text );
$out = '';
$td_history = array(); # Is currently a td tag open?
@@ -793,7 +825,7 @@ class Parser {
foreach ( $lines as $outLine ) {
$line = trim( $outLine );
- if ( $line === '' ) { # empty line, go to next line
+ if ( $line === '' ) { # empty line, go to next line
$out .= $outLine."\n";
continue;
}
@@ -1043,7 +1075,7 @@ class Parser {
*/
function doMagicLinks( $text ) {
wfProfileIn( __METHOD__ );
- $prots = $this->mUrlProtocols;
+ $prots = wfUrlProtocolsWithoutProtRel();
$urlChar = self::EXT_LINK_URL_CLASS;
$text = preg_replace_callback(
'!(?: # Start cases
@@ -1052,15 +1084,20 @@ class Parser {
(\\b(?:$prots)$urlChar+) | # m[3]: Free external links" . '
(?:RFC|PMID)\s+([0-9]+) | # m[4]: RFC or PMID, capture number
ISBN\s+(\b # m[5]: ISBN, capture number
- (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix
- (?: [0-9] [\ \-]? ){9} # 9 digits with opt. delimiters
- [0-9Xx] # check digit
- \b)
+ (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix
+ (?: [0-9] [\ \-]? ){9} # 9 digits with opt. delimiters
+ [0-9Xx] # check digit
+ \b)
)!x', array( &$this, 'magicLinkCallback' ), $text );
wfProfileOut( __METHOD__ );
return $text;
}
+ /**
+ * @throws MWException
+ * @param $m array
+ * @return HTML|string
+ */
function magicLinkCallback( $m ) {
if ( isset( $m[1] ) && $m[1] !== '' ) {
# Skip anchor
@@ -1087,10 +1124,8 @@ class Parser {
throw new MWException( __METHOD__.': unrecognised match type "' .
substr( $m[0], 0, 20 ) . '"' );
}
- $url = wfMsgForContent( $urlmsg, $id);
- $sk = $this->mOptions->getSkin( $this->mTitle );
- $la = $sk->getExternalLinkAttributes( "external $CssClass" );
- return "<a href=\"{$url}\"{$la}>{$keyword} {$id}</a>";
+ $url = wfMsgForContent( $urlmsg, $id );
+ return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $CssClass );
} elseif ( isset( $m[5] ) && $m[5] !== '' ) {
# ISBN
$isbn = $m[5];
@@ -1117,7 +1152,6 @@ class Parser {
global $wgContLang;
wfProfileIn( __METHOD__ );
- $sk = $this->mOptions->getSkin( $this->mTitle );
$trail = '';
# The characters '<' and '>' (which were escaped by
@@ -1148,7 +1182,7 @@ class Parser {
$text = $this->maybeMakeExternalImage( $url );
if ( $text === false ) {
# Not an image, make a link
- $text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free',
+ $text = Linker::makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free',
$this->getExternalLinkAttribs( $url ) );
# Register it in the output object...
# Replace unnecessary URL escape codes with their equivalent characters
@@ -1355,7 +1389,7 @@ class Parser {
/**
* Replace external links (REL)
*
- * Note: this is all very hackish and the order of execution matters a lot.
+ * Note: this is all very hackish and the order of execution matters a lot.
* Make sure to run maintenance/parserTests.php if you change this code.
*
* @private
@@ -1364,8 +1398,6 @@ class Parser {
global $wgContLang;
wfProfileIn( __METHOD__ );
- $sk = $this->mOptions->getSkin( $this->mTitle );
-
$bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
$s = array_shift( $bits );
@@ -1399,16 +1431,10 @@ class Parser {
# No link text, e.g. [http://domain.tld/some.link]
if ( $text == '' ) {
- # Autonumber if allowed. See bug #5918
- if ( strpos( wfUrlProtocols(), substr( $protocol, 0, strpos( $protocol, ':' ) ) ) !== false ) {
- $langObj = $this->getFunctionLang();
- $text = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']';
- $linktype = 'autonumber';
- } else {
- # Otherwise just use the URL
- $text = htmlspecialchars( $url );
- $linktype = 'free';
- }
+ # Autonumber
+ $langObj = $this->getFunctionLang();
+ $text = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']';
+ $linktype = 'autonumber';
} else {
# Have link text, e.g. [http://domain.tld/some.link text]s
# Check for trail
@@ -1423,7 +1449,7 @@ class Parser {
# This means that users can paste URLs directly into the text
# Funny characters like ö aren't valid in URLs anyway
# This was changed in August 2004
- $s .= $sk->makeExternalLink( $url, $text, false, $linktype,
+ $s .= Linker::makeExternalLink( $url, $text, false, $linktype,
$this->getExternalLinkAttribs( $url ) ) . $dtrail . $trail;
# Register link in the output object.
@@ -1449,23 +1475,12 @@ class Parser {
*/
function getExternalLinkAttribs( $url = false ) {
$attribs = array();
- global $wgNoFollowLinks, $wgNoFollowNsExceptions;
+ global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;
$ns = $this->mTitle->getNamespace();
- if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions ) ) {
+ if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions ) &&
+ !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions ) )
+ {
$attribs['rel'] = 'nofollow';
-
- global $wgNoFollowDomainExceptions;
- if ( $wgNoFollowDomainExceptions ) {
- $bits = wfParseUrl( $url );
- if ( is_array( $bits ) && isset( $bits['host'] ) ) {
- foreach ( $wgNoFollowDomainExceptions as $domain ) {
- if ( substr( $bits['host'], -strlen( $domain ) ) == $domain ) {
- unset( $attribs['rel'] );
- break;
- }
- }
- }
- }
}
if ( $this->mOptions->getExternalLinkTarget() ) {
$attribs['target'] = $this->mOptions->getExternalLinkTarget();
@@ -1473,7 +1488,6 @@ class Parser {
return $attribs;
}
-
/**
* Replace unusual URL escape codes with their equivalent characters
*
@@ -1513,7 +1527,6 @@ class Parser {
* @private
*/
function maybeMakeExternalImage( $url ) {
- $sk = $this->mOptions->getSkin( $this->mTitle );
$imagesfrom = $this->mOptions->getAllowExternalImagesFrom();
$imagesexception = !empty( $imagesfrom );
$text = false;
@@ -1532,10 +1545,10 @@ class Parser {
$imagematch = false;
}
if ( $this->mOptions->getAllowExternalImages()
- || ( $imagesexception && $imagematch ) ) {
+ || ( $imagesexception && $imagematch ) ) {
if ( preg_match( self::EXT_IMAGE_REGEX, $url ) ) {
# Image found
- $text = $sk->makeExternalImage( $url );
+ $text = Linker::makeExternalImage( $url );
}
}
if ( !$text && $this->mOptions->getEnableImageWhitelist()
@@ -1548,7 +1561,7 @@ class Parser {
}
if ( preg_match( '/' . str_replace( '/', '\\/', $entry ) . '/i', $url ) ) {
# Image matches a whitelist entry
- $text = $sk->makeExternalImage( $url );
+ $text = Linker::makeExternalImage( $url );
break;
}
}
@@ -1589,10 +1602,9 @@ class Parser {
$e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
}
- $sk = $this->mOptions->getSkin( $this->mTitle );
$holders = new LinkHolderArray( $this );
- # split the entire text string on occurences of [[
+ # split the entire text string on occurences of [[
$a = StringUtils::explode( '[[', ' ' . $s );
# get the first element (all text up to first [[), and remove the space we added
$s = $a->current();
@@ -1684,14 +1696,14 @@ class Parser {
# fix up urlencoded title texts
if ( strpos( $m[1], '%' ) !== false ) {
# Should anchors '#' also be rejected?
- $m[1] = str_replace( array('<', '>'), array('&lt;', '&gt;'), urldecode( $m[1] ) );
+ $m[1] = str_replace( array('<', '>'), array('&lt;', '&gt;'), rawurldecode( $m[1] ) );
}
$trail = $m[3];
} elseif ( preg_match( $e1_img, $line, $m ) ) { # Invalid, but might be an image with a link in its caption
$might_be_img = true;
$text = $m[2];
if ( strpos( $m[1], '%' ) !== false ) {
- $m[1] = urldecode( $m[1] );
+ $m[1] = rawurldecode( $m[1] );
}
$trail = "";
} else { # Invalid form; output directly
@@ -1705,7 +1717,7 @@ class Parser {
# Don't allow internal links to pages containing
# PROTO: where PROTO is a valid URL protocol; these
# should be external links.
- if ( preg_match( '/^\b(?:' . wfUrlProtocols() . ')/', $m[1] ) ) {
+ if ( preg_match( '/^(?:' . wfUrlProtocols() . ')/', $m[1] ) ) {
$s .= $prefix . '[[' . $line ;
wfProfileOut( __METHOD__."-misc" );
continue;
@@ -1787,9 +1799,10 @@ class Parser {
$text = $link;
} else {
# Bug 4598 madness. Handle the quotes only if they come from the alternate part
- # [[Lista d''e paise d''o munno]] -> <a href="">Lista d''e paise d''o munno</a>
- # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']] -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
- $text = $this->doQuotes($text);
+ # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
+ # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
+ # -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
+ $text = $this->doQuotes( $text );
}
# Link not escaped by : , create the various objects
@@ -1823,14 +1836,13 @@ class Parser {
$holders->merge( $this->replaceInternalLinks2( $text ) );
}
# cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
- $s .= $prefix . $this->armorLinks( $this->makeImage( $nt, $text, $holders ) ) . $trail;
+ $s .= $prefix . $this->armorLinks(
+ $this->makeImage( $nt, $text, $holders ) ) . $trail;
} else {
$s .= $prefix . $trail;
}
- $this->mOutput->addImage( $nt->getDBkey() );
wfProfileOut( __METHOD__."-image" );
continue;
-
}
if ( $ns == NS_CATEGORY ) {
@@ -1851,7 +1863,7 @@ class Parser {
* Strip the whitespace Category links produce, see bug 87
* @todo We might want to use trim($tmp, "\n") here.
*/
- $s .= trim( $prefix . $trail, "\n" ) == '' ? '': $prefix . $trail;
+ $s .= trim( $prefix . $trail, "\n" ) == '' ? '' : $prefix . $trail;
wfProfileOut( __METHOD__."-category" );
continue;
@@ -1861,26 +1873,24 @@ class Parser {
# Self-link checking
if ( $nt->getFragment() === '' && $ns != NS_SPECIAL ) {
if ( in_array( $nt->getPrefixedText(), $selflink, true ) ) {
- $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
+ $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
continue;
}
}
# NS_MEDIA is a pseudo-namespace for linking directly to a file
- # FIXME: Should do batch file existence checks, see comment below
+ # @todo FIXME: Should do batch file existence checks, see comment below
if ( $ns == NS_MEDIA ) {
wfProfileIn( __METHOD__."-media" );
# Give extensions a chance to select the file revision for us
- $skip = $time = false;
- wfRunHooks( 'BeforeParserMakeImageLinkObj', array( &$this, &$nt, &$skip, &$time ) );
- if ( $skip ) {
- $link = $sk->link( $nt );
- } else {
- $link = $sk->makeMediaLinkObj( $nt, $text, $time );
- }
+ $time = $sha1 = $descQuery = false;
+ wfRunHooks( 'BeforeParserFetchFileAndTitle',
+ array( $this, $nt, &$time, &$sha1, &$descQuery ) );
+ # Fetch and register the file (file title may be different via hooks)
+ list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $time, $sha1 );
# Cloak with NOPARSE to avoid replacement in replaceExternalLinks
- $s .= $prefix . $this->armorLinks( $link ) . $trail;
- $this->mOutput->addImage( $nt->getDBkey() );
+ $s .= $prefix . $this->armorLinks(
+ Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
wfProfileOut( __METHOD__."-media" );
continue;
}
@@ -1889,14 +1899,14 @@ class Parser {
# Some titles, such as valid special pages or files in foreign repos, should
# be shown as bluelinks even though they're not included in the page table
#
- # FIXME: isAlwaysKnown() can be expensive for file links; we should really do
+ # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
# batch file existence checks for NS_FILE and NS_MEDIA
if ( $iw == '' && $nt->isAlwaysKnown() ) {
$this->mOutput->addLink( $nt );
- $s .= $this->makeKnownLinkHolder( $nt, $text, '', $trail, $prefix );
+ $s .= $this->makeKnownLinkHolder( $nt, $text, array(), $trail, $prefix );
} else {
# Links will be added to the output link list after checking
- $s .= $holders->makeHolder( $nt, $text, '', $trail, $prefix );
+ $s .= $holders->makeHolder( $nt, $text, array(), $trail, $prefix );
}
wfProfileOut( __METHOD__."-always_known" );
}
@@ -1905,18 +1915,6 @@ class Parser {
}
/**
- * Make a link placeholder. The text returned can be later resolved to a real link with
- * replaceLinkHolders(). This is done for two reasons: firstly to avoid further
- * parsing of interwiki links, and secondly to allow all existence checks and
- * article length checks (for stub links) to be bundled into a single query.
- *
- * @deprecated
- */
- function makeLinkHolder( &$nt, $text = '', $query = '', $trail = '', $prefix = '' ) {
- return $this->mLinkHolders->makeHolder( $nt, $text, $query, $trail, $prefix );
- }
-
- /**
* Render a forced-blue link inline; protect against double expansion of
* URLs if we're in a mode that prepends full URL prefixes to internal links.
* Since this little disaster has to split off the trail text to avoid
@@ -1925,16 +1923,23 @@ class Parser {
*
* @param $nt Title
* @param $text String
- * @param $query String
+ * @param $query Array or String
* @param $trail String
* @param $prefix String
* @return String: HTML-wikitext mix oh yuck
*/
- function makeKnownLinkHolder( $nt, $text = '', $query = '', $trail = '', $prefix = '' ) {
+ function makeKnownLinkHolder( $nt, $text = '', $query = array(), $trail = '', $prefix = '' ) {
list( $inside, $trail ) = Linker::splitTrail( $trail );
- $sk = $this->mOptions->getSkin( $this->mTitle );
- # FIXME: use link() instead of deprecated makeKnownLinkObj()
- $link = $sk->makeKnownLinkObj( $nt, $text, $query, $inside, $prefix );
+
+ if ( is_string( $query ) ) {
+ $query = wfCgiToArray( $query );
+ }
+ if ( $text == '' ) {
+ $text = htmlspecialchars( $nt->getPrefixedText() );
+ }
+
+ $link = Linker::linkKnown( $nt, "$prefix$text$inside", array(), $query );
+
return $this->armorLinks( $link ) . $trail;
}
@@ -1977,6 +1982,8 @@ class Parser {
/**#@+
* Used by doBlockLevels()
* @private
+ *
+ * @return string
*/
function closeParagraph() {
$result = '';
@@ -2001,7 +2008,7 @@ class Parser {
}
for ( $i = 0; $i < $shorter; ++$i ) {
- if ( $st1{$i} != $st2{$i} ) {
+ if ( $st1[$i] != $st2[$i] ) {
break;
}
}
@@ -2012,6 +2019,8 @@ class Parser {
* These next three functions open, continue, and close the list
* element appropriate to the prefix character passed into them.
* @private
+ *
+ * @return string
*/
function openList( $char ) {
$result = $this->closeParagraph();
@@ -2036,6 +2045,8 @@ class Parser {
* TODO: document
* @param $char String
* @private
+ *
+ * @return string
*/
function nextItem( $char ) {
if ( '*' === $char || '#' === $char ) {
@@ -2060,6 +2071,8 @@ class Parser {
* TODO: document
* @param $char String
* @private
+ *
+ * @return string
*/
function closeList( $char ) {
if ( '*' === $char ) {
@@ -2178,7 +2191,7 @@ class Parser {
$output .= $this->openList( $char );
if ( ';' === $char ) {
- # FIXME: This is dupe of code above
+ # @todo FIXME: This is dupe of code above
if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) {
$t = $t2;
$output .= $term . $this->nextItem( ':' );
@@ -2200,7 +2213,7 @@ class Parser {
'<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS', $t );
if ( $openmatch or $closematch ) {
$paragraphStack = false;
- # TODO bug 5718: paragraph closed
+ # TODO bug 5718: paragraph closed
$output .= $this->closeParagraph();
if ( $preOpenMatch and !$preCloseMatch ) {
$this->mInPre = true;
@@ -2299,7 +2312,7 @@ class Parser {
$stack = 0;
$len = strlen( $str );
for( $i = 0; $i < $len; $i++ ) {
- $c = $str{$i};
+ $c = $str[$i];
switch( $state ) {
# (Using the number is a performance hack for common cases)
@@ -2435,6 +2448,9 @@ class Parser {
* Return value of a magic variable (like PAGENAME)
*
* @private
+ *
+ * @param $index integer
+ * @param $frame PPFrame
*/
function getVariableValue( $index, $frame=false ) {
global $wgContLang, $wgSitename, $wgServer;
@@ -2521,25 +2537,25 @@ class Parser {
$value = wfEscapeWikiText( $this->mTitle->getText() );
break;
case 'pagenamee':
- $value = $this->mTitle->getPartialURL();
+ $value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
break;
case 'fullpagename':
$value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
break;
case 'fullpagenamee':
- $value = $this->mTitle->getPrefixedURL();
+ $value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
break;
case 'subpagename':
$value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
break;
case 'subpagenamee':
- $value = $this->mTitle->getSubpageUrlForm();
+ $value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
break;
case 'basepagename':
$value = wfEscapeWikiText( $this->mTitle->getBaseText() );
break;
case 'basepagenamee':
- $value = wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getBaseText() ) );
+ $value = wfEscapeWikiText( wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getBaseText() ) ) );
break;
case 'talkpagename':
if ( $this->mTitle->canTalk() ) {
@@ -2552,7 +2568,7 @@ class Parser {
case 'talkpagenamee':
if ( $this->mTitle->canTalk() ) {
$talkPage = $this->mTitle->getTalkPage();
- $value = $talkPage->getPrefixedUrl();
+ $value = wfEscapeWikiText( $talkPage->getPrefixedUrl() );
} else {
$value = '';
}
@@ -2563,7 +2579,7 @@ class Parser {
break;
case 'subjectpagenamee':
$subjPage = $this->mTitle->getSubjectPage();
- $value = $subjPage->getPrefixedUrl();
+ $value = wfEscapeWikiText( $subjPage->getPrefixedUrl() );
break;
case 'revisionid':
# Let the edit saving system know we should parse the page
@@ -2719,10 +2735,8 @@ class Parser {
case 'server':
return $wgServer;
case 'servername':
- wfSuppressWarnings(); # May give an E_WARNING in PHP < 5.3.3
- $serverName = parse_url( $wgServer, PHP_URL_HOST );
- wfRestoreWarnings();
- return $serverName ? $serverName : $wgServer;
+ $serverParts = wfParseUrl( $wgServer );
+ return $serverParts && isset( $serverParts['host'] ) ? $serverParts['host'] : $wgServer;
case 'scriptpath':
return $wgScriptPath;
case 'stylepath':
@@ -2783,6 +2797,8 @@ class Parser {
* dependency requirements.
*
* @private
+ *
+ * @return PPNode
*/
function preprocessToDom( $text, $flags = 0 ) {
$dom = $this->getPreprocessor()->preprocessToObj( $text, $flags );
@@ -2791,6 +2807,8 @@ class Parser {
/**
* Return a three-element array: leading whitespace, string contents, trailing whitespace
+ *
+ * @return array
*/
public static function splitWhitespace( $s ) {
$ltrimmed = ltrim( $s );
@@ -2821,6 +2839,8 @@ class Parser {
* Providing arguments this way may be useful for extensions wishing to perform variable replacement explicitly.
* @param $argsOnly Boolean: only do argument (triple-brace) expansion, not double-brace expansion
* @private
+ *
+ * @return string
*/
function replaceVariables( $text, $frame = false, $argsOnly = false ) {
# Is there any text? Also, Prevent too big inclusions!
@@ -2844,7 +2864,11 @@ class Parser {
return $text;
}
- # Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
+ /**
+ * Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
+ *
+ * @return array
+ */
static function createAssocArgs( $args ) {
$assocArgs = array();
$index = 1;
@@ -2918,7 +2942,7 @@ class Parser {
$isLocalObj = false; # $text is a DOM node needing expansion in the current frame
# Title object, where $text came from
- $title = null;
+ $title = false;
# $part1 is the bit before the first |, and must contain only title characters.
# Various prefixes will be stripped from it later.
@@ -2930,8 +2954,10 @@ class Parser {
$originalTitle = $part1;
# $args is a list of argument nodes, starting from index 0, not including $part1
+ # @todo FIXME: If piece['parts'] is null then the call to getLength() below won't work b/c this $args isn't an object
$args = ( null == $piece['parts'] ) ? array() : $piece['parts'];
wfProfileOut( __METHOD__.'-setup' );
+ wfProfileIn( __METHOD__."-title-$originalTitle" );
# SUBST
wfProfileIn( __METHOD__.'-modifiers' );
@@ -3100,7 +3126,7 @@ class Parser {
&& $this->mOptions->getAllowSpecialInclusion()
&& $this->ot['html'] )
{
- $text = SpecialPage::capturePath( $title );
+ $text = SpecialPageFactory::capturePath( $title );
if ( is_string( $text ) ) {
$found = true;
$isHTML = true;
@@ -3150,6 +3176,7 @@ class Parser {
# Recover the source wikitext and return it
if ( !$found ) {
$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
+ wfProfileOut( __METHOD__."-title-$originalTitle" );
wfProfileOut( __METHOD__ );
return array( 'object' => $text );
}
@@ -3218,6 +3245,7 @@ class Parser {
$ret = array( 'text' => $text );
}
+ wfProfileOut( __METHOD__."-title-$originalTitle" );
wfProfileOut( __METHOD__ );
return $ret;
}
@@ -3225,6 +3253,8 @@ class Parser {
/**
* Get the semi-parsed DOM representation of a template with a given title,
* and its redirect destination title. Cached.
+ *
+ * @return array
*/
function getTemplateDom( $title ) {
$cacheTitle = $title;
@@ -3260,6 +3290,8 @@ class Parser {
/**
* Fetch the unparsed text of a template and register a reference to it.
+ * @param Title $title
+ * @return Array ( string or false, Title )
*/
function fetchTemplateAndTitle( $title ) {
$templateCb = $this->mOptions->getTemplateCallback(); # Defaults to Parser::statelessFetchTemplate()
@@ -3274,6 +3306,11 @@ class Parser {
return array( $text, $finalTitle );
}
+ /**
+ * Fetch the unparsed text of a template and register a reference to it.
+ * @param Title $title
+ * @return mixed string or false
+ */
function fetchTemplate( $title ) {
$rv = $this->fetchTemplateAndTitle( $title );
return $rv[0];
@@ -3282,8 +3319,10 @@ class Parser {
/**
* Static function to get a template
* Can be overridden via ParserOptions::setTemplateCallback().
+ *
+ * @return array
*/
- static function statelessFetchTemplate( $title, $parser=false ) {
+ static function statelessFetchTemplate( $title, $parser = false ) {
$text = $skip = false;
$finalTitle = $title;
$deps = array();
@@ -3292,17 +3331,22 @@ class Parser {
for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
# Give extensions a chance to select the revision instead
$id = false; # Assume current
- wfRunHooks( 'BeforeParserFetchTemplateAndtitle', array( $parser, &$title, &$skip, &$id ) );
+ wfRunHooks( 'BeforeParserFetchTemplateAndtitle',
+ array( $parser, $title, &$skip, &$id ) );
if ( $skip ) {
$text = false;
$deps[] = array(
- 'title' => $title,
- 'page_id' => $title->getArticleID(),
- 'rev_id' => null );
+ 'title' => $title,
+ 'page_id' => $title->getArticleID(),
+ 'rev_id' => null
+ );
break;
}
- $rev = $id ? Revision::newFromId( $id ) : Revision::newFromTitle( $title );
+ # Get the revision
+ $rev = $id
+ ? Revision::newFromId( $id )
+ : Revision::newFromTitle( $title );
$rev_id = $rev ? $rev->getId() : 0;
# If there is no current revision, there is no page
if ( $id === false && !$rev ) {
@@ -3311,20 +3355,27 @@ class Parser {
}
$deps[] = array(
- 'title' => $title,
- 'page_id' => $title->getArticleID(),
- 'rev_id' => $rev_id );
+ 'title' => $title,
+ 'page_id' => $title->getArticleID(),
+ 'rev_id' => $rev_id );
+ if ( $rev && !$title->equals( $rev->getTitle() ) ) {
+ # We fetched a rev from a different title; register it too...
+ $deps[] = array(
+ 'title' => $rev->getTitle(),
+ 'page_id' => $rev->getPage(),
+ 'rev_id' => $rev_id );
+ }
if ( $rev ) {
$text = $rev->getText();
} elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
global $wgContLang;
- $message = $wgContLang->lcfirst( $title->getText() );
- $text = wfMsgForContentNoTrans( $message );
- if ( wfEmptyMsg( $message, $text ) ) {
+ $message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage();
+ if ( !$message->exists() ) {
$text = false;
break;
}
+ $text = $message->plain();
} else {
break;
}
@@ -3342,7 +3393,56 @@ class Parser {
}
/**
+ * Fetch a file and its title and register a reference to it.
+ * @param Title $title
+ * @param string $time MW timestamp
+ * @param string $sha1 base 36 SHA-1
+ * @return mixed File or false
+ */
+ function fetchFile( $title, $time = false, $sha1 = false ) {
+ $res = $this->fetchFileAndTitle( $title, $time, $sha1 );
+ return $res[0];
+ }
+
+ /**
+ * Fetch a file and its title and register a reference to it.
+ * @param Title $title
+ * @param string $time MW timestamp
+ * @param string $sha1 base 36 SHA-1
+ * @return Array ( File or false, Title of file )
+ */
+ function fetchFileAndTitle( $title, $time = false, $sha1 = false ) {
+ if ( $time === '0' ) {
+ $file = false; // broken thumbnail forced by hook
+ } elseif ( $sha1 ) { // get by (sha1,timestamp)
+ $file = RepoGroup::singleton()->findFileFromKey( $sha1, array( 'time' => $time ) );
+ } else { // get by (name,timestamp)
+ $file = wfFindFile( $title, array( 'time' => $time ) );
+ }
+ $time = $file ? $file->getTimestamp() : false;
+ $sha1 = $file ? $file->getSha1() : false;
+ # Register the file as a dependency...
+ $this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
+ if ( $file && !$title->equals( $file->getTitle() ) ) {
+ # Update fetched file title
+ $title = $file->getTitle();
+ if ( is_null( $file->getRedirectedTitle() ) ) {
+ # This file was not a redirect, but the title does not match.
+ # Register under the new name because otherwise the link will
+ # get lost.
+ $this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
+ }
+ }
+ return array( $file, $title );
+ }
+
+ /**
* Transclude an interwiki link.
+ *
+ * @param $title Title
+ * @param $action
+ *
+ * @return string
*/
function interwikiTransclude( $title, $action ) {
global $wgEnableScaryTranscluding;
@@ -3359,6 +3459,10 @@ class Parser {
return $this->fetchScaryTemplateMaybeFromCache( $url );
}
+ /**
+ * @param $url string
+ * @return Mixed|String
+ */
function fetchScaryTemplateMaybeFromCache( $url ) {
global $wgTranscludeCacheExpiry;
$dbr = wfGetDB( DB_SLAVE );
@@ -3383,10 +3487,14 @@ class Parser {
return $text;
}
-
/**
* Triple brace replacement -- used for template arguments
* @private
+ *
+ * @param $peice array
+ * @param $frame PPFrame
+ *
+ * @return array
*/
function argSubstitution( $piece, $frame ) {
wfProfileIn( __METHOD__ );
@@ -3399,9 +3507,9 @@ class Parser {
$text = $frame->getArgument( $argName );
if ( $text === false && $parts->getLength() > 0
&& (
- $this->ot['html']
- || $this->ot['pre']
- || ( $this->ot['wiki'] && $frame->isTemplate() )
+ $this->ot['html']
+ || $this->ot['pre']
+ || ( $this->ot['wiki'] && $frame->isTemplate() )
)
) {
# No match in frame, use the supplied default
@@ -3440,6 +3548,8 @@ class Parser {
* inner Contents of extension element
* noClose Original text did not have a close tag
* @param $frame PPFrame
+ *
+ * @return string
*/
function extensionSubstitution( $params, $frame ) {
$name = $frame->expand( $params['name'] );
@@ -3508,9 +3618,9 @@ class Parser {
if ( $markerType === 'none' ) {
return $output;
} elseif ( $markerType === 'nowiki' ) {
- $this->mStripState->nowiki->setPair( $marker, $output );
+ $this->mStripState->addNoWiki( $marker, $output );
} elseif ( $markerType === 'general' ) {
- $this->mStripState->general->setPair( $marker, $output );
+ $this->mStripState->addGeneral( $marker, $output );
} else {
throw new MWException( __METHOD__.': invalid marker type' );
}
@@ -3525,7 +3635,7 @@ class Parser {
* @return Boolean: false if this inclusion would take it over the maximum, true otherwise
*/
function incrementIncludeSize( $type, $size ) {
- if ( $this->mIncludeSizes[$type] + $size > $this->mOptions->getMaxIncludeSize( $type ) ) {
+ if ( $this->mIncludeSizes[$type] + $size > $this->mOptions->getMaxIncludeSize() ) {
return false;
} else {
$this->mIncludeSizes[$type] += $size;
@@ -3582,7 +3692,7 @@ class Parser {
}
# (bug 8068) Allow control over whether robots index a page.
#
- # FIXME (bug 14899): __INDEX__ always overrides __NOINDEX__ here! This
+ # @todo FIXME: Bug 14899: __INDEX__ always overrides __NOINDEX__ here! This
# is not desirable, the last one on the page should win.
if ( isset( $this->mDoubleUnderscores['noindex'] ) && $this->mTitle->canUseNoindex() ) {
$this->mOutput->setIndexPolicy( 'noindex' );
@@ -3592,7 +3702,7 @@ class Parser {
$this->mOutput->setIndexPolicy( 'index' );
$this->addTrackingCategory( 'index-category' );
}
-
+
# Cache all double underscores in the database
foreach ( $this->mDoubleUnderscores as $key => $val ) {
$this->mOutput->setProperty( $key, '' );
@@ -3610,6 +3720,10 @@ class Parser {
* @return Boolean: whether the addition was successful
*/
protected function addTrackingCategory( $msg ) {
+ if ( $this->mTitle->getNamespace() === NS_SPECIAL ) {
+ wfDebug( __METHOD__.": Not adding tracking category $msg to special page!\n" );
+ return false;
+ }
$cat = wfMsgForContent( $msg );
# Allow tracking categories to be disabled by setting them to "-"
@@ -3643,16 +3757,17 @@ class Parser {
* @private
*/
function formatHeadings( $text, $origText, $isMain=true ) {
- global $wgMaxTocLevel, $wgContLang, $wgHtml5, $wgExperimentalHtmlIds;
+ global $wgMaxTocLevel, $wgHtml5, $wgExperimentalHtmlIds;
- $doNumberHeadings = $this->mOptions->getNumberHeadings();
-
# Inhibit editsection links if requested in the page
if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
$showEditLink = 0;
} else {
$showEditLink = $this->mOptions->getEditSection();
}
+ if ( $showEditLink ) {
+ $this->mOutput->setEditSectionTokens( true );
+ }
# Get all headlines for numbering them and adding funky stuff like [edit]
# links - this is for later, but we need the number of headlines right now
@@ -3683,9 +3798,6 @@ class Parser {
$enoughToc = true;
}
- # We need this to perform operations on the HTML
- $sk = $this->mOptions->getSkin( $this->mTitle );
-
# headline counter
$headlineCount = 0;
$numVisible = 0;
@@ -3736,7 +3848,7 @@ class Parser {
$sublevelCount[$toclevel] = 0;
if ( $toclevel<$wgMaxTocLevel ) {
$prevtoclevel = $toclevel;
- $toc .= $sk->tocIndent();
+ $toc .= Linker::tocIndent();
$numVisible++;
}
} elseif ( $level < $prevlevel && $toclevel > 1 ) {
@@ -3759,16 +3871,16 @@ class Parser {
if ( $toclevel<$wgMaxTocLevel ) {
if ( $prevtoclevel < $wgMaxTocLevel ) {
# Unindent only if the previous toc level was shown :p
- $toc .= $sk->tocUnindent( $prevtoclevel - $toclevel );
+ $toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
$prevtoclevel = $toclevel;
} else {
- $toc .= $sk->tocLineEnd();
+ $toc .= Linker::tocLineEnd();
}
}
} else {
# No change in level, end TOC line
if ( $toclevel<$wgMaxTocLevel ) {
- $toc .= $sk->tocLineEnd();
+ $toc .= Linker::tocLineEnd();
}
}
@@ -3782,7 +3894,7 @@ class Parser {
if ( $dot ) {
$numbering .= '.';
}
- $numbering .= $wgContLang->formatNum( $sublevelCount[$i] );
+ $numbering .= $this->getFunctionLang()->formatNum( $sublevelCount[$i] );
$dot = 1;
}
}
@@ -3837,10 +3949,10 @@ class Parser {
'noninitial' );
}
- # HTML names must be case-insensitively unique (bug 10721).
- # This does not apply to Unicode characters per
+ # HTML names must be case-insensitively unique (bug 10721).
+ # This does not apply to Unicode characters per
# http://dev.w3.org/html5/spec/infrastructure.html#case-sensitivity-and-string-comparison
- # FIXME: We may be changing them depending on the current locale.
+ # @todo FIXME: We may be changing them depending on the current locale.
$arrayKey = strtolower( $safeHeadline );
if ( $legacyHeadline === false ) {
$legacyArrayKey = false;
@@ -3861,7 +3973,7 @@ class Parser {
}
# Don't number the heading if it is the only one (looks silly)
- if ( $doNumberHeadings && count( $matches[3] ) > 1) {
+ if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
# the two are different if the line contains a link
$headline = $numbering . ' ' . $headline;
}
@@ -3876,7 +3988,7 @@ class Parser {
$legacyAnchor .= '_' . $refers[$legacyArrayKey];
}
if ( $enoughToc && ( !isset( $wgMaxTocLevel ) || $toclevel < $wgMaxTocLevel ) ) {
- $toc .= $sk->tocLine( $anchor, $tocline,
+ $toc .= Linker::tocLine( $anchor, $tocline,
$numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
}
@@ -3905,18 +4017,33 @@ class Parser {
);
# give headline the correct <h#> tag
- if ( $showEditLink && $sectionIndex !== false ) {
+ if ( $sectionIndex !== false ) {
+ // Output edit section links as markers with styles that can be customized by skins
if ( $isTemplate ) {
# Put a T flag in the section identifier, to indicate to extractSections()
# that sections inside <includeonly> should be counted.
- $editlink = $sk->doEditSectionLink( Title::newFromText( $titleText ), "T-$sectionIndex", null, $this->mOptions->getUserLang() );
+ $editlinkArgs = array( $titleText, "T-$sectionIndex"/*, null */ );
+ } else {
+ $editlinkArgs = array( $this->mTitle->getPrefixedText(), $sectionIndex, $headlineHint );
+ }
+ // We use a bit of pesudo-xml for editsection markers. The language converter is run later on
+ // Using a UNIQ style marker leads to the converter screwing up the tokens when it converts stuff
+ // And trying to insert strip tags fails too. At this point all real inputted tags have already been escaped
+ // so we don't have to worry about a user trying to input one of these markers directly.
+ // We use a page and section attribute to stop the language converter from converting these important bits
+ // of data, but put the headline hint inside a content block because the language converter is supposed to
+ // be able to convert that piece of data.
+ $editlink = '<mw:editsection page="' . htmlspecialchars($editlinkArgs[0]);
+ $editlink .= '" section="' . htmlspecialchars($editlinkArgs[1]) .'"';
+ if ( isset($editlinkArgs[2]) ) {
+ $editlink .= '>' . $editlinkArgs[2] . '</mw:editsection>';
} else {
- $editlink = $sk->doEditSectionLink( $this->mTitle, $sectionIndex, $headlineHint, $this->mOptions->getUserLang() );
+ $editlink .= '/>';
}
} else {
$editlink = '';
}
- $head[$headlineCount] = $sk->makeHeadline( $level,
+ $head[$headlineCount] = Linker::makeHeadline( $level,
$matches['attrib'][$headlineCount], $anchor, $headline,
$editlink, $legacyAnchor );
@@ -3932,9 +4059,9 @@ class Parser {
if ( $enoughToc ) {
if ( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) {
- $toc .= $sk->tocUnindent( $prevtoclevel - 1 );
+ $toc .= Linker::tocUnindent( $prevtoclevel - 1 );
}
- $toc = $sk->tocList( $toc, $this->mOptions->getUserLang() );
+ $toc = Linker::tocList( $toc, $this->mOptions->getUserLang() );
$this->mOutput->setTOCHTML( $toc );
}
@@ -3985,21 +4112,21 @@ class Parser {
* @param $clearState Boolean: whether to clear the parser state first
* @return String: the altered wiki markup
*/
- public function preSaveTransform( $text, Title $title, $user, $options, $clearState = true ) {
- $this->mOptions = $options;
- $this->setTitle( $title );
- $this->setOutputType( self::OT_WIKI );
-
- if ( $clearState ) {
- $this->clearState();
- }
+ public function preSaveTransform( $text, Title $title, User $user, ParserOptions $options, $clearState = true ) {
+ $this->startParse( $title, $options, self::OT_WIKI, $clearState );
+ $this->setUser( $user );
$pairs = array(
"\r\n" => "\n",
);
$text = str_replace( array_keys( $pairs ), array_values( $pairs ), $text );
- $text = $this->pstPass2( $text, $user );
+ if( $options->getPreSaveTransform() ) {
+ $text = $this->pstPass2( $text, $user );
+ }
$text = $this->mStripState->unstripBoth( $text );
+
+ $this->setUser( null ); #Reset
+
return $text;
}
@@ -4032,9 +4159,9 @@ class Parser {
# whatever crap the system uses, localised or not, so we cannot
# ship premade translations.
$key = 'timezone-' . strtolower( trim( $tzMsg ) );
- $value = wfMsgForContent( $key );
- if ( !wfEmptyMsg( $key, $value ) ) {
- $tzMsg = $value;
+ $msg = wfMessage( $key )->inContentLanguage();
+ if ( $msg->exists() ) {
+ $tzMsg = $msg->text();
}
date_default_timezone_set( $oldtz );
@@ -4093,6 +4220,8 @@ class Parser {
* validated, ready-to-insert wikitext.
* If you have pre-fetched the nickname or the fancySig option, you can
* specify them here to save a database query.
+ * Do not reuse this parser instance after calling getUserSig(),
+ * as it may have changed if it's the $wgParser.
*
* @param $user User
* @param $nickname String: nickname to use or false to use user's default nickname
@@ -4136,11 +4265,9 @@ class Parser {
# If we're still here, make it a link to the user page
$userText = wfEscapeWikiText( $username );
$nickText = wfEscapeWikiText( $nickname );
- if ( $user->isAnon() ) {
- return wfMsgExt( 'signature-anon', array( 'content', 'parsemag' ), $userText, $nickText );
- } else {
- return wfMsgExt( 'signature', array( 'content', 'parsemag' ), $userText, $nickText );
- }
+ $msgName = $user->isAnon() ? 'signature-anon' : 'signature';
+
+ return wfMessage( $msgName, $userText, $nickText )->inContentLanguage()->title( $this->getTitle() )->text();
}
/**
@@ -4177,7 +4304,7 @@ class Parser {
return $text;
}
- # FIXME: regex doesn't respect extension tags or nowiki
+ # @todo FIXME: Regex doesn't respect extension tags or nowiki
# => Move this logic to braceSubstitution()
$substWord = MagicWord::get( 'subst' );
$substRegex = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase();
@@ -4212,6 +4339,10 @@ class Parser {
* so that an external function can call some class members with confidence
*/
public function startExternalParse( Title $title = null, ParserOptions $options, $outputType, $clearState = true ) {
+ $this->startParse( $title, $options, $outputType, $clearState );
+ }
+
+ private function startParse( Title $title = null, ParserOptions $options, $outputType, $clearState = true ) {
$this->setTitle( $title );
$this->mOptions = $options;
$this->setOutputType( $outputType );
@@ -4225,10 +4356,10 @@ class Parser {
*
* @param $text String: the text to preprocess
* @param $options ParserOptions: options
+ * @param $title Title object or null to use $wgTitle
* @return String
*/
- public function transformMsg( $text, $options ) {
- global $wgTitle;
+ public function transformMsg( $text, $options, $title = null ) {
static $executing = false;
# Guard against infinite recursion
@@ -4238,7 +4369,10 @@ class Parser {
$executing = true;
wfProfileIn( __METHOD__ );
- $title = $wgTitle;
+ if ( !$title ) {
+ global $wgTitle;
+ $title = $wgTitle;
+ }
if ( !$title ) {
# It's not uncommon having a null $wgTitle in scripts. See r80898
# Create a ghost title in such case
@@ -4254,17 +4388,29 @@ class Parser {
/**
* Create an HTML-style tag, e.g. <yourtag>special text</yourtag>
* The callback should have the following form:
- * function myParserHook( $text, $params, $parser ) { ... }
+ * function myParserHook( $text, $params, $parser, $frame ) { ... }
*
* Transform and return $text. Use $parser for any required context, e.g. use
* $parser->getTitle() and $parser->getOptions() not $wgTitle or $wgOut->mParserOptions
*
+ * Hooks may return extended information by returning an array, of which the
+ * first numbered element (index 0) must be the return string, and all other
+ * entries are extracted into local variables within an internal function
+ * in the Parser class.
+ *
+ * This interface (introduced r61913) appears to be undocumented, but
+ * 'markerName' is used by some core tag hooks to override which strip
+ * array their results are placed in. **Use great caution if attempting
+ * this interface, as it is not documented and injudicious use could smash
+ * private variables.**
+ *
* @param $tag Mixed: the tag to use, e.g. 'hook' for <hook>
* @param $callback Mixed: the callback function (and object) to use for the tag
* @return The old value of the mTagHooks array associated with the hook
*/
public function setHook( $tag, $callback ) {
$tag = strtolower( $tag );
+ if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
$oldVal = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
$this->mTagHooks[$tag] = $callback;
if ( !in_array( $tag, $this->mStripList ) ) {
@@ -4274,8 +4420,25 @@ class Parser {
return $oldVal;
}
+ /**
+ * As setHook(), but letting the contents be parsed.
+ *
+ * Transparent tag hooks are like regular XML-style tag hooks, except they
+ * operate late in the transformation sequence, on HTML instead of wikitext.
+ *
+ * This is probably obsoleted by things dealing with parser frames?
+ * The only extension currently using it is geoserver.
+ *
+ * @since 1.10
+ * @todo better document or deprecate this
+ *
+ * @param $tag Mixed: the tag to use, e.g. 'hook' for <hook>
+ * @param $callback Mixed: the callback function (and object) to use for the tag
+ * @return The old value of the mTagHooks array associated with the hook
+ */
function setTransparentTagHook( $tag, $callback ) {
$tag = strtolower( $tag );
+ if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setTransparentHook('$tag', ...) call" );
$oldVal = isset( $this->mTransparentTagHooks[$tag] ) ? $this->mTransparentTagHooks[$tag] : null;
$this->mTransparentTagHooks[$tag] = $callback;
@@ -4380,6 +4543,7 @@ class Parser {
*/
function setFunctionTagHook( $tag, $callback, $flags ) {
$tag = strtolower( $tag );
+ if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
$old = isset( $this->mFunctionTagHooks[$tag] ) ?
$this->mFunctionTagHooks[$tag] : null;
$this->mFunctionTagHooks[$tag] = array( $callback, $flags );
@@ -4392,7 +4556,7 @@ class Parser {
}
/**
- * FIXME: update documentation. makeLinkObj() is deprecated.
+ * @todo FIXME: Update documentation. makeLinkObj() is deprecated.
* Replace <!--LINK--> link placeholders with actual links, in the buffer
* Placeholders created in Skin::makeLinkObj()
* Returns an array of link CSS classes, indexed by PDBK.
@@ -4420,6 +4584,10 @@ class Parser {
* given as text will return the HTML of a gallery with two images,
* labeled 'The number "1"' and
* 'A tree'.
+ *
+ * @param string $text
+ * @param array $param
+ * @return string HTML
*/
function renderImageGallery( $text, $params ) {
$ig = new ImageGallery();
@@ -4429,8 +4597,6 @@ class Parser {
$ig->setParser( $this );
$ig->setHideBadImages();
$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'table' ) );
- $ig->useSkin( $this->mOptions->getSkin( $this->mTitle ) );
- $ig->mRevisionId = $this->mRevisionId;
if ( isset( $params['showfilename'] ) ) {
$ig->setShowFilename( true );
@@ -4467,28 +4633,40 @@ class Parser {
}
if ( strpos( $matches[0], '%' ) !== false ) {
- $matches[1] = urldecode( $matches[1] );
+ $matches[1] = rawurldecode( $matches[1] );
}
- $tp = Title::newFromText( $matches[1] );
- $nt =& $tp;
- if ( is_null( $nt ) ) {
+ $title = Title::newFromText( $matches[1], NS_FILE );
+ if ( is_null( $title ) ) {
# Bogus title. Ignore these so we don't bomb out later.
continue;
}
+
+ $label = '';
+ $alt = '';
if ( isset( $matches[3] ) ) {
- $label = $matches[3];
- } else {
- $label = '';
+ // look for an |alt= definition while trying not to break existing
+ // captions with multiple pipes (|) in it, until a more sensible grammar
+ // is defined for images in galleries
+
+ $matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
+ $altmatches = StringUtils::explode('|', $matches[3]);
+ $magicWordAlt = MagicWord::get( 'img_alt' );
+
+ foreach ( $altmatches as $altmatch ) {
+ $match = $magicWordAlt->matchVariableStartToEnd( $altmatch );
+ if ( $match ) {
+ $alt = $this->stripAltText( $match, false );
+ }
+ else {
+ // concatenate all other pipes
+ $label .= '|' . $altmatch;
+ }
+ }
+ // remove the first pipe
+ $label = substr( $label, 1 );
}
- $html = $this->recursiveTagParse( trim( $label ) );
-
- $ig->add( $nt, $html );
-
- # Only add real images (bug #5586)
- if ( $nt->getNamespace() == NS_FILE ) {
- $this->mOutput->addImage( $nt->getDBkey() );
- }
+ $ig->add( $title, $label, $alt );
}
return $ig->toHTML();
}
@@ -4539,6 +4717,7 @@ class Parser {
* @param $title Title
* @param $options String
* @param $holders LinkHolderArray
+ * @return string HTML
*/
function makeImage( $title, $options, $holders = false ) {
# Check if the options text is of the form "options|alt text"
@@ -4567,23 +4746,23 @@ class Parser {
# * text-bottom
$parts = StringUtils::explode( "|", $options );
- $sk = $this->mOptions->getSkin( $this->mTitle );
# Give extensions a chance to select the file revision for us
- $skip = $time = $descQuery = false;
- wfRunHooks( 'BeforeParserMakeImageLinkObj', array( &$this, &$title, &$skip, &$time, &$descQuery ) );
+ $time = $sha1 = $descQuery = false;
+ wfRunHooks( 'BeforeParserFetchFileAndTitle',
+ array( $this, $title, &$time, &$sha1, &$descQuery ) );
+ # Fetch and register the file (file title may be different via hooks)
+ list( $file, $title ) = $this->fetchFileAndTitle( $title, $time, $sha1 );
- if ( $skip ) {
- return $sk->link( $title );
- }
-
- # Get the file
- $file = wfFindFile( $title, array( 'time' => $time ) );
# Get parameter map
$handler = $file ? $file->getHandler() : false;
list( $paramMap, $mwArray ) = $this->getImageParams( $handler );
+ if ( !$file ) {
+ $this->addTrackingCategory( 'broken-file-category' );
+ }
+
# Process the input parameters
$caption = '';
$params = array( 'frame' => array(), 'handler' => array(),
@@ -4627,7 +4806,7 @@ class Parser {
switch( $paramName ) {
case 'manualthumb':
case 'alt':
- # @todo Fixme: possibly check validity here for
+ # @todo FIXME: Possibly check validity here for
# manualthumb? downstream behavior seems odd with
# missing manual thumbs.
$validated = true;
@@ -4687,9 +4866,9 @@ class Parser {
# Will the image be presented in a frame, with the caption below?
$imageIsFramed = isset( $params['frame']['frame'] ) ||
- isset( $params['frame']['framed'] ) ||
- isset( $params['frame']['thumbnail'] ) ||
- isset( $params['frame']['manualthumb'] );
+ isset( $params['frame']['framed'] ) ||
+ isset( $params['frame']['thumbnail'] ) ||
+ isset( $params['frame']['manualthumb'] );
# In the old days, [[Image:Foo|text...]] would set alt text. Later it
# came to also set the caption, ordinary text after the image -- which
@@ -4733,7 +4912,8 @@ class Parser {
wfRunHooks( 'ParserMakeImageParams', array( $title, $file, &$params ) );
# Linker does the rest
- $ret = $sk->makeImageLink2( $title, $file, $params['frame'], $params['handler'], $time, $descQuery, $this->mOptions->getThumbSize() );
+ $ret = Linker::makeImageLink2( $title, $file, $params['frame'], $params['handler'],
+ $time, $descQuery, $this->mOptions->getThumbSize() );
# Give the handler a chance to modify the parser object
if ( $handler ) {
@@ -4743,6 +4923,11 @@ class Parser {
return $ret;
}
+ /**
+ * @param $caption
+ * @param $holders LinkHolderArray
+ * @return mixed|String
+ */
protected function stripAltText( $caption, $holders ) {
# Strip bad stuff out of the title (tooltip). We can't just use
# replaceLinkHoldersText() here, because if this function is called
@@ -4779,7 +4964,6 @@ class Parser {
* @param $text String
* @param $frame PPFrame
* @return String
- * @private
*/
function attributeStripCallback( &$text, $frame = false ) {
$text = $this->replaceVariables( $text, $frame );
@@ -4789,12 +4973,39 @@ class Parser {
/**
* Accessor
+ *
+ * @return array
*/
function getTags() {
return array_merge( array_keys( $this->mTransparentTagHooks ), array_keys( $this->mTagHooks ) );
}
/**
+ * Replace transparent tags in $text with the values given by the callbacks.
+ *
+ * Transparent tag hooks are like regular XML-style tag hooks, except they
+ * operate late in the transformation sequence, on HTML instead of wikitext.
+ */
+ function replaceTransparentTags( $text ) {
+ $matches = array();
+ $elements = array_keys( $this->mTransparentTagHooks );
+ $text = self::extractTagsAndParams( $elements, $text, $matches, $this->mUniqPrefix );
+ $replacements = array();
+
+ foreach ( $matches as $marker => $data ) {
+ list( $element, $content, $params, $tag ) = $data;
+ $tagName = strtolower( $element );
+ if ( isset( $this->mTransparentTagHooks[$tagName] ) ) {
+ $output = call_user_func_array( $this->mTransparentTagHooks[$tagName], array( $content, $params, $this ) );
+ } else {
+ $output = $tag;
+ }
+ $replacements[$marker] = $output;
+ }
+ return strtr( $text, $replacements );
+ }
+
+ /**
* Break wikitext input into sections, and either pull or replace
* some particular section's text.
*
@@ -4814,17 +5025,18 @@ class Parser {
* pull the given section along with its lower-level subsections. If the section is
* not found, $mode=get will return $newtext, and $mode=replace will return $text.
*
+ * Section 0 is always considered to exist, even if it only contains the empty
+ * string. If $text is the empty string and section 0 is replaced, $newText is
+ * returned.
+ *
* @param $mode String: one of "get" or "replace"
* @param $newText String: replacement text for section data.
* @return String: for "get", the extracted section text.
* for "replace", the whole page with the section replaced.
*/
private function extractSections( $text, $section, $mode, $newText='' ) {
- global $wgTitle;
- $this->mOptions = new ParserOptions;
- $this->clearState();
- $this->setTitle( $wgTitle ); # not generally used but removes an ugly failure mode
- $this->setOutputType( self::OT_PLAIN );
+ global $wgTitle; # not generally used but removes an ugly failure mode
+ $this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
$outText = '';
$frame = $this->getPreprocessor()->newFrame();
@@ -4837,6 +5049,25 @@ class Parser {
$flags |= self::PTD_FOR_INCLUSION;
}
}
+
+ # Check for empty input
+ if ( strval( $text ) === '' ) {
+ # Only sections 0 and T-0 exist in an empty document
+ if ( $sectionIndex == 0 ) {
+ if ( $mode === 'get' ) {
+ return '';
+ } else {
+ return $newText;
+ }
+ } else {
+ if ( $mode === 'get' ) {
+ return $newText;
+ } else {
+ return $text;
+ }
+ }
+ }
+
# Preprocess the text
$root = $this->preprocessToDom( $text, $flags );
@@ -4930,12 +5161,13 @@ class Parser {
/**
* This function returns $oldtext after the content of the section
- * specified by $section has been replaced with $text.
+ * specified by $section has been replaced with $text. If the target
+ * section does not exist, $oldtext is returned unchanged.
*
- * @param $text String: former text of the article
+ * @param $oldtext String: former text of the article
* @param $section Numeric: section identifier
* @param $text String: replacing text
- * #return String: modified text
+ * @return String: modified text
*/
public function replaceSection( $oldtext, $section, $text ) {
return $this->extractSections( $oldtext, $section, "replace", $text );
@@ -4951,30 +5183,44 @@ class Parser {
}
/**
+ * Get the revision object for $this->mRevisionId
+ *
+ * @return Revision|null either a Revision object or null
+ */
+ protected function getRevisionObject() {
+ if ( !is_null( $this->mRevisionObject ) ) {
+ return $this->mRevisionObject;
+ }
+ if ( is_null( $this->mRevisionId ) ) {
+ return null;
+ }
+
+ $this->mRevisionObject = Revision::newFromId( $this->mRevisionId );
+ return $this->mRevisionObject;
+ }
+
+ /**
* Get the timestamp associated with the current revision, adjusted for
* the default server-local timestamp
*/
function getRevisionTimestamp() {
if ( is_null( $this->mRevisionTimestamp ) ) {
wfProfileIn( __METHOD__ );
- global $wgContLang;
- $dbr = wfGetDB( DB_SLAVE );
- $timestamp = $dbr->selectField( 'revision', 'rev_timestamp',
- array( 'rev_id' => $this->mRevisionId ), __METHOD__ );
-
- # Normalize timestamp to internal MW format for timezone processing.
- # This has the added side-effect of replacing a null value with
- # the current time, which gives us more sensible behavior for
- # previews.
- $timestamp = wfTimestamp( TS_MW, $timestamp );
-
- # The cryptic '' timezone parameter tells to use the site-default
- # timezone offset instead of the user settings.
- #
- # Since this value will be saved into the parser cache, served
- # to other users, and potentially even used inside links and such,
- # it needs to be consistent for all visitors.
- $this->mRevisionTimestamp = $wgContLang->userAdjust( $timestamp, '' );
+
+ $revObject = $this->getRevisionObject();
+ $timestamp = $revObject ? $revObject->getTimestamp() : false;
+
+ if( $timestamp !== false ) {
+ global $wgContLang;
+
+ # The cryptic '' timezone parameter tells to use the site-default
+ # timezone offset instead of the user settings.
+ #
+ # Since this value will be saved into the parser cache, served
+ # to other users, and potentially even used inside links and such,
+ # it needs to be consistent for all visitors.
+ $this->mRevisionTimestamp = $wgContLang->userAdjust( $timestamp, '' );
+ }
wfProfileOut( __METHOD__ );
}
@@ -4987,16 +5233,18 @@ class Parser {
* @return String: user name
*/
function getRevisionUser() {
- # if this template is subst: the revision id will be blank,
- # so just use the current user's name
- if ( $this->mRevisionId ) {
- $revision = Revision::newFromId( $this->mRevisionId );
- $revuser = $revision->getUserText();
- } else {
- global $wgUser;
- $revuser = $wgUser->getName();
+ if( is_null( $this->mRevisionUser ) ) {
+ $revObject = $this->getRevisionObject();
+
+ # if this template is subst: the revision id will be blank,
+ # so just use the current user's name
+ if( $revObject ) {
+ $this->mRevisionUser = $revObject->getUserText();
+ } elseif( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
+ $this->mRevisionUser = $this->getUser()->getName();
+ }
}
- return $revuser;
+ return $this->mRevisionUser;
}
/**
@@ -5083,7 +5331,8 @@ class Parser {
$text = preg_replace( '/\[\[:?([^[|]+)\|([^[]+)\]\]/', '$2', $text );
$text = preg_replace( '/\[\[:?([^[]+)\|?\]\]/', '$1', $text );
- # Strip external link markup (FIXME: Not Tolerant to blank link text
+ # Strip external link markup
+ # @todo FIXME: Not tolerant to blank link text
# I.E. [http://www.mediawiki.org] will render as [1] or something depending
# on how many empty links there are on the page - need to figure that out.
$text = preg_replace( '/\[(?:' . wfUrlProtocols() . ')([^ ]+?) ([^[]+)\]/', '$2', $text );
@@ -5098,36 +5347,39 @@ class Parser {
/**
* strip/replaceVariables/unstrip for preprocessor regression testing
+ *
+ * @return string
*/
- function testSrvus( $text, $title, ParserOptions $options, $outputType = self::OT_HTML ) {
- $this->mOptions = $options;
- $this->clearState();
- if ( !$title instanceof Title ) {
- $title = Title::newFromText( $title );
- }
- $this->mTitle = $title;
- $this->setOutputType( $outputType );
+ function testSrvus( $text, Title $title, ParserOptions $options, $outputType = self::OT_HTML ) {
+ $this->startParse( $title, $options, $outputType, true );
+
$text = $this->replaceVariables( $text );
$text = $this->mStripState->unstripBoth( $text );
$text = Sanitizer::removeHTMLtags( $text );
return $text;
}
- function testPst( $text, $title, $options ) {
- global $wgUser;
- if ( !$title instanceof Title ) {
- $title = Title::newFromText( $title );
- }
- return $this->preSaveTransform( $text, $title, $wgUser, $options );
+ function testPst( $text, Title $title, ParserOptions $options ) {
+ return $this->preSaveTransform( $text, $title, $options->getUser(), $options );
}
- function testPreprocess( $text, $title, $options ) {
- if ( !$title instanceof Title ) {
- $title = Title::newFromText( $title );
- }
+ function testPreprocess( $text, Title $title, ParserOptions $options ) {
return $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS );
}
+ /**
+ * Call a callback function on all regions of the given text that are not
+ * inside strip markers, and replace those regions with the return value
+ * of the callback. For example, with input:
+ *
+ * aaa<MARKER>bbb
+ *
+ * This will call the callback function twice, with 'aaa' and 'bbb'. Those
+ * two strings will be replaced with the value returned by the callback in
+ * each case.
+ *
+ * @return string
+ */
function markerSkipCallback( $s, $callback ) {
$i = 0;
$out = '';
@@ -5152,168 +5404,72 @@ class Parser {
return $out;
}
- function serialiseHalfParsedText( $text ) {
- $data = array();
- $data['text'] = $text;
-
- # First, find all strip markers, and store their
- # data in an array.
- $stripState = new StripState;
- $pos = 0;
- while ( ( $start_pos = strpos( $text, $this->mUniqPrefix, $pos ) )
- && ( $end_pos = strpos( $text, self::MARKER_SUFFIX, $pos ) ) )
- {
- $end_pos += strlen( self::MARKER_SUFFIX );
- $marker = substr( $text, $start_pos, $end_pos-$start_pos );
-
- if ( !empty( $this->mStripState->general->data[$marker] ) ) {
- $replaceArray = $stripState->general;
- $stripText = $this->mStripState->general->data[$marker];
- } elseif ( !empty( $this->mStripState->nowiki->data[$marker] ) ) {
- $replaceArray = $stripState->nowiki;
- $stripText = $this->mStripState->nowiki->data[$marker];
- } else {
- throw new MWException( "Hanging strip marker: '$marker'." );
- }
-
- $replaceArray->setPair( $marker, $stripText );
- $pos = $end_pos;
- }
- $data['stripstate'] = $stripState;
-
- # Now, find all of our links, and store THEIR
- # data in an array! :)
- $links = array( 'internal' => array(), 'interwiki' => array() );
- $pos = 0;
-
- # Internal links
- while ( ( $start_pos = strpos( $text, '<!--LINK ', $pos ) ) ) {
- list( $ns, $trail ) = explode( ':', substr( $text, $start_pos + strlen( '<!--LINK ' ) ), 2 );
-
- $ns = trim( $ns );
- if ( empty( $links['internal'][$ns] ) ) {
- $links['internal'][$ns] = array();
- }
-
- $key = trim( substr( $trail, 0, strpos( $trail, '-->' ) ) );
- $links['internal'][$ns][] = $this->mLinkHolders->internals[$ns][$key];
- $pos = $start_pos + strlen( "<!--LINK $ns:$key-->" );
- }
-
- $pos = 0;
-
- # Interwiki links
- while ( ( $start_pos = strpos( $text, '<!--IWLINK ', $pos ) ) ) {
- $data = substr( $text, $start_pos );
- $key = trim( substr( $data, 0, strpos( $data, '-->' ) ) );
- $links['interwiki'][] = $this->mLinkHolders->interwiki[$key];
- $pos = $start_pos + strlen( "<!--IWLINK $key-->" );
- }
-
- $data['linkholder'] = $links;
-
+ /**
+ * Save the parser state required to convert the given half-parsed text to
+ * HTML. "Half-parsed" in this context means the output of
+ * recursiveTagParse() or internalParse(). This output has strip markers
+ * from replaceVariables (extensionSubstitution() etc.), and link
+ * placeholders from replaceLinkHolders().
+ *
+ * Returns an array which can be serialized and stored persistently. This
+ * array can later be loaded into another parser instance with
+ * unserializeHalfParsedText(). The text can then be safely incorporated into
+ * the return value of a parser hook.
+ *
+ * @return array
+ */
+ function serializeHalfParsedText( $text ) {
+ wfProfileIn( __METHOD__ );
+ $data = array(
+ 'text' => $text,
+ 'version' => self::HALF_PARSED_VERSION,
+ 'stripState' => $this->mStripState->getSubState( $text ),
+ 'linkHolders' => $this->mLinkHolders->getSubArray( $text )
+ );
+ wfProfileOut( __METHOD__ );
return $data;
}
/**
- * TODO: document
- * @param $data Array
- * @param $intPrefix String unique identifying prefix
+ * Load the parser state given in the $data array, which is assumed to
+ * have been generated by serializeHalfParsedText(). The text contents is
+ * extracted from the array, and its markers are transformed into markers
+ * appropriate for the current Parser instance. This transformed text is
+ * returned, and can be safely included in the return value of a parser
+ * hook.
+ *
+ * If the $data array has been stored persistently, the caller should first
+ * check whether it is still valid, by calling isValidHalfParsedText().
+ *
+ * @param $data Serialized data
* @return String
*/
- function unserialiseHalfParsedText( $data, $intPrefix = null ) {
- if ( !$intPrefix ) {
- $intPrefix = self::getRandomString();
+ function unserializeHalfParsedText( $data ) {
+ if ( !isset( $data['version'] ) || $data['version'] != self::HALF_PARSED_VERSION ) {
+ throw new MWException( __METHOD__.': invalid version' );
}
# First, extract the strip state.
- $stripState = $data['stripstate'];
- $this->mStripState->general->merge( $stripState->general );
- $this->mStripState->nowiki->merge( $stripState->nowiki );
-
- # Now, extract the text, and renumber links
- $text = $data['text'];
- $links = $data['linkholder'];
-
- # Internal...
- foreach ( $links['internal'] as $ns => $nsLinks ) {
- foreach ( $nsLinks as $key => $entry ) {
- $newKey = $intPrefix . '-' . $key;
- $this->mLinkHolders->internals[$ns][$newKey] = $entry;
-
- $text = str_replace( "<!--LINK $ns:$key-->", "<!--LINK $ns:$newKey-->", $text );
- }
- }
+ $texts = array( $data['text'] );
+ $texts = $this->mStripState->merge( $data['stripState'], $texts );
- # Interwiki...
- foreach ( $links['interwiki'] as $key => $entry ) {
- $newKey = "$intPrefix-$key";
- $this->mLinkHolders->interwikis[$newKey] = $entry;
-
- $text = str_replace( "<!--IWLINK $key-->", "<!--IWLINK $newKey-->", $text );
- }
+ # Now renumber links
+ $texts = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $texts );
# Should be good to go.
- return $text;
- }
-}
-
-/**
- * @todo document, briefly.
- * @ingroup Parser
- */
-class StripState {
- var $general, $nowiki;
-
- function __construct() {
- $this->general = new ReplacementArray;
- $this->nowiki = new ReplacementArray;
- }
-
- function unstripGeneral( $text ) {
- wfProfileIn( __METHOD__ );
- do {
- $oldText = $text;
- $text = $this->general->replace( $text );
- } while ( $text !== $oldText );
- wfProfileOut( __METHOD__ );
- return $text;
- }
-
- function unstripNoWiki( $text ) {
- wfProfileIn( __METHOD__ );
- do {
- $oldText = $text;
- $text = $this->nowiki->replace( $text );
- } while ( $text !== $oldText );
- wfProfileOut( __METHOD__ );
- return $text;
- }
-
- function unstripBoth( $text ) {
- wfProfileIn( __METHOD__ );
- do {
- $oldText = $text;
- $text = $this->general->replace( $text );
- $text = $this->nowiki->replace( $text );
- } while ( $text !== $oldText );
- wfProfileOut( __METHOD__ );
- return $text;
+ return $texts[0];
}
-}
-/**
- * @todo document, briefly.
- * @ingroup Parser
- */
-class OnlyIncludeReplacer {
- var $output = '';
-
- function replace( $matches ) {
- if ( substr( $matches[1], -1 ) === "\n" ) {
- $this->output .= substr( $matches[1], 0, -1 );
- } else {
- $this->output .= $matches[1];
- }
+ /**
+ * Returns true if the given array, presumed to be generated by
+ * serializeHalfParsedText(), is compatible with the current version of the
+ * parser.
+ *
+ * @param $data Array
+ *
+ * @return bool
+ */
+ function isValidHalfParsedText( $data ) {
+ return isset( $data['version'] ) && $data['version'] == self::HALF_PARSED_VERSION;
}
}
diff --git a/includes/parser/ParserCache.php b/includes/parser/ParserCache.php
index 1e028ae5..dcbf7a4d 100644
--- a/includes/parser/ParserCache.php
+++ b/includes/parser/ParserCache.php
@@ -31,13 +31,18 @@ class ParserCache {
*
* @param $memCached Object
*/
- function __construct( $memCached ) {
+ protected function __construct( $memCached ) {
if ( !$memCached ) {
throw new MWException( "Tried to create a ParserCache with an invalid memcached" );
}
$this->mMemc = $memCached;
}
+ /**
+ * @param $article Article
+ * @param $hash string
+ * @return mixed|string
+ */
protected function getParserOutputKey( $article, $hash ) {
global $wgRequest;
@@ -49,6 +54,10 @@ class ParserCache {
return $key;
}
+ /**
+ * @param $article Article
+ * @return mixed|string
+ */
protected function getOptionsKey( $article ) {
$pageid = $article->getID();
return wfMemcKey( 'pcache', 'idoptions', "{$pageid}" );
@@ -63,6 +72,9 @@ class ParserCache {
* $article. For example give a Chinese interface to a user with
* English preferences. That's why we take into account *all* user
* options. (r70809 CR)
+ *
+ * @param $article Article
+ * @param $popts ParserOptions
*/
function getETag( $article, $popts ) {
return 'W/"' . $this->getParserOutputKey( $article,
@@ -72,6 +84,9 @@ class ParserCache {
/**
* Retrieve the ParserOutput from ParserCache, even if it's outdated.
+ * @param $article Article
+ * @param $popts ParserOptions
+ * @return ParserOutput|false
*/
public function getDirty( $article, $popts ) {
$value = $this->get( $article, $popts, true );
@@ -82,6 +97,9 @@ class ParserCache {
* Used to provide a unique id for the PoolCounter.
* It would be preferable to have this code in get()
* instead of having Article looking in our internals.
+ *
+ * @param $article Article
+ * @param $popts ParserOptions
*/
public function getKey( $article, $popts, $useOutdated = true ) {
global $wgCacheEpoch;
@@ -116,6 +134,12 @@ class ParserCache {
/**
* Retrieve the ParserOutput from ParserCache.
* false if not found or outdated.
+ *
+ * @param $article Article
+ * @param $popts ParserOptions
+ * @param $useOutdated
+ *
+ * @return ParserOutput|false
*/
public function get( $article, $popts, $useOutdated = false ) {
global $wgCacheEpoch;
@@ -150,6 +174,11 @@ class ParserCache {
}
wfDebug( "Found.\n" );
+
+ // The edit section preference may not be the appropiate one in
+ // the ParserOutput, as we are not storing it in the parsercache
+ // key. Force it here. See bug 31445.
+ $value->setEditSectionTokens( $popts->getEditSection() );
if ( !$useOutdated && $value->expired( $touched ) ) {
wfIncrStats( "pcache_miss_expired" );
@@ -157,9 +186,6 @@ class ParserCache {
wfDebug( "ParserOutput key expired, touched $touched, epoch $wgCacheEpoch, cached $cacheTime\n" );
$value = false;
} else {
- if ( isset( $value->mTimestamp ) ) {
- $article->mTimestamp = $value->mTimestamp;
- }
wfIncrStats( "pcache_hit" );
}
@@ -167,7 +193,12 @@ class ParserCache {
return $value;
}
-
+ /**
+ * @param $parserOutput ParserOutput
+ * @param $article Article
+ * @param $popts ParserOptions
+ * @return void
+ */
public function save( $parserOutput, $article, $popts ) {
$expire = $parserOutput->getCacheExpiry();
@@ -183,7 +214,8 @@ class ParserCache {
$optionsKey->setContainsOldMagic( $parserOutput->containsOldMagic() );
- $parserOutputKey = $this->getParserOutputKey( $article, $popts->optionsHash( $optionsKey->mUsedOptions ) );
+ $parserOutputKey = $this->getParserOutputKey( $article,
+ $popts->optionsHash( $optionsKey->mUsedOptions ) );
// Save the timestamp so that we don't have to load the revision row on view
$parserOutput->mTimestamp = $article->getTimestamp();
diff --git a/includes/parser/ParserOptions.php b/includes/parser/ParserOptions.php
index 1bda0792..07752768 100644
--- a/includes/parser/ParserOptions.php
+++ b/includes/parser/ParserOptions.php
@@ -5,7 +5,7 @@
* @file
* @ingroup Parser
*/
-
+
/**
* Set options of the Parser
* @todo document
@@ -18,46 +18,51 @@ class ParserOptions {
var $mAllowExternalImages; # Allow external images inline
var $mAllowExternalImagesFrom; # If not, any exception?
var $mEnableImageWhitelist; # If not or it doesn't match, should we check an on-wiki whitelist?
- var $mSkin; # Reference to the preferred skin
- var $mDateFormat; # Date format index
- var $mEditSection; # Create "edit section" links
- var $mNumberHeadings; # Automatically number headings
+ var $mDateFormat = null; # Date format index
+ var $mEditSection = true; # Create "edit section" links
var $mAllowSpecialInclusion; # Allow inclusion of special pages
- var $mTidy; # Ask for tidy cleanup
- var $mInterfaceMessage; # Which lang to call for PLURAL and GRAMMAR
- var $mTargetLanguage; # Overrides above setting with arbitrary language
+ var $mTidy = false; # Ask for tidy cleanup
+ var $mInterfaceMessage = false; # Which lang to call for PLURAL and GRAMMAR
+ var $mTargetLanguage = null; # Overrides above setting with arbitrary language
var $mMaxIncludeSize; # Maximum size of template expansions, in bytes
var $mMaxPPNodeCount; # Maximum number of nodes touched by PPFrame::expand()
var $mMaxPPExpandDepth; # Maximum recursion depth in PPFrame::expand()
var $mMaxTemplateDepth; # Maximum recursion depth for templates within templates
- var $mRemoveComments; # Remove HTML comments. ONLY APPLIES TO PREPROCESS OPERATIONS
- var $mTemplateCallback; # Callback for template fetching
- var $mEnableLimitReport; # Enable limit report in an HTML comment on output
+ var $mRemoveComments = true; # Remove HTML comments. ONLY APPLIES TO PREPROCESS OPERATIONS
+ var $mTemplateCallback = # Callback for template fetching
+ array( 'Parser', 'statelessFetchTemplate' );
+ var $mEnableLimitReport = false; # Enable limit report in an HTML comment on output
var $mTimestamp; # Timestamp used for {{CURRENTDAY}} etc.
var $mExternalLinkTarget; # Target attribute for external links
+ var $mCleanSignatures; #
+ var $mPreSaveTransform = true; # Transform wiki markup when saving the page.
+
+ var $mNumberHeadings; # Automatically number headings
var $mMath; # User math preference (as integer)
- var $mUserLang; # Language code of the User language.
var $mThumbSize; # Thumb size preferred by the user.
- var $mCleanSignatures; #
+ private $mStubThreshold; # Maximum article size of an article to be marked as "stub"
+ var $mUserLang; # Language code of the User language.
+
+ /**
+ * @var User
+ */
+ var $mUser; # Stored user object
+ var $mIsPreview = false; # Parsing the page for a "preview" operation
+ var $mIsSectionPreview = false; # Parsing the page for a "preview" operation on a single section
+ var $mIsPrintable = false; # Parsing the printable version of the page
- var $mUser; # Stored user object, just used to initialise the skin
- var $mIsPreview; # Parsing the page for a "preview" operation
- var $mIsSectionPreview; # Parsing the page for a "preview" operation on a single section
- var $mIsPrintable; # Parsing the printable version of the page
-
var $mExtraKey = ''; # Extra key that should be present in the caching key.
-
+
protected $onAccessCallback = null;
-
+
function getUseDynamicDates() { return $this->mUseDynamicDates; }
function getInterwikiMagic() { return $this->mInterwikiMagic; }
function getAllowExternalImages() { return $this->mAllowExternalImages; }
function getAllowExternalImagesFrom() { return $this->mAllowExternalImagesFrom; }
function getEnableImageWhitelist() { return $this->mEnableImageWhitelist; }
- function getEditSection() { $this->optionUsed('editsection');
- return $this->mEditSection; }
- function getNumberHeadings() { $this->optionUsed('numberheadings');
- return $this->mNumberHeadings; }
+ function getEditSection() { return $this->mEditSection; }
+ function getNumberHeadings() { $this->optionUsed( 'numberheadings' );
+ return $this->mNumberHeadings; }
function getAllowSpecialInclusion() { return $this->mAllowSpecialInclusion; }
function getTidy() { return $this->mTidy; }
function getInterfaceMessage() { return $this->mInterfaceMessage; }
@@ -71,25 +76,32 @@ class ParserOptions {
function getEnableLimitReport() { return $this->mEnableLimitReport; }
function getCleanSignatures() { return $this->mCleanSignatures; }
function getExternalLinkTarget() { return $this->mExternalLinkTarget; }
- function getMath() { $this->optionUsed('math');
- return $this->mMath; }
- function getThumbSize() { $this->optionUsed('thumbsize');
- return $this->mThumbSize; }
-
+ function getMath() { $this->optionUsed( 'math' );
+ return $this->mMath; }
+ function getThumbSize() { $this->optionUsed( 'thumbsize' );
+ return $this->mThumbSize; }
+ function getStubThreshold() { $this->optionUsed( 'stubthreshold' );
+ return $this->mStubThreshold; }
+
function getIsPreview() { return $this->mIsPreview; }
function getIsSectionPreview() { return $this->mIsSectionPreview; }
- function getIsPrintable() { $this->optionUsed('printable');
- return $this->mIsPrintable; }
+ function getIsPrintable() { $this->optionUsed( 'printable' );
+ return $this->mIsPrintable; }
+ function getUser() { return $this->mUser; }
+ function getPreSaveTransform() { return $this->mPreSaveTransform; }
+ /**
+ * @param $title Title
+ * @return Skin
+ * @deprecated since 1.18 Use Linker::* instead
+ */
function getSkin( $title = null ) {
- if ( !isset( $this->mSkin ) ) {
- $this->mSkin = $this->mUser->getSkin( $title );
- }
- return $this->mSkin;
+ wfDeprecated( __METHOD__ );
+ return new DummyLinker;
}
function getDateFormat() {
- $this->optionUsed('dateformat');
+ $this->optionUsed( 'dateformat' );
if ( !isset( $this->mDateFormat ) ) {
$this->mDateFormat = $this->mUser->getDatePreference();
}
@@ -107,9 +119,11 @@ class ParserOptions {
* You shouldn't use this. Really. $parser->getFunctionLang() is all you need.
* Using this fragments the cache and is discouraged. Yes, {{int: }} uses this,
* producing inconsistent tables (Bug 14404).
+ * @return String Language code
+ * @since 1.17
*/
function getUserLang() {
- $this->optionUsed('userlang');
+ $this->optionUsed( 'userlang' );
return $this->mUserLang;
}
@@ -122,9 +136,9 @@ class ParserOptions {
function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
function setAllowSpecialInclusion( $x ) { return wfSetVar( $this->mAllowSpecialInclusion, $x ); }
- function setTidy( $x ) { return wfSetVar( $this->mTidy, $x); }
+ function setTidy( $x ) { return wfSetVar( $this->mTidy, $x ); }
function setSkin( $x ) { $this->mSkin = $x; }
- function setInterfaceMessage( $x ) { return wfSetVar( $this->mInterfaceMessage, $x); }
+ function setInterfaceMessage( $x ) { return wfSetVar( $this->mInterfaceMessage, $x ); }
function setTargetLanguage( $x ) { return wfSetVar( $this->mTargetLanguage, $x, true ); }
function setMaxIncludeSize( $x ) { return wfSetVar( $this->mMaxIncludeSize, $x ); }
function setMaxPPNodeCount( $x ) { return wfSetVar( $this->mMaxPPNodeCount, $x ); }
@@ -136,9 +150,16 @@ class ParserOptions {
function setCleanSignatures( $x ) { return wfSetVar( $this->mCleanSignatures, $x ); }
function setExternalLinkTarget( $x ) { return wfSetVar( $this->mExternalLinkTarget, $x ); }
function setMath( $x ) { return wfSetVar( $this->mMath, $x ); }
- function setUserLang( $x ) { return wfSetVar( $this->mUserLang, $x ); }
+ function setUserLang( $x ) {
+ if ( $x instanceof Language ) {
+ $x = $x->getCode();
+ }
+ return wfSetVar( $this->mUserLang, $x );
+ }
function setThumbSize( $x ) { return wfSetVar( $this->mThumbSize, $x ); }
-
+ function setStubThreshold( $x ) { return wfSetVar( $this->mStubThreshold, $x ); }
+ function setPreSaveTransform( $x ) { return wfSetVar( $this->mPreSaveTransform, $x ); }
+
function setIsPreview( $x ) { return wfSetVar( $this->mIsPreview, $x ); }
function setIsSectionPreview( $x ) { return wfSetVar( $this->mIsSectionPreview, $x ); }
function setIsPrintable( $x ) { return wfSetVar( $this->mIsPrintable, $x ); }
@@ -191,30 +212,19 @@ class ParserOptions {
$this->mAllowExternalImages = $wgAllowExternalImages;
$this->mAllowExternalImagesFrom = $wgAllowExternalImagesFrom;
$this->mEnableImageWhitelist = $wgEnableImageWhitelist;
- $this->mSkin = null; # Deferred
- $this->mDateFormat = null; # Deferred
- $this->mEditSection = true;
- $this->mNumberHeadings = $user->getOption( 'numberheadings' );
$this->mAllowSpecialInclusion = $wgAllowSpecialInclusion;
- $this->mTidy = false;
- $this->mInterfaceMessage = false;
- $this->mTargetLanguage = null; // default depends on InterfaceMessage setting
$this->mMaxIncludeSize = $wgMaxArticleSize * 1024;
$this->mMaxPPNodeCount = $wgMaxPPNodeCount;
$this->mMaxPPExpandDepth = $wgMaxPPExpandDepth;
$this->mMaxTemplateDepth = $wgMaxTemplateDepth;
- $this->mRemoveComments = true;
- $this->mTemplateCallback = array( 'Parser', 'statelessFetchTemplate' );
- $this->mEnableLimitReport = false;
$this->mCleanSignatures = $wgCleanSignatures;
$this->mExternalLinkTarget = $wgExternalLinkTarget;
+
+ $this->mNumberHeadings = $user->getOption( 'numberheadings' );
$this->mMath = $user->getOption( 'math' );
- $this->mUserLang = $wgLang->getCode();
$this->mThumbSize = $user->getOption( 'thumbsize' );
-
- $this->mIsPreview = false;
- $this->mIsSectionPreview = false;
- $this->mIsPrintable = false;
+ $this->mStubThreshold = $user->getStubThreshold();
+ $this->mUserLang = $wgLang->getCode();
wfProfileOut( __METHOD__ );
}
@@ -226,7 +236,7 @@ class ParserOptions {
function registerWatcher( $callback ) {
$this->onAccessCallback = $callback;
}
-
+
/**
* Called when an option is accessed.
*/
@@ -235,9 +245,9 @@ class ParserOptions {
call_user_func( $this->onAccessCallback, $optionName );
}
}
-
+
/**
- * Returns the full array of options that would have been used by
+ * Returns the full array of options that would have been used by
* in 1.16.
* Used to get the old parser cache entries when available.
*/
@@ -249,14 +259,14 @@ class ParserOptions {
}
return $legacyOpts;
}
-
+
/**
* Generate a hash string with the values set on these ParserOptions
* for the keys given in the array.
* This will be used as part of the hash key for the parser cache,
- * so users sharign the options with vary for the same page share
+ * so users sharign the options with vary for the same page share
* the same cached data safely.
- *
+ *
* Replaces User::getPageRenderingHash()
*
* Extensions which require it should install 'PageRenderingHash' hook,
@@ -270,48 +280,49 @@ class ParserOptions {
global $wgContLang, $wgRenderHashAppend;
$confstr = '';
-
- if ( in_array( 'math', $forOptions ) )
+
+ if ( in_array( 'math', $forOptions ) ) {
$confstr .= $this->mMath;
- else
+ } else {
$confstr .= '*';
-
+ }
+
// Space assigned for the stubthreshold but unused
- // since it disables the parser cache, its value will always
+ // since it disables the parser cache, its value will always
// be 0 when this function is called by parsercache.
- // The conditional is here to avoid a confusing 0
- if ( true || in_array( 'stubthreshold', $forOptions ) )
- $confstr .= '!0' ;
- else
+ if ( in_array( 'stubthreshold', $forOptions ) ) {
+ $confstr .= '!' . $this->mStubThreshold;
+ } else {
$confstr .= '!*' ;
+ }
- if ( in_array( 'dateformat', $forOptions ) )
+ if ( in_array( 'dateformat', $forOptions ) ) {
$confstr .= '!' . $this->getDateFormat();
-
- if ( in_array( 'numberheadings', $forOptions ) )
+ }
+
+ if ( in_array( 'numberheadings', $forOptions ) ) {
$confstr .= '!' . ( $this->mNumberHeadings ? '1' : '' );
- else
+ } else {
$confstr .= '!*';
-
- if ( in_array( 'userlang', $forOptions ) )
+ }
+
+ if ( in_array( 'userlang', $forOptions ) ) {
$confstr .= '!' . $this->mUserLang;
- else
+ } else {
$confstr .= '!*';
+ }
- if ( in_array( 'thumbsize', $forOptions ) )
+ if ( in_array( 'thumbsize', $forOptions ) ) {
$confstr .= '!' . $this->mThumbSize;
- else
+ } else {
$confstr .= '!*';
+ }
// add in language specific options, if any
- // FIXME: This is just a way of retrieving the url/user preferred variant
+ // @todo FIXME: This is just a way of retrieving the url/user preferred variant
$confstr .= $wgContLang->getExtraHashOptions();
- // Since the skin could be overloading link(), it should be
- // included here but in practice, none of our skins do that.
- // $confstr .= "!" . $this->mSkin->getSkinName();
-
$confstr .= $wgRenderHashAppend;
if ( !in_array( 'editsection', $forOptions ) ) {
@@ -319,20 +330,21 @@ class ParserOptions {
} elseif ( !$this->mEditSection ) {
$confstr .= '!edit=0';
}
-
- if ( $this->mIsPrintable && in_array( 'printable', $forOptions ) )
+
+ if ( $this->mIsPrintable && in_array( 'printable', $forOptions ) ) {
$confstr .= '!printable=1';
-
+ }
+
if ( $this->mExtraKey != '' )
$confstr .= $this->mExtraKey;
-
+
// Give a chance for extensions to modify the hash, if they have
// extra options or other effects on the parser cache.
wfRunHooks( 'PageRenderingHash', array( &$confstr ) );
// Make it a valid memcached key fragment
$confstr = str_replace( ' ', '_', $confstr );
-
+
return $confstr;
}
}
diff --git a/includes/parser/ParserOutput.php b/includes/parser/ParserOutput.php
index 1e4765db..403b6625 100644
--- a/includes/parser/ParserOutput.php
+++ b/includes/parser/ParserOutput.php
@@ -21,14 +21,15 @@ class CacheTime {
function containsOldMagic() { return $this->mContainsOldMagic; }
function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
-
- /**
- * setCacheTime() sets the timestamp expressing when the page has been rendered.
+
+ /**
+ * setCacheTime() sets the timestamp expressing when the page has been rendered.
* This doesn not control expiry, see updateCacheExpiry() for that!
+ * @param $t string
+ * @return string
*/
- function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }
+ function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }
-
/**
* Sets the number of seconds after which this object should expire.
* This value is used with the ParserCache.
@@ -36,16 +37,20 @@ class CacheTime {
* the new call has no effect. The value returned by getCacheExpiry is smaller
* or equal to the smallest number that was provided as an argument to
* updateCacheExpiry().
+ *
+ * @param $seconds number
*/
function updateCacheExpiry( $seconds ) {
$seconds = (int)$seconds;
- if ( $this->mCacheExpiry === null || $this->mCacheExpiry > $seconds )
- $this->mCacheExpiry = $seconds;
+ if ( $this->mCacheExpiry === null || $this->mCacheExpiry > $seconds ) {
+ $this->mCacheExpiry = $seconds;
+ }
// hack: set old-style marker for uncacheable entries.
- if ( $this->mCacheExpiry !== null && $this->mCacheExpiry <= 0 )
+ if ( $this->mCacheExpiry !== null && $this->mCacheExpiry <= 0 ) {
$this->mCacheTime = -1;
+ }
}
/**
@@ -59,28 +64,36 @@ class CacheTime {
function getCacheExpiry() {
global $wgParserCacheExpireTime;
- if ( $this->mCacheTime < 0 ) return 0; // old-style marker for "not cachable"
+ if ( $this->mCacheTime < 0 ) {
+ return 0;
+ } // old-style marker for "not cachable"
$expire = $this->mCacheExpiry;
- if ( $expire === null )
+ if ( $expire === null ) {
$expire = $wgParserCacheExpireTime;
- else
+ } else {
$expire = min( $expire, $wgParserCacheExpireTime );
+ }
if( $this->containsOldMagic() ) { //compatibility hack
$expire = min( $expire, 3600 ); # 1 hour
}
- if ( $expire <= 0 ) return 0; // not cachable
- else return $expire;
+ if ( $expire <= 0 ) {
+ return 0; // not cachable
+ } else {
+ return $expire;
+ }
}
-
+ /**
+ * @return bool
+ */
function isCacheable() {
return $this->getCacheExpiry() > 0;
}
-
+
/**
* Return true if this cached output object predates the global or
* per-article cache invalidation timestamps, or if it comes from
@@ -100,8 +113,7 @@ class CacheTime {
}
}
-class ParserOutput extends CacheTime
-{
+class ParserOutput extends CacheTime {
var $mText, # The output text
$mLanguageLinks, # List of the full text of language links, in the order they appear
$mCategories, # Map of category names to sort keys
@@ -110,6 +122,7 @@ class ParserOutput extends CacheTime
$mTemplates = array(), # 2-D map of NS/DBK to ID for the template references. ID=zero for broken.
$mTemplateIds = array(), # 2-D map of NS/DBK to rev ID for the template references. ID=zero for broken.
$mImages = array(), # DB keys of the images used, in the array key only
+ $mImageTimeKeys = array(), # DB keys of the images used mapped to sha1 and MW timestamp
$mExternalLinks = array(), # External link URLs, in the key only
$mInterwikiLinks = array(), # 2-D map of prefix/DBK (in keys only) for the inline interwiki links in the document.
$mNewSection = false, # Show a new section link?
@@ -117,13 +130,19 @@ class ParserOutput extends CacheTime
$mNoGallery = false, # No gallery on category page? (__NOGALLERY__)
$mHeadItems = array(), # Items to put in the <head> section
$mModules = array(), # Modules to be loaded by the resource loader
+ $mModuleScripts = array(), # Modules of which only the JS will be loaded by the resource loader
+ $mModuleStyles = array(), # Modules of which only the CSSS will be loaded by the resource loader
+ $mModuleMessages = array(), # Modules of which only the messages will be loaded by the resource loader
$mOutputHooks = array(), # Hook tags as per $wgParserOutputHooks
$mWarnings = array(), # Warning text to be returned to the user. Wikitext formatted, in the key only
$mSections = array(), # Table of contents
+ $mEditSectionTokens = false, # prefix/suffix markers if edit sections were output as tokens
$mProperties = array(), # Name/value pairs to be cached in the DB
$mTOCHTML = ''; # HTML of the TOC
private $mIndexPolicy = ''; # 'index' or 'noindex'? Any other value will result in no change.
- private $mAccessedOptions = null; # List of ParserOptions (stored in the keys)
+ private $mAccessedOptions = array(); # List of ParserOptions (stored in the keys)
+
+ const EDITSECTION_REGEX = '#<(?:mw:)?editsection page="(.*?)" section="(.*?)"(?:/>|>(.*?)(</(?:mw:)?editsection>))#';
function __construct( $text = '', $languageLinks = array(), $categoryLinks = array(),
$containsOldMagic = false, $titletext = '' )
@@ -135,21 +154,55 @@ class ParserOutput extends CacheTime
$this->mTitleText = $titletext;
}
- function getText() { return $this->mText; }
+ function getText() {
+ if ( $this->mEditSectionTokens ) {
+ return preg_replace_callback( ParserOutput::EDITSECTION_REGEX,
+ array( &$this, 'replaceEditSectionLinksCallback' ), $this->mText );
+ } else {
+ return preg_replace( ParserOutput::EDITSECTION_REGEX, '', $this->mText );
+ }
+ return $this->mText;
+ }
+
+ /**
+ * callback used by getText to replace editsection tokens
+ * @private
+ */
+ function replaceEditSectionLinksCallback( $m ) {
+ global $wgOut, $wgLang;
+ $args = array(
+ htmlspecialchars_decode($m[1]),
+ htmlspecialchars_decode($m[2]),
+ isset($m[4]) ? $m[3] : null,
+ );
+ $args[0] = Title::newFromText( $args[0] );
+ if ( !is_object($args[0]) ) {
+ throw new MWException("Bad parser output text.");
+ }
+ $args[] = $wgLang->getCode();
+ $skin = $wgOut->getSkin();
+ return call_user_func_array( array( $skin, 'doEditSectionLink' ), $args );
+ }
+
function &getLanguageLinks() { return $this->mLanguageLinks; }
function getInterwikiLinks() { return $this->mInterwikiLinks; }
function getCategoryLinks() { return array_keys( $this->mCategories ); }
function &getCategories() { return $this->mCategories; }
function getTitleText() { return $this->mTitleText; }
function getSections() { return $this->mSections; }
+ function getEditSectionTokens() { return $this->mEditSectionTokens; }
function &getLinks() { return $this->mLinks; }
function &getTemplates() { return $this->mTemplates; }
+ function &getTemplateIds() { return $this->mTemplateIds; }
function &getImages() { return $this->mImages; }
+ function &getImageTimeKeys() { return $this->mImageTimeKeys; }
function &getExternalLinks() { return $this->mExternalLinks; }
function getNoGallery() { return $this->mNoGallery; }
function getHeadItems() { return $this->mHeadItems; }
function getModules() { return $this->mModules; }
- function getSubtitle() { return $this->mSubtitle; }
+ function getModuleScripts() { return $this->mModuleScripts; }
+ function getModuleStyles() { return $this->mModuleStyles; }
+ function getModuleMessages() { return $this->mModuleMessages; }
function getOutputHooks() { return (array)$this->mOutputHooks; }
function getWarnings() { return array_keys( $this->mWarnings ); }
function getIndexPolicy() { return $this->mIndexPolicy; }
@@ -161,6 +214,7 @@ class ParserOutput extends CacheTime
function setTitleText( $t ) { return wfSetVar( $this->mTitleText, $t ); }
function setSections( $toc ) { return wfSetVar( $this->mSections, $toc ); }
+ function setEditSectionTokens( $t ) { return wfSetVar( $this->mEditSectionTokens, $t ); }
function setIndexPolicy( $policy ) { return wfSetVar( $this->mIndexPolicy, $policy ); }
function setTOCHTML( $tochtml ) { return wfSetVar( $this->mTOCHTML, $tochtml ); }
@@ -226,10 +280,27 @@ class ParserOutput extends CacheTime
$this->mLinks[$ns][$dbk] = $id;
}
- function addImage( $name ) {
+ /**
+ * Register a file dependency for this output
+ * @param $name string Title dbKey
+ * @param $timestamp string MW timestamp of file creation (or false if non-existing)
+ * @param $sha string base 36 SHA-1 of file (or false if non-existing)
+ * @return void
+ */
+ function addImage( $name, $timestamp = null, $sha1 = null ) {
$this->mImages[$name] = 1;
+ if ( $timestamp !== null && $sha1 !== null ) {
+ $this->mImageTimeKeys[$name] = array( 'time' => $timestamp, 'sha1' => $sha1 );
+ }
}
+ /**
+ * Register a template dependency for this output
+ * @param $title Title
+ * @param $page_id
+ * @param $rev_id
+ * @return void
+ */
function addTemplate( $title, $page_id, $rev_id ) {
$ns = $title->getNamespace();
$dbk = $title->getDBkey();
@@ -271,10 +342,22 @@ class ParserOutput extends CacheTime
}
}
- function addModules( $modules ) {
+ public function addModules( $modules ) {
$this->mModules = array_merge( $this->mModules, (array) $modules );
}
+ public function addModuleScripts( $modules ) {
+ $this->mModuleScripts = array_merge( $this->mModuleScripts, (array)$modules );
+ }
+
+ public function addModuleStyles( $modules ) {
+ $this->mModuleStyles = array_merge( $this->mModuleStyles, (array)$modules );
+ }
+
+ public function addModuleMessages( $modules ) {
+ $this->mModuleMessages = array_merge( $this->mModuleMessages, (array)$modules );
+ }
+
/**
* Override the title to be used for display
* -- this is assumed to have been validated
@@ -293,7 +376,7 @@ class ParserOutput extends CacheTime
* @return String
*/
public function getDisplayTitle() {
- $t = $this->getTitleText( );
+ $t = $this->getTitleText();
if( $t === '' ) {
return false;
}
@@ -333,11 +416,11 @@ class ParserOutput extends CacheTime
/**
* Returns the options from its ParserOptions which have been taken
* into account to produce this output or false if not available.
- * @return mixed Array/false
+ * @return mixed Array
*/
public function getUsedOptions() {
if ( !isset( $this->mAccessedOptions ) ) {
- return false;
+ return array();
}
return array_keys( $this->mAccessedOptions );
}
diff --git a/includes/parser/Parser_DiffTest.php b/includes/parser/Parser_DiffTest.php
index c6dd76e5..efad33f9 100644
--- a/includes/parser/Parser_DiffTest.php
+++ b/includes/parser/Parser_DiffTest.php
@@ -111,6 +111,10 @@ class Parser_DiffTest
}
}
+ /**
+ * @param $parser Parser
+ * @return bool
+ */
function onClearState( &$parser ) {
// hack marker prefixes to get identical output
if ( !isset( $this->dtUniqPrefix ) ) {
diff --git a/includes/parser/Parser_LinkHooks.php b/includes/parser/Parser_LinkHooks.php
index 7c17ce4e..90e44943 100644
--- a/includes/parser/Parser_LinkHooks.php
+++ b/includes/parser/Parser_LinkHooks.php
@@ -9,8 +9,7 @@
* Parser with LinkHooks experiment
* @ingroup Parser
*/
-class Parser_LinkHooks extends Parser
-{
+class Parser_LinkHooks extends Parser {
/**
* Update this version number when the ParserOutput format
* changes in an incompatible way, so the parser cache
@@ -38,10 +37,8 @@ class Parser_LinkHooks extends Parser
/**
* Constructor
- *
- * @public
*/
- function __construct( $conf = array() ) {
+ public function __construct( $conf = array() ) {
parent::__construct( $conf );
$this->mLinkHooks = array();
}
@@ -82,8 +79,6 @@ class Parser_LinkHooks extends Parser
* True) (Treat as link) Parse the link according to normal link rules
* False) (Bad link) Just output the raw wikitext (You may modify the text first)
*
- * @public
- *
* @param $ns Integer or String: the Namespace ID or regex pattern if SLH_PATTERN is set
* @param $callback Mixed: the callback function (and object) to use
* @param $flags Integer: a combination of the following flags:
@@ -91,7 +86,7 @@ class Parser_LinkHooks extends Parser
*
* @return The old callback function for this name, if any
*/
- function setLinkHook( $ns, $callback, $flags = 0 ) {
+ public function setLinkHook( $ns, $callback, $flags = 0 ) {
if( $flags & SLH_PATTERN && !is_string($ns) )
throw new MWException( __METHOD__.'() expecting a regex string pattern.' );
elseif( $flags | ~SLH_PATTERN && !is_int($ns) )
@@ -232,7 +227,7 @@ class Parser_LinkHooks extends Parser
wfProfileOut( __METHOD__."-misc" );
# Make title object
wfProfileIn( __METHOD__."-title" );
- $title = Title::newFromText( $this->mStripState->unstripNoWiki($titleText) );
+ $title = Title::newFromText( $this->mStripState->unstripNoWiki( $titleText ) );
if( !$title ) {
wfProfileOut( __METHOD__."-title" );
wfProfileOut( __METHOD__ );
@@ -244,7 +239,7 @@ class Parser_LinkHooks extends Parser
# Default for Namespaces is a default link
# ToDo: Default for patterns is plain wikitext
$return = true;
- if( isset($this->mLinkHooks[$ns]) ) {
+ if( isset( $this->mLinkHooks[$ns] ) ) {
list( $callback, $flags ) = $this->mLinkHooks[$ns];
if( $flags & SLH_PATTERN ) {
$args = array( $parser, $holders, $markers, $titleText, &$paramText, &$leadingColon );
@@ -253,14 +248,14 @@ class Parser_LinkHooks extends Parser
}
# Workaround for PHP bug 35229 and similar
if ( !is_callable( $callback ) ) {
- throw new MWException( "Tag hook for $name is not callable\n" );
+ throw new MWException( "Tag hook for namespace $ns is not callable\n" );
}
$return = call_user_func_array( $callback, $args );
}
if( $return === true ) {
# True (treat as plain link) was returned, call the defaultLinkHook
- $args = array( $parser, $holders, $markers, $title, $titleText, &$paramText, &$leadingColon );
- $return = call_user_func_array( array( 'CoreLinkFunctions', 'defaultLinkHook' ), $args );
+ $return = CoreLinkFunctions::defaultLinkHook( $parser, $holders, $markers, $title,
+ $titleText, $paramText, $leadingColon );
}
if( $return === false ) {
# False (no link) was returned, output plain wikitext
diff --git a/includes/parser/Preprocessor.php b/includes/parser/Preprocessor.php
index c31f37bf..d6328aa7 100644
--- a/includes/parser/Preprocessor.php
+++ b/includes/parser/Preprocessor.php
@@ -9,19 +9,44 @@
* @ingroup Parser
*/
interface Preprocessor {
- /** Create a new preprocessor object based on an initialised Parser object */
+ /**
+ * Create a new preprocessor object based on an initialised Parser object
+ *
+ * @param $parser Parser
+ */
function __construct( $parser );
- /** Create a new top-level frame for expansion of a page */
+ /**
+ * Create a new top-level frame for expansion of a page
+ *
+ * @return PPFrame
+ */
function newFrame();
- /** Create a new custom frame for programmatic use of parameter replacement as used in some extensions */
+ /**
+ * Create a new custom frame for programmatic use of parameter replacement as used in some extensions
+ *
+ * @param $args array
+ *
+ * @return PPFrame
+ */
function newCustomFrame( $args );
- /** Create a new custom node for programmatic use of parameter replacement as used in some extensions */
+ /**
+ * Create a new custom node for programmatic use of parameter replacement as used in some extensions
+ *
+ * @param $values
+ */
function newPartNodeArray( $values );
- /** Preprocess text to a PPNode */
+ /**
+ * Preprocess text to a PPNode
+ *
+ * @param $text
+ * @param $flags
+ *
+ * @return PPNode
+ */
function preprocessToObj( $text, $flags = 0 );
}
@@ -39,6 +64,11 @@ interface PPFrame {
/**
* Create a child frame
+ *
+ * @param $args array
+ * @param $title Title
+ *
+ * @return PPFrame
*/
function newChild( $args = false, $title = false );
@@ -70,6 +100,8 @@ interface PPFrame {
/**
* Returns true if there are no arguments in this frame
+ *
+ * @return bool
*/
function isEmpty();
@@ -95,6 +127,10 @@ interface PPFrame {
/**
* Returns true if the infinite loop check is OK, false if a loop is detected
+ *
+ * @param $title
+ *
+ * @return bool
*/
function loopCheck( $title );
@@ -126,6 +162,8 @@ interface PPNode {
/**
* Get the first child of a tree node. False if there isn't one.
+ *
+ * @return PPNode
*/
function getFirstChild();
diff --git a/includes/parser/Preprocessor_DOM.php b/includes/parser/Preprocessor_DOM.php
index 2b635f7c..755563a0 100644
--- a/includes/parser/Preprocessor_DOM.php
+++ b/includes/parser/Preprocessor_DOM.php
@@ -5,12 +5,18 @@
* @file
* @ingroup Parser
*/
-
+
/**
* @ingroup Parser
*/
class Preprocessor_DOM implements Preprocessor {
- var $parser, $memoryLimit;
+
+ /**
+ * @var Parser
+ */
+ var $parser;
+
+ var $memoryLimit;
const CACHE_VERSION = 1;
@@ -27,21 +33,31 @@ class Preprocessor_DOM implements Preprocessor {
}
}
+ /**
+ * @return PPFrame_DOM
+ */
function newFrame() {
return new PPFrame_DOM( $this );
}
+ /**
+ * @param $args
+ * @return PPCustomFrame_DOM
+ */
function newCustomFrame( $args ) {
return new PPCustomFrame_DOM( $this, $args );
}
+ /**
+ * @param $values
+ * @return PPNode_DOM
+ */
function newPartNodeArray( $values ) {
//NOTE: DOM manipulation is slower than building & parsing XML! (or so Tim sais)
- $xml = "";
- $xml .= "<list>";
+ $xml = "<list>";
foreach ( $values as $k => $val ) {
-
+
if ( is_int( $k ) ) {
$xml .= "<part><name index=\"$k\"/><value>" . htmlspecialchars( $val ) ."</value></part>";
} else {
@@ -59,6 +75,10 @@ class Preprocessor_DOM implements Preprocessor {
return $node;
}
+ /**
+ * @throws MWException
+ * @return bool
+ */
function memCheck() {
if ( $this->memoryLimit === false ) {
return;
@@ -91,14 +111,15 @@ class Preprocessor_DOM implements Preprocessor {
* cache may be implemented at a later date which takes further advantage of these strict
* dependency requirements.
*
- * @private
+ * @return PPNode_DOM
*/
function preprocessToObj( $text, $flags = 0 ) {
wfProfileIn( __METHOD__ );
global $wgMemc, $wgPreprocessorCacheThreshold;
-
+
$xml = false;
- $cacheable = strlen( $text ) > $wgPreprocessorCacheThreshold;
+ $cacheable = ( $wgPreprocessorCacheThreshold !== false
+ && strlen( $text ) > $wgPreprocessorCacheThreshold );
if ( $cacheable ) {
wfProfileIn( __METHOD__.'-cacheable' );
@@ -134,7 +155,8 @@ class Preprocessor_DOM implements Preprocessor {
if ( !$result ) {
// Try running the XML through UtfNormal to get rid of invalid characters
$xml = UtfNormal::cleanUp( $xml );
- $result = $dom->loadXML( $xml );
+ // 1 << 19 == XML_PARSE_HUGE, needed so newer versions of libxml2 don't barf when the XML is >256 levels deep
+ $result = $dom->loadXML( $xml, 1 << 19 );
if ( !$result ) {
throw new MWException( __METHOD__.' generated invalid XML' );
}
@@ -147,7 +169,12 @@ class Preprocessor_DOM implements Preprocessor {
wfProfileOut( __METHOD__ );
return $obj;
}
-
+
+ /**
+ * @param $text string
+ * @param $flags int
+ * @return string
+ */
function preprocessToXml( $text, $flags = 0 ) {
wfProfileIn( __METHOD__ );
$rules = array(
@@ -317,7 +344,7 @@ class Preprocessor_DOM implements Preprocessor {
// Search backwards for leading whitespace
$wsStart = $i ? ( $i - strspn( $revText, ' ', strlen( $text ) - $i ) ) : 0;
// Search forwards for trailing whitespace
- // $wsEnd will be the position of the last space
+ // $wsEnd will be the position of the last space (or the '>' if there's none)
$wsEnd = $endPos + 2 + strspn( $text, ' ', $endPos + 3 );
// Eat the line if possible
// TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at
@@ -344,13 +371,11 @@ class Preprocessor_DOM implements Preprocessor {
if ( $stack->top ) {
$part = $stack->top->getCurrentPart();
- if ( isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 ) {
- // Comments abutting, no change in visual end
- $part->commentEnd = $wsEnd;
- } else {
+ if ( ! (isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 )) {
$part->visualEnd = $wsStart;
- $part->commentEnd = $endPos;
}
+ // Else comments abutting, no change in visual end
+ $part->commentEnd = $endPos;
}
$i = $endPos + 1;
$inner = substr( $text, $startPos, $endPos - $startPos + 1 );
@@ -389,8 +414,8 @@ class Preprocessor_DOM implements Preprocessor {
} else {
$attrEnd = $tagEndPos;
// Find closing tag
- if ( preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",
- $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) )
+ if ( preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",
+ $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) )
{
$inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 );
$i = $matches[0][1] + strlen( $matches[0][0] );
@@ -423,9 +448,7 @@ class Preprocessor_DOM implements Preprocessor {
$accum .= '<inner>' . htmlspecialchars( $inner ) . '</inner>';
}
$accum .= $close . '</ext>';
- }
-
- elseif ( $found == 'line-start' ) {
+ } elseif ( $found == 'line-start' ) {
// Is this the start of a heading?
// Line break belongs before the heading element in any case
if ( $fakeLineStart ) {
@@ -453,9 +476,7 @@ class Preprocessor_DOM implements Preprocessor {
extract( $flags );
$i += $count;
}
- }
-
- elseif ( $found == 'line-end' ) {
+ } elseif ( $found == 'line-end' ) {
$piece = $stack->top;
// A heading must be open, otherwise \n wouldn't have been in the search list
assert( $piece->open == "\n" );
@@ -522,7 +543,7 @@ class Preprocessor_DOM implements Preprocessor {
'open' => $curChar,
'close' => $rule['end'],
'count' => $count,
- 'lineStart' => ($i > 0 && $text[$i-1] == "\n"),
+ 'lineStart' => ($i == 0 || $text[$i-1] == "\n"),
);
$stack->push( $piece );
@@ -557,7 +578,7 @@ class Preprocessor_DOM implements Preprocessor {
}
}
- if ($matchingCount <= 0) {
+ if ( $matchingCount <= 0 ) {
# No matching element found in callback array
# Output a literal closing brace and continue
$accum .= htmlspecialchars( str_repeat( $curChar, $count ) );
@@ -607,7 +628,7 @@ class Preprocessor_DOM implements Preprocessor {
$accum =& $stack->getAccum();
# Re-add the old stack element if it still has unmatched opening characters remaining
- if ($matchingCount < $piece->count) {
+ if ( $matchingCount < $piece->count ) {
$piece->parts = array( new PPDPart );
$piece->count -= $matchingCount;
# do we still qualify for any callback with remaining count?
@@ -630,16 +651,12 @@ class Preprocessor_DOM implements Preprocessor {
# Add XML element to the enclosing accumulator
$accum .= $element;
- }
-
- elseif ( $found == 'pipe' ) {
+ } elseif ( $found == 'pipe' ) {
$findEquals = true; // shortcut for getFlags()
$stack->addPart();
$accum =& $stack->getAccum();
++$i;
- }
-
- elseif ( $found == 'equals' ) {
+ } elseif ( $found == 'equals' ) {
$findEquals = false; // shortcut for getFlags()
$stack->getCurrentPart()->eqpos = strlen( $accum );
$accum .= '=';
@@ -655,7 +672,7 @@ class Preprocessor_DOM implements Preprocessor {
$xml = $stack->rootAccum;
wfProfileOut( __METHOD__ );
-
+
return $xml;
}
}
@@ -665,7 +682,12 @@ class Preprocessor_DOM implements Preprocessor {
* @ingroup Parser
*/
class PPDStack {
- var $stack, $rootAccum, $top;
+ var $stack, $rootAccum;
+
+ /**
+ * @var PPDStack
+ */
+ var $top;
var $out;
var $elementClass = 'PPDStackElement';
@@ -678,6 +700,9 @@ class PPDStack {
$this->accum =& $this->rootAccum;
}
+ /**
+ * @return int
+ */
function count() {
return count( $this->stack );
}
@@ -726,6 +751,9 @@ class PPDStack {
$this->accum =& $this->top->getAccum();
}
+ /**
+ * @return array
+ */
function getFlags() {
if ( !count( $this->stack ) ) {
return array(
@@ -773,6 +801,9 @@ class PPDStackElement {
return $this->parts[count($this->parts) - 1];
}
+ /**
+ * @return array
+ */
function getFlags() {
$partCount = count( $this->parts );
$findPipe = $this->open != "\n" && $this->open != '[';
@@ -785,6 +816,8 @@ class PPDStackElement {
/**
* Get the output string that would result if the close is not found.
+ *
+ * @return string
*/
function breakSyntax( $openingCount = false ) {
if ( $this->open == "\n" ) {
@@ -829,7 +862,21 @@ class PPDPart {
* @ingroup Parser
*/
class PPFrame_DOM implements PPFrame {
- var $preprocessor, $parser, $title;
+
+ /**
+ * @var Preprocessor
+ */
+ var $preprocessor;
+
+ /**
+ * @var Parser
+ */
+ var $parser;
+
+ /**
+ * @var Title
+ */
+ var $title;
var $titleCache;
/**
@@ -847,7 +894,7 @@ class PPFrame_DOM implements PPFrame {
/**
* Construct a new preprocessor frame.
- * @param $preprocessor Preprocessor: The parent preprocessor
+ * @param $preprocessor Preprocessor The parent preprocessor
*/
function __construct( $preprocessor ) {
$this->preprocessor = $preprocessor;
@@ -861,6 +908,8 @@ class PPFrame_DOM implements PPFrame {
/**
* Create a new child frame
* $args is optionally a multi-root PPNode or array containing the template arguments
+ *
+ * @return PPTemplateFrame_DOM
*/
function newChild( $args = false, $title = false ) {
$namedArgs = array();
@@ -896,6 +945,12 @@ class PPFrame_DOM implements PPFrame {
return new PPTemplateFrame_DOM( $this->preprocessor, $this, $numberedArgs, $namedArgs, $title );
}
+ /**
+ * @throws MWException
+ * @param $root
+ * @param $flags int
+ * @return string
+ */
function expand( $root, $flags = 0 ) {
static $expansionDepth = 0;
if ( is_string( $root ) ) {
@@ -1058,11 +1113,11 @@ class PPFrame_DOM implements PPFrame {
# Heading
$s = $this->expand( $contextNode->childNodes, $flags );
- # Insert a heading marker only for <h> children of <root>
- # This is to stop extractSections from going over multiple tree levels
- if ( $contextNode->parentNode->nodeName == 'root'
- && $this->parser->ot['html'] )
- {
+ # Insert a heading marker only for <h> children of <root>
+ # This is to stop extractSections from going over multiple tree levels
+ if ( $contextNode->parentNode->nodeName == 'root'
+ && $this->parser->ot['html'] )
+ {
# Insert heading index marker
$headingIndex = $contextNode->getAttribute( 'i' );
$titleText = $this->title->getPrefixedDBkey();
@@ -1071,7 +1126,7 @@ class PPFrame_DOM implements PPFrame {
$marker = "{$this->parser->mUniqPrefix}-h-$serial-" . Parser::MARKER_SUFFIX;
$count = $contextNode->getAttribute( 'level' );
$s = substr( $s, 0, $count ) . $marker . substr( $s, $count );
- $this->parser->mStripState->general->setPair( $marker, '' );
+ $this->parser->mStripState->addGeneral( $marker, '' );
}
$out .= $s;
} else {
@@ -1107,6 +1162,11 @@ class PPFrame_DOM implements PPFrame {
return $outStack[0];
}
+ /**
+ * @param $sep
+ * @param $flags
+ * @return string
+ */
function implodeWithFlags( $sep, $flags /*, ... */ ) {
$args = array_slice( func_get_args(), 2 );
@@ -1132,6 +1192,8 @@ class PPFrame_DOM implements PPFrame {
/**
* Implode with no flags specified
* This previously called implodeWithFlags but has now been inlined to reduce stack depth
+ *
+ * @return string
*/
function implode( $sep /*, ... */ ) {
$args = array_slice( func_get_args(), 1 );
@@ -1160,6 +1222,8 @@ class PPFrame_DOM implements PPFrame {
/**
* Makes an object that, when expand()ed, will be the same as one obtained
* with implode()
+ *
+ * @return array
*/
function virtualImplode( $sep /*, ... */ ) {
$args = array_slice( func_get_args(), 1 );
@@ -1225,20 +1289,31 @@ class PPFrame_DOM implements PPFrame {
}
}
+ /**
+ * @return array
+ */
function getArguments() {
return array();
}
+ /**
+ * @return array
+ */
function getNumberedArguments() {
return array();
}
+ /**
+ * @return array
+ */
function getNamedArguments() {
return array();
}
/**
* Returns true if there are no arguments in this frame
+ *
+ * @return bool
*/
function isEmpty() {
return true;
@@ -1250,6 +1325,8 @@ class PPFrame_DOM implements PPFrame {
/**
* Returns true if the infinite loop check is OK, false if a loop is detected
+ *
+ * @return bool
*/
function loopCheck( $title ) {
return !isset( $this->loopCheckHash[$title->getPrefixedDBkey()] );
@@ -1257,6 +1334,8 @@ class PPFrame_DOM implements PPFrame {
/**
* Return true if the frame is a template frame
+ *
+ * @return bool
*/
function isTemplate() {
return false;
@@ -1268,9 +1347,21 @@ class PPFrame_DOM implements PPFrame {
* @ingroup Parser
*/
class PPTemplateFrame_DOM extends PPFrame_DOM {
- var $numberedArgs, $namedArgs, $parent;
+ var $numberedArgs, $namedArgs;
+
+ /**
+ * @var PPFrame_DOM
+ */
+ var $parent;
var $numberedExpansionCache, $namedExpansionCache;
+ /**
+ * @param $preprocessor
+ * @param $parent PPFrame_DOM
+ * @param $numberedArgs array
+ * @param $namedArgs array
+ * @param $title Title
+ */
function __construct( $preprocessor, $parent = false, $numberedArgs = array(), $namedArgs = array(), $title = false ) {
parent::__construct( $preprocessor );
@@ -1305,8 +1396,11 @@ class PPTemplateFrame_DOM extends PPFrame_DOM {
$s .= '}';
return $s;
}
+
/**
* Returns true if there are no arguments in this frame
+ *
+ * @return bool
*/
function isEmpty() {
return !count( $this->numberedArgs ) && !count( $this->namedArgs );
@@ -1321,7 +1415,7 @@ class PPTemplateFrame_DOM extends PPFrame_DOM {
}
return $arguments;
}
-
+
function getNumberedArguments() {
$arguments = array();
foreach ( array_keys($this->numberedArgs) as $key ) {
@@ -1329,7 +1423,7 @@ class PPTemplateFrame_DOM extends PPFrame_DOM {
}
return $arguments;
}
-
+
function getNamedArguments() {
$arguments = array();
foreach ( array_keys($this->namedArgs) as $key ) {
@@ -1371,6 +1465,8 @@ class PPTemplateFrame_DOM extends PPFrame_DOM {
/**
* Return true if the frame is a template frame
+ *
+ * @return bool
*/
function isTemplate() {
return true;
@@ -1405,6 +1501,9 @@ class PPCustomFrame_DOM extends PPFrame_DOM {
return $s;
}
+ /**
+ * @return bool
+ */
function isEmpty() {
return !count( $this->args );
}
@@ -1421,14 +1520,22 @@ class PPCustomFrame_DOM extends PPFrame_DOM {
* @ingroup Parser
*/
class PPNode_DOM implements PPNode {
+
+ /**
+ * @var DOMElement
+ */
var $node;
+ var $xpath;
function __construct( $node, $xpath = false ) {
$this->node = $node;
}
- function __get( $name ) {
- if ( $name == 'xpath' ) {
+ /**
+ * @return DOMXPath
+ */
+ function getXPath() {
+ if ( $this->xpath === null ) {
$this->xpath = new DOMXPath( $this->node->ownerDocument );
}
return $this->xpath;
@@ -1446,22 +1553,39 @@ class PPNode_DOM implements PPNode {
return $s;
}
+ /**
+ * @return bool|PPNode_DOM
+ */
function getChildren() {
return $this->node->childNodes ? new self( $this->node->childNodes ) : false;
}
+ /**
+ * @return bool|PPNode_DOM
+ */
function getFirstChild() {
return $this->node->firstChild ? new self( $this->node->firstChild ) : false;
}
+ /**
+ * @return bool|PPNode_DOM
+ */
function getNextSibling() {
return $this->node->nextSibling ? new self( $this->node->nextSibling ) : false;
}
+ /**
+ * @param $type
+ *
+ * @return bool|PPNode_DOM
+ */
function getChildrenOfType( $type ) {
- return new self( $this->xpath->query( $type, $this->node ) );
+ return new self( $this->getXPath()->query( $type, $this->node ) );
}
+ /**
+ * @return int
+ */
function getLength() {
if ( $this->node instanceof DOMNodeList ) {
return $this->node->length;
@@ -1470,11 +1594,18 @@ class PPNode_DOM implements PPNode {
}
}
+ /**
+ * @param $i
+ * @return bool|PPNode_DOM
+ */
function item( $i ) {
$item = $this->node->item( $i );
return $item ? new self( $item ) : false;
}
+ /**
+ * @return string
+ */
function getName() {
if ( $this->node instanceof DOMNodeList ) {
return '#nodelist';
@@ -1488,10 +1619,13 @@ class PPNode_DOM implements PPNode {
* name PPNode name
* index String index
* value PPNode value
+ *
+ * @return array
*/
function splitArg() {
- $names = $this->xpath->query( 'name', $this->node );
- $values = $this->xpath->query( 'value', $this->node );
+ $xpath = $this->getXPath();
+ $names = $xpath->query( 'name', $this->node );
+ $values = $xpath->query( 'value', $this->node );
if ( !$names->length || !$values->length ) {
throw new MWException( 'Invalid brace node passed to ' . __METHOD__ );
}
@@ -1506,12 +1640,15 @@ class PPNode_DOM implements PPNode {
/**
* Split an <ext> node into an associative array containing name, attr, inner and close
* All values in the resulting array are PPNodes. Inner and close are optional.
+ *
+ * @return array
*/
function splitExt() {
- $names = $this->xpath->query( 'name', $this->node );
- $attrs = $this->xpath->query( 'attr', $this->node );
- $inners = $this->xpath->query( 'inner', $this->node );
- $closes = $this->xpath->query( 'close', $this->node );
+ $xpath = $this->getXPath();
+ $names = $xpath->query( 'name', $this->node );
+ $attrs = $xpath->query( 'attr', $this->node );
+ $inners = $xpath->query( 'inner', $this->node );
+ $closes = $xpath->query( 'close', $this->node );
if ( !$names->length || !$attrs->length ) {
throw new MWException( 'Invalid ext node passed to ' . __METHOD__ );
}
@@ -1531,7 +1668,7 @@ class PPNode_DOM implements PPNode {
* Split a <h> node
*/
function splitHeading() {
- if ( !$this->nodeName == 'h' ) {
+ if ( $this->getName() !== 'h' ) {
throw new MWException( 'Invalid h node passed to ' . __METHOD__ );
}
return array(
diff --git a/includes/parser/Preprocessor_Hash.php b/includes/parser/Preprocessor_Hash.php
index 6cb2febc..c2d7d3d8 100644
--- a/includes/parser/Preprocessor_Hash.php
+++ b/includes/parser/Preprocessor_Hash.php
@@ -5,7 +5,7 @@
* @file
* @ingroup Parser
*/
-
+
/**
* Differences from DOM schema:
* * attribute nodes are children
@@ -13,22 +13,36 @@
* @ingroup Parser
*/
class Preprocessor_Hash implements Preprocessor {
+ /**
+ * @var Parser
+ */
var $parser;
-
+
const CACHE_VERSION = 1;
function __construct( $parser ) {
$this->parser = $parser;
}
+ /**
+ * @return PPFrame_Hash
+ */
function newFrame() {
return new PPFrame_Hash( $this );
}
+ /**
+ * @param $args
+ * @return PPCustomFrame_Hash
+ */
function newCustomFrame( $args ) {
return new PPCustomFrame_Hash( $this, $args );
}
+ /**
+ * @param $values array
+ * @return PPNode_Hash_Array
+ */
function newPartNodeArray( $values ) {
$list = array();
@@ -76,16 +90,15 @@ class Preprocessor_Hash implements Preprocessor {
* cache may be implemented at a later date which takes further advantage of these strict
* dependency requirements.
*
- * @private
+ * @return PPNode_Hash_Tree
*/
function preprocessToObj( $text, $flags = 0 ) {
wfProfileIn( __METHOD__ );
-
-
+
// Check cache.
global $wgMemc, $wgPreprocessorCacheThreshold;
-
- $cacheable = strlen( $text ) > $wgPreprocessorCacheThreshold;
+
+ $cacheable = $wgPreprocessorCacheThreshold !== false && strlen( $text ) > $wgPreprocessorCacheThreshold;
if ( $cacheable ) {
wfProfileIn( __METHOD__.'-cacheable' );
@@ -272,7 +285,7 @@ class Preprocessor_Hash implements Preprocessor {
// Search backwards for leading whitespace
$wsStart = $i ? ( $i - strspn( $revText, ' ', strlen( $text ) - $i ) ) : 0;
// Search forwards for trailing whitespace
- // $wsEnd will be the position of the last space
+ // $wsEnd will be the position of the last space (or the '>' if there's none)
$wsEnd = $endPos + 2 + strspn( $text, ' ', $endPos + 3 );
// Eat the line if possible
// TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at
@@ -302,13 +315,11 @@ class Preprocessor_Hash implements Preprocessor {
if ( $stack->top ) {
$part = $stack->top->getCurrentPart();
- if ( isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 ) {
- // Comments abutting, no change in visual end
- $part->commentEnd = $wsEnd;
- } else {
+ if ( ! (isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 )) {
$part->visualEnd = $wsStart;
- $part->commentEnd = $endPos;
}
+ // Else comments abutting, no change in visual end
+ $part->commentEnd = $endPos;
}
$i = $endPos + 1;
$inner = substr( $text, $startPos, $endPos - $startPos + 1 );
@@ -348,8 +359,8 @@ class Preprocessor_Hash implements Preprocessor {
} else {
$attrEnd = $tagEndPos;
// Find closing tag
- if ( preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",
- $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) )
+ if ( preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",
+ $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) )
{
$inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 );
$i = $matches[0][1] + strlen( $matches[0][0] );
@@ -414,9 +425,7 @@ class Preprocessor_Hash implements Preprocessor {
extract( $stack->getFlags() );
$i += $count;
}
- }
-
- elseif ( $found == 'line-end' ) {
+ } elseif ( $found == 'line-end' ) {
$piece = $stack->top;
// A heading must be open, otherwise \n wouldn't have been in the search list
assert( $piece->open == "\n" );
@@ -478,9 +487,7 @@ class Preprocessor_Hash implements Preprocessor {
// another heading. Infinite loops are avoided because the next iteration MUST
// hit the heading open case above, which unconditionally increments the
// input pointer.
- }
-
- elseif ( $found == 'open' ) {
+ } elseif ( $found == 'open' ) {
# count opening brace characters
$count = strspn( $text, $curChar, $i );
@@ -491,7 +498,7 @@ class Preprocessor_Hash implements Preprocessor {
'open' => $curChar,
'close' => $rule['end'],
'count' => $count,
- 'lineStart' => ($i > 0 && $text[$i-1] == "\n"),
+ 'lineStart' => ($i == 0 || $text[$i-1] == "\n"),
);
$stack->push( $piece );
@@ -502,9 +509,7 @@ class Preprocessor_Hash implements Preprocessor {
$accum->addLiteral( str_repeat( $curChar, $count ) );
}
$i += $count;
- }
-
- elseif ( $found == 'close' ) {
+ } elseif ( $found == 'close' ) {
$piece = $stack->top;
# lets check if there are enough characters for closing brace
$maxCount = $piece->count;
@@ -644,16 +649,12 @@ class Preprocessor_Hash implements Preprocessor {
} else {
$accum->addAccum( $element );
}
- }
-
- elseif ( $found == 'pipe' ) {
+ } elseif ( $found == 'pipe' ) {
$findEquals = true; // shortcut for getFlags()
$stack->addPart();
$accum =& $stack->getAccum();
++$i;
- }
-
- elseif ( $found == 'equals' ) {
+ } elseif ( $found == 'equals' ) {
$findEquals = false; // shortcut for getFlags()
$accum->addNodeWithText( 'equals', '=' );
$stack->getCurrentPart()->eqpos = $accum->lastNode;
@@ -676,7 +677,7 @@ class Preprocessor_Hash implements Preprocessor {
$rootNode = new PPNode_Hash_Tree( 'root' );
$rootNode->firstChild = $stack->rootAccum->firstNode;
$rootNode->lastChild = $stack->rootAccum->lastNode;
-
+
// Cache
if ($cacheable) {
$cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . serialize( $rootNode );
@@ -685,7 +686,7 @@ class Preprocessor_Hash implements Preprocessor {
wfProfileOut( __METHOD__.'-cacheable' );
wfDebugLog( "Preprocessor", "Saved preprocessor Hash to memcached (key $cacheKey)" );
}
-
+
wfProfileOut( __METHOD__ );
return $rootNode;
}
@@ -714,6 +715,8 @@ class PPDStackElement_Hash extends PPDStackElement {
/**
* Get the accumulator that would result if the close is not found.
+ *
+ * @return PPDAccum_Hash
*/
function breakSyntax( $openingCount = false ) {
if ( $this->open == "\n" ) {
@@ -818,7 +821,21 @@ class PPDAccum_Hash {
* @ingroup Parser
*/
class PPFrame_Hash implements PPFrame {
- var $preprocessor, $parser, $title;
+
+ /**
+ * @var Parser
+ */
+ var $parser;
+
+ /**
+ * @var Preprocessor
+ */
+ var $preprocessor;
+
+ /**
+ * @var Title
+ */
+ var $title;
var $titleCache;
/**
@@ -850,6 +867,11 @@ class PPFrame_Hash implements PPFrame {
/**
* Create a new child frame
* $args is optionally a multi-root PPNode or array containing the template arguments
+ *
+ * @param $args PPNode_Hash_Array|array
+ * @param $title Title|false
+ *
+ * @return PPTemplateFrame_Hash
*/
function newChild( $args = false, $title = false ) {
$namedArgs = array();
@@ -880,14 +902,19 @@ class PPFrame_Hash implements PPFrame {
return new PPTemplateFrame_Hash( $this->preprocessor, $this, $numberedArgs, $namedArgs, $title );
}
+ /**
+ * @throws MWException
+ * @param $root
+ * @param $flags int
+ * @return string
+ */
function expand( $root, $flags = 0 ) {
static $expansionDepth = 0;
if ( is_string( $root ) ) {
return $root;
}
- if ( ++$this->parser->mPPNodeCount > $this->parser->mOptions->getMaxPPNodeCount() )
- {
+ if ( ++$this->parser->mPPNodeCount > $this->parser->mOptions->getMaxPPNodeCount() ) {
return '<span class="error">Node-count limit exceeded</span>';
}
if ( $expansionDepth > $this->parser->mOptions->getMaxPPExpandDepth() ) {
@@ -1016,7 +1043,7 @@ class PPFrame_Hash implements PPFrame {
$serial = count( $this->parser->mHeadings ) - 1;
$marker = "{$this->parser->mUniqPrefix}-h-$serial-" . Parser::MARKER_SUFFIX;
$s = substr( $s, 0, $bits['level'] ) . $marker . substr( $s, $bits['level'] );
- $this->parser->mStripState->general->setPair( $marker, '' );
+ $this->parser->mStripState->addGeneral( $marker, '' );
$out .= $s;
} else {
# Expand in virtual stack
@@ -1050,6 +1077,11 @@ class PPFrame_Hash implements PPFrame {
return $outStack[0];
}
+ /**
+ * @param $sep
+ * @param $flags
+ * @return string
+ */
function implodeWithFlags( $sep, $flags /*, ... */ ) {
$args = array_slice( func_get_args(), 2 );
@@ -1077,6 +1109,7 @@ class PPFrame_Hash implements PPFrame {
/**
* Implode with no flags specified
* This previously called implodeWithFlags but has now been inlined to reduce stack depth
+ * @return string
*/
function implode( $sep /*, ... */ ) {
$args = array_slice( func_get_args(), 1 );
@@ -1105,6 +1138,8 @@ class PPFrame_Hash implements PPFrame {
/**
* Makes an object that, when expand()ed, will be the same as one obtained
* with implode()
+ *
+ * @return PPNode_Hash_Array
*/
function virtualImplode( $sep /*, ... */ ) {
$args = array_slice( func_get_args(), 1 );
@@ -1132,6 +1167,8 @@ class PPFrame_Hash implements PPFrame {
/**
* Virtual implode with brackets
+ *
+ * @return PPNode_Hash_Array
*/
function virtualBracketedImplode( $start, $sep, $end /*, ... */ ) {
$args = array_slice( func_get_args(), 3 );
@@ -1162,6 +1199,10 @@ class PPFrame_Hash implements PPFrame {
return 'frame{}';
}
+ /**
+ * @param $level bool
+ * @return array|bool|String
+ */
function getPDBK( $level = false ) {
if ( $level === false ) {
return $this->title->getPrefixedDBkey();
@@ -1170,31 +1211,50 @@ class PPFrame_Hash implements PPFrame {
}
}
+ /**
+ * @return array
+ */
function getArguments() {
return array();
}
+ /**
+ * @return array
+ */
function getNumberedArguments() {
return array();
}
+ /**
+ * @return array
+ */
function getNamedArguments() {
return array();
}
/**
* Returns true if there are no arguments in this frame
+ *
+ * @return bool
*/
function isEmpty() {
return true;
}
+ /**
+ * @param $name
+ * @return bool
+ */
function getArgument( $name ) {
return false;
}
/**
* Returns true if the infinite loop check is OK, false if a loop is detected
+ *
+ * @param $title Title
+ *
+ * @return bool
*/
function loopCheck( $title ) {
return !isset( $this->loopCheckHash[$title->getPrefixedDBkey()] );
@@ -1202,6 +1262,8 @@ class PPFrame_Hash implements PPFrame {
/**
* Return true if the frame is a template frame
+ *
+ * @return bool
*/
function isTemplate() {
return false;
@@ -1216,6 +1278,13 @@ class PPTemplateFrame_Hash extends PPFrame_Hash {
var $numberedArgs, $namedArgs, $parent;
var $numberedExpansionCache, $namedExpansionCache;
+ /**
+ * @param $preprocessor
+ * @param $parent
+ * @param $numberedArgs array
+ * @param $namedArgs array
+ * @param $title Title
+ */
function __construct( $preprocessor, $parent = false, $numberedArgs = array(), $namedArgs = array(), $title = false ) {
parent::__construct( $preprocessor );
@@ -1252,11 +1321,16 @@ class PPTemplateFrame_Hash extends PPFrame_Hash {
}
/**
* Returns true if there are no arguments in this frame
+ *
+ * @return bool
*/
function isEmpty() {
return !count( $this->numberedArgs ) && !count( $this->namedArgs );
}
+ /**
+ * @return array
+ */
function getArguments() {
$arguments = array();
foreach ( array_merge(
@@ -1266,7 +1340,10 @@ class PPTemplateFrame_Hash extends PPFrame_Hash {
}
return $arguments;
}
-
+
+ /**
+ * @return array
+ */
function getNumberedArguments() {
$arguments = array();
foreach ( array_keys($this->numberedArgs) as $key ) {
@@ -1274,7 +1351,10 @@ class PPTemplateFrame_Hash extends PPFrame_Hash {
}
return $arguments;
}
-
+
+ /**
+ * @return array
+ */
function getNamedArguments() {
$arguments = array();
foreach ( array_keys($this->namedArgs) as $key ) {
@@ -1283,6 +1363,10 @@ class PPTemplateFrame_Hash extends PPFrame_Hash {
return $arguments;
}
+ /**
+ * @param $index
+ * @return array|bool
+ */
function getNumberedArgument( $index ) {
if ( !isset( $this->numberedArgs[$index] ) ) {
return false;
@@ -1294,6 +1378,10 @@ class PPTemplateFrame_Hash extends PPFrame_Hash {
return $this->numberedExpansionCache[$index];
}
+ /**
+ * @param $name
+ * @return bool
+ */
function getNamedArgument( $name ) {
if ( !isset( $this->namedArgs[$name] ) ) {
return false;
@@ -1306,6 +1394,10 @@ class PPTemplateFrame_Hash extends PPFrame_Hash {
return $this->namedExpansionCache[$name];
}
+ /**
+ * @param $name
+ * @return array|bool
+ */
function getArgument( $name ) {
$text = $this->getNumberedArgument( $name );
if ( $text === false ) {
@@ -1316,6 +1408,8 @@ class PPTemplateFrame_Hash extends PPFrame_Hash {
/**
* Return true if the frame is a template frame
+ *
+ * @return bool
*/
function isTemplate() {
return true;
@@ -1350,10 +1444,17 @@ class PPCustomFrame_Hash extends PPFrame_Hash {
return $s;
}
+ /**
+ * @return bool
+ */
function isEmpty() {
return !count( $this->args );
}
+ /**
+ * @param $index
+ * @return bool
+ */
function getArgument( $index ) {
if ( !isset( $this->args[$index] ) ) {
return false;
@@ -1390,6 +1491,11 @@ class PPNode_Hash_Tree implements PPNode {
}
}
+ /**
+ * @param $name
+ * @param $text
+ * @return PPNode_Hash_Tree
+ */
static function newWithText( $name, $text ) {
$obj = new self( $name );
$obj->addChild( new PPNode_Hash_Text( $text ) );
@@ -1405,6 +1511,9 @@ class PPNode_Hash_Tree implements PPNode {
}
}
+ /**
+ * @return PPNode_Hash_Array
+ */
function getChildren() {
$children = array();
for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) {
@@ -1431,9 +1540,24 @@ class PPNode_Hash_Tree implements PPNode {
return $children;
}
- function getLength() { return false; }
- function item( $i ) { return false; }
+ /**
+ * @return bool
+ */
+ function getLength() {
+ return false;
+ }
+ /**
+ * @param $i
+ * @return bool
+ */
+ function item( $i ) {
+ return false;
+ }
+
+ /**
+ * @return string
+ */
function getName() {
return $this->name;
}
@@ -1443,6 +1567,8 @@ class PPNode_Hash_Tree implements PPNode {
* name PPNode name
* index String index
* value PPNode value
+ *
+ * @return array
*/
function splitArg() {
$bits = array();
@@ -1474,6 +1600,8 @@ class PPNode_Hash_Tree implements PPNode {
/**
* Split an <ext> node into an associative array containing name, attr, inner and close
* All values in the resulting array are PPNodes. Inner and close are optional.
+ *
+ * @return array
*/
function splitExt() {
$bits = array();
@@ -1499,6 +1627,8 @@ class PPNode_Hash_Tree implements PPNode {
/**
* Split an <h> node
+ *
+ * @return array
*/
function splitHeading() {
if ( $this->name !== 'h' ) {
@@ -1523,6 +1653,8 @@ class PPNode_Hash_Tree implements PPNode {
/**
* Split a <template> or <tplarg> node
+ *
+ * @return array
*/
function splitTemplate() {
$parts = array();
diff --git a/includes/parser/Preprocessor_HipHop.hphp b/includes/parser/Preprocessor_HipHop.hphp
new file mode 100644
index 00000000..dc404f7c
--- /dev/null
+++ b/includes/parser/Preprocessor_HipHop.hphp
@@ -0,0 +1,1941 @@
+<?php
+/**
+ * A preprocessor optimised for HipHop, using HipHop-specific syntax.
+ * vim: ft=php
+ *
+ * @file
+ * @ingroup Parser
+ */
+
+/**
+ * @ingroup Parser
+ */
+class Preprocessor_HipHop implements Preprocessor {
+ /**
+ * @var Parser
+ */
+ var $parser;
+
+ const CACHE_VERSION = 1;
+
+ function __construct( $parser ) {
+ $this->parser = $parser;
+ }
+
+ /**
+ * @return PPFrame_HipHop
+ */
+ function newFrame() {
+ return new PPFrame_HipHop( $this );
+ }
+
+ /**
+ * @param $args
+ * @return PPCustomFrame_HipHop
+ */
+ function newCustomFrame( array $args ) {
+ return new PPCustomFrame_HipHop( $this, $args );
+ }
+
+ /**
+ * @param $values array
+ * @return PPNode_HipHop_Array
+ */
+ function newPartNodeArray( $values ) {
+ $list = array();
+
+ foreach ( $values as $k => $val ) {
+ $partNode = new PPNode_HipHop_Tree( 'part' );
+ $nameNode = new PPNode_HipHop_Tree( 'name' );
+
+ if ( is_int( $k ) ) {
+ $nameNode->addChild( new PPNode_HipHop_Attr( 'index', $k ) );
+ $partNode->addChild( $nameNode );
+ } else {
+ $nameNode->addChild( new PPNode_HipHop_Text( $k ) );
+ $partNode->addChild( $nameNode );
+ $partNode->addChild( new PPNode_HipHop_Text( '=' ) );
+ }
+
+ $valueNode = new PPNode_HipHop_Tree( 'value' );
+ $valueNode->addChild( new PPNode_HipHop_Text( $val ) );
+ $partNode->addChild( $valueNode );
+
+ $list[] = $partNode;
+ }
+
+ $node = new PPNode_HipHop_Array( $list );
+ return $node;
+ }
+
+ /**
+ * Preprocess some wikitext and return the document tree.
+ * This is the ghost of Parser::replace_variables().
+ *
+ * @param $text String: the text to parse
+ * @param $flags Integer: bitwise combination of:
+ * Parser::PTD_FOR_INCLUSION Handle <noinclude>/<includeonly> as if the text is being
+ * included. Default is to assume a direct page view.
+ *
+ * The generated DOM tree must depend only on the input text and the flags.
+ * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of bug 4899.
+ *
+ * Any flag added to the $flags parameter here, or any other parameter liable to cause a
+ * change in the DOM tree for a given text, must be passed through the section identifier
+ * in the section edit link and thus back to extractSections().
+ *
+ * The output of this function is currently only cached in process memory, but a persistent
+ * cache may be implemented at a later date which takes further advantage of these strict
+ * dependency requirements.
+ *
+ * @return PPNode_HipHop_Tree
+ */
+ function preprocessToObj( string $text, int $flags = 0 ) {
+ wfProfileIn( __METHOD__ );
+
+ // Check cache.
+ global $wgMemc, $wgPreprocessorCacheThreshold;
+
+ $cacheable = ($wgPreprocessorCacheThreshold !== false && strlen( $text ) > $wgPreprocessorCacheThreshold);
+ if ( $cacheable ) {
+ wfProfileIn( __METHOD__.'-cacheable' );
+
+ $cacheKey = strval( wfMemcKey( 'preprocess-hash', md5($text), $flags ) );
+ $cacheValue = strval( $wgMemc->get( $cacheKey ) );
+ if ( $cacheValue !== '' ) {
+ $version = substr( $cacheValue, 0, 8 );
+ if ( intval( $version ) == self::CACHE_VERSION ) {
+ $hash = unserialize( substr( $cacheValue, 8 ) );
+ // From the cache
+ wfDebugLog( "Preprocessor",
+ "Loaded preprocessor hash from memcached (key $cacheKey)" );
+ wfProfileOut( __METHOD__.'-cacheable' );
+ wfProfileOut( __METHOD__ );
+ return $hash;
+ }
+ }
+ wfProfileIn( __METHOD__.'-cache-miss' );
+ }
+
+ $rules = array(
+ '{' => array(
+ 'end' => '}',
+ 'names' => array(
+ 2 => 'template',
+ 3 => 'tplarg',
+ ),
+ 'min' => 2,
+ 'max' => 3,
+ ),
+ '[' => array(
+ 'end' => ']',
+ 'names' => array( 2 => 'LITERAL' ),
+ 'min' => 2,
+ 'max' => 2,
+ )
+ );
+
+ $forInclusion = (bool)( $flags & Parser::PTD_FOR_INCLUSION );
+
+ $xmlishElements = (array)$this->parser->getStripList();
+ $enableOnlyinclude = false;
+ if ( $forInclusion ) {
+ $ignoredTags = array( 'includeonly', '/includeonly' );
+ $ignoredElements = array( 'noinclude' );
+ $xmlishElements[] = 'noinclude';
+ if ( strpos( $text, '<onlyinclude>' ) !== false && strpos( $text, '</onlyinclude>' ) !== false ) {
+ $enableOnlyinclude = true;
+ }
+ } else if ( $this->parser->ot['wiki'] ) {
+ $ignoredTags = array( 'noinclude', '/noinclude', 'onlyinclude', '/onlyinclude', 'includeonly', '/includeonly' );
+ $ignoredElements = array();
+ } else {
+ $ignoredTags = array( 'noinclude', '/noinclude', 'onlyinclude', '/onlyinclude' );
+ $ignoredElements = array( 'includeonly' );
+ $xmlishElements[] = 'includeonly';
+ }
+ $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );
+
+ // Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
+ $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA";
+
+ $stack = new PPDStack_HipHop;
+
+ $searchBase = "[{<\n";
+ $revText = strrev( $text ); // For fast reverse searches
+
+ $i = 0; # Input pointer, starts out pointing to a pseudo-newline before the start
+ $accum = $stack->getAccum(); # Current accumulator
+ $headingIndex = 1;
+ $stackFlags = array(
+ 'findPipe' => false, # True to take notice of pipe characters
+ 'findEquals' => false, # True to find equals signs in arguments
+ 'inHeading' => false, # True if $i is inside a possible heading
+ );
+ $noMoreGT = false; # True if there are no more greater-than (>) signs right of $i
+ $findOnlyinclude = $enableOnlyinclude; # True to ignore all input up to the next <onlyinclude>
+ $fakeLineStart = true; # Do a line-start run without outputting an LF character
+
+ while ( true ) {
+ //$this->memCheck();
+
+ if ( $findOnlyinclude ) {
+ // Ignore all input up to the next <onlyinclude>
+ $variantStartPos = strpos( $text, '<onlyinclude>', $i );
+ if ( $variantStartPos === false ) {
+ // Ignored section runs to the end
+ $accum->addNodeWithText( 'ignore', strval( substr( $text, $i ) ) );
+ break;
+ }
+ $startPos1 = intval( $variantStartPos );
+ $tagEndPos = $startPos1 + strlen( '<onlyinclude>' ); // past-the-end
+ $accum->addNodeWithText( 'ignore', strval( substr( $text, $i, $tagEndPos - $i ) ) );
+ $i = $tagEndPos;
+ $findOnlyinclude = false;
+ }
+
+ if ( $fakeLineStart ) {
+ $found = 'line-start';
+ $curChar = '';
+ } else {
+ # Find next opening brace, closing brace or pipe
+ $search = $searchBase;
+ if ( $stack->top === false ) {
+ $currentClosing = '';
+ } else {
+ $currentClosing = strval( $stack->getTop()->close );
+ $search .= $currentClosing;
+ }
+ if ( $stackFlags['findPipe'] ) {
+ $search .= '|';
+ }
+ if ( $stackFlags['findEquals'] ) {
+ // First equals will be for the template
+ $search .= '=';
+ }
+ $rule = null;
+ # Output literal section, advance input counter
+ $literalLength = intval( strcspn( $text, $search, $i ) );
+ if ( $literalLength > 0 ) {
+ $accum->addLiteral( strval( substr( $text, $i, $literalLength ) ) );
+ $i += $literalLength;
+ }
+ if ( $i >= strlen( $text ) ) {
+ if ( $currentClosing === "\n" ) {
+ // Do a past-the-end run to finish off the heading
+ $curChar = '';
+ $found = 'line-end';
+ } else {
+ # All done
+ break;
+ }
+ } else {
+ $curChar = $text[$i];
+ if ( $curChar === '|' ) {
+ $found = 'pipe';
+ } elseif ( $curChar === '=' ) {
+ $found = 'equals';
+ } elseif ( $curChar === '<' ) {
+ $found = 'angle';
+ } elseif ( $curChar === "\n" ) {
+ if ( $stackFlags['inHeading'] ) {
+ $found = 'line-end';
+ } else {
+ $found = 'line-start';
+ }
+ } elseif ( $curChar === $currentClosing ) {
+ $found = 'close';
+ } elseif ( isset( $rules[$curChar] ) ) {
+ $found = 'open';
+ $rule = $rules[$curChar];
+ } else {
+ # Some versions of PHP have a strcspn which stops on null characters
+ # Ignore and continue
+ ++$i;
+ continue;
+ }
+ }
+ }
+
+ if ( $found === 'angle' ) {
+ $matches = false;
+ // Handle </onlyinclude>
+ if ( $enableOnlyinclude
+ && substr( $text, $i, strlen( '</onlyinclude>' ) ) === '</onlyinclude>' )
+ {
+ $findOnlyinclude = true;
+ continue;
+ }
+
+ // Determine element name
+ if ( !preg_match( $elementsRegex, $text, $matches, 0, $i + 1 ) ) {
+ // Element name missing or not listed
+ $accum->addLiteral( '<' );
+ ++$i;
+ continue;
+ }
+ // Handle comments
+ if ( isset( $matches[2] ) && $matches[2] === '!--' ) {
+ // To avoid leaving blank lines, when a comment is both preceded
+ // and followed by a newline (ignoring spaces), trim leading and
+ // trailing spaces and one of the newlines.
+
+ // Find the end
+ $variantEndPos = strpos( $text, '-->', $i + 4 );
+ if ( $variantEndPos === false ) {
+ // Unclosed comment in input, runs to end
+ $inner = strval( substr( $text, $i ) );
+ $accum->addNodeWithText( 'comment', $inner );
+ $i = strlen( $text );
+ } else {
+ $endPos = intval( $variantEndPos );
+ // Search backwards for leading whitespace
+ if ( $i ) {
+ $wsStart = $i - intval( strspn( $revText, ' ', strlen( $text ) - $i ) );
+ } else {
+ $wsStart = 0;
+ }
+ // Search forwards for trailing whitespace
+ // $wsEnd will be the position of the last space (or the '>' if there's none)
+ $wsEnd = $endPos + 2 + intval( strspn( $text, ' ', $endPos + 3 ) );
+ // Eat the line if possible
+ // TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at
+ // the overall start. That's not how Sanitizer::removeHTMLcomments() did it, but
+ // it's a possible beneficial b/c break.
+ if ( $wsStart > 0 && substr( $text, $wsStart - 1, 1 ) === "\n"
+ && substr( $text, $wsEnd + 1, 1 ) === "\n" )
+ {
+ $startPos2 = $wsStart;
+ $endPos = $wsEnd + 1;
+ // Remove leading whitespace from the end of the accumulator
+ // Sanity check first though
+ $wsLength = $i - $wsStart;
+ if ( $wsLength > 0
+ && $accum->lastNode instanceof PPNode_HipHop_Text
+ && substr( $accum->lastNode->value, -$wsLength ) === str_repeat( ' ', $wsLength ) )
+ {
+ $accum->lastNode->value = strval( substr( $accum->lastNode->value, 0, -$wsLength ) );
+ }
+ // Do a line-start run next time to look for headings after the comment
+ $fakeLineStart = true;
+ } else {
+ // No line to eat, just take the comment itself
+ $startPos2 = $i;
+ $endPos += 2;
+ }
+
+ if ( $stack->top ) {
+ $part = $stack->getTop()->getCurrentPart();
+ if ( ! (isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 )) {
+ $part->visualEnd = $wsStart;
+ }
+ // Else comments abutting, no change in visual end
+ $part->commentEnd = $endPos;
+ }
+ $i = $endPos + 1;
+ $inner = strval( substr( $text, $startPos2, $endPos - $startPos2 + 1 ) );
+ $accum->addNodeWithText( 'comment', $inner );
+ }
+ continue;
+ }
+ $name = strval( $matches[1] );
+ $lowerName = strtolower( $name );
+ $attrStart = $i + strlen( $name ) + 1;
+
+ // Find end of tag
+ $variantTagEndPos = $noMoreGT ? false : strpos( $text, '>', $attrStart );
+ if ( $variantTagEndPos === false ) {
+ // Infinite backtrack
+ // Disable tag search to prevent worst-case O(N^2) performance
+ $noMoreGT = true;
+ $accum->addLiteral( '<' );
+ ++$i;
+ continue;
+ }
+ $tagEndPos = intval( $variantTagEndPos );
+
+ // Handle ignored tags
+ if ( in_array( $lowerName, $ignoredTags ) ) {
+ $accum->addNodeWithText( 'ignore', strval( substr( $text, $i, $tagEndPos - $i + 1 ) ) );
+ $i = $tagEndPos + 1;
+ continue;
+ }
+
+ $tagStartPos = $i;
+ $inner = $close = '';
+ if ( $text[$tagEndPos-1] === '/' ) {
+ // Short end tag
+ $attrEnd = $tagEndPos - 1;
+ $shortEnd = true;
+ $inner = '';
+ $i = $tagEndPos + 1;
+ $haveClose = false;
+ } else {
+ $attrEnd = $tagEndPos;
+ $shortEnd = false;
+ // Find closing tag
+ if ( preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",
+ $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) )
+ {
+ $inner = strval( substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 ) );
+ $i = intval( $matches[0][1] ) + strlen( $matches[0][0] );
+ $close = strval( $matches[0][0] );
+ $haveClose = true;
+ } else {
+ // No end tag -- let it run out to the end of the text.
+ $inner = strval( substr( $text, $tagEndPos + 1 ) );
+ $i = strlen( $text );
+ $haveClose = false;
+ }
+ }
+ // <includeonly> and <noinclude> just become <ignore> tags
+ if ( in_array( $lowerName, $ignoredElements ) ) {
+ $accum->addNodeWithText( 'ignore', strval( substr( $text, $tagStartPos, $i - $tagStartPos ) ) );
+ continue;
+ }
+
+ if ( $attrEnd <= $attrStart ) {
+ $attr = '';
+ } else {
+ // Note that the attr element contains the whitespace between name and attribute,
+ // this is necessary for precise reconstruction during pre-save transform.
+ $attr = strval( substr( $text, $attrStart, $attrEnd - $attrStart ) );
+ }
+
+ $extNode = new PPNode_HipHop_Tree( 'ext' );
+ $extNode->addChild( PPNode_HipHop_Tree::newWithText( 'name', $name ) );
+ $extNode->addChild( PPNode_HipHop_Tree::newWithText( 'attr', $attr ) );
+ if ( !$shortEnd ) {
+ $extNode->addChild( PPNode_HipHop_Tree::newWithText( 'inner', $inner ) );
+ }
+ if ( $haveClose ) {
+ $extNode->addChild( PPNode_HipHop_Tree::newWithText( 'close', $close ) );
+ }
+ $accum->addNode( $extNode );
+ }
+
+ elseif ( $found === 'line-start' ) {
+ // Is this the start of a heading?
+ // Line break belongs before the heading element in any case
+ if ( $fakeLineStart ) {
+ $fakeLineStart = false;
+ } else {
+ $accum->addLiteral( $curChar );
+ $i++;
+ }
+
+ $count = intval( strspn( $text, '=', $i, 6 ) );
+ if ( $count == 1 && $stackFlags['findEquals'] ) {
+ // DWIM: This looks kind of like a name/value separator
+ // Let's let the equals handler have it and break the potential heading
+ // This is heuristic, but AFAICT the methods for completely correct disambiguation are very complex.
+ } elseif ( $count > 0 ) {
+ $partData = array(
+ 'open' => "\n",
+ 'close' => "\n",
+ 'parts' => array( new PPDPart_HipHop( str_repeat( '=', $count ) ) ),
+ 'startPos' => $i,
+ 'count' => $count );
+ $stack->push( $partData );
+ $accum = $stack->getAccum();
+ $stackFlags = $stack->getFlags();
+ $i += $count;
+ }
+ } elseif ( $found === 'line-end' ) {
+ $piece = $stack->getTop();
+ // A heading must be open, otherwise \n wouldn't have been in the search list
+ assert( $piece->open === "\n" );
+ $part = $piece->getCurrentPart();
+ // Search back through the input to see if it has a proper close
+ // Do this using the reversed string since the other solutions (end anchor, etc.) are inefficient
+ $wsLength = intval( strspn( $revText, " \t", strlen( $text ) - $i ) );
+ $searchStart = $i - $wsLength;
+ if ( isset( $part->commentEnd ) && $searchStart - 1 == $part->commentEnd ) {
+ // Comment found at line end
+ // Search for equals signs before the comment
+ $searchStart = intval( $part->visualEnd );
+ $searchStart -= intval( strspn( $revText, " \t", strlen( $text ) - $searchStart ) );
+ }
+ $count = intval( $piece->count );
+ $equalsLength = intval( strspn( $revText, '=', strlen( $text ) - $searchStart ) );
+ $isTreeNode = false;
+ $resultAccum = $accum;
+ if ( $equalsLength > 0 ) {
+ if ( $searchStart - $equalsLength == $piece->startPos ) {
+ // This is just a single string of equals signs on its own line
+ // Replicate the doHeadings behaviour /={count}(.+)={count}/
+ // First find out how many equals signs there really are (don't stop at 6)
+ $count = $equalsLength;
+ if ( $count < 3 ) {
+ $count = 0;
+ } else {
+ $count = intval( ( $count - 1 ) / 2 );
+ if ( $count > 6 ) {
+ $count = 6;
+ }
+ }
+ } else {
+ if ( $count > $equalsLength ) {
+ $count = $equalsLength;
+ }
+ }
+ if ( $count > 0 ) {
+ // Normal match, output <h>
+ $tree = new PPNode_HipHop_Tree( 'possible-h' );
+ $tree->addChild( new PPNode_HipHop_Attr( 'level', $count ) );
+ $tree->addChild( new PPNode_HipHop_Attr( 'i', $headingIndex++ ) );
+ $tree->lastChild->nextSibling = $accum->firstNode;
+ $tree->lastChild = $accum->lastNode;
+ $isTreeNode = true;
+ } else {
+ // Single equals sign on its own line, count=0
+ // Output $resultAccum
+ }
+ } else {
+ // No match, no <h>, just pass down the inner text
+ // Output $resultAccum
+ }
+ // Unwind the stack
+ $stack->pop();
+ $accum = $stack->getAccum();
+ $stackFlags = $stack->getFlags();
+
+ // Append the result to the enclosing accumulator
+ if ( $isTreeNode ) {
+ $accum->addNode( $tree );
+ } else {
+ $accum->addAccum( $resultAccum );
+ }
+ // Note that we do NOT increment the input pointer.
+ // This is because the closing linebreak could be the opening linebreak of
+ // another heading. Infinite loops are avoided because the next iteration MUST
+ // hit the heading open case above, which unconditionally increments the
+ // input pointer.
+ } elseif ( $found === 'open' ) {
+ # count opening brace characters
+ $count = intval( strspn( $text, $curChar, $i ) );
+
+ # we need to add to stack only if opening brace count is enough for one of the rules
+ if ( $count >= $rule['min'] ) {
+ # Add it to the stack
+ $partData = array(
+ 'open' => $curChar,
+ 'close' => $rule['end'],
+ 'count' => $count,
+ 'lineStart' => ($i == 0 || $text[$i-1] === "\n"),
+ );
+
+ $stack->push( $partData );
+ $accum = $stack->getAccum();
+ $stackFlags = $stack->getFlags();
+ } else {
+ # Add literal brace(s)
+ $accum->addLiteral( str_repeat( $curChar, $count ) );
+ }
+ $i += $count;
+ } elseif ( $found === 'close' ) {
+ $piece = $stack->getTop();
+ # lets check if there are enough characters for closing brace
+ $maxCount = intval( $piece->count );
+ $count = intval( strspn( $text, $curChar, $i, $maxCount ) );
+
+ # check for maximum matching characters (if there are 5 closing
+ # characters, we will probably need only 3 - depending on the rules)
+ $rule = $rules[$piece->open];
+ if ( $count > $rule['max'] ) {
+ # The specified maximum exists in the callback array, unless the caller
+ # has made an error
+ $matchingCount = intval( $rule['max'] );
+ } else {
+ # Count is less than the maximum
+ # Skip any gaps in the callback array to find the true largest match
+ # Need to use array_key_exists not isset because the callback can be null
+ $matchingCount = $count;
+ while ( $matchingCount > 0 && !array_key_exists( $matchingCount, $rule['names'] ) ) {
+ --$matchingCount;
+ }
+ }
+
+ if ($matchingCount <= 0) {
+ # No matching element found in callback array
+ # Output a literal closing brace and continue
+ $accum->addLiteral( str_repeat( $curChar, $count ) );
+ $i += $count;
+ continue;
+ }
+ $name = strval( $rule['names'][$matchingCount] );
+ $isTreeNode = false;
+ if ( $name === 'LITERAL' ) {
+ // No element, just literal text
+ $resultAccum = $piece->breakSyntax( $matchingCount );
+ $resultAccum->addLiteral( str_repeat( $rule['end'], $matchingCount ) );
+ } else {
+ # Create XML element
+ # Note: $parts is already XML, does not need to be encoded further
+ $isTreeNode = true;
+ $parts = $piece->parts;
+ $titleAccum = PPDAccum_HipHop::cast( $parts[0]->out );
+ unset( $parts[0] );
+
+ $tree = new PPNode_HipHop_Tree( $name );
+
+ # The invocation is at the start of the line if lineStart is set in
+ # the stack, and all opening brackets are used up.
+ if ( $maxCount == $matchingCount && !empty( $piece->lineStart ) ) {
+ $tree->addChild( new PPNode_HipHop_Attr( 'lineStart', 1 ) );
+ }
+ $titleNode = new PPNode_HipHop_Tree( 'title' );
+ $titleNode->firstChild = $titleAccum->firstNode;
+ $titleNode->lastChild = $titleAccum->lastNode;
+ $tree->addChild( $titleNode );
+ $argIndex = 1;
+ foreach ( $parts as $variantPart ) {
+ $part = PPDPart_HipHop::cast( $variantPart );
+ if ( isset( $part->eqpos ) ) {
+ // Find equals
+ $lastNode = false;
+ for ( $node = $part->out->firstNode; $node; $node = $node->nextSibling ) {
+ if ( $node === $part->eqpos ) {
+ break;
+ }
+ $lastNode = $node;
+ }
+ if ( !$node ) {
+ throw new MWException( __METHOD__. ': eqpos not found' );
+ }
+ if ( $node->name !== 'equals' ) {
+ throw new MWException( __METHOD__ .': eqpos is not equals' );
+ }
+ $equalsNode = $node;
+
+ // Construct name node
+ $nameNode = new PPNode_HipHop_Tree( 'name' );
+ if ( $lastNode !== false ) {
+ $lastNode->nextSibling = false;
+ $nameNode->firstChild = $part->out->firstNode;
+ $nameNode->lastChild = $lastNode;
+ }
+
+ // Construct value node
+ $valueNode = new PPNode_HipHop_Tree( 'value' );
+ if ( $equalsNode->nextSibling !== false ) {
+ $valueNode->firstChild = $equalsNode->nextSibling;
+ $valueNode->lastChild = $part->out->lastNode;
+ }
+ $partNode = new PPNode_HipHop_Tree( 'part' );
+ $partNode->addChild( $nameNode );
+ $partNode->addChild( $equalsNode->firstChild );
+ $partNode->addChild( $valueNode );
+ $tree->addChild( $partNode );
+ } else {
+ $partNode = new PPNode_HipHop_Tree( 'part' );
+ $nameNode = new PPNode_HipHop_Tree( 'name' );
+ $nameNode->addChild( new PPNode_HipHop_Attr( 'index', $argIndex++ ) );
+ $valueNode = new PPNode_HipHop_Tree( 'value' );
+ $valueNode->firstChild = $part->out->firstNode;
+ $valueNode->lastChild = $part->out->lastNode;
+ $partNode->addChild( $nameNode );
+ $partNode->addChild( $valueNode );
+ $tree->addChild( $partNode );
+ }
+ }
+ }
+
+ # Advance input pointer
+ $i += $matchingCount;
+
+ # Unwind the stack
+ $stack->pop();
+ $accum = $stack->getAccum();
+
+ # Re-add the old stack element if it still has unmatched opening characters remaining
+ if ($matchingCount < $piece->count) {
+ $piece->parts = array( new PPDPart_HipHop );
+ $piece->count -= $matchingCount;
+ # do we still qualify for any callback with remaining count?
+ $names = $rules[$piece->open]['names'];
+ $skippedBraces = 0;
+ $enclosingAccum = $accum;
+ while ( $piece->count ) {
+ if ( array_key_exists( $piece->count, $names ) ) {
+ $stack->push( $piece );
+ $accum = $stack->getAccum();
+ break;
+ }
+ --$piece->count;
+ $skippedBraces ++;
+ }
+ $enclosingAccum->addLiteral( str_repeat( $piece->open, $skippedBraces ) );
+ }
+
+ $stackFlags = $stack->getFlags();
+
+ # Add XML element to the enclosing accumulator
+ if ( $isTreeNode ) {
+ $accum->addNode( $tree );
+ } else {
+ $accum->addAccum( $resultAccum );
+ }
+ } elseif ( $found === 'pipe' ) {
+ $stackFlags['findEquals'] = true; // shortcut for getFlags()
+ $stack->addPart();
+ $accum = $stack->getAccum();
+ ++$i;
+ } elseif ( $found === 'equals' ) {
+ $stackFlags['findEquals'] = false; // shortcut for getFlags()
+ $accum->addNodeWithText( 'equals', '=' );
+ $stack->getCurrentPart()->eqpos = $accum->lastNode;
+ ++$i;
+ }
+ }
+
+ # Output any remaining unclosed brackets
+ foreach ( $stack->stack as $variantPiece ) {
+ $piece = PPDStackElement_HipHop::cast( $variantPiece );
+ $stack->rootAccum->addAccum( $piece->breakSyntax() );
+ }
+
+ # Enable top-level headings
+ for ( $node = $stack->rootAccum->firstNode; $node; $node = $node->nextSibling ) {
+ if ( isset( $node->name ) && $node->name === 'possible-h' ) {
+ $node->name = 'h';
+ }
+ }
+
+ $rootNode = new PPNode_HipHop_Tree( 'root' );
+ $rootNode->firstChild = $stack->rootAccum->firstNode;
+ $rootNode->lastChild = $stack->rootAccum->lastNode;
+
+ // Cache
+ if ($cacheable) {
+ $cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . serialize( $rootNode );
+ $wgMemc->set( $cacheKey, $cacheValue, 86400 );
+ wfProfileOut( __METHOD__.'-cache-miss' );
+ wfProfileOut( __METHOD__.'-cacheable' );
+ wfDebugLog( "Preprocessor", "Saved preprocessor Hash to memcached (key $cacheKey)" );
+ }
+
+ wfProfileOut( __METHOD__ );
+ return $rootNode;
+ }
+}
+
+
+
+/**
+ * Stack class to help Preprocessor::preprocessToObj()
+ * @ingroup Parser
+ */
+class PPDStack_HipHop {
+ var $stack, $rootAccum;
+
+ /**
+ * @var PPDStack
+ */
+ var $top;
+ var $out;
+
+ static $false = false;
+
+ function __construct() {
+ $this->stack = array();
+ $this->top = false;
+ $this->rootAccum = new PPDAccum_HipHop;
+ $this->accum = $this->rootAccum;
+ }
+
+ /**
+ * @return int
+ */
+ function count() {
+ return count( $this->stack );
+ }
+
+ function getAccum() {
+ return PPDAccum_HipHop::cast( $this->accum );
+ }
+
+ function getCurrentPart() {
+ return $this->getTop()->getCurrentPart();
+ }
+
+ function getTop() {
+ return PPDStackElement_HipHop::cast( $this->top );
+ }
+
+ function push( $data ) {
+ if ( $data instanceof PPDStackElement_HipHop ) {
+ $this->stack[] = $data;
+ } else {
+ $this->stack[] = new PPDStackElement_HipHop( $data );
+ }
+ $this->top = $this->stack[ count( $this->stack ) - 1 ];
+ $this->accum = $this->top->getAccum();
+ }
+
+ function pop() {
+ if ( !count( $this->stack ) ) {
+ throw new MWException( __METHOD__.': no elements remaining' );
+ }
+ $temp = array_pop( $this->stack );
+
+ if ( count( $this->stack ) ) {
+ $this->top = $this->stack[ count( $this->stack ) - 1 ];
+ $this->accum = $this->top->getAccum();
+ } else {
+ $this->top = self::$false;
+ $this->accum = $this->rootAccum;
+ }
+ return $temp;
+ }
+
+ function addPart( $s = '' ) {
+ $this->top->addPart( $s );
+ $this->accum = $this->top->getAccum();
+ }
+
+ /**
+ * @return array
+ */
+ function getFlags() {
+ if ( !count( $this->stack ) ) {
+ return array(
+ 'findEquals' => false,
+ 'findPipe' => false,
+ 'inHeading' => false,
+ );
+ } else {
+ return $this->top->getFlags();
+ }
+ }
+}
+
+/**
+ * @ingroup Parser
+ */
+class PPDStackElement_HipHop {
+ var $open, // Opening character (\n for heading)
+ $close, // Matching closing character
+ $count, // Number of opening characters found (number of "=" for heading)
+ $parts, // Array of PPDPart objects describing pipe-separated parts.
+ $lineStart; // True if the open char appeared at the start of the input line. Not set for headings.
+
+ static function cast( PPDStackElement_HipHop $obj ) {
+ return $obj;
+ }
+
+ function __construct( $data = array() ) {
+ $this->parts = array( new PPDPart_HipHop );
+
+ foreach ( $data as $name => $value ) {
+ $this->$name = $value;
+ }
+ }
+
+ function getAccum() {
+ return PPDAccum_HipHop::cast( $this->parts[count($this->parts) - 1]->out );
+ }
+
+ function addPart( $s = '' ) {
+ $this->parts[] = new PPDPart_HipHop( $s );
+ }
+
+ function getCurrentPart() {
+ return PPDPart_HipHop::cast( $this->parts[count($this->parts) - 1] );
+ }
+
+ /**
+ * @return array
+ */
+ function getFlags() {
+ $partCount = count( $this->parts );
+ $findPipe = $this->open !== "\n" && $this->open !== '[';
+ return array(
+ 'findPipe' => $findPipe,
+ 'findEquals' => $findPipe && $partCount > 1 && !isset( $this->parts[$partCount - 1]->eqpos ),
+ 'inHeading' => $this->open === "\n",
+ );
+ }
+
+ /**
+ * Get the accumulator that would result if the close is not found.
+ *
+ * @return PPDAccum_HipHop
+ */
+ function breakSyntax( $openingCount = false ) {
+ if ( $this->open === "\n" ) {
+ $accum = PPDAccum_HipHop::cast( $this->parts[0]->out );
+ } else {
+ if ( $openingCount === false ) {
+ $openingCount = $this->count;
+ }
+ $accum = new PPDAccum_HipHop;
+ $accum->addLiteral( str_repeat( $this->open, $openingCount ) );
+ $first = true;
+ foreach ( $this->parts as $part ) {
+ if ( $first ) {
+ $first = false;
+ } else {
+ $accum->addLiteral( '|' );
+ }
+ $accum->addAccum( $part->out );
+ }
+ }
+ return $accum;
+ }
+}
+
+/**
+ * @ingroup Parser
+ */
+class PPDPart_HipHop {
+ var $out; // Output accumulator object
+
+ // Optional member variables:
+ // eqpos Position of equals sign in output accumulator
+ // commentEnd Past-the-end input pointer for the last comment encountered
+ // visualEnd Past-the-end input pointer for the end of the accumulator minus comments
+
+ function __construct( $out = '' ) {
+ $this->out = new PPDAccum_HipHop;
+ if ( $out !== '' ) {
+ $this->out->addLiteral( $out );
+ }
+ }
+
+ static function cast( PPDPart_HipHop $obj ) {
+ return $obj;
+ }
+}
+
+/**
+ * @ingroup Parser
+ */
+class PPDAccum_HipHop {
+ var $firstNode, $lastNode;
+
+ function __construct() {
+ $this->firstNode = $this->lastNode = false;
+ }
+
+ static function cast( PPDAccum_HipHop $obj ) {
+ return $obj;
+ }
+
+ /**
+ * Append a string literal
+ */
+ function addLiteral( string $s ) {
+ if ( $this->lastNode === false ) {
+ $this->firstNode = $this->lastNode = new PPNode_HipHop_Text( $s );
+ } elseif ( $this->lastNode instanceof PPNode_HipHop_Text ) {
+ $this->lastNode->value .= $s;
+ } else {
+ $this->lastNode->nextSibling = new PPNode_HipHop_Text( $s );
+ $this->lastNode = $this->lastNode->nextSibling;
+ }
+ }
+
+ /**
+ * Append a PPNode
+ */
+ function addNode( PPNode $node ) {
+ if ( $this->lastNode === false ) {
+ $this->firstNode = $this->lastNode = $node;
+ } else {
+ $this->lastNode->nextSibling = $node;
+ $this->lastNode = $node;
+ }
+ }
+
+ /**
+ * Append a tree node with text contents
+ */
+ function addNodeWithText( string $name, string $value ) {
+ $node = PPNode_HipHop_Tree::newWithText( $name, $value );
+ $this->addNode( $node );
+ }
+
+ /**
+ * Append a PPDAccum_HipHop
+ * Takes over ownership of the nodes in the source argument. These nodes may
+ * subsequently be modified, especially nextSibling.
+ */
+ function addAccum( PPDAccum_HipHop $accum ) {
+ if ( $accum->lastNode === false ) {
+ // nothing to add
+ } elseif ( $this->lastNode === false ) {
+ $this->firstNode = $accum->firstNode;
+ $this->lastNode = $accum->lastNode;
+ } else {
+ $this->lastNode->nextSibling = $accum->firstNode;
+ $this->lastNode = $accum->lastNode;
+ }
+ }
+}
+
+/**
+ * An expansion frame, used as a context to expand the result of preprocessToObj()
+ * @ingroup Parser
+ */
+class PPFrame_HipHop implements PPFrame {
+
+ /**
+ * @var Parser
+ */
+ var $parser;
+
+ /**
+ * @var Preprocessor
+ */
+ var $preprocessor;
+
+ /**
+ * @var Title
+ */
+ var $title;
+ var $titleCache;
+
+ /**
+ * Hashtable listing templates which are disallowed for expansion in this frame,
+ * having been encountered previously in parent frames.
+ */
+ var $loopCheckHash;
+
+ /**
+ * Recursion depth of this frame, top = 0
+ * Note that this is NOT the same as expansion depth in expand()
+ */
+ var $depth;
+
+
+ /**
+ * Construct a new preprocessor frame.
+ * @param $preprocessor Preprocessor: the parent preprocessor
+ */
+ function __construct( $preprocessor ) {
+ $this->preprocessor = $preprocessor;
+ $this->parser = $preprocessor->parser;
+ $this->title = $this->parser->mTitle;
+ $this->titleCache = array( $this->title ? $this->title->getPrefixedDBkey() : false );
+ $this->loopCheckHash = array();
+ $this->depth = 0;
+ }
+
+ /**
+ * Create a new child frame
+ * $args is optionally a multi-root PPNode or array containing the template arguments
+ *
+ * @param $args PPNode_HipHop_Array|array
+ * @param $title Title|false
+ *
+ * @return PPTemplateFrame_HipHop
+ */
+ function newChild( $args = false, $title = false ) {
+ $namedArgs = array();
+ $numberedArgs = array();
+ if ( $title === false ) {
+ $title = $this->title;
+ }
+ if ( $args !== false ) {
+ if ( $args instanceof PPNode_HipHop_Array ) {
+ $args = $args->value;
+ } elseif ( !is_array( $args ) ) {
+ throw new MWException( __METHOD__ . ': $args must be array or PPNode_HipHop_Array' );
+ }
+ foreach ( $args as $arg ) {
+ $bits = $arg->splitArg();
+ if ( $bits['index'] !== '' ) {
+ // Numbered parameter
+ $numberedArgs[$bits['index']] = $bits['value'];
+ unset( $namedArgs[$bits['index']] );
+ } else {
+ // Named parameter
+ $name = trim( $this->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
+ $namedArgs[$name] = $bits['value'];
+ unset( $numberedArgs[$name] );
+ }
+ }
+ }
+ return new PPTemplateFrame_HipHop( $this->preprocessor, $this, $numberedArgs, $namedArgs, $title );
+ }
+
+ /**
+ * @throws MWException
+ * @param $root
+ * @param $flags int
+ * @return string
+ */
+ function expand( $root, $flags = 0 ) {
+ static $expansionDepth = 0;
+ if ( is_string( $root ) ) {
+ return $root;
+ }
+
+ if ( ++$this->parser->mPPNodeCount > $this->parser->mOptions->getMaxPPNodeCount() ) {
+ return '<span class="error">Node-count limit exceeded</span>';
+ }
+ if ( $expansionDepth > $this->parser->mOptions->getMaxPPExpandDepth() ) {
+ return '<span class="error">Expansion depth limit exceeded</span>';
+ }
+ ++$expansionDepth;
+
+ $outStack = array( '', '' );
+ $iteratorStack = array( false, $root );
+ $indexStack = array( 0, 0 );
+
+ while ( count( $iteratorStack ) > 1 ) {
+ $level = count( $outStack ) - 1;
+ $iteratorNode =& $iteratorStack[ $level ];
+ $out =& $outStack[$level];
+ $index =& $indexStack[$level];
+
+ if ( is_array( $iteratorNode ) ) {
+ if ( $index >= count( $iteratorNode ) ) {
+ // All done with this iterator
+ $iteratorStack[$level] = false;
+ $contextNode = false;
+ } else {
+ $contextNode = $iteratorNode[$index];
+ $index++;
+ }
+ } elseif ( $iteratorNode instanceof PPNode_HipHop_Array ) {
+ if ( $index >= $iteratorNode->getLength() ) {
+ // All done with this iterator
+ $iteratorStack[$level] = false;
+ $contextNode = false;
+ } else {
+ $contextNode = $iteratorNode->item( $index );
+ $index++;
+ }
+ } else {
+ // Copy to $contextNode and then delete from iterator stack,
+ // because this is not an iterator but we do have to execute it once
+ $contextNode = $iteratorStack[$level];
+ $iteratorStack[$level] = false;
+ }
+
+ $newIterator = false;
+
+ if ( $contextNode === false ) {
+ // nothing to do
+ } elseif ( is_string( $contextNode ) ) {
+ $out .= $contextNode;
+ } elseif ( is_array( $contextNode ) || $contextNode instanceof PPNode_HipHop_Array ) {
+ $newIterator = $contextNode;
+ } elseif ( $contextNode instanceof PPNode_HipHop_Attr ) {
+ // No output
+ } elseif ( $contextNode instanceof PPNode_HipHop_Text ) {
+ $out .= $contextNode->value;
+ } elseif ( $contextNode instanceof PPNode_HipHop_Tree ) {
+ if ( $contextNode->name === 'template' ) {
+ # Double-brace expansion
+ $bits = $contextNode->splitTemplate();
+ if ( $flags & PPFrame::NO_TEMPLATES ) {
+ $newIterator = $this->virtualBracketedImplode( '{{', '|', '}}', $bits['title'], $bits['parts'] );
+ } else {
+ $ret = $this->parser->braceSubstitution( $bits, $this );
+ if ( isset( $ret['object'] ) ) {
+ $newIterator = $ret['object'];
+ } else {
+ $out .= $ret['text'];
+ }
+ }
+ } elseif ( $contextNode->name === 'tplarg' ) {
+ # Triple-brace expansion
+ $bits = $contextNode->splitTemplate();
+ if ( $flags & PPFrame::NO_ARGS ) {
+ $newIterator = $this->virtualBracketedImplode( '{{{', '|', '}}}', $bits['title'], $bits['parts'] );
+ } else {
+ $ret = $this->parser->argSubstitution( $bits, $this );
+ if ( isset( $ret['object'] ) ) {
+ $newIterator = $ret['object'];
+ } else {
+ $out .= $ret['text'];
+ }
+ }
+ } elseif ( $contextNode->name === 'comment' ) {
+ # HTML-style comment
+ # Remove it in HTML, pre+remove and STRIP_COMMENTS modes
+ if ( $this->parser->ot['html']
+ || ( $this->parser->ot['pre'] && $this->parser->mOptions->getRemoveComments() )
+ || ( $flags & PPFrame::STRIP_COMMENTS ) )
+ {
+ $out .= '';
+ }
+ # Add a strip marker in PST mode so that pstPass2() can run some old-fashioned regexes on the result
+ # Not in RECOVER_COMMENTS mode (extractSections) though
+ elseif ( $this->parser->ot['wiki'] && ! ( $flags & PPFrame::RECOVER_COMMENTS ) ) {
+ $out .= $this->parser->insertStripItem( $contextNode->firstChild->value );
+ }
+ # Recover the literal comment in RECOVER_COMMENTS and pre+no-remove
+ else {
+ $out .= $contextNode->firstChild->value;
+ }
+ } elseif ( $contextNode->name === 'ignore' ) {
+ # Output suppression used by <includeonly> etc.
+ # OT_WIKI will only respect <ignore> in substed templates.
+ # The other output types respect it unless NO_IGNORE is set.
+ # extractSections() sets NO_IGNORE and so never respects it.
+ if ( ( !isset( $this->parent ) && $this->parser->ot['wiki'] ) || ( $flags & PPFrame::NO_IGNORE ) ) {
+ $out .= $contextNode->firstChild->value;
+ } else {
+ //$out .= '';
+ }
+ } elseif ( $contextNode->name === 'ext' ) {
+ # Extension tag
+ $bits = $contextNode->splitExt() + array( 'attr' => null, 'inner' => null, 'close' => null );
+ $out .= $this->parser->extensionSubstitution( $bits, $this );
+ } elseif ( $contextNode->name === 'h' ) {
+ # Heading
+ if ( $this->parser->ot['html'] ) {
+ # Expand immediately and insert heading index marker
+ $s = '';
+ for ( $node = $contextNode->firstChild; $node; $node = $node->nextSibling ) {
+ $s .= $this->expand( $node, $flags );
+ }
+
+ $bits = $contextNode->splitHeading();
+ $titleText = $this->title->getPrefixedDBkey();
+ $this->parser->mHeadings[] = array( $titleText, $bits['i'] );
+ $serial = count( $this->parser->mHeadings ) - 1;
+ $marker = "{$this->parser->mUniqPrefix}-h-$serial-" . Parser::MARKER_SUFFIX;
+ $s = substr( $s, 0, $bits['level'] ) . $marker . substr( $s, $bits['level'] );
+ $this->parser->mStripState->addGeneral( $marker, '' );
+ $out .= $s;
+ } else {
+ # Expand in virtual stack
+ $newIterator = $contextNode->getChildren();
+ }
+ } else {
+ # Generic recursive expansion
+ $newIterator = $contextNode->getChildren();
+ }
+ } else {
+ throw new MWException( __METHOD__.': Invalid parameter type' );
+ }
+
+ if ( $newIterator !== false ) {
+ $outStack[] = '';
+ $iteratorStack[] = $newIterator;
+ $indexStack[] = 0;
+ } elseif ( $iteratorStack[$level] === false ) {
+ // Return accumulated value to parent
+ // With tail recursion
+ while ( $iteratorStack[$level] === false && $level > 0 ) {
+ $outStack[$level - 1] .= $out;
+ array_pop( $outStack );
+ array_pop( $iteratorStack );
+ array_pop( $indexStack );
+ $level--;
+ }
+ }
+ }
+ --$expansionDepth;
+ return $outStack[0];
+ }
+
+ /**
+ * @param $sep
+ * @param $flags
+ * @return string
+ */
+ function implodeWithFlags( $sep, $flags /*, ... */ ) {
+ $args = array_slice( func_get_args(), 2 );
+
+ $first = true;
+ $s = '';
+ foreach ( $args as $root ) {
+ if ( $root instanceof PPNode_HipHop_Array ) {
+ $root = $root->value;
+ }
+ if ( !is_array( $root ) ) {
+ $root = array( $root );
+ }
+ foreach ( $root as $node ) {
+ if ( $first ) {
+ $first = false;
+ } else {
+ $s .= $sep;
+ }
+ $s .= $this->expand( $node, $flags );
+ }
+ }
+ return $s;
+ }
+
+ /**
+ * Implode with no flags specified
+ * This previously called implodeWithFlags but has now been inlined to reduce stack depth
+ * @return string
+ */
+ function implode( $sep /*, ... */ ) {
+ $args = array_slice( func_get_args(), 1 );
+
+ $first = true;
+ $s = '';
+ foreach ( $args as $root ) {
+ if ( $root instanceof PPNode_HipHop_Array ) {
+ $root = $root->value;
+ }
+ if ( !is_array( $root ) ) {
+ $root = array( $root );
+ }
+ foreach ( $root as $node ) {
+ if ( $first ) {
+ $first = false;
+ } else {
+ $s .= $sep;
+ }
+ $s .= $this->expand( $node );
+ }
+ }
+ return $s;
+ }
+
+ /**
+ * Makes an object that, when expand()ed, will be the same as one obtained
+ * with implode()
+ *
+ * @return PPNode_HipHop_Array
+ */
+ function virtualImplode( $sep /*, ... */ ) {
+ $args = array_slice( func_get_args(), 1 );
+ $out = array();
+ $first = true;
+
+ foreach ( $args as $root ) {
+ if ( $root instanceof PPNode_HipHop_Array ) {
+ $root = $root->value;
+ }
+ if ( !is_array( $root ) ) {
+ $root = array( $root );
+ }
+ foreach ( $root as $node ) {
+ if ( $first ) {
+ $first = false;
+ } else {
+ $out[] = $sep;
+ }
+ $out[] = $node;
+ }
+ }
+ return new PPNode_HipHop_Array( $out );
+ }
+
+ /**
+ * Virtual implode with brackets
+ *
+ * @return PPNode_HipHop_Array
+ */
+ function virtualBracketedImplode( $start, $sep, $end /*, ... */ ) {
+ $args = array_slice( func_get_args(), 3 );
+ $out = array( $start );
+ $first = true;
+
+ foreach ( $args as $root ) {
+ if ( $root instanceof PPNode_HipHop_Array ) {
+ $root = $root->value;
+ }
+ if ( !is_array( $root ) ) {
+ $root = array( $root );
+ }
+ foreach ( $root as $node ) {
+ if ( $first ) {
+ $first = false;
+ } else {
+ $out[] = $sep;
+ }
+ $out[] = $node;
+ }
+ }
+ $out[] = $end;
+ return new PPNode_HipHop_Array( $out );
+ }
+
+ function __toString() {
+ return 'frame{}';
+ }
+
+ /**
+ * @param $level bool
+ * @return array|bool|String
+ */
+ function getPDBK( $level = false ) {
+ if ( $level === false ) {
+ return $this->title->getPrefixedDBkey();
+ } else {
+ return isset( $this->titleCache[$level] ) ? $this->titleCache[$level] : false;
+ }
+ }
+
+ /**
+ * @return array
+ */
+ function getArguments() {
+ return array();
+ }
+
+ /**
+ * @return array
+ */
+ function getNumberedArguments() {
+ return array();
+ }
+
+ /**
+ * @return array
+ */
+ function getNamedArguments() {
+ return array();
+ }
+
+ /**
+ * Returns true if there are no arguments in this frame
+ *
+ * @return bool
+ */
+ function isEmpty() {
+ return true;
+ }
+
+ /**
+ * @param $name
+ * @return bool
+ */
+ function getArgument( $name ) {
+ return false;
+ }
+
+ /**
+ * Returns true if the infinite loop check is OK, false if a loop is detected
+ *
+ * @param $title Title
+ *
+ * @return bool
+ */
+ function loopCheck( $title ) {
+ return !isset( $this->loopCheckHash[$title->getPrefixedDBkey()] );
+ }
+
+ /**
+ * Return true if the frame is a template frame
+ *
+ * @return bool
+ */
+ function isTemplate() {
+ return false;
+ }
+}
+
+/**
+ * Expansion frame with template arguments
+ * @ingroup Parser
+ */
+class PPTemplateFrame_HipHop extends PPFrame_HipHop {
+ var $numberedArgs, $namedArgs, $parent;
+ var $numberedExpansionCache, $namedExpansionCache;
+
+ /**
+ * @param $preprocessor
+ * @param $parent
+ * @param $numberedArgs array
+ * @param $namedArgs array
+ * @param $title Title
+ */
+ function __construct( $preprocessor, $parent = false, $numberedArgs = array(), $namedArgs = array(), $title = false ) {
+ parent::__construct( $preprocessor );
+
+ $this->parent = $parent;
+ $this->numberedArgs = $numberedArgs;
+ $this->namedArgs = $namedArgs;
+ $this->title = $title;
+ $pdbk = $title ? $title->getPrefixedDBkey() : false;
+ $this->titleCache = $parent->titleCache;
+ $this->titleCache[] = $pdbk;
+ $this->loopCheckHash = /*clone*/ $parent->loopCheckHash;
+ if ( $pdbk !== false ) {
+ $this->loopCheckHash[$pdbk] = true;
+ }
+ $this->depth = $parent->depth + 1;
+ $this->numberedExpansionCache = $this->namedExpansionCache = array();
+ }
+
+ function __toString() {
+ $s = 'tplframe{';
+ $first = true;
+ $args = $this->numberedArgs + $this->namedArgs;
+ foreach ( $args as $name => $value ) {
+ if ( $first ) {
+ $first = false;
+ } else {
+ $s .= ', ';
+ }
+ $s .= "\"$name\":\"" .
+ str_replace( '"', '\\"', $value->__toString() ) . '"';
+ }
+ $s .= '}';
+ return $s;
+ }
+ /**
+ * Returns true if there are no arguments in this frame
+ *
+ * @return bool
+ */
+ function isEmpty() {
+ return !count( $this->numberedArgs ) && !count( $this->namedArgs );
+ }
+
+ /**
+ * @return array
+ */
+ function getArguments() {
+ $arguments = array();
+ foreach ( array_merge(
+ array_keys($this->numberedArgs),
+ array_keys($this->namedArgs)) as $key ) {
+ $arguments[$key] = $this->getArgument($key);
+ }
+ return $arguments;
+ }
+
+ /**
+ * @return array
+ */
+ function getNumberedArguments() {
+ $arguments = array();
+ foreach ( array_keys($this->numberedArgs) as $key ) {
+ $arguments[$key] = $this->getArgument($key);
+ }
+ return $arguments;
+ }
+
+ /**
+ * @return array
+ */
+ function getNamedArguments() {
+ $arguments = array();
+ foreach ( array_keys($this->namedArgs) as $key ) {
+ $arguments[$key] = $this->getArgument($key);
+ }
+ return $arguments;
+ }
+
+ /**
+ * @param $index
+ * @return array|bool
+ */
+ function getNumberedArgument( $index ) {
+ if ( !isset( $this->numberedArgs[$index] ) ) {
+ return false;
+ }
+ if ( !isset( $this->numberedExpansionCache[$index] ) ) {
+ # No trimming for unnamed arguments
+ $this->numberedExpansionCache[$index] = $this->parent->expand( $this->numberedArgs[$index], PPFrame::STRIP_COMMENTS );
+ }
+ return $this->numberedExpansionCache[$index];
+ }
+
+ /**
+ * @param $name
+ * @return bool
+ */
+ function getNamedArgument( $name ) {
+ if ( !isset( $this->namedArgs[$name] ) ) {
+ return false;
+ }
+ if ( !isset( $this->namedExpansionCache[$name] ) ) {
+ # Trim named arguments post-expand, for backwards compatibility
+ $this->namedExpansionCache[$name] = trim(
+ $this->parent->expand( $this->namedArgs[$name], PPFrame::STRIP_COMMENTS ) );
+ }
+ return $this->namedExpansionCache[$name];
+ }
+
+ /**
+ * @param $name
+ * @return array|bool
+ */
+ function getArgument( $name ) {
+ $text = $this->getNumberedArgument( $name );
+ if ( $text === false ) {
+ $text = $this->getNamedArgument( $name );
+ }
+ return $text;
+ }
+
+ /**
+ * Return true if the frame is a template frame
+ *
+ * @return bool
+ */
+ function isTemplate() {
+ return true;
+ }
+}
+
+/**
+ * Expansion frame with custom arguments
+ * @ingroup Parser
+ */
+class PPCustomFrame_HipHop extends PPFrame_HipHop {
+ var $args;
+
+ function __construct( $preprocessor, $args ) {
+ parent::__construct( $preprocessor );
+ $this->args = $args;
+ }
+
+ function __toString() {
+ $s = 'cstmframe{';
+ $first = true;
+ foreach ( $this->args as $name => $value ) {
+ if ( $first ) {
+ $first = false;
+ } else {
+ $s .= ', ';
+ }
+ $s .= "\"$name\":\"" .
+ str_replace( '"', '\\"', $value->__toString() ) . '"';
+ }
+ $s .= '}';
+ return $s;
+ }
+
+ /**
+ * @return bool
+ */
+ function isEmpty() {
+ return !count( $this->args );
+ }
+
+ /**
+ * @param $index
+ * @return bool
+ */
+ function getArgument( $index ) {
+ if ( !isset( $this->args[$index] ) ) {
+ return false;
+ }
+ return $this->args[$index];
+ }
+}
+
+/**
+ * @ingroup Parser
+ */
+class PPNode_HipHop_Tree implements PPNode {
+ var $name, $firstChild, $lastChild, $nextSibling;
+
+ function __construct( $name ) {
+ $this->name = $name;
+ $this->firstChild = $this->lastChild = $this->nextSibling = false;
+ }
+
+ function __toString() {
+ $inner = '';
+ $attribs = '';
+ for ( $node = $this->firstChild; $node; $node = $node->nextSibling ) {
+ if ( $node instanceof PPNode_HipHop_Attr ) {
+ $attribs .= ' ' . $node->name . '="' . htmlspecialchars( $node->value ) . '"';
+ } else {
+ $inner .= $node->__toString();
+ }
+ }
+ if ( $inner === '' ) {
+ return "<{$this->name}$attribs/>";
+ } else {
+ return "<{$this->name}$attribs>$inner</{$this->name}>";
+ }
+ }
+
+ /**
+ * @param $name
+ * @param $text
+ * @return PPNode_HipHop_Tree
+ */
+ static function newWithText( $name, $text ) {
+ $obj = new self( $name );
+ $obj->addChild( new PPNode_HipHop_Text( $text ) );
+ return $obj;
+ }
+
+ function addChild( $node ) {
+ if ( $this->lastChild === false ) {
+ $this->firstChild = $this->lastChild = $node;
+ } else {
+ $this->lastChild->nextSibling = $node;
+ $this->lastChild = $node;
+ }
+ }
+
+ /**
+ * @return PPNode_HipHop_Array
+ */
+ function getChildren() {
+ $children = array();
+ for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) {
+ $children[] = $child;
+ }
+ return new PPNode_HipHop_Array( $children );
+ }
+
+ function getFirstChild() {
+ return $this->firstChild;
+ }
+
+ function getNextSibling() {
+ return $this->nextSibling;
+ }
+
+ function getChildrenOfType( $name ) {
+ $children = array();
+ for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) {
+ if ( isset( $child->name ) && $child->name === $name ) {
+ $children[] = $name;
+ }
+ }
+ return $children;
+ }
+
+ /**
+ * @return bool
+ */
+ function getLength() {
+ return false;
+ }
+
+ /**
+ * @param $i
+ * @return bool
+ */
+ function item( $i ) {
+ return false;
+ }
+
+ /**
+ * @return string
+ */
+ function getName() {
+ return $this->name;
+ }
+
+ /**
+ * Split a <part> node into an associative array containing:
+ * name PPNode name
+ * index String index
+ * value PPNode value
+ *
+ * @return array
+ */
+ function splitArg() {
+ $bits = array();
+ for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) {
+ if ( !isset( $child->name ) ) {
+ continue;
+ }
+ if ( $child->name === 'name' ) {
+ $bits['name'] = $child;
+ if ( $child->firstChild instanceof PPNode_HipHop_Attr
+ && $child->firstChild->name === 'index' )
+ {
+ $bits['index'] = $child->firstChild->value;
+ }
+ } elseif ( $child->name === 'value' ) {
+ $bits['value'] = $child;
+ }
+ }
+
+ if ( !isset( $bits['name'] ) ) {
+ throw new MWException( 'Invalid brace node passed to ' . __METHOD__ );
+ }
+ if ( !isset( $bits['index'] ) ) {
+ $bits['index'] = '';
+ }
+ return $bits;
+ }
+
+ /**
+ * Split an <ext> node into an associative array containing name, attr, inner and close
+ * All values in the resulting array are PPNodes. Inner and close are optional.
+ *
+ * @return array
+ */
+ function splitExt() {
+ $bits = array();
+ for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) {
+ if ( !isset( $child->name ) ) {
+ continue;
+ }
+ if ( $child->name === 'name' ) {
+ $bits['name'] = $child;
+ } elseif ( $child->name === 'attr' ) {
+ $bits['attr'] = $child;
+ } elseif ( $child->name === 'inner' ) {
+ $bits['inner'] = $child;
+ } elseif ( $child->name === 'close' ) {
+ $bits['close'] = $child;
+ }
+ }
+ if ( !isset( $bits['name'] ) ) {
+ throw new MWException( 'Invalid ext node passed to ' . __METHOD__ );
+ }
+ return $bits;
+ }
+
+ /**
+ * Split an <h> node
+ *
+ * @return array
+ */
+ function splitHeading() {
+ if ( $this->name !== 'h' ) {
+ throw new MWException( 'Invalid h node passed to ' . __METHOD__ );
+ }
+ $bits = array();
+ for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) {
+ if ( !isset( $child->name ) ) {
+ continue;
+ }
+ if ( $child->name === 'i' ) {
+ $bits['i'] = $child->value;
+ } elseif ( $child->name === 'level' ) {
+ $bits['level'] = $child->value;
+ }
+ }
+ if ( !isset( $bits['i'] ) ) {
+ throw new MWException( 'Invalid h node passed to ' . __METHOD__ );
+ }
+ return $bits;
+ }
+
+ /**
+ * Split a <template> or <tplarg> node
+ *
+ * @return array
+ */
+ function splitTemplate() {
+ $parts = array();
+ $bits = array( 'lineStart' => '' );
+ for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) {
+ if ( !isset( $child->name ) ) {
+ continue;
+ }
+ if ( $child->name === 'title' ) {
+ $bits['title'] = $child;
+ }
+ if ( $child->name === 'part' ) {
+ $parts[] = $child;
+ }
+ if ( $child->name === 'lineStart' ) {
+ $bits['lineStart'] = '1';
+ }
+ }
+ if ( !isset( $bits['title'] ) ) {
+ throw new MWException( 'Invalid node passed to ' . __METHOD__ );
+ }
+ $bits['parts'] = new PPNode_HipHop_Array( $parts );
+ return $bits;
+ }
+}
+
+/**
+ * @ingroup Parser
+ */
+class PPNode_HipHop_Text implements PPNode {
+ var $value, $nextSibling;
+
+ function __construct( $value ) {
+ if ( is_object( $value ) ) {
+ throw new MWException( __CLASS__ . ' given object instead of string' );
+ }
+ $this->value = $value;
+ }
+
+ function __toString() {
+ return htmlspecialchars( $this->value );
+ }
+
+ function getNextSibling() {
+ return $this->nextSibling;
+ }
+
+ function getChildren() { return false; }
+ function getFirstChild() { return false; }
+ function getChildrenOfType( $name ) { return false; }
+ function getLength() { return false; }
+ function item( $i ) { return false; }
+ function getName() { return '#text'; }
+ function splitArg() { throw new MWException( __METHOD__ . ': not supported' ); }
+ function splitExt() { throw new MWException( __METHOD__ . ': not supported' ); }
+ function splitHeading() { throw new MWException( __METHOD__ . ': not supported' ); }
+}
+
+/**
+ * @ingroup Parser
+ */
+class PPNode_HipHop_Array implements PPNode {
+ var $value, $nextSibling;
+
+ function __construct( $value ) {
+ $this->value = $value;
+ }
+
+ function __toString() {
+ return var_export( $this, true );
+ }
+
+ function getLength() {
+ return count( $this->value );
+ }
+
+ function item( $i ) {
+ return $this->value[$i];
+ }
+
+ function getName() { return '#nodelist'; }
+
+ function getNextSibling() {
+ return $this->nextSibling;
+ }
+
+ function getChildren() { return false; }
+ function getFirstChild() { return false; }
+ function getChildrenOfType( $name ) { return false; }
+ function splitArg() { throw new MWException( __METHOD__ . ': not supported' ); }
+ function splitExt() { throw new MWException( __METHOD__ . ': not supported' ); }
+ function splitHeading() { throw new MWException( __METHOD__ . ': not supported' ); }
+}
+
+/**
+ * @ingroup Parser
+ */
+class PPNode_HipHop_Attr implements PPNode {
+ var $name, $value, $nextSibling;
+
+ function __construct( $name, $value ) {
+ $this->name = $name;
+ $this->value = $value;
+ }
+
+ function __toString() {
+ return "<@{$this->name}>" . htmlspecialchars( $this->value ) . "</@{$this->name}>";
+ }
+
+ function getName() {
+ return $this->name;
+ }
+
+ function getNextSibling() {
+ return $this->nextSibling;
+ }
+
+ function getChildren() { return false; }
+ function getFirstChild() { return false; }
+ function getChildrenOfType( $name ) { return false; }
+ function getLength() { return false; }
+ function item( $i ) { return false; }
+ function splitArg() { throw new MWException( __METHOD__ . ': not supported' ); }
+ function splitExt() { throw new MWException( __METHOD__ . ': not supported' ); }
+ function splitHeading() { throw new MWException( __METHOD__ . ': not supported' ); }
+}
diff --git a/includes/parser/StripState.php b/includes/parser/StripState.php
new file mode 100644
index 00000000..c7bd1e77
--- /dev/null
+++ b/includes/parser/StripState.php
@@ -0,0 +1,175 @@
+<?php
+
+/**
+ * @todo document, briefly.
+ * @ingroup Parser
+ */
+class StripState {
+ protected $prefix;
+ protected $data;
+ protected $regex;
+
+ protected $tempType, $tempMergePrefix;
+
+ function __construct( $prefix ) {
+ $this->prefix = $prefix;
+ $this->data = array(
+ 'nowiki' => array(),
+ 'general' => array()
+ );
+ $this->regex = "/{$this->prefix}([^\x7f]+)" . Parser::MARKER_SUFFIX . '/';
+ }
+
+ /**
+ * Add a nowiki strip item
+ * @param $marker
+ * @param $value
+ */
+ function addNoWiki( $marker, $value ) {
+ $this->addItem( 'nowiki', $marker, $value );
+ }
+
+ /**
+ * @param $marker
+ * @param $value
+ */
+ function addGeneral( $marker, $value ) {
+ $this->addItem( 'general', $marker, $value );
+ }
+
+ /**
+ * @throws MWException
+ * @param $type
+ * @param $marker
+ * @param $value
+ */
+ protected function addItem( $type, $marker, $value ) {
+ if ( !preg_match( $this->regex, $marker, $m ) ) {
+ throw new MWException( "Invalid marker: $marker" );
+ }
+
+ $this->data[$type][$m[1]] = $value;
+ }
+
+ /**
+ * @param $text
+ * @return mixed
+ */
+ function unstripGeneral( $text ) {
+ return $this->unstripType( 'general', $text );
+ }
+
+ /**
+ * @param $text
+ * @return mixed
+ */
+ function unstripNoWiki( $text ) {
+ return $this->unstripType( 'nowiki', $text );
+ }
+
+ /**
+ * @param $text
+ * @return mixed
+ */
+ function unstripBoth( $text ) {
+ $text = $this->unstripType( 'general', $text );
+ $text = $this->unstripType( 'nowiki', $text );
+ return $text;
+ }
+
+ /**
+ * @param $type
+ * @param $text
+ * @return mixed
+ */
+ protected function unstripType( $type, $text ) {
+ // Shortcut
+ if ( !count( $this->data[$type] ) ) {
+ return $text;
+ }
+
+ wfProfileIn( __METHOD__ );
+ $this->tempType = $type;
+ $out = preg_replace_callback( $this->regex, array( $this, 'unstripCallback' ), $text );
+ $this->tempType = null;
+ wfProfileOut( __METHOD__ );
+ return $out;
+ }
+
+ /**
+ * @param $m array
+ * @return array
+ */
+ protected function unstripCallback( $m ) {
+ if ( isset( $this->data[$this->tempType][$m[1]] ) ) {
+ return $this->data[$this->tempType][$m[1]];
+ } else {
+ return $m[0];
+ }
+ }
+
+ /**
+ * Get a StripState object which is sufficient to unstrip the given text.
+ * It will contain the minimum subset of strip items necessary.
+ *
+ * @param $text string
+ *
+ * @return StripState
+ */
+ function getSubState( $text ) {
+ $subState = new StripState( $this->prefix );
+ $pos = 0;
+ while ( true ) {
+ $startPos = strpos( $text, $this->prefix, $pos );
+ $endPos = strpos( $text, Parser::MARKER_SUFFIX, $pos );
+ if ( $startPos === false || $endPos === false ) {
+ break;
+ }
+
+ $endPos += strlen( Parser::MARKER_SUFFIX );
+ $marker = substr( $text, $startPos, $endPos - $startPos );
+ if ( !preg_match( $this->regex, $marker, $m ) ) {
+ continue;
+ }
+
+ $key = $m[1];
+ if ( isset( $this->data['nowiki'][$key] ) ) {
+ $subState->data['nowiki'][$key] = $this->data['nowiki'][$key];
+ } elseif ( isset( $this->data['general'][$key] ) ) {
+ $subState->data['general'][$key] = $this->data['general'][$key];
+ }
+ $pos = $endPos;
+ }
+ return $subState;
+ }
+
+ /**
+ * Merge another StripState object into this one. The strip marker keys
+ * will not be preserved. The strings in the $texts array will have their
+ * strip markers rewritten, the resulting array of strings will be returned.
+ *
+ * @param $otherState StripState
+ * @param $texts Array
+ * @return Array
+ */
+ function merge( $otherState, $texts ) {
+ $mergePrefix = Parser::getRandomString();
+
+ foreach ( $otherState->data as $type => $items ) {
+ foreach ( $items as $key => $value ) {
+ $this->data[$type]["$mergePrefix-$key"] = $value;
+ }
+ }
+
+ $this->tempMergePrefix = $mergePrefix;
+ $texts = preg_replace_callback( $otherState->regex, array( $this, 'mergeCallback' ), $texts );
+ $this->tempMergePrefix = null;
+ return $texts;
+ }
+
+ protected function mergeCallback( $m ) {
+ $key = $m[1];
+ return "{$this->prefix}{$this->tempMergePrefix}-$key" . Parser::MARKER_SUFFIX;
+ }
+}
+
diff --git a/includes/parser/Tidy.php b/includes/parser/Tidy.php
index 38f22fd8..3a6d3e9c 100644
--- a/includes/parser/Tidy.php
+++ b/includes/parser/Tidy.php
@@ -6,6 +6,74 @@
*/
/**
+ * Class used to hide mw:editsection tokens from Tidy so that it doesn't break them
+ * or break on them. This is a bit of a hack for now, but hopefully in the future
+ * we may create a real postprocessor or something that will replace this.
+ * It's called wrapper because for now it basically takes over MWTidy::tidy's task
+ * of wrapping the text in a xhtml block
+ *
+ * This re-uses some of the parser's UNIQ tricks, though some of it is private so it's
+ * duplicated. Perhaps we should create an abstract marker hiding class.
+ */
+class MWTidyWrapper {
+
+ /**
+ * @var ReplacementArray
+ */
+ protected $mTokens;
+
+ protected $mUniqPrefix;
+
+ protected $mMarkerIndex;
+
+ public function __construct() {
+ $this->mTokens = null;
+ $this->mUniqPrefix = null;
+ }
+
+ /**
+ * @param $text string
+ * @return string
+ */
+ public function getWrapped( $text ) {
+ $this->mTokens = new ReplacementArray;
+ $this->mUniqPrefix = "\x7fUNIQ" .
+ dechex( mt_rand( 0, 0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) );
+ $this->mMarkerIndex = 0;
+
+ $wrappedtext = preg_replace_callback( ParserOutput::EDITSECTION_REGEX,
+ array( &$this, 'replaceEditSectionLinksCallback' ), $text );
+
+ $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
+ ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
+ '<head><title>test</title></head><body>'.$wrappedtext.'</body></html>';
+
+ return $wrappedtext;
+ }
+
+ /**
+ * @param $m array
+ *
+ * @return string
+ */
+ function replaceEditSectionLinksCallback( $m ) {
+ $marker = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}" . Parser::MARKER_SUFFIX;
+ $this->mMarkerIndex++;
+ $this->mTokens->setPair( $marker, $m[0] );
+ return $marker;
+ }
+
+ /**
+ * @param $text string
+ * @return string
+ */
+ public function postprocess( $text ) {
+ return $this->mTokens->replace( $text );
+ }
+
+}
+
+/**
* Class to interact with HTML tidy
*
* Either the external tidy program or the in-process tidy extension
@@ -15,7 +83,6 @@
* @ingroup Parser
*/
class MWTidy {
-
/**
* Interface with html tidy, used if $wgUseTidy = true.
* If tidy isn't able to correct the markup, the original will be
@@ -27,20 +94,26 @@ class MWTidy {
public static function tidy( $text ) {
global $wgTidyInternal;
- $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
-' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
-'<head><title>test</title></head><body>'.$text.'</body></html>';
+ $wrapper = new MWTidyWrapper;
+ $wrappedtext = $wrapper->getWrapped( $text );
- if( $wgTidyInternal ) {
- $correctedtext = self::execInternalTidy( $wrappedtext );
+ $retVal = null;
+ if ( $wgTidyInternal ) {
+ $correctedtext = self::execInternalTidy( $wrappedtext, false, $retVal );
} else {
- $correctedtext = self::execExternalTidy( $wrappedtext );
+ $correctedtext = self::execExternalTidy( $wrappedtext, false, $retVal );
}
- if( is_null( $correctedtext ) ) {
+
+ if ( $retVal < 0 ) {
+ wfDebug( "Possible tidy configuration error!\n" );
+ return $text . "\n<!-- Tidy was unable to run -->\n";
+ } elseif ( is_null( $correctedtext ) ) {
wfDebug( "Tidy error detected!\n" );
return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
}
+ $correctedtext = $wrapper->postprocess( $correctedtext ); // restore any hidden tokens
+
return $correctedtext;
}
@@ -60,6 +133,7 @@ class MWTidy {
} else {
$errorStr = self::execExternalTidy( $text, true, $retval );
}
+
return ( $retval < 0 && $errorStr == '' ) || $retval == 0;
}
@@ -68,7 +142,7 @@ class MWTidy {
* Also called in OutputHandler.php for full page validation
*
* @param $text String: HTML to check
- * @param $stderr Boolean: Whether to read from STDERR rather than STDOUT
+ * @param $stderr Boolean: Whether to read result from STDERR rather than STDOUT
* @param &$retval Exit code (-1 on internal error)
* @return mixed String or null
*/
@@ -79,7 +153,7 @@ class MWTidy {
$cleansource = '';
$opts = ' -utf8';
- if( $stderr ) {
+ if ( $stderr ) {
$descriptorspec = array(
0 => array( 'pipe', 'r' ),
1 => array( 'file', wfGetNull(), 'a' ),
@@ -96,73 +170,84 @@ class MWTidy {
$readpipe = $stderr ? 2 : 1;
$pipes = array();
- if( function_exists( 'proc_open' ) ) {
- $process = proc_open( "$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes );
- if ( is_resource( $process ) ) {
- // Theoretically, this style of communication could cause a deadlock
- // here. If the stdout buffer fills up, then writes to stdin could
- // block. This doesn't appear to happen with tidy, because tidy only
- // writes to stdout after it's finished reading from stdin. Search
- // for tidyParseStdin and tidySaveStdout in console/tidy.c
- fwrite( $pipes[0], $text );
- fclose( $pipes[0] );
- while ( !feof( $pipes[$readpipe] ) ) {
- $cleansource .= fgets( $pipes[$readpipe], 1024 );
- }
- fclose( $pipes[$readpipe] );
- $retval = proc_close( $process );
- } else {
- $retval = -1;
+ $process = proc_open(
+ "$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes );
+
+ if ( is_resource( $process ) ) {
+ // Theoretically, this style of communication could cause a deadlock
+ // here. If the stdout buffer fills up, then writes to stdin could
+ // block. This doesn't appear to happen with tidy, because tidy only
+ // writes to stdout after it's finished reading from stdin. Search
+ // for tidyParseStdin and tidySaveStdout in console/tidy.c
+ fwrite( $pipes[0], $text );
+ fclose( $pipes[0] );
+ while ( !feof( $pipes[$readpipe] ) ) {
+ $cleansource .= fgets( $pipes[$readpipe], 1024 );
}
+ fclose( $pipes[$readpipe] );
+ $retval = proc_close( $process );
} else {
- $retval = -1;
+ wfWarn( "Unable to start external tidy process" );
+ $retval = -1;
}
- if( !$stderr && $cleansource == '' && $text != '' ) {
+ if ( !$stderr && $cleansource == '' && $text != '' ) {
// Some kind of error happened, so we couldn't get the corrected text.
// Just give up; we'll use the source text and append a warning.
$cleansource = null;
}
+
wfProfileOut( __METHOD__ );
return $cleansource;
}
/**
- * Use the HTML tidy PECL extension to use the tidy library in-process,
+ * Use the HTML tidy extension to use the tidy library in-process,
* saving the overhead of spawning a new process.
*
- * 'pear install tidy' should be able to compile the extension module.
+ * @param $text String: HTML to check
+ * @param $stderr Boolean: Whether to read result from error status instead of output
+ * @param &$retval Exit code (-1 on internal error)
+ * @return mixed String or null
*/
private static function execInternalTidy( $text, $stderr = false, &$retval = null ) {
global $wgTidyConf, $wgDebugTidy;
wfProfileIn( __METHOD__ );
+ if ( !MWInit::classExists( 'tidy' ) ) {
+ wfWarn( "Unable to load internal tidy class." );
+ $retval = -1;
+
+ wfProfileOut( __METHOD__ );
+ return null;
+ }
+
$tidy = new tidy;
$tidy->parseString( $text, $wgTidyConf, 'utf8' );
- if( $stderr ) {
+ if ( $stderr ) {
$retval = $tidy->getStatus();
+
wfProfileOut( __METHOD__ );
return $tidy->errorBuffer;
} else {
$tidy->cleanRepair();
$retval = $tidy->getStatus();
- if( $retval == 2 ) {
+ if ( $retval == 2 ) {
// 2 is magic number for fatal error
// http://www.php.net/manual/en/function.tidy-get-status.php
$cleansource = null;
} else {
$cleansource = tidy_get_output( $tidy );
- }
- if ( $wgDebugTidy && $retval > 0 ) {
- $cleansource .= "<!--\nTidy reports:\n" .
- str_replace( '-->', '--&gt;', $tidy->errorBuffer ) .
- "\n-->";
+ if ( $wgDebugTidy && $retval > 0 ) {
+ $cleansource .= "<!--\nTidy reports:\n" .
+ str_replace( '-->', '--&gt;', $tidy->errorBuffer ) .
+ "\n-->";
+ }
}
wfProfileOut( __METHOD__ );
return $cleansource;
}
}
-
}