summaryrefslogtreecommitdiff
path: root/includes/parser
diff options
context:
space:
mode:
Diffstat (limited to 'includes/parser')
-rw-r--r-- includes/parser/CoreParserFunctions.php 231
-rw-r--r-- includes/parser/DateFormatter.php 122
-rw-r--r-- includes/parser/Parser.php 360
-rw-r--r-- includes/parser/ParserCache.php 26
-rw-r--r-- includes/parser/ParserOptions.php 17
-rw-r--r-- includes/parser/ParserOutput.php 10
-rw-r--r-- includes/parser/Preprocessor_DOM.php 82
-rw-r--r-- includes/parser/Preprocessor_Hash.php 41
-rw-r--r-- includes/parser/Tidy.php 170
9 files changed, 824 insertions, 235 deletions
diff --git a/includes/parser/CoreParserFunctions.php b/includes/parser/CoreParserFunctions.php
index a3b5189a..774e96a7 100644
--- a/includes/parser/CoreParserFunctions.php
+++ b/includes/parser/CoreParserFunctions.php
@@ -27,9 +27,11 @@ class CoreParserFunctions {
$parser->setFunctionHook( 'fullurle', array( __CLASS__, 'fullurle' ), SFH_NO_HASH );
$parser->setFunctionHook( 'formatnum', array( __CLASS__, 'formatnum' ), SFH_NO_HASH );
$parser->setFunctionHook( 'grammar', array( __CLASS__, 'grammar' ), SFH_NO_HASH );
+ $parser->setFunctionHook( 'gender', array( __CLASS__, 'gender' ), SFH_NO_HASH );
$parser->setFunctionHook( 'plural', array( __CLASS__, 'plural' ), SFH_NO_HASH );
$parser->setFunctionHook( 'numberofpages', array( __CLASS__, 'numberofpages' ), SFH_NO_HASH );
$parser->setFunctionHook( 'numberofusers', array( __CLASS__, 'numberofusers' ), SFH_NO_HASH );
+ $parser->setFunctionHook( 'numberofactiveusers', array( __CLASS__, 'numberofactiveusers' ), SFH_NO_HASH );
$parser->setFunctionHook( 'numberofarticles', array( __CLASS__, 'numberofarticles' ), SFH_NO_HASH );
$parser->setFunctionHook( 'numberoffiles', array( __CLASS__, 'numberoffiles' ), SFH_NO_HASH );
$parser->setFunctionHook( 'numberofadmins', array( __CLASS__, 'numberofadmins' ), SFH_NO_HASH );
@@ -45,7 +47,27 @@ class CoreParserFunctions {
$parser->setFunctionHook( 'filepath', array( __CLASS__, 'filepath' ), SFH_NO_HASH );
$parser->setFunctionHook( 'pagesincategory', array( __CLASS__, 'pagesincategory' ), SFH_NO_HASH );
$parser->setFunctionHook( 'pagesize', array( __CLASS__, 'pagesize' ), SFH_NO_HASH );
+ $parser->setFunctionHook( 'protectionlevel', array( __CLASS__, 'protectionlevel' ), SFH_NO_HASH );
+ $parser->setFunctionHook( 'namespace', array( __CLASS__, 'mwnamespace' ), SFH_NO_HASH );
+ $parser->setFunctionHook( 'namespacee', array( __CLASS__, 'namespacee' ), SFH_NO_HASH );
+ $parser->setFunctionHook( 'talkspace', array( __CLASS__, 'talkspace' ), SFH_NO_HASH );
+ $parser->setFunctionHook( 'talkspacee', array( __CLASS__, 'talkspacee' ), SFH_NO_HASH );
+ $parser->setFunctionHook( 'subjectspace', array( __CLASS__, 'subjectspace' ), SFH_NO_HASH );
+ $parser->setFunctionHook( 'subjectspacee', array( __CLASS__, 'subjectspacee' ), SFH_NO_HASH );
+ $parser->setFunctionHook( 'pagename', array( __CLASS__, 'pagename' ), SFH_NO_HASH );
+ $parser->setFunctionHook( 'pagenamee', array( __CLASS__, 'pagenamee' ), SFH_NO_HASH );
+ $parser->setFunctionHook( 'fullpagename', array( __CLASS__, 'fullpagename' ), SFH_NO_HASH );
+ $parser->setFunctionHook( 'fullpagenamee', array( __CLASS__, 'fullpagenamee' ), SFH_NO_HASH );
+ $parser->setFunctionHook( 'basepagename', array( __CLASS__, 'basepagename' ), SFH_NO_HASH );
+ $parser->setFunctionHook( 'basepagenamee', array( __CLASS__, 'basepagenamee' ), SFH_NO_HASH );
+ $parser->setFunctionHook( 'subpagename', array( __CLASS__, 'subpagename' ), SFH_NO_HASH );
+ $parser->setFunctionHook( 'subpagenamee', array( __CLASS__, 'subpagenamee' ), SFH_NO_HASH );
+ $parser->setFunctionHook( 'talkpagename', array( __CLASS__, 'talkpagename' ), SFH_NO_HASH );
+ $parser->setFunctionHook( 'talkpagenamee', array( __CLASS__, 'talkpagenamee' ), SFH_NO_HASH );
+ $parser->setFunctionHook( 'subjectpagename', array( __CLASS__, 'subjectpagename' ), SFH_NO_HASH );
+ $parser->setFunctionHook( 'subjectpagenamee', array( __CLASS__, 'subjectpagenamee' ), SFH_NO_HASH );
$parser->setFunctionHook( 'tag', array( __CLASS__, 'tagObj' ), SFH_OBJECT_ARGS );
+ $parser->setFunctionHook( 'formatdate', array( __CLASS__, 'formatDate' ) );
if ( $wgAllowDisplayTitle ) {
$parser->setFunctionHook( 'displaytitle', array( __CLASS__, 'displaytitle' ), SFH_NO_HASH );
@@ -66,6 +88,22 @@ class CoreParserFunctions {
return array( 'found' => false );
}
}
+
+ /**
+  * Parser function hook for 'formatdate': reformat a date string according
+  * to the date format held in the parser options.
+  *
+  * @param $parser Parser whose mOptions supply the date format
+  * @param $date String: the date text; surrounding whitespace is trimmed
+  * @param $defaultPref String|null: format to use instead when the option
+  *   value is 'default'
+  * @return String: result of DateFormatter::reformat() in 'match-whole' mode
+  */
+ static function formatDate( $parser, $date, $defaultPref = null ) {
+ 	$formatter = DateFormatter::getInstance();
+ 	$trimmedDate = trim( $date );
+
+ 	// A caller-supplied default only takes effect while the preference from
+ 	// the parser options is still 'default'.
+ 	$userPref = $parser->mOptions->getDateFormat();
+ 	$effectivePref = ( $userPref == 'default' && $defaultPref ) ? $defaultPref : $userPref;
+
+ 	return $formatter->reformat( $effectivePref, $trimmedDate, array( 'match-whole' ) );
+ }
static function ns( $parser, $part1 = '' ) {
global $wgContLang;
@@ -154,6 +192,28 @@ class CoreParserFunctions {
return $parser->getFunctionLang()->convertGrammar( $word, $case );
}
+ /**
+  * Parser function hook for 'gender': pick one of the supplied forms based
+  * on a user's 'gender' option.
+  *
+  * @param $parser Parser
+  * @param $user String: user name, optionally prefixed with the user namespace
+  * @param ... any further arguments are the forms to choose from
+  * @return String: whatever the function language's gender() returns
+  */
+ static function gender( $parser, $user ) {
+ 	global $wgUser;
+
+ 	// Everything after $parser and $user is a form.
+ 	$forms = array_slice( func_get_args(), 2 );
+
+ 	// Fallback used when no user can be determined.
+ 	$gender = User::getDefaultOption( 'gender' );
+
+ 	// Accept "User:Name" as well as a bare name.
+ 	$title = Title::newFromText( $user );
+ 	$userName = $user;
+ 	if ( is_object( $title ) && $title->getNamespace() == NS_USER ) {
+ 		$userName = $title->getText();
+ 	}
+
+ 	// Prefer the named user; inside interface messages fall back to $wgUser.
+ 	$userObj = User::newFromName( $userName );
+ 	if ( $userObj ) {
+ 		$gender = $userObj->getOption( 'gender' );
+ 	} elseif ( $parser->mOptions->getInterfaceMessage() ) {
+ 		$gender = $wgUser->getOption( 'gender' );
+ 	}
+
+ 	$lang = $parser->getFunctionLang();
+ 	return $lang->gender( $gender, $forms );
+ }
static function plural( $parser, $text = '') {
$forms = array_slice( func_get_args(), 2);
$text = $parser->getFunctionLang()->parseFormattedNumber( $text );
@@ -208,6 +268,9 @@ class CoreParserFunctions {
static function numberofusers( $parser, $raw = null ) {
return self::formatRaw( SiteStats::users(), $raw );
}
+ /**
+  * Parser function hook for 'numberofactiveusers'.
+  *
+  * @param $parser Parser (unused)
+  * @param $raw passed through to formatRaw()
+  * @return the SiteStats::activeUsers() value as rendered by formatRaw()
+  */
+ static function numberofactiveusers( $parser, $raw = null ) {
+ 	$activeUserCount = SiteStats::activeUsers();
+ 	return self::formatRaw( $activeUserCount, $raw );
+ }
static function numberofarticles( $parser, $raw = null ) {
return self::formatRaw( SiteStats::articles(), $raw );
}
@@ -230,6 +293,126 @@ class CoreParserFunctions {
return self::formatRaw( SiteStats::numberingroup( strtolower( $name ) ), $raw );
}
+
+ /**
+  * Given a title, return the namespace name that would be given by the
+  * corresponding magic word, with underscores shown as spaces.
+  * Note: the method is called "mwnamespace" rather than "namespace"
+  * so that it does not break under PHP 5.3.
+  *
+  * @param $parser Parser (unused)
+  * @param $title String: title text to parse
+  * @return String: namespace text, or '' when the title cannot be parsed
+  */
+ static function mwnamespace( $parser, $title = null ) {
+ 	$titleObj = Title::newFromText( $title );
+ 	if ( $titleObj === null ) {
+ 		return '';
+ 	}
+ 	$nsText = $titleObj->getNsText();
+ 	return str_replace( '_', ' ', $nsText );
+ }
+ /**
+  * URL-encoded namespace text of the given title.
+  *
+  * @param $parser Parser (unused)
+  * @param $title String: title text to parse
+  * @return String: wfUrlencode()d namespace text, or '' for an unparsable title
+  */
+ static function namespacee( $parser, $title = null ) {
+ 	$titleObj = Title::newFromText( $title );
+ 	if ( $titleObj === null ) {
+ 		return '';
+ 	}
+ 	$nsText = $titleObj->getNsText();
+ 	return wfUrlencode( $nsText );
+ }
+ /**
+  * Talk namespace text of the given title, with underscores shown as spaces.
+  *
+  * @param $parser Parser (unused)
+  * @param $title String: title text to parse
+  * @return String: talk namespace text, or '' when the title cannot be
+  *   parsed or canTalk() is false
+  */
+ static function talkspace( $parser, $title = null ) {
+ 	$titleObj = Title::newFromText( $title );
+ 	if ( $titleObj !== null && $titleObj->canTalk() ) {
+ 		return str_replace( '_', ' ', $titleObj->getTalkNsText() );
+ 	}
+ 	return '';
+ }
+ /**
+  * URL-encoded talk namespace text of the given title.
+  *
+  * @param $parser Parser (unused)
+  * @param $title String: title text to parse
+  * @return String: wfUrlencode()d talk namespace text, or '' when the title
+  *   cannot be parsed or canTalk() is false
+  */
+ static function talkspacee( $parser, $title = null ) {
+ 	$titleObj = Title::newFromText( $title );
+ 	if ( $titleObj !== null && $titleObj->canTalk() ) {
+ 		return wfUrlencode( $titleObj->getTalkNsText() );
+ 	}
+ 	return '';
+ }
+ /**
+  * Subject namespace text of the given title, with underscores shown as spaces.
+  *
+  * @param $parser Parser (unused)
+  * @param $title String: title text to parse
+  * @return String: subject namespace text, or '' for an unparsable title
+  */
+ static function subjectspace( $parser, $title = null ) {
+ 	$titleObj = Title::newFromText( $title );
+ 	if ( $titleObj === null ) {
+ 		return '';
+ 	}
+ 	$subjectNs = $titleObj->getSubjectNsText();
+ 	return str_replace( '_', ' ', $subjectNs );
+ }
+ /**
+  * URL-encoded subject namespace text of the given title.
+  *
+  * @param $parser Parser (unused)
+  * @param $title String: title text to parse
+  * @return String: wfUrlencode()d subject namespace text, or '' for an
+  *   unparsable title
+  */
+ static function subjectspacee( $parser, $title = null ) {
+ 	$titleObj = Title::newFromText( $title );
+ 	if ( $titleObj === null ) {
+ 		return '';
+ 	}
+ 	$subjectNs = $titleObj->getSubjectNsText();
+ 	return wfUrlencode( $subjectNs );
+ }
+ /*
+  * Functions to get and normalize pagenames, corresponding to the magic words
+  * of the same names
+  */
+
+ /**
+  * Page name (without namespace) of the given title, escaped for wikitext.
+  *
+  * @param $parser Parser (unused)
+  * @param $title String: title text to parse
+  * @return String: escaped title text, or '' for an unparsable title
+  */
+ static function pagename( $parser, $title = null ) {
+ 	$titleObj = Title::newFromText( $title );
+ 	if ( $titleObj === null ) {
+ 		return '';
+ 	}
+ 	$pageText = $titleObj->getText();
+ 	return wfEscapeWikiText( $pageText );
+ }
+ /**
+  * URL form of the page name (without namespace) of the given title.
+  *
+  * @param $parser Parser (unused)
+  * @param $title String: title text to parse
+  * @return String: Title::getPartialURL() value, or '' for an unparsable title
+  */
+ static function pagenamee( $parser, $title = null ) {
+ 	$titleObj = Title::newFromText( $title );
+ 	return ( $titleObj === null ) ? '' : $titleObj->getPartialURL();
+ }
+ /**
+  * Prefixed page name of the given title, escaped for wikitext.
+  *
+  * @param $parser Parser (unused)
+  * @param $title String: title text to parse
+  * @return String: escaped prefixed text, or '' when the title cannot be
+  *   parsed or canTalk() is false
+  */
+ static function fullpagename( $parser, $title = null ) {
+ 	$titleObj = Title::newFromText( $title );
+ 	// NOTE(review): the canTalk() gate mirrors the talk-page functions; it is
+ 	// unclear whether titles without talk pages are meant to yield '' here -
+ 	// confirm before relying on it.
+ 	if ( $titleObj !== null && $titleObj->canTalk() ) {
+ 		return wfEscapeWikiText( $titleObj->getPrefixedText() );
+ 	}
+ 	return '';
+ }
+ /**
+  * URL form of the prefixed page name of the given title.
+  *
+  * @param $parser Parser (unused)
+  * @param $title String: title text to parse
+  * @return String: Title::getPrefixedURL() value, or '' when the title cannot
+  *   be parsed or canTalk() is false
+  */
+ static function fullpagenamee( $parser, $title = null ) {
+ 	$titleObj = Title::newFromText( $title );
+ 	// NOTE(review): the canTalk() gate mirrors the talk-page functions; it is
+ 	// unclear whether titles without talk pages are meant to yield '' here -
+ 	// confirm before relying on it.
+ 	if ( $titleObj !== null && $titleObj->canTalk() ) {
+ 		return $titleObj->getPrefixedURL();
+ 	}
+ 	return '';
+ }
+ /**
+  * Subpage part of the given title, escaped for wikitext.
+  *
+  * The text is passed through wfEscapeWikiText() like the other text-form
+  * page name functions (pagename, fullpagename, talkpagename,
+  * subjectpagename), so that characters in the title are not interpreted
+  * as markup.
+  *
+  * @param $parser Parser (unused)
+  * @param $title String: title text to parse
+  * @return String: escaped subpage text, or '' for an unparsable title
+  */
+ static function subpagename( $parser, $title = null ) {
+ 	$t = Title::newFromText( $title );
+ 	if ( is_null( $t ) ) {
+ 		return '';
+ 	}
+ 	return wfEscapeWikiText( $t->getSubpageText() );
+ }
+ /**
+  * URL form of the subpage part of the given title.
+  *
+  * @param $parser Parser (unused)
+  * @param $title String: title text to parse
+  * @return String: Title::getSubpageUrlForm() value, or '' for an unparsable
+  *   title
+  */
+ static function subpagenamee( $parser, $title = null ) {
+ 	$titleObj = Title::newFromText( $title );
+ 	return ( $titleObj === null ) ? '' : $titleObj->getSubpageUrlForm();
+ }
+ /**
+  * Base page part of the given title, escaped for wikitext.
+  *
+  * The text is passed through wfEscapeWikiText() like the other text-form
+  * page name functions (pagename, fullpagename, talkpagename,
+  * subjectpagename), so that characters in the title are not interpreted
+  * as markup.
+  *
+  * @param $parser Parser (unused)
+  * @param $title String: title text to parse
+  * @return String: escaped base text, or '' for an unparsable title
+  */
+ static function basepagename( $parser, $title = null ) {
+ 	$t = Title::newFromText( $title );
+ 	if ( is_null( $t ) ) {
+ 		return '';
+ 	}
+ 	return wfEscapeWikiText( $t->getBaseText() );
+ }
+ /**
+  * URL-encoded base page part of the given title.
+  *
+  * @param $parser Parser (unused)
+  * @param $title String: title text to parse
+  * @return String: base text with spaces turned into underscores and then
+  *   URL-encoded, or '' for an unparsable title
+  */
+ static function basepagenamee( $parser, $title = null ) {
+ 	$titleObj = Title::newFromText( $title );
+ 	if ( $titleObj === null ) {
+ 		return '';
+ 	}
+ 	$underscored = str_replace( ' ', '_', $titleObj->getBaseText() );
+ 	return wfUrlEncode( $underscored );
+ }
+ /**
+  * Prefixed name of the talk page belonging to the given title, escaped
+  * for wikitext.
+  *
+  * @param $parser Parser (unused)
+  * @param $title String: title text to parse
+  * @return String: escaped prefixed text of the talk page, or '' when the
+  *   title cannot be parsed or canTalk() is false
+  */
+ static function talkpagename( $parser, $title = null ) {
+ 	$titleObj = Title::newFromText( $title );
+ 	if ( $titleObj === null || !$titleObj->canTalk() ) {
+ 		return '';
+ 	}
+ 	$talkPage = $titleObj->getTalkPage();
+ 	return wfEscapeWikiText( $talkPage->getPrefixedText() );
+ }
+ /**
+  * URL form of the prefixed name of the talk page belonging to the given title.
+  *
+  * @param $parser Parser (unused)
+  * @param $title String: title text to parse
+  * @return String: prefixed URL of the talk page, or '' when the title
+  *   cannot be parsed or canTalk() is false
+  */
+ static function talkpagenamee( $parser, $title = null ) {
+ 	$titleObj = Title::newFromText( $title );
+ 	if ( $titleObj === null || !$titleObj->canTalk() ) {
+ 		return '';
+ 	}
+ 	$talkPage = $titleObj->getTalkPage();
+ 	return $talkPage->getPrefixedUrl();
+ }
+ /**
+  * Prefixed name of the subject page belonging to the given title, escaped
+  * for wikitext.
+  *
+  * @param $parser Parser (unused)
+  * @param $title String: title text to parse
+  * @return String: escaped prefixed text of the subject page, or '' for an
+  *   unparsable title
+  */
+ static function subjectpagename( $parser, $title = null ) {
+ 	$titleObj = Title::newFromText( $title );
+ 	if ( $titleObj === null ) {
+ 		return '';
+ 	}
+ 	$subjectPage = $titleObj->getSubjectPage();
+ 	return wfEscapeWikiText( $subjectPage->getPrefixedText() );
+ }
+ /**
+  * URL form of the prefixed name of the subject page belonging to the
+  * given title.
+  *
+  * @param $parser Parser (unused)
+  * @param $title String: title text to parse
+  * @return String: prefixed URL of the subject page, or '' for an unparsable
+  *   title
+  */
+ static function subjectpagenamee( $parser, $title = null ) {
+ 	$titleObj = Title::newFromText( $title );
+ 	if ( $titleObj === null ) {
+ 		return '';
+ 	}
+ 	$subjectPage = $titleObj->getSubjectPage();
+ 	return $subjectPage->getPrefixedUrl();
+ }
+
/**
* Return the number of pages in the given category, or 0 if it's nonexis-
* tent. This is an expensive parser function and can't be called too many
@@ -292,6 +475,16 @@ class CoreParserFunctions {
}
return self::formatRaw( $length, $raw );
}
+
+ /**
+  * Returns the requested protection level for the current page
+  *
+  * @param $parser Parser whose mTitle is queried
+  * @param $type String: restriction type; lower-cased before the lookup
+  * @return String: the restriction values joined with commas
+  */
+ static function protectionlevel( $parser, $type = '' ) {
+ 	$restrictions = $parser->mTitle->getRestrictions( strtolower( $type ) );
+ 	# Title::getRestrictions returns an array; it's possible it may have
+ 	# multiple values in the future.
+ 	# implode() takes the separator first: the legacy (array, separator)
+ 	# argument order is deprecated since PHP 7.4 and rejected by PHP 8.
+ 	return implode( ',', $restrictions );
+ }
static function language( $parser, $arg = '' ) {
global $wgContLang;
@@ -299,20 +492,38 @@ class CoreParserFunctions {
return $lang != '' ? $lang : $arg;
}
- static function pad( $string = '', $length = 0, $char = 0, $direction = STR_PAD_RIGHT ) {
- $length = min( max( $length, 0 ), 500 );
- $char = substr( $char, 0, 1 );
- return ( $string !== '' && (int)$length > 0 && strlen( trim( (string)$char ) ) > 0 )
- ? str_pad( $string, $length, (string)$char, $direction )
- : $string;
+ /**
+  * Unicode-safe str_pad with the restriction that $length is forced to be <= 500
+  *
+  * @param $string String: text to pad
+  * @param $length Target length in characters; cast to int because the
+  *   value arrives as raw parser-function input
+  * @param $padding String: text repeated (and truncated) to fill the gap;
+  *   an empty value leaves $string untouched
+  * @param $direction STR_PAD_LEFT to prepend the padding, anything else appends
+  * @return String: $string padded to at most 500 characters, or $string
+  *   unchanged when it is already long enough
+  */
+ static function pad( $string, $length, $padding = '0', $direction = STR_PAD_RIGHT ) {
+ 	$lengthOfPadding = mb_strlen( $padding );
+ 	if ( $lengthOfPadding == 0 ) {
+ 		return $string;
+ 	}
+
+ 	# Number of characters still missing. The (int) cast keeps non-numeric
+ 	# input from reaching min() and the subtraction as a string.
+ 	$remaining = min( (int)$length, 500 ) - mb_strlen( $string );
+ 	if ( $remaining <= 0 ) {
+ 		return $string;
+ 	}
+
+ 	# Repeat $padding often enough to cover the gap, then cut it down so that
+ 	# mb_strlen( $string ) + mb_strlen( $finalPadding ) is the target length.
+ 	$repeats = (int)ceil( $remaining / $lengthOfPadding );
+ 	$finalPadding = mb_substr( str_repeat( $padding, $repeats ), 0, $remaining );
+
+ 	if ( $direction == STR_PAD_LEFT ) {
+ 		return $finalPadding . $string;
+ 	}
+ 	return $string . $finalPadding;
}
- static function padleft( $parser, $string = '', $length = 0, $char = 0 ) {
- return self::pad( $string, $length, $char, STR_PAD_LEFT );
+ /**
+  * Parser function hook: pad $string on the left via pad().
+  *
+  * @param $parser Parser (unused)
+  * @param $string String: text to pad
+  * @param $length target length, forwarded to pad()
+  * @param $padding String: padding text, forwarded to pad()
+  * @return String: result of pad() with STR_PAD_LEFT
+  */
+ static function padleft( $parser, $string = '', $length = 0, $padding = '0' ) {
+ 	$padded = self::pad( $string, $length, $padding, STR_PAD_LEFT );
+ 	return $padded;
}
- static function padright( $parser, $string = '', $length = 0, $char = 0 ) {
- return self::pad( $string, $length, $char );
+ /**
+  * Parser function hook: pad $string on the right via pad().
+  *
+  * @param $parser Parser (unused)
+  * @param $string String: text to pad
+  * @param $length target length, forwarded to pad()
+  * @param $padding String: padding text, forwarded to pad()
+  * @return String: result of pad() with STR_PAD_RIGHT
+  */
+ static function padright( $parser, $string = '', $length = 0, $padding = '0' ) {
+ 	$padded = self::pad( $string, $length, $padding, STR_PAD_RIGHT );
+ 	return $padded;
}
static function anchorencode( $parser, $text ) {
diff --git a/includes/parser/DateFormatter.php b/includes/parser/DateFormatter.php
index 9ef11d5e..aa6415e4 100644
--- a/includes/parser/DateFormatter.php
+++ b/includes/parser/DateFormatter.php
@@ -41,11 +41,11 @@ class DateFormatter
$this->regexTrail = '(?![a-z])/iu';
# Partial regular expressions
- $this->prxDM = '\[\[(\d{1,2})[ _](' . $this->monthNames . ')]]';
- $this->prxMD = '\[\[(' . $this->monthNames . ')[ _](\d{1,2})]]';
- $this->prxY = '\[\[(\d{1,4}([ _]BC|))]]';
- $this->prxISO1 = '\[\[(-?\d{4})]]-\[\[(\d{2})-(\d{2})]]';
- $this->prxISO2 = '\[\[(-?\d{4})-(\d{2})-(\d{2})]]';
+ # Every link bracket is written as \[\[ or \]\] so that the brackets can be
+ # located and removed from the assembled pattern as plain substrings.
+ $this->prxDM = '\[\[(\d{1,2})[ _](' . $this->monthNames . ')\]\]';
+ $this->prxMD = '\[\[(' . $this->monthNames . ')[ _](\d{1,2})\]\]';
+ $this->prxY = '\[\[(\d{1,4}([ _]BC|))\]\]';
+ $this->prxISO1 = '\[\[(-?\d{4})\]\]-\[\[(\d{2})-(\d{2})\]\]';
+ $this->prxISO2 = '\[\[(-?\d{4})-(\d{2})-(\d{2})\]\]';
# Real regular expressions
$this->regexes[self::DMY] = "/{$this->prxDM} *,? *{$this->prxY}{$this->regexTrail}";
@@ -96,9 +96,11 @@ class DateFormatter
}
/**
- * @static
+ * Get a DateFormatter object
+ *
+ * @return DateFormatter object
*/
- function &getInstance() {
+ public static function &getInstance() {
global $wgMemc;
static $dateFormatter = false;
if ( !$dateFormatter ) {
@@ -112,10 +114,14 @@ class DateFormatter
}
/**
- * @param string $preference User preference
- * @param string $text Text to reformat
+ * @param $preference String: User preference
+ * @param $text String: Text to reformat
*/
- function reformat( $preference, $text ) {
+ function reformat( $preference, $text, $options = array('linked') ) {
+
+ $linked = in_array( 'linked', $options );
+ $match_whole = in_array( 'match-whole', $options );
+
if ( isset( $this->preferences[$preference] ) ) {
$preference = $this->preferences[$preference];
} else {
@@ -136,7 +142,24 @@ class DateFormatter
# Default
$this->mTarget = $i;
}
- $text = preg_replace_callback( $this->regexes[$i], array( &$this, 'replace' ), $text );
+ $regex = $this->regexes[$i];
+
+ // Horrible hack
+ if (!$linked) {
+ $regex = str_replace( array( '\[\[', '\]\]' ), '', $regex );
+ }
+
+ if ($match_whole) {
+ // Let's hope this works
+ $regex = preg_replace( '!^/!', '/^', $regex );
+ $regex = str_replace( $this->regexTrail,
+ '$'.$this->regexTrail, $regex );
+ }
+
+ // Another horrible hack
+ $this->mLinked = $linked;
+ $text = preg_replace_callback( $regex, array( &$this, 'replace' ), $text );
+ unset($this->mLinked);
}
return $text;
}
@@ -146,6 +169,10 @@ class DateFormatter
*/
function replace( $matches ) {
# Extract information from $matches
+ $linked = true;
+ if ( isset( $this->mLinked ) )
+ $linked = $this->mLinked;
+
$bits = array();
$key = $this->keys[$this->mSource];
for ( $p=0; $p < strlen($key); $p++ ) {
@@ -153,41 +180,54 @@ class DateFormatter
$bits[$key{$p}] = $matches[$p+1];
}
}
-
+
+ return $this->formatDate( $bits, $linked );
+ }
+
+ function formatDate( $bits, $link = true ) {
$format = $this->targets[$this->mTarget];
+
+ if (!$link) {
+ // strip piped links
+ $format = preg_replace( '/\[\[[^|]+\|([^\]]+)\]\]/', '$1', $format );
+ // strip remaining links
+ $format = str_replace( array( '[[', ']]' ), '', $format );
+ }
# Construct new date
$text = '';
$fail = false;
+
+ // Pre-generate y/Y stuff because we need the year for the <span> title.
+ if ( !isset( $bits['y'] ) && isset( $bits['Y'] ) )
+ $bits['y'] = $this->makeIsoYear( $bits['Y'] );
+ if ( !isset( $bits['Y'] ) && isset( $bits['y'] ) )
+ $bits['Y'] = $this->makeNormalYear( $bits['y'] );
+
+ if ( !isset( $bits['m'] ) ) {
+ $m = $this->makeIsoMonth( $bits['F'] );
+ if ( !$m || $m == '00' ) {
+ $fail = true;
+ } else {
+ $bits['m'] = $m;
+ }
+ }
+
+ if ( !isset($bits['d']) ) {
+ $bits['d'] = sprintf( '%02d', $bits['j'] );
+ }
for ( $p=0; $p < strlen( $format ); $p++ ) {
$char = $format{$p};
switch ( $char ) {
case 'd': # ISO day of month
- if ( !isset($bits['d']) ) {
- $text .= sprintf( '%02d', $bits['j'] );
- } else {
- $text .= $bits['d'];
- }
+ $text .= $bits['d'];
break;
case 'm': # ISO month
- if ( !isset($bits['m']) ) {
- $m = $this->makeIsoMonth( $bits['F'] );
- if ( !$m || $m == '00' ) {
- $fail = true;
- } else {
- $text .= $m;
- }
- } else {
- $text .= $bits['m'];
- }
+ $text .= $bits['m'];
break;
case 'y': # ISO year
- if ( !isset( $bits['y'] ) ) {
- $text .= $this->makeIsoYear( $bits['Y'] );
- } else {
- $text .= $bits['y'];
- }
+ $text .= $bits['y'];
break;
case 'j': # ordinary day of month
if ( !isset($bits['j']) ) {
@@ -210,11 +250,7 @@ class DateFormatter
}
break;
case 'Y': # ordinary (optional BC) year
- if ( !isset( $bits['Y'] ) ) {
- $text .= $this->makeNormalYear( $bits['y'] );
- } else {
- $text .= $bits['Y'];
- }
+ $text .= $bits['Y'];
break;
default:
$text .= $char;
@@ -223,6 +259,18 @@ class DateFormatter
if ( $fail ) {
$text = $matches[0];
}
+
+ $isoBits = array();
+ if ( isset($bits['y']) )
+ $isoBits[] = $bits['y'];
+ $isoBits[] = $bits['m'];
+ $isoBits[] = $bits['d'];
+ $isoDate = implode( '-', $isoBits );;
+
+ // Output is not strictly HTML (it's wikitext), but <span> is whitelisted.
+ $text = Xml::tags( 'span',
+ array( 'class' => 'mw-formatted-date', 'title' => $isoDate ), $text );
+
return $text;
}
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index 7fcfb90a..e6a68782 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -374,8 +374,8 @@ class Parser
$text = Sanitizer::normalizeCharReferences( $text );
- if (($wgUseTidy and $this->mOptions->mTidy) or $wgAlwaysUseTidy) {
- $text = self::tidy($text);
+ if ( ( $wgUseTidy && $this->mOptions->mTidy ) || $wgAlwaysUseTidy ) {
+ $text = MWTidy::tidy( $text );
} else {
# attempt to sanitize at least some nesting problems
# (bug #2702 and quite a few others)
@@ -648,126 +648,14 @@ class Parser
$this->mStripState->general->setPair( $rnd, $text );
return $rnd;
}
-
- /**
- * Interface with html tidy, used if $wgUseTidy = true.
- * If tidy isn't able to correct the markup, the original will be
- * returned in all its glory with a warning comment appended.
- *
- * Either the external tidy program or the in-process tidy extension
- * will be used depending on availability. Override the default
- * $wgTidyInternal setting to disable the internal if it's not working.
- *
- * @param string $text Hideous HTML input
- * @return string Corrected HTML output
- * @public
- * @static
- */
- function tidy( $text ) {
- global $wgTidyInternal;
-
- $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
-' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
-'<head><title>test</title></head><body>'.$text.'</body></html>';
-
- # Tidy is known to clobber tabs; convert 'em to entities
- $wrappedtext = str_replace("\t", '&#9;', $wrappedtext);
-
- if( $wgTidyInternal ) {
- $correctedtext = self::internalTidy( $wrappedtext );
- } else {
- $correctedtext = self::externalTidy( $wrappedtext );
- }
- if( is_null( $correctedtext ) ) {
- wfDebug( "Tidy error detected!\n" );
- return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
- }
-
- # Convert the tabs back from entities
- $correctedtext = str_replace('&#9;', "\t", $correctedtext);
-
- return $correctedtext;
- }
-
- /**
- * Spawn an external HTML tidy process and get corrected markup back from it.
- *
- * @private
- * @static
- */
- function externalTidy( $text ) {
- global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
- wfProfileIn( __METHOD__ );
-
- $cleansource = '';
- $opts = ' -utf8';
-
- $descriptorspec = array(
- 0 => array('pipe', 'r'),
- 1 => array('pipe', 'w'),
- 2 => array('file', wfGetNull(), 'a')
- );
- $pipes = array();
- if( function_exists('proc_open') ) {
- $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes);
- if (is_resource($process)) {
- // Theoretically, this style of communication could cause a deadlock
- // here. If the stdout buffer fills up, then writes to stdin could
- // block. This doesn't appear to happen with tidy, because tidy only
- // writes to stdout after it's finished reading from stdin. Search
- // for tidyParseStdin and tidySaveStdout in console/tidy.c
- fwrite($pipes[0], $text);
- fclose($pipes[0]);
- while (!feof($pipes[1])) {
- $cleansource .= fgets($pipes[1], 1024);
- }
- fclose($pipes[1]);
- proc_close($process);
- }
- }
-
- wfProfileOut( __METHOD__ );
-
- if( $cleansource == '' && $text != '') {
- // Some kind of error happened, so we couldn't get the corrected text.
- // Just give up; we'll use the source text and append a warning.
- return null;
- } else {
- return $cleansource;
- }
- }
-
+
/**
- * Use the HTML tidy PECL extension to use the tidy library in-process,
- * saving the overhead of spawning a new process.
- *
- * 'pear install tidy' should be able to compile the extension module.
- *
- * @private
- * @static
+ * Interface with html tidy
+ * @deprecated Use MWTidy::tidy()
*/
- function internalTidy( $text ) {
- global $wgTidyConf, $IP, $wgDebugTidy;
- wfProfileIn( __METHOD__ );
-
- $tidy = new tidy;
- $tidy->parseString( $text, $wgTidyConf, 'utf8' );
- $tidy->cleanRepair();
- if( $tidy->getStatus() == 2 ) {
- // 2 is magic number for fatal error
- // http://www.php.net/manual/en/function.tidy-get-status.php
- $cleansource = null;
- } else {
- $cleansource = tidy_get_output( $tidy );
- }
- if ( $wgDebugTidy && $tidy->getStatus() > 0 ) {
- $cleansource .= "<!--\nTidy reports:\n" .
- str_replace( '-->', '--&gt;', $tidy->errorBuffer ) .
- "\n-->";
- }
-
- wfProfileOut( __METHOD__ );
- return $cleansource;
+ public static function tidy( $text ) {
+ // Report this entry point through wfDeprecated(), then hand the text
+ // to MWTidy::tidy() and return its result unchanged.
+ wfDeprecated( __METHOD__ );
+ return MWTidy::tidy( $text );
}
/**
@@ -998,7 +886,7 @@ class Parser
$text = $this->doDoubleUnderscore( $text );
$text = $this->doHeadings( $text );
- if($this->mOptions->getUseDynamicDates()) {
+ if( $this->mOptions->getUseDynamicDates() ) {
$df = DateFormatter::getInstance();
$text = $df->reformat( $this->mOptions->getDateFormat(), $text );
}
@@ -1008,7 +896,7 @@ class Parser
# replaceInternalLinks may sometimes leave behind
# absolute URLs, which have to be masked to hide them from replaceExternalLinks
- $text = str_replace($this->mUniqPrefix."NOPARSE", "", $text);
+ $text = str_replace($this->mUniqPrefix.'NOPARSE', '', $text);
$text = $this->doMagicLinks( $text );
$text = $this->formatHeadings( $text, $isMain );
@@ -1045,16 +933,16 @@ class Parser
}
function magicLinkCallback( $m ) {
- if ( isset( $m[1] ) && strval( $m[1] ) !== '' ) {
+ if ( isset( $m[1] ) && $m[1] !== '' ) {
# Skip anchor
return $m[0];
- } elseif ( isset( $m[2] ) && strval( $m[2] ) !== '' ) {
+ } elseif ( isset( $m[2] ) && $m[2] !== '' ) {
# Skip HTML element
return $m[0];
- } elseif ( isset( $m[3] ) && strval( $m[3] ) !== '' ) {
+ } elseif ( isset( $m[3] ) && $m[3] !== '' ) {
# Free external link
return $this->makeFreeExternalLink( $m[0] );
- } elseif ( isset( $m[4] ) && strval( $m[4] ) !== '' ) {
+ } elseif ( isset( $m[4] ) && $m[4] !== '' ) {
# RFC or PMID
if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
$keyword = 'RFC';
@@ -1072,7 +960,7 @@ class Parser
$sk = $this->mOptions->getSkin();
$la = $sk->getExternalLinkAttributes( $url, $keyword.$id );
return "<a href=\"{$url}\"{$la}>{$keyword} {$id}</a>";
- } elseif ( isset( $m[5] ) && strval( $m[5] ) !== '' ) {
+ } elseif ( isset( $m[5] ) && $m[5] !== '' ) {
# ISBN
$isbn = $m[5];
$num = strtr( $isbn, array(
@@ -1130,7 +1018,7 @@ class Parser
if ( $text === false ) {
# Not an image, make a link
$text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free',
- $this->getExternalLinkAttribs() );
+ $this->getExternalLinkAttribs( $url ) );
# Register it in the output object...
# Replace unnecessary URL escape codes with their equivalent characters
$pasteurized = self::replaceUnusualEscapes( $url );
@@ -1406,18 +1294,12 @@ class Parser
$url = Sanitizer::cleanUrl( $url );
- if ( $this->mOptions->mExternalLinkTarget ) {
- $attribs = array( 'target' => $this->mOptions->mExternalLinkTarget );
- } else {
- $attribs = array();
- }
-
# Use the encoded URL
# This means that users can paste URLs directly into the text
# Funny characters like &ouml; aren't valid in URLs anyway
# This was changed in August 2004
- $s .= $sk->makeExternalLink( $url, $text, false, $linktype, $this->getExternalLinkAttribs() )
- . $dtrail . $trail;
+ $s .= $sk->makeExternalLink( $url, $text, false, $linktype,
+ $this->getExternalLinkAttribs( $url ) ) . $dtrail . $trail;
# Register link in the output object.
# Replace unnecessary URL escape codes with the referenced character
@@ -1430,12 +1312,36 @@ class Parser
return $s;
}
- function getExternalLinkAttribs() {
+ /**
+ * Get an associative array of additional HTML attributes appropriate for a
+ * particular external link. This currently may include rel => nofollow
+ * (depending on configuration, namespace, and the URL's domain) and/or a
+ * target attribute (depending on configuration).
+ *
+ * @param string $url Optional URL, to extract the domain from for rel =>
+ * nofollow if appropriate
+ * @return array Associative array of HTML attributes
+ */
+ function getExternalLinkAttribs( $url = false ) {
$attribs = array();
global $wgNoFollowLinks, $wgNoFollowNsExceptions;
$ns = $this->mTitle->getNamespace();
if( $wgNoFollowLinks && !in_array($ns, $wgNoFollowNsExceptions) ) {
$attribs['rel'] = 'nofollow';
+
+ global $wgNoFollowDomainExceptions;
+ if ( $wgNoFollowDomainExceptions ) {
+ $bits = wfParseUrl( $url );
+ if ( is_array( $bits ) && isset( $bits['host'] ) ) {
+ foreach ( $wgNoFollowDomainExceptions as $domain ) {
+ if( substr( $bits['host'], -strlen( $domain ) )
+ == $domain ) {
+ unset( $attribs['rel'] );
+ break;
+ }
+ }
+ }
+ }
}
if ( $this->mOptions->getExternalLinkTarget() ) {
$attribs['target'] = $this->mOptions->getExternalLinkTarget();
@@ -1697,7 +1603,7 @@ class Parser
wfProfileOut( __METHOD__."-misc" );
wfProfileIn( __METHOD__."-title" );
$nt = Title::newFromText( $this->mStripState->unstripNoWiki($link) );
- if( !$nt ) {
+ if( $nt === NULL ) {
$s .= $prefix . '[[' . $line;
wfProfileOut( __METHOD__."-title" );
continue;
@@ -1823,6 +1729,7 @@ class Parser
# NS_MEDIA is a pseudo-namespace for linking directly to a file
# FIXME: Should do batch file existence checks, see comment below
if( $ns == NS_MEDIA ) {
+ wfProfileIn( __METHOD__."-media" );
# Give extensions a chance to select the file revision for us
$skip = $time = false;
wfRunHooks( 'BeforeParserMakeImageLinkObj', array( &$this, &$nt, &$skip, &$time ) );
@@ -1834,9 +1741,11 @@ class Parser
# Cloak with NOPARSE to avoid replacement in replaceExternalLinks
$s .= $prefix . $this->armorLinks( $link ) . $trail;
$this->mOutput->addImage( $nt->getDBkey() );
+ wfProfileOut( __METHOD__."-media" );
continue;
}
+ wfProfileIn( __METHOD__."-always_known" );
# Some titles, such as valid special pages or files in foreign repos, should
# be shown as bluelinks even though they're not included in the page table
#
@@ -1849,6 +1758,7 @@ class Parser
# Links will be added to the output link list after checking
$s .= $holders->makeHolder( $nt, $text, '', $trail, $prefix );
}
+ wfProfileOut( __METHOD__."-always_known" );
}
wfProfileOut( __METHOD__ );
return $holders;
@@ -2178,7 +2088,7 @@ class Parser
$inBlockElem = true;
}
} else if ( !$inBlockElem && !$this->mInPre ) {
- if ( ' ' == $t{0} and ( $this->mLastSection === 'pre' or trim($t) != '' ) ) {
+ if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' or trim($t) != '' ) ) {
// pre
if ($this->mLastSection !== 'pre') {
$paragraphStack = false;
@@ -2540,6 +2450,12 @@ class Parser
$this->mOutput->setFlag( 'vary-revision' );
wfDebug( __METHOD__ . ": {{REVISIONTIMESTAMP}} used, setting vary-revision...\n" );
return $this->getRevisionTimestamp();
+ case 'revisionuser':
+ // Let the edit saving system know we should parse the page
+ // *after* a revision ID has been assigned. This is for null edits.
+ $this->mOutput->setFlag( 'vary-revision' );
+ wfDebug( __METHOD__ . ": {{REVISIONUSER}} used, setting vary-revision...\n" );
+ return $this->getRevisionUser();
case 'namespace':
return str_replace('_',' ',$wgContLang->getNsText( $this->mTitle->getNamespace() ) );
case 'namespacee':
@@ -2586,6 +2502,8 @@ class Parser
return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::images() );
case 'numberofusers':
return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::users() );
+ case 'numberofactiveusers':
+ return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::activeUsers() );
case 'numberofpages':
return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::pages() );
case 'numberofadmins':
@@ -2696,11 +2614,10 @@ class Parser
* @private
*/
function replaceVariables( $text, $frame = false, $argsOnly = false ) {
- # Prevent too big inclusions
- if( strlen( $text ) > $this->mOptions->getMaxIncludeSize() ) {
+ # Is there any text? Also, Prevent too big inclusions!
+ if ( strlen( $text ) < 1 || strlen( $text ) > $this->mOptions->getMaxIncludeSize() ) {
return $text;
}
-
wfProfileIn( __METHOD__ );
if ( $frame === false ) {
@@ -2776,7 +2693,7 @@ class Parser
* @private
*/
function braceSubstitution( $piece, $frame ) {
- global $wgContLang, $wgAllowDisplayTitle, $wgNonincludableNamespaces;
+ global $wgContLang, $wgNonincludableNamespaces;
wfProfileIn( __METHOD__ );
wfProfileIn( __METHOD__.'-setup' );
@@ -2936,12 +2853,6 @@ class Parser
if($wgContLang->hasVariants() && $title->getArticleID() == 0){
$wgContLang->findVariantLink( $part1, $title, true );
}
- # Do infinite loop check
- if ( !$frame->loopCheck( $title ) ) {
- $found = true;
- $text = '<span class="error">' . wfMsgForContent( 'parser-template-loop-warning', $titleText ) . '</span>';
- wfDebug( __METHOD__.": template loop broken at '$titleText'\n" );
- }
# Do recursion depth check
$limit = $this->mOptions->getMaxTemplateDepth();
if ( $frame->depth >= $limit ) {
@@ -2991,6 +2902,14 @@ class Parser
}
$found = true;
}
+
+ # Do infinite loop check
+ # This has to be done after redirect resolution to avoid infinite loops via redirects
+ if ( !$frame->loopCheck( $title ) ) {
+ $found = true;
+ $text = '<span class="error">' . wfMsgForContent( 'parser-template-loop-warning', $titleText ) . '</span>';
+ wfDebug( __METHOD__.": template loop broken at '$titleText'\n" );
+ }
wfProfileOut( __METHOD__ . '-loadtpl' );
}
@@ -3304,6 +3223,7 @@ class Parser
throw new MWException( '<html> extension tag encountered unexpectedly' );
}
case 'nowiki':
+ $content = strtr($content, array('-{' => '-&#123;', '}-' => '&#125;-'));
$output = Xml::escapeTagsOnly( $content );
break;
case 'math':
@@ -3387,6 +3307,7 @@ class Parser
* Fills $this->mDoubleUnderscores, returns the modified text
*/
function doDoubleUnderscore( $text ) {
+ wfProfileIn( __METHOD__ );
// The position of __TOC__ needs to be recorded
$mw = MagicWord::get( 'toc' );
if( $mw->match( $text ) ) {
@@ -3429,7 +3350,7 @@ class Parser
} elseif( isset( $this->mDoubleUnderscores['index'] ) ) {
$this->mOutput->setIndexPolicy( 'index' );
}
-
+ wfProfileOut( __METHOD__ );
return $text;
}
@@ -3459,7 +3380,7 @@ class Parser
}
# Inhibit editsection links if requested in the page
- if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
+ if ( isset( $this->mDoubleUnderscores['noeditsection'] ) || $this->mOptions->getIsPrintable() ) {
$showEditLink = 0;
}
@@ -3479,6 +3400,12 @@ class Parser
$this->mOutput->setNewSection( true );
}
+ # Allow user to remove the "new section"
+ # link via __NONEWSECTIONLINK__
+ if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
+ $this->mOutput->hideNewSection( true );
+ }
+
# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
# override above conditions and always show TOC above first header
if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
@@ -3762,13 +3689,13 @@ class Parser
*
* @param string $text the text to transform
* @param Title &$title the Title object for the current article
- * @param User &$user the User object describing the current user
+ * @param User $user the User object describing the current user
* @param ParserOptions $options parsing options
* @param bool $clearState whether to clear the parser state first
* @return string the altered wiki markup
* @public
*/
- function preSaveTransform( $text, &$title, $user, $options, $clearState = true ) {
+ function preSaveTransform( $text, Title $title, $user, $options, $clearState = true ) {
$this->mOptions = $options;
$this->setTitle( $title );
$this->setOutputType( self::OT_WIKI );
@@ -3808,6 +3735,15 @@ class Parser
putenv( 'TZ='.$wgLocaltimezone );
$ts = date( 'YmdHis', $unixts );
$tz = date( 'T', $unixts ); # might vary on DST changeover!
+
+ /* Allow translation of timezones through the wiki. date() can return
+ * whatever crap the system uses, localised or not, so we cannot
+ * ship premade translations.
+ */
+ $key = 'timezone-' . strtolower( trim( $tz ) );
+ $value = wfMsgForContent( $key );
+ if ( !wfEmptyMsg( $key, $value ) ) $tz = $value;
+
putenv( 'TZ='.$oldtz );
}
@@ -4627,7 +4563,11 @@ class Parser
// Output the replacement text
// Add two newlines on -- trailing whitespace in $newText is conventionally
// stripped by the editor, so we need both newlines to restore the paragraph gap
- $outText .= $newText . "\n\n";
+ // Only add trailing whitespace if there is newText
+ if($newText != "") {
+ $outText .= $newText . "\n\n";
+ }
+
while ( $node ) {
$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
$node = $node->getNextSibling();
@@ -4694,6 +4634,22 @@ class Parser
}
/**
+ * Get the name of the user that edited the last revision
+ */
+ function getRevisionUser() {
+ // If this template is subst:ed the revision id will be blank,
+ // so just use the current user's name.
+ $revision = $this->mRevisionId
+ ? Revision::newFromId( $this->mRevisionId )
+ : null;
+
+ // Revision::newFromId() may hand back no object when the id cannot be
+ // loaded; fall back to the current user instead of calling a method
+ // on a non-object.
+ if( $revision ) {
+ return $revision->getUserText();
+ }
+
+ global $wgUser;
+ return $wgUser->getName();
+ }
+
+ /**
* Mutator for $mDefaultSort
*
* @param $sort New value
@@ -4844,6 +4800,102 @@ class Parser
}
return $out;
}
+
+ /**
+ * Bundle half-parsed text with the strip-state and link-holder entries
+ * that its placeholders point at, for unserialiseHalfParsedText().
+ *
+ * @param $text String: text that may contain strip markers as well as
+ * <!--LINK ns:key--> and <!--IWLINK key--> placeholders
+ * @return Array with the keys 'text', 'stripstate' (a StripState) and
+ * 'linkholder' ('internal' => ns => key => entry, 'interwiki' => key => entry)
+ * @throws MWException when a strip marker has no stored data
+ */
+ function serialiseHalfParsedText( $text ) {
+ $data = array();
+ $data['text'] = $text;
+
+ // First, find all strip markers, and store their
+ // data in an array.
+ $stripState = new StripState;
+ $pos = 0;
+ // strpos() yields 0 for a hit at the very start of the text, so its
+ // result is compared against false instead of being tested for truth.
+ while( ( $start_pos = strpos( $text, $this->mUniqPrefix, $pos ) ) !== false ) {
+ // The suffix belonging to this marker can only come after its prefix
+ $end_pos = strpos( $text, self::MARKER_SUFFIX, $start_pos );
+ if ( $end_pos === false ) {
+ break;
+ }
+ $end_pos += strlen( self::MARKER_SUFFIX );
+ $marker = substr( $text, $start_pos, $end_pos-$start_pos );
+
+ // isset() rather than !empty(): stripped content such as "0" is legitimate
+ if ( isset( $this->mStripState->general->data[$marker] ) ) {
+ $replaceArray = $stripState->general;
+ $stripText = $this->mStripState->general->data[$marker];
+ } elseif ( isset( $this->mStripState->nowiki->data[$marker] ) ) {
+ $replaceArray = $stripState->nowiki;
+ $stripText = $this->mStripState->nowiki->data[$marker];
+ } else {
+ throw new MWException( "Hanging strip marker: '$marker'." );
+ }
+
+ $replaceArray->setPair( $marker, $stripText );
+ $pos = $end_pos;
+ }
+ $data['stripstate'] = $stripState;
+
+ // Now, find all of our links, and store their data indexed by the key
+ // used in the placeholder, so that unserialiseHalfParsedText() can
+ // match each entry with its placeholder again.
+ $links = array( 'internal' => array(), 'interwiki' => array() );
+
+ // Internal links
+ $open = '<!--LINK ';
+ $pos = 0;
+ while( ( $start_pos = strpos( $text, $open, $pos ) ) !== false ) {
+ $inner_pos = $start_pos + strlen( $open );
+ $close_pos = strpos( $text, '-->', $inner_pos );
+ if ( $close_pos === false ) {
+ // Unterminated placeholder, nothing more to collect
+ break;
+ }
+ $pos = $close_pos + strlen( '-->' );
+
+ $parts = explode( ':', substr( $text, $inner_pos, $close_pos - $inner_pos ), 2 );
+ if ( count( $parts ) < 2 ) {
+ // Not of the form "ns:key"
+ continue;
+ }
+ $ns = trim( $parts[0] );
+ $key = trim( $parts[1] );
+ if ( isset( $this->mLinkHolders->internals[$ns][$key] ) ) {
+ $links['internal'][$ns][$key] = $this->mLinkHolders->internals[$ns][$key];
+ }
+ }
+
+ // Interwiki links
+ $open = '<!--IWLINK ';
+ $pos = 0;
+ while( ( $start_pos = strpos( $text, $open, $pos ) ) !== false ) {
+ $inner_pos = $start_pos + strlen( $open );
+ $close_pos = strpos( $text, '-->', $inner_pos );
+ if ( $close_pos === false ) {
+ break;
+ }
+ $pos = $close_pos + strlen( '-->' );
+
+ // Keep the placeholder text in its own variable; $data is the result array
+ $key = trim( substr( $text, $inner_pos, $close_pos - $inner_pos ) );
+ // Same property that unserialiseHalfParsedText() writes to
+ if ( isset( $this->mLinkHolders->interwikis[$key] ) ) {
+ $links['interwiki'][$key] = $this->mLinkHolders->interwikis[$key];
+ }
+ }
+
+ $data['linkholder'] = $links;
+
+ return $data;
+ }
+
+ /**
+ * Load a bundle with the keys 'stripstate', 'text' and 'linkholder' into
+ * this parser: the strip state is merged in, every link entry is
+ * registered under a prefixed key, and the placeholders in the text are
+ * rewritten to use those new keys.
+ *
+ * @param $data Array: bundle to import
+ * @param $intPrefix String: prefix for the new link keys; a random string
+ * is generated when none is given
+ * @return String: the text with renumbered link placeholders
+ */
+ function unserialiseHalfParsedText( $data, $intPrefix = null /* Unique identifying prefix */ ) {
+ if ( !$intPrefix ) {
+ $intPrefix = $this->getRandomString();
+ }
+
+ // Merge the saved strip state into our own, one replacement array at a time
+ $savedState = $data['stripstate'];
+ foreach( array( 'general', 'nowiki' ) as $kind ) {
+ $this->mStripState->$kind->merge( $savedState->$kind );
+ }
+
+ $output = $data['text'];
+ $holders = $data['linkholder'];
+
+ // Internal links are grouped by namespace
+ foreach( $holders['internal'] as $ns => $entries ) {
+ foreach( $entries as $oldKey => $entry ) {
+ $renamed = "{$intPrefix}-{$oldKey}";
+ $this->mLinkHolders->internals[$ns][$renamed] = $entry;
+ $output = str_replace(
+ "<!--LINK $ns:$oldKey-->",
+ "<!--LINK $ns:$renamed-->",
+ $output
+ );
+ }
+ }
+
+ // Interwiki links form a flat list
+ foreach( $holders['interwiki'] as $oldKey => $entry ) {
+ $renamed = "{$intPrefix}-{$oldKey}";
+ $this->mLinkHolders->interwikis[$renamed] = $entry;
+ $output = str_replace(
+ "<!--IWLINK $oldKey-->",
+ "<!--IWLINK $renamed-->",
+ $output
+ );
+ }
+
+ return $output;
+ }
}
/**
diff --git a/includes/parser/ParserCache.php b/includes/parser/ParserCache.php
index 7e61157a..d17214c3 100644
--- a/includes/parser/ParserCache.php
+++ b/includes/parser/ParserCache.php
@@ -26,8 +26,14 @@ class ParserCache {
$this->mMemc =& $memCached;
}
- function getKey( &$article, &$user ) {
- global $action;
+ function getKey( &$article, $popts ) {
+ global $wgRequest;
+
+ if( $popts instanceof User ) // It used to be getKey( &$article, &$user )
+ $popts = ParserOptions::newFromUser( $popts );
+
+ $user = $popts->mUser;
+ $printable = ( $popts->getIsPrintable() ) ? '!printable=1' : '';
$hash = $user->getPageRenderingHash();
if( !$article->mTitle->quickUserCan( 'edit' ) ) {
// section edit links are suppressed even if the user has them on
@@ -36,21 +42,21 @@ class ParserCache {
$edit = '';
}
$pageid = $article->getID();
- $renderkey = (int)($action == 'render');
- $key = wfMemcKey( 'pcache', 'idhash', "{$pageid}-{$renderkey}!{$hash}{$edit}" );
+ $renderkey = (int)($wgRequest->getVal('action') == 'render');
+ $key = wfMemcKey( 'pcache', 'idhash', "{$pageid}-{$renderkey}!{$hash}{$edit}{$printable}" );
return $key;
}
- function getETag( &$article, &$user ) {
- return 'W/"' . $this->getKey($article, $user) . "--" . $article->mTouched. '"';
+ function getETag( &$article, $popts ) {
+ return 'W/"' . $this->getKey($article, $popts) . "--" . $article->mTouched. '"';
}
- function get( &$article, &$user ) {
+ function get( &$article, $popts ) {
global $wgCacheEpoch;
$fname = 'ParserCache::get';
wfProfileIn( $fname );
- $key = $this->getKey( $article, $user );
+ $key = $this->getKey( $article, $popts );
wfDebug( "Trying parser cache $key\n" );
$value = $this->mMemc->get( $key );
@@ -86,9 +92,9 @@ class ParserCache {
return $value;
}
- function save( $parserOutput, &$article, &$user ){
+ function save( $parserOutput, &$article, $popts ){
global $wgParserCacheExpireTime;
- $key = $this->getKey( $article, $user );
+ $key = $this->getKey( $article, $popts );
if( $parserOutput->getCacheTime() != -1 ) {
diff --git a/includes/parser/ParserOptions.php b/includes/parser/ParserOptions.php
index 5b8cd3ee..e6a9f3a7 100644
--- a/includes/parser/ParserOptions.php
+++ b/includes/parser/ParserOptions.php
@@ -33,7 +33,10 @@ class ParserOptions
var $mExternalLinkTarget; # Target attribute for external links
var $mUser; # Stored user object, just used to initialise the skin
-
+ var $mIsPreview; # Parsing the page for a "preview" operation
+ var $mIsSectionPreview; # Parsing the page for a "preview" operation on a single section
+ var $mIsPrintable; # Parsing the printable version of the page
+
function getUseTeX() { return $this->mUseTeX; }
function getUseDynamicDates() { return $this->mUseDynamicDates; }
function getInterwikiMagic() { return $this->mInterwikiMagic; }
@@ -54,7 +57,10 @@ class ParserOptions
function getEnableLimitReport() { return $this->mEnableLimitReport; }
function getCleanSignatures() { return $this->mCleanSignatures; }
function getExternalLinkTarget() { return $this->mExternalLinkTarget; }
-
+ function getIsPreview() { return $this->mIsPreview; }
+ function getIsSectionPreview() { return $this->mIsSectionPreview; }
+ function getIsPrintable() { return $this->mIsPrintable; }
+
function getSkin() {
if ( !isset( $this->mSkin ) ) {
$this->mSkin = $this->mUser->getSkin();
@@ -99,7 +105,10 @@ class ParserOptions
function setTimestamp( $x ) { return wfSetVar( $this->mTimestamp, $x ); }
function setCleanSignatures( $x ) { return wfSetVar( $this->mCleanSignatures, $x ); }
function setExternalLinkTarget( $x ) { return wfSetVar( $this->mExternalLinkTarget, $x ); }
-
+ function setIsPreview( $x ) { return wfSetVar( $this->mIsPreview, $x ); }
+ function setIsSectionPreview( $x ) { return wfSetVar( $this->mIsSectionPreview, $x ); }
+ function setIsPrintable( $x ) { return wfSetVar( $this->mIsPrintable, $x ); }
+
function __construct( $user = null ) {
$this->initialiseFromUser( $user );
}
@@ -156,6 +165,8 @@ class ParserOptions
$this->mEnableLimitReport = false;
$this->mCleanSignatures = $wgCleanSignatures;
$this->mExternalLinkTarget = $wgExternalLinkTarget;
+ $this->mIsPreview = false;
+ $this->mIsSectionPreview = false;
wfProfileOut( $fname );
}
}
diff --git a/includes/parser/ParserOutput.php b/includes/parser/ParserOutput.php
index 35cb5c92..22c1dfba 100644
--- a/includes/parser/ParserOutput.php
+++ b/includes/parser/ParserOutput.php
@@ -18,6 +18,7 @@ class ParserOutput
$mImages = array(), # DB keys of the images used, in the array key only
$mExternalLinks = array(), # External link URLs, in the key only
$mNewSection = false, # Show a new section link?
+ $mHideNewSection = false, # Hide the new section link?
$mNoGallery = false, # No gallery on category page? (__NOGALLERY__)
$mHeadItems = array(), # Items to put in the <head> section
$mOutputHooks = array(), # Hook tags as per $wgParserOutputHooks
@@ -80,6 +81,12 @@ class ParserOutput
function setNewSection( $value ) {
$this->mNewSection = (bool)$value;
}
+ /**
+ * Record whether the "new section" link is to be hidden.
+ * @param $value Mixed: stored after being cast to bool
+ */
+ function hideNewSection ( $value ) {
+ $this->mHideNewSection = (bool)$value;
+ }
+ /**
+ * @return Boolean: the stored "hide the new section link" flag
+ */
+ function getHideNewSection () {
+ return (bool)$this->mHideNewSection;
+ }
function getNewSection() {
return (bool)$this->mNewSection;
}
@@ -94,6 +101,9 @@ class ParserOutput
// We don't record Special: links currently
// It might actually be wise to, but we'd need to do some normalization.
return;
+ } elseif( $dbk === '' ) {
+ // Don't record self links - [[#Foo]]
+ return;
}
if ( !isset( $this->mLinks[$ns] ) ) {
$this->mLinks[$ns] = array();
diff --git a/includes/parser/Preprocessor_DOM.php b/includes/parser/Preprocessor_DOM.php
index af591b67..2e114545 100644
--- a/includes/parser/Preprocessor_DOM.php
+++ b/includes/parser/Preprocessor_DOM.php
@@ -6,6 +6,8 @@
class Preprocessor_DOM implements Preprocessor {
var $parser, $memoryLimit;
+ const CACHE_VERSION = 1;
+
function __construct( $parser ) {
$this->parser = $parser;
$mem = ini_get( 'memory_limit' );
@@ -63,8 +65,61 @@ class Preprocessor_DOM implements Preprocessor {
*/
function preprocessToObj( $text, $flags = 0 ) {
wfProfileIn( __METHOD__ );
- wfProfileIn( __METHOD__.'-makexml' );
+ global $wgMemc, $wgPreprocessorCacheThreshold;
+
+ // $xml stays false until it has been loaded from the cache or generated
+ $xml = false;
+ // Only texts longer than the configured threshold go through memcached
+ $cacheable = strlen( $text ) > $wgPreprocessorCacheThreshold;
+ if ( $cacheable ) {
+ // This profiling section is closed just before the final return
+ wfProfileIn( __METHOD__.'-cacheable' );
+
+ $cacheKey = wfMemcKey( 'preprocess-xml', md5($text), $flags );
+ $cacheValue = $wgMemc->get( $cacheKey );
+ if ( $cacheValue ) {
+ // A cached value is an 8-character version number followed by the XML
+ $version = substr( $cacheValue, 0, 8 );
+ if ( intval( $version ) == self::CACHE_VERSION ) {
+ $xml = substr( $cacheValue, 8 );
+ // From the cache
+ wfDebugLog( "Preprocessor", "Loaded preprocessor XML from memcached (key $cacheKey)" );
+ }
+ }
+ }
+ if ( $xml === false ) {
+ if ( $cacheable ) {
+ wfProfileIn( __METHOD__.'-cache-miss' );
+ $xml = $this->preprocessToXml( $text, $flags );
+ // Prefix the zero-padded version and store with a TTL of 86400 seconds
+ $cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . $xml;
+ $wgMemc->set( $cacheKey, $cacheValue, 86400 );
+ wfProfileOut( __METHOD__.'-cache-miss' );
+ wfDebugLog( "Preprocessor", "Saved preprocessor XML to memcached (key $cacheKey)" );
+ } else {
+ $xml = $this->preprocessToXml( $text, $flags );
+ }
+ }
+ wfProfileIn( __METHOD__.'-loadXML' );
+ $dom = new DOMDocument;
+ wfSuppressWarnings();
+ $result = $dom->loadXML( $xml );
+ wfRestoreWarnings();
+ if ( !$result ) {
+ // Try running the XML through UtfNormal to get rid of invalid characters
+ $xml = UtfNormal::cleanUp( $xml );
+ // NOTE(review): unlike the first attempt, this retry runs without wfSuppressWarnings()
+ $result = $dom->loadXML( $xml );
+ if ( !$result ) {
+ throw new MWException( __METHOD__.' generated invalid XML' );
+ }
+ }
+ $obj = new PPNode_DOM( $dom->documentElement );
+ wfProfileOut( __METHOD__.'-loadXML' );
+ if ( $cacheable ) {
+ wfProfileOut( __METHOD__.'-cacheable' );
+ }
+ wfProfileOut( __METHOD__ );
+ return $obj;
+ }
+
+ function preprocessToXml( $text, $flags = 0 ) {
+ wfProfileIn( __METHOD__ );
$rules = array(
'{' => array(
'end' => '}',
@@ -304,7 +359,9 @@ class Preprocessor_DOM implements Preprocessor {
} else {
$attrEnd = $tagEndPos;
// Find closing tag
- if ( preg_match( "/<\/$name\s*>/i", $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) ) {
+ if ( preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",
+ $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) )
+ {
$inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 );
$i = $matches[0][1] + strlen( $matches[0][0] );
$close = '<close>' . htmlspecialchars( $matches[0][0] ) . '</close>';
@@ -569,24 +626,9 @@ class Preprocessor_DOM implements Preprocessor {
$stack->rootAccum .= '</root>';
$xml = $stack->rootAccum;
- wfProfileOut( __METHOD__.'-makexml' );
- wfProfileIn( __METHOD__.'-loadXML' );
- $dom = new DOMDocument;
- wfSuppressWarnings();
- $result = $dom->loadXML( $xml );
- wfRestoreWarnings();
- if ( !$result ) {
- // Try running the XML through UtfNormal to get rid of invalid characters
- $xml = UtfNormal::cleanUp( $xml );
- $result = $dom->loadXML( $xml );
- if ( !$result ) {
- throw new MWException( __METHOD__.' generated invalid XML' );
- }
- }
- $obj = new PPNode_DOM( $dom->documentElement );
- wfProfileOut( __METHOD__.'-loadXML' );
wfProfileOut( __METHOD__ );
- return $obj;
+
+ return $xml;
}
}
@@ -831,7 +873,6 @@ class PPFrame_DOM implements PPFrame {
if ( is_string( $root ) ) {
return $root;
}
- wfProfileIn( __METHOD__ );
if ( ++$this->parser->mPPNodeCount > $this->parser->mOptions->mMaxPPNodeCount )
{
@@ -841,6 +882,7 @@ class PPFrame_DOM implements PPFrame {
if ( $expansionDepth > $this->parser->mOptions->mMaxPPExpandDepth ) {
return '<span class="error">Expansion depth limit exceeded</span>';
}
+ wfProfileIn( __METHOD__ );
++$expansionDepth;
if ( $root instanceof PPNode_DOM ) {
diff --git a/includes/parser/Preprocessor_Hash.php b/includes/parser/Preprocessor_Hash.php
index 62028291..f46ee40c 100644
--- a/includes/parser/Preprocessor_Hash.php
+++ b/includes/parser/Preprocessor_Hash.php
@@ -8,6 +8,8 @@
*/
class Preprocessor_Hash implements Preprocessor {
var $parser;
+
+ const CACHE_VERSION = 1;
function __construct( $parser ) {
$this->parser = $parser;
@@ -45,6 +47,31 @@ class Preprocessor_Hash implements Preprocessor {
*/
function preprocessToObj( $text, $flags = 0 ) {
wfProfileIn( __METHOD__ );
+
+
+ // Check cache.
+ global $wgMemc, $wgPreprocessorCacheThreshold;
+
+ $cacheable = strlen( $text ) > $wgPreprocessorCacheThreshold;
+ if ( $cacheable ) {
+ wfProfileIn( __METHOD__.'-cacheable' );
+
+ $cacheKey = wfMemcKey( 'preprocess-hash', md5($text), $flags );
+ $cacheValue = $wgMemc->get( $cacheKey );
+ if ( $cacheValue ) {
+ $version = substr( $cacheValue, 0, 8 );
+ if ( intval( $version ) == self::CACHE_VERSION ) {
+ $hash = unserialize( substr( $cacheValue, 8 ) );
+ // From the cache
+ wfDebugLog( "Preprocessor",
+ "Loaded preprocessor hash from memcached (key $cacheKey)" );
+ wfProfileOut( __METHOD__.'-cacheable' );
+ wfProfileOut( __METHOD__ );
+ return $hash;
+ }
+ }
+ wfProfileIn( __METHOD__.'-cache-miss' );
+ }
$rules = array(
'{' => array(
@@ -288,7 +315,9 @@ class Preprocessor_Hash implements Preprocessor {
} else {
$attrEnd = $tagEndPos;
// Find closing tag
- if ( preg_match( "/<\/$name\s*>/i", $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) ) {
+ if ( preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",
+ $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) )
+ {
$inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 );
$i = $matches[0][1] + strlen( $matches[0][0] );
$close = $matches[0][0];
@@ -615,6 +644,16 @@ class Preprocessor_Hash implements Preprocessor {
$rootNode = new PPNode_Hash_Tree( 'root' );
$rootNode->firstChild = $stack->rootAccum->firstNode;
$rootNode->lastChild = $stack->rootAccum->lastNode;
+
+ // Cache
+ if ($cacheable) {
+ $cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . serialize( $rootNode );;
+ $wgMemc->set( $cacheKey, $cacheValue, 86400 );
+ wfProfileOut( __METHOD__.'-cache-miss' );
+ wfProfileOut( __METHOD__.'-cacheable' );
+ wfDebugLog( "Preprocessor", "Saved preprocessor Hash to memcached (key $cacheKey)" );
+ }
+
wfProfileOut( __METHOD__ );
return $rootNode;
}
diff --git a/includes/parser/Tidy.php b/includes/parser/Tidy.php
new file mode 100644
index 00000000..95f83621
--- /dev/null
+++ b/includes/parser/Tidy.php
@@ -0,0 +1,170 @@
+<?php
+
+/**
+ * Class to interact with HTML tidy
+ *
+ * Either the external tidy program or the in-process tidy extension
+ * will be used depending on availability. Override the default
+ * $wgTidyInternal setting to disable the internal if it's not working.
+ *
+ * @ingroup Parser
+ */
+class MWTidy {
+
+ /**
+ * Run a fragment of HTML through tidy (used if $wgUseTidy = true).
+ * The fragment is wrapped into a minimal XHTML document first. When no
+ * corrected markup comes back, the untouched input is returned with a
+ * warning comment appended.
+ *
+ * @param string $text Hideous HTML input
+ * @return string Corrected HTML output
+ */
+ public static function tidy( $text ) {
+ global $wgTidyInternal;
+
+ $docHead = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
+' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
+'<head><title>test</title></head><body>';
+ $docTail = '</body></html>';
+
+ # Tidy is known to clobber tabs, so hand it entities instead
+ $input = str_replace( "\t", '&#9;', $docHead . $text . $docTail );
+
+ $output = $wgTidyInternal
+ ? self::execInternalTidy( $input )
+ : self::execExternalTidy( $input );
+
+ if( $output === null ) {
+ wfDebug( "Tidy error detected!\n" );
+ return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
+ }
+
+ # Turn the entities back into tabs
+ return str_replace( '&#9;', "\t", $output );
+ }
+
+ /**
+ * Validate HTML with tidy (used if $wgValidateAllHtml = true).
+ *
+ * @param $text String: HTML to validate
+ * @param &$errorStr String: receives tidy's error output
+ * @return Boolean: true when the status is 0, or when it is negative
+ * and no error output was produced
+ */
+ public static function checkErrors( $text, &$errorStr = null ) {
+ global $wgTidyInternal;
+
+ $status = 0;
+ $errorStr = $wgTidyInternal
+ ? self::execInternalTidy( $text, true, $status )
+ : self::execExternalTidy( $text, true, $status );
+
+ if( $status == 0 ) {
+ return true;
+ }
+ return $status < 0 && $errorStr == '';
+ }
+
+ /**
+ * Spawn an external HTML tidy process and get corrected markup back from it.
+ * Also called in OutputHandler.php for full page validation
+ *
+ * @param $text String: HTML to check
+ * @param $stderr Boolean: Whether to read from STDERR rather than STDOUT
+ * @param &$retval Exit code (-1 on internal error)
+ * @return mixed String or null
+ */
+ private static function execExternalTidy( $text, $stderr = false, &$retval = null ) {
+ global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
+ wfProfileIn( __METHOD__ );
+
+ $cleansource = '';
+ $opts = ' -utf8';
+
+ // Pipe the stream we want to read; send the other one to the null device
+ if( $stderr ) {
+ $descriptorspec = array(
+ 0 => array( 'pipe', 'r' ),
+ 1 => array( 'file', wfGetNull(), 'a' ),
+ 2 => array( 'pipe', 'w' )
+ );
+ } else {
+ $descriptorspec = array(
+ 0 => array( 'pipe', 'r' ),
+ 1 => array( 'pipe', 'w' ),
+ 2 => array( 'file', wfGetNull(), 'a' )
+ );
+ }
+
+ // Index into $pipes of the stream that is read back: 2 = stderr, 1 = stdout
+ $readpipe = $stderr ? 2 : 1;
+ $pipes = array();
+
+ if( function_exists( 'proc_open' ) ) {
+ // NOTE(review): the three globals are interpolated into the command line
+ // unescaped; presumably they are trusted site configuration - confirm
+ $process = proc_open( "$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes );
+ if ( is_resource( $process ) ) {
+ // Theoretically, this style of communication could cause a deadlock
+ // here. If the stdout buffer fills up, then writes to stdin could
+ // block. This doesn't appear to happen with tidy, because tidy only
+ // writes to stdout after it's finished reading from stdin. Search
+ // for tidyParseStdin and tidySaveStdout in console/tidy.c
+ fwrite( $pipes[0], $text );
+ fclose( $pipes[0] );
+ while ( !feof( $pipes[$readpipe] ) ) {
+ $cleansource .= fgets( $pipes[$readpipe], 1024 );
+ }
+ fclose( $pipes[$readpipe] );
+ $retval = proc_close( $process );
+ } else {
+ // The process could not be started
+ $retval = -1;
+ }
+ } else {
+ // proc_open() is not available
+ $retval = -1;
+ }
+
+ wfProfileOut( __METHOD__ );
+
+ if( !$stderr && $cleansource == '' && $text != '' ) {
+ // Some kind of error happened, so we couldn't get the corrected text.
+ // Just give up; we'll use the source text and append a warning.
+ return null;
+ } else {
+ return $cleansource;
+ }
+ }
+
+ /**
+ * Use the HTML tidy PECL extension to use the tidy library in-process,
+ * saving the overhead of spawning a new process.
+ *
+ * 'pear install tidy' should be able to compile the extension module.
+ *
+ * @param $text String: HTML to check
+ * @param $stderr Boolean: Whether to return tidy's error buffer rather
+ * than the repaired markup
+ * @param &$retval Status reported by tidy::getStatus()
+ * @return mixed String, or null when repairing ended with status 2
+ */
+ private static function execInternalTidy( $text, $stderr = false, &$retval = null ) {
+ global $wgTidyConf, $wgDebugTidy;
+ wfProfileIn( __METHOD__ );
+
+ $tidy = new tidy;
+ $tidy->parseString( $text, $wgTidyConf, 'utf8' );
+
+ if( $stderr ) {
+ $retval = $tidy->getStatus();
+ // Close the profiling section on this path as well
+ wfProfileOut( __METHOD__ );
+ return $tidy->errorBuffer;
+ }
+
+ $tidy->cleanRepair();
+ $retval = $tidy->getStatus();
+ if( $retval == 2 ) {
+ // 2 is magic number for fatal error
+ // http://www.php.net/manual/en/function.tidy-get-status.php
+ // Leave this as null: appending the debug report would turn the
+ // failure marker into a string, and the caller's null check would
+ // no longer detect the error.
+ $cleansource = null;
+ } else {
+ $cleansource = tidy_get_output( $tidy );
+ if ( $wgDebugTidy && $retval > 0 ) {
+ $cleansource .= "<!--\nTidy reports:\n" .
+ str_replace( '-->', '--&gt;', $tidy->errorBuffer ) .
+ "\n-->";
+ }
+ }
+
+ wfProfileOut( __METHOD__ );
+ return $cleansource;
+ }
+
+}