From 72e90545454c0e014318fa3c81658e035aac58c1 Mon Sep 17 00:00:00 2001 From: Pierre Schmitz Date: Wed, 10 Jun 2009 13:00:47 +0200 Subject: applying patch to version 1.15.0 --- includes/parser/Parser.php | 360 ++++++++++++++++++++++++++------------------- 1 file changed, 206 insertions(+), 154 deletions(-) (limited to 'includes/parser/Parser.php') diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 7fcfb90a..e6a68782 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -374,8 +374,8 @@ class Parser $text = Sanitizer::normalizeCharReferences( $text ); - if (($wgUseTidy and $this->mOptions->mTidy) or $wgAlwaysUseTidy) { - $text = self::tidy($text); + if ( ( $wgUseTidy && $this->mOptions->mTidy ) || $wgAlwaysUseTidy ) { + $text = MWTidy::tidy( $text ); } else { # attempt to sanitize at least some nesting problems # (bug #2702 and quite a few others) @@ -648,126 +648,14 @@ class Parser $this->mStripState->general->setPair( $rnd, $text ); return $rnd; } - - /** - * Interface with html tidy, used if $wgUseTidy = true. - * If tidy isn't able to correct the markup, the original will be - * returned in all its glory with a warning comment appended. - * - * Either the external tidy program or the in-process tidy extension - * will be used depending on availability. Override the default - * $wgTidyInternal setting to disable the internal if it's not working. - * - * @param string $text Hideous HTML input - * @return string Corrected HTML output - * @public - * @static - */ - function tidy( $text ) { - global $wgTidyInternal; - - $wrappedtext = ''. -'test'.$text.''; - - # Tidy is known to clobber tabs; convert 'em to entities - $wrappedtext = str_replace("\t", ' ', $wrappedtext); - - if( $wgTidyInternal ) { - $correctedtext = self::internalTidy( $wrappedtext ); - } else { - $correctedtext = self::externalTidy( $wrappedtext ); - } - if( is_null( $correctedtext ) ) { - wfDebug( "Tidy error detected!\n" ); - return $text . "\n\n"; - } - - # Convert the tabs back from entities - $correctedtext = str_replace(' ', "\t", $correctedtext); - - return $correctedtext; - } - - /** - * Spawn an external HTML tidy process and get corrected markup back from it. - * - * @private - * @static - */ - function externalTidy( $text ) { - global $wgTidyConf, $wgTidyBin, $wgTidyOpts; - wfProfileIn( __METHOD__ ); - - $cleansource = ''; - $opts = ' -utf8'; - - $descriptorspec = array( - 0 => array('pipe', 'r'), - 1 => array('pipe', 'w'), - 2 => array('file', wfGetNull(), 'a') - ); - $pipes = array(); - if( function_exists('proc_open') ) { - $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes); - if (is_resource($process)) { - // Theoretically, this style of communication could cause a deadlock - // here. If the stdout buffer fills up, then writes to stdin could - // block. This doesn't appear to happen with tidy, because tidy only - // writes to stdout after it's finished reading from stdin. Search - // for tidyParseStdin and tidySaveStdout in console/tidy.c - fwrite($pipes[0], $text); - fclose($pipes[0]); - while (!feof($pipes[1])) { - $cleansource .= fgets($pipes[1], 1024); - } - fclose($pipes[1]); - proc_close($process); - } - } - - wfProfileOut( __METHOD__ ); - - if( $cleansource == '' && $text != '') { - // Some kind of error happened, so we couldn't get the corrected text. - // Just give up; we'll use the source text and append a warning. - return null; - } else { - return $cleansource; - } - } - + /** - * Use the HTML tidy PECL extension to use the tidy library in-process, - * saving the overhead of spawning a new process. - * - * 'pear install tidy' should be able to compile the extension module. - * - * @private - * @static + * Interface with html tidy + * @deprecated Use MWTidy::tidy() */ - function internalTidy( $text ) { - global $wgTidyConf, $IP, $wgDebugTidy; - wfProfileIn( __METHOD__ ); - - $tidy = new tidy; - $tidy->parseString( $text, $wgTidyConf, 'utf8' ); - $tidy->cleanRepair(); - if( $tidy->getStatus() == 2 ) { - // 2 is magic number for fatal error - // http://www.php.net/manual/en/function.tidy-get-status.php - $cleansource = null; - } else { - $cleansource = tidy_get_output( $tidy ); - } - if ( $wgDebugTidy && $tidy->getStatus() > 0 ) { - $cleansource .= "', '-->', $tidy->errorBuffer ) . - "\n-->"; - } - - wfProfileOut( __METHOD__ ); - return $cleansource; + public static function tidy( $text ) { + wfDeprecated( __METHOD__ ); + return MWTidy::tidy( $text ); } /** @@ -998,7 +886,7 @@ class Parser $text = $this->doDoubleUnderscore( $text ); $text = $this->doHeadings( $text ); - if($this->mOptions->getUseDynamicDates()) { + if( $this->mOptions->getUseDynamicDates() ) { $df = DateFormatter::getInstance(); $text = $df->reformat( $this->mOptions->getDateFormat(), $text ); } @@ -1008,7 +896,7 @@ class Parser # replaceInternalLinks may sometimes leave behind # absolute URLs, which have to be masked to hide them from replaceExternalLinks - $text = str_replace($this->mUniqPrefix."NOPARSE", "", $text); + $text = str_replace($this->mUniqPrefix.'NOPARSE', '', $text); $text = $this->doMagicLinks( $text ); $text = $this->formatHeadings( $text, $isMain ); @@ -1045,16 +933,16 @@ class Parser } function magicLinkCallback( $m ) { - if ( isset( $m[1] ) && strval( $m[1] ) !== '' ) { + if ( isset( $m[1] ) && $m[1] !== '' ) { # Skip anchor return $m[0]; - } elseif ( isset( $m[2] ) && strval( $m[2] ) !== '' ) { + } elseif ( isset( $m[2] ) && $m[2] !== '' ) { # Skip HTML element return $m[0]; - } elseif ( isset( $m[3] ) && strval( $m[3] ) !== '' ) { + } elseif ( isset( $m[3] ) && $m[3] !== '' ) { # Free external link return $this->makeFreeExternalLink( $m[0] ); - } elseif ( isset( $m[4] ) && strval( $m[4] ) !== '' ) { + } elseif ( isset( $m[4] ) && $m[4] !== '' ) { # RFC or PMID if ( substr( $m[0], 0, 3 ) === 'RFC' ) { $keyword = 'RFC'; @@ -1072,7 +960,7 @@ class Parser $sk = $this->mOptions->getSkin(); $la = $sk->getExternalLinkAttributes( $url, $keyword.$id ); return "{$keyword} {$id}"; - } elseif ( isset( $m[5] ) && strval( $m[5] ) !== '' ) { + } elseif ( isset( $m[5] ) && $m[5] !== '' ) { # ISBN $isbn = $m[5]; $num = strtr( $isbn, array( @@ -1130,7 +1018,7 @@ class Parser if ( $text === false ) { # Not an image, make a link $text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free', - $this->getExternalLinkAttribs() ); + $this->getExternalLinkAttribs( $url ) ); # Register it in the output object... # Replace unnecessary URL escape codes with their equivalent characters $pasteurized = self::replaceUnusualEscapes( $url ); @@ -1406,18 +1294,12 @@ class Parser $url = Sanitizer::cleanUrl( $url ); - if ( $this->mOptions->mExternalLinkTarget ) { - $attribs = array( 'target' => $this->mOptions->mExternalLinkTarget ); - } else { - $attribs = array(); - } - # Use the encoded URL # This means that users can paste URLs directly into the text # Funny characters like ö aren't valid in URLs anyway # This was changed in August 2004 - $s .= $sk->makeExternalLink( $url, $text, false, $linktype, $this->getExternalLinkAttribs() ) - . $dtrail . $trail; + $s .= $sk->makeExternalLink( $url, $text, false, $linktype, + $this->getExternalLinkAttribs( $url ) ) . $dtrail . $trail; # Register link in the output object. # Replace unnecessary URL escape codes with the referenced character @@ -1430,12 +1312,36 @@ class Parser return $s; } - function getExternalLinkAttribs() { + /** + * Get an associative array of additional HTML attributes appropriate for a + * particular external link. This currently may include rel => nofollow + * (depending on configuration, namespace, and the URL's domain) and/or a + * target attribute (depending on configuration). + * + * @param string $url Optional URL, to extract the domain from for rel => + * nofollow if appropriate + * @return array Associative array of HTML attributes + */ + function getExternalLinkAttribs( $url = false ) { $attribs = array(); global $wgNoFollowLinks, $wgNoFollowNsExceptions; $ns = $this->mTitle->getNamespace(); if( $wgNoFollowLinks && !in_array($ns, $wgNoFollowNsExceptions) ) { $attribs['rel'] = 'nofollow'; + + global $wgNoFollowDomainExceptions; + if ( $wgNoFollowDomainExceptions ) { + $bits = wfParseUrl( $url ); + if ( is_array( $bits ) && isset( $bits['host'] ) ) { + foreach ( $wgNoFollowDomainExceptions as $domain ) { + if( substr( $bits['host'], -strlen( $domain ) ) + == $domain ) { + unset( $attribs['rel'] ); + break; + } + } + } + } } if ( $this->mOptions->getExternalLinkTarget() ) { $attribs['target'] = $this->mOptions->getExternalLinkTarget(); @@ -1697,7 +1603,7 @@ class Parser wfProfileOut( __METHOD__."-misc" ); wfProfileIn( __METHOD__."-title" ); $nt = Title::newFromText( $this->mStripState->unstripNoWiki($link) ); - if( !$nt ) { + if( $nt === NULL ) { $s .= $prefix . '[[' . $line; wfProfileOut( __METHOD__."-title" ); continue; @@ -1823,6 +1729,7 @@ class Parser # NS_MEDIA is a pseudo-namespace for linking directly to a file # FIXME: Should do batch file existence checks, see comment below if( $ns == NS_MEDIA ) { + wfProfileIn( __METHOD__."-media" ); # Give extensions a chance to select the file revision for us $skip = $time = false; wfRunHooks( 'BeforeParserMakeImageLinkObj', array( &$this, &$nt, &$skip, &$time ) ); @@ -1834,9 +1741,11 @@ class Parser # Cloak with NOPARSE to avoid replacement in replaceExternalLinks $s .= $prefix . $this->armorLinks( $link ) . $trail; $this->mOutput->addImage( $nt->getDBkey() ); + wfProfileOut( __METHOD__."-media" ); continue; } + wfProfileIn( __METHOD__."-always_known" ); # Some titles, such as valid special pages or files in foreign repos, should # be shown as bluelinks even though they're not included in the page table # @@ -1849,6 +1758,7 @@ class Parser # Links will be added to the output link list after checking $s .= $holders->makeHolder( $nt, $text, '', $trail, $prefix ); } + wfProfileOut( __METHOD__."-always_known" ); } wfProfileOut( __METHOD__ ); return $holders; @@ -2178,7 +2088,7 @@ class Parser $inBlockElem = true; } } else if ( !$inBlockElem && !$this->mInPre ) { - if ( ' ' == $t{0} and ( $this->mLastSection === 'pre' or trim($t) != '' ) ) { + if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' or trim($t) != '' ) ) { // pre if ($this->mLastSection !== 'pre') { $paragraphStack = false; @@ -2540,6 +2450,12 @@ class Parser $this->mOutput->setFlag( 'vary-revision' ); wfDebug( __METHOD__ . ": {{REVISIONTIMESTAMP}} used, setting vary-revision...\n" ); return $this->getRevisionTimestamp(); + case 'revisionuser': + // Let the edit saving system know we should parse the page + // *after* a revision ID has been assigned. This is for null edits. + $this->mOutput->setFlag( 'vary-revision' ); + wfDebug( __METHOD__ . ": {{REVISIONUSER}} used, setting vary-revision...\n" ); + return $this->getRevisionUser(); case 'namespace': return str_replace('_',' ',$wgContLang->getNsText( $this->mTitle->getNamespace() ) ); case 'namespacee': @@ -2586,6 +2502,8 @@ class Parser return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::images() ); case 'numberofusers': return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::users() ); + case 'numberofactiveusers': + return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::activeUsers() ); case 'numberofpages': return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::pages() ); case 'numberofadmins': @@ -2696,11 +2614,10 @@ class Parser * @private */ function replaceVariables( $text, $frame = false, $argsOnly = false ) { - # Prevent too big inclusions - if( strlen( $text ) > $this->mOptions->getMaxIncludeSize() ) { + # Is there any text? Also, Prevent too big inclusions! + if ( strlen( $text ) < 1 || strlen( $text ) > $this->mOptions->getMaxIncludeSize() ) { return $text; } - wfProfileIn( __METHOD__ ); if ( $frame === false ) { @@ -2776,7 +2693,7 @@ class Parser * @private */ function braceSubstitution( $piece, $frame ) { - global $wgContLang, $wgAllowDisplayTitle, $wgNonincludableNamespaces; + global $wgContLang, $wgNonincludableNamespaces; wfProfileIn( __METHOD__ ); wfProfileIn( __METHOD__.'-setup' ); @@ -2936,12 +2853,6 @@ class Parser if($wgContLang->hasVariants() && $title->getArticleID() == 0){ $wgContLang->findVariantLink( $part1, $title, true ); } - # Do infinite loop check - if ( !$frame->loopCheck( $title ) ) { - $found = true; - $text = '' . wfMsgForContent( 'parser-template-loop-warning', $titleText ) . ''; - wfDebug( __METHOD__.": template loop broken at '$titleText'\n" ); - } # Do recursion depth check $limit = $this->mOptions->getMaxTemplateDepth(); if ( $frame->depth >= $limit ) { @@ -2991,6 +2902,14 @@ class Parser } $found = true; } + + # Do infinite loop check + # This has to be done after redirect resolution to avoid infinite loops via redirects + if ( !$frame->loopCheck( $title ) ) { + $found = true; + $text = '' . wfMsgForContent( 'parser-template-loop-warning', $titleText ) . ''; + wfDebug( __METHOD__.": template loop broken at '$titleText'\n" ); + } wfProfileOut( __METHOD__ . '-loadtpl' ); } @@ -3304,6 +3223,7 @@ class Parser throw new MWException( ' extension tag encountered unexpectedly' ); } case 'nowiki': + $content = strtr($content, array('-{' => '-{', '}-' => '}-')); $output = Xml::escapeTagsOnly( $content ); break; case 'math': @@ -3387,6 +3307,7 @@ class Parser * Fills $this->mDoubleUnderscores, returns the modified text */ function doDoubleUnderscore( $text ) { + wfProfileIn( __METHOD__ ); // The position of __TOC__ needs to be recorded $mw = MagicWord::get( 'toc' ); if( $mw->match( $text ) ) { @@ -3429,7 +3350,7 @@ class Parser } elseif( isset( $this->mDoubleUnderscores['index'] ) ) { $this->mOutput->setIndexPolicy( 'index' ); } - + wfProfileOut( __METHOD__ ); return $text; } @@ -3459,7 +3380,7 @@ class Parser } # Inhibit editsection links if requested in the page - if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) { + if ( isset( $this->mDoubleUnderscores['noeditsection'] ) || $this->mOptions->getIsPrintable() ) { $showEditLink = 0; } @@ -3479,6 +3400,12 @@ class Parser $this->mOutput->setNewSection( true ); } + # Allow user to remove the "new section" + # link via __NONEWSECTIONLINK__ + if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) { + $this->mOutput->hideNewSection( true ); + } + # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML, # override above conditions and always show TOC above first header if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) { @@ -3762,13 +3689,13 @@ class Parser * * @param string $text the text to transform * @param Title &$title the Title object for the current article - * @param User &$user the User object describing the current user + * @param User $user the User object describing the current user * @param ParserOptions $options parsing options * @param bool $clearState whether to clear the parser state first * @return string the altered wiki markup * @public */ - function preSaveTransform( $text, &$title, $user, $options, $clearState = true ) { + function preSaveTransform( $text, Title $title, $user, $options, $clearState = true ) { $this->mOptions = $options; $this->setTitle( $title ); $this->setOutputType( self::OT_WIKI ); @@ -3808,6 +3735,15 @@ class Parser putenv( 'TZ='.$wgLocaltimezone ); $ts = date( 'YmdHis', $unixts ); $tz = date( 'T', $unixts ); # might vary on DST changeover! + + /* Allow translation of timezones trough wiki. date() can return + * whatever crap the system uses, localised or not, so we cannot + * ship premade translations. + */ + $key = 'timezone-' . strtolower( trim( $tz ) ); + $value = wfMsgForContent( $key ); + if ( !wfEmptyMsg( $key, $value ) ) $tz = $value; + putenv( 'TZ='.$oldtz ); } @@ -4627,7 +4563,11 @@ class Parser // Output the replacement text // Add two newlines on -- trailing whitespace in $newText is conventionally // stripped by the editor, so we need both newlines to restore the paragraph gap - $outText .= $newText . "\n\n"; + // Only add trailing whitespace if there is newText + if($newText != "") { + $outText .= $newText . "\n\n"; + } + while ( $node ) { $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); $node = $node->getNextSibling(); @@ -4693,6 +4633,22 @@ class Parser return $this->mRevisionTimestamp; } + /** + * Get the name of the user that edited the last revision + */ + function getRevisionUser() { + // if this template is subst: the revision id will be blank, + // so just use the current user's name + if( $this->mRevisionId ) { + $revision = Revision::newFromId( $this->mRevisionId ); + $revuser = $revision->getUserText(); + } else { + global $wgUser; + $revuser = $wgUser->getName(); + } + return $revuser; + } + /** * Mutator for $mDefaultSort * @@ -4844,6 +4800,102 @@ class Parser } return $out; } + + function serialiseHalfParsedText( $text ) { + $data = array(); + $data['text'] = $text; + + // First, find all strip markers, and store their + // data in an array. + $stripState = new StripState; + $pos = 0; + while( ( $start_pos = strpos( $text, $this->mUniqPrefix, $pos ) ) && ( $end_pos = strpos( $text, self::MARKER_SUFFIX, $pos ) ) ) { + $end_pos += strlen( self::MARKER_SUFFIX ); + $marker = substr( $text, $start_pos, $end_pos-$start_pos ); + + if ( !empty( $this->mStripState->general->data[$marker] ) ) { + $replaceArray = $stripState->general; + $stripText = $this->mStripState->general->data[$marker]; + } elseif ( !empty( $this->mStripState->nowiki->data[$marker] ) ) { + $replaceArray = $stripState->nowiki; + $stripText = $this->mStripState->nowiki->data[$marker]; + } else { + throw new MWException( "Hanging strip marker: '$marker'." ); + } + + $replaceArray->setPair( $marker, $stripText ); + $pos = $end_pos; + } + $data['stripstate'] = $stripState; + + // Now, find all of our links, and store THEIR + // data in an array! :) + $links = array( 'internal' => array(), 'interwiki' => array() ); + $pos = 0; + + // Internal links + while( ( $start_pos = strpos( $text, '' ) ) ); + $links['internal'][$ns][] = $this->mLinkHolders->internals[$ns][$key]; + $pos = $start_pos + strlen( "" ); + } + + $pos = 0; + + // Interwiki links + while( ( $start_pos = strpos( $text, '' ) ) ); + $links['interwiki'][] = $this->mLinkHolders->interwiki[$key]; + $pos = $start_pos + strlen( "" ); + } + + $data['linkholder'] = $links; + + return $data; + } + + function unserialiseHalfParsedText( $data, $intPrefix = null /* Unique identifying prefix */ ) { + if (!$intPrefix) + $intPrefix = $this->getRandomString(); + + // First, extract the strip state. + $stripState = $data['stripstate']; + $this->mStripState->general->merge( $stripState->general ); + $this->mStripState->nowiki->merge( $stripState->nowiki ); + + // Now, extract the text, and renumber links + $text = $data['text']; + $links = $data['linkholder']; + + // Internal... + foreach( $links['internal'] as $ns => $nsLinks ) { + foreach( $nsLinks as $key => $entry ) { + $newKey = $intPrefix . '-' . $key; + $this->mLinkHolders->internals[$ns][$newKey] = $entry; + + $text = str_replace( "", "", $text ); + } + } + + // Interwiki... + foreach( $links['interwiki'] as $key => $entry ) { + $newKey = "$intPrefix-$key"; + $this->mLinkHolders->interwikis[$newKey] = $entry; + + $text = str_replace( "", "", $text ); + } + + // Should be good to go. + return $text; + } } /** -- cgit v1.2.2