summary | refs | log | tree | commit | diff
path: root/includes/parser/Tidy.php
diff options
context:
space:
mode:
Diffstat (limited to 'includes/parser/Tidy.php')
-rw-r--r-- includes/parser/Tidy.php 69
1 files changed, 42 insertions, 27 deletions
diff --git a/includes/parser/Tidy.php b/includes/parser/Tidy.php
index ed2d436d..32b16aaf 100644
--- a/includes/parser/Tidy.php
+++ b/includes/parser/Tidy.php
@@ -59,12 +59,21 @@ class MWTidyWrapper {
dechex( mt_rand( 0, 0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) );
$this->mMarkerIndex = 0;
+ // Replace <mw:editsection> elements with placeholders
$wrappedtext = preg_replace_callback( ParserOutput::EDITSECTION_REGEX,
- array( &$this, 'replaceEditSectionLinksCallback' ), $text );
+ array( &$this, 'replaceCallback' ), $text );
+ // ...and <mw:toc> markers
+ $wrappedtext = preg_replace_callback( '/\<\\/?mw:toc\>/',
+ array( &$this, 'replaceCallback' ), $wrappedtext );
- $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
- ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
- '<head><title>test</title></head><body>'.$wrappedtext.'</body></html>';
+ // Modify inline Microdata <link> and <meta> elements so they say <html-link> and <html-meta> so
+ // we can trick Tidy into not stripping them out by including them in tidy's new-empty-tags config
+ $wrappedtext = preg_replace( '!<(link|meta)([^>]*?)(/{0,1}>)!', '<html-$1$2$3', $wrappedtext );
+
+ // Wrap the whole thing in a doctype and body for Tidy.
+ $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"' .
+ ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>' .
+ '<head><title>test</title></head><body>' . $wrappedtext . '</body></html>';
return $wrappedtext;
}
@@ -74,7 +83,7 @@ class MWTidyWrapper {
*
* @return string
*/
- function replaceEditSectionLinksCallback( $m ) {
+ function replaceCallback( $m ) {
$marker = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}" . Parser::MARKER_SUFFIX;
$this->mMarkerIndex++;
$this->mTokens->setPair( $marker, $m[0] );
@@ -86,7 +95,13 @@ class MWTidyWrapper {
* @return string
*/
public function postprocess( $text ) {
- return $this->mTokens->replace( $text );
+ // Revert <html-{link,meta}> back to <{link,meta}>
+ $text = preg_replace( '!<html-(link|meta)([^>]*?)(/{0,1}>)!', '<$1$2$3', $text );
+
+ // Restore the contents of placeholder tokens
+ $text = $this->mTokens->replace( $text );
+
+ return $text;
}
}
@@ -106,7 +121,7 @@ class MWTidy {
* If tidy isn't able to correct the markup, the original will be
* returned in all its glory with a warning comment appended.
*
- * @param $text String: hideous HTML input
+ * @param string $text hideous HTML input
* @return String: corrected HTML output
*/
public static function tidy( $text ) {
@@ -146,7 +161,7 @@ class MWTidy {
global $wgTidyInternal;
$retval = 0;
- if( $wgTidyInternal ) {
+ if ( $wgTidyInternal ) {
$errorStr = self::execInternalTidy( $text, true, $retval );
} else {
$errorStr = self::execExternalTidy( $text, true, $retval );
@@ -159,7 +174,7 @@ class MWTidy {
* Spawn an external HTML tidy process and get corrected markup back from it.
* Also called in OutputHandler.php for full page validation
*
- * @param $text String: HTML to check
+ * @param string $text HTML to check
* @param $stderr Boolean: Whether to read result from STDERR rather than STDOUT
* @param &$retval int Exit code (-1 on internal error)
* @return mixed String or null
@@ -223,7 +238,7 @@ class MWTidy {
* Use the HTML tidy extension to use the tidy library in-process,
* saving the overhead of spawning a new process.
*
- * @param $text String: HTML to check
+ * @param string $text HTML to check
* @param $stderr Boolean: Whether to read result from error status instead of output
* @param &$retval int Exit code (-1 on internal error)
* @return mixed String or null
@@ -232,7 +247,7 @@ class MWTidy {
global $wgTidyConf, $wgDebugTidy;
wfProfileIn( __METHOD__ );
- if ( !MWInit::classExists( 'tidy' ) ) {
+ if ( !class_exists( 'tidy' ) ) {
wfWarn( "Unable to load internal tidy class." );
$retval = -1;
@@ -248,24 +263,24 @@ class MWTidy {
wfProfileOut( __METHOD__ );
return $tidy->errorBuffer;
+ }
+
+ $tidy->cleanRepair();
+ $retval = $tidy->getStatus();
+ if ( $retval == 2 ) {
+ // 2 is magic number for fatal error
+ // http://www.php.net/manual/en/function.tidy-get-status.php
+ $cleansource = null;
} else {
- $tidy->cleanRepair();
- $retval = $tidy->getStatus();
- if ( $retval == 2 ) {
- // 2 is magic number for fatal error
- // http://www.php.net/manual/en/function.tidy-get-status.php
- $cleansource = null;
- } else {
- $cleansource = tidy_get_output( $tidy );
- if ( $wgDebugTidy && $retval > 0 ) {
- $cleansource .= "<!--\nTidy reports:\n" .
- str_replace( '-->', '--&gt;', $tidy->errorBuffer ) .
- "\n-->";
- }
+ $cleansource = tidy_get_output( $tidy );
+ if ( $wgDebugTidy && $retval > 0 ) {
+ $cleansource .= "<!--\nTidy reports:\n" .
+ str_replace( '-->', '--&gt;', $tidy->errorBuffer ) .
+ "\n-->";
}
-
- wfProfileOut( __METHOD__ );
- return $cleansource;
}
+
+ wfProfileOut( __METHOD__ );
+ return $cleansource;
}
}