summaryrefslogtreecommitdiff
path: root/includes/parser/MWTidy.php
blob: 807842b61c346b2cd37b4493e05faac32acf2414 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
<?php
/**
 * HTML validation and correction
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Parser
 */

/**
 * Class to interact with HTML tidy
 *
 * Either the external tidy program or the in-process tidy extension
 * will be used depending on availability. Override the default
 * $wgTidyInternal setting to disable the in-process extension if it's not working.
 *
 * @ingroup Parser
 */
class MWTidy {
	/**
	 * Cached driver object. Null means the driver has not been resolved yet
	 * (or was reset); setInstance() may also store false here, which makes
	 * isEnabled() report the disabled state.
	 */
	private static $instance;

	/**
	 * Pass an HTML fragment to the active tidy driver and return its result.
	 *
	 * @param string $text HTML input fragment. This should not contain a
	 *                     <body> or <html> tag.
	 * @return string Corrected HTML output, as returned by the driver's tidy()
	 * @throws MWException If no driver is available (tidy is disabled)
	 */
	public static function tidy( $text ) {
		$driver = self::singleton();
		if ( $driver ) {
			return $driver->tidy( $text );
		}
		throw new MWException( __METHOD__.
			': tidy is disabled, caller should have checked MWTidy::isEnabled()' );
	}

	/**
	 * Check HTML for errors, used if $wgValidateAllHtml = true.
	 *
	 * The work is delegated to the driver's validate() method, which is only
	 * called when the driver reports supportsValidate().
	 *
	 * @param string $text
	 * @param string &$errorStr Return the error string
	 * @return bool Whether the HTML is valid
	 * @throws MWException If tidy is disabled, or if the active driver does
	 *   not support validation
	 */
	public static function checkErrors( $text, &$errorStr = null ) {
		$driver = self::singleton();

		// Guard clauses first; the happy path is the final return.
		if ( !$driver ) {
			throw new MWException( __METHOD__.
				': tidy is disabled, caller should have checked MWTidy::isEnabled()' );
		}
		if ( !$driver->supportsValidate() ) {
			throw new MWException( __METHOD__ . ": error text return from HHVM tidy is not supported" );
		}

		return $driver->validate( $text, $errorStr );
	}

	/**
	 * Tell whether a tidy driver can be obtained.
	 *
	 * @return bool False only when singleton() yields exactly false
	 */
	public static function isEnabled() {
		$driver = self::singleton();
		return $driver !== false;
	}

	/**
	 * Return the cached driver, building it from global configuration on
	 * first use.
	 *
	 * $wgTidyConfig takes precedence when it is not null. Otherwise, when
	 * $wgUseTidy is set, an equivalent configuration array is assembled from
	 * the older individual settings.
	 *
	 * @return object|bool The driver instance, or false when neither
	 *   $wgTidyConfig nor $wgUseTidy enables tidy
	 * @throws MWException If the configured driver name is not recognised
	 */
	protected static function singleton() {
		global $wgUseTidy, $wgTidyInternal, $wgTidyConf, $wgDebugTidy, $wgTidyConfig,
			$wgTidyBin, $wgTidyOpts;

		// Already resolved (or explicitly set through setInstance()).
		if ( self::$instance !== null ) {
			return self::$instance;
		}

		if ( $wgTidyConfig !== null ) {
			$config = $wgTidyConfig;
		} elseif ( !$wgUseTidy ) {
			// Nothing is cached here, so the globals are consulted again next time.
			return false;
		} else {
			// b/c configuration
			$config = array(
				'tidyConfigFile' => $wgTidyConf,
				'debugComment' => $wgDebugTidy,
				'tidyBin' => $wgTidyBin,
				'tidyCommandLine' => $wgTidyOpts );
			if ( !$wgTidyInternal ) {
				$config['driver'] = 'RaggettExternal';
			} elseif ( wfIsHHVM() ) {
				$config['driver'] = 'RaggettInternalHHVM';
			} else {
				$config['driver'] = 'RaggettInternalPHP';
			}
		}

		switch ( $config['driver'] ) {
			case 'RaggettInternalHHVM':
				self::$instance = new MediaWiki\Tidy\RaggettInternalHHVM( $config );
				break;
			case 'RaggettInternalPHP':
				self::$instance = new MediaWiki\Tidy\RaggettInternalPHP( $config );
				break;
			case 'RaggettExternal':
				self::$instance = new MediaWiki\Tidy\RaggettExternal( $config );
				break;
			case 'Html5Depurate':
				self::$instance = new MediaWiki\Tidy\Html5Depurate( $config );
				break;
			default:
				throw new MWException( "Invalid tidy driver: \"{$config['driver']}\"" );
		}

		return self::$instance;
	}

	/**
	 * Set the driver to be used. This is for testing.
	 *
	 * Passing false makes isEnabled() return false; passing null causes the
	 * driver to be rebuilt from configuration on the next access.
	 *
	 * @param TidyDriverBase|false|null $instance
	 */
	public static function setInstance( $instance ) {
		self::$instance = $instance;
	}

	/**
	 * Destroy the current singleton instance, so that the next access
	 * resolves the driver again.
	 */
	public static function destroySingleton() {
		self::setInstance( null );
	}
}