summaryrefslogtreecommitdiff
path: root/includes/Sanitizer.php
diff options
context:
space:
mode:
authorLuke Shumaker <LukeShu@sbcglobal.net>2014-01-28 09:50:25 -0500
committerLuke Shumaker <LukeShu@sbcglobal.net>2014-01-28 09:50:25 -0500
commit5744df39e15f85c6cc8a9faf8924d77e76d2b216 (patch)
treea8c8dd40a94d1fa0d5377566aa5548ae55a163da /includes/Sanitizer.php
parent4bb2aeca1d198391ca856aa16c40b8559c68daec (diff)
parent224b22a051051f6c2e494c3a2fb4adb42898e2d1 (diff)
Merge branch 'archwiki'
Conflicts: extensions/FluxBBAuthPlugin.php extensions/SyntaxHighlight_GeSHi/README extensions/SyntaxHighlight_GeSHi/SyntaxHighlight_GeSHi.class.php extensions/SyntaxHighlight_GeSHi/SyntaxHighlight_GeSHi.i18n.php extensions/SyntaxHighlight_GeSHi/SyntaxHighlight_GeSHi.php extensions/SyntaxHighlight_GeSHi/geshi/docs/CHANGES extensions/SyntaxHighlight_GeSHi/geshi/docs/THANKS extensions/SyntaxHighlight_GeSHi/geshi/docs/TODO extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/AbstractClass.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/AbstractClass_logo.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/AbstractMethod.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/AbstractPrivateClass.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/AbstractPrivateClass_logo.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/AbstractPrivateMethod.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Class.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Class_logo.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Constant.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Constructor.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Destructor.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Function.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Global.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/I.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Index.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Interface.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Interface_logo.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/L.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Lminus.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Lplus.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Method.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Page.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Page_logo.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/PrivateClass.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/PrivateClass_logo.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/PrivateMethod.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/PrivateVariable.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/StaticMethod.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/StaticVariable.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/T.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Tminus.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Tplus.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Variable.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/blank.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/class_folder.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/file.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/folder.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/function_folder.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/next_button.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/next_button_disabled.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/package.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/package_folder.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/previous_button.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/previous_button_disabled.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/private_class_logo.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/tutorial.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/tutorial_folder.png extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/up_button.png extensions/SyntaxHighlight_GeSHi/geshi/docs/geshi-doc.html extensions/SyntaxHighlight_GeSHi/geshi/docs/geshi-doc.txt extensions/SyntaxHighlight_GeSHi/geshi/geshi.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/4cs.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/6502acme.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/6502kickass.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/6502tasm.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/68000devpac.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/abap.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/actionscript.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/actionscript3.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/ada.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/algol68.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/apache.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/applescript.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/apt_sources.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/asm.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/asp.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/autoconf.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/autohotkey.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/autoit.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/avisynth.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/awk.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/bascomavr.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/bash.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/basic4gl.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/bf.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/bibtex.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/blitzbasic.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/bnf.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/boo.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/c.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/c_loadrunner.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/c_mac.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/caddcl.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/cadlisp.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/cfdg.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/cfm.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/chaiscript.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/cil.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/clojure.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/cmake.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/cobol.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/coffeescript.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/cpp-qt.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/cpp.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/csharp.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/css.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/cuesheet.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/d.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/dcs.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/delphi.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/diff.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/div.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/dos.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/dot.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/e.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/ecmascript.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/eiffel.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/email.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/epc.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/erlang.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/euphoria.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/f1.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/falcon.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/fo.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/fortran.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/freebasic.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/fsharp.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/gambas.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/gdb.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/genero.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/genie.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/gettext.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/glsl.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/gml.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/gnuplot.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/go.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/groovy.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/gwbasic.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/haskell.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/hicest.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/hq9plus.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/html4strict.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/html5.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/icon.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/idl.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/ini.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/inno.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/intercal.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/io.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/j.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/java.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/java5.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/javascript.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/jquery.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/kixtart.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/klonec.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/klonecpp.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/latex.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/lb.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/lisp.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/llvm.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/locobasic.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/logtalk.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/lolcode.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/lotusformulas.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/lotusscript.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/lscript.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/lsl2.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/lua.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/m68k.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/magiksf.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/make.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/mapbasic.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/matlab.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/mirc.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/mmix.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/modula2.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/modula3.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/mpasm.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/mxml.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/mysql.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/newlisp.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/nsis.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/oberon2.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/objc.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/objeck.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/ocaml-brief.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/ocaml.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/oobas.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/oracle11.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/oracle8.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/oxygene.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/oz.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/pascal.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/pcre.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/per.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/perl.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/perl6.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/pf.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/php-brief.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/php.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/pic16.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/pike.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/pixelbender.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/pli.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/plsql.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/postgresql.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/povray.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/powerbuilder.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/powershell.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/proftpd.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/progress.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/prolog.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/properties.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/providex.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/purebasic.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/pycon.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/python.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/q.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/qbasic.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/rails.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/rebol.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/reg.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/robots.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/rpmspec.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/rsplus.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/ruby.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/sas.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/scala.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/scheme.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/scilab.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/sdlbasic.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/smalltalk.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/smarty.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/sql.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/systemverilog.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/tcl.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/teraterm.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/text.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/thinbasic.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/tsql.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/typoscript.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/unicon.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/uscript.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/vala.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/vb.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/vbnet.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/verilog.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/vhdl.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/vim.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/visualfoxpro.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/visualprolog.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/whitespace.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/whois.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/winbatch.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/xbasic.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/xml.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/xorg_conf.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/xpp.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/yaml.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/z80.php extensions/SyntaxHighlight_GeSHi/geshi/geshi/zxbasic.php
Diffstat (limited to 'includes/Sanitizer.php')
-rw-r--r--includes/Sanitizer.php510
1 files changed, 346 insertions, 164 deletions
diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php
index b443ce14..9e9ac38b 100644
--- a/includes/Sanitizer.php
+++ b/includes/Sanitizer.php
@@ -1,6 +1,6 @@
<?php
/**
- * XHTML sanitizer for %MediaWiki.
+ * HTML sanitizer for %MediaWiki.
*
* Copyright © 2002-2005 Brion Vibber <brion@pobox.com> et al
* http://www.mediawiki.org/
@@ -25,7 +25,7 @@
*/
/**
- * XHTML sanitizer for MediaWiki
+ * HTML sanitizer for MediaWiki
* @ingroup Parser
*/
class Sanitizer {
@@ -54,9 +54,8 @@ class Sanitizer {
* List of all named character entities defined in HTML 4.01
* http://www.w3.org/TR/html4/sgml/entities.html
* As well as &apos; which is only defined starting in XHTML1.
- * @private
*/
- static $htmlEntities = array(
+ private static $htmlEntities = array(
'Aacute' => 193,
'aacute' => 225,
'Acirc' => 194,
@@ -315,7 +314,7 @@ class Sanitizer {
/**
* Character entity aliases accepted by MediaWiki
*/
- static $htmlEntityAliases = array(
+ private static $htmlEntityAliases = array(
'רלמ' => 'rlm',
'رلم' => 'rlm',
);
@@ -323,7 +322,7 @@ class Sanitizer {
/**
* Lazy-initialised attributes regex, see getAttribsRegex()
*/
- static $attribsRegex;
+ private static $attribsRegex;
/**
* Regular expression to match HTML/XML attribute pairs within a tag.
@@ -357,51 +356,61 @@ class Sanitizer {
* removes HTML comments
* @private
* @param $text String
- * @param $processCallback Callback to do any variable or parameter replacements in HTML attribute values
- * @param $args Array for the processing callback
- * @param $extratags Array for any extra tags to include
- * @param $removetags Array for any tags (default or extra) to exclude
+ * @param $processCallback Callback to do any variable or parameter
+ * replacements in HTML attribute values
+ * @param array $args for the processing callback
+ * @param array $extratags for any extra tags to include
+ * @param array $removetags for any tags (default or extra) to exclude
* @return string
*/
- static function removeHTMLtags( $text, $processCallback = null, $args = array(), $extratags = array(), $removetags = array() ) {
- global $wgUseTidy;
+ static function removeHTMLtags( $text, $processCallback = null,
+ $args = array(), $extratags = array(), $removetags = array()
+ ) {
+ global $wgUseTidy, $wgAllowMicrodataAttributes, $wgAllowImageTag;
static $htmlpairsStatic, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags,
$htmllist, $listtags, $htmlsingleallowed, $htmlelementsStatic, $staticInitialised;
wfProfileIn( __METHOD__ );
- if ( !$staticInitialised ) {
+ // Base our staticInitialised variable off of the global config state so that if the globals
+ // are changed (like in the screwed up test system) we will re-initialise the settings.
+ $globalContext = implode( '-', compact( 'wgAllowMicrodataAttributes', 'wgAllowImageTag' ) );
+ if ( !$staticInitialised || $staticInitialised != $globalContext ) {
$htmlpairsStatic = array( # Tags that must be closed
'b', 'bdi', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
'strike', 'strong', 'tt', 'var', 'div', 'center',
'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
- 'ruby', 'rt' , 'rb' , 'rp', 'p', 'span', 'abbr', 'dfn',
- 'kbd', 'samp'
+ 'ruby', 'rt', 'rb', 'rp', 'p', 'span', 'abbr', 'dfn',
+ 'kbd', 'samp', 'data', 'time', 'mark'
);
$htmlsingle = array(
- 'br', 'hr', 'li', 'dt', 'dd'
+ 'br', 'wbr', 'hr', 'li', 'dt', 'dd'
);
$htmlsingleonly = array( # Elements that cannot have close tags
- 'br', 'hr'
+ 'br', 'wbr', 'hr'
);
+ if ( $wgAllowMicrodataAttributes ) {
+ $htmlsingle[] = $htmlsingleonly[] = 'meta';
+ $htmlsingle[] = $htmlsingleonly[] = 'link';
+ }
$htmlnest = array( # Tags that can be nested--??
'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
- 'dl', 'font', 'big', 'small', 'sub', 'sup', 'span'
+ 'li', 'dl', 'dt', 'dd', 'font', 'big', 'small', 'sub', 'sup', 'span',
+ 'var', 'kbd', 'samp', 'em', 'strong', 'q', 'ruby', 'bdo'
);
$tabletags = array( # Can only appear inside table, we will close them
'td', 'th', 'tr',
);
$htmllist = array( # Tags used by list
- 'ul','ol',
+ 'ul', 'ol',
);
$listtags = array( # Tags that can appear in a list
'li',
);
- global $wgAllowImageTag;
if ( $wgAllowImageTag ) {
$htmlsingle[] = 'img';
$htmlsingleonly[] = 'img';
@@ -416,13 +425,13 @@ class Sanitizer {
foreach ( $vars as $var ) {
$$var = array_flip( $$var );
}
- $staticInitialised = true;
+ $staticInitialised = $globalContext;
}
# Populate $htmlpairs and $htmlelements with the $extratags and $removetags arrays
$extratags = array_flip( $extratags );
$removetags = array_flip( $removetags );
$htmlpairs = array_merge( $extratags, $htmlpairsStatic );
- $htmlelements = array_diff_key( array_merge( $extratags, $htmlelementsStatic ) , $removetags );
+ $htmlelements = array_diff_key( array_merge( $extratags, $htmlelementsStatic ), $removetags );
# Remove HTML comments
$text = Sanitizer::removeHTMLcomments( $text );
@@ -437,7 +446,7 @@ class Sanitizer {
# $params: String between element name and >
# $brace: Ending '>' or '/>'
# $rest: Everything until the next element of $bits
- if( preg_match( '!^(/?)(\\w+)([^>]*?)(/{0,1}>)([^<]*)$!', $x, $regs ) ) {
+ if ( preg_match( '!^(/?)([^\\s/>]+)([^>]*?)(/{0,1}>)([^<]*)$!', $x, $regs ) ) {
list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs;
} else {
$slash = $t = $params = $brace = $rest = null;
@@ -498,18 +507,22 @@ class Sanitizer {
!in_array( 'table', $tagstack ) ) {
$badtag = true;
} elseif ( in_array( $t, $tagstack ) &&
- !isset( $htmlnest [$t ] ) ) {
+ !isset( $htmlnest[$t] ) ) {
$badtag = true;
# Is it a self closed htmlpair ? (bug 5487)
} elseif ( $brace == '/>' &&
isset( $htmlpairs[$t] ) ) {
$badtag = true;
} elseif ( isset( $htmlsingleonly[$t] ) ) {
- # Hack to force empty tag for uncloseable elements
+ # Hack to force empty tag for unclosable elements
$brace = '/>';
} elseif ( isset( $htmlsingle[$t] ) ) {
# Hack to not close $htmlsingle tags
$brace = null;
+ # Still need to push this optionally-closed tag to
+ # the tag stack so that we can match end tags
+ # instead of marking them as bad.
+ array_push( $tagstack, $t );
} elseif ( isset( $tabletags[$t] )
&& in_array( $t, $tagstack ) ) {
// New table tag but forgot to close the previous one
@@ -524,10 +537,14 @@ class Sanitizer {
# Replace any variables or template parameters with
# plaintext results.
- if( is_callable( $processCallback ) ) {
+ if ( is_callable( $processCallback ) ) {
call_user_func_array( $processCallback, array( &$params, $args ) );
}
+ if ( !Sanitizer::validateTag( $params, $t ) ) {
+ $badtag = true;
+ }
+
# Strip non-approved attributes from the tag
$newparams = Sanitizer::fixTagAttributes( $params, $t );
}
@@ -538,12 +555,14 @@ class Sanitizer {
continue;
}
}
- $text .= '&lt;' . str_replace( '>', '&gt;', $x);
+ $text .= '&lt;' . str_replace( '>', '&gt;', $x );
}
# Close off any remaining tags
- while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
+ while ( is_array( $tagstack ) && ( $t = array_pop( $tagstack ) ) ) {
$text .= "</$t>\n";
- if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); }
+ if ( $t == 'table' ) {
+ $tagstack = array_pop( $tablestack );
+ }
}
} else {
# this might be possible using tidy itself
@@ -551,16 +570,24 @@ class Sanitizer {
preg_match( '/^(\\/?)(\\w+)([^>]*?)(\\/{0,1}>)([^<]*)$/',
$x, $regs );
@list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs;
+ $badtag = false;
if ( isset( $htmlelements[$t = strtolower( $t )] ) ) {
- if( is_callable( $processCallback ) ) {
+ if ( is_callable( $processCallback ) ) {
call_user_func_array( $processCallback, array( &$params, $args ) );
}
+
+ if ( !Sanitizer::validateTag( $params, $t ) ) {
+ $badtag = true;
+ }
+
$newparams = Sanitizer::fixTagAttributes( $params, $t );
- $rest = str_replace( '>', '&gt;', $rest );
- $text .= "<$slash$t$newparams$brace$rest";
- } else {
- $text .= '&lt;' . str_replace( '>', '&gt;', $x);
+ if ( !$badtag ) {
+ $rest = str_replace( '>', '&gt;', $rest );
+ $text .= "<$slash$t$newparams$brace$rest";
+ continue;
+ }
}
+ $text .= '&lt;' . str_replace( '>', '&gt;', $x );
}
}
wfProfileOut( __METHOD__ );
@@ -579,9 +606,9 @@ class Sanitizer {
*/
static function removeHTMLcomments( $text ) {
wfProfileIn( __METHOD__ );
- while (($start = strpos($text, '<!--')) !== false) {
- $end = strpos($text, '-->', $start + 4);
- if ($end === false) {
+ while ( ( $start = strpos( $text, '<!--' ) ) !== false ) {
+ $end = strpos( $text, '-->', $start + 4 );
+ if ( $end === false ) {
# Unterminated comment; bail out
break;
}
@@ -590,22 +617,24 @@ class Sanitizer {
# Trim space and newline if the comment is both
# preceded and followed by a newline
- $spaceStart = max($start - 1, 0);
+ $spaceStart = max( $start - 1, 0 );
$spaceLen = $end - $spaceStart;
- while (substr($text, $spaceStart, 1) === ' ' && $spaceStart > 0) {
+ while ( substr( $text, $spaceStart, 1 ) === ' ' && $spaceStart > 0 ) {
$spaceStart--;
$spaceLen++;
}
- while (substr($text, $spaceStart + $spaceLen, 1) === ' ')
+ while ( substr( $text, $spaceStart + $spaceLen, 1 ) === ' ' ) {
$spaceLen++;
- if (substr($text, $spaceStart, 1) === "\n" and substr($text, $spaceStart + $spaceLen, 1) === "\n") {
+ }
+ if ( substr( $text, $spaceStart, 1 ) === "\n"
+ && substr( $text, $spaceStart + $spaceLen, 1 ) === "\n" ) {
# Remove the comment, leading and trailing
# spaces, and leave only one newline.
- $text = substr_replace($text, "\n", $spaceStart, $spaceLen + 1);
+ $text = substr_replace( $text, "\n", $spaceStart, $spaceLen + 1 );
}
else {
# Remove just the comment.
- $text = substr_replace($text, '', $start, $end - $start);
+ $text = substr_replace( $text, '', $start, $end - $start );
}
}
wfProfileOut( __METHOD__ );
@@ -613,12 +642,45 @@ class Sanitizer {
}
/**
+ * Takes attribute names and values for a tag and the tag name and
+ * validates that the tag is allowed to be present.
+ * This DOES NOT validate the attributes, nor does it validate the
+ * tags themselves. This method only handles the special circumstances
+ * where we may want to allow a tag within content but ONLY when it has
+ * specific attributes set.
+ *
+ * @param $params
+ * @param $element
+ * @return bool
+ */
+ static function validateTag( $params, $element ) {
+ $params = Sanitizer::decodeTagAttributes( $params );
+
+ if ( $element == 'meta' || $element == 'link' ) {
+ if ( !isset( $params['itemprop'] ) ) {
+ // <meta> and <link> must have an itemprop="" otherwise they are not valid or safe in content
+ return false;
+ }
+ if ( $element == 'meta' && !isset( $params['content'] ) ) {
+ // <meta> must have a content="" for the itemprop
+ return false;
+ }
+ if ( $element == 'link' && !isset( $params['href'] ) ) {
+ // <link> must have an associated href=""
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ /**
* Take an array of attribute names and values and normalize or discard
* illegal values for the given element type.
*
* - Discards attributes not on a whitelist for the given element
* - Unsafe style attributes are discarded
- * - Invalid id attributes are reencoded
+ * - Invalid id attributes are re-encoded
*
* @param $attribs Array
* @param $element String
@@ -638,23 +700,23 @@ class Sanitizer {
*
* - Discards attributes not the given whitelist
* - Unsafe style attributes are discarded
- * - Invalid id attributes are reencoded
+ * - Invalid id attributes are re-encoded
*
* @param $attribs Array
- * @param $whitelist Array: list of allowed attribute names
+ * @param array $whitelist list of allowed attribute names
* @return Array
*
* @todo Check for legal values where the DTD limits things.
* @todo Check for unique id attribute :P
*/
static function validateAttributes( $attribs, $whitelist ) {
- global $wgAllowRdfaAttributes, $wgAllowMicrodataAttributes, $wgHtml5;
+ global $wgAllowRdfaAttributes, $wgAllowMicrodataAttributes;
$whitelist = array_flip( $whitelist );
$hrefExp = '/^(' . wfUrlProtocols() . ')[^\s]+$/';
$out = array();
- foreach( $attribs as $attribute => $value ) {
+ foreach ( $attribs as $attribute => $value ) {
#allow XML namespace declaration if RDFa is enabled
if ( $wgAllowRdfaAttributes && preg_match( self::XMLNS_ATTRIBUTE_PATTERN, $attribute ) ) {
if ( !preg_match( self::EVIL_URI_PATTERN, $value ) ) {
@@ -664,14 +726,14 @@ class Sanitizer {
continue;
}
- # Allow any attribute beginning with "data-", if in HTML5 mode
- if ( !($wgHtml5 && preg_match( '/^data-/i', $attribute )) && !isset( $whitelist[$attribute] ) ) {
+ # Allow any attribute beginning with "data-"
+ if ( !preg_match( '/^data-/i', $attribute ) && !isset( $whitelist[$attribute] ) ) {
continue;
}
# Strip javascript "expression" from stylesheets.
# http://msdn.microsoft.com/workshop/author/dhtml/overview/recalc.asp
- if( $attribute == 'style' ) {
+ if ( $attribute == 'style' ) {
$value = Sanitizer::checkCss( $value );
}
@@ -679,13 +741,28 @@ class Sanitizer {
$value = Sanitizer::escapeId( $value, 'noninitial' );
}
- //RDFa and microdata properties allow URLs, URIs and/or CURIs. check them for sanity
- if ( $attribute === 'rel' || $attribute === 'rev' ||
- $attribute === 'about' || $attribute === 'property' || $attribute === 'resource' || #RDFa
- $attribute === 'datatype' || $attribute === 'typeof' || #RDFa
- $attribute === 'itemid' || $attribute === 'itemprop' || $attribute === 'itemref' || #HTML5 microdata
- $attribute === 'itemscope' || $attribute === 'itemtype' ) { #HTML5 microdata
+ # WAI-ARIA
+ # http://www.w3.org/TR/wai-aria/
+ # http://www.whatwg.org/html/elements.html#wai-aria
+ # For now we only support role="presentation" until we work out what roles should be
+ # usable by content and we ensure that our code explicitly rejects patterns that
+ # violate HTML5's ARIA restrictions.
+ if ( $attribute === 'role' && $value !== 'presentation' ) {
+ continue;
+ }
+ // RDFa and microdata properties allow URLs, URIs and/or CURIs.
+ // Check them for sanity.
+ if ( $attribute === 'rel' || $attribute === 'rev'
+ # RDFa
+ || $attribute === 'about' || $attribute === 'property'
+ || $attribute === 'resource' || $attribute === 'datatype'
+ || $attribute === 'typeof'
+ # HTML5 microdata
+ || $attribute === 'itemid' || $attribute === 'itemprop'
+ || $attribute === 'itemref' || $attribute === 'itemscope'
+ || $attribute === 'itemtype'
+ ) {
//Paranoia. Allow "simple" values but suppress javascript
if ( preg_match( self::EVIL_URI_PATTERN, $value ) ) {
continue;
@@ -697,7 +774,7 @@ class Sanitizer {
if ( $attribute === 'href' || $attribute === 'src' ) {
if ( !preg_match( $hrefExp, $value ) ) {
continue; //drop any href or src attributes not using an allowed protocol.
- //NOTE: this also drops all relative URLs
+ // NOTE: this also drops all relative URLs
}
}
@@ -713,7 +790,7 @@ class Sanitizer {
unset( $out['itemid'] );
unset( $out['itemref'] );
}
- # TODO: Strip itemprop if we aren't descendants of an itemscope.
+ # TODO: Strip itemprop if we aren't descendants of an itemscope or pointed to by an itemref.
}
return $out;
}
@@ -730,9 +807,10 @@ class Sanitizer {
*/
static function mergeAttributes( $a, $b ) {
$out = array_merge( $a, $b );
- if( isset( $a['class'] ) && isset( $b['class'] )
- && is_string( $a['class'] ) && is_string( $b['class'] )
- && $a['class'] !== $b['class'] ) {
+ if ( isset( $a['class'] ) && isset( $b['class'] )
+ && is_string( $a['class'] ) && is_string( $b['class'] )
+ && $a['class'] !== $b['class']
+ ) {
$classes = preg_split( '/\s+/', "{$a['class']} {$b['class']}",
-1, PREG_SPLIT_NO_EMPTY );
$out['class'] = implode( ' ', array_unique( $classes ) );
@@ -743,9 +821,10 @@ class Sanitizer {
/**
* Pick apart some CSS and check it for forbidden or unsafe structures.
* Returns a sanitized string. This sanitized string will have
- * character references and escape sequences decoded, and comments
- * stripped. If the input is just too evil, only a comment complaining
- * about evilness will be returned.
+ * character references and escape sequences decoded and comments
+ * stripped (unless it is itself one valid comment, in which case the value
+ * will be passed through). If the input is just too evil, only a comment
+ * complaining about evilness will be returned.
*
* Currently URL references, 'expression', 'tps' are forbidden.
*
@@ -786,25 +865,77 @@ class Sanitizer {
$value = preg_replace_callback( $decodeRegex,
array( __CLASS__, 'cssDecodeCallback' ), $value );
- // Remove any comments; IE gets token splitting wrong
- // This must be done AFTER decoding character references and
- // escape sequences, because those steps can introduce comments
- // This step cannot introduce character references or escape
- // sequences, because it replaces comments with spaces rather
- // than removing them completely.
- $value = StringUtils::delimiterReplace( '/*', '*/', ' ', $value );
-
- // Remove anything after a comment-start token, to guard against
- // incorrect client implementations.
- $commentPos = strpos( $value, '/*' );
- if ( $commentPos !== false ) {
- $value = substr( $value, 0, $commentPos );
+ // Normalize Halfwidth and Fullwidth Unicode block that IE6 might treat as ascii
+ $value = preg_replace_callback(
+ '/[!-[]-z]/u', // U+FF01 to U+FF5A, excluding U+FF3C (bug 58088)
+ function ( $matches ) {
+ $cp = utf8ToCodepoint( $matches[0] );
+ if ( $cp === false ) {
+ return '';
+ }
+ return chr( $cp - 65248 ); // ASCII range \x21-\x7A
+ },
+ $value
+ );
+
+ // Convert more characters IE6 might treat as ascii
+ // U+0280, U+0274, U+207F, U+029F, U+026A, U+207D, U+208D
+ $value = str_replace(
+ array( 'ʀ', 'ɴ', 'ⁿ', 'ʟ', 'ɪ', '⁽', '₍' ),
+ array( 'r', 'n', 'n', 'l', 'i', '(', '(' ),
+ $value
+ );
+
+ // Let the value through if it's nothing but a single comment, to
+ // allow other functions which may reject it to pass some error
+ // message through.
+ if ( !preg_match( '! ^ \s* /\* [^*\\/]* \*/ \s* $ !x', $value ) ) {
+ // Remove any comments; IE gets token splitting wrong
+ // This must be done AFTER decoding character references and
+ // escape sequences, because those steps can introduce comments
+ // This step cannot introduce character references or escape
+ // sequences, because it replaces comments with spaces rather
+ // than removing them completely.
+ $value = StringUtils::delimiterReplace( '/*', '*/', ' ', $value );
+
+ // Remove anything after a comment-start token, to guard against
+ // incorrect client implementations.
+ $commentPos = strpos( $value, '/*' );
+ if ( $commentPos !== false ) {
+ $value = substr( $value, 0, $commentPos );
+ }
}
+ // S followed by repeat, iteration, or prolonged sound marks,
+ // which IE will treat as "ss"
+ $value = preg_replace(
+ '/s(?:
+ \xE3\x80\xB1 | # U+3031
+ \xE3\x82\x9D | # U+309D
+ \xE3\x83\xBC | # U+30FC
+ \xE3\x83\xBD | # U+30FD
+ \xEF\xB9\xBC | # U+FE7C
+ \xEF\xB9\xBD | # U+FE7D
+ \xEF\xBD\xB0 # U+FF70
+ )/ix',
+ 'ss',
+ $value
+ );
+
// Reject problematic keywords and control characters
- if ( preg_match( '/[\000-\010\016-\037\177]/', $value ) ) {
+ if ( preg_match( '/[\000-\010\013\016-\037\177]/', $value ) ) {
return '/* invalid control char */';
- } elseif ( preg_match( '! expression | filter\s*: | accelerator\s*: | url\s*\( !ix', $value ) ) {
+ } elseif ( preg_match(
+ '! expression
+ | filter\s*:
+ | accelerator\s*:
+ | -o-link\s*:
+ | -o-link-source\s*:
+ | -o-replace\s*:
+ | url\s*\(
+ | image\s*\(
+ | image-set\s*\(
+ !ix', $value ) ) {
return '/* insecure input */';
}
return $value;
@@ -855,21 +986,14 @@ class Sanitizer {
* @return String
*/
static function fixTagAttributes( $text, $element ) {
- if( trim( $text ) == '' ) {
+ if ( trim( $text ) == '' ) {
return '';
}
$decoded = Sanitizer::decodeTagAttributes( $text );
$stripped = Sanitizer::validateTagAttributes( $decoded, $element );
- $attribs = array();
- foreach( $stripped as $attribute => $value ) {
- $encAttribute = htmlspecialchars( $attribute );
- $encValue = Sanitizer::safeEncodeAttribute( $value );
-
- $attribs[] = "$encAttribute=\"$encValue\"";
- }
- return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : '';
+ return Sanitizer::safeEncodeTagAttributes( $stripped );
}
/**
@@ -942,10 +1066,10 @@ class Sanitizer {
* in the id and
* name attributes
* @see http://www.w3.org/TR/html401/struct/links.html#h-12.2.3 Anchors with the id attribute
- * @see http://www.whatwg.org/specs/web-apps/current-work/multipage/elements.html#the-id-attribute
+ * @see http://www.whatwg.org/html/elements.html#the-id-attribute
* HTML5 definition of id attribute
*
- * @param $id String: id to escape
+ * @param string $id id to escape
* @param $options Mixed: string or array of strings (default is array()):
* 'noninitial': This is a non-initial fragment of an id, not a full id,
* so don't pay attention if the first character isn't valid at the
@@ -957,10 +1081,10 @@ class Sanitizer {
* @return String
*/
static function escapeId( $id, $options = array() ) {
- global $wgHtml5, $wgExperimentalHtmlIds;
+ global $wgExperimentalHtmlIds;
$options = (array)$options;
- if ( $wgHtml5 && $wgExperimentalHtmlIds && !in_array( 'legacy', $options ) ) {
+ if ( $wgExperimentalHtmlIds && !in_array( 'legacy', $options ) ) {
$id = Sanitizer::decodeCharReferences( $id );
$id = preg_replace( '/[ \t\n\r\f_\'"&#%]+/', '_', $id );
$id = trim( $id, '_' );
@@ -982,7 +1106,7 @@ class Sanitizer {
$id = str_replace( array_keys( $replace ), array_values( $replace ), $id );
if ( !preg_match( '/^[a-zA-Z]/', $id )
- && !in_array( 'noninitial', $options ) ) {
+ && !in_array( 'noninitial', $options ) ) {
// Initial character must be a letter!
$id = "x$id";
}
@@ -1002,17 +1126,17 @@ class Sanitizer {
*/
static function escapeClass( $class ) {
// Convert ugly stuff to underscores and kill underscores in ugly places
- return rtrim(preg_replace(
- array('/(^[0-9\\-])|[\\x00-\\x20!"#$%&\'()*+,.\\/:;<=>?@[\\]^`{|}~]|\\xC2\\xA0/','/_+/'),
+ return rtrim( preg_replace(
+ array( '/(^[0-9\\-])|[\\x00-\\x20!"#$%&\'()*+,.\\/:;<=>?@[\\]^`{|}~]|\\xC2\\xA0/', '/_+/' ),
'_',
- $class ), '_');
+ $class ), '_' );
}
/**
- * Given HTML input, escape with htmlspecialchars but un-escape entites.
+ * Given HTML input, escape with htmlspecialchars but un-escape entities.
* This allows (generally harmless) entities like &#160; to survive.
*
- * @param $html String to escape
+ * @param string $html to escape
* @return String: escaped input
*/
static function escapeHtmlAllowEntities( $html ) {
@@ -1041,13 +1165,13 @@ class Sanitizer {
* @return Array
*/
public static function decodeTagAttributes( $text ) {
- if( trim( $text ) == '' ) {
+ if ( trim( $text ) == '' ) {
return array();
}
$attribs = array();
$pairs = array();
- if( !preg_match_all(
+ if ( !preg_match_all(
self::getAttribsRegex(),
$text,
$pairs,
@@ -1055,7 +1179,7 @@ class Sanitizer {
return $attribs;
}
- foreach( $pairs as $set ) {
+ foreach ( $pairs as $set ) {
$attribute = strtolower( $set[1] );
$value = Sanitizer::getTagAttributeCallback( $set );
@@ -1070,26 +1194,45 @@ class Sanitizer {
}
/**
+ * Build a partial tag string from an associative array of attribute
+ * names and values as returned by decodeTagAttributes.
+ *
+ * @param $assoc_array Array
+ * @return String
+ */
+ public static function safeEncodeTagAttributes( $assoc_array ) {
+ $attribs = array();
+ foreach ( $assoc_array as $attribute => $value ) {
+ $encAttribute = htmlspecialchars( $attribute );
+ $encValue = Sanitizer::safeEncodeAttribute( $value );
+
+ $attribs[] = "$encAttribute=\"$encValue\"";
+ }
+ return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : '';
+ }
+
+ /**
* Pick the appropriate attribute value from a match set from the
* attribs regex matches.
*
* @param $set Array
+ * @throws MWException
* @return String
*/
private static function getTagAttributeCallback( $set ) {
- if( isset( $set[6] ) ) {
+ if ( isset( $set[6] ) ) {
# Illegal #XXXXXX color with no quotes.
return $set[6];
- } elseif( isset( $set[5] ) ) {
+ } elseif ( isset( $set[5] ) ) {
# No quotes.
return $set[5];
- } elseif( isset( $set[4] ) ) {
+ } elseif ( isset( $set[4] ) ) {
# Single-quoted
return $set[4];
- } elseif( isset( $set[3] ) ) {
+ } elseif ( isset( $set[3] ) ) {
# Double-quoted
return $set[3];
- } elseif( !isset( $set[2] ) ) {
+ } elseif ( !isset( $set[2] ) ) {
# In XHTML, attributes must have a value.
# For 'reduced' form, return explicitly the attribute name here.
return $set[1];
@@ -1165,14 +1308,14 @@ class Sanitizer {
*/
static function normalizeCharReferencesCallback( $matches ) {
$ret = null;
- if( $matches[1] != '' ) {
+ if ( $matches[1] != '' ) {
$ret = Sanitizer::normalizeEntity( $matches[1] );
- } elseif( $matches[2] != '' ) {
+ } elseif ( $matches[2] != '' ) {
$ret = Sanitizer::decCharReference( $matches[2] );
- } elseif( $matches[3] != '' ) {
+ } elseif ( $matches[3] != '' ) {
$ret = Sanitizer::hexCharReference( $matches[3] );
}
- if( is_null( $ret ) ) {
+ if ( is_null( $ret ) ) {
return htmlspecialchars( $matches[0] );
} else {
return $ret;
@@ -1208,7 +1351,7 @@ class Sanitizer {
*/
static function decCharReference( $codepoint ) {
$point = intval( $codepoint );
- if( Sanitizer::validateCodepoint( $point ) ) {
+ if ( Sanitizer::validateCodepoint( $point ) ) {
return sprintf( '&#%d;', $point );
} else {
return null;
@@ -1221,7 +1364,7 @@ class Sanitizer {
*/
static function hexCharReference( $codepoint ) {
$point = hexdec( $codepoint );
- if( Sanitizer::validateCodepoint( $point ) ) {
+ if ( Sanitizer::validateCodepoint( $point ) ) {
return sprintf( '&#x%x;', $point );
} else {
return null;
@@ -1234,12 +1377,12 @@ class Sanitizer {
* @return Boolean
*/
private static function validateCodepoint( $codepoint ) {
- return ($codepoint == 0x09)
- || ($codepoint == 0x0a)
- || ($codepoint == 0x0d)
- || ($codepoint >= 0x20 && $codepoint <= 0xd7ff)
- || ($codepoint >= 0xe000 && $codepoint <= 0xfffd)
- || ($codepoint >= 0x10000 && $codepoint <= 0x10ffff);
+ return $codepoint == 0x09
+ || $codepoint == 0x0a
+ || $codepoint == 0x0d
+ || ( $codepoint >= 0x20 && $codepoint <= 0xd7ff )
+ || ( $codepoint >= 0xe000 && $codepoint <= 0xfffd )
+ || ( $codepoint >= 0x10000 && $codepoint <= 0x10ffff );
}
/**
@@ -1263,7 +1406,7 @@ class Sanitizer {
* This is useful for page titles, not for text to be displayed,
* MediaWiki allows HTML entities to escape normalization as a feature.
*
- * @param $text String (already normalized, containing entities)
+ * @param string $text (already normalized, containing entities)
* @return String (still normalized, without entities)
*/
public static function decodeCharReferencesAndNormalize( $text ) {
@@ -1285,12 +1428,12 @@ class Sanitizer {
* @return String
*/
static function decodeCharReferencesCallback( $matches ) {
- if( $matches[1] != '' ) {
+ if ( $matches[1] != '' ) {
return Sanitizer::decodeEntity( $matches[1] );
- } elseif( $matches[2] != '' ) {
- return Sanitizer::decodeChar( intval( $matches[2] ) );
- } elseif( $matches[3] != '' ) {
- return Sanitizer::decodeChar( hexdec( $matches[3] ) );
+ } elseif ( $matches[2] != '' ) {
+ return Sanitizer::decodeChar( intval( $matches[2] ) );
+ } elseif ( $matches[3] != '' ) {
+ return Sanitizer::decodeChar( hexdec( $matches[3] ) );
}
# Last case should be an ampersand by itself
return $matches[0];
@@ -1304,7 +1447,7 @@ class Sanitizer {
* @private
*/
static function decodeChar( $codepoint ) {
- if( Sanitizer::validateCodepoint( $codepoint ) ) {
+ if ( Sanitizer::validateCodepoint( $codepoint ) ) {
return codepointToUtf8( $codepoint );
} else {
return UTF8_REPLACEMENT;
@@ -1323,7 +1466,7 @@ class Sanitizer {
if ( isset( self::$htmlEntityAliases[$name] ) ) {
$name = self::$htmlEntityAliases[$name];
}
- if( isset( self::$htmlEntities[$name] ) ) {
+ if ( isset( self::$htmlEntities[$name] ) ) {
return codepointToUtf8( self::$htmlEntities[$name] );
} else {
return "&$name;";
@@ -1337,10 +1480,7 @@ class Sanitizer {
* @return Array
*/
static function attributeWhitelist( $element ) {
- static $list;
- if( !isset( $list ) ) {
- $list = Sanitizer::setupAttributeWhitelist();
- }
+ $list = Sanitizer::setupAttributeWhitelist();
return isset( $list[$element] )
? $list[$element]
: array();
@@ -1352,41 +1492,62 @@ class Sanitizer {
* @return Array
*/
static function setupAttributeWhitelist() {
- global $wgAllowRdfaAttributes, $wgHtml5, $wgAllowMicrodataAttributes;
+ global $wgAllowRdfaAttributes, $wgAllowMicrodataAttributes;
+
+ static $whitelist, $staticInitialised;
+ $globalContext = implode( '-', compact( 'wgAllowRdfaAttributes', 'wgAllowMicrodataAttributes' ) );
- $common = array( 'id', 'class', 'lang', 'dir', 'title', 'style' );
+ if ( isset( $whitelist ) && $staticInitialised == $globalContext ) {
+ return $whitelist;
+ }
+
+ $common = array(
+ # HTML
+ 'id',
+ 'class',
+ 'style',
+ 'lang',
+ 'dir',
+ 'title',
+
+ # WAI-ARIA
+ 'role',
+ );
if ( $wgAllowRdfaAttributes ) {
- #RDFa attributes as specified in section 9 of http://www.w3.org/TR/2008/REC-rdfa-syntax-20081014
+ # RDFa attributes as specified in section 9 of
+ # http://www.w3.org/TR/2008/REC-rdfa-syntax-20081014
$common = array_merge( $common, array(
- 'about', 'property', 'resource', 'datatype', 'typeof',
+ 'about', 'property', 'resource', 'datatype', 'typeof',
) );
}
- if ( $wgHtml5 && $wgAllowMicrodataAttributes ) {
- # add HTML5 microdata tages as pecified by http://www.whatwg.org/specs/web-apps/current-work/multipage/microdata.html#the-microdata-model
+ if ( $wgAllowMicrodataAttributes ) {
+ # add HTML5 microdata tags as specified by
+ # http://www.whatwg.org/html/microdata.html#the-microdata-model
$common = array_merge( $common, array(
- 'itemid', 'itemprop', 'itemref', 'itemscope', 'itemtype'
+ 'itemid', 'itemprop', 'itemref', 'itemscope', 'itemtype'
) );
}
$block = array_merge( $common, array( 'align' ) );
$tablealign = array( 'align', 'char', 'charoff', 'valign' );
- $tablecell = array( 'abbr',
- 'axis',
- 'headers',
- 'scope',
- 'rowspan',
- 'colspan',
- 'nowrap', # deprecated
- 'width', # deprecated
- 'height', # deprecated
- 'bgcolor' # deprecated
- );
+ $tablecell = array(
+ 'abbr',
+ 'axis',
+ 'headers',
+ 'scope',
+ 'rowspan',
+ 'colspan',
+ 'nowrap', # deprecated
+ 'width', # deprecated
+ 'height', # deprecated
+ 'bgcolor', # deprecated
+ );
# Numbers refer to sections in HTML 4.01 standard describing the element.
# See: http://www.w3.org/TR/html4/
- $whitelist = array (
+ $whitelist = array(
# 7.5.4
'div' => $block,
'center' => $common, # deprecated
@@ -1432,6 +1593,9 @@ class Sanitizer {
# 9.3.2
'br' => array( 'id', 'class', 'title', 'style', 'clear' ),
+ # http://www.whatwg.org/html/text-level-semantics.html#the-wbr-element
+ 'wbr' => array( 'id', 'class', 'title', 'style' ),
+
# 9.3.4
'pre' => array_merge( $common, array( 'width' ) ),
@@ -1475,7 +1639,9 @@ class Sanitizer {
'td' => array_merge( $common, $tablecell, $tablealign ),
'th' => array_merge( $common, $tablecell, $tablealign ),
- # 12.2 # NOTE: <a> is not allowed directly, but the attrib whitelist is used from the Parser object
+ # 12.2
+ # NOTE: <a> is not allowed directly, but the attrib
+ # whitelist is used from the Parser object
'a' => array_merge( $common, array( 'href', 'rel', 'rev' ) ), # rel/rev esp. for RDFa
# 13.2
@@ -1501,8 +1667,8 @@ class Sanitizer {
# 15.3
'hr' => array_merge( $common, array( 'noshade', 'size', 'width' ) ),
- # XHTML Ruby annotation text module, simple ruby only.
- # http://www.w3c.org/TR/ruby/
+ # HTML Ruby annotation text module, simple ruby only.
+ # http://www.whatwg.org/html/text-level-semantics.html#the-ruby-element
'ruby' => $common,
# rbc
# rtc
@@ -1518,7 +1684,23 @@ class Sanitizer {
# HTML 5 section 4.6
'bdi' => $common,
- );
+ # HTML5 elements, defined by:
+ # http://www.whatwg.org/html/
+ 'data' => array_merge( $common, array( 'value' ) ),
+ 'time' => array_merge( $common, array( 'datetime' ) ),
+ 'mark' => $common,
+
+ // meta and link are only permitted by removeHTMLtags when Microdata
+ // is enabled so we don't bother adding a conditional to hide these
+ // Also meta and link are only valid in WikiText as Microdata elements
+ // (ie: validateTag rejects tags missing the attributes needed for Microdata)
+ // So we don't bother including $common attributes that have no purpose.
+ 'meta' => array( 'itemprop', 'content' ),
+ 'link' => array( 'itemprop', 'href' ),
+ );
+
+ $staticInitialised = $globalContext;
+
return $whitelist;
}
@@ -1529,7 +1711,7 @@ class Sanitizer {
* Warning: this return value must be further escaped for literal
* inclusion in HTML output as of 1.10!
*
- * @param $text String: HTML fragment
+ * @param string $text HTML fragment
* @return String
*/
static function stripAllTags( $text ) {
@@ -1554,7 +1736,7 @@ class Sanitizer {
*/
static function hackDocType() {
$out = "<!DOCTYPE html [\n";
- foreach( self::$htmlEntities as $entity => $codepoint ) {
+ foreach ( self::$htmlEntities as $entity => $codepoint ) {
$out .= "<!ENTITY $entity \"&#$codepoint;\">";
}
$out .= "]>\n";
@@ -1576,7 +1758,7 @@ class Sanitizer {
# Validate hostname portion
$matches = array();
- if( preg_match( '!^([^:]+:)(//[^/]+)?(.*)$!iD', $url, $matches ) ) {
+ if ( preg_match( '!^([^:]+:)(//[^/]+)?(.*)$!iD', $url, $matches ) ) {
list( /* $whole */, $protocol, $host, $rest ) = $matches;
// Characters that will be ignored in IDNs.
@@ -1620,7 +1802,7 @@ class Sanitizer {
* Does a string look like an e-mail address?
*
* This validates an email address using an HTML5 specification found at:
- * http://www.whatwg.org/specs/web-apps/current-work/multipage/states-of-the-type-attribute.html#valid-e-mail-address
+ * http://www.whatwg.org/html/states-of-the-type-attribute.html#valid-e-mail-address
* Which as of 2011-01-24 says:
*
* A valid e-mail address is a string that matches the ABNF production
@@ -1641,20 +1823,20 @@ class Sanitizer {
*
* @since 1.18
*
- * @param $addr String E-mail address
+ * @param string $addr E-mail address
* @return Bool
*/
public static function validateEmail( $addr ) {
$result = null;
- if( !wfRunHooks( 'isValidEmailAddr', array( $addr, &$result ) ) ) {
+ if ( !wfRunHooks( 'isValidEmailAddr', array( $addr, &$result ) ) ) {
return $result;
}
// Please note strings below are enclosed in brackets [], this make the
// hyphen "-" a range indicator. Hence it is double backslashed below.
// See bug 26948
- $rfc5322_atext = "a-z0-9!#$%&'*+\\-\/=?^_`{|}~" ;
- $rfc1034_ldh_str = "a-z0-9\\-" ;
+ $rfc5322_atext = "a-z0-9!#$%&'*+\\-\/=?^_`{|}~";
+ $rfc1034_ldh_str = "a-z0-9\\-";
$HTML5_email_regexp = "/
^ # start of string
@@ -1663,8 +1845,8 @@ class Sanitizer {
[$rfc1034_ldh_str]+ # First domain part
(\\.[$rfc1034_ldh_str]+)* # Following part prefixed with a dot
$ # End of string
- /ix" ; // case Insensitive, eXtended
+ /ix"; // case Insensitive, eXtended
- return (bool) preg_match( $HTML5_email_regexp, $addr );
+ return (bool)preg_match( $HTML5_email_regexp, $addr );
}
}