summary | refs | log | tree | commit | diff
path: root/includes/libs
diff options
context:
space:
mode:
Diffstat (limited to 'includes/libs')
-rw-r--r-- includes/libs/CSSJanus.php 32
-rw-r--r-- includes/libs/CSSMin.php 12
-rw-r--r-- includes/libs/IEContentAnalyzer.php 79
-rw-r--r-- includes/libs/IEUrlExtension.php 58
-rw-r--r-- includes/libs/JavaScriptMinifier.php 49
-rw-r--r-- includes/libs/jsminplus.php 223
6 files changed, 286 insertions, 167 deletions
diff --git a/includes/libs/CSSJanus.php b/includes/libs/CSSJanus.php
index aa04bc49..c8fc296b 100644
--- a/includes/libs/CSSJanus.php
+++ b/includes/libs/CSSJanus.php
@@ -22,7 +22,9 @@
* written for LTR to RTL.
*
* The original Python version of CSSJanus is Copyright 2008 by Google Inc. and
- * is distributed under the Apache license.
+ * is distributed under the Apache license. This PHP port is Copyright 2010 by
+ * Roan Kattouw and is dual-licensed under the GPL (as in the comment above) and
+ * the Apache (as in the original code) licenses.
*
* Original code: http://code.google.com/p/cssjanus/source/browse/trunk/cssjanus.py
* License of original code: http://code.google.com/p/cssjanus/source/browse/trunk/LICENSE
@@ -111,8 +113,8 @@ class CSSJanus {
$patterns['four_notation_color'] = "/(-color\s*:\s*){$patterns['color']}(\s+){$patterns['color']}(\s+){$patterns['color']}(\s+){$patterns['color']}/i";
// The two regexes below are parenthesized differently than in the original implementation to make the
// callback's job more straightforward
- $patterns['bg_horizontal_percentage'] = "/(background(?:-position)?\s*:\s*[^%]*?)({$patterns['num']})(%\s*(?:{$patterns['quantity']}|{$patterns['ident']}))/";
- $patterns['bg_horizontal_percentage_x'] = "/(background-position-x\s*:\s*)({$patterns['num']})(%)/";
+ $patterns['bg_horizontal_percentage'] = "/(background(?:-position)?\s*:\s*[^%]*?)(-?{$patterns['num']})(%\s*(?:{$patterns['quantity']}|{$patterns['ident']}))/";
+ $patterns['bg_horizontal_percentage_x'] = "/(background-position-x\s*:\s*)(-?{$patterns['num']})(%)/";
}
/**
@@ -173,6 +175,8 @@ class CSSJanus {
*
* See http://code.google.com/p/cssjanus/issues/detail?id=15 and
* TODO: URL
+ * @param $css string
+ * @return string
*/
private static function fixDirection( $css ) {
$css = preg_replace( self::$patterns['direction_ltr'],
@@ -185,6 +189,8 @@ class CSSJanus {
/**
* Replace 'ltr' with 'rtl' and vice versa in background URLs
+ * @param $css string
+ * @return string
*/
private static function fixLtrRtlInURL( $css ) {
$css = preg_replace( self::$patterns['ltr_in_url'], self::$patterns['tmpToken'], $css );
@@ -196,6 +202,8 @@ class CSSJanus {
/**
* Replace 'left' with 'right' and vice versa in background URLs
+ * @param $css string
+ * @return string
*/
private static function fixLeftRightInURL( $css ) {
$css = preg_replace( self::$patterns['left_in_url'], self::$patterns['tmpToken'], $css );
@@ -207,6 +215,8 @@ class CSSJanus {
/**
* Flip rules like left: , padding-right: , etc.
+ * @param $css string
+ * @return string
*/
private static function fixLeftAndRight( $css ) {
$css = preg_replace( self::$patterns['left'], self::$patterns['tmpToken'], $css );
@@ -218,6 +228,8 @@ class CSSJanus {
/**
* Flip East and West in rules like cursor: nw-resize;
+ * @param $css string
+ * @return string
*/
private static function fixCursorProperties( $css ) {
$css = preg_replace( self::$patterns['cursor_east'],
@@ -237,6 +249,8 @@ class CSSJanus {
* and four-part color rules with multiple whitespace characters between
* colors are not recognized.
* See http://code.google.com/p/cssjanus/issues/detail?id=16
+ * @param $css string
+ * @return string
*/
private static function fixFourPartNotation( $css ) {
$css = preg_replace( self::$patterns['four_notation_quantity'], '$1$2$7$4$5$6$3', $css );
@@ -247,6 +261,8 @@ class CSSJanus {
/**
* Flip horizontal background percentages.
+ * @param $css string
+ * @return string
*/
private static function fixBackgroundPosition( $css ) {
$css = preg_replace_callback( self::$patterns['bg_horizontal_percentage'],
@@ -259,6 +275,8 @@ class CSSJanus {
/**
* Callback for fixBackgroundPosition()
+ * @param $matches array
+ * @return string
*/
private static function calculateNewBackgroundPosition( $matches ) {
return $matches[1] . ( 100 - $matches[2] ) . $matches[3];
@@ -295,6 +313,10 @@ class CSSJanus_Tokenizer {
return preg_replace_callback( $this->regex, array( $this, 'tokenizeCallback' ), $str );
}
+ /**
+ * @param $matches array
+ * @return string
+ */
private function tokenizeCallback( $matches ) {
$this->originals[] = $matches[0];
return $this->token;
@@ -314,6 +336,10 @@ class CSSJanus_Tokenizer {
array( $this, 'detokenizeCallback' ), $str );
}
+ /**
+ * @param $matches
+ * @return mixed
+ */
private function detokenizeCallback( $matches ) {
$retval = current( $this->originals );
next( $this->originals );
diff --git a/includes/libs/CSSMin.php b/includes/libs/CSSMin.php
index 4012b695..4f4b28bb 100644
--- a/includes/libs/CSSMin.php
+++ b/includes/libs/CSSMin.php
@@ -1,5 +1,5 @@
<?php
-/*
+/**
* Copyright 2010 Wikimedia Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
@@ -79,6 +79,10 @@ class CSSMin {
return $files;
}
+ /**
+ * @param $file string
+ * @return bool|string
+ */
protected static function getMimeType( $file ) {
$realpath = realpath( $file );
// Try a couple of different ways to get the mime-type of a file, in order of
@@ -112,10 +116,10 @@ class CSSMin {
* @param $source string CSS data to remap
* @param $local string File path where the source was read from
* @param $remote string URL path to the file
- * @param $embed ???
+ * @param $embedData bool If false, never do any data URI embedding, even if / * @embed * / is found
* @return string Remapped CSS data
*/
- public static function remap( $source, $local, $remote, $embed = true ) {
+ public static function remap( $source, $local, $remote, $embedData = true ) {
$pattern = '/((?P<embed>\s*\/\*\s*\@embed\s*\*\/)(?P<pre>[^\;\}]*))?' .
self::URL_REGEX . '(?P<post>[^;]*)[\;]?/';
$offset = 0;
@@ -162,7 +166,7 @@ class CSSMin {
// using Z for the timezone, meaning GMT
$url .= '?' . gmdate( 'Y-m-d\TH:i:s\Z', round( filemtime( $file ), -2 ) );
// Embedding requires a bit of extra processing, so let's skip that if we can
- if ( $embed ) {
+ if ( $embedData && $embed ) {
$type = self::getMimeType( $file );
// Detect when URLs were preceded with embed tags, and also verify file size is
// below the limit
diff --git a/includes/libs/IEContentAnalyzer.php b/includes/libs/IEContentAnalyzer.php
index a2ef1a09..01e72e68 100644
--- a/includes/libs/IEContentAnalyzer.php
+++ b/includes/libs/IEContentAnalyzer.php
@@ -1,19 +1,19 @@
<?php
/**
- * This class simulates Microsoft Internet Explorer's terribly broken and
+ * This class simulates Microsoft Internet Explorer's terribly broken and
* insecure MIME type detection algorithm. It can be used to check web uploads
- * with an apparently safe type, to see if IE will reinterpret them to produce
+ * with an apparently safe type, to see if IE will reinterpret them to produce
* something dangerous.
*
- * It is full of bugs and strange design choices should not under any
- * circumstances be used to determine a MIME type to present to a user or
+ * It is full of bugs and strange design choices should not under any
+ * circumstances be used to determine a MIME type to present to a user or
* client. (Apple Safari developers, this means you too.)
*
- * This class is based on a disassembly of IE 5.0, 6.0 and 7.0. Although I have
- * attempted to ensure that this code works in exactly the same way as Internet
- * Explorer, it does not share any source code, or creative choices such as
- * variable names, thus I (Tim Starling) claim copyright on it.
+ * This class is based on a disassembly of IE 5.0, 6.0 and 7.0. Although I have
+ * attempted to ensure that this code works in exactly the same way as Internet
+ * Explorer, it does not share any source code, or creative choices such as
+ * variable names, thus I (Tim Starling) claim copyright on it.
*
* It may be redistributed without restriction. To aid reuse, this class does
* not depend on any MediaWiki module.
@@ -24,8 +24,8 @@ class IEContentAnalyzer {
*/
protected $baseTypeTable = array(
'ambiguous' /*1*/ => array(
- 'text/plain',
- 'application/octet-stream',
+ 'text/plain',
+ 'application/octet-stream',
'application/x-netcdf', // [sic]
),
'text' /*3*/ => array(
@@ -34,8 +34,8 @@ class IEContentAnalyzer {
),
'binary' /*4*/ => array(
'application/pdf', 'audio/x-aiff', 'audio/basic', 'audio/wav', 'image/gif',
- 'image/pjpeg', 'image/jpeg', 'image/tiff', 'image/x-png', 'image/png', 'image/bmp',
- 'image/x-jg', 'image/x-art', 'image/x-emf', 'image/x-wmf', 'video/avi',
+ 'image/pjpeg', 'image/jpeg', 'image/tiff', 'image/x-png', 'image/png', 'image/bmp',
+ 'image/x-jg', 'image/x-art', 'image/x-emf', 'image/x-wmf', 'video/avi',
'video/x-msvideo', 'video/mpeg', 'application/x-compressed',
'application/x-zip-compressed', 'application/x-gzip-compressed', 'application/java',
'application/x-msdownload'
@@ -293,21 +293,21 @@ class IEContentAnalyzer {
'.xsl' => 'text/xml',
);
- /**
- * IE versions which have been analysed to bring you this class, and for
- * which some substantive difference exists. These will appear as keys
+ /**
+ * IE versions which have been analysed to bring you this class, and for
+ * which some substantive difference exists. These will appear as keys
* in the return value of getRealMimesFromData(). The names are chosen to sort correctly.
*/
protected $versions = array( 'ie05', 'ie06', 'ie07', 'ie07.strict', 'ie07.nohtml' );
/**
- * Type table with versions expanded
+ * Type table with versions expanded
*/
protected $typeTable = array();
/** constructor */
function __construct() {
- // Construct versioned type arrays from the base type array plus additions
+ // Construct versioned type arrays from the base type array plus additions
$types = $this->baseTypeTable;
foreach ( $this->versions as $version ) {
if ( isset( $this->addedTypes[$version] ) ) {
@@ -320,7 +320,7 @@ class IEContentAnalyzer {
}
/**
- * Get the MIME types from getMimesFromData(), but convert the result from IE's
+ * Get the MIME types from getMimesFromData(), but convert the result from IE's
* idiosyncratic private types into something other apps will understand.
*
* @param $fileName String: the file name (unused at present)
@@ -338,6 +338,8 @@ class IEContentAnalyzer {
/**
* Translate a MIME type from IE's idiosyncratic private types into
* more commonly understood type strings
+ * @param $type
+ * @return string
*/
public function translateMimeType( $type ) {
static $table = array(
@@ -375,6 +377,11 @@ class IEContentAnalyzer {
/**
* Get the MIME type for a given named version
+ * @param $version
+ * @param $fileName
+ * @param $chunk
+ * @param $proposed
+ * @return bool|string
*/
protected function getMimeTypeForVersion( $version, $fileName, $chunk, $proposed ) {
// Strip text after a semicolon
@@ -397,8 +404,8 @@ class IEContentAnalyzer {
// Truncate chunk at 255 bytes
$chunk = substr( $chunk, 0, 255 );
- // IE does the Check*Headers() calls last, and instead does the following image
- // type checks by directly looking for the magic numbers. What I do here should
+ // IE does the Check*Headers() calls last, and instead does the following image
+ // type checks by directly looking for the magic numbers. What I do here should
// have the same effect since the magic number checks are identical in both cases.
$result = $this->sampleData( $version, $chunk );
$sampleFound = $result['found'];
@@ -413,7 +420,7 @@ class IEContentAnalyzer {
return 'image/gif';
}
if ( ( $proposed == 'image/pjpeg' || $proposed == 'image/jpeg' )
- && $binaryType == 'image/pjpeg' )
+ && $binaryType == 'image/pjpeg' )
{
return $proposed;
}
@@ -430,7 +437,7 @@ class IEContentAnalyzer {
return 'application/x-cdf';
}
- // RSS and Atom were added in IE 7 so they won't be in $sampleFound for
+ // RSS and Atom were added in IE 7 so they won't be in $sampleFound for
// previous versions
if ( isset( $sampleFound['rss'] ) ) {
return 'application/rss+xml';
@@ -483,8 +490,8 @@ class IEContentAnalyzer {
// Freaky heuristics to determine if the data is text or binary
// The heuristic is of course broken for non-ASCII text
- if ( $counters['ctrl'] != 0 && ( $counters['ff'] + $counters['low'] )
- < ( $counters['ctrl'] + $counters['high'] ) * 16 )
+ if ( $counters['ctrl'] != 0 && ( $counters['ff'] + $counters['low'] )
+ < ( $counters['ctrl'] + $counters['high'] ) * 16 )
{
$kindOfBinary = true;
$type = $binaryType ? $binaryType : $textType;
@@ -529,8 +536,8 @@ class IEContentAnalyzer {
return $this->registry[$ext];
}
- // TODO: If the extension has an application registered to it, IE will return
- // application/octet-stream. We'll skip that, so we could erroneously
+ // TODO: If the extension has an application registered to it, IE will return
+ // application/octet-stream. We'll skip that, so we could erroneously
// return text/plain or application/x-netcdf where application/octet-stream
// would be correct.
@@ -540,6 +547,9 @@ class IEContentAnalyzer {
/**
* Check for text headers at the start of the chunk
* Confirmed same in 5 and 7.
+ * @param $version
+ * @param $chunk
+ * @return bool|string
*/
private function checkTextHeaders( $version, $chunk ) {
$chunk2 = substr( $chunk, 0, 2 );
@@ -563,6 +573,9 @@ class IEContentAnalyzer {
/**
* Check for binary headers at the start of the chunk
* Confirmed same in 5 and 7.
+ * @param $version
+ * @param $chunk
+ * @return bool|string
*/
private function checkBinaryHeaders( $version, $chunk ) {
$chunk2 = substr( $chunk, 0, 2 );
@@ -578,13 +591,13 @@ class IEContentAnalyzer {
return 'image/pjpeg'; // actually plain JPEG but this is what IE returns
}
- if ( $chunk2 == 'BM'
+ if ( $chunk2 == 'BM'
&& substr( $chunk, 6, 2 ) == "\000\000"
&& substr( $chunk, 8, 2 ) == "\000\000" )
{
return 'image/bmp'; // another non-standard MIME
}
- if ( $chunk4 == 'RIFF'
+ if ( $chunk4 == 'RIFF'
&& substr( $chunk, 8, 4 ) == 'WAVE' )
{
return 'audio/wav';
@@ -661,6 +674,9 @@ class IEContentAnalyzer {
/**
* Do heuristic checks on the bulk of the data sample.
* Search for HTML tags.
+ * @param $version
+ * @param $chunk
+ * @return array
*/
protected function sampleData( $version, $chunk ) {
$found = array();
@@ -774,7 +790,7 @@ class IEContentAnalyzer {
}
if ( !strncasecmp( $remainder, $rdfPurl, strlen( $rdfPurl ) ) ) {
- if ( isset( $found['rdf-tag'] )
+ if ( isset( $found['rdf-tag'] )
&& isset( $found['rdf-url'] ) ) // [sic]
{
break;
@@ -808,6 +824,11 @@ class IEContentAnalyzer {
return array( 'found' => $found, 'counters' => $counters );
}
+ /**
+ * @param $version
+ * @param $type
+ * @return int|string
+ */
protected function getDataFormat( $version, $type ) {
$types = $this->typeTable[$version];
if ( $type == '(null)' || strval( $type ) === '' ) {
diff --git a/includes/libs/IEUrlExtension.php b/includes/libs/IEUrlExtension.php
index 100454d4..e00e6663 100644
--- a/includes/libs/IEUrlExtension.php
+++ b/includes/libs/IEUrlExtension.php
@@ -1,31 +1,31 @@
<?php
/**
- * Internet Explorer derives a cache filename from a URL, and then in certain
- * circumstances, uses the extension of the resulting file to determine the
- * content type of the data, ignoring the Content-Type header.
+ * Internet Explorer derives a cache filename from a URL, and then in certain
+ * circumstances, uses the extension of the resulting file to determine the
+ * content type of the data, ignoring the Content-Type header.
*
* This can be a problem, especially when non-HTML content is sent by MediaWiki,
* and Internet Explorer interprets it as HTML, exposing an XSS vulnerability.
*
- * Usually the script filename (e.g. api.php) is present in the URL, and this
+ * Usually the script filename (e.g. api.php) is present in the URL, and this
* makes Internet Explorer think the extension is a harmless script extension.
- * But Internet Explorer 6 and earlier allows the script extension to be
- * obscured by encoding the dot as "%2E".
+ * But Internet Explorer 6 and earlier allows the script extension to be
+ * obscured by encoding the dot as "%2E".
*
- * This class contains functions which help in detecting and dealing with this
+ * This class contains functions which help in detecting and dealing with this
* situation.
*
- * Checking the URL for a bad extension is somewhat complicated due to the fact
+ * Checking the URL for a bad extension is somewhat complicated due to the fact
* that CGI doesn't provide a standard method to determine the URL. Instead it
- * is necessary to pass a subset of $_SERVER variables, which we then attempt
+ * is necessary to pass a subset of $_SERVER variables, which we then attempt
* to use to guess parts of the URL.
*/
class IEUrlExtension {
/**
* Check a subset of $_SERVER (or the whole of $_SERVER if you like)
- * to see if it indicates that the request was sent with a bad file
- * extension. Returns true if the request should be denied or modified,
+ * to see if it indicates that the request was sent with a bad file
+ * extension. Returns true if the request should be denied or modified,
* false otherwise. The relevant $_SERVER elements are:
*
* - SERVER_SOFTWARE
@@ -37,6 +37,7 @@ class IEUrlExtension {
*
* @param $vars A subset of $_SERVER.
* @param $extWhitelist Extensions which are allowed, assumed harmless.
+ * @return bool
*/
public static function areServerVarsBad( $vars, $extWhitelist = array() ) {
// Check QUERY_STRING or REQUEST_URI
@@ -55,7 +56,7 @@ class IEUrlExtension {
return true;
}
- // Some servers have PATH_INFO but not REQUEST_URI, so we check both
+ // Some servers have PATH_INFO but not REQUEST_URI, so we check both
// to be on the safe side.
if ( isset( $vars['PATH_INFO'] )
&& self::isUrlExtensionBad( $vars['PATH_INFO'], $extWhitelist ) )
@@ -71,7 +72,7 @@ class IEUrlExtension {
* Given a right-hand portion of a URL, determine whether IE would detect
* a potentially harmful file extension.
*
- * @param $urlPart The right-hand portion of a URL
+ * @param $urlPart string The right-hand portion of a URL
* @param $extWhitelist An array of file extensions which may occur in this
* URL, and which should be allowed.
* @return bool
@@ -97,10 +98,10 @@ class IEUrlExtension {
}
if ( !preg_match( '/^[a-zA-Z0-9_-]+$/', $extension ) ) {
- // Non-alphanumeric extension, unlikely to be registered.
+ // Non-alphanumeric extension, unlikely to be registered.
//
// The regex above is known to match all registered file extensions
- // in a default Windows XP installation. It's important to allow
+ // in a default Windows XP installation. It's important to allow
// extensions with ampersands and percent signs, since that reduces
// the number of false positives substantially.
return false;
@@ -111,8 +112,11 @@ class IEUrlExtension {
}
/**
- * Returns a variant of $url which will pass isUrlExtensionBad() but has the
+ * Returns a variant of $url which will pass isUrlExtensionBad() but has the
* same GET parameters, or false if it can't figure one out.
+ * @param $url
+ * @param $extWhitelist array
+ * @return bool|string
*/
public static function fixUrlForIE6( $url, $extWhitelist = array() ) {
$questionPos = strpos( $url, '?' );
@@ -127,7 +131,7 @@ class IEUrlExtension {
$query = substr( $url, $questionPos + 1 );
}
- // Multiple question marks cause problems. Encode the second and
+ // Multiple question marks cause problems. Encode the second and
// subsequent question mark.
$query = str_replace( '?', '%3E', $query );
// Append an invalid path character so that IE6 won't see the end of the
@@ -153,16 +157,16 @@ class IEUrlExtension {
* insecure.
*
* The criteria for finding an extension are as follows:
- * - a possible extension is a dot followed by one or more characters not
+ * - a possible extension is a dot followed by one or more characters not
* in <>\"/:|?.#
- * - if we find a possible extension followed by the end of the string or
+ * - if we find a possible extension followed by the end of the string or
* a #, that's our extension
* - if we find a possible extension followed by a ?, that's our extension
- * - UNLESS it's exe, dll or cgi, in which case we ignore it and continue
+ * - UNLESS it's exe, dll or cgi, in which case we ignore it and continue
* searching for another possible extension
- * - if we find a possible extension followed by a dot or another illegal
+ * - if we find a possible extension followed by a dot or another illegal
* character, we ignore it and continue searching
- *
+ *
* @param $url string URL
* @return mixed Detected extension (string), or false if none found
*/
@@ -182,7 +186,7 @@ class IEUrlExtension {
// End of string, we're done
return false;
}
-
+
// We found a dot. Skip past it
$pos++;
$remainingLength = $urlLength - $pos;
@@ -220,12 +224,12 @@ class IEUrlExtension {
* with %2E not decoded to ".". On such a server, it is possible to detect
* whether the script filename has been obscured.
*
- * The function returns false if the server is not known to have this
+ * The function returns false if the server is not known to have this
* behaviour. Microsoft IIS in particular is known to decode escaped script
* filenames.
*
* SERVER_SOFTWARE typically contains either a plain string such as "Zeus",
- * or a specification in the style of a User-Agent header, such as
+ * or a specification in the style of a User-Agent header, such as
* "Apache/1.3.34 (Unix) mod_ssl/2.8.25 OpenSSL/0.9.8a PHP/4.4.2"
*
* @param $serverSoftware
@@ -234,8 +238,8 @@ class IEUrlExtension {
*/
public static function haveUndecodedRequestUri( $serverSoftware ) {
static $whitelist = array(
- 'Apache',
- 'Zeus',
+ 'Apache',
+ 'Zeus',
'LiteSpeed' );
if ( preg_match( '/^(.*?)($|\/| )/', $serverSoftware, $m ) ) {
return in_array( $m[1], $whitelist );
diff --git a/includes/libs/JavaScriptMinifier.php b/includes/libs/JavaScriptMinifier.php
index a991d915..baf93385 100644
--- a/includes/libs/JavaScriptMinifier.php
+++ b/includes/libs/JavaScriptMinifier.php
@@ -484,22 +484,42 @@ class JavaScriptMinifier {
$end++;
}
} elseif(
+ $ch === '0'
+ && ($pos + 1 < $length) && ($s[$pos + 1] === 'x' || $s[$pos + 1] === 'X' )
+ ) {
+ // Hex numeric literal
+ $end++; // x or X
+ $len = strspn( $s, '0123456789ABCDEFabcdef', $end );
+ if ( !$len ) {
+ return self::parseError($s, $pos, 'Expected a hexadecimal number but found ' . substr( $s, $pos, 5 ) . '...' );
+ }
+ $end += $len;
+ } elseif(
ctype_digit( $ch )
|| ( $ch === '.' && $pos + 1 < $length && ctype_digit( $s[$pos + 1] ) )
) {
- // Numeric literal. Search for the end of it, but don't care about [+-]exponent
- // at the end, as the results of "numeric [+-] numeric" and "numeric" are
- // identical to our state machine.
- $end += strspn( $s, '0123456789ABCDEFabcdefXx.', $end );
- while( $s[$end - 1] === '.' ) {
- // Special case: When a numeric ends with a dot, we have to check the
- // literal for proper syntax
- $decimal = strspn( $s, '0123456789', $pos, $end - $pos - 1 );
- if( $decimal === $end - $pos - 1 ) {
- break;
- } else {
- $end--;
+ $end += strspn( $s, '0123456789', $end );
+ $decimal = strspn( $s, '.', $end );
+ if ($decimal) {
+ if ( $decimal > 2 ) {
+ return self::parseError($s, $end, 'The number has too many decimal points' );
+ }
+ $end += strspn( $s, '0123456789', $end + 1 ) + $decimal;
+ }
+ $exponent = strspn( $s, 'eE', $end );
+ if( $exponent ) {
+ if ( $exponent > 1 ) {
+ return self::parseError($s, $end, 'Number with several E' );
+ }
+ $end++;
+
+ // + sign is optional; - sign is required.
+ $end += strspn( $s, '-+', $end );
+ $len = strspn( $s, '0123456789', $end );
+ if ( !$len ) {
+ return self::parseError($s, $pos, 'No decimal digits after e, how many zeroes should be added?' );
}
+ $end += $len;
}
} elseif( isset( $opChars[$ch] ) ) {
// Punctuation character. Search for the longest matching operator.
@@ -576,4 +596,9 @@ class JavaScriptMinifier {
}
return $out;
}
+
+ static function parseError($fullJavascript, $position, $errorMsg) {
+ // TODO: Handle the error: trigger_error, throw exception, return false...
+ return false;
+ }
}
diff --git a/includes/libs/jsminplus.php b/includes/libs/jsminplus.php
index bab4ff49..8ed08d74 100644
--- a/includes/libs/jsminplus.php
+++ b/includes/libs/jsminplus.php
@@ -1,7 +1,7 @@
<?php
/**
- * JSMinPlus version 1.3
+ * JSMinPlus version 1.4
*
* Minifies a javascript file using a javascript parser
*
@@ -15,8 +15,10 @@
* Usage: $minified = JSMinPlus::minify($script [, $filename])
*
* Versionlog (see also changelog.txt):
- * 19-07-2011 - expanded operator and keyword defines. Fixes the notices when creating several JSTokenizer
- * 17-05-2009 - fixed hook:colon precedence, fixed empty body in loop and if-constructs
+ * 23-07-2011 - remove dynamic creation of OP_* and KEYWORD_* defines and declare them on top
+ * reduce memory footprint by minifying by block-scope
+ * some small byte-saving and performance improvements
+ * 12-05-2009 - fixed hook:colon precedence, fixed empty body in loop and if-constructs
* 18-04-2009 - fixed crashbug in PHP 5.2.9 and several other bugfixes
* 12-04-2009 - some small bugfixes and performance improvements
* 09-04-2009 - initial open sourced version 1.0
@@ -46,7 +48,7 @@
* the Initial Developer. All Rights Reserved.
*
* Contributor(s): Tino Zijdel <crisp@tweakers.net>
- * PHP port, modifications and minifier routine are (C) 2009
+ * PHP port, modifications and minifier routine are (C) 2009-2011
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
@@ -86,6 +88,8 @@ define('JS_SETTER', 111);
define('JS_GROUP', 112);
define('JS_LIST', 113);
+define('JS_MINIFIED', 999);
+
define('DECLARED_FORM', 0);
define('EXPRESSED_FORM', 1);
define('STATEMENT_FORM', 2);
@@ -188,7 +192,7 @@ class JSMinPlus
private function __construct()
{
- $this->parser = new JSParser();
+ $this->parser = new JSParser($this);
}
public static function minify($js, $filename='')
@@ -217,22 +221,18 @@ class JSMinPlus
return false;
}
- private function parseTree($n, $noBlockGrouping = false)
+ public function parseTree($n, $noBlockGrouping = false)
{
$s = '';
switch ($n->type)
{
- case KEYWORD_FUNCTION:
- $s .= 'function' . ($n->name ? ' ' . $n->name : '') . '(';
- $params = $n->params;
- for ($i = 0, $j = count($params); $i < $j; $i++)
- $s .= ($i ? ',' : '') . $params[$i];
- $s .= '){' . $this->parseTree($n->body, true) . '}';
+ case JS_MINIFIED:
+ $s = $n->value;
break;
case JS_SCRIPT:
- // we do nothing with funDecls or varDecls
+ // we do nothing yet with funDecls or varDecls
$noBlockGrouping = true;
// FALL THROUGH
@@ -279,6 +279,14 @@ class JSMinPlus
}
break;
+ case KEYWORD_FUNCTION:
+ $s .= 'function' . ($n->name ? ' ' . $n->name : '') . '(';
+ $params = $n->params;
+ for ($i = 0, $j = count($params); $i < $j; $i++)
+ $s .= ($i ? ',' : '') . $params[$i];
+ $s .= '){' . $this->parseTree($n->body, true) . '}';
+ break;
+
case KEYWORD_IF:
$s = 'if(' . $this->parseTree($n->condition) . ')';
$thenPart = $this->parseTree($n->thenPart);
@@ -385,19 +393,14 @@ class JSMinPlus
break;
case KEYWORD_THROW:
- $s = 'throw ' . $this->parseTree($n->exception);
- break;
-
case KEYWORD_RETURN:
- $s = 'return';
+ $s = $n->type;
if ($n->value)
{
$t = $this->parseTree($n->value);
if (strlen($t))
{
- if ( $t[0] != '(' && $t[0] != '[' && $t[0] != '{' &&
- $t[0] != '"' && $t[0] != "'" && $t[0] != '/'
- )
+ if ($this->isWordChar($t[0]) || $t[0] == '\\')
$s .= ' ';
$s .= $t;
@@ -423,6 +426,40 @@ class JSMinPlus
}
break;
+ case KEYWORD_IN:
+ case KEYWORD_INSTANCEOF:
+ $left = $this->parseTree($n->treeNodes[0]);
+ $right = $this->parseTree($n->treeNodes[1]);
+
+ $s = $left;
+
+ if ($this->isWordChar(substr($left, -1)))
+ $s .= ' ';
+
+ $s .= $n->type;
+
+ if ($this->isWordChar($right[0]) || $right[0] == '\\')
+ $s .= ' ';
+
+ $s .= $right;
+ break;
+
+ case KEYWORD_DELETE:
+ case KEYWORD_TYPEOF:
+ $right = $this->parseTree($n->treeNodes[0]);
+
+ $s = $n->type;
+
+ if ($this->isWordChar($right[0]) || $right[0] == '\\')
+ $s .= ' ';
+
+ $s .= $right;
+ break;
+
+ case KEYWORD_VOID:
+ $s = 'void(' . $this->parseTree($n->treeNodes[0]) . ')';
+ break;
+
case KEYWORD_DEBUGGER:
throw new Exception('NOT IMPLEMENTED: DEBUGGER');
break;
@@ -497,26 +534,6 @@ class JSMinPlus
}
break;
- case KEYWORD_IN:
- $s = $this->parseTree($n->treeNodes[0]) . ' in ' . $this->parseTree($n->treeNodes[1]);
- break;
-
- case KEYWORD_INSTANCEOF:
- $s = $this->parseTree($n->treeNodes[0]) . ' instanceof ' . $this->parseTree($n->treeNodes[1]);
- break;
-
- case KEYWORD_DELETE:
- $s = 'delete ' . $this->parseTree($n->treeNodes[0]);
- break;
-
- case KEYWORD_VOID:
- $s = 'void(' . $this->parseTree($n->treeNodes[0]) . ')';
- break;
-
- case KEYWORD_TYPEOF:
- $s = 'typeof ' . $this->parseTree($n->treeNodes[0]);
- break;
-
case OP_NOT:
case OP_BITWISE_NOT:
case OP_UNARY_PLUS:
@@ -606,13 +623,33 @@ class JSMinPlus
$s .= '}';
break;
+ case TOKEN_NUMBER:
+ $s = $n->value;
+ if (preg_match('/^([1-9]+)(0{3,})$/', $s, $m))
+ $s = $m[1] . 'e' . strlen($m[2]);
+ break;
+
case KEYWORD_NULL: case KEYWORD_THIS: case KEYWORD_TRUE: case KEYWORD_FALSE:
- case TOKEN_IDENTIFIER: case TOKEN_NUMBER: case TOKEN_STRING: case TOKEN_REGEXP:
+ case TOKEN_IDENTIFIER: case TOKEN_STRING: case TOKEN_REGEXP:
$s = $n->value;
break;
case JS_GROUP:
- $s = '(' . $this->parseTree($n->treeNodes[0]) . ')';
+ if (in_array(
+ $n->treeNodes[0]->type,
+ array(
+ JS_ARRAY_INIT, JS_OBJECT_INIT, JS_GROUP,
+ TOKEN_NUMBER, TOKEN_STRING, TOKEN_REGEXP, TOKEN_IDENTIFIER,
+ KEYWORD_NULL, KEYWORD_THIS, KEYWORD_TRUE, KEYWORD_FALSE
+ )
+ ))
+ {
+ $s = $this->parseTree($n->treeNodes[0]);
+ }
+ else
+ {
+ $s = '(' . $this->parseTree($n->treeNodes[0]) . ')';
+ }
break;
default:
@@ -626,11 +663,17 @@ class JSMinPlus
{
return preg_match('/^[a-zA-Z_][a-zA-Z0-9_]*$/', $string) && !in_array($string, $this->reserved);
}
+
+ private function isWordChar($char)
+ {
+ return $char == '_' || $char == '$' || ctype_alnum($char);
+ }
}
class JSParser
{
private $t;
+ private $minifier;
private $opPrecedence = array(
';' => 0,
@@ -680,8 +723,9 @@ class JSParser
TOKEN_CONDCOMMENT_START => 1, TOKEN_CONDCOMMENT_END => 1
);
- public function __construct()
+ public function __construct($minifier=null)
{
+ $this->minifier = $minifier;
$this->t = new JSTokenizer();
}
@@ -705,6 +749,19 @@ class JSParser
$n->funDecls = $x->funDecls;
$n->varDecls = $x->varDecls;
+ // minify by scope
+ if ($this->minifier)
+ {
+ $n->value = $this->minifier->parseTree($n);
+
+ // clear tree from node to save memory
+ $n->treeNodes = null;
+ $n->funDecls = null;
+ $n->varDecls = null;
+
+ $n->type = JS_MINIFIED;
+ }
+
return $n;
}
@@ -963,7 +1020,7 @@ class JSParser
case KEYWORD_THROW:
$n = new JSNode($this->t);
- $n->exception = $this->Expression($x);
+ $n->value = $this->Expression($x);
break;
case KEYWORD_RETURN:
@@ -1678,44 +1735,11 @@ class JSTokenizer
);
private $opTypeNames = array(
- ';' => 'SEMICOLON',
- ',' => 'COMMA',
- '?' => 'HOOK',
- ':' => 'COLON',
- '||' => 'OR',
- '&&' => 'AND',
- '|' => 'BITWISE_OR',
- '^' => 'BITWISE_XOR',
- '&' => 'BITWISE_AND',
- '===' => 'STRICT_EQ',
- '==' => 'EQ',
- '=' => 'ASSIGN',
- '!==' => 'STRICT_NE',
- '!=' => 'NE',
- '<<' => 'LSH',
- '<=' => 'LE',
- '<' => 'LT',
- '>>>' => 'URSH',
- '>>' => 'RSH',
- '>=' => 'GE',
- '>' => 'GT',
- '++' => 'INCREMENT',
- '--' => 'DECREMENT',
- '+' => 'PLUS',
- '-' => 'MINUS',
- '*' => 'MUL',
- '/' => 'DIV',
- '%' => 'MOD',
- '!' => 'NOT',
- '~' => 'BITWISE_NOT',
- '.' => 'DOT',
- '[' => 'LEFT_BRACKET',
- ']' => 'RIGHT_BRACKET',
- '{' => 'LEFT_CURLY',
- '}' => 'RIGHT_CURLY',
- '(' => 'LEFT_PAREN',
- ')' => 'RIGHT_PAREN',
- '@*/' => 'CONDCOMMENT_END'
+ ';', ',', '?', ':', '||', '&&', '|', '^',
+ '&', '===', '==', '=', '!==', '!=', '<<', '<=',
+ '<', '>>>', '>>', '>=', '>', '++', '--', '+',
+ '-', '*', '/', '%', '!', '~', '.', '[',
+ ']', '{', '}', '(', ')', '@*/'
);
private $assignOps = array('|', '^', '&', '<<', '>>', '>>>', '+', '-', '*', '/', '%');
@@ -1723,7 +1747,7 @@ class JSTokenizer
public function __construct()
{
- $this->opRegExp = '#^(' . implode('|', array_map('preg_quote', array_keys($this->opTypeNames))) . ')#';
+ $this->opRegExp = '#^(' . implode('|', array_map('preg_quote', $this->opTypeNames)) . ')#';
}
public function init($source, $filename = '', $lineno = 1)
@@ -1874,22 +1898,38 @@ class JSTokenizer
{
switch ($input[0])
{
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- if (preg_match('/^\d+\.\d*(?:[eE][-+]?\d+)?|^\d+(?:\.\d*)?[eE][-+]?\d+/', $input, $match))
+ case '0':
+ // hexadecimal
+ if (($input[1] == 'x' || $input[1] == 'X') && preg_match('/^0x[0-9a-f]+/i', $input, $match))
{
$tt = TOKEN_NUMBER;
+ break;
}
- else if (preg_match('/^0[xX][\da-fA-F]+|^0[0-7]*|^\d+/', $input, $match))
+ // FALL THROUGH
+
+ case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
+ // should always match
+ preg_match('/^\d+(?:\.\d*)?(?:[eE][-+]?\d+)?/', $input, $match);
+ $tt = TOKEN_NUMBER;
+ break;
+
+ case "'":
+ if (preg_match('/^\'(?:[^\\\\\'\r\n]++|\\\\(?:.|\r?\n))*\'/', $input, $match))
{
- // this should always match because of \d+
- $tt = TOKEN_NUMBER;
+ $tt = TOKEN_STRING;
+ }
+ else
+ {
+ if ($chunksize)
+ return $this->get(null); // retry with a full chunk fetch
+
+ throw $this->newSyntaxError('Unterminated string literal');
}
break;
case '"':
- case "'":
- if (preg_match('/^"(?:\\\\(?:.|\r?\n)|[^\\\\"\r\n]+)*"|^\'(?:\\\\(?:.|\r?\n)|[^\\\\\'\r\n]+)*\'/', $input, $match))
+ if (preg_match('/^"(?:[^\\\\"\r\n]++|\\\\(?:.|\r?\n))*"/', $input, $match))
{
$tt = TOKEN_STRING;
}
@@ -2091,4 +2131,3 @@ class JSToken
public $lineno;
public $assignOp;
}
-