summaryrefslogtreecommitdiff
path: root/includes/libs/JavaScriptMinifier.php
diff options
context:
space:
mode:
Diffstat (limited to 'includes/libs/JavaScriptMinifier.php')
-rw-r--r--includes/libs/JavaScriptMinifier.php579
1 files changed, 579 insertions, 0 deletions
diff --git a/includes/libs/JavaScriptMinifier.php b/includes/libs/JavaScriptMinifier.php
new file mode 100644
index 00000000..a991d915
--- /dev/null
+++ b/includes/libs/JavaScriptMinifier.php
@@ -0,0 +1,579 @@
+<?php
+/**
+ * JavaScript Minifier
+ *
+ * This class is meant to safely minify javascript code, while leaving syntactically correct
+ * programs intact. Other libraries, such as JSMin require a certain coding style to work
+ * correctly. OTOH, libraries like jsminplus, that do parse the code correctly are rather
+ * slow, because they construct a complete parse tree before outputting the code minified.
+ * So this class is meant to allow arbitrary (but syntactically correct) input, while being
+ * fast enough to be used for on-the-fly minifying.
+ *
+ * Author: Paul Copperman <paul.copperman@gmail.com>
+ * License: choose any of Apache, MIT, GPL, LGPL
+ */
+
+class JavaScriptMinifier {
+
+ /* Class constants */
+ /* Parsing states.
+ * The state machine is only necessary to decide whether to parse a slash as division
+ * operator or as regexp literal.
+ * States are named after the next expected item. We only distinguish states when the
+ * distinction is relevant for our purpose.
+ */
+ const STATEMENT = 0;
+ const CONDITION = 1;
+ const PROPERTY_ASSIGNMENT = 2;
+ const EXPRESSION = 3;
+ const EXPRESSION_NO_NL = 4; // only relevant for semicolon insertion
+ const EXPRESSION_OP = 5;
+ const EXPRESSION_FUNC = 6;
+ const EXPRESSION_TERNARY = 7; // used to determine the role of a colon
+ const EXPRESSION_TERNARY_OP = 8;
+ const EXPRESSION_TERNARY_FUNC = 9;
+ const PAREN_EXPRESSION = 10; // expression which is not on the top level
+ const PAREN_EXPRESSION_OP = 11;
+ const PAREN_EXPRESSION_FUNC = 12;
+ const PROPERTY_EXPRESSION = 13; // expression which is within an object literal
+ const PROPERTY_EXPRESSION_OP = 14;
+ const PROPERTY_EXPRESSION_FUNC = 15;
+
+ /* Token types */
+ const TYPE_UN_OP = 1; // unary operators
+ const TYPE_INCR_OP = 2; // ++ and --
+ const TYPE_BIN_OP = 3; // binary operators
+ const TYPE_ADD_OP = 4; // + and - which can be either unary or binary ops
+ const TYPE_HOOK = 5; // ?
+ const TYPE_COLON = 6; // :
+ const TYPE_COMMA = 7; // ,
+ const TYPE_SEMICOLON = 8; // ;
+ const TYPE_BRACE_OPEN = 9; // {
+ const TYPE_BRACE_CLOSE = 10; // }
+ const TYPE_PAREN_OPEN = 11; // ( and [
+ const TYPE_PAREN_CLOSE = 12; // ) and ]
+ const TYPE_RETURN = 13; // keywords: break, continue, return, throw
+ const TYPE_IF = 14; // keywords: catch, for, with, switch, while, if
+ const TYPE_DO = 15; // keywords: case, var, finally, else, do, try
+ const TYPE_FUNC = 16; // keywords: function
+ const TYPE_LITERAL = 17; // all literals, identifiers and unrecognised tokens
+
+ // Sanity limit to avoid excessive memory usage
+ const STACK_LIMIT = 1000;
+
+ /* Static functions */
+
+ /**
+ * Returns minified JavaScript code.
+ *
+ * NOTE: $maxLineLength isn't a strict maximum. Longer lines will be produced when
+ * literals (e.g. quoted strings) longer than $maxLineLength are encountered
+ * or when required to guard against semicolon insertion.
+ *
+ * @param $s String JavaScript code to minify
+ * @param $statementsOnOwnLine Bool Whether to put each statement on its own line
+ * @param $maxLineLength Int Maximum length of a single line, or -1 for no maximum.
+ * @return String Minified code
+ */
+ public static function minify( $s, $statementsOnOwnLine = false, $maxLineLength = 1000 ) {
+ // First we declare a few tables that contain our parsing rules
+
+ // $opChars : characters, which can be combined without whitespace in between them
+ $opChars = array(
+ '!' => true,
+ '"' => true,
+ '%' => true,
+ '&' => true,
+ "'" => true,
+ '(' => true,
+ ')' => true,
+ '*' => true,
+ '+' => true,
+ ',' => true,
+ '-' => true,
+ '.' => true,
+ '/' => true,
+ ':' => true,
+ ';' => true,
+ '<' => true,
+ '=' => true,
+ '>' => true,
+ '?' => true,
+ '[' => true,
+ ']' => true,
+ '^' => true,
+ '{' => true,
+ '|' => true,
+ '}' => true,
+ '~' => true
+ );
+
+ // $tokenTypes : maps keywords and operators to their corresponding token type
+ $tokenTypes = array(
+ '!' => self::TYPE_UN_OP,
+ '~' => self::TYPE_UN_OP,
+ 'delete' => self::TYPE_UN_OP,
+ 'new' => self::TYPE_UN_OP,
+ 'typeof' => self::TYPE_UN_OP,
+ 'void' => self::TYPE_UN_OP,
+ '++' => self::TYPE_INCR_OP,
+ '--' => self::TYPE_INCR_OP,
+ '!=' => self::TYPE_BIN_OP,
+ '!==' => self::TYPE_BIN_OP,
+ '%' => self::TYPE_BIN_OP,
+ '%=' => self::TYPE_BIN_OP,
+ '&' => self::TYPE_BIN_OP,
+ '&&' => self::TYPE_BIN_OP,
+ '&=' => self::TYPE_BIN_OP,
+ '*' => self::TYPE_BIN_OP,
+ '*=' => self::TYPE_BIN_OP,
+ '+=' => self::TYPE_BIN_OP,
+ '-=' => self::TYPE_BIN_OP,
+ '.' => self::TYPE_BIN_OP,
+ '/' => self::TYPE_BIN_OP,
+ '/=' => self::TYPE_BIN_OP,
+ '<' => self::TYPE_BIN_OP,
+ '<<' => self::TYPE_BIN_OP,
+ '<<=' => self::TYPE_BIN_OP,
+ '<=' => self::TYPE_BIN_OP,
+ '=' => self::TYPE_BIN_OP,
+ '==' => self::TYPE_BIN_OP,
+ '===' => self::TYPE_BIN_OP,
+ '>' => self::TYPE_BIN_OP,
+ '>=' => self::TYPE_BIN_OP,
+ '>>' => self::TYPE_BIN_OP,
+ '>>=' => self::TYPE_BIN_OP,
+ '>>>' => self::TYPE_BIN_OP,
+ '>>>=' => self::TYPE_BIN_OP,
+ '^' => self::TYPE_BIN_OP,
+ '^=' => self::TYPE_BIN_OP,
+ '|' => self::TYPE_BIN_OP,
+ '|=' => self::TYPE_BIN_OP,
+ '||' => self::TYPE_BIN_OP,
+ 'in' => self::TYPE_BIN_OP,
+ 'instanceof' => self::TYPE_BIN_OP,
+ '+' => self::TYPE_ADD_OP,
+ '-' => self::TYPE_ADD_OP,
+ '?' => self::TYPE_HOOK,
+ ':' => self::TYPE_COLON,
+ ',' => self::TYPE_COMMA,
+ ';' => self::TYPE_SEMICOLON,
+ '{' => self::TYPE_BRACE_OPEN,
+ '}' => self::TYPE_BRACE_CLOSE,
+ '(' => self::TYPE_PAREN_OPEN,
+ '[' => self::TYPE_PAREN_OPEN,
+ ')' => self::TYPE_PAREN_CLOSE,
+ ']' => self::TYPE_PAREN_CLOSE,
+ 'break' => self::TYPE_RETURN,
+ 'continue' => self::TYPE_RETURN,
+ 'return' => self::TYPE_RETURN,
+ 'throw' => self::TYPE_RETURN,
+ 'catch' => self::TYPE_IF,
+ 'for' => self::TYPE_IF,
+ 'if' => self::TYPE_IF,
+ 'switch' => self::TYPE_IF,
+ 'while' => self::TYPE_IF,
+ 'with' => self::TYPE_IF,
+ 'case' => self::TYPE_DO,
+ 'do' => self::TYPE_DO,
+ 'else' => self::TYPE_DO,
+ 'finally' => self::TYPE_DO,
+ 'try' => self::TYPE_DO,
+ 'var' => self::TYPE_DO,
+ 'function' => self::TYPE_FUNC
+ );
+
+ // $goto : This is the main table for our state machine. For every state/token pair
+ // the following state is defined. When no rule exists for a given pair,
+ // the state is left unchanged.
+ $goto = array(
+ self::STATEMENT => array(
+ self::TYPE_UN_OP => self::EXPRESSION,
+ self::TYPE_INCR_OP => self::EXPRESSION,
+ self::TYPE_ADD_OP => self::EXPRESSION,
+ self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
+ self::TYPE_RETURN => self::EXPRESSION_NO_NL,
+ self::TYPE_IF => self::CONDITION,
+ self::TYPE_FUNC => self::CONDITION,
+ self::TYPE_LITERAL => self::EXPRESSION_OP
+ ),
+ self::CONDITION => array(
+ self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
+ ),
+ self::PROPERTY_ASSIGNMENT => array(
+ self::TYPE_COLON => self::PROPERTY_EXPRESSION,
+ self::TYPE_BRACE_OPEN => self::STATEMENT
+ ),
+ self::EXPRESSION => array(
+ self::TYPE_SEMICOLON => self::STATEMENT,
+ self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
+ self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
+ self::TYPE_FUNC => self::EXPRESSION_FUNC,
+ self::TYPE_LITERAL => self::EXPRESSION_OP
+ ),
+ self::EXPRESSION_NO_NL => array(
+ self::TYPE_SEMICOLON => self::STATEMENT,
+ self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
+ self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
+ self::TYPE_FUNC => self::EXPRESSION_FUNC,
+ self::TYPE_LITERAL => self::EXPRESSION_OP
+ ),
+ self::EXPRESSION_OP => array(
+ self::TYPE_BIN_OP => self::EXPRESSION,
+ self::TYPE_ADD_OP => self::EXPRESSION,
+ self::TYPE_HOOK => self::EXPRESSION_TERNARY,
+ self::TYPE_COLON => self::STATEMENT,
+ self::TYPE_COMMA => self::EXPRESSION,
+ self::TYPE_SEMICOLON => self::STATEMENT,
+ self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
+ ),
+ self::EXPRESSION_FUNC => array(
+ self::TYPE_BRACE_OPEN => self::STATEMENT
+ ),
+ self::EXPRESSION_TERNARY => array(
+ self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
+ self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
+ self::TYPE_FUNC => self::EXPRESSION_TERNARY_FUNC,
+ self::TYPE_LITERAL => self::EXPRESSION_TERNARY_OP
+ ),
+ self::EXPRESSION_TERNARY_OP => array(
+ self::TYPE_BIN_OP => self::EXPRESSION_TERNARY,
+ self::TYPE_ADD_OP => self::EXPRESSION_TERNARY,
+ self::TYPE_HOOK => self::EXPRESSION_TERNARY,
+ self::TYPE_COMMA => self::EXPRESSION_TERNARY,
+ self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
+ ),
+ self::EXPRESSION_TERNARY_FUNC => array(
+ self::TYPE_BRACE_OPEN => self::STATEMENT
+ ),
+ self::PAREN_EXPRESSION => array(
+ self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
+ self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
+ self::TYPE_FUNC => self::PAREN_EXPRESSION_FUNC,
+ self::TYPE_LITERAL => self::PAREN_EXPRESSION_OP
+ ),
+ self::PAREN_EXPRESSION_OP => array(
+ self::TYPE_BIN_OP => self::PAREN_EXPRESSION,
+ self::TYPE_ADD_OP => self::PAREN_EXPRESSION,
+ self::TYPE_HOOK => self::PAREN_EXPRESSION,
+ self::TYPE_COLON => self::PAREN_EXPRESSION,
+ self::TYPE_COMMA => self::PAREN_EXPRESSION,
+ self::TYPE_SEMICOLON => self::PAREN_EXPRESSION,
+ self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
+ ),
+ self::PAREN_EXPRESSION_FUNC => array(
+ self::TYPE_BRACE_OPEN => self::STATEMENT
+ ),
+ self::PROPERTY_EXPRESSION => array(
+ self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
+ self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
+ self::TYPE_FUNC => self::PROPERTY_EXPRESSION_FUNC,
+ self::TYPE_LITERAL => self::PROPERTY_EXPRESSION_OP
+ ),
+ self::PROPERTY_EXPRESSION_OP => array(
+ self::TYPE_BIN_OP => self::PROPERTY_EXPRESSION,
+ self::TYPE_ADD_OP => self::PROPERTY_EXPRESSION,
+ self::TYPE_HOOK => self::PROPERTY_EXPRESSION,
+ self::TYPE_COMMA => self::PROPERTY_ASSIGNMENT,
+ self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
+ ),
+ self::PROPERTY_EXPRESSION_FUNC => array(
+ self::TYPE_BRACE_OPEN => self::STATEMENT
+ )
+ );
+
+ // $push : This table contains the rules for when to push a state onto the stack.
+ // The pushed state is the state to return to when the corresponding
+ // closing token is found
+ $push = array(
+ self::STATEMENT => array(
+ self::TYPE_BRACE_OPEN => self::STATEMENT,
+ self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
+ ),
+ self::CONDITION => array(
+ self::TYPE_PAREN_OPEN => self::STATEMENT
+ ),
+ self::PROPERTY_ASSIGNMENT => array(
+ self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT
+ ),
+ self::EXPRESSION => array(
+ self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
+ self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
+ ),
+ self::EXPRESSION_NO_NL => array(
+ self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
+ self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
+ ),
+ self::EXPRESSION_OP => array(
+ self::TYPE_HOOK => self::EXPRESSION,
+ self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
+ ),
+ self::EXPRESSION_FUNC => array(
+ self::TYPE_BRACE_OPEN => self::EXPRESSION_OP
+ ),
+ self::EXPRESSION_TERNARY => array(
+ self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP,
+ self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
+ ),
+ self::EXPRESSION_TERNARY_OP => array(
+ self::TYPE_HOOK => self::EXPRESSION_TERNARY,
+ self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
+ ),
+ self::EXPRESSION_TERNARY_FUNC => array(
+ self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP
+ ),
+ self::PAREN_EXPRESSION => array(
+ self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP,
+ self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
+ ),
+ self::PAREN_EXPRESSION_OP => array(
+ self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
+ ),
+ self::PAREN_EXPRESSION_FUNC => array(
+ self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP
+ ),
+ self::PROPERTY_EXPRESSION => array(
+ self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP,
+ self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
+ ),
+ self::PROPERTY_EXPRESSION_OP => array(
+ self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
+ ),
+ self::PROPERTY_EXPRESSION_FUNC => array(
+ self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP
+ )
+ );
+
+ // $pop : Rules for when to pop a state from the stack
+ $pop = array(
+ self::STATEMENT => array( self::TYPE_BRACE_CLOSE => true ),
+ self::PROPERTY_ASSIGNMENT => array( self::TYPE_BRACE_CLOSE => true ),
+ self::EXPRESSION => array( self::TYPE_BRACE_CLOSE => true ),
+ self::EXPRESSION_NO_NL => array( self::TYPE_BRACE_CLOSE => true ),
+ self::EXPRESSION_OP => array( self::TYPE_BRACE_CLOSE => true ),
+ self::EXPRESSION_TERNARY_OP => array( self::TYPE_COLON => true ),
+ self::PAREN_EXPRESSION => array( self::TYPE_PAREN_CLOSE => true ),
+ self::PAREN_EXPRESSION_OP => array( self::TYPE_PAREN_CLOSE => true ),
+ self::PROPERTY_EXPRESSION => array( self::TYPE_BRACE_CLOSE => true ),
+ self::PROPERTY_EXPRESSION_OP => array( self::TYPE_BRACE_CLOSE => true )
+ );
+
+ // $semicolon : Rules for when a semicolon insertion is appropriate
+ $semicolon = array(
+ self::EXPRESSION_NO_NL => array(
+ self::TYPE_UN_OP => true,
+ self::TYPE_INCR_OP => true,
+ self::TYPE_ADD_OP => true,
+ self::TYPE_BRACE_OPEN => true,
+ self::TYPE_PAREN_OPEN => true,
+ self::TYPE_RETURN => true,
+ self::TYPE_IF => true,
+ self::TYPE_DO => true,
+ self::TYPE_FUNC => true,
+ self::TYPE_LITERAL => true
+ ),
+ self::EXPRESSION_OP => array(
+ self::TYPE_UN_OP => true,
+ self::TYPE_INCR_OP => true,
+ self::TYPE_BRACE_OPEN => true,
+ self::TYPE_RETURN => true,
+ self::TYPE_IF => true,
+ self::TYPE_DO => true,
+ self::TYPE_FUNC => true,
+ self::TYPE_LITERAL => true
+ )
+ );
+
+ // Rules for when newlines should be inserted if
+ // $statementsOnOwnLine is enabled.
+ // $newlineBefore is checked before switching state,
+ // $newlineAfter is checked after
+ $newlineBefore = array(
+ self::STATEMENT => array(
+ self::TYPE_BRACE_CLOSE => true,
+ ),
+ );
+ $newlineAfter = array(
+ self::STATEMENT => array(
+ self::TYPE_BRACE_OPEN => true,
+ self::TYPE_PAREN_CLOSE => true,
+ self::TYPE_SEMICOLON => true,
+ ),
+ );
+
+ // $divStates : Contains all states that can be followed by a division operator
+ $divStates = array(
+ self::EXPRESSION_OP => true,
+ self::EXPRESSION_TERNARY_OP => true,
+ self::PAREN_EXPRESSION_OP => true,
+ self::PROPERTY_EXPRESSION_OP => true
+ );
+
+ // Here's where the minifying takes place: Loop through the input, looking for tokens
+ // and output them to $out, taking actions to the above defined rules when appropriate.
+ $out = '';
+ $pos = 0;
+ $length = strlen( $s );
+ $lineLength = 0;
+ $newlineFound = true;
+ $state = self::STATEMENT;
+ $stack = array();
+ $last = ';'; // Pretend that we have seen a semicolon yet
+ while( $pos < $length ) {
+ // First, skip over any whitespace and multiline comments, recording whether we
+ // found any newline character
+ $skip = strspn( $s, " \t\n\r\xb\xc", $pos );
+ if( !$skip ) {
+ $ch = $s[$pos];
+ if( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) {
+ // Multiline comment. Search for the end token or EOT.
+ $end = strpos( $s, '*/', $pos + 2 );
+ $skip = $end === false ? $length - $pos : $end - $pos + 2;
+ }
+ }
+ if( $skip ) {
+ // The semicolon insertion mechanism needs to know whether there was a newline
+ // between two tokens, so record it now.
+ if( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) {
+ $newlineFound = true;
+ }
+ $pos += $skip;
+ continue;
+ }
+ // Handle C++-style comments and html comments, which are treated as single line
+ // comments by the browser, regardless of whether the end tag is on the same line.
+ // Handle --> the same way, but only if it's at the beginning of the line
+ if( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' )
+ || ( $ch === '<' && substr( $s, $pos, 4 ) === '<!--' )
+ || ( $ch === '-' && $newlineFound && substr( $s, $pos, 3 ) === '-->' )
+ ) {
+ $pos += strcspn( $s, "\r\n", $pos );
+ continue;
+ }
+
+ // Find out which kind of token we're handling. $end will point past the end of it.
+ $end = $pos + 1;
+ // Handle string literals
+ if( $ch === "'" || $ch === '"' ) {
+ // Search to the end of the string literal, skipping over backslash escapes
+ $search = $ch . '\\';
+ do{
+ $end += strcspn( $s, $search, $end ) + 2;
+ } while( $end - 2 < $length && $s[$end - 2] === '\\' );
+ $end--;
+ // We have to distinguish between regexp literals and division operators
+ // A division operator is only possible in certain states
+ } elseif( $ch === '/' && !isset( $divStates[$state] ) ) {
+ // Regexp literal, search to the end, skipping over backslash escapes and
+ // character classes
+ for( ; ; ) {
+ do{
+ $end += strcspn( $s, '/[\\', $end ) + 2;
+ } while( $end - 2 < $length && $s[$end - 2] === '\\' );
+ $end--;
+ if( $end - 1 >= $length || $s[$end - 1] === '/' ) {
+ break;
+ }
+ do{
+ $end += strcspn( $s, ']\\', $end ) + 2;
+ } while( $end - 2 < $length && $s[$end - 2] === '\\' );
+ $end--;
+ };
+ // Search past the regexp modifiers (gi)
+ while( $end < $length && ctype_alpha( $s[$end] ) ) {
+ $end++;
+ }
+ } elseif(
+ ctype_digit( $ch )
+ || ( $ch === '.' && $pos + 1 < $length && ctype_digit( $s[$pos + 1] ) )
+ ) {
+ // Numeric literal. Search for the end of it, but don't care about [+-]exponent
+ // at the end, as the results of "numeric [+-] numeric" and "numeric" are
+ // identical to our state machine.
+ $end += strspn( $s, '0123456789ABCDEFabcdefXx.', $end );
+ while( $s[$end - 1] === '.' ) {
+ // Special case: When a numeric ends with a dot, we have to check the
+ // literal for proper syntax
+ $decimal = strspn( $s, '0123456789', $pos, $end - $pos - 1 );
+ if( $decimal === $end - $pos - 1 ) {
+ break;
+ } else {
+ $end--;
+ }
+ }
+ } elseif( isset( $opChars[$ch] ) ) {
+ // Punctuation character. Search for the longest matching operator.
+ while(
+ $end < $length
+ && isset( $tokenTypes[substr( $s, $pos, $end - $pos + 1 )] )
+ ) {
+ $end++;
+ }
+ } else {
+ // Identifier or reserved word. Search for the end by excluding whitespace and
+ // punctuation.
+ $end += strcspn( $s, " \t\n.;,=<>+-{}()[]?:*/%'\"!&|^~\xb\xc\r", $end );
+ }
+
+ // Now get the token type from our type array
+ $token = substr( $s, $pos, $end - $pos ); // so $end - $pos == strlen( $token )
+ $type = isset( $tokenTypes[$token] ) ? $tokenTypes[$token] : self::TYPE_LITERAL;
+
+ if( $newlineFound && isset( $semicolon[$state][$type] ) ) {
+ // This token triggers the semicolon insertion mechanism of javascript. While we
+ // could add the ; token here ourselves, keeping the newline has a few advantages.
+ $out .= "\n";
+ $state = self::STATEMENT;
+ $lineLength = 0;
+ } elseif( $maxLineLength > 0 && $lineLength + $end - $pos > $maxLineLength &&
+ !isset( $semicolon[$state][$type] ) && $type !== self::TYPE_INCR_OP )
+ {
+ // This line would get too long if we added $token, so add a newline first.
+ // Only do this if it won't trigger semicolon insertion and if it won't
+ // put a postfix increment operator on its own line, which is illegal in js.
+ $out .= "\n";
+ $lineLength = 0;
+ // Check, whether we have to separate the token from the last one with whitespace
+ } elseif( !isset( $opChars[$last] ) && !isset( $opChars[$ch] ) ) {
+ $out .= ' ';
+ $lineLength++;
+ // Don't accidentally create ++, -- or // tokens
+ } elseif( $last === $ch && ( $ch === '+' || $ch === '-' || $ch === '/' ) ) {
+ $out .= ' ';
+ $lineLength++;
+ }
+
+ $out .= $token;
+ $lineLength += $end - $pos; // += strlen( $token )
+ $last = $s[$end - 1];
+ $pos = $end;
+ $newlineFound = false;
+
+ // Output a newline after the token if required
+ // This is checked before AND after switching state
+ $newlineAdded = false;
+ if ( $statementsOnOwnLine && !$newlineAdded && isset( $newlineBefore[$state][$type] ) ) {
+ $out .= "\n";
+ $lineLength = 0;
+ $newlineAdded = true;
+ }
+
+ // Now that we have output our token, transition into the new state.
+ if( isset( $push[$state][$type] ) && count( $stack ) < self::STACK_LIMIT ) {
+ $stack[] = $push[$state][$type];
+ }
+ if( $stack && isset( $pop[$state][$type] ) ) {
+ $state = array_pop( $stack );
+ } elseif( isset( $goto[$state][$type] ) ) {
+ $state = $goto[$state][$type];
+ }
+
+ // Check for newline insertion again
+ if ( $statementsOnOwnLine && !$newlineAdded && isset( $newlineAfter[$state][$type] ) ) {
+ $out .= "\n";
+ $lineLength = 0;
+ }
+ }
+ return $out;
+ }
+}