summaryrefslogtreecommitdiff
path: root/includes/libs/CSSMin.php
blob: 4f142fc753617a3052da68dae896982d48c5ecc3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
<?php
/**
 * Minification of CSS stylesheets.
 *
 * Copyright 2010 Wikimedia Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * 		http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed
 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
 * OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 *
 * @file
 * @version 0.1.1 -- 2010-09-11
 * @author Trevor Parscal <tparscal@wikimedia.org>
 * @copyright Copyright 2010 Wikimedia Foundation
 * @license http://www.apache.org/licenses/LICENSE-2.0
 */

/**
 * Transforms CSS data
 *
 * This class provides minification, URL remapping, URL extracting, and data-URL embedding.
 */
class CSSMin {

	/* Constants */

	/**
	 * Maximum file size to still qualify for in-line embedding as a data-URI
	 *
	 * 24,576 is used because Internet Explorer has a 32,768 byte limit for data URIs,
	 * which when base64 encoded will result in a 1/3 increase in size.
	 */
	const EMBED_SIZE_LIMIT = 24576;

	/**
	 * Regular expression fragment (without delimiters) matching a CSS url(...) token.
	 *
	 * Named groups: "file" is everything up to an optional "?", "query" is the
	 * optional "?..." remainder. Surrounding quotes and whitespace are not captured.
	 */
	const URL_REGEX = 'url\(\s*[\'"]?(?P<file>[^\?\)\'"]*)(?P<query>\??[^\)\'"]*)[\'"]?\s*\)';

	/* Protected Static Members */

	/** @var array List of common image files extensions and mime-types */
	protected static $mimeTypes = array(
		'gif' => 'image/gif',
		'jpe' => 'image/jpeg',
		'jpeg' => 'image/jpeg',
		'jpg' => 'image/jpeg',
		'png' => 'image/png',
		'tif' => 'image/tiff',
		'tiff' => 'image/tiff',
		'xbm' => 'image/x-xbitmap',
	);

	/* Static Methods */

	/**
	 * Gets a list of local file paths which are referenced in a CSS style sheet
	 *
	 * Every url(...) in $source is resolved against $path; only those that exist on
	 * disk are returned. Without a $path nothing can be resolved, so the result is
	 * always an empty list.
	 *
	 * @param string $source CSS data to scan
	 * @param string $path File path where the source was read from (optional)
	 * @return array List of local file references
	 */
	public static function getLocalFileReferences( $source, $path = null ) {
		// No base directory means no reference can exist locally; skip the regex work
		if ( $path === null ) {
			return array();
		}

		$files = array();
		$base = rtrim( $path, '/' ) . '/';
		if ( preg_match_all( '/' . self::URL_REGEX . '/', $source, $matches, PREG_SET_ORDER ) ) {
			foreach ( $matches as $match ) {
				$file = $base . $match['file'];
				// Only report files we can actually access
				if ( file_exists( $file ) ) {
					$files[] = $file;
				}
			}
		}
		return $files;
	}

	/**
	 * Encode an image file as a base64 data URI.
	 * If the image file has a suitable MIME type and size, encode it as a
	 * base64 data URI. Return false if the file cannot be read, the image type
	 * is unfamiliar, or the file exceeds the size limit.
	 *
	 * @param string $file Image file to encode.
	 * @param string|null $type File's MIME type or null. If null, CSSMin will
	 *     try to autodetect the type.
	 * @param int|bool $sizeLimit If the size of the target file is greater than
	 *     or equal to this value, decline to encode the image file and return
	 *     false instead. If $sizeLimit is false, no limit is enforced.
	 * @return string|bool: Image contents encoded as a data URI or false.
	 */
	public static function encodeImageAsDataURI( $file, $type = null, $sizeLimit = self::EMBED_SIZE_LIMIT ) {
		// A missing path would otherwise raise warnings and produce an empty data URI
		if ( !is_file( $file ) ) {
			return false;
		}
		if ( $sizeLimit !== false && filesize( $file ) >= $sizeLimit ) {
			return false;
		}
		if ( $type === null ) {
			$type = self::getMimeType( $file );
		}
		if ( !$type ) {
			return false;
		}
		$contents = file_get_contents( $file );
		if ( $contents === false ) {
			// Unreadable file: report failure rather than "data:...;base64," with no payload
			return false;
		}
		return 'data:' . $type . ';base64,' . base64_encode( $contents );
	}

	/**
	 * Determine the MIME type of a file.
	 *
	 * Strategies are tried in order of preference: the Fileinfo extension, then
	 * mime_content_type(), and finally a lookup of the file extension in
	 * self::$mimeTypes.
	 *
	 * @param $file string Path of the file to inspect
	 * @return bool|string MIME type, or false if it could not be determined
	 */
	public static function getMimeType( $file ) {
		$realpath = realpath( $file );
		if (
			$realpath
			&& function_exists( 'finfo_file' )
			&& function_exists( 'finfo_open' )
			&& defined( 'FILEINFO_MIME_TYPE' )
		) {
			// As of PHP 5.3, this is how you get the mime-type of a file; it uses the Fileinfo
			// extension. finfo_open() returns false when the magic database cannot be loaded,
			// in which case we fall through to the next strategy instead of passing false on.
			$finfo = finfo_open( FILEINFO_MIME_TYPE );
			if ( $finfo ) {
				return finfo_file( $finfo, $realpath );
			}
		}
		if ( function_exists( 'mime_content_type' ) ) {
			return mime_content_type( $file );
		}
		// Worst-case scenario has happened, use the file extension to infer the mime-type
		$ext = strtolower( pathinfo( $file, PATHINFO_EXTENSION ) );
		if ( isset( self::$mimeTypes[$ext] ) ) {
			return self::$mimeTypes[$ext];
		}
		return false;
	}

	/**
	 * Remaps CSS URL paths and automatically embeds data URIs for URL rules
	 * preceded by an "@embed" comment (a CSS comment containing only "@embed").
	 *
	 * Handling per url(...):
	 *  - URLs with a scheme (http:, data:, ...) are left untouched.
	 *  - URLs starting with "/" are expanded with wfExpandUrl() when that function
	 *    exists, and otherwise left untouched.
	 *  - Other (relative) URLs are prefixed with $remote. If $local is not false and
	 *    the file exists there, the query string is replaced by a version timestamp
	 *    derived from the file's modification time; if $local is given but the file
	 *    does not exist, the URL is left untouched.
	 *
	 * The text following a url(...) is only captured up to the next ";" or "}", and
	 * scanning resumes right after the url(...) token, so several URLs in one
	 * declaration, or declarations without a trailing ";", are all processed and no
	 * ";" is added that was not in the input. Known limitation: when a declaration
	 * is successfully embedded it is emitted twice, and further url(...) tokens in
	 * the same declaration are copied as-is.
	 *
	 * @param string $source CSS data to remap
	 * @param string $local File path where the source was read from
	 * @param string $remote URL path to the file
	 * @param bool $embedData If false, never do any data URI embedding, even if an
	 *     "@embed" comment is found
	 * @return string Remapped CSS data
	 */
	public static function remap( $source, $local, $remote, $embedData = true ) {
		// "post" stops at ";" or "}" so that it never swallows following rules, and the
		// optional terminating ";" is captured so it is reproduced only when present.
		$pattern = '/((?P<embed>\s*\/\*\s*\@embed\s*\*\/)(?P<pre>[^\;\}]*))?' .
			self::URL_REGEX . '(?P<post>[^;\}]*)(?P<end>;?)/';

		// Guard against double slashes in the generated URLs (bug 27052). Done once up
		// front so that every URL in the sheet is built from the same prefix.
		if ( substr( $remote, -1 ) == '/' ) {
			$remote = substr( $remote, 0, -1 );
		}

		$offset = 0;
		while ( preg_match( $pattern, $source, $match, PREG_OFFSET_CAPTURE, $offset ) ) {
			$matchStart = $match[0][1];
			$path = $match['file'][0];
			// The "post" group starts immediately after the closing ")" of url(...)
			$urlEnd = $match['post'][1];

			// Skip fully-qualified URLs and data URIs
			if ( parse_url( $path, PHP_URL_SCHEME ) ) {
				// Resume right after this url(...) so that later URLs are still seen
				$offset = $urlEnd;
				continue;
			}

			// URLs with absolute paths like /w/index.php need to be expanded
			// to absolute URLs but otherwise left alone
			if ( $path !== '' && $path[0] === '/' ) {
				// Replace the file path with an expanded (possibly protocol-relative) URL
				// ...but only if wfExpandUrl() is even available.
				// This will not be the case if we're running outside of MW
				$lengthIncrease = 0;
				if ( function_exists( 'wfExpandUrl' ) ) {
					$expanded = wfExpandUrl( $path, PROTO_RELATIVE );
					$origLength = strlen( $path );
					$lengthIncrease = strlen( $expanded ) - $origLength;
					$source = substr_replace( $source, $expanded,
						$match['file'][1], $origLength
					);
				}
				// Resume right after this url(...), accounting for the expansion
				$offset = $urlEnd + $lengthIncrease;
				continue;
			}

			// Shortcuts. Groups that did not take part in the match are reported as ''
			$embed = $match['embed'][0];
			$pre = $match['pre'][0];
			$post = $match['post'][0];
			$end = $match['end'][0];
			$query = $match['query'][0];
			$url = "{$remote}/{$path}";

			$replacement = false;
			// Where to continue scanning once the replacement has been applied
			$resume = $urlEnd;

			if ( $local !== false ) {
				$file = "{$local}/{$path}";
				if ( file_exists( $file ) ) {
					// Add version parameter as a time-stamp in ISO 8601 format,
					// using Z for the timezone, meaning GMT. The modification time is
					// rounded to the nearest 100 seconds; round() returns a float,
					// gmdate() wants an int.
					$url .= '?' . gmdate( 'Y-m-d\TH:i:s\Z', (int)round( filemtime( $file ), -2 ) );
					// Embedding requires a bit of extra processing, so let's skip that if we can
					if ( $embedData && $embed !== '' ) {
						$data = self::encodeImageAsDataURI( $file );
						if ( $data !== false ) {
							// Build 2 CSS properties; one which uses a base64 encoded data URI in
							// place of the @embed comment to try and retain line-number integrity,
							// and the other with a remapped and versioned URL and an Internet
							// Explorer hack making it ignored in all browsers that support data URIs
							$replacement = "{$pre}url({$data}){$post};{$pre}url({$url}){$post}!ie{$end}";
							// Both copies are final; do not rescan our own output
							$resume = $matchStart + strlen( $replacement );
						}
					}
					if ( $replacement === false ) {
						// Assume that all paths are relative to $remote, and make them absolute
						$head = "{$embed}{$pre}url({$url})";
						$replacement = $head . $post . $end;
						$resume = $matchStart + strlen( $head );
					}
				}
			} else {
				// No local copy to consult: assume that all paths are relative to $remote,
				// make them absolute and keep the original query string
				$head = "{$embed}{$pre}url({$url}{$query})";
				$replacement = $head . $post . $end;
				$resume = $matchStart + strlen( $head );
			}

			if ( $replacement !== false ) {
				// Perform replacement on the source
				$source = substr_replace(
					$source, $replacement, $matchStart, strlen( $match[0][0] )
				);
			}
			$offset = $resume;
		}
		return $source;
	}

	/**
	 * Removes comments and superfluous whitespace from CSS data
	 *
	 * Comments are stripped before whitespace is collapsed, so the gap left behind
	 * by a removed comment is collapsed as well. This is a purely textual
	 * transformation: it does not parse the CSS, so string contents are affected
	 * by the same substitutions.
	 *
	 * @param string $css CSS data to minify
	 * @return string Minified CSS data
	 */
	public static function minify( $css ) {
		return trim(
			str_replace(
				array( '; ', ': ', ' {', '{ ', ', ', '} ', ';}' ),
				array( ';', ':', '{', '{', ',', '}', '}' ),
				preg_replace( array( '/\/\*.*?\*\//s', '/\s+/' ), array( '', ' ' ), $css )
			)
		);
	}
}