summaryrefslogtreecommitdiff
path: root/includes/upload/UploadStash.php
blob: 1765925d297692e5a91d0e2a418fb2dc4d6e56c7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
<?php
/**
 * UploadStash is intended to accomplish a few things:
 *   - Enable applications to temporarily stash files without publishing them to the wiki.
 *      - Several parts of MediaWiki do this in similar ways: UploadBase, UploadWizard, and FirefoggChunkedExtension.
 *        There are also several that reimplement stashing from scratch, in idiosyncratic ways. The idea is to unify them all here.
 *        Mostly all of them are the same except for storing some custom fields, which we subsume into the data array.
 *   - Enable applications to find said files later, as long as the session or temp files haven't been purged.
 *   - Enable the uploading user (and *ONLY* the uploading user) to access said files, and thumbnails of said files, via a URL.
 *     We accomplish this by making the session serve as a URL->file mapping, on the assumption that nobody else can access
 *     the session (not even the uploading user can read it directly). See SpecialUploadStash, which implements a web
 *     interface to some files stored this way.
 */
class UploadStash {

	// Format of the key for files -- has to be suitable as a filename itself (e.g. ab12cd34ef.jpg)
	const KEY_FORMAT_REGEX = '/^[\w-]+\.\w*$/';

	// repository that this uses to store temp files
	// public because we sometimes need to get a LocalFile within the same repo.
	public $repo;

	// array of initialized objects obtained from session (lazily initialized upon getFile())
	private $files = array();

	// TODO: Once UploadBase starts using this, switch to use these constants rather than UploadBase::SESSION*
	// const SESSION_VERSION = 2;
	// const SESSION_KEYNAME = 'wsUploadData';

	/**
	 * Represents the session which contains temporarily stored files.
	 * Designed to be compatible with the session stashing code in UploadBase (should replace it eventually)
	 *
	 * Makes sure the stash area of the session exists, creating an empty one if needed.
	 *
	 * @param $repo FileRepo: repo in which to store files (required; no default is chosen here)
	 * @throws UploadStashNotAvailableException if there is no session to store the stash in
	 */
	public function __construct( $repo ) {

		// this might change based on wiki's configuration.
		$this->repo = $repo;

		if ( !isset( $_SESSION ) ) {
			throw new UploadStashNotAvailableException( 'no session variable' );
		}

		if ( !isset( $_SESSION[UploadBase::SESSION_KEYNAME] ) ) {
			$_SESSION[UploadBase::SESSION_KEYNAME] = array();
		}

	}

	/**
	 * Get a file and its metadata from the stash.
	 * May throw exception if session data cannot be parsed due to schema change, or key not found.
	 * Files are instantiated lazily and cached per key for the lifetime of this object.
	 *
	 * @param $key String: key under which the file was stashed; must match KEY_FORMAT_REGEX
	 * @throws UploadStashBadPathException if the key is malformed
	 * @throws UploadStashFileNotFoundException if nothing is stashed under the key
	 * @throws UploadStashBadVersionException if the session data was written with another schema version
	 * @throws UploadStashZeroLengthFileException if the stashed file is empty
	 * @return UploadStashFile
	 */
	public function getFile( $key ) {
		if ( !preg_match( self::KEY_FORMAT_REGEX, $key ) ) {
			throw new UploadStashBadPathException( "key '$key' is not in a proper format" );
		}

		if ( !isset( $this->files[$key] ) ) {
			if ( !isset( $_SESSION[UploadBase::SESSION_KEYNAME][$key] ) ) {
				throw new UploadStashFileNotFoundException( "key '$key' not found in stash" );
			}

			$data = $_SESSION[UploadBase::SESSION_KEYNAME][$key];
			// guards against PHP class changing while session data doesn't.
			// A missing version is treated the same as a mismatching one.
			$version = isset( $data['version'] ) ? $data['version'] : null;
			if ( $version !== UploadBase::SESSION_VERSION ) {
				throw new UploadStashBadVersionException( $version . " does not match current version " . UploadBase::SESSION_VERSION );
			}

			// separate the stashData into the path, and then the rest of the data
			$path = $data['mTempPath'];
			unset( $data['mTempPath'] );

			$file = new UploadStashFile( $this, $this->repo, $path, $key, $data );
			// getSize is a method; reading it as a property always yielded null, so this check never fired
			if ( $file->getSize() === 0 ) {
				throw new UploadStashZeroLengthFileException( "File is zero length" );
			}
			$this->files[$key] = $file;

		}
		return $this->files[$key];
	}

	/**
	 * Stash a file in a temp directory and record that we did this in the session, along with other metadata.
	 * We store data in a flat key-val namespace because that's how UploadBase did it. This also means we have to
	 * ensure that the key-val pairs in $data do not overwrite other required fields.
	 *
	 * Note that the file at $path is renamed on disk if its name does not already end in the detected extension.
	 *
	 * @param $path String: path to file you want stashed
	 * @param $data Array: optional, other data you want associated with the file. Do not use 'mTempPath', 'mFileProps', 'mFileSize', or 'version' as keys here
	 * @param $key String: optional, unique key for this file in this session. Defaults to "<sha1>.<extension>" of the file
	 * @throws UploadStashBadPathException if $path does not exist or the key is malformed
	 * @throws UploadStashFileException if the file cannot be renamed or stored in the repo
	 * @return UploadStashFile: the stashed file (failures are reported by exception, never by null)
	 */
	public function stashFile( $path, $data = array(), $key = null ) {
		if ( !file_exists( $path ) ) {
			wfDebug( "UploadStash: tried to stash file at '$path', but it doesn't exist\n" );
			throw new UploadStashBadPathException( "path doesn't exist" );
		}
		$fileProps = File::getPropsFromPath( $path );

		// we will be initializing from some tmpnam files that don't have extensions.
		// most of MediaWiki assumes all uploaded files have good extensions. So, we fix this.
		$extension = self::getExtensionForPath( $path );
		// preg_quote (with the delimiter) keeps an unusual extension from breaking the pattern
		if ( !preg_match( '/\\.' . preg_quote( $extension, '/' ) . '$/', $path ) ) {
			$pathWithGoodExtension = "$path.$extension";
			if ( !rename( $path, $pathWithGoodExtension ) ) {
				throw new UploadStashFileException( "couldn't rename $path to have a better extension at $pathWithGoodExtension" );
			}
			$path = $pathWithGoodExtension;
		}

		// If no key was supplied, use content hash. Also has the nice property of collapsing multiple identical files
		// uploaded this session, which could happen if uploads had failed.
		if ( is_null( $key ) ) {
			$key = $fileProps['sha1'] . "." . $extension;
		}

		if ( !preg_match( self::KEY_FORMAT_REGEX, $key ) ) {
			throw new UploadStashBadPathException( "key '$key' is not in a proper format" );
		}

		// if not already in a temporary area, put it there
		$status = $this->repo->storeTemp( basename( $path ), $path );

		if ( !$status->isOK() ) {
			// It is a convention in MediaWiki to only return one error per API exception, even if multiple errors
			// are available. We pick the "first" thing that was wrong, preferring errors to warnings.
			// This is a bit lame, as we may have more info in the $status and we're throwing it away, but to fix it means
			// redesigning API errors significantly.
			// $status->value just contains the virtual URL (if anything) which is probably useless to the caller
			$error = self::getFirstProblem( $status );
			throw new UploadStashFileException( "error storing file in '$path': " . implode( '; ', $error ) );
		}
		$stashPath = $status->value;

		// required info we always store. Must trump any other application info in $data
		// 'mTempPath', 'mFileSize', and 'mFileProps' are arbitrary names
		// chosen for compatibility with UploadBase's way of doing this.
		$requiredData = array(
			'mTempPath' => $stashPath,
			'mFileSize' => $fileProps['size'],
			'mFileProps' => $fileProps,
			'version' => UploadBase::SESSION_VERSION
		);

		// now, merge required info and extra data into the session. (The extra data changes from application to application.
		// UploadWizard wants different things than say FirefoggChunkedUpload.)
		// $requiredData comes second so that it wins over any clashing keys in $data.
		wfDebug( __METHOD__ . " storing under $key\n" );
		$_SESSION[UploadBase::SESSION_KEYNAME][$key] = array_merge( $data, $requiredData );

		return $this->getFile( $key );
	}

	/**
	 * Pick the first recorded problem out of a status object, preferring errors to warnings.
	 *
	 * reset() takes its argument by reference and returns false for an empty list, so each
	 * list is copied into a local variable and checked before use.
	 *
	 * @param $status Object offering getErrorsArray() and getWarningsArray()
	 * @return Array: the first non-empty error, else the first non-empty warning, else a generic placeholder
	 */
	private static function getFirstProblem( $status ) {
		$candidates = array( $status->getErrorsArray(), $status->getWarningsArray() );
		foreach ( $candidates as $problems ) {
			if ( is_array( $problems ) && count( $problems ) ) {
				$first = reset( $problems );
				if ( is_array( $first ) && count( $first ) ) {
					return $first;
				}
			}
		}
		return array( 'unknown', 'no error recorded' );
	}

	/**
	 * Find or guess extension -- ensuring that our extension matches our mime type.
	 * Since these files are constructed from php tempnames they may not start off
	 * with an extension.
	 * XXX this is somewhat redundant with the checks that ApiUpload.php does with incoming
	 * uploads versus the desired filename. Maybe we can get that passed to us...
	 *
	 * @param $path String: path of the file to examine
	 * @throws UploadStashFileException if no extension could be determined
	 * @return String: extension (without the dot) as returned by File::normalizeExtension()
	 */
	public static function getExtensionForPath( $path ) {
		// Does this have an extension? Only a dot in the final path component counts:
		// a dot in a directory name (e.g. /srv/site.com/tmp/phpAbC) is not an extension.
		$n = strrpos( $path, '.' );
		$slash = strrpos( strtr( $path, '\\', '/' ), '/' );
		$lastSeparator = ( $slash === false ) ? -1 : $slash;

		$extension = null;
		if ( $n !== false && $n > $lastSeparator ) {
			// a path that is nothing but a leading-dot name has no extension
			$extension = $n ? substr( $path, $n + 1 ) : '';
		} else {
			// If not, assume that it should be related to the mime type of the original file.
			$magic = MimeMagic::singleton();
			$mimeType = $magic->guessMimeType( $path );
			$extensions = explode( ' ', $magic->getExtensionsForType( $mimeType ) );
			if ( count( $extensions ) ) {
				$extension = $extensions[0];
			}
		}

		if ( is_null( $extension ) ) {
			throw new UploadStashFileException( "extension is null" );
		}

		return File::normalizeExtension( $extension );
	}

}

/**
 * A file held in the upload stash, addressed by a session key rather than by its name on disk.
 */
class UploadStashFile extends UnregisteredLocalFile {
	// the UploadStash that created this file
	private $sessionStash;
	// key under which this file is stored in the session
	private $sessionKey;
	// any other data stored in the session alongside this file
	private $sessionData;
	// lazily computed base name used in URLs (see getUrlName())
	private $urlName;

	/**
	 * A LocalFile wrapper around a file that has been temporarily stashed, so we can do things like create thumbnails for it
	 * Arguably UnregisteredLocalFile should be handling its own file repo, but that class is not well suited to it currently
	 *
	 * @param $stash UploadStash: useful for obtaining config, stashing transformed files
	 * @param $repo FileRepo: repository where we should find the path
	 * @param $path String: path to file (may be a virtual mwrepo:// URL)
	 * @param $key String: key to store the path and any stashed data under
	 * @param $data Array: any other data we want stored with this file
	 * @throws UploadStashBadPathException if the path is not sane or not inside the repo's temp zone
	 * @throws UploadStashFileNotFoundException if the path does not exist or is not a plain file
	 */
	public function __construct( $stash, $repo, $path, $key, $data ) {
		$this->sessionStash = $stash;
		$this->sessionKey = $key;
		$this->sessionData = $data;

		// resolve mwrepo:// urls
		if ( $repo->isVirtualUrl( $path ) ) {
			$path = $repo->resolveVirtualUrl( $path );
		}

		// check if path appears to be sane, no parent traversals, and is in this repo's temp zone.
		// The prefix is anchored on a directory separator so that a sibling directory whose name
		// merely starts with the temp zone's name (e.g. ".../temp-other/") is not accepted.
		$repoTempPath = $repo->getZonePath( 'temp' );
		$hasTempZone = is_string( $repoTempPath ) && $repoTempPath !== '';
		$tempPrefix = $hasTempZone ? rtrim( $repoTempPath, '/' ) . '/' : '';
		if ( !$hasTempZone
			|| !$repo->validateFilename( $path )
			|| strpos( $path, $tempPrefix ) !== 0
		) {
			wfDebug( "UploadStash: tried to construct an UploadStashFile from a file that should already exist at '$path', but path is not valid\n" );
			throw new UploadStashBadPathException( 'path is not valid' );
		}

		// check if path exists! and is a plain file.
		if ( !$repo->fileExists( $path, FileRepo::FILES_ONLY ) ) {
			wfDebug( "UploadStash: tried to construct an UploadStashFile from a file that should already exist at '$path', but path is not found\n" );
			throw new UploadStashFileNotFoundException( 'cannot find path, or not a plain file' );
		}

		parent::__construct( false, $repo, $path, false );

		$this->name = basename( $this->path );
	}

	/**
	 * A method needed by the file transforming and scaling routines in File.php
	 * We do not necessarily care about doing the description at this point
	 * However, we also can't return the empty string, as the rest of MediaWiki demands this (and calls to imagemagick
	 * convert require it to be there)
	 *
	 * @return String: dummy value (the same as getUrl())
	 */
	public function getDescriptionUrl() {
		return $this->getUrl();
	}

	/**
	 * Get the path for the thumbnail (actually any transformation of this file)
	 * The actual argument is the result of thumbName although we seem to have
	 * buggy code elsewhere that expects a boolean 'suffix'
	 *
	 * @param $thumbName String: name of thumbnail (e.g. "120px-123456.jpg" ), or false to just get the path
	 * @return String: path thumbnail should take on filesystem, or containing directory if thumbname is false
	 */
	public function getThumbPath( $thumbName = false ) {
		// thumbnails live next to the stashed original
		$path = dirname( $this->path );
		if ( $thumbName !== false ) {
			$path .= "/$thumbName";
		}
		return $path;
	}

	/**
	 * Return the file/url base name of a thumbnail with the specified parameters
	 *
	 * @param $params Array: handler-specific parameters
	 * @return String: base name for URL, like '120px-12345.jpg', or null if there is no handler
	 */
	public function thumbName( $params ) {
		return $this->getParamThumbName( $this->getUrlName(), $params );
	}

	/**
	 * Given the name of the original, i.e. Foo.jpg, and scaling parameters, returns filename with appropriate extension
	 * This is abstracted from getThumbName because we also use it to calculate the thumbname the file should have on
	 * remote image scalers
	 *
	 * @param $urlName String: A filename, like MyMovie.ogx
	 * @param $params Array: scaling parameters, like array( 'width' => '120' );
	 * @return String|null parameterized thumb name, like 120px-MyMovie.ogx.jpg, or null if no handler found
	 */
	public function getParamThumbName( $urlName, $params ) {
		// fetch the handler once and use that one object throughout
		$handler = $this->getHandler();
		if ( !$handler ) {
			return null;
		}
		$extension = $this->getExtension();
		list( $thumbExt, ) = $handler->getThumbType( $extension, $this->getMimeType(), $params );
		$thumbName = $handler->makeParamString( $params ) . '-' . $urlName;
		// only append the thumbnail's extension when it differs from the original's
		if ( $thumbExt != $extension ) {
			$thumbName .= ".$thumbExt";
		}
		return $thumbName;
	}

	/**
	 * Helper function -- given a 'subpage', return the local URL e.g. /wiki/Special:UploadStash/subpage
	 *
	 * @param $subPage String
	 * @return String: local URL for this subpage in the Special:UploadStash space.
	 */
	private function getSpecialUrl( $subPage ) {
		return SpecialPage::getTitleFor( 'UploadStash', $subPage )->getLocalURL();
	}

	/**
	 * Get a URL to access the thumbnail
	 * This is required because the model of how files work requires that
	 * the thumbnail urls be predictable. However, in our model the URL is not based on the filename
	 * (that's hidden in the session)
	 *
	 * @param $thumbName String: basename of thumbnail file -- however, we don't want to use the file exactly.
	 *   When false, nothing is appended after the trailing slash.
	 * @return String: URL to access thumbnail, or URL with partial path
	 */
	public function getThumbUrl( $thumbName = false ) {
		wfDebug( __METHOD__ . " getting for $thumbName \n" );
		return $this->getSpecialUrl( 'thumb/' . $this->getUrlName() . '/' . $thumbName );
	}

	/**
	 * The basename for the URL, which we want to not be related to the filename.
	 * Will also be used as the lookup key for a thumbnail file.
	 * This is the session key the file was stashed under.
	 *
	 * @return String: base url name, like 'ab12cd34ef.jpg'
	 */
	public function getUrlName() {
		if ( !$this->urlName ) {
			$this->urlName = $this->sessionKey;
		}
		return $this->urlName;
	}

	/**
	 * Return the URL of the file, if for some reason we wanted to download it
	 * We tend not to do this for the original file, but we do want thumb icons
	 *
	 * @return String: url
	 */
	public function getUrl() {
		// computed once, then cached on the object
		if ( !isset( $this->url ) ) {
			$this->url = $this->getSpecialUrl( 'file/' . $this->getUrlName() );
		}
		return $this->url;
	}

	/**
	 * Parent classes use this method, for no obvious reason, to return the path (relative to wiki root, I assume).
	 * But with this class, the URL is unrelated to the path.
	 *
	 * @return String: url
	 */
	public function getFullUrl() {
		return $this->getUrl();
	}

	/**
	 * Getter for session key (the session-unique id by which this file's location & metadata is stored in the session)
	 *
	 * @return String: session key
	 */
	public function getSessionKey() {
		return $this->sessionKey;
	}

	/**
	 * Remove the associated temporary file
	 *
	 * @return the result of the repo's freeTemp() call, passed through unchanged
	 */
	public function remove() {
		return $this->repo->freeTemp( $this->path );
	}

}

/** Thrown when there is no session in which a stash could be kept. */
class UploadStashNotAvailableException extends MWException {
}

/** Thrown when a key is not present in the stash, or the stashed path is missing or not a plain file. */
class UploadStashFileNotFoundException extends MWException {
}

/** Thrown when a key is malformed, or a path does not exist or is not valid for the repo's temp zone. */
class UploadStashBadPathException extends MWException {
}

/** Thrown when stashed session data was written with a different session version. */
class UploadStashBadVersionException extends MWException {
}

/** Thrown when a file cannot be renamed or stored, or no extension can be determined for it. */
class UploadStashFileException extends MWException {
}

/** Thrown when a stashed file turns out to be zero bytes long. */
class UploadStashZeroLengthFileException extends MWException {
}