execute(); }

	/** The file name */
	protected $fileName;

	/** The opened file resource, as returned by fopen() in execute() */
	protected $file;

	/** The cached length of the file, or null if it has not been loaded yet. */
	protected $fileLength;

	/** A segmented cache of the file contents, indexed by segment number */
	protected $buffer;

	/** The file data callback, invoked once per central directory entry */
	protected $callback;

	/** The ZIP64 mode */
	protected $zip64 = false;

	/** Stored headers */
	protected $eocdr, $eocdr64, $eocdr64Locator;

	/** Reset to an empty array at the start of execute() */
	protected $data;

	/** The "extra field" ID for ZIP64 central directory entries */
	const ZIP64_EXTRA_HEADER = 0x0001;

	/** The segment size for the file contents cache */
	const SEGSIZE = 16384;

	/** The index of the "general field" bit for UTF-8 file names */
	const GENERAL_UTF8 = 11;

	/** The index of the "general field" bit for central directory encryption */
	const GENERAL_CD_ENCRYPTED = 13;

	/**
	 * Private constructor
	 *
	 * @param string $fileName Path of the file to read
	 * @param callable $callback Called with an associative array for each
	 *   central directory entry
	 * @param array $options Recognised key:
	 *   - zip64: value to use for the ZIP64 mode flag
	 */
	protected function __construct( $fileName, $callback, $options ) {
		$this->fileName = $fileName;
		$this->callback = $callback;

		if ( isset( $options['zip64'] ) ) {
			$this->zip64 = $options['zip64'];
		}
	}

	/**
	 * Read the directory according to settings in $this.
	 *
	 * Opens the file, locates and parses the central directory, and closes
	 * the file again. Errors raised through error() are converted into a
	 * fatal Status; any other exception is propagated to the caller after
	 * the file handle has been released.
	 *
	 * @return Status
	 */
	function execute() {
		// The parser works on raw bytes, so always request binary mode.
		$this->file = fopen( $this->fileName, 'rb' );
		$this->data = array();
		if ( !$this->file ) {
			return Status::newFatal( 'zip-file-open-error' );
		}

		$status = Status::newGood();
		try {
			$this->readEndOfCentralDirectoryRecord();
			if ( $this->zip64 ) {
				list( $offset, $size ) = $this->findZip64CentralDirectory();
				$this->readCentralDirectory( $offset, $size );
			} else {
				// These sentinel values mean "look in the ZIP64 headers"
				if ( $this->eocdr['CD size'] == 0xffffffff
					|| $this->eocdr['CD offset'] == 0xffffffff
					|| $this->eocdr['CD entries total'] == 0xffff
				) {
					$this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
						'but we are in legacy mode. Rejecting this upload is necessary to avoid ' .
						'opening vulnerabilities on clients using OpenJDK 7 or later.' );
				}

				list( $offset, $size ) = $this->findOldCentralDirectory();
				$this->readCentralDirectory( $offset, $size );
			}
		} catch ( ZipDirectoryReaderError $e ) {
			$status->fatal( $e->getErrorCode() );
		} catch ( Exception $e ) {
			// Not one of ours (for example an exception thrown by the
			// callback): release the file handle before letting it propagate.
			fclose( $this->file );
			throw $e;
		}

		fclose( $this->file );

		return $status;
	}

	/**
	 * Throw an error, and log a debug message
	 *
	 * @param mixed $code The error code carried by the exception
	 * @param string $debugMessage Text written to the debug log only
	 * @throws ZipDirectoryReaderError Always
	 */
	function error( $code, $debugMessage ) {
		wfDebug( __CLASS__ . ": Fatal error: $debugMessage\n" );
		throw new ZipDirectoryReaderError( $code );
	}

	/**
	 * Read the header which is at the end of the central directory,
	 * unimaginatively called the "end of central directory record" by the ZIP
	 * spec.
	 *
	 * The result is stored in $this->eocdr, together with the derived keys
	 * 'EOCDR size', 'file comment' and 'position'.
	 */
	function readEndOfCentralDirectoryRecord() {
		$info = array(
			'signature' => 4,
			'disk' => 2,
			'CD start disk' => 2,
			'CD entries this disk' => 2,
			'CD entries total' => 2,
			'CD size' => 4,
			'CD offset' => 4,
			'file comment length' => 2,
		);
		$structSize = $this->getStructSize( $info );

		// Only the tail of the file is searched: the fixed-size record plus
		// up to 65536 bytes for the trailing comment.
		$startPos = $this->getFileLength() - 65536 - $structSize;
		if ( $startPos < 0 ) {
			$startPos = 0;
		}

		$block = $this->getBlock( $startPos );
		// Use the last occurrence of the signature in the searched range
		$sigPos = strrpos( $block, "PK\x05\x06" );
		if ( $sigPos === false ) {
			$this->error( 'zip-wrong-format',
				"zip file lacks EOCDR signature. It probably isn't a zip file." );
		}

		$this->eocdr = $this->unpack( substr( $block, $sigPos ), $info );
		$this->eocdr['EOCDR size'] = $structSize + $this->eocdr['file comment length'];

		// The record and its comment must extend exactly to the end of the file
		if ( $structSize + $this->eocdr['file comment length'] != strlen( $block ) - $sigPos ) {
			$this->error( 'zip-bad', 'trailing bytes after the end of the file comment' );
		}
		if ( $this->eocdr['disk'] !== 0
			|| $this->eocdr['CD start disk'] !== 0
		) {
			$this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
		}

		$this->eocdr += $this->unpack(
			$block,
			array( 'file comment' => array( 'string', $this->eocdr['file comment length'] ) ),
			$sigPos + $structSize );
		$this->eocdr['position'] = $startPos + $sigPos;
	}

	/**
	 * Read the header called the "ZIP64 end of central directory locator". An
	 * error will be raised if it does not exist.
*/
	function readZip64EndOfCentralDirectoryLocator() {
		$layout = array(
			'signature' => array( 'string', 4 ),
			'eocdr64 start disk' => 4,
			'eocdr64 offset' => 8,
			'number of disks' => 4,
		);
		$layoutSize = $this->getStructSize( $layout );

		// The locator is expected directly in front of the regular EOCDR,
		// which itself sits at the very end of the file.
		$locatorPos = $this->getFileLength() - $this->eocdr['EOCDR size'] - $layoutSize;
		$raw = $this->getBlock( $locatorPos, $layoutSize );

		$locator = $this->unpack( $raw, $layout );
		$this->eocdr64Locator = $locator;

		if ( $locator['signature'] === "PK\x06\x07" ) {
			return;
		}

		// Note: Java will allow this and continue to read the
		// EOCDR64, so we have to reject the upload, we can't
		// just use the EOCDR header instead.
		$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
	}

	/**
	 * Read the header called the "ZIP64 end of central directory record". It
	 * may replace the regular "end of central directory record" in ZIP64 files.
	 *
	 * Requires $this->eocdr64Locator to have been populated already. The
	 * unpacked record is stored in $this->eocdr64.
	 */
	function readZip64EndOfCentralDirectoryRecord() {
		$locator = $this->eocdr64Locator;

		// NOTE(review): single-disk archives written by some tools appear to
		// store 1 in "number of disks"; confirm that requiring 0 is intended.
		$locatorFieldsZero = $locator['eocdr64 start disk'] == 0
			&& $locator['number of disks'] == 0;
		if ( !$locatorFieldsZero ) {
			$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
		}

		$layout = array(
			'signature' => array( 'string', 4 ),
			'EOCDR64 size' => 8,
			'version made by' => 2,
			'version needed' => 2,
			'disk' => 4,
			'CD start disk' => 4,
			'CD entries this disk' => 8,
			'CD entries total' => 8,
			'CD size' => 8,
			'CD offset' => 8
		);
		$raw = $this->getBlock( $locator['eocdr64 offset'], $this->getStructSize( $layout ) );

		$record = $this->unpack( $raw, $layout );
		$this->eocdr64 = $record;

		if ( $record['signature'] !== "PK\x06\x06" ) {
			$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
		}

		$onFirstDisk = $record['disk'] === 0 && $record['CD start disk'] === 0;
		if ( !$onFirstDisk ) {
			$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
		}
	}

	/**
	 * Find the location of the central directory, as would be seen by a
	 * non-ZIP64 reader.
	 *
	 * @return array List containing offset and size.
*/
	function findOldCentralDirectory() {
		$size = $this->eocdr['CD size'];
		$offset = $this->eocdr['CD offset'];
		$endPos = $this->eocdr['position'];

		// Some readers use the EOCDR position instead of the offset field
		// to find the directory, so to be safe, we check if they both agree.
		if ( $offset + $size != $endPos ) {
			$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
				'of central directory record' );
		}

		return array( $offset, $size );
	}

	/**
	 * Find the location of the central directory, as would be seen by a
	 * ZIP64-compliant reader.
	 *
	 * @return array List containing offset and size.
	 */
	function findZip64CentralDirectory() {
		// The spec is ambiguous about the exact rules of precedence between the
		// ZIP64 headers and the original headers. Here we follow zip_util.c
		// from OpenJDK 7.
		$size = $this->eocdr['CD size'];
		$offset = $this->eocdr['CD offset'];
		$numEntries = $this->eocdr['CD entries total'];
		$endPos = $this->eocdr['position'];

		// Any field holding its maximum value redirects us to the ZIP64 headers
		if ( $size == 0xffffffff
			|| $offset == 0xffffffff
			|| $numEntries == 0xffff
		) {
			$this->readZip64EndOfCentralDirectoryLocator();

			if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
				$this->readZip64EndOfCentralDirectoryRecord();
				if ( isset( $this->eocdr64['CD offset'] ) ) {
					$size = $this->eocdr64['CD size'];
					$offset = $this->eocdr64['CD offset'];
					// In this case the directory must end where the EOCDR64 begins
					$endPos = $this->eocdr64Locator['eocdr64 offset'];
				}
			}
		}

		// Some readers use the EOCDR position instead of the offset field
		// to find the directory, so to be safe, we check if they both agree.
		if ( $offset + $size != $endPos ) {
			$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
				'of central directory record' );
		}

		return array( $offset, $size );
	}

	/**
	 * Read the central directory at the given location
	 *
	 * Every entry is unpacked, validated, and passed to the callback as an
	 * associative array with the keys 'name', 'mtime' and 'size'.
	 *
	 * @param int $offset Byte offset of the central directory in the file
	 * @param int $size Length of the central directory in bytes
	 */
	function readCentralDirectory( $offset, $size ) {
		$block = $this->getBlock( $offset, $size );

		$fixedInfo = array(
			'signature' => array( 'string', 4 ),
			'version made by' => 2,
			'version needed' => 2,
			'general bits' => 2,
			'compression method' => 2,
			'mod time' => 2,
			'mod date' => 2,
			'crc-32' => 4,
			'compressed size' => 4,
			'uncompressed size' => 4,
			'name length' => 2,
			'extra field length' => 2,
			'comment length' => 2,
			'disk number start' => 2,
			'internal attrs' => 2,
			'external attrs' => 4,
			'local header offset' => 4,
		);
		$fixedSize = $this->getStructSize( $fixedInfo );

		$pos = 0;
		while ( $pos < $size ) {
			$data = $this->unpack( $block, $fixedInfo, $pos );
			$pos += $fixedSize;

			if ( $data['signature'] !== "PK\x01\x02" ) {
				$this->error( 'zip-bad', 'Invalid signature found in directory entry' );
			}

			// The variable-length fields follow the fixed part, in this order
			$variableInfo = array(
				'name' => array( 'string', $data['name length'] ),
				'extra field' => array( 'string', $data['extra field length'] ),
				'comment' => array( 'string', $data['comment length'] ),
			);
			$data += $this->unpack( $block, $variableInfo, $pos );
			$pos += $this->getStructSize( $variableInfo );

			if ( $this->zip64 && (
				$data['compressed size'] == 0xffffffff
				|| $data['uncompressed size'] == 0xffffffff
				|| $data['local header offset'] == 0xffffffff )
			) {
				$zip64Data = $this->unpackZip64Extra( $data['extra field'] );
				if ( $zip64Data ) {
					// Array union: the ZIP64 values take precedence
					$data = $zip64Data + $data;
				}
			}

			if ( $this->testBit( $data['general bits'], self::GENERAL_CD_ENCRYPTED ) ) {
				$this->error( 'zip-unsupported', 'central directory encryption is not supported' );
			}

			// Convert the timestamp into MediaWiki format
			// For the format, please see the MS-DOS 2.0 Programmer's Reference,
			// pages 3-5 and 3-6.
			$time = $data['mod time'];
			$date = $data['mod date'];

			$year = 1980 + ( $date >> 9 );
			$month = ( $date >> 5 ) & 15;
			$day = $date & 31;
			$hour = ( $time >> 11 ) & 31;
			$minute = ( $time >> 5 ) & 63;
			$second = ( $time & 31 ) * 2;
			$timestamp = sprintf( "%04d%02d%02d%02d%02d%02d",
				$year, $month, $day, $hour, $minute, $second );

			// Convert the character set in the file name
			if ( $this->testBit( $data['general bits'], self::GENERAL_UTF8 ) ) {
				$name = $data['name'];
			} else {
				$name = iconv( 'CP437', 'UTF-8', $data['name'] );
				if ( $name === false ) {
					// Fail closed: never hand a non-string name to the callback,
					// which may rely on it for its own checks.
					$this->error( 'zip-unsupported',
						'unable to convert a file name from CP437 to UTF-8' );
				}
			}

			// Compile a data array for the user, with a sensible format
			$userData = array(
				'name' => $name,
				'mtime' => $timestamp,
				'size' => $data['uncompressed size'],
			);
			call_user_func( $this->callback, $userData );
		}
	}

	/**
	 * Interpret ZIP64 "extra field" data and return an associative array.
	 *
	 * The extra field is walked as a sequence of (id, size, data) records.
	 * The first record whose id is ZIP64_EXTRA_HEADER is unpacked using the
	 * full four-member layout below; unpack() raises an error if that record
	 * is shorter than the layout.
	 *
	 * @param string $extraField
	 * @return array|bool The unpacked ZIP64 fields, or false if the extra
	 *   field contains no ZIP64 record
	 */
	function unpackZip64Extra( $extraField ) {
		$extraHeaderInfo = array(
			'id' => 2,
			'size' => 2,
		);
		$extraHeaderSize = $this->getStructSize( $extraHeaderInfo );

		$zip64ExtraInfo = array(
			'uncompressed size' => 8,
			'compressed size' => 8,
			'local header offset' => 8,
			'disk number start' => 4,
		);

		$extraPos = 0;
		while ( $extraPos < strlen( $extraField ) ) {
			$extra = $this->unpack( $extraField, $extraHeaderInfo, $extraPos );
			$extraPos += $extraHeaderSize;
			$extra += $this->unpack( $extraField,
				array( 'data' => array( 'string', $extra['size'] ) ),
				$extraPos );
			$extraPos += $extra['size'];

			if ( $extra['id'] == self::ZIP64_EXTRA_HEADER ) {
				return $this->unpack( $extra['data'], $zip64ExtraInfo );
			}
		}

		return false;
	}

	/**
	 * Get the length of the file.
	 *
	 * The value is read with fstat() on first use and cached afterwards.
	 *
	 * @return int
	 */
	function getFileLength() {
		if ( $this->fileLength === null ) {
			$stat = fstat( $this->file );
			if ( $stat === false ) {
				$this->error( 'zip-bad', 'unable to determine the file length' );
			}
			$this->fileLength = $stat['size'];
		}

		return $this->fileLength;
	}

	/**
	 * Get the file contents from a given offset. If there are not enough bytes
	 * in the file to satisfy the request, an exception will be thrown.
	 *
	 * @param int $start The byte offset of the start of the block.
* @param int $length The number of bytes to return. If omitted, the remainder
	 *   of the file will be returned.
	 *
	 * @return string
	 */
	function getBlock( $start, $length = null ) {
		$fileLength = $this->getFileLength();
		// Offsets are computed from untrusted header fields, so reject
		// anything outside the file explicitly, including negative positions.
		if ( $start < 0 || $start >= $fileLength ) {
			$this->error( 'zip-bad', "getBlock() requested position $start, " .
				"file length is $fileLength" );
		}
		if ( $length === null ) {
			$length = $fileLength - $start;
		}
		if ( $length < 0 ) {
			$this->error( 'zip-bad', "getBlock() requested negative length $length" );
		}
		$end = $start + $length;
		if ( $end > $fileLength ) {
			$this->error( 'zip-bad', "getBlock() requested end position $end, " .
				"file length is $fileLength" );
		}
		if ( $length == 0 ) {
			// Nothing to read, so no segment needs to be loaded
			return '';
		}

		// Load exactly the segments that overlap [$start, $end). The last
		// byte needed is $end - 1, so that is the byte which selects the
		// final segment. Cast to int so the cache is keyed by integers.
		$startSeg = (int)floor( $start / self::SEGSIZE );
		$lastSeg = (int)floor( ( $end - 1 ) / self::SEGSIZE );

		$block = '';
		for ( $segIndex = $startSeg; $segIndex <= $lastSeg; $segIndex++ ) {
			$block .= $this->getSegment( $segIndex );
		}

		// Trim the leading bytes of the first segment and any excess at the end
		$block = substr( $block,
			$start - $startSeg * self::SEGSIZE,
			$length );

		if ( strlen( $block ) < $length ) {
			$this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
		}

		return $block;
	}

	/**
	 * Get a section of the file starting at position $segIndex * self::SEGSIZE,
	 * of length self::SEGSIZE. The result is cached. This is a helper function
	 * for getBlock().
	 *
	 * If there are not enough bytes in the file to satisfy the request, the
	 * return value will be truncated. If a request is made for a segment beyond
	 * the end of the file, an empty string will be returned.
	 *
	 * @param int $segIndex
	 *
	 * @return string
	 */
	function getSegment( $segIndex ) {
		if ( !isset( $this->buffer[$segIndex] ) ) {
			$bytePos = $segIndex * self::SEGSIZE;
			if ( $bytePos >= $this->getFileLength() ) {
				// Cache the empty result as well, so the check is not repeated
				$this->buffer[$segIndex] = '';

				return '';
			}
			// fseek() returns 0 on success and -1 on failure
			if ( fseek( $this->file, $bytePos ) ) {
				$this->error( 'zip-bad', "seek to $bytePos failed" );
			}
			$seg = fread( $this->file, self::SEGSIZE );
			if ( $seg === false ) {
				$this->error( 'zip-bad', "read from $bytePos failed" );
			}
			$this->buffer[$segIndex] = $seg;
		}

		return $this->buffer[$segIndex];
	}

	/**
	 * Get the size of a structure in bytes. See unpack() for the format of $struct.
* @param array $struct
	 * @return int
	 */
	function getStructSize( $struct ) {
		$total = 0;
		foreach ( $struct as $fieldType ) {
			// Array types carry their byte length as the second element;
			// plain integers are the byte length themselves.
			$total += is_array( $fieldType ) ? $fieldType[1] : $fieldType;
		}

		return $total;
	}

	/**
	 * Decode a binary structure into an associative array, in the spirit of
	 * the built-in unpack() function but with a friendlier format description.
	 *
	 * @param string $string The binary data input
	 *
	 * @param array $struct An associative array describing the structure
	 *    members in the order they appear. The key is the field name. The
	 *    value is either an integer, meaning a little-endian unsigned integer
	 *    of that many bytes, or an array whose first element is a type name
	 *    followed by type-dependent parameters. Only one such type is defined:
	 *    - "string": The second array element gives the length of string.
	 *        Not null terminated.
	 *
	 * @param int $offset The offset into the string at which to start unpacking.
	 *
	 * @throws MWException If $struct names an unknown type
	 * @return array Unpacked associative array. Note that large integers in the input
	 *    may be represented as floating point numbers in the return value, so
	 *    the use of weak comparison is advised.
	 */
	function unpack( $string, $struct, $offset = 0 ) {
		$needed = $this->getStructSize( $struct );
		if ( strlen( $string ) < $offset + $needed ) {
			$this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
		}

		$result = array();
		$cursor = $offset;
		foreach ( $struct as $field => $spec ) {
			if ( !is_array( $spec ) ) {
				// Unsigned little-endian integer
				$byteCount = intval( $spec );

				// Accumulate from the most significant byte (stored last)
				// down to the least significant one. PHP promotes the result
				// to floating point by itself if it outgrows an integer.
				$number = 0;
				for ( $index = $cursor + $byteCount - 1; $index >= $cursor; $index-- ) {
					$number = $number * 256 + ord( $string[$index] );
				}

				// Refuse values that may have lost precision
				if ( $number > pow( 2, 52 ) ) {
					$this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
						'This could happen if we tried to unpack a 64-bit structure ' .
						'at an invalid location.' );
				}

				$result[$field] = $number;
				$cursor += $byteCount;
				continue;
			}

			$typeName = $spec[0];
			$fieldSize = $spec[1];
			if ( $typeName == 'string' ) {
				$result[$field] = substr( $string, $cursor, $fieldSize );
				$cursor += $fieldSize;
			} else {
				throw new MWException( __METHOD__ . ": invalid type \"$typeName\"" );
			}
		}

		return $result;
	}

	/**
	 * Report whether a single bit of an integer value is set.
	 *
	 * @param int $value
	 * @param int $bitIndex The index of the bit, where 0 is the LSB.
	 * @return bool
	 */
	function testBit( $value, $bitIndex ) {
		$bit = ( $value >> $bitIndex ) & 1;

		return $bit === 1;
	}

	/**
	 * Debugging helper which prints a string as rows of sixteen bytes: the
	 * row offset, the bytes in hexadecimal, then the printable characters
	 * between vertical bars (anything else is shown as a dot).
	 *
	 * @param string $s
	 */
	function hexDump( $s ) {
		$total = strlen( $s );
		for ( $rowStart = 0; $rowStart < $total; $rowStart += 16 ) {
			$hexPart = '';
			$textPart = '';
			for ( $col = 0; $col < 16; $col++ ) {
				$hexPart .= " ";
				if ( $col == 8 ) {
					// Extra gap between the two groups of eight bytes
					$hexPart .= " ";
				}

				$index = $rowStart + $col;
				if ( $index >= $total ) {
					// Pad the final, incomplete row
					$hexPart .= " ";
					$textPart .= " ";
					continue;
				}

				$char = $s[$index];
				$hexPart .= sprintf( "%02X", ord( $char ) );
				$textPart .= ctype_print( $char ) ? $char : '.';
			}

			print sprintf( "%08X ", $rowStart ) . $hexPart . " |" . $textPart . "|\n";
		}
	}
}

/**
 * Internal exception class. Will be caught by private code.
 */
class ZipDirectoryReaderError extends Exception {
	/** The error code supplied to the constructor */
	protected $errorCode;

	/**
	 * @param mixed $code Error code, also embedded in the exception message
	 */
	function __construct( $code ) {
		parent::__construct( "ZipDirectoryReader error: $code" );
		$this->errorCode = $code;
	}

	/**
	 * @return mixed The error code supplied to the constructor
	 */
	function getErrorCode() {
		return $this->errorCode;
	}
}