summaryrefslogtreecommitdiff
path: root/includes/media
diff options
context:
space:
mode:
Diffstat (limited to 'includes/media')
-rw-r--r--includes/media/BMP.php35
-rw-r--r--includes/media/Bitmap.php363
-rw-r--r--includes/media/BitmapMetadataHandler.php269
-rw-r--r--includes/media/Bitmap_ClientOnly.php14
-rw-r--r--includes/media/DjVu.php65
-rw-r--r--includes/media/Exif.php836
-rw-r--r--includes/media/ExifBitmap.php210
-rw-r--r--includes/media/FormatMetadata.php1354
-rw-r--r--includes/media/GIF.php103
-rw-r--r--includes/media/GIFMetadataExtractor.php236
-rw-r--r--includes/media/Generic.php302
-rw-r--r--includes/media/IPTC.php576
-rw-r--r--includes/media/Jpeg.php46
-rw-r--r--includes/media/JpegMetadataExtractor.php252
-rw-r--r--includes/media/MediaTransformOutput.php26
-rw-r--r--includes/media/PNG.php88
-rw-r--r--includes/media/PNGMetadataExtractor.php359
-rw-r--r--includes/media/SVG.php92
-rw-r--r--includes/media/SVGMetadataExtractor.php27
-rw-r--r--includes/media/Tiff.php51
-rw-r--r--includes/media/XMP.php1174
-rw-r--r--includes/media/XMPInfo.php1139
-rw-r--r--includes/media/XMPValidate.php323
23 files changed, 7622 insertions, 318 deletions
diff --git a/includes/media/BMP.php b/includes/media/BMP.php
index de836b59..6886e950 100644
--- a/includes/media/BMP.php
+++ b/includes/media/BMP.php
@@ -13,22 +13,39 @@
* @ingroup Media
*/
class BmpHandler extends BitmapHandler {
- // We never want to use .bmp in an <img/> tag
+
+ /**
+ * @param $file
+ * @return bool
+ */
function mustRender( $file ) {
return true;
}
- // Render files as PNG
+ /**
+ * Render files as PNG
+ *
+ * @param $text
+ * @param $mime
+ * @param $params
+ * @return array
+ */
function getThumbType( $text, $mime, $params = null ) {
return array( 'png', 'image/png' );
}
- /*
+ /**
* Get width and height from the bmp header.
+ *
+ * @param $image
+ * @param $filename
+ * @return array
*/
function getImageSize( $image, $filename ) {
- $f = fopen( $filename, 'r' );
- if(!$f) return false;
+ $f = fopen( $filename, 'rb' );
+ if( !$f ) {
+ return false;
+ }
$header = fread( $f, 54 );
fclose($f);
@@ -37,8 +54,12 @@ class BmpHandler extends BitmapHandler {
$h = substr( $header, 22, 4);
// Convert the unsigned long 32 bits (little endian):
- $w = unpack( 'V' , $w );
- $h = unpack( 'V' , $h );
+ try {
+ $w = wfUnpack( 'V', $w, 4 );
+ $h = wfUnpack( 'V', $h, 4 );
+ } catch ( MWException $e ) {
+ return false;
+ }
return array( $w[1], $h[1] );
}
}
diff --git a/includes/media/Bitmap.php b/includes/media/Bitmap.php
index f5f7ba6d..5f796095 100644
--- a/includes/media/Bitmap.php
+++ b/includes/media/Bitmap.php
@@ -12,6 +12,14 @@
* @ingroup Media
*/
class BitmapHandler extends ImageHandler {
+
+ /**
+ * @param $image File
+ * @param $params array Transform parameters. Entries with the keys 'width'
+ * and 'height' are the respective screen width and height, while the keys
+ * 'physicalWidth' and 'physicalHeight' indicate the thumbnail dimensions.
+ * @return bool
+ */
function normaliseParams( $image, &$params ) {
global $wgMaxImageArea;
if ( !parent::normaliseParams( $image, $params ) ) {
@@ -19,25 +27,26 @@ class BitmapHandler extends ImageHandler {
}
$mimeType = $image->getMimeType();
+ # Obtain the source, pre-rotation dimensions
$srcWidth = $image->getWidth( $params['page'] );
$srcHeight = $image->getHeight( $params['page'] );
# Don't make an image bigger than the source
- $params['physicalWidth'] = $params['width'];
- $params['physicalHeight'] = $params['height'];
-
if ( $params['physicalWidth'] >= $srcWidth ) {
$params['physicalWidth'] = $srcWidth;
$params['physicalHeight'] = $srcHeight;
+
# Skip scaling limit checks if no scaling is required
- if ( !$image->mustRender() )
+ # due to requested size being bigger than source.
+ if ( !$image->mustRender() ) {
return true;
+ }
}
-
+
# Don't thumbnail an image so big that it will fill hard drives and send servers into swap
# JPEG has the handy property of allowing thumbnailing without full decompression, so we make
# an exception for it.
- # FIXME: This actually only applies to ImageMagick
+ # @todo FIXME: This actually only applies to ImageMagick
if ( $mimeType !== 'image/jpeg' &&
$srcWidth * $srcHeight > $wgMaxImageArea )
{
@@ -46,6 +55,30 @@ class BitmapHandler extends ImageHandler {
return true;
}
+
+ /**
+ * Extracts the width/height if the image will be scaled before rotating
+ *
+ * This will match the physical size/aspect ratio of the original image
+ * prior to application of the rotation -- so for a portrait image that's
+ * stored as raw landscape with 90-degree rotation, the resulting size
+ * will be wider than it is tall.
+ *
+ * @param $params array Parameters as returned by normaliseParams
+ * @param $rotation int The rotation angle that will be applied
+ * @return array ($width, $height) array
+ */
+ public function extractPreRotationDimensions( $params, $rotation ) {
+ if ( $rotation == 90 || $rotation == 270 ) {
+ # We'll resize before rotation, so swap the dimensions again
+ $width = $params['physicalHeight'];
+ $height = $params['physicalWidth'];
+ } else {
+ $width = $params['physicalWidth'];
+ $height = $params['physicalHeight'];
+ }
+ return array( $width, $height );
+ }
// Function that returns the number of pixels to be thumbnailed.
@@ -54,10 +87,15 @@ class BitmapHandler extends ImageHandler {
return $width * $height;
}
+ /**
+ * @param $image File
+ * @param $dstPath
+ * @param $dstUrl
+ * @param $params
+ * @param int $flags
+ * @return MediaTransformError|ThumbnailImage|TransformParameterError
+ */
function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
- global $wgUseImageMagick;
- global $wgCustomConvertCommand, $wgUseImageResize;
-
if ( !$this->normaliseParams( $image, $params ) ) {
return new TransformParameterError( $params );
}
@@ -79,6 +117,7 @@ class BitmapHandler extends ImageHandler {
'mimeType' => $image->getMimeType(),
'srcPath' => $image->getPath(),
'dstPath' => $dstPath,
+ 'dstUrl' => $dstUrl,
);
wfDebug( __METHOD__ . ": creating {$scalerParams['physicalDimensions']} thumbnail at $dstPath\n" );
@@ -93,20 +132,7 @@ class BitmapHandler extends ImageHandler {
}
# Determine scaler type
- if ( !$dstPath ) {
- # No output path available, client side scaling only
- $scaler = 'client';
- } elseif ( !$wgUseImageResize ) {
- $scaler = 'client';
- } elseif ( $wgUseImageMagick ) {
- $scaler = 'im';
- } elseif ( $wgCustomConvertCommand ) {
- $scaler = 'custom';
- } elseif ( function_exists( 'imagecreatetruecolor' ) ) {
- $scaler = 'gd';
- } else {
- $scaler = 'client';
- }
+ $scaler = self::getScalerType( $dstPath );
wfDebug( __METHOD__ . ": scaler $scaler\n" );
if ( $scaler == 'client' ) {
@@ -127,13 +153,28 @@ class BitmapHandler extends ImageHandler {
return $this->getClientScalingThumbnailImage( $image, $scalerParams );
}
+ # Try a hook
+ $mto = null;
+ wfRunHooks( 'BitmapHandlerTransform', array( $this, $image, &$scalerParams, &$mto ) );
+ if ( !is_null( $mto ) ) {
+ wfDebug( __METHOD__ . ": Hook to BitmapHandlerTransform created an mto\n" );
+ $scaler = 'hookaborted';
+ }
+
switch ( $scaler ) {
+ case 'hookaborted':
+ # Handled by the hook above
+ $err = $mto->isError() ? $mto : false;
+ break;
case 'im':
$err = $this->transformImageMagick( $image, $scalerParams );
break;
case 'custom':
$err = $this->transformCustom( $image, $scalerParams );
break;
+ case 'imext':
+ $err = $this->transformImageMagickExt( $image, $scalerParams );
+ break;
case 'gd':
default:
$err = $this->transformGd( $image, $scalerParams );
@@ -149,6 +190,8 @@ class BitmapHandler extends ImageHandler {
# Thumbnail was zero-byte and had to be removed
return new MediaTransformError( 'thumbnail_error',
$scalerParams['clientWidth'], $scalerParams['clientHeight'] );
+ } elseif ( $mto ) {
+ return $mto;
} else {
return new ThumbnailImage( $image, $dstUrl, $scalerParams['clientWidth'],
$scalerParams['clientHeight'], $dstPath );
@@ -156,12 +199,49 @@ class BitmapHandler extends ImageHandler {
}
/**
+ * Returns which scaler type should be used. Creates parent directories
+ * for $dstPath and returns 'client' on error
+ *
+ * @return string client,im,custom,gd
+ */
+ protected static function getScalerType( $dstPath, $checkDstPath = true ) {
+ global $wgUseImageResize, $wgUseImageMagick, $wgCustomConvertCommand;
+
+ if ( !$dstPath && $checkDstPath ) {
+ # No output path available, client side scaling only
+ $scaler = 'client';
+ } elseif ( !$wgUseImageResize ) {
+ $scaler = 'client';
+ } elseif ( $wgUseImageMagick ) {
+ $scaler = 'im';
+ } elseif ( $wgCustomConvertCommand ) {
+ $scaler = 'custom';
+ } elseif ( function_exists( 'imagecreatetruecolor' ) ) {
+ $scaler = 'gd';
+ } elseif ( class_exists( 'Imagick' ) ) {
+ $scaler = 'imext';
+ } else {
+ $scaler = 'client';
+ }
+
+ if ( $scaler != 'client' && $dstPath ) {
+ if ( !wfMkdirParents( dirname( $dstPath ) ) ) {
+ # Unable to create a path for the thumbnail
+ return 'client';
+ }
+ }
+ return $scaler;
+ }
+
+ /**
* Get a ThumbnailImage that respresents an image that will be scaled
* client side
*
* @param $image File File associated with this thumbnail
* @param $params array Array with scaler params
* @return ThumbnailImage
+ *
+ * @fixme no rotation support
*/
protected function getClientScalingThumbnailImage( $image, $params ) {
return new ThumbnailImage( $image, $image->getURL(),
@@ -215,7 +295,7 @@ class BitmapHandler extends ImageHandler {
// We optimize the output, but -optimize is broken,
// use optimizeTransparency instead (bug 11822)
if ( version_compare( $this->getMagickVersion(), "6.3.5" ) >= 0 ) {
- $animation_post = '-fuzz 5% -layers optimizeTransparency +map';
+ $animation_post = '-fuzz 5% -layers optimizeTransparency';
}
}
}
@@ -225,6 +305,9 @@ class BitmapHandler extends ImageHandler {
if ( strval( $wgImageMagickTempDir ) !== '' ) {
$env['MAGICK_TMPDIR'] = $wgImageMagickTempDir;
}
+
+ $rotation = $this->getRotation( $image );
+ list( $width, $height ) = $this->extractPreRotationDimensions( $params, $rotation );
$cmd =
wfEscapeShellArg( $wgImageMagickConvertCommand ) .
@@ -237,12 +320,13 @@ class BitmapHandler extends ImageHandler {
// For the -thumbnail option a "!" is needed to force exact size,
// or ImageMagick may decide your ratio is wrong and slice off
// a pixel.
- " -thumbnail " . wfEscapeShellArg( "{$params['physicalDimensions']}!" ) .
+ " -thumbnail " . wfEscapeShellArg( "{$width}x{$height}!" ) .
// Add the source url as a comment to the thumb, but don't add the flag if there's no comment
( $params['comment'] !== ''
? " -set comment " . wfEscapeShellArg( $this->escapeMagickProperty( $params['comment'] ) )
: '' ) .
- " -depth 8 $sharpen" .
+ " -depth 8 $sharpen " .
+ " -rotate -$rotation " .
" {$animation_post} " .
wfEscapeShellArg( $this->escapeMagickOutput( $params['dstPath'] ) ) . " 2>&1";
@@ -261,6 +345,84 @@ class BitmapHandler extends ImageHandler {
}
/**
+ * Transform an image using the Imagick PHP extension
+ *
+ * @param $image File File associated with this thumbnail
+ * @param $params array Array with scaler params
+ *
+ * @return MediaTransformError Error object if error occurred, false (=no error) otherwise
+ */
+ protected function transformImageMagickExt( $image, $params ) {
+ global $wgSharpenReductionThreshold, $wgSharpenParameter, $wgMaxAnimatedGifArea;
+
+ try {
+ $im = new Imagick();
+ $im->readImage( $params['srcPath'] );
+
+ if ( $params['mimeType'] == 'image/jpeg' ) {
+ // Sharpening, see bug 6193
+ if ( ( $params['physicalWidth'] + $params['physicalHeight'] )
+ / ( $params['srcWidth'] + $params['srcHeight'] )
+ < $wgSharpenReductionThreshold ) {
+ // Hack, since $wgSharpenParameter is written specifically for the command line convert
+ list( $radius, $sigma ) = explode( 'x', $wgSharpenParameter );
+ $im->sharpenImage( $radius, $sigma );
+ }
+ $im->setCompressionQuality( 80 );
+ } elseif( $params['mimeType'] == 'image/png' ) {
+ $im->setCompressionQuality( 95 );
+ } elseif ( $params['mimeType'] == 'image/gif' ) {
+ if ( $this->getImageArea( $image, $params['srcWidth'],
+ $params['srcHeight'] ) > $wgMaxAnimatedGifArea ) {
+ // Extract initial frame only; we're so big it'll
+ // be a total drag. :P
+ $im->setImageScene( 0 );
+ } elseif ( $this->isAnimatedImage( $image ) ) {
+ // Coalesce is needed to scale animated GIFs properly (bug 1017).
+ $im = $im->coalesceImages();
+ }
+ }
+
+ $rotation = $this->getRotation( $image );
+ list( $width, $height ) = $this->extractPreRotationDimensions( $params, $rotation );
+
+ $im->setImageBackgroundColor( new ImagickPixel( 'white' ) );
+
+ // Call Imagick::thumbnailImage on each frame
+ foreach ( $im as $i => $frame ) {
+ if ( !$frame->thumbnailImage( $width, $height, /* fit */ false ) ) {
+ return $this->getMediaTransformError( $params, "Error scaling frame $i" );
+ }
+ }
+ $im->setImageDepth( 8 );
+
+ if ( $rotation ) {
+ if ( !$im->rotateImage( new ImagickPixel( 'white' ), 360 - $rotation ) ) {
+ return $this->getMediaTransformError( $params, "Error rotating $rotation degrees" );
+ }
+ }
+
+ if ( $this->isAnimatedImage( $image ) ) {
+ wfDebug( __METHOD__ . ": Writing animated thumbnail\n" );
+ // This is broken somehow... can't find out how to fix it
+ $result = $im->writeImages( $params['dstPath'], true );
+ } else {
+ $result = $im->writeImage( $params['dstPath'] );
+ }
+ if ( !$result ) {
+ return $this->getMediaTransformError( $params,
+ "Unable to write thumbnail to {$params['dstPath']}" );
+ }
+
+ } catch ( ImagickException $e ) {
+ return $this->getMediaTransformError( $params, $e->getMessage() );
+ }
+
+ return false;
+
+ }
+
+ /**
* Transform an image using a custom command
*
* @param $image File File associated with this thumbnail
@@ -306,12 +468,12 @@ class BitmapHandler extends ImageHandler {
}
/**
* Get a MediaTransformError with error 'thumbnail_error'
- *
+ *
* @param $params array Parameter array as passed to the transform* functions
* @param $errMsg string Error message
* @return MediaTransformError
*/
- protected function getMediaTransformError( $params, $errMsg ) {
+ public function getMediaTransformError( $params, $errMsg ) {
return new MediaTransformError( 'thumbnail_error', $params['clientWidth'],
$params['clientHeight'], $errMsg );
}
@@ -360,8 +522,10 @@ class BitmapHandler extends ImageHandler {
}
$src_image = call_user_func( $loader, $params['srcPath'] );
- $dst_image = imagecreatetruecolor( $params['physicalWidth'],
- $params['physicalHeight'] );
+
+ $rotation = function_exists( 'imagerotate' ) ? $this->getRotation( $image ) : 0;
+ list( $width, $height ) = $this->extractPreRotationDimensions( $params, $rotation );
+ $dst_image = imagecreatetruecolor( $width, $height );
// Initialise the destination image to transparent instead of
// the default solid black, to support PNG and GIF transparency nicely
@@ -374,15 +538,21 @@ class BitmapHandler extends ImageHandler {
// It may just uglify them, and completely breaks transparency.
imagecopyresized( $dst_image, $src_image,
0, 0, 0, 0,
- $params['physicalWidth'], $params['physicalHeight'],
+ $width, $height,
imagesx( $src_image ), imagesy( $src_image ) );
} else {
imagecopyresampled( $dst_image, $src_image,
0, 0, 0, 0,
- $params['physicalWidth'], $params['physicalHeight'],
+ $width, $height,
imagesx( $src_image ), imagesy( $src_image ) );
}
+ if ( $rotation % 360 != 0 && $rotation % 90 == 0 ) {
+ $rot_image = imagerotate( $dst_image, $rotation, 0 );
+ imagedestroy( $dst_image );
+ $dst_image = $rot_image;
+ }
+
imagesavealpha( $dst_image, true );
call_user_func( $saveType, $dst_image, $params['dstPath'] );
@@ -508,98 +678,57 @@ class BitmapHandler extends ImageHandler {
imagejpeg( $dst_image, $thumbPath, 95 );
}
-
- function getMetadata( $image, $filename ) {
- global $wgShowEXIF;
- if ( $wgShowEXIF && file_exists( $filename ) ) {
- $exif = new Exif( $filename );
- $data = $exif->getFilteredData();
- if ( $data ) {
- $data['MEDIAWIKI_EXIF_VERSION'] = Exif::version();
- return serialize( $data );
- } else {
- return '0';
- }
- } else {
- return '';
- }
- }
-
- function getMetadataType( $image ) {
- return 'exif';
- }
-
- function isMetadataValid( $image, $metadata ) {
- global $wgShowEXIF;
- if ( !$wgShowEXIF ) {
- # Metadata disabled and so an empty field is expected
- return true;
- }
- if ( $metadata === '0' ) {
- # Special value indicating that there is no EXIF data in the file
- return true;
- }
- wfSuppressWarnings();
- $exif = unserialize( $metadata );
- wfRestoreWarnings();
- if ( !isset( $exif['MEDIAWIKI_EXIF_VERSION'] ) ||
- $exif['MEDIAWIKI_EXIF_VERSION'] != Exif::version() )
- {
- # Wrong version
- wfDebug( __METHOD__ . ": wrong version\n" );
- return false;
- }
- return true;
+ /**
+ * On supporting image formats, try to read out the low-level orientation
+ * of the file and return the angle that the file needs to be rotated to
+ * be viewed.
+ *
+ * This information is only useful when manipulating the original file;
+ * the width and height we normally work with is logical, and will match
+ * any produced output views.
+ *
+ * The base BitmapHandler doesn't understand any metadata formats, so this
+ * is left up to child classes to implement.
+ *
+ * @param $file File
+ * @return int 0, 90, 180 or 270
+ */
+ public function getRotation( $file ) {
+ return 0;
}
/**
- * Get a list of EXIF metadata items which should be displayed when
- * the metadata table is collapsed.
+ * Returns whether the current scaler supports rotation (im and gd do)
*
- * @return array of strings
- * @access private
+ * @return bool
*/
- function visibleMetadataFields() {
- $fields = array();
- $lines = explode( "\n", wfMsgForContent( 'metadata-fields' ) );
- foreach ( $lines as $line ) {
- $matches = array();
- if ( preg_match( '/^\\*\s*(.*?)\s*$/', $line, $matches ) ) {
- $fields[] = $matches[1];
- }
+ public static function canRotate() {
+ $scaler = self::getScalerType( null, false );
+ switch ( $scaler ) {
+ case 'im':
+ # ImageMagick supports autorotation
+ return true;
+ case 'imext':
+ # Imagick::rotateImage
+ return true;
+ case 'gd':
+ # GD's imagerotate function is used to rotate images, but not
+ # all precompiled PHP versions have that function
+ return function_exists( 'imagerotate' );
+ default:
+ # Other scalers don't support rotation
+ return false;
}
- $fields = array_map( 'strtolower', $fields );
- return $fields;
}
- function formatMetadata( $image ) {
- $result = array(
- 'visible' => array(),
- 'collapsed' => array()
- );
- $metadata = $image->getMetadata();
- if ( !$metadata ) {
- return false;
- }
- $exif = unserialize( $metadata );
- if ( !$exif ) {
- return false;
- }
- unset( $exif['MEDIAWIKI_EXIF_VERSION'] );
- $format = new FormatExif( $exif );
-
- $formatted = $format->getFormattedData();
- // Sort fields into visible and collapsed
- $visibleFields = $this->visibleMetadataFields();
- foreach ( $formatted as $name => $value ) {
- $tag = strtolower( $name );
- self::addMeta( $result,
- in_array( $tag, $visibleFields ) ? 'visible' : 'collapsed',
- 'exif',
- $tag,
- $value
- );
- }
- return $result;
+ /**
+ * Returns whether the file needs to be rendered. Returns true if the
+ * file requires rotation and we are able to rotate it.
+ *
+ * @param $file File
+ * @return bool
+ */
+ public function mustRender( $file ) {
+ return self::canRotate() && $this->getRotation( $file ) != 0;
}
}
diff --git a/includes/media/BitmapMetadataHandler.php b/includes/media/BitmapMetadataHandler.php
new file mode 100644
index 00000000..d1caa67a
--- /dev/null
+++ b/includes/media/BitmapMetadataHandler.php
@@ -0,0 +1,269 @@
+<?php
+/**
+Class to deal with reconciling and extracting metadata from bitmap images.
+This is meant to comply with http://www.metadataworkinggroup.org/pdf/mwg_guidance.pdf
+
+This sort of acts as an intermediary between MediaHandler::getMetadata
+and the various metadata extractors.
+
+@todo other image formats.
+*/
+class BitmapMetadataHandler {
+
+ private $metadata = array();
+ private $metaPriority = array(
+ 20 => array( 'other' ),
+ 40 => array( 'native' ),
+ 60 => array( 'iptc-good-hash', 'iptc-no-hash' ),
+ 70 => array( 'xmp-deprecated' ),
+ 80 => array( 'xmp-general' ),
+ 90 => array( 'xmp-exif' ),
+ 100 => array( 'iptc-bad-hash' ),
+ 120 => array( 'exif' ),
+ );
+ private $iptcType = 'iptc-no-hash';
+
+ /**
+ * This does the photoshop image resource app13 block
+ * of interest, IPTC-IIM metadata is stored here.
+ *
+ * Mostly just calls doPSIR and doIPTC
+ *
+ * @param String $app13 String containing app13 block from jpeg file
+ */
+ private function doApp13 ( $app13 ) {
+ $this->iptcType = JpegMetadataExtractor::doPSIR( $app13 );
+
+ $iptc = IPTC::parse( $app13 );
+ $this->addMetadata( $iptc, $this->iptcType );
+ }
+
+
+ /**
+ * Get exif info using exif class.
+ * Basically what used to be in BitmapHandler::getMetadata().
+ * Just calls stuff in the Exif class.
+ *
+ * @param $filename string
+ */
+ function getExif ( $filename, $byteOrder ) {
+ global $wgShowEXIF;
+ if ( file_exists( $filename ) && $wgShowEXIF ) {
+ $exif = new Exif( $filename, $byteOrder );
+ $data = $exif->getFilteredData();
+ if ( $data ) {
+ $this->addMetadata( $data, 'exif' );
+ }
+ }
+ }
+ /** Add misc metadata. Warning: atm if the metadata category
+ * doesn't have a priority, it will be silently discarded.
+ *
+ * @param Array $metaArray array of metadata values
+ * @param string $type type. defaults to other. if two things have the same type they're merged
+ */
+ function addMetadata ( $metaArray, $type = 'other' ) {
+ if ( isset( $this->metadata[$type] ) ) {
+ /* merge with old data */
+ $metaArray = $metaArray + $this->metadata[$type];
+ }
+
+ $this->metadata[$type] = $metaArray;
+ }
+
+ /**
+ * Merge together the various types of metadata
+ * the different types have different priorities,
+ * and are merged in order.
+ *
+ * This function is generally called by the media handlers' getMetadata()
+ *
+ * @return Array metadata array
+ */
+ function getMetadataArray () {
+ // this seems a bit ugly... This is all so it's merged in right order
+ // based on the MWG recommendation.
+ $temp = Array();
+ krsort( $this->metaPriority );
+ foreach ( $this->metaPriority as $pri ) {
+ foreach ( $pri as $type ) {
+ if ( isset( $this->metadata[$type] ) ) {
+ // Do some special casing for multilingual values.
+ // Don't discard translations if also as a simple value.
+ foreach ( $this->metadata[$type] as $itemName => $item ) {
+ if ( is_array( $item ) && isset( $item['_type'] ) && $item['_type'] === 'lang' ) {
+ if ( isset( $temp[$itemName] ) && !is_array( $temp[$itemName] ) ) {
+ $default = $temp[$itemName];
+ $temp[$itemName] = $item;
+ $temp[$itemName]['x-default'] = $default;
+ unset( $this->metadata[$type][$itemName] );
+ }
+ }
+ }
+
+ $temp = $temp + $this->metadata[$type];
+ }
+ }
+ }
+ return $temp;
+ }
+
+ /** Main entry point for jpeg's.
+ *
+ * @param $filename string filename (with full path)
+ * @return metadata result array.
+ * @throws MWException on invalid file.
+ */
+ static function Jpeg ( $filename ) {
+ $showXMP = function_exists( 'xml_parser_create_ns' );
+ $meta = new self();
+
+ $seg = JpegMetadataExtractor::segmentSplitter( $filename );
+ if ( isset( $seg['COM'] ) && isset( $seg['COM'][0] ) ) {
+ $meta->addMetadata( Array( 'JPEGFileComment' => $seg['COM'] ), 'native' );
+ }
+ if ( isset( $seg['PSIR'] ) ) {
+ $meta->doApp13( $seg['PSIR'] );
+ }
+ if ( isset( $seg['XMP'] ) && $showXMP ) {
+ $xmp = new XMPReader();
+ $xmp->parse( $seg['XMP'] );
+ foreach ( $seg['XMP_ext'] as $xmpExt ) {
+ /* Support for extended xmp in jpeg files
+ * is not well tested and a bit fragile.
+ */
+ $xmp->parseExtended( $xmpExt );
+
+ }
+ $res = $xmp->getResults();
+ foreach ( $res as $type => $array ) {
+ $meta->addMetadata( $array, $type );
+ }
+ }
+ if ( isset( $seg['byteOrder'] ) ) {
+ $meta->getExif( $filename, $seg['byteOrder'] );
+ }
+ return $meta->getMetadataArray();
+ }
+
+ /** Entry point for png
+ * At some point in the future this might
+ * merge the various png tEXt chunks that
+ * are interesting, but for now it only does XMP
+ *
+ * @param $filename String full path to file
+ * @return Array Array for storage in img_metadata.
+ */
+ static public function PNG ( $filename ) {
+ $showXMP = function_exists( 'xml_parser_create_ns' );
+
+ $meta = new self();
+ $array = PNGMetadataExtractor::getMetadata( $filename );
+ if ( isset( $array['text']['xmp']['x-default'] ) && $array['text']['xmp']['x-default'] !== '' && $showXMP ) {
+ $xmp = new XMPReader();
+ $xmp->parse( $array['text']['xmp']['x-default'] );
+ $xmpRes = $xmp->getResults();
+ foreach ( $xmpRes as $type => $xmpSection ) {
+ $meta->addMetadata( $xmpSection, $type );
+ }
+ }
+ unset( $array['text']['xmp'] );
+ $meta->addMetadata( $array['text'], 'native' );
+ unset( $array['text'] );
+ $array['metadata'] = $meta->getMetadataArray();
+ $array['metadata']['_MW_PNG_VERSION'] = PNGMetadataExtractor::VERSION;
+ return $array;
+ }
+
+ /** function for gif images.
+ *
+ * They don't really have native metadata, so just merges together
+ * XMP and image comment.
+ *
+ * @param $filename full path to file
+ * @return Array metadata array
+ */
+ static public function GIF ( $filename ) {
+
+ $meta = new self();
+ $baseArray = GIFMetadataExtractor::getMetadata( $filename );
+
+ if ( count( $baseArray['comment'] ) > 0 ) {
+ $meta->addMetadata( array( 'GIFFileComment' => $baseArray['comment'] ), 'native' );
+ }
+
+ if ( $baseArray['xmp'] !== '' && function_exists( 'xml_parser_create_ns' ) ) {
+ $xmp = new XMPReader();
+ $xmp->parse( $baseArray['xmp'] );
+ $xmpRes = $xmp->getResults();
+ foreach ( $xmpRes as $type => $xmpSection ) {
+ $meta->addMetadata( $xmpSection, $type );
+ }
+
+ }
+
+ unset( $baseArray['comment'] );
+ unset( $baseArray['xmp'] );
+
+ $baseArray['metadata'] = $meta->getMetadataArray();
+ $baseArray['metadata']['_MW_GIF_VERSION'] = GIFMetadataExtractor::VERSION;
+ return $baseArray;
+ }
+
+ /**
+ * This doesn't do much yet, but eventually I plan to add
+ * XMP support for Tiff. (PHP's exif support already extracts
+ * but needs some further processing because PHP's exif support
+ * is stupid...)
+ *
+ * @todo Add XMP support, so this function actually makes
+ * sense to put here.
+ *
+ * The various exceptions this throws are caught later.
+ * @param $filename String
+ * @return Array The metadata.
+ */
+ static public function Tiff ( $filename ) {
+ if ( file_exists( $filename ) ) {
+ $byteOrder = self::getTiffByteOrder( $filename );
+ if ( !$byteOrder ) {
+ throw new MWException( "Error determining byte order of $filename" );
+ }
+ $exif = new Exif( $filename, $byteOrder );
+ $data = $exif->getFilteredData();
+ if ( $data ) {
+ $data['MEDIAWIKI_EXIF_VERSION'] = Exif::version();
+ return $data;
+ } else {
+ throw new MWException( "Could not extract data from tiff file $filename" );
+ }
+ } else {
+ throw new MWException( "File doesn't exist - $filename" );
+ }
+ }
+ /**
+ * Read the first 2 bytes of a tiff file to figure out
+ * Little Endian or Big Endian. Needed for exif stuff.
+ *
+ * @param $filename String The filename
+ * @return String 'BE' or 'LE' or false
+ */
+ static function getTiffByteOrder( $filename ) {
+ $fh = fopen( $filename, 'rb' );
+ if ( !$fh ) return false;
+ $head = fread( $fh, 2 );
+ fclose( $fh );
+
+ switch( $head ) {
+ case 'II':
+ return 'LE'; // II for intel.
+ case 'MM':
+ return 'BE'; // MM for Motorola.
+ default:
+ return false; // Something went wrong.
+
+ }
+ }
+
+
+}
diff --git a/includes/media/Bitmap_ClientOnly.php b/includes/media/Bitmap_ClientOnly.php
index 9f6f7b33..50679229 100644
--- a/includes/media/Bitmap_ClientOnly.php
+++ b/includes/media/Bitmap_ClientOnly.php
@@ -15,10 +15,24 @@
* @ingroup Media
*/
class BitmapHandler_ClientOnly extends BitmapHandler {
+
+ /**
+ * @param $image File
+ * @param $params
+ * @return bool
+ */
function normaliseParams( $image, &$params ) {
return ImageHandler::normaliseParams( $image, $params );
}
+ /**
+ * @param $image File
+ * @param $dstPath
+ * @param $dstUrl
+ * @param $params
+ * @param int $flags
+ * @return ThumbnailImage|TransformParameterError
+ */
function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
if ( !$this->normaliseParams( $image, $params ) ) {
return new TransformParameterError( $params );
diff --git a/includes/media/DjVu.php b/includes/media/DjVu.php
index cc3f1db5..2833f683 100644
--- a/includes/media/DjVu.php
+++ b/includes/media/DjVu.php
@@ -5,13 +5,17 @@
* @file
* @ingroup Media
*/
-
+
/**
* Handler for DjVu images
*
* @ingroup Media
*/
class DjVuHandler extends ImageHandler {
+
+ /**
+ * @return bool
+ */
function isEnabled() {
global $wgDjvuRenderer, $wgDjvuDump, $wgDjvuToXML;
if ( !$wgDjvuRenderer || ( !$wgDjvuDump && !$wgDjvuToXML ) ) {
@@ -22,9 +26,25 @@ class DjVuHandler extends ImageHandler {
}
}
- function mustRender( $file ) { return true; }
- function isMultiPage( $file ) { return true; }
+ /**
+ * @param $file
+ * @return bool
+ */
+ function mustRender( $file ) {
+ return true;
+ }
+
+ /**
+ * @param $file
+ * @return bool
+ */
+ function isMultiPage( $file ) {
+ return true;
+ }
+ /**
+ * @return array
+ */
function getParamMap() {
return array(
'img_width' => 'width',
@@ -32,6 +52,11 @@ class DjVuHandler extends ImageHandler {
);
}
+ /**
+ * @param $name
+ * @param $value
+ * @return bool
+ */
function validateParam( $name, $value ) {
if ( in_array( $name, array( 'width', 'height', 'page' ) ) ) {
if ( $value <= 0 ) {
@@ -44,6 +69,10 @@ class DjVuHandler extends ImageHandler {
}
}
+ /**
+ * @param $params
+ * @return bool|string
+ */
function makeParamString( $params ) {
$page = isset( $params['page'] ) ? $params['page'] : 1;
if ( !isset( $params['width'] ) ) {
@@ -52,6 +81,10 @@ class DjVuHandler extends ImageHandler {
return "page{$page}-{$params['width']}px";
}
+ /**
+ * @param $str
+ * @return array|bool
+ */
function parseParamString( $str ) {
$m = false;
if ( preg_match( '/^page(\d+)-(\d+)px$/', $str, $m ) ) {
@@ -61,6 +94,10 @@ class DjVuHandler extends ImageHandler {
}
}
+ /**
+ * @param $params
+ * @return array
+ */
function getScriptParams( $params ) {
return array(
'width' => $params['width'],
@@ -68,6 +105,14 @@ class DjVuHandler extends ImageHandler {
);
}
+ /**
+ * @param $image File
+ * @param $dstPath
+ * @param $dstUrl
+ * @param $params
+ * @param int $flags
+ * @return MediaTransformError|ThumbnailImage|TransformParameterError
+ */
function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
global $wgDjvuRenderer, $wgDjvuPostProcessor;
@@ -75,7 +120,9 @@ class DjVuHandler extends ImageHandler {
// normaliseParams will inevitably give.
$xml = $image->getMetadata();
if ( !$xml ) {
- return new MediaTransformError( 'thumbnail_error', @$params['width'], @$params['height'],
+ $width = isset( $params['width'] ) ? $params['width'] : 0;
+ $height = isset( $params['height'] ) ? $params['height'] : 0;
+ return new MediaTransformError( 'thumbnail_error', $width, $height,
wfMsg( 'djvu_no_xml' ) );
}
@@ -100,7 +147,8 @@ class DjVuHandler extends ImageHandler {
# Use a subshell (brackets) to aggregate stderr from both pipeline commands
# before redirecting it to the overall stdout. This works in both Linux and Windows XP.
- $cmd = '(' . wfEscapeShellArg( $wgDjvuRenderer ) . " -format=ppm -page={$page} -size={$width}x{$height} " .
+ $cmd = '(' . wfEscapeShellArg( $wgDjvuRenderer ) . " -format=ppm -page={$page}" .
+ " -size={$params['physicalWidth']}x{$params['physicalHeight']} " .
wfEscapeShellArg( $srcPath );
if ( $wgDjvuPostProcessor ) {
$cmd .= " | {$wgDjvuPostProcessor}";
@@ -125,6 +173,8 @@ class DjVuHandler extends ImageHandler {
/**
* Cache an instance of DjVuImage in an Image object, return that instance
+ *
+ * @return DjVuImage
*/
function getDjVuImage( $image, $path ) {
if ( !$image ) {
@@ -139,6 +189,7 @@ class DjVuHandler extends ImageHandler {
/**
* Cache a document tree for the DjVu XML metadata
+ * @param $image File
*/
function getMetaTree( $image , $gettext = false ) {
if ( isset( $image->dejaMetaTree ) ) {
@@ -159,11 +210,11 @@ class DjVuHandler extends ImageHandler {
$image->djvuTextTree = false;
$tree = new SimpleXMLElement( $metadata );
if( $tree->getName() == 'mw-djvu' ) {
- foreach($tree->children() as $b){
+ foreach($tree->children() as $b){
if( $b->getName() == 'DjVuTxt' ) {
$image->djvuTextTree = $b;
}
- else if ( $b->getName() == 'DjVuXML' ) {
+ elseif ( $b->getName() == 'DjVuXML' ) {
$image->dejaMetaTree = $b;
}
}
diff --git a/includes/media/Exif.php b/includes/media/Exif.php
new file mode 100644
index 00000000..345a6f19
--- /dev/null
+++ b/includes/media/Exif.php
@@ -0,0 +1,836 @@
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @ingroup Media
+ * @author Ævar Arnfjörð Bjarmason <avarab@gmail.com>
+ * @copyright Copyright © 2005, Ævar Arnfjörð Bjarmason, 2009 Brent Garber
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License
+ * @see http://exif.org/Exif2-2.PDF The Exif 2.2 specification
+ * @file
+ */
+
+/**
+ * Class to extract and validate Exif data from jpeg (and possibly tiff) files.
+ * @ingroup Media
+ */
+class Exif {
+
+ const BYTE = 1; //!< An 8-bit (1-byte) unsigned integer.
+ const ASCII = 2; //!< An 8-bit byte containing one 7-bit ASCII code. The final byte is terminated with NULL.
+ const SHORT = 3; //!< A 16-bit (2-byte) unsigned integer.
+ const LONG = 4; //!< A 32-bit (4-byte) unsigned integer.
+ const RATIONAL = 5; //!< Two LONGs. The first LONG is the numerator and the second LONG expresses the denominator
+ const UNDEFINED = 7; //!< An 8-bit byte that can take any value depending on the field definition
+ const SLONG = 9; //!< A 32-bit (4-byte) signed integer (2's complement notation),
+ const SRATIONAL = 10; //!< Two SLONGs. The first SLONG is the numerator and the second SLONG is the denominator.
+ const IGNORE = -1; // A fake value for things we don't want or don't support.
+
+ //@{
+ /* @var array
+ * @private
+ */
+
+ /**
+ * Exif tags grouped by category, the tagname itself is the key and the type
+ * is the value, in the case of more than one possible value type they are
+ * separated by commas.
+ */
+ var $mExifTags;
+
+ /**
+ * The raw Exif data returned by exif_read_data()
+ */
+ var $mRawExifData;
+
+ /**
+ * A Filtered version of $mRawExifData that has been pruned of invalid
+ * tags and tags that contain content they shouldn't contain according
+ * to the Exif specification
+ */
+ var $mFilteredExifData;
+
+ /**
+ * Filtered and formatted Exif data, see FormatMetadata::getFormattedData()
+ */
+ var $mFormattedExifData;
+
+ //@}
+
+ //@{
+ /* @var string
+ * @private
+ */
+
+ /**
+ * The file being processed
+ */
+ var $file;
+
+ /**
+ * The basename of the file being processed
+ */
+ var $basename;
+
+ /**
+ * The private log to log to, e.g. 'exif'
+ */
+ var $log = false;
+
+ /**
+ * The byte order of the file. Needed because php's
+ * extension doesn't fully process some obscure props.
+ */
+ private $byteOrder;
+ //@}
+
+ /**
+  * Constructor.
+  *
+  * Builds the table of known Exif tags, reads the raw Exif data from $file
+  * with exif_read_data(), then filters it (makeFilteredData()) and collapses
+  * some fields into friendlier forms (collapseData()).
+  *
+  * @param $file String: filename.
+  * @param $byteOrder String: 'BE' or 'LE'. Any other value triggers a
+  *   wfWarn() and 'BE' is assumed.
+  * @throws MWException if the exif_read_data() function does not exist.
+  * @todo FIXME: The following are broke:
+  * SubjectArea. Need to test the more obscure tags.
+  *
+  * DigitalZoomRatio = 0/0 is rejected. need to determine if that's valid.
+  * possibly should treat 0/0 = 0. need to read exif spec on that.
+  */
+ function __construct( $file, $byteOrder = '' ) {
+     /**
+      * Page numbers here refer to pages in the EXIF 2.2 standard
+      *
+      * Note, Exif::UNDEFINED is treated as a string, not as an array of bytes
+      * so don't put a count parameter for any UNDEFINED values.
+      *
+      * @link http://exif.org/Exif2-2.PDF The Exif 2.2 specification
+      */
+     $this->mExifTags = array(
+         # TIFF Rev. 6.0 Attribute Information (p22)
+         'IFD0' => array(
+             # Tags relating to image structure
+             'ImageWidth' => Exif::SHORT.','.Exif::LONG, # Image width
+             'ImageLength' => Exif::SHORT.','.Exif::LONG, # Image height
+             'BitsPerSample' => array( Exif::SHORT, 3 ), # Number of bits per component
+             # "When a primary image is JPEG compressed, this designation is not"
+             # "necessary and is omitted." (p23)
+             'Compression' => Exif::SHORT, # Compression scheme #p23
+             'PhotometricInterpretation' => Exif::SHORT, # Pixel composition #p23
+             'Orientation' => Exif::SHORT, # Orientation of image #p24
+             'SamplesPerPixel' => Exif::SHORT, # Number of components
+             'PlanarConfiguration' => Exif::SHORT, # Image data arrangement #p24
+             'YCbCrSubSampling' => array( Exif::SHORT, 2), # Subsampling ratio of Y to C #p24
+             'YCbCrPositioning' => Exif::SHORT, # Y and C positioning #p24-25
+             'XResolution' => Exif::RATIONAL, # Image resolution in width direction
+             'YResolution' => Exif::RATIONAL, # Image resolution in height direction
+             'ResolutionUnit' => Exif::SHORT, # Unit of X and Y resolution #(p26)
+
+             # Tags relating to recording offset
+             'StripOffsets' => Exif::SHORT.','.Exif::LONG, # Image data location
+             'RowsPerStrip' => Exif::SHORT.','.Exif::LONG, # Number of rows per strip
+             'StripByteCounts' => Exif::SHORT.','.Exif::LONG, # Bytes per compressed strip
+             'JPEGInterchangeFormat' => Exif::SHORT.','.Exif::LONG, # Offset to JPEG SOI
+             'JPEGInterchangeFormatLength' => Exif::SHORT.','.Exif::LONG, # Bytes of JPEG data
+
+             # Tags relating to image data characteristics
+             'TransferFunction' => Exif::IGNORE, # Transfer function
+             'WhitePoint' => array( Exif::RATIONAL, 2), # White point chromaticity
+             'PrimaryChromaticities' => array( Exif::RATIONAL, 6), # Chromaticities of primaries
+             'YCbCrCoefficients' => array( Exif::RATIONAL, 3), # Color space transformation matrix coefficients #p27
+             'ReferenceBlackWhite' => array( Exif::RATIONAL, 6), # Pair of black and white reference values
+
+             # Other tags
+             'DateTime' => Exif::ASCII, # File change date and time
+             'ImageDescription' => Exif::ASCII, # Image title
+             'Make' => Exif::ASCII, # Image input equipment manufacturer
+             'Model' => Exif::ASCII, # Image input equipment model
+             'Software' => Exif::ASCII, # Software used
+             'Artist' => Exif::ASCII, # Person who created the image
+             'Copyright' => Exif::ASCII, # Copyright holder
+         ),
+
+         # Exif IFD Attribute Information (p30-31)
+         'EXIF' => array(
+             # TODO: NOTE: Nonexistence of this field is taken to mean nonconformance
+             # to the EXIF 2.1 AND 2.2 standards
+             'ExifVersion' => Exif::UNDEFINED, # Exif version
+             'FlashPixVersion' => Exif::UNDEFINED, # Supported Flashpix version #p32
+
+             # Tags relating to Image Data Characteristics
+             'ColorSpace' => Exif::SHORT, # Color space information #p32
+
+             # Tags relating to image configuration
+             'ComponentsConfiguration' => Exif::UNDEFINED, # Meaning of each component #p33
+             'CompressedBitsPerPixel' => Exif::RATIONAL, # Image compression mode
+             'PixelYDimension' => Exif::SHORT.','.Exif::LONG, # Valid image height
+             'PixelXDimension' => Exif::SHORT.','.Exif::LONG, # Valid image width
+
+             # Tags relating to related user information
+             'MakerNote' => Exif::IGNORE, # Manufacturer notes
+             'UserComment' => Exif::UNDEFINED, # User comments #p34
+
+             # Tags relating to related file information
+             'RelatedSoundFile' => Exif::ASCII, # Related audio file
+
+             # Tags relating to date and time
+             'DateTimeOriginal' => Exif::ASCII, # Date and time of original data generation #p36
+             'DateTimeDigitized' => Exif::ASCII, # Date and time of digital data generation
+             'SubSecTime' => Exif::ASCII, # DateTime subseconds
+             'SubSecTimeOriginal' => Exif::ASCII, # DateTimeOriginal subseconds
+             'SubSecTimeDigitized' => Exif::ASCII, # DateTimeDigitized subseconds
+
+             # Tags relating to picture-taking conditions (p31)
+             'ExposureTime' => Exif::RATIONAL, # Exposure time
+             'FNumber' => Exif::RATIONAL, # F Number
+             'ExposureProgram' => Exif::SHORT, # Exposure Program #p38
+             'SpectralSensitivity' => Exif::ASCII, # Spectral sensitivity
+             'ISOSpeedRatings' => Exif::SHORT, # ISO speed rating
+             'OECF' => Exif::IGNORE,
+             # Optoelectronic conversion factor. Note: We don't have support for this atm.
+             'ShutterSpeedValue' => Exif::SRATIONAL, # Shutter speed
+             'ApertureValue' => Exif::RATIONAL, # Aperture
+             'BrightnessValue' => Exif::SRATIONAL, # Brightness
+             'ExposureBiasValue' => Exif::SRATIONAL, # Exposure bias
+             'MaxApertureValue' => Exif::RATIONAL, # Maximum land aperture
+             'SubjectDistance' => Exif::RATIONAL, # Subject distance
+             'MeteringMode' => Exif::SHORT, # Metering mode #p40
+             'LightSource' => Exif::SHORT, # Light source #p40-41
+             'Flash' => Exif::SHORT, # Flash #p41-42
+             'FocalLength' => Exif::RATIONAL, # Lens focal length
+             'SubjectArea' => array( Exif::SHORT, 4 ), # Subject area
+             'FlashEnergy' => Exif::RATIONAL, # Flash energy
+             'SpatialFrequencyResponse' => Exif::IGNORE, # Spatial frequency response. Not supported atm.
+             'FocalPlaneXResolution' => Exif::RATIONAL, # Focal plane X resolution
+             'FocalPlaneYResolution' => Exif::RATIONAL, # Focal plane Y resolution
+             'FocalPlaneResolutionUnit' => Exif::SHORT, # Focal plane resolution unit #p46
+             'SubjectLocation' => array( Exif::SHORT, 2), # Subject location
+             'ExposureIndex' => Exif::RATIONAL, # Exposure index
+             'SensingMethod' => Exif::SHORT, # Sensing method #p46
+             'FileSource' => Exif::UNDEFINED, # File source #p47
+             'SceneType' => Exif::UNDEFINED, # Scene type #p47
+             'CFAPattern' => Exif::IGNORE, # CFA pattern. not supported atm.
+             'CustomRendered' => Exif::SHORT, # Custom image processing #p48
+             'ExposureMode' => Exif::SHORT, # Exposure mode #p48
+             'WhiteBalance' => Exif::SHORT, # White Balance #p49
+             'DigitalZoomRatio' => Exif::RATIONAL, # Digital zoom ratio
+             'FocalLengthIn35mmFilm' => Exif::SHORT, # Focal length in 35 mm film
+             'SceneCaptureType' => Exif::SHORT, # Scene capture type #p49
+             'GainControl' => Exif::SHORT, # Scene control #p49-50
+             'Contrast' => Exif::SHORT, # Contrast #p50
+             'Saturation' => Exif::SHORT, # Saturation #p50
+             'Sharpness' => Exif::SHORT, # Sharpness #p50
+             'DeviceSettingDescription' => Exif::IGNORE,
+             # Device settings description. This could maybe be supported. Need to find an
+             # example file that uses this to see if it has stuff of interest in it.
+             'SubjectDistanceRange' => Exif::SHORT, # Subject distance range #p51
+
+             'ImageUniqueID' => Exif::ASCII, # Unique image ID
+         ),
+
+         # GPS Attribute Information (p52)
+         'GPS' => array(
+             'GPSVersion' => Exif::UNDEFINED,
+             # Should be an array of 4 Exif::BYTE's. However php treats it as an undefined
+             # Note exif standard calls this GPSVersionID, but php doesn't like the id suffix
+             'GPSLatitudeRef' => Exif::ASCII, # North or South Latitude #p52-53
+             'GPSLatitude' => array( Exif::RATIONAL, 3 ), # Latitude
+             'GPSLongitudeRef' => Exif::ASCII, # East or West Longitude #p53
+             'GPSLongitude' => array( Exif::RATIONAL, 3), # Longitude
+             'GPSAltitudeRef' => Exif::UNDEFINED,
+             # Altitude reference. Note, the exif standard says this should be an EXIF::Byte,
+             # but php seems to disagree.
+             'GPSAltitude' => Exif::RATIONAL, # Altitude
+             'GPSTimeStamp' => array( Exif::RATIONAL, 3), # GPS time (atomic clock)
+             'GPSSatellites' => Exif::ASCII, # Satellites used for measurement
+             'GPSStatus' => Exif::ASCII, # Receiver status #p54
+             'GPSMeasureMode' => Exif::ASCII, # Measurement mode #p54-55
+             'GPSDOP' => Exif::RATIONAL, # Measurement precision
+             'GPSSpeedRef' => Exif::ASCII, # Speed unit #p55
+             'GPSSpeed' => Exif::RATIONAL, # Speed of GPS receiver
+             'GPSTrackRef' => Exif::ASCII, # Reference for direction of movement #p55
+             'GPSTrack' => Exif::RATIONAL, # Direction of movement
+             'GPSImgDirectionRef' => Exif::ASCII, # Reference for direction of image #p56
+             'GPSImgDirection' => Exif::RATIONAL, # Direction of image
+             'GPSMapDatum' => Exif::ASCII, # Geodetic survey data used
+             'GPSDestLatitudeRef' => Exif::ASCII, # Reference for latitude of destination #p56
+             'GPSDestLatitude' => array( Exif::RATIONAL, 3 ), # Latitude destination
+             'GPSDestLongitudeRef' => Exif::ASCII, # Reference for longitude of destination #p57
+             'GPSDestLongitude' => array( Exif::RATIONAL, 3 ), # Longitude of destination
+             'GPSDestBearingRef' => Exif::ASCII, # Reference for bearing of destination #p57
+             'GPSDestBearing' => Exif::RATIONAL, # Bearing of destination
+             'GPSDestDistanceRef' => Exif::ASCII, # Reference for distance to destination #p57-58
+             'GPSDestDistance' => Exif::RATIONAL, # Distance to destination
+             'GPSProcessingMethod' => Exif::UNDEFINED, # Name of GPS processing method
+             'GPSAreaInformation' => Exif::UNDEFINED, # Name of GPS area
+             'GPSDateStamp' => Exif::ASCII, # GPS date
+             'GPSDifferential' => Exif::SHORT, # GPS differential correction
+         ),
+     );
+
+     $this->file = $file;
+     $this->basename = wfBaseName( $this->file );
+     if ( $byteOrder === 'BE' || $byteOrder === 'LE' ) {
+         $this->byteOrder = $byteOrder;
+     } else {
+         // Only give a warning for b/c, since originally we didn't
+         // require this. The number of things affected by this is
+         // rather small.
+         wfWarn( 'Exif class did not have byte order specified. '
+             . 'Some properties may be decoded incorrectly.' );
+         $this->byteOrder = 'BE'; // BE seems about twice as popular as LE in jpg's.
+     }
+
+     // debugFile() takes ( $fname, $io ); the basename is added by debugFile() itself.
+     $this->debugFile( __FUNCTION__, true );
+     if( function_exists( 'exif_read_data' ) ) {
+         wfSuppressWarnings();
+         $data = exif_read_data( $this->file, 0, true );
+         wfRestoreWarnings();
+     } else {
+         throw new MWException( "Internal error: exif_read_data not present. \$wgShowEXIF may be incorrectly set or not checked by an extension." );
+     }
+     /**
+      * exif_read_data() will return false on invalid input, such as
+      * when somebody uploads a file called something.jpeg
+      * containing random gibberish.
+      */
+     $this->mRawExifData = $data ? $data : array();
+     $this->makeFilteredData();
+     $this->collapseData();
+     $this->debugFile( __FUNCTION__, false );
+ }
+
+ /**
+  * Make $this->mFilteredExifData
+  *
+  * Copies every tag of $this->mRawExifData that is listed in
+  * $this->mExifTags (for its section) and passes validate() into
+  * $this->mFilteredExifData, keyed by tag name only. Unknown sections,
+  * unknown tags and tags with invalid content are logged via debug()
+  * and left out.
+  */
+ function makeFilteredData() {
+     $this->mFilteredExifData = array();
+
+     foreach ( $this->mRawExifData as $section => $tags ) {
+         // isset() is an exact key lookup; the previous loose in_array()
+         // over array_keys() could match e.g. an integer 0 section key.
+         if ( !isset( $this->mExifTags[$section] ) ) {
+             $this->debug( $section, __FUNCTION__, "'$section' is not a valid Exif section" );
+             continue;
+         }
+
+         if ( !is_array( $tags ) ) {
+             // Nothing to iterate over in this section.
+             continue;
+         }
+
+         foreach ( $tags as $tag => $value ) {
+             if ( !isset( $this->mExifTags[$section][$tag] ) ) {
+                 $this->debug( $tag, __FUNCTION__, "'$tag' is not a valid tag in '$section'" );
+                 continue;
+             }
+
+             if ( $this->validate( $section, $tag, $value ) ) {
+                 // This is ok, as the tags in the different sections do not conflict.
+                 // except in computed and thumbnail section, which we don't use.
+                 $this->mFilteredExifData[$tag] = $value;
+             } else {
+                 $this->debug( $value, __FUNCTION__, "'$tag' contained invalid data" );
+                 unset( $this->mFilteredExifData[$tag] );
+             }
+         }
+     }
+ }
+
+ /**
+  * Collapse some fields together.
+  * This converts some fields from exif form, to a more friendly form.
+  * For example GPS latitude to a single number.
+  *
+  * The rationale behind this is that we're storing data, not presenting to the user
+  * For example a longitude is a single number describing how far away you are from
+  * the prime meridian. While it might be nice to split it up into minutes and seconds
+  * for the user, it doesn't really make sense to split a single number into 4 parts
+  * for storage. (degrees, minutes, second, direction vs single floating point number).
+  *
+  * Other things this might do (not really sure if they make sense or not):
+  * Dates -> mediawiki date format.
+  * convert values that can be in different units to be in one standardized unit.
+  *
+  * As an alternative approach, some of this could be done in the validate phase
+  * if we make up our own types like Exif::DATE.
+  */
+ function collapseData( ) {
+
+     $this->exifGPStoNumber( 'GPSLatitude' );
+     $this->exifGPStoNumber( 'GPSDestLatitude' );
+     $this->exifGPStoNumber( 'GPSLongitude' );
+     $this->exifGPStoNumber( 'GPSDestLongitude' );
+
+     if ( isset( $this->mFilteredExifData['GPSAltitude'] ) && isset( $this->mFilteredExifData['GPSAltitudeRef'] ) ) {
+         if ( $this->mFilteredExifData['GPSAltitudeRef'] === "\1" ) {
+             // The altitude is a "<num>/<denom>" string; multiplying that
+             // string by -1 directly would silently drop the denominator,
+             // so evaluate the fraction before negating it.
+             $altitude = $this->mFilteredExifData['GPSAltitude'];
+             $fraction = is_string( $altitude ) ? explode( '/', $altitude, 2 ) : array();
+             if ( count( $fraction ) === 2 && $fraction[1] != 0 ) {
+                 $altitude = $fraction[0] / $fraction[1];
+             }
+             $this->mFilteredExifData['GPSAltitude'] = $altitude * -1;
+         }
+         unset( $this->mFilteredExifData['GPSAltitudeRef'] );
+     }
+
+     $this->exifPropToOrd( 'FileSource' );
+     $this->exifPropToOrd( 'SceneType' );
+
+     $this->charCodeString( 'UserComment' );
+     $this->charCodeString( 'GPSProcessingMethod');
+     $this->charCodeString( 'GPSAreaInformation' );
+
+     //ComponentsConfiguration should really be an array instead of a string...
+     //This turns a string of binary numbers into an array of numbers.
+
+     if ( isset ( $this->mFilteredExifData['ComponentsConfiguration'] ) ) {
+         $val = $this->mFilteredExifData['ComponentsConfiguration'];
+         $ccVals = array();
+         $length = strlen( $val );
+         for ( $i = 0; $i < $length; $i++ ) {
+             $ccVals[$i] = ord( substr( $val, $i, 1 ) );
+         }
+         $ccVals['_type'] = 'ol'; //this is for formatting later.
+         $this->mFilteredExifData['ComponentsConfiguration'] = $ccVals;
+     }
+
+     //GPSVersion(ID) is treated as the wrong type by php exif support.
+     //Go through each byte turning it into a version string.
+     //For example: "\x02\x02\x00\x00" -> "2.2.0.0"
+
+     //Also change exif tag name from GPSVersion (what php exif thinks it is)
+     //to GPSVersionID (what the exif standard thinks it is).
+
+     if ( isset ( $this->mFilteredExifData['GPSVersion'] ) ) {
+         $val = $this->mFilteredExifData['GPSVersion'];
+         $components = array();
+         $length = strlen( $val );
+         for ( $i = 0; $i < $length; $i++ ) {
+             $components[] = ord( substr( $val, $i, 1 ) );
+         }
+         if ( $this->byteOrder === 'LE' ) {
+             // Reverse the order of the components, not of the characters
+             // of the joined string, so multi-digit components stay intact.
+             $components = array_reverse( $components );
+         }
+         $this->mFilteredExifData['GPSVersionID'] = implode( '.', $components );
+         unset( $this->mFilteredExifData['GPSVersion'] );
+     }
+
+ }
+ /**
+  * Decode UserComment-style tags. See pg. 34 of exif standard.
+  * The first 8 bytes name the character set, the remainder is the value.
+  * The value is converted to UTF-8 and trimmed; the property is removed
+  * when it is too short or consists only of whitespace.
+  * This has not been tested on any shift-JIS strings.
+  * @param $prop String prop name.
+  */
+ private function charCodeString ( $prop ) {
+     if ( !isset( $this->mFilteredExifData[$prop] ) ) {
+         return;
+     }
+
+     if ( strlen( $this->mFilteredExifData[$prop] ) <= 8 ) {
+         //invalid. Must be at least 9 bytes long.
+         $this->debug( $this->mFilteredExifData[$prop] , __FUNCTION__, false );
+         unset( $this->mFilteredExifData[$prop] );
+         return;
+     }
+
+     $header = substr( $this->mFilteredExifData[$prop], 0, 8 );
+     $text = substr( $this->mFilteredExifData[$prop], 8 );
+
+     if ( $header === "\x4A\x49\x53\x00\x00\x00\x00\x00" ) {
+         //JIS
+         $encoding = "Shift-JIS";
+     } elseif ( $header === "UNICODE\x00" ) {
+         $encoding = "UTF-16" . $this->byteOrder;
+     } else {
+         //ascii or undefined.
+         $encoding = "";
+     }
+
+     // This could possibly check to see if iconv is really installed
+     // or if we're using the compatibility wrapper in globalFunctions.php
+     if ( $encoding ) {
+         wfSuppressWarnings();
+         $text = iconv( $encoding, 'UTF-8//IGNORE', $text );
+         wfRestoreWarnings();
+     } else {
+         // if valid utf-8, assume that, otherwise assume windows-1252
+         $verified = $text;
+         UtfNormal::quickIsNFCVerify( $verified ); //validates $verified.
+         if ( $verified !== $text ) {
+             wfSuppressWarnings();
+             $text = iconv( 'Windows-1252', 'UTF-8//IGNORE', $text );
+             wfRestoreWarnings();
+         }
+     }
+
+     //trim and check to make sure not only whitespace.
+     $text = trim( $text );
+     if ( strlen( $text ) === 0 ) {
+         //only whitespace.
+         $this->debug( $this->mFilteredExifData[$prop] , __FUNCTION__, "$prop: Is only whitespace" );
+         unset( $this->mFilteredExifData[$prop] );
+         return;
+     }
+
+     //all's good.
+     $this->mFilteredExifData[$prop] = $text;
+ }
+ /**
+  * Replace the raw binary string of an Exif::UNDEFINED property with the
+  * ord() of that string. This is sometimes needed depending on the type
+  * of UNDEFINED field. Does nothing when the property is not set.
+  * @param $prop String name of property
+  */
+ private function exifPropToOrd ( $prop ) {
+     if ( !isset( $this->mFilteredExifData[$prop] ) ) {
+         return;
+     }
+     $raw = $this->mFilteredExifData[$prop];
+     $this->mFilteredExifData[$prop] = ord( $raw );
+ }
+ /**
+  * Convert gps in exif form to a single floating point number
+  * for example 10 degrees 20`40`` S -> -10.34444
+  *
+  * The matching "<prop>Ref" entry is always removed; the property itself is
+  * removed too when it or its reference is missing or the reference is not
+  * one of N, S, E, W.
+  * @param String $prop a gps coordinate exif tag name (like GPSLongitude)
+  */
+ private function exifGPStoNumber ( $prop ) {
+     $refKey = $prop . 'Ref';
+     $coordinate = false;
+
+     if ( isset( $this->mFilteredExifData[$prop] ) && isset( $this->mFilteredExifData[$refKey] ) ) {
+         $parts = $this->mFilteredExifData[$prop];
+         $hemisphere = $this->mFilteredExifData[$refKey];
+
+         if ( in_array( $hemisphere, array( 'N', 'S', 'E', 'W' ), true ) ) {
+             // degrees, then minutes and seconds scaled down to degrees
+             list( $num, $denom ) = explode( '/', $parts[0] );
+             $coordinate = $num / $denom;
+             list( $num, $denom ) = explode( '/', $parts[1] );
+             $coordinate += ( $num / $denom ) * ( 1 / 60 );
+             list( $num, $denom ) = explode( '/', $parts[2] );
+             $coordinate += ( $num / $denom ) * ( 1 / 3600 );
+
+             if ( $hemisphere === 'S' || $hemisphere === 'W' ) {
+                 $coordinate *= - 1; // make negative
+             }
+         }
+     }
+
+     // update the exif records.
+
+     if ( $coordinate === false ) { // invalid; using === as the value could potentially be 0
+         unset( $this->mFilteredExifData[$prop] );
+     } else {
+         $this->mFilteredExifData[$prop] = $coordinate;
+     }
+     unset( $this->mFilteredExifData[$refKey] );
+ }
+
+ /**
+  * Fill $this->mFormattedExifData by running the filtered data through
+  * FormatMetadata::getFormattedData().
+  * (is this ever used?)
+  *
+  * @deprecated since 1.18
+  */
+ function makeFormattedData( ) {
+     wfDeprecated( __METHOD__ );
+     $formatted = FormatMetadata::getFormattedData( $this->mFilteredExifData );
+     $this->mFormattedExifData = $formatted;
+ }
+ /**#@-*/
+
+ /**#@+
+ * @return array
+ */
+ /**
+  * Get the raw Exif data ($this->mRawExifData), i.e. what
+  * exif_read_data() returned, or an empty array if it returned nothing.
+  */
+ function getData() {
+     return $this->mRawExifData;
+ }
+
+ /**
+  * Get the filtered Exif data ($this->mFilteredExifData): the raw data
+  * pruned of unknown and invalid tags, with some fields collapsed.
+  */
+ function getFilteredData() {
+     return $this->mFilteredExifData;
+ }
+
+ /**
+  * Get $this->mFormattedExifData, building it first via
+  * makeFormattedData() when it is still empty.
+  *
+  * This returns the data for display to user.
+  * It's unclear if this is ever used.
+  *
+  * @deprecated since 1.18
+  */
+ function getFormattedData() {
+     wfDeprecated( __METHOD__ );
+     if ( $this->mFormattedExifData ) {
+         return $this->mFormattedExifData;
+     }
+     $this->makeFormattedData();
+     return $this->mFormattedExifData;
+ }
+ /**#@-*/
+
+ /**
+  * The version of the output format
+  *
+  * Before the actual metadata information is saved in the database we
+  * strip some of it since we don't want to save things like thumbnails
+  * which usually accompany Exif data. This value gets saved in the
+  * database along with the actual Exif data, and if the version in the
+  * database doesn't equal the value returned by this function the Exif
+  * data is regenerated.
+  *
+  * @return int
+  */
+ public static function version() {
+     // Deliberately a plain literal rather than a class constant.
+     return 2;
+ }
+
+ /**#@+
+ * Validates if a tag value is of the type it should be according to the Exif spec
+ *
+ * @private
+ *
+ * @param $in Mixed: the input value to check
+ * @return bool
+ */
+ private function isByte( $in ) {
+     // A non-array value whose integer rendering equals itself, within 0..255.
+     $accepted = !is_array( $in ) && sprintf('%d', $in) == $in && $in >= 0 && $in <= 255;
+     $this->debug( $in, __FUNCTION__, $accepted );
+     return $accepted;
+ }
+
+ /**
+  * Check that a value is a non-blank string made only of printable ASCII
+  * characters (0x20-0x7e) and line feeds.
+  *
+  * @param $in
+  * @return bool
+  */
+ private function isASCII( $in ) {
+     if ( is_array( $in ) ) {
+         return false;
+     }
+
+     $rejection = null;
+     if ( preg_match( "/[^\x0a\x20-\x7e]/", $in ) ) {
+         $rejection = 'found a character not in our whitelist';
+     } elseif ( preg_match( '/^\s*$/', $in ) ) {
+         $rejection = 'input consisted solely of whitespace';
+     }
+
+     if ( $rejection !== null ) {
+         $this->debug( $in, __FUNCTION__, $rejection );
+         return false;
+     }
+
+     return true;
+ }
+
+ /**
+  * Check that a value is a valid Exif SHORT: a non-array value whose
+  * integer rendering equals itself and that fits in 16 unsigned bits
+  * (0..65535).
+  *
+  * @param $in
+  * @return bool
+  */
+ private function isShort( $in ) {
+     // 65535 is the largest 16-bit unsigned value (was off by one: 65536).
+     if ( !is_array( $in ) && sprintf('%d', $in) == $in && $in >= 0 && $in <= 65535 ) {
+         $this->debug( $in, __FUNCTION__, true );
+         return true;
+     } else {
+         $this->debug( $in, __FUNCTION__, false );
+         return false;
+     }
+ }
+
+ /**
+  * Check that a value is a valid Exif LONG: a non-array value whose
+  * integer rendering equals itself and that fits in 32 unsigned bits
+  * (0..4294967295).
+  *
+  * @param $in
+  * @return bool
+  */
+ private function isLong( $in ) {
+     // 4294967295 is the largest 32-bit unsigned value (was off by one: 4294967296).
+     if ( !is_array( $in ) && sprintf('%d', $in) == $in && $in >= 0 && $in <= 4294967295 ) {
+         $this->debug( $in, __FUNCTION__, true );
+         return true;
+     } else {
+         $this->debug( $in, __FUNCTION__, false );
+         return false;
+     }
+ }
+
+ /**
+  * Check that a value is an unsigned fraction "num/denom" with a non-zero
+  * denominator, both parts being accepted by isLong().
+  *
+  * @param $in
+  * @return bool
+  */
+ private function isRational( $in ) {
+     $parts = array();
+     # The denominator alternatives avoid division by zero
+     if ( is_array( $in ) || !@preg_match( '/^(\d+)\/(\d+[1-9]|[1-9]\d*)$/', $in, $parts ) ) {
+         $this->debug( $in, __FUNCTION__, 'fed a non-fraction value' );
+         return false;
+     }
+     return $this->isLong( $parts[1] ) && $this->isLong( $parts[2] );
+ }
+
+ /**
+  * Exif UNDEFINED values are opaque, so anything is accepted; the value
+  * is only logged.
+  *
+  * @param $in
+  * @return bool always true
+  */
+ private function isUndefined( $in ) {
+     $accepted = true;
+     $this->debug( $in, __FUNCTION__, $accepted );
+     return $accepted;
+ }
+
+ /**
+  * Check a signed long: the absolute value of the input must be accepted
+  * by isLong().
+  *
+  * @param $in
+  * @return bool
+  */
+ private function isSlong( $in ) {
+     $accepted = $this->isLong( abs( $in ) );
+     $this->debug( $in, __FUNCTION__, $accepted );
+     return $accepted;
+ }
+
+ /**
+  * Check that a value is a signed fraction "num/denom" with an optionally
+  * negative numerator and a non-zero denominator, both parts being
+  * accepted by isSlong().
+  *
+  * @param $in
+  * @return bool
+  */
+ private function isSrational( $in ) {
+     $m = array();
+     if ( !is_array( $in ) && preg_match( '/^(-?\d+)\/(\d+[1-9]|[1-9]\d*)$/', $in, $m ) ) { # Avoid division by zero
+         // $m[0] is the whole match; the numerator and denominator are
+         // capture groups 1 and 2.
+         return $this->isSlong( $m[1] ) && $this->isSlong( $m[2] );
+     } else {
+         $this->debug( $in, __FUNCTION__, 'fed a non-fraction value' );
+         return false;
+     }
+ }
+ /**#@-*/
+
+ /**
+  * Validates if a tag has a legal value according to the Exif spec
+  *
+  * The expected type (and, for array types, element count) is looked up in
+  * $this->mExifTags. Multi-element values are checked element by element
+  * through a recursive call; single values are dispatched to the matching
+  * is*() checker. Tags typed Exif::IGNORE and unknown types are rejected.
+  *
+  * @private
+  * @param $section String: section where tag is located.
+  * @param $tag String: the tag to check.
+  * @param $val Mixed: the value of the tag.
+  * @param $recursive Boolean: true if called recursively for array types.
+  * @return bool
+  */
+ private function validate( $section, $tag, $val, $recursive = false ) {
+     $debug = "tag is '$tag'";
+     $etype = $this->mExifTags[$section][$tag];
+     $ecount = 1;
+     if( is_array( $etype ) ) {
+         list( $etype, $ecount ) = $etype;
+         if ( $recursive ) {
+             $ecount = 1; // checking individual elements
+         }
+     }
+     // count() must only be given arrays: on a scalar it emits a warning
+     // (PHP 7.2+) or throws a TypeError (PHP 8). A scalar is one element.
+     $count = is_array( $val ) ? count( $val ) : 1;
+     if( $ecount != $count ) {
+         $this->debug( $val, __FUNCTION__, "Expected $ecount elements for $tag but got $count" );
+         return false;
+     }
+     if( $count > 1 ) {
+         foreach( $val as $v ) {
+             if( !$this->validate( $section, $tag, $v, true ) ) {
+                 return false;
+             }
+         }
+         return true;
+     }
+     // Does not work if not typecast
+     switch( (string)$etype ) {
+         case (string)Exif::BYTE:
+             $this->debug( $val, __FUNCTION__, $debug );
+             return $this->isByte( $val );
+         case (string)Exif::ASCII:
+             $this->debug( $val, __FUNCTION__, $debug );
+             return $this->isASCII( $val );
+         case (string)Exif::SHORT:
+             $this->debug( $val, __FUNCTION__, $debug );
+             return $this->isShort( $val );
+         case (string)Exif::LONG:
+             $this->debug( $val, __FUNCTION__, $debug );
+             return $this->isLong( $val );
+         case (string)Exif::RATIONAL:
+             $this->debug( $val, __FUNCTION__, $debug );
+             return $this->isRational( $val );
+         case (string)Exif::UNDEFINED:
+             $this->debug( $val, __FUNCTION__, $debug );
+             return $this->isUndefined( $val );
+         case (string)Exif::SLONG:
+             $this->debug( $val, __FUNCTION__, $debug );
+             return $this->isSlong( $val );
+         case (string)Exif::SRATIONAL:
+             $this->debug( $val, __FUNCTION__, $debug );
+             return $this->isSrational( $val );
+         case (string)Exif::SHORT.','.Exif::LONG:
+             $this->debug( $val, __FUNCTION__, $debug );
+             return $this->isShort( $val ) || $this->isLong( $val );
+         case (string)Exif::IGNORE:
+             $this->debug( $val, __FUNCTION__, $debug );
+             return false;
+         default:
+             $this->debug( $val, __FUNCTION__, "The tag '$tag' is unknown" );
+             return false;
+     }
+ }
+
+ /**
+  * Convenience function for debugging output
+  *
+  * Writes one line to the log group named by $this->log; does nothing
+  * when $this->log is not set. Arrays are rendered with print_r().
+  *
+  * @private
+  *
+  * @param $in Mixed: the value being reported on.
+  * @param $fname String: name of the calling function.
+  * @param $action Mixed: true for "accepted", false for "rejected", null
+  *   (the default) for "input was", anything else is used as the message.
+  */
+ private function debug( $in, $fname, $action = null ) {
+     if ( !$this->log ) {
+         return;
+     }
+     $type = gettype( $in );
+     $class = ucfirst( __CLASS__ );
+     if ( $type === 'array' ) {
+         $in = print_r( $in, true );
+     }
+
+     if ( $action === true ) {
+         $line = "$class::$fname: accepted: '$in' (type: $type)\n";
+     } elseif ( $action === false ) {
+         $line = "$class::$fname: rejected: '$in' (type: $type)\n";
+     } elseif ( $action === null ) {
+         $line = "$class::$fname: input was: '$in' (type: $type)\n";
+     } else {
+         $line = "$class::$fname: $action (type: $type; content: '$in')\n";
+     }
+     wfDebugLog( $this->log, $line );
+ }
+
+ /**
+  * Convenience function for debugging output
+  *
+  * Logs the start or end of processing of $this->basename to the log
+  * group named by $this->log; does nothing when $this->log is not set.
+  *
+  * @private
+  *
+  * @param $fname String: the name of the function calling this function
+  * @param $io Boolean: Specify whether we're beginning or ending
+  */
+ private function debugFile( $fname, $io ) {
+     if ( !$this->log ) {
+         return;
+     }
+     $class = ucfirst( __CLASS__ );
+     $phase = $io ? 'begin' : 'end';
+     wfDebugLog( $this->log, "$class::$fname: $phase processing: '{$this->basename}'\n" );
+ }
+}
+
diff --git a/includes/media/ExifBitmap.php b/includes/media/ExifBitmap.php
new file mode 100644
index 00000000..05ce161b
--- /dev/null
+++ b/includes/media/ExifBitmap.php
@@ -0,0 +1,210 @@
+<?php
+/**
+ * @file
+ * @ingroup Media
+ */
+
+/**
+ * Stuff specific to JPEG and (built-in) TIFF handler.
+ * All metadata related, since both JPEG and TIFF support Exif.
+ *
+ * @ingroup Media
+ */
+class ExifBitmapHandler extends BitmapHandler {
+
+	const BROKEN_FILE = '-1'; // error extracting metadata
+	const OLD_BROKEN_FILE = '0'; // outdated error extracting metadata.
+
+	/**
+	 * Downgrade version-2 metadata to the flattened version-1 layout.
+	 *
+	 * A conversion only happens when version 1 is requested and the data is
+	 * tagged MEDIAWIKI_EXIF_VERSION 2; otherwise the metadata is returned
+	 * without being flattened.
+	 *
+	 * @param $metadata Mixed: metadata array, or its serialized string form
+	 * @param $version Mixed: requested version; only the part before the
+	 *   first ';' is looked at
+	 * @return Mixed the (possibly converted) metadata
+	 */
+	function convertMetadataVersion( $metadata, $version = 1 ) {
+		// basically flattens arrays.
+		$version = explode( ';', $version, 2 );
+		$version = intval( $version[0] );
+		if ( $version < 1 || $version >= 2 ) {
+			return $metadata;
+		}
+
+		$avoidHtml = true;
+
+		if ( !is_array( $metadata ) ) {
+			$metadata = unserialize( $metadata );
+		}
+		if ( !isset( $metadata['MEDIAWIKI_EXIF_VERSION'] ) || $metadata['MEDIAWIKI_EXIF_VERSION'] != 2 ) {
+			return $metadata;
+		}
+
+		// Treat Software as a special case because it can contain
+		// an array of (SoftwareName, Version).
+		// The isset() on index 0 avoids an undefined-index notice when the
+		// Software array is empty or not a plain list.
+		if ( isset( $metadata['Software'] )
+			&& is_array( $metadata['Software'] )
+			&& isset( $metadata['Software'][0] )
+			&& is_array( $metadata['Software'][0] )
+			&& isset( $metadata['Software'][0][0] )
+			&& isset( $metadata['Software'][0][1] )
+		) {
+			$metadata['Software'] = $metadata['Software'][0][0] . ' (Version '
+				. $metadata['Software'][0][1] . ')';
+		}
+
+		// ContactInfo also has to be dealt with specially
+		if ( isset( $metadata['Contact'] ) ) {
+			$metadata['Contact'] =
+				FormatMetadata::collapseContactInfo(
+					$metadata['Contact'] );
+		}
+
+		foreach ( $metadata as &$val ) {
+			if ( is_array( $val ) ) {
+				$val = FormatMetadata::flattenArray( $val, 'ul', $avoidHtml );
+			}
+		}
+		// Break the reference so later writes to $val cannot alter $metadata.
+		unset( $val );
+
+		$metadata['MEDIAWIKI_EXIF_VERSION'] = 1;
+		return $metadata;
+	}
+
+	/**
+	 * Classify stored metadata as good, back-compatible or bad.
+	 *
+	 * @param $image File (not used by this implementation)
+	 * @param $metadata String: serialized metadata, or one of the
+	 *   BROKEN_FILE / OLD_BROKEN_FILE markers
+	 * @return Mixed one of self::METADATA_GOOD, self::METADATA_COMPATIBLE or
+	 *   self::METADATA_BAD (constants not defined in this class)
+	 */
+	function isMetadataValid( $image, $metadata ) {
+		global $wgShowEXIF;
+		if ( !$wgShowEXIF ) {
+			# Metadata disabled and so an empty field is expected
+			return self::METADATA_GOOD;
+		}
+		if ( $metadata === self::OLD_BROKEN_FILE ) {
+			# Old special value indicating that there is no EXIF data in the file,
+			# or that there was an error while extracting the metadata.
+			wfDebug( __METHOD__ . ": back-compat version\n");
+			return self::METADATA_COMPATIBLE;
+		}
+		if ( $metadata === self::BROKEN_FILE ) {
+			return self::METADATA_GOOD;
+		}
+		wfSuppressWarnings();
+		$exif = unserialize( $metadata );
+		wfRestoreWarnings();
+		if ( !isset( $exif['MEDIAWIKI_EXIF_VERSION'] ) ||
+			$exif['MEDIAWIKI_EXIF_VERSION'] != Exif::version() )
+		{
+			if ( isset( $exif['MEDIAWIKI_EXIF_VERSION'] ) &&
+				$exif['MEDIAWIKI_EXIF_VERSION'] == 1 )
+			{
+				//back-compatible but old
+				wfDebug( __METHOD__.": back-compat version\n" );
+				return self::METADATA_COMPATIBLE;
+			}
+			# Wrong (non-compatible) version
+			wfDebug( __METHOD__.": wrong version\n" );
+			return self::METADATA_BAD;
+		}
+		return self::METADATA_GOOD;
+	}
+
+	/**
+	 * Format the stored metadata of a file for display.
+	 *
+	 * @param $image File
+	 * @return array|bool result of formatMetadataHelper(), or false when
+	 *   there is nothing usable to show
+	 */
+	function formatMetadata( $image ) {
+		$metadata = $image->getMetadata();
+		if ( $metadata === self::OLD_BROKEN_FILE ||
+			$metadata === self::BROKEN_FILE ||
+			$this->isMetadataValid( $image, $metadata ) === self::METADATA_BAD )
+		{
+			// So we don't try and display metadata from PagedTiffHandler
+			// for example when using InstantCommons.
+			return false;
+		}
+
+		// Suppress unserialize() notices for malformed data, as the other
+		// methods of this class do; a failure is handled just below.
+		wfSuppressWarnings();
+		$exif = unserialize( $metadata );
+		wfRestoreWarnings();
+		if ( !$exif ) {
+			return false;
+		}
+		unset( $exif['MEDIAWIKI_EXIF_VERSION'] );
+		if ( count( $exif ) == 0 ) {
+			return false;
+		}
+		return $this->formatMetadataHelper( $exif );
+	}
+
+	/**
+	 * @param $image File (not used)
+	 * @return string always 'exif'
+	 */
+	function getMetadataType( $image ) {
+		return 'exif';
+	}
+
+	/**
+	 * Wrapper for base classes ImageHandler::getImageSize() that checks for
+	 * rotation reported from metadata and swaps the sizes to match.
+	 *
+	 * @param File $image
+	 * @param string $path
+	 * @return array|bool whatever the parent returned, with elements 0 and 1
+	 *   exchanged when the file is rotated by 90 or 270 degrees
+	 */
+	function getImageSize( $image, $path ) {
+		global $wgEnableAutoRotation;
+		$gis = parent::getImageSize( $image, $path );
+
+		// Don't just call $image->getMetadata(); File::getPropsFromPath() calls us with a bogus object.
+		// This may mean we read EXIF data twice on initial upload.
+		if ( $wgEnableAutoRotation ) {
+			$meta = $this->getMetadata( $image, $path );
+			$rotation = $this->getRotationForExif( $meta );
+		} else {
+			$rotation = 0;
+		}
+
+		// Only swap when the parent actually produced a size array. Writing
+		// $gis[0] on a failure value such as false would silently turn it
+		// into an array and hide the failure from the caller.
+		if ( is_array( $gis ) && ( $rotation == 90 || $rotation == 270 ) ) {
+			$width = $gis[0];
+			$gis[0] = $gis[1];
+			$gis[1] = $width;
+		}
+		return $gis;
+	}
+
+	/**
+	 * On supporting image formats, try to read out the low-level orientation
+	 * of the file and return the angle that the file needs to be rotated to
+	 * be viewed.
+	 *
+	 * This information is only useful when manipulating the original file;
+	 * the width and height we normally work with is logical, and will match
+	 * any produced output views.
+	 *
+	 * @param $file File
+	 * @return int 0, 90, 180 or 270
+	 */
+	public function getRotation( $file ) {
+		global $wgEnableAutoRotation;
+		if ( !$wgEnableAutoRotation ) {
+			return 0;
+		}
+
+		$data = $file->getMetadata();
+		return $this->getRotationForExif( $data );
+	}
+
+	/**
+	 * Given a chunk of serialized Exif metadata, return the orientation as
+	 * degrees of rotation.
+	 *
+	 * @param string $data
+	 * @return int 0, 90, 180 or 270
+	 * @fixme orientation can include flipping as well; see if this is an issue!
+	 */
+	protected function getRotationForExif( $data ) {
+		if ( !$data ) {
+			return 0;
+		}
+		wfSuppressWarnings();
+		$data = unserialize( $data );
+		wfRestoreWarnings();
+		if ( isset( $data['Orientation'] ) ) {
+			# See http://sylvana.net/jpegcrop/exif_orientation.html
+			switch ( $data['Orientation'] ) {
+				case 8:
+					return 90;
+				case 3:
+					return 180;
+				case 6:
+					return 270;
+				default:
+					return 0;
+			}
+		}
+		return 0;
+	}
+}
+
diff --git a/includes/media/FormatMetadata.php b/includes/media/FormatMetadata.php
new file mode 100644
index 00000000..47fc1adc
--- /dev/null
+++ b/includes/media/FormatMetadata.php
@@ -0,0 +1,1354 @@
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @ingroup Media
+ * @author Ævar Arnfjörð Bjarmason <avarab@gmail.com>
+ * @copyright Copyright © 2005, Ævar Arnfjörð Bjarmason, 2009 Brent Garber, 2010 Brian Wolff
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License
+ * @see http://exif.org/Exif2-2.PDF The Exif 2.2 specification
+ * @file
+ */
+
+
+/**
+ * Format Image metadata values into a human readable form.
+ *
+ * Note lots of these messages use the prefix 'exif' even though
+ * they may not be exif properties. For example 'exif-ImageDescription'
+ * can be the Exif ImageDescription, or it could be the iptc-iim caption
+ * property, or it could be the xmp dc:description property. This
+ * is because these messages should be independent of how the data is
+ * stored, since the user doesn't care if the description is stored in xmp,
+ * exif, etc., only that it's a description. (Additionally many of these properties
+ * are merged together following the MWG standard, such that for example,
+ * exif properties override XMP properties that mean the same thing if
+ * there is a conflict).
+ *
+ * It should perhaps use a prefix like 'metadata' instead, but there
+ * is already a large number of messages using the 'exif' prefix.
+ *
+ * @ingroup Media
+ */
+class FormatMetadata {
+
+ /**
+ * Numbers given by Exif user agents are often magical, that is they
+ * should be replaced by a detailed explanation depending on their
+ * value which most of the time are plain integers. This function
+ * formats Exif (and other metadata) values into human readable form.
+ *
+ * @param $tags Array: the Exif data to format ( as returned by
+ * Exif::getFilteredData() or BitmapMetadataHandler )
+ * @return array
+ */
+	public static function getFormattedData( $tags ) {
+		global $wgLang;
+
+		// Resolution values use the 'i' message when the unit is 2 (also the
+		// default when no unit is given) and the 'c' message otherwise.
+		$resolutionunit = !isset( $tags['ResolutionUnit'] ) || $tags['ResolutionUnit'] == 2 ? 2 : 3;
+		unset( $tags['ResolutionUnit'] );
+
+		foreach ( $tags as $tag => &$vals ) {
+
+			// This seems ugly to wrap non-array's in an array just to unwrap again,
+			// especially when most of the time it is not an array
+			if ( !is_array( $tags[$tag] ) ) {
+				$vals = array( $vals );
+			}
+
+			// _type is a special value to say what array type
+			if ( isset( $tags[$tag]['_type'] ) ) {
+				$type = $tags[$tag]['_type'];
+				unset( $vals['_type'] );
+			} else {
+				$type = 'ul'; // default unordered list.
+			}
+
+			// This is done differently as the tag is an array.
+			if ( $tag == 'GPSTimeStamp' && count( $vals ) === 3 ) {
+				// hour min sec array
+
+				$h = explode( '/', $vals[0] );
+				$m = explode( '/', $vals[1] );
+				$s = explode( '/', $vals[2] );
+
+				// this should already be validated
+				// when loaded from file, but it could
+				// come from a foreign repo, so be
+				// paranoid.
+				if ( !isset( $h[1] )
+					|| !isset( $m[1] )
+					|| !isset( $s[1] )
+					|| $h[1] == 0
+					|| $m[1] == 0
+					|| $s[1] == 0
+				) {
+					continue;
+				}
+				$tags[$tag] = intval( $h[0] / $h[1] )
+					. ':' . str_pad( intval( $m[0] / $m[1] ), 2, '0', STR_PAD_LEFT )
+					. ':' . str_pad( intval( $s[0] / $s[1] ), 2, '0', STR_PAD_LEFT );
+
+				$time = wfTimestamp( TS_MW, '1971:01:01 ' . $tags[$tag] );
+				// the 1971:01:01 is just a placeholder, and not shown to user.
+				if ( $time && intval( $time ) > 0 ) {
+					$tags[$tag] = $wgLang->time( $time );
+				}
+				continue;
+			}
+
+			// The contact info is a multi-valued field
+			// instead of the other props which are single
+			// valued (mostly) so handle as a special case.
+			if ( $tag === 'Contact' ) {
+				$vals = self::collapseContactInfo( $vals );
+				continue;
+			}
+
+			foreach ( $vals as &$val ) {
+				$val = self::formatTagValue( $tag, $val, $resolutionunit );
+			}
+			// Break the reference before $vals is replaced below.
+			unset( $val );
+
+			// End formatting values, start flattening arrays.
+			$vals = self::flattenArray( $vals, $type );
+
+		}
+		// Break the reference so $tags cannot be altered through $vals later.
+		unset( $vals );
+
+		return $tags;
+	}
+
+	/**
+	 * Format one value of one tag; helper for getFormattedData().
+	 *
+	 * @param $tag String: the tag name
+	 * @param $val Mixed: a single value of that tag
+	 * @param $resolutionunit Integer: 2 or 3, as computed by getFormattedData()
+	 * @return Mixed the formatted value, or $val unchanged if it is not recognized
+	 */
+	private static function formatTagValue( $tag, $val, $resolutionunit ) {
+		global $wgLang;
+
+		// Tags whose values are a fixed set of codes, each with its own message.
+		$enumerated = self::getEnumeratedTagValues();
+		if ( isset( $enumerated[$tag] ) ) {
+			list( $msgTag, $knownValues ) = $enumerated[$tag];
+			// Non-strict matching on purpose: the same loose comparison a
+			// switch statement performs, so 1 and '1' are treated alike.
+			if ( in_array( $val, $knownValues ) ) {
+				$val = self::msg( $msgTag, $val );
+			}
+			/* If not recognized, display as is. */
+			return $val;
+		}
+
+		switch ( $tag ) {
+			// TODO: YCbCrSubSampling
+
+			case 'XResolution':
+			case 'YResolution':
+				switch ( $resolutionunit ) {
+					case 2:
+						$val = self::msg( 'XYResolution', 'i', self::formatNum( $val ) );
+						break;
+					case 3:
+						$val = self::msg( 'XYResolution', 'c', self::formatNum( $val ) );
+						break;
+					default:
+						/* If not recognized, display as is. */
+						break;
+				}
+				break;
+
+			// TODO: YCbCrCoefficients #p27 (see annex E)
+			case 'ExifVersion': case 'FlashpixVersion':
+				$val = "$val" / 100;
+				break;
+
+			case 'DateTime':
+			case 'DateTimeOriginal':
+			case 'DateTimeDigitized':
+			case 'DateTimeReleased':
+			case 'DateTimeExpires':
+			case 'GPSDateStamp':
+			case 'dc-date':
+			case 'DateTimeMetadata':
+				if ( $val == '0000:00:00 00:00:00' || $val == '    :  :     :  :  ' ) {
+					$val = wfMsg( 'exif-unknowndate' );
+				} elseif ( preg_match( '/^(?:\d{4}):(?:\d\d):(?:\d\d) (?:\d\d):(?:\d\d):(?:\d\d)$/D', $val ) ) {
+					$time = wfTimestamp( TS_MW, $val );
+					if ( $time && intval( $time ) > 0 ) {
+						$val = $wgLang->timeanddate( $time );
+					}
+				} elseif ( preg_match( '/^(?:\d{4}):(?:\d\d):(?:\d\d)$/D', $val ) ) {
+					// If only the date but not the time is filled in.
+					$time = wfTimestamp( TS_MW, substr( $val, 0, 4 )
+						. substr( $val, 5, 2 )
+						. substr( $val, 8, 2 )
+						. '000000' );
+					if ( $time && intval( $time ) > 0 ) {
+						$val = $wgLang->date( $time );
+					}
+				}
+				// else it will just output $val without formatting it.
+				break;
+
+			case 'SubjectDistance':
+				$val = self::msg( $tag, '', self::formatNum( $val ) );
+				break;
+
+			case 'Flash':
+				$flashDecode = array(
+					'fired'    => $val & bindec( '00000001' ),
+					'return'   => ( $val & bindec( '00000110' ) ) >> 1,
+					'mode'     => ( $val & bindec( '00011000' ) ) >> 3,
+					'function' => ( $val & bindec( '00100000' ) ) >> 5,
+					'redeye'   => ( $val & bindec( '01000000' ) ) >> 6,
+//					'reserved' => ($val & bindec( '10000000' )) >> 7,
+				);
+
+				// Start from an empty list for every value; otherwise the
+				// messages of an earlier Flash value would be repeated here.
+				$flashMsgs = array();
+
+				# We do not need to handle unknown values since all are used.
+				foreach ( $flashDecode as $subTag => $subValue ) {
+					# We do not need any message for zeroed values.
+					if ( $subTag != 'fired' && $subValue == 0 ) {
+						continue;
+					}
+					$fullTag = $tag . '-' . $subTag ;
+					$flashMsgs[] = self::msg( $fullTag, $subValue );
+				}
+				$val = $wgLang->commaList( $flashMsgs );
+				break;
+
+			case 'GPSAltitude':
+				if ( $val < 0 ) {
+					$val = self::msg( 'GPSAltitude', 'below-sealevel', self::formatNum( -$val, 3 ) );
+				} else {
+					$val = self::msg( 'GPSAltitude', 'above-sealevel', self::formatNum( $val, 3 ) );
+				}
+				break;
+
+			case 'GPSLatitude':
+			case 'GPSDestLatitude':
+				$val = self::formatCoords( $val, 'latitude' );
+				break;
+			case 'GPSLongitude':
+			case 'GPSDestLongitude':
+				$val = self::formatCoords( $val, 'longitude' );
+				break;
+
+			case 'GPSDOP':
+				// See http://en.wikipedia.org/wiki/Dilution_of_precision_(GPS)
+				if ( $val <= 2 ) {
+					$val = self::msg( $tag, 'excellent', self::formatNum( $val ) );
+				} elseif ( $val <= 5 ) {
+					$val = self::msg( $tag, 'good', self::formatNum( $val ) );
+				} elseif ( $val <= 10 ) {
+					$val = self::msg( $tag, 'moderate', self::formatNum( $val ) );
+				} elseif ( $val <= 20 ) {
+					$val = self::msg( $tag, 'fair', self::formatNum( $val ) );
+				} else {
+					$val = self::msg( $tag, 'poor', self::formatNum( $val ) );
+				}
+				break;
+
+			// This is not in the Exif standard, just a special
+			// case for our purposes which enables wikis to wikify
+			// the make, model and software name to link to their articles.
+			case 'Make':
+			case 'Model':
+				$val = self::msg( $tag, '', $val );
+				break;
+
+			case 'Software':
+				if ( is_array( $val ) ) {
+					//if its a software, version array.
+					$val = wfMsg( 'exif-software-version-value', $val[0], $val[1] );
+				} else {
+					$val = self::msg( $tag, '', $val );
+				}
+				break;
+
+			case 'ExposureTime':
+				// Show the pretty fraction as well as decimal version
+				$val = wfMsg( 'exif-exposuretime-format',
+					self::formatFraction( $val ), self::formatNum( $val ) );
+				break;
+			case 'ISOSpeedRatings':
+				// If its = 65535 that means its at the
+				// limit of the size of Exif::short and
+				// is really higher.
+				if ( $val == '65535' ) {
+					$val = self::msg( $tag, 'overflow' );
+				} else {
+					$val = self::formatNum( $val );
+				}
+				break;
+			case 'FNumber':
+				$val = wfMsg( 'exif-fnumber-format',
+					self::formatNum( $val ) );
+				break;
+
+			case 'FocalLength': case 'FocalLengthIn35mmFilm':
+				$val = wfMsg( 'exif-focallength-format',
+					self::formatNum( $val ) );
+				break;
+
+			case 'MaxApertureValue':
+				if ( strpos( $val, '/' ) !== false ) {
+					// need to expand this earlier to calculate fNumber
+					list( $n, $d ) = explode( '/', $val );
+					// A zero denominator is left unexpanded instead of
+					// triggering a division by zero.
+					if ( is_numeric( $n ) && is_numeric( $d ) && $d != 0 ) {
+						$val = $n / $d;
+					}
+				}
+				if ( is_numeric( $val ) ) {
+					$fNumber = pow( 2, $val / 2 );
+					if ( $fNumber !== false ) {
+						$val = wfMsg( 'exif-maxaperturevalue-value',
+							self::formatNum( $val ),
+							self::formatNum( $fNumber, 2 )
+						);
+					}
+				}
+				break;
+
+			case 'iimCategory':
+				switch ( strtolower( $val ) ) {
+					// See pg 29 of IPTC photo
+					// metadata standard.
+					case 'ace': case 'clj':
+					case 'dis': case 'fin':
+					case 'edu': case 'evn':
+					case 'hth': case 'hum':
+					case 'lab': case 'lif':
+					case 'pol': case 'rel':
+					case 'sci': case 'soi':
+					case 'spo': case 'war':
+					case 'wea':
+						$val = self::msg(
+							'iimcategory',
+							$val
+						);
+				}
+				break;
+			case 'SubjectNewsCode':
+				// Essentially like iimCategory.
+				// 8 (numeric) digit hierarchical
+				// classification. We decode the
+				// first 2 digits, which provide
+				// a broad category.
+				$val = self::convertNewsCode( $val );
+				break;
+			case 'Urgency':
+				// 1-8 with 1 being highest, 5 normal
+				// 0 is reserved, and 9 is 'user-defined'.
+				$urgency = '';
+				if ( $val == 0 || $val == 9 ) {
+					$urgency = 'other';
+				} elseif ( $val < 5 && $val >= 1 ) {
+					// 1 is included: it is the highest urgency.
+					$urgency = 'high';
+				} elseif ( $val == 5 ) {
+					$urgency = 'normal';
+				} elseif ( $val <= 8 && $val > 5 ) {
+					$urgency = 'low';
+				}
+
+				if ( $urgency !== '' ) {
+					$val = self::msg( 'urgency',
+						$urgency, $val
+					);
+				}
+				break;
+
+			// Things that have a unit of pixels.
+			case 'OriginalImageHeight':
+			case 'OriginalImageWidth':
+			case 'PixelXDimension':
+			case 'PixelYDimension':
+			case 'ImageWidth':
+			case 'ImageLength':
+				$val = self::formatNum( $val ) . ' ' . wfMsg( 'unit-pixel' );
+				break;
+
+			// Do not transform fields with pure text.
+			// For some languages the formatNum()
+			// conversion results to wrong output like
+			// foo,bar@example,com or foo٫bar@example٫com.
+			// Also some 'numeric' things like Scene codes
+			// are included here as we really don't want
+			// commas inserted.
+			case 'ImageDescription':
+			case 'Artist':
+			case 'Copyright':
+			case 'RelatedSoundFile':
+			case 'ImageUniqueID':
+			case 'SpectralSensitivity':
+			case 'GPSSatellites':
+			case 'GPSVersionID':
+			case 'GPSMapDatum':
+			case 'Keywords':
+			case 'WorldRegionDest':
+			case 'CountryDest':
+			case 'CountryCodeDest':
+			case 'ProvinceOrStateDest':
+			case 'CityDest':
+			case 'SublocationDest':
+			case 'WorldRegionCreated':
+			case 'CountryCreated':
+			case 'CountryCodeCreated':
+			case 'ProvinceOrStateCreated':
+			case 'CityCreated':
+			case 'SublocationCreated':
+			case 'ObjectName':
+			case 'SpecialInstructions':
+			case 'Headline':
+			case 'Credit':
+			case 'Source':
+			case 'EditStatus':
+			case 'FixtureIdentifier':
+			case 'LocationDest':
+			case 'LocationDestCode':
+			case 'Writer':
+			case 'JPEGFileComment':
+			case 'iimSupplementalCategory':
+			case 'OriginalTransmissionRef':
+			case 'Identifier':
+			case 'dc-contributor':
+			case 'dc-coverage':
+			case 'dc-publisher':
+			case 'dc-relation':
+			case 'dc-rights':
+			case 'dc-source':
+			case 'dc-type':
+			case 'Lens':
+			case 'SerialNumber':
+			case 'CameraOwnerName':
+			case 'Label':
+			case 'Nickname':
+			case 'RightsCertificate':
+			case 'CopyrightOwner':
+			case 'UsageTerms':
+			case 'WebStatement':
+			case 'OriginalDocumentID':
+			case 'LicenseUrl':
+			case 'MorePermissionsUrl':
+			case 'AttributionUrl':
+			case 'PreferredAttributionName':
+			case 'PNGFileComment':
+			case 'Disclaimer':
+			case 'ContentWarning':
+			case 'GIFFileComment':
+			case 'SceneCode':
+			case 'IntellectualGenre':
+			case 'Event':
+			case 'OrginisationInImage':
+			case 'PersonInImage':
+
+				$val = htmlspecialchars( $val );
+				break;
+
+			case 'ObjectCycle':
+				switch ( $val ) {
+					case 'a': case 'p': case 'b':
+						$val = self::msg( $tag, $val );
+						break;
+					default:
+						$val = htmlspecialchars( $val );
+						break;
+				}
+				break;
+
+			case 'Rating':
+				if ( $val == '-1' ) {
+					$val = self::msg( $tag, 'rejected' );
+				} else {
+					$val = self::formatNum( $val );
+				}
+				break;
+
+			case 'LanguageCode':
+				$lang = $wgLang->getLanguageName( strtolower( $val ) );
+				if ( $lang ) {
+					$val = htmlspecialchars( $lang );
+				} else {
+					$val = htmlspecialchars( $val );
+				}
+				break;
+
+			default:
+				$val = self::formatNum( $val );
+				break;
+		}
+
+		return $val;
+	}
+
+	/**
+	 * Tags whose values are plain codes that map one-to-one onto messages.
+	 *
+	 * @return array tag name => array( tag name used in the message key,
+	 *   array of recognized values )
+	 */
+	private static function getEnumeratedTagValues() {
+		static $map = array(
+			'Compression' => array( 'Compression',
+				array( 1, 2, 3, 4, 5, 6, 7, 8, 32773, 32946, 34712 ) ),
+			'PhotometricInterpretation' => array( 'PhotometricInterpretation', array( 2, 6 ) ),
+			'Orientation' => array( 'Orientation', array( 1, 2, 3, 4, 5, 6, 7, 8 ) ),
+			'PlanarConfiguration' => array( 'PlanarConfiguration', array( 1, 2 ) ),
+			'YCbCrPositioning' => array( 'YCbCrPositioning', array( 1, 2 ) ),
+			'ColorSpace' => array( 'ColorSpace', array( 1, 65535 ) ),
+			'ComponentsConfiguration' => array( 'ComponentsConfiguration',
+				array( 0, 1, 2, 3, 4, 5, 6 ) ),
+			'ExposureProgram' => array( 'ExposureProgram',
+				array( 0, 1, 2, 3, 4, 5, 6, 7, 8 ) ),
+			'MeteringMode' => array( 'MeteringMode',
+				array( 0, 1, 2, 3, 4, 5, 6, 7, 255 ) ),
+			'LightSource' => array( 'LightSource',
+				array( 0, 1, 2, 3, 4, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20,
+					21, 22, 23, 24, 255 ) ),
+			'FocalPlaneResolutionUnit' => array( 'FocalPlaneResolutionUnit', array( 2 ) ),
+			'SensingMethod' => array( 'SensingMethod', array( 1, 2, 3, 4, 5, 7, 8 ) ),
+			'FileSource' => array( 'FileSource', array( 3 ) ),
+			'SceneType' => array( 'SceneType', array( 1 ) ),
+			'CustomRendered' => array( 'CustomRendered', array( 0, 1 ) ),
+			'ExposureMode' => array( 'ExposureMode', array( 0, 1, 2 ) ),
+			'WhiteBalance' => array( 'WhiteBalance', array( 0, 1 ) ),
+			'SceneCaptureType' => array( 'SceneCaptureType', array( 0, 1, 2, 3 ) ),
+			'GainControl' => array( 'GainControl', array( 0, 1, 2, 3, 4 ) ),
+			'Contrast' => array( 'Contrast', array( 0, 1, 2 ) ),
+			'Saturation' => array( 'Saturation', array( 0, 1, 2 ) ),
+			'Sharpness' => array( 'Sharpness', array( 0, 1, 2 ) ),
+			'SubjectDistanceRange' => array( 'SubjectDistanceRange', array( 0, 1, 2, 3 ) ),
+
+			//The GPS...Ref values are kept for compatibility, probably won't be reached.
+			'GPSLatitudeRef' => array( 'GPSLatitude', array( 'N', 'S' ) ),
+			'GPSDestLatitudeRef' => array( 'GPSLatitude', array( 'N', 'S' ) ),
+			'GPSLongitudeRef' => array( 'GPSLongitude', array( 'E', 'W' ) ),
+			'GPSDestLongitudeRef' => array( 'GPSLongitude', array( 'E', 'W' ) ),
+			'GPSStatus' => array( 'GPSStatus', array( 'A', 'V' ) ),
+			'GPSMeasureMode' => array( 'GPSMeasureMode', array( 2, 3 ) ),
+			'GPSTrackRef' => array( 'GPSDirection', array( 'T', 'M' ) ),
+			'GPSImgDirectionRef' => array( 'GPSDirection', array( 'T', 'M' ) ),
+			'GPSDestBearingRef' => array( 'GPSDirection', array( 'T', 'M' ) ),
+			'GPSSpeedRef' => array( 'GPSSpeed', array( 'K', 'M', 'N' ) ),
+			'GPSDestDistanceRef' => array( 'GPSDestDistance', array( 'K', 'M', 'N' ) ),
+
+			'Copyrighted' => array( 'Copyrighted', array( 'True', 'False' ) ),
+		);
+		return $map;
+	}
+
+ /**
+ * A function to collapse multivalued tags into a single value.
+ * This turns an array of (for example) authors into a bulleted list.
+ *
+ * This is public on the basis it might be useful outside of this class.
+ *
+ * @param $vals Array array of values
+ * @param $type String Type of array (either lang, ul, ol).
+ * lang = language assoc array with keys being the lang code
+ * ul = unordered list, ol = ordered list
+ * type can also come from the '_type' member of $vals.
+ * @param $noHtml Boolean If to avoid returning anything resembling
+ * html. (Ugly hack for backwards compatibility with old mediawiki).
+ * @return String single value (in wiki-syntax).
+ */
+	public static function flattenArray( $vals, $type = 'ul', $noHtml = false ) {
+		// Check for non-arrays first: on PHP < 5.4 isset( $string['_type'] )
+		// is true and the unset() below would then be a fatal error.
+		if ( !is_array( $vals ) ) {
+			return $vals; // do nothing if not an array;
+		}
+
+		// A '_type' member overrides the $type argument and is not a value.
+		if ( isset( $vals['_type'] ) ) {
+			$type = $vals['_type'];
+			unset( $vals['_type'] );
+		}
+
+		if ( count( $vals ) === 1 && $type !== 'lang' ) {
+			return $vals[0];
+		}
+		if ( count( $vals ) === 0 ) {
+			wfDebug( __METHOD__ . " metadata array with 0 elements!\n" );
+			return ""; // paranoia. This should never happen
+		}
+
+		/* @todo FIXME: This should hide some of the list entries if there are
+		 * say more than four. Especially if a field is translated into 20
+		 * languages, we don't want to show them all by default
+		 */
+		global $wgContLang;
+		switch ( $type ) {
+			case 'lang':
+				// Display default, followed by ContLang,
+				// followed by the rest in no particular
+				// order.
+
+				// Todo: hide some items if really long list.
+
+				$content = '';
+
+				$cLang = $wgContLang->getCode();
+				$defaultItem = false;
+				$defaultLang = false;
+
+				// If default is set, save it for later,
+				// as we don't know if it's equal to
+				// one of the lang codes. (In xmp
+				// you specify the language for a
+				// default property by having both
+				// a default prop, and one in the language
+				// that are identical)
+				if ( isset( $vals['x-default'] ) ) {
+					$defaultItem = $vals['x-default'];
+					unset( $vals['x-default'] );
+				}
+				// Do contentLanguage.
+				if ( isset( $vals[$cLang] ) ) {
+					$isDefault = false;
+					if ( $vals[$cLang] === $defaultItem ) {
+						$defaultItem = false;
+						$isDefault = true;
+					}
+					$content .= self::langItem(
+						$vals[$cLang], $cLang,
+						$isDefault, $noHtml );
+
+					unset( $vals[$cLang] );
+				}
+
+				// Now do the rest.
+				foreach ( $vals as $lang => $item ) {
+					if ( $item === $defaultItem ) {
+						// Remember which language the default is written
+						// in; it is emitted once, at the top, below.
+						$defaultLang = $lang;
+						continue;
+					}
+					$content .= self::langItem( $item,
+						$lang, false, $noHtml );
+				}
+				if ( $defaultItem !== false ) {
+					$content = self::langItem( $defaultItem,
+						$defaultLang, true, $noHtml )
+						. $content;
+				}
+				if ( $noHtml ) {
+					return $content;
+				}
+				return '<ul class="metadata-langlist">' .
+					$content .
+					'</ul>';
+			case 'ol':
+				if ( $noHtml ) {
+					return "\n#" . implode( "\n#", $vals );
+				}
+				return "<ol><li>" . implode( "</li>\n<li>", $vals ) . '</li></ol>';
+			case 'ul':
+			default:
+				if ( $noHtml ) {
+					return "\n*" . implode( "\n*", $vals );
+				}
+				return "<ul><li>" . implode( "</li>\n<li>", $vals ) . '</li></ul>';
+		}
+	}
+
+ /** Helper function for creating lists of translations.
+ *
+ * @param $value String value (this is not escaped)
+ * @param $lang String lang code of item or false
+ * @param $default Boolean if it is default value.
+ * @param $noHtml Boolean If to avoid html (for back-compat)
+ * @return language item (Note: despite how this looks,
+ * this is treated as wikitext not html).
+ */
+	private static function langItem( $value, $lang, $default = false, $noHtml = false ) {
+		global $wgContLang;
+
+		// An item must be tied to a language, be the default, or both.
+		if ( $lang === false && $default === false ) {
+			throw new MWException( '$lang and $default cannot both be false.' );
+		}
+
+		$wrappedValue = $noHtml
+			? $value
+			: '<span class="mw-metadata-lang-value">' . $value . '</span>';
+
+		// Default item with no known language.
+		if ( $lang === false ) {
+			$text = wfMsg( 'metadata-langitem-default', $wrappedValue );
+			return $noHtml
+				? $text . "\n\n"
+				: '<li class="mw-metadata-lang-default">' . $text . "</li>\n";
+		}
+
+		// Look the name up by full code, then by base language
+		// (aka en-US -> en), and finally fall back to the raw code.
+		$lowLang = strtolower( $lang );
+		$langName = $wgContLang->getLanguageName( $lowLang );
+		if ( $langName === '' ) {
+			$codeParts = explode( '-', $lowLang, 2 );
+			$langName = $wgContLang->getLanguageName( $codeParts[0] );
+		}
+		if ( $langName === '' ) {
+			$langName = $lang;
+		}
+
+		$text = wfMsg( 'metadata-langitem', $wrappedValue, $langName, $lang );
+		if ( $noHtml ) {
+			return '*' . $text;
+		}
+
+		$classes = 'mw-metadata-lang-code-' . $lang;
+		if ( $default ) {
+			$classes .= ' mw-metadata-lang-default';
+		}
+		return '<li class="' . $classes . '" lang="' . $lang . '">' . $text . "</li>\n";
+	}
+
+ /**
+ * Convenience function for getFormattedData()
+ *
+ * @private
+ *
+ * @param $tag String: the tag name to pass on
+ * @param $val String: the value of the tag
+ * @param $arg String: an argument to pass ($1)
+ * @param $arg2 String: a 2nd argument to pass ($2)
+ * @return string A wfMsg of "exif-$tag-$val" in lower case
+ */
+	static function msg( $tag, $val, $arg = null, $arg2 = null ) {
+		global $wgContLang;
+
+		// An empty value selects the generic "exif-<tag>-value" message.
+		$suffix = ( $val === '' ) ? 'value' : $val;
+		$key = $wgContLang->lc( "exif-$tag-$suffix" );
+
+		return wfMsg( $key, $arg, $arg2 );
+	}
+
+ /**
+ * Format a number, convert numbers from fractions into floating point
+ * numbers, joins arrays of numbers with commas.
+ *
+ * @private
+ *
+ * @param $num Mixed: the value to format
+ * @param $round digits to round to or false.
+ * @return mixed A floating point number or whatever we were fed
+ */
+	static function formatNum( $num, $round = false ) {
+		global $wgLang;
+
+		// Arrays are formatted element by element and joined with commas.
+		if ( is_array( $num ) ) {
+			$out = array();
+			foreach ( $num as $number ) {
+				// Pass $round along so array members are rounded like scalars.
+				$out[] = self::formatNum( $number, $round );
+			}
+			return $wgLang->commaList( $out );
+		}
+
+		$m = array();
+		if ( preg_match( '/^(-?\d+)\/(\d+)$/', $num, $m ) ) {
+			// "a/b" fraction: convert to a number unless the denominator
+			// is zero, in which case the text is passed through untouched.
+			if ( $m[2] != 0 ) {
+				$newNum = $m[1] / $m[2];
+				if ( $round !== false ) {
+					$newNum = round( $newNum, $round );
+				}
+			} else {
+				$newNum = $num;
+			}
+
+			return $wgLang->formatNum( $newNum );
+		}
+
+		if ( is_numeric( $num ) && $round !== false ) {
+			$num = round( $num, $round );
+		}
+		return $wgLang->formatNum( $num );
+	}
+
+ /**
+ * Format a rational number, reducing fractions
+ *
+ * @private
+ *
+ * @param $num Mixed: the value to format
+ * @return mixed A floating point number or whatever we were fed
+ */
+	static function formatFraction( $num ) {
+		$parts = array();
+
+		// Anything that is not a plain "a/b" fraction is formatted as a number.
+		if ( !preg_match( '/^(-?\d+)\/(\d+)$/', $num, $parts ) ) {
+			return self::formatNum( $num );
+		}
+
+		$top = intval( $parts[1] );
+		$bottom = intval( $parts[2] );
+		$divisor = self::gcd( abs( $top ), $bottom );
+
+		// 0 shouldn't happen! ;)
+		if ( $divisor == 0 ) {
+			return self::formatNum( $num );
+		}
+
+		// Reduce both parts by their greatest common divisor.
+		return self::formatNum( $top / $divisor ) . '/' . self::formatNum( $bottom / $divisor );
+	}
+
+	/**
+	 * Calculate the greatest common divisor of two integers using the
+	 * iterative form of the Euclidean algorithm.
+	 *
+	 * @param $a Integer: Numerator
+	 * @param $b Integer: Denominator
+	 * @return int $a itself when $b is 0
+	 * @private
+	 */
+	static function gcd( $a, $b ) {
+		// Replace ( $a, $b ) by ( $b, $a mod $b ) until the remainder
+		// vanishes; the last non-zero value is the divisor.
+		while ( $b != 0 ) {
+			list( $a, $b ) = array( $b, $a % $b );
+		}
+		return $a;
+	}
+
+	/**
+	 * Fetch the human readable version of a news code.
+	 * A news code is an 8 digit code. The first two
+	 * digits are a general classification, so we just
+	 * translate that.
+	 *
+	 * Note, leading 0's are significant, so this is
+	 * a string, not an int.
+	 *
+	 * @param $val String: The 8 digit news code.
+	 * @return The human readable form, or $val unchanged if it is
+	 *   not 8 digits or its two-digit prefix is not recognised
+	 */
+	static private function convertNewsCode( $val ) {
+		if ( !preg_match( '/^\d{8}$/D', $val ) ) {
+			// Not a valid news code.
+			return $val;
+		}
+
+		// Two-digit prefix => suffix of the "exif-iimcategory-*" message.
+		$categories = array(
+			'01' => 'ace',
+			'02' => 'clj',
+			'03' => 'dis',
+			'04' => 'fin',
+			'05' => 'edu',
+			'06' => 'evn',
+			'07' => 'hth',
+			'08' => 'hum',
+			'09' => 'lab',
+			'10' => 'lif',
+			'11' => 'pol',
+			'12' => 'rel',
+			'13' => 'sci',
+			'14' => 'soi',
+			'15' => 'spo',
+			'16' => 'war',
+			'17' => 'wea',
+		);
+
+		$prefix = substr( $val, 0, 2 );
+		if ( !isset( $categories[$prefix] ) ) {
+			return $val;
+		}
+
+		$catMsg = self::msg( 'iimcategory', $categories[$prefix] );
+		return self::msg( 'subjectnewscode', '', $val, $catMsg );
+	}
+
+	/**
+	 * Format a coordinate value: convert a signed decimal number of
+	 * degrees into a degree / minute / second representation.
+	 *
+	 * @param $coord Number: the coordinate in decimal degrees; negative
+	 *   values become S (latitude) or W (longitude)
+	 * @param $type String: 'latitude' or 'longitude' (selects N/S or E/W);
+	 *   any other value leaves the hemisphere letter empty
+	 * @return String the 'exif-coordinate-format' message filled with
+	 *   degrees, minutes, seconds, hemisphere letter and the raw $coord
+	 */
+	static function formatCoords( $coord, $type ) {
+		$isNegative = ( $coord < 0 );
+		$nCoord = $isNegative ? -$coord : $coord;
+
+		$ref = '';
+		if ( $type === 'latitude' ) {
+			$ref = $isNegative ? 'S' : 'N';
+		} elseif ( $type === 'longitude' ) {
+			$ref = $isNegative ? 'W' : 'E';
+		}
+
+		$deg = floor( $nCoord );
+		$min = floor( ( $nCoord - $deg ) * 60.0 );
+		$sec = round( ( ( $nCoord - $deg ) - $min / 60 ) * 3600, 2 );
+
+		// Floating point error can leave a tiny negative remainder,
+		// which would otherwise be displayed as "-0".
+		if ( $sec <= 0 ) {
+			$sec = 0;
+		}
+		// Rounding to two places can push the seconds up to 60
+		// (e.g. 59.999); carry into minutes, and minutes into degrees,
+		// so we never output something like 29' 60".
+		if ( $sec >= 60 ) {
+			$sec -= 60;
+			$min++;
+		}
+		if ( $min >= 60 ) {
+			$min -= 60;
+			$deg++;
+		}
+
+		$deg = self::formatNum( $deg );
+		$min = self::formatNum( $min );
+		$sec = self::formatNum( $sec );
+
+		return wfMsg( 'exif-coordinate-format', $deg, $min, $sec, $ref, $coord );
+	}
+
+	/**
+	 * Format the contact info field into a single value.
+	 *
+	 * @param $vals Array array with fields of the ContactInfo
+	 *    struct defined in the IPTC4XMP spec. Or potentially
+	 *    an array with one element that is a free form text
+	 *    value from the older iptc iim 1:118 prop.
+	 *
+	 * This function might be called from
+	 * JpegHandler::convertMetadataVersion which is why it is
+	 * public.
+	 *
+	 * @return String of html-ish looking wikitext
+	 */
+	public static function collapseContactInfo( $vals ) {
+		// Sub-properties that are output as an escaped value wrapped in
+		// <span class="...">. Some class names resemble those used by
+		// hCard simply because they are sensible names; no attempt is
+		// made to output the hCard microformat (though the result might
+		// pass as the adr microformat).
+		$spanClasses = array(
+			'CiAdrExtadr' => 'extended-address', // todo: may be multi-line in XMP
+			'CiAdrCity' => 'locality',
+			'CiAdrCtry' => 'country-name',
+			'CiTelWork' => 'tel',
+			'CiAdrPcode' => 'postal-code',
+			'CiAdrRegion' => 'region', // province/state
+			'CiUrlWork' => 'url',
+		);
+
+		$hasSubProperties = isset( $vals['CiEmailWork'] );
+		foreach ( $spanClasses as $field => $cssClass ) {
+			if ( isset( $vals[$field] ) ) {
+				$hasSubProperties = true;
+				break;
+			}
+		}
+
+		if ( !$hasSubProperties ) {
+			// No sub-properties at all. This could happen with old
+			// iptc, where this was just a free-form text value.
+			// The values are run through htmlspecialchars partly for
+			// consistency and partly because people often put >, etc
+			// into the metadata, which should not be interpreted,
+			// while urls should still get auto-linked.
+			foreach ( $vals as $key => $text ) {
+				$vals[$key] = htmlspecialchars( $text );
+			}
+			return self::flattenArray( $vals );
+		}
+
+		// A real ContactInfo field. It is unclear whether all of the
+		// fields have to be set, so every missing one becomes ''.
+		$span = array();
+		foreach ( $spanClasses as $field => $cssClass ) {
+			if ( isset( $vals[$field] ) ) {
+				$span[$field] = '<span class="' . $cssClass . '">'
+					. htmlspecialchars( $vals[$field] )
+					. '</span>';
+			} else {
+				$span[$field] = '';
+			}
+		}
+
+		$email = '';
+		if ( isset( $vals['CiEmailWork'] ) ) {
+			$links = array();
+			// Multiple addresses may be separated by new lines and/or commas.
+			$pieces = explode( ',', str_replace( "\n", ',', $vals['CiEmailWork'] ) );
+			foreach ( $pieces as $piece ) {
+				$address = trim( $piece );
+				if ( $address === '' ) {
+					continue;
+				}
+				if ( strpos( $address, '<' ) !== false ) {
+					// Don't do fancy formatting to
+					// "My name" <foo@bar.com> style stuff
+					$links[] = $address;
+				} else {
+					$links[] = '[mailto:'
+						. $address
+						. ' <span class="email">'
+						. $address
+						. '</span>]';
+				}
+			}
+			$email = implode( ', ', $links );
+		}
+
+		return wfMsg( 'exif-contact-value', $email, $span['CiUrlWork'],
+			$span['CiAdrExtadr'], $span['CiAdrCity'], $span['CiAdrRegion'],
+			$span['CiAdrPcode'], $span['CiAdrCtry'], $span['CiTelWork'] );
+	}
+}
+
+/**
+ * For compatibility with the old FormatExif class,
+ * which some extensions use. Simply stores the metadata and
+ * forwards to FormatMetadata::getFormattedData().
+ *
+ * @deprecated since 1.18
+ */
+class FormatExif {
+	var $meta;
+
+	/**
+	 * A real __construct() is used instead of the PHP4-style
+	 * same-name method, because newer PHP versions no longer treat
+	 * a method named after the class as the constructor, which would
+	 * leave $meta unset. The deprecation notice therefore now names
+	 * FormatExif::__construct.
+	 *
+	 * @param $meta Array: metadata later passed to
+	 *   FormatMetadata::getFormattedData()
+	 */
+	function __construct( $meta ) {
+		wfDeprecated( __METHOD__ );
+		$this->meta = $meta;
+	}
+
+	/**
+	 * Old-style constructor name, kept (after __construct) so that
+	 * code calling it explicitly, e.g. parent::FormatExif(), still works.
+	 *
+	 * @param $meta Array: see __construct()
+	 */
+	function FormatExif ( $meta ) {
+		$this->__construct( $meta );
+	}
+
+	/**
+	 * @return mixed whatever FormatMetadata::getFormattedData() returns
+	 *   for the stored metadata
+	 */
+	function getFormattedData ( ) {
+		return FormatMetadata::getFormattedData( $this->meta );
+	}
+}
diff --git a/includes/media/GIF.php b/includes/media/GIF.php
index c4ede331..325ceb9a 100644
--- a/includes/media/GIF.php
+++ b/includes/media/GIF.php
@@ -12,56 +12,104 @@
* @ingroup Media
*/
class GIFHandler extends BitmapHandler {
+
+ const BROKEN_FILE = '0'; // value to store in img_metadata if error extracting metadata.
function getMetadata( $image, $filename ) {
- if ( !isset( $image->parsedGIFMetadata ) ) {
- try {
- $image->parsedGIFMetadata = GIFMetadataExtractor::getMetadata( $filename );
- } catch( Exception $e ) {
- // Broken file?
- wfDebug( __METHOD__ . ': ' . $e->getMessage() . "\n" );
- return '0';
- }
+ try {
+ $parsedGIFMetadata = BitmapMetadataHandler::GIF( $filename );
+ } catch( Exception $e ) {
+ // Broken file?
+ wfDebug( __METHOD__ . ': ' . $e->getMessage() . "\n" );
+ return self::BROKEN_FILE;
}
- return serialize( $image->parsedGIFMetadata );
-
+ return serialize($parsedGIFMetadata);
}
-
+
+ /**
+ * @param $image File
+ * @return array|bool
+ */
function formatMetadata( $image ) {
- return false;
+ $meta = $image->getMetadata();
+
+ if ( !$meta ) {
+ return false;
+ }
+ $meta = unserialize( $meta );
+ if ( !isset( $meta['metadata'] ) || count( $meta['metadata'] ) <= 1 ) {
+ return false;
+ }
+
+ if ( isset( $meta['metadata']['_MW_GIF_VERSION'] ) ) {
+ unset( $meta['metadata']['_MW_GIF_VERSION'] );
+ }
+ return $this->formatMetadataHelper( $meta['metadata'] );
}
-
+
+ /**
+ * @param $image File
+ * @param $width
+ * @param $height
+ * @return
+ */
function getImageArea( $image, $width, $height ) {
$ser = $image->getMetadata();
- if ($ser) {
- $metadata = unserialize($ser);
+ if ( $ser ) {
+ $metadata = unserialize( $ser );
return $width * $height * $metadata['frameCount'];
} else {
return $width * $height;
}
}
+ /**
+ * @param $image File
+ * @return bool
+ */
function isAnimatedImage( $image ) {
$ser = $image->getMetadata();
- if ($ser) {
+ if ( $ser ) {
$metadata = unserialize($ser);
- if( $metadata['frameCount'] > 1 ) return true;
+ if( $metadata['frameCount'] > 1 ) {
+ return true;
+ }
}
return false;
}
-
+
function getMetadataType( $image ) {
return 'parsed-gif';
}
-
+
function isMetadataValid( $image, $metadata ) {
+ if ( $metadata === self::BROKEN_FILE ) {
+ // Do not repeatedly regenerate metadata on a broken file.
+ return self::METADATA_GOOD;
+ }
+
wfSuppressWarnings();
$data = unserialize( $metadata );
wfRestoreWarnings();
- return (boolean) $data;
+
+ if ( !$data || !is_array( $data ) ) {
+ wfDebug(__METHOD__ . ' invalid GIF metadata' );
+ return self::METADATA_BAD;
+ }
+
+ if ( !isset( $data['metadata']['_MW_GIF_VERSION'] )
+ || $data['metadata']['_MW_GIF_VERSION'] != GIFMetadataExtractor::VERSION ) {
+ wfDebug(__METHOD__ . ' old but compatible GIF metadata' );
+ return self::METADATA_COMPATIBLE;
+ }
+ return self::METADATA_GOOD;
}
+ /**
+ * @param $image File
+ * @return string
+ */
function getLongDesc( $image ) {
global $wgLang;
@@ -71,20 +119,25 @@ class GIFHandler extends BitmapHandler {
$metadata = unserialize($image->getMetadata());
wfRestoreWarnings();
- if (!$metadata || $metadata['frameCount'] <= 1)
+ if (!$metadata || $metadata['frameCount'] <= 1) {
return $original;
-
+ }
+
+ /* Preserve original image info string, but strip the last char ')' so we can add even more */
$info = array();
$info[] = $original;
- if ($metadata['looped'])
+ if ( $metadata['looped'] ) {
$info[] = wfMsgExt( 'file-info-gif-looped', 'parseinline' );
+ }
- if ($metadata['frameCount'] > 1)
+ if ( $metadata['frameCount'] > 1 ) {
$info[] = wfMsgExt( 'file-info-gif-frames', 'parseinline', $metadata['frameCount'] );
+ }
- if ($metadata['duration'])
+ if ( $metadata['duration'] ) {
$info[] = $wgLang->formatTimePeriod( $metadata['duration'] );
+ }
return $wgLang->commaList( $info );
}
diff --git a/includes/media/GIFMetadataExtractor.php b/includes/media/GIFMetadataExtractor.php
index bc1a4804..5dbeb8f8 100644
--- a/includes/media/GIFMetadataExtractor.php
+++ b/includes/media/GIFMetadataExtractor.php
@@ -21,164 +21,294 @@ class GIFMetadataExtractor {
static $gif_extension_sep;
static $gif_term;
+ const VERSION = 1;
+
+ // Each sub-block is less than or equal to 255 bytes.
+ // Most of the time its 255 bytes, except for in XMP
+ // blocks, where it's usually between 32-127 bytes each.
+ const MAX_SUBBLOCKS = 262144; // 5mb divided by 20.
+
+ /**
+ * @throws Exception
+ * @param $filename string
+ * @return array
+ */
static function getMetadata( $filename ) {
self::$gif_frame_sep = pack( "C", ord("," ) );
self::$gif_extension_sep = pack( "C", ord("!" ) );
self::$gif_term = pack( "C", ord(";" ) );
-
+
$frameCount = 0;
$duration = 0.0;
$isLooped = false;
+ $xmp = "";
+ $comment = array();
- if (!$filename)
+ if ( !$filename ) {
throw new Exception( "No file name specified" );
- elseif ( !file_exists($filename) || is_dir($filename) )
+ } elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
throw new Exception( "File $filename does not exist" );
-
- $fh = fopen( $filename, 'r' );
-
- if (!$fh)
+ }
+
+ $fh = fopen( $filename, 'rb' );
+
+ if ( !$fh ) {
throw new Exception( "Unable to open file $filename" );
-
+ }
+
// Check for the GIF header
$buf = fread( $fh, 6 );
if ( !($buf == 'GIF87a' || $buf == 'GIF89a') ) {
throw new Exception( "Not a valid GIF file; header: $buf" );
}
-
+
// Skip over width and height.
fread( $fh, 4 );
-
+
// Read BPP
$buf = fread( $fh, 1 );
$bpp = self::decodeBPP( $buf );
-
+
// Skip over background and aspect ratio
fread( $fh, 2 );
-
+
// Skip over the GCT
self::readGCT( $fh, $bpp );
-
+
while( !feof( $fh ) ) {
$buf = fread( $fh, 1 );
-
+
if ($buf == self::$gif_frame_sep) {
// Found a frame
$frameCount++;
-
+
## Skip bounding box
fread( $fh, 8 );
-
+
## Read BPP
$buf = fread( $fh, 1 );
$bpp = self::decodeBPP( $buf );
-
+
## Read GCT
self::readGCT( $fh, $bpp );
fread( $fh, 1 );
self::skipBlock( $fh );
} elseif ( $buf == self::$gif_extension_sep ) {
$buf = fread( $fh, 1 );
+ if ( strlen( $buf ) < 1 ) throw new Exception( "Ran out of input" );
$extension_code = unpack( 'C', $buf );
$extension_code = $extension_code[1];
-
+
if ($extension_code == 0xF9) {
// Graphics Control Extension.
fread( $fh, 1 ); // Block size
-
+
fread( $fh, 1 ); // Transparency, disposal method, user input
-
+
$buf = fread( $fh, 2 ); // Delay, in hundredths of seconds.
+ if ( strlen( $buf ) < 2 ) throw new Exception( "Ran out of input" );
$delay = unpack( 'v', $buf );
$delay = $delay[1];
$duration += $delay * 0.01;
-
+
fread( $fh, 1 ); // Transparent colour index
-
+
$term = fread( $fh, 1 ); // Should be a terminator
+ if ( strlen( $term ) < 1 ) throw new Exception( "Ran out of input" );
$term = unpack( 'C', $term );
$term = $term[1];
- if ($term != 0 )
+ if ($term != 0 ) {
throw new Exception( "Malformed Graphics Control Extension block" );
+ }
+ } elseif ($extension_code == 0xFE) {
+ // Comment block(s).
+ $data = self::readBlock( $fh );
+ if ( $data === "" ) {
+ throw new Exception( 'Read error, zero-length comment block' );
+ }
+
+ // The standard says this should be ASCII, however it's unclear if
+ // that's true in practice. Check to see if it's valid UTF-8; if so
+ // assume it's that, otherwise assume it's windows-1252 (iso-8859-1)
+ $dataCopy = $data;
+ // quickIsNFCVerify has the side effect of replacing any invalid characters
+ UtfNormal::quickIsNFCVerify( $dataCopy );
+
+ if ( $dataCopy !== $data ) {
+ wfSuppressWarnings();
+ $data = iconv( 'windows-1252', 'UTF-8', $data );
+ wfRestoreWarnings();
+ }
+
+ $commentCount = count( $comment );
+ if ( $commentCount === 0
+ || $comment[$commentCount-1] !== $data )
+ {
+ // Some applications repeat the same comment on each
+ // frame of an animated GIF image, so if this comment
+ // is identical to the last, only extract once.
+ $comment[] = $data;
+ }
} elseif ($extension_code == 0xFF) {
// Application extension (Netscape info about the animated gif)
+ // or XMP (or theoretically any other type of extension block)
$blockLength = fread( $fh, 1 );
+ if ( strlen( $blockLength ) < 1 ) throw new Exception( "Ran out of input" );
$blockLength = unpack( 'C', $blockLength );
$blockLength = $blockLength[1];
$data = fread( $fh, $blockLength );
-
- // NETSCAPE2.0 (application name)
- if ($blockLength != 11 || $data != 'NETSCAPE2.0') {
+
+ if ($blockLength != 11 ) {
+ wfDebug( __METHOD__ . ' GIF application block with wrong length' );
fseek( $fh, -($blockLength + 1), SEEK_CUR );
self::skipBlock( $fh );
continue;
}
+
+ // NETSCAPE2.0 (application name for animated gif)
+ if ( $data == 'NETSCAPE2.0' ) {
- $data = fread( $fh, 2 ); // Block length and introduction, should be 03 01
-
- if ($data != "\x03\x01") {
- throw new Exception( "Expected \x03\x01, got $data" );
- }
-
- // Unsigned little-endian integer, loop count or zero for "forever"
- $loopData = fread( $fh, 2 );
- $loopData = unpack( 'v', $loopData );
- $loopCount = $loopData[1];
-
- if ($loopCount != 1) {
- $isLooped = true;
+ $data = fread( $fh, 2 ); // Block length and introduction, should be 03 01
+
+ if ($data != "\x03\x01") {
+ throw new Exception( "Expected \x03\x01, got $data" );
+ }
+
+ // Unsigned little-endian integer, loop count or zero for "forever"
+ $loopData = fread( $fh, 2 );
+ if ( strlen( $loopData ) < 2 ) throw new Exception( "Ran out of input" );
+ $loopData = unpack( 'v', $loopData );
+ $loopCount = $loopData[1];
+
+ if ($loopCount != 1) {
+ $isLooped = true;
+ }
+
+ // Read out terminator byte
+ fread( $fh, 1 );
+ } elseif ( $data == 'XMP DataXMP' ) {
+ // application name for XMP data.
+ // see pg 18 of XMP spec part 3.
+
+ $xmp = self::readBlock( $fh, true );
+
+ if ( substr( $xmp, -257, 3 ) !== "\x01\xFF\xFE"
+ || substr( $xmp, -4 ) !== "\x03\x02\x01\x00" )
+ {
+ // this is just a sanity check.
+ throw new Exception( "XMP does not have magic trailer!" );
+ }
+
+ // strip out trailer.
+ $xmp = substr( $xmp, 0, -257 );
+
+ } else {
+ // unrecognized extension block
+ fseek( $fh, -($blockLength + 1), SEEK_CUR );
+ self::skipBlock( $fh );
+ continue;
}
-
- // Read out terminator byte
- fread( $fh, 1 );
} else {
self::skipBlock( $fh );
}
} elseif ( $buf == self::$gif_term ) {
break;
} else {
+ if ( strlen( $buf ) < 1 ) throw new Exception( "Ran out of input" );
$byte = unpack( 'C', $buf );
$byte = $byte[1];
throw new Exception( "At position: ".ftell($fh). ", Unknown byte ".$byte );
}
}
-
+
return array(
'frameCount' => $frameCount,
'looped' => $isLooped,
- 'duration' => $duration
+ 'duration' => $duration,
+ 'xmp' => $xmp,
+ 'comment' => $comment,
);
-
}
-
+
+ /**
+ * @param $fh
+ * @param $bpp
+ * @return void
+ */
static function readGCT( $fh, $bpp ) {
- if ($bpp > 0) {
- for( $i=1; $i<=pow(2,$bpp); ++$i ) {
+ if ( $bpp > 0 ) {
+ for( $i=1; $i<=pow( 2, $bpp ); ++$i ) {
fread( $fh, 3 );
}
}
}
-
+
+ /**
+ * @param $data
+ * @return int
+ */
static function decodeBPP( $data ) {
+ if ( strlen( $data ) < 1 ) throw new Exception( "Ran out of input" );
$buf = unpack( 'C', $data );
$buf = $buf[1];
$bpp = ( $buf & 7 ) + 1;
$buf >>= 7;
-
+
$have_map = $buf & 1;
-
+
return $have_map ? $bpp : 0;
}
-
+
+ /**
+ * @param $fh
+ * @return
+ */
static function skipBlock( $fh ) {
while ( !feof( $fh ) ) {
$buf = fread( $fh, 1 );
+ if ( strlen( $buf ) < 1 ) throw new Exception( "Ran out of input" );
$block_len = unpack( 'C', $buf );
$block_len = $block_len[1];
- if ($block_len == 0)
+ if ($block_len == 0) {
return;
+ }
fread( $fh, $block_len );
}
}
+	/**
+	 * Read a block. In the GIF format, a block is made up of
+	 * several sub-blocks. Each sub block starts with one byte
+	 * saying how long the sub-block is, followed by the sub-block.
+	 * The entire block is terminated by a sub-block of length
+	 * 0.
+	 *
+	 * @throws Exception if more than MAX_SUBBLOCKS sub-blocks are
+	 *  seen, or if the end of the file is reached before the terminator
+	 * @param $fh FileHandle
+	 * @param $includeLengths Boolean Include the length bytes of the
+	 *  sub-blocks in the returned value. Normally this is false,
+	 *  except XMP is weird and does a hack where you need to keep
+	 *  these length bytes.
+	 * @return The data.
+	 */
+	static function readBlock( $fh, $includeLengths = false ) {
+		$data = '';
+		$blocks = 0;
+
+		// Each pass consumes one length byte plus that many data bytes;
+		// a length byte of zero terminates the block.
+		for ( $subLength = fread( $fh, 1 ); $subLength !== "\0"; $subLength = fread( $fh, 1 ) ) {
+			if ( ++$blocks > self::MAX_SUBBLOCKS ) {
+				throw new Exception( "MAX_SUBBLOCKS exceeded (over $blocks sub-blocks)" );
+			}
+			if ( feof( $fh ) ) {
+				throw new Exception( "Read error: Unexpected EOF." );
+			}
+
+			if ( $includeLengths ) {
+				$data .= $subLength;
+			}
+			$data .= fread( $fh, ord( $subLength ) );
+		}
+
+		return $data;
+	}
}
diff --git a/includes/media/Generic.php b/includes/media/Generic.php
index fa4e731a..48735ebf 100644
--- a/includes/media/Generic.php
+++ b/includes/media/Generic.php
@@ -13,7 +13,9 @@
*/
abstract class MediaHandler {
const TRANSFORM_LATER = 1;
-
+ const METADATA_GOOD = true;
+ const METADATA_BAD = false;
+ const METADATA_COMPATIBLE = 2; // for old but backwards compatible.
/**
* Instance cache
*/
@@ -21,6 +23,10 @@ abstract class MediaHandler {
/**
* Get a MediaHandler for a given MIME type from the instance cache
+ *
+ * @param $type string
+ *
+ * @return MediaHandler
*/
static function getHandler( $type ) {
global $wgMediaHandlers;
@@ -44,20 +50,27 @@ abstract class MediaHandler {
*/
abstract function getParamMap();
- /*
+ /**
* Validate a thumbnail parameter at parse time.
* Return true to accept the parameter, and false to reject it.
* If you return false, the parser will do something quiet and forgiving.
+ *
+ * @param $name
+ * @param $value
*/
abstract function validateParam( $name, $value );
/**
* Merge a parameter array into a string appropriate for inclusion in filenames
+ *
+ * @param $params array
*/
abstract function makeParamString( $params );
/**
* Parse a param string made with makeParamString back into an array
+ *
+ * @param $str string
*/
abstract function parseParamString( $str );
@@ -65,6 +78,8 @@ abstract class MediaHandler {
* Changes the parameter array as necessary, ready for transformation.
* Should be idempotent.
* Returns false if the parameters are unacceptable and the transform should fail
+ * @param $image
+ * @param $params
*/
abstract function normaliseParams( $image, &$params );
@@ -89,15 +104,66 @@ abstract class MediaHandler {
function getMetadata( $image, $path ) { return ''; }
/**
+ * Get metadata version.
+ *
+ * This is not used for validating metadata, this is used for the api when returning
+ * metadata, since api content formats should stay the same over time, and so things
+ * using ForeignAPIRepo can keep backwards compatibility
+ *
+ * All core media handlers share a common version number, and extensions can
+ * use the GetMetadataVersion hook to append to the array (they should append a unique
+ * string so not to get confusing). If there was a media handler named 'foo' with metadata
+ * version 3 it might add to the end of the array the element 'foo=3'. if the core metadata
+ * version is 2, the end version string would look like '2;foo=3'.
+ *
+ * @return string version string
+ */
+	static function getMetadataVersion () {
+		// First element is the core metadata version; hook handlers
+		// may append their own entries to the array.
+		$parts = array( '2' );
+		wfRunHooks( 'GetMetadataVersion', array( &$parts ) );
+
+		return implode( ';', $parts );
+	}
+
+	/**
+	 * Convert metadata version.
+	 *
+	 * This default implementation performs no conversion and ignores
+	 * $version: an array is returned as is, anything else is
+	 * unserialized so that the return value is consistently the
+	 * unserialized form. Media handlers can override this to convert
+	 * between metadata versions.
+	 *
+	 * @param $metadata Mixed String or Array metadata array (serialized if string)
+	 * @param $version Integer target version
+	 * @return Mixed the metadata array, or whatever unserialize()
+	 *   yields for a non-array $metadata
+	 */
+	function convertMetadataVersion( $metadata, $version = 1 ) {
+		if ( is_array( $metadata ) ) {
+			return $metadata;
+		}
+
+		// unserialize to keep return parameter consistent.
+		wfSuppressWarnings();
+		$unserialized = unserialize( $metadata );
+		wfRestoreWarnings();
+
+		return $unserialized;
+	}
+
+ /**
* Get a string describing the type of metadata, for display purposes.
+ *
+ * @return string
*/
function getMetadataType( $image ) { return false; }
/**
* Check if the metadata string is valid for this handler.
- * If it returns false, Image will reload the metadata from the file and update the database
+ * If it returns MediaHandler::METADATA_BAD (or false), Image
+ * will reload the metadata from the file and update the database.
+ * MediaHandler::METADATA_GOOD for if the metadata is a-ok,
+ * MediaHandler::METADATA_COMPATIBLE if metadata is old but backwards
+ * compatible (which may or may not trigger a metadata reload).
*/
- function isMetadataValid( $image, $metadata ) { return true; }
+ function isMetadataValid( $image, $metadata ) {
+ return self::METADATA_GOOD;
+ }
/**
@@ -142,6 +208,18 @@ abstract class MediaHandler {
* @return array thumbnail extension and MIME type
*/
function getThumbType( $ext, $mime, $params = null ) {
+ $magic = MimeMagic::singleton();
+ if ( !$ext || $magic->isMatchingExtension( $ext, $mime ) === false ) {
+ // The extension is not valid for this mime type and we do
+ // recognize the mime type
+ $extensions = $magic->getExtensionsForType( $mime );
+ if ( $extensions ) {
+ return array( strtok( $extensions, ' ' ), $mime );
+ }
+ }
+
+ // The extension is correct (true) or the mime type is unknown to
+ // MediaWiki (null)
return array( $ext, $mime );
}
@@ -176,6 +254,8 @@ abstract class MediaHandler {
* Currently "width" and "height" are understood, but this might be
* expanded in the future.
* Returns false if unknown or if the document is not multi-page.
+ *
+ * @param $image File
*/
function getPageDimensions( $image, $page ) {
$gis = $this->getImageSize( $image, $image->getPath() );
@@ -213,7 +293,7 @@ abstract class MediaHandler {
*/
/**
- * FIXME: I don't really like this interface, it's not very flexible
+ * @todo FIXME: I don't really like this interface, it's not very flexible
* I think the media handler should generate HTML instead. It can do
* all the formatting according to some standard. That makes it possible
* to do things like visual indication of grouped and chained streams
@@ -223,22 +303,104 @@ abstract class MediaHandler {
return false;
}
+	/**
+	 * Sorts formatted metadata into the visible / collapsed groups.
+	 * Split off from ImageHandler::formatMetadata, as used by more than
+	 * one type of handler.
+	 *
+	 * This is used by the media handlers that use the FormatMetadata class
+	 *
+	 * @param $metadataArray Array metadata array
+	 * @return array for use displaying metadata, with the keys
+	 *   'visible' and 'collapsed'
+	 */
+	function formatMetadataHelper( $metadataArray ) {
+		$result = array(
+			'visible' => array(),
+			'collapsed' => array()
+		);
+
+		$formatted = FormatMetadata::getFormattedData( $metadataArray );
+		$visibleFields = $this->visibleMetadataFields();
+
+		foreach ( $formatted as $name => $value ) {
+			$tag = strtolower( $name );
+			// Fields listed as visible are shown up front; the rest are collapsed.
+			$visibility = in_array( $tag, $visibleFields ) ? 'visible' : 'collapsed';
+			self::addMeta( $result, $visibility, 'exif', $tag, $value );
+		}
+
+		return $result;
+	}
+
+	/**
+	 * Get a list of metadata items which should be displayed when
+	 * the metadata table is collapsed. The list is read from the
+	 * 'metadata-fields' message: every line starting with "*" names
+	 * one field.
+	 *
+	 * @return array of strings (lower case)
+	 * @access protected
+	 */
+	function visibleMetadataFields() {
+		$fields = array();
+		foreach ( explode( "\n", wfMsgForContent( 'metadata-fields' ) ) as $line ) {
+			$matches = array();
+			if ( !preg_match( '/^\\*\s*(.*?)\s*$/', $line, $matches ) ) {
+				// Not a list item
+				continue;
+			}
+			$fields[] = strtolower( $matches[1] );
+		}
+		return $fields;
+	}
+
+
/**
- * @todo Fixme: document this!
- * 'value' thingy goes into a wikitext table; it used to be escaped but
- * that was incompatible with previous practice of customized display
+ * This is used to generate an array element for each metadata value
+ * That array is then used to generate the table of metadata values
+ * on the image page
+ *
+ * @param &$array Array An array containing elements for each type of visibility
+ * and each of those elements being an array of metadata items. This function adds
+ * a value to that array.
+ * @param $visibility string ('visible' or 'collapsed') if this value is hidden
+ * by default.
+ * @param $type String type of metadata tag (currently always 'exif')
+ * @param $id String the name of the metadata tag (like 'artist' for example).
+ * its name in the table displayed is the message "$type-$id" (Ex exif-artist ).
+ * @param $value String thingy goes into a wikitext table; it used to be escaped but
+ * that was incompatible with previous practise of customized display
* with wikitext formatting via messages such as 'exif-model-value'.
* So the escaping is taken back out, but generally this seems a confusing
* interface.
+ * @param $param String value to pass to the message for the name of the field
+ * as $1. Currently this parameter doesn't seem to ever be used.
+ *
+ * Note, everything here is passed through the parser later on (!)
*/
protected static function addMeta( &$array, $visibility, $type, $id, $value, $param = false ) {
+ $msgName = "$type-$id";
+ if ( wfEmptyMsg( $msgName ) ) {
+ // This is for future compatibility when using instant commons.
+ // So as to not display as ugly a name if a new metadata
+ // property is defined that we don't know about
+ // (not a major issue since such a property would be collapsed
+ // by default).
+ wfDebug( __METHOD__ . ' Unknown metadata name: ' . $id . "\n" );
+ $name = wfEscapeWikiText( $id );
+ } else {
+ $name = wfMsg( $msgName, $param );
+ }
$array[$visibility][] = array(
'id' => "$type-$id",
- 'name' => wfMsg( "$type-$id", $param ),
+ 'name' => $name,
'value' => $value
);
}
+ /**
+ * @param $file File
+ * @return string
+ */
function getShortDesc( $file ) {
global $wgLang;
$nbytes = wfMsgExt( 'nbytes', array( 'parsemag', 'escape' ),
@@ -246,14 +408,21 @@ abstract class MediaHandler {
return "$nbytes";
}
+ /**
+ * @param $file File
+ * @return string
+ */
function getLongDesc( $file ) {
- global $wgUser;
- $sk = $wgUser->getSkin();
+ global $wgLang;
return wfMsgExt( 'file-info', 'parseinline',
- $sk->formatSize( $file->getSize() ),
+ $wgLang->formatSize( $file->getSize() ),
$file->getMimeType() );
}
-
+
+ /**
+ * @param $file File
+ * @return string
+ */
static function getGeneralShortDesc( $file ) {
global $wgLang;
$nbytes = wfMsgExt( 'nbytes', array( 'parsemag', 'escape' ),
@@ -261,11 +430,14 @@ abstract class MediaHandler {
return "$nbytes";
}
+ /**
+ * @param $file File
+ * @return string
+ */
static function getGeneralLongDesc( $file ) {
- global $wgUser;
- $sk = $wgUser->getSkin();
+ global $wgLang;
return wfMsgExt( 'file-info', 'parseinline',
- $sk->formatSize( $file->getSize() ),
+ $wgLang->formatSize( $file->getSize() ),
$file->getMimeType() );
}
@@ -281,10 +453,10 @@ abstract class MediaHandler {
/**
* File validation hook called on upload.
*
- * If the file at the given local path is not valid, or its MIME type does not
+ * If the file at the given local path is not valid, or its MIME type does not
* match the handler class, a Status object should be returned containing
* relevant errors.
- *
+ *
* @param $fileName The local path to the file.
* @return Status object
*/
@@ -321,12 +493,13 @@ abstract class MediaHandler {
* @ingroup Media
*/
abstract class ImageHandler extends MediaHandler {
+
+ /**
+ * @param $file File
+ * @return bool
+ */
function canRender( $file ) {
- if ( $file->getWidth() && $file->getHeight() ) {
- return true;
- } else {
- return false;
- }
+ return ( $file->getWidth() && $file->getHeight() );
}
function getParamMap() {
@@ -371,6 +544,11 @@ abstract class ImageHandler extends MediaHandler {
return array( 'width' => $params['width'] );
}
+ /**
+ * @param $image File
+ * @param $params
+ * @return bool
+ */
function normaliseParams( $image, &$params ) {
$mimeType = $image->getMimeType();
@@ -392,13 +570,44 @@ abstract class ImageHandler extends MediaHandler {
$srcWidth = $image->getWidth( $params['page'] );
$srcHeight = $image->getHeight( $params['page'] );
+
if ( isset( $params['height'] ) && $params['height'] != -1 ) {
+ # Height & width were both set
if ( $params['width'] * $srcHeight > $params['height'] * $srcWidth ) {
+ # Height is the relative smaller dimension, so scale width accordingly
$params['width'] = wfFitBoxWidth( $srcWidth, $srcHeight, $params['height'] );
+
+ if ( $params['width'] == 0 ) {
+ # Very small image, so we need to rely on client side scaling :(
+ $params['width'] = 1;
+ }
+
+ $params['physicalWidth'] = $params['width'];
+ } else {
+ # Height was crap, unset it so that it will be calculated later
+ unset( $params['height'] );
}
}
- $params['height'] = File::scaleHeight( $srcWidth, $srcHeight, $params['width'] );
- if ( !$this->validateThumbParams( $params['width'], $params['height'], $srcWidth, $srcHeight, $mimeType ) ) {
+
+ if ( !isset( $params['physicalWidth'] ) ) {
+ # Passed all validations, so set the physicalWidth
+ $params['physicalWidth'] = $params['width'];
+ }
+
+ # Because thumbs are only referred to by width, the height always needs
+ # to be scaled by the width to keep the thumbnail sizes consistent,
+ # even if it was set inside the if block above
+ $params['physicalHeight'] = File::scaleHeight( $srcWidth, $srcHeight,
+ $params['physicalWidth'] );
+
+ # Set the height if it was not validated in the if block higher up
+ if ( !isset( $params['height'] ) || $params['height'] == -1 ) {
+ $params['height'] = $params['physicalHeight'];
+ }
+
+
+ if ( !$this->validateThumbParams( $params['physicalWidth'],
+ $params['physicalHeight'], $srcWidth, $srcHeight, $mimeType ) ) {
return false;
}
return true;
@@ -435,9 +644,19 @@ abstract class ImageHandler extends MediaHandler {
}
$height = File::scaleHeight( $srcWidth, $srcHeight, $width );
+ if ( $height == 0 ) {
+ # Force height to be at least 1 pixel
+ $height = 1;
+ }
return true;
}
+ /**
+ * @param $image File
+ * @param $script
+ * @param $params
+ * @return bool|ThumbnailImage
+ */
function getScriptedTransform( $image, $script, $params ) {
if ( !$this->normaliseParams( $image, $params ) ) {
return false;
@@ -461,6 +680,10 @@ abstract class ImageHandler extends MediaHandler {
return false;
}
+ /**
+ * @param $file File
+ * @return string
+ */
function getShortDesc( $file ) {
global $wgLang;
$nbytes = wfMsgExt( 'nbytes', array( 'parsemag', 'escape' ),
@@ -470,15 +693,34 @@ abstract class ImageHandler extends MediaHandler {
return "$widthheight ($nbytes)";
}
+ /**
+ * @param $file File
+ * @return string
+ */
function getLongDesc( $file ) {
global $wgLang;
- return wfMsgExt('file-info-size', 'parseinline',
- $wgLang->formatNum( $file->getWidth() ),
- $wgLang->formatNum( $file->getHeight() ),
- $wgLang->formatSize( $file->getSize() ),
- $file->getMimeType() );
+ $pages = $file->pageCount();
+ if ( $pages === false || $pages <= 1 ) {
+ $msg = wfMsgExt('file-info-size', 'parseinline',
+ $wgLang->formatNum( $file->getWidth() ),
+ $wgLang->formatNum( $file->getHeight() ),
+ $wgLang->formatSize( $file->getSize() ),
+ $file->getMimeType() );
+ } else {
+ $msg = wfMsgExt('file-info-size-pages', 'parseinline',
+ $wgLang->formatNum( $file->getWidth() ),
+ $wgLang->formatNum( $file->getHeight() ),
+ $wgLang->formatSize( $file->getSize() ),
+ $file->getMimeType(),
+ $wgLang->formatNum( $pages ) );
+ }
+ return $msg;
}
+ /**
+ * @param $file File
+ * @return string
+ */
function getDimensionsString( $file ) {
global $wgLang;
$pages = $file->pageCount();
diff --git a/includes/media/IPTC.php b/includes/media/IPTC.php
new file mode 100644
index 00000000..1d19791c
--- /dev/null
+++ b/includes/media/IPTC.php
@@ -0,0 +1,576 @@
+<?php
+/**
+ * Class for some IPTC functions.
+ */
+class IPTC {
+
+ /**
+  * This takes the results of iptcparse() and puts it into a
+  * form that can be handled by mediawiki. Generally called from
+  * BitmapMetadataHandler::doApp13.
+  *
+  * Tags whose first value is only whitespace are skipped. If a 1:90
+  * charset tag is present but not recognised, nothing is parsed at all.
+  *
+  * @see http://www.iptc.org/std/IIM/4.1/specification/IIMV4.1.pdf
+  *
+  * @param $rawData String app13 block from jpeg containing iptc/iim data
+  * @return Array iptc metadata array (empty if there is no usable data)
+  */
+ static function parse( $rawData ) {
+ 	$parsed = iptcparse( $rawData );
+ 	$data = array();
+ 	if ( !is_array( $parsed ) ) {
+ 		return $data;
+ 	}
+
+ 	$c = '';
+ 	// charset info contained in tag 1:90.
+ 	if ( isset( $parsed['1#090'] ) && isset( $parsed['1#090'][0] ) ) {
+ 		$c = self::getCharset( $parsed['1#090'][0] );
+ 		if ( $c === false ) {
+ 			// Unknown charset. refuse to parse.
+ 			// note: There is a difference between
+ 			// unknown and no charset specified.
+ 			return array();
+ 		}
+ 		unset( $parsed['1#090'] );
+ 	}
+
+ 	// Tags that are copied as-is (after charset conversion) to a metadata field.
+ 	$simpleTags = array(
+ 		'2#120' => 'ImageDescription', // IPTC caption. mapped with exif ImageDescription
+ 		'2#116' => 'Copyright', // copyright. Mapped with exif copyright
+ 		'2#025' => 'Keywords',
+ 		'2#101' => 'CountryDest', // Country (shown)
+ 		'2#095' => 'ProvinceOrStateDest', // state/province (shown)
+ 		'2#090' => 'CityDest', // city (shown)
+ 		'2#092' => 'SublocationDest', // sublocation (shown)
+ 		'2#005' => 'ObjectName', // object name/title
+ 		'2#040' => 'SpecialInstructions',
+ 		'2#105' => 'Headline',
+ 		// "Identifies the provider of the objectdata,
+ 		// not necessarily the owner/creator".
+ 		'2#110' => 'Credit',
+ 		// "Identifies the original owner of the intellectual content of the
+ 		// objectdata. This could be an agency, a member of an agency or
+ 		// an individual."
+ 		'2#115' => 'Source',
+ 		'2#007' => 'EditStatus', // edit status (lead, correction, etc)
+ 		'2#015' => 'iimCategory', // deprecated. max 3 letters in theory, often more
+ 		'2#020' => 'iimSupplementalCategory', // deprecated.
+ 		'2#010' => 'Urgency', // 1-8. 1 most, 5 normal, 8 low priority
+ 		// "Identifies objectdata that recurs often and predictably...
+ 		// Example: Euroweather"
+ 		'2#022' => 'FixtureIdentifier',
+ 		// Content location code (iso 3166 + some custom things)
+ 		// ex: TUR (for turkey), XUN (for UN), XSP (outer space)
+ 		// See wikipedia article on iso 3166 and appendix D of iim std.
+ 		'2#026' => 'LocationDestCode',
+ 		// Content location name. Full printable name of location of photo.
+ 		'2#027' => 'LocationDest',
+ 		// Object cycle. a for morning (am), p for evening, b for both
+ 		'2#075' => 'ObjectCycle',
+ 		// Country/Primary location code.
+ 		// "Indicates the code of the country/primary location where the
+ 		// intellectual property of the objectdata was created"
+ 		// unclear how this differs from 2#026
+ 		'2#100' => 'CountryCodeDest',
+ 		// original transmission ref.
+ 		// "A code representing the location of original transmission
+ 		// according to practises of the provider."
+ 		'2#103' => 'OriginalTransmissionRef',
+ 		'2#118' => 'Contact',
+ 		// Writer/Editor
+ 		// "Identification of the name of the person involved in the writing,
+ 		// editing or correcting the objectdata or caption/abstract."
+ 		'2#122' => 'Writer',
+ 		'2#135' => 'LanguageCode',
+ 	);
+
+ 	// Date tags: date tag => array( matching time tag, metadata field ).
+ 	// Incomplete dates are not accepted even though they are valid
+ 	// according to spec. Should potentially store timezone as well.
+ 	$dateTags = array(
+ 		// Date created (not date digitized). Maps to exif DateTimeOriginal
+ 		'2#055' => array( '2#060', 'DateTimeOriginal' ),
+ 		// Date converted to digital representation. Maps to exif DateTimeDigitized
+ 		'2#062' => array( '2#063', 'DateTimeDigitized' ),
+ 		// Date released.
+ 		'2#030' => array( '2#035', 'DateTimeReleased' ),
+ 		// Date expires.
+ 		'2#037' => array( '2#038', 'DateTimeExpires' ),
+ 	);
+
+ 	foreach ( $parsed as $tag => $val ) {
+ 		if ( isset( $val[0] ) && trim( $val[0] ) == '' ) {
+ 			wfDebugLog('iptc', "IPTC tag $tag had only whitespace as its value.");
+ 			continue;
+ 		}
+
+ 		if ( isset( $simpleTags[$tag] ) ) {
+ 			$data[$simpleTags[$tag]] = self::convIPTC( $val, $c );
+ 			continue;
+ 		}
+
+ 		if ( isset( $dateTags[$tag] ) ) {
+ 			list( $timeTag, $field ) = $dateTags[$tag];
+ 			$time = isset( $parsed[$timeTag] ) ? $parsed[$timeTag] : array();
+ 			$timestamp = self::timeHelper( $val, $time, $c );
+ 			if ( $timestamp ) {
+ 				$data[$field] = $timestamp;
+ 			}
+ 			continue;
+ 		}
+
+ 		switch( $tag ) {
+ 			case '2#080': /* byline. Mapped with exif Artist */
+ 				/* merge with byline title (2:85)
+ 				 * like how exif does it with
+ 				 * Title, person. Not sure if this is best
+ 				 * approach since we no longer have the two fields
+ 				 * separate. each byline title entry corresponds to a
+ 				 * specific byline. */
+ 				$bylines = self::convIPTC( $val, $c );
+ 				if ( isset( $parsed['2#085'] ) ) {
+ 					$titles = self::convIPTC( $parsed['2#085'], $c );
+ 				} else {
+ 					$titles = array();
+ 				}
+
+ 				$titleCount = count( $titles );
+ 				for ( $i = 0; $i < $titleCount; $i++ ) {
+ 					if ( isset( $bylines[$i] ) ) {
+ 						// theoretically this should always be set
+ 						// but doesn't hurt to be careful.
+ 						$bylines[$i] = $titles[$i] . ', ' . $bylines[$i];
+ 					}
+ 				}
+ 				$data['Artist'] = $bylines;
+ 				break;
+
+ 			case '2#065':
+ 				/* Originating Program.
+ 				 * Combine with Program version (2:70) if present.
+ 				 */
+ 				$software = self::convIPTC( $val, $c );
+
+ 				if ( count( $software ) !== 1 ) {
+ 					// according to iim standard this cannot have multiple values
+ 					// so if there is more than one, something weird is happening,
+ 					// and we skip it.
+ 					wfDebugLog( 'iptc', 'IPTC: Wrong count on 2:65 Software field' );
+ 					break;
+ 				}
+
+ 				if ( isset( $parsed['2#070'] ) ) {
+ 					// if a version is set for the software.
+ 					$softwareVersion = self::convIPTC( $parsed['2#070'], $c );
+ 					$data['Software'] = array( array( $software[0], $softwareVersion[0] ) );
+ 				} else {
+ 					$data['Software'] = $software;
+ 				}
+ 				break;
+
+ 			case '2#000': /* iim version */
+ 				// unlike other tags, this is a 2-byte binary number.
+ 				// technically this is required if there is iptc data
+ 				// but in practise it isn't always there.
+ 				if ( isset( $val[0] ) && strlen( $val[0] ) == 2 ) {
+ 					// big endian 16 bit number.
+ 					$versionValue = ord( substr( $val[0], 0, 1 ) ) * 256;
+ 					$versionValue += ord( substr( $val[0], 1, 1 ) );
+ 					$data['iimVersion'] = $versionValue;
+ 				}
+ 				break;
+
+ 			case '2#004':
+ 				// IntellectualGenre.
+ 				// first 4 characters are an id code
+ 				// That we're not really interested in.
+
+ 				// This prop is weird, since it's
+ 				// allowed to have multiple values
+ 				// in iim 4.1, but not in the XMP
+ 				// stuff. We're going to just
+ 				// extract the first value.
+ 				$con = self::convIPTC( $val, $c );
+ 				if ( !isset( $con[0] ) || strlen( $con[0] ) < 5 ) {
+ 					wfDebugLog( 'iptc', 'IPTC: '
+ 						. '2:04 too short. '
+ 						. 'Ignoring.' );
+ 					break;
+ 				}
+ 				$data['IntellectualGenre'] = substr( $con[0], 4 );
+ 				break;
+
+ 			case '2#012':
+ 				// Subject News code - this is a compound field
+ 				// at the moment we only extract the subject news
+ 				// code, which is an 8 digit (ascii) number
+ 				// describing the subject matter of the content.
+ 				$codes = self::convIPTC( $val, $c );
+ 				foreach ( $codes as $ic ) {
+ 					$fields = explode(':', $ic, 3 );
+
+ 					if ( count( $fields ) < 2 ||
+ 						$fields[0] !== 'IPTC' )
+ 					{
+ 						// Same log group as the rest of this class.
+ 						wfDebugLog( 'iptc', 'IPTC: '
+ 							. 'Invalid 2:12 - ' . $ic );
+ 						// Leaves the inner foreach only: any remaining
+ 						// codes of this tag are not looked at.
+ 						break;
+ 					}
+ 					$data['SubjectNewsCode'] = $fields[1];
+ 				}
+ 				break;
+
+ 			// purposely does not do 2:125, 2:130, 2:131,
+ 			// 2:47, 2:50, 2:45, 2:42, 2:8, 2:3
+ 			// 2:200, 2:201, 2:202
+ 			// or the audio stuff (2:150 to 2:154)
+
+ 			case '2#070':
+ 			case '2#060':
+ 			case '2#063':
+ 			case '2#085':
+ 			case '2#038':
+ 			case '2#035':
+ 				// ignore. Handled together with their companion tag above.
+ 				break;
+
+ 			default:
+ 				wfDebugLog( 'iptc', "Unsupported iptc tag: $tag. Value: " . implode( ',', $val ));
+ 				break;
+ 		}
+
+ 	}
+ 	return $data;
+ }
+
+ /**
+  * Convert an iptc date and time tags into the exif format
+  *
+  * The date must be a complete CCYYMMDD value and the time, if given,
+  * a complete HHMMSS+HHMM / HHMMSS-HHMM value. Anything else is rejected.
+  *
+  * @todo Potentially this should also capture the timezone offset.
+  * @param Array $date The date tag
+  * @param Array $time The time tag
+  * @param $c String charset of the tags (as used by convIPTC)
+  * @return String|null Date in exif format (date part only if no usable
+  *   time tag was given), or null if the values could not be converted.
+  */
+ private static function timeHelper( $date, $time, $c ) {
+ 	if ( count( $date ) !== 1 ) {
+ 		// the standard says this should always be 1
+ 		// just double checking.
+ 		return null;
+ 	}
+ 	list( $date ) = self::convIPTC( $date, $c );
+ 	// The fixed-offset substr() calls below need the value to start
+ 	// at column 0, so get rid of surrounding whitespace first.
+ 	$date = trim( $date );
+
+ 	if ( count( $time ) === 1 ) {
+ 		list( $time ) = self::convIPTC( $time, $c );
+ 		$time = trim( $time );
+ 		$dateOnly = false;
+ 	} else {
+ 		$time = '000000+0000'; //placeholder
+ 		$dateOnly = true;
+ 	}
+
+ 	// Patterns are anchored: an unanchored match would accept values with
+ 	// leading junk, which would then be sliced at the wrong offsets.
+ 	if ( ! ( preg_match( '/^\d{6}[-+]\d{4}$/', $time )
+ 		&& preg_match( '/^\d{8}$/', $date )
+ 		&& substr( $date, 0, 4 ) !== '0000'
+ 		&& substr( $date, 4, 2 ) !== '00'
+ 		&& substr( $date, 6, 2 ) !== '00'
+ 	) ) {
+ 		// something wrong.
+ 		// Note, this rejects some valid dates according to iptc spec
+ 		// for example: the date 00000400 means the photo was taken in
+ 		// April, but the year and day is unknown. We don't process these
+ 		// types of incomplete dates atm.
+ 		wfDebugLog( 'iptc', "IPTC: invalid time ( $time ) or date ( $date )");
+ 		return null;
+ 	}
+
+ 	$unixTS = wfTimestamp( TS_UNIX, $date . substr( $time, 0, 6 ));
+ 	if ( $unixTS === false ) {
+ 		wfDebugLog( 'iptc', "IPTC: can't convert date to TS_UNIX: $date $time." );
+ 		return null;
+ 	}
+
+ 	// Offset in seconds, from the HHMM part after the sign.
+ 	$tz = ( intval( substr( $time, 7, 2 ) ) *60*60 )
+ 		+ ( intval( substr( $time, 9, 2 ) ) * 60 );
+
+ 	if ( substr( $time, 6, 1 ) === '-' ) {
+ 		$tz = - $tz;
+ 	}
+
+ 	// NOTE(review): the offset is added to the timestamp here; confirm this
+ 	// is the intended direction for the stored value.
+ 	$finalTimestamp = wfTimestamp( TS_EXIF, $unixTS + $tz );
+ 	if ( $finalTimestamp === false ) {
+ 		wfDebugLog( 'iptc', "IPTC: can't make final timestamp. Date: " . ( $unixTS + $tz ) );
+ 		return null;
+ 	}
+ 	if ( $dateOnly ) {
+ 		//return the date only (first 10 characters of the exif timestamp)
+ 		return substr( $finalTimestamp, 0, 10 );
+ 	} else {
+ 		return $finalTimestamp;
+ 	}
+ }
+
+ /**
+  * Helper function to convert charset for iptc values.
+  *
+  * @param $data Mixed String or Array: The iptc string, or a list of them
+  * @param $charset String: The charset
+  *
+  * @return Mixed The converted string, or (if an array was given) an array
+  *   with the same keys holding the converted strings.
+  */
+ private static function convIPTC ( $data, $charset ) {
+ 	if ( is_array( $data ) ) {
+ 		// Assign by key rather than iterating by reference, so that no
+ 		// dangling reference to the last element is left in the result.
+ 		foreach ( $data as $key => $val ) {
+ 			$data[$key] = self::convIPTCHelper( $val, $charset );
+ 		}
+ 	} else {
+ 		$data = self::convIPTCHelper( $data, $charset );
+ 	}
+
+ 	return $data;
+ }
+ /**
+  * Convert a single iptc value to utf-8.
+  *
+  * With a charset, the value is run through iconv and trimmed. Without
+  * one, the value is returned unchanged if UtfNormal leaves it untouched,
+  * and otherwise converted as if it were Windows-1252.
+  *
+  * @param $data String: The iptc string
+  * @param $charset String: The charset ('' if none was specified)
+  *
+  * @return string
+  */
+ private static function convIPTCHelper ( $data, $charset ) {
+ 	if ( !$charset ) {
+ 		//treat as utf-8 if is valid utf-8. otherwise pretend its windows-1252
+ 		// most of the time if there is no 1:90 tag, it is either ascii, latin1, or utf-8
+ 		$untouched = $data;
+ 		UtfNormal::quickIsNFCVerify( $data ); //make $data valid utf-8
+ 		return ( $data === $untouched )
+ 			? $data //validation didn't change $data
+ 			: self::convIPTCHelper( $untouched, 'Windows-1252' );
+ 	}
+
+ 	wfSuppressWarnings();
+ 	$converted = iconv($charset, "UTF-8//IGNORE", $data);
+ 	wfRestoreWarnings();
+
+ 	if ( $converted === false ) {
+ 		wfDebugLog('iptc', __METHOD__ . " Error converting iptc data charset $charset to utf-8");
+ 		$converted = "";
+ 	}
+ 	return trim( $converted );
+ }
+
+ /**
+  * Take the value of the 1:90 tag and return a charset name.
+  *
+  * Warning, this function does not (and is not intended to) detect
+  * all iso 2022 escape codes. In practise, the code for utf-8 is the
+  * only code that seems to have wide use. It does detect that code.
+  *
+  * @param String $tag 1:90 tag.
+  * @return string|bool charset name, or false if the escape sequence
+  *   is not recognised.
+  */
+ static function getCharset($tag) {
+
+ 	//According to iim standard, charset is defined by the tag 1:90.
+ 	//in which there are iso 2022 escape sequences to specify the character set.
+ 	//the iim standard seems to encourage that all necessary escape sequences are
+ 	//in the 1:90 tag, but says it doesn't have to be.
+
+ 	//This is in need of more testing probably. This is definitely not complete.
+ 	//however reading the docs of some other iptc software, it appears that most iptc software
+ 	//only recognizes utf-8. If 1:90 tag is not present content is
+ 	// usually ascii or iso-8859-1 (and sometimes utf-8), but no guarantee.
+
+ 	//This also won't work if there are more than one escape sequence in the 1:90 tag
+ 	//or if something is put in the G2, or G3 charsets, etc. It will only reliably recognize utf-8.
+
+ 	// This is just going through the charsets mentioned in appendix C of the iim standard.
+
+ 	// \x1b = ESC.
+ 	$charsets = array(
+ 		"\x1b%G" => 'UTF-8', // utf-8
+ 		// Also call things that are compatible with utf-8, utf-8 (e.g. ascii)
+ 		"\x1b(B" => 'UTF-8', // ascii
+ 		"\x1b(@" => 'UTF-8', // iso-646-IRV (ascii in latest version, $ different in older version)
+ 		"\x1b(A" => 'ISO646-GB', // like ascii, but british.
+ 		"\x1b(C" => 'ISO-IR-8-1', // some obscure swedish/finnish encoding
+ 		"\x1b(D" => 'ISO-IR-8-2',
+ 		"\x1b(E" => 'ISO-IR-9-1', // some obscure danish/norwegian encoding
+ 		"\x1b(F" => 'ISO-IR-9-2',
+ 		"\x1b(G" => 'SEN_850200_B', // aka iso 646-SE; ascii-like
+ 		"\x1b(I" => "ISO646-IT",
+ 		"\x1b(L" => "ISO646-PT",
+ 		"\x1b(Z" => "ISO646-ES",
+ 		"\x1b([" => "GREEK7-OLD",
+ 		"\x1b(K" => "ISO646-DE",
+ 		"\x1b(N" => "ISO_5427", // cyrillic
+ 		"\x1b(`" => "NS_4551-1", // iso646-NO
+ 		"\x1b(f" => "NF_Z_62-010", // iso646-FR
+ 		"\x1b(g" => "PT2", // iso646-PT2
+ 		"\x1b(h" => "ES2",
+ 		"\x1b(i" => "MSZ_7795.3", // iso646-HU
+ 		"\x1b(w" => "CSA_Z243.4-1985-1",
+ 		"\x1b(x" => "CSA_Z243.4-1985-2",
+ 		"\x1b\$(B" => "JIS_C6226-1983",
+ 		"\x1b\$B" => "JIS_C6226-1983",
+ 		"\x1b&@\x1b\$B" => "JIS_C6226-1983",
+ 		"\x1b&@\x1b\$(B" => "JIS_C6226-1983",
+ 		// iso-8859-x. at least for the high code characters.
+ 		"\x1b-A" => 'ISO-8859-1',
+ 		"\x1b(@\x1b-A" => 'ISO-8859-1',
+ 		"\x1b(B\x1b-A" => 'ISO-8859-1',
+ 		"\x1b-B" => 'ISO-8859-2',
+ 		"\x1b-C" => 'ISO-8859-3',
+ 		"\x1b-D" => 'ISO-8859-4',
+ 		"\x1b-E" => 'ISO-8859-5',
+ 		"\x1b-F" => 'ISO-8859-6',
+ 		"\x1b-G" => 'ISO-8859-7',
+ 		"\x1b-H" => 'ISO-8859-8',
+ 		"\x1b-I" => 'CSN_369103', // CSN_369103. at least for the high code characters.
+ 	);
+
+ 	if ( is_string( $tag ) && isset( $charsets[$tag] ) ) {
+ 		return $charsets[$tag];
+ 	}
+
+ 	wfDebugLog('iptc', __METHOD__ . ' Unknown charset in iptc 1:90: ' . bin2hex( $tag ) );
+ 	//at this point just give up and refuse to parse iptc?
+ 	return false;
+ }
+}
diff --git a/includes/media/Jpeg.php b/includes/media/Jpeg.php
new file mode 100644
index 00000000..7033409b
--- /dev/null
+++ b/includes/media/Jpeg.php
@@ -0,0 +1,46 @@
+<?php
+/**
+ * @file
+ * @ingroup Media
+ */
+
+/**
+ * JPEG specific handler.
+ * Inherits most stuff from BitmapHandler, just here to do the metadata handler differently.
+ *
+ * Metadata stuff common to Jpeg and built-in Tiff (not PagedTiffHandler) is
+ * in ExifBitmapHandler.
+ *
+ * @ingroup Media
+ */
+class JpegHandler extends ExifBitmapHandler {
+
+	/**
+	 * Extract the metadata of a jpeg file and serialize it.
+	 *
+	 * @param $image File (not used here)
+	 * @param $filename String path of the file to read
+	 * @return string Serialized metadata array including a
+	 *   MEDIAWIKI_EXIF_VERSION entry, or ExifBitmapHandler::BROKEN_FILE
+	 *   if an MWException was raised while reading the file.
+	 */
+	function getMetadata ( $image, $filename ) {
+		try {
+			$props = BitmapMetadataHandler::Jpeg( $filename );
+			if ( is_array( $props ) ) {
+				$props['MEDIAWIKI_EXIF_VERSION'] = Exif::version();
+				return serialize( $props );
+			}
+			// This should never happen, but doesn't hurt to be paranoid.
+			throw new MWException('Metadata array is not an array');
+		}
+		catch ( MWException $err ) {
+			// BitmapMetadataHandler throws an exception in certain exceptional cases like if file does not exist.
+			wfDebug( __METHOD__ . ': ' . $err->getMessage() . "\n" );
+
+			/* This used to use 0 (ExifBitmapHandler::OLD_BROKEN_FILE) both when
+			 * the file had no metadata and when the file was broken. Once
+			 * metadata support is expanded those two cases can no longer be
+			 * told apart: a broken file is likely to stay broken, but a file
+			 * without props could gain props when support improves.
+			 * So a broken file is now denoted by -1, and a file without props
+			 * by an array holding only MEDIAWIKI_EXIF_VERSION.
+			 */
+			return ExifBitmapHandler::BROKEN_FILE;
+		}
+	}
+
+}
+
diff --git a/includes/media/JpegMetadataExtractor.php b/includes/media/JpegMetadataExtractor.php
new file mode 100644
index 00000000..4769bf8e
--- /dev/null
+++ b/includes/media/JpegMetadataExtractor.php
@@ -0,0 +1,252 @@
+<?php
+/**
+* Class for reading jpegs and extracting metadata.
+* See also BitmapMetadataHandler.
+*
+* Based somewhat on GIFMetadataExtractor.
+*/
+class JpegMetadataExtractor {
+
+ const MAX_JPEG_SEGMENTS = 200;
+ // the max segment is a sanity check.
+ // A jpeg file should never even remotely have
+ // that many segments. Your average file has about 10.
+
+ /** Function to extract metadata segments of interest from jpeg files
+  * based on GIFMetadataExtractor.
+  *
+  * we can almost use getimagesize to do this
+  * but gis doesn't support having multiple app1 segments
+  * and those can't extract xmp on files containing both exif and xmp data
+  *
+  * Reading stops at the first EOI or SOS marker. The file handle is
+  * closed again on every exit path, including when an exception is thrown.
+  *
+  * @param String $filename name of jpeg file
+  * @return Array of interesting segments. Always has 'XMP_ext' and 'COM'
+  *   (lists), and may have 'XMP', 'byteOrder' ('BE' or 'LE') and 'PSIR'.
+  * @throws MWException if given invalid file.
+  */
+ static function segmentSplitter ( $filename ) {
+ 	// XMP segments are only collected if the xml extension is available.
+ 	$showXMP = function_exists( 'xml_parser_create_ns' );
+
+ 	$segmentCount = 0;
+
+ 	$segments = array(
+ 		'XMP_ext' => array(),
+ 		'COM' => array(),
+ 	);
+
+ 	if ( !$filename ) {
+ 		throw new MWException( "No filename specified for " . __METHOD__ );
+ 	}
+ 	if ( !file_exists( $filename ) || is_dir( $filename ) ) {
+ 		throw new MWException( "Invalid file $filename passed to " . __METHOD__ );
+ 	}
+
+ 	$fh = fopen( $filename, "rb" );
+
+ 	if ( !$fh ) {
+ 		throw new MWException( "Could not open file $filename" );
+ 	}
+
+ 	// No "finally" available, so close the handle by hand on both the
+ 	// normal return and in the catch block below.
+ 	try {
+ 		$buffer = fread( $fh, 2 );
+ 		if ( $buffer !== "\xFF\xD8" ) {
+ 			throw new MWException( "Not a jpeg, no SOI" );
+ 		}
+ 		while ( !feof( $fh ) ) {
+ 			$buffer = fread( $fh, 1 );
+ 			$segmentCount++;
+ 			if ( $segmentCount > self::MAX_JPEG_SEGMENTS ) {
+ 				// this is just a sanity check
+ 				throw new MWException( 'Too many jpeg segments. Aborting' );
+ 			}
+ 			if ( $buffer !== "\xFF" ) {
+ 				throw new MWException( "Error reading jpeg file marker. Expected 0xFF but got " . bin2hex( $buffer ) );
+ 			}
+
+ 			$buffer = fread( $fh, 1 );
+ 			while( $buffer === "\xFF" && !feof( $fh ) ) {
+ 				// Skip through any 0xFF padding bytes.
+ 				$buffer = fread( $fh, 1 );
+ 			}
+ 			if ( $buffer === "\xFE" ) {
+
+ 				// COM section -- file comment
+ 				// First see if valid utf-8,
+ 				// if not try to convert it to windows-1252.
+ 				$com = $oldCom = trim( self::jpegExtractMarker( $fh ) );
+ 				UtfNormal::quickIsNFCVerify( $com );
+ 				// turns $com to valid utf-8.
+ 				// thus if no change, its utf-8, otherwise its something else.
+ 				if ( $com !== $oldCom ) {
+ 					wfSuppressWarnings();
+ 					$com = $oldCom = iconv( 'windows-1252', 'UTF-8//IGNORE', $oldCom );
+ 					wfRestoreWarnings();
+ 				}
+ 				// Try it again, if its still not a valid string, then probably
+ 				// binary junk or some really weird encoding, so don't extract.
+ 				UtfNormal::quickIsNFCVerify( $com );
+ 				if ( $com === $oldCom ) {
+ 					$segments["COM"][] = $oldCom;
+ 				} else {
+ 					wfDebug( __METHOD__ . " Ignoring JPEG comment as is garbage.\n" );
+ 				}
+
+ 			} elseif ( $buffer === "\xE1" ) {
+ 				// APP1 section (Exif, XMP, and XMP extended)
+ 				// XMP is only extracted if XMP is enabled.
+ 				$temp = self::jpegExtractMarker( $fh );
+ 				// check what type of app segment this is.
+ 				if ( substr( $temp, 0, 29 ) === "http://ns.adobe.com/xap/1.0/\x00" && $showXMP ) {
+ 					$segments["XMP"] = substr( $temp, 29 );
+ 				} elseif ( substr( $temp, 0, 35 ) === "http://ns.adobe.com/xmp/extension/\x00" && $showXMP ) {
+ 					$segments["XMP_ext"][] = substr( $temp, 35 );
+ 				} elseif ( substr( $temp, 0, 29 ) === "XMP\x00://ns.adobe.com/xap/1.0/\x00" && $showXMP ) {
+ 					// Some images (especially flickr images) seem to have this.
+ 					// I really have no idea what the deal is with them, but
+ 					// whatever...
+ 					$segments["XMP"] = substr( $temp, 29 );
+ 					wfDebug( __METHOD__ . ' Found XMP section with wrong app identifier '
+ 						. "Using anyways.\n" );
+ 				} elseif ( substr( $temp, 0, 6 ) === "Exif\0\0" ) {
+ 					// Just need to find out what the byte order is.
+ 					// because php's exif plugin sucks...
+ 					// This is a II for little Endian, MM for big. Not a unicode BOM.
+ 					$byteOrderMarker = substr( $temp, 6, 2 );
+ 					if ( $byteOrderMarker === 'MM' ) {
+ 						$segments['byteOrder'] = 'BE';
+ 					} elseif ( $byteOrderMarker === 'II' ) {
+ 						$segments['byteOrder'] = 'LE';
+ 					} else {
+ 						wfDebug( __METHOD__ . " Invalid byte ordering?!\n" );
+ 					}
+ 				}
+ 			} elseif ( $buffer === "\xED" ) {
+ 				// APP13 - PSIR. IPTC and some photoshop stuff
+ 				$temp = self::jpegExtractMarker( $fh );
+ 				if ( substr( $temp, 0, 14 ) === "Photoshop 3.0\x00" ) {
+ 					$segments["PSIR"] = $temp;
+ 				}
+ 			} elseif ( $buffer === "\xD9" || $buffer === "\xDA" ) {
+ 				// EOI - end of image or SOS - start of scan. either way we're past any interesting segments
+ 				fclose( $fh );
+ 				return $segments;
+ 			} else {
+ 				// segment we don't care about, so skip
+ 				$size = wfUnpack( "nint", fread( $fh, 2 ), 2 );
+ 				if ( $size['int'] <= 2 ) throw new MWException( "invalid marker size in jpeg" );
+ 				fseek( $fh, $size['int'] - 2, SEEK_CUR );
+ 			}
+
+ 		}
+ 		// shouldn't get here.
+ 		throw new MWException( "Reached end of jpeg file unexpectedly" );
+ 	} catch ( Exception $e ) {
+ 		// Don't leak the handle; pass the original exception on untouched.
+ 		fclose( $fh );
+ 		throw $e;
+ 	}
+ }
+
+ /**
+  * Helper function for segmentSplitter.
+  *
+  * Reads the 2 byte big endian segment size at the current position of
+  * the file, followed by the segment content (size minus the 2 size bytes).
+  *
+  * @param &$fh FileHandle for jpeg file
+  * @return data content of segment.
+  * @throws MWException if the size is 2 or less, or fewer bytes than
+  *   announced could be read.
+  */
+ private static function jpegExtractMarker( &$fh ) {
+ 	$header = wfUnpack( "nint", fread( $fh, 2 ), 2 );
+ 	// The recorded size includes the two size bytes themselves.
+ 	$payloadLen = $header['int'] - 2;
+ 	if ( $payloadLen <= 0 ) {
+ 		throw new MWException( "invalid marker size in jpeg" );
+ 	}
+
+ 	$payload = fread( $fh, $payloadLen );
+ 	if ( strlen( $payload ) !== $payloadLen ) {
+ 		throw new MWException( "Segment shorter than expected" );
+ 	}
+ 	return $payload;
+ }
+
+ /**
+  * This reads the photoshop image resource.
+  * Currently it only compares the iptc/iim hash
+  * with the stored hash, which is used to determine the precedence
+  * of the iptc data. In future it may extract some other info, like
+  * url of copyright license.
+  *
+  * This should generally be called by BitmapMetadataHandler::doApp13()
+  *
+  * @param String $app13 photoshop psir app13 block from jpg.
+  * @return String 'iptc-good-hash', 'iptc-bad-hash' or 'iptc-no-hash'.
+  *   Nothing is returned if $app13 is empty.
+  */
+ public static function doPSIR ( $app13 ) {
+ 	if ( !$app13 ) {
+ 		return;
+ 	}
+ 	// First compare hash with real thing
+ 	// 0x404 contains IPTC, 0x425 has hash
+ 	// This is used to determine if the iptc is newer than
+ 	// the xmp data, as xmp programs update the hash,
+ 	// where non-xmp programs don't.
+
+ 	$total = strlen( $app13 );
+ 	$pos = 14; // skip past PHOTOSHOP 3.0 identifier. should already be checked.
+ 	$computedHash = "";
+ 	$storedHash = "";
+
+ 	// the +12 is the length of an empty item.
+ 	while ( $pos + 12 <= $total ) {
+ 		// signature is supposed to be 8BIM but apparently sometimes
+ 		// isn't, esp. in really old jpg's. Such records are skipped over
+ 		// but their content is not used.
+ 		$isBim = ( substr( $app13, $pos, 4 ) === '8BIM' );
+ 		$pos += 4;
+
+ 		// 2 byte id number which identifies the piece of info
+ 		// this record contains.
+ 		$resourceId = substr( $app13, $pos, 2 );
+ 		$pos += 2;
+
+ 		// some record types can contain a name, which
+ 		// is a pascal string 0-padded to be an even
+ 		// number of bytes. Most times (and any time
+ 		// we care) this is empty, making it two null bytes.
+ 		// we never use the name so skip it. +1 for length byte
+ 		$nameBytes = ord( substr( $app13, $pos, 1 ) ) + 1;
+ 		if ( $nameBytes % 2 == 1 ) {
+ 			$nameBytes++; // pad to even.
+ 		}
+ 		$pos += $nameBytes;
+
+ 		// now length of data (unsigned long big endian)
+ 		$unpacked = wfUnpack( 'Nlen', substr( $app13, $pos, 4 ), 4 );
+ 		$dataLen = $unpacked['len'];
+ 		// PHP can take issue with very large unsigned ints and make them negative.
+ 		// Which should never ever happen, as this has to be inside a segment
+ 		// which is limited to a 16 bit number.
+ 		if ( $dataLen < 0 ) {
+ 			throw new MWException( "Too big PSIR (" . $dataLen . ')' );
+ 		}
+ 		$pos += 4; // 4bytes length field;
+
+ 		// this should not happen, but check.
+ 		if ( $dataLen + $pos > $total ) {
+ 			wfDebug( __METHOD__ . " PSIR data too long.\n" );
+ 			return 'iptc-no-hash';
+ 		}
+
+ 		if ( $isBim ) {
+ 			if ( $resourceId === "\x04\x04" ) {
+ 				// IPTC block
+ 				$computedHash = md5( substr( $app13, $pos, $dataLen ), true );
+ 			} elseif ( $resourceId === "\x04\x25" ) {
+ 				$storedHash = substr( $app13, $pos, $dataLen );
+ 			}
+ 		}
+
+ 		// if odd, add 1 to length to account for
+ 		// null pad byte.
+ 		$pos += $dataLen + ( $dataLen % 2 );
+ 	}
+
+ 	if ( !$computedHash || !$storedHash ) {
+ 		return 'iptc-no-hash';
+ 	}
+ 	return ( $computedHash === $storedHash )
+ 		? 'iptc-good-hash'
+ 		: 'iptc-bad-hash';
+ }
+}
diff --git a/includes/media/MediaTransformOutput.php b/includes/media/MediaTransformOutput.php
index c441f06c..f170bb9d 100644
--- a/includes/media/MediaTransformOutput.php
+++ b/includes/media/MediaTransformOutput.php
@@ -12,7 +12,12 @@
* @ingroup Media
*/
abstract class MediaTransformOutput {
- var $file, $width, $height, $url, $page, $path;
+ /**
+ * @var File
+ */
+ var $file;
+
+ var $width, $height, $url, $page, $path;
/**
* Get the width of the output box
@@ -45,7 +50,7 @@ abstract class MediaTransformOutput {
/**
* Fetch HTML for this transform output
*
- * @param $options Associative array of options. Boolean options
+ * @param $options array Associative array of options. Boolean options
* should be indicated with a value of true for true, and false or
* absent for false.
*
@@ -73,6 +78,11 @@ abstract class MediaTransformOutput {
/**
* Wrap some XHTML text in an anchor tag with the given attributes
+ *
+ * @param $linkAttribs array
+ * @param $contents string
+ *
+ * @return string
*/
protected function linkWrap( $linkAttribs, $contents ) {
if ( $linkAttribs ) {
@@ -82,6 +92,11 @@ abstract class MediaTransformOutput {
}
}
+ /**
+ * @param $title string
+ * @param $params array
+ * @return array
+ */
function getDescLinkAttribs( $title = null, $params = '' ) {
$query = $this->page ? ( 'page=' . urlencode( $this->page ) ) : '';
if( $params ) {
@@ -98,7 +113,6 @@ abstract class MediaTransformOutput {
}
}
-
/**
* Media transform output for images
*
@@ -131,7 +145,7 @@ class ThumbnailImage extends MediaTransformOutput {
* Return HTML <img ... /> tag for the thumbnail, will include
* width and height attributes and a blank alt text (as required).
*
- * @param $options Associative array of options. Boolean options
+ * @param $options array Associative array of options. Boolean options
* should be indicated with a value of true for true, and false or
* absent for false.
*
@@ -212,8 +226,8 @@ class MediaTransformError extends MediaTransformOutput {
$htmlArgs = array_map( 'htmlspecialchars', $args );
$htmlArgs = array_map( 'nl2br', $htmlArgs );
- $this->htmlMsg = wfMsgReplaceArgs( htmlspecialchars( wfMsgGetKey( $msg, true ) ), $htmlArgs );
- $this->textMsg = wfMsgReal( $msg, $args );
+ $this->htmlMsg = wfMessage( $msg )->rawParams( $htmlArgs )->escaped();
+ $this->textMsg = wfMessage( $msg )->rawParams( $htmlArgs )->text();
$this->width = intval( $width );
$this->height = intval( $height );
$this->url = false;
diff --git a/includes/media/PNG.php b/includes/media/PNG.php
index 5197282c..8fe9ecb4 100644
--- a/includes/media/PNG.php
+++ b/includes/media/PNG.php
@@ -12,26 +12,51 @@
* @ingroup Media
*/
class PNGHandler extends BitmapHandler {
-
+
+ const BROKEN_FILE = '0';
+
+ /**
+ * @param File $image
+ * @param string $filename
+ * @return string
+ */
function getMetadata( $image, $filename ) {
- if ( !isset($image->parsedPNGMetadata) ) {
- try {
- $image->parsedPNGMetadata = PNGMetadataExtractor::getMetadata( $filename );
- } catch( Exception $e ) {
- // Broken file?
- wfDebug( __METHOD__ . ': ' . $e->getMessage() . "\n" );
- return '0';
- }
+ try {
+ $metadata = BitmapMetadataHandler::PNG( $filename );
+ } catch( Exception $e ) {
+ // Broken file?
+ wfDebug( __METHOD__ . ': ' . $e->getMessage() . "\n" );
+ return self::BROKEN_FILE;
}
- return serialize($image->parsedPNGMetadata);
-
+ return serialize($metadata);
}
-
+
+ /**
+ * @param $image File
+ * @return array|bool
+ */
function formatMetadata( $image ) {
- return false;
+ $meta = $image->getMetadata();
+
+ if ( !$meta ) {
+ return false;
+ }
+ $meta = unserialize( $meta );
+ if ( !isset( $meta['metadata'] ) || count( $meta['metadata'] ) <= 1 ) {
+ return false;
+ }
+
+ if ( isset( $meta['metadata']['_MW_PNG_VERSION'] ) ) {
+ unset( $meta['metadata']['_MW_PNG_VERSION'] );
+ }
+ return $this->formatMetadataHelper( $meta['metadata'] );
}
-
+
+ /**
+ * @param $image File
+ * @return bool
+ */
function isAnimatedImage( $image ) {
$ser = $image->getMetadata();
if ($ser) {
@@ -46,11 +71,33 @@ class PNGHandler extends BitmapHandler {
}
function isMetadataValid( $image, $metadata ) {
+
+ if ( $metadata === self::BROKEN_FILE ) {
+ // Do not repeatedly regenerate metadata on a broken file.
+ return self::METADATA_GOOD;
+ }
+
wfSuppressWarnings();
$data = unserialize( $metadata );
wfRestoreWarnings();
- return (boolean) $data;
+
+ if ( !$data || !is_array( $data ) ) {
+ wfDebug(__METHOD__ . ' invalid png metadata' );
+ return self::METADATA_BAD;
+ }
+
+ if ( !isset( $data['metadata']['_MW_PNG_VERSION'] )
+ || $data['metadata']['_MW_PNG_VERSION'] != PNGMetadataExtractor::VERSION ) {
+ wfDebug(__METHOD__ . ' old but compatible png metadata' );
+ return self::METADATA_COMPATIBLE;
+ }
+ return self::METADATA_GOOD;
}
+
+ /**
+ * @param $image File
+ * @return string
+ */
function getLongDesc( $image ) {
global $wgLang;
$original = parent::getLongDesc( $image );
@@ -65,16 +112,19 @@ class PNGHandler extends BitmapHandler {
$info = array();
$info[] = $original;
- if ($metadata['loopCount'] == 0)
+ if ( $metadata['loopCount'] == 0 ) {
$info[] = wfMsgExt( 'file-info-png-looped', 'parseinline' );
- elseif ($metadata['loopCount'] > 1)
+ } elseif ( $metadata['loopCount'] > 1 ) {
$info[] = wfMsgExt( 'file-info-png-repeat', 'parseinline', $metadata['loopCount'] );
+ }
- if ($metadata['frameCount'] > 0)
+ if ( $metadata['frameCount'] > 0 ) {
$info[] = wfMsgExt( 'file-info-png-frames', 'parseinline', $metadata['frameCount'] );
+ }
- if ($metadata['duration'])
+ if ( $metadata['duration'] ) {
$info[] = $wgLang->formatTimePeriod( $metadata['duration'] );
+ }
return $wgLang->commaList( $info );
}
diff --git a/includes/media/PNGMetadataExtractor.php b/includes/media/PNGMetadataExtractor.php
index 6a931e6c..d3c44d4f 100644
--- a/includes/media/PNGMetadataExtractor.php
+++ b/includes/media/PNGMetadataExtractor.php
@@ -1,6 +1,6 @@
<?php
/**
- * PNG frame counter.
+ * PNG frame counter and metadata extractor.
* Slightly derived from GIFMetadataExtractor.php
* Deliberately not using MWExceptions to avoid external dependencies, encouraging
* redistribution.
@@ -17,26 +17,61 @@
class PNGMetadataExtractor {
static $png_sig;
static $CRC_size;
+ static $text_chunks;
+
+ const VERSION = 1;
+ const MAX_CHUNK_SIZE = 3145728; // 3 megabytes
static function getMetadata( $filename ) {
self::$png_sig = pack( "C8", 137, 80, 78, 71, 13, 10, 26, 10 );
self::$CRC_size = 4;
-
+ /* based on list at http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
+ * and http://www.w3.org/TR/PNG/#11keywords
+ */
+ self::$text_chunks = array(
+ 'xml:com.adobe.xmp' => 'xmp',
+ # Artist is unofficial. Author is the recommended
+ # keyword in the PNG spec. However some people output
+ # Artist so support both.
+ 'artist' => 'Artist',
+ 'model' => 'Model',
+ 'make' => 'Make',
+ 'author' => 'Artist',
+ 'comment' => 'PNGFileComment',
+ 'description' => 'ImageDescription',
+ 'title' => 'ObjectName',
+ 'copyright' => 'Copyright',
+ # Source as in original device used to make image
+ # not as in who gave you the image
+ 'source' => 'Model',
+ 'software' => 'Software',
+ 'disclaimer' => 'Disclaimer',
+ 'warning' => 'ContentWarning',
+ 'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
+ 'label' => 'Label',
+ 'creation time' => 'DateTimeDigitized',
+ /* Other potentially useful things - Document */
+ );
+
$frameCount = 0;
$loopCount = 1;
+ $text = array();
$duration = 0.0;
+ $bitDepth = 0;
+ $colorType = 'unknown';
- if (!$filename)
+ if ( !$filename ) {
throw new Exception( __METHOD__ . ": No file name specified" );
- elseif ( !file_exists($filename) || is_dir($filename) )
+ } elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
throw new Exception( __METHOD__ . ": File $filename does not exist" );
-
- $fh = fopen( $filename, 'r' );
-
- if (!$fh) {
+ }
+
+ $fh = fopen( $filename, 'rb' );
+
+ if ( !$fh ) {
throw new Exception( __METHOD__ . ": Unable to open file $filename" );
}
-
+
// Check for the PNG header
$buf = fread( $fh, 8 );
if ( $buf != self::$png_sig ) {
@@ -44,22 +79,54 @@ class PNGMetadataExtractor {
}
// Read chunks
- while( !feof( $fh ) ) {
+ while ( !feof( $fh ) ) {
$buf = fread( $fh, 4 );
- if( !$buf ) {
+ if ( !$buf || strlen( $buf ) < 4 ) {
throw new Exception( __METHOD__ . ": Read error" );
}
- $chunk_size = unpack( "N", $buf);
+ $chunk_size = unpack( "N", $buf );
$chunk_size = $chunk_size[1];
+ if ( $chunk_size < 0 ) {
+ throw new Exception( __METHOD__ . ": Chunk size too big for unpack" );
+ }
+
$chunk_type = fread( $fh, 4 );
- if( !$chunk_type ) {
+ if ( !$chunk_type || strlen( $chunk_type ) < 4 ) {
throw new Exception( __METHOD__ . ": Read error" );
}
- if ( $chunk_type == "acTL" ) {
+ if ( $chunk_type == "IHDR" ) {
+ $buf = self::read( $fh, $chunk_size );
+ if ( !$buf || strlen( $buf ) < $chunk_size ) {
+ throw new Exception( __METHOD__ . ": Read error" );
+ }
+ $bitDepth = ord( substr( $buf, 8, 1 ) );
+ // Detect the color type in British English as per the spec
+ // http://www.w3.org/TR/PNG/#11IHDR
+ switch ( ord( substr( $buf, 9, 1 ) ) ) {
+ case 0:
+ $colorType = 'greyscale';
+ break;
+ case 2:
+ $colorType = 'truecolour';
+ break;
+ case 3:
+ $colorType = 'index-coloured';
+ break;
+ case 4:
+ $colorType = 'greyscale-alpha';
+ break;
+ case 6:
+ $colorType = 'truecolour-alpha';
+ break;
+ default:
+ $colorType = 'unknown';
+ break;
+ }
+ } elseif ( $chunk_type == "acTL" ) {
$buf = fread( $fh, $chunk_size );
- if( !$buf ) {
+ if( !$buf || strlen( $buf ) < $chunk_size || $chunk_size < 4 ) {
throw new Exception( __METHOD__ . ": Read error" );
}
@@ -67,20 +134,216 @@ class PNGMetadataExtractor {
$frameCount = $actl['frames'];
$loopCount = $actl['plays'];
} elseif ( $chunk_type == "fcTL" ) {
- $buf = fread( $fh, $chunk_size );
- if( !$buf ) {
+ $buf = self::read( $fh, $chunk_size );
+ if ( !$buf || strlen( $buf ) < $chunk_size ) {
+ throw new Exception( __METHOD__ . ": Read error" );
+ }
+ $buf = substr( $buf, 20 );
+ if ( strlen( $buf ) < 4 ) {
throw new Exception( __METHOD__ . ": Read error" );
}
- $buf = substr( $buf, 20 );
$fctldur = unpack( "ndelay_num/ndelay_den", $buf );
- if( $fctldur['delay_den'] == 0 ) $fctldur['delay_den'] = 100;
- if( $fctldur['delay_num'] ) {
+ if ( $fctldur['delay_den'] == 0 ) {
+ $fctldur['delay_den'] = 100;
+ }
+ if ( $fctldur['delay_num'] ) {
$duration += $fctldur['delay_num'] / $fctldur['delay_den'];
}
- } elseif ( ( $chunk_type == "IDAT" || $chunk_type == "IEND" ) && $frameCount == 0 ) {
- // Not a valid animated image. No point in continuing.
- break;
+ } elseif ( $chunk_type == "iTXt" ) {
+ // Extracts iTXt chunks, uncompressing if necessary.
+ $buf = self::read( $fh, $chunk_size );
+ $items = array();
+ if ( preg_match(
+ '/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds',
+ $buf, $items )
+ ) {
+ /* $items[1] = text chunk name, $items[2] = compressed flag,
+ * $items[3] = lang code (or ""), $items[4]= compression type.
+ * $items[5] = content
+ */
+
+ // Theoretically should be case-sensitive, but in practice...
+ $items[1] = strtolower( $items[1] );
+ if ( !isset( self::$text_chunks[$items[1]] ) ) {
+ // Only extract textual chunks on our list.
+ fseek( $fh, self::$CRC_size, SEEK_CUR );
+ continue;
+ }
+
+ $items[3] = strtolower( $items[3] );
+ if ( $items[3] == '' ) {
+ // if no lang specified use x-default like in xmp.
+ $items[3] = 'x-default';
+ }
+
+ // if compressed
+ if ( $items[2] == "\x01" ) {
+ if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) {
+ wfSuppressWarnings();
+ $items[5] = gzuncompress( $items[5] );
+ wfRestoreWarnings();
+
+ if ( $items[5] === false ) {
+ // decompression failed
+ wfDebug( __METHOD__ . ' Error decompressing iTxt chunk - ' . $items[1] );
+ fseek( $fh, self::$CRC_size, SEEK_CUR );
+ continue;
+ }
+
+ } else {
+ wfDebug( __METHOD__ . ' Skipping compressed png iTXt chunk due to lack of zlib,'
+ . ' or potentially invalid compression method' );
+ fseek( $fh, self::$CRC_size, SEEK_CUR );
+ continue;
+ }
+ }
+ $finalKeyword = self::$text_chunks[ $items[1] ];
+ $text[ $finalKeyword ][ $items[3] ] = $items[5];
+ $text[ $finalKeyword ]['_type'] = 'lang';
+
+ } else {
+ // Error reading iTXt chunk
+ throw new Exception( __METHOD__ . ": Read error on iTXt chunk" );
+ }
+
+ } elseif ( $chunk_type == 'tEXt' ) {
+ $buf = self::read( $fh, $chunk_size );
+
+ // In case there is no \x00 which will make explode fail.
+ if ( strpos( $buf, "\x00" ) === false ) {
+ throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
+ }
+
+ list( $keyword, $content ) = explode( "\x00", $buf, 2 );
+ if ( $keyword === '' || $content === '' ) {
+ throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
+ }
+
+ // Theoretically should be case-sensitive, but in practice...
+ $keyword = strtolower( $keyword );
+ if ( !isset( self::$text_chunks[ $keyword ] ) ) {
+ // Don't recognize chunk, so skip.
+ fseek( $fh, self::$CRC_size, SEEK_CUR );
+ continue;
+ }
+ wfSuppressWarnings();
+ $content = iconv( 'ISO-8859-1', 'UTF-8', $content );
+ wfRestoreWarnings();
+
+ if ( $content === false ) {
+ throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
+ }
+
+ $finalKeyword = self::$text_chunks[ $keyword ];
+ $text[ $finalKeyword ][ 'x-default' ] = $content;
+ $text[ $finalKeyword ]['_type'] = 'lang';
+
+ } elseif ( $chunk_type == 'zTXt' ) {
+ if ( function_exists( 'gzuncompress' ) ) {
+ $buf = self::read( $fh, $chunk_size );
+
+ // In case there is no \x00 which will make explode fail.
+ if ( strpos( $buf, "\x00" ) === false ) {
+ throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
+ }
+
+ list( $keyword, $postKeyword ) = explode( "\x00", $buf, 2 );
+ if ( $keyword === '' || $postKeyword === '' ) {
+ throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
+ }
+ // Theoretically should be case-sensitive, but in practice...
+ $keyword = strtolower( $keyword );
+
+ if ( !isset( self::$text_chunks[ $keyword ] ) ) {
+ // Don't recognize chunk, so skip.
+ fseek( $fh, self::$CRC_size, SEEK_CUR );
+ continue;
+ }
+ $compression = substr( $postKeyword, 0, 1 );
+ $content = substr( $postKeyword, 1 );
+ if ( $compression !== "\x00" ) {
+ wfDebug( __METHOD__ . " Unrecognized compression method in zTXt ($keyword). Skipping." );
+ fseek( $fh, self::$CRC_size, SEEK_CUR );
+ continue;
+ }
+
+ wfSuppressWarnings();
+ $content = gzuncompress( $content );
+ wfRestoreWarnings();
+
+ if ( $content === false ) {
+ // decompression failed
+ wfDebug( __METHOD__ . ' Error decompressing zTXt chunk - ' . $keyword );
+ fseek( $fh, self::$CRC_size, SEEK_CUR );
+ continue;
+ }
+
+ wfSuppressWarnings();
+ $content = iconv( 'ISO-8859-1', 'UTF-8', $content );
+ wfRestoreWarnings();
+
+ if ( $content === false ) {
+ throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
+ }
+
+ $finalKeyword = self::$text_chunks[ $keyword ];
+ $text[ $finalKeyword ][ 'x-default' ] = $content;
+ $text[ $finalKeyword ]['_type'] = 'lang';
+
+ } else {
+ wfDebug( __METHOD__ . " Cannot decompress zTXt chunk due to lack of zlib. Skipping." );
+ fseek( $fh, $chunk_size, SEEK_CUR );
+ }
+ } elseif ( $chunk_type == 'tIME' ) {
+ // last mod timestamp.
+ if ( $chunk_size !== 7 ) {
+ throw new Exception( __METHOD__ . ": tIME wrong size" );
+ }
+ $buf = self::read( $fh, $chunk_size );
+ if ( !$buf || strlen( $buf ) < $chunk_size ) {
+ throw new Exception( __METHOD__ . ": Read error" );
+ }
+
+ // Note: spec says this should be UTC.
+ $t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf );
+ $strTime = sprintf( "%04d%02d%02d%02d%02d%02d",
+ $t['y'], $t['m'], $t['d'], $t['h'],
+ $t['min'], $t['s'] );
+
+ $exifTime = wfTimestamp( TS_EXIF, $strTime );
+
+ if ( $exifTime ) {
+ $text['DateTime'] = $exifTime;
+ }
+
+ } elseif ( $chunk_type == 'pHYs' ) {
+ // how big pixels are (dots per meter).
+ if ( $chunk_size !== 9 ) {
+ throw new Exception( __METHOD__ . ": pHYs wrong size" );
+ }
+
+ $buf = self::read( $fh, $chunk_size );
+ if ( !$buf || strlen( $buf ) < $chunk_size ) {
+ throw new Exception( __METHOD__ . ": Read error" );
+ }
+
+ $dim = unpack( "Nwidth/Nheight/Cunit", $buf );
+ if ( $dim['unit'] == 1 ) {
+ // Need to check for negative because php
+ // doesn't deal with super-large unsigned 32-bit ints well
+ if ( $dim['width'] > 0 && $dim['height'] > 0 ) {
+ // unit is meters
+ // (as opposed to 0 = undefined )
+ $text['XResolution'] = $dim['width']
+ . '/100';
+ $text['YResolution'] = $dim['height']
+ . '/100';
+ $text['ResolutionUnit'] = 3;
+ // 3 = dots per cm (from Exif).
+ }
+ }
+
} elseif ( $chunk_type == "IEND" ) {
break;
} else {
@@ -90,15 +353,59 @@ class PNGMetadataExtractor {
}
fclose( $fh );
- if( $loopCount > 1 ) {
+ if ( $loopCount > 1 ) {
$duration *= $loopCount;
}
+ if ( isset( $text['DateTimeDigitized'] ) ) {
+ // Convert date format from rfc2822 to exif.
+ foreach ( $text['DateTimeDigitized'] as $name => &$value ) {
+ if ( $name === '_type' ) {
+ continue;
+ }
+
+ // @todo FIXME: Currently timezones are ignored.
+ // possibly should be wfTimestamp's
+ // responsibility. (at least for numeric TZ)
+ $formatted = wfTimestamp( TS_EXIF, $value );
+ if ( $formatted ) {
+ // Only change if we could convert the
+ // date.
+ // The png standard says it should be
+ // in rfc2822 format, but not required.
+ // In general for the exif stuff we
+ // prettify the date if we can, but we
+ // display as-is if we cannot or if
+ // it is invalid.
+ // So do the same here.
+
+ $value = $formatted;
+ }
+ }
+ }
return array(
'frameCount' => $frameCount,
'loopCount' => $loopCount,
- 'duration' => $duration
+ 'duration' => $duration,
+ 'text' => $text,
+ 'bitDepth' => $bitDepth,
+ 'colorType' => $colorType,
);
-
+
+ }
+ /**
+ * Read a chunk, checking to make sure it's not too big.
+ *
+ * @param $fh resource The file handle
+ * @param $size Integer size in bytes.
+ * @throws Exception if too big.
+ * @return String The chunk.
+ */
+ static private function read( $fh, $size ) {
+ if ( $size > self::MAX_CHUNK_SIZE ) {
+ throw new Exception( __METHOD__ . ': Chunk size of ' . $size .
+ ' too big. Max size is: ' . self::MAX_CHUNK_SIZE );
+ }
+ return fread( $fh, $size );
}
}
diff --git a/includes/media/SVG.php b/includes/media/SVG.php
index a78be952..ceffd7c3 100644
--- a/includes/media/SVG.php
+++ b/includes/media/SVG.php
@@ -32,6 +32,10 @@ class SvgHandler extends ImageHandler {
return true;
}
+ /**
+ * @param $file File
+ * @return bool
+ */
function isAnimatedImage( $file ) {
# TODO: detect animated SVGs
$metadata = $file->getMetadata();
@@ -44,14 +48,17 @@ class SvgHandler extends ImageHandler {
return false;
}
+ /**
+ * @param $image File
+ * @param $params
+ * @return bool
+ */
function normaliseParams( $image, &$params ) {
global $wgSVGMaxSize;
if ( !parent::normaliseParams( $image, $params ) ) {
return false;
}
# Don't make an image bigger than wgMaxSVGSize on the smaller side
- $params['physicalWidth'] = $params['width'];
- $params['physicalHeight'] = $params['height'];
if ( $params['physicalWidth'] <= $params['physicalHeight'] ) {
if ( $params['physicalWidth'] > $wgSVGMaxSize ) {
$srcWidth = $image->getWidth( $params['page'] );
@@ -70,6 +77,14 @@ class SvgHandler extends ImageHandler {
return true;
}
+ /**
+ * @param $image File
+ * @param $dstPath
+ * @param $dstUrl
+ * @param $params
+ * @param int $flags
+ * @return bool|MediaTransformError|ThumbnailImage|TransformParameterError
+ */
function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
if ( !$this->normaliseParams( $image, $params ) ) {
return new TransformParameterError( $params );
@@ -97,7 +112,7 @@ class SvgHandler extends ImageHandler {
}
}
- /*
+ /**
* Transform an SVG file to PNG
* This function can be called outside of thumbnail contexts
* @param string $srcPath
@@ -111,19 +126,32 @@ class SvgHandler extends ImageHandler {
$err = false;
$retval = '';
if ( isset( $wgSVGConverters[$wgSVGConverter] ) ) {
- $cmd = str_replace(
- array( '$path/', '$width', '$height', '$input', '$output' ),
- array( $wgSVGConverterPath ? wfEscapeShellArg( "$wgSVGConverterPath/" ) : "",
- intval( $width ),
- intval( $height ),
- wfEscapeShellArg( $srcPath ),
- wfEscapeShellArg( $dstPath ) ),
- $wgSVGConverters[$wgSVGConverter]
- ) . " 2>&1";
- wfProfileIn( 'rsvg' );
- wfDebug( __METHOD__.": $cmd\n" );
- $err = wfShellExec( $cmd, $retval );
- wfProfileOut( 'rsvg' );
+ if ( is_array( $wgSVGConverters[$wgSVGConverter] ) ) {
+ // This is a PHP callable
+ $func = $wgSVGConverters[$wgSVGConverter][0];
+ $args = array_merge( array( $srcPath, $dstPath, $width, $height ),
+ array_slice( $wgSVGConverters[$wgSVGConverter], 1 ) );
+ if ( !is_callable( $func ) ) {
+ throw new MWException( "$func is not callable" );
+ }
+ $err = call_user_func_array( $func, $args );
+ $retval = (bool)$err;
+ } else {
+ // External command
+ $cmd = str_replace(
+ array( '$path/', '$width', '$height', '$input', '$output' ),
+ array( $wgSVGConverterPath ? wfEscapeShellArg( "$wgSVGConverterPath/" ) : "",
+ intval( $width ),
+ intval( $height ),
+ wfEscapeShellArg( $srcPath ),
+ wfEscapeShellArg( $dstPath ) ),
+ $wgSVGConverters[$wgSVGConverter]
+ ) . " 2>&1";
+ wfProfileIn( 'rsvg' );
+ wfDebug( __METHOD__.": $cmd\n" );
+ $err = wfShellExec( $cmd, $retval );
+ wfProfileOut( 'rsvg' );
+ }
}
$removed = $this->removeBadFile( $dstPath, $retval );
if ( $retval != 0 || $removed ) {
@@ -133,7 +161,27 @@ class SvgHandler extends ImageHandler {
}
return true;
}
+
+ public static function rasterizeImagickExt( $srcPath, $dstPath, $width, $height ) {
+ $im = new Imagick( $srcPath );
+ $im->setImageFormat( 'png' );
+ $im->setBackgroundColor( 'transparent' );
+ $im->setImageDepth( 8 );
+
+ if ( !$im->thumbnailImage( intval( $width ), intval( $height ), /* fit */ false ) ) {
+ return 'Could not resize image';
+ }
+ if ( !$im->writeImage( $dstPath ) ) {
+ return "Could not write to $dstPath";
+ }
+ }
+ /**
+ * @param $file File
+ * @param $path
+ * @param bool $metadata
+ * @return array
+ */
function getImageSize( $file, $path, $metadata = false ) {
if ( $metadata === false ) {
$metadata = $file->getMetaData();
@@ -150,6 +198,10 @@ class SvgHandler extends ImageHandler {
return array( 'png', 'image/png' );
}
+ /**
+ * @param $file File
+ * @return string
+ */
function getLongDesc( $file ) {
global $wgLang;
return wfMsgExt( 'svg-long-desc', 'parseinline',
@@ -171,7 +223,9 @@ class SvgHandler extends ImageHandler {
}
function unpackMetadata( $metadata ) {
- $unser = @unserialize( $metadata );
+ wfSuppressWarnings();
+ $unser = unserialize( $metadata );
+ wfRestoreWarnings();
if ( isset( $unser['version'] ) && $unser['version'] == self::SVG_METADATA_VERSION ) {
return $unser;
} else {
@@ -192,6 +246,10 @@ class SvgHandler extends ImageHandler {
return $fields;
}
+ /**
+ * @param $file File
+ * @return array|bool
+ */
function formatMetadata( $file ) {
$result = array(
'visible' => array(),
diff --git a/includes/media/SVGMetadataExtractor.php b/includes/media/SVGMetadataExtractor.php
index 66ae1edf..22ef8e61 100644
--- a/includes/media/SVGMetadataExtractor.php
+++ b/includes/media/SVGMetadataExtractor.php
@@ -55,7 +55,7 @@ class SVGReader {
$size = filesize( $source );
if ( $size === false ) {
throw new MWException( "Error getting filesize of SVG." );
- }
+ }
if ( $size > $wgSVGMetadataCutoff ) {
$this->debug( "SVG is $size bytes, which is bigger than $wgSVGMetadataCutoff. Truncating." );
@@ -84,14 +84,14 @@ class SVGReader {
wfRestoreWarnings();
}
- /*
+ /**
* @return Array with the known metadata
*/
public function getMetadata() {
return $this->metadata;
}
- /*
+ /**
* Read the SVG
*/
public function read() {
@@ -139,10 +139,12 @@ class SVGReader {
$keepReading = $this->reader->next();
}
+ $this->reader->close();
+
return true;
}
- /*
+ /**
* Read a textelement from an element
*
* @param String $name of the element that we are reading from
@@ -155,7 +157,7 @@ class SVGReader {
}
$keepReading = $this->reader->read();
while( $keepReading ) {
- if( $this->reader->localName == $name && $this->namespaceURI == self::NS_SVG && $this->reader->nodeType == XmlReader::END_ELEMENT ) {
+ if( $this->reader->localName == $name && $this->reader->namespaceURI == self::NS_SVG && $this->reader->nodeType == XmlReader::END_ELEMENT ) {
break;
} elseif( $this->reader->nodeType == XmlReader::TEXT ){
$this->metadata[$metafield] = trim( $this->reader->value );
@@ -175,20 +177,27 @@ class SVGReader {
return;
}
// TODO: find and store type of xml snippet. metadata['metadataType'] = "rdf"
- $this->metadata[$metafield] = trim( $this->reader->readInnerXML() );
+ if( method_exists( $this->reader, 'readInnerXML' ) ) {
+ $this->metadata[$metafield] = trim( $this->reader->readInnerXML() );
+ } else {
+ throw new MWException( "The PHP XMLReader extension does not come with readInnerXML() method. Your libxml is probably out of date (need 2.6.20 or later)." );
+ }
$this->reader->next();
}
- /*
+ /**
* Filter all children, looking for animate elements
*
* @param String $name of the element that we are reading from
*/
private function animateFilter( $name ) {
- $this->debug ( "animate filter" );
+ $this->debug ( "animate filter for tag $name" );
if( $this->reader->nodeType != XmlReader::ELEMENT ) {
return;
}
+ if ( $this->reader->isEmptyElement ) {
+ return;
+ }
$exitDepth = $this->reader->depth;
$keepReading = $this->reader->read();
while( $keepReading ) {
@@ -230,7 +239,7 @@ class SVGReader {
wfDebug( "SVGReader WARN: $data\n" );
}
- /*
+ /**
* Parse the attributes of an SVG element
*
* The parser has to be in the start element of <svg>
diff --git a/includes/media/Tiff.php b/includes/media/Tiff.php
index 8773201f..0f317e1a 100644
--- a/includes/media/Tiff.php
+++ b/includes/media/Tiff.php
@@ -11,27 +11,74 @@
*
* @ingroup Media
*/
-class TiffHandler extends BitmapHandler {
+class TiffHandler extends ExifBitmapHandler {
/**
* Conversion to PNG for inline display can be disabled here...
* Note scaling should work with ImageMagick, but may not with GD scaling.
+ *
+ * Files pulled from another MediaWiki instance via ForeignAPIRepo /
+ * InstantCommons will have thumbnails managed from the remote instance,
+ * so we can skip this check.
+ *
+ * @param $file
+ *
+ * @return bool
*/
function canRender( $file ) {
global $wgTiffThumbnailType;
- return (bool)$wgTiffThumbnailType;
+ return (bool)$wgTiffThumbnailType
+ || ($file->getRepo() instanceof ForeignAPIRepo);
}
/**
* Browsers don't support TIFF inline generally...
* For inline display, we need to convert to PNG.
+ *
+ * @param $file
+ *
+ * @return bool
*/
function mustRender( $file ) {
return true;
}
+ /**
+ * @param $ext
+ * @param $mime
+ * @param $params
+ * @return bool
+ */
function getThumbType( $ext, $mime, $params = null ) {
global $wgTiffThumbnailType;
return $wgTiffThumbnailType;
}
+
+ /**
+ * @param $image
+ * @param $filename
+ * @return string
+ */
+ function getMetadata( $image, $filename ) {
+ global $wgShowEXIF;
+ if ( $wgShowEXIF ) {
+ try {
+ $meta = BitmapMetadataHandler::Tiff( $filename );
+ if ( !is_array( $meta ) ) {
+ // This should never happen, but doesn't hurt to be paranoid.
+ throw new MWException('Metadata array is not an array');
+ }
+ $meta['MEDIAWIKI_EXIF_VERSION'] = Exif::version();
+ return serialize( $meta );
+ }
+ catch ( MWException $e ) {
+ // BitmapMetadataHandler throws an exception in certain exceptional
+ // cases like if file does not exist.
+ wfDebug( __METHOD__ . ': ' . $e->getMessage() . "\n" );
+ return ExifBitmapHandler::BROKEN_FILE;
+ }
+ } else {
+ return '';
+ }
+ }
}
diff --git a/includes/media/XMP.php b/includes/media/XMP.php
new file mode 100644
index 00000000..1e578582
--- /dev/null
+++ b/includes/media/XMP.php
@@ -0,0 +1,1174 @@
+<?php
+/**
+* Class for reading xmp data containing properties relevant to
+* images, and spitting out an array that FormatExif accepts.
+*
+* Note, this is not meant to recognize every possible thing you can
+* encode in XMP. It should recognize all the properties we want.
+* For example it doesn't have support for structures with multiple
+* nesting levels, as none of the properties we're supporting use that
+* feature. If it comes across properties it doesn't recognize, it should
+* ignore them.
+*
+* The public methods one would call in this class are
+* - parse( $content )
+* Reads in xmp content.
+* Can potentially be called multiple times with partial data each time.
+* - parseExtended( $content )
+* Reads XMPExtended blocks (jpeg files only).
+* - getResults
+* Outputs a results array.
+*
+* Note XMP kind of looks like rdf. They are not the same thing - XMP is
+* encoded as a specific subset of rdf. This class can read XMP. It cannot
+* read rdf.
+*
+*/
+class XMPReader {
+
+ private $curItem = array(); // array to hold the current element (and previous element, and so on)
+ private $ancestorStruct = false; // the structure name when processing nested structures.
+ private $charContent = false; // temporary holder for character data that appears in xmp doc.
+ private $mode = array(); // stores the state the xmpreader is in (see MODE_FOO constants)
+ private $results = array(); // array to hold results
+ private $processingArray = false; // if we're doing a seq or bag.
+ private $itemLang = false; // used for lang alts only
+
+ private $xmlParser;
+ private $charset = false;
+ private $extendedXMPOffset = 0;
+
+ protected $items;
+
+ /**
+ * These are various mode constants.
+ * they are used to figure out what to do
+ * with an element when it's encountered.
+ *
+ * For example, MODE_IGNORE is used when processing
+ * a property we're not interested in. So if a new
+ * element pops up when we're in that mode, we ignore it.
+ */
+ const MODE_INITIAL = 0;
+ const MODE_IGNORE = 1;
+ const MODE_LI = 2;
+ const MODE_LI_LANG = 3;
+ const MODE_QDESC = 4;
+
+ // The following MODE constants are also used in the
+ // $items array to denote what type of property the item is.
+ const MODE_SIMPLE = 10;
+ const MODE_STRUCT = 11; // structure (associative array)
+ const MODE_SEQ = 12; // ordered list
+ const MODE_BAG = 13; // unordered list
+ const MODE_LANG = 14;
+ const MODE_ALT = 15; // non-language alt. Currently not implemented, and not needed atm.
+ const MODE_BAGSTRUCT = 16; // A BAG of Structs.
+
+ const NS_RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
+ const NS_XML = 'http://www.w3.org/XML/1998/namespace';
+
+
+ /**
+ * Constructor.
+ *
+ * Primary job is to initialize the XMLParser
+ */
+ function __construct() {
+
+ if ( !function_exists( 'xml_parser_create_ns' ) ) {
+ // this should already be checked by this point
+ throw new MWException( 'XMP support requires XML Parser' );
+ }
+
+ $this->items = XMPInfo::getItems();
+
+ $this->resetXMLParser();
+
+ }
+ /**
+ * Main use is if a single item has multiple xmp documents describing it.
+ * For example in jpeg's with extendedXMP
+ */
+ private function resetXMLParser() {
+
+ if ($this->xmlParser) {
+ //is this needed?
+ xml_parser_free( $this->xmlParser );
+ }
+
+ $this->xmlParser = xml_parser_create_ns( 'UTF-8', ' ' );
+ xml_parser_set_option( $this->xmlParser, XML_OPTION_CASE_FOLDING, 0 );
+ xml_parser_set_option( $this->xmlParser, XML_OPTION_SKIP_WHITE, 1 );
+
+ xml_set_element_handler( $this->xmlParser,
+ array( $this, 'startElement' ),
+ array( $this, 'endElement' ) );
+
+ xml_set_character_data_handler( $this->xmlParser, array( $this, 'char' ) );
+ }
+
+ /** Destroy the xml parser
+ *
+ * Not sure if this is actually needed.
+ */
+ function __destruct() {
+ // not sure if this is needed.
+ xml_parser_free( $this->xmlParser );
+ }
+
+ /** Get the result array. Do some post-processing before returning
+ * the array, and transform any metadata that is special-cased.
+ *
+ * @return Array array of results as an array of arrays suitable for
+ * FormatMetadata::getFormattedData().
+ */
+ public function getResults() {
+ // xmp-special is for metadata that affects how stuff
+ // is extracted. For example xmpNote:HasExtendedXMP.
+
+ // It is also used to handle photoshop:AuthorsPosition
+ // which is weird and really part of another property,
+ // see 2:85 in IPTC. See also pg 21 of IPTC4XMP standard.
+ // The location fields also use it.
+
+ $data = $this->results;
+
+ wfRunHooks('XMPGetResults', Array(&$data));
+
+ if ( isset( $data['xmp-special']['AuthorsPosition'] )
+ && is_string( $data['xmp-special']['AuthorsPosition'] )
+ && isset( $data['xmp-general']['Artist'][0] )
+ ) {
+ // Note, if there is more than one creator,
+ // this only applies to first. This also will
+ // only apply to the dc:Creator prop, not the
+ // exif:Artist prop.
+
+ $data['xmp-general']['Artist'][0] =
+ $data['xmp-special']['AuthorsPosition'] . ', '
+ . $data['xmp-general']['Artist'][0];
+ }
+
+ // Go through the LocationShown and LocationCreated
+ // changing it to the non-hierarchal form used by
+ // the other location fields.
+
+ if ( isset( $data['xmp-special']['LocationShown'][0] )
+ && is_array( $data['xmp-special']['LocationShown'][0] )
+ ) {
+ // the is_array is just paranoia. It should always
+ // be an array.
+ foreach( $data['xmp-special']['LocationShown'] as $loc ) {
+ if ( !is_array( $loc ) ) {
+ // To avoid copying over the _type meta-fields.
+ continue;
+ }
+ foreach( $loc as $field => $val ) {
+ $data['xmp-general'][$field . 'Dest'][] = $val;
+ }
+ }
+ }
+ if ( isset( $data['xmp-special']['LocationCreated'][0] )
+ && is_array( $data['xmp-special']['LocationCreated'][0] )
+ ) {
+ // the is_array is just paranoia. It should always
+ // be an array.
+ foreach( $data['xmp-special']['LocationCreated'] as $loc ) {
+ if ( !is_array( $loc ) ) {
+ // To avoid copying over the _type meta-fields.
+ continue;
+ }
+ foreach( $loc as $field => $val ) {
+ $data['xmp-general'][$field . 'Created'][] = $val;
+ }
+ }
+ }
+
+
+ // We don't want to return the special values, since they're
+ // special and not info to be stored about the file.
+ unset( $data['xmp-special'] );
+
+ // Convert GPSAltitude to negative if below sea level.
+ if ( isset( $data['xmp-exif']['GPSAltitudeRef'] ) ) {
+ if ( $data['xmp-exif']['GPSAltitudeRef'] == '1'
+ && isset( $data['xmp-exif']['GPSAltitude'] )
+ ) {
+ $data['xmp-exif']['GPSAltitude'] *= -1;
+ }
+ unset( $data['xmp-exif']['GPSAltitudeRef'] );
+ }
+
+ return $data;
+ }
+
+ /**
+ * Main function to call to parse XMP. Use getResults to
+ * get results.
+ *
+ * Also catches any errors during processing, writes them to
+ * debug log, blanks result array and returns false.
+ *
+ * @param String: $content XMP data
+ * @param Boolean: $allOfIt If this is all the data (true) or if it's split up (false). Default true
+ * @param Boolean: $reset - does xml parser need to be reset. Default false
+ * @return Boolean success.
+ */
+ public function parse( $content, $allOfIt = true, $reset = false ) {
+ if ( $reset ) {
+ $this->resetXMLParser();
+ }
+ try {
+
+ // detect encoding by looking for BOM which is supposed to be in processing instruction.
+ // see page 12 of http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart3.pdf
+ if ( !$this->charset ) {
+ $bom = array();
+ if ( preg_match( '/\xEF\xBB\xBF|\xFE\xFF|\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\xFF\xFE/',
+ $content, $bom )
+ ) {
+ switch ( $bom[0] ) {
+ case "\xFE\xFF":
+ $this->charset = 'UTF-16BE';
+ break;
+ case "\xFF\xFE":
+ $this->charset = 'UTF-16LE';
+ break;
+ case "\x00\x00\xFE\xFF":
+ $this->charset = 'UTF-32BE';
+ break;
+ case "\xFF\xFE\x00\x00":
+ $this->charset = 'UTF-32LE';
+ break;
+ case "\xEF\xBB\xBF":
+ $this->charset = 'UTF-8';
+ break;
+ default:
+ //this should be impossible to get to
+ throw new MWException("Invalid BOM");
+ break;
+
+ }
+
+ } else {
+ // standard specifically says, if no bom assume utf-8
+ $this->charset = 'UTF-8';
+ }
+ }
+ if ( $this->charset !== 'UTF-8' ) {
+ //don't convert if already utf-8
+ wfSuppressWarnings();
+ $content = iconv( $this->charset, 'UTF-8//IGNORE', $content );
+ wfRestoreWarnings();
+ }
+
+ $ok = xml_parse( $this->xmlParser, $content, $allOfIt );
+ if ( !$ok ) {
+ $error = xml_error_string( xml_get_error_code( $this->xmlParser ) );
+ $where = 'line: ' . xml_get_current_line_number( $this->xmlParser )
+ . ' column: ' . xml_get_current_column_number( $this->xmlParser )
+ . ' byte offset: ' . xml_get_current_byte_index( $this->xmlParser );
+
+ wfDebugLog( 'XMP', "XMPReader::parse : Error reading XMP content: $error ($where)" );
+ $this->results = array(); // blank if error.
+ return false;
+ }
+ } catch ( MWException $e ) {
+ wfDebugLog( 'XMP', 'XMP parse error: ' . $e );
+ $this->results = array();
+ return false;
+ }
+ return true;
+ }
+
+ /**
+  * Entry point for XMPExtended blocks in jpeg files
+  *
+  * Layout of $content as read here: bytes 0-31 are the GUID, bytes
+  * 32-35 a big-endian length and bytes 36-39 a big-endian offset; the
+  * XMP data of this block starts at byte 40. Per the XMP spec the
+  * length field is the full length of the whole ExtendedXMP
+  * serialization (not of this block), and the offset is where this
+  * block's data belongs within it.
+  *
+  * Blocks are only accepted in order: the offset must equal the
+  * number of bytes of extended XMP consumed so far.
+  *
+  * @todo In serious need of testing
+  * @see http://www.adobe.ge/devnet/xmp/pdfs/XMPSpecificationPart3.pdf XMP spec part 3 page 20
+  * @param String $content XMPExtended block minus the namespace signature
+  * @return Boolean If it succeeded.
+  */
+ public function parseExtended( $content ) {
+ 	// @todo FIXME: This is untested. Hard to find example files
+ 	// or programs that make such files..
+ 	$guid = substr( $content, 0, 32 );
+ 	if ( !isset( $this->results['xmp-special']['HasExtendedXMP'] )
+ 		|| $this->results['xmp-special']['HasExtendedXMP'] !== $guid ) {
+ 		wfDebugLog('XMP', __METHOD__ . " Ignoring XMPExtended block due to wrong guid (guid= '$guid' )");
+ 		return false;
+ 	}
+ 	$len = unpack( 'Nlength/Noffset', substr( $content, 32, 8 ) );
+
+ 	if (!$len || $len['length'] < 4 || $len['offset'] < 0 || $len['offset'] > $len['length'] ) {
+ 		wfDebugLog('XMP', __METHOD__ . 'Error reading extended XMP block, invalid length or offset.');
+ 		return false;
+ 	}
+
+ 	// we're not very robust here. we should accept it in the wrong order. To quote
+ 	// the xmp standard:
+ 	// "A JPEG writer should write the ExtendedXMP marker segments in order, immediately following the
+ 	// StandardXMP. However, the JPEG standard does not require preservation of marker segment order. A
+ 	// robust JPEG reader should tolerate the marker segments in any order."
+ 	//
+ 	// otoh the probability that an image will have more than 128k of metadata is rather low...
+ 	// so the probability that it will have > 128k, and be in the wrong order is very low...
+
+ 	if ( $len['offset'] !== $this->extendedXMPOffset ) {
+ 		wfDebugLog('XMP', __METHOD__ . 'Ignoring XMPExtended block due to wrong order. (Offset was '
+ 			. $len['offset'] . ' but expected ' . $this->extendedXMPOffset . ')');
+ 		return false;
+ 	}
+
+ 	if ( $len['offset'] === 0 ) {
+ 		// if we're starting the extended block, we've probably already
+ 		// done the XMPStandard block, so reset.
+ 		$this->resetXMLParser();
+ 	}
+
+ 	$actualContent = substr( $content, 40 );
+
+ 	// Advance by the size of THIS block. $len['length'] is the size of
+ 	// the complete ExtendedXMP, so adding it here would make every
+ 	// following block fail the order check above.
+ 	$this->extendedXMPOffset += strlen( $actualContent );
+
+ 	// We are done once everything the header announced has been consumed.
+ 	$atEnd = ( $this->extendedXMPOffset === $len['length'] );
+
+ 	wfDebugLog('XMP', __METHOD__ . 'Parsing a XMPExtended block');
+ 	return $this->parse( $actualContent, $atEnd );
+ }
+
+ /**
+  * Character data handler
+  * Called whenever character data is found in the xmp document.
+  *
+  * Whitespace-only data is dropped, as is any data seen while in
+  * MODE_IGNORE. Data is only legal in MODE_SIMPLE and MODE_QDESC; in
+  * any other mode (or with no mode at all) an exception is thrown.
+  * Accepted data is trimmed and appended to the pending character
+  * content.
+  *
+  * As an example, this happens when we encounter XMP like:
+  * <exif:DigitalZoomRatio>0/10</exif:DigitalZoomRatio>
+  * and are processing the 0/10 bit.
+  *
+  * @param $parser XMLParser reference to the xml parser
+  * @param $data String Character data
+  * @throws MWException on invalid data
+  */
+ function char( $parser, $data ) {
+ 	$text = trim( $data );
+ 	if ( $text === "" ) {
+ 		return;
+ 	}
+
+ 	if ( !isset( $this->mode[0] ) ) {
+ 		throw new MWException( 'Unexpected character data before first rdf:Description element' );
+ 	}
+
+ 	$currentMode = $this->mode[0];
+ 	if ( $currentMode === self::MODE_IGNORE ) {
+ 		return;
+ 	}
+
+ 	$acceptsText = ( $currentMode === self::MODE_SIMPLE
+ 		|| $currentMode === self::MODE_QDESC );
+ 	if ( !$acceptsText ) {
+ 		throw new MWException( 'character data where not expected. (mode ' . $currentMode . ')' );
+ 	}
+
+ 	// to check, how does this handle w.s.
+ 	$this->charContent = ( $this->charContent === false )
+ 		? $text
+ 		: $this->charContent . $text;
+ }
+
+ /**
+  * Closing element while in MODE_IGNORE.
+  *
+  * If the element being closed is the one on top of the item stack
+  * (the element we started to ignore), pop both the item and the
+  * mode stack, which gets us out of this MODE_IGNORE entry.
+  * Any other closing element is skipped.
+  *
+  * @param $elm String Namespace of element followed by a space and then tag name of element.
+  */
+ private function endElementModeIgnore ( $elm ) {
+ 	if ( $this->curItem[0] !== $elm ) {
+ 		// Something nested inside the ignored element; nothing to do.
+ 		return;
+ 	}
+
+ 	array_shift( $this->mode );
+ 	array_shift( $this->curItem );
+ }
+
+ /**
+  * Closing element while in MODE_SIMPLE.
+  *
+  * If character content was collected, it is saved to the results
+  * array and the collected content is reset. Afterwards the item
+  * and mode stacks are popped.
+  *
+  * For example, when processing:
+  * <exif:DigitalZoomRatio>0/10</exif:DigitalZoomRatio>
+  * this deals with when we hit </exif:DigitalZoomRatio>.
+  *
+  * It is also reached for the end element of a member of a
+  * compound data structure (like an <rdf:li> of an array).
+  *
+  * @param $elm String namespace, space, and tag name.
+  */
+ private function endElementModeSimple ( $elm ) {
+ 	if ( $this->charContent !== false ) {
+ 		// Inside an array the closing tag is rdf:li, so the value is
+ 		// filed under the name on top of the item stack instead.
+ 		$property = $this->processingArray ? $this->curItem[0] : $elm;
+ 		list( $ns, $tag ) = explode( ' ', $property, 2 );
+
+ 		$this->saveValue( $ns, $tag, $this->charContent );
+ 		$this->charContent = false; // reset
+ 	}
+
+ 	array_shift( $this->curItem );
+ 	array_shift( $this->mode );
+ }
+
+ /**
+  * Closing element for the nested modes (endElement dispatches
+  * MODE_STRUCT, MODE_SEQ, MODE_BAG, MODE_LANG and MODE_BAGSTRUCT here).
+  *
+  * Note this means we hit the </closing element> not the </rdf:Seq>.
+  *
+  * For example, when processing:
+  * <exif:ISOSpeedRatings> <rdf:Seq> <rdf:li>64</rdf:li>
+  * </rdf:Seq> </exif:ISOSpeedRatings>
+  *
+  * This method is called when we hit the </exif:ISOSpeedRatings> tag.
+  *
+  * If the item has a 'validate' entry, the value stored for it is run
+  * through that callback and dropped from the results when the
+  * callback nulls it. Then both stacks are popped and the
+  * struct/array/language state is cleared.
+  *
+  * @param $elm String namespace . space . tag name.
+  * @throws MWException if the closing tag does not match the current item
+  */
+ private function endElementNested( $elm ) {
+ 	// The current item must be $elm, except that in MODE_STRUCT the
+ 	// closing tag may also be rdf:Description.
+ 	if ( $this->curItem[0] !== $elm ) {
+ 		$closesStructDescription = ( $elm === self::NS_RDF . ' Description'
+ 			&& $this->mode[0] === self::MODE_STRUCT );
+ 		if ( !$closesStructDescription ) {
+ 			throw new MWException( "nesting mismatch. got a </$elm> but expected a </" . $this->curItem[0] . '>' );
+ 		}
+ 	}
+
+ 	// Validate structures.
+ 	list( $ns, $tag ) = explode( ' ', $elm, 2 );
+ 	if ( isset( $this->items[$ns][$tag]['validate'] ) ) {
+ 		$info =& $this->items[$ns][$tag];
+ 		$finalName = isset( $info['map_name'] ) ? $info['map_name'] : $tag;
+ 		$group = 'xmp-' . $info['map_group'];
+
+ 		// A plain string names a static method of XMPValidate.
+ 		$validate = $info['validate'];
+ 		if ( !is_array( $validate ) ) {
+ 			$validate = array( 'XMPValidate', $validate );
+ 		}
+
+ 		if ( !isset( $this->results[$group][$finalName] ) ) {
+ 			// This can happen if all the members of the struct failed validation.
+ 			wfDebugLog( 'XMP', __METHOD__ . " <$ns:$tag> has no valid members." );
+ 		} elseif ( !is_callable( $validate ) ) {
+ 			wfDebugLog( 'XMP', __METHOD__ . " Validation function for $finalName ("
+ 				. $validate[0] . '::' . $validate[1] . '()) is not callable.' );
+ 		} else {
+ 			$val =& $this->results[$group][$finalName];
+ 			call_user_func_array( $validate, array( $info, &$val, false ) );
+ 			if ( is_null( $val ) ) {
+ 				// the idea being the validation function will unset the variable if
+ 				// its invalid.
+ 				wfDebugLog( 'XMP', __METHOD__ . " <$ns:$tag> failed validation." );
+ 				unset( $this->results[$group][$finalName] );
+ 			}
+ 		}
+ 	}
+
+ 	array_shift( $this->curItem );
+ 	array_shift( $this->mode );
+
+ 	$this->ancestorStruct = false;
+ 	$this->processingArray = false;
+ 	$this->itemLang = false;
+ }
+
+ /**
+  * Closing element while in MODE_LI or MODE_LI_LANG, i.e. the
+  * </rdf:Seq>, </rdf:Bag> or </rdf:Alt> that ends a list.
+  *
+  * Pops the mode stack and tags the collected value with a '_type'
+  * entry: 'ol' for Seq, 'ul' for Bag, and 'lang' for an Alt whose
+  * item is declared as MODE_LANG.
+  *
+  * Note we still have to hit the outer </property>
+  *
+  * For example, when processing:
+  * <exif:ISOSpeedRatings> <rdf:Seq> <rdf:li>64</rdf:li>
+  * </rdf:Seq> </exif:ISOSpeedRatings>
+  *
+  * This method is called when we hit the </rdf:Seq>.
+  * (For comparison, we call endElementModeSimple when we
+  * hit the </rdf:li>)
+  *
+  * @param $elm String namespace . ' ' . element name
+  * @throws MWException if $elm is none of rdf:Seq, rdf:Bag or rdf:Alt
+  */
+ private function endElementModeLi( $elm ) {
+ 	list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
+ 	$info = $this->items[$ns][$tag];
+ 	$finalName = isset( $info['map_name'] ) ? $info['map_name'] : $tag;
+ 	$group = 'xmp-' . $info['map_group'];
+
+ 	array_shift( $this->mode );
+
+ 	if ( !isset( $this->results[$group][$finalName] ) ) {
+ 		wfDebugLog( 'XMP', __METHOD__ . " Empty compund element $finalName." );
+ 		return;
+ 	}
+
+ 	$listType = null;
+ 	if ( $elm === self::NS_RDF . ' Seq' ) {
+ 		$listType = 'ol';
+ 	} elseif ( $elm === self::NS_RDF . ' Bag' ) {
+ 		$listType = 'ul';
+ 	} elseif ( $elm === self::NS_RDF . ' Alt' ) {
+ 		// extra check needed as you could theoretically have a non-language alt,
+ 		// which gets no type marker.
+ 		if ( $info['mode'] === self::MODE_LANG ) {
+ 			$listType = 'lang';
+ 		}
+ 	} else {
+ 		throw new MWException( __METHOD__ . " expected </rdf:seq> or </rdf:bag> but instead got $elm." );
+ 	}
+
+ 	if ( $listType !== null ) {
+ 		$this->results[$group][$finalName]['_type'] = $listType;
+ 	}
+ }
+
+ /**
+  * End element while in MODE_QDESC
+  * mostly when ending an element when we have a simple value
+  * that has qualifiers.
+  *
+  * On </rdf:value> the collected character content is saved under
+  * the current item and the stacks are left alone. Any other
+  * closing element pops the mode and item stacks.
+  *
+  * Qualifiers aren't all that common, and we don't do anything
+  * with them.
+  *
+  * @param $elm String namespace and element
+  */
+ private function endElementModeQDesc( $elm ) {
+ 	if ( $elm !== self::NS_RDF . ' value' ) {
+ 		array_shift( $this->mode );
+ 		array_shift( $this->curItem );
+ 		return;
+ 	}
+
+ 	list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
+ 	$this->saveValue( $ns, $tag, $this->charContent );
+ }
+
+ /**
+  * Handler for hitting a closing element.
+  *
+  * generally just calls a helper function depending on what
+  * mode we're in (the mode on top of the mode stack).
+  *
+  * Ignores the outer wrapping elements that are optional in
+  * xmp and have no meaning, as well as elements that come
+  * without a namespace.
+  *
+  * @param $parser XMLParser
+  * @param $elm String namespace . ' ' . element name
+  * @throws MWException if the mode stack is empty, if there is no
+  *  current item outside of MODE_INITIAL, or if an element other than
+  *  rdf:Description ends while in MODE_INITIAL
+  */
+ function endElement( $parser, $elm ) {
+ 	if ( $elm === ( self::NS_RDF . ' RDF' )
+ 		|| $elm === 'adobe:ns:meta/ xmpmeta'
+ 		|| $elm === 'adobe:ns:meta/ xapmeta' )
+ 	{
+ 		// ignore these.
+ 		return;
+ 	}
+
+ 	if ( $elm === self::NS_RDF . ' type' ) {
+ 		// these aren't really supported properly yet.
+ 		// However, it appears they almost never used.
+ 		wfDebugLog( 'XMP', __METHOD__ . ' encountered <rdf:type>' );
+ 	}
+
+ 	if ( strpos( $elm, ' ' ) === false ) {
+ 		// This probably shouldn't happen.
+ 		// However, there is a bug in an adobe product
+ 		// that forgets the namespace on some things.
+ 		// (Luckily they are unimportant things).
+ 		wfDebugLog( 'XMP', __METHOD__ . " Encountered </$elm> which has no namespace. Skipping." );
+ 		return;
+ 	}
+
+ 	// Check the stack itself, not its top entry: counting
+ 	// $this->mode[0] reads an undefined offset when the stack is
+ 	// empty and passes a non-countable value to count() otherwise.
+ 	if ( count( $this->mode ) === 0 ) {
+ 		// This should never ever happen and means
+ 		// there is a pretty major bug in this class.
+ 		throw new MWException( 'Encountered end element with no mode' );
+ 	}
+
+ 	if ( count( $this->curItem ) == 0 && $this->mode[0] !== self::MODE_INITIAL ) {
+ 		// just to be paranoid. Should always have a curItem, except for initially
+ 		// (aka during MODE_INITAL).
+ 		throw new MWException( "Hit end element </$elm> but no curItem" );
+ 	}
+
+ 	switch( $this->mode[0] ) {
+ 		case self::MODE_IGNORE:
+ 			$this->endElementModeIgnore( $elm );
+ 			break;
+ 		case self::MODE_SIMPLE:
+ 			$this->endElementModeSimple( $elm );
+ 			break;
+ 		case self::MODE_STRUCT:
+ 		case self::MODE_SEQ:
+ 		case self::MODE_BAG:
+ 		case self::MODE_LANG:
+ 		case self::MODE_BAGSTRUCT:
+ 			$this->endElementNested( $elm );
+ 			break;
+ 		case self::MODE_INITIAL:
+ 			if ( $elm === self::NS_RDF . ' Description' ) {
+ 				array_shift( $this->mode );
+ 			} else {
+ 				throw new MWException( 'Element ended unexpectedly while in MODE_INITIAL' );
+ 			}
+ 			break;
+ 		case self::MODE_LI:
+ 		case self::MODE_LI_LANG:
+ 			$this->endElementModeLi( $elm );
+ 			break;
+ 		case self::MODE_QDESC:
+ 			$this->endElementModeQDesc( $elm );
+ 			break;
+ 		default:
+ 			wfDebugLog( 'XMP', __METHOD__ . " no mode (elm = $elm)" );
+ 			break;
+ 	}
+ }
+
+ /**
+  * Hit an opening element while in MODE_IGNORE
+  *
+  * XMP is extensible, so ignore any tag we don't understand.
+  *
+  * Nothing happens unless the new element has the same name as the
+  * element currently being ignored. In that case another entry is
+  * pushed on the item and mode stacks, so that the matching closing
+  * tags are paired up correctly when the same element is nested
+  * inside itself.
+  *
+  * @param $elm String namespace . ' ' . tag name
+  */
+ private function startElementModeIgnore( $elm ) {
+ 	if ( $elm !== $this->curItem[0] ) {
+ 		return;
+ 	}
+
+ 	array_unshift( $this->curItem, $elm );
+ 	array_unshift( $this->mode, self::MODE_IGNORE );
+ }
+
+ /**
+  * Start element in MODE_BAG (unordered array)
+  * this should always be <rdf:Bag>, which switches us to MODE_LI.
+  *
+  * @param $elm String namespace . ' ' . tag
+  * @throws MWException if we have an element that's not <rdf:Bag>
+  */
+ private function startElementModeBag( $elm ) {
+ 	if ( $elm !== self::NS_RDF . ' Bag' ) {
+ 		throw new MWException( "Expected <rdf:Bag> but got $elm." );
+ 	}
+
+ 	array_unshift( $this->mode, self::MODE_LI );
+ }
+
+ /**
+  * Start element in MODE_SEQ (ordered array)
+  * this should always be <rdf:Seq>, which switches us to MODE_LI.
+  *
+  * An <rdf:Bag> is tolerated and treated like a Seq (bug 27105);
+  * this is noted in the debug log.
+  *
+  * @param $elm String namespace . ' ' . tag
+  * @throws MWException if we have an element that's neither <rdf:Seq> nor <rdf:Bag>
+  */
+ private function startElementModeSeq( $elm ) {
+ 	if ( $elm !== self::NS_RDF . ' Seq' ) {
+ 		if ( $elm !== self::NS_RDF . ' Bag' ) {
+ 			throw new MWException( "Expected <rdf:Seq> but got $elm." );
+ 		}
+
+ 		# bug 27105
+ 		wfDebugLog( 'XMP', __METHOD__ . ' Expected an rdf:Seq, but got an rdf:Bag. Pretending'
+ 			. ' it is a Seq, since some buggy software is known to screw this up.' );
+ 	}
+
+ 	array_unshift( $this->mode, self::MODE_LI );
+ }
+
+ /**
+  * Start element in MODE_LANG (language alternative)
+  * this should always be <rdf:Alt>, which switches us to MODE_LI_LANG.
+  *
+  * This tag tends to be used for metadata like describe this
+  * picture, which can be translated into multiple languages.
+  *
+  * XMP supports non-linguistic alternative selections,
+  * which are really only used for thumbnails, which
+  * we don't care about.
+  *
+  * @param $elm String namespace . ' ' . tag
+  * @throws MWException if we have an element that's not <rdf:Alt>
+  */
+ private function startElementModeLang( $elm ) {
+ 	if ( $elm === self::NS_RDF . ' Alt' ) {
+ 		array_unshift( $this->mode, self::MODE_LI_LANG );
+ 	} else {
+ 		// Name the element that is actually required here (rdf:Alt).
+ 		throw new MWException( "Expected <rdf:Alt> but got $elm." );
+ 	}
+ }
+
+ /**
+  * Handle an opening element when in MODE_SIMPLE
+  *
+  * This should not happen often. This is for if a simple element
+  * already opened has a child element. Could happen for a
+  * qualified element.
+  *
+  * For example:
+  * <exif:DigitalZoomRatio><rdf:Description><rdf:value>0/10</rdf:value>
+  * <foo:someQualifier>Bar</foo:someQualifier> </rdf:Description>
+  * </exif:DigitalZoomRatio>
+  *
+  * This method is called when processing the <rdf:Description> element:
+  * it switches to MODE_QDESC and saves an rdf:value attribute if the
+  * element carries one. Any other child element (apart from a bare
+  * <rdf:value>, which is an error) is put into MODE_IGNORE.
+  *
+  * @param $elm String namespace and tag names separated by space.
+  * @param $attribs Array Attributes of the element.
+  * @throws MWException if the element is <rdf:value>
+  */
+ private function startElementModeSimple( $elm, $attribs ) {
+ 	$rdfValue = self::NS_RDF . ' value';
+
+ 	if ( $elm === $rdfValue ) {
+ 		// This should not be here.
+ 		throw new MWException( __METHOD__ . ' Encountered <rdf:value> where it was unexpected.' );
+ 	}
+
+ 	if ( $elm !== self::NS_RDF . ' Description' ) {
+ 		// something else we don't recognize, like a qualifier maybe.
+ 		wfDebugLog( 'XMP', __METHOD__ . " Encountered element <$elm> where only expecting character data as value of " . $this->curItem[0] );
+ 		array_unshift( $this->mode, self::MODE_IGNORE );
+ 		array_unshift( $this->curItem, $elm );
+ 		return;
+ 	}
+
+ 	// This value has qualifiers. The item itself stays the same, so
+ 	// it is pushed on the item stack a second time.
+ 	array_unshift( $this->mode, self::MODE_QDESC );
+ 	array_unshift( $this->curItem, $this->curItem[0] );
+
+ 	if ( isset( $attribs[$rdfValue] ) ) {
+ 		list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
+ 		$this->saveValue( $ns, $tag, $attribs[$rdfValue] );
+ 	}
+ }
+
+ /**
+  * Start an element when in MODE_QDESC.
+  * This generally happens when a simple element has an inner
+  * rdf:Description to hold qualifier elements.
+  *
+  * For example in:
+  * <exif:DigitalZoomRatio><rdf:Description><rdf:value>0/10</rdf:value>
+  * <foo:someQualifier>Bar</foo:someQualifier> </rdf:Description>
+  * </exif:DigitalZoomRatio>
+  * Called when processing the <rdf:value> or <foo:someQualifier>.
+  *
+  * <rdf:value> needs no set up; everything else is treated as a
+  * qualifier and put into MODE_IGNORE.
+  *
+  * @param $elm String namespace and tag name separated by a space.
+  */
+ private function startElementModeQDesc( $elm ) {
+ 	if ( $elm !== self::NS_RDF . ' value' ) {
+ 		// its a qualifier, which we ignore
+ 		array_unshift( $this->mode, self::MODE_IGNORE );
+ 		array_unshift( $this->curItem, $elm );
+ 	}
+ }
+
+ /**
+  * Starting an element when in MODE_INITIAL
+  * This usually happens when we hit an element inside
+  * the outer rdf:Description
+  *
+  * This is generally where most properties start.
+  *
+  * A non-rdf element that is not in the item list, or that is marked
+  * as 'structPart', is put into MODE_IGNORE. A known element is
+  * pushed with the mode configured for it. In every case that does
+  * not end in MODE_IGNORE the attributes are processed afterwards.
+  *
+  * @param $ns String Namespace
+  * @param $tag String tag name (without namespace prefix)
+  * @param $attribs Array array of attributes
+  * @throws MWException if character content is pending when a known element starts
+  */
+ private function startElementModeInitial( $ns, $tag, $attribs ) {
+ 	if ( $ns !== self::NS_RDF ) {
+ 		$qualifiedName = $ns . ' ' . $tag;
+
+ 		if ( !isset( $this->items[$ns][$tag] ) ) {
+ 			// This element is not on our list of allowed elements so ignore.
+ 			wfDebugLog( 'XMP', __METHOD__ . " Ignoring unrecognized element <$ns:$tag>." );
+ 			array_unshift( $this->mode, self::MODE_IGNORE );
+ 			array_unshift( $this->curItem, $qualifiedName );
+ 			return;
+ 		}
+
+ 		$info = $this->items[$ns][$tag];
+
+ 		if ( isset( $info['structPart'] ) ) {
+ 			// If this element is supposed to appear only as
+ 			// a child of a structure, but appears here (not as
+ 			// a child of a struct), then something weird is
+ 			// happening, so ignore this element and its children.
+ 			wfDebugLog( 'XMP', "Encountered <$ns:$tag> outside"
+ 				. " of its expected parent. Ignoring." );
+
+ 			array_unshift( $this->mode, self::MODE_IGNORE );
+ 			array_unshift( $this->curItem, $qualifiedName );
+ 			return;
+ 		}
+
+ 		$mode = $info['mode'];
+ 		array_unshift( $this->mode, $mode );
+ 		array_unshift( $this->curItem, $qualifiedName );
+
+ 		if ( $mode === self::MODE_STRUCT ) {
+ 			// Members of the struct are saved under this name.
+ 			$this->ancestorStruct = isset( $info['map_name'] ) ? $info['map_name'] : $tag;
+ 		}
+
+ 		if ( $this->charContent !== false ) {
+ 			// Something weird.
+ 			// Should not happen in valid XMP.
+ 			throw new MWException( 'tag nested in non-whitespace characters.' );
+ 		}
+ 	}
+
+ 	// process attributes
+ 	$this->doAttribs( $attribs );
+ }
+
+ /**
+  * Hit an opening element when in a Struct (MODE_STRUCT)
+  * This is generally for fields of a compound property.
+  *
+  * Example of a struct (abbreviated; flash has more properties):
+  *
+  * <exif:Flash> <rdf:Description> <exif:Fired>True</exif:Fired>
+  * <exif:Mode>1</exif:Mode></rdf:Description></exif:Flash>
+  *
+  * or:
+  *
+  * <exif:Flash rdf:parseType='Resource'> <exif:Fired>True</exif:Fired>
+  * <exif:Mode>1</exif:Mode></exif:Flash>
+  *
+  * A known non-rdf element is pushed with its configured mode, an
+  * unknown one is put into MODE_IGNORE. An inner <rdf:Description>
+  * has its attributes processed and pushes another MODE_STRUCT entry
+  * for the same item.
+  *
+  * @param $ns String namespace
+  * @param $tag String tag name (no ns)
+  * @param $attribs Array array of attribs w/ values.
+  * @throws MWException if the element is not an allowed child of the
+  *  current struct, or if character content is pending
+  */
+ private function startElementModeStruct( $ns, $tag, $attribs ) {
+ 	if ( $ns !== self::NS_RDF ) {
+
+ 		if ( isset( $this->items[$ns][$tag] ) ) {
+ 			if ( isset( $this->items[$ns][$this->ancestorStruct]['children'] )
+ 				&& !isset( $this->items[$ns][$this->ancestorStruct]['children'][$tag] ) )
+ 			{
+ 				// This assumes that we don't have inter-namespace nesting
+ 				// which we don't in all the properties we're interested in.
+ 				throw new MWException( " <$tag> appeared nested in <" . $this->ancestorStruct
+ 					. "> where it is not allowed." );
+ 			}
+ 			array_unshift( $this->mode, $this->items[$ns][$tag]['mode'] );
+ 			array_unshift( $this->curItem, $ns . ' ' . $tag );
+ 			if ( $this->charContent !== false ) {
+ 				// Something weird.
+ 				// Should not happen in valid XMP.
+ 				throw new MWException( "tag <$tag> nested in non-whitespace characters (" . $this->charContent . ")." );
+ 			}
+ 		} else {
+ 			// Unknown member: ignore it and its children. The item
+ 			// pushed here has to be the element's own name, since
+ 			// endElementModeIgnore compares the closing tag against it
+ 			// to decide when to leave MODE_IGNORE again.
+ 			array_unshift( $this->mode, self::MODE_IGNORE );
+ 			array_unshift( $this->curItem, $ns . ' ' . $tag );
+ 			return;
+ 		}
+
+ 	}
+
+ 	if ( $ns === self::NS_RDF && $tag === 'Description' ) {
+ 		$this->doAttribs( $attribs );
+ 		array_unshift( $this->mode, self::MODE_STRUCT );
+ 		array_unshift( $this->curItem, $this->curItem[0] );
+ 	}
+ }
+
+ /**
+  * opening element in MODE_LI
+  * process elements of arrays.
+  *
+  * Example:
+  * <exif:ISOSpeedRatings> <rdf:Seq> <rdf:li>64</rdf:li>
+  * </rdf:Seq> </exif:ISOSpeedRatings>
+  * This method is called when we hit the <rdf:li> element.
+  *
+  * The mode below MODE_LI on the stack decides how the list item is
+  * read: for MODE_BAGSTRUCT it is a struct (MODE_STRUCT), otherwise
+  * a simple value (MODE_SIMPLE).
+  *
+  * @param $elm String: namespace . ' ' . tagname
+  * @param $attribs Array: Attributes. (needed for BAGSTRUCTS)
+  * @throws MWException if gets a tag other than <rdf:li>
+  */
+ private function startElementModeLi( $elm, $attribs ) {
+ 	if ( $elm !== self::NS_RDF . ' li' ) {
+ 		throw new MWException( "<rdf:li> expected but got $elm." );
+ 	}
+
+ 	if ( !isset( $this->mode[1] ) ) {
+ 		// This should never ever ever happen. Checking for it
+ 		// to be paranoid.
+ 		throw new MWException( 'In mode Li, but no 2xPrevious mode!' );
+ 	}
+
+ 	if ( $this->mode[1] !== self::MODE_BAGSTRUCT ) {
+ 		// Normal BAG or SEQ containing simple values.
+ 		array_unshift( $this->mode, self::MODE_SIMPLE );
+ 		// need to add curItem[0] on again since one is for the specific item
+ 		// and one is for the entire group.
+ 		array_unshift( $this->curItem, $this->curItem[0] );
+ 		$this->processingArray = true;
+ 		return;
+ 	}
+
+ 	// This list item contains a compound (STRUCT) value.
+ 	array_unshift( $this->mode, self::MODE_STRUCT );
+ 	array_unshift( $this->curItem, $elm );
+ 	$this->processingArray = true;
+
+ 	if ( !isset( $this->curItem[1] ) ) {
+ 		// be paranoid.
+ 		throw new MWException( 'Can not find parent of BAGSTRUCT.' );
+ 	}
+
+ 	// The members are saved under the name of the property that owns the list.
+ 	list( $parentNs, $parentTag ) = explode( ' ', $this->curItem[1] );
+ 	if ( isset( $this->items[$parentNs][$parentTag]['map_name'] ) ) {
+ 		$this->ancestorStruct = $this->items[$parentNs][$parentTag]['map_name'];
+ 	} else {
+ 		$this->ancestorStruct = $parentTag;
+ 	}
+
+ 	$this->doAttribs( $attribs );
+ }
+
+ /**
+  * Opening element in MODE_LI_LANG.
+  * process elements of language alternatives
+  *
+  * Example:
+  * <dc:title> <rdf:Alt> <rdf:li xml:lang="x-default">My house
+  * </rdf:li> </rdf:Alt> </dc:title>
+  *
+  * This method is called when we hit the <rdf:li> element. The
+  * lower-cased xml:lang value is remembered as the language of the
+  * item, and the item is then read in MODE_SIMPLE.
+  *
+  * @param $elm String namespace . ' ' . tag
+  * @param $attribs array array of elements (most importantly xml:lang)
+  * @throws MWException if gets a tag other than <rdf:li> or if no (valid) xml:lang
+  */
+ private function startElementModeLiLang( $elm, $attribs ) {
+ 	if ( $elm !== self::NS_RDF . ' li' ) {
+ 		throw new MWException( __METHOD__ . " <rdf:li> expected but got $elm." );
+ 	}
+
+ 	$langKey = self::NS_XML . ' lang';
+ 	$hasValidLang = isset( $attribs[$langKey] )
+ 		&& preg_match( '/^[-A-Za-z0-9]{2,}$/D', $attribs[$langKey] );
+ 	if ( !$hasValidLang ) {
+ 		throw new MWException( __METHOD__
+ 			. " <rdf:li> did not contain, or has invalid xml:lang attribute in lang alternative" );
+ 	}
+
+ 	// Lang is case-insensitive.
+ 	$this->itemLang = strtolower( $attribs[$langKey] );
+
+ 	// need to add curItem[0] on again since one is for the specific item
+ 	// and one is for the entire group.
+ 	array_unshift( $this->curItem, $this->curItem[0] );
+ 	array_unshift( $this->mode, self::MODE_SIMPLE );
+ 	$this->processingArray = true;
+ }
+
+ /**
+  * Hits an opening element.
+  * Generally just calls a helper based on what MODE we're in.
+  * Also does some initial set up for the wrapper element: the
+  * first rdf:Description (seen while the mode stack is empty)
+  * starts MODE_INITIAL.
+  *
+  * The optional outer wrappers (rdf:RDF, xmpmeta, xapmeta) and
+  * elements without a namespace are skipped.
+  *
+  * @param $parser XMLParser
+  * @param $elm String namespace <space> element
+  * @param $attribs Array attribute name => value
+  * @throws MWException if there is no mode, or the mode is unknown
+  */
+ function startElement( $parser, $elm, $attribs ) {
+ 	$isWrapper = ( $elm === self::NS_RDF . ' RDF'
+ 		|| $elm === 'adobe:ns:meta/ xmpmeta'
+ 		|| $elm === 'adobe:ns:meta/ xapmeta' );
+ 	if ( $isWrapper ) {
+ 		/* ignore. */
+ 		return;
+ 	}
+
+ 	if ( $elm === self::NS_RDF . ' Description' ) {
+ 		if ( count( $this->mode ) === 0 ) {
+ 			// outer rdf:desc
+ 			array_unshift( $this->mode, self::MODE_INITIAL );
+ 		}
+ 	} elseif ( $elm === self::NS_RDF . ' type' ) {
+ 		// This doesn't support rdf:type properly.
+ 		// In practise I have yet to see a file that
+ 		// uses this element, however it is mentioned
+ 		// on page 25 of part 1 of the xmp standard.
+ 		//
+ 		// also it seems as if exiv2 and exiftool do not support
+ 		// this either (That or I misunderstand the standard)
+ 		wfDebugLog( 'XMP', __METHOD__ . ' Encountered <rdf:type> which isn\'t currently supported' );
+ 	}
+
+ 	if ( strpos( $elm, ' ' ) === false ) {
+ 		// This probably shouldn't happen.
+ 		wfDebugLog( 'XMP', __METHOD__ . " Encountered <$elm> which has no namespace. Skipping." );
+ 		return;
+ 	}
+
+ 	list( $ns, $tag ) = explode( ' ', $elm, 2 );
+
+ 	if ( count( $this->mode ) === 0 ) {
+ 		// This should not happen.
+ 		throw new MWException('Error extracting XMP, '
+ 			. "encountered <$elm> with no mode" );
+ 	}
+
+ 	// Hand over to the helper for the mode on top of the stack.
+ 	switch( $this->mode[0] ) {
+ 		case self::MODE_INITIAL:
+ 			$this->startElementModeInitial( $ns, $tag, $attribs );
+ 			break;
+ 		case self::MODE_STRUCT:
+ 			$this->startElementModeStruct( $ns, $tag, $attribs );
+ 			break;
+ 		case self::MODE_SIMPLE:
+ 			$this->startElementModeSimple( $elm, $attribs );
+ 			break;
+ 		case self::MODE_QDESC:
+ 			$this->startElementModeQDesc( $elm );
+ 			break;
+ 		case self::MODE_IGNORE:
+ 			$this->startElementModeIgnore( $elm );
+ 			break;
+ 		case self::MODE_BAG:
+ 		case self::MODE_BAGSTRUCT:
+ 			$this->startElementModeBag( $elm );
+ 			break;
+ 		case self::MODE_SEQ:
+ 			$this->startElementModeSeq( $elm );
+ 			break;
+ 		case self::MODE_LANG:
+ 			$this->startElementModeLang( $elm );
+ 			break;
+ 		case self::MODE_LI:
+ 			$this->startElementModeLi( $elm, $attribs );
+ 			break;
+ 		case self::MODE_LI_LANG:
+ 			$this->startElementModeLiLang( $elm, $attribs );
+ 			break;
+ 		default:
+ 			throw new MWException( 'StartElement in unknown mode: ' . $this->mode[0] );
+ 			break;
+ 	}
+ }
+
+ /**
+  * Process attributes.
+  * Simple values can be stored as either a tag or attribute
+  *
+  * Often the initial <rdf:Description> tag just has all the simple
+  * properties as attributes.
+  *
+  * Example:
+  * <rdf:Description rdf:about="" xmlns:exif="http://ns.adobe.com/exif/1.0/" exif:DigitalZoomRatio="0/10">
+  *
+  * rdf:value and rdf:resource attributes are fed to the character
+  * data handler, attributes that name a known item are saved as
+  * values, and everything else is skipped.
+  *
+  * @param $attribs Array attribute=>value array.
+  * @throws MWException if a known item appears as attribute while in MODE_SIMPLE
+  */
+ private function doAttribs( $attribs ) {
+ 	// first check for rdf:parseType attribute, as that can change
+ 	// how the attributes are interperted.
+ 	$parseTypeKey = self::NS_RDF . ' parseType';
+ 	if ( isset( $attribs[$parseTypeKey] )
+ 		&& $attribs[$parseTypeKey] === 'Resource'
+ 		&& $this->mode[0] === self::MODE_SIMPLE )
+ 	{
+ 		// this is equivalent to having an inner rdf:Description
+ 		$this->mode[0] = self::MODE_QDESC;
+ 	}
+
+ 	foreach ( $attribs as $name => $val ) {
+ 		if ( strpos( $name, ' ' ) === false ) {
+ 			// This shouldn't happen, but so far some old software forgets namespace
+ 			// on rdf:about.
+ 			wfDebugLog( 'XMP', __METHOD__ . ' Encountered non-namespaced attribute: '
+ 				. " $name=\"$val\". Skipping. " );
+ 			continue;
+ 		}
+
+ 		list( $ns, $tag ) = explode( ' ', $name, 2 );
+
+ 		if ( $ns === self::NS_RDF ) {
+ 			if ( $tag === 'value' || $tag === 'resource' ) {
+ 				// resource is for url.
+ 				// value attribute is a weird way of just putting the contents.
+ 				$this->char( $this->xmlParser, $val );
+ 			}
+ 			continue;
+ 		}
+
+ 		if ( !isset( $this->items[$ns][$tag] ) ) {
+ 			wfDebugLog( 'XMP', __METHOD__ . " Ignoring unrecognized element <$ns:$tag>." );
+ 			continue;
+ 		}
+
+ 		if ( $this->mode[0] === self::MODE_SIMPLE ) {
+ 			throw new MWException( __METHOD__
+ 				. " $ns:$tag found as attribute where not allowed" );
+ 		}
+ 		$this->saveValue( $ns, $tag, $val );
+ 	}
+ }
+
+ /**
+  * Given an extracted value, save it to results array
+  *
+  * If the item has a 'validate' entry, the value first goes through
+  * that callback, and is discarded when the callback nulls it.
+  *
+  * note also uses $this->ancestorStruct, $this->processingArray and
+  * $this->itemLang to determine where in the results array the
+  * value is saved. (in addition to $tag).
+  *
+  * @param $ns String namespace of tag this is for
+  * @param $tag String tag name
+  * @param $val String value to save
+  */
+ private function saveValue( $ns, $tag, $val ) {
+ 	$info =& $this->items[$ns][$tag];
+ 	$finalName = isset( $info['map_name'] ) ? $info['map_name'] : $tag;
+
+ 	if ( isset( $info['validate'] ) ) {
+ 		// A plain string names a static method of XMPValidate.
+ 		$validate = $info['validate'];
+ 		if ( !is_array( $validate ) ) {
+ 			$validate = array( 'XMPValidate', $validate );
+ 		}
+
+ 		if ( !is_callable( $validate ) ) {
+ 			// The value is still saved, just not validated.
+ 			wfDebugLog( 'XMP', __METHOD__ . " Validation function for $finalName ("
+ 				. $validate[0] . '::' . $validate[1] . '()) is not callable.' );
+ 		} else {
+ 			// the reasoning behind using &$val instead of using the return value
+ 			// is to be consistent between here and validating structures.
+ 			call_user_func_array( $validate, array( $info, &$val, true ) );
+ 			if ( is_null( $val ) ) {
+ 				wfDebugLog( 'XMP', __METHOD__ . " <$ns:$tag> failed validation." );
+ 				return;
+ 			}
+ 		}
+ 	}
+
+ 	$group = 'xmp-' . $info['map_group'];
+
+ 	if ( $this->ancestorStruct ) {
+ 		if ( $this->processingArray ) {
+ 			// Aka both an array and a struct. ( self::MODE_BAGSTRUCT )
+ 			$this->results[$group][$this->ancestorStruct][][$finalName] = $val;
+ 		} else {
+ 			$this->results[$group][$this->ancestorStruct][$finalName] = $val;
+ 		}
+ 	} elseif ( !$this->processingArray ) {
+ 		$this->results[$group][$finalName] = $val;
+ 	} elseif ( $this->itemLang === false ) {
+ 		// normal array
+ 		$this->results[$group][$finalName][] = $val;
+ 	} else {
+ 		// lang array.
+ 		$this->results[$group][$finalName][$this->itemLang] = $val;
+ 	}
+ }
+}
diff --git a/includes/media/XMPInfo.php b/includes/media/XMPInfo.php
new file mode 100644
index 00000000..1d580ff7
--- /dev/null
+++ b/includes/media/XMPInfo.php
@@ -0,0 +1,1139 @@
+<?php
+/**
+* This class is just a container for a big array
+* used by XMPReader to determine which XMP items to
+* extract.
+*/
+class XMPInfo {
+
+ /**
+  * Get the XMP item configuration table.
+  *
+  * The first call runs the 'XMPGetInfo' hook with the table passed by
+  * reference; later calls return the table without running the hook again.
+  *
+  * @return Array XMP item configuration array.
+  */
+ public static function getItems ( ) {
+ 	if ( self::$ranHooks ) {
+ 		// The hook already ran on an earlier call.
+ 		return self::$items;
+ 	}
+
+ 	// This is for if someone makes a custom metadata extension.
+ 	// For example, a medical wiki might want to decode DICOM xmp properties.
+ 	wfRunHooks( 'XMPGetInfo', array( &self::$items ) );
+ 	self::$ranHooks = true; // Only want to do this once.
+
+ 	return self::$items;
+ }
+
+ /** Set to true once getItems() has run the XMPGetInfo hook, so the hook is only run once. */
+ static private $ranHooks = false;
+
+ /**
+ * XMPInfo::$items keeps a list of all the items
+ * we are interested to extract, as well as
+ * information about the item like what type
+ * it is.
+ *
+ * Format is an array of namespaces,
+ * each containing an array of tags
+ * each tag is an array of information about the
+ * tag, including:
+ * * map_group - what group (used for precedence during conflicts)
+ * * mode - What type of item (XMPReader::MODE_SIMPLE usually; see the XMPReader::MODE_* constants for all values)
+ * * validate - method to validate input. Could also post-process the input. A string value is assumed to be a static method of XMPValidate. Can also take an array( 'className', 'methodName' ).
+ * * choices - array of potential values (format of 'value' => true ). Only used with validateClosed
+ * * rangeLow and rangeHigh - alternative to choices for numeric ranges. Again for validateClosed only.
+ * * children - for MODE_STRUCT items, allowed children.
+ * * structPart - Indicates that this element can only appear as a member of a structure.
+ *
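+ * currently this has exif values plus a number of other schemas (tiff, Dublin Core, XMP, Creative Commons, Photoshop, IPTC); the list is not exhaustive, as this class is still incomplete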
+ */
+
+ static private $items = array(
+ 'http://ns.adobe.com/exif/1.0/' => array(
+ 'ApertureValue' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateRational'
+ ),
+ 'BrightnessValue' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateRational'
+ ),
+ 'CompressedBitsPerPixel' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateRational'
+ ),
+ 'DigitalZoomRatio' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateRational'
+ ),
+ 'ExposureBiasValue' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateRational'
+ ),
+ 'ExposureIndex' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateRational'
+ ),
+ 'ExposureTime' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateRational'
+ ),
+ 'FlashEnergy' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateRational',
+ ),
+ 'FNumber' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateRational'
+ ),
+ 'FocalLength' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateRational'
+ ),
+ 'FocalPlaneXResolution' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateRational'
+ ),
+ 'FocalPlaneYResolution' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateRational'
+ ),
+ 'GPSAltitude' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateRational',
+ ),
+ 'GPSDestBearing' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateRational'
+ ),
+ 'GPSDestDistance' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateRational'
+ ),
+ 'GPSDOP' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateRational'
+ ),
+ 'GPSImgDirection' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateRational'
+ ),
+ 'GPSSpeed' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateRational'
+ ),
+ 'GPSTrack' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateRational'
+ ),
+ 'MaxApertureValue' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateRational'
+ ),
+ 'ShutterSpeedValue' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateRational'
+ ),
+ 'SubjectDistance' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateRational'
+ ),
+ /* Flash */
+ 'Flash' => array(
+ 'mode' => XMPReader::MODE_STRUCT,
+ 'children' => array(
+ 'Fired' => true,
+ 'Function' => true,
+ 'Mode' => true,
+ 'RedEyeMode' => true,
+ 'Return' => true,
+ ),
+ 'validate' => 'validateFlash',
+ 'map_group' => 'exif',
+ ),
+ 'Fired' => array(
+ 'map_group' => 'exif',
+ 'validate' => 'validateBoolean',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'structPart'=> true,
+ ),
+ 'Function' => array(
+ 'map_group' => 'exif',
+ 'validate' => 'validateBoolean',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'structPart'=> true,
+ ),
+ 'Mode' => array(
+ 'map_group' => 'exif',
+ 'validate' => 'validateClosed',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'choices' => array( '0' => true, '1' => true,
+ '2' => true, '3' => true ),
+ 'structPart'=> true,
+ ),
+ 'Return' => array(
+ 'map_group' => 'exif',
+ 'validate' => 'validateClosed',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'choices' => array( '0' => true,
+ '2' => true, '3' => true ),
+ 'structPart'=> true,
+ ),
+ 'RedEyeMode' => array(
+ 'map_group' => 'exif',
+ 'validate' => 'validateBoolean',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'structPart'=> true,
+ ),
+ /* End Flash */
+ 'ISOSpeedRatings' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SEQ,
+ 'validate' => 'validateInteger'
+ ),
+ /* end rational things */
+ 'ColorSpace' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'choices' => array( '1' => true, '65535' => true ),
+ ),
+ 'ComponentsConfiguration' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SEQ,
+ 'validate' => 'validateClosed',
+ 'choices' => array( '1' => true, '2' => true, '3' => true, '4' => true,
+ '5' => true, '6' => true )
+ ),
+ 'Contrast' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'choices' => array( '0' => true, '1' => true, '2' => true )
+ ),
+ 'CustomRendered' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'choices' => array( '0' => true, '1' => true )
+ ),
+ 'DateTimeOriginal' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateDate',
+ ),
+ 'DateTimeDigitized' => array( /* xmp:CreateDate */
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateDate',
+ ),
+ /* todo: there might be interesting information in
+ * exif:DeviceSettingDescription, but need to find an
+ * example
+ */
+ 'ExifVersion' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ 'ExposureMode' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'rangeLow' => 0,
+ 'rangeHigh' => 2,
+ ),
+ 'ExposureProgram' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'rangeLow' => 0,
+ 'rangeHigh' => 8,
+ ),
+ 'FileSource' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'choices' => array( '3' => true )
+ ),
+ 'FlashpixVersion' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ 'FocalLengthIn35mmFilm' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateInteger',
+ ),
+ 'FocalPlaneResolutionUnit' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'choices' => array( '2' => true, '3' => true ),
+ ),
+ 'GainControl' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'rangeLow' => 0,
+ 'rangeHigh' => 4,
+ ),
+ /* this value is post-processed out later */
+ 'GPSAltitudeRef' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'choices' => array( '0' => true, '1' => true ),
+ ),
+ 'GPSAreaInformation' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ 'GPSDestBearingRef' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'choices' => array( 'T' => true, 'M' => true ),
+ ),
+ 'GPSDestDistanceRef' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'choices' => array( 'K' => true, 'M' => true,
+ 'N' => true ),
+ ),
+ 'GPSDestLatitude' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateGPS',
+ ),
+ 'GPSDestLongitude' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateGPS',
+ ),
+ 'GPSDifferential' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'choices' => array( '0' => true, '1' => true ),
+ ),
+ 'GPSImgDirectionRef' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'choices' => array( 'T' => true, 'M' => true ),
+ ),
+ 'GPSLatitude' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateGPS',
+ ),
+ 'GPSLongitude' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateGPS',
+ ),
+ 'GPSMapDatum' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ 'GPSMeasureMode' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'choices' => array( '2' => true, '3' => true )
+ ),
+ 'GPSProcessingMethod' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ 'GPSSatellites' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ 'GPSSpeedRef' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'choices' => array( 'K' => true, 'M' => true,
+ 'N' => true ),
+ ),
+ 'GPSStatus' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'choices' => array( 'A' => true, 'V' => true )
+ ),
+ 'GPSTimeStamp' => array(
+ 'map_group' => 'exif',
+ // Note: in exif, GPSDateStamp does not include
+ // the time, where here it does.
+ 'map_name' => 'GPSDateStamp',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateDate',
+ ),
+ 'GPSTrackRef' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'choices' => array( 'T' => true, 'M' => true )
+ ),
+ 'GPSVersionID' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ 'ImageUniqueID' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ 'LightSource' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ /* can't use a range, as it skips... */
+ 'choices' => array( '0' => true, '1' => true,
+ '2' => true, '3' => true, '4' => true,
+ '9' => true, '10' => true, '11' => true,
+ '12' => true, '13' => true,
+ '14' => true, '15' => true,
+ '17' => true, '18' => true,
+ '19' => true, '20' => true,
+ '21' => true, '22' => true,
+ '23' => true, '24' => true,
+ '255' => true,
+ ),
+ ),
+ 'MeteringMode' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'rangeLow' => 0,
+ 'rangeHigh' => 6,
+ 'choices' => array( '255' => true ),
+ ),
+ /* Pixel(X|Y)Dimension are rather useless, but for
+ * completeness since we do it with exif.
+ */
+ 'PixelXDimension' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateInteger',
+ ),
+ 'PixelYDimension' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateInteger',
+ ),
+ 'Saturation' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'rangeLow' => 0,
+ 'rangeHigh' => 2,
+ ),
+ 'SceneCaptureType' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'rangeLow' => 0,
+ 'rangeHigh' => 3,
+ ),
+ 'SceneType' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'choices' => array( '1' => true ),
+ ),
+ // Note, 6 is not valid SensingMethod.
+ 'SensingMethod' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'rangeLow' => 1,
+ 'rangeHigh' => 5,
+ 'choices' => array( '7' => true, 8 => true ),
+ ),
+ 'Sharpness' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'rangeLow' => 0,
+ 'rangeHigh' => 2,
+ ),
+ 'SpectralSensitivity' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ // This tag should perhaps be displayed to user better.
+ 'SubjectArea' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SEQ,
+ 'validate' => 'validateInteger',
+ ),
+ 'SubjectDistanceRange' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'rangeLow' => 0,
+ 'rangeHigh' => 3,
+ ),
+ 'SubjectLocation' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SEQ,
+ 'validate' => 'validateInteger',
+ ),
+ 'UserComment' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_LANG,
+ ),
+ 'WhiteBalance' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'choices' => array( '0' => true, '1' => true )
+ ),
+ ),
+ 'http://ns.adobe.com/tiff/1.0/' => array(
+ 'Artist' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ 'BitsPerSample' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SEQ,
+ 'validate' => 'validateInteger',
+ ),
+ 'Compression' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'choices' => array( '1' => true, '6' => true ),
+ ),
+ /* this prop should not be used in XMP. dc:rights is the correct prop */
+ 'Copyright' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_LANG,
+ ),
+ 'DateTime' => array( /* proper prop is xmp:ModifyDate */
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateDate',
+ ),
+ 'ImageDescription' => array( /* proper one is dc:description */
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_LANG,
+ ),
+ 'ImageLength' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateInteger',
+ ),
+ 'ImageWidth' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateInteger',
+ ),
+ 'Make' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ 'Model' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ /**** Do not extract this property
+ * It interferes with auto exif rotation.
+ * 'Orientation' => array(
+ * 'map_group' => 'exif',
+ * 'mode' => XMPReader::MODE_SIMPLE,
+ * 'validate' => 'validateClosed',
+ * 'choices' => array( '1' => true, '2' => true, '3' => true, '4' => true, 5 => true,
+ * '6' => true, '7' => true, '8' => true ),
+ *),
+ ******/
+ 'PhotometricInterpretation' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'choices' => array( '2' => true, '6' => true ),
+ ),
+ 'PlanerConfiguration' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'choices' => array( '1' => true, '2' => true ),
+ ),
+ 'PrimaryChromaticities' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SEQ,
+ 'validate' => 'validateRational',
+ ),
+ 'ReferenceBlackWhite' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SEQ,
+ 'validate' => 'validateRational',
+ ),
+ 'ResolutionUnit' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'choices' => array( '2' => true, '3' => true ),
+ ),
+ 'SamplesPerPixel' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateInteger',
+ ),
+ 'Software' => array( /* see xmp:CreatorTool */
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ /* ignore TransferFunction */
+ 'WhitePoint' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SEQ,
+ 'validate' => 'validateRational',
+ ),
+ 'XResolution' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateRational',
+ ),
+ 'YResolution' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateRational',
+ ),
+ 'YCbCrCoefficients' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SEQ,
+ 'validate' => 'validateRational',
+ ),
+ 'YCbCrPositioning' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateClosed',
+ 'choices' => array( '1' => true, '2' => true ),
+ ),
+ 'YCbCrSubSampling' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SEQ,
+ 'validate' => 'validateClosed',
+ 'choices' => array( '1' => true, '2' => true ),
+ ),
+ ),
+ 'http://ns.adobe.com/exif/1.0/aux/' => array(
+ 'Lens' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ 'SerialNumber' => array(
+ 'map_group' => 'exif',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ 'OwnerName' => array(
+ 'map_group' => 'exif',
+ 'map_name' => 'CameraOwnerName',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ ),
+ 'http://purl.org/dc/elements/1.1/' => array(
+ 'title' => array(
+ 'map_group' => 'general',
+ 'map_name' => 'ObjectName',
+ 'mode' => XMPReader::MODE_LANG
+ ),
+ 'description' => array(
+ 'map_group' => 'general',
+ 'map_name' => 'ImageDescription',
+ 'mode' => XMPReader::MODE_LANG
+ ),
+ 'contributor' => array(
+ 'map_group' => 'general',
+ 'map_name' => 'dc-contributor',
+ 'mode' => XMPReader::MODE_BAG
+ ),
+ 'coverage' => array(
+ 'map_group' => 'general',
+ 'map_name' => 'dc-coverage',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ 'creator' => array(
+ 'map_group' => 'general',
+ 'map_name' => 'Artist', //map with exif Artist, iptc byline (2:80)
+ 'mode' => XMPReader::MODE_SEQ,
+ ),
+ 'date' => array(
+ 'map_group' => 'general',
+ // Note, not mapped with other date properties, as this type of date is
+ // non-specific: "A point or period of time associated with an event in
+ // the lifecycle of the resource"
+ 'map_name' => 'dc-date',
+ 'mode' => XMPReader::MODE_SEQ,
+ 'validate' => 'validateDate',
+ ),
+ /* Do not extract dc:format, as we've got better ways to determine mimetype */
+ 'identifier' => array(
+ 'map_group' => 'deprecated',
+ 'map_name' => 'Identifier',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ 'language' => array(
+ 'map_group' => 'general',
+ 'map_name' => 'LanguageCode', /* mapped with iptc 2:135 */
+ 'mode' => XMPReader::MODE_BAG,
+ 'validate' => 'validateLangCode',
+ ),
+ 'publisher' => array(
+ 'map_group' => 'general',
+ 'map_name' => 'dc-publisher',
+ 'mode' => XMPReader::MODE_BAG,
+ ),
+ // for related images/resources
+ 'relation' => array(
+ 'map_group' => 'general',
+ 'map_name' => 'dc-relation',
+ 'mode' => XMPReader::MODE_BAG,
+ ),
+ 'rights' => array(
+ 'map_group' => 'general',
+ 'map_name' => 'Copyright',
+ 'mode' => XMPReader::MODE_LANG,
+ ),
+ // Note: source is not mapped with iptc source, since iptc
+ // source describes the source of the image in terms of a person
+ // who provided the image, where this is to describe an image that the
+ // current one is based on.
+ 'source' => array(
+ 'map_group' => 'general',
+ 'map_name' => 'dc-source',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ 'subject' => array(
+ 'map_group' => 'general',
+ 'map_name' => 'Keywords', /* maps to iptc 2:25 */
+ 'mode' => XMPReader::MODE_BAG,
+ ),
+ 'type' => array(
+ 'map_group' => 'general',
+ 'map_name' => 'dc-type',
+ 'mode' => XMPReader::MODE_BAG,
+ ),
+ ),
+ 'http://ns.adobe.com/xap/1.0/' => array(
+ 'CreateDate' => array(
+ 'map_group' => 'general',
+ 'map_name' => 'DateTimeDigitized',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateDate',
+ ),
+ 'CreatorTool' => array(
+ 'map_group' => 'general',
+ 'map_name' => 'Software',
+ 'mode' => XMPReader::MODE_SIMPLE
+ ),
+ 'Identifier' => array(
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_BAG,
+ ),
+ 'Label' => array(
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ 'ModifyDate' => array(
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'map_name' => 'DateTime',
+ 'validate' => 'validateDate',
+ ),
+ 'MetadataDate' => array(
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ // map_name to be consistent with other date names.
+ 'map_name' => 'DateTimeMetadata',
+ 'validate' => 'validateDate',
+ ),
+ 'Nickname' => array(
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ 'Rating' => array(
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateRating',
+ ),
+ ),
+ 'http://ns.adobe.com/xap/1.0/rights/' => array(
+ 'Certificate' => array(
+ 'map_group' => 'general',
+ 'map_name' => 'RightsCertificate',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ 'Marked' => array(
+ 'map_group' => 'general',
+ 'map_name' => 'Copyrighted',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateBoolean',
+ ),
+ 'Owner' => array(
+ 'map_group' => 'general',
+ 'map_name' => 'CopyrightOwner',
+ 'mode' => XMPReader::MODE_BAG,
+ ),
+ // this seems similar to dc:rights.
+ 'UsageTerms' => array(
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_LANG,
+ ),
+ 'WebStatement' => array(
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ ),
+ // XMP media management.
+ 'http://ns.adobe.com/xap/1.0/mm/' => array(
+ // if we extract the exif ImageUniqueID, might
+ // as well do this too.
+ 'OriginalDocumentID' => array(
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ // It might also be useful to do xmpMM:LastURL
+ // and xmpMM:DerivedFrom, as you can potentially
+ // get the url of this document/source for this
+ // document. However, what's more likely is you'd
+ // get a file:// url for the path of the doc,
+ // which is somewhat of a privacy issue.
+ ),
+ 'http://creativecommons.org/ns#' => array(
+ 'license' => array(
+ 'map_name' => 'LicenseUrl',
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ 'morePermissions' => array(
+ 'map_name' => 'MorePermissionsUrl',
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ 'attributionURL' => array(
+ 'map_group' => 'general',
+ 'map_name' => 'AttributionUrl',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ 'attributionName' => array(
+ 'map_group' => 'general',
+ 'map_name' => 'PreferredAttributionName',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ ),
+ //Note, this property affects how jpeg metadata is extracted.
+ 'http://ns.adobe.com/xmp/note/' => array(
+ 'HasExtendedXMP' => array(
+ 'map_group' => 'special',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ ),
+ /* Note, in iptc schemas, the legacy properties are denoted
+ * as deprecated, since other properties should be used instead,
+ * and properties marked as deprecated in the standard
+ * are marked as general here as they don't have replacements
+ */
+ 'http://ns.adobe.com/photoshop/1.0/' => array(
+ 'City' => array(
+ 'map_group' => 'deprecated',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'map_name' => 'CityDest',
+ ),
+ 'Country' => array(
+ 'map_group' => 'deprecated',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'map_name' => 'CountryDest',
+ ),
+ 'State' => array(
+ 'map_group' => 'deprecated',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'map_name' => 'ProvinceOrStateDest',
+ ),
+ 'DateCreated' => array(
+ 'map_group' => 'deprecated',
+ // marking as deprecated as the xmp prop preferred
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'map_name' => 'DateTimeOriginal',
+ 'validate' => 'validateDate',
+ // note this prop is an XMP, not IPTC date
+ ),
+ 'CaptionWriter' => array(
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'map_name' => 'Writer',
+ ),
+ 'Instructions' => array(
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'map_name' => 'SpecialInstructions',
+ ),
+ 'TransmissionReference' => array(
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'map_name' => 'OriginalTransmissionRef',
+ ),
+ 'AuthorsPosition' => array(
+ /* This corresponds with 2:85
+ * By-line Title, which needs to be
+ * handled weirdly to correspond
+ * with iptc/exif. */
+ 'map_group' => 'special',
+ 'mode' => XMPReader::MODE_SIMPLE
+ ),
+ 'Credit' => array(
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ 'Source' => array(
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ 'Urgency' => array(
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ 'Category' => array(
+ // Note, this prop is deprecated, but in general
+ // group since it doesn't have a replacement.
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'map_name' => 'iimCategory',
+ ),
+ 'SupplementalCategories' => array(
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_BAG,
+ 'map_name' => 'iimSupplementalCategory',
+ ),
+ 'Headline' => array(
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE
+ ),
+ ),
+ 'http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/' => array(
+ 'CountryCode' => array(
+ 'map_group' => 'deprecated',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'map_name' => 'CountryCodeDest',
+ ),
+ 'IntellectualGenre' => array(
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ // Note, this is a six digit code.
+ // See: http://cv.iptc.org/newscodes/scene/
+ // Since these aren't really all that common,
+ // we just show the number.
+ 'Scene' => array(
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_BAG,
+ 'validate' => 'validateInteger',
+ 'map_name' => 'SceneCode',
+ ),
+ /* Note: SubjectCode should be 8 ASCII digits.
+ * It is not really an integer (has leading 0's,
+ * cannot have a +/- sign), but validateInteger
+ * will let it through.
+ */
+ 'SubjectCode' => array(
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_BAG,
+ 'map_name' => 'SubjectNewsCode',
+ 'validate' => 'validateInteger'
+ ),
+ 'Location' => array(
+ 'map_group' => 'deprecated',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'map_name' => 'SublocationDest',
+ ),
+ 'CreatorContactInfo' => array(
+ /* Note this maps to 2:118 in iim
+ * (Contact) field. However those field
+ * types are slightly different - 2:118
+ * is free form text field, where this
+ * is more structured.
+ */
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_STRUCT,
+ 'map_name' => 'Contact',
+ 'children' => array(
+ 'CiAdrExtadr' => true,
+ 'CiAdrCity' => true,
+ 'CiAdrCtry' => true,
+ 'CiEmailWork' => true,
+ 'CiTelWork' => true,
+ 'CiAdrPcode' => true,
+ 'CiAdrRegion' => true,
+ 'CiUrlWork' => true,
+ ),
+ ),
+ 'CiAdrExtadr' => array( /* address */
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'structPart'=> true,
+ ),
+ 'CiAdrCity' => array( /* city */
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'structPart'=> true,
+ ),
+ 'CiAdrCtry' => array( /* country */
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'structPart'=> true,
+ ),
+ 'CiEmailWork' => array( /* email (possibly separated by ',') */
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'structPart'=> true,
+ ),
+ 'CiTelWork' => array( /* telephone */
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'structPart'=> true,
+ ),
+ 'CiAdrPcode' => array( /* postal code */
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'structPart'=> true,
+ ),
+ 'CiAdrRegion' => array( /* province/state */
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'structPart'=> true,
+ ),
+ 'CiUrlWork' => array( /* url. Multiple may be separated by comma. */
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'structPart'=> true,
+ ),
+ /* End contact info struct properties */
+ ),
+ 'http://iptc.org/std/Iptc4xmpExt/2008-02-29/' => array(
+ 'Event' => array(
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ ),
+ 'OrganisationInImageName' => array(
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_BAG,
+ 'map_name' => 'OrganisationInImage'
+ ),
+ 'PersonInImage' => array(
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_BAG,
+ ),
+ 'MaxAvailHeight' => array(
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateInteger',
+ 'map_name' => 'OriginalImageHeight',
+ ),
+ 'MaxAvailWidth' => array(
+ 'map_group' => 'general',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'validate' => 'validateInteger',
+ 'map_name' => 'OriginalImageWidth',
+ ),
+ // LocationShown and LocationCreated are handled
+ // specially because they are hierarchical, but we
+ // also want to merge with the old non-hierarchical.
+ 'LocationShown' => array(
+ 'map_group' => 'special',
+ 'mode' => XMPReader::MODE_BAGSTRUCT,
+ 'children' => array(
+ 'WorldRegion' => true,
+ 'CountryCode' => true, /* iso code */
+ 'CountryName' => true,
+ 'ProvinceState' => true,
+ 'City' => true,
+ 'Sublocation' => true,
+ ),
+ ),
+ 'LocationCreated' => array(
+ 'map_group' => 'special',
+ 'mode' => XMPReader::MODE_BAGSTRUCT,
+ 'children' => array(
+ 'WorldRegion' => true,
+ 'CountryCode' => true, /* iso code */
+ 'CountryName' => true,
+ 'ProvinceState' => true,
+ 'City' => true,
+ 'Sublocation' => true,
+ ),
+ ),
+ 'WorldRegion' => array(
+ 'map_group' => 'special',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'structPart'=> true,
+ ),
+ 'CountryCode' => array(
+ 'map_group' => 'special',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'structPart'=> true,
+ ),
+ 'CountryName' => array(
+ 'map_group' => 'special',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'structPart'=> true,
+ 'map_name' => 'Country',
+ ),
+ 'ProvinceState' => array(
+ 'map_group' => 'special',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'structPart'=> true,
+ 'map_name' => 'ProvinceOrState',
+ ),
+ 'City' => array(
+ 'map_group' => 'special',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'structPart'=> true,
+ ),
+ 'Sublocation' => array(
+ 'map_group' => 'special',
+ 'mode' => XMPReader::MODE_SIMPLE,
+ 'structPart'=> true,
+ ),
+
+ /* Other props that might be interesting but
+ * Not currently extracted:
+ * ArtworkOrObject, (info about objects in picture)
+ * DigitalSourceType
+ * RegistryId
+ */
+ ),
+
+ /* Plus props we might want to consider:
+ * (Note: some of these have unclear/incomplete definitions
+ * from the iptc4xmp standard).
+ * ImageSupplier (kind of like iptc source field)
+ * ImageSupplierId (id code for image from supplier)
+ * CopyrightOwner
+ * ImageCreator
+ * Licensor
+ * Various model release fields
+ * Property release fields.
+ */
+ );
+}
diff --git a/includes/media/XMPValidate.php b/includes/media/XMPValidate.php
new file mode 100644
index 00000000..0f1d375c
--- /dev/null
+++ b/includes/media/XMPValidate.php
@@ -0,0 +1,323 @@
+<?php
+/**
+* This contains some static methods for
+* validating XMP properties. See XMPInfo and XMPReader classes.
+*
+* Each of these functions takes the same parameters
+* * an info array which is a subset of the XMPInfo::items array
+* * A value (passed as reference) to validate. This can be either a
+* simple value or an array
+* * A boolean to determine if this is validating a simple or complex value
+*
+* It should be noted that when an array is being validated, typically the validation
+* function is called once for each value, and then once at the end for the entire array.
+*
+* These validation functions can also be used to modify the data. See the GPS and flash ones
+* for example.
+*
+* @see http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart1.pdf starting at pg 28
+* @see http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart2.pdf starting at pg 11
+*/
+class XMPValidate {
+ /**
+  * Validate a boolean property, whose only accepted values are the
+  * literal strings 'True' and 'False'.
+  *
+  * Any other standalone value is reported to the 'XMP' debug log and
+  * $val is replaced with null.
+  *
+  * @param $info Array information about current property
+  * @param &$val Mixed current value to validate
+  * @param $standalone Boolean if this is a simple property or array
+  */
+ public static function validateBoolean( $info, &$val, $standalone ) {
+ 	// Non-standalone values (arrays, etc) are deliberately left untouched.
+ 	if ( $standalone && !in_array( $val, array( 'True', 'False' ), true ) ) {
+ 		wfDebugLog( 'XMP', __METHOD__ . " Expected True or False but got $val" );
+ 		$val = null;
+ 	}
+ }
+
+ /**
+  * Validate a rational property such as "12/10".
+  *
+  * The numerator is a run of digits with an optional leading minus sign.
+  * The denominator is a run of digits that either starts with a non-zero
+  * digit, or has at least two digits and ends with a non-zero digit, so a
+  * denominator consisting only of zeros is rejected.
+  *
+  * A standalone value that does not match is logged and set to null.
+  *
+  * @param $info Array information about current property
+  * @param &$val Mixed current value to validate
+  * @param $standalone Boolean if this is a simple property or array
+  */
+ public static function validateRational( $info, &$val, $standalone ) {
+ 	if ( $standalone ) {
+ 		$isRational = preg_match( '/^(?:-?\d+)\/(?:\d+[1-9]|[1-9]\d*)$/D', $val );
+ 		if ( !$isRational ) {
+ 			wfDebugLog( 'XMP', __METHOD__ . " Expected rational but got $val" );
+ 			$val = null;
+ 		}
+ 	}
+ 	// Arrays and other non-standalone values are not checked here.
+ }
+
+ /**
+  * Validate a rating property (-1, or anything from 0 to 5).
+  *
+  * A value that is not numeric is logged and set to null. A numeric value
+  * outside of the allowed range is clamped instead of rejected: anything
+  * below 0 becomes '-1' and anything above 5 becomes '5'. Values already
+  * in range are left exactly as they were given.
+  *
+  * @see MWG spec
+  * @param $info Array information about current property
+  * @param &$val Mixed current value to validate
+  * @param $standalone Boolean if this is a simple property or array
+  */
+ public static function validateRating( $info, &$val, $standalone ) {
+ 	if ( !$standalone ) {
+ 		// only standalone properties are validated, not arrays, etc
+ 		return;
+ 	}
+
+ 	$looksNumeric = preg_match( '/^[-+]?\d*(?:\.?\d*)$/D', $val )
+ 		&& is_numeric($val);
+ 	if ( !$looksNumeric ) {
+ 		wfDebugLog( 'XMP', __METHOD__ . " Expected rating but got $val" );
+ 		$val = null;
+ 		return;
+ 	}
+
+ 	$rating = (float) $val;
+ 	if ( $rating < 0 ) {
+ 		// The cut-off is 0 rather than -1: values strictly between
+ 		// -1 and 0 are illegal too, because -1 is reserved as the
+ 		// special "rejected" rating.
+ 		wfDebugLog( 'XMP', __METHOD__ . " Rating too low, setting to -1 (Rejected)");
+ 		$val = '-1';
+ 	} elseif ( $rating > 5 ) {
+ 		wfDebugLog( 'XMP', __METHOD__ . " Rating too high, setting to 5");
+ 		$val = '5';
+ 	}
+ }
+
+ /**
+  * Validate an integer property: an optional sign followed by one or
+  * more digits.
+  *
+  * A standalone value in any other form is logged and set to null.
+  *
+  * @param $info Array information about current property
+  * @param &$val Mixed current value to validate
+  * @param $standalone Boolean if this is a simple property or array
+  */
+ public static function validateInteger( $info, &$val, $standalone ) {
+ 	// Arrays and other non-standalone values are skipped entirely.
+ 	if ( $standalone && !preg_match( '/^[-+]?\d+$/D', $val ) ) {
+ 		wfDebugLog( 'XMP', __METHOD__ . " Expected integer but got $val" );
+ 		$val = null;
+ 	}
+ }
+
+ /**
+  * Validate a property that only allows a fixed set of values
+  * (a "closed choice").
+  *
+  * A standalone value is accepted when it is a key of $info['choices'],
+  * or when both $info['rangeLow'] and $info['rangeHigh'] are set and the
+  * value is numeric with its integer part inside that inclusive range.
+  * Anything else is logged and set to null.
+  *
+  * @param $info Array information about current property
+  * @param &$val Mixed current value to validate
+  * @param $standalone Boolean if this is a simple property or array
+  */
+ public static function validateClosed( $info, &$val, $standalone ) {
+ 	if ( !$standalone ) {
+ 		// only standalone properties are validated, not arrays, etc
+ 		return;
+ 	}
+
+ 	// Numeric range check; only applies when both bounds are configured.
+ 	$withinRange = false;
+ 	if ( isset( $info['rangeLow'] ) && isset( $info['rangeHigh'] ) && is_numeric( $val ) ) {
+ 		$number = intval( $val );
+ 		$withinRange = ( $number <= $info['rangeHigh'] ) && ( $number >= $info['rangeLow'] );
+ 	}
+
+ 	// Explicitly enumerated choice.
+ 	$isListed = isset( $info['choices'][$val] );
+
+ 	if ( !( $isListed || $withinRange ) ) {
+ 		wfDebugLog( 'XMP', __METHOD__ . " Expected closed choice, but got $val" );
+ 		$val = null;
+ 	}
+ }
+
+ /**
+  * Validate a flash structure and collapse it into a single integer
+  * bit field.
+  *
+  * The structure must contain all of Fired, Function, Mode, RedEyeMode
+  * and Return; otherwise it is logged and $val is set to null. On
+  * success $val is replaced by an integer built as follows:
+  *   bit 0      - Fired is 'True'
+  *   from bit 1 - integer value of Return
+  *   from bit 3 - integer value of Mode
+  *   bit 5      - Function is 'True'
+  *   bit 6      - RedEyeMode is 'True'
+  *
+  * The bit field is built purely from integers. Seeding it with the
+  * string "\0" relied on implicit string-to-int coercion of a
+  * non-numeric string, which raises a warning on PHP 7.1+ and a
+  * TypeError on PHP 8.
+  *
+  * @param $info Array information about current property
+  * @param &$val Mixed current value to validate
+  * @param $standalone Boolean if this is a simple property or array
+  */
+ public static function validateFlash( $info, &$val, $standalone ) {
+ 	if ( $standalone ) {
+ 		// this only validates flash structs, not individual properties
+ 		return;
+ 	}
+ 	if ( !( isset( $val['Fired'] )
+ 		&& isset( $val['Function'] )
+ 		&& isset( $val['Mode'] )
+ 		&& isset( $val['RedEyeMode'] )
+ 		&& isset( $val['Return'] )
+ 	) ) {
+ 		wfDebugLog( 'XMP', __METHOD__ . " Flash structure did not have all the required components" );
+ 		$val = null;
+ 		return;
+ 	}
+
+ 	// Convert the three boolean members to explicit 0/1 integers so
+ 	// that every operand of the bitwise expression is an int.
+ 	$fired = ( $val['Fired'] === 'True' ) ? 1 : 0;
+ 	$function = ( $val['Function'] === 'True' ) ? 1 : 0;
+ 	$redEye = ( $val['RedEyeMode'] === 'True' ) ? 1 : 0;
+
+ 	$val = $fired
+ 		| ( intval( $val['Return'] ) << 1 )
+ 		| ( intval( $val['Mode'] ) << 3 )
+ 		| ( $function << 5 )
+ 		| ( $redEye << 6 );
+ }
+
+ /**
+  * Validate a LangCode property ( en-GB, etc ).
+  *
+  * This is only a naive sanity check: the value has to be at least two
+  * characters long and consist solely of ASCII letters, digits and
+  * hyphens. A standalone value failing that is logged and set to null.
+  *
+  * @see rfc 3066
+  * @see http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart1.pdf page 30 (section 8.2.2.5)
+  *
+  * @param $info Array information about current property
+  * @param &$val Mixed current value to validate
+  * @param $standalone Boolean if this is a simple property or array
+  */
+ public static function validateLangCode( $info, &$val, $standalone ) {
+ 	if ( !$standalone ) {
+ 		// only standalone properties are validated, not arrays, etc
+ 		return;
+ 	}
+
+ 	$plausible = preg_match( '/^[-A-Za-z0-9]{2,}$/D', $val);
+ 	if ( $plausible ) {
+ 		return;
+ 	}
+
+ 	// Does not even remotely look like a language code.
+ 	wfDebugLog( 'XMP', __METHOD__ . " Expected Lang code but got $val" );
+ 	$val = null;
+ }
+
+ /**
+ * function to validate date properties, and convert to Exif format.
+ *
+ * Accepted input forms (each later part requires the earlier ones):
+ * YYYY, YYYY-MM, YYYY-MM-DD, YYYY-MM-DDThh:mm, optionally followed by
+ * :ss, a fractional second part (matched but not captured), and a
+ * timezone that is either Z or a +hh:mm / -hh:mm offset.
+ *
+ * On success $val is rewritten as:
+ * - 'YYYY:MM:DD' when no time is given (a missing month or day becomes 01)
+ * - 'YYYY:MM:DD hh:mm:ss' when a time is given with no timezone or with Z
+ * (missing seconds become 00)
+ * - the result of wfTimestamp( TS_EXIF, ... ) when a numeric offset is given
+ *
+ * On failure (no regex match, or year 0000) the value is logged and set to null.
+ *
+ * @param $info Array information about current property
+ * @param &$val Mixed current value to validate. Converts to TS_EXIF as a side-effect.
+ * @param $standalone Boolean if this is a simple property or array
+ */
+ public static function validateDate( $info, &$val, $standalone ) {
+ if ( !$standalone ) {
+ // this only validates standalone properties, not arrays, etc
+ return;
+ }
+ $res = array();
+ if ( !preg_match(
+ /* Only the leading digit of each field is range-checked (e.g. month is [01]\d), so this is a shape check rather than a full calendar validation. */
+ '/^([0-3]\d{3})(?:-([01]\d)(?:-([0-3]\d)(?:T([0-2]\d):([0-6]\d)(?::([0-6]\d)(?:\.\d+)?)?([-+]\d{2}:\d{2}|Z)?)?)?)?$/D'
+ , $val, $res)
+ ) {
+ wfDebugLog( 'XMP', __METHOD__ . " Expected date but got $val" );
+ $val = null;
+ } else {
+ /*
+ * $res is formatted as follows:
+ * 0 -> full date.
+ * 1 -> year, 2-> month, 3-> day, 4-> hour, 5-> minute, 6->second
+ * 7-> Timezone specifier (Z or something like +12:30 )
+ * many parts are optional, some aren't. For example if you specify
+ * minute, you must specify hour, day, month, and year but not second or TZ.
+ */
+
+ /*
+ * First of all, if year = 0000, something is wrong,
+ * so don't extract. This seems to happen when
+ * some programs convert between metadata formats.
+ */
+ if ( $res[1] === '0000' ) {
+ wfDebugLog( 'XMP', __METHOD__ . " Invalid date (year 0): $val" );
+ $val = null;
+ return;
+ }
+ // If month or day is unspecified, fill it in as 01.
+ $res[2] = isset( $res[2] ) ? $res[2] : '01'; //month
+ $res[3] = isset( $res[3] ) ? $res[3] : '01'; //day
+ if ( !isset( $res[4] ) ) { //hour
+ // No time part: we just have the year, month and day.
+ $val = $res[1] . ':' . $res[2] . ':' . $res[3];
+ return;
+ }
+ // If hour is set, so is minute, or the regex above would have failed.
+ // The extra check for an empty string is needed for the case where a
+ // timezone is given but seconds are not: group 6 then exists but is ''.
+ $res[6] = isset( $res[6] ) && $res[6] != '' ? $res[6] : '00';
+
+ // No timezone, or Z: emit the time exactly as written.
+ if ( !isset( $res[7] ) || $res[7] === 'Z' ) {
+ $val = $res[1] . ':' . $res[2] . ':' . $res[3]
+ . ' ' . $res[4] . ':' . $res[5] . ':' . $res[6];
+ return;
+ }
+
+ // Do timezone processing. We've already done the case that tz = Z,
+ // so $res[7] has the form +hh:mm or -hh:mm here.
+
+ $unix = wfTimestamp( TS_UNIX, $res[1] . $res[2] . $res[3] . $res[4] . $res[5] . $res[6] );
+ // Offset in seconds: hh at positions 1-2, mm at positions 4-5.
+ $offset = intval( substr( $res[7], 1, 2 ) ) * 60 * 60;
+ $offset += intval( substr( $res[7], 4, 2 ) ) * 60;
+ if ( substr( $res[7], 0, 1 ) === '-' ) {
+ $offset = -$offset;
+ }
+ // NOTE(review): the signed offset is *added* to the stated time, so
+ // 20:12+02:30 yields 22:42 - confirm this is the intended direction.
+ $val = wfTimestamp( TS_EXIF, $unix + $offset );
+ }
+
+ }
+
+ /**
+  * Validate an XMP GPS coordinate and, more importantly, translate it
+  * from the XMP degrees/minutes form to the signed decimal form we use.
+  *
+  * Two input forms are accepted, where k is one of N, S, E or W:
+  *   DDD,MM,SSk   degrees, minutes, seconds (all integers)
+  *   DDD,MM.mmk   degrees and decimal minutes
+  * On success $val becomes a number in decimal degrees, negated for
+  * S and W. Anything else is logged and set to null.
+  *
+  * The whole string has to match. Both patterns are anchored and the
+  * decimal point is matched literally, so malformed input such as
+  * "1234,30,45N" or "12,30,45.5N" is rejected rather than being
+  * partially matched and turned into a wrong coordinate.
+  *
+  * @see http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart2.pdf
+  *      section 1.2.7.4 on page 23
+  *
+  * @param $info Array unused (info about prop)
+  * @param &$val String GPS string in either DDD,MM,SSk or
+  *      or DDD,MM.mmk form
+  * @param $standalone Boolean if its a simple prop (should always be true)
+  */
+ public static function validateGPS ( $info, &$val, $standalone ) {
+ 	if ( !$standalone ) {
+ 		return;
+ 	}
+
+ 	$m = array();
+ 	if ( preg_match(
+ 		'/^(\d{1,3}),(\d{1,2}),(\d{1,2})([NWSE])$/D',
+ 		$val, $m )
+ 	) {
+ 		// degrees, minutes, seconds
+ 		$coord = intval( $m[1] );
+ 		$coord += intval( $m[2] ) * (1/60);
+ 		$coord += intval( $m[3] ) * (1/3600);
+ 		$direction = $m[4];
+ 	} elseif ( preg_match(
+ 		'/^(\d{1,3}),(\d{1,2}(?:\.\d*)?)([NWSE])$/D',
+ 		$val, $m )
+ 	) {
+ 		// degrees, decimal minutes
+ 		$coord = intval( $m[1] );
+ 		$coord += floatval( $m[2] ) * (1/60);
+ 		$direction = $m[3];
+ 	} else {
+ 		wfDebugLog( 'XMP', __METHOD__
+ 			. " Expected GPSCoordinate, but got $val." );
+ 		$val = null;
+ 		return;
+ 	}
+
+ 	// South and west are represented as negative decimal degrees.
+ 	if ( $direction === 'S' || $direction === 'W' ) {
+ 		$coord = -$coord;
+ 	}
+ 	$val = $coord;
+ }
+}