summaryrefslogtreecommitdiff
path: root/includes/media/XMP.php
diff options
context:
space:
mode:
Diffstat (limited to 'includes/media/XMP.php')
-rw-r--r--includes/media/XMP.php213
1 files changed, 120 insertions, 93 deletions
diff --git a/includes/media/XMP.php b/includes/media/XMP.php
index 7eb3d19e..cdbd5ab2 100644
--- a/includes/media/XMP.php
+++ b/includes/media/XMP.php
@@ -23,7 +23,7 @@
/**
* Class for reading xmp data containing properties relevant to
- * images, and spitting out an array that FormatExif accepts.
+ * images, and spitting out an array that FormatMetadata accepts.
*
* Note, this is not meant to recognize every possible thing you can
* encode in XMP. It should recognize all the properties we want.
@@ -34,12 +34,12 @@
*
* The public methods one would call in this class are
* - parse( $content )
- * Reads in xmp content.
- * Can potentially be called multiple times with partial data each time.
+ * Reads in xmp content.
+ * Can potentially be called multiple times with partial data each time.
* - parseExtended( $content )
- * Reads XMPExtended blocks (jpeg files only).
+ * Reads XMPExtended blocks (jpeg files only).
* - getResults
- * Outputs a results array.
+ * Outputs a results array.
*
* Note XMP kind of looks like rdf. They are not the same thing - XMP is
* encoded as a specific subset of rdf. This class can read XMP. It cannot
@@ -47,20 +47,38 @@
*
*/
class XMPReader {
+ /** @var array XMP item configuration array */
+ protected $items;
+
+ /** @var array Array to hold the current element (and previous element, and so on) */
+ private $curItem = array();
+
+ /** @var bool|string The structure name when processing nested structures. */
+ private $ancestorStruct = false;
+
+ /** @var bool|string Temporary holder for character data that appears in xmp doc. */
+ private $charContent = false;
+
+ /** @var array Stores the state the xmpreader is in (see MODE_FOO constants) */
+ private $mode = array();
+
+ /** @var array Array to hold results */
+ private $results = array();
+
+ /** @var bool If we're doing a seq or bag. */
+ private $processingArray = false;
- private $curItem = array(); // array to hold the current element (and previous element, and so on)
- private $ancestorStruct = false; // the structure name when processing nested structures.
- private $charContent = false; // temporary holder for character data that appears in xmp doc.
- private $mode = array(); // stores the state the xmpreader is in (see MODE_FOO constants)
- private $results = array(); // array to hold results
- private $processingArray = false; // if we're doing a seq or bag.
- private $itemLang = false; // used for lang alts only
+ /** @var bool|string Used for lang alts only */
+ private $itemLang = false;
+ /** @var resource A resource handle for the XML parser */
private $xmlParser;
+
+ /** @var bool|string Character set like 'UTF-8' */
private $charset = false;
- private $extendedXMPOffset = 0;
- protected $items;
+ /** @var int */
+ private $extendedXMPOffset = 0;
/**
* These are various mode constants.
@@ -105,8 +123,8 @@ class XMPReader {
$this->items = XMPInfo::getItems();
$this->resetXMLParser();
-
}
+
/**
* Main use is if a single item has multiple xmp documents describing it.
* For example in jpeg's with extendedXMP
@@ -141,8 +159,8 @@ class XMPReader {
/** Get the result array. Do some post-processing before returning
* the array, and transform any metadata that is special-cased.
*
- * @return Array array of results as an array of arrays suitable for
- * FormatMetadata::getFormattedData().
+ * @return array Array of results as an array of arrays suitable for
+ * FormatMetadata::getFormattedData().
*/
public function getResults() {
// xmp-special is for metadata that affects how stuff
@@ -155,7 +173,7 @@ class XMPReader {
$data = $this->results;
- wfRunHooks( 'XMPGetResults', Array( &$data ) );
+ wfRunHooks( 'XMPGetResults', array( &$data ) );
if ( isset( $data['xmp-special']['AuthorsPosition'] )
&& is_string( $data['xmp-special']['AuthorsPosition'] )
@@ -237,10 +255,10 @@ class XMPReader {
* debug log, blanks result array and returns false.
*
* @param string $content XMP data
- * @param $allOfIt Boolean: If this is all the data (true) or if its split up (false). Default true
- * @param $reset Boolean: does xml parser need to be reset. Default false
+ * @param bool $allOfIt If this is all the data (true) or if it's split up (false). Default true
+ * @param bool $reset Does xml parser need to be reset. Default false
* @throws MWException
- * @return Boolean success.
+ * @return bool Success.
*/
public function parse( $content, $allOfIt = true, $reset = false ) {
if ( $reset ) {
@@ -301,8 +319,10 @@ class XMPReader {
} catch ( MWException $e ) {
wfDebugLog( 'XMP', 'XMP parse error: ' . $e );
$this->results = array();
+
return false;
}
+
return true;
}
@@ -311,36 +331,43 @@ class XMPReader {
* @todo In serious need of testing
* @see http://www.adobe.ge/devnet/xmp/pdfs/XMPSpecificationPart3.pdf XMP spec part 3 page 20
* @param string $content XMPExtended block minus the namespace signature
- * @return Boolean If it succeeded.
+ * @return bool If it succeeded.
*/
public function parseExtended( $content ) {
// @todo FIXME: This is untested. Hard to find example files
// or programs that make such files..
$guid = substr( $content, 0, 32 );
if ( !isset( $this->results['xmp-special']['HasExtendedXMP'] )
- || $this->results['xmp-special']['HasExtendedXMP'] !== $guid ) {
- wfDebugLog( 'XMP', __METHOD__ . " Ignoring XMPExtended block due to wrong guid (guid= '$guid')" );
+ || $this->results['xmp-special']['HasExtendedXMP'] !== $guid
+ ) {
+ wfDebugLog( 'XMP', __METHOD__ .
+ " Ignoring XMPExtended block due to wrong guid (guid= '$guid')" );
+
return false;
}
$len = unpack( 'Nlength/Noffset', substr( $content, 32, 8 ) );
if ( !$len || $len['length'] < 4 || $len['offset'] < 0 || $len['offset'] > $len['length'] ) {
wfDebugLog( 'XMP', __METHOD__ . 'Error reading extended XMP block, invalid length or offset.' );
+
return false;
}
- // we're not very robust here. we should accept it in the wrong order. To quote
- // the xmp standard:
- // "A JPEG writer should write the ExtendedXMP marker segments in order, immediately following the
- // StandardXMP. However, the JPEG standard does not require preservation of marker segment order. A
- // robust JPEG reader should tolerate the marker segments in any order."
+ // we're not very robust here. we should accept it in the wrong order.
+ // To quote the XMP standard:
+ // "A JPEG writer should write the ExtendedXMP marker segments in order,
+ // immediately following the StandardXMP. However, the JPEG standard
+ // does not require preservation of marker segment order. A robust JPEG
+ // reader should tolerate the marker segments in any order."
//
- // otoh the probability that an image will have more than 128k of metadata is rather low...
- // so the probability that it will have > 128k, and be in the wrong order is very low...
+ // On the other hand, the probability that an image will have more than
+ // 128k of metadata is rather low, so the probability that it will have
+ // > 128k and be in the wrong order is very low.
if ( $len['offset'] !== $this->extendedXMPOffset ) {
wfDebugLog( 'XMP', __METHOD__ . 'Ignoring XMPExtended block due to wrong order. (Offset was '
. $len['offset'] . ' but expected ' . $this->extendedXMPOffset . ')' );
+
return false;
}
@@ -361,6 +388,7 @@ class XMPReader {
}
wfDebugLog( 'XMP', __METHOD__ . 'Parsing a XMPExtended block' );
+
return $this->parse( $actualContent, $atEnd );
}
@@ -376,9 +404,9 @@ class XMPReader {
* <exif:DigitalZoomRatio>0/10</exif:DigitalZoomRatio>
* and are processing the 0/10 bit.
*
- * @param $parser XMLParser reference to the xml parser
+ * @param XMLParser $parser XMLParser reference to the xml parser
* @param string $data Character data
- * @throws MWException on invalid data
+ * @throws MWException On invalid data
*/
function char( $parser, $data ) {
@@ -407,7 +435,6 @@ class XMPReader {
} else {
$this->charContent .= $data;
}
-
}
/** When we hit a closing element in MODE_IGNORE
@@ -436,7 +463,7 @@ class XMPReader {
* Or it could be if we hit the end element of a property
* of a compound data structure (like a member of an array).
*
- * @param string $elm namespace, space, and tag name.
+ * @param string $elm Namespace, space, and tag name.
*/
private function endElementModeSimple( $elm ) {
if ( $this->charContent !== false ) {
@@ -453,7 +480,6 @@ class XMPReader {
}
array_shift( $this->curItem );
array_shift( $this->mode );
-
}
/**
@@ -471,7 +497,7 @@ class XMPReader {
*
* This method is called when we hit the "</exif:ISOSpeedRatings>" tag.
*
- * @param string $elm namespace . space . tag name.
+ * @param string $elm Namespace . space . tag name.
* @throws MWException
*/
private function endElementNested( $elm ) {
@@ -482,7 +508,8 @@ class XMPReader {
&& !( $elm === self::NS_RDF . ' Description'
&& $this->mode[0] === self::MODE_STRUCT )
) {
- throw new MWException( "nesting mismatch. got a </$elm> but expected a </" . $this->curItem[0] . '>' );
+ throw new MWException( "nesting mismatch. got a </$elm> but expected a </" .
+ $this->curItem[0] . '>' );
}
// Validate structures.
@@ -499,7 +526,6 @@ class XMPReader {
if ( !isset( $this->results['xmp-' . $info['map_group']][$finalName] ) ) {
// This can happen if all the members of the struct failed validation.
wfDebugLog( 'XMP', __METHOD__ . " <$ns:$tag> has no valid members." );
-
} elseif ( is_callable( $validate ) ) {
$val =& $this->results['xmp-' . $info['map_group']][$finalName];
call_user_func_array( $validate, array( $info, &$val, false ) );
@@ -538,7 +564,7 @@ class XMPReader {
* (For comparison, we call endElementModeSimple when we
* hit the "</rdf:li>")
*
- * @param string $elm namespace . ' ' . element name
+ * @param string $elm Namespace . ' ' . element name
* @throws MWException
*/
private function endElementModeLi( $elm ) {
@@ -552,6 +578,7 @@ class XMPReader {
if ( !isset( $this->results['xmp-' . $info['map_group']][$finalName] ) ) {
wfDebugLog( 'XMP', __METHOD__ . " Empty compund element $finalName." );
+
return;
}
@@ -564,7 +591,6 @@ class XMPReader {
if ( $info['mode'] === self::MODE_LANG ) {
$this->results['xmp-' . $info['map_group']][$finalName]['_type'] = 'lang';
}
-
} else {
throw new MWException( __METHOD__ . " expected </rdf:seq> or </rdf:bag> but instead got $elm." );
}
@@ -578,13 +604,14 @@ class XMPReader {
* Qualifiers aren't all that common, and we don't do anything
* with them.
*
- * @param string $elm namespace and element
+ * @param string $elm Namespace and element
*/
private function endElementModeQDesc( $elm ) {
if ( $elm === self::NS_RDF . ' value' ) {
list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
$this->saveValue( $ns, $tag, $this->charContent );
+
return;
} else {
array_shift( $this->mode );
@@ -601,15 +628,15 @@ class XMPReader {
* Ignores the outer wrapping elements that are optional in
* xmp and have no meaning.
*
- * @param $parser XMLParser
- * @param string $elm namespace . ' ' . element name
+ * @param XMLParser $parser
+ * @param string $elm Namespace . ' ' . element name
* @throws MWException
*/
function endElement( $parser, $elm ) {
if ( $elm === ( self::NS_RDF . ' RDF' )
|| $elm === 'adobe:ns:meta/ xmpmeta'
- || $elm === 'adobe:ns:meta/ xapmeta' )
- {
+ || $elm === 'adobe:ns:meta/ xapmeta'
+ ) {
// ignore these.
return;
}
@@ -626,6 +653,7 @@ class XMPReader {
// that forgets the namespace on some things.
// (Luckily they are unimportant things).
wfDebugLog( 'XMP', __METHOD__ . " Encountered </$elm> which has no namespace. Skipping." );
+
return;
}
@@ -684,7 +712,7 @@ class XMPReader {
* in which case we add it to the item stack, so we can ignore things
* that are nested, correctly.
*
- * @param string $elm namespace . ' ' . tag name
+ * @param string $elm Namespace . ' ' . tag name
*/
private function startElementModeIgnore( $elm ) {
if ( $elm === $this->curItem[0] ) {
@@ -697,8 +725,8 @@ class XMPReader {
* Start element in MODE_BAG (unordered array)
* this should always be <rdf:Bag>
*
- * @param string $elm namespace . ' ' . tag
- * @throws MWException if we have an element that's not <rdf:Bag>
+ * @param string $elm Namespace . ' ' . tag
+ * @throws MWException If we have an element that's not <rdf:Bag>
*/
private function startElementModeBag( $elm ) {
if ( $elm === self::NS_RDF . ' Bag' ) {
@@ -706,15 +734,14 @@ class XMPReader {
} else {
throw new MWException( "Expected <rdf:Bag> but got $elm." );
}
-
}
/**
* Start element in MODE_SEQ (ordered array)
* this should always be <rdf:Seq>
*
- * @param string $elm namespace . ' ' . tag
- * @throws MWException if we have an element that's not <rdf:Seq>
+ * @param string $elm Namespace . ' ' . tag
+ * @throws MWException If we have an element that's not <rdf:Seq>
*/
private function startElementModeSeq( $elm ) {
if ( $elm === self::NS_RDF . ' Seq' ) {
@@ -727,7 +754,6 @@ class XMPReader {
} else {
throw new MWException( "Expected <rdf:Seq> but got $elm." );
}
-
}
/**
@@ -741,8 +767,8 @@ class XMPReader {
* which are really only used for thumbnails, which
* we don't care about.
*
- * @param string $elm namespace . ' ' . tag
- * @throws MWException if we have an element that's not <rdf:Alt>
+ * @param string $elm Namespace . ' ' . tag
+ * @throws MWException If we have an element that's not <rdf:Alt>
*/
private function startElementModeLang( $elm ) {
if ( $elm === self::NS_RDF . ' Alt' ) {
@@ -750,7 +776,6 @@ class XMPReader {
} else {
throw new MWException( "Expected <rdf:Seq> but got $elm." );
}
-
}
/**
@@ -767,7 +792,7 @@ class XMPReader {
*
* This method is called when processing the <rdf:Description> element
*
- * @param string $elm namespace and tag names separated by space.
+ * @param string $elm Namespace and tag names separated by space.
* @param array $attribs Attributes of the element.
* @throws MWException
*/
@@ -784,15 +809,14 @@ class XMPReader {
} elseif ( $elm === self::NS_RDF . ' value' ) {
// This should not be here.
throw new MWException( __METHOD__ . ' Encountered <rdf:value> where it was unexpected.' );
-
} else {
// something else we don't recognize, like a qualifier maybe.
- wfDebugLog( 'XMP', __METHOD__ . " Encountered element <$elm> where only expecting character data as value of " . $this->curItem[0] );
+ wfDebugLog( 'XMP', __METHOD__ .
+ " Encountered element <$elm> where only expecting character data as value of " .
+ $this->curItem[0] );
array_unshift( $this->mode, self::MODE_IGNORE );
array_unshift( $this->curItem, $elm );
-
}
-
}
/**
@@ -806,7 +830,7 @@ class XMPReader {
* </exif:DigitalZoomRatio>
* Called when processing the <rdf:value> or <foo:someQualifier>.
*
- * @param string $elm namespace and tag name separated by a space.
+ * @param string $elm Namespace and tag name separated by a space.
*
*/
private function startElementModeQDesc( $elm ) {
@@ -827,8 +851,8 @@ class XMPReader {
* This is generally where most properties start.
*
* @param string $ns Namespace
- * @param string $tag tag name (without namespace prefix)
- * @param array $attribs array of attributes
+ * @param string $tag Tag name (without namespace prefix)
+ * @param array $attribs Array of attributes
* @throws MWException
*/
private function startElementModeInitial( $ns, $tag, $attribs ) {
@@ -846,6 +870,7 @@ class XMPReader {
array_unshift( $this->mode, self::MODE_IGNORE );
array_unshift( $this->curItem, $ns . ' ' . $tag );
+
return;
}
$mode = $this->items[$ns][$tag]['mode'];
@@ -865,9 +890,9 @@ class XMPReader {
wfDebugLog( 'XMP', __METHOD__ . " Ignoring unrecognized element <$ns:$tag>." );
array_unshift( $this->mode, self::MODE_IGNORE );
array_unshift( $this->curItem, $ns . ' ' . $tag );
+
return;
}
-
}
// process attributes
$this->doAttribs( $attribs );
@@ -887,9 +912,9 @@ class XMPReader {
* <exif:Flash rdf:parseType='Resource'> <exif:Fired>True</exif:Fired>
* <exif:Mode>1</exif:Mode></exif:Flash>
*
- * @param string $ns namespace
- * @param string $tag tag name (no ns)
- * @param array $attribs array of attribs w/ values.
+ * @param string $ns Namespace
+ * @param string $tag Tag name (no ns)
+ * @param array $attribs Array of attribs w/ values.
* @throws MWException
*/
private function startElementModeStruct( $ns, $tag, $attribs ) {
@@ -897,8 +922,8 @@ class XMPReader {
if ( isset( $this->items[$ns][$tag] ) ) {
if ( isset( $this->items[$ns][$this->ancestorStruct]['children'] )
- && !isset( $this->items[$ns][$this->ancestorStruct]['children'][$tag] ) )
- {
+ && !isset( $this->items[$ns][$this->ancestorStruct]['children'][$tag] )
+ ) {
// This assumes that we don't have inter-namespace nesting
// which we don't in all the properties we're interested in.
throw new MWException( " <$tag> appeared nested in <" . $this->ancestorStruct
@@ -909,14 +934,15 @@ class XMPReader {
if ( $this->charContent !== false ) {
// Something weird.
// Should not happen in valid XMP.
- throw new MWException( "tag <$tag> nested in non-whitespace characters (" . $this->charContent . ")." );
+ throw new MWException( "tag <$tag> nested in non-whitespace characters (" .
+ $this->charContent . ")." );
}
} else {
array_unshift( $this->mode, self::MODE_IGNORE );
array_unshift( $this->curItem, $elm );
+
return;
}
-
}
if ( $ns === self::NS_RDF && $tag === 'Description' ) {
@@ -935,9 +961,9 @@ class XMPReader {
* </rdf:Seq> </exif:ISOSpeedRatings>
* This method is called when we hit the <rdf:li> element.
*
- * @param string $elm namespace . ' ' . tagname
+ * @param string $elm Namespace . ' ' . tagname
* @param array $attribs Attributes. (needed for BAGSTRUCTS)
- * @throws MWException if gets a tag other than <rdf:li>
+ * @throws MWException If gets a tag other than <rdf:li>
*/
private function startElementModeLi( $elm, $attribs ) {
if ( ( $elm ) !== self::NS_RDF . ' li' ) {
@@ -965,7 +991,6 @@ class XMPReader {
? $this->items[$curNS][$curTag]['map_name'] : $curTag;
$this->doAttribs( $attribs );
-
} else {
// Normal BAG or SEQ containing simple values.
array_unshift( $this->mode, self::MODE_SIMPLE );
@@ -974,7 +999,6 @@ class XMPReader {
array_unshift( $this->curItem, $this->curItem[0] );
$this->processingArray = true;
}
-
}
/**
@@ -987,17 +1011,17 @@ class XMPReader {
*
* This method is called when we hit the <rdf:li> element.
*
- * @param string $elm namespace . ' ' . tag
- * @param array $attribs array of elements (most importantly xml:lang)
- * @throws MWException if gets a tag other than <rdf:li> or if no xml:lang
+ * @param string $elm Namespace . ' ' . tag
+ * @param array $attribs Array of elements (most importantly xml:lang)
+ * @throws MWException If gets a tag other than <rdf:li> or if no xml:lang
*/
private function startElementModeLiLang( $elm, $attribs ) {
if ( $elm !== self::NS_RDF . ' li' ) {
throw new MWException( __METHOD__ . " <rdf:li> expected but got $elm." );
}
if ( !isset( $attribs[self::NS_XML . ' lang'] )
- || !preg_match( '/^[-A-Za-z0-9]{2,}$/D', $attribs[self::NS_XML . ' lang'] ) )
- {
+ || !preg_match( '/^[-A-Za-z0-9]{2,}$/D', $attribs[self::NS_XML . ' lang'] )
+ ) {
throw new MWException( __METHOD__
. " <rdf:li> did not contain, or has invalid xml:lang attribute in lang alternative" );
}
@@ -1017,17 +1041,17 @@ class XMPReader {
* Generally just calls a helper based on what MODE we're in.
* Also does some initial set up for the wrapper element
*
- * @param $parser XMLParser
- * @param string $elm namespace "<space>" element
- * @param array $attribs attribute name => value
+ * @param XMLParser $parser
+ * @param string $elm Namespace "<space>" element
+ * @param array $attribs Attribute name => value
* @throws MWException
*/
function startElement( $parser, $elm, $attribs ) {
if ( $elm === self::NS_RDF . ' RDF'
|| $elm === 'adobe:ns:meta/ xmpmeta'
- || $elm === 'adobe:ns:meta/ xapmeta' )
- {
+ || $elm === 'adobe:ns:meta/ xapmeta'
+ ) {
/* ignore. */
return;
} elseif ( $elm === self::NS_RDF . ' Description' ) {
@@ -1049,6 +1073,7 @@ class XMPReader {
if ( strpos( $elm, ' ' ) === false ) {
// This probably shouldn't happen.
wfDebugLog( 'XMP', __METHOD__ . " Encountered <$elm> which has no namespace. Skipping." );
+
return;
}
@@ -1104,23 +1129,24 @@ class XMPReader {
* Often the initial "<rdf:Description>" tag just has all the simple
* properties as attributes.
*
+ * @codingStandardsIgnoreStart Long line that cannot be broken
* @par Example:
* @code
* <rdf:Description rdf:about="" xmlns:exif="http://ns.adobe.com/exif/1.0/" exif:DigitalZoomRatio="0/10">
* @endcode
+ * @codingStandardsIgnoreEnd
*
- * @param array $attribs attribute=>value array.
+ * @param array $attribs Array of attribute => value pairs
* @throws MWException
*/
private function doAttribs( $attribs ) {
-
// first check for rdf:parseType attribute, as that can change
// how the attributes are interperted.
if ( isset( $attribs[self::NS_RDF . ' parseType'] )
&& $attribs[self::NS_RDF . ' parseType'] === 'Resource'
- && $this->mode[0] === self::MODE_SIMPLE )
- {
+ && $this->mode[0] === self::MODE_SIMPLE
+ ) {
// this is equivalent to having an inner rdf:Description
$this->mode[0] = self::MODE_QDESC;
}
@@ -1158,9 +1184,9 @@ class XMPReader {
* $this->processingArray to determine what name to
* save the value under. (in addition to $tag).
*
- * @param string $ns namespace of tag this is for
- * @param string $tag tag name
- * @param string $val value to save
+ * @param string $ns Namespace of tag this is for
+ * @param string $tag Tag name
+ * @param string $val Value to save
*/
private function saveValue( $ns, $tag, $val ) {
@@ -1177,6 +1203,7 @@ class XMPReader {
// is to be consistent between here and validating structures.
if ( is_null( $val ) ) {
wfDebugLog( 'XMP', __METHOD__ . " <$ns:$tag> failed validation." );
+
return;
}
} else {