summary | refs | log | tree | commit | diff
path: root/includes/DjVuImage.php
diff options
context:
space:
mode:
Diffstat (limited to 'includes/DjVuImage.php')
-rw-r--r-- includes/DjVuImage.php 134
1 files changed, 123 insertions, 11 deletions
diff --git a/includes/DjVuImage.php b/includes/DjVuImage.php
index 3b8a68ba..1e423565 100644
--- a/includes/DjVuImage.php
+++ b/includes/DjVuImage.php
@@ -1,11 +1,6 @@
<?php
+
/**
- * Support for detecting/validating DjVu image files and getting
- * some basic file metadata (resolution etc)
- *
- * File format docs are available in source package for DjVuLibre:
- * http://djvulibre.djvuzone.org/
- *
*
* Copyright (C) 2006 Brion Vibber <brion@pobox.com>
* http://www.mediawiki.org/
@@ -25,9 +20,17 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
- * @package MediaWiki
*/
+/**
+ * Support for detecting/validating DjVu image files and getting
+ * some basic file metadata (resolution etc)
+ *
+ * File format docs are available in source package for DjVuLibre:
+ * http://djvulibre.djvuzone.org/
+ *
+ * @addtogroup Media
+ */
class DjVuImage {
function __construct( $filename ) {
$this->mFilename = $filename;
@@ -68,6 +71,7 @@ class DjVuImage {
function dump() {
$file = fopen( $this->mFilename, 'rb' );
$header = fread( $file, 12 );
+ // FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables.
extract( unpack( 'a4magic/a4chunk/NchunkLength', $header ) );
echo "$chunk $chunkLength\n";
$this->dumpForm( $file, $chunkLength, 1 );
@@ -83,6 +87,7 @@ class DjVuImage {
if( $chunkHeader == '' ) {
break;
}
+ // FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables.
extract( unpack( 'a4chunk/NchunkLength', $chunkHeader ) );
echo str_repeat( ' ', $indent * 4 ) . "$chunk $chunkLength\n";
@@ -111,6 +116,7 @@ class DjVuImage {
if( strlen( $header ) < 16 ) {
wfDebug( __METHOD__ . ": too short file header\n" );
} else {
+ // FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables.
extract( unpack( 'a4magic/a4form/NformLength/a4subtype', $header ) );
if( $magic != 'AT&T' ) {
@@ -134,6 +140,7 @@ class DjVuImage {
if( strlen( $header ) < 8 ) {
return array( false, 0 );
} else {
+ // FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables.
extract( unpack( 'a4chunk/Nlength', $header ) );
return array( $chunk, $length );
}
@@ -192,6 +199,7 @@ class DjVuImage {
return false;
}
+ // FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables.
extract( unpack(
'nwidth/' .
'nheight/' .
@@ -214,17 +222,121 @@ class DjVuImage {
* @return string
*/
function retrieveMetaData() {
- global $wgDjvuToXML;
- if ( isset( $wgDjvuToXML ) ) {
- $cmd = $wgDjvuToXML . ' --without-anno --without-text ' .
+ global $wgDjvuToXML, $wgDjvuDump;
+ if ( isset( $wgDjvuDump ) ) {
+ # djvudump is faster as of version 3.5
+ # http://sourceforge.net/tracker/index.php?func=detail&aid=1704049&group_id=32953&atid=406583
+ wfProfileIn( 'djvudump' );
+ $cmd = wfEscapeShellArg( $wgDjvuDump ) . ' ' . wfEscapeShellArg( $this->mFilename );
+ $dump = wfShellExec( $cmd );
+ $xml = $this->convertDumpToXML( $dump );
+ wfProfileOut( 'djvudump' );
+ } elseif ( isset( $wgDjvuToXML ) ) {
+ wfProfileIn( 'djvutoxml' );
+ $cmd = wfEscapeShellArg( $wgDjvuToXML ) . ' --without-anno --without-text ' .
wfEscapeShellArg( $this->mFilename );
$xml = wfShellExec( $cmd );
+ wfProfileOut( 'djvutoxml' );
} else {
$xml = null;
}
return $xml;
}
-
+
+ /**
+ * Convert djvudump output to DjVuXML (hack to temporarily work around a djvutoxml bug)
+ */
+ function convertDumpToXML( $dump ) {
+ if ( strval( $dump ) == '' ) {
+ return false;
+ }
+
+ $xml = <<<EOT
+<?xml version="1.0" ?>
+<!DOCTYPE DjVuXML PUBLIC "-//W3C//DTD DjVuXML 1.1//EN" "pubtext/DjVuXML-s.dtd">
+<DjVuXML>
+<HEAD></HEAD>
+<BODY>
+EOT;
+
+ $dump = str_replace( "\r", '', $dump );
+ $line = strtok( $dump, "\n" );
+ $m = false;
+ $good = false;
+ if ( preg_match( '/^( *)FORM:DJVU/', $line, $m ) ) {
+ # Single-page
+ if ( $this->parseFormDjvu( $line, $xml ) ) {
+ $good = true;
+ } else {
+ return false;
+ }
+ } elseif ( preg_match( '/^( *)FORM:DJVM/', $line, $m ) ) {
+ # Multi-page
+ $parentLevel = strlen( $m[1] );
+ # Find DIRM
+ $line = strtok( "\n" );
+ while ( $line !== false ) {
+ $childLevel = strspn( $line, ' ' );
+ if ( $childLevel <= $parentLevel ) {
+ # End of chunk
+ break;
+ }
+
+ if ( preg_match( '/^ *DIRM.*indirect/', $line ) ) {
+ wfDebug( "Indirect multi-page DjVu document, bad for server!\n" );
+ return false;
+ }
+ if ( preg_match( '/^ *FORM:DJVU/', $line ) ) {
+ # Found page
+ if ( $this->parseFormDjvu( $line, $xml ) ) {
+ $good = true;
+ } else {
+ return false;
+ }
+ }
+ $line = strtok( "\n" );
+ }
+ }
+ if ( !$good ) {
+ return false;
+ }
+
+ $xml .= "</BODY>\n</DjVuXML>\n";
+ return $xml;
+ }
+
+ function parseFormDjvu( $line, &$xml ) {
+ $parentLevel = strspn( $line, ' ' );
+ $line = strtok( "\n" );
+
+ # Find INFO
+ while ( $line !== false ) {
+ $childLevel = strspn( $line, ' ' );
+ if ( $childLevel <= $parentLevel ) {
+ # End of chunk
+ break;
+ }
+
+ if ( preg_match( '/^ *INFO *\[\d*\] *DjVu *(\d+)x(\d+), *\w*, *(\d+) *dpi, *gamma=([0-9.-]+)/', $line, $m ) ) {
+ $xml .= Xml::tags( 'OBJECT',
+ array(
+ #'data' => '',
+ #'type' => 'image/x.djvu',
+ 'height' => $m[2],
+ 'width' => $m[1],
+ #'usemap' => '',
+ ),
+ "\n" .
+ Xml::element( 'PARAM', array( 'name' => 'DPI', 'value' => $m[3] ) ) . "\n" .
+ Xml::element( 'PARAM', array( 'name' => 'GAMMA', 'value' => $m[4] ) ) . "\n"
+ ) . "\n";
+ return true;
+ }
+ $line = strtok( "\n" );
+ }
+ # Not found
+ return false;
+ }
}