diff options
author | Pierre Schmitz <pierre@archlinux.de> | 2011-06-22 11:28:20 +0200 |
---|---|---|
committer | Pierre Schmitz <pierre@archlinux.de> | 2011-06-22 11:28:20 +0200 |
commit | 9db190c7e736ec8d063187d4241b59feaf7dc2d1 (patch) | |
tree | 46d1a0dee7febef5c2d57a9f7b972be16a163b3d /includes/DjVuImage.php | |
parent | 78677c7bbdcc9739f6c10c75935898a20e1acd9e (diff) |
update to MediaWiki 1.17.0
Diffstat (limited to 'includes/DjVuImage.php')
-rw-r--r-- | includes/DjVuImage.php | 37 |
1 files changed, 25 insertions, 12 deletions
diff --git a/includes/DjVuImage.php b/includes/DjVuImage.php index 75df0fd5..cccb070a 100644 --- a/includes/DjVuImage.php +++ b/includes/DjVuImage.php @@ -1,8 +1,8 @@ <?php - /** + * DjVu image handler * - * Copyright (C) 2006 Brion Vibber <brion@pobox.com> + * Copyright © 2006 Brion Vibber <brion@pobox.com> * http://www.mediawiki.org/ * * This program is free software; you can redistribute it and/or modify @@ -20,6 +20,7 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * http://www.gnu.org/copyleft/gpl.html * + * @file */ /** @@ -225,6 +226,8 @@ class DjVuImage { */ function retrieveMetaData() { global $wgDjvuToXML, $wgDjvuDump, $wgDjvuTxt; + wfProfileIn( __METHOD__ ); + if ( isset( $wgDjvuDump ) ) { # djvudump is faster as of version 3.5 # http://sourceforge.net/tracker/index.php?func=detail&aid=1704049&group_id=32953&atid=406583 @@ -247,28 +250,38 @@ class DjVuImage { wfProfileIn( 'djvutxt' ); $cmd = wfEscapeShellArg( $wgDjvuTxt ) . ' --detail=page ' . wfEscapeShellArg( $this->mFilename ) ; wfDebug( __METHOD__.": $cmd\n" ); + $retval = ''; $txt = wfShellExec( $cmd, $retval ); wfProfileOut( 'djvutxt' ); if( $retval == 0) { - # Get rid of invalid UTF-8, strip control characters - if( is_callable( 'iconv' ) ) { - wfSuppressWarnings(); - $txt = iconv( "UTF-8","UTF-8//IGNORE", $txt ); - wfRestoreWarnings(); - } else { - $txt = UtfNormal::cleanUp( $txt ); - } + # Strip some control characters $txt = preg_replace( "/[\013\035\037]/", "", $txt ); - $txt = htmlspecialchars($txt); - $txt = preg_replace( "/\((page\s[\d-]*\s[\d-]*\s[\d-]*\s[\d-]*\s*\"([^<]*?)\"\s*|)\)/s", "<PAGE value=\"$2\" />", $txt ); + $reg = <<<EOR + /\(page\s[\d-]*\s[\d-]*\s[\d-]*\s[\d-]*\s*" + ((?> # Text to match is composed of atoms of either: + \\\\. # - any escaped character + | # - any character different from " and \ + [^"\\\\]+ + )*?) + "\s*\) + | # Or page can be empty ; in this case, djvutxt dumps () + \(\s*()\)/sx +EOR; + $txt = preg_replace_callback( $reg, array( $this, 'pageTextCallback' ), $txt ); $txt = "<DjVuTxt>\n<HEAD></HEAD>\n<BODY>\n" . $txt . "</BODY>\n</DjVuTxt>\n"; $xml = preg_replace( "/<DjVuXML>/", "<mw-djvu><DjVuXML>", $xml ); $xml = $xml . $txt. '</mw-djvu>' ; } } + wfProfileOut( __METHOD__ ); return $xml; } + function pageTextCallback( $matches ) { + # Get rid of invalid UTF-8, strip control characters + return '<PAGE value="' . htmlspecialchars( UtfNormal::cleanUp( $matches[1] ) ) . '" />'; + } + /** * Hack to temporarily work around djvutoxml bug */ |