commit 67290f70d8143b18f24ffd6a1827def9bdc21108 Author: Akira Kakuto Date: Thu Dec 6 23:31:33 2018 +0000 support system poppler 0.72.0 git-svn-id: svn://tug.org/texlive/trunk/Build/source@49336 c570f23f-e606-0410-a88d-b1316a301751 diff --git a/texk/web2c/pdftexdir/pdftoepdf-poppler0.72.0.cc b/texk/web2c/pdftexdir/pdftoepdf-poppler0.72.0.cc new file mode 100644 index 000000000..630d3366d --- /dev/null +++ b/texk/web2c/pdftexdir/pdftoepdf-poppler0.72.0.cc @@ -0,0 +1,1113 @@ +/* +Copyright 1996-2017 Han The Thanh, + +This file is part of pdfTeX. + +pdfTeX is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +pdfTeX is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program. If not, see . +*/ + +/* +This is based on the patch texlive-poppler-0.59.patch <2017-09-19> at +https://git.archlinux.org/svntogit/packages.git/plain/texlive-bin/trunk +by Arch Linux. A little modifications are made to avoid a crash for +some kind of pdf images, such as figure_missing.pdf in gnuplot. +The poppler should be 0.72.0 or newer versions. +POPPLER_VERSION should be defined. +*/ + +/* Do this early in order to avoid a conflict between + MINGW32 defining 'boolean' as 'unsigned char' and + defining Pascal's boolean as 'int'. +*/ +#include +#include + +#include +#include +#include +#include +#include +#include + +#ifdef POPPLER_VERSION +#include +#include +#include +#include +#include +#define GString GooString +#else +#error POPPLER_VERSION should be defined. 
+#endif
+#include <assert.h>
+
+#include "Object.h"
+#include "Stream.h"
+#include "Array.h"
+#include "Dict.h"
+#include "XRef.h"
+#include "Catalog.h"
+#include "Link.h"
+#include "Page.h"
+#include "GfxFont.h"
+#include "PDFDoc.h"
+#include "GlobalParams.h"
+#include "Error.h"
+
+// This file is mostly C and not very much C++; it's just used to interface
+// the functions of xpdf, which are written in C++.
+
+extern "C" {
+// NOTE(review): the two header names below were missing from this copy of
+// the patch and are restored from the upstream pdfTeX sources -- confirm.
+#include <pdftexdir/ptexmac.h>
+#include <pdftexdir/pdftex-common.h>
+
+// These functions from pdftex.web get declared in pdftexcoerce.h in the
+// usual web2c way, but we cannot include that file here because C++
+// does not allow it.
+extern int getpdfsuppresswarningpagegroup(void);
+extern integer getpdfsuppressptexinfo(void);
+extern integer zround(double);
+}
+
+// The prefix "PTEX" for the PDF keys is special to pdfTeX;
+// this has been registered with Adobe by Hans Hagen.
+
+#define pdfkeyprefix "PTEX"
+
+// Bits tested against getpdfsuppressptexinfo() in write_epdf().
+#define MASK_SUPPRESS_PTEX_FULLBANNER 0x01
+#define MASK_SUPPRESS_PTEX_FILENAME   0x02
+#define MASK_SUPPRESS_PTEX_PAGENUMBER 0x04
+#define MASK_SUPPRESS_PTEX_INFODICT   0x08
+
+// When copying the Resources of the selected page, all objects are copied
+// recursively top-down. Indirect objects however are not fetched during
+// copying, but get a new object number from pdfTeX and then will be
+// appended into a linked list. Duplicates are checked and removed from the
+// list of indirect objects during appending.
+
+enum InObjType {
+    objFont,
+    objFontDesc,
+    objOther
+};
+
+struct InObj {
+    Ref ref;                    // ref in original PDF
+    InObjType type;             // object type
+    InObj *next;                // next entry in list of indirect objects
+    int num;                    // new object number in output PDF
+    fd_entry *fd;               // pointer to /FontDescriptor object structure
+    int enc_objnum;             // Encoding for objFont
+    int written;                // has it been written to output PDF?
+};
+
+// One entry per font whose /Encoding object has to be written;
+// the list is built by addEncoding() and consumed by writeEncodings().
+struct UsedEncoding {
+    int enc_objnum;
+    GfxFont *font;
+    UsedEncoding *next;
+};
+
+static InObj *inObjList;
+static UsedEncoding *encodingList;
+static bool isInit = false;
+
+// --------------------------------------------------------------------
+// Maintain list of open embedded PDF files
+// --------------------------------------------------------------------
+
+struct PdfDocument {
+    char *file_name;
+    PDFDoc *doc;
+    XRef *xref;
+    InObj *inObjList;
+    int occurences;             // number of references to the document; the doc can be
+                                // deleted when this is negative
+    PdfDocument *next;
+};
+
+static PdfDocument *pdfDocuments = 0;
+
+static XRef *xref = 0;
+
+// Returns pointer to PdfDocument record for PDF file.
+// Creates a new record if it doesn't exist yet.
+// xref is made current for the document.
+//
+// A record that already exists gets its reference count incremented; a new
+// record starts at 0, has no xref yet (the global xref is reset to 0 as
+// well) and is pushed onto the front of pdfDocuments.
+
+static PdfDocument *find_add_document(char *file_name)
+{
+    PdfDocument *p = pdfDocuments;
+    while (p && strcmp(p->file_name, file_name) != 0)
+        p = p->next;
+    if (p) {
+        xref = p->xref;
+        (p->occurences)++;
+        return p;
+    }
+    p = new PdfDocument;
+    p->file_name = xstrdup(file_name);
+    p->xref = xref = 0;
+    p->occurences = 0;
+    GString *docName = new GString(p->file_name);
+    p->doc = new PDFDoc(docName);       // takes ownership of docName
+    if (!p->doc->isOk() || !p->doc->okToPrint()) {
+        pdftex_fail("xpdf: reading PDF image failed");
+    }
+    p->inObjList = 0;
+    p->next = pdfDocuments;
+    pdfDocuments = p;
+    return p;
+}
+
+// Deallocate a PdfDocument with all its resources:
+// unlinks it from pdfDocuments, frees its InObj list, the PDFDoc, the
+// file name and the record itself. Does nothing if pdf_doc is not in
+// the list.
+
+static void delete_document(PdfDocument * pdf_doc)
+{
+    PdfDocument **p = &pdfDocuments;
+    while (*p && *p != pdf_doc)
+        p = &((*p)->next);
+    // should not happen:
+    if (!*p)
+        return;
+    // unlink from list
+    *p = pdf_doc->next;
+    // free pdf_doc's resources
+    InObj *r, *n;
+    for (r = pdf_doc->inObjList; r != 0; r = n) {
+        n = r->next;
+        delete r;
+    }
+    xref = pdf_doc->xref;
+    delete pdf_doc->doc;
+    // pdf_doc->xref was obtained from the document just deleted (see
+    // read_pdf_info), so do not leave the global pointing at it
+    xref = 0;
+    xfree(pdf_doc->file_name);
+    delete pdf_doc;
+}
+
+//
+// --------------------------------------------------------------------
+
+// Records gfont in encodingList and reserves a new object number for its
+// /Encoding object; returns that number.
+static int addEncoding(GfxFont * gfont)
+{
+    UsedEncoding *n;
+    n = new UsedEncoding;
+    n->next = encodingList;
+    encodingList = n;
+    n->font = gfont;
+    n->enc_objnum = pdfnewobjnum();
+    return n->enc_objnum;
+}
+
+#define addFont(ref, fd, enc_objnum) \
+        addInObj(objFont, ref, fd, enc_objnum)
+
+// addFontDesc is only used to avoid writing the original FontDescriptor
+// from the PDF file.
+
+#define addFontDesc(ref, fd) \
+        addInObj(objFontDesc, ref, fd, 0)
+
+#define addOther(ref) \
+        addInObj(objOther, ref, 0, 0)
+
+// Registers the indirect object `ref` of the included PDF and returns its
+// object number in the output PDF.
+//   type  kind of object (font, font descriptor, anything else)
+//   fd    font descriptor structure (0 for objOther)
+//   e     object number of the /Encoding (only meaningful for objFont)
+// A reference that is already in inObjList keeps the number it got first;
+// the arguments of the later call are ignored in that case.
+static int addInObj(InObjType type, Ref ref, fd_entry * fd, int e)
+{
+    InObj *p, *last = 0, *n;
+    if (ref.num == 0)
+        pdftex_fail("PDF inclusion: invalid reference");
+    // look for a duplicate first, so that nothing is allocated for it
+    for (p = inObjList; p != 0; p = p->next) {
+        if (p->ref.num == ref.num && p->ref.gen == ref.gen)
+            return p->num;
+        last = p;
+    }
+    n = new InObj;
+    n->ref = ref;
+    n->type = type;
+    n->next = 0;
+    n->fd = fd;
+    n->enc_objnum = e;
+    n->written = 0;
+    // it is important to add new objects at the end of the list,
+    // because new objects are being added while the list is being
+    // written out.
+    if (last == 0)
+        inObjList = n;
+    else
+        last->next = n;
+    if (type == objFontDesc)
+        n->num = get_fd_objnum(fd);
+    else
+        n->num = pdfnewobjnum();
+    return n->num;
+}
+
+#if 0 /* unused */
+static int getNewObjectNumber(Ref ref)
+{
+    InObj *p;
+    if (inObjList == 0) {
+        pdftex_fail("No objects copied yet");
+    } else {
+        for (p = inObjList; p != 0; p = p->next) {
+            if (p->ref.num == ref.num && p->ref.gen == ref.gen) {
+                return p->num;
+            }
+        }
+        pdftex_fail("Object not yet copied: %i %i", ref.num, ref.gen);
+    }
+#ifdef _MSC_VER
+    /* Never reached, but without __attribute__((noreturn)) for pdftex_fail()
+       MSVC 5.0 requires an int return value.  */
+    return -60000;
+#endif
+}
+#endif
+
+static void copyObject(Object *);
+
+// Writes s as a PDF name: a leading "/", then every character that is a
+// digit, a letter or one of "_.-+" verbatim and every other byte as #XX.
+static void copyName(char *s)
+{
+    pdf_puts("/");
+    for (; *s != 0; s++) {
+        // the <ctype.h> functions are only defined for values representable
+        // as unsigned char (or EOF); a plain char may be negative
+        const unsigned char c = (unsigned char) *s;
+        if (isdigit(c) || isupper(c) || islower(c) || c == '_' ||
+            c == '.' || c == '-' || c == '+')
+            pdfout(*s);
+        else
+            pdf_printf("#%.2X", c);
+    }
+}
+
+// Writes entry i of the dict in obj as "/Key value\n"; the value is taken
+// without resolving references.
+static void copyDictEntry(Object * obj, int i)
+{
+    Object obj1;
+    copyName((char *)obj->dictGetKey(i));
+    pdf_puts(" ");
+    obj1 = obj->dictGetValNF(i);
+    copyObject(&obj1);
+    pdf_puts("\n");
+}
+
+// Writes all entries of the dict in obj (without the enclosing << >>).
+static void copyDict(Object * obj)
+{
+    int i, l;
+    if (!obj->isDict())
+        pdftex_fail("PDF inclusion: invalid dict type <%s>",
+                    obj->getTypeName());
+    for (i = 0, l = obj->dictGetLength(); i < l; ++i)
+        copyDictEntry(obj, i);
+}
+
+// Writes the font dict in obj with its /FontDescriptor, /BaseFont and
+// /Encoding entries replaced by the objects pdfTeX created for r.
+static void copyFontDict(Object * obj, InObj * r)
+{
+    int i, l;
+    char *key;
+    if (!obj->isDict())
+        pdftex_fail("PDF inclusion: invalid dict type <%s>",
+                    obj->getTypeName());
+    pdf_puts("<<\n");
+    assert(r->type == objFont); // FontDescriptor is in fd_tree
+    for (i = 0, l = obj->dictGetLength(); i < l; ++i) {
+        key = (char *)obj->dictGetKey(i);
+        // note: these are prefix comparisons
+        if (strncmp("FontDescriptor", key, strlen("FontDescriptor")) == 0
+            || strncmp("BaseFont", key, strlen("BaseFont")) == 0
+            || strncmp("Encoding", key, strlen("Encoding")) == 0)
+            continue;           // skip original values
+        copyDictEntry(obj, i);
+    }
+    // write new FontDescriptor, BaseFont, and Encoding
+    pdf_printf("/FontDescriptor %d 0 R\n", get_fd_objnum(r->fd));
+    pdf_printf("/BaseFont %d 0 R\n", get_fn_objnum(r->fd));
+    pdf_printf("/Encoding %d 0 R\n", r->enc_objnum);
+    pdf_puts(">>");
+}
+
+// Copies all bytes of str to the output and stores the last byte copied
+// (0 for an empty stream) in pdflastbyte.
+static void copyStream(Stream * str)
+{
+    int c, c2 = 0;
+    str->reset();
+    while ((c = str->getChar()) != EOF) {
+        pdfout(c);
+        c2 = c;
+    }
+    pdflastbyte = c2;
+}
+
+// Writes "/ProcSet [ ... ]" from the array in obj; every element must be
+// a name.
+static void copyProcSet(Object * obj)
+{
+    int i, l;
+    Object procset;
+    if (!obj->isArray())
+        pdftex_fail("PDF inclusion: invalid ProcSet array type <%s>",
+                    obj->getTypeName());
+    pdf_puts("/ProcSet [ ");
+    for (i = 0, l = obj->arrayGetLength(); i < l; ++i) {
+        procset = obj->arrayGetNF(i);
+        if (!procset.isName())
+            pdftex_fail("PDF inclusion: invalid ProcSet entry type <%s>",
+                        procset.getTypeName());
+        copyName((char *)procset.getName());
+        pdf_puts(" ");
+    }
+    pdf_puts("]\n");
+}
+
+#define REPLACE_TYPE1C true
+
+// Returns true if the font descriptor dict has a /FontFile stream or, with
+// REPLACE_TYPE1C, a /FontFile3 stream whose /Subtype is /Type1C.
+static bool embeddableFont(Object * fontdesc)
+{
+    Object fontfile, ffsubtype;
+
+    if (!fontdesc->isDict())
+        return false;
+    fontfile = fontdesc->dictLookup("FontFile");
+    if (fontfile.isStream())
+        return true;
+    if (REPLACE_TYPE1C) {
+        fontfile = fontdesc->dictLookup("FontFile3");
+        if (!fontfile.isStream())
+            return false;
+        ffsubtype = fontfile.streamGetDict()->lookup("Subtype");
+        return ffsubtype.isName() && !strcmp(ffsubtype.getName(), "Type1C");
+    }
+    return false;
+}
+
+// Writes the /Font resource entry "tag -> fontRef". An embedded Type1 font
+// that is known to the font map is replaced by pdfTeX's own font objects;
+// every other font is copied as is.
+static void copyFont(char *tag, Object * fontRef)
+{
+    Object fontdict, subtype, basefont, fontdescRef, fontdesc, charset,
+        stemV;
+    GfxFont *gfont;
+    fd_entry *fd;
+    fm_entry *fontmap;
+    // Check whether the font has already been embedded before analysing it.
+    InObj *p;
+    Ref ref = fontRef->getRef();
+    for (p = inObjList; p; p = p->next) {
+        if (p->ref.num == ref.num && p->ref.gen == ref.gen) {
+            copyName(tag);
+            pdf_printf(" %d 0 R ", p->num);
+            return;
+        }
+    }
+    // Only handle included Type1 (and Type1C) fonts; anything else will be copied.
+    // Type1C fonts are replaced by Type1 fonts, if REPLACE_TYPE1C is true.
+    fontdict = fontRef->fetch(xref);
+    fontdesc = Object(objNull);
+    if (fontdict.isDict()) {
+        subtype = fontdict.dictLookup("Subtype");
+        basefont = fontdict.dictLookup("BaseFont");
+        fontdescRef = fontdict.dictLookupNF("FontDescriptor");
+        if (fontdescRef.isRef()) {
+            fontdesc = fontdescRef.fetch(xref);
+        }
+    }
+    if (!fixedinclusioncopyfont && fontdict.isDict()
+        && subtype.isName()
+        && !strcmp(subtype.getName(), "Type1")
+        && basefont.isName()
+        && fontdescRef.isRef()
+        && fontdesc.isDict()
+        && embeddableFont(&fontdesc)
+        && (fontmap = lookup_fontmap((char *)basefont.getName())) != NULL) {
+        // round /StemV value, since the PDF input is a float
+        // (see Font Descriptors in PDF reference), but we only store an
+        // integer, since we don't want to change the struct.
+        stemV = fontdesc.dictLookup("StemV");
+        fd = epdf_create_fontdescriptor(fontmap, zround(stemV.getNum()));
+        charset = fontdesc.dictLookup("CharSet");
+        if (!charset.isNull() &&
+            charset.isString() && is_subsetable(fontmap))
+            epdf_mark_glyphs(fd, (char *)charset.getString()->c_str());
+        else
+            embed_whole_font(fd);
+        addFontDesc(fontdescRef.getRef(), fd);
+        copyName(tag);
+        gfont = GfxFont::makeFont(xref, tag, fontRef->getRef(),
+                                  fontdict.getDict());
+        pdf_printf(" %d 0 R ", addFont(fontRef->getRef(), fd,
+                                       addEncoding(gfont)));
+    } else {
+        copyName(tag);
+        pdf_puts(" ");
+        copyObject(fontRef);
+    }
+}
+
+// Writes "/Font << ... >>" from the font resources dict in obj; entries
+// must be references or (embedded) dicts.
+static void copyFontResources(Object * obj)
+{
+    Object fontRef;
+    int i, l;
+    if (!obj->isDict())
+        pdftex_fail("PDF inclusion: invalid font resources dict type <%s>",
+                    obj->getTypeName());
+    pdf_puts("/Font << ");
+    for (i = 0, l = obj->dictGetLength(); i < l; ++i) {
+        fontRef = obj->dictGetValNF(i);
+        if (fontRef.isRef())
+            copyFont((char *)obj->dictGetKey(i), &fontRef);
+        else if (fontRef.isDict()) {   // some programs generate pdf with embedded font object
+            copyName((char *)obj->dictGetKey(i));
+            pdf_puts(" ");
+            copyObject(&fontRef);
+        }
+        else
+            pdftex_fail("PDF inclusion: invalid font in reference type <%s>",
+                        fontRef.getTypeName());
+    }
+    pdf_puts(">>\n");
+}
+
+// Writes the resource entry "key -> obj"; entries of an unexpected type
+// are skipped with a warning.
+static void copyOtherResources(Object * obj, char *key)
+{
+    // copies all other resources (write_epdf handles Fonts and ProcSets),
+
+    // if Subtype is present, it must be a name
+    if (strcmp("Subtype", key) == 0) {
+        if (!obj->isName()) {
+            pdftex_warn("PDF inclusion: Subtype in Resources dict is not a name"
+                        " (key '%s', type <%s>); ignored.",
+                        key, obj->getTypeName());
+            return;
+        }
+    } else if (!obj->isDict()) {
+        //FIXME: Write the message only to the log file
+        pdftex_warn("PDF inclusion: invalid other resource which is no dict"
+                    " (key '%s', type <%s>); ignored.",
+                    key, obj->getTypeName());
+        return;
+    }
+    copyName(key);
+    pdf_puts(" ");
+    copyObject(obj);
+}
+
+// Function converts double to string; very small and very large numbers
+// are NOT converted to scientific notation.
+// n must be a number or real conforming to the implementation limits
+// of PDF as specified in appendix C.1 of the PDF Ref.
+// These are:
+// maximum value of ints is +2^32
+// maximum value of reals is +2^15
+// smallest value of reals is 1/(2^16)
+//
+// The result has at most 6 fractional digits, trailing zeros are dropped,
+// and values below 0.5E-6 in magnitude give "0". The returned pointer
+// refers to a static buffer that is overwritten by the next call.
+
+static char *convertNumToPDF(double n)
+{
+    static const int precision = 6;
+    static const int fact = (int) 1E6;  // must be 10^precision
+    static const double epsilon = 0.5E-6;       // 2epsilon must be 10^-precision
+    static char buf[64];
+    // handle very small values: return 0
+    if (fabs(n) < epsilon) {
+        buf[0] = '0';
+        buf[1] = '\0';
+    } else {
+        char ints[64];
+        int bindex = 0, sindex = 0;
+        int ival, fval;
+        // handle the sign part if n is negative
+        if (n < 0) {
+            buf[bindex++] = '-';
+            n = -n;
+        }
+        n += epsilon;           // for rounding
+        // handle the integer part, simply with snprintf
+        ival = (int) floor(n);
+        n -= ival;
+        snprintf(ints, sizeof(ints), "%d", ival);
+        while (ints[sindex] != 0)
+            buf[bindex++] = ints[sindex++];
+        // handle the fractional part up to 'precision' digits
+        fval = (int) floor(n * fact);
+        if (fval) {
+            // set a dot
+            buf[bindex++] = '.';
+            sindex = bindex + precision;
+            buf[sindex--] = '\0';
+            // fill up trailing zeros with the string terminator NULL
+            while (((fval % 10) == 0) && (sindex >= bindex)) {
+                buf[sindex--] = '\0';
+                fval /= 10;
+            }
+            // fill up the fractional part back to front
+            while (sindex >= bindex) {
+                buf[sindex--] = (fval % 10) + '0';
+                fval /= 10;
+            }
+        } else
+            buf[bindex++] = 0;
+    }
+    return (char *) buf;
+}
+
+// Writes obj in PDF syntax. Arrays, dicts and streams are copied
+// recursively; a reference is not followed but registered with addOther()
+// and written with its new object number.
+static void copyObject(Object * obj)
+{
+    Object obj1;
+    int i, l, c;
+    Ref ref;
+    char *p;
+    GString *s;
+    if (obj->isBool()) {
+        pdf_printf("%s", obj->getBool()? "true" : "false");
+    } else if (obj->isInt()) {
+        pdf_printf("%i", obj->getInt());
+    } else if (obj->isReal()) {
+        pdf_printf("%s", convertNumToPDF(obj->getReal()));
+    } else if (obj->isNum()) {
+        pdf_printf("%s", convertNumToPDF(obj->getNum()));
+    } else if (obj->isString()) {
+        s = (GooString *)obj->getString();
+        p = (char *)s->c_str();
+        l = s->getLength();
+        // a string without embedded NUL bytes is written as a literal
+        // string, anything else as a hex string
+        if (strlen(p) == (unsigned int) l) {
+            pdf_puts("(");
+            for (; *p != 0; p++) {
+                c = (unsigned char) *p;
+                if (c == '(' || c == ')' || c == '\\')
+                    pdf_printf("\\%c", c);
+                else if (c < 0x20 || c > 0x7F)
+                    pdf_printf("\\%03o", c);
+                else
+                    pdfout(c);
+            }
+            pdf_puts(")");
+        } else {
+            pdf_puts("<");
+            for (i = 0; i < l; i++) {
+                c = s->getChar(i) & 0xFF;
+                pdf_printf("%.2x", c);
+            }
+            pdf_puts(">");
+        }
+    } else if (obj->isName()) {
+        copyName((char *)obj->getName());
+    } else if (obj->isNull()) {
+        pdf_puts("null");
+    } else if (obj->isArray()) {
+        pdf_puts("[");
+        for (i = 0, l = obj->arrayGetLength(); i < l; ++i) {
+            obj1 = obj->arrayGetNF(i);
+            if (!obj1.isName())
+                pdf_puts(" ");
+            copyObject(&obj1);
+        }
+        pdf_puts("]");
+    } else if (obj->isDict()) {
+        pdf_puts("<<\n");
+        copyDict(obj);
+        pdf_puts(">>");
+    } else if (obj->isStream()) {
+        pdf_puts("<<\n");
+        copyDict(obj->getStream()->getDictObject());
+        pdf_puts(">>\n");
+        pdf_puts("stream\n");
+        copyStream(obj->getStream()->getUndecodedStream());
+        pdf_puts("\nendstream");
+    } else if (obj->isRef()) {
+        ref = obj->getRef();
+        if (ref.num == 0) {
+            pdftex_fail
+                ("PDF inclusion: reference to invalid object"
+                 " (is the included pdf broken?)");
+        } else
+            pdf_printf("%d 0 R", addOther(ref));
+    } else {
+        pdftex_fail("PDF inclusion: type <%s> cannot be copied",
+                    obj->getTypeName());
+    }
+}
+
+// Writes every entry of inObjList that has not been written yet. Copying
+// an object may append further entries to the list; these are picked up by
+// the same loop.
+static void writeRefs()
+{
+    InObj *r;
+    for (r = inObjList; r != 0; r = r->next) {
+        if (!r->written) {
+            r->written = 1;
+            Object obj1 = xref->fetch(r->ref.num, r->ref.gen);
+            if (r->type == objFont) {
+                assert(!obj1.isStream());
+                pdfbeginobj(r->num, 2);         // \pdfobjcompresslevel = 2 is for this
+                copyFontDict(&obj1, r);
+                pdf_puts("\n");
+                pdfendobj();
+            } else if (r->type != objFontDesc) {        // /FontDescriptor is written via write_fontdescriptor()
+                if (obj1.isStream())
+                    pdfbeginobj(r->num, 0);
+                else
+                    pdfbeginobj(r->num, 2);     // \pdfobjcompresslevel = 2 is for this
+                copyObject(&obj1);
+                pdf_puts("\n");
+                pdfendobj();
+            }
+        }
+    }
+}
+
+// Writes an encoding object for every entry of encodingList, then releases
+// the fonts and deletes the list entries. encodingList itself is not
+// reset here.
+static void writeEncodings()
+{
+    UsedEncoding *r, *n;
+    char *glyphNames[256], *s;
+    int i;
+    for (r = encodingList; r != 0; r = r->next) {
+        // the check does not depend on the character code, so do it once
+        // per font instead of once per code
+        if (r->font->isCIDFont()) {
+            pdftex_fail
+                ("PDF inclusion: CID fonts are not supported"
+                 " (try to disable font replacement to fix this)");
+        }
+        Gfx8BitFont *font8 = (Gfx8BitFont *) r->font;
+        for (i = 0; i < 256; i++) {
+            if ((s = (char *) font8->getCharName(i)) != 0)
+                glyphNames[i] = s;
+            else
+                glyphNames[i] = notdef;
+        }
+        epdf_write_enc(glyphNames, r->enc_objnum);
+    }
+    for (r = encodingList; r != 0; r = n) {
+        n = r->next;
+#ifdef POPPLER_VERSION
+        r->font->decRefCnt();
+#else
+#error POPPLER_VERSION should be defined.
+#endif
+        delete r;
+    }
+}
+
+// get the pagebox according to the pagebox_spec
+static const PDFRectangle *get_pagebox(Page * page, int pagebox_spec)
+{
+    if (pagebox_spec == pdfboxspecmedia)
+        return page->getMediaBox();
+    else if (pagebox_spec == pdfboxspeccrop)
+        return page->getCropBox();
+    else if (pagebox_spec == pdfboxspecbleed)
+        return page->getBleedBox();
+    else if (pagebox_spec == pdfboxspectrim)
+        return page->getTrimBox();
+    else if (pagebox_spec == pdfboxspecart)
+        return page->getArtBox();
+    else
+        pdftex_fail("PDF inclusion: unknown value of pagebox spec (%i)",
+                    (int) pagebox_spec);
+    return page->getMediaBox(); // to make the compiler happy
+}
+
+
+// Reads various information about the PDF and sets it up for later inclusion.
+// This will fail if the PDF version of the PDF is higher than
+// minor_pdf_version_wanted or page_name is given and can not be found.
+// It makes no sense to give page_name _and_ page_num.
+// Returns the page number.
+//
+//   image_name                file name of the PDF
+//   page_name                 named destination selecting the page, or 0
+//   page_num                  page number, used when page_name is 0
+//   pagebox_spec              which page box to use (see get_pagebox)
+//   minor_pdf_version_wanted  highest acceptable minor version of PDF 1.x
+//   pdf_inclusion_errorlevel  > 0: a newer PDF version is an error,
+//                             = 0: it gives a warning, < 0: it is ignored
+//
+// Sets epdf_doc, epdf_num_pages, epdf_orig_x, epdf_orig_y, epdf_width,
+// epdf_height, epdf_rotate and epdf_has_page_group.
+
+int
+read_pdf_info(char *image_name, char *page_name, int page_num,
+              int pagebox_spec, int minor_pdf_version_wanted,
+              int pdf_inclusion_errorlevel)
+{
+    PdfDocument *pdf_doc;
+    Page *page;
+    const PDFRectangle *pagebox;
+#ifdef POPPLER_VERSION
+    int pdf_major_version_found, pdf_minor_version_found;
+#else
+#error POPPLER_VERSION should be defined.
+#endif
+    // initialize
+    if (!isInit) {
+        globalParams = new GlobalParams();
+        globalParams->setErrQuiet(false);
+        isInit = true;
+    }
+    // open PDF file
+    pdf_doc = find_add_document(image_name);
+    epdf_doc = (void *) pdf_doc;
+
+    // check PDF version
+    // this works only for PDF 1.x -- but since any versions of PDF newer
+    // than 1.x will not be backwards compatible to PDF 1.x, pdfTeX will
+    // then have to be changed drastically anyway.
+#ifdef POPPLER_VERSION
+    pdf_major_version_found = pdf_doc->doc->getPDFMajorVersion();
+    pdf_minor_version_found = pdf_doc->doc->getPDFMinorVersion();
+    if ((pdf_major_version_found > 1)
+        || (pdf_minor_version_found > minor_pdf_version_wanted)) {
+        const char *msg =
+            "PDF inclusion: found PDF version <%d.%d>, but at most version <1.%d> allowed";
+        if (pdf_inclusion_errorlevel > 0) {
+            pdftex_fail(msg, pdf_major_version_found, pdf_minor_version_found, minor_pdf_version_wanted);
+        } else if (pdf_inclusion_errorlevel < 0) {
+            ; /* do nothing */
+        } else { /* = 0, give warning */
+            pdftex_warn(msg, pdf_major_version_found, pdf_minor_version_found, minor_pdf_version_wanted);
+        }
+    }
+#else
+#error POPPLER_VERSION should be defined.
+#endif
+    epdf_num_pages = pdf_doc->doc->getCatalog()->getNumPages();
+    if (page_name) {
+        // get page by name
+        GString name(page_name);
+        LinkDest *link = pdf_doc->doc->findDest(&name);
+        if (link == 0 || !link->isOk())
+            pdftex_fail("PDF inclusion: invalid destination <%s>", page_name);
+        Ref ref = link->getPageRef();
+        page_num = pdf_doc->doc->getCatalog()->findPage(ref.num, ref.gen);
+        if (page_num == 0)
+            pdftex_fail("PDF inclusion: destination is not a page <%s>",
+                        page_name);
+        delete link;
+    } else {
+        // get page by number
+        // (the number reported is the page count of the document)
+        if (page_num <= 0 || page_num > epdf_num_pages)
+            pdftex_fail("PDF inclusion: required page does not exist <%i>",
+                        epdf_num_pages);
+    }
+    // get the required page
+    page = pdf_doc->doc->getCatalog()->getPage(page_num);
+    // fail cleanly instead of dereferencing a null pointer below if the
+    // catalog could not deliver the page
+    if (page == NULL)
+        pdftex_fail("PDF inclusion: page <%i> could not be read", page_num);
+
+    // get the pagebox (media, crop...) to use.
+    // The box is normalized so that width and height are not negative.
+    pagebox = get_pagebox(page, pagebox_spec);
+    if (pagebox->x2 > pagebox->x1) {
+        epdf_orig_x = pagebox->x1;
+        epdf_width = pagebox->x2 - pagebox->x1;
+    } else {
+        epdf_orig_x = pagebox->x2;
+        epdf_width = pagebox->x1 - pagebox->x2;
+    }
+    if (pagebox->y2 > pagebox->y1) {
+        epdf_orig_y = pagebox->y1;
+        epdf_height = pagebox->y2 - pagebox->y1;
+    } else {
+        epdf_orig_y = pagebox->y2;
+        epdf_height = pagebox->y1 - pagebox->y2;
+    }
+
+    // get page rotation, normalized to 0..359
+    epdf_rotate = page->getRotate() % 360;
+    if (epdf_rotate < 0)
+        epdf_rotate += 360;
+
+    // page group
+    if (page->getGroup() != NULL)
+        epdf_has_page_group = 1;    // only flag that page group is present;
+                                    // the actual object number will be
+                                    // generated in pdftex.web
+    else
+        epdf_has_page_group = 0;    // no page group present
+
+    pdf_doc->xref = pdf_doc->doc->getXRef();
+    return page_num;
+}
+
+// writes the current epdf_doc.
+// Here the included PDF is copied, so most errors that can happen during PDF
+// inclusion will arise here.
+
+void write_epdf(void)
+{
+    // Writes page epdf_selected_page of the document in epdf_doc as a form
+    // XObject: the dict entries first, then the page contents as the
+    // stream data, then all indirect objects collected while copying, the
+    // font encodings and, if needed, the page group dict.
+    Page *page;
+    Ref *pageRef;
+    Dict *pageDict;
+    Object contents, obj1, obj2, pageObj, dictObj;
+    Object groupDict;
+    bool writeSepGroup = false;
+    Object info;
+    char *key;
+    char s[256];
+    int i, l, n;
+    int rotate;
+    double scale[6] = { 0, 0, 0, 0, 0, 0 };
+    bool writematrix = false;
+    int suppress_ptex_info = getpdfsuppressptexinfo();
+    static const char *pageDictKeys[] = {
+        "LastModified",
+        "Metadata",
+        "PieceInfo",
+        "SeparationInfo",
+//         "Group",
+//         "Resources",
+        NULL
+    };
+
+    PdfDocument *pdf_doc = (PdfDocument *) epdf_doc;
+    (pdf_doc->occurences)--;
+    xref = pdf_doc->xref;
+    inObjList = pdf_doc->inObjList;
+    encodingList = 0;
+    page = pdf_doc->doc->getCatalog()->getPage(epdf_selected_page);
+    pageRef = pdf_doc->doc->getCatalog()->getPageRef(epdf_selected_page);
+    pageObj = xref->fetch(pageRef->num, pageRef->gen);
+    pageDict = pageObj.getDict();
+    rotate = page->getRotate();
+    const PDFRectangle *pagebox;
+    // write the Page header
+    pdf_puts("/Type /XObject\n");
+    pdf_puts("/Subtype /Form\n");
+    pdf_puts("/FormType 1\n");
+
+    // write additional information
+    if ((suppress_ptex_info & MASK_SUPPRESS_PTEX_FILENAME) == 0) {
+        pdf_printf("/%s.FileName (%s)\n", pdfkeyprefix,
+                   convertStringToPDFString(pdf_doc->file_name,
+                                            strlen(pdf_doc->file_name)));
+    }
+    if ((suppress_ptex_info & MASK_SUPPRESS_PTEX_PAGENUMBER) == 0) {
+        pdf_printf("/%s.PageNumber %i\n", pdfkeyprefix, (int) epdf_selected_page);
+    }
+    if ((suppress_ptex_info & MASK_SUPPRESS_PTEX_INFODICT) == 0) {
+        info = pdf_doc->doc->getDocInfoNF();
+        if (info.isRef()) {
+            // the info dict must be indirect (PDF Ref p. 61)
+            pdf_printf("/%s.InfoDict ", pdfkeyprefix);
+            pdf_printf("%d 0 R\n", addOther(info.getRef()));
+        }
+    }
+    // get the pagebox (media, crop...) to use.
+    pagebox = get_pagebox(page, epdf_page_box);
+
+    // handle page rotation
+    if (rotate != 0) {
+        if (rotate % 90 == 0) {
+            // this handles only the simple case: multiple of 90s but these
+            // are the only values allowed according to the reference
+            // (v1.3, p. 78).
+            // the image is rotated around its center.
+            // the /Rotate key is clockwise while the matrix is
+            // counterclockwise :-%
+            tex_printf(", page is rotated %d degrees", rotate);
+            switch (rotate) {
+            case 90:
+                scale[1] = -1;
+                scale[2] = 1;
+                scale[4] = pagebox->x1 - pagebox->y1;
+                scale[5] = pagebox->y1 + pagebox->x2;
+                writematrix = true;
+                break;
+            case 180:
+                scale[0] = scale[3] = -1;
+                scale[4] = pagebox->x1 + pagebox->x2;
+                scale[5] = pagebox->y1 + pagebox->y2;
+                writematrix = true;
+                break;          // width and height are exchanged
+            case 270:
+                scale[1] = 1;
+                scale[2] = -1;
+                scale[4] = pagebox->x1 + pagebox->y2;
+                scale[5] = pagebox->y1 - pagebox->x1;
+                writematrix = true;
+                break;
+            default:            // any other multiple of 90: no matrix
+                break;
+            }
+            if (writematrix) {  // The matrix is only written if the image is rotated.
+                // bounded: "%.8f" of a large double does not fit into s
+                n = snprintf(s, sizeof(s),
+                             "/Matrix [%.8f %.8f %.8f %.8f %.8f %.8f]\n",
+                             scale[0],
+                             scale[1], scale[2], scale[3], scale[4], scale[5]);
+                if (n < 0 || n >= (int) sizeof(s))
+                    pdftex_fail("PDF inclusion: /Matrix values out of range");
+                pdf_puts(stripzeros(s));
+            }
+        }
+    }
+
+    n = snprintf(s, sizeof(s), "/BBox [%.8f %.8f %.8f %.8f]\n",
+                 pagebox->x1, pagebox->y1, pagebox->x2, pagebox->y2);
+    if (n < 0 || n >= (int) sizeof(s))
+        pdftex_fail("PDF inclusion: /BBox values out of range");
+    pdf_puts(stripzeros(s));
+
+    // Metadata validity check (as a stream it must be indirect)
+    dictObj = pageDict->lookupNF("Metadata");
+    if (!dictObj.isNull() && !dictObj.isRef())
+        pdftex_warn("PDF inclusion: /Metadata must be indirect object");
+
+    // copy selected items in Page dictionary except Resources & Group
+    for (i = 0; pageDictKeys[i] != NULL; i++) {
+        dictObj = pageDict->lookupNF(pageDictKeys[i]);
+        if (!dictObj.isNull()) {
+            pdf_newline();
+            pdf_printf("/%s ", pageDictKeys[i]);
+            copyObject(&dictObj); // preserves indirection
+        }
+    }
+
+    // handle page group
+    dictObj = pageDict->lookupNF("Group");
+    if (!dictObj.isNull()) {
+        if (pdfpagegroupval == 0) {
+            // another pdf with page group was included earlier on the
+            // same page; copy the Group entry as is.  See manual for
+            // info on why this is a warning.
+            if (getpdfsuppresswarningpagegroup() == 0) {
+                pdftex_warn
+    ("PDF inclusion: multiple pdfs with page group included in a single page");
+            }
+            pdf_newline();
+            pdf_puts("/Group ");
+            copyObject(&dictObj);
+        } else {
+            // write Group dict as a separate object, since the Page dict also refers to it
+            dictObj = pageDict->lookup("Group");
+            if (!dictObj.isDict())
+                pdftex_fail("PDF inclusion: /Group dict missing");
+            writeSepGroup = true;
+/*
+This part is only a single line
+            groupDict = Object(page->getGroup());
+in the original patch. In this case, however, pdftex crashes at
+"delete pdf_doc->doc" in "delete_document()" for inclusion of some
+kind of pdf images, for example, figure_missing.pdf in gnuplot.
+A change
+            groupDict = Object(page->getGroup()).copy();
+does not improve the situation.
+The changes below seem to work fine.
+*/
+// begin modification
+            groupDict = pageDict->lookup("Group");
+            const Dict& dic1 = page->getGroup();
+            const Dict& dic2 = groupDict.getDict();
+            // replace dic2 in groupDict with dic1
+            l = dic2.getLength();
+            for (i = 0; i < l; i++) {
+                groupDict.dictRemove(dic2.getKey(i));
+            }
+            l = dic1.getLength();
+            for (i = 0; i < l; i++) {
+                groupDict.dictAdd((const char *)copyString(dic1.getKey(i)),
+                                  dic1.getValNF(i));
+            }
+// end modification
+            pdf_printf("/Group %ld 0 R\n", (long)pdfpagegroupval);
+        }
+    }
+
+    // write the Resources dictionary
+    if (page->getResourceDict() == NULL) {
+        // Resources can be missing (files without them have been spotted
+        // in the wild); in which case the /Resources of the /Page will be used.
+        // "This practice is not recommended".
+        pdftex_warn
+            ("PDF inclusion: /Resources missing. 'This practice is not recommended' (PDF Ref)");
+    } else {
+        // (named differently from the function-level obj1 to avoid
+        // shadowing it)
+        Object *resObj = page->getResourceDictObject();
+        if (!resObj->isDict())
+            pdftex_fail("PDF inclusion: invalid resources dict type <%s>",
+                        resObj->getTypeName());
+        pdf_newline();
+        pdf_puts("/Resources <<\n");
+        for (i = 0, l = resObj->dictGetLength(); i < l; ++i) {
+            obj2 = resObj->dictGetVal(i);
+            key = (char *)resObj->dictGetKey(i);
+            if (strcmp("Font", key) == 0)
+                copyFontResources(&obj2);
+            else if (strcmp("ProcSet", key) == 0)
+                copyProcSet(&obj2);
+            else
+                copyOtherResources(&obj2, (char *)key);
+        }
+        pdf_puts(">>\n");
+    }
+
+    // write the page contents
+    contents = page->getContents();
+    if (contents.isStream()) {
+
+        // Variant A: get stream and recompress under control
+        // of \pdfcompresslevel
+        //
+        // pdfbeginstream();
+        // copyStream(contents->getStream());
+        // pdfendstream();
+
+        // Variant B: copy stream without recompressing
+        //
+        obj1 = contents.streamGetDict()->lookup("F");
+        if (!obj1.isNull()) {
+            pdftex_fail("PDF inclusion: Unsupported external stream");
+        }
+        obj1 = contents.streamGetDict()->lookup("Length");
+        assert(!obj1.isNull());
+        pdf_puts("/Length ");
+        copyObject(&obj1);
+        pdf_puts("\n");
+        obj1 = contents.streamGetDict()->lookup("Filter");
+        if (!obj1.isNull()) {
+            pdf_puts("/Filter ");
+            copyObject(&obj1);
+            pdf_puts("\n");
+            obj1 = contents.streamGetDict()->lookup("DecodeParms");
+            if (!obj1.isNull()) {
+                pdf_puts("/DecodeParms ");
+                copyObject(&obj1);
+                pdf_puts("\n");
+            }
+        }
+        pdf_puts(">>\nstream\n");
+        copyStream(contents.getStream()->getUndecodedStream());
+        pdfendstream();
+    } else if (contents.isArray()) {
+        pdfbeginstream();
+        for (i = 0, l = contents.arrayGetLength(); i < l; ++i) {
+            Object contentsobj = contents.arrayGet(i);
+            // report a broken file instead of asking a non-stream object
+            // for its stream
+            if (!contentsobj.isStream())
+                pdftex_fail("PDF inclusion: invalid page contents type <%s>",
+                            contentsobj.getTypeName());
+            copyStream(contentsobj.getStream());
+            if (i < l - 1)
+                pdf_newline();  // add a newline after each stream except the last
+        }
+        pdfendstream();
+    } else {                    // the contents are optional, but we need to include an empty stream
+        pdfbeginstream();
+        pdfendstream();
+    }
+
+    // write out all indirect objects
+    writeRefs();
+
+    // write out all used encodings (and delete list)
+    writeEncodings();
+
+    // write the Group dict if needed
+    if (writeSepGroup) {
+        pdfbeginobj(pdfpagegroupval, 2);
+        copyObject(&groupDict);
+        pdf_puts("\n");
+        pdfendobj();
+        pdfpagegroupval = 0;    // only the 1st included pdf on a page gets its
+                                // Group included in the Page dict
+    }
+
+    // save object list, xref
+    pdf_doc->inObjList = inObjList;
+    pdf_doc->xref = xref;
+}
+
+// Called when an image has been written and its resources in image_tab are
+// freed and it's not referenced anymore.
+// The document is only deleted once its reference count is negative.
+
+void epdf_delete()
+{
+    PdfDocument *pdf_doc = (PdfDocument *) epdf_doc;
+    xref = pdf_doc->xref;
+    if (pdf_doc->occurences < 0) {
+        delete_document(pdf_doc);
+    }
+}
+
+// Called when PDF embedding system is finalized.
+// Now deallocate all remaining PdfDocuments.
+
+void epdf_check_mem()
+{
+    // Nothing to do if read_pdf_info() never initialized the library.
+    if (isInit) {
+        PdfDocument *p, *n;
+        for (p = pdfDocuments; p; p = n) {
+            n = p->next;
+            delete_document(p);
+        }
+        // see above for globalParams
+        delete globalParams;
+        // forget the deleted object, so that a further call is harmless
+        // and read_pdf_info() creates a new one if it is called again
+        globalParams = 0;
+        isInit = false;
+    }
+}
diff --git a/texk/web2c/pdftexdir/pdftosrc-poppler0.72.0.cc b/texk/web2c/pdftexdir/pdftosrc-poppler0.72.0.cc
new file mode 100644
index 000000000..6089e68d9
--- /dev/null
+++ b/texk/web2c/pdftexdir/pdftosrc-poppler0.72.0.cc
@@ -0,0 +1,207 @@
+/*
+Copyright 1996-2017 Han The Thanh,
+
+This file is part of pdfTeX.
+
+pdfTeX is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+pdfTeX is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program. If not, see .
+*/
+
+/*
+This is based on the patch texlive-poppler-0.59.patch <2017-09-19> at
+https://git.archlinux.org/svntogit/packages.git/plain/texlive-bin/trunk
+by Arch Linux. The poppler should be 0.72.0 or newer versions.
+POPPLER_VERSION should be defined.
+*/
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#ifdef POPPLER_VERSION
+#define GString GooString
+#define xpdfVersion POPPLER_VERSION
+#include
+#include
+#include
+#include
+#else
+#error POPPLER_VERSION should be defined.
+#endif +#include + +#include "Object.h" +#include "Stream.h" +#include "Lexer.h" +#include "Parser.h" +#include "Array.h" +#include "Dict.h" +#include "XRef.h" +#include "Catalog.h" +#include "Page.h" +#include "GfxFont.h" +#include "PDFDoc.h" +#include "GlobalParams.h" +#include "Error.h" + +static XRef *xref = 0; + +int main(int argc, char *argv[]) +{ + char *p, buf[1024]; + PDFDoc *doc; + GString *fileName; + Stream *s; + Object srcStream, srcName, catalogDict; + FILE *outfile; + char *outname; + int objnum = 0, objgen = 0; + bool extract_xref_table = false; + int c; + fprintf(stderr, "pdftosrc version %s\n", xpdfVersion); + if (argc < 2) { + fprintf(stderr, + "Usage: pdftosrc []\n"); + exit(1); + } + fileName = new GString(argv[1]); + globalParams = new GlobalParams(); + doc = new PDFDoc(fileName); + if (!doc->isOk()) { + fprintf(stderr, "Invalid PDF file\n"); + exit(1); + } + if (argc >= 3) { + objnum = atoi(argv[2]); + if (argc >= 4) + objgen = atoi(argv[3]); + } + xref = doc->getXRef(); + catalogDict = xref->getCatalog(); + if (!catalogDict.isDict("Catalog")) { + fprintf(stderr, "No Catalog found\n"); + exit(1); + } + srcStream = Object(objNull); + if (objnum == 0) { + srcStream = catalogDict.dictLookup("SourceObject"); + static char const_SourceFile[] = "SourceFile"; + if (!srcStream.isStream(const_SourceFile)) { + fprintf(stderr, "No SourceObject found\n"); + exit(1); + } + srcName = srcStream.getStream()->getDict()->lookup("SourceName"); + if (!srcName.isString()) { + fprintf(stderr, "No SourceName found\n"); + exit(1); + } + outname = (char *)srcName.getString()->c_str(); + // We cannot free srcName, as objname shares its string. 
+ // srcName.free(); + } else if (objnum > 0) { + srcStream = xref->fetch(objnum, objgen); + if (!srcStream.isStream()) { + fprintf(stderr, "Not a Stream object\n"); + exit(1); + } + sprintf(buf, "%s", fileName->c_str()); + if ((p = strrchr(buf, '.')) == 0) + p = strchr(buf, 0); + if (objgen == 0) + sprintf(p, ".%i", objnum); + else + sprintf(p, ".%i+%i", objnum, objgen); + outname = buf; + } else { // objnum < 0 means we are extracting the XRef table + extract_xref_table = true; + sprintf(buf, "%s", fileName->c_str()); + if ((p = strrchr(buf, '.')) == 0) + p = strchr(buf, 0); + sprintf(p, ".xref"); + outname = buf; + } + if (!(outfile = fopen(outname, "wb"))) { + fprintf(stderr, "Cannot open file \"%s\" for writing\n", outname); + exit(1); + } + if (extract_xref_table) { + int size = xref->getNumObjects(); + int i; + for (i = 0; i < size; i++) { + if (xref->getEntry(i)->offset == 0xffffffff) + break; + } + size = i; + fprintf(outfile, "xref\n"); + fprintf(outfile, "0 %i\n", size); + for (i = 0; i < size; i++) { + XRefEntry *e = xref->getEntry(i); + if (e->type != xrefEntryCompressed) + fprintf(outfile, "%.10lu %.5i %s\n", + (long unsigned) e->offset, e->gen, + (e->type == xrefEntryFree ? 
"f" : "n")); + else { // e->offset is the object number of the object stream + Stream *str; + Lexer *lexer; + Parser *parser; + Object objStr, obj1, obj2; + int nObjects, first, n; + int localOffset = 0; + unsigned int firstOffset; + + objStr = xref->fetch(e->offset, 0); + assert(objStr.isStream()); + obj1 = objStr.streamGetDict()->lookup("N"); + nObjects = obj1.getInt(); + obj1 = objStr.streamGetDict()->lookup("First"); + first = obj1.getInt(); + firstOffset = objStr.getStream()->getBaseStream()->getStart() + first; + + // parse the header: object numbers and offsets + objStr.streamReset(); + str = new EmbedStream(objStr.getStream(), Object(objNull), true, first); + lexer = new Lexer(xref, str); + parser = new Parser(xref, lexer, false); + for (n = 0; n < nObjects; ++n) { + obj1 = parser->getObj(); + obj2 = parser->getObj(); + if (n == e->gen) + localOffset = obj2.getInt(); + } + while (str->getChar() != EOF) ; + delete parser; + + fprintf(outfile, "%.10lu 00000 n\n", + (long unsigned)(firstOffset + localOffset)); + } + } + } else { + s = srcStream.getStream(); + s->reset(); + while ((c = s->getChar()) != EOF) + fputc(c, outfile); + } + if (objnum == 0) + fprintf(stderr, "Source file extracted to %s\n", outname); + else if (objnum > 0) + fprintf(stderr, "Stream object extracted to %s\n", outname); + else + fprintf(stderr, "Cross-reference table extracted to %s\n", outname); + fclose(outfile); + delete doc; + delete globalParams; +}