Backport of: From 771c82623e8e1e0c92b8ca6f7c2b8a81ccbb60d3 Mon Sep 17 00:00:00 2001 From: Albert Astals Cid <aacid@kde.org> Date: Mon, 3 Jul 2017 22:44:42 +0200 Subject: pdfunite: fix crash in broken documents Bug #101208 Index: poppler-0.24.5/poppler/PDFDoc.cc =================================================================== --- poppler-0.24.5.orig/poppler/PDFDoc.cc 2017-07-06 11:32:57.863183534 -0400 +++ poppler-0.24.5/poppler/PDFDoc.cc 2017-07-06 11:36:28.085807796 -0400 @@ -975,8 +975,22 @@ void PDFDoc::saveCompleteRewrite (OutStr } void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, Guint numOffset, Guchar *fileKey, - CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen) + CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen, std::set<Dict*> *alreadyWrittenDicts) { + bool deleteSet = false; + if (!alreadyWrittenDicts) { + alreadyWrittenDicts = new std::set<Dict*>; + deleteSet = true; + } + + if (alreadyWrittenDicts->find(dict) != alreadyWrittenDicts->end()) { + error(errSyntaxWarning, -1, "PDFDoc::writeDictionnary: Found recursive dicts"); + if (deleteSet) delete alreadyWrittenDicts; + return; + } else { + alreadyWrittenDicts->insert(dict); + } + Object obj1; outStr->printf("<<"); for (int i=0; i<dict->getLength(); i++) { @@ -984,10 +998,14 @@ void PDFDoc::writeDictionnary (Dict* dic GooString *keyNameToPrint = keyName.sanitizedName(gFalse /* non ps mode */); outStr->printf("/%s ", keyNameToPrint->getCString()); delete keyNameToPrint; - writeObject(dict->getValNF(i, &obj1), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen); + writeObject(dict->getValNF(i, &obj1), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen, alreadyWrittenDicts); obj1.free(); } outStr->printf(">> "); + + if (deleteSet) { + delete alreadyWrittenDicts; + } } void PDFDoc::writeStream (Stream* str, OutStream* outStr) @@ -1090,7 +1108,7 @@ Goffset PDFDoc::writeObjectHeader (Ref * } void PDFDoc::writeObject (Object* obj, OutStream* outStr, XRef *xRef, Guint numOffset, Guchar *fileKey, - CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen) + CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen, std::set<Dict*> *alreadyWrittenDicts) { Array *array; Object obj1; @@ -1137,7 +1155,7 @@ void PDFDoc::writeObject (Object* obj, O outStr->printf("] "); break; case objDict: - writeDictionnary (obj->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen); + writeDictionnary (obj->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen, alreadyWrittenDicts); break; case objStream: { @@ -1200,7 +1218,7 @@ void PDFDoc::writeObject (Object* obj, O } stream->getDict()->remove("DecodeParms"); - writeDictionnary (stream->getDict(),outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen); + writeDictionnary (stream->getDict(),outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen, alreadyWrittenDicts); writeStream (stream,outStr); delete encStream; obj1.free(); @@ -1218,7 +1236,7 @@ void PDFDoc::writeObject (Object* obj, O } } } - writeDictionnary (stream->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen); + writeDictionnary (stream->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen, alreadyWrittenDicts); writeRawStream (stream, outStr); } break; @@ -1353,7 +1371,7 @@ void PDFDoc::writeXRefTableTrailer(Dict { uxref->writeTableToFile( outStr, writeAllEntries ); outStr->printf( "trailer\r\n"); - writeDictionnary(trailerDict, outStr, xRef, 0, NULL, cryptRC4, 0, 0, 0); + writeDictionnary(trailerDict, outStr, xRef, 0, NULL, cryptRC4, 0, 0, 0, NULL); outStr->printf( "\r\nstartxref\r\n"); outStr->printf( "%lli\r\n", uxrefOffset); outStr->printf( "%%%%EOF\r\n"); @@ -1407,16 +1425,34 @@ void PDFDoc::writeHeader(OutStream *outS outStr->printf("%%\xE2\xE3\xCF\xD3\n"); } -void PDFDoc::markDictionnary (Dict* dict, XRef * xRef, XRef *countRef, Guint numOffset) +void PDFDoc::markDictionnary (Dict* dict, XRef * xRef, XRef *countRef, Guint numOffset, std::set<Dict*> *alreadyMarkedDicts) { + bool deleteSet = false; + if (!alreadyMarkedDicts) { + alreadyMarkedDicts = new std::set<Dict*>; + deleteSet = true; + } + + if (alreadyMarkedDicts->find(dict) != alreadyMarkedDicts->end()) { + error(errSyntaxWarning, -1, "PDFDoc::markDictionnary: Found recursive dicts"); + if (deleteSet) delete alreadyMarkedDicts; + return; + } else { + alreadyMarkedDicts->insert(dict); + } + Object obj1; for (int i=0; i<dict->getLength(); i++) { - markObject(dict->getValNF(i, &obj1), xRef, countRef, numOffset); + markObject(dict->getValNF(i, &obj1), xRef, countRef, numOffset, alreadyMarkedDicts); obj1.free(); } + + if (deleteSet) { + delete alreadyMarkedDicts; + } } -void PDFDoc::markObject (Object* obj, XRef *xRef, XRef *countRef, Guint numOffset) +void PDFDoc::markObject (Object* obj, XRef *xRef, XRef *countRef, Guint numOffset, std::set<Dict*> *alreadyMarkedDicts) { Array *array; Object obj1; @@ -1430,12 +1466,12 @@ void PDFDoc::markObject (Object* obj, XR } break; case objDict: - markDictionnary (obj->getDict(), xRef, countRef, numOffset); + markDictionnary (obj->getDict(), xRef, countRef, numOffset, alreadyMarkedDicts); break; case objStream: { Stream *stream = obj->getStream(); - markDictionnary (stream->getDict(), xRef, countRef, numOffset); + markDictionnary (stream->getDict(), xRef, countRef, numOffset, alreadyMarkedDicts); } break; case objRef: @@ -1548,7 +1584,7 @@ void PDFDoc::replacePageDict(int pageNo, page.free(); } -void PDFDoc::markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, Guint numOffset) +void PDFDoc::markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, Guint numOffset, std::set<Dict*> *alreadyMarkedDicts) { pageDict->remove("Names"); pageDict->remove("OpenAction"); @@ -1561,7 +1597,7 @@ void PDFDoc::markPageObjects(Dict *pageD strcmp(key, "Annots") != 0 && strcmp(key, "P") != 0 && strcmp(key, "Root") != 0) { - markObject(&value, xRef, countRef, numOffset); + markObject(&value, xRef, countRef, numOffset, alreadyMarkedDicts); } value.free(); } Index: poppler-0.24.5/poppler/PDFDoc.h =================================================================== --- poppler-0.24.5.orig/poppler/PDFDoc.h 2017-07-06 11:32:57.863183534 -0400 +++ poppler-0.24.5/poppler/PDFDoc.h 2017-07-06 11:38:51.431576995 -0400 @@ -246,13 +246,13 @@ public: // rewrite pageDict with MediaBox, CropBox and new page CTM void replacePageDict(int pageNo, int rotate, PDFRectangle *mediaBox, PDFRectangle *cropBox, Object *pageCTM); - void markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, Guint numOffset); + void markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, Guint numOffset, std::set<Dict*> *alreadyMarkedDicts = NULL); GBool markAnnotations(Object *annots, XRef *xRef, XRef *countRef, Guint numOffset, Guint oldPageNum, Guint newPageNum); void markAcroForm(Object *acrpForm, XRef *xRef, XRef *countRef, Guint numOffset, Guint oldPageNum, Guint newPageNum); // write all objects used by pageDict to outStr Guint writePageObjects(OutStream *outStr, XRef *xRef, Guint numOffset, GBool combine = gFalse); static void writeObject (Object *obj, OutStream* outStr, XRef *xref, Guint numOffset, Guchar *fileKey, - CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen); + CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen, std::set<Dict*> *alreadyWrittenDicts = NULL); static void writeHeader(OutStream *outStr, int major, int minor); // Ownership goes to the caller @@ -263,21 +263,18 @@ public: private: // insert referenced objects in XRef - void markDictionnary (Dict* dict, XRef *xRef, XRef *countRef, Guint numOffset); - void markObject (Object *obj, XRef *xRef, XRef *countRef, Guint numOffset); + void markDictionnary (Dict* dict, XRef *xRef, XRef *countRef, Guint numOffset, std::set<Dict*> *alreadyMarkedDicts); + void markObject (Object *obj, XRef *xRef, XRef *countRef, Guint numOffset, std::set<Dict*> *alreadyMarkedDicts = NULL); static void writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, Guint numOffset, Guchar *fileKey, - CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen); + CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen, std::set<Dict*> *alreadyWrittenDicts); // Write object header to current file stream and return its offset static Goffset writeObjectHeader (Ref *ref, OutStream* outStr); static void writeObjectFooter (OutStream* outStr); void writeObject (Object *obj, OutStream* outStr, Guchar *fileKey, CryptAlgorithm encAlgorithm, - int keyLength, int objNum, int objGen) - { writeObject(obj, outStr, getXRef(), 0, fileKey, encAlgorithm, keyLength, objNum, objGen); } - void writeDictionnary (Dict* dict, OutStream* outStr, Guchar *fileKey, CryptAlgorithm encAlgorithm, - int keyLength, int objNum, int objGen) - { writeDictionnary(dict, outStr, getXRef(), 0, fileKey, encAlgorithm, keyLength, objNum, objGen); } + int keyLength, int objNum, int objGen, std::set<Dict*> *alreadyWrittenDicts = NULL) + { writeObject(obj, outStr, getXRef(), 0, fileKey, encAlgorithm, keyLength, objNum, objGen, alreadyWrittenDicts); } static void writeStream (Stream* str, OutStream* outStr); static void writeRawStream (Stream* str, OutStream* outStr); void writeXRefTableTrailer (Goffset uxrefOffset, XRef *uxref, GBool writeAllEntries,