[Okular-devel] branches/KDE/3.5/kdegraphics/kpdf/xpdf/xpdf

Wed Jan 10 21:12:13 CET 2007

SVN commit 622127 by aacid:

Keep a set of the page tree nodes already read, and each time we follow a new one, check whether we have already processed it so we don't end up in an infinite loop.
Fixes a crash triggered by the malicious PDF found at http://projects.info-pull.com/moab/MOAB-06-01-2007.html
This may seem quite expensive, but my measurements indicate that page tree processing is I/O bound, as I got the same average times with and without this patch. Obviously, if anyone wants to take more measurements, that would be appreciated.
CCMAIL: okular-devel at kde.org
CCMAIL: security at kde.org


 M  +2 -0      Array.h  
 M  +18 -7     Catalog.cc  
 M  +3 -1      Catalog.h

--- branches/KDE/3.5/kdegraphics/kpdf/xpdf/xpdf/Array.h #622126:622127
@@ -47,6 +47,8 @@
   Object *getNF(int i, Object *obj);
   GBool getString(int i, GString *string);
 
+  XRef *getXRef() { return xref; }
+
 private:
 
   XRef *xref;			// the xref table for this PDF file
--- branches/KDE/3.5/kdegraphics/kpdf/xpdf/xpdf/Catalog.cc #622126:622127
@@ -35,6 +35,7 @@
   Object obj, obj2;
   int numPages0;
   int i;
+  std::set< std::pair<int, int> > readNodes;
 
   ok = gTrue;
   xref = xrefA;
@@ -51,7 +52,9 @@
   }
 
   // read page tree
-  catDict.dictLookup("Pages", &pagesDict);
+  catDict.dictLookupNF("Pages", &pagesDict);
+  readNodes.insert( std::pair<int, int>(pagesDict.getRef().num, pagesDict.getRef().gen) );
+  pagesDict.fetch(xref, &pagesDict);
   // This should really be isDict("Pages"), but I've seen at least one
   // PDF file where the /Type entry is missing.
   if (!pagesDict.isDict()) {
@@ -76,7 +79,7 @@
     pageRefs[i].num = -1;
     pageRefs[i].gen = -1;
   }
-  numPages = readPageTree(pagesDict.getDict(), NULL, 0);
+  numPages = readPageTree(pagesDict.getDict(), NULL, 0, readNodes);
   if (numPages != numPages0) {
     error(-1, "Page count in top-level pages object is incorrect");
   }
@@ -191,7 +194,7 @@
   return s;
 }
 
-int Catalog::readPageTree(Dict *pagesDict, PageAttrs *attrs, int start) {
+int Catalog::readPageTree(Dict *pagesDict, PageAttrs *attrs, int start, std::set< std::pair<int, int> > &readNodes) {
   Object kids;
   Object kid;
   Object kidRef;
@@ -207,7 +210,9 @@
     goto err1;
   }
   for (i = 0; i < kids.arrayGetLength(); ++i) {
-    kids.arrayGet(i, &kid);
+    kids.arrayGetNF(i, &kid);
+    const Ref &ref = kid.getRef();
+    kid.fetch(kids.getArray()->getXRef(), &kid);
     if (kid.isDict("Page")) {
       attrs2 = new PageAttrs(attrs1, kid.getDict());
       page = new Page(xref, start+1, kid.getDict(), attrs2);
@@ -236,9 +241,15 @@
     // This should really be isDict("Pages"), but I've seen at least one
     // PDF file where the /Type entry is missing.
     } else if (kid.isDict()) {
-      if ((start = readPageTree(kid.getDict(), attrs1, start))
-	  < 0)
-	goto err2;
+      std::pair<int, int> node(ref.num, ref.gen);
+      std::pair< std::set< std::pair<int, int> >::iterator, bool> insertResult = readNodes.insert(node);
+      if (insertResult.second) {
+        if ((start = readPageTree(kid.getDict(), attrs1, start, readNodes))
+	    < 0)
+	  goto err2;
+      } else {
+       error(-1, "Kid object was already processed. The pdf is faulty.");
+      }
     } else {
       error(-1, "Kid object (page %d) is wrong type (%s)",
 	    start+1, kid.getTypeName());
--- branches/KDE/3.5/kdegraphics/kpdf/xpdf/xpdf/Catalog.h #622126:622127
@@ -11,6 +11,8 @@
 
 #include <aconf.h>
 
+#include <set>
+
 #ifdef USE_GCC_PRAGMAS
 #pragma interface
 #endif
@@ -128,7 +130,7 @@
   Object acroForm;		// AcroForm dictionary
   GBool ok;			// true if catalog is valid
 
-  int readPageTree(Dict *pages, PageAttrs *attrs, int start);
+  int readPageTree(Dict *pages, PageAttrs *attrs, int start, std::set< std::pair<int, int> > &readNodes);
   Object *findDestInTree(Object *tree, GString *name, Object *obj);
 };