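Backport to qpdf 5.1.1 of the CVE-2017-12595 fix: QPDFObjectHandle::parseInternal tracks nested arrays and dictionaries on explicit stacks instead of recursing.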
diff -up qpdf-5.1.1/include/qpdf/QPDFObjectHandle.hh.CVE-2017-12595 qpdf-5.1.1/include/qpdf/QPDFObjectHandle.hh
--- qpdf-5.1.1/include/qpdf/QPDFObjectHandle.hh.CVE-2017-12595	2014-01-14 21:45:35.000000000 +0100
+++ qpdf-5.1.1/include/qpdf/QPDFObjectHandle.hh	2018-04-20 11:57:29.001931111 +0200
@@ -618,7 +618,6 @@ class QPDFObjectHandle
         std::string const& object_description,
         QPDFTokenizer& tokenizer, bool& empty,
         StringDecrypter* decrypter, QPDF* context,
-        bool in_array, bool in_dictionary,
         bool content_stream);
     static void parseContentStream_internal(
         QPDFObjectHandle stream, ParserCallbacks* callbacks);
diff -up qpdf-5.1.1/libqpdf/QPDFObjectHandle.cc.CVE-2017-12595 qpdf-5.1.1/libqpdf/QPDFObjectHandle.cc
--- qpdf-5.1.1/libqpdf/QPDFObjectHandle.cc.CVE-2017-12595	2018-04-20 11:57:28.999931128 +0200
+++ qpdf-5.1.1/libqpdf/QPDFObjectHandle.cc	2018-04-20 11:59:06.157119107 +0200
@@ -23,6 +23,8 @@
 #include <stdlib.h>
 #include <ctype.h>
 
+enum state_e { st_top, st_start, st_stop, st_eof, st_dictionary, st_array }; // parseInternal states: st_top/st_dictionary/st_array are pushed on state_stack; st_start/st_stop/st_eof are only assigned to the per-iteration local `state`
+
 class TerminateParsing
 {
 };
@@ -777,8 +779,7 @@ QPDFObjectHandle::parseContentStream_int
     while (static_cast<size_t>(input->tell()) < length)
     {
         QPDFObjectHandle obj =
-            parseInternal(input, "content", tokenizer, empty,
-                          0, 0, false, false, true);
+            parseInternal(input, "content", tokenizer, empty, 0, 0, true);
         if (! obj.isInitialized())
         {
             // EOF
@@ -839,7 +840,7 @@ QPDFObjectHandle::parse(PointerHolder<In
                         StringDecrypter* decrypter, QPDF* context)
 {
     return parseInternal(input, object_description, tokenizer, empty,
-                         decrypter, context, false, false, false);
+                         decrypter, context, false);
 }
 
 QPDFObjectHandle
@@ -847,26 +848,25 @@ QPDFObjectHandle::parseInternal(PointerH
                                 std::string const& object_description,
                                 QPDFTokenizer& tokenizer, bool& empty,
                                 StringDecrypter* decrypter, QPDF* context,
-                                bool in_array, bool in_dictionary,
                                 bool content_stream)
 {
     empty = false;
-    if (in_dictionary && in_array)
-    {
-	// Although dictionaries and arrays arbitrarily nest, these
-	// variables indicate what is at the top of the stack right
-	// now, so they can, by definition, never both be true.
-	throw std::logic_error(
-	    "INTERNAL ERROR: parseInternal: in_dict && in_array");
-    }
 
     QPDFObjectHandle object;
 
-    qpdf_offset_t offset = input->tell();
-    std::vector<QPDFObjectHandle> olist;
-    bool done = false;
+    std::vector<std::vector<QPDFObjectHandle> > olist_stack; // one object list per nesting level; back() is the innermost open container
+    olist_stack.push_back(std::vector<QPDFObjectHandle>()); // list for the top level
+    std::vector<state_e> state_stack; // container kind per nesting level, pushed/popped together with olist_stack
+    state_stack.push_back(st_top); // bottom entry marks "not inside any array or dictionary"
+    std::vector<qpdf_offset_t> offset_stack; // input offset recorded when each level was entered; passed to QPDFExc as the error position
+    offset_stack.push_back(input->tell());
+   bool done = false; // set once the top-level object (or EOF) has been reached
     while (! done)
     {
+        std::vector<QPDFObjectHandle>& olist = olist_stack.back();
+        state_e state = state_stack.back();
+        qpdf_offset_t offset = offset_stack.back();
+
 	object = QPDFObjectHandle();
 
 	QPDFTokenizer::Token token =
@@ -877,8 +877,7 @@ QPDFObjectHandle::parseInternal(PointerH
           case QPDFTokenizer::tt_eof:
             if (content_stream)
             {
-                // Return uninitialized object to indicate EOF
-                return object;
+                state = st_eof;
             }
             else
             {
@@ -900,9 +899,9 @@ QPDFObjectHandle::parseInternal(PointerH
 	    break;
 
 	  case QPDFTokenizer::tt_array_close:
-	    if (in_array)
+	    if (state == st_array)
 	    {
-		done = true;
+                state = st_stop;
 	    }
 	    else
 	    {
@@ -915,9 +914,9 @@ QPDFObjectHandle::parseInternal(PointerH
 	    break;
 
 	  case QPDFTokenizer::tt_dict_close:
-	    if (in_dictionary)
+	    if (state == st_dictionary)
 	    {
-		done = true;
+                state = st_stop;
 	    }
 	    else
 	    {
@@ -930,15 +929,13 @@ QPDFObjectHandle::parseInternal(PointerH
 	    break;
 
 	  case QPDFTokenizer::tt_array_open:
-	    object = parseInternal(
-		input, object_description, tokenizer, empty,
-                decrypter, context, true, false, content_stream);
-	    break;
-
 	  case QPDFTokenizer::tt_dict_open:
-	    object = parseInternal(
-		input, object_description, tokenizer, empty,
-                decrypter, context, false, true, content_stream);
+            olist_stack.push_back(std::vector<QPDFObjectHandle>());
+            state = st_start;
+            offset_stack.push_back(input->tell());
+            state_stack.push_back(
+                (token.getType() == QPDFTokenizer::tt_array_open) ?
+                st_array : st_dictionary);
 	    break;
 
 	  case QPDFTokenizer::tt_bool:
@@ -963,11 +960,13 @@ QPDFObjectHandle::parseInternal(PointerH
 
 	  case QPDFTokenizer::tt_word:
 	    {
-		std::string const& value = token.getValue();
-		if ((value == "R") && (in_array || in_dictionary) &&
-		    (olist.size() >= 2) &&
-		    (olist.at(olist.size() - 1).isInteger()) &&
-		    (olist.at(olist.size() - 2).isInteger()))
+	std::string const& value = token.getValue();
+		if ((value == "R") && (state != st_top) &&
+                         (olist.size() >= 2) &&
+                         (! olist.at(olist.size() - 1).isIndirect()) &&
+                         (olist.at(olist.size() - 1).isInteger()) &&
+                         (! olist.at(olist.size() - 2).isIndirect()) &&
+                         (olist.at(olist.size() - 2).isInteger()))
 		{
                     if (context == 0)
                     {
@@ -984,8 +983,7 @@ QPDFObjectHandle::parseInternal(PointerH
 		    olist.pop_back();
 		    olist.pop_back();
 		}
-		else if ((value == "endobj") &&
-			 (! (in_array || in_dictionary)))
+		else if ((value == "endobj") && (state == st_top))
 		{
 		    // We just saw endobj without having read
 		    // anything.  Treat this as a null and do not move
@@ -1028,57 +1026,129 @@ QPDFObjectHandle::parseInternal(PointerH
 			  "unknown token type while reading object");
 	    break;
 	}
+  
+        if ((! object.isInitialized()) &&
+            (! ((state == st_start) ||
+                (state == st_stop) ||
+                (state == st_eof))))
+        {
+            throw std::logic_error(
+                "QPDFObjectHandle::parseInternal: "
+                "unexpected uninitialized object");
+        }
+ 
+        switch (state)
+        {
+          case st_eof:
+            if (state_stack.size() > 1)
+            {
+                throw QPDFExc(qpdf_e_damaged_pdf, 
+                             input->getName(),
+                             object_description,
+                             input->getLastOffset(),
+                             "parse error while reading object");
+            }
+            done = true;
+            // Leave object uninitialized to indicate EOF
+            break;
 
-	if (in_dictionary || in_array)
-	{
-	    if (! done)
-	    {
-		olist.push_back(object);
-	    }
-	}
-	else if (! object.isInitialized())
-	{
-	    throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
-			  object_description,
-			  input->getLastOffset(),
-			  "parse error while reading object");
-	}
-	else
-	{
-	    done = true;
-	}
-    }
+          case st_dictionary:
+          case st_array:
+            olist.push_back(object);
+            break;
 
-    if (in_array)
-    {
-	object = newArray(olist);
-    }
-    else if (in_dictionary)
-    {
-	// Convert list to map.  Alternating elements are keys.
-	std::map<std::string, QPDFObjectHandle> dict;
-	if (olist.size() % 2)
-	{
-	    QTC::TC("qpdf", "QPDFObjectHandle dictionary odd number of elements");
-	    throw QPDFExc(
-		qpdf_e_damaged_pdf, input->getName(),
-		object_description, input->getLastOffset(),
-		"dictionary ending here has an odd number of elements");
-	}
-	for (unsigned int i = 0; i < olist.size(); i += 2)
-	{
-	    QPDFObjectHandle key_obj = olist.at(i);
-	    QPDFObjectHandle val = olist.at(i + 1);
-	    if (! key_obj.isName())
-	    {
-		throw QPDFExc(
-		    qpdf_e_damaged_pdf,
-		    input->getName(), object_description, offset,
-        std::string("dictionary key is not not a name token"));        
-	    }
-	    dict[key_obj.getName()] = val;
-	}
-	object = newDictionary(dict);
+          case st_top:
+            done = true;
+            break;
+
+          case st_start:
+            break;
+
+          case st_stop: // a closing array/dictionary token was read: build the innermost open container
+            if ((state_stack.size() < 2) || (olist_stack.size() < 2))
+            {
+                throw std::logic_error( // both stacks must hold the top-level entry plus the container being closed
+                    "QPDFObjectHandle::parseInternal: st_stop encountered"
+                    " with insufficient elements in stack");
+            }
+            state_e old_state = state_stack.back(); // kind of container being closed
+            state_stack.pop_back();
+            if (old_state == st_array)
+            {
+                object = newArray(olist); // olist still refers to olist_stack.back(), which is popped further down
+            }
+            else if (old_state == st_dictionary)
+            {
+                // Convert list to map. Alternating elements are keys.
+                // Attempt to recover more or less gracefully from
+                // invalid dictionaries.
+                std::set<std::string> names; // every direct name object in olist; consulted so a generated key never collides
+                for (std::vector<QPDFObjectHandle>::iterator iter =
+                         olist.begin();
+                     iter != olist.end(); ++iter)
+                {
+                    if ((! (*iter).isIndirect()) && (*iter).isName())
+                    {
+                        names.insert((*iter).getName());
+                    }
+                }
+
+                std::map<std::string, QPDFObjectHandle> dict;
+                int next_fake_key = 1; // numeric suffix for the next "/QPDFFake<N>" candidate
+                for (unsigned int i = 0; i < olist.size(); ++i)
+                {
+                    QPDFObjectHandle key_obj = olist.at(i);
+                    QPDFObjectHandle val;
+                    if (key_obj.isIndirect() || (! key_obj.isName())) // element in key position is not a direct name
+                    {
+                        bool found_fake = false;
+                        std::string candidate;
+                        while (! found_fake) // advance until a "/QPDFFake<N>" absent from names is found
+                        {
+                            candidate =
+                                "/QPDFFake" +
+                                QUtil::int_to_string(next_fake_key++);
+                            found_fake = (names.count(candidate) == 0);
+                            QTC::TC("qpdf", "QPDFObjectHandle found fake",
+                                    (found_fake ? 0 : 1));
+                        }
+                        throw QPDFExc( // NOTE(review): control leaves here, so the two assignments after this statement never run
+                                 qpdf_e_damaged_pdf,
+                                 input->getName(), object_description, offset,
+                                 "expected dictionary key but found"
+                                 " non-name object; inserting key " +
+                                 candidate);
+                        val = key_obj; // unreachable: follows an unconditional throw
+                        key_obj = newName(candidate); // unreachable: follows an unconditional throw
+                    }
+                    else if (i + 1 >= olist.size()) // last element is a name with no value after it
+                    {
+                        QTC::TC("qpdf", "QPDFObjectHandle no val for last key");
+                        throw QPDFExc( // NOTE(review): message mentions a null value, but no entry is stored; control leaves via this throw
+                                 qpdf_e_damaged_pdf,
+                                 input->getName(), object_description, offset,
+                                 "dictionary ended prematurely; "
+                                 "using null as value for last key");
+                    }
+                    else
+                    {
+                        val = olist.at(++i); // consume the value; the loop's own ++i then moves to the next key
+                    }
+                    dict[key_obj.getName()] = val; // a repeated key overwrites the earlier value
+                }
+                object = newDictionary(dict);
+            }
+            olist_stack.pop_back(); // the olist reference bound at the top of the loop is invalid from here on
+            offset_stack.pop_back();
+            if (state_stack.back() == st_top)
+            {
+                done = true; // outermost container closed: object is what the function returns
+            }
+            else
+            {
+                olist_stack.back().push_back(object); // append the finished container to its enclosing container's list
+            }
+        }
     }
 
     return object;
diff -up qpdf-5.1.1/qpdf/qtest/qpdf/issue-146.out.CVE-2017-12595 qpdf-5.1.1/qpdf/qtest/qpdf/issue-146.out
--- qpdf-5.1.1/qpdf/qtest/qpdf/issue-146.out.CVE-2017-12595	2018-04-20 11:57:29.001931111 +0200
+++ qpdf-5.1.1/qpdf/qtest/qpdf/issue-146.out	2018-04-20 11:57:29.001931111 +0200
@@ -0,0 +1,5 @@
+WARNING: issue-146.pdf: file is damaged
+WARNING: issue-146.pdf: can't find startxref
+WARNING: issue-146.pdf: Attempting to reconstruct cross-reference table
+WARNING: issue-146.pdf (trailer, file position 695): ignoring excessively deeply nested data structure
+issue-146.pdf: unable to find trailer dictionary while recovering damaged file
diff -up qpdf-5.1.1/qpdf/qtest/qpdf/issue-146.pdf.CVE-2017-12595 qpdf-5.1.1/qpdf/qtest/qpdf/issue-146.pdf
--- qpdf-5.1.1/qpdf/qtest/qpdf/issue-146.pdf.CVE-2017-12595	2018-04-20 11:57:29.001931111 +0200
+++ qpdf-5.1.1/qpdf/qtest/qpdf/issue-146.pdf	2018-04-20 11:57:29.001931111 +0200
@@ -0,0 +1,20 @@
+%PDF-1.4 0 obj
+
+1 0 obj
+<</Type//Parent 4 0 R/Reces 6 0 R/Meox[092]/S/Trcy/CS/DeRGB/I true>>0 R>1>>
+endobj
+
+7 0 obj
+<</Type/log/s 4 /Opeion[1 0 R =X 0]
+/Lang(en-US)
+>>
+endo>
+endobj 9
+0n 
+trailer
ize 9/Root 7 0 R
+/Info 8 0 R
+/ID [ <F0E> ]
+/Dum /
+>>
+EOF