Add support for empty arrays (<>) in PDF objects. Fixes #183.

author NoDRM <no_drm123@protonmail.com>

Mon, 17 Oct 2022 15:12:03 +0000 (17:12 +0200)

committer NoDRM <no_drm123@protonmail.com>

Mon, 17 Oct 2022 15:13:41 +0000 (17:13 +0200)
author NoDRM <no_drm123@protonmail.com>
Mon, 17 Oct 2022 15:12:03 +0000 (17:12 +0200)
committer NoDRM <no_drm123@protonmail.com>
Mon, 17 Oct 2022 15:13:41 +0000 (17:13 +0200)
diff --git a/CHANGELOG.md b/CHANGELOG.md

index 09c6914ffbe36315791177f5f071a93aea84c72e..6886c0e2f5eadb2b024f14ccc8edb07561c4e482 100644 (file)
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -81,3 +81,4 @@ List of changes since the fork of Apprentice Harper's repository:
  - Re-enable Xrefs in exported PDF files since the file corruption bug is hopefully fixed. Please open bug reports if you encounter new issues with PDF files.
  - Fix a bug that would sometimes cause corrupted keys to be added when adding them through the config dialog (fixes #145, #134, #119, #116, #115, #109).
  - Update the README (fixes #136) to indicate that Apprentice Harper's version is no longer being updated.
+- Fix a bug where PDFs with empty arrays (`<>`) in a PDF object failed to decrypt, fixes #183.
diff --git a/DeDRM_plugin/ineptpdf.py b/DeDRM_plugin/ineptpdf.py

index 7fb997893f1b7e6ecaef432e611f4a449524c05b..adb99374c32e5d5b68d28adcb23278b60eb91b41 100755 (executable)
--- a/DeDRM_plugin/ineptpdf.py
+++ b/DeDRM_plugin/ineptpdf.py
@@ -270,6 +270,11 @@ END_STRING = re.compile(br'[()\\]')
  OCT_STRING = re.compile(br'[0-7]')
  ESC_STRING = { b'b':8, b't':9, b'n':10, b'f':12, b'r':13, b'(':40, b')':41, b'\\':92 }
  
+class EmptyArrayValue(object):
+    def __str__(self):
+        return "<>"
+
+
  class PSBaseParser(object):
  
      '''
@@ -519,6 +524,13 @@ class PSBaseParser(object):
          if c == b'<':
              self.add_token(KEYWORD_DICT_BEGIN)
              i += 1
+        if c == b'>':
+            # Empty array without any contents. Why though?
+            # We need to add some dummy python object that will serialize to 
+            # nothing, otherwise the code removes the whole array.
+            self.add_token(EmptyArrayValue())
+            i += 1
+
          return (self.parse_main, i)
  
      def parse_wclose(self, s, i):
@@ -544,7 +556,6 @@ class PSBaseParser(object):
          else: 
              token = HEX_PAIR.sub(lambda m: bytes([int(m.group(0), 16)]),
                                                   SPC.sub(b'', self.token))
-
          self.add_token(token)
          return (self.parse_main, j)
  
@@ -1591,7 +1602,13 @@ class PDFDocument(object):
  
      def initialize_ebx_ignoble(self, keyb64, docid, param):
          self.is_printable = self.is_modifiable = self.is_extractable = True
-        key = keyb64.decode('base64')[:16]
+
+        try: 
+            key = keyb64.decode('base64')[:16]
+            # This will probably always error, but I'm not 100% sure, so lets leave the old code in.
+        except AttributeError: 
+            key = codecs.decode(keyb64.encode("ascii"), 'base64')[:16]
+
  
          length = int_value(param.get('Length', 0)) / 8
          rights = codecs.decode(str_value(param.get('ADEPT_LICENSE')), "base64")
@@ -2225,11 +2242,7 @@ class PDFSerializer(object):
          elif isinstance(obj, bytearray):
              self.write(b'(%s)' % self.escape_string(obj))
          elif isinstance(obj, bytes):
-            # I'm not 100% sure if this is correct, but it seems to fix some PDFs ...
-            # If needed, revert that change.
              self.write(b'<%s>' % binascii.hexlify(obj).upper())
-            print("ineptpdf.py: Unknown bytes element found - guessing.")            
-            print("If this PDF is corrupted and/or doesn't work, please open a bug report.")
          elif isinstance(obj, str):
              self.write(b'(%s)' % self.escape_string(obj.encode('utf-8')))
          elif isinstance(obj, bool):
author	NoDRM <no_drm123@protonmail.com>
	Mon, 17 Oct 2022 15:12:03 +0000 (17:12 +0200)
committer	NoDRM <no_drm123@protonmail.com>
	Mon, 17 Oct 2022 15:13:41 +0000 (17:13 +0200)
CHANGELOG.md		patch | blob | blame | history
DeDRM_plugin/ineptpdf.py		patch | blob | blame | history