Fix tons of PDF-related issues

author NoDRM <no_drm123@protonmail.com>

Sun, 7 Aug 2022 13:58:01 +0000 (15:58 +0200)

committer NoDRM <no_drm123@protonmail.com>

Sun, 7 Aug 2022 13:58:01 +0000 (15:58 +0200)
author NoDRM <no_drm123@protonmail.com>
Sun, 7 Aug 2022 13:58:01 +0000 (15:58 +0200)
committer NoDRM <no_drm123@protonmail.com>
Sun, 7 Aug 2022 13:58:01 +0000 (15:58 +0200)
diff --git a/CHANGELOG.md b/CHANGELOG.md

index da8525a68a5603a558ebc67ea363a23d28c77a42..a2599407e4a1a94916384cfc59ce56398f0e0a95 100644 (file)
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -73,8 +73,9 @@ List of changes since the fork of Apprentice Harper's repository:
  - Fix some more Calibre-6 bugs in the Obok plugin (should fix #114).
  - Fix a bug where invalid Adobe keys could cause the plugin to stop trying subsequent keys (partially fixes #109).
  - Fix DRM removal sometimes resetting the ZIP's internal "external_attr" value on Calibre 5 and newer.
-- Fix PDF decryption issues on Calibre 4 (hopefully fixes #104).
+- Fix tons of PDF decryption issues (hopefully fixes #104 and other PDF-related issues).
  - Small Python 2 / Calibre 4 bugfix for Obok.
  - Removing ancient AlfCrypto machine code libraries, moving all encryption / decryption to Python code.
  - General cleanup and removal of dead code.
  - Fix a bug where ADE account keys weren't automatically imported from the DeACSM plugin when importing a PDF file.
+- Re-enable Xrefs in exported PDF files since the file corruption bug is hopefully fixed. Please open bug reports if you encounter new issues with PDF files.
diff --git a/DeDRM_plugin/ineptpdf.py b/DeDRM_plugin/ineptpdf.py

index b34b38cf11c4a14aed0ae5aa10488f2702dd9e0d..621416cd145026adb4dbfd0040d65cad8222aedd 100755 (executable)
--- a/DeDRM_plugin/ineptpdf.py
+++ b/DeDRM_plugin/ineptpdf.py
@@ -117,7 +117,7 @@ def SHA256(message):
  # 1 = only if present in input
  # 2 = always
  
-GEN_XREF_STM = 0
+GEN_XREF_STM = 1
  
  # This is the value for the current document
  gen_xref_stm = False # will be set in PDFSerializer
@@ -565,7 +565,11 @@ class PSBaseParser(object):
          while 1:
              self.fillbuf()
              if eol:
-                c = bytes([self.buf[self.charpos]])
+                if sys.version_info[0] == 2: 
+                    c = self.buf[self.charpos]
+                else: 
+                    c = bytes([self.buf[self.charpos]])
+
                  # handle '\r\n'
                  if c == b'\n':
                      linebuf += c
@@ -575,10 +579,17 @@ class PSBaseParser(object):
              if m:
                  linebuf += self.buf[self.charpos:m.end(0)]
                  self.charpos = m.end(0)
-                if bytes([linebuf[-1]]) == b'\r':
-                    eol = True
-                else:
-                    break
+                if sys.version_info[0] == 2:
+                    if linebuf[-1] == b'\r':
+                        eol = True
+                    else:
+                        break
+                else: 
+                    if bytes([linebuf[-1]]) == b'\r':
+                        eol = True
+                    else:
+                        break
+                    
              else:
                  linebuf += self.buf[self.charpos:]
                  self.charpos = len(self.buf)
@@ -954,9 +965,14 @@ class PDFStream(PDFObject):
                      for i in range(0, len(data), columns+1):
                          pred = data[i]
                          ent1 = data[i+1:i+1+columns]
-                        if pred == 2:
-                            ent1 = b''.join(bytes([(a+b) & 255]) \
-                                           for (a,b) in zip(ent0,ent1))
+                        if sys.version_info[0] == 2:
+                            if pred == '\x02':
+                                ent1 = ''.join(chr((ord(a)+ord(b)) & 255) \
+                                               for (a,b) in zip(ent0,ent1))
+                        else: 
+                            if pred == 2:
+                                ent1 = b''.join(bytes([(a+b) & 255]) \
+                                            for (a,b) in zip(ent0,ent1))
                          buf += ent1
                          ent0 = ent1
                      data = buf
@@ -1070,8 +1086,6 @@ class PDFXRef(object):
          return (None, pos)
  
  
-##  PDFXRefStream
-##
  class PDFXRefStream(object):
  
      def __init__(self):
@@ -1404,7 +1418,10 @@ class PDFDocument(object):
          x = ARC4.new(hash).decrypt(Odata) # 4
          if R >= 3:
              for i in range(1,19+1):
-                k = b''.join(bytes([c ^ i]) for c in hash )
+                if sys.version_info[0] == 2:
+                    k = b''.join(chr(ord(c) ^ i) for c in hash )
+                else: 
+                    k = b''.join(bytes([c ^ i]) for c in hash )
                  x = ARC4.new(k).decrypt(x)
  
  
@@ -1462,7 +1479,10 @@ class PDFDocument(object):
              hash.update(docid[0]) # 3
              x = ARC4.new(key).decrypt(hash.digest()[:16]) # 4
              for i in range(1,19+1):
-                k = b''.join(bytes([c ^ i]) for c in key )
+                if sys.version_info[0] == 2:
+                    k = b''.join(chr(ord(c) ^ i) for c in key )
+                else: 
+                    k = b''.join(bytes([c ^ i]) for c in key )
                  x = ARC4.new(k).decrypt(x)
              u1 = x+x # 32bytes total
          if R == 2:
@@ -1490,8 +1510,8 @@ class PDFDocument(object):
  
          # check owner pass:
          retval = self.check_owner_password(password, docid, param)
-        if retval is True or retval is not None:
-            #print("Owner pass is valid - " + str(retval))
+        if retval is True or (retval is not False and retval is not None):
+            #print("Owner pass is valid")
              if retval is True:
                  self.decrypt_key = self.recover_encryption_key_with_password(password, docid, param)
              else:
@@ -1500,7 +1520,7 @@ class PDFDocument(object):
          if self.decrypt_key is None or self.decrypt_key is True or self.decrypt_key is False:
              # That's not the owner password. Check if it's the user password.
              retval = self.check_user_password(password, docid, param)
-            if retval is True or retval is not None:
+            if retval is True or (retval is not False and retval is not None):
                  #print("User pass is valid")
                  if retval is True:
                      self.decrypt_key = self.recover_encryption_key_with_password(password, docid, param)
@@ -1723,7 +1743,11 @@ class PDFDocument(object):
          data = data[16:]
          plaintext = AES.new(key,AES.MODE_CBC,ivector).decrypt(data)
          # remove pkcs#5 aes padding
-        cutter = -1 * plaintext[-1]
+        if sys.version_info[0] == 2: 
+            cutter = -1 * ord(plaintext[-1])
+        else: 
+            cutter = -1 * plaintext[-1]
+
          plaintext = plaintext[:cutter]
          return plaintext
  
@@ -2199,7 +2223,11 @@ class PDFSerializer(object):
          elif isinstance(obj, bytearray):
              self.write(b'(%s)' % self.escape_string(obj))
          elif isinstance(obj, bytes):
-            self.write(b'(%s)' % self.escape_string(obj))
+            # I'm not 100% sure if this is correct, but it seems to fix some PDFs ...
+            # If needed, revert that change.
+            self.write(b'<%s>' % binascii.hexlify(obj).upper())
+            print("ineptpdf.py: Unknown bytes element found - guessing.")            
+            print("If this PDF is corrupted and/or doesn't work, please open a bug report.")
          elif isinstance(obj, str):
              self.write(b'(%s)' % self.escape_string(obj.encode('utf-8')))
          elif isinstance(obj, bool):
@@ -2226,6 +2254,20 @@ class PDFSerializer(object):
                  self.write(b'(deleted)')
              else:
                  data = obj.get_decdata()
+
+                # Fix length:
+                # We've decompressed and then recompressed the PDF stream.
+                # Depending on the algorithm, the implementation, and the compression level, 
+                # the resulting recompressed stream is unlikely to have the same length as the original.
+                # So we need to update the PDF object to contain the new proper length.
+
+                # Without this change, all PDFs exported by this plugin are slightly corrupted - 
+                # even though most if not all PDF readers can correct that on-the-fly.
+
+                if 'Length' in obj.dic: 
+                    obj.dic['Length'] = len(data)
+
+
                  self.serialize_object(obj.dic)
                  self.write(b'stream\n')
                  self.write(data)
author	NoDRM <no_drm123@protonmail.com>
	Sun, 7 Aug 2022 13:58:01 +0000 (15:58 +0200)
committer	NoDRM <no_drm123@protonmail.com>
	Sun, 7 Aug 2022 13:58:01 +0000 (15:58 +0200)
CHANGELOG.md		patch \| blob \| blame \| history
DeDRM_plugin/ineptpdf.py		patch \| blob \| blame \| history