Fixes a bug that sometimes caused the plugin to fail

author NoDRM <no_drm123@protonmail.com>

Fri, 19 Nov 2021 11:42:29 +0000 (12:42 +0100)

committer NoDRM <no_drm123@protonmail.com>

Fri, 19 Nov 2021 11:44:10 +0000 (12:44 +0100)
author NoDRM <no_drm123@protonmail.com>
Fri, 19 Nov 2021 11:42:29 +0000 (12:42 +0100)
committer NoDRM <no_drm123@protonmail.com>
Fri, 19 Nov 2021 11:44:10 +0000 (12:44 +0100)
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml

index 707a5deaec66e7282576c1b3f5d0df4286de0598..c9f3ab7f1b8c35a3147cbfc4daa41b0e62eaf03f 100644 (file)
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -10,7 +10,7 @@ jobs:
        - name: Checkout
          uses: actions/checkout@v2
        - name: Package
-        run: python3 make_release.py 10.0.0
+        run: python3 make_release.py 10.0.1
        - name: Upload
          uses: actions/upload-artifact@v2
          with: 
diff --git a/DeDRM_plugin/__init__.py b/DeDRM_plugin/__init__.py

index 778b84b53f3c097c002f5103beb9b664d4ac6b51..3be03f7428fafd2c32228ebe31a9a7b58120abb6 100644 (file)
--- a/DeDRM_plugin/__init__.py
+++ b/DeDRM_plugin/__init__.py
@@ -6,7 +6,7 @@
  # Copyright © 2021 NoDRM
  
  __license__   = 'GPL v3'
-__version__ = '10.0.0'
+__version__ = '10.0.1'
  __docformat__ = 'restructuredtext en'
  
  
@@ -79,6 +79,7 @@ __docformat__ = 'restructuredtext en'
  #   7.2.0 - Update for latest KFX changes, and Python 3 Obok fixes.
  #   7.2.1 - Whitespace!
  #  10.0.0 - First forked version by NoDRM. See CHANGELOG.md for details.
+#  10.0.1 - Fixes a bug in the watermark code.
  
  """
  Decrypt DRMed ebooks.
@@ -210,6 +211,8 @@ class DeDRM(FileTypePlugin):
          # It does stuff like de-obfuscating fonts (by calling checkFonts) 
          # or removing watermarks. 
  
+        postProcessStart = time.time()
+
          try: 
              import calibre_plugins.dedrm.prefs as prefs
              dedrmprefs = prefs.DeDRM_Prefs()
@@ -224,13 +227,17 @@ class DeDRM(FileTypePlugin):
                  # Remove Tolino's CDP watermark file
                  path_to_ebook = watermark.removeCDPwatermark(self, path_to_ebook) or path_to_ebook
  
-                # Remove watermarks (currently just Amazon) from the OPF file
+                # Remove watermarks (Amazon or LemonInk) from the OPF file
                  path_to_ebook = watermark.removeOPFwatermarks(self, path_to_ebook) or path_to_ebook
  
-                # Remove watermarks (currently just Adobe's resource ID) from all HTML and XHTML files
+                # Remove watermarks (Adobe or LemonInk) from all HTML and XHTML files
                  path_to_ebook = watermark.removeHTMLwatermarks(self, path_to_ebook) or path_to_ebook
  
-                return path_to_ebook
+                postProcessEnd = time.time()
+                print("{0} v{1}: Post-processing took {2:.1f} seconds".format(PLUGIN_NAME, PLUGIN_VERSION, postProcessEnd-postProcessStart))
+
+
+            return path_to_ebook
  
          except: 
              print("Error while checking settings")
diff --git a/DeDRM_plugin/epubwatermark.py b/DeDRM_plugin/epubwatermark.py

index 3139e7a3d36e8443a810ee7064da23472def7056..e6115b3b37cfb862871cd763b526e6a4dffb9ad2 100644 (file)
--- a/DeDRM_plugin/epubwatermark.py
+++ b/DeDRM_plugin/epubwatermark.py
@@ -29,6 +29,12 @@ def removeHTMLwatermarks(object, path_to_ebook):
          modded_names = []
          modded_contents = []
  
+        count_adept = 0
+
+        count_lemonink_invisible = 0
+        count_lemonink_visible = 0
+        lemonink_trackingID = None
+
          for file in namelist:
              if not (file.endswith('.html') or file.endswith('.xhtml') or file.endswith('.xml')):
                  continue
@@ -40,8 +46,33 @@ def removeHTMLwatermarks(object, path_to_ebook):
                  # Remove Adobe ADEPT watermarks
                  # Match optional newline at the beginning, then a "meta" tag with name = "Adept.expected.resource" or "Adept.resource"
                  # and either a "value" or a "content" element with an Adobe UUID
+                pre_remove = str_new
                  str_new = re.sub(r'((\r\n|\r|\n)\s*)?\<meta\s+name=\"(Adept\.resource|Adept\.expected\.resource)\"\s+(content|value)=\"urn:uuid:[0-9a-fA-F\-]+\"\s*\/>', '', str_new)
                  str_new = re.sub(r'((\r\n|\r|\n)\s*)?\<meta\s+(content|value)=\"urn:uuid:[0-9a-fA-F\-]+\"\s+name=\"(Adept\.resource|Adept\.expected\.resource)\"\s*\/>', '', str_new)
+
+                if (str_new != pre_remove):
+                    count_adept += 1
+
+                # Remove eLibri / LemonInk watermark
+                # Run this in a loop, as it is possible a file has been watermarked twice ...
+                while True: 
+                    pre_remove = str_new
+                    unique_id = re.search(r'<body[^>]+class="[^"]*(t0x[0-9a-fA-F]{25})[^"]*"[^>]*>', str_new)
+                    if (unique_id):
+                        lemonink_trackingID = unique_id.groups()[0]
+                        count_lemonink_invisible += 1
+                        str_new = re.sub(lemonink_trackingID, '', str_new)
+                        pre_remove = str_new
+                        pm = r'(<body[^>]+class="[^"]*"[^>]*>)'
+                        pm += r'\<div style\=\'padding\:0\;border\:0\;text\-indent\:0\;line\-height\:normal\;margin\:0 1cm 0.5cm 1cm\;[^\']*text\-decoration\:none\;[^\']*background\:none\;[^\']*\'\>(.*?)</div>'
+                        pm += r'\<div style\=\'padding\:0\;border\:0\;text\-indent\:0\;line\-height\:normal\;margin\:0 1cm 0.5cm 1cm\;[^\']*text\-decoration\:none\;[^\']*background\:none\;[^\']*\'\>(.*?)</div>'
+                        str_new = re.sub(pm, r'\1', str_new)
+
+                        if (str_new != pre_remove):
+                            count_lemonink_visible += 1
+                    else: 
+                        break
+
              except:
                  traceback.print_exc()
                  continue
@@ -51,6 +82,7 @@ def removeHTMLwatermarks(object, path_to_ebook):
  
              modded_names.append(file)
              modded_contents.append(str_new)
+
          
          if len(modded_names) == 0:
              # No file modified, return original
@@ -58,7 +90,7 @@ def removeHTMLwatermarks(object, path_to_ebook):
  
          if len(modded_names) != len(modded_contents):
              # Something went terribly wrong, return original
-            print("Watermark: Error during ADEPT watermark removal")
+            print("Watermark: Error during watermark removal")
              return path_to_ebook
  
          # Re-package with modified files:
@@ -105,12 +137,20 @@ def removeHTMLwatermarks(object, path_to_ebook):
              traceback.print_exc()
              return path_to_ebook
  
+        if (count_adept > 0):
+            print("Watermark: Successfully stripped {0} ADEPT watermark(s) from ebook.".format(count_adept))
+        
+        if (count_lemonink_invisible > 0 or count_lemonink_visible > 0):
+            print("Watermark: Successfully stripped {0} visible and {1} invisible LemonInk watermark(s) (\"{2}\") from ebook."
+                .format(count_lemonink_visible, count_lemonink_invisible, lemonink_trackingID))
+
+        return output
+
      except:
          traceback.print_exc()
          return path_to_ebook
          
-    print("Watermark: Successfully stripped {0} ADEPT watermark(s) from ebook.".format(len(modded_names)))
-    return output
+
  
  
  # Finds the main OPF file, then uses RegEx to remove watermarks
@@ -141,10 +181,27 @@ def removeOPFwatermarks(object, path_to_ebook):
              container_str = inf.read(opf_path).decode("utf-8")
              container_str_new = container_str
  
+            had_amazon = False
+            had_elibri = False
+
              # Remove Amazon hex watermarks
              # Match optional newline at the beginning, then spaces, then a "meta" tag with name = "Watermark" or "Watermark_(hex)" and a "content" element.
-            container_str_new = re.sub(r'((\r\n|\r|\n)\s*)?\<meta\s+name=\"Watermark(_\(hex\))?\"\s+content=\"[0-9a-fA-F]+\"\s*\/>', '', container_str_new)
-            container_str_new = re.sub(r'((\r\n|\r|\n)\s*)?\<meta\s+content=\"[0-9a-fA-F]+\"\s+name=\"Watermark(_\(hex\))?\"\s*\/>', '', container_str_new)
+            # This regex also matches DuMont watermarks with meta name="watermark", with the case-insensitive match on the "w" in watermark.
+            pre_remove = container_str_new
+            container_str_new = re.sub(r'((\r\n|\r|\n)\s*)?\<meta\s+name=\"[Ww]atermark(_\(hex\))?\"\s+content=\"[0-9a-fA-F]+\"\s*\/>', '', container_str_new)
+            container_str_new = re.sub(r'((\r\n|\r|\n)\s*)?\<meta\s+content=\"[0-9a-fA-F]+\"\s+name=\"[Ww]atermark(_\(hex\))?\"\s*\/>', '', container_str_new)
+            if pre_remove != container_str_new:
+                had_amazon = True
+
+            # Remove elibri / lemonink watermark
+            # Lemonink replaces all "id" fields in the opf with "idX_Y", with X being the watermark and Y being a number for that particular ID.
+            # This regex replaces all "idX_Y" IDs with "id_Y", removing the watermark IDs.
+            pre_remove = container_str_new
+            container_str_new = re.sub(r'((\r\n|\r|\n)\s*)?\<\!\-\-\s*Wygenerowane przez elibri dla zamówienia numer [0-9a-fA-F]+\s*\-\-\>', '', container_str_new)
+            container_str_new = re.sub(r'\=\"id[0-9]+_([0-9]+)\"', r'="id_\1"', container_str_new)
+            if pre_remove != container_str_new:
+                had_elibri = True
+
          except:
              traceback.print_exc()
              return path_to_ebook
@@ -191,7 +248,11 @@ def removeOPFwatermarks(object, path_to_ebook):
              traceback.print_exc()
              return path_to_ebook
          
-        print("Watermark: Successfully stripped Amazon watermark from OPF file.")
+        if had_elibri:
+            print("Watermark: Successfully stripped eLibri watermark from OPF file.")
+        if had_amazon:
+            print("Watermark: Successfully stripped Amazon watermark from OPF file.")
+
          return output
author	NoDRM <no_drm123@protonmail.com>
	Fri, 19 Nov 2021 11:42:29 +0000 (12:42 +0100)
committer	NoDRM <no_drm123@protonmail.com>
	Fri, 19 Nov 2021 11:44:10 +0000 (12:44 +0100)
.github/workflows/main.yml		patch | blob | blame | history
DeDRM_plugin/__init__.py		patch | blob | blame | history
DeDRM_plugin/epubwatermark.py		patch | blob | blame | history