xmof Git - DeDRM.git/commitdiff
Improve testing of decrypted text file. (And so decrypt badly formatted ePubs)
author Apprentice Harper <apprenticeharper@gmail.com>
Wed, 21 Dec 2016 06:33:34 +0000 (06:33 +0000)
committer Apprentice Harper <apprenticeharper@gmail.com>
Wed, 21 Dec 2016 06:33:34 +0000 (06:33 +0000)
Obok_calibre_plugin/obok_plugin.zip
Obok_calibre_plugin/obok_plugin/obok/obok.py

index b3116433c4954ed24e57543ea0ee1dc362f94f81..9cb32b18facb77a5ef39a0a8d2a466dae7f4616e 100644 (file)
Binary files a/Obok_calibre_plugin/obok_plugin.zip and b/Obok_calibre_plugin/obok_plugin.zip differ
index 5050c7ab367c09abe51e80e08e88793efa5d6384..d2188e36e0859b853ce10a0a1a1a43171c344037 100644 (file)
@@ -1,6 +1,9 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
+# Version 3.2.5 December 2016
+# Improve detection of good text decryption.
+#
 # Version 3.2.4 December 2016
 # Remove incorrect support for Kobo Desktop under Wine
 #
@@ -585,6 +588,36 @@ class KoboFile(object):
         Returns True if the content was checked, False if it was not
         checked."""
         if self.mimetype == 'application/xhtml+xml':
+            # assume utf-8 with no BOM
+            textoffset = 0
+            stride = 1
+            print u"Checking text:{0}:".format(contents[:10])
+            # check for byte order mark
+            if contents[:3]=="\xef\xbb\xbf":
+                # seems to be utf-8 with BOM
+                print u"Could be utf-8 with BOM"
+                textoffset = 3
+            elif contents[:2]=="\xfe\xff":
+                # seems to be utf-16BE
+                print u"Could be  utf-16BE"
+                textoffset = 3
+                stride = 2
+            elif contents[:2]=="\xff\xfe":
+                # seems to be utf-16LE
+                print u"Could be  utf-16LE"
+                textoffset = 2
+                stride = 2
+            else:
+                print u"Perhaps utf-8 without BOM"
+                
+            # now check that the first few characters are in the ASCII range
+            for i in xrange(textoffset,textoffset+5*stride,stride):
+                if ord(contents[i])<32 or ord(contents[i])>127:
+                    # Non-ascii, so decryption probably failed
+                    print u"Bad character at {0}, value {1}".format(i,ord(contents[i]))
+                    raise ValueError
+            print u"Seems to be good text"
+            return True
             if contents[:5]=="<?xml" or contents[:8]=="\xef\xbb\xbf<?xml":
                 # utf-8
                 return True