Improve testing of decrypted text files (and so decrypt badly formatted ePubs).

author Apprentice Harper <apprenticeharper@gmail.com>

Wed, 21 Dec 2016 06:33:34 +0000 (06:33 +0000)

committer Apprentice Harper <apprenticeharper@gmail.com>

Wed, 21 Dec 2016 06:33:34 +0000 (06:33 +0000)
author Apprentice Harper <apprenticeharper@gmail.com>
Wed, 21 Dec 2016 06:33:34 +0000 (06:33 +0000)
committer Apprentice Harper <apprenticeharper@gmail.com>
Wed, 21 Dec 2016 06:33:34 +0000 (06:33 +0000)
diff --git a/Obok_calibre_plugin/obok_plugin.zip b/Obok_calibre_plugin/obok_plugin.zip

index b3116433c4954ed24e57543ea0ee1dc362f94f81..9cb32b18facb77a5ef39a0a8d2a466dae7f4616e 100644 (file)

Binary files a/Obok_calibre_plugin/obok_plugin.zip and b/Obok_calibre_plugin/obok_plugin.zip differ
diff --git a/Obok_calibre_plugin/obok_plugin/obok/obok.py b/Obok_calibre_plugin/obok_plugin/obok/obok.py

index 5050c7ab367c09abe51e80e08e88793efa5d6384..d2188e36e0859b853ce10a0a1a1a43171c344037 100644 (file)
--- a/Obok_calibre_plugin/obok_plugin/obok/obok.py
+++ b/Obok_calibre_plugin/obok_plugin/obok/obok.py
@@ -1,6 +1,9 @@
  #!/usr/bin/env python
  # -*- coding: utf-8 -*-
  
+# Version 3.2.5 December 2016
+# Improve detection of good text decryption.
+#
  # Version 3.2.4 December 2016
  # Remove incorrect support for Kobo Desktop under Wine
  #
@@ -585,6 +588,36 @@ class KoboFile(object):
          Returns True if the content was checked, False if it was not
          checked."""
          if self.mimetype == 'application/xhtml+xml':
+            # assume utf-8 with no BOM
+            textoffset = 0
+            stride = 1
+            print u"Checking text:{0}:".format(contents[:10])
+            # check for byte order mark
+            if contents[:3]=="\xef\xbb\xbf":
+                # seems to be utf-8 with BOM
+                print u"Could be utf-8 with BOM"
+                textoffset = 3
+            elif contents[:2]=="\xfe\xff":
+                # seems to be utf-16BE
+                print u"Could be  utf-16BE"
+                textoffset = 3
+                stride = 2
+            elif contents[:2]=="\xff\xfe":
+                # seems to be utf-16LE
+                print u"Could be  utf-16LE"
+                textoffset = 2
+                stride = 2
+            else:
+                print u"Perhaps utf-8 without BOM"
+                
+            # now check that the first few characters are in the ASCII range
+            for i in xrange(textoffset,textoffset+5*stride,stride):
+                if ord(contents[i])<32 or ord(contents[i])>127:
+                    # Non-ascii, so decryption probably failed
+                    print u"Bad character at {0}, value {1}".format(i,ord(contents[i]))
+                    raise ValueError
+            print u"Seems to be good text"
+            return True
              if contents[:5]=="<?xml" or contents[:8]=="\xef\xbb\xbf<?xml":
                  # utf-8
                  return True
author	Apprentice Harper <apprenticeharper@gmail.com>
	Wed, 21 Dec 2016 06:33:34 +0000 (06:33 +0000)
committer	Apprentice Harper <apprenticeharper@gmail.com>
	Wed, 21 Dec 2016 06:33:34 +0000 (06:33 +0000)
Obok_calibre_plugin/obok_plugin.zip		patch | blob | blame | history
Obok_calibre_plugin/obok_plugin/obok/obok.py		patch | blob | blame | history