tools v1.1

author Apprentice Alf <apprenticealf@gmail.com>

Sun, 14 Feb 2010 15:47:48 +0000 (15:47 +0000)

committer Apprentice Alf <apprenticealf@gmail.com>

Mon, 2 Mar 2015 07:32:21 +0000 (07:32 +0000)
author Apprentice Alf <apprenticealf@gmail.com>
Sun, 14 Feb 2010 15:47:48 +0000 (15:47 +0000)
committer Apprentice Alf <apprenticealf@gmail.com>
Mon, 2 Mar 2015 07:32:21 +0000 (07:32 +0000)
diff --git a/Adobe_EPUB_Tools/ineptepub.pyw b/Adobe_EPUB_Tools/ineptepub.pyw

index 7f8cb4563375d88bdf7fe73316064e5792a63d88..14e51ff57c14091def8d9bf9aa775279e4c17f37 100644 (file)
--- a/Adobe_EPUB_Tools/ineptepub.pyw
+++ b/Adobe_EPUB_Tools/ineptepub.pyw
@@ -62,7 +62,7 @@ class ASN1Parser(object):
          def __init__(self, bytes):
              self.bytes = bytes
              self.index = 0
-    
+
          def get(self, length):
              if self.index + length > len(self.bytes):
                  raise ASN1Error("Error decoding ASN.1")
@@ -72,22 +72,22 @@ class ASN1Parser(object):
                  x |= self.bytes[self.index]
                  self.index += 1
              return x
-    
+
          def getFixBytes(self, lengthBytes):
              bytes = self.bytes[self.index : self.index+lengthBytes]
              self.index += lengthBytes
              return bytes
-    
+
          def getVarBytes(self, lengthLength):
              lengthBytes = self.get(lengthLength)
              return self.getFixBytes(lengthBytes)
-    
+
          def getFixList(self, length, lengthList):
              l = [0] * lengthList
              for x in range(lengthList):
                  l[x] = self.get(length)
              return l
-    
+
          def getVarList(self, length, lengthLength):
              lengthList = self.get(lengthLength)
              if lengthList % length != 0:
@@ -97,19 +97,19 @@ class ASN1Parser(object):
              for x in range(lengthList):
                  l[x] = self.get(length)
              return l
-    
+
          def startLengthCheck(self, lengthLength):
              self.lengthCheck = self.get(lengthLength)
              self.indexCheck = self.index
-    
+
          def setLengthCheck(self, length):
              self.lengthCheck = length
              self.indexCheck = self.index
-    
+
          def stopLengthCheck(self):
              if (self.index - self.indexCheck) != self.lengthCheck:
                  raise ASN1Error("Error decoding ASN.1")
-    
+
          def atLengthCheck(self):
              if (self.index - self.indexCheck) < self.lengthCheck:
                  return False
@@ -162,7 +162,7 @@ class Decryptor(object):
              path = elem.get('URI', None)
              if path is not None:
                  encrypted.add(path)
-    
+
      def decompress(self, bytes):
          dc = zlib.decompressobj(-15)
          bytes = dc.decompress(bytes)
@@ -170,7 +170,7 @@ class Decryptor(object):
          if ex:
              bytes = bytes + ex
          return bytes
-    
+
      def decrypt(self, path, data):
          if path in self._encrypted:
              data = self._aes.decrypt(data)[16:]
@@ -336,5 +336,6 @@ def gui_main():
      return 0
  
  if __name__ == '__main__':
-    # sys.exit(cli_main())
+    if len(sys.argv) > 1:
+        sys.exit(cli_main())
      sys.exit(gui_main())
diff --git a/Topaz_Tools/lib/flatxml2html.py b/Topaz_Tools/lib/flatxml2html.py

index 9e3080bf0332b7de296631ea450f38c06f50619d..6047749bd7265555d2507518e8ea901435107fe3 100644 (file)
--- a/Topaz_Tools/lib/flatxml2html.py
+++ b/Topaz_Tools/lib/flatxml2html.py
@@ -346,35 +346,40 @@ class DocParser(object):
          if end == -1 :
              end = self.docSize
  
+        # seems some xml has last* coming before first* so we have to 
+        # handle any order
+        sp_first = -1
+        sp_last = -1
+
+        gl_first = -1
+        gl_last = -1
+
+        ws_first = -1
+        ws_last = -1
+
+        word_class = ''
+
          while (line < end) :
  
              (name, argres) = self.lineinDoc(line)
  
-            # handle both span and _span
              if name.endswith('span.firstWord') :
-                first = int(argres)
-                (name, argres) = self.lineinDoc(line+1)
-                if not name.endswith('span.lastWord'):
-                    print 'Error: - incorrect _span ordering inside paragraph'
-                last = int(argres)
-                for wordnum in xrange(first, last):
-                    result.append(('ocr', wordnum))
-                line += 1
+                sp_first = int(argres)
+
+            elif name.endswith('span.lastWord') :
+                sp_last = int(argres)
  
              elif name.endswith('word.firstGlyph') :
-                first = int(argres)
-                (name, argres) = self.lineinDoc(line+1)
-                if not name.endswith('word.lastGlyph'):
-                    print 'Error: - incorrect glyph ordering inside word in paragraph'
-                last = int(argres)
-                glyphList = []
-                for glyphnum in xrange(first, last):
-                    glyphList.append(glyphnum)
-                num = self.svgcount
-                self.glyphs_to_image(glyphList)
-                self.svgcount += 1
-                result.append(('svg', num))
-                line += 1
+                gl_first = int(argres)
+
+            elif name.endswith('word.lastGlyph') :
+                gl_last = int(argres)
+
+            elif name.endswith('word_semantic.firstWord'):
+                ws_first = int(argres)
+
+            elif name.endswith('word_semantic.lastWord'):
+                ws_last = int(argres)
  
              elif name.endswith('word.class'):
                 (cname, space) = argres.split('-',1)
@@ -386,15 +391,28 @@ class DocParser(object):
                  result.append(('img' + word_class, int(argres)))
                  word_class = ''
  
-            elif name.endswith('word_semantic.firstWord'):
-                first = int(argres)
-                (name, argres) = self.lineinDoc(line+1)
-                if not name.endswith('word_semantic.lastWord'):
-                    print 'Error: - incorrect word_semantic ordering inside paragraph'
-                last = int(argres)
-                for wordnum in xrange(first, last):
+            if (sp_first != -1) and (sp_last != -1):
+                for wordnum in xrange(sp_first, sp_last):
+                    result.append(('ocr', wordnum))
+                sp_first = -1
+                sp_last = -1
+
+            if (gl_first != -1) and (gl_last != -1):
+                glyphList = []
+                for glyphnum in xrange(gl_first, gl_last):
+                    glyphList.append(glyphnum)
+                num = self.svgcount
+                self.glyphs_to_image(glyphList)
+                self.svgcount += 1
+                result.append(('svg', num))
+                gl_first = -1
+                gl_last = -1
+
+            if (ws_first != -1) and (ws_last != -1):
+                for wordnum in xrange(ws_first, ws_last):
                      result.append(('ocr', wordnum))
-                line += 1
+                ws_first = -1
+                ws_last = -1
                                
              line += 1
author	Apprentice Alf <apprenticealf@gmail.com>
	Sun, 14 Feb 2010 15:47:48 +0000 (15:47 +0000)
committer	Apprentice Alf <apprenticealf@gmail.com>
	Mon, 2 Mar 2015 07:32:21 +0000 (07:32 +0000)
Adobe_EPUB_Tools/ineptepub.pyw		patch | blob | blame | history
Topaz_Tools/lib/flatxml2html.py		patch | blob | blame | history