def __init__(self, bytes):
    """Wrap ``bytes`` in a reader whose cursor starts at offset 0.

    ``bytes`` is stored by reference (no copy is made); ``index`` is the
    offset of the next element to be read.
    """
    self.index = 0
    self.bytes = bytes
-
+
def get(self, length):
    """Read ``length`` elements at the cursor as one unsigned integer.

    The elements are combined most-significant first (big-endian) and the
    cursor advances by ``length``.
    NOTE(review): big-endian matches how ASN.1/DER encodes multi-byte
    fields -- confirm against the callers of this parser.

    Raises ASN1Error when fewer than ``length`` elements remain; in that
    case the cursor is left untouched.
    """
    if self.index + length > len(self.bytes):
        raise ASN1Error("Error decoding ASN.1")
    # The accumulator must start at zero; each element is shifted in below.
    x = 0
    for _ in range(length):
        x = (x << 8) | self.bytes[self.index]
        self.index += 1
    return x
-
+
def getFixBytes(self, lengthBytes):
    """Return the next ``lengthBytes`` elements as a slice and advance.

    Raises ASN1Error when fewer than ``lengthBytes`` elements remain, the
    same way ``get`` does, instead of silently returning a short slice
    and leaving the cursor past the end of the buffer.
    """
    if self.index + lengthBytes > len(self.bytes):
        raise ASN1Error("Error decoding ASN.1")
    start = self.index
    self.index = start + lengthBytes
    return self.bytes[start:self.index]
-
+
def getVarBytes(self, lengthLength):
    """Read a length-prefixed run of elements.

    The prefix is read with ``get(lengthLength)``; its value is then the
    number of elements fetched with ``getFixBytes``.
    """
    return self.getFixBytes(self.get(lengthLength))
-
+
def getFixList(self, length, lengthList):
    """Read ``lengthList`` integers, each via ``get(length)``, in order."""
    return [self.get(length) for _ in range(lengthList)]
-
+
def getVarList(self, length, lengthLength):
    """Read a length-prefixed list of fixed-width integers.

    The prefix (``lengthLength`` elements wide) gives the total size of the
    list payload; it must be a whole multiple of the item width ``length``.
    Each item is then read with ``get(length)``.

    Raises ASN1Error when the payload size is not a multiple of ``length``.
    """
    lengthList = self.get(lengthLength)
    if lengthList % length != 0:
        raise ASN1Error("Error decoding ASN.1")
    # Convert the payload size into an item count before reading.
    return [self.get(length) for _ in range(lengthList // length)]
-
+
def startLengthCheck(self, lengthLength):
    """Begin a length-checked region whose size is read from the input.

    The expected size is read with ``get(lengthLength)``; the cursor
    position after that read is recorded as the start of the region.
    """
    # The right-hand side is evaluated left to right, so ``get`` has
    # already advanced the cursor by the time ``self.index`` is sampled.
    self.lengthCheck, self.indexCheck = self.get(lengthLength), self.index
-
+
def setLengthCheck(self, length):
    """Begin a length-checked region of ``length`` elements at the cursor."""
    self.indexCheck = self.index
    self.lengthCheck = length
-
+
def stopLengthCheck(self):
    """Close the current length-checked region.

    Raises ASN1Error unless the number of elements consumed since the
    region started equals the recorded ``lengthCheck``.
    """
    consumed = self.index - self.indexCheck
    if consumed == self.lengthCheck:
        return
    raise ASN1Error("Error decoding ASN.1")
-
+
def atLengthCheck(self):
    """Report whether the current length-checked region is fully consumed.

    Returns False while fewer than ``lengthCheck`` elements have been read
    since the region started, and True once exactly that many have been.

    Raises ASN1Error when more than ``lengthCheck`` elements were consumed,
    matching the mismatch error raised by ``stopLengthCheck``.
    """
    consumed = self.index - self.indexCheck
    if consumed < self.lengthCheck:
        return False
    if consumed == self.lengthCheck:
        return True
    raise ASN1Error("Error decoding ASN.1")
path = elem.get('URI', None)
if path is not None:
encrypted.add(path)
-
+
def decompress(self, bytes):
    """Inflate a raw DEFLATE stream and return the decompressed data.

    ``wbits=-15`` tells zlib to expect a raw stream with no zlib header or
    trailer. Output still buffered inside the decompressor is collected
    with ``flush()`` and appended to the result.
    """
    dc = zlib.decompressobj(-15)
    data = dc.decompress(bytes)
    ex = dc.flush()
    if ex:
        data = data + ex
    return data
-
+
def decrypt(self, path, data):
    """Return ``data``, decrypted first when ``path`` is marked encrypted.

    For a path listed in ``self._encrypted`` the data is passed through
    ``self._aes.decrypt`` and the first 16 elements of the result are
    dropped; any other path is returned unchanged.
    NOTE(review): the dropped 16-element prefix is presumably the cipher
    IV, and the plaintext may still need padding removal and
    decompression -- confirm against the callers.
    """
    if path in self._encrypted:
        data = self._aes.decrypt(data)[16:]
    # Hand back the payload itself rather than a constant status code.
    return data
if __name__ == '__main__':
    # Any command-line argument selects the CLI entry point; with no
    # arguments the GUI entry point is started instead.
    if len(sys.argv) > 1:
        sys.exit(cli_main())
    sys.exit(gui_main())
if end == -1 :
end = self.docSize
+ # seems some xml has last* coming before first* so we have to
+ # handle any order
+ sp_first = -1
+ sp_last = -1
+
+ gl_first = -1
+ gl_last = -1
+
+ ws_first = -1
+ ws_last = -1
+
+ word_class = ''
+
while (line < end) :
(name, argres) = self.lineinDoc(line)
- # handle both span and _span
if name.endswith('span.firstWord') :
- first = int(argres)
- (name, argres) = self.lineinDoc(line+1)
- if not name.endswith('span.lastWord'):
- print 'Error: - incorrect _span ordering inside paragraph'
- last = int(argres)
- for wordnum in xrange(first, last):
- result.append(('ocr', wordnum))
- line += 1
+ sp_first = int(argres)
+
+ elif name.endswith('span.lastWord') :
+ sp_last = int(argres)
elif name.endswith('word.firstGlyph') :
- first = int(argres)
- (name, argres) = self.lineinDoc(line+1)
- if not name.endswith('word.lastGlyph'):
- print 'Error: - incorrect glyph ordering inside word in paragraph'
- last = int(argres)
- glyphList = []
- for glyphnum in xrange(first, last):
- glyphList.append(glyphnum)
- num = self.svgcount
- self.glyphs_to_image(glyphList)
- self.svgcount += 1
- result.append(('svg', num))
- line += 1
+ gl_first = int(argres)
+
+ elif name.endswith('word.lastGlyph') :
+ gl_last = int(argres)
+
+ elif name.endswith('word_semantic.firstWord'):
+ ws_first = int(argres)
+
+ elif name.endswith('word_semantic.lastWord'):
+ ws_last = int(argres)
elif name.endswith('word.class'):
(cname, space) = argres.split('-',1)
result.append(('img' + word_class, int(argres)))
word_class = ''
- elif name.endswith('word_semantic.firstWord'):
- first = int(argres)
- (name, argres) = self.lineinDoc(line+1)
- if not name.endswith('word_semantic.lastWord'):
- print 'Error: - incorrect word_semantic ordering inside paragraph'
- last = int(argres)
- for wordnum in xrange(first, last):
+ if (sp_first != -1) and (sp_last != -1):
+ for wordnum in xrange(sp_first, sp_last):
+ result.append(('ocr', wordnum))
+ sp_first = -1
+ sp_last = -1
+
+ if (gl_first != -1) and (gl_last != -1):
+ glyphList = []
+ for glyphnum in xrange(gl_first, gl_last):
+ glyphList.append(glyphnum)
+ num = self.svgcount
+ self.glyphs_to_image(glyphList)
+ self.svgcount += 1
+ result.append(('svg', num))
+ gl_first = -1
+ gl_last = -1
+
+ if (ws_first != -1) and (ws_last != -1):
+ for wordnum in xrange(ws_first, ws_last):
result.append(('ocr', wordnum))
- line += 1
+ ws_first = -1
+ ws_last = -1
line += 1