From: Apprentice Alf Date: Tue, 6 Mar 2012 18:24:28 +0000 (+0000) Subject: tools v5.0 X-Git-Url: http://git.xmof.duckdns.org/?a=commitdiff_plain;h=07e532f59c584b62a1fe34020a2ccbcd7ad12223;p=DeDRM.git tools v5.0 Introduction of alfcrypto library for speed Reorganisation of archive plugins,apps,other --- diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/__init__.py b/Calibre_Plugins/K4MobiDeDRM_plugin/__init__.py index 30c1e13..233b462 100644 --- a/Calibre_Plugins/K4MobiDeDRM_plugin/__init__.py +++ b/Calibre_Plugins/K4MobiDeDRM_plugin/__init__.py @@ -4,28 +4,62 @@ from __future__ import with_statement from calibre.customize import FileTypePlugin from calibre.gui2 import is_ok_to_use_qt +from calibre.utils.config import config_dir +from calibre.constants import iswindows, isosx # from calibre.ptempfile import PersistentTemporaryDirectory -from calibre_plugins.k4mobidedrm import kgenpids -from calibre_plugins.k4mobidedrm import topazextract -from calibre_plugins.k4mobidedrm import mobidedrm import sys import os import re +from zipfile import ZipFile class K4DeDRM(FileTypePlugin): name = 'K4PC, K4Mac, Kindle Mobi and Topaz DeDRM' # Name of the plugin description = 'Removes DRM from Mobipocket, Kindle/Mobi, Kindle/Topaz and Kindle/Print Replica files. Provided by the work of many including DiapDealer, SomeUpdates, IHeartCabbages, CMBDTC, Skindle, DarkReverser, ApprenticeAlf, etc.' 
supported_platforms = ['osx', 'windows', 'linux'] # Platforms this plugin will run on author = 'DiapDealer, SomeUpdates' # The author of this plugin - version = (0, 3, 8) # The version number of this plugin + version = (0, 4, 1) # The version number of this plugin file_types = set(['prc','mobi','azw','azw1','azw4','tpz']) # The file types that this plugin will be applied to on_import = True # Run this plugin during the import priority = 210 # run this plugin before mobidedrm, k4pcdedrm, k4dedrm minimum_calibre_version = (0, 7, 55) + + def initialize(self): + """ + Dynamic modules can't be imported/loaded from a zipfile... so this routine + runs whenever the plugin gets initialized. This will extract the appropriate + library for the target OS and copy it to the 'alfcrypto' subdirectory of + calibre's configuration directory. That 'alfcrypto' directory is then + inserted into the syspath (as the very first entry) in the run function + so the CDLL stuff will work in the alfcrypto.py script. + """ + if iswindows: + names = ['alfcrypto.dll','alfcrypto64.dll'] + elif isosx: + names = ['libalfcrypto.dylib'] + else: + names = ['libalfcrypto32.so','libalfcrypto64.so'] + lib_dict = self.load_resources(names) + self.alfdir = os.path.join(config_dir, 'alfcrypto') + if not os.path.exists(self.alfdir): + os.mkdir(self.alfdir) + for entry, data in lib_dict.items(): + file_path = os.path.join(self.alfdir, entry) + with open(file_path,'wb') as f: + f.write(data) def run(self, path_to_ebook): + # add the alfcrypto directory to sys.path so alfcrypto.py + # will be able to locate the custom lib(s) for CDLL import. + sys.path.insert(0, self.alfdir) + # Had to move these imports here so the custom libs can be + # extracted to the appropriate places beforehand these routines + # look for them. 
+ from calibre_plugins.k4mobidedrm import kgenpids + from calibre_plugins.k4mobidedrm import topazextract + from calibre_plugins.k4mobidedrm import mobidedrm + plug_ver = '.'.join(str(self.version).strip('()').replace(' ', '').split(',')) k4 = True if sys.platform.startswith('linux'): @@ -45,7 +79,7 @@ class K4DeDRM(FileTypePlugin): serials.append(customvalue) else: print "%s is not a valid Kindle serial number or PID." % str(customvalue) - + # Load any kindle info files (*.info) included Calibre's config directory. try: # Find Calibre's configuration directory. @@ -77,7 +111,7 @@ class K4DeDRM(FileTypePlugin): title = mb.getBookTitle() md1, md2 = mb.getPIDMetaInfo() - pidlst = kgenpids.getPidList(md1, md2, k4, pids, serials, kInfoFiles) + pidlst = kgenpids.getPidList(md1, md2, k4, pids, serials, kInfoFiles) try: mb.processBook(pidlst) @@ -94,11 +128,11 @@ class K4DeDRM(FileTypePlugin): except topazextract.TpzDRMError, e: #if you reached here then no luck raise and exception if is_ok_to_use_qt(): - from PyQt4.Qt import QMessageBox - d = QMessageBox(QMessageBox.Warning, "K4MobiDeDRM v%s Plugin" % plug_ver, "Error: " + str(e) + "... %s\n" % path_to_ebook) - d.show() - d.raise_() - d.exec_() + from PyQt4.Qt import QMessageBox + d = QMessageBox(QMessageBox.Warning, "K4MobiDeDRM v%s Plugin" % plug_ver, "Error: " + str(e) + "... %s\n" % path_to_ebook) + d.show() + d.raise_() + d.exec_() raise Exception("K4MobiDeDRM plugin v%s Error: %s" % (plug_ver, str(e))) print "Success!" @@ -117,3 +151,11 @@ class K4DeDRM(FileTypePlugin): def customization_help(self, gui=False): return 'Enter 10 character PIDs and/or Kindle serial numbers, use a comma (no spaces) to separate each PID or SerialNumber from the next.' 
+ + def load_resources(self, names): + ans = {} + with ZipFile(self.plugin_path, 'r') as zf: + for candidate in zf.namelist(): + if candidate in names: + ans[candidate] = zf.read(candidate) + return ans \ No newline at end of file diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/convert2xml.py b/Calibre_Plugins/K4MobiDeDRM_plugin/convert2xml.py index 0328206..5667511 100644 --- a/Calibre_Plugins/K4MobiDeDRM_plugin/convert2xml.py +++ b/Calibre_Plugins/K4MobiDeDRM_plugin/convert2xml.py @@ -1,832 +1,568 @@ -#! /usr/bin/python -# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab -# For use with Topaz Scripts Version 2.6 - -class Unbuffered: - def __init__(self, stream): - self.stream = stream - def write(self, data): - self.stream.write(data) - self.stream.flush() - def __getattr__(self, attr): - return getattr(self.stream, attr) - -import sys -sys.stdout=Unbuffered(sys.stdout) - -import csv -import os -import getopt -from struct import pack -from struct import unpack - -class TpzDRMError(Exception): - pass - -# Get a 7 bit encoded number from string. 
The most -# significant byte comes first and has the high bit (8th) set - -def readEncodedNumber(file): - flag = False - c = file.read(1) - if (len(c) == 0): - return None - data = ord(c) - - if data == 0xFF: - flag = True - c = file.read(1) - if (len(c) == 0): - return None - data = ord(c) - - if data >= 0x80: - datax = (data & 0x7F) - while data >= 0x80 : - c = file.read(1) - if (len(c) == 0): - return None - data = ord(c) - datax = (datax <<7) + (data & 0x7F) - data = datax - - if flag: - data = -data - return data - - -# returns a binary string that encodes a number into 7 bits -# most significant byte first which has the high bit set - -def encodeNumber(number): - result = "" - negative = False - flag = 0 - - if number < 0 : - number = -number + 1 - negative = True - - while True: - byte = number & 0x7F - number = number >> 7 - byte += flag - result += chr(byte) - flag = 0x80 - if number == 0 : - if (byte == 0xFF and negative == False) : - result += chr(0x80) - break - - if negative: - result += chr(0xFF) - - return result[::-1] - - - -# create / read a length prefixed string from the file - -def lengthPrefixString(data): - return encodeNumber(len(data))+data - -def readString(file): - stringLength = readEncodedNumber(file) - if (stringLength == None): - return "" - sv = file.read(stringLength) - if (len(sv) != stringLength): - return "" - return unpack(str(stringLength)+"s",sv)[0] - - -# convert a binary string generated by encodeNumber (7 bit encoded number) -# to the value you would find inside the page*.dat files to be processed - -def convert(i): - result = '' - val = encodeNumber(i) - for j in xrange(len(val)): - c = ord(val[j:j+1]) - result += '%02x' % c - return result - - - -# the complete string table used to store all book text content -# as well as the xml tokens and values that make sense out of it - -class Dictionary(object): - def __init__(self, dictFile): - self.filename = dictFile - self.size = 0 - self.fo = file(dictFile,'rb') - self.stable = 
[] - self.size = readEncodedNumber(self.fo) - for i in xrange(self.size): - self.stable.append(self.escapestr(readString(self.fo))) - self.pos = 0 - - def escapestr(self, str): - str = str.replace('&','&') - str = str.replace('<','<') - str = str.replace('>','>') - str = str.replace('=','=') - return str - - def lookup(self,val): - if ((val >= 0) and (val < self.size)) : - self.pos = val - return self.stable[self.pos] +#! /usr/bin/env python + +""" + Routines for doing AES CBC in one file + + Modified by some_updates to extract + and combine only those parts needed for AES CBC + into one simple to add python file + + Original Version + Copyright (c) 2002 by Paul A. Lambert + Under: + CryptoPy Artisitic License Version 1.0 + See the wonderful pure python package cryptopy-1.2.5 + and read its LICENSE.txt for complete license details. +""" + +class CryptoError(Exception): + """ Base class for crypto exceptions """ + def __init__(self,errorMessage='Error!'): + self.message = errorMessage + def __str__(self): + return self.message + +class InitCryptoError(CryptoError): + """ Crypto errors during algorithm initialization """ +class BadKeySizeError(InitCryptoError): + """ Bad key size error """ +class EncryptError(CryptoError): + """ Error in encryption processing """ +class DecryptError(CryptoError): + """ Error in decryption processing """ +class DecryptNotBlockAlignedError(DecryptError): + """ Error in decryption processing """ + +def xorS(a,b): + """ XOR two strings """ + assert len(a)==len(b) + x = [] + for i in range(len(a)): + x.append( chr(ord(a[i])^ord(b[i]))) + return ''.join(x) + +def xor(a,b): + """ XOR two strings """ + x = [] + for i in range(min(len(a),len(b))): + x.append( chr(ord(a[i])^ord(b[i]))) + return ''.join(x) + +""" + Base 'BlockCipher' and Pad classes for cipher instances. + BlockCipher supports automatic padding and type conversion. The BlockCipher + class was written to make the actual algorithm code more readable and + not for performance. 
+""" + +class BlockCipher: + """ Block ciphers """ + def __init__(self): + self.reset() + + def reset(self): + self.resetEncrypt() + self.resetDecrypt() + def resetEncrypt(self): + self.encryptBlockCount = 0 + self.bytesToEncrypt = '' + def resetDecrypt(self): + self.decryptBlockCount = 0 + self.bytesToDecrypt = '' + + def encrypt(self, plainText, more = None): + """ Encrypt a string and return a binary string """ + self.bytesToEncrypt += plainText # append plainText to any bytes from prior encrypt + numBlocks, numExtraBytes = divmod(len(self.bytesToEncrypt), self.blockSize) + cipherText = '' + for i in range(numBlocks): + bStart = i*self.blockSize + ctBlock = self.encryptBlock(self.bytesToEncrypt[bStart:bStart+self.blockSize]) + self.encryptBlockCount += 1 + cipherText += ctBlock + if numExtraBytes > 0: # save any bytes that are not block aligned + self.bytesToEncrypt = self.bytesToEncrypt[-numExtraBytes:] else: - print "Error - %d outside of string table limits" % val - raise TpzDRMError('outside of string table limits') - # sys.exit(-1) - - def getSize(self): - return self.size - - def getPos(self): - return self.pos - - def dumpDict(self): - for i in xrange(self.size): - print "%d %s %s" % (i, convert(i), self.stable[i]) - return - -# parses the xml snippets that are represented by each page*.dat file. 
-# also parses the other0.dat file - the main stylesheet -# and information used to inject the xml snippets into page*.dat files - -class PageParser(object): - def __init__(self, filename, dict, debug, flat_xml): - self.fo = file(filename,'rb') - self.id = os.path.basename(filename).replace('.dat','') - self.dict = dict - self.debug = debug - self.flat_xml = flat_xml - self.tagpath = [] - self.doc = [] - self.snippetList = [] - - - # hash table used to enable the decoding process - # This has all been developed by trial and error so it may still have omissions or - # contain errors - # Format: - # tag : (number of arguments, argument type, subtags present, special case of subtags presents when escaped) - - token_tags = { - 'x' : (1, 'scalar_number', 0, 0), - 'y' : (1, 'scalar_number', 0, 0), - 'h' : (1, 'scalar_number', 0, 0), - 'w' : (1, 'scalar_number', 0, 0), - 'firstWord' : (1, 'scalar_number', 0, 0), - 'lastWord' : (1, 'scalar_number', 0, 0), - 'rootID' : (1, 'scalar_number', 0, 0), - 'stemID' : (1, 'scalar_number', 0, 0), - 'type' : (1, 'scalar_text', 0, 0), - - 'info' : (0, 'number', 1, 0), - - 'info.word' : (0, 'number', 1, 1), - 'info.word.ocrText' : (1, 'text', 0, 0), - 'info.word.firstGlyph' : (1, 'raw', 0, 0), - 'info.word.lastGlyph' : (1, 'raw', 0, 0), - 'info.word.bl' : (1, 'raw', 0, 0), - 'info.word.link_id' : (1, 'number', 0, 0), - - 'glyph' : (0, 'number', 1, 1), - 'glyph.x' : (1, 'number', 0, 0), - 'glyph.y' : (1, 'number', 0, 0), - 'glyph.glyphID' : (1, 'number', 0, 0), - - 'dehyphen' : (0, 'number', 1, 1), - 'dehyphen.rootID' : (1, 'number', 0, 0), - 'dehyphen.stemID' : (1, 'number', 0, 0), - 'dehyphen.stemPage' : (1, 'number', 0, 0), - 'dehyphen.sh' : (1, 'number', 0, 0), - - 'links' : (0, 'number', 1, 1), - 'links.page' : (1, 'number', 0, 0), - 'links.rel' : (1, 'number', 0, 0), - 'links.row' : (1, 'number', 0, 0), - 'links.title' : (1, 'text', 0, 0), - 'links.href' : (1, 'text', 0, 0), - 'links.type' : (1, 'text', 0, 0), - - 'paraCont' : (0, 
'number', 1, 1), - 'paraCont.rootID' : (1, 'number', 0, 0), - 'paraCont.stemID' : (1, 'number', 0, 0), - 'paraCont.stemPage' : (1, 'number', 0, 0), - - 'paraStems' : (0, 'number', 1, 1), - 'paraStems.stemID' : (1, 'number', 0, 0), - - 'wordStems' : (0, 'number', 1, 1), - 'wordStems.stemID' : (1, 'number', 0, 0), - - 'empty' : (1, 'snippets', 1, 0), - - 'page' : (1, 'snippets', 1, 0), - 'page.pageid' : (1, 'scalar_text', 0, 0), - 'page.pagelabel' : (1, 'scalar_text', 0, 0), - 'page.type' : (1, 'scalar_text', 0, 0), - 'page.h' : (1, 'scalar_number', 0, 0), - 'page.w' : (1, 'scalar_number', 0, 0), - 'page.startID' : (1, 'scalar_number', 0, 0), - - 'group' : (1, 'snippets', 1, 0), - 'group.type' : (1, 'scalar_text', 0, 0), - 'group._tag' : (1, 'scalar_text', 0, 0), - - 'region' : (1, 'snippets', 1, 0), - 'region.type' : (1, 'scalar_text', 0, 0), - 'region.x' : (1, 'scalar_number', 0, 0), - 'region.y' : (1, 'scalar_number', 0, 0), - 'region.h' : (1, 'scalar_number', 0, 0), - 'region.w' : (1, 'scalar_number', 0, 0), - - 'empty_text_region' : (1, 'snippets', 1, 0), - - 'img' : (1, 'snippets', 1, 0), - 'img.x' : (1, 'scalar_number', 0, 0), - 'img.y' : (1, 'scalar_number', 0, 0), - 'img.h' : (1, 'scalar_number', 0, 0), - 'img.w' : (1, 'scalar_number', 0, 0), - 'img.src' : (1, 'scalar_number', 0, 0), - 'img.color_src' : (1, 'scalar_number', 0, 0), - - 'paragraph' : (1, 'snippets', 1, 0), - 'paragraph.class' : (1, 'scalar_text', 0, 0), - 'paragraph.firstWord' : (1, 'scalar_number', 0, 0), - 'paragraph.lastWord' : (1, 'scalar_number', 0, 0), - 'paragraph.lastWord' : (1, 'scalar_number', 0, 0), - 'paragraph.gridSize' : (1, 'scalar_number', 0, 0), - 'paragraph.gridBottomCenter' : (1, 'scalar_number', 0, 0), - 'paragraph.gridTopCenter' : (1, 'scalar_number', 0, 0), - - - 'word_semantic' : (1, 'snippets', 1, 1), - 'word_semantic.type' : (1, 'scalar_text', 0, 0), - 'word_semantic.firstWord' : (1, 'scalar_number', 0, 0), - 'word_semantic.lastWord' : (1, 'scalar_number', 0, 0), - - 
'word' : (1, 'snippets', 1, 0), - 'word.type' : (1, 'scalar_text', 0, 0), - 'word.class' : (1, 'scalar_text', 0, 0), - 'word.firstGlyph' : (1, 'scalar_number', 0, 0), - 'word.lastGlyph' : (1, 'scalar_number', 0, 0), - - '_span' : (1, 'snippets', 1, 0), - '_span.firstWord' : (1, 'scalar_number', 0, 0), - '_span.lastWord' : (1, 'scalar_number', 0, 0), - '_span.gridSize' : (1, 'scalar_number', 0, 0), - '_span.gridBottomCenter' : (1, 'scalar_number', 0, 0), - '_span.gridTopCenter' : (1, 'scalar_number', 0, 0), - - 'span' : (1, 'snippets', 1, 0), - 'span.firstWord' : (1, 'scalar_number', 0, 0), - 'span.lastWord' : (1, 'scalar_number', 0, 0), - 'span.gridSize' : (1, 'scalar_number', 0, 0), - 'span.gridBottomCenter' : (1, 'scalar_number', 0, 0), - 'span.gridTopCenter' : (1, 'scalar_number', 0, 0), - - 'extratokens' : (1, 'snippets', 1, 0), - 'extratokens.type' : (1, 'scalar_text', 0, 0), - 'extratokens.firstGlyph' : (1, 'scalar_number', 0, 0), - 'extratokens.lastGlyph' : (1, 'scalar_number', 0, 0), - - 'glyph.h' : (1, 'number', 0, 0), - 'glyph.w' : (1, 'number', 0, 0), - 'glyph.use' : (1, 'number', 0, 0), - 'glyph.vtx' : (1, 'number', 0, 1), - 'glyph.len' : (1, 'number', 0, 1), - 'glyph.dpi' : (1, 'number', 0, 0), - 'vtx' : (0, 'number', 1, 1), - 'vtx.x' : (1, 'number', 0, 0), - 'vtx.y' : (1, 'number', 0, 0), - 'len' : (0, 'number', 1, 1), - 'len.n' : (1, 'number', 0, 0), - - 'book' : (1, 'snippets', 1, 0), - 'version' : (1, 'snippets', 1, 0), - 'version.FlowEdit_1_id' : (1, 'scalar_text', 0, 0), - 'version.FlowEdit_1_version' : (1, 'scalar_text', 0, 0), - 'version.Schema_id' : (1, 'scalar_text', 0, 0), - 'version.Schema_version' : (1, 'scalar_text', 0, 0), - 'version.Topaz_version' : (1, 'scalar_text', 0, 0), - 'version.WordDetailEdit_1_id' : (1, 'scalar_text', 0, 0), - 'version.WordDetailEdit_1_version' : (1, 'scalar_text', 0, 0), - 'version.ZoneEdit_1_id' : (1, 'scalar_text', 0, 0), - 'version.ZoneEdit_1_version' : (1, 'scalar_text', 0, 0), - 'version.chapterheaders' : 
(1, 'scalar_text', 0, 0), - 'version.creation_date' : (1, 'scalar_text', 0, 0), - 'version.header_footer' : (1, 'scalar_text', 0, 0), - 'version.init_from_ocr' : (1, 'scalar_text', 0, 0), - 'version.letter_insertion' : (1, 'scalar_text', 0, 0), - 'version.xmlinj_convert' : (1, 'scalar_text', 0, 0), - 'version.xmlinj_reflow' : (1, 'scalar_text', 0, 0), - 'version.xmlinj_transform' : (1, 'scalar_text', 0, 0), - 'version.findlists' : (1, 'scalar_text', 0, 0), - 'version.page_num' : (1, 'scalar_text', 0, 0), - 'version.page_type' : (1, 'scalar_text', 0, 0), - 'version.bad_text' : (1, 'scalar_text', 0, 0), - 'version.glyph_mismatch' : (1, 'scalar_text', 0, 0), - 'version.margins' : (1, 'scalar_text', 0, 0), - 'version.staggered_lines' : (1, 'scalar_text', 0, 0), - 'version.paragraph_continuation' : (1, 'scalar_text', 0, 0), - 'version.toc' : (1, 'scalar_text', 0, 0), - - 'stylesheet' : (1, 'snippets', 1, 0), - 'style' : (1, 'snippets', 1, 0), - 'style._tag' : (1, 'scalar_text', 0, 0), - 'style.type' : (1, 'scalar_text', 0, 0), - 'style._parent_type' : (1, 'scalar_text', 0, 0), - 'style.class' : (1, 'scalar_text', 0, 0), - 'style._after_class' : (1, 'scalar_text', 0, 0), - 'rule' : (1, 'snippets', 1, 0), - 'rule.attr' : (1, 'scalar_text', 0, 0), - 'rule.value' : (1, 'scalar_text', 0, 0), - - 'original' : (0, 'number', 1, 1), - 'original.pnum' : (1, 'number', 0, 0), - 'original.pid' : (1, 'text', 0, 0), - 'pages' : (0, 'number', 1, 1), - 'pages.ref' : (1, 'number', 0, 0), - 'pages.id' : (1, 'number', 0, 0), - 'startID' : (0, 'number', 1, 1), - 'startID.page' : (1, 'number', 0, 0), - 'startID.id' : (1, 'number', 0, 0), - - } - - - # full tag path record keeping routines - def tag_push(self, token): - self.tagpath.append(token) - def tag_pop(self): - if len(self.tagpath) > 0 : - self.tagpath.pop() - def tagpath_len(self): - return len(self.tagpath) - def get_tagpath(self, i): - cnt = len(self.tagpath) - if i < cnt : result = self.tagpath[i] - for j in xrange(i+1, cnt) : - 
result += '.' + self.tagpath[j] - return result - - - # list of absolute command byte values values that indicate - # various types of loop meachanisms typically used to generate vectors - - cmd_list = (0x76, 0x76) - - # peek at and return 1 byte that is ahead by i bytes - def peek(self, aheadi): - c = self.fo.read(aheadi) - if (len(c) == 0): - return None - self.fo.seek(-aheadi,1) - c = c[-1:] - return ord(c) - - - # get the next value from the file being processed - def getNext(self): - nbyte = self.peek(1); - if (nbyte == None): - return None - val = readEncodedNumber(self.fo) - return val - - - # format an arg by argtype - def formatArg(self, arg, argtype): - if (argtype == 'text') or (argtype == 'scalar_text') : - result = self.dict.lookup(arg) - elif (argtype == 'raw') or (argtype == 'number') or (argtype == 'scalar_number') : - result = arg - elif (argtype == 'snippets') : - result = arg - else : - print "Error Unknown argtype %s" % argtype - sys.exit(-2) - return result - - - # process the next tag token, recursively handling subtags, - # arguments, and commands - def procToken(self, token): - - known_token = False - self.tag_push(token) - - if self.debug : print 'Processing: ', self.get_tagpath(0) - cnt = self.tagpath_len() - for j in xrange(cnt): - tkn = self.get_tagpath(j) - if tkn in self.token_tags : - num_args = self.token_tags[tkn][0] - argtype = self.token_tags[tkn][1] - subtags = self.token_tags[tkn][2] - splcase = self.token_tags[tkn][3] - ntags = -1 - known_token = True - break - - if known_token : - - # handle subtags if present - subtagres = [] - if (splcase == 1): - # this type of tag uses of escape marker 0x74 indicate subtag count - if self.peek(1) == 0x74: - skip = readEncodedNumber(self.fo) - subtags = 1 - num_args = 0 - - if (subtags == 1): - ntags = readEncodedNumber(self.fo) - if self.debug : print 'subtags: ' + token + ' has ' + str(ntags) - for j in xrange(ntags): - val = readEncodedNumber(self.fo) - 
subtagres.append(self.procToken(self.dict.lookup(val))) - - # arguments can be scalars or vectors of text or numbers - argres = [] - if num_args > 0 : - firstarg = self.peek(1) - if (firstarg in self.cmd_list) and (argtype != 'scalar_number') and (argtype != 'scalar_text'): - # single argument is a variable length vector of data - arg = readEncodedNumber(self.fo) - argres = self.decodeCMD(arg,argtype) - else : - # num_arg scalar arguments - for i in xrange(num_args): - argres.append(self.formatArg(readEncodedNumber(self.fo), argtype)) - - # build the return tag - result = [] - tkn = self.get_tagpath(0) - result.append(tkn) - result.append(subtagres) - result.append(argtype) - result.append(argres) - self.tag_pop() - return result - - # all tokens that need to be processed should be in the hash - # table if it may indicate a problem, either new token - # or an out of sync condition + self.bytesToEncrypt = '' + + if more == None: # no more data expected from caller + finalBytes = self.padding.addPad(self.bytesToEncrypt,self.blockSize) + if len(finalBytes) > 0: + ctBlock = self.encryptBlock(finalBytes) + self.encryptBlockCount += 1 + cipherText += ctBlock + self.resetEncrypt() + return cipherText + + def decrypt(self, cipherText, more = None): + """ Decrypt a string and return a string """ + self.bytesToDecrypt += cipherText # append to any bytes from prior decrypt + + numBlocks, numExtraBytes = divmod(len(self.bytesToDecrypt), self.blockSize) + if more == None: # no more calls to decrypt, should have all the data + if numExtraBytes != 0: + raise DecryptNotBlockAlignedError, 'Data not block aligned on decrypt' + + # hold back some bytes in case last decrypt has zero len + if (more != None) and (numExtraBytes == 0) and (numBlocks >0) : + numBlocks -= 1 + numExtraBytes = self.blockSize + + plainText = '' + for i in range(numBlocks): + bStart = i*self.blockSize + ptBlock = self.decryptBlock(self.bytesToDecrypt[bStart : bStart+self.blockSize]) + self.decryptBlockCount += 
1 + plainText += ptBlock + + if numExtraBytes > 0: # save any bytes that are not block aligned + self.bytesToEncrypt = self.bytesToEncrypt[-numExtraBytes:] else: - result = [] - if (self.debug): - print 'Unknown Token:', token - self.tag_pop() - return result - - - # special loop used to process code snippets - # it is NEVER used to format arguments. - # builds the snippetList - def doLoop72(self, argtype): - cnt = readEncodedNumber(self.fo) - if self.debug : - result = 'Set of '+ str(cnt) + ' xml snippets. The overall structure \n' - result += 'of the document is indicated by snippet number sets at the\n' - result += 'end of each snippet. \n' - print result - for i in xrange(cnt): - if self.debug: print 'Snippet:',str(i) - snippet = [] - snippet.append(i) - val = readEncodedNumber(self.fo) - snippet.append(self.procToken(self.dict.lookup(val))) - self.snippetList.append(snippet) - return - - - - # general loop code gracisouly submitted by "skindle" - thank you! - def doLoop76Mode(self, argtype, cnt, mode): - result = [] - adj = 0 - if mode & 1: - adj = readEncodedNumber(self.fo) - mode = mode >> 1 - x = [] - for i in xrange(cnt): - x.append(readEncodedNumber(self.fo) - adj) - for i in xrange(mode): - for j in xrange(1, cnt): - x[j] = x[j] + x[j - 1] - for i in xrange(cnt): - result.append(self.formatArg(x[i],argtype)) - return result - - - # dispatches loop commands bytes with various modes - # The 0x76 style loops are used to build vectors - - # This was all derived by trial and error and - # new loop types may exist that are not handled here - # since they did not appear in the test cases - - def decodeCMD(self, cmd, argtype): - if (cmd == 0x76): - - # loop with cnt, and mode to control loop styles - cnt = readEncodedNumber(self.fo) - mode = readEncodedNumber(self.fo) - - if self.debug : print 'Loop for', cnt, 'with mode', mode, ': ' - return self.doLoop76Mode(argtype, cnt, mode) - - if self.dbug: print "Unknown command", cmd - result = [] - return result - - - 
- # add full tag path to injected snippets - def updateName(self, tag, prefix): - name = tag[0] - subtagList = tag[1] - argtype = tag[2] - argList = tag[3] - nname = prefix + '.' + name - nsubtaglist = [] - for j in subtagList: - nsubtaglist.append(self.updateName(j,prefix)) - ntag = [] - ntag.append(nname) - ntag.append(nsubtaglist) - ntag.append(argtype) - ntag.append(argList) - return ntag - - - - # perform depth first injection of specified snippets into this one - def injectSnippets(self, snippet): - snipno, tag = snippet - name = tag[0] - subtagList = tag[1] - argtype = tag[2] - argList = tag[3] - nsubtagList = [] - if len(argList) > 0 : - for j in argList: - asnip = self.snippetList[j] - aso, atag = self.injectSnippets(asnip) - atag = self.updateName(atag, name) - nsubtagList.append(atag) - argtype='number' - argList=[] - if len(nsubtagList) > 0 : - subtagList.extend(nsubtagList) - tag = [] - tag.append(name) - tag.append(subtagList) - tag.append(argtype) - tag.append(argList) - snippet = [] - snippet.append(snipno) - snippet.append(tag) - return snippet - - - - # format the tag for output - def formatTag(self, node): - name = node[0] - subtagList = node[1] - argtype = node[2] - argList = node[3] - fullpathname = name.split('.') - nodename = fullpathname.pop() - ilvl = len(fullpathname) - indent = ' ' * (3 * ilvl) - result = indent + '<' + nodename + '>' - if len(argList) > 0: - argres = '' - for j in argList: - if (argtype == 'text') or (argtype == 'scalar_text') : - argres += j + '|' - else : - argres += str(j) + ',' - argres = argres[0:-1] - if argtype == 'snippets' : - result += 'snippets:' + argres - else : - result += argres - if len(subtagList) > 0 : - result += '\n' - for j in subtagList: - if len(j) > 0 : - result += self.formatTag(j) - result += indent + '\n' + self.bytesToEncrypt = '' + + if more == None: # last decrypt remove padding + plainText = self.padding.removePad(plainText, self.blockSize) + self.resetDecrypt() + return plainText + + 
+class Pad: + def __init__(self): + pass # eventually could put in calculation of min and max size extension + +class padWithPadLen(Pad): + """ Pad a binary string with the length of the padding """ + + def addPad(self, extraBytes, blockSize): + """ Add padding to a binary string to make it an even multiple + of the block size """ + blocks, numExtraBytes = divmod(len(extraBytes), blockSize) + padLength = blockSize - numExtraBytes + return extraBytes + padLength*chr(padLength) + + def removePad(self, paddedBinaryString, blockSize): + """ Remove padding from a binary string """ + if not(0 6 and i%Nk == 4 : + temp = [ Sbox[byte] for byte in temp ] # SubWord(temp) + w.append( [ w[i-Nk][byte]^temp[byte] for byte in range(4) ] ) + return w + +Rcon = (0,0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b,0x36, # note extra '0' !!! + 0x6c,0xd8,0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63,0xc6, + 0x97,0x35,0x6a,0xd4,0xb3,0x7d,0xfa,0xef,0xc5,0x91) + +#------------------------------------- +def AddRoundKey(algInstance, keyBlock): + """ XOR the algorithm state with a block of key material """ + for column in range(algInstance.Nb): + for row in range(4): + algInstance.state[column][row] ^= keyBlock[column][row] +#------------------------------------- + +def SubBytes(algInstance): + for column in range(algInstance.Nb): + for row in range(4): + algInstance.state[column][row] = Sbox[algInstance.state[column][row]] + +def InvSubBytes(algInstance): + for column in range(algInstance.Nb): + for row in range(4): + algInstance.state[column][row] = InvSbox[algInstance.state[column][row]] + +Sbox = (0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5, + 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76, + 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0, + 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0, + 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc, + 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15, + 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a, + 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75, + 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0, + 
0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84, + 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b, + 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf, + 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85, + 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8, + 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5, + 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2, + 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17, + 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73, + 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88, + 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb, + 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c, + 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79, + 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9, + 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08, + 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6, + 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a, + 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e, + 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e, + 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94, + 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf, + 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68, + 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16) + +InvSbox = (0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38, + 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb, + 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87, + 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb, + 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d, + 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e, + 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2, + 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25, + 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16, + 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92, + 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda, + 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84, + 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a, + 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06, + 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02, + 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b, + 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea, + 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73, + 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85, + 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e, + 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89, + 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b, + 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20, + 
0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4, + 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31, + 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f, + 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d, + 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef, + 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0, + 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61, + 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26, + 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d) + +#------------------------------------- +""" For each block size (Nb), the ShiftRow operation shifts row i + by the amount Ci. Note that row 0 is not shifted. + Nb C1 C2 C3 + ------------------- """ +shiftOffset = { 4 : ( 0, 1, 2, 3), + 5 : ( 0, 1, 2, 3), + 6 : ( 0, 1, 2, 3), + 7 : ( 0, 1, 2, 4), + 8 : ( 0, 1, 3, 4) } +def ShiftRows(algInstance): + tmp = [0]*algInstance.Nb # list of size Nb + for r in range(1,4): # row 0 reamains unchanged and can be skipped + for c in range(algInstance.Nb): + tmp[c] = algInstance.state[(c+shiftOffset[algInstance.Nb][r]) % algInstance.Nb][r] + for c in range(algInstance.Nb): + algInstance.state[c][r] = tmp[c] +def InvShiftRows(algInstance): + tmp = [0]*algInstance.Nb # list of size Nb + for r in range(1,4): # row 0 reamains unchanged and can be skipped + for c in range(algInstance.Nb): + tmp[c] = algInstance.state[(c+algInstance.Nb-shiftOffset[algInstance.Nb][r]) % algInstance.Nb][r] + for c in range(algInstance.Nb): + algInstance.state[c][r] = tmp[c] +#------------------------------------- +def MixColumns(a): + Sprime = [0,0,0,0] + for j in range(a.Nb): # for each column + Sprime[0] = mul(2,a.state[j][0])^mul(3,a.state[j][1])^mul(1,a.state[j][2])^mul(1,a.state[j][3]) + Sprime[1] = mul(1,a.state[j][0])^mul(2,a.state[j][1])^mul(3,a.state[j][2])^mul(1,a.state[j][3]) + Sprime[2] = mul(1,a.state[j][0])^mul(1,a.state[j][1])^mul(2,a.state[j][2])^mul(3,a.state[j][3]) + Sprime[3] = mul(3,a.state[j][0])^mul(1,a.state[j][1])^mul(1,a.state[j][2])^mul(2,a.state[j][3]) + for i in range(4): + a.state[j][i] = Sprime[i] + +def InvMixColumns(a): + """ Mix the four 
bytes of every column in a linear way + This is the opposite operation of Mixcolumn """ + Sprime = [0,0,0,0] + for j in range(a.Nb): # for each column + Sprime[0] = mul(0x0E,a.state[j][0])^mul(0x0B,a.state[j][1])^mul(0x0D,a.state[j][2])^mul(0x09,a.state[j][3]) + Sprime[1] = mul(0x09,a.state[j][0])^mul(0x0E,a.state[j][1])^mul(0x0B,a.state[j][2])^mul(0x0D,a.state[j][3]) + Sprime[2] = mul(0x0D,a.state[j][0])^mul(0x09,a.state[j][1])^mul(0x0E,a.state[j][2])^mul(0x0B,a.state[j][3]) + Sprime[3] = mul(0x0B,a.state[j][0])^mul(0x0D,a.state[j][1])^mul(0x09,a.state[j][2])^mul(0x0E,a.state[j][3]) + for i in range(4): + a.state[j][i] = Sprime[i] + +#------------------------------------- +def mul(a, b): + """ Multiply two elements of GF(2^m) + needed for MixColumn and InvMixColumn """ + if (a !=0 and b!=0): + return Alogtable[(Logtable[a] + Logtable[b])%255] + else: + return 0 + +Logtable = ( 0, 0, 25, 1, 50, 2, 26, 198, 75, 199, 27, 104, 51, 238, 223, 3, + 100, 4, 224, 14, 52, 141, 129, 239, 76, 113, 8, 200, 248, 105, 28, 193, + 125, 194, 29, 181, 249, 185, 39, 106, 77, 228, 166, 114, 154, 201, 9, 120, + 101, 47, 138, 5, 33, 15, 225, 36, 18, 240, 130, 69, 53, 147, 218, 142, + 150, 143, 219, 189, 54, 208, 206, 148, 19, 92, 210, 241, 64, 70, 131, 56, + 102, 221, 253, 48, 191, 6, 139, 98, 179, 37, 226, 152, 34, 136, 145, 16, + 126, 110, 72, 195, 163, 182, 30, 66, 58, 107, 40, 84, 250, 133, 61, 186, + 43, 121, 10, 21, 155, 159, 94, 202, 78, 212, 172, 229, 243, 115, 167, 87, + 175, 88, 168, 80, 244, 234, 214, 116, 79, 174, 233, 213, 231, 230, 173, 232, + 44, 215, 117, 122, 235, 22, 11, 245, 89, 203, 95, 176, 156, 169, 81, 160, + 127, 12, 246, 111, 23, 196, 73, 236, 216, 67, 31, 45, 164, 118, 123, 183, + 204, 187, 62, 90, 251, 96, 177, 134, 59, 82, 161, 108, 170, 85, 41, 157, + 151, 178, 135, 144, 97, 190, 220, 252, 188, 149, 207, 205, 55, 63, 91, 209, + 83, 57, 132, 60, 65, 162, 109, 71, 20, 42, 158, 93, 86, 242, 211, 171, + 68, 17, 146, 217, 35, 32, 46, 137, 180, 124, 184, 38, 119, 
153, 227, 165, + 103, 74, 237, 222, 197, 49, 254, 24, 13, 99, 140, 128, 192, 247, 112, 7) + +Alogtable= ( 1, 3, 5, 15, 17, 51, 85, 255, 26, 46, 114, 150, 161, 248, 19, 53, + 95, 225, 56, 72, 216, 115, 149, 164, 247, 2, 6, 10, 30, 34, 102, 170, + 229, 52, 92, 228, 55, 89, 235, 38, 106, 190, 217, 112, 144, 171, 230, 49, + 83, 245, 4, 12, 20, 60, 68, 204, 79, 209, 104, 184, 211, 110, 178, 205, + 76, 212, 103, 169, 224, 59, 77, 215, 98, 166, 241, 8, 24, 40, 120, 136, + 131, 158, 185, 208, 107, 189, 220, 127, 129, 152, 179, 206, 73, 219, 118, 154, + 181, 196, 87, 249, 16, 48, 80, 240, 11, 29, 39, 105, 187, 214, 97, 163, + 254, 25, 43, 125, 135, 146, 173, 236, 47, 113, 147, 174, 233, 32, 96, 160, + 251, 22, 58, 78, 210, 109, 183, 194, 93, 231, 50, 86, 250, 21, 63, 65, + 195, 94, 226, 61, 71, 201, 64, 192, 91, 237, 44, 116, 156, 191, 218, 117, + 159, 186, 213, 100, 172, 239, 42, 126, 130, 157, 188, 223, 122, 142, 137, 128, + 155, 182, 193, 88, 232, 35, 101, 175, 234, 37, 111, 177, 200, 67, 197, 84, + 252, 31, 33, 99, 165, 244, 7, 9, 27, 45, 119, 153, 176, 203, 70, 202, + 69, 207, 74, 222, 121, 139, 134, 145, 168, 227, 62, 66, 198, 81, 243, 14, + 18, 54, 90, 238, 41, 123, 141, 140, 143, 138, 133, 148, 167, 242, 13, 23, + 57, 75, 221, 124, 132, 151, 162, 253, 28, 36, 108, 180, 199, 82, 246, 1) + + + + +""" + AES Encryption Algorithm + The AES algorithm is just Rijndael algorithm restricted to the default + blockSize of 128 bits. 
+""" + +class AES(Rijndael): + """ The AES algorithm is the Rijndael block cipher restricted to block + sizes of 128 bits and key sizes of 128, 192 or 256 bits + """ + def __init__(self, key = None, padding = padWithPadLen(), keySize=16): + """ Initialize AES, keySize is in bytes """ + if not (keySize == 16 or keySize == 24 or keySize == 32) : + raise BadKeySizeError, 'Illegal AES key size, must be 16, 24, or 32 bytes' + + Rijndael.__init__( self, key, padding=padding, keySize=keySize, blockSize=16 ) + + self.name = 'AES' + + +""" + CBC mode of encryption for block ciphers. + This algorithm mode wraps any BlockCipher to make a + Cipher Block Chaining mode. +""" +from random import Random # should change to crypto.random!!! + + +class CBC(BlockCipher): + """ The CBC class wraps block ciphers to make cipher block chaining (CBC) mode + algorithms. The initialization (IV) is automatic if set to None. Padding + is also automatic based on the Pad class used to initialize the algorithm + """ + def __init__(self, blockCipherInstance, padding = padWithPadLen()): + """ CBC algorithms are created by initializing with a BlockCipher instance """ + self.baseCipher = blockCipherInstance + self.name = self.baseCipher.name + '_CBC' + self.blockSize = self.baseCipher.blockSize + self.keySize = self.baseCipher.keySize + self.padding = padding + self.baseCipher.padding = noPadding() # baseCipher should NOT pad!! 
+ self.r = Random() # for IV generation, currently uses + # mediocre standard distro version <---------------- + import time + newSeed = time.ctime()+str(self.r) # seed with instance location + self.r.seed(newSeed) # to make unique + self.reset() + + def setKey(self, key): + self.baseCipher.setKey(key) + + # Overload to reset both CBC state and the wrapped baseCipher + def resetEncrypt(self): + BlockCipher.resetEncrypt(self) # reset CBC encrypt state (super class) + self.baseCipher.resetEncrypt() # reset base cipher encrypt state + + def resetDecrypt(self): + BlockCipher.resetDecrypt(self) # reset CBC state (super class) + self.baseCipher.resetDecrypt() # reset base cipher decrypt state + + def encrypt(self, plainText, iv=None, more=None): + """ CBC encryption - overloads baseCipher to allow optional explicit IV + when iv=None, iv is auto generated! + """ + if self.encryptBlockCount == 0: + self.iv = iv + else: + assert(iv==None), 'IV used only on first call to encrypt' + + return BlockCipher.encrypt(self,plainText, more=more) + + def decrypt(self, cipherText, iv=None, more=None): + """ CBC decryption - overloads baseCipher to allow optional explicit IV + when iv=None, iv is auto generated! 
+ """ + if self.decryptBlockCount == 0: + self.iv = iv else: - result += '\n' - return result - - - # flatten tag - def flattenTag(self, node): - name = node[0] - subtagList = node[1] - argtype = node[2] - argList = node[3] - result = name - if (len(argList) > 0): - argres = '' - for j in argList: - if (argtype == 'text') or (argtype == 'scalar_text') : - argres += j + '|' - else : - argres += str(j) + '|' - argres = argres[0:-1] - if argtype == 'snippets' : - result += '.snippets=' + argres - else : - result += '=' + argres - result += '\n' - for j in subtagList: - if len(j) > 0 : - result += self.flattenTag(j) - return result - - - # reduce create xml output - def formatDoc(self, flat_xml): - result = '' - for j in self.doc : - if len(j) > 0: - if flat_xml: - result += self.flattenTag(j) - else: - result += self.formatTag(j) - if self.debug : print result - return result - - - - # main loop - parse the page.dat files - # to create structured document and snippets - - # FIXME: value at end of magic appears to be a subtags count - # but for what? 
For now, inject an 'info" tag as it is in - # every dictionary and seems close to what is meant - # The alternative is to special case the last _ "0x5f" to mean something - - def process(self): - - # peek at the first bytes to see what type of file it is - magic = self.fo.read(9) - if (magic[0:1] == 'p') and (magic[2:9] == 'marker_'): - first_token = 'info' - elif (magic[0:1] == 'p') and (magic[2:9] == '__PAGE_'): - skip = self.fo.read(2) - first_token = 'info' - elif (magic[0:1] == 'p') and (magic[2:8] == '_PAGE_'): - first_token = 'info' - elif (magic[0:1] == 'g') and (magic[2:9] == '__GLYPH'): - skip = self.fo.read(3) - first_token = 'info' - else : - # other0.dat file - first_token = None - self.fo.seek(-9,1) - - - # main loop to read and build the document tree - while True: - - if first_token != None : - # use "inserted" first token 'info' for page and glyph files - tag = self.procToken(first_token) - if len(tag) > 0 : - self.doc.append(tag) - first_token = None - - v = self.getNext() - if (v == None): - break - - if (v == 0x72): - self.doLoop72('number') - elif (v > 0) and (v < self.dict.getSize()) : - tag = self.procToken(self.dict.lookup(v)) - if len(tag) > 0 : - self.doc.append(tag) + assert(iv==None), 'IV used only on first call to decrypt' + + return BlockCipher.decrypt(self, cipherText, more=more) + + def encryptBlock(self, plainTextBlock): + """ CBC block encryption, IV is set with 'encrypt' """ + auto_IV = '' + if self.encryptBlockCount == 0: + if self.iv == None: + # generate IV and use + self.iv = ''.join([chr(self.r.randrange(256)) for i in range(self.blockSize)]) + self.prior_encr_CT_block = self.iv + auto_IV = self.prior_encr_CT_block # prepend IV if it's automatic + else: # application provided IV + assert(len(self.iv) == self.blockSize ),'IV must be same length as block' + self.prior_encr_CT_block = self.iv + """ encrypt the prior CT XORed with the PT """ + ct = self.baseCipher.encryptBlock( xor(self.prior_encr_CT_block, plainTextBlock) ) + 
self.prior_encr_CT_block = ct + return auto_IV+ct + + def decryptBlock(self, encryptedBlock): + """ Decrypt a single block """ + + if self.decryptBlockCount == 0: # first call, process IV + if self.iv == None: # auto decrypt IV? + self.prior_CT_block = encryptedBlock + return '' else: - if self.debug: - print "Main Loop: Unknown value: %x" % v - if (v == 0): - if (self.peek(1) == 0x5f): - skip = self.fo.read(1) - first_token = 'info' - - # now do snippet injection - if len(self.snippetList) > 0 : - if self.debug : print 'Injecting Snippets:' - snippet = self.injectSnippets(self.snippetList[0]) - snipno = snippet[0] - tag_add = snippet[1] - if self.debug : print self.formatTag(tag_add) - if len(tag_add) > 0: - self.doc.append(tag_add) - - # handle generation of xml output - xmlpage = self.formatDoc(self.flat_xml) - - return xmlpage - - -def fromData(dict, fname): - flat_xml = True - debug = False - pp = PageParser(fname, dict, debug, flat_xml) - xmlpage = pp.process() - return xmlpage - -def getXML(dict, fname): - flat_xml = False - debug = False - pp = PageParser(fname, dict, debug, flat_xml) - xmlpage = pp.process() - return xmlpage - -def usage(): - print 'Usage: ' - print ' convert2xml.py dict0000.dat infile.dat ' - print ' ' - print ' Options:' - print ' -h print this usage help message ' - print ' -d turn on debug output to check for potential errors ' - print ' --flat-xml output the flattened xml page description only ' - print ' ' - print ' This program will attempt to convert a page*.dat file or ' - print ' glyphs*.dat file, using the dict0000.dat file, to its xml description. ' - print ' ' - print ' Use "cmbtc_dump.py" first to unencrypt, uncompress, and dump ' - print ' the *.dat files from a Topaz format e-book.' 
- -# -# Main -# - -def main(argv): - dictFile = "" - pageFile = "" - debug = False - flat_xml = False - printOutput = False - if len(argv) == 0: - printOutput = True - argv = sys.argv - - try: - opts, args = getopt.getopt(argv[1:], "hd", ["flat-xml"]) - - except getopt.GetoptError, err: - - # print help information and exit: - print str(err) # will print something like "option -a not recognized" - usage() - sys.exit(2) - - if len(opts) == 0 and len(args) == 0 : - usage() - sys.exit(2) - - for o, a in opts: - if o =="-d": - debug=True - if o =="-h": - usage() - sys.exit(0) - if o =="--flat-xml": - flat_xml = True - - dictFile, pageFile = args[0], args[1] - - # read in the string table dictionary - dict = Dictionary(dictFile) - # dict.dumpDict() - - # create a page parser - pp = PageParser(pageFile, dict, debug, flat_xml) - - xmlpage = pp.process() - - if printOutput: - print xmlpage - return 0 + assert(len(self.iv)==self.blockSize),"Bad IV size on CBC decryption" + self.prior_CT_block = self.iv + + dct = self.baseCipher.decryptBlock(encryptedBlock) + """ XOR the prior decrypted CT with the prior CT """ + dct_XOR_priorCT = xor( self.prior_CT_block, dct ) + + self.prior_CT_block = encryptedBlock + + return dct_XOR_priorCT + - return xmlpage +""" + AES_CBC Encryption Algorithm +""" -if __name__ == '__main__': - sys.exit(main('')) +class AES_CBC(CBC): + """ AES encryption in CBC feedback mode """ + def __init__(self, key=None, padding=padWithPadLen(), keySize=16): + CBC.__init__( self, AES(key, noPadding(), keySize), padding) + self.name = 'AES_CBC' diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2html.py b/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2html.py index 3b32fc0..26d740d 100644 Binary files a/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2html.py and b/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2html.py differ diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2svg.py b/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2svg.py index 49cf6f5..e25a0c8 100644 --- 
a/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2svg.py +++ b/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2svg.py @@ -1,250 +1,290 @@ -#! /usr/bin/python -# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab +#! /usr/bin/env python -import sys -import csv -import os -import getopt +import sys, os +import hmac from struct import pack -from struct import unpack - - -class PParser(object): - def __init__(self, gd, flatxml, meta_array): - self.gd = gd - self.flatdoc = flatxml.split('\n') - self.docSize = len(self.flatdoc) - self.temp = [] - - self.ph = -1 - self.pw = -1 - startpos = self.posinDoc('page.h') or self.posinDoc('book.h') - for p in startpos: - (name, argres) = self.lineinDoc(p) - self.ph = max(self.ph, int(argres)) - startpos = self.posinDoc('page.w') or self.posinDoc('book.w') - for p in startpos: - (name, argres) = self.lineinDoc(p) - self.pw = max(self.pw, int(argres)) - - if self.ph <= 0: - self.ph = int(meta_array.get('pageHeight', '11000')) - if self.pw <= 0: - self.pw = int(meta_array.get('pageWidth', '8500')) - - res = [] - startpos = self.posinDoc('info.glyph.x') - for p in startpos: - argres = self.getDataatPos('info.glyph.x', p) - res.extend(argres) - self.gx = res - - res = [] - startpos = self.posinDoc('info.glyph.y') - for p in startpos: - argres = self.getDataatPos('info.glyph.y', p) - res.extend(argres) - self.gy = res - - res = [] - startpos = self.posinDoc('info.glyph.glyphID') - for p in startpos: - argres = self.getDataatPos('info.glyph.glyphID', p) - res.extend(argres) - self.gid = res - - - # return tag at line pos in document - def lineinDoc(self, pos) : - if (pos >= 0) and (pos < self.docSize) : - item = self.flatdoc[pos] - if item.find('=') >= 0: - (name, argres) = item.split('=',1) - else : - name = item - argres = '' - return name, argres - - # find tag in doc if within pos to end inclusive - def findinDoc(self, tagpath, pos, end) : - result = None - if end == -1 : - end = self.docSize - else: - end = min(self.docSize, end) - foundat = -1 - 
for j in xrange(pos, end): - item = self.flatdoc[j] - if item.find('=') >= 0: - (name, argres) = item.split('=',1) - else : - name = item - argres = '' - if name.endswith(tagpath) : - result = argres - foundat = j - break - return foundat, result - - # return list of start positions for the tagpath - def posinDoc(self, tagpath): - startpos = [] - pos = 0 - res = "" - while res != None : - (foundpos, res) = self.findinDoc(tagpath, pos, -1) - if res != None : - startpos.append(foundpos) - pos = foundpos + 1 - return startpos - - def getData(self, path): - result = None - cnt = len(self.flatdoc) - for j in xrange(cnt): - item = self.flatdoc[j] - if item.find('=') >= 0: - (name, argt) = item.split('=') - argres = argt.split('|') - else: - name = item - argres = [] - if (name.endswith(path)): - result = argres - break - if (len(argres) > 0) : - for j in xrange(0,len(argres)): - argres[j] = int(argres[j]) - return result - - def getDataatPos(self, path, pos): - result = None - item = self.flatdoc[pos] - if item.find('=') >= 0: - (name, argt) = item.split('=') - argres = argt.split('|') +import hashlib + + +# interface to needed routines libalfcrypto +def _load_libalfcrypto(): + import ctypes + from ctypes import CDLL, byref, POINTER, c_void_p, c_char_p, c_int, c_long, \ + Structure, c_ulong, create_string_buffer, addressof, string_at, cast, sizeof + + pointer_size = ctypes.sizeof(ctypes.c_voidp) + name_of_lib = None + if sys.platform.startswith('darwin'): + name_of_lib = 'libalfcrypto.dylib' + elif sys.platform.startswith('win'): + if pointer_size == 4: + name_of_lib = 'alfcrypto.dll' else: - name = item - argres = [] - if (len(argres) > 0) : - for j in xrange(0,len(argres)): - argres[j] = int(argres[j]) - if (name.endswith(path)): - result = argres - return result - - def getDataTemp(self, path): - result = None - cnt = len(self.temp) - for j in xrange(cnt): - item = self.temp[j] - if item.find('=') >= 0: - (name, argt) = item.split('=') - argres = argt.split('|') - 
else: - name = item - argres = [] - if (name.endswith(path)): - result = argres - self.temp.pop(j) - break - if (len(argres) > 0) : - for j in xrange(0,len(argres)): - argres[j] = int(argres[j]) - return result - - def getImages(self): - result = [] - self.temp = self.flatdoc - while (self.getDataTemp('img') != None): - h = self.getDataTemp('img.h')[0] - w = self.getDataTemp('img.w')[0] - x = self.getDataTemp('img.x')[0] - y = self.getDataTemp('img.y')[0] - src = self.getDataTemp('img.src')[0] - result.append('\n' % (src, x, y, w, h)) - return result - - def getGlyphs(self): - result = [] - if (self.gid != None) and (len(self.gid) > 0): - glyphs = [] - for j in set(self.gid): - glyphs.append(j) - glyphs.sort() - for gid in glyphs: - id='id="gl%d"' % gid - path = self.gd.lookup(id) - if path: - result.append(id + ' ' + path) - return result - - -def convert2SVG(gdict, flat_xml, pageid, previd, nextid, svgDir, raw, meta_array, scaledpi): - ml = '' - pp = PParser(gdict, flat_xml, meta_array) - ml += '\n' - if (raw): - ml += '\n' - ml += '\n' % (pp.pw / scaledpi, pp.ph / scaledpi, pp.pw -1, pp.ph -1) - ml += 'Page %d - %s by %s\n' % (pageid, meta_array['Title'],meta_array['Authors']) + name_of_lib = 'alfcrypto64.dll' else: - ml += '\n' - ml += '\n' - ml += 'Page %d - %s by %s\n' % (pageid, meta_array['Title'],meta_array['Authors']) - ml += '\n' - ml += '\n' - ml += '\n' - ml += '
\n' - if previd == None: - ml += '\n' + if pointer_size == 4: + name_of_lib = 'libalfcrypto32.so' else: - ml += '\n' - - ml += '' % (pp.pw, pp.ph) - if (pp.gid != None): - ml += '\n' - gdefs = pp.getGlyphs() - for j in xrange(0,len(gdefs)): - ml += gdefs[j] - ml += '\n' - img = pp.getImages() - if (img != None): - for j in xrange(0,len(img)): - ml += img[j] - if (pp.gid != None): - for j in xrange(0,len(pp.gid)): - ml += '\n' % (pp.gid[j], pp.gx[j], pp.gy[j]) - if (img == None or len(img) == 0) and (pp.gid == None or len(pp.gid) == 0): - xpos = "%d" % (pp.pw // 3) - ypos = "%d" % (pp.ph // 3) - ml += 'This page intentionally left blank.\n' - if (raw) : - ml += '' - else : - ml += '\n' - if nextid == None: - ml += '\n' - else : - ml += '\n' - ml += '
\n' - ml += '
zoom in - zoom out
\n' - ml += '\n' - ml += '\n' - return ml + name_of_lib = 'libalfcrypto64.so' + + libalfcrypto = sys.path[0] + os.sep + name_of_lib + + if not os.path.isfile(libalfcrypto): + raise Exception('libalfcrypto not found') + + libalfcrypto = CDLL(libalfcrypto) + + c_char_pp = POINTER(c_char_p) + c_int_p = POINTER(c_int) + + + def F(restype, name, argtypes): + func = getattr(libalfcrypto, name) + func.restype = restype + func.argtypes = argtypes + return func + + # aes cbc decryption + # + # struct aes_key_st { + # unsigned long rd_key[4 *(AES_MAXNR + 1)]; + # int rounds; + # }; + # + # typedef struct aes_key_st AES_KEY; + # + # int AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key); + # + # + # void AES_cbc_encrypt(const unsigned char *in, unsigned char *out, + # const unsigned long length, const AES_KEY *key, + # unsigned char *ivec, const int enc); + + AES_MAXNR = 14 + + class AES_KEY(Structure): + _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))), ('rounds', c_int)] + + AES_KEY_p = POINTER(AES_KEY) + AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p, c_int]) + AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',[c_char_p, c_int, AES_KEY_p]) + + + + # Pukall 1 Cipher + # unsigned char *PC1(const unsigned char *key, unsigned int klen, const unsigned char *src, + # unsigned char *dest, unsigned int len, int decryption); + + PC1 = F(c_char_p, 'PC1', [c_char_p, c_ulong, c_char_p, c_char_p, c_ulong, c_ulong]) + + # Topaz Encryption + # typedef struct _TpzCtx { + # unsigned int v[2]; + # } TpzCtx; + # + # void topazCryptoInit(TpzCtx *ctx, const unsigned char *key, int klen); + # void topazCryptoDecrypt(const TpzCtx *ctx, const unsigned char *in, unsigned char *out, int len); + + class TPZ_CTX(Structure): + _fields_ = [('v', c_long * 2)] + + TPZ_CTX_p = POINTER(TPZ_CTX) + topazCryptoInit = F(None, 'topazCryptoInit', [TPZ_CTX_p, c_char_p, c_ulong]) + topazCryptoDecrypt = F(None, 
'topazCryptoDecrypt', [TPZ_CTX_p, c_char_p, c_char_p, c_ulong]) + + + class AES_CBC(object): + def __init__(self): + self._blocksize = 0 + self._keyctx = None + self._iv = 0 + + def set_decrypt_key(self, userkey, iv): + self._blocksize = len(userkey) + if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) : + raise Exception('AES CBC improper key used') + return + keyctx = self._keyctx = AES_KEY() + self._iv = iv + rv = AES_set_decrypt_key(userkey, len(userkey) * 8, keyctx) + if rv < 0: + raise Exception('Failed to initialize AES CBC key') + + def decrypt(self, data): + out = create_string_buffer(len(data)) + mutable_iv = create_string_buffer(self._iv, len(self._iv)) + rv = AES_cbc_encrypt(data, out, len(data), self._keyctx, mutable_iv, 0) + if rv == 0: + raise Exception('AES CBC decryption failed') + return out.raw + + class Pukall_Cipher(object): + def __init__(self): + self.key = None + + def PC1(self, key, src, decryption=True): + self.key = key + out = create_string_buffer(len(src)) + de = 0 + if decryption: + de = 1 + rv = PC1(key, len(key), src, out, len(src), de) + return out.raw + + class Topaz_Cipher(object): + def __init__(self): + self._ctx = None + + def ctx_init(self, key): + tpz_ctx = self._ctx = TPZ_CTX() + topazCryptoInit(tpz_ctx, key, len(key)) + return tpz_ctx + + def decrypt(self, data, ctx=None): + if ctx == None: + ctx = self._ctx + out = create_string_buffer(len(data)) + topazCryptoDecrypt(ctx, data, out, len(data)) + return out.raw + + print "Using Library AlfCrypto DLL/DYLIB/SO" + return (AES_CBC, Pukall_Cipher, Topaz_Cipher) + + +def _load_python_alfcrypto(): + + import aescbc + + class Pukall_Cipher(object): + def __init__(self): + self.key = None + + def PC1(self, key, src, decryption=True): + sum1 = 0; + sum2 = 0; + keyXorVal = 0; + if len(key)!=16: + print "Bad key length!" 
+ return None + wkey = [] + for i in xrange(8): + wkey.append(ord(key[i*2])<<8 | ord(key[i*2+1])) + dst = "" + for i in xrange(len(src)): + temp1 = 0; + byteXorVal = 0; + for j in xrange(8): + temp1 ^= wkey[j] + sum2 = (sum2+j)*20021 + sum1 + sum1 = (temp1*346)&0xFFFF + sum2 = (sum2+sum1)&0xFFFF + temp1 = (temp1*20021+1)&0xFFFF + byteXorVal ^= temp1 ^ sum2 + curByte = ord(src[i]) + if not decryption: + keyXorVal = curByte * 257; + curByte = ((curByte ^ (byteXorVal >> 8)) ^ byteXorVal) & 0xFF + if decryption: + keyXorVal = curByte * 257; + for j in xrange(8): + wkey[j] ^= keyXorVal; + dst+=chr(curByte) + return dst + + class Topaz_Cipher(object): + def __init__(self): + self._ctx = None + + def ctx_init(self, key): + ctx1 = 0x0CAFFE19E + for keyChar in key: + keyByte = ord(keyChar) + ctx2 = ctx1 + ctx1 = ((((ctx1 >>2) * (ctx1 >>7))&0xFFFFFFFF) ^ (keyByte * keyByte * 0x0F902007)& 0xFFFFFFFF ) + self._ctx = [ctx1, ctx2] + return [ctx1,ctx2] + + def decrypt(self, data, ctx=None): + if ctx == None: + ctx = self._ctx + ctx1 = ctx[0] + ctx2 = ctx[1] + plainText = "" + for dataChar in data: + dataByte = ord(dataChar) + m = (dataByte ^ ((ctx1 >> 3) &0xFF) ^ ((ctx2<<3) & 0xFF)) &0xFF + ctx2 = ctx1 + ctx1 = (((ctx1 >> 2) * (ctx1 >> 7)) &0xFFFFFFFF) ^((m * m * 0x0F902007) &0xFFFFFFFF) + plainText += chr(m) + return plainText + + class AES_CBC(object): + def __init__(self): + self._key = None + self._iv = None + self.aes = None + + def set_decrypt_key(self, userkey, iv): + self._key = userkey + self._iv = iv + self.aes = aescbc.AES_CBC(userkey, aescbc.noPadding(), len(userkey)) + + def decrypt(self, data): + iv = self._iv + cleartext = self.aes.decrypt(iv + data) + return cleartext + + return (AES_CBC, Pukall_Cipher, Topaz_Cipher) + + +def _load_crypto(): + AES_CBC = Pukall_Cipher = Topaz_Cipher = None + cryptolist = (_load_libalfcrypto, _load_python_alfcrypto) + for loader in cryptolist: + try: + AES_CBC, Pukall_Cipher, Topaz_Cipher = loader() + break + except (ImportError, 
Exception): + pass + return AES_CBC, Pukall_Cipher, Topaz_Cipher + +AES_CBC, Pukall_Cipher, Topaz_Cipher = _load_crypto() + + +class KeyIVGen(object): + # this only exists in openssl so we will use pure python implementation instead + # PKCS5_PBKDF2_HMAC_SHA1 = F(c_int, 'PKCS5_PBKDF2_HMAC_SHA1', + # [c_char_p, c_ulong, c_char_p, c_ulong, c_ulong, c_ulong, c_char_p]) + def pbkdf2(self, passwd, salt, iter, keylen): + + def xorstr( a, b ): + if len(a) != len(b): + raise Exception("xorstr(): lengths differ") + return ''.join((chr(ord(x)^ord(y)) for x, y in zip(a, b))) + + def prf( h, data ): + hm = h.copy() + hm.update( data ) + return hm.digest() + + def pbkdf2_F( h, salt, itercount, blocknum ): + U = prf( h, salt + pack('>i',blocknum ) ) + T = U + for i in range(2, itercount+1): + U = prf( h, U ) + T = xorstr( T, U ) + return T + + sha = hashlib.sha1 + digest_size = sha().digest_size + # l - number of output blocks to produce + l = keylen / digest_size + if keylen % digest_size != 0: + l += 1 + h = hmac.new( passwd, None, sha ) + T = "" + for i in range(1, l+1): + T += pbkdf2_F( h, salt, iter, i ) + return T[0: keylen] + diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/genbook.py b/Calibre_Plugins/K4MobiDeDRM_plugin/genbook.py index 9ad87ea..7bef68e 100644 Binary files a/Calibre_Plugins/K4MobiDeDRM_plugin/genbook.py and b/Calibre_Plugins/K4MobiDeDRM_plugin/genbook.py differ diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/k4mobidedrm_orig.py b/Calibre_Plugins/K4MobiDeDRM_plugin/k4mobidedrm_orig.py index d962a02..269810c 100644 Binary files a/Calibre_Plugins/K4MobiDeDRM_plugin/k4mobidedrm_orig.py and b/Calibre_Plugins/K4MobiDeDRM_plugin/k4mobidedrm_orig.py differ diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/kgenpids.py b/Calibre_Plugins/K4MobiDeDRM_plugin/kgenpids.py index abfc7e4..e5647f4 100644 --- a/Calibre_Plugins/K4MobiDeDRM_plugin/kgenpids.py +++ b/Calibre_Plugins/K4MobiDeDRM_plugin/kgenpids.py @@ -1,270 +1,793 @@ -#!/usr/bin/env python +#! 
/usr/bin/python +# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab +# For use with Topaz Scripts Version 2.6 -from __future__ import with_statement import sys -import os, csv -import binascii -import zlib -import re -from struct import pack, unpack, unpack_from - -class DrmException(Exception): - pass - -global charMap1 -global charMap3 -global charMap4 - -if 'calibre' in sys.modules: - inCalibre = True -else: - inCalibre = False - -if inCalibre: - if sys.platform.startswith('win'): - from calibre_plugins.k4mobidedrm.k4pcutils import getKindleInfoFiles, getDBfromFile, GetUserName, GetIDString - - if sys.platform.startswith('darwin'): - from calibre_plugins.k4mobidedrm.k4mutils import getKindleInfoFiles, getDBfromFile, GetUserName, GetIDString -else: - if sys.platform.startswith('win'): - from k4pcutils import getKindleInfoFiles, getDBfromFile, GetUserName, GetIDString - - if sys.platform.startswith('darwin'): - from k4mutils import getKindleInfoFiles, getDBfromFile, GetUserName, GetIDString - - -charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M" -charMap3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" -charMap4 = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789" - -# crypto digestroutines -import hashlib - -def MD5(message): - ctx = hashlib.md5() - ctx.update(message) - return ctx.digest() - -def SHA1(message): - ctx = hashlib.sha1() - ctx.update(message) - return ctx.digest() - - -# Encode the bytes in data with the characters in map -def encode(data, map): - result = "" - for char in data: - value = ord(char) - Q = (value ^ 0x80) // len(map) - R = value % len(map) - result += map[Q] - result += map[R] - return result - -# Hash the bytes in data and then encode the digest with the characters in map -def encodeHash(data,map): - return encode(MD5(data),map) - -# Decode the string in data with the characters in map. 
Returns the decoded bytes -def decode(data,map): - result = "" - for i in range (0,len(data)-1,2): - high = map.find(data[i]) - low = map.find(data[i+1]) - if (high == -1) or (low == -1) : - break - value = (((high * len(map)) ^ 0x80) & 0xFF) + low - result += pack("B",value) - return result - -# -# PID generation routines -# - -# Returns two bit at offset from a bit field -def getTwoBitsFromBitField(bitField,offset): - byteNumber = offset // 4 - bitPosition = 6 - 2*(offset % 4) - return ord(bitField[byteNumber]) >> bitPosition & 3 - -# Returns the six bits at offset from a bit field -def getSixBitsFromBitField(bitField,offset): - offset *= 3 - value = (getTwoBitsFromBitField(bitField,offset) <<4) + (getTwoBitsFromBitField(bitField,offset+1) << 2) +getTwoBitsFromBitField(bitField,offset+2) - return value - -# 8 bits to six bits encoding from hash to generate PID string -def encodePID(hash): - global charMap3 - PID = "" - for position in range (0,8): - PID += charMap3[getSixBitsFromBitField(hash,position)] - return PID - -# Encryption table used to generate the device PID -def generatePidEncryptionTable() : - table = [] - for counter1 in range (0,0x100): - value = counter1 - for counter2 in range (0,8): - if (value & 1 == 0) : - value = value >> 1 +import csv +import os +import math +import getopt +from struct import pack +from struct import unpack + + +class DocParser(object): + def __init__(self, flatxml, classlst, fileid, bookDir, gdict, fixedimage): + self.id = os.path.basename(fileid).replace('.dat','') + self.svgcount = 0 + self.docList = flatxml.split('\n') + self.docSize = len(self.docList) + self.classList = {} + self.bookDir = bookDir + self.gdict = gdict + tmpList = classlst.split('\n') + for pclass in tmpList: + if pclass != '': + # remove the leading period from the css name + cname = pclass[1:] + self.classList[cname] = True + self.fixedimage = fixedimage + self.ocrtext = [] + self.link_id = [] + self.link_title = [] + self.link_page = [] + 
self.link_href = [] + self.link_type = [] + self.dehyphen_rootid = [] + self.paracont_stemid = [] + self.parastems_stemid = [] + + + def getGlyph(self, gid): + result = '' + id='id="gl%d"' % gid + return self.gdict.lookup(id) + + def glyphs_to_image(self, glyphList): + + def extract(path, key): + b = path.find(key) + len(key) + e = path.find(' ',b) + return int(path[b:e]) + + svgDir = os.path.join(self.bookDir,'svg') + + imgDir = os.path.join(self.bookDir,'img') + imgname = self.id + '_%04d.svg' % self.svgcount + imgfile = os.path.join(imgDir,imgname) + + # get glyph information + gxList = self.getData('info.glyph.x',0,-1) + gyList = self.getData('info.glyph.y',0,-1) + gidList = self.getData('info.glyph.glyphID',0,-1) + + gids = [] + maxws = [] + maxhs = [] + xs = [] + ys = [] + gdefs = [] + + # get path defintions, positions, dimensions for each glyph + # that makes up the image, and find min x and min y to reposition origin + minx = -1 + miny = -1 + for j in glyphList: + gid = gidList[j] + gids.append(gid) + + xs.append(gxList[j]) + if minx == -1: minx = gxList[j] + else : minx = min(minx, gxList[j]) + + ys.append(gyList[j]) + if miny == -1: miny = gyList[j] + else : miny = min(miny, gyList[j]) + + path = self.getGlyph(gid) + gdefs.append(path) + + maxws.append(extract(path,'width=')) + maxhs.append(extract(path,'height=')) + + + # change the origin to minx, miny and calc max height and width + maxw = maxws[0] + xs[0] - minx + maxh = maxhs[0] + ys[0] - miny + for j in xrange(0, len(xs)): + xs[j] = xs[j] - minx + ys[j] = ys[j] - miny + maxw = max( maxw, (maxws[j] + xs[j]) ) + maxh = max( maxh, (maxhs[j] + ys[j]) ) + + # open the image file for output + ifile = open(imgfile,'w') + ifile.write('\n') + ifile.write('\n') + ifile.write('\n' % (math.floor(maxw/10), math.floor(maxh/10), maxw, maxh)) + ifile.write('\n') + for j in xrange(0,len(gdefs)): + ifile.write(gdefs[j]) + ifile.write('\n') + for j in xrange(0,len(gids)): + ifile.write('\n' % (gids[j], xs[j], ys[j])) 
+ ifile.write('') + ifile.close() + + return 0 + + + + # return tag at line pos in document + def lineinDoc(self, pos) : + if (pos >= 0) and (pos < self.docSize) : + item = self.docList[pos] + if item.find('=') >= 0: + (name, argres) = item.split('=',1) else : - value = value >> 1 - value = value ^ 0xEDB88320 - table.append(value) - return table - -# Seed value used to generate the device PID -def generatePidSeed(table,dsn) : - value = 0 - for counter in range (0,4) : - index = (ord(dsn[counter]) ^ value) &0xFF - value = (value >> 8) ^ table[index] - return value - -# Generate the device PID -def generateDevicePID(table,dsn,nbRoll): - global charMap4 - seed = generatePidSeed(table,dsn) - pidAscii = "" - pid = [(seed >>24) &0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF,(seed>>24) & 0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF] - index = 0 - for counter in range (0,nbRoll): - pid[index] = pid[index] ^ ord(dsn[counter]) - index = (index+1) %8 - for counter in range (0,8): - index = ((((pid[counter] >>5) & 3) ^ pid[counter]) & 0x1f) + (pid[counter] >> 7) - pidAscii += charMap4[index] - return pidAscii - -def crc32(s): - return (~binascii.crc32(s,-1))&0xFFFFFFFF - -# convert from 8 digit PID to 10 digit PID with checksum -def checksumPid(s): - global charMap4 - crc = crc32(s) - crc = crc ^ (crc >> 16) - res = s - l = len(charMap4) - for i in (0,1): - b = crc & 0xff - pos = (b // l) ^ (b % l) - res += charMap4[pos%l] - crc >>= 8 - return res - - -# old kindle serial number to fixed pid -def pidFromSerial(s, l): - global charMap4 - crc = crc32(s) - arr1 = [0]*l - for i in xrange(len(s)): - arr1[i%l] ^= ord(s[i]) - crc_bytes = [crc >> 24 & 0xff, crc >> 16 & 0xff, crc >> 8 & 0xff, crc & 0xff] - for i in xrange(l): - arr1[i] ^= crc_bytes[i&3] - pid = "" - for i in xrange(l): - b = arr1[i] & 0xff - pid+=charMap4[(b >> 7) + ((b >> 5 & 3) ^ (b & 0x1f))] - return pid - - -# Parse the EXTH header records and use the Kindle serial number to calculate the book 
pid. -def getKindlePid(pidlst, rec209, token, serialnum): - # Compute book PID - pidHash = SHA1(serialnum+rec209+token) - bookPID = encodePID(pidHash) - bookPID = checksumPid(bookPID) - pidlst.append(bookPID) - - # compute fixed pid for old pre 2.5 firmware update pid as well - bookPID = pidFromSerial(serialnum, 7) + "*" - bookPID = checksumPid(bookPID) - pidlst.append(bookPID) - - return pidlst - - -# parse the Kindleinfo file to calculate the book pid. - -keynames = ["kindle.account.tokens","kindle.cookie.item","eulaVersionAccepted","login_date","kindle.token.item","login","kindle.key.item","kindle.name.info","kindle.device.info", "MazamaRandomNumber"] - -def getK4Pids(pidlst, rec209, token, kInfoFile): - global charMap1 - kindleDatabase = None - try: - kindleDatabase = getDBfromFile(kInfoFile) - except Exception, message: - print(message) - kindleDatabase = None - pass - - if kindleDatabase == None : - return pidlst - - try: - # Get the Mazama Random number - MazamaRandomNumber = kindleDatabase["MazamaRandomNumber"] - - # Get the kindle account token - kindleAccountToken = kindleDatabase["kindle.account.tokens"] - except KeyError: - print "Keys not found in " + kInfoFile - return pidlst - - # Get the ID string used - encodedIDString = encodeHash(GetIDString(),charMap1) - - # Get the current user name - encodedUsername = encodeHash(GetUserName(),charMap1) - - # concat, hash and encode to calculate the DSN - DSN = encode(SHA1(MazamaRandomNumber+encodedIDString+encodedUsername),charMap1) - - # Compute the device PID (for which I can tell, is used for nothing). 
- table = generatePidEncryptionTable() - devicePID = generateDevicePID(table,DSN,4) - devicePID = checksumPid(devicePID) - pidlst.append(devicePID) - - # Compute book PIDs - - # book pid - pidHash = SHA1(DSN+kindleAccountToken+rec209+token) - bookPID = encodePID(pidHash) - bookPID = checksumPid(bookPID) - pidlst.append(bookPID) - - # variant 1 - pidHash = SHA1(kindleAccountToken+rec209+token) - bookPID = encodePID(pidHash) - bookPID = checksumPid(bookPID) - pidlst.append(bookPID) - - # variant 2 - pidHash = SHA1(DSN+rec209+token) - bookPID = encodePID(pidHash) - bookPID = checksumPid(bookPID) - pidlst.append(bookPID) - - return pidlst - -def getPidList(md1, md2, k4, pids, serials, kInfoFiles): - pidlst = [] - if kInfoFiles is None: - kInfoFiles = [] - if k4: - kInfoFiles = getKindleInfoFiles(kInfoFiles) - for infoFile in kInfoFiles: - pidlst = getK4Pids(pidlst, md1, md2, infoFile) - for serialnum in serials: - pidlst = getKindlePid(pidlst, md1, md2, serialnum) - for pid in pids: - pidlst.append(pid) - return pidlst + name = item + argres = '' + return name, argres + + + # find tag in doc if within pos to end inclusive + def findinDoc(self, tagpath, pos, end) : + result = None + if end == -1 : + end = self.docSize + else: + end = min(self.docSize, end) + foundat = -1 + for j in xrange(pos, end): + item = self.docList[j] + if item.find('=') >= 0: + (name, argres) = item.split('=',1) + else : + name = item + argres = '' + if name.endswith(tagpath) : + result = argres + foundat = j + break + return foundat, result + + + # return list of start positions for the tagpath + def posinDoc(self, tagpath): + startpos = [] + pos = 0 + res = "" + while res != None : + (foundpos, res) = self.findinDoc(tagpath, pos, -1) + if res != None : + startpos.append(foundpos) + pos = foundpos + 1 + return startpos + + + # returns a vector of integers for the tagpath + def getData(self, tagpath, pos, end): + argres=[] + (foundat, argt) = self.findinDoc(tagpath, pos, end) + if (argt != None) 
and (len(argt) > 0) : + argList = argt.split('|') + argres = [ int(strval) for strval in argList] + return argres + + + # get the class + def getClass(self, pclass): + nclass = pclass + + # class names are an issue given topaz may start them with numerals (not allowed), + # use a mix of cases (which cause some browsers problems), and actually + # attach numbers after "_reclustered*" to the end to deal classeses that inherit + # from a base class (but then not actually provide all of these _reclustereed + # classes in the stylesheet! + + # so we clean this up by lowercasing, prepend 'cl-', and getting any baseclass + # that exists in the stylesheet first, and then adding this specific class + # after + + # also some class names have spaces in them so need to convert to dashes + if nclass != None : + nclass = nclass.replace(' ','-') + classres = '' + nclass = nclass.lower() + nclass = 'cl-' + nclass + baseclass = '' + # graphic is the base class for captions + if nclass.find('cl-cap-') >=0 : + classres = 'graphic' + ' ' + else : + # strip to find baseclass + p = nclass.find('_') + if p > 0 : + baseclass = nclass[0:p] + if baseclass in self.classList: + classres += baseclass + ' ' + classres += nclass + nclass = classres + return nclass + + + # develop a sorted description of the starting positions of + # groups and regions on the page, as well as the page type + def PageDescription(self): + + def compare(x, y): + (xtype, xval) = x + (ytype, yval) = y + if xval > yval: + return 1 + if xval == yval: + return 0 + return -1 + + result = [] + (pos, pagetype) = self.findinDoc('page.type',0,-1) + + groupList = self.posinDoc('page.group') + groupregionList = self.posinDoc('page.group.region') + pageregionList = self.posinDoc('page.region') + # integrate into one list + for j in groupList: + result.append(('grpbeg',j)) + for j in groupregionList: + result.append(('gregion',j)) + for j in pageregionList: + result.append(('pregion',j)) + result.sort(compare) + + # insert group 
end and page end indicators + inGroup = False + j = 0 + while True: + if j == len(result): break + rtype = result[j][0] + rval = result[j][1] + if not inGroup and (rtype == 'grpbeg') : + inGroup = True + j = j + 1 + elif inGroup and (rtype in ('grpbeg', 'pregion')): + result.insert(j,('grpend',rval)) + inGroup = False + else: + j = j + 1 + if inGroup: + result.append(('grpend',-1)) + result.append(('pageend', -1)) + return pagetype, result + + + + # build a description of the paragraph + def getParaDescription(self, start, end, regtype): + + result = [] + + # paragraph + (pos, pclass) = self.findinDoc('paragraph.class',start,end) + + pclass = self.getClass(pclass) + + # if paragraph uses extratokens (extra glyphs) then make it fixed + (pos, extraglyphs) = self.findinDoc('paragraph.extratokens',start,end) + + # build up a description of the paragraph in result and return it + # first check for the basic - all words paragraph + (pos, sfirst) = self.findinDoc('paragraph.firstWord',start,end) + (pos, slast) = self.findinDoc('paragraph.lastWord',start,end) + if (sfirst != None) and (slast != None) : + first = int(sfirst) + last = int(slast) + + makeImage = (regtype == 'vertical') or (regtype == 'table') + makeImage = makeImage or (extraglyphs != None) + if self.fixedimage: + makeImage = makeImage or (regtype == 'fixed') + + if (pclass != None): + makeImage = makeImage or (pclass.find('.inverted') >= 0) + if self.fixedimage : + makeImage = makeImage or (pclass.find('cl-f-') >= 0) + + # before creating an image make sure glyph info exists + gidList = self.getData('info.glyph.glyphID',0,-1) + + makeImage = makeImage & (len(gidList) > 0) + + if not makeImage : + # standard all word paragraph + for wordnum in xrange(first, last): + result.append(('ocr', wordnum)) + return pclass, result + + # convert paragraph to svg image + # translate first and last word into first and last glyphs + # and generate inline image and include it + glyphList = [] + firstglyphList = 
self.getData('word.firstGlyph',0,-1) + gidList = self.getData('info.glyph.glyphID',0,-1) + firstGlyph = firstglyphList[first] + if last < len(firstglyphList): + lastGlyph = firstglyphList[last] + else : + lastGlyph = len(gidList) + + # handle case of white sapce paragraphs with no actual glyphs in them + # by reverting to text based paragraph + if firstGlyph >= lastGlyph: + # revert to standard text based paragraph + for wordnum in xrange(first, last): + result.append(('ocr', wordnum)) + return pclass, result + + for glyphnum in xrange(firstGlyph, lastGlyph): + glyphList.append(glyphnum) + # include any extratokens if they exist + (pos, sfg) = self.findinDoc('extratokens.firstGlyph',start,end) + (pos, slg) = self.findinDoc('extratokens.lastGlyph',start,end) + if (sfg != None) and (slg != None): + for glyphnum in xrange(int(sfg), int(slg)): + glyphList.append(glyphnum) + num = self.svgcount + self.glyphs_to_image(glyphList) + self.svgcount += 1 + result.append(('svg', num)) + return pclass, result + + # this type of paragraph may be made up of multiple spans, inline + # word monograms (images), and words with semantic meaning, + # plus glyphs used to form starting letter of first word + + # need to parse this type line by line + line = start + 1 + word_class = '' + + # if end is -1 then we must search to end of document + if end == -1 : + end = self.docSize + + # seems some xml has last* coming before first* so we have to + # handle any order + sp_first = -1 + sp_last = -1 + + gl_first = -1 + gl_last = -1 + + ws_first = -1 + ws_last = -1 + + word_class = '' + + word_semantic_type = '' + + while (line < end) : + + (name, argres) = self.lineinDoc(line) + + if name.endswith('span.firstWord') : + sp_first = int(argres) + + elif name.endswith('span.lastWord') : + sp_last = int(argres) + + elif name.endswith('word.firstGlyph') : + gl_first = int(argres) + + elif name.endswith('word.lastGlyph') : + gl_last = int(argres) + + elif name.endswith('word_semantic.firstWord'): + 
ws_first = int(argres) + + elif name.endswith('word_semantic.lastWord'): + ws_last = int(argres) + + elif name.endswith('word.class'): + (cname, space) = argres.split('-',1) + if space == '' : space = '0' + if (cname == 'spaceafter') and (int(space) > 0) : + word_class = 'sa' + + elif name.endswith('word.img.src'): + result.append(('img' + word_class, int(argres))) + word_class = '' + + elif name.endswith('region.img.src'): + result.append(('img' + word_class, int(argres))) + + if (sp_first != -1) and (sp_last != -1): + for wordnum in xrange(sp_first, sp_last): + result.append(('ocr', wordnum)) + sp_first = -1 + sp_last = -1 + + if (gl_first != -1) and (gl_last != -1): + glyphList = [] + for glyphnum in xrange(gl_first, gl_last): + glyphList.append(glyphnum) + num = self.svgcount + self.glyphs_to_image(glyphList) + self.svgcount += 1 + result.append(('svg', num)) + gl_first = -1 + gl_last = -1 + + if (ws_first != -1) and (ws_last != -1): + for wordnum in xrange(ws_first, ws_last): + result.append(('ocr', wordnum)) + ws_first = -1 + ws_last = -1 + + line += 1 + + return pclass, result + + + def buildParagraph(self, pclass, pdesc, type, regtype) : + parares = '' + sep ='' + + classres = '' + if pclass : + classres = ' class="' + pclass + '"' + + br_lb = (regtype == 'fixed') or (regtype == 'chapterheading') or (regtype == 'vertical') + + handle_links = len(self.link_id) > 0 + + if (type == 'full') or (type == 'begin') : + parares += '' + + if (type == 'end'): + parares += ' ' + + lstart = len(parares) + + cnt = len(pdesc) + + for j in xrange( 0, cnt) : + + (wtype, num) = pdesc[j] + + if wtype == 'ocr' : + word = self.ocrtext[num] + sep = ' ' + + if handle_links: + link = self.link_id[num] + if (link > 0): + linktype = self.link_type[link-1] + title = self.link_title[link-1] + if (title == "") or (parares.rfind(title) < 0): + title=parares[lstart:] + if linktype == 'external' : + linkhref = self.link_href[link-1] + linkhtml = '' % linkhref + else : + if 
len(self.link_page) >= link : + ptarget = self.link_page[link-1] - 1 + linkhtml = '' % ptarget + else : + # just link to the current page + linkhtml = '' + linkhtml += title + '' + pos = parares.rfind(title) + if pos >= 0: + parares = parares[0:pos] + linkhtml + parares[pos+len(title):] + else : + parares += linkhtml + lstart = len(parares) + if word == '_link_' : word = '' + elif (link < 0) : + if word == '_link_' : word = '' + + if word == '_lb_': + if ((num-1) in self.dehyphen_rootid ) or handle_links: + word = '' + sep = '' + elif br_lb : + word = '
\n' + sep = '' + else : + word = '\n' + sep = '' + + if num in self.dehyphen_rootid : + word = word[0:-1] + sep = '' + + parares += word + sep + + elif wtype == 'img' : + sep = '' + parares += '' % num + parares += sep + + elif wtype == 'imgsa' : + sep = ' ' + parares += '' % num + parares += sep + + elif wtype == 'svg' : + sep = '' + parares += '' % num + parares += sep + + if len(sep) > 0 : parares = parares[0:-1] + if (type == 'full') or (type == 'end') : + parares += '

' + return parares + + + def buildTOCEntry(self, pdesc) : + parares = '' + sep ='' + tocentry = '' + handle_links = len(self.link_id) > 0 + + lstart = 0 + + cnt = len(pdesc) + for j in xrange( 0, cnt) : + + (wtype, num) = pdesc[j] + + if wtype == 'ocr' : + word = self.ocrtext[num] + sep = ' ' + + if handle_links: + link = self.link_id[num] + if (link > 0): + linktype = self.link_type[link-1] + title = self.link_title[link-1] + title = title.rstrip('. ') + alt_title = parares[lstart:] + alt_title = alt_title.strip() + # now strip off the actual printed page number + alt_title = alt_title.rstrip('01234567890ivxldIVXLD-.') + alt_title = alt_title.rstrip('. ') + # skip over any external links - can't have them in a books toc + if linktype == 'external' : + title = '' + alt_title = '' + linkpage = '' + else : + if len(self.link_page) >= link : + ptarget = self.link_page[link-1] - 1 + linkpage = '%04d' % ptarget + else : + # just link to the current page + linkpage = self.id[4:] + if len(alt_title) >= len(title): + title = alt_title + if title != '' and linkpage != '': + tocentry += title + '|' + linkpage + '\n' + lstart = len(parares) + if word == '_link_' : word = '' + elif (link < 0) : + if word == '_link_' : word = '' + + if word == '_lb_': + word = '' + sep = '' + + if num in self.dehyphen_rootid : + word = word[0:-1] + sep = '' + + parares += word + sep + + else : + continue + + return tocentry + + + + + # walk the document tree collecting the information needed + # to build an html page using the ocrText + + def process(self): + + tocinfo = '' + hlst = [] + + # get the ocr text + (pos, argres) = self.findinDoc('info.word.ocrText',0,-1) + if argres : self.ocrtext = argres.split('|') + + # get information to dehyphenate the text + self.dehyphen_rootid = self.getData('info.dehyphen.rootID',0,-1) + + # determine if first paragraph is continued from previous page + (pos, self.parastems_stemid) = self.findinDoc('info.paraStems.stemID',0,-1) + first_para_continued = 
(self.parastems_stemid != None) + + # determine if last paragraph is continued onto the next page + (pos, self.paracont_stemid) = self.findinDoc('info.paraCont.stemID',0,-1) + last_para_continued = (self.paracont_stemid != None) + + # collect link ids + self.link_id = self.getData('info.word.link_id',0,-1) + + # collect link destination page numbers + self.link_page = self.getData('info.links.page',0,-1) + + # collect link types (container versus external) + (pos, argres) = self.findinDoc('info.links.type',0,-1) + if argres : self.link_type = argres.split('|') + + # collect link destinations + (pos, argres) = self.findinDoc('info.links.href',0,-1) + if argres : self.link_href = argres.split('|') + + # collect link titles + (pos, argres) = self.findinDoc('info.links.title',0,-1) + if argres : + self.link_title = argres.split('|') + else: + self.link_title.append('') + + # get a descriptions of the starting points of the regions + # and groups on the page + (pagetype, pageDesc) = self.PageDescription() + regcnt = len(pageDesc) - 1 + + anchorSet = False + breakSet = False + inGroup = False + + # process each region on the page and convert what you can to html + + for j in xrange(regcnt): + + (etype, start) = pageDesc[j] + (ntype, end) = pageDesc[j+1] + + + # set anchor for link target on this page + if not anchorSet and not first_para_continued: + hlst.append('\n') + anchorSet = True + + # handle groups of graphics with text captions + if (etype == 'grpbeg'): + (pos, grptype) = self.findinDoc('group.type', start, end) + if grptype != None: + if grptype == 'graphic': + gcstr = ' class="' + grptype + '"' + hlst.append('') + inGroup = True + + elif (etype == 'grpend'): + if inGroup: + hlst.append('\n') + inGroup = False + + else: + (pos, regtype) = self.findinDoc('region.type',start,end) + + if regtype == 'graphic' : + (pos, simgsrc) = self.findinDoc('img.src',start,end) + if simgsrc: + if inGroup: + hlst.append('' % int(simgsrc)) + else: + hlst.append('
' % int(simgsrc)) + + elif regtype == 'chapterheading' : + (pclass, pdesc) = self.getParaDescription(start,end, regtype) + if not breakSet: + hlst.append('
 
\n') + breakSet = True + tag = 'h1' + if pclass and (len(pclass) >= 7): + if pclass[3:7] == 'ch1-' : tag = 'h1' + if pclass[3:7] == 'ch2-' : tag = 'h2' + if pclass[3:7] == 'ch3-' : tag = 'h3' + hlst.append('<' + tag + ' class="' + pclass + '">') + else: + hlst.append('<' + tag + '>') + hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype)) + hlst.append('') + + elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') or (regtype == 'listitem'): + ptype = 'full' + # check to see if this is a continution from the previous page + if first_para_continued : + ptype = 'end' + first_para_continued = False + (pclass, pdesc) = self.getParaDescription(start,end, regtype) + if pclass and (len(pclass) >= 6) and (ptype == 'full'): + tag = 'p' + if pclass[3:6] == 'h1-' : tag = 'h4' + if pclass[3:6] == 'h2-' : tag = 'h5' + if pclass[3:6] == 'h3-' : tag = 'h6' + hlst.append('<' + tag + ' class="' + pclass + '">') + hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype)) + hlst.append('') + else : + hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype)) + + elif (regtype == 'tocentry') : + ptype = 'full' + if first_para_continued : + ptype = 'end' + first_para_continued = False + (pclass, pdesc) = self.getParaDescription(start,end, regtype) + tocinfo += self.buildTOCEntry(pdesc) + hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype)) + + elif (regtype == 'vertical') or (regtype == 'table') : + ptype = 'full' + if inGroup: + ptype = 'middle' + if first_para_continued : + ptype = 'end' + first_para_continued = False + (pclass, pdesc) = self.getParaDescription(start, end, regtype) + hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype)) + + + elif (regtype == 'synth_fcvr.center'): + (pos, simgsrc) = self.findinDoc('img.src',start,end) + if simgsrc: + hlst.append('
' % int(simgsrc)) + + else : + print ' Making region type', regtype, + (pos, temp) = self.findinDoc('paragraph',start,end) + (pos2, temp) = self.findinDoc('span',start,end) + if pos != -1 or pos2 != -1: + print ' a "text" region' + orig_regtype = regtype + regtype = 'fixed' + ptype = 'full' + # check to see if this is a continution from the previous page + if first_para_continued : + ptype = 'end' + first_para_continued = False + (pclass, pdesc) = self.getParaDescription(start,end, regtype) + if not pclass: + if orig_regtype.endswith('.right') : pclass = 'cl-right' + elif orig_regtype.endswith('.center') : pclass = 'cl-center' + elif orig_regtype.endswith('.left') : pclass = 'cl-left' + elif orig_regtype.endswith('.justify') : pclass = 'cl-justify' + if pclass and (ptype == 'full') and (len(pclass) >= 6): + tag = 'p' + if pclass[3:6] == 'h1-' : tag = 'h4' + if pclass[3:6] == 'h2-' : tag = 'h5' + if pclass[3:6] == 'h3-' : tag = 'h6' + hlst.append('<' + tag + ' class="' + pclass + '">') + hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype)) + hlst.append('') + else : + hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype)) + else : + print ' a "graphic" region' + (pos, simgsrc) = self.findinDoc('img.src',start,end) + if simgsrc: + hlst.append('
' % int(simgsrc)) + + + htmlpage = "".join(hlst) + if last_para_continued : + if htmlpage[-4:] == '

': + htmlpage = htmlpage[0:-4] + last_para_continued = False + + return htmlpage, tocinfo + + +def convert2HTML(flatxml, classlst, fileid, bookDir, gdict, fixedimage): + # create a document parser + dp = DocParser(flatxml, classlst, fileid, bookDir, gdict, fixedimage) + htmlpage, tocinfo = dp.process() + return htmlpage, tocinfo diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/plugin-import-name-k4mobidedrm.txt b/Calibre_Plugins/K4MobiDeDRM_plugin/plugin-import-name-k4mobidedrm.txt index e69de29..e66e9f3 100644 --- a/Calibre_Plugins/K4MobiDeDRM_plugin/plugin-import-name-k4mobidedrm.txt +++ b/Calibre_Plugins/K4MobiDeDRM_plugin/plugin-import-name-k4mobidedrm.txt @@ -0,0 +1,726 @@ +# standlone set of Mac OSX specific routines needed for KindleBooks + +from __future__ import with_statement + +import sys +import os +import os.path +import re +import copy +import subprocess +from struct import pack, unpack, unpack_from + +class DrmException(Exception): + pass + + +# interface to needed routines in openssl's libcrypto +def _load_crypto_libcrypto(): + from ctypes import CDLL, byref, POINTER, c_void_p, c_char_p, c_int, c_long, \ + Structure, c_ulong, create_string_buffer, addressof, string_at, cast + from ctypes.util import find_library + + libcrypto = find_library('crypto') + if libcrypto is None: + raise DrmException('libcrypto not found') + libcrypto = CDLL(libcrypto) + + # From OpenSSL's crypto aes header + # + # AES_ENCRYPT 1 + # AES_DECRYPT 0 + # AES_MAXNR 14 (in bytes) + # AES_BLOCK_SIZE 16 (in bytes) + # + # struct aes_key_st { + # unsigned long rd_key[4 *(AES_MAXNR + 1)]; + # int rounds; + # }; + # typedef struct aes_key_st AES_KEY; + # + # int AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key); + # + # note: the ivec string, and output buffer are both mutable + # void AES_cbc_encrypt(const unsigned char *in, unsigned char *out, + # const unsigned long length, const AES_KEY *key, unsigned char *ivec, const int enc); + + AES_MAXNR = 14 + 
c_char_pp = POINTER(c_char_p) + c_int_p = POINTER(c_int) + + class AES_KEY(Structure): + _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))), ('rounds', c_int)] + AES_KEY_p = POINTER(AES_KEY) + + def F(restype, name, argtypes): + func = getattr(libcrypto, name) + func.restype = restype + func.argtypes = argtypes + return func + + AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p,c_int]) + + AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',[c_char_p, c_int, AES_KEY_p]) + + # From OpenSSL's Crypto evp/p5_crpt2.c + # + # int PKCS5_PBKDF2_HMAC_SHA1(const char *pass, int passlen, + # const unsigned char *salt, int saltlen, int iter, + # int keylen, unsigned char *out); + + PKCS5_PBKDF2_HMAC_SHA1 = F(c_int, 'PKCS5_PBKDF2_HMAC_SHA1', + [c_char_p, c_ulong, c_char_p, c_ulong, c_ulong, c_ulong, c_char_p]) + + class LibCrypto(object): + def __init__(self): + self._blocksize = 0 + self._keyctx = None + self._iv = 0 + + def set_decrypt_key(self, userkey, iv): + self._blocksize = len(userkey) + if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) : + raise DrmException('AES improper key used') + return + keyctx = self._keyctx = AES_KEY() + self._iv = iv + self._userkey = userkey + rv = AES_set_decrypt_key(userkey, len(userkey) * 8, keyctx) + if rv < 0: + raise DrmException('Failed to initialize AES key') + + def decrypt(self, data): + out = create_string_buffer(len(data)) + mutable_iv = create_string_buffer(self._iv, len(self._iv)) + keyctx = self._keyctx + rv = AES_cbc_encrypt(data, out, len(data), keyctx, mutable_iv, 0) + if rv == 0: + raise DrmException('AES decryption failed') + return out.raw + + def keyivgen(self, passwd, salt, iter, keylen): + saltlen = len(salt) + passlen = len(passwd) + out = create_string_buffer(keylen) + rv = PKCS5_PBKDF2_HMAC_SHA1(passwd, passlen, salt, saltlen, iter, keylen, out) + return out.raw + return LibCrypto + +def _load_crypto(): + LibCrypto = None + try: + 
LibCrypto = _load_crypto_libcrypto() + except (ImportError, DrmException): + pass + return LibCrypto + +LibCrypto = _load_crypto() + +# +# Utility Routines +# + +# crypto digestroutines +import hashlib + +def MD5(message): + ctx = hashlib.md5() + ctx.update(message) + return ctx.digest() + +def SHA1(message): + ctx = hashlib.sha1() + ctx.update(message) + return ctx.digest() + +def SHA256(message): + ctx = hashlib.sha256() + ctx.update(message) + return ctx.digest() + +# Various character maps used to decrypt books. Probably supposed to act as obfuscation +charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M" +charMap2 = "ZB0bYyc1xDdW2wEV3Ff7KkPpL8UuGA4gz-Tme9Nn_tHh5SvXCsIiR6rJjQaqlOoM" + +# For kinf approach of K4Mac 1.6.X or later +# On K4PC charMap5 = "AzB0bYyCeVvaZ3FfUuG4g-TtHh5SsIiR6rJjQq7KkPpL8lOoMm9Nn_c1XxDdW2wE" +# For Mac they seem to re-use charMap2 here +charMap5 = charMap2 + +# new in K4M 1.9.X +testMap8 = "YvaZ3FfUm9Nn_c1XuG4yCAzB0beVg-TtHh5SsIiR6rJjQdW2wEq7KkPpL8lOoMxD" + + +def encode(data, map): + result = "" + for char in data: + value = ord(char) + Q = (value ^ 0x80) // len(map) + R = value % len(map) + result += map[Q] + result += map[R] + return result + +# Hash the bytes in data and then encode the digest with the characters in map +def encodeHash(data,map): + return encode(MD5(data),map) + +# Decode the string in data with the characters in map. 
Returns the decoded bytes +def decode(data,map): + result = "" + for i in range (0,len(data)-1,2): + high = map.find(data[i]) + low = map.find(data[i+1]) + if (high == -1) or (low == -1) : + break + value = (((high * len(map)) ^ 0x80) & 0xFF) + low + result += pack("B",value) + return result + +# For K4M 1.6.X and later +# generate table of prime number less than or equal to int n +def primes(n): + if n==2: return [2] + elif n<2: return [] + s=range(3,n+1,2) + mroot = n ** 0.5 + half=(n+1)/2-1 + i=0 + m=3 + while m <= mroot: + if s[i]: + j=(m*m-3)/2 + s[j]=0 + while j 7: + return mungedmac + sernum = GetVolumeSerialNumber() + if len(sernum) > 7: + return sernum + diskpart = GetUserHomeAppSupKindleDirParitionName() + uuidnum = GetDiskPartitionUUID(diskpart) + if len(uuidnum) > 7: + return uuidnum + mungedmac = GetMACAddressMunged() + if len(mungedmac) > 7: + return mungedmac + return '9999999999' + + +# implements an Pseudo Mac Version of Windows built-in Crypto routine +# used by Kindle for Mac versions < 1.6.0 +class CryptUnprotectData(object): + def __init__(self): + sernum = GetVolumeSerialNumber() + if sernum == '': + sernum = '9999999999' + sp = sernum + '!@#' + GetUserName() + passwdData = encode(SHA256(sp),charMap1) + salt = '16743' + self.crp = LibCrypto() + iter = 0x3e8 + keylen = 0x80 + key_iv = self.crp.keyivgen(passwdData, salt, iter, keylen) + self.key = key_iv[0:32] + self.iv = key_iv[32:48] + self.crp.set_decrypt_key(self.key, self.iv) + + def decrypt(self, encryptedData): + cleartext = self.crp.decrypt(encryptedData) + cleartext = decode(cleartext,charMap1) + return cleartext + + +# implements an Pseudo Mac Version of Windows built-in Crypto routine +# used for Kindle for Mac Versions >= 1.6.0 +class CryptUnprotectDataV2(object): + def __init__(self): + sp = GetUserName() + ':&%:' + GetIDString() + passwdData = encode(SHA256(sp),charMap5) + # salt generation as per the code + salt = 0x0512981d * 2 * 1 * 1 + salt = str(salt) + GetUserName() + salt = 
# unprotect the new header blob in .kinf2011
# used in Kindle for Mac Version >= 1.9.0
def UnprotectHeaderData(encryptedData):
    """Decrypt the header record of a .kinf2011 file.

    The header uses a hard-coded passphrase and salt (no machine-specific
    entropy), so it can be opened before any per-user keys are derived.
    """
    passwdData = 'header_key_data'
    salt = 'HEADER.2011'
    iterations = 0x80      # renamed from 'iter' to avoid shadowing the builtin
    keylen = 0x100
    crp = LibCrypto()
    key_iv = crp.keyivgen(passwdData, salt, iterations, keylen)
    key = key_iv[0:32]
    iv = key_iv[32:48]
    crp.set_decrypt_key(key, iv)
    cleartext = crp.decrypt(encryptedData)
    return cleartext


# implements a Pseudo Mac Version of Windows built-in Crypto routine
# used for Kindle for Mac Versions >= 1.9.0
class CryptUnprotectDataV3(object):
    """AES unprotect keyed from user name, machine ID string and entropy."""

    def __init__(self, entropy):
        sp = GetUserName() + '+@#$%+' + GetIDString()
        passwdData = encode(SHA256(sp), charMap2)
        salt = entropy
        self.crp = LibCrypto()
        iterations = 0x800
        keylen = 0x400
        key_iv = self.crp.keyivgen(passwdData, salt, iterations, keylen)
        self.key = key_iv[0:32]
        self.iv = key_iv[32:48]
        self.crp.set_decrypt_key(self.key, self.iv)

    def decrypt(self, encryptedData):
        cleartext = self.crp.decrypt(encryptedData)
        cleartext = decode(cleartext, charMap2)
        return cleartext


# Locate the .kindle-info files
def getKindleInfoFiles(kInfoFiles):
    """Append paths of all Kindle for Mac info files found under
    ~/Library/Application Support to kInfoFiles and return it.

    Searches, in order, for the three generations of info file:
    .kindle-info, .rainier*-kinf, and .kinf2011.
    """
    home = os.getenv('HOME')
    found = False
    # one search per file-name generation; the original repeated this
    # stanza three times verbatim, now consolidated into a single loop
    for infoname in ('.kindle-info', '.rainier*-kinf', '.kinf2011'):
        cmdline = 'find "' + home + '/Library/Application Support" -name "' + infoname + '"'
        cmdline = cmdline.encode(sys.getfilesystemencoding())
        p1 = subprocess.Popen(cmdline, shell=True, stdin=None,
                              stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                              close_fds=False)
        out1, out2 = p1.communicate()
        for resline in out1.split('\n'):
            if os.path.isfile(resline):
                kInfoFiles.append(resline)
                found = True
    if not found:
        print('No kindle-info files have been found.')
    return kInfoFiles


def _keynameForHash(keyhash, names, cmap):
    """Return the well-known key name whose cmap-encoded hash matches
    keyhash, or keyhash itself when unrecognized."""
    for name in names:
        if encodeHash(name, cmap) == keyhash:
            return name
    return keyhash


def _getKinfValue(items, firstitem, cmap):
    """Collect and decode the contents records for one kinf entry.

    firstitem is the already-popped header record; the count of follow-on
    records is charMap5-encoded after its ':' split char in both file
    generations.  Pops that many records from items and undoes the
    rotation applied to the encoded contents.
    """
    rcnt = int(decode(firstitem[34:], charMap5))
    edlst = []
    for i in xrange(rcnt):
        edlst.append(items.pop(0))
    encdata = "".join(edlst)
    contlen = len(encdata)
    # the encoded contents were rotated to defeat naive decoding: the first
    # noffset chars were moved to the end, where
    # noffset = len - largest prime <= int(len/3)
    noffset = contlen - primes(int(contlen / 3))[-1]
    pfx = encdata[0:noffset]
    encdata = encdata[noffset:] + pfx
    return decode(encdata, cmap)


# determine type of kindle info provided and return a
# database of keynames and values
def getDBfromFile(kInfoFile):
    """Parse any generation of Kindle for Mac info file.

    Returns a dict mapping key names to decrypted values, or None when no
    records could be extracted.
    """
    names = ["kindle.account.tokens","kindle.cookie.item","eulaVersionAccepted","login_date","kindle.token.item","login","kindle.key.item","kindle.name.info","kindle.device.info", "MazamaRandomNumber", "max_date", "SIGVERIF"]
    DB = {}
    cnt = 0
    # read hdr byte + remainder, then close (the original leaked the handle)
    infoReader = open(kInfoFile, 'r')
    try:
        hdr = infoReader.read(1)
        data = infoReader.read()
    finally:
        infoReader.close()

    if data.find('[') != -1 :
        # older style kindle-info file: '['-separated "hash:value" records
        cud = CryptUnprotectData()
        for item in data.split('['):
            if item != '':
                keyhash, rawdata = item.split(':')
                keyname = _keynameForHash(keyhash, names, charMap2)
                encryptedValue = decode(rawdata, charMap2)
                DB[keyname] = cud.decrypt(encryptedValue)
                cnt = cnt + 1
        if cnt == 0:
            DB = None
        return DB

    if hdr == '/':
        # newer style .kinf file used by K4Mac >= 1.6.0; records are
        # '/'-separated, so drop the trailing '/' to make split clean
        data = data[:-1]
        items = data.split('/')
        cud = CryptUnprotectDataV2()
        while len(items) > 0:
            item = items.pop(0)
            # first 32 chars are the charMap5-encoded MD5 hash of the key name
            keyhash = item[0:32]
            # "entropy" from the keyhash is not used for K4Mac, only K4PC
            encryptedValue = _getKinfValue(items, item, charMap5)
            keyname = _keynameForHash(keyhash, names, charMap5)
            DB[keyname] = cud.decrypt(encryptedValue)
            cnt = cnt + 1
        if cnt == 0:
            DB = None
        return DB

    # the latest .kinf2011 version for K4M 1.9.1
    # put back the hdr char, it is needed
    data = hdr + data
    data = data[:-1]
    items = data.split('/')

    # the headerblob is the encrypted information needed to build the entropy string
    headerblob = items.pop(0)
    encryptedValue = decode(headerblob, charMap1)
    cleartext = UnprotectHeaderData(encryptedValue)

    # this version differs from K4PC: it scales the build number by 735 (0x2df)
    pattern = re.compile(r'''\[Version:(\d+)\]\[Build:(\d+)\]\[Cksum:([^\]]+)\]\[Guid:([\{\}a-z0-9\-]+)\]''', re.IGNORECASE)
    for m in re.finditer(pattern, cleartext):
        entropy = str(int(m.group(2)) * 0x2df) + m.group(4)

    cud = CryptUnprotectDataV3(entropy)

    while len(items) > 0:
        item = items.pop(0)
        # key-name hashes use testMap8 here; record counts still use charMap5
        keyhash = item[0:32]
        encryptedValue = _getKinfValue(items, item, testMap8)
        keyname = _keynameForHash(keyhash, names, testMap8)
        DB[keyname] = cud.decrypt(encryptedValue)
        cnt = cnt + 1

    if cnt == 0:
        DB = None
    return DB
#! /usr/bin/python # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab -# For use with Topaz Scripts Version 2.6 -import csv +class Unbuffered: + def __init__(self, stream): + self.stream = stream + def write(self, data): + self.stream.write(data) + self.stream.flush() + def __getattr__(self, attr): + return getattr(self.stream, attr) + import sys +sys.stdout=Unbuffered(sys.stdout) + +import csv import os import getopt from struct import pack from struct import unpack +class TpzDRMError(Exception): + pass + +# local support routines +if 'calibre' in sys.modules: + inCalibre = True +else: + inCalibre = False + +if inCalibre : + from calibre_plugins.k4mobidedrm import convert2xml + from calibre_plugins.k4mobidedrm import flatxml2html + from calibre_plugins.k4mobidedrm import flatxml2svg + from calibre_plugins.k4mobidedrm import stylexml2css +else : + import convert2xml + import flatxml2html + import flatxml2svg + import stylexml2css + +# global switch +buildXML = False + +# Get a 7 bit encoded number from a file +def readEncodedNumber(file): + flag = False + c = file.read(1) + if (len(c) == 0): + return None + data = ord(c) + if data == 0xFF: + flag = True + c = file.read(1) + if (len(c) == 0): + return None + data = ord(c) + if data >= 0x80: + datax = (data & 0x7F) + while data >= 0x80 : + c = file.read(1) + if (len(c) == 0): + return None + data = ord(c) + datax = (datax <<7) + (data & 0x7F) + data = datax + if flag: + data = -data + return data + +# Get a length prefixed string from the file +def lengthPrefixString(data): + return encodeNumber(len(data))+data + +def readString(file): + stringLength = readEncodedNumber(file) + if (stringLength == None): + return None + sv = file.read(stringLength) + if (len(sv) != stringLength): + return "" + return unpack(str(stringLength)+"s",sv)[0] + +def getMetaArray(metaFile): + # parse the meta file + result = {} + fo = file(metaFile,'rb') + size = readEncodedNumber(fo) + for i in xrange(size): + tag = readString(fo) + value = 
readString(fo) + result[tag] = value + # print tag, value + fo.close() + return result + + +# dictionary of all text strings by index value +class Dictionary(object): + def __init__(self, dictFile): + self.filename = dictFile + self.size = 0 + self.fo = file(dictFile,'rb') + self.stable = [] + self.size = readEncodedNumber(self.fo) + for i in xrange(self.size): + self.stable.append(self.escapestr(readString(self.fo))) + self.pos = 0 + def escapestr(self, str): + str = str.replace('&','&') + str = str.replace('<','<') + str = str.replace('>','>') + str = str.replace('=','=') + return str + def lookup(self,val): + if ((val >= 0) and (val < self.size)) : + self.pos = val + return self.stable[self.pos] + else: + print "Error - %d outside of string table limits" % val + raise TpzDRMError('outside or string table limits') + # sys.exit(-1) + def getSize(self): + return self.size + def getPos(self): + return self.pos + -class DocParser(object): - def __init__(self, flatxml, fontsize, ph, pw): +class PageDimParser(object): + def __init__(self, flatxml): self.flatdoc = flatxml.split('\n') - self.fontsize = int(fontsize) - self.ph = int(ph) * 1.0 - self.pw = int(pw) * 1.0 - - stags = { - 'paragraph' : 'p', - 'graphic' : '.graphic' - } - - attr_val_map = { - 'hang' : 'text-indent: ', - 'indent' : 'text-indent: ', - 'line-space' : 'line-height: ', - 'margin-bottom' : 'margin-bottom: ', - 'margin-left' : 'margin-left: ', - 'margin-right' : 'margin-right: ', - 'margin-top' : 'margin-top: ', - 'space-after' : 'padding-bottom: ', - } - - attr_str_map = { - 'align-center' : 'text-align: center; margin-left: auto; margin-right: auto;', - 'align-left' : 'text-align: left;', - 'align-right' : 'text-align: right;', - 'align-justify' : 'text-align: justify;', - 'display-inline' : 'display: inline;', - 'pos-left' : 'text-align: left;', - 'pos-right' : 'text-align: right;', - 'pos-center' : 'text-align: center; margin-left: auto; margin-right: auto;', - } - - # find tag if within pos to 
end inclusive def findinDoc(self, tagpath, pos, end) : result = None @@ -58,198 +142,568 @@ class DocParser(object): for j in xrange(pos, end): item = docList[j] if item.find('=') >= 0: - (name, argres) = item.split('=',1) - else : + (name, argres) = item.split('=') + else : name = item argres = '' - if name.endswith(tagpath) : + if name.endswith(tagpath) : result = argres foundat = j break return foundat, result - - - # return list of start positions for the tagpath - def posinDoc(self, tagpath): - startpos = [] - pos = 0 - res = "" - while res != None : - (foundpos, res) = self.findinDoc(tagpath, pos, -1) - if res != None : - startpos.append(foundpos) - pos = foundpos + 1 - return startpos - - # returns a vector of integers for the tagpath - def getData(self, tagpath, pos, end): - argres=[] - (foundat, argt) = self.findinDoc(tagpath, pos, end) - if (argt != None) and (len(argt) > 0) : - argList = argt.split('|') - argres = [ int(strval) for strval in argList] - return argres - def process(self): + (pos, sph) = self.findinDoc('page.h',0,-1) + (pos, spw) = self.findinDoc('page.w',0,-1) + if (sph == None): sph = '-1' + if (spw == None): spw = '-1' + return sph, spw - classlst = '' - csspage = '.cl-center { text-align: center; margin-left: auto; margin-right: auto; }\n' - csspage += '.cl-right { text-align: right; }\n' - csspage += '.cl-left { text-align: left; }\n' - csspage += '.cl-justify { text-align: justify; }\n' - - # generate a list of each