#! /usr/bin/python
# -*- coding: utf-8 -*-
-# ineptepub.pyw, version 5.2
+# ineptepub.pyw, version 5.4
# Copyright © 2009-2010 i♥cabbages
# Released under the terms of the GNU General Public Licence, version 3 or
# 5.1 - Improve OpenSSL error checking
# 5.2 - Fix ctypes error causing segfaults on some systems
# 5.3 - add support for OpenSSL on Windows, fix bug with some versions of libcrypto 0.9.8 prior to patch level o
+# 5.4 - add support for encoding to 'utf-8' when building up list of files to decrypt from encryption.xml
+
"""
Decrypt Adobe ADEPT-encrypted EPUB books.
"""
for elem in encryption.findall(expr):
path = elem.get('URI', None)
if path is not None:
+ path = path.encode('utf-8')
encrypted.add(path)
def decompress(self, bytes):
--- /dev/null
+#! /usr/bin/python
+# -*- coding: utf-8 -*-
+
+# ineptkey.pyw, version 5
+# Copyright © 2009-2010 i♥cabbages
+
+# Released under the terms of the GNU General Public Licence, version 3 or
+# later. <http://www.gnu.org/licenses/>
+
+# Windows users: Before running this program, you must first install Python 2.6
+# from <http://www.python.org/download/> and PyCrypto from
+# <http://www.voidspace.org.uk/python/modules.shtml#pycrypto> (make certain
+# to install the version for Python 2.6). Then save this script file as
+# ineptkey.pyw and double-click on it to run it. It will create a file named
+# adeptkey.der in the same directory. This is your ADEPT user key.
+#
+# Mac OS X users: Save this script file as ineptkey.pyw. You can run this
+# program from the command line (pythonw ineptkey.pyw) or by double-clicking
+# it when it has been associated with PythonLauncher. It will create a file
+# named adeptkey.der in the same directory. This is your ADEPT user key.
+
+# Revision history:
+# 1 - Initial release, for Adobe Digital Editions 1.7
+# 2 - Better algorithm for finding pLK; improved error handling
+# 3 - Rename to INEPT
+# 4 - Series of changes by joblack (and others?) --
+# 4.1 - quick beta fix for ADE 1.7.2 (anon)
+# 4.2 - added old 1.7.1 processing
+# 4.3 - better key search
+# 4.4 - Make it working on 64-bit Python
+# 5 - Clean up and improve 4.x changes;
+# Clean up and merge OS X support by unknown
+# 5.1 - add support for using OpenSSL on Windows in place of PyCrypto
+# 5.2 - added support for output of key to a particular file
+
+"""
+Retrieve Adobe ADEPT user key.
+"""
+
+from __future__ import with_statement
+
+__license__ = 'GPL v3'
+
+import sys
+import os
+import struct
+import Tkinter
+import Tkconstants
+import tkMessageBox
+import traceback
+
class ADEPTError(Exception):
    """Raised for any ADEPT key-retrieval failure."""
    pass
+
+if sys.platform.startswith('win'):
+ from ctypes import windll, c_char_p, c_wchar_p, c_uint, POINTER, byref, \
+ create_unicode_buffer, create_string_buffer, CFUNCTYPE, addressof, \
+ string_at, Structure, c_void_p, cast, c_size_t, memmove, CDLL, c_int, \
+ c_long, c_ulong
+
+ from ctypes.wintypes import LPVOID, DWORD, BOOL
+ import _winreg as winreg
+
    def _load_crypto_libcrypto():
        """Build an AES-CBC decryptor class backed by OpenSSL (libeay32.dll).

        Returns an AES class; raises ADEPTError if libcrypto cannot be
        found or a key fails to initialize.
        """
        from ctypes.util import find_library
        libcrypto = find_library('libeay32')
        if libcrypto is None:
            raise ADEPTError('libcrypto not found')
        libcrypto = CDLL(libcrypto)
        # Mirrors OpenSSL's AES_KEY struct; AES_MAXNR is the maximum
        # number of AES rounds (14, for 256-bit keys).
        AES_MAXNR = 14
        c_char_pp = POINTER(c_char_p)
        c_int_p = POINTER(c_int)
        class AES_KEY(Structure):
            _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))),
                        ('rounds', c_int)]
        AES_KEY_p = POINTER(AES_KEY)

        def F(restype, name, argtypes):
            # Bind one libcrypto function with explicit ctypes signatures.
            func = getattr(libcrypto, name)
            func.restype = restype
            func.argtypes = argtypes
            return func

        AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',
                                [c_char_p, c_int, AES_KEY_p])
        AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',
                            [c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p,
                             c_int])
        class AES(object):
            def __init__(self, userkey):
                # The key length is reused below as the IV buffer length.
                self._blocksize = len(userkey)
                if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) :
                    raise ADEPTError('AES improper key used')
                key = self._key = AES_KEY()
                rv = AES_set_decrypt_key(userkey, len(userkey) * 8, key)
                if rv < 0:
                    raise ADEPTError('Failed to initialize AES key')
            def decrypt(self, data):
                out = create_string_buffer(len(data))
                # All-zero IV.  NOTE(review): the IV buffer is sized by the
                # key length, not the 16-byte AES block size -- confirm this
                # is intentional for 24/32-byte keys.
                iv = ("\x00" * self._blocksize)
                rv = AES_cbc_encrypt(data, out, len(data), self._key, iv, 0)
                if rv == 0:
                    raise ADEPTError('AES decryption failed')
                return out.raw
        return AES
+
    def _load_crypto_pycrypto():
        """AES-CBC decryptor class backed by PyCrypto (OpenSSL fallback)."""
        from Crypto.Cipher import AES as _AES
        class AES(object):
            def __init__(self, key):
                # NOTE(review): no IV is passed; this relies on old PyCrypto
                # defaulting to a zero IV, matching the libcrypto path above
                # -- confirm against the installed PyCrypto version.
                self._aes = _AES.new(key, _AES.MODE_CBC)
            def decrypt(self, data):
                return self._aes.decrypt(data)
        return AES
+
+ def _load_crypto():
+ AES = None
+ for loader in (_load_crypto_libcrypto, _load_crypto_pycrypto):
+ try:
+ AES = loader()
+ break
+ except (ImportError, ADEPTError):
+ pass
+ return AES
+
    AES = _load_crypto()


    # Registry locations where Adobe Digital Editions stores the
    # DPAPI-protected device key and the activation/licence records.
    DEVICE_KEY_PATH = r'Software\Adobe\Adept\Device'
    PRIVATE_LICENCE_KEY_PATH = r'Software\Adobe\Adept\Activation'

    # Classic Win32 maximum path length, used for buffer sizing below.
    MAX_PATH = 255

    kernel32 = windll.kernel32
    advapi32 = windll.advapi32
    crypt32 = windll.crypt32
+
    # Each wrapper below is a factory: the outer function binds the Win32
    # API with explicit ctypes signatures once, returns the inner wrapper,
    # and is immediately replaced by it.

    def GetSystemDirectory():
        GetSystemDirectoryW = kernel32.GetSystemDirectoryW
        GetSystemDirectoryW.argtypes = [c_wchar_p, c_uint]
        GetSystemDirectoryW.restype = c_uint
        def GetSystemDirectory():
            """Return the Windows system directory path."""
            buffer = create_unicode_buffer(MAX_PATH + 1)
            GetSystemDirectoryW(buffer, len(buffer))
            return buffer.value
        return GetSystemDirectory
    GetSystemDirectory = GetSystemDirectory()

    def GetVolumeSerialNumber():
        GetVolumeInformationW = kernel32.GetVolumeInformationW
        GetVolumeInformationW.argtypes = [c_wchar_p, c_wchar_p, c_uint,
                                          POINTER(c_uint), POINTER(c_uint),
                                          POINTER(c_uint), c_wchar_p, c_uint]
        GetVolumeInformationW.restype = c_uint
        def GetVolumeSerialNumber(path):
            """Return the serial number of the volume mounted at *path*."""
            vsn = c_uint(0)
            GetVolumeInformationW(
                path, None, 0, byref(vsn), None, None, None, 0)
            return vsn.value
        return GetVolumeSerialNumber
    GetVolumeSerialNumber = GetVolumeSerialNumber()

    def GetUserName():
        GetUserNameW = advapi32.GetUserNameW
        GetUserNameW.argtypes = [c_wchar_p, POINTER(c_uint)]
        GetUserNameW.restype = c_uint
        def GetUserName():
            """Return the current user name as a byte string.

            The buffer is doubled until the API succeeds.  Encoding to
            UTF-16-LE and slicing [::2] keeps only the low byte of each
            code unit (a Latin-1-style narrowing).
            """
            buffer = create_unicode_buffer(32)
            size = c_uint(len(buffer))
            while not GetUserNameW(buffer, byref(size)):
                buffer = create_unicode_buffer(len(buffer) * 2)
                size.value = len(buffer)
            return buffer.value.encode('utf-16-le')[::2]
        return GetUserName
    GetUserName = GetUserName()

    # The allocation must be executable: the CPUID shellcode below runs
    # directly out of this buffer.
    PAGE_EXECUTE_READWRITE = 0x40
    MEM_COMMIT = 0x1000
    MEM_RESERVE = 0x2000

    def VirtualAlloc():
        _VirtualAlloc = kernel32.VirtualAlloc
        _VirtualAlloc.argtypes = [LPVOID, c_size_t, DWORD, DWORD]
        _VirtualAlloc.restype = LPVOID
        def VirtualAlloc(addr, size, alloctype=(MEM_COMMIT | MEM_RESERVE),
                         protect=PAGE_EXECUTE_READWRITE):
            """Allocate *size* bytes (read/write/execute by default)."""
            return _VirtualAlloc(addr, size, alloctype, protect)
        return VirtualAlloc
    VirtualAlloc = VirtualAlloc()

    MEM_RELEASE = 0x8000

    def VirtualFree():
        _VirtualFree = kernel32.VirtualFree
        _VirtualFree.argtypes = [LPVOID, c_size_t, DWORD]
        _VirtualFree.restype = BOOL
        def VirtualFree(addr, size=0, freetype=MEM_RELEASE):
            """Release memory previously obtained via VirtualAlloc."""
            return _VirtualFree(addr, size, freetype)
        return VirtualFree
    VirtualFree = VirtualFree()
+
    class NativeFunction(object):
        """Callable wrapping raw machine code copied into executable memory."""

        def __init__(self, restype, argtypes, insns):
            # Copy the instruction bytes into an RWX buffer and wrap the
            # buffer address in a ctypes function pointer.
            self._buf = buf = VirtualAlloc(None, len(insns))
            memmove(buf, insns, len(insns))
            ftype = CFUNCTYPE(restype, *argtypes)
            self._native = ftype(buf)

        def __call__(self, *args):
            return self._native(*args)

        def __del__(self):
            # Release the executable buffer exactly once.
            if self._buf is not None:
                VirtualFree(self._buf)
                self._buf = None
+
    # Raw CPUID shellcode, chosen by pointer width (4 bytes -> 32-bit
    # Python, otherwise 64-bit).  cpuid0 stores the 12-byte CPU vendor
    # string through its pointer argument; cpuid1 returns the CPU
    # signature in eax.
    if struct.calcsize("P") == 4:
        CPUID0_INSNS = (
            "\x53"              # push   %ebx
            "\x31\xc0"          # xor    %eax,%eax
            "\x0f\xa2"          # cpuid
            "\x8b\x44\x24\x08"  # mov    0x8(%esp),%eax
            "\x89\x18"          # mov    %ebx,0x0(%eax)
            "\x89\x50\x04"      # mov    %edx,0x4(%eax)
            "\x89\x48\x08"      # mov    %ecx,0x8(%eax)
            "\x5b"              # pop    %ebx
            "\xc3"              # ret
        )
        CPUID1_INSNS = (
            "\x53"              # push   %ebx
            "\x31\xc0"          # xor    %eax,%eax
            "\x40"              # inc    %eax
            "\x0f\xa2"          # cpuid
            "\x5b"              # pop    %ebx
            "\xc3"              # ret
        )
    else:
        CPUID0_INSNS = (
            "\x49\x89\xd8"      # mov    %rbx,%r8
            "\x49\x89\xc9"      # mov    %rcx,%r9
            "\x48\x31\xc0"      # xor    %rax,%rax
            "\x0f\xa2"          # cpuid
            "\x4c\x89\xc8"      # mov    %r9,%rax
            "\x89\x18"          # mov    %ebx,0x0(%rax)
            "\x89\x50\x04"      # mov    %edx,0x4(%rax)
            "\x89\x48\x08"      # mov    %ecx,0x8(%rax)
            "\x4c\x89\xc3"      # mov    %r8,%rbx
            "\xc3"              # retq
        )
        CPUID1_INSNS = (
            "\x53"              # push   %rbx
            "\x48\x31\xc0"      # xor    %rax,%rax
            "\x48\xff\xc0"      # inc    %rax
            "\x0f\xa2"          # cpuid
            "\x5b"              # pop    %rbx
            "\xc3"              # retq
        )

    def cpuid0():
        # Compile the stub and allocate the 12-byte result buffer once;
        # the inner closure reuses both on every call.
        _cpuid0 = NativeFunction(None, [c_char_p], CPUID0_INSNS)
        buf = create_string_buffer(12)
        def cpuid0():
            """Return the 12-byte CPU vendor identification string."""
            _cpuid0(buf)
            return buf.raw
        return cpuid0
    cpuid0 = cpuid0()

    cpuid1 = NativeFunction(c_uint, [], CPUID1_INSNS)
+
    class DataBlob(Structure):
        # Win32 DATA_BLOB: (length, pointer) pair used by the DPAPI calls.
        _fields_ = [('cbData', c_uint),
                    ('pbData', c_void_p)]
    DataBlob_p = POINTER(DataBlob)

    def CryptUnprotectData():
        _CryptUnprotectData = crypt32.CryptUnprotectData
        _CryptUnprotectData.argtypes = [DataBlob_p, c_wchar_p, DataBlob_p,
                                        c_void_p, c_void_p, c_uint, DataBlob_p]
        _CryptUnprotectData.restype = c_uint
        def CryptUnprotectData(indata, entropy):
            """DPAPI-decrypt *indata* with *entropy*; return plaintext bytes.

            Raises ADEPTError on failure.  NOTE(review): the DPAPI output
            buffer is never LocalFree'd -- a small one-shot leak.
            """
            indatab = create_string_buffer(indata)
            indata = DataBlob(len(indata), cast(indatab, c_void_p))
            entropyb = create_string_buffer(entropy)
            entropy = DataBlob(len(entropy), cast(entropyb, c_void_p))
            outdata = DataBlob()
            if not _CryptUnprotectData(byref(indata), None, byref(entropy),
                                       None, None, 0, byref(outdata)):
                raise ADEPTError("Failed to decrypt user key key (sic)")
            return string_at(outdata.pbData, outdata.cbData)
        return CryptUnprotectData
    CryptUnprotectData = CryptUnprotectData()
+
    def retrieve_key(keypath):
        """Extract the ADEPT user key on Windows and write it to *keypath*.

        Rebuilds machine-specific entropy (volume serial, CPU vendor,
        CPU signature, truncated user name) -- presumably mirroring what
        ADE fed to DPAPI when protecting the device key -- then
        DPAPI-decrypts the registry device key and uses it to AES-decrypt
        the privateLicenseKey.  Returns True on success, False when no
        crypto backend is available; raises ADEPTError otherwise.
        """
        if AES is None:
            tkMessageBox.showerror(
                "ADEPT Key",
                "This script requires PyCrypto or OpenSSL which must be installed "
                "separately. Read the top-of-script comment for details.")
            return False
        root = GetSystemDirectory().split('\\')[0] + '\\'
        serial = GetVolumeSerialNumber(root)
        vendor = cpuid0()
        # Keep only the low 3 bytes of the CPU signature.
        signature = struct.pack('>I', cpuid1())[1:]
        user = GetUserName()
        entropy = struct.pack('>I12s3s13s', serial, vendor, signature, user)
        cuser = winreg.HKEY_CURRENT_USER
        try:
            regkey = winreg.OpenKey(cuser, DEVICE_KEY_PATH)
        except WindowsError:
            raise ADEPTError("Adobe Digital Editions not activated")
        device = winreg.QueryValueEx(regkey, 'key')[0]
        keykey = CryptUnprotectData(device, entropy)
        userkey = None
        try:
            plkroot = winreg.OpenKey(cuser, PRIVATE_LICENCE_KEY_PATH)
        except WindowsError:
            raise ADEPTError("Could not locate ADE activation")
        # Scan up to 16 numbered 'credentials' entries, each with up to 16
        # numbered children, for the privateLicenseKey value.
        for i in xrange(0, 16):
            try:
                plkparent = winreg.OpenKey(plkroot, "%04d" % (i,))
            except WindowsError:
                break
            ktype = winreg.QueryValueEx(plkparent, None)[0]
            if ktype != 'credentials':
                continue
            for j in xrange(0, 16):
                try:
                    plkkey = winreg.OpenKey(plkparent, "%04d" % (j,))
                except WindowsError:
                    break
                ktype = winreg.QueryValueEx(plkkey, None)[0]
                if ktype != 'privateLicenseKey':
                    continue
                userkey = winreg.QueryValueEx(plkkey, 'value')[0]
                break
            if userkey is not None:
                break
        if userkey is None:
            raise ADEPTError('Could not locate privateLicenseKey')
        userkey = userkey.decode('base64')
        aes = AES(keykey)
        userkey = aes.decrypt(userkey)
        # Drop the 26-byte header and the trailing padding (last byte
        # gives the pad length).
        userkey = userkey[26:-ord(userkey[-1])]
        with open(keypath, 'wb') as f:
            f.write(userkey)
        return True
+
+elif sys.platform.startswith('darwin'):
+ import xml.etree.ElementTree as etree
+ import Carbon.File
+ import Carbon.Folder
+ import Carbon.Folders
+ import MacOS
+
+ ACTIVATION_PATH = 'Adobe/Digital Editions/activation.dat'
+ NSMAP = {'adept': 'http://ns.adobe.com/adept',
+ 'enc': 'http://www.w3.org/2001/04/xmlenc#'}
+
    def find_folder(domain, dtype):
        """Resolve a Carbon folder reference to a path, or None on error."""
        try:
            fsref = Carbon.Folder.FSFindFolder(domain, dtype, False)
            return Carbon.File.pathname(fsref)
        except MacOS.Error:
            return None
+
    def find_app_support_file(subpath):
        """Look for *subpath* under the user's then the machine's
        Application Support folder; return the first hit or None."""
        dtype = Carbon.Folders.kApplicationSupportFolderType
        for domain in Carbon.Folders.kUserDomain, Carbon.Folders.kLocalDomain:
            path = find_folder(domain, dtype)
            if path is None:
                continue
            path = os.path.join(path, subpath)
            if os.path.isfile(path):
                return path
        return None
+
    def retrieve_key(keypath):
        """Extract the ADEPT user key on Mac OS X and write it to *keypath*.

        Reads the privateLicenseKey from ADE's activation.dat XML,
        base64-decodes it and strips the 26-byte header; no DPAPI/AES
        step is performed on this platform.
        """
        actpath = find_app_support_file(ACTIVATION_PATH)
        if actpath is None:
            raise ADEPTError("Could not locate ADE activation")
        tree = etree.parse(actpath)
        adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag)
        expr = '//%s/%s' % (adept('credentials'), adept('privateLicenseKey'))
        userkey = tree.findtext(expr)
        userkey = userkey.decode('base64')
        userkey = userkey[26:]
        with open(keypath, 'wb') as f:
            f.write(userkey)
        return True
+
+elif sys.platform.startswith('cygwin'):
    def retrieve_key(keypath):
        """Stub: Cygwin Python cannot reach the Win32 APIs this needs."""
        tkMessageBox.showerror(
            "ADEPT Key",
            "This script requires a Windows-native Python, and cannot be run "
            "under Cygwin. Please install a Windows-native Python and/or "
            "check your file associations.")
        return False
+
+else:
    def retrieve_key(keypath):
        """Stub: unsupported platform (only Windows and Mac OS X work)."""
        tkMessageBox.showerror(
            "ADEPT Key",
            "This script only supports Windows and Mac OS X. For Linux "
            "you should be able to run ADE and this script under Wine (with "
            "an appropriate version of Windows Python installed).")
        return False
+
class ExceptionDialog(Tkinter.Frame):
    """Tk frame that displays an unexpected traceback in a text widget."""
    def __init__(self, root, text):
        Tkinter.Frame.__init__(self, root, border=5)
        label = Tkinter.Label(self, text="Unexpected error:",
                              anchor=Tkconstants.W, justify=Tkconstants.LEFT)
        label.pack(fill=Tkconstants.X, expand=0)
        self.text = Tkinter.Text(self)
        self.text.pack(fill=Tkconstants.BOTH, expand=1)
        self.text.insert(Tkconstants.END, text)
+
def cli_main(argv=sys.argv):
    """Command-line entry point: write the key to the path in argv[1].

    Returns 0 on success, 1 on any failure (errors go to stdout).
    """
    keypath = argv[1]
    try:
        success = retrieve_key(keypath)
    except ADEPTError, e:
        print "Key generation Error: " + str(e)
        return 1
    except Exception, e:
        print "General Error: " + str(e)
        return 1
    if not success:
        return 1
    return 0
+
def main(argv=sys.argv):
    """GUI entry point: write adeptkey.der, reporting via Tk dialogs.

    Returns 0 on success, 1 on failure.
    """
    root = Tkinter.Tk()
    root.withdraw()
    progname = os.path.basename(argv[0])
    keypath = 'adeptkey.der'
    success = False
    try:
        success = retrieve_key(keypath)
    except ADEPTError, e:
        # Expected failures get a simple message box.
        tkMessageBox.showerror("ADEPT Key", "Error: " + str(e))
    except Exception:
        # Unexpected failures show the full traceback in a window.
        root.wm_state('normal')
        root.title('ADEPT Key')
        text = traceback.format_exc()
        ExceptionDialog(root, text).pack(fill=Tkconstants.BOTH, expand=1)
        root.mainloop()
    if not success:
        return 1
    tkMessageBox.showinfo(
        "ADEPT Key", "Key successfully retrieved to %s" % (keypath))
    return 0
+
if __name__ == '__main__':
    # With an output-path argument run headless; otherwise use the Tk GUI.
    if len(sys.argv) > 1:
        sys.exit(cli_main())
    sys.exit(main())
enc('CipherReference'))
for elem in encryption.findall(expr):
    path = elem.get('URI', None)
    if path is not None:
        # Encode only after the None check -- None has no .encode(),
        # so the previous ordering raised AttributeError on elements
        # without a URI attribute (cf. the corrected sibling hunk).
        path = path.encode('utf-8')
        encrypted.add(path)
return xmlpage
-
+def fromData(dict, fname):
+ flat_xml = True
+ debug = False
+ pp = PageParser(fname, dict, debug, flat_xml)
+ xmlpage = pp.process()
+ return xmlpage
+
+def getXML(dict, fname):
+ flat_xml = False
+ debug = False
+ pp = PageParser(fname, dict, debug, flat_xml)
+ xmlpage = pp.process()
+ return xmlpage
+
def usage():
print 'Usage: '
print ' convert2xml.py dict0000.dat infile.dat '
return xmlpage
if __name__ == '__main__':
- sys.exit(main(''))
\ No newline at end of file
+ sys.exit(main(''))
class DocParser(object):
- def __init__(self, flatxml, classlst, fileid, bookDir, fixedimage):
+ def __init__(self, flatxml, classlst, fileid, bookDir, gdict, fixedimage):
self.id = os.path.basename(fileid).replace('.dat','')
self.svgcount = 0
self.docList = flatxml.split('\n')
self.docSize = len(self.docList)
self.classList = {}
self.bookDir = bookDir
- self.glyphPaths = { }
- self.numPaths = 0
+ self.gdict = gdict
tmpList = classlst.split('\n')
for pclass in tmpList:
if pclass != '':
def getGlyph(self, gid):
result = ''
- id='gl%d' % gid
- return self.glyphPaths[id]
-
+ id='id="gl%d"' % gid
+ return self.gdict.lookup(id)
def glyphs_to_image(self, glyphList):
e = path.find(' ',b)
return int(path[b:e])
- def extractID(path, key):
- b = path.find(key) + len(key)
- e = path.find('"',b)
- return path[b:e]
-
-
svgDir = os.path.join(self.bookDir,'svg')
- glyfile = os.path.join(svgDir,'glyphs.svg')
imgDir = os.path.join(self.bookDir,'img')
imgname = self.id + '_%04d.svg' % self.svgcount
imgfile = os.path.join(imgDir,imgname)
- # build hashtable of glyph paths keyed by glyph id
- if self.numPaths == 0:
- gfile = open(glyfile, 'r')
- while True:
- path = gfile.readline()
- if (path == ''): break
- glyphid = extractID(path,'id="')
- self.glyphPaths[glyphid] = path
- self.numPaths += 1
- gfile.close()
-
-
# get glyph information
gxList = self.getData('info.glyph.x',0,-1)
gyList = self.getData('info.glyph.y',0,-1)
-def convert2HTML(flatxml, classlst, fileid, bookDir, fixedimage):
-
+def convert2HTML(flatxml, classlst, fileid, bookDir, gdict, fixedimage):
# create a document parser
- dp = DocParser(flatxml, classlst, fileid, bookDir, fixedimage)
-
+ dp = DocParser(flatxml, classlst, fileid, bookDir, gdict, fixedimage)
htmlpage = dp.process()
-
return htmlpage
--- /dev/null
+#! /usr/bin/python
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+
+import sys
+import csv
+import os
+import getopt
+from struct import pack
+from struct import unpack
+
+
class PParser(object):
    """Parse one flattened Topaz page document for SVG generation.

    Pulls page dimensions and the per-glyph x/y/id arrays out of the
    'name=value|value|...' flattened-XML lines.
    """
    def __init__(self, gd, flatxml):
        self.gd = gd  # glyph dictionary providing lookup(id) -> path
        self.flatdoc = flatxml.split('\n')
        self.temp = []
        # Page height/width may appear under either 'page.*' or 'book.*'.
        foo = self.getData('page.h') or self.getData('book.h')
        self.ph = foo[0]
        foo = self.getData('page.w') or self.getData('book.w')
        self.pw = foo[0]
        self.gx = self.getData('info.glyph.x')
        self.gy = self.getData('info.glyph.y')
        self.gid = self.getData('info.glyph.glyphID')
    def getData(self, path):
        """Return the int list of the first tag ending in *path*, else None."""
        result = None
        cnt = len(self.flatdoc)
        for j in xrange(cnt):
            item = self.flatdoc[j]
            if item.find('=') >= 0:
                (name, argt) = item.split('=')
                argres = argt.split('|')
            else:
                name = item
                argres = []
            if (name.endswith(path)):
                result = argres
                break
        # Converts the matched list in place to ints.  NOTE(review): when
        # nothing matched, this still converts the *last* line's values;
        # harmless unless they are non-numeric (then int() raises).
        if (len(argres) > 0) :
            for j in xrange(0,len(argres)):
                argres[j] = int(argres[j])
        return result
    def getDataTemp(self, path):
        """Like getData, but consumes the matched line from self.temp."""
        result = None
        cnt = len(self.temp)
        for j in xrange(cnt):
            item = self.temp[j]
            if item.find('=') >= 0:
                (name, argt) = item.split('=')
                argres = argt.split('|')
            else:
                name = item
                argres = []
            if (name.endswith(path)):
                result = argres
                self.temp.pop(j)
                break
        if (len(argres) > 0) :
            for j in xrange(0,len(argres)):
                argres[j] = int(argres[j])
        return result
    def getImages(self):
        """Return SVG <image> elements for every 'img' record on the page."""
        result = []
        self.temp = self.flatdoc
        while (self.getDataTemp('img') != None):
            h = self.getDataTemp('img.h')[0]
            w = self.getDataTemp('img.w')[0]
            x = self.getDataTemp('img.x')[0]
            y = self.getDataTemp('img.y')[0]
            src = self.getDataTemp('img.src')[0]
            result.append('<image xlink:href="../img/img%04d.jpg" x="%d" y="%d" width="%d" height="%d" />\n' % (src, x, y, w, h))
        return result
    def getGlyphs(self):
        """Return '<id> <path>' strings for the unique glyphs on the page."""
        result = []
        if (self.gid != None) and (len(self.gid) > 0):
            glyphs = []
            for j in set(self.gid):
                glyphs.append(j)
            glyphs.sort()
            for gid in glyphs:
                id='id="gl%d"' % gid
                path = self.gd.lookup(id)
                if path:
                    result.append(id + ' ' + path)
        return result
+
+
def convert2SVG(gdict, flat_xml, counter, numfiles, svgDir, raw, meta_array, scaledpi):
    """Render one flattened Topaz page as SVG markup.

    With raw=True returns a standalone SVG document; otherwise returns an
    XHTML page that embeds the SVG plus JavaScript paging/zoom controls.
    meta_array must provide 'Title' and 'Authors'; counter is the
    zero-based page index, numfiles the page count.
    """
    ml = ''
    pp = PParser(gdict, flat_xml)
    ml += '<?xml version="1.0" standalone="no"?>\n'
    if (raw):
        ml += '<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n'
        ml += '<svg width="%fin" height="%fin" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (pp.pw / scaledpi, pp.ph / scaledpi, pp.pw -1, pp.ph -1)
        ml += '<title>Page %d - %s by %s</title>\n' % (counter, meta_array['Title'],meta_array['Authors'])
    else:
        # XHTML wrapper: inline JS implements prev/next paging and zoom by
        # passing the dpi through the query string.
        ml += '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n'
        ml += '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" ><head>\n'
        ml += '<title>Page %d - %s by %s</title>\n' % (counter, meta_array['Title'],meta_array['Authors'])
        ml += '<script><![CDATA[\n'
        ml += 'function gd(){var p=window.location.href.replace(/^.*\?dpi=(\d+).*$/i,"$1");return p;}\n'
        ml += 'var dpi=%d;\n' % scaledpi
        if (counter) :
            ml += 'var prevpage="page%04d.xhtml";\n' % (counter - 1)
        if (counter < numfiles-1) :
            ml += 'var nextpage="page%04d.xhtml";\n' % (counter + 1)
        ml += 'var pw=%d;var ph=%d;' % (pp.pw, pp.ph)
        ml += 'function zoomin(){dpi=dpi*(0.8);setsize();}\n'
        ml += 'function zoomout(){dpi=dpi*1.25;setsize();}\n'
        ml += 'function setsize(){var svg=document.getElementById("svgimg");var prev=document.getElementById("prevsvg");var next=document.getElementById("nextsvg");var width=(pw/dpi)+"in";var height=(ph/dpi)+"in";svg.setAttribute("width",width);svg.setAttribute("height",height);prev.setAttribute("height",height);prev.setAttribute("width","50px");next.setAttribute("height",height);next.setAttribute("width","50px");}\n'
        ml += 'function ppage(){window.location.href=prevpage+"?dpi="+Math.round(dpi);}\n'
        ml += 'function npage(){window.location.href=nextpage+"?dpi="+Math.round(dpi);}\n'
        ml += 'var gt=gd();if(gt>0){dpi=gt;}\n'
        ml += 'window.onload=setsize;\n'
        ml += ']]></script>\n'
        ml += '</head>\n'
        ml += '<body onLoad="setsize();" style="background-color:#777;text-align:center;">\n'
        ml += '<div style="white-space:nowrap;">\n'
        # First page gets an empty (disabled) previous-page arrow.
        if (counter == 0) :
            ml += '<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n'
        else:
            ml += '<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,150,95,5,95,295" fill="#AAAAAA" /></svg></a>\n'
        ml += '<a href="javascript:npage();"><svg id="svgimg" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" style="background-color:#FFF;border:1px solid black;">' % (pp.pw, pp.ph)
    # Emit glyph defs once, then images, then one <use> per glyph placement.
    if (pp.gid != None):
        ml += '<defs>\n'
        gdefs = pp.getGlyphs()
        for j in xrange(0,len(gdefs)):
            ml += gdefs[j]
        ml += '</defs>\n'
    img = pp.getImages()
    if (img != None):
        for j in xrange(0,len(img)):
            ml += img[j]
    if (pp.gid != None):
        for j in xrange(0,len(pp.gid)):
            ml += '<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (pp.gid[j], pp.gx[j], pp.gy[j])
    if (img == None or len(img) == 0) and (pp.gid == None or len(pp.gid) == 0):
        ml += '<text x="10" y="10" font-family="Helvetica" font-size="100" stroke="black">This page intentionally left blank.</text>\n<text x="10" y="110" font-family="Helvetica" font-size="50" stroke="black">Until this notice unintentionally gave it content. (gensvg.py)</text>\n'
    if (raw) :
        ml += '</svg>'
    else :
        ml += '</svg></a>\n'
        # Last page gets an empty (disabled) next-page arrow.
        if (counter == numfiles - 1) :
            ml += '<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n'
        else :
            ml += '<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,5,5,295,95,150" fill="#AAAAAA" /></svg></a>\n'
        ml += '</div>\n'
        ml += '<div><a href="javascript:zoomin();">zoom in</a> - <a href="javascript:zoomout();">zoom out</a></div>\n'
        ml += '</body>\n'
        ml += '</html>\n'
    return ml
+
--- /dev/null
+#! /usr/bin/python
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+
class Unbuffered:
    """File-like proxy that flushes the wrapped stream after every write."""

    def __init__(self, stream):
        self.stream = stream

    def write(self, data):
        target = self.stream
        target.write(data)
        target.flush()

    def __getattr__(self, attr):
        # Fall through to the wrapped stream for everything else.
        return getattr(self.stream, attr)
+
+import sys
+sys.stdout=Unbuffered(sys.stdout)
+
+import csv
+import os
+import getopt
+from struct import pack
+from struct import unpack
+
+
+# local support routines
+import convert2xml
+import flatxml2html
+import flatxml2svg
+import stylexml2css
+
+
+# Get a 7 bit encoded number from a file
def readEncodedNumber(file):
    """Read a 7-bit variable-length integer from *file*.

    A leading 0xFF byte marks a negative value.  Bytes with the high bit
    set are continuation bytes; each contributes its low 7 bits.  Returns
    None on end-of-file.
    """
    negative = False
    byte = file.read(1)
    if len(byte) == 0:
        return None
    value = ord(byte)
    if value == 0xFF:
        negative = True
        byte = file.read(1)
        if len(byte) == 0:
            return None
        value = ord(byte)
    if value >= 0x80:
        accum = value & 0x7F
        while value >= 0x80:
            byte = file.read(1)
            if len(byte) == 0:
                return None
            value = ord(byte)
            accum = (accum << 7) | (value & 0x7F)
        value = accum
    return -value if negative else value
+
+# Get a length prefixed string from the file
def lengthPrefixString(data):
    # Prefix *data* with its 7-bit-encoded length.
    # NOTE(review): encodeNumber is not defined in this file as shown --
    # presumably supplied by a sibling module; confirm before use.
    return encodeNumber(len(data))+data
+
def readString(file):
    """Read a length-prefixed string from *file*.

    Returns None at end-of-file and "" if the payload is truncated.
    """
    length = readEncodedNumber(file)
    if length is None:
        return None
    payload = file.read(length)
    if len(payload) != length:
        return ""
    return unpack(str(length) + "s", payload)[0]
+
def getMetaArray(metaFile):
    """Parse the Topaz metadata file into a {tag: value} dict.

    The file is a 7-bit-encoded count followed by that many
    (tag, value) string pairs.
    """
    # parse the meta file
    result = {}
    fo = file(metaFile,'rb')
    size = readEncodedNumber(fo)
    for i in xrange(size):
        tag = readString(fo)
        value = readString(fo)
        result[tag] = value
    # print tag, value
    fo.close()
    return result
+
+
+# dictionary of all text strings by index value
+class Dictionary(object):
+ def __init__(self, dictFile):
+ self.filename = dictFile
+ self.size = 0
+ self.fo = file(dictFile,'rb')
+ self.stable = []
+ self.size = readEncodedNumber(self.fo)
+ for i in xrange(self.size):
+ self.stable.append(self.escapestr(readString(self.fo)))
+ self.pos = 0
+ def escapestr(self, str):
+ str = str.replace('&','&')
+ str = str.replace('<','<')
+ str = str.replace('>','>')
+ str = str.replace('=','=')
+ return str
+ def lookup(self,val):
+ if ((val >= 0) and (val < self.size)) :
+ self.pos = val
+ return self.stable[self.pos]
+ else:
+ print "Error - %d outside of string table limits" % val
+ sys.exit(-1)
+ def getSize(self):
+ return self.size
+ def getPos(self):
+ return self.pos
+
+
class PageDimParser(object):
    """Extract the page height/width from a flattened page document."""
    def __init__(self, flatxml):
        self.flatdoc = flatxml.split('\n')
    # find tag if within pos to end inclusive
    def findinDoc(self, tagpath, pos, end) :
        """Return (index, value) of the first tag ending in *tagpath*
        within [pos, end); (-1, None) if absent.  end == -1 means
        'to the end of the document'."""
        result = None
        docList = self.flatdoc
        cnt = len(docList)
        if end == -1 :
            end = cnt
        else:
            end = min(cnt,end)
        foundat = -1
        for j in xrange(pos, end):
            item = docList[j]
            if item.find('=') >= 0:
                (name, argres) = item.split('=')
            else :
                name = item
                argres = ''
            if name.endswith(tagpath) :
                result = argres
                foundat = j
                break
        return foundat, result
    def process(self):
        """Return (height, width) as strings, '-1' when missing."""
        (pos, sph) = self.findinDoc('page.h',0,-1)
        (pos, spw) = self.findinDoc('page.w',0,-1)
        if (sph == None): sph = '-1'
        if (spw == None): spw = '-1'
        return sph, spw
+
def getPageDim(flatxml):
    """Return (page-height, page-width) strings parsed from *flatxml*."""
    parser = PageDimParser(flatxml)
    height, width = parser.process()
    return height, width
+
class GParser(object):
    """Parse glyph geometry from a flattened glyph document and render
    each glyph as an SVG path at a fixed 1440 dpi target resolution."""
    def __init__(self, flatxml):
        self.flatdoc = flatxml.split('\n')
        self.dpi = 1440  # output resolution for path coordinates
        self.gh = self.getData('info.glyph.h')
        self.gw = self.getData('info.glyph.w')
        self.guse = self.getData('info.glyph.use')
        if self.guse :
            self.count = len(self.guse)
        else :
            self.count = 0
        self.gvtx = self.getData('info.glyph.vtx')
        self.glen = self.getData('info.glyph.len')
        self.gdpi = self.getData('info.glyph.dpi')
        self.vx = self.getData('info.vtx.x')
        self.vy = self.getData('info.vtx.y')
        self.vlen = self.getData('info.len.n')
        # Append sentinel end offsets so getPath can slice with gly+1.
        if self.vlen :
            self.glen.append(len(self.vlen))
        elif self.glen:
            self.glen.append(0)
        if self.vx :
            self.gvtx.append(len(self.vx))
        elif self.gvtx :
            self.gvtx.append(0)
    def getData(self, path):
        """Return the int list of the first tag named exactly *path*."""
        result = None
        cnt = len(self.flatdoc)
        for j in xrange(cnt):
            item = self.flatdoc[j]
            if item.find('=') >= 0:
                (name, argt) = item.split('=')
                argres = argt.split('|')
            else:
                name = item
                argres = []
            if (name == path):
                result = argres
                break
        if (len(argres) > 0) :
            for j in xrange(0,len(argres)):
                argres[j] = int(argres[j])
        return result
    def getGlyphDim(self, gly):
        """Return (height, width) of glyph *gly* scaled to self.dpi."""
        maxh = (self.gh[gly] * self.dpi) / self.gdpi[gly]
        maxw = (self.gw[gly] * self.dpi) / self.gdpi[gly]
        return maxh, maxw
    def getPath(self, gly):
        """Build the SVG path string for glyph *gly* ('' if out of range).

        Each sub-contour starts with a moveto, continues with cubic
        Beziers, and closes back to its start with a cubic or quadratic
        segment.  Coordinates are rescaled from the glyph's own dpi.
        """
        path = ''
        if (gly < 0) or (gly >= self.count):
            return path
        tx = self.vx[self.gvtx[gly]:self.gvtx[gly+1]]
        ty = self.vy[self.gvtx[gly]:self.gvtx[gly+1]]
        p = 0
        for k in xrange(self.glen[gly], self.glen[gly+1]):
            if (p == 0):
                zx = tx[0:self.vlen[k]+1]
                zy = ty[0:self.vlen[k]+1]
            else:
                zx = tx[self.vlen[k-1]+1:self.vlen[k]+1]
                zy = ty[self.vlen[k-1]+1:self.vlen[k]+1]
            p += 1
            j = 0
            while ( j < len(zx) ):
                if (j == 0):
                    # Start Position.
                    path += 'M %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly])
                elif (j <= len(zx)-3):
                    # Cubic Bezier Curve
                    path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[j+2] * self.dpi / self.gdpi[gly], zy[j+2] * self.dpi / self.gdpi[gly])
                    j += 2
                elif (j == len(zx)-2):
                    # Cubic Bezier Curve to Start Position
                    path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly])
                    j += 1
                elif (j == len(zx)-1):
                    # Quadratic Bezier Curve to Start Position
                    path += 'Q %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly])

                j += 1
        path += 'z'
        return path
+
+
+
+# dictionary of all text strings by index value
class GlyphDict(object):
    """Collects SVG path data for glyphs, keyed by the literal attribute
    string 'id="glN"' (as it appears in the generated SVG)."""

    def __init__(self):
        # maps 'id="glN"' -> SVG path string
        self.gdict = {}

    def lookup(self, id):
        """Return the stored path for *id*, or None if unknown."""
        return self.gdict.get(id)

    def addGlyph(self, val, path):
        """Store *path* under glyph number *val*."""
        self.gdict['id="gl%d"' % val] = path
+
+
+def generateBook(bookDir, raw, fixedimage):
+ # sanity check Topaz file extraction
+ if not os.path.exists(bookDir) :
+ print "Can not find directory with unencrypted book"
+ return 1
+
+ dictFile = os.path.join(bookDir,'dict0000.dat')
+ if not os.path.exists(dictFile) :
+ print "Can not find dict0000.dat file"
+ return 1
+
+ pageDir = os.path.join(bookDir,'page')
+ if not os.path.exists(pageDir) :
+ print "Can not find page directory in unencrypted book"
+ return 1
+
+ imgDir = os.path.join(bookDir,'img')
+ if not os.path.exists(imgDir) :
+ print "Can not find image directory in unencrypted book"
+ return 1
+
+ glyphsDir = os.path.join(bookDir,'glyphs')
+ if not os.path.exists(glyphsDir) :
+ print "Can not find glyphs directory in unencrypted book"
+ return 1
+
+ metaFile = os.path.join(bookDir,'metadata0000.dat')
+ if not os.path.exists(metaFile) :
+ print "Can not find metadata0000.dat in unencrypted book"
+ return 1
+
+ svgDir = os.path.join(bookDir,'svg')
+ if not os.path.exists(svgDir) :
+ os.makedirs(svgDir)
+
+ xmlDir = os.path.join(bookDir,'xml')
+ if not os.path.exists(xmlDir) :
+ os.makedirs(xmlDir)
+
+ otherFile = os.path.join(bookDir,'other0000.dat')
+ if not os.path.exists(otherFile) :
+ print "Can not find other0000.dat in unencrypted book"
+ return 1
+
+ print "Updating to color images if available"
+ spath = os.path.join(bookDir,'color_img')
+ dpath = os.path.join(bookDir,'img')
+ filenames = os.listdir(spath)
+ filenames = sorted(filenames)
+ for filename in filenames:
+ imgname = filename.replace('color','img')
+ sfile = os.path.join(spath,filename)
+ dfile = os.path.join(dpath,imgname)
+ imgdata = file(sfile,'rb').read()
+ file(dfile,'wb').write(imgdata)
+
+ print "Creating cover.jpg"
+ isCover = False
+ cpath = os.path.join(bookDir,'img')
+ cpath = os.path.join(cpath,'img0000.jpg')
+ if os.path.isfile(cpath):
+ cover = file(cpath, 'rb').read()
+ cpath = os.path.join(bookDir,'cover.jpg')
+ file(cpath, 'wb').write(cover)
+ isCover = True
+
+
+ print 'Processing Dictionary'
+ dict = Dictionary(dictFile)
+
+ print 'Processing Meta Data and creating OPF'
+ meta_array = getMetaArray(metaFile)
+
+ xname = os.path.join(xmlDir, 'metadata.xml')
+ metastr = ''
+ for key in meta_array:
+ metastr += '<meta name="' + key + '" content="' + meta_array[key] + '" />\n'
+ file(xname, 'wb').write(metastr)
+
+ print 'Processing StyleSheet'
+ # get some scaling info from metadata to use while processing styles
+ fontsize = '135'
+ if 'fontSize' in meta_array:
+ fontsize = meta_array['fontSize']
+
+ # also get the size of a normal text page
+ spage = '1'
+ if 'firstTextPage' in meta_array:
+ spage = meta_array['firstTextPage']
+ pnum = int(spage)
+
+ # get page height and width from first text page for use in stylesheet scaling
+ pname = 'page%04d.dat' % (pnum + 1)
+ fname = os.path.join(pageDir,pname)
+ flat_xml = convert2xml.fromData(dict, fname)
+
+ (ph, pw) = getPageDim(flat_xml)
+ if (ph == '-1') or (ph == '0') : ph = '11000'
+ if (pw == '-1') or (pw == '0') : pw = '8500'
+
+ # print ' ', 'other0000.dat'
+ xname = os.path.join(bookDir, 'style.css')
+ flat_xml = convert2xml.fromData(dict, otherFile)
+ cssstr , classlst = stylexml2css.convert2CSS(flat_xml, fontsize, ph, pw)
+ file(xname, 'wb').write(cssstr)
+ xname = os.path.join(xmlDir, 'other0000.xml')
+ file(xname, 'wb').write(convert2xml.getXML(dict, otherFile))
+
+ print 'Processing Glyphs'
+ gd = GlyphDict()
+ filenames = os.listdir(glyphsDir)
+ filenames = sorted(filenames)
+ glyfname = os.path.join(svgDir,'glyphs.svg')
+ glyfile = open(glyfname, 'w')
+ glyfile.write('<?xml version="1.0" standalone="no"?>\n')
+ glyfile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
+ glyfile.write('<svg width="512" height="512" viewBox="0 0 511 511" xmlns="http://www.w3.org/2000/svg" version="1.1">\n')
+ glyfile.write('<title>Glyphs for %s</title>\n' % meta_array['Title'])
+ glyfile.write('<defs>\n')
+ counter = 0
+ for filename in filenames:
+ # print ' ', filename
+ print '.',
+ fname = os.path.join(glyphsDir,filename)
+ flat_xml = convert2xml.fromData(dict, fname)
+
+ xname = os.path.join(xmlDir, filename.replace('.dat','.xml'))
+ file(xname, 'wb').write(convert2xml.getXML(dict, fname))
+
+ gp = GParser(flat_xml)
+ for i in xrange(0, gp.count):
+ path = gp.getPath(i)
+ maxh, maxw = gp.getGlyphDim(i)
+ fullpath = '<path id="gl%d" d="%s" fill="black" /><!-- width=%d height=%d -->\n' % (counter * 256 + i, path, maxw, maxh)
+ glyfile.write(fullpath)
+ gd.addGlyph(counter * 256 + i, fullpath)
+ counter += 1
+ glyfile.write('</defs>\n')
+ glyfile.write('</svg>\n')
+ glyfile.close()
+ print " "
+
+ # start up the html
+ htmlFileName = "book.html"
+ htmlstr = '<?xml version="1.0" encoding="utf-8"?>\n'
+ htmlstr += '<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.1 Strict//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11-strict.dtd">\n'
+ htmlstr += '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">\n'
+ htmlstr += '<head>\n'
+ htmlstr += '<meta http-equiv="content-type" content="text/html; charset=utf-8"/>\n'
+ htmlstr += '<title>' + meta_array['Title'] + ' by ' + meta_array['Authors'] + '</title>\n'
+ htmlstr += '<meta name="Author" content="' + meta_array['Authors'] + '" />\n'
+ htmlstr += '<meta name="Title" content="' + meta_array['Title'] + '" />\n'
+ htmlstr += '<meta name="ASIN" content="' + meta_array['ASIN'] + '" />\n'
+ htmlstr += '<meta name="GUID" content="' + meta_array['GUID'] + '" />\n'
+ htmlstr += '<link href="style.css" rel="stylesheet" type="text/css" />\n'
+ htmlstr += '</head>\n<body>\n'
+
+ print 'Processing Pages'
+ # Books are at 1440 DPI. This is rendering at twice that size for
+ # readability when rendering to the screen.
+ scaledpi = 1440.0
+
+ svgindex = '<?xml version="1.0" encoding="utf-8"?>\n'
+ svgindex += '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n'
+ svgindex += '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >'
+ svgindex += '<head>\n'
+ svgindex += '<title>' + meta_array['Title'] + '</title>\n'
+ svgindex += '<meta name="Author" content="' + meta_array['Authors'] + '" />\n'
+ svgindex += '<meta name="Title" content="' + meta_array['Title'] + '" />\n'
+ svgindex += '<meta name="ASIN" content="' + meta_array['ASIN'] + '" />\n'
+ svgindex += '<meta name="GUID" content="' + meta_array['GUID'] + '" />\n'
+ svgindex += '</head>\n'
+ svgindex += '<body>\n'
+
+ filenames = os.listdir(pageDir)
+ filenames = sorted(filenames)
+ numfiles = len(filenames)
+ counter = 0
+
+ for filename in filenames:
+ # print ' ', filename
+ print ".",
+
+ fname = os.path.join(pageDir,filename)
+ flat_xml = convert2xml.fromData(dict, fname)
+
+ xname = os.path.join(xmlDir, filename.replace('.dat','.xml'))
+ file(xname, 'wb').write(convert2xml.getXML(dict, fname))
+
+ # first get the html
+ htmlstr += flatxml2html.convert2HTML(flat_xml, classlst, fname, bookDir, gd, fixedimage)
+
+ # now get the svg image of the page
+ svgxml = flatxml2svg.convert2SVG(gd, flat_xml, counter, numfiles, svgDir, raw, meta_array, scaledpi)
+
+ if (raw) :
+ pfile = open(os.path.join(svgDir,filename.replace('.dat','.svg')), 'w')
+ svgindex += '<a href="svg/page%04d.svg">Page %d</a>\n' % (counter, counter)
+ else :
+ pfile = open(os.path.join(svgDir,'page%04d.xhtml' % counter), 'w')
+ svgindex += '<a href="svg/page%04d.xhtml">Page %d</a>\n' % (counter, counter)
+
+
+ pfile.write(svgxml)
+ pfile.close()
+
+ counter += 1
+
+ print " "
+
+ # finish up the html string and output it
+ htmlstr += '</body>\n</html>\n'
+ file(os.path.join(bookDir, htmlFileName), 'wb').write(htmlstr)
+
+ # finish up the svg index string and output it
+ svgindex += '</body>\n</html>\n'
+ file(os.path.join(bookDir, 'index_svg.xhtml'), 'wb').write(svgindex)
+
+ # build the opf file
+ opfname = os.path.join(bookDir, 'book.opf')
+ opfstr = '<?xml version="1.0" encoding="utf-8"?>\n'
+ opfstr += '<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="guid_id">\n'
+ # adding metadata
+ opfstr += ' <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">\n'
+ opfstr += ' <dc:identifier opf:scheme="GUID" id="guid_id">' + meta_array['GUID'] + '</dc:identifier>\n'
+ opfstr += ' <dc:identifier opf:scheme="ASIN">' + meta_array['ASIN'] + '</dc:identifier>\n'
+ opfstr += ' <dc:identifier opf:scheme="oASIN">' + meta_array['oASIN'] + '</dc:identifier>\n'
+ opfstr += ' <dc:title>' + meta_array['Title'] + '</dc:title>\n'
+ opfstr += ' <dc:creator opf:role="aut">' + meta_array['Authors'] + '</dc:creator>\n'
+ opfstr += ' <dc:language>en</dc:language>\n'
+ opfstr += ' <dc:date>' + meta_array['UpdateTime'] + '</dc:date>\n'
+ if isCover:
+ opfstr += ' <meta name="cover" content="bookcover"/>\n'
+ opfstr += ' </metadata>\n'
+ opfstr += '<manifest>\n'
+ opfstr += ' <item id="book" href="book.html" media-type="application/xhtml+xml"/>\n'
+ opfstr += ' <item id="stylesheet" href="style.css" media-type="text.css"/>\n'
+ # adding image files to manifest
+ filenames = os.listdir(imgDir)
+ filenames = sorted(filenames)
+ for filename in filenames:
+ imgname, imgext = os.path.splitext(filename)
+ if imgext == '.jpg':
+ imgext = 'jpeg'
+ if imgext == '.svg':
+ imgext = 'svg+xml'
+ opfstr += ' <item id="' + imgname + '" href="img/' + filename + '" media-type="image/' + imgext + '"/>\n'
+ if isCover:
+ opfstr += ' <item id="bookcover" href="cover.jpg" media-type="image/jpeg" />\n'
+ opfstr += '</manifest>\n'
+ # adding spine
+ opfstr += '<spine>\n <itemref idref="book" />\n</spine>\n'
+ if isCover:
+ opfstr += ' <guide>\n'
+ opfstr += ' <reference href="cover.jpg" type="cover" title="Cover"/>\n'
+ opfstr += ' </guide>\n'
+ opfstr += '</package>\n'
+ file(opfname, 'wb').write(opfstr)
+
+ print 'Processing Complete'
+
+ return 0
+
+def usage():
+ print "genbook.py generates a book from the extract Topaz Files"
+ print "Usage:"
+ print " genbook.py [-r] [-h [--fixed-image] <bookDir> "
+ print " "
+ print "Options:"
+ print " -h : help - print this usage message"
+ print " -r : generate raw svg files (not wrapped in xhtml)"
+ print " --fixed-image : genearate any Fixed Area as an svg image in the html"
+ print " "
+
+
+def main(argv):
+ bookDir = ''
+
+ if len(argv) == 0:
+ argv = sys.argv
+
+ try:
+ opts, args = getopt.getopt(argv[1:], "rh:",["fixed-image"])
+
+ except getopt.GetoptError, err:
+ print str(err)
+ usage()
+ return 1
+
+ if len(opts) == 0 and len(args) == 0 :
+ usage()
+ return 1
+
+ raw = 0
+ fixedimage = False
+ for o, a in opts:
+ if o =="-h":
+ usage()
+ return 0
+ if o =="-r":
+ raw = 1
+ if o =="--fixed-image":
+ fixedimage = True
+
+ bookDir = args[0]
+
+ rv = generateBook(bookDir, raw, fixedimage)
+ return rv
+
+
if __name__ == '__main__':
    # main('') makes main() fall back to sys.argv internally.
    sys.exit(main(''))
--- /dev/null
+#!/usr/bin/env python
+
+from __future__ import with_statement
+import sys
+import os, csv
+import binascii
+import zlib
+import re
+from struct import pack, unpack, unpack_from
+
class DrmException(Exception):
    # Raised for DRM-related failures while deriving PIDs.
    pass
+
+global kindleDatabase
+global charMap1
+global charMap2
+global charMap3
+global charMap4
+
+if sys.platform.startswith('win'):
+ from k4pcutils import openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap2
+if sys.platform.startswith('darwin'):
+ from k4mutils import openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap2
+
+charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
+charMap3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
+charMap4 = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
+
+# crypto digestroutines
+import hashlib
+
def MD5(message):
    """Return the raw 16-byte MD5 digest of message."""
    return hashlib.md5(message).digest()
+
def SHA1(message):
    """Return the raw 20-byte SHA-1 digest of message."""
    return hashlib.sha1(message).digest()
+
+
+# Encode the bytes in data with the characters in map
# Encode the bytes in data with the characters in map
def encode(data, map):
    """Encode each byte of data as two characters drawn from map."""
    base = len(map)
    pieces = []
    for ch in data:
        v = ord(ch)
        # High character: byte with its top bit flipped, divided by base.
        pieces.append(map[(v ^ 0x80) // base])
        # Low character: plain modulus (NOT taken from the flipped value).
        pieces.append(map[v % base])
    return "".join(pieces)
+
+# Hash the bytes in data and then encode the digest with the characters in map
# Hash the bytes in data and then encode the digest with the characters in map
def encodeHash(data, map):
    """MD5-hash data, then encode the 16-byte digest with map."""
    digest = MD5(data)
    return encode(digest, map)
+
+# Decode the string in data with the characters in map. Returns the decoded bytes
# Decode the string in data with the characters in map. Returns the decoded bytes
def decode(data, map):
    """Decode a two-characters-per-byte string produced by encode().

    Stops early at the first character not present in map.
    """
    result = ""
    for pos in range(0, len(data) - 1, 2):
        hi = map.find(data[pos])
        lo = map.find(data[pos + 1])
        if hi == -1 or lo == -1:
            break
        # Inverse of encode(): undo the top-bit flip on the high part.
        byte = (((hi * len(map)) ^ 0x80) & 0xFF) + lo
        result += pack("B", byte)
    return result
+
+
+# Parse the Kindle.info file and return the records as a list of key-values
# Parse the Kindle.info file and return the records as a list of key-values
def parseKindleInfo(kInfoFile):
    """Read the kindle.info store and return {encoded key: encoded value}.

    Uses the platform-specific openKindleInfo() helper. The first byte of
    the stream is skipped; records are split on '{' (Windows) or '['
    (otherwise), with key and value separated by ':'.
    """
    DB = {}
    infoReader = openKindleInfo(kInfoFile)
    infoReader.read(1)
    data = infoReader.read()
    if sys.platform.startswith('win'):
        items = data.split('{')
    else :
        items = data.split('[')
    for item in items:
        splito = item.split(':')
        DB[splito[0]] =splito[1]
    return DB
+
+# Get a record from the Kindle.info file for the key "hashedKey" (already hashed and encoded). Return the decoded and decrypted record
# Get a record from the Kindle.info file for the key "hashedKey" (already hashed and encoded). Return the decoded and decrypted record
def getKindleInfoValueForHash(hashedKey):
    """Decode and decrypt one kindle.info record addressed by its hashed key."""
    global kindleDatabase
    global charMap1
    global charMap2
    # Stored value is charMap2-encoded; unwrap with the platform's
    # CryptUnprotectData. The Mac variant needs a second charMap1 decode.
    encryptedValue = decode(kindleDatabase[hashedKey],charMap2)
    if sys.platform.startswith('win'):
        return CryptUnprotectData(encryptedValue,"")
    else:
        cleartext = CryptUnprotectData(encryptedValue)
        return decode(cleartext, charMap1)
+
+# Get a record from the Kindle.info file for the string in "key" (plaintext). Return the decoded and decrypted record
# Get a record from the Kindle.info file for the string in "key" (plaintext). Return the decoded and decrypted record
def getKindleInfoValueForKey(key):
    """Fetch a record by its plaintext name: hash/encode it, then look it up."""
    global charMap2
    hashedKey = encodeHash(key, charMap2)
    return getKindleInfoValueForHash(hashedKey)
+
+# Find if the original string for a hashed/encoded string is known. If so return the original string othwise return an empty string.
# Find if the original string for a hashed/encoded string is known. If so return the original string othwise return an empty string.
def findNameForHash(hash):
    """Return the known plaintext name whose hash matches, or ''."""
    global charMap2
    knownNames = ["kindle.account.tokens","kindle.cookie.item","eulaVersionAccepted","login_date","kindle.token.item","login","kindle.key.item","kindle.name.info","kindle.device.info", "MazamaRandomNumber"]
    for candidate in knownNames:
        if encodeHash(candidate, charMap2) == hash:
            return candidate
    return ""
+
+# Print all the records from the kindle.info file (option -i)
# Print all the records from the kindle.info file (option -i)
def printKindleInfo():
    """Dump every kindle.info record, labelling the ones we recognise."""
    for record in kindleDatabase:
        name = findNameForHash(record)
        if name != "" :
            print (name)
            print ("--------------------------")
        else :
            print ("Unknown Record")
        # Value is printed for known and unknown records alike.
        print getKindleInfoValueForHash(record)
        print "\n"
+
+#
+# PID generation routines
+#
+
+# Returns two bit at offset from a bit field
# Returns two bit at offset from a bit field
def getTwoBitsFromBitField(bitField, offset):
    """Extract the two-bit group at offset (big-endian within each byte)."""
    byteIndex, group = divmod(offset, 4)
    shift = 6 - 2 * group
    return (ord(bitField[byteIndex]) >> shift) & 3
+
+# Returns the six bits at offset from a bit field
# Returns the six bits at offset from a bit field
def getSixBitsFromBitField(bitField, offset):
    """Read the offset-th six-bit group as three consecutive two-bit groups."""
    start = offset * 3
    value = 0
    for i in range(3):
        value = (value << 2) | getTwoBitsFromBitField(bitField, start + i)
    return value
+
+# 8 bits to six bits encoding from hash to generate PID string
# 8 bits to six bits encoding from hash to generate PID string
def encodePID(hash):
    """Map the first 48 bits of hash to an 8-character PID via charMap3."""
    global charMap3
    return "".join(charMap3[getSixBitsFromBitField(hash, pos)] for pos in range(8))
+
+# Encryption table used to generate the device PID
# Encryption table used to generate the device PID
def generatePidEncryptionTable() :
    """Build the 256-entry CRC table for the reflected polynomial 0xEDB88320."""
    table = []
    for seed in range(0x100):
        value = seed
        for _ in range(8):
            carry = value & 1
            value >>= 1
            if carry:
                value ^= 0xEDB88320
        table.append(value)
    return table
+
+# Seed value used to generate the device PID
# Seed value used to generate the device PID
def generatePidSeed(table, dsn) :
    """Fold the first four DSN bytes through the CRC table into a seed word."""
    value = 0
    for counter in range(4):
        index = (ord(dsn[counter]) ^ value) & 0xFF
        value = (value >> 8) ^ table[index]
    return value
+
+# Generate the device PID
# Generate the device PID
def generateDevicePID(table, dsn, nbRoll):
    """Derive the device PID: seed from the DSN, XOR-roll in nbRoll DSN
    bytes, then map each of the 8 state bytes into the charMap4 alphabet."""
    global charMap4
    seed = generatePidSeed(table, dsn)
    # Eight state bytes: the 32-bit seed, big-endian, repeated twice.
    state = [(seed >> shift) & 0xFF for shift in (24, 16, 8, 0)] * 2
    for counter in range(nbRoll):
        state[counter % 8] ^= ord(dsn[counter])
    pidAscii = ""
    for byte in state:
        mapIndex = ((((byte >> 5) & 3) ^ byte) & 0x1F) + (byte >> 7)
        pidAscii += charMap4[mapIndex]
    return pidAscii
+
def crc32(s):
    """CRC-32 of s with zero initial value and no final complement."""
    raw = binascii.crc32(s, -1)
    return (~raw) & 0xFFFFFFFF
+
+# convert from 8 digit PID to 10 digit PID with checksum
# convert from 8 digit PID to 10 digit PID with checksum
def checksumPid(s):
    """Append the two checksum characters to an 8-character PID."""
    global charMap4
    crc = crc32(s)
    crc ^= crc >> 16
    alphabetSize = len(charMap4)
    res = s
    for _ in range(2):
        b = crc & 0xFF
        pos = (b // alphabetSize) ^ (b % alphabetSize)
        res += charMap4[pos % alphabetSize]
        crc >>= 8
    return res
+
+
+# old kindle serial number to fixed pid
# old kindle serial number to fixed pid
def pidFromSerial(s, l):
    """Derive a fixed l-character PID from an old-style Kindle serial."""
    global charMap4
    crc = crc32(s)
    # XOR-fold the serial into l accumulator bytes.
    acc = [0] * l
    for i in xrange(len(s)):
        acc[i % l] ^= ord(s[i])
    # Then fold in the CRC, cycling through its four bytes.
    crcBytes = [crc >> 24 & 0xff, crc >> 16 & 0xff, crc >> 8 & 0xff, crc & 0xff]
    for i in xrange(l):
        acc[i] ^= crcBytes[i & 3]
    pid = ""
    for i in xrange(l):
        b = acc[i] & 0xff
        pid += charMap4[(b >> 7) + ((b >> 5 & 3) ^ (b & 0x1f))]
    return pid
+
+
+# Parse the EXTH header records and use the Kindle serial number to calculate the book pid.
# Parse the EXTH header records and use the Kindle serial number to calculate the book pid.
def getKindlePid(pidlst, rec209, token, serialnum):
    """Append the standalone-Kindle PIDs derived from the device serial."""
    if rec209 is not None:
        # Book-specific PID: serial + EXTH 209 record + token.
        pidHash = SHA1(serialnum + rec209 + token)
        pidlst.append(checksumPid(encodePID(pidHash)))

    # Fixed PID used by pre-2.5 firmware updates.
    fixedPid = pidFromSerial(serialnum, 7) + "*"
    pidlst.append(checksumPid(fixedPid))

    return pidlst
+
+
+# Parse the EXTH header records and parse the Kindleinfo
+# file to calculate the book pid.
+
def getK4Pids(pidlst, rec209, token, kInfoFile=None):
    """Append Kindle-for-PC/Mac PIDs derived from a kindle.info store.

    Returns pidlst untouched when the info file cannot be parsed; otherwise
    appends the device PID plus up to three book-PID variants (when rec209
    is present).
    """
    global kindleDatabase
    global charMap1
    kindleDatabase = None
    try:
        kindleDatabase = parseKindleInfo(kInfoFile)
    except Exception, message:
        print(message)
        pass

    if kindleDatabase == None :
        return pidlst

    # Get the Mazama Random number
    MazamaRandomNumber = getKindleInfoValueForKey("MazamaRandomNumber")

    # Get the HDD serial
    encodedSystemVolumeSerialNumber = encodeHash(GetVolumeSerialNumber(),charMap1)

    # Get the current user name
    encodedUsername = encodeHash(GetUserName(),charMap1)

    # concat, hash and encode to calculate the DSN
    DSN = encode(SHA1(MazamaRandomNumber+encodedSystemVolumeSerialNumber+encodedUsername),charMap1)

    # Compute the device PID (for which I can tell, is used for nothing).
    table = generatePidEncryptionTable()
    devicePID = generateDevicePID(table,DSN,4)
    devicePID = checksumPid(devicePID)
    pidlst.append(devicePID)

    # Compute book PID
    if rec209 == None:
        print "\nNo EXTH record type 209 - Perhaps not a K4 file?"
        return pidlst

    # Get the kindle account token
    kindleAccountToken = getKindleInfoValueForKey("kindle.account.tokens")

    # book pid
    pidHash = SHA1(DSN+kindleAccountToken+rec209+token)
    bookPID = encodePID(pidHash)
    bookPID = checksumPid(bookPID)
    pidlst.append(bookPID)

    # variant 1
    pidHash = SHA1(kindleAccountToken+rec209+token)
    bookPID = encodePID(pidHash)
    bookPID = checksumPid(bookPID)
    pidlst.append(bookPID)

    # variant 2
    pidHash = SHA1(DSN+rec209+token)
    bookPID = encodePID(pidHash)
    bookPID = checksumPid(bookPID)
    pidlst.append(bookPID)

    return pidlst
+
def getPidList(md1, md2, k4, pids, serials, kInfoFiles):
    """Collect every candidate PID: local K4PC/Mac install, extra kindle.info
    files, standalone Kindle serial numbers, and explicitly supplied PIDs."""
    pidlst = []
    if k4:
        pidlst = getK4Pids(pidlst, md1, md2)
    for infoFile in kInfoFiles:
        pidlst = getK4Pids(pidlst, md1, md2, infoFile)
    for serialnum in serials:
        pidlst = getKindlePid(pidlst, md1, md2, serialnum)
    pidlst.extend(pids)
    return pidlst
--- /dev/null
+#!/usr/bin/env python
+
class Unbuffered:
    """File-object wrapper that flushes after every write.

    All other attribute access is delegated to the wrapped stream.
    """
    def __init__(self, stream):
        self.stream = stream
    def write(self, data):
        self.stream.write(data)
        self.stream.flush()
    def __getattr__(self, attr):
        # Delegate anything not defined here to the underlying stream.
        return getattr(self.stream, attr)
+
+import sys
+sys.stdout=Unbuffered(sys.stdout)
+import os, csv, getopt
+import zlib, zipfile, tempfile, shutil
+from struct import pack
+from struct import unpack
+
class TpzDRMError(Exception):
    # Raised for any Topaz parsing or DRM-removal failure.
    pass
+
+# local support routines
+import kgenpids
+import genbook
+#
+# Utility routines
+#
+
+# Get a 7 bit encoded number from file
# Get a 7 bit encoded number from file
def bookReadEncodedNumber(fo):
    """Read a 7-bit variable-length integer; an 0xFF prefix marks a negative."""
    byte = ord(fo.read(1))
    negative = (byte == 0xFF)
    if negative:
        byte = ord(fo.read(1))
    value = byte
    if byte >= 0x80:
        # Continuation form: accumulate 7 bits per byte, high bit = "more".
        value = byte & 0x7F
        while byte >= 0x80:
            byte = ord(fo.read(1))
            value = (value << 7) | (byte & 0x7F)
    return -value if negative else value
+
+# Get a length prefixed string from file
# Get a length prefixed string from file
def bookReadString(fo):
    """Read a string whose length is stored as an encoded number prefix."""
    length = bookReadEncodedNumber(fo)
    return unpack(str(length) + "s", fo.read(length))[0]
+
+#
+# crypto routines
+#
+
+# Context initialisation for the Topaz Crypto
# Context initialisation for the Topaz Crypto
def topazCryptoInit(key):
    """Initialise the two-word Topaz cipher state from a key string.

    Returns [ctx1, ctx2] where ctx2 lags ctx1 by one round.
    """
    ctx1 = 0x0CAFFE19E
    # Initialise ctx2 so an empty key cannot leave it unbound (the original
    # raised NameError for a zero-length key).
    ctx2 = 0
    for keyChar in key:
        keyByte = ord(keyChar)
        ctx2 = ctx1
        ctx1 = ((((ctx1 >>2) * (ctx1 >>7))&0xFFFFFFFF) ^ (keyByte * keyByte * 0x0F902007)& 0xFFFFFFFF )
    return [ctx1,ctx2]
+
+# decrypt data with the context prepared by topazCryptoInit()
# decrypt data with the context prepared by topazCryptoInit()
def topazCryptoDecrypt(data, ctx):
    """Decrypt data with a state list produced by topazCryptoInit()."""
    ctx1, ctx2 = ctx
    plainText = ""
    for dataChar in data:
        cipherByte = ord(dataChar)
        # Keystream byte mixes both state words; the state then advances
        # using the recovered plaintext byte m.
        m = (cipherByte ^ ((ctx1 >> 3) & 0xFF) ^ ((ctx2 << 3) & 0xFF)) & 0xFF
        ctx2 = ctx1
        ctx1 = (((ctx1 >> 2) * (ctx1 >> 7)) & 0xFFFFFFFF) ^ ((m * m * 0x0F902007) & 0xFFFFFFFF)
        plainText += chr(m)
    return plainText
+
+# Decrypt data with the PID
# Decrypt data with the PID
def decryptRecord(data, PID):
    """Decrypt one record using the PID as the cipher key."""
    return topazCryptoDecrypt(data, topazCryptoInit(PID))
+
+# Try to decrypt a dkey record (contains the bookPID)
# Try to decrypt a dkey record (contains the bookPID)
def decryptDkeyRecord(data,PID):
    """Decrypt one dkey record with PID and validate its framing.

    Layout: 'PID' magic, length byte (8), the PID itself, length byte (8),
    the 8-byte book key, then 'pid' magic. Raises TpzDRMError when any
    check fails; returns the embedded book key on success.
    """
    record = decryptRecord(data,PID)
    fields = unpack("3sB8sB8s3s",record)
    if fields[0] != "PID" or fields[5] != "pid" :
        raise TpzDRMError("Didn't find PID magic numbers in record")
    elif fields[1] != 8 or fields[3] != 8 :
        raise TpzDRMError("Record didn't contain correct length fields")
    elif fields[2] != PID :
        raise TpzDRMError("Record didn't contain PID")
    return fields[4]
+
+# Decrypt all dkey records (contain the book PID)
# Decrypt all dkey records (contain the book PID)
def decryptDkeyRecords(data, PID):
    """Return every book key recovered from the length-prefixed dkey records.

    Raises TpzDRMError when no record decrypts successfully with PID.
    """
    nbKeyRecords = ord(data[0])
    data = data[1:]
    records = []
    for _ in range(nbKeyRecords):
        length = ord(data[0])
        try:
            records.append(decryptDkeyRecord(data[1:length + 1], PID))
        except TpzDRMError:
            # Record did not validate for this PID; try the next one.
            pass
        data = data[1 + length:]
    if not records:
        raise TpzDRMError("BookKey Not Found")
    return records
+
+
class TopazBook:
    """Parser/decryptor for a Topaz-format ebook file.

    Parses the header record table and metadata on construction. After a
    book key is set via setBookKey(), payload records are transparently
    decrypted (and zlib-decompressed) by getBookPayloadRecord().
    """
    def __init__(self, filename, outdir):
        self.fo = file(filename, 'rb')
        self.outdir = outdir
        # File offset where payload records begin (set by parseTopazHeaders).
        self.bookPayloadOffset = 0
        # {tag: [[offset, decompressedLength, compressedLength], ...]}
        self.bookHeaderRecords = {}
        self.bookMetadata = {}
        self.bookKey = None
        magic = unpack("4s",self.fo.read(4))[0]
        if magic != 'TPZ0':
            raise TpzDRMError("Parse Error : Invalid Header, not a Topaz file")
        self.parseTopazHeaders()
        self.parseMetadata()

    def parseTopazHeaders(self):
        """Read the header record table and record the payload offset."""
        def bookReadHeaderRecordData():
            # Read and return the data of one header record at the current book file position
            # [[offset,decompressedLength,compressedLength],...]
            nbValues = bookReadEncodedNumber(self.fo)
            values = []
            for i in range (0,nbValues):
                values.append([bookReadEncodedNumber(self.fo),bookReadEncodedNumber(self.fo),bookReadEncodedNumber(self.fo)])
            return values
        def parseTopazHeaderRecord():
            # Read and parse one header record at the current book file position and return the associated data
            # [[offset,decompressedLength,compressedLength],...]
            # 0x63 is the per-record marker byte.
            if ord(self.fo.read(1)) != 0x63:
                raise TpzDRMError("Parse Error : Invalid Header")
            tag = bookReadString(self.fo)
            record = bookReadHeaderRecordData()
            return [tag,record]
        nbRecords = bookReadEncodedNumber(self.fo)
        for i in range (0,nbRecords):
            result = parseTopazHeaderRecord()
            # print result[0], result[1]
            self.bookHeaderRecords[result[0]] = result[1]
        # 0x64 terminates the header table.
        if ord(self.fo.read(1)) != 0x64 :
            raise TpzDRMError("Parse Error : Invalid Header")
        self.bookPayloadOffset = self.fo.tell()

    def parseMetadata(self):
        # Parse the metadata record from the book payload and return a list of [key,values]
        self.fo.seek(self.bookPayloadOffset + self.bookHeaderRecords["metadata"][0][0])
        tag = bookReadString(self.fo)
        if tag != "metadata" :
            raise TpzDRMError("Parse Error : Record Names Don't Match")
        flags = ord(self.fo.read(1))
        nbRecords = ord(self.fo.read(1))
        for i in range (0,nbRecords) :
            # Each metadata entry is a (key, value) pair of strings.
            record = [bookReadString(self.fo), bookReadString(self.fo)]
            self.bookMetadata[record[0]] = record[1]
        return self.bookMetadata

    def getPIDMetaInfo(self):
        """Return the 'keys' metadata value and the record it names."""
        keysRecord = None
        # NOTE(review): capital-K 'KeysRecordRecord' below looks like a typo;
        # when 'keys' is absent, returning lowercase 'keysRecordRecord'
        # would raise NameError. Left unchanged to preserve behavior.
        KeysRecordRecord = None
        if 'keys' in self.bookMetadata:
            keysRecord = self.bookMetadata['keys']
            keysRecordRecord = self.bookMetadata[keysRecord]
        return keysRecord, keysRecordRecord

    def getBookTitle(self):
        """Return the 'Title' metadata value, or '' when absent."""
        title = ''
        if 'Title' in self.bookMetadata:
            title = self.bookMetadata['Title']
        return title

    def setBookKey(self, key):
        # Key used by getBookPayloadRecord() to decrypt encrypted records.
        self.bookKey = key

    def getBookPayloadRecord(self, name, index):
        # Get a record in the book payload, given its name and index.
        # decrypted and decompressed if necessary
        encrypted = False
        compressed = False
        try:
            recordOffset = self.bookHeaderRecords[name][index][0]
        except:
            raise TpzDRMError("Parse Error : Invalid Record, record not found")

        self.fo.seek(self.bookPayloadOffset + recordOffset)

        tag = bookReadString(self.fo)
        if tag != name :
            raise TpzDRMError("Parse Error : Invalid Record, record name doesn't match")

        # A negative stored index marks the record as encrypted.
        recordIndex = bookReadEncodedNumber(self.fo)
        if recordIndex < 0 :
            encrypted = True
            recordIndex = -recordIndex -1

        if recordIndex != index :
            raise TpzDRMError("Parse Error : Invalid Record, index doesn't match")

        # Non-zero compressed length means the record is zlib-compressed.
        if (self.bookHeaderRecords[name][index][2] > 0):
            compressed = True
            record = self.fo.read(self.bookHeaderRecords[name][index][2])
        else:
            record = self.fo.read(self.bookHeaderRecords[name][index][1])

        if encrypted:
            if self.bookKey:
                ctx = topazCryptoInit(self.bookKey)
                record = topazCryptoDecrypt(record,ctx)
            else :
                raise TpzDRMError("Error: Attempt to decrypt without bookKey")

        if compressed:
            record = zlib.decompress(record)

        return record

    def processBook(self, pidlst):
        """Find a working PID, extract all files, and generate the book.

        Falls back to keyless extraction when no 'dkey' record exists.
        """
        raw = 0
        fixedimage=True
        try:
            keydata = self.getBookPayloadRecord('dkey', 0)
        except TpzDRMError, e:
            print "no dkey record found, book may not be encrypted"
            print "attempting to extrct files without a book key"
            self.createBookDirectory()
            self.extractFiles()
            print "Successfully Extracted Topaz contents"
            rv = genbook.generateBook(self.outdir, raw, fixedimage)
            if rv == 0:
                print "\nBook Successfully generated"
            return rv

        # try each pid to decode the file
        bookKey = None
        for pid in pidlst:
            # use 8 digit pids here
            pid = pid[0:8]
            print "\nTrying: ", pid
            bookKeys = []
            data = keydata
            try:
                bookKeys+=decryptDkeyRecords(data,pid)
            except TpzDRMError, e:
                pass
            else:
                bookKey = bookKeys[0]
                print "Book Key Found!"
                break

        if not bookKey:
            raise TpzDRMError('Decryption Unsucessful; No valid pid found')

        self.setBookKey(bookKey)
        self.createBookDirectory()
        self.extractFiles()
        print "Successfully Extracted Topaz contents"
        rv = genbook.generateBook(self.outdir, raw, fixedimage)
        if rv == 0:
            print "\nBook Successfully generated"
        return rv

    def createBookDirectory(self):
        """Create outdir and its img/color_img/page/glyphs subdirectories."""
        outdir = self.outdir
        # create output directory structure
        if not os.path.exists(outdir):
            os.makedirs(outdir)
        destdir =  os.path.join(outdir,'img')
        if not os.path.exists(destdir):
            os.makedirs(destdir)
        destdir =  os.path.join(outdir,'color_img')
        if not os.path.exists(destdir):
            os.makedirs(destdir)
        destdir =  os.path.join(outdir,'page')
        if not os.path.exists(destdir):
            os.makedirs(destdir)
        destdir =  os.path.join(outdir,'glyphs')
        if not os.path.exists(destdir):
            os.makedirs(destdir)

    def extractFiles(self):
        """Write every payload record (except 'dkey') out to its directory."""
        outdir = self.outdir
        for headerRecord in self.bookHeaderRecords:
            name = headerRecord
            if name != "dkey" :
                ext = '.dat'
                if name == 'img' : ext = '.jpg'
                if name == 'color' : ext = '.jpg'
                print "\nProcessing Section: %s " % name
                for index in range (0,len(self.bookHeaderRecords[name])) :
                    fnum = "%04d" % index
                    fname = name + fnum + ext
                    destdir = outdir
                    if name == 'img':
                        destdir =  os.path.join(outdir,'img')
                    if name == 'color':
                        destdir =  os.path.join(outdir,'color_img')
                    if name == 'page':
                        destdir =  os.path.join(outdir,'page')
                    if name == 'glyphs':
                        destdir =  os.path.join(outdir,'glyphs')
                    outputFile = os.path.join(destdir,fname)
                    print ".",
                    record = self.getBookPayloadRecord(name,index)
                    if record != '':
                        file(outputFile, 'wb').write(record)
                print " "
+
+
def zipUpDir(myzip, tempdir, localname):
    """Recursively add tempdir/localname to the open ZipFile myzip.

    Archive names are kept relative to tempdir via localname.
    (Also stops shadowing the builtins 'list' and 'file'.)
    """
    currentdir = tempdir
    if localname != "":
        currentdir = os.path.join(currentdir, localname)
    for entry in os.listdir(currentdir):
        localfilePath = os.path.join(localname, entry)
        realfilePath = os.path.join(currentdir, entry)
        if os.path.isfile(realfilePath):
            myzip.write(realfilePath, localfilePath)
        elif os.path.isdir(realfilePath):
            zipUpDir(myzip, tempdir, localfilePath)
+
+
def usage(progname):
    """Print the command-line help for the Topaz DRM-removal script."""
    print "Removes DRM protection from Topaz ebooks and extract the contents"
    print "Usage:"
    print "    %s [-k <kindle.info>] [-p <pidnums>] [-s <kindleSerialNumbers>] <infile> <outdir>  " % progname
+
+
+# Main
+def main(argv=sys.argv):
+ progname = os.path.basename(argv[0])
+ k4 = False
+ pids = []
+ serials = []
+ kInfoFiles = []
+
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], "k:p:s:")
+ except getopt.GetoptError, err:
+ print str(err)
+ usage(progname)
+ return 1
+ if len(args)<2:
+ usage(progname)
+ return 1
+
+ for o, a in opts:
+ if o == "-k":
+ if a == None :
+ print "Invalid parameter for -k"
+ return 1
+ kInfoFiles.append(a)
+ if o == "-p":
+ if a == None :
+ print "Invalid parameter for -p"
+ return 1
+ pids = a.split(',')
+ if o == "-s":
+ if a == None :
+ print "Invalid parameter for -s"
+ return 1
+ serials = a.split(',')
+ k4 = True
+
+ infile = args[0]
+ outdir = args[1]
+
+ if not os.path.isfile(infile):
+ print "Input File Does Not Exist"
+ return 1
+
+ bookname = os.path.splitext(os.path.basename(infile))[0]
+ tempdir = tempfile.mkdtemp()
+
+ tb = TopazBook(infile, tempdir)
+ title = tb.getBookTitle()
+ print "Processing Book: ", title
+ keysRecord, keysRecordRecord = tb.getPIDMetaInfo()
+ pidlst = kgenpids.getPidList(keysRecord, keysRecordRecord, k4, pids, serials, kInfoFiles)
+
+ try:
+ tb.processBook(pidlst)
+ except TpzDRMError, e:
+ print str(e)
+ print " Creating DeBug Full Zip Archive of Book"
+ zipname = os.path.join(outdir, bookname + '_debug' + '.zip')
+ myzip = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+ zipUpDir(myzip, tempdir, '')
+ myzip.close()
+ return 1
+
+ print " Creating HTML ZIP Archive"
+ zipname = os.path.join(outdir, bookname + '_nodrm' + '.zip')
+ myzip1 = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+ myzip1.write(os.path.join(tempdir,'book.html'),'book.html')
+ myzip1.write(os.path.join(tempdir,'book.opf'),'book.opf')
+ if os.path.isfile(os.path.join(tempdir,'cover.jpg')):
+ myzip1.write(os.path.join(tempdir,'cover.jpg'),'cover.jpg')
+ myzip1.write(os.path.join(tempdir,'style.css'),'style.css')
+ zipUpDir(myzip1, tempdir, 'img')
+ myzip1.close()
+
+ print " Creating SVG ZIP Archive"
+ zipname = os.path.join(outdir, bookname + '_SVG' + '.zip')
+ myzip2 = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+ myzip2.write(os.path.join(tempdir,'index_svg.xhtml'),'index_svg.xhtml')
+ zipUpDir(myzip2, tempdir, 'svg')
+ zipUpDir(myzip2, tempdir, 'img')
+ myzip2.close()
+
+ print " Creating XML ZIP Archive"
+ zipname = os.path.join(outdir, bookname + '_XML' + '.zip')
+ myzip3 = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+ targetdir = os.path.join(tempdir,'xml')
+ zipUpDir(myzip3, targetdir, '')
+ zipUpDir(myzip3, tempdir, 'img')
+ myzip3.close()
+
+ shutil.rmtree(tempdir)
+
+ return 0
+
+
if __name__ == '__main__':
    # Exit status mirrors main()'s return code.
    sys.exit(main())
+
--- /dev/null
+Inept PDF Plugin - ineptpdf_vXX_plugin.zip
+Requires Calibre version 0.6.44 or higher.
+
+All credit given to IHeartCabbages for the original standalone scripts.
+I had the much easier job of converting them to a Calibre plugin.
+
+This plugin is meant to decrypt Adobe Digital Edition PDFs that are protected with Adobe's Adept encryption. It is meant to function without having to install any dependencies... other than having Calibre installed, of course. It will still work if you have Python, PyCrypto and/or OpenSSL already installed, but they aren't necessary.
+
+Installation:
+
Go to Calibre's Preferences page... click on the Plugins button. Use the file dialog button to select the plugin's zip file (ineptpdf_vXX_plugin.zip) and click the 'Add' button. You're done.
+
+Configuration:
+
When first run, the plugin will attempt to find your Adobe Digital Editions installation (on Windows and Mac OS's). If successful, it will create an 'adeptkey.der' file and save it in Calibre's configuration directory. It will use that file on subsequent runs. If there are already '*.der' files in the directory, the plugin won't attempt to
find the Adobe Digital Editions installation.
+
So if you have Adobe Digital Editions installed on the same machine as Calibre... you are ready to go. If not... keep reading.
+
+If you already have keyfiles generated with I <3 Cabbages' ineptkey.pyw script, you can put those keyfiles in Calibre's configuration directory. The easiest way to find the correct directory is to go to Calibre's Preferences page... click on the 'Miscellaneous' button (looks like a gear), and then click the 'Open Calibre configuration directory' button. Paste your keyfiles in there. Just make sure that
+they have different names and are saved with the '.der' extension (like the ineptkey script produces). This directory isn't touched when upgrading Calibre, so it's quite safe to leave them there.
+
+Since there is no Linux version of Adobe Digital Editions, Linux users will have to obtain a keyfile through other methods and put the file in Calibre's configuration directory.
+
+All keyfiles with a '.der' extension found in Calibre's configuration directory will be used to attempt to decrypt a book.
+
+** NOTE ** There is no plugin customization data for the Inept PDF plugin.
+
+Troubleshooting:
+
+If you find that it's not working for you (imported PDFs still have DRM), you can save a lot of time and trouble by trying to add the PDF to Calibre with the command line tools. This will print out a lot of helpful debugging info that can be copied into any online help requests. I'm going to ask you to do it first, anyway, so you might
+as well get used to it. ;)
+
+Open a command prompt (terminal) and change to the directory where the ebook you're trying to import resides. Then type the command "calibredb add your_ebook.pdf". Don't type the quotes and obviously change the 'your_ebook.pdf' to whatever the filename of your book is. Copy the resulting output and paste it into any online help request you make.
+
+** Note: the Mac version of Calibre doesn't install the command line tools by default. If you go to the 'Preferences' page and click on the miscellaneous button, you'll see the option to install the command line tools.
\ No newline at end of file
-Plugin for K4PC, K4Mac and Mobi Books
+Plugin for K4PC, K4Mac, standalone Kindles, Mobi Books, and for Devices with Fixed PIDs.
-Will work on Linux (standard DRM Mobi books only), Mac OS X (standard DRM Mobi books and "Kindle for Mac" books, and Windows (standard DRM Mobi books and "Kindle for PC" books.
+This plugin supersedes MobiDeDRM, K4DeDRM, and K4PCDeDRM and K4X plugins. If you install this plugin, those plugins can be safely removed.
-This plugin supersedes MobiDeDRM, K4DeDRM, and K4PCDeDRM plugins. If you install this plugin, those plugins can be safely removed.
-
-This plugin is meant to convert "Kindle for PC", "Kindle for Mac" and "Mobi" ebooks with DRM to unlocked Mobi files. Calibre can then convert them to whatever format you desire. It is meant to function without having to install any dependencies except for Calibre being on your same machine and in the same account as your "Kindle for PC" or "Kindle for Mac" application if you are going to remove the DRM from those types of books.
+This plugin is meant to remove the DRM from .prc, .azw, .azw1, and .tpz ebooks. Calibre can then convert them to whatever format you desire. It is meant to function without having to install any dependencies except for Calibre being on your same machine and in the same account as your "Kindle for PC" or "Kindle for Mac" application if you are going to remove the DRM from those types of books.
Installation:
-Go to Calibre's Preferences page... click on the Plugins button. Use the file dialog button to select the plugin's zip file (k4mobidedrm_vXX_plugin.zip) and click the 'Add' button. You're done.
+Go to Calibre's Preferences page... click on the Plugins button. Use the file dialog button to select the plugin's zip file (K4MobiDeDRM_vXX_plugin.zip) and click the 'Add' button. You're done.
Configuration:
-Highlight the plugin (K4MobiDeDRM under the "File type plugins" category) and click the "Customize Plugin" button on Calibre's Preferences->Plugins page. Enter a comma separated list of your 10 digit PIDs. This is not needed if you only want to decode "Kindle for PC" or "Kindle for Mac" books.
+Highlight the plugin (K4MobiDeDRM under the "File type plugins" category) and click the "Customize Plugin" button on Calibre's Preferences->Plugins page. Enter a comma separated list of your 10 digit PIDs. Include in this list (again separated by commas) any 16 digit serial numbers of the standalone Kindles you may have (these typically begin "B0..."). This is not needed if you only want to decode "Kindle for PC" or "Kindle for Mac" books.
Troubleshooting:
-If you find that it's not working for you (imported azw's are not converted to mobi format), you can save a lot of time and trouble by trying to add the azw file to Calibre with the command line tools. This will print out a lot of helpful debugging info that can be copied into any online help requests. I'm going to ask you to do it first, anyway, so you might
+If you find that it's not working for you, you can save a lot of time and trouble by trying to add the azw file to Calibre with the command line tools. This will print out a lot of helpful debugging info that can be copied into any online help requests. I'm going to ask you to do it first, anyway, so you might
as well get used to it. ;)
Open a command prompt (terminal) and change to the directory where the ebook you're trying to import resides. Then type the command "calibredb add your_ebook.azw". Don't type the quotes and obviously change the 'your_ebook.azw' to whatever the filename of your book is. Copy the resulting output and paste it into any online help request you make.
# This plugin is meant to convert secure Ereader files (PDB) to unsecured PMLZ files.
# Calibre can then convert it to whatever format you desire.
# It is meant to function without having to install any dependencies...
-# other than having Calibre installed, of course. I've included the psyco libraries
-# (compiled for each platform) for speed. If your system can use them, great!
-# Otherwise, they won't be used and things will just work slower.
+# other than having Calibre installed, of course.
#
# Installation:
# Go to Calibre's Preferences page... click on the Plugins button. Use the file
# Revision history:
# 0.0.1 - Initial release
# 0.0.2 - updated to distinguish it from earlier non-openssl version
+# 0.0.3 - removed bundled psyco code as it is not supported under Calibre's Python 2.7
import sys, os
Credit given to The Dark Reverser for the original standalone script.'
supported_platforms = ['linux', 'osx', 'windows'] # Platforms this plugin will run on
author = 'DiapDealer' # The author of this plugin
- version = (0, 0, 2) # The version number of this plugin
+ version = (0, 0, 3) # The version number of this plugin
file_types = set(['pdb']) # The file types that this plugin will be applied to
on_import = True # Run this plugin during the import
def run(self, path_to_ebook):
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.constants import iswindows, isosx
- pdir = 'windows' if iswindows else 'osx' if isosx else 'linux'
- ppath = os.path.join(self.sys_insertion_path, pdir)
- sys.path.insert(0, ppath)
global bookname, erdr2pml
import erdr2pml
- if 'psyco' in sys.modules:
- print 'Using psyco acceleration for %s.' % pdir
- else:
- print 'NOT using psyco acceleration for %s. Conversion may be slow.' % pdir
-
infile = path_to_ebook
bookname = os.path.splitext(os.path.basename(infile))[0]
outdir = PersistentTemporaryDirectory()
try:
name, cc = i.split(',')
except ValueError:
- sys.path.remove(ppath)
print ' Error parsing user supplied data.'
return path_to_ebook
# 0.14 - contributed enhancement to support --make-pmlz switch
# 0.15 - enabled high-ascii to pml character encoding. DropBook now works on Mac.
# 0.16 - convert to use openssl DES (very very fast) or pure python DES if openssl's libcrypto is not available
+# 0.17 - added support for pycrypto's DES as well
Des = None
import openssl_des
Des = openssl_des.load_libcrypto()
+# if that did not work then try pycrypto version of DES
+if Des == None:
+ import pycrypto_des
+ Des = pycrypto_des.load_pycrypto()
+
# if that did not work then use pure python implementation
# of DES and try to speed it up with Psycho
if Des == None:
Des = python_des.Des
# Import Psyco if available
try:
- # Dumb speed hack 1
# http://psyco.sourceforge.net
import psyco
psyco.full()
- pass
except ImportError:
pass
-__version__='0.16'
+__version__='0.17'
class Unbuffered:
def __init__(self, stream):
--- /dev/null
+#!/usr/bin/env python
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+
+
+def load_pycrypto():
+ try :
+ from Crypto.Cipher import DES as _DES
+ except:
+ return None
+
+ class DES(object):
+ def __init__(self, key):
+ if len(key) != 8 :
+ raise Error('DES improper key used')
+ self.key = key
+ self._des = _DES.new(key,_DES.MODE_ECB)
+ def desdecrypt(self, data):
+ return self._des.decrypt(data)
+ def decrypt(self, data):
+ if not data:
+ return ''
+ i = 0
+ result = []
+ while i < len(data):
+ block = data[i:i+8]
+ processed_block = self.desdecrypt(block)
+ result.append(processed_block)
+ i += 8
+ return ''.join(result)
+ return DES
+
+#!/usr/bin/env python
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
import sys
ECB = 0
# 0.1.0 - Initial release
# 0.1.1 - Allow Windows users to make use of openssl if they have it installed.
# - Incorporated SomeUpdates zipfix routine.
-
+# 0.1.2 - bug fix for non-ascii file names in encryption.xml
"""
Decrypt Barnes & Noble ADEPT encrypted EPUB books.
enc('CipherReference'))
for elem in encryption.findall(expr):
path = elem.get('URI', None)
+ path = path.encode('utf-8')
if path is not None:
encrypted.add(path)
Credit given to I <3 Cabbages for the original stand-alone scripts.'
supported_platforms = ['linux', 'osx', 'windows']
author = 'DiapDealer'
- version = (0, 1, 1)
+ version = (0, 1, 2)
minimum_calibre_version = (0, 6, 44) # Compiled python libraries cannot be imported in earlier versions.
file_types = set(['epub'])
on_import = True
from PyQt4.Qt import QMessageBox
from calibre.constants import iswindows, isosx
- # Add the included pycrypto import directory for Windows users.
- pdir = 'windows' if iswindows else 'osx' if isosx else 'linux'
- ppath = os.path.join(self.sys_insertion_path, pdir)
- sys.path.append(ppath)
-
AES, AES2 = _load_crypto()
if AES == None or AES2 == None:
# Failed to load libcrypto or PyCrypto... Adobe Epubs can't be decrypted.'
- sys.path.remove(ppath)
raise IGNOBLEError('IgnobleEpub - Failed to load crypto libs.')
return
# Get name and credit card number from Plugin Customization
if not userkeys and not self.site_customization:
# Plugin hasn't been configured... do nothing.
- sys.path.remove(ppath)
raise IGNOBLEError('IgnobleEpub - No keys found. Plugin not configured.')
return
name, ccn = i.split(',')
keycount += 1
except ValueError:
- sys.path.remove(ppath)
raise IGNOBLEError('IgnobleEpub - Error parsing user supplied data.')
return
if result == 1:
print 'IgnobleEpub: Not a B&N Adept Epub... punting.'
of.close()
- sys.path.remove(ppath)
return path_to_ebook
break
if result == 0:
print 'IgnobleEpub: Encryption successfully removed.'
of.close()
- sys.path.remove(ppath)
return of.name
break
# Something went wrong with decryption.
# Import the original unmolested epub.
of.close
- sys.path.remove(ppath)
raise IGNOBLEError('IgnobleEpub - Ultimately failed to decrypt.')
return
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# ===================================================================
-# The contents of this file are dedicated to the public domain. To
-# the extent that dedication to the public domain is not available,
-# everyone is granted a worldwide, perpetual, royalty-free,
-# non-exclusive license to exercise all rights associated with the
-# contents of this file for any purpose whatsoever.
-# No rights are reserved.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ===================================================================
-
-"""Secret-key encryption algorithms.
-
-Secret-key encryption algorithms transform plaintext in some way that
-is dependent on a key, producing ciphertext. This transformation can
-easily be reversed, if (and, hopefully, only if) one knows the key.
-
-The encryption modules here all support the interface described in PEP
-272, "API for Block Encryption Algorithms".
-
-If you don't know which algorithm to choose, use AES because it's
-standard and has undergone a fair bit of examination.
-
-Crypto.Cipher.AES Advanced Encryption Standard
-Crypto.Cipher.ARC2 Alleged RC2
-Crypto.Cipher.ARC4 Alleged RC4
-Crypto.Cipher.Blowfish
-Crypto.Cipher.CAST
-Crypto.Cipher.DES The Data Encryption Standard. Very commonly used
- in the past, but today its 56-bit keys are too small.
-Crypto.Cipher.DES3 Triple DES.
-Crypto.Cipher.XOR The simple XOR cipher.
-"""
-
-__all__ = ['AES', 'ARC2', 'ARC4',
- 'Blowfish', 'CAST', 'DES', 'DES3',
- 'XOR'
- ]
-
-__revision__ = "$Id$"
-
-
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# ===================================================================
-# The contents of this file are dedicated to the public domain. To
-# the extent that dedication to the public domain is not available,
-# everyone is granted a worldwide, perpetual, royalty-free,
-# non-exclusive license to exercise all rights associated with the
-# contents of this file for any purpose whatsoever.
-# No rights are reserved.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ===================================================================
-
-"""Python Cryptography Toolkit
-
-A collection of cryptographic modules implementing various algorithms
-and protocols.
-
-Subpackages:
-Crypto.Cipher Secret-key encryption algorithms (AES, DES, ARC4)
-Crypto.Hash Hashing algorithms (MD5, SHA, HMAC)
-Crypto.Protocol Cryptographic protocols (Chaffing, all-or-nothing
- transform). This package does not contain any
- network protocols.
-Crypto.PublicKey Public-key encryption and signature algorithms
- (RSA, DSA)
-Crypto.Util Various useful modules and functions (long-to-string
- conversion, random number generation, number
- theoretic functions)
-"""
-
-__all__ = ['Cipher', 'Hash', 'Protocol', 'PublicKey', 'Util']
-
-__version__ = '2.3' # See also below and setup.py
-__revision__ = "$Id$"
-
-# New software should look at this instead of at __version__ above.
-version_info = (2, 1, 0, 'final', 0) # See also above and setup.py
-
+++ /dev/null
-# -*- coding: ascii -*-
-#
-# pct_warnings.py : PyCrypto warnings file
-#
-# Written in 2008 by Dwayne C. Litzenberger <dlitz@dlitz.net>
-#
-# ===================================================================
-# The contents of this file are dedicated to the public domain. To
-# the extent that dedication to the public domain is not available,
-# everyone is granted a worldwide, perpetual, royalty-free,
-# non-exclusive license to exercise all rights associated with the
-# contents of this file for any purpose whatsoever.
-# No rights are reserved.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ===================================================================
-
-#
-# Base classes. All our warnings inherit from one of these in order to allow
-# the user to specifically filter them.
-#
-
-class CryptoWarning(Warning):
- """Base class for PyCrypto warnings"""
-
-class CryptoDeprecationWarning(DeprecationWarning, CryptoWarning):
- """Base PyCrypto DeprecationWarning class"""
-
-class CryptoRuntimeWarning(RuntimeWarning, CryptoWarning):
- """Base PyCrypto RuntimeWarning class"""
-
-#
-# Warnings that we might actually use
-#
-
-class RandomPool_DeprecationWarning(CryptoDeprecationWarning):
- """Issued when Crypto.Util.randpool.RandomPool is instantiated."""
-
-class ClockRewindWarning(CryptoRuntimeWarning):
- """Warning for when the system clock moves backwards."""
-
-class GetRandomNumber_DeprecationWarning(CryptoDeprecationWarning):
- """Issued when Crypto.Util.number.getRandomNumber is invoked."""
-
-# By default, we want this warning to be shown every time we compensate for
-# clock rewinding.
-import warnings as _warnings
-_warnings.filterwarnings('always', category=ClockRewindWarning, append=1)
-
-# vim:set ts=4 sw=4 sts=4 expandtab:
# - Incorporated SomeUpdates zipfix routine.
# 0.1.2 - Removed Carbon dependency for Mac users. Fixes an issue that was a
# result of Calibre changing to python 2.7.
+# 0.1.3 - bug fix for epubs with non-ascii chars in file names
+
"""
enc('CipherReference'))
for elem in encryption.findall(expr):
path = elem.get('URI', None)
+ path = path.encode('utf-8')
if path is not None:
encrypted.add(path)
Credit given to I <3 Cabbages for the original stand-alone scripts.'
supported_platforms = ['linux', 'osx', 'windows']
author = 'DiapDealer'
- version = (0, 1, 2)
+ version = (0, 1, 3)
minimum_calibre_version = (0, 6, 44) # Compiled python libraries cannot be imported in earlier versions.
file_types = set(['epub'])
on_import = True
from PyQt4.Qt import QMessageBox
from calibre.constants import iswindows, isosx
- # Add the included pycrypto import directory for Windows users.
- # Add the included Carbon import directory for Mac users.
- pdir = 'windows' if iswindows else 'osx' if isosx else 'linux'
- ppath = os.path.join(self.sys_insertion_path, pdir)
- sys.path.append(ppath)
-
AES, RSA = _load_crypto()
if AES == None or RSA == None:
# Failed to load libcrypto or PyCrypto... Adobe Epubs can\'t be decrypted.'
- sys.path.remove(ppath)
raise ADEPTError('IneptEpub: Failed to load crypto libs... Adobe Epubs can\'t be decrypted.')
return
if not userkeys:
# No user keys found... bail out.
- sys.path.remove(ppath)
raise ADEPTError('IneptEpub - No keys found. Check keyfile(s)/ADE install')
return
if result == 1:
print 'IneptEpub: Not an Adobe Adept Epub... punting.'
of.close()
- sys.path.remove(ppath)
return path_to_ebook
break
if result == 0:
print 'IneptEpub: Encryption successfully removed.'
of.close
- sys.path.remove(ppath)
return of.name
break
# Something went wrong with decryption.
# Import the original unmolested epub.
of.close
- sys.path.remove(ppath)
raise ADEPTError('IneptEpub - Ultimately failed to decrypt')
return
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# ===================================================================
-# The contents of this file are dedicated to the public domain. To
-# the extent that dedication to the public domain is not available,
-# everyone is granted a worldwide, perpetual, royalty-free,
-# non-exclusive license to exercise all rights associated with the
-# contents of this file for any purpose whatsoever.
-# No rights are reserved.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ===================================================================
-
-"""Secret-key encryption algorithms.
-
-Secret-key encryption algorithms transform plaintext in some way that
-is dependent on a key, producing ciphertext. This transformation can
-easily be reversed, if (and, hopefully, only if) one knows the key.
-
-The encryption modules here all support the interface described in PEP
-272, "API for Block Encryption Algorithms".
-
-If you don't know which algorithm to choose, use AES because it's
-standard and has undergone a fair bit of examination.
-
-Crypto.Cipher.AES Advanced Encryption Standard
-Crypto.Cipher.ARC2 Alleged RC2
-Crypto.Cipher.ARC4 Alleged RC4
-Crypto.Cipher.Blowfish
-Crypto.Cipher.CAST
-Crypto.Cipher.DES The Data Encryption Standard. Very commonly used
- in the past, but today its 56-bit keys are too small.
-Crypto.Cipher.DES3 Triple DES.
-Crypto.Cipher.XOR The simple XOR cipher.
-"""
-
-__all__ = ['AES', 'ARC2', 'ARC4',
- 'Blowfish', 'CAST', 'DES', 'DES3',
- 'XOR'
- ]
-
-__revision__ = "$Id$"
-
-
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# ===================================================================
-# The contents of this file are dedicated to the public domain. To
-# the extent that dedication to the public domain is not available,
-# everyone is granted a worldwide, perpetual, royalty-free,
-# non-exclusive license to exercise all rights associated with the
-# contents of this file for any purpose whatsoever.
-# No rights are reserved.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ===================================================================
-
-"""Hashing algorithms
-
-Hash functions take arbitrary strings as input, and produce an output
-of fixed size that is dependent on the input; it should never be
-possible to derive the input data given only the hash function's
-output. Hash functions can be used simply as a checksum, or, in
-association with a public-key algorithm, can be used to implement
-digital signatures.
-
-The hashing modules here all support the interface described in PEP
-247, "API for Cryptographic Hash Functions".
-
-Submodules:
-Crypto.Hash.HMAC RFC 2104: Keyed-Hashing for Message Authentication
-Crypto.Hash.MD2
-Crypto.Hash.MD4
-Crypto.Hash.MD5
-Crypto.Hash.RIPEMD160
-Crypto.Hash.SHA
-"""
-
-__all__ = ['HMAC', 'MD2', 'MD4', 'MD5', 'RIPEMD', 'RIPEMD160', 'SHA', 'SHA256']
-__revision__ = "$Id$"
-
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# PublicKey/RSA.py : RSA public key primitive
-#
-# Written in 2008 by Dwayne C. Litzenberger <dlitz@dlitz.net>
-#
-# ===================================================================
-# The contents of this file are dedicated to the public domain. To
-# the extent that dedication to the public domain is not available,
-# everyone is granted a worldwide, perpetual, royalty-free,
-# non-exclusive license to exercise all rights associated with the
-# contents of this file for any purpose whatsoever.
-# No rights are reserved.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ===================================================================
-
-"""RSA public-key cryptography algorithm."""
-
-__revision__ = "$Id$"
-
-__all__ = ['generate', 'construct', 'error']
-
-from Crypto.Util.python_compat import *
-
-from Crypto.PublicKey import _RSA, _slowmath, pubkey
-from Crypto import Random
-
-try:
- from Crypto.PublicKey import _fastmath
-except ImportError:
- _fastmath = None
-
-class _RSAobj(pubkey.pubkey):
- keydata = ['n', 'e', 'd', 'p', 'q', 'u']
-
- def __init__(self, implementation, key):
- self.implementation = implementation
- self.key = key
-
- def __getattr__(self, attrname):
- if attrname in self.keydata:
- # For backward compatibility, allow the user to get (not set) the
- # RSA key parameters directly from this object.
- return getattr(self.key, attrname)
- else:
- raise AttributeError("%s object has no %r attribute" % (self.__class__.__name__, attrname,))
-
- def _encrypt(self, c, K):
- return (self.key._encrypt(c),)
-
- def _decrypt(self, c):
- #(ciphertext,) = c
- (ciphertext,) = c[:1] # HACK - We should use the previous line
- # instead, but this is more compatible and we're
- # going to replace the Crypto.PublicKey API soon
- # anyway.
- return self.key._decrypt(ciphertext)
-
- def _blind(self, m, r):
- return self.key._blind(m, r)
-
- def _unblind(self, m, r):
- return self.key._unblind(m, r)
-
- def _sign(self, m, K=None):
- return (self.key._sign(m),)
-
- def _verify(self, m, sig):
- #(s,) = sig
- (s,) = sig[:1] # HACK - We should use the previous line instead, but
- # this is more compatible and we're going to replace
- # the Crypto.PublicKey API soon anyway.
- return self.key._verify(m, s)
-
- def has_private(self):
- return self.key.has_private()
-
- def size(self):
- return self.key.size()
-
- def can_blind(self):
- return True
-
- def can_encrypt(self):
- return True
-
- def can_sign(self):
- return True
-
- def publickey(self):
- return self.implementation.construct((self.key.n, self.key.e))
-
- def __getstate__(self):
- d = {}
- for k in self.keydata:
- try:
- d[k] = getattr(self.key, k)
- except AttributeError:
- pass
- return d
-
- def __setstate__(self, d):
- if not hasattr(self, 'implementation'):
- self.implementation = RSAImplementation()
- t = []
- for k in self.keydata:
- if not d.has_key(k):
- break
- t.append(d[k])
- self.key = self.implementation._math.rsa_construct(*tuple(t))
-
- def __repr__(self):
- attrs = []
- for k in self.keydata:
- if k == 'n':
- attrs.append("n(%d)" % (self.size()+1,))
- elif hasattr(self.key, k):
- attrs.append(k)
- if self.has_private():
- attrs.append("private")
- return "<%s @0x%x %s>" % (self.__class__.__name__, id(self), ",".join(attrs))
-
-class RSAImplementation(object):
- def __init__(self, **kwargs):
- # 'use_fast_math' parameter:
- # None (default) - Use fast math if available; Use slow math if not.
- # True - Use fast math, and raise RuntimeError if it's not available.
- # False - Use slow math.
- use_fast_math = kwargs.get('use_fast_math', None)
- if use_fast_math is None: # Automatic
- if _fastmath is not None:
- self._math = _fastmath
- else:
- self._math = _slowmath
-
- elif use_fast_math: # Explicitly select fast math
- if _fastmath is not None:
- self._math = _fastmath
- else:
- raise RuntimeError("fast math module not available")
-
- else: # Explicitly select slow math
- self._math = _slowmath
-
- self.error = self._math.error
-
- # 'default_randfunc' parameter:
- # None (default) - use Random.new().read
- # not None - use the specified function
- self._default_randfunc = kwargs.get('default_randfunc', None)
- self._current_randfunc = None
-
- def _get_randfunc(self, randfunc):
- if randfunc is not None:
- return randfunc
- elif self._current_randfunc is None:
- self._current_randfunc = Random.new().read
- return self._current_randfunc
-
- def generate(self, bits, randfunc=None, progress_func=None):
- rf = self._get_randfunc(randfunc)
- obj = _RSA.generate_py(bits, rf, progress_func) # TODO: Don't use legacy _RSA module
- key = self._math.rsa_construct(obj.n, obj.e, obj.d, obj.p, obj.q, obj.u)
- return _RSAobj(self, key)
-
- def construct(self, tup):
- key = self._math.rsa_construct(*tup)
- return _RSAobj(self, key)
-
-_impl = RSAImplementation()
-generate = _impl.generate
-construct = _impl.construct
-error = _impl.error
-
-# vim:set ts=4 sw=4 sts=4 expandtab:
-
+++ /dev/null
-#
-# RSA.py : RSA encryption/decryption
-#
-# Part of the Python Cryptography Toolkit
-#
-# Written by Andrew Kuchling, Paul Swartz, and others
-#
-# ===================================================================
-# The contents of this file are dedicated to the public domain. To
-# the extent that dedication to the public domain is not available,
-# everyone is granted a worldwide, perpetual, royalty-free,
-# non-exclusive license to exercise all rights associated with the
-# contents of this file for any purpose whatsoever.
-# No rights are reserved.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ===================================================================
-#
-
-__revision__ = "$Id$"
-
-from Crypto.PublicKey import pubkey
-from Crypto.Util import number
-
-def generate_py(bits, randfunc, progress_func=None):
- """generate(bits:int, randfunc:callable, progress_func:callable)
-
- Generate an RSA key of length 'bits', using 'randfunc' to get
- random data and 'progress_func', if present, to display
- the progress of the key generation.
- """
- obj=RSAobj()
- obj.e = 65537L
-
- # Generate the prime factors of n
- if progress_func:
- progress_func('p,q\n')
- p = q = 1L
- while number.size(p*q) < bits:
- # Note that q might be one bit longer than p if somebody specifies an odd
- # number of bits for the key. (Why would anyone do that? You don't get
- # more security.)
- #
- # Note also that we ensure that e is coprime to (p-1) and (q-1).
- # This is needed for encryption to work properly, according to the 1997
- # paper by Robert D. Silverman of RSA Labs, "Fast generation of random,
- # strong RSA primes", available at
- # http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.17.2713&rep=rep1&type=pdf
- # Since e=65537 is prime, it is sufficient to check that e divides
- # neither (p-1) nor (q-1).
- p = 1L
- while (p - 1) % obj.e == 0:
- if progress_func:
- progress_func('p\n')
- p = pubkey.getPrime(bits/2, randfunc)
- q = 1L
- while (q - 1) % obj.e == 0:
- if progress_func:
- progress_func('q\n')
- q = pubkey.getPrime(bits - (bits/2), randfunc)
-
- # p shall be smaller than q (for calc of u)
- if p > q:
- (p, q)=(q, p)
- obj.p = p
- obj.q = q
-
- if progress_func:
- progress_func('u\n')
- obj.u = pubkey.inverse(obj.p, obj.q)
- obj.n = obj.p*obj.q
-
- if progress_func:
- progress_func('d\n')
- obj.d=pubkey.inverse(obj.e, (obj.p-1)*(obj.q-1))
-
- assert bits <= 1+obj.size(), "Generated key is too small"
-
- return obj
-
-class RSAobj(pubkey.pubkey):
-
- def size(self):
- """size() : int
- Return the maximum number of bits that can be handled by this key.
- """
- return number.size(self.n) - 1
-
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# ===================================================================
-# The contents of this file are dedicated to the public domain. To
-# the extent that dedication to the public domain is not available,
-# everyone is granted a worldwide, perpetual, royalty-free,
-# non-exclusive license to exercise all rights associated with the
-# contents of this file for any purpose whatsoever.
-# No rights are reserved.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ===================================================================
-
-"""Public-key encryption and signature algorithms.
-
-Public-key encryption uses two different keys, one for encryption and
-one for decryption. The encryption key can be made public, and the
-decryption key is kept private. Many public-key algorithms can also
-be used to sign messages, and some can *only* be used for signatures.
-
-Crypto.PublicKey.DSA Digital Signature Algorithm. (Signature only)
-Crypto.PublicKey.ElGamal (Signing and encryption)
-Crypto.PublicKey.RSA (Signing, encryption, and blinding)
-Crypto.PublicKey.qNEW (Signature only)
-
-"""
-
-__all__ = ['RSA', 'DSA', 'ElGamal', 'qNEW']
-__revision__ = "$Id$"
-
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# PubKey/RSA/_slowmath.py : Pure Python implementation of the RSA portions of _fastmath
-#
-# Written in 2008 by Dwayne C. Litzenberger <dlitz@dlitz.net>
-#
-# ===================================================================
-# The contents of this file are dedicated to the public domain. To
-# the extent that dedication to the public domain is not available,
-# everyone is granted a worldwide, perpetual, royalty-free,
-# non-exclusive license to exercise all rights associated with the
-# contents of this file for any purpose whatsoever.
-# No rights are reserved.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ===================================================================
-
-"""Pure Python implementation of the RSA-related portions of Crypto.PublicKey._fastmath."""
-
-__revision__ = "$Id$"
-
-__all__ = ['rsa_construct']
-
-from Crypto.Util.python_compat import *
-
-from Crypto.Util.number import size, inverse
-
-class error(Exception):
- pass
-
-class _RSAKey(object):
- def _blind(self, m, r):
- # compute r**e * m (mod n)
- return m * pow(r, self.e, self.n)
-
- def _unblind(self, m, r):
- # compute m / r (mod n)
- return inverse(r, self.n) * m % self.n
-
- def _decrypt(self, c):
- # compute c**d (mod n)
- if not self.has_private():
- raise TypeError("No private key")
- return pow(c, self.d, self.n) # TODO: CRT exponentiation
-
- def _encrypt(self, m):
- # compute m**d (mod n)
- return pow(m, self.e, self.n)
-
- def _sign(self, m): # alias for _decrypt
- if not self.has_private():
- raise TypeError("No private key")
- return self._decrypt(m)
-
- def _verify(self, m, sig):
- return self._encrypt(sig) == m
-
- def has_private(self):
- return hasattr(self, 'd')
-
- def size(self):
- """Return the maximum number of bits that can be encrypted"""
- return size(self.n) - 1
-
-def rsa_construct(n, e, d=None, p=None, q=None, u=None):
- """Construct an RSAKey object"""
- assert isinstance(n, long)
- assert isinstance(e, long)
- assert isinstance(d, (long, type(None)))
- assert isinstance(p, (long, type(None)))
- assert isinstance(q, (long, type(None)))
- assert isinstance(u, (long, type(None)))
- obj = _RSAKey()
- obj.n = n
- obj.e = e
- if d is not None: obj.d = d
- if p is not None: obj.p = p
- if q is not None: obj.q = q
- if u is not None: obj.u = u
- return obj
-
-class _DSAKey(object):
- def size(self):
- """Return the maximum number of bits that can be encrypted"""
- return size(self.p) - 1
-
- def has_private(self):
- return hasattr(self, 'x')
-
- def _sign(self, m, k): # alias for _decrypt
- # SECURITY TODO - We _should_ be computing SHA1(m), but we don't because that's the API.
- if not self.has_private():
- raise TypeError("No private key")
- if not (1L < k < self.q):
- raise ValueError("k is not between 2 and q-1")
- inv_k = inverse(k, self.q) # Compute k**-1 mod q
- r = pow(self.g, k, self.p) % self.q # r = (g**k mod p) mod q
- s = (inv_k * (m + self.x * r)) % self.q
- return (r, s)
-
- def _verify(self, m, r, s):
- # SECURITY TODO - We _should_ be computing SHA1(m), but we don't because that's the API.
- if not (0 < r < self.q) or not (0 < s < self.q):
- return False
- w = inverse(s, self.q)
- u1 = (m*w) % self.q
- u2 = (r*w) % self.q
- v = (pow(self.g, u1, self.p) * pow(self.y, u2, self.p) % self.p) % self.q
- return v == r
-
-def dsa_construct(y, g, p, q, x=None):
- assert isinstance(y, long)
- assert isinstance(g, long)
- assert isinstance(p, long)
- assert isinstance(q, long)
- assert isinstance(x, (long, type(None)))
- obj = _DSAKey()
- obj.y = y
- obj.g = g
- obj.p = p
- obj.q = q
- if x is not None: obj.x = x
- return obj
-
-
-# vim:set ts=4 sw=4 sts=4 expandtab:
-
+++ /dev/null
-#
-# pubkey.py : Internal functions for public key operations
-#
-# Part of the Python Cryptography Toolkit
-#
-# Written by Andrew Kuchling, Paul Swartz, and others
-#
-# ===================================================================
-# The contents of this file are dedicated to the public domain. To
-# the extent that dedication to the public domain is not available,
-# everyone is granted a worldwide, perpetual, royalty-free,
-# non-exclusive license to exercise all rights associated with the
-# contents of this file for any purpose whatsoever.
-# No rights are reserved.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ===================================================================
-#
-
-__revision__ = "$Id$"
-
-import types, warnings
-from Crypto.Util.number import *
-
-# Basic public key class
-class pubkey:
- def __init__(self):
- pass
-
- def __getstate__(self):
- """To keep key objects platform-independent, the key data is
- converted to standard Python long integers before being
- written out. It will then be reconverted as necessary on
- restoration."""
- d=self.__dict__
- for key in self.keydata:
- if d.has_key(key): d[key]=long(d[key])
- return d
-
- def __setstate__(self, d):
- """On unpickling a key object, the key data is converted to the big
-number representation being used, whether that is Python long
-integers, MPZ objects, or whatever."""
- for key in self.keydata:
- if d.has_key(key): self.__dict__[key]=bignum(d[key])
-
- def encrypt(self, plaintext, K):
- """encrypt(plaintext:string|long, K:string|long) : tuple
- Encrypt the string or integer plaintext. K is a random
- parameter required by some algorithms.
- """
- wasString=0
- if isinstance(plaintext, types.StringType):
- plaintext=bytes_to_long(plaintext) ; wasString=1
- if isinstance(K, types.StringType):
- K=bytes_to_long(K)
- ciphertext=self._encrypt(plaintext, K)
- if wasString: return tuple(map(long_to_bytes, ciphertext))
- else: return ciphertext
-
- def decrypt(self, ciphertext):
- """decrypt(ciphertext:tuple|string|long): string
- Decrypt 'ciphertext' using this key.
- """
- wasString=0
- if not isinstance(ciphertext, types.TupleType):
- ciphertext=(ciphertext,)
- if isinstance(ciphertext[0], types.StringType):
- ciphertext=tuple(map(bytes_to_long, ciphertext)) ; wasString=1
- plaintext=self._decrypt(ciphertext)
- if wasString: return long_to_bytes(plaintext)
- else: return plaintext
-
- def sign(self, M, K):
- """sign(M : string|long, K:string|long) : tuple
- Return a tuple containing the signature for the message M.
- K is a random parameter required by some algorithms.
- """
- if (not self.has_private()):
- raise TypeError('Private key not available in this object')
- if isinstance(M, types.StringType): M=bytes_to_long(M)
- if isinstance(K, types.StringType): K=bytes_to_long(K)
- return self._sign(M, K)
-
- def verify (self, M, signature):
- """verify(M:string|long, signature:tuple) : bool
- Verify that the signature is valid for the message M;
- returns true if the signature checks out.
- """
- if isinstance(M, types.StringType): M=bytes_to_long(M)
- return self._verify(M, signature)
-
- # alias to compensate for the old validate() name
- def validate (self, M, signature):
- warnings.warn("validate() method name is obsolete; use verify()",
- DeprecationWarning)
-
- def blind(self, M, B):
- """blind(M : string|long, B : string|long) : string|long
- Blind message M using blinding factor B.
- """
- wasString=0
- if isinstance(M, types.StringType):
- M=bytes_to_long(M) ; wasString=1
- if isinstance(B, types.StringType): B=bytes_to_long(B)
- blindedmessage=self._blind(M, B)
- if wasString: return long_to_bytes(blindedmessage)
- else: return blindedmessage
-
- def unblind(self, M, B):
- """unblind(M : string|long, B : string|long) : string|long
- Unblind message M using blinding factor B.
- """
- wasString=0
- if isinstance(M, types.StringType):
- M=bytes_to_long(M) ; wasString=1
- if isinstance(B, types.StringType): B=bytes_to_long(B)
- unblindedmessage=self._unblind(M, B)
- if wasString: return long_to_bytes(unblindedmessage)
- else: return unblindedmessage
-
-
- # The following methods will usually be left alone, except for
- # signature-only algorithms. They both return Boolean values
- # recording whether this key's algorithm can sign and encrypt.
- def can_sign (self):
- """can_sign() : bool
- Return a Boolean value recording whether this algorithm can
- generate signatures. (This does not imply that this
- particular key object has the private information required to
- to generate a signature.)
- """
- return 1
-
- def can_encrypt (self):
- """can_encrypt() : bool
- Return a Boolean value recording whether this algorithm can
- encrypt data. (This does not imply that this
- particular key object has the private information required to
- to decrypt a message.)
- """
- return 1
-
- def can_blind (self):
- """can_blind() : bool
- Return a Boolean value recording whether this algorithm can
- blind data. (This does not imply that this
- particular key object has the private information required to
- to blind a message.)
- """
- return 0
-
- # The following methods will certainly be overridden by
- # subclasses.
-
- def size (self):
- """size() : int
- Return the maximum number of bits that can be handled by this key.
- """
- return 0
-
- def has_private (self):
- """has_private() : bool
- Return a Boolean denoting whether the object contains
- private components.
- """
- return 0
-
- def publickey (self):
- """publickey(): object
- Return a new key object containing only the public information.
- """
- return self
-
- def __eq__ (self, other):
- """__eq__(other): 0, 1
- Compare us to other for equality.
- """
- return self.__getstate__() == other.__getstate__()
-
- def __ne__ (self, other):
- """__ne__(other): 0, 1
- Compare us to other for inequality.
- """
- return not self.__eq__(other)
+++ /dev/null
-# -*- coding: ascii -*-
-#
-# FortunaAccumulator.py : Fortuna's internal accumulator
-#
-# Written in 2008 by Dwayne C. Litzenberger <dlitz@dlitz.net>
-#
-# ===================================================================
-# The contents of this file are dedicated to the public domain. To
-# the extent that dedication to the public domain is not available,
-# everyone is granted a worldwide, perpetual, royalty-free,
-# non-exclusive license to exercise all rights associated with the
-# contents of this file for any purpose whatsoever.
-# No rights are reserved.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ===================================================================
-
-__revision__ = "$Id$"
-
-from Crypto.Util.python_compat import *
-
-from binascii import b2a_hex
-import time
-import warnings
-
-from Crypto.pct_warnings import ClockRewindWarning
-import SHAd256
-
-import FortunaGenerator
-
-class FortunaPool(object):
- """Fortuna pool type
-
- This object acts like a hash object, with the following differences:
-
- - It keeps a count (the .length attribute) of the number of bytes that
- have been added to the pool
- - It supports a .reset() method for in-place reinitialization
- - The method to add bytes to the pool is .append(), not .update().
- """
-
- digest_size = SHAd256.digest_size
-
- def __init__(self):
- self.reset()
-
- def append(self, data):
- self._h.update(data)
- self.length += len(data)
-
- def digest(self):
- return self._h.digest()
-
- def hexdigest(self):
- return b2a_hex(self.digest())
-
- def reset(self):
- self._h = SHAd256.new()
- self.length = 0
-
-def which_pools(r):
- """Return a list of pools indexes (in range(32)) that are to be included during reseed number r.
-
- According to _Practical Cryptography_, chapter 10.5.2 "Pools":
-
- "Pool P_i is included if 2**i is a divisor of r. Thus P_0 is used
- every reseed, P_1 every other reseed, P_2 every fourth reseed, etc."
- """
- # This is a separate function so that it can be unit-tested.
- assert r >= 1
- retval = []
- mask = 0
- for i in range(32):
- # "Pool P_i is included if 2**i is a divisor of [reseed_count]"
- if (r & mask) == 0:
- retval.append(i)
- else:
- break # optimization. once this fails, it always fails
- mask = (mask << 1) | 1L
- return retval
-
-class FortunaAccumulator(object):
-
- min_pool_size = 64 # TODO: explain why
- reseed_interval = 0.100 # 100 ms TODO: explain why
-
- def __init__(self):
- self.reseed_count = 0
- self.generator = FortunaGenerator.AESGenerator()
- self.last_reseed = None
-
- # Initialize 32 FortunaPool instances.
- # NB: This is _not_ equivalent to [FortunaPool()]*32, which would give
- # us 32 references to the _same_ FortunaPool instance (and cause the
- # assertion below to fail).
- self.pools = [FortunaPool() for i in range(32)] # 32 pools
- assert(self.pools[0] is not self.pools[1])
-
- def random_data(self, bytes):
- current_time = time.time()
- if self.last_reseed > current_time:
- warnings.warn("Clock rewind detected. Resetting last_reseed.", ClockRewindWarning)
- self.last_reseed = None
- if (self.pools[0].length >= self.min_pool_size and
- (self.last_reseed is None or
- current_time > self.last_reseed + self.reseed_interval)):
- self._reseed(current_time)
- # The following should fail if we haven't seeded the pool yet.
- return self.generator.pseudo_random_data(bytes)
-
- def _reseed(self, current_time=None):
- if current_time is None:
- current_time = time.time()
- seed = []
- self.reseed_count += 1
- self.last_reseed = current_time
- for i in which_pools(self.reseed_count):
- seed.append(self.pools[i].digest())
- self.pools[i].reset()
-
- seed = "".join(seed)
- self.generator.reseed(seed)
-
- def add_random_event(self, source_number, pool_number, data):
- assert 1 <= len(data) <= 32
- assert 0 <= source_number <= 255
- assert 0 <= pool_number <= 31
- self.pools[pool_number].append(chr(source_number))
- self.pools[pool_number].append(chr(len(data)))
- self.pools[pool_number].append(data)
-
-# vim:set ts=4 sw=4 sts=4 expandtab:
+++ /dev/null
-# -*- coding: ascii -*-
-#
-# FortunaGenerator.py : Fortuna's internal PRNG
-#
-# Written in 2008 by Dwayne C. Litzenberger <dlitz@dlitz.net>
-#
-# ===================================================================
-# The contents of this file are dedicated to the public domain. To
-# the extent that dedication to the public domain is not available,
-# everyone is granted a worldwide, perpetual, royalty-free,
-# non-exclusive license to exercise all rights associated with the
-# contents of this file for any purpose whatsoever.
-# No rights are reserved.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ===================================================================
-
-__revision__ = "$Id$"
-
-from Crypto.Util.python_compat import *
-
-import struct
-
-from Crypto.Util.number import ceil_shift, exact_log2, exact_div
-from Crypto.Util import Counter
-from Crypto.Cipher import AES
-
-import SHAd256
-
-class AESGenerator(object):
- """The Fortuna "generator"
-
- This is used internally by the Fortuna PRNG to generate arbitrary amounts
- of pseudorandom data from a smaller amount of seed data.
-
- The output is generated by running AES-256 in counter mode and re-keying
- after every mebibyte (2**16 blocks) of output.
- """
-
- block_size = AES.block_size # output block size in octets (128 bits)
- key_size = 32 # key size in octets (256 bits)
-
- # Because of the birthday paradox, we expect to find approximately one
- # collision for every 2**64 blocks of output from a real random source.
- # However, this code generates pseudorandom data by running AES in
- # counter mode, so there will be no collisions until the counter
- # (theoretically) wraps around at 2**128 blocks. Thus, in order to prevent
- # Fortuna's pseudorandom output from deviating perceptibly from a true
- # random source, Ferguson and Schneier specify a limit of 2**16 blocks
- # without rekeying.
- max_blocks_per_request = 2**16 # Allow no more than this number of blocks per _pseudo_random_data request
-
- _four_kiblocks_of_zeros = "\0" * block_size * 4096
-
- def __init__(self):
- self.counter = Counter.new(nbits=self.block_size*8, initial_value=0, little_endian=True)
- self.key = None
-
- # Set some helper constants
- self.block_size_shift = exact_log2(self.block_size)
- assert (1 << self.block_size_shift) == self.block_size
-
- self.blocks_per_key = exact_div(self.key_size, self.block_size)
- assert self.key_size == self.blocks_per_key * self.block_size
-
- self.max_bytes_per_request = self.max_blocks_per_request * self.block_size
-
- def reseed(self, seed):
- if self.key is None:
- self.key = "\0" * self.key_size
- self._set_key(SHAd256.new(self.key + seed).digest())
- self.counter() # increment counter
- assert len(self.key) == self.key_size
-
- def pseudo_random_data(self, bytes):
- assert bytes >= 0
-
- num_full_blocks = bytes >> 20
- remainder = bytes & ((1<<20)-1)
-
- retval = []
- for i in xrange(num_full_blocks):
- retval.append(self._pseudo_random_data(1<<20))
- retval.append(self._pseudo_random_data(remainder))
-
- return "".join(retval)
-
- def _set_key(self, key):
- self.key = key
- self._cipher = AES.new(key, AES.MODE_CTR, counter=self.counter)
-
- def _pseudo_random_data(self, bytes):
- if not (0 <= bytes <= self.max_bytes_per_request):
- raise AssertionError("You cannot ask for more than 1 MiB of data per request")
-
- num_blocks = ceil_shift(bytes, self.block_size_shift) # num_blocks = ceil(bytes / self.block_size)
-
- # Compute the output
- retval = self._generate_blocks(num_blocks)[:bytes]
-
- # Switch to a new key to avoid later compromises of this output (i.e.
- # state compromise extension attacks)
- self._set_key(self._generate_blocks(self.blocks_per_key))
-
- assert len(retval) == bytes
- assert len(self.key) == self.key_size
-
- return retval
-
- def _generate_blocks(self, num_blocks):
- if self.key is None:
- raise AssertionError("generator must be seeded before use")
- assert 0 <= num_blocks <= self.max_blocks_per_request
- retval = []
- for i in xrange(num_blocks >> 12): # xrange(num_blocks / 4096)
- retval.append(self._cipher.encrypt(self._four_kiblocks_of_zeros))
- remaining_bytes = (num_blocks & 4095) << self.block_size_shift # (num_blocks % 4095) * self.block_size
- retval.append(self._cipher.encrypt(self._four_kiblocks_of_zeros[:remaining_bytes]))
- return "".join(retval)
-
-# vim:set ts=4 sw=4 sts=4 expandtab:
+++ /dev/null
-# -*- coding: ascii -*-
-#
-# Random/Fortuna/SHAd256.py : SHA_d-256 hash function implementation
-#
-# Written in 2008 by Dwayne C. Litzenberger <dlitz@dlitz.net>
-#
-# ===================================================================
-# The contents of this file are dedicated to the public domain. To
-# the extent that dedication to the public domain is not available,
-# everyone is granted a worldwide, perpetual, royalty-free,
-# non-exclusive license to exercise all rights associated with the
-# contents of this file for any purpose whatsoever.
-# No rights are reserved.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ===================================================================
-
-"""\
-SHA_d-256 hash function implementation.
-
-This module should comply with PEP 247.
-"""
-
-__revision__ = "$Id$"
-__all__ = ['new', 'digest_size']
-
-from Crypto.Util.python_compat import *
-
-from binascii import b2a_hex
-
-from Crypto.Hash import SHA256
-
-assert SHA256.digest_size == 32
-
-class _SHAd256(object):
- """SHA-256, doubled.
-
- Returns SHA-256(SHA-256(data)).
- """
-
- digest_size = SHA256.digest_size
-
- _internal = object()
-
- def __init__(self, internal_api_check, sha256_hash_obj):
- if internal_api_check is not self._internal:
- raise AssertionError("Do not instantiate this class directly. Use %s.new()" % (__name__,))
- self._h = sha256_hash_obj
-
- # PEP 247 "copy" method
- def copy(self):
- """Return a copy of this hashing object"""
- return _SHAd256(SHAd256._internal, self._h.copy())
-
- # PEP 247 "digest" method
- def digest(self):
- """Return the hash value of this object as a binary string"""
- retval = SHA256.new(self._h.digest()).digest()
- assert len(retval) == 32
- return retval
-
- # PEP 247 "hexdigest" method
- def hexdigest(self):
- """Return the hash value of this object as a (lowercase) hexadecimal string"""
- retval = b2a_hex(self.digest())
- assert len(retval) == 64
- return retval
-
- # PEP 247 "update" method
- def update(self, data):
- self._h.update(data)
-
-# PEP 247 module-level "digest_size" variable
-digest_size = _SHAd256.digest_size
-
-# PEP 247 module-level "new" function
-def new(data=""):
- """Return a new SHAd256 hashing object"""
- return _SHAd256(_SHAd256._internal, SHA256.new(data))
-
-# vim:set ts=4 sw=4 sts=4 expandtab:
+++ /dev/null
-#
-# Random/OSRNG/__init__.py : Platform-independent OS RNG API
-#
-# Written in 2008 by Dwayne C. Litzenberger <dlitz@dlitz.net>
-#
-# ===================================================================
-# The contents of this file are dedicated to the public domain. To
-# the extent that dedication to the public domain is not available,
-# everyone is granted a worldwide, perpetual, royalty-free,
-# non-exclusive license to exercise all rights associated with the
-# contents of this file for any purpose whatsoever.
-# No rights are reserved.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ===================================================================
-
-"""Provides a platform-independent interface to the random number generators
-supplied by various operating systems."""
-
-__revision__ = "$Id$"
-
-import os
-
-if os.name == 'posix':
- from Crypto.Random.OSRNG.posix import new
-elif os.name == 'nt':
- from Crypto.Random.OSRNG.nt import new
-elif hasattr(os, 'urandom'):
- from Crypto.Random.OSRNG.fallback import new
-else:
- raise ImportError("Not implemented")
-
-# vim:set ts=4 sw=4 sts=4 expandtab:
+++ /dev/null
-#
-# Random/OSRNG/fallback.py : Fallback entropy source for systems with os.urandom
-#
-# Written in 2008 by Dwayne C. Litzenberger <dlitz@dlitz.net>
-#
-# ===================================================================
-# The contents of this file are dedicated to the public domain. To
-# the extent that dedication to the public domain is not available,
-# everyone is granted a worldwide, perpetual, royalty-free,
-# non-exclusive license to exercise all rights associated with the
-# contents of this file for any purpose whatsoever.
-# No rights are reserved.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ===================================================================
-
-
-__revision__ = "$Id$"
-__all__ = ['PythonOSURandomRNG']
-
-import os
-
-from rng_base import BaseRNG
-
-class PythonOSURandomRNG(BaseRNG):
-
- name = "<os.urandom>"
-
- def __init__(self):
- self._read = os.urandom
- BaseRNG.__init__(self)
-
- def _close(self):
- self._read = None
-
-def new(*args, **kwargs):
- return PythonOSURandomRNG(*args, **kwargs)
-
-# vim:set ts=4 sw=4 sts=4 expandtab:
+++ /dev/null
-#
-# Random/OSRNG/nt.py : OS entropy source for MS Windows
-#
-# Written in 2008 by Dwayne C. Litzenberger <dlitz@dlitz.net>
-#
-# ===================================================================
-# The contents of this file are dedicated to the public domain. To
-# the extent that dedication to the public domain is not available,
-# everyone is granted a worldwide, perpetual, royalty-free,
-# non-exclusive license to exercise all rights associated with the
-# contents of this file for any purpose whatsoever.
-# No rights are reserved.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ===================================================================
-
-
-__revision__ = "$Id$"
-__all__ = ['WindowsRNG']
-
-import winrandom
-from rng_base import BaseRNG
-
-class WindowsRNG(BaseRNG):
-
- name = "<CryptGenRandom>"
-
- def __init__(self):
- self.__winrand = winrandom.new()
- BaseRNG.__init__(self)
-
- def flush(self):
- """Work around weakness in Windows RNG.
-
- The CryptGenRandom mechanism in some versions of Windows allows an
- attacker to learn 128 KiB of past and future output. As a workaround,
- this function reads 128 KiB of 'random' data from Windows and discards
- it.
-
- For more information about the weaknesses in CryptGenRandom, see
- _Cryptanalysis of the Random Number Generator of the Windows Operating
- System_, by Leo Dorrendorf and Zvi Gutterman and Benny Pinkas
- http://eprint.iacr.org/2007/419
- """
- if self.closed:
- raise ValueError("I/O operation on closed file")
- data = self.__winrand.get_bytes(128*1024)
- assert (len(data) == 128*1024)
- BaseRNG.flush(self)
-
- def _close(self):
- self.__winrand = None
-
- def _read(self, N):
- # Unfortunately, research shows that CryptGenRandom doesn't provide
- # forward secrecy and fails the next-bit test unless we apply a
- # workaround, which we do here. See http://eprint.iacr.org/2007/419
- # for information on the vulnerability.
- self.flush()
- data = self.__winrand.get_bytes(N)
- self.flush()
- return data
-
-def new(*args, **kwargs):
- return WindowsRNG(*args, **kwargs)
-
-# vim:set ts=4 sw=4 sts=4 expandtab:
+++ /dev/null
-#
-# Random/OSRNG/rng_base.py : Base class for OSRNG
-#
-# Written in 2008 by Dwayne C. Litzenberger <dlitz@dlitz.net>
-#
-# ===================================================================
-# The contents of this file are dedicated to the public domain. To
-# the extent that dedication to the public domain is not available,
-# everyone is granted a worldwide, perpetual, royalty-free,
-# non-exclusive license to exercise all rights associated with the
-# contents of this file for any purpose whatsoever.
-# No rights are reserved.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ===================================================================
-
-__revision__ = "$Id$"
-
-from Crypto.Util.python_compat import *
-
-class BaseRNG(object):
-
- def __init__(self):
- self.closed = False
- self._selftest()
-
- def __del__(self):
- self.close()
-
- def _selftest(self):
- # Test that urandom can return data
- data = self.read(16)
- if len(data) != 16:
- raise AssertionError("read truncated")
-
- # Test that we get different data every time (if we don't, the RNG is
- # probably malfunctioning)
- data2 = self.read(16)
- if data == data2:
- raise AssertionError("OS RNG returned duplicate data")
-
- # PEP 343: Support for the "with" statement
- def __enter__(self):
- pass
- def __exit__(self):
- """PEP 343 support"""
- self.close()
-
- def close(self):
- if not self.closed:
- self._close()
- self.closed = True
-
- def flush(self):
- pass
-
- def read(self, N=-1):
- """Return N bytes from the RNG."""
- if self.closed:
- raise ValueError("I/O operation on closed file")
- if not isinstance(N, (long, int)):
- raise TypeError("an integer is required")
- if N < 0:
- raise ValueError("cannot read to end of infinite stream")
- elif N == 0:
- return ""
- data = self._read(N)
- if len(data) != N:
- raise AssertionError("%s produced truncated output (requested %d, got %d)" % (self.name, N, len(data)))
- return data
-
- def _close(self):
- raise NotImplementedError("child class must implement this")
-
- def _read(self, N):
- raise NotImplementedError("child class must implement this")
-
-
-# vim:set ts=4 sw=4 sts=4 expandtab:
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# Random/_UserFriendlyRNG.py : A user-friendly random number generator
-#
-# Written in 2008 by Dwayne C. Litzenberger <dlitz@dlitz.net>
-#
-# ===================================================================
-# The contents of this file are dedicated to the public domain. To
-# the extent that dedication to the public domain is not available,
-# everyone is granted a worldwide, perpetual, royalty-free,
-# non-exclusive license to exercise all rights associated with the
-# contents of this file for any purpose whatsoever.
-# No rights are reserved.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ===================================================================
-
-__revision__ = "$Id$"
-
-from Crypto.Util.python_compat import *
-
-import os
-import threading
-import struct
-import time
-from math import floor
-
-from Crypto.Random import OSRNG
-from Crypto.Random.Fortuna import FortunaAccumulator
-
-class _EntropySource(object):
- def __init__(self, accumulator, src_num):
- self._fortuna = accumulator
- self._src_num = src_num
- self._pool_num = 0
-
- def feed(self, data):
- self._fortuna.add_random_event(self._src_num, self._pool_num, data)
- self._pool_num = (self._pool_num + 1) & 31
-
-class _EntropyCollector(object):
-
- def __init__(self, accumulator):
- self._osrng = OSRNG.new()
- self._osrng_es = _EntropySource(accumulator, 255)
- self._time_es = _EntropySource(accumulator, 254)
- self._clock_es = _EntropySource(accumulator, 253)
-
- def reinit(self):
- # Add 256 bits to each of the 32 pools, twice. (For a total of 16384
- # bits collected from the operating system.)
- for i in range(2):
- block = self._osrng.read(32*32)
- for p in range(32):
- self._osrng_es.feed(block[p*32:(p+1)*32])
- block = None
- self._osrng.flush()
-
- def collect(self):
- # Collect 64 bits of entropy from the operating system and feed it to Fortuna.
- self._osrng_es.feed(self._osrng.read(8))
-
- # Add the fractional part of time.time()
- t = time.time()
- self._time_es.feed(struct.pack("@I", int(2**30 * (t - floor(t)))))
-
- # Add the fractional part of time.clock()
- t = time.clock()
- self._clock_es.feed(struct.pack("@I", int(2**30 * (t - floor(t)))))
-
-
-class _UserFriendlyRNG(object):
-
- def __init__(self):
- self.closed = False
- self._fa = FortunaAccumulator.FortunaAccumulator()
- self._ec = _EntropyCollector(self._fa)
- self.reinit()
-
- def reinit(self):
- """Initialize the random number generator and seed it with entropy from
- the operating system.
- """
- self._pid = os.getpid()
- self._ec.reinit()
-
- def close(self):
- self.closed = True
- self._osrng = None
- self._fa = None
-
- def flush(self):
- pass
-
- def read(self, N):
- """Return N bytes from the RNG."""
- if self.closed:
- raise ValueError("I/O operation on closed file")
- if not isinstance(N, (long, int)):
- raise TypeError("an integer is required")
- if N < 0:
- raise ValueError("cannot read to end of infinite stream")
-
- # Collect some entropy and feed it to Fortuna
- self._ec.collect()
-
- # Ask Fortuna to generate some bytes
- retval = self._fa.random_data(N)
-
- # Check that we haven't forked in the meantime. (If we have, we don't
- # want to use the data, because it might have been duplicated in the
- # parent process.
- self._check_pid()
-
- # Return the random data.
- return retval
-
- def _check_pid(self):
- # Lame fork detection to remind developers to invoke Random.atfork()
- # after every call to os.fork(). Note that this check is not reliable,
- # since process IDs can be reused on most operating systems.
- #
- # You need to do Random.atfork() in the child process after every call
- # to os.fork() to avoid reusing PRNG state. If you want to avoid
- # leaking PRNG state to child processes (for example, if you are using
- # os.setuid()) then you should also invoke Random.atfork() in the
- # *parent* process.
- if os.getpid() != self._pid:
- raise AssertionError("PID check failed. RNG must be re-initialized after fork(). Hint: Try Random.atfork()")
-
-
-class _LockingUserFriendlyRNG(_UserFriendlyRNG):
- def __init__(self):
- self._lock = threading.Lock()
- _UserFriendlyRNG.__init__(self)
-
- def close(self):
- self._lock.acquire()
- try:
- return _UserFriendlyRNG.close(self)
- finally:
- self._lock.release()
-
- def reinit(self):
- self._lock.acquire()
- try:
- return _UserFriendlyRNG.reinit(self)
- finally:
- self._lock.release()
-
- def read(self, bytes):
- self._lock.acquire()
- try:
- return _UserFriendlyRNG.read(self, bytes)
- finally:
- self._lock.release()
-
-class RNGFile(object):
- def __init__(self, singleton):
- self.closed = False
- self._singleton = singleton
-
- # PEP 343: Support for the "with" statement
- def __enter__(self):
- """PEP 343 support"""
- def __exit__(self):
- """PEP 343 support"""
- self.close()
-
- def close(self):
- # Don't actually close the singleton, just close this RNGFile instance.
- self.closed = True
- self._singleton = None
-
- def read(self, bytes):
- if self.closed:
- raise ValueError("I/O operation on closed file")
- return self._singleton.read(bytes)
-
- def flush(self):
- if self.closed:
- raise ValueError("I/O operation on closed file")
-
-_singleton_lock = threading.Lock()
-_singleton = None
-def _get_singleton():
- global _singleton
- _singleton_lock.acquire()
- try:
- if _singleton is None:
- _singleton = _LockingUserFriendlyRNG()
- return _singleton
- finally:
- _singleton_lock.release()
-
-def new():
- return RNGFile(_get_singleton())
-
-def reinit():
- _get_singleton().reinit()
-
-def get_random_bytes(n):
- """Return the specified number of cryptographically-strong random bytes."""
- return _get_singleton().read(n)
-
-# vim:set ts=4 sw=4 sts=4 expandtab:
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# Random/__init__.py : PyCrypto random number generation
-#
-# Written in 2008 by Dwayne C. Litzenberger <dlitz@dlitz.net>
-#
-# ===================================================================
-# The contents of this file are dedicated to the public domain. To
-# the extent that dedication to the public domain is not available,
-# everyone is granted a worldwide, perpetual, royalty-free,
-# non-exclusive license to exercise all rights associated with the
-# contents of this file for any purpose whatsoever.
-# No rights are reserved.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ===================================================================
-
-__revision__ = "$Id$"
-__all__ = ['new']
-
-import OSRNG
-import _UserFriendlyRNG
-
-def new(*args, **kwargs):
- """Return a file-like object that outputs cryptographically random bytes."""
- return _UserFriendlyRNG.new(*args, **kwargs)
-
-def atfork():
- """Call this whenever you call os.fork()"""
- _UserFriendlyRNG.reinit()
-
-def get_random_bytes(n):
- """Return the specified number of cryptographically-strong random bytes."""
- return _UserFriendlyRNG.get_random_bytes(n)
-
-# vim:set ts=4 sw=4 sts=4 expandtab:
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# Random/random.py : Strong alternative for the standard 'random' module
-#
-# Written in 2008 by Dwayne C. Litzenberger <dlitz@dlitz.net>
-#
-# ===================================================================
-# The contents of this file are dedicated to the public domain. To
-# the extent that dedication to the public domain is not available,
-# everyone is granted a worldwide, perpetual, royalty-free,
-# non-exclusive license to exercise all rights associated with the
-# contents of this file for any purpose whatsoever.
-# No rights are reserved.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ===================================================================
-
-"""A cryptographically strong version of Python's standard "random" module."""
-
-__revision__ = "$Id$"
-__all__ = ['StrongRandom', 'getrandbits', 'randrange', 'randint', 'choice', 'shuffle', 'sample']
-
-from Crypto import Random
-
-from Crypto.Util.python_compat import *
-
-class StrongRandom(object):
- def __init__(self, rng=None, randfunc=None):
- if randfunc is None and rng is None:
- self._randfunc = None
- elif randfunc is not None and rng is None:
- self._randfunc = randfunc
- elif randfunc is None and rng is not None:
- self._randfunc = rng.read
- else:
- raise ValueError("Cannot specify both 'rng' and 'randfunc'")
-
- def getrandbits(self, k):
- """Return a python long integer with k random bits."""
- if self._randfunc is None:
- self._randfunc = Random.new().read
- mask = (1L << k) - 1
- return mask & bytes_to_long(self._randfunc(ceil_div(k, 8)))
-
- def randrange(self, *args):
- """randrange([start,] stop[, step]):
- Return a randomly-selected element from range(start, stop, step)."""
- if len(args) == 3:
- (start, stop, step) = args
- elif len(args) == 2:
- (start, stop) = args
- step = 1
- elif len(args) == 1:
- (stop,) = args
- start = 0
- step = 1
- else:
- raise TypeError("randrange expected at most 3 arguments, got %d" % (len(args),))
- if (not isinstance(start, (int, long))
- or not isinstance(stop, (int, long))
- or not isinstance(step, (int, long))):
- raise TypeError("randrange requires integer arguments")
- if step == 0:
- raise ValueError("randrange step argument must not be zero")
-
- num_choices = ceil_div(stop - start, step)
- if num_choices < 0:
- num_choices = 0
- if num_choices < 1:
- raise ValueError("empty range for randrange(%r, %r, %r)" % (start, stop, step))
-
- # Pick a random number in the range of possible numbers
- r = num_choices
- while r >= num_choices:
- r = self.getrandbits(size(num_choices))
-
- return start + (step * r)
-
- def randint(self, a, b):
- """Return a random integer N such that a <= N <= b."""
- if not isinstance(a, (int, long)) or not isinstance(b, (int, long)):
- raise TypeError("randint requires integer arguments")
- N = self.randrange(a, b+1)
- assert a <= N <= b
- return N
-
- def choice(self, seq):
- """Return a random element from a (non-empty) sequence.
-
- If the seqence is empty, raises IndexError.
- """
- if len(seq) == 0:
- raise IndexError("empty sequence")
- return seq[self.randrange(len(seq))]
-
- def shuffle(self, x):
- """Shuffle the sequence in place."""
- # Make a (copy) of the list of objects we want to shuffle
- items = list(x)
-
- # Choose a random item (without replacement) until all the items have been
- # chosen.
- for i in xrange(len(x)):
- p = self.randint(len(items))
- x[i] = items[p]
- del items[p]
-
- def sample(self, population, k):
- """Return a k-length list of unique elements chosen from the population sequence."""
-
- num_choices = len(population)
- if k > num_choices:
- raise ValueError("sample larger than population")
-
- retval = []
- selected = {} # we emulate a set using a dict here
- for i in xrange(k):
- r = None
- while r is None or r in selected:
- r = self.randrange(num_choices)
- retval.append(population[r])
- selected[r] = 1
- return retval
-
-_r = StrongRandom()
-getrandbits = _r.getrandbits
-randrange = _r.randrange
-randint = _r.randint
-choice = _r.choice
-shuffle = _r.shuffle
-sample = _r.sample
-
-# These are at the bottom to avoid problems with recursive imports
-from Crypto.Util.number import ceil_div, bytes_to_long, long_to_bytes, size
-
-# vim:set ts=4 sw=4 sts=4 expandtab:
+++ /dev/null
-# -*- coding: ascii -*-
-#
-# Util/Counter.py : Fast counter for use with CTR-mode ciphers
-#
-# Written in 2008 by Dwayne C. Litzenberger <dlitz@dlitz.net>
-#
-# ===================================================================
-# The contents of this file are dedicated to the public domain. To
-# the extent that dedication to the public domain is not available,
-# everyone is granted a worldwide, perpetual, royalty-free,
-# non-exclusive license to exercise all rights associated with the
-# contents of this file for any purpose whatsoever.
-# No rights are reserved.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ===================================================================
-
-from Crypto.Util.python_compat import *
-
-from Crypto.Util import _counter
-import struct
-
-# Factory function
-def new(nbits, prefix="", suffix="", initial_value=1, overflow=0, little_endian=False, allow_wraparound=False, disable_shortcut=False):
- # TODO: Document this
-
- # Sanity-check the message size
- (nbytes, remainder) = divmod(nbits, 8)
- if remainder != 0:
- # In the future, we might support arbitrary bit lengths, but for now we don't.
- raise ValueError("nbits must be a multiple of 8; got %d" % (nbits,))
- if nbytes < 1:
- raise ValueError("nbits too small")
- elif nbytes > 0xffff:
- raise ValueError("nbits too large")
-
- initval = _encode(initial_value, nbytes, little_endian)
- if little_endian:
- return _counter._newLE(str(prefix), str(suffix), initval, allow_wraparound=allow_wraparound, disable_shortcut=disable_shortcut)
- else:
- return _counter._newBE(str(prefix), str(suffix), initval, allow_wraparound=allow_wraparound, disable_shortcut=disable_shortcut)
-
-def _encode(n, nbytes, little_endian=False):
- retval = []
- n = long(n)
- for i in range(nbytes):
- if little_endian:
- retval.append(chr(n & 0xff))
- else:
- retval.insert(0, chr(n & 0xff))
- n >>= 8
- return "".join(retval)
-
-# vim:set ts=4 sw=4 sts=4 expandtab:
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# ===================================================================
-# The contents of this file are dedicated to the public domain. To
-# the extent that dedication to the public domain is not available,
-# everyone is granted a worldwide, perpetual, royalty-free,
-# non-exclusive license to exercise all rights associated with the
-# contents of this file for any purpose whatsoever.
-# No rights are reserved.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ===================================================================
-
-"""Miscellaneous modules
-
-Contains useful modules that don't belong into any of the
-other Crypto.* subpackages.
-
-Crypto.Util.number Number-theoretic functions (primality testing, etc.)
-Crypto.Util.randpool Random number generation
-Crypto.Util.RFC1751 Converts between 128-bit keys and human-readable
- strings of words.
-
-"""
-
-__all__ = ['randpool', 'RFC1751', 'number', 'strxor']
-
-__revision__ = "$Id$"
-
+++ /dev/null
-# -*- coding: ascii -*-
-#
-# Util/_number_new.py : utility functions
-#
-# Written in 2008 by Dwayne C. Litzenberger <dlitz@dlitz.net>
-#
-# ===================================================================
-# The contents of this file are dedicated to the public domain. To
-# the extent that dedication to the public domain is not available,
-# everyone is granted a worldwide, perpetual, royalty-free,
-# non-exclusive license to exercise all rights associated with the
-# contents of this file for any purpose whatsoever.
-# No rights are reserved.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ===================================================================
-
-## NOTE: Do not import this module directly. Import these functions from Crypto.Util.number.
-
-__revision__ = "$Id$"
-__all__ = ['ceil_shift', 'ceil_div', 'floor_div', 'exact_log2', 'exact_div']
-
-from Crypto.Util.python_compat import *
-
-def ceil_shift(n, b):
- """Return ceil(n / 2**b) without performing any floating-point or division operations.
-
- This is done by right-shifting n by b bits and incrementing the result by 1
- if any '1' bits were shifted out.
- """
- if not isinstance(n, (int, long)) or not isinstance(b, (int, long)):
- raise TypeError("unsupported operand type(s): %r and %r" % (type(n).__name__, type(b).__name__))
-
- assert n >= 0 and b >= 0 # I haven't tested or even thought about negative values
- mask = (1L << b) - 1
- if n & mask:
- return (n >> b) + 1
- else:
- return n >> b
-
-def ceil_div(a, b):
- """Return ceil(a / b) without performing any floating-point operations."""
-
- if not isinstance(a, (int, long)) or not isinstance(b, (int, long)):
- raise TypeError("unsupported operand type(s): %r and %r" % (type(a).__name__, type(b).__name__))
-
- (q, r) = divmod(a, b)
- if r:
- return q + 1
- else:
- return q
-
-def floor_div(a, b):
- if not isinstance(a, (int, long)) or not isinstance(b, (int, long)):
- raise TypeError("unsupported operand type(s): %r and %r" % (type(a).__name__, type(b).__name__))
-
- (q, r) = divmod(a, b)
- return q
-
-def exact_log2(num):
- """Find and return an integer i >= 0 such that num == 2**i.
-
- If no such integer exists, this function raises ValueError.
- """
-
- if not isinstance(num, (int, long)):
- raise TypeError("unsupported operand type: %r" % (type(num).__name__,))
-
- n = long(num)
- if n <= 0:
- raise ValueError("cannot compute logarithm of non-positive number")
-
- i = 0
- while n != 0:
- if (n & 1) and n != 1:
- raise ValueError("No solution could be found")
- i += 1
- n >>= 1
- i -= 1
-
- assert num == (1L << i)
- return i
-
-def exact_div(p, d, allow_divzero=False):
- """Find and return an integer n such that p == n * d
-
- If no such integer exists, this function raises ValueError.
-
- Both operands must be integers.
-
- If the second operand is zero, this function will raise ZeroDivisionError
- unless allow_divzero is true (default: False).
- """
-
- if not isinstance(p, (int, long)) or not isinstance(d, (int, long)):
- raise TypeError("unsupported operand type(s): %r and %r" % (type(p).__name__, type(d).__name__))
-
- if d == 0 and allow_divzero:
- n = 0
- if p != n * d:
- raise ValueError("No solution could be found")
- else:
- (n, r) = divmod(p, d)
- if r != 0:
- raise ValueError("No solution could be found")
-
- assert p == n * d
- return n
-
-# vim:set ts=4 sw=4 sts=4 expandtab:
+++ /dev/null
-#
-# number.py : Number-theoretic functions
-#
-# Part of the Python Cryptography Toolkit
-#
-# Written by Andrew M. Kuchling, Barry A. Warsaw, and others
-#
-# ===================================================================
-# The contents of this file are dedicated to the public domain. To
-# the extent that dedication to the public domain is not available,
-# everyone is granted a worldwide, perpetual, royalty-free,
-# non-exclusive license to exercise all rights associated with the
-# contents of this file for any purpose whatsoever.
-# No rights are reserved.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ===================================================================
-#
-
-__revision__ = "$Id$"
-
-bignum = long
-try:
- from Crypto.PublicKey import _fastmath
-except ImportError:
- _fastmath = None
-
-# New functions
-from _number_new import *
-
-# Commented out and replaced with faster versions below
-## def long2str(n):
-## s=''
-## while n>0:
-## s=chr(n & 255)+s
-## n=n>>8
-## return s
-
-## import types
-## def str2long(s):
-## if type(s)!=types.StringType: return s # Integers will be left alone
-## return reduce(lambda x,y : x*256+ord(y), s, 0L)
-
-def size (N):
- """size(N:long) : int
- Returns the size of the number N in bits.
- """
- bits, power = 0,1L
- while N >= power:
- bits += 1
- power = power << 1
- return bits
-
-def getRandomNumber(N, randfunc=None):
- """getRandomNumber(N:int, randfunc:callable):long
- Return a random N-bit number.
-
- If randfunc is omitted, then Random.new().read is used.
-
- NOTE: Confusingly, this function does NOT return N random bits; It returns
- a random N-bit number, i.e. a random number between 2**(N-1) and (2**N)-1.
-
- This function is for internal use only and may be renamed or removed in
- the future.
- """
- if randfunc is None:
- _import_Random()
- randfunc = Random.new().read
-
- S = randfunc(N/8)
- odd_bits = N % 8
- if odd_bits != 0:
- char = ord(randfunc(1)) >> (8-odd_bits)
- S = chr(char) + S
- value = bytes_to_long(S)
- value |= 2L ** (N-1) # Ensure high bit is set
- assert size(value) >= N
- return value
-
-def GCD(x,y):
- """GCD(x:long, y:long): long
- Return the GCD of x and y.
- """
- x = abs(x) ; y = abs(y)
- while x > 0:
- x, y = y % x, x
- return y
-
-def inverse(u, v):
- """inverse(u:long, u:long):long
- Return the inverse of u mod v.
- """
- u3, v3 = long(u), long(v)
- u1, v1 = 1L, 0L
- while v3 > 0:
- q=u3 / v3
- u1, v1 = v1, u1 - v1*q
- u3, v3 = v3, u3 - v3*q
- while u1<0:
- u1 = u1 + v
- return u1
-
-# Given a number of bits to generate and a random generation function,
-# find a prime number of the appropriate size.
-
-def getPrime(N, randfunc=None):
- """getPrime(N:int, randfunc:callable):long
- Return a random N-bit prime number.
-
- If randfunc is omitted, then Random.new().read is used.
- """
- if randfunc is None:
- _import_Random()
- randfunc = Random.new().read
-
- number=getRandomNumber(N, randfunc) | 1
- while (not isPrime(number, randfunc=randfunc)):
- number=number+2
- return number
-
-def isPrime(N, randfunc=None):
- """isPrime(N:long, randfunc:callable):bool
- Return true if N is prime.
-
- If randfunc is omitted, then Random.new().read is used.
- """
- _import_Random()
- if randfunc is None:
- randfunc = Random.new().read
-
- randint = StrongRandom(randfunc=randfunc).randint
-
- if N == 1:
- return 0
- if N in sieve:
- return 1
- for i in sieve:
- if (N % i)==0:
- return 0
-
- # Use the accelerator if available
- if _fastmath is not None:
- return _fastmath.isPrime(N)
-
- # Compute the highest bit that's set in N
- N1 = N - 1L
- n = 1L
- while (n<N):
- n=n<<1L
- n = n >> 1L
-
- # Rabin-Miller test
- for c in sieve[:7]:
- a=long(c) ; d=1L ; t=n
- while (t): # Iterate over the bits in N1
- x=(d*d) % N
- if x==1L and d!=1L and d!=N1:
- return 0 # Square root of 1 found
- if N1 & t:
- d=(x*a) % N
- else:
- d=x
- t = t >> 1L
- if d!=1L:
- return 0
- return 1
-
-# Small primes used for checking primality; these are all the primes
-# less than 256. This should be enough to eliminate most of the odd
-# numbers before needing to do a Rabin-Miller test at all.
-
-sieve=[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59,
- 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127,
- 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193,
- 197, 199, 211, 223, 227, 229, 233, 239, 241, 251]
-
-# Improved conversion functions contributed by Barry Warsaw, after
-# careful benchmarking
-
-import struct
-
-def long_to_bytes(n, blocksize=0):
- """long_to_bytes(n:long, blocksize:int) : string
- Convert a long integer to a byte string.
-
- If optional blocksize is given and greater than zero, pad the front of the
- byte string with binary zeros so that the length is a multiple of
- blocksize.
- """
- # after much testing, this algorithm was deemed to be the fastest
- s = ''
- n = long(n)
- pack = struct.pack
- while n > 0:
- s = pack('>I', n & 0xffffffffL) + s
- n = n >> 32
- # strip off leading zeros
- for i in range(len(s)):
- if s[i] != '\000':
- break
- else:
- # only happens when n == 0
- s = '\000'
- i = 0
- s = s[i:]
- # add back some pad bytes. this could be done more efficiently w.r.t. the
- # de-padding being done above, but sigh...
- if blocksize > 0 and len(s) % blocksize:
- s = (blocksize - len(s) % blocksize) * '\000' + s
- return s
-
-def bytes_to_long(s):
- """bytes_to_long(string) : long
- Convert a byte string to a long integer.
-
- This is (essentially) the inverse of long_to_bytes().
- """
- acc = 0L
- unpack = struct.unpack
- length = len(s)
- if length % 4:
- extra = (4 - length % 4)
- s = '\000' * extra + s
- length = length + extra
- for i in range(0, length, 4):
- acc = (acc << 32) + unpack('>I', s[i:i+4])[0]
- return acc
-
-# For backwards compatibility...
-import warnings
-def long2str(n, blocksize=0):
- warnings.warn("long2str() has been replaced by long_to_bytes()")
- return long_to_bytes(n, blocksize)
-def str2long(s):
- warnings.warn("str2long() has been replaced by bytes_to_long()")
- return bytes_to_long(s)
-
-def _import_Random():
- # This is called in a function instead of at the module level in order to avoid problems with recursive imports
- global Random, StrongRandom
- from Crypto import Random
- from Crypto.Random.random import StrongRandom
-
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# Util/python_compat.py : Compatibility code for old versions of Python
-#
-# Written in 2008 by Dwayne C. Litzenberger <dlitz@dlitz.net>
-#
-# ===================================================================
-# The contents of this file are dedicated to the public domain. To
-# the extent that dedication to the public domain is not available,
-# everyone is granted a worldwide, perpetual, royalty-free,
-# non-exclusive license to exercise all rights associated with the
-# contents of this file for any purpose whatsoever.
-# No rights are reserved.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ===================================================================
-
-"""Compatibility code for old versions of Python
-
-Currently, this just defines:
- - True and False
- - object
- - isinstance
-"""
-
-__revision__ = "$Id$"
-__all__ = []
-
-import sys
-import __builtin__
-
-# 'True' and 'False' aren't defined in Python 2.1. Define them.
-try:
- True, False
-except NameError:
- (True, False) = (1, 0)
- __all__ += ['True', 'False']
-
-# New-style classes were introduced in Python 2.2. Defining "object" in Python
-# 2.1 lets us use new-style classes in versions of Python that support them,
-# while still maintaining backward compatibility with old-style classes
-try:
- object
-except NameError:
- class object: pass
- __all__ += ['object']
-
-# Starting with Python 2.2, isinstance allows a tuple for the second argument.
-# Also, builtins like "tuple", "list", "str", "unicode", "int", and "long"
-# became first-class types, rather than functions. We want to support
-# constructs like:
-# isinstance(x, (int, long))
-# So we hack it for Python 2.1.
-try:
- isinstance(5, (int, long))
-except TypeError:
- __all__ += ['isinstance']
- _builtin_type_map = {
- tuple: type(()),
- list: type([]),
- str: type(""),
- unicode: type(u""),
- int: type(0),
- long: type(0L),
- }
- def isinstance(obj, t):
- if not __builtin__.isinstance(t, type(())):
- # t is not a tuple
- return __builtin__.isinstance(obj, _builtin_type_map.get(t, t))
- else:
- # t is a tuple
- for typ in t:
- if __builtin__.isinstance(obj, _builtin_type_map.get(typ, typ)):
- return True
- return False
-
-# vim:set ts=4 sw=4 sts=4 expandtab:
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# ===================================================================
-# The contents of this file are dedicated to the public domain. To
-# the extent that dedication to the public domain is not available,
-# everyone is granted a worldwide, perpetual, royalty-free,
-# non-exclusive license to exercise all rights associated with the
-# contents of this file for any purpose whatsoever.
-# No rights are reserved.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ===================================================================
-
-"""Python Cryptography Toolkit
-
-A collection of cryptographic modules implementing various algorithms
-and protocols.
-
-Subpackages:
-Crypto.Cipher Secret-key encryption algorithms (AES, DES, ARC4)
-Crypto.Hash Hashing algorithms (MD5, SHA, HMAC)
-Crypto.Protocol Cryptographic protocols (Chaffing, all-or-nothing
- transform). This package does not contain any
- network protocols.
-Crypto.PublicKey Public-key encryption and signature algorithms
- (RSA, DSA)
-Crypto.Util Various useful modules and functions (long-to-string
- conversion, random number generation, number
- theoretic functions)
-"""
-
-__all__ = ['Cipher', 'Hash', 'Protocol', 'PublicKey', 'Util']
-
-__version__ = '2.3' # See also below and setup.py
-__revision__ = "$Id$"
-
-# New software should look at this instead of at __version__ above.
-version_info = (2, 1, 0, 'final', 0) # See also above and setup.py
-
+++ /dev/null
-# -*- coding: ascii -*-
-#
-# pct_warnings.py : PyCrypto warnings file
-#
-# Written in 2008 by Dwayne C. Litzenberger <dlitz@dlitz.net>
-#
-# ===================================================================
-# The contents of this file are dedicated to the public domain. To
-# the extent that dedication to the public domain is not available,
-# everyone is granted a worldwide, perpetual, royalty-free,
-# non-exclusive license to exercise all rights associated with the
-# contents of this file for any purpose whatsoever.
-# No rights are reserved.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ===================================================================
-
-#
-# Base classes. All our warnings inherit from one of these in order to allow
-# the user to specifically filter them.
-#
-
-class CryptoWarning(Warning):
- """Base class for PyCrypto warnings"""
-
-class CryptoDeprecationWarning(DeprecationWarning, CryptoWarning):
- """Base PyCrypto DeprecationWarning class"""
-
-class CryptoRuntimeWarning(RuntimeWarning, CryptoWarning):
- """Base PyCrypto RuntimeWarning class"""
-
-#
-# Warnings that we might actually use
-#
-
-class RandomPool_DeprecationWarning(CryptoDeprecationWarning):
- """Issued when Crypto.Util.randpool.RandomPool is instantiated."""
-
-class ClockRewindWarning(CryptoRuntimeWarning):
- """Warning for when the system clock moves backwards."""
-
-class GetRandomNumber_DeprecationWarning(CryptoDeprecationWarning):
- """Issued when Crypto.Util.number.getRandomNumber is invoked."""
-
-# By default, we want this warning to be shown every time we compensate for
-# clock rewinding.
-import warnings as _warnings
-_warnings.filterwarnings('always', category=ClockRewindWarning, append=1)
-
-# vim:set ts=4 sw=4 sts=4 expandtab:
--- /dev/null
+#!/usr/bin/env python
+
+"""
+Retrieve Adobe ADEPT user key.
+"""
+
+from __future__ import with_statement
+
+__license__ = 'GPL v3'
+
+import sys
+import os
+import struct
+from calibre.constants import iswindows, isosx
+
class ADEPTError(Exception):
    """Signals any failure while retrieving or using the ADEPT user key."""
+
+if iswindows:
+ from ctypes import windll, c_char_p, c_wchar_p, c_uint, POINTER, byref, \
+ create_unicode_buffer, create_string_buffer, CFUNCTYPE, addressof, \
+ string_at, Structure, c_void_p, cast, c_size_t, memmove, CDLL, c_int, \
+ c_long, c_ulong
+
+ from ctypes.wintypes import LPVOID, DWORD, BOOL
+ import _winreg as winreg
+
    def _load_crypto_libcrypto():
        """Build an AES-CBC decryptor class on top of OpenSSL.

        Raises ADEPTError when libcrypto cannot be located.
        """
        from ctypes.util import find_library
        # On Windows, OpenSSL's libcrypto ships as libeay32.dll.
        libcrypto = find_library('libeay32')
        if libcrypto is None:
            raise ADEPTError('libcrypto not found')
        libcrypto = CDLL(libcrypto)
        AES_MAXNR = 14  # maximum AES round count (OpenSSL aes.h)
        c_char_pp = POINTER(c_char_p)
        c_int_p = POINTER(c_int)
        class AES_KEY(Structure):
            # Mirrors OpenSSL's AES_KEY struct layout.
            _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))),
                        ('rounds', c_int)]
        AES_KEY_p = POINTER(AES_KEY)

        def F(restype, name, argtypes):
            # Fetch a libcrypto symbol and attach its ctypes prototype.
            func = getattr(libcrypto, name)
            func.restype = restype
            func.argtypes = argtypes
            return func

        AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',
                                [c_char_p, c_int, AES_KEY_p])
        AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',
                            [c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p,
                             c_int])
        class AES(object):
            def __init__(self, userkey):
                self._blocksize = len(userkey)
                # AES keys must be 128, 192 or 256 bits.
                if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) :
                    raise ADEPTError('AES improper key used')
                key = self._key = AES_KEY()
                rv = AES_set_decrypt_key(userkey, len(userkey) * 8, key)
                if rv < 0:
                    raise ADEPTError('Failed to initialize AES key')
            def decrypt(self, data):
                # CBC decrypt with an all-zero IV; final 0 selects decrypt.
                out = create_string_buffer(len(data))
                iv = ("\x00" * self._blocksize)
                # NOTE(review): AES_cbc_encrypt's restype is None, so rv is
                # always None and the rv == 0 check below can never fire.
                rv = AES_cbc_encrypt(data, out, len(data), self._key, iv, 0)
                if rv == 0:
                    raise ADEPTError('AES decryption failed')
                return out.raw
        return AES
+
+ def _load_crypto_pycrypto():
+ from Crypto.Cipher import AES as _AES
+ class AES(object):
+ def __init__(self, key):
+ self._aes = _AES.new(key, _AES.MODE_CBC)
+ def decrypt(self, data):
+ return self._aes.decrypt(data)
+ return AES
+
+ def _load_crypto():
+ AES = None
+ for loader in (_load_crypto_libcrypto, _load_crypto_pycrypto):
+ try:
+ AES = loader()
+ break
+ except (ImportError, ADEPTError):
+ pass
+ return AES
+
+ AES = _load_crypto()
+
+
    # Registry paths holding the ADEPT device key and activation records.
    DEVICE_KEY_PATH = r'Software\Adobe\Adept\Device'
    PRIVATE_LICENCE_KEY_PATH = r'Software\Adobe\Adept\Activation'

    MAX_PATH = 255

    kernel32 = windll.kernel32
    advapi32 = windll.advapi32
    crypt32 = windll.crypt32

    def GetSystemDirectory():
        # Closure factory: configure the ctypes prototype once, return the
        # wrapper; the name is immediately rebound to the wrapper below.
        GetSystemDirectoryW = kernel32.GetSystemDirectoryW
        GetSystemDirectoryW.argtypes = [c_wchar_p, c_uint]
        GetSystemDirectoryW.restype = c_uint
        def GetSystemDirectory():
            buffer = create_unicode_buffer(MAX_PATH + 1)
            GetSystemDirectoryW(buffer, len(buffer))
            return buffer.value
        return GetSystemDirectory
    GetSystemDirectory = GetSystemDirectory()

    def GetVolumeSerialNumber():
        GetVolumeInformationW = kernel32.GetVolumeInformationW
        GetVolumeInformationW.argtypes = [c_wchar_p, c_wchar_p, c_uint,
                                          POINTER(c_uint), POINTER(c_uint),
                                          POINTER(c_uint), c_wchar_p, c_uint]
        GetVolumeInformationW.restype = c_uint
        def GetVolumeSerialNumber(path):
            # Returns the volume serial for the drive at *path*; 0 is
            # returned if the call fails (the result is not checked).
            vsn = c_uint(0)
            GetVolumeInformationW(
                path, None, 0, byref(vsn), None, None, None, 0)
            return vsn.value
        return GetVolumeSerialNumber
    GetVolumeSerialNumber = GetVolumeSerialNumber()

    def GetUserName():
        GetUserNameW = advapi32.GetUserNameW
        GetUserNameW.argtypes = [c_wchar_p, POINTER(c_uint)]
        GetUserNameW.restype = c_uint
        def GetUserName():
            buffer = create_unicode_buffer(32)
            size = c_uint(len(buffer))
            # Double the buffer until GetUserNameW succeeds.
            while not GetUserNameW(buffer, byref(size)):
                buffer = create_unicode_buffer(len(buffer) * 2)
                size.value = len(buffer)
            # Keeps only the low byte of each UTF-16 code unit -- assumes
            # an ASCII user name; NOTE(review): confirm for non-ASCII.
            return buffer.value.encode('utf-16-le')[::2]
        return GetUserName
    GetUserName = GetUserName()
+
    PAGE_EXECUTE_READWRITE = 0x40
    MEM_COMMIT = 0x1000
    MEM_RESERVE = 0x2000

    def VirtualAlloc():
        _VirtualAlloc = kernel32.VirtualAlloc
        _VirtualAlloc.argtypes = [LPVOID, c_size_t, DWORD, DWORD]
        _VirtualAlloc.restype = LPVOID
        def VirtualAlloc(addr, size, alloctype=(MEM_COMMIT | MEM_RESERVE),
                         protect=PAGE_EXECUTE_READWRITE):
            # Default protection is RWX so the cpuid machine code can run.
            return _VirtualAlloc(addr, size, alloctype, protect)
        return VirtualAlloc
    VirtualAlloc = VirtualAlloc()

    MEM_RELEASE = 0x8000

    def VirtualFree():
        _VirtualFree = kernel32.VirtualFree
        _VirtualFree.argtypes = [LPVOID, c_size_t, DWORD]
        _VirtualFree.restype = BOOL
        def VirtualFree(addr, size=0, freetype=MEM_RELEASE):
            return _VirtualFree(addr, size, freetype)
        return VirtualFree
    VirtualFree = VirtualFree()

    class NativeFunction(object):
        # Copies raw machine code into an executable buffer and exposes it
        # as a callable via a ctypes function pointer.
        def __init__(self, restype, argtypes, insns):
            self._buf = buf = VirtualAlloc(None, len(insns))
            memmove(buf, insns, len(insns))
            ftype = CFUNCTYPE(restype, *argtypes)
            self._native = ftype(buf)

        def __call__(self, *args):
            return self._native(*args)

        def __del__(self):
            # Release the executable buffer exactly once.
            if self._buf is not None:
                VirtualFree(self._buf)
                self._buf = None
+
    # Raw CPUID thunks; the 32- or 64-bit encoding is chosen by pointer
    # width.  cpuid0 fills a 12-byte vendor string, cpuid1 returns EAX.
    if struct.calcsize("P") == 4:
        CPUID0_INSNS = (
            "\x53"             # push %ebx
            "\x31\xc0"         # xor %eax,%eax
            "\x0f\xa2"         # cpuid
            "\x8b\x44\x24\x08" # mov 0x8(%esp),%eax
            "\x89\x18"         # mov %ebx,0x0(%eax)
            "\x89\x50\x04"     # mov %edx,0x4(%eax)
            "\x89\x48\x08"     # mov %ecx,0x8(%eax)
            "\x5b"             # pop %ebx
            "\xc3"             # ret
        )
        CPUID1_INSNS = (
            "\x53"             # push %ebx
            "\x31\xc0"         # xor %eax,%eax
            "\x40"             # inc %eax
            "\x0f\xa2"         # cpuid
            "\x5b"             # pop %ebx
            "\xc3"             # ret
        )
    else:
        CPUID0_INSNS = (
            "\x49\x89\xd8"     # mov %rbx,%r8
            "\x49\x89\xc9"     # mov %rcx,%r9
            "\x48\x31\xc0"     # xor %rax,%rax
            "\x0f\xa2"         # cpuid
            "\x4c\x89\xc8"     # mov %r9,%rax
            "\x89\x18"         # mov %ebx,0x0(%rax)
            "\x89\x50\x04"     # mov %edx,0x4(%rax)
            "\x89\x48\x08"     # mov %ecx,0x8(%rax)
            "\x4c\x89\xc3"     # mov %r8,%rbx
            "\xc3"             # retq
        )
        CPUID1_INSNS = (
            "\x53"             # push %rbx
            "\x48\x31\xc0"     # xor %rax,%rax
            "\x48\xff\xc0"     # inc %rax
            "\x0f\xa2"         # cpuid
            "\x5b"             # pop %rbx
            "\xc3"             # retq
        )

    def cpuid0():
        # Factory: allocate the native thunk and the 12-byte result buffer
        # once; the wrapper returns the CPU vendor string (EBX,EDX,ECX).
        _cpuid0 = NativeFunction(None, [c_char_p], CPUID0_INSNS)
        buf = create_string_buffer(12)
        def cpuid0():
            _cpuid0(buf)
            return buf.raw
        return cpuid0
    cpuid0 = cpuid0()

    cpuid1 = NativeFunction(c_uint, [], CPUID1_INSNS)
+
    class DataBlob(Structure):
        # Win32 DATA_BLOB (length + pointer), as used by the DPAPI calls.
        _fields_ = [('cbData', c_uint),
                    ('pbData', c_void_p)]
    DataBlob_p = POINTER(DataBlob)

    def CryptUnprotectData():
        _CryptUnprotectData = crypt32.CryptUnprotectData
        _CryptUnprotectData.argtypes = [DataBlob_p, c_wchar_p, DataBlob_p,
                                        c_void_p, c_void_p, c_uint, DataBlob_p]
        _CryptUnprotectData.restype = c_uint
        def CryptUnprotectData(indata, entropy):
            # DPAPI decrypt of *indata*, with *entropy* as extra secret.
            indatab = create_string_buffer(indata)
            indata = DataBlob(len(indata), cast(indatab, c_void_p))
            entropyb = create_string_buffer(entropy)
            entropy = DataBlob(len(entropy), cast(entropyb, c_void_p))
            outdata = DataBlob()
            if not _CryptUnprotectData(byref(indata), None, byref(entropy),
                                       None, None, 0, byref(outdata)):
                raise ADEPTError("Failed to decrypt user key key (sic)")
            return string_at(outdata.pbData, outdata.cbData)
        return CryptUnprotectData
    CryptUnprotectData = CryptUnprotectData()
+
+ def retrieve_key():
+ if AES is None:
+ tkMessageBox.showerror(
+ "ADEPT Key",
+ "This script requires PyCrypto or OpenSSL which must be installed "
+ "separately. Read the top-of-script comment for details.")
+ return False
+ root = GetSystemDirectory().split('\\')[0] + '\\'
+ serial = GetVolumeSerialNumber(root)
+ vendor = cpuid0()
+ signature = struct.pack('>I', cpuid1())[1:]
+ user = GetUserName()
+ entropy = struct.pack('>I12s3s13s', serial, vendor, signature, user)
+ cuser = winreg.HKEY_CURRENT_USER
+ try:
+ regkey = winreg.OpenKey(cuser, DEVICE_KEY_PATH)
+ except WindowsError:
+ raise ADEPTError("Adobe Digital Editions not activated")
+ device = winreg.QueryValueEx(regkey, 'key')[0]
+ keykey = CryptUnprotectData(device, entropy)
+ userkey = None
+ try:
+ plkroot = winreg.OpenKey(cuser, PRIVATE_LICENCE_KEY_PATH)
+ except WindowsError:
+ raise ADEPTError("Could not locate ADE activation")
+ for i in xrange(0, 16):
+ try:
+ plkparent = winreg.OpenKey(plkroot, "%04d" % (i,))
+ except WindowsError:
+ break
+ ktype = winreg.QueryValueEx(plkparent, None)[0]
+ if ktype != 'credentials':
+ continue
+ for j in xrange(0, 16):
+ try:
+ plkkey = winreg.OpenKey(plkparent, "%04d" % (j,))
+ except WindowsError:
+ break
+ ktype = winreg.QueryValueEx(plkkey, None)[0]
+ if ktype != 'privateLicenseKey':
+ continue
+ userkey = winreg.QueryValueEx(plkkey, 'value')[0]
+ break
+ if userkey is not None:
+ break
+ if userkey is None:
+ raise ADEPTError('Could not locate privateLicenseKey')
+ userkey = userkey.decode('base64')
+ aes = AES(keykey)
+ userkey = aes.decrypt(userkey)
+ userkey = userkey[26:-ord(userkey[-1])]
+ return userkey
+
+else:
+
+ import xml.etree.ElementTree as etree
+ import subprocess
+
+ NSMAP = {'adept': 'http://ns.adobe.com/adept',
+ 'enc': 'http://www.w3.org/2001/04/xmlenc#'}
+
+ def findActivationDat():
+ home = os.getenv('HOME')
+ cmdline = 'find "' + home + '/Library/Application Support/Adobe/Digital Editions" -name "activation.dat"'
+ cmdline = cmdline.encode(sys.getfilesystemencoding())
+ p2 = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
+ out1, out2 = p2.communicate()
+ reslst = out1.split('\n')
+ cnt = len(reslst)
+ for j in xrange(cnt):
+ resline = reslst[j]
+ pp = resline.find('activation.dat')
+ if pp >= 0:
+ ActDatPath = resline
+ break
+ if os.path.exists(ActDatPath):
+ return ActDatPath
+ return None
+
    def retrieve_key():
        # Parse activation.dat and extract the base64 privateLicenseKey.
        actpath = findActivationDat()
        if actpath is None:
            raise ADEPTError("Could not locate ADE activation")
        tree = etree.parse(actpath)
        adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag)
        expr = '//%s/%s' % (adept('credentials'), adept('privateLicenseKey'))
        # NOTE(review): ElementTree's limited XPath may warn or fail on a
        # leading '//'; './/' is the documented spelling -- confirm.
        userkey = tree.findtext(expr)
        userkey = userkey.decode('base64')
        # Drop the 26-byte header preceding the raw key material.
        userkey = userkey[26:]
        return userkey
--- /dev/null
+#! /usr/bin/env python
+
+# ineptpdf_plugin.py
+# Released under the terms of the GNU General Public Licence, version 3 or
+# later. <http://www.gnu.org/licenses/>
+
+# PLEASE DO NOT PIRATE EBOOKS!
+
+# We want all authors and publishers, and eBook stores to live
+# long and prosperous lives but at the same time we just want to
+# be able to read OUR books on whatever device we want and to keep
+# readable for a long, long time
+
+# Requires Calibre version 0.6.44 or higher.
+#
+# All credit given to I <3 Cabbages for the original standalone scripts.
+# I had the much easier job of converting them to a Calibre plugin.
+#
+# This plugin is meant to decrypt Adobe Digital Edition PDFs that are protected
+# with Adobe's Adept encryption. It is meant to function without having to install
+# any dependencies... other than having Calibre installed, of course. It will still
+# work if you have Python and PyCrypto already installed, but they aren't necessary.
+#
+# Configuration:
+# When first run, the plugin will attempt to find your Adobe Digital Editions installation
+# (on Windows and Mac OS's). If successful, it will create a 'calibre-adeptkey.der' file and
+# save it in Calibre's configuration directory. It will use that file on subsequent runs.
+# If there are already '*.der' files in the directory, the plugin won't attempt to
+# find the ADE installation. So if you have ADE installed on the same machine as Calibre...
+# you are ready to go.
+#
+# If you already have keyfiles generated with I <3 Cabbages' ineptkey.pyw script,
+# you can put those keyfiles in Calibre's configuration directory. The easiest
+# way to find the correct directory is to go to Calibre's Preferences page... click
+# on the 'Miscellaneous' button (looks like a gear), and then click the 'Open Calibre
+# configuration directory' button. Paste your keyfiles in there. Just make sure that
+# they have different names and are saved with the '.der' extension (like the ineptkey
+# script produces). This directory isn't touched when upgrading Calibre, so it's quite
+# safe to leave them there.
+#
+# Since there is no Linux version of Adobe Digital Editions, Linux users will have to
+# obtain a keyfile through other methods and put the file in Calibre's configuration directory.
+#
+# All keyfiles with a '.der' extension found in Calibre's configuration directory will
+# be used to attempt to decrypt a book.
+#
+# ** NOTE ** There is no plugin customization data for the Inept PDF DeDRM plugin.
+#
+# Revision history:
+# 0.1 - Initial release
+
+"""
+Decrypts Adobe ADEPT-encrypted PDF files.
+"""
+
+from __future__ import with_statement
+
+__license__ = 'GPL v3'
+
+import sys
+import os
+import re
+import zlib
+import struct
+import hashlib
+from itertools import chain, islice
+import xml.etree.ElementTree as etree
+
+global ARC4, RSA, AES
+
class ADEPTError(Exception):
    """Signals any failure while decrypting an ADEPT-protected PDF."""
+
+
+import hashlib
+
def SHA256(message):
    """Return the raw SHA-256 digest of *message*."""
    return hashlib.sha256(message).digest()
+
+
def _load_crypto_libcrypto():
    """Load ARC4/RSA/AES implementations from the system OpenSSL library.

    Returns an (ARC4, RSA, AES) class triple mirroring the PyCrypto-based
    loader below.  Raises ADEPTError when libcrypto cannot be found.
    """
    from ctypes import CDLL, POINTER, c_void_p, c_char_p, c_int, c_long, \
        Structure, c_ulong, create_string_buffer, cast
    from ctypes.util import find_library

    if sys.platform.startswith('win'):
        libcrypto = find_library('libeay32')
    else:
        libcrypto = find_library('crypto')

    if libcrypto is None:
        raise ADEPTError('libcrypto not found')
    libcrypto = CDLL(libcrypto)

    AES_MAXNR = 14      # maximum AES round count (OpenSSL aes.h)

    RSA_NO_PADDING = 3  # OpenSSL rsa.h padding-mode constant

    c_char_pp = POINTER(c_char_p)
    c_int_p = POINTER(c_int)

    class AES_KEY(Structure):
        _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))), ('rounds', c_int)]
    AES_KEY_p = POINTER(AES_KEY)

    class RC4_KEY(Structure):
        _fields_ = [('x', c_int), ('y', c_int), ('box', c_int * 256)]
    RC4_KEY_p = POINTER(RC4_KEY)

    class RSA(Structure):
        pass
    RSA_p = POINTER(RSA)

    def F(restype, name, argtypes):
        # Fetch a libcrypto symbol and attach its ctypes prototype.
        func = getattr(libcrypto, name)
        func.restype = restype
        func.argtypes = argtypes
        return func

    AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p,c_int])
    AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',[c_char_p, c_int, AES_KEY_p])

    RC4_set_key = F(None,'RC4_set_key',[RC4_KEY_p, c_int, c_char_p])
    RC4_crypt = F(None,'RC4',[RC4_KEY_p, c_int, c_char_p, c_char_p])

    d2i_RSAPrivateKey = F(RSA_p, 'd2i_RSAPrivateKey',
                          [RSA_p, c_char_pp, c_long])
    RSA_size = F(c_int, 'RSA_size', [RSA_p])
    RSA_private_decrypt = F(c_int, 'RSA_private_decrypt',
                            [c_int, c_char_p, c_char_p, RSA_p, c_int])
    RSA_free = F(None, 'RSA_free', [RSA_p])

    class RSA(object):
        """RSA private-key decryption from a DER-encoded ADEPT user key."""
        def __init__(self, der):
            buf = create_string_buffer(der)
            pp = c_char_pp(cast(buf, c_char_p))
            rsa = self._rsa = d2i_RSAPrivateKey(None, pp, len(der))
            if rsa is None:
                raise ADEPTError('Error parsing ADEPT user key DER')

        def decrypt(self, from_):
            rsa = self._rsa
            to = create_string_buffer(RSA_size(rsa))
            dlen = RSA_private_decrypt(len(from_), from_, to, rsa,
                                       RSA_NO_PADDING)
            if dlen < 0:
                raise ADEPTError('RSA decryption failed')
            # NOTE(review): the first output byte is skipped (to[1:dlen]);
            # presumably the leading byte of the raw block -- confirm.
            return to[1:dlen]

        def __del__(self):
            if self._rsa is not None:
                RSA_free(self._rsa)
                self._rsa = None

    class ARC4(object):
        @classmethod
        def new(cls, userkey):
            self = ARC4()
            self._blocksize = len(userkey)
            key = self._key = RC4_KEY()
            RC4_set_key(key, self._blocksize, userkey)
            return self
        def __init__(self):
            self._blocksize = 0
            self._key = None
        def decrypt(self, data):
            # RC4 is symmetric: the same keystream call decrypts.
            out = create_string_buffer(len(data))
            RC4_crypt(self._key, len(data), data, out)
            return out.raw

    class AES(object):
        @classmethod
        def new(cls, userkey, mode, iv):
            self = AES()
            self._blocksize = len(userkey)
            # mode is ignored: CBC is the only mode supported/used here.
            self._mode = mode
            if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) :
                raise ADEPTError('AES improper key used')
            # Fix: a stray unreachable 'return' after the key-size check
            # has been removed; it could bail out of new() before the key
            # schedule below was ever initialized.
            keyctx = self._keyctx = AES_KEY()
            self._iv = iv
            rv = AES_set_decrypt_key(userkey, len(userkey) * 8, keyctx)
            if rv < 0:
                raise ADEPTError('Failed to initialize AES key')
            return self
        def __init__(self):
            self._blocksize = 0
            self._keyctx = None
            self._iv = 0
            self._mode = 0
        def decrypt(self, data):
            out = create_string_buffer(len(data))
            # NOTE(review): AES_cbc_encrypt's restype is None, so rv is
            # always None and the rv == 0 check can never fire.
            rv = AES_cbc_encrypt(data, out, len(data), self._keyctx, self._iv, 0)
            if rv == 0:
                raise ADEPTError('AES decryption failed')
            return out.raw

    return (ARC4, RSA, AES)
+
+
def _load_crypto_pycrypto():
    """Load ARC4/RSA/AES implementations backed by PyCrypto.

    The user key arrives as raw DER, so a minimal ASN.1 parser (adapted
    from tlslite) extracts the integers needed for RSA.construct().
    """
    from Crypto.PublicKey import RSA as _RSA
    from Crypto.Cipher import ARC4 as _ARC4
    from Crypto.Cipher import AES as _AES

    # ASN.1 parsing code from tlslite
    class ASN1Error(Exception):
        pass

    class ASN1Parser(object):
        class Parser(object):
            # Cursor over a list of integer byte values.
            def __init__(self, bytes):
                self.bytes = bytes
                self.index = 0

            def get(self, length):
                # Read *length* bytes as a big-endian unsigned integer.
                if self.index + length > len(self.bytes):
                    raise ASN1Error("Error decoding ASN.1")
                x = 0
                for count in range(length):
                    x <<= 8
                    x |= self.bytes[self.index]
                    self.index += 1
                return x

            def getFixBytes(self, lengthBytes):
                bytes = self.bytes[self.index : self.index+lengthBytes]
                self.index += lengthBytes
                return bytes

            def getVarBytes(self, lengthLength):
                lengthBytes = self.get(lengthLength)
                return self.getFixBytes(lengthBytes)

            def getFixList(self, length, lengthList):
                l = [0] * lengthList
                for x in range(lengthList):
                    l[x] = self.get(length)
                return l

            def getVarList(self, length, lengthLength):
                lengthList = self.get(lengthLength)
                if lengthList % length != 0:
                    raise ASN1Error("Error decoding ASN.1")
                lengthList = int(lengthList/length)
                l = [0] * lengthList
                for x in range(lengthList):
                    l[x] = self.get(length)
                return l

            def startLengthCheck(self, lengthLength):
                self.lengthCheck = self.get(lengthLength)
                self.indexCheck = self.index

            def setLengthCheck(self, length):
                self.lengthCheck = length
                self.indexCheck = self.index

            def stopLengthCheck(self):
                if (self.index - self.indexCheck) != self.lengthCheck:
                    raise ASN1Error("Error decoding ASN.1")

            def atLengthCheck(self):
                if (self.index - self.indexCheck) < self.lengthCheck:
                    return False
                elif (self.index - self.indexCheck) == self.lengthCheck:
                    return True
                else:
                    raise ASN1Error("Error decoding ASN.1")

        def __init__(self, bytes):
            p = self.Parser(bytes)
            p.get(1)  # skip the tag byte
            self.length = self._getASN1Length(p)
            self.value = p.getFixBytes(self.length)

        def getChild(self, which):
            # Walk the sibling TLVs inside this value up to index *which*.
            p = self.Parser(self.value)
            for x in range(which+1):
                markIndex = p.index
                p.get(1)
                length = self._getASN1Length(p)
                p.getFixBytes(length)
            return ASN1Parser(p.bytes[markIndex:p.index])

        def _getASN1Length(self, p):
            # Short form (<=127) or long form (first byte = length of length).
            firstLength = p.get(1)
            if firstLength<=127:
                return firstLength
            else:
                lengthLength = firstLength & 0x7F
                return p.get(lengthLength)

    class ARC4(object):
        @classmethod
        def new(cls, userkey):
            self = ARC4()
            self._arc4 = _ARC4.new(userkey)
            return self
        def __init__(self):
            self._arc4 = None
        def decrypt(self, data):
            return self._arc4.decrypt(data)

    class AES(object):
        @classmethod
        def new(cls, userkey, mode, iv):
            self = AES()
            self._aes = _AES.new(userkey, mode, iv)
            return self
        def __init__(self):
            self._aes = None
        def decrypt(self, data):
            return self._aes.decrypt(data)

    class RSA(object):
        def __init__(self, der):
            # Children 1..3 of the RSAPrivateKey SEQUENCE are presumably
            # modulus n, publicExponent e, privateExponent d -- the order
            # RSA.construct() expects; NOTE(review): confirm against DER.
            key = ASN1Parser([ord(x) for x in der])
            key = [key.getChild(x).value for x in xrange(1, 4)]
            key = [self.bytesToNumber(v) for v in key]
            self._rsa = _RSA.construct(key)

        def bytesToNumber(self, bytes):
            # Fold a big-endian byte list into a (long) integer.
            total = 0L
            for byte in bytes:
                total = (total << 8) + byte
            return total

        def decrypt(self, data):
            return self._rsa.decrypt(data)

    return (ARC4, RSA, AES)
+
def _load_crypto():
    """Return (ARC4, RSA, AES) from the first working backend, else Nones."""
    for loader in (_load_crypto_libcrypto, _load_crypto_pycrypto):
        try:
            return loader()
        except (ImportError, ADEPTError):
            continue
    return (None, None, None)
+
+try:
+ from cStringIO import StringIO
+except ImportError:
+ from StringIO import StringIO
+
+
# Do we generate cross reference streams on output?
# 0 = never
# 1 = only if present in input
# 2 = always

GEN_XREF_STM = 1

# This is the value for the current document
# (module-level flag, mutated elsewhere during serialization)
gen_xref_stm = False # will be set in PDFSerializer
+
+# PDF parsing routines from pdfminer, with changes for EBX_HANDLER
+
+# Utilities
+
def choplist(n, seq):
    '''Yield successive n-tuples from seq; a trailing partial group is dropped.'''
    group = []
    for item in seq:
        group.append(item)
        if len(group) == n:
            yield tuple(group)
            group = []
+
def nunpack(s, default=0):
    '''Unpacks up to 4 bytes big endian into an int.

    Empty input returns *default*.  Inputs longer than 4 bytes raise
    TypeError (fix: the exception object was previously *returned*
    instead of raised, silently handing callers a TypeError instance).
    '''
    l = len(s)
    if not l:
        return default
    elif l == 1:
        return ord(s)
    elif l == 2:
        return struct.unpack('>H', s)[0]
    elif l == 3:
        return struct.unpack('>L', '\x00'+s)[0]
    elif l == 4:
        return struct.unpack('>L', s)[0]
    else:
        raise TypeError('invalid length: %d' % l)
+
+
# Non-zero makes the parser raise on malformed input instead of coercing
# or ignoring it (see literal_name/keyword_name and PSStackParser).
STRICT = 0
+
+
+# PS Exceptions
+
class PSException(Exception):
    """Base class for all PostScript-parser errors."""

class PSEOF(PSException):
    """Raised on unexpected end of input."""

class PSSyntaxError(PSException):
    """Raised on malformed PostScript/PDF syntax."""

class PSTypeError(PSException):
    """Raised when an object has an unexpected type."""

class PSValueError(PSException):
    """Raised on an invalid value."""
+
+
+# Basic PostScript Types
+
+
+# PSLiteral
class PSObject(object):
    """Root of all basic PostScript object classes."""

class PSLiteral(PSObject):
    '''
    PS literals (e.g. "/Name").
    Caution: Never create these objects directly.
    Use PSLiteralTable.intern() instead.
    '''
    def __init__(self, name):
        self.name = name

    def __repr__(self):
        # Non-alphanumeric characters are shown as '#xx' hex escapes.
        pieces = [c if c.isalnum() else '#%02x' % ord(c) for c in self.name]
        return '/%s' % ''.join(pieces)

class PSKeyword(PSObject):
    '''
    PS keywords (e.g. "showpage").
    Caution: Never create these objects directly.
    Use PSKeywordTable.intern() instead.
    '''
    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return self.name
+
+# PSSymbolTable
class PSSymbolTable(object):

    '''
    Symbol table that stores PSLiteral or PSKeyword.

    intern() guarantees exactly one instance per distinct name, so
    interned tokens can be compared by identity.
    '''

    def __init__(self, classe):
        self.dic = {}        # name -> interned instance
        self.classe = classe # factory for new entries

    def intern(self, name):
        """Return the canonical instance for *name*, creating it on first use."""
        try:
            return self.dic[name]
        except KeyError:
            lit = self.classe(name)
            self.dic[name] = lit
            return lit
+
# Shared interning tables and shortcut constructors; LIT/KWD guarantee a
# single object per distinct name so tokens can be compared by identity.
PSLiteralTable = PSSymbolTable(PSLiteral)
PSKeywordTable = PSSymbolTable(PSKeyword)
LIT = PSLiteralTable.intern
KWD = PSKeywordTable.intern
KEYWORD_BRACE_BEGIN = KWD('{')
KEYWORD_BRACE_END = KWD('}')
KEYWORD_ARRAY_BEGIN = KWD('[')
KEYWORD_ARRAY_END = KWD(']')
KEYWORD_DICT_BEGIN = KWD('<<')
KEYWORD_DICT_END = KWD('>>')
+
+
def literal_name(x):
    """Return the name of a PSLiteral; non-literals raise under STRICT, else str()."""
    if isinstance(x, PSLiteral):
        return x.name
    if STRICT:
        raise PSTypeError('Literal required: %r' % x)
    return str(x)
+
def keyword_name(x):
    """Return the name of a PSKeyword; non-keywords raise under STRICT, else str()."""
    if isinstance(x, PSKeyword):
        return x.name
    if STRICT:
        raise PSTypeError('Keyword required: %r' % x)
    return str(x)
+
+
## PSBaseParser
##
# Tokenizer regexes: EOL/whitespace scanning plus the delimiter sets
# ending name literals, numbers, keywords, (...) strings and hex strings.
EOL = re.compile(r'[\r\n]')
SPC = re.compile(r'\s')
NONSPC = re.compile(r'\S')
HEX = re.compile(r'[0-9a-fA-F]')
END_LITERAL = re.compile(r'[#/%\[\]()<>{}\s]')
END_HEX_STRING = re.compile(r'[^\s0-9a-fA-F]')
HEX_PAIR = re.compile(r'[0-9a-fA-F]{2}|.')
END_NUMBER = re.compile(r'[^0-9]')
END_KEYWORD = re.compile(r'[#/%\[\]()<>{}\s]')
END_STRING = re.compile(r'[()\134]')
OCT_STRING = re.compile(r'[0-7]')
# \134 is the backslash; ESC_STRING maps escape letters to char codes.
ESC_STRING = { 'b':8, 't':9, 'n':10, 'f':12, 'r':13, '(':40, ')':41, '\\':92 }
+
class PSBaseParser(object):

    '''
    Most basic PostScript parser that performs only basic tokenization.

    The parse_* methods form a state machine: each consumes characters
    from buffer *s* starting at *i* and returns the next
    (state_function, position) pair.
    '''
    BUFSIZ = 4096

    def __init__(self, fp):
        self.fp = fp
        self.seek(0)
        return

    def __repr__(self):
        return '<PSBaseParser: %r, bufpos=%d>' % (self.fp, self.bufpos)

    def flush(self):
        # Hook for subclasses; the base parser buffers nothing.
        return

    def close(self):
        self.flush()
        return

    def tell(self):
        # Absolute file position of the next character to be consumed.
        return self.bufpos+self.charpos

    def poll(self, pos=None, n=80):
        # Debug helper: peek at the stream without moving the cursor.
        pos0 = self.fp.tell()
        if not pos:
            pos = self.bufpos+self.charpos
        self.fp.seek(pos)
        ##print >>sys.stderr, 'poll(%d): %r' % (pos, self.fp.read(n))
        self.fp.seek(pos0)
        return

    def seek(self, pos):
        '''
        Seeks the parser to the given position.
        '''
        self.fp.seek(pos)
        # reset the status for nextline()
        self.bufpos = pos
        self.buf = ''
        self.charpos = 0
        # reset the status for nexttoken()
        self.parse1 = self.parse_main
        self.tokens = []
        return

    def fillbuf(self):
        # Refill self.buf from the file once fully consumed.
        if self.charpos < len(self.buf): return
        # fetch next chunk.
        self.bufpos = self.fp.tell()
        self.buf = self.fp.read(self.BUFSIZ)
        if not self.buf:
            raise PSEOF('Unexpected EOF')
        self.charpos = 0
        return

    def parse_main(self, s, i):
        # Dispatch state: skip whitespace, then route on the first
        # significant character to the matching sub-parser.
        m = NONSPC.search(s, i)
        if not m:
            return (self.parse_main, len(s))
        j = m.start(0)
        c = s[j]
        self.tokenstart = self.bufpos+j
        if c == '%':
            self.token = '%'
            return (self.parse_comment, j+1)
        if c == '/':
            self.token = ''
            return (self.parse_literal, j+1)
        if c in '-+' or c.isdigit():
            self.token = c
            return (self.parse_number, j+1)
        if c == '.':
            self.token = c
            return (self.parse_float, j+1)
        if c.isalpha():
            self.token = c
            return (self.parse_keyword, j+1)
        if c == '(':
            self.token = ''
            self.paren = 1
            return (self.parse_string, j+1)
        if c == '<':
            self.token = ''
            return (self.parse_wopen, j+1)
        if c == '>':
            self.token = ''
            return (self.parse_wclose, j+1)
        # Any other delimiter becomes a one-character keyword token.
        self.add_token(KWD(c))
        return (self.parse_main, j+1)

    def add_token(self, obj):
        # Record a completed token together with its starting position.
        self.tokens.append((self.tokenstart, obj))
        return

    def parse_comment(self, s, i):
        m = EOL.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_comment, len(s))
        j = m.start(0)
        self.token += s[i:j]
        # We ignore comments.
        #self.tokens.append(self.token)
        return (self.parse_main, j)

    def parse_literal(self, s, i):
        m = END_LITERAL.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_literal, len(s))
        j = m.start(0)
        self.token += s[i:j]
        c = s[j]
        if c == '#':
            # '#xx' hex escape inside a name literal.
            self.hex = ''
            return (self.parse_literal_hex, j+1)
        self.add_token(LIT(self.token))
        return (self.parse_main, j)

    def parse_literal_hex(self, s, i):
        c = s[i]
        if HEX.match(c) and len(self.hex) < 2:
            self.hex += c
            return (self.parse_literal_hex, i+1)
        if self.hex:
            self.token += chr(int(self.hex, 16))
        return (self.parse_literal, i)

    def parse_number(self, s, i):
        m = END_NUMBER.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_number, len(s))
        j = m.start(0)
        self.token += s[i:j]
        c = s[j]
        if c == '.':
            self.token += c
            return (self.parse_float, j+1)
        try:
            self.add_token(int(self.token))
        except ValueError:
            # e.g. a bare '-' or '+': drop the malformed number.
            pass
        return (self.parse_main, j)
    def parse_float(self, s, i):
        m = END_NUMBER.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_float, len(s))
        j = m.start(0)
        self.token += s[i:j]
        self.add_token(float(self.token))
        return (self.parse_main, j)

    def parse_keyword(self, s, i):
        m = END_KEYWORD.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_keyword, len(s))
        j = m.start(0)
        self.token += s[i:j]
        if self.token == 'true':
            token = True
        elif self.token == 'false':
            token = False
        else:
            token = KWD(self.token)
        self.add_token(token)
        return (self.parse_main, j)

    def parse_string(self, s, i):
        m = END_STRING.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_string, len(s))
        j = m.start(0)
        self.token += s[i:j]
        c = s[j]
        if c == '\\':
            self.oct = ''
            return (self.parse_string_1, j+1)
        if c == '(':
            # Track nesting so unescaped parens stay balanced.
            self.paren += 1
            self.token += c
            return (self.parse_string, j+1)
        if c == ')':
            self.paren -= 1
            if self.paren:
                self.token += c
                return (self.parse_string, j+1)
        self.add_token(self.token)
        return (self.parse_main, j+1)
    def parse_string_1(self, s, i):
        # Backslash escape: up to 3 octal digits or a named escape char.
        c = s[i]
        if OCT_STRING.match(c) and len(self.oct) < 3:
            self.oct += c
            return (self.parse_string_1, i+1)
        if self.oct:
            self.token += chr(int(self.oct, 8))
            return (self.parse_string, i)
        if c in ESC_STRING:
            self.token += chr(ESC_STRING[c])
        return (self.parse_string, i+1)

    def parse_wopen(self, s, i):
        # After '<': either a hex string or a '<<' dictionary opener.
        c = s[i]
        if c.isspace() or HEX.match(c):
            return (self.parse_hexstring, i)
        if c == '<':
            self.add_token(KEYWORD_DICT_BEGIN)
            i += 1
        return (self.parse_main, i)

    def parse_wclose(self, s, i):
        c = s[i]
        if c == '>':
            self.add_token(KEYWORD_DICT_END)
            i += 1
        return (self.parse_main, i)

    def parse_hexstring(self, s, i):
        m = END_HEX_STRING.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_hexstring, len(s))
        j = m.start(0)
        self.token += s[i:j]
        # Strip whitespace, then decode hex pairs; an odd trailing digit
        # passes through via the '|.' alternative in HEX_PAIR.
        token = HEX_PAIR.sub(lambda m: chr(int(m.group(0), 16)),
                             SPC.sub('', self.token))
        self.add_token(token)
        return (self.parse_main, j)

    def nexttoken(self):
        # Pump the state machine until at least one token is available.
        while not self.tokens:
            self.fillbuf()
            (self.parse1, self.charpos) = self.parse1(self.buf, self.charpos)
        token = self.tokens.pop(0)
        return token

    def nextline(self):
        '''
        Fetches a next line that ends either with \\r or \\n.
        '''
        linebuf = ''
        linepos = self.bufpos + self.charpos
        eol = False
        while 1:
            self.fillbuf()
            if eol:
                c = self.buf[self.charpos]
                # handle '\r\n'
                if c == '\n':
                    linebuf += c
                    self.charpos += 1
                break
            m = EOL.search(self.buf, self.charpos)
            if m:
                linebuf += self.buf[self.charpos:m.end(0)]
                self.charpos = m.end(0)
                if linebuf[-1] == '\r':
                    # A lone '\r' may be followed by '\n' in the next chunk.
                    eol = True
                else:
                    break
            else:
                linebuf += self.buf[self.charpos:]
                self.charpos = len(self.buf)
        return (linepos, linebuf)

    def revreadlines(self):
        '''
        Fetches a next line backword. This is used to locate
        the trailers at the end of a file.
        '''
        self.fp.seek(0, 2)
        pos = self.fp.tell()
        buf = ''
        while 0 < pos:
            prevpos = pos
            pos = max(0, pos-self.BUFSIZ)
            self.fp.seek(pos)
            s = self.fp.read(prevpos-pos)
            if not s: break
            while 1:
                n = max(s.rfind('\r'), s.rfind('\n'))
                if n == -1:
                    # No line break in this chunk; prepend and read more.
                    buf = s + buf
                    break
                yield s[n:]+buf
                s = s[:n]
                buf = ''
        return
+
+
+## PSStackParser
+##
class PSStackParser(PSBaseParser):

    # Builds Python objects (numbers, strings, literals, arrays, dicts)
    # on a value stack from the token stream produced by PSBaseParser.

    def __init__(self, fp):
        PSBaseParser.__init__(self, fp)
        self.reset()
        return

    def reset(self):
        # context: stack of enclosing array/dict constructs in progress.
        self.context = []
        self.curtype = None
        self.curstack = []
        self.results = []
        return

    def seek(self, pos):
        PSBaseParser.seek(self, pos)
        self.reset()
        return

    def push(self, *objs):
        self.curstack.extend(objs)
        return
    def pop(self, n):
        objs = self.curstack[-n:]
        self.curstack[-n:] = []
        return objs
    def popall(self):
        objs = self.curstack
        self.curstack = []
        return objs
    def add_results(self, *objs):
        self.results.extend(objs)
        return

    def start_type(self, pos, type):
        # Open a nested construct, saving the enclosing one on context.
        self.context.append((pos, self.curtype, self.curstack))
        (self.curtype, self.curstack) = (type, [])
        return
    def end_type(self, type):
        if self.curtype != type:
            raise PSTypeError('Type mismatch: %r != %r' % (self.curtype, type))
        objs = [ obj for (_,obj) in self.curstack ]
        (pos, self.curtype, self.curstack) = self.context.pop()
        return (pos, objs)

    def do_keyword(self, pos, token):
        # Hook for subclasses (e.g. 'obj'/'endobj' handling).
        return

    def nextobject(self, direct=False):
        '''
        Yields a list of objects: keywords, literals, strings,
        numbers, arrays and dictionaries. Arrays and dictionaries
        are represented as Python sequence and dictionaries.
        '''
        while not self.results:
            (pos, token) = self.nexttoken()
            ##print (pos,token), (self.curtype, self.curstack)
            if (isinstance(token, int) or
                isinstance(token, float) or
                isinstance(token, bool) or
                isinstance(token, str) or
                isinstance(token, PSLiteral)):
                # normal token
                self.push((pos, token))
            elif token == KEYWORD_ARRAY_BEGIN:
                # begin array
                self.start_type(pos, 'a')
            elif token == KEYWORD_ARRAY_END:
                # end array
                try:
                    self.push(self.end_type('a'))
                except PSTypeError:
                    if STRICT: raise
            elif token == KEYWORD_DICT_BEGIN:
                # begin dictionary
                self.start_type(pos, 'd')
            elif token == KEYWORD_DICT_END:
                # end dictionary
                try:
                    (pos, objs) = self.end_type('d')
                    if len(objs) % 2 != 0:
                        raise PSSyntaxError(
                            'Invalid dictionary construct: %r' % objs)
                    # Pair up key/value tokens; keys must be literals.
                    d = dict((literal_name(k), v) \
                             for (k,v) in choplist(2, objs))
                    self.push((pos, d))
                except PSTypeError:
                    if STRICT: raise
            else:
                self.do_keyword(pos, token)
            if self.context:
                continue
            else:
                if direct:
                    return self.pop(1)[0]
                self.flush()
        obj = self.results.pop(0)
        return obj
+
+
# Stream filter names; 'Fl', 'LZW' and 'A85' are the PDF abbreviated forms.
LITERAL_CRYPT = PSLiteralTable.intern('Crypt')
LITERALS_FLATE_DECODE = (PSLiteralTable.intern('FlateDecode'), PSLiteralTable.intern('Fl'))
LITERALS_LZW_DECODE = (PSLiteralTable.intern('LZWDecode'), PSLiteralTable.intern('LZW'))
LITERALS_ASCII85_DECODE = (PSLiteralTable.intern('ASCII85Decode'), PSLiteralTable.intern('A85'))
+
+
+## PDF Objects
+##
class PDFObject(PSObject): pass

class PDFException(PSException): pass
class PDFTypeError(PDFException): pass
class PDFValueError(PDFException): pass
# Derive from PDFException (not PSException) so callers catching the
# PDF-level base exception also see unsupported-feature errors; it
# remains a PSException subclass, so existing handlers still match.
class PDFNotImplementedError(PDFException): pass
+
+
+## PDFObjRef
+##
class PDFObjRef(PDFObject):
    """An indirect reference to another object, resolved lazily via the
    owning document."""

    def __init__(self, doc, objid, genno):
        if objid == 0 and STRICT:
            raise PDFValueError('PDF object id cannot be 0.')
        self.doc = doc
        self.objid = objid
        self.genno = genno

    def __repr__(self):
        return '<PDFObjRef:%d %d>' % (self.objid, self.genno)

    def resolve(self):
        # the generation number is ignored; the xref decides which copy wins
        return self.doc.getobj(self.objid)
+
+
+# resolve
def resolve1(x):
    '''
    Follow indirect references until a direct object is reached.
    The result may still contain indirect references nested inside
    arrays or dictionaries.
    '''
    while isinstance(x, PDFObjRef):
        x = x.resolve()
    return x
+
def resolve_all(x):
    '''
    Recursively resolve x and all of its contents, so that no indirect
    reference remains anywhere within the nested object.
    This procedure might be slow.
    '''
    while isinstance(x, PDFObjRef):
        x = x.resolve()
    if isinstance(x, list):
        return [resolve_all(v) for v in x]
    if isinstance(x, dict):
        # replace values in place; the key set does not change
        for k in x:
            x[k] = resolve_all(x[k])
    return x
+
def decipher_all(decipher, objid, genno, x):
    '''
    Apply decipher to every string inside x, recursing into lists
    and dictionaries; anything else is returned unchanged.
    '''
    if isinstance(x, str):
        return decipher(objid, genno, x)
    recurse = lambda v: decipher_all(decipher, objid, genno, v)
    if isinstance(x, list):
        return [recurse(v) for v in x]
    if isinstance(x, dict):
        return dict((k, recurse(v)) for (k, v) in x.iteritems())
    return x
+
+
# Type checking
def int_value(x):
    """Resolve x; return it as an int, else 0 (or raise when STRICT)."""
    x = resolve1(x)
    if isinstance(x, int):
        return x
    if STRICT:
        raise PDFTypeError('Integer required: %r' % x)
    return 0

def float_value(x):
    """Resolve x; return it as a float, else 0.0 (or raise when STRICT)."""
    x = resolve1(x)
    if isinstance(x, float):
        return x
    if STRICT:
        raise PDFTypeError('Float required: %r' % x)
    return 0.0

def num_value(x):
    """Resolve x; return it as an int or float, else 0 (or raise when STRICT)."""
    x = resolve1(x)
    if isinstance(x, (int, float)):
        return x
    if STRICT:
        raise PDFTypeError('Int or Float required: %r' % x)
    return 0

def str_value(x):
    """Resolve x; return it as a string, else '' (or raise when STRICT)."""
    x = resolve1(x)
    if isinstance(x, str):
        return x
    if STRICT:
        raise PDFTypeError('String required: %r' % x)
    return ''

def list_value(x):
    """Resolve x; return it as a list/tuple, else [] (or raise when STRICT)."""
    x = resolve1(x)
    if isinstance(x, (list, tuple)):
        return x
    if STRICT:
        raise PDFTypeError('List required: %r' % x)
    return []

def dict_value(x):
    """Resolve x; return it as a dict, else {} (or raise when STRICT)."""
    x = resolve1(x)
    if isinstance(x, dict):
        return x
    if STRICT:
        raise PDFTypeError('Dict required: %r' % x)
    return {}

def stream_value(x):
    """Resolve x; return it as a PDFStream, else an empty one (or raise)."""
    x = resolve1(x)
    if isinstance(x, PDFStream):
        return x
    if STRICT:
        raise PDFTypeError('PDFStream required: %r' % x)
    return PDFStream({}, '')
+
# ascii85decode(data)
def ascii85decode(data):
    """Decode ASCII85-encoded data (the ASCII85Decode filter).

    Decoding stops at the '~' EOD marker; characters outside
    '!'..'u' (other than 'z' and '~') are ignored.
    """
    n = b = 0
    out = ''
    for c in data:
        if '!' <= c and c <= 'u':
            # accumulate a base-85 digit; 5 digits make 4 output bytes
            n += 1
            b = b*85+(ord(c)-33)
            if n == 5:
                out += struct.pack('>L',b)
                n = b = 0
        elif c == 'z':
            # 'z' is shorthand for four zero bytes; only valid between groups
            assert n == 0
            out += '\0\0\0\0'
        elif c == '~':
            if n:
                # pad the final partial group with 'u' (84), keep n-1 bytes
                for _ in range(5-n):
                    b = b*85+84
                out += struct.pack('>L',b)[:n-1]
            break
    return out
+
+
+## PDFStream type
class PDFStream(PDFObject):
    """A PDF stream object: a dictionary plus raw (possibly encrypted) data."""

    def __init__(self, dic, rawdata, decipher=None):
        length = int_value(dic.get('Length', 0))
        eol = rawdata[length:]
        # quick and dirty fix for false length attribute,
        # might not work if the pdf stream parser has a problem
        if decipher != None and decipher.__name__ == 'decrypt_aes':
            # AES data must be a whole number of 16-byte blocks; trim excess
            if (len(rawdata) % 16) != 0:
                cutdiv = len(rawdata) // 16
                rawdata = rawdata[:16*cutdiv]
        else:
            # trust /Length only when exactly an EOL remains beyond it
            if eol in ('\r', '\n', '\r\n'):
                rawdata = rawdata[:length]

        self.dic = dic
        self.rawdata = rawdata      # undecoded (possibly encrypted) bytes
        self.decipher = decipher    # callable(objid, genno, data) or None
        self.data = None            # fully decoded data, set by decode()
        self.decdata = None         # decrypted-but-still-filtered data
        self.objid = None
        self.genno = None
        return

    def set_objid(self, objid, genno):
        self.objid = objid
        self.genno = genno
        return

    def __repr__(self):
        if self.rawdata:
            return '<PDFStream(%r): raw=%d, %r>' % \
                   (self.objid, len(self.rawdata), self.dic)
        else:
            return '<PDFStream(%r): data=%d, %r>' % \
                   (self.objid, len(self.data), self.dic)

    def decode(self):
        """Decrypt (if needed) and run the stream's /Filter chain."""
        assert self.data is None and self.rawdata is not None
        data = self.rawdata
        if self.decipher:
            # Handle encryption
            data = self.decipher(self.objid, self.genno, data)
            if gen_xref_stm:
                self.decdata = data # keep decrypted data
        if 'Filter' not in self.dic:
            self.data = data
            self.rawdata = None
            ##print self.dict
            return
        filters = self.dic['Filter']
        if not isinstance(filters, list):
            filters = [ filters ]
        for f in filters:
            if f in LITERALS_FLATE_DECODE:
                # will get errors if the document is encrypted.
                data = zlib.decompress(data)
            elif f in LITERALS_LZW_DECODE:
                data = ''.join(LZWDecoder(StringIO(data)).run())
            elif f in LITERALS_ASCII85_DECODE:
                data = ascii85decode(data)
            elif f == LITERAL_CRYPT:
                raise PDFNotImplementedError('/Crypt filter is unsupported')
            else:
                raise PDFNotImplementedError('Unsupported filter: %r' % f)
            # apply predictors
            if 'DP' in self.dic:
                params = self.dic['DP']
            else:
                params = self.dic.get('DecodeParms', {})
            if 'Predictor' in params:
                pred = int_value(params['Predictor'])
                if pred:
                    if pred != 12:
                        raise PDFNotImplementedError(
                            'Unsupported predictor: %r' % pred)
                    if 'Columns' not in params:
                        raise PDFValueError(
                            'Columns undefined for predictor=12')
                    columns = int_value(params['Columns'])
                    buf = ''
                    ent0 = '\x00' * columns
                    # each row: 1 predictor tag byte + `columns` data bytes
                    for i in xrange(0, len(data), columns+1):
                        pred = data[i]
                        ent1 = data[i+1:i+1+columns]
                        if pred == '\x02':
                            # PNG "Up" predictor: add previous row bytewise
                            ent1 = ''.join(chr((ord(a)+ord(b)) & 255) \
                                           for (a,b) in zip(ent0,ent1))
                        buf += ent1
                        ent0 = ent1
                    data = buf
        self.data = data
        self.rawdata = None
        return

    def get_data(self):
        # decode lazily on first access
        if self.data is None:
            self.decode()
        return self.data

    def get_rawdata(self):
        return self.rawdata

    def get_decdata(self):
        """Return decrypted-but-unfiltered data for re-serialization."""
        if self.decdata is not None:
            return self.decdata
        data = self.rawdata
        if self.decipher and data:
            # Handle encryption
            data = self.decipher(self.objid, self.genno, data)
        return data
+
+
+## PDF Exceptions
+##
# Parsing / encryption error hierarchy rooted at PDFException.
class PDFSyntaxError(PDFException): pass
class PDFNoValidXRef(PDFSyntaxError): pass
class PDFEncryptionError(PDFException): pass
class PDFPasswordIncorrect(PDFEncryptionError): pass
+
+# some predefined literals and keywords.
# Object/stream type names used while walking the document structure.
LITERAL_OBJSTM = PSLiteralTable.intern('ObjStm')
LITERAL_XREF = PSLiteralTable.intern('XRef')
LITERAL_PAGE = PSLiteralTable.intern('Page')
LITERAL_PAGES = PSLiteralTable.intern('Pages')
LITERAL_CATALOG = PSLiteralTable.intern('Catalog')
+
+
+## XRefs
+##
+
+## PDFXRef
+##
class PDFXRef(object):
    """A classic (table-style) cross-reference section plus its trailer."""

    def __init__(self):
        # objid -> (genno, file offset); filled in by load()
        self.offsets = None
        return

    def __repr__(self):
        return '<PDFXRef: objs=%d>' % len(self.offsets)

    def objids(self):
        return self.offsets.iterkeys()

    def load(self, parser):
        """Parse 'start count' subsection headers and their entry lines,
        then the trailer dictionary that follows."""
        self.offsets = {}
        while 1:
            try:
                (pos, line) = parser.nextline()
            except PSEOF:
                raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
            if not line:
                raise PDFNoValidXRef('Premature eof: %r' % parser)
            if line.startswith('trailer'):
                parser.seek(pos)
                break
            f = line.strip().split(' ')
            if len(f) != 2:
                raise PDFNoValidXRef('Trailer not found: %r: line=%r' % (parser, line))
            try:
                (start, nobjs) = map(int, f)
            except ValueError:
                raise PDFNoValidXRef('Invalid line: %r: line=%r' % (parser, line))
            for objid in xrange(start, start+nobjs):
                try:
                    (_, line) = parser.nextline()
                except PSEOF:
                    raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
                f = line.strip().split(' ')
                if len(f) != 3:
                    raise PDFNoValidXRef('Invalid XRef format: %r, line=%r' % (parser, line))
                (pos, genno, use) = f
                # only in-use ('n') entries matter; free ('f') ones are skipped
                if use != 'n': continue
                self.offsets[objid] = (int(genno), int(pos))
        self.load_trailer(parser)
        return

    KEYWORD_TRAILER = PSKeywordTable.intern('trailer')
    def load_trailer(self, parser):
        """Read the trailer dictionary following the xref table."""
        try:
            (_,kwd) = parser.nexttoken()
            assert kwd is self.KEYWORD_TRAILER
            (_,dic) = parser.nextobject(direct=True)
        except PSEOF:
            # truncated file: fall back to whatever is left on the stack
            x = parser.pop(1)
            if not x:
                raise PDFNoValidXRef('Unexpected EOF - file corrupted')
            (_,dic) = x[0]
        self.trailer = dict_value(dic)
        return

    def getpos(self, objid):
        """Return (None, offset); raises KeyError for unknown object ids.

        The previous try/except KeyError wrapper only re-raised, so the
        lookup now propagates KeyError directly (identical behavior).
        """
        (genno, pos) = self.offsets[objid]
        return (None, pos)
+
+
+## PDFXRefStream
+##
class PDFXRefStream(object):
    """A PDF 1.5 cross-reference stream (binary xref entries)."""

    def __init__(self):
        self.index = None       # list of (first objid, count) runs
        self.data = None        # decoded binary entry data
        self.entlen = None      # bytes per entry (fl1 + fl2 + fl3)
        self.fl1 = self.fl2 = self.fl3 = None   # field widths from /W
        return

    def __repr__(self):
        # was '<PDFXRef: ...>', which misidentified the class in debug output
        return '<PDFXRefStream: objids=%s>' % self.index

    def objids(self):
        for first, size in self.index:
            for objid in xrange(first, first + size):
                yield objid

    def load(self, parser, debug=0):
        """Parse the 'N G obj' header and the xref stream behind it."""
        (_,objid) = parser.nexttoken() # ignored
        (_,genno) = parser.nexttoken() # ignored
        (_,kwd) = parser.nexttoken()
        (_,stream) = parser.nextobject()
        if not isinstance(stream, PDFStream) or \
           stream.dic['Type'] is not LITERAL_XREF:
            raise PDFNoValidXRef('Invalid PDF stream spec.')
        size = stream.dic['Size']
        index = stream.dic.get('Index', (0,size))
        # /Index is a flat [first1, count1, first2, count2, ...] array.
        # NOTE: zip() yields a list under Python 2; it is iterated
        # repeatedly by objids() and getpos(), so keep it materialized.
        self.index = zip(islice(index, 0, None, 2),
                         islice(index, 1, None, 2))
        (self.fl1, self.fl2, self.fl3) = stream.dic['W']
        self.data = stream.get_data()
        self.entlen = self.fl1+self.fl2+self.fl3
        self.trailer = stream.dic
        return

    def getpos(self, objid):
        """Return (stmid, index) for compressed objects, (None, offset)
        for regular ones; raise KeyError for free/unknown entries."""
        offset = 0
        for first, size in self.index:
            if first <= objid and objid < (first + size):
                break
            offset += size
        else:
            raise KeyError(objid)
        i = self.entlen * ((objid - first) + offset)
        ent = self.data[i:i+self.entlen]
        f1 = nunpack(ent[:self.fl1], 1)
        if f1 == 1:
            # type 1: uncompressed object; field 2 is the byte offset
            # (field 3, the generation number, is not needed here)
            pos = nunpack(ent[self.fl1:self.fl1+self.fl2])
            return (None, pos)
        elif f1 == 2:
            # type 2: compressed object; field 2 is the containing stream's
            # objid, field 3 the index within that stream
            objid = nunpack(ent[self.fl1:self.fl1+self.fl2])
            index = nunpack(ent[self.fl1+self.fl2:])
            return (objid, index)
        # type 0 (or unknown): this is a free object
        raise KeyError(objid)
+
+
+## PDFDocument
+##
+## A PDFDocument object represents a PDF document.
+## Since a PDF file is usually pretty big, normally it is not loaded
+## at once. Rather it is parsed dynamically as processing goes.
+## A PDF parser is associated with the document.
+##
class PDFDocument(object):
    """Represents a PDF document, parsed lazily through an associated parser.

    Usage: construct, attach a parser (PDFParser.__init__ does this via
    set_parser()), call initialize() with the password / key path, then
    fetch objects with getobj().
    """

    def __init__(self):
        self.xrefs = []
        self.objs = {}          # cache of already-fetched objects
        self.parsed_objs = {}   # cache of parsed object streams
        self.root = None
        self.catalog = None
        self.parser = None
        self.encryption = None  # (docid, encrypt-dict) when encrypted
        self.decipher = None    # callable(objid, genno, data) once initialized
        return

    # set_parser(parser)
    # Associates the document with an (already initialized) parser object.
    def set_parser(self, parser):
        if self.parser: return
        self.parser = parser
        # The document is set to be temporarily ready during collecting
        # all the basic information about the document, e.g.
        # the header, the encryption information, and the access rights
        # for the document.
        self.ready = True
        # Retrieve the information of each header that was appended
        # (maybe multiple times) at the end of the document.
        self.xrefs = parser.read_xref()
        for xref in self.xrefs:
            trailer = xref.trailer
            if not trailer: continue

            # If there's an encryption info, remember it.
            if 'Encrypt' in trailer:
                #assert not self.encryption
                try:
                    self.encryption = (list_value(trailer['ID']),
                                       dict_value(trailer['Encrypt']))
                # fix for bad files with a missing/broken /ID
                except Exception:
                    self.encryption = ('ffffffffffffffffffffffffffffffffffff',
                                       dict_value(trailer['Encrypt']))
            if 'Root' in trailer:
                self.set_root(dict_value(trailer['Root']))
                break
        else:
            raise PDFSyntaxError('No /Root object! - Is this really a PDF?')
        # The document is set to be non-ready again, until all the
        # proper initialization (asking the password key and
        # verifying the access permission, so on) is finished.
        self.ready = False
        return

    # set_root(root)
    # Set the Root dictionary of the document.
    # Each PDF file must have exactly one /Root dictionary.
    def set_root(self, root):
        self.root = root
        self.catalog = dict_value(self.root)
        if self.catalog.get('Type') is not LITERAL_CATALOG:
            if STRICT:
                raise PDFSyntaxError('Catalog not found!')
        return

    # initialize(password='')
    # Perform the initialization with a given password.
    # This step is mandatory even if there's no password associated
    # with the document.
    def initialize(self, password=''):
        """Dispatch to the security handler named in /Encrypt /Filter."""
        if not self.encryption:
            self.is_printable = self.is_modifiable = self.is_extractable = True
            self.ready = True
            return
        (docid, param) = self.encryption
        type = literal_name(param['Filter'])
        if type == 'Adobe.APS':
            return self.initialize_adobe_ps(password, docid, param)
        if type == 'Standard':
            return self.initialize_standard(password, docid, param)
        if type == 'EBX_HANDLER':
            return self.initialize_ebx(password, docid, param)
        raise PDFEncryptionError('Unknown filter: param=%r' % param)

    def initialize_adobe_ps(self, password, docid, param):
        # (removed a stray, unused `global KEYFILEPATH` statement)
        self.decrypt_key = self.genkey_adobe_ps(param)
        self.genkey = self.genkey_v4
        self.decipher = self.decrypt_aes
        self.ready = True
        return

    def genkey_adobe_ps(self, param):
        """Derive the 16-byte AES key for Adobe.APS (Onleihe) documents."""
        # nice little offline principal keys dictionary
        # global static principal key for German Onleihe / Bibliothek Digital
        principalkeys = { 'bibliothek-digital.de': 'rRwGv2tbpKov1krvv7PO0ws9S436/lArPlfipz5Pqhw='.decode('base64')}
        self.is_printable = self.is_modifiable = self.is_extractable = True
        length = int_value(param.get('Length', 0)) / 8
        edcdata = str_value(param.get('EDCData')).decode('base64')
        pdrllic = str_value(param.get('PDRLLic')).decode('base64')
        pdrlpol = str_value(param.get('PDRLPol')).decode('base64')
        edclist = []
        for pair in edcdata.split('\n'):
            edclist.append(pair)
        # principal key request
        for key in principalkeys:
            if key in pdrllic:
                principalkey = principalkeys[key]
            else:
                raise ADEPTError('Cannot find principal key for this pdf')
        shakey = SHA256(principalkey)
        ivector = 16 * chr(0)
        plaintext = AES.new(shakey,AES.MODE_CBC,ivector).decrypt(edclist[9].decode('base64'))
        if plaintext[-16:] != 16 * chr(16):
            raise ADEPTError('Offlinekey cannot be decrypted, aborting ...')
        pdrlpol = AES.new(plaintext[16:32],AES.MODE_CBC,edclist[2].decode('base64')).decrypt(pdrlpol)
        if ord(pdrlpol[-1]) < 1 or ord(pdrlpol[-1]) > 16:
            raise ADEPTError('Could not decrypt PDRLPol, aborting ...')
        else:
            # strip PKCS#5 padding
            cutter = -1 * ord(pdrlpol[-1])
            pdrlpol = pdrlpol[:cutter]
        return plaintext[:16]

    PASSWORD_PADDING = '(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08..' \
                       '\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz'
    # experimental aes pw support
    def initialize_standard(self, password, docid, param):
        """Standard security handler (PDF Reference, Algorithms 3.2/3.4/3.5)."""
        V = int_value(param.get('V', 0))
        if (V <=0 or V > 4):
            raise PDFEncryptionError('Unknown algorithm: param=%r' % param)
        length = int_value(param.get('Length', 40)) # Key length (bits)
        O = str_value(param['O'])
        R = int_value(param['R']) # Revision
        if 5 <= R:
            raise PDFEncryptionError('Unknown revision: %r' % R)
        U = str_value(param['U'])
        P = int_value(param['P'])
        try:
            EncMetadata = str_value(param['EncryptMetadata'])
        except Exception:
            EncMetadata = 'True'
        self.is_printable = bool(P & 4)
        self.is_modifiable = bool(P & 8)
        self.is_extractable = bool(P & 16)
        self.is_annotationable = bool(P & 32)
        self.is_formsenabled = bool(P & 256)
        self.is_textextractable = bool(P & 512)
        self.is_assemblable = bool(P & 1024)
        self.is_formprintable = bool(P & 2048)
        # Algorithm 3.2
        password = (password+self.PASSWORD_PADDING)[:32] # 1
        hash = hashlib.md5(password) # 2
        hash.update(O) # 3
        hash.update(struct.pack('<l', P)) # 4
        hash.update(docid[0]) # 5
        # aes special handling if metadata isn't encrypted.
        # BUGFIX: was `EncMetadata == ('False' or 'false')`, which evaluates
        # to `== 'False'` and can never match 'false'.
        if EncMetadata in ('False', 'false'):
            hash.update('ffffffff'.decode('hex'))
        # BUGFIX: step 8 applies to revision 3 and later; the old guard
        # `5 <= R` was unreachable because R >= 5 raises above, so R3/R4
        # keys were derived without the 50 MD5 rounds.
        if 3 <= R:
            # 8
            for _ in xrange(50):
                hash = hashlib.md5(hash.digest()[:length/8])
        key = hash.digest()[:length/8]
        if R == 2:
            # Algorithm 3.4
            u1 = ARC4.new(key).decrypt(password)
        elif R >= 3:
            # Algorithm 3.5
            hash = hashlib.md5(self.PASSWORD_PADDING) # 2
            hash.update(docid[0]) # 3
            x = ARC4.new(key).decrypt(hash.digest()[:16]) # 4
            for i in xrange(1,19+1):
                k = ''.join( chr(ord(c) ^ i) for c in key )
                x = ARC4.new(k).decrypt(x)
            u1 = x+x # 32bytes total
        if R == 2:
            is_authenticated = (u1 == U)
        else:
            is_authenticated = (u1[:16] == U[:16])
        if not is_authenticated:
            raise ADEPTError('Password is not correct.')
        self.decrypt_key = key
        # genkey method
        if V == 1 or V == 2:
            self.genkey = self.genkey_v2
        elif V == 3:
            self.genkey = self.genkey_v3
        elif V == 4:
            self.genkey = self.genkey_v2
        # rc4
        if V != 4:
            # BUGFIX: was `self.decipher_rc4`, a method that does not exist
            self.decipher = self.decrypt_rc4 # XXX may be AES
        # aes
        elif V == 4 and length == 128:
            # BUGFIX: was `elf.decipher = self.decipher_aes` with the
            # undefined name `Length`; assign self.decipher = decrypt_aes.
            self.decipher = self.decrypt_aes
        elif V == 4 and length == 256:
            raise PDFNotImplementedError('AES256 encryption is currently unsupported')
        self.ready = True
        return

    def initialize_ebx(self, password, docid, param):
        """ADEPT (EBX_HANDLER): decrypt the book session key with the user's
        RSA key; `password` is actually the path to adeptkey.der."""
        self.is_printable = self.is_modifiable = self.is_extractable = True
        with open(password, 'rb') as f:
            keyder = f.read()
        rsa = RSA(keyder)
        length = int_value(param.get('Length', 0)) / 8
        rights = str_value(param.get('ADEPT_LICENSE')).decode('base64')
        rights = zlib.decompress(rights, -15)
        rights = etree.fromstring(rights)
        expr = './/{http://ns.adobe.com/adept}encryptedKey'
        bookkey = ''.join(rights.findtext(expr)).decode('base64')
        bookkey = rsa.decrypt(bookkey)
        # PKCS#1 v1.5 block type 2: 0x02, random padding, 0x00, payload
        if bookkey[0] != '\x02':
            raise ADEPTError('error decrypting book session key')
        index = bookkey.index('\0') + 1
        bookkey = bookkey[index:]
        ebx_V = int_value(param.get('V', 4))
        ebx_type = int_value(param.get('EBX_ENCRYPTIONTYPE', 6))
        # added because of the booktype / decryption book session key error
        if ebx_V == 3:
            V = 3
        elif ebx_V < 4 or ebx_type < 6:
            # older books carry the algorithm version as the first key byte
            V = ord(bookkey[0])
            bookkey = bookkey[1:]
        else:
            V = 2
        if length and len(bookkey) != length:
            raise ADEPTError('error decrypting book session key')
        self.decrypt_key = bookkey
        self.genkey = self.genkey_v3 if V == 3 else self.genkey_v2
        self.decipher = self.decrypt_rc4
        self.ready = True
        return

    # genkey functions
    def genkey_v2(self, objid, genno):
        """Per-object key: MD5(base key + objid[:3] + genno[:2]), truncated."""
        objid = struct.pack('<L', objid)[:3]
        genno = struct.pack('<L', genno)[:2]
        key = self.decrypt_key + objid + genno
        hash = hashlib.md5(key)
        key = hash.digest()[:min(len(self.decrypt_key) + 5, 16)]
        return key

    def genkey_v3(self, objid, genno):
        """ADEPT V3 variant: ids are XOR-masked and 'sAlT' is appended."""
        objid = struct.pack('<L', objid ^ 0x3569ac)
        genno = struct.pack('<L', genno ^ 0xca96)
        key = self.decrypt_key
        key += objid[0] + genno[0] + objid[1] + genno[1] + objid[2] + 'sAlT'
        hash = hashlib.md5(key)
        key = hash.digest()[:min(len(self.decrypt_key) + 5, 16)]
        return key

    # aes v2 and v4 algorithm
    def genkey_v4(self, objid, genno):
        objid = struct.pack('<L', objid)[:3]
        genno = struct.pack('<L', genno)[:2]
        key = self.decrypt_key + objid + genno + 'sAlT'
        hash = hashlib.md5(key)
        key = hash.digest()[:min(len(self.decrypt_key) + 5, 16)]
        return key

    def decrypt_aes(self, objid, genno, data):
        """AES-CBC decrypt; the first 16 bytes of data are the IV."""
        key = self.genkey(objid, genno)
        ivector = data[:16]
        data = data[16:]
        plaintext = AES.new(key,AES.MODE_CBC,ivector).decrypt(data)
        # remove pkcs#5 aes padding
        cutter = -1 * ord(plaintext[-1])
        plaintext = plaintext[:cutter]
        return plaintext

    def decrypt_aes256(self, objid, genno, data):
        # NOTE: currently identical to decrypt_aes; kept for compatibility
        # with any external callers.
        key = self.genkey(objid, genno)
        ivector = data[:16]
        data = data[16:]
        plaintext = AES.new(key,AES.MODE_CBC,ivector).decrypt(data)
        # remove pkcs#5 aes padding
        cutter = -1 * ord(plaintext[-1])
        plaintext = plaintext[:cutter]
        return plaintext

    def decrypt_rc4(self, objid, genno, data):
        key = self.genkey(objid, genno)
        return ARC4.new(key).decrypt(data)


    KEYWORD_OBJ = PSKeywordTable.intern('obj')

    def getobj(self, objid):
        """Fetch object `objid`, deciphering and caching it as needed."""
        if not self.ready:
            raise PDFException('PDFDocument not initialized')
        #assert self.xrefs
        if objid in self.objs:
            genno = 0
            obj = self.objs[objid]
        else:
            for xref in self.xrefs:
                try:
                    (stmid, index) = xref.getpos(objid)
                    break
                except KeyError:
                    pass
            else:
                #if STRICT:
                #    raise PDFSyntaxError('Cannot locate objid=%r' % objid)
                return None
            if stmid:
                if gen_xref_stm:
                    # serializer keeps compressed objects in their stream
                    return PDFObjStmRef(objid, stmid, index)
                # Stuff from pdfminer: extract objects from object stream
                stream = stream_value(self.getobj(stmid))
                if stream.dic.get('Type') is not LITERAL_OBJSTM:
                    if STRICT:
                        raise PDFSyntaxError('Not a stream object: %r' % stream)
                try:
                    n = stream.dic['N']
                except KeyError:
                    if STRICT:
                        raise PDFSyntaxError('N is not defined: %r' % stream)
                    n = 0

                if stmid in self.parsed_objs:
                    objs = self.parsed_objs[stmid]
                else:
                    parser = PDFObjStrmParser(stream.get_data(), self)
                    objs = []
                    try:
                        while 1:
                            (_,obj) = parser.nextobject()
                            objs.append(obj)
                    except PSEOF:
                        pass
                    self.parsed_objs[stmid] = objs
                genno = 0
                # the first n*2 tokens are (objid, offset) header pairs
                i = n*2+index
                try:
                    obj = objs[i]
                except IndexError:
                    raise PDFSyntaxError('Invalid object number: objid=%r' % (objid))
                if isinstance(obj, PDFStream):
                    obj.set_objid(objid, 0)
            else:
                self.parser.seek(index)
                (_,objid1) = self.parser.nexttoken() # objid
                (_,genno) = self.parser.nexttoken() # genno
                # malformed files sometimes disagree on the object id here,
                # so objid1 is deliberately not asserted against objid
                (_,kwd) = self.parser.nexttoken()
                if kwd is not self.KEYWORD_OBJ:
                    raise PDFSyntaxError(
                        'Invalid object spec: offset=%r' % index)
                (_,obj) = self.parser.nextobject()
                if isinstance(obj, PDFStream):
                    obj.set_objid(objid, genno)
            if self.decipher:
                obj = decipher_all(self.decipher, objid, genno, obj)
            self.objs[objid] = obj
        return obj
+
+
class PDFObjStmRef(object):
    """Placeholder for an object stored inside an object stream."""
    # Largest index seen so far; used to size xref-stream fields later.
    maxindex = 0

    def __init__(self, objid, stmid, index):
        self.objid = objid
        self.stmid = stmid
        self.index = index
        PDFObjStmRef.maxindex = max(PDFObjStmRef.maxindex, index)
+
+
+## PDFParser
+##
class PDFParser(PSStackParser):
    """Stack parser specialized for PDF syntax (references, streams, xref)."""

    def __init__(self, doc, fp):
        PSStackParser.__init__(self, fp)
        self.doc = doc
        self.doc.set_parser(self)
        return

    def __repr__(self):
        return '<PDFParser>'

    KEYWORD_R = PSKeywordTable.intern('R')
    KEYWORD_ENDOBJ = PSKeywordTable.intern('endobj')
    KEYWORD_STREAM = PSKeywordTable.intern('stream')
    KEYWORD_XREF = PSKeywordTable.intern('xref')
    KEYWORD_STARTXREF = PSKeywordTable.intern('startxref')
    def do_keyword(self, pos, token):
        """Handle the PDF-specific keywords seen by the tokenizer."""
        if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF):
            self.add_results(*self.pop(1))
            return
        if token is self.KEYWORD_ENDOBJ:
            # pops the (objid, genno, 'obj', body) quadruple
            self.add_results(*self.pop(4))
            return

        if token is self.KEYWORD_R:
            # reference to indirect object
            try:
                ((_,objid), (_,genno)) = self.pop(2)
                (objid, genno) = (int(objid), int(genno))
                obj = PDFObjRef(self.doc, objid, genno)
                self.push((pos, obj))
            except PSSyntaxError:
                pass
            return

        if token is self.KEYWORD_STREAM:
            # stream object: read /Length bytes, then keep scanning for
            # 'endstream' in case /Length was wrong or missing
            ((_,dic),) = self.pop(1)
            dic = dict_value(dic)
            try:
                objlen = int_value(dic['Length'])
            except KeyError:
                if STRICT:
                    raise PDFSyntaxError('/Length is undefined: %r' % dic)
                objlen = 0
            self.seek(pos)
            try:
                (_, line) = self.nextline()  # 'stream'
            except PSEOF:
                if STRICT:
                    raise PDFSyntaxError('Unexpected EOF')
                return
            pos += len(line)
            self.fp.seek(pos)
            data = self.fp.read(objlen)
            self.seek(pos+objlen)
            while 1:
                try:
                    (linepos, line) = self.nextline()
                except PSEOF:
                    if STRICT:
                        raise PDFSyntaxError('Unexpected EOF')
                    break
                if 'endstream' in line:
                    i = line.index('endstream')
                    objlen += i
                    data += line[:i]
                    break
                # /Length was too small: keep accumulating raw lines
                objlen += len(line)
                data += line
            self.seek(pos+objlen)
            obj = PDFStream(dic, data, self.doc.decipher)
            self.push((pos, obj))
            return

        # others
        self.push((pos, token))
        return

    def find_xref(self):
        """Return the byte offset of the last xref section.

        Scans the file backwards for 'startxref'; since lines arrive in
        reverse order, `prev` holds the offset line that follows it.
        """
        prev = None
        for line in self.revreadlines():
            line = line.strip()
            if line == 'startxref': break
            if line:
                prev = line
        else:
            raise PDFNoValidXRef('Unexpected EOF')
        return int(prev)

    # read xref table
    def read_xref_from(self, start, xrefs):
        """Read one xref section at offset `start`, then follow its links."""
        self.seek(start)
        self.reset()
        try:
            (pos, token) = self.nexttoken()
        except PSEOF:
            raise PDFNoValidXRef('Unexpected EOF')
        if isinstance(token, int):
            # XRefStream: PDF-1.5 (an integer here starts 'N G obj')
            if GEN_XREF_STM == 1:
                global gen_xref_stm
                gen_xref_stm = True
            self.seek(pos)
            self.reset()
            xref = PDFXRefStream()
            xref.load(self)
        else:
            if token is not self.KEYWORD_XREF:
                raise PDFNoValidXRef('xref not found: pos=%d, token=%r' %
                                     (pos, token))
            self.nextline()
            xref = PDFXRef()
            xref.load(self)
        xrefs.append(xref)
        trailer = xref.trailer
        if 'XRefStm' in trailer:
            # hybrid-reference file: also read the companion xref stream
            pos = int_value(trailer['XRefStm'])
            self.read_xref_from(pos, xrefs)
        if 'Prev' in trailer:
            # find previous xref
            pos = int_value(trailer['Prev'])
            self.read_xref_from(pos, xrefs)
        return

    # read xref tables and trailers
    def read_xref(self):
        """Collect all xref sections; fall back to a brute-force scan of
        'N G obj' headers when no valid xref can be located."""
        xrefs = []
        trailerpos = None
        try:
            pos = self.find_xref()
            self.read_xref_from(pos, xrefs)
        except PDFNoValidXRef:
            # fallback
            self.seek(0)
            pat = re.compile(r'^(\d+)\s+(\d+)\s+obj\b')
            offsets = {}
            xref = PDFXRef()
            while 1:
                try:
                    (pos, line) = self.nextline()
                except PSEOF:
                    break
                if line.startswith('trailer'):
                    trailerpos = pos # remember last trailer
                m = pat.match(line)
                if not m: continue
                (objid, genno) = m.groups()
                offsets[int(objid)] = (0, pos)
            if not offsets: raise
            xref.offsets = offsets
            if trailerpos:
                self.seek(trailerpos)
                xref.load_trailer(self)
            xrefs.append(xref)
        return xrefs
+
+## PDFObjStrmParser
+##
class PDFObjStrmParser(PDFParser):
    """Parser for the decoded contents of an object stream (PDF 1.5)."""

    def __init__(self, data, doc):
        # Note: deliberately skips PDFParser.__init__ so the document's
        # parser association is left untouched.
        PSStackParser.__init__(self, StringIO(data))
        self.doc = doc

    def flush(self):
        self.add_results(*self.popall())

    KEYWORD_R = KWD('R')
    def do_keyword(self, pos, token):
        # Only indirect references need special handling here; every
        # other keyword is passed through unchanged.
        if token is self.KEYWORD_R:
            try:
                ((_, objid), (_, genno)) = self.pop(2)
                obj = PDFObjRef(self.doc, int(objid), int(genno))
                self.push((pos, obj))
            except PSSyntaxError:
                pass
            return
        self.push((pos, token))
+
+###
+### My own code, for which there is none else to blame
+
+class PDFSerializer(object):
+ def __init__(self, inf, keypath):
+ global GEN_XREF_STM, gen_xref_stm
+ gen_xref_stm = GEN_XREF_STM > 1
+ self.version = inf.read(8)
+ inf.seek(0)
+ self.doc = doc = PDFDocument()
+ parser = PDFParser(doc, inf)
+ doc.initialize(keypath)
+ self.objids = objids = set()
+ for xref in reversed(doc.xrefs):
+ trailer = xref.trailer
+ for objid in xref.objids():
+ objids.add(objid)
+ trailer = dict(trailer)
+ trailer.pop('Prev', None)
+ trailer.pop('XRefStm', None)
+ if 'Encrypt' in trailer:
+ objids.remove(trailer.pop('Encrypt').objid)
+ self.trailer = trailer
+
+ def dump(self, outf):
+ self.outf = outf
+ self.write(self.version)
+ self.write('\n%\xe2\xe3\xcf\xd3\n')
+ doc = self.doc
+ objids = self.objids
+ xrefs = {}
+ maxobj = max(objids)
+ trailer = dict(self.trailer)
+ trailer['Size'] = maxobj + 1
+ for objid in objids:
+ obj = doc.getobj(objid)
+ if isinstance(obj, PDFObjStmRef):
+ xrefs[objid] = obj
+ continue
+ if obj is not None:
+ try:
+ genno = obj.genno
+ except AttributeError:
+ genno = 0
+ xrefs[objid] = (self.tell(), genno)
+ self.serialize_indirect(objid, obj)
+ startxref = self.tell()
+
+ if not gen_xref_stm:
+ self.write('xref\n')
+ self.write('0 %d\n' % (maxobj + 1,))
+ for objid in xrange(0, maxobj + 1):
+ if objid in xrefs:
+ # force the genno to be 0
+ self.write("%010d 00000 n \n" % xrefs[objid][0])
+ else:
+ self.write("%010d %05d f \n" % (0, 65535))
+
+ self.write('trailer\n')
+ self.serialize_object(trailer)
+ self.write('\nstartxref\n%d\n%%%%EOF' % startxref)
+
+ else: # Generate crossref stream.
+
+ # Calculate size of entries
+ maxoffset = max(startxref, maxobj)
+ maxindex = PDFObjStmRef.maxindex
+ fl2 = 2
+ power = 65536
+ while maxoffset >= power:
+ fl2 += 1
+ power *= 256
+ fl3 = 1
+ power = 256
+ while maxindex >= power:
+ fl3 += 1
+ power *= 256
+
+ index = []
+ first = None
+ prev = None
+ data = []
+ # Put the xrefstream's reference in itself
+ startxref = self.tell()
+ maxobj += 1
+ xrefs[maxobj] = (startxref, 0)
+ for objid in sorted(xrefs):
+ if first is None:
+ first = objid
+ elif objid != prev + 1:
+ index.extend((first, prev - first + 1))
+ first = objid
+ prev = objid
+ objref = xrefs[objid]
+ if isinstance(objref, PDFObjStmRef):
+ f1 = 2
+ f2 = objref.stmid
+ f3 = objref.index
+ else:
+ f1 = 1
+ f2 = objref[0]
+ # we force all generation numbers to be 0
+ # f3 = objref[1]
+ f3 = 0
+
+ data.append(struct.pack('>B', f1))
+ data.append(struct.pack('>L', f2)[-fl2:])
+ data.append(struct.pack('>L', f3)[-fl3:])
+ index.extend((first, prev - first + 1))
+ data = zlib.compress(''.join(data))
+ dic = {'Type': LITERAL_XREF, 'Size': prev + 1, 'Index': index,
+ 'W': [1, fl2, fl3], 'Length': len(data),
+ 'Filter': LITERALS_FLATE_DECODE[0],
+ 'Root': trailer['Root'],}
+ if 'Info' in trailer:
+ dic['Info'] = trailer['Info']
+ xrefstm = PDFStream(dic, data)
+ self.serialize_indirect(maxobj, xrefstm)
+ self.write('startxref\n%d\n%%%%EOF' % startxref)
+ def write(self, data):
+ self.outf.write(data)
+ self.last = data[-1:]
+
+ def tell(self):
+ return self.outf.tell()
+
+    def escape_string(self, string):
+        # Escape characters that are special inside a PDF literal string.
+        # NOTE(review): the regex below is recompiled on every call; hoisting
+        # it to a module-level constant would be cheaper.
+        string = string.replace('\\', '\\\\')
+        string = string.replace('\n', r'\n')
+        string = string.replace('(', r'\(')
+        string = string.replace(')', r'\)')
+        # get rid of ciando id
+        regularexp = re.compile(r'http://www.ciando.com/index.cfm/intRefererID/\d{5}')
+        if regularexp.match(string): return ('http://www.ciando.com')
+        return string
+
+    def serialize_object(self, obj):
+        # Write a single PDF object to the output in PDF syntax, dispatching
+        # on the Python type of 'obj'. 'self.last' (the last byte written)
+        # tells us when an explicit whitespace separator is needed.
+        if isinstance(obj, dict):
+            # Correct malformed Mac OS resource forks for Stanza
+            if 'ResFork' in obj and 'Type' in obj and 'Subtype' not in obj \
+                   and isinstance(obj['Type'], int):
+                obj['Subtype'] = obj['Type']
+                del obj['Type']
+            # end - hope this doesn't have bad effects
+            self.write('<<')
+            for key, val in obj.items():
+                self.write('/%s' % key)
+                self.serialize_object(val)
+            self.write('>>')
+        elif isinstance(obj, list):
+            self.write('[')
+            for val in obj:
+                self.serialize_object(val)
+            self.write(']')
+        elif isinstance(obj, str):
+            self.write('(%s)' % self.escape_string(obj))
+        elif isinstance(obj, bool):
+            if self.last.isalnum():
+                self.write(' ')
+            self.write(str(obj).lower())
+        elif isinstance(obj, (int, long, float)):
+            if self.last.isalnum():
+                self.write(' ')
+            self.write(str(obj))
+        elif isinstance(obj, PDFObjRef):
+            if self.last.isalnum():
+                self.write(' ')
+            # generation number is always written as 0 here
+            self.write('%d %d R' % (obj.objid, 0))
+        elif isinstance(obj, PDFStream):
+            ### If we don't generate cross ref streams the object streams
+            ### are no longer useful, as we have extracted all objects from
+            ### them. Therefore leave them out from the output.
+            if obj.dic.get('Type') == LITERAL_OBJSTM and not gen_xref_stm:
+                self.write('(deleted)')
+            else:
+                data = obj.get_decdata()
+                self.serialize_object(obj.dic)
+                self.write('stream\n')
+                self.write(data)
+                self.write('\nendstream')
+        else:
+            # Fallback: anything else (e.g. PDF name literals) via str().
+            data = str(obj)
+            if data[0].isalnum() and self.last.isalnum():
+                self.write(' ')
+            self.write(data)
+
+    def serialize_indirect(self, objid, obj):
+        # Emit an indirect object: 'N 0 obj ... endobj' (generation forced to 0).
+        self.write('%d 0 obj' % (objid,))
+        self.serialize_object(obj)
+        if self.last.isalnum():
+            self.write('\n')
+        self.write('endobj\n')
+
+def plugin_main(keypath, inpath, outpath):
+    # Decrypt the ADEPT-encrypted PDF at 'inpath' using the key file at
+    # 'keypath', writing the decrypted PDF to 'outpath'.
+    # Returns 0 on success, 1 on any failure; never raises to the caller.
+    with open(inpath, 'rb') as inf:
+        try:
+            serializer = PDFSerializer(inf, keypath)
+        except:
+            # Broad catch is deliberate: any parse/decrypt error almost
+            # certainly means the supplied key does not match this book.
+            print "Error serializing pdf. Probably wrong key."
+            return 1
+        # hope this will fix the 'bad file descriptor' problem
+        with open(outpath, 'wb') as outf:
+            # help construct to make sure the method runs to the end
+            try:
+                serializer.dump(outf)
+            except:
+                print "error writing pdf."
+                return 1
+    return 0
+
+
+from calibre.customize import FileTypePlugin
+
+class IneptPDFDeDRM(FileTypePlugin):
+ name = 'Inept PDF DeDRM'
+ description = 'Removes DRM from secure Adobe pdf files. \
+ Credit given to I <3 Cabbages for the original stand-alone scripts.'
+ supported_platforms = ['linux', 'osx', 'windows']
+ author = 'DiapDealer'
+ version = (0, 1, 1)
+ minimum_calibre_version = (0, 6, 44) # Compiled python libraries cannot be imported in earlier versions.
+ file_types = set(['pdf'])
+ on_import = True
+
+ def run(self, path_to_ebook):
+ global ARC4, RSA, AES
+
+ from calibre.gui2 import is_ok_to_use_qt
+ from PyQt4.Qt import QMessageBox
+ from calibre.constants import iswindows, isosx
+
+ ARC4, RSA, AES = _load_crypto()
+
+ if AES == None or RSA == None or ARC4 == None:
+ # Failed to load libcrypto or PyCrypto... Adobe PDFs can\'t be decrypted.'
+ raise ADEPTError('IneptPDF: Failed to load crypto libs... Adobe PDFs can\'t be decrypted.')
+ return
+
+ # Load any keyfiles (*.der) included Calibre's config directory.
+ userkeys = []
+
+ # Find Calibre's configuration directory.
+ confpath = os.path.split(os.path.split(self.plugin_path)[0])[0]
+ print 'IneptPDF: Calibre configuration directory = %s' % confpath
+ files = os.listdir(confpath)
+ filefilter = re.compile("\.der$", re.IGNORECASE)
+ files = filter(filefilter.search, files)
+
+ if files:
+ try:
+ for filename in files:
+ fpath = os.path.join(confpath, filename)
+ with open(fpath, 'rb') as f:
+ userkeys.append(f.read())
+ print 'IneptPDF: Keyfile %s found in config folder.' % filename
+ except IOError:
+ print 'IneptPDF: Error reading keyfiles from config directory.'
+ pass
+ else:
+ # Try to find key from ADE install and save the key in
+ # Calibre's configuration directory for future use.
+ if iswindows or isosx:
+ # ADE key retrieval script.
+ from ade_key import retrieve_key
+ try:
+ keydata = retrieve_key()
+ userkeys.append(keydata)
+ keypath = os.path.join(confpath, 'calibre-adeptkey.der')
+ with open(keypath, 'wb') as f:
+ f.write(keydata)
+ print 'IneptPDF: Created keyfile from ADE install.'
+ except:
+ print 'IneptPDF: Couldn\'t Retrieve key from ADE install.'
+ pass
+
+ if not userkeys:
+ # No user keys found... bail out.
+ raise ADEPTError('IneptPDF - No keys found. Check keyfile(s)/ADE install')
+ return None
+
+ # Attempt to decrypt pdf with each encryption key found.
+ for userkey in userkeys:
+ # Create a TemporaryPersistent file to work with.
+ of = self.temporary_file('.pdf')
+ kf = self.temporary_file('.der')
+ with open(kf.name, 'wb') as f:
+ f.write(userkey)
+
+ # Give the user keyfile, ebook and TemporaryPersistent file to the plugin_main function.
+ print "Ready to start decrypting."
+ result = plugin_main(kf.name, path_to_ebook, of.name)
+
+ # Decryption was successful return the modified PersistentTemporary
+ # file to Calibre's import process.
+ if result == 0:
+ print 'IneptPDF: Encryption successfully removed.'
+ of.close
+ return of.name
+ break
+ else:
+ print 'IneptPDF: Encryption key invalid... trying others.'
+ of.close()
+
+ # Something went wrong with decryption.
+ # Import the original unmolested pdf.
+ of.close
+ raise ADEPTError('IneptPDF - Ultimately failed to decrypt')
+ return None
from __future__ import with_statement
-__version__ = '1.2'
+__version__ = '1.4'
class Unbuffered:
def __init__(self, stream):
import binascii
import zlib
import re
+import zlib, zipfile, tempfile, shutil
from struct import pack, unpack, unpack_from
-
-#Exception Handling
class DrmException(Exception):
+    # Generic DRM-removal failure raised throughout this module.
    pass
-#
-# crypto digestroutines
-#
-
-import hashlib
-
-def MD5(message):
- ctx = hashlib.md5()
- ctx.update(message)
- return ctx.digest()
-
-def SHA1(message):
- ctx = hashlib.sha1()
- ctx.update(message)
- return ctx.digest()
-
-# determine if we are running as a calibre plugin
if 'calibre' in sys.modules:
inCalibre = True
- global openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap1, charMap2, charMap3, charMap4
else:
inCalibre = False
-#
-# start of Kindle specific routines
-#
-
-if not inCalibre:
- import mobidedrm
- if sys.platform.startswith('win'):
- from k4pcutils import openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap1, charMap2, charMap3, charMap4
- if sys.platform.startswith('darwin'):
- from k4mutils import openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap1, charMap2, charMap3, charMap4
-
-global kindleDatabase
-
-# Encode the bytes in data with the characters in map
-def encode(data, map):
- result = ""
- for char in data:
- value = ord(char)
- Q = (value ^ 0x80) // len(map)
- R = value % len(map)
- result += map[Q]
- result += map[R]
- return result
-
-# Hash the bytes in data and then encode the digest with the characters in map
-def encodeHash(data,map):
- return encode(MD5(data),map)
-
-# Decode the string in data with the characters in map. Returns the decoded bytes
-def decode(data,map):
- result = ""
- for i in range (0,len(data)-1,2):
- high = map.find(data[i])
- low = map.find(data[i+1])
- if (high == -1) or (low == -1) :
- break
- value = (((high * len(map)) ^ 0x80) & 0xFF) + low
- result += pack("B",value)
- return result
-
-
-# Parse the Kindle.info file and return the records as a list of key-values
-def parseKindleInfo(kInfoFile):
- DB = {}
- infoReader = openKindleInfo(kInfoFile)
- infoReader.read(1)
- data = infoReader.read()
- if sys.platform.startswith('win'):
- items = data.split('{')
- else :
- items = data.split('[')
- for item in items:
- splito = item.split(':')
- DB[splito[0]] =splito[1]
- return DB
-
-# Get a record from the Kindle.info file for the key "hashedKey" (already hashed and encoded). Return the decoded and decrypted record
-def getKindleInfoValueForHash(hashedKey):
- global kindleDatabase
- encryptedValue = decode(kindleDatabase[hashedKey],charMap2)
- if sys.platform.startswith('win'):
- return CryptUnprotectData(encryptedValue,"")
- else:
- cleartext = CryptUnprotectData(encryptedValue)
- return decode(cleartext, charMap1)
-
-# Get a record from the Kindle.info file for the string in "key" (plaintext). Return the decoded and decrypted record
-def getKindleInfoValueForKey(key):
- return getKindleInfoValueForHash(encodeHash(key,charMap2))
-
-# Find if the original string for a hashed/encoded string is known. If so return the original string othwise return an empty string.
-def findNameForHash(hash):
- names = ["kindle.account.tokens","kindle.cookie.item","eulaVersionAccepted","login_date","kindle.token.item","login","kindle.key.item","kindle.name.info","kindle.device.info", "MazamaRandomNumber"]
- result = ""
- for name in names:
- if hash == encodeHash(name, charMap2):
- result = name
- break
- return result
-
-# Print all the records from the kindle.info file (option -i)
-def printKindleInfo():
- for record in kindleDatabase:
- name = findNameForHash(record)
- if name != "" :
- print (name)
- print ("--------------------------")
- else :
- print ("Unknown Record")
- print getKindleInfoValueForHash(record)
- print "\n"
-
-#
-# PID generation routines
-#
-
-# Returns two bit at offset from a bit field
-def getTwoBitsFromBitField(bitField,offset):
- byteNumber = offset // 4
- bitPosition = 6 - 2*(offset % 4)
- return ord(bitField[byteNumber]) >> bitPosition & 3
-
-# Returns the six bits at offset from a bit field
-def getSixBitsFromBitField(bitField,offset):
- offset *= 3
- value = (getTwoBitsFromBitField(bitField,offset) <<4) + (getTwoBitsFromBitField(bitField,offset+1) << 2) +getTwoBitsFromBitField(bitField,offset+2)
- return value
-
-# 8 bits to six bits encoding from hash to generate PID string
-def encodePID(hash):
- global charMap3
- PID = ""
- for position in range (0,8):
- PID += charMap3[getSixBitsFromBitField(hash,position)]
- return PID
-
-# Encryption table used to generate the device PID
-def generatePidEncryptionTable() :
- table = []
- for counter1 in range (0,0x100):
- value = counter1
- for counter2 in range (0,8):
- if (value & 1 == 0) :
- value = value >> 1
- else :
- value = value >> 1
- value = value ^ 0xEDB88320
- table.append(value)
- return table
-
-# Seed value used to generate the device PID
-def generatePidSeed(table,dsn) :
- value = 0
- for counter in range (0,4) :
- index = (ord(dsn[counter]) ^ value) &0xFF
- value = (value >> 8) ^ table[index]
- return value
-
-# Generate the device PID
-def generateDevicePID(table,dsn,nbRoll):
- seed = generatePidSeed(table,dsn)
- pidAscii = ""
- pid = [(seed >>24) &0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF,(seed>>24) & 0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF]
- index = 0
- for counter in range (0,nbRoll):
- pid[index] = pid[index] ^ ord(dsn[counter])
- index = (index+1) %8
- for counter in range (0,8):
- index = ((((pid[counter] >>5) & 3) ^ pid[counter]) & 0x1f) + (pid[counter] >> 7)
- pidAscii += charMap4[index]
- return pidAscii
-
-# convert from 8 digit PID to 10 digit PID with checksum
-def checksumPid(s):
- letters = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
- crc = (~binascii.crc32(s,-1))&0xFFFFFFFF
- crc = crc ^ (crc >> 16)
- res = s
- l = len(letters)
- for i in (0,1):
- b = crc & 0xff
- pos = (b // l) ^ (b % l)
- res += letters[pos%l]
- crc >>= 8
- return res
-
-
-class MobiPeek:
- def loadSection(self, section):
- before, after = self.sections[section:section+2]
- self.f.seek(before)
- return self.f.read(after - before)
- def __init__(self, filename):
- self.f = file(filename, 'rb')
- self.header = self.f.read(78)
- self.ident = self.header[0x3C:0x3C+8]
- if self.ident != 'BOOKMOBI' and self.ident != 'TEXtREAd':
- raise DrmException('invalid file format')
- self.num_sections, = unpack_from('>H', self.header, 76)
- sections = self.f.read(self.num_sections*8)
- self.sections = unpack_from('>%dL' % (self.num_sections*2), sections, 0)[::2] + (0xfffffff, )
- self.sect0 = self.loadSection(0)
- self.f.close()
- def getBookTitle(self):
- # get book title
- toff, tlen = unpack('>II', self.sect0[0x54:0x5c])
- tend = toff + tlen
- title = self.sect0[toff:tend]
- return title
- def getexthData(self):
- # if exth region exists then grab it
- # get length of this header
- length, type, codepage, unique_id, version = unpack('>LLLLL', self.sect0[20:40])
- exth_flag, = unpack('>L', self.sect0[0x80:0x84])
- exth = ''
- if exth_flag & 0x40:
- exth = self.sect0[16 + length:]
- return exth
- def isNotEncrypted(self):
- lock_type, = unpack('>H', self.sect0[0xC:0xC+2])
- if lock_type == 0:
- return True
- return False
-
-# DiapDealer's stuff: Parse the EXTH header records and parse the Kindleinfo
-# file to calculate the book pid.
-def getK4Pids(exth, title, kInfoFile=None):
- global kindleDatabase
- try:
- kindleDatabase = parseKindleInfo(kInfoFile)
- except Exception, message:
- print(message)
-
- if kindleDatabase != None :
- # Get the Mazama Random number
- MazamaRandomNumber = getKindleInfoValueForKey("MazamaRandomNumber")
-
- # Get the HDD serial
- encodedSystemVolumeSerialNumber = encodeHash(GetVolumeSerialNumber(),charMap1)
-
- # Get the current user name
- encodedUsername = encodeHash(GetUserName(),charMap1)
-
- # concat, hash and encode to calculate the DSN
- DSN = encode(SHA1(MazamaRandomNumber+encodedSystemVolumeSerialNumber+encodedUsername),charMap1)
-
- print("\nDSN: " + DSN)
-
- # Compute the device PID (for which I can tell, is used for nothing).
- # But hey, stuff being printed out is apparently cool.
- table = generatePidEncryptionTable()
- devicePID = generateDevicePID(table,DSN,4)
-
- print("Device PID: " + checksumPid(devicePID))
-
- # Compute book PID
- exth_records = {}
- nitems, = unpack('>I', exth[8:12])
- pos = 12
-
- exth_records[209] = None
- # Parse the exth records, storing data indexed by type
- for i in xrange(nitems):
- type, size = unpack('>II', exth[pos: pos + 8])
- content = exth[pos + 8: pos + size]
-
- exth_records[type] = content
- pos += size
-
- # Grab the contents of the type 209 exth record
- if exth_records[209] != None:
- data = exth_records[209]
- else:
- raise DrmException("\nNo EXTH record type 209 - Perhaps not a K4 file?")
-
- # Parse the 209 data to find the the exth record with the token data.
- # The last character of the 209 data points to the record with the token.
- # Always 208 from my experience, but I'll leave the logic in case that changes.
- for i in xrange(len(data)):
- if ord(data[i]) != 0:
- if exth_records[ord(data[i])] != None:
- token = exth_records[ord(data[i])]
-
- # Get the kindle account token
- kindleAccountToken = getKindleInfoValueForKey("kindle.account.tokens")
-
- print("Account Token: " + kindleAccountToken)
-
- pidHash = SHA1(DSN+kindleAccountToken+exth_records[209]+token)
-
- bookPID = encodePID(pidHash)
- bookPID = checksumPid(bookPID)
-
- if exth_records[503] != None:
- print "Pid for " + exth_records[503] + ": " + bookPID
- else:
- print "Pid for " + title + ":" + bookPID
- return bookPID
-
- raise DrmException("\nCould not access K4 data - Perhaps K4 is not installed/configured?")
- return null
+def zipUpDir(myzip, tempdir,localname):
+ currentdir = tempdir
+ if localname != "":
+ currentdir = os.path.join(currentdir,localname)
+ list = os.listdir(currentdir)
+ for file in list:
+ afilename = file
+ localfilePath = os.path.join(localname, afilename)
+ realfilePath = os.path.join(currentdir,file)
+ if os.path.isfile(realfilePath):
+ myzip.write(realfilePath, localfilePath)
+ elif os.path.isdir(realfilePath):
+ zipUpDir(myzip, tempdir, localfilePath)
def usage(progname):
+    # Print brief command-line help for this script to stdout.
-    print "Removes DRM protection from K4PC, K4M, and Mobi ebooks"
+    print "Removes DRM protection from K4PC/M, Kindle, Mobi and Topaz ebooks"
    print "Usage:"
-    print "  %s [-k <kindle.info>] [-p <pidnums>] <infile> <outfile> " % progname
+    print "  %s [-k <kindle.info>] [-p <pidnums>] [-s <kindleSerialNumbers>] <infile> <outdir> " % progname
#
# Main
#
def main(argv=sys.argv):
- global kindleDatabase
import mobidedrm
-
+ import topazextract
+ import kgenpids
progname = os.path.basename(argv[0])
+
+ k4 = False
kInfoFiles = []
- pidnums = ""
+ serials = []
+ pids = []
print ('K4MobiDeDrm v%(__version__)s '
'provided by the work of many including DiapDealer, SomeUpdates, IHeartCabbages, CMBDTC, Skindle, DarkReverser, ApprenticeAlf, etc .' % globals())
+ print ' '
try:
- opts, args = getopt.getopt(sys.argv[1:], "k:p:")
+ opts, args = getopt.getopt(sys.argv[1:], "k:p:s:")
except getopt.GetoptError, err:
print str(err)
usage(progname)
sys.exit(2)
-
if len(args)<2:
usage(progname)
sys.exit(2)
if o == "-p":
if a == None :
raise DrmException("Invalid parameter for -p")
- pidnums = a
+ pids = a.split(',')
+ if o == "-s":
+ if a == None :
+ raise DrmException("Invalid parameter for -s")
+ serials = a.split(',')
+
+ # try with built in Kindle Info files
+ k4 = True
- kindleDatabase = None
infile = args[0]
- outfile = args[1]
- DecodeErrorString = ""
- try:
- # first try with K4PC/K4M
- ex = MobiPeek(infile)
- if ex.isNotEncrypted():
- print "File was Not Encrypted"
- return 2
- title = ex.getBookTitle()
- exth = ex.getexthData()
- if exth=='':
- raise DrmException("Not a Kindle Mobipocket file")
- pid = getK4Pids(exth, title)
- unlocked_file = mobidedrm.getUnencryptedBook(infile, pid)
- except DrmException, e:
- DecodeErrorString += "Error trying default K4 info: " + str(e) + "\n"
- pass
- except mobidedrm.DrmException, e:
- DecodeErrorString += "Error trying default K4 info: " + str(e) + "\n"
- pass
+ outdir = args[1]
+
+ # handle the obvious cases at the beginning
+ if not os.path.isfile(infile):
+ print "Error: Input file does not exist"
+ return 1
+
+ mobi = True
+ magic3 = file(infile,'rb').read(3)
+ if magic3 == 'TPZ':
+ mobi = False
+
+ bookname = os.path.splitext(os.path.basename(infile))[0]
+
+ if mobi:
+ mb = mobidedrm.MobiBook(infile)
else:
- file(outfile, 'wb').write(unlocked_file)
- return 0
-
- # now try alternate kindle.info files
- if kInfoFiles:
- for infoFile in kInfoFiles:
- kindleDatabase = None
- try:
- title = ex.getBookTitle()
- exth = ex.getexthData()
- if exth=='':
- raise DrmException("Not a Kindle Mobipocket file")
- pid = getK4Pids(exth, title, infoFile)
- unlocked_file = mobidedrm.getUnencryptedBook(infile, pid)
- except DrmException, e:
- DecodeErrorString += "Error trying " + infoFile + " K4 info: " + str(e) + "\n"
- pass
- except mobidedrm.DrmException, e:
- DecodeErrorString += "Error trying " + infoFile + " K4 info: " + str(e) + "\n"
- pass
- else:
- file(outfile, 'wb').write(unlocked_file)
- return 0
-
- # Lastly, try from the pid list
- pids = pidnums.split(',')
- for pid in pids:
- try:
- print 'Trying: "'+ pid + '"'
- unlocked_file = mobidedrm.getUnencryptedBook(infile, pid)
- except mobidedrm.DrmException:
- pass
- else:
- file(outfile, 'wb').write(unlocked_file)
- return 0
+ tempdir = tempfile.mkdtemp()
+ mb = topazextract.TopazBook(infile, tempdir)
- # we could not unencrypt book
- print DecodeErrorString
- print "Error: Could Not Unencrypt Book"
- return 1
+ title = mb.getBookTitle()
+ print "Processing Book: ", title
+ # build pid list
+ md1, md2 = mb.getPIDMetaInfo()
+ pidlst = kgenpids.getPidList(md1, md2, k4, pids, serials, kInfoFiles)
+
+ try:
+ if mobi:
+ unlocked_file = mb.processBook(pidlst)
+ else:
+ mb.processBook(pidlst)
+
+ except mobidedrm.DrmException, e:
+ print " ... not suceessful " + str(e) + "\n"
+ return 1
+ except topazextract.TpzDRMError, e:
+ print str(e)
+ print " Creating DeBug Full Zip Archive of Book"
+ zipname = os.path.join(outdir, bookname + '_debug' + '.zip')
+ myzip = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+ zipUpDir(myzip, tempdir, '')
+ myzip.close()
+ return 1
+
+ if mobi:
+ outfile = os.path.join(outdir,bookname + '_nodrm' + '.azw')
+ file(outfile, 'wb').write(unlocked_file)
+ return 0
+
+ # topaz: build up zip archives of results
+ print " Creating HTML ZIP Archive"
+ zipname = os.path.join(outdir, bookname + '_nodrm' + '.zip')
+ myzip1 = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+ myzip1.write(os.path.join(tempdir,'book.html'),'book.html')
+ myzip1.write(os.path.join(tempdir,'book.opf'),'book.opf')
+ if os.path.isfile(os.path.join(tempdir,'cover.jpg')):
+ myzip1.write(os.path.join(tempdir,'cover.jpg'),'cover.jpg')
+ myzip1.write(os.path.join(tempdir,'style.css'),'style.css')
+ zipUpDir(myzip1, tempdir, 'img')
+ myzip1.close()
+
+ print " Creating SVG ZIP Archive"
+ zipname = os.path.join(outdir, bookname + '_SVG' + '.zip')
+ myzip2 = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+ myzip2.write(os.path.join(tempdir,'index_svg.xhtml'),'index_svg.xhtml')
+ zipUpDir(myzip2, tempdir, 'svg')
+ zipUpDir(myzip2, tempdir, 'img')
+ myzip2.close()
+
+ print " Creating XML ZIP Archive"
+ zipname = os.path.join(outdir, bookname + '_XML' + '.zip')
+ myzip3 = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+ targetdir = os.path.join(tempdir,'xml')
+ zipUpDir(myzip3, targetdir, '')
+ zipUpDir(myzip3, tempdir, 'img')
+ myzip3.close()
+
+ shutil.rmtree(tempdir)
+ return 0
if __name__ == '__main__':
sys.stdout=Unbuffered(sys.stdout)
sys.exit(main())
-
if not __name__ == "__main__" and inCalibre:
from calibre.customize import FileTypePlugin
    class K4DeDRM(FileTypePlugin):
+        # Calibre import-time plugin wrapping the same Mobi/Topaz decryption
+        # pipeline as main() above.
        name = 'K4PC, K4Mac, Mobi DeDRM' # Name of the plugin
-        description = 'Removes DRM from K4PC, K4Mac, and Mobi files. \
+        description = 'Removes DRM from K4PC and Mac, Kindle Mobi and Topaz files. \
                Provided by the work of many including DiapDealer, SomeUpdates, IHeartCabbages, CMBDTC, Skindle, DarkReverser, ApprenticeAlf, etc.'
        supported_platforms = ['osx', 'windows', 'linux'] # Platforms this plugin will run on
        author = 'DiapDealer, SomeUpdates' # The author of this plugin
-        version = (0, 1, 4) # The version number of this plugin
-        file_types = set(['prc','mobi','azw']) # The file types that this plugin will be applied to
+        version = (0, 1, 7) # The version number of this plugin
+        file_types = set(['prc','mobi','azw','azw1','tpz']) # The file types that this plugin will be applied to
        on_import = True # Run this plugin during the import
-        priority = 200 # run this plugin before mobidedrm, k4pcdedrm, k4dedrm
+        priority = 210 # run this plugin before mobidedrm, k4pcdedrm, k4dedrm
        def run(self, path_to_ebook):
+            # Returns the path of a decrypted temporary file (.mobi or a
+            # Topaz result .zip), or raises on failure.
            from calibre.gui2 import is_ok_to_use_qt
            from PyQt4.Qt import QMessageBox
-
-            # Head Topaz files off at the pass and warn the user that they will NOT
-            # be decrypted. Changes the file extension from .azw or .prc to .tpz so
-            # Calibre can at least read the metadata properly and the user can find
-            # them by sorting on 'format'.
-            with open(path_to_ebook, 'rb') as f:
-                raw = f.read()
-            if raw.startswith('TPZ'):
-                tf = self.temporary_file('.tpz')
-                if is_ok_to_use_qt():
-                    d = QMessageBox(QMessageBox.Warning, "K4MobiDeDRM Plugin", "%s is a Topaz book. It will NOT be decrypted!" % path_to_ebook)
-                    d.show()
-                    d.raise_()
-                    d.exec_()
-                tf.write(raw)
-                tf.close
-                return tf.name
-
-            global kindleDatabase
-            global openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap1, charMap2, charMap3, charMap4
-            if sys.platform.startswith('win'):
-                from k4pcutils import openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap1, charMap2, charMap3, charMap4
-            if sys.platform.startswith('darwin'):
-                from k4mutils import openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap1, charMap2, charMap3, charMap4
+            from calibre.ptempfile import PersistentTemporaryDirectory
+
+            import kgenpids
+            import zlib
+            import zipfile
+            import topazextract
            import mobidedrm
+            k4 = True
+            pids = []
+            serials = []
+            kInfoFiles = []
+
            # Get supplied list of PIDs to try from plugin customization.
-            pidnums = self.site_customization
-
+            # Entries of length 8/10 are PIDs; 16-char entries starting with
+            # 'B' are Kindle serial numbers.
+            customvalues = self.site_customization.split(',')
+            for customvalue in customvalues:
+                customvalue = str(customvalue)
+                customvalue = customvalue.strip()
+                if len(customvalue) == 10 or len(customvalue) == 8:
+                    pids.append(customvalue)
+                else :
+                    if len(customvalue) == 16 and customvalue[0] == 'B':
+                        serials.append(customvalue)
+                    else:
+                        print "%s is not a valid Kindle serial number or PID." % str(customvalue)
+
            # Load any kindle info files (*.info) included Calibre's config directory.
-            kInfoFiles = []
            try:
                # Find Calibre's configuration directory.
                confpath = os.path.split(os.path.split(self.plugin_path)[0])[0]
                print 'K4MobiDeDRM: Error reading kindle info files from config directory.'
                pass
-            # first try with book specifc pid from K4PC or K4M
+
+            # Topaz books start with the magic bytes 'TPZ'.
+            mobi = True
+            magic3 = file(path_to_ebook,'rb').read(3)
+            if magic3 == 'TPZ':
+                mobi = False
+
+            bookname = os.path.splitext(os.path.basename(path_to_ebook))[0]
+
+            if mobi:
+                mb = mobidedrm.MobiBook(path_to_ebook)
+            else:
+                tempdir = PersistentTemporaryDirectory()
+                mb = topazextract.TopazBook(path_to_ebook, tempdir)
+
+            title = mb.getBookTitle()
+            md1, md2 = mb.getPIDMetaInfo()
+            pidlst = kgenpids.getPidList(md1, md2, k4, pids, serials, kInfoFiles)
+
            try:
-                kindleDatabase = None
-                ex = MobiPeek(path_to_ebook)
-                if ex.isNotEncrypted():
-                    return path_to_ebook
-                title = ex.getBookTitle()
-                exth = ex.getexthData()
-                if exth=='':
-                    raise DrmException("Not a Kindle Mobipocket file")
-                pid = getK4Pids(exth, title)
-                unlocked_file = mobidedrm.getUnencryptedBook(path_to_ebook,pid)
-            except DrmException:
-                pass
+                if mobi:
+                    unlocked_file = mb.processBook(pidlst)
+                else:
+                    mb.processBook(pidlst)
+
            except mobidedrm.DrmException:
-                pass
-            else:
-                of = self.temporary_file('.mobi')
+                # if we reached this point decryption failed; warn and raise
+                if is_ok_to_use_qt():
+                    d = QMessageBox(QMessageBox.Warning, "K4MobiDeDRM Plugin", "Error decoding: %s\n" % path_to_ebook)
+                    d.show()
+                    d.raise_()
+                    d.exec_()
+                raise Exception("K4MobiDeDRM plugin could not decode the file")
+                # NOTE(review): unreachable after the raise above
+                return ""
+            except topazextract.TpzDRMError:
+                # same handling for Topaz decryption failures
+                if is_ok_to_use_qt():
+                    d = QMessageBox(QMessageBox.Warning, "K4MobiDeDRM Plugin", "Error decoding: %s\n" % path_to_ebook)
+                    d.show()
+                    d.raise_()
+                    d.exec_()
+                raise Exception("K4MobiDeDRM plugin could not decode the file")
+                # NOTE(review): unreachable after the raise above
+                return ""
+
+            print "Success!"
+            if mobi:
+                of = self.temporary_file(bookname+'.mobi')
                of.write(unlocked_file)
                of.close()
                return of.name
-
-            # Now try alternate kindle info files
-            if kInfoFiles:
-                for infoFile in kInfoFiles:
-                    kindleDatabase = None
-                    try:
-                        title = ex.getBookTitle()
-                        exth = ex.getexthData()
-                        if exth=='':
-                            raise DrmException("Not a Kindle Mobipocket file")
-                        pid = getK4Pids(exth, title, infoFile)
-                        unlocked_file = mobidedrm.getUnencryptedBook(path_to_ebook,pid)
-                    except DrmException:
-                        pass
-                    except mobidedrm.DrmException:
-                        pass
-                    else:
-                        of = self.temporary_file('.mobi')
-                        of.write(unlocked_file)
-                        of.close()
-                        return of.name
-
-            # now try from the pid list
-            pids = pidnums.split(',')
-            for pid in pids:
-                try:
-                    unlocked_file = mobidedrm.getUnencryptedBook(path_to_ebook, pid)
-                except mobidedrm.DrmException:
-                    pass
-                else:
-                    of = self.temporary_file('.mobi')
-                    of.write(unlocked_file)
-                    of.close()
-                    return of.name
-
-            #if you reached here then no luck raise and exception
-            if is_ok_to_use_qt():
-                d = QMessageBox(QMessageBox.Warning, "K4MobiDeDRM Plugin", "Error decoding: %s\n" % path_to_ebook)
-                d.show()
-                d.raise_()
-                d.exec_()
-            raise Exception("K4MobiDeDRM plugin could not decode the file")
-            return ""
+
+            # topaz: build up zip archives of results
+            print "  Creating HTML ZIP Archive"
+            of = self.temporary_file(bookname + '.zip')
+            myzip = zipfile.ZipFile(of.name,'w',zipfile.ZIP_DEFLATED, False)
+            myzip.write(os.path.join(tempdir,'book.html'),'book.html')
+            myzip.write(os.path.join(tempdir,'book.opf'),'book.opf')
+            if os.path.isfile(os.path.join(tempdir,'cover.jpg')):
+                myzip.write(os.path.join(tempdir,'cover.jpg'),'cover.jpg')
+            myzip.write(os.path.join(tempdir,'style.css'),'style.css')
+            zipUpDir(myzip, tempdir, 'img')
+            myzip.close()
+            return of.name
        def customization_help(self, gui=False):
+            # Text shown in calibre's plugin-customization dialog.
-            return 'Enter each 10 character PID separated by a comma (no spaces).'
+            return 'Enter 10 character PIDs and/or Kindle serial numbers, separated by commas.'
# standalone set of Mac OSX specific routines needed for K4DeDRM
from __future__ import with_statement
-
import sys
import os
-
-#Exception Handling
-class K4MDrmException(Exception):
- pass
-
-import signal
-import threading
import subprocess
-from subprocess import Popen, PIPE, STDOUT
-
-# **heavily** chopped up and modfied version of asyncproc.py
-# to make it actually work on Windows as well as Mac/Linux
-# For the original see:
-# "http://www.lysator.liu.se/~bellman/download/"
-# author is "Thomas Bellman <bellman@lysator.liu.se>"
-# available under GPL version 3 or Later
-
-# create an asynchronous subprocess whose output can be collected in
-# a non-blocking manner
-
-# What a mess! Have to use threads just to get non-blocking io
-# in a cross-platform manner
-
-# luckily all thread use is hidden within this class
-
-class Process(object):
- def __init__(self, *params, **kwparams):
- if len(params) <= 3:
- kwparams.setdefault('stdin', subprocess.PIPE)
- if len(params) <= 4:
- kwparams.setdefault('stdout', subprocess.PIPE)
- if len(params) <= 5:
- kwparams.setdefault('stderr', subprocess.PIPE)
- self.__pending_input = []
- self.__collected_outdata = []
- self.__collected_errdata = []
- self.__exitstatus = None
- self.__lock = threading.Lock()
- self.__inputsem = threading.Semaphore(0)
- self.__quit = False
-
- self.__process = subprocess.Popen(*params, **kwparams)
- if self.__process.stdin:
- self.__stdin_thread = threading.Thread(
- name="stdin-thread",
- target=self.__feeder, args=(self.__pending_input,
- self.__process.stdin))
- self.__stdin_thread.setDaemon(True)
- self.__stdin_thread.start()
-
- if self.__process.stdout:
- self.__stdout_thread = threading.Thread(
- name="stdout-thread",
- target=self.__reader, args=(self.__collected_outdata,
- self.__process.stdout))
- self.__stdout_thread.setDaemon(True)
- self.__stdout_thread.start()
-
- if self.__process.stderr:
- self.__stderr_thread = threading.Thread(
- name="stderr-thread",
- target=self.__reader, args=(self.__collected_errdata,
- self.__process.stderr))
- self.__stderr_thread.setDaemon(True)
- self.__stderr_thread.start()
-
- def pid(self):
- return self.__process.pid
-
- def kill(self, signal):
- self.__process.send_signal(signal)
-
- # check on subprocess (pass in 'nowait') to act like poll
- def wait(self, flag):
- if flag.lower() == 'nowait':
- rc = self.__process.poll()
- else:
- rc = self.__process.wait()
- if rc != None:
- if self.__process.stdin:
- self.closeinput()
- if self.__process.stdout:
- self.__stdout_thread.join()
- if self.__process.stderr:
- self.__stderr_thread.join()
- return self.__process.returncode
-
- def terminate(self):
- if self.__process.stdin:
- self.closeinput()
- self.__process.terminate()
-
- # thread gets data from subprocess stdout
- def __reader(self, collector, source):
- while True:
- data = os.read(source.fileno(), 65536)
- self.__lock.acquire()
- collector.append(data)
- self.__lock.release()
- if data == "":
- source.close()
- break
- return
-
- # thread feeds data to subprocess stdin
- def __feeder(self, pending, drain):
- while True:
- self.__inputsem.acquire()
- self.__lock.acquire()
- if not pending and self.__quit:
- drain.close()
- self.__lock.release()
- break
- data = pending.pop(0)
- self.__lock.release()
- drain.write(data)
- # non-blocking read of data from subprocess stdout
- def read(self):
- self.__lock.acquire()
- outdata = "".join(self.__collected_outdata)
- del self.__collected_outdata[:]
- self.__lock.release()
- return outdata
-
- # non-blocking read of data from subprocess stderr
- def readerr(self):
- self.__lock.acquire()
- errdata = "".join(self.__collected_errdata)
- del self.__collected_errdata[:]
- self.__lock.release()
- return errdata
-
- # non-blocking write to stdin of subprocess
- def write(self, data):
- if self.__process.stdin is None:
- raise ValueError("Writing to process with stdin not a pipe")
- self.__lock.acquire()
- self.__pending_input.append(data)
- self.__inputsem.release()
- self.__lock.release()
-
- # close stdinput of subprocess
- def closeinput(self):
- self.__lock.acquire()
- self.__quit = True
- self.__inputsem.release()
- self.__lock.release()
+class K4MDrmException(Exception):
+    # Raised by the Mac OS X specific key-retrieval helpers on failure.
+    pass
# interface to needed routines in openssl's libcrypto
# Utility Routines
#
+
+# Various character maps used to decrypt books. Probably supposed to act as obfuscation
+charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
+charMap2 = "ZB0bYyc1xDdW2wEV3Ff7KkPpL8UuGA4gz-Tme9Nn_tHh5SvXCsIiR6rJjQaqlOoM"
+charMap3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
+charMap4 = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
+
+
+
# uses a sub process to get the Hard Drive Serial Number using ioreg
# returns with the serial number of drive whose BSD Name is "disk0"
def GetVolumeSerialNumber():
return sernum
cmdline = '/usr/sbin/ioreg -l -S -w 0 -r -c AppleAHCIDiskDriver'
cmdline = cmdline.encode(sys.getfilesystemencoding())
- p = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
- poll = p.wait('wait')
- results = p.read()
- reslst = results.split('\n')
+ p = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
+ out1, out2 = p.communicate()
+ reslst = out1.split('\n')
cnt = len(reslst)
bsdname = None
sernum = None
username = os.getenv('USER')
return username
-# Various character maps used to decrypt books. Probably supposed to act as obfuscation
-charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
-charMap2 = "ZB0bYyc1xDdW2wEV3Ff7KkPpL8UuGA4gz-Tme9Nn_tHh5SvXCsIiR6rJjQaqlOoM"
-charMap3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
-charMap4 = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
def encode(data, map):
result = ""
cleartext = crp.decrypt(encryptedData)
return cleartext
+
# Locate and open the .kindle-info file
def openKindleInfo(kInfoFile=None):
if kInfoFile == None:
home = os.getenv('HOME')
cmdline = 'find "' + home + '/Library/Application Support" -name ".kindle-info"'
cmdline = cmdline.encode(sys.getfilesystemencoding())
- p1 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
- poll = p1.wait('wait')
- results = p1.read()
- reslst = results.split('\n')
+ p1 = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
+ out1, out2 = p1.communicate()
+ reslst = out1.split('\n')
kinfopath = 'NONE'
cnt = len(reslst)
for j in xrange(cnt):
crypt32 = windll.crypt32
-#
# Various character maps used to decrypt books. Probably supposed to act as obfuscation
-#
charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
charMap2 = "AaZzB0bYyCc1XxDdW2wEeVv3FfUuG4g-TtHh5SsIiR6rJjQq7KkPpL8lOoMm9Nn_"
charMap3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
charMap4 = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
-#
-# Exceptions for all the problems that might happen during the script
-#
class DrmException(Exception):
pass
# 0.14 - Working out when the extra data flags are present has been problematic
# Versions 7 through 9 have tried to tweak the conditions, but have been
# only partially successful. Closer examination of lots of sample
-# files reveals that a confusin has arisen because trailing data entries
+# files reveals that a confusion has arisen because trailing data entries
# are not encrypted, but it turns out that the multibyte entries
# in utf8 file are encrypted. (Although neither kind gets compressed.)
# This knowledge leads to a simplification of the test for the
# Removed the disabled Calibre plug-in code
# Permit use of 8-digit PIDs
# 0.19 - It seems that multibyte entries aren't encrypted in a v6 file either.
-# 0.20 - Corretion: It seems that multibyte entries are encrypted in a v6 file.
+# 0.20 - Correction: It seems that multibyte entries are encrypted in a v6 file.
+# 0.21 - Added support for multiple pids
+# 0.22 - revised structure to hold MobiBook as a class to allow an extended interface
-__version__ = '0.20'
+__version__ = '0.22'
import sys
-import struct
-import binascii
class Unbuffered:
def __init__(self, stream):
self.stream.flush()
def __getattr__(self, attr):
return getattr(self.stream, attr)
+sys.stdout=Unbuffered(sys.stdout)
+
+import struct
+import binascii
class DrmException(Exception):
pass
+
+#
+# MobiBook Utility Routines
+#
+
# Implementation of Pukall Cipher 1
def PC1(key, src, decryption=True):
sum1 = 0;
wkey = []
for i in xrange(8):
wkey.append(ord(key[i*2])<<8 | ord(key[i*2+1]))
-
dst = ""
for i in xrange(len(src)):
temp1 = 0;
num += (ord(ptr[size - num - 1]) & 0x3) + 1
return num
-class DrmStripper:
+
+
+class MobiBook:
def loadSection(self, section):
if (section + 1 == self.num_sections):
endoff = len(self.data_file)
off = self.sections[section][0]
return self.data_file[off:endoff]
+ def __init__(self, infile):
+ # initial sanity check on file
+ self.data_file = file(infile, 'rb').read()
+ self.header = self.data_file[0:78]
+ if self.header[0x3C:0x3C+8] != 'BOOKMOBI':
+ raise DrmException("invalid file format")
+
+ # build up section offset and flag info
+ self.num_sections, = struct.unpack('>H', self.header[76:78])
+ self.sections = []
+ for i in xrange(self.num_sections):
+ offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', self.data_file[78+i*8:78+i*8+8])
+ flags, val = a1, a2<<16|a3<<8|a4
+ self.sections.append( (offset, flags, val) )
+
+ # parse information from section 0
+ self.sect = self.loadSection(0)
+ self.records, = struct.unpack('>H', self.sect[0x8:0x8+2])
+ self.mobi_length, = struct.unpack('>L',self.sect[0x14:0x18])
+ self.mobi_version, = struct.unpack('>L',self.sect[0x68:0x6C])
+ print "MOBI header version = %d, length = %d" %(self.mobi_version, self.mobi_length)
+ self.extra_data_flags = 0
+ if (self.mobi_length >= 0xE4) and (self.mobi_version >= 5):
+ self.extra_data_flags, = struct.unpack('>H', self.sect[0xF2:0xF4])
+ print "Extra Data Flags = %d" % self.extra_data_flags
+ if self.mobi_version < 7:
+ # multibyte utf8 data is included in the encryption for mobi_version 6 and below
+ # so clear that byte so that we leave it to be decrypted.
+ self.extra_data_flags &= 0xFFFE
+
+ # if exth region exists parse it for metadata array
+ self.meta_array = {}
+ exth_flag, = struct.unpack('>L', self.sect[0x80:0x84])
+ exth = ''
+ if exth_flag & 0x40:
+ exth = self.sect[16 + self.mobi_length:]
+ nitems, = struct.unpack('>I', exth[8:12])
+ pos = 12
+ for i in xrange(nitems):
+ type, size = struct.unpack('>II', exth[pos: pos + 8])
+ content = exth[pos + 8: pos + size]
+ self.meta_array[type] = content
+ pos += size
+
+ def getBookTitle(self):
+ title = ''
+ if 503 in self.meta_array:
+ title = self.meta_array[503]
+ else :
+ toff, tlen = struct.unpack('>II', self.sect[0x54:0x5c])
+ tend = toff + tlen
+ title = self.sect[toff:tend]
+ if title == '':
+ title = self.header[:32]
+ title = title.split("\0")[0]
+ return title
+
+ def getPIDMetaInfo(self):
+ rec209 = None
+ token = None
+ if 209 in self.meta_array:
+ rec209 = self.meta_array[209]
+ data = rec209
+            # Parse the 209 data to find the exth record with the token data.
+ # The last character of the 209 data points to the record with the token.
+ # Always 208 from my experience, but I'll leave the logic in case that changes.
+ for i in xrange(len(data)):
+ if ord(data[i]) != 0:
+ if self.meta_array[ord(data[i])] != None:
+ token = self.meta_array[ord(data[i])]
+ return rec209, token
+
def patch(self, off, new):
self.data_file = self.data_file[:off] + new + self.data_file[off+len(new):]
assert off + in_off + len(new) <= endoff
self.patch(off + in_off, new)
- def parseDRM(self, data, count, pid):
- pid = pid.ljust(16,'\0')
- keyvec1 = "\x72\x38\x33\xB0\xB4\xF2\xE3\xCA\xDF\x09\x01\xD6\xE2\xE0\x3F\x96"
- temp_key = PC1(keyvec1, pid, False)
- temp_key_sum = sum(map(ord,temp_key)) & 0xff
+ def parseDRM(self, data, count, pidlist):
found_key = None
- for i in xrange(count):
- verification, size, type, cksum, cookie = struct.unpack('>LLLBxxx32s', data[i*0x30:i*0x30+0x30])
- cookie = PC1(temp_key, cookie)
- ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie)
- if verification == ver and cksum == temp_key_sum and (flags & 0x1F) == 1:
- found_key = finalkey
+ keyvec1 = "\x72\x38\x33\xB0\xB4\xF2\xE3\xCA\xDF\x09\x01\xD6\xE2\xE0\x3F\x96"
+ for pid in pidlist:
+ bigpid = pid.ljust(16,'\0')
+ temp_key = PC1(keyvec1, bigpid, False)
+ temp_key_sum = sum(map(ord,temp_key)) & 0xff
+ found_key = None
+ for i in xrange(count):
+ verification, size, type, cksum, cookie = struct.unpack('>LLLBxxx32s', data[i*0x30:i*0x30+0x30])
+ if cksum == temp_key_sum:
+ cookie = PC1(temp_key, cookie)
+ ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie)
+ if verification == ver and (flags & 0x1F) == 1:
+ found_key = finalkey
+ break
+ if found_key != None:
break
if not found_key:
# Then try the default encoding that doesn't require a PID
+ pid = "00000000"
temp_key = keyvec1
temp_key_sum = sum(map(ord,temp_key)) & 0xff
for i in xrange(count):
verification, size, type, cksum, cookie = struct.unpack('>LLLBxxx32s', data[i*0x30:i*0x30+0x30])
- cookie = PC1(temp_key, cookie)
- ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie)
- if verification == ver and cksum == temp_key_sum:
- found_key = finalkey
- break
- return found_key
-
- def __init__(self, data_file, pid):
- if len(pid)==10:
- if checksumPid(pid[0:-2]) != pid:
- raise DrmException("invalid PID checksum")
- pid = pid[0:-2]
- elif len(pid)==8:
- print "PID without checksum given. With checksum PID is "+checksumPid(pid)
- else:
- raise DrmException("Invalid PID length")
-
- self.data_file = data_file
- header = data_file[0:72]
- if header[0x3C:0x3C+8] != 'BOOKMOBI':
- raise DrmException("invalid file format")
- self.num_sections, = struct.unpack('>H', data_file[76:78])
+ if cksum == temp_key_sum:
+ cookie = PC1(temp_key, cookie)
+ ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie)
+ if verification == ver:
+ found_key = finalkey
+ break
+ return [found_key,pid]
- self.sections = []
- for i in xrange(self.num_sections):
- offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', data_file[78+i*8:78+i*8+8])
- flags, val = a1, a2<<16|a3<<8|a4
- self.sections.append( (offset, flags, val) )
-
- sect = self.loadSection(0)
- records, = struct.unpack('>H', sect[0x8:0x8+2])
- mobi_length, = struct.unpack('>L',sect[0x14:0x18])
- mobi_version, = struct.unpack('>L',sect[0x68:0x6C])
- extra_data_flags = 0
- print "MOBI header version = %d, length = %d" %(mobi_version, mobi_length)
- if (mobi_length >= 0xE4) and (mobi_version >= 5):
- extra_data_flags, = struct.unpack('>H', sect[0xF2:0xF4])
- print "Extra Data Flags = %d" %extra_data_flags
- if mobi_version < 7:
- # multibyte utf8 data is included in the encryption for mobi_version 6 and below
- # so clear that byte so that we leave it to be decrypted.
- extra_data_flags &= 0xFFFE
-
- crypto_type, = struct.unpack('>H', sect[0xC:0xC+2])
+ def processBook(self, pidlist):
+ crypto_type, = struct.unpack('>H', self.sect[0xC:0xC+2])
if crypto_type == 0:
print "This book is not encrypted."
+ return self.data_file
+ if crypto_type == 1:
+ raise DrmException("Cannot decode Mobipocket encryption type 1")
+ if crypto_type != 2:
+ raise DrmException("Cannot decode unknown Mobipocket encryption type %d" % crypto_type)
+
+ goodpids = []
+ for pid in pidlist:
+ if len(pid)==10:
+ if checksumPid(pid[0:-2]) != pid:
+ print "Warning: PID " + pid + " has incorrect checksum, should have been "+checksumPid(pid[0:-2])
+ goodpids.append(pid[0:-2])
+ elif len(pid)==8:
+ goodpids.append(pid)
+
+ # calculate the keys
+ drm_ptr, drm_count, drm_size, drm_flags = struct.unpack('>LLLL', self.sect[0xA8:0xA8+16])
+ if drm_count == 0:
+ raise DrmException("Not yet initialised with PID. Must be opened with Mobipocket Reader first.")
+ found_key, pid = self.parseDRM(self.sect[drm_ptr:drm_ptr+drm_size], drm_count, goodpids)
+ if not found_key:
+ raise DrmException("No key found. Most likely the correct PID has not been given.")
+
+ if pid=="00000000":
+ print "File has default encryption, no specific PID."
else:
- if crypto_type == 1:
- raise DrmException("cannot decode Mobipocket encryption type 1")
- if crypto_type != 2:
- raise DrmException("unknown encryption type: %d" % crypto_type)
-
- # calculate the keys
- drm_ptr, drm_count, drm_size, drm_flags = struct.unpack('>LLLL', sect[0xA8:0xA8+16])
- if drm_count == 0:
- raise DrmException("no PIDs found in this file")
- found_key = self.parseDRM(sect[drm_ptr:drm_ptr+drm_size], drm_count, pid)
- if not found_key:
- raise DrmException("no key found. maybe the PID is incorrect")
-
- # kill the drm keys
- self.patchSection(0, "\0" * drm_size, drm_ptr)
- # kill the drm pointers
- self.patchSection(0, "\xff" * 4 + "\0" * 12, 0xA8)
- # clear the crypto type
- self.patchSection(0, "\0" * 2, 0xC)
-
- # decrypt sections
- print "Decrypting. Please wait . . .",
- new_data = self.data_file[:self.sections[1][0]]
- for i in xrange(1, records+1):
- data = self.loadSection(i)
- extra_size = getSizeOfTrailingDataEntries(data, len(data), extra_data_flags)
- if i%100 == 0:
- print ".",
- # print "record %d, extra_size %d" %(i,extra_size)
- new_data += PC1(found_key, data[0:len(data) - extra_size])
- if extra_size > 0:
- new_data += data[-extra_size:]
- #self.patchSection(i, PC1(found_key, data[0:len(data) - extra_size]))
- if self.num_sections > records+1:
- new_data += self.data_file[self.sections[records+1][0]:]
- self.data_file = new_data
- print "done"
-
- def getResult(self):
+ print "File is encoded with PID "+checksumPid(pid)+"."
+
+ # kill the drm keys
+ self.patchSection(0, "\0" * drm_size, drm_ptr)
+ # kill the drm pointers
+ self.patchSection(0, "\xff" * 4 + "\0" * 12, 0xA8)
+ # clear the crypto type
+ self.patchSection(0, "\0" * 2, 0xC)
+
+ # decrypt sections
+ print "Decrypting. Please wait . . .",
+ new_data = self.data_file[:self.sections[1][0]]
+ for i in xrange(1, self.records+1):
+ data = self.loadSection(i)
+ extra_size = getSizeOfTrailingDataEntries(data, len(data), self.extra_data_flags)
+ if i%100 == 0:
+ print ".",
+ # print "record %d, extra_size %d" %(i,extra_size)
+ new_data += PC1(found_key, data[0:len(data) - extra_size])
+ if extra_size > 0:
+ new_data += data[-extra_size:]
+ if self.num_sections > self.records+1:
+ new_data += self.data_file[self.sections[self.records+1][0]:]
+ self.data_file = new_data
+ print "done"
return self.data_file
def getUnencryptedBook(infile,pid):
- sys.stdout=Unbuffered(sys.stdout)
- data_file = file(infile, 'rb').read()
- strippedFile = DrmStripper(data_file, pid)
- return strippedFile.getResult()
+ if not os.path.isfile(infile):
+ raise DrmException('Input File Not Found')
+ book = MobiBook(infile)
+ return book.processBook([pid])
+
+def getUnencryptedBookWithList(infile,pidlist):
+ if not os.path.isfile(infile):
+ raise DrmException('Input File Not Found')
+ book = MobiBook(infile)
+ return book.processBook(pidlist)
def main(argv=sys.argv):
- sys.stdout=Unbuffered(sys.stdout)
print ('MobiDeDrm v%(__version__)s. '
'Copyright 2008-2010 The Dark Reverser.' % globals())
if len(argv)<4:
print "Removes protection from Mobipocket books"
print "Usage:"
- print " %s <infile> <outfile> <PID>" % sys.argv[0]
+ print " %s <infile> <outfile> <Comma separated list of PIDs to try>" % sys.argv[0]
return 1
else:
infile = argv[1]
outfile = argv[2]
- pid = argv[3]
+ pidlist = argv[3].split(',')
try:
- stripped_file = getUnencryptedBook(infile, pid)
+ stripped_file = getUnencryptedBookWithList(infile, pidlist)
file(outfile, 'wb').write(stripped_file)
except DrmException, e:
print "Error: %s" % e
property prefsFileName : "com.apprenticealf.dedrm.plist"
property prefsFolderName : "com.apprenticealf.dedrm"
-property handledExtensions : {"epub", "pdf", "prc", "azw", "mobi", "pdb", "der", "b64"}
+property handledExtensions : {"epub", "pdf", "prc", "azw", "azw1", "mobi", "pdb", "der", "b64"}
global eReaderTool
global MobipocketTool
global PIDs
global bnKeys
global KindleInfoList
+global KindleSerialList
global AdeptKeyList
global ErrorList
set TOPAZ to read file encryptedFile from 1 for 4
end try
set ErrorCount to ErrorCount + 1
- if TOPAZ is "TPZ0" then
- set ErrorList to ErrorList & encryptedFile & " is a TOPAZ file.
-
-"
- else
- set ErrorList to ErrorList & encryptedFile & " is not a Mobipocket file.
+ if TOPAZ is not "TPZ0" then
+ set ErrorList to ErrorList & encryptedFile & " is neither a Mobipocket nor a TOPAZ file.
"
+ return
end if
- return
end if
set encryptedFilePath to POSIX path of file encryptedFile
tell application "Finder"
set fileExtension to "." & the last text item of fileName
set fileName to (text items 1 through -2 of fileName) as string
end if
- set unlockedFilePath to POSIX path of file (parent_folder & fileName & "_dedrmed" & fileExtension)
+ set unlockedFileParentFolderPath to POSIX path of file parent_folder
set shellcommand to "python " & (quoted form of MobipocketTool)
repeat with KindleInfoPath in KindleInfoList
set shellcommand to shellcommand & " -k " & quoted form of KindleInfoPath
end repeat
+ set Serialstring to GetSerialstring()
+ if Serialstring is not "" then set shellcommand to shellcommand & " -s " & Serialstring
set PIDstring to GetPIDstring()
if PIDstring is not "" then set shellcommand to shellcommand & " -p " & PIDstring
- set shellcommand to shellcommand & " " & (quoted form of encryptedFilePath) & " " & (quoted form of unlockedFilePath)
+ set shellcommand to shellcommand & " " & (quoted form of encryptedFilePath) & " " & (quoted form of unlockedFileParentFolderPath)
--display dialog "shellcommand: " default answer shellcommand buttons {"OK"} default button 1 giving up after 10
try
set shellresult to do shell script shellcommand
set fileExtension to the last text item of fileName
set fileName to (text items 1 through -2 of fileName) as string
end if
- if fileExtension is "prc" or fileExtension is "mobi" or fileExtension is "azw" then
+ if fileExtension is "prc" or fileExtension is "mobi" or fileExtension is "azw" or fileExtension is "azw1" then
set completedebooks to completedebooks + 1
IncProgress(fileName, completedebooks)
unlockmobifile(droppedFile as text)
end if
end countfile
+on GetSerialstring()
+ set Serialstring to ""
+ repeat with Serial in KindleSerialList
+ if Serialstring is "" then
+ set Serialstring to Serial
+ else
+ set Serialstring to Serialstring & "," & Serial
+ end if
+ end repeat
+ return Serialstring
+end GetSerialstring
+
on GetPIDstring()
set PIDstring to ""
repeat with PID in PIDs
Enter any additional Mobipocket PIDs for your Mobipocket books one at a time:"
set FinishedButton to "No More"
end if
- set dialogresult to (display dialog DialogPrompt default answer "" buttons {"Delete All", "Add", FinishedButton} with title "DeDRM Applescript 2/5" default button 2)
+ set dialogresult to (display dialog DialogPrompt default answer "" buttons {"Delete All", "Add", FinishedButton} with title "DeDRM Applescript 2/6" default button 2)
if button returned of dialogresult is "Add" then
set PID to text returned of dialogresult
set PIDlength to length of PID
end repeat
end GetPIDs
+on GetSerials()
+ repeat
+ set Serialstring to GetSerialstring()
+ if Serialstring is "" then
+ set DialogPrompt to "Enter any Kindle Serial Numbers one at a time:"
+ set FinishedButton to "None"
+ else
+ set DialogPrompt to "Current Kindle Serial Numbers: " & Serialstring & ".
+
+Enter any additional Kindle Serial Numbers one at a time:"
+ set FinishedButton to "No More"
+ end if
+ set dialogresult to (display dialog DialogPrompt default answer "" buttons {"Delete All", "Add", FinishedButton} with title "DeDRM Applescript 3/6" default button 2)
+ if button returned of dialogresult is "Add" then
+ set Serial to text returned of dialogresult
+ set Seriallength to length of Serial
+ if Seriallength is 16 and (first character of Serial) is "B" then
+ set KindleSerialList to KindleSerialList & Serial
+ else
+ display dialog "Kindle Serial Numbers are 16 characters long and start with B." buttons {"OK"} default button 1 with title "DeDRM Applescript" with icon caution
+ end if
+ else if button returned of dialogresult is "Delete All" then
+ if Serialstring is not "" then
+ try
+ set dialogresult to (display dialog "Are you sure you want to delete all stored Kindle Serial Numbers?" buttons {"Cancel", "Delete"} default button 1 with title "DeDRM Applescript")
+ end try
+ if button returned of dialogresult is "Delete" then
+ set Serialstring to {}
+ end if
+ end if
+ else
+ exit repeat
+ end if
+ end repeat
+end GetSerials
+
on GetKindleInfoFiles()
repeat
set KInfostring to GetKindleInfostring()
try
tell me to activate
if (always) then
- set promptstring to "DeDRM Applescript 5/5
+ set promptstring to "DeDRM Applescript 6/6
"
else
set promptstring to "DeDRM Applescript
set DialogPrompt to DialogPrompt & "eReader/Barnes & Noble Name,Number key pairs one at a time. If you're only decoding eReader files, the last 8 digits of the Number will do. The full 15 or 16 are only needed for Barnes & Noble ePubs. Only the last eight will be stored or displayed. Please separate the name and number with a comma and click \"Add\". Or to add a an already generated .b64 file, just click \"Add\" with nothing in the text field."
set dialogtitle to "DeDRM Applescript"
if (running) then
- set dialogtitle to dialogtitle & " 3/5"
+ set dialogtitle to dialogtitle & " 4/6"
end if
set dialogresult to (display dialog DialogPrompt default answer bnKeyText buttons {"Delete All", "Add", FinishedButton} with title dialogtitle default button 2)
if button returned of dialogresult is "Add" then
To add extra key files (.der), click the AddÉ button."
set FinishedButton to "No More"
end if
- set dialogresult to (display dialog DialogPrompt buttons {"Forget All", "AddÉ", FinishedButton} with title "DeDRM Applescript 4/5" default button 2)
+ set dialogresult to (display dialog DialogPrompt buttons {"Forget All", "AddÉ", FinishedButton} with title "DeDRM Applescript 5/6" default button 2)
if button returned of dialogresult is "AddÉ" then
try
set newFile to (choose file with prompt "Please select an Adept key file") as text
set PIDs to {}
set bnKeys to {}
set KindleInfoList to {}
+ set KindleSerialList to {}
set AdeptKeyList to {}
if fileexists(POSIX path of file preferencesFilePath) then
tell application "System Events"
try
set PIDs to value of property list item "PIDs" of property list file preferencesFilePath
end try
+ try
+ set KindleSerialList to value of property list item "KindleSerials" of property list file preferencesFilePath
+ end try
+ try
+ set KindleInfoList to value of property list item "KindleInfoFiles" of property list file preferencesFilePath
+ end try
try
set bnKeys to value of property list item "bnKeys" of property list file preferencesFilePath
end try
set the base_dict to make new property list item with properties {kind:record}
set myPrefs to make new property list file with properties {contents:base_dict, name:preferencesFilePath}
make new property list item at end of property list items of contents of myPrefs with properties {kind:list, name:"PIDs", value:PIDs}
+ make new property list item at end of property list items of contents of myPrefs with properties {kind:list, name:"KindleSerials", value:KindleSerialList}
+ make new property list item at end of property list items of contents of myPrefs with properties {kind:list, name:"KindleInfoFiles", value:KindleInfoList}
make new property list item at end of property list items of contents of myPrefs with properties {kind:list, name:"bnKeys", value:bnKeys}
make new property list item at end of property list items of contents of myPrefs with properties {kind:list, name:"AdeptKeys", value:AdeptKeyList}
make new property list item at end of property list items of contents of myPrefs with properties {kind:string, name:"IneptPDF", value:AdobePDFTool}
if GetTools() then
display dialog "Drag&Drop encrypted ebooks onto this AppleScript's icon in the Finder to decode them after you have finished configuring it and it has quit.
-Click the Continue button to enter any PIDs for Mobipocket/Kindle ebooks, to enter name/number key pairs for Barnes & Noble/eReader ebooks, and to select Barnes & Noble .b64 key files and Adobe Adept .der key files.
+Click the Continue button to enter any PIDs for Mobipocket ebooks; serial numbers for Kindle ebooks; name,number key pairs for Barnes & Noble/eReader ebooks; to select extra Barnes & Noble .b64 key files; to select extra Adobe Adept .der key files; and to find the optional ineptpdf.pyw script.
-***You do not need to enter any extra info if decoding ebooks downloaded to your installation of Kindle for Mac, or Adobe Digital Editions. If you do not have any PIDS, name/number keys or .b64 or .der files to add, just click the Cancel button.***
+***You do not need to enter any extra info if decoding ebooks downloaded to your installation of Kindle for Mac, or Adobe Digital Editions. If you do not have any PIDS; serial numbers; name,number keys, .b64 or .der files to add or want to decode PDF files, just click the Cancel button.***
Please only use to get access to your own books. Authors, publishers and ebook stores need to make money to produce more ebooks. Don't cheat them.
-This AppleScript is by Apprentice Alf and uses python scripts produced by CMBDTC, IHeartCabbages, DarkReverser, DiapDealer, some_updates, ApprenticeAlf and others.
+This AppleScript is by Apprentice Alf and uses python scripts produced by CMBDTC, IHeartCabbages, DarkReverser, DiapDealer, some_updates, Apprentice Alf and others.
This AppleScript (but not necessarily the enclosed python scripts) is free and unencumbered software released into the public domain.
For more information, please refer to
<http://unlicense.org/>
-" with title "DeDRM Applescript 1/5" buttons {"Cancel", "Continue"} default button 2
+" with title "DeDRM Applescript 1/6" buttons {"Cancel", "Continue"} default button 2
ReadPrefs()
+ GetAdeptKey(true)
+
GetPIDs()
+ GetSerials()
GetKeys(true)
- GetAdeptKey(true)
GetAdeptKeyFiles()
GetIneptPDF(true)
--GetKindleInfoFiles()
<key>CFBundleExecutable</key>
<string>droplet</string>
<key>CFBundleGetInfoString</key>
- <string>DeDRM 1.3, Copyright © 2010 by Apprentice Alf.</string>
+ <string>DeDRM 1.4, Copyright © 2010 by Apprentice Alf.</string>
<key>CFBundleIconFile</key>
<string>droplet</string>
<key>CFBundleInfoDictionaryVersion</key>
<key>CFBundlePackageType</key>
<string>APPL</string>
<key>CFBundleShortVersionString</key>
- <string>1.3</string>
- <key>CFBundleSignature</key>
- <string>dplt</string>
+ <string>1.4</string>
<key>LSMinimumSystemVersion</key>
<string>10.5.0</string>
+ <key>CFBundleSignature</key>
+ <string>dplt</string>
<key>LSRequiresCarbon</key>
<true/>
<key>WindowState</key>
--- /dev/null
+#! /usr/bin/python
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+# For use with Topaz Scripts Version 2.6
+
+class Unbuffered:
+    """File-like wrapper that flushes the wrapped stream after every
+    write, so progress output appears immediately."""
+    def __init__(self, stream):
+        self.stream = stream
+    def write(self, data):
+        self.stream.write(data)
+        self.stream.flush()
+    def __getattr__(self, attr):
+        # delegate any other attribute access to the wrapped stream
+        return getattr(self.stream, attr)
+
+import sys
+sys.stdout=Unbuffered(sys.stdout)
+
+import csv
+import os
+import getopt
+from struct import pack
+from struct import unpack
+
+
+# Get a 7 bit encoded number from string. The most
+# significant byte comes first and has the high bit (8th) set
+
+def readEncodedNumber(file):
+    """Read a variable-length, 7-bits-per-byte encoded integer from file.
+
+    Continuation bytes have the high bit (0x80) set; a leading 0xFF byte
+    marks the value as negative.  Returns None on end of file.
+    """
+    flag = False
+    c = file.read(1)
+    if (len(c) == 0):
+        return None
+    data = ord(c)
+
+    if data == 0xFF:
+        # 0xFF prefix flags a negative number; the magnitude follows
+        flag = True
+        c = file.read(1)
+        if (len(c) == 0):
+            return None
+        data = ord(c)
+
+    if data >= 0x80:
+        # accumulate 7 bits per byte until a byte without the high bit
+        datax = (data & 0x7F)
+        while data >= 0x80 :
+            c = file.read(1)
+            if (len(c) == 0):
+                return None
+            data = ord(c)
+            datax = (datax <<7) + (data & 0x7F)
+        data = datax
+
+    if flag:
+        data = -data
+    return data
+
+
+# returns a binary string that encodes a number into 7 bits
+# most significant byte first which has the high bit set
+
+def encodeNumber(number):
+    """Encode an integer as a 7-bits-per-byte string, most significant
+    byte first; every byte except the last has the high bit set.
+
+    Negative values append a 0xFF marker byte, which becomes the
+    leading byte after the final reversal.
+    """
+    result = ""
+    negative = False
+    flag = 0
+
+    if number < 0 :
+        # NOTE(review): negation adds 1 here, but readEncodedNumber does
+        # not subtract 1 when decoding - confirm round-trip intent
+        number = -number + 1
+        negative = True
+
+    while True:
+        byte = number & 0x7F
+        number = number >> 7
+        byte += flag
+        result += chr(byte)
+        flag = 0x80
+        if number == 0 :
+            if (byte == 0xFF and negative == False) :
+                # avoid ending (post-reversal: starting) a positive value
+                # with a bare 0xFF that would read back as the negative marker
+                result += chr(0x80)
+            break
+
+    if negative:
+        result += chr(0xFF)
+
+    return result[::-1]
+
+
+
+# create / read a length prefixed string from the file
+
+def lengthPrefixString(data):
+    """Return data prefixed with its length as a 7-bit encoded number."""
+    return encodeNumber(len(data))+data
+
+def readString(file):
+    """Read a length-prefixed string from file.
+
+    Returns "" on end of file or if the payload read is truncated.
+    """
+    stringLength = readEncodedNumber(file)
+    if (stringLength == None):
+        return ""
+    sv = file.read(stringLength)
+    if (len(sv) != stringLength):
+        return ""
+    return unpack(str(stringLength)+"s",sv)[0]
+
+
+# convert a binary string generated by encodeNumber (7 bit encoded number)
+# to the value you would find inside the page*.dat files to be processed
+
+def convert(i):
+    """Return the hex-string form of encodeNumber(i) - the representation
+    found inside the page*.dat files being processed."""
+    result = ''
+    val = encodeNumber(i)
+    for j in xrange(len(val)):
+        c = ord(val[j:j+1])
+        result += '%02x' % c
+    return result
+
+
+
+# the complete string table used to store all book text content
+# as well as the xml tokens and values that make sense out of it
+
+class Dictionary(object):
+    """String table holding all book text content plus the xml tokens
+    and values that make sense out of it, loaded from a dict file.
+
+    The file layout is an encoded count followed by that many
+    length-prefixed strings (see readEncodedNumber / readString).
+    """
+    def __init__(self, dictFile):
+        self.filename = dictFile
+        self.size = 0
+        self.fo = file(dictFile,'rb')
+        self.stable = []
+        self.size = readEncodedNumber(self.fo)
+        for i in xrange(self.size):
+            self.stable.append(self.escapestr(readString(self.fo)))
+        self.pos = 0
+
+    def escapestr(self, s):
+        # Escape xml-significant characters so table entries can be
+        # injected into generated xml safely.  (The previous version's
+        # replace calls were no-ops - the entity strings had been
+        # mangled into the characters they were meant to escape.)
+        s = s.replace('&','&amp;')
+        s = s.replace('<','&lt;')
+        s = s.replace('>','&gt;')
+        s = s.replace('=','&#61;')
+        return s
+
+    def lookup(self,val):
+        # return the entry at index val, remembering it as the current
+        # position; out-of-range indices abort the whole run
+        if ((val >= 0) and (val < self.size)) :
+            self.pos = val
+            return self.stable[self.pos]
+        else:
+            print "Error - %d outside of string table limits" % val
+            sys.exit(-1)
+
+    def getSize(self):
+        return self.size
+
+    def getPos(self):
+        return self.pos
+
+    def dumpDict(self):
+        # debugging aid: print index, encoded form and value of each entry
+        for i in xrange(self.size):
+            print "%d %s %s" % (i, convert(i), self.stable[i])
+        return
+
+# parses the xml snippets that are represented by each page*.dat file.
+# also parses the other0.dat file - the main stylesheet
+# and information used to inject the xml snippets into page*.dat files
+
+class PageParser(object):
+ def __init__(self, filename, dict, debug, flat_xml):
+ self.fo = file(filename,'rb')
+ self.id = os.path.basename(filename).replace('.dat','')
+ self.dict = dict
+ self.debug = debug
+ self.flat_xml = flat_xml
+ self.tagpath = []
+ self.doc = []
+ self.snippetList = []
+
+
+ # hash table used to enable the decoding process
+ # This has all been developed by trial and error so it may still have omissions or
+ # contain errors
+ # Format:
+ # tag : (number of arguments, argument type, subtags present, special case of subtags presents when escaped)
+
+ token_tags = {
+ 'x' : (1, 'scalar_number', 0, 0),
+ 'y' : (1, 'scalar_number', 0, 0),
+ 'h' : (1, 'scalar_number', 0, 0),
+ 'w' : (1, 'scalar_number', 0, 0),
+ 'firstWord' : (1, 'scalar_number', 0, 0),
+ 'lastWord' : (1, 'scalar_number', 0, 0),
+ 'rootID' : (1, 'scalar_number', 0, 0),
+ 'stemID' : (1, 'scalar_number', 0, 0),
+ 'type' : (1, 'scalar_text', 0, 0),
+
+ 'info' : (0, 'number', 1, 0),
+
+ 'info.word' : (0, 'number', 1, 1),
+ 'info.word.ocrText' : (1, 'text', 0, 0),
+ 'info.word.firstGlyph' : (1, 'raw', 0, 0),
+ 'info.word.lastGlyph' : (1, 'raw', 0, 0),
+ 'info.word.bl' : (1, 'raw', 0, 0),
+ 'info.word.link_id' : (1, 'number', 0, 0),
+
+ 'glyph' : (0, 'number', 1, 1),
+ 'glyph.x' : (1, 'number', 0, 0),
+ 'glyph.y' : (1, 'number', 0, 0),
+ 'glyph.glyphID' : (1, 'number', 0, 0),
+
+ 'dehyphen' : (0, 'number', 1, 1),
+ 'dehyphen.rootID' : (1, 'number', 0, 0),
+ 'dehyphen.stemID' : (1, 'number', 0, 0),
+ 'dehyphen.stemPage' : (1, 'number', 0, 0),
+ 'dehyphen.sh' : (1, 'number', 0, 0),
+
+ 'links' : (0, 'number', 1, 1),
+ 'links.page' : (1, 'number', 0, 0),
+ 'links.rel' : (1, 'number', 0, 0),
+ 'links.row' : (1, 'number', 0, 0),
+ 'links.title' : (1, 'text', 0, 0),
+ 'links.href' : (1, 'text', 0, 0),
+ 'links.type' : (1, 'text', 0, 0),
+
+ 'paraCont' : (0, 'number', 1, 1),
+ 'paraCont.rootID' : (1, 'number', 0, 0),
+ 'paraCont.stemID' : (1, 'number', 0, 0),
+ 'paraCont.stemPage' : (1, 'number', 0, 0),
+
+ 'paraStems' : (0, 'number', 1, 1),
+ 'paraStems.stemID' : (1, 'number', 0, 0),
+
+ 'wordStems' : (0, 'number', 1, 1),
+ 'wordStems.stemID' : (1, 'number', 0, 0),
+
+ 'empty' : (1, 'snippets', 1, 0),
+
+ 'page' : (1, 'snippets', 1, 0),
+ 'page.pageid' : (1, 'scalar_text', 0, 0),
+ 'page.pagelabel' : (1, 'scalar_text', 0, 0),
+ 'page.type' : (1, 'scalar_text', 0, 0),
+ 'page.h' : (1, 'scalar_number', 0, 0),
+ 'page.w' : (1, 'scalar_number', 0, 0),
+ 'page.startID' : (1, 'scalar_number', 0, 0),
+
+ 'group' : (1, 'snippets', 1, 0),
+ 'group.type' : (1, 'scalar_text', 0, 0),
+
+ 'region' : (1, 'snippets', 1, 0),
+ 'region.type' : (1, 'scalar_text', 0, 0),
+ 'region.x' : (1, 'scalar_number', 0, 0),
+ 'region.y' : (1, 'scalar_number', 0, 0),
+ 'region.h' : (1, 'scalar_number', 0, 0),
+ 'region.w' : (1, 'scalar_number', 0, 0),
+
+ 'empty_text_region' : (1, 'snippets', 1, 0),
+
+ 'img' : (1, 'snippets', 1, 0),
+ 'img.x' : (1, 'scalar_number', 0, 0),
+ 'img.y' : (1, 'scalar_number', 0, 0),
+ 'img.h' : (1, 'scalar_number', 0, 0),
+ 'img.w' : (1, 'scalar_number', 0, 0),
+ 'img.src' : (1, 'scalar_number', 0, 0),
+ 'img.color_src' : (1, 'scalar_number', 0, 0),
+
+ 'paragraph' : (1, 'snippets', 1, 0),
+ 'paragraph.class' : (1, 'scalar_text', 0, 0),
+ 'paragraph.firstWord' : (1, 'scalar_number', 0, 0),
+ 'paragraph.lastWord' : (1, 'scalar_number', 0, 0),
+
+ 'word_semantic' : (1, 'snippets', 1, 1),
+ 'word_semantic.type' : (1, 'scalar_text', 0, 0),
+ 'word_semantic.firstWord' : (1, 'scalar_number', 0, 0),
+ 'word_semantic.lastWord' : (1, 'scalar_number', 0, 0),
+
+ 'word' : (1, 'snippets', 1, 0),
+ 'word.type' : (1, 'scalar_text', 0, 0),
+ 'word.class' : (1, 'scalar_text', 0, 0),
+ 'word.firstGlyph' : (1, 'scalar_number', 0, 0),
+ 'word.lastGlyph' : (1, 'scalar_number', 0, 0),
+
+ '_span' : (1, 'snippets', 1, 0),
+ '_span.firstWord' : (1, 'scalar_number', 0, 0),
+ '-span.lastWord' : (1, 'scalar_number', 0, 0),
+
+ 'span' : (1, 'snippets', 1, 0),
+ 'span.firstWord' : (1, 'scalar_number', 0, 0),
+ 'span.lastWord' : (1, 'scalar_number', 0, 0),
+
+ 'extratokens' : (1, 'snippets', 1, 0),
+ 'extratokens.type' : (1, 'scalar_text', 0, 0),
+ 'extratokens.firstGlyph' : (1, 'scalar_number', 0, 0),
+ 'extratokens.lastGlyph' : (1, 'scalar_number', 0, 0),
+
+ 'glyph.h' : (1, 'number', 0, 0),
+ 'glyph.w' : (1, 'number', 0, 0),
+ 'glyph.use' : (1, 'number', 0, 0),
+ 'glyph.vtx' : (1, 'number', 0, 1),
+ 'glyph.len' : (1, 'number', 0, 1),
+ 'glyph.dpi' : (1, 'number', 0, 0),
+ 'vtx' : (0, 'number', 1, 1),
+ 'vtx.x' : (1, 'number', 0, 0),
+ 'vtx.y' : (1, 'number', 0, 0),
+ 'len' : (0, 'number', 1, 1),
+ 'len.n' : (1, 'number', 0, 0),
+
+ 'book' : (1, 'snippets', 1, 0),
+ 'version' : (1, 'snippets', 1, 0),
+ 'version.FlowEdit_1_id' : (1, 'scalar_text', 0, 0),
+ 'version.FlowEdit_1_version' : (1, 'scalar_text', 0, 0),
+ 'version.Schema_id' : (1, 'scalar_text', 0, 0),
+ 'version.Schema_version' : (1, 'scalar_text', 0, 0),
+ 'version.Topaz_version' : (1, 'scalar_text', 0, 0),
+ 'version.WordDetailEdit_1_id' : (1, 'scalar_text', 0, 0),
+ 'version.WordDetailEdit_1_version' : (1, 'scalar_text', 0, 0),
+ 'version.ZoneEdit_1_id' : (1, 'scalar_text', 0, 0),
+ 'version.ZoneEdit_1_version' : (1, 'scalar_text', 0, 0),
+ 'version.chapterheaders' : (1, 'scalar_text', 0, 0),
+ 'version.creation_date' : (1, 'scalar_text', 0, 0),
+ 'version.header_footer' : (1, 'scalar_text', 0, 0),
+ 'version.init_from_ocr' : (1, 'scalar_text', 0, 0),
+ 'version.letter_insertion' : (1, 'scalar_text', 0, 0),
+ 'version.xmlinj_convert' : (1, 'scalar_text', 0, 0),
+ 'version.xmlinj_reflow' : (1, 'scalar_text', 0, 0),
+ 'version.xmlinj_transform' : (1, 'scalar_text', 0, 0),
+ 'version.findlists' : (1, 'scalar_text', 0, 0),
+ 'version.page_num' : (1, 'scalar_text', 0, 0),
+ 'version.page_type' : (1, 'scalar_text', 0, 0),
+ 'version.bad_text' : (1, 'scalar_text', 0, 0),
+ 'version.glyph_mismatch' : (1, 'scalar_text', 0, 0),
+ 'version.margins' : (1, 'scalar_text', 0, 0),
+ 'version.staggered_lines' : (1, 'scalar_text', 0, 0),
+ 'version.paragraph_continuation' : (1, 'scalar_text', 0, 0),
+ 'version.toc' : (1, 'scalar_text', 0, 0),
+
+ 'stylesheet' : (1, 'snippets', 1, 0),
+ 'style' : (1, 'snippets', 1, 0),
+ 'style._tag' : (1, 'scalar_text', 0, 0),
+ 'style.type' : (1, 'scalar_text', 0, 0),
+ 'style._parent_type' : (1, 'scalar_text', 0, 0),
+ 'style.class' : (1, 'scalar_text', 0, 0),
+ 'style._after_class' : (1, 'scalar_text', 0, 0),
+ 'rule' : (1, 'snippets', 1, 0),
+ 'rule.attr' : (1, 'scalar_text', 0, 0),
+ 'rule.value' : (1, 'scalar_text', 0, 0),
+
+ 'original' : (0, 'number', 1, 1),
+ 'original.pnum' : (1, 'number', 0, 0),
+ 'original.pid' : (1, 'text', 0, 0),
+ 'pages' : (0, 'number', 1, 1),
+ 'pages.ref' : (1, 'number', 0, 0),
+ 'pages.id' : (1, 'number', 0, 0),
+ 'startID' : (0, 'number', 1, 1),
+ 'startID.page' : (1, 'number', 0, 0),
+ 'startID.id' : (1, 'number', 0, 0),
+
+ }
+
+
+ # full tag path record keeping routines
+ def tag_push(self, token):
+ self.tagpath.append(token)
+ def tag_pop(self):
+ if len(self.tagpath) > 0 :
+ self.tagpath.pop()
+ def tagpath_len(self):
+ return len(self.tagpath)
+ def get_tagpath(self, i):
+ cnt = len(self.tagpath)
+ if i < cnt : result = self.tagpath[i]
+ for j in xrange(i+1, cnt) :
+ result += '.' + self.tagpath[j]
+ return result
+
+
    # Absolute command byte values that introduce a loop construct
    # (vector-building commands dispatched through decodeCMD).
    # NOTE(review): both entries are 0x76 -- possibly one was meant to
    # be a different opcode; confirm before changing.

    cmd_list = (0x76, 0x76)
+
+ # peek at and return 1 byte that is ahead by i bytes
+ def peek(self, aheadi):
+ c = self.fo.read(aheadi)
+ if (len(c) == 0):
+ return None
+ self.fo.seek(-aheadi,1)
+ c = c[-1:]
+ return ord(c)
+
+
+ # get the next value from the file being processed
+ def getNext(self):
+ nbyte = self.peek(1);
+ if (nbyte == None):
+ return None
+ val = readEncodedNumber(self.fo)
+ return val
+
+
+ # format an arg by argtype
+ def formatArg(self, arg, argtype):
+ if (argtype == 'text') or (argtype == 'scalar_text') :
+ result = self.dict.lookup(arg)
+ elif (argtype == 'raw') or (argtype == 'number') or (argtype == 'scalar_number') :
+ result = arg
+ elif (argtype == 'snippets') :
+ result = arg
+ else :
+ print "Error Unknown argtype %s" % argtype
+ sys.exit(-2)
+ return result
+
+
    # process the next tag token, recursively handling subtags,
    # arguments, and commands
    def procToken(self, token):
        # Parses one token from the stream into a 4-element record
        # [dotted.name, subtag-list, argtype, argument-list].  The
        # token's dotted path (built from the enclosing tags) selects
        # an entry in token_tags describing how its payload is read.
        # Unknown tokens yield an empty list.

        known_token = False
        self.tag_push(token)

        if self.debug : print 'Processing: ', self.get_tagpath(0)
        cnt = self.tagpath_len()
        # try progressively shorter suffixes of the full path until one
        # matches an entry in the dispatch table
        for j in xrange(cnt):
            tkn = self.get_tagpath(j)
            if tkn in self.token_tags :
                num_args = self.token_tags[tkn][0]
                argtype = self.token_tags[tkn][1]
                subtags = self.token_tags[tkn][2]
                splcase = self.token_tags[tkn][3]
                ntags = -1
                known_token = True
                break

        if known_token :

            # handle subtags if present
            subtagres = []
            if (splcase == 1):
                # this type of tag uses of escape marker 0x74 indicate subtag count
                if self.peek(1) == 0x74:
                    skip = readEncodedNumber(self.fo)
                    subtags = 1
                    num_args = 0

            if (subtags == 1):
                # read the subtag count, then recursively parse each one
                ntags = readEncodedNumber(self.fo)
                if self.debug : print 'subtags: ' + token + ' has ' + str(ntags)
                for j in xrange(ntags):
                    val = readEncodedNumber(self.fo)
                    subtagres.append(self.procToken(self.dict.lookup(val)))

            # arguments can be scalars or vectors of text or numbers
            argres = []
            if num_args > 0 :
                firstarg = self.peek(1)
                if (firstarg in self.cmd_list) and (argtype != 'scalar_number') and (argtype != 'scalar_text'):
                    # single argument is a variable length vector of data
                    arg = readEncodedNumber(self.fo)
                    argres = self.decodeCMD(arg,argtype)
                else :
                    # num_arg scalar arguments
                    for i in xrange(num_args):
                        argres.append(self.formatArg(readEncodedNumber(self.fo), argtype))

            # build the return tag
            result = []
            tkn = self.get_tagpath(0)
            result.append(tkn)
            result.append(subtagres)
            result.append(argtype)
            result.append(argres)
            self.tag_pop()
            return result

        # all tokens that need to be processed should be in the hash
        # table if it may indicate a problem, either new token
        # or an out of sync condition
        else:
            result = []
            if (self.debug):
                print 'Unknown Token:', token
            self.tag_pop()
            return result
+
+
+ # special loop used to process code snippets
+ # it is NEVER used to format arguments.
+ # builds the snippetList
+ def doLoop72(self, argtype):
+ cnt = readEncodedNumber(self.fo)
+ if self.debug :
+ result = 'Set of '+ str(cnt) + ' xml snippets. The overall structure \n'
+ result += 'of the document is indicated by snippet number sets at the\n'
+ result += 'end of each snippet. \n'
+ print result
+ for i in xrange(cnt):
+ if self.debug: print 'Snippet:',str(i)
+ snippet = []
+ snippet.append(i)
+ val = readEncodedNumber(self.fo)
+ snippet.append(self.procToken(self.dict.lookup(val)))
+ self.snippetList.append(snippet)
+ return
+
+
+
+ # general loop code gracisouly submitted by "skindle" - thank you!
+ def doLoop76Mode(self, argtype, cnt, mode):
+ result = []
+ adj = 0
+ if mode & 1:
+ adj = readEncodedNumber(self.fo)
+ mode = mode >> 1
+ x = []
+ for i in xrange(cnt):
+ x.append(readEncodedNumber(self.fo) - adj)
+ for i in xrange(mode):
+ for j in xrange(1, cnt):
+ x[j] = x[j] + x[j - 1]
+ for i in xrange(cnt):
+ result.append(self.formatArg(x[i],argtype))
+ return result
+
+
+ # dispatches loop commands bytes with various modes
+ # The 0x76 style loops are used to build vectors
+
+ # This was all derived by trial and error and
+ # new loop types may exist that are not handled here
+ # since they did not appear in the test cases
+
+ def decodeCMD(self, cmd, argtype):
+ if (cmd == 0x76):
+
+ # loop with cnt, and mode to control loop styles
+ cnt = readEncodedNumber(self.fo)
+ mode = readEncodedNumber(self.fo)
+
+ if self.debug : print 'Loop for', cnt, 'with mode', mode, ': '
+ return self.doLoop76Mode(argtype, cnt, mode)
+
+ if self.dbug: print "Unknown command", cmd
+ result = []
+ return result
+
+
+
+ # add full tag path to injected snippets
+ def updateName(self, tag, prefix):
+ name = tag[0]
+ subtagList = tag[1]
+ argtype = tag[2]
+ argList = tag[3]
+ nname = prefix + '.' + name
+ nsubtaglist = []
+ for j in subtagList:
+ nsubtaglist.append(self.updateName(j,prefix))
+ ntag = []
+ ntag.append(nname)
+ ntag.append(nsubtaglist)
+ ntag.append(argtype)
+ ntag.append(argList)
+ return ntag
+
+
+
+ # perform depth first injection of specified snippets into this one
+ def injectSnippets(self, snippet):
+ snipno, tag = snippet
+ name = tag[0]
+ subtagList = tag[1]
+ argtype = tag[2]
+ argList = tag[3]
+ nsubtagList = []
+ if len(argList) > 0 :
+ for j in argList:
+ asnip = self.snippetList[j]
+ aso, atag = self.injectSnippets(asnip)
+ atag = self.updateName(atag, name)
+ nsubtagList.append(atag)
+ argtype='number'
+ argList=[]
+ if len(nsubtagList) > 0 :
+ subtagList.extend(nsubtagList)
+ tag = []
+ tag.append(name)
+ tag.append(subtagList)
+ tag.append(argtype)
+ tag.append(argList)
+ snippet = []
+ snippet.append(snipno)
+ snippet.append(tag)
+ return snippet
+
+
+
+ # format the tag for output
+ def formatTag(self, node):
+ name = node[0]
+ subtagList = node[1]
+ argtype = node[2]
+ argList = node[3]
+ fullpathname = name.split('.')
+ nodename = fullpathname.pop()
+ ilvl = len(fullpathname)
+ indent = ' ' * (3 * ilvl)
+ result = indent + '<' + nodename + '>'
+ if len(argList) > 0:
+ argres = ''
+ for j in argList:
+ if (argtype == 'text') or (argtype == 'scalar_text') :
+ argres += j + '|'
+ else :
+ argres += str(j) + ','
+ argres = argres[0:-1]
+ if argtype == 'snippets' :
+ result += 'snippets:' + argres
+ else :
+ result += argres
+ if len(subtagList) > 0 :
+ result += '\n'
+ for j in subtagList:
+ if len(j) > 0 :
+ result += self.formatTag(j)
+ result += indent + '</' + nodename + '>\n'
+ else:
+ result += '</' + nodename + '>\n'
+ return result
+
+
+ # flatten tag
+ def flattenTag(self, node):
+ name = node[0]
+ subtagList = node[1]
+ argtype = node[2]
+ argList = node[3]
+ result = name
+ if (len(argList) > 0):
+ argres = ''
+ for j in argList:
+ if (argtype == 'text') or (argtype == 'scalar_text') :
+ argres += j + '|'
+ else :
+ argres += str(j) + '|'
+ argres = argres[0:-1]
+ if argtype == 'snippets' :
+ result += '.snippets=' + argres
+ else :
+ result += '=' + argres
+ result += '\n'
+ for j in subtagList:
+ if len(j) > 0 :
+ result += self.flattenTag(j)
+ return result
+
+
+ # reduce create xml output
+ def formatDoc(self, flat_xml):
+ result = ''
+ for j in self.doc :
+ if len(j) > 0:
+ if flat_xml:
+ result += self.flattenTag(j)
+ else:
+ result += self.formatTag(j)
+ if self.debug : print result
+ return result
+
+
+
    # main loop - parse the page.dat files
    # to create structured document and snippets

    # FIXME: value at end of magic appears to be a subtags count
    # but for what? For now, inject an 'info" tag as it is in
    # every dictionary and seems close to what is meant
    # The alternative is to special case the last _ "0x5f" to mean something

    def process(self):
        # Parses the whole file into self.doc (and self.snippetList via
        # doLoop72), injects collected snippets, and returns the page
        # text formatted according to self.flat_xml.

        # peek at the first bytes to see what type of file it is
        magic = self.fo.read(9)
        if (magic[0:1] == 'p') and (magic[2:9] == 'marker_'):
            first_token = 'info'
        elif (magic[0:1] == 'p') and (magic[2:9] == '__PAGE_'):
            skip = self.fo.read(2)
            first_token = 'info'
        elif (magic[0:1] == 'p') and (magic[2:8] == '_PAGE_'):
            first_token = 'info'
        elif (magic[0:1] == 'g') and (magic[2:9] == '__GLYPH'):
            skip = self.fo.read(3)
            first_token = 'info'
        else :
            # other0.dat file
            first_token = None
            self.fo.seek(-9,1)


        # main loop to read and build the document tree
        while True:

            if first_token != None :
                # use "inserted" first token 'info' for page and glyph files
                tag = self.procToken(first_token)
                if len(tag) > 0 :
                    self.doc.append(tag)
                first_token = None

            v = self.getNext()
            if (v == None):
                break

            if (v == 0x72):
                # 0x72 introduces a set of snippets rather than a tag
                self.doLoop72('number')
            elif (v > 0) and (v < self.dict.getSize()) :
                # in-range values are dictionary indices of tag tokens
                tag = self.procToken(self.dict.lookup(v))
                if len(tag) > 0 :
                    self.doc.append(tag)
            else:
                if self.debug:
                    print "Main Loop: Unknown value: %x" % v
                if (v == 0):
                    # a 0 followed by 0x5f re-inserts the 'info' token
                    # (see the FIXME note above)
                    if (self.peek(1) == 0x5f):
                        skip = self.fo.read(1)
                        first_token = 'info'

        # now do snippet injection
        if len(self.snippetList) > 0 :
            if self.debug : print 'Injecting Snippets:'
            snippet = self.injectSnippets(self.snippetList[0])
            snipno = snippet[0]
            tag_add = snippet[1]
            if self.debug : print self.formatTag(tag_add)
            if len(tag_add) > 0:
                self.doc.append(tag_add)

        # handle generation of xml output
        xmlpage = self.formatDoc(self.flat_xml)

        return xmlpage
+
+
def fromData(dict, fname):
    # Parse fname using the given string dictionary and return the
    # flattened (one tag per line) page description.
    return PageParser(fname, dict, False, True).process()
+
def getXML(dict, fname):
    # Parse fname using the given string dictionary and return the
    # indented pseudo-xml page description.
    return PageParser(fname, dict, False, False).process()
+
def usage():
    # print the command-line help text for convert2xml.py
    for line in (
        'Usage: ',
        ' convert2xml.py dict0000.dat infile.dat ',
        ' ',
        ' Options:',
        ' -h print this usage help message ',
        ' -d turn on debug output to check for potential errors ',
        ' --flat-xml output the flattened xml page description only ',
        ' ',
        ' This program will attempt to convert a page*.dat file or ',
        ' glyphs*.dat file, using the dict0000.dat file, to its xml description. ',
        ' ',
        ' Use "cmbtc_dump.py" first to unencrypt, uncompress, and dump ',
        ' the *.dat files from a Topaz format e-book.',
    ):
        print(line)
+
+#
+# Main
+#
+
+def main(argv):
+ dictFile = ""
+ pageFile = ""
+ debug = False
+ flat_xml = False
+ printOutput = False
+ if len(argv) == 0:
+ printOutput = True
+ argv = sys.argv
+
+ try:
+ opts, args = getopt.getopt(argv[1:], "hd", ["flat-xml"])
+
+ except getopt.GetoptError, err:
+
+ # print help information and exit:
+ print str(err) # will print something like "option -a not recognized"
+ usage()
+ sys.exit(2)
+
+ if len(opts) == 0 and len(args) == 0 :
+ usage()
+ sys.exit(2)
+
+ for o, a in opts:
+ if o =="-d":
+ debug=True
+ if o =="-h":
+ usage()
+ sys.exit(0)
+ if o =="--flat-xml":
+ flat_xml = True
+
+ dictFile, pageFile = args[0], args[1]
+
+ # read in the string table dictionary
+ dict = Dictionary(dictFile)
+ # dict.dumpDict()
+
+ # create a page parser
+ pp = PageParser(pageFile, dict, debug, flat_xml)
+
+ xmlpage = pp.process()
+
+ if printOutput:
+ print xmlpage
+ return 0
+
+ return xmlpage
+
# script entry point; the empty-string argument makes main() fall back
# to parsing the real sys.argv and printing the result
if __name__ == '__main__':
    sys.exit(main(''))
--- /dev/null
+#! /usr/bin/python
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+# For use with Topaz Scripts Version 2.6
+
+import sys
+import csv
+import os
+import math
+import getopt
+from struct import pack
+from struct import unpack
+
+
class DocParser(object):
    def __init__(self, flatxml, classlst, fileid, bookDir, gdict, fixedimage):
        # page id is the dat file name without its extension
        self.id = os.path.basename(fileid).replace('.dat','')
        self.svgcount = 0
        # the flattened page description, one tag per line
        self.docList = flatxml.split('\n')
        self.docSize = len(self.docList)
        self.classList = {}
        self.bookDir = bookDir
        self.gdict = gdict
        for pclass in classlst.split('\n'):
            if pclass != '':
                # remove the leading period from the css name
                self.classList[pclass[1:]] = True
        self.fixedimage = fixedimage
        # per-page link/ocr/dehyphenation state filled in by process()
        self.ocrtext = []
        self.link_id = []
        self.link_title = []
        self.link_page = []
        self.link_href = []
        self.link_type = []
        self.dehyphen_rootid = []
        self.paracont_stemid = []
        self.parastems_stemid = []
+
+
+ def getGlyph(self, gid):
+ result = ''
+ id='id="gl%d"' % gid
+ return self.gdict.lookup(id)
+
    def glyphs_to_image(self, glyphList):
        # Write the listed glyph indices out as one standalone svg file
        # (named self.id + '_NNNN.svg', NNNN = self.svgcount) in the
        # book's img directory, translating all glyphs so the smallest
        # x/y coordinate sits at the origin.  Returns 0.

        # pull an integer attribute (e.g. 'width=') out of a glyph path string
        def extract(path, key):
            b = path.find(key) + len(key)
            e = path.find(' ',b)
            return int(path[b:e])

        svgDir = os.path.join(self.bookDir,'svg')
        # NOTE(review): svgDir is computed but never used in this method

        imgDir = os.path.join(self.bookDir,'img')
        imgname = self.id + '_%04d.svg' % self.svgcount
        imgfile = os.path.join(imgDir,imgname)

        # get glyph information
        gxList = self.getData('info.glyph.x',0,-1)
        gyList = self.getData('info.glyph.y',0,-1)
        gidList = self.getData('info.glyph.glyphID',0,-1)

        gids = []
        maxws = []
        maxhs = []
        xs = []
        ys = []
        gdefs = []

        # get path definitions, positions, dimensions for each glyph
        # that makes up the image, and find min x and min y to reposition origin
        minx = -1
        miny = -1
        for j in glyphList:
            gid = gidList[j]
            gids.append(gid)

            xs.append(gxList[j])
            if minx == -1: minx = gxList[j]
            else : minx = min(minx, gxList[j])

            ys.append(gyList[j])
            if miny == -1: miny = gyList[j]
            else : miny = min(miny, gyList[j])

            path = self.getGlyph(gid)
            gdefs.append(path)

            maxws.append(extract(path,'width='))
            maxhs.append(extract(path,'height='))


        # change the origin to minx, miny and calc max height and width
        maxw = maxws[0] + xs[0] - minx
        maxh = maxhs[0] + ys[0] - miny
        for j in xrange(0, len(xs)):
            xs[j] = xs[j] - minx
            ys[j] = ys[j] - miny
            maxw = max( maxw, (maxws[j] + xs[j]) )
            maxh = max( maxh, (maxhs[j] + ys[j]) )

        # open the image file for output
        ifile = open(imgfile,'w')
        ifile.write('<?xml version="1.0" standalone="no"?>\n')
        ifile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
        ifile.write('<svg width="%dpx" height="%dpx" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (math.floor(maxw/10), math.floor(maxh/10), maxw, maxh))
        ifile.write('<defs>\n')
        for j in xrange(0,len(gdefs)):
            ifile.write(gdefs[j])
        ifile.write('</defs>\n')
        # reference each glyph definition at its translated position
        for j in xrange(0,len(gids)):
            ifile.write('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (gids[j], xs[j], ys[j]))
        ifile.write('</svg>')
        ifile.close()

        return 0
+
+
+
+ # return tag at line pos in document
+ def lineinDoc(self, pos) :
+ if (pos >= 0) and (pos < self.docSize) :
+ item = self.docList[pos]
+ if item.find('=') >= 0:
+ (name, argres) = item.split('=',1)
+ else :
+ name = item
+ argres = ''
+ return name, argres
+
+
+ # find tag in doc if within pos to end inclusive
+ def findinDoc(self, tagpath, pos, end) :
+ result = None
+ if end == -1 :
+ end = self.docSize
+ else:
+ end = min(self.docSize, end)
+ foundat = -1
+ for j in xrange(pos, end):
+ item = self.docList[j]
+ if item.find('=') >= 0:
+ (name, argres) = item.split('=',1)
+ else :
+ name = item
+ argres = ''
+ if name.endswith(tagpath) :
+ result = argres
+ foundat = j
+ break
+ return foundat, result
+
+
+ # return list of start positions for the tagpath
+ def posinDoc(self, tagpath):
+ startpos = []
+ pos = 0
+ res = ""
+ while res != None :
+ (foundpos, res) = self.findinDoc(tagpath, pos, -1)
+ if res != None :
+ startpos.append(foundpos)
+ pos = foundpos + 1
+ return startpos
+
+
+ # returns a vector of integers for the tagpath
+ def getData(self, tagpath, pos, end):
+ argres=[]
+ (foundat, argt) = self.findinDoc(tagpath, pos, end)
+ if (argt != None) and (len(argt) > 0) :
+ argList = argt.split('|')
+ argres = [ int(strval) for strval in argList]
+ return argres
+
+
+ # get the class
+ def getClass(self, pclass):
+ nclass = pclass
+
+ # class names are an issue given topaz may start them with numerals (not allowed),
+ # use a mix of cases (which cause some browsers problems), and actually
+ # attach numbers after "_reclustered*" to the end to deal classeses that inherit
+ # from a base class (but then not actually provide all of these _reclustereed
+ # classes in the stylesheet!
+
+ # so we clean this up by lowercasing, prepend 'cl-', and getting any baseclass
+ # that exists in the stylesheet first, and then adding this specific class
+ # after
+
+ # also some class names have spaces in them so need to convert to dashes
+ if nclass != None :
+ nclass = nclass.replace(' ','-')
+ classres = ''
+ nclass = nclass.lower()
+ nclass = 'cl-' + nclass
+ baseclass = ''
+ # graphic is the base class for captions
+ if nclass.find('cl-cap-') >=0 :
+ classres = 'graphic' + ' '
+ else :
+ # strip to find baseclass
+ p = nclass.find('_')
+ if p > 0 :
+ baseclass = nclass[0:p]
+ if baseclass in self.classList:
+ classres += baseclass + ' '
+ classres += nclass
+ nclass = classres
+ return nclass
+
+
+ # develop a sorted description of the starting positions of
+ # groups and regions on the page, as well as the page type
+ def PageDescription(self):
+
+ def compare(x, y):
+ (xtype, xval) = x
+ (ytype, yval) = y
+ if xval > yval:
+ return 1
+ if xval == yval:
+ return 0
+ return -1
+
+ result = []
+ (pos, pagetype) = self.findinDoc('page.type',0,-1)
+
+ groupList = self.posinDoc('page.group')
+ groupregionList = self.posinDoc('page.group.region')
+ pageregionList = self.posinDoc('page.region')
+ # integrate into one list
+ for j in groupList:
+ result.append(('grpbeg',j))
+ for j in groupregionList:
+ result.append(('gregion',j))
+ for j in pageregionList:
+ result.append(('pregion',j))
+ result.sort(compare)
+
+ # insert group end and page end indicators
+ inGroup = False
+ j = 0
+ while True:
+ if j == len(result): break
+ rtype = result[j][0]
+ rval = result[j][1]
+ if not inGroup and (rtype == 'grpbeg') :
+ inGroup = True
+ j = j + 1
+ elif inGroup and (rtype in ('grpbeg', 'pregion')):
+ result.insert(j,('grpend',rval))
+ inGroup = False
+ else:
+ j = j + 1
+ if inGroup:
+ result.append(('grpend',-1))
+ result.append(('pageend', -1))
+ return pagetype, result
+
+
+
    # build a description of the paragraph
    def getParaDescription(self, start, end, regtype):
        # Returns (css-class, word-list) for the paragraph spanning doc
        # lines [start, end); the word list holds ('ocr', n),
        # ('img'/'imgsa', n) and ('svg', n) entries in reading order.

        result = []

        # paragraph
        (pos, pclass) = self.findinDoc('paragraph.class',start,end)

        pclass = self.getClass(pclass)

        # build up a description of the paragraph in result and return it
        # first check for the basic - all words paragraph
        (pos, sfirst) = self.findinDoc('paragraph.firstWord',start,end)
        (pos, slast) = self.findinDoc('paragraph.lastWord',start,end)
        if (sfirst != None) and (slast != None) :
            first = int(sfirst)
            last = int(slast)

            # vertical/table regions (and, with fixedimage, fixed or
            # inverted/'cl-f-' classed ones) are rendered as svg images
            makeImage = (regtype == 'vertical') or (regtype == 'table')
            if self.fixedimage:
                makeImage = makeImage or (regtype == 'fixed')

            if (pclass != None):
                makeImage = makeImage or (pclass.find('.inverted') >= 0)
                if self.fixedimage :
                    makeImage = makeImage or (pclass.find('cl-f-') >= 0)

            if not makeImage :
                # standard all word paragraph
                for wordnum in xrange(first, last):
                    result.append(('ocr', wordnum))
                return pclass, result

            # convert paragraph to svg image
            # translate first and last word into first and last glyphs
            # and generate inline image and include it
            glyphList = []
            firstglyphList = self.getData('word.firstGlyph',0,-1)
            gidList = self.getData('info.glyph.glyphID',0,-1)
            firstGlyph = firstglyphList[first]
            if last < len(firstglyphList):
                lastGlyph = firstglyphList[last]
            else :
                lastGlyph = len(gidList)
            for glyphnum in xrange(firstGlyph, lastGlyph):
                glyphList.append(glyphnum)
            # include any extratokens if they exist
            (pos, sfg) = self.findinDoc('extratokens.firstGlyph',start,end)
            (pos, slg) = self.findinDoc('extratokens.lastGlyph',start,end)
            if (sfg != None) and (slg != None):
                for glyphnum in xrange(int(sfg), int(slg)):
                    glyphList.append(glyphnum)
            num = self.svgcount
            self.glyphs_to_image(glyphList)
            self.svgcount += 1
            result.append(('svg', num))
            return pclass, result

        # this type of paragraph may be made up of multiple spans, inline
        # word monograms (images), and words with semantic meaning,
        # plus glyphs used to form starting letter of first word

        # need to parse this type line by line
        line = start + 1
        word_class = ''

        # if end is -1 then we must search to end of document
        if end == -1 :
            end = self.docSize

        # seems some xml has last* coming before first* so we have to
        # handle any order
        sp_first = -1
        sp_last = -1

        gl_first = -1
        gl_last = -1

        ws_first = -1
        ws_last = -1

        word_class = ''

        while (line < end) :

            (name, argres) = self.lineinDoc(line)

            if name.endswith('span.firstWord') :
                sp_first = int(argres)

            elif name.endswith('span.lastWord') :
                sp_last = int(argres)

            elif name.endswith('word.firstGlyph') :
                gl_first = int(argres)

            elif name.endswith('word.lastGlyph') :
                gl_last = int(argres)

            elif name.endswith('word_semantic.firstWord'):
                ws_first = int(argres)

            elif name.endswith('word_semantic.lastWord'):
                ws_last = int(argres)

            elif name.endswith('word.class'):
                # argres is '<name>-<num>'; a positive 'spaceafter'
                # count marks the next image as space-separated
                (cname, space) = argres.split('-',1)
                if space == '' : space = '0'
                if (cname == 'spaceafter') and (int(space) > 0) :
                    word_class = 'sa'

            elif name.endswith('word.img.src'):
                result.append(('img' + word_class, int(argres)))
                word_class = ''

            elif name.endswith('region.img.src'):
                result.append(('img' + word_class, int(argres)))

            # emit a completed span / glyph run / semantic word as soon
            # as both of its endpoints have been seen
            if (sp_first != -1) and (sp_last != -1):
                for wordnum in xrange(sp_first, sp_last):
                    result.append(('ocr', wordnum))
                sp_first = -1
                sp_last = -1

            if (gl_first != -1) and (gl_last != -1):
                glyphList = []
                for glyphnum in xrange(gl_first, gl_last):
                    glyphList.append(glyphnum)
                num = self.svgcount
                self.glyphs_to_image(glyphList)
                self.svgcount += 1
                result.append(('svg', num))
                gl_first = -1
                gl_last = -1

            if (ws_first != -1) and (ws_last != -1):
                for wordnum in xrange(ws_first, ws_last):
                    result.append(('ocr', wordnum))
                ws_first = -1
                ws_last = -1

            line += 1

        return pclass, result
+
+
    def buildParagraph(self, pclass, pdesc, type, regtype) :
        # Assemble the html for one paragraph from its description list
        # (pdesc entries are ('ocr'|'img'|'imgsa'|'svg', number)).
        # `type` controls the enclosing tags for paragraphs split
        # across pages: 'full'/'begin' open a <p>, 'full'/'end' close
        # it, anything else emits bare content.
        parares = ''
        sep =''

        classres = ''
        if pclass :
            classres = ' class="' + pclass + '"'

        # these region types use explicit <br /> line breaks
        br_lb = (regtype == 'fixed') or (regtype == 'chapterheading') or (regtype == 'vertical')

        handle_links = len(self.link_id) > 0

        if (type == 'full') or (type == 'begin') :
            parares += '<p' + classres + '>'

        if (type == 'end'):
            parares += ' '

        # lstart marks where the current (potential) link text begins
        lstart = len(parares)

        cnt = len(pdesc)

        for j in xrange( 0, cnt) :

            (wtype, num) = pdesc[j]

            if wtype == 'ocr' :
                word = self.ocrtext[num]
                sep = ' '

                if handle_links:
                    link = self.link_id[num]
                    if (link > 0):
                        # positive link id: wrap the link title (or the
                        # text accumulated since lstart) in an <a> tag
                        linktype = self.link_type[link-1]
                        title = self.link_title[link-1]
                        if (title == "") or (parares.rfind(title) < 0):
                            title=parares[lstart:]
                        if linktype == 'external' :
                            linkhref = self.link_href[link-1]
                            linkhtml = '<a href="%s">' % linkhref
                        else :
                            if len(self.link_page) >= link :
                                ptarget = self.link_page[link-1] - 1
                                linkhtml = '<a href="#page%04d">' % ptarget
                            else :
                                # just link to the current page
                                linkhtml = '<a href="#' + self.id + '">'
                        linkhtml += title + '</a>'
                        pos = parares.rfind(title)
                        if pos >= 0:
                            parares = parares[0:pos] + linkhtml + parares[pos+len(title):]
                        else :
                            parares += linkhtml
                        lstart = len(parares)
                        if word == '_link_' : word = ''
                    elif (link < 0) :
                        if word == '_link_' : word = ''

                if word == '_lb_':
                    # line-break marker: suppressed after a dehyphenated
                    # root or inside links, <br /> in br_lb regions,
                    # plain newline otherwise
                    if ((num-1) in self.dehyphen_rootid ) or handle_links:
                        word = ''
                        sep = ''
                    elif br_lb :
                        word = '<br />\n'
                        sep = ''
                    else :
                        word = '\n'
                        sep = ''

                if num in self.dehyphen_rootid :
                    # drop the trailing hyphen of a word split across lines
                    word = word[0:-1]
                    sep = ''

                parares += word + sep

            elif wtype == 'img' :
                sep = ''
                parares += '<img src="img/img%04d.jpg" alt="" />' % num
                parares += sep

            elif wtype == 'imgsa' :
                sep = ' '
                parares += '<img src="img/img%04d.jpg" alt="" />' % num
                parares += sep

            elif wtype == 'svg' :
                sep = ''
                parares += '<img src="img/' + self.id + '_%04d.svg" alt="" />' % num
                parares += sep

        # strip the trailing separator, if any
        if len(sep) > 0 : parares = parares[0:-1]
        if (type == 'full') or (type == 'end') :
            parares += '</p>'
        return parares
+
+
+
+ # walk the document tree collecting the information needed
+ # to build an html page using the ocrText
+
+ def process(self):
+
+ htmlpage = ''
+
+ # get the ocr text
+ (pos, argres) = self.findinDoc('info.word.ocrText',0,-1)
+ if argres : self.ocrtext = argres.split('|')
+
+ # get information to dehyphenate the text
+ self.dehyphen_rootid = self.getData('info.dehyphen.rootID',0,-1)
+
+ # determine if first paragraph is continued from previous page
+ (pos, self.parastems_stemid) = self.findinDoc('info.paraStems.stemID',0,-1)
+ first_para_continued = (self.parastems_stemid != None)
+
+ # determine if last paragraph is continued onto the next page
+ (pos, self.paracont_stemid) = self.findinDoc('info.paraCont.stemID',0,-1)
+ last_para_continued = (self.paracont_stemid != None)
+
+ # collect link ids
+ self.link_id = self.getData('info.word.link_id',0,-1)
+
+ # collect link destination page numbers
+ self.link_page = self.getData('info.links.page',0,-1)
+
+ # collect link types (container versus external)
+ (pos, argres) = self.findinDoc('info.links.type',0,-1)
+ if argres : self.link_type = argres.split('|')
+
+ # collect link destinations
+ (pos, argres) = self.findinDoc('info.links.href',0,-1)
+ if argres : self.link_href = argres.split('|')
+
+ # collect link titles
+ (pos, argres) = self.findinDoc('info.links.title',0,-1)
+ if argres :
+ self.link_title = argres.split('|')
+ else:
+ self.link_title.append('')
+
+ # get a descriptions of the starting points of the regions
+ # and groups on the page
+ (pagetype, pageDesc) = self.PageDescription()
+ regcnt = len(pageDesc) - 1
+
+ anchorSet = False
+ breakSet = False
+ inGroup = False
+
+ # process each region on the page and convert what you can to html
+
+ for j in xrange(regcnt):
+
+ (etype, start) = pageDesc[j]
+ (ntype, end) = pageDesc[j+1]
+
+
+ # set anchor for link target on this page
+ if not anchorSet and not first_para_continued:
+ htmlpage += '<div style="visibility: hidden; height: 0; width: 0;" id="'
+ htmlpage += self.id + '" title="pagetype_' + pagetype + '"></div>\n'
+ anchorSet = True
+
+ # handle groups of graphics with text captions
+ if (etype == 'grpbeg'):
+ (pos, grptype) = self.findinDoc('group.type', start, end)
+ if grptype != None:
+ if grptype == 'graphic':
+ gcstr = ' class="' + grptype + '"'
+ htmlpage += '<div' + gcstr + '>'
+ inGroup = True
+
+ elif (etype == 'grpend'):
+ if inGroup:
+ htmlpage += '</div>\n'
+ inGroup = False
+
+ else:
+ (pos, regtype) = self.findinDoc('region.type',start,end)
+
+ if regtype == 'graphic' :
+ (pos, simgsrc) = self.findinDoc('img.src',start,end)
+ if simgsrc:
+ if inGroup:
+ htmlpage += '<img src="img/img%04d.jpg" alt="" />' % int(simgsrc)
+ else:
+ htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
+
+ elif regtype == 'chapterheading' :
+ (pclass, pdesc) = self.getParaDescription(start,end, regtype)
+ if not breakSet:
+ htmlpage += '<div style="page-break-after: always;"> </div>\n'
+ breakSet = True
+ tag = 'h1'
+ if pclass and (len(pclass) >= 7):
+ if pclass[3:7] == 'ch1-' : tag = 'h1'
+ if pclass[3:7] == 'ch2-' : tag = 'h2'
+ if pclass[3:7] == 'ch3-' : tag = 'h3'
+ htmlpage += '<' + tag + ' class="' + pclass + '">'
+ else:
+ htmlpage += '<' + tag + '>'
+ htmlpage += self.buildParagraph(pclass, pdesc, 'middle', regtype)
+ htmlpage += '</' + tag + '>'
+
+ elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') or (regtype == 'listitem'):
+ ptype = 'full'
+ # check to see if this is a continuation from the previous page
+ if first_para_continued :
+ ptype = 'end'
+ first_para_continued = False
+ (pclass, pdesc) = self.getParaDescription(start,end, regtype)
+ if pclass and (len(pclass) >= 6) and (ptype == 'full'):
+ tag = 'p'
+ if pclass[3:6] == 'h1-' : tag = 'h4'
+ if pclass[3:6] == 'h2-' : tag = 'h5'
+ if pclass[3:6] == 'h3-' : tag = 'h6'
+ htmlpage += '<' + tag + ' class="' + pclass + '">'
+ htmlpage += self.buildParagraph(pclass, pdesc, 'middle', regtype)
+ htmlpage += '</' + tag + '>'
+ else :
+ htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
+
+ elif (regtype == 'tocentry') :
+ ptype = 'full'
+ if first_para_continued :
+ ptype = 'end'
+ first_para_continued = False
+ (pclass, pdesc) = self.getParaDescription(start,end, regtype)
+ htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
+
+
+ elif (regtype == 'vertical') or (regtype == 'table') :
+ ptype = 'full'
+ if inGroup:
+ ptype = 'middle'
+ if first_para_continued :
+ ptype = 'end'
+ first_para_continued = False
+ (pclass, pdesc) = self.getParaDescription(start, end, regtype)
+ htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
+
+
+ elif (regtype == 'synth_fcvr.center'):
+ (pos, simgsrc) = self.findinDoc('img.src',start,end)
+ if simgsrc:
+ htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
+
+ else :
+ print ' Making region type', regtype,
+ (pos, temp) = self.findinDoc('paragraph',start,end)
+ (pos2, temp) = self.findinDoc('span',start,end)
+ if pos != -1 or pos2 != -1:
+ print ' a "text" region'
+ orig_regtype = regtype
+ regtype = 'fixed'
+ ptype = 'full'
+ # check to see if this is a continuation from the previous page
+ if first_para_continued :
+ ptype = 'end'
+ first_para_continued = False
+ (pclass, pdesc) = self.getParaDescription(start,end, regtype)
+ if not pclass:
+ if orig_regtype.endswith('.right') : pclass = 'cl-right'
+ elif orig_regtype.endswith('.center') : pclass = 'cl-center'
+ elif orig_regtype.endswith('.left') : pclass = 'cl-left'
+ elif orig_regtype.endswith('.justify') : pclass = 'cl-justify'
+ if pclass and (ptype == 'full') and (len(pclass) >= 6):
+ tag = 'p'
+ if pclass[3:6] == 'h1-' : tag = 'h4'
+ if pclass[3:6] == 'h2-' : tag = 'h5'
+ if pclass[3:6] == 'h3-' : tag = 'h6'
+ htmlpage += '<' + tag + ' class="' + pclass + '">'
+ htmlpage += self.buildParagraph(pclass, pdesc, 'middle', regtype)
+ htmlpage += '</' + tag + '>'
+ else :
+ htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
+ else :
+ print ' a "graphic" region'
+ (pos, simgsrc) = self.findinDoc('img.src',start,end)
+ if simgsrc:
+ htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
+
+
+ if last_para_continued :
+ if htmlpage[-4:] == '</p>':
+ htmlpage = htmlpage[0:-4]
+ last_para_continued = False
+
+ return htmlpage
+
+
+
+# Convert one flattened Topaz page description into an html page fragment.
+# Thin wrapper: all work is done by DocParser (defined earlier in this file).
+# fixedimage controls whether fixed-layout areas are rendered as svg images.
+def convert2HTML(flatxml, classlst, fileid, bookDir, gdict, fixedimage):
+ # create a document parser
+ dp = DocParser(flatxml, classlst, fileid, bookDir, gdict, fixedimage)
+ htmlpage = dp.process()
+ return htmlpage
--- /dev/null
+#! /usr/bin/python
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+
+import sys
+import csv
+import os
+import getopt
+from struct import pack
+from struct import unpack
+
+
+# Parses one flattened page description for svg rendering: page size,
+# glyph placements and embedded images.
+class PParser(object):
+ def __init__(self, gd, flatxml):
+ # gd: GlyphDict used to resolve glyph ids to svg path definitions
+ self.gd = gd
+ self.flatdoc = flatxml.split('\n')
+ self.temp = []
+ # page dimensions fall back to the book-level values when absent
+ foo = self.getData('page.h') or self.getData('book.h')
+ self.ph = foo[0]
+ foo = self.getData('page.w') or self.getData('book.w')
+ self.pw = foo[0]
+ # parallel lists: x position, y position and glyph id per glyph
+ self.gx = self.getData('info.glyph.x')
+ self.gy = self.getData('info.glyph.y')
+ self.gid = self.getData('info.glyph.glyphID')
+ # Return the '|'-separated values of the first entry whose name ends
+ # with path, converted to a list of ints; None when not found.
+ def getData(self, path):
+ result = None
+ cnt = len(self.flatdoc)
+ for j in xrange(cnt):
+ item = self.flatdoc[j]
+ if item.find('=') >= 0:
+ (name, argt) = item.split('=')
+ argres = argt.split('|')
+ else:
+ name = item
+ argres = []
+ if (name.endswith(path)):
+ result = argres
+ break
+ if (len(argres) > 0) :
+ for j in xrange(0,len(argres)):
+ argres[j] = int(argres[j])
+ return result
+ # Same as getData but works on self.temp and consumes (pops) the
+ # matched entry, so repeated calls walk through successive matches.
+ def getDataTemp(self, path):
+ result = None
+ cnt = len(self.temp)
+ for j in xrange(cnt):
+ item = self.temp[j]
+ if item.find('=') >= 0:
+ (name, argt) = item.split('=')
+ argres = argt.split('|')
+ else:
+ name = item
+ argres = []
+ if (name.endswith(path)):
+ result = argres
+ self.temp.pop(j)
+ break
+ if (len(argres) > 0) :
+ for j in xrange(0,len(argres)):
+ argres[j] = int(argres[j])
+ return result
+ # Return an svg <image> element string for every img entry on the page.
+ def getImages(self):
+ result = []
+ self.temp = self.flatdoc
+ while (self.getDataTemp('img') != None):
+ h = self.getDataTemp('img.h')[0]
+ w = self.getDataTemp('img.w')[0]
+ x = self.getDataTemp('img.x')[0]
+ y = self.getDataTemp('img.y')[0]
+ src = self.getDataTemp('img.src')[0]
+ result.append('<image xlink:href="../img/img%04d.jpg" x="%d" y="%d" width="%d" height="%d" />\n' % (src, x, y, w, h))
+ return result
+ # Return the svg <path> definitions for the distinct glyph ids used
+ # on this page, sorted by id, resolved through the glyph dictionary.
+ def getGlyphs(self):
+ result = []
+ if (self.gid != None) and (len(self.gid) > 0):
+ glyphs = []
+ for j in set(self.gid):
+ glyphs.append(j)
+ glyphs.sort()
+ for gid in glyphs:
+ id='id="gl%d"' % gid
+ path = self.gd.lookup(id)
+ if path:
+ result.append(id + ' ' + path)
+ return result
+
+
+# Render one page either as a raw standalone svg document (raw true) or as
+# an xhtml page that embeds the svg plus javascript prev/next/zoom controls.
+# counter is the 0-based page number, numfiles the page count; scaledpi
+# converts the 1440-dpi page units into inches for display.
+def convert2SVG(gdict, flat_xml, counter, numfiles, svgDir, raw, meta_array, scaledpi):
+ ml = ''
+ pp = PParser(gdict, flat_xml)
+ ml += '<?xml version="1.0" standalone="no"?>\n'
+ if (raw):
+ ml += '<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n'
+ ml += '<svg width="%fin" height="%fin" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (pp.pw / scaledpi, pp.ph / scaledpi, pp.pw -1, pp.ph -1)
+ ml += '<title>Page %d - %s by %s</title>\n' % (counter, meta_array['Title'],meta_array['Authors'])
+ else:
+ # xhtml wrapper: emit the navigation/zoom script, then the svg inline
+ ml += '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n'
+ ml += '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" ><head>\n'
+ ml += '<title>Page %d - %s by %s</title>\n' % (counter, meta_array['Title'],meta_array['Authors'])
+ ml += '<script><![CDATA[\n'
+ ml += 'function gd(){var p=window.location.href.replace(/^.*\?dpi=(\d+).*$/i,"$1");return p;}\n'
+ ml += 'var dpi=%d;\n' % scaledpi
+ # prev/next page links only exist where the neighbour page exists
+ if (counter) :
+ ml += 'var prevpage="page%04d.xhtml";\n' % (counter - 1)
+ if (counter < numfiles-1) :
+ ml += 'var nextpage="page%04d.xhtml";\n' % (counter + 1)
+ ml += 'var pw=%d;var ph=%d;' % (pp.pw, pp.ph)
+ ml += 'function zoomin(){dpi=dpi*(0.8);setsize();}\n'
+ ml += 'function zoomout(){dpi=dpi*1.25;setsize();}\n'
+ ml += 'function setsize(){var svg=document.getElementById("svgimg");var prev=document.getElementById("prevsvg");var next=document.getElementById("nextsvg");var width=(pw/dpi)+"in";var height=(ph/dpi)+"in";svg.setAttribute("width",width);svg.setAttribute("height",height);prev.setAttribute("height",height);prev.setAttribute("width","50px");next.setAttribute("height",height);next.setAttribute("width","50px");}\n'
+ ml += 'function ppage(){window.location.href=prevpage+"?dpi="+Math.round(dpi);}\n'
+ ml += 'function npage(){window.location.href=nextpage+"?dpi="+Math.round(dpi);}\n'
+ ml += 'var gt=gd();if(gt>0){dpi=gt;}\n'
+ ml += 'window.onload=setsize;\n'
+ ml += ']]></script>\n'
+ ml += '</head>\n'
+ ml += '<body onLoad="setsize();" style="background-color:#777;text-align:center;">\n'
+ ml += '<div style="white-space:nowrap;">\n'
+ # first page gets an empty (arrow-less) prev placeholder
+ if (counter == 0) :
+ ml += '<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n'
+ else:
+ ml += '<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,150,95,5,95,295" fill="#AAAAAA" /></svg></a>\n'
+ ml += '<a href="javascript:npage();"><svg id="svgimg" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" style="background-color:#FFF;border:1px solid black;">' % (pp.pw, pp.ph)
+ # page content: glyph defs, images, then glyph placements
+ if (pp.gid != None):
+ ml += '<defs>\n'
+ gdefs = pp.getGlyphs()
+ for j in xrange(0,len(gdefs)):
+ ml += gdefs[j]
+ ml += '</defs>\n'
+ img = pp.getImages()
+ if (img != None):
+ for j in xrange(0,len(img)):
+ ml += img[j]
+ if (pp.gid != None):
+ for j in xrange(0,len(pp.gid)):
+ ml += '<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (pp.gid[j], pp.gx[j], pp.gy[j])
+ if (img == None or len(img) == 0) and (pp.gid == None or len(pp.gid) == 0):
+ ml += '<text x="10" y="10" font-family="Helvetica" font-size="100" stroke="black">This page intentionally left blank.</text>\n<text x="10" y="110" font-family="Helvetica" font-size="50" stroke="black">Until this notice unintentionally gave it content. (gensvg.py)</text>\n'
+ if (raw) :
+ ml += '</svg>'
+ else :
+ ml += '</svg></a>\n'
+ # last page gets an empty (arrow-less) next placeholder
+ if (counter == numfiles - 1) :
+ ml += '<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n'
+ else :
+ ml += '<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,5,5,295,95,150" fill="#AAAAAA" /></svg></a>\n'
+ ml += '</div>\n'
+ ml += '<div><a href="javascript:zoomin();">zoom in</a> - <a href="javascript:zoomout();">zoom out</a></div>\n'
+ ml += '</body>\n'
+ ml += '</html>\n'
+ return ml
+
--- /dev/null
+#! /usr/bin/python
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+
+# File-like wrapper that flushes after every write, so progress dots
+# printed in loops below appear immediately; all other attributes are
+# delegated to the wrapped stream.
+class Unbuffered:
+ def __init__(self, stream):
+ self.stream = stream
+ def write(self, data):
+ self.stream.write(data)
+ self.stream.flush()
+ def __getattr__(self, attr):
+ return getattr(self.stream, attr)
+
+import sys
+sys.stdout=Unbuffered(sys.stdout)
+
+import csv
+import os
+import getopt
+from struct import pack
+from struct import unpack
+
+
+# local support routines
+import convert2xml
+import flatxml2html
+import flatxml2svg
+import stylexml2css
+
+
+# Get a 7 bit encoded number from a file
+def readEncodedNumber(file):
+ # Decode a variable-length integer: 7 data bits per byte, high bit set
+ # on continuation bytes; a leading 0xFF byte marks a negative value.
+ # Returns None on end of file.
+ flag = False
+ c = file.read(1)
+ if (len(c) == 0):
+ return None
+ data = ord(c)
+ if data == 0xFF:
+ # negative-number marker; the magnitude follows
+ flag = True
+ c = file.read(1)
+ if (len(c) == 0):
+ return None
+ data = ord(c)
+ if data >= 0x80:
+ # multi-byte value: accumulate 7 bits at a time until the
+ # continuation (high) bit clears
+ datax = (data & 0x7F)
+ while data >= 0x80 :
+ c = file.read(1)
+ if (len(c) == 0):
+ return None
+ data = ord(c)
+ datax = (datax <<7) + (data & 0x7F)
+ data = datax
+ if flag:
+ data = -data
+ return data
+
+# Get a length prefixed string from the file
+def lengthPrefixString(data):
+ # Inverse of readString: prepend the encoded length to the payload.
+ # NOTE(review): encodeNumber is not defined anywhere in this file as
+ # shown — presumably provided by a sibling module (convert2xml?);
+ # confirm before calling, or this raises NameError.
+ return encodeNumber(len(data))+data
+
+def readString(file):
+ # Read a length-prefixed string: a 7-bit-encoded length followed by
+ # that many raw bytes. Returns None on EOF at the length field and
+ # "" when the payload is truncated.
+ stringLength = readEncodedNumber(file)
+ if (stringLength == None):
+ return None
+ sv = file.read(stringLength)
+ if (len(sv) != stringLength):
+ return ""
+ return unpack(str(stringLength)+"s",sv)[0]
+
+def getMetaArray(metaFile):
+ # parse the meta file
+ # Layout: an encoded count, then count (tag, value) string pairs.
+ # Returns a dict mapping tag -> value.
+ result = {}
+ fo = file(metaFile,'rb')
+ size = readEncodedNumber(fo)
+ for i in xrange(size):
+ tag = readString(fo)
+ value = readString(fo)
+ result[tag] = value
+ # print tag, value
+ fo.close()
+ return result
+
+
+# dictionary of all text strings by index value
+class Dictionary(object):
+ # Loads the Topaz string table: an encoded count followed by that many
+ # length-prefixed strings; lookup(i) returns the i'th (escaped) string.
+ def __init__(self, dictFile):
+ self.filename = dictFile
+ self.size = 0
+ self.fo = file(dictFile,'rb')
+ self.stable = []
+ self.size = readEncodedNumber(self.fo)
+ for i in xrange(self.size):
+ self.stable.append(self.escapestr(readString(self.fo)))
+ self.pos = 0
+ def escapestr(self, str):
+ # Escape characters that are significant in the generated xml/html.
+ # The previous self-replacements ('&' -> '&', '<' -> '<', ...) were
+ # no-ops left over from html-entity mangling of this source; '&'
+ # must be replaced first so the other entities are not double-escaped.
+ str = str.replace('&','&amp;')
+ str = str.replace('<','&lt;')
+ str = str.replace('>','&gt;')
+ str = str.replace('=','&#61;')
+ return str
+ def lookup(self,val):
+ # Return the string at index val; aborts the program on a bad index.
+ if ((val >= 0) and (val < self.size)) :
+ self.pos = val
+ return self.stable[self.pos]
+ else:
+ print "Error - %d outside of string table limits" % val
+ sys.exit(-1)
+ def getSize(self):
+ return self.size
+ def getPos(self):
+ return self.pos
+
+
+# Extracts the page.h / page.w values from a flattened page description.
+class PageDimParser(object):
+ def __init__(self, flatxml):
+ self.flatdoc = flatxml.split('\n')
+ # find tag if within pos to end inclusive
+ # Returns (index, value); (-1, None) when the tag is not present.
+ # end == -1 means "to the end of the document".
+ def findinDoc(self, tagpath, pos, end) :
+ result = None
+ docList = self.flatdoc
+ cnt = len(docList)
+ if end == -1 :
+ end = cnt
+ else:
+ end = min(cnt,end)
+ foundat = -1
+ for j in xrange(pos, end):
+ item = docList[j]
+ if item.find('=') >= 0:
+ (name, argres) = item.split('=')
+ else :
+ name = item
+ argres = ''
+ if name.endswith(tagpath) :
+ result = argres
+ foundat = j
+ break
+ return foundat, result
+ def process(self):
+ # Returns (height, width) as strings; '-1' when a value is missing.
+ (pos, sph) = self.findinDoc('page.h',0,-1)
+ (pos, spw) = self.findinDoc('page.w',0,-1)
+ if (sph == None): sph = '-1'
+ if (spw == None): spw = '-1'
+ return sph, spw
+
+# Convenience wrapper: parse flatxml and return (height, width) strings.
+def getPageDim(flatxml):
+ # create a document parser
+ dp = PageDimParser(flatxml)
+ (ph, pw) = dp.process()
+ return ph, pw
+
+# Parses glyph geometry from a flattened glyph file and renders each glyph
+# as an svg path string, scaling vertices from the glyph's own dpi to 1440.
+class GParser(object):
+ def __init__(self, flatxml):
+ self.flatdoc = flatxml.split('\n')
+ self.dpi = 1440
+ self.gh = self.getData('info.glyph.h')
+ self.gw = self.getData('info.glyph.w')
+ self.guse = self.getData('info.glyph.use')
+ if self.guse :
+ self.count = len(self.guse)
+ else :
+ self.count = 0
+ self.gvtx = self.getData('info.glyph.vtx')
+ self.glen = self.getData('info.glyph.len')
+ self.gdpi = self.getData('info.glyph.dpi')
+ self.vx = self.getData('info.vtx.x')
+ self.vy = self.getData('info.vtx.y')
+ self.vlen = self.getData('info.len.n')
+ # append sentinel end offsets so getPath can slice glyph i with
+ # the [i] .. [i+1] index pairs
+ if self.vlen :
+ self.glen.append(len(self.vlen))
+ elif self.glen:
+ self.glen.append(0)
+ if self.vx :
+ self.gvtx.append(len(self.vx))
+ elif self.gvtx :
+ self.gvtx.append(0)
+ # Exact-name lookup (unlike PParser.getData's endswith match); values
+ # come back as a list of ints, or None when the name is absent.
+ def getData(self, path):
+ result = None
+ cnt = len(self.flatdoc)
+ for j in xrange(cnt):
+ item = self.flatdoc[j]
+ if item.find('=') >= 0:
+ (name, argt) = item.split('=')
+ argres = argt.split('|')
+ else:
+ name = item
+ argres = []
+ if (name == path):
+ result = argres
+ break
+ if (len(argres) > 0) :
+ for j in xrange(0,len(argres)):
+ argres[j] = int(argres[j])
+ return result
+ # Bounding box of glyph gly scaled up to self.dpi.
+ def getGlyphDim(self, gly):
+ maxh = (self.gh[gly] * self.dpi) / self.gdpi[gly]
+ maxw = (self.gw[gly] * self.dpi) / self.gdpi[gly]
+ return maxh, maxw
+ # Build the svg path data string for glyph index gly; each contour is
+ # emitted as a moveto plus cubic/quadratic bezier segments back to its
+ # start. Returns '' for an out-of-range index.
+ def getPath(self, gly):
+ path = ''
+ if (gly < 0) or (gly >= self.count):
+ return path
+ tx = self.vx[self.gvtx[gly]:self.gvtx[gly+1]]
+ ty = self.vy[self.gvtx[gly]:self.gvtx[gly+1]]
+ p = 0
+ for k in xrange(self.glen[gly], self.glen[gly+1]):
+ # slice out the vertices of contour k of this glyph
+ if (p == 0):
+ zx = tx[0:self.vlen[k]+1]
+ zy = ty[0:self.vlen[k]+1]
+ else:
+ zx = tx[self.vlen[k-1]+1:self.vlen[k]+1]
+ zy = ty[self.vlen[k-1]+1:self.vlen[k]+1]
+ p += 1
+ j = 0
+ while ( j < len(zx) ):
+ if (j == 0):
+ # Start Position.
+ path += 'M %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly])
+ elif (j <= len(zx)-3):
+ # Cubic Bezier Curve
+ path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[j+2] * self.dpi / self.gdpi[gly], zy[j+2] * self.dpi / self.gdpi[gly])
+ j += 2
+ elif (j == len(zx)-2):
+ # Cubic Bezier Curve to Start Position
+ path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly])
+ j += 1
+ elif (j == len(zx)-1):
+ # Quadratic Bezier Curve to Start Position
+ path += 'Q %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly])
+
+ j += 1
+ path += 'z'
+ return path
+
+
+
+# dictionary of svg glyph path definitions keyed by glyph id
+# Maps 'id="gl<N>"' key strings to svg <path> definition fragments.
+class GlyphDict(object):
+ def __init__(self):
+ self.gdict = {}
+ def lookup(self, id):
+ # id='id="gl%d"' % val
+ # Returns the stored path fragment, or None when unknown.
+ if id in self.gdict:
+ return self.gdict[id]
+ return None
+ def addGlyph(self, val, path):
+ id='id="gl%d"' % val
+ self.gdict[id] = path
+
+
+# Top-level driver: turn an unencrypted, extracted Topaz book directory into
+# book.html, per-page svg/xhtml, style.css, cover.jpg and book.opf.
+# raw: emit standalone svg pages instead of xhtml wrappers;
+# fixedimage: passed through to the html converter for Fixed Areas.
+# Returns 0 on success, 1 on any missing input.
+def generateBook(bookDir, raw, fixedimage):
+ # sanity check Topaz file extraction
+ if not os.path.exists(bookDir) :
+ print "Can not find directory with unencrypted book"
+ return 1
+
+ dictFile = os.path.join(bookDir,'dict0000.dat')
+ if not os.path.exists(dictFile) :
+ print "Can not find dict0000.dat file"
+ return 1
+
+ pageDir = os.path.join(bookDir,'page')
+ if not os.path.exists(pageDir) :
+ print "Can not find page directory in unencrypted book"
+ return 1
+
+ imgDir = os.path.join(bookDir,'img')
+ if not os.path.exists(imgDir) :
+ print "Can not find image directory in unencrypted book"
+ return 1
+
+ glyphsDir = os.path.join(bookDir,'glyphs')
+ if not os.path.exists(glyphsDir) :
+ print "Can not find glyphs directory in unencrypted book"
+ return 1
+
+ metaFile = os.path.join(bookDir,'metadata0000.dat')
+ if not os.path.exists(metaFile) :
+ print "Can not find metadata0000.dat in unencrypted book"
+ return 1
+
+ svgDir = os.path.join(bookDir,'svg')
+ if not os.path.exists(svgDir) :
+ os.makedirs(svgDir)
+
+ xmlDir = os.path.join(bookDir,'xml')
+ if not os.path.exists(xmlDir) :
+ os.makedirs(xmlDir)
+
+ otherFile = os.path.join(bookDir,'other0000.dat')
+ if not os.path.exists(otherFile) :
+ print "Can not find other0000.dat in unencrypted book"
+ return 1
+
+ # NOTE(review): assumes color_img/ exists — os.listdir raises if not;
+ # confirm the extractor always creates it
+ print "Updating to color images if available"
+ spath = os.path.join(bookDir,'color_img')
+ dpath = os.path.join(bookDir,'img')
+ filenames = os.listdir(spath)
+ filenames = sorted(filenames)
+ for filename in filenames:
+ imgname = filename.replace('color','img')
+ sfile = os.path.join(spath,filename)
+ dfile = os.path.join(dpath,imgname)
+ imgdata = file(sfile,'rb').read()
+ file(dfile,'wb').write(imgdata)
+
+ print "Creating cover.jpg"
+ isCover = False
+ cpath = os.path.join(bookDir,'img')
+ cpath = os.path.join(cpath,'img0000.jpg')
+ if os.path.isfile(cpath):
+ cover = file(cpath, 'rb').read()
+ cpath = os.path.join(bookDir,'cover.jpg')
+ file(cpath, 'wb').write(cover)
+ isCover = True
+
+
+ print 'Processing Dictionary'
+ dict = Dictionary(dictFile)
+
+ print 'Processing Meta Data and creating OPF'
+ meta_array = getMetaArray(metaFile)
+
+ xname = os.path.join(xmlDir, 'metadata.xml')
+ metastr = ''
+ for key in meta_array:
+ metastr += '<meta name="' + key + '" content="' + meta_array[key] + '" />\n'
+ file(xname, 'wb').write(metastr)
+
+ print 'Processing StyleSheet'
+ # get some scaling info from metadata to use while processing styles
+ fontsize = '135'
+ if 'fontSize' in meta_array:
+ fontsize = meta_array['fontSize']
+
+ # also get the size of a normal text page
+ spage = '1'
+ if 'firstTextPage' in meta_array:
+ spage = meta_array['firstTextPage']
+ pnum = int(spage)
+
+ # get page height and width from first text page for use in stylesheet scaling
+ pname = 'page%04d.dat' % (pnum + 1)
+ fname = os.path.join(pageDir,pname)
+ flat_xml = convert2xml.fromData(dict, fname)
+
+ (ph, pw) = getPageDim(flat_xml)
+ if (ph == '-1') or (ph == '0') : ph = '11000'
+ if (pw == '-1') or (pw == '0') : pw = '8500'
+
+ # print ' ', 'other0000.dat'
+ xname = os.path.join(bookDir, 'style.css')
+ flat_xml = convert2xml.fromData(dict, otherFile)
+ cssstr , classlst = stylexml2css.convert2CSS(flat_xml, fontsize, ph, pw)
+ file(xname, 'wb').write(cssstr)
+ xname = os.path.join(xmlDir, 'other0000.xml')
+ file(xname, 'wb').write(convert2xml.getXML(dict, otherFile))
+
+ print 'Processing Glyphs'
+ gd = GlyphDict()
+ filenames = os.listdir(glyphsDir)
+ filenames = sorted(filenames)
+ glyfname = os.path.join(svgDir,'glyphs.svg')
+ glyfile = open(glyfname, 'w')
+ glyfile.write('<?xml version="1.0" standalone="no"?>\n')
+ glyfile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
+ glyfile.write('<svg width="512" height="512" viewBox="0 0 511 511" xmlns="http://www.w3.org/2000/svg" version="1.1">\n')
+ glyfile.write('<title>Glyphs for %s</title>\n' % meta_array['Title'])
+ glyfile.write('<defs>\n')
+ counter = 0
+ # glyph ids are namespaced per glyph file: file index * 256 + glyph index
+ for filename in filenames:
+ # print ' ', filename
+ print '.',
+ fname = os.path.join(glyphsDir,filename)
+ flat_xml = convert2xml.fromData(dict, fname)
+
+ xname = os.path.join(xmlDir, filename.replace('.dat','.xml'))
+ file(xname, 'wb').write(convert2xml.getXML(dict, fname))
+
+ gp = GParser(flat_xml)
+ for i in xrange(0, gp.count):
+ path = gp.getPath(i)
+ maxh, maxw = gp.getGlyphDim(i)
+ fullpath = '<path id="gl%d" d="%s" fill="black" /><!-- width=%d height=%d -->\n' % (counter * 256 + i, path, maxw, maxh)
+ glyfile.write(fullpath)
+ gd.addGlyph(counter * 256 + i, fullpath)
+ counter += 1
+ glyfile.write('</defs>\n')
+ glyfile.write('</svg>\n')
+ glyfile.close()
+ print " "
+
+ # start up the html
+ htmlFileName = "book.html"
+ htmlstr = '<?xml version="1.0" encoding="utf-8"?>\n'
+ htmlstr += '<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.1 Strict//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11-strict.dtd">\n'
+ htmlstr += '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">\n'
+ htmlstr += '<head>\n'
+ htmlstr += '<meta http-equiv="content-type" content="text/html; charset=utf-8"/>\n'
+ htmlstr += '<title>' + meta_array['Title'] + ' by ' + meta_array['Authors'] + '</title>\n'
+ htmlstr += '<meta name="Author" content="' + meta_array['Authors'] + '" />\n'
+ htmlstr += '<meta name="Title" content="' + meta_array['Title'] + '" />\n'
+ htmlstr += '<meta name="ASIN" content="' + meta_array['ASIN'] + '" />\n'
+ htmlstr += '<meta name="GUID" content="' + meta_array['GUID'] + '" />\n'
+ htmlstr += '<link href="style.css" rel="stylesheet" type="text/css" />\n'
+ htmlstr += '</head>\n<body>\n'
+
+ print 'Processing Pages'
+ # Books are at 1440 DPI. This is rendering at twice that size for
+ # readability when rendering to the screen.
+ scaledpi = 1440.0
+
+ svgindex = '<?xml version="1.0" encoding="utf-8"?>\n'
+ svgindex += '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n'
+ svgindex += '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >'
+ svgindex += '<head>\n'
+ svgindex += '<title>' + meta_array['Title'] + '</title>\n'
+ svgindex += '<meta name="Author" content="' + meta_array['Authors'] + '" />\n'
+ svgindex += '<meta name="Title" content="' + meta_array['Title'] + '" />\n'
+ svgindex += '<meta name="ASIN" content="' + meta_array['ASIN'] + '" />\n'
+ svgindex += '<meta name="GUID" content="' + meta_array['GUID'] + '" />\n'
+ svgindex += '</head>\n'
+ svgindex += '<body>\n'
+
+ filenames = os.listdir(pageDir)
+ filenames = sorted(filenames)
+ numfiles = len(filenames)
+ counter = 0
+
+ # each page contributes html to book.html and one svg/xhtml page file
+ for filename in filenames:
+ # print ' ', filename
+ print ".",
+
+ fname = os.path.join(pageDir,filename)
+ flat_xml = convert2xml.fromData(dict, fname)
+
+ xname = os.path.join(xmlDir, filename.replace('.dat','.xml'))
+ file(xname, 'wb').write(convert2xml.getXML(dict, fname))
+
+ # first get the html
+ htmlstr += flatxml2html.convert2HTML(flat_xml, classlst, fname, bookDir, gd, fixedimage)
+
+ # now get the svg image of the page
+ svgxml = flatxml2svg.convert2SVG(gd, flat_xml, counter, numfiles, svgDir, raw, meta_array, scaledpi)
+
+ if (raw) :
+ pfile = open(os.path.join(svgDir,filename.replace('.dat','.svg')), 'w')
+ svgindex += '<a href="svg/page%04d.svg">Page %d</a>\n' % (counter, counter)
+ else :
+ pfile = open(os.path.join(svgDir,'page%04d.xhtml' % counter), 'w')
+ svgindex += '<a href="svg/page%04d.xhtml">Page %d</a>\n' % (counter, counter)
+
+
+ pfile.write(svgxml)
+ pfile.close()
+
+ counter += 1
+
+ print " "
+
+ # finish up the html string and output it
+ htmlstr += '</body>\n</html>\n'
+ file(os.path.join(bookDir, htmlFileName), 'wb').write(htmlstr)
+
+ # finish up the svg index string and output it
+ svgindex += '</body>\n</html>\n'
+ file(os.path.join(bookDir, 'index_svg.xhtml'), 'wb').write(svgindex)
+
+ # build the opf file
+ opfname = os.path.join(bookDir, 'book.opf')
+ opfstr = '<?xml version="1.0" encoding="utf-8"?>\n'
+ opfstr += '<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="guid_id">\n'
+ # adding metadata
+ opfstr += ' <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">\n'
+ opfstr += ' <dc:identifier opf:scheme="GUID" id="guid_id">' + meta_array['GUID'] + '</dc:identifier>\n'
+ opfstr += ' <dc:identifier opf:scheme="ASIN">' + meta_array['ASIN'] + '</dc:identifier>\n'
+ opfstr += ' <dc:identifier opf:scheme="oASIN">' + meta_array['oASIN'] + '</dc:identifier>\n'
+ opfstr += ' <dc:title>' + meta_array['Title'] + '</dc:title>\n'
+ opfstr += ' <dc:creator opf:role="aut">' + meta_array['Authors'] + '</dc:creator>\n'
+ opfstr += ' <dc:language>en</dc:language>\n'
+ opfstr += ' <dc:date>' + meta_array['UpdateTime'] + '</dc:date>\n'
+ if isCover:
+ opfstr += ' <meta name="cover" content="bookcover"/>\n'
+ opfstr += ' </metadata>\n'
+ opfstr += '<manifest>\n'
+ opfstr += ' <item id="book" href="book.html" media-type="application/xhtml+xml"/>\n'
+ opfstr += ' <item id="stylesheet" href="style.css" media-type="text.css"/>\n'
+ # adding image files to manifest
+ filenames = os.listdir(imgDir)
+ filenames = sorted(filenames)
+ for filename in filenames:
+ imgname, imgext = os.path.splitext(filename)
+ if imgext == '.jpg':
+ imgext = 'jpeg'
+ if imgext == '.svg':
+ imgext = 'svg+xml'
+ opfstr += ' <item id="' + imgname + '" href="img/' + filename + '" media-type="image/' + imgext + '"/>\n'
+ if isCover:
+ opfstr += ' <item id="bookcover" href="cover.jpg" media-type="image/jpeg" />\n'
+ opfstr += '</manifest>\n'
+ # adding spine
+ opfstr += '<spine>\n <itemref idref="book" />\n</spine>\n'
+ if isCover:
+ opfstr += ' <guide>\n'
+ opfstr += ' <reference href="cover.jpg" type="cover" title="Cover"/>\n'
+ opfstr += ' </guide>\n'
+ opfstr += '</package>\n'
+ file(opfname, 'wb').write(opfstr)
+
+ print 'Processing Complete'
+
+ return 0
+
+def usage():
+ # Print the command line help text for this script.
+ # Fixed: balanced the "[-h]" bracket and the "genearate" typo.
+ print "genbook.py generates a book from the extract Topaz Files"
+ print "Usage:"
+ print " genbook.py [-r] [-h] [--fixed-image] <bookDir> "
+ print " "
+ print "Options:"
+ print " -h : help - print this usage message"
+ print " -r : generate raw svg files (not wrapped in xhtml)"
+ print " --fixed-image : generate any Fixed Area as an svg image in the html"
+ print " "
+
+
+def main(argv):
+ # Command line entry point: parse options, then generate the book.
+ # argv may be '' (or empty) in which case sys.argv is used instead.
+ # Returns 0 on success, 1 on a usage error.
+ bookDir = ''
+
+ if len(argv) == 0:
+ argv = sys.argv
+
+ try:
+ # -h is a plain help flag; the previous spec "rh:" made -h consume
+ # the next argument, so a bare "genbook.py -h" raised GetoptError
+ opts, args = getopt.getopt(argv[1:], "rh",["fixed-image"])
+
+ except getopt.GetoptError, err:
+ print str(err)
+ usage()
+ return 1
+
+ if len(opts) == 0 and len(args) == 0 :
+ usage()
+ return 1
+
+ raw = 0
+ fixedimage = False
+ for o, a in opts:
+ if o =="-h":
+ usage()
+ return 0
+ if o =="-r":
+ raw = 1
+ if o =="--fixed-image":
+ fixedimage = True
+
+ # guard: options given but no book directory (was an IndexError)
+ if len(args) == 0 :
+ usage()
+ return 1
+ bookDir = args[0]
+
+ rv = generateBook(bookDir, raw, fixedimage)
+ return rv
+
+
+# Script entry point; '' makes main() fall back to sys.argv, and the
+# process exit status is main()'s return value.
+if __name__ == '__main__':
+ sys.exit(main(''))
enc('CipherReference'))
for elem in encryption.findall(expr):
path = elem.get('URI', None)
if path is not None:
+ path = path.encode('utf-8')
encrypted.add(path)
#! /usr/bin/python
# -*- coding: utf-8 -*-
-# ineptepub.pyw, version 5.2
+# ineptepub.pyw, version 5.4
# Copyright © 2009-2010 i♥cabbages
# Released under the terms of the GNU General Public Licence, version 3 or
# 5.1 - Improve OpenSSL error checking
# 5.2 - Fix ctypes error causing segfaults on some systems
# 5.3 - add support for OpenSSL on Windows, fix bug with some versions of libcrypto 0.9.8 prior to path level o
+# 5.4 - add support for encoding to 'utf-8' when building up list of files to decrypt from encryption.xml
+
"""
Decrypt Adobe ADEPT-encrypted EPUB books.
"""
for elem in encryption.findall(expr):
path = elem.get('URI', None)
if path is not None:
+ path = path.encode('utf-8')
encrypted.add(path)
def decompress(self, bytes):
from __future__ import with_statement
-__version__ = '1.2'
+__version__ = '1.4'
class Unbuffered:
def __init__(self, stream):
import binascii
import zlib
import re
+import zlib, zipfile, tempfile, shutil
from struct import pack, unpack, unpack_from
-
-#Exception Handling
class DrmException(Exception):
pass
-#
-# crypto digestroutines
-#
-
-import hashlib
-
-def MD5(message):
- ctx = hashlib.md5()
- ctx.update(message)
- return ctx.digest()
-
-def SHA1(message):
- ctx = hashlib.sha1()
- ctx.update(message)
- return ctx.digest()
-
-# determine if we are running as a calibre plugin
if 'calibre' in sys.modules:
inCalibre = True
- global openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap1, charMap2, charMap3, charMap4
else:
inCalibre = False
-#
-# start of Kindle specific routines
-#
-
-if not inCalibre:
- import mobidedrm
- if sys.platform.startswith('win'):
- from k4pcutils import openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap1, charMap2, charMap3, charMap4
- if sys.platform.startswith('darwin'):
- from k4mutils import openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap1, charMap2, charMap3, charMap4
-
-global kindleDatabase
-
-# Encode the bytes in data with the characters in map
-def encode(data, map):
- result = ""
- for char in data:
- value = ord(char)
- Q = (value ^ 0x80) // len(map)
- R = value % len(map)
- result += map[Q]
- result += map[R]
- return result
-
-# Hash the bytes in data and then encode the digest with the characters in map
-def encodeHash(data,map):
- return encode(MD5(data),map)
-
-# Decode the string in data with the characters in map. Returns the decoded bytes
-def decode(data,map):
- result = ""
- for i in range (0,len(data)-1,2):
- high = map.find(data[i])
- low = map.find(data[i+1])
- if (high == -1) or (low == -1) :
- break
- value = (((high * len(map)) ^ 0x80) & 0xFF) + low
- result += pack("B",value)
- return result
-
-
-# Parse the Kindle.info file and return the records as a list of key-values
-def parseKindleInfo(kInfoFile):
- DB = {}
- infoReader = openKindleInfo(kInfoFile)
- infoReader.read(1)
- data = infoReader.read()
- if sys.platform.startswith('win'):
- items = data.split('{')
- else :
- items = data.split('[')
- for item in items:
- splito = item.split(':')
- DB[splito[0]] =splito[1]
- return DB
-
-# Get a record from the Kindle.info file for the key "hashedKey" (already hashed and encoded). Return the decoded and decrypted record
-def getKindleInfoValueForHash(hashedKey):
- global kindleDatabase
- encryptedValue = decode(kindleDatabase[hashedKey],charMap2)
- if sys.platform.startswith('win'):
- return CryptUnprotectData(encryptedValue,"")
- else:
- cleartext = CryptUnprotectData(encryptedValue)
- return decode(cleartext, charMap1)
-
-# Get a record from the Kindle.info file for the string in "key" (plaintext). Return the decoded and decrypted record
-def getKindleInfoValueForKey(key):
- return getKindleInfoValueForHash(encodeHash(key,charMap2))
-
-# Find if the original string for a hashed/encoded string is known. If so return the original string othwise return an empty string.
-def findNameForHash(hash):
- names = ["kindle.account.tokens","kindle.cookie.item","eulaVersionAccepted","login_date","kindle.token.item","login","kindle.key.item","kindle.name.info","kindle.device.info", "MazamaRandomNumber"]
- result = ""
- for name in names:
- if hash == encodeHash(name, charMap2):
- result = name
- break
- return result
-
-# Print all the records from the kindle.info file (option -i)
-def printKindleInfo():
- for record in kindleDatabase:
- name = findNameForHash(record)
- if name != "" :
- print (name)
- print ("--------------------------")
- else :
- print ("Unknown Record")
- print getKindleInfoValueForHash(record)
- print "\n"
-
-#
-# PID generation routines
-#
-
-# Returns two bit at offset from a bit field
-def getTwoBitsFromBitField(bitField,offset):
- byteNumber = offset // 4
- bitPosition = 6 - 2*(offset % 4)
- return ord(bitField[byteNumber]) >> bitPosition & 3
-
-# Returns the six bits at offset from a bit field
-def getSixBitsFromBitField(bitField,offset):
- offset *= 3
- value = (getTwoBitsFromBitField(bitField,offset) <<4) + (getTwoBitsFromBitField(bitField,offset+1) << 2) +getTwoBitsFromBitField(bitField,offset+2)
- return value
-
-# 8 bits to six bits encoding from hash to generate PID string
-def encodePID(hash):
- global charMap3
- PID = ""
- for position in range (0,8):
- PID += charMap3[getSixBitsFromBitField(hash,position)]
- return PID
-
-# Encryption table used to generate the device PID
-def generatePidEncryptionTable() :
- table = []
- for counter1 in range (0,0x100):
- value = counter1
- for counter2 in range (0,8):
- if (value & 1 == 0) :
- value = value >> 1
- else :
- value = value >> 1
- value = value ^ 0xEDB88320
- table.append(value)
- return table
-
-# Seed value used to generate the device PID
-def generatePidSeed(table,dsn) :
- value = 0
- for counter in range (0,4) :
- index = (ord(dsn[counter]) ^ value) &0xFF
- value = (value >> 8) ^ table[index]
- return value
-
-# Generate the device PID
-def generateDevicePID(table,dsn,nbRoll):
- seed = generatePidSeed(table,dsn)
- pidAscii = ""
- pid = [(seed >>24) &0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF,(seed>>24) & 0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF]
- index = 0
- for counter in range (0,nbRoll):
- pid[index] = pid[index] ^ ord(dsn[counter])
- index = (index+1) %8
- for counter in range (0,8):
- index = ((((pid[counter] >>5) & 3) ^ pid[counter]) & 0x1f) + (pid[counter] >> 7)
- pidAscii += charMap4[index]
- return pidAscii
-
-# convert from 8 digit PID to 10 digit PID with checksum
-def checksumPid(s):
- letters = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
- crc = (~binascii.crc32(s,-1))&0xFFFFFFFF
- crc = crc ^ (crc >> 16)
- res = s
- l = len(letters)
- for i in (0,1):
- b = crc & 0xff
- pos = (b // l) ^ (b % l)
- res += letters[pos%l]
- crc >>= 8
- return res
-
-
-class MobiPeek:
- def loadSection(self, section):
- before, after = self.sections[section:section+2]
- self.f.seek(before)
- return self.f.read(after - before)
- def __init__(self, filename):
- self.f = file(filename, 'rb')
- self.header = self.f.read(78)
- self.ident = self.header[0x3C:0x3C+8]
- if self.ident != 'BOOKMOBI' and self.ident != 'TEXtREAd':
- raise DrmException('invalid file format')
- self.num_sections, = unpack_from('>H', self.header, 76)
- sections = self.f.read(self.num_sections*8)
- self.sections = unpack_from('>%dL' % (self.num_sections*2), sections, 0)[::2] + (0xfffffff, )
- self.sect0 = self.loadSection(0)
- self.f.close()
- def getBookTitle(self):
- # get book title
- toff, tlen = unpack('>II', self.sect0[0x54:0x5c])
- tend = toff + tlen
- title = self.sect0[toff:tend]
- return title
- def getexthData(self):
- # if exth region exists then grab it
- # get length of this header
- length, type, codepage, unique_id, version = unpack('>LLLLL', self.sect0[20:40])
- exth_flag, = unpack('>L', self.sect0[0x80:0x84])
- exth = ''
- if exth_flag & 0x40:
- exth = self.sect0[16 + length:]
- return exth
- def isNotEncrypted(self):
- lock_type, = unpack('>H', self.sect0[0xC:0xC+2])
- if lock_type == 0:
- return True
- return False
-
-# DiapDealer's stuff: Parse the EXTH header records and parse the Kindleinfo
-# file to calculate the book pid.
-def getK4Pids(exth, title, kInfoFile=None):
- global kindleDatabase
- try:
- kindleDatabase = parseKindleInfo(kInfoFile)
- except Exception, message:
- print(message)
-
- if kindleDatabase != None :
- # Get the Mazama Random number
- MazamaRandomNumber = getKindleInfoValueForKey("MazamaRandomNumber")
-
- # Get the HDD serial
- encodedSystemVolumeSerialNumber = encodeHash(GetVolumeSerialNumber(),charMap1)
-
- # Get the current user name
- encodedUsername = encodeHash(GetUserName(),charMap1)
-
- # concat, hash and encode to calculate the DSN
- DSN = encode(SHA1(MazamaRandomNumber+encodedSystemVolumeSerialNumber+encodedUsername),charMap1)
-
- print("\nDSN: " + DSN)
-
- # Compute the device PID (for which I can tell, is used for nothing).
- # But hey, stuff being printed out is apparently cool.
- table = generatePidEncryptionTable()
- devicePID = generateDevicePID(table,DSN,4)
-
- print("Device PID: " + checksumPid(devicePID))
-
- # Compute book PID
- exth_records = {}
- nitems, = unpack('>I', exth[8:12])
- pos = 12
-
- exth_records[209] = None
- # Parse the exth records, storing data indexed by type
- for i in xrange(nitems):
- type, size = unpack('>II', exth[pos: pos + 8])
- content = exth[pos + 8: pos + size]
-
- exth_records[type] = content
- pos += size
-
- # Grab the contents of the type 209 exth record
- if exth_records[209] != None:
- data = exth_records[209]
- else:
- raise DrmException("\nNo EXTH record type 209 - Perhaps not a K4 file?")
-
- # Parse the 209 data to find the the exth record with the token data.
- # The last character of the 209 data points to the record with the token.
- # Always 208 from my experience, but I'll leave the logic in case that changes.
- for i in xrange(len(data)):
- if ord(data[i]) != 0:
- if exth_records[ord(data[i])] != None:
- token = exth_records[ord(data[i])]
-
- # Get the kindle account token
- kindleAccountToken = getKindleInfoValueForKey("kindle.account.tokens")
-
- print("Account Token: " + kindleAccountToken)
-
- pidHash = SHA1(DSN+kindleAccountToken+exth_records[209]+token)
-
- bookPID = encodePID(pidHash)
- bookPID = checksumPid(bookPID)
-
- if exth_records[503] != None:
- print "Pid for " + exth_records[503] + ": " + bookPID
- else:
- print "Pid for " + title + ":" + bookPID
- return bookPID
-
- raise DrmException("\nCould not access K4 data - Perhaps K4 is not installed/configured?")
- return null
+def zipUpDir(myzip, tempdir,localname):
+ currentdir = tempdir
+ if localname != "":
+ currentdir = os.path.join(currentdir,localname)
+ list = os.listdir(currentdir)
+ for file in list:
+ afilename = file
+ localfilePath = os.path.join(localname, afilename)
+ realfilePath = os.path.join(currentdir,file)
+ if os.path.isfile(realfilePath):
+ myzip.write(realfilePath, localfilePath)
+ elif os.path.isdir(realfilePath):
+ zipUpDir(myzip, tempdir, localfilePath)
def usage(progname):
- print "Removes DRM protection from K4PC, K4M, and Mobi ebooks"
+ print "Removes DRM protection from K4PC/M, Kindle, Mobi and Topaz ebooks"
print "Usage:"
- print " %s [-k <kindle.info>] [-p <pidnums>] <infile> <outfile> " % progname
+ print " %s [-k <kindle.info>] [-p <pidnums>] [-s <kindleSerialNumbers>] <infile> <outdir> " % progname
#
# Main
#
def main(argv=sys.argv):
- global kindleDatabase
import mobidedrm
-
+ import topazextract
+ import kgenpids
progname = os.path.basename(argv[0])
+
+ k4 = False
kInfoFiles = []
- pidnums = ""
+ serials = []
+ pids = []
print ('K4MobiDeDrm v%(__version__)s '
'provided by the work of many including DiapDealer, SomeUpdates, IHeartCabbages, CMBDTC, Skindle, DarkReverser, ApprenticeAlf, etc .' % globals())
+ print ' '
try:
- opts, args = getopt.getopt(sys.argv[1:], "k:p:")
+ opts, args = getopt.getopt(sys.argv[1:], "k:p:s:")
except getopt.GetoptError, err:
print str(err)
usage(progname)
sys.exit(2)
-
if len(args)<2:
usage(progname)
sys.exit(2)
if o == "-p":
if a == None :
raise DrmException("Invalid parameter for -p")
- pidnums = a
+ pids = a.split(',')
+ if o == "-s":
+ if a == None :
+ raise DrmException("Invalid parameter for -s")
+ serials = a.split(',')
+
+ # try with built in Kindle Info files
+ k4 = True
- kindleDatabase = None
infile = args[0]
- outfile = args[1]
- DecodeErrorString = ""
- try:
- # first try with K4PC/K4M
- ex = MobiPeek(infile)
- if ex.isNotEncrypted():
- print "File was Not Encrypted"
- return 2
- title = ex.getBookTitle()
- exth = ex.getexthData()
- if exth=='':
- raise DrmException("Not a Kindle Mobipocket file")
- pid = getK4Pids(exth, title)
- unlocked_file = mobidedrm.getUnencryptedBook(infile, pid)
- except DrmException, e:
- DecodeErrorString += "Error trying default K4 info: " + str(e) + "\n"
- pass
- except mobidedrm.DrmException, e:
- DecodeErrorString += "Error trying default K4 info: " + str(e) + "\n"
- pass
+ outdir = args[1]
+
+ # handle the obvious cases at the beginning
+ if not os.path.isfile(infile):
+ print "Error: Input file does not exist"
+ return 1
+
+ mobi = True
+ magic3 = file(infile,'rb').read(3)
+ if magic3 == 'TPZ':
+ mobi = False
+
+ bookname = os.path.splitext(os.path.basename(infile))[0]
+
+ if mobi:
+ mb = mobidedrm.MobiBook(infile)
else:
- file(outfile, 'wb').write(unlocked_file)
- return 0
-
- # now try alternate kindle.info files
- if kInfoFiles:
- for infoFile in kInfoFiles:
- kindleDatabase = None
- try:
- title = ex.getBookTitle()
- exth = ex.getexthData()
- if exth=='':
- raise DrmException("Not a Kindle Mobipocket file")
- pid = getK4Pids(exth, title, infoFile)
- unlocked_file = mobidedrm.getUnencryptedBook(infile, pid)
- except DrmException, e:
- DecodeErrorString += "Error trying " + infoFile + " K4 info: " + str(e) + "\n"
- pass
- except mobidedrm.DrmException, e:
- DecodeErrorString += "Error trying " + infoFile + " K4 info: " + str(e) + "\n"
- pass
- else:
- file(outfile, 'wb').write(unlocked_file)
- return 0
-
- # Lastly, try from the pid list
- pids = pidnums.split(',')
- for pid in pids:
- try:
- print 'Trying: "'+ pid + '"'
- unlocked_file = mobidedrm.getUnencryptedBook(infile, pid)
- except mobidedrm.DrmException:
- pass
- else:
- file(outfile, 'wb').write(unlocked_file)
- return 0
+ tempdir = tempfile.mkdtemp()
+ mb = topazextract.TopazBook(infile, tempdir)
- # we could not unencrypt book
- print DecodeErrorString
- print "Error: Could Not Unencrypt Book"
- return 1
+ title = mb.getBookTitle()
+ print "Processing Book: ", title
+ # build pid list
+ md1, md2 = mb.getPIDMetaInfo()
+ pidlst = kgenpids.getPidList(md1, md2, k4, pids, serials, kInfoFiles)
+
+ try:
+ if mobi:
+ unlocked_file = mb.processBook(pidlst)
+ else:
+ mb.processBook(pidlst)
+
+ except mobidedrm.DrmException, e:
+ print " ... not suceessful " + str(e) + "\n"
+ return 1
+ except topazextract.TpzDRMError, e:
+ print str(e)
+ print " Creating DeBug Full Zip Archive of Book"
+ zipname = os.path.join(outdir, bookname + '_debug' + '.zip')
+ myzip = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+ zipUpDir(myzip, tempdir, '')
+ myzip.close()
+ return 1
+
+ if mobi:
+ outfile = os.path.join(outdir,bookname + '_nodrm' + '.azw')
+ file(outfile, 'wb').write(unlocked_file)
+ return 0
+
+ # topaz: build up zip archives of results
+ print " Creating HTML ZIP Archive"
+ zipname = os.path.join(outdir, bookname + '_nodrm' + '.zip')
+ myzip1 = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+ myzip1.write(os.path.join(tempdir,'book.html'),'book.html')
+ myzip1.write(os.path.join(tempdir,'book.opf'),'book.opf')
+ if os.path.isfile(os.path.join(tempdir,'cover.jpg')):
+ myzip1.write(os.path.join(tempdir,'cover.jpg'),'cover.jpg')
+ myzip1.write(os.path.join(tempdir,'style.css'),'style.css')
+ zipUpDir(myzip1, tempdir, 'img')
+ myzip1.close()
+
+ print " Creating SVG ZIP Archive"
+ zipname = os.path.join(outdir, bookname + '_SVG' + '.zip')
+ myzip2 = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+ myzip2.write(os.path.join(tempdir,'index_svg.xhtml'),'index_svg.xhtml')
+ zipUpDir(myzip2, tempdir, 'svg')
+ zipUpDir(myzip2, tempdir, 'img')
+ myzip2.close()
+
+ print " Creating XML ZIP Archive"
+ zipname = os.path.join(outdir, bookname + '_XML' + '.zip')
+ myzip3 = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+ targetdir = os.path.join(tempdir,'xml')
+ zipUpDir(myzip3, targetdir, '')
+ zipUpDir(myzip3, tempdir, 'img')
+ myzip3.close()
+
+ shutil.rmtree(tempdir)
+ return 0
if __name__ == '__main__':
sys.stdout=Unbuffered(sys.stdout)
sys.exit(main())
-
if not __name__ == "__main__" and inCalibre:
from calibre.customize import FileTypePlugin
class K4DeDRM(FileTypePlugin):
- name = 'K4PC, K4Mac, Mobi DeDRM' # Name of the plugin
- description = 'Removes DRM from K4PC, K4Mac, and Mobi files. \
+ name = 'K4PC, K4Mac, Kindle Mobi and Topaz DeDRM' # Name of the plugin
+ description = 'Removes DRM from K4PC and Mac, Kindle Mobi and Topaz files. \
Provided by the work of many including DiapDealer, SomeUpdates, IHeartCabbages, CMBDTC, Skindle, DarkReverser, ApprenticeAlf, etc.'
supported_platforms = ['osx', 'windows', 'linux'] # Platforms this plugin will run on
author = 'DiapDealer, SomeUpdates' # The author of this plugin
- version = (0, 1, 2) # The version number of this plugin
- file_types = set(['prc','mobi','azw']) # The file types that this plugin will be applied to
+ version = (0, 1, 7) # The version number of this plugin
+ file_types = set(['prc','mobi','azw','azw1','tpz']) # The file types that this plugin will be applied to
on_import = True # Run this plugin during the import
- priority = 200 # run this plugin before mobidedrm, k4pcdedrm, k4dedrm
+ priority = 210 # run this plugin before mobidedrm, k4pcdedrm, k4dedrm
def run(self, path_to_ebook):
from calibre.gui2 import is_ok_to_use_qt
from PyQt4.Qt import QMessageBox
- global kindleDatabase
- global openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap1, charMap2, charMap3, charMap4
- if sys.platform.startswith('win'):
- from k4pcutils import openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap1, charMap2, charMap3, charMap4
- if sys.platform.startswith('darwin'):
- from k4mutils import openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap1, charMap2, charMap3, charMap4
+ from calibre.ptempfile import PersistentTemporaryDirectory
+
+ import kgenpids
+ import zlib
+ import zipfile
+ import topazextract
import mobidedrm
+ k4 = True
+ pids = []
+ serials = []
+ kInfoFiles = []
+
# Get supplied list of PIDs to try from plugin customization.
- pidnums = self.site_customization
-
+ customvalues = self.site_customization.split(',')
+ for customvalue in customvalues:
+ customvalue = str(customvalue)
+ customvalue = customvalue.strip()
+ if len(customvalue) == 10 or len(customvalue) == 8:
+ pids.append(customvalue)
+ else :
+ if len(customvalue) == 16 and customvalue[0] == 'B':
+ serials.append(customvalue)
+ else:
+ print "%s is not a valid Kindle serial number or PID." % str(customvalue)
+
# Load any kindle info files (*.info) included Calibre's config directory.
- kInfoFiles = []
try:
# Find Calibre's configuration directory.
confpath = os.path.split(os.path.split(self.plugin_path)[0])[0]
print 'K4MobiDeDRM: Error reading kindle info files from config directory.'
pass
- # first try with book specifc pid from K4PC or K4M
+
+ mobi = True
+ magic3 = file(path_to_ebook,'rb').read(3)
+ if magic3 == 'TPZ':
+ mobi = False
+
+ bookname = os.path.splitext(os.path.basename(path_to_ebook))[0]
+
+ if mobi:
+ mb = mobidedrm.MobiBook(path_to_ebook)
+ else:
+ tempdir = PersistentTemporaryDirectory()
+ mb = topazextract.TopazBook(path_to_ebook, tempdir)
+
+ title = mb.getBookTitle()
+ md1, md2 = mb.getPIDMetaInfo()
+ pidlst = kgenpids.getPidList(md1, md2, k4, pids, serials, kInfoFiles)
+
try:
- kindleDatabase = None
- ex = MobiPeek(path_to_ebook)
- if ex.isNotEncrypted():
- return path_to_ebook
- title = ex.getBookTitle()
- exth = ex.getexthData()
- if exth=='':
- raise DrmException("Not a Kindle Mobipocket file")
- pid = getK4Pids(exth, title)
- unlocked_file = mobidedrm.getUnencryptedBook(path_to_ebook,pid)
- except DrmException:
- pass
+ if mobi:
+ unlocked_file = mb.processBook(pidlst)
+ else:
+ mb.processBook(pidlst)
+
except mobidedrm.DrmException:
- pass
- else:
- of = self.temporary_file('.mobi')
+ #if you reached here then no luck raise and exception
+ if is_ok_to_use_qt():
+ d = QMessageBox(QMessageBox.Warning, "K4MobiDeDRM Plugin", "Error decoding: %s\n" % path_to_ebook)
+ d.show()
+ d.raise_()
+ d.exec_()
+ raise Exception("K4MobiDeDRM plugin could not decode the file")
+ return ""
+ except topazextract.TpzDRMError:
+ #if you reached here then no luck raise and exception
+ if is_ok_to_use_qt():
+ d = QMessageBox(QMessageBox.Warning, "K4MobiDeDRM Plugin", "Error decoding: %s\n" % path_to_ebook)
+ d.show()
+ d.raise_()
+ d.exec_()
+ raise Exception("K4MobiDeDRM plugin could not decode the file")
+ return ""
+
+ print "Success!"
+ if mobi:
+ of = self.temporary_file(bookname+'.mobi')
of.write(unlocked_file)
of.close()
return of.name
-
- # Now try alternate kindle info files
- if kInfoFiles:
- for infoFile in kInfoFiles:
- kindleDatabase = None
- try:
- title = ex.getBookTitle()
- exth = ex.getexthData()
- if exth=='':
- raise DrmException("Not a Kindle Mobipocket file")
- pid = getK4Pids(exth, title, infoFile)
- unlocked_file = mobidedrm.getUnencryptedBook(path_to_ebook,pid)
- except DrmException:
- pass
- except mobidedrm.DrmException:
- pass
- else:
- of = self.temporary_file('.mobi')
- of.write(unlocked_file)
- of.close()
- return of.name
-
- # now try from the pid list
- pids = pidnums.split(',')
- for pid in pids:
- try:
- unlocked_file = mobidedrm.getUnencryptedBook(path_to_ebook, pid)
- except mobidedrm.DrmException:
- pass
- else:
- of = self.temporary_file('.mobi')
- of.write(unlocked_file)
- of.close()
- return of.name
-
- #if you reached here then no luck raise and exception
- if is_ok_to_use_qt():
- d = QMessageBox(QMessageBox.Warning, "K4MobiDeDRM Plugin", "Error decoding: %s\n" % path_to_ebook)
- d.show()
- d.raise_()
- d.exec_()
- raise Exception("K4MobiDeDRM plugin could not decode the file")
- return ""
+
+ # topaz: build up zip archives of results
+ print " Creating HTML ZIP Archive"
+ of = self.temporary_file(bookname + '.zip')
+ myzip = zipfile.ZipFile(of.name,'w',zipfile.ZIP_DEFLATED, False)
+ myzip.write(os.path.join(tempdir,'book.html'),'book.html')
+ myzip.write(os.path.join(tempdir,'book.opf'),'book.opf')
+ if os.path.isfile(os.path.join(tempdir,'cover.jpg')):
+ myzip.write(os.path.join(tempdir,'cover.jpg'),'cover.jpg')
+ myzip.write(os.path.join(tempdir,'style.css'),'style.css')
+ zipUpDir(myzip, tempdir, 'img')
+ myzip.close()
+ return of.name
def customization_help(self, gui=False):
- return 'Enter each 10 character PID separated by a comma (no spaces).'
+ return 'Enter 10 character PIDs and/or Kindle serial numbers, separated by commas.'
# standlone set of Mac OSX specific routines needed for K4DeDRM
from __future__ import with_statement
-
import sys
import os
-
-#Exception Handling
-class K4MDrmException(Exception):
- pass
-
-import signal
-import threading
import subprocess
-from subprocess import Popen, PIPE, STDOUT
-
-# **heavily** chopped up and modfied version of asyncproc.py
-# to make it actually work on Windows as well as Mac/Linux
-# For the original see:
-# "http://www.lysator.liu.se/~bellman/download/"
-# author is "Thomas Bellman <bellman@lysator.liu.se>"
-# available under GPL version 3 or Later
-
-# create an asynchronous subprocess whose output can be collected in
-# a non-blocking manner
-
-# What a mess! Have to use threads just to get non-blocking io
-# in a cross-platform manner
-
-# luckily all thread use is hidden within this class
-
-class Process(object):
- def __init__(self, *params, **kwparams):
- if len(params) <= 3:
- kwparams.setdefault('stdin', subprocess.PIPE)
- if len(params) <= 4:
- kwparams.setdefault('stdout', subprocess.PIPE)
- if len(params) <= 5:
- kwparams.setdefault('stderr', subprocess.PIPE)
- self.__pending_input = []
- self.__collected_outdata = []
- self.__collected_errdata = []
- self.__exitstatus = None
- self.__lock = threading.Lock()
- self.__inputsem = threading.Semaphore(0)
- self.__quit = False
-
- self.__process = subprocess.Popen(*params, **kwparams)
- if self.__process.stdin:
- self.__stdin_thread = threading.Thread(
- name="stdin-thread",
- target=self.__feeder, args=(self.__pending_input,
- self.__process.stdin))
- self.__stdin_thread.setDaemon(True)
- self.__stdin_thread.start()
-
- if self.__process.stdout:
- self.__stdout_thread = threading.Thread(
- name="stdout-thread",
- target=self.__reader, args=(self.__collected_outdata,
- self.__process.stdout))
- self.__stdout_thread.setDaemon(True)
- self.__stdout_thread.start()
-
- if self.__process.stderr:
- self.__stderr_thread = threading.Thread(
- name="stderr-thread",
- target=self.__reader, args=(self.__collected_errdata,
- self.__process.stderr))
- self.__stderr_thread.setDaemon(True)
- self.__stderr_thread.start()
-
- def pid(self):
- return self.__process.pid
-
- def kill(self, signal):
- self.__process.send_signal(signal)
-
- # check on subprocess (pass in 'nowait') to act like poll
- def wait(self, flag):
- if flag.lower() == 'nowait':
- rc = self.__process.poll()
- else:
- rc = self.__process.wait()
- if rc != None:
- if self.__process.stdin:
- self.closeinput()
- if self.__process.stdout:
- self.__stdout_thread.join()
- if self.__process.stderr:
- self.__stderr_thread.join()
- return self.__process.returncode
-
- def terminate(self):
- if self.__process.stdin:
- self.closeinput()
- self.__process.terminate()
-
- # thread gets data from subprocess stdout
- def __reader(self, collector, source):
- while True:
- data = os.read(source.fileno(), 65536)
- self.__lock.acquire()
- collector.append(data)
- self.__lock.release()
- if data == "":
- source.close()
- break
- return
-
- # thread feeds data to subprocess stdin
- def __feeder(self, pending, drain):
- while True:
- self.__inputsem.acquire()
- self.__lock.acquire()
- if not pending and self.__quit:
- drain.close()
- self.__lock.release()
- break
- data = pending.pop(0)
- self.__lock.release()
- drain.write(data)
- # non-blocking read of data from subprocess stdout
- def read(self):
- self.__lock.acquire()
- outdata = "".join(self.__collected_outdata)
- del self.__collected_outdata[:]
- self.__lock.release()
- return outdata
-
- # non-blocking read of data from subprocess stderr
- def readerr(self):
- self.__lock.acquire()
- errdata = "".join(self.__collected_errdata)
- del self.__collected_errdata[:]
- self.__lock.release()
- return errdata
-
- # non-blocking write to stdin of subprocess
- def write(self, data):
- if self.__process.stdin is None:
- raise ValueError("Writing to process with stdin not a pipe")
- self.__lock.acquire()
- self.__pending_input.append(data)
- self.__inputsem.release()
- self.__lock.release()
-
- # close stdinput of subprocess
- def closeinput(self):
- self.__lock.acquire()
- self.__quit = True
- self.__inputsem.release()
- self.__lock.release()
+# Exception raised by the Mac OS X key-retrieval helper routines below.
+class K4MDrmException(Exception):
+    pass
# interface to needed routines in openssl's libcrypto
# Utility Routines
#
+
+# Various character maps used to decrypt books. Probably supposed to act as obfuscation
+charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
+charMap2 = "ZB0bYyc1xDdW2wEV3Ff7KkPpL8UuGA4gz-Tme9Nn_tHh5SvXCsIiR6rJjQaqlOoM"
+charMap3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
+charMap4 = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
+
+
+
# uses a sub process to get the Hard Drive Serial Number using ioreg
# returns with the serial number of drive whose BSD Name is "disk0"
def GetVolumeSerialNumber():
return sernum
cmdline = '/usr/sbin/ioreg -l -S -w 0 -r -c AppleAHCIDiskDriver'
cmdline = cmdline.encode(sys.getfilesystemencoding())
- p = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
- poll = p.wait('wait')
- results = p.read()
- reslst = results.split('\n')
+ p = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
+ out1, out2 = p.communicate()
+ reslst = out1.split('\n')
cnt = len(reslst)
bsdname = None
sernum = None
username = os.getenv('USER')
return username
-# Various character maps used to decrypt books. Probably supposed to act as obfuscation
-charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
-charMap2 = "ZB0bYyc1xDdW2wEV3Ff7KkPpL8UuGA4gz-Tme9Nn_tHh5SvXCsIiR6rJjQaqlOoM"
-charMap3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
-charMap4 = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
def encode(data, map):
result = ""
cleartext = crp.decrypt(encryptedData)
return cleartext
+
# Locate and open the .kindle-info file
def openKindleInfo(kInfoFile=None):
if kInfoFile == None:
home = os.getenv('HOME')
cmdline = 'find "' + home + '/Library/Application Support" -name ".kindle-info"'
cmdline = cmdline.encode(sys.getfilesystemencoding())
- p1 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
- poll = p1.wait('wait')
- results = p1.read()
- reslst = results.split('\n')
+ p1 = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
+ out1, out2 = p1.communicate()
+ reslst = out1.split('\n')
kinfopath = 'NONE'
cnt = len(reslst)
for j in xrange(cnt):
--- /dev/null
+#!/usr/bin/env python
+
+from __future__ import with_statement
+import sys
+import os, csv
+import binascii
+import zlib
+import re
+from struct import pack, unpack, unpack_from
+
# Module-level exception for DRM/PID-derivation failures.
class DrmException(Exception):
    pass
+
+global kindleDatabase
+global charMap1
+global charMap2
+global charMap3
+global charMap4
+
+if sys.platform.startswith('win'):
+ from k4pcutils import openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap2
+if sys.platform.startswith('darwin'):
+ from k4mutils import openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap2
+
+charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
+charMap3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
+charMap4 = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
+
+# crypto digestroutines
+import hashlib
+
def MD5(message):
    """Return the raw 16-byte MD5 digest of message."""
    return hashlib.md5(message).digest()
+
def SHA1(message):
    """Return the raw 20-byte SHA-1 digest of message."""
    return hashlib.sha1(message).digest()
+
+
# Encode the bytes in data with the characters in map
def encode(data, map):
    """Obfuscation-encode: each input byte becomes two map characters.

    The first character encodes (byte XOR 0x80) // len(map), the second
    encodes byte % len(map).  Inverse of decode().
    """
    pieces = []
    base = len(map)
    for ch in data:
        v = ord(ch)
        pieces.append(map[(v ^ 0x80) // base])
        pieces.append(map[v % base])
    return "".join(pieces)
+
# Hash the bytes in data and then encode the digest with the characters in map
def encodeHash(data,map):
    # MD5 first, then the two-characters-per-byte obfuscation encoding
    return encode(MD5(data),map)
+
# Decode the string in data with the characters in map. Returns the decoded bytes
# Inverse of encode(): pairs of map characters are turned back into single bytes.
def decode(data,map):
    result = ""
    # consume characters two at a time; stop at the first character pair
    # that is not in the map (treated as the end of the encoded region)
    for i in range (0,len(data)-1,2):
        high = map.find(data[i])
        low = map.find(data[i+1])
        if (high == -1) or (low == -1) :
            break
        # rebuild the byte, undoing the XOR-0x80 folded into the high digit
        value = (((high * len(map)) ^ 0x80) & 0xFF) + low
        result += pack("B",value)
    return result
+
+
# Parse the Kindle.info file and return the records as a list of key-values
def parseKindleInfo(kInfoFile):
    """Read the kindle.info store and return a dict of encoded key -> encoded value.

    The first byte is skipped (purpose not evident here).  Records are
    separated by '{' on Windows and '[' on Mac; each record is 'key:value',
    both still in their obfuscated (hashed/encoded) form.
    """
    DB = {}
    infoReader = openKindleInfo(kInfoFile)
    infoReader.read(1)
    data = infoReader.read()
    if sys.platform.startswith('win'):
        items = data.split('{')
    else :
        items = data.split('[')
    for item in items:
        # NOTE(review): assumes every item contains a ':'; a malformed
        # record would raise IndexError here -- confirm input is trusted.
        splito = item.split(':')
        DB[splito[0]] =splito[1]
    return DB
+
# Get a record from the Kindle.info file for the key "hashedKey" (already hashed and encoded). Return the decoded and decrypted record
def getKindleInfoValueForHash(hashedKey):
    global kindleDatabase
    global charMap1
    global charMap2
    # stored value -> raw bytes, then the platform CryptUnprotectData helper
    encryptedValue = decode(kindleDatabase[hashedKey],charMap2)
    if sys.platform.startswith('win'):
        return CryptUnprotectData(encryptedValue,"")
    else:
        # on Mac the decrypted blob is itself still charMap1-encoded
        cleartext = CryptUnprotectData(encryptedValue)
        return decode(cleartext, charMap1)
+
# Get a record from the Kindle.info file for the string in "key" (plaintext). Return the decoded and decrypted record
def getKindleInfoValueForKey(key):
    global charMap2
    # hash/encode the plaintext key into the stored form, then look it up
    return getKindleInfoValueForHash(encodeHash(key,charMap2))
+
# Find if the original string for a hashed/encoded string is known.
# If so return the original string, otherwise return an empty string.
def findNameForHash(hash):
    global charMap2
    names = ["kindle.account.tokens","kindle.cookie.item","eulaVersionAccepted","login_date","kindle.token.item","login","kindle.key.item","kindle.name.info","kindle.device.info", "MazamaRandomNumber"]
    # hash each known name the same way the store does and compare
    for name in names:
        if encodeHash(name, charMap2) == hash:
            return name
    return ""
+
# Print all the records from the kindle.info file (option -i)
def printKindleInfo():
    """Dump every kindle.info record (decoded) to stdout, labelling the
    hashed keys whose plaintext names are known."""
    for record in kindleDatabase:
        name = findNameForHash(record)
        if name != "" :
            print (name)
            print ("--------------------------")
        else :
            print ("Unknown Record")
        print getKindleInfoValueForHash(record)
        print "\n"
+
+#
+# PID generation routines
+#
+
# Returns the two bits at offset from a bit field
def getTwoBitsFromBitField(bitField,offset):
    """Extract two-bit group number `offset` from bitField (a byte string).

    Groups are numbered big-endian within each byte: group 0 is the two
    most significant bits of byte 0.
    """
    shift = 6 - 2 * (offset % 4)
    return (ord(bitField[offset // 4]) >> shift) & 3
+
# Returns the six bits at offset from a bit field
def getSixBitsFromBitField(bitField,offset):
    # each six-bit group is built from three consecutive two-bit groups
    offset *= 3
    value = (getTwoBitsFromBitField(bitField,offset) <<4) + (getTwoBitsFromBitField(bitField,offset+1) << 2) +getTwoBitsFromBitField(bitField,offset+2)
    return value
+
# 8 bits to six bits encoding from hash to generate PID string
def encodePID(hash):
    """Map the first 48 bits of a digest into 8 charMap3 characters (base64-style)."""
    global charMap3
    PID = ""
    for position in range (0,8):
        PID += charMap3[getSixBitsFromBitField(hash,position)]
    return PID
+
# Encryption table used to generate the device PID
def generatePidEncryptionTable() :
    """Build the 256-entry reflected CRC-32 lookup table (polynomial 0xEDB88320)."""
    table = []
    for n in range(0, 0x100):
        value = n
        for _ in range(0, 8):
            if value & 1:
                # low bit set: shift and fold in the reflected polynomial
                value = (value >> 1) ^ 0xEDB88320
            else:
                value = value >> 1
        table.append(value)
    return table
+
# Seed value used to generate the device PID
def generatePidSeed(table,dsn) :
    """CRC-style fold of the first four DSN characters through `table`."""
    value = 0
    for counter in range(0, 4):
        idx = (ord(dsn[counter]) ^ value) & 0xFF
        value = (value >> 8) ^ table[idx]
    return value
+
# Generate the device PID
def generateDevicePID(table,dsn,nbRoll):
    """Derive the 8-character device PID from the DSN.

    table: CRC table from generatePidEncryptionTable().
    nbRoll: number of leading DSN bytes XOR-folded into the seed bytes.
    Each resulting byte is squeezed into charMap4's alphabet.
    """
    global charMap4
    seed = generatePidSeed(table,dsn)
    pidAscii = ""
    # eight bytes: the 32-bit seed laid out big-endian, repeated twice
    pid = [(seed >>24) &0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF,(seed>>24) & 0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF]
    index = 0
    for counter in range (0,nbRoll):
        pid[index] = pid[index] ^ ord(dsn[counter])
        index = (index+1) %8
    # map each byte to a printable symbol (high bit folds into the index)
    for counter in range (0,8):
        index = ((((pid[counter] >>5) & 3) ^ pid[counter]) & 0x1f) + (pid[counter] >> 7)
        pidAscii += charMap4[index]
    return pidAscii
+
def crc32(s):
    """Bit-inverted CRC-32 of s (seeded with -1), as an unsigned 32-bit value.

    Equivalent to the expression (~binascii.crc32(s, -1)) & 0xFFFFFFFF.
    """
    return 0xFFFFFFFF & ~binascii.crc32(s, -1)
+
# convert from 8 digit PID to 10 digit PID with checksum
def checksumPid(s):
    """Append the standard two-character Mobipocket checksum to PID s."""
    global charMap4
    crc = crc32(s)
    crc = crc ^ (crc >> 16)
    res = s
    l = len(charMap4)
    # two checksum characters, one per low byte of the folded CRC
    for i in (0,1):
        b = crc & 0xff
        pos = (b // l) ^ (b % l)
        res += charMap4[pos%l]
        crc >>= 8
    return res
+
+
# old kindle serial number to fixed pid
def pidFromSerial(s, l):
    """Derive an l-character fixed PID from Kindle serial s (pre-2.5 firmware scheme)."""
    global charMap4
    crc = crc32(s)
    # fold the serial into l accumulator bytes...
    arr1 = [0]*l
    for i in xrange(len(s)):
        arr1[i%l] ^= ord(s[i])
    # ...then XOR in the CRC bytes, cycling through the four of them
    crc_bytes = [crc >> 24 & 0xff, crc >> 16 & 0xff, crc >> 8 & 0xff, crc & 0xff]
    for i in xrange(l):
        arr1[i] ^= crc_bytes[i&3]
    pid = ""
    # squeeze each byte into charMap4's alphabet (high bit folds into the index)
    for i in xrange(l):
        b = arr1[i] & 0xff
        pid+=charMap4[(b >> 7) + ((b >> 5 & 3) ^ (b & 0x1f))]
    return pid
+
+
# Parse the EXTH header records and use the Kindle serial number to calculate the book pid.
def getKindlePid(pidlst, rec209, token, serialnum):
    """Append to pidlst the PIDs derivable from a standalone Kindle serial.

    rec209/token come from the book's EXTH records (rec209 may be None).
    Returns the (mutated) pidlst.
    """
    if rec209 != None:
        # Compute book PID
        pidHash = SHA1(serialnum+rec209+token)
        bookPID = encodePID(pidHash)
        bookPID = checksumPid(bookPID)
        pidlst.append(bookPID)

    # compute fixed pid for old pre 2.5 firmware update pid as well
    bookPID = pidFromSerial(serialnum, 7) + "*"
    bookPID = checksumPid(bookPID)
    pidlst.append(bookPID)

    return pidlst
+
+
# Parse the EXTH header records and parse the Kindleinfo
# file to calculate the book pid.

def getK4Pids(pidlst, rec209, token, kInfoFile=None):
    """Append to pidlst the PIDs derivable from a Kindle-for-PC/Mac install.

    Reads the kindle.info store (optionally from an explicit kInfoFile),
    derives the DSN from machine-specific values, then appends the device
    PID and three book-PID variants.  Returns the (mutated) pidlst;
    returns it unchanged if the info file cannot be parsed.
    """
    global kindleDatabase
    global charMap1
    kindleDatabase = None
    try:
        kindleDatabase = parseKindleInfo(kInfoFile)
    except Exception, message:
        print(message)
        pass

    if kindleDatabase == None :
        return pidlst

    # Get the Mazama Random number
    MazamaRandomNumber = getKindleInfoValueForKey("MazamaRandomNumber")

    # Get the HDD serial
    encodedSystemVolumeSerialNumber = encodeHash(GetVolumeSerialNumber(),charMap1)

    # Get the current user name
    encodedUsername = encodeHash(GetUserName(),charMap1)

    # concat, hash and encode to calculate the DSN
    DSN = encode(SHA1(MazamaRandomNumber+encodedSystemVolumeSerialNumber+encodedUsername),charMap1)

    # Compute the device PID (as far as I can tell, it is used for nothing).
    table = generatePidEncryptionTable()
    devicePID = generateDevicePID(table,DSN,4)
    devicePID = checksumPid(devicePID)
    pidlst.append(devicePID)

    # Compute book PID
    if rec209 == None:
        print "\nNo EXTH record type 209 - Perhaps not a K4 file?"
        return pidlst

    # Get the kindle account token
    kindleAccountToken = getKindleInfoValueForKey("kindle.account.tokens")

    # book pid
    pidHash = SHA1(DSN+kindleAccountToken+rec209+token)
    bookPID = encodePID(pidHash)
    bookPID = checksumPid(bookPID)
    pidlst.append(bookPID)

    # variant 1: without the DSN
    pidHash = SHA1(kindleAccountToken+rec209+token)
    bookPID = encodePID(pidHash)
    bookPID = checksumPid(bookPID)
    pidlst.append(bookPID)

    # variant 2: without the account token
    pidHash = SHA1(DSN+rec209+token)
    bookPID = encodePID(pidHash)
    bookPID = checksumPid(bookPID)
    pidlst.append(bookPID)

    return pidlst
+
def getPidList(md1, md2, k4, pids, serials, kInfoFiles):
    """Assemble the full list of candidate PIDs to try against a book.

    md1/md2 are the EXTH rec209 data and token; k4 enables the default
    Kindle-for-PC/Mac lookup; pids/serials/kInfoFiles add user-supplied
    PIDs, Kindle serial numbers and extra kindle.info files.
    """
    pidlst = []
    if k4:
        pidlst = getK4Pids(pidlst, md1, md2)
    for infoFile in kInfoFiles:
        pidlst = getK4Pids(pidlst, md1, md2, infoFile)
    for serialnum in serials:
        pidlst = getKindlePid(pidlst, md1, md2, serialnum)
    pidlst.extend(pids)
    return pidlst
# 0.14 - Working out when the extra data flags are present has been problematic
# Versions 7 through 9 have tried to tweak the conditions, but have been
# only partially successful. Closer examination of lots of sample
-# files reveals that a confusin has arisen because trailing data entries
+# files reveals that a confusion has arisen because trailing data entries
# are not encrypted, but it turns out that the multibyte entries
# in utf8 file are encrypted. (Although neither kind gets compressed.)
# This knowledge leads to a simplification of the test for the
# Removed the disabled Calibre plug-in code
# Permit use of 8-digit PIDs
# 0.19 - It seems that multibyte entries aren't encrypted in a v6 file either.
-# 0.20 - Corretion: It seems that multibyte entries are encrypted in a v6 file.
+# 0.20 - Correction: It seems that multibyte entries are encrypted in a v6 file.
+# 0.21 - Added support for multiple pids
+# 0.22 - revised structure to hold MobiBook as a class to allow an extended interface
-__version__ = '0.20'
+__version__ = '0.22'
import sys
-import struct
-import binascii
class Unbuffered:
def __init__(self, stream):
self.stream.flush()
def __getattr__(self, attr):
return getattr(self.stream, attr)
+sys.stdout=Unbuffered(sys.stdout)
+
+import struct
+import binascii
class DrmException(Exception):
    """Raised for unrecoverable DRM-removal errors in this module."""
    pass
+
+#
+# MobiBook Utility Routines
+#
+
# Implementation of Pukall Cipher 1
def PC1(key, src, decryption=True):
sum1 = 0;
wkey = []
for i in xrange(8):
wkey.append(ord(key[i*2])<<8 | ord(key[i*2+1]))
-
dst = ""
for i in xrange(len(src)):
temp1 = 0;
num += (ord(ptr[size - num - 1]) & 0x3) + 1
return num
-class DrmStripper:
+
+
+class MobiBook:
def loadSection(self, section):
if (section + 1 == self.num_sections):
endoff = len(self.data_file)
off = self.sections[section][0]
return self.data_file[off:endoff]
    def __init__(self, infile):
        """Load a Mobipocket/PalmDB file and parse its section table,
        MOBI header fields and (if present) the EXTH metadata records."""
        # initial sanity check on file
        self.data_file = file(infile, 'rb').read()
        self.header = self.data_file[0:78]
        if self.header[0x3C:0x3C+8] != 'BOOKMOBI':
            raise DrmException("invalid file format")

        # build up section offset and flag info
        self.num_sections, = struct.unpack('>H', self.header[76:78])
        self.sections = []
        for i in xrange(self.num_sections):
            offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', self.data_file[78+i*8:78+i*8+8])
            flags, val = a1, a2<<16|a3<<8|a4
            self.sections.append( (offset, flags, val) )

        # parse information from section 0
        self.sect = self.loadSection(0)
        self.records, = struct.unpack('>H', self.sect[0x8:0x8+2])
        self.mobi_length, = struct.unpack('>L',self.sect[0x14:0x18])
        self.mobi_version, = struct.unpack('>L',self.sect[0x68:0x6C])
        print "MOBI header version = %d, length = %d" %(self.mobi_version, self.mobi_length)
        self.extra_data_flags = 0
        if (self.mobi_length >= 0xE4) and (self.mobi_version >= 5):
            self.extra_data_flags, = struct.unpack('>H', self.sect[0xF2:0xF4])
            print "Extra Data Flags = %d" % self.extra_data_flags
        if self.mobi_version < 7:
            # multibyte utf8 data is included in the encryption for mobi_version 6 and below
            # so clear that byte so that we leave it to be decrypted.
            self.extra_data_flags &= 0xFFFE

        # if exth region exists parse it for metadata array
        self.meta_array = {}
        exth_flag, = struct.unpack('>L', self.sect[0x80:0x84])
        exth = ''
        if exth_flag & 0x40:
            exth = self.sect[16 + self.mobi_length:]
            nitems, = struct.unpack('>I', exth[8:12])
            pos = 12
            # each EXTH item: 4-byte type, 4-byte total size, then content
            for i in xrange(nitems):
                type, size = struct.unpack('>II', exth[pos: pos + 8])
                content = exth[pos + 8: pos + size]
                self.meta_array[type] = content
                pos += size
+
+ def getBookTitle(self):
+ title = ''
+ if 503 in self.meta_array:
+ title = self.meta_array[503]
+ else :
+ toff, tlen = struct.unpack('>II', self.sect[0x54:0x5c])
+ tend = toff + tlen
+ title = self.sect[toff:tend]
+ if title == '':
+ title = self.header[:32]
+ title = title.split("\0")[0]
+ return title
+
+ def getPIDMetaInfo(self):
+ rec209 = None
+ token = None
+ if 209 in self.meta_array:
+ rec209 = self.meta_array[209]
+ data = rec209
+ # Parse the 209 data to find the the exth record with the token data.
+ # The last character of the 209 data points to the record with the token.
+ # Always 208 from my experience, but I'll leave the logic in case that changes.
+ for i in xrange(len(data)):
+ if ord(data[i]) != 0:
+ if self.meta_array[ord(data[i])] != None:
+ token = self.meta_array[ord(data[i])]
+ return rec209, token
+
def patch(self, off, new):
self.data_file = self.data_file[:off] + new + self.data_file[off+len(new):]
assert off + in_off + len(new) <= endoff
self.patch(off + in_off, new)
- def parseDRM(self, data, count, pid):
- pid = pid.ljust(16,'\0')
- keyvec1 = "\x72\x38\x33\xB0\xB4\xF2\xE3\xCA\xDF\x09\x01\xD6\xE2\xE0\x3F\x96"
- temp_key = PC1(keyvec1, pid, False)
- temp_key_sum = sum(map(ord,temp_key)) & 0xff
+ def parseDRM(self, data, count, pidlist):
found_key = None
- for i in xrange(count):
- verification, size, type, cksum, cookie = struct.unpack('>LLLBxxx32s', data[i*0x30:i*0x30+0x30])
- cookie = PC1(temp_key, cookie)
- ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie)
- if verification == ver and cksum == temp_key_sum and (flags & 0x1F) == 1:
- found_key = finalkey
+ keyvec1 = "\x72\x38\x33\xB0\xB4\xF2\xE3\xCA\xDF\x09\x01\xD6\xE2\xE0\x3F\x96"
+ for pid in pidlist:
+ bigpid = pid.ljust(16,'\0')
+ temp_key = PC1(keyvec1, bigpid, False)
+ temp_key_sum = sum(map(ord,temp_key)) & 0xff
+ found_key = None
+ for i in xrange(count):
+ verification, size, type, cksum, cookie = struct.unpack('>LLLBxxx32s', data[i*0x30:i*0x30+0x30])
+ if cksum == temp_key_sum:
+ cookie = PC1(temp_key, cookie)
+ ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie)
+ if verification == ver and (flags & 0x1F) == 1:
+ found_key = finalkey
+ break
+ if found_key != None:
break
if not found_key:
# Then try the default encoding that doesn't require a PID
+ pid = "00000000"
temp_key = keyvec1
temp_key_sum = sum(map(ord,temp_key)) & 0xff
for i in xrange(count):
verification, size, type, cksum, cookie = struct.unpack('>LLLBxxx32s', data[i*0x30:i*0x30+0x30])
- cookie = PC1(temp_key, cookie)
- ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie)
- if verification == ver and cksum == temp_key_sum:
- found_key = finalkey
- break
- return found_key
-
- def __init__(self, data_file, pid):
- if len(pid)==10:
- if checksumPid(pid[0:-2]) != pid:
- raise DrmException("invalid PID checksum")
- pid = pid[0:-2]
- elif len(pid)==8:
- print "PID without checksum given. With checksum PID is "+checksumPid(pid)
- else:
- raise DrmException("Invalid PID length")
-
- self.data_file = data_file
- header = data_file[0:72]
- if header[0x3C:0x3C+8] != 'BOOKMOBI':
- raise DrmException("invalid file format")
- self.num_sections, = struct.unpack('>H', data_file[76:78])
+ if cksum == temp_key_sum:
+ cookie = PC1(temp_key, cookie)
+ ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie)
+ if verification == ver:
+ found_key = finalkey
+ break
+ return [found_key,pid]
- self.sections = []
- for i in xrange(self.num_sections):
- offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', data_file[78+i*8:78+i*8+8])
- flags, val = a1, a2<<16|a3<<8|a4
- self.sections.append( (offset, flags, val) )
-
- sect = self.loadSection(0)
- records, = struct.unpack('>H', sect[0x8:0x8+2])
- mobi_length, = struct.unpack('>L',sect[0x14:0x18])
- mobi_version, = struct.unpack('>L',sect[0x68:0x6C])
- extra_data_flags = 0
- print "MOBI header version = %d, length = %d" %(mobi_version, mobi_length)
- if (mobi_length >= 0xE4) and (mobi_version >= 5):
- extra_data_flags, = struct.unpack('>H', sect[0xF2:0xF4])
- print "Extra Data Flags = %d" %extra_data_flags
- if mobi_version < 7:
- # multibyte utf8 data is included in the encryption for mobi_version 6 and below
- # so clear that byte so that we leave it to be decrypted.
- extra_data_flags &= 0xFFFE
-
- crypto_type, = struct.unpack('>H', sect[0xC:0xC+2])
+ def processBook(self, pidlist):
+ crypto_type, = struct.unpack('>H', self.sect[0xC:0xC+2])
if crypto_type == 0:
print "This book is not encrypted."
+ return self.data_file
+ if crypto_type == 1:
+ raise DrmException("Cannot decode Mobipocket encryption type 1")
+ if crypto_type != 2:
+ raise DrmException("Cannot decode unknown Mobipocket encryption type %d" % crypto_type)
+
+ goodpids = []
+ for pid in pidlist:
+ if len(pid)==10:
+ if checksumPid(pid[0:-2]) != pid:
+ print "Warning: PID " + pid + " has incorrect checksum, should have been "+checksumPid(pid[0:-2])
+ goodpids.append(pid[0:-2])
+ elif len(pid)==8:
+ goodpids.append(pid)
+
+ # calculate the keys
+ drm_ptr, drm_count, drm_size, drm_flags = struct.unpack('>LLLL', self.sect[0xA8:0xA8+16])
+ if drm_count == 0:
+ raise DrmException("Not yet initialised with PID. Must be opened with Mobipocket Reader first.")
+ found_key, pid = self.parseDRM(self.sect[drm_ptr:drm_ptr+drm_size], drm_count, goodpids)
+ if not found_key:
+ raise DrmException("No key found. Most likely the correct PID has not been given.")
+
+ if pid=="00000000":
+ print "File has default encryption, no specific PID."
else:
- if crypto_type == 1:
- raise DrmException("cannot decode Mobipocket encryption type 1")
- if crypto_type != 2:
- raise DrmException("unknown encryption type: %d" % crypto_type)
-
- # calculate the keys
- drm_ptr, drm_count, drm_size, drm_flags = struct.unpack('>LLLL', sect[0xA8:0xA8+16])
- if drm_count == 0:
- raise DrmException("no PIDs found in this file")
- found_key = self.parseDRM(sect[drm_ptr:drm_ptr+drm_size], drm_count, pid)
- if not found_key:
- raise DrmException("no key found. maybe the PID is incorrect")
-
- # kill the drm keys
- self.patchSection(0, "\0" * drm_size, drm_ptr)
- # kill the drm pointers
- self.patchSection(0, "\xff" * 4 + "\0" * 12, 0xA8)
- # clear the crypto type
- self.patchSection(0, "\0" * 2, 0xC)
-
- # decrypt sections
- print "Decrypting. Please wait . . .",
- new_data = self.data_file[:self.sections[1][0]]
- for i in xrange(1, records+1):
- data = self.loadSection(i)
- extra_size = getSizeOfTrailingDataEntries(data, len(data), extra_data_flags)
- if i%100 == 0:
- print ".",
- # print "record %d, extra_size %d" %(i,extra_size)
- new_data += PC1(found_key, data[0:len(data) - extra_size])
- if extra_size > 0:
- new_data += data[-extra_size:]
- #self.patchSection(i, PC1(found_key, data[0:len(data) - extra_size]))
- if self.num_sections > records+1:
- new_data += self.data_file[self.sections[records+1][0]:]
- self.data_file = new_data
- print "done"
-
- def getResult(self):
+ print "File is encoded with PID "+checksumPid(pid)+"."
+
+ # kill the drm keys
+ self.patchSection(0, "\0" * drm_size, drm_ptr)
+ # kill the drm pointers
+ self.patchSection(0, "\xff" * 4 + "\0" * 12, 0xA8)
+ # clear the crypto type
+ self.patchSection(0, "\0" * 2, 0xC)
+
+ # decrypt sections
+ print "Decrypting. Please wait . . .",
+ new_data = self.data_file[:self.sections[1][0]]
+ for i in xrange(1, self.records+1):
+ data = self.loadSection(i)
+ extra_size = getSizeOfTrailingDataEntries(data, len(data), self.extra_data_flags)
+ if i%100 == 0:
+ print ".",
+ # print "record %d, extra_size %d" %(i,extra_size)
+ new_data += PC1(found_key, data[0:len(data) - extra_size])
+ if extra_size > 0:
+ new_data += data[-extra_size:]
+ if self.num_sections > self.records+1:
+ new_data += self.data_file[self.sections[self.records+1][0]:]
+ self.data_file = new_data
+ print "done"
return self.data_file
def getUnencryptedBook(infile,pid):
- sys.stdout=Unbuffered(sys.stdout)
- data_file = file(infile, 'rb').read()
- strippedFile = DrmStripper(data_file, pid)
- return strippedFile.getResult()
+ if not os.path.isfile(infile):
+ raise DrmException('Input File Not Found')
+ book = MobiBook(infile)
+ return book.processBook([pid])
+
def getUnencryptedBookWithList(infile,pidlist):
    """Decrypt the Mobipocket file at infile, trying each PID in pidlist.

    Returns the decrypted file contents; raises DrmException on failure.
    """
    if not os.path.isfile(infile):
        raise DrmException('Input File Not Found')
    book = MobiBook(infile)
    return book.processBook(pidlist)
def main(argv=sys.argv):
- sys.stdout=Unbuffered(sys.stdout)
print ('MobiDeDrm v%(__version__)s. '
'Copyright 2008-2010 The Dark Reverser.' % globals())
if len(argv)<4:
print "Removes protection from Mobipocket books"
print "Usage:"
- print " %s <infile> <outfile> <PID>" % sys.argv[0]
+ print " %s <infile> <outfile> <Comma separated list of PIDs to try>" % sys.argv[0]
return 1
else:
infile = argv[1]
outfile = argv[2]
- pid = argv[3]
+ pidlist = argv[3].split(',')
try:
- stripped_file = getUnencryptedBook(infile, pid)
+ stripped_file = getUnencryptedBookWithList(infile, pidlist)
file(outfile, 'wb').write(stripped_file)
except DrmException, e:
print "Error: %s" % e
--- /dev/null
+#! /usr/bin/python
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+# For use with Topaz Scripts Version 2.6
+
+import csv
+import sys
+import os
+import getopt
+from struct import pack
+from struct import unpack
+
+
class DocParser(object):
    """Translates a Topaz flattened-xml stylesheet into CSS text.

    fontsize, ph (page height) and pw (page width) supply the scales used
    to turn absolute style values into CSS percentages.
    """
    def __init__(self, flatxml, fontsize, ph, pw):
        # one 'name=value' entry per line
        self.flatdoc = flatxml.split('\n')
        self.fontsize = int(fontsize)
        self.ph = int(ph) * 1.0
        self.pw = int(pw) * 1.0

    # style tags we know how to convert, mapped to their CSS selector
    stags = {
        'paragraph' : 'p',
        'graphic' : '.graphic'
    }

    # value-carrying attributes -> CSS property prefix
    attr_val_map = {
        'hang' : 'text-indent: ',
        'indent' : 'text-indent: ',
        'line-space' : 'line-height: ',
        'margin-bottom' : 'margin-bottom: ',
        'margin-left' : 'margin-left: ',
        'margin-right' : 'margin-right: ',
        'margin-top' : 'margin-top: ',
        'space-after' : 'padding-bottom: ',
    }

    # keyword attributes ('attr-value') -> complete CSS declaration text
    attr_str_map = {
        'align-center' : 'text-align: center; margin-left: auto; margin-right: auto;',
        'align-left' : 'text-align: left;',
        'align-right' : 'text-align: right;',
        'align-justify' : 'text-align: justify;',
        'display-inline' : 'display: inline;',
        'pos-left' : 'text-align: left;',
        'pos-right' : 'text-align: right;',
        'pos-center' : 'text-align: center; margin-left: auto; margin-right: auto;',
    }


    # find tag if within pos to end inclusive
    def findinDoc(self, tagpath, pos, end) :
        """Scan flatdoc[pos:end] for a line whose name part ends with
        tagpath; return (index, value), or (-1, None) when not found."""
        result = None
        docList = self.flatdoc
        cnt = len(docList)
        if end == -1 :
            end = cnt
        else:
            end = min(cnt,end)
        foundat = -1
        for j in xrange(pos, end):
            item = docList[j]
            if item.find('=') >= 0:
                (name, argres) = item.split('=',1)
            else :
                name = item
                argres = ''
            if name.endswith(tagpath) :
                result = argres
                foundat = j
                break
        return foundat, result


    # return list of start positions for the tagpath
    def posinDoc(self, tagpath):
        """Return the indices of every flatdoc line matching tagpath."""
        startpos = []
        pos = 0
        res = ""
        while res != None :
            (foundpos, res) = self.findinDoc(tagpath, pos, -1)
            if res != None :
                startpos.append(foundpos)
                pos = foundpos + 1
        return startpos


    def process(self):
        """Convert every <style> record to CSS.

        Returns (csspage, classlst): the generated stylesheet text and a
        newline-separated list of the emitted class names.
        """

        classlst = ''
        csspage = '.cl-center { text-align: center; margin-left: auto; margin-right: auto; }\n'
        csspage += '.cl-right { text-align: right; }\n'
        csspage += '.cl-left { text-align: left; }\n'
        csspage += '.cl-justify { text-align: justify; }\n'

        # generate a list of each <style> starting point in the stylesheet
        styleList= self.posinDoc('book.stylesheet.style')
        stylecnt = len(styleList)
        styleList.append(-1)

        # process each style converting what you can

        for j in xrange(stylecnt):
            start = styleList[j]
            end = styleList[j+1]

            (pos, tag) = self.findinDoc('style._tag',start,end)
            if tag == None :
                (pos, tag) = self.findinDoc('style.type',start,end)

            # Is this something we know how to convert to css
            if tag in self.stags :

                # get the style class
                (pos, sclass) = self.findinDoc('style.class',start,end)
                if sclass != None:
                    sclass = sclass.replace(' ','-')
                    sclass = '.cl-' + sclass.lower()
                else :
                    sclass = ''

                # check for any "after class" specifiers
                (pos, aftclass) = self.findinDoc('style._after_class',start,end)
                if aftclass != None:
                    aftclass = aftclass.replace(' ','-')
                    aftclass = '.cl-' + aftclass.lower()
                else :
                    aftclass = ''

                cssargs = {}

                while True :

                    (pos1, attr) = self.findinDoc('style.rule.attr', start, end)
                    (pos2, val) = self.findinDoc('style.rule.value', start, end)

                    if attr == None : break

                    if (attr == 'display') or (attr == 'pos') or (attr == 'align'):
                        # handle text based attributes
                        attr = attr + '-' + val
                        if attr in self.attr_str_map :
                            cssargs[attr] = (self.attr_str_map[attr], '')
                    else :
                        # handle value based attributes
                        if attr in self.attr_val_map :
                            name = self.attr_val_map[attr]
                            # pick the scale matching the attribute's axis
                            if attr in ('margin-bottom', 'margin-top', 'space-after') :
                                scale = self.ph
                            elif attr in ('margin-right', 'indent', 'margin-left', 'hang') :
                                scale = self.pw
                            elif attr == 'line-space':
                                scale = self.fontsize * 2.0

                            if not ((attr == 'hang') and (int(val) == 0)) :
                                pv = float(val)/scale
                                cssargs[attr] = (self.attr_val_map[attr], pv)
                                keep = True

                    start = max(pos1, pos2) + 1

                # disable all of the after class tags until I figure out how to handle them
                if aftclass != "" : keep = False

                # NOTE(review): 'keep' is only ever assigned when a value-based
                # attribute is stored (or cleared just above); a style holding
                # only keyword attributes relies on the value left over from a
                # previous iteration, and the very first style would raise
                # NameError in that case -- confirm before changing.
                if keep :
                    # make sure line-space does not go below 100% or above 300% since
                    # it can be wacky in some styles
                    if 'line-space' in cssargs:
                        seg = cssargs['line-space'][0]
                        val = cssargs['line-space'][1]
                        if val < 1.0: val = 1.0
                        if val > 3.0: val = 3.0
                        del cssargs['line-space']
                        cssargs['line-space'] = (self.attr_val_map['line-space'], val)


                    # handle modifications for css style hanging indents
                    if 'hang' in cssargs:
                        hseg = cssargs['hang'][0]      # NOTE(review): hseg is never used
                        hval = cssargs['hang'][1]
                        del cssargs['hang']
                        cssargs['hang'] = (self.attr_val_map['hang'], -hval)
                        mval = 0
                        mseg = 'margin-left: '
                        mval = hval
                        if 'margin-left' in cssargs:
                            mseg = cssargs['margin-left'][0]
                            mval = cssargs['margin-left'][1]
                            if mval < 0: mval = 0
                            mval = hval + mval
                        cssargs['margin-left'] = (mseg, mval)
                        if 'indent' in cssargs:
                            del cssargs['indent']

                    # emit the declarations; values are percentages of their scale
                    cssline = sclass + ' { '
                    for key in iter(cssargs):
                        mseg = cssargs[key][0]
                        mval = cssargs[key][1]
                        if mval == '':
                            cssline += mseg + ' '
                        else :
                            aseg = mseg + '%.1f%%;' % (mval * 100.0)
                            cssline += aseg + ' '

                    cssline += '}'

                    if sclass != '' :
                        classlst += sclass + '\n'

                    # handle special case of paragraph class used inside chapter heading
                    # and non-chapter headings
                    if sclass != '' :
                        ctype = sclass[4:7]
                        if ctype == 'ch1' :
                            csspage += 'h1' + cssline + '\n'
                        if ctype == 'ch2' :
                            csspage += 'h2' + cssline + '\n'
                        if ctype == 'ch3' :
                            csspage += 'h3' + cssline + '\n'
                        if ctype == 'h1-' :
                            csspage += 'h4' + cssline + '\n'
                        if ctype == 'h2-' :
                            csspage += 'h5' + cssline + '\n'
                        if ctype == 'h3_' :
                            csspage += 'h6' + cssline + '\n'

                    if cssline != ' { }':
                        csspage += self.stags[tag] + cssline + '\n'


        return csspage, classlst
+
+
+
def convert2CSS(flatxml, fontsize, ph, pw):
    """Convert a Topaz flattened stylesheet to CSS.

    Despite the single name, this returns DocParser.process()'s
    (csspage, classlst) tuple.
    """

    print ' ', 'Using font size:',fontsize
    print ' ', 'Using page height:', ph
    print ' ', 'Using page width:', pw

    # create a document parser
    dp = DocParser(flatxml, fontsize, ph, pw)

    # csspage is actually the (csspage, classlst) tuple from process()
    csspage = dp.process()

    return csspage
--- /dev/null
+#!/usr/bin/env python
+
class Unbuffered:
    """Stream wrapper that flushes after every write.

    All other attribute access is delegated to the wrapped stream.
    """
    def __init__(self, stream):
        self.stream = stream
    def write(self, data):
        target = self.stream
        target.write(data)
        target.flush()
    def __getattr__(self, attr):
        # delegate everything else to the wrapped stream
        return getattr(self.stream, attr)
+
+import sys
+sys.stdout=Unbuffered(sys.stdout)
+import os, csv, getopt
+import zlib, zipfile, tempfile, shutil
+from struct import pack
+from struct import unpack
+
# Raised for any unrecoverable Topaz parsing or DRM-removal failure.
class TpzDRMError(Exception):
    pass
+
+# local support routines
+import kgenpids
+import genbook
+#
+# Utility routines
+#
+
# Get a 7 bit encoded number from file
def bookReadEncodedNumber(fo):
    """Read one variable-length integer from file object fo.

    Encoding: an optional 0xFF prefix marks a negative number; then bytes
    with the top bit set each contribute 7 payload bits (most significant
    first) until a byte below 0x80 terminates the value.
    """
    negative = False
    current = ord(fo.read(1))
    if current == 0xFF:
        negative = True
        current = ord(fo.read(1))
    if current >= 0x80:
        accum = current & 0x7F
        while current >= 0x80:
            current = ord(fo.read(1))
            accum = (accum << 7) + (current & 0x7F)
        current = accum
    if negative:
        current = -current
    return current
+
# Get a length prefixed string from file
def bookReadString(fo):
    # length is a 7-bit variable-length integer, then that many raw bytes
    stringLength = bookReadEncodedNumber(fo)
    return unpack(str(stringLength)+"s",fo.read(stringLength))[0]
+
+#
+# crypto routines
+#
+
# Context initialisation for the Topaz Crypto
def topazCryptoInit(key):
    """Initialise the two-word Topaz cipher context [ctx1, ctx2] from key.

    NOTE(review): an empty key leaves the second word unbound and raises
    NameError; callers always pass a non-empty PID, so this is latent only.
    """
    c1 = 0x0CAFFE19E
    for ch in key:
        kb = ord(ch)
        c2 = c1
        mixed = ((c1 >> 2) * (c1 >> 7)) & 0xFFFFFFFF
        c1 = mixed ^ ((kb * kb * 0x0F902007) & 0xFFFFFFFF)
    return [c1, c2]
+
# decrypt data with the context prepared by topazCryptoInit()
def topazCryptoDecrypt(data, ctx):
    """Run the Topaz stream cipher over data, evolving the context per byte."""
    c1 = ctx[0]
    c2 = ctx[1]
    out = []
    for ch in data:
        b = ord(ch)
        m = (b ^ ((c1 >> 3) & 0xFF) ^ ((c2 << 3) & 0xFF)) & 0xFF
        # the context is fed from the decrypted byte, not the ciphertext
        c2 = c1
        c1 = (((c1 >> 2) * (c1 >> 7)) & 0xFFFFFFFF) ^ ((m * m * 0x0F902007) & 0xFFFFFFFF)
        out.append(chr(m))
    return "".join(out)
+
# Decrypt data with the PID
def decryptRecord(data,PID):
    # fresh cipher context per record, keyed on the PID
    ctx = topazCryptoInit(PID)
    return topazCryptoDecrypt(data, ctx)
+
# Try to decrypt a dkey record (contains the bookPID)
def decryptDkeyRecord(data,PID):
    """Decrypt one dkey record with PID and return the 8-byte book PID.

    Expected plaintext layout: 'PID' <len=8> <PID echo> <len=8> <book pid> 'pid'.
    Raises TpzDRMError when the magic strings, lengths or PID echo do not
    match, i.e. this PID does not unlock the record.
    """
    record = decryptRecord(data,PID)
    fields = unpack("3sB8sB8s3s",record)
    if fields[0] != "PID" or fields[5] != "pid" :
        raise TpzDRMError("Didn't find PID magic numbers in record")
    elif fields[1] != 8 or fields[3] != 8 :
        raise TpzDRMError("Record didn't contain correct length fields")
    elif fields[2] != PID :
        raise TpzDRMError("Record didn't contain PID")
    return fields[4]
+
# Decrypt all dkey records (contain the book PID)
def decryptDkeyRecords(data,PID):
    """Walk the dkey blob (count byte, then length-prefixed records) and
    return every book key that PID successfully unlocks.

    Raises TpzDRMError if no record decrypts with this PID.
    """
    nbKeyRecords = ord(data[0])
    records = []
    data = data[1:]
    for i in range (0,nbKeyRecords):
        length = ord(data[0])
        try:
            key = decryptDkeyRecord(data[1:length+1],PID)
            records.append(key)
        except TpzDRMError:
            # wrong PID for this record -- try the next one
            pass
        data = data[1+length:]
    if len(records) == 0:
        raise TpzDRMError("BookKey Not Found")
    return records
+
+
+class TopazBook:
    def __init__(self, filename, outdir):
        """Open a Topaz file, verify its magic, and parse headers plus metadata."""
        self.fo = file(filename, 'rb')
        self.outdir = outdir
        # absolute file offset where the payload (record data) begins
        self.bookPayloadOffset = 0
        # tag -> [[offset, decompressedLength, compressedLength], ...]
        self.bookHeaderRecords = {}
        self.bookMetadata = {}
        self.bookKey = None
        magic = unpack("4s",self.fo.read(4))[0]
        if magic != 'TPZ0':
            raise TpzDRMError("Parse Error : Invalid Header, not a Topaz file")
        self.parseTopazHeaders()
        self.parseMetadata()
+
    def parseTopazHeaders(self):
        """Read the header record table at the top of the file into
        self.bookHeaderRecords and note where the payload starts."""
        def bookReadHeaderRecordData():
            # Read and return the data of one header record at the current book file position
            # [[offset,decompressedLength,compressedLength],...]
            nbValues = bookReadEncodedNumber(self.fo)
            values = []
            for i in range (0,nbValues):
                values.append([bookReadEncodedNumber(self.fo),bookReadEncodedNumber(self.fo),bookReadEncodedNumber(self.fo)])
            return values
        def parseTopazHeaderRecord():
            # Read and parse one header record at the current book file position and return the associated data
            # [[offset,decompressedLength,compressedLength],...]
            # each record starts with a 0x63 marker byte
            if ord(self.fo.read(1)) != 0x63:
                raise TpzDRMError("Parse Error : Invalid Header")
            tag = bookReadString(self.fo)
            record = bookReadHeaderRecordData()
            return [tag,record]
        nbRecords = bookReadEncodedNumber(self.fo)
        for i in range (0,nbRecords):
            result = parseTopazHeaderRecord()
            # print result[0], result[1]
            self.bookHeaderRecords[result[0]] = result[1]
        # a 0x64 byte terminates the header table
        if ord(self.fo.read(1)) != 0x64 :
            raise TpzDRMError("Parse Error : Invalid Header")
        self.bookPayloadOffset = self.fo.tell()
+
+ def parseMetadata(self):
+ # Parse the metadata record from the book payload and return a list of [key,values]
+ self.fo.seek(self.bookPayloadOffset + self.bookHeaderRecords["metadata"][0][0])
+ tag = bookReadString(self.fo)
+ if tag != "metadata" :
+ raise TpzDRMError("Parse Error : Record Names Don't Match")
+ flags = ord(self.fo.read(1))
+ nbRecords = ord(self.fo.read(1))
+ for i in range (0,nbRecords) :
+ record = [bookReadString(self.fo), bookReadString(self.fo)]
+ self.bookMetadata[record[0]] = record[1]
+ return self.bookMetadata
+
+ def getPIDMetaInfo(self):
+ keysRecord = None
+ KeysRecordRecord = None
+ if 'keys' in self.bookMetadata:
+ keysRecord = self.bookMetadata['keys']
+ keysRecordRecord = self.bookMetadata[keysRecord]
+ return keysRecord, keysRecordRecord
+
+ def getBookTitle(self):
+ title = ''
+ if 'Title' in self.bookMetadata:
+ title = self.bookMetadata['Title']
+ return title
+
+ def setBookKey(self, key):
+ self.bookKey = key
+
+ def getBookPayloadRecord(self, name, index):
+ # Get a record in the book payload, given its name and index.
+ # decrypted and decompressed if necessary
+ encrypted = False
+ compressed = False
+ try:
+ recordOffset = self.bookHeaderRecords[name][index][0]
+ except:
+ raise TpzDRMError("Parse Error : Invalid Record, record not found")
+
+ self.fo.seek(self.bookPayloadOffset + recordOffset)
+
+ tag = bookReadString(self.fo)
+ if tag != name :
+ raise TpzDRMError("Parse Error : Invalid Record, record name doesn't match")
+
+ recordIndex = bookReadEncodedNumber(self.fo)
+ if recordIndex < 0 :
+ encrypted = True
+ recordIndex = -recordIndex -1
+
+ if recordIndex != index :
+ raise TpzDRMError("Parse Error : Invalid Record, index doesn't match")
+
+ if (self.bookHeaderRecords[name][index][2] > 0):
+ compressed = True
+ record = self.fo.read(self.bookHeaderRecords[name][index][2])
+ else:
+ record = self.fo.read(self.bookHeaderRecords[name][index][1])
+
+ if encrypted:
+ if self.bookKey:
+ ctx = topazCryptoInit(self.bookKey)
+ record = topazCryptoDecrypt(record,ctx)
+ else :
+ raise TpzDRMError("Error: Attempt to decrypt without bookKey")
+
+ if compressed:
+ record = zlib.decompress(record)
+
+ return record
+
+ def processBook(self, pidlst):
+ raw = 0
+ fixedimage=True
+ try:
+ keydata = self.getBookPayloadRecord('dkey', 0)
+ except TpzDRMError, e:
+ print "no dkey record found, book may not be encrypted"
+ print "attempting to extrct files without a book key"
+ self.createBookDirectory()
+ self.extractFiles()
+ print "Successfully Extracted Topaz contents"
+ rv = genbook.generateBook(self.outdir, raw, fixedimage)
+ if rv == 0:
+ print "\nBook Successfully generated"
+ return rv
+
+ # try each pid to decode the file
+ bookKey = None
+ for pid in pidlst:
+ # use 8 digit pids here
+ pid = pid[0:8]
+ print "\nTrying: ", pid
+ bookKeys = []
+ data = keydata
+ try:
+ bookKeys+=decryptDkeyRecords(data,pid)
+ except TpzDRMError, e:
+ pass
+ else:
+ bookKey = bookKeys[0]
+ print "Book Key Found!"
+ break
+
+ if not bookKey:
+ raise TpzDRMError('Decryption Unsucessful; No valid pid found')
+
+ self.setBookKey(bookKey)
+ self.createBookDirectory()
+ self.extractFiles()
+ print "Successfully Extracted Topaz contents"
+ rv = genbook.generateBook(self.outdir, raw, fixedimage)
+ if rv == 0:
+ print "\nBook Successfully generated"
+ return rv
+
+ def createBookDirectory(self):
+ outdir = self.outdir
+ # create output directory structure
+ if not os.path.exists(outdir):
+ os.makedirs(outdir)
+ destdir = os.path.join(outdir,'img')
+ if not os.path.exists(destdir):
+ os.makedirs(destdir)
+ destdir = os.path.join(outdir,'color_img')
+ if not os.path.exists(destdir):
+ os.makedirs(destdir)
+ destdir = os.path.join(outdir,'page')
+ if not os.path.exists(destdir):
+ os.makedirs(destdir)
+ destdir = os.path.join(outdir,'glyphs')
+ if not os.path.exists(destdir):
+ os.makedirs(destdir)
+
+ def extractFiles(self):
+ outdir = self.outdir
+ for headerRecord in self.bookHeaderRecords:
+ name = headerRecord
+ if name != "dkey" :
+ ext = '.dat'
+ if name == 'img' : ext = '.jpg'
+ if name == 'color' : ext = '.jpg'
+ print "\nProcessing Section: %s " % name
+ for index in range (0,len(self.bookHeaderRecords[name])) :
+ fnum = "%04d" % index
+ fname = name + fnum + ext
+ destdir = outdir
+ if name == 'img':
+ destdir = os.path.join(outdir,'img')
+ if name == 'color':
+ destdir = os.path.join(outdir,'color_img')
+ if name == 'page':
+ destdir = os.path.join(outdir,'page')
+ if name == 'glyphs':
+ destdir = os.path.join(outdir,'glyphs')
+ outputFile = os.path.join(destdir,fname)
+ print ".",
+ record = self.getBookPayloadRecord(name,index)
+ if record != '':
+ file(outputFile, 'wb').write(record)
+ print " "
+
+
+def zipUpDir(myzip, tempdir,localname):
+ currentdir = tempdir
+ if localname != "":
+ currentdir = os.path.join(currentdir,localname)
+ list = os.listdir(currentdir)
+ for file in list:
+ afilename = file
+ localfilePath = os.path.join(localname, afilename)
+ realfilePath = os.path.join(currentdir,file)
+ if os.path.isfile(realfilePath):
+ myzip.write(realfilePath, localfilePath)
+ elif os.path.isdir(realfilePath):
+ zipUpDir(myzip, tempdir, localfilePath)
+
+
+def usage(progname):
+    # Print command-line usage for this script.
+    print "Removes DRM protection from Topaz ebooks and extract the contents"
+    print "Usage:"
+    print "  %s [-k <kindle.info>] [-p <pidnums>] [-s <kindleSerialNumbers>] <infile> <outdir> " % progname
+
+
+# Main
+def main(argv=sys.argv):
+ progname = os.path.basename(argv[0])
+ k4 = False
+ pids = []
+ serials = []
+ kInfoFiles = []
+
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], "k:p:s:")
+ except getopt.GetoptError, err:
+ print str(err)
+ usage(progname)
+ return 1
+ if len(args)<2:
+ usage(progname)
+ return 1
+
+ for o, a in opts:
+ if o == "-k":
+ if a == None :
+ print "Invalid parameter for -k"
+ return 1
+ kInfoFiles.append(a)
+ if o == "-p":
+ if a == None :
+ print "Invalid parameter for -p"
+ return 1
+ pids = a.split(',')
+ if o == "-s":
+ if a == None :
+ print "Invalid parameter for -s"
+ return 1
+ serials = a.split(',')
+ k4 = True
+
+ infile = args[0]
+ outdir = args[1]
+
+ if not os.path.isfile(infile):
+ print "Input File Does Not Exist"
+ return 1
+
+ bookname = os.path.splitext(os.path.basename(infile))[0]
+ tempdir = tempfile.mkdtemp()
+
+ tb = TopazBook(infile, tempdir)
+ title = tb.getBookTitle()
+ print "Processing Book: ", title
+ keysRecord, keysRecordRecord = tb.getPIDMetaInfo()
+ pidlst = kgenpids.getPidList(keysRecord, keysRecordRecord, k4, pids, serials, kInfoFiles)
+
+ try:
+ tb.processBook(pidlst)
+ except TpzDRMError, e:
+ print str(e)
+ print " Creating DeBug Full Zip Archive of Book"
+ zipname = os.path.join(outdir, bookname + '_debug' + '.zip')
+ myzip = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+ zipUpDir(myzip, tempdir, '')
+ myzip.close()
+ return 1
+
+ print " Creating HTML ZIP Archive"
+ zipname = os.path.join(outdir, bookname + '_nodrm' + '.zip')
+ myzip1 = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+ myzip1.write(os.path.join(tempdir,'book.html'),'book.html')
+ myzip1.write(os.path.join(tempdir,'book.opf'),'book.opf')
+ if os.path.isfile(os.path.join(tempdir,'cover.jpg')):
+ myzip1.write(os.path.join(tempdir,'cover.jpg'),'cover.jpg')
+ myzip1.write(os.path.join(tempdir,'style.css'),'style.css')
+ zipUpDir(myzip1, tempdir, 'img')
+ myzip1.close()
+
+ print " Creating SVG ZIP Archive"
+ zipname = os.path.join(outdir, bookname + '_SVG' + '.zip')
+ myzip2 = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+ myzip2.write(os.path.join(tempdir,'index_svg.xhtml'),'index_svg.xhtml')
+ zipUpDir(myzip2, tempdir, 'svg')
+ zipUpDir(myzip2, tempdir, 'img')
+ myzip2.close()
+
+ print " Creating XML ZIP Archive"
+ zipname = os.path.join(outdir, bookname + '_XML' + '.zip')
+ myzip3 = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+ targetdir = os.path.join(tempdir,'xml')
+ zipUpDir(myzip3, targetdir, '')
+ zipUpDir(myzip3, tempdir, 'img')
+ myzip3.close()
+
+ shutil.rmtree(tempdir)
+
+ return 0
+
+
+if __name__ == '__main__':
+ sys.exit(main())
+
def __init__(self, root):
Tkinter.Frame.__init__(self, root, border=5)
self.root = root
- self.interval = 2000
+ self.interval = 1000
self.p2 = None
- self.status = Tkinter.Label(self, text='Remove Encryption from a K4PC, K4M, or Mobi eBook')
+ self.status = Tkinter.Label(self, text='Remove Encryption from a Kindle/Mobi/Topaz eBook')
self.status.pack(fill=Tkconstants.X, expand=1)
body = Tkinter.Frame(self)
body.pack(fill=Tkconstants.X, expand=1)
sticky = Tkconstants.E + Tkconstants.W
body.grid_columnconfigure(1, weight=2)
- Tkinter.Label(body, text='K4 or Mobi eBook input file').grid(row=0, sticky=Tkconstants.E)
+ Tkinter.Label(body, text='Kindle/Mobi/Topaz eBook input file').grid(row=0, sticky=Tkconstants.E)
self.mobipath = Tkinter.Entry(body, width=50)
self.mobipath.grid(row=0, column=1, sticky=sticky)
cwd = os.getcwdu()
button = Tkinter.Button(body, text="...", command=self.get_mobipath)
button.grid(row=0, column=2)
- Tkinter.Label(body, text='Directory for the Unencrypted Output File').grid(row=1, sticky=Tkconstants.E)
+ Tkinter.Label(body, text='Directory for the Unencrypted Output File(s)').grid(row=1, sticky=Tkconstants.E)
self.outpath = Tkinter.Entry(body, width=50)
self.outpath.grid(row=1, column=1, sticky=sticky)
cwd = os.getcwdu()
button = Tkinter.Button(body, text="...", command=self.get_outpath)
button.grid(row=1, column=2)
- Tkinter.Label(body, text='Kindle.info file (optional)').grid(row=2, sticky=Tkconstants.E)
+ Tkinter.Label(body, text='Optional Alternative Kindle.info file').grid(row=2, sticky=Tkconstants.E)
self.altinfopath = Tkinter.Entry(body, width=50)
self.altinfopath.grid(row=2, column=1, sticky=sticky)
#cwd = os.getcwdu()
button = Tkinter.Button(body, text="...", command=self.get_altinfopath)
button.grid(row=2, column=2)
- Tkinter.Label(body, text='Comma Separated List of 10 Character PIDs (no spaces)').grid(row=3, sticky=Tkconstants.E)
+ Tkinter.Label(body, text='Optional Comma Separated List of 10 Character PIDs (no spaces)').grid(row=3, sticky=Tkconstants.E)
self.pidnums = Tkinter.StringVar()
self.pidinfo = Tkinter.Entry(body, width=50, textvariable=self.pidnums)
self.pidinfo.grid(row=3, column=1, sticky=sticky)
+ Tkinter.Label(body, text='Optional Comma Separated List of 16 Character Kindle Serial Numbers (no spaces)').grid(row=4, sticky=Tkconstants.E)
+ self.sernums = Tkinter.StringVar()
+ self.serinfo = Tkinter.Entry(body, width=50, textvariable=self.sernums)
+ self.serinfo.grid(row=4, column=1, sticky=sticky)
+
+
msg1 = 'Conversion Log \n\n'
self.stext = ScrolledText(body, bd=5, relief=Tkconstants.RIDGE, height=15, width=60, wrap=Tkconstants.WORD)
- self.stext.grid(row=4, column=0, columnspan=2,sticky=sticky)
+ self.stext.grid(row=6, column=0, columnspan=2,sticky=sticky)
self.stext.insert(Tkconstants.END,msg1)
buttons = Tkinter.Frame(self)
return
# run as a subprocess via pipes and collect stdout
- def mobirdr(self, infile, outfile, altinfopath, pidnums):
+ def mobirdr(self, infile, outfile, altinfopath, pidnums, sernums):
# os.putenv('PYTHONUNBUFFERED', '1')
+ tool = 'k4mobidedrm.py'
pidoption = ''
if pidnums and pidnums != '':
pidoption = ' -p "' + pidnums + '" '
+ seroption = ''
+ if sernums and sernums != '':
+ seroption = ' -s "' + sernums + '" '
infooption = ''
if altinfopath and altinfopath != '':
infooption = ' -k "' + altinfopath + '" '
- cmdline = 'python ./lib/k4mobidedrm.py ' + pidoption + infooption + '"' + infile + '" "' + outfile + '"'
+ cmdline = 'python ./lib/' + tool + ' ' + pidoption + seroption + infooption + '"' + infile + '" "' + outfile + '"'
print cmdline
if sys.platform.startswith('win'):
search_path = os.environ['PATH']
search_path = search_path.lower()
if search_path.find('python') >= 0:
- cmdline = 'python lib\k4mobidedrm.py ' + pidoption + infooption + '"' + infile + '" "' + outfile + '"'
+ cmdline = 'python lib\\' + tool + ' ' + pidoption + seroption + infooption + '"' + infile + '" "' + outfile + '"'
else :
- cmdline = 'lib\k4mobidedrm.py ' + pidoption + infooption + '"' + infile + '" "' + outfile + '"'
+ cmdline = 'lib\\' + tool + ' ' + pidoption + seroption + infooption + '"' + infile + '" "' + outfile + '"'
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
cpath = self.mobipath.get()
mobipath = tkFileDialog.askopenfilename(
initialdir = cpath,
- parent=None, title='Select K4PC, K4M or Mobi eBook File',
- defaultextension='.prc', filetypes=[('Mobi eBook File', '.prc'), ('Mobi eBook File', '.azw'),('Mobi eBook File', '.mobi'),
- ('All Files', '.*')])
+ parent=None, title='Select Kindle/Mobi/Topaz eBook File',
+ defaultextension='.prc', filetypes=[('Mobi eBook File', '.prc'), ('Mobi eBook File', '.azw'),('Mobi eBook File', '.mobi'),('Mobi eBook File', '.tpz'),('Mobi eBook File', '.azw1'),('All Files', '.*')])
if mobipath:
mobipath = os.path.normpath(mobipath)
self.mobipath.delete(0, Tkconstants.END)
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
outpath = tkFileDialog.askdirectory(
- parent=None, title='Directory to Store Unencrypted file into',
+ parent=None, title='Directory to Store Unencrypted file(s) into',
initialdir=cwd, initialfile=None)
if outpath:
outpath = os.path.normpath(outpath)
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
altinfopath = tkFileDialog.askopenfilename(
- parent=None, title='Select kindle.info File',
+ parent=None, title='Select Alternative kindle.info File',
defaultextension='.prc', filetypes=[('Kindle Info', '.info'),
('All Files', '.*')],
initialdir=cwd)
outpath = self.outpath.get()
altinfopath = self.altinfopath.get()
pidnums = self.pidinfo.get()
+ sernums = self.serinfo.get()
if not mobipath or not os.path.exists(mobipath) or not os.path.isfile(mobipath):
- self.status['text'] = 'Specified K4PC, K4M or Mobi eBook file does not exist'
+ self.status['text'] = 'Specified Kindle Mobi eBook file does not exist'
self.sbotton.configure(state='normal')
return
- # Head all Topaz ebooks off at the pass and warn user.
+
+ tpz = False
+ # Identify any Topaz Files
with open(mobipath, 'rb') as f:
- raw = f.read()
+ raw = f.read(3)
if raw.startswith('TPZ'):
- f.close()
- tkMessageBox.showerror(
- "K4MobiDeDRM",
- "%s is a Topaz ebook. It cannot be decrypted with this tool. "
- "You must use the Topaz Tools for this particular ebook." % mobipath)
- self.status['text'] = 'The selected file is a Topaz ebook! Use Topaz tools.'
- self.sbotton.configure(state='normal')
- return
+ tpz = True
f.close()
if not outpath:
self.status['text'] = 'No output directory specified'
self.status['text'] = 'Specified kindle.info file does not exist'
self.sbotton.configure(state='normal')
return
- # default output file name to be input file name + '_nodrm.mobi'
- initname = os.path.splitext(os.path.basename(mobipath))[0]
- initname += '_nodrm.mobi'
- outpath += os.sep + initname
log = 'Command = "python k4mobidedrm.py"\n'
- log += 'K4PC, K4M or Mobi Path = "'+ mobipath + '"\n'
- log += 'Output File = "' + outpath + '"\n'
+ if not tpz:
+ log += 'Kindle/Mobi Path = "'+ mobipath + '"\n'
+ else:
+ log += 'Topaz Path = "'+ mobipath + '"\n'
+ log += 'Output Directory = "' + outpath + '"\n'
log += 'Kindle.info file = "' + altinfopath + '"\n'
log += 'PID list = "' + pidnums + '"\n'
+ log += 'Serial Number list = "' + sernums + '"\n'
log += '\n\n'
log += 'Please Wait ...\n\n'
log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log)
- self.p2 = self.mobirdr(mobipath, outpath, altinfopath, pidnums)
+ self.p2 = self.mobirdr(mobipath, outpath, altinfopath, pidnums, sernums)
# python does not seem to allow you to create
# your own eventloop which every other gui does - strange
def main(argv=None):
root = Tkinter.Tk()
- root.title('K4PC/K4M/Mobi eBook Encryption Removal')
+ root.title('Kindle/Mobi/Topaz eBook Encryption Removal')
root.resizable(True, False)
root.minsize(300, 0)
MainDialog(root).pack(fill=Tkconstants.X, expand=1)
--- /dev/null
+KindleBooks (Originally called K4MobiDeDRM and Topaz_Tools)
+
+This tool combines the functionality of MobiDeDRM with that of K4PCDeDRM, K4MDeDRM, and K4DeDRM. Effectively, it provides one-stop shopping for all your Mobipocket, Kindle for iPhone/iPad/iPodTouch, Kindle for PC, and Kindle for Mac needs and should work for both Mobi and Topaz ebooks.
+
+Preliminary Steps:
+
+1. Make sure you have Python 2.X installed (32 bit) and properly set as part of your SYSTEM PATH environment variable (On Windows I recommend ActiveState's ActivePython. See their web pages for instructions on how to install and how to properly set your PATH). On Mac OSX 10.6 everything you need is already installed.
+
+
+****
+Please Note: If you are a happy user of MobiDeDRM, K4DeDRM, K4PCDeDRM, or K4MUnswindle, please continue to use those programs, as there is no additional capability provided by this tool over the others. In the long run, if you have problems with any of those tools, you might want to try this one, as it will continue under development, eventually replacing all of those tools.
+****
+
+Instructions:
+
+1. double-click on KindleBooks.pyw
+
+2. In the window that opens:
+hit the first '...' button to locate your DRM Kindle-style ebook
+
+3. Then hit the second '...' button to select an output directory for the unlocked file
+
+4. If you have multiple Kindle.Info files and would like to use one specific one, please hit the third "...' button to select it. Note, if you only have one Kindle.Info file (like most users) this can and should be left blank.
+
+5. Then add in any PIDs you need from KindleV1, Kindle for iPhone/iPad/iPodTouch, or other single PID devices to the provided box as a comma separated list of 10 digit PID numbers. If this is a Kindle for Mac or a Kindle for PC book then you can leave this box blank
+
+
+6. If you have standalone Kindles, add in any 16 digit Serial Numbers as a comma separated list. If this is a Kindle for Mac or a Kindle for PC book then you can leave this box blank
+
+7. hit the 'Start' button
+
+After a short delay, you should see progress in the Conversion Log window indicating whether the unlocking was a success or a failure.
+
+
+
+If your book was a normal Mobi style ebook:
+ If successful, you should see a "_nodrm" named version Mobi ebook.
+ If not please examine the Conversion Log window for any errors.
+
+
+
+If your book was actually a Topaz book:
+
+Please note that Topaz is most similar to a poor man's image-only PDF in style. It has glyphs with x,y positions, and ocrText used just for searching, which together describe the image of each page, all encoded into a binary xml-like set of files.
+
+If successful, you will have 3 zip archives created.
+
+1. The first is BOOKNAME_nodrm.zip.
+ You can import this into calibre as is or unzip it and edit the book.html file you find inside. To create the book.html, Amazon's ocrText is combined with other information to recreate as closely as possible what the original book looked like. Unfortunately most bolding, italics is lost. Also, Amazon's ocrText can be absolutely horrible at times. Much work will be needed to clean up and correct Topaz books.
+
+2. The second is BOOKNAME_SVG.zip
+ You can also import this into calibre or unzip it and open the indexsvg.xhtml file in any good Browser (Safari, Firefox, etc). This zip contains a set of svg images (one for each page) showing each page exactly how it appeared. This zip can be used to create an image-only pdf file via post conversion.
+
+3. The third is BOOKNAME_XML.zip
+ This is a zip archive of the decrypted and translated xml-like descriptions of each page and can be archived/saved in case later code can do a better job converting these files. These are exactly what a Topaz books guts are. You should take a look at them in any text editor to see what they look like.
+
+If the Topaz book conversion is not successful, a large _DEBUG.zip archive of all of the pieces is created and this can examined along with the Conversion Log window contents to determine the cause of the error and hopefully get it fixed in the next release.
+
+
-#!/usr/bin/env python
-# For use with Topaz Scripts Version 2.6
-
-class Unbuffered:
- def __init__(self, stream):
- self.stream = stream
- def write(self, data):
- self.stream.write(data)
- self.stream.flush()
- def __getattr__(self, attr):
- return getattr(self.stream, attr)
+#! /usr/bin/python
-import sys
-sys.stdout=Unbuffered(sys.stdout)
+"""
+
+Comprehensive Mazama Book DRM with Topaz Cryptography V2.2
+
+-----BEGIN PUBLIC KEY-----
+MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDdBHJ4CNc6DNFCw4MRCw4SWAK6
+M8hYfnNEI0yQmn5Ti+W8biT7EatpauE/5jgQMPBmdNrDr1hbHyHBSP7xeC2qlRWC
+B62UCxeu/fpfnvNHDN/wPWWH4jynZ2M6cdcnE5LQ+FfeKqZn7gnG2No1U9h7oOHx
+y2/pHuYme7U1TsgSjwIDAQAB
+-----END PUBLIC KEY-----
+
+"""
+
+from __future__ import with_statement
import csv
+import sys
import os
import getopt
import zlib
from struct import pack
from struct import unpack
+from ctypes import windll, c_char_p, c_wchar_p, c_uint, POINTER, byref, \
+ create_unicode_buffer, create_string_buffer, CFUNCTYPE, addressof, \
+ string_at, Structure, c_void_p, cast
+import _winreg as winreg
+import Tkinter
+import Tkconstants
+import tkMessageBox
import traceback
import hashlib
MAX_PATH = 255
+kernel32 = windll.kernel32
+advapi32 = windll.advapi32
+crypt32 = windll.crypt32
+
+global kindleDatabase
global bookFile
global bookPayloadOffset
global bookHeaderRecords
global bookMetadata
global bookKey
global command
-global kindleDatabase
-global verbose
-global PIDs
-
-if sys.platform.startswith('win'):
- from k4pcutils import openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap1, charMap2, charMap3, charMap4
-if sys.platform.startswith('darwin'):
- from k4mutils import openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap1, charMap2, charMap3, charMap4
+#
+# Various character maps used to decrypt books. Probably supposed to act as obfuscation
+#
+charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
+charMap2 = "AaZzB0bYyCc1XxDdW2wEeVv3FfUuG4g-TtHh5SsIiR6rJjQq7KkPpL8lOoMm9Nn_"
+charMap3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
+charMap4 = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
+#
# Exceptions for all the problems that might happen during the script
+#
+
class CMBDTCError(Exception):
pass
class CMBDTCFatal(Exception):
pass
+#
+# Stolen stuff
+#
+class DataBlob(Structure):
+    # ctypes mirror of the Win32 DATA_BLOB struct used by CryptUnprotectData.
+    _fields_ = [('cbData', c_uint),
+                ('pbData', c_void_p)]
+DataBlob_p = POINTER(DataBlob)
+
+def GetSystemDirectory():
+    # Factory: binds the Win32 GetSystemDirectoryW prototype once and returns
+    # a closure; the module-level name is rebound to that closure just below.
+    GetSystemDirectoryW = kernel32.GetSystemDirectoryW
+    GetSystemDirectoryW.argtypes = [c_wchar_p, c_uint]
+    GetSystemDirectoryW.restype = c_uint
+    def GetSystemDirectory():
+        # Return the Windows system directory path as a unicode string.
+        buffer = create_unicode_buffer(MAX_PATH + 1)
+        GetSystemDirectoryW(buffer, len(buffer))
+        return buffer.value
+    return GetSystemDirectory
+GetSystemDirectory = GetSystemDirectory()
+
+
+def GetVolumeSerialNumber():
+    # Factory: binds the Win32 GetVolumeInformationW prototype once and
+    # returns a closure; the module-level name is rebound just below.
+    GetVolumeInformationW = kernel32.GetVolumeInformationW
+    GetVolumeInformationW.argtypes = [c_wchar_p, c_wchar_p, c_uint,
+                                      POINTER(c_uint), POINTER(c_uint),
+                                      POINTER(c_uint), c_wchar_p, c_uint]
+    GetVolumeInformationW.restype = c_uint
+    def GetVolumeSerialNumber(path):
+        # Return the serial number of the volume mounted at 'path'.
+        vsn = c_uint(0)
+        GetVolumeInformationW(path, None, 0, byref(vsn), None, None, None, 0)
+        return vsn.value
+    return GetVolumeSerialNumber
+GetVolumeSerialNumber = GetVolumeSerialNumber()
+
+
+def GetUserName():
+    # Factory: binds the Win32 GetUserNameW prototype once and returns a
+    # closure; the module-level name is rebound just below.
+    GetUserNameW = advapi32.GetUserNameW
+    GetUserNameW.argtypes = [c_wchar_p, POINTER(c_uint)]
+    GetUserNameW.restype = c_uint
+    def GetUserName():
+        buffer = create_unicode_buffer(32)
+        size = c_uint(len(buffer))
+        # grow the buffer until the whole account name fits
+        while not GetUserNameW(buffer, byref(size)):
+            buffer = create_unicode_buffer(len(buffer) * 2)
+            size.value = len(buffer)
+        # drop the interleaved NUL bytes of the UTF-16-LE encoding to get a
+        # plain byte string of the account name
+        return buffer.value.encode('utf-16-le')[::2]
+    return GetUserName
+GetUserName = GetUserName()
+
+
+def CryptUnprotectData():
+    # Factory: binds the Win32 CryptUnprotectData (DPAPI) prototype once and
+    # returns a closure; the module-level name is rebound just below.
+    _CryptUnprotectData = crypt32.CryptUnprotectData
+    _CryptUnprotectData.argtypes = [DataBlob_p, c_wchar_p, DataBlob_p,
+                                    c_void_p, c_void_p, c_uint, DataBlob_p]
+    _CryptUnprotectData.restype = c_uint
+    def CryptUnprotectData(indata, entropy):
+        # Decrypt DPAPI-protected 'indata', mixing 'entropy' in as extra key
+        # material; raises CMBDTCFatal on failure.
+        indatab = create_string_buffer(indata)
+        indata = DataBlob(len(indata), cast(indatab, c_void_p))
+        entropyb = create_string_buffer(entropy)
+        entropy = DataBlob(len(entropy), cast(entropyb, c_void_p))
+        outdata = DataBlob()
+        if not _CryptUnprotectData(byref(indata), None, byref(entropy),
+                                   None, None, 0, byref(outdata)):
+            raise CMBDTCFatal("Failed to Unprotect Data")
+        return string_at(outdata.pbData, outdata.cbData)
+    return CryptUnprotectData
+CryptUnprotectData = CryptUnprotectData()
+
+#
# Returns the MD5 digest of "message"
+#
+
def MD5(message):
ctx = hashlib.md5()
ctx.update(message)
return ctx.digest()
-
+#
# Returns the MD5 digest of "message"
+#
+
def SHA1(message):
ctx = hashlib.sha1()
ctx.update(message)
return ctx.digest()
-
+#
# Open the book file at path
+#
+
def openBook(path):
try:
return open(path,'rb')
except:
raise CMBDTCFatal("Could not open book file: " + path)
+#
+# Encode the bytes in data with the characters in map
+#
-# Encode the bytes in data with the characters in map
def encode(data, map):
result = ""
for char in data:
result += map[R]
return result
+#
# Hash the bytes in data and then encode the digest with the characters in map
+#
+
def encodeHash(data,map):
return encode(MD5(data),map)
+#
# Decode the string in data with the characters in map. Returns the decoded bytes
+#
+
def decode(data,map):
result = ""
- for i in range (0,len(data)-1,2):
+ for i in range (0,len(data),2):
high = map.find(data[i])
low = map.find(data[i+1])
- if (high == -1) or (low == -1) :
- break
- value = (((high * len(map)) ^ 0x80) & 0xFF) + low
+ value = (((high * 0x40) ^ 0x80) & 0xFF) + low
result += pack("B",value)
return result
+
+#
+# Locate and open the Kindle.info file (Hopefully in the way it is done in the Kindle application)
+#
+
+def openKindleInfo():
+    # Locate and open the Kindle For PC kindle.info file under the user's
+    # Local AppData folder (path read from the Shell Folders registry key).
+    regkey = winreg.OpenKey(winreg.HKEY_CURRENT_USER, "Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\Shell Folders\\")
+    path = winreg.QueryValueEx(regkey, 'Local AppData')[0]
+    return open(path+'\\Amazon\\Kindle For PC\\{AMAwzsaPaaZAzmZzZQzgZCAkZ3AjA_AY}\\kindle.info','r')
+#
# Parse the Kindle.info file and return the records as a list of key-values
-def parseKindleInfo(kInfoFile):
+#
+
+def parseKindleInfo():
DB = {}
- infoReader = openKindleInfo(kInfoFile)
+ infoReader = openKindleInfo()
infoReader.read(1)
data = infoReader.read()
- if sys.platform.startswith('win'):
- items = data.split('{')
- else :
- items = data.split('[')
+ items = data.split('{')
+
for item in items:
splito = item.split(':')
DB[splito[0]] =splito[1]
return DB
-# Get a record from the Kindle.info file for the key "hashedKey" (already hashed and encoded). Return the decoded and decrypted record
-def getKindleInfoValueForHash(hashedKey):
- global kindleDatabase
- encryptedValue = decode(kindleDatabase[hashedKey],charMap2)
- if sys.platform.startswith('win'):
- return CryptUnprotectData(encryptedValue,"")
- else:
- cleartext = CryptUnprotectData(encryptedValue)
- return decode(cleartext, charMap1)
+#
+# Find if the original string for a hashed/encoded string is known. If so return the original string othwise return an empty string. (Totally not optimal)
+#
-# Get a record from the Kindle.info file for the string in "key" (plaintext). Return the decoded and decrypted record
-def getKindleInfoValueForKey(key):
- return getKindleInfoValueForHash(encodeHash(key,charMap2))
-
-# Find if the original string for a hashed/encoded string is known. If so return the original string othwise return an empty string.
def findNameForHash(hash):
names = ["kindle.account.tokens","kindle.cookie.item","eulaVersionAccepted","login_date","kindle.token.item","login","kindle.key.item","kindle.name.info","kindle.device.info", "MazamaRandomNumber"]
result = ""
if hash == encodeHash(name, charMap2):
result = name
break
- return result
+ return name
+#
# Print all the records from the kindle.info file (option -i)
+#
+
def printKindleInfo():
for record in kindleDatabase:
name = findNameForHash(record)
if name != "" :
print (name)
- print ("--------------------------")
+ print ("--------------------------\n")
else :
print ("Unknown Record")
print getKindleInfoValueForHash(record)
print "\n"
+#
+# Get a record from the Kindle.info file for the key "hashedKey" (already hashed and encoded). Return the decoded and decrypted record
+#
+def getKindleInfoValueForHash(hashedKey):
+    # Look up an (already hashed and encoded) key in the kindle.info
+    # database, decode it with charMap2 and decrypt it with Windows DPAPI.
+    global kindleDatabase
+    encryptedValue = decode(kindleDatabase[hashedKey],charMap2)
+    return CryptUnprotectData(encryptedValue,"")
+
#
-# PID generation routines
+# Get a record from the Kindle.info file for the string in "key" (plaintext). Return the decoded and decrypted record
#
+
+def getKindleInfoValueForKey(key):
+    # Convenience wrapper: hash/encode the plaintext key, then look it up.
+    return getKindleInfoValueForHash(encodeHash(key,charMap2))
-# Returns two bit at offset from a bit field
-def getTwoBitsFromBitField(bitField,offset):
- byteNumber = offset // 4
- bitPosition = 6 - 2*(offset % 4)
- return ord(bitField[byteNumber]) >> bitPosition & 3
-
-# Returns the six bits at offset from a bit field
-def getSixBitsFromBitField(bitField,offset):
- offset *= 3
- value = (getTwoBitsFromBitField(bitField,offset) <<4) + (getTwoBitsFromBitField(bitField,offset+1) << 2) +getTwoBitsFromBitField(bitField,offset+2)
- return value
-
-# 8 bits to six bits encoding from hash to generate PID string
-def encodePID(hash):
- global charMap3
- PID = ""
- for position in range (0,8):
- PID += charMap3[getSixBitsFromBitField(hash,position)]
- return PID
-
-# Encryption table used to generate the device PID
-def generatePidEncryptionTable() :
- table = []
- for counter1 in range (0,0x100):
- value = counter1
- for counter2 in range (0,8):
- if (value & 1 == 0) :
- value = value >> 1
- else :
- value = value >> 1
- value = value ^ 0xEDB88320
- table.append(value)
- return table
-
-# Seed value used to generate the device PID
-def generatePidSeed(table,dsn) :
- value = 0
- for counter in range (0,4) :
- index = (ord(dsn[counter]) ^ value) &0xFF
- value = (value >> 8) ^ table[index]
- return value
-
-# Generate the device PID
-def generateDevicePID(table,dsn,nbRoll):
- seed = generatePidSeed(table,dsn)
- pidAscii = ""
- pid = [(seed >>24) &0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF,(seed>>24) & 0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF]
- index = 0
- for counter in range (0,nbRoll):
- pid[index] = pid[index] ^ ord(dsn[counter])
- index = (index+1) %8
- for counter in range (0,8):
- index = ((((pid[counter] >>5) & 3) ^ pid[counter]) & 0x1f) + (pid[counter] >> 7)
- pidAscii += charMap4[index]
- return pidAscii
-
+#
# Get a 7 bit encoded number from the book file
+#
+
def bookReadEncodedNumber():
flag = False
data = ord(bookFile.read(1))
data = -data
return data
+#
# Encode a number in 7 bit format
+#
+
def encodeNumber(number):
result = ""
negative = False
flag = 0
- print("Using encodeNumber routine")
if number < 0 :
number = -number + 1
byte += flag
result += chr(byte)
flag = 0x80
- if number == 0 :
+ if number == 0 :
if (byte == 0xFF and negative == False) :
result += chr(0x80)
break
return result[::-1]
-
+#
# Get a length prefixed string from the file
+#
+
def bookReadString():
stringLength = bookReadEncodedNumber()
return unpack(str(stringLength)+"s",bookFile.read(stringLength))[0]
+#
# Returns a length prefixed string
+#
+
def lengthPrefixString(data):
return encodeNumber(len(data))+data
-# Read and return the data of one header record at the current book file position [[offset,decompressedLength,compressedLength],...]
+
+#
+# Read and return the data of one header record at the current book file position [[offset,compressedLength,decompressedLength],...]
+#
+
def bookReadHeaderRecordData():
nbValues = bookReadEncodedNumber()
values = []
values.append([bookReadEncodedNumber(),bookReadEncodedNumber(),bookReadEncodedNumber()])
return values
-# Read and parse one header record at the current book file position and return the associated data [[offset,decompressedLength,compressedLength],...]
+#
+# Read and parse one header record at the current book file position and return the associated data [[offset,compressedLength,decompressedLength],...]
+#
+
def parseTopazHeaderRecord():
if ord(bookFile.read(1)) != 0x63:
raise CMBDTCFatal("Parse Error : Invalid Header")
record = bookReadHeaderRecordData()
return [tag,record]
+#
# Parse the header of a Topaz file, get all the header records and the offset for the payload
+#
+
def parseTopazHeader():
global bookHeaderRecords
global bookPayloadOffset
for i in range (0,nbRecords):
result = parseTopazHeaderRecord()
- #print result[0], result[1]
bookHeaderRecords[result[0]] = result[1]
if ord(bookFile.read(1)) != 0x64 :
bookPayloadOffset = bookFile.tell()
+#
# Get a record in the book payload, given its name and index. If necessary the record is decrypted. The record is not decompressed
-# Correction, the record is correctly decompressed too
+#
+
def getBookPayloadRecord(name, index):
encrypted = False
- compressed = False
-
+
try:
recordOffset = bookHeaderRecords[name][index][0]
except:
if recordIndex != index :
raise CMBDTCFatal("Parse Error : Invalid Record, index doesn't match")
-
- if (bookHeaderRecords[name][index][2] > 0):
- compressed = True
+
+ if bookHeaderRecords[name][index][2] != 0 :
record = bookFile.read(bookHeaderRecords[name][index][2])
else:
record = bookFile.read(bookHeaderRecords[name][index][1])
-
+
if encrypted:
ctx = topazCryptoInit(bookKey)
record = topazCryptoDecrypt(record,ctx)
-
- if compressed:
- record = zlib.decompress(record)
return record
+#
# Extract, decrypt and decompress a book record indicated by name and index and print it or save it in "filename"
+#
+
def extractBookPayloadRecord(name, index, filename):
compressed = False
except:
print("Could not find record")
- # if compressed:
- # try:
- # record = zlib.decompress(record)
- # except:
- # raise CMBDTCFatal("Could not decompress record")
+ if compressed:
+ try:
+ record = zlib.decompress(record)
+ except:
+ raise CMBDTCFatal("Could not decompress record")
if filename != "":
try:
else:
print(record)
+#
# return next record [key,value] from the book metadata from the current book position
+#
+
def readMetadataRecord():
return [bookReadString(),bookReadString()]
+#
# Parse the metadata record from the book payload and return a list of [key,values]
+#
+
def parseMetadata():
global bookHeaderRecords
global bookPayloadAddress
record =readMetadataRecord()
bookMetadata[record[0]] = record[1]
+#
+# Returns two bit at offset from a bit field
+#
+
+def getTwoBitsFromBitField(bitField,offset):
+ byteNumber = offset // 4
+ bitPosition = 6 - 2*(offset % 4)
+
+ return ord(bitField[byteNumber]) >> bitPosition & 3
+
+#
+# Returns the six bits at offset from a bit field
+#
+
+def getSixBitsFromBitField(bitField,offset):
+ offset *= 3
+ value = (getTwoBitsFromBitField(bitField,offset) <<4) + (getTwoBitsFromBitField(bitField,offset+1) << 2) +getTwoBitsFromBitField(bitField,offset+2)
+ return value
+
+#
+# 8 bits to six bits encoding from hash to generate PID string
+#
+
+def encodePID(hash):
+ global charMap3
+ PID = ""
+ for position in range (0,8):
+ PID += charMap3[getSixBitsFromBitField(hash,position)]
+ return PID
+
+#
# Context initialisation for the Topaz Crypto
+#
+
def topazCryptoInit(key):
ctx1 = 0x0CAFFE19E
ctx1 = ((((ctx1 >>2) * (ctx1 >>7))&0xFFFFFFFF) ^ (keyByte * keyByte * 0x0F902007)& 0xFFFFFFFF )
return [ctx1,ctx2]
+#
# decrypt data with the context prepared by topazCryptoInit()
+#
+
def topazCryptoDecrypt(data, ctx):
ctx1 = ctx[0]
ctx2 = ctx[1]
return plainText
+#
# Decrypt a payload record with the PID
+#
+
def decryptRecord(data,PID):
ctx = topazCryptoInit(PID)
return topazCryptoDecrypt(data, ctx)
+#
# Try to decrypt a dkey record (contains the book PID)
+#
+
def decryptDkeyRecord(data,PID):
record = decryptRecord(data,PID)
fields = unpack("3sB8sB8s3s",record)
raise CMBDTCError("Record didn't contain PID")
return fields[4]
-
+
+#
# Decrypt all the book's dkey records (contain the book PID)
+#
+
def decryptDkeyRecords(data,PID):
nbKeyRecords = ord(data[0])
records = []
return records
+#
+# Encryption table used to generate the device PID
+#
+
+def generatePidEncryptionTable() :
+ table = []
+ for counter1 in range (0,0x100):
+ value = counter1
+ for counter2 in range (0,8):
+ if (value & 1 == 0) :
+ value = value >> 1
+ else :
+ value = value >> 1
+ value = value ^ 0xEDB88320
+ table.append(value)
+ return table
+
+#
+# Seed value used to generate the device PID
+#
+
+def generatePidSeed(table,dsn) :
+ value = 0
+ for counter in range (0,4) :
+ index = (ord(dsn[counter]) ^ value) &0xFF
+ value = (value >> 8) ^ table[index]
+ return value
+
+#
+# Generate the device PID
+#
+
+def generateDevicePID(table,dsn,nbRoll):
+ seed = generatePidSeed(table,dsn)
+ pidAscii = ""
+ pid = [(seed >>24) &0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF,(seed>>24) & 0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF]
+ index = 0
+
+ for counter in range (0,nbRoll):
+ pid[index] = pid[index] ^ ord(dsn[counter])
+ index = (index+1) %8
+
+ for counter in range (0,8):
+ index = ((((pid[counter] >>5) & 3) ^ pid[counter]) & 0x1f) + (pid[counter] >> 7)
+ pidAscii += charMap4[index]
+ return pidAscii
+
+#
# Create decrypted book payload
+#
+
def createDecryptedPayload(payload):
+
+ # store data to be able to create the header later
+ headerData= []
+ currentOffset = 0
+
+ # Add social DRM to decrypted files
+
+ try:
+ data = getKindleInfoValueForKey("kindle.name.info")+":"+ getKindleInfoValueForKey("login")
+ if payload!= None:
+ payload.write(lengthPrefixString("sdrm"))
+ payload.write(encodeNumber(0))
+ payload.write(data)
+ else:
+ currentOffset += len(lengthPrefixString("sdrm"))
+ currentOffset += len(encodeNumber(0))
+ currentOffset += len(data)
+ except:
+ pass
+
for headerRecord in bookHeaderRecords:
name = headerRecord
+ newRecord = []
+
if name != "dkey" :
- ext = '.dat'
- if name == 'img' : ext = '.jpg'
- if name == 'color' : ext = '.jpg'
+
for index in range (0,len(bookHeaderRecords[name])) :
- fnum = "%04d" % index
- fname = name + fnum + ext
- destdir = payload
- if name == 'img':
- destdir = os.path.join(payload,'img')
- if name == 'color':
- destdir = os.path.join(payload,'color_img')
- if name == 'page':
- destdir = os.path.join(payload,'page')
- if name == 'glyphs':
- destdir = os.path.join(payload,'glyphs')
- outputFile = os.path.join(destdir,fname)
- file(outputFile, 'wb').write(getBookPayloadRecord(name, index))
+ offset = currentOffset
+
+ if payload != None:
+ # write tag
+ payload.write(lengthPrefixString(name))
+ # write data
+ payload.write(encodeNumber(index))
+ payload.write(getBookPayloadRecord(name, index))
-# Create decrypted book
-def createDecryptedBook(outdir):
- if not os.path.exists(outdir):
- os.makedirs(outdir)
-
- destdir = os.path.join(outdir,'img')
- if not os.path.exists(destdir):
- os.makedirs(destdir)
-
- destdir = os.path.join(outdir,'color_img')
- if not os.path.exists(destdir):
- os.makedirs(destdir)
-
- destdir = os.path.join(outdir,'page')
- if not os.path.exists(destdir):
- os.makedirs(destdir)
+ else :
+ currentOffset += len(lengthPrefixString(name))
+ currentOffset += len(encodeNumber(index))
+ currentOffset += len(getBookPayloadRecord(name, index))
+ newRecord.append([offset,bookHeaderRecords[name][index][1],bookHeaderRecords[name][index][2]])
+
+ headerData.append([name,newRecord])
+
- destdir = os.path.join(outdir,'glyphs')
- if not os.path.exists(destdir):
- os.makedirs(destdir)
+
+ return headerData
+
+#
+# Create decrypted book
+#
- createDecryptedPayload(outdir)
+def createDecryptedBook(outputFile):
+ outputFile = open(outputFile,"wb")
+ # Write the payload in a temporary file
+ headerData = createDecryptedPayload(None)
+ outputFile.write("TPZ0")
+ outputFile.write(encodeNumber(len(headerData)))
+
+ for header in headerData :
+ outputFile.write(chr(0x63))
+ outputFile.write(lengthPrefixString(header[0]))
+ outputFile.write(encodeNumber(len(header[1])))
+ for numbers in header[1] :
+ outputFile.write(encodeNumber(numbers[0]))
+ outputFile.write(encodeNumber(numbers[1]))
+ outputFile.write(encodeNumber(numbers[2]))
+
+ outputFile.write(chr(0x64))
+ createDecryptedPayload(outputFile)
+ outputFile.close()
+#
# Set the command to execute by the programm according to cmdLine parameters
+#
+
def setCommand(name) :
global command
if command != "" :
else :
command = name
+#
# Program usage
+#
+
def usage():
print("\nUsage:")
- print("\ncmbtc_dump_linux.py [options] bookFileName\n")
+ print("\nCMBDTC.py [options] bookFileName\n")
print("-p Adds a PID to the list of PIDs that are tried to decrypt the book key (can be used several times)")
- print("-d Dumps the unencrypted book as files to outdir")
- print("-o Output directory to save book files to")
+ print("-d Saves a decrypted copy of the book")
+ print("-r Prints or writes to disk a record indicated in the form name:index (e.g \"img:0\")")
+ print("-o Output file name to write records and decrypted books")
print("-v Verbose (can be used several times)")
print("-i Prints kindle.info database")
- print("-k Adds the path to an alternate kindle.info file")
-
-def prepTopazBook(bookPath):
- global bookFile
- bookFile = openBook(bookPath)
- parseTopazHeader()
- parseMetadata()
-
-# Get Pids
-def getK4Pids(kInfoFile=None):
- global kindleDatabase
- global PIDs
-
- # Read the encrypted database
- kindleDatabase = None
- try:
- kindleDatabase = parseKindleInfo(kInfoFile)
- except Exception, message:
- #if verbose > 0:
- # print(message)
- pass
-
- if kindleDatabase != None :
- # Compute the DSN
- # Get the Mazama Random number
- MazamaRandomNumber = getKindleInfoValueForKey("MazamaRandomNumber")
-
- # Get the HDD serial
- encodedSystemVolumeSerialNumber = encodeHash(GetVolumeSerialNumber(),charMap1)
-
- # Get the current user name
- encodedUsername = encodeHash(GetUserName(),charMap1)
-
- # concat, hash and encode
- DSN = encode(SHA1(MazamaRandomNumber+encodedSystemVolumeSerialNumber+encodedUsername),charMap1)
-
- if verbose > 0:
- print("DSN: " + DSN)
-
- # Compute the device PID
- table = generatePidEncryptionTable()
- devicePID = generateDevicePID(table,DSN,4)
- PIDs.append(devicePID)
-
- if verbose > 0:
- print("Device PID: " + devicePID)
-
- # Compute book PID
- # Get the account token
- kindleAccountToken = getKindleInfoValueForKey("kindle.account.tokens")
-
- if verbose > 0:
- print("Account Token: " + kindleAccountToken)
-
- keysRecord = bookMetadata["keys"]
- keysRecordRecord = bookMetadata[keysRecord]
-
- pidHash = SHA1(DSN+kindleAccountToken+keysRecord+keysRecordRecord)
-
- bookPID = encodePID(pidHash)
- PIDs.append(bookPID)
-
- if verbose > 0:
- print ("Book PID: " + bookPID )
-
+
+#
# Main
+#
+
def main(argv=sys.argv):
- global verbose
- global PIDs
+ global kindleDatabase
+ global bookMetadata
global bookKey
+ global bookFile
global command
progname = os.path.basename(argv[0])
verbose = 0
recordName = ""
recordIndex = 0
- outdir = ""
+ outputFile = ""
PIDs = []
+ kindleDatabase = None
command = ""
- kInfoFiles = []
-
+
try:
- opts, args = getopt.getopt(sys.argv[1:], "vi:k:o:p:d")
+ opts, args = getopt.getopt(sys.argv[1:], "vdir:o:p:")
except getopt.GetoptError, err:
# print help information and exit:
print str(err) # will print something like "option -a not recognized"
verbose+=1
if o == "-i":
setCommand("printInfo")
- if o == "-k":
- if a == None :
- raise CMBDTCFatal("Invalid parameter for -k")
- kInfoFiles.append(a)
if o =="-o":
if a == None :
raise CMBDTCFatal("Invalid parameter for -o")
- outdir = a
+ outputFile = a
+ if o =="-r":
+ setCommand("printRecord")
+ try:
+ recordName,recordIndex = a.split(':')
+ except:
+ raise CMBDTCFatal("Invalid parameter for -r")
if o =="-p":
- if a == None :
- raise CMBDTCFatal("Invalid parameter for -p")
PIDs.append(a)
if o =="-d":
setCommand("doit")
if command == "" :
- raise Exception("No action supplied on command line")
-
+ raise CMBDTCFatal("No action supplied on command line")
+
+ #
+ # Read the encrypted database
+ #
+
+ try:
+ kindleDatabase = parseKindleInfo()
+ except Exception, message:
+ if verbose>0:
+ print(message)
+
+ if kindleDatabase != None :
+ if command == "printInfo" :
+ printKindleInfo()
+
+ #
+ # Compute the DSN
+ #
+
+ # Get the Mazama Random number
+ MazamaRandomNumber = getKindleInfoValueForKey("MazamaRandomNumber")
+
+ # Get the HDD serial
+ encodedSystemVolumeSerialNumber = encodeHash(str(GetVolumeSerialNumber(GetSystemDirectory().split('\\')[0] + '\\')),charMap1)
+
+ # Get the current user name
+ encodedUsername = encodeHash(GetUserName(),charMap1)
+
+ # concat, hash and encode
+ DSN = encode(SHA1(MazamaRandomNumber+encodedSystemVolumeSerialNumber+encodedUsername),charMap1)
+
+ if verbose >1:
+ print("DSN: " + DSN)
+
+ #
+ # Compute the device PID
+ #
+
+ table = generatePidEncryptionTable()
+ devicePID = generateDevicePID(table,DSN,4)
+ PIDs.append(devicePID)
+
+ if verbose > 0:
+ print("Device PID: " + devicePID)
+
+ #
# Open book and parse metadata
- if len(args) == 1:
- # Open the ebook
- prepTopazBook(args[0])
- # Always try to get the default Kindle installation info.
- getK4Pids()
+ #
- # If Alternate kindle.info files were supplied, parse them too.
- if kInfoFiles:
- for infoFile in kInfoFiles:
- getK4Pids(infoFile)
-
- # Print the kindle info if requested.
- if kindleDatabase != None :
- if command == "printInfo" :
- printKindleInfo()
-
- # Remove any duplicates that may occur from the PIDs List
- PIDs = list(set(PIDs))
-
- # Decrypt book key
- dkey = getBookPayloadRecord('dkey', 0)
+ if len(args) == 1:
+
+ bookFile = openBook(args[0])
+ parseTopazHeader()
+ parseMetadata()
+
+ #
+ # Compute book PID
+ #
+
+ # Get the account token
+
+ if kindleDatabase != None:
+ kindleAccountToken = getKindleInfoValueForKey("kindle.account.tokens")
+
+ if verbose >1:
+ print("Account Token: " + kindleAccountToken)
+
+ keysRecord = bookMetadata["keys"]
+ keysRecordRecord = bookMetadata[keysRecord]
+
+ pidHash = SHA1(DSN+kindleAccountToken+keysRecord+keysRecordRecord)
+
+ bookPID = encodePID(pidHash)
+ PIDs.append(bookPID)
+
+ if verbose > 0:
+ print ("Book PID: " + bookPID )
+
+ #
+ # Decrypt book key
+ #
+
+ dkey = getBookPayloadRecord('dkey', 0)
bookKeys = []
for PID in PIDs :
- print PID
bookKeys+=decryptDkeyRecords(dkey,PID)
if len(bookKeys) == 0 :
if verbose > 0 :
print ("Book key could not be found. Maybe this book is not registered with this device.")
- return 1
else :
bookKey = bookKeys[0]
if verbose > 0:
print("Book key: " + bookKey.encode('hex'))
+
+
if command == "printRecord" :
extractBookPayloadRecord(recordName,int(recordIndex),outputFile)
if outputFile != "" and verbose>0 :
print("Wrote record to file: "+outputFile)
elif command == "doit" :
- if outdir != "" :
- createDecryptedBook(outdir)
- if verbose > 0 :
+ if outputFile!="" :
+ createDecryptedBook(outputFile)
+ if verbose >0 :
print ("Decrypted book saved. Don't pirate!")
elif verbose > 0:
- print("Output directory name was not supplied.")
- return 1
+ print("Output file name was not supplied.")
return 0
if __name__ == '__main__':
sys.exit(main())
+
--- /dev/null
+#! /usr/bin/python
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+# For use with Topaz Scripts Version 2.6
+
+class Unbuffered:
+ def __init__(self, stream):
+ self.stream = stream
+ def write(self, data):
+ self.stream.write(data)
+ self.stream.flush()
+ def __getattr__(self, attr):
+ return getattr(self.stream, attr)
+
+import sys
+sys.stdout=Unbuffered(sys.stdout)
+
+import csv
+import os
+import getopt
+from struct import pack
+from struct import unpack
+
+
+# Get a 7 bit encoded number from string. The most
+# significant byte comes first and has the high bit (8th) set
+
+def readEncodedNumber(file):
+ flag = False
+ c = file.read(1)
+ if (len(c) == 0):
+ return None
+ data = ord(c)
+
+ if data == 0xFF:
+ flag = True
+ c = file.read(1)
+ if (len(c) == 0):
+ return None
+ data = ord(c)
+
+ if data >= 0x80:
+ datax = (data & 0x7F)
+ while data >= 0x80 :
+ c = file.read(1)
+ if (len(c) == 0):
+ return None
+ data = ord(c)
+ datax = (datax <<7) + (data & 0x7F)
+ data = datax
+
+ if flag:
+ data = -data
+ return data
+
+
+# returns a binary string that encodes a number into 7 bits
+# most significant byte first which has the high bit set
+
+def encodeNumber(number):
+ result = ""
+ negative = False
+ flag = 0
+
+ if number < 0 :
+ number = -number + 1
+ negative = True
+
+ while True:
+ byte = number & 0x7F
+ number = number >> 7
+ byte += flag
+ result += chr(byte)
+ flag = 0x80
+ if number == 0 :
+ if (byte == 0xFF and negative == False) :
+ result += chr(0x80)
+ break
+
+ if negative:
+ result += chr(0xFF)
+
+ return result[::-1]
+
+
+
+# create / read a length prefixed string from the file
+
+def lengthPrefixString(data):
+ return encodeNumber(len(data))+data
+
+def readString(file):
+ stringLength = readEncodedNumber(file)
+ if (stringLength == None):
+ return ""
+ sv = file.read(stringLength)
+ if (len(sv) != stringLength):
+ return ""
+ return unpack(str(stringLength)+"s",sv)[0]
+
+
+# convert a binary string generated by encodeNumber (7 bit encoded number)
+# to the value you would find inside the page*.dat files to be processed
+
+def convert(i):
+ result = ''
+ val = encodeNumber(i)
+ for j in xrange(len(val)):
+ c = ord(val[j:j+1])
+ result += '%02x' % c
+ return result
+
+
+
+# the complete string table used to store all book text content
+# as well as the xml tokens and values that make sense out of it
+
+class Dictionary(object):
+ def __init__(self, dictFile):
+ self.filename = dictFile
+ self.size = 0
+ self.fo = file(dictFile,'rb')
+ self.stable = []
+ self.size = readEncodedNumber(self.fo)
+ for i in xrange(self.size):
+ self.stable.append(self.escapestr(readString(self.fo)))
+ self.pos = 0
+
+ def escapestr(self, str):
+ str = str.replace('&','&amp;')
+ str = str.replace('<','&lt;')
+ str = str.replace('>','&gt;')
+ str = str.replace('=','&#61;')
+ return str
+
+ def lookup(self,val):
+ if ((val >= 0) and (val < self.size)) :
+ self.pos = val
+ return self.stable[self.pos]
+ else:
+ print "Error - %d outside of string table limits" % val
+ sys.exit(-1)
+
+ def getSize(self):
+ return self.size
+
+ def getPos(self):
+ return self.pos
+
+ def dumpDict(self):
+ for i in xrange(self.size):
+ print "%d %s %s" % (i, convert(i), self.stable[i])
+ return
+
+# parses the xml snippets that are represented by each page*.dat file.
+# also parses the other0.dat file - the main stylesheet
+# and information used to inject the xml snippets into page*.dat files
+
+class PageParser(object):
+ def __init__(self, filename, dict, debug, flat_xml):
+ self.fo = file(filename,'rb')
+ self.id = os.path.basename(filename).replace('.dat','')
+ self.dict = dict
+ self.debug = debug
+ self.flat_xml = flat_xml
+ self.tagpath = []
+ self.doc = []
+ self.snippetList = []
+
+
+ # hash table used to enable the decoding process
+ # This has all been developed by trial and error so it may still have omissions or
+ # contain errors
+ # Format:
+ # tag : (number of arguments, argument type, subtags present, special case of subtags present when escaped)
+
+ token_tags = {
+ 'x' : (1, 'scalar_number', 0, 0),
+ 'y' : (1, 'scalar_number', 0, 0),
+ 'h' : (1, 'scalar_number', 0, 0),
+ 'w' : (1, 'scalar_number', 0, 0),
+ 'firstWord' : (1, 'scalar_number', 0, 0),
+ 'lastWord' : (1, 'scalar_number', 0, 0),
+ 'rootID' : (1, 'scalar_number', 0, 0),
+ 'stemID' : (1, 'scalar_number', 0, 0),
+ 'type' : (1, 'scalar_text', 0, 0),
+
+ 'info' : (0, 'number', 1, 0),
+
+ 'info.word' : (0, 'number', 1, 1),
+ 'info.word.ocrText' : (1, 'text', 0, 0),
+ 'info.word.firstGlyph' : (1, 'raw', 0, 0),
+ 'info.word.lastGlyph' : (1, 'raw', 0, 0),
+ 'info.word.bl' : (1, 'raw', 0, 0),
+ 'info.word.link_id' : (1, 'number', 0, 0),
+
+ 'glyph' : (0, 'number', 1, 1),
+ 'glyph.x' : (1, 'number', 0, 0),
+ 'glyph.y' : (1, 'number', 0, 0),
+ 'glyph.glyphID' : (1, 'number', 0, 0),
+
+ 'dehyphen' : (0, 'number', 1, 1),
+ 'dehyphen.rootID' : (1, 'number', 0, 0),
+ 'dehyphen.stemID' : (1, 'number', 0, 0),
+ 'dehyphen.stemPage' : (1, 'number', 0, 0),
+ 'dehyphen.sh' : (1, 'number', 0, 0),
+
+ 'links' : (0, 'number', 1, 1),
+ 'links.page' : (1, 'number', 0, 0),
+ 'links.rel' : (1, 'number', 0, 0),
+ 'links.row' : (1, 'number', 0, 0),
+ 'links.title' : (1, 'text', 0, 0),
+ 'links.href' : (1, 'text', 0, 0),
+ 'links.type' : (1, 'text', 0, 0),
+
+ 'paraCont' : (0, 'number', 1, 1),
+ 'paraCont.rootID' : (1, 'number', 0, 0),
+ 'paraCont.stemID' : (1, 'number', 0, 0),
+ 'paraCont.stemPage' : (1, 'number', 0, 0),
+
+ 'paraStems' : (0, 'number', 1, 1),
+ 'paraStems.stemID' : (1, 'number', 0, 0),
+
+ 'wordStems' : (0, 'number', 1, 1),
+ 'wordStems.stemID' : (1, 'number', 0, 0),
+
+ 'empty' : (1, 'snippets', 1, 0),
+
+ 'page' : (1, 'snippets', 1, 0),
+ 'page.pageid' : (1, 'scalar_text', 0, 0),
+ 'page.pagelabel' : (1, 'scalar_text', 0, 0),
+ 'page.type' : (1, 'scalar_text', 0, 0),
+ 'page.h' : (1, 'scalar_number', 0, 0),
+ 'page.w' : (1, 'scalar_number', 0, 0),
+ 'page.startID' : (1, 'scalar_number', 0, 0),
+
+ 'group' : (1, 'snippets', 1, 0),
+ 'group.type' : (1, 'scalar_text', 0, 0),
+
+ 'region' : (1, 'snippets', 1, 0),
+ 'region.type' : (1, 'scalar_text', 0, 0),
+ 'region.x' : (1, 'scalar_number', 0, 0),
+ 'region.y' : (1, 'scalar_number', 0, 0),
+ 'region.h' : (1, 'scalar_number', 0, 0),
+ 'region.w' : (1, 'scalar_number', 0, 0),
+
+ 'empty_text_region' : (1, 'snippets', 1, 0),
+
+ 'img' : (1, 'snippets', 1, 0),
+ 'img.x' : (1, 'scalar_number', 0, 0),
+ 'img.y' : (1, 'scalar_number', 0, 0),
+ 'img.h' : (1, 'scalar_number', 0, 0),
+ 'img.w' : (1, 'scalar_number', 0, 0),
+ 'img.src' : (1, 'scalar_number', 0, 0),
+ 'img.color_src' : (1, 'scalar_number', 0, 0),
+
+ 'paragraph' : (1, 'snippets', 1, 0),
+ 'paragraph.class' : (1, 'scalar_text', 0, 0),
+ 'paragraph.firstWord' : (1, 'scalar_number', 0, 0),
+ 'paragraph.lastWord' : (1, 'scalar_number', 0, 0),
+
+ 'word_semantic' : (1, 'snippets', 1, 1),
+ 'word_semantic.type' : (1, 'scalar_text', 0, 0),
+ 'word_semantic.firstWord' : (1, 'scalar_number', 0, 0),
+ 'word_semantic.lastWord' : (1, 'scalar_number', 0, 0),
+
+ 'word' : (1, 'snippets', 1, 0),
+ 'word.type' : (1, 'scalar_text', 0, 0),
+ 'word.class' : (1, 'scalar_text', 0, 0),
+ 'word.firstGlyph' : (1, 'scalar_number', 0, 0),
+ 'word.lastGlyph' : (1, 'scalar_number', 0, 0),
+
+ '_span' : (1, 'snippets', 1, 0),
+ '_span.firstWord' : (1, 'scalar_number', 0, 0),
+ '_span.lastWord' : (1, 'scalar_number', 0, 0),
+
+ 'span' : (1, 'snippets', 1, 0),
+ 'span.firstWord' : (1, 'scalar_number', 0, 0),
+ 'span.lastWord' : (1, 'scalar_number', 0, 0),
+
+ 'extratokens' : (1, 'snippets', 1, 0),
+ 'extratokens.type' : (1, 'scalar_text', 0, 0),
+ 'extratokens.firstGlyph' : (1, 'scalar_number', 0, 0),
+ 'extratokens.lastGlyph' : (1, 'scalar_number', 0, 0),
+
+ 'glyph.h' : (1, 'number', 0, 0),
+ 'glyph.w' : (1, 'number', 0, 0),
+ 'glyph.use' : (1, 'number', 0, 0),
+ 'glyph.vtx' : (1, 'number', 0, 1),
+ 'glyph.len' : (1, 'number', 0, 1),
+ 'glyph.dpi' : (1, 'number', 0, 0),
+ 'vtx' : (0, 'number', 1, 1),
+ 'vtx.x' : (1, 'number', 0, 0),
+ 'vtx.y' : (1, 'number', 0, 0),
+ 'len' : (0, 'number', 1, 1),
+ 'len.n' : (1, 'number', 0, 0),
+
+ 'book' : (1, 'snippets', 1, 0),
+ 'version' : (1, 'snippets', 1, 0),
+ 'version.FlowEdit_1_id' : (1, 'scalar_text', 0, 0),
+ 'version.FlowEdit_1_version' : (1, 'scalar_text', 0, 0),
+ 'version.Schema_id' : (1, 'scalar_text', 0, 0),
+ 'version.Schema_version' : (1, 'scalar_text', 0, 0),
+ 'version.Topaz_version' : (1, 'scalar_text', 0, 0),
+ 'version.WordDetailEdit_1_id' : (1, 'scalar_text', 0, 0),
+ 'version.WordDetailEdit_1_version' : (1, 'scalar_text', 0, 0),
+ 'version.ZoneEdit_1_id' : (1, 'scalar_text', 0, 0),
+ 'version.ZoneEdit_1_version' : (1, 'scalar_text', 0, 0),
+ 'version.chapterheaders' : (1, 'scalar_text', 0, 0),
+ 'version.creation_date' : (1, 'scalar_text', 0, 0),
+ 'version.header_footer' : (1, 'scalar_text', 0, 0),
+ 'version.init_from_ocr' : (1, 'scalar_text', 0, 0),
+ 'version.letter_insertion' : (1, 'scalar_text', 0, 0),
+ 'version.xmlinj_convert' : (1, 'scalar_text', 0, 0),
+ 'version.xmlinj_reflow' : (1, 'scalar_text', 0, 0),
+ 'version.xmlinj_transform' : (1, 'scalar_text', 0, 0),
+ 'version.findlists' : (1, 'scalar_text', 0, 0),
+ 'version.page_num' : (1, 'scalar_text', 0, 0),
+ 'version.page_type' : (1, 'scalar_text', 0, 0),
+ 'version.bad_text' : (1, 'scalar_text', 0, 0),
+ 'version.glyph_mismatch' : (1, 'scalar_text', 0, 0),
+ 'version.margins' : (1, 'scalar_text', 0, 0),
+ 'version.staggered_lines' : (1, 'scalar_text', 0, 0),
+ 'version.paragraph_continuation' : (1, 'scalar_text', 0, 0),
+ 'version.toc' : (1, 'scalar_text', 0, 0),
+
+ 'stylesheet' : (1, 'snippets', 1, 0),
+ 'style' : (1, 'snippets', 1, 0),
+ 'style._tag' : (1, 'scalar_text', 0, 0),
+ 'style.type' : (1, 'scalar_text', 0, 0),
+ 'style._parent_type' : (1, 'scalar_text', 0, 0),
+ 'style.class' : (1, 'scalar_text', 0, 0),
+ 'style._after_class' : (1, 'scalar_text', 0, 0),
+ 'rule' : (1, 'snippets', 1, 0),
+ 'rule.attr' : (1, 'scalar_text', 0, 0),
+ 'rule.value' : (1, 'scalar_text', 0, 0),
+
+ 'original' : (0, 'number', 1, 1),
+ 'original.pnum' : (1, 'number', 0, 0),
+ 'original.pid' : (1, 'text', 0, 0),
+ 'pages' : (0, 'number', 1, 1),
+ 'pages.ref' : (1, 'number', 0, 0),
+ 'pages.id' : (1, 'number', 0, 0),
+ 'startID' : (0, 'number', 1, 1),
+ 'startID.page' : (1, 'number', 0, 0),
+ 'startID.id' : (1, 'number', 0, 0),
+
+ }
+
+
+ # full tag path record keeping routines
+ def tag_push(self, token):
+ self.tagpath.append(token)
+ def tag_pop(self):
+ if len(self.tagpath) > 0 :
+ self.tagpath.pop()
+ def tagpath_len(self):
+ return len(self.tagpath)
+ def get_tagpath(self, i):
+ cnt = len(self.tagpath)
+ if i < cnt : result = self.tagpath[i]
+ for j in xrange(i+1, cnt) :
+ result += '.' + self.tagpath[j]
+ return result
+
+
+ # list of absolute command byte values that indicate
+ # various types of loop mechanisms typically used to generate vectors
+
+ cmd_list = (0x76, 0x76)
+
+ # peek at and return 1 byte that is ahead by i bytes
+ def peek(self, aheadi):
+ c = self.fo.read(aheadi)
+ if (len(c) == 0):
+ return None
+ self.fo.seek(-aheadi,1)
+ c = c[-1:]
+ return ord(c)
+
+
+ # get the next value from the file being processed
+ def getNext(self):
+ nbyte = self.peek(1);
+ if (nbyte == None):
+ return None
+ val = readEncodedNumber(self.fo)
+ return val
+
+
+ # format an arg by argtype
+ def formatArg(self, arg, argtype):
+ if (argtype == 'text') or (argtype == 'scalar_text') :
+ result = self.dict.lookup(arg)
+ elif (argtype == 'raw') or (argtype == 'number') or (argtype == 'scalar_number') :
+ result = arg
+ elif (argtype == 'snippets') :
+ result = arg
+ else :
+ print "Error Unknown argtype %s" % argtype
+ sys.exit(-2)
+ return result
+
+
+ # process the next tag token, recursively handling subtags,
+ # arguments, and commands
+ def procToken(self, token):
+
+ known_token = False
+ self.tag_push(token)
+
+ if self.debug : print 'Processing: ', self.get_tagpath(0)
+ cnt = self.tagpath_len()
+ for j in xrange(cnt):
+ tkn = self.get_tagpath(j)
+ if tkn in self.token_tags :
+ num_args = self.token_tags[tkn][0]
+ argtype = self.token_tags[tkn][1]
+ subtags = self.token_tags[tkn][2]
+ splcase = self.token_tags[tkn][3]
+ ntags = -1
+ known_token = True
+ break
+
+ if known_token :
+
+ # handle subtags if present
+ subtagres = []
+ if (splcase == 1):
+ # this type of tag uses an escape marker 0x74 to indicate subtag count
+ if self.peek(1) == 0x74:
+ skip = readEncodedNumber(self.fo)
+ subtags = 1
+ num_args = 0
+
+ if (subtags == 1):
+ ntags = readEncodedNumber(self.fo)
+ if self.debug : print 'subtags: ' + token + ' has ' + str(ntags)
+ for j in xrange(ntags):
+ val = readEncodedNumber(self.fo)
+ subtagres.append(self.procToken(self.dict.lookup(val)))
+
+ # arguments can be scalars or vectors of text or numbers
+ argres = []
+ if num_args > 0 :
+ firstarg = self.peek(1)
+ if (firstarg in self.cmd_list) and (argtype != 'scalar_number') and (argtype != 'scalar_text'):
+ # single argument is a variable length vector of data
+ arg = readEncodedNumber(self.fo)
+ argres = self.decodeCMD(arg,argtype)
+ else :
+ # num_arg scalar arguments
+ for i in xrange(num_args):
+ argres.append(self.formatArg(readEncodedNumber(self.fo), argtype))
+
+ # build the return tag
+ result = []
+ tkn = self.get_tagpath(0)
+ result.append(tkn)
+ result.append(subtagres)
+ result.append(argtype)
+ result.append(argres)
+ self.tag_pop()
+ return result
+
+ # all tokens that need to be processed should be in the hash
+ # table if it may indicate a problem, either new token
+ # or an out of sync condition
+ else:
+ result = []
+ if (self.debug):
+ print 'Unknown Token:', token
+ self.tag_pop()
+ return result
+
+
+ # special loop used to process code snippets
+ # it is NEVER used to format arguments.
+ # builds the snippetList
+ def doLoop72(self, argtype):
+ cnt = readEncodedNumber(self.fo)
+ if self.debug :
+ result = 'Set of '+ str(cnt) + ' xml snippets. The overall structure \n'
+ result += 'of the document is indicated by snippet number sets at the\n'
+ result += 'end of each snippet. \n'
+ print result
+ for i in xrange(cnt):
+ if self.debug: print 'Snippet:',str(i)
+ snippet = []
+ snippet.append(i)
+ val = readEncodedNumber(self.fo)
+ snippet.append(self.procToken(self.dict.lookup(val)))
+ self.snippetList.append(snippet)
+ return
+
+
+
+ # general loop code graciously submitted by "skindle" - thank you!
+ def doLoop76Mode(self, argtype, cnt, mode):
+ result = []
+ adj = 0
+ if mode & 1:
+ adj = readEncodedNumber(self.fo)
+ mode = mode >> 1
+ x = []
+ for i in xrange(cnt):
+ x.append(readEncodedNumber(self.fo) - adj)
+ for i in xrange(mode):
+ for j in xrange(1, cnt):
+ x[j] = x[j] + x[j - 1]
+ for i in xrange(cnt):
+ result.append(self.formatArg(x[i],argtype))
+ return result
+
+
+ # dispatches loop commands bytes with various modes
+ # The 0x76 style loops are used to build vectors
+
+ # This was all derived by trial and error and
+ # new loop types may exist that are not handled here
+ # since they did not appear in the test cases
+
+ def decodeCMD(self, cmd, argtype):
+ if (cmd == 0x76):
+
+ # loop with cnt, and mode to control loop styles
+ cnt = readEncodedNumber(self.fo)
+ mode = readEncodedNumber(self.fo)
+
+ if self.debug : print 'Loop for', cnt, 'with mode', mode, ': '
+ return self.doLoop76Mode(argtype, cnt, mode)
+
+ if self.debug: print "Unknown command", cmd
+ result = []
+ return result
+
+
+
+ # add full tag path to injected snippets
+ def updateName(self, tag, prefix):
+ name = tag[0]
+ subtagList = tag[1]
+ argtype = tag[2]
+ argList = tag[3]
+ nname = prefix + '.' + name
+ nsubtaglist = []
+ for j in subtagList:
+ nsubtaglist.append(self.updateName(j,prefix))
+ ntag = []
+ ntag.append(nname)
+ ntag.append(nsubtaglist)
+ ntag.append(argtype)
+ ntag.append(argList)
+ return ntag
+
+
+
    # perform depth first injection of specified snippets into this one
    def injectSnippets(self, snippet):
        # snippet is a [snipno, tag] pair; when the tag carries an argList it
        # holds indices into self.snippetList of child snippets to inline.
        # Each child is injected depth first, renamed with this tag's name as
        # a path prefix, and appended to the subtag list; the argument list is
        # then cleared (argtype reset to 'number') since it has been consumed.
        snipno, tag = snippet
        name = tag[0]
        subtagList = tag[1]
        argtype = tag[2]
        argList = tag[3]
        nsubtagList = []
        if len(argList) > 0 :
            for j in argList:
                asnip = self.snippetList[j]
                aso, atag = self.injectSnippets(asnip)
                atag = self.updateName(atag, name)
                nsubtagList.append(atag)
            argtype='number'
            argList=[]
        if len(nsubtagList) > 0 :
            # NOTE: extend mutates the tag stored in self.snippetList in
            # place, so repeated injection of the same snippet accumulates —
            # callers appear to inject each snippet tree only once
            subtagList.extend(nsubtagList)
        # rebuild the [name, subtags, argtype, args] tag and the
        # [snipno, tag] pair to return
        tag = []
        tag.append(name)
        tag.append(subtagList)
        tag.append(argtype)
        tag.append(argList)
        snippet = []
        snippet.append(snipno)
        snippet.append(tag)
        return snippet
+
+
+
+ # format the tag for output
+ def formatTag(self, node):
+ name = node[0]
+ subtagList = node[1]
+ argtype = node[2]
+ argList = node[3]
+ fullpathname = name.split('.')
+ nodename = fullpathname.pop()
+ ilvl = len(fullpathname)
+ indent = ' ' * (3 * ilvl)
+ result = indent + '<' + nodename + '>'
+ if len(argList) > 0:
+ argres = ''
+ for j in argList:
+ if (argtype == 'text') or (argtype == 'scalar_text') :
+ argres += j + '|'
+ else :
+ argres += str(j) + ','
+ argres = argres[0:-1]
+ if argtype == 'snippets' :
+ result += 'snippets:' + argres
+ else :
+ result += argres
+ if len(subtagList) > 0 :
+ result += '\n'
+ for j in subtagList:
+ if len(j) > 0 :
+ result += self.formatTag(j)
+ result += indent + '</' + nodename + '>\n'
+ else:
+ result += '</' + nodename + '>\n'
+ return result
+
+
+ # flatten tag
+ def flattenTag(self, node):
+ name = node[0]
+ subtagList = node[1]
+ argtype = node[2]
+ argList = node[3]
+ result = name
+ if (len(argList) > 0):
+ argres = ''
+ for j in argList:
+ if (argtype == 'text') or (argtype == 'scalar_text') :
+ argres += j + '|'
+ else :
+ argres += str(j) + '|'
+ argres = argres[0:-1]
+ if argtype == 'snippets' :
+ result += '.snippets=' + argres
+ else :
+ result += '=' + argres
+ result += '\n'
+ for j in subtagList:
+ if len(j) > 0 :
+ result += self.flattenTag(j)
+ return result
+
+
+ # reduce create xml output
+ def formatDoc(self, flat_xml):
+ result = ''
+ for j in self.doc :
+ if len(j) > 0:
+ if flat_xml:
+ result += self.flattenTag(j)
+ else:
+ result += self.formatTag(j)
+ if self.debug : print result
+ return result
+
+
+
    # main loop - parse the page.dat files
    # to create structured document and snippets

    # FIXME: value at end of magic appears to be a subtags count
    # but for what? For now, inject an 'info" tag as it is in
    # every dictionary and seems close to what is meant
    # The alternative is to special case the last _ "0x5f" to mean something

    def process(self):
        # Drive the parse of one .dat file: sniff the header to decide the
        # file flavor, read tokens until EOF building self.doc, then inject
        # any collected snippets and serialize the result.

        # peek at the first bytes to see what type of file it is
        magic = self.fo.read(9)
        if (magic[0:1] == 'p') and (magic[2:9] == 'marker_'):
            first_token = 'info'
        elif (magic[0:1] == 'p') and (magic[2:9] == '__PAGE_'):
            # two extra header bytes to skip for this variant
            skip = self.fo.read(2)
            first_token = 'info'
        elif (magic[0:1] == 'p') and (magic[2:8] == '_PAGE_'):
            first_token = 'info'
        elif (magic[0:1] == 'g') and (magic[2:9] == '__GLYPH'):
            # three extra header bytes to skip for glyph files
            skip = self.fo.read(3)
            first_token = 'info'
        else :
            # other0.dat file
            first_token = None
            # not a page/glyph file: rewind the 9 peeked bytes
            self.fo.seek(-9,1)


        # main loop to read and build the document tree
        while True:

            if first_token != None :
                # use "inserted" first token 'info' for page and glyph files
                tag = self.procToken(first_token)
                if len(tag) > 0 :
                    self.doc.append(tag)
                first_token = None

            v = self.getNext()
            if (v == None):
                # end of file
                break

            if (v == 0x72):
                # snippet-loop command: consumed entirely by doLoop72
                self.doLoop72('number')
            elif (v > 0) and (v < self.dict.getSize()) :
                # value is an index into the string dictionary
                tag = self.procToken(self.dict.lookup(v))
                if len(tag) > 0 :
                    self.doc.append(tag)
            else:
                if self.debug:
                    print "Main Loop: Unknown value: %x" % v
                if (v == 0):
                    # a 0x00 followed by 0x5f ('_') marks another injected
                    # 'info' tag (see FIXME above)
                    if (self.peek(1) == 0x5f):
                        skip = self.fo.read(1)
                        first_token = 'info'

        # now do snippet injection
        if len(self.snippetList) > 0 :
            if self.debug : print 'Injecting Snippets:'
            snippet = self.injectSnippets(self.snippetList[0])
            snipno = snippet[0]
            tag_add = snippet[1]
            if self.debug : print self.formatTag(tag_add)
            if len(tag_add) > 0:
                self.doc.append(tag_add)

        # handle generation of xml output
        xmlpage = self.formatDoc(self.flat_xml)

        return xmlpage
+
+
def fromData(dict, fname):
    # Parse a page/glyph .dat file with the given string-table dictionary
    # and return its flattened (name=value per line) xml description.
    parser = PageParser(fname, dict, False, True)
    return parser.process()
+
def getXML(dict, fname):
    # Parse a page/glyph .dat file with the given string-table dictionary
    # and return its structured (indented pseudo-xml) description.
    parser = PageParser(fname, dict, False, False)
    return parser.process()
+
def usage():
    # Print the command-line help text for convert2xml.
    for line in (
        'Usage: ',
        ' convert2xml.py dict0000.dat infile.dat ',
        ' ',
        ' Options:',
        ' -h print this usage help message ',
        ' -d turn on debug output to check for potential errors ',
        ' --flat-xml output the flattened xml page description only ',
        ' ',
        ' This program will attempt to convert a page*.dat file or ',
        ' glyphs*.dat file, using the dict0000.dat file, to its xml description. ',
        ' ',
        ' Use "cmbtc_dump.py" first to unencrypt, uncompress, and dump ',
        ' the *.dat files from a Topaz format e-book.',
    ):
        print(line)
+
#
# Main
#

def main(argv):
    # Command-line driver. Called with '' (empty argv) from the module
    # guard it falls back to sys.argv and prints the xml page; called with
    # a real argv list (library use) it returns the xml page instead.
    dictFile = ""
    pageFile = ""
    debug = False
    flat_xml = False
    printOutput = False
    if len(argv) == 0:
        printOutput = True
        argv = sys.argv

    try:
        # -h help, -d debug, --flat-xml flattened output
        opts, args = getopt.getopt(argv[1:], "hd", ["flat-xml"])

    except getopt.GetoptError, err:

        # print help information and exit:
        print str(err) # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    if len(opts) == 0 and len(args) == 0 :
        usage()
        sys.exit(2)

    for o, a in opts:
        if o =="-d":
            debug=True
        if o =="-h":
            usage()
            sys.exit(0)
        if o =="--flat-xml":
            flat_xml = True

    # positional arguments: the string dictionary then the page/glyph file
    dictFile, pageFile = args[0], args[1]

    # read in the string table dictionary
    dict = Dictionary(dictFile)
    # dict.dumpDict()

    # create a page parser
    pp = PageParser(pageFile, dict, debug, flat_xml)

    xmlpage = pp.process()

    if printOutput:
        print xmlpage
        return 0

    return xmlpage
+
# script entry: the '' sentinel makes main() read sys.argv and print the result
if __name__ == '__main__':
    sys.exit(main(''))
--- /dev/null
+#! /usr/bin/python
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+# For use with Topaz Scripts Version 2.6
+
+import sys
+import csv
+import os
+import math
+import getopt
+from struct import pack
+from struct import unpack
+
+
class DocParser(object):
    """Convert one page's flattened-xml description into an html fragment.

    flatxml    -- 'name=value' line-per-tag dump of the page (from convert2xml)
    classlst   -- newline-separated css class names, each with a leading '.'
    fileid     -- page .dat filename; its basename (minus .dat) becomes self.id
    bookDir    -- book working directory holding the 'svg' and 'img' subdirs
    gdict      -- glyph dictionary providing lookup() from id to svg fragment
    fixedimage -- when true, also render 'fixed' regions / 'cl-f-' classes as
                  svg images instead of text
    """
    def __init__(self, flatxml, classlst, fileid, bookDir, gdict, fixedimage):
        self.id = os.path.basename(fileid).replace('.dat','')
        self.svgcount = 0
        self.docList = flatxml.split('\n')
        self.docSize = len(self.docList)
        self.classList = {}
        self.bookDir = bookDir
        self.gdict = gdict
        tmpList = classlst.split('\n')
        for pclass in tmpList:
            if pclass != '':
                # remove the leading period from the css name
                cname = pclass[1:]
                self.classList[cname] = True
        self.fixedimage = fixedimage
        self.ocrtext = []
        self.link_id = []
        self.link_title = []
        self.link_page = []
        self.link_href = []
        self.link_type = []
        self.dehyphen_rootid = []
        self.paracont_stemid = []
        self.parastems_stemid = []


    # look up the svg path fragment for one glyph id
    def getGlyph(self, gid):
        result = ''
        id='id="gl%d"' % gid
        return self.gdict.lookup(id)

    # render the given glyph indices as a standalone svg file named
    # <pageid>_<svgcount>.svg in the book's img directory
    def glyphs_to_image(self, glyphList):

        def extract(path, key):
            # pull the integer value that follows 'key' in an svg fragment
            b = path.find(key) + len(key)
            e = path.find(' ',b)
            return int(path[b:e])

        svgDir = os.path.join(self.bookDir,'svg')

        imgDir = os.path.join(self.bookDir,'img')
        imgname = self.id + '_%04d.svg' % self.svgcount
        imgfile = os.path.join(imgDir,imgname)

        # get glyph information
        gxList = self.getData('info.glyph.x',0,-1)
        gyList = self.getData('info.glyph.y',0,-1)
        gidList = self.getData('info.glyph.glyphID',0,-1)

        gids = []
        maxws = []
        maxhs = []
        xs = []
        ys = []
        gdefs = []

        # get path definitions, positions, dimensions for each glyph
        # that makes up the image, and find min x and min y to reposition origin
        minx = -1
        miny = -1
        for j in glyphList:
            gid = gidList[j]
            gids.append(gid)

            xs.append(gxList[j])
            if minx == -1: minx = gxList[j]
            else : minx = min(minx, gxList[j])

            ys.append(gyList[j])
            if miny == -1: miny = gyList[j]
            else : miny = min(miny, gyList[j])

            path = self.getGlyph(gid)
            gdefs.append(path)

            maxws.append(extract(path,'width='))
            maxhs.append(extract(path,'height='))


        # change the origin to minx, miny and calc max height and width
        maxw = maxws[0] + xs[0] - minx
        maxh = maxhs[0] + ys[0] - miny
        for j in xrange(0, len(xs)):
            xs[j] = xs[j] - minx
            ys[j] = ys[j] - miny
            maxw = max( maxw, (maxws[j] + xs[j]) )
            maxh = max( maxh, (maxhs[j] + ys[j]) )

        # open the image file for output
        ifile = open(imgfile,'w')
        ifile.write('<?xml version="1.0" standalone="no"?>\n')
        ifile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
        ifile.write('<svg width="%dpx" height="%dpx" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (math.floor(maxw/10), math.floor(maxh/10), maxw, maxh))
        ifile.write('<defs>\n')
        for j in xrange(0,len(gdefs)):
            ifile.write(gdefs[j])
        ifile.write('</defs>\n')
        for j in xrange(0,len(gids)):
            ifile.write('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (gids[j], xs[j], ys[j]))
        ifile.write('</svg>')
        ifile.close()

        return 0



    # return tag at line pos in document
    def lineinDoc(self, pos) :
        # NOTE: a pos outside [0, docSize) falls through the if and the
        # method implicitly returns None instead of a (name, argres) pair
        if (pos >= 0) and (pos < self.docSize) :
            item = self.docList[pos]
            if item.find('=') >= 0:
                (name, argres) = item.split('=',1)
            else :
                name = item
                argres = ''
            return name, argres


    # find tag in doc if within pos to end inclusive
    def findinDoc(self, tagpath, pos, end) :
        # returns (line-number, value) of the first tag whose name ends with
        # tagpath, or (-1, None) when not found; end == -1 means search to EOF
        result = None
        if end == -1 :
            end = self.docSize
        else:
            end = min(self.docSize, end)
        foundat = -1
        for j in xrange(pos, end):
            item = self.docList[j]
            if item.find('=') >= 0:
                (name, argres) = item.split('=',1)
            else :
                name = item
                argres = ''
            if name.endswith(tagpath) :
                result = argres
                foundat = j
                break
        return foundat, result


    # return list of start positions for the tagpath
    def posinDoc(self, tagpath):
        startpos = []
        pos = 0
        res = ""
        while res != None :
            (foundpos, res) = self.findinDoc(tagpath, pos, -1)
            if res != None :
                startpos.append(foundpos)
                pos = foundpos + 1
        return startpos


    # returns a vector of integers for the tagpath
    def getData(self, tagpath, pos, end):
        # empty list when the tag is missing or carries no value
        argres=[]
        (foundat, argt) = self.findinDoc(tagpath, pos, end)
        if (argt != None) and (len(argt) > 0) :
            argList = argt.split('|')
            argres = [ int(strval) for strval in argList]
        return argres


    # get the class
    def getClass(self, pclass):
        nclass = pclass

        # class names are an issue given topaz may start them with numerals (not allowed),
        # use a mix of cases (which cause some browsers problems), and actually
        # attach numbers after "_reclustered*" to the end to deal classeses that inherit
        # from a base class (but then not actually provide all of these _reclustereed
        # classes in the stylesheet!

        # so we clean this up by lowercasing, prepend 'cl-', and getting any baseclass
        # that exists in the stylesheet first, and then adding this specific class
        # after

        # also some class names have spaces in them so need to convert to dashes
        if nclass != None :
            nclass = nclass.replace(' ','-')
            classres = ''
            nclass = nclass.lower()
            nclass = 'cl-' + nclass
            baseclass = ''
            # graphic is the base class for captions
            if nclass.find('cl-cap-') >=0 :
                classres = 'graphic' + ' '
            else :
                # strip to find baseclass
                p = nclass.find('_')
                if p > 0 :
                    baseclass = nclass[0:p]
                    if baseclass in self.classList:
                        classres += baseclass + ' '
            classres += nclass
            nclass = classres
        return nclass


    # develop a sorted description of the starting positions of
    # groups and regions on the page, as well as the page type
    def PageDescription(self):

        # compare entries by start line (Python 2 cmp-style comparator)
        def compare(x, y):
            (xtype, xval) = x
            (ytype, yval) = y
            if xval > yval:
                return 1
            if xval == yval:
                return 0
            return -1

        result = []
        (pos, pagetype) = self.findinDoc('page.type',0,-1)

        groupList = self.posinDoc('page.group')
        groupregionList = self.posinDoc('page.group.region')
        pageregionList = self.posinDoc('page.region')
        # integrate into one list
        for j in groupList:
            result.append(('grpbeg',j))
        for j in groupregionList:
            result.append(('gregion',j))
        for j in pageregionList:
            result.append(('pregion',j))
        result.sort(compare)

        # insert group end and page end indicators
        inGroup = False
        j = 0
        while True:
            if j == len(result): break
            rtype = result[j][0]
            rval = result[j][1]
            if not inGroup and (rtype == 'grpbeg') :
                inGroup = True
                j = j + 1
            elif inGroup and (rtype in ('grpbeg', 'pregion')):
                result.insert(j,('grpend',rval))
                inGroup = False
            else:
                j = j + 1
        if inGroup:
            result.append(('grpend',-1))
        result.append(('pageend', -1))
        return pagetype, result



    # build a description of the paragraph
    def getParaDescription(self, start, end, regtype):
        # returns (css-class, list of ('ocr'|'img'|'imgsa'|'svg', index)
        # word/image descriptors) for the paragraph between lines start..end

        result = []

        # paragraph
        (pos, pclass) = self.findinDoc('paragraph.class',start,end)

        pclass = self.getClass(pclass)

        # build up a description of the paragraph in result and return it
        # first check for the basic - all words paragraph
        (pos, sfirst) = self.findinDoc('paragraph.firstWord',start,end)
        (pos, slast) = self.findinDoc('paragraph.lastWord',start,end)
        if (sfirst != None) and (slast != None) :
            first = int(sfirst)
            last = int(slast)

            # vertical text, tables (and optionally fixed regions /
            # inverted classes) can not be rendered as text: use an image
            makeImage = (regtype == 'vertical') or (regtype == 'table')
            if self.fixedimage:
                makeImage = makeImage or (regtype == 'fixed')

            if (pclass != None):
                makeImage = makeImage or (pclass.find('.inverted') >= 0)
                if self.fixedimage :
                    makeImage = makeImage or (pclass.find('cl-f-') >= 0)

            if not makeImage :
                # standard all word paragraph
                for wordnum in xrange(first, last):
                    result.append(('ocr', wordnum))
                return pclass, result

            # convert paragraph to svg image
            # translate first and last word into first and last glyphs
            # and generate inline image and include it
            glyphList = []
            firstglyphList = self.getData('word.firstGlyph',0,-1)
            gidList = self.getData('info.glyph.glyphID',0,-1)
            firstGlyph = firstglyphList[first]
            if last < len(firstglyphList):
                lastGlyph = firstglyphList[last]
            else :
                lastGlyph = len(gidList)
            for glyphnum in xrange(firstGlyph, lastGlyph):
                glyphList.append(glyphnum)
            # include any extratokens if they exist
            (pos, sfg) = self.findinDoc('extratokens.firstGlyph',start,end)
            (pos, slg) = self.findinDoc('extratokens.lastGlyph',start,end)
            if (sfg != None) and (slg != None):
                for glyphnum in xrange(int(sfg), int(slg)):
                    glyphList.append(glyphnum)
            num = self.svgcount
            self.glyphs_to_image(glyphList)
            self.svgcount += 1
            result.append(('svg', num))
            return pclass, result

        # this type of paragraph may be made up of multiple spans, inline
        # word monograms (images), and words with semantic meaning,
        # plus glyphs used to form starting letter of first word

        # need to parse this type line by line
        line = start + 1
        word_class = ''

        # if end is -1 then we must search to end of document
        if end == -1 :
            end = self.docSize

        # seems some xml has last* coming before first* so we have to
        # handle any order
        sp_first = -1
        sp_last = -1

        gl_first = -1
        gl_last = -1

        ws_first = -1
        ws_last = -1

        word_class = ''

        while (line < end) :

            (name, argres) = self.lineinDoc(line)

            if name.endswith('span.firstWord') :
                sp_first = int(argres)

            elif name.endswith('span.lastWord') :
                sp_last = int(argres)

            elif name.endswith('word.firstGlyph') :
                gl_first = int(argres)

            elif name.endswith('word.lastGlyph') :
                gl_last = int(argres)

            elif name.endswith('word_semantic.firstWord'):
                ws_first = int(argres)

            elif name.endswith('word_semantic.lastWord'):
                ws_last = int(argres)

            elif name.endswith('word.class'):
                # e.g. 'spaceafter-1' marks a word followed by a space
                (cname, space) = argres.split('-',1)
                if space == '' : space = '0'
                if (cname == 'spaceafter') and (int(space) > 0) :
                    word_class = 'sa'

            elif name.endswith('word.img.src'):
                result.append(('img' + word_class, int(argres)))
                word_class = ''

            elif name.endswith('region.img.src'):
                result.append(('img' + word_class, int(argres)))

            # once both ends of a span/glyph-run/semantic-word are known,
            # emit it and reset the pair
            if (sp_first != -1) and (sp_last != -1):
                for wordnum in xrange(sp_first, sp_last):
                    result.append(('ocr', wordnum))
                sp_first = -1
                sp_last = -1

            if (gl_first != -1) and (gl_last != -1):
                glyphList = []
                for glyphnum in xrange(gl_first, gl_last):
                    glyphList.append(glyphnum)
                num = self.svgcount
                self.glyphs_to_image(glyphList)
                self.svgcount += 1
                result.append(('svg', num))
                gl_first = -1
                gl_last = -1

            if (ws_first != -1) and (ws_last != -1):
                for wordnum in xrange(ws_first, ws_last):
                    result.append(('ocr', wordnum))
                ws_first = -1
                ws_last = -1

            line += 1

        return pclass, result


    # assemble the html text of one paragraph from its descriptor list;
    # type is 'full', 'begin', 'middle' or 'end' (continuation handling)
    def buildParagraph(self, pclass, pdesc, type, regtype) :
        parares = ''
        sep =''

        classres = ''
        if pclass :
            classres = ' class="' + pclass + '"'

        # these region types use <br /> instead of newline for line breaks
        br_lb = (regtype == 'fixed') or (regtype == 'chapterheading') or (regtype == 'vertical')

        handle_links = len(self.link_id) > 0

        if (type == 'full') or (type == 'begin') :
            parares += '<p' + classres + '>'

        if (type == 'end'):
            parares += ' '

        lstart = len(parares)

        cnt = len(pdesc)

        for j in xrange( 0, cnt) :

            (wtype, num) = pdesc[j]

            if wtype == 'ocr' :
                word = self.ocrtext[num]
                sep = ' '

                if handle_links:
                    # link_id > 0 closes a link whose visible text is the
                    # words emitted since lstart (or the stored title)
                    link = self.link_id[num]
                    if (link > 0):
                        linktype = self.link_type[link-1]
                        title = self.link_title[link-1]
                        if (title == "") or (parares.rfind(title) < 0):
                            title=parares[lstart:]
                        if linktype == 'external' :
                            linkhref = self.link_href[link-1]
                            linkhtml = '<a href="%s">' % linkhref
                        else :
                            if len(self.link_page) >= link :
                                ptarget = self.link_page[link-1] - 1
                                linkhtml = '<a href="#page%04d">' % ptarget
                            else :
                                # just link to the current page
                                linkhtml = '<a href="#' + self.id + '">'
                        linkhtml += title + '</a>'
                        pos = parares.rfind(title)
                        if pos >= 0:
                            parares = parares[0:pos] + linkhtml + parares[pos+len(title):]
                        else :
                            parares += linkhtml
                        lstart = len(parares)
                        if word == '_link_' : word = ''
                    elif (link < 0) :
                        if word == '_link_' : word = ''

                if word == '_lb_':
                    # line break token: suppressed at hyphenation points and
                    # when links are active, <br /> in fixed-layout regions
                    if ((num-1) in self.dehyphen_rootid ) or handle_links:
                        word = ''
                        sep = ''
                    elif br_lb :
                        word = '<br />\n'
                        sep = ''
                    else :
                        word = '\n'
                        sep = ''

                if num in self.dehyphen_rootid :
                    # drop the trailing hyphen and join with the next word
                    word = word[0:-1]
                    sep = ''

                parares += word + sep

            elif wtype == 'img' :
                sep = ''
                parares += '<img src="img/img%04d.jpg" alt="" />' % num
                parares += sep

            elif wtype == 'imgsa' :
                # image word followed by a space
                sep = ' '
                parares += '<img src="img/img%04d.jpg" alt="" />' % num
                parares += sep

            elif wtype == 'svg' :
                sep = ''
                parares += '<img src="img/' + self.id + '_%04d.svg" alt="" />' % num
                parares += sep

        # strip the trailing separator before closing the paragraph
        if len(sep) > 0 : parares = parares[0:-1]
        if (type == 'full') or (type == 'end') :
            parares += '</p>'
        return parares



    # walk the document tree collecting the information needed
    # to build an html page using the ocrText

    def process(self):

        htmlpage = ''

        # get the ocr text
        (pos, argres) = self.findinDoc('info.word.ocrText',0,-1)
        if argres : self.ocrtext = argres.split('|')

        # get information to dehyphenate the text
        self.dehyphen_rootid = self.getData('info.dehyphen.rootID',0,-1)

        # determine if first paragraph is continued from previous page
        (pos, self.parastems_stemid) = self.findinDoc('info.paraStems.stemID',0,-1)
        first_para_continued = (self.parastems_stemid  != None)

        # determine if last paragraph is continued onto the next page
        (pos, self.paracont_stemid) = self.findinDoc('info.paraCont.stemID',0,-1)
        last_para_continued = (self.paracont_stemid != None)

        # collect link ids
        self.link_id = self.getData('info.word.link_id',0,-1)

        # collect link destination page numbers
        self.link_page = self.getData('info.links.page',0,-1)

        # collect link types (container versus external)
        (pos, argres) = self.findinDoc('info.links.type',0,-1)
        if argres : self.link_type = argres.split('|')

        # collect link destinations
        (pos, argres) = self.findinDoc('info.links.href',0,-1)
        if argres : self.link_href = argres.split('|')

        # collect link titles
        (pos, argres) = self.findinDoc('info.links.title',0,-1)
        if argres :
            self.link_title = argres.split('|')
        else:
            self.link_title.append('')

        # get a descriptions of the starting points of the regions
        # and groups on the page
        (pagetype, pageDesc) = self.PageDescription()
        regcnt = len(pageDesc) - 1

        anchorSet = False
        breakSet = False
        inGroup = False

        # process each region on the page and convert what you can to html

        for j in xrange(regcnt):

            (etype, start) = pageDesc[j]
            (ntype, end) = pageDesc[j+1]


            # set anchor for link target on this page
            if not anchorSet and not first_para_continued:
                htmlpage += '<div style="visibility: hidden; height: 0; width: 0;" id="'
                htmlpage += self.id + '" title="pagetype_' + pagetype + '"></div>\n'
                anchorSet = True

            # handle groups of graphics with text captions
            if (etype == 'grpbeg'):
                (pos, grptype) = self.findinDoc('group.type', start, end)
                if grptype != None:
                    if grptype == 'graphic':
                        gcstr = ' class="' + grptype + '"'
                        htmlpage += '<div' + gcstr + '>'
                        inGroup = True

            elif (etype == 'grpend'):
                if inGroup:
                    htmlpage += '</div>\n'
                    inGroup = False

            else:
                (pos, regtype) = self.findinDoc('region.type',start,end)

                if regtype == 'graphic' :
                    (pos, simgsrc) = self.findinDoc('img.src',start,end)
                    if simgsrc:
                        if inGroup:
                            htmlpage += '<img src="img/img%04d.jpg" alt="" />' % int(simgsrc)
                        else:
                            htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)

                elif regtype == 'chapterheading' :
                    (pclass, pdesc) = self.getParaDescription(start,end, regtype)
                    if not breakSet:
                        htmlpage += '<div style="page-break-after: always;">&nbsp;</div>\n'
                        breakSet = True
                    tag = 'h1'
                    if pclass and (len(pclass) >= 7):
                        if pclass[3:7] == 'ch1-' : tag = 'h1'
                        if pclass[3:7] == 'ch2-' : tag = 'h2'
                        if pclass[3:7] == 'ch3-' : tag = 'h3'
                        htmlpage += '<' + tag + ' class="' + pclass + '">'
                    else:
                        htmlpage += '<' + tag + '>'
                    htmlpage += self.buildParagraph(pclass, pdesc, 'middle', regtype)
                    htmlpage += '</' + tag + '>'

                elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') or (regtype == 'listitem'):
                    ptype = 'full'
                    # check to see if this is a continution from the previous page
                    if first_para_continued :
                        ptype = 'end'
                        first_para_continued = False
                    (pclass, pdesc) = self.getParaDescription(start,end, regtype)
                    if pclass and (len(pclass) >= 6) and (ptype == 'full'):
                        tag = 'p'
                        if pclass[3:6] == 'h1-' : tag = 'h4'
                        if pclass[3:6] == 'h2-' : tag = 'h5'
                        if pclass[3:6] == 'h3-' : tag = 'h6'
                        htmlpage += '<' + tag + ' class="' + pclass + '">'
                        htmlpage += self.buildParagraph(pclass, pdesc, 'middle', regtype)
                        htmlpage += '</' + tag + '>'
                    else :
                        htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)

                elif (regtype == 'tocentry') :
                    ptype = 'full'
                    if first_para_continued :
                        ptype = 'end'
                        first_para_continued = False
                    (pclass, pdesc) = self.getParaDescription(start,end, regtype)
                    htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)


                elif (regtype == 'vertical') or (regtype == 'table') :
                    ptype = 'full'
                    if inGroup:
                        ptype = 'middle'
                    if first_para_continued :
                        ptype = 'end'
                        first_para_continued = False
                    (pclass, pdesc) = self.getParaDescription(start, end, regtype)
                    htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)


                elif (regtype == 'synth_fcvr.center'):
                    (pos, simgsrc) = self.findinDoc('img.src',start,end)
                    if simgsrc:
                        htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)

                else :
                    # unknown region type: guess text vs graphic from whether
                    # any paragraph/span tags are present in the region
                    print '          Making region type', regtype,
                    (pos, temp) = self.findinDoc('paragraph',start,end)
                    (pos2, temp) = self.findinDoc('span',start,end)
                    if pos != -1 or pos2 != -1:
                        print ' a "text" region'
                        orig_regtype = regtype
                        regtype = 'fixed'
                        ptype = 'full'
                        # check to see if this is a continution from the previous page
                        if first_para_continued :
                            ptype = 'end'
                            first_para_continued = False
                        (pclass, pdesc) = self.getParaDescription(start,end, regtype)
                        if not pclass:
                            if orig_regtype.endswith('.right')     : pclass = 'cl-right'
                            elif orig_regtype.endswith('.center')  : pclass = 'cl-center'
                            elif orig_regtype.endswith('.left')    : pclass = 'cl-left'
                            elif orig_regtype.endswith('.justify') : pclass = 'cl-justify'
                        if pclass and (ptype == 'full') and (len(pclass) >= 6):
                            tag = 'p'
                            if pclass[3:6] == 'h1-' : tag = 'h4'
                            if pclass[3:6] == 'h2-' : tag = 'h5'
                            if pclass[3:6] == 'h3-' : tag = 'h6'
                            htmlpage += '<' + tag + ' class="' + pclass + '">'
                            htmlpage += self.buildParagraph(pclass, pdesc, 'middle', regtype)
                            htmlpage += '</' + tag + '>'
                        else :
                            htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
                    else :
                        print ' a "graphic" region'
                        (pos, simgsrc) = self.findinDoc('img.src',start,end)
                        if simgsrc:
                            htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)


        if last_para_continued :
            # drop the closing </p> so the next page can continue the paragraph
            if htmlpage[-4:] == '</p>':
                htmlpage = htmlpage[0:-4]
            last_para_continued = False

        return htmlpage
+
+
+
def convert2HTML(flatxml, classlst, fileid, bookDir, gdict, fixedimage):
    # Build the html fragment for one page by walking its flattened xml.
    parser = DocParser(flatxml, classlst, fileid, bookDir, gdict, fixedimage)
    return parser.process()
--- /dev/null
+#! /usr/bin/python
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+
+import sys
+import csv
+import os
+import getopt
+from struct import pack
+from struct import unpack
+
+
class PParser(object):
    """Light-weight accessor over one page's flattened-xml description,
    exposing page dimensions, glyph positions/ids and image placements
    for svg generation. gd is the glyph dictionary (lookup() by id)."""
    def __init__(self, gd, flatxml):
        self.gd = gd
        self.flatdoc = flatxml.split('\n')
        self.temp = []
        # page dimensions may live under 'page.*' or 'book.*'
        foo = self.getData('page.h') or self.getData('book.h')
        self.ph = foo[0]
        foo = self.getData('page.w') or self.getData('book.w')
        self.pw = foo[0]
        self.gx = self.getData('info.glyph.x')
        self.gy = self.getData('info.glyph.y')
        self.gid = self.getData('info.glyph.glyphID')
    # return the '|'-separated values of the first tag ending in path,
    # converted to ints, or None when the tag is absent
    def getData(self, path):
        result = None
        cnt = len(self.flatdoc)
        for j in xrange(cnt):
            item = self.flatdoc[j]
            if item.find('=') >= 0:
                (name, argt) = item.split('=')
                argres = argt.split('|')
            else:
                name = item
                argres = []
            if (name.endswith(path)):
                result = argres
                break
        # in-place int conversion: result aliases argres, so the returned
        # list holds ints after this loop
        if (len(argres) > 0) :
            for j in xrange(0,len(argres)):
                argres[j] = int(argres[j])
        return result
    # like getData but scans self.temp and removes the matched line,
    # so repeated calls consume matching tags one by one
    def getDataTemp(self, path):
        result = None
        cnt = len(self.temp)
        for j in xrange(cnt):
            item = self.temp[j]
            if item.find('=') >= 0:
                (name, argt) = item.split('=')
                argres = argt.split('|')
            else:
                name = item
                argres = []
            if (name.endswith(path)):
                result = argres
                self.temp.pop(j)
                break
        if (len(argres) > 0) :
            for j in xrange(0,len(argres)):
                argres[j] = int(argres[j])
        return result
    def getImages(self):
        # NOTE(review): self.temp aliases self.flatdoc, so the pops in
        # getDataTemp permanently remove the img.* lines from flatdoc —
        # calling getImages a second time would return nothing; callers
        # appear to call it once per parser instance
        result = []
        self.temp = self.flatdoc
        while (self.getDataTemp('img') != None):
            h = self.getDataTemp('img.h')[0]
            w = self.getDataTemp('img.w')[0]
            x = self.getDataTemp('img.x')[0]
            y = self.getDataTemp('img.y')[0]
            src = self.getDataTemp('img.src')[0]
            result.append('<image xlink:href="../img/img%04d.jpg" x="%d" y="%d" width="%d" height="%d" />\n' % (src, x, y, w, h))
        return result
    # return '<id-attribute> <svg-path>' strings for every distinct glyph
    # used on the page, sorted by glyph id
    def getGlyphs(self):
        result = []
        if (self.gid != None) and (len(self.gid) > 0):
            glyphs = []
            for j in set(self.gid):
                glyphs.append(j)
            glyphs.sort()
            for gid in glyphs:
                id='id="gl%d"' % gid
                path = self.gd.lookup(id)
                if path:
                    result.append(id + ' ' + path)
        return result
+
+
def convert2SVG(gdict, flat_xml, counter, numfiles, svgDir, raw, meta_array, scaledpi):
    # Render one page as svg. With raw=True a bare standalone svg document is
    # produced; otherwise an xhtml wrapper with javascript prev/next paging
    # and zoom controls is built around the svg. counter is the zero-based
    # page index, numfiles the page count, meta_array supplies Title/Authors.
    ml = ''
    pp = PParser(gdict, flat_xml)
    ml += '<?xml version="1.0" standalone="no"?>\n'
    if (raw):
        ml += '<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n'
        ml += '<svg width="%fin" height="%fin" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (pp.pw / scaledpi, pp.ph / scaledpi, pp.pw -1, pp.ph -1)
        ml += '<title>Page %d - %s by %s</title>\n' % (counter, meta_array['Title'],meta_array['Authors'])
    else:
        ml += '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n'
        ml += '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" ><head>\n'
        ml += '<title>Page %d - %s by %s</title>\n' % (counter, meta_array['Title'],meta_array['Authors'])
        # in-page javascript: picks up ?dpi= from the url and drives paging/zoom
        ml += '<script><![CDATA[\n'
        ml += 'function gd(){var p=window.location.href.replace(/^.*\?dpi=(\d+).*$/i,"$1");return p;}\n'
        ml += 'var dpi=%d;\n' % scaledpi
        # first page has no prevpage; last page has no nextpage
        if (counter) :
            ml += 'var prevpage="page%04d.xhtml";\n' % (counter - 1)
        if (counter < numfiles-1) :
            ml += 'var nextpage="page%04d.xhtml";\n' % (counter + 1)
        ml += 'var pw=%d;var ph=%d;' % (pp.pw, pp.ph)
        # zooming in means displaying at a lower dpi (larger on screen)
        ml += 'function zoomin(){dpi=dpi*(0.8);setsize();}\n'
        ml += 'function zoomout(){dpi=dpi*1.25;setsize();}\n'
        ml += 'function setsize(){var svg=document.getElementById("svgimg");var prev=document.getElementById("prevsvg");var next=document.getElementById("nextsvg");var width=(pw/dpi)+"in";var height=(ph/dpi)+"in";svg.setAttribute("width",width);svg.setAttribute("height",height);prev.setAttribute("height",height);prev.setAttribute("width","50px");next.setAttribute("height",height);next.setAttribute("width","50px");}\n'
        ml += 'function ppage(){window.location.href=prevpage+"?dpi="+Math.round(dpi);}\n'
        ml += 'function npage(){window.location.href=nextpage+"?dpi="+Math.round(dpi);}\n'
        ml += 'var gt=gd();if(gt>0){dpi=gt;}\n'
        ml += 'window.onload=setsize;\n'
        ml += ']]></script>\n'
        ml += '</head>\n'
        ml += '<body onLoad="setsize();" style="background-color:#777;text-align:center;">\n'
        ml += '<div style="white-space:nowrap;">\n'
        # previous-page arrow (empty placeholder on the first page)
        if (counter == 0) :
            ml += '<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n'
        else:
            ml += '<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,150,95,5,95,295" fill="#AAAAAA" /></svg></a>\n'
        ml += '<a href="javascript:npage();"><svg id="svgimg" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" style="background-color:#FFF;border:1px solid black;">' % (pp.pw, pp.ph)
    # glyph path definitions, then image placements, then glyph uses
    if (pp.gid != None):
        ml += '<defs>\n'
        gdefs = pp.getGlyphs()
        for j in xrange(0,len(gdefs)):
            ml += gdefs[j]
        ml += '</defs>\n'
    img = pp.getImages()
    if (img != None):
        for j in xrange(0,len(img)):
            ml += img[j]
    if (pp.gid != None):
        for j in xrange(0,len(pp.gid)):
            ml += '<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (pp.gid[j], pp.gx[j], pp.gy[j])
    # completely empty page gets a placeholder message
    if (img == None or len(img) == 0) and (pp.gid == None or len(pp.gid) == 0):
        ml += '<text x="10" y="10" font-family="Helvetica" font-size="100" stroke="black">This page intentionally left blank.</text>\n<text x="10" y="110" font-family="Helvetica" font-size="50" stroke="black">Until this notice unintentionally gave it content. (gensvg.py)</text>\n'
    if (raw) :
        ml += '</svg>'
    else :
        ml += '</svg></a>\n'
        # next-page arrow (empty placeholder on the last page)
        if (counter == numfiles - 1) :
            ml += '<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n'
        else :
            ml += '<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,5,5,295,95,150" fill="#AAAAAA" /></svg></a>\n'
        ml += '</div>\n'
        ml += '<div><a href="javascript:zoomin();">zoom in</a> - <a href="javascript:zoomout();">zoom out</a></div>\n'
        ml += '</body>\n'
        ml += '</html>\n'
    return ml
+
--- /dev/null
+#! /usr/bin/python
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+
class Unbuffered:
    """Wrap a stream so that every write is immediately flushed."""
    def __init__(self, stream):
        self.stream = stream
    def write(self, data):
        out = self.stream
        out.write(data)
        out.flush()
    def __getattr__(self, attr):
        # Everything except write() is delegated to the wrapped stream.
        return getattr(self.stream, attr)
+
+import sys
+sys.stdout=Unbuffered(sys.stdout)
+
+import csv
+import os
+import getopt
+from struct import pack
+from struct import unpack
+
+
+# local support routines
+import convert2xml
+import flatxml2html
+import flatxml2svg
+import stylexml2css
+
+
+# Get a 7 bit encoded number from a file
def readEncodedNumber(file):
    """Read one variable-length 7-bit encoded integer from *file*.

    A leading 0xFF byte marks a negative value; bytes with the high bit set
    are continuation bytes carrying 7 payload bits each.  Returns None on
    end-of-file.
    """
    first = file.read(1)
    if len(first) == 0:
        return None
    data = ord(first)
    negative = (data == 0xFF)
    if negative:
        # the sign marker is followed by the magnitude bytes
        nxt = file.read(1)
        if len(nxt) == 0:
            return None
        data = ord(nxt)
    value = data
    if data >= 0x80:
        value = data & 0x7F
        while data >= 0x80:
            c = file.read(1)
            if len(c) == 0:
                return None
            data = ord(c)
            value = (value << 7) + (data & 0x7F)
    if negative:
        value = -value
    return value
+
+# Get a length prefixed string from the file
def lengthPrefixString(data):
    """Return *data* prefixed with its 7-bit encoded length.

    NOTE(review): encodeNumber is not defined anywhere in this module --
    presumably it lives in a companion module; verify before calling.
    readEncodedNumber/readString implement the inverse operation.
    """
    return encodeNumber(len(data))+data
+
def readString(file):
    """Read one length-prefixed string from *file*.

    Returns None at end-of-file, and "" when the payload is truncated.
    """
    n = readEncodedNumber(file)
    if n is None:
        return None
    raw = file.read(n)
    if len(raw) != n:
        # truncated input: report an empty string rather than failing
        return ""
    return unpack(str(n) + "s", raw)[0]
+
def getMetaArray(metaFile):
    """Parse a Topaz metadata0000.dat file into a {tag: value} dict.

    The file layout is: an encoded count followed by that many
    (tag, value) length-prefixed string pairs.
    """
    # parse the meta file
    result = {}
    fo = file(metaFile,'rb')
    size = readEncodedNumber(fo)
    for i in xrange(size):
        tag = readString(fo)
        value = readString(fo)
        result[tag] = value
        # print tag, value
    fo.close()
    return result
+
+
+# dictionary of all text strings by index value
+class Dictionary(object):
+ def __init__(self, dictFile):
+ self.filename = dictFile
+ self.size = 0
+ self.fo = file(dictFile,'rb')
+ self.stable = []
+ self.size = readEncodedNumber(self.fo)
+ for i in xrange(self.size):
+ self.stable.append(self.escapestr(readString(self.fo)))
+ self.pos = 0
+ def escapestr(self, str):
+ str = str.replace('&','&')
+ str = str.replace('<','<')
+ str = str.replace('>','>')
+ str = str.replace('=','=')
+ return str
+ def lookup(self,val):
+ if ((val >= 0) and (val < self.size)) :
+ self.pos = val
+ return self.stable[self.pos]
+ else:
+ print "Error - %d outside of string table limits" % val
+ sys.exit(-1)
+ def getSize(self):
+ return self.size
+ def getPos(self):
+ return self.pos
+
+
class PageDimParser(object):
    """Extracts the page height/width tags from a flattened page XML dump."""
    def __init__(self, flatxml):
        self.flatdoc = flatxml.split('\n')
    # find tag if within pos to end inclusive
    def findinDoc(self, tagpath, pos, end):
        """Scan lines [pos, end) for a name ending in *tagpath*.

        Returns (index, value) of the first match, or (-1, None).
        """
        docList = self.flatdoc
        cnt = len(docList)
        if end == -1:
            end = cnt
        else:
            end = min(cnt, end)
        for j in range(pos, end):
            item = docList[j]
            if '=' in item:
                name, argres = item.split('=')
            else:
                name, argres = item, ''
            if name.endswith(tagpath):
                return j, argres
        return -1, None
    def process(self):
        """Return (height, width) as strings, '-1' when a tag is missing."""
        (pos, sph) = self.findinDoc('page.h', 0, -1)
        (pos, spw) = self.findinDoc('page.w', 0, -1)
        if sph is None:
            sph = '-1'
        if spw is None:
            spw = '-1'
        return sph, spw
+
def getPageDim(flatxml):
    """Convenience wrapper: parse *flatxml* and return (height, width) strings."""
    parser = PageDimParser(flatxml)
    return parser.process()
+
class GParser(object):
    """Parses a flattened glyph-file XML dump and reconstructs glyph outlines
    as SVG path strings.

    Coordinates are rescaled from each glyph's stored dpi (info.glyph.dpi)
    to the fixed output resolution self.dpi (1440).
    """
    def __init__(self, flatxml):
        self.flatdoc = flatxml.split('\n')
        self.dpi = 1440
        self.gh = self.getData('info.glyph.h')
        self.gw = self.getData('info.glyph.w')
        self.guse = self.getData('info.glyph.use')
        if self.guse :
            self.count = len(self.guse)
        else :
            self.count = 0
        self.gvtx = self.getData('info.glyph.vtx')
        self.glen = self.getData('info.glyph.len')
        self.gdpi = self.getData('info.glyph.dpi')
        self.vx = self.getData('info.vtx.x')
        self.vy = self.getData('info.vtx.y')
        self.vlen = self.getData('info.len.n')
        # append sentinel end-offsets so getPath can slice with [gly:gly+1]
        if self.vlen :
            self.glen.append(len(self.vlen))
        elif self.glen:
            self.glen.append(0)
        if self.vx :
            self.gvtx.append(len(self.vx))
        elif self.gvtx :
            self.gvtx.append(0)
    def getData(self, path):
        """Return the '|'-separated values of the first line named *path*,
        converted to ints in place, or None when the tag is absent."""
        result = None
        cnt = len(self.flatdoc)
        for j in xrange(cnt):
            item = self.flatdoc[j]
            if item.find('=') >= 0:
                (name, argt) = item.split('=')
                argres = argt.split('|')
            else:
                name = item
                argres = []
            if (name == path):
                result = argres
                break
        # NOTE(review): if flatdoc were empty, argres would be unbound here --
        # assumes every document has at least one line; verify upstream.
        if (len(argres) > 0) :
            for j in xrange(0,len(argres)):
                argres[j] = int(argres[j])
        return result
    def getGlyphDim(self, gly):
        """Return (height, width) of glyph *gly* scaled to self.dpi
        (Python 2 integer division)."""
        maxh = (self.gh[gly] * self.dpi) / self.gdpi[gly]
        maxw = (self.gw[gly] * self.dpi) / self.gdpi[gly]
        return maxh, maxw
    def getPath(self, gly):
        """Build the closed SVG path ('M ... C/Q ... z') for glyph *gly*.

        Returns '' when gly is out of range.  vlen appears to hold the
        cumulative last-vertex index of each contour (inferred from the
        slicing below).
        """
        path = ''
        if (gly < 0) or (gly >= self.count):
            return path
        tx = self.vx[self.gvtx[gly]:self.gvtx[gly+1]]
        ty = self.vy[self.gvtx[gly]:self.gvtx[gly+1]]
        p = 0
        for k in xrange(self.glen[gly], self.glen[gly+1]):
            if (p == 0):
                zx = tx[0:self.vlen[k]+1]
                zy = ty[0:self.vlen[k]+1]
            else:
                zx = tx[self.vlen[k-1]+1:self.vlen[k]+1]
                zy = ty[self.vlen[k-1]+1:self.vlen[k]+1]
            p += 1
            j = 0
            while ( j < len(zx) ):
                if (j == 0):
                    # Start Position.
                    path += 'M %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly])
                elif (j <= len(zx)-3):
                    # Cubic Bezier Curve
                    path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[j+2] * self.dpi / self.gdpi[gly], zy[j+2] * self.dpi / self.gdpi[gly])
                    j += 2
                elif (j == len(zx)-2):
                    # Cubic Bezier Curve to Start Position
                    path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly])
                    j += 1
                elif (j == len(zx)-1):
                    # Quadratic Bezier Curve to Start Position
                    path += 'Q %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly])

                j += 1
        path += 'z'
        return path
+
+
+
+# dictionary of all text strings by index value
class GlyphDict(object):
    """Maps glyph keys (strings of the form 'id="glNNN"') to SVG path markup."""
    def __init__(self):
        self.gdict = {}
    def lookup(self, id):
        """Return the stored markup for *id* (already in 'id="glN"' form) or None."""
        return self.gdict.get(id)
    def addGlyph(self, val, path):
        """Store *path* under the key 'id="glN"' built from numeric *val*."""
        self.gdict['id="gl%d"' % val] = path
+
+
+def generateBook(bookDir, raw, fixedimage):
+ # sanity check Topaz file extraction
+ if not os.path.exists(bookDir) :
+ print "Can not find directory with unencrypted book"
+ return 1
+
+ dictFile = os.path.join(bookDir,'dict0000.dat')
+ if not os.path.exists(dictFile) :
+ print "Can not find dict0000.dat file"
+ return 1
+
+ pageDir = os.path.join(bookDir,'page')
+ if not os.path.exists(pageDir) :
+ print "Can not find page directory in unencrypted book"
+ return 1
+
+ imgDir = os.path.join(bookDir,'img')
+ if not os.path.exists(imgDir) :
+ print "Can not find image directory in unencrypted book"
+ return 1
+
+ glyphsDir = os.path.join(bookDir,'glyphs')
+ if not os.path.exists(glyphsDir) :
+ print "Can not find glyphs directory in unencrypted book"
+ return 1
+
+ metaFile = os.path.join(bookDir,'metadata0000.dat')
+ if not os.path.exists(metaFile) :
+ print "Can not find metadata0000.dat in unencrypted book"
+ return 1
+
+ svgDir = os.path.join(bookDir,'svg')
+ if not os.path.exists(svgDir) :
+ os.makedirs(svgDir)
+
+ xmlDir = os.path.join(bookDir,'xml')
+ if not os.path.exists(xmlDir) :
+ os.makedirs(xmlDir)
+
+ otherFile = os.path.join(bookDir,'other0000.dat')
+ if not os.path.exists(otherFile) :
+ print "Can not find other0000.dat in unencrypted book"
+ return 1
+
+ print "Updating to color images if available"
+ spath = os.path.join(bookDir,'color_img')
+ dpath = os.path.join(bookDir,'img')
+ filenames = os.listdir(spath)
+ filenames = sorted(filenames)
+ for filename in filenames:
+ imgname = filename.replace('color','img')
+ sfile = os.path.join(spath,filename)
+ dfile = os.path.join(dpath,imgname)
+ imgdata = file(sfile,'rb').read()
+ file(dfile,'wb').write(imgdata)
+
+ print "Creating cover.jpg"
+ isCover = False
+ cpath = os.path.join(bookDir,'img')
+ cpath = os.path.join(cpath,'img0000.jpg')
+ if os.path.isfile(cpath):
+ cover = file(cpath, 'rb').read()
+ cpath = os.path.join(bookDir,'cover.jpg')
+ file(cpath, 'wb').write(cover)
+ isCover = True
+
+
+ print 'Processing Dictionary'
+ dict = Dictionary(dictFile)
+
+ print 'Processing Meta Data and creating OPF'
+ meta_array = getMetaArray(metaFile)
+
+ xname = os.path.join(xmlDir, 'metadata.xml')
+ metastr = ''
+ for key in meta_array:
+ metastr += '<meta name="' + key + '" content="' + meta_array[key] + '" />\n'
+ file(xname, 'wb').write(metastr)
+
+ print 'Processing StyleSheet'
+ # get some scaling info from metadata to use while processing styles
+ fontsize = '135'
+ if 'fontSize' in meta_array:
+ fontsize = meta_array['fontSize']
+
+ # also get the size of a normal text page
+ spage = '1'
+ if 'firstTextPage' in meta_array:
+ spage = meta_array['firstTextPage']
+ pnum = int(spage)
+
+ # get page height and width from first text page for use in stylesheet scaling
+ pname = 'page%04d.dat' % (pnum + 1)
+ fname = os.path.join(pageDir,pname)
+ flat_xml = convert2xml.fromData(dict, fname)
+
+ (ph, pw) = getPageDim(flat_xml)
+ if (ph == '-1') or (ph == '0') : ph = '11000'
+ if (pw == '-1') or (pw == '0') : pw = '8500'
+
+ # print ' ', 'other0000.dat'
+ xname = os.path.join(bookDir, 'style.css')
+ flat_xml = convert2xml.fromData(dict, otherFile)
+ cssstr , classlst = stylexml2css.convert2CSS(flat_xml, fontsize, ph, pw)
+ file(xname, 'wb').write(cssstr)
+ xname = os.path.join(xmlDir, 'other0000.xml')
+ file(xname, 'wb').write(convert2xml.getXML(dict, otherFile))
+
+ print 'Processing Glyphs'
+ gd = GlyphDict()
+ filenames = os.listdir(glyphsDir)
+ filenames = sorted(filenames)
+ glyfname = os.path.join(svgDir,'glyphs.svg')
+ glyfile = open(glyfname, 'w')
+ glyfile.write('<?xml version="1.0" standalone="no"?>\n')
+ glyfile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
+ glyfile.write('<svg width="512" height="512" viewBox="0 0 511 511" xmlns="http://www.w3.org/2000/svg" version="1.1">\n')
+ glyfile.write('<title>Glyphs for %s</title>\n' % meta_array['Title'])
+ glyfile.write('<defs>\n')
+ counter = 0
+ for filename in filenames:
+ # print ' ', filename
+ print '.',
+ fname = os.path.join(glyphsDir,filename)
+ flat_xml = convert2xml.fromData(dict, fname)
+
+ xname = os.path.join(xmlDir, filename.replace('.dat','.xml'))
+ file(xname, 'wb').write(convert2xml.getXML(dict, fname))
+
+ gp = GParser(flat_xml)
+ for i in xrange(0, gp.count):
+ path = gp.getPath(i)
+ maxh, maxw = gp.getGlyphDim(i)
+ fullpath = '<path id="gl%d" d="%s" fill="black" /><!-- width=%d height=%d -->\n' % (counter * 256 + i, path, maxw, maxh)
+ glyfile.write(fullpath)
+ gd.addGlyph(counter * 256 + i, fullpath)
+ counter += 1
+ glyfile.write('</defs>\n')
+ glyfile.write('</svg>\n')
+ glyfile.close()
+ print " "
+
+ # start up the html
+ htmlFileName = "book.html"
+ htmlstr = '<?xml version="1.0" encoding="utf-8"?>\n'
+ htmlstr += '<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.1 Strict//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11-strict.dtd">\n'
+ htmlstr += '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">\n'
+ htmlstr += '<head>\n'
+ htmlstr += '<meta http-equiv="content-type" content="text/html; charset=utf-8"/>\n'
+ htmlstr += '<title>' + meta_array['Title'] + ' by ' + meta_array['Authors'] + '</title>\n'
+ htmlstr += '<meta name="Author" content="' + meta_array['Authors'] + '" />\n'
+ htmlstr += '<meta name="Title" content="' + meta_array['Title'] + '" />\n'
+ htmlstr += '<meta name="ASIN" content="' + meta_array['ASIN'] + '" />\n'
+ htmlstr += '<meta name="GUID" content="' + meta_array['GUID'] + '" />\n'
+ htmlstr += '<link href="style.css" rel="stylesheet" type="text/css" />\n'
+ htmlstr += '</head>\n<body>\n'
+
+ print 'Processing Pages'
+ # Books are at 1440 DPI. This is rendering at twice that size for
+ # readability when rendering to the screen.
+ scaledpi = 1440.0
+
+ svgindex = '<?xml version="1.0" encoding="utf-8"?>\n'
+ svgindex += '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n'
+ svgindex += '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >'
+ svgindex += '<head>\n'
+ svgindex += '<title>' + meta_array['Title'] + '</title>\n'
+ svgindex += '<meta name="Author" content="' + meta_array['Authors'] + '" />\n'
+ svgindex += '<meta name="Title" content="' + meta_array['Title'] + '" />\n'
+ svgindex += '<meta name="ASIN" content="' + meta_array['ASIN'] + '" />\n'
+ svgindex += '<meta name="GUID" content="' + meta_array['GUID'] + '" />\n'
+ svgindex += '</head>\n'
+ svgindex += '<body>\n'
+
+ filenames = os.listdir(pageDir)
+ filenames = sorted(filenames)
+ numfiles = len(filenames)
+ counter = 0
+
+ for filename in filenames:
+ # print ' ', filename
+ print ".",
+
+ fname = os.path.join(pageDir,filename)
+ flat_xml = convert2xml.fromData(dict, fname)
+
+ xname = os.path.join(xmlDir, filename.replace('.dat','.xml'))
+ file(xname, 'wb').write(convert2xml.getXML(dict, fname))
+
+ # first get the html
+ htmlstr += flatxml2html.convert2HTML(flat_xml, classlst, fname, bookDir, gd, fixedimage)
+
+ # now get the svg image of the page
+ svgxml = flatxml2svg.convert2SVG(gd, flat_xml, counter, numfiles, svgDir, raw, meta_array, scaledpi)
+
+ if (raw) :
+ pfile = open(os.path.join(svgDir,filename.replace('.dat','.svg')), 'w')
+ svgindex += '<a href="svg/page%04d.svg">Page %d</a>\n' % (counter, counter)
+ else :
+ pfile = open(os.path.join(svgDir,'page%04d.xhtml' % counter), 'w')
+ svgindex += '<a href="svg/page%04d.xhtml">Page %d</a>\n' % (counter, counter)
+
+
+ pfile.write(svgxml)
+ pfile.close()
+
+ counter += 1
+
+ print " "
+
+ # finish up the html string and output it
+ htmlstr += '</body>\n</html>\n'
+ file(os.path.join(bookDir, htmlFileName), 'wb').write(htmlstr)
+
+ # finish up the svg index string and output it
+ svgindex += '</body>\n</html>\n'
+ file(os.path.join(bookDir, 'index_svg.xhtml'), 'wb').write(svgindex)
+
+ # build the opf file
+ opfname = os.path.join(bookDir, 'book.opf')
+ opfstr = '<?xml version="1.0" encoding="utf-8"?>\n'
+ opfstr += '<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="guid_id">\n'
+ # adding metadata
+ opfstr += ' <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">\n'
+ opfstr += ' <dc:identifier opf:scheme="GUID" id="guid_id">' + meta_array['GUID'] + '</dc:identifier>\n'
+ opfstr += ' <dc:identifier opf:scheme="ASIN">' + meta_array['ASIN'] + '</dc:identifier>\n'
+ opfstr += ' <dc:identifier opf:scheme="oASIN">' + meta_array['oASIN'] + '</dc:identifier>\n'
+ opfstr += ' <dc:title>' + meta_array['Title'] + '</dc:title>\n'
+ opfstr += ' <dc:creator opf:role="aut">' + meta_array['Authors'] + '</dc:creator>\n'
+ opfstr += ' <dc:language>en</dc:language>\n'
+ opfstr += ' <dc:date>' + meta_array['UpdateTime'] + '</dc:date>\n'
+ if isCover:
+ opfstr += ' <meta name="cover" content="bookcover"/>\n'
+ opfstr += ' </metadata>\n'
+ opfstr += '<manifest>\n'
+ opfstr += ' <item id="book" href="book.html" media-type="application/xhtml+xml"/>\n'
+ opfstr += ' <item id="stylesheet" href="style.css" media-type="text.css"/>\n'
+ # adding image files to manifest
+ filenames = os.listdir(imgDir)
+ filenames = sorted(filenames)
+ for filename in filenames:
+ imgname, imgext = os.path.splitext(filename)
+ if imgext == '.jpg':
+ imgext = 'jpeg'
+ if imgext == '.svg':
+ imgext = 'svg+xml'
+ opfstr += ' <item id="' + imgname + '" href="img/' + filename + '" media-type="image/' + imgext + '"/>\n'
+ if isCover:
+ opfstr += ' <item id="bookcover" href="cover.jpg" media-type="image/jpeg" />\n'
+ opfstr += '</manifest>\n'
+ # adding spine
+ opfstr += '<spine>\n <itemref idref="book" />\n</spine>\n'
+ if isCover:
+ opfstr += ' <guide>\n'
+ opfstr += ' <reference href="cover.jpg" type="cover" title="Cover"/>\n'
+ opfstr += ' </guide>\n'
+ opfstr += '</package>\n'
+ file(opfname, 'wb').write(opfstr)
+
+ print 'Processing Complete'
+
+ return 0
+
+def usage():
+ print "genbook.py generates a book from the extract Topaz Files"
+ print "Usage:"
+ print " genbook.py [-r] [-h [--fixed-image] <bookDir> "
+ print " "
+ print "Options:"
+ print " -h : help - print this usage message"
+ print " -r : generate raw svg files (not wrapped in xhtml)"
+ print " --fixed-image : genearate any Fixed Area as an svg image in the html"
+ print " "
+
+
+def main(argv):
+ bookDir = ''
+
+ if len(argv) == 0:
+ argv = sys.argv
+
+ try:
+ opts, args = getopt.getopt(argv[1:], "rh:",["fixed-image"])
+
+ except getopt.GetoptError, err:
+ print str(err)
+ usage()
+ return 1
+
+ if len(opts) == 0 and len(args) == 0 :
+ usage()
+ return 1
+
+ raw = 0
+ fixedimage = False
+ for o, a in opts:
+ if o =="-h":
+ usage()
+ return 0
+ if o =="-r":
+ raw = 1
+ if o =="--fixed-image":
+ fixedimage = True
+
+ bookDir = args[0]
+
+ rv = generateBook(bookDir, raw, fixedimage)
+ return rv
+
+
+if __name__ == '__main__':
+ sys.exit(main(''))
--- /dev/null
+#! /usr/bin/python
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+# For use with Topaz Scripts Version 2.6
+
class Unbuffered:
    """File-like proxy that forces a flush after each write."""
    def __init__(self, stream):
        self.stream = stream
    def write(self, data):
        self.stream.write(data)
        self.stream.flush()
    def __getattr__(self, name):
        # delegate any other attribute to the wrapped stream
        return getattr(self.stream, name)
+
+import sys
+sys.stdout=Unbuffered(sys.stdout)
+
+
+import os, getopt
+
+# local routines
+import convert2xml
+import flatxml2html
+import decode_meta
+
+
def usage():
    """Print command-line usage for genxml.py."""
    print 'Usage: '
    print ' '
    print '  genxml.py dict0000.dat unencryptedBookDir'
    print ' '
+
+
+
+def main(argv):
+ bookDir = ''
+
+ if len(argv) == 0:
+ argv = sys.argv
+
+ try:
+ opts, args = getopt.getopt(argv[1:], "h:")
+
+ except getopt.GetoptError, err:
+ print str(err)
+ usage()
+ sys.exit(1)
+
+ if len(opts) == 0 and len(args) == 0 :
+ usage()
+ sys.exit(1)
+
+ for o, a in opts:
+ if o =="-h":
+ usage()
+ sys.exit(0)
+
+ bookDir = args[0]
+
+ if not os.path.exists(bookDir) :
+ print "Can not find directory with unencrypted book"
+ sys.exit(1)
+
+ dictFile = os.path.join(bookDir,'dict0000.dat')
+ if not os.path.exists(dictFile) :
+ print "Can not find dict0000.dat file"
+ sys.exit(1)
+
+ pageDir = os.path.join(bookDir,'page')
+ if not os.path.exists(pageDir) :
+ print "Can not find page directory in unencrypted book"
+ sys.exit(1)
+
+ glyphsDir = os.path.join(bookDir,'glyphs')
+ if not os.path.exists(glyphsDir) :
+ print "Can not find glyphs directory in unencrypted book"
+ sys.exit(1)
+
+ otherFile = os.path.join(bookDir,'other0000.dat')
+ if not os.path.exists(otherFile) :
+ print "Can not find other0000.dat in unencrypted book"
+ sys.exit(1)
+
+ metaFile = os.path.join(bookDir,'metadata0000.dat')
+ if not os.path.exists(metaFile) :
+ print "Can not find metadata0000.dat in unencrypted book"
+ sys.exit(1)
+
+ xmlDir = os.path.join(bookDir,'xml')
+ if not os.path.exists(xmlDir):
+ os.makedirs(xmlDir)
+
+
+ print 'Processing ... '
+
+ print ' ', 'metadata0000.dat'
+ fname = os.path.join(bookDir,'metadata0000.dat')
+ xname = os.path.join(xmlDir, 'metadata.txt')
+ metastr = decode_meta.getMetaData(fname)
+ file(xname, 'wb').write(metastr)
+
+ print ' ', 'other0000.dat'
+ fname = os.path.join(bookDir,'other0000.dat')
+ xname = os.path.join(xmlDir, 'stylesheet.xml')
+ pargv=[]
+ pargv.append('convert2xml.py')
+ pargv.append(dictFile)
+ pargv.append(fname)
+ xmlstr = convert2xml.main(pargv)
+ file(xname, 'wb').write(xmlstr)
+
+ filenames = os.listdir(pageDir)
+ filenames = sorted(filenames)
+
+ for filename in filenames:
+ print ' ', filename
+ fname = os.path.join(pageDir,filename)
+ xname = os.path.join(xmlDir, filename.replace('.dat','.xml'))
+ pargv=[]
+ pargv.append('convert2xml.py')
+ pargv.append(dictFile)
+ pargv.append(fname)
+ xmlstr = convert2xml.main(pargv)
+ file(xname, 'wb').write(xmlstr)
+
+ filenames = os.listdir(glyphsDir)
+ filenames = sorted(filenames)
+
+ for filename in filenames:
+ print ' ', filename
+ fname = os.path.join(glyphsDir,filename)
+ xname = os.path.join(xmlDir, filename.replace('.dat','.xml'))
+ pargv=[]
+ pargv.append('convert2xml.py')
+ pargv.append(dictFile)
+ pargv.append(fname)
+ xmlstr = convert2xml.main(pargv)
+ file(xname, 'wb').write(xmlstr)
+
+
+ print 'Processing Complete'
+
+ return 0
+
+if __name__ == '__main__':
+ sys.exit(main(''))
--- /dev/null
+# engine to remove drm from Kindle for Mac books
+# for personal use for archiving and converting your ebooks
+# PLEASE DO NOT PIRATE!
+# We want all authors and Publishers, and eBook stores to live long and prosperous lives
+#
+# it borrows heavily from works by CMBDTC, IHeartCabbages, skindle,
+# unswindle, DiapDealer, some_updates and many many others
+
+from __future__ import with_statement
+
class Unbuffered:
    """Stream wrapper: write-through with an immediate flush."""
    def __init__(self, stream):
        self.stream = stream
    def write(self, data):
        target = self.stream
        target.write(data)
        target.flush()
    def __getattr__(self, attr):
        # fall back to the underlying stream for everything else
        return getattr(self.stream, attr)
+
+import sys
+sys.stdout=Unbuffered(sys.stdout)
+import os, csv, getopt
+from struct import pack
+from struct import unpack
+import zlib
+
+# for handling sub processes
+import subprocess
+from subprocess import Popen, PIPE, STDOUT
+import subasyncio
+from subasyncio import Process
+
+
+#Exception Handling
class K4MDEDRMError(Exception):
    """Recoverable error raised during Kindle-for-Mac DRM processing."""

class K4MDEDRMFatal(Exception):
    """Unrecoverable error raised during Kindle-for-Mac DRM processing."""
+
+#
+# crypto routines
+#
+import hashlib
+
def MD5(message):
    """Return the 16-byte MD5 digest of *message*."""
    return hashlib.md5(message).digest()
+
def SHA1(message):
    """Return the 20-byte SHA-1 digest of *message*."""
    return hashlib.sha1(message).digest()
+
def SHA256(message):
    """Return the 32-byte SHA-256 digest of *message*."""
    return hashlib.sha256(message).digest()
+
+# interface to needed routines in openssl's libcrypto
# interface to needed routines in openssl's libcrypto
def _load_crypto_libcrypto():
    """Bind the AES and PBKDF2 routines from OpenSSL's libcrypto via ctypes.

    Returns the LibCrypto wrapper class; raises K4MDEDRMError when the
    shared library cannot be located.
    """
    from ctypes import CDLL, byref, POINTER, c_void_p, c_char_p, c_int, c_long, \
        Structure, c_ulong, create_string_buffer, addressof, string_at, cast
    from ctypes.util import find_library

    libcrypto = find_library('crypto')
    if libcrypto is None:
        raise K4MDEDRMError('libcrypto not found')
    libcrypto = CDLL(libcrypto)

    AES_MAXNR = 14
    c_char_pp = POINTER(c_char_p)
    c_int_p = POINTER(c_int)

    class AES_KEY(Structure):
        _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))), ('rounds', c_int)]
    AES_KEY_p = POINTER(AES_KEY)

    def F(restype, name, argtypes):
        # declare one libcrypto entry point with its C signature
        func = getattr(libcrypto, name)
        func.restype = restype
        func.argtypes = argtypes
        return func

    AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p,c_int])

    AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',[c_char_p, c_int, AES_KEY_p])

    PKCS5_PBKDF2_HMAC_SHA1 = F(c_int, 'PKCS5_PBKDF2_HMAC_SHA1',
                               [c_char_p, c_ulong, c_char_p, c_ulong, c_ulong, c_ulong, c_char_p])

    class LibCrypto(object):
        def __init__(self):
            self._blocksize = 0
            self._keyctx = None
            self.iv = 0
        def set_decrypt_key(self, userkey, iv):
            """Validate the key size and expand the AES decryption key schedule."""
            self._blocksize = len(userkey)
            if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) :
                raise K4MDEDRMError('AES improper key used')
            # BUGFIX: a stray 'return' statement here previously prevented the
            # key schedule below from ever being initialized.
            keyctx = self._keyctx = AES_KEY()
            self.iv = iv
            rv = AES_set_decrypt_key(userkey, len(userkey) * 8, keyctx)
            if rv < 0:
                raise K4MDEDRMError('Failed to initialize AES key')
        def decrypt(self, data):
            """AES-CBC decrypt *data* with the prepared key and stored IV."""
            out = create_string_buffer(len(data))
            rv = AES_cbc_encrypt(data, out, len(data), self._keyctx, self.iv, 0)
            # NOTE(review): AES_cbc_encrypt is declared with restype None, so
            # rv is always None and this check can never fire; kept as-is.
            if rv == 0:
                raise K4MDEDRMError('AES decryption failed')
            return out.raw
        def keyivgen(self, passwd):
            """Derive 80 bytes of key+IV material from *passwd* via PBKDF2-HMAC-SHA1."""
            salt = '16743'
            saltlen = 5
            passlen = len(passwd)
            iterations = 0x3e8    # 1000 rounds; renamed from 'iter' (shadowed a builtin)
            keylen = 80
            out = create_string_buffer(keylen)
            rv = PKCS5_PBKDF2_HMAC_SHA1(passwd, passlen, salt, saltlen, iterations, keylen, out)
            return out.raw
    return LibCrypto
+
def _load_crypto():
    """Return the libcrypto wrapper class, or None when OpenSSL is unavailable."""
    try:
        return _load_crypto_libcrypto()
    except (ImportError, K4MDEDRMError):
        return None

LibCrypto = _load_crypto()
+
+#
+# Utility Routines
+#
+
+# uses a sub process to get the Hard Drive Serial Number using ioreg
+# returns with the first found serial number in that class
# uses a sub process to get the Hard Drive Serial Number using ioreg
# returns with the first found serial number in that class
def GetVolumeSerialNumber():
    """Return the serial number of disk0 as reported by ioreg.

    The MYSERIALNUMBER environment variable overrides the lookup; when no
    matching drive is found the fixed fallback '9999999999' is returned.
    """
    override = os.getenv('MYSERIALNUMBER')
    if override != None:
        return override
    cmdline = '/usr/sbin/ioreg -l -S -w 0 -r -c AppleAHCIDiskDriver'
    cmdline = cmdline.encode(sys.getfilesystemencoding())
    p = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
    p.wait('wait')
    bsdname = None
    sernum = None
    for resline in p.read().split('\n'):
        pp = resline.find('"Serial Number" = "')
        if pp >= 0:
            sernum = resline[pp+19:-1].strip()
        bb = resline.find('"BSD Name" = "')
        if bb >= 0:
            bsdname = resline[bb+14:-1].strip()
        # stop at the first serial number seen for the primary disk
        if (bsdname == 'disk0') and (sernum != None):
            return sernum
    return '9999999999'
+
+# uses unix env to get username instead of using sysctlbyname
# uses unix env to get username instead of using sysctlbyname
def GetUserName():
    """Return the current login name from the USER environment variable."""
    return os.getenv('USER')
+
+MAX_PATH = 255
+
+#
+# start of Kindle specific routines
+#
+
+global kindleDatabase
+
+# Various character maps used to decrypt books. Probably supposed to act as obfuscation
+charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
+charMap2 = "ZB0bYyc1xDdW2wEV3Ff7KkPpL8UuGA4gz-Tme9Nn_tHh5SvXCsIiR6rJjQaqlOoM"
+charMap3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
+charMap4 = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
+
+# Encode the bytes in data with the characters in map
# Encode the bytes in data with the characters in map
def encode(data, map):
    """Obfuscation-encode: each input byte becomes two characters of *map*.

    The first character encodes (byte ^ 0x80) // len(map), the second
    encodes byte % len(map).
    """
    result = ""
    base = len(map)
    for char in data:
        value = ord(char)
        result += map[(value ^ 0x80) // base] + map[value % base]
    return result
+
+# Hash the bytes in data and then encode the digest with the characters in map
def encodeHash(data,map):
    """Return the MD5 digest of *data* encoded through *map* (see encode)."""
    return encode(MD5(data),map)
+
+# Decode the string in data with the characters in map. Returns the decoded bytes
# Decode the string in data with the characters in map. Returns the decoded bytes
def decode(data,map):
    """Inverse of encode(): fold pairs of *map* characters back into bytes.

    Stops early at the first character not present in *map*.
    """
    result = ""
    limit = len(data) - 1
    i = 0
    while i < limit:
        high = map.find(data[i])
        low = map.find(data[i+1])
        if high == -1 or low == -1:
            break
        value = (((high * len(map)) ^ 0x80) & 0xFF) + low
        result += pack("B", value)
        i += 2
    return result
+
# implements a pseudo Mac version of the Windows built-in Crypto routine
def CryptUnprotectData(encryptedData):
    """Mac stand-in for the Windows CryptUnprotectData API.

    Derives an AES key/IV from the drive serial number and user name, then
    AES-CBC decrypts *encryptedData*.
    """
    secret = GetVolumeSerialNumber() + '!@#' + GetUserName()
    passwdData = encode(SHA256(secret), charMap1)
    crp = LibCrypto()
    key_iv = crp.keyivgen(passwdData)
    crp.set_decrypt_key(key_iv[0:32], key_iv[32:48])
    return crp.decrypt(encryptedData)
+
+# Locate and open the .kindle-info file
+def openKindleInfo():
+ home = os.getenv('HOME')
+ kinfopath = home + '/Library/Application Support/Amazon/Kindle/storage/.kindle-info'
+ if not os.path.exists(kinfopath):
+ kinfopath = home + '/Library/Application Support/Amazon/Kindle for Mac/storage/.kindle-info'
+ if not os.path.exists(kinfopath):
+ raise K4MDEDRMError('Error: .kindle-info file can not be found')
+ return open(kinfopath,'r')
+
+# Parse the Kindle.info file and return the records as a list of key-values
# Parse the Kindle.info file and return the records as a list of key-values
def parseKindleInfo():
    """Parse the .kindle-info store into {encoded key: encrypted value}."""
    DB = {}
    infoReader = openKindleInfo()
    infoReader.read(1)          # skip the single leading byte
    for item in infoReader.read().split('['):
        keyval = item.split(':')
        DB[keyval[0]] = keyval[1]
    return DB
+
+# Get a record from the Kindle.info file for the key "hashedKey" (already hashed and encoded). Return the decoded and decrypted record
# Get a record from the Kindle.info file for the key "hashedKey" (already hashed and encoded). Return the decoded and decrypted record
def getKindleInfoValueForHash(hashedKey):
    """Look up *hashedKey* in the global kindleDatabase and return the
    decoded, decrypted record."""
    global kindleDatabase
    encryptedValue = decode(kindleDatabase[hashedKey], charMap2)
    return decode(CryptUnprotectData(encryptedValue), charMap1)
+
+# Get a record from the Kindle.info file for the string in "key" (plaintext). Return the decoded and decrypted record
def getKindleInfoValueForKey(key):
    """Look up a plaintext *key*: hash/encode it, then fetch the record."""
    return getKindleInfoValueForHash(encodeHash(key,charMap2))
+
# Find if the original string for a hashed/encoded string is known. If so return the original string otherwise return an empty string.
def findNameForHash(hash):
    """Return the known plaintext name whose encoded hash equals *hash*,
    or "" when the hash is not recognized."""
    names = ["kindle.account.tokens","kindle.cookie.item","eulaVersionAccepted","login_date","kindle.token.item","login","kindle.key.item","kindle.name.info","kindle.device.info", "MazamaRandomNumber"]
    for name in names:
        if encodeHash(name, charMap2) == hash:
            return name
    return ""
+
+# Print all the records from the kindle.info file (option -i)
def printKindleInfo():
    """Print every record in the global kindleDatabase, with its decrypted
    value, labelling records whose hashed name is recognized."""
    for record in kindleDatabase:
        name = findNameForHash(record)
        if name != "" :
            print (name)
            print ("--------------------------")
        else :
            print ("Unknown Record")
        print getKindleInfoValueForHash(record)
        print "\n"
+
+#
+# PID generation routines
+#
+
# Returns the two bits at offset from a bit field
# Returns two bits at offset from a bit field
def getTwoBitsFromBitField(bitField,offset):
    """Return the 2-bit group at *offset* (0 = most significant pair of byte 0)."""
    byteNumber, pairIndex = divmod(offset, 4)
    shift = 6 - 2 * pairIndex
    return (ord(bitField[byteNumber]) >> shift) & 3
+
+# Returns the six bits at offset from a bit field
# Returns the six bits at offset from a bit field
def getSixBitsFromBitField(bitField,offset):
    """Return the 6-bit group at *offset*, assembled from three 2-bit groups."""
    base = offset * 3
    hi = getTwoBitsFromBitField(bitField, base)
    mid = getTwoBitsFromBitField(bitField, base + 1)
    lo = getTwoBitsFromBitField(bitField, base + 2)
    return (hi << 4) | (mid << 2) | lo
+
+# 8 bits to six bits encoding from hash to generate PID string
+def encodePID(hash):
+ global charMap3
+ PID = ""
+ for position in range (0,8):
+ PID += charMap3[getSixBitsFromBitField(hash,position)]
+ return PID
+
+
+#
+# Main
+#
+
def main(argv=sys.argv):
    """Read the local .kindle-info store and print every record it holds.

    Always returns 0; a failure to parse the store is reported and simply
    leaves nothing to print.
    """
    global kindleDatabase

    kindleDatabase = None

    #
    # Read the encrypted database
    #

    try:
        kindleDatabase = parseKindleInfo()
    except Exception, message:
        # deliberately broad: any parse failure is reported, not fatal
        print(message)

    if kindleDatabase != None :
        printKindleInfo()

    return 0

if __name__ == '__main__':
    sys.exit(main())
--- /dev/null
+#!/usr/bin/env python
+
+# engine to remove drm from Kindle for Mac and Kindle for PC books
+# for personal use for archiving and converting your ebooks
+
+# PLEASE DO NOT PIRATE EBOOKS!
+
+# We want all authors and publishers, and eBook stores to live
+# long and prosperous lives but at the same time we just want to
+# be able to read OUR books on whatever device we want and to keep
+# readable for a long, long time
+
+# This borrows very heavily from works by CMBDTC, IHeartCabbages, skindle,
+# unswindle, DarkReverser, ApprenticeAlf, DiapDealer, some_updates
+# and many many others
+
+# It can run standalone to convert K4M/K4PC/Mobi files, or it can be installed as a
+# plugin for Calibre (http://calibre-ebook.com/about) so that importing
# K4 or Mobi with DRM is no longer a multi-step process.
+#
+# ***NOTE*** If you are using this script as a calibre plugin for a K4M or K4PC ebook
+# then calibre must be installed on the same machine and in the same account as K4PC or K4M
+# for the plugin version to function properly.
+#
+# To create a Calibre plugin, rename this file so that the filename
+# ends in '_plugin.py', put it into a ZIP file with all its supporting python routines
+# and import that ZIP into Calibre using its plugin configuration GUI.
+
+from __future__ import with_statement
+
+__version__ = '1.4'
+
class Unbuffered:
    """Make a stream unbuffered by flushing after every write."""
    def __init__(self, stream):
        self.stream = stream
    def __getattr__(self, attr):
        # any attribute we do not define comes from the wrapped stream
        return getattr(self.stream, attr)
    def write(self, data):
        self.stream.write(data)
        self.stream.flush()
+
+import sys
+import os, csv, getopt
+import binascii
+import zlib
+import re
+import zlib, zipfile, tempfile, shutil
+from struct import pack, unpack, unpack_from
+
class DrmException(Exception):
    """Raised when DRM removal cannot proceed (bad input, missing keys, ...)."""
+
# Detect whether we are running inside the Calibre plugin environment.
inCalibre = 'calibre' in sys.modules
+
def zipUpDir(myzip, tempdir,localname):
    """Recursively add the contents of tempdir/localname to the open
    ZipFile *myzip*, using archive paths relative to *tempdir*."""
    currentdir = tempdir
    if localname != "":
        currentdir = os.path.join(currentdir, localname)
    for entry in os.listdir(currentdir):
        arcpath = os.path.join(localname, entry)
        realpath = os.path.join(currentdir, entry)
        if os.path.isfile(realpath):
            myzip.write(realpath, arcpath)
        elif os.path.isdir(realpath):
            # recurse into subdirectories, keeping the relative prefix
            zipUpDir(myzip, tempdir, arcpath)
+
+def usage(progname):
+    """Print a one-screen command-line usage summary."""
+    print "Removes DRM protection from K4PC/M, Kindle, Mobi and Topaz ebooks"
+    print "Usage:"
+    print " %s [-k <kindle.info>] [-p <pidnums>] [-s <kindleSerialNumbers>] <infile> <outdir> " % progname
+
+#
+# Main
+#
+def main(argv=sys.argv):
+ import mobidedrm
+ import topazextract
+ import kgenpids
+ progname = os.path.basename(argv[0])
+
+ k4 = False
+ kInfoFiles = []
+ serials = []
+ pids = []
+
+ print ('K4MobiDeDrm v%(__version__)s '
+ 'provided by the work of many including DiapDealer, SomeUpdates, IHeartCabbages, CMBDTC, Skindle, DarkReverser, ApprenticeAlf, etc .' % globals())
+
+ print ' '
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], "k:p:s:")
+ except getopt.GetoptError, err:
+ print str(err)
+ usage(progname)
+ sys.exit(2)
+ if len(args)<2:
+ usage(progname)
+ sys.exit(2)
+
+ for o, a in opts:
+ if o == "-k":
+ if a == None :
+ raise DrmException("Invalid parameter for -k")
+ kInfoFiles.append(a)
+ if o == "-p":
+ if a == None :
+ raise DrmException("Invalid parameter for -p")
+ pids = a.split(',')
+ if o == "-s":
+ if a == None :
+ raise DrmException("Invalid parameter for -s")
+ serials = a.split(',')
+
+ # try with built in Kindle Info files
+ k4 = True
+
+ infile = args[0]
+ outdir = args[1]
+
+ # handle the obvious cases at the beginning
+ if not os.path.isfile(infile):
+ print "Error: Input file does not exist"
+ return 1
+
+ mobi = True
+ magic3 = file(infile,'rb').read(3)
+ if magic3 == 'TPZ':
+ mobi = False
+
+ bookname = os.path.splitext(os.path.basename(infile))[0]
+
+ if mobi:
+ mb = mobidedrm.MobiBook(infile)
+ else:
+ tempdir = tempfile.mkdtemp()
+ mb = topazextract.TopazBook(infile, tempdir)
+
+ title = mb.getBookTitle()
+ print "Processing Book: ", title
+
+ # build pid list
+ md1, md2 = mb.getPIDMetaInfo()
+ pidlst = kgenpids.getPidList(md1, md2, k4, pids, serials, kInfoFiles)
+
+ try:
+ if mobi:
+ unlocked_file = mb.processBook(pidlst)
+ else:
+ mb.processBook(pidlst)
+
+ except mobidedrm.DrmException, e:
+ print " ... not suceessful " + str(e) + "\n"
+ return 1
+ except topazextract.TpzDRMError, e:
+ print str(e)
+ print " Creating DeBug Full Zip Archive of Book"
+ zipname = os.path.join(outdir, bookname + '_debug' + '.zip')
+ myzip = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+ zipUpDir(myzip, tempdir, '')
+ myzip.close()
+ return 1
+
+ if mobi:
+ outfile = os.path.join(outdir,bookname + '_nodrm' + '.azw')
+ file(outfile, 'wb').write(unlocked_file)
+ return 0
+
+ # topaz: build up zip archives of results
+ print " Creating HTML ZIP Archive"
+ zipname = os.path.join(outdir, bookname + '_nodrm' + '.zip')
+ myzip1 = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+ myzip1.write(os.path.join(tempdir,'book.html'),'book.html')
+ myzip1.write(os.path.join(tempdir,'book.opf'),'book.opf')
+ if os.path.isfile(os.path.join(tempdir,'cover.jpg')):
+ myzip1.write(os.path.join(tempdir,'cover.jpg'),'cover.jpg')
+ myzip1.write(os.path.join(tempdir,'style.css'),'style.css')
+ zipUpDir(myzip1, tempdir, 'img')
+ myzip1.close()
+
+ print " Creating SVG ZIP Archive"
+ zipname = os.path.join(outdir, bookname + '_SVG' + '.zip')
+ myzip2 = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+ myzip2.write(os.path.join(tempdir,'index_svg.xhtml'),'index_svg.xhtml')
+ zipUpDir(myzip2, tempdir, 'svg')
+ zipUpDir(myzip2, tempdir, 'img')
+ myzip2.close()
+
+ print " Creating XML ZIP Archive"
+ zipname = os.path.join(outdir, bookname + '_XML' + '.zip')
+ myzip3 = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+ targetdir = os.path.join(tempdir,'xml')
+ zipUpDir(myzip3, targetdir, '')
+ zipUpDir(myzip3, tempdir, 'img')
+ myzip3.close()
+
+ shutil.rmtree(tempdir)
+ return 0
+
+if __name__ == '__main__':
+    # standalone invocation: unbuffer stdout so progress appears immediately
+    sys.stdout=Unbuffered(sys.stdout)
+    sys.exit(main())
+
+if not __name__ == "__main__" and inCalibre:
+ from calibre.customize import FileTypePlugin
+
+ class K4DeDRM(FileTypePlugin):
+ name = 'K4PC, K4Mac, Kindle Mobi and Topaz DeDRM' # Name of the plugin
+ description = 'Removes DRM from K4PC and Mac, Kindle Mobi and Topaz files. \
+ Provided by the work of many including DiapDealer, SomeUpdates, IHeartCabbages, CMBDTC, Skindle, DarkReverser, ApprenticeAlf, etc.'
+ supported_platforms = ['osx', 'windows', 'linux'] # Platforms this plugin will run on
+ author = 'DiapDealer, SomeUpdates' # The author of this plugin
+ version = (0, 1, 7) # The version number of this plugin
+ file_types = set(['prc','mobi','azw','azw1','tpz']) # The file types that this plugin will be applied to
+ on_import = True # Run this plugin during the import
+ priority = 210 # run this plugin before mobidedrm, k4pcdedrm, k4dedrm
+
+ def run(self, path_to_ebook):
+ from calibre.gui2 import is_ok_to_use_qt
+ from PyQt4.Qt import QMessageBox
+ from calibre.ptempfile import PersistentTemporaryDirectory
+
+ import kgenpids
+ import zlib
+ import zipfile
+ import topazextract
+ import mobidedrm
+
+ k4 = True
+ pids = []
+ serials = []
+ kInfoFiles = []
+
+ # Get supplied list of PIDs to try from plugin customization.
+ customvalues = self.site_customization.split(',')
+ for customvalue in customvalues:
+ customvalue = str(customvalue)
+ customvalue = customvalue.strip()
+ if len(customvalue) == 10 or len(customvalue) == 8:
+ pids.append(customvalue)
+ else :
+ if len(customvalue) == 16 and customvalue[0] == 'B':
+ serials.append(customvalue)
+ else:
+ print "%s is not a valid Kindle serial number or PID." % str(customvalue)
+
+ # Load any kindle info files (*.info) included Calibre's config directory.
+ try:
+ # Find Calibre's configuration directory.
+ confpath = os.path.split(os.path.split(self.plugin_path)[0])[0]
+ print 'K4MobiDeDRM: Calibre configuration directory = %s' % confpath
+ files = os.listdir(confpath)
+ filefilter = re.compile("\.info$", re.IGNORECASE)
+ files = filter(filefilter.search, files)
+
+ if files:
+ for filename in files:
+ fpath = os.path.join(confpath, filename)
+ kInfoFiles.append(fpath)
+ print 'K4MobiDeDRM: Kindle info file %s found in config folder.' % filename
+ except IOError:
+ print 'K4MobiDeDRM: Error reading kindle info files from config directory.'
+ pass
+
+
+ mobi = True
+ magic3 = file(path_to_ebook,'rb').read(3)
+ if magic3 == 'TPZ':
+ mobi = False
+
+ bookname = os.path.splitext(os.path.basename(path_to_ebook))[0]
+
+ if mobi:
+ mb = mobidedrm.MobiBook(path_to_ebook)
+ else:
+ tempdir = PersistentTemporaryDirectory()
+ mb = topazextract.TopazBook(path_to_ebook, tempdir)
+
+ title = mb.getBookTitle()
+ md1, md2 = mb.getPIDMetaInfo()
+ pidlst = kgenpids.getPidList(md1, md2, k4, pids, serials, kInfoFiles)
+
+ try:
+ if mobi:
+ unlocked_file = mb.processBook(pidlst)
+ else:
+ mb.processBook(pidlst)
+
+ except mobidedrm.DrmException:
+ #if you reached here then no luck raise and exception
+ if is_ok_to_use_qt():
+ d = QMessageBox(QMessageBox.Warning, "K4MobiDeDRM Plugin", "Error decoding: %s\n" % path_to_ebook)
+ d.show()
+ d.raise_()
+ d.exec_()
+ raise Exception("K4MobiDeDRM plugin could not decode the file")
+ return ""
+ except topazextract.TpzDRMError:
+ #if you reached here then no luck raise and exception
+ if is_ok_to_use_qt():
+ d = QMessageBox(QMessageBox.Warning, "K4MobiDeDRM Plugin", "Error decoding: %s\n" % path_to_ebook)
+ d.show()
+ d.raise_()
+ d.exec_()
+ raise Exception("K4MobiDeDRM plugin could not decode the file")
+ return ""
+
+ print "Success!"
+ if mobi:
+ of = self.temporary_file(bookname+'.mobi')
+ of.write(unlocked_file)
+ of.close()
+ return of.name
+
+ # topaz: build up zip archives of results
+ print " Creating HTML ZIP Archive"
+ of = self.temporary_file(bookname + '.zip')
+ myzip = zipfile.ZipFile(of.name,'w',zipfile.ZIP_DEFLATED, False)
+ myzip.write(os.path.join(tempdir,'book.html'),'book.html')
+ myzip.write(os.path.join(tempdir,'book.opf'),'book.opf')
+ if os.path.isfile(os.path.join(tempdir,'cover.jpg')):
+ myzip.write(os.path.join(tempdir,'cover.jpg'),'cover.jpg')
+ myzip.write(os.path.join(tempdir,'style.css'),'style.css')
+ zipUpDir(myzip, tempdir, 'img')
+ myzip.close()
+ return of.name
+
+ def customization_help(self, gui=False):
+ return 'Enter 10 character PIDs and/or Kindle serial numbers, separated by commas.'
# standlone set of Mac OSX specific routines needed for K4DeDRM
from __future__ import with_statement
-
import sys
import os
-
-#Exception Handling
-class K4MDrmException(Exception):
- pass
-
-import signal
-import threading
import subprocess
-from subprocess import Popen, PIPE, STDOUT
-
-# **heavily** chopped up and modfied version of asyncproc.py
-# to make it actually work on Windows as well as Mac/Linux
-# For the original see:
-# "http://www.lysator.liu.se/~bellman/download/"
-# author is "Thomas Bellman <bellman@lysator.liu.se>"
-# available under GPL version 3 or Later
-
-# create an asynchronous subprocess whose output can be collected in
-# a non-blocking manner
-
-# What a mess! Have to use threads just to get non-blocking io
-# in a cross-platform manner
-
-# luckily all thread use is hidden within this class
-
-class Process(object):
- def __init__(self, *params, **kwparams):
- if len(params) <= 3:
- kwparams.setdefault('stdin', subprocess.PIPE)
- if len(params) <= 4:
- kwparams.setdefault('stdout', subprocess.PIPE)
- if len(params) <= 5:
- kwparams.setdefault('stderr', subprocess.PIPE)
- self.__pending_input = []
- self.__collected_outdata = []
- self.__collected_errdata = []
- self.__exitstatus = None
- self.__lock = threading.Lock()
- self.__inputsem = threading.Semaphore(0)
- self.__quit = False
-
- self.__process = subprocess.Popen(*params, **kwparams)
- if self.__process.stdin:
- self.__stdin_thread = threading.Thread(
- name="stdin-thread",
- target=self.__feeder, args=(self.__pending_input,
- self.__process.stdin))
- self.__stdin_thread.setDaemon(True)
- self.__stdin_thread.start()
-
- if self.__process.stdout:
- self.__stdout_thread = threading.Thread(
- name="stdout-thread",
- target=self.__reader, args=(self.__collected_outdata,
- self.__process.stdout))
- self.__stdout_thread.setDaemon(True)
- self.__stdout_thread.start()
-
- if self.__process.stderr:
- self.__stderr_thread = threading.Thread(
- name="stderr-thread",
- target=self.__reader, args=(self.__collected_errdata,
- self.__process.stderr))
- self.__stderr_thread.setDaemon(True)
- self.__stderr_thread.start()
-
- def pid(self):
- return self.__process.pid
-
- def kill(self, signal):
- self.__process.send_signal(signal)
-
- # check on subprocess (pass in 'nowait') to act like poll
- def wait(self, flag):
- if flag.lower() == 'nowait':
- rc = self.__process.poll()
- else:
- rc = self.__process.wait()
- if rc != None:
- if self.__process.stdin:
- self.closeinput()
- if self.__process.stdout:
- self.__stdout_thread.join()
- if self.__process.stderr:
- self.__stderr_thread.join()
- return self.__process.returncode
-
- def terminate(self):
- if self.__process.stdin:
- self.closeinput()
- self.__process.terminate()
-
- # thread gets data from subprocess stdout
- def __reader(self, collector, source):
- while True:
- data = os.read(source.fileno(), 65536)
- self.__lock.acquire()
- collector.append(data)
- self.__lock.release()
- if data == "":
- source.close()
- break
- return
-
- # thread feeds data to subprocess stdin
- def __feeder(self, pending, drain):
- while True:
- self.__inputsem.acquire()
- self.__lock.acquire()
- if not pending and self.__quit:
- drain.close()
- self.__lock.release()
- break
- data = pending.pop(0)
- self.__lock.release()
- drain.write(data)
- # non-blocking read of data from subprocess stdout
- def read(self):
- self.__lock.acquire()
- outdata = "".join(self.__collected_outdata)
- del self.__collected_outdata[:]
- self.__lock.release()
- return outdata
-
- # non-blocking read of data from subprocess stderr
- def readerr(self):
- self.__lock.acquire()
- errdata = "".join(self.__collected_errdata)
- del self.__collected_errdata[:]
- self.__lock.release()
- return errdata
-
- # non-blocking write to stdin of subprocess
- def write(self, data):
- if self.__process.stdin is None:
- raise ValueError("Writing to process with stdin not a pipe")
- self.__lock.acquire()
- self.__pending_input.append(data)
- self.__inputsem.release()
- self.__lock.release()
-
- # close stdinput of subprocess
- def closeinput(self):
- self.__lock.acquire()
- self.__quit = True
- self.__inputsem.release()
- self.__lock.release()
+class K4MDrmException(Exception):
+ pass
# interface to needed routines in openssl's libcrypto
# Utility Routines
#
+
+# Various character maps used to decrypt books. Probably supposed to act as obfuscation
+charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
+charMap2 = "ZB0bYyc1xDdW2wEV3Ff7KkPpL8UuGA4gz-Tme9Nn_tHh5SvXCsIiR6rJjQaqlOoM"
+charMap3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
+charMap4 = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
+
+
+
# uses a sub process to get the Hard Drive Serial Number using ioreg
# returns with the serial number of drive whose BSD Name is "disk0"
def GetVolumeSerialNumber():
return sernum
cmdline = '/usr/sbin/ioreg -l -S -w 0 -r -c AppleAHCIDiskDriver'
cmdline = cmdline.encode(sys.getfilesystemencoding())
- p = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
- poll = p.wait('wait')
- results = p.read()
- reslst = results.split('\n')
+ p = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
+ out1, out2 = p.communicate()
+ reslst = out1.split('\n')
cnt = len(reslst)
bsdname = None
sernum = None
username = os.getenv('USER')
return username
-# Various character maps used to decrypt books. Probably supposed to act as obfuscation
-charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
-charMap2 = "ZB0bYyc1xDdW2wEV3Ff7KkPpL8UuGA4gz-Tme9Nn_tHh5SvXCsIiR6rJjQaqlOoM"
-charMap3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
-charMap4 = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
def encode(data, map):
result = ""
cleartext = crp.decrypt(encryptedData)
return cleartext
+
# Locate and open the .kindle-info file
def openKindleInfo(kInfoFile=None):
if kInfoFile == None:
home = os.getenv('HOME')
cmdline = 'find "' + home + '/Library/Application Support" -name ".kindle-info"'
cmdline = cmdline.encode(sys.getfilesystemencoding())
- p1 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
- poll = p1.wait('wait')
- results = p1.read()
- reslst = results.split('\n')
+ p1 = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
+ out1, out2 = p1.communicate()
+ reslst = out1.split('\n')
kinfopath = 'NONE'
cnt = len(reslst)
for j in xrange(cnt):
crypt32 = windll.crypt32
-#
# Various character maps used to decrypt books. Probably supposed to act as obfuscation
-#
charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
charMap2 = "AaZzB0bYyCc1XxDdW2wEeVv3FfUuG4g-TtHh5SsIiR6rJjQq7KkPpL8lOoMm9Nn_"
charMap3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
charMap4 = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
-#
-# Exceptions for all the problems that might happen during the script
-#
class DrmException(Exception):
pass
--- /dev/null
+#!/usr/bin/env python
+
+from __future__ import with_statement
+import sys
+import os, csv
+import binascii
+import zlib
+import re
+from struct import pack, unpack, unpack_from
+
+class DrmException(Exception):
+    """Raised when PID generation or kindle.info parsing fails."""
+    pass
+
+# NOTE(review): 'global' at module scope is a no-op; these lines only
+# document which names the functions below treat as module globals.
+global kindleDatabase
+global charMap1
+global charMap2
+global charMap3
+global charMap4
+
+# Platform-specific helpers; note charMap2 differs between the two backends.
+if sys.platform.startswith('win'):
+    from k4pcutils import openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap2
+if sys.platform.startswith('darwin'):
+    from k4mutils import openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap2
+
+# Character maps shared by the encode/decode helpers below.
+charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
+charMap3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
+charMap4 = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
+
+# crypto digestroutines
+import hashlib
+
+def MD5(message):
+ ctx = hashlib.md5()
+ ctx.update(message)
+ return ctx.digest()
+
+def SHA1(message):
+ ctx = hashlib.sha1()
+ ctx.update(message)
+ return ctx.digest()
+
+
+# Encode the bytes in data with the characters in map
+def encode(data, map):
+ result = ""
+ for char in data:
+ value = ord(char)
+ Q = (value ^ 0x80) // len(map)
+ R = value % len(map)
+ result += map[Q]
+ result += map[R]
+ return result
+
+# Hash the bytes in data and then encode the digest with the characters in map
+def encodeHash(data,map):
+    """MD5-hash data, then encode() the 16-byte digest with map."""
+    return encode(MD5(data),map)
+
+# Decode the string in data with the characters in map. Returns the decoded bytes
+def decode(data,map):
+ result = ""
+ for i in range (0,len(data)-1,2):
+ high = map.find(data[i])
+ low = map.find(data[i+1])
+ if (high == -1) or (low == -1) :
+ break
+ value = (((high * len(map)) ^ 0x80) & 0xFF) + low
+ result += pack("B",value)
+ return result
+
+
+# Parse the Kindle.info file and return the records as a list of key-values
+def parseKindleInfo(kInfoFile):
+    """Parse a kindle.info store into a dict of encoded-key -> encoded-value.
+
+    kInfoFile may be None; openKindleInfo then locates the default file.
+    """
+    DB = {}
+    infoReader = openKindleInfo(kInfoFile)
+    # skip the leading separator byte
+    infoReader.read(1)
+    data = infoReader.read()
+    # the record separator differs between the Windows and Mac file formats
+    if sys.platform.startswith('win'):
+        items = data.split('{')
+    else :
+        items = data.split('[')
+    for item in items:
+        splito = item.split(':')
+        # NOTE(review): raises IndexError on a record without ':' -- callers
+        # wrap this in try/except, so a malformed file just disables K4 PIDs
+        DB[splito[0]] =splito[1]
+    return DB
+
+# Get a record from the Kindle.info file for the key "hashedKey" (already hashed and encoded). Return the decoded and decrypted record
+def getKindleInfoValueForHash(hashedKey):
+    global kindleDatabase
+    global charMap1
+    global charMap2
+    encryptedValue = decode(kindleDatabase[hashedKey],charMap2)
+    # On Windows DPAPI returns the plaintext directly; on Mac the helper
+    # returns charMap1-encoded text that needs one more decode pass.
+    if sys.platform.startswith('win'):
+        return CryptUnprotectData(encryptedValue,"")
+    else:
+        cleartext = CryptUnprotectData(encryptedValue)
+        return decode(cleartext, charMap1)
+
+# Get a record from the Kindle.info file for the string in "key" (plaintext). Return the decoded and decrypted record
+def getKindleInfoValueForKey(key):
+    """Convenience wrapper: hash/encode the plaintext key, then look it up."""
+    global charMap2
+    return getKindleInfoValueForHash(encodeHash(key,charMap2))
+
+# Find if the original string for a hashed/encoded string is known. If so return the original string othwise return an empty string.
+def findNameForHash(hash):
+ global charMap2
+ names = ["kindle.account.tokens","kindle.cookie.item","eulaVersionAccepted","login_date","kindle.token.item","login","kindle.key.item","kindle.name.info","kindle.device.info", "MazamaRandomNumber"]
+ result = ""
+ for name in names:
+ if hash == encodeHash(name, charMap2):
+ result = name
+ break
+ return result
+
+# Print all the records from the kindle.info file (option -i)
+def printKindleInfo():
+    # kindleDatabase must have been populated by getK4Pids/parseKindleInfo first
+    for record in kindleDatabase:
+        name = findNameForHash(record)
+        if name != "" :
+            print (name)
+            print ("--------------------------")
+        else :
+            print ("Unknown Record")
+        print getKindleInfoValueForHash(record)
+        print "\n"
+
+#
+# PID generation routines
+#
+
+# Returns two bit at offset from a bit field
+def getTwoBitsFromBitField(bitField,offset):
+ byteNumber = offset // 4
+ bitPosition = 6 - 2*(offset % 4)
+ return ord(bitField[byteNumber]) >> bitPosition & 3
+
+# Returns the six bits at offset from a bit field
+def getSixBitsFromBitField(bitField,offset):
+ offset *= 3
+ value = (getTwoBitsFromBitField(bitField,offset) <<4) + (getTwoBitsFromBitField(bitField,offset+1) << 2) +getTwoBitsFromBitField(bitField,offset+2)
+ return value
+
+# 8 bits to six bits encoding from hash to generate PID string
+def encodePID(hash):
+ global charMap3
+ PID = ""
+ for position in range (0,8):
+ PID += charMap3[getSixBitsFromBitField(hash,position)]
+ return PID
+
+# Encryption table used to generate the device PID
+def generatePidEncryptionTable() :
+ table = []
+ for counter1 in range (0,0x100):
+ value = counter1
+ for counter2 in range (0,8):
+ if (value & 1 == 0) :
+ value = value >> 1
+ else :
+ value = value >> 1
+ value = value ^ 0xEDB88320
+ table.append(value)
+ return table
+
+# Seed value used to generate the device PID
+def generatePidSeed(table,dsn) :
+ value = 0
+ for counter in range (0,4) :
+ index = (ord(dsn[counter]) ^ value) &0xFF
+ value = (value >> 8) ^ table[index]
+ return value
+
+# Generate the device PID
+def generateDevicePID(table,dsn,nbRoll):
+    """Derive the device PID from the DSN.
+
+    table  -- output of generatePidEncryptionTable()
+    dsn    -- device serial-number string
+    nbRoll -- number of DSN bytes to fold into the 8-byte state
+    """
+    global charMap4
+    seed = generatePidSeed(table,dsn)
+    pidAscii = ""
+    # 8-byte state: the four big-endian seed bytes, repeated twice
+    pid = [(seed >>24) &0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF,(seed>>24) & 0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF]
+    index = 0
+    for counter in range (0,nbRoll):
+        pid[index] = pid[index] ^ ord(dsn[counter])
+        index = (index+1) %8
+    # map each state byte into the 34-character PID alphabet
+    for counter in range (0,8):
+        index = ((((pid[counter] >>5) & 3) ^ pid[counter]) & 0x1f) + (pid[counter] >> 7)
+        pidAscii += charMap4[index]
+    return pidAscii
+
+def crc32(s):
+ return (~binascii.crc32(s,-1))&0xFFFFFFFF
+
+# convert from 8 digit PID to 10 digit PID with checksum
+def checksumPid(s):
+ global charMap4
+ crc = crc32(s)
+ crc = crc ^ (crc >> 16)
+ res = s
+ l = len(charMap4)
+ for i in (0,1):
+ b = crc & 0xff
+ pos = (b // l) ^ (b % l)
+ res += charMap4[pos%l]
+ crc >>= 8
+ return res
+
+
+# old kindle serial number to fixed pid
+def pidFromSerial(s, l):
+    """Derive an l-character fixed PID from serial string s (pre-2.5 firmware)."""
+    global charMap4
+    crc = crc32(s)
+    # fold the serial bytes into l accumulator slots
+    arr1 = [0]*l
+    for i in xrange(len(s)):
+        arr1[i%l] ^= ord(s[i])
+    # then mix in the big-endian CRC bytes
+    crc_bytes = [crc >> 24 & 0xff, crc >> 16 & 0xff, crc >> 8 & 0xff, crc & 0xff]
+    for i in xrange(l):
+        arr1[i] ^= crc_bytes[i&3]
+    # map each accumulator byte into the PID alphabet
+    pid = ""
+    for i in xrange(l):
+        b = arr1[i] & 0xff
+        pid+=charMap4[(b >> 7) + ((b >> 5 & 3) ^ (b & 0x1f))]
+    return pid
+
+
+# Parse the EXTH header records and use the Kindle serial number to calculate the book pid.
+def getKindlePid(pidlst, rec209, token, serialnum):
+
+ if rec209 != None:
+ # Compute book PID
+ pidHash = SHA1(serialnum+rec209+token)
+ bookPID = encodePID(pidHash)
+ bookPID = checksumPid(bookPID)
+ pidlst.append(bookPID)
+
+ # compute fixed pid for old pre 2.5 firmware update pid as well
+ bookPID = pidFromSerial(serialnum, 7) + "*"
+ bookPID = checksumPid(bookPID)
+ pidlst.append(bookPID)
+
+ return pidlst
+
+
+# Parse the EXTH header records and parse the Kindleinfo
+# file to calculate the book pid.
+
+def getK4Pids(pidlst, rec209, token, kInfoFile=None):
+ global kindleDatabase
+ global charMap1
+ kindleDatabase = None
+ try:
+ kindleDatabase = parseKindleInfo(kInfoFile)
+ except Exception, message:
+ print(message)
+ pass
+
+ if kindleDatabase == None :
+ return pidlst
+
+ # Get the Mazama Random number
+ MazamaRandomNumber = getKindleInfoValueForKey("MazamaRandomNumber")
+
+ # Get the HDD serial
+ encodedSystemVolumeSerialNumber = encodeHash(GetVolumeSerialNumber(),charMap1)
+
+ # Get the current user name
+ encodedUsername = encodeHash(GetUserName(),charMap1)
+
+ # concat, hash and encode to calculate the DSN
+ DSN = encode(SHA1(MazamaRandomNumber+encodedSystemVolumeSerialNumber+encodedUsername),charMap1)
+
+ # Compute the device PID (for which I can tell, is used for nothing).
+ table = generatePidEncryptionTable()
+ devicePID = generateDevicePID(table,DSN,4)
+ devicePID = checksumPid(devicePID)
+ pidlst.append(devicePID)
+
+ # Compute book PID
+ if rec209 == None:
+ print "\nNo EXTH record type 209 - Perhaps not a K4 file?"
+ return pidlst
+
+ # Get the kindle account token
+ kindleAccountToken = getKindleInfoValueForKey("kindle.account.tokens")
+
+ # book pid
+ pidHash = SHA1(DSN+kindleAccountToken+rec209+token)
+ bookPID = encodePID(pidHash)
+ bookPID = checksumPid(bookPID)
+ pidlst.append(bookPID)
+
+ # variant 1
+ pidHash = SHA1(kindleAccountToken+rec209+token)
+ bookPID = encodePID(pidHash)
+ bookPID = checksumPid(bookPID)
+ pidlst.append(bookPID)
+
+ # variant 2
+ pidHash = SHA1(DSN+rec209+token)
+ bookPID = encodePID(pidHash)
+ bookPID = checksumPid(bookPID)
+ pidlst.append(bookPID)
+
+ return pidlst
+
+def getPidList(md1, md2, k4, pids, serials, kInfoFiles):
+ pidlst = []
+ if k4:
+ pidlst = getK4Pids(pidlst, md1, md2)
+ for infoFile in kInfoFiles:
+ pidlst = getK4Pids(pidlst, md1, md2, infoFile)
+ for serialnum in serials:
+ pidlst = getKindlePid(pidlst, md1, md2, serialnum)
+ for pid in pids:
+ pidlst.append(pid)
+ return pidlst
--- /dev/null
+#!/usr/bin/python
+#
+# This is a python script. You need a Python interpreter to run it.
+# For example, ActiveState Python, which exists for windows.
+#
+# Changelog
+# 0.01 - Initial version
+# 0.02 - Huffdic compressed books were not properly decrypted
+# 0.03 - Wasn't checking MOBI header length
+# 0.04 - Wasn't sanity checking size of data record
+# 0.05 - It seems that the extra data flags take two bytes not four
+# 0.06 - And that low bit does mean something after all :-)
+# 0.07 - The extra data flags aren't present in MOBI header < 0xE8 in size
+# 0.08 - ...and also not in Mobi header version < 6
+# 0.09 - ...but they are there with Mobi header version 6, header size 0xE4!
+# 0.10 - Outputs unencrypted files as-is, so that when run as a Calibre
+# import filter it works when importing unencrypted files.
+# Also now handles encrypted files that don't need a specific PID.
+# 0.11 - use autoflushed stdout and proper return values
+# 0.12 - Fix for problems with metadata import as Calibre plugin, report errors
+# 0.13 - Formatting fixes: retabbed file, removed trailing whitespace
+# and extra blank lines, converted CR/LF pairs at ends of each line,
+# and other cosmetic fixes.
+# 0.14 - Working out when the extra data flags are present has been problematic
+# Versions 7 through 9 have tried to tweak the conditions, but have been
+# only partially successful. Closer examination of lots of sample
+# files reveals that a confusion has arisen because trailing data entries
+# are not encrypted, but it turns out that the multibyte entries
+# in utf8 file are encrypted. (Although neither kind gets compressed.)
+# This knowledge leads to a simplification of the test for the
+# trailing data byte flags - version 5 and higher AND header size >= 0xE4.
+# 0.15 - Now outputs 'heartbeat', and is also quicker for long files.
+# 0.16 - And reverts to 'done' not 'done.' at the end for unswindle compatibility.
+# 0.17 - added modifications to support its use as an imported python module
+# both inside calibre and also in other places (ie K4DeDRM tools)
+# 0.17a- disabled the standalone plugin feature since a plugin can not import
+# a plugin
+# 0.18 - It seems that multibyte entries aren't encrypted in a v7 file...
+# Removed the disabled Calibre plug-in code
+# Permit use of 8-digit PIDs
+# 0.19 - It seems that multibyte entries aren't encrypted in a v6 file either.
+# 0.20 - Correction: It seems that multibyte entries are encrypted in a v6 file.
+# 0.21 - Added support for multiple pids
+# 0.22 - revised structure to hold MobiBook as a class to allow an extended interface
+
+__version__ = '0.22'
+
+import sys
+
+class Unbuffered:
+    """Stream proxy that flushes after every write (live progress output)."""
+    def __init__(self, stream):
+        self.stream = stream
+    def write(self, data):
+        self.stream.write(data)
+        self.stream.flush()
+    def __getattr__(self, attr):
+        # everything else is delegated to the wrapped stream
+        return getattr(self.stream, attr)
+sys.stdout=Unbuffered(sys.stdout)
+
+import struct
+import binascii
+
+class DrmException(Exception):
+    """Raised when a Mobipocket book cannot be decrypted."""
+    pass
+
+
+#
+# MobiBook Utility Routines
+#
+
+# Implementation of Pukall Cipher 1
+def PC1(key, src, decryption=True):
+    """Encrypt or decrypt src with the 16-byte key using Pukall Cipher 1.
+
+    Returns the transformed string, or None when the key length is wrong.
+    The key schedule is updated from each processed byte, so statement
+    order in the loops is load-bearing -- do not reorder.
+    """
+    sum1 = 0;
+    sum2 = 0;
+    keyXorVal = 0;
+    if len(key)!=16:
+        print "Bad key length!"
+        return None
+    # expand the key into eight 16-bit words
+    wkey = []
+    for i in xrange(8):
+        wkey.append(ord(key[i*2])<<8 | ord(key[i*2+1]))
+    dst = ""
+    for i in xrange(len(src)):
+        temp1 = 0;
+        byteXorVal = 0;
+        for j in xrange(8):
+            temp1 ^= wkey[j]
+            sum2 = (sum2+j)*20021 + sum1
+            sum1 = (temp1*346)&0xFFFF
+            sum2 = (sum2+sum1)&0xFFFF
+            temp1 = (temp1*20021+1)&0xFFFF
+            byteXorVal ^= temp1 ^ sum2
+        curByte = ord(src[i])
+        # key feedback uses the plaintext byte: taken before the XOR when
+        # encrypting, after it when decrypting
+        if not decryption:
+            keyXorVal = curByte * 257;
+        curByte = ((curByte ^ (byteXorVal >> 8)) ^ byteXorVal) & 0xFF
+        if decryption:
+            keyXorVal = curByte * 257;
+        for j in xrange(8):
+            wkey[j] ^= keyXorVal;
+        dst+=chr(curByte)
+    return dst
+
+def checksumPid(s):
+ letters = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
+ crc = (~binascii.crc32(s,-1))&0xFFFFFFFF
+ crc = crc ^ (crc >> 16)
+ res = s
+ l = len(letters)
+ for i in (0,1):
+ b = crc & 0xff
+ pos = (b // l) ^ (b % l)
+ res += letters[pos%l]
+ crc >>= 8
+ return res
+
+def getSizeOfTrailingDataEntries(ptr, size, flags):
+    """Return how many bytes at the end of record ptr[:size] belong to
+    trailing data entries (plus optional multibyte-overlap bytes) per the
+    MOBI extra-data flags."""
+    def getSizeOfTrailingDataEntry(ptr, size):
+        # decode one backward varint (entry size) from the end of the record
+        bitpos, result = 0, 0
+        if size <= 0:
+            return result
+        while True:
+            v = ord(ptr[size-1])
+            result |= (v & 0x7F) << bitpos
+            bitpos += 7
+            size -= 1
+            if (v & 0x80) != 0 or (bitpos >= 28) or (size == 0):
+                return result
+    num = 0
+    testflags = flags >> 1
+    while testflags:
+        if testflags & 1:
+            num += getSizeOfTrailingDataEntry(ptr, size - num)
+        testflags >>= 1
+    # Check the low bit to see if there's multibyte data present.
+    # if multibyte data is included in the encryped data, we'll
+    # have already cleared this flag.
+    if flags & 1:
+        num += (ord(ptr[size - num - 1]) & 0x3) + 1
+    return num
+
+
+
+class MobiBook:
+ def loadSection(self, section):
+ if (section + 1 == self.num_sections):
+ endoff = len(self.data_file)
+ else:
+ endoff = self.sections[section + 1][0]
+ off = self.sections[section][0]
+ return self.data_file[off:endoff]
+
+ def __init__(self, infile):
+ # initial sanity check on file
+ self.data_file = file(infile, 'rb').read()
+ self.header = self.data_file[0:78]
+ if self.header[0x3C:0x3C+8] != 'BOOKMOBI':
+ raise DrmException("invalid file format")
+
+ # build up section offset and flag info
+ self.num_sections, = struct.unpack('>H', self.header[76:78])
+ self.sections = []
+ for i in xrange(self.num_sections):
+ offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', self.data_file[78+i*8:78+i*8+8])
+ flags, val = a1, a2<<16|a3<<8|a4
+ self.sections.append( (offset, flags, val) )
+
+ # parse information from section 0
+ self.sect = self.loadSection(0)
+ self.records, = struct.unpack('>H', self.sect[0x8:0x8+2])
+ self.mobi_length, = struct.unpack('>L',self.sect[0x14:0x18])
+ self.mobi_version, = struct.unpack('>L',self.sect[0x68:0x6C])
+ print "MOBI header version = %d, length = %d" %(self.mobi_version, self.mobi_length)
+ self.extra_data_flags = 0
+ if (self.mobi_length >= 0xE4) and (self.mobi_version >= 5):
+ self.extra_data_flags, = struct.unpack('>H', self.sect[0xF2:0xF4])
+ print "Extra Data Flags = %d" % self.extra_data_flags
+ if self.mobi_version < 7:
+ # multibyte utf8 data is included in the encryption for mobi_version 6 and below
+ # so clear that byte so that we leave it to be decrypted.
+ self.extra_data_flags &= 0xFFFE
+
+ # if exth region exists parse it for metadata array
+ self.meta_array = {}
+ exth_flag, = struct.unpack('>L', self.sect[0x80:0x84])
+ exth = ''
+ if exth_flag & 0x40:
+ exth = self.sect[16 + self.mobi_length:]
+ nitems, = struct.unpack('>I', exth[8:12])
+ pos = 12
+ for i in xrange(nitems):
+ type, size = struct.unpack('>II', exth[pos: pos + 8])
+ content = exth[pos + 8: pos + size]
+ self.meta_array[type] = content
+ pos += size
+
+ def getBookTitle(self):
+ title = ''
+ if 503 in self.meta_array:
+ title = self.meta_array[503]
+ else :
+ toff, tlen = struct.unpack('>II', self.sect[0x54:0x5c])
+ tend = toff + tlen
+ title = self.sect[toff:tend]
+ if title == '':
+ title = self.header[:32]
+ title = title.split("\0")[0]
+ return title
+
+ def getPIDMetaInfo(self):
+ rec209 = None
+ token = None
+ if 209 in self.meta_array:
+ rec209 = self.meta_array[209]
+ data = rec209
+ # Parse the 209 data to find the the exth record with the token data.
+ # The last character of the 209 data points to the record with the token.
+ # Always 208 from my experience, but I'll leave the logic in case that changes.
+ for i in xrange(len(data)):
+ if ord(data[i]) != 0:
+ if self.meta_array[ord(data[i])] != None:
+ token = self.meta_array[ord(data[i])]
+ return rec209, token
+
+    def patch(self, off, new):
+        # Overwrite len(new) bytes of the in-memory file image at offset 'off'.
+        self.data_file = self.data_file[:off] + new + self.data_file[off+len(new):]
+
+    def patchSection(self, section, new, in_off = 0):
+        # Patch 'new' into the given PDB section at relative offset 'in_off',
+        # asserting the write stays within the section's boundaries.
+        if (section + 1 == self.num_sections):
+            # last section runs to the end of the file
+            endoff = len(self.data_file)
+        else:
+            endoff = self.sections[section + 1][0]
+        off = self.sections[section][0]
+        assert off + in_off + len(new) <= endoff
+        self.patch(off + in_off, new)
+
+    def parseDRM(self, data, count, pidlist):
+        # Scan the 'count' 0x30-byte DRM voucher entries in 'data' for one
+        # that decrypts under a key derived from any supplied 8-char PID;
+        # fall back to the keyvec-only "default encryption" (PID "00000000").
+        # Returns [found_key, pid]; found_key is None when nothing matched.
+        found_key = None
+        keyvec1 = "\x72\x38\x33\xB0\xB4\xF2\xE3\xCA\xDF\x09\x01\xD6\xE2\xE0\x3F\x96"
+        for pid in pidlist:
+            bigpid = pid.ljust(16,'\0')
+            temp_key = PC1(keyvec1, bigpid, False)
+            # one-byte checksum lets us skip vouchers cheaply before decrypting
+            temp_key_sum = sum(map(ord,temp_key)) & 0xff
+            found_key = None
+            for i in xrange(count):
+                verification, size, type, cksum, cookie = struct.unpack('>LLLBxxx32s', data[i*0x30:i*0x30+0x30])
+                if cksum == temp_key_sum:
+                    cookie = PC1(temp_key, cookie)
+                    ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie)
+                    # flags low 5 bits == 1 identifies a valid main-key voucher
+                    if verification == ver and (flags & 0x1F) == 1:
+                        found_key = finalkey
+                        break
+            if found_key != None:
+                break
+        if not found_key:
+            # Then try the default encoding that doesn't require a PID
+            pid = "00000000"
+            temp_key = keyvec1
+            temp_key_sum = sum(map(ord,temp_key)) & 0xff
+            for i in xrange(count):
+                verification, size, type, cksum, cookie = struct.unpack('>LLLBxxx32s', data[i*0x30:i*0x30+0x30])
+                if cksum == temp_key_sum:
+                    cookie = PC1(temp_key, cookie)
+                    ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie)
+                    if verification == ver:
+                        found_key = finalkey
+                        break
+        return [found_key,pid]
+
+    def processBook(self, pidlist):
+        # Remove Mobipocket DRM from the loaded book using any of the PIDs
+        # in 'pidlist' and return the decrypted file image.
+        # Raises DrmException for unsupported encryption or when no PID works.
+        crypto_type, = struct.unpack('>H', self.sect[0xC:0xC+2])
+        if crypto_type == 0:
+            print "This book is not encrypted."
+            return self.data_file
+        if crypto_type == 1:
+            raise DrmException("Cannot decode Mobipocket encryption type 1")
+        if crypto_type != 2:
+            raise DrmException("Cannot decode unknown Mobipocket encryption type %d" % crypto_type)
+
+        # normalise PIDs: 10-char PIDs carry a 2-char checksum suffix
+        goodpids = []
+        for pid in pidlist:
+            if len(pid)==10:
+                if checksumPid(pid[0:-2]) != pid:
+                    print "Warning: PID " + pid + " has incorrect checksum, should have been "+checksumPid(pid[0:-2])
+                goodpids.append(pid[0:-2])
+            elif len(pid)==8:
+                goodpids.append(pid)
+
+        # calculate the keys
+        drm_ptr, drm_count, drm_size, drm_flags = struct.unpack('>LLLL', self.sect[0xA8:0xA8+16])
+        if drm_count == 0:
+            raise DrmException("Not yet initialised with PID. Must be opened with Mobipocket Reader first.")
+        found_key, pid = self.parseDRM(self.sect[drm_ptr:drm_ptr+drm_size], drm_count, goodpids)
+        if not found_key:
+            raise DrmException("No key found. Most likely the correct PID has not been given.")
+
+        if pid=="00000000":
+            print "File has default encryption, no specific PID."
+        else:
+            print "File is encoded with PID "+checksumPid(pid)+"."
+
+        # kill the drm keys
+        self.patchSection(0, "\0" * drm_size, drm_ptr)
+        # kill the drm pointers
+        self.patchSection(0, "\xff" * 4 + "\0" * 12, 0xA8)
+        # clear the crypto type
+        self.patchSection(0, "\0" * 2, 0xC)
+
+        # decrypt sections; trailing-data entries are kept verbatim because
+        # they are stored unencrypted (see revision notes at top of file)
+        print "Decrypting. Please wait . . .",
+        new_data = self.data_file[:self.sections[1][0]]
+        for i in xrange(1, self.records+1):
+            data = self.loadSection(i)
+            extra_size = getSizeOfTrailingDataEntries(data, len(data), self.extra_data_flags)
+            if i%100 == 0:
+                print ".",
+            # print "record %d, extra_size %d" %(i,extra_size)
+            new_data += PC1(found_key, data[0:len(data) - extra_size])
+            if extra_size > 0:
+                new_data += data[-extra_size:]
+        # copy any non-text sections past the record range unchanged
+        if self.num_sections > self.records+1:
+            new_data += self.data_file[self.sections[self.records+1][0]:]
+        self.data_file = new_data
+        print "done"
+        return self.data_file
+
+def getUnencryptedBook(infile,pid):
+    # Convenience wrapper: decrypt 'infile' with a single PID string.
+    # Raises DrmException when the file is missing or no key is found.
+    if not os.path.isfile(infile):
+        raise DrmException('Input File Not Found')
+    book = MobiBook(infile)
+    return book.processBook([pid])
+
+def getUnencryptedBookWithList(infile,pidlist):
+    # Convenience wrapper: decrypt 'infile' trying each PID in 'pidlist'.
+    # Raises DrmException when the file is missing or no key is found.
+    if not os.path.isfile(infile):
+        raise DrmException('Input File Not Found')
+    book = MobiBook(infile)
+    return book.processBook(pidlist)
+
+def main(argv=sys.argv):
+ print ('MobiDeDrm v%(__version__)s. '
+ 'Copyright 2008-2010 The Dark Reverser.' % globals())
+ if len(argv)<4:
+ print "Removes protection from Mobipocket books"
+ print "Usage:"
+ print " %s <infile> <outfile> <Comma separated list of PIDs to try>" % sys.argv[0]
+ return 1
+ else:
+ infile = argv[1]
+ outfile = argv[2]
+ pidlist = argv[3].split(',')
+ try:
+ stripped_file = getUnencryptedBookWithList(infile, pidlist)
+ file(outfile, 'wb').write(stripped_file)
+ except DrmException, e:
+ print "Error: %s" % e
+ return 1
+ return 0
+
+
+# allow use both as a script and as an importable module
+if __name__ == "__main__":
+    sys.exit(main())
--- /dev/null
+#! /usr/bin/python
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+# For use with Topaz Scripts Version 2.6
+
+import csv
+import sys
+import os
+import getopt
+from struct import pack
+from struct import unpack
+
+
+class DocParser(object):
+ def __init__(self, flatxml, fontsize, ph, pw):
+ self.flatdoc = flatxml.split('\n')
+ self.fontsize = int(fontsize)
+ self.ph = int(ph) * 1.0
+ self.pw = int(pw) * 1.0
+
+ stags = {
+ 'paragraph' : 'p',
+ 'graphic' : '.graphic'
+ }
+
+ attr_val_map = {
+ 'hang' : 'text-indent: ',
+ 'indent' : 'text-indent: ',
+ 'line-space' : 'line-height: ',
+ 'margin-bottom' : 'margin-bottom: ',
+ 'margin-left' : 'margin-left: ',
+ 'margin-right' : 'margin-right: ',
+ 'margin-top' : 'margin-top: ',
+ 'space-after' : 'padding-bottom: ',
+ }
+
+ attr_str_map = {
+ 'align-center' : 'text-align: center; margin-left: auto; margin-right: auto;',
+ 'align-left' : 'text-align: left;',
+ 'align-right' : 'text-align: right;',
+ 'align-justify' : 'text-align: justify;',
+ 'display-inline' : 'display: inline;',
+ 'pos-left' : 'text-align: left;',
+ 'pos-right' : 'text-align: right;',
+ 'pos-center' : 'text-align: center; margin-left: auto; margin-right: auto;',
+ }
+
+
+ # find tag if within pos to end inclusive
+ def findinDoc(self, tagpath, pos, end) :
+ result = None
+ docList = self.flatdoc
+ cnt = len(docList)
+ if end == -1 :
+ end = cnt
+ else:
+ end = min(cnt,end)
+ foundat = -1
+ for j in xrange(pos, end):
+ item = docList[j]
+ if item.find('=') >= 0:
+ (name, argres) = item.split('=',1)
+ else :
+ name = item
+ argres = ''
+ if name.endswith(tagpath) :
+ result = argres
+ foundat = j
+ break
+ return foundat, result
+
+
+ # return list of start positions for the tagpath
+ def posinDoc(self, tagpath):
+ startpos = []
+ pos = 0
+ res = ""
+ while res != None :
+ (foundpos, res) = self.findinDoc(tagpath, pos, -1)
+ if res != None :
+ startpos.append(foundpos)
+ pos = foundpos + 1
+ return startpos
+
+
+ def process(self):
+
+ classlst = ''
+ csspage = '.cl-center { text-align: center; margin-left: auto; margin-right: auto; }\n'
+ csspage += '.cl-right { text-align: right; }\n'
+ csspage += '.cl-left { text-align: left; }\n'
+ csspage += '.cl-justify { text-align: justify; }\n'
+
+ # generate a list of each <style> starting point in the stylesheet
+ styleList= self.posinDoc('book.stylesheet.style')
+ stylecnt = len(styleList)
+ styleList.append(-1)
+
+ # process each style converting what you can
+
+ for j in xrange(stylecnt):
+ start = styleList[j]
+ end = styleList[j+1]
+
+ (pos, tag) = self.findinDoc('style._tag',start,end)
+ if tag == None :
+ (pos, tag) = self.findinDoc('style.type',start,end)
+
+ # Is this something we know how to convert to css
+ if tag in self.stags :
+
+ # get the style class
+ (pos, sclass) = self.findinDoc('style.class',start,end)
+ if sclass != None:
+ sclass = sclass.replace(' ','-')
+ sclass = '.cl-' + sclass.lower()
+ else :
+ sclass = ''
+
+ # check for any "after class" specifiers
+ (pos, aftclass) = self.findinDoc('style._after_class',start,end)
+ if aftclass != None:
+ aftclass = aftclass.replace(' ','-')
+ aftclass = '.cl-' + aftclass.lower()
+ else :
+ aftclass = ''
+
+ cssargs = {}
+
+ while True :
+
+ (pos1, attr) = self.findinDoc('style.rule.attr', start, end)
+ (pos2, val) = self.findinDoc('style.rule.value', start, end)
+
+ if attr == None : break
+
+ if (attr == 'display') or (attr == 'pos') or (attr == 'align'):
+ # handle text based attributess
+ attr = attr + '-' + val
+ if attr in self.attr_str_map :
+ cssargs[attr] = (self.attr_str_map[attr], '')
+ else :
+ # handle value based attributes
+ if attr in self.attr_val_map :
+ name = self.attr_val_map[attr]
+ if attr in ('margin-bottom', 'margin-top', 'space-after') :
+ scale = self.ph
+ elif attr in ('margin-right', 'indent', 'margin-left', 'hang') :
+ scale = self.pw
+ elif attr == 'line-space':
+ scale = self.fontsize * 2.0
+
+ if not ((attr == 'hang') and (int(val) == 0)) :
+ pv = float(val)/scale
+ cssargs[attr] = (self.attr_val_map[attr], pv)
+ keep = True
+
+ start = max(pos1, pos2) + 1
+
+ # disable all of the after class tags until I figure out how to handle them
+ if aftclass != "" : keep = False
+
+ if keep :
+ # make sure line-space does not go below 100% or above 300% since
+ # it can be wacky in some styles
+ if 'line-space' in cssargs:
+ seg = cssargs['line-space'][0]
+ val = cssargs['line-space'][1]
+ if val < 1.0: val = 1.0
+ if val > 3.0: val = 3.0
+ del cssargs['line-space']
+ cssargs['line-space'] = (self.attr_val_map['line-space'], val)
+
+
+ # handle modifications for css style hanging indents
+ if 'hang' in cssargs:
+ hseg = cssargs['hang'][0]
+ hval = cssargs['hang'][1]
+ del cssargs['hang']
+ cssargs['hang'] = (self.attr_val_map['hang'], -hval)
+ mval = 0
+ mseg = 'margin-left: '
+ mval = hval
+ if 'margin-left' in cssargs:
+ mseg = cssargs['margin-left'][0]
+ mval = cssargs['margin-left'][1]
+ if mval < 0: mval = 0
+ mval = hval + mval
+ cssargs['margin-left'] = (mseg, mval)
+ if 'indent' in cssargs:
+ del cssargs['indent']
+
+ cssline = sclass + ' { '
+ for key in iter(cssargs):
+ mseg = cssargs[key][0]
+ mval = cssargs[key][1]
+ if mval == '':
+ cssline += mseg + ' '
+ else :
+ aseg = mseg + '%.1f%%;' % (mval * 100.0)
+ cssline += aseg + ' '
+
+ cssline += '}'
+
+ if sclass != '' :
+ classlst += sclass + '\n'
+
+ # handle special case of paragraph class used inside chapter heading
+ # and non-chapter headings
+ if sclass != '' :
+ ctype = sclass[4:7]
+ if ctype == 'ch1' :
+ csspage += 'h1' + cssline + '\n'
+ if ctype == 'ch2' :
+ csspage += 'h2' + cssline + '\n'
+ if ctype == 'ch3' :
+ csspage += 'h3' + cssline + '\n'
+ if ctype == 'h1-' :
+ csspage += 'h4' + cssline + '\n'
+ if ctype == 'h2-' :
+ csspage += 'h5' + cssline + '\n'
+ if ctype == 'h3_' :
+ csspage += 'h6' + cssline + '\n'
+
+ if cssline != ' { }':
+ csspage += self.stags[tag] + cssline + '\n'
+
+
+ return csspage, classlst
+
+
+
+def convert2CSS(flatxml, fontsize, ph, pw):
+    # Convert a flattened Topaz stylesheet into (csspage, classlst)
+    # via DocParser.process(); see DocParser for parameter meanings.
+    print '    ', 'Using font size:',fontsize
+    print '    ', 'Using page height:', ph
+    print '    ', 'Using page width:', pw
+
+    # create a document parser
+    dp = DocParser(flatxml, fontsize, ph, pw)
+
+    csspage = dp.process()
+
+    return csspage
--- /dev/null
+#!/usr/bin/env python
+
+class Unbuffered:
+    # Stream wrapper that flushes after every write so progress output
+    # appears immediately; all other attributes delegate to the stream.
+    def __init__(self, stream):
+        self.stream = stream
+    def write(self, data):
+        self.stream.write(data)
+        self.stream.flush()
+    def __getattr__(self, attr):
+        return getattr(self.stream, attr)
+
+import sys
+sys.stdout=Unbuffered(sys.stdout)
+import os, csv, getopt
+import zlib, zipfile, tempfile, shutil
+from struct import pack
+from struct import unpack
+
+# Raised for any Topaz parsing or DRM-removal failure in this module.
+class TpzDRMError(Exception):
+    pass
+
+# local support routines
+import kgenpids
+import genbook
+#
+# Utility routines
+#
+
+# Get a 7 bit encoded number from file
+# Get a 7 bit encoded number from file
+def bookReadEncodedNumber(fo):
+    # Numbers are big-endian, 7 bits per byte; a byte >= 0x80 means more
+    # bytes follow.  A leading 0xFF marks the value as negative.
+    flag = False
+    data = ord(fo.read(1))
+    if data == 0xFF:
+        flag = True
+        data = ord(fo.read(1))
+    if data >= 0x80:
+        datax = (data & 0x7F)
+        while data >= 0x80 :
+            data = ord(fo.read(1))
+            datax = (datax <<7) + (data & 0x7F)
+        data = datax
+    if flag:
+        data = -data
+    return data
+
+# Get a length prefixed string from file
+# Get a length prefixed string from file
+def bookReadString(fo):
+    # length is stored as a 7-bit-encoded number immediately before the bytes
+    stringLength = bookReadEncodedNumber(fo)
+    return unpack(str(stringLength)+"s",fo.read(stringLength))[0]
+
+#
+# crypto routines
+#
+
+# Context initialisation for the Topaz Crypto
+# Context initialisation for the Topaz Crypto
+def topazCryptoInit(key):
+    # Derive the two 32-bit state words [ctx1, ctx2] of the Topaz stream
+    # cipher from the key string.
+    # NOTE(review): an empty key would leave ctx2 unbound and raise
+    # NameError -- callers always pass a non-empty PID; confirm.
+    ctx1 = 0x0CAFFE19E
+    for keyChar in key:
+        keyByte = ord(keyChar)
+        ctx2 = ctx1
+        ctx1 = ((((ctx1 >>2) * (ctx1 >>7))&0xFFFFFFFF) ^ (keyByte * keyByte * 0x0F902007)& 0xFFFFFFFF )
+    return [ctx1,ctx2]
+
+# decrypt data with the context prepared by topazCryptoInit()
+def topazCryptoDecrypt(data, ctx):
+ ctx1 = ctx[0]
+ ctx2 = ctx[1]
+ plainText = ""
+ for dataChar in data:
+ dataByte = ord(dataChar)
+ m = (dataByte ^ ((ctx1 >> 3) &0xFF) ^ ((ctx2<<3) & 0xFF)) &0xFF
+ ctx2 = ctx1
+ ctx1 = (((ctx1 >> 2) * (ctx1 >> 7)) &0xFFFFFFFF) ^((m * m * 0x0F902007) &0xFFFFFFFF)
+ plainText += chr(m)
+ return plainText
+
+# Decrypt data with the PID
+def decryptRecord(data,PID):
+ ctx = topazCryptoInit(PID)
+ return topazCryptoDecrypt(data, ctx)
+
+# Try to decrypt a dkey record (contains the bookPID)
+def decryptDkeyRecord(data,PID):
+ record = decryptRecord(data,PID)
+ fields = unpack("3sB8sB8s3s",record)
+ if fields[0] != "PID" or fields[5] != "pid" :
+ raise TpzDRMError("Didn't find PID magic numbers in record")
+ elif fields[1] != 8 or fields[3] != 8 :
+ raise TpzDRMError("Record didn't contain correct length fields")
+ elif fields[2] != PID :
+ raise TpzDRMError("Record didn't contain PID")
+ return fields[4]
+
+# Decrypt all dkey records (contain the book PID)
+def decryptDkeyRecords(data,PID):
+ nbKeyRecords = ord(data[0])
+ records = []
+ data = data[1:]
+ for i in range (0,nbKeyRecords):
+ length = ord(data[0])
+ try:
+ key = decryptDkeyRecord(data[1:length+1],PID)
+ records.append(key)
+ except TpzDRMError:
+ pass
+ data = data[1+length:]
+ if len(records) == 0:
+ raise TpzDRMError("BookKey Not Found")
+ return records
+
+
+class TopazBook:
+ def __init__(self, filename, outdir):
+ self.fo = file(filename, 'rb')
+ self.outdir = outdir
+ self.bookPayloadOffset = 0
+ self.bookHeaderRecords = {}
+ self.bookMetadata = {}
+ self.bookKey = None
+ magic = unpack("4s",self.fo.read(4))[0]
+ if magic != 'TPZ0':
+ raise TpzDRMError("Parse Error : Invalid Header, not a Topaz file")
+ self.parseTopazHeaders()
+ self.parseMetadata()
+
+ def parseTopazHeaders(self):
+ def bookReadHeaderRecordData():
+ # Read and return the data of one header record at the current book file position
+ # [[offset,decompressedLength,compressedLength],...]
+ nbValues = bookReadEncodedNumber(self.fo)
+ values = []
+ for i in range (0,nbValues):
+ values.append([bookReadEncodedNumber(self.fo),bookReadEncodedNumber(self.fo),bookReadEncodedNumber(self.fo)])
+ return values
+ def parseTopazHeaderRecord():
+ # Read and parse one header record at the current book file position and return the associated data
+ # [[offset,decompressedLength,compressedLength],...]
+ if ord(self.fo.read(1)) != 0x63:
+ raise TpzDRMError("Parse Error : Invalid Header")
+ tag = bookReadString(self.fo)
+ record = bookReadHeaderRecordData()
+ return [tag,record]
+ nbRecords = bookReadEncodedNumber(self.fo)
+ for i in range (0,nbRecords):
+ result = parseTopazHeaderRecord()
+ # print result[0], result[1]
+ self.bookHeaderRecords[result[0]] = result[1]
+ if ord(self.fo.read(1)) != 0x64 :
+ raise TpzDRMError("Parse Error : Invalid Header")
+ self.bookPayloadOffset = self.fo.tell()
+
+ def parseMetadata(self):
+ # Parse the metadata record from the book payload and return a list of [key,values]
+ self.fo.seek(self.bookPayloadOffset + self.bookHeaderRecords["metadata"][0][0])
+ tag = bookReadString(self.fo)
+ if tag != "metadata" :
+ raise TpzDRMError("Parse Error : Record Names Don't Match")
+ flags = ord(self.fo.read(1))
+ nbRecords = ord(self.fo.read(1))
+ for i in range (0,nbRecords) :
+ record = [bookReadString(self.fo), bookReadString(self.fo)]
+ self.bookMetadata[record[0]] = record[1]
+ return self.bookMetadata
+
+ def getPIDMetaInfo(self):
+ keysRecord = None
+ KeysRecordRecord = None
+ if 'keys' in self.bookMetadata:
+ keysRecord = self.bookMetadata['keys']
+ keysRecordRecord = self.bookMetadata[keysRecord]
+ return keysRecord, keysRecordRecord
+
+ def getBookTitle(self):
+ title = ''
+ if 'Title' in self.bookMetadata:
+ title = self.bookMetadata['Title']
+ return title
+
+ def setBookKey(self, key):
+ self.bookKey = key
+
+ def getBookPayloadRecord(self, name, index):
+ # Get a record in the book payload, given its name and index.
+ # decrypted and decompressed if necessary
+ encrypted = False
+ compressed = False
+ try:
+ recordOffset = self.bookHeaderRecords[name][index][0]
+ except:
+ raise TpzDRMError("Parse Error : Invalid Record, record not found")
+
+ self.fo.seek(self.bookPayloadOffset + recordOffset)
+
+ tag = bookReadString(self.fo)
+ if tag != name :
+ raise TpzDRMError("Parse Error : Invalid Record, record name doesn't match")
+
+ recordIndex = bookReadEncodedNumber(self.fo)
+ if recordIndex < 0 :
+ encrypted = True
+ recordIndex = -recordIndex -1
+
+ if recordIndex != index :
+ raise TpzDRMError("Parse Error : Invalid Record, index doesn't match")
+
+ if (self.bookHeaderRecords[name][index][2] > 0):
+ compressed = True
+ record = self.fo.read(self.bookHeaderRecords[name][index][2])
+ else:
+ record = self.fo.read(self.bookHeaderRecords[name][index][1])
+
+ if encrypted:
+ if self.bookKey:
+ ctx = topazCryptoInit(self.bookKey)
+ record = topazCryptoDecrypt(record,ctx)
+ else :
+ raise TpzDRMError("Error: Attempt to decrypt without bookKey")
+
+ if compressed:
+ record = zlib.decompress(record)
+
+ return record
+
+ def processBook(self, pidlst):
+ raw = 0
+ fixedimage=True
+ try:
+ keydata = self.getBookPayloadRecord('dkey', 0)
+ except TpzDRMError, e:
+ print "no dkey record found, book may not be encrypted"
+ print "attempting to extrct files without a book key"
+ self.createBookDirectory()
+ self.extractFiles()
+ print "Successfully Extracted Topaz contents"
+ rv = genbook.generateBook(self.outdir, raw, fixedimage)
+ if rv == 0:
+ print "\nBook Successfully generated"
+ return rv
+
+ # try each pid to decode the file
+ bookKey = None
+ for pid in pidlst:
+ # use 8 digit pids here
+ pid = pid[0:8]
+ print "\nTrying: ", pid
+ bookKeys = []
+ data = keydata
+ try:
+ bookKeys+=decryptDkeyRecords(data,pid)
+ except TpzDRMError, e:
+ pass
+ else:
+ bookKey = bookKeys[0]
+ print "Book Key Found!"
+ break
+
+ if not bookKey:
+ raise TpzDRMError('Decryption Unsucessful; No valid pid found')
+
+ self.setBookKey(bookKey)
+ self.createBookDirectory()
+ self.extractFiles()
+ print "Successfully Extracted Topaz contents"
+ rv = genbook.generateBook(self.outdir, raw, fixedimage)
+ if rv == 0:
+ print "\nBook Successfully generated"
+ return rv
+
+ def createBookDirectory(self):
+ outdir = self.outdir
+ # create output directory structure
+ if not os.path.exists(outdir):
+ os.makedirs(outdir)
+ destdir = os.path.join(outdir,'img')
+ if not os.path.exists(destdir):
+ os.makedirs(destdir)
+ destdir = os.path.join(outdir,'color_img')
+ if not os.path.exists(destdir):
+ os.makedirs(destdir)
+ destdir = os.path.join(outdir,'page')
+ if not os.path.exists(destdir):
+ os.makedirs(destdir)
+ destdir = os.path.join(outdir,'glyphs')
+ if not os.path.exists(destdir):
+ os.makedirs(destdir)
+
+ def extractFiles(self):
+ outdir = self.outdir
+ for headerRecord in self.bookHeaderRecords:
+ name = headerRecord
+ if name != "dkey" :
+ ext = '.dat'
+ if name == 'img' : ext = '.jpg'
+ if name == 'color' : ext = '.jpg'
+ print "\nProcessing Section: %s " % name
+ for index in range (0,len(self.bookHeaderRecords[name])) :
+ fnum = "%04d" % index
+ fname = name + fnum + ext
+ destdir = outdir
+ if name == 'img':
+ destdir = os.path.join(outdir,'img')
+ if name == 'color':
+ destdir = os.path.join(outdir,'color_img')
+ if name == 'page':
+ destdir = os.path.join(outdir,'page')
+ if name == 'glyphs':
+ destdir = os.path.join(outdir,'glyphs')
+ outputFile = os.path.join(destdir,fname)
+ print ".",
+ record = self.getBookPayloadRecord(name,index)
+ if record != '':
+ file(outputFile, 'wb').write(record)
+ print " "
+
+
+def zipUpDir(myzip, tempdir,localname):
+    # Recursively add tempdir/localname (and its subdirectories) to the
+    # open ZipFile 'myzip', storing archive names relative to tempdir.
+    currentdir = tempdir
+    if localname != "":
+        currentdir = os.path.join(currentdir,localname)
+    # NOTE(review): 'list' and 'file' shadow Python builtins; harmless here
+    list = os.listdir(currentdir)
+    for file in list:
+        afilename = file
+        localfilePath = os.path.join(localname, afilename)
+        realfilePath = os.path.join(currentdir,file)
+        if os.path.isfile(realfilePath):
+            myzip.write(realfilePath, localfilePath)
+        elif os.path.isdir(realfilePath):
+            zipUpDir(myzip, tempdir, localfilePath)
+
+
+def usage(progname):
+    # Print command-line help for main().
+    print "Removes DRM protection from Topaz ebooks and extract the contents"
+    print "Usage:"
+    print "    %s [-k <kindle.info>] [-p <pidnums>] [-s <kindleSerialNumbers>] <infile> <outdir>  " % progname
+
+
+# Main
+def main(argv=sys.argv):
+ progname = os.path.basename(argv[0])
+ k4 = False
+ pids = []
+ serials = []
+ kInfoFiles = []
+
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], "k:p:s:")
+ except getopt.GetoptError, err:
+ print str(err)
+ usage(progname)
+ return 1
+ if len(args)<2:
+ usage(progname)
+ return 1
+
+ for o, a in opts:
+ if o == "-k":
+ if a == None :
+ print "Invalid parameter for -k"
+ return 1
+ kInfoFiles.append(a)
+ if o == "-p":
+ if a == None :
+ print "Invalid parameter for -p"
+ return 1
+ pids = a.split(',')
+ if o == "-s":
+ if a == None :
+ print "Invalid parameter for -s"
+ return 1
+ serials = a.split(',')
+ k4 = True
+
+ infile = args[0]
+ outdir = args[1]
+
+ if not os.path.isfile(infile):
+ print "Input File Does Not Exist"
+ return 1
+
+ bookname = os.path.splitext(os.path.basename(infile))[0]
+ tempdir = tempfile.mkdtemp()
+
+ tb = TopazBook(infile, tempdir)
+ title = tb.getBookTitle()
+ print "Processing Book: ", title
+ keysRecord, keysRecordRecord = tb.getPIDMetaInfo()
+ pidlst = kgenpids.getPidList(keysRecord, keysRecordRecord, k4, pids, serials, kInfoFiles)
+
+ try:
+ tb.processBook(pidlst)
+ except TpzDRMError, e:
+ print str(e)
+ print " Creating DeBug Full Zip Archive of Book"
+ zipname = os.path.join(outdir, bookname + '_debug' + '.zip')
+ myzip = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+ zipUpDir(myzip, tempdir, '')
+ myzip.close()
+ return 1
+
+ print " Creating HTML ZIP Archive"
+ zipname = os.path.join(outdir, bookname + '_nodrm' + '.zip')
+ myzip1 = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+ myzip1.write(os.path.join(tempdir,'book.html'),'book.html')
+ myzip1.write(os.path.join(tempdir,'book.opf'),'book.opf')
+ if os.path.isfile(os.path.join(tempdir,'cover.jpg')):
+ myzip1.write(os.path.join(tempdir,'cover.jpg'),'cover.jpg')
+ myzip1.write(os.path.join(tempdir,'style.css'),'style.css')
+ zipUpDir(myzip1, tempdir, 'img')
+ myzip1.close()
+
+ print " Creating SVG ZIP Archive"
+ zipname = os.path.join(outdir, bookname + '_SVG' + '.zip')
+ myzip2 = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+ myzip2.write(os.path.join(tempdir,'index_svg.xhtml'),'index_svg.xhtml')
+ zipUpDir(myzip2, tempdir, 'svg')
+ zipUpDir(myzip2, tempdir, 'img')
+ myzip2.close()
+
+ print " Creating XML ZIP Archive"
+ zipname = os.path.join(outdir, bookname + '_XML' + '.zip')
+ myzip3 = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+ targetdir = os.path.join(tempdir,'xml')
+ zipUpDir(myzip3, targetdir, '')
+ zipUpDir(myzip3, tempdir, 'img')
+ myzip3.close()
+
+ shutil.rmtree(tempdir)
+
+ return 0
+
+
+# allow use both as a script and as an importable module
+if __name__ == '__main__':
+    sys.exit(main())
+
# 0.14 - Working out when the extra data flags are present has been problematic
# Versions 7 through 9 have tried to tweak the conditions, but have been
# only partially successful. Closer examination of lots of sample
-# files reveals that a confusin has arisen because trailing data entries
+# files reveals that a confusion has arisen because trailing data entries
# are not encrypted, but it turns out that the multibyte entries
# in utf8 file are encrypted. (Although neither kind gets compressed.)
# This knowledge leads to a simplification of the test for the
# Removed the disabled Calibre plug-in code
# Permit use of 8-digit PIDs
# 0.19 - It seems that multibyte entries aren't encrypted in a v6 file either.
-# 0.20 - Corretion: It seems that multibyte entries are encrypted in a v6 file.
+# 0.20 - Correction: It seems that multibyte entries are encrypted in a v6 file.
+# 0.21 - Added support for multiple pids
-__version__ = '0.20'
+__version__ = '0.21'
import sys
import struct
assert off + in_off + len(new) <= endoff
self.patch(off + in_off, new)
- def parseDRM(self, data, count, pid):
- pid = pid.ljust(16,'\0')
+ def parseDRM(self, data, count, pidlist):
keyvec1 = "\x72\x38\x33\xB0\xB4\xF2\xE3\xCA\xDF\x09\x01\xD6\xE2\xE0\x3F\x96"
- temp_key = PC1(keyvec1, pid, False)
- temp_key_sum = sum(map(ord,temp_key)) & 0xff
- found_key = None
- for i in xrange(count):
- verification, size, type, cksum, cookie = struct.unpack('>LLLBxxx32s', data[i*0x30:i*0x30+0x30])
- cookie = PC1(temp_key, cookie)
- ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie)
- if verification == ver and cksum == temp_key_sum and (flags & 0x1F) == 1:
- found_key = finalkey
+ for pid in pidlist:
+ bigpid = pid.ljust(16,'\0')
+ temp_key = PC1(keyvec1, bigpid, False)
+ temp_key_sum = sum(map(ord,temp_key)) & 0xff
+ found_key = None
+ for i in xrange(count):
+ verification, size, type, cksum, cookie = struct.unpack('>LLLBxxx32s', data[i*0x30:i*0x30+0x30])
+ if cksum == temp_key_sum:
+ cookie = PC1(temp_key, cookie)
+ ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie)
+ if verification == ver and (flags & 0x1F) == 1:
+ found_key = finalkey
+ break
+ if found_key != None:
break
if not found_key:
# Then try the default encoding that doesn't require a PID
+ pid = "00000000"
temp_key = keyvec1
temp_key_sum = sum(map(ord,temp_key)) & 0xff
for i in xrange(count):
verification, size, type, cksum, cookie = struct.unpack('>LLLBxxx32s', data[i*0x30:i*0x30+0x30])
- cookie = PC1(temp_key, cookie)
- ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie)
- if verification == ver and cksum == temp_key_sum:
- found_key = finalkey
- break
- return found_key
-
- def __init__(self, data_file, pid):
- if len(pid)==10:
- if checksumPid(pid[0:-2]) != pid:
- raise DrmException("invalid PID checksum")
- pid = pid[0:-2]
- elif len(pid)==8:
- print "PID without checksum given. With checksum PID is "+checksumPid(pid)
- else:
- raise DrmException("Invalid PID length")
+ if cksum == temp_key_sum:
+ cookie = PC1(temp_key, cookie)
+ ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie)
+ if verification == ver:
+ found_key = finalkey
+ break
+ return [found_key,pid]
+ def __init__(self, data_file, pidlist):
+ # initial sanity check on file
self.data_file = data_file
header = data_file[0:72]
if header[0x3C:0x3C+8] != 'BOOKMOBI':
print "This book is not encrypted."
else:
if crypto_type == 1:
- raise DrmException("cannot decode Mobipocket encryption type 1")
+ raise DrmException("Cannot decode Mobipocket encryption type 1")
if crypto_type != 2:
- raise DrmException("unknown encryption type: %d" % crypto_type)
+ raise DrmException("Cannot decode unknown Mobipocket encryption type %d" % crypto_type)
+
+ goodpids = []
+ for pid in pidlist:
+ if len(pid)==10:
+ if checksumPid(pid[0:-2]) != pid:
+ print "PID " + pid + " has incorrect checksum, should have been "+checksumPid(pid[0:-2])
+ else:
+ goodpids.append(pid[0:-2])
+ elif len(pid)==8:
+ print "PID without checksum given. With checksum PID is "+checksumPid(pid)
+ goodpids.append(pid)
+
+ if len(goodpids) == 0:
+ raise DrmException("No valid PIDs supplied.")
# calculate the keys
drm_ptr, drm_count, drm_size, drm_flags = struct.unpack('>LLLL', sect[0xA8:0xA8+16])
if drm_count == 0:
- raise DrmException("no PIDs found in this file")
- found_key = self.parseDRM(sect[drm_ptr:drm_ptr+drm_size], drm_count, pid)
+ raise DrmException("Not yet initialised with PID. Must be opened with Mobipocket Reader first.")
+ found_key, pid = self.parseDRM(sect[drm_ptr:drm_ptr+drm_size], drm_count, goodpids)
if not found_key:
- raise DrmException("no key found. maybe the PID is incorrect")
+ raise DrmException("No key found. Most likely the correct PID has not been given.")
+
+ if pid=="00000000":
+ print "File has default encryption, no specific PID."
+ else:
+ print "File is encoded with PID "+checksumPid(pid)+"."
# kill the drm keys
self.patchSection(0, "\0" * drm_size, drm_ptr)
new_data += PC1(found_key, data[0:len(data) - extra_size])
if extra_size > 0:
new_data += data[-extra_size:]
- #self.patchSection(i, PC1(found_key, data[0:len(data) - extra_size]))
if self.num_sections > records+1:
new_data += self.data_file[self.sections[records+1][0]:]
self.data_file = new_data
def getUnencryptedBook(infile,pid):
sys.stdout=Unbuffered(sys.stdout)
data_file = file(infile, 'rb').read()
- strippedFile = DrmStripper(data_file, pid)
+ strippedFile = DrmStripper(data_file, [pid])
+ return strippedFile.getResult()
+
+def getUnencryptedBookWithList(infile,pidlist):
+ sys.stdout=Unbuffered(sys.stdout)
+ data_file = file(infile, 'rb').read()
+ strippedFile = DrmStripper(data_file, pidlist)
return strippedFile.getResult()
def main(argv=sys.argv):
if len(argv)<4:
print "Removes protection from Mobipocket books"
print "Usage:"
- print " %s <infile> <outfile> <PID>" % sys.argv[0]
+ print " %s <infile> <outfile> <Comma separated list of PIDs to try>" % sys.argv[0]
return 1
else:
infile = argv[1]
outfile = argv[2]
- pid = argv[3]
+ pidlist = argv[3].split(',')
try:
- stripped_file = getUnencryptedBook(infile, pid)
+ stripped_file = getUnencryptedBook(infile, pidlist)
file(outfile, 'wb').write(stripped_file)
except DrmException, e:
print "Error: %s" % e
+++ /dev/null
-K4MobiDeDRM
-
-This tool combines the functionality of MobiDeDRM with that of K4PCDeDRM, K4MDeDRM, and K4DeDRM. Effectively, it provides one-stop shopping for all your Mobipocket, Kindle for iPhone/iPad/iPodTouch, Kindle for PC, and Kindle for Mac needs.
-
-****
-Please Note: If you are a happy user of MobiDeDRM, K4DeDRM, K4PCDeDRM, or K4MUnswindle, please continue to use these programs as there is no additional capability provided by this tool over the others. In the long run, if you have problems with any of those tools, you might want to try this one as it will continue under development, eventually replacing all of those tools.
-****
-
-1. double-click on K4MobiDeDRM.pyw
-
-2. In the window that opens:
-hit the first '...' button to locate your DRM Kindle-style ebook
-
-3. Then hit the second '...' button to select an output directory for the unlocked file
-
-4. If you have multiple Kindle.Info files and would like to use one specific one, please hit the third '...' button to select it. Note, if you only have one Kindle.Info file (like most users) this can and should be left blank.
-
-5. Then add in any PIDs you need from KindleV1, Kindle for iPhone/iPad/iPodTouch, or other single PID devices to the provided box as a comma separated list of 10 digit PID numbers.
-
-If this is a Kindle for Mac or a Kindle for PC book then you can leave this box blank
-
-6. hit the 'Start' button
-
-After a short delay, you should see progress in the Conversion Log window indicating whether the unlocking was a success or failure.
-
+++ /dev/null
-#!/usr/bin/env python
-
-# engine to remove drm from Kindle for Mac and Kindle for PC books
-# for personal use for archiving and converting your ebooks
-
-# PLEASE DO NOT PIRATE EBOOKS!
-
-# We want all authors and publishers, and eBook stores to live
-# long and prosperous lives but at the same time we just want to
-# be able to read OUR books on whatever device we want and to keep
-# readable for a long, long time
-
-# This borrows very heavily from works by CMBDTC, IHeartCabbages, skindle,
-# unswindle, DarkReverser, ApprenticeAlf, DiapDealer, some_updates
-# and many many others
-
-# It can run standalone to convert K4M/K4PC/Mobi files, or it can be installed as a
-# plugin for Calibre (http://calibre-ebook.com/about) so that importing
-# K4 or Mobi with DRM is no longer a multi-step process.
-#
-# ***NOTE*** If you are using this script as a calibre plugin for a K4M or K4PC ebook
-# then calibre must be installed on the same machine and in the same account as K4PC or K4M
-# for the plugin version to function properly.
-#
-# To create a Calibre plugin, rename this file so that the filename
-# ends in '_plugin.py', put it into a ZIP file with all its supporting python routines
-# and import that ZIP into Calibre using its plugin configuration GUI.
-
-from __future__ import with_statement
-
-__version__ = '1.2'
-
-class Unbuffered:
- def __init__(self, stream):
- self.stream = stream
- def write(self, data):
- self.stream.write(data)
- self.stream.flush()
- def __getattr__(self, attr):
- return getattr(self.stream, attr)
-
-import sys
-import os, csv, getopt
-import binascii
-import zlib
-import re
-from struct import pack, unpack, unpack_from
-
-
-#Exception Handling
-class DrmException(Exception):
- pass
-
-#
-# crypto digest routines
-#
-
-import hashlib
-
-def MD5(message):
- ctx = hashlib.md5()
- ctx.update(message)
- return ctx.digest()
-
-def SHA1(message):
- ctx = hashlib.sha1()
- ctx.update(message)
- return ctx.digest()
-
-# determine if we are running as a calibre plugin
-if 'calibre' in sys.modules:
- inCalibre = True
- global openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap1, charMap2, charMap3, charMap4
-else:
- inCalibre = False
-
-#
-# start of Kindle specific routines
-#
-
-if not inCalibre:
- import mobidedrm
- if sys.platform.startswith('win'):
- from k4pcutils import openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap1, charMap2, charMap3, charMap4
- if sys.platform.startswith('darwin'):
- from k4mutils import openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap1, charMap2, charMap3, charMap4
-
-global kindleDatabase
-
-# Encode the bytes in data with the characters in map
-def encode(data, map):
- result = ""
- for char in data:
- value = ord(char)
- Q = (value ^ 0x80) // len(map)
- R = value % len(map)
- result += map[Q]
- result += map[R]
- return result
-
-# Hash the bytes in data and then encode the digest with the characters in map
-def encodeHash(data,map):
- return encode(MD5(data),map)
-
-# Decode the string in data with the characters in map. Returns the decoded bytes
-def decode(data,map):
- result = ""
- for i in range (0,len(data)-1,2):
- high = map.find(data[i])
- low = map.find(data[i+1])
- if (high == -1) or (low == -1) :
- break
- value = (((high * len(map)) ^ 0x80) & 0xFF) + low
- result += pack("B",value)
- return result
-
-
-# Parse the Kindle.info file and return the records as a list of key-values
-def parseKindleInfo(kInfoFile):
- DB = {}
- infoReader = openKindleInfo(kInfoFile)
- infoReader.read(1)
- data = infoReader.read()
- if sys.platform.startswith('win'):
- items = data.split('{')
- else :
- items = data.split('[')
- for item in items:
- splito = item.split(':')
- DB[splito[0]] =splito[1]
- return DB
-
-# Get a record from the Kindle.info file for the key "hashedKey" (already hashed and encoded). Return the decoded and decrypted record
-def getKindleInfoValueForHash(hashedKey):
- global kindleDatabase
- encryptedValue = decode(kindleDatabase[hashedKey],charMap2)
- if sys.platform.startswith('win'):
- return CryptUnprotectData(encryptedValue,"")
- else:
- cleartext = CryptUnprotectData(encryptedValue)
- return decode(cleartext, charMap1)
-
-# Get a record from the Kindle.info file for the string in "key" (plaintext). Return the decoded and decrypted record
-def getKindleInfoValueForKey(key):
- return getKindleInfoValueForHash(encodeHash(key,charMap2))
-
-# Find if the original string for a hashed/encoded string is known. If so return the original string otherwise return an empty string.
-def findNameForHash(hash):
- names = ["kindle.account.tokens","kindle.cookie.item","eulaVersionAccepted","login_date","kindle.token.item","login","kindle.key.item","kindle.name.info","kindle.device.info", "MazamaRandomNumber"]
- result = ""
- for name in names:
- if hash == encodeHash(name, charMap2):
- result = name
- break
- return result
-
-# Print all the records from the kindle.info file (option -i)
-def printKindleInfo():
- for record in kindleDatabase:
- name = findNameForHash(record)
- if name != "" :
- print (name)
- print ("--------------------------")
- else :
- print ("Unknown Record")
- print getKindleInfoValueForHash(record)
- print "\n"
-
-#
-# PID generation routines
-#
-
-# Returns two bit at offset from a bit field
-def getTwoBitsFromBitField(bitField,offset):
- byteNumber = offset // 4
- bitPosition = 6 - 2*(offset % 4)
- return ord(bitField[byteNumber]) >> bitPosition & 3
-
-# Returns the six bits at offset from a bit field
-def getSixBitsFromBitField(bitField,offset):
- offset *= 3
- value = (getTwoBitsFromBitField(bitField,offset) <<4) + (getTwoBitsFromBitField(bitField,offset+1) << 2) +getTwoBitsFromBitField(bitField,offset+2)
- return value
-
-# 8 bits to six bits encoding from hash to generate PID string
-def encodePID(hash):
- global charMap3
- PID = ""
- for position in range (0,8):
- PID += charMap3[getSixBitsFromBitField(hash,position)]
- return PID
-
-# Encryption table used to generate the device PID
-def generatePidEncryptionTable() :
- table = []
- for counter1 in range (0,0x100):
- value = counter1
- for counter2 in range (0,8):
- if (value & 1 == 0) :
- value = value >> 1
- else :
- value = value >> 1
- value = value ^ 0xEDB88320
- table.append(value)
- return table
-
-# Seed value used to generate the device PID
-def generatePidSeed(table,dsn) :
- value = 0
- for counter in range (0,4) :
- index = (ord(dsn[counter]) ^ value) &0xFF
- value = (value >> 8) ^ table[index]
- return value
-
-# Generate the device PID
-def generateDevicePID(table,dsn,nbRoll):
- seed = generatePidSeed(table,dsn)
- pidAscii = ""
- pid = [(seed >>24) &0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF,(seed>>24) & 0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF]
- index = 0
- for counter in range (0,nbRoll):
- pid[index] = pid[index] ^ ord(dsn[counter])
- index = (index+1) %8
- for counter in range (0,8):
- index = ((((pid[counter] >>5) & 3) ^ pid[counter]) & 0x1f) + (pid[counter] >> 7)
- pidAscii += charMap4[index]
- return pidAscii
-
-# convert from 8 digit PID to 10 digit PID with checksum
-def checksumPid(s):
- letters = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
- crc = (~binascii.crc32(s,-1))&0xFFFFFFFF
- crc = crc ^ (crc >> 16)
- res = s
- l = len(letters)
- for i in (0,1):
- b = crc & 0xff
- pos = (b // l) ^ (b % l)
- res += letters[pos%l]
- crc >>= 8
- return res
-
-
-class MobiPeek:
- def loadSection(self, section):
- before, after = self.sections[section:section+2]
- self.f.seek(before)
- return self.f.read(after - before)
- def __init__(self, filename):
- self.f = file(filename, 'rb')
- self.header = self.f.read(78)
- self.ident = self.header[0x3C:0x3C+8]
- if self.ident != 'BOOKMOBI' and self.ident != 'TEXtREAd':
- raise DrmException('invalid file format')
- self.num_sections, = unpack_from('>H', self.header, 76)
- sections = self.f.read(self.num_sections*8)
- self.sections = unpack_from('>%dL' % (self.num_sections*2), sections, 0)[::2] + (0xfffffff, )
- self.sect0 = self.loadSection(0)
- self.f.close()
- def getBookTitle(self):
- # get book title
- toff, tlen = unpack('>II', self.sect0[0x54:0x5c])
- tend = toff + tlen
- title = self.sect0[toff:tend]
- return title
- def getexthData(self):
- # if exth region exists then grab it
- # get length of this header
- length, type, codepage, unique_id, version = unpack('>LLLLL', self.sect0[20:40])
- exth_flag, = unpack('>L', self.sect0[0x80:0x84])
- exth = ''
- if exth_flag & 0x40:
- exth = self.sect0[16 + length:]
- return exth
- def isNotEncrypted(self):
- lock_type, = unpack('>H', self.sect0[0xC:0xC+2])
- if lock_type == 0:
- return True
- return False
-
-# DiapDealer's stuff: Parse the EXTH header records and parse the Kindleinfo
-# file to calculate the book pid.
-def getK4Pids(exth, title, kInfoFile=None):
- global kindleDatabase
- try:
- kindleDatabase = parseKindleInfo(kInfoFile)
- except Exception, message:
- print(message)
-
- if kindleDatabase != None :
- # Get the Mazama Random number
- MazamaRandomNumber = getKindleInfoValueForKey("MazamaRandomNumber")
-
- # Get the HDD serial
- encodedSystemVolumeSerialNumber = encodeHash(GetVolumeSerialNumber(),charMap1)
-
- # Get the current user name
- encodedUsername = encodeHash(GetUserName(),charMap1)
-
- # concat, hash and encode to calculate the DSN
- DSN = encode(SHA1(MazamaRandomNumber+encodedSystemVolumeSerialNumber+encodedUsername),charMap1)
-
- print("\nDSN: " + DSN)
-
- # Compute the device PID (for which I can tell, is used for nothing).
- # But hey, stuff being printed out is apparently cool.
- table = generatePidEncryptionTable()
- devicePID = generateDevicePID(table,DSN,4)
-
- print("Device PID: " + checksumPid(devicePID))
-
- # Compute book PID
- exth_records = {}
- nitems, = unpack('>I', exth[8:12])
- pos = 12
-
- exth_records[209] = None
- # Parse the exth records, storing data indexed by type
- for i in xrange(nitems):
- type, size = unpack('>II', exth[pos: pos + 8])
- content = exth[pos + 8: pos + size]
-
- exth_records[type] = content
- pos += size
-
- # Grab the contents of the type 209 exth record
- if exth_records[209] != None:
- data = exth_records[209]
- else:
- raise DrmException("\nNo EXTH record type 209 - Perhaps not a K4 file?")
-
- # Parse the 209 data to find the the exth record with the token data.
- # The last character of the 209 data points to the record with the token.
- # Always 208 from my experience, but I'll leave the logic in case that changes.
- for i in xrange(len(data)):
- if ord(data[i]) != 0:
- if exth_records[ord(data[i])] != None:
- token = exth_records[ord(data[i])]
-
- # Get the kindle account token
- kindleAccountToken = getKindleInfoValueForKey("kindle.account.tokens")
-
- print("Account Token: " + kindleAccountToken)
-
- pidHash = SHA1(DSN+kindleAccountToken+exth_records[209]+token)
-
- bookPID = encodePID(pidHash)
- bookPID = checksumPid(bookPID)
-
- if exth_records[503] != None:
- print "Pid for " + exth_records[503] + ": " + bookPID
- else:
- print "Pid for " + title + ":" + bookPID
- return bookPID
-
- raise DrmException("\nCould not access K4 data - Perhaps K4 is not installed/configured?")
- return null
-
-def usage(progname):
- print "Removes DRM protection from K4PC, K4M, and Mobi ebooks"
- print "Usage:"
- print " %s [-k <kindle.info>] [-p <pidnums>] <infile> <outfile> " % progname
-
-#
-# Main
-#
-def main(argv=sys.argv):
- global kindleDatabase
- import mobidedrm
-
- progname = os.path.basename(argv[0])
- kInfoFiles = []
- pidnums = ""
-
- print ('K4MobiDeDrm v%(__version__)s '
- 'provided by the work of many including DiapDealer, SomeUpdates, IHeartCabbages, CMBDTC, Skindle, DarkReverser, ApprenticeAlf, etc .' % globals())
-
- try:
- opts, args = getopt.getopt(sys.argv[1:], "k:p:")
- except getopt.GetoptError, err:
- print str(err)
- usage(progname)
- sys.exit(2)
-
- if len(args)<2:
- usage(progname)
- sys.exit(2)
-
- for o, a in opts:
- if o == "-k":
- if a == None :
- raise DrmException("Invalid parameter for -k")
- kInfoFiles.append(a)
- if o == "-p":
- if a == None :
- raise DrmException("Invalid parameter for -p")
- pidnums = a
-
- kindleDatabase = None
- infile = args[0]
- outfile = args[1]
- DecodeErrorString = ""
- try:
- # first try with K4PC/K4M
- ex = MobiPeek(infile)
- if ex.isNotEncrypted():
- print "File was Not Encrypted"
- return 2
- title = ex.getBookTitle()
- exth = ex.getexthData()
- if exth=='':
- raise DrmException("Not a Kindle Mobipocket file")
- pid = getK4Pids(exth, title)
- unlocked_file = mobidedrm.getUnencryptedBook(infile, pid)
- except DrmException, e:
- DecodeErrorString += "Error trying default K4 info: " + str(e) + "\n"
- pass
- except mobidedrm.DrmException, e:
- DecodeErrorString += "Error trying default K4 info: " + str(e) + "\n"
- pass
- else:
- file(outfile, 'wb').write(unlocked_file)
- return 0
-
- # now try alternate kindle.info files
- if kInfoFiles:
- for infoFile in kInfoFiles:
- kindleDatabase = None
- try:
- title = ex.getBookTitle()
- exth = ex.getexthData()
- if exth=='':
- raise DrmException("Not a Kindle Mobipocket file")
- pid = getK4Pids(exth, title, infoFile)
- unlocked_file = mobidedrm.getUnencryptedBook(infile, pid)
- except DrmException, e:
- DecodeErrorString += "Error trying " + infoFile + " K4 info: " + str(e) + "\n"
- pass
- except mobidedrm.DrmException, e:
- DecodeErrorString += "Error trying " + infoFile + " K4 info: " + str(e) + "\n"
- pass
- else:
- file(outfile, 'wb').write(unlocked_file)
- return 0
-
- # Lastly, try from the pid list
- pids = pidnums.split(',')
- for pid in pids:
- try:
- print 'Trying: "'+ pid + '"'
- unlocked_file = mobidedrm.getUnencryptedBook(infile, pid)
- except mobidedrm.DrmException:
- pass
- else:
- file(outfile, 'wb').write(unlocked_file)
- return 0
-
- # we could not unencrypt book
- print DecodeErrorString
- print "Error: Could Not Unencrypt Book"
- return 1
-
-
-if __name__ == '__main__':
- sys.stdout=Unbuffered(sys.stdout)
- sys.exit(main())
-
-
-if not __name__ == "__main__" and inCalibre:
- from calibre.customize import FileTypePlugin
-
- class K4DeDRM(FileTypePlugin):
- name = 'K4PC, K4Mac, Mobi DeDRM' # Name of the plugin
- description = 'Removes DRM from K4PC, K4Mac, and Mobi files. \
- Provided by the work of many including DiapDealer, SomeUpdates, IHeartCabbages, CMBDTC, Skindle, DarkReverser, ApprenticeAlf, etc.'
- supported_platforms = ['osx', 'windows', 'linux'] # Platforms this plugin will run on
- author = 'DiapDealer, SomeUpdates' # The author of this plugin
- version = (0, 1, 3) # The version number of this plugin
- file_types = set(['prc','mobi','azw']) # The file types that this plugin will be applied to
- on_import = True # Run this plugin during the import
- priority = 200 # run this plugin before mobidedrm, k4pcdedrm, k4dedrm
-
- def run(self, path_to_ebook):
- from calibre.gui2 import is_ok_to_use_qt
- from PyQt4.Qt import QMessageBox
-
- # Head Topaz files off at the pass and warn the user that they will NOT
- # be decrypted. Changes the file extension from .azw or .prc to .tpz so
- # Calibre can at least read the metadata properly and the user can find
- # them by sorting on 'format'.
- with open(path_to_ebook, 'rb') as f:
- raw = f.read()
- if raw.startswith('TPZ'):
- tf = self.temporary_file('.tpz')
- if is_ok_to_use_qt():
- d = QMessageBox(QMessageBox.Warning, "K4MobiDeDRM Plugin", "%s is a Topaz book. It will NOT be decrypted!" % path_to_ebook)
- d.show()
- d.raise_()
- d.exec_()
- tf.write(raw)
- tf.close
- return tf.name
-
- global kindleDatabase
- global openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap1, charMap2, charMap3, charMap4
- if sys.platform.startswith('win'):
- from k4pcutils import openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap1, charMap2, charMap3, charMap4
- if sys.platform.startswith('darwin'):
- from k4mutils import openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap1, charMap2, charMap3, charMap4
- import mobidedrm
-
- # Get supplied list of PIDs to try from plugin customization.
- pidnums = self.site_customization
-
- # Load any kindle info files (*.info) included Calibre's config directory.
- kInfoFiles = []
- try:
- # Find Calibre's configuration directory.
- confpath = os.path.split(os.path.split(self.plugin_path)[0])[0]
- print 'K4MobiDeDRM: Calibre configuration directory = %s' % confpath
- files = os.listdir(confpath)
- filefilter = re.compile("\.info$", re.IGNORECASE)
- files = filter(filefilter.search, files)
-
- if files:
- for filename in files:
- fpath = os.path.join(confpath, filename)
- kInfoFiles.append(fpath)
- print 'K4MobiDeDRM: Kindle info file %s found in config folder.' % filename
- except IOError:
- print 'K4MobiDeDRM: Error reading kindle info files from config directory.'
- pass
-
- # first try with book specifc pid from K4PC or K4M
- try:
- kindleDatabase = None
- ex = MobiPeek(path_to_ebook)
- if ex.isNotEncrypted():
- return path_to_ebook
- title = ex.getBookTitle()
- exth = ex.getexthData()
- if exth=='':
- raise DrmException("Not a Kindle Mobipocket file")
- pid = getK4Pids(exth, title)
- unlocked_file = mobidedrm.getUnencryptedBook(path_to_ebook,pid)
- except DrmException:
- pass
- except mobidedrm.DrmException:
- pass
- else:
- of = self.temporary_file('.mobi')
- of.write(unlocked_file)
- of.close()
- return of.name
-
- # Now try alternate kindle info files
- if kInfoFiles:
- for infoFile in kInfoFiles:
- kindleDatabase = None
- try:
- title = ex.getBookTitle()
- exth = ex.getexthData()
- if exth=='':
- raise DrmException("Not a Kindle Mobipocket file")
- pid = getK4Pids(exth, title, infoFile)
- unlocked_file = mobidedrm.getUnencryptedBook(path_to_ebook,pid)
- except DrmException:
- pass
- except mobidedrm.DrmException:
- pass
- else:
- of = self.temporary_file('.mobi')
- of.write(unlocked_file)
- of.close()
- return of.name
-
- # now try from the pid list
- pids = pidnums.split(',')
- for pid in pids:
- try:
- unlocked_file = mobidedrm.getUnencryptedBook(path_to_ebook, pid)
- except mobidedrm.DrmException:
- pass
- else:
- of = self.temporary_file('.mobi')
- of.write(unlocked_file)
- of.close()
- return of.name
-
- #if you reached here then no luck raise and exception
- if is_ok_to_use_qt():
- d = QMessageBox(QMessageBox.Warning, "K4MobiDeDRM Plugin", "Error decoding: %s\n" % path_to_ebook)
- d.show()
- d.raise_()
- d.exec_()
- raise Exception("K4MobiDeDRM plugin could not decode the file")
- return ""
-
- def customization_help(self, gui=False):
- return 'Enter each 10 character PID separated by a comma (no spaces).'
+++ /dev/null
-# K4PC Windows specific routines
-
-from __future__ import with_statement
-
-import sys, os
-
-from ctypes import windll, c_char_p, c_wchar_p, c_uint, POINTER, byref, \
- create_unicode_buffer, create_string_buffer, CFUNCTYPE, addressof, \
- string_at, Structure, c_void_p, cast
-
-import _winreg as winreg
-
-import traceback
-
-MAX_PATH = 255
-
-kernel32 = windll.kernel32
-advapi32 = windll.advapi32
-crypt32 = windll.crypt32
-
-
-#
-# Various character maps used to decrypt books. Probably supposed to act as obfuscation
-#
-charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
-charMap2 = "AaZzB0bYyCc1XxDdW2wEeVv3FfUuG4g-TtHh5SsIiR6rJjQq7KkPpL8lOoMm9Nn_"
-charMap3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
-charMap4 = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
-
-#
-# Exceptions for all the problems that might happen during the script
-#
-class DrmException(Exception):
- pass
-
-
-class DataBlob(Structure):
- _fields_ = [('cbData', c_uint),
- ('pbData', c_void_p)]
-DataBlob_p = POINTER(DataBlob)
-
-
-def GetSystemDirectory():
- GetSystemDirectoryW = kernel32.GetSystemDirectoryW
- GetSystemDirectoryW.argtypes = [c_wchar_p, c_uint]
- GetSystemDirectoryW.restype = c_uint
- def GetSystemDirectory():
- buffer = create_unicode_buffer(MAX_PATH + 1)
- GetSystemDirectoryW(buffer, len(buffer))
- return buffer.value
- return GetSystemDirectory
-GetSystemDirectory = GetSystemDirectory()
-
-def GetVolumeSerialNumber():
- GetVolumeInformationW = kernel32.GetVolumeInformationW
- GetVolumeInformationW.argtypes = [c_wchar_p, c_wchar_p, c_uint,
- POINTER(c_uint), POINTER(c_uint),
- POINTER(c_uint), c_wchar_p, c_uint]
- GetVolumeInformationW.restype = c_uint
- def GetVolumeSerialNumber(path = GetSystemDirectory().split('\\')[0] + '\\'):
- vsn = c_uint(0)
- GetVolumeInformationW(path, None, 0, byref(vsn), None, None, None, 0)
- return str(vsn.value)
- return GetVolumeSerialNumber
-GetVolumeSerialNumber = GetVolumeSerialNumber()
-
-
-def GetUserName():
- GetUserNameW = advapi32.GetUserNameW
- GetUserNameW.argtypes = [c_wchar_p, POINTER(c_uint)]
- GetUserNameW.restype = c_uint
- def GetUserName():
- buffer = create_unicode_buffer(32)
- size = c_uint(len(buffer))
- while not GetUserNameW(buffer, byref(size)):
- buffer = create_unicode_buffer(len(buffer) * 2)
- size.value = len(buffer)
- return buffer.value.encode('utf-16-le')[::2]
- return GetUserName
-GetUserName = GetUserName()
-
-
-def CryptUnprotectData():
- _CryptUnprotectData = crypt32.CryptUnprotectData
- _CryptUnprotectData.argtypes = [DataBlob_p, c_wchar_p, DataBlob_p,
- c_void_p, c_void_p, c_uint, DataBlob_p]
- _CryptUnprotectData.restype = c_uint
- def CryptUnprotectData(indata, entropy):
- indatab = create_string_buffer(indata)
- indata = DataBlob(len(indata), cast(indatab, c_void_p))
- entropyb = create_string_buffer(entropy)
- entropy = DataBlob(len(entropy), cast(entropyb, c_void_p))
- outdata = DataBlob()
- if not _CryptUnprotectData(byref(indata), None, byref(entropy),
- None, None, 0, byref(outdata)):
- raise DrmException("Failed to Unprotect Data")
- return string_at(outdata.pbData, outdata.cbData)
- return CryptUnprotectData
-CryptUnprotectData = CryptUnprotectData()
-
-#
-# Locate and open the Kindle.info file.
-#
-def openKindleInfo(kInfoFile=None):
- if kInfoFile == None:
- regkey = winreg.OpenKey(winreg.HKEY_CURRENT_USER, "Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\Shell Folders\\")
- path = winreg.QueryValueEx(regkey, 'Local AppData')[0]
- return open(path+'\\Amazon\\Kindle For PC\\{AMAwzsaPaaZAzmZzZQzgZCAkZ3AjA_AY}\\kindle.info','r')
- else:
- return open(kInfoFile, 'r')
+++ /dev/null
-#!/usr/bin/python
-#
-# This is a python script. You need a Python interpreter to run it.
-# For example, ActiveState Python, which exists for windows.
-#
-# Changelog
-# 0.01 - Initial version
-# 0.02 - Huffdic compressed books were not properly decrypted
-# 0.03 - Wasn't checking MOBI header length
-# 0.04 - Wasn't sanity checking size of data record
-# 0.05 - It seems that the extra data flags take two bytes not four
-# 0.06 - And that low bit does mean something after all :-)
-# 0.07 - The extra data flags aren't present in MOBI header < 0xE8 in size
-# 0.08 - ...and also not in Mobi header version < 6
-# 0.09 - ...but they are there with Mobi header version 6, header size 0xE4!
-# 0.10 - Outputs unencrypted files as-is, so that when run as a Calibre
-# import filter it works when importing unencrypted files.
-# Also now handles encrypted files that don't need a specific PID.
-# 0.11 - use autoflushed stdout and proper return values
-# 0.12 - Fix for problems with metadata import as Calibre plugin, report errors
-# 0.13 - Formatting fixes: retabbed file, removed trailing whitespace
-# and extra blank lines, converted CR/LF pairs at ends of each line,
-# and other cosmetic fixes.
-# 0.14 - Working out when the extra data flags are present has been problematic
-# Versions 7 through 9 have tried to tweak the conditions, but have been
-# only partially successful. Closer examination of lots of sample
-# files reveals that a confusion has arisen because trailing data entries
-# are not encrypted, but it turns out that the multibyte entries
-# in utf8 file are encrypted. (Although neither kind gets compressed.)
-# This knowledge leads to a simplification of the test for the
-# trailing data byte flags - version 5 and higher AND header size >= 0xE4.
-# 0.15 - Now outputs 'heartbeat', and is also quicker for long files.
-# 0.16 - And reverts to 'done' not 'done.' at the end for unswindle compatibility.
-# 0.17 - added modifications to support its use as an imported python module
-# both inside calibre and also in other places (ie K4DeDRM tools)
-# 0.17a- disabled the standalone plugin feature since a plugin can not import
-# a plugin
-# 0.18 - It seems that multibyte entries aren't encrypted in a v7 file...
-# Removed the disabled Calibre plug-in code
-# Permit use of 8-digit PIDs
-# 0.19 - It seems that multibyte entries aren't encrypted in a v6 file either.
-# 0.20 - Correction: It seems that multibyte entries are encrypted in a v6 file.
-
-__version__ = '0.20'
-
-import sys
-import struct
-import binascii
-
-class Unbuffered:
- def __init__(self, stream):
- self.stream = stream
- def write(self, data):
- self.stream.write(data)
- self.stream.flush()
- def __getattr__(self, attr):
- return getattr(self.stream, attr)
-
-class DrmException(Exception):
- pass
-
-# Implementation of Pukall Cipher 1
-def PC1(key, src, decryption=True):
- sum1 = 0;
- sum2 = 0;
- keyXorVal = 0;
- if len(key)!=16:
- print "Bad key length!"
- return None
- wkey = []
- for i in xrange(8):
- wkey.append(ord(key[i*2])<<8 | ord(key[i*2+1]))
-
- dst = ""
- for i in xrange(len(src)):
- temp1 = 0;
- byteXorVal = 0;
- for j in xrange(8):
- temp1 ^= wkey[j]
- sum2 = (sum2+j)*20021 + sum1
- sum1 = (temp1*346)&0xFFFF
- sum2 = (sum2+sum1)&0xFFFF
- temp1 = (temp1*20021+1)&0xFFFF
- byteXorVal ^= temp1 ^ sum2
- curByte = ord(src[i])
- if not decryption:
- keyXorVal = curByte * 257;
- curByte = ((curByte ^ (byteXorVal >> 8)) ^ byteXorVal) & 0xFF
- if decryption:
- keyXorVal = curByte * 257;
- for j in xrange(8):
- wkey[j] ^= keyXorVal;
- dst+=chr(curByte)
- return dst
-
-def checksumPid(s):
- letters = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
- crc = (~binascii.crc32(s,-1))&0xFFFFFFFF
- crc = crc ^ (crc >> 16)
- res = s
- l = len(letters)
- for i in (0,1):
- b = crc & 0xff
- pos = (b // l) ^ (b % l)
- res += letters[pos%l]
- crc >>= 8
- return res
-
-def getSizeOfTrailingDataEntries(ptr, size, flags):
- def getSizeOfTrailingDataEntry(ptr, size):
- bitpos, result = 0, 0
- if size <= 0:
- return result
- while True:
- v = ord(ptr[size-1])
- result |= (v & 0x7F) << bitpos
- bitpos += 7
- size -= 1
- if (v & 0x80) != 0 or (bitpos >= 28) or (size == 0):
- return result
- num = 0
- testflags = flags >> 1
- while testflags:
- if testflags & 1:
- num += getSizeOfTrailingDataEntry(ptr, size - num)
- testflags >>= 1
- # Check the low bit to see if there's multibyte data present.
- # if multibyte data is included in the encryped data, we'll
- # have already cleared this flag.
- if flags & 1:
- num += (ord(ptr[size - num - 1]) & 0x3) + 1
- return num
-
-class DrmStripper:
- def loadSection(self, section):
- if (section + 1 == self.num_sections):
- endoff = len(self.data_file)
- else:
- endoff = self.sections[section + 1][0]
- off = self.sections[section][0]
- return self.data_file[off:endoff]
-
- def patch(self, off, new):
- self.data_file = self.data_file[:off] + new + self.data_file[off+len(new):]
-
- def patchSection(self, section, new, in_off = 0):
- if (section + 1 == self.num_sections):
- endoff = len(self.data_file)
- else:
- endoff = self.sections[section + 1][0]
- off = self.sections[section][0]
- assert off + in_off + len(new) <= endoff
- self.patch(off + in_off, new)
-
- def parseDRM(self, data, count, pid):
- pid = pid.ljust(16,'\0')
- keyvec1 = "\x72\x38\x33\xB0\xB4\xF2\xE3\xCA\xDF\x09\x01\xD6\xE2\xE0\x3F\x96"
- temp_key = PC1(keyvec1, pid, False)
- temp_key_sum = sum(map(ord,temp_key)) & 0xff
- found_key = None
- for i in xrange(count):
- verification, size, type, cksum, cookie = struct.unpack('>LLLBxxx32s', data[i*0x30:i*0x30+0x30])
- cookie = PC1(temp_key, cookie)
- ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie)
- if verification == ver and cksum == temp_key_sum and (flags & 0x1F) == 1:
- found_key = finalkey
- break
- if not found_key:
- # Then try the default encoding that doesn't require a PID
- temp_key = keyvec1
- temp_key_sum = sum(map(ord,temp_key)) & 0xff
- for i in xrange(count):
- verification, size, type, cksum, cookie = struct.unpack('>LLLBxxx32s', data[i*0x30:i*0x30+0x30])
- cookie = PC1(temp_key, cookie)
- ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie)
- if verification == ver and cksum == temp_key_sum:
- found_key = finalkey
- break
- return found_key
-
- def __init__(self, data_file, pid):
- if len(pid)==10:
- if checksumPid(pid[0:-2]) != pid:
- raise DrmException("invalid PID checksum")
- pid = pid[0:-2]
- elif len(pid)==8:
- print "PID without checksum given. With checksum PID is "+checksumPid(pid)
- else:
- raise DrmException("Invalid PID length")
-
- self.data_file = data_file
- header = data_file[0:72]
- if header[0x3C:0x3C+8] != 'BOOKMOBI':
- raise DrmException("invalid file format")
- self.num_sections, = struct.unpack('>H', data_file[76:78])
-
- self.sections = []
- for i in xrange(self.num_sections):
- offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', data_file[78+i*8:78+i*8+8])
- flags, val = a1, a2<<16|a3<<8|a4
- self.sections.append( (offset, flags, val) )
-
- sect = self.loadSection(0)
- records, = struct.unpack('>H', sect[0x8:0x8+2])
- mobi_length, = struct.unpack('>L',sect[0x14:0x18])
- mobi_version, = struct.unpack('>L',sect[0x68:0x6C])
- extra_data_flags = 0
- print "MOBI header version = %d, length = %d" %(mobi_version, mobi_length)
- if (mobi_length >= 0xE4) and (mobi_version >= 5):
- extra_data_flags, = struct.unpack('>H', sect[0xF2:0xF4])
- print "Extra Data Flags = %d" %extra_data_flags
- if mobi_version < 7:
- # multibyte utf8 data is included in the encryption for mobi_version 6 and below
- # so clear that byte so that we leave it to be decrypted.
- extra_data_flags &= 0xFFFE
-
- crypto_type, = struct.unpack('>H', sect[0xC:0xC+2])
- if crypto_type == 0:
- print "This book is not encrypted."
- else:
- if crypto_type == 1:
- raise DrmException("cannot decode Mobipocket encryption type 1")
- if crypto_type != 2:
- raise DrmException("unknown encryption type: %d" % crypto_type)
-
- # calculate the keys
- drm_ptr, drm_count, drm_size, drm_flags = struct.unpack('>LLLL', sect[0xA8:0xA8+16])
- if drm_count == 0:
- raise DrmException("no PIDs found in this file")
- found_key = self.parseDRM(sect[drm_ptr:drm_ptr+drm_size], drm_count, pid)
- if not found_key:
- raise DrmException("no key found. maybe the PID is incorrect")
-
- # kill the drm keys
- self.patchSection(0, "\0" * drm_size, drm_ptr)
- # kill the drm pointers
- self.patchSection(0, "\xff" * 4 + "\0" * 12, 0xA8)
- # clear the crypto type
- self.patchSection(0, "\0" * 2, 0xC)
-
- # decrypt sections
- print "Decrypting. Please wait . . .",
- new_data = self.data_file[:self.sections[1][0]]
- for i in xrange(1, records+1):
- data = self.loadSection(i)
- extra_size = getSizeOfTrailingDataEntries(data, len(data), extra_data_flags)
- if i%100 == 0:
- print ".",
- # print "record %d, extra_size %d" %(i,extra_size)
- new_data += PC1(found_key, data[0:len(data) - extra_size])
- if extra_size > 0:
- new_data += data[-extra_size:]
- #self.patchSection(i, PC1(found_key, data[0:len(data) - extra_size]))
- if self.num_sections > records+1:
- new_data += self.data_file[self.sections[records+1][0]:]
- self.data_file = new_data
- print "done"
-
- def getResult(self):
- return self.data_file
-
-def getUnencryptedBook(infile,pid):
- sys.stdout=Unbuffered(sys.stdout)
- data_file = file(infile, 'rb').read()
- strippedFile = DrmStripper(data_file, pid)
- return strippedFile.getResult()
-
-def main(argv=sys.argv):
- sys.stdout=Unbuffered(sys.stdout)
- print ('MobiDeDrm v%(__version__)s. '
- 'Copyright 2008-2010 The Dark Reverser.' % globals())
- if len(argv)<4:
- print "Removes protection from Mobipocket books"
- print "Usage:"
- print " %s <infile> <outfile> <PID>" % sys.argv[0]
- return 1
- else:
- infile = argv[1]
- outfile = argv[2]
- pid = argv[3]
- try:
- stripped_file = getUnencryptedBook(infile, pid)
- file(outfile, 'wb').write(stripped_file)
- except DrmException, e:
- print "Error: %s" % e
- return 1
- return 0
-
-
-if __name__ == "__main__":
- sys.exit(main())
print "Usage: kindlepid.py <Kindle Serial Number>/<iPhone/iPod Touch UDID>"
return 1
if len(serial)==16:
- if serial.startswith("B001"):
- print "Kindle 1 serial number detected"
- elif serial.startswith("B002"):
- print "Kindle 2 serial number detected"
- elif serial.startswith("B003"):
- print "Kindle 2 Global serial number detected"
- elif serial.startswith("B004"):
- print "Kindle DX serial number detected"
- elif serial.startswith("B005"):
- print "Kindle DX International serial number detected"
+ if serial.startswith("B00"):
+ print "Kindle serial number detected"
else:
print "Warning: unrecognized serial number. Please recheck input."
return 1
+++ /dev/null
-#!/usr/bin/env python
-
-# This is a simple tool to identify all Amazon Topaz ebooks in a specific directory.
-# There always seems to be confusion since Topaz books downloaded to K4PC/Mac can have
-# almost any extension (.azw, .azw1, .prc, tpz). While the .azw1 and .tpz extensions
-# are fairly easy to indentify, the others are not (without opening the files in an editor).
-
-# To run the tool with the GUI frontend, just double-click on the 'FindTopazFiles.pyw' file
-# and select the folder where all of the ebooks in question are located. Then click 'Search'.
-# The program will list the file names of the ebooks that are indentified as being Topaz.
-# You can then isolate those books and use the Topaz tools to decrypt and convert them.
-
-# You can also run the script from a command line... supplying the folder to search
-# as a parameter: python FindTopazEbooks.pyw "C:\My Folder" (change appropriately for
-# your particular O.S.)
-
-# ** NOTE: This program does NOT decrypt or modify Topaz files in any way. It simply identifies them.
-
-# PLEASE DO NOT PIRATE EBOOKS!
-
-# We want all authors and publishers, and eBook stores to live
-# long and prosperous lives but at the same time we just want to
-# be able to read OUR books on whatever device we want and to keep
-# readable for a long, long time
-
-# This borrows very heavily from works by CMBDTC, IHeartCabbages, skindle,
-# unswindle, DarkReverser, ApprenticeAlf, DiapDealer, some_updates
-# and many many others
-
-# Revision history:
-# 1 - Initial release.
-
-from __future__ import with_statement
-
-__license__ = 'GPL v3'
-
-import sys
-import os
-import re
-import shutil
-import Tkinter
-import Tkconstants
-import tkFileDialog
-import tkMessageBox
-
-
-class ScrolledText(Tkinter.Text):
- def __init__(self, master=None, **kw):
- self.frame = Tkinter.Frame(master)
- self.vbar = Tkinter.Scrollbar(self.frame)
- self.vbar.pack(side=Tkconstants.RIGHT, fill=Tkconstants.Y)
- kw.update({'yscrollcommand': self.vbar.set})
- Tkinter.Text.__init__(self, self.frame, **kw)
- self.pack(side=Tkconstants.LEFT, fill=Tkconstants.BOTH, expand=True)
- self.vbar['command'] = self.yview
- # Copy geometry methods of self.frame without overriding Text
- # methods = hack!
- text_meths = vars(Tkinter.Text).keys()
- methods = vars(Tkinter.Pack).keys() + vars(Tkinter.Grid).keys() + vars(Tkinter.Place).keys()
- methods = set(methods).difference(text_meths)
- for m in methods:
- if m[0] != '_' and m != 'config' and m != 'configure':
- setattr(self, m, getattr(self.frame, m))
-
- def __str__(self):
- return str(self.frame)
-
-
-def cli_main(argv=sys.argv, obj=None):
- progname = os.path.basename(argv[0])
- if len(argv) != 2:
- print "usage: %s DIRECTORY" % (progname,)
- return 1
-
- if obj == None:
- print "\nTopaz search results:\n"
- else:
- obj.stext.insert(Tkconstants.END,"Topaz search results:\n\n")
-
- inpath = argv[1]
- files = os.listdir(inpath)
- filefilter = re.compile("(\.azw$)|(\.azw1$)|(\.prc$)|(\.tpz$)", re.IGNORECASE)
- files = filter(filefilter.search, files)
-
- if files:
- topazcount = 0
- totalcount = 0
- for filename in files:
- with open(os.path.join(inpath, filename), 'rb') as f:
- try:
- if f.read().startswith('TPZ'):
- f.close()
- basename, extension = os.path.splitext(filename)
- if obj == None:
- print " %s is a Topaz formatted ebook." % filename
- """
- if extension == '.azw' or extension == '.prc':
- print " renaming to %s" % (basename + '.tpz')
- shutil.move(os.path.join(inpath, filename),
- os.path.join(inpath, basename + '.tpz'))
- """
- else:
- msg1 = " %s is a Topaz formatted ebook.\n" % filename
- obj.stext.insert(Tkconstants.END,msg1)
- """
- if extension == '.azw' or extension == '.prc':
- msg2 = " renaming to %s\n" % (basename + '.tpz')
- obj.stext.insert(Tkconstants.END,msg2)
- shutil.move(os.path.join(inpath, filename),
- os.path.join(inpath, basename + '.tpz'))
- """
- topazcount += 1
- except:
- if obj == None:
- print " Error reading %s." % filename
- else:
- msg = " Error reading or %s.\n" % filename
- obj.stext.insert(Tkconstants.END,msg)
- pass
- totalcount += 1
- if topazcount == 0:
- if obj == None:
- print "\nNo Topaz books found in %s." % inpath
- else:
- msg = "\nNo Topaz books found in %s.\n\n" % inpath
- obj.stext.insert(Tkconstants.END,msg)
- else:
- if obj == None:
- print "\n%i Topaz books found in %s\n%i total books checked.\n" % (topazcount, inpath, totalcount)
- else:
- msg = "\n%i Topaz books found in %s\n%i total books checked.\n\n" %(topazcount, inpath, totalcount)
- obj.stext.insert(Tkconstants.END,msg)
- else:
- if obj == None:
- print "No typical Topaz file extensions found in %s.\n" % inpath
- else:
- msg = "No typical Topaz file extensions found in %s.\n\n" % inpath
- obj.stext.insert(Tkconstants.END,msg)
-
- return 0
-
-
-class DecryptionDialog(Tkinter.Frame):
- def __init__(self, root):
- Tkinter.Frame.__init__(self, root, border=5)
- ltext='Search a directory for Topaz eBooks\n'
- self.status = Tkinter.Label(self, text=ltext)
- self.status.pack(fill=Tkconstants.X, expand=1)
- body = Tkinter.Frame(self)
- body.pack(fill=Tkconstants.X, expand=1)
- sticky = Tkconstants.E + Tkconstants.W
- body.grid_columnconfigure(1, weight=2)
- Tkinter.Label(body, text='Directory to Search').grid(row=1)
- self.inpath = Tkinter.Entry(body, width=30)
- self.inpath.grid(row=1, column=1, sticky=sticky)
- button = Tkinter.Button(body, text="...", command=self.get_inpath)
- button.grid(row=1, column=2)
- msg1 = 'Topaz search results \n\n'
- self.stext = ScrolledText(body, bd=5, relief=Tkconstants.RIDGE,
- height=15, width=60, wrap=Tkconstants.WORD)
- self.stext.grid(row=4, column=0, columnspan=2,sticky=sticky)
- #self.stext.insert(Tkconstants.END,msg1)
- buttons = Tkinter.Frame(self)
- buttons.pack()
-
-
- self.botton = Tkinter.Button(
- buttons, text="Search", width=10, command=self.search)
- self.botton.pack(side=Tkconstants.LEFT)
- Tkinter.Frame(buttons, width=10).pack(side=Tkconstants.LEFT)
- self.button = Tkinter.Button(
- buttons, text="Quit", width=10, command=self.quit)
- self.button.pack(side=Tkconstants.RIGHT)
-
- def get_inpath(self):
- cwd = os.getcwdu()
- cwd = cwd.encode('utf-8')
- inpath = tkFileDialog.askdirectory(
- parent=None, title='Directory to search',
- initialdir=cwd, initialfile=None)
- if inpath:
- inpath = os.path.normpath(inpath)
- self.inpath.delete(0, Tkconstants.END)
- self.inpath.insert(0, inpath)
- return
-
-
- def search(self):
- inpath = self.inpath.get()
- if not inpath or not os.path.exists(inpath):
- self.status['text'] = 'Specified directory does not exist'
- return
- argv = [sys.argv[0], inpath]
- self.status['text'] = 'Searching...'
- self.botton.configure(state='disabled')
- cli_main(argv, self)
- self.status['text'] = 'Search a directory for Topaz files'
- self.botton.configure(state='normal')
-
- return
-
-
-def gui_main():
- root = Tkinter.Tk()
- root.title('Topaz eBook Finder')
- root.resizable(True, False)
- root.minsize(370, 0)
- DecryptionDialog(root).pack(fill=Tkconstants.X, expand=1)
- root.mainloop()
- return 0
-
-
-if __name__ == '__main__':
- if len(sys.argv) > 1:
- sys.exit(cli_main())
- sys.exit(gui_main())
\ No newline at end of file
+++ /dev/null
-The Topaz Tools work for "Kindle for PC" books, "Kindle for Mac" books, original standalone Kindles that have never been updated to firmware 2.5 or later, and Kindle for iPhone/iPad/iPodTouch (where the PID is known).
-
-
-For Topaz:
-
-1. Make sure you have Python 2.X installed (32 bit) and properly set as part of your SYSTEM PATH environment variable (On Windows I recommend ActivePython. See their web pages for instructions on how to install and how to properly set your PATH). On Mac OSX 10.6 everything you need is already installed.
-
-2. Simply download the latest tools_vX.X.zip file (see the comments after the first post on this blog) and extract the entire archive. Do not move or rename anything after extracting the entire archive.
-
-3. move to tools\Topaz_Tools\
-
-4. double-click on TopazExtract.pyw
-
-Hit the first “…” button to select the Topaz book with DRM that you want to convert
-
-Hit the second “…” to select an entirely new directory to extract the many book pieces into
-
-And add info for your PID (or extra PIDs) if needed (should not be needed for Kindle For PC or Kindle for Mac). This field is useful if you have Kindle for iPad/iPhone/iPodTouch or an old Kindle V1 and know your device PID.
-
-Hit the Start button
-
-3. Next double-click on TopazFiles2SVG.pyw
-(use the “…” button to select the new directory you created from the previous step, and hit Start)
-
-4. Finally double-click on TopazFiles2HTML.pyw
-(use the “…” button to slect the new directory you created in step 2, and hit Start)
-
-5. After all of this you should have book.html inside the directory you created with its own image directory and css style sheet. This file is created from the ocr that is done by Amazon and stored in the Topaz file. All errors belong to Amazon.
-
-Inside of that same directory, you should have an svg directory which has an exact image of each page of the book. To see it, simply open the .xhtml page which has the embedded svg image in a good browser (Firefox, Safari, etc)
-
-If you run into any problems – and there can be problems because the format has not been completely reversed engineered, simply copy the entire contents of the Conversion Log window and paste them in a post here or on the Dark Reverser’s New Blog and I will find it and try to help
-
+++ /dev/null
-#!/usr/bin/env python
-# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-
-import sys
-sys.path.append('lib')
-
-import os, os.path, urllib
-import subprocess
-from subprocess import Popen, PIPE, STDOUT
-import Tkinter
-import Tkconstants
-import tkFileDialog
-import tkMessageBox
-import subasyncio
-from subasyncio import Process
-from scrolltextwidget import ScrolledText
-
-class MainDialog(Tkinter.Frame):
- def __init__(self, root):
- Tkinter.Frame.__init__(self, root, border=5)
- self.root = root
- self.interval = 2000
- self.p2 = None
- self.status = Tkinter.Label(self, text='Extract Contents of Topaz eBook to a Directory')
- self.status.pack(fill=Tkconstants.X, expand=1)
- body = Tkinter.Frame(self)
- body.pack(fill=Tkconstants.X, expand=1)
- sticky = Tkconstants.E + Tkconstants.W
- body.grid_columnconfigure(1, weight=2)
-
- Tkinter.Label(body, text='Topaz eBook input file').grid(row=0, sticky=Tkconstants.E)
- self.tpzpath = Tkinter.Entry(body, width=50)
- self.tpzpath.grid(row=0, column=1, sticky=sticky)
- cwd = os.getcwdu()
- cwd = cwd.encode('utf-8')
- self.tpzpath.insert(0, cwd)
- button = Tkinter.Button(body, text="...", command=self.get_tpzpath)
- button.grid(row=0, column=2)
-
- Tkinter.Label(body, text='Output Directory').grid(row=1, sticky=Tkconstants.E)
- self.outpath = Tkinter.Entry(body, width=50)
- self.outpath.grid(row=1, column=1, sticky=sticky)
- cwd = os.getcwdu()
- cwd = cwd.encode('utf-8')
- self.outpath.insert(0, cwd)
- button = Tkinter.Button(body, text="...", command=self.get_outpath)
- button.grid(row=1, column=2)
-
- Tkinter.Label(body, text='Kindle.info file (optional)').grid(row=2, sticky=Tkconstants.E)
- self.altinfopath = Tkinter.Entry(body, width=50)
- self.altinfopath.grid(row=2, column=1, sticky=sticky)
- #cwd = os.getcwdu()
- #cwd = cwd.encode('utf-8')
- #self.altinfopath.insert(0, cwd)
- button = Tkinter.Button(body, text="...", command=self.get_altinfopath)
- button.grid(row=2, column=2)
-
- Tkinter.Label(body, text='First 8 char of PID (optional)').grid(row=3, sticky=Tkconstants.E)
- self.pidnum = Tkinter.StringVar()
- self.ccinfo = Tkinter.Entry(body, width=10, textvariable=self.pidnum)
- self.ccinfo.grid(row=3, column=1, sticky=sticky)
-
- msg1 = 'Conversion Log \n\n'
- self.stext = ScrolledText(body, bd=5, relief=Tkconstants.RIDGE, height=15, width=60, wrap=Tkconstants.WORD)
- self.stext.grid(row=4, column=0, columnspan=2,sticky=sticky)
- self.stext.insert(Tkconstants.END,msg1)
-
- buttons = Tkinter.Frame(self)
- buttons.pack()
- self.sbotton = Tkinter.Button(
- buttons, text="Start", width=10, command=self.convertit)
- self.sbotton.pack(side=Tkconstants.LEFT)
-
- Tkinter.Frame(buttons, width=10).pack(side=Tkconstants.LEFT)
- self.qbutton = Tkinter.Button(
- buttons, text="Quit", width=10, command=self.quitting)
- self.qbutton.pack(side=Tkconstants.RIGHT)
-
- # read from subprocess pipe without blocking
- # invoked every interval via the widget "after"
- # option being used, so need to reset it for the next time
- def processPipe(self):
- poll = self.p2.wait('nowait')
- if poll != None:
- text = self.p2.readerr()
- text += self.p2.read()
- msg = text + '\n\n' + 'Files successfully extracted\n'
- if poll != 0:
- msg = text + '\n\n' + 'Error: File Extraction Failed\n'
- self.showCmdOutput(msg)
- self.p2 = None
- self.sbotton.configure(state='normal')
- return
- text = self.p2.readerr()
- text += self.p2.read()
- self.showCmdOutput(text)
- # make sure we get invoked again by event loop after interval
- self.stext.after(self.interval,self.processPipe)
- return
-
- # post output from subprocess in scrolled text widget
- def showCmdOutput(self, msg):
- if msg and msg !='':
- msg = msg.encode('utf-8')
- self.stext.insert(Tkconstants.END,msg)
- self.stext.yview_pickplace(Tkconstants.END)
- return
-
- # run as a subprocess via pipes and collect stdout
- def topazrdr(self, infile, outdir, altinfopath, pidnum):
- # os.putenv('PYTHONUNBUFFERED', '1')
- pidoption = ''
- if pidnum and pidnum != '':
- pidoption = ' -p "' + pidnum + '" '
- infooption = ''
- if altinfopath and altinfopath != '':
- infooption = ' -k "' + altinfopath + '" '
- outoption = ' -o "' + outdir + '" '
- cmdline = 'python ./lib/cmbtc_dump.py -v -d ' + pidoption + infooption + outoption + '"' + infile + '"'
- if sys.platform[0:3] == 'win':
- search_path = os.environ['PATH']
- search_path = search_path.lower()
- if search_path.find('python') >= 0:
- cmdline = 'python lib\cmbtc_dump.py -v -d ' + pidoption + infooption + outoption + '"' + infile + '"'
- else :
- cmdline = 'lib\cmbtc_dump.py -v -d ' + pidoption + infooption + outoption + '"' + infile + '"'
-
- cmdline = cmdline.encode(sys.getfilesystemencoding())
- p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
- return p2
-
-
- def get_tpzpath(self):
- tpzpath = tkFileDialog.askopenfilename(
- parent=None, title='Select Topaz File',
- defaultextension='.prc', filetypes=[('Topaz azw', '.azw'),('Topaz azw1', '.azw1'), ('Topaz prc', '.prc'),
- ('All Files', '.*')])
- if tpzpath:
- tpzpath = os.path.normpath(tpzpath)
- self.tpzpath.delete(0, Tkconstants.END)
- self.tpzpath.insert(0, tpzpath)
- return
-
- def get_outpath(self):
- cwd = os.getcwdu()
- cwd = cwd.encode('utf-8')
- outpath = tkFileDialog.askdirectory(
- parent=None, title='Directory to Extract Files into',
- initialdir=cwd, initialfile=None)
- if outpath:
- outpath = os.path.normpath(outpath)
- self.outpath.delete(0, Tkconstants.END)
- self.outpath.insert(0, outpath)
- return
-
- def get_altinfopath(self):
- cwd = os.getcwdu()
- cwd = cwd.encode('utf-8')
- altinfopath = tkFileDialog.askopenfilename(
- parent=None, title='Select kindle.info File',
- defaultextension='.prc', filetypes=[('Kindle Info', '.info'),
- ('All Files', '.*')],
- initialdir=cwd)
- if altinfopath:
- altinfopath = os.path.normpath(altinfopath)
- self.altinfopath.delete(0, Tkconstants.END)
- self.altinfopath.insert(0, altinfopath)
- return
-
- def quitting(self):
- # kill any still running subprocess
- if self.p2 != None:
- if (self.p2.wait('nowait') == None):
- self.p2.terminate()
- self.root.destroy()
-
- # actually ready to run the subprocess and get its output
- def convertit(self):
- # now disable the button to prevent multiple launches
- self.sbotton.configure(state='disabled')
- tpzpath = self.tpzpath.get()
- outpath = self.outpath.get()
- altinfopath = self.altinfopath.get()
- if not tpzpath or not os.path.exists(tpzpath):
- self.status['text'] = 'Specified Topaz eBook file does not exist'
- self.sbotton.configure(state='normal')
- return
- if not outpath:
- self.status['text'] = 'No output directory specified'
- self.sbotton.configure(state='normal')
- return
- if not os.path.exists(outpath):
- os.makedirs(outpath)
- if altinfopath and not os.path.exists(altinfopath):
- self.status['text'] = 'Specified kindle.info file does not exist'
- self.sbotton.configure(state='normal')
- return
- pidnum = self.pidnum.get()
- # if not pidnum or pidnum == '':
- # self.status['text'] = 'You have not entered a PID '
- # self.sbotton.configure(state='normal')
- # return
-
- log = 'Command = "python cmbtc_dump.py"\n'
- log += 'Topaz Path Path = "'+ tpzpath + '"\n'
- log += 'Output Directory = "' + outpath + '"\n'
- log += 'Kindle.info file = "' + altinfopath + '"\n'
- log += 'First 8 chars of PID = "' + pidnum + '"\n'
- log += '\n\n'
- log += 'Please Wait ...\n'
- log = log.encode('utf-8')
- self.stext.insert(Tkconstants.END,log)
- self.p2 = self.topazrdr(tpzpath, outpath, altinfopath, pidnum)
-
- # python does not seem to allow you to create
- # your own eventloop which every other gui does - strange
- # so need to use the widget "after" command to force
- # event loop to run non-gui events every interval
- self.stext.after(self.interval,self.processPipe)
- return
-
-
-def main(argv=None):
- root = Tkinter.Tk()
- root.title('Topaz eBook File Extraction')
- root.resizable(True, False)
- root.minsize(300, 0)
- MainDialog(root).pack(fill=Tkconstants.X, expand=1)
- root.mainloop()
- return 0
-
-
-if __name__ == "__main__":
- sys.exit(main())
+++ /dev/null
-#!/usr/bin/env python
-# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-
-import sys
-sys.path.append('lib')
-
-import os, os.path, urllib
-import subprocess
-from subprocess import Popen, PIPE, STDOUT
-import Tkinter
-import Tkconstants
-import tkFileDialog
-import tkMessageBox
-import subasyncio
-from subasyncio import Process
-from scrolltextwidget import ScrolledText
-
-class MainDialog(Tkinter.Frame):
- def __init__(self, root):
- Tkinter.Frame.__init__(self, root, border=5)
- self.root = root
- self.interval = 2000
- self.p2 = None
- self.status = Tkinter.Label(self, text='Convert Files From Topaz eBook to HTML')
- self.status.pack(fill=Tkconstants.X, expand=1)
- body = Tkinter.Frame(self)
- body.pack(fill=Tkconstants.X, expand=1)
- sticky = Tkconstants.E + Tkconstants.W
- body.grid_columnconfigure(1, weight=2)
-
- Tkinter.Label(body, text='Directory you Extracted Topaz Files into').grid(row=0, sticky=Tkconstants.E)
- self.bookdir = Tkinter.Entry(body, width=50)
- self.bookdir.grid(row=0, column=1, sticky=sticky)
- cwd = os.getcwdu()
- cwd = cwd.encode('utf-8')
- self.bookdir.insert(0, cwd)
- button = Tkinter.Button(body, text="...", command=self.get_bookdir)
- button.grid(row=0, column=2)
-
- msg1 = 'Conversion Log \n\n'
- self.stext = ScrolledText(body, bd=5, relief=Tkconstants.RIDGE, height=15, width=60, wrap=Tkconstants.WORD)
- self.stext.grid(row=4, column=0, columnspan=2,sticky=sticky)
- self.stext.insert(Tkconstants.END,msg1)
-
- buttons = Tkinter.Frame(self)
- buttons.pack()
- self.sbotton = Tkinter.Button(
- buttons, text="Start", width=10, command=self.convertit)
- self.sbotton.pack(side=Tkconstants.LEFT)
-
- Tkinter.Frame(buttons, width=10).pack(side=Tkconstants.LEFT)
- self.qbutton = Tkinter.Button(
- buttons, text="Quit", width=10, command=self.quitting)
- self.qbutton.pack(side=Tkconstants.RIGHT)
-
- # read from subprocess pipe without blocking
- # invoked every interval via the widget "after"
- # option being used, so need to reset it for the next time
- def processPipe(self):
- poll = self.p2.wait('nowait')
- if poll != None:
- text = self.p2.readerr()
- text += self.p2.read()
- msg = text + '\n\n' + 'book.html successfully created in ' + self.bookdir.get() + '\n'
- if poll != 0:
- msg = text + '\n\n' + 'Error: HTML conversion Failed\n'
- self.showCmdOutput(msg)
- self.p2 = None
- self.sbotton.configure(state='normal')
- return
- text = self.p2.readerr()
- text += self.p2.read()
- self.showCmdOutput(text)
- # make sure we get invoked again by event loop after interval
- self.stext.after(self.interval,self.processPipe)
- return
-
- # post output from subprocess in scrolled text widget
- def showCmdOutput(self, msg):
- if msg and msg !='':
- msg = msg.encode('utf-8')
- self.stext.insert(Tkconstants.END,msg)
- self.stext.yview_pickplace(Tkconstants.END)
- return
-
- # run as a subprocess via pipes and collect stdout
- def topazrdr(self, bookdir):
- # os.putenv('PYTHONUNBUFFERED', '1')
- cmdline = 'python ./lib/genhtml.py "' + bookdir + '"'
- if sys.platform[0:3] == 'win':
- search_path = os.environ['PATH']
- search_path = search_path.lower()
- if search_path.find('python') >= 0:
- cmdline = 'python lib\genhtml.py "' + bookdir + '"'
- else :
- cmdline = 'lib\genhtml.py "' + bookdir + '"'
-
- cmdline = cmdline.encode(sys.getfilesystemencoding())
- p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
- return p2
-
-
- def get_bookdir(self):
- cwd = os.getcwdu()
- cwd = cwd.encode('utf-8')
- bookdir = tkFileDialog.askdirectory(
- parent=None, title='Select the Directory you Extracted Topaz Files into',
- initialdir=cwd, initialfile=None)
- if bookdir:
- bookdir = os.path.normpath(bookdir)
- self.bookdir.delete(0, Tkconstants.END)
- self.bookdir.insert(0, bookdir)
- return
-
- def quitting(self):
- # kill any still running subprocess
- if self.p2 != None:
- if (self.p2.wait('nowait') == None):
- self.p2.terminate()
- self.root.destroy()
-
- # actually ready to run the subprocess and get its output
- def convertit(self):
- # now disable the button to prevent multiple launches
- self.sbotton.configure(state='disabled')
- bookdir = self.bookdir.get()
- if not bookdir:
- self.status['text'] = 'No directory specified'
- self.sbotton.configure(state='normal')
- return
-
- log = 'Command = "python genhtml.py"\n'
- log += 'Book Directory = "' + bookdir + '"\n'
- log += '\n\n'
- log += 'Please Wait ...\n'
- log = log.encode('utf-8')
- self.stext.insert(Tkconstants.END,log)
- self.p2 = self.topazrdr(bookdir)
-
- # python does not seem to allow you to create
- # your own eventloop which every other gui does - strange
- # so need to use the widget "after" command to force
- # event loop to run non-gui events every interval
- self.stext.after(self.interval,self.processPipe)
- return
-
-
-def main(argv=None):
- root = Tkinter.Tk()
- root.title('Convert Topaz Files to SVG Files')
- root.resizable(True, False)
- root.minsize(300, 0)
- MainDialog(root).pack(fill=Tkconstants.X, expand=1)
- root.mainloop()
- return 0
-
-
-if __name__ == "__main__":
- sys.exit(main())
+++ /dev/null
-#!/usr/bin/env python
-# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-
-import sys
-sys.path.append('lib')
-
-import os, os.path, urllib
-import subprocess
-from subprocess import Popen, PIPE, STDOUT
-import Tkinter
-import Tkconstants
-import tkFileDialog
-import tkMessageBox
-import subasyncio
-from subasyncio import Process
-from scrolltextwidget import ScrolledText
-
-class MainDialog(Tkinter.Frame):
- def __init__(self, root):
- Tkinter.Frame.__init__(self, root, border=5)
- self.root = root
- self.interval = 2000
- self.p2 = None
- self.status = Tkinter.Label(self, text='Convert Files From Topaz eBook to SVG')
- self.status.pack(fill=Tkconstants.X, expand=1)
- body = Tkinter.Frame(self)
- body.pack(fill=Tkconstants.X, expand=1)
- sticky = Tkconstants.E + Tkconstants.W
- body.grid_columnconfigure(1, weight=2)
-
- Tkinter.Label(body, text='Directory you Extracted Topaz Files into').grid(row=0, sticky=Tkconstants.E)
- self.bookdir = Tkinter.Entry(body, width=50)
- self.bookdir.grid(row=0, column=1, sticky=sticky)
- cwd = os.getcwdu()
- cwd = cwd.encode('utf-8')
- self.bookdir.insert(0, cwd)
- button = Tkinter.Button(body, text="...", command=self.get_bookdir)
- button.grid(row=0, column=2)
-
- msg1 = 'Conversion Log \n\n'
- self.stext = ScrolledText(body, bd=5, relief=Tkconstants.RIDGE, height=15, width=60, wrap=Tkconstants.WORD)
- self.stext.grid(row=4, column=0, columnspan=2,sticky=sticky)
- self.stext.insert(Tkconstants.END,msg1)
-
- buttons = Tkinter.Frame(self)
- buttons.pack()
- self.sbotton = Tkinter.Button(
- buttons, text="Start", width=10, command=self.convertit)
- self.sbotton.pack(side=Tkconstants.LEFT)
-
- Tkinter.Frame(buttons, width=10).pack(side=Tkconstants.LEFT)
- self.qbutton = Tkinter.Button(
- buttons, text="Quit", width=10, command=self.quitting)
- self.qbutton.pack(side=Tkconstants.RIGHT)
-
- # read from subprocess pipe without blocking
- # invoked every interval via the widget "after"
- # option being used, so need to reset it for the next time
- def processPipe(self):
- poll = self.p2.wait('nowait')
- if poll != None:
- text = self.p2.readerr()
- text += self.p2.read()
- msg = text + '\n\n' + 'SVG embedded in XHTML files successfully created in the svg directory in ' + self.bookdir.get() + '\n'
- if poll != 0:
- msg = text + '\n\n' + 'Error: SVG conversion Failed\n'
- self.showCmdOutput(msg)
- self.p2 = None
- self.sbotton.configure(state='normal')
- return
- text = self.p2.readerr()
- text += self.p2.read()
- self.showCmdOutput(text)
- # make sure we get invoked again by event loop after interval
- self.stext.after(self.interval,self.processPipe)
- return
-
- # post output from subprocess in scrolled text widget
- def showCmdOutput(self, msg):
- if msg and msg !='':
- msg = msg.encode('utf-8')
- self.stext.insert(Tkconstants.END,msg)
- self.stext.yview_pickplace(Tkconstants.END)
- return
-
- # run as a subprocess via pipes and collect stdout
- def topazrdr(self, bookdir):
- # os.putenv('PYTHONUNBUFFERED', '1')
- cmdline = 'python ./lib/gensvg.py "' + bookdir + '"'
- if sys.platform[0:3] == 'win':
- search_path = os.environ['PATH']
- search_path = search_path.lower()
- if search_path.find('python') >= 0:
- cmdline = 'python lib\gensvg.py "' + bookdir + '"'
- else :
- cmdline = 'lib\gensvg.py "' + bookdir + '"'
-
- cmdline = cmdline.encode(sys.getfilesystemencoding())
- p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
- return p2
-
-
- def get_bookdir(self):
- cwd = os.getcwdu()
- cwd = cwd.encode('utf-8')
- bookdir = tkFileDialog.askdirectory(
- parent=None, title='Select the Directory you Extracted Topaz Files into',
- initialdir=cwd, initialfile=None)
- if bookdir:
- bookdir = os.path.normpath(bookdir)
- self.bookdir.delete(0, Tkconstants.END)
- self.bookdir.insert(0, bookdir)
- return
-
- def quitting(self):
- # kill any still running subprocess
- if self.p2 != None:
- if (self.p2.wait('nowait') == None):
- self.p2.terminate()
- self.root.destroy()
-
- # actually ready to run the subprocess and get its output
- def convertit(self):
- # now disable the button to prevent multiple launches
- self.sbotton.configure(state='disabled')
- bookdir = self.bookdir.get()
- if not bookdir:
- self.status['text'] = 'No directory specified'
- self.sbotton.configure(state='normal')
- return
-
- log = 'Command = "python gensvg.py"\n'
- log += 'Book Directory = "' + bookdir + '"\n'
- log += '\n\n'
- log += 'Please Wait ...\n'
- log = log.encode('utf-8')
- self.stext.insert(Tkconstants.END,log)
- self.p2 = self.topazrdr(bookdir)
-
- # python does not seem to allow you to create
- # your own eventloop which every other gui does - strange
- # so need to use the widget "after" command to force
- # event loop to run non-gui events every interval
- self.stext.after(self.interval,self.processPipe)
- return
-
-
-def main(argv=None):
- root = Tkinter.Tk()
- root.title('Convert Topaz Files to SVG Files')
- root.resizable(True, False)
- root.minsize(300, 0)
- MainDialog(root).pack(fill=Tkconstants.X, expand=1)
- root.mainloop()
- return 0
-
-
-if __name__ == "__main__":
- sys.exit(main())
+++ /dev/null
-#!/usr/bin/env python
-# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-
-import sys
-sys.path.append('lib')
-
-import os, os.path, urllib
-import subprocess
-from subprocess import Popen, PIPE, STDOUT
-import Tkinter
-import Tkconstants
-import tkFileDialog
-import tkMessageBox
-import subasyncio
-from subasyncio import Process
-from scrolltextwidget import ScrolledText
-
-class MainDialog(Tkinter.Frame):
- def __init__(self, root):
- Tkinter.Frame.__init__(self, root, border=5)
- self.root = root
- self.interval = 2000
- self.p2 = None
- self.status = Tkinter.Label(self, text='Convert Files From Topaz eBook to XML')
- self.status.pack(fill=Tkconstants.X, expand=1)
- body = Tkinter.Frame(self)
- body.pack(fill=Tkconstants.X, expand=1)
- sticky = Tkconstants.E + Tkconstants.W
- body.grid_columnconfigure(1, weight=2)
-
- Tkinter.Label(body, text='Directory you Extracted Topaz Files into').grid(row=0, sticky=Tkconstants.E)
- self.bookdir = Tkinter.Entry(body, width=50)
- self.bookdir.grid(row=0, column=1, sticky=sticky)
- cwd = os.getcwdu()
- cwd = cwd.encode('utf-8')
- self.bookdir.insert(0, cwd)
- button = Tkinter.Button(body, text="...", command=self.get_bookdir)
- button.grid(row=0, column=2)
-
- msg1 = 'Conversion Log \n\n'
- self.stext = ScrolledText(body, bd=5, relief=Tkconstants.RIDGE, height=15, width=60, wrap=Tkconstants.WORD)
- self.stext.grid(row=4, column=0, columnspan=2,sticky=sticky)
- self.stext.insert(Tkconstants.END,msg1)
-
- buttons = Tkinter.Frame(self)
- buttons.pack()
- self.sbotton = Tkinter.Button(
- buttons, text="Start", width=10, command=self.convertit)
- self.sbotton.pack(side=Tkconstants.LEFT)
-
- Tkinter.Frame(buttons, width=10).pack(side=Tkconstants.LEFT)
- self.qbutton = Tkinter.Button(
- buttons, text="Quit", width=10, command=self.quitting)
- self.qbutton.pack(side=Tkconstants.RIGHT)
-
- # read from subprocess pipe without blocking
- # invoked every interval via the widget "after"
- # option being used, so need to reset it for the next time
- def processPipe(self):
- poll = self.p2.wait('nowait')
- if poll != None:
- text = self.p2.readerr()
- text += self.p2.read()
- msg = text + '\n\n' + 'XML files successfully created in the xml directory in ' + self.bookdir.get() + '\n'
- if poll != 0:
- msg = text + '\n\n' + 'Error: XML conversion Failed\n'
- self.showCmdOutput(msg)
- self.p2 = None
- self.sbotton.configure(state='normal')
- return
- text = self.p2.readerr()
- text += self.p2.read()
- self.showCmdOutput(text)
- # make sure we get invoked again by event loop after interval
- self.stext.after(self.interval,self.processPipe)
- return
-
- # post output from subprocess in scrolled text widget
- def showCmdOutput(self, msg):
- if msg and msg !='':
- msg = msg.encode('utf-8')
- self.stext.insert(Tkconstants.END,msg)
- self.stext.yview_pickplace(Tkconstants.END)
- return
-
- # run as a subprocess via pipes and collect stdout
- def topazrdr(self, bookdir):
- # os.putenv('PYTHONUNBUFFERED', '1')
- cmdline = 'python ./lib/genxml.py "' + bookdir + '"'
- if sys.platform[0:3] == 'win':
- search_path = os.environ['PATH']
- search_path = search_path.lower()
- if search_path.find('python') >= 0:
- cmdline = 'python lib\genxml.py "' + bookdir + '"'
- else :
- cmdline = 'lib\genxml.py "' + bookdir + '"'
-
- cmdline = cmdline.encode(sys.getfilesystemencoding())
- p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
- return p2
-
-
- def get_bookdir(self):
- cwd = os.getcwdu()
- cwd = cwd.encode('utf-8')
- bookdir = tkFileDialog.askdirectory(
- parent=None, title='Select the Directory you Extracted Topaz Files into',
- initialdir=cwd, initialfile=None)
- if bookdir:
- bookdir = os.path.normpath(bookdir)
- self.bookdir.delete(0, Tkconstants.END)
- self.bookdir.insert(0, bookdir)
- return
-
- def quitting(self):
- # kill any still running subprocess
- if self.p2 != None:
- if (self.p2.wait('nowait') == None):
- self.p2.terminate()
- self.root.destroy()
-
- # actually ready to run the subprocess and get its output
- def convertit(self):
- # now disable the button to prevent multiple launches
- self.sbotton.configure(state='disabled')
- bookdir = self.bookdir.get()
- if not bookdir:
- self.status['text'] = 'No directory specified'
- self.sbotton.configure(state='normal')
- return
-
- log = 'Command = "python genxml.py"\n'
- log += 'Book Directory = "' + bookdir + '"\n'
- log += '\n\n'
- log += 'Please Wait ...\n'
- log = log.encode('utf-8')
- self.stext.insert(Tkconstants.END,log)
- self.p2 = self.topazrdr(bookdir)
-
- # python does not seem to allow you to create
- # your own eventloop which every other gui does - strange
- # so need to use the widget "after" command to force
- # event loop to run non-gui events every interval
- self.stext.after(self.interval,self.processPipe)
- return
-
-
-def main(argv=None):
- root = Tkinter.Tk()
- root.title('Convert Topaz Files to XML Files')
- root.resizable(True, False)
- root.minsize(300, 0)
- MainDialog(root).pack(fill=Tkconstants.X, expand=1)
- root.mainloop()
- return 0
-
-
-if __name__ == "__main__":
- sys.exit(main())
+++ /dev/null
-#! /usr/bin/python
-# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-# For use with Topaz Scripts Version 2.6
-
-import csv
-import sys
-import os
-import getopt
-from struct import pack
-from struct import unpack
-
-#
-# Get a 7 bit encoded number from string
-#
-
-def readEncodedNumber(file):
- flag = False
- c = file.read(1)
- if (len(c) == 0):
- return None
- data = ord(c)
-
- if data == 0xFF:
- flag = True
- c = file.read(1)
- if (len(c) == 0):
- return None
- data = ord(c)
-
- if data >= 0x80:
- datax = (data & 0x7F)
- while data >= 0x80 :
- c = file.read(1)
- if (len(c) == 0):
- return None
- data = ord(c)
- datax = (datax <<7) + (data & 0x7F)
- data = datax
-
- if flag:
- data = -data
- return data
-
-#
-# Encode a number in 7 bit format
-#
-
-def encodeNumber(number):
- result = ""
- negative = False
- flag = 0
-
- if number < 0 :
- number = -number + 1
- negative = True
-
- while True:
- byte = number & 0x7F
- number = number >> 7
- byte += flag
- result += chr(byte)
- flag = 0x80
- if number == 0 :
- if (byte == 0xFF and negative == False) :
- result += chr(0x80)
- break
-
- if negative:
- result += chr(0xFF)
-
- return result[::-1]
-
-#
-# Get a length prefixed string from the file
-#
-def lengthPrefixString(data):
- return encodeNumber(len(data))+data
-
-def readString(file):
- stringLength = readEncodedNumber(file)
- if (stringLength == None):
- return None
- sv = file.read(stringLength)
- if (len(sv) != stringLength):
- return ""
- return unpack(str(stringLength)+"s",sv)[0]
-
-
-
-def getMetaArray(metaFile):
- # parse the meta file into a Python dictionary (associative array)
- result = {}
- fo = file(metaFile,'rb')
- size = readEncodedNumber(fo)
- for i in xrange(size):
- temp = readString(fo)
- result[temp] = readString(fo)
- fo.close()
- return result
-
-
-
-def getMetaData(metaFile):
- # parse the meta file
- result = ''
- fo = file(metaFile,'rb')
- size = readEncodedNumber(fo)
- for i in xrange(size):
- result += readString(fo) + '|'
- result += readString(fo) + '\n'
- fo.close()
- return result
+++ /dev/null
-#! /usr/bin/python
-# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-# For use with Topaz Scripts Version 2.6
-
-class Unbuffered:
- def __init__(self, stream):
- self.stream = stream
- def write(self, data):
- self.stream.write(data)
- self.stream.flush()
- def __getattr__(self, attr):
- return getattr(self.stream, attr)
-
-import sys
-sys.stdout=Unbuffered(sys.stdout)
-
-
-import os, getopt
-
-# local routines
-import convert2xml
-import flatxml2html
-import decode_meta
-import stylexml2css
-import getpagedim
-
-def usage():
- print 'Usage: '
- print ' '
- print ' genhtml.py [--fixed-image] unencryptedBookDir'
- print ' '
- print ' Options: '
- print ' --fixed-image : force translation of fixed regions into svg images '
- print ' '
-
-
-def main(argv):
- bookDir = ''
- fixedimage = False
-
- if len(argv) == 0:
- argv = sys.argv
-
- try:
- opts, args = getopt.getopt(argv[1:], "h:",["fixed-image"])
-
- except getopt.GetoptError, err:
- print str(err)
- usage()
- sys.exit(1)
-
- if len(opts) == 0 and len(args) == 0 :
- usage()
- sys.exit(1)
-
- for o, a in opts:
- if o =="-h":
- usage()
- sys.exit(0)
- if o =="--fixed-image":
- fixedimage = True
-
- bookDir = args[0]
-
- if not os.path.exists(bookDir) :
- print "Can not find directory with unencrypted book"
- sys.exit(1)
-
- dictFile = os.path.join(bookDir,'dict0000.dat')
-
- if not os.path.exists(dictFile) :
- print "Can not find dict0000.dat file"
- sys.exit(1)
-
- pageDir = os.path.join(bookDir,'page')
- if not os.path.exists(pageDir) :
- print "Can not find page directory in unencrypted book"
- sys.exit(1)
-
- imgDir = os.path.join(bookDir,'img')
- if not os.path.exists(imgDir) :
- print "Can not find image directory in unencrypted book"
- sys.exit(1)
-
- svgDir = os.path.join(bookDir,'svg')
- if not os.path.exists(svgDir) :
- print "Can not find svg directory in unencrypted book"
- print "please run gensvg.py before running genhtml.py"
- sys.exit(1)
-
- otherFile = os.path.join(bookDir,'other0000.dat')
- if not os.path.exists(otherFile) :
- print "Can not find other0000.dat in unencrypted book"
- sys.exit(1)
-
- metaFile = os.path.join(bookDir,'metadata0000.dat')
- if not os.path.exists(metaFile) :
- print "Can not find metadata0000.dat in unencrypted book"
- sys.exit(1)
-
- htmlFileName = "book.html"
- htmlstr = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n'
- htmlstr += '<html>\n'
-
- filenames = os.listdir(pageDir)
- filenames = sorted(filenames)
-
- print 'Processing ... '
-
- htmlstr += '<head>\n'
- htmlstr += '<meta http-equiv="content-type" content="text/html; charset=utf-8"/>\n'
-
- # process metadata and retrieve fontSize info
- print ' ', 'metadata0000.dat'
- fname = os.path.join(bookDir,'metadata0000.dat')
- xname = os.path.join(bookDir, 'metadata.txt')
- metastr = decode_meta.getMetaData(fname)
- file(xname, 'wb').write(metastr)
- meta_array = decode_meta.getMetaArray(fname)
-
- htmlstr += '<title>' + meta_array['Title'] + ' by ' + meta_array['Authors'] + '</title>\n'
- htmlstr += '<meta name="Author" content="' + meta_array['Authors'] + '" />\n'
- htmlstr += '<meta name="Title" content="' + meta_array['Title'] + '" />\n'
-
- # get some scaling info from metadata to use while processing styles
- fontsize = '135'
- if 'fontSize' in meta_array:
- fontsize = meta_array['fontSize']
-
- # also get the size of a normal text page
- spage = '1'
- if 'firstTextPage' in meta_array:
- spage = meta_array['firstTextPage']
- pnum = int(spage)
-
- # get page height and width from first text page for use in stylesheet scaling
- pname = 'page%04d.dat' % (pnum + 1)
- fname = os.path.join(pageDir,pname)
- pargv=[]
- pargv.append('convert2xml.py')
- pargv.append('--flat-xml')
- pargv.append(dictFile)
- pargv.append(fname)
- flat_xml = convert2xml.main(pargv)
- (ph, pw) = getpagedim.getPageDim(flat_xml)
- if (ph == '-1') or (ph == '0') : ph = '11000'
- if (pw == '-1') or (pw == '0') : pw = '8500'
-
- # now build up the style sheet
- print ' ', 'other0000.dat'
- fname = os.path.join(bookDir,'other0000.dat')
- xname = os.path.join(bookDir, 'style.css')
- pargv=[]
- pargv.append('convert2xml.py')
- pargv.append('--flat-xml')
- pargv.append(dictFile)
- pargv.append(fname)
- xmlstr = convert2xml.main(pargv)
- cssstr , classlst = stylexml2css.convert2CSS(xmlstr, fontsize, ph, pw)
- file(xname, 'wb').write(cssstr)
- htmlstr += '<link href="style.css" rel="stylesheet" type="text/css" />\n'
- htmlstr += '</head>\n<body>\n'
-
- for filename in filenames:
- print ' ', filename
- fname = os.path.join(pageDir,filename)
- pargv=[]
- pargv.append('convert2xml.py')
- pargv.append('--flat-xml')
- pargv.append(dictFile)
- pargv.append(fname)
- flat_xml = convert2xml.main(pargv)
- htmlstr += flatxml2html.convert2HTML(flat_xml, classlst, fname, bookDir, fixedimage)
-
- htmlstr += '</body>\n</html>\n'
-
- file(os.path.join(bookDir, htmlFileName), 'wb').write(htmlstr)
- print 'Processing Complete'
-
- return 0
-
-if __name__ == '__main__':
- sys.exit(main(''))
-
-
+++ /dev/null
-#! /usr/bin/python
-# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-# For use with Topaz Scripts Version 2.6
-
-class Unbuffered:
- def __init__(self, stream):
- self.stream = stream
- def write(self, data):
- self.stream.write(data)
- self.stream.flush()
- def __getattr__(self, attr):
- return getattr(self.stream, attr)
-
-import sys
-sys.stdout=Unbuffered(sys.stdout)
-
-import os, getopt
-
-# local routines
-import convert2xml
-import decode_meta
-
-
-class GParser(object):
- def __init__(self, flatxml):
- self.flatdoc = flatxml.split('\n')
- self.dpi = 1440
- self.gh = self.getData('info.glyph.h')
- self.gw = self.getData('info.glyph.w')
- self.guse = self.getData('info.glyph.use')
- if self.guse :
- self.count = len(self.guse)
- else :
- self.count = 0
- self.gvtx = self.getData('info.glyph.vtx')
- self.glen = self.getData('info.glyph.len')
- self.gdpi = self.getData('info.glyph.dpi')
- self.vx = self.getData('info.vtx.x')
- self.vy = self.getData('info.vtx.y')
- self.vlen = self.getData('info.len.n')
- if self.vlen :
- self.glen.append(len(self.vlen))
- elif self.glen:
- self.glen.append(0)
- if self.vx :
- self.gvtx.append(len(self.vx))
- elif self.gvtx :
- self.gvtx.append(0)
-
- def getData(self, path):
- result = None
- cnt = len(self.flatdoc)
- for j in xrange(cnt):
- item = self.flatdoc[j]
- if item.find('=') >= 0:
- (name, argt) = item.split('=')
- argres = argt.split('|')
- else:
- name = item
- argres = []
- if (name == path):
- result = argres
- break
- if (len(argres) > 0) :
- for j in xrange(0,len(argres)):
- argres[j] = int(argres[j])
- return result
-
-
- def getGlyphDim(self, gly):
- maxh = (self.gh[gly] * self.dpi) / self.gdpi[gly]
- maxw = (self.gw[gly] * self.dpi) / self.gdpi[gly]
- return maxh, maxw
-
-
- def getPath(self, gly):
- path = ''
- if (gly < 0) or (gly >= self.count):
- return path
- tx = self.vx[self.gvtx[gly]:self.gvtx[gly+1]]
- ty = self.vy[self.gvtx[gly]:self.gvtx[gly+1]]
- p = 0
- for k in xrange(self.glen[gly], self.glen[gly+1]):
- if (p == 0):
- zx = tx[0:self.vlen[k]+1]
- zy = ty[0:self.vlen[k]+1]
- else:
- zx = tx[self.vlen[k-1]+1:self.vlen[k]+1]
- zy = ty[self.vlen[k-1]+1:self.vlen[k]+1]
- p += 1
- j = 0
- while ( j < len(zx) ):
- if (j == 0):
- # Start Position.
- path += 'M %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly])
- elif (j <= len(zx)-3):
- # Cubic Bezier Curve
- path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[j+2] * self.dpi / self.gdpi[gly], zy[j+2] * self.dpi / self.gdpi[gly])
- j += 2
- elif (j == len(zx)-2):
- # Cubic Bezier Curve to Start Position
- path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly])
- j += 1
- elif (j == len(zx)-1):
- # Quadratic Bezier Curve to Start Position
- path += 'Q %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly])
-
- j += 1
- path += 'z'
- return path
-
-class PParser(object):
- def __init__(self, flatxml):
- self.flatdoc = flatxml.split('\n')
- self.temp = []
- foo = self.getData('page.h') or self.getData('book.h')
- self.ph = foo[0]
- foo = self.getData('page.w') or self.getData('book.w')
- self.pw = foo[0]
- self.gx = self.getData('info.glyph.x')
- self.gy = self.getData('info.glyph.y')
- self.gid = self.getData('info.glyph.glyphID')
-
- def getData(self, path):
- result = None
- cnt = len(self.flatdoc)
- for j in xrange(cnt):
- item = self.flatdoc[j]
- if item.find('=') >= 0:
- (name, argt) = item.split('=')
- argres = argt.split('|')
- else:
- name = item
- argres = []
- if (name.endswith(path)):
- result = argres
- break
- if (len(argres) > 0) :
- for j in xrange(0,len(argres)):
- argres[j] = int(argres[j])
- return result
-
- def getDataTemp(self, path):
- result = None
- cnt = len(self.temp)
- for j in xrange(cnt):
- item = self.temp[j]
- if item.find('=') >= 0:
- (name, argt) = item.split('=')
- argres = argt.split('|')
- else:
- name = item
- argres = []
- if (name.endswith(path)):
- result = argres
- self.temp.pop(j)
- break
- if (len(argres) > 0) :
- for j in xrange(0,len(argres)):
- argres[j] = int(argres[j])
- return result
-
- def getImages(self):
- result = []
- self.temp = self.flatdoc
- while (self.getDataTemp('img') != None):
- h = self.getDataTemp('img.h')[0]
- w = self.getDataTemp('img.w')[0]
- x = self.getDataTemp('img.x')[0]
- y = self.getDataTemp('img.y')[0]
- src = self.getDataTemp('img.src')[0]
- result.append('<image xlink:href="../img/img%04d.jpg" x="%d" y="%d" width="%d" height="%d" />\n' % (src, x, y, w, h))
- return result
-
- def getGlyphs(self,glyfname):
- result = []
- if (self.gid != None) and (len(self.gid) > 0):
- glyphs = []
- for j in set(self.gid):
- glyphs.append(j)
- glyphs.sort()
- gfile = open(glyfname, 'r')
- j = 0
- while True :
- inp = gfile.readline()
- if (inp == ''):
- break
- id='id="gl%d"' % glyphs[j]
- if (inp.find(id) > 0):
- result.append(inp)
- j += 1
- if (j == len(glyphs)):
- break
- gfile.close()
- return result
-
-
-
-
-def usage():
- print 'Usage: '
- print ' '
- print ' gensvg.py [options] unencryptedBookDir'
- print ' '
- print ' -x : output browseable XHTML+SVG pages (default)'
- print ' -r : output raw SVG images'
-
-
-def main(argv):
- bookDir = ''
-
- if len(argv) == 0:
- argv = sys.argv
-
- try:
- opts, args = getopt.getopt(argv[1:], "xrh")
-
- except getopt.GetoptError, err:
- print str(err)
- usage()
- sys.exit(1)
-
- if len(opts) == 0 and len(args) == 0 :
- usage()
- sys.exit(1)
-
- raw = 0
- for o, a in opts:
- if o =="-h":
- usage()
- sys.exit(0)
- if o =="-x":
- raw = 0
- if o =="-r":
- raw = 1
-
- bookDir = args[0]
-
- if not os.path.exists(bookDir) :
- print "Can not find directory with unencrypted book"
- sys.exit(1)
-
- dictFile = os.path.join(bookDir,'dict0000.dat')
-
- if not os.path.exists(dictFile) :
- print "Can not find dict0000.dat file"
- sys.exit(1)
-
- pageDir = os.path.join(bookDir,'page')
- if not os.path.exists(pageDir) :
- print "Can not find page directory in unencrypted book"
- sys.exit(1)
-
- imgDir = os.path.join(bookDir,'img')
- if not os.path.exists(imgDir) :
- print "Can not find image directory in unencrypted book"
- sys.exit(1)
-
- glyphsDir = os.path.join(bookDir,'glyphs')
- if not os.path.exists(glyphsDir) :
- print "Can not find glyphs directory in unencrypted book"
- sys.exit(1)
-
- metaFile = os.path.join(bookDir,'metadata0000.dat')
- if not os.path.exists(metaFile) :
- print "Can not find metadata0000.dat in unencrypted book"
- sys.exit(1)
-
- svgDir = os.path.join(bookDir,'svg')
- if not os.path.exists(svgDir) :
- os.makedirs(svgDir)
-
-
- print 'Processing Meta Data ... '
-
- print ' ', 'metadata0000.dat'
- fname = os.path.join(bookDir,'metadata0000.dat')
- metadata = decode_meta.getMetaArray(fname)
-
- print 'Processing Glyphs ... '
-
- filenames = os.listdir(glyphsDir)
- filenames = sorted(filenames)
-
- glyfname = os.path.join(svgDir,'glyphs.svg')
- glyfile = open(glyfname, 'w')
- glyfile.write('<?xml version="1.0" standalone="no"?>\n')
- glyfile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
- glyfile.write('<svg width="512" height="512" viewBox="0 0 511 511" xmlns="http://www.w3.org/2000/svg" version="1.1">\n')
- glyfile.write('<title>Glyphs for %s</title>\n' % metadata['Title'])
- glyfile.write('<defs>\n')
- counter = 0
- for filename in filenames:
- print ' ', filename
- fname = os.path.join(glyphsDir,filename)
- pargv=[]
- pargv.append('convert2xml.py')
- pargv.append('--flat-xml')
- pargv.append(dictFile)
- pargv.append(fname)
- flat_xml = convert2xml.main(pargv)
- gp = GParser(flat_xml)
- for i in xrange(0, gp.count):
- path = gp.getPath(i)
- maxh, maxw = gp.getGlyphDim(i)
- # glyfile.write('<path id="gl%d" d="%s" fill="black" />\n' % (counter * 256 + i, path))
- glyfile.write('<path id="gl%d" d="%s" fill="black" /><!-- width=%d height=%d -->\n' % (counter * 256 + i, path, maxw, maxh ))
- counter += 1
- glyfile.write('</defs>\n')
- glyfile.write('</svg>\n')
- glyfile.close()
-
- print 'Processing Pages ... '
-
- # Books are at 1440 DPI. This is rendering at twice that size for
- # readability when rendering to the screen.
- scaledpi = 1440
- filenames = os.listdir(pageDir)
- filenames = sorted(filenames)
- counter = 0
- for filename in filenames:
- print ' ', filename
- fname = os.path.join(pageDir,filename)
- pargv=[]
- pargv.append('convert2xml.py')
- pargv.append('--flat-xml')
- pargv.append(dictFile)
- pargv.append(fname)
- flat_xml = convert2xml.main(pargv)
- pp = PParser(flat_xml)
- if (raw) :
- pfile = open(os.path.join(svgDir,filename.replace('.dat','.svg')), 'w')
- else :
- pfile = open(os.path.join(svgDir,'page%04d.xhtml' % counter), 'w')
-
- pfile.write('<?xml version="1.0" standalone="no"?>\n')
- if (raw):
- pfile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
- pfile.write('<svg width="%fin" height="%fin" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (pp.pw / scaledpi, pp.ph / scaledpi, pp.pw -1, pp.ph -1))
- pfile.write('<title>Page %d - %s by %s</title>\n' % (counter, metadata['Title'],metadata['Authors']))
- else:
- pfile.write('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n');
- pfile.write('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" ><head>\n');
- pfile.write('<title>Page %d - %s by %s</title>\n' % (counter, metadata['Title'],metadata['Authors']))
- pfile.write('<script><![CDATA[\n');
- pfile.write('function gd(){var p=window.location.href.replace(/^.*\?dpi=(\d+).*$/i,"$1");return p;}\n');
- pfile.write('var dpi=%d;\n' % scaledpi);
- if (counter) :
- pfile.write('var prevpage="page%04d.xhtml";\n' % (counter - 1))
- if (counter < len(filenames)-1) :
- pfile.write('var nextpage="page%04d.xhtml";\n' % (counter + 1))
- pfile.write('var pw=%d;var ph=%d;' % (pp.pw, pp.ph))
- pfile.write('function zoomin(){dpi=dpi*(2/3);setsize();}\n')
- pfile.write('function zoomout(){dpi=dpi*1.5;setsize();}\n')
- pfile.write('function setsize(){var svg=document.getElementById("svgimg");var prev=document.getElementById("prevsvg");var next=document.getElementById("nextsvg");var width=(pw/dpi)+"in";var height=(ph/dpi)+"in";svg.setAttribute("width",width);svg.setAttribute("height",height);prev.setAttribute("height",height);prev.setAttribute("width","50px");next.setAttribute("height",height);next.setAttribute("width","50px");}\n')
- pfile.write('function ppage(){window.location.href=prevpage+"?dpi="+Math.round(dpi);}\n')
- pfile.write('function npage(){window.location.href=nextpage+"?dpi="+Math.round(dpi);}\n')
- pfile.write('var gt=gd();if(gt>0){dpi=gt;}\n')
- pfile.write('window.onload=setsize;\n')
- pfile.write(']]></script>\n')
- pfile.write('</head>\n')
- pfile.write('<body onLoad="setsize();" style="background-color:#777;text-align:center;">\n')
- pfile.write('<div style="white-space:nowrap;">\n')
- if (counter == 0) :
- pfile.write('<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n')
- else:
- pfile.write('<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,150,95,5,95,295" fill="#AAAAAA" /></svg></a>\n')
- pfile.write('<a href="javascript:npage();"><svg id="svgimg" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" style="background-color:#FFF;border:1px solid black;">' % (pp.pw, pp.ph))
-
- if (pp.gid != None):
- pfile.write('<defs>\n')
- gdefs = pp.getGlyphs(glyfname)
- for j in xrange(0,len(gdefs)):
- pfile.write(gdefs[j])
- pfile.write('</defs>\n')
- img = pp.getImages()
- if (img != None):
- for j in xrange(0,len(img)):
- pfile.write(img[j])
- if (pp.gid != None):
- for j in xrange(0,len(pp.gid)):
- pfile.write('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (pp.gid[j], pp.gx[j], pp.gy[j]))
- if (img == None or len(img) == 0) and (pp.gid == None or len(pp.gid) == 0):
- pfile.write('<text x="10" y="10" font-family="Helvetica" font-size="100" stroke="black">This page intentionally left blank.</text>\n<text x="10" y="110" font-family="Helvetica" font-size="50" stroke="black">Until this notice unintentionally gave it content. (gensvg.py)</text>\n');
- if (raw) :
- pfile.write('</svg>')
- else :
- pfile.write('</svg></a>\n')
- if (counter == len(filenames) - 1) :
- pfile.write('<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n')
- else :
- pfile.write('<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,5,5,295,95,150" fill="#AAAAAA" /></svg></a>\n')
- pfile.write('</div>\n')
- pfile.write('<div><a href="javascript:zoomin();">zoom in</a> - <a href="javascript:zoomout();">zoom out</a></div>\n')
- pfile.write('</body>\n')
- pfile.write('</html>\n')
- pfile.close()
- counter += 1
-
- print 'Processing Complete'
-
- return 0
-
-if __name__ == '__main__':
- sys.exit(main(''))
+++ /dev/null
-#! /usr/bin/python
-# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-# For use with Topaz Scripts Version 2.6
-
-import csv
-import sys
-import os
-import getopt
-from struct import pack
-from struct import unpack
-
-
-class DocParser(object):
- def __init__(self, flatxml):
- self.flatdoc = flatxml.split('\n')
-
-
- # find tag if within pos to end inclusive
- def findinDoc(self, tagpath, pos, end) :
- result = None
- docList = self.flatdoc
- cnt = len(docList)
- if end == -1 :
- end = cnt
- else:
- end = min(cnt,end)
- foundat = -1
- for j in xrange(pos, end):
- item = docList[j]
- if item.find('=') >= 0:
- (name, argres) = item.split('=')
- else :
- name = item
- argres = ''
- if name.endswith(tagpath) :
- result = argres
- foundat = j
- break
- return foundat, result
-
- def process(self):
- (pos, sph) = self.findinDoc('page.h',0,-1)
- (pos, spw) = self.findinDoc('page.w',0,-1)
- if (sph == None): sph = '-1'
- if (spw == None): spw = '-1'
- return sph, spw
-
-
-def getPageDim(flatxml):
- # create a document parser
- dp = DocParser(flatxml)
- (ph, pw) = dp.process()
- return ph, pw
+++ /dev/null
-# standlone set of Mac OSX specific routines needed for K4DeDRM
-
-from __future__ import with_statement
-
-import sys
-import os
-
-#Exception Handling
-class K4MDrmException(Exception):
- pass
-
-import signal
-import threading
-import subprocess
-from subprocess import Popen, PIPE, STDOUT
-
-# **heavily** chopped up and modfied version of asyncproc.py
-# to make it actually work on Windows as well as Mac/Linux
-# For the original see:
-# "http://www.lysator.liu.se/~bellman/download/"
-# author is "Thomas Bellman <bellman@lysator.liu.se>"
-# available under GPL version 3 or Later
-
-# create an asynchronous subprocess whose output can be collected in
-# a non-blocking manner
-
-# What a mess! Have to use threads just to get non-blocking io
-# in a cross-platform manner
-
-# luckily all thread use is hidden within this class
-
-class Process(object):
- def __init__(self, *params, **kwparams):
- if len(params) <= 3:
- kwparams.setdefault('stdin', subprocess.PIPE)
- if len(params) <= 4:
- kwparams.setdefault('stdout', subprocess.PIPE)
- if len(params) <= 5:
- kwparams.setdefault('stderr', subprocess.PIPE)
- self.__pending_input = []
- self.__collected_outdata = []
- self.__collected_errdata = []
- self.__exitstatus = None
- self.__lock = threading.Lock()
- self.__inputsem = threading.Semaphore(0)
- self.__quit = False
-
- self.__process = subprocess.Popen(*params, **kwparams)
-
- if self.__process.stdin:
- self.__stdin_thread = threading.Thread(
- name="stdin-thread",
- target=self.__feeder, args=(self.__pending_input,
- self.__process.stdin))
- self.__stdin_thread.setDaemon(True)
- self.__stdin_thread.start()
-
- if self.__process.stdout:
- self.__stdout_thread = threading.Thread(
- name="stdout-thread",
- target=self.__reader, args=(self.__collected_outdata,
- self.__process.stdout))
- self.__stdout_thread.setDaemon(True)
- self.__stdout_thread.start()
-
- if self.__process.stderr:
- self.__stderr_thread = threading.Thread(
- name="stderr-thread",
- target=self.__reader, args=(self.__collected_errdata,
- self.__process.stderr))
- self.__stderr_thread.setDaemon(True)
- self.__stderr_thread.start()
-
- def pid(self):
- return self.__process.pid
-
- def kill(self, signal):
- self.__process.send_signal(signal)
-
- # check on subprocess (pass in 'nowait') to act like poll
- def wait(self, flag):
- if flag.lower() == 'nowait':
- rc = self.__process.poll()
- else:
- rc = self.__process.wait()
- if rc != None:
- if self.__process.stdin:
- self.closeinput()
- if self.__process.stdout:
- self.__stdout_thread.join()
- if self.__process.stderr:
- self.__stderr_thread.join()
- return self.__process.returncode
-
- def terminate(self):
- if self.__process.stdin:
- self.closeinput()
- self.__process.terminate()
-
- # thread gets data from subprocess stdout
- def __reader(self, collector, source):
- while True:
- data = os.read(source.fileno(), 65536)
- self.__lock.acquire()
- collector.append(data)
- self.__lock.release()
- if data == "":
- source.close()
- break
- return
-
- # thread feeds data to subprocess stdin
- def __feeder(self, pending, drain):
- while True:
- self.__inputsem.acquire()
- self.__lock.acquire()
- if not pending and self.__quit:
- drain.close()
- self.__lock.release()
- break
- data = pending.pop(0)
- self.__lock.release()
- drain.write(data)
-
- # non-blocking read of data from subprocess stdout
- def read(self):
- self.__lock.acquire()
- outdata = "".join(self.__collected_outdata)
- del self.__collected_outdata[:]
- self.__lock.release()
- return outdata
-
- # non-blocking read of data from subprocess stderr
- def readerr(self):
- self.__lock.acquire()
- errdata = "".join(self.__collected_errdata)
- del self.__collected_errdata[:]
- self.__lock.release()
- return errdata
-
- # non-blocking write to stdin of subprocess
- def write(self, data):
- if self.__process.stdin is None:
- raise ValueError("Writing to process with stdin not a pipe")
- self.__lock.acquire()
- self.__pending_input.append(data)
- self.__inputsem.release()
- self.__lock.release()
-
- # close stdinput of subprocess
- def closeinput(self):
- self.__lock.acquire()
- self.__quit = True
- self.__inputsem.release()
- self.__lock.release()
-
-
-# interface to needed routines in openssl's libcrypto
-def _load_crypto_libcrypto():
- from ctypes import CDLL, byref, POINTER, c_void_p, c_char_p, c_int, c_long, \
- Structure, c_ulong, create_string_buffer, addressof, string_at, cast
- from ctypes.util import find_library
-
- libcrypto = find_library('crypto')
- if libcrypto is None:
- raise K4MDrmException('libcrypto not found')
- libcrypto = CDLL(libcrypto)
-
- AES_MAXNR = 14
- c_char_pp = POINTER(c_char_p)
- c_int_p = POINTER(c_int)
-
- class AES_KEY(Structure):
- _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))), ('rounds', c_int)]
- AES_KEY_p = POINTER(AES_KEY)
-
- def F(restype, name, argtypes):
- func = getattr(libcrypto, name)
- func.restype = restype
- func.argtypes = argtypes
- return func
-
- AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p,c_int])
-
- AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',[c_char_p, c_int, AES_KEY_p])
-
- PKCS5_PBKDF2_HMAC_SHA1 = F(c_int, 'PKCS5_PBKDF2_HMAC_SHA1',
- [c_char_p, c_ulong, c_char_p, c_ulong, c_ulong, c_ulong, c_char_p])
-
- class LibCrypto(object):
- def __init__(self):
- self._blocksize = 0
- self._keyctx = None
- self.iv = 0
-
- def set_decrypt_key(self, userkey, iv):
- self._blocksize = len(userkey)
- if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) :
- raise K4MDrmException('AES improper key used')
- return
- keyctx = self._keyctx = AES_KEY()
- self.iv = iv
- rv = AES_set_decrypt_key(userkey, len(userkey) * 8, keyctx)
- if rv < 0:
- raise K4MDrmException('Failed to initialize AES key')
-
- def decrypt(self, data):
- out = create_string_buffer(len(data))
- rv = AES_cbc_encrypt(data, out, len(data), self._keyctx, self.iv, 0)
- if rv == 0:
- raise K4MDrmException('AES decryption failed')
- return out.raw
-
- def keyivgen(self, passwd):
- salt = '16743'
- saltlen = 5
- passlen = len(passwd)
- iter = 0x3e8
- keylen = 80
- out = create_string_buffer(keylen)
- rv = PKCS5_PBKDF2_HMAC_SHA1(passwd, passlen, salt, saltlen, iter, keylen, out)
- return out.raw
- return LibCrypto
-
-def _load_crypto():
- LibCrypto = None
- try:
- LibCrypto = _load_crypto_libcrypto()
- except (ImportError, K4MDrmException):
- pass
- return LibCrypto
-
-LibCrypto = _load_crypto()
-
-#
-# Utility Routines
-#
-
-# uses a sub process to get the Hard Drive Serial Number using ioreg
-# returns with the serial number of drive whose BSD Name is "disk0"
-def GetVolumeSerialNumber():
- sernum = os.getenv('MYSERIALNUMBER')
- if sernum != None:
- return sernum
- cmdline = '/usr/sbin/ioreg -l -S -w 0 -r -c AppleAHCIDiskDriver'
- cmdline = cmdline.encode(sys.getfilesystemencoding())
- p = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
- poll = p.wait('wait')
- results = p.read()
- reslst = results.split('\n')
- cnt = len(reslst)
- bsdname = None
- sernum = None
- foundIt = False
- for j in xrange(cnt):
- resline = reslst[j]
- pp = resline.find('"Serial Number" = "')
- if pp >= 0:
- sernum = resline[pp+19:-1]
- sernum = sernum.strip()
- bb = resline.find('"BSD Name" = "')
- if bb >= 0:
- bsdname = resline[bb+14:-1]
- bsdname = bsdname.strip()
- if (bsdname == 'disk0') and (sernum != None):
- foundIt = True
- break
- if not foundIt:
- sernum = '9999999999'
- return sernum
-
-# uses unix env to get username instead of using sysctlbyname
-def GetUserName():
- username = os.getenv('USER')
- return username
-
-# Various character maps used to decrypt books. Probably supposed to act as obfuscation
-charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
-charMap2 = "ZB0bYyc1xDdW2wEV3Ff7KkPpL8UuGA4gz-Tme9Nn_tHh5SvXCsIiR6rJjQaqlOoM"
-charMap3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
-charMap4 = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
-
-def encode(data, map):
- result = ""
- for char in data:
- value = ord(char)
- Q = (value ^ 0x80) // len(map)
- R = value % len(map)
- result += map[Q]
- result += map[R]
- return result
-
-import hashlib
-
-def SHA256(message):
- ctx = hashlib.sha256()
- ctx.update(message)
- return ctx.digest()
-
-# implements an Pseudo Mac Version of Windows built-in Crypto routine
-def CryptUnprotectData(encryptedData):
- sp = GetVolumeSerialNumber() + '!@#' + GetUserName()
- passwdData = encode(SHA256(sp),charMap1)
- crp = LibCrypto()
- key_iv = crp.keyivgen(passwdData)
- key = key_iv[0:32]
- iv = key_iv[32:48]
- crp.set_decrypt_key(key,iv)
- cleartext = crp.decrypt(encryptedData)
- return cleartext
-
-# Locate and open the .kindle-info file
-def openKindleInfo(kInfoFile=None):
- if kInfoFile == None:
- home = os.getenv('HOME')
- cmdline = 'find "' + home + '/Library/Application Support" -name ".kindle-info"'
- cmdline = cmdline.encode(sys.getfilesystemencoding())
- p1 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
- poll = p1.wait('wait')
- results = p1.read()
- reslst = results.split('\n')
- kinfopath = 'NONE'
- cnt = len(reslst)
- for j in xrange(cnt):
- resline = reslst[j]
- pp = resline.find('.kindle-info')
- if pp >= 0:
- kinfopath = resline
- break
- if not os.path.exists(kinfopath):
- raise K4MDrmException('Error: .kindle-info file can not be found')
- return open(kinfopath,'r')
- else:
- return open(kInfoFile, 'r')
+++ /dev/null
-#!/usr/bin/env python
-# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-
-import os, sys
-import signal
-import threading
-import subprocess
-from subprocess import Popen, PIPE, STDOUT
-
-# **heavily** chopped up and modfied version of asyncproc.py
-# to make it actually work on Windows as well as Mac/Linux
-# For the original see:
-# "http://www.lysator.liu.se/~bellman/download/"
-# author is "Thomas Bellman <bellman@lysator.liu.se>"
-# available under GPL version 3 or Later
-
-# create an asynchronous subprocess whose output can be collected in
-# a non-blocking manner
-
-# What a mess! Have to use threads just to get non-blocking io
-# in a cross-platform manner
-
-# luckily all thread use is hidden within this class
-
-class Process(object):
- def __init__(self, *params, **kwparams):
- if len(params) <= 3:
- kwparams.setdefault('stdin', subprocess.PIPE)
- if len(params) <= 4:
- kwparams.setdefault('stdout', subprocess.PIPE)
- if len(params) <= 5:
- kwparams.setdefault('stderr', subprocess.PIPE)
- self.__pending_input = []
- self.__collected_outdata = []
- self.__collected_errdata = []
- self.__exitstatus = None
- self.__lock = threading.Lock()
- self.__inputsem = threading.Semaphore(0)
- self.__quit = False
-
- self.__process = subprocess.Popen(*params, **kwparams)
-
- if self.__process.stdin:
- self.__stdin_thread = threading.Thread(
- name="stdin-thread",
- target=self.__feeder, args=(self.__pending_input,
- self.__process.stdin))
- self.__stdin_thread.setDaemon(True)
- self.__stdin_thread.start()
-
- if self.__process.stdout:
- self.__stdout_thread = threading.Thread(
- name="stdout-thread",
- target=self.__reader, args=(self.__collected_outdata,
- self.__process.stdout))
- self.__stdout_thread.setDaemon(True)
- self.__stdout_thread.start()
-
- if self.__process.stderr:
- self.__stderr_thread = threading.Thread(
- name="stderr-thread",
- target=self.__reader, args=(self.__collected_errdata,
- self.__process.stderr))
- self.__stderr_thread.setDaemon(True)
- self.__stderr_thread.start()
-
- def pid(self):
- return self.__process.pid
-
- def kill(self, signal):
- self.__process.send_signal(signal)
-
- # check on subprocess (pass in 'nowait') to act like poll
- def wait(self, flag):
- if flag.lower() == 'nowait':
- rc = self.__process.poll()
- else:
- rc = self.__process.wait()
- if rc != None:
- if self.__process.stdin:
- self.closeinput()
- if self.__process.stdout:
- self.__stdout_thread.join()
- if self.__process.stderr:
- self.__stderr_thread.join()
- return self.__process.returncode
-
- def terminate(self):
- if self.__process.stdin:
- self.closeinput()
- self.__process.terminate()
-
- # thread gets data from subprocess stdout
- def __reader(self, collector, source):
- while True:
- data = os.read(source.fileno(), 65536)
- self.__lock.acquire()
- collector.append(data)
- self.__lock.release()
- if data == "":
- source.close()
- break
- return
-
- # thread feeds data to subprocess stdin
- def __feeder(self, pending, drain):
- while True:
- self.__inputsem.acquire()
- self.__lock.acquire()
- if not pending and self.__quit:
- drain.close()
- self.__lock.release()
- break
- data = pending.pop(0)
- self.__lock.release()
- drain.write(data)
-
- # non-blocking read of data from subprocess stdout
- def read(self):
- self.__lock.acquire()
- outdata = "".join(self.__collected_outdata)
- del self.__collected_outdata[:]
- self.__lock.release()
- return outdata
-
- # non-blocking read of data from subprocess stderr
- def readerr(self):
- self.__lock.acquire()
- errdata = "".join(self.__collected_errdata)
- del self.__collected_errdata[:]
- self.__lock.release()
- return errdata
-
- # non-blocking write to stdin of subprocess
- def write(self, data):
- if self.__process.stdin is None:
- raise ValueError("Writing to process with stdin not a pipe")
- self.__lock.acquire()
- self.__pending_input.append(data)
- self.__inputsem.release()
- self.__lock.release()
-
- # close stdinput of subprocess
- def closeinput(self):
- self.__lock.acquire()
- self.__quit = True
- self.__inputsem.release()
- self.__lock.release()
-
+++ /dev/null
-Contributors:
- cmbtc - removal of drm which made all of this possible
- clarknova - for all of the svg and glyph generation and many other bug fixes and improvements
- skindle - for figuing out the general case for the mode loops
- some updates - for conversion to xml, basic html
- DiapDealer - for extensive testing and feedback, and standalone linux/macosx version of cmbtc_dump
- stewball - for extensive testing and feedback
-
-and many others for posting, feedback and testing
-
-
-This is experimental and it will probably not work for you but...
-
-ALSO: Please do not use any of this to steal. Theft is wrong.
- This is meant to allow conversion of Topaz books for other book readers you own
-
-Here are the steps:
-
-1. Unzip the topazscripts.zip file to get the full set of python scripts.
-The files you should have after unzipping are:
-
-cmbtc_dump.py - (author: cmbtc) unencrypts and dumps sections into separate files for Kindle for PC and Mac
-decode_meta.py - converts metadata0000.dat to make it available
-convert2xml.py - converts page*.dat, other*.dat, and glyphs*.dat files to pseudo xml descriptions
-flatxml2html.py - converts a "flattened" xml description to html using the ocrtext
-stylexml2css.py - converts stylesheet "flattened" xml into css (as best it can)
-getpagedim.py - reads page0000.dat to get the book height and width parameters
-genxml.py - main program to convert everything to xml
-genhtml.py - main program to generate "book.html"
-gensvg.py - (author: clarknova) main program to create an xhmtl page with embedded svg graphics
-k4mutils.py - Mac OSX support routines for cmbtc_dump.py
-k4pcutils.py - Windows support routines for cmbtc_dump.py
-
-
-
-Please note, these scripts all import code from each other so please
-keep all of these python scripts together in the same place.
-
-
-
-2. Remove the DRM from the Topaz book and build a directory
-of its contents as files
-
-All Thanks go to CMBTC who broke the DRM for Topaz - without it nothing else
-would be possible
-
-If you purchased the book for Kindle for PC or Kindle for Mac, you must do the following:
-
- cmbtc_dump.py -d -o TARGETDIR [-p pid] YOURTOPAZBOOKNAMEHERE
-
-
-If you purchased the book for a standalone Kindle 1 or ipod/iphone/ipad
-and you know your pid (at least the first 8 characters) then you should
-add that using -p 12345678 switch as indicated above, replacing the
-12345678 with the 8 characters of your pid
-
-
-This should create a directory called "TARGETDIR" in your current directory.
-It should have the following files in it:
-
-metadata0000.dat - metadata info
-other0000.dat - information used to create a style sheet
-dict0000.dat - dictionary of words used to build page descriptions
-page - directory filled with page*.dat files
-glyphs - directory filled with glyphs*.dat files
-img - directory filled with images
-color_img - directory used for color images
-
-3. REQUIRED: Create xhtml page descriptions with embedded svg
-that show the exact representation of each page as an image
-with proper glyphs and positioning.
-
-The step must NOW be done BEFORE attempting conversion to html
-
- gensvg.py TARGETDIR
-
-When complete, use a web-browser to open the page*.xhtml files
-in TARGETDIR/svg/ to see what the book really looks like.
-
-If you would prefer pure svg pages, then use the -r option
-as follows:
-
- gensvg.py -r TARGETDIR
-
-
-All thanks go to CLARKNOVA for this program. This program is
-needed to actually see the true image of each page and so that
-the next step can properly create images from glyphs for
-monograms, dropcaps and tables.
-
-
-4. Create "book.html" which can be found in "TARGETDIR" after
-completion.
-
- genhtml.py TARGETDIR
-
-
-***IMPORTANT NOTE*** This html conversion can not fully capture
-all of the layouts and styles actually used in the book
-and the resulting html will need to be edited by hand to
-properly set bold and/or italics, handle font size changes,
-and to fix the sometimes horiffic mistakes in the ocrText
-used to create the html.
-
-If there critical pages that need fixed layout in your book
-you might want to consider forcing these fixed regions to
-become svg images using the command instead
-
- genhtml.py --fixed-image TARGETDIR
-
-This will convert all fixed regions into svg images at the
-expense of increased book size, slower loading speed, and
-a loss of the ability to search for words in those regions
-
-FYI: Sigil is a wonderful, free cross-
-platform program that can be used to edit the html and
-create an epub if you so desire.
-
-
-5. Optional Step: Convert the files in "TARGETDIR" to their
-xml descriptions which can be found in TARGETDIR/xml/
-upon completion.
-
- genxml.py TARGETDIR
-
-
-These conversions are important for allowing future (and better)
-conversions to come later.
-
# This plugin is meant to convert secure Ereader files (PDB) to unsecured PMLZ files.
# Calibre can then convert it to whatever format you desire.
# It is meant to function without having to install any dependencies...
-# other than having Calibre installed, of course. I've included the psyco libraries
-# (compiled for each platform) for speed. If your system can use them, great!
-# Otherwise, they won't be used and things will just work slower.
+# other than having Calibre installed, of course.
#
# Installation:
# Go to Calibre's Preferences page... click on the Plugins button. Use the file
# Revision history:
# 0.0.1 - Initial release
# 0.0.2 - updated to distinguish it from earlier non-openssl version
+# 0.0.3 - removed the previously added psyco code, as psyco is not supported under Calibre's Python 2.7
import sys, os
Credit given to The Dark Reverser for the original standalone script.'
supported_platforms = ['linux', 'osx', 'windows'] # Platforms this plugin will run on
author = 'DiapDealer' # The author of this plugin
- version = (0, 0, 2) # The version number of this plugin
+ version = (0, 0, 3) # The version number of this plugin
file_types = set(['pdb']) # The file types that this plugin will be applied to
on_import = True # Run this plugin during the import
def run(self, path_to_ebook):
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.constants import iswindows, isosx
- pdir = 'windows' if iswindows else 'osx' if isosx else 'linux'
- ppath = os.path.join(self.sys_insertion_path, pdir)
- sys.path.insert(0, ppath)
global bookname, erdr2pml
import erdr2pml
- if 'psyco' in sys.modules:
- print 'Using psyco acceleration for %s.' % pdir
- else:
- print 'NOT using psyco acceleration for %s. Conversion may be slow.' % pdir
-
infile = path_to_ebook
bookname = os.path.splitext(os.path.basename(infile))[0]
outdir = PersistentTemporaryDirectory()
try:
name, cc = i.split(',')
except ValueError:
- sys.path.remove(ppath)
print ' Error parsing user supplied data.'
return path_to_ebook
# 0.14 - contributed enhancement to support --make-pmlz switch
# 0.15 - enabled high-ascii to pml character encoding. DropBook now works on Mac.
# 0.16 - convert to use openssl DES (very very fast) or pure python DES if openssl's libcrypto is not available
+# 0.17 - added support for pycrypto's DES as well
Des = None
import openssl_des
Des = openssl_des.load_libcrypto()
+# if that did not work, then try the pycrypto version of DES
+if Des == None:
+ import pycrypto_des
+ Des = pycrypto_des.load_pycrypto()
+
# if that did not work then use pure python implementation
# of DES and try to speed it up with Psycho
if Des == None:
Des = python_des.Des
# Import Psyco if available
try:
- # Dumb speed hack 1
# http://psyco.sourceforge.net
import psyco
psyco.full()
- pass
except ImportError:
pass
-__version__='0.16'
+__version__='0.17'
class Unbuffered:
def __init__(self, stream):
+++ /dev/null
-#!/usr/bin/env python
-# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-#
-# This is a python script. You need a Python interpreter to run it.
-# For example, ActiveState Python, which exists for windows.
-# Changelog
-# 0.01 - Initial version
-# 0.02 - Support more eReader files. Support bold text and links. Fix PML decoder parsing bug.
-# 0.03 - Fix incorrect variable usage at one place.
-# 0.03b - enhancement by DeBockle (version 259 support)
-# Custom version 0.03 - no change to eReader support, only usability changes
-# - start of pep-8 indentation (spaces not tab), fix trailing blanks
-# - version variable, only one place to change
-# - added main routine, now callable as a library/module,
-# means tools can add optional support for ereader2html
-# - outdir is no longer a mandatory parameter (defaults based on input name if missing)
-# - time taken output to stdout
-# - Psyco support - reduces runtime by a factor of (over) 3!
-# E.g. (~600Kb file) 90 secs down to 24 secs
-# - newstyle classes
-# - changed map call to list comprehension
-# may not work with python 2.3
-# without Psyco this reduces runtime to 90%
-# E.g. 90 secs down to 77 secs
-# Psyco with map calls takes longer, do not run with map in Psyco JIT!
-# - izip calls used instead of zip (if available), further reduction
-# in run time (factor of 4.5).
-# E.g. (~600Kb file) 90 secs down to 20 secs
-# - Python 2.6+ support, avoid DeprecationWarning with sha/sha1
-# 0.04 - Footnote support, PML output, correct charset in html, support more PML tags
-# - Feature change, dump out PML file
-# - Added supprt for footnote tags. NOTE footnote ids appear to be bad (not usable)
-# in some pdb files :-( due to the same id being used multiple times
-# - Added correct charset encoding (pml is based on cp1252)
-# - Added logging support.
-#
-# TODO run this through a profiler - speed increases so far was from
-# applying "quick known fixes", added (commented out) cprofiler call
-# 0.05 - Improved type 272 support for sidebars, links, chapters, metainfo, etc
-# 0.06 - Merge of 0.04 and 0.05. Improved HTML output
-# Placed images in subfolder, so that it's possible to just
-# drop the book.pml file onto DropBook to make an unencrypted
-# copy of the eReader file.
-# Using that with Calibre works a lot better than the HTML
-# conversion in this code.
-# 0.07 - Further Improved type 272 support for sidebars with all earlier fixes
-# 0.08 - fixed typos, removed extraneous things
-# 0.09 - tried to greatly improve html conversion especially with \t tags
-
-__version__='0.09'
-
-# Import Psyco if available
-try:
- # Dumb speed hack 1
- # http://psyco.sourceforge.net
- import psyco
- psyco.full()
- pass
-except ImportError:
- pass
-try:
- # Dumb speed hack 2
- # All map() calls converted to list comprehension (some use zip)
- # override zip with izip - saves memory and in rough testing
- # appears to be faster zip() is only used in the converted map() calls
- from itertools import izip as zip
-except ImportError:
- pass
-
-import struct, binascii, zlib, os, sys, os.path, urllib
-try:
- from hashlib import sha1
-except ImportError:
- # older Python release
- import sha
- sha1 = lambda s: sha.new(s)
-import cgi
-import logging
-
-logging.basicConfig()
-#logging.basicConfig(level=logging.DEBUG)
-
-
-ECB = 0
-CBC = 1
-class Des(object):
- __pc1 = [56, 48, 40, 32, 24, 16, 8, 0, 57, 49, 41, 33, 25, 17,
- 9, 1, 58, 50, 42, 34, 26, 18, 10, 2, 59, 51, 43, 35,
- 62, 54, 46, 38, 30, 22, 14, 6, 61, 53, 45, 37, 29, 21,
- 13, 5, 60, 52, 44, 36, 28, 20, 12, 4, 27, 19, 11, 3]
- __left_rotations = [1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1]
- __pc2 = [13, 16, 10, 23, 0, 4,2, 27, 14, 5, 20, 9,
- 22, 18, 11, 3, 25, 7, 15, 6, 26, 19, 12, 1,
- 40, 51, 30, 36, 46, 54, 29, 39, 50, 44, 32, 47,
- 43, 48, 38, 55, 33, 52, 45, 41, 49, 35, 28, 31]
- __ip = [57, 49, 41, 33, 25, 17, 9, 1, 59, 51, 43, 35, 27, 19, 11, 3,
- 61, 53, 45, 37, 29, 21, 13, 5, 63, 55, 47, 39, 31, 23, 15, 7,
- 56, 48, 40, 32, 24, 16, 8, 0, 58, 50, 42, 34, 26, 18, 10, 2,
- 60, 52, 44, 36, 28, 20, 12, 4, 62, 54, 46, 38, 30, 22, 14, 6]
- __expansion_table = [31, 0, 1, 2, 3, 4, 3, 4, 5, 6, 7, 8,
- 7, 8, 9, 10, 11, 12,11, 12, 13, 14, 15, 16,
- 15, 16, 17, 18, 19, 20,19, 20, 21, 22, 23, 24,
- 23, 24, 25, 26, 27, 28,27, 28, 29, 30, 31, 0]
- __sbox = [[14, 4, 13, 1, 2, 15, 11, 8, 3, 10, 6, 12, 5, 9, 0, 7,
- 0, 15, 7, 4, 14, 2, 13, 1, 10, 6, 12, 11, 9, 5, 3, 8,
- 4, 1, 14, 8, 13, 6, 2, 11, 15, 12, 9, 7, 3, 10, 5, 0,
- 15, 12, 8, 2, 4, 9, 1, 7, 5, 11, 3, 14, 10, 0, 6, 13],
- [15, 1, 8, 14, 6, 11, 3, 4, 9, 7, 2, 13, 12, 0, 5, 10,
- 3, 13, 4, 7, 15, 2, 8, 14, 12, 0, 1, 10, 6, 9, 11, 5,
- 0, 14, 7, 11, 10, 4, 13, 1, 5, 8, 12, 6, 9, 3, 2, 15,
- 13, 8, 10, 1, 3, 15, 4, 2, 11, 6, 7, 12, 0, 5, 14, 9],
- [10, 0, 9, 14, 6, 3, 15, 5, 1, 13, 12, 7, 11, 4, 2, 8,
- 13, 7, 0, 9, 3, 4, 6, 10, 2, 8, 5, 14, 12, 11, 15, 1,
- 13, 6, 4, 9, 8, 15, 3, 0, 11, 1, 2, 12, 5, 10, 14, 7,
- 1, 10, 13, 0, 6, 9, 8, 7, 4, 15, 14, 3, 11, 5, 2, 12],
- [7, 13, 14, 3, 0, 6, 9, 10, 1, 2, 8, 5, 11, 12, 4, 15,
- 13, 8, 11, 5, 6, 15, 0, 3, 4, 7, 2, 12, 1, 10, 14, 9,
- 10, 6, 9, 0, 12, 11, 7, 13, 15, 1, 3, 14, 5, 2, 8, 4,
- 3, 15, 0, 6, 10, 1, 13, 8, 9, 4, 5, 11, 12, 7, 2, 14],
- [2, 12, 4, 1, 7, 10, 11, 6, 8, 5, 3, 15, 13, 0, 14, 9,
- 14, 11, 2, 12, 4, 7, 13, 1, 5, 0, 15, 10, 3, 9, 8, 6,
- 4, 2, 1, 11, 10, 13, 7, 8, 15, 9, 12, 5, 6, 3, 0, 14,
- 11, 8, 12, 7, 1, 14, 2, 13, 6, 15, 0, 9, 10, 4, 5, 3],
- [12, 1, 10, 15, 9, 2, 6, 8, 0, 13, 3, 4, 14, 7, 5, 11,
- 10, 15, 4, 2, 7, 12, 9, 5, 6, 1, 13, 14, 0, 11, 3, 8,
- 9, 14, 15, 5, 2, 8, 12, 3, 7, 0, 4, 10, 1, 13, 11, 6,
- 4, 3, 2, 12, 9, 5, 15, 10, 11, 14, 1, 7, 6, 0, 8, 13],
- [4, 11, 2, 14, 15, 0, 8, 13, 3, 12, 9, 7, 5, 10, 6, 1,
- 13, 0, 11, 7, 4, 9, 1, 10, 14, 3, 5, 12, 2, 15, 8, 6,
- 1, 4, 11, 13, 12, 3, 7, 14, 10, 15, 6, 8, 0, 5, 9, 2,
- 6, 11, 13, 8, 1, 4, 10, 7, 9, 5, 0, 15, 14, 2, 3, 12],
- [13, 2, 8, 4, 6, 15, 11, 1, 10, 9, 3, 14, 5, 0, 12, 7,
- 1, 15, 13, 8, 10, 3, 7, 4, 12, 5, 6, 11, 0, 14, 9, 2,
- 7, 11, 4, 1, 9, 12, 14, 2, 0, 6, 10, 13, 15, 3, 5, 8,
- 2, 1, 14, 7, 4, 10, 8, 13, 15, 12, 9, 0, 3, 5, 6, 11],]
- __p = [15, 6, 19, 20, 28, 11,27, 16, 0, 14, 22, 25,
- 4, 17, 30, 9, 1, 7,23,13, 31, 26, 2, 8,18, 12, 29, 5, 21, 10,3, 24]
- __fp = [39, 7, 47, 15, 55, 23, 63, 31,38, 6, 46, 14, 54, 22, 62, 30,
- 37, 5, 45, 13, 53, 21, 61, 29,36, 4, 44, 12, 52, 20, 60, 28,
- 35, 3, 43, 11, 51, 19, 59, 27,34, 2, 42, 10, 50, 18, 58, 26,
- 33, 1, 41, 9, 49, 17, 57, 25,32, 0, 40, 8, 48, 16, 56, 24]
- # Type of crypting being done
- ENCRYPT = 0x00
- DECRYPT = 0x01
- def __init__(self, key, mode=ECB, IV=None):
- if len(key) != 8:
- raise ValueError("Invalid DES key size. Key must be exactly 8 bytes long.")
- self.block_size = 8
- self.key_size = 8
- self.__padding = ''
- self.setMode(mode)
- if IV:
- self.setIV(IV)
- self.L = []
- self.R = []
- self.Kn = [ [0] * 48 ] * 16 # 16 48-bit keys (K1 - K16)
- self.final = []
- self.setKey(key)
- def getKey(self):
- return self.__key
- def setKey(self, key):
- self.__key = key
- self.__create_sub_keys()
- def getMode(self):
- return self.__mode
- def setMode(self, mode):
- self.__mode = mode
- def getIV(self):
- return self.__iv
- def setIV(self, IV):
- if not IV or len(IV) != self.block_size:
- raise ValueError("Invalid Initial Value (IV), must be a multiple of " + str(self.block_size) + " bytes")
- self.__iv = IV
- def getPadding(self):
- return self.__padding
- def __String_to_BitList(self, data):
- l = len(data) * 8
- result = [0] * l
- pos = 0
- for c in data:
- i = 7
- ch = ord(c)
- while i >= 0:
- if ch & (1 << i) != 0:
- result[pos] = 1
- else:
- result[pos] = 0
- pos += 1
- i -= 1
- return result
- def __BitList_to_String(self, data):
- result = ''
- pos = 0
- c = 0
- while pos < len(data):
- c += data[pos] << (7 - (pos % 8))
- if (pos % 8) == 7:
- result += chr(c)
- c = 0
- pos += 1
- return result
- def __permutate(self, table, block):
- return [block[x] for x in table]
- def __create_sub_keys(self):
- key = self.__permutate(Des.__pc1, self.__String_to_BitList(self.getKey()))
- i = 0
- self.L = key[:28]
- self.R = key[28:]
- while i < 16:
- j = 0
- while j < Des.__left_rotations[i]:
- self.L.append(self.L[0])
- del self.L[0]
- self.R.append(self.R[0])
- del self.R[0]
- j += 1
- self.Kn[i] = self.__permutate(Des.__pc2, self.L + self.R)
- i += 1
- def __des_crypt(self, block, crypt_type):
- block = self.__permutate(Des.__ip, block)
- self.L = block[:32]
- self.R = block[32:]
- if crypt_type == Des.ENCRYPT:
- iteration = 0
- iteration_adjustment = 1
- else:
- iteration = 15
- iteration_adjustment = -1
- i = 0
- while i < 16:
- tempR = self.R[:]
- self.R = self.__permutate(Des.__expansion_table, self.R)
- self.R = [x ^ y for x,y in zip(self.R, self.Kn[iteration])]
- B = [self.R[:6], self.R[6:12], self.R[12:18], self.R[18:24], self.R[24:30], self.R[30:36], self.R[36:42], self.R[42:]]
- j = 0
- Bn = [0] * 32
- pos = 0
- while j < 8:
- m = (B[j][0] << 1) + B[j][5]
- n = (B[j][1] << 3) + (B[j][2] << 2) + (B[j][3] << 1) + B[j][4]
- v = Des.__sbox[j][(m << 4) + n]
- Bn[pos] = (v & 8) >> 3
- Bn[pos + 1] = (v & 4) >> 2
- Bn[pos + 2] = (v & 2) >> 1
- Bn[pos + 3] = v & 1
- pos += 4
- j += 1
- self.R = self.__permutate(Des.__p, Bn)
- self.R = [x ^ y for x, y in zip(self.R, self.L)]
- self.L = tempR
- i += 1
- iteration += iteration_adjustment
- self.final = self.__permutate(Des.__fp, self.R + self.L)
- return self.final
- def crypt(self, data, crypt_type):
- if not data:
- return ''
- if len(data) % self.block_size != 0:
- if crypt_type == Des.DECRYPT: # Decryption must work on 8 byte blocks
- raise ValueError("Invalid data length, data must be a multiple of " + str(self.block_size) + " bytes\n.")
- if not self.getPadding():
- raise ValueError("Invalid data length, data must be a multiple of " + str(self.block_size) + " bytes\n. Try setting the optional padding character")
- else:
- data += (self.block_size - (len(data) % self.block_size)) * self.getPadding()
- if self.getMode() == CBC:
- if self.getIV():
- iv = self.__String_to_BitList(self.getIV())
- else:
- raise ValueError("For CBC mode, you must supply the Initial Value (IV) for ciphering")
- i = 0
- dict = {}
- result = []
- while i < len(data):
- block = self.__String_to_BitList(data[i:i+8])
- if self.getMode() == CBC:
- if crypt_type == Des.ENCRYPT:
- block = [x ^ y for x, y in zip(block, iv)]
- processed_block = self.__des_crypt(block, crypt_type)
- if crypt_type == Des.DECRYPT:
- processed_block = [x ^ y for x, y in zip(processed_block, iv)]
- iv = block
- else:
- iv = processed_block
- else:
- processed_block = self.__des_crypt(block, crypt_type)
- result.append(self.__BitList_to_String(processed_block))
- i += 8
- if crypt_type == Des.DECRYPT and self.getPadding():
- s = result[-1]
- while s[-1] == self.getPadding():
- s = s[:-1]
- result[-1] = s
- return ''.join(result)
- def encrypt(self, data, pad=''):
- self.__padding = pad
- return self.crypt(data, Des.ENCRYPT)
- def decrypt(self, data, pad=''):
- self.__padding = pad
- return self.crypt(data, Des.DECRYPT)
-
-class Sectionizer(object):
- def __init__(self, filename, ident):
- self.contents = file(filename, 'rb').read()
- self.header = self.contents[0:72]
- self.num_sections, = struct.unpack('>H', self.contents[76:78])
- if self.header[0x3C:0x3C+8] != ident:
- raise ValueError('Invalid file format')
- self.sections = []
- for i in xrange(self.num_sections):
- offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', self.contents[78+i*8:78+i*8+8])
- flags, val = a1, a2<<16|a3<<8|a4
- self.sections.append( (offset, flags, val) )
- def loadSection(self, section):
- if section + 1 == self.num_sections:
- end_off = len(self.contents)
- else:
- end_off = self.sections[section + 1][0]
- off = self.sections[section][0]
- return self.contents[off:end_off]
-
-def sanitizeFileName(s):
- r = ''
- for c in s.lower():
- if c in "abcdefghijklmnopqrstuvwxyz0123456789_.-":
- r += c
- return r
-
-def fixKey(key):
- def fixByte(b):
- return b ^ ((b ^ (b<<1) ^ (b<<2) ^ (b<<3) ^ (b<<4) ^ (b<<5) ^ (b<<6) ^ (b<<7) ^ 0x80) & 0x80)
- return "".join([chr(fixByte(ord(a))) for a in key])
-
-def deXOR(text, sp, table):
- r=''
- j = sp
- for i in xrange(len(text)):
- r += chr(ord(table[j]) ^ ord(text[i]))
- j = j + 1
- if j == len(table):
- j = 0
- return r
-
-class EreaderProcessor(object):
- def __init__(self, section_reader, username, creditcard):
- self.section_reader = section_reader
- data = section_reader(0)
- version, = struct.unpack('>H', data[0:2])
- self.version = version
- logging.info('eReader file format version %s', version)
- if version != 272 and version != 260 and version != 259:
- raise ValueError('incorrect eReader version %d (error 1)' % version)
- data = section_reader(1)
- self.data = data
- des = Des(fixKey(data[0:8]))
- cookie_shuf, cookie_size = struct.unpack('>LL', des.decrypt(data[-8:]))
- if cookie_shuf < 3 or cookie_shuf > 0x14 or cookie_size < 0xf0 or cookie_size > 0x200:
- raise ValueError('incorrect eReader version (error 2)')
- input = des.decrypt(data[-cookie_size:])
- def unshuff(data, shuf):
- r = [''] * len(data)
- j = 0
- for i in xrange(len(data)):
- j = (j + shuf) % len(data)
- r[j] = data[i]
- assert len("".join(r)) == len(data)
- return "".join(r)
- r = unshuff(input[0:-8], cookie_shuf)
-
- def fixUsername(s):
- r = ''
- for c in s.lower():
- if (c >= 'a' and c <= 'z' or c >= '0' and c <= '9'):
- r += c
- return r
-
- user_key = struct.pack('>LL', binascii.crc32(fixUsername(username)) & 0xffffffff, binascii.crc32(creditcard[-8:])& 0xffffffff)
- drm_sub_version = struct.unpack('>H', r[0:2])[0]
- self.num_text_pages = struct.unpack('>H', r[2:4])[0] - 1
- self.num_image_pages = struct.unpack('>H', r[26:26+2])[0]
- self.first_image_page = struct.unpack('>H', r[24:24+2])[0]
- if self.version == 272:
- self.num_chapter_pages = struct.unpack('>H', r[22:22+2])[0]
- self.first_chapter_page = struct.unpack('>H', r[20:20+2])[0]
- self.num_link_pages = struct.unpack('>H', r[30:30+2])[0]
- self.first_link_page = struct.unpack('>H', r[28:28+2])[0]
- self.num_bookinfo_pages = struct.unpack('>H', r[34:34+2])[0]
- self.first_bookinfo_page = struct.unpack('>H', r[32:32+2])[0]
- self.num_footnote_pages = struct.unpack('>H', r[46:46+2])[0]
- self.first_footnote_page = struct.unpack('>H', r[44:44+2])[0]
- self.num_xtextsize_pages = struct.unpack('>H', r[54:54+2])[0]
- self.first_xtextsize_page = struct.unpack('>H', r[52:52+2])[0]
- self.num_sidebar_pages = struct.unpack('>H', r[38:38+2])[0]
- self.first_sidebar_page = struct.unpack('>H', r[36:36+2])[0]
-
- # **before** data record 1 was decrypted and unshuffled, it contained data
- # to create an XOR table and which is used to fix footnote record 0, link records, chapter records, etc
- self.xortable_offset = struct.unpack('>H', r[40:40+2])[0]
- self.xortable_size = struct.unpack('>H', r[42:42+2])[0]
- self.xortable = self.data[self.xortable_offset:self.xortable_offset + self.xortable_size]
- else:
- self.num_chapter_pages = 0
- self.num_link_pages = 0
- self.num_bookinfo_pages = 0
- self.num_footnote_pages = 0
- self.num_xtextsize_pages = 0
- self.num_sidebar_pages = 0
- self.first_chapter_page = -1
- self.first_link_page = -1
- self.first_bookinfo_page = -1
- self.first_footnote_page = -1
- self.first_xtextsize_page = -1
- self.first_sidebar_page = -1
-
- logging.debug('self.num_text_pages %d', self.num_text_pages)
- logging.debug('self.num_footnote_pages %d, self.first_footnote_page %d', self.num_footnote_pages , self.first_footnote_page)
- logging.debug('self.num_sidebar_pages %d, self.first_sidebar_page %d', self.num_sidebar_pages , self.first_sidebar_page)
- self.flags = struct.unpack('>L', r[4:8])[0]
- reqd_flags = (1<<9) | (1<<7) | (1<<10)
- if (self.flags & reqd_flags) != reqd_flags:
- print "Flags: 0x%X" % self.flags
- raise ValueError('incompatible eReader file')
- des = Des(fixKey(user_key))
- if version == 259:
- if drm_sub_version != 7:
- raise ValueError('incorrect eReader version %d (error 3)' % drm_sub_version)
- encrypted_key_sha = r[44:44+20]
- encrypted_key = r[64:64+8]
- elif version == 260:
- if drm_sub_version != 13:
- raise ValueError('incorrect eReader version %d (error 3)' % drm_sub_version)
- encrypted_key = r[44:44+8]
- encrypted_key_sha = r[52:52+20]
- elif version == 272:
- encrypted_key = r[172:172+8]
- encrypted_key_sha = r[56:56+20]
- self.content_key = des.decrypt(encrypted_key)
- if sha1(self.content_key).digest() != encrypted_key_sha:
- raise ValueError('Incorrect Name and/or Credit Card')
-
- def getNumImages(self):
- return self.num_image_pages
-
- def getImage(self, i):
- sect = self.section_reader(self.first_image_page + i)
- name = sect[4:4+32].strip('\0')
- data = sect[62:]
- return sanitizeFileName(name), data
-
- def getChapterNamePMLOffsetData(self):
- cv = ''
- if self.num_chapter_pages > 0:
- for i in xrange(self.num_chapter_pages):
- chaps = self.section_reader(self.first_chapter_page + i)
- j = i % self.xortable_size
- offname = deXOR(chaps, j, self.xortable)
- offset = struct.unpack('>L', offname[0:4])[0]
- name = offname[4:].strip('\0')
- cv += '%d,%s\n' % (offset, name)
- return cv
-
- def getLinkNamePMLOffsetData(self):
- lv = ''
- if self.num_link_pages > 0:
- for i in xrange(self.num_link_pages):
- links = self.section_reader(self.first_link_page + i)
- j = i % self.xortable_size
- offname = deXOR(links, j, self.xortable)
- offset = struct.unpack('>L', offname[0:4])[0]
- name = offname[4:].strip('\0')
- lv += '%d,%s\n' % (offset, name)
- return lv
-
- def getExpandedTextSizesData(self):
- ts = ''
- if self.num_xtextsize_pages > 0:
- tsize = deXOR(self.section_reader(self.first_xtextsize_page), 0, self.xortable)
- for i in xrange(self.num_text_pages):
- xsize = struct.unpack('>H', tsize[0:2])[0]
- ts += "%d\n" % xsize
- tsize = tsize[2:]
- return ts
-
- def getBookInfo(self):
- bkinfo = ''
- if self.num_bookinfo_pages > 0:
- info = self.section_reader(self.first_bookinfo_page)
- bkinfo = deXOR(info, 0, self.xortable)
- return bkinfo
-
- def getText(self):
- des = Des(fixKey(self.content_key))
- r = ''
- for i in xrange(self.num_text_pages):
- logging.debug('get page %d', i)
- r += zlib.decompress(des.decrypt(self.section_reader(1 + i)))
-
- # now handle footnotes pages
- if self.num_footnote_pages > 0:
- # the record 0 of the footnote section must pass through the Xor Table to make it useful
- sect = self.section_reader(self.first_footnote_page)
- fnote_ids = deXOR(sect, 0, self.xortable)
- # the remaining records of the footnote sections need to be decoded with the content_key and zlib inflated
- des = Des(fixKey(self.content_key))
- r += '\\w="100%"'
- r += '\\pFootnotes:\n\n'
- for i in xrange(1,self.num_footnote_pages):
- logging.debug('get footnotepage %d', i)
- id_len = ord(fnote_ids[2])
- id = fnote_ids[3:3+id_len]
- fmarker='\\t\\Q="footnote-%s"' % id
- r+=fmarker
- r += zlib.decompress(des.decrypt(self.section_reader(self.first_footnote_page + i)))
- r += '\\t\n\n'
- fnote_ids = fnote_ids[id_len+4:]
-
-# according to ereader pml spec we should be outputing the following xml for each footnote - but then we would have to handle
-# parsing it back in to convert it since that xml is not valid xhtml
-# fmarker = '<footnote id="footnote-%s">\n' % id
-# fmarker += zlib.decompress(des.decrypt(self.section_reader(self.first_footnote_page + i)))
-# fmarker += '\n</footnote>\n'
-# r += fmarker
-
-
- # now handle sidebar pages
- if self.num_sidebar_pages > 0:
- # the record 0 of the sidebar section must pass through the Xor Table to make it useful
- sect = self.section_reader(self.first_sidebar_page)
- sbar_ids = deXOR(sect, 0, self.xortable)
- # the remaining records of the sidebar sections need to be decoded with the content_key and zlib inflated
- des = Des(fixKey(self.content_key))
- r += '\\w="100%"'
- r += '\\pSidebars:\n\n'
- for i in xrange(1,self.num_sidebar_pages):
- id_len = ord(sbar_ids[2])
- id = sbar_ids[3:3+id_len]
- smarker='\\t\\Q="sidebar-%s"' % id
- r+=smarker
- r += zlib.decompress(des.decrypt(self.section_reader(self.first_sidebar_page + i)))
- r += '\\t\n\n'
- sbar_ids = sbar_ids[id_len+4:]
-
-# according to ereader pml spec we should be outputing the following xml for each sidebar - but then we would have to handle
-# parsing it back in to convert it since that xml is not valid xhtml
-# smarker = '<sidebar id="sidebar-%s">\n' % id
-# smarker += zlib.decompress(des.decrypt(self.section_reader(self.first_footnote_page + i)))
-# smarker += '\n</sidebar>\n'
-# r += smarker
-
- return r
-
-class PmlConverter(object):
- def __init__(self, s, bkinfo):
- def findPrevStartofLine(src,p,n):
- # find last end of previous line in substring from p to n
- b1 = src.rfind('\n',p,n)
- b2 = src.rfind('\\c',p,n)
- b3 = src.rfind('\\r',p,n)
- b4 = src.rfind('\\x',p,n)
- b5 = src.rfind('\\p',p,n)
- b = max(b1, b2, b3, b4, b5)
- if b == -1:
- return n
- if b == b1:
- return b + 1
- return b + 2
- def markHangingIndents(src):
- r = ''
- p = 0
- while True:
- if p > len(src):
- return r
- n = src.find('\\t', p)
- if n == -1:
- r += src[p:]
- return r
- pc = findPrevStartofLine(src,p,n)
- if pc == n :
- # \t tag is at start of line so indent block will work
- end = src.find('\\t',n+2)
- if end == -1:
- end = n
- r += src[p:end+2]
- p = end + 2
- else :
- # \t tag not at start of line so hanging indent case
- # recode \t to pseudo \h tags and move it to start of this line
- # and recode its close as well
- r += src[p:pc] + '\\h' + src[pc:n]
- end = src.find('\\t',n+2)
- if end == -1:
- end = n+2
- r += src[n+2:end] + '\\h'
- p = end + 2
- self.s = markHangingIndents(s)
- # file(os.path.join("./pseudo.pml"), 'wb').write(self.s)
- self.pos = 0
- self.bkinfo = bkinfo
- def nextOptAttr(self):
- p = self.pos
- if self.s[p:p+2] != '="':
- return None
- r = ''
- p += 2
- while self.s[p] != '"':
- r += self.s[p]
- p += 1
- self.pos = p + 1
- return r
- def next(self):
- p = self.pos
- if p >= len(self.s):
- return None
- if self.s[p] != '\\':
- res = self.s.find('\\', p)
- if res == -1:
- res = len(self.s)
- self.pos = res
- return self.s[p : res], None, None
- c = self.s[p+1]
- # add in support for new pseudo tag \\h
- if c in 'pxcriuovthnsblBk-lI\\d':
- self.pos = p + 2
- return None, c, None
- if c in 'TwmqQ':
- self.pos = p + 2
- return None, c, self.nextOptAttr()
- if c == 'a':
- self.pos = p + 5
- return None, c, int(self.s[p+2:p+5])
- if c == 'U':
- self.pos = p + 6
- return None, c, int(self.s[p+2:p+6], 16)
- c = self.s[p+1:p+1+2]
- if c in ('X0','X1','X2','X3','X4','Sp','Sb'):
- self.pos = p + 3
- return None, c, None
- if c in ('C0','C1','C2','C3','C4','Fn','Sd'):
- self.pos = p + 3
- return None, c, self.nextOptAttr()
- print "unknown escape code %s" % c
- self.pos = p + 1
- return None, None, None
- def LinePrinter(link):
- return '<hr width="%s" />\n' % link
- def LinkPrinter(link):
- return '<a href="%s">' % link
- def FootnoteLinkPrinter(link):
- return '<a href="#footnote-%s">' % link
- def SidebarLinkPrinter(link):
- return '<a href="#sidebar-%s">' % link
- def NotSupported(link):
- raise NotImplemented()
- def IndentPercent(link):
- return '<span style="padding-left: %s%%;"></span>' %link
- def NormalFont(link):
- print "Nonfatal Error: NormalFont not implemented."
- return '<!-- NormalFont %s -->' %link
- def StdFont(link):
- print "Nonfatal Error: StdFont not implemented."
- return '<!-- StdFont: %s -->' %link
-
- # See http://wiki.mobileread.com/wiki/PML#Palm_Markup_Language
- html_tags = {
- 'c' : ('<div class="center">', '</div>'),
- 'r' : ('<div class="right">', '</div>'),
- 'i' : ('<i>', '</i>'),
- 'u' : ('<span class="under">', '</span>'),
- 'b' : ('<strong>', '</strong>'),
- 'B' : ('<strong>', '</strong>'),
- 'o' : ('<del>', '</del>'),
- 'v' : ('<!-- ', ' -->'),
- 't' : ('<div class="indent">','</div>'),
- 'h' : ('<div class="hang">','</div>'), # pseudo-tag created to handle hanging indent cases
- 'Sb' : ('<sub>', '</sub>'),
- 'Sp' : ('<sup>', '</sup>'),
- 'X0' : ('<h1>', '</h1>'),
- 'X1' : ('<h2>', '</h2>'),
- 'X2' : ('<h3>', '</h3>'),
- 'X3' : ('<h4>', '</h4>'),
- 'X4' : ('<h5>', '</h5>'),
- 'l' : ('<span class="big">', '</span>'),
- 'q' : (LinkPrinter, '</a>'),
- 'Fn' : (FootnoteLinkPrinter, '</a>'),
- 'Sd' : (SidebarLinkPrinter, '</a>'),
- 'w' : (LinePrinter, ''),
- #'m' : handled in if block,
- #'Q' : handled in if block,
- #'a' : handled in if block,
- #'U' : handled in if block,
- 'x' : ('<h1 class="breakbefore">', '</h1>'),
- #'C0' : handled in if block,
- #'C1' : handled in if block,
- #'C2' : handled in if block,
- #'C3' : handled in if block,
- #'C4' : handled in if block,
- 'T' : (IndentPercent, ''),
- 'n' : (NormalFont, ''),
- 's' : ('', ''),
- 'k' : ('<span class="small">', '</span>'),
- 'I' : ('<i>', '</i>'), # according to calibre - all ereader does is italicize the index entries
- }
- html_one_tags = {
- 'p' : '<p class="breakafter"> </p>\n',
- '\\': '\\',
- '-' : '­',
- }
- pml_chars = {
- 160 : ' ',130 : '—',131: 'ƒ',132: '„',
- 133: '…',134: '†',135: '‡',138: 'Š',
- 139: '‹',140: 'Œ',145: '‘',146: '’',
- 147: '“',148: '”',149: '•',150: '–',
- 151: '—',153: '™',154: 'š',155: '›',
- 156: 'œ',159: 'Ÿ'
- }
- def process(self):
- final = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">\n'
- final += '<html>\n<head>\n<meta http-equiv="content-type" content="text/html; charset=windows-1252">\n'
- if len(self.bkinfo) > 0:
- title, author, copyright, publisher, isbn = self.bkinfo.split('\0',4)
- isbn = isbn.strip('\0')
- final += '<meta name="Title" content="%s"/>\n' % title
- final += '<meta name="Author" content="%s"/>\n' % author
- final += '<meta name="Copyright" content="%s"/>\n' % copyright
- final += '<meta name="Publisher" content="%s"/>\n' % publisher
- final += '<meta name="ISBN" content="%s"/>\n' % isbn
- final += '<style type="text/css">\n'
- final += 'div.center { text-align:center; }\n'
- final += 'div.right { text-align:right; }\n'
- final += 'div.indent { margin-left: 5%; }\n'
- final += 'div.hang { text-indent: -5%; margin-left: 5%; }\n'
- final += 'span.big { font-size: 175%; }\n'
- final += 'span.small { font-size: 50%; }\n'
- final += 'span.under { text-decoration: underline; }\n'
- final += '.breakbefore { page-break-before: always; }\n'
- final += '.breakafter { page-break-after: always; }\n'
- final += '</style>\n'
- final += '</head><body>\n'
- in_tags = []
- def makeText(s):
- s = s.replace('&', '&')
- #s = s.replace('"', '"')
- s = s.replace('<', '<')
- s = s.replace('>', '>')
- s = s.replace('\n', '<br />\n')
- return s
- while True:
- r = self.next()
- if not r:
- break
- text, cmd, attr = r
- if text:
- final += makeText(text)
- if cmd:
- def getTag(ti, end):
- cmd, attr = ti
- r = self.html_tags[cmd][end]
- if type(r) != str:
- r = r(attr)
- return r
- if cmd in self.html_tags:
- pair = (cmd, attr)
- if cmd not in [a for (a,b) in in_tags]:
- final += getTag(pair, False)
- in_tags.append(pair)
- else:
- j = len(in_tags)
- while True:
- j = j - 1
- final += getTag(in_tags[j], True)
- if in_tags[j][0] == cmd:
- break
- del in_tags[j]
- while j < len(in_tags):
- final += getTag(in_tags[j], False)
- j = j + 1
-
- if cmd in self.html_one_tags:
- final += self.html_one_tags[cmd]
- if cmd == 'm':
- unquotedimagepath = "images/" + attr
- imagepath = urllib.quote( unquotedimagepath )
- final += '<img src="%s" alt="">' % imagepath
- if cmd == 'Q':
- final += '<span id="%s"> </span>' % attr
- if cmd == 'C0':
- final += '<!-- ContentsList "%s" -->' % attr
- if cmd == 'C1':
- final += '<!-- ContentsList " %s" -->' % attr
- if cmd == 'C2':
- final += '<!-- ContentsList " %s" -->' % attr
- if cmd == 'C3':
- final += '<!-- ContentsList " %s" -->' % attr
- if cmd == 'C4':
- final += '<!-- ContentsList " %s" -->' % attr
- if cmd == 'a':
- final += self.pml_chars.get(attr, '&#%d;' % attr)
- if cmd == 'U':
- final += '&#%d;' % attr
- final += '</body></html>\n'
- # while True:
- # s = final.replace('<br />\n<br />\n<br />\n', '<br />\n<br />\n')
- # if s == final:
- # break
- # final = s
- s = final.replace('</div><br />','</div>\n')
- final = s
- return final
-
-def convertEreaderToHtml(infile, name, cc, outdir):
- if not os.path.exists(outdir):
- os.makedirs(outdir)
- sect = Sectionizer(infile, 'PNRdPPrs')
- er = EreaderProcessor(sect.loadSection, name, cc)
-
- if er.getNumImages() > 0:
- imagedir = "images/"
- imagedirpath = os.path.join(outdir,imagedir)
- if not os.path.exists(imagedirpath):
- os.makedirs(imagedirpath)
- for i in xrange(er.getNumImages()):
- name, contents = er.getImage(i)
- file(os.path.join(imagedirpath, name), 'wb').write(contents)
-
- pml_string = er.getText()
- pmlfilename = bookname + ".pml"
- file(os.path.join(outdir, pmlfilename),'wb').write(pml_string)
-
- bkinfo = er.getBookInfo()
-
- pml = PmlConverter(pml_string, bkinfo)
- htmlfilename = bookname + ".html"
- file(os.path.join(outdir, htmlfilename),'wb').write(pml.process())
-
- # ts = er.getExpandedTextSizesData()
- # file(os.path.join(outdir, 'xtextsizes.dat'), 'wb').write(ts)
-
- cv = er.getChapterNamePMLOffsetData()
- file(os.path.join(outdir, 'chapters.dat'), 'wb').write(cv)
-
- # lv = er.getLinkNamePMLOffsetData()
- # file(os.path.join(outdir, 'links.dat'), 'wb').write(lv)
-
-
-def main(argv=None):
- global bookname
- if argv is None:
- argv = sys.argv
-
- print "eReader2Html v%s. Copyright (c) 2008 The Dark Reverser" % __version__
-
- if len(argv)!=4 and len(argv)!=5:
- print "Converts DRMed eReader books to PML Source and HTML"
- print "Usage:"
- print " ereader2html infile.pdb [outdir] \"your name\" credit_card_number "
- print "Note:"
- print " if ommitted, outdir defaults based on 'infile.pdb'"
- print " It's enough to enter the last 8 digits of the credit card number"
- else:
- if len(argv)==4:
- infile, name, cc = argv[1], argv[2], argv[3]
- outdir = infile[:-4] + '_Source'
- elif len(argv)==5:
- infile, outdir, name, cc = argv[1], argv[2], argv[3], argv[4]
- bookname = os.path.splitext(os.path.basename(infile))[0]
-
- try:
- print "Processing..."
- import time
- start_time = time.time()
- convertEreaderToHtml(infile, name, cc, outdir)
- end_time = time.time()
- search_time = end_time - start_time
- print 'elapsed time: %.2f seconds' % (search_time, )
- print 'output in %s' % outdir
- print "done"
- except ValueError, e:
- print "Error: %s" % e
-
-if __name__ == "__main__":
- #import cProfile
- #command = """sys.exit(main())"""
- #cProfile.runctx( command, globals(), locals(), filename="cprofile.profile" )
-
- sys.exit(main())
\ No newline at end of file
--- /dev/null
+#!/usr/bin/env python
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+
+
+def load_pycrypto():
+ try :
+ from Crypto.Cipher import DES as _DES
+ except:
+ return None
+
+ class DES(object):
+ def __init__(self, key):
+ if len(key) != 8 :
+ raise Error('DES improper key used')
+ self.key = key
+ self._des = _DES.new(key,_DES.MODE_ECB)
+ def desdecrypt(self, data):
+ return self._des.decrypt(data)
+ def decrypt(self, data):
+ if not data:
+ return ''
+ i = 0
+ result = []
+ while i < len(data):
+ block = data[i:i+8]
+ processed_block = self.desdecrypt(block)
+ result.append(processed_block)
+ i += 8
+ return ''.join(result)
+ return DES
+
+#!/usr/bin/env python
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
import sys
ECB = 0