__pycache__/
*.pyc
-# C extensions
-*.so
-
# Distribution / packaging
.Python
env/
description = 'Removes DRM from Mobipocket, Kindle/Mobi, Kindle/Topaz and Kindle/Print Replica files. Provided by the work of many including DiapDealer, SomeUpdates, IHeartCabbages, CMBDTC, Skindle, DarkReverser, ApprenticeAlf, etc.'
supported_platforms = ['osx', 'windows', 'linux'] # Platforms this plugin will run on
author = 'DiapDealer, SomeUpdates' # The author of this plugin
- version = (0, 4, 1) # The version number of this plugin
- file_types = set(['prc','mobi','azw','azw1','azw4','tpz']) # The file types that this plugin will be applied to
+ version = (0, 4, 2) # The version number of this plugin
+ file_types = set(['prc','mobi','azw','azw1','azw3','azw4','tpz']) # The file types that this plugin will be applied to
on_import = True # Run this plugin during the import
priority = 210 # run this plugin before mobidedrm, k4pcdedrm, k4dedrm
minimum_calibre_version = (0, 7, 55)
if mb.getPrintReplica():
of = self.temporary_file(bookname+'.azw4')
print 'K4MobiDeDRM v%s: Print Replica format detected.' % plug_ver
+ elif mb.getMobiVersion() >= 8:
+ print 'K4MobiDeDRM v%s: Stand-alone KF8 format detected.' % plug_ver
+ of = self.temporary_file(bookname+'.azw3')
else:
of = self.temporary_file(bookname+'.mobi')
mb.getMobiFile(of.name)
--- /dev/null
+#! /usr/bin/env python
+
+"""
+ Routines for doing AES CBC in one file
+
+ Modified by some_updates to extract
+ and combine only those parts needed for AES CBC
+ into one simple to add python file
+
+ Original Version
+ Copyright (c) 2002 by Paul A. Lambert
+ Under:
+ CryptoPy Artistic License Version 1.0
+ See the wonderful pure python package cryptopy-1.2.5
+ and read its LICENSE.txt for complete license details.
+"""
+
+class CryptoError(Exception):
+    """ Base class for crypto exceptions.
+
+        errorMessage is kept on self.message and returned by str().  It is
+        also handed to Exception so that self.args (and therefore repr()
+        and pickling) carry the message instead of being empty.
+    """
+    def __init__(self,errorMessage='Error!'):
+        Exception.__init__(self, errorMessage)
+        self.message = errorMessage
+    def __str__(self):
+        return self.message
+
+class InitCryptoError(CryptoError):
+ """ Crypto errors during algorithm initialization """
+class BadKeySizeError(InitCryptoError):
+ """ Bad key size error """
+class EncryptError(CryptoError):
+ """ Error in encryption processing """
+class DecryptError(CryptoError):
+ """ Error in decryption processing """
+class DecryptNotBlockAlignedError(DecryptError):
+ """ Data handed to decrypt/removePad is empty or not a whole number of blocks """
+
+def xorS(a,b):
+    """ Return the character-wise XOR of two strings of equal length """
+    assert len(a)==len(b)
+    return ''.join([chr(ord(left)^ord(right)) for left, right in zip(a, b)])
+
+def xor(a,b):
+    """ Character-wise XOR of two strings; the result is only as long
+        as the shorter of the two inputs """
+    return ''.join([chr(ord(left)^ord(right)) for left, right in zip(a, b)])
+
+"""
+ Base 'BlockCipher' and Pad classes for cipher instances.
+ BlockCipher supports automatic padding and type conversion. The BlockCipher
+ class was written to make the actual algorithm code more readable and
+ not for performance.
+"""
+
+class BlockCipher:
+    """ Base class for block ciphers.
+
+        Subclasses supply the attributes blockSize (bytes per block) and
+        padding (an object offering addPad/removePad) together with the
+        methods encryptBlock(block) and decryptBlock(block).  This class
+        splits the input into blocks and buffers any tail that is not
+        block aligned until the next call.
+    """
+    def __init__(self):
+        self.reset()
+
+    def reset(self):
+        """ Drop all buffered encrypt and decrypt state """
+        self.resetEncrypt()
+        self.resetDecrypt()
+    def resetEncrypt(self):
+        self.encryptBlockCount = 0
+        self.bytesToEncrypt = ''
+    def resetDecrypt(self):
+        self.decryptBlockCount = 0
+        self.bytesToDecrypt = ''
+
+    def encrypt(self, plainText, more = None):
+        """ Encrypt a string and return a binary string.
+
+            Pass a non-None 'more' when further calls will follow; the
+            unaligned tail is then kept for the next call.  With more=None
+            the tail is padded, encrypted and the encrypt state is reset.
+        """
+        self.bytesToEncrypt += plainText  # append plainText to any bytes from prior encrypt
+        numBlocks, numExtraBytes = divmod(len(self.bytesToEncrypt), self.blockSize)
+        ctBlocks = []
+        for i in range(numBlocks):
+            bStart = i*self.blockSize
+            # the block is processed before the counter moves: subclasses
+            # look at encryptBlockCount to recognise the first block
+            ctBlocks.append(self.encryptBlock(self.bytesToEncrypt[bStart:bStart+self.blockSize]))
+            self.encryptBlockCount += 1
+        if numExtraBytes > 0:        # save any bytes that are not block aligned
+            self.bytesToEncrypt = self.bytesToEncrypt[-numExtraBytes:]
+        else:
+            self.bytesToEncrypt = ''
+
+        if more is None:   # no more data expected from caller
+            finalBytes = self.padding.addPad(self.bytesToEncrypt,self.blockSize)
+            if len(finalBytes) > 0:
+                ctBlocks.append(self.encryptBlock(finalBytes))
+                self.encryptBlockCount += 1
+            self.resetEncrypt()
+        return ''.join(ctBlocks)
+
+    def decrypt(self, cipherText, more = None):
+        """ Decrypt a string and return a string.
+
+            Pass a non-None 'more' when further calls will follow.  With
+            more=None the accumulated data must be block aligned
+            (otherwise DecryptNotBlockAlignedError is raised), the padding
+            is removed and the decrypt state is reset.
+        """
+        self.bytesToDecrypt += cipherText  # append to any bytes from prior decrypt
+
+        numBlocks, numExtraBytes = divmod(len(self.bytesToDecrypt), self.blockSize)
+        if more is None:  # no more calls to decrypt, should have all the data
+            if numExtraBytes != 0:
+                raise DecryptNotBlockAlignedError('Data not block aligned on decrypt')
+
+        # hold back some bytes in case last decrypt has zero len
+        if (more is not None) and (numExtraBytes == 0) and (numBlocks >0) :
+            numBlocks -= 1
+            numExtraBytes = self.blockSize
+
+        ptBlocks = []
+        for i in range(numBlocks):
+            bStart = i*self.blockSize
+            ptBlocks.append(self.decryptBlock(self.bytesToDecrypt[bStart : bStart+self.blockSize]))
+            self.decryptBlockCount += 1
+        plainText = ''.join(ptBlocks)
+
+        # Keep the unprocessed tail in the DECRYPT buffer.  The previous
+        # code wrote it to bytesToEncrypt, which left already-decrypted
+        # data queued for the next call and wiped pending encrypt input.
+        if numExtraBytes > 0:        # save any bytes that are not block aligned
+            self.bytesToDecrypt = self.bytesToDecrypt[-numExtraBytes:]
+        else:
+            self.bytesToDecrypt = ''
+
+        if more is None:         # last decrypt remove padding
+            plainText = self.padding.removePad(plainText, self.blockSize)
+            self.resetDecrypt()
+        return plainText
+
+
+class Pad:
+ """ Common base class of the padding strategies (padWithPadLen, noPadding) """
+ def __init__(self):
+ pass # eventually could put in calculation of min and max size extension
+
+class padWithPadLen(Pad):
+    """ Padding scheme in which every pad byte carries the pad length """
+
+    def addPad(self, extraBytes, blockSize):
+        """ Append between 1 and blockSize pad bytes so that the result is
+            a whole number of blocks; input that is already aligned gains
+            one full block of padding """
+        padLength = blockSize - (len(extraBytes) % blockSize)
+        return extraBytes + chr(padLength)*padLength
+
+    def removePad(self, paddedBinaryString, blockSize):
+        """ Strip the padding; its length is read from the final byte.
+            Raises DecryptNotBlockAlignedError on empty input. """
+        if len(paddedBinaryString) <= 0:
+            raise DecryptNotBlockAlignedError('Expected More Data')
+        padLength = ord(paddedBinaryString[-1])
+        return paddedBinaryString[:-padLength]
+
+class noPadding(Pad):
+ """ No padding. Use this to get ECB behavior from encrypt/decrypt """
+
+ def addPad(self, extraBytes, blockSize):
+ """ Add no padding: return extraBytes unchanged (blockSize is ignored) """
+ return extraBytes
+
+ def removePad(self, paddedBinaryString, blockSize):
+ """ Remove no padding: return paddedBinaryString unchanged (blockSize is ignored) """
+ return paddedBinaryString
+
+"""
+ Rijndael encryption algorithm
+ This byte oriented implementation is intended to closely
+ match FIPS specification for readability. It is not implemented
+ for performance.
+"""
+
+class Rijndael(BlockCipher):
+    """ Rijndael encryption algorithm.
+
+        key       -- optional key string of exactly keySize bytes
+        padding   -- padding object used by BlockCipher.encrypt/decrypt
+        keySize   -- key length in bytes (16, 20, 24, 28 or 32)
+        blockSize -- block length in bytes (16, 20, 24, 28 or 32)
+    """
+    def __init__(self, key = None, padding = padWithPadLen(), keySize=16, blockSize=16 ):
+        self.name       = 'RIJNDAEL'
+        self.keySize    = keySize
+        self.strength   = keySize*8
+        self.blockSize  = blockSize  # blockSize is in bytes
+        self.padding    = padding    # change default to noPadding() to get normal ECB behavior
+
+        # sizes are counted in 32-bit words: 4..8 words, i.e. 16..32 bytes
+        assert( keySize%4==0 and (keySize//4) in NrTable[4] ),'key size must be 16,20,24,28 or 32 bytes'
+        assert( blockSize%4==0 and (blockSize//4) in NrTable ), 'block size must be 16,20,24,28 or 32 bytes'
+
+        self.Nb = self.blockSize//4       # Nb is number of columns of 32 bit words
+        self.Nk = keySize//4              # Nk is the key length in 32-bit words
+        self.Nr = NrTable[self.Nb][self.Nk] # The number of rounds (Nr) is a function of
+                                            # the block (Nb) and key (Nk) sizes.
+        if key is not None:
+            self.setKey(key)
+
+    def setKey(self, key):
+        """ Set a key and generate the expanded key """
+        assert( len(key) == (self.Nk*4) ), 'Key length must be same as keySize parameter'
+        self.__expandedKey = keyExpansion(self, key)
+        self.reset()                   # BlockCipher.reset()
+
+    def encryptBlock(self, plainTextBlock):
+        """ Encrypt one block; plainTextBlock is a string of 4*Nb bytes
+            and a string of the same length is returned """
+        self.state = self._toBlock(plainTextBlock)
+        AddRoundKey(self, self.__expandedKey[0:self.Nb])
+        for rnd in range(1,self.Nr):          # rounds 1 .. Nr-1
+            SubBytes(self)
+            ShiftRows(self)
+            MixColumns(self)
+            AddRoundKey(self, self.__expandedKey[rnd*self.Nb:(rnd+1)*self.Nb])
+        # the final round has no MixColumns step
+        SubBytes(self)
+        ShiftRows(self)
+        AddRoundKey(self, self.__expandedKey[self.Nr*self.Nb:(self.Nr+1)*self.Nb])
+        return self._toBString(self.state)
+
+
+    def decryptBlock(self, encryptedBlock):
+        """ Decrypt one block (a string of 4*Nb bytes) """
+        self.state = self._toBlock(encryptedBlock)
+        AddRoundKey(self, self.__expandedKey[self.Nr*self.Nb:(self.Nr+1)*self.Nb])
+        for rnd in range(self.Nr-1,0,-1):     # rounds Nr-1 .. 1
+            InvShiftRows(self)
+            InvSubBytes(self)
+            AddRoundKey(self, self.__expandedKey[rnd*self.Nb:(rnd+1)*self.Nb])
+            InvMixColumns(self)
+        InvShiftRows(self)
+        InvSubBytes(self)
+        AddRoundKey(self, self.__expandedKey[0:self.Nb])
+        return self._toBString(self.state)
+
+    def _toBlock(self, bs):
+        """ Convert binary string to array of bytes, state[col][row]"""
+        assert ( len(bs) == 4*self.Nb ), 'Rijndael blocks must be of size blockSize'
+        return [[ord(bs[4*i]),ord(bs[4*i+1]),ord(bs[4*i+2]),ord(bs[4*i+3])] for i in range(self.Nb)]
+
+    def _toBString(self, block):
+        """ Convert block (array of bytes) to binary string """
+        return ''.join([chr(rowElement) for col in block for rowElement in col])
+#-------------------------------------
+""" Number of rounds Nr = NrTable[Nb][Nk]
+
+ Nb Nk=4 Nk=5 Nk=6 Nk=7 Nk=8
+ ------------------------------------- """
+NrTable = {4: {4:10, 5:11, 6:12, 7:13, 8:14},
+ 5: {4:11, 5:11, 6:12, 7:13, 8:14},
+ 6: {4:12, 5:12, 6:12, 7:13, 8:14},
+ 7: {4:13, 5:13, 6:13, 7:13, 8:14},
+ 8: {4:14, 5:14, 6:14, 7:14, 8:14}}
+#-------------------------------------
+def keyExpansion(algInstance, keyString):
+ """ Expand a string of size keySize into a larger array
+
+ algInstance supplies Nk, Nb and Nr. The result is a list of
+ Nb*(Nr+1) four-byte columns (lists of ints) whose first Nk columns
+ are the key bytes themselves.
+ """
+ Nk, Nb, Nr = algInstance.Nk, algInstance.Nb, algInstance.Nr # for readability
+ key = [ord(byte) for byte in keyString] # convert string to list
+ w = [[key[4*i],key[4*i+1],key[4*i+2],key[4*i+3]] for i in range(Nk)]
+ for i in range(Nk,Nb*(Nr+1)):
+ temp = w[i-1] # a four byte column
+ if (i%Nk) == 0 :
+ # first column of each Nk-column group: rotate, substitute, add round constant
+ temp = temp[1:]+[temp[0]] # RotWord(temp)
+ temp = [ Sbox[byte] for byte in temp ]
+ temp[0] ^= Rcon[i/Nk]
+ elif Nk > 6 and i%Nk == 4 :
+ # keys longer than 6 words get an extra substitution in mid-group
+ temp = [ Sbox[byte] for byte in temp ] # SubWord(temp)
+ # new column = column Nk positions back XOR temp
+ w.append( [ w[i-Nk][byte]^temp[byte] for byte in range(4) ] )
+ return w
+
+Rcon = (0,0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b,0x36, # note extra '0' !!!
+ 0x6c,0xd8,0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63,0xc6,
+ 0x97,0x35,0x6a,0xd4,0xb3,0x7d,0xfa,0xef,0xc5,0x91)
+
+#-------------------------------------
+def AddRoundKey(algInstance, keyBlock):
+    """ XOR a block of round-key columns into the algorithm state, in place """
+    state = algInstance.state
+    for c in range(algInstance.Nb):
+        stateCol = state[c]
+        keyCol = keyBlock[c]
+        for r in range(4):
+            stateCol[r] ^= keyCol[r]
+#-------------------------------------
+
+def SubBytes(algInstance):
+    """ Replace every byte of the state by its Sbox entry, in place """
+    state = algInstance.state
+    for c in range(algInstance.Nb):
+        col = state[c]
+        for r in range(4):
+            col[r] = Sbox[col[r]]
+
+def InvSubBytes(algInstance):
+    """ Replace every byte of the state by its InvSbox entry, in place """
+    state = algInstance.state
+    for c in range(algInstance.Nb):
+        col = state[c]
+        for r in range(4):
+            col[r] = InvSbox[col[r]]
+
+Sbox = (0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5,
+ 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76,
+ 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0,
+ 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0,
+ 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc,
+ 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15,
+ 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a,
+ 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75,
+ 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0,
+ 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84,
+ 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b,
+ 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf,
+ 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85,
+ 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8,
+ 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5,
+ 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2,
+ 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17,
+ 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73,
+ 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88,
+ 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb,
+ 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c,
+ 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79,
+ 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9,
+ 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08,
+ 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6,
+ 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a,
+ 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e,
+ 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e,
+ 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94,
+ 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf,
+ 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68,
+ 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16)
+
+InvSbox = (0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38,
+ 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb,
+ 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87,
+ 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb,
+ 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d,
+ 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e,
+ 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2,
+ 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25,
+ 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16,
+ 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92,
+ 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda,
+ 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84,
+ 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a,
+ 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06,
+ 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02,
+ 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b,
+ 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea,
+ 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73,
+ 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85,
+ 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e,
+ 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89,
+ 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b,
+ 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20,
+ 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4,
+ 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31,
+ 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f,
+ 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d,
+ 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef,
+ 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0,
+ 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61,
+ 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26,
+ 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d)
+
+#-------------------------------------
+""" For each block size (Nb), the ShiftRow operation shifts row i
+ by the amount Ci. Note that row 0 is not shifted.
+ Nb C1 C2 C3
+ ------------------- """
+shiftOffset = { 4 : ( 0, 1, 2, 3),
+ 5 : ( 0, 1, 2, 3),
+ 6 : ( 0, 1, 2, 3),
+ 7 : ( 0, 1, 2, 4),
+ 8 : ( 0, 1, 3, 4) }
+def ShiftRows(algInstance):
+    """ Rotate rows 1..3 of the state by the offsets that shiftOffset
+        lists for this block size; the state is updated in place """
+    Nb = algInstance.Nb
+    state = algInstance.state
+    offsets = shiftOffset[Nb]
+    for r in range(1,4):     # row 0 remains unchanged and can be skipped
+        rotated = [state[(c+offsets[r]) % Nb][r] for c in range(Nb)]
+        for c in range(Nb):
+            state[c][r] = rotated[c]
+def InvShiftRows(algInstance):
+    """ Undo ShiftRows: rotate rows 1..3 of the state by the same
+        shiftOffset amounts in the opposite direction, in place """
+    Nb = algInstance.Nb
+    state = algInstance.state
+    offsets = shiftOffset[Nb]
+    for r in range(1,4):     # row 0 remains unchanged and can be skipped
+        rotated = [state[(c+Nb-offsets[r]) % Nb][r] for c in range(Nb)]
+        for c in range(Nb):
+            state[c][r] = rotated[c]
+#-------------------------------------
+def MixColumns(a):
+    """ Mix the four bytes of every state column with the fixed
+        2-3-1-1 coefficient pattern, using mul() for the products.
+        The state is updated in place. """
+    for j in range(a.Nb):    # for each column
+        col = a.state[j]
+        s0, s1, s2, s3 = col[0], col[1], col[2], col[3]
+        mixed = (mul(2,s0)^mul(3,s1)^mul(1,s2)^mul(1,s3),
+                 mul(1,s0)^mul(2,s1)^mul(3,s2)^mul(1,s3),
+                 mul(1,s0)^mul(1,s1)^mul(2,s2)^mul(3,s3),
+                 mul(3,s0)^mul(1,s1)^mul(1,s2)^mul(2,s3))
+        for i in range(4):
+            col[i] = mixed[i]
+
+def InvMixColumns(a):
+    """ Inverse of MixColumns: mix the four bytes of every state column
+        with the 0E-0B-0D-09 coefficient pattern, using mul() for the
+        products.  The state is updated in place. """
+    for j in range(a.Nb):    # for each column
+        col = a.state[j]
+        s0, s1, s2, s3 = col[0], col[1], col[2], col[3]
+        mixed = (mul(0x0E,s0)^mul(0x0B,s1)^mul(0x0D,s2)^mul(0x09,s3),
+                 mul(0x09,s0)^mul(0x0E,s1)^mul(0x0B,s2)^mul(0x0D,s3),
+                 mul(0x0D,s0)^mul(0x09,s1)^mul(0x0E,s2)^mul(0x0B,s3),
+                 mul(0x0B,s0)^mul(0x0D,s1)^mul(0x09,s2)^mul(0x0E,s3))
+        for i in range(4):
+            col[i] = mixed[i]
+
+#-------------------------------------
+def mul(a, b):
+    """ Multiply two field elements via the Logtable/Alogtable lookup
+        tables; needed for MixColumn and InvMixColumn.
+        The product is 0 whenever either factor is 0. """
+    if a == 0 or b == 0:
+        return 0
+    logSum = Logtable[a] + Logtable[b]
+    return Alogtable[logSum%255]
+
+Logtable = ( 0, 0, 25, 1, 50, 2, 26, 198, 75, 199, 27, 104, 51, 238, 223, 3,
+ 100, 4, 224, 14, 52, 141, 129, 239, 76, 113, 8, 200, 248, 105, 28, 193,
+ 125, 194, 29, 181, 249, 185, 39, 106, 77, 228, 166, 114, 154, 201, 9, 120,
+ 101, 47, 138, 5, 33, 15, 225, 36, 18, 240, 130, 69, 53, 147, 218, 142,
+ 150, 143, 219, 189, 54, 208, 206, 148, 19, 92, 210, 241, 64, 70, 131, 56,
+ 102, 221, 253, 48, 191, 6, 139, 98, 179, 37, 226, 152, 34, 136, 145, 16,
+ 126, 110, 72, 195, 163, 182, 30, 66, 58, 107, 40, 84, 250, 133, 61, 186,
+ 43, 121, 10, 21, 155, 159, 94, 202, 78, 212, 172, 229, 243, 115, 167, 87,
+ 175, 88, 168, 80, 244, 234, 214, 116, 79, 174, 233, 213, 231, 230, 173, 232,
+ 44, 215, 117, 122, 235, 22, 11, 245, 89, 203, 95, 176, 156, 169, 81, 160,
+ 127, 12, 246, 111, 23, 196, 73, 236, 216, 67, 31, 45, 164, 118, 123, 183,
+ 204, 187, 62, 90, 251, 96, 177, 134, 59, 82, 161, 108, 170, 85, 41, 157,
+ 151, 178, 135, 144, 97, 190, 220, 252, 188, 149, 207, 205, 55, 63, 91, 209,
+ 83, 57, 132, 60, 65, 162, 109, 71, 20, 42, 158, 93, 86, 242, 211, 171,
+ 68, 17, 146, 217, 35, 32, 46, 137, 180, 124, 184, 38, 119, 153, 227, 165,
+ 103, 74, 237, 222, 197, 49, 254, 24, 13, 99, 140, 128, 192, 247, 112, 7)
+
+Alogtable= ( 1, 3, 5, 15, 17, 51, 85, 255, 26, 46, 114, 150, 161, 248, 19, 53,
+ 95, 225, 56, 72, 216, 115, 149, 164, 247, 2, 6, 10, 30, 34, 102, 170,
+ 229, 52, 92, 228, 55, 89, 235, 38, 106, 190, 217, 112, 144, 171, 230, 49,
+ 83, 245, 4, 12, 20, 60, 68, 204, 79, 209, 104, 184, 211, 110, 178, 205,
+ 76, 212, 103, 169, 224, 59, 77, 215, 98, 166, 241, 8, 24, 40, 120, 136,
+ 131, 158, 185, 208, 107, 189, 220, 127, 129, 152, 179, 206, 73, 219, 118, 154,
+ 181, 196, 87, 249, 16, 48, 80, 240, 11, 29, 39, 105, 187, 214, 97, 163,
+ 254, 25, 43, 125, 135, 146, 173, 236, 47, 113, 147, 174, 233, 32, 96, 160,
+ 251, 22, 58, 78, 210, 109, 183, 194, 93, 231, 50, 86, 250, 21, 63, 65,
+ 195, 94, 226, 61, 71, 201, 64, 192, 91, 237, 44, 116, 156, 191, 218, 117,
+ 159, 186, 213, 100, 172, 239, 42, 126, 130, 157, 188, 223, 122, 142, 137, 128,
+ 155, 182, 193, 88, 232, 35, 101, 175, 234, 37, 111, 177, 200, 67, 197, 84,
+ 252, 31, 33, 99, 165, 244, 7, 9, 27, 45, 119, 153, 176, 203, 70, 202,
+ 69, 207, 74, 222, 121, 139, 134, 145, 168, 227, 62, 66, 198, 81, 243, 14,
+ 18, 54, 90, 238, 41, 123, 141, 140, 143, 138, 133, 148, 167, 242, 13, 23,
+ 57, 75, 221, 124, 132, 151, 162, 253, 28, 36, 108, 180, 199, 82, 246, 1)
+
+
+
+
+"""
+ AES Encryption Algorithm
+ The AES algorithm is just Rijndael algorithm restricted to the default
+ blockSize of 128 bits.
+"""
+
+class AES(Rijndael):
+    """ AES is Rijndael with the block size fixed at 16 bytes (128 bits)
+        and the key size limited to 16, 24 or 32 bytes
+    """
+    def __init__(self, key = None, padding = padWithPadLen(), keySize=16):
+        """ Initialize AES, keySize is in bytes.
+            Raises BadKeySizeError for any other key size. """
+        if keySize not in (16, 24, 32):
+            raise BadKeySizeError('Illegal AES key size, must be 16, 24, or 32 bytes')
+
+        Rijndael.__init__( self, key, padding=padding, keySize=keySize, blockSize=16 )
+
+        # Rijndael.__init__ set the generic name; override it afterwards
+        self.name = 'AES'
+
+
+"""
+ CBC mode of encryption for block ciphers.
+ This algorithm mode wraps any BlockCipher to make a
+ Cipher Block Chaining mode.
+"""
+from random import Random # should change to crypto.random!!!
+
+
+class CBC(BlockCipher):
+ """ The CBC class wraps block ciphers to make cipher block chaining (CBC) mode
+ algorithms. The initialization (IV) is automatic if set to None. Padding
+ is also automatic based on the Pad class used to initialize the algorithm
+ """
+ def __init__(self, blockCipherInstance, padding = padWithPadLen()):
+ """ CBC algorithms are created by initializing with a BlockCipher instance """
+ self.baseCipher = blockCipherInstance
+ self.name = self.baseCipher.name + '_CBC'
+ self.blockSize = self.baseCipher.blockSize
+ self.keySize = self.baseCipher.keySize
+ self.padding = padding
+ self.baseCipher.padding = noPadding() # baseCipher should NOT pad!!
+ # NOTE(review): automatic IVs come from random.Random seeded with the
+ # current time string, which is not a cryptographic random source.
+ self.r = Random() # for IV generation, currently uses
+ # mediocre standard distro version <----------------
+ import time
+ newSeed = time.ctime()+str(self.r) # seed with instance location
+ self.r.seed(newSeed) # to make unique
+ self.reset()
+
+ def setKey(self, key):
+ """ Forward the key to the wrapped block cipher """
+ self.baseCipher.setKey(key)
+
+ # Overload to reset both CBC state and the wrapped baseCipher
+ def resetEncrypt(self):
+ BlockCipher.resetEncrypt(self) # reset CBC encrypt state (super class)
+ self.baseCipher.resetEncrypt() # reset base cipher encrypt state
+
+ def resetDecrypt(self):
+ BlockCipher.resetDecrypt(self) # reset CBC state (super class)
+ self.baseCipher.resetDecrypt() # reset base cipher decrypt state
+
+ def encrypt(self, plainText, iv=None, more=None):
+ """ CBC encryption - overloads baseCipher to allow optional explicit IV
+ when iv=None, iv is auto generated!
+ """
+ # the IV may only be supplied before the first block has been processed
+ if self.encryptBlockCount == 0:
+ self.iv = iv
+ else:
+ assert(iv==None), 'IV used only on first call to encrypt'
+
+ return BlockCipher.encrypt(self,plainText, more=more)
+
+ def decrypt(self, cipherText, iv=None, more=None):
+ """ CBC decryption - overloads baseCipher to allow optional explicit IV
+ when iv=None, the first ciphertext block is consumed as the IV
+ (see decryptBlock) and produces no plaintext.
+ """
+ # the IV may only be supplied before the first block has been processed
+ if self.decryptBlockCount == 0:
+ self.iv = iv
+ else:
+ assert(iv==None), 'IV used only on first call to decrypt'
+
+ return BlockCipher.decrypt(self, cipherText, more=more)
+
+ def encryptBlock(self, plainTextBlock):
+ """ CBC block encryption, IV is set with 'encrypt'.
+ For the first block with an automatic IV the returned string is the
+ IV followed by the ciphertext block. """
+ auto_IV = ''
+ if self.encryptBlockCount == 0:
+ if self.iv == None:
+ # generate IV and use
+ self.iv = ''.join([chr(self.r.randrange(256)) for i in range(self.blockSize)])
+ self.prior_encr_CT_block = self.iv
+ auto_IV = self.prior_encr_CT_block # prepend IV if it's automatic
+ else: # application provided IV
+ assert(len(self.iv) == self.blockSize ),'IV must be same length as block'
+ self.prior_encr_CT_block = self.iv
+ """ encrypt the prior CT XORed with the PT """
+ ct = self.baseCipher.encryptBlock( xor(self.prior_encr_CT_block, plainTextBlock) )
+ self.prior_encr_CT_block = ct
+ return auto_IV+ct
+
+ def decryptBlock(self, encryptedBlock):
+ """ Decrypt a single block """
+
+ if self.decryptBlockCount == 0: # first call, process IV
+ if self.iv == None: # auto decrypt IV?
+ # no explicit IV: remember this block as the IV and emit nothing
+ self.prior_CT_block = encryptedBlock
+ return ''
+ else:
+ assert(len(self.iv)==self.blockSize),"Bad IV size on CBC decryption"
+ self.prior_CT_block = self.iv
+
+ dct = self.baseCipher.decryptBlock(encryptedBlock)
+ """ XOR the prior decrypted CT with the prior CT """
+ dct_XOR_priorCT = xor( self.prior_CT_block, dct )
+
+ # this ciphertext block is the chaining value for the next block
+ self.prior_CT_block = encryptedBlock
+
+ return dct_XOR_priorCT
+
+
+"""
+ AES_CBC Encryption Algorithm
+"""
+
+class AES_CBC(CBC):
+ """ AES encryption in CBC feedback mode """
+ def __init__(self, key=None, padding=padWithPadLen(), keySize=16):
+ # the inner AES cipher is built with noPadding(); 'padding' is applied by the CBC layer
+ CBC.__init__( self, AES(key, noPadding(), keySize), padding)
+ self.name = 'AES_CBC'
--- /dev/null
+#! /usr/bin/env python
+
+import sys, os
+import hmac
+from struct import pack
+import hashlib
+
+
+# interface to the needed routines in libalfcrypto
+def _load_libalfcrypto():
+ # Bind the native alfcrypto shared library through ctypes and return the
+ # tuple of wrapper classes (AES_CBC, Pukall_Cipher, Topaz_Cipher).
+ # Raises Exception('libalfcrypto not found') when the library file is not
+ # present in the directory named by sys.path[0].
+ import ctypes
+ from ctypes import CDLL, byref, POINTER, c_void_p, c_char_p, c_int, c_long, \
+ Structure, c_ulong, create_string_buffer, addressof, string_at, cast, sizeof
+
+ # pointer size (4 or 8 bytes) selects the 32-bit or 64-bit library build
+ pointer_size = ctypes.sizeof(ctypes.c_voidp)
+ name_of_lib = None
+ if sys.platform.startswith('darwin'):
+ name_of_lib = 'libalfcrypto.dylib'
+ elif sys.platform.startswith('win'):
+ if pointer_size == 4:
+ name_of_lib = 'alfcrypto.dll'
+ else:
+ name_of_lib = 'alfcrypto64.dll'
+ else:
+ if pointer_size == 4:
+ name_of_lib = 'libalfcrypto32.so'
+ else:
+ name_of_lib = 'libalfcrypto64.so'
+
+ # the library is looked up only in the first sys.path entry
+ libalfcrypto = sys.path[0] + os.sep + name_of_lib
+
+ if not os.path.isfile(libalfcrypto):
+ raise Exception('libalfcrypto not found')
+
+ libalfcrypto = CDLL(libalfcrypto)
+
+ c_char_pp = POINTER(c_char_p)
+ c_int_p = POINTER(c_int)
+
+
+ def F(restype, name, argtypes):
+ # fetch the exported function 'name' and attach its ctypes prototype
+ func = getattr(libalfcrypto, name)
+ func.restype = restype
+ func.argtypes = argtypes
+ return func
+
+ # aes cbc decryption
+ #
+ # struct aes_key_st {
+ # unsigned long rd_key[4 *(AES_MAXNR + 1)];
+ # int rounds;
+ # };
+ #
+ # typedef struct aes_key_st AES_KEY;
+ #
+ # int AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key);
+ #
+ #
+ # void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
+ # const unsigned long length, const AES_KEY *key,
+ # unsigned char *ivec, const int enc);
+
+ AES_MAXNR = 14
+
+ class AES_KEY(Structure):
+ _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))), ('rounds', c_int)]
+
+ AES_KEY_p = POINTER(AES_KEY)
+ AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p, c_int])
+ AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',[c_char_p, c_int, AES_KEY_p])
+
+
+
+ # Pukall 1 Cipher
+ # unsigned char *PC1(const unsigned char *key, unsigned int klen, const unsigned char *src,
+ # unsigned char *dest, unsigned int len, int decryption);
+
+ PC1 = F(c_char_p, 'PC1', [c_char_p, c_ulong, c_char_p, c_char_p, c_ulong, c_ulong])
+
+ # Topaz Encryption
+ # typedef struct _TpzCtx {
+ # unsigned int v[2];
+ # } TpzCtx;
+ #
+ # void topazCryptoInit(TpzCtx *ctx, const unsigned char *key, int klen);
+ # void topazCryptoDecrypt(const TpzCtx *ctx, const unsigned char *in, unsigned char *out, int len);
+
+ # NOTE(review): the C comment above declares 'unsigned int v[2]' while the
+ # ctypes field uses c_long; confirm the layouts agree on every platform.
+ class TPZ_CTX(Structure):
+ _fields_ = [('v', c_long * 2)]
+
+ TPZ_CTX_p = POINTER(TPZ_CTX)
+ topazCryptoInit = F(None, 'topazCryptoInit', [TPZ_CTX_p, c_char_p, c_ulong])
+ topazCryptoDecrypt = F(None, 'topazCryptoDecrypt', [TPZ_CTX_p, c_char_p, c_char_p, c_ulong])
+
+
+ class AES_CBC(object):
+ # AES-CBC decryption backed by the library's AES_set_decrypt_key / AES_cbc_encrypt
+ def __init__(self):
+ self._blocksize = 0
+ self._keyctx = None
+ self._iv = 0
+
+ def set_decrypt_key(self, userkey, iv):
+ # userkey must be 16, 24 or 32 bytes; iv is stored for later decrypt() calls
+ self._blocksize = len(userkey)
+ if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) :
+ raise Exception('AES CBC improper key used')
+ return # unreachable: follows the raise above
+ keyctx = self._keyctx = AES_KEY()
+ self._iv = iv
+ rv = AES_set_decrypt_key(userkey, len(userkey) * 8, keyctx)
+ if rv < 0:
+ raise Exception('Failed to initialize AES CBC key')
+
+ def decrypt(self, data):
+ # returns the decrypted bytes; a fresh copy of the stored IV is handed
+ # to the library on every call
+ out = create_string_buffer(len(data))
+ mutable_iv = create_string_buffer(self._iv, len(self._iv))
+ rv = AES_cbc_encrypt(data, out, len(data), self._keyctx, mutable_iv, 0)
+ # AES_cbc_encrypt is bound with restype None, so rv is always None and
+ # this check can never fire
+ if rv == 0:
+ raise Exception('AES CBC decryption failed')
+ return out.raw
+
+ class Pukall_Cipher(object):
+ # wrapper around the library's PC1 routine
+ def __init__(self):
+ self.key = None
+
+ def PC1(self, key, src, decryption=True):
+ # returns a string of len(src) bytes filled in by the native PC1 call
+ self.key = key
+ out = create_string_buffer(len(src))
+ de = 0
+ if decryption:
+ de = 1
+ rv = PC1(key, len(key), src, out, len(src), de)
+ return out.raw
+
+ class Topaz_Cipher(object):
+ # wrapper around topazCryptoInit / topazCryptoDecrypt
+ def __init__(self):
+ self._ctx = None
+
+ def ctx_init(self, key):
+ # builds a new TPZ_CTX from key, remembers it on self and returns it
+ tpz_ctx = self._ctx = TPZ_CTX()
+ topazCryptoInit(tpz_ctx, key, len(key))
+ return tpz_ctx
+
+ def decrypt(self, data, ctx=None):
+ # uses the context from the last ctx_init() unless one is passed in
+ if ctx == None:
+ ctx = self._ctx
+ out = create_string_buffer(len(data))
+ topazCryptoDecrypt(ctx, data, out, len(data))
+ return out.raw
+
+ print "Using Library AlfCrypto DLL/DYLIB/SO"
+ return (AES_CBC, Pukall_Cipher, Topaz_Cipher)
+
+
+def _load_python_alfcrypto():
+    """ Build the pure-Python implementations of the three ciphers.
+
+        Returns the tuple of classes (AES_CBC, Pukall_Cipher, Topaz_Cipher).
+        Raises ImportError when the aescbc module cannot be imported.
+    """
+
+    import aescbc
+
+    class Pukall_Cipher(object):
+        """ Pukall Cipher 1 (PC1) in pure Python """
+        def __init__(self):
+            self.key = None
+
+        def PC1(self, key, src, decryption=True):
+            """ Run PC1 over src with a 16-byte key and return the result
+                string; prints a message and returns None for any other
+                key length.  decryption selects the direction. """
+            sum1 = 0
+            sum2 = 0
+            keyXorVal = 0
+            if len(key)!=16:
+                print "Bad key length!"
+                return None
+            # the key is used as eight big-endian 16-bit words
+            wkey = []
+            for i in xrange(8):
+                wkey.append(ord(key[i*2])<<8 | ord(key[i*2+1]))
+            # output characters are collected and joined once, instead of
+            # growing a string one character at a time
+            dst = []
+            for i in xrange(len(src)):
+                temp1 = 0
+                byteXorVal = 0
+                for j in xrange(8):
+                    temp1 ^= wkey[j]
+                    sum2  = (sum2+j)*20021 + sum1
+                    sum1  = (temp1*346)&0xFFFF
+                    sum2  = (sum2+sum1)&0xFFFF
+                    temp1 = (temp1*20021+1)&0xFFFF
+                    byteXorVal ^= temp1 ^ sum2
+                curByte = ord(src[i])
+                # the key feedback always uses the plaintext byte: taken
+                # before the XOR when encrypting, after it when decrypting
+                if not decryption:
+                    keyXorVal = curByte * 257
+                curByte = ((curByte ^ (byteXorVal >> 8)) ^ byteXorVal) & 0xFF
+                if decryption:
+                    keyXorVal = curByte * 257
+                for j in xrange(8):
+                    wkey[j] ^= keyXorVal
+                dst.append(chr(curByte))
+            return ''.join(dst)
+
+    class Topaz_Cipher(object):
+        """ Topaz stream cipher in pure Python """
+        def __init__(self):
+            self._ctx = None
+
+        def ctx_init(self, key):
+            """ Derive the two-word context [ctx1, ctx2] from key, store it
+                on the instance and return it """
+            # NOTE(review): with an empty key the loop never runs and ctx2
+            # is never assigned - confirm callers always pass a non-empty key
+            ctx1 = 0x0CAFFE19E
+            for keyChar in key:
+                keyByte = ord(keyChar)
+                ctx2 = ctx1
+                ctx1 = ((((ctx1 >>2) * (ctx1 >>7))&0xFFFFFFFF) ^ (keyByte * keyByte * 0x0F902007)& 0xFFFFFFFF )
+            self._ctx = [ctx1, ctx2]
+            return [ctx1,ctx2]
+
+        def decrypt(self, data, ctx=None):
+            """ Decrypt data with ctx (default: the context from the last
+                ctx_init call); the context list itself is not modified """
+            if ctx == None:
+                ctx = self._ctx
+            ctx1 = ctx[0]
+            ctx2 = ctx[1]
+            plainChars = []
+            for dataChar in data:
+                dataByte = ord(dataChar)
+                m = (dataByte ^ ((ctx1 >> 3) &0xFF) ^ ((ctx2<<3) & 0xFF)) &0xFF
+                ctx2 = ctx1
+                ctx1 = (((ctx1 >> 2) * (ctx1 >> 7)) &0xFFFFFFFF) ^((m * m * 0x0F902007) &0xFFFFFFFF)
+                plainChars.append(chr(m))
+            return ''.join(plainChars)
+
+    class AES_CBC(object):
+        """ AES-CBC decryption backed by the aescbc module """
+        def __init__(self):
+            self._key = None
+            self._iv = None
+            self.aes = None
+
+        def set_decrypt_key(self, userkey, iv):
+            """ Remember key and IV and build the aescbc cipher (no padding) """
+            self._key = userkey
+            self._iv = iv
+            self.aes = aescbc.AES_CBC(userkey, aescbc.noPadding(), len(userkey))
+
+        def decrypt(self, data):
+            """ Decrypt data; the stored IV is prepended to the ciphertext
+                because no explicit iv argument is given to aescbc """
+            iv = self._iv
+            cleartext = self.aes.decrypt(iv + data)
+            return cleartext
+
+    return (AES_CBC, Pukall_Cipher, Topaz_Cipher)
+
+
+def _load_crypto():
+    """ Try the native loader first and the pure-Python loader second;
+        return the (AES_CBC, Pukall_Cipher, Topaz_Cipher) triple of the
+        first loader that succeeds, or three Nones when both fail """
+    for loader in (_load_libalfcrypto, _load_python_alfcrypto):
+        try:
+            aes_cbc, pukall_cipher, topaz_cipher = loader()
+        except (ImportError, Exception):
+            # any failure just means: fall through to the next backend
+            continue
+        return aes_cbc, pukall_cipher, topaz_cipher
+    return None, None, None
+
+AES_CBC, Pukall_Cipher, Topaz_Cipher = _load_crypto()
+
+
+class KeyIVGen(object):
+    # this only exists in openssl so we will use pure python implementation instead
+    # PKCS5_PBKDF2_HMAC_SHA1 = F(c_int, 'PKCS5_PBKDF2_HMAC_SHA1',
+    #                             [c_char_p, c_ulong, c_char_p, c_ulong, c_ulong, c_ulong, c_char_p])
+    def pbkdf2(self, passwd, salt, iter, keylen):
+        """ Derive keylen bytes from passwd and salt with PBKDF2 over
+            HMAC-SHA1, using iter iterations per output block """
+        sha = hashlib.sha1
+        digest_size = sha().digest_size
+        # one keyed HMAC is prepared up front and copied for every use
+        keyed = hmac.new( passwd, None, sha )
+
+        def xorstr( a, b ):
+            if len(a) != len(b):
+                raise Exception("xorstr(): lengths differ")
+            return ''.join([chr(ord(x)^ord(y)) for x, y in zip(a, b)])
+
+        def prf( data ):
+            hm = keyed.copy()
+            hm.update( data )
+            return hm.digest()
+
+        def derive_block( blocknum ):
+            # U1 = PRF(salt || blocknum); the block is U1 xor U2 xor ... xor U_iter
+            U = prf( salt + pack('>i',blocknum ) )
+            T = U
+            for _ in range(iter - 1):
+                U = prf( U )
+                T = xorstr( T, U )
+            return T
+
+        # number of output blocks to produce, rounded up
+        blocks, remainder = divmod(keylen, digest_size)
+        if remainder != 0:
+            blocks += 1
+        derived = ''.join([derive_block( i ) for i in range(1, blocks+1)])
+        return derived[0: keylen]
+
+
--- /dev/null
+#! /usr/bin/python
+
+"""
+
+Comprehensive Mazama Book DRM with Topaz Cryptography V2.2
+
+-----BEGIN PUBLIC KEY-----
+MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDdBHJ4CNc6DNFCw4MRCw4SWAK6
+M8hYfnNEI0yQmn5Ti+W8biT7EatpauE/5jgQMPBmdNrDr1hbHyHBSP7xeC2qlRWC
+B62UCxeu/fpfnvNHDN/wPWWH4jynZ2M6cdcnE5LQ+FfeKqZn7gnG2No1U9h7oOHx
+y2/pHuYme7U1TsgSjwIDAQAB
+-----END PUBLIC KEY-----
+
+"""
+
+from __future__ import with_statement
+
+import csv
+import sys
+import os
+import getopt
+import zlib
+from struct import pack
+from struct import unpack
+from ctypes import windll, c_char_p, c_wchar_p, c_uint, POINTER, byref, \
+ create_unicode_buffer, create_string_buffer, CFUNCTYPE, addressof, \
+ string_at, Structure, c_void_p, cast
+import _winreg as winreg
+import Tkinter
+import Tkconstants
+import tkMessageBox
+import traceback
+import hashlib
+
+# Character capacity (terminator excluded) of the buffer handed to GetSystemDirectoryW.
+MAX_PATH = 255
+
+# Win32 DLL handles used by the ctypes wrappers defined further down.
+kernel32 = windll.kernel32
+advapi32 = windll.advapi32
+crypt32 = windll.crypt32
+
+# NOTE(review): a `global` statement at module scope binds nothing; these
+# names only exist once main() or the parse helpers assign them.
+global kindleDatabase
+global bookFile
+global bookPayloadOffset
+global bookHeaderRecords
+global bookMetadata
+global bookKey
+global command
+
+#
+# Various character maps used to decrypt books. Probably supposed to act as obfuscation
+#
+
+# charMap1: used by main() when encoding the volume serial, user name and DSN.
+charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
+# charMap2: used for the hashed keys and encoded values of kindle.info.
+charMap2 = "AaZzB0bYyCc1XxDdW2wEeVv3FfUuG4g-TtHh5SsIiR6rJjQq7KkPpL8lOoMm9Nn_"
+# charMap3: output alphabet of encodePID().
+charMap3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
+# charMap4: output alphabet of generateDevicePID().
+charMap4 = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
+
+#
+# Exceptions for all the problems that might happen during the script
+#
+
+class CMBDTCError(Exception):
+    """Recoverable failure, e.g. a dkey record that does not match a PID."""
+
+class CMBDTCFatal(Exception):
+    """Unrecoverable failure (bad file, bad command line, failed unprotect)."""
+
+#
+# Stolen stuff
+#
+
+class DataBlob(Structure):
+    """ctypes structure passed to CryptUnprotectData: a byte count and a data pointer."""
+    _fields_ = [
+        ('cbData', c_uint),    # number of bytes at pbData
+        ('pbData', c_void_p),  # pointer to the bytes
+    ]
+
+
+DataBlob_p = POINTER(DataBlob)
+
+def GetSystemDirectory():
+    """Build the GetSystemDirectory() wrapper around kernel32.GetSystemDirectoryW.
+
+    The ctypes prototype is configured once; the returned closure replaces
+    this factory under the same name on the line after the definition.
+    """
+    native = kernel32.GetSystemDirectoryW
+    native.restype = c_uint
+    native.argtypes = [c_wchar_p, c_uint]
+
+    def GetSystemDirectory():
+        # MAX_PATH characters plus one extra slot in the buffer.
+        path_buf = create_unicode_buffer(MAX_PATH + 1)
+        native(path_buf, len(path_buf))
+        return path_buf.value
+
+    return GetSystemDirectory
+GetSystemDirectory = GetSystemDirectory()
+
+
+def GetVolumeSerialNumber():
+    """Build the GetVolumeSerialNumber(path) wrapper around kernel32.GetVolumeInformationW.
+
+    The returned closure replaces this factory under the same name on the
+    line after the definition.
+    """
+    native = kernel32.GetVolumeInformationW
+    native.restype = c_uint
+    native.argtypes = [c_wchar_p, c_wchar_p, c_uint,
+                       POINTER(c_uint), POINTER(c_uint),
+                       POINTER(c_uint), c_wchar_p, c_uint]
+
+    def GetVolumeSerialNumber(path):
+        # Only the serial-number output is requested; every other output is None/0.
+        serial = c_uint(0)
+        native(path, None, 0, byref(serial), None, None, None, 0)
+        return serial.value
+
+    return GetVolumeSerialNumber
+GetVolumeSerialNumber = GetVolumeSerialNumber()
+
+
+def GetUserName():
+    """Build the GetUserName() wrapper around advapi32.GetUserNameW.
+
+    The returned closure replaces this factory under the same name on the
+    line after the definition.
+    """
+    native = advapi32.GetUserNameW
+    native.restype = c_uint
+    native.argtypes = [c_wchar_p, POINTER(c_uint)]
+
+    def GetUserName():
+        name_buf = create_unicode_buffer(32)
+        capacity = c_uint(len(name_buf))
+        # Retry with a buffer twice as large until the call reports success.
+        while not native(name_buf, byref(capacity)):
+            name_buf = create_unicode_buffer(len(name_buf) * 2)
+            capacity.value = len(name_buf)
+        # Keep every other byte of the UTF-16-LE encoding (the low byte of each unit).
+        return name_buf.value.encode('utf-16-le')[::2]
+
+    return GetUserName
+GetUserName = GetUserName()
+
+
+def CryptUnprotectData():
+    """Build the CryptUnprotectData(indata, entropy) wrapper around crypt32.
+
+    The returned closure replaces this factory under the same name on the
+    line after the definition. The closure returns the unprotected bytes
+    and raises CMBDTCFatal when the API call reports failure.
+    """
+    native = crypt32.CryptUnprotectData
+    native.restype = c_uint
+    native.argtypes = [DataBlob_p, c_wchar_p, DataBlob_p,
+                       c_void_p, c_void_p, c_uint, DataBlob_p]
+
+    def CryptUnprotectData(indata, entropy):
+        # Keep the ctypes buffers referenced for as long as the blobs point at them.
+        in_buf = create_string_buffer(indata)
+        in_blob = DataBlob(len(indata), cast(in_buf, c_void_p))
+        entropy_buf = create_string_buffer(entropy)
+        entropy_blob = DataBlob(len(entropy), cast(entropy_buf, c_void_p))
+        out_blob = DataBlob()
+        succeeded = native(byref(in_blob), None, byref(entropy_blob),
+                           None, None, 0, byref(out_blob))
+        if not succeeded:
+            raise CMBDTCFatal("Failed to Unprotect Data")
+        return string_at(out_blob.pbData, out_blob.cbData)
+
+    return CryptUnprotectData
+CryptUnprotectData = CryptUnprotectData()
+
+#
+# Returns the MD5 digest of "message"
+#
+
+def MD5(message):
+    """Return the raw MD5 digest of message."""
+    return hashlib.md5(message).digest()
+
+#
+# Returns the SHA1 digest of "message"
+#
+
+def SHA1(message):
+    """Return the raw SHA-1 digest of message."""
+    return hashlib.sha1(message).digest()
+
+#
+# Open the book file at path
+#
+
+def openBook(path):
+    """Open the book file at path for binary reading.
+
+    Raises CMBDTCFatal when the file cannot be opened. Only I/O failures
+    are translated, so KeyboardInterrupt and SystemExit pass through.
+    """
+    try:
+        return open(path, 'rb')
+    except (IOError, OSError):
+        raise CMBDTCFatal("Could not open book file: " + path)
+#
+# Encode the bytes in data with the characters in map
+#
+
+def encode(data, map):
+    """Encode each byte of data as two characters taken from map.
+
+    The first character indexes map with (byte ^ 0x80) // len(map), the
+    second with byte % len(map).
+    """
+    radix = len(map)
+    pieces = []
+    for char in data:
+        value = ord(char)
+        pieces.append(map[(value ^ 0x80) // radix])
+        pieces.append(map[value % radix])
+    return "".join(pieces)
+
+#
+# Hash the bytes in data and then encode the digest with the characters in map
+#
+
+def encodeHash(data,map):
+    """Return the MD5 digest of data, encoded with the characters in map."""
+    digest = MD5(data)
+    return encode(digest, map)
+
+#
+# Decode the string in data with the characters in map. Returns the decoded bytes
+#
+
+def decode(data,map):
+    """Decode character pairs of data back into bytes using map.
+
+    Each pair (high, low) of positions in map yields the byte
+    (((high * 0x40) ^ 0x80) & 0xFF) + low.
+    """
+    decoded = []
+    for pos in range(0, len(data), 2):
+        high = map.find(data[pos])
+        low = map.find(data[pos + 1])
+        decoded.append(pack("B", (((high * 0x40) ^ 0x80) & 0xFF) + low))
+    return "".join(decoded)
+
+#
+# Locate and open the Kindle.info file (Hopefully in the way it is done in the Kindle application)
+#
+
+def openKindleInfo():
+    """Open the Kindle for PC kindle.info file for reading and return it.
+
+    The 'Local AppData' folder is read from the current user's
+    "Shell Folders" registry key. The key handle is closed before
+    returning; the caller is responsible for closing the returned file.
+    """
+    regkey = winreg.OpenKey(winreg.HKEY_CURRENT_USER, "Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\Shell Folders\\")
+    try:
+        path = winreg.QueryValueEx(regkey, 'Local AppData')[0]
+    finally:
+        # Release the registry handle even when the value is missing.
+        winreg.CloseKey(regkey)
+    return open(path+'\\Amazon\\Kindle For PC\\{AMAwzsaPaaZAzmZzZQzgZCAkZ3AjA_AY}\\kindle.info','r')
+
+#
+# Parse the Kindle.info file and return the records as a dictionary of key/value pairs
+#
+
+def parseKindleInfo():
+    """Read kindle.info and return its records as a {hashedKey: encodedValue} dict.
+
+    The first character of the file is skipped, the remainder is split on
+    '{' into records and each record on ':' into key and value. The file
+    is closed once its contents have been read.
+    """
+    infoReader = openKindleInfo()
+    try:
+        infoReader.read(1)  # skip the first character (presumably the leading '{' - verify)
+        data = infoReader.read()
+    finally:
+        infoReader.close()
+
+    DB = {}
+    for item in data.split('{'):
+        splito = item.split(':')
+        DB[splito[0]] = splito[1]
+    return DB
+
+#
+# Find out whether the original string for a hashed/encoded string is known. If so, return the original string; otherwise return an empty string. (Totally not optimal)
+#
+
+def findNameForHash(hash):
+    """Return the known record name whose encoded hash equals hash, or "".
+
+    Each candidate name is hashed and encoded with charMap2 and compared
+    against hash. An unknown hash yields the empty string (previously the
+    last candidate name was returned by mistake).
+    """
+    names = ["kindle.account.tokens","kindle.cookie.item","eulaVersionAccepted","login_date","kindle.token.item","login","kindle.key.item","kindle.name.info","kindle.device.info", "MazamaRandomNumber"]
+    for name in names:
+        if hash == encodeHash(name, charMap2):
+            return name
+    return ""
+
+#
+# Print all the records from the kindle.info file (option -i)
+#
+
+def printKindleInfo():
+ # Print every record of the global kindleDatabase: the record name when
+ # findNameForHash() reports one (otherwise "Unknown Record"), and the
+ # decoded, decrypted value from getKindleInfoValueForHash().
+ for record in kindleDatabase:
+ name = findNameForHash(record)
+ if name != "" :
+ print (name)
+ print ("--------------------------\n")
+ else :
+ print ("Unknown Record")
+ # NOTE(review): indentation is lost in this view; the value is presumably
+ # printed for known and unknown records alike - confirm against the source.
+ print getKindleInfoValueForHash(record)
+ print "\n"
+#
+# Get a record from the Kindle.info file for the key "hashedKey" (already hashed and encoded). Return the decoded and decrypted record
+#
+
+def getKindleInfoValueForHash(hashedKey):
+    """Return the decoded and decrypted kindle.info value stored under hashedKey.
+
+    The stored text is decoded with charMap2 and passed to
+    CryptUnprotectData with empty entropy.
+    """
+    encoded = kindleDatabase[hashedKey]
+    return CryptUnprotectData(decode(encoded, charMap2), "")
+
+#
+# Get a record from the Kindle.info file for the string in "key" (plaintext). Return the decoded and decrypted record
+#
+
+def getKindleInfoValueForKey(key):
+    """Return the decoded and decrypted kindle.info value for the plaintext key."""
+    hashedKey = encodeHash(key, charMap2)
+    return getKindleInfoValueForHash(hashedKey)
+
+#
+# Get a 7 bit encoded number from the book file
+#
+
+def bookReadEncodedNumber():
+    """Read one 7-bit encoded number from the global bookFile.
+
+    A leading 0xFF byte marks the value as negative. Bytes with the high
+    bit set are continuation bytes; each byte contributes its low 7 bits,
+    most significant group first.
+    """
+    def next_byte():
+        return ord(bookFile.read(1))
+
+    value = next_byte()
+    negative = (value == 0xFF)
+    if negative:
+        value = next_byte()
+
+    if value >= 0x80:
+        accum = value & 0x7F
+        while value >= 0x80:
+            value = next_byte()
+            accum = (accum << 7) + (value & 0x7F)
+        value = accum
+
+    if negative:
+        return -value
+    return value
+
+#
+# Encode a number in 7 bit format
+#
+
+def encodeNumber(number):
+    """Return number as a 7-bit encoded string, most significant group first.
+
+    Every byte except the last has the high bit set. Negative numbers are
+    stored as -number + 1 behind a leading 0xFF byte. A non-negative value
+    whose leading byte would be 0xFF gets an extra 0x80 byte in front, so
+    it cannot be mistaken for the negative marker.
+    """
+    negative = number < 0
+    if negative:
+        number = -number + 1
+
+    # Groups are produced least significant first and reversed at the end.
+    groups = []
+    continuation = 0
+    while True:
+        byte = (number & 0x7F) + continuation
+        number = number >> 7
+        groups.append(chr(byte))
+        continuation = 0x80
+        if number == 0:
+            break
+
+    if byte == 0xFF and not negative:
+        groups.append(chr(0x80))
+    if negative:
+        groups.append(chr(0xFF))
+
+    groups.reverse()
+    return "".join(groups)
+
+#
+# Get a length prefixed string from the file
+#
+
+def bookReadString():
+    """Read a length-prefixed string from the global bookFile and return it."""
+    stringLength = bookReadEncodedNumber()
+    raw = bookFile.read(stringLength)
+    return unpack("%ds" % stringLength, raw)[0]
+
+#
+# Returns a length prefixed string
+#
+
+def lengthPrefixString(data):
+    """Return data preceded by its length as a 7-bit encoded number."""
+    prefix = encodeNumber(len(data))
+    return prefix + data
+
+
+#
+# Read and return the data of one header record at the current book file position [[offset,compressedLength,decompressedLength],...]
+#
+
+def bookReadHeaderRecordData():
+    """Read a count followed by that many triples of encoded numbers.
+
+    Returns a list of three-element lists, in file order.
+    """
+    nbValues = bookReadEncodedNumber()
+    return [
+        [bookReadEncodedNumber(), bookReadEncodedNumber(), bookReadEncodedNumber()]
+        for _ in range(nbValues)
+    ]
+
+#
+# Read and parse one header record at the current book file position and return the associated data [[offset,compressedLength,decompressedLength],...]
+#
+
+def parseTopazHeaderRecord():
+    """Parse one header record at the current position and return [tag, values].
+
+    Raises CMBDTCFatal when the record does not start with the byte 0x63.
+    """
+    marker = ord(bookFile.read(1))
+    if marker != 0x63:
+        raise CMBDTCFatal("Parse Error : Invalid Header")
+
+    tag = bookReadString()
+    return [tag, bookReadHeaderRecordData()]
+
+#
+# Parse the header of a Topaz file, get all the header records and the offset for the payload
+#
+
+def parseTopazHeader():
+    """Parse the Topaz header of the global bookFile.
+
+    Fills the global bookHeaderRecords (tag -> record values) and sets the
+    global bookPayloadOffset to the file position just after the header.
+    Raises CMBDTCFatal on a wrong magic value or a missing 0x64 terminator.
+    """
+    global bookHeaderRecords
+    global bookPayloadOffset
+
+    if unpack("4s", bookFile.read(4))[0] != 'TPZ0':
+        raise CMBDTCFatal("Parse Error : Invalid Header, not a Topaz file")
+
+    recordCount = bookReadEncodedNumber()
+    bookHeaderRecords = {}
+    for _ in range(recordCount):
+        tag, values = parseTopazHeaderRecord()
+        bookHeaderRecords[tag] = values
+
+    # The header is terminated by a single 0x64 byte.
+    if ord(bookFile.read(1)) != 0x64:
+        raise CMBDTCFatal("Parse Error : Invalid Header")
+
+    bookPayloadOffset = bookFile.tell()
+
+#
+# Get a record in the book payload, given its name and index. If necessary the record is decrypted. The record is not decompressed
+#
+
+def getBookPayloadRecord(name, index):
+    """Return the payload record (name, index), decrypted when it is flagged encrypted.
+
+    The record is not decompressed. Raises CMBDTCFatal when the header has
+    no such record or when the tag or index stored in the payload differs
+    from the requested one.
+    """
+    try:
+        entry = bookHeaderRecords[name][index]
+        recordOffset = entry[0]
+    except:
+        raise CMBDTCFatal("Parse Error : Invalid Record, record not found")
+
+    bookFile.seek(bookPayloadOffset + recordOffset)
+
+    if bookReadString() != name:
+        raise CMBDTCFatal("Parse Error : Invalid Record, record name doesn't match")
+
+    # A negative stored index marks an encrypted record; the real index is -stored - 1.
+    recordIndex = bookReadEncodedNumber()
+    encrypted = recordIndex < 0
+    if encrypted:
+        recordIndex = -recordIndex - 1
+
+    if recordIndex != index:
+        raise CMBDTCFatal("Parse Error : Invalid Record, index doesn't match")
+
+    # Read entry[2] bytes when it is non-zero, otherwise entry[1] bytes.
+    if entry[2] != 0:
+        length = entry[2]
+    else:
+        length = entry[1]
+    record = bookFile.read(length)
+
+    if encrypted:
+        record = topazCryptoDecrypt(record, topazCryptoInit(bookKey))
+
+    return record
+
+#
+# Extract, decrypt and decompress a book record indicated by name and index and print it or save it in "filename"
+#
+
+def extractBookPayloadRecord(name, index, filename):
+    """Extract a record, decompress it if needed, and print it or save it to filename.
+
+    When the record cannot be fetched, "Could not find record" is printed
+    and the function returns; it no longer carries on with an unbound
+    record. Raises CMBDTCFatal when decompression or writing fails.
+    """
+    try:
+        # A non-zero third header field marks the record as compressed.
+        compressed = bookHeaderRecords[name][index][2] != 0
+        record = getBookPayloadRecord(name, index)
+    except Exception:
+        print("Could not find record")
+        return
+
+    if compressed:
+        try:
+            record = zlib.decompress(record)
+        except zlib.error:
+            raise CMBDTCFatal("Could not decompress record")
+
+    if filename != "":
+        try:
+            # `with` closes the file even when the write fails.
+            with open(filename, "wb") as out:
+                out.write(record)
+        except (IOError, OSError):
+            raise CMBDTCFatal("Could not write to destination file")
+    else:
+        print(record)
+
+#
+# return next record [key,value] from the book metadata from the current book position
+#
+
+def readMetadataRecord():
+    """Read two consecutive length-prefixed strings and return them as [key, value]."""
+    key = bookReadString()
+    value = bookReadString()
+    return [key, value]
+
+#
+# Parse the metadata record from the book payload and return a list of [key,values]
+#
+
+def parseMetadata():
+    """Parse the "metadata" payload record into the global bookMetadata dict.
+
+    Raises CMBDTCFatal when the tag stored at the record position is not
+    "metadata".
+    """
+    global bookMetadata
+    bookMetadata = {}
+
+    bookFile.seek(bookPayloadOffset + bookHeaderRecords["metadata"][0][0])
+    if bookReadString() != "metadata":
+        raise CMBDTCFatal("Parse Error : Record Names Don't Match")
+
+    # One flags byte (read but not used) followed by a one-byte record count.
+    ord(bookFile.read(1))
+    nbRecords = ord(bookFile.read(1))
+
+    for _ in range(nbRecords):
+        key, value = readMetadataRecord()
+        bookMetadata[key] = value
+
+#
+# Returns the two bits at offset from a bit field
+#
+
+def getTwoBitsFromBitField(bitField,offset):
+    """Return the two-bit group number offset of bitField.
+
+    Groups are counted four per byte, most significant pair first.
+    """
+    byteNumber, slot = divmod(offset, 4)
+    shift = 6 - 2 * slot
+    return (ord(bitField[byteNumber]) >> shift) & 3
+
+#
+# Returns the six bits at offset from a bit field
+#
+
+def getSixBitsFromBitField(bitField,offset):
+    """Return the six-bit group number offset of bitField (three two-bit groups)."""
+    base = offset * 3
+    high = getTwoBitsFromBitField(bitField, base)
+    middle = getTwoBitsFromBitField(bitField, base + 1)
+    low = getTwoBitsFromBitField(bitField, base + 2)
+    return (high << 4) + (middle << 2) + low
+
+#
+# 8 bits to six bits encoding from hash to generate PID string
+#
+
+def encodePID(hash):
+    """Return an 8-character PID: the first eight six-bit groups of hash mapped through charMap3."""
+    return "".join([charMap3[getSixBitsFromBitField(hash, position)]
+                    for position in range(8)])
+
+#
+# Context initialisation for the Topaz Crypto
+#
+
+def topazCryptoInit(key):
+    """Return the two-word cipher context [ctx1, ctx2] derived from key.
+
+    ctx2 is the value ctx1 held before the last key byte was mixed in.
+    """
+    ctx1 = 0x0CAFFE19E
+
+    for keyChar in key:
+        keyByte = ord(keyChar)
+        mixed = ((ctx1 >> 2) * (ctx1 >> 7)) & 0xFFFFFFFF
+        keyed = (keyByte * keyByte * 0x0F902007) & 0xFFFFFFFF
+        ctx2, ctx1 = ctx1, mixed ^ keyed
+    return [ctx1, ctx2]
+
+#
+# decrypt data with the context prepared by topazCryptoInit()
+#
+
+def topazCryptoDecrypt(data, ctx):
+    """Decrypt data with a context from topazCryptoInit() and return the plaintext.
+
+    ctx itself is not modified; the running state is kept in locals and
+    is updated from each decrypted byte.
+    """
+    ctx1, ctx2 = ctx[0], ctx[1]
+    plain = []
+
+    for dataChar in data:
+        m = (ord(dataChar) ^ ((ctx1 >> 3) & 0xFF) ^ ((ctx2 << 3) & 0xFF)) & 0xFF
+        nextCtx = (((ctx1 >> 2) * (ctx1 >> 7)) & 0xFFFFFFFF) ^ ((m * m * 0x0F902007) & 0xFFFFFFFF)
+        ctx2, ctx1 = ctx1, nextCtx
+        plain.append(chr(m))
+
+    return "".join(plain)
+
+#
+# Decrypt a payload record with the PID
+#
+
+def decryptRecord(data,PID):
+    """Decrypt data with a cipher context initialised from PID."""
+    return topazCryptoDecrypt(data, topazCryptoInit(PID))
+
+#
+# Try to decrypt a dkey record (contains the book PID)
+#
+
+def decryptDkeyRecord(data,PID):
+    """Decrypt one dkey record with PID and return the 8-byte key it carries.
+
+    The decrypted record must unpack as "PID", length 8, the PID itself,
+    length 8, the key, "pid". Raises CMBDTCError when the record does not
+    have that shape, including when its length is wrong, so the caller can
+    move on to the next record or PID.
+    """
+    # Local import: the file only imports pack/unpack from struct.
+    from struct import error as StructError
+
+    record = decryptRecord(data,PID)
+    try:
+        fields = unpack("3sB8sB8s3s",record)
+    except StructError:
+        raise CMBDTCError("Record didn't have the expected length")
+
+    if fields[0] != "PID" or fields[5] != "pid" :
+        raise CMBDTCError("Didn't find PID magic numbers in record")
+    elif fields[1] != 8 or fields[3] != 8 :
+        raise CMBDTCError("Record didn't contain correct length fields")
+    elif fields[2] != PID :
+        raise CMBDTCError("Record didn't contain PID")
+
+    return fields[4]
+
+#
+# Decrypt all the book's dkey records (contain the book PID)
+#
+
+def decryptDkeyRecords(data,PID):
+    """Return the keys of all dkey records in data that decrypt correctly with PID.
+
+    data starts with a one-byte record count; each record is a one-byte
+    length followed by that many bytes. Records rejected with CMBDTCError
+    are skipped.
+    """
+    nbKeyRecords = ord(data[0])
+    remaining = data[1:]
+    records = []
+    for _ in range(nbKeyRecords):
+        length = ord(remaining[0])
+        chunk = remaining[1:length + 1]
+        remaining = remaining[1 + length:]
+        try:
+            records.append(decryptDkeyRecord(chunk, PID))
+        except CMBDTCError:
+            pass
+
+    return records
+
+#
+# Encryption table used to generate the device PID
+#
+
+def generatePidEncryptionTable() :
+    """Return the 256-entry table built by eight shift/XOR steps with 0xEDB88320 per entry."""
+    table = []
+    for seed in range(0x100):
+        value = seed
+        for _ in range(8):
+            lowBit = value & 1
+            value = value >> 1
+            if lowBit != 0:
+                value = value ^ 0xEDB88320
+        table.append(value)
+    return table
+
+#
+# Seed value used to generate the device PID
+#
+
+def generatePidSeed(table,dsn) :
+    """Fold the first four characters of dsn through table and return the seed."""
+    value = 0
+    for position in range(4):
+        slot = (ord(dsn[position]) ^ value) & 0xFF
+        value = (value >> 8) ^ table[slot]
+    return value
+
+#
+# Generate the device PID
+#
+
+def generateDevicePID(table,dsn,nbRoll):
+    """Return the 8-character device PID derived from dsn.
+
+    The four seed bytes (most significant first) are repeated to form
+    eight bytes, the first nbRoll characters of dsn are XORed in cyclically,
+    and each byte is mapped to a character of charMap4.
+    """
+    seed = generatePidSeed(table,dsn)
+    seedBytes = [(seed >> shift) & 0xFF for shift in (24, 16, 8, 0)]
+    pid = seedBytes + seedBytes
+
+    for counter in range(nbRoll):
+        pid[counter % 8] ^= ord(dsn[counter])
+
+    return "".join([charMap4[((((b >> 5) & 3) ^ b) & 0x1f) + (b >> 7)]
+                    for b in pid])
+
+#
+# Create decrypted book payload
+#
+
+def createDecryptedPayload(payload):
+ # Build the decrypted payload and return the header data describing it, as
+ # a list of [name, [[offset, field1, field2], ...]] entries (the two fields
+ # are copied from the original header records).
+ # createDecryptedBook() calls this with payload=None first, to obtain the
+ # header data, and then again with the open output file.
+ # NOTE(review): indentation is lost in this view; the notes below describe
+ # the statement order only - confirm the nesting against the source file.
+
+ # store data to be able to create the header later
+ headerData= []
+ currentOffset = 0
+
+ # Add social DRM to decrypted files
+
+ # NOTE(review): any failure while reading the kindle.info values is
+ # silently ignored by the bare except below.
+ try:
+ data = getKindleInfoValueForKey("kindle.name.info")+":"+ getKindleInfoValueForKey("login")
+ if payload!= None:
+ payload.write(lengthPrefixString("sdrm"))
+ payload.write(encodeNumber(0))
+ payload.write(data)
+ else:
+ currentOffset += len(lengthPrefixString("sdrm"))
+ currentOffset += len(encodeNumber(0))
+ currentOffset += len(data)
+ except:
+ pass
+
+ for headerRecord in bookHeaderRecords:
+ name = headerRecord
+ newRecord = []
+
+ # dkey records are left out of the decrypted book
+ if name != "dkey" :
+
+ for index in range (0,len(bookHeaderRecords[name])) :
+ offset = currentOffset
+
+ if payload != None:
+ # write tag
+ payload.write(lengthPrefixString(name))
+ # write data
+ payload.write(encodeNumber(index))
+ payload.write(getBookPayloadRecord(name, index))
+
+ else :
+ # no output file: only account for the bytes that would be written
+ currentOffset += len(lengthPrefixString(name))
+ currentOffset += len(encodeNumber(index))
+ currentOffset += len(getBookPayloadRecord(name, index))
+ newRecord.append([offset,bookHeaderRecords[name][index][1],bookHeaderRecords[name][index][2]])
+
+ headerData.append([name,newRecord])
+
+
+
+ return headerData
+
+#
+# Create decrypted book
+#
+
+def createDecryptedBook(outputFile):
+    """Write a decrypted copy of the current book to the path outputFile.
+
+    The header is built from a first createDecryptedPayload(None) pass,
+    then the payload is written by a second pass. The output file is
+    closed even when one of the passes raises.
+    """
+    outputFile = open(outputFile,"wb")
+    try:
+        # First pass: compute the header data without writing any payload.
+        headerData = createDecryptedPayload(None)
+        outputFile.write("TPZ0")
+        outputFile.write(encodeNumber(len(headerData)))
+
+        for header in headerData :
+            outputFile.write(chr(0x63))
+            outputFile.write(lengthPrefixString(header[0]))
+            outputFile.write(encodeNumber(len(header[1])))
+            for numbers in header[1] :
+                outputFile.write(encodeNumber(numbers[0]))
+                outputFile.write(encodeNumber(numbers[1]))
+                outputFile.write(encodeNumber(numbers[2]))
+
+        # 0x64 terminates the header; the payload follows immediately.
+        outputFile.write(chr(0x64))
+        createDecryptedPayload(outputFile)
+    finally:
+        outputFile.close()
+
+#
+# Set the command to be executed by the program according to the command-line parameters
+#
+
+def setCommand(name) :
+    """Record name as the global command.
+
+    Raises CMBDTCFatal when a command has already been set.
+    """
+    global command
+    if command != "":
+        raise CMBDTCFatal("Invalid command line parameters")
+    command = name
+
+#
+# Program usage
+#
+
+def usage():
+    """Print the command-line help text."""
+    helpLines = (
+        "\nUsage:",
+        "\nCMBDTC.py [options] bookFileName\n",
+        "-p Adds a PID to the list of PIDs that are tried to decrypt the book key (can be used several times)",
+        "-d Saves a decrypted copy of the book",
+        "-r Prints or writes to disk a record indicated in the form name:index (e.g \"img:0\")",
+        "-o Output file name to write records and decrypted books",
+        "-v Verbose (can be used several times)",
+        "-i Prints kindle.info database",
+    )
+    for line in helpLines:
+        print(line)
+
+#
+# Main
+#
+
+def main(argv=sys.argv):
+ # Command-line entry point: parse the options, load kindle.info, derive the
+ # device and book PIDs, recover the book key and run the selected command.
+ # Returns 0; usage errors end the process through sys.exit(2).
+ # NOTE(review): indentation is lost in this view, so the comments below
+ # describe statement order only - confirm the nesting against the source.
+ global kindleDatabase
+ global bookMetadata
+ global bookKey
+ global bookFile
+ global command
+
+ progname = os.path.basename(argv[0])
+
+ verbose = 0
+ recordName = ""
+ recordIndex = 0
+ outputFile = ""
+ PIDs = []
+ kindleDatabase = None
+ command = ""
+
+
+ # NOTE(review): options are parsed from sys.argv, not from the argv
+ # parameter; argv is only used for progname above.
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], "vdir:o:p:")
+ except getopt.GetoptError, err:
+ # print help information and exit:
+ print str(err) # will print something like "option -a not recognized"
+ usage()
+ sys.exit(2)
+
+ if len(opts) == 0 and len(args) == 0 :
+ usage()
+ sys.exit(2)
+
+ # -i, -r and -d each select a command through setCommand(), which raises
+ # CMBDTCFatal when a command has already been selected.
+ for o, a in opts:
+ if o == "-v":
+ verbose+=1
+ if o == "-i":
+ setCommand("printInfo")
+ if o =="-o":
+ if a == None :
+ raise CMBDTCFatal("Invalid parameter for -o")
+ outputFile = a
+ if o =="-r":
+ setCommand("printRecord")
+ try:
+ recordName,recordIndex = a.split(':')
+ except:
+ raise CMBDTCFatal("Invalid parameter for -r")
+ if o =="-p":
+ PIDs.append(a)
+ if o =="-d":
+ setCommand("doit")
+
+ if command == "" :
+ raise CMBDTCFatal("No action supplied on command line")
+
+ #
+ # Read the encrypted database
+ #
+
+ # A failure to read kindle.info leaves kindleDatabase as None; the error
+ # is only printed when verbose is set.
+ try:
+ kindleDatabase = parseKindleInfo()
+ except Exception, message:
+ if verbose>0:
+ print(message)
+
+ if kindleDatabase != None :
+ if command == "printInfo" :
+ printKindleInfo()
+
+ #
+ # Compute the DSN
+ #
+
+ # Get the Mazama Random number
+ MazamaRandomNumber = getKindleInfoValueForKey("MazamaRandomNumber")
+
+ # Get the HDD serial
+ encodedSystemVolumeSerialNumber = encodeHash(str(GetVolumeSerialNumber(GetSystemDirectory().split('\\')[0] + '\\')),charMap1)
+
+ # Get the current user name
+ encodedUsername = encodeHash(GetUserName(),charMap1)
+
+ # concat, hash and encode
+ DSN = encode(SHA1(MazamaRandomNumber+encodedSystemVolumeSerialNumber+encodedUsername),charMap1)
+
+ if verbose >1:
+ print("DSN: " + DSN)
+
+ #
+ # Compute the device PID
+ #
+
+ table = generatePidEncryptionTable()
+ devicePID = generateDevicePID(table,DSN,4)
+ PIDs.append(devicePID)
+
+ if verbose > 0:
+ print("Device PID: " + devicePID)
+
+ #
+ # Open book and parse metadata
+ #
+
+ if len(args) == 1:
+
+ bookFile = openBook(args[0])
+ parseTopazHeader()
+ parseMetadata()
+
+ #
+ # Compute book PID
+ #
+
+ # Get the account token
+
+ if kindleDatabase != None:
+ kindleAccountToken = getKindleInfoValueForKey("kindle.account.tokens")
+
+ if verbose >1:
+ print("Account Token: " + kindleAccountToken)
+
+ keysRecord = bookMetadata["keys"]
+ keysRecordRecord = bookMetadata[keysRecord]
+
+ pidHash = SHA1(DSN+kindleAccountToken+keysRecord+keysRecordRecord)
+
+ bookPID = encodePID(pidHash)
+ PIDs.append(bookPID)
+
+ if verbose > 0:
+ print ("Book PID: " + bookPID )
+
+ #
+ # Decrypt book key
+ #
+
+ dkey = getBookPayloadRecord('dkey', 0)
+
+ # Every candidate PID (from -p, the device PID, the book PID) is tried
+ # against every dkey record; the first recovered key is used.
+ bookKeys = []
+ for PID in PIDs :
+ bookKeys+=decryptDkeyRecords(dkey,PID)
+
+ if len(bookKeys) == 0 :
+ if verbose > 0 :
+ print ("Book key could not be found. Maybe this book is not registered with this device.")
+ else :
+ bookKey = bookKeys[0]
+ if verbose > 0:
+ print("Book key: " + bookKey.encode('hex'))
+
+
+
+ if command == "printRecord" :
+ extractBookPayloadRecord(recordName,int(recordIndex),outputFile)
+ if outputFile != "" and verbose>0 :
+ print("Wrote record to file: "+outputFile)
+ elif command == "doit" :
+ if outputFile!="" :
+ createDecryptedBook(outputFile)
+ if verbose >0 :
+ print ("Decrypted book saved. Don't pirate!")
+ elif verbose > 0:
+ print("Output file name was not supplied.")
+
+ return 0
+
+# Run main() when executed as a script and use its return value as the exit status.
+if __name__ == '__main__':
+ sys.exit(main())
-#! /usr/bin/env python
-
-"""
- Routines for doing AES CBC in one file
-
- Modified by some_updates to extract
- and combine only those parts needed for AES CBC
- into one simple to add python file
-
- Original Version
- Copyright (c) 2002 by Paul A. Lambert
- Under:
- CryptoPy Artisitic License Version 1.0
- See the wonderful pure python package cryptopy-1.2.5
- and read its LICENSE.txt for complete license details.
-"""
-
-class CryptoError(Exception):
- """ Base class for crypto exceptions """
- def __init__(self,errorMessage='Error!'):
- self.message = errorMessage
- def __str__(self):
- return self.message
-
-class InitCryptoError(CryptoError):
- """ Crypto errors during algorithm initialization """
-class BadKeySizeError(InitCryptoError):
- """ Bad key size error """
-class EncryptError(CryptoError):
- """ Error in encryption processing """
-class DecryptError(CryptoError):
- """ Error in decryption processing """
-class DecryptNotBlockAlignedError(DecryptError):
- """ Error in decryption processing """
-
-def xorS(a,b):
- """ XOR two strings """
- assert len(a)==len(b)
- x = []
- for i in range(len(a)):
- x.append( chr(ord(a[i])^ord(b[i])))
- return ''.join(x)
-
-def xor(a,b):
- """ XOR two strings """
- x = []
- for i in range(min(len(a),len(b))):
- x.append( chr(ord(a[i])^ord(b[i])))
- return ''.join(x)
-
-"""
- Base 'BlockCipher' and Pad classes for cipher instances.
- BlockCipher supports automatic padding and type conversion. The BlockCipher
- class was written to make the actual algorithm code more readable and
- not for performance.
-"""
-
-class BlockCipher:
- """ Block ciphers """
- def __init__(self):
- self.reset()
-
- def reset(self):
- self.resetEncrypt()
- self.resetDecrypt()
- def resetEncrypt(self):
- self.encryptBlockCount = 0
- self.bytesToEncrypt = ''
- def resetDecrypt(self):
- self.decryptBlockCount = 0
- self.bytesToDecrypt = ''
-
- def encrypt(self, plainText, more = None):
- """ Encrypt a string and return a binary string """
- self.bytesToEncrypt += plainText # append plainText to any bytes from prior encrypt
- numBlocks, numExtraBytes = divmod(len(self.bytesToEncrypt), self.blockSize)
- cipherText = ''
- for i in range(numBlocks):
- bStart = i*self.blockSize
- ctBlock = self.encryptBlock(self.bytesToEncrypt[bStart:bStart+self.blockSize])
- self.encryptBlockCount += 1
- cipherText += ctBlock
- if numExtraBytes > 0: # save any bytes that are not block aligned
- self.bytesToEncrypt = self.bytesToEncrypt[-numExtraBytes:]
+#! /usr/bin/python
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+# For use with Topaz Scripts Version 2.6
+
+class Unbuffered:
+    """Stream wrapper that flushes after every write.
+
+    Every attribute other than write is looked up on the wrapped stream.
+    """
+    def __init__(self, stream):
+        self.stream = stream
+
+    def __getattr__(self, attr):
+        return getattr(self.stream, attr)
+
+    def write(self, data):
+        target = self.stream
+        target.write(data)
+        target.flush()
+
+import sys
+sys.stdout=Unbuffered(sys.stdout)
+
+import csv
+import os
+import getopt
+from struct import pack
+from struct import unpack
+
+class TpzDRMError(Exception):
+    """Error raised by the Topaz conversion code, e.g. a string-table index out of range."""
+
+# Get a 7 bit encoded number from string. The most
+# significant byte comes first and has the high bit (8th) set
+
+def readEncodedNumber(file):
+    """Read one 7-bit encoded number from file.
+
+    A leading 0xFF byte marks the value as negative. Bytes with the high
+    bit set are continuation bytes; each byte contributes its low 7 bits,
+    most significant group first. Returns None when the file ends before
+    the number is complete.
+    """
+    def next_byte():
+        c = file.read(1)
+        if len(c) == 0:
+            return None
+        return ord(c)
+
+    value = next_byte()
+    if value is None:
+        return None
+
+    negative = (value == 0xFF)
+    if negative:
+        value = next_byte()
+        if value is None:
+            return None
+
+    if value >= 0x80:
+        accum = value & 0x7F
+        while value >= 0x80:
+            value = next_byte()
+            if value is None:
+                return None
+            accum = (accum << 7) + (value & 0x7F)
+        value = accum
+
+    if negative:
+        return -value
+    return value
+
+
+# returns a binary string that encodes a number into 7 bits
+# most significant byte first which has the high bit set
+
+def encodeNumber(number):
+    """Return number as a 7-bit encoded string, most significant group first.
+
+    Every byte except the last has the high bit set. Negative numbers are
+    stored as -number + 1 behind a leading 0xFF byte. A non-negative value
+    whose leading byte would be 0xFF gets an extra 0x80 byte in front, so
+    it cannot be mistaken for the negative marker.
+    """
+    negative = number < 0
+    if negative:
+        number = -number + 1
+
+    # Groups are produced least significant first and reversed at the end.
+    groups = []
+    continuation = 0
+    while True:
+        byte = (number & 0x7F) + continuation
+        number = number >> 7
+        groups.append(chr(byte))
+        continuation = 0x80
+        if number == 0:
+            break
+
+    if byte == 0xFF and not negative:
+        groups.append(chr(0x80))
+    if negative:
+        groups.append(chr(0xFF))
+
+    groups.reverse()
+    return "".join(groups)
+
+
+
+# create / read a length prefixed string from the file
+
+def lengthPrefixString(data):
+    """Return data preceded by its length as a 7-bit encoded number."""
+    prefix = encodeNumber(len(data))
+    return prefix + data
+
+def readString(file):
+    """Read a length-prefixed string from file.
+
+    Returns "" when the length cannot be read or when fewer bytes than
+    announced are available.
+    """
+    stringLength = readEncodedNumber(file)
+    if stringLength == None:
+        return ""
+    raw = file.read(stringLength)
+    if len(raw) != stringLength:
+        return ""
+    return unpack("%ds" % stringLength, raw)[0]
+
+
+# convert a binary string generated by encodeNumber (7 bit encoded number)
+# to the value you would find inside the page*.dat files to be processed
+
+def convert(i):
+    """Return the 7-bit encoding of i as a lowercase hex string, two digits per byte."""
+    return ''.join(['%02x' % ord(ch) for ch in encodeNumber(i)])
+
+
+
+# the complete string table used to store all book text content
+# as well as the xml tokens and values that make sense out of it
+
+class Dictionary(object):
+ def __init__(self, dictFile):
+     """Load the string table stored in the file dictFile.
+
+     The file holds an encoded entry count followed by that many
+     length-prefixed strings; each string is stored after escapestr().
+     """
+     self.filename = dictFile
+     self.size = 0
+     self.stable = []
+     self.fo = file(dictFile,'rb')
+     self.size = readEncodedNumber(self.fo)
+     for _ in xrange(self.size):
+         entry = readString(self.fo)
+         self.stable.append(self.escapestr(entry))
+     self.pos = 0
+
+ def escapestr(self, str):
+     """Return str with '&', '<', '>' and '=' replaced by XML character entities.
+
+     '&' is replaced first so the ampersands introduced by the later
+     replacements are not escaped a second time.
+     """
+     str = str.replace('&','&amp;')
+     str = str.replace('<','&lt;')
+     str = str.replace('>','&gt;')
+     str = str.replace('=','&#61;')
+     return str
+
+ def lookup(self,val):
+ # Return the string-table entry at index val and remember the index in
+ # self.pos. An index outside 0..size-1 prints an error message and raises
+ # TpzDRMError.
+ if ((val >= 0) and (val < self.size)) :
+ self.pos = val
+ return self.stable[self.pos]
else:
- self.bytesToEncrypt = ''
-
- if more == None: # no more data expected from caller
- finalBytes = self.padding.addPad(self.bytesToEncrypt,self.blockSize)
- if len(finalBytes) > 0:
- ctBlock = self.encryptBlock(finalBytes)
- self.encryptBlockCount += 1
- cipherText += ctBlock
- self.resetEncrypt()
- return cipherText
-
- def decrypt(self, cipherText, more = None):
- """ Decrypt a string and return a string """
- self.bytesToDecrypt += cipherText # append to any bytes from prior decrypt
-
- numBlocks, numExtraBytes = divmod(len(self.bytesToDecrypt), self.blockSize)
- if more == None: # no more calls to decrypt, should have all the data
- if numExtraBytes != 0:
- raise DecryptNotBlockAlignedError, 'Data not block aligned on decrypt'
-
- # hold back some bytes in case last decrypt has zero len
- if (more != None) and (numExtraBytes == 0) and (numBlocks >0) :
- numBlocks -= 1
- numExtraBytes = self.blockSize
-
- plainText = ''
- for i in range(numBlocks):
- bStart = i*self.blockSize
- ptBlock = self.decryptBlock(self.bytesToDecrypt[bStart : bStart+self.blockSize])
- self.decryptBlockCount += 1
- plainText += ptBlock
-
- if numExtraBytes > 0: # save any bytes that are not block aligned
- self.bytesToEncrypt = self.bytesToEncrypt[-numExtraBytes:]
+ print "Error - %d outside of string table limits" % val
+ raise TpzDRMError('outside of string table limits')
+ # sys.exit(-1)
+
+ def getSize(self):
+     """Return the number of entries read from the dictionary file."""
+     return self.size
+
+ def getPos(self):
+     """Return the index of the most recent successful lookup (0 initially)."""
+     return self.pos
+
+ def dumpDict(self):
+     """Print every entry as: index, encoded index in hex, stored string."""
+     for index in xrange(self.size):
+         entry = self.stable[index]
+         print "%d %s %s" % (index, convert(index), entry)
+
+# parses the xml snippets that are represented by each page*.dat file.
+# also parses the other0.dat file - the main stylesheet
+# and information used to inject the xml snippets into page*.dat files
+
+class PageParser(object):
+ def __init__(self, filename, dict, debug, flat_xml):
+     """Open filename for binary reading and set up empty parser state.
+
+     self.id is the file's base name with '.dat' removed; dict, debug and
+     flat_xml are stored unchanged.
+     """
+     self.fo = file(filename,'rb')
+     self.id = os.path.basename(filename).replace('.dat','')
+     # caller-supplied settings
+     self.dict = dict
+     self.debug = debug
+     self.flat_xml = flat_xml
+     # parser state, empty until parsing starts
+     self.tagpath = []
+     self.doc = []
+     self.snippetList = []
+
+
+ # hash table used to enable the decoding process
+ # This has all been developed by trial and error so it may still have omissions or
+ # contain errors
+ # Format:
+ # tag : (number of arguments, argument type, subtags present, special case of subtags presents when escaped)
+
+ token_tags = {
+ 'x' : (1, 'scalar_number', 0, 0),
+ 'y' : (1, 'scalar_number', 0, 0),
+ 'h' : (1, 'scalar_number', 0, 0),
+ 'w' : (1, 'scalar_number', 0, 0),
+ 'firstWord' : (1, 'scalar_number', 0, 0),
+ 'lastWord' : (1, 'scalar_number', 0, 0),
+ 'rootID' : (1, 'scalar_number', 0, 0),
+ 'stemID' : (1, 'scalar_number', 0, 0),
+ 'type' : (1, 'scalar_text', 0, 0),
+
+ 'info' : (0, 'number', 1, 0),
+
+ 'info.word' : (0, 'number', 1, 1),
+ 'info.word.ocrText' : (1, 'text', 0, 0),
+ 'info.word.firstGlyph' : (1, 'raw', 0, 0),
+ 'info.word.lastGlyph' : (1, 'raw', 0, 0),
+ 'info.word.bl' : (1, 'raw', 0, 0),
+ 'info.word.link_id' : (1, 'number', 0, 0),
+
+ 'glyph' : (0, 'number', 1, 1),
+ 'glyph.x' : (1, 'number', 0, 0),
+ 'glyph.y' : (1, 'number', 0, 0),
+ 'glyph.glyphID' : (1, 'number', 0, 0),
+
+ 'dehyphen' : (0, 'number', 1, 1),
+ 'dehyphen.rootID' : (1, 'number', 0, 0),
+ 'dehyphen.stemID' : (1, 'number', 0, 0),
+ 'dehyphen.stemPage' : (1, 'number', 0, 0),
+ 'dehyphen.sh' : (1, 'number', 0, 0),
+
+ 'links' : (0, 'number', 1, 1),
+ 'links.page' : (1, 'number', 0, 0),
+ 'links.rel' : (1, 'number', 0, 0),
+ 'links.row' : (1, 'number', 0, 0),
+ 'links.title' : (1, 'text', 0, 0),
+ 'links.href' : (1, 'text', 0, 0),
+ 'links.type' : (1, 'text', 0, 0),
+
+ 'paraCont' : (0, 'number', 1, 1),
+ 'paraCont.rootID' : (1, 'number', 0, 0),
+ 'paraCont.stemID' : (1, 'number', 0, 0),
+ 'paraCont.stemPage' : (1, 'number', 0, 0),
+
+ 'paraStems' : (0, 'number', 1, 1),
+ 'paraStems.stemID' : (1, 'number', 0, 0),
+
+ 'wordStems' : (0, 'number', 1, 1),
+ 'wordStems.stemID' : (1, 'number', 0, 0),
+
+ 'empty' : (1, 'snippets', 1, 0),
+
+ 'page' : (1, 'snippets', 1, 0),
+ 'page.pageid' : (1, 'scalar_text', 0, 0),
+ 'page.pagelabel' : (1, 'scalar_text', 0, 0),
+ 'page.type' : (1, 'scalar_text', 0, 0),
+ 'page.h' : (1, 'scalar_number', 0, 0),
+ 'page.w' : (1, 'scalar_number', 0, 0),
+ 'page.startID' : (1, 'scalar_number', 0, 0),
+
+ 'group' : (1, 'snippets', 1, 0),
+ 'group.type' : (1, 'scalar_text', 0, 0),
+ 'group._tag' : (1, 'scalar_text', 0, 0),
+
+ 'region' : (1, 'snippets', 1, 0),
+ 'region.type' : (1, 'scalar_text', 0, 0),
+ 'region.x' : (1, 'scalar_number', 0, 0),
+ 'region.y' : (1, 'scalar_number', 0, 0),
+ 'region.h' : (1, 'scalar_number', 0, 0),
+ 'region.w' : (1, 'scalar_number', 0, 0),
+ 'region.orientation' : (1, 'scalar_number', 0, 0),
+
+ 'empty_text_region' : (1, 'snippets', 1, 0),
+
+ 'img' : (1, 'snippets', 1, 0),
+ 'img.x' : (1, 'scalar_number', 0, 0),
+ 'img.y' : (1, 'scalar_number', 0, 0),
+ 'img.h' : (1, 'scalar_number', 0, 0),
+ 'img.w' : (1, 'scalar_number', 0, 0),
+ 'img.src' : (1, 'scalar_number', 0, 0),
+ 'img.color_src' : (1, 'scalar_number', 0, 0),
+
+ 'paragraph' : (1, 'snippets', 1, 0),
+ 'paragraph.class' : (1, 'scalar_text', 0, 0),
+ 'paragraph.firstWord' : (1, 'scalar_number', 0, 0),
+ 'paragraph.lastWord' : (1, 'scalar_number', 0, 0),
+ 'paragraph.lastWord' : (1, 'scalar_number', 0, 0),
+ 'paragraph.gridSize' : (1, 'scalar_number', 0, 0),
+ 'paragraph.gridBottomCenter' : (1, 'scalar_number', 0, 0),
+ 'paragraph.gridTopCenter' : (1, 'scalar_number', 0, 0),
+ 'paragraph.gridBeginCenter' : (1, 'scalar_number', 0, 0),
+ 'paragraph.gridEndCenter' : (1, 'scalar_number', 0, 0),
+
+
+ 'word_semantic' : (1, 'snippets', 1, 1),
+ 'word_semantic.type' : (1, 'scalar_text', 0, 0),
+ 'word_semantic.firstWord' : (1, 'scalar_number', 0, 0),
+ 'word_semantic.lastWord' : (1, 'scalar_number', 0, 0),
+
+ 'word' : (1, 'snippets', 1, 0),
+ 'word.type' : (1, 'scalar_text', 0, 0),
+ 'word.class' : (1, 'scalar_text', 0, 0),
+ 'word.firstGlyph' : (1, 'scalar_number', 0, 0),
+ 'word.lastGlyph' : (1, 'scalar_number', 0, 0),
+
+ '_span' : (1, 'snippets', 1, 0),
+ '_span.firstWord' : (1, 'scalar_number', 0, 0),
+ '_span.lastWord' : (1, 'scalar_number', 0, 0),
+ '_span.gridSize' : (1, 'scalar_number', 0, 0),
+ '_span.gridBottomCenter' : (1, 'scalar_number', 0, 0),
+ '_span.gridTopCenter' : (1, 'scalar_number', 0, 0),
+ '_span.gridBeginCenter' : (1, 'scalar_number', 0, 0),
+ '_span.gridEndCenter' : (1, 'scalar_number', 0, 0),
+
+ 'span' : (1, 'snippets', 1, 0),
+ 'span.firstWord' : (1, 'scalar_number', 0, 0),
+ 'span.lastWord' : (1, 'scalar_number', 0, 0),
+ 'span.gridSize' : (1, 'scalar_number', 0, 0),
+ 'span.gridBottomCenter' : (1, 'scalar_number', 0, 0),
+ 'span.gridTopCenter' : (1, 'scalar_number', 0, 0),
+ 'span.gridBeginCenter' : (1, 'scalar_number', 0, 0),
+ 'span.gridEndCenter' : (1, 'scalar_number', 0, 0),
+
+ 'extratokens' : (1, 'snippets', 1, 0),
+ 'extratokens.type' : (1, 'scalar_text', 0, 0),
+ 'extratokens.firstGlyph' : (1, 'scalar_number', 0, 0),
+ 'extratokens.lastGlyph' : (1, 'scalar_number', 0, 0),
+
+ 'glyph.h' : (1, 'number', 0, 0),
+ 'glyph.w' : (1, 'number', 0, 0),
+ 'glyph.use' : (1, 'number', 0, 0),
+ 'glyph.vtx' : (1, 'number', 0, 1),
+ 'glyph.len' : (1, 'number', 0, 1),
+ 'glyph.dpi' : (1, 'number', 0, 0),
+ 'vtx' : (0, 'number', 1, 1),
+ 'vtx.x' : (1, 'number', 0, 0),
+ 'vtx.y' : (1, 'number', 0, 0),
+ 'len' : (0, 'number', 1, 1),
+ 'len.n' : (1, 'number', 0, 0),
+
+ 'book' : (1, 'snippets', 1, 0),
+ 'version' : (1, 'snippets', 1, 0),
+ 'version.FlowEdit_1_id' : (1, 'scalar_text', 0, 0),
+ 'version.FlowEdit_1_version' : (1, 'scalar_text', 0, 0),
+ 'version.Schema_id' : (1, 'scalar_text', 0, 0),
+ 'version.Schema_version' : (1, 'scalar_text', 0, 0),
+ 'version.Topaz_version' : (1, 'scalar_text', 0, 0),
+ 'version.WordDetailEdit_1_id' : (1, 'scalar_text', 0, 0),
+ 'version.WordDetailEdit_1_version' : (1, 'scalar_text', 0, 0),
+ 'version.ZoneEdit_1_id' : (1, 'scalar_text', 0, 0),
+ 'version.ZoneEdit_1_version' : (1, 'scalar_text', 0, 0),
+ 'version.chapterheaders' : (1, 'scalar_text', 0, 0),
+ 'version.creation_date' : (1, 'scalar_text', 0, 0),
+ 'version.header_footer' : (1, 'scalar_text', 0, 0),
+ 'version.init_from_ocr' : (1, 'scalar_text', 0, 0),
+ 'version.letter_insertion' : (1, 'scalar_text', 0, 0),
+ 'version.xmlinj_convert' : (1, 'scalar_text', 0, 0),
+ 'version.xmlinj_reflow' : (1, 'scalar_text', 0, 0),
+ 'version.xmlinj_transform' : (1, 'scalar_text', 0, 0),
+ 'version.findlists' : (1, 'scalar_text', 0, 0),
+ 'version.page_num' : (1, 'scalar_text', 0, 0),
+ 'version.page_type' : (1, 'scalar_text', 0, 0),
+ 'version.bad_text' : (1, 'scalar_text', 0, 0),
+ 'version.glyph_mismatch' : (1, 'scalar_text', 0, 0),
+ 'version.margins' : (1, 'scalar_text', 0, 0),
+ 'version.staggered_lines' : (1, 'scalar_text', 0, 0),
+ 'version.paragraph_continuation' : (1, 'scalar_text', 0, 0),
+ 'version.toc' : (1, 'scalar_text', 0, 0),
+
+ 'stylesheet' : (1, 'snippets', 1, 0),
+ 'style' : (1, 'snippets', 1, 0),
+ 'style._tag' : (1, 'scalar_text', 0, 0),
+ 'style.type' : (1, 'scalar_text', 0, 0),
+ 'style._parent_type' : (1, 'scalar_text', 0, 0),
+ 'style.class' : (1, 'scalar_text', 0, 0),
+ 'style._after_class' : (1, 'scalar_text', 0, 0),
+ 'rule' : (1, 'snippets', 1, 0),
+ 'rule.attr' : (1, 'scalar_text', 0, 0),
+ 'rule.value' : (1, 'scalar_text', 0, 0),
+
+ 'original' : (0, 'number', 1, 1),
+ 'original.pnum' : (1, 'number', 0, 0),
+ 'original.pid' : (1, 'text', 0, 0),
+ 'pages' : (0, 'number', 1, 1),
+ 'pages.ref' : (1, 'number', 0, 0),
+ 'pages.id' : (1, 'number', 0, 0),
+ 'startID' : (0, 'number', 1, 1),
+ 'startID.page' : (1, 'number', 0, 0),
+ 'startID.id' : (1, 'number', 0, 0),
+
+ }
+
+
+ # full tag path record keeping routines
+    def tag_push(self, token):
+        """Record `token` as the innermost element of the current tag path."""
+        self.tagpath.append(token)
+    def tag_pop(self):
+        """Drop the innermost element of the tag path; do nothing when it is empty."""
+        if self.tagpath:
+            self.tagpath.pop()
+    def tagpath_len(self):
+        """Return how many tokens are currently on the tag path."""
+        depth = len(self.tagpath)
+        return depth
+    def get_tagpath(self, i):
+        """Return the dotted tag path made of the tokens from position `i` on.
+
+        i -- non-negative index into self.tagpath of the first token to use
+
+        Returns the tokens self.tagpath[i:] joined with '.', or '' when `i`
+        is at or past the end of the path (the previous code failed with an
+        UnboundLocalError in that case).
+        """
+        # slicing past the end gives an empty list, which joins to ''
+        return '.'.join(self.tagpath[i:])
+
+
+ # list of absolute command byte values that indicate
+ # various types of loop mechanisms typically used to generate vectors
+
+ cmd_list = (0x76, 0x76)
+
+ # peek at and return 1 byte that is ahead by i bytes
+    def peek(self, aheadi):
+        """Return the byte located `aheadi` bytes ahead without consuming input.
+
+        aheadi -- 1-based offset of the wanted byte from the current position
+                  of self.fo (1 means the very next byte)
+
+        Returns the byte value as an int, or None when fewer than `aheadi`
+        bytes remain.  The position of self.fo is restored in every case.
+        """
+        c = self.fo.read(aheadi)
+        # rewind by what was actually read: near the end of the data a short
+        # read returns fewer than aheadi bytes, and seeking back by aheadi
+        # would leave the position before the point where the peek started
+        self.fo.seek(-len(c), 1)
+        if len(c) == 0 or len(c) < aheadi:
+            return None
+        return ord(c[-1:])
+
+
+ # get the next value from the file being processed
+    def getNext(self):
+        """Return the next encoded number from self.fo, or None when no byte is left."""
+        if self.peek(1) is None:
+            return None
+        return readEncodedNumber(self.fo)
+
+
+ # format an arg by argtype
+    def formatArg(self, arg, argtype):
+        """Convert the raw value `arg` according to `argtype`.
+
+        Text types ('text', 'scalar_text') are resolved through
+        self.dict.lookup; 'raw', 'number', 'scalar_number' and 'snippets'
+        values are returned unchanged.  Any other argtype prints an error
+        and terminates the program with exit status -2.
+        """
+        if argtype in ('text', 'scalar_text'):
+            return self.dict.lookup(arg)
+        if argtype in ('raw', 'number', 'scalar_number', 'snippets'):
+            return arg
+        print "Error Unknown argtype %s" % argtype
+        sys.exit(-2)
+
+
+ # process the next tag token, recursively handling subtags,
+ # arguments, and commands
+ def procToken(self, token):
+
+ known_token = False
+ self.tag_push(token)
+
+ if self.debug : print 'Processing: ', self.get_tagpath(0)
+ cnt = self.tagpath_len()
+ for j in xrange(cnt):
+ tkn = self.get_tagpath(j)
+ if tkn in self.token_tags :
+ num_args = self.token_tags[tkn][0]
+ argtype = self.token_tags[tkn][1]
+ subtags = self.token_tags[tkn][2]
+ splcase = self.token_tags[tkn][3]
+ ntags = -1
+ known_token = True
+ break
+
+ if known_token :
+
+ # handle subtags if present
+ subtagres = []
+ if (splcase == 1):
+ # this type of tag uses the escape marker 0x74 to indicate a subtag count
+ if self.peek(1) == 0x74:
+ skip = readEncodedNumber(self.fo)
+ subtags = 1
+ num_args = 0
+
+ if (subtags == 1):
+ ntags = readEncodedNumber(self.fo)
+ if self.debug : print 'subtags: ' + token + ' has ' + str(ntags)
+ for j in xrange(ntags):
+ val = readEncodedNumber(self.fo)
+ subtagres.append(self.procToken(self.dict.lookup(val)))
+
+ # arguments can be scalars or vectors of text or numbers
+ argres = []
+ if num_args > 0 :
+ firstarg = self.peek(1)
+ if (firstarg in self.cmd_list) and (argtype != 'scalar_number') and (argtype != 'scalar_text'):
+ # single argument is a variable length vector of data
+ arg = readEncodedNumber(self.fo)
+ argres = self.decodeCMD(arg,argtype)
+ else :
+ # num_arg scalar arguments
+ for i in xrange(num_args):
+ argres.append(self.formatArg(readEncodedNumber(self.fo), argtype))
+
+ # build the return tag
+ result = []
+ tkn = self.get_tagpath(0)
+ result.append(tkn)
+ result.append(subtagres)
+ result.append(argtype)
+ result.append(argres)
+ self.tag_pop()
+ return result
+
+ # all tokens that need to be processed should be in the hash
+ # table; if one is not, it may indicate a problem: either a new
+ # token or an out of sync condition
else:
- self.bytesToEncrypt = ''
-
- if more == None: # last decrypt remove padding
- plainText = self.padding.removePad(plainText, self.blockSize)
- self.resetDecrypt()
- return plainText
-
-
-class Pad:
- def __init__(self):
- pass # eventually could put in calculation of min and max size extension
-
-class padWithPadLen(Pad):
- """ Pad a binary string with the length of the padding """
-
- def addPad(self, extraBytes, blockSize):
- """ Add padding to a binary string to make it an even multiple
- of the block size """
- blocks, numExtraBytes = divmod(len(extraBytes), blockSize)
- padLength = blockSize - numExtraBytes
- return extraBytes + padLength*chr(padLength)
-
- def removePad(self, paddedBinaryString, blockSize):
- """ Remove padding from a binary string """
- if not(0<len(paddedBinaryString)):
- raise DecryptNotBlockAlignedError, 'Expected More Data'
- return paddedBinaryString[:-ord(paddedBinaryString[-1])]
-
-class noPadding(Pad):
- """ No padding. Use this to get ECB behavior from encrypt/decrypt """
-
- def addPad(self, extraBytes, blockSize):
- """ Add no padding """
- return extraBytes
-
- def removePad(self, paddedBinaryString, blockSize):
- """ Remove no padding """
- return paddedBinaryString
-
-"""
- Rijndael encryption algorithm
- This byte oriented implementation is intended to closely
- match FIPS specification for readability. It is not implemented
- for performance.
-"""
-
-class Rijndael(BlockCipher):
- """ Rijndael encryption algorithm """
- def __init__(self, key = None, padding = padWithPadLen(), keySize=16, blockSize=16 ):
- self.name = 'RIJNDAEL'
- self.keySize = keySize
- self.strength = keySize*8
- self.blockSize = blockSize # blockSize is in bytes
- self.padding = padding # change default to noPadding() to get normal ECB behavior
-
- assert( keySize%4==0 and NrTable[4].has_key(keySize/4)),'key size must be 16,20,24,29 or 32 bytes'
- assert( blockSize%4==0 and NrTable.has_key(blockSize/4)), 'block size must be 16,20,24,29 or 32 bytes'
-
- self.Nb = self.blockSize/4 # Nb is number of columns of 32 bit words
- self.Nk = keySize/4 # Nk is the key length in 32-bit words
- self.Nr = NrTable[self.Nb][self.Nk] # The number of rounds (Nr) is a function of
- # the block (Nb) and key (Nk) sizes.
- if key != None:
- self.setKey(key)
-
- def setKey(self, key):
- """ Set a key and generate the expanded key """
- assert( len(key) == (self.Nk*4) ), 'Key length must be same as keySize parameter'
- self.__expandedKey = keyExpansion(self, key)
- self.reset() # BlockCipher.reset()
-
- def encryptBlock(self, plainTextBlock):
- """ Encrypt a block, plainTextBlock must be a array of bytes [Nb by 4] """
- self.state = self._toBlock(plainTextBlock)
- AddRoundKey(self, self.__expandedKey[0:self.Nb])
- for round in range(1,self.Nr): #for round = 1 step 1 to Nr
- SubBytes(self)
- ShiftRows(self)
- MixColumns(self)
- AddRoundKey(self, self.__expandedKey[round*self.Nb:(round+1)*self.Nb])
- SubBytes(self)
- ShiftRows(self)
- AddRoundKey(self, self.__expandedKey[self.Nr*self.Nb:(self.Nr+1)*self.Nb])
- return self._toBString(self.state)
-
-
- def decryptBlock(self, encryptedBlock):
- """ decrypt a block (array of bytes) """
- self.state = self._toBlock(encryptedBlock)
- AddRoundKey(self, self.__expandedKey[self.Nr*self.Nb:(self.Nr+1)*self.Nb])
- for round in range(self.Nr-1,0,-1):
- InvShiftRows(self)
- InvSubBytes(self)
- AddRoundKey(self, self.__expandedKey[round*self.Nb:(round+1)*self.Nb])
- InvMixColumns(self)
- InvShiftRows(self)
- InvSubBytes(self)
- AddRoundKey(self, self.__expandedKey[0:self.Nb])
- return self._toBString(self.state)
-
- def _toBlock(self, bs):
- """ Convert binary string to array of bytes, state[col][row]"""
- assert ( len(bs) == 4*self.Nb ), 'Rijndarl blocks must be of size blockSize'
- return [[ord(bs[4*i]),ord(bs[4*i+1]),ord(bs[4*i+2]),ord(bs[4*i+3])] for i in range(self.Nb)]
-
- def _toBString(self, block):
- """ Convert block (array of bytes) to binary string """
- l = []
- for col in block:
- for rowElement in col:
- l.append(chr(rowElement))
- return ''.join(l)
-#-------------------------------------
-""" Number of rounds Nr = NrTable[Nb][Nk]
-
- Nb Nk=4 Nk=5 Nk=6 Nk=7 Nk=8
- ------------------------------------- """
-NrTable = {4: {4:10, 5:11, 6:12, 7:13, 8:14},
- 5: {4:11, 5:11, 6:12, 7:13, 8:14},
- 6: {4:12, 5:12, 6:12, 7:13, 8:14},
- 7: {4:13, 5:13, 6:13, 7:13, 8:14},
- 8: {4:14, 5:14, 6:14, 7:14, 8:14}}
-#-------------------------------------
-def keyExpansion(algInstance, keyString):
- """ Expand a string of size keySize into a larger array """
- Nk, Nb, Nr = algInstance.Nk, algInstance.Nb, algInstance.Nr # for readability
- key = [ord(byte) for byte in keyString] # convert string to list
- w = [[key[4*i],key[4*i+1],key[4*i+2],key[4*i+3]] for i in range(Nk)]
- for i in range(Nk,Nb*(Nr+1)):
- temp = w[i-1] # a four byte column
- if (i%Nk) == 0 :
- temp = temp[1:]+[temp[0]] # RotWord(temp)
- temp = [ Sbox[byte] for byte in temp ]
- temp[0] ^= Rcon[i/Nk]
- elif Nk > 6 and i%Nk == 4 :
- temp = [ Sbox[byte] for byte in temp ] # SubWord(temp)
- w.append( [ w[i-Nk][byte]^temp[byte] for byte in range(4) ] )
- return w
-
-Rcon = (0,0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b,0x36, # note extra '0' !!!
- 0x6c,0xd8,0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63,0xc6,
- 0x97,0x35,0x6a,0xd4,0xb3,0x7d,0xfa,0xef,0xc5,0x91)
-
-#-------------------------------------
-def AddRoundKey(algInstance, keyBlock):
- """ XOR the algorithm state with a block of key material """
- for column in range(algInstance.Nb):
- for row in range(4):
- algInstance.state[column][row] ^= keyBlock[column][row]
-#-------------------------------------
-
-def SubBytes(algInstance):
- for column in range(algInstance.Nb):
- for row in range(4):
- algInstance.state[column][row] = Sbox[algInstance.state[column][row]]
-
-def InvSubBytes(algInstance):
- for column in range(algInstance.Nb):
- for row in range(4):
- algInstance.state[column][row] = InvSbox[algInstance.state[column][row]]
-
-Sbox = (0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5,
- 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76,
- 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0,
- 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0,
- 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc,
- 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15,
- 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a,
- 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75,
- 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0,
- 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84,
- 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b,
- 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf,
- 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85,
- 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8,
- 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5,
- 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2,
- 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17,
- 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73,
- 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88,
- 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb,
- 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c,
- 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79,
- 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9,
- 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08,
- 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6,
- 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a,
- 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e,
- 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e,
- 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94,
- 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf,
- 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68,
- 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16)
-
-InvSbox = (0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38,
- 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb,
- 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87,
- 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb,
- 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d,
- 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e,
- 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2,
- 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25,
- 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16,
- 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92,
- 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda,
- 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84,
- 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a,
- 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06,
- 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02,
- 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b,
- 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea,
- 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73,
- 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85,
- 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e,
- 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89,
- 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b,
- 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20,
- 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4,
- 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31,
- 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f,
- 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d,
- 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef,
- 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0,
- 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61,
- 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26,
- 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d)
-
-#-------------------------------------
-""" For each block size (Nb), the ShiftRow operation shifts row i
- by the amount Ci. Note that row 0 is not shifted.
- Nb C1 C2 C3
- ------------------- """
-shiftOffset = { 4 : ( 0, 1, 2, 3),
- 5 : ( 0, 1, 2, 3),
- 6 : ( 0, 1, 2, 3),
- 7 : ( 0, 1, 2, 4),
- 8 : ( 0, 1, 3, 4) }
-def ShiftRows(algInstance):
- tmp = [0]*algInstance.Nb # list of size Nb
- for r in range(1,4): # row 0 reamains unchanged and can be skipped
- for c in range(algInstance.Nb):
- tmp[c] = algInstance.state[(c+shiftOffset[algInstance.Nb][r]) % algInstance.Nb][r]
- for c in range(algInstance.Nb):
- algInstance.state[c][r] = tmp[c]
-def InvShiftRows(algInstance):
- tmp = [0]*algInstance.Nb # list of size Nb
- for r in range(1,4): # row 0 reamains unchanged and can be skipped
- for c in range(algInstance.Nb):
- tmp[c] = algInstance.state[(c+algInstance.Nb-shiftOffset[algInstance.Nb][r]) % algInstance.Nb][r]
- for c in range(algInstance.Nb):
- algInstance.state[c][r] = tmp[c]
-#-------------------------------------
-def MixColumns(a):
- Sprime = [0,0,0,0]
- for j in range(a.Nb): # for each column
- Sprime[0] = mul(2,a.state[j][0])^mul(3,a.state[j][1])^mul(1,a.state[j][2])^mul(1,a.state[j][3])
- Sprime[1] = mul(1,a.state[j][0])^mul(2,a.state[j][1])^mul(3,a.state[j][2])^mul(1,a.state[j][3])
- Sprime[2] = mul(1,a.state[j][0])^mul(1,a.state[j][1])^mul(2,a.state[j][2])^mul(3,a.state[j][3])
- Sprime[3] = mul(3,a.state[j][0])^mul(1,a.state[j][1])^mul(1,a.state[j][2])^mul(2,a.state[j][3])
- for i in range(4):
- a.state[j][i] = Sprime[i]
-
-def InvMixColumns(a):
- """ Mix the four bytes of every column in a linear way
- This is the opposite operation of Mixcolumn """
- Sprime = [0,0,0,0]
- for j in range(a.Nb): # for each column
- Sprime[0] = mul(0x0E,a.state[j][0])^mul(0x0B,a.state[j][1])^mul(0x0D,a.state[j][2])^mul(0x09,a.state[j][3])
- Sprime[1] = mul(0x09,a.state[j][0])^mul(0x0E,a.state[j][1])^mul(0x0B,a.state[j][2])^mul(0x0D,a.state[j][3])
- Sprime[2] = mul(0x0D,a.state[j][0])^mul(0x09,a.state[j][1])^mul(0x0E,a.state[j][2])^mul(0x0B,a.state[j][3])
- Sprime[3] = mul(0x0B,a.state[j][0])^mul(0x0D,a.state[j][1])^mul(0x09,a.state[j][2])^mul(0x0E,a.state[j][3])
- for i in range(4):
- a.state[j][i] = Sprime[i]
-
-#-------------------------------------
-def mul(a, b):
- """ Multiply two elements of GF(2^m)
- needed for MixColumn and InvMixColumn """
- if (a !=0 and b!=0):
- return Alogtable[(Logtable[a] + Logtable[b])%255]
- else:
- return 0
-
-Logtable = ( 0, 0, 25, 1, 50, 2, 26, 198, 75, 199, 27, 104, 51, 238, 223, 3,
- 100, 4, 224, 14, 52, 141, 129, 239, 76, 113, 8, 200, 248, 105, 28, 193,
- 125, 194, 29, 181, 249, 185, 39, 106, 77, 228, 166, 114, 154, 201, 9, 120,
- 101, 47, 138, 5, 33, 15, 225, 36, 18, 240, 130, 69, 53, 147, 218, 142,
- 150, 143, 219, 189, 54, 208, 206, 148, 19, 92, 210, 241, 64, 70, 131, 56,
- 102, 221, 253, 48, 191, 6, 139, 98, 179, 37, 226, 152, 34, 136, 145, 16,
- 126, 110, 72, 195, 163, 182, 30, 66, 58, 107, 40, 84, 250, 133, 61, 186,
- 43, 121, 10, 21, 155, 159, 94, 202, 78, 212, 172, 229, 243, 115, 167, 87,
- 175, 88, 168, 80, 244, 234, 214, 116, 79, 174, 233, 213, 231, 230, 173, 232,
- 44, 215, 117, 122, 235, 22, 11, 245, 89, 203, 95, 176, 156, 169, 81, 160,
- 127, 12, 246, 111, 23, 196, 73, 236, 216, 67, 31, 45, 164, 118, 123, 183,
- 204, 187, 62, 90, 251, 96, 177, 134, 59, 82, 161, 108, 170, 85, 41, 157,
- 151, 178, 135, 144, 97, 190, 220, 252, 188, 149, 207, 205, 55, 63, 91, 209,
- 83, 57, 132, 60, 65, 162, 109, 71, 20, 42, 158, 93, 86, 242, 211, 171,
- 68, 17, 146, 217, 35, 32, 46, 137, 180, 124, 184, 38, 119, 153, 227, 165,
- 103, 74, 237, 222, 197, 49, 254, 24, 13, 99, 140, 128, 192, 247, 112, 7)
-
-Alogtable= ( 1, 3, 5, 15, 17, 51, 85, 255, 26, 46, 114, 150, 161, 248, 19, 53,
- 95, 225, 56, 72, 216, 115, 149, 164, 247, 2, 6, 10, 30, 34, 102, 170,
- 229, 52, 92, 228, 55, 89, 235, 38, 106, 190, 217, 112, 144, 171, 230, 49,
- 83, 245, 4, 12, 20, 60, 68, 204, 79, 209, 104, 184, 211, 110, 178, 205,
- 76, 212, 103, 169, 224, 59, 77, 215, 98, 166, 241, 8, 24, 40, 120, 136,
- 131, 158, 185, 208, 107, 189, 220, 127, 129, 152, 179, 206, 73, 219, 118, 154,
- 181, 196, 87, 249, 16, 48, 80, 240, 11, 29, 39, 105, 187, 214, 97, 163,
- 254, 25, 43, 125, 135, 146, 173, 236, 47, 113, 147, 174, 233, 32, 96, 160,
- 251, 22, 58, 78, 210, 109, 183, 194, 93, 231, 50, 86, 250, 21, 63, 65,
- 195, 94, 226, 61, 71, 201, 64, 192, 91, 237, 44, 116, 156, 191, 218, 117,
- 159, 186, 213, 100, 172, 239, 42, 126, 130, 157, 188, 223, 122, 142, 137, 128,
- 155, 182, 193, 88, 232, 35, 101, 175, 234, 37, 111, 177, 200, 67, 197, 84,
- 252, 31, 33, 99, 165, 244, 7, 9, 27, 45, 119, 153, 176, 203, 70, 202,
- 69, 207, 74, 222, 121, 139, 134, 145, 168, 227, 62, 66, 198, 81, 243, 14,
- 18, 54, 90, 238, 41, 123, 141, 140, 143, 138, 133, 148, 167, 242, 13, 23,
- 57, 75, 221, 124, 132, 151, 162, 253, 28, 36, 108, 180, 199, 82, 246, 1)
-
-
-
-
-"""
- AES Encryption Algorithm
- The AES algorithm is just Rijndael algorithm restricted to the default
- blockSize of 128 bits.
-"""
-
-class AES(Rijndael):
- """ The AES algorithm is the Rijndael block cipher restricted to block
- sizes of 128 bits and key sizes of 128, 192 or 256 bits
- """
- def __init__(self, key = None, padding = padWithPadLen(), keySize=16):
- """ Initialize AES, keySize is in bytes """
- if not (keySize == 16 or keySize == 24 or keySize == 32) :
- raise BadKeySizeError, 'Illegal AES key size, must be 16, 24, or 32 bytes'
-
- Rijndael.__init__( self, key, padding=padding, keySize=keySize, blockSize=16 )
-
- self.name = 'AES'
-
-
-"""
- CBC mode of encryption for block ciphers.
- This algorithm mode wraps any BlockCipher to make a
- Cipher Block Chaining mode.
-"""
-from random import Random # should change to crypto.random!!!
-
-
-class CBC(BlockCipher):
- """ The CBC class wraps block ciphers to make cipher block chaining (CBC) mode
- algorithms. The initialization (IV) is automatic if set to None. Padding
- is also automatic based on the Pad class used to initialize the algorithm
- """
- def __init__(self, blockCipherInstance, padding = padWithPadLen()):
- """ CBC algorithms are created by initializing with a BlockCipher instance """
- self.baseCipher = blockCipherInstance
- self.name = self.baseCipher.name + '_CBC'
- self.blockSize = self.baseCipher.blockSize
- self.keySize = self.baseCipher.keySize
- self.padding = padding
- self.baseCipher.padding = noPadding() # baseCipher should NOT pad!!
- self.r = Random() # for IV generation, currently uses
- # mediocre standard distro version <----------------
- import time
- newSeed = time.ctime()+str(self.r) # seed with instance location
- self.r.seed(newSeed) # to make unique
- self.reset()
-
- def setKey(self, key):
- self.baseCipher.setKey(key)
-
- # Overload to reset both CBC state and the wrapped baseCipher
- def resetEncrypt(self):
- BlockCipher.resetEncrypt(self) # reset CBC encrypt state (super class)
- self.baseCipher.resetEncrypt() # reset base cipher encrypt state
-
- def resetDecrypt(self):
- BlockCipher.resetDecrypt(self) # reset CBC state (super class)
- self.baseCipher.resetDecrypt() # reset base cipher decrypt state
-
- def encrypt(self, plainText, iv=None, more=None):
- """ CBC encryption - overloads baseCipher to allow optional explicit IV
- when iv=None, iv is auto generated!
- """
- if self.encryptBlockCount == 0:
- self.iv = iv
- else:
- assert(iv==None), 'IV used only on first call to encrypt'
-
- return BlockCipher.encrypt(self,plainText, more=more)
-
- def decrypt(self, cipherText, iv=None, more=None):
- """ CBC decryption - overloads baseCipher to allow optional explicit IV
- when iv=None, iv is auto generated!
- """
- if self.decryptBlockCount == 0:
- self.iv = iv
+ result = []
+ if (self.debug):
+ print 'Unknown Token:', token
+ self.tag_pop()
+ return result
+
+
+ # special loop used to process code snippets
+ # it is NEVER used to format arguments.
+ # builds the snippetList
+    def doLoop72(self, argtype):
+        """Read a counted set of xml snippets and append them to self.snippetList.
+
+        Each appended entry is [index, tag], where tag is whatever procToken
+        returns for the snippet's token.  `argtype` is accepted but not used.
+        Returns None.
+        """
+        total = readEncodedNumber(self.fo)
+        if self.debug:
+            banner = ''.join(['Set of ', str(total), ' xml snippets. The overall structure \n',
+                              'of the document is indicated by snippet number sets at the\n',
+                              'end of each snippet. \n'])
+            print banner
+        for index in xrange(total):
+            if self.debug:
+                print 'Snippet:', str(index)
+            token = self.dict.lookup(readEncodedNumber(self.fo))
+            self.snippetList.append([index, self.procToken(token)])
+
+
+
+ # general loop code graciously submitted by "skindle" - thank you!
+    def doLoop76Mode(self, argtype, cnt, mode):
+        """Decode a vector of `cnt` values from self.fo under loop mode `mode`.
+
+        When bit 0 of `mode` is set, an adjustment value is read first and
+        subtracted from every element.  The remaining bits (mode >> 1) give
+        the number of running-sum passes applied to the vector.  Each final
+        value is passed through formatArg with `argtype`.
+
+        Returns the list of formatted values.
+        """
+        adjust = readEncodedNumber(self.fo) if mode & 1 else 0
+        passes = mode >> 1
+        values = [readEncodedNumber(self.fo) - adjust for _ in xrange(cnt)]
+        for _ in xrange(passes):
+            # turn the vector into its prefix sums, in place
+            for pos in xrange(1, cnt):
+                values[pos] += values[pos - 1]
+        return [self.formatArg(value, argtype) for value in values]
+
+
+ # dispatches loop commands bytes with various modes
+ # The 0x76 style loops are used to build vectors
+
+ # This was all derived by trial and error and
+ # new loop types may exist that are not handled here
+ # since they did not appear in the test cases
+
+    def decodeCMD(self, cmd, argtype):
+        """Decode the vector-building loop command `cmd`.
+
+        cmd     -- command value already read from the input
+        argtype -- type name used to format each decoded element
+
+        For command 0x76 the count and mode are read from self.fo and the
+        vector is decoded by doLoop76Mode.  Any other command yields an
+        empty list (and a debug message when self.debug is set).
+        """
+        if cmd == 0x76:
+            # loop with cnt, and mode to control loop styles
+            cnt = readEncodedNumber(self.fo)
+            mode = readEncodedNumber(self.fo)
+
+            if self.debug : print 'Loop for', cnt, 'with mode', mode, ': '
+            return self.doLoop76Mode(argtype, cnt, mode)
+
+        # this read 'self.dbug' before, which raised AttributeError for
+        # every unknown command instead of returning the empty result
+        if self.debug: print "Unknown command", cmd
+        return []
+
+
+
+ # add full tag path to injected snippets
+    def updateName(self, tag, prefix):
+        """Return a copy of `tag` with `prefix` + '.' put before every tag name.
+
+        tag is [name, subtags, argtype, args].  The same prefix is applied
+        to the tag and, recursively, to all of its subtags; argtype and the
+        argument list are carried over unchanged.
+        """
+        name, subtagList, argtype, argList = tag[0], tag[1], tag[2], tag[3]
+        return [
+            prefix + '.' + name,
+            [self.updateName(sub, prefix) for sub in subtagList],
+            argtype,
+            argList,
+        ]
+
+
+
+ # perform depth first injection of specified snippets into this one
+    def injectSnippets(self, snippet):
+        """Expand, depth first, the snippets referenced by `snippet`.
+
+        snippet is [number, tag] with tag = [name, subtags, argtype, args].
+        Every value in args is used as an index into self.snippetList; the
+        referenced snippet is expanded recursively, renamed with this tag's
+        name as prefix and added to the subtags.  The tag's existing subtag
+        list is extended in place.  A tag that had arguments comes back with
+        argtype 'number' and an empty argument list.
+
+        Returns a new [number, tag] list.
+        """
+        snipno, tag = snippet
+        name, subtagList, argtype, argList = tag[0], tag[1], tag[2], tag[3]
+        if len(argList) > 0:
+            injected = []
+            for index in argList:
+                _, child = self.injectSnippets(self.snippetList[index])
+                injected.append(self.updateName(child, name))
+            subtagList.extend(injected)
+            argtype = 'number'
+            argList = []
+        return [snipno, [name, subtagList, argtype, argList]]
+
+
+
+ # format the tag for output
+ def formatTag(self, node):
+ name = node[0]
+ subtagList = node[1]
+ argtype = node[2]
+ argList = node[3]
+ fullpathname = name.split('.')
+ nodename = fullpathname.pop()
+ ilvl = len(fullpathname)
+ indent = ' ' * (3 * ilvl)
+ rlst = []
+ rlst.append(indent + '<' + nodename + '>')
+ if len(argList) > 0:
+ alst = []
+ for j in argList:
+ if (argtype == 'text') or (argtype == 'scalar_text') :
+ alst.append(j + '|')
+ else :
+ alst.append(str(j) + ',')
+ argres = "".join(alst)
+ argres = argres[0:-1]
+ if argtype == 'snippets' :
+ rlst.append('snippets:' + argres)
+ else :
+ rlst.append(argres)
+ if len(subtagList) > 0 :
+ rlst.append('\n')
+ for j in subtagList:
+ if len(j) > 0 :
+ rlst.append(self.formatTag(j))
+ rlst.append(indent + '</' + nodename + '>\n')
else:
- assert(iv==None), 'IV used only on first call to decrypt'
-
- return BlockCipher.decrypt(self, cipherText, more=more)
-
- def encryptBlock(self, plainTextBlock):
- """ CBC block encryption, IV is set with 'encrypt' """
- auto_IV = ''
- if self.encryptBlockCount == 0:
- if self.iv == None:
- # generate IV and use
- self.iv = ''.join([chr(self.r.randrange(256)) for i in range(self.blockSize)])
- self.prior_encr_CT_block = self.iv
- auto_IV = self.prior_encr_CT_block # prepend IV if it's automatic
- else: # application provided IV
- assert(len(self.iv) == self.blockSize ),'IV must be same length as block'
- self.prior_encr_CT_block = self.iv
- """ encrypt the prior CT XORed with the PT """
- ct = self.baseCipher.encryptBlock( xor(self.prior_encr_CT_block, plainTextBlock) )
- self.prior_encr_CT_block = ct
- return auto_IV+ct
-
- def decryptBlock(self, encryptedBlock):
- """ Decrypt a single block """
-
- if self.decryptBlockCount == 0: # first call, process IV
- if self.iv == None: # auto decrypt IV?
- self.prior_CT_block = encryptedBlock
- return ''
+ rlst.append('</' + nodename + '>\n')
+ return "".join(rlst)
+
+
+ # flatten tag
+    def flattenTag(self, node):
+        """Render `node` and its subtags as flat text, one line per tag.
+
+        node is [name, subtags, argtype, args].  A tag with arguments is
+        written as 'name=a|b|c' ('name.snippets=a|b|c' for argtype
+        'snippets'); a tag without arguments is written as its name alone.
+        Empty subtag entries are skipped.
+        """
+        name, subtagList, argtype, argList = node[0], node[1], node[2], node[3]
+        pieces = [name]
+        if len(argList) > 0:
+            if argtype in ('text', 'scalar_text'):
+                joined = '|'.join(argList)
+            else:
+                joined = '|'.join([str(arg) for arg in argList])
+            marker = '.snippets=' if argtype == 'snippets' else '='
+            pieces.append(marker + joined)
+        pieces.append('\n')
+        pieces.extend([self.flattenTag(sub) for sub in subtagList if len(sub) > 0])
+        return "".join(pieces)
+
+
+ # create the xml output from the document tree
+    def formatDoc(self, flat_xml):
+        """Render every non-empty tag in self.doc and return the combined text.
+
+        flat_xml -- true to render with flattenTag, false to render with
+                    formatTag
+
+        The result is also printed when self.debug is set.
+        """
+        render = self.flattenTag if flat_xml else self.formatTag
+        result = "".join([render(tag) for tag in self.doc if len(tag) > 0])
+        if self.debug:
+            print result
+        return result
+
+
+
+ # main loop - parse the page.dat files
+ # to create structured document and snippets
+
+ # FIXME: value at end of magic appears to be a subtags count
+ # but for what? For now, inject an 'info' tag as it is in
+ # every dictionary and seems close to what is meant
+ # The alternative is to special case the last _ "0x5f" to mean something
+
+ def process(self):
+
+ # peek at the first bytes to see what type of file it is
+ magic = self.fo.read(9)
+ if (magic[0:1] == 'p') and (magic[2:9] == 'marker_'):
+ first_token = 'info'
+ elif (magic[0:1] == 'p') and (magic[2:9] == '__PAGE_'):
+ skip = self.fo.read(2)
+ first_token = 'info'
+ elif (magic[0:1] == 'p') and (magic[2:8] == '_PAGE_'):
+ first_token = 'info'
+ elif (magic[0:1] == 'g') and (magic[2:9] == '__GLYPH'):
+ skip = self.fo.read(3)
+ first_token = 'info'
+ else :
+ # other0.dat file
+ first_token = None
+ self.fo.seek(-9,1)
+
+
+ # main loop to read and build the document tree
+ while True:
+
+ if first_token != None :
+ # use "inserted" first token 'info' for page and glyph files
+ tag = self.procToken(first_token)
+ if len(tag) > 0 :
+ self.doc.append(tag)
+ first_token = None
+
+ v = self.getNext()
+ if (v == None):
+ break
+
+ if (v == 0x72):
+ self.doLoop72('number')
+ elif (v > 0) and (v < self.dict.getSize()) :
+ tag = self.procToken(self.dict.lookup(v))
+ if len(tag) > 0 :
+ self.doc.append(tag)
else:
- assert(len(self.iv)==self.blockSize),"Bad IV size on CBC decryption"
- self.prior_CT_block = self.iv
-
- dct = self.baseCipher.decryptBlock(encryptedBlock)
- """ XOR the prior decrypted CT with the prior CT """
- dct_XOR_priorCT = xor( self.prior_CT_block, dct )
-
- self.prior_CT_block = encryptedBlock
-
- return dct_XOR_priorCT
-
+ if self.debug:
+ print "Main Loop: Unknown value: %x" % v
+ if (v == 0):
+ if (self.peek(1) == 0x5f):
+ skip = self.fo.read(1)
+ first_token = 'info'
+
+ # now do snippet injection
+ if len(self.snippetList) > 0 :
+ if self.debug : print 'Injecting Snippets:'
+ snippet = self.injectSnippets(self.snippetList[0])
+ snipno = snippet[0]
+ tag_add = snippet[1]
+ if self.debug : print self.formatTag(tag_add)
+ if len(tag_add) > 0:
+ self.doc.append(tag_add)
+
+ # handle generation of xml output
+ xmlpage = self.formatDoc(self.flat_xml)
+
+ return xmlpage
+
+
+def fromData(dict, fname):
+    """Parse `fname` with the string table `dict`; return the flattened xml text.
+
+    Debug output is switched off.
+    """
+    parser = PageParser(fname, dict, False, True)
+    return parser.process()
+
+def getXML(dict, fname):
+    """Parse `fname` with the string table `dict`; return the non-flattened xml text.
+
+    Debug output is switched off.
+    """
+    parser = PageParser(fname, dict, False, False)
+    return parser.process()
+
+def usage():
+    """Print the command-line help text to standard output."""
+    help_lines = (
+        'Usage: ',
+        ' convert2xml.py dict0000.dat infile.dat ',
+        ' ',
+        ' Options:',
+        ' -h print this usage help message ',
+        ' -d turn on debug output to check for potential errors ',
+        ' --flat-xml output the flattened xml page description only ',
+        ' ',
+        ' This program will attempt to convert a page*.dat file or ',
+        ' glyphs*.dat file, using the dict0000.dat file, to its xml description. ',
+        ' ',
+        ' Use "cmbtc_dump.py" first to unencrypt, uncompress, and dump ',
+        ' the *.dat files from a Topaz format e-book.',
+    )
+    for text in help_lines:
+        print text
+
+#
+# Main
+#
+
+def main(argv):
+ dictFile = ""
+ pageFile = ""
+ debug = False
+ flat_xml = False
+ printOutput = False
+ if len(argv) == 0:
+ printOutput = True
+ argv = sys.argv
+
+ try:
+ opts, args = getopt.getopt(argv[1:], "hd", ["flat-xml"])
+
+ except getopt.GetoptError, err:
+
+ # print help information and exit:
+ print str(err) # will print something like "option -a not recognized"
+ usage()
+ sys.exit(2)
+
+ if len(opts) == 0 and len(args) == 0 :
+ usage()
+ sys.exit(2)
+
+ for o, a in opts:
+ if o =="-d":
+ debug=True
+ if o =="-h":
+ usage()
+ sys.exit(0)
+ if o =="--flat-xml":
+ flat_xml = True
+
+ dictFile, pageFile = args[0], args[1]
+
+ # read in the string table dictionary
+ dict = Dictionary(dictFile)
+ # dict.dumpDict()
+
+ # create a page parser
+ pp = PageParser(pageFile, dict, debug, flat_xml)
+
+ xmlpage = pp.process()
+
+ if printOutput:
+ print xmlpage
+ return 0
-"""
- AES_CBC Encryption Algorithm
-"""
+ return xmlpage
-class AES_CBC(CBC):
- """ AES encryption in CBC feedback mode """
- def __init__(self, key=None, padding=padWithPadLen(), keySize=16):
- CBC.__init__( self, AES(key, noPadding(), keySize), padding)
- self.name = 'AES_CBC'
+if __name__ == '__main__':
+ sys.exit(main(''))
-#! /usr/bin/env python
+#! /usr/bin/python
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-import sys, os
-import hmac
+import sys
+import csv
+import os
+import getopt
from struct import pack
-import hashlib
-
-
-# interface to needed routines libalfcrypto
-def _load_libalfcrypto():
- import ctypes
- from ctypes import CDLL, byref, POINTER, c_void_p, c_char_p, c_int, c_long, \
- Structure, c_ulong, create_string_buffer, addressof, string_at, cast, sizeof
-
- pointer_size = ctypes.sizeof(ctypes.c_voidp)
- name_of_lib = None
- if sys.platform.startswith('darwin'):
- name_of_lib = 'libalfcrypto.dylib'
- elif sys.platform.startswith('win'):
- if pointer_size == 4:
- name_of_lib = 'alfcrypto.dll'
+from struct import unpack
+
+
+class PParser(object):
+    """Parser for the flattened xml description of one page.
+
+    The flattened document is a list of lines, each either ``tag.path`` or
+    ``tag.path=value``; a value may hold several ``|`` separated integers.
+    Tags are always matched with ``endswith``, so a trailing part of the
+    tag path is enough to select a line.
+
+    Attributes set by the constructor:
+      flatdoc, docSize -- the document lines and their count
+      ph, pw           -- page height and width
+      gx, gy, gid      -- glyph x positions, y positions and glyph ids
+    """
+
+    def __init__(self, gd, flatxml, meta_array):
+        """Split flatxml into lines and collect page size and glyph data.
+
+        gd         -- glyph dictionary; only its lookup() method is used
+        flatxml    -- flattened xml text, one tag per line
+        meta_array -- mapping consulted for 'pageHeight' / 'pageWidth'
+                      when the document yields no positive size
+        """
+        self.gd = gd
+        self.flatdoc = flatxml.split('\n')
+        self.docSize = len(self.flatdoc)
+        self.temp = []
+
+        # page size: largest value found in the document, else the metadata
+        # entry, else the built-in defaults
+        self.ph = self._largestValue('page.h', 'book.h')
+        self.pw = self._largestValue('page.w', 'book.w')
+
+        if self.ph <= 0:
+            self.ph = int(meta_array.get('pageHeight', '11000'))
+        if self.pw <= 0:
+            self.pw = int(meta_array.get('pageWidth', '8500'))
+
+        self.gx = self._collectData('info.glyph.x')
+        self.gy = self._collectData('info.glyph.y')
+        self.gid = self._collectData('info.glyph.glyphID')
+
+    # largest integer value of the tagpath lines (or of the fallback lines
+    # when tagpath does not occur at all); -1 when neither occurs
+    def _largestValue(self, tagpath, fallback):
+        largest = -1
+        for p in self.posinDoc(tagpath) or self.posinDoc(fallback):
+            (name, argres) = self.lineinDoc(p)
+            largest = max(largest, int(argres))
+        return largest
+
+    # concatenate the integer values of every line matching tagpath
+    def _collectData(self, tagpath):
+        res = []
+        for p in self.posinDoc(tagpath):
+            res.extend(self.getDataatPos(tagpath, p))
+        return res
+
+    # split one line into its tag name and the list of raw '|' separated
+    # values; only the first '=' separates tag from value, matching
+    # lineinDoc and findinDoc
+    def _parseValues(self, item):
+        if item.find('=') >= 0:
+            (name, argt) = item.split('=', 1)
+            return name, argt.split('|')
+        return item, []
+
+    # return tag at line pos in document
+    # NOTE: pos must lie inside the document; for any other pos nothing is
+    # assigned and the return statement raises UnboundLocalError
+    def lineinDoc(self, pos) :
+        if (pos >= 0) and (pos < self.docSize) :
+            item = self.flatdoc[pos]
+            if item.find('=') >= 0:
+                (name, argres) = item.split('=',1)
+            else :
+                name = item
+                argres = ''
+        return name, argres
+
+    # find tag in doc if within pos to end inclusive
+    # returns (line index, value string) of the first match, or (-1, None);
+    # an end of -1 means "search to the end of the document"
+    def findinDoc(self, tagpath, pos, end) :
+        if end == -1 :
+            end = self.docSize
+        else:
+            end = min(self.docSize, end)
+        for j in xrange(pos, end):
+            item = self.flatdoc[j]
+            if item.find('=') >= 0:
+                (name, argres) = item.split('=',1)
+            else :
+                name = item
+                argres = ''
+            if name.endswith(tagpath) :
+                return j, argres
+        return -1, None
+
+    # return list of start positions for the tagpath
+    def posinDoc(self, tagpath):
+        startpos = []
+        pos = 0
+        res = ""
+        while res is not None :
+            (foundpos, res) = self.findinDoc(tagpath, pos, -1)
+            if res is not None :
+                startpos.append(foundpos)
+                pos = foundpos + 1
+        return startpos
+
+    # integer values of the first document line whose tag ends with path,
+    # or None when no line matches
+    def getData(self, path):
+        for item in self.flatdoc:
+            name, argres = self._parseValues(item)
+            if name.endswith(path):
+                # convert only the matched line; unrelated lines may hold
+                # non-numeric values
+                return [int(arg) for arg in argres]
+        return None
+
+    # integer values of the line at index pos when its tag ends with path,
+    # otherwise None
+    def getDataatPos(self, path, pos):
+        name, argres = self._parseValues(self.flatdoc[pos])
+        if name.endswith(path):
+            result = [int(arg) for arg in argres]
         else:
- name_of_lib = 'alfcrypto64.dll'
+            result = None
+        return result
+
+    # like getData, but searches self.temp and removes the matched line so
+    # that the next call finds the following occurrence
+    def getDataTemp(self, path):
+        for j, item in enumerate(self.temp):
+            name, argres = self._parseValues(item)
+            if name.endswith(path):
+                self.temp.pop(j)
+                return [int(arg) for arg in argres]
+        return None
+
+    # build one svg <image> element per 'img' entry of the document
+    def getImages(self):
+        result = []
+        # Work on a copy: getDataTemp() consumes the lines it matches, and
+        # consuming self.flatdoc itself would leave self.docSize stale and
+        # make a second call return nothing.
+        self.temp = list(self.flatdoc)
+        while self.getDataTemp('img') is not None:
+            h = self.getDataTemp('img.h')[0]
+            w = self.getDataTemp('img.w')[0]
+            x = self.getDataTemp('img.x')[0]
+            y = self.getDataTemp('img.y')[0]
+            src = self.getDataTemp('img.src')[0]
+            result.append('<image xlink:href="../img/img%04d.jpg" x="%d" y="%d" width="%d" height="%d" />\n' % (src, x, y, w, h))
+        return result
+
+    # glyph definitions for every distinct glyph id on the page, in
+    # ascending id order; ids unknown to the glyph dictionary are skipped
+    def getGlyphs(self):
+        result = []
+        if self.gid:
+            for gid in sorted(set(self.gid)):
+                glyph_id = 'id="gl%d"' % gid
+                path = self.gd.lookup(glyph_id)
+                if path:
+                    result.append(glyph_id + ' ' + path)
+        return result
+
+
+# Build the markup for one page from its flattened xml and return it as a
+# single string.
+#
+# gdict      -- glyph dictionary handed to PParser (its lookup() is used)
+# flat_xml   -- flattened xml text of the page
+# pageid     -- page number, formatted with %d into the <title>
+# previd     -- id of the previous page, or None when there is none
+# nextid     -- id of the next page, or None when there is none
+# svgDir     -- not referenced anywhere in this function
+# raw        -- true: emit a stand-alone svg document;
+#               false: emit an xhtml page that wraps the svg together with
+#               prev/next navigation and zoom script
+# meta_array -- mapping; 'Title' and 'Authors' are read unconditionally,
+#               'fontSize' only for a page with neither images nor glyphs
+# scaledpi   -- divisor turning page units into inches for width/height
+#
+# NOTE(review): the script part tests "if (previd)" / "if (nextid)" while the
+# arrow markup tests "previd == None" / "nextid == None"; an id of 0 would
+# draw an arrow without defining prevpage/nextpage - confirm ids never are 0.
+# NOTE(review): the svg DOCTYPE below spells the public identifier
+# "-//W3C/DTD SVG 1.1//EN"; the identifier published by the W3C is
+# "-//W3C//DTD SVG 1.1//EN".  Left untouched here because it is output text.
+# NOTE(review): pp.pw / scaledpi truncates under Python 2 when scaledpi is an
+# int - presumably callers pass a float; verify.
+def convert2SVG(gdict, flat_xml, pageid, previd, nextid, svgDir, raw, meta_array, scaledpi):
+ mlst = []
+ pp = PParser(gdict, flat_xml, meta_array)
+ mlst.append('<?xml version="1.0" standalone="no"?>\n')
+ if (raw):
+ mlst.append('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
+ mlst.append('<svg width="%fin" height="%fin" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (pp.pw / scaledpi, pp.ph / scaledpi, pp.pw -1, pp.ph -1))
+ mlst.append('<title>Page %d - %s by %s</title>\n' % (pageid, meta_array['Title'],meta_array['Authors']))
else:
- if pointer_size == 4:
- name_of_lib = 'libalfcrypto32.so'
+ mlst.append('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n')
+ mlst.append('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" ><head>\n')
+ mlst.append('<title>Page %d - %s by %s</title>\n' % (pageid, meta_array['Title'],meta_array['Authors']))
+ mlst.append('<script><![CDATA[\n')
+ mlst.append('function gd(){var p=window.location.href.replace(/^.*\?dpi=(\d+).*$/i,"$1");return p;}\n')
+ mlst.append('var dpi=%d;\n' % scaledpi)
+ if (previd) :
+ mlst.append('var prevpage="page%04d.xhtml";\n' % (previd))
+ if (nextid) :
+ mlst.append('var nextpage="page%04d.xhtml";\n' % (nextid))
+ mlst.append('var pw=%d;var ph=%d;' % (pp.pw, pp.ph))
+ mlst.append('function zoomin(){dpi=dpi*(0.8);setsize();}\n')
+ mlst.append('function zoomout(){dpi=dpi*1.25;setsize();}\n')
+ mlst.append('function setsize(){var svg=document.getElementById("svgimg");var prev=document.getElementById("prevsvg");var next=document.getElementById("nextsvg");var width=(pw/dpi)+"in";var height=(ph/dpi)+"in";svg.setAttribute("width",width);svg.setAttribute("height",height);prev.setAttribute("height",height);prev.setAttribute("width","50px");next.setAttribute("height",height);next.setAttribute("width","50px");}\n')
+ mlst.append('function ppage(){window.location.href=prevpage+"?dpi="+Math.round(dpi);}\n')
+ mlst.append('function npage(){window.location.href=nextpage+"?dpi="+Math.round(dpi);}\n')
+ mlst.append('var gt=gd();if(gt>0){dpi=gt;}\n')
+ mlst.append('window.onload=setsize;\n')
+ mlst.append(']]></script>\n')
+ mlst.append('</head>\n')
+ mlst.append('<body onLoad="setsize();" style="background-color:#777;text-align:center;">\n')
+ mlst.append('<div style="white-space:nowrap;">\n')
+ if previd == None:
+ mlst.append('<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n')
else:
- name_of_lib = 'libalfcrypto64.so'
-
- libalfcrypto = sys.path[0] + os.sep + name_of_lib
-
- if not os.path.isfile(libalfcrypto):
- raise Exception('libalfcrypto not found')
-
- libalfcrypto = CDLL(libalfcrypto)
-
- c_char_pp = POINTER(c_char_p)
- c_int_p = POINTER(c_int)
-
-
- def F(restype, name, argtypes):
- func = getattr(libalfcrypto, name)
- func.restype = restype
- func.argtypes = argtypes
- return func
-
- # aes cbc decryption
- #
- # struct aes_key_st {
- # unsigned long rd_key[4 *(AES_MAXNR + 1)];
- # int rounds;
- # };
- #
- # typedef struct aes_key_st AES_KEY;
- #
- # int AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key);
- #
- #
- # void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
- # const unsigned long length, const AES_KEY *key,
- # unsigned char *ivec, const int enc);
-
- AES_MAXNR = 14
-
- class AES_KEY(Structure):
- _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))), ('rounds', c_int)]
-
- AES_KEY_p = POINTER(AES_KEY)
- AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p, c_int])
- AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',[c_char_p, c_int, AES_KEY_p])
-
-
-
- # Pukall 1 Cipher
- # unsigned char *PC1(const unsigned char *key, unsigned int klen, const unsigned char *src,
- # unsigned char *dest, unsigned int len, int decryption);
-
- PC1 = F(c_char_p, 'PC1', [c_char_p, c_ulong, c_char_p, c_char_p, c_ulong, c_ulong])
-
- # Topaz Encryption
- # typedef struct _TpzCtx {
- # unsigned int v[2];
- # } TpzCtx;
- #
- # void topazCryptoInit(TpzCtx *ctx, const unsigned char *key, int klen);
- # void topazCryptoDecrypt(const TpzCtx *ctx, const unsigned char *in, unsigned char *out, int len);
-
- class TPZ_CTX(Structure):
- _fields_ = [('v', c_long * 2)]
-
- TPZ_CTX_p = POINTER(TPZ_CTX)
- topazCryptoInit = F(None, 'topazCryptoInit', [TPZ_CTX_p, c_char_p, c_ulong])
- topazCryptoDecrypt = F(None, 'topazCryptoDecrypt', [TPZ_CTX_p, c_char_p, c_char_p, c_ulong])
-
-
- class AES_CBC(object):
- def __init__(self):
- self._blocksize = 0
- self._keyctx = None
- self._iv = 0
-
- def set_decrypt_key(self, userkey, iv):
- self._blocksize = len(userkey)
- if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) :
- raise Exception('AES CBC improper key used')
- return
- keyctx = self._keyctx = AES_KEY()
- self._iv = iv
- rv = AES_set_decrypt_key(userkey, len(userkey) * 8, keyctx)
- if rv < 0:
- raise Exception('Failed to initialize AES CBC key')
-
- def decrypt(self, data):
- out = create_string_buffer(len(data))
- mutable_iv = create_string_buffer(self._iv, len(self._iv))
- rv = AES_cbc_encrypt(data, out, len(data), self._keyctx, mutable_iv, 0)
- if rv == 0:
- raise Exception('AES CBC decryption failed')
- return out.raw
-
- class Pukall_Cipher(object):
- def __init__(self):
- self.key = None
-
- def PC1(self, key, src, decryption=True):
- self.key = key
- out = create_string_buffer(len(src))
- de = 0
- if decryption:
- de = 1
- rv = PC1(key, len(key), src, out, len(src), de)
- return out.raw
-
- class Topaz_Cipher(object):
- def __init__(self):
- self._ctx = None
-
- def ctx_init(self, key):
- tpz_ctx = self._ctx = TPZ_CTX()
- topazCryptoInit(tpz_ctx, key, len(key))
- return tpz_ctx
-
- def decrypt(self, data, ctx=None):
- if ctx == None:
- ctx = self._ctx
- out = create_string_buffer(len(data))
- topazCryptoDecrypt(ctx, data, out, len(data))
- return out.raw
-
- print "Using Library AlfCrypto DLL/DYLIB/SO"
- return (AES_CBC, Pukall_Cipher, Topaz_Cipher)
-
-
-def _load_python_alfcrypto():
-
- import aescbc
-
- class Pukall_Cipher(object):
- def __init__(self):
- self.key = None
-
- def PC1(self, key, src, decryption=True):
- sum1 = 0;
- sum2 = 0;
- keyXorVal = 0;
- if len(key)!=16:
- print "Bad key length!"
- return None
- wkey = []
- for i in xrange(8):
- wkey.append(ord(key[i*2])<<8 | ord(key[i*2+1]))
- dst = ""
- for i in xrange(len(src)):
- temp1 = 0;
- byteXorVal = 0;
- for j in xrange(8):
- temp1 ^= wkey[j]
- sum2 = (sum2+j)*20021 + sum1
- sum1 = (temp1*346)&0xFFFF
- sum2 = (sum2+sum1)&0xFFFF
- temp1 = (temp1*20021+1)&0xFFFF
- byteXorVal ^= temp1 ^ sum2
- curByte = ord(src[i])
- if not decryption:
- keyXorVal = curByte * 257;
- curByte = ((curByte ^ (byteXorVal >> 8)) ^ byteXorVal) & 0xFF
- if decryption:
- keyXorVal = curByte * 257;
- for j in xrange(8):
- wkey[j] ^= keyXorVal;
- dst+=chr(curByte)
- return dst
-
- class Topaz_Cipher(object):
- def __init__(self):
- self._ctx = None
-
- def ctx_init(self, key):
- ctx1 = 0x0CAFFE19E
- for keyChar in key:
- keyByte = ord(keyChar)
- ctx2 = ctx1
- ctx1 = ((((ctx1 >>2) * (ctx1 >>7))&0xFFFFFFFF) ^ (keyByte * keyByte * 0x0F902007)& 0xFFFFFFFF )
- self._ctx = [ctx1, ctx2]
- return [ctx1,ctx2]
-
- def decrypt(self, data, ctx=None):
- if ctx == None:
- ctx = self._ctx
- ctx1 = ctx[0]
- ctx2 = ctx[1]
- plainText = ""
- for dataChar in data:
- dataByte = ord(dataChar)
- m = (dataByte ^ ((ctx1 >> 3) &0xFF) ^ ((ctx2<<3) & 0xFF)) &0xFF
- ctx2 = ctx1
- ctx1 = (((ctx1 >> 2) * (ctx1 >> 7)) &0xFFFFFFFF) ^((m * m * 0x0F902007) &0xFFFFFFFF)
- plainText += chr(m)
- return plainText
-
- class AES_CBC(object):
- def __init__(self):
- self._key = None
- self._iv = None
- self.aes = None
-
- def set_decrypt_key(self, userkey, iv):
- self._key = userkey
- self._iv = iv
- self.aes = aescbc.AES_CBC(userkey, aescbc.noPadding(), len(userkey))
-
- def decrypt(self, data):
- iv = self._iv
- cleartext = self.aes.decrypt(iv + data)
- return cleartext
-
- return (AES_CBC, Pukall_Cipher, Topaz_Cipher)
-
-
-def _load_crypto():
- AES_CBC = Pukall_Cipher = Topaz_Cipher = None
- cryptolist = (_load_libalfcrypto, _load_python_alfcrypto)
- for loader in cryptolist:
- try:
- AES_CBC, Pukall_Cipher, Topaz_Cipher = loader()
- break
- except (ImportError, Exception):
- pass
- return AES_CBC, Pukall_Cipher, Topaz_Cipher
-
-AES_CBC, Pukall_Cipher, Topaz_Cipher = _load_crypto()
-
-
-class KeyIVGen(object):
- # this only exists in openssl so we will use pure python implementation instead
- # PKCS5_PBKDF2_HMAC_SHA1 = F(c_int, 'PKCS5_PBKDF2_HMAC_SHA1',
- # [c_char_p, c_ulong, c_char_p, c_ulong, c_ulong, c_ulong, c_char_p])
- def pbkdf2(self, passwd, salt, iter, keylen):
-
- def xorstr( a, b ):
- if len(a) != len(b):
- raise Exception("xorstr(): lengths differ")
- return ''.join((chr(ord(x)^ord(y)) for x, y in zip(a, b)))
-
- def prf( h, data ):
- hm = h.copy()
- hm.update( data )
- return hm.digest()
-
- def pbkdf2_F( h, salt, itercount, blocknum ):
- U = prf( h, salt + pack('>i',blocknum ) )
- T = U
- for i in range(2, itercount+1):
- U = prf( h, U )
- T = xorstr( T, U )
- return T
-
- sha = hashlib.sha1
- digest_size = sha().digest_size
- # l - number of output blocks to produce
- l = keylen / digest_size
- if keylen % digest_size != 0:
- l += 1
- h = hmac.new( passwd, None, sha )
- T = ""
- for i in range(1, l+1):
- T += pbkdf2_F( h, salt, iter, i )
- return T[0: keylen]
-
-
+ mlst.append('<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,150,95,5,95,295" fill="#AAAAAA" /></svg></a>\n')
+
+ mlst.append('<a href="javascript:npage();"><svg id="svgimg" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" style="background-color:#FFF;border:1px solid black;">' % (pp.pw, pp.ph))
+ if (pp.gid != None):
+ mlst.append('<defs>\n')
+ gdefs = pp.getGlyphs()
+ for j in xrange(0,len(gdefs)):
+ mlst.append(gdefs[j])
+ mlst.append('</defs>\n')
+ img = pp.getImages()
+ if (img != None):
+ for j in xrange(0,len(img)):
+ mlst.append(img[j])
+ if (pp.gid != None):
+ for j in xrange(0,len(pp.gid)):
+ mlst.append('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (pp.gid[j], pp.gx[j], pp.gy[j]))
+ if (img == None or len(img) == 0) and (pp.gid == None or len(pp.gid) == 0):
+ xpos = "%d" % (pp.pw // 3)
+ ypos = "%d" % (pp.ph // 3)
+ mlst.append('<text x="' + xpos + '" y="' + ypos + '" font-size="' + meta_array['fontSize'] + '" font-family="Helvetica" stroke="black">This page intentionally left blank.</text>\n')
+ if (raw) :
+ mlst.append('</svg>')
+ else :
+ mlst.append('</svg></a>\n')
+ if nextid == None:
+ mlst.append('<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n')
+ else :
+ mlst.append('<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,5,5,295,95,150" fill="#AAAAAA" /></svg></a>\n')
+ mlst.append('</div>\n')
+ mlst.append('<div><a href="javascript:zoomin();">zoom in</a> - <a href="javascript:zoomout();">zoom out</a></div>\n')
+ mlst.append('</body>\n')
+ mlst.append('</html>\n')
+ return "".join(mlst)
-#! /usr/bin/python
-# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-# For use with Topaz Scripts Version 2.6
+#!/usr/bin/env python
+from __future__ import with_statement
import sys
-import csv
-import os
-import math
-import getopt
-from struct import pack
-from struct import unpack
-
-
-class DocParser(object):
- def __init__(self, flatxml, classlst, fileid, bookDir, gdict, fixedimage):
- self.id = os.path.basename(fileid).replace('.dat','')
- self.svgcount = 0
- self.docList = flatxml.split('\n')
- self.docSize = len(self.docList)
- self.classList = {}
- self.bookDir = bookDir
- self.gdict = gdict
- tmpList = classlst.split('\n')
- for pclass in tmpList:
- if pclass != '':
- # remove the leading period from the css name
- cname = pclass[1:]
- self.classList[cname] = True
- self.fixedimage = fixedimage
- self.ocrtext = []
- self.link_id = []
- self.link_title = []
- self.link_page = []
- self.link_href = []
- self.link_type = []
- self.dehyphen_rootid = []
- self.paracont_stemid = []
- self.parastems_stemid = []
-
-
- def getGlyph(self, gid):
- result = ''
- id='id="gl%d"' % gid
- return self.gdict.lookup(id)
-
- def glyphs_to_image(self, glyphList):
-
- def extract(path, key):
- b = path.find(key) + len(key)
- e = path.find(' ',b)
- return int(path[b:e])
-
- svgDir = os.path.join(self.bookDir,'svg')
-
- imgDir = os.path.join(self.bookDir,'img')
- imgname = self.id + '_%04d.svg' % self.svgcount
- imgfile = os.path.join(imgDir,imgname)
-
- # get glyph information
- gxList = self.getData('info.glyph.x',0,-1)
- gyList = self.getData('info.glyph.y',0,-1)
- gidList = self.getData('info.glyph.glyphID',0,-1)
-
- gids = []
- maxws = []
- maxhs = []
- xs = []
- ys = []
- gdefs = []
-
- # get path defintions, positions, dimensions for each glyph
- # that makes up the image, and find min x and min y to reposition origin
- minx = -1
- miny = -1
- for j in glyphList:
- gid = gidList[j]
- gids.append(gid)
-
- xs.append(gxList[j])
- if minx == -1: minx = gxList[j]
- else : minx = min(minx, gxList[j])
-
- ys.append(gyList[j])
- if miny == -1: miny = gyList[j]
- else : miny = min(miny, gyList[j])
-
- path = self.getGlyph(gid)
- gdefs.append(path)
-
- maxws.append(extract(path,'width='))
- maxhs.append(extract(path,'height='))
-
-
- # change the origin to minx, miny and calc max height and width
- maxw = maxws[0] + xs[0] - minx
- maxh = maxhs[0] + ys[0] - miny
- for j in xrange(0, len(xs)):
- xs[j] = xs[j] - minx
- ys[j] = ys[j] - miny
- maxw = max( maxw, (maxws[j] + xs[j]) )
- maxh = max( maxh, (maxhs[j] + ys[j]) )
-
- # open the image file for output
- ifile = open(imgfile,'w')
- ifile.write('<?xml version="1.0" standalone="no"?>\n')
- ifile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
- ifile.write('<svg width="%dpx" height="%dpx" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (math.floor(maxw/10), math.floor(maxh/10), maxw, maxh))
- ifile.write('<defs>\n')
- for j in xrange(0,len(gdefs)):
- ifile.write(gdefs[j])
- ifile.write('</defs>\n')
- for j in xrange(0,len(gids)):
- ifile.write('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (gids[j], xs[j], ys[j]))
- ifile.write('</svg>')
- ifile.close()
-
- return 0
-
-
-
- # return tag at line pos in document
- def lineinDoc(self, pos) :
- if (pos >= 0) and (pos < self.docSize) :
- item = self.docList[pos]
- if item.find('=') >= 0:
- (name, argres) = item.split('=',1)
+import os, csv
+import binascii
+import zlib
+import re
+from struct import pack, unpack, unpack_from
+
+# Project-specific exception type; it adds no behaviour to Exception.
+# Presumably raised when DRM processing fails - the raise sites are outside
+# this section, confirm against them.
+class DrmException(Exception):
+ pass
+
+global charMap1
+global charMap3
+global charMap4
+
+# True when the calibre application has already been imported, i.e. when
+# this module runs as part of the calibre plugin
+inCalibre = 'calibre' in sys.modules
+
+# The Kindle-info helpers exist for Windows and Mac OS X only; on any other
+# platform none of the four names is imported.  Inside calibre they live in
+# the plugin package, otherwise next to this script.
+if sys.platform.startswith('win'):
+    if inCalibre:
+        from calibre_plugins.k4mobidedrm.k4pcutils import getKindleInfoFiles, getDBfromFile, GetUserName, GetIDString
+    else:
+        from k4pcutils import getKindleInfoFiles, getDBfromFile, GetUserName, GetIDString
+
+if sys.platform.startswith('darwin'):
+    if inCalibre:
+        from calibre_plugins.k4mobidedrm.k4mutils import getKindleInfoFiles, getDBfromFile, GetUserName, GetIDString
+    else:
+        from k4mutils import getKindleInfoFiles, getDBfromFile, GetUserName, GetIDString
+
+
+# character tables used by encode() / decode() and encodePID()
+charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
+charMap3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
+charMap4 = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
+
+# crypto digest routines
+import hashlib
+
+def MD5(message):
+    """Return the raw (binary, not hex) MD5 digest of message."""
+    return hashlib.md5(message).digest()
+
+def SHA1(message):
+    """Return the raw (binary, not hex) SHA-1 digest of message."""
+    return hashlib.sha1(message).digest()
+
+
+# Encode the bytes in data with the characters in map
+def encode(data, map):
+    """Return data with every character replaced by two characters of map.
+
+    For each character the first output character is indexed by
+    (ord(char) ^ 0x80) // len(map) and the second by ord(char) % len(map);
+    note that the remainder is taken from the unmodified value.
+    """
+    base = len(map)
+    pieces = []
+    for char in data:
+        value = ord(char)
+        pieces.append(map[(value ^ 0x80) // base])
+        pieces.append(map[value % base])
+    return "".join(pieces)
+
+# Hash the bytes in data and then encode the digest with the characters in map
+def encodeHash(data,map):
+    """Return the MD5 digest of data, encoded with the characters in map."""
+    digest = MD5(data)
+    return encode(digest, map)
+
+# Decode the string in data with the characters in map. Returns the decoded bytes
+def decode(data,map):
+    """Return the bytes decoded from the character pairs in data.
+
+    Each pair of characters is looked up in map; decoding stops at the first
+    pair containing a character that is not in map.  A trailing unpaired
+    character is ignored.
+    """
+    base = len(map)
+    pieces = []
+    for i in range(0, len(data) - 1, 2):
+        high = map.find(data[i])
+        low = map.find(data[i+1])
+        if high == -1 or low == -1:
+            break
+        pieces.append(pack("B", (((high * base) ^ 0x80) & 0xFF) + low))
+    return "".join(pieces)
+
+#
+# PID generation routines
+#
+
+# Returns the two bits at offset from a bit field
+def getTwoBitsFromBitField(bitField,offset):
+    """Return the offset-th 2-bit group of bitField as an integer 0..3.
+
+    Groups are numbered from the most significant bits of the first
+    character, four groups per character.
+    """
+    byteNumber, slot = divmod(offset, 4)
+    shift = 6 - 2 * slot
+    return (ord(bitField[byteNumber]) >> shift) & 3
+
+# Returns the six bits at offset from a bit field
+def getSixBitsFromBitField(bitField,offset):
+    """Return the offset-th 6-bit group of bitField as an integer 0..63.
+
+    A 6-bit group is assembled from three consecutive 2-bit groups, the
+    first of them being the most significant.
+    """
+    first = offset * 3
+    value = 0
+    for k in range(3):
+        value = (value << 2) + getTwoBitsFromBitField(bitField, first + k)
+    return value
+
+# 8 bits to six bits encoding from hash to generate PID string
+def encodePID(hash):
+    """Return the 8-character PID string for hash.
+
+    The first eight 6-bit groups of hash each select one character of
+    charMap3.
+    """
+    global charMap3
+    return "".join([charMap3[getSixBitsFromBitField(hash, position)] for position in range(0, 8)])
+
+# Encryption table used to generate the device PID
+def generatePidEncryptionTable() :
+ table = []
+ for counter1 in range (0,0x100):
+ value = counter1
+ for counter2 in range (0,8):
+ if (value & 1 == 0) :
+ value = value >> 1
else :
- name = item
- argres = ''
- return name, argres
-
-
- # find tag in doc if within pos to end inclusive
- def findinDoc(self, tagpath, pos, end) :
- result = None
- if end == -1 :
- end = self.docSize
- else:
- end = min(self.docSize, end)
- foundat = -1
- for j in xrange(pos, end):
- item = self.docList[j]
- if item.find('=') >= 0:
- (name, argres) = item.split('=',1)
- else :
- name = item
- argres = ''
- if name.endswith(tagpath) :
- result = argres
- foundat = j
- break
- return foundat, result
-
-
- # return list of start positions for the tagpath
- def posinDoc(self, tagpath):
- startpos = []
- pos = 0
- res = ""
- while res != None :
- (foundpos, res) = self.findinDoc(tagpath, pos, -1)
- if res != None :
- startpos.append(foundpos)
- pos = foundpos + 1
- return startpos
-
-
- # returns a vector of integers for the tagpath
- def getData(self, tagpath, pos, end):
- argres=[]
- (foundat, argt) = self.findinDoc(tagpath, pos, end)
- if (argt != None) and (len(argt) > 0) :
- argList = argt.split('|')
- argres = [ int(strval) for strval in argList]
- return argres
-
-
- # get the class
- def getClass(self, pclass):
- nclass = pclass
-
- # class names are an issue given topaz may start them with numerals (not allowed),
- # use a mix of cases (which cause some browsers problems), and actually
- # attach numbers after "_reclustered*" to the end to deal classeses that inherit
- # from a base class (but then not actually provide all of these _reclustereed
- # classes in the stylesheet!
-
- # so we clean this up by lowercasing, prepend 'cl-', and getting any baseclass
- # that exists in the stylesheet first, and then adding this specific class
- # after
-
- # also some class names have spaces in them so need to convert to dashes
- if nclass != None :
- nclass = nclass.replace(' ','-')
- classres = ''
- nclass = nclass.lower()
- nclass = 'cl-' + nclass
- baseclass = ''
- # graphic is the base class for captions
- if nclass.find('cl-cap-') >=0 :
- classres = 'graphic' + ' '
- else :
- # strip to find baseclass
- p = nclass.find('_')
- if p > 0 :
- baseclass = nclass[0:p]
- if baseclass in self.classList:
- classres += baseclass + ' '
- classres += nclass
- nclass = classres
- return nclass
-
-
- # develop a sorted description of the starting positions of
- # groups and regions on the page, as well as the page type
- def PageDescription(self):
-
- def compare(x, y):
- (xtype, xval) = x
- (ytype, yval) = y
- if xval > yval:
- return 1
- if xval == yval:
- return 0
- return -1
-
- result = []
- (pos, pagetype) = self.findinDoc('page.type',0,-1)
-
- groupList = self.posinDoc('page.group')
- groupregionList = self.posinDoc('page.group.region')
- pageregionList = self.posinDoc('page.region')
- # integrate into one list
- for j in groupList:
- result.append(('grpbeg',j))
- for j in groupregionList:
- result.append(('gregion',j))
- for j in pageregionList:
- result.append(('pregion',j))
- result.sort(compare)
-
- # insert group end and page end indicators
- inGroup = False
- j = 0
- while True:
- if j == len(result): break
- rtype = result[j][0]
- rval = result[j][1]
- if not inGroup and (rtype == 'grpbeg') :
- inGroup = True
- j = j + 1
- elif inGroup and (rtype in ('grpbeg', 'pregion')):
- result.insert(j,('grpend',rval))
- inGroup = False
- else:
- j = j + 1
- if inGroup:
- result.append(('grpend',-1))
- result.append(('pageend', -1))
- return pagetype, result
-
-
-
- # build a description of the paragraph
- def getParaDescription(self, start, end, regtype):
-
- result = []
-
- # paragraph
- (pos, pclass) = self.findinDoc('paragraph.class',start,end)
-
- pclass = self.getClass(pclass)
-
- # if paragraph uses extratokens (extra glyphs) then make it fixed
- (pos, extraglyphs) = self.findinDoc('paragraph.extratokens',start,end)
-
- # build up a description of the paragraph in result and return it
- # first check for the basic - all words paragraph
- (pos, sfirst) = self.findinDoc('paragraph.firstWord',start,end)
- (pos, slast) = self.findinDoc('paragraph.lastWord',start,end)
- if (sfirst != None) and (slast != None) :
- first = int(sfirst)
- last = int(slast)
-
- makeImage = (regtype == 'vertical') or (regtype == 'table')
- makeImage = makeImage or (extraglyphs != None)
- if self.fixedimage:
- makeImage = makeImage or (regtype == 'fixed')
-
- if (pclass != None):
- makeImage = makeImage or (pclass.find('.inverted') >= 0)
- if self.fixedimage :
- makeImage = makeImage or (pclass.find('cl-f-') >= 0)
-
- # before creating an image make sure glyph info exists
- gidList = self.getData('info.glyph.glyphID',0,-1)
-
- makeImage = makeImage & (len(gidList) > 0)
-
- if not makeImage :
- # standard all word paragraph
- for wordnum in xrange(first, last):
- result.append(('ocr', wordnum))
- return pclass, result
-
- # convert paragraph to svg image
- # translate first and last word into first and last glyphs
- # and generate inline image and include it
- glyphList = []
- firstglyphList = self.getData('word.firstGlyph',0,-1)
- gidList = self.getData('info.glyph.glyphID',0,-1)
- firstGlyph = firstglyphList[first]
- if last < len(firstglyphList):
- lastGlyph = firstglyphList[last]
- else :
- lastGlyph = len(gidList)
-
- # handle case of white sapce paragraphs with no actual glyphs in them
- # by reverting to text based paragraph
- if firstGlyph >= lastGlyph:
- # revert to standard text based paragraph
- for wordnum in xrange(first, last):
- result.append(('ocr', wordnum))
- return pclass, result
-
- for glyphnum in xrange(firstGlyph, lastGlyph):
- glyphList.append(glyphnum)
- # include any extratokens if they exist
- (pos, sfg) = self.findinDoc('extratokens.firstGlyph',start,end)
- (pos, slg) = self.findinDoc('extratokens.lastGlyph',start,end)
- if (sfg != None) and (slg != None):
- for glyphnum in xrange(int(sfg), int(slg)):
- glyphList.append(glyphnum)
- num = self.svgcount
- self.glyphs_to_image(glyphList)
- self.svgcount += 1
- result.append(('svg', num))
- return pclass, result
-
- # this type of paragraph may be made up of multiple spans, inline
- # word monograms (images), and words with semantic meaning,
- # plus glyphs used to form starting letter of first word
-
- # need to parse this type line by line
- line = start + 1
- word_class = ''
-
- # if end is -1 then we must search to end of document
- if end == -1 :
- end = self.docSize
-
- # seems some xml has last* coming before first* so we have to
- # handle any order
- sp_first = -1
- sp_last = -1
-
- gl_first = -1
- gl_last = -1
-
- ws_first = -1
- ws_last = -1
-
- word_class = ''
-
- word_semantic_type = ''
-
- while (line < end) :
-
- (name, argres) = self.lineinDoc(line)
-
- if name.endswith('span.firstWord') :
- sp_first = int(argres)
-
- elif name.endswith('span.lastWord') :
- sp_last = int(argres)
-
- elif name.endswith('word.firstGlyph') :
- gl_first = int(argres)
-
- elif name.endswith('word.lastGlyph') :
- gl_last = int(argres)
-
- elif name.endswith('word_semantic.firstWord'):
- ws_first = int(argres)
-
- elif name.endswith('word_semantic.lastWord'):
- ws_last = int(argres)
-
- elif name.endswith('word.class'):
- (cname, space) = argres.split('-',1)
- if space == '' : space = '0'
- if (cname == 'spaceafter') and (int(space) > 0) :
- word_class = 'sa'
-
- elif name.endswith('word.img.src'):
- result.append(('img' + word_class, int(argres)))
- word_class = ''
-
- elif name.endswith('region.img.src'):
- result.append(('img' + word_class, int(argres)))
-
- if (sp_first != -1) and (sp_last != -1):
- for wordnum in xrange(sp_first, sp_last):
- result.append(('ocr', wordnum))
- sp_first = -1
- sp_last = -1
-
- if (gl_first != -1) and (gl_last != -1):
- glyphList = []
- for glyphnum in xrange(gl_first, gl_last):
- glyphList.append(glyphnum)
- num = self.svgcount
- self.glyphs_to_image(glyphList)
- self.svgcount += 1
- result.append(('svg', num))
- gl_first = -1
- gl_last = -1
-
- if (ws_first != -1) and (ws_last != -1):
- for wordnum in xrange(ws_first, ws_last):
- result.append(('ocr', wordnum))
- ws_first = -1
- ws_last = -1
-
- line += 1
-
- return pclass, result
-
-
- def buildParagraph(self, pclass, pdesc, type, regtype) :
- parares = ''
- sep =''
-
- classres = ''
- if pclass :
- classres = ' class="' + pclass + '"'
-
- br_lb = (regtype == 'fixed') or (regtype == 'chapterheading') or (regtype == 'vertical')
-
- handle_links = len(self.link_id) > 0
-
- if (type == 'full') or (type == 'begin') :
- parares += '<p' + classres + '>'
-
- if (type == 'end'):
- parares += ' '
-
- lstart = len(parares)
-
- cnt = len(pdesc)
-
- for j in xrange( 0, cnt) :
-
- (wtype, num) = pdesc[j]
-
- if wtype == 'ocr' :
- word = self.ocrtext[num]
- sep = ' '
-
- if handle_links:
- link = self.link_id[num]
- if (link > 0):
- linktype = self.link_type[link-1]
- title = self.link_title[link-1]
- if (title == "") or (parares.rfind(title) < 0):
- title=parares[lstart:]
- if linktype == 'external' :
- linkhref = self.link_href[link-1]
- linkhtml = '<a href="%s">' % linkhref
- else :
- if len(self.link_page) >= link :
- ptarget = self.link_page[link-1] - 1
- linkhtml = '<a href="#page%04d">' % ptarget
- else :
- # just link to the current page
- linkhtml = '<a href="#' + self.id + '">'
- linkhtml += title + '</a>'
- pos = parares.rfind(title)
- if pos >= 0:
- parares = parares[0:pos] + linkhtml + parares[pos+len(title):]
- else :
- parares += linkhtml
- lstart = len(parares)
- if word == '_link_' : word = ''
- elif (link < 0) :
- if word == '_link_' : word = ''
-
- if word == '_lb_':
- if ((num-1) in self.dehyphen_rootid ) or handle_links:
- word = ''
- sep = ''
- elif br_lb :
- word = '<br />\n'
- sep = ''
- else :
- word = '\n'
- sep = ''
-
- if num in self.dehyphen_rootid :
- word = word[0:-1]
- sep = ''
-
- parares += word + sep
-
- elif wtype == 'img' :
- sep = ''
- parares += '<img src="img/img%04d.jpg" alt="" />' % num
- parares += sep
-
- elif wtype == 'imgsa' :
- sep = ' '
- parares += '<img src="img/img%04d.jpg" alt="" />' % num
- parares += sep
-
- elif wtype == 'svg' :
- sep = ''
- parares += '<img src="img/' + self.id + '_%04d.svg" alt="" />' % num
- parares += sep
-
- if len(sep) > 0 : parares = parares[0:-1]
- if (type == 'full') or (type == 'end') :
- parares += '</p>'
- return parares
-
-
- def buildTOCEntry(self, pdesc) :
- parares = ''
- sep =''
- tocentry = ''
- handle_links = len(self.link_id) > 0
-
- lstart = 0
-
- cnt = len(pdesc)
- for j in xrange( 0, cnt) :
-
- (wtype, num) = pdesc[j]
-
- if wtype == 'ocr' :
- word = self.ocrtext[num]
- sep = ' '
-
- if handle_links:
- link = self.link_id[num]
- if (link > 0):
- linktype = self.link_type[link-1]
- title = self.link_title[link-1]
- title = title.rstrip('. ')
- alt_title = parares[lstart:]
- alt_title = alt_title.strip()
- # now strip off the actual printed page number
- alt_title = alt_title.rstrip('01234567890ivxldIVXLD-.')
- alt_title = alt_title.rstrip('. ')
- # skip over any external links - can't have them in a books toc
- if linktype == 'external' :
- title = ''
- alt_title = ''
- linkpage = ''
- else :
- if len(self.link_page) >= link :
- ptarget = self.link_page[link-1] - 1
- linkpage = '%04d' % ptarget
- else :
- # just link to the current page
- linkpage = self.id[4:]
- if len(alt_title) >= len(title):
- title = alt_title
- if title != '' and linkpage != '':
- tocentry += title + '|' + linkpage + '\n'
- lstart = len(parares)
- if word == '_link_' : word = ''
- elif (link < 0) :
- if word == '_link_' : word = ''
-
- if word == '_lb_':
- word = ''
- sep = ''
-
- if num in self.dehyphen_rootid :
- word = word[0:-1]
- sep = ''
-
- parares += word + sep
-
- else :
- continue
-
- return tocentry
-
-
-
-
- # walk the document tree collecting the information needed
- # to build an html page using the ocrText
-
- def process(self):
-
- tocinfo = ''
- hlst = []
-
- # get the ocr text
- (pos, argres) = self.findinDoc('info.word.ocrText',0,-1)
- if argres : self.ocrtext = argres.split('|')
-
- # get information to dehyphenate the text
- self.dehyphen_rootid = self.getData('info.dehyphen.rootID',0,-1)
-
- # determine if first paragraph is continued from previous page
- (pos, self.parastems_stemid) = self.findinDoc('info.paraStems.stemID',0,-1)
- first_para_continued = (self.parastems_stemid != None)
-
- # determine if last paragraph is continued onto the next page
- (pos, self.paracont_stemid) = self.findinDoc('info.paraCont.stemID',0,-1)
- last_para_continued = (self.paracont_stemid != None)
-
- # collect link ids
- self.link_id = self.getData('info.word.link_id',0,-1)
-
- # collect link destination page numbers
- self.link_page = self.getData('info.links.page',0,-1)
-
- # collect link types (container versus external)
- (pos, argres) = self.findinDoc('info.links.type',0,-1)
- if argres : self.link_type = argres.split('|')
-
- # collect link destinations
- (pos, argres) = self.findinDoc('info.links.href',0,-1)
- if argres : self.link_href = argres.split('|')
-
- # collect link titles
- (pos, argres) = self.findinDoc('info.links.title',0,-1)
- if argres :
- self.link_title = argres.split('|')
- else:
- self.link_title.append('')
-
- # get a descriptions of the starting points of the regions
- # and groups on the page
- (pagetype, pageDesc) = self.PageDescription()
- regcnt = len(pageDesc) - 1
-
- anchorSet = False
- breakSet = False
- inGroup = False
-
- # process each region on the page and convert what you can to html
-
- for j in xrange(regcnt):
-
- (etype, start) = pageDesc[j]
- (ntype, end) = pageDesc[j+1]
-
-
- # set anchor for link target on this page
- if not anchorSet and not first_para_continued:
- hlst.append('<div style="visibility: hidden; height: 0; width: 0;" id="')
- hlst.append(self.id + '" title="pagetype_' + pagetype + '"></div>\n')
- anchorSet = True
-
- # handle groups of graphics with text captions
- if (etype == 'grpbeg'):
- (pos, grptype) = self.findinDoc('group.type', start, end)
- if grptype != None:
- if grptype == 'graphic':
- gcstr = ' class="' + grptype + '"'
- hlst.append('<div' + gcstr + '>')
- inGroup = True
-
- elif (etype == 'grpend'):
- if inGroup:
- hlst.append('</div>\n')
- inGroup = False
-
- else:
- (pos, regtype) = self.findinDoc('region.type',start,end)
-
- if regtype == 'graphic' :
- (pos, simgsrc) = self.findinDoc('img.src',start,end)
- if simgsrc:
- if inGroup:
- hlst.append('<img src="img/img%04d.jpg" alt="" />' % int(simgsrc))
- else:
- hlst.append('<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc))
-
- elif regtype == 'chapterheading' :
- (pclass, pdesc) = self.getParaDescription(start,end, regtype)
- if not breakSet:
- hlst.append('<div style="page-break-after: always;"> </div>\n')
- breakSet = True
- tag = 'h1'
- if pclass and (len(pclass) >= 7):
- if pclass[3:7] == 'ch1-' : tag = 'h1'
- if pclass[3:7] == 'ch2-' : tag = 'h2'
- if pclass[3:7] == 'ch3-' : tag = 'h3'
- hlst.append('<' + tag + ' class="' + pclass + '">')
- else:
- hlst.append('<' + tag + '>')
- hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype))
- hlst.append('</' + tag + '>')
-
- elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') or (regtype == 'listitem'):
- ptype = 'full'
- # check to see if this is a continution from the previous page
- if first_para_continued :
- ptype = 'end'
- first_para_continued = False
- (pclass, pdesc) = self.getParaDescription(start,end, regtype)
- if pclass and (len(pclass) >= 6) and (ptype == 'full'):
- tag = 'p'
- if pclass[3:6] == 'h1-' : tag = 'h4'
- if pclass[3:6] == 'h2-' : tag = 'h5'
- if pclass[3:6] == 'h3-' : tag = 'h6'
- hlst.append('<' + tag + ' class="' + pclass + '">')
- hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype))
- hlst.append('</' + tag + '>')
- else :
- hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype))
-
- elif (regtype == 'tocentry') :
- ptype = 'full'
- if first_para_continued :
- ptype = 'end'
- first_para_continued = False
- (pclass, pdesc) = self.getParaDescription(start,end, regtype)
- tocinfo += self.buildTOCEntry(pdesc)
- hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype))
-
- elif (regtype == 'vertical') or (regtype == 'table') :
- ptype = 'full'
- if inGroup:
- ptype = 'middle'
- if first_para_continued :
- ptype = 'end'
- first_para_continued = False
- (pclass, pdesc) = self.getParaDescription(start, end, regtype)
- hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype))
-
-
- elif (regtype == 'synth_fcvr.center'):
- (pos, simgsrc) = self.findinDoc('img.src',start,end)
- if simgsrc:
- hlst.append('<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc))
-
- else :
- print ' Making region type', regtype,
- (pos, temp) = self.findinDoc('paragraph',start,end)
- (pos2, temp) = self.findinDoc('span',start,end)
- if pos != -1 or pos2 != -1:
- print ' a "text" region'
- orig_regtype = regtype
- regtype = 'fixed'
- ptype = 'full'
- # check to see if this is a continution from the previous page
- if first_para_continued :
- ptype = 'end'
- first_para_continued = False
- (pclass, pdesc) = self.getParaDescription(start,end, regtype)
- if not pclass:
- if orig_regtype.endswith('.right') : pclass = 'cl-right'
- elif orig_regtype.endswith('.center') : pclass = 'cl-center'
- elif orig_regtype.endswith('.left') : pclass = 'cl-left'
- elif orig_regtype.endswith('.justify') : pclass = 'cl-justify'
- if pclass and (ptype == 'full') and (len(pclass) >= 6):
- tag = 'p'
- if pclass[3:6] == 'h1-' : tag = 'h4'
- if pclass[3:6] == 'h2-' : tag = 'h5'
- if pclass[3:6] == 'h3-' : tag = 'h6'
- hlst.append('<' + tag + ' class="' + pclass + '">')
- hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype))
- hlst.append('</' + tag + '>')
- else :
- hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype))
- else :
- print ' a "graphic" region'
- (pos, simgsrc) = self.findinDoc('img.src',start,end)
- if simgsrc:
- hlst.append('<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc))
-
-
- htmlpage = "".join(hlst)
- if last_para_continued :
- if htmlpage[-4:] == '</p>':
- htmlpage = htmlpage[0:-4]
- last_para_continued = False
-
- return htmlpage, tocinfo
-
-
-def convert2HTML(flatxml, classlst, fileid, bookDir, gdict, fixedimage):
- # create a document parser
- dp = DocParser(flatxml, classlst, fileid, bookDir, gdict, fixedimage)
- htmlpage, tocinfo = dp.process()
- return htmlpage, tocinfo
+ value = value >> 1
+ value = value ^ 0xEDB88320
+ table.append(value)
+ return table
+
+# Seed value used to generate the device PID
+def generatePidSeed(table, dsn):
+    """Fold the first four characters of dsn through a lookup table.
+
+    Starting from zero, each of dsn[0] .. dsn[3] is XORed with the running
+    value; the low byte of that result selects an entry of table, which is
+    then XORed with the running value shifted right by eight bits.
+
+    Returns the running value after the fourth character.
+    """
+    seed = 0
+    for pos in range(4):
+        slot = (seed ^ ord(dsn[pos])) & 0xFF
+        seed = table[slot] ^ (seed >> 8)
+    return seed
+
+# Generate the device PID
+def generateDevicePID(table, dsn, nbRoll):
+    """Build an eight character PID string from dsn.
+
+    The 32-bit seed from generatePidSeed(table, dsn) is split into four
+    bytes (most significant first) and repeated twice to form an eight
+    byte working buffer.  The first nbRoll characters of dsn are XORed
+    into that buffer, wrapping around every eight positions, and each
+    buffer byte is finally mapped to one character of charMap4.
+
+    dsn must hold at least nbRoll characters, since dsn[0 .. nbRoll-1]
+    are indexed directly.
+    """
+    seed = generatePidSeed(table, dsn)
+    seedBytes = [(seed >> shift) & 0xFF for shift in (24, 16, 8, 0)]
+    pid = seedBytes + seedBytes
+
+    # roll the leading dsn characters into the buffer, cycling through it
+    for counter in range(nbRoll):
+        pid[counter % 8] ^= ord(dsn[counter])
+
+    symbols = []
+    for value in pid:
+        slot = ((((value >> 5) & 3) ^ value) & 0x1f) + (value >> 7)
+        symbols.append(charMap4[slot])
+    return "".join(symbols)
+
+def crc32(s):
+    """Return binascii.crc32(s, -1), bit-inverted and masked to 32 bits.
+
+    The mask guarantees a non-negative result even where binascii.crc32
+    hands back a signed value.
+    """
+    raw = binascii.crc32(s, -1)
+    inverted = ~raw
+    return inverted & 0xFFFFFFFF
+
+# convert from 8 digit PID to 10 digit PID with checksum
+def checksumPid(s):
+    """Return s with two checksum characters from charMap4 appended.
+
+    The crc32() of s is folded by XORing its upper half into its lower
+    half.  Each of the two low bytes of the folded value is then split
+    into quotient and remainder by len(charMap4); their XOR, reduced
+    modulo len(charMap4), selects the checksum character.
+    """
+    folded = crc32(s)
+    folded ^= folded >> 16
+    mapLen = len(charMap4)
+
+    suffix = ""
+    for shift in (0, 8):
+        byte = (folded >> shift) & 0xff
+        quotient, remainder = divmod(byte, mapLen)
+        suffix += charMap4[(quotient ^ remainder) % mapLen]
+    return s + suffix
+
+
+# old kindle serial number to fixed pid
+def pidFromSerial(s, l):
+    """Derive an l character PID string from the serial number s.
+
+    The characters of s are XOR-folded into l slots (character i goes to
+    slot i % l).  Every slot is then XORed with one byte of crc32(s),
+    cycling through the four CRC bytes most significant first, and the
+    low eight bits of each slot are mapped to a character of charMap4.
+    """
+    checksum = crc32(s)
+
+    folded = [0] * l
+    for offset, char in enumerate(s):
+        folded[offset % l] ^= ord(char)
+
+    crcBytes = [(checksum >> shift) & 0xff for shift in (24, 16, 8, 0)]
+
+    pieces = []
+    for slot in xrange(l):
+        value = (folded[slot] ^ crcBytes[slot & 3]) & 0xff
+        pieces.append(charMap4[(value >> 7) + (((value >> 5) & 3) ^ (value & 0x1f))])
+    return "".join(pieces)
+
+
+# Parse the EXTH header records and use the Kindle serial number to calculate the book pid.
+def getKindlePid(pidlst, rec209, token, serialnum):
+    """Append the two book PIDs derived from a Kindle serial number.
+
+    The first PID is the checksummed encoding of
+    SHA1(serialnum + rec209 + token).  The second is the fixed PID used
+    before the 2.5 firmware update: pidFromSerial(serialnum, 7) followed
+    by "*", also checksummed.
+
+    pidlst is extended in place and returned.
+    """
+    # PID bound to this book's EXTH data
+    hashedPid = checksumPid(encodePID(SHA1(serialnum + rec209 + token)))
+    pidlst.append(hashedPid)
+
+    # fixed PID for old pre 2.5 firmware
+    fixedPid = checksumPid(pidFromSerial(serialnum, 7) + "*")
+    pidlst.append(fixedPid)
+
+    return pidlst
+
+
+# parse the Kindleinfo file to calculate the book pid.
+
+keynames = ["kindle.account.tokens","kindle.cookie.item","eulaVersionAccepted","login_date","kindle.token.item","login","kindle.key.item","kindle.name.info","kindle.device.info", "MazamaRandomNumber"]
+
+def getK4Pids(pidlst, rec209, token, kInfoFile):
+    """Append the PIDs derived from one kindle-info file to pidlst.
+
+    The file is read through getDBfromFile().  If that raises, the
+    message is printed and pidlst is returned untouched; the same happens
+    (with a different message) when the database lacks the
+    "MazamaRandomNumber" or "kindle.account.tokens" entries.
+
+    Otherwise four PIDs are appended, in this order: the device PID, the
+    book PID built from DSN + account token, a variant without the DSN,
+    and a variant without the account token.
+
+    pidlst is extended in place and returned.
+    """
+    try:
+        kindleDatabase = getDBfromFile(kInfoFile)
+    except Exception, message:
+        print(message)
+        kindleDatabase = None
+
+    if kindleDatabase is None:
+        return pidlst
+
+    try:
+        # the Mazama random number and the kindle account token
+        MazamaRandomNumber = kindleDatabase["MazamaRandomNumber"]
+        kindleAccountToken = kindleDatabase["kindle.account.tokens"]
+    except KeyError:
+        print "Keys not found in " + kInfoFile
+        return pidlst
+
+    # encoded hashes of the machine ID string and the current user name
+    encodedIDString = encodeHash(GetIDString(), charMap1)
+    encodedUsername = encodeHash(GetUserName(), charMap1)
+
+    # concat, hash and encode to calculate the DSN
+    DSN = encode(SHA1(MazamaRandomNumber + encodedIDString + encodedUsername), charMap1)
+
+    # device PID (as far as the original author could tell, used for nothing)
+    devicePID = generateDevicePID(generatePidEncryptionTable(), DSN, 4)
+    pidlst.append(checksumPid(devicePID))
+
+    def addBookPid(material):
+        # hash, encode, checksum and record one candidate book PID
+        pidlst.append(checksumPid(encodePID(SHA1(material))))
+
+    # book pid
+    addBookPid(DSN + kindleAccountToken + rec209 + token)
+    # variant 1
+    addBookPid(kindleAccountToken + rec209 + token)
+    # variant 2
+    addBookPid(DSN + rec209 + token)
+
+    return pidlst
+
+def getPidList(md1, md2, k4, pids, serials, kInfoFiles):
+    """Collect every candidate PID for a book into one list.
+
+    md1 and md2 are passed through unchanged to getK4Pids() and
+    getKindlePid().  When k4 is true, kInfoFiles (an empty list if None
+    was given) is first handed to getKindleInfoFiles() and replaced by
+    its result.
+
+    The returned list holds, in order: the PIDs from each kindle-info
+    file, the PIDs from each serial number in serials, then the entries
+    of pids as given.
+    """
+    if kInfoFiles is None:
+        kInfoFiles = []
+    if k4:
+        kInfoFiles = getKindleInfoFiles(kInfoFiles)
+
+    pidlst = []
+    # NOTE(review): loop placed at function level (not under "if k4");
+    # the flattened source does not show the nesting - confirm.
+    for infoFile in kInfoFiles:
+        pidlst = getK4Pids(pidlst, md1, md2, infoFile)
+    for serialnum in serials:
+        pidlst = getKindlePid(pidlst, md1, md2, serialnum)
+    pidlst.extend(pids)
+    return pidlst
--- /dev/null
+# A simple implementation of pbkdf2 using stock python modules. See RFC2898
+# for details. Basically, it derives a key from a password and salt.
+
+# Copyright 2004 Matt Johnston <matt @ ucc asn au>
+# Copyright 2009 Daniel Holth <dholth@fastmail.fm>
+# This code may be freely used and modified for any purpose.
+
+# Revision history
+# v0.1 October 2004 - Initial release
+# v0.2 8 March 2007 - Make usable with hashlib in Python 2.5 and use
+# v0.3 "" the correct digest_size rather than always 20
+# v0.4 Oct 2009 - Rescue from chandler svn, test and optimize.
+
+import sys
+import hmac
+from struct import pack
+try:
+ # hashlib is only available in Python 2.5 and later
+ import hashlib
+ sha = hashlib.sha1
+ md5 = hashlib.md5
+ sha256 = hashlib.sha256
+except ImportError: # pragma: NO COVERAGE
+ # fallback
+ import sha
+ import md5
+
+# this is what you want to call.
+def pbkdf2( password, salt, itercount, keylen, hashfn = sha ):
+    """Derive keylen bytes of key material from password and salt.
+
+    Implements PBKDF2 (see RFC 2898) on top of hmac with hashfn as the
+    digest.  hashfn may be a hashlib-style constructor or an old-style
+    sha/md5 module; both are accepted by the digest_size probe below.
+
+    password  -- key for the HMAC
+    salt      -- salt string, combined with the block number per block
+    itercount -- number of PRF iterations per output block
+    keylen    -- number of bytes to return
+
+    Returns the first keylen bytes of the concatenated output blocks.
+    """
+    try:
+        # depending whether the hashfn is from hashlib or sha/md5
+        digest_size = hashfn().digest_size
+    except TypeError: # pragma: NO COVERAGE
+        digest_size = hashfn.digest_size
+
+    # l - number of output blocks to produce, i.e. keylen / digest_size
+    # rounded up.  Floor division keeps this an integer even when "/"
+    # means true division (from __future__ import division, -Qnew).
+    l = (keylen + digest_size - 1) // digest_size
+
+    h = hmac.new( password, None, hashfn )
+
+    # gather the blocks and join once instead of growing a string
+    blocks = [pbkdf2_F( h, salt, itercount, i ) for i in range(1, l+1)]
+    return "".join(blocks)[0: keylen]
+
+def xorstr( a, b ):
+    """Return the character-wise XOR of two equal-length strings.
+
+    Raises ValueError when the lengths of a and b differ.
+    """
+    if len(a) != len(b):
+        raise ValueError("xorstr(): lengths differ")
+    mixed = []
+    for left, right in zip(a, b):
+        mixed.append(chr(ord(left) ^ ord(right)))
+    return ''.join(mixed)
+
+def prf( h, data ):
+    """Return the digest of data under the keyed hmac object h.
+
+    h itself is left untouched: the update is applied to a copy.
+    """
+    keyed = h.copy()
+    keyed.update( data )
+    digest = keyed.digest()
+    return digest
+
+# Helper as per the spec. h is a hmac which has been created seeded with the
+# password, it will be copy()ed and not modified.
+def pbkdf2_F( h, salt, itercount, blocknum ):
+    """Compute one PBKDF2 output block.
+
+    The first PRF input is salt followed by blocknum packed as a
+    big-endian 32-bit integer.  Each further iteration feeds the previous
+    PRF output back into prf(), and all outputs are XORed together.
+
+    h is only passed on to prf(); itercount is the total number of PRF
+    applications.  Returns the XOR of all PRF outputs.
+    """
+    current = prf( h, salt + pack('>i', blocknum) )
+    accumulated = current
+
+    # the first application is done above, so itercount - 1 remain
+    for _ in range(itercount - 1):
+        current = prf( h, current )
+        accumulated = xorstr( accumulated, current )
+
+    return accumulated
-# standlone set of Mac OSX specific routines needed for KindleBooks
-
-from __future__ import with_statement
-
-import sys
-import os
-import os.path
-import re
-import copy
-import subprocess
-from struct import pack, unpack, unpack_from
-
-class DrmException(Exception):
- pass
-
-
-# interface to needed routines in openssl's libcrypto
-def _load_crypto_libcrypto():
- from ctypes import CDLL, byref, POINTER, c_void_p, c_char_p, c_int, c_long, \
- Structure, c_ulong, create_string_buffer, addressof, string_at, cast
- from ctypes.util import find_library
-
- libcrypto = find_library('crypto')
- if libcrypto is None:
- raise DrmException('libcrypto not found')
- libcrypto = CDLL(libcrypto)
-
- # From OpenSSL's crypto aes header
- #
- # AES_ENCRYPT 1
- # AES_DECRYPT 0
- # AES_MAXNR 14 (in bytes)
- # AES_BLOCK_SIZE 16 (in bytes)
- #
- # struct aes_key_st {
- # unsigned long rd_key[4 *(AES_MAXNR + 1)];
- # int rounds;
- # };
- # typedef struct aes_key_st AES_KEY;
- #
- # int AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key);
- #
- # note: the ivec string, and output buffer are both mutable
- # void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
- # const unsigned long length, const AES_KEY *key, unsigned char *ivec, const int enc);
-
- AES_MAXNR = 14
- c_char_pp = POINTER(c_char_p)
- c_int_p = POINTER(c_int)
-
- class AES_KEY(Structure):
- _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))), ('rounds', c_int)]
- AES_KEY_p = POINTER(AES_KEY)
-
- def F(restype, name, argtypes):
- func = getattr(libcrypto, name)
- func.restype = restype
- func.argtypes = argtypes
- return func
-
- AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p,c_int])
-
- AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',[c_char_p, c_int, AES_KEY_p])
-
- # From OpenSSL's Crypto evp/p5_crpt2.c
- #
- # int PKCS5_PBKDF2_HMAC_SHA1(const char *pass, int passlen,
- # const unsigned char *salt, int saltlen, int iter,
- # int keylen, unsigned char *out);
-
- PKCS5_PBKDF2_HMAC_SHA1 = F(c_int, 'PKCS5_PBKDF2_HMAC_SHA1',
- [c_char_p, c_ulong, c_char_p, c_ulong, c_ulong, c_ulong, c_char_p])
-
- class LibCrypto(object):
- def __init__(self):
- self._blocksize = 0
- self._keyctx = None
- self._iv = 0
-
- def set_decrypt_key(self, userkey, iv):
- self._blocksize = len(userkey)
- if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) :
- raise DrmException('AES improper key used')
- return
- keyctx = self._keyctx = AES_KEY()
- self._iv = iv
- self._userkey = userkey
- rv = AES_set_decrypt_key(userkey, len(userkey) * 8, keyctx)
- if rv < 0:
- raise DrmException('Failed to initialize AES key')
-
- def decrypt(self, data):
- out = create_string_buffer(len(data))
- mutable_iv = create_string_buffer(self._iv, len(self._iv))
- keyctx = self._keyctx
- rv = AES_cbc_encrypt(data, out, len(data), keyctx, mutable_iv, 0)
- if rv == 0:
- raise DrmException('AES decryption failed')
- return out.raw
-
- def keyivgen(self, passwd, salt, iter, keylen):
- saltlen = len(salt)
- passlen = len(passwd)
- out = create_string_buffer(keylen)
- rv = PKCS5_PBKDF2_HMAC_SHA1(passwd, passlen, salt, saltlen, iter, keylen, out)
- return out.raw
- return LibCrypto
-
-def _load_crypto():
- LibCrypto = None
- try:
- LibCrypto = _load_crypto_libcrypto()
- except (ImportError, DrmException):
- pass
- return LibCrypto
-
-LibCrypto = _load_crypto()
-
-#
-# Utility Routines
-#
-
-# crypto digestroutines
-import hashlib
-
-def MD5(message):
- ctx = hashlib.md5()
- ctx.update(message)
- return ctx.digest()
-
-def SHA1(message):
- ctx = hashlib.sha1()
- ctx.update(message)
- return ctx.digest()
-
-def SHA256(message):
- ctx = hashlib.sha256()
- ctx.update(message)
- return ctx.digest()
-
-# Various character maps used to decrypt books. Probably supposed to act as obfuscation
-charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
-charMap2 = "ZB0bYyc1xDdW2wEV3Ff7KkPpL8UuGA4gz-Tme9Nn_tHh5SvXCsIiR6rJjQaqlOoM"
-
-# For kinf approach of K4Mac 1.6.X or later
-# On K4PC charMap5 = "AzB0bYyCeVvaZ3FfUuG4g-TtHh5SsIiR6rJjQq7KkPpL8lOoMm9Nn_c1XxDdW2wE"
-# For Mac they seem to re-use charMap2 here
-charMap5 = charMap2
-
-# new in K4M 1.9.X
-testMap8 = "YvaZ3FfUm9Nn_c1XuG4yCAzB0beVg-TtHh5SsIiR6rJjQdW2wEq7KkPpL8lOoMxD"
-
-
-def encode(data, map):
- result = ""
- for char in data:
- value = ord(char)
- Q = (value ^ 0x80) // len(map)
- R = value % len(map)
- result += map[Q]
- result += map[R]
- return result
-
-# Hash the bytes in data and then encode the digest with the characters in map
-def encodeHash(data,map):
- return encode(MD5(data),map)
-
-# Decode the string in data with the characters in map. Returns the decoded bytes
-def decode(data,map):
- result = ""
- for i in range (0,len(data)-1,2):
- high = map.find(data[i])
- low = map.find(data[i+1])
- if (high == -1) or (low == -1) :
- break
- value = (((high * len(map)) ^ 0x80) & 0xFF) + low
- result += pack("B",value)
- return result
-
-# For K4M 1.6.X and later
-# generate table of prime number less than or equal to int n
-def primes(n):
- if n==2: return [2]
- elif n<2: return []
- s=range(3,n+1,2)
- mroot = n ** 0.5
- half=(n+1)/2-1
- i=0
- m=3
- while m <= mroot:
- if s[i]:
- j=(m*m-3)/2
- s[j]=0
- while j<half:
- s[j]=0
- j+=m
- i=i+1
- m=2*i+3
- return [2]+[x for x in s if x]
-
-
-# uses a sub process to get the Hard Drive Serial Number using ioreg
-# returns with the serial number of drive whose BSD Name is "disk0"
-def GetVolumeSerialNumber():
- sernum = os.getenv('MYSERIALNUMBER')
- if sernum != None:
- return sernum
- cmdline = '/usr/sbin/ioreg -l -S -w 0 -r -c AppleAHCIDiskDriver'
- cmdline = cmdline.encode(sys.getfilesystemencoding())
- p = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
- out1, out2 = p.communicate()
- reslst = out1.split('\n')
- cnt = len(reslst)
- bsdname = None
- sernum = None
- foundIt = False
- for j in xrange(cnt):
- resline = reslst[j]
- pp = resline.find('"Serial Number" = "')
- if pp >= 0:
- sernum = resline[pp+19:-1]
- sernum = sernum.strip()
- bb = resline.find('"BSD Name" = "')
- if bb >= 0:
- bsdname = resline[bb+14:-1]
- bsdname = bsdname.strip()
- if (bsdname == 'disk0') and (sernum != None):
- foundIt = True
- break
- if not foundIt:
- sernum = ''
- return sernum
-
-def GetUserHomeAppSupKindleDirParitionName():
- home = os.getenv('HOME')
- dpath = home + '/Library/Application Support/Kindle'
- cmdline = '/sbin/mount'
- cmdline = cmdline.encode(sys.getfilesystemencoding())
- p = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
- out1, out2 = p.communicate()
- reslst = out1.split('\n')
- cnt = len(reslst)
- disk = ''
- foundIt = False
- for j in xrange(cnt):
- resline = reslst[j]
- if resline.startswith('/dev'):
- (devpart, mpath) = resline.split(' on ')
- dpart = devpart[5:]
- pp = mpath.find('(')
- if pp >= 0:
- mpath = mpath[:pp-1]
- if dpath.startswith(mpath):
- disk = dpart
- return disk
-
-# uses a sub process to get the UUID of the specified disk partition using ioreg
-def GetDiskPartitionUUID(diskpart):
- uuidnum = os.getenv('MYUUIDNUMBER')
- if uuidnum != None:
- return uuidnum
- cmdline = '/usr/sbin/ioreg -l -S -w 0 -r -c AppleAHCIDiskDriver'
- cmdline = cmdline.encode(sys.getfilesystemencoding())
- p = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
- out1, out2 = p.communicate()
- reslst = out1.split('\n')
- cnt = len(reslst)
- bsdname = None
- uuidnum = None
- foundIt = False
- nest = 0
- uuidnest = -1
- partnest = -2
- for j in xrange(cnt):
- resline = reslst[j]
- if resline.find('{') >= 0:
- nest += 1
- if resline.find('}') >= 0:
- nest -= 1
- pp = resline.find('"UUID" = "')
- if pp >= 0:
- uuidnum = resline[pp+10:-1]
- uuidnum = uuidnum.strip()
- uuidnest = nest
- if partnest == uuidnest and uuidnest > 0:
- foundIt = True
- break
- bb = resline.find('"BSD Name" = "')
- if bb >= 0:
- bsdname = resline[bb+14:-1]
- bsdname = bsdname.strip()
- if (bsdname == diskpart):
- partnest = nest
- else :
- partnest = -2
- if partnest == uuidnest and partnest > 0:
- foundIt = True
- break
- if nest == 0:
- partnest = -2
- uuidnest = -1
- uuidnum = None
- bsdname = None
- if not foundIt:
- uuidnum = ''
- return uuidnum
-
-def GetMACAddressMunged():
- macnum = os.getenv('MYMACNUM')
- if macnum != None:
- return macnum
- cmdline = '/sbin/ifconfig en0'
- cmdline = cmdline.encode(sys.getfilesystemencoding())
- p = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
- out1, out2 = p.communicate()
- reslst = out1.split('\n')
- cnt = len(reslst)
- macnum = None
- foundIt = False
- for j in xrange(cnt):
- resline = reslst[j]
- pp = resline.find('ether ')
- if pp >= 0:
- macnum = resline[pp+6:-1]
- macnum = macnum.strip()
- # print "original mac", macnum
- # now munge it up the way Kindle app does
- # by xoring it with 0xa5 and swapping elements 3 and 4
- maclst = macnum.split(':')
- n = len(maclst)
- if n != 6:
- fountIt = False
- break
- for i in range(6):
- maclst[i] = int('0x' + maclst[i], 0)
- mlst = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00]
- mlst[5] = maclst[5] ^ 0xa5
- mlst[4] = maclst[3] ^ 0xa5
- mlst[3] = maclst[4] ^ 0xa5
- mlst[2] = maclst[2] ^ 0xa5
- mlst[1] = maclst[1] ^ 0xa5
- mlst[0] = maclst[0] ^ 0xa5
- macnum = "%0.2x%0.2x%0.2x%0.2x%0.2x%0.2x" % (mlst[0], mlst[1], mlst[2], mlst[3], mlst[4], mlst[5])
- foundIt = True
- break
- if not foundIt:
- macnum = ''
- return macnum
-
-
-# uses unix env to get username instead of using sysctlbyname
-def GetUserName():
- username = os.getenv('USER')
- return username
-
-def isNewInstall():
- home = os.getenv('HOME')
- # soccer game fan anyone
- dpath = home + '/Library/Application Support/Kindle/storage/.pes2011'
- # print dpath, os.path.exists(dpath)
- if os.path.exists(dpath):
- return True
- return False
-
-
-def GetIDString():
- # K4Mac now has an extensive set of ids strings it uses
- # in encoding pids and in creating unique passwords
- # for use in its own version of CryptUnprotectDataV2
-
- # BUT Amazon has now become nasty enough to detect when its app
- # is being run under a debugger and actually changes code paths
- # including which one of these strings is chosen, all to try
- # to prevent reverse engineering
-
- # Sad really ... they will only hurt their own sales ...
- # true book lovers really want to keep their books forever
- # and move them to their devices and DRM prevents that so they
- # will just buy from someplace else that they can remove
- # the DRM from
-
- # Amazon should know by now that true book lover's are not like
- # penniless kids that pirate music, we do not pirate books
-
- if isNewInstall():
- mungedmac = GetMACAddressMunged()
- if len(mungedmac) > 7:
- return mungedmac
- sernum = GetVolumeSerialNumber()
- if len(sernum) > 7:
- return sernum
- diskpart = GetUserHomeAppSupKindleDirParitionName()
- uuidnum = GetDiskPartitionUUID(diskpart)
- if len(uuidnum) > 7:
- return uuidnum
- mungedmac = GetMACAddressMunged()
- if len(mungedmac) > 7:
- return mungedmac
- return '9999999999'
-
-
-# implements an Pseudo Mac Version of Windows built-in Crypto routine
-# used by Kindle for Mac versions < 1.6.0
-class CryptUnprotectData(object):
- def __init__(self):
- sernum = GetVolumeSerialNumber()
- if sernum == '':
- sernum = '9999999999'
- sp = sernum + '!@#' + GetUserName()
- passwdData = encode(SHA256(sp),charMap1)
- salt = '16743'
- self.crp = LibCrypto()
- iter = 0x3e8
- keylen = 0x80
- key_iv = self.crp.keyivgen(passwdData, salt, iter, keylen)
- self.key = key_iv[0:32]
- self.iv = key_iv[32:48]
- self.crp.set_decrypt_key(self.key, self.iv)
-
- def decrypt(self, encryptedData):
- cleartext = self.crp.decrypt(encryptedData)
- cleartext = decode(cleartext,charMap1)
- return cleartext
-
-
-# implements an Pseudo Mac Version of Windows built-in Crypto routine
-# used for Kindle for Mac Versions >= 1.6.0
-class CryptUnprotectDataV2(object):
- def __init__(self):
- sp = GetUserName() + ':&%:' + GetIDString()
- passwdData = encode(SHA256(sp),charMap5)
- # salt generation as per the code
- salt = 0x0512981d * 2 * 1 * 1
- salt = str(salt) + GetUserName()
- salt = encode(salt,charMap5)
- self.crp = LibCrypto()
- iter = 0x800
- keylen = 0x400
- key_iv = self.crp.keyivgen(passwdData, salt, iter, keylen)
- self.key = key_iv[0:32]
- self.iv = key_iv[32:48]
- self.crp.set_decrypt_key(self.key, self.iv)
-
- def decrypt(self, encryptedData):
- cleartext = self.crp.decrypt(encryptedData)
- cleartext = decode(cleartext, charMap5)
- return cleartext
-
-
-# unprotect the new header blob in .kinf2011
-# used in Kindle for Mac Version >= 1.9.0
-def UnprotectHeaderData(encryptedData):
- passwdData = 'header_key_data'
- salt = 'HEADER.2011'
- iter = 0x80
- keylen = 0x100
- crp = LibCrypto()
- key_iv = crp.keyivgen(passwdData, salt, iter, keylen)
- key = key_iv[0:32]
- iv = key_iv[32:48]
- crp.set_decrypt_key(key,iv)
- cleartext = crp.decrypt(encryptedData)
- return cleartext
-
-
-# implements an Pseudo Mac Version of Windows built-in Crypto routine
-# used for Kindle for Mac Versions >= 1.9.0
-class CryptUnprotectDataV3(object):
- def __init__(self, entropy):
- sp = GetUserName() + '+@#$%+' + GetIDString()
- passwdData = encode(SHA256(sp),charMap2)
- salt = entropy
- self.crp = LibCrypto()
- iter = 0x800
- keylen = 0x400
- key_iv = self.crp.keyivgen(passwdData, salt, iter, keylen)
- self.key = key_iv[0:32]
- self.iv = key_iv[32:48]
- self.crp.set_decrypt_key(self.key, self.iv)
-
- def decrypt(self, encryptedData):
- cleartext = self.crp.decrypt(encryptedData)
- cleartext = decode(cleartext, charMap2)
- return cleartext
-
-
-# Locate the .kindle-info files
-def getKindleInfoFiles(kInfoFiles):
- # first search for current .kindle-info files
- home = os.getenv('HOME')
- cmdline = 'find "' + home + '/Library/Application Support" -name ".kindle-info"'
- cmdline = cmdline.encode(sys.getfilesystemencoding())
- p1 = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
- out1, out2 = p1.communicate()
- reslst = out1.split('\n')
- kinfopath = 'NONE'
- found = False
- for resline in reslst:
- if os.path.isfile(resline):
- kInfoFiles.append(resline)
- found = True
- # add any .rainier*-kinf files
- cmdline = 'find "' + home + '/Library/Application Support" -name ".rainier*-kinf"'
- cmdline = cmdline.encode(sys.getfilesystemencoding())
- p1 = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
- out1, out2 = p1.communicate()
- reslst = out1.split('\n')
- for resline in reslst:
- if os.path.isfile(resline):
- kInfoFiles.append(resline)
- found = True
- # add any .kinf2011 files
- cmdline = 'find "' + home + '/Library/Application Support" -name ".kinf2011"'
- cmdline = cmdline.encode(sys.getfilesystemencoding())
- p1 = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
- out1, out2 = p1.communicate()
- reslst = out1.split('\n')
- for resline in reslst:
- if os.path.isfile(resline):
- kInfoFiles.append(resline)
- found = True
- if not found:
- print('No kindle-info files have been found.')
- return kInfoFiles
-
-# determine type of kindle info provided and return a
-# database of keynames and values
-def getDBfromFile(kInfoFile):
- names = ["kindle.account.tokens","kindle.cookie.item","eulaVersionAccepted","login_date","kindle.token.item","login","kindle.key.item","kindle.name.info","kindle.device.info", "MazamaRandomNumber", "max_date", "SIGVERIF"]
- DB = {}
- cnt = 0
- infoReader = open(kInfoFile, 'r')
- hdr = infoReader.read(1)
- data = infoReader.read()
-
- if data.find('[') != -1 :
-
- # older style kindle-info file
- cud = CryptUnprotectData()
- items = data.split('[')
- for item in items:
- if item != '':
- keyhash, rawdata = item.split(':')
- keyname = "unknown"
- for name in names:
- if encodeHash(name,charMap2) == keyhash:
- keyname = name
- break
- if keyname == "unknown":
- keyname = keyhash
- encryptedValue = decode(rawdata,charMap2)
- cleartext = cud.decrypt(encryptedValue)
- DB[keyname] = cleartext
- cnt = cnt + 1
- if cnt == 0:
- DB = None
- return DB
-
- if hdr == '/':
-
- # else newer style .kinf file used by K4Mac >= 1.6.0
- # the .kinf file uses "/" to separate it into records
- # so remove the trailing "/" to make it easy to use split
- data = data[:-1]
- items = data.split('/')
- cud = CryptUnprotectDataV2()
-
- # loop through the item records until all are processed
- while len(items) > 0:
-
- # get the first item record
- item = items.pop(0)
-
- # the first 32 chars of the first record of a group
- # is the MD5 hash of the key name encoded by charMap5
- keyhash = item[0:32]
- keyname = "unknown"
-
- # the raw keyhash string is also used to create entropy for the actual
- # CryptProtectData Blob that represents that keys contents
- # "entropy" not used for K4Mac only K4PC
- # entropy = SHA1(keyhash)
-
- # the remainder of the first record when decoded with charMap5
- # has the ':' split char followed by the string representation
- # of the number of records that follow
- # and make up the contents
- srcnt = decode(item[34:],charMap5)
- rcnt = int(srcnt)
-
- # read and store in rcnt records of data
- # that make up the contents value
- edlst = []
- for i in xrange(rcnt):
- item = items.pop(0)
- edlst.append(item)
-
- keyname = "unknown"
- for name in names:
- if encodeHash(name,charMap5) == keyhash:
- keyname = name
- break
- if keyname == "unknown":
- keyname = keyhash
-
- # the charMap5 encoded contents data has had a length
- # of chars (always odd) cut off of the front and moved
- # to the end to prevent decoding using charMap5 from
- # working properly, and thereby preventing the ensuing
- # CryptUnprotectData call from succeeding.
-
- # The offset into the charMap5 encoded contents seems to be:
- # len(contents) - largest prime number less than or equal to int(len(content)/3)
- # (in other words split "about" 2/3rds of the way through)
-
- # move first offsets chars to end to align for decode by charMap5
- encdata = "".join(edlst)
- contlen = len(encdata)
-
- # now properly split and recombine
- # by moving noffset chars from the start of the
- # string to the end of the string
- noffset = contlen - primes(int(contlen/3))[-1]
- pfx = encdata[0:noffset]
- encdata = encdata[noffset:]
- encdata = encdata + pfx
-
- # decode using charMap5 to get the CryptProtect Data
- encryptedValue = decode(encdata,charMap5)
- cleartext = cud.decrypt(encryptedValue)
- DB[keyname] = cleartext
- cnt = cnt + 1
-
- if cnt == 0:
- DB = None
- return DB
-
- # the latest .kinf2011 version for K4M 1.9.1
- # put back the hdr char, it is needed
- data = hdr + data
- data = data[:-1]
- items = data.split('/')
-
- # the headerblob is the encrypted information needed to build the entropy string
- headerblob = items.pop(0)
- encryptedValue = decode(headerblob, charMap1)
- cleartext = UnprotectHeaderData(encryptedValue)
-
- # now extract the pieces in the same way
- # this version is different from K4PC it scales the build number by multipying by 735
- pattern = re.compile(r'''\[Version:(\d+)\]\[Build:(\d+)\]\[Cksum:([^\]]+)\]\[Guid:([\{\}a-z0-9\-]+)\]''', re.IGNORECASE)
- for m in re.finditer(pattern, cleartext):
- entropy = str(int(m.group(2)) * 0x2df) + m.group(4)
-
- cud = CryptUnprotectDataV3(entropy)
-
- # loop through the item records until all are processed
- while len(items) > 0:
-
- # get the first item record
- item = items.pop(0)
-
- # the first 32 chars of the first record of a group
- # is the MD5 hash of the key name encoded by charMap5
- keyhash = item[0:32]
- keyname = "unknown"
-
- # unlike K4PC the keyhash is not used in generating entropy
- # entropy = SHA1(keyhash) + added_entropy
- # entropy = added_entropy
-
- # the remainder of the first record when decoded with charMap5
- # has the ':' split char followed by the string representation
- # of the number of records that follow
- # and make up the contents
- srcnt = decode(item[34:],charMap5)
- rcnt = int(srcnt)
-
- # read and store in rcnt records of data
- # that make up the contents value
- edlst = []
- for i in xrange(rcnt):
- item = items.pop(0)
- edlst.append(item)
-
- keyname = "unknown"
- for name in names:
- if encodeHash(name,testMap8) == keyhash:
- keyname = name
- break
- if keyname == "unknown":
- keyname = keyhash
-
- # the testMap8 encoded contents data has had a length
- # of chars (always odd) cut off of the front and moved
- # to the end to prevent decoding using testMap8 from
- # working properly, and thereby preventing the ensuing
- # CryptUnprotectData call from succeeding.
-
- # The offset into the testMap8 encoded contents seems to be:
- # len(contents) - largest prime number less than or equal to int(len(content)/3)
- # (in other words split "about" 2/3rds of the way through)
-
- # move first offsets chars to end to align for decode by testMap8
- encdata = "".join(edlst)
- contlen = len(encdata)
-
- # now properly split and recombine
- # by moving noffset chars from the start of the
- # string to the end of the string
- noffset = contlen - primes(int(contlen/3))[-1]
- pfx = encdata[0:noffset]
- encdata = encdata[noffset:]
- encdata = encdata + pfx
-
- # decode using testMap8 to get the CryptProtect Data
- encryptedValue = decode(encdata,testMap8)
- cleartext = cud.decrypt(encryptedValue)
- # print keyname
- # print cleartext
- DB[keyname] = cleartext
- cnt = cnt + 1
-
- if cnt == 0:
- DB = None
- return DB
--- /dev/null
+#!/usr/bin/env python
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+
+import Tkinter
+import Tkconstants
+
+# basic scrolled text widget
+class ScrolledText(Tkinter.Text):
+    """Text widget packed inside a Frame next to a vertical Scrollbar.
+
+    The frame is kept on self.frame and the scrollbar on self.vbar.  Public
+    names defined by Pack, Grid or Place that are not in Text's own class
+    dict are rebound to the frame's versions, and str() of the widget is
+    str() of the frame.
+    """
+    def __init__(self, master=None, **kw):
+        self.frame = Tkinter.Frame(master)
+        self.vbar = Tkinter.Scrollbar(self.frame)
+        self.vbar.pack(side=Tkconstants.RIGHT, fill=Tkconstants.Y)
+        # wire text -> scrollbar, then (below) scrollbar -> text
+        kw['yscrollcommand'] = self.vbar.set
+        Tkinter.Text.__init__(self, self.frame, **kw)
+        self.pack(side=Tkconstants.LEFT, fill=Tkconstants.BOTH, expand=True)
+        self.vbar['command'] = self.yview
+        # Copy geometry methods of self.frame without overriding Text
+        # methods = hack!
+        geometry = set()
+        for manager in (Tkinter.Pack, Tkinter.Grid, Tkinter.Place):
+            geometry.update(vars(manager))
+        geometry.difference_update(vars(Tkinter.Text))
+        for name in geometry:
+            if name.startswith('_') or name in ('config', 'configure'):
+                continue
+            setattr(self, name, getattr(self.frame, name))
+
+    def __str__(self):
+        return str(self.frame)
#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-
-class Unbuffered:
- def __init__(self, stream):
- self.stream = stream
- def write(self, data):
- self.stream.write(data)
- self.stream.flush()
- def __getattr__(self, attr):
- return getattr(self.stream, attr)
-
-import sys
-sys.stdout=Unbuffered(sys.stdout)
+# For use with Topaz Scripts Version 2.6
import csv
+import sys
import os
import getopt
+import re
from struct import pack
from struct import unpack
-class TpzDRMError(Exception):
- pass
-
-# local support routines
-if 'calibre' in sys.modules:
- inCalibre = True
-else:
- inCalibre = False
-
-if inCalibre :
- from calibre_plugins.k4mobidedrm import convert2xml
- from calibre_plugins.k4mobidedrm import flatxml2html
- from calibre_plugins.k4mobidedrm import flatxml2svg
- from calibre_plugins.k4mobidedrm import stylexml2css
-else :
- import convert2xml
- import flatxml2html
- import flatxml2svg
- import stylexml2css
-
-# global switch
-buildXML = False
-
-# Get a 7 bit encoded number from a file
-def readEncodedNumber(file):
- flag = False
- c = file.read(1)
- if (len(c) == 0):
- return None
- data = ord(c)
- if data == 0xFF:
- flag = True
- c = file.read(1)
- if (len(c) == 0):
- return None
- data = ord(c)
- if data >= 0x80:
- datax = (data & 0x7F)
- while data >= 0x80 :
- c = file.read(1)
- if (len(c) == 0):
- return None
- data = ord(c)
- datax = (datax <<7) + (data & 0x7F)
- data = datax
- if flag:
- data = -data
- return data
-
-# Get a length prefixed string from the file
-def lengthPrefixString(data):
- return encodeNumber(len(data))+data
-
-def readString(file):
- stringLength = readEncodedNumber(file)
- if (stringLength == None):
- return None
- sv = file.read(stringLength)
- if (len(sv) != stringLength):
- return ""
- return unpack(str(stringLength)+"s",sv)[0]
-
-def getMetaArray(metaFile):
- # parse the meta file
- result = {}
- fo = file(metaFile,'rb')
- size = readEncodedNumber(fo)
- for i in xrange(size):
- tag = readString(fo)
- value = readString(fo)
- result[tag] = value
- # print tag, value
- fo.close()
- return result
-
-
-# dictionary of all text strings by index value
-class Dictionary(object):
- def __init__(self, dictFile):
- self.filename = dictFile
- self.size = 0
- self.fo = file(dictFile,'rb')
- self.stable = []
- self.size = readEncodedNumber(self.fo)
- for i in xrange(self.size):
- self.stable.append(self.escapestr(readString(self.fo)))
- self.pos = 0
- def escapestr(self, str):
- str = str.replace('&','&')
- str = str.replace('<','<')
- str = str.replace('>','>')
- str = str.replace('=','=')
- return str
- def lookup(self,val):
- if ((val >= 0) and (val < self.size)) :
- self.pos = val
- return self.stable[self.pos]
- else:
- print "Error - %d outside of string table limits" % val
- raise TpzDRMError('outside or string table limits')
- # sys.exit(-1)
- def getSize(self):
- return self.size
- def getPos(self):
- return self.pos
-
-class PageDimParser(object):
- def __init__(self, flatxml):
+class DocParser(object):
+ def __init__(self, flatxml, fontsize, ph, pw):
+ # keep the flattened xml as a list with one entry per line
 self.flatdoc = flatxml.split('\n')
+ # scaling inputs read by process(): fontsize is stored as an int, while
+ # ph and pw are multiplied by 1.0 so they are stored as floats
+ self.fontsize = int(fontsize)
+ self.ph = int(ph) * 1.0
+ self.pw = int(pw) * 1.0
+
+ # style tags that process() converts, mapped to the css selector prefix
+ # written in front of the generated rule
+ stags = {
+ 'paragraph' : 'p',
+ 'graphic' : '.graphic'
+ }
+
+ # numeric style attributes mapped to the css property text they become;
+ # process() appends the scaled value as a percentage
+ attr_val_map = {
+ 'hang' : 'text-indent: ',
+ 'indent' : 'text-indent: ',
+ 'line-space' : 'line-height: ',
+ 'margin-bottom' : 'margin-bottom: ',
+ 'margin-left' : 'margin-left: ',
+ 'margin-right' : 'margin-right: ',
+ 'margin-top' : 'margin-top: ',
+ 'space-after' : 'padding-bottom: ',
+ }
+
+ # text style attributes, keyed as '<attr>-<value>', mapped to complete
+ # css declarations that process() emits unchanged
+ attr_str_map = {
+ 'align-center' : 'text-align: center; margin-left: auto; margin-right: auto;',
+ 'align-left' : 'text-align: left;',
+ 'align-right' : 'text-align: right;',
+ 'align-justify' : 'text-align: justify;',
+ 'display-inline' : 'display: inline;',
+ 'pos-left' : 'text-align: left;',
+ 'pos-right' : 'text-align: right;',
+ 'pos-center' : 'text-align: center; margin-left: auto; margin-right: auto;',
+ }
+
+
# find tag if within pos to end inclusive
def findinDoc(self, tagpath, pos, end) :
result = None
for j in xrange(pos, end):
item = docList[j]
if item.find('=') >= 0:
- (name, argres) = item.split('=')
+ (name, argres) = item.split('=',1)
else :
name = item
argres = ''
foundat = j
break
return foundat, result
+
+
+ # return list of start positions for the tagpath
+ def posinDoc(self, tagpath):
+     # Gather the position of every entry findinDoc reports for tagpath,
+     # restarting each search one entry past the previous hit.
+     matches = []
+     cursor = 0
+     while True:
+         (hit, value) = self.findinDoc(tagpath, cursor, -1)
+         # a None value ends the scan (findinDoc starts from result = None)
+         if value is None:
+             break
+         matches.append(hit)
+         cursor = hit + 1
+     return matches
+
+ # returns a vector of integers for the tagpath
+ def getData(self, tagpath, pos, end, clean=False):
+     # Look tagpath up via findinDoc and convert its '|' separated value
+     # into a list of ints.  With clean set, the first run of digits in
+     # each piece is used when there is one.  A missing or empty value
+     # gives an empty list.
+     values = []
+     (_, raw) = self.findinDoc(tagpath, pos, end)
+     if raw is None or len(raw) == 0:
+         return values
+     pattern = re.compile(r'''([0-9]+)''') if clean else None
+     for piece in raw.split('|'):
+         if clean:
+             hit = pattern.search(piece)
+             if hit is not None:
+                 piece = hit.group()
+         values.append(int(piece))
+     return values
+
 def process(self):
- (pos, sph) = self.findinDoc('page.h',0,-1)
- (pos, spw) = self.findinDoc('page.w',0,-1)
- if (sph == None): sph = '-1'
- if (spw == None): spw = '-1'
- return sph, spw
-def getPageDim(flatxml):
+ # Convert the 'book.stylesheet.style' entries of the flat xml into css.
+ # Returns (csspage, classlst): the css text, and a newline separated
+ # string of the '.cl-...' selectors collected along the way.
+ classlst = ''
+ # the page always starts with four fixed alignment helper classes
+ csspage = '.cl-center { text-align: center; margin-left: auto; margin-right: auto; }\n'
+ csspage += '.cl-right { text-align: right; }\n'
+ csspage += '.cl-left { text-align: left; }\n'
+ csspage += '.cl-justify { text-align: justify; }\n'
+
+ # generate a list of each <style> starting point in the stylesheet
+ styleList= self.posinDoc('book.stylesheet.style')
+ stylecnt = len(styleList)
+ # extra trailing entry so styleList[j+1] exists for the last style; the -1
+ # is handed to findinDoc as its end bound (presumably "no limit" - the
+ # full findinDoc body is not visible here, confirm)
+ styleList.append(-1)
+
+ # process each style converting what you can
+
+ for j in xrange(stylecnt):
+ start = styleList[j]
+ end = styleList[j+1]
+
+ # prefer style._tag and fall back to style.type when it is absent
+ (pos, tag) = self.findinDoc('style._tag',start,end)
+ if tag == None :
+ (pos, tag) = self.findinDoc('style.type',start,end)
+
+ # Is this something we know how to convert to css
+ if tag in self.stags :
+
+ # get the style class
+ (pos, sclass) = self.findinDoc('style.class',start,end)
+ if sclass != None:
+ sclass = sclass.replace(' ','-')
+ sclass = '.cl-' + sclass.lower()
+ else :
+ sclass = ''
+
+ # check for any "after class" specifiers
+ (pos, aftclass) = self.findinDoc('style._after_class',start,end)
+ if aftclass != None:
+ aftclass = aftclass.replace(' ','-')
+ aftclass = '.cl-' + aftclass.lower()
+ else :
+ aftclass = ''
+
+ # cssargs maps attribute name -> (css property text, value); the value
+ # is '' for ready-made declarations and a fraction for scaled ones
+ cssargs = {}
+
+ while True :
+
+ (pos1, attr) = self.findinDoc('style.rule.attr', start, end)
+ (pos2, val) = self.findinDoc('style.rule.value', start, end)
+
+ if attr == None : break
+
+ if (attr == 'display') or (attr == 'pos') or (attr == 'align'):
+ # handle text based attributes
+ attr = attr + '-' + val
+ if attr in self.attr_str_map :
+ cssargs[attr] = (self.attr_str_map[attr], '')
+ else :
+ # handle value based attributes
+ if attr in self.attr_val_map :
+ name = self.attr_val_map[attr]
+ # vertical measures scale by page height, horizontal ones by page
+ # width, and line spacing by twice the font size
+ if attr in ('margin-bottom', 'margin-top', 'space-after') :
+ scale = self.ph
+ elif attr in ('margin-right', 'indent', 'margin-left', 'hang') :
+ scale = self.pw
+ elif attr == 'line-space':
+ scale = self.fontsize * 2.0
+
+ # a hang of 0 is skipped; anything else is stored as a fraction of scale
+ if not ((attr == 'hang') and (int(val) == 0)) :
+ pv = float(val)/scale
+ cssargs[attr] = (self.attr_val_map[attr], pv)
+ keep = True
+
+ # continue the search after whichever of the attr/value entries came last
+ start = max(pos1, pos2) + 1
+
+ # disable all of the after class tags until I figure out how to handle them
+ if aftclass != "" : keep = False
+
+ # NOTE(review): keep is only ever assigned by the two statements above
+ # (keep = True inside the rule loop, keep = False for after-class styles).
+ # For a style where neither runs, keep is either unbound (NameError on the
+ # first style) or still holds the previous style's value. Indentation is
+ # lost in this view - confirm the nesting and whether an explicit
+ # initialisation per style is intended.
+ if keep :
+ # make sure line-space does not go below 100% or above 300% since
+ # it can be wacky in some styles
+ if 'line-space' in cssargs:
+ seg = cssargs['line-space'][0]
+ val = cssargs['line-space'][1]
+ if val < 1.0: val = 1.0
+ if val > 3.0: val = 3.0
+ del cssargs['line-space']
+ cssargs['line-space'] = (self.attr_val_map['line-space'], val)
+
+
+ # handle modifications for css style hanging indents
+ # (the hang is re-stored negated, and margin-left is rewritten from
+ # the hang amount together with any existing margin-left)
+ if 'hang' in cssargs:
+ hseg = cssargs['hang'][0]
+ hval = cssargs['hang'][1]
+ del cssargs['hang']
+ cssargs['hang'] = (self.attr_val_map['hang'], -hval)
+ mval = 0
+ mseg = 'margin-left: '
+ mval = hval
+ if 'margin-left' in cssargs:
+ mseg = cssargs['margin-left'][0]
+ mval = cssargs['margin-left'][1]
+ if mval < 0: mval = 0
+ mval = hval + mval
+ cssargs['margin-left'] = (mseg, mval)
+ if 'indent' in cssargs:
+ del cssargs['indent']
+
+ cssline = sclass + ' { '
+ for key in iter(cssargs):
+ mseg = cssargs[key][0]
+ mval = cssargs[key][1]
+ if mval == '':
+ cssline += mseg + ' '
+ else :
+ # fractions are written as percentages with one decimal place
+ aseg = mseg + '%.1f%%;' % (mval * 100.0)
+ cssline += aseg + ' '
+
+ cssline += '}'
+
+ if sclass != '' :
+ classlst += sclass + '\n'
+
+ # handle special case of paragraph class used inside chapter heading
+ # and non-chapter headings
+ if sclass != '' :
+ # sclass begins with '.cl-', so [4:7] is the first three characters
+ # of the lowercased class name
+ ctype = sclass[4:7]
+ if ctype == 'ch1' :
+ csspage += 'h1' + cssline + '\n'
+ if ctype == 'ch2' :
+ csspage += 'h2' + cssline + '\n'
+ if ctype == 'ch3' :
+ csspage += 'h3' + cssline + '\n'
+ if ctype == 'h1-' :
+ csspage += 'h4' + cssline + '\n'
+ if ctype == 'h2-' :
+ csspage += 'h5' + cssline + '\n'
+ # NOTE(review): 'h3_' uses an underscore where the h1/h2 checks use '-';
+ # confirm this is intended
+ if ctype == 'h3_' :
+ csspage += 'h6' + cssline + '\n'
+
+ # rules that ended up with no selector and no declarations are not written
+ if cssline != ' { }':
+ csspage += self.stags[tag] + cssline + '\n'
+
+
+ return csspage, classlst
+
+
+
+def convert2CSS(flatxml, fontsize, ph, pw):
+ # Print the scaling inputs, then run DocParser.process() over flatxml.
+ # The value returned is whatever process() returns, i.e. the
+ # (csspage, classlst) pair, even though the local below is named csspage.
+
+ print ' ', 'Using font size:',fontsize
+ print ' ', 'Using page height:', ph
+ print ' ', 'Using page width:', pw
+
 # create a document parser
- dp = PageDimParser(flatxml)
- (ph, pw) = dp.process()
- return ph, pw
+ dp = DocParser(flatxml, fontsize, ph, pw)
+ csspage = dp.process()
+ return csspage
-class GParser(object):
- def __init__(self, flatxml):
- self.flatdoc = flatxml.split('\n')
- self.dpi = 1440
- self.gh = self.getData('info.glyph.h')
- self.gw = self.getData('info.glyph.w')
- self.guse = self.getData('info.glyph.use')
- if self.guse :
- self.count = len(self.guse)
- else :
- self.count = 0
- self.gvtx = self.getData('info.glyph.vtx')
- self.glen = self.getData('info.glyph.len')
- self.gdpi = self.getData('info.glyph.dpi')
- self.vx = self.getData('info.vtx.x')
- self.vy = self.getData('info.vtx.y')
- self.vlen = self.getData('info.len.n')
- if self.vlen :
- self.glen.append(len(self.vlen))
- elif self.glen:
- self.glen.append(0)
- if self.vx :
- self.gvtx.append(len(self.vx))
- elif self.gvtx :
- self.gvtx.append(0)
- def getData(self, path):
- result = None
- cnt = len(self.flatdoc)
- for j in xrange(cnt):
- item = self.flatdoc[j]
- if item.find('=') >= 0:
- (name, argt) = item.split('=')
- argres = argt.split('|')
- else:
- name = item
- argres = []
- if (name == path):
- result = argres
- break
- if (len(argres) > 0) :
- for j in xrange(0,len(argres)):
- argres[j] = int(argres[j])
- return result
- def getGlyphDim(self, gly):
- if self.gdpi[gly] == 0:
- return 0, 0
- maxh = (self.gh[gly] * self.dpi) / self.gdpi[gly]
- maxw = (self.gw[gly] * self.dpi) / self.gdpi[gly]
- return maxh, maxw
- def getPath(self, gly):
- path = ''
- if (gly < 0) or (gly >= self.count):
- return path
- tx = self.vx[self.gvtx[gly]:self.gvtx[gly+1]]
- ty = self.vy[self.gvtx[gly]:self.gvtx[gly+1]]
- p = 0
- for k in xrange(self.glen[gly], self.glen[gly+1]):
- if (p == 0):
- zx = tx[0:self.vlen[k]+1]
- zy = ty[0:self.vlen[k]+1]
- else:
- zx = tx[self.vlen[k-1]+1:self.vlen[k]+1]
- zy = ty[self.vlen[k-1]+1:self.vlen[k]+1]
- p += 1
- j = 0
- while ( j < len(zx) ):
- if (j == 0):
- # Start Position.
- path += 'M %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly])
- elif (j <= len(zx)-3):
- # Cubic Bezier Curve
- path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[j+2] * self.dpi / self.gdpi[gly], zy[j+2] * self.dpi / self.gdpi[gly])
- j += 2
- elif (j == len(zx)-2):
- # Cubic Bezier Curve to Start Position
- path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly])
- j += 1
- elif (j == len(zx)-1):
- # Quadratic Bezier Curve to Start Position
- path += 'Q %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly])
-
- j += 1
- path += 'z'
- return path
-
-
-
-# dictionary of all text strings by index value
-class GlyphDict(object):
- def __init__(self):
- self.gdict = {}
- def lookup(self, id):
- # id='id="gl%d"' % val
- if id in self.gdict:
- return self.gdict[id]
- return None
- def addGlyph(self, val, path):
- id='id="gl%d"' % val
- self.gdict[id] = path
-
-
-def generateBook(bookDir, raw, fixedimage):
- # sanity check Topaz file extraction
- if not os.path.exists(bookDir) :
- print "Can not find directory with unencrypted book"
- return 1
-
- dictFile = os.path.join(bookDir,'dict0000.dat')
- if not os.path.exists(dictFile) :
- print "Can not find dict0000.dat file"
- return 1
-
- pageDir = os.path.join(bookDir,'page')
- if not os.path.exists(pageDir) :
- print "Can not find page directory in unencrypted book"
- return 1
-
- imgDir = os.path.join(bookDir,'img')
- if not os.path.exists(imgDir) :
- print "Can not find image directory in unencrypted book"
- return 1
-
- glyphsDir = os.path.join(bookDir,'glyphs')
- if not os.path.exists(glyphsDir) :
- print "Can not find glyphs directory in unencrypted book"
- return 1
-
- metaFile = os.path.join(bookDir,'metadata0000.dat')
- if not os.path.exists(metaFile) :
- print "Can not find metadata0000.dat in unencrypted book"
- return 1
-
- svgDir = os.path.join(bookDir,'svg')
- if not os.path.exists(svgDir) :
- os.makedirs(svgDir)
-
- if buildXML:
- xmlDir = os.path.join(bookDir,'xml')
- if not os.path.exists(xmlDir) :
- os.makedirs(xmlDir)
-
- otherFile = os.path.join(bookDir,'other0000.dat')
- if not os.path.exists(otherFile) :
- print "Can not find other0000.dat in unencrypted book"
- return 1
-
- print "Updating to color images if available"
- spath = os.path.join(bookDir,'color_img')
- dpath = os.path.join(bookDir,'img')
- filenames = os.listdir(spath)
- filenames = sorted(filenames)
- for filename in filenames:
- imgname = filename.replace('color','img')
- sfile = os.path.join(spath,filename)
- dfile = os.path.join(dpath,imgname)
- imgdata = file(sfile,'rb').read()
- file(dfile,'wb').write(imgdata)
-
- print "Creating cover.jpg"
- isCover = False
- cpath = os.path.join(bookDir,'img')
- cpath = os.path.join(cpath,'img0000.jpg')
- if os.path.isfile(cpath):
- cover = file(cpath, 'rb').read()
- cpath = os.path.join(bookDir,'cover.jpg')
- file(cpath, 'wb').write(cover)
- isCover = True
-
-
- print 'Processing Dictionary'
- dict = Dictionary(dictFile)
-
- print 'Processing Meta Data and creating OPF'
- meta_array = getMetaArray(metaFile)
-
- # replace special chars in title and authors like & < >
- title = meta_array.get('Title','No Title Provided')
- title = title.replace('&','&')
- title = title.replace('<','<')
- title = title.replace('>','>')
- meta_array['Title'] = title
- authors = meta_array.get('Authors','No Authors Provided')
- authors = authors.replace('&','&')
- authors = authors.replace('<','<')
- authors = authors.replace('>','>')
- meta_array['Authors'] = authors
-
- if buildXML:
- xname = os.path.join(xmlDir, 'metadata.xml')
- mlst = []
- for key in meta_array:
- mlst.append('<meta name="' + key + '" content="' + meta_array[key] + '" />\n')
- metastr = "".join(mlst)
- mlst = None
- file(xname, 'wb').write(metastr)
-
- print 'Processing StyleSheet'
- # get some scaling info from metadata to use while processing styles
- fontsize = '135'
- if 'fontSize' in meta_array:
- fontsize = meta_array['fontSize']
-
- # also get the size of a normal text page
- spage = '1'
- if 'firstTextPage' in meta_array:
- spage = meta_array['firstTextPage']
- pnum = int(spage)
-
- # get page height and width from first text page for use in stylesheet scaling
- pname = 'page%04d.dat' % (pnum + 1)
- fname = os.path.join(pageDir,pname)
- flat_xml = convert2xml.fromData(dict, fname)
-
- (ph, pw) = getPageDim(flat_xml)
- if (ph == '-1') or (ph == '0') : ph = '11000'
- if (pw == '-1') or (pw == '0') : pw = '8500'
- meta_array['pageHeight'] = ph
- meta_array['pageWidth'] = pw
- if 'fontSize' not in meta_array.keys():
- meta_array['fontSize'] = fontsize
-
- # process other.dat for css info and for map of page files to svg images
- # this map is needed because some pages actually are made up of multiple
- # pageXXXX.xml files
- xname = os.path.join(bookDir, 'style.css')
- flat_xml = convert2xml.fromData(dict, otherFile)
-
- # extract info.original.pid to get original page information
- pageIDMap = {}
- pageidnums = stylexml2css.getpageIDMap(flat_xml)
- if len(pageidnums) == 0:
- filenames = os.listdir(pageDir)
- numfiles = len(filenames)
- for k in range(numfiles):
- pageidnums.append(k)
- # create a map from page ids to list of page file nums to process for that page
- for i in range(len(pageidnums)):
- id = pageidnums[i]
- if id in pageIDMap.keys():
- pageIDMap[id].append(i)
- else:
- pageIDMap[id] = [i]
-
- # now get the css info
- cssstr , classlst = stylexml2css.convert2CSS(flat_xml, fontsize, ph, pw)
- file(xname, 'wb').write(cssstr)
- if buildXML:
- xname = os.path.join(xmlDir, 'other0000.xml')
- file(xname, 'wb').write(convert2xml.getXML(dict, otherFile))
-
- print 'Processing Glyphs'
- gd = GlyphDict()
- filenames = os.listdir(glyphsDir)
- filenames = sorted(filenames)
- glyfname = os.path.join(svgDir,'glyphs.svg')
- glyfile = open(glyfname, 'w')
- glyfile.write('<?xml version="1.0" standalone="no"?>\n')
- glyfile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
- glyfile.write('<svg width="512" height="512" viewBox="0 0 511 511" xmlns="http://www.w3.org/2000/svg" version="1.1">\n')
- glyfile.write('<title>Glyphs for %s</title>\n' % meta_array['Title'])
- glyfile.write('<defs>\n')
- counter = 0
- for filename in filenames:
- # print ' ', filename
- print '.',
- fname = os.path.join(glyphsDir,filename)
- flat_xml = convert2xml.fromData(dict, fname)
-
- if buildXML:
- xname = os.path.join(xmlDir, filename.replace('.dat','.xml'))
- file(xname, 'wb').write(convert2xml.getXML(dict, fname))
-
- gp = GParser(flat_xml)
- for i in xrange(0, gp.count):
- path = gp.getPath(i)
- maxh, maxw = gp.getGlyphDim(i)
- fullpath = '<path id="gl%d" d="%s" fill="black" /><!-- width=%d height=%d -->\n' % (counter * 256 + i, path, maxw, maxh)
- glyfile.write(fullpath)
- gd.addGlyph(counter * 256 + i, fullpath)
- counter += 1
- glyfile.write('</defs>\n')
- glyfile.write('</svg>\n')
- glyfile.close()
- print " "
-
-
- # start up the html
- # also build up tocentries while processing html
- htmlFileName = "book.html"
- hlst = []
- hlst.append('<?xml version="1.0" encoding="utf-8"?>\n')
- hlst.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.1 Strict//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11-strict.dtd">\n')
- hlst.append('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">\n')
- hlst.append('<head>\n')
- hlst.append('<meta http-equiv="content-type" content="text/html; charset=utf-8"/>\n')
- hlst.append('<title>' + meta_array['Title'] + ' by ' + meta_array['Authors'] + '</title>\n')
- hlst.append('<meta name="Author" content="' + meta_array['Authors'] + '" />\n')
- hlst.append('<meta name="Title" content="' + meta_array['Title'] + '" />\n')
- if 'ASIN' in meta_array:
- hlst.append('<meta name="ASIN" content="' + meta_array['ASIN'] + '" />\n')
- if 'GUID' in meta_array:
- hlst.append('<meta name="GUID" content="' + meta_array['GUID'] + '" />\n')
- hlst.append('<link href="style.css" rel="stylesheet" type="text/css" />\n')
- hlst.append('</head>\n<body>\n')
-
- print 'Processing Pages'
- # Books are at 1440 DPI. This is rendering at twice that size for
- # readability when rendering to the screen.
- scaledpi = 1440.0
-
- filenames = os.listdir(pageDir)
- filenames = sorted(filenames)
- numfiles = len(filenames)
-
- xmllst = []
- elst = []
-
- for filename in filenames:
- # print ' ', filename
- print ".",
- fname = os.path.join(pageDir,filename)
- flat_xml = convert2xml.fromData(dict, fname)
-
- # keep flat_xml for later svg processing
- xmllst.append(flat_xml)
-
- if buildXML:
- xname = os.path.join(xmlDir, filename.replace('.dat','.xml'))
- file(xname, 'wb').write(convert2xml.getXML(dict, fname))
-
- # first get the html
- pagehtml, tocinfo = flatxml2html.convert2HTML(flat_xml, classlst, fname, bookDir, gd, fixedimage)
- elst.append(tocinfo)
- hlst.append(pagehtml)
-
- # finish up the html string and output it
- hlst.append('</body>\n</html>\n')
- htmlstr = "".join(hlst)
- hlst = None
- file(os.path.join(bookDir, htmlFileName), 'wb').write(htmlstr)
-
- print " "
- print 'Extracting Table of Contents from Amazon OCR'
-
- # first create a table of contents file for the svg images
- tlst = []
- tlst.append('<?xml version="1.0" encoding="utf-8"?>\n')
- tlst.append('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n')
- tlst.append('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >')
- tlst.append('<head>\n')
- tlst.append('<title>' + meta_array['Title'] + '</title>\n')
- tlst.append('<meta name="Author" content="' + meta_array['Authors'] + '" />\n')
- tlst.append('<meta name="Title" content="' + meta_array['Title'] + '" />\n')
- if 'ASIN' in meta_array:
- tlst.append('<meta name="ASIN" content="' + meta_array['ASIN'] + '" />\n')
- if 'GUID' in meta_array:
- tlst.append('<meta name="GUID" content="' + meta_array['GUID'] + '" />\n')
- tlst.append('</head>\n')
- tlst.append('<body>\n')
-
- tlst.append('<h2>Table of Contents</h2>\n')
- start = pageidnums[0]
- if (raw):
- startname = 'page%04d.svg' % start
- else:
- startname = 'page%04d.xhtml' % start
-
- tlst.append('<h3><a href="' + startname + '">Start of Book</a></h3>\n')
- # build up a table of contents for the svg xhtml output
- tocentries = "".join(elst)
- elst = None
- toclst = tocentries.split('\n')
- toclst.pop()
- for entry in toclst:
- print entry
- title, pagenum = entry.split('|')
- id = pageidnums[int(pagenum)]
- if (raw):
- fname = 'page%04d.svg' % id
- else:
- fname = 'page%04d.xhtml' % id
- tlst.append('<h3><a href="'+ fname + '">' + title + '</a></h3>\n')
- tlst.append('</body>\n')
- tlst.append('</html>\n')
- tochtml = "".join(tlst)
- file(os.path.join(svgDir, 'toc.xhtml'), 'wb').write(tochtml)
-
-
- # now create index_svg.xhtml that points to all required files
- slst = []
- slst.append('<?xml version="1.0" encoding="utf-8"?>\n')
- slst.append('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n')
- slst.append('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >')
- slst.append('<head>\n')
- slst.append('<title>' + meta_array['Title'] + '</title>\n')
- slst.append('<meta name="Author" content="' + meta_array['Authors'] + '" />\n')
- slst.append('<meta name="Title" content="' + meta_array['Title'] + '" />\n')
- if 'ASIN' in meta_array:
- slst.append('<meta name="ASIN" content="' + meta_array['ASIN'] + '" />\n')
- if 'GUID' in meta_array:
- slst.append('<meta name="GUID" content="' + meta_array['GUID'] + '" />\n')
- slst.append('</head>\n')
- slst.append('<body>\n')
-
- print "Building svg images of each book page"
- slst.append('<h2>List of Pages</h2>\n')
- slst.append('<div>\n')
- idlst = sorted(pageIDMap.keys())
- numids = len(idlst)
- cnt = len(idlst)
- previd = None
- for j in range(cnt):
- pageid = idlst[j]
- if j < cnt - 1:
- nextid = idlst[j+1]
- else:
- nextid = None
- print '.',
- pagelst = pageIDMap[pageid]
- flst = []
- for page in pagelst:
- flst.append(xmllst[page])
- flat_svg = "".join(flst)
- flst=None
- svgxml = flatxml2svg.convert2SVG(gd, flat_svg, pageid, previd, nextid, svgDir, raw, meta_array, scaledpi)
- if (raw) :
- pfile = open(os.path.join(svgDir,'page%04d.svg' % pageid),'w')
- slst.append('<a href="svg/page%04d.svg">Page %d</a>\n' % (pageid, pageid))
- else :
- pfile = open(os.path.join(svgDir,'page%04d.xhtml' % pageid), 'w')
- slst.append('<a href="svg/page%04d.xhtml">Page %d</a>\n' % (pageid, pageid))
- previd = pageid
- pfile.write(svgxml)
- pfile.close()
- counter += 1
- slst.append('</div>\n')
- slst.append('<h2><a href="svg/toc.xhtml">Table of Contents</a></h2>\n')
- slst.append('</body>\n</html>\n')
- svgindex = "".join(slst)
- slst = None
- file(os.path.join(bookDir, 'index_svg.xhtml'), 'wb').write(svgindex)
-
- print " "
-
- # build the opf file
- opfname = os.path.join(bookDir, 'book.opf')
- olst = []
- olst.append('<?xml version="1.0" encoding="utf-8"?>\n')
- olst.append('<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="guid_id">\n')
- # adding metadata
- olst.append(' <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">\n')
- if 'GUID' in meta_array:
- olst.append(' <dc:identifier opf:scheme="GUID" id="guid_id">' + meta_array['GUID'] + '</dc:identifier>\n')
- if 'ASIN' in meta_array:
- olst.append(' <dc:identifier opf:scheme="ASIN">' + meta_array['ASIN'] + '</dc:identifier>\n')
- if 'oASIN' in meta_array:
- olst.append(' <dc:identifier opf:scheme="oASIN">' + meta_array['oASIN'] + '</dc:identifier>\n')
- olst.append(' <dc:title>' + meta_array['Title'] + '</dc:title>\n')
- olst.append(' <dc:creator opf:role="aut">' + meta_array['Authors'] + '</dc:creator>\n')
- olst.append(' <dc:language>en</dc:language>\n')
- olst.append(' <dc:date>' + meta_array['UpdateTime'] + '</dc:date>\n')
- if isCover:
- olst.append(' <meta name="cover" content="bookcover"/>\n')
- olst.append(' </metadata>\n')
- olst.append('<manifest>\n')
- olst.append(' <item id="book" href="book.html" media-type="application/xhtml+xml"/>\n')
- olst.append(' <item id="stylesheet" href="style.css" media-type="text/css"/>\n')
- # adding image files to manifest
- filenames = os.listdir(imgDir)
- filenames = sorted(filenames)
- for filename in filenames:
- imgname, imgext = os.path.splitext(filename)
- if imgext == '.jpg':
- imgext = 'jpeg'
- if imgext == '.svg':
- imgext = 'svg+xml'
- olst.append(' <item id="' + imgname + '" href="img/' + filename + '" media-type="image/' + imgext + '"/>\n')
- if isCover:
- olst.append(' <item id="bookcover" href="cover.jpg" media-type="image/jpeg" />\n')
- olst.append('</manifest>\n')
- # adding spine
- olst.append('<spine>\n <itemref idref="book" />\n</spine>\n')
- if isCover:
- olst.append(' <guide>\n')
- olst.append(' <reference href="cover.jpg" type="cover" title="Cover"/>\n')
- olst.append(' </guide>\n')
- olst.append('</package>\n')
- opfstr = "".join(olst)
- olst = None
- file(opfname, 'wb').write(opfstr)
-
- print 'Processing Complete'
-
- return 0
-
-def usage():
- print "genbook.py generates a book from the extract Topaz Files"
- print "Usage:"
- print " genbook.py [-r] [-h [--fixed-image] <bookDir> "
- print " "
- print "Options:"
- print " -h : help - print this usage message"
- print " -r : generate raw svg files (not wrapped in xhtml)"
- print " --fixed-image : genearate any Fixed Area as an svg image in the html"
- print " "
-
-
-def main(argv):
- bookDir = ''
- if len(argv) == 0:
- argv = sys.argv
-
- try:
- opts, args = getopt.getopt(argv[1:], "rh:",["fixed-image"])
-
- except getopt.GetoptError, err:
- print str(err)
- usage()
- return 1
-
- if len(opts) == 0 and len(args) == 0 :
- usage()
- return 1
-
- raw = 0
- fixedimage = True
- for o, a in opts:
- if o =="-h":
- usage()
- return 0
- if o =="-r":
- raw = 1
- if o =="--fixed-image":
- fixedimage = True
-
- bookDir = args[0]
-
- rv = generateBook(bookDir, raw, fixedimage)
- return rv
-
-
-if __name__ == '__main__':
- sys.exit(main(''))
+
+def getpageIDMap(flatxml):
+    """Return the 'info.original.pid' data found in flattened page XML.
+
+    A DocParser is built over flatxml (its other three constructor
+    arguments are 0) and asked for the 'info.original.pid' path over the
+    range 0..-1 with the final getData() flag set to True.
+    """
+    parser = DocParser(flatxml, 0, 0, 0)
+    return parser.getData('info.original.pid', 0, -1, True)
--- /dev/null
+#!/usr/bin/env python
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+
+import os, sys
+import signal
+import threading
+import subprocess
+from subprocess import Popen, PIPE, STDOUT
+
+# **heavily** chopped up and modified version of asyncproc.py
+# to make it actually work on Windows as well as Mac/Linux
+# For the original see:
+# "http://www.lysator.liu.se/~bellman/download/"
+# author is "Thomas Bellman <bellman@lysator.liu.se>"
+# available under GPL version 3 or Later
+
+# create an asynchronous subprocess whose output can be collected in
+# a non-blocking manner
+
+# What a mess! Have to use threads just to get non-blocking io
+# in a cross-platform manner
+
+# luckily all thread use is hidden within this class
+
+class Process(object):
+    """Subprocess wrapper whose pipes are serviced by background threads.
+
+    Constructor arguments are handed unchanged to subprocess.Popen, except
+    that stdin, stdout and stderr default to pipes when the caller did not
+    supply them.  One daemon thread per pipe moves the data, so read(),
+    readerr() and write() never block on the child.
+    """
+
+    def __init__(self, *params, **kwparams):
+        # stdin/stdout/stderr are Popen's 4th/5th/6th positional parameters;
+        # a pipe default is only installed when none was passed positionally.
+        if len(params) <= 3:
+            kwparams.setdefault('stdin', subprocess.PIPE)
+        if len(params) <= 4:
+            kwparams.setdefault('stdout', subprocess.PIPE)
+        if len(params) <= 5:
+            kwparams.setdefault('stderr', subprocess.PIPE)
+        self.__pending_input = []
+        self.__collected_outdata = []
+        self.__collected_errdata = []
+        self.__exitstatus = None
+        self.__lock = threading.Lock()
+        self.__inputsem = threading.Semaphore(0)
+        self.__quit = False
+
+        self.__process = subprocess.Popen(*params, **kwparams)
+
+        if self.__process.stdin:
+            self.__stdin_thread = self.__spawn(
+                "stdin-thread", self.__feeder,
+                self.__pending_input, self.__process.stdin)
+        if self.__process.stdout:
+            self.__stdout_thread = self.__spawn(
+                "stdout-thread", self.__reader,
+                self.__collected_outdata, self.__process.stdout)
+        if self.__process.stderr:
+            self.__stderr_thread = self.__spawn(
+                "stderr-thread", self.__reader,
+                self.__collected_errdata, self.__process.stderr)
+
+    def __spawn(self, name, target, *args):
+        # start a daemon worker thread and return it
+        worker = threading.Thread(name=name, target=target, args=args)
+        worker.setDaemon(True)
+        worker.start()
+        return worker
+
+    def pid(self):
+        """Return the process id of the child."""
+        return self.__process.pid
+
+    def kill(self, signal):
+        """Send the given signal number to the child."""
+        self.__process.send_signal(signal)
+
+    # check on subprocess (pass in 'nowait') to act like poll
+    def wait(self, flag):
+        """Return the child's exit status, or None if it is still running.
+
+        flag -- 'nowait' (any case) polls; any other string blocks until
+                the child exits.
+
+        Once the child has exited, stdin is closed and the reader threads
+        are joined so that read()/readerr() see all remaining output.
+        """
+        if flag.lower() == 'nowait':
+            rc = self.__process.poll()
+        else:
+            rc = self.__process.wait()
+        if rc is not None:
+            if self.__process.stdin:
+                self.closeinput()
+            if self.__process.stdout:
+                self.__stdout_thread.join()
+            if self.__process.stderr:
+                self.__stderr_thread.join()
+        return self.__process.returncode
+
+    def terminate(self):
+        """Close the child's stdin (if piped) and terminate the child."""
+        if self.__process.stdin:
+            self.closeinput()
+        self.__process.terminate()
+
+    # thread gets data from subprocess stdout
+    def __reader(self, collector, source):
+        # Copy everything the child writes into collector until EOF.  The
+        # pipe is closed even if the read fails, and the lock is always
+        # released, so a failing reader cannot wedge read()/readerr().
+        try:
+            while True:
+                data = os.read(source.fileno(), 65536)
+                if not data:
+                    break
+                self.__lock.acquire()
+                try:
+                    collector.append(data)
+                finally:
+                    self.__lock.release()
+        finally:
+            source.close()
+
+    # thread feeds data to subprocess stdin
+    def __feeder(self, pending, drain):
+        # Every semaphore release corresponds to one queued chunk or to a
+        # close request; the pipe is closed once the queue is drained and
+        # closeinput() has been called.
+        try:
+            while True:
+                self.__inputsem.acquire()
+                self.__lock.acquire()
+                try:
+                    if not pending and self.__quit:
+                        break
+                    data = pending.pop(0)
+                finally:
+                    self.__lock.release()
+                drain.write(data)
+        except (IOError, OSError):
+            # the child went away before consuming its input; nothing more
+            # can be delivered, so stop quietly instead of dying with a
+            # traceback in a daemon thread
+            pass
+        try:
+            drain.close()
+        except (IOError, OSError):
+            pass
+
+    def __drain(self, collector):
+        # atomically take everything collected so far
+        self.__lock.acquire()
+        try:
+            taken = "".join(collector)
+            del collector[:]
+        finally:
+            self.__lock.release()
+        return taken
+
+    # non-blocking read of data from subprocess stdout
+    def read(self):
+        """Return (and forget) all stdout data collected so far."""
+        return self.__drain(self.__collected_outdata)
+
+    # non-blocking read of data from subprocess stderr
+    def readerr(self):
+        """Return (and forget) all stderr data collected so far."""
+        return self.__drain(self.__collected_errdata)
+
+    # non-blocking write to stdin of subprocess
+    def write(self, data):
+        """Queue data for delivery to the child's stdin.
+
+        Raises ValueError when the child's stdin is not a pipe.
+        """
+        if self.__process.stdin is None:
+            raise ValueError("Writing to process with stdin not a pipe")
+        self.__lock.acquire()
+        try:
+            self.__pending_input.append(data)
+            self.__inputsem.release()
+        finally:
+            self.__lock.release()
+
+    # close stdinput of subprocess
+    def closeinput(self):
+        """Ask the feeder thread to close stdin once the queue is empty."""
+        self.__lock.acquire()
+        try:
+            self.__quit = True
+            self.__inputsem.release()
+        finally:
+            self.__lock.release()
#!/usr/bin/env python
-from __future__ import with_statement
-
-# engine to remove drm from Kindle for Mac and Kindle for PC books
-# for personal use for archiving and converting your ebooks
-
-# PLEASE DO NOT PIRATE EBOOKS!
-
-# We want all authors and publishers, and eBook stores to live
-# long and prosperous lives but at the same time we just want to
-# be able to read OUR books on whatever device we want and to keep
-# readable for a long, long time
-
-# This borrows very heavily from works by CMBDTC, IHeartCabbages, skindle,
-# unswindle, DarkReverser, ApprenticeAlf, DiapDealer, some_updates
-# and many many others
-
-
-__version__ = '4.0'
-
class Unbuffered:
def __init__(self, stream):
self.stream = stream
return getattr(self.stream, attr)
import sys
-import os, csv, getopt
-import string
-import re
-import traceback
-
-buildXML = False
-
-class DrmException(Exception):
- pass
if 'calibre' in sys.modules:
inCalibre = True
else:
inCalibre = False
-if inCalibre:
- from calibre_plugins.k4mobidedrm import mobidedrm
- from calibre_plugins.k4mobidedrm import topazextract
- from calibre_plugins.k4mobidedrm import kgenpids
-else:
- import mobidedrm
- import topazextract
- import kgenpids
-
-
-# cleanup bytestring filenames
-# borrowed from calibre from calibre/src/calibre/__init__.py
-# added in removal of non-printing chars
-# and removal of . at start
-# convert spaces to underscores
-def cleanup_name(name):
- _filename_sanitize = re.compile(r'[\xae\0\\|\?\*<":>\+/]')
- substitute='_'
- one = ''.join(char for char in name if char in string.printable)
- one = _filename_sanitize.sub(substitute, one)
- one = re.sub(r'\s', ' ', one).strip()
- one = re.sub(r'^\.+$', '_', one)
- one = one.replace('..', substitute)
- # Windows doesn't like path components that end with a period
- if one.endswith('.'):
- one = one[:-1]+substitute
- # Mac and Unix don't like file names that begin with a full stop
- if len(one) > 0 and one[0] == '.':
- one = substitute+one[1:]
- one = one.replace(' ','_')
- return one
-
-def decryptBook(infile, outdir, k4, kInfoFiles, serials, pids):
- global buildXML
-
- # handle the obvious cases at the beginning
- if not os.path.isfile(infile):
- print >>sys.stderr, ('K4MobiDeDrm v%(__version__)s\n' % globals()) + "Error: Input file does not exist"
- return 1
-
- mobi = True
- magic3 = file(infile,'rb').read(3)
- if magic3 == 'TPZ':
- mobi = False
+buildXML = False
- bookname = os.path.splitext(os.path.basename(infile))[0]
+import os, csv, getopt
+import zlib, zipfile, tempfile, shutil
+from struct import pack
+from struct import unpack
+from alfcrypto import Topaz_Cipher
- if mobi:
- mb = mobidedrm.MobiBook(infile)
- else:
- mb = topazextract.TopazBook(infile)
+class TpzDRMError(Exception):
+ # Raised by the Topaz routines in this file for malformed headers or
+ # records, failed dkey decryption, and a missing or invalid book key.
+ pass
- title = mb.getBookTitle()
- print "Processing Book: ", title
- filenametitle = cleanup_name(title)
- outfilename = bookname
- if len(outfilename)<=8 or len(filenametitle)<=8:
- outfilename = outfilename + "_" + filenametitle
- elif outfilename[:8] != filenametitle[:8]:
- outfilename = outfilename[:8] + "_" + filenametitle
- # avoid excessively long file names
- if len(outfilename)>150:
- outfilename = outfilename[:150]
+# local support routines
+if inCalibre:
+ from calibre_plugins.k4mobidedrm import kgenpids
+ from calibre_plugins.k4mobidedrm import genbook
+else:
+ import kgenpids
+ import genbook
+
+
+# recursive zip creation support routine
+def zipUpDir(myzip, tdir, localname):
+    """Recursively add the files under tdir/localname to an open zip.
+
+    myzip     -- zipfile.ZipFile opened for writing
+    tdir      -- root directory on disk
+    localname -- path relative to tdir to archive ("" for tdir itself);
+                 it is also the prefix of the names stored in the archive
+
+    Only regular files are written; directories are descended into.
+    """
+    if localname != "":
+        scandir = os.path.join(tdir, localname)
+    else:
+        scandir = tdir
+    for entry in os.listdir(scandir):
+        arcpath = os.path.join(localname, entry)
+        diskpath = os.path.join(scandir, entry)
+        if os.path.isfile(diskpath):
+            myzip.write(diskpath, arcpath)
+        elif os.path.isdir(diskpath):
+            zipUpDir(myzip, tdir, arcpath)
- # build pid list
- md1, md2 = mb.getPIDMetaInfo()
- pidlst = kgenpids.getPidList(md1, md2, k4, pids, serials, kInfoFiles)
+#
+# Utility routines
+#
- try:
- mb.processBook(pidlst)
+# Get a 7 bit encoded number from file
+def bookReadEncodedNumber(fo):
+    """Read one variable-length encoded integer from the file object fo.
+
+    A leading 0xFF byte marks the number as negative.  The magnitude is
+    stored 7 bits per byte, most significant group first; every byte except
+    the last has its top bit (0x80) set.
+    """
+    byte = ord(fo.read(1))
+    negative = (byte == 0xFF)
+    if negative:
+        byte = ord(fo.read(1))
+    value = byte
+    if byte >= 0x80:
+        value = byte & 0x7F
+        while byte >= 0x80:
+            byte = ord(fo.read(1))
+            value = (value << 7) + (byte & 0x7F)
+    if negative:
+        value = -value
+    return value
+
+# Get a length prefixed string from file
+def bookReadString(fo):
+    """Read a length-prefixed string from fo.
+
+    The length is an encoded number (see bookReadEncodedNumber); unpack()
+    raises struct.error if fewer bytes than announced could be read.
+    """
+    size = bookReadEncodedNumber(fo)
+    raw = fo.read(size)
+    (text,) = unpack("%ds" % size, raw)
+    return text
- except mobidedrm.DrmException, e:
- print >>sys.stderr, ('K4MobiDeDrm v%(__version__)s\n' % globals()) + "Error: " + str(e) + "\nDRM Removal Failed.\n"
- return 1
- except topazextract.TpzDRMError, e:
- print >>sys.stderr, ('K4MobiDeDrm v%(__version__)s\n' % globals()) + "Error: " + str(e) + "\nDRM Removal Failed.\n"
- return 1
- except Exception, e:
- print >>sys.stderr, ('K4MobiDeDrm v%(__version__)s\n' % globals()) + "Error: " + str(e) + "\nDRM Removal Failed.\n"
- return 1
+#
+# crypto routines
+#
- if mobi:
- if mb.getPrintReplica():
- outfile = os.path.join(outdir, outfilename + '_nodrm' + '.azw4')
+# Context initialisation for the Topaz Crypto
+def topazCryptoInit(key):
+    """Return the cipher context that a fresh Topaz_Cipher builds from key."""
+    cipher = Topaz_Cipher()
+    return cipher.ctx_init(key)
+
+# ctx1 = 0x0CAFFE19E
+# for keyChar in key:
+# keyByte = ord(keyChar)
+# ctx2 = ctx1
+# ctx1 = ((((ctx1 >>2) * (ctx1 >>7))&0xFFFFFFFF) ^ (keyByte * keyByte * 0x0F902007)& 0xFFFFFFFF )
+# return [ctx1,ctx2]
+
+# decrypt data with the context prepared by topazCryptoInit()
+def topazCryptoDecrypt(data, ctx):
+    """Decrypt data with a fresh Topaz_Cipher, using a context obtained
+    from topazCryptoInit()."""
+    cipher = Topaz_Cipher()
+    return cipher.decrypt(data, ctx)
+# ctx1 = ctx[0]
+# ctx2 = ctx[1]
+# plainText = ""
+# for dataChar in data:
+# dataByte = ord(dataChar)
+# m = (dataByte ^ ((ctx1 >> 3) &0xFF) ^ ((ctx2<<3) & 0xFF)) &0xFF
+# ctx2 = ctx1
+# ctx1 = (((ctx1 >> 2) * (ctx1 >> 7)) &0xFFFFFFFF) ^((m * m * 0x0F902007) &0xFFFFFFFF)
+# plainText += chr(m)
+# return plainText
+
+# Decrypt data with the PID
+def decryptRecord(data,PID):
+    """Decrypt data with a cipher context keyed by PID."""
+    return topazCryptoDecrypt(data, topazCryptoInit(PID))
+
+# Try to decrypt a dkey record (contains the bookPID)
+def decryptDkeyRecord(data,PID):
+    """Decrypt one dkey record with PID and return the 8-byte book key.
+
+    The decrypted record must have the layout
+    "PID" <8> <the PID itself> <8> <book key> "pid".
+
+    Raises TpzDRMError when the record does not have that layout.  A record
+    of the wrong size is reported the same way, so that the caller, which
+    skips records on TpzDRMError, moves on to the next record instead of
+    aborting on struct.error.
+    """
+    from struct import error as StructError
+    record = decryptRecord(data,PID)
+    try:
+        fields = unpack("3sB8sB8s3s",record)
+    except StructError:
+        raise TpzDRMError("Record has an unexpected size")
+    if fields[0] != "PID" or fields[5] != "pid" :
+        raise TpzDRMError("Didn't find PID magic numbers in record")
+    elif fields[1] != 8 or fields[3] != 8 :
+        raise TpzDRMError("Record didn't contain correct length fields")
+    elif fields[2] != PID :
+        raise TpzDRMError("Record didn't contain PID")
+    return fields[4]
+
+# Decrypt all dkey records (contain the book PID)
+def decryptDkeyRecords(data,PID):
+    """Return the book keys of every dkey record that decrypts with PID.
+
+    data starts with a one-byte record count; each record is a one-byte
+    length followed by that many bytes.  Records rejected by
+    decryptDkeyRecord (TpzDRMError) are skipped.
+
+    Raises TpzDRMError("BookKey Not Found") when no record yields a key.
+    """
+    count = ord(data[0])
+    keys = []
+    pos = 1
+    for _ in range(0, count):
+        size = ord(data[pos])
+        chunk = data[pos+1:pos+1+size]
+        try:
+            keys.append(decryptDkeyRecord(chunk, PID))
+        except TpzDRMError:
+            pass
+        pos += 1 + size
+    if not keys:
+        raise TpzDRMError("BookKey Not Found")
+    return keys
+
+
+class TopazBook:
+    def __init__(self, filename):
+        """Open a Topaz book and parse its header table and metadata.
+
+        filename -- path of the file to read; it must begin with the
+                    4-byte magic 'TPZ0'.
+
+        Raises TpzDRMError when the magic does not match, plus whatever the
+        header and metadata parsers raise.  On any such failure the input
+        file is closed again, and the temporary working directory
+        (self.outdir) is only created after parsing succeeded, so a
+        rejected file leaves neither an open handle nor a stray directory.
+        """
+        self.fo = file(filename, 'rb')
+        self.bookPayloadOffset = 0
+        self.bookHeaderRecords = {}
+        self.bookMetadata = {}
+        self.bookKey = None
+        try:
+            magic = unpack("4s",self.fo.read(4))[0]
+            if magic != 'TPZ0':
+                raise TpzDRMError("Parse Error : Invalid Header, not a Topaz file")
+            self.parseTopazHeaders()
+            self.parseMetadata()
+        except:
+            # the exception is re-raised untouched; just release the handle
+            self.fo.close()
+            raise
+        self.outdir = tempfile.mkdtemp()
+
+    def parseTopazHeaders(self):
+        """Parse the header table at the current position of self.fo.
+
+        Fills self.bookHeaderRecords with
+        tag -> [[offset, decompressedLength, compressedLength], ...]
+        and stores the file position following the table in
+        self.bookPayloadOffset.
+
+        Raises TpzDRMError when a record does not start with 0x63 or the
+        table is not terminated by 0x64.
+        """
+        fo = self.fo
+
+        def readTriples():
+            # a count followed by that many triples of encoded numbers
+            count = bookReadEncodedNumber(fo)
+            return [[bookReadEncodedNumber(fo),
+                     bookReadEncodedNumber(fo),
+                     bookReadEncodedNumber(fo)] for _ in range(0, count)]
+
+        def readRecord():
+            # one header record: 0x63 marker, tag string, triples
+            if ord(fo.read(1)) != 0x63:
+                raise TpzDRMError("Parse Error : Invalid Header")
+            tag = bookReadString(fo)
+            return tag, readTriples()
+
+        for _ in range(0, bookReadEncodedNumber(fo)):
+            tag, triples = readRecord()
+            self.bookHeaderRecords[tag] = triples
+        if ord(fo.read(1)) != 0x64 :
+            raise TpzDRMError("Parse Error : Invalid Header")
+        self.bookPayloadOffset = fo.tell()
+
+    def parseMetadata(self):
+        """Read the "metadata" payload record into self.bookMetadata.
+
+        Seeks to the first "metadata" entry of the header table, checks the
+        record tag, then reads a flags byte (unused), a one-byte entry
+        count and that many key/content string pairs.
+
+        Returns self.bookMetadata.  Raises TpzDRMError when the record tag
+        is not "metadata".
+        """
+        fo = self.fo
+        fo.seek(self.bookPayloadOffset + self.bookHeaderRecords["metadata"][0][0])
+        if bookReadString(fo) != "metadata" :
+            raise TpzDRMError("Parse Error : Record Names Don't Match")
+        ord(fo.read(1))  # flags byte: consumed but not interpreted
+        count = ord(fo.read(1))
+        for _ in range(0, count):
+            key = bookReadString(fo)
+            self.bookMetadata[key] = bookReadString(fo)
+        return self.bookMetadata
+
+    def getPIDMetaInfo(self):
+        """Return (keysRecord, keysRecordRecord) for PID generation.
+
+        keysRecord is the 'keys' metadata value ('' when absent).  It is a
+        comma separated list of metadata names; keysRecordRecord is the
+        concatenation of those entries' values, with missing ones
+        contributing ''.
+        """
+        keysRecord = self.bookMetadata.get('keys','')
+        if keysRecord == '':
+            return keysRecord, ''
+        values = [self.bookMetadata.get(name,'') for name in keysRecord.split(',')]
+        return keysRecord, ''.join(values)
+
+    def getBookTitle(self):
+        """Return the 'Title' metadata entry, or '' when there is none."""
+        return self.bookMetadata.get('Title', '')
+
+ def setBookKey(self, key):
+ # Remember the key that getBookPayloadRecord() uses to decrypt
+ # encrypted payload records.
+ self.bookKey = key
+
+ def getBookPayloadRecord(self, name, index):
+ # Return payload record number `index` of section `name`, decrypted
+ # and decompressed if necessary.
+ # Raises TpzDRMError when the record is not listed in the header table,
+ # when the tag or index stored in the payload disagrees with the request,
+ # or when the record is encrypted and no book key has been set.
+ encrypted = False
+ compressed = False
+ try:
+ recordOffset = self.bookHeaderRecords[name][index][0]
+ # NOTE(review): bare except - any failure of the lookup above is
+ # reported as a missing record
+ except:
+ raise TpzDRMError("Parse Error : Invalid Record, record not found")
+
+ self.fo.seek(self.bookPayloadOffset + recordOffset)
+
+ tag = bookReadString(self.fo)
+ if tag != name :
+ raise TpzDRMError("Parse Error : Invalid Record, record name doesn't match")
+
+ # a negative stored index marks the record as encrypted; the real
+ # index is recovered as -n-1
+ recordIndex = bookReadEncodedNumber(self.fo)
+ if recordIndex < 0 :
+ encrypted = True
+ recordIndex = -recordIndex -1
+
+ if recordIndex != index :
+ raise TpzDRMError("Parse Error : Invalid Record, index doesn't match")
+
+ # header entries are [offset, decompressedLength, compressedLength];
+ # a non-zero compressed length means that many bytes of zlib data follow,
+ # otherwise decompressedLength bytes are stored as-is
+ if (self.bookHeaderRecords[name][index][2] > 0):
+ compressed = True
+ record = self.fo.read(self.bookHeaderRecords[name][index][2])
else:
- outfile = os.path.join(outdir, outfilename + '_nodrm' + '.mobi')
- mb.getMobiFile(outfile)
- return 0
-
- # topaz:
- print " Creating NoDRM HTMLZ Archive"
- zipname = os.path.join(outdir, outfilename + '_nodrm' + '.htmlz')
- mb.getHTMLZip(zipname)
-
- print " Creating SVG ZIP Archive"
- zipname = os.path.join(outdir, outfilename + '_SVG' + '.zip')
- mb.getSVGZip(zipname)
-
- if buildXML:
- print " Creating XML ZIP Archive"
- zipname = os.path.join(outdir, outfilename + '_XML' + '.zip')
- mb.getXMLZip(zipname)
-
- # remove internal temporary directory of Topaz pieces
- mb.cleanup()
-
- return 0
-
+ record = self.fo.read(self.bookHeaderRecords[name][index][1])
+
+ # decryption is applied first, decompression second
+ if encrypted:
+ if self.bookKey:
+ ctx = topazCryptoInit(self.bookKey)
+ record = topazCryptoDecrypt(record,ctx)
+ else :
+ raise TpzDRMError("Error: Attempt to decrypt without bookKey")
+
+ if compressed:
+ record = zlib.decompress(record)
+
+ return record
+
+    def processBook(self, pidlst):
+        """Find the book key, extract the payload and generate the book.
+
+        pidlst -- candidate PIDs; only the first 8 characters of each are
+                  tried against the 'dkey' record.
+
+        When the 'dkey' record cannot be read the book is assumed to be
+        unencrypted and is extracted without a key.  Returns the result of
+        genbook.generateBook().  Raises TpzDRMError when a 'dkey' record
+        exists but no PID decrypts it.
+        """
+        raw = 0
+        fixedimage=True
+        try:
+            keydata = self.getBookPayloadRecord('dkey', 0)
+        except TpzDRMError:
+            print "no dkey record found, book may not be encrypted"
+            print "attempting to extrct files without a book key"
+            keydata = None
+
+        if keydata is not None:
+            # try each pid to decode the file
+            bookKey = None
+            for candidate in pidlst:
+                # use 8 digit pids here
+                shortpid = candidate[0:8]
+                print "\nTrying: ", shortpid
+                try:
+                    found = decryptDkeyRecords(keydata, shortpid)
+                except TpzDRMError:
+                    continue
+                bookKey = found[0]
+                print "Book Key Found!"
+                break
+            if not bookKey:
+                raise TpzDRMError('Decryption Unsucessful; No valid pid found')
+            self.setBookKey(bookKey)
+
+        self.createBookDirectory()
+        self.extractFiles()
+        print "Successfully Extracted Topaz contents"
+        rv = genbook.generateBook(self.outdir, raw, fixedimage)
+        if rv == 0:
+            print "\nBook Successfully generated"
+        return rv
+
+    def createBookDirectory(self):
+        """Create self.outdir and its img, color_img, page and glyphs
+        sub-directories, skipping any that already exist."""
+        root = self.outdir
+        wanted = [root]
+        for sub in ('img', 'color_img', 'page', 'glyphs'):
+            wanted.append(os.path.join(root, sub))
+        for path in wanted:
+            if not os.path.exists(path):
+                os.makedirs(path)
+
+    def extractFiles(self):
+        """Write every payload record except 'dkey' below self.outdir.
+
+        Files are named <section><4-digit index><ext>.  'img' and 'color'
+        records get the extension .jpg, everything else .dat.  Records of
+        the sections img, color, page and glyphs go to the sub-directories
+        img, color_img, page and glyphs; all others go to self.outdir
+        itself.  Empty records are not written.
+        """
+        outdir = self.outdir
+        subdirs = {'img': 'img', 'color': 'color_img',
+                   'page': 'page', 'glyphs': 'glyphs'}
+        for name in self.bookHeaderRecords:
+            if name == "dkey":
+                continue
+            if name in ('img', 'color'):
+                ext = '.jpg'
+            else:
+                ext = '.dat'
+            if name in subdirs:
+                destdir = os.path.join(outdir, subdirs[name])
+            else:
+                destdir = outdir
+            print "\nProcessing Section: %s " % name
+            for index in range(0, len(self.bookHeaderRecords[name])):
+                outputFile = os.path.join(destdir, name + ("%04d" % index) + ext)
+                print ".",
+                record = self.getBookPayloadRecord(name,index)
+                if record != '':
+                    file(outputFile, 'wb').write(record)
+            print " "
+
+    def getHTMLZip(self, zipname):
+        """Write book.html, book.opf, style.css, the optional cover.jpg and
+        the img directory from self.outdir into a deflated zip at zipname.
+
+        The archive is closed even when a member cannot be added, so a
+        failure does not leave the zip file handle open.
+        """
+        htmlzip = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+        try:
+            htmlzip.write(os.path.join(self.outdir,'book.html'),'book.html')
+            htmlzip.write(os.path.join(self.outdir,'book.opf'),'book.opf')
+            cover = os.path.join(self.outdir,'cover.jpg')
+            if os.path.isfile(cover):
+                htmlzip.write(cover,'cover.jpg')
+            htmlzip.write(os.path.join(self.outdir,'style.css'),'style.css')
+            zipUpDir(htmlzip, self.outdir, 'img')
+        finally:
+            htmlzip.close()
+
+    def getSVGZip(self, zipname):
+        """Write index_svg.xhtml plus the svg and img directories from
+        self.outdir into a deflated zip at zipname.
+
+        The archive is closed even when a member cannot be added.
+        """
+        svgzip = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+        try:
+            svgzip.write(os.path.join(self.outdir,'index_svg.xhtml'),'index_svg.xhtml')
+            zipUpDir(svgzip, self.outdir, 'svg')
+            zipUpDir(svgzip, self.outdir, 'img')
+        finally:
+            svgzip.close()
+
+    def getXMLZip(self, zipname):
+        """Write the contents of self.outdir/xml (stored at the archive
+        root) and the img directory into a deflated zip at zipname.
+
+        The archive is closed even when a member cannot be added.
+        """
+        xmlzip = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+        try:
+            targetdir = os.path.join(self.outdir,'xml')
+            zipUpDir(xmlzip, targetdir, '')
+            zipUpDir(xmlzip, self.outdir, 'img')
+        finally:
+            xmlzip.close()
+
+    def cleanup(self):
+        """Remove the temporary working directory and close the book file.
+
+        Errors while deleting the directory are ignored.  The input file
+        opened by the constructor is closed here as well; previously it
+        was never closed.  Calling cleanup() more than once is harmless.
+        """
+        if os.path.isdir(self.outdir):
+            shutil.rmtree(self.outdir, True)
+        fo = getattr(self, 'fo', None)
+        if fo is not None and not fo.closed:
+            fo.close()
def usage(progname):
- print "Removes DRM protection from K4PC/M, Kindle, Mobi and Topaz ebooks"
+ print "Removes DRM protection from Topaz ebooks and extract the contents"
print "Usage:"
print " %s [-k <kindle.info>] [-p <pidnums>] [-s <kindleSerialNumbers>] <infile> <outdir> " % progname
-#
+
# Main
-#
def main(argv=sys.argv):
+ global buildXML
progname = os.path.basename(argv[0])
-
k4 = False
- kInfoFiles = []
- serials = []
pids = []
-
- print ('K4MobiDeDrm v%(__version__)s '
- 'provided by the work of many including DiapDealer, SomeUpdates, IHeartCabbages, CMBDTC, Skindle, DarkReverser, ApprenticeAlf, etc .' % globals())
+ serials = []
+ kInfoFiles = []
try:
opts, args = getopt.getopt(sys.argv[1:], "k:p:s:")
except getopt.GetoptError, err:
print str(err)
usage(progname)
- sys.exit(2)
+ return 1
if len(args)<2:
usage(progname)
- sys.exit(2)
+ return 1
for o, a in opts:
if o == "-k":
if a == None :
- raise DrmException("Invalid parameter for -k")
+ print "Invalid parameter for -k"
+ return 1
kInfoFiles.append(a)
if o == "-p":
if a == None :
- raise DrmException("Invalid parameter for -p")
+ print "Invalid parameter for -p"
+ return 1
pids = a.split(',')
if o == "-s":
if a == None :
- raise DrmException("Invalid parameter for -s")
+ print "Invalid parameter for -s"
+ return 1
serials = a.split(',')
-
- # try with built in Kindle Info files
k4 = True
- if sys.platform.startswith('linux'):
- k4 = False
- kInfoFiles = None
+
infile = args[0]
outdir = args[1]
- return decryptBook(infile, outdir, k4, kInfoFiles, serials, pids)
+
+ if not os.path.isfile(infile):
+ print "Input File Does Not Exist"
+ return 1
+
+ bookname = os.path.splitext(os.path.basename(infile))[0]
+
+ tb = TopazBook(infile)
+ title = tb.getBookTitle()
+ print "Processing Book: ", title
+ keysRecord, keysRecordRecord = tb.getPIDMetaInfo()
+ pidlst = kgenpids.getPidList(keysRecord, keysRecordRecord, k4, pids, serials, kInfoFiles)
+
+ try:
+ print "Decrypting Book"
+ tb.processBook(pidlst)
+
+ print " Creating HTML ZIP Archive"
+ zipname = os.path.join(outdir, bookname + '_nodrm' + '.htmlz')
+ tb.getHTMLZip(zipname)
+
+ print " Creating SVG ZIP Archive"
+ zipname = os.path.join(outdir, bookname + '_SVG' + '.zip')
+ tb.getSVGZip(zipname)
+
+ if buildXML:
+ print " Creating XML ZIP Archive"
+ zipname = os.path.join(outdir, bookname + '_XML' + '.zip')
+ tb.getXMLZip(zipname)
+
+ # removing internal temporary directory of pieces
+ tb.cleanup()
+
+ except TpzDRMError, e:
+ print str(e)
+ # tb.cleanup()
+ return 1
+
+ except Exception, e:
+ print str(e)
+ # tb.cleanup
+ return 1
+
+ return 0
if __name__ == '__main__':
-#! /usr/bin/python
-
-"""
-
-Comprehensive Mazama Book DRM with Topaz Cryptography V2.2
-
------BEGIN PUBLIC KEY-----
-MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDdBHJ4CNc6DNFCw4MRCw4SWAK6
-M8hYfnNEI0yQmn5Ti+W8biT7EatpauE/5jgQMPBmdNrDr1hbHyHBSP7xeC2qlRWC
-B62UCxeu/fpfnvNHDN/wPWWH4jynZ2M6cdcnE5LQ+FfeKqZn7gnG2No1U9h7oOHx
-y2/pHuYme7U1TsgSjwIDAQAB
------END PUBLIC KEY-----
-
-"""
+# standalone set of Mac OSX specific routines needed for KindleBooks
from __future__ import with_statement
-import csv
import sys
import os
-import getopt
-import zlib
-from struct import pack
-from struct import unpack
-from ctypes import windll, c_char_p, c_wchar_p, c_uint, POINTER, byref, \
- create_unicode_buffer, create_string_buffer, CFUNCTYPE, addressof, \
- string_at, Structure, c_void_p, cast
-import _winreg as winreg
-import Tkinter
-import Tkconstants
-import tkMessageBox
-import traceback
-import hashlib
+import os.path
+import re
+import copy
+import subprocess
+from struct import pack, unpack, unpack_from
-MAX_PATH = 255
-
-kernel32 = windll.kernel32
-advapi32 = windll.advapi32
-crypt32 = windll.crypt32
-
-global kindleDatabase
-global bookFile
-global bookPayloadOffset
-global bookHeaderRecords
-global bookMetadata
-global bookKey
-global command
-
-#
-# Various character maps used to decrypt books. Probably supposed to act as obfuscation
-#
+class DrmException(Exception):
+ # Raised by the libcrypto loader and its AES wrapper in this file
+ # (library not found, improper key size, key initialisation failure).
+ pass
-charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
-charMap2 = "AaZzB0bYyCc1XxDdW2wEeVv3FfUuG4g-TtHh5SsIiR6rJjQq7KkPpL8lOoMm9Nn_"
-charMap3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
-charMap4 = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
-#
-# Exceptions for all the problems that might happen during the script
-#
+# interface to needed routines in openssl's libcrypto
+def _load_crypto_libcrypto():
+ from ctypes import CDLL, byref, POINTER, c_void_p, c_char_p, c_int, c_long, \
+ Structure, c_ulong, create_string_buffer, addressof, string_at, cast
+ from ctypes.util import find_library
-class CMBDTCError(Exception):
- pass
+ libcrypto = find_library('crypto')
+ if libcrypto is None:
+ raise DrmException('libcrypto not found')
+ libcrypto = CDLL(libcrypto)
-class CMBDTCFatal(Exception):
- pass
+ # From OpenSSL's crypto aes header
+ #
+ # AES_ENCRYPT 1
+ # AES_DECRYPT 0
+ # AES_MAXNR 14 (in bytes)
+ # AES_BLOCK_SIZE 16 (in bytes)
+ #
+ # struct aes_key_st {
+ # unsigned long rd_key[4 *(AES_MAXNR + 1)];
+ # int rounds;
+ # };
+ # typedef struct aes_key_st AES_KEY;
+ #
+ # int AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key);
+ #
+ # note: the ivec string, and output buffer are both mutable
+ # void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
+ # const unsigned long length, const AES_KEY *key, unsigned char *ivec, const int enc);
-#
-# Stolen stuff
-#
+ AES_MAXNR = 14
+ c_char_pp = POINTER(c_char_p)
+ c_int_p = POINTER(c_int)
-class DataBlob(Structure):
- _fields_ = [('cbData', c_uint),
- ('pbData', c_void_p)]
-DataBlob_p = POINTER(DataBlob)
+ # ctypes mirror of the AES_KEY struct quoted in the header excerpt above:
+ # 4 * (AES_MAXNR + 1) round-key words followed by the round count.
+ # NOTE(review): the excerpt declares rd_key as unsigned long while c_long
+ # is used here; both have the same size, so the layout is unaffected.
+ class AES_KEY(Structure):
+ _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))), ('rounds', c_int)]
+ AES_KEY_p = POINTER(AES_KEY)
-def GetSystemDirectory():
- GetSystemDirectoryW = kernel32.GetSystemDirectoryW
- GetSystemDirectoryW.argtypes = [c_wchar_p, c_uint]
- GetSystemDirectoryW.restype = c_uint
- def GetSystemDirectory():
- buffer = create_unicode_buffer(MAX_PATH + 1)
- GetSystemDirectoryW(buffer, len(buffer))
- return buffer.value
- return GetSystemDirectory
-GetSystemDirectory = GetSystemDirectory()
+    def F(restype, name, argtypes):
+        """Look up symbol `name` in libcrypto and attach its ctypes
+        return type and argument types; returns the bound function."""
+        fn = getattr(libcrypto, name)
+        fn.restype, fn.argtypes = restype, argtypes
+        return fn
+ AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p,c_int])
-def GetVolumeSerialNumber():
- GetVolumeInformationW = kernel32.GetVolumeInformationW
- GetVolumeInformationW.argtypes = [c_wchar_p, c_wchar_p, c_uint,
- POINTER(c_uint), POINTER(c_uint),
- POINTER(c_uint), c_wchar_p, c_uint]
- GetVolumeInformationW.restype = c_uint
- def GetVolumeSerialNumber(path):
- vsn = c_uint(0)
- GetVolumeInformationW(path, None, 0, byref(vsn), None, None, None, 0)
- return vsn.value
- return GetVolumeSerialNumber
-GetVolumeSerialNumber = GetVolumeSerialNumber()
+ AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',[c_char_p, c_int, AES_KEY_p])
+ # From OpenSSL's Crypto evp/p5_crpt2.c
+ #
+ # int PKCS5_PBKDF2_HMAC_SHA1(const char *pass, int passlen,
+ # const unsigned char *salt, int saltlen, int iter,
+ # int keylen, unsigned char *out);
+
+ PKCS5_PBKDF2_HMAC_SHA1 = F(c_int, 'PKCS5_PBKDF2_HMAC_SHA1',
+ [c_char_p, c_ulong, c_char_p, c_ulong, c_ulong, c_ulong, c_char_p])
+
+    class LibCrypto(object):
+        """AES-CBC decryption and PBKDF2-HMAC-SHA1 key derivation through
+        the libcrypto functions bound above."""
+
+        def __init__(self):
+            self._blocksize = 0
+            self._keyctx = None
+            self._iv = 0
+
+        def set_decrypt_key(self, userkey, iv):
+            """Install the AES key (16, 24 or 32 bytes) and the CBC IV.
+
+            Raises DrmException for any other key length or when
+            AES_set_decrypt_key reports an error.
+            """
+            self._blocksize = len(userkey)
+            if self._blocksize not in (16, 24, 32):
+                raise DrmException('AES improper key used')
+            keyctx = self._keyctx = AES_KEY()
+            self._iv = iv
+            self._userkey = userkey
+            rv = AES_set_decrypt_key(userkey, len(userkey) * 8, keyctx)
+            if rv < 0:
+                raise DrmException('Failed to initialize AES key')
+
+        def decrypt(self, data):
+            """Return data decrypted with the installed key and IV.
+
+            Raises DrmException when no key has been installed yet, rather
+            than handing a NULL key to libcrypto.
+            """
+            if self._keyctx is None:
+                raise DrmException('AES key not set')
+            out = create_string_buffer(len(data))
+            # the IV buffer is modified by AES_cbc_encrypt, so use a copy
+            mutable_iv = create_string_buffer(self._iv, len(self._iv))
+            # AES_cbc_encrypt is bound with restype None (void): there is
+            # no status value to test
+            AES_cbc_encrypt(data, out, len(data), self._keyctx, mutable_iv, 0)
+            return out.raw
+
+        def keyivgen(self, passwd, salt, iter, keylen):
+            """Derive keylen bytes from passwd and salt with
+            PKCS5_PBKDF2_HMAC_SHA1 using iter iterations.
+
+            Raises DrmException when the derivation fails, instead of
+            returning the untouched zero-filled buffer.
+            """
+            out = create_string_buffer(keylen)
+            rv = PKCS5_PBKDF2_HMAC_SHA1(passwd, len(passwd), salt, len(salt), iter, keylen, out)
+            if rv != 1:
+                raise DrmException('PBKDF2 key derivation failed')
+            return out.raw
+ return LibCrypto
+
+def _load_crypto():
+    """Return the LibCrypto class from _load_crypto_libcrypto(), or None
+    when loading fails with ImportError or DrmException."""
+    try:
+        return _load_crypto_libcrypto()
+    except (ImportError, DrmException):
+        return None
-def GetUserName():
- GetUserNameW = advapi32.GetUserNameW
- GetUserNameW.argtypes = [c_wchar_p, POINTER(c_uint)]
- GetUserNameW.restype = c_uint
- def GetUserName():
- buffer = create_unicode_buffer(32)
- size = c_uint(len(buffer))
- while not GetUserNameW(buffer, byref(size)):
- buffer = create_unicode_buffer(len(buffer) * 2)
- size.value = len(buffer)
- return buffer.value.encode('utf-16-le')[::2]
- return GetUserName
-GetUserName = GetUserName()
-
-
-def CryptUnprotectData():
- _CryptUnprotectData = crypt32.CryptUnprotectData
- _CryptUnprotectData.argtypes = [DataBlob_p, c_wchar_p, DataBlob_p,
- c_void_p, c_void_p, c_uint, DataBlob_p]
- _CryptUnprotectData.restype = c_uint
- def CryptUnprotectData(indata, entropy):
- indatab = create_string_buffer(indata)
- indata = DataBlob(len(indata), cast(indatab, c_void_p))
- entropyb = create_string_buffer(entropy)
- entropy = DataBlob(len(entropy), cast(entropyb, c_void_p))
- outdata = DataBlob()
- if not _CryptUnprotectData(byref(indata), None, byref(entropy),
- None, None, 0, byref(outdata)):
- raise CMBDTCFatal("Failed to Unprotect Data")
- return string_at(outdata.pbData, outdata.cbData)
- return CryptUnprotectData
-CryptUnprotectData = CryptUnprotectData()
+LibCrypto = _load_crypto()
#
-# Returns the MD5 digest of "message"
+# Utility Routines
#
+# crypto digest routines
+import hashlib
+
def MD5(message):
ctx = hashlib.md5()
ctx.update(message)
return ctx.digest()
-#
-# Returns the MD5 digest of "message"
-#
-
def SHA1(message):
ctx = hashlib.sha1()
ctx.update(message)
return ctx.digest()
-#
-# Open the book file at path
-#
+def SHA256(message):
+    """Return the raw SHA-256 digest of message."""
+    return hashlib.sha256(message).digest()
+
+# Various character maps used to decrypt books. Probably supposed to act as obfuscation
+charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
+charMap2 = "ZB0bYyc1xDdW2wEV3Ff7KkPpL8UuGA4gz-Tme9Nn_tHh5SvXCsIiR6rJjQaqlOoM"
+
+# For kinf approach of K4Mac 1.6.X or later
+# On K4PC charMap5 = "AzB0bYyCeVvaZ3FfUuG4g-TtHh5SsIiR6rJjQq7KkPpL8lOoMm9Nn_c1XxDdW2wE"
+# For Mac they seem to re-use charMap2 here
+charMap5 = charMap2
+
+# new in K4M 1.9.X
+testMap8 = "YvaZ3FfUm9Nn_c1XuG4yCAzB0beVg-TtHh5SsIiR6rJjQdW2wEq7KkPpL8lOoMxD"
-def openBook(path):
- try:
- return open(path,'rb')
- except:
- raise CMBDTCFatal("Could not open book file: " + path)
-#
-# Encode the bytes in data with the characters in map
-#
def encode(data, map):
result = ""
result += map[R]
return result
-#
# Hash the bytes in data and then encode the digest with the characters in map
-#
-
def encodeHash(data,map):
return encode(MD5(data),map)
-#
# Decode the string in data with the characters in map. Returns the decoded bytes
-#
-
def decode(data,map):
result = ""
- for i in range (0,len(data),2):
+ for i in range (0,len(data)-1,2):
high = map.find(data[i])
low = map.find(data[i+1])
- value = (((high * 0x40) ^ 0x80) & 0xFF) + low
+ if (high == -1) or (low == -1) :
+ break
+ value = (((high * len(map)) ^ 0x80) & 0xFF) + low
result += pack("B",value)
return result
-#
-# Locate and open the Kindle.info file (Hopefully in the way it is done in the Kindle application)
-#
-
-def openKindleInfo():
- regkey = winreg.OpenKey(winreg.HKEY_CURRENT_USER, "Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\Shell Folders\\")
- path = winreg.QueryValueEx(regkey, 'Local AppData')[0]
- return open(path+'\\Amazon\\Kindle For PC\\{AMAwzsaPaaZAzmZzZQzgZCAkZ3AjA_AY}\\kindle.info','r')
+# For K4M 1.6.X and later
+# generate a list of the prime numbers less than or equal to int n
+def primes(n):
+    """Return the list of all primes less than or equal to the integer n.
+
+    Uses a sieve over the odd numbers 3, 5, 7, ... <= n; the value 2*k+3 is
+    stored at index k and composites are overwritten with 0.
+    Returns [] for n < 2.
+
+    Floor division (//) and list(range(...)) are used explicitly so the
+    result does not depend on Python 2 integer-division semantics.
+    """
+    if n < 2:
+        return []
+    if n == 2:
+        return [2]
+    sieve = list(range(3, n + 1, 2))
+    mroot = n ** 0.5
+    half = (n + 1) // 2 - 1    # number of entries in sieve
+    i = 0
+    m = 3
+    while m <= mroot:
+        if sieve[i]:
+            # m is prime: strike out m*m, m*m + 2m, ... (the odd multiples of m)
+            j = (m * m - 3) // 2
+            while j < half:
+                sieve[j] = 0
+                j += m
+        i += 1
+        m = 2 * i + 3
+    return [2] + [x for x in sieve if x]
+
+
+# uses a sub process to get the Hard Drive Serial Number using ioreg
+# returns with the serial number of drive whose BSD Name is "disk0"
+def GetVolumeSerialNumber():
+ # Return the serial number string that ioreg reports for the drive whose
+ # BSD Name is "disk0", or '' when no such entry is found in the output.
+ # The MYSERIALNUMBER environment variable, when set, is returned as-is
+ # and no subprocess is started.
+ sernum = os.getenv('MYSERIALNUMBER')
+ if sernum != None:
+ return sernum
+ cmdline = '/usr/sbin/ioreg -l -S -w 0 -r -c AppleAHCIDiskDriver'
+ cmdline = cmdline.encode(sys.getfilesystemencoding())
+ p = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
+ out1, out2 = p.communicate()
+ reslst = out1.split('\n')
+ cnt = len(reslst)
+ bsdname = None
+ sernum = None
+ foundIt = False
+ # scan the ioreg output line by line, remembering the most recent
+ # "Serial Number" value until a "BSD Name" of disk0 is seen
+ for j in xrange(cnt):
+ resline = reslst[j]
+ pp = resline.find('"Serial Number" = "')
+ if pp >= 0:
+ # 19 is the length of the search pattern; [:-1] drops the last character of the line
+ sernum = resline[pp+19:-1]
+ sernum = sernum.strip()
+ bb = resline.find('"BSD Name" = "')
+ if bb >= 0:
+ # 14 is the length of the search pattern
+ bsdname = resline[bb+14:-1]
+ bsdname = bsdname.strip()
+ # NOTE(review): indentation is lost in this patch; this check is presumably nested under "if bb >= 0" - confirm
+ if (bsdname == 'disk0') and (sernum != None):
+ foundIt = True
+ break
+ if not foundIt:
+ sernum = ''
+ return sernum
+
+def GetUserHomeAppSupKindleDirParitionName():
+    """Return the device name (without the leading '/dev/') of the mounted
+    partition holding $HOME/Library/Application Support/Kindle.
+
+    The output of /sbin/mount is scanned; every '/dev...' line whose mount
+    point is a prefix of the Kindle directory is a candidate and the last
+    candidate listed wins.  Returns '' when nothing matches.
+    """
+    kindle_dir = os.getenv('HOME') + '/Library/Application Support/Kindle'
+    cmdline = '/sbin/mount'.encode(sys.getfilesystemencoding())
+    proc = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
+    mount_output, _ = proc.communicate()
+    disk = ''
+    for line in mount_output.split('\n'):
+        if not line.startswith('/dev'):
+            continue
+        (device, mount_point) = line.split(' on ')
+        # cut off the " (options...)" suffix, including the space before '('
+        paren = mount_point.find('(')
+        if paren >= 0:
+            mount_point = mount_point[:paren-1]
+        if kindle_dir.startswith(mount_point):
+            # strip the 5-character '/dev/' prefix
+            disk = device[5:]
+    return disk
+
+# uses a sub process to get the UUID of the specified disk partition using ioreg
+def GetDiskPartitionUUID(diskpart):
+ # Return the "UUID" value that ioreg reports for the entry whose
+ # "BSD Name" equals diskpart, or '' when no matching entry is found.
+ # The MYUUIDNUMBER environment variable, when set, is returned as-is
+ # and no subprocess is started.
+ uuidnum = os.getenv('MYUUIDNUMBER')
+ if uuidnum != None:
+ return uuidnum
+ cmdline = '/usr/sbin/ioreg -l -S -w 0 -r -c AppleAHCIDiskDriver'
+ cmdline = cmdline.encode(sys.getfilesystemencoding())
+ p = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
+ out1, out2 = p.communicate()
+ reslst = out1.split('\n')
+ cnt = len(reslst)
+ bsdname = None
+ uuidnum = None
+ foundIt = False
+ # nest tracks the current '{' ... '}' depth in the ioreg output;
+ # uuidnest / partnest record the depth at which the last UUID and the
+ # wanted BSD Name were seen (distinct negative sentinels mean "not seen")
+ nest = 0
+ uuidnest = -1
+ partnest = -2
+ for j in xrange(cnt):
+ resline = reslst[j]
+ if resline.find('{') >= 0:
+ nest += 1
+ if resline.find('}') >= 0:
+ nest -= 1
+ pp = resline.find('"UUID" = "')
+ if pp >= 0:
+ # 10 is the length of the search pattern; [:-1] drops the last character of the line
+ uuidnum = resline[pp+10:-1]
+ uuidnum = uuidnum.strip()
+ uuidnest = nest
+ # a UUID and the wanted BSD Name at the same depth are taken to belong to the same entry
+ if partnest == uuidnest and uuidnest > 0:
+ foundIt = True
+ break
+ bb = resline.find('"BSD Name" = "')
+ if bb >= 0:
+ # 14 is the length of the search pattern
+ bsdname = resline[bb+14:-1]
+ bsdname = bsdname.strip()
+ if (bsdname == diskpart):
+ partnest = nest
+ else :
+ partnest = -2
+ if partnest == uuidnest and partnest > 0:
+ foundIt = True
+ break
+ # NOTE(review): indentation is lost in this patch; the four resets below are presumably all under "if nest == 0" - confirm
+ if nest == 0:
+ partnest = -2
+ uuidnest = -1
+ uuidnum = None
+ bsdname = None
+ if not foundIt:
+ uuidnum = ''
+ return uuidnum
+
+def GetMACAddressMunged():
+    """Return the en0 MAC address munged the way the Kindle app does it.
+
+    Each of the six octets is XORed with 0xa5, octets 3 and 4 are swapped,
+    and the result is formatted as 12 lowercase hex digits without
+    separators.  Returns '' when ifconfig prints no usable 'ether' line.
+    The MYMACNUM environment variable, when set, is returned as-is and no
+    subprocess is started.
+
+    Raises ValueError if an octet of the reported address is not valid hex.
+    """
+    macnum = os.getenv('MYMACNUM')
+    if macnum is not None:
+        return macnum
+    cmdline = '/sbin/ifconfig en0'
+    cmdline = cmdline.encode(sys.getfilesystemencoding())
+    p = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
+    out1, out2 = p.communicate()
+    for resline in out1.split('\n'):
+        pp = resline.find('ether ')
+        if pp < 0:
+            continue
+        # text after 'ether ' minus the last character of the line
+        maclst = resline[pp+6:-1].strip().split(':')
+        if len(maclst) != 6:
+            # only the first 'ether' line is considered; a malformed one
+            # means "not found" (the original set a misspelled flag here)
+            break
+        # now munge it up the way Kindle app does
+        # by xoring it with 0xa5 and swapping elements 3 and 4
+        octets = [int('0x' + part, 0) ^ 0xa5 for part in maclst]
+        octets[3], octets[4] = octets[4], octets[3]
+        return "%0.2x%0.2x%0.2x%0.2x%0.2x%0.2x" % tuple(octets)
+    return ''
-#
-# Parse the Kindle.info file and return the records as a list of key-values
-#
-def parseKindleInfo():
+# uses unix env to get username instead of using sysctlbyname
+def GetUserName():
+    """Return the value of the USER environment variable (None when unset)."""
+    return os.getenv('USER')
+
+def isNewInstall():
+    """Return True when the marker path
+    $HOME/Library/Application Support/Kindle/storage/.pes2011 exists,
+    False otherwise.
+    """
+    # soccer game fan anyone
+    marker = os.getenv('HOME') + '/Library/Application Support/Kindle/storage/.pes2011'
+    return os.path.exists(marker)
+
+
+def GetIDString():
+ # Return the machine identifier string used when building the
+ # CryptUnprotectDataV2 / V3 passwords.  Candidates are tried in a fixed
+ # order and the first one longer than 7 characters is returned:
+ #   1. the munged MAC address (only when isNewInstall() is true)
+ #   2. the volume serial number
+ #   3. the UUID of the partition holding the Kindle support directory
+ #   4. the munged MAC address
+ # If none qualifies the constant '9999999999' is returned.
+
+ # K4Mac now has an extensive set of ids strings it uses
+ # in encoding pids and in creating unique passwords
+ # for use in its own version of CryptUnprotectDataV2
+
+ # BUT Amazon has now become nasty enough to detect when its app
+ # is being run under a debugger and actually changes code paths
+ # including which one of these strings is chosen, all to try
+ # to prevent reverse engineering
+
+ # Sad really ... they will only hurt their own sales ...
+ # true book lovers really want to keep their books forever
+ # and move them to their devices and DRM prevents that so they
+ # will just buy from someplace else that they can remove
+ # the DRM from
+
+ # Amazon should know by now that true book lover's are not like
+ # penniless kids that pirate music, we do not pirate books
+
+ # NOTE(review): indentation is lost in this patch; presumably only the MAC lookup and its length check sit under this "if" - confirm
+ if isNewInstall():
+ mungedmac = GetMACAddressMunged()
+ if len(mungedmac) > 7:
+ return mungedmac
+ sernum = GetVolumeSerialNumber()
+ if len(sernum) > 7:
+ return sernum
+ diskpart = GetUserHomeAppSupKindleDirParitionName()
+ uuidnum = GetDiskPartitionUUID(diskpart)
+ if len(uuidnum) > 7:
+ return uuidnum
+ mungedmac = GetMACAddressMunged()
+ if len(mungedmac) > 7:
+ return mungedmac
+ return '9999999999'
+
+
+# implements a pseudo Mac version of the Windows built-in crypto routine
+# used by Kindle for Mac versions < 1.6.0
+class CryptUnprotectData(object):
+    """Decryptor for values stored in older-style .kindle-info files.
+
+    The password is the charMap1 encoding of
+    SHA256(<volume serial> + '!@#' + <user name>), with '9999999999'
+    standing in for an empty serial number.  Key and IV are sliced out of
+    the material returned by LibCrypto.keyivgen().
+    """
+    def __init__(self):
+        serial = GetVolumeSerialNumber()
+        if serial == '':
+            serial = '9999999999'
+        password = encode(SHA256(serial + '!@#' + GetUserName()), charMap1)
+        self.crp = LibCrypto()
+        # fixed salt '16743', iteration count 0x3e8, key length argument 0x80
+        key_iv = self.crp.keyivgen(password, '16743', 0x3e8, 0x80)
+        self.key = key_iv[0:32]
+        self.iv = key_iv[32:48]
+        self.crp.set_decrypt_key(self.key, self.iv)
+
+    def decrypt(self, encryptedData):
+        """Decrypt encryptedData and return the charMap1-decoded cleartext."""
+        return decode(self.crp.decrypt(encryptedData), charMap1)
+
+
+# implements a pseudo Mac version of the Windows built-in crypto routine
+# used for Kindle for Mac Versions >= 1.6.0
+class CryptUnprotectDataV2(object):
+    """Decryptor for values stored in .kinf files (the '/'-separated format).
+
+    The password is the charMap5 encoding of
+    SHA256(<user name> + ':&%:' + <id string>); the salt is the charMap5
+    encoding of a fixed number followed by the user name.
+    """
+    def __init__(self):
+        identity = GetUserName() + ':&%:' + GetIDString()
+        password = encode(SHA256(identity), charMap5)
+        # salt generation as per the code
+        salt = encode(str(0x0512981d * 2 * 1 * 1) + GetUserName(), charMap5)
+        self.crp = LibCrypto()
+        # iteration count 0x800, key length argument 0x400
+        key_iv = self.crp.keyivgen(password, salt, 0x800, 0x400)
+        self.key = key_iv[0:32]
+        self.iv = key_iv[32:48]
+        self.crp.set_decrypt_key(self.key, self.iv)
+
+    def decrypt(self, encryptedData):
+        """Decrypt encryptedData and return the charMap5-decoded cleartext."""
+        return decode(self.crp.decrypt(encryptedData), charMap5)
+
+
+# unprotect the new header blob in .kinf2011
+# used in Kindle for Mac Version >= 1.9.0
+def UnprotectHeaderData(encryptedData):
+    """Decrypt the header blob of a .kinf2011 file and return the cleartext.
+
+    Key and IV are derived from fixed strings only; no user or machine
+    identifier is involved.
+    """
+    crp = LibCrypto()
+    # fixed password and salt, iteration count 0x80, key length argument 0x100
+    key_iv = crp.keyivgen('header_key_data', 'HEADER.2011', 0x80, 0x100)
+    crp.set_decrypt_key(key_iv[0:32], key_iv[32:48])
+    return crp.decrypt(encryptedData)
+
+
+# implements a pseudo Mac version of the Windows built-in crypto routine
+# used for Kindle for Mac Versions >= 1.9.0
+class CryptUnprotectDataV3(object):
+    """Decryptor for values stored in .kinf2011 files.
+
+    The password is the charMap2 encoding of
+    SHA256(<user name> + '+@#$%+' + <id string>); the caller-supplied
+    entropy string is used as the salt.
+    """
+    def __init__(self, entropy):
+        identity = GetUserName() + '+@#$%+' + GetIDString()
+        password = encode(SHA256(identity), charMap2)
+        self.crp = LibCrypto()
+        # iteration count 0x800, key length argument 0x400
+        key_iv = self.crp.keyivgen(password, entropy, 0x800, 0x400)
+        self.key = key_iv[0:32]
+        self.iv = key_iv[32:48]
+        self.crp.set_decrypt_key(self.key, self.iv)
+
+    def decrypt(self, encryptedData):
+        """Decrypt encryptedData and return the charMap2-decoded cleartext."""
+        return decode(self.crp.decrypt(encryptedData), charMap2)
+
+
+# Locate the .kindle-info files
+def getKindleInfoFiles(kInfoFiles):
+    """Append the paths of all Kindle info files found under
+    $HOME/Library/Application Support to kInfoFiles and return that list.
+
+    Three file-name patterns are searched with find(1), in this order:
+    '.kindle-info', '.rainier*-kinf' and '.kinf2011'.  A message is printed
+    when none of the searches yields an existing file.
+
+    find is started with an argument list instead of a shell command
+    string, so a home directory containing quotes, '$' or backticks can
+    neither break nor alter the command.
+    """
+    searchroot = os.getenv('HOME') + '/Library/Application Support'
+    found = False
+    for pattern in ('.kindle-info', '.rainier*-kinf', '.kinf2011'):
+        p1 = subprocess.Popen(['find', searchroot, '-name', pattern], stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
+        out1, out2 = p1.communicate()
+        for resline in out1.split('\n'):
+            # isfile() also filters out the empty string after the last newline
+            if os.path.isfile(resline):
+                kInfoFiles.append(resline)
+                found = True
+    if not found:
+        print('No kindle-info files have been found.')
+    return kInfoFiles
+
+# determine type of kindle info provided and return a
+# database of keynames and values
+def getDBfromFile(kInfoFile):
+ # Read the kindle info file at path kInfoFile and return a dict mapping
+ # key names to decrypted values, or None when no record was decoded.
+ # Three layouts are handled, chosen from the file contents:
+ #   - data containing '[' : older style kindle-info file (CryptUnprotectData)
+ #   - first char is '/'   : .kinf file (CryptUnprotectDataV2)
+ #   - anything else       : .kinf2011 file (CryptUnprotectDataV3)
+ # A key whose hash matches none of the known names is stored under its
+ # raw hash string.
+ names = ["kindle.account.tokens","kindle.cookie.item","eulaVersionAccepted","login_date","kindle.token.item","login","kindle.key.item","kindle.name.info","kindle.device.info", "MazamaRandomNumber", "max_date", "SIGVERIF"]
DB = {}
- infoReader = openKindleInfo()
- infoReader.read(1)
+ cnt = 0
+ # NOTE(review): infoReader is not closed anywhere in this function
+ infoReader = open(kInfoFile, 'r')
+ hdr = infoReader.read(1)
data = infoReader.read()
- items = data.split('{')
- for item in items:
- splito = item.split(':')
- DB[splito[0]] =splito[1]
+ if data.find('[') != -1 :
+
+ # older style kindle-info file
+ cud = CryptUnprotectData()
+ items = data.split('[')
+ for item in items:
+ if item != '':
+ keyhash, rawdata = item.split(':')
+ keyname = "unknown"
+ for name in names:
+ if encodeHash(name,charMap2) == keyhash:
+ keyname = name
+ break
+ if keyname == "unknown":
+ keyname = keyhash
+ encryptedValue = decode(rawdata,charMap2)
+ cleartext = cud.decrypt(encryptedValue)
+ DB[keyname] = cleartext
+ cnt = cnt + 1
+ if cnt == 0:
+ DB = None
+ return DB
+
+ if hdr == '/':
+
+ # else newer style .kinf file used by K4Mac >= 1.6.0
+ # the .kinf file uses "/" to separate it into records
+ # so remove the trailing "/" to make it easy to use split
+ data = data[:-1]
+ items = data.split('/')
+ cud = CryptUnprotectDataV2()
+
+ # loop through the item records until all are processed
+ while len(items) > 0:
+
+ # get the first item record
+ item = items.pop(0)
+
+ # the first 32 chars of the first record of a group
+ # is the MD5 hash of the key name encoded by charMap5
+ keyhash = item[0:32]
+ keyname = "unknown"
+
+ # the raw keyhash string is also used to create entropy for the actual
+ # CryptProtectData Blob that represents that keys contents
+ # "entropy" not used for K4Mac only K4PC
+ # entropy = SHA1(keyhash)
+
+ # the remainder of the first record when decoded with charMap5
+ # has the ':' split char followed by the string representation
+ # of the number of records that follow
+ # and make up the contents
+ srcnt = decode(item[34:],charMap5)
+ rcnt = int(srcnt)
+
+ # read and store in rcnt records of data
+ # that make up the contents value
+ edlst = []
+ for i in xrange(rcnt):
+ item = items.pop(0)
+ edlst.append(item)
+
+ keyname = "unknown"
+ for name in names:
+ if encodeHash(name,charMap5) == keyhash:
+ keyname = name
+ break
+ if keyname == "unknown":
+ keyname = keyhash
+
+ # the charMap5 encoded contents data has had a length
+ # of chars (always odd) cut off of the front and moved
+ # to the end to prevent decoding using charMap5 from
+ # working properly, and thereby preventing the ensuing
+ # CryptUnprotectData call from succeeding.
+
+ # The offset into the charMap5 encoded contents seems to be:
+ # len(contents) - largest prime number less than or equal to int(len(content)/3)
+ # (in other words split "about" 2/3rds of the way through)
+
+ # move first offsets chars to end to align for decode by charMap5
+ encdata = "".join(edlst)
+ contlen = len(encdata)
+
+ # now properly split and recombine
+ # by moving noffset chars from the start of the
+ # string to the end of the string
+ # NOTE(review): primes() returns [] for arguments below 2, so [-1] would raise IndexError when contlen < 6 - confirm this cannot occur
+ noffset = contlen - primes(int(contlen/3))[-1]
+ pfx = encdata[0:noffset]
+ encdata = encdata[noffset:]
+ encdata = encdata + pfx
+
+ # decode using charMap5 to get the CryptProtect Data
+ encryptedValue = decode(encdata,charMap5)
+ cleartext = cud.decrypt(encryptedValue)
+ DB[keyname] = cleartext
+ cnt = cnt + 1
+
+ if cnt == 0:
+ DB = None
+ return DB
+
+ # the latest .kinf2011 version for K4M 1.9.1
+ # put back the hdr char, it is needed
+ data = hdr + data
+ data = data[:-1]
+ items = data.split('/')
+
+ # the headerblob is the encrypted information needed to build the entropy string
+ headerblob = items.pop(0)
+ encryptedValue = decode(headerblob, charMap1)
+ cleartext = UnprotectHeaderData(encryptedValue)
+
+ # now extract the pieces in the same way
+ # this version is different from K4PC it scales the build number by multiplying by 735
+ pattern = re.compile(r'''\[Version:(\d+)\]\[Build:(\d+)\]\[Cksum:([^\]]+)\]\[Guid:([\{\}a-z0-9\-]+)\]''', re.IGNORECASE)
+ # NOTE(review): entropy is only assigned inside this loop; if the header does not match the pattern it is unbound at the CryptUnprotectDataV3 call below
+ for m in re.finditer(pattern, cleartext):
+ entropy = str(int(m.group(2)) * 0x2df) + m.group(4)
+
+ cud = CryptUnprotectDataV3(entropy)
+
+ # loop through the item records until all are processed
+ while len(items) > 0:
+
+ # get the first item record
+ item = items.pop(0)
+
+ # the first 32 chars of the first record of a group
+ # is the MD5 hash of the key name encoded by testMap8
+ keyhash = item[0:32]
+ keyname = "unknown"
+
+ # unlike K4PC the keyhash is not used in generating entropy
+ # entropy = SHA1(keyhash) + added_entropy
+ # entropy = added_entropy
+
+ # the remainder of the first record when decoded with charMap5
+ # has the ':' split char followed by the string representation
+ # of the number of records that follow
+ # and make up the contents
+ srcnt = decode(item[34:],charMap5)
+ rcnt = int(srcnt)
+
+ # read and store in rcnt records of data
+ # that make up the contents value
+ edlst = []
+ for i in xrange(rcnt):
+ item = items.pop(0)
+ edlst.append(item)
+
+ keyname = "unknown"
+ for name in names:
+ if encodeHash(name,testMap8) == keyhash:
+ keyname = name
+ break
+ if keyname == "unknown":
+ keyname = keyhash
+
+ # the testMap8 encoded contents data has had a length
+ # of chars (always odd) cut off of the front and moved
+ # to the end to prevent decoding using testMap8 from
+ # working properly, and thereby preventing the ensuing
+ # CryptUnprotectData call from succeeding.
+
+ # The offset into the testMap8 encoded contents seems to be:
+ # len(contents) - largest prime number less than or equal to int(len(content)/3)
+ # (in other words split "about" 2/3rds of the way through)
+
+ # move first offsets chars to end to align for decode by testMap8
+ encdata = "".join(edlst)
+ contlen = len(encdata)
+
+ # now properly split and recombine
+ # by moving noffset chars from the start of the
+ # string to the end of the string
+ # NOTE(review): same IndexError risk as in the .kinf branch when contlen < 6
+ noffset = contlen - primes(int(contlen/3))[-1]
+ pfx = encdata[0:noffset]
+ encdata = encdata[noffset:]
+ encdata = encdata + pfx
+
+ # decode using testMap8 to get the CryptProtect Data
+ encryptedValue = decode(encdata,testMap8)
+ cleartext = cud.decrypt(encryptedValue)
+ # print keyname
+ # print cleartext
+ DB[keyname] = cleartext
+ cnt = cnt + 1
+
+ if cnt == 0:
+ DB = None
return DB
-
-#
-# Find if the original string for a hashed/encoded string is known. If so return the original string othwise return an empty string. (Totally not optimal)
-#
-
-def findNameForHash(hash):
- names = ["kindle.account.tokens","kindle.cookie.item","eulaVersionAccepted","login_date","kindle.token.item","login","kindle.key.item","kindle.name.info","kindle.device.info", "MazamaRandomNumber"]
- result = ""
- for name in names:
- if hash == encodeHash(name, charMap2):
- result = name
- break
- return name
-
-#
-# Print all the records from the kindle.info file (option -i)
-#
-
-def printKindleInfo():
- for record in kindleDatabase:
- name = findNameForHash(record)
- if name != "" :
- print (name)
- print ("--------------------------\n")
- else :
- print ("Unknown Record")
- print getKindleInfoValueForHash(record)
- print "\n"
-#
-# Get a record from the Kindle.info file for the key "hashedKey" (already hashed and encoded). Return the decoded and decrypted record
-#
-
-def getKindleInfoValueForHash(hashedKey):
- global kindleDatabase
- encryptedValue = decode(kindleDatabase[hashedKey],charMap2)
- return CryptUnprotectData(encryptedValue,"")
-
-#
-# Get a record from the Kindle.info file for the string in "key" (plaintext). Return the decoded and decrypted record
-#
-
-def getKindleInfoValueForKey(key):
- return getKindleInfoValueForHash(encodeHash(key,charMap2))
-
-#
-# Get a 7 bit encoded number from the book file
-#
-
-def bookReadEncodedNumber():
- flag = False
- data = ord(bookFile.read(1))
-
- if data == 0xFF:
- flag = True
- data = ord(bookFile.read(1))
-
- if data >= 0x80:
- datax = (data & 0x7F)
- while data >= 0x80 :
- data = ord(bookFile.read(1))
- datax = (datax <<7) + (data & 0x7F)
- data = datax
-
- if flag:
- data = -data
- return data
-
-#
-# Encode a number in 7 bit format
-#
-
-def encodeNumber(number):
- result = ""
- negative = False
- flag = 0
-
- if number < 0 :
- number = -number + 1
- negative = True
-
- while True:
- byte = number & 0x7F
- number = number >> 7
- byte += flag
- result += chr(byte)
- flag = 0x80
- if number == 0 :
- if (byte == 0xFF and negative == False) :
- result += chr(0x80)
- break
-
- if negative:
- result += chr(0xFF)
-
- return result[::-1]
-
-#
-# Get a length prefixed string from the file
-#
-
-def bookReadString():
- stringLength = bookReadEncodedNumber()
- return unpack(str(stringLength)+"s",bookFile.read(stringLength))[0]
-
-#
-# Returns a length prefixed string
-#
-
-def lengthPrefixString(data):
- return encodeNumber(len(data))+data
-
-
-#
-# Read and return the data of one header record at the current book file position [[offset,compressedLength,decompressedLength],...]
-#
-
-def bookReadHeaderRecordData():
- nbValues = bookReadEncodedNumber()
- values = []
- for i in range (0,nbValues):
- values.append([bookReadEncodedNumber(),bookReadEncodedNumber(),bookReadEncodedNumber()])
- return values
-
-#
-# Read and parse one header record at the current book file position and return the associated data [[offset,compressedLength,decompressedLength],...]
-#
-
-def parseTopazHeaderRecord():
- if ord(bookFile.read(1)) != 0x63:
- raise CMBDTCFatal("Parse Error : Invalid Header")
-
- tag = bookReadString()
- record = bookReadHeaderRecordData()
- return [tag,record]
-
-#
-# Parse the header of a Topaz file, get all the header records and the offset for the payload
-#
-
-def parseTopazHeader():
- global bookHeaderRecords
- global bookPayloadOffset
- magic = unpack("4s",bookFile.read(4))[0]
-
- if magic != 'TPZ0':
- raise CMBDTCFatal("Parse Error : Invalid Header, not a Topaz file")
-
- nbRecords = bookReadEncodedNumber()
- bookHeaderRecords = {}
-
- for i in range (0,nbRecords):
- result = parseTopazHeaderRecord()
- bookHeaderRecords[result[0]] = result[1]
-
- if ord(bookFile.read(1)) != 0x64 :
- raise CMBDTCFatal("Parse Error : Invalid Header")
-
- bookPayloadOffset = bookFile.tell()
-
-#
-# Get a record in the book payload, given its name and index. If necessary the record is decrypted. The record is not decompressed
-#
-
-def getBookPayloadRecord(name, index):
- encrypted = False
-
- try:
- recordOffset = bookHeaderRecords[name][index][0]
- except:
- raise CMBDTCFatal("Parse Error : Invalid Record, record not found")
-
- bookFile.seek(bookPayloadOffset + recordOffset)
-
- tag = bookReadString()
- if tag != name :
- raise CMBDTCFatal("Parse Error : Invalid Record, record name doesn't match")
-
- recordIndex = bookReadEncodedNumber()
-
- if recordIndex < 0 :
- encrypted = True
- recordIndex = -recordIndex -1
-
- if recordIndex != index :
- raise CMBDTCFatal("Parse Error : Invalid Record, index doesn't match")
-
- if bookHeaderRecords[name][index][2] != 0 :
- record = bookFile.read(bookHeaderRecords[name][index][2])
- else:
- record = bookFile.read(bookHeaderRecords[name][index][1])
-
- if encrypted:
- ctx = topazCryptoInit(bookKey)
- record = topazCryptoDecrypt(record,ctx)
-
- return record
-
-#
-# Extract, decrypt and decompress a book record indicated by name and index and print it or save it in "filename"
-#
-
-def extractBookPayloadRecord(name, index, filename):
- compressed = False
-
- try:
- compressed = bookHeaderRecords[name][index][2] != 0
- record = getBookPayloadRecord(name,index)
- except:
- print("Could not find record")
-
- if compressed:
- try:
- record = zlib.decompress(record)
- except:
- raise CMBDTCFatal("Could not decompress record")
-
- if filename != "":
- try:
- file = open(filename,"wb")
- file.write(record)
- file.close()
- except:
- raise CMBDTCFatal("Could not write to destination file")
- else:
- print(record)
-
-#
-# return next record [key,value] from the book metadata from the current book position
-#
-
-def readMetadataRecord():
- return [bookReadString(),bookReadString()]
-
-#
-# Parse the metadata record from the book payload and return a list of [key,values]
-#
-
-def parseMetadata():
- global bookHeaderRecords
- global bookPayloadAddress
- global bookMetadata
- bookMetadata = {}
- bookFile.seek(bookPayloadOffset + bookHeaderRecords["metadata"][0][0])
- tag = bookReadString()
- if tag != "metadata" :
- raise CMBDTCFatal("Parse Error : Record Names Don't Match")
-
- flags = ord(bookFile.read(1))
- nbRecords = ord(bookFile.read(1))
-
- for i in range (0,nbRecords) :
- record =readMetadataRecord()
- bookMetadata[record[0]] = record[1]
-
-#
-# Returns two bit at offset from a bit field
-#
-
-def getTwoBitsFromBitField(bitField,offset):
- byteNumber = offset // 4
- bitPosition = 6 - 2*(offset % 4)
-
- return ord(bitField[byteNumber]) >> bitPosition & 3
-
-#
-# Returns the six bits at offset from a bit field
-#
-
-def getSixBitsFromBitField(bitField,offset):
- offset *= 3
- value = (getTwoBitsFromBitField(bitField,offset) <<4) + (getTwoBitsFromBitField(bitField,offset+1) << 2) +getTwoBitsFromBitField(bitField,offset+2)
- return value
-
-#
-# 8 bits to six bits encoding from hash to generate PID string
-#
-
-def encodePID(hash):
- global charMap3
- PID = ""
- for position in range (0,8):
- PID += charMap3[getSixBitsFromBitField(hash,position)]
- return PID
-
-#
-# Context initialisation for the Topaz Crypto
-#
-
-def topazCryptoInit(key):
- ctx1 = 0x0CAFFE19E
-
- for keyChar in key:
- keyByte = ord(keyChar)
- ctx2 = ctx1
- ctx1 = ((((ctx1 >>2) * (ctx1 >>7))&0xFFFFFFFF) ^ (keyByte * keyByte * 0x0F902007)& 0xFFFFFFFF )
- return [ctx1,ctx2]
-
-#
-# decrypt data with the context prepared by topazCryptoInit()
-#
-
-def topazCryptoDecrypt(data, ctx):
- ctx1 = ctx[0]
- ctx2 = ctx[1]
-
- plainText = ""
-
- for dataChar in data:
- dataByte = ord(dataChar)
- m = (dataByte ^ ((ctx1 >> 3) &0xFF) ^ ((ctx2<<3) & 0xFF)) &0xFF
- ctx2 = ctx1
- ctx1 = (((ctx1 >> 2) * (ctx1 >> 7)) &0xFFFFFFFF) ^((m * m * 0x0F902007) &0xFFFFFFFF)
- plainText += chr(m)
-
- return plainText
-
-#
-# Decrypt a payload record with the PID
-#
-
-def decryptRecord(data,PID):
- ctx = topazCryptoInit(PID)
- return topazCryptoDecrypt(data, ctx)
-
-#
-# Try to decrypt a dkey record (contains the book PID)
-#
-
-def decryptDkeyRecord(data,PID):
- record = decryptRecord(data,PID)
- fields = unpack("3sB8sB8s3s",record)
-
- if fields[0] != "PID" or fields[5] != "pid" :
- raise CMBDTCError("Didn't find PID magic numbers in record")
- elif fields[1] != 8 or fields[3] != 8 :
- raise CMBDTCError("Record didn't contain correct length fields")
- elif fields[2] != PID :
- raise CMBDTCError("Record didn't contain PID")
-
- return fields[4]
-
-#
-# Decrypt all the book's dkey records (contain the book PID)
-#
-
-def decryptDkeyRecords(data,PID):
- nbKeyRecords = ord(data[0])
- records = []
- data = data[1:]
- for i in range (0,nbKeyRecords):
- length = ord(data[0])
- try:
- key = decryptDkeyRecord(data[1:length+1],PID)
- records.append(key)
- except CMBDTCError:
- pass
- data = data[1+length:]
-
- return records
-
-#
-# Encryption table used to generate the device PID
-#
-
-def generatePidEncryptionTable() :
- table = []
- for counter1 in range (0,0x100):
- value = counter1
- for counter2 in range (0,8):
- if (value & 1 == 0) :
- value = value >> 1
- else :
- value = value >> 1
- value = value ^ 0xEDB88320
- table.append(value)
- return table
-
-#
-# Seed value used to generate the device PID
-#
-
-def generatePidSeed(table,dsn) :
- value = 0
- for counter in range (0,4) :
- index = (ord(dsn[counter]) ^ value) &0xFF
- value = (value >> 8) ^ table[index]
- return value
-
-#
-# Generate the device PID
-#
-
-def generateDevicePID(table,dsn,nbRoll):
- seed = generatePidSeed(table,dsn)
- pidAscii = ""
- pid = [(seed >>24) &0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF,(seed>>24) & 0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF]
- index = 0
-
- for counter in range (0,nbRoll):
- pid[index] = pid[index] ^ ord(dsn[counter])
- index = (index+1) %8
-
- for counter in range (0,8):
- index = ((((pid[counter] >>5) & 3) ^ pid[counter]) & 0x1f) + (pid[counter] >> 7)
- pidAscii += charMap4[index]
- return pidAscii
-
-#
-# Create decrypted book payload
-#
-
-def createDecryptedPayload(payload):
-
- # store data to be able to create the header later
- headerData= []
- currentOffset = 0
-
- # Add social DRM to decrypted files
-
- try:
- data = getKindleInfoValueForKey("kindle.name.info")+":"+ getKindleInfoValueForKey("login")
- if payload!= None:
- payload.write(lengthPrefixString("sdrm"))
- payload.write(encodeNumber(0))
- payload.write(data)
- else:
- currentOffset += len(lengthPrefixString("sdrm"))
- currentOffset += len(encodeNumber(0))
- currentOffset += len(data)
- except:
- pass
-
- for headerRecord in bookHeaderRecords:
- name = headerRecord
- newRecord = []
-
- if name != "dkey" :
-
- for index in range (0,len(bookHeaderRecords[name])) :
- offset = currentOffset
-
- if payload != None:
- # write tag
- payload.write(lengthPrefixString(name))
- # write data
- payload.write(encodeNumber(index))
- payload.write(getBookPayloadRecord(name, index))
-
- else :
- currentOffset += len(lengthPrefixString(name))
- currentOffset += len(encodeNumber(index))
- currentOffset += len(getBookPayloadRecord(name, index))
- newRecord.append([offset,bookHeaderRecords[name][index][1],bookHeaderRecords[name][index][2]])
-
- headerData.append([name,newRecord])
-
-
-
- return headerData
-
-#
-# Create decrypted book
-#
-
-def createDecryptedBook(outputFile):
- outputFile = open(outputFile,"wb")
- # Write the payload in a temporary file
- headerData = createDecryptedPayload(None)
- outputFile.write("TPZ0")
- outputFile.write(encodeNumber(len(headerData)))
-
- for header in headerData :
- outputFile.write(chr(0x63))
- outputFile.write(lengthPrefixString(header[0]))
- outputFile.write(encodeNumber(len(header[1])))
- for numbers in header[1] :
- outputFile.write(encodeNumber(numbers[0]))
- outputFile.write(encodeNumber(numbers[1]))
- outputFile.write(encodeNumber(numbers[2]))
-
- outputFile.write(chr(0x64))
- createDecryptedPayload(outputFile)
- outputFile.close()
-
-#
-# Set the command to execute by the programm according to cmdLine parameters
-#
-
-def setCommand(name) :
- global command
- if command != "" :
- raise CMBDTCFatal("Invalid command line parameters")
- else :
- command = name
-
-#
-# Program usage
-#
-
-def usage():
- print("\nUsage:")
- print("\nCMBDTC.py [options] bookFileName\n")
- print("-p Adds a PID to the list of PIDs that are tried to decrypt the book key (can be used several times)")
- print("-d Saves a decrypted copy of the book")
- print("-r Prints or writes to disk a record indicated in the form name:index (e.g \"img:0\")")
- print("-o Output file name to write records and decrypted books")
- print("-v Verbose (can be used several times)")
- print("-i Prints kindle.info database")
-
-#
-# Main
-#
-
-def main(argv=sys.argv):
- global kindleDatabase
- global bookMetadata
- global bookKey
- global bookFile
- global command
-
- progname = os.path.basename(argv[0])
-
- verbose = 0
- recordName = ""
- recordIndex = 0
- outputFile = ""
- PIDs = []
- kindleDatabase = None
- command = ""
-
-
- try:
- opts, args = getopt.getopt(sys.argv[1:], "vdir:o:p:")
- except getopt.GetoptError, err:
- # print help information and exit:
- print str(err) # will print something like "option -a not recognized"
- usage()
- sys.exit(2)
-
- if len(opts) == 0 and len(args) == 0 :
- usage()
- sys.exit(2)
-
- for o, a in opts:
- if o == "-v":
- verbose+=1
- if o == "-i":
- setCommand("printInfo")
- if o =="-o":
- if a == None :
- raise CMBDTCFatal("Invalid parameter for -o")
- outputFile = a
- if o =="-r":
- setCommand("printRecord")
- try:
- recordName,recordIndex = a.split(':')
- except:
- raise CMBDTCFatal("Invalid parameter for -r")
- if o =="-p":
- PIDs.append(a)
- if o =="-d":
- setCommand("doit")
-
- if command == "" :
- raise CMBDTCFatal("No action supplied on command line")
-
- #
- # Read the encrypted database
- #
-
- try:
- kindleDatabase = parseKindleInfo()
- except Exception, message:
- if verbose>0:
- print(message)
-
- if kindleDatabase != None :
- if command == "printInfo" :
- printKindleInfo()
-
- #
- # Compute the DSN
- #
-
- # Get the Mazama Random number
- MazamaRandomNumber = getKindleInfoValueForKey("MazamaRandomNumber")
-
- # Get the HDD serial
- encodedSystemVolumeSerialNumber = encodeHash(str(GetVolumeSerialNumber(GetSystemDirectory().split('\\')[0] + '\\')),charMap1)
-
- # Get the current user name
- encodedUsername = encodeHash(GetUserName(),charMap1)
-
- # concat, hash and encode
- DSN = encode(SHA1(MazamaRandomNumber+encodedSystemVolumeSerialNumber+encodedUsername),charMap1)
-
- if verbose >1:
- print("DSN: " + DSN)
-
- #
- # Compute the device PID
- #
-
- table = generatePidEncryptionTable()
- devicePID = generateDevicePID(table,DSN,4)
- PIDs.append(devicePID)
-
- if verbose > 0:
- print("Device PID: " + devicePID)
-
- #
- # Open book and parse metadata
- #
-
- if len(args) == 1:
-
- bookFile = openBook(args[0])
- parseTopazHeader()
- parseMetadata()
-
- #
- # Compute book PID
- #
-
- # Get the account token
-
- if kindleDatabase != None:
- kindleAccountToken = getKindleInfoValueForKey("kindle.account.tokens")
-
- if verbose >1:
- print("Account Token: " + kindleAccountToken)
-
- keysRecord = bookMetadata["keys"]
- keysRecordRecord = bookMetadata[keysRecord]
-
- pidHash = SHA1(DSN+kindleAccountToken+keysRecord+keysRecordRecord)
-
- bookPID = encodePID(pidHash)
- PIDs.append(bookPID)
-
- if verbose > 0:
- print ("Book PID: " + bookPID )
-
- #
- # Decrypt book key
- #
-
- dkey = getBookPayloadRecord('dkey', 0)
-
- bookKeys = []
- for PID in PIDs :
- bookKeys+=decryptDkeyRecords(dkey,PID)
-
- if len(bookKeys) == 0 :
- if verbose > 0 :
- print ("Book key could not be found. Maybe this book is not registered with this device.")
- else :
- bookKey = bookKeys[0]
- if verbose > 0:
- print("Book key: " + bookKey.encode('hex'))
-
-
-
- if command == "printRecord" :
- extractBookPayloadRecord(recordName,int(recordIndex),outputFile)
- if outputFile != "" and verbose>0 :
- print("Wrote record to file: "+outputFile)
- elif command == "doit" :
- if outputFile!="" :
- createDecryptedBook(outputFile)
- if verbose >0 :
- print ("Decrypted book saved. Don't pirate!")
- elif verbose > 0:
- print("Output file name was not supplied.")
-
- return 0
-
-if __name__ == '__main__':
- sys.exit(main())
-#! /usr/bin/python
-# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-# For use with Topaz Scripts Version 2.6
-
-class Unbuffered:
- def __init__(self, stream):
- self.stream = stream
- def write(self, data):
- self.stream.write(data)
- self.stream.flush()
- def __getattr__(self, attr):
- return getattr(self.stream, attr)
-
-import sys
-sys.stdout=Unbuffered(sys.stdout)
-
-import csv
-import os
-import getopt
-from struct import pack
-from struct import unpack
-
-class TpzDRMError(Exception):
+#!/usr/bin/env python
+# K4PC Windows specific routines
+
+from __future__ import with_statement
+
+import sys, os, re
+from struct import pack, unpack, unpack_from
+
+from ctypes import windll, c_char_p, c_wchar_p, c_uint, POINTER, byref, \
+ create_unicode_buffer, create_string_buffer, CFUNCTYPE, addressof, \
+ string_at, Structure, c_void_p, cast
+
+import _winreg as winreg
+MAX_PATH = 255
+kernel32 = windll.kernel32
+advapi32 = windll.advapi32
+crypt32 = windll.crypt32
+
+import traceback
+
+# crypto digest routines
+import hashlib
+
+def MD5(message):
+ """Return the raw (binary) MD5 digest of message, computed with hashlib."""
+ ctx = hashlib.md5()
+ ctx.update(message)
+ return ctx.digest()
+
+def SHA1(message):
+ """Return the raw (binary) SHA-1 digest of message, computed with hashlib."""
+ ctx = hashlib.sha1()
+ ctx.update(message)
+ return ctx.digest()
+
+def SHA256(message):
+ """Return the raw (binary) SHA-256 digest of message, computed with hashlib."""
+ ctx = hashlib.sha256()
+ ctx.update(message)
+ return ctx.digest()
+
+# For K4PC 1.9.X
+# use routines in alfcrypto:
+# AES_cbc_encrypt
+# AES_set_decrypt_key
+# PKCS5_PBKDF2_HMAC_SHA1
+
+from alfcrypto import AES_CBC, KeyIVGen
+
+def UnprotectHeaderData(encryptedData):
+ """Decrypt encryptedData with AES-CBC using a key and IV derived from fixed strings.
+ The key material comes from KeyIVGen().pbkdf2() over a constant password and
+ salt; the result of AES_CBC.decrypt() is returned unchanged.
+ """
+ passwdData = 'header_key_data'
+ salt = 'HEADER.2011'
+ iter = 0x80
+ keylen = 0x100
+ key_iv = KeyIVGen().pbkdf2(passwdData, salt, iter, keylen)
+ # only the first 48 bytes of the derived block are used: 32 for the key, 16 for the IV
+ key = key_iv[0:32]
+ iv = key_iv[32:48]
+ aes=AES_CBC()
+ aes.set_decrypt_key(key, iv)
+ cleartext = aes.decrypt(encryptedData)
+ return cleartext
+
+
+# simple primes table (<= n) calculator
+# Returns the list of primes <= n ([] for n < 2, [2] for n == 2) by sieving
+# the odd numbers 3..n. Relies on Python 2 semantics: range() must return a
+# mutable list and "/" on ints must truncate.
+def primes(n):
+ if n==2: return [2]
+ elif n<2: return []
+ # s[k] holds the odd number 2*k+3, or 0 once it has been struck out
+ s=range(3,n+1,2)
+ mroot = n ** 0.5
+ half=(n+1)/2-1
+ i=0
+ m=3
+ while m <= mroot:
+ if s[i]:
+ # index of m*m in s; multiples of m are then struck out in steps of m
+ j=(m*m-3)/2
+ s[j]=0
+ while j<half:
+ s[j]=0
+ j+=m
+ i=i+1
+ m=2*i+3
+ # everything still non-zero in s is prime; 2 is added by hand
+ return [2]+[x for x in s if x]
+
+
+# Various character maps used to decrypt kindle info values.
+# Probably supposed to act as obfuscation
+# In this file charMap2 is used for the older '{'-separated kindle.info
+# records and charMap5 for the '/'-separated rainier .kinf records.
+charMap2 = "AaZzB0bYyCc1XxDdW2wEeVv3FfUuG4g-TtHh5SsIiR6rJjQq7KkPpL8lOoMm9Nn_"
+charMap5 = "AzB0bYyCeVvaZ3FfUuG4g-TtHh5SsIiR6rJjQq7KkPpL8lOoMm9Nn_c1XxDdW2wE"
+# New maps in K4PC 1.9.0
+# testMap1 decodes the header blob of the newest (.kinf2011 style) files.
+testMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
+testMap6 = "9YzAb0Cd1Ef2n5Pr6St7Uvh3Jk4M8WxG"
+testMap8 = "YvaZ3FfUm9Nn_c1XuG4yCAzB0beVg-TtHh5SsIiR6rJjQdW2wEq7KkPpL8lOoMxD"
+
+# Module-specific exception type; it adds nothing to Exception.
+class DrmException(Exception):
pass
-# Get a 7 bit encoded number from string. The most
-# significant byte comes first and has the high bit (8th) set
-
-def readEncodedNumber(file):
- flag = False
- c = file.read(1)
- if (len(c) == 0):
- return None
- data = ord(c)
-
- if data == 0xFF:
- flag = True
- c = file.read(1)
- if (len(c) == 0):
- return None
- data = ord(c)
-
- if data >= 0x80:
- datax = (data & 0x7F)
- while data >= 0x80 :
- c = file.read(1)
- if (len(c) == 0):
- return None
- data = ord(c)
- datax = (datax <<7) + (data & 0x7F)
- data = datax
-
- if flag:
- data = -data
- return data
-
-
-# returns a binary string that encodes a number into 7 bits
-# most significant byte first which has the high bit set
-
-def encodeNumber(number):
+# Encode the bytes in data with the characters in map
+def encode(data, map):
result = ""
- negative = False
- flag = 0
-
- if number < 0 :
- number = -number + 1
- negative = True
-
- while True:
- byte = number & 0x7F
- number = number >> 7
- byte += flag
- result += chr(byte)
- flag = 0x80
- if number == 0 :
- if (byte == 0xFF and negative == False) :
- result += chr(0x80)
- break
-
- if negative:
- result += chr(0xFF)
-
- return result[::-1]
-
-
-
-# create / read a length prefixed string from the file
-
-def lengthPrefixString(data):
- return encodeNumber(len(data))+data
-
-def readString(file):
- stringLength = readEncodedNumber(file)
- if (stringLength == None):
- return ""
- sv = file.read(stringLength)
- if (len(sv) != stringLength):
- return ""
- return unpack(str(stringLength)+"s",sv)[0]
-
-
-# convert a binary string generated by encodeNumber (7 bit encoded number)
-# to the value you would find inside the page*.dat files to be processed
-
-def convert(i):
- result = ''
- val = encodeNumber(i)
- for j in xrange(len(val)):
- c = ord(val[j:j+1])
- result += '%02x' % c
+ for char in data:
+ value = ord(char)
+ Q = (value ^ 0x80) // len(map)
+ R = value % len(map)
+ result += map[Q]
+ result += map[R]
return result
+# Hash the bytes in data and then encode the digest with the characters in map
+# (the hash is MD5, so 16 digest bytes go in and encode() emits two characters per byte)
+def encodeHash(data,map):
+ return encode(MD5(data),map)
+# Decode the string in data with the characters in map. Returns the decoded bytes
+# Characters are consumed in pairs (high, low), one pair per output byte; a
+# trailing unpaired character is ignored, and decoding stops without error at
+# the first character that is not present in map.
+def decode(data,map):
+ result = ""
+ for i in range (0,len(data)-1,2):
+ high = map.find(data[i])
+ low = map.find(data[i+1])
+ if (high == -1) or (low == -1) :
+ break
+ # rebuild the byte: scale the high index back up and undo the XOR with 0x80 used by encode()
+ value = (((high * len(map)) ^ 0x80) & 0xFF) + low
+ result += pack("B",value)
+ return result
-# the complete string table used to store all book text content
-# as well as the xml tokens and values that make sense out of it
-
-class Dictionary(object):
- def __init__(self, dictFile):
- self.filename = dictFile
- self.size = 0
- self.fo = file(dictFile,'rb')
- self.stable = []
- self.size = readEncodedNumber(self.fo)
- for i in xrange(self.size):
- self.stable.append(self.escapestr(readString(self.fo)))
- self.pos = 0
-
- def escapestr(self, str):
- str = str.replace('&','&')
- str = str.replace('<','<')
- str = str.replace('>','>')
- str = str.replace('=','=')
- return str
-
- def lookup(self,val):
- if ((val >= 0) and (val < self.size)) :
- self.pos = val
- return self.stable[self.pos]
- else:
- print "Error - %d outside of string table limits" % val
- raise TpzDRMError('outside of string table limits')
- # sys.exit(-1)
-
- def getSize(self):
- return self.size
-
- def getPos(self):
- return self.pos
-
- def dumpDict(self):
- for i in xrange(self.size):
- print "%d %s %s" % (i, convert(i), self.stable[i])
- return
-
-# parses the xml snippets that are represented by each page*.dat file.
-# also parses the other0.dat file - the main stylesheet
-# and information used to inject the xml snippets into page*.dat files
-
-class PageParser(object):
- def __init__(self, filename, dict, debug, flat_xml):
- self.fo = file(filename,'rb')
- self.id = os.path.basename(filename).replace('.dat','')
- self.dict = dict
- self.debug = debug
- self.flat_xml = flat_xml
- self.tagpath = []
- self.doc = []
- self.snippetList = []
-
-
- # hash table used to enable the decoding process
- # This has all been developed by trial and error so it may still have omissions or
- # contain errors
- # Format:
- # tag : (number of arguments, argument type, subtags present, special case of subtags presents when escaped)
-
- token_tags = {
- 'x' : (1, 'scalar_number', 0, 0),
- 'y' : (1, 'scalar_number', 0, 0),
- 'h' : (1, 'scalar_number', 0, 0),
- 'w' : (1, 'scalar_number', 0, 0),
- 'firstWord' : (1, 'scalar_number', 0, 0),
- 'lastWord' : (1, 'scalar_number', 0, 0),
- 'rootID' : (1, 'scalar_number', 0, 0),
- 'stemID' : (1, 'scalar_number', 0, 0),
- 'type' : (1, 'scalar_text', 0, 0),
-
- 'info' : (0, 'number', 1, 0),
-
- 'info.word' : (0, 'number', 1, 1),
- 'info.word.ocrText' : (1, 'text', 0, 0),
- 'info.word.firstGlyph' : (1, 'raw', 0, 0),
- 'info.word.lastGlyph' : (1, 'raw', 0, 0),
- 'info.word.bl' : (1, 'raw', 0, 0),
- 'info.word.link_id' : (1, 'number', 0, 0),
-
- 'glyph' : (0, 'number', 1, 1),
- 'glyph.x' : (1, 'number', 0, 0),
- 'glyph.y' : (1, 'number', 0, 0),
- 'glyph.glyphID' : (1, 'number', 0, 0),
-
- 'dehyphen' : (0, 'number', 1, 1),
- 'dehyphen.rootID' : (1, 'number', 0, 0),
- 'dehyphen.stemID' : (1, 'number', 0, 0),
- 'dehyphen.stemPage' : (1, 'number', 0, 0),
- 'dehyphen.sh' : (1, 'number', 0, 0),
-
- 'links' : (0, 'number', 1, 1),
- 'links.page' : (1, 'number', 0, 0),
- 'links.rel' : (1, 'number', 0, 0),
- 'links.row' : (1, 'number', 0, 0),
- 'links.title' : (1, 'text', 0, 0),
- 'links.href' : (1, 'text', 0, 0),
- 'links.type' : (1, 'text', 0, 0),
-
- 'paraCont' : (0, 'number', 1, 1),
- 'paraCont.rootID' : (1, 'number', 0, 0),
- 'paraCont.stemID' : (1, 'number', 0, 0),
- 'paraCont.stemPage' : (1, 'number', 0, 0),
-
- 'paraStems' : (0, 'number', 1, 1),
- 'paraStems.stemID' : (1, 'number', 0, 0),
-
- 'wordStems' : (0, 'number', 1, 1),
- 'wordStems.stemID' : (1, 'number', 0, 0),
-
- 'empty' : (1, 'snippets', 1, 0),
-
- 'page' : (1, 'snippets', 1, 0),
- 'page.pageid' : (1, 'scalar_text', 0, 0),
- 'page.pagelabel' : (1, 'scalar_text', 0, 0),
- 'page.type' : (1, 'scalar_text', 0, 0),
- 'page.h' : (1, 'scalar_number', 0, 0),
- 'page.w' : (1, 'scalar_number', 0, 0),
- 'page.startID' : (1, 'scalar_number', 0, 0),
-
- 'group' : (1, 'snippets', 1, 0),
- 'group.type' : (1, 'scalar_text', 0, 0),
- 'group._tag' : (1, 'scalar_text', 0, 0),
-
- 'region' : (1, 'snippets', 1, 0),
- 'region.type' : (1, 'scalar_text', 0, 0),
- 'region.x' : (1, 'scalar_number', 0, 0),
- 'region.y' : (1, 'scalar_number', 0, 0),
- 'region.h' : (1, 'scalar_number', 0, 0),
- 'region.w' : (1, 'scalar_number', 0, 0),
-
- 'empty_text_region' : (1, 'snippets', 1, 0),
-
- 'img' : (1, 'snippets', 1, 0),
- 'img.x' : (1, 'scalar_number', 0, 0),
- 'img.y' : (1, 'scalar_number', 0, 0),
- 'img.h' : (1, 'scalar_number', 0, 0),
- 'img.w' : (1, 'scalar_number', 0, 0),
- 'img.src' : (1, 'scalar_number', 0, 0),
- 'img.color_src' : (1, 'scalar_number', 0, 0),
-
- 'paragraph' : (1, 'snippets', 1, 0),
- 'paragraph.class' : (1, 'scalar_text', 0, 0),
- 'paragraph.firstWord' : (1, 'scalar_number', 0, 0),
- 'paragraph.lastWord' : (1, 'scalar_number', 0, 0),
- 'paragraph.lastWord' : (1, 'scalar_number', 0, 0),
- 'paragraph.gridSize' : (1, 'scalar_number', 0, 0),
- 'paragraph.gridBottomCenter' : (1, 'scalar_number', 0, 0),
- 'paragraph.gridTopCenter' : (1, 'scalar_number', 0, 0),
- 'paragraph.gridBeginCenter' : (1, 'scalar_number', 0, 0),
- 'paragraph.gridEndCenter' : (1, 'scalar_number', 0, 0),
-
-
- 'word_semantic' : (1, 'snippets', 1, 1),
- 'word_semantic.type' : (1, 'scalar_text', 0, 0),
- 'word_semantic.firstWord' : (1, 'scalar_number', 0, 0),
- 'word_semantic.lastWord' : (1, 'scalar_number', 0, 0),
-
- 'word' : (1, 'snippets', 1, 0),
- 'word.type' : (1, 'scalar_text', 0, 0),
- 'word.class' : (1, 'scalar_text', 0, 0),
- 'word.firstGlyph' : (1, 'scalar_number', 0, 0),
- 'word.lastGlyph' : (1, 'scalar_number', 0, 0),
-
- '_span' : (1, 'snippets', 1, 0),
- '_span.firstWord' : (1, 'scalar_number', 0, 0),
- '_span.lastWord' : (1, 'scalar_number', 0, 0),
- '_span.gridSize' : (1, 'scalar_number', 0, 0),
- '_span.gridBottomCenter' : (1, 'scalar_number', 0, 0),
- '_span.gridTopCenter' : (1, 'scalar_number', 0, 0),
- '_span.gridBeginCenter' : (1, 'scalar_number', 0, 0),
- '_span.gridEndCenter' : (1, 'scalar_number', 0, 0),
-
- 'span' : (1, 'snippets', 1, 0),
- 'span.firstWord' : (1, 'scalar_number', 0, 0),
- 'span.lastWord' : (1, 'scalar_number', 0, 0),
- 'span.gridSize' : (1, 'scalar_number', 0, 0),
- 'span.gridBottomCenter' : (1, 'scalar_number', 0, 0),
- 'span.gridTopCenter' : (1, 'scalar_number', 0, 0),
- 'span.gridBeginCenter' : (1, 'scalar_number', 0, 0),
- 'span.gridEndCenter' : (1, 'scalar_number', 0, 0),
-
- 'extratokens' : (1, 'snippets', 1, 0),
- 'extratokens.type' : (1, 'scalar_text', 0, 0),
- 'extratokens.firstGlyph' : (1, 'scalar_number', 0, 0),
- 'extratokens.lastGlyph' : (1, 'scalar_number', 0, 0),
-
- 'glyph.h' : (1, 'number', 0, 0),
- 'glyph.w' : (1, 'number', 0, 0),
- 'glyph.use' : (1, 'number', 0, 0),
- 'glyph.vtx' : (1, 'number', 0, 1),
- 'glyph.len' : (1, 'number', 0, 1),
- 'glyph.dpi' : (1, 'number', 0, 0),
- 'vtx' : (0, 'number', 1, 1),
- 'vtx.x' : (1, 'number', 0, 0),
- 'vtx.y' : (1, 'number', 0, 0),
- 'len' : (0, 'number', 1, 1),
- 'len.n' : (1, 'number', 0, 0),
-
- 'book' : (1, 'snippets', 1, 0),
- 'version' : (1, 'snippets', 1, 0),
- 'version.FlowEdit_1_id' : (1, 'scalar_text', 0, 0),
- 'version.FlowEdit_1_version' : (1, 'scalar_text', 0, 0),
- 'version.Schema_id' : (1, 'scalar_text', 0, 0),
- 'version.Schema_version' : (1, 'scalar_text', 0, 0),
- 'version.Topaz_version' : (1, 'scalar_text', 0, 0),
- 'version.WordDetailEdit_1_id' : (1, 'scalar_text', 0, 0),
- 'version.WordDetailEdit_1_version' : (1, 'scalar_text', 0, 0),
- 'version.ZoneEdit_1_id' : (1, 'scalar_text', 0, 0),
- 'version.ZoneEdit_1_version' : (1, 'scalar_text', 0, 0),
- 'version.chapterheaders' : (1, 'scalar_text', 0, 0),
- 'version.creation_date' : (1, 'scalar_text', 0, 0),
- 'version.header_footer' : (1, 'scalar_text', 0, 0),
- 'version.init_from_ocr' : (1, 'scalar_text', 0, 0),
- 'version.letter_insertion' : (1, 'scalar_text', 0, 0),
- 'version.xmlinj_convert' : (1, 'scalar_text', 0, 0),
- 'version.xmlinj_reflow' : (1, 'scalar_text', 0, 0),
- 'version.xmlinj_transform' : (1, 'scalar_text', 0, 0),
- 'version.findlists' : (1, 'scalar_text', 0, 0),
- 'version.page_num' : (1, 'scalar_text', 0, 0),
- 'version.page_type' : (1, 'scalar_text', 0, 0),
- 'version.bad_text' : (1, 'scalar_text', 0, 0),
- 'version.glyph_mismatch' : (1, 'scalar_text', 0, 0),
- 'version.margins' : (1, 'scalar_text', 0, 0),
- 'version.staggered_lines' : (1, 'scalar_text', 0, 0),
- 'version.paragraph_continuation' : (1, 'scalar_text', 0, 0),
- 'version.toc' : (1, 'scalar_text', 0, 0),
-
- 'stylesheet' : (1, 'snippets', 1, 0),
- 'style' : (1, 'snippets', 1, 0),
- 'style._tag' : (1, 'scalar_text', 0, 0),
- 'style.type' : (1, 'scalar_text', 0, 0),
- 'style._parent_type' : (1, 'scalar_text', 0, 0),
- 'style.class' : (1, 'scalar_text', 0, 0),
- 'style._after_class' : (1, 'scalar_text', 0, 0),
- 'rule' : (1, 'snippets', 1, 0),
- 'rule.attr' : (1, 'scalar_text', 0, 0),
- 'rule.value' : (1, 'scalar_text', 0, 0),
-
- 'original' : (0, 'number', 1, 1),
- 'original.pnum' : (1, 'number', 0, 0),
- 'original.pid' : (1, 'text', 0, 0),
- 'pages' : (0, 'number', 1, 1),
- 'pages.ref' : (1, 'number', 0, 0),
- 'pages.id' : (1, 'number', 0, 0),
- 'startID' : (0, 'number', 1, 1),
- 'startID.page' : (1, 'number', 0, 0),
- 'startID.id' : (1, 'number', 0, 0),
-
- }
-
-
- # full tag path record keeping routines
- def tag_push(self, token):
- self.tagpath.append(token)
- def tag_pop(self):
- if len(self.tagpath) > 0 :
- self.tagpath.pop()
- def tagpath_len(self):
- return len(self.tagpath)
- def get_tagpath(self, i):
- cnt = len(self.tagpath)
- if i < cnt : result = self.tagpath[i]
- for j in xrange(i+1, cnt) :
- result += '.' + self.tagpath[j]
- return result
-
-
- # list of absolute command byte values values that indicate
- # various types of loop meachanisms typically used to generate vectors
-
- cmd_list = (0x76, 0x76)
-
- # peek at and return 1 byte that is ahead by i bytes
- def peek(self, aheadi):
- c = self.fo.read(aheadi)
- if (len(c) == 0):
- return None
- self.fo.seek(-aheadi,1)
- c = c[-1:]
- return ord(c)
-
-
- # get the next value from the file being processed
- def getNext(self):
- nbyte = self.peek(1);
- if (nbyte == None):
- return None
- val = readEncodedNumber(self.fo)
- return val
-
-
- # format an arg by argtype
- def formatArg(self, arg, argtype):
- if (argtype == 'text') or (argtype == 'scalar_text') :
- result = self.dict.lookup(arg)
- elif (argtype == 'raw') or (argtype == 'number') or (argtype == 'scalar_number') :
- result = arg
- elif (argtype == 'snippets') :
- result = arg
- else :
- print "Error Unknown argtype %s" % argtype
- sys.exit(-2)
- return result
-
-
- # process the next tag token, recursively handling subtags,
- # arguments, and commands
- def procToken(self, token):
-
- known_token = False
- self.tag_push(token)
-
- if self.debug : print 'Processing: ', self.get_tagpath(0)
- cnt = self.tagpath_len()
- for j in xrange(cnt):
- tkn = self.get_tagpath(j)
- if tkn in self.token_tags :
- num_args = self.token_tags[tkn][0]
- argtype = self.token_tags[tkn][1]
- subtags = self.token_tags[tkn][2]
- splcase = self.token_tags[tkn][3]
- ntags = -1
- known_token = True
- break
- if known_token :
-
- # handle subtags if present
- subtagres = []
- if (splcase == 1):
- # this type of tag uses of escape marker 0x74 indicate subtag count
- if self.peek(1) == 0x74:
- skip = readEncodedNumber(self.fo)
- subtags = 1
- num_args = 0
-
- if (subtags == 1):
- ntags = readEncodedNumber(self.fo)
- if self.debug : print 'subtags: ' + token + ' has ' + str(ntags)
- for j in xrange(ntags):
- val = readEncodedNumber(self.fo)
- subtagres.append(self.procToken(self.dict.lookup(val)))
-
- # arguments can be scalars or vectors of text or numbers
- argres = []
- if num_args > 0 :
- firstarg = self.peek(1)
- if (firstarg in self.cmd_list) and (argtype != 'scalar_number') and (argtype != 'scalar_text'):
- # single argument is a variable length vector of data
- arg = readEncodedNumber(self.fo)
- argres = self.decodeCMD(arg,argtype)
- else :
- # num_arg scalar arguments
- for i in xrange(num_args):
- argres.append(self.formatArg(readEncodedNumber(self.fo), argtype))
-
- # build the return tag
- result = []
- tkn = self.get_tagpath(0)
- result.append(tkn)
- result.append(subtagres)
- result.append(argtype)
- result.append(argres)
- self.tag_pop()
- return result
-
- # all tokens that need to be processed should be in the hash
- # table if it may indicate a problem, either new token
- # or an out of sync condition
- else:
- result = []
- if (self.debug):
- print 'Unknown Token:', token
- self.tag_pop()
- return result
-
-
- # special loop used to process code snippets
- # it is NEVER used to format arguments.
- # builds the snippetList
- def doLoop72(self, argtype):
- cnt = readEncodedNumber(self.fo)
- if self.debug :
- result = 'Set of '+ str(cnt) + ' xml snippets. The overall structure \n'
- result += 'of the document is indicated by snippet number sets at the\n'
- result += 'end of each snippet. \n'
- print result
- for i in xrange(cnt):
- if self.debug: print 'Snippet:',str(i)
- snippet = []
- snippet.append(i)
- val = readEncodedNumber(self.fo)
- snippet.append(self.procToken(self.dict.lookup(val)))
- self.snippetList.append(snippet)
- return
-
-
-
- # general loop code gracisouly submitted by "skindle" - thank you!
- def doLoop76Mode(self, argtype, cnt, mode):
- result = []
- adj = 0
- if mode & 1:
- adj = readEncodedNumber(self.fo)
- mode = mode >> 1
- x = []
- for i in xrange(cnt):
- x.append(readEncodedNumber(self.fo) - adj)
- for i in xrange(mode):
- for j in xrange(1, cnt):
- x[j] = x[j] + x[j - 1]
- for i in xrange(cnt):
- result.append(self.formatArg(x[i],argtype))
- return result
-
-
- # dispatches loop commands bytes with various modes
- # The 0x76 style loops are used to build vectors
-
- # This was all derived by trial and error and
- # new loop types may exist that are not handled here
- # since they did not appear in the test cases
-
- def decodeCMD(self, cmd, argtype):
- if (cmd == 0x76):
-
- # loop with cnt, and mode to control loop styles
- cnt = readEncodedNumber(self.fo)
- mode = readEncodedNumber(self.fo)
-
- if self.debug : print 'Loop for', cnt, 'with mode', mode, ': '
- return self.doLoop76Mode(argtype, cnt, mode)
-
- if self.dbug: print "Unknown command", cmd
- result = []
- return result
-
-
-
- # add full tag path to injected snippets
- def updateName(self, tag, prefix):
- name = tag[0]
- subtagList = tag[1]
- argtype = tag[2]
- argList = tag[3]
- nname = prefix + '.' + name
- nsubtaglist = []
- for j in subtagList:
- nsubtaglist.append(self.updateName(j,prefix))
- ntag = []
- ntag.append(nname)
- ntag.append(nsubtaglist)
- ntag.append(argtype)
- ntag.append(argList)
- return ntag
-
-
-
- # perform depth first injection of specified snippets into this one
- def injectSnippets(self, snippet):
- snipno, tag = snippet
- name = tag[0]
- subtagList = tag[1]
- argtype = tag[2]
- argList = tag[3]
- nsubtagList = []
- if len(argList) > 0 :
- for j in argList:
- asnip = self.snippetList[j]
- aso, atag = self.injectSnippets(asnip)
- atag = self.updateName(atag, name)
- nsubtagList.append(atag)
- argtype='number'
- argList=[]
- if len(nsubtagList) > 0 :
- subtagList.extend(nsubtagList)
- tag = []
- tag.append(name)
- tag.append(subtagList)
- tag.append(argtype)
- tag.append(argList)
- snippet = []
- snippet.append(snipno)
- snippet.append(tag)
- return snippet
-
-
-
- # format the tag for output
- def formatTag(self, node):
- name = node[0]
- subtagList = node[1]
- argtype = node[2]
- argList = node[3]
- fullpathname = name.split('.')
- nodename = fullpathname.pop()
- ilvl = len(fullpathname)
- indent = ' ' * (3 * ilvl)
- rlst = []
- rlst.append(indent + '<' + nodename + '>')
- if len(argList) > 0:
- alst = []
- for j in argList:
- if (argtype == 'text') or (argtype == 'scalar_text') :
- alst.append(j + '|')
- else :
- alst.append(str(j) + ',')
- argres = "".join(alst)
- argres = argres[0:-1]
- if argtype == 'snippets' :
- rlst.append('snippets:' + argres)
- else :
- rlst.append(argres)
- if len(subtagList) > 0 :
- rlst.append('\n')
- for j in subtagList:
- if len(j) > 0 :
- rlst.append(self.formatTag(j))
- rlst.append(indent + '</' + nodename + '>\n')
- else:
- rlst.append('</' + nodename + '>\n')
- return "".join(rlst)
-
-
- # flatten tag
- def flattenTag(self, node):
- name = node[0]
- subtagList = node[1]
- argtype = node[2]
- argList = node[3]
- rlst = []
- rlst.append(name)
- if (len(argList) > 0):
- alst = []
- for j in argList:
- if (argtype == 'text') or (argtype == 'scalar_text') :
- alst.append(j + '|')
- else :
- alst.append(str(j) + '|')
- argres = "".join(alst)
- argres = argres[0:-1]
- if argtype == 'snippets' :
- rlst.append('.snippets=' + argres)
- else :
- rlst.append('=' + argres)
- rlst.append('\n')
- for j in subtagList:
- if len(j) > 0 :
- rlst.append(self.flattenTag(j))
- return "".join(rlst)
-
-
- # reduce create xml output
- def formatDoc(self, flat_xml):
- rlst = []
- for j in self.doc :
- if len(j) > 0:
- if flat_xml:
- rlst.append(self.flattenTag(j))
- else:
- rlst.append(self.formatTag(j))
- result = "".join(rlst)
- if self.debug : print result
- return result
-
-
-
- # main loop - parse the page.dat files
- # to create structured document and snippets
-
- # FIXME: value at end of magic appears to be a subtags count
- # but for what? For now, inject an 'info" tag as it is in
- # every dictionary and seems close to what is meant
- # The alternative is to special case the last _ "0x5f" to mean something
-
- def process(self):
-
- # peek at the first bytes to see what type of file it is
- magic = self.fo.read(9)
- if (magic[0:1] == 'p') and (magic[2:9] == 'marker_'):
- first_token = 'info'
- elif (magic[0:1] == 'p') and (magic[2:9] == '__PAGE_'):
- skip = self.fo.read(2)
- first_token = 'info'
- elif (magic[0:1] == 'p') and (magic[2:8] == '_PAGE_'):
- first_token = 'info'
- elif (magic[0:1] == 'g') and (magic[2:9] == '__GLYPH'):
- skip = self.fo.read(3)
- first_token = 'info'
- else :
- # other0.dat file
- first_token = None
- self.fo.seek(-9,1)
-
-
- # main loop to read and build the document tree
- while True:
-
- if first_token != None :
- # use "inserted" first token 'info' for page and glyph files
- tag = self.procToken(first_token)
- if len(tag) > 0 :
- self.doc.append(tag)
- first_token = None
-
- v = self.getNext()
- if (v == None):
+# interface with Windows OS Routines
+# DataBlob is a ctypes structure pairing a byte count (cbData) with a pointer
+# to the bytes (pbData); it is passed by reference to crypt32's CryptUnprotectData.
+class DataBlob(Structure):
+ _fields_ = [('cbData', c_uint),
+ ('pbData', c_void_p)]
+DataBlob_p = POINTER(DataBlob)
+
+
+def GetSystemDirectory():
+ # Factory: sets up the ctypes prototype of kernel32.GetSystemDirectoryW once
+ # and returns the inner function, which the last line binds to this same name.
+ GetSystemDirectoryW = kernel32.GetSystemDirectoryW
+ GetSystemDirectoryW.argtypes = [c_wchar_p, c_uint]
+ GetSystemDirectoryW.restype = c_uint
+ def GetSystemDirectory():
+ # returns the unicode string the API wrote into a MAX_PATH + 1 character buffer
+ buffer = create_unicode_buffer(MAX_PATH + 1)
+ GetSystemDirectoryW(buffer, len(buffer))
+ return buffer.value
+ return GetSystemDirectory
+GetSystemDirectory = GetSystemDirectory()
+
+def GetVolumeSerialNumber():
+ # Factory: sets up the ctypes prototype of kernel32.GetVolumeInformationW once
+ # and returns the inner function, which the last line binds to this same name.
+ GetVolumeInformationW = kernel32.GetVolumeInformationW
+ GetVolumeInformationW.argtypes = [c_wchar_p, c_wchar_p, c_uint,
+ POINTER(c_uint), POINTER(c_uint),
+ POINTER(c_uint), c_wchar_p, c_uint]
+ GetVolumeInformationW.restype = c_uint
+ # The default path (first '\\'-separated component of the system directory plus '\\')
+ # is evaluated once, when the inner function is defined, not on every call.
+ def GetVolumeSerialNumber(path = GetSystemDirectory().split('\\')[0] + '\\'):
+ vsn = c_uint(0)
+ # only the serial number is requested; the return value of the API call is not checked
+ GetVolumeInformationW(path, None, 0, byref(vsn), None, None, None, 0)
+ # the serial number is returned as a decimal string
+ return str(vsn.value)
+ return GetVolumeSerialNumber
+GetVolumeSerialNumber = GetVolumeSerialNumber()
+
+def GetIDString():
+ """Return the ID string, which is the string form of the default volume's serial number."""
+ return GetVolumeSerialNumber()
+
+def getLastError():
+ # Factory: sets up the ctypes prototype of kernel32.GetLastError once and
+ # returns the inner function, which the last line binds to this same name.
+ GetLastError = kernel32.GetLastError
+ GetLastError.argtypes = None
+ GetLastError.restype = c_uint
+ def getLastError():
+ # returns the unsigned error code reported by kernel32.GetLastError
+ return GetLastError()
+ return getLastError
+getLastError = getLastError()
+
+def GetUserName():
+ # Factory: sets up the ctypes prototype of advapi32.GetUserNameW once and
+ # returns the inner function, which the last line binds to this same name.
+ GetUserNameW = advapi32.GetUserNameW
+ GetUserNameW.argtypes = [c_wchar_p, POINTER(c_uint)]
+ GetUserNameW.restype = c_uint
+ def GetUserName():
+ # start with a 2 character buffer and double it for as long as the call fails
+ buffer = create_unicode_buffer(2)
+ size = c_uint(len(buffer))
+ while not GetUserNameW(buffer, byref(size)):
+ errcd = getLastError()
+ if errcd == 234:
+ # bad wine implementation up through wine 1.3.21
+ return "AlternateUserName"
+ buffer = create_unicode_buffer(len(buffer) * 2)
+ size.value = len(buffer)
+ # keep every second byte of the UTF-16-LE encoding, i.e. the low byte of each code unit
+ return buffer.value.encode('utf-16-le')[::2]
+ return GetUserName
+GetUserName = GetUserName()
+
+def CryptUnprotectData():
+ # Factory: sets up the ctypes prototype of crypt32.CryptUnprotectData once and
+ # returns the inner function, which the last line binds to this same name.
+ _CryptUnprotectData = crypt32.CryptUnprotectData
+ _CryptUnprotectData.argtypes = [DataBlob_p, c_wchar_p, DataBlob_p,
+ c_void_p, c_void_p, c_uint, DataBlob_p]
+ _CryptUnprotectData.restype = c_uint
+ # indata and entropy are byte strings, flags is passed straight to the API.
+ # Returns the unprotected bytes, or the literal string 'failed' when the API
+ # call reports failure (no exception is raised).
+ def CryptUnprotectData(indata, entropy, flags):
+ # wrap both byte strings in DataBlob structures (length + pointer)
+ indatab = create_string_buffer(indata)
+ indata = DataBlob(len(indata), cast(indatab, c_void_p))
+ entropyb = create_string_buffer(entropy)
+ entropy = DataBlob(len(entropy), cast(entropyb, c_void_p))
+ outdata = DataBlob()
+ if not _CryptUnprotectData(byref(indata), None, byref(entropy),
+ None, None, flags, byref(outdata)):
+ # raise DrmException("Failed to Unprotect Data")
+ return 'failed'
+ # NOTE(review): outdata.pbData is copied but never released here; the Windows
+ # documentation appears to require LocalFree on it - confirm.
+ return string_at(outdata.pbData, outdata.cbData)
+ return CryptUnprotectData
+CryptUnprotectData = CryptUnprotectData()
+
+
+# Locate all of the kindle-info style files and return as list
+def getKindleInfoFiles(kInfoFiles):
+ """Append the path of every Kindle for PC key file that exists to kInfoFiles.
+ kInfoFiles is the list to extend; it is modified in place and also returned.
+ The base directory is the 'Local AppData' shell folder from the registry,
+ overridden by the LOCALAPPDATA environment variable when that is set.
+ A message is printed for each known file location where nothing is found.
+ """
+ regkey = winreg.OpenKey(winreg.HKEY_CURRENT_USER, "Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\Shell Folders\\")
+ path = winreg.QueryValueEx(regkey, 'Local AppData')[0]
+
+ # some 64 bit machines do not have the proper registry key for some reason
+ # or the python interface to the 32 vs 64 bit registry is broken
+ if 'LOCALAPPDATA' in os.environ:
+ path = os.environ['LOCALAPPDATA']
+
+ print "searching for kinfoFiles in ", path
+
+ # first look for older kindle-info files
+ kinfopath = path +'\\Amazon\\Kindle For PC\\{AMAwzsaPaaZAzmZzZQzgZCAkZ3AjA_AY}\\kindle.info'
+ if not os.path.isfile(kinfopath):
+ print('No kindle.info files have been found.')
+ else:
+ kInfoFiles.append(kinfopath)
+
+ # now look for newer (K4PC 1.5.0 and later) rainier.2.1.1.kinf file
+
+ kinfopath = path +'\\Amazon\\Kindle For PC\\storage\\rainier.2.1.1.kinf'
+ if not os.path.isfile(kinfopath):
+ print('No K4PC 1.5.X .kinf files have been found.')
+ else:
+ kInfoFiles.append(kinfopath)
+
+ # now look for even newer (K4PC 1.6.0 and later) rainier.2.1.1.kinf file
+ kinfopath = path +'\\Amazon\\Kindle\\storage\\rainier.2.1.1.kinf'
+ if not os.path.isfile(kinfopath):
+ print('No K4PC 1.6.X .kinf files have been found.')
+ else:
+ kInfoFiles.append(kinfopath)
+
+ # now look for even newer (K4PC 1.9.0 and later) .kinf2011 file
+ kinfopath = path +'\\Amazon\\Kindle\\storage\\.kinf2011'
+ if not os.path.isfile(kinfopath):
+ print('No K4PC 1.9.X .kinf files have been found.')
+ else:
+ kInfoFiles.append(kinfopath)
+
+ return kInfoFiles
+
+
+# determine type of kindle info provided and return a
+# database of keynames and values
+def getDBfromFile(kInfoFile):
+ names = ["kindle.account.tokens","kindle.cookie.item","eulaVersionAccepted","login_date","kindle.token.item","login","kindle.key.item","kindle.name.info","kindle.device.info", "MazamaRandomNumber", "max_date", "SIGVERIF"]
+ DB = {}
+ cnt = 0
+ infoReader = open(kInfoFile, 'r')
+ hdr = infoReader.read(1)
+ data = infoReader.read()
+
+ if data.find('{') != -1 :
+
+ # older style kindle-info file
+ items = data.split('{')
+ for item in items:
+ if item != '':
+ keyhash, rawdata = item.split(':')
+ keyname = "unknown"
+ for name in names:
+ if encodeHash(name,charMap2) == keyhash:
+ keyname = name
+ break
+ if keyname == "unknown":
+ keyname = keyhash
+ encryptedValue = decode(rawdata,charMap2)
+ DB[keyname] = CryptUnprotectData(encryptedValue, "", 0)
+ cnt = cnt + 1
+ if cnt == 0:
+ DB = None
+ return DB
+
+ if hdr == '/':
+ # else rainier-2-1-1 .kinf file
+ # the .kinf file uses "/" to separate it into records
+ # so remove the trailing "/" to make it easy to use split
+ data = data[:-1]
+ items = data.split('/')
+
+ # loop through the item records until all are processed
+ while len(items) > 0:
+
+ # get the first item record
+ item = items.pop(0)
+
+ # the first 32 chars of the first record of a group
+ # is the MD5 hash of the key name encoded by charMap5
+ keyhash = item[0:32]
+
+ # the raw keyhash string is used to create entropy for the actual
+ # CryptProtectData Blob that represents that keys contents
+ entropy = SHA1(keyhash)
+
+ # the remainder of the first record when decoded with charMap5
+ # has the ':' split char followed by the string representation
+ # of the number of records that follow
+ # and make up the contents
+ srcnt = decode(item[34:],charMap5)
+ rcnt = int(srcnt)
+
+ # read and store in rcnt records of data
+ # that make up the contents value
+ edlst = []
+ for i in xrange(rcnt):
+ item = items.pop(0)
+ edlst.append(item)
+
+ keyname = "unknown"
+ for name in names:
+ if encodeHash(name,charMap5) == keyhash:
+ keyname = name
+ break
+ if keyname == "unknown":
+ keyname = keyhash
+ # the charMap5 encoded contents data has had a length
+ # of chars (always odd) cut off of the front and moved
+ # to the end to prevent decoding using charMap5 from
+ # working properly, and thereby preventing the ensuing
+ # CryptUnprotectData call from succeeding.
+
+ # The offset into the charMap5 encoded contents seems to be:
+ # len(contents)-largest prime number <= int(len(content)/3)
+ # (in other words split "about" 2/3rds of the way through)
+
+ # move first offsets chars to end to align for decode by charMap5
+ encdata = "".join(edlst)
+ contlen = len(encdata)
+ noffset = contlen - primes(int(contlen/3))[-1]
+
+ # now properly split and recombine
+ # by moving noffset chars from the start of the
+ # string to the end of the string
+ pfx = encdata[0:noffset]
+ encdata = encdata[noffset:]
+ encdata = encdata + pfx
+
+ # decode using Map5 to get the CryptProtect Data
+ encryptedValue = decode(encdata,charMap5)
+ DB[keyname] = CryptUnprotectData(encryptedValue, entropy, 1)
+ cnt = cnt + 1
+
+ if cnt == 0:
+ DB = None
+ return DB
+
+ # else newest .kinf2011 style .kinf file
+ # the .kinf file uses "/" to separate it into records
+ # so remove the trailing "/" to make it easy to use split
+ # need to put back the first char read because it is part
+ # of the added entropy blob
+ data = hdr + data[:-1]
+ items = data.split('/')
+
+ # starts with an encoded and encrypted header blob
+ headerblob = items.pop(0)
+ encryptedValue = decode(headerblob, testMap1)
+ cleartext = UnprotectHeaderData(encryptedValue)
+ # now extract the pieces that form the added entropy
+ pattern = re.compile(r'''\[Version:(\d+)\]\[Build:(\d+)\]\[Cksum:([^\]]+)\]\[Guid:([\{\}a-z0-9\-]+)\]''', re.IGNORECASE)
+ for m in re.finditer(pattern, cleartext):
+ added_entropy = m.group(2) + m.group(4)
+
+
+ # loop through the item records until all are processed
+ while len(items) > 0:
+
+ # get the first item record
+ item = items.pop(0)
+
+ # the first 32 chars of the first record of a group
+ # is the MD5 hash of the key name encoded by charMap5
+ keyhash = item[0:32]
+
+ # the sha1 of raw keyhash string is used to create entropy along
+ # with the added entropy provided above from the headerblob
+ entropy = SHA1(keyhash) + added_entropy
+
+ # the remainder of the first record when decoded with charMap5
+ # has the ':' split char followed by the string representation
+ # of the number of records that follow
+ # and make up the contents
+ srcnt = decode(item[34:],charMap5)
+ rcnt = int(srcnt)
+
+ # read and store in rcnt records of data
+ # that make up the contents value
+ edlst = []
+ for i in xrange(rcnt):
+ item = items.pop(0)
+ edlst.append(item)
+
+ # key names now use the new testMap8 encoding
+ keyname = "unknown"
+ for name in names:
+ if encodeHash(name,testMap8) == keyhash:
+ keyname = name
break
- if (v == 0x72):
- self.doLoop72('number')
- elif (v > 0) and (v < self.dict.getSize()) :
- tag = self.procToken(self.dict.lookup(v))
- if len(tag) > 0 :
- self.doc.append(tag)
- else:
- if self.debug:
- print "Main Loop: Unknown value: %x" % v
- if (v == 0):
- if (self.peek(1) == 0x5f):
- skip = self.fo.read(1)
- first_token = 'info'
-
- # now do snippet injection
- if len(self.snippetList) > 0 :
- if self.debug : print 'Injecting Snippets:'
- snippet = self.injectSnippets(self.snippetList[0])
- snipno = snippet[0]
- tag_add = snippet[1]
- if self.debug : print self.formatTag(tag_add)
- if len(tag_add) > 0:
- self.doc.append(tag_add)
-
- # handle generation of xml output
- xmlpage = self.formatDoc(self.flat_xml)
-
- return xmlpage
-
-
-def fromData(dict, fname):
- flat_xml = True
- debug = False
- pp = PageParser(fname, dict, debug, flat_xml)
- xmlpage = pp.process()
- return xmlpage
-
-def getXML(dict, fname):
- flat_xml = False
- debug = False
- pp = PageParser(fname, dict, debug, flat_xml)
- xmlpage = pp.process()
- return xmlpage
-
-def usage():
- print 'Usage: '
- print ' convert2xml.py dict0000.dat infile.dat '
- print ' '
- print ' Options:'
- print ' -h print this usage help message '
- print ' -d turn on debug output to check for potential errors '
- print ' --flat-xml output the flattened xml page description only '
- print ' '
- print ' This program will attempt to convert a page*.dat file or '
- print ' glyphs*.dat file, using the dict0000.dat file, to its xml description. '
- print ' '
- print ' Use "cmbtc_dump.py" first to unencrypt, uncompress, and dump '
- print ' the *.dat files from a Topaz format e-book.'
-
-#
-# Main
-#
-
-def main(argv):
- dictFile = ""
- pageFile = ""
- debug = False
- flat_xml = False
- printOutput = False
- if len(argv) == 0:
- printOutput = True
- argv = sys.argv
-
- try:
- opts, args = getopt.getopt(argv[1:], "hd", ["flat-xml"])
-
- except getopt.GetoptError, err:
-
- # print help information and exit:
- print str(err) # will print something like "option -a not recognized"
- usage()
- sys.exit(2)
-
- if len(opts) == 0 and len(args) == 0 :
- usage()
- sys.exit(2)
-
- for o, a in opts:
- if o =="-d":
- debug=True
- if o =="-h":
- usage()
- sys.exit(0)
- if o =="--flat-xml":
- flat_xml = True
-
- dictFile, pageFile = args[0], args[1]
-
- # read in the string table dictionary
- dict = Dictionary(dictFile)
- # dict.dumpDict()
-
- # create a page parser
- pp = PageParser(pageFile, dict, debug, flat_xml)
-
- xmlpage = pp.process()
-
- if printOutput:
- print xmlpage
- return 0
-
- return xmlpage
-
-if __name__ == '__main__':
- sys.exit(main(''))
+ # the testMap8 encoded contents data has had a length
+ # of chars (always odd) cut off of the front and moved
+ # to the end to prevent decoding using testMap8 from
+ # working properly, and thereby preventing the ensuing
+ # CryptUnprotectData call from succeeding.
+
+ # The offset into the testMap8 encoded contents seems to be:
+ # len(contents)-largest prime number <= int(len(content)/3)
+ # (in other words split "about" 2/3rds of the way through)
+
+ # move first offsets chars to end to align for decode by testMap8
+ # by moving noffset chars from the start of the
+ # string to the end of the string
+ encdata = "".join(edlst)
+ contlen = len(encdata)
+ noffset = contlen - primes(int(contlen/3))[-1]
+ pfx = encdata[0:noffset]
+ encdata = encdata[noffset:]
+ encdata = encdata + pfx
+
+ # decode using new testMap8 to get the original CryptProtect Data
+ encryptedValue = decode(encdata,testMap8)
+ cleartext = CryptUnprotectData(encryptedValue, entropy, 1)
+ DB[keyname] = cleartext
+ cnt = cnt + 1
+
+ if cnt == 0:
+ DB = None
+ return DB
-#! /usr/bin/python
-# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+#!/usr/bin/python
+#
+# This is a python script. You need a Python interpreter to run it.
+# For example, ActiveState Python, which exists for windows.
+#
+# Changelog
+# 0.01 - Initial version
+# 0.02 - Huffdic compressed books were not properly decrypted
+# 0.03 - Wasn't checking MOBI header length
+# 0.04 - Wasn't sanity checking size of data record
+# 0.05 - It seems that the extra data flags take two bytes not four
+# 0.06 - And that low bit does mean something after all :-)
+# 0.07 - The extra data flags aren't present in MOBI header < 0xE8 in size
+# 0.08 - ...and also not in Mobi header version < 6
+# 0.09 - ...but they are there with Mobi header version 6, header size 0xE4!
+# 0.10 - Outputs unencrypted files as-is, so that when run as a Calibre
+# import filter it works when importing unencrypted files.
+# Also now handles encrypted files that don't need a specific PID.
+# 0.11 - use autoflushed stdout and proper return values
+# 0.12 - Fix for problems with metadata import as Calibre plugin, report errors
+# 0.13 - Formatting fixes: retabbed file, removed trailing whitespace
+# and extra blank lines, converted CR/LF pairs at ends of each line,
+# and other cosmetic fixes.
+# 0.14 - Working out when the extra data flags are present has been problematic
+# Versions 7 through 9 have tried to tweak the conditions, but have been
+# only partially successful. Closer examination of lots of sample
+# files reveals that a confusion has arisen because trailing data entries
+# are not encrypted, but it turns out that the multibyte entries
+# in utf8 file are encrypted. (Although neither kind gets compressed.)
+# This knowledge leads to a simplification of the test for the
+# trailing data byte flags - version 5 and higher AND header size >= 0xE4.
+# 0.15 - Now outputs 'heartbeat', and is also quicker for long files.
+# 0.16 - And reverts to 'done' not 'done.' at the end for unswindle compatibility.
+# 0.17 - added modifications to support its use as an imported python module
+# both inside calibre and also in other places (ie K4DeDRM tools)
+# 0.17a- disabled the standalone plugin feature since a plugin can not import
+# a plugin
+# 0.18 - It seems that multibyte entries aren't encrypted in a v7 file...
+# Removed the disabled Calibre plug-in code
+# Permit use of 8-digit PIDs
+# 0.19 - It seems that multibyte entries aren't encrypted in a v6 file either.
+# 0.20 - Correction: It seems that multibyte entries are encrypted in a v6 file.
+# 0.21 - Added support for multiple pids
+# 0.22 - revised structure to hold MobiBook as a class to allow an extended interface
+# 0.23 - fixed problem with older files with no EXTH section
+# 0.24 - add support for type 1 encryption and 'TEXtREAd' books as well
+# 0.25 - Fixed support for 'BOOKMOBI' type 1 encryption
+# 0.26 - Now enables Text-To-Speech flag and sets clipping limit to 100%
+# 0.27 - Correct pid metadata token generation to match that used by skindle (Thank You Bart!)
+# 0.28 - slight additional changes to metadata token generation (None -> '')
+# 0.29 - It seems that the ideas about when multibyte trailing characters were
+# included in the encryption were wrong. They are for DOC compressed
+# files, but they are not for HUFF/CDIC compressed files!
+# 0.30 - Modified interface slightly to work better with new calibre plugin style
+# 0.31 - The multibyte encryption info is true for version 7 files too.
+# 0.32 - Added support for "Print Replica" Kindle ebooks
+# 0.33 - Performance improvements for large files (concatenation)
+# 0.34 - Performance improvements in decryption (libalfcrypto)
+# 0.35 - add interface to get mobi_version
+
+__version__ = '0.35'
import sys
-import csv
+
+class Unbuffered:
+ def __init__(self, stream):
+ self.stream = stream
+ def write(self, data):
+ self.stream.write(data)
+ self.stream.flush()
+ def __getattr__(self, attr):
+ return getattr(self.stream, attr)
+sys.stdout=Unbuffered(sys.stdout)
+
import os
-import getopt
-from struct import pack
-from struct import unpack
-
-
-class PParser(object):
- def __init__(self, gd, flatxml, meta_array):
- self.gd = gd
- self.flatdoc = flatxml.split('\n')
- self.docSize = len(self.flatdoc)
- self.temp = []
-
- self.ph = -1
- self.pw = -1
- startpos = self.posinDoc('page.h') or self.posinDoc('book.h')
- for p in startpos:
- (name, argres) = self.lineinDoc(p)
- self.ph = max(self.ph, int(argres))
- startpos = self.posinDoc('page.w') or self.posinDoc('book.w')
- for p in startpos:
- (name, argres) = self.lineinDoc(p)
- self.pw = max(self.pw, int(argres))
-
- if self.ph <= 0:
- self.ph = int(meta_array.get('pageHeight', '11000'))
- if self.pw <= 0:
- self.pw = int(meta_array.get('pageWidth', '8500'))
-
- res = []
- startpos = self.posinDoc('info.glyph.x')
- for p in startpos:
- argres = self.getDataatPos('info.glyph.x', p)
- res.extend(argres)
- self.gx = res
-
- res = []
- startpos = self.posinDoc('info.glyph.y')
- for p in startpos:
- argres = self.getDataatPos('info.glyph.y', p)
- res.extend(argres)
- self.gy = res
-
- res = []
- startpos = self.posinDoc('info.glyph.glyphID')
- for p in startpos:
- argres = self.getDataatPos('info.glyph.glyphID', p)
- res.extend(argres)
- self.gid = res
-
-
- # return tag at line pos in document
- def lineinDoc(self, pos) :
- if (pos >= 0) and (pos < self.docSize) :
- item = self.flatdoc[pos]
- if item.find('=') >= 0:
- (name, argres) = item.split('=',1)
- else :
- name = item
- argres = ''
- return name, argres
-
- # find tag in doc if within pos to end inclusive
- def findinDoc(self, tagpath, pos, end) :
- result = None
- if end == -1 :
- end = self.docSize
+import struct
+import binascii
+from alfcrypto import Pukall_Cipher
+
+class DrmException(Exception):
+ pass
+
+
+#
+# MobiBook Utility Routines
+#
+
+# Implementation of Pukall Cipher 1
+def PC1(key, src, decryption=True):
+ return Pukall_Cipher().PC1(key,src,decryption)
+# sum1 = 0;
+# sum2 = 0;
+# keyXorVal = 0;
+# if len(key)!=16:
+# print "Bad key length!"
+# return None
+# wkey = []
+# for i in xrange(8):
+# wkey.append(ord(key[i*2])<<8 | ord(key[i*2+1]))
+# dst = ""
+# for i in xrange(len(src)):
+# temp1 = 0;
+# byteXorVal = 0;
+# for j in xrange(8):
+# temp1 ^= wkey[j]
+# sum2 = (sum2+j)*20021 + sum1
+# sum1 = (temp1*346)&0xFFFF
+# sum2 = (sum2+sum1)&0xFFFF
+# temp1 = (temp1*20021+1)&0xFFFF
+# byteXorVal ^= temp1 ^ sum2
+# curByte = ord(src[i])
+# if not decryption:
+# keyXorVal = curByte * 257;
+# curByte = ((curByte ^ (byteXorVal >> 8)) ^ byteXorVal) & 0xFF
+# if decryption:
+# keyXorVal = curByte * 257;
+# for j in xrange(8):
+# wkey[j] ^= keyXorVal;
+# dst+=chr(curByte)
+# return dst
+
+def checksumPid(s):
+ letters = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
+ crc = (~binascii.crc32(s,-1))&0xFFFFFFFF
+ crc = crc ^ (crc >> 16)
+ res = s
+ l = len(letters)
+ for i in (0,1):
+ b = crc & 0xff
+ pos = (b // l) ^ (b % l)
+ res += letters[pos%l]
+ crc >>= 8
+ return res
+
+def getSizeOfTrailingDataEntries(ptr, size, flags):
+ def getSizeOfTrailingDataEntry(ptr, size):
+ bitpos, result = 0, 0
+ if size <= 0:
+ return result
+ while True:
+ v = ord(ptr[size-1])
+ result |= (v & 0x7F) << bitpos
+ bitpos += 7
+ size -= 1
+ if (v & 0x80) != 0 or (bitpos >= 28) or (size == 0):
+ return result
+ num = 0
+ testflags = flags >> 1
+ while testflags:
+ if testflags & 1:
+ num += getSizeOfTrailingDataEntry(ptr, size - num)
+ testflags >>= 1
+ # Check the low bit to see if there's multibyte data present.
+ # if multibyte data is included in the encryped data, we'll
+ # have already cleared this flag.
+ if flags & 1:
+ num += (ord(ptr[size - num - 1]) & 0x3) + 1
+ return num
+
+
+
+class MobiBook:
+ def loadSection(self, section):
+ if (section + 1 == self.num_sections):
+ endoff = len(self.data_file)
+ else:
+ endoff = self.sections[section + 1][0]
+ off = self.sections[section][0]
+ return self.data_file[off:endoff]
+
+ def __init__(self, infile):
+ print ('MobiDeDrm v%(__version__)s. '
+ 'Copyright 2008-2011 The Dark Reverser et al.' % globals())
+
+ # initial sanity check on file
+ self.data_file = file(infile, 'rb').read()
+ self.mobi_data = ''
+ self.header = self.data_file[0:78]
+ if self.header[0x3C:0x3C+8] != 'BOOKMOBI' and self.header[0x3C:0x3C+8] != 'TEXtREAd':
+ raise DrmException("invalid file format")
+ self.magic = self.header[0x3C:0x3C+8]
+ self.crypto_type = -1
+
+ # build up section offset and flag info
+ self.num_sections, = struct.unpack('>H', self.header[76:78])
+ self.sections = []
+ for i in xrange(self.num_sections):
+ offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', self.data_file[78+i*8:78+i*8+8])
+ flags, val = a1, a2<<16|a3<<8|a4
+ self.sections.append( (offset, flags, val) )
+
+ # parse information from section 0
+ self.sect = self.loadSection(0)
+ self.records, = struct.unpack('>H', self.sect[0x8:0x8+2])
+ self.compression, = struct.unpack('>H', self.sect[0x0:0x0+2])
+
+ if self.magic == 'TEXtREAd':
+ print "Book has format: ", self.magic
+ self.extra_data_flags = 0
+ self.mobi_length = 0
+ self.mobi_version = -1
+ self.meta_array = {}
+ return
+ self.mobi_length, = struct.unpack('>L',self.sect[0x14:0x18])
+ self.mobi_codepage, = struct.unpack('>L',self.sect[0x1c:0x20])
+ self.mobi_version, = struct.unpack('>L',self.sect[0x68:0x6C])
+ print "MOBI header version = %d, length = %d" %(self.mobi_version, self.mobi_length)
+ self.extra_data_flags = 0
+ if (self.mobi_length >= 0xE4) and (self.mobi_version >= 5):
+ self.extra_data_flags, = struct.unpack('>H', self.sect[0xF2:0xF4])
+ print "Extra Data Flags = %d" % self.extra_data_flags
+ if (self.compression != 17480):
+ # multibyte utf8 data is included in the encryption for PalmDoc compression
+ # so clear that byte so that we leave it to be decrypted.
+ self.extra_data_flags &= 0xFFFE
+
+ # if exth region exists parse it for metadata array
+ self.meta_array = {}
+ try:
+ exth_flag, = struct.unpack('>L', self.sect[0x80:0x84])
+ exth = 'NONE'
+ if exth_flag & 0x40:
+ exth = self.sect[16 + self.mobi_length:]
+ if (len(exth) >= 4) and (exth[:4] == 'EXTH'):
+ nitems, = struct.unpack('>I', exth[8:12])
+ pos = 12
+ for i in xrange(nitems):
+ type, size = struct.unpack('>II', exth[pos: pos + 8])
+ content = exth[pos + 8: pos + size]
+ self.meta_array[type] = content
+ # reset the text to speech flag and clipping limit, if present
+ if type == 401 and size == 9:
+ # set clipping limit to 100%
+ self.patchSection(0, "\144", 16 + self.mobi_length + pos + 8)
+ elif type == 404 and size == 9:
+ # make sure text to speech is enabled
+ self.patchSection(0, "\0", 16 + self.mobi_length + pos + 8)
+ # print type, size, content, content.encode('hex')
+ pos += size
+ except:
+ self.meta_array = {}
+ pass
+ self.print_replica = False
+
+ def getBookTitle(self):
+ codec_map = {
+ 1252 : 'windows-1252',
+ 65001 : 'utf-8',
+ }
+ title = ''
+ if 503 in self.meta_array:
+ title = self.meta_array[503]
+ else :
+ toff, tlen = struct.unpack('>II', self.sect[0x54:0x5c])
+ tend = toff + tlen
+ title = self.sect[toff:tend]
+ if title == '':
+ title = self.header[:32]
+ title = title.split("\0")[0]
+ codec = 'windows-1252'
+ if self.mobi_codepage in codec_map.keys():
+ codec = codec_map[self.mobi_codepage]
+ return unicode(title, codec).encode('utf-8')
+
+ def getPIDMetaInfo(self):
+ rec209 = ''
+ token = ''
+ if 209 in self.meta_array:
+ rec209 = self.meta_array[209]
+ data = rec209
+ # The 209 data comes in five byte groups. Interpret the last four bytes
+ # of each group as a big endian unsigned integer to get a key value
+ # if that key exists in the meta_array, append its contents to the token
+ for i in xrange(0,len(data),5):
+ val, = struct.unpack('>I',data[i+1:i+5])
+ sval = self.meta_array.get(val,'')
+ token += sval
+ return rec209, token
+
+ def patch(self, off, new):
+ self.data_file = self.data_file[:off] + new + self.data_file[off+len(new):]
+
+ def patchSection(self, section, new, in_off = 0):
+ if (section + 1 == self.num_sections):
+ endoff = len(self.data_file)
else:
- end = min(self.docSize, end)
- foundat = -1
- for j in xrange(pos, end):
- item = self.flatdoc[j]
- if item.find('=') >= 0:
- (name, argres) = item.split('=',1)
- else :
- name = item
- argres = ''
- if name.endswith(tagpath) :
- result = argres
- foundat = j
+ endoff = self.sections[section + 1][0]
+ off = self.sections[section][0]
+ assert off + in_off + len(new) <= endoff
+ self.patch(off + in_off, new)
+
+ def parseDRM(self, data, count, pidlist):
+ found_key = None
+ keyvec1 = "\x72\x38\x33\xB0\xB4\xF2\xE3\xCA\xDF\x09\x01\xD6\xE2\xE0\x3F\x96"
+ for pid in pidlist:
+ bigpid = pid.ljust(16,'\0')
+ temp_key = PC1(keyvec1, bigpid, False)
+ temp_key_sum = sum(map(ord,temp_key)) & 0xff
+ found_key = None
+ for i in xrange(count):
+ verification, size, type, cksum, cookie = struct.unpack('>LLLBxxx32s', data[i*0x30:i*0x30+0x30])
+ if cksum == temp_key_sum:
+ cookie = PC1(temp_key, cookie)
+ ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie)
+ if verification == ver and (flags & 0x1F) == 1:
+ found_key = finalkey
+ break
+ if found_key != None:
break
- return foundat, result
-
- # return list of start positions for the tagpath
- def posinDoc(self, tagpath):
- startpos = []
- pos = 0
- res = ""
- while res != None :
- (foundpos, res) = self.findinDoc(tagpath, pos, -1)
- if res != None :
- startpos.append(foundpos)
- pos = foundpos + 1
- return startpos
-
- def getData(self, path):
- result = None
- cnt = len(self.flatdoc)
- for j in xrange(cnt):
- item = self.flatdoc[j]
- if item.find('=') >= 0:
- (name, argt) = item.split('=')
- argres = argt.split('|')
+ if not found_key:
+ # Then try the default encoding that doesn't require a PID
+ pid = "00000000"
+ temp_key = keyvec1
+ temp_key_sum = sum(map(ord,temp_key)) & 0xff
+ for i in xrange(count):
+ verification, size, type, cksum, cookie = struct.unpack('>LLLBxxx32s', data[i*0x30:i*0x30+0x30])
+ if cksum == temp_key_sum:
+ cookie = PC1(temp_key, cookie)
+ ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie)
+ if verification == ver:
+ found_key = finalkey
+ break
+ return [found_key,pid]
+
+ def getMobiFile(self, outpath):
+ file(outpath,'wb').write(self.mobi_data)
+
+ def getMobiVersion(self):
+ return self.mobi_version
+
+ def getPrintReplica(self):
+ return self.print_replica
+
+ def processBook(self, pidlist):
+ crypto_type, = struct.unpack('>H', self.sect[0xC:0xC+2])
+ print 'Crypto Type is: ', crypto_type
+ self.crypto_type = crypto_type
+ if crypto_type == 0:
+ print "This book is not encrypted."
+ # we must still check for Print Replica
+ self.print_replica = (self.loadSection(1)[0:4] == '%MOP')
+ self.mobi_data = self.data_file
+ return
+ if crypto_type != 2 and crypto_type != 1:
+ raise DrmException("Cannot decode unknown Mobipocket encryption type %d" % crypto_type)
+ if 406 in self.meta_array:
+ data406 = self.meta_array[406]
+ val406, = struct.unpack('>Q',data406)
+ if val406 != 0:
+ raise DrmException("Cannot decode library or rented ebooks.")
+
+ goodpids = []
+ for pid in pidlist:
+ if len(pid)==10:
+ if checksumPid(pid[0:-2]) != pid:
+ print "Warning: PID " + pid + " has incorrect checksum, should have been "+checksumPid(pid[0:-2])
+ goodpids.append(pid[0:-2])
+ elif len(pid)==8:
+ goodpids.append(pid)
+
+ if self.crypto_type == 1:
+ t1_keyvec = "QDCVEPMU675RUBSZ"
+ if self.magic == 'TEXtREAd':
+ bookkey_data = self.sect[0x0E:0x0E+16]
+ elif self.mobi_version < 0:
+ bookkey_data = self.sect[0x90:0x90+16]
else:
- name = item
- argres = []
- if (name.endswith(path)):
- result = argres
- break
- if (len(argres) > 0) :
- for j in xrange(0,len(argres)):
- argres[j] = int(argres[j])
- return result
-
- def getDataatPos(self, path, pos):
- result = None
- item = self.flatdoc[pos]
- if item.find('=') >= 0:
- (name, argt) = item.split('=')
- argres = argt.split('|')
+ bookkey_data = self.sect[self.mobi_length+16:self.mobi_length+32]
+ pid = "00000000"
+ found_key = PC1(t1_keyvec, bookkey_data)
+ else :
+ # calculate the keys
+ drm_ptr, drm_count, drm_size, drm_flags = struct.unpack('>LLLL', self.sect[0xA8:0xA8+16])
+ if drm_count == 0:
+ raise DrmException("Not yet initialised with PID. Must be opened with Mobipocket Reader first.")
+ found_key, pid = self.parseDRM(self.sect[drm_ptr:drm_ptr+drm_size], drm_count, goodpids)
+ if not found_key:
+ raise DrmException("No key found. Please report this failure for help.")
+ # kill the drm keys
+ self.patchSection(0, "\0" * drm_size, drm_ptr)
+ # kill the drm pointers
+ self.patchSection(0, "\xff" * 4 + "\0" * 12, 0xA8)
+
+ if pid=="00000000":
+ print "File has default encryption, no specific PID."
else:
- name = item
- argres = []
- if (len(argres) > 0) :
- for j in xrange(0,len(argres)):
- argres[j] = int(argres[j])
- if (name.endswith(path)):
- result = argres
- return result
-
- def getDataTemp(self, path):
- result = None
- cnt = len(self.temp)
- for j in xrange(cnt):
- item = self.temp[j]
- if item.find('=') >= 0:
- (name, argt) = item.split('=')
- argres = argt.split('|')
- else:
- name = item
- argres = []
- if (name.endswith(path)):
- result = argres
- self.temp.pop(j)
- break
- if (len(argres) > 0) :
- for j in xrange(0,len(argres)):
- argres[j] = int(argres[j])
- return result
-
- def getImages(self):
- result = []
- self.temp = self.flatdoc
- while (self.getDataTemp('img') != None):
- h = self.getDataTemp('img.h')[0]
- w = self.getDataTemp('img.w')[0]
- x = self.getDataTemp('img.x')[0]
- y = self.getDataTemp('img.y')[0]
- src = self.getDataTemp('img.src')[0]
- result.append('<image xlink:href="../img/img%04d.jpg" x="%d" y="%d" width="%d" height="%d" />\n' % (src, x, y, w, h))
- return result
-
- def getGlyphs(self):
- result = []
- if (self.gid != None) and (len(self.gid) > 0):
- glyphs = []
- for j in set(self.gid):
- glyphs.append(j)
- glyphs.sort()
- for gid in glyphs:
- id='id="gl%d"' % gid
- path = self.gd.lookup(id)
- if path:
- result.append(id + ' ' + path)
- return result
-
-
-def convert2SVG(gdict, flat_xml, pageid, previd, nextid, svgDir, raw, meta_array, scaledpi):
- mlst = []
- pp = PParser(gdict, flat_xml, meta_array)
- mlst.append('<?xml version="1.0" standalone="no"?>\n')
- if (raw):
- mlst.append('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
- mlst.append('<svg width="%fin" height="%fin" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (pp.pw / scaledpi, pp.ph / scaledpi, pp.pw -1, pp.ph -1))
- mlst.append('<title>Page %d - %s by %s</title>\n' % (pageid, meta_array['Title'],meta_array['Authors']))
+ print "File is encoded with PID "+checksumPid(pid)+"."
+
+ # clear the crypto type
+ self.patchSection(0, "\0" * 2, 0xC)
+
+ # decrypt sections
+ print "Decrypting. Please wait . . .",
+ mobidataList = []
+ mobidataList.append(self.data_file[:self.sections[1][0]])
+ for i in xrange(1, self.records+1):
+ data = self.loadSection(i)
+ extra_size = getSizeOfTrailingDataEntries(data, len(data), self.extra_data_flags)
+ if i%100 == 0:
+ print ".",
+ # print "record %d, extra_size %d" %(i,extra_size)
+ decoded_data = PC1(found_key, data[0:len(data) - extra_size])
+ if i==1:
+ self.print_replica = (decoded_data[0:4] == '%MOP')
+ mobidataList.append(decoded_data)
+ if extra_size > 0:
+ mobidataList.append(data[-extra_size:])
+ if self.num_sections > self.records+1:
+ mobidataList.append(self.data_file[self.sections[self.records+1][0]:])
+ self.mobi_data = "".join(mobidataList)
+ print "done"
+ return
+
+def getUnencryptedBook(infile,pid):
+ if not os.path.isfile(infile):
+ raise DrmException('Input File Not Found')
+ book = MobiBook(infile)
+ book.processBook([pid])
+ return book.mobi_data
+
+def getUnencryptedBookWithList(infile,pidlist):
+ if not os.path.isfile(infile):
+ raise DrmException('Input File Not Found')
+ book = MobiBook(infile)
+ book.processBook(pidlist)
+ return book.mobi_data
+
+
+def main(argv=sys.argv):
+ print ('MobiDeDrm v%(__version__)s. '
+ 'Copyright 2008-2011 The Dark Reverser et al.' % globals())
+ if len(argv)<3 or len(argv)>4:
+ print "Removes protection from Kindle/Mobipocket and Kindle/Print Replica ebooks"
+ print "Usage:"
+ print " %s <infile> <outfile> [<Comma separated list of PIDs to try>]" % sys.argv[0]
+ return 1
else:
- mlst.append('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n')
- mlst.append('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" ><head>\n')
- mlst.append('<title>Page %d - %s by %s</title>\n' % (pageid, meta_array['Title'],meta_array['Authors']))
- mlst.append('<script><![CDATA[\n')
- mlst.append('function gd(){var p=window.location.href.replace(/^.*\?dpi=(\d+).*$/i,"$1");return p;}\n')
- mlst.append('var dpi=%d;\n' % scaledpi)
- if (previd) :
- mlst.append('var prevpage="page%04d.xhtml";\n' % (previd))
- if (nextid) :
- mlst.append('var nextpage="page%04d.xhtml";\n' % (nextid))
- mlst.append('var pw=%d;var ph=%d;' % (pp.pw, pp.ph))
- mlst.append('function zoomin(){dpi=dpi*(0.8);setsize();}\n')
- mlst.append('function zoomout(){dpi=dpi*1.25;setsize();}\n')
- mlst.append('function setsize(){var svg=document.getElementById("svgimg");var prev=document.getElementById("prevsvg");var next=document.getElementById("nextsvg");var width=(pw/dpi)+"in";var height=(ph/dpi)+"in";svg.setAttribute("width",width);svg.setAttribute("height",height);prev.setAttribute("height",height);prev.setAttribute("width","50px");next.setAttribute("height",height);next.setAttribute("width","50px");}\n')
- mlst.append('function ppage(){window.location.href=prevpage+"?dpi="+Math.round(dpi);}\n')
- mlst.append('function npage(){window.location.href=nextpage+"?dpi="+Math.round(dpi);}\n')
- mlst.append('var gt=gd();if(gt>0){dpi=gt;}\n')
- mlst.append('window.onload=setsize;\n')
- mlst.append(']]></script>\n')
- mlst.append('</head>\n')
- mlst.append('<body onLoad="setsize();" style="background-color:#777;text-align:center;">\n')
- mlst.append('<div style="white-space:nowrap;">\n')
- if previd == None:
- mlst.append('<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n')
+ infile = argv[1]
+ outfile = argv[2]
+ if len(argv) is 4:
+ pidlist = argv[3].split(',')
else:
- mlst.append('<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,150,95,5,95,295" fill="#AAAAAA" /></svg></a>\n')
-
- mlst.append('<a href="javascript:npage();"><svg id="svgimg" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" style="background-color:#FFF;border:1px solid black;">' % (pp.pw, pp.ph))
- if (pp.gid != None):
- mlst.append('<defs>\n')
- gdefs = pp.getGlyphs()
- for j in xrange(0,len(gdefs)):
- mlst.append(gdefs[j])
- mlst.append('</defs>\n')
- img = pp.getImages()
- if (img != None):
- for j in xrange(0,len(img)):
- mlst.append(img[j])
- if (pp.gid != None):
- for j in xrange(0,len(pp.gid)):
- mlst.append('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (pp.gid[j], pp.gx[j], pp.gy[j]))
- if (img == None or len(img) == 0) and (pp.gid == None or len(pp.gid) == 0):
- xpos = "%d" % (pp.pw // 3)
- ypos = "%d" % (pp.ph // 3)
- mlst.append('<text x="' + xpos + '" y="' + ypos + '" font-size="' + meta_array['fontSize'] + '" font-family="Helvetica" stroke="black">This page intentionally left blank.</text>\n')
- if (raw) :
- mlst.append('</svg>')
- else :
- mlst.append('</svg></a>\n')
- if nextid == None:
- mlst.append('<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n')
- else :
- mlst.append('<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,5,5,295,95,150" fill="#AAAAAA" /></svg></a>\n')
- mlst.append('</div>\n')
- mlst.append('<div><a href="javascript:zoomin();">zoom in</a> - <a href="javascript:zoomout();">zoom out</a></div>\n')
- mlst.append('</body>\n')
- mlst.append('</html>\n')
- return "".join(mlst)
+ pidlist = {}
+ try:
+ stripped_file = getUnencryptedBookWithList(infile, pidlist)
+ file(outfile, 'wb').write(stripped_file)
+ except DrmException, e:
+ print "Error: %s" % e
+ return 1
+ return 0
+
+
+if __name__ == "__main__":
+ sys.exit(main())
<key>CFBundleExecutable</key>
<string>droplet</string>
<key>CFBundleGetInfoString</key>
- <string>DeDRM 5.0, Written 2010–2012 by Apprentice Alf and others.</string>
+ <string>DeDRM 5.1, Written 2010–2012 by Apprentice Alf and others.</string>
<key>CFBundleIconFile</key>
<string>droplet</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
- <string>DeDRM 5.0</string>
+ <string>DeDRM 5.1</string>
<key>CFBundlePackageType</key>
<string>APPL</string>
<key>CFBundleShortVersionString</key>
- <string>5.0</string>
+ <string>5.1</string>
<key>CFBundleSignature</key>
<string>dplt</string>
<key>LSMinimumSystemVersion</key>
<key>positionOfDivider</key>
<real>460</real>
<key>savedFrame</key>
- <string>-2 132 1316 746 0 0 1440 878 </string>
+ <string>1518 90 1316 746 1440 -150 1680 1050 </string>
<key>selectedTabView</key>
<string>event log</string>
</dict>
'region.y' : (1, 'scalar_number', 0, 0),
'region.h' : (1, 'scalar_number', 0, 0),
'region.w' : (1, 'scalar_number', 0, 0),
+ 'region.orientation' : (1, 'scalar_number', 0, 0),
'empty_text_region' : (1, 'snippets', 1, 0),
nodename = fullpathname.pop()
ilvl = len(fullpathname)
indent = ' ' * (3 * ilvl)
- result = indent + '<' + nodename + '>'
+ rlst = []
+ rlst.append(indent + '<' + nodename + '>')
if len(argList) > 0:
- argres = ''
+ alst = []
for j in argList:
if (argtype == 'text') or (argtype == 'scalar_text') :
- argres += j + '|'
+ alst.append(j + '|')
else :
- argres += str(j) + ','
+ alst.append(str(j) + ',')
+ argres = "".join(alst)
argres = argres[0:-1]
if argtype == 'snippets' :
- result += 'snippets:' + argres
+ rlst.append('snippets:' + argres)
else :
- result += argres
+ rlst.append(argres)
if len(subtagList) > 0 :
- result += '\n'
+ rlst.append('\n')
for j in subtagList:
if len(j) > 0 :
- result += self.formatTag(j)
- result += indent + '</' + nodename + '>\n'
+ rlst.append(self.formatTag(j))
+ rlst.append(indent + '</' + nodename + '>\n')
else:
- result += '</' + nodename + '>\n'
- return result
+ rlst.append('</' + nodename + '>\n')
+ return "".join(rlst)
# flatten tag
subtagList = node[1]
argtype = node[2]
argList = node[3]
- result = name
+ rlst = []
+ rlst.append(name)
if (len(argList) > 0):
- argres = ''
+ alst = []
for j in argList:
if (argtype == 'text') or (argtype == 'scalar_text') :
- argres += j + '|'
+ alst.append(j + '|')
else :
- argres += str(j) + '|'
+ alst.append(str(j) + '|')
+ argres = "".join(alst)
argres = argres[0:-1]
if argtype == 'snippets' :
- result += '.snippets=' + argres
+ rlst.append('.snippets=' + argres)
else :
- result += '=' + argres
- result += '\n'
+ rlst.append('=' + argres)
+ rlst.append('\n')
for j in subtagList:
if len(j) > 0 :
- result += self.flattenTag(j)
- return result
+ rlst.append(self.flattenTag(j))
+ return "".join(rlst)
# reduce create xml output
def formatDoc(self, flat_xml):
- result = ''
+ rlst = []
for j in self.doc :
if len(j) > 0:
if flat_xml:
- result += self.flattenTag(j)
+ rlst.append(self.flattenTag(j))
else:
- result += self.formatTag(j)
+ rlst.append(self.formatTag(j))
+ result = "".join(rlst)
if self.debug : print result
return result
def process(self):
- htmlpage = ''
tocinfo = ''
+ hlst = []
# get the ocr text
(pos, argres) = self.findinDoc('info.word.ocrText',0,-1)
# set anchor for link target on this page
if not anchorSet and not first_para_continued:
- htmlpage += '<div style="visibility: hidden; height: 0; width: 0;" id="'
- htmlpage += self.id + '" title="pagetype_' + pagetype + '"></div>\n'
+ hlst.append('<div style="visibility: hidden; height: 0; width: 0;" id="')
+ hlst.append(self.id + '" title="pagetype_' + pagetype + '"></div>\n')
anchorSet = True
# handle groups of graphics with text captions
if grptype != None:
if grptype == 'graphic':
gcstr = ' class="' + grptype + '"'
- htmlpage += '<div' + gcstr + '>'
+ hlst.append('<div' + gcstr + '>')
inGroup = True
elif (etype == 'grpend'):
if inGroup:
- htmlpage += '</div>\n'
+ hlst.append('</div>\n')
inGroup = False
else:
(pos, simgsrc) = self.findinDoc('img.src',start,end)
if simgsrc:
if inGroup:
- htmlpage += '<img src="img/img%04d.jpg" alt="" />' % int(simgsrc)
+ hlst.append('<img src="img/img%04d.jpg" alt="" />' % int(simgsrc))
else:
- htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
+ hlst.append('<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc))
elif regtype == 'chapterheading' :
(pclass, pdesc) = self.getParaDescription(start,end, regtype)
if not breakSet:
- htmlpage += '<div style="page-break-after: always;"> </div>\n'
+ hlst.append('<div style="page-break-after: always;"> </div>\n')
breakSet = True
tag = 'h1'
if pclass and (len(pclass) >= 7):
if pclass[3:7] == 'ch1-' : tag = 'h1'
if pclass[3:7] == 'ch2-' : tag = 'h2'
if pclass[3:7] == 'ch3-' : tag = 'h3'
- htmlpage += '<' + tag + ' class="' + pclass + '">'
+ hlst.append('<' + tag + ' class="' + pclass + '">')
else:
- htmlpage += '<' + tag + '>'
- htmlpage += self.buildParagraph(pclass, pdesc, 'middle', regtype)
- htmlpage += '</' + tag + '>'
+ hlst.append('<' + tag + '>')
+ hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype))
+ hlst.append('</' + tag + '>')
elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') or (regtype == 'listitem'):
ptype = 'full'
if pclass[3:6] == 'h1-' : tag = 'h4'
if pclass[3:6] == 'h2-' : tag = 'h5'
if pclass[3:6] == 'h3-' : tag = 'h6'
- htmlpage += '<' + tag + ' class="' + pclass + '">'
- htmlpage += self.buildParagraph(pclass, pdesc, 'middle', regtype)
- htmlpage += '</' + tag + '>'
+ hlst.append('<' + tag + ' class="' + pclass + '">')
+ hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype))
+ hlst.append('</' + tag + '>')
else :
- htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
+ hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype))
elif (regtype == 'tocentry') :
ptype = 'full'
first_para_continued = False
(pclass, pdesc) = self.getParaDescription(start,end, regtype)
tocinfo += self.buildTOCEntry(pdesc)
- htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
+ hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype))
elif (regtype == 'vertical') or (regtype == 'table') :
ptype = 'full'
ptype = 'end'
first_para_continued = False
(pclass, pdesc) = self.getParaDescription(start, end, regtype)
- htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
+ hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype))
elif (regtype == 'synth_fcvr.center'):
(pos, simgsrc) = self.findinDoc('img.src',start,end)
if simgsrc:
- htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
+ hlst.append('<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc))
else :
print ' Making region type', regtype,
if pclass[3:6] == 'h1-' : tag = 'h4'
if pclass[3:6] == 'h2-' : tag = 'h5'
if pclass[3:6] == 'h3-' : tag = 'h6'
- htmlpage += '<' + tag + ' class="' + pclass + '">'
- htmlpage += self.buildParagraph(pclass, pdesc, 'middle', regtype)
- htmlpage += '</' + tag + '>'
+ hlst.append('<' + tag + ' class="' + pclass + '">')
+ hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype))
+ hlst.append('</' + tag + '>')
else :
- htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
+ hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype))
else :
print ' a "graphic" region'
(pos, simgsrc) = self.findinDoc('img.src',start,end)
if simgsrc:
- htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
+ hlst.append('<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc))
+ htmlpage = "".join(hlst)
if last_para_continued :
if htmlpage[-4:] == '</p>':
htmlpage = htmlpage[0:-4]
def convert2SVG(gdict, flat_xml, pageid, previd, nextid, svgDir, raw, meta_array, scaledpi):
- ml = ''
+ mlst = []
pp = PParser(gdict, flat_xml, meta_array)
- ml += '<?xml version="1.0" standalone="no"?>\n'
+ mlst.append('<?xml version="1.0" standalone="no"?>\n')
if (raw):
- ml += '<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n'
- ml += '<svg width="%fin" height="%fin" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (pp.pw / scaledpi, pp.ph / scaledpi, pp.pw -1, pp.ph -1)
- ml += '<title>Page %d - %s by %s</title>\n' % (pageid, meta_array['Title'],meta_array['Authors'])
+ mlst.append('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
+ mlst.append('<svg width="%fin" height="%fin" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (pp.pw / scaledpi, pp.ph / scaledpi, pp.pw -1, pp.ph -1))
+ mlst.append('<title>Page %d - %s by %s</title>\n' % (pageid, meta_array['Title'],meta_array['Authors']))
else:
- ml += '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n'
- ml += '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" ><head>\n'
- ml += '<title>Page %d - %s by %s</title>\n' % (pageid, meta_array['Title'],meta_array['Authors'])
- ml += '<script><![CDATA[\n'
- ml += 'function gd(){var p=window.location.href.replace(/^.*\?dpi=(\d+).*$/i,"$1");return p;}\n'
- ml += 'var dpi=%d;\n' % scaledpi
+ mlst.append('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n')
+ mlst.append('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" ><head>\n')
+ mlst.append('<title>Page %d - %s by %s</title>\n' % (pageid, meta_array['Title'],meta_array['Authors']))
+ mlst.append('<script><![CDATA[\n')
+ mlst.append('function gd(){var p=window.location.href.replace(/^.*\?dpi=(\d+).*$/i,"$1");return p;}\n')
+ mlst.append('var dpi=%d;\n' % scaledpi)
if (previd) :
- ml += 'var prevpage="page%04d.xhtml";\n' % (previd)
+ mlst.append('var prevpage="page%04d.xhtml";\n' % (previd))
if (nextid) :
- ml += 'var nextpage="page%04d.xhtml";\n' % (nextid)
- ml += 'var pw=%d;var ph=%d;' % (pp.pw, pp.ph)
- ml += 'function zoomin(){dpi=dpi*(0.8);setsize();}\n'
- ml += 'function zoomout(){dpi=dpi*1.25;setsize();}\n'
- ml += 'function setsize(){var svg=document.getElementById("svgimg");var prev=document.getElementById("prevsvg");var next=document.getElementById("nextsvg");var width=(pw/dpi)+"in";var height=(ph/dpi)+"in";svg.setAttribute("width",width);svg.setAttribute("height",height);prev.setAttribute("height",height);prev.setAttribute("width","50px");next.setAttribute("height",height);next.setAttribute("width","50px");}\n'
- ml += 'function ppage(){window.location.href=prevpage+"?dpi="+Math.round(dpi);}\n'
- ml += 'function npage(){window.location.href=nextpage+"?dpi="+Math.round(dpi);}\n'
- ml += 'var gt=gd();if(gt>0){dpi=gt;}\n'
- ml += 'window.onload=setsize;\n'
- ml += ']]></script>\n'
- ml += '</head>\n'
- ml += '<body onLoad="setsize();" style="background-color:#777;text-align:center;">\n'
- ml += '<div style="white-space:nowrap;">\n'
+ mlst.append('var nextpage="page%04d.xhtml";\n' % (nextid))
+ mlst.append('var pw=%d;var ph=%d;' % (pp.pw, pp.ph))
+ mlst.append('function zoomin(){dpi=dpi*(0.8);setsize();}\n')
+ mlst.append('function zoomout(){dpi=dpi*1.25;setsize();}\n')
+ mlst.append('function setsize(){var svg=document.getElementById("svgimg");var prev=document.getElementById("prevsvg");var next=document.getElementById("nextsvg");var width=(pw/dpi)+"in";var height=(ph/dpi)+"in";svg.setAttribute("width",width);svg.setAttribute("height",height);prev.setAttribute("height",height);prev.setAttribute("width","50px");next.setAttribute("height",height);next.setAttribute("width","50px");}\n')
+ mlst.append('function ppage(){window.location.href=prevpage+"?dpi="+Math.round(dpi);}\n')
+ mlst.append('function npage(){window.location.href=nextpage+"?dpi="+Math.round(dpi);}\n')
+ mlst.append('var gt=gd();if(gt>0){dpi=gt;}\n')
+ mlst.append('window.onload=setsize;\n')
+ mlst.append(']]></script>\n')
+ mlst.append('</head>\n')
+ mlst.append('<body onLoad="setsize();" style="background-color:#777;text-align:center;">\n')
+ mlst.append('<div style="white-space:nowrap;">\n')
if previd == None:
- ml += '<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n'
+ mlst.append('<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n')
else:
- ml += '<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,150,95,5,95,295" fill="#AAAAAA" /></svg></a>\n'
+ mlst.append('<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,150,95,5,95,295" fill="#AAAAAA" /></svg></a>\n')
- ml += '<a href="javascript:npage();"><svg id="svgimg" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" style="background-color:#FFF;border:1px solid black;">' % (pp.pw, pp.ph)
+ mlst.append('<a href="javascript:npage();"><svg id="svgimg" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" style="background-color:#FFF;border:1px solid black;">' % (pp.pw, pp.ph))
if (pp.gid != None):
- ml += '<defs>\n'
+ mlst.append('<defs>\n')
gdefs = pp.getGlyphs()
for j in xrange(0,len(gdefs)):
- ml += gdefs[j]
- ml += '</defs>\n'
+ mlst.append(gdefs[j])
+ mlst.append('</defs>\n')
img = pp.getImages()
if (img != None):
for j in xrange(0,len(img)):
- ml += img[j]
+ mlst.append(img[j])
if (pp.gid != None):
for j in xrange(0,len(pp.gid)):
- ml += '<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (pp.gid[j], pp.gx[j], pp.gy[j])
+ mlst.append('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (pp.gid[j], pp.gx[j], pp.gy[j]))
if (img == None or len(img) == 0) and (pp.gid == None or len(pp.gid) == 0):
xpos = "%d" % (pp.pw // 3)
ypos = "%d" % (pp.ph // 3)
- ml += '<text x="' + xpos + '" y="' + ypos + '" font-size="' + meta_array['fontSize'] + '" font-family="Helvetica" stroke="black">This page intentionally left blank.</text>\n'
+ mlst.append('<text x="' + xpos + '" y="' + ypos + '" font-size="' + meta_array['fontSize'] + '" font-family="Helvetica" stroke="black">This page intentionally left blank.</text>\n')
if (raw) :
- ml += '</svg>'
+ mlst.append('</svg>')
else :
- ml += '</svg></a>\n'
+ mlst.append('</svg></a>\n')
if nextid == None:
- ml += '<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n'
+ mlst.append('<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n')
else :
- ml += '<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,5,5,295,95,150" fill="#AAAAAA" /></svg></a>\n'
- ml += '</div>\n'
- ml += '<div><a href="javascript:zoomin();">zoom in</a> - <a href="javascript:zoomout();">zoom out</a></div>\n'
- ml += '</body>\n'
- ml += '</html>\n'
- return ml
+ mlst.append('<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,5,5,295,95,150" fill="#AAAAAA" /></svg></a>\n')
+ mlst.append('</div>\n')
+ mlst.append('<div><a href="javascript:zoomin();">zoom in</a> - <a href="javascript:zoomout();">zoom out</a></div>\n')
+ mlst.append('</body>\n')
+ mlst.append('</html>\n')
+ return "".join(mlst)
import flatxml2svg
import stylexml2css
+# global switch
+buildXML = False
# Get a 7 bit encoded number from a file
def readEncodedNumber(file):
if not os.path.exists(svgDir) :
os.makedirs(svgDir)
- xmlDir = os.path.join(bookDir,'xml')
- if not os.path.exists(xmlDir) :
- os.makedirs(xmlDir)
+ if buildXML:
+ xmlDir = os.path.join(bookDir,'xml')
+ if not os.path.exists(xmlDir) :
+ os.makedirs(xmlDir)
otherFile = os.path.join(bookDir,'other0000.dat')
if not os.path.exists(otherFile) :
authors = authors.replace('>','>')
meta_array['Authors'] = authors
- xname = os.path.join(xmlDir, 'metadata.xml')
- metastr = ''
- for key in meta_array:
- metastr += '<meta name="' + key + '" content="' + meta_array[key] + '" />\n'
- file(xname, 'wb').write(metastr)
+ if buildXML:
+ xname = os.path.join(xmlDir, 'metadata.xml')
+ mlst = []
+ for key in meta_array:
+ mlst.append('<meta name="' + key + '" content="' + meta_array[key] + '" />\n')
+ metastr = "".join(mlst)
+ mlst = None
+ file(xname, 'wb').write(metastr)
print 'Processing StyleSheet'
+
# get some scaling info from metadata to use while processing styles
+ # and first page info
+
fontsize = '135'
if 'fontSize' in meta_array:
fontsize = meta_array['fontSize']
# also get the size of a normal text page
+ # get the total number of pages unpacked as a safety check
+ filenames = os.listdir(pageDir)
+ numfiles = len(filenames)
+
spage = '1'
if 'firstTextPage' in meta_array:
spage = meta_array['firstTextPage']
pnum = int(spage)
+ if pnum >= numfiles or pnum < 0:
+ # metadata is wrong, so just select a page near the front
+ # (10% of the way into the book) to get a normal text page
+ pnum = int(0.10 * numfiles)
+ # print "first normal text page is", spage
# get page height and width from first text page for use in stylesheet scaling
pname = 'page%04d.dat' % (pnum + 1)
# now get the css info
cssstr , classlst = stylexml2css.convert2CSS(flat_xml, fontsize, ph, pw)
file(xname, 'wb').write(cssstr)
- xname = os.path.join(xmlDir, 'other0000.xml')
- file(xname, 'wb').write(convert2xml.getXML(dict, otherFile))
+ if buildXML:
+ xname = os.path.join(xmlDir, 'other0000.xml')
+ file(xname, 'wb').write(convert2xml.getXML(dict, otherFile))
print 'Processing Glyphs'
gd = GlyphDict()
fname = os.path.join(glyphsDir,filename)
flat_xml = convert2xml.fromData(dict, fname)
- xname = os.path.join(xmlDir, filename.replace('.dat','.xml'))
- file(xname, 'wb').write(convert2xml.getXML(dict, fname))
+ if buildXML:
+ xname = os.path.join(xmlDir, filename.replace('.dat','.xml'))
+ file(xname, 'wb').write(convert2xml.getXML(dict, fname))
gp = GParser(flat_xml)
for i in xrange(0, gp.count):
glyfile.close()
print " "
- # build up tocentries while processing html
- tocentries = ''
# start up the html
+ # also build up tocentries while processing html
htmlFileName = "book.html"
- htmlstr = '<?xml version="1.0" encoding="utf-8"?>\n'
- htmlstr += '<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.1 Strict//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11-strict.dtd">\n'
- htmlstr += '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">\n'
- htmlstr += '<head>\n'
- htmlstr += '<meta http-equiv="content-type" content="text/html; charset=utf-8"/>\n'
- htmlstr += '<title>' + meta_array['Title'] + ' by ' + meta_array['Authors'] + '</title>\n'
- htmlstr += '<meta name="Author" content="' + meta_array['Authors'] + '" />\n'
- htmlstr += '<meta name="Title" content="' + meta_array['Title'] + '" />\n'
+ hlst = []
+ hlst.append('<?xml version="1.0" encoding="utf-8"?>\n')
+ hlst.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.1 Strict//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11-strict.dtd">\n')
+ hlst.append('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">\n')
+ hlst.append('<head>\n')
+ hlst.append('<meta http-equiv="content-type" content="text/html; charset=utf-8"/>\n')
+ hlst.append('<title>' + meta_array['Title'] + ' by ' + meta_array['Authors'] + '</title>\n')
+ hlst.append('<meta name="Author" content="' + meta_array['Authors'] + '" />\n')
+ hlst.append('<meta name="Title" content="' + meta_array['Title'] + '" />\n')
if 'ASIN' in meta_array:
- htmlstr += '<meta name="ASIN" content="' + meta_array['ASIN'] + '" />\n'
+ hlst.append('<meta name="ASIN" content="' + meta_array['ASIN'] + '" />\n')
if 'GUID' in meta_array:
- htmlstr += '<meta name="GUID" content="' + meta_array['GUID'] + '" />\n'
- htmlstr += '<link href="style.css" rel="stylesheet" type="text/css" />\n'
- htmlstr += '</head>\n<body>\n'
+ hlst.append('<meta name="GUID" content="' + meta_array['GUID'] + '" />\n')
+ hlst.append('<link href="style.css" rel="stylesheet" type="text/css" />\n')
+ hlst.append('</head>\n<body>\n')
print 'Processing Pages'
# Books are at 1440 DPI. This is rendering at twice that size for
numfiles = len(filenames)
xmllst = []
+ elst = []
for filename in filenames:
# print ' ', filename
# keep flat_xml for later svg processing
xmllst.append(flat_xml)
- xname = os.path.join(xmlDir, filename.replace('.dat','.xml'))
- file(xname, 'wb').write(convert2xml.getXML(dict, fname))
+ if buildXML:
+ xname = os.path.join(xmlDir, filename.replace('.dat','.xml'))
+ file(xname, 'wb').write(convert2xml.getXML(dict, fname))
# first get the html
pagehtml, tocinfo = flatxml2html.convert2HTML(flat_xml, classlst, fname, bookDir, gd, fixedimage)
- tocentries += tocinfo
- htmlstr += pagehtml
+ elst.append(tocinfo)
+ hlst.append(pagehtml)
# finish up the html string and output it
- htmlstr += '</body>\n</html>\n'
+ hlst.append('</body>\n</html>\n')
+ htmlstr = "".join(hlst)
+ hlst = None
file(os.path.join(bookDir, htmlFileName), 'wb').write(htmlstr)
print " "
print 'Extracting Table of Contents from Amazon OCR'
# first create a table of contents file for the svg images
- tochtml = '<?xml version="1.0" encoding="utf-8"?>\n'
- tochtml += '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n'
- tochtml += '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >'
- tochtml += '<head>\n'
- tochtml += '<title>' + meta_array['Title'] + '</title>\n'
- tochtml += '<meta name="Author" content="' + meta_array['Authors'] + '" />\n'
- tochtml += '<meta name="Title" content="' + meta_array['Title'] + '" />\n'
+ tlst = []
+ tlst.append('<?xml version="1.0" encoding="utf-8"?>\n')
+ tlst.append('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n')
+ tlst.append('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >')
+ tlst.append('<head>\n')
+ tlst.append('<title>' + meta_array['Title'] + '</title>\n')
+ tlst.append('<meta name="Author" content="' + meta_array['Authors'] + '" />\n')
+ tlst.append('<meta name="Title" content="' + meta_array['Title'] + '" />\n')
if 'ASIN' in meta_array:
- tochtml += '<meta name="ASIN" content="' + meta_array['ASIN'] + '" />\n'
+ tlst.append('<meta name="ASIN" content="' + meta_array['ASIN'] + '" />\n')
if 'GUID' in meta_array:
- tochtml += '<meta name="GUID" content="' + meta_array['GUID'] + '" />\n'
- tochtml += '</head>\n'
- tochtml += '<body>\n'
+ tlst.append('<meta name="GUID" content="' + meta_array['GUID'] + '" />\n')
+ tlst.append('</head>\n')
+ tlst.append('<body>\n')
- tochtml += '<h2>Table of Contents</h2>\n'
+ tlst.append('<h2>Table of Contents</h2>\n')
start = pageidnums[0]
if (raw):
startname = 'page%04d.svg' % start
else:
startname = 'page%04d.xhtml' % start
- tochtml += '<h3><a href="' + startname + '">Start of Book</a></h3>\n'
+ tlst.append('<h3><a href="' + startname + '">Start of Book</a></h3>\n')
# build up a table of contents for the svg xhtml output
+ tocentries = "".join(elst)
+ elst = None
toclst = tocentries.split('\n')
toclst.pop()
for entry in toclst:
fname = 'page%04d.svg' % id
else:
fname = 'page%04d.xhtml' % id
- tochtml += '<h3><a href="'+ fname + '">' + title + '</a></h3>\n'
- tochtml += '</body>\n'
- tochtml += '</html>\n'
+ tlst.append('<h3><a href="'+ fname + '">' + title + '</a></h3>\n')
+ tlst.append('</body>\n')
+ tlst.append('</html>\n')
+ tochtml = "".join(tlst)
file(os.path.join(svgDir, 'toc.xhtml'), 'wb').write(tochtml)
# now create index_svg.xhtml that points to all required files
- svgindex = '<?xml version="1.0" encoding="utf-8"?>\n'
- svgindex += '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n'
- svgindex += '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >'
- svgindex += '<head>\n'
- svgindex += '<title>' + meta_array['Title'] + '</title>\n'
- svgindex += '<meta name="Author" content="' + meta_array['Authors'] + '" />\n'
- svgindex += '<meta name="Title" content="' + meta_array['Title'] + '" />\n'
+ slst = []
+ slst.append('<?xml version="1.0" encoding="utf-8"?>\n')
+ slst.append('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n')
+ slst.append('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >')
+ slst.append('<head>\n')
+ slst.append('<title>' + meta_array['Title'] + '</title>\n')
+ slst.append('<meta name="Author" content="' + meta_array['Authors'] + '" />\n')
+ slst.append('<meta name="Title" content="' + meta_array['Title'] + '" />\n')
if 'ASIN' in meta_array:
- svgindex += '<meta name="ASIN" content="' + meta_array['ASIN'] + '" />\n'
+ slst.append('<meta name="ASIN" content="' + meta_array['ASIN'] + '" />\n')
if 'GUID' in meta_array:
- svgindex += '<meta name="GUID" content="' + meta_array['GUID'] + '" />\n'
- svgindex += '</head>\n'
- svgindex += '<body>\n'
+ slst.append('<meta name="GUID" content="' + meta_array['GUID'] + '" />\n')
+ slst.append('</head>\n')
+ slst.append('<body>\n')
print "Building svg images of each book page"
- svgindex += '<h2>List of Pages</h2>\n'
- svgindex += '<div>\n'
+ slst.append('<h2>List of Pages</h2>\n')
+ slst.append('<div>\n')
idlst = sorted(pageIDMap.keys())
numids = len(idlst)
cnt = len(idlst)
nextid = None
print '.',
pagelst = pageIDMap[pageid]
- flat_svg = ''
+ flst = []
for page in pagelst:
- flat_svg += xmllst[page]
+ flst.append(xmllst[page])
+ flat_svg = "".join(flst)
+ flst=None
svgxml = flatxml2svg.convert2SVG(gd, flat_svg, pageid, previd, nextid, svgDir, raw, meta_array, scaledpi)
if (raw) :
pfile = open(os.path.join(svgDir,'page%04d.svg' % pageid),'w')
- svgindex += '<a href="svg/page%04d.svg">Page %d</a>\n' % (pageid, pageid)
+ slst.append('<a href="svg/page%04d.svg">Page %d</a>\n' % (pageid, pageid))
else :
pfile = open(os.path.join(svgDir,'page%04d.xhtml' % pageid), 'w')
- svgindex += '<a href="svg/page%04d.xhtml">Page %d</a>\n' % (pageid, pageid)
+ slst.append('<a href="svg/page%04d.xhtml">Page %d</a>\n' % (pageid, pageid))
previd = pageid
pfile.write(svgxml)
pfile.close()
counter += 1
- svgindex += '</div>\n'
- svgindex += '<h2><a href="svg/toc.xhtml">Table of Contents</a></h2>\n'
- svgindex += '</body>\n</html>\n'
+ slst.append('</div>\n')
+ slst.append('<h2><a href="svg/toc.xhtml">Table of Contents</a></h2>\n')
+ slst.append('</body>\n</html>\n')
+ svgindex = "".join(slst)
+ slst = None
file(os.path.join(bookDir, 'index_svg.xhtml'), 'wb').write(svgindex)
print " "
# build the opf file
opfname = os.path.join(bookDir, 'book.opf')
- opfstr = '<?xml version="1.0" encoding="utf-8"?>\n'
- opfstr += '<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="guid_id">\n'
+ olst = []
+ olst.append('<?xml version="1.0" encoding="utf-8"?>\n')
+ olst.append('<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="guid_id">\n')
# adding metadata
- opfstr += ' <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">\n'
+ olst.append(' <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">\n')
if 'GUID' in meta_array:
- opfstr += ' <dc:identifier opf:scheme="GUID" id="guid_id">' + meta_array['GUID'] + '</dc:identifier>\n'
+ olst.append(' <dc:identifier opf:scheme="GUID" id="guid_id">' + meta_array['GUID'] + '</dc:identifier>\n')
if 'ASIN' in meta_array:
- opfstr += ' <dc:identifier opf:scheme="ASIN">' + meta_array['ASIN'] + '</dc:identifier>\n'
+ olst.append(' <dc:identifier opf:scheme="ASIN">' + meta_array['ASIN'] + '</dc:identifier>\n')
if 'oASIN' in meta_array:
- opfstr += ' <dc:identifier opf:scheme="oASIN">' + meta_array['oASIN'] + '</dc:identifier>\n'
- opfstr += ' <dc:title>' + meta_array['Title'] + '</dc:title>\n'
- opfstr += ' <dc:creator opf:role="aut">' + meta_array['Authors'] + '</dc:creator>\n'
- opfstr += ' <dc:language>en</dc:language>\n'
- opfstr += ' <dc:date>' + meta_array['UpdateTime'] + '</dc:date>\n'
+ olst.append(' <dc:identifier opf:scheme="oASIN">' + meta_array['oASIN'] + '</dc:identifier>\n')
+ olst.append(' <dc:title>' + meta_array['Title'] + '</dc:title>\n')
+ olst.append(' <dc:creator opf:role="aut">' + meta_array['Authors'] + '</dc:creator>\n')
+ olst.append(' <dc:language>en</dc:language>\n')
+ olst.append(' <dc:date>' + meta_array['UpdateTime'] + '</dc:date>\n')
if isCover:
- opfstr += ' <meta name="cover" content="bookcover"/>\n'
- opfstr += ' </metadata>\n'
- opfstr += '<manifest>\n'
- opfstr += ' <item id="book" href="book.html" media-type="application/xhtml+xml"/>\n'
- opfstr += ' <item id="stylesheet" href="style.css" media-type="text/css"/>\n'
+ olst.append(' <meta name="cover" content="bookcover"/>\n')
+ olst.append(' </metadata>\n')
+ olst.append('<manifest>\n')
+ olst.append(' <item id="book" href="book.html" media-type="application/xhtml+xml"/>\n')
+ olst.append(' <item id="stylesheet" href="style.css" media-type="text/css"/>\n')
# adding image files to manifest
filenames = os.listdir(imgDir)
filenames = sorted(filenames)
imgext = 'jpeg'
if imgext == '.svg':
imgext = 'svg+xml'
- opfstr += ' <item id="' + imgname + '" href="img/' + filename + '" media-type="image/' + imgext + '"/>\n'
+ olst.append(' <item id="' + imgname + '" href="img/' + filename + '" media-type="image/' + imgext + '"/>\n')
if isCover:
- opfstr += ' <item id="bookcover" href="cover.jpg" media-type="image/jpeg" />\n'
- opfstr += '</manifest>\n'
+ olst.append(' <item id="bookcover" href="cover.jpg" media-type="image/jpeg" />\n')
+ olst.append('</manifest>\n')
# adding spine
- opfstr += '<spine>\n <itemref idref="book" />\n</spine>\n'
+ olst.append('<spine>\n <itemref idref="book" />\n</spine>\n')
if isCover:
- opfstr += ' <guide>\n'
- opfstr += ' <reference href="cover.jpg" type="cover" title="Cover"/>\n'
- opfstr += ' </guide>\n'
- opfstr += '</package>\n'
+ olst.append(' <guide>\n')
+ olst.append(' <reference href="cover.jpg" type="cover" title="Cover"/>\n')
+ olst.append(' </guide>\n')
+ olst.append('</package>\n')
+ opfstr = "".join(olst)
+ olst = None
file(opfname, 'wb').write(opfstr)
print 'Processing Complete'
def main(argv):
bookDir = ''
-
if len(argv) == 0:
argv = sys.argv
# and many many others
-__version__ = '4.0'
+__version__ = '4.2'
class Unbuffered:
def __init__(self, stream):
import re
import traceback
+buildXML = False
+
class DrmException(Exception):
pass
return one
def decryptBook(infile, outdir, k4, kInfoFiles, serials, pids):
+ global buildXML
+
# handle the obvious cases at the beginning
if not os.path.isfile(infile):
print >>sys.stderr, ('K4MobiDeDrm v%(__version__)s\n' % globals()) + "Error: Input file does not exist"
if mobi:
if mb.getPrintReplica():
outfile = os.path.join(outdir, outfilename + '_nodrm' + '.azw4')
+ elif mb.getMobiVersion() >= 8:
+ outfile = os.path.join(outdir, outfilename + '_nodrm' + '.azw3')
else:
outfile = os.path.join(outdir, outfilename + '_nodrm' + '.mobi')
mb.getMobiFile(outfile)
zipname = os.path.join(outdir, outfilename + '_SVG' + '.zip')
mb.getSVGZip(zipname)
- print " Creating XML ZIP Archive"
- zipname = os.path.join(outdir, outfilename + '_XML' + '.zip')
- mb.getXMLZip(zipname)
+ if buildXML:
+ print " Creating XML ZIP Archive"
+ zipname = os.path.join(outdir, outfilename + '_XML' + '.zip')
+ mb.getXMLZip(zipname)
# remove internal temporary directory of Topaz pieces
mb.cleanup()
#
# int AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key);
#
- # note: the ivec string, and output buffer are mutable
+ # note: the ivec string, and output buffer are both mutable
# void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
# const unsigned long length, const AES_KEY *key, unsigned char *ivec, const int enc);
return ctx.digest()
# For K4PC 1.9.X
-# need to use routines from openssl
-# AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p,c_int])
-# AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',[c_char_p, c_int, AES_KEY_p])
-# PKCS5_PBKDF2_HMAC_SHA1 = F(c_int, 'PKCS5_PBKDF2_HMAC_SHA1',
-# [c_char_p, c_ulong, c_char_p, c_ulong, c_ulong, c_ulong, c_char_p])
-# but the user may not have openssl installed or their version is a hacked one that was shipped
-# with many ethernet cards that used software instead of hardware routines
-# so using pure python implementations
-from pbkdf2 import pbkdf2
-import aescbc
+# use routines in alfcrypto:
+# AES_cbc_encrypt
+# AES_set_decrypt_key
+# PKCS5_PBKDF2_HMAC_SHA1
+
+from alfcrypto import AES_CBC, KeyIVGen
def UnprotectHeaderData(encryptedData):
passwdData = 'header_key_data'
salt = 'HEADER.2011'
iter = 0x80
keylen = 0x100
- key_iv = pbkdf2(passwdData, salt, iter, keylen)
+ key_iv = KeyIVGen().pbkdf2(passwdData, salt, iter, keylen)
key = key_iv[0:32]
iv = key_iv[32:48]
- aes=aescbc.AES_CBC(key, aescbc.noPadding() ,32)
- cleartext = aes.decrypt(iv + encryptedData)
+ aes=AES_CBC()
+ aes.set_decrypt_key(key, iv)
+ cleartext = aes.decrypt(encryptedData)
return cleartext
-
# simple primes table (<= n) calculator
def primes(n):
if n==2: return [2]
regkey = winreg.OpenKey(winreg.HKEY_CURRENT_USER, "Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\Shell Folders\\")
path = winreg.QueryValueEx(regkey, 'Local AppData')[0]
+ # some 64 bit machines do not have the proper registry key for some reason
+ # or the python interface to the 32 vs 64 bit registry is broken
+ if 'LOCALAPPDATA' in os.environ.keys():
+ path = os.environ['LOCALAPPDATA']
+
+ print "searching for kinfoFiles in ", path
+
# first look for older kindle-info files
kinfopath = path +'\\Amazon\\Kindle For PC\\{AMAwzsaPaaZAzmZzZQzgZCAkZ3AjA_AY}\\kindle.info'
if not os.path.isfile(kinfopath):
# 0.32 - Added support for "Print Replica" Kindle ebooks
# 0.33 - Performance improvements for large files (concatenation)
# 0.34 - Performance improvements in decryption (libalfcrypto)
+# 0.35 - add interface to get mobi_version
-__version__ = '0.34'
+__version__ = '0.35'
import sys
def getMobiFile(self, outpath):
file(outpath,'wb').write(self.mobi_data)
+ def getMobiVersion(self):
+ return self.mobi_version
+
def getPrintReplica(self):
return self.print_replica
raise DrmException("Not yet initialised with PID. Must be opened with Mobipocket Reader first.")
found_key, pid = self.parseDRM(self.sect[drm_ptr:drm_ptr+drm_size], drm_count, goodpids)
if not found_key:
- raise DrmException("No key found. Most likely the correct PID has not been given.")
+ raise DrmException("No key found. Please report this failure for help.")
# kill the drm keys
self.patchSection(0, "\0" * drm_size, drm_ptr)
# kill the drm pointers
else:
inCalibre = False
+buildXML = False
+
import os, csv, getopt
import zlib, zipfile, tempfile, shutil
from struct import pack
zipUpDir(svgzip, self.outdir, 'svg')
zipUpDir(svgzip, self.outdir, 'img')
svgzip.close()
-
+
def getXMLZip(self, zipname):
xmlzip = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
targetdir = os.path.join(self.outdir,'xml')
def cleanup(self):
if os.path.isdir(self.outdir):
- pass
- # shutil.rmtree(self.outdir, True)
+ shutil.rmtree(self.outdir, True)
def usage(progname):
print "Removes DRM protection from Topaz ebooks and extract the contents"
# Main
def main(argv=sys.argv):
+ global buildXML
progname = os.path.basename(argv[0])
k4 = False
pids = []
zipname = os.path.join(outdir, bookname + '_SVG' + '.zip')
tb.getSVGZip(zipname)
- print " Creating XML ZIP Archive"
- zipname = os.path.join(outdir, bookname + '_XML' + '.zip')
- tb.getXMLZip(zipname)
+ if buildXML:
+ print " Creating XML ZIP Archive"
+ zipname = os.path.join(outdir, bookname + '_XML' + '.zip')
+ tb.getXMLZip(zipname)
# removing internal temporary directory of pieces
tb.cleanup()
\
To remove the DRM from standalone Kindle ebooks, eReader pdb ebooks, Barnes and Noble epubs, and Mobipocket ebooks requires the user to double-click the DeDRM droplet and set some additional Preferences including:\
\
-Kindle 16 digit Serial Number\
+Mobipocket, Kindle for iPhone/iPad/iPodTouch: 10 digit PID\
+Kindle (not Kindle Fire): 16 digit Serial Number\
Barnes & Noble key files (bnepubkey.b64)\
eReader Social DRM: (Name:Last 8 digits of CC number)\
-MobiPocket, Kindle for iPhone/iPad/iPodTouch 10 digit PID\
+Additional Adobe Adept key files (.der)\
Location for DRM-free ebooks.\
\
Once these preferences have been set, the user can simply drag and drop ebooks onto the DeDRM droplet to remove the DRM.\
\
-This program requires Mac OS X 10.5, 10.5 or 10.7 (Leopard, Snow Leopard or Lion)\r\
+This program requires Mac OS X 10.5, 10.6 or 10.7 (Leopard, Snow Leopard or Lion)\
\pard\tx566\tx1133\tx1700\tx2267\tx2834\tx3401\tx3968\tx4535\tx5102\tx5669\tx6236\tx6803\ql\qnatural\pardirnatural
\cf0 \
\
\b0 \
1. From tools_vX.X\\DeDRM_Applications\\, double click on DeDRM_X.X.zip to extract its contents. \
\
-2. Move the resulting DeDRM X.X.app AppleScript droplet to whereever you keep you other applications. (Typically your Applications folder.)\
+2. Move the resulting DeDRM X.X.app AppleScript droplet to wherever you keep your other applications. (Typically your Applications folder.)\
\
3. Optionally drag it into your dock, to make it easily available.\
\
import sys
import os, os.path
sys.path.append(sys.path[0]+os.sep+'lib')
+os.environ['PYTHONIOENCODING'] = "utf-8"
+
import shutil
import Tkinter
from Tkinter import *
keyfile = os.path.join(prefdir,'adeptkey.der')
if os.path.isfile(keyfile):
path = keyfile
- path = path.encode('utf-8')
self.adkpath.insert(0, path)
button = Tkinter.Button(body, text="...", command=self.get_adkpath)
button.grid(row=0, column=2)
keyfile = os.path.join(prefdir,'bnepubkey.b64')
if os.path.isfile(keyfile):
path = keyfile
- path = path.encode('utf-8')
self.bnkpath.insert(0, path)
button = Tkinter.Button(body, text="...", command=self.get_bnkpath)
button.grid(row=1, column=2)
path = infofile
elif os.path.isfile(ainfofile):
path = ainfofile
- path = path.encode('utf-8')
self.altinfopath.insert(0, path)
button = Tkinter.Button(body, text="...", command=self.get_altinfopath)
button.grid(row=2, column=2)
self.outpath.grid(row=6, column=1, sticky=sticky)
if 'outdir' in self.prefs_array:
dpath = self.prefs_array['outdir']
- dpath = dpath.encode('utf-8')
self.outpath.insert(0, dpath)
button = Tkinter.Button(body, text="...", command=self.get_outpath)
button.grid(row=6, column=2)
filetypes=[('ePub Files','.epub'),
('Kindle','.azw'),
('Kindle','.azw1'),
+ ('Kindle','.azw3'),
('Kindle','.azw4'),
('Kindle','.tpz'),
('Kindle','.mobi'),
# post output from subprocess in scrolled text widget
def showCmdOutput(self, msg):
if msg and msg !='':
- msg = msg.encode('utf-8')
if sys.platform.startswith('win'):
msg = msg.replace('\r\n','\n')
self.stext.insert(Tkconstants.END,msg)
if ext == '.pdb':
self.p2 = processPDB(apphome, infile, outdir, rscpath)
return 0
- if ext in ['.azw', '.azw1', '.azw4', '.prc', '.mobi', '.tpz']:
+ if ext in ['.azw', '.azw1', '.azw3', '.azw4', '.prc', '.mobi', '.tpz']:
self.p2 = processK4MOBI(apphome, infile, outdir, rscpath)
return 0
if ext == '.pdf':
# run as a subprocess via pipes and collect stdout, stderr, and return value
def runit(apphome, ncmd, nparms):
- cmdline = 'python ' + '"' + os.path.join(apphome, ncmd) + '" '
- if sys.platform.startswith('win'):
- search_path = os.environ['PATH']
- search_path = search_path.lower()
- if search_path.find('python') < 0:
- # if no python hope that win registry finds what is associated with py extension
- cmdline = '"' + os.path.join(apphome, ncmd) + '" '
+ pengine = sys.executable
+ if pengine is None or pengine == '':
+ pengine = 'python'
+ pengine = os.path.normpath(pengine)
+ cmdline = pengine + ' "' + os.path.join(apphome, ncmd) + '" '
+ # if sys.platform.startswith('win'):
+ # search_path = os.environ['PATH']
+ # search_path = search_path.lower()
+ # if search_path.find('python') < 0:
+ # # if no python hope that win registry finds what is associated with py extension
+ # cmdline = pengine + ' "' + os.path.join(apphome, ncmd) + '" '
cmdline += nparms
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = subasyncio.Process(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
infilelst = argv[1:]
filenames = []
for infile in infilelst:
+ print infile
infile = infile.replace('"','')
infile = os.path.abspath(infile)
if os.path.isdir(infile):
--- /dev/null
+#! /usr/bin/python
+
+"""
+
+Comprehensive Mazama Book DRM with Topaz Cryptography V2.2
+
+-----BEGIN PUBLIC KEY-----
+MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDdBHJ4CNc6DNFCw4MRCw4SWAK6
+M8hYfnNEI0yQmn5Ti+W8biT7EatpauE/5jgQMPBmdNrDr1hbHyHBSP7xeC2qlRWC
+B62UCxeu/fpfnvNHDN/wPWWH4jynZ2M6cdcnE5LQ+FfeKqZn7gnG2No1U9h7oOHx
+y2/pHuYme7U1TsgSjwIDAQAB
+-----END PUBLIC KEY-----
+
+"""
+
+from __future__ import with_statement
+
+import csv
+import sys
+import os
+import getopt
+import zlib
+from struct import pack
+from struct import unpack
+from ctypes import windll, c_char_p, c_wchar_p, c_uint, POINTER, byref, \
+ create_unicode_buffer, create_string_buffer, CFUNCTYPE, addressof, \
+ string_at, Structure, c_void_p, cast
+import _winreg as winreg
+import Tkinter
+import Tkconstants
+import tkMessageBox
+import traceback
+import hashlib
+
+# Character count used to size the buffer in GetSystemDirectory
+# (the buffer is created with MAX_PATH + 1 characters).
+MAX_PATH = 255
+
+# Handles to the Windows DLLs whose functions are wrapped below.
+kernel32 = windll.kernel32
+advapi32 = windll.advapi32
+crypt32 = windll.crypt32
+
+# NOTE: a "global" statement at module level has no effect; these lines only
+# list the module-wide names that the functions below declare and assign.
+global kindleDatabase
+global bookFile
+global bookPayloadOffset
+global bookHeaderRecords
+global bookMetadata
+global bookKey
+global command
+
+#
+# Various character maps used to decrypt books. Probably supposed to act as obfuscation
+#
+
+# 32 symbols; used by main() with encode()/encodeHash() when building the DSN.
+charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
+# 64 symbols; used for the hashed keys and encoded values of kindle.info.
+charMap2 = "AaZzB0bYyCc1XxDdW2wEeVv3FfUuG4g-TtHh5SsIiR6rJjQq7KkPpL8lOoMm9Nn_"
+# 64 symbols; used by encodePID() to turn six-bit values into characters.
+charMap3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
+# 34 symbols; used by generateDevicePID() for the device PID characters.
+charMap4 = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
+
+#
+# Exceptions for all the problems that might happen during the script
+#
+
+class CMBDTCError(Exception):
+    """Recoverable error, e.g. a dkey record that does not match a PID."""
+
+class CMBDTCFatal(Exception):
+    """Error raised when processing cannot continue (bad header, I/O failure, ...)."""
+
+#
+# Stolen stuff
+#
+
+# ctypes structure holding a byte count (cbData) and a data pointer (pbData);
+# CryptUnprotectData below uses it for its input, entropy and output blobs.
+class DataBlob(Structure):
+ _fields_ = [('cbData', c_uint),
+ ('pbData', c_void_p)]
+# Pointer type used in the CryptUnprotectData argument list.
+DataBlob_p = POINTER(DataBlob)
+
+# Factory: declares the ctypes signature of kernel32.GetSystemDirectoryW once
+# and returns the inner wrapper; the module-level name is then rebound to
+# that wrapper, so later calls go straight to the inner function.
+def GetSystemDirectory():
+ GetSystemDirectoryW = kernel32.GetSystemDirectoryW
+ GetSystemDirectoryW.argtypes = [c_wchar_p, c_uint]
+ GetSystemDirectoryW.restype = c_uint
+ # Inner wrapper: returns the system directory as a unicode string.
+ def GetSystemDirectory():
+ buffer = create_unicode_buffer(MAX_PATH + 1)
+ # The return value of the API call is not checked here.
+ GetSystemDirectoryW(buffer, len(buffer))
+ return buffer.value
+ return GetSystemDirectory
+GetSystemDirectory = GetSystemDirectory()
+
+
+# Factory: declares the ctypes signature of kernel32.GetVolumeInformationW and
+# returns the inner wrapper; the module-level name is rebound to that wrapper.
+def GetVolumeSerialNumber():
+ GetVolumeInformationW = kernel32.GetVolumeInformationW
+ GetVolumeInformationW.argtypes = [c_wchar_p, c_wchar_p, c_uint,
+ POINTER(c_uint), POINTER(c_uint),
+ POINTER(c_uint), c_wchar_p, c_uint]
+ GetVolumeInformationW.restype = c_uint
+ # Inner wrapper: returns the serial number of the volume rooted at "path".
+ def GetVolumeSerialNumber(path):
+ vsn = c_uint(0)
+ # Only the serial number is requested; every other output is passed as
+ # None/0. The API return value is not checked, so vsn stays 0 on failure.
+ GetVolumeInformationW(path, None, 0, byref(vsn), None, None, None, 0)
+ return vsn.value
+ return GetVolumeSerialNumber
+GetVolumeSerialNumber = GetVolumeSerialNumber()
+
+
+# Factory: declares the ctypes signature of advapi32.GetUserNameW and returns
+# the inner wrapper; the module-level name is rebound to that wrapper.
+def GetUserName():
+ GetUserNameW = advapi32.GetUserNameW
+ GetUserNameW.argtypes = [c_wchar_p, POINTER(c_uint)]
+ GetUserNameW.restype = c_uint
+ # Inner wrapper: returns the current user name as a byte string.
+ def GetUserName():
+ buffer = create_unicode_buffer(32)
+ size = c_uint(len(buffer))
+ # Retry with a buffer twice as large until the call reports success.
+ while not GetUserNameW(buffer, byref(size)):
+ buffer = create_unicode_buffer(len(buffer) * 2)
+ size.value = len(buffer)
+ # Keep every other byte of the UTF-16-LE encoding, i.e. only the low
+ # byte of each 16-bit code unit.
+ return buffer.value.encode('utf-16-le')[::2]
+ return GetUserName
+GetUserName = GetUserName()
+
+
+# Factory: declares the ctypes signature of crypt32.CryptUnprotectData and
+# returns the inner wrapper; the module-level name is rebound to that wrapper.
+def CryptUnprotectData():
+ _CryptUnprotectData = crypt32.CryptUnprotectData
+ _CryptUnprotectData.argtypes = [DataBlob_p, c_wchar_p, DataBlob_p,
+ c_void_p, c_void_p, c_uint, DataBlob_p]
+ _CryptUnprotectData.restype = c_uint
+ # Inner wrapper: decrypts "indata" using "entropy" and returns the plain
+ # bytes. Raises CMBDTCFatal when the API call reports failure.
+ def CryptUnprotectData(indata, entropy):
+ # Both byte strings are copied into ctypes buffers and wrapped in
+ # DataBlob structures (length + pointer) as the API expects.
+ indatab = create_string_buffer(indata)
+ indata = DataBlob(len(indata), cast(indatab, c_void_p))
+ entropyb = create_string_buffer(entropy)
+ entropy = DataBlob(len(entropy), cast(entropyb, c_void_p))
+ outdata = DataBlob()
+ if not _CryptUnprotectData(byref(indata), None, byref(entropy),
+ None, None, 0, byref(outdata)):
+ raise CMBDTCFatal("Failed to Unprotect Data")
+ # NOTE(review): the output buffer is copied but never released in this
+ # function - confirm whether it should be freed.
+ return string_at(outdata.pbData, outdata.cbData)
+ return CryptUnprotectData
+CryptUnprotectData = CryptUnprotectData()
+
+#
+# Returns the MD5 digest of "message"
+#
+
+def MD5(message):
+    """Return the raw (binary) MD5 digest of message."""
+    return hashlib.md5(message).digest()
+
+#
+# Returns the SHA1 digest of "message"
+#
+
+def SHA1(message):
+    """Return the raw (binary) SHA-1 digest of message."""
+    return hashlib.sha1(message).digest()
+
+#
+# Open the book file at path
+#
+
+def openBook(path):
+    """Open the book file at path for binary reading.
+
+    Returns the open file object; the caller is responsible for closing it.
+    Raises CMBDTCFatal when the file cannot be opened.
+    """
+    try:
+        return open(path, 'rb')
+    except (IOError, OSError):
+        # Only I/O failures are translated; a bare "except:" would also have
+        # swallowed KeyboardInterrupt and SystemExit.
+        raise CMBDTCFatal("Could not open book file: " + path)
+#
+# Encode the bytes in data with the characters in map
+#
+
+def encode(data, map):
+    """Encode every byte of data as two characters taken from map.
+
+    The first character is map[(byte ^ 0x80) // len(map)] and the second
+    is map[byte % len(map)].
+    """
+    base = len(map)
+    pieces = []
+    for char in data:
+        value = ord(char)
+        pieces.append(map[(value ^ 0x80) // base] + map[value % base])
+    return "".join(pieces)
+
+#
+# Hash the bytes in data and then encode the digest with the characters in map
+#
+
+def encodeHash(data,map):
+    """Return the MD5 digest of data, encoded with the characters in map."""
+    digest = MD5(data)
+    return encode(digest, map)
+
+#
+# Decode the string in data with the characters in map. Returns the decoded bytes
+#
+
+def decode(data,map):
+    """Decode a string produced with a 64-character map back into bytes.
+
+    Characters are consumed in pairs: the position of the first one supplies
+    the high bits (times 0x40, with bit 0x80 flipped) and the position of the
+    second one is added as the low part.
+    """
+    pieces = []
+    for pos in range(0, len(data), 2):
+        high = map.find(data[pos])
+        low = map.find(data[pos + 1])
+        pieces.append(pack("B", (((high * 0x40) ^ 0x80) & 0xFF) + low))
+    return "".join(pieces)
+
+#
+# Locate and open the Kindle.info file (Hopefully in the way it is done in the Kindle application)
+#
+
+# Reads the "Local AppData" folder from the current user's Shell Folders
+# registry key and opens the kindle.info file below it in text mode.
+# Returns the open file object (the caller has to close it); registry and
+# open() errors propagate unchanged.
+def openKindleInfo():
+ regkey = winreg.OpenKey(winreg.HKEY_CURRENT_USER, "Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\Shell Folders\\")
+ path = winreg.QueryValueEx(regkey, 'Local AppData')[0]
+ return open(path+'\\Amazon\\Kindle For PC\\{AMAwzsaPaaZAzmZzZQzgZCAkZ3AjA_AY}\\kindle.info','r')
+
+#
+# Parse the Kindle.info file and return the records as a list of key-values
+#
+
+def parseKindleInfo():
+    """Read kindle.info and return its records as a dict.
+
+    The first character of the file is skipped; the rest is split on '{'
+    into items of the form "key:value". Keys and values are returned still
+    encoded. An item without a ':' raises IndexError, as before.
+    """
+    DB = {}
+    infoReader = openKindleInfo()
+    try:
+        infoReader.read(1)
+        data = infoReader.read()
+    finally:
+        # The handle used to be left open for the rest of the run.
+        infoReader.close()
+
+    for item in data.split('{'):
+        splito = item.split(':')
+        DB[splito[0]] = splito[1]
+    return DB
+
+#
+# Find if the original string for a hashed/encoded string is known. If so return the original string, otherwise return an empty string. (Totally not optimal)
+#
+
+def findNameForHash(hash):
+    """Return the known record name whose encoded hash equals hash.
+
+    Each candidate name is hashed and encoded with charMap2 and compared with
+    hash. Returns "" when none of the known names matches.
+    """
+    names = ["kindle.account.tokens","kindle.cookie.item","eulaVersionAccepted","login_date","kindle.token.item","login","kindle.key.item","kindle.name.info","kindle.device.info", "MazamaRandomNumber"]
+    for name in names:
+        if hash == encodeHash(name, charMap2):
+            return name
+    # The loop variable used to be returned here, so every unknown hash was
+    # reported as the last name in the list instead of "".
+    return ""
+
+#
+# Print all the records from the kindle.info file (option -i)
+#
+
+# Walks every hashed key of the global kindleDatabase, prints the plaintext
+# record name when findNameForHash() returns a non-empty one (otherwise
+# "Unknown Record"), then prints the decoded and decrypted record value.
+def printKindleInfo():
+ for record in kindleDatabase:
+ name = findNameForHash(record)
+ if name != "" :
+ print (name)
+ print ("--------------------------\n")
+ else :
+ print ("Unknown Record")
+ print getKindleInfoValueForHash(record)
+ print "\n"
+#
+# Get a record from the Kindle.info file for the key "hashedKey" (already hashed and encoded). Return the decoded and decrypted record
+#
+
+def getKindleInfoValueForHash(hashedKey):
+    """Return the decrypted kindle.info value stored under hashedKey.
+
+    The stored value is decoded with charMap2 and then passed through
+    CryptUnprotectData with an empty entropy string. A missing key raises
+    KeyError.
+    """
+    encodedValue = kindleDatabase[hashedKey]
+    return CryptUnprotectData(decode(encodedValue, charMap2), "")
+
+#
+# Get a record from the Kindle.info file for the string in "key" (plaintext). Return the decoded and decrypted record
+#
+
+def getKindleInfoValueForKey(key):
+    """Return the decrypted kindle.info value for the plaintext name key."""
+    hashedKey = encodeHash(key, charMap2)
+    return getKindleInfoValueForHash(hashedKey)
+
+#
+# Get a 7 bit encoded number from the book file
+#
+
+# Reads one variable-length number from the global bookFile at its current
+# position and returns it as an int.
+# - A leading 0xFF byte marks the number as negative.
+# - Each following byte contributes its low 7 bits; a set high bit (>= 0x80)
+# means another byte follows.
+def bookReadEncodedNumber():
+ flag = False
+ data = ord(bookFile.read(1))
+
+ if data == 0xFF:
+ flag = True
+ data = ord(bookFile.read(1))
+
+ if data >= 0x80:
+ datax = (data & 0x7F)
+ while data >= 0x80 :
+ data = ord(bookFile.read(1))
+ datax = (datax <<7) + (data & 0x7F)
+ data = datax
+
+ if flag:
+ data = -data
+ return data
+
+#
+# Encode a number in 7 bit format
+#
+
+def encodeNumber(number):
+    """Return number in the 7-bit variable-length format.
+
+    The number is emitted most significant group first; every byte except
+    the last has its high bit set. A leading 0x80 byte is added when the
+    first group would otherwise be 0xFF (the marker for negative numbers),
+    and a leading 0xFF byte is added for negative input, which is encoded
+    as -number + 1.
+    """
+    isNegative = number < 0
+    if isNegative:
+        number = -number + 1
+
+    # Groups are collected least significant first and reversed at the end.
+    groups = []
+    continuation = 0
+    while True:
+        septet = (number & 0x7F) + continuation
+        number >>= 7
+        groups.append(chr(septet))
+        continuation = 0x80
+        if number == 0:
+            break
+
+    if septet == 0xFF and not isNegative:
+        groups.append(chr(0x80))
+    if isNegative:
+        groups.append(chr(0xFF))
+
+    groups.reverse()
+    return "".join(groups)
+
+#
+# Get a length prefixed string from the file
+#
+
+def bookReadString():
+    """Read a length-prefixed string from the global bookFile.
+
+    The length is stored as a 7-bit encoded number, followed by that many
+    bytes.
+    """
+    length = bookReadEncodedNumber()
+    raw = bookFile.read(length)
+    return unpack(str(length) + "s", raw)[0]
+
+#
+# Returns a length prefixed string
+#
+
+def lengthPrefixString(data):
+    """Return data preceded by its length in 7-bit encoded form."""
+    prefix = encodeNumber(len(data))
+    return prefix + data
+
+
+#
+# Read and return the data of one header record at the current book file position [[offset,compressedLength,decompressedLength],...]
+#
+
+def bookReadHeaderRecordData():
+    """Read the entries of one header record from the global bookFile.
+
+    A count is read first, then that many triples of encoded numbers.
+    Returns a list of three-element lists.
+    """
+    nbValues = bookReadEncodedNumber()
+    return [[bookReadEncodedNumber() for _ in range(3)]
+            for _ in range(nbValues)]
+
+#
+# Read and parse one header record at the current book file position and return the associated data [[offset,compressedLength,decompressedLength],...]
+#
+
+def parseTopazHeaderRecord():
+    """Parse one header record at the current position of bookFile.
+
+    The record must start with the marker byte 0x63. Returns [tag, entries].
+    Raises CMBDTCFatal when the marker is missing.
+    """
+    marker = ord(bookFile.read(1))
+    if marker != 0x63:
+        raise CMBDTCFatal("Parse Error : Invalid Header")
+
+    tag = bookReadString()
+    entries = bookReadHeaderRecordData()
+    return [tag, entries]
+
+#
+# Parse the header of a Topaz file, get all the header records and the offset for the payload
+#
+
+# Parses the file header from the global bookFile (expected to be positioned
+# at the start of the file):
+# - checks the 4-byte magic 'TPZ0',
+# - reads the record count and fills the global bookHeaderRecords
+# dict (tag -> list of entries),
+# - checks the 0x64 end-of-header marker,
+# - stores the position right after it in the global bookPayloadOffset.
+# Raises CMBDTCFatal when the magic or one of the markers is wrong.
+def parseTopazHeader():
+ global bookHeaderRecords
+ global bookPayloadOffset
+ magic = unpack("4s",bookFile.read(4))[0]
+
+ if magic != 'TPZ0':
+ raise CMBDTCFatal("Parse Error : Invalid Header, not a Topaz file")
+
+ nbRecords = bookReadEncodedNumber()
+ bookHeaderRecords = {}
+
+ for i in range (0,nbRecords):
+ result = parseTopazHeaderRecord()
+ bookHeaderRecords[result[0]] = result[1]
+
+ if ord(bookFile.read(1)) != 0x64 :
+ raise CMBDTCFatal("Parse Error : Invalid Header")
+
+ bookPayloadOffset = bookFile.tell()
+
+#
+# Get a record in the book payload, given its name and index. If necessary the record is decrypted. The record is not decompressed
+#
+
+# Returns the raw bytes of payload record (name, index), decrypted with the
+# global bookKey when the record is flagged as encrypted. The data is not
+# decompressed here.
+# Raises CMBDTCFatal when the record is not listed in bookHeaderRecords or
+# when the tag/index stored in the payload do not match the request.
+def getBookPayloadRecord(name, index):
+ encrypted = False
+
+ try:
+ recordOffset = bookHeaderRecords[name][index][0]
+ except:
+ raise CMBDTCFatal("Parse Error : Invalid Record, record not found")
+
+ # Header offsets are relative to the start of the payload.
+ bookFile.seek(bookPayloadOffset + recordOffset)
+
+ tag = bookReadString()
+ if tag != name :
+ raise CMBDTCFatal("Parse Error : Invalid Record, record name doesn't match")
+
+ recordIndex = bookReadEncodedNumber()
+
+ # A negative stored index marks an encrypted record; the real index is
+ # recovered as -stored - 1.
+ if recordIndex < 0 :
+ encrypted = True
+ recordIndex = -recordIndex -1
+
+ if recordIndex != index :
+ raise CMBDTCFatal("Parse Error : Invalid Record, index doesn't match")
+
+ # The third header value gives the byte count when it is non-zero,
+ # otherwise the second one does.
+ if bookHeaderRecords[name][index][2] != 0 :
+ record = bookFile.read(bookHeaderRecords[name][index][2])
+ else:
+ record = bookFile.read(bookHeaderRecords[name][index][1])
+
+ if encrypted:
+ # NOTE(review): bookKey is only assigned in main() once a book key was
+ # found - confirm callers never reach this point without it.
+ ctx = topazCryptoInit(bookKey)
+ record = topazCryptoDecrypt(record,ctx)
+
+ return record
+
+#
+# Extract, decrypt and decompress a book record indicated by name and index and print it or save it in "filename"
+#
+
+def extractBookPayloadRecord(name, index, filename):
+    """Extract one payload record and print it or save it to filename.
+
+    The record is read (and decrypted if needed) with getBookPayloadRecord
+    and decompressed with zlib when the third header value is non-zero.
+    With an empty filename the record is printed, otherwise it is written
+    to that file in binary mode.
+
+    A record that cannot be read is reported with "Could not find record"
+    and nothing else is done. Raises CMBDTCFatal when decompression or
+    writing fails.
+    """
+    try:
+        compressed = bookHeaderRecords[name][index][2] != 0
+        record = getBookPayloadRecord(name, index)
+    except Exception:
+        print("Could not find record")
+        # Stop here: "record" was never assigned, so continuing used to end
+        # in a NameError (or in a misleading "Could not write" error after
+        # creating an empty output file).
+        return
+
+    if compressed:
+        try:
+            record = zlib.decompress(record)
+        except zlib.error:
+            raise CMBDTCFatal("Could not decompress record")
+
+    if filename != "":
+        try:
+            output = open(filename, "wb")
+            try:
+                output.write(record)
+            finally:
+                # Close the handle even when the write fails.
+                output.close()
+        except (IOError, OSError):
+            raise CMBDTCFatal("Could not write to destination file")
+    else:
+        print(record)
+
+#
+# return next record [key,value] from the book metadata from the current book position
+#
+
+def readMetadataRecord():
+    """Read the next [key, value] pair of strings from the global bookFile."""
+    key = bookReadString()
+    value = bookReadString()
+    return [key, value]
+
+#
+# Parse the metadata record from the book payload and return a list of [key,values]
+#
+
+# Seeks to the first "metadata" record of the payload, checks its tag and
+# fills the global bookMetadata dict with the key/value strings it contains.
+# Raises CMBDTCFatal when the tag read from the payload is not "metadata".
+def parseMetadata():
+ global bookHeaderRecords
+ # NOTE(review): bookPayloadAddress is not used anywhere in this function;
+ # the offset actually read below is bookPayloadOffset.
+ global bookPayloadAddress
+ global bookMetadata
+ bookMetadata = {}
+ bookFile.seek(bookPayloadOffset + bookHeaderRecords["metadata"][0][0])
+ tag = bookReadString()
+ if tag != "metadata" :
+ raise CMBDTCFatal("Parse Error : Record Names Don't Match")
+
+ # The flags byte is read to advance the file position; its value is unused.
+ flags = ord(bookFile.read(1))
+ nbRecords = ord(bookFile.read(1))
+
+ for i in range (0,nbRecords) :
+ record =readMetadataRecord()
+ bookMetadata[record[0]] = record[1]
+
+#
+# Returns the two bits at offset from a bit field
+#
+
+def getTwoBitsFromBitField(bitField,offset):
+    """Return the two-bit group number offset of bitField.
+
+    Groups are counted from the most significant bits of the first byte,
+    four groups per byte.
+    """
+    byteNumber, slot = divmod(offset, 4)
+    shift = 6 - 2 * slot
+    return (ord(bitField[byteNumber]) >> shift) & 3
+
+#
+# Returns the six bits at offset from a bit field
+#
+
+def getSixBitsFromBitField(bitField,offset):
+    """Return the six-bit group number offset of bitField.
+
+    Each six-bit group is assembled from three consecutive two-bit groups,
+    most significant first.
+    """
+    first = offset * 3
+    high = getTwoBitsFromBitField(bitField, first)
+    middle = getTwoBitsFromBitField(bitField, first + 1)
+    low = getTwoBitsFromBitField(bitField, first + 2)
+    return (high << 4) + (middle << 2) + low
+
+#
+# 8 bits to six bits encoding from hash to generate PID string
+#
+
+def encodePID(hash):
+    """Return the 8-character PID built from the first 48 bits of hash.
+
+    Each of the first eight six-bit groups selects one character of
+    charMap3.
+    """
+    return "".join(charMap3[getSixBitsFromBitField(hash, position)]
+                   for position in range(8))
+
+#
+# Context initialisation for the Topaz Crypto
+#
+
+def topazCryptoInit(key):
+    """Build the two-word cipher context for key.
+
+    Returns [ctx1, ctx2], where ctx2 is the value ctx1 had before the last
+    key character was mixed in. key must not be empty: ctx2 is only assigned
+    inside the loop.
+    """
+    mask = 0xFFFFFFFF
+    ctx1 = 0x0CAFFE19E
+
+    for keyChar in key:
+        keyByte = ord(keyChar)
+        ctx2 = ctx1
+        mixed = ((ctx1 >> 2) * (ctx1 >> 7)) & mask
+        ctx1 = mixed ^ ((keyByte * keyByte * 0x0F902007) & mask)
+    return [ctx1, ctx2]
+
+#
+# decrypt data with the context prepared by topazCryptoInit()
+#
+
+def topazCryptoDecrypt(data, ctx):
+    """Decrypt data with a context prepared by topazCryptoInit().
+
+    ctx itself is not modified; the running state is kept in local copies
+    and advanced with every decrypted byte.
+    """
+    mask = 0xFFFFFFFF
+    ctx1, ctx2 = ctx[0], ctx[1]
+
+    plain = []
+    for dataChar in data:
+        keystream = ((ctx1 >> 3) & 0xFF) ^ ((ctx2 << 3) & 0xFF)
+        m = (ord(dataChar) ^ keystream) & 0xFF
+        ctx2 = ctx1
+        ctx1 = (((ctx1 >> 2) * (ctx1 >> 7)) & mask) ^ ((m * m * 0x0F902007) & mask)
+        plain.append(chr(m))
+
+    return "".join(plain)
+
+#
+# Decrypt a payload record with the PID
+#
+
+def decryptRecord(data,PID):
+    """Decrypt data with a cipher context derived from PID."""
+    return topazCryptoDecrypt(data, topazCryptoInit(PID))
+
+#
+# Try to decrypt a dkey record (contains the book PID)
+#
+
+def decryptDkeyRecord(data,PID):
+    """Decrypt one dkey record with PID and return the 8-byte key it holds.
+
+    The decrypted record must have the layout "PID", 8, <PID>, 8, <key>,
+    "pid". Raises CMBDTCError when the magic strings, the length fields or
+    the embedded PID do not match.
+    """
+    record = decryptRecord(data, PID)
+    headMagic, pidLength, recordPID, keyLength, key, tailMagic = unpack("3sB8sB8s3s", record)
+
+    if headMagic != "PID" or tailMagic != "pid":
+        raise CMBDTCError("Didn't find PID magic numbers in record")
+    if pidLength != 8 or keyLength != 8:
+        raise CMBDTCError("Record didn't contain correct length fields")
+    if recordPID != PID:
+        raise CMBDTCError("Record didn't contain PID")
+
+    return key
+
+#
+# Decrypt all the book's dkey records (contain the book PID)
+#
+
+# Tries PID against every key record stored in a dkey payload and returns the
+# list of keys that could be decrypted (possibly empty).
+# Layout read here: one count byte, then for each record one length byte
+# followed by that many bytes of encrypted data.
+def decryptDkeyRecords(data,PID):
+ nbKeyRecords = ord(data[0])
+ records = []
+ data = data[1:]
+ for i in range (0,nbKeyRecords):
+ length = ord(data[0])
+ try:
+ key = decryptDkeyRecord(data[1:length+1],PID)
+ records.append(key)
+ except CMBDTCError:
+ # This record does not belong to PID; skip it.
+ pass
+ # Advance past the length byte and the record body.
+ data = data[1+length:]
+
+ return records
+
+#
+# Encryption table used to generate the device PID
+#
+
+def generatePidEncryptionTable() :
+    """Return the 256-entry lookup table used to derive the device PID.
+
+    Every entry starts as its own index and goes through eight
+    shift-right steps, XORing with 0xEDB88320 whenever the bit shifted out
+    was set.
+    """
+    table = []
+    for index in range(0x100):
+        entry = index
+        for _ in range(8):
+            lowBit = entry & 1
+            entry >>= 1
+            if lowBit:
+                entry ^= 0xEDB88320
+        table.append(entry)
+    return table
+
+#
+# Seed value used to generate the device PID
+#
+
+def generatePidSeed(table,dsn) :
+    """Return the seed computed from the first four characters of dsn.
+
+    Each character is XORed with the running value to pick an entry of
+    table, which is then XORed with the running value shifted right by 8.
+    """
+    seed = 0
+    for position in range(4):
+        seed = (seed >> 8) ^ table[(ord(dsn[position]) ^ seed) & 0xFF]
+    return seed
+
+#
+# Generate the device PID
+#
+
+def generateDevicePID(table,dsn,nbRoll):
+    """Return the 8-character device PID for the serial number dsn.
+
+    The four bytes of the seed (most significant first) are repeated twice,
+    the first nbRoll characters of dsn are XORed into them cyclically, and
+    each resulting byte selects one character of charMap4.
+    """
+    seed = generatePidSeed(table, dsn)
+    seedBytes = [(seed >> shift) & 0xFF for shift in (24, 16, 8, 0)]
+    pid = seedBytes * 2
+
+    for counter in range(nbRoll):
+        pid[counter % 8] ^= ord(dsn[counter])
+
+    return "".join(
+        charMap4[((((value >> 5) & 3) ^ value) & 0x1f) + (value >> 7)]
+        for value in pid)
+
+#
+# Create decrypted book payload
+#
+
+# Walks all header records except "dkey" and either measures or writes them:
+# - payload is None: nothing is written; only the running offset is advanced
+# by the size each record would take (tag, index, data).
+# - payload is a file object: tag, index and (decrypted) record data are
+# written to it; the running offset is not advanced in this mode.
+# Returns a list of [name, [[offset, value1, value2], ...]] entries, where
+# value1/value2 are copied from the original header.
+def createDecryptedPayload(payload):
+
+ # store data to be able to create the header later
+ headerData= []
+ currentOffset = 0
+
+ # Add social DRM to decrypted files
+ # Best effort: any failure (e.g. missing kindle.info values) is ignored.
+ # The "sdrm" block is not added to the returned header data.
+
+ try:
+ data = getKindleInfoValueForKey("kindle.name.info")+":"+ getKindleInfoValueForKey("login")
+ if payload!= None:
+ payload.write(lengthPrefixString("sdrm"))
+ payload.write(encodeNumber(0))
+ payload.write(data)
+ else:
+ currentOffset += len(lengthPrefixString("sdrm"))
+ currentOffset += len(encodeNumber(0))
+ currentOffset += len(data)
+ except:
+ pass
+
+ for headerRecord in bookHeaderRecords:
+ name = headerRecord
+ newRecord = []
+
+ # The dkey record is left out of the output.
+ if name != "dkey" :
+
+ for index in range (0,len(bookHeaderRecords[name])) :
+ offset = currentOffset
+
+ if payload != None:
+ # write tag
+ payload.write(lengthPrefixString(name))
+ # write data
+ payload.write(encodeNumber(index))
+ payload.write(getBookPayloadRecord(name, index))
+
+ else :
+ currentOffset += len(lengthPrefixString(name))
+ currentOffset += len(encodeNumber(index))
+ currentOffset += len(getBookPayloadRecord(name, index))
+ newRecord.append([offset,bookHeaderRecords[name][index][1],bookHeaderRecords[name][index][2]])
+
+ headerData.append([name,newRecord])
+
+
+
+ return headerData
+
+#
+# Create decrypted book
+#
+
+# Writes a decrypted copy of the currently opened book to the path given in
+# outputFile: magic "TPZ0", the header table, the 0x64 end-of-header marker
+# and then the payload.
+# NOTE(review): the file is only closed on the success path.
+def createDecryptedBook(outputFile):
+ outputFile = open(outputFile,"wb")
+ # First pass with payload=None: nothing is written, only the header
+ # data (record names and offsets) is computed.
+ headerData = createDecryptedPayload(None)
+ outputFile.write("TPZ0")
+ outputFile.write(encodeNumber(len(headerData)))
+
+ for header in headerData :
+ # 0x63 introduces each header record.
+ outputFile.write(chr(0x63))
+ outputFile.write(lengthPrefixString(header[0]))
+ outputFile.write(encodeNumber(len(header[1])))
+ for numbers in header[1] :
+ outputFile.write(encodeNumber(numbers[0]))
+ outputFile.write(encodeNumber(numbers[1]))
+ outputFile.write(encodeNumber(numbers[2]))
+
+ outputFile.write(chr(0x64))
+ # Second pass: write the payload records themselves.
+ createDecryptedPayload(outputFile)
+ outputFile.close()
+
+#
+# Set the command to be executed by the program according to the command line parameters
+#
+
+def setCommand(name) :
+    """Record name as the action to run, in the global command.
+
+    Raises CMBDTCFatal when an action has already been selected.
+    """
+    global command
+    if command == "":
+        command = name
+        return
+    raise CMBDTCFatal("Invalid command line parameters")
+
+#
+# Program usage
+#
+
+def usage():
+    """Print the command line help text, one line per print call."""
+    helpLines = (
+        "\nUsage:",
+        "\nCMBDTC.py [options] bookFileName\n",
+        "-p Adds a PID to the list of PIDs that are tried to decrypt the book key (can be used several times)",
+        "-d Saves a decrypted copy of the book",
+        "-r Prints or writes to disk a record indicated in the form name:index (e.g \"img:0\")",
+        "-o Output file name to write records and decrypted books",
+        "-v Verbose (can be used several times)",
+        "-i Prints kindle.info database",
+    )
+    for line in helpLines:
+        print(line)
+
+#
+# Main
+#
+
+# Command line entry point. Steps:
+# 1. parse the options (-v, -i, -o, -r, -p, -d) and select one command,
+# 2. read kindle.info and, if available, derive the DSN and device PID,
+# 3. open the book, parse header and metadata, derive the book PID,
+# 4. try every PID against the dkey record to find the book key,
+# 5. run the selected command (print/extract a record or save a decrypted
+# copy).
+# Returns 0; exits with status 2 on bad options or when called without
+# arguments. CMBDTCFatal is raised for unusable parameters.
+def main(argv=sys.argv):
+ global kindleDatabase
+ global bookMetadata
+ global bookKey
+ global bookFile
+ global command
+
+ progname = os.path.basename(argv[0])
+
+ verbose = 0
+ recordName = ""
+ recordIndex = 0
+ outputFile = ""
+ PIDs = []
+ kindleDatabase = None
+ command = ""
+
+
+ try:
+ # NOTE(review): options are taken from sys.argv, not from the argv
+ # parameter, which is only used for progname above.
+ opts, args = getopt.getopt(sys.argv[1:], "vdir:o:p:")
+ except getopt.GetoptError, err:
+ # print help information and exit:
+ print str(err) # will print something like "option -a not recognized"
+ usage()
+ sys.exit(2)
+
+ if len(opts) == 0 and len(args) == 0 :
+ usage()
+ sys.exit(2)
+
+ for o, a in opts:
+ if o == "-v":
+ verbose+=1
+ if o == "-i":
+ setCommand("printInfo")
+ if o =="-o":
+ if a == None :
+ raise CMBDTCFatal("Invalid parameter for -o")
+ outputFile = a
+ if o =="-r":
+ setCommand("printRecord")
+ try:
+ recordName,recordIndex = a.split(':')
+ except:
+ raise CMBDTCFatal("Invalid parameter for -r")
+ if o =="-p":
+ PIDs.append(a)
+ if o =="-d":
+ setCommand("doit")
+
+ if command == "" :
+ raise CMBDTCFatal("No action supplied on command line")
+
+ #
+ # Read the encrypted database
+ #
+
+ # A failure here is not fatal: kindleDatabase stays None and only the PIDs
+ # given with -p are tried.
+ try:
+ kindleDatabase = parseKindleInfo()
+ except Exception, message:
+ if verbose>0:
+ print(message)
+
+ if kindleDatabase != None :
+ if command == "printInfo" :
+ printKindleInfo()
+
+ #
+ # Compute the DSN
+ #
+
+ # Get the Mazama Random number
+ MazamaRandomNumber = getKindleInfoValueForKey("MazamaRandomNumber")
+
+ # Get the HDD serial
+ encodedSystemVolumeSerialNumber = encodeHash(str(GetVolumeSerialNumber(GetSystemDirectory().split('\\')[0] + '\\')),charMap1)
+
+ # Get the current user name
+ encodedUsername = encodeHash(GetUserName(),charMap1)
+
+ # concat, hash and encode
+ DSN = encode(SHA1(MazamaRandomNumber+encodedSystemVolumeSerialNumber+encodedUsername),charMap1)
+
+ if verbose >1:
+ print("DSN: " + DSN)
+
+ #
+ # Compute the device PID
+ #
+
+ table = generatePidEncryptionTable()
+ devicePID = generateDevicePID(table,DSN,4)
+ PIDs.append(devicePID)
+
+ if verbose > 0:
+ print("Device PID: " + devicePID)
+
+ #
+ # Open book and parse metadata
+ #
+
+ if len(args) == 1:
+
+ bookFile = openBook(args[0])
+ parseTopazHeader()
+ parseMetadata()
+
+ #
+ # Compute book PID
+ #
+
+ # Get the account token
+
+ if kindleDatabase != None:
+ kindleAccountToken = getKindleInfoValueForKey("kindle.account.tokens")
+
+ if verbose >1:
+ print("Account Token: " + kindleAccountToken)
+
+ # The "keys" metadata entry names a second metadata entry; both strings
+ # go into the book PID hash.
+ keysRecord = bookMetadata["keys"]
+ keysRecordRecord = bookMetadata[keysRecord]
+
+ pidHash = SHA1(DSN+kindleAccountToken+keysRecord+keysRecordRecord)
+
+ bookPID = encodePID(pidHash)
+ PIDs.append(bookPID)
+
+ if verbose > 0:
+ print ("Book PID: " + bookPID )
+
+ #
+ # Decrypt book key
+ #
+
+ dkey = getBookPayloadRecord('dkey', 0)
+
+ bookKeys = []
+ for PID in PIDs :
+ bookKeys+=decryptDkeyRecords(dkey,PID)
+
+ # NOTE(review): when no key is found, bookKey stays unassigned while the
+ # selected command may still be run afterwards - confirm intended handling.
+ if len(bookKeys) == 0 :
+ if verbose > 0 :
+ print ("Book key could not be found. Maybe this book is not registered with this device.")
+ else :
+ bookKey = bookKeys[0]
+ if verbose > 0:
+ print("Book key: " + bookKey.encode('hex'))
+
+
+
+ if command == "printRecord" :
+ extractBookPayloadRecord(recordName,int(recordIndex),outputFile)
+ if outputFile != "" and verbose>0 :
+ print("Wrote record to file: "+outputFile)
+ elif command == "doit" :
+ if outputFile!="" :
+ createDecryptedBook(outputFile)
+ if verbose >0 :
+ print ("Decrypted book saved. Don't pirate!")
+ elif verbose > 0:
+ print("Output file name was not supplied.")
+
+ return 0
+
+# Run main() only when executed as a script; its return value becomes the
+# process exit status.
+if __name__ == '__main__':
+ sys.exit(main())
'region.y' : (1, 'scalar_number', 0, 0),
'region.h' : (1, 'scalar_number', 0, 0),
'region.w' : (1, 'scalar_number', 0, 0),
+ 'region.orientation' : (1, 'scalar_number', 0, 0),
'empty_text_region' : (1, 'snippets', 1, 0),
file(xname, 'wb').write(metastr)
print 'Processing StyleSheet'
+
# get some scaling info from metadata to use while processing styles
+ # and first page info
+
fontsize = '135'
if 'fontSize' in meta_array:
fontsize = meta_array['fontSize']
# also get the size of a normal text page
+ # get the total number of pages unpacked as a safety check
+ filenames = os.listdir(pageDir)
+ numfiles = len(filenames)
+
spage = '1'
if 'firstTextPage' in meta_array:
spage = meta_array['firstTextPage']
pnum = int(spage)
+ if pnum >= numfiles or pnum < 0:
+ # metadata is wrong so just select a page near the front
+ # 10% of the book to get a normal text page
+ pnum = int(0.10 * numfiles)
+ # print "first normal text page is", spage
# get page height and width from first text page for use in stylesheet scaling
pname = 'page%04d.dat' % (pnum + 1)
# and many many others
-__version__ = '4.0'
+__version__ = '4.2'
class Unbuffered:
def __init__(self, stream):
if mobi:
if mb.getPrintReplica():
outfile = os.path.join(outdir, outfilename + '_nodrm' + '.azw4')
+ elif mb.getMobiVersion() >= 8:
+ outfile = os.path.join(outdir, outfilename + '_nodrm' + '.azw3')
else:
outfile = os.path.join(outdir, outfilename + '_nodrm' + '.mobi')
mb.getMobiFile(outfile)
regkey = winreg.OpenKey(winreg.HKEY_CURRENT_USER, "Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\Shell Folders\\")
path = winreg.QueryValueEx(regkey, 'Local AppData')[0]
+ # some 64 bit machines do not have the proper registry key for some reason
+ # or the python interface to the 32 vs 64 bit registry is broken
+ if 'LOCALAPPDATA' in os.environ.keys():
+ path = os.environ['LOCALAPPDATA']
+
+ print "searching for kinfoFiles in ", path
+
# first look for older kindle-info files
kinfopath = path +'\\Amazon\\Kindle For PC\\{AMAwzsaPaaZAzmZzZQzgZCAkZ3AjA_AY}\\kindle.info'
if not os.path.isfile(kinfopath):
# 0.32 - Added support for "Print Replica" Kindle ebooks
# 0.33 - Performance improvements for large files (concatenation)
# 0.34 - Performance improvements in decryption (libalfcrypto)
+# 0.35 - add interface to get mobi_version
-__version__ = '0.34'
+__version__ = '0.35'
import sys
def getMobiFile(self, outpath):
file(outpath,'wb').write(self.mobi_data)
+ def getMobiVersion(self):
+ # Returns self.mobi_version unchanged; callers compare it against 8
+ # to decide whether the output gets the .azw3 extension.
+ return self.mobi_version
+
def getPrintReplica(self):
return self.print_replica
raise DrmException("Not yet initialised with PID. Must be opened with Mobipocket Reader first.")
found_key, pid = self.parseDRM(self.sect[drm_ptr:drm_ptr+drm_size], drm_count, goodpids)
if not found_key:
- raise DrmException("No key found. Most likely the correct PID has not been given.")
+ raise DrmException("No key found. Please report this failure for help.")
# kill the drm keys
self.patchSection(0, "\0" * drm_size, drm_ptr)
# kill the drm pointers
import sys
import os
+os.environ['PYTHONIOENCODING'] = "utf-8"
import re
import shutil
import Tkinter
import sys
sys.path.append('lib')
import os, os.path, urllib
+os.environ['PYTHONIOENCODING'] = "utf-8"
import subprocess
from subprocess import Popen, PIPE, STDOUT
import subasyncio
# post output from subprocess in scrolled text widget
def showCmdOutput(self, msg):
if msg and msg !='':
- msg = msg.encode('utf-8')
if sys.platform.startswith('win'):
msg = msg.replace('\r\n','\n')
self.stext.insert(Tkconstants.END,msg)
# run as a subprocess via pipes and collect stdout
def pidrdr(self, serial):
# os.putenv('PYTHONUNBUFFERED', '1')
- cmdline = 'python ./lib/kindlepid.py "' + serial + '"'
+ pengine = sys.executable
+ if pengine is None or pengine == '':
+ pengine = "python"
+ pengine = os.path.normpath(pengine)
+ cmdline = pengine + ' ./lib/kindlepid.py "' + serial + '"'
if sys.platform[0:3] == 'win':
- search_path = os.environ['PATH']
- search_path = search_path.lower()
- if search_path.find('python') >= 0:
- cmdline = 'python lib\kindlepid.py "' + serial + '"'
- else :
- cmdline = 'lib\kindlepid.py "' + serial + '"'
+ # search_path = os.environ['PATH']
+ # search_path = search_path.lower()
+ # if search_path.find('python') >= 0:
+ # cmdline = 'python lib\kindlepid.py "' + serial + '"'
+ # else :
+ # cmdline = 'lib\kindlepid.py "' + serial + '"'
+ cmdline = pengine + ' lib\\kindlepid.py "' + serial + '"'
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
return p2
log += 'Serial = "' + serial + '"\n'
log += '\n\n'
log += 'Please Wait ...\n\n'
- log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log)
self.p2 = self.pidrdr(serial)
import sys
sys.path.append('lib')
import os, os.path, urllib
+os.environ['PYTHONIOENCODING'] = "utf-8"
import subprocess
from subprocess import Popen, PIPE, STDOUT
import subasyncio
# post output from subprocess in scrolled text widget
def showCmdOutput(self, msg):
if msg and msg !='':
- msg = msg.encode('utf-8')
if sys.platform.startswith('win'):
msg = msg.replace('\r\n','\n')
self.stext.insert(Tkconstants.END,msg)
# run as a subprocess via pipes and collect stdout
def mobirdr(self, infile, outfile, pidnum):
+ pengine = sys.executable
+ if pengine is None or pengine == '':
+ pengine = "python"
+ pengine = os.path.normpath(pengine)
# os.putenv('PYTHONUNBUFFERED', '1')
- cmdline = 'python ./lib/mobidedrm.py "' + infile + '" "' + outfile + '" "' + pidnum + '"'
+ cmdline = pengine + ' ./lib/mobidedrm.py "' + infile + '" "' + outfile + '" "' + pidnum + '"'
if sys.platform[0:3] == 'win':
- search_path = os.environ['PATH']
- search_path = search_path.lower()
- if search_path.find('python') >= 0:
- cmdline = 'python lib\mobidedrm.py "' + infile + '" "' + outfile + '" "' + pidnum + '"'
- else :
- cmdline = 'lib\mobidedrm.py "' + infile + '" "' + outfile + '" "' + pidnum + '"'
+ # search_path = os.environ['PATH']
+ # search_path = search_path.lower()
+ # if search_path.find('python') >= 0:
+ # cmdline = 'python lib\mobidedrm.py "' + infile + '" "' + outfile + '" "' + pidnum + '"'
+ # else :
+ # cmdline = 'lib\mobidedrm.py "' + infile + '" "' + outfile + '" "' + pidnum + '"'
+ cmdline = pengine + ' lib\\mobidedrm.py "' + infile + '" "' + outfile + '" "' + pidnum + '"'
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
log += 'PID = "' + pidnum + '"\n'
log += '\n\n'
log += 'Please Wait ...\n\n'
- log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log)
self.p2 = self.mobirdr(mobipath, outpath, pidnum)
import sys
sys.path.append('lib')
import os, os.path, urllib
+os.environ['PYTHONIOENCODING'] = "utf-8"
+
import Tkinter
import Tkconstants
import tkFileDialog
self.outpath.insert(0, outname)
button = Tkinter.Button(body, text="...", command=self.get_outpath)
button.grid(row=1, column=2)
-
+
Tkinter.Label(body, text='Optional Alternative Kindle.info file').grid(row=2, sticky=Tkconstants.E)
self.altinfopath = Tkinter.Entry(body, width=50)
self.altinfopath.grid(row=2, column=1, sticky=sticky)
# option being used, so need to reset it for the next time
def processPipe(self):
poll = self.p2.wait('nowait')
- if poll != None:
+ if poll != None:
text = self.p2.readerr()
text += self.p2.read()
msg = text + '\n\n' + 'Encryption successfully removed\n'
text = self.p2.readerr()
text += self.p2.read()
self.showCmdOutput(text)
- # make sure we get invoked again by event loop after interval
+ # make sure we get invoked again by event loop after interval
self.stext.after(self.interval,self.processPipe)
return
# post output from subprocess in scrolled text widget
def showCmdOutput(self, msg):
if msg and msg !='':
- # msg = msg.encode('utf-8')
if sys.platform.startswith('win'):
msg = msg.replace('\r\n','\n')
self.stext.insert(Tkconstants.END,msg)
infooption = ''
if altinfopath and altinfopath != '':
infooption = ' -k "' + altinfopath + '" '
- cmdline = 'python ./lib/' + tool + ' ' + pidoption + seroption + infooption + '"' + infile + '" "' + outfile + '"'
- print cmdline
+ pengine = sys.executable
+ if pengine is None or pengine == '':
+ pengine = "python"
+ pengine = os.path.normpath(pengine)
+ cmdline = pengine + ' ./lib/' + tool + ' ' + pidoption + seroption + infooption + '"' + infile + '" "' + outfile + '"'
if sys.platform.startswith('win'):
- search_path = os.environ['PATH']
- search_path = search_path.lower()
- if search_path.find('python') >= 0:
- cmdline = 'python lib\\' + tool + ' ' + pidoption + seroption + infooption + '"' + infile + '" "' + outfile + '"'
- else :
- cmdline = 'lib\\' + tool + ' ' + pidoption + seroption + infooption + '"' + infile + '" "' + outfile + '"'
-
+ cmdline = pengine + ' lib\\' + tool + ' ' + pidoption + seroption + infooption + '"' + infile + '" "' + outfile + '"'
+ print cmdline
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
return p2
self.p2 = self.mobirdr(mobipath, outpath, altinfopath, pidnums, sernums)
# python does not seem to allow you to create
- # your own eventloop which every other gui does - strange
+ # your own eventloop which every other gui does - strange
# so need to use the widget "after" command to force
# event loop to run non-gui events every interval
self.stext.after(self.interval,self.processPipe)
MainDialog(root).pack(fill=Tkconstants.X, expand=1)
root.mainloop()
return 0
-
+
if __name__ == "__main__":
sys.exit(main())
'region.y' : (1, 'scalar_number', 0, 0),
'region.h' : (1, 'scalar_number', 0, 0),
'region.w' : (1, 'scalar_number', 0, 0),
+ 'region.orientation' : (1, 'scalar_number', 0, 0),
'empty_text_region' : (1, 'snippets', 1, 0),
file(xname, 'wb').write(metastr)
print 'Processing StyleSheet'
+
# get some scaling info from metadata to use while processing styles
+ # and first page info
+
fontsize = '135'
if 'fontSize' in meta_array:
fontsize = meta_array['fontSize']
# also get the size of a normal text page
+ # get the total number of pages unpacked as a safety check
+ filenames = os.listdir(pageDir)
+ numfiles = len(filenames)
+
spage = '1'
if 'firstTextPage' in meta_array:
spage = meta_array['firstTextPage']
pnum = int(spage)
+ if pnum >= numfiles or pnum < 0:
+ # metadata is wrong so just select a page near the front
+ # 10% of the book to get a normal text page
+ pnum = int(0.10 * numfiles)
+ # print "first normal text page is", spage
# get page height and width from first text page for use in stylesheet scaling
pname = 'page%04d.dat' % (pnum + 1)
# and many many others
-__version__ = '4.0'
+__version__ = '4.2'
class Unbuffered:
def __init__(self, stream):
if mobi:
if mb.getPrintReplica():
outfile = os.path.join(outdir, outfilename + '_nodrm' + '.azw4')
+ elif mb.getMobiVersion() >= 8:
+ outfile = os.path.join(outdir, outfilename + '_nodrm' + '.azw3')
else:
outfile = os.path.join(outdir, outfilename + '_nodrm' + '.mobi')
mb.getMobiFile(outfile)
regkey = winreg.OpenKey(winreg.HKEY_CURRENT_USER, "Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\Shell Folders\\")
path = winreg.QueryValueEx(regkey, 'Local AppData')[0]
+ # some 64 bit machines do not have the proper registry key for some reason
+ # or the python interface to the 32 vs 64 bit registry is broken
+ if 'LOCALAPPDATA' in os.environ.keys():
+ path = os.environ['LOCALAPPDATA']
+
+ print "searching for kinfoFiles in ", path
+
# first look for older kindle-info files
kinfopath = path +'\\Amazon\\Kindle For PC\\{AMAwzsaPaaZAzmZzZQzgZCAkZ3AjA_AY}\\kindle.info'
if not os.path.isfile(kinfopath):
# 0.32 - Added support for "Print Replica" Kindle ebooks
# 0.33 - Performance improvements for large files (concatenation)
# 0.34 - Performance improvements in decryption (libalfcrypto)
+# 0.35 - add interface to get mobi_version
-__version__ = '0.34'
+__version__ = '0.35'
import sys
def getMobiFile(self, outpath):
file(outpath,'wb').write(self.mobi_data)
+ def getMobiVersion(self):
+ return self.mobi_version
+
def getPrintReplica(self):
return self.print_replica
raise DrmException("Not yet initialised with PID. Must be opened with Mobipocket Reader first.")
found_key, pid = self.parseDRM(self.sect[drm_ptr:drm_ptr+drm_size], drm_count, goodpids)
if not found_key:
- raise DrmException("No key found. Most likely the correct PID has not been given.")
+ raise DrmException("No key found. Please report this failure for help.")
# kill the drm keys
self.patchSection(0, "\0" * drm_size, drm_ptr)
# kill the drm pointers
import sys
sys.path.append('lib')
import os, os.path, urllib
+os.environ['PYTHONIOENCODING'] = "utf-8"
+
import subprocess
from subprocess import Popen, PIPE, STDOUT
import subasyncio
# post output from subprocess in scrolled text widget
def showCmdOutput(self, msg):
if msg and msg !='':
- msg = msg.encode('utf-8')
if sys.platform.startswith('win'):
msg = msg.replace('\r\n','\n')
self.stext.insert(Tkconstants.END,msg)
# run as a subprocess via pipes and collect stdout
def zipfixrdr(self, infile, outfile):
# os.putenv('PYTHONUNBUFFERED', '1')
- cmdline = 'python ./lib/zipfix.py "' + infile + '" "' + outfile + '"'
+ pengine = sys.executable
+ if pengine is None or pengine == '':
+ pengine = "python"
+ pengine = os.path.normpath(pengine)
+ cmdline = pengine + ' ./lib/zipfix.py "' + infile + '" "' + outfile + '"'
if sys.platform[0:3] == 'win':
- search_path = os.environ['PATH']
- search_path = search_path.lower()
- if search_path.find('python') >= 0:
- cmdline = 'python lib\zipfix.py "' + infile + '" "' + outfile + '"'
- else :
- cmdline = 'lib\zipfix.py "' + infile + '" "' + outfile + '"'
+ # search_path = os.environ['PATH']
+ # search_path = search_path.lower()
+ # if search_path.find('python') >= 0:
+ # cmdline = 'python lib\zipfix.py "' + infile + '" "' + outfile + '"'
+ # else :
+ # cmdline = 'lib\zipfix.py "' + infile + '" "' + outfile + '"'
+ cmdline = pengine + ' lib\\zipfix.py "' + infile + '" "' + outfile + '"'
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
log += 'Output File = "' + outpath + '"\n'
log += '\n\n'
log += 'Please Wait ...\n\n'
- log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log)
self.p2 = self.zipfixrdr(epubpath, outpath)
import sys
sys.path.append('lib')
import os, os.path, urllib
+os.environ['PYTHONIOENCODING'] = "utf-8"
import subprocess
from subprocess import Popen, PIPE, STDOUT
import subasyncio
# post output from subprocess in scrolled text widget
def showCmdOutput(self, msg):
if msg and msg !='':
- msg = msg.encode('utf-8')
if sys.platform.startswith('win'):
msg = msg.replace('\r\n','\n')
self.stext.insert(Tkconstants.END,msg)
# run xpml2hxtml.py as a subprocess via pipes and collect stdout
def pmlhtml(self, infile, outfile):
# os.putenv('PYTHONUNBUFFERED', '1')
- cmdline = 'python ./lib/xpml2xhtml.py "' + infile + '" "' + outfile + '"'
+ pengine = sys.executable
+ if pengine is None or pengine == '':
+ pengine = "python"
+ pengine = os.path.normpath(pengine)
+ cmdline = pengine + ' ./lib/xpml2xhtml.py "' + infile + '" "' + outfile + '"'
if sys.platform[0:3] == 'win':
- search_path = os.environ['PATH']
- search_path = search_path.lower()
- if search_path.find('python') >= 0:
- cmdline = 'python lib\\xpml2xhtml.py "' + infile + '" "' + outfile + '"'
- else :
- cmdline = 'lib\\xpml2xhtml.py "' + infile + '" "' + outfile + '"'
+ # search_path = os.environ['PATH']
+ # search_path = search_path.lower()
+ # if search_path.find('python') >= 0:
+ # cmdline = 'python lib\\xpml2xhtml.py "' + infile + '" "' + outfile + '"'
+ # else :
+ # cmdline = 'lib\\xpml2xhtml.py "' + infile + '" "' + outfile + '"'
+ cmdline = pengine + ' lib\\xpml2xhtml.py "' + infile + '" "' + outfile + '"'
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
log += 'HTML Output File = "' + outpath + '"\n'
log += '\n\n'
log += 'Please Wait ...\n\n'
- log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log)
self.p2 = self.pmlhtml(pmlpath, outpath)
sys.path.append('lib')
import os, os.path, urllib
+os.environ['PYTHONIOENCODING'] = "utf-8"
import subprocess
from subprocess import Popen, PIPE, STDOUT
import Tkinter
# post output from subprocess in scrolled text widget
def showCmdOutput(self, msg):
if msg and msg !='':
- msg = msg.encode('utf-8')
if sys.platform.startswith('win'):
msg = msg.replace('\r\n','\n')
self.stext.insert(Tkconstants.END,msg)
# run erdr2pml.py as a subprocess via pipes and collect stdout
def erdr(self, infile, outdir, name, ccnum):
# os.putenv('PYTHONUNBUFFERED', '1')
- cmdline = 'python ./lib/erdr2pml.py "' + infile + '" "' + outdir + '" "' + name + '" ' + ccnum
+ pengine = sys.executable
+ if pengine is None or pengine == '':
+ pengine = "python"
+ pengine = os.path.normpath(pengine)
+ cmdline = pengine + ' ./lib/erdr2pml.py "' + infile + '" "' + outdir + '" "' + name + '" ' + ccnum
if sys.platform[0:3] == 'win':
- search_path = os.environ['PATH']
- search_path = search_path.lower()
- if search_path.find('python') >= 0:
- cmdline = 'python lib\erdr2pml.py "' + infile + '" "' + outdir + '" "' + name + '" ' + ccnum
- else :
- cmdline = 'lib\erdr2pml.py "' + infile + '" "' + outdir + '" "' + name + '" ' + ccnum
+ # search_path = os.environ['PATH']
+ # search_path = search_path.lower()
+ # if search_path.find('python') >= 0:
+ # cmdline = 'python lib\erdr2pml.py "' + infile + '" "' + outdir + '" "' + name + '" ' + ccnum
+ # else :
+ # cmdline = 'lib\erdr2pml.py "' + infile + '" "' + outdir + '" "' + name + '" ' + ccnum
+ cmdline = pengine + ' lib\\erdr2pml.py "' + infile + '" "' + outdir + '" "' + name + '" ' + ccnum
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
log += 'Last 8 of CC = "' + ccnum + '"\n'
log += '\n\n'
log += 'Please Wait ...\n'
- log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log)
self.p2 = self.erdr(pdbpath, outpath, name, ccnum)
sys.path.append('lib')
import os, os.path, urllib
+os.environ['PYTHONIOENCODING'] = "utf-8"
import subprocess
from subprocess import Popen, PIPE, STDOUT
import Tkinter
# post output from subprocess in scrolled text widget
def showCmdOutput(self, msg):
if msg and msg !='':
- msg = msg.encode('utf-8')
if sys.platform.startswith('win'):
msg = msg.replace('\r\n','\n')
self.stext.insert(Tkconstants.END,msg)
# run erdr2pml.py as a subprocess via pipes and collect stdout
def erdr(self, infile, name, ccnum):
# os.putenv('PYTHONUNBUFFERED', '1')
- cmdline = 'python ./lib/erdr2pml.py --make-pmlz "' + infile + '" "' + name + '" ' + ccnum
+ pengine = sys.executable
+ if pengine is None or pengine == '':
+ pengine = "python"
+ pengine = os.path.normpath(pengine)
+ cmdline = pengine + ' ./lib/erdr2pml.py --make-pmlz "' + infile + '" "' + name + '" ' + ccnum
if sys.platform[0:3] == 'win':
- search_path = os.environ['PATH']
- search_path = search_path.lower()
- if search_path.find('python') >= 0:
- cmdline = 'python lib\erdr2pml.py --make-pmlz "' + infile + '" "' + name + '" ' + ccnum
- else :
- cmdline = 'lib\erdr2pml.py --make-pmlz "' + infile + '" "' + name + '" ' + ccnum
-
+ # search_path = os.environ['PATH']
+ # search_path = search_path.lower()
+ # if search_path.find('python') >= 0:
+ # cmdline = 'python lib\erdr2pml.py --make-pmlz "' + infile + '" "' + name + '" ' + ccnum
+ # else :
+ # cmdline = 'lib\erdr2pml.py --make-pmlz "' + infile + '" "' + name + '" ' + ccnum
+ cmdline = pengine + ' lib\\erdr2pml.py --make-pmlz "' + infile + '" "' + name + '" ' + ccnum
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
return p2
log += 'Last 8 of CC = "' + ccnum + '"\n'
log += '\n\n'
log += 'Please Wait ...\n'
- log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log)
self.p2 = self.erdr(pdbpath, name, ccnum)