+Changes in version 1.6
+ - support for books whose paragraphs have no styles
+ - support for running cmbtc_dump on Linux and Mac OS X, provided you know the PID of your iPod or standalone Kindle
+ (contributed by DiapDealer)
+
Changes in version 1.5
- completely reworked generation of styles to use actual page heights and widths
- added new script getpagedim.py to support the above
--- /dev/null
+#! /usr/bin/python
+
+from __future__ import with_statement
+
+import csv
+import sys
+import os
+import getopt
+import zlib
+from struct import pack
+from struct import unpack
+
+MAX_PATH = 255
+
+# Put the first 8 characters of your Kindle PID here
+# or supply it with the -p option in the command line
+####################################################
+kindlePID = "12345678"
+####################################################
+
+global bookFile
+global bookPayloadOffset
+global bookHeaderRecords
+global bookMetadata
+global bookKey
+global command
+
+#
+# Exceptions for all the problems that might happen during script execution
+#
+
+class CMBDTCError(Exception):
+ pass
+
+class CMBDTCFatal(Exception):
+ pass
+
+
+#
+# Open the book file at path
+#
+
+def openBook(path):
+ try:
+ return open(path,'rb')
+ except:
+ raise CMBDTCFatal("Could not open book file: " + path)
+
+#
+# Get a 7 bit encoded number from the book file
+#
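+# As handled by the routines below: numbers are stored big-endian, 7 bits per
+# byte, with the high bit set on every byte except the last; a leading 0xFF
+# byte marks a negative number.
+#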
+
+def bookReadEncodedNumber():
+ flag = False
+ data = ord(bookFile.read(1))
+
+ if data == 0xFF:
+ flag = True
+ data = ord(bookFile.read(1))
+
+ if data >= 0x80:
+ datax = (data & 0x7F)
+ while data >= 0x80 :
+ data = ord(bookFile.read(1))
+ datax = (datax <<7) + (data & 0x7F)
+ data = datax
+
+ if flag:
+ data = -data
+ return data
+
+#
+# Encode a number in 7 bit format
+#
+
+def encodeNumber(number):
+ result = ""
+ negative = False
+ flag = 0
+ print("Using encodeNumber routine")
+
+ if number < 0 :
+ number = -number + 1
+ negative = True
+
+ while True:
+ byte = number & 0x7F
+ number = number >> 7
+ byte += flag
+ result += chr(byte)
+ flag = 0x80
+ if number == 0 : break
+
+ if negative:
+ result += chr(0xFF)
+
+ return result[::-1]
+
+#
+# Get a length prefixed string from the file
+#
+
+def bookReadString():
+ stringLength = bookReadEncodedNumber()
+ return unpack(str(stringLength)+"s",bookFile.read(stringLength))[0]
+
+#
+# Returns a length prefixed string
+#
+
+def lengthPrefixString(data):
+ return encodeNumber(len(data))+data
+
+
+#
+# Read and return the data of one header record at the current book file position [[offset,decompressedLength,compressedLength],...]
+#
+
+def bookReadHeaderRecordData():
+ nbValues = bookReadEncodedNumber()
+ values = []
+ for i in range (0,nbValues):
+ values.append([bookReadEncodedNumber(),bookReadEncodedNumber(),bookReadEncodedNumber()])
+ return values
+
+#
+# Read and parse one header record at the current book file position and return the associated data [[offset,decompressedLength,compressedLength],...]
+#
+
+def parseTopazHeaderRecord():
+ if ord(bookFile.read(1)) != 0x63:
+ raise CMBDTCFatal("Parse Error : Invalid Header")
+
+ tag = bookReadString()
+ record = bookReadHeaderRecordData()
+ return [tag,record]
+
+#
+# Parse the header of a Topaz file, get all the header records and the offset for the payload
+#
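+# Header layout, as parsed below: the 4-byte magic "TPZ0", an encoded record
+# count, then one header record per entry (0x63, tag string, list of
+# [offset, decompressedLength, compressedLength] triples), terminated by 0x64.
+# The payload starts immediately after.
+#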
+
+def parseTopazHeader():
+ global bookHeaderRecords
+ global bookPayloadOffset
+ magic = unpack("4s",bookFile.read(4))[0]
+
+ if magic != 'TPZ0':
+ raise CMBDTCFatal("Parse Error : Invalid Header, not a Topaz file")
+
+ nbRecords = bookReadEncodedNumber()
+ bookHeaderRecords = {}
+
+ for i in range (0,nbRecords):
+ result = parseTopazHeaderRecord()
+ print result[0], result[1]
+ bookHeaderRecords[result[0]] = result[1]
+
+ if ord(bookFile.read(1)) != 0x64 :
+ raise CMBDTCFatal("Parse Error : Invalid Header")
+
+ bookPayloadOffset = bookFile.tell()
+
+#
+# Get a record from the book payload, given its name and index.
+# The record is decrypted and decompressed if necessary.
+#
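+# Record layout, as parsed below: tag string, encoded record index (negative
+# when the record is encrypted), then the record data, which is zlib-compressed
+# when the header's compressedLength field is non-zero.
+#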
+
+def getBookPayloadRecord(name, index):
+ encrypted = False
+ compressed = False
+
+ try:
+ recordOffset = bookHeaderRecords[name][index][0]
+ except:
+ raise CMBDTCFatal("Parse Error : Invalid Record, record not found")
+
+ bookFile.seek(bookPayloadOffset + recordOffset)
+
+ tag = bookReadString()
+ if tag != name :
+ raise CMBDTCFatal("Parse Error : Invalid Record, record name doesn't match")
+
+ recordIndex = bookReadEncodedNumber()
+
+ if recordIndex < 0 :
+ encrypted = True
+ recordIndex = -recordIndex -1
+
+ if recordIndex != index :
+ raise CMBDTCFatal("Parse Error : Invalid Record, index doesn't match")
+
+ if (bookHeaderRecords[name][index][2] > 0):
+ compressed = True
+ record = bookFile.read(bookHeaderRecords[name][index][2])
+ else:
+ record = bookFile.read(bookHeaderRecords[name][index][1])
+
+ if encrypted:
+ ctx = topazCryptoInit(bookKey)
+ record = topazCryptoDecrypt(record,ctx)
+
+ if compressed:
+ record = zlib.decompress(record)
+
+ return record
+
+#
+# Extract, decrypt and decompress a book record indicated by name and index and print it or save it in "filename"
+#
+
+def extractBookPayloadRecord(name, index, filename):
+ compressed = False
+
+ try:
+ compressed = bookHeaderRecords[name][index][2] != 0
+ record = getBookPayloadRecord(name,index)
+ except:
+ print("Could not find record")
+
+ # if compressed:
+ # try:
+ # record = zlib.decompress(record)
+ # except:
+ # raise CMBDTCFatal("Could not decompress record")
+
+ if filename != "":
+ try:
+ file = open(filename,"wb")
+ file.write(record)
+ file.close()
+ except:
+ raise CMBDTCFatal("Could not write to destination file")
+ else:
+ print(record)
+
+#
+# Return the next [key,value] record of the book metadata at the current book file position
+#
+
+def readMetadataRecord():
+ return [bookReadString(),bookReadString()]
+
+#
+# Parse the metadata record from the book payload and fill the bookMetadata dictionary with its [key, value] pairs
+#
+
+def parseMetadata():
+ global bookHeaderRecords
+ global bookPayloadOffset
+ global bookMetadata
+ bookMetadata = {}
+ bookFile.seek(bookPayloadOffset + bookHeaderRecords["metadata"][0][0])
+ tag = bookReadString()
+ if tag != "metadata" :
+ raise CMBDTCFatal("Parse Error : Record Names Don't Match")
+
+ flags = ord(bookFile.read(1))
+ nbRecords = ord(bookFile.read(1))
+
+ for i in range (0,nbRecords) :
+ record =readMetadataRecord()
+ bookMetadata[record[0]] = record[1]
+
+#
+# Context initialisation for the Topaz Crypto
+#
+
+def topazCryptoInit(key):
+ ctx1 = 0x0CAFFE19E
+
+ for keyChar in key:
+ keyByte = ord(keyChar)
+ ctx2 = ctx1
+ ctx1 = ((((ctx1 >>2) * (ctx1 >>7))&0xFFFFFFFF) ^ (keyByte * keyByte * 0x0F902007)& 0xFFFFFFFF )
+ return [ctx1,ctx2]
+
+#
+# decrypt data with the context prepared by topazCryptoInit()
+#
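+# This is a simple stream cipher: each plaintext byte is recovered by XORing
+# the data byte with bits of the two 32-bit context words, and the recovered
+# byte is then fed back into the state update.
+#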
+
+def topazCryptoDecrypt(data, ctx):
+ ctx1 = ctx[0]
+ ctx2 = ctx[1]
+
+ plainText = ""
+
+ for dataChar in data:
+ dataByte = ord(dataChar)
+ m = (dataByte ^ ((ctx1 >> 3) &0xFF) ^ ((ctx2<<3) & 0xFF)) &0xFF
+ ctx2 = ctx1
+ ctx1 = (((ctx1 >> 2) * (ctx1 >> 7)) &0xFFFFFFFF) ^((m * m * 0x0F902007) &0xFFFFFFFF)
+ plainText += chr(m)
+
+ return plainText
+
+#
+# Decrypt a payload record with the PID
+#
+
+def decryptRecord(data,PID):
+ ctx = topazCryptoInit(PID)
+ return topazCryptoDecrypt(data, ctx)
+
+#
+# Try to decrypt a dkey record (it contains the book key, keyed to one PID)
+#
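+# Decrypted record layout, as checked below: "PID", 0x08, the 8-character PID,
+# 0x08, the 8-character book key, "pid".
+#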
+
+def decryptDkeyRecord(data,PID):
+ record = decryptRecord(data,PID)
+ fields = unpack("3sB8sB8s3s",record)
+
+ if fields[0] != "PID" or fields[5] != "pid" :
+ raise CMBDTCError("Didn't find PID magic numbers in record")
+ elif fields[1] != 8 or fields[3] != 8 :
+ raise CMBDTCError("Record didn't contain correct length fields")
+ elif fields[2] != PID :
+ raise CMBDTCError("Record didn't contain PID")
+
+ return fields[4]
+
+#
+# Decrypt all the book's dkey records (each one yields the book key for one registered PID)
+#
+
+def decryptDkeyRecords(data,PID):
+ nbKeyRecords = ord(data[0])
+ records = []
+ data = data[1:]
+ for i in range (0,nbKeyRecords):
+ length = ord(data[0])
+ try:
+ key = decryptDkeyRecord(data[1:length+1],PID)
+ records.append(key)
+ except CMBDTCError:
+ pass
+ data = data[1+length:]
+
+ return records
+
+#
+# Create decrypted book payload
+#
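+# One file is written per record, named <tag>NNNN<ext>: 'img' records go to
+# img/imgNNNN.jpg, 'page' records to page/pageNNNN.dat, 'glyphs' records to
+# glyphs/glyphsNNNN.dat, and any other records (except 'dkey') to <tag>NNNN.dat
+# in the output directory itself.
+#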
+
+def createDecryptedPayload(payload):
+ for headerRecord in bookHeaderRecords:
+ name = headerRecord
+ if name != "dkey" :
+ ext = '.dat'
+ if name == 'img' : ext = '.jpg'
+ for index in range (0,len(bookHeaderRecords[name])) :
+ fnum = "%04d" % index
+ fname = name + fnum + ext
+ destdir = payload
+ if name == 'img':
+ destdir = os.path.join(payload,'img')
+ if name == 'page':
+ destdir = os.path.join(payload,'page')
+ if name == 'glyphs':
+ destdir = os.path.join(payload,'glyphs')
+ outputFile = os.path.join(destdir,fname)
+ file(outputFile, 'wb').write(getBookPayloadRecord(name, index))
+
+
+#
+# Create decrypted book
+#
+
+def createDecryptedBook(outdir):
+ if not os.path.exists(outdir):
+ os.makedirs(outdir)
+
+ destdir = os.path.join(outdir,'img')
+ if not os.path.exists(destdir):
+ os.makedirs(destdir)
+
+ destdir = os.path.join(outdir,'page')
+ if not os.path.exists(destdir):
+ os.makedirs(destdir)
+
+ destdir = os.path.join(outdir,'glyphs')
+ if not os.path.exists(destdir):
+ os.makedirs(destdir)
+
+ createDecryptedPayload(outdir)
+
+
+#
+# Set the command for the program to execute, according to the command line parameters
+#
+
+def setCommand(name) :
+ global command
+ if command != "" :
+ raise CMBDTCFatal("Invalid command line parameters")
+ else :
+ command = name
+
+#
+# Program usage
+#
+
+def usage():
+ print("\nUsage:")
+ print("\ncmbtc_dump_linux.py [options] bookFileName\n")
+ print("-p Adds a PID to the list of PIDs that are tried to decrypt the book key (can be used several times)")
+ print("-d Dumps the unencrypted book as files to outdir")
+ print("-o Output directory to save book files to")
+ print("-v Verbose (can be used several times)")
+
+
+#
+# Main
+#
+
+def main(argv=sys.argv):
+ global bookMetadata
+ global bookKey
+ global bookFile
+ global command
+
+ progname = os.path.basename(argv[0])
+
+ verbose = 0
+ recordName = ""
+ recordIndex = 0
+ outputFile = ""
+ outdir = ""
+ PIDs = []
+ command = ""
+
+ # Preloads your Kindle pid from the top of the program.
+ PIDs.append(kindlePID)
+
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], "vo:p:d")
+ except getopt.GetoptError, err:
+ # print help information and exit:
+ print str(err) # will print something like "option -a not recognized"
+ usage()
+ sys.exit(2)
+
+ if len(opts) == 0 and len(args) == 0 :
+ usage()
+ sys.exit(2)
+
+ for o, a in opts:
+ if o == "-v":
+ verbose+=1
+ if o =="-o":
+ if a == None :
+ raise CMBDTCFatal("Invalid parameter for -o")
+ outdir = a
+ if o =="-p":
+ PIDs.append(a)
+ if o =="-d":
+ setCommand("doit")
+
+ if command == "" :
+ raise CMBDTCFatal("No action supplied on command line")
+
+ #
+ # Open book and parse metadata
+ #
+
+ if len(args) == 1:
+
+ bookFile = openBook(args[0])
+ parseTopazHeader()
+ parseMetadata()
+
+ #
+ # Decrypt book key
+ #
+
+ dkey = getBookPayloadRecord('dkey', 0)
+
+ bookKeys = []
+ for PID in PIDs :
+ bookKeys+=decryptDkeyRecords(dkey,PID)
+
+ if len(bookKeys) == 0 :
+ if verbose > 0 :
+ print ("Book key could not be found. Maybe this book is not registered with this device.")
+ else :
+ bookKey = bookKeys[0]
+ if verbose > 0:
+ print("Book key: " + bookKey.encode('hex'))
+
+
+
+ if command == "printRecord" :
+ extractBookPayloadRecord(recordName,int(recordIndex),outputFile)
+ if outputFile != "" and verbose>0 :
+ print("Wrote record to file: "+outputFile)
+ elif command == "doit" :
+ if outdir != "" :
+ createDecryptedBook(outdir)
+ if verbose >0 :
+ print ("Decrypted book saved. Don't pirate!")
+ elif verbose > 0:
+ print("Output directory name was not supplied.")
+
+ return 0
+
+if __name__ == '__main__':
+ sys.exit(main())
class DocParser(object):
def __init__(self, flatxml, classlst, fileid):
self.id = os.path.basename(fileid).replace('.dat','')
- self.flatdoc = flatxml.split('\n')
+ self.docList = flatxml.split('\n')
+ self.docSize = len(self.docList)
self.classList = {}
tmpList = classlst.split('\n')
for pclass in tmpList:
self.paracont_stemid = []
self.parastems_stemid = []
- # find tag if within pos to end inclusive
+ # return tag at line pos in document
def lineinDoc(self, pos) :
- docList = self.flatdoc
- cnt = len(docList)
- if (pos >= 0) and (pos < cnt) :
- item = docList[pos]
+ if (pos >= 0) and (pos < self.docSize) :
+ item = self.docList[pos]
if item.find('=') >= 0:
(name, argres) = item.split('=',1)
else :
return name, argres
- # find tag if within pos to end inclusive
+ # find tag in doc if within pos to end inclusive
def findinDoc(self, tagpath, pos, end) :
result = None
- docList = self.flatdoc
- cnt = len(docList)
if end == -1 :
- end = cnt
+ end = self.docSize
else:
- end = min(cnt,end)
+ end = min(self.docSize, end)
foundat = -1
for j in xrange(pos, end):
- item = docList[j]
+ item = self.docList[j]
if item.find('=') >= 0:
- (name, argres) = item.split('=')
+ (name, argres) = item.split('=',1)
else :
name = item
argres = ''
result = []
- # normal paragraph
+ # paragraph
(pos, pclass) = self.findinDoc('paragraph.class',start,end)
 	# class names are an issue given topaz may start them with numerals (not allowed)
 	# and may build specific classes from a base class (but then not actually provide
 	# all of these _reclustered classes in the stylesheet!)
- # so we clean this up by lowercasing, prepend 'cl_', and getting any baseclass
+ # so we clean this up by lowercasing, prepend 'cl-', and getting any baseclass
# that exists in the stylesheet first, and then adding this specific class
# after
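+	# e.g. a (hypothetical) topaz class 'Graph_Header' becomes 'cl-graph cl-graph_header'
+	# when 'cl-graph' exists in the stylesheet, and just 'cl-graph_header' otherwise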
- classres = ''
- pclass = pclass.lower()
- pclass = 'cl-' + pclass
- p = pclass.find('_')
- if p > 0 :
- baseclass = pclass[0:p]
- if baseclass in self.classList:
- classres += baseclass + ' '
- classres += pclass
- pclass = classres
+ if pclass != None :
+ classres = ''
+ pclass = pclass.lower()
+ pclass = 'cl-' + pclass
+ p = pclass.find('_')
+ if p > 0 :
+ baseclass = pclass[0:p]
+ if baseclass in self.classList:
+ classres += baseclass + ' '
+ classres += pclass
+ pclass = classres
# build up a description of the paragraph in result and return it
# first check for the basic - all words paragraph
# if end is -1 then we must search to end of document
if end == -1 :
- docList = self.flatdoc
- cnt = len(docList)
- end = cnt
+ end = self.docSize
while (line < end) :
return pclass, result
- def buildParagraph(self, cname, pdesc, type, regtype) :
+ def buildParagraph(self, pclass, pdesc, type, regtype) :
parares = ''
sep =''
- br_lb = False
- if (regtype == 'fixed') or (regtype == 'chapterheading'):
- br_lb = True
+ classres = ''
+ if pclass :
+ classres = ' class="' + pclass + '"'
- handle_links = False
- if len(self.link_id) > 0:
- handle_links = True
+ br_lb = (regtype == 'fixed') or (regtype == 'chapterheading')
+ handle_links = len(self.link_id) > 0
+
if (type == 'full') or (type == 'begin') :
- parares += '<p class="' + cname + '">'
+ parares += '<p' + classres + '>'
if (type == 'end'):
parares += ' '
if word == '_link_' : word = ''
if word == '_lb_':
- if (num-1) in self.dehyphen_rootid :
- word = ''
- sep = ''
- elif handle_links :
+ if ((num-1) in self.dehyphen_rootid ) or handle_links:
word = ''
sep = ''
elif br_lb :
htmlpage = ''
- # first collect information from the xml doc that describes this page
+ # get the ocr text
(pos, argres) = self.findinDoc('info.word.ocrText',0,-1)
if argres : self.ocrtext = argres.split('|')
+ # get information to dehyphenate the text
(pos, argres) = self.findinDoc('info.dehyphen.rootID',0,-1)
if argres:
argList = argres.split('|')
self.dehyphen_rootid = [ int(strval) for strval in argList]
+ # determine if first paragraph is continued from previous page
(pos, self.parastems_stemid) = self.findinDoc('info.paraStems.stemID',0,-1)
- if self.parastems_stemid == None : self.parastems_stemid = []
-
+ first_para_continued = (self.parastems_stemid != None)
+
+ # determine if last paragraph is continued onto the next page
(pos, self.paracont_stemid) = self.findinDoc('info.paraCont.stemID',0,-1)
- if self.paracont_stemid == None : self.paracont_stemid = []
-
+ last_para_continued = (self.paracont_stemid != None)
+ # collect link ids
(pos, argres) = self.findinDoc('info.word.link_id',0,-1)
if argres:
argList = argres.split('|')
self.link_id = [ int(strval) for strval in argList]
+ # collect link destination page numbers
(pos, argres) = self.findinDoc('info.links.page',0,-1)
if argres :
argList = argres.split('|')
self.link_page = [ int(strval) for strval in argList]
+ # collect link titles
(pos, argres) = self.findinDoc('info.links.title',0,-1)
if argres :
self.link_title = argres.split('|')
else:
self.link_title.append('')
+
+ # get page type
(pos, pagetype) = self.findinDoc('page.type',0,-1)
# generate a list of each region starting point
 	# each region has one paragraph, or one image, or one chapterheading
+
regionList= self.posinDoc('region')
regcnt = len(regionList)
regionList.append(-1)
# process each region tag and convert what you can to html
for j in xrange(regcnt):
+
start = regionList[j]
end = regionList[j+1]
(pos, regtype) = self.findinDoc('region.type',start,end)
+ # set anchor for link target on this page
+ if not anchorSet and not first_para_continued:
+ htmlpage += '<div id="' + self.id + '" class="page_' + pagetype + '"> </div>\n'
+ anchorSet = True
+
if regtype == 'graphic' :
- if not anchorSet:
- htmlpage += '<div id="' + self.id + '" class="page_' + pagetype + '"> </div>\n'
- anchorSet = True
(pos, simgsrc) = self.findinDoc('img.src',start,end)
if simgsrc:
htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
+
elif regtype == 'chapterheading' :
(pclass, pdesc) = self.getParaDescription(start,end)
if not breakSet:
htmlpage += '<div style="page-break-after: always;"> </div>\n'
breakSet = True
- if not anchorSet:
- htmlpage += '<div id="' + self.id + '" class="page_' + pagetype + '"> </div>\n'
- anchorSet = True
tag = 'h1'
- if pclass[3:7] == 'ch1-' : tag = 'h1'
- if pclass[3:7] == 'ch2-' : tag = 'h2'
- if pclass[3:7] == 'ch3-' : tag = 'h3'
- htmlpage += '<' + tag + ' class="' + pclass + '">'
+ if pclass and (len(pclass) >= 7):
+ if pclass[3:7] == 'ch1-' : tag = 'h1'
+ if pclass[3:7] == 'ch2-' : tag = 'h2'
+ if pclass[3:7] == 'ch3-' : tag = 'h3'
+ htmlpage += '<' + tag + ' class="' + pclass + '">'
+ else:
+ htmlpage += '<' + tag + '>'
htmlpage += self.buildParagraph(pclass, pdesc, 'middle', regtype)
htmlpage += '</' + tag + '>'
+
elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') or (regtype == 'listitem') :
ptype = 'full'
 	# check to see if this is a continuation from the previous page
- if (len(self.parastems_stemid) > 0):
+ if first_para_continued :
ptype = 'end'
- self.parastems_stemid=[]
- else:
- if not anchorSet:
- htmlpage += '<div id="' + self.id + '" class="page_' + pagetype + '"> </div>\n'
- anchorSet = True
+ first_para_continued = False
(pclass, pdesc) = self.getParaDescription(start,end)
- if ptype == 'full' :
+ if pclass and (len(pclass) >= 6) and (ptype == 'full'):
tag = 'p'
if pclass[3:6] == 'h1-' : tag = 'h4'
if pclass[3:6] == 'h2-' : tag = 'h5'
else :
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
+
elif (regtype == 'tocentry') :
ptype = 'full'
- # check to see if this is a continution from the previous page
- if (len(self.parastems_stemid) > 0) and (j == 0):
- # process the first paragraph as a continuation from the last page
+ if first_para_continued :
ptype = 'end'
- self.parastems_stemid = []
- else:
- if not anchorSet:
- htmlpage += '<div id="' + self.id + '" class="page_' + pagetype + '"> </div>\n'
- anchorSet = True
+ first_para_continued = False
(pclass, pdesc) = self.getParaDescription(start,end)
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
+
elif (regtype == 'synth_fcvr.center') or (regtype == 'synth_text.center'):
- if not anchorSet:
- htmlpage += '<div id="' + self.id + '" class="page_' + pagetype + '"> </div>\n'
- anchorSet = True
(pos, simgsrc) = self.findinDoc('img.src',start,end)
if simgsrc:
htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
+
else :
print 'Warning: Unknown region type', regtype
(pos, temp) = self.findinDoc('paragraph',start,end)
regtype = 'fixed'
ptype = 'full'
 	# check to see if this is a continuation from the previous page
- if (len(self.parastems_stemid) > 0):
+ if first_para_continued :
ptype = 'end'
- self.parastems_stemid=[]
- else:
- if not anchorSet:
- htmlpage += '<div id="' + self.id + '" class="page_' + pagetype + '"> </div>\n'
- anchorSet = True
+ first_para_continued = False
(pclass, pdesc) = self.getParaDescription(start,end)
- if ptype == 'full' :
+ if pclass and (ptype == 'full') and (len(pclass) >= 6):
tag = 'p'
if pclass[3:6] == 'h1-' : tag = 'h4'
if pclass[3:6] == 'h2-' : tag = 'h5'
else :
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
else :
- print 'Treating this like a "image" region'
- if not anchorSet:
- htmlpage += '<div id="' + self.id + '" class="page_' + pagetype + '"> </div>\n'
- anchorSet = True
+ print 'Treating this like a "graphic" region'
(pos, simgsrc) = self.findinDoc('img.src',start,end)
if simgsrc:
htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
- if len(self.paracont_stemid) > 0 :
+
+ if last_para_continued :
if htmlpage[-4:] == '</p>':
- htmlpage = htmlpage[0:-4]
+ htmlpage = htmlpage[0:-4]
+ last_para_continued = False
return htmlpage
- return self.convert2HTML()
-
-
def convert2HTML(flatxml, classlst, fileid):
clarknova - for all of the svg and glyph generation and many other bug fixes and improvements
  skindle - for figuring out the general case for the mode loops
some updates - for conversion to xml, basic html
- DiapDealer - for extensive testing and feedback
+ DiapDealer - for extensive testing and feedback, and for the standalone Linux/Mac OS X version of cmbtc_dump
stewball - for extensive testing and feedback
and others for posting, feedback and testing
genhtml.py - main program to generate "book.html"
  gensvg.py - (author: clarknova) main program to create an svg graphic of each page
+
+In addition there is now a new file:
+
+cmbtc_dump_mac_linux.py
+
+If you know the PID of your iPod and/or your standalone Kindle, and your book
+was meant for that device, you can use this program to dump the book's sections
+on Mac OS X and Linux (and even on Windows if you do not have Kindle4PC installed).
+Thanks to DiapDealer for creating it!
+
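+For example (the book file name below is hypothetical, and the PID shown is
+just a placeholder), a typical invocation looks like:
+
+  python cmbtc_dump_mac_linux.py -v -p 12345678 -d -o book_files mybook.azw1
+
+This tries the supplied PID against the book's dkey records and, if the book
+key is found, writes the decrypted sections into the "book_files" directory.
+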
+
Please note, gensvg.py, genhtml.py, and genxml.py import and use
decode_meta.py, convert2xml.py, flatxml2html.py, getpagedim.py and stylexml2css.py
so please keep all of these python scripts together in the same place.