topazscripts 2.0

author some_updates <some_updates@gmail.com>

Tue, 27 Jan 2009 12:20:37 +0000 (12:20 +0000)

committer Apprentice Alf <apprenticealf@gmail.com>

Sat, 28 Feb 2015 12:20:58 +0000 (12:20 +0000)
author some_updates <some_updates@gmail.com>
Tue, 27 Jan 2009 12:20:37 +0000 (12:20 +0000)
committer Apprentice Alf <apprenticealf@gmail.com>
Sat, 28 Feb 2015 12:20:58 +0000 (12:20 +0000)
diff --git a/Topaz_Tools/lib/changes.txt b/Topaz_Tools/lib/changes.txt

index 83910c00ba0e76988df668cd5e1c1e60b8f6c7bf..125a869f15e74c8f6d2fdf9641d40c3dbafa1363 100644 (file)
--- a/Topaz_Tools/lib/changes.txt
+++ b/Topaz_Tools/lib/changes.txt
@@ -1,17 +1,29 @@
-Changes in version 1.8
+Changes in version 2.0
+
+       - gensvg.py now accepts two options
+            -x : output browseable XHTML+SVG pages (default)
+            -r : output raw SVG images (useful for later conversion to pdf)
+         
+       - flatxml2html.py now understands page.groups of type graphic
+            and handles vertical regions as svg images
+
+       - genhtml.py now accepts an option
+            --fixed-image : which will force the conversion
+                            of all fixed regions to svg images
+
+       - minor bug fixes and html conversion improvements
+
  
+Changes in version 1.8
         - gensvg.py now builds wonderful xhtml pages with embedded svg 
             that can be easily paged through as if reading a book!
             (tested in Safari for Mac and Win and Firefox)
             (requires javascript to be enabled)
-
         - genhtml.py now REQUIRES that gensvg.py be run FIRST
              this allows create of images on the fly from glyphs
-
         - genhtml.py now automatically makes tables of words into svg
              based images and will handle glyph based ornate first 
              letters of words
-
         - cmbtc_dump_mac_linux.py has been renamed to be
              cmbtc_dump_nonK4PC.py to make it clearer
              when it needs to be used
diff --git a/Topaz_Tools/lib/cmbtc_dump.py b/Topaz_Tools/lib/cmbtc_dump.py

index ac7e33c390e34a443409b86d1918884fe9656247..de0ecf7abda93dd8a98fb73dfd7488a979d8d97a 100644 (file)
--- a/Topaz_Tools/lib/cmbtc_dump.py
+++ b/Topaz_Tools/lib/cmbtc_dump.py
@@ -1,5 +1,5 @@
  #! /usr/bin/python
-# For use in Topaz Scripts version 1.8
+# For use in Topaz Scripts version 2.0
  
  """
  
diff --git a/Topaz_Tools/lib/cmbtc_dump_nonK4PC.py b/Topaz_Tools/lib/cmbtc_dump_nonK4PC.py

index ed7ff877e2c6fea73fe02a8333638190b1e12021..5e43ae6b0ec1d261280bb43ff2d44d798e7d1721 100644 (file)
--- a/Topaz_Tools/lib/cmbtc_dump_nonK4PC.py
+++ b/Topaz_Tools/lib/cmbtc_dump_nonK4PC.py
@@ -1,5 +1,5 @@
  #! /usr/bin/python
-# For use with Topaz Scripts Version 1.8
+# For use with Topaz Scripts Version 2.0
  
  from __future__ import with_statement
  
diff --git a/Topaz_Tools/lib/convert2xml.py b/Topaz_Tools/lib/convert2xml.py

index 12ca9340941e17119e4733638e921af2ebf71fc5..4e841847fa477de0318ce7ffeffd97dc8138ec21 100644 (file)
--- a/Topaz_Tools/lib/convert2xml.py
+++ b/Topaz_Tools/lib/convert2xml.py
@@ -1,6 +1,6 @@
  #! /usr/bin/python
  # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-# For use with Topaz Scripts Version 1.8                                                                                                  
+# For use with Topaz Scripts Version 2.0
  
  from __future__ import with_statement
  import csv
diff --git a/Topaz_Tools/lib/decode_meta.py b/Topaz_Tools/lib/decode_meta.py

index ba831ec25c3056f47b80524b47d016c42b7a1f60..9f58a53277b03e359f10279bc6641507a5aee525 100644 (file)
--- a/Topaz_Tools/lib/decode_meta.py
+++ b/Topaz_Tools/lib/decode_meta.py
@@ -1,6 +1,6 @@
  #! /usr/bin/python
  # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-# For use with Topaz Scripts Version 1.8                                                                                                  
+# For use with Topaz Scripts Version 2.0
  
  from __future__ import with_statement
  import csv
diff --git a/Topaz_Tools/lib/flatxml2html.py b/Topaz_Tools/lib/flatxml2html.py

index 4182502585d15a89a99571dae74544aa1166f782..eaeeabe1cfdcfd7b6e68b82a128d07431f2b1f4c 100644 (file)
--- a/Topaz_Tools/lib/flatxml2html.py
+++ b/Topaz_Tools/lib/flatxml2html.py
@@ -1,6 +1,6 @@
  #! /usr/bin/python
  # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-# For use with Topaz Scripts Version 1.8                                                                                                  
+# For use with Topaz Scripts Version 2.0
  
  from __future__ import with_statement
  import csv
@@ -13,7 +13,7 @@ from struct import unpack
  
  
  class DocParser(object):
-    def __init__(self, flatxml, classlst, fileid, bookDir):
+    def __init__(self, flatxml, classlst, fileid, bookDir, fixedimage):
          self.id = os.path.basename(fileid).replace('.dat','')
          self.svgcount = 0
          self.docList = flatxml.split('\n')
@@ -28,6 +28,7 @@ class DocParser(object):
                  # remove the leading period from the css name
                  cname = pclass[1:]
              self.classList[cname] = True
+        self.fixedimage = fixedimage
          self.ocrtext = []
          self.link_id = []
          self.link_title = []
@@ -63,7 +64,7 @@ class DocParser(object):
          imgname = self.id + '_%04d.svg' % self.svgcount
          imgfile = os.path.join(imgDir,imgname)
  
-        # build hash table of glyph paths keyed by glyph id
+        # build hashtable of glyph paths keyed by glyph id
          if self.numPaths == 0:
              gfile = open(glyfile, 'r')
              while True:
@@ -194,15 +195,9 @@ class DocParser(object):
          return argres
  
  
-
-    # build a description of the paragraph
-    def getParaDescription(self, start, end):
-
-        result = []
-
-        # paragraph
-        (pos, pclass) = self.findinDoc('paragraph.class',start,end) 
-
+    # get the class
+    def getClass(self, pclass):
+        nclass = pclass
          # class names are an issue given topaz may start them with numerals (not allowed),
          # use a mix of cases (which cause some browsers problems), and actually
          # attach numbers after "_reclustered*" to the end to deal classeses that inherit
@@ -212,17 +207,85 @@ class DocParser(object):
          # so we clean this up by lowercasing, prepend 'cl-', and getting any baseclass
          # that exists in the stylesheet first, and then adding this specific class
          # after
-        if pclass != None :
+        if nclass != None :
              classres = ''
-            pclass = pclass.lower()
-            pclass = 'cl-' + pclass
-            p = pclass.find('_')
-            if p > 0 :
-                baseclass = pclass[0:p]
-                if baseclass in self.classList:
-                    classres += baseclass + ' '
-            classres += pclass
-            pclass = classres
+            nclass = nclass.lower()
+            nclass = 'cl-' + nclass
+            baseclass = ''
+            # graphic is the base class for captions
+            if nclass.find('cl-cap-') >=0 :
+                classres = 'graphic' + ' '
+            else :
+                # strip to find baseclass
+                p = nclass.find('_')
+                if p > 0 :
+                    baseclass = nclass[0:p]
+                    if baseclass in self.classList:
+                        classres += baseclass + ' '
+            classres += nclass
+            nclass = classres
+        return nclass
+
+
+    # develop a sorted description of the starting positions of 
+    # groups and regions on the page, as well as the page type
+    def PageDescription(self):
+
+        def compare(x, y):
+            (xtype, xval) = x
+            (ytype, yval) = y
+            if xval > yval:
+                return 1
+            if xval == yval:
+                return 0
+            return -1
+
+        result = []
+        (pos, pagetype) = self.findinDoc('page.type',0,-1)
+
+        groupList = self.posinDoc('page.group')
+        groupregionList = self.posinDoc('page.group.region')
+        pageregionList = self.posinDoc('page.region')
+        # integrate into one list
+        for j in groupList:
+            result.append(('grpbeg',j))
+        for j in groupregionList:
+            result.append(('gregion',j))
+        for j in pageregionList:
+            result.append(('pregion',j))
+        result.sort(compare)
+
+        # insert group end and page end indicators
+        inGroup = False
+        j = 0
+        while True:
+            if j == len(result): break
+            rtype = result[j][0]
+            rval = result[j][1]
+            if not inGroup and (rtype == 'grpbeg') :
+                inGroup = True
+                j = j + 1
+            elif inGroup and (rtype in ('grpbeg', 'pregion')):
+                result.insert(j,('grpend',rval))
+                inGroup = False
+            else:
+                j = j + 1
+        if inGroup:
+            result.append(('grpend',-1))
+        result.append(('pageend', -1))
+        return pagetype, result
+
+
+
+    # build a description of the paragraph
+    def getParaDescription(self, start, end, regtype):
+
+        result = []
+
+        # paragraph
+        (pos, pclass) = self.findinDoc('paragraph.class',start,end) 
+
+        pclass = self.getClass(pclass)
  
          # build up a description of the paragraph in result and return it
          # first check for the  basic - all words paragraph
@@ -231,13 +294,49 @@ class DocParser(object):
          if (sfirst != None) and (slast != None) :
              first = int(sfirst)
              last = int(slast)
-            for wordnum in xrange(first, last):
-                result.append(('ocr', wordnum))
+            
+            makeImage = (regtype == 'vertical') or (regtype == 'table')
+            if self.fixedimage:
+                makeImage = makeImage or (regtype == 'fixed')
+
+            if (pclass != None): 
+                makeImage = makeImage or (pclass.find('.inverted') >= 0)
+                if self.fixedimage :
+                    makeImage = makeImage or (pclass.find('cl-f-') >= 0)
+
+            if not makeImage :
+                # standard all word paragraph
+                for wordnum in xrange(first, last):
+                    result.append(('ocr', wordnum))
+                return pclass, result
+
+            # convert paragraph to svg image
+            # translate first and last word into first and last glyphs
+            # and generate inline image and include it
+            glyphList = []
+            firstglyphList = self.getData('word.firstGlyph',0,-1)
+            gidList = self.getData('info.glyph.glyphID',0,-1)
+            firstGlyph = firstglyphList[first]
+            if last < len(firstglyphList):
+                lastGlyph = firstglyphList[last]
+            else :
+                lastGlyph = len(gidList)
+            for glyphnum in xrange(firstGlyph, lastGlyph):
+                glyphList.append(glyphnum)
+            # include any extratokens if they exist
+            (pos, sfg) = self.findinDoc('extratokens.firstGlyph',start,end)
+            (pos, slg) = self.findinDoc('extratokens.lastGlyph',start,end)
+            if (sfg != None) and (slg != None):
+                for glyphnum in xrange(int(sfg), int(slg)):
+                    glyphList.append(glyphnum)
+            num = self.svgcount
+            self.glyphs_to_image(glyphList)
+            self.svgcount += 1
+            result.append(('svg', num))
              return pclass, result
  
-        # this type of paragrph may be made up of multiple _spans, inline 
-        # word monograms (images) and words with semantic meaning
-        # and now a new type "span" versus the old "_span"
+        # this type of paragraph may be made up of multiple spans, inline 
+        # word monograms (images), and words with semantic meaning, 
          # plus glyphs used to form starting letter of first word
          
          # need to parse this type line by line
@@ -252,6 +351,7 @@ class DocParser(object):
  
              (name, argres) = self.lineinDoc(line)
  
+            # handle both span and _span
              if name.endswith('span.firstWord') :
                  first = int(argres)
                  (name, argres) = self.lineinDoc(line+1)
@@ -422,148 +522,78 @@ class DocParser(object):
          else:
              self.link_title.append('')
  
-
-        # get page type
-        (pos, pagetype) = self.findinDoc('page.type',0,-1)
-
-
-        # generate a list of each region starting point
-        # each region has one paragraph,, or one image, or one chapterheading
-
-        regionList= self.posinDoc('region')
-        regcnt = len(regionList)
-        regionList.append(-1)
+        # get a description of the starting points of the regions
+        # and groups on the page
+        (pagetype, pageDesc) = self.PageDescription() 
+        regcnt = len(pageDesc) - 1
  
          anchorSet = False
          breakSet = False
-
-        # process each region tag and convert what you can to html
+        inGroup = False
+        
+        # process each region on the page and convert what you can to html
  
          for j in xrange(regcnt):
  
-            start = regionList[j]
-            end = regionList[j+1]
-
-            (pos, regtype) = self.findinDoc('region.type',start,end)
+            (etype, start) = pageDesc[j]
+            (ntype, end) = pageDesc[j+1]
+            
  
              # set anchor for link target on this page
              if not anchorSet and not first_para_continued:
-                htmlpage += '<div style="visibility: hidden; height: 0; width: 0;" id="' + self.id + '" title="pagetype_' + pagetype + '"></div>\n'
+                htmlpage += '<div style="visibility: hidden; height: 0; width: 0;" id="' 
+                htmlpage += self.id + '" title="pagetype_' + pagetype + '"></div>\n'
                  anchorSet = True
  
-            if regtype == 'graphic' :
-                (pos, simgsrc) = self.findinDoc('img.src',start,end)
-                if simgsrc:
-                    htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
-
+            # handle groups of graphics with text captions
+            if (etype == 'grpbeg'):
+                (pos, grptype) = self.findinDoc('group.type', start, end)
+                if grptype != None:
+                    if grptype == 'graphic':
+                        gcstr = ' class="' + grptype + '"'
+                        htmlpage += '<div' + gcstr + '>'
+                        inGroup = True
+                
+            elif (etype == 'grpend'):
+                if inGroup:
+                    htmlpage += '</div>\n'
+                    inGroup = False
+
+            else:
+                (pos, regtype) = self.findinDoc('region.type',start,end)
+
+                if regtype == 'graphic' :
+                    (pos, simgsrc) = self.findinDoc('img.src',start,end)
+                    if simgsrc:
+                        if inGroup:
+                            htmlpage += '<img src="img/img%04d.jpg" alt="" />' % int(simgsrc)
+                        else:
+                            htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
              
-            elif regtype == 'chapterheading' :
-                (pclass, pdesc) = self.getParaDescription(start,end)
-                if not breakSet:
-                    htmlpage += '<div style="page-break-after: always;">&nbsp;</div>\n'
-                    breakSet = True
-                tag = 'h1'
-                if pclass and (len(pclass) >= 7):
-                    if pclass[3:7] == 'ch1-' : tag = 'h1'
-                    if pclass[3:7] == 'ch2-' : tag = 'h2'
-                    if pclass[3:7] == 'ch3-' : tag = 'h3'
-                    htmlpage += '<' + tag + ' class="' + pclass + '">'
-                else:
-                    htmlpage += '<' + tag + '>'
-                htmlpage += self.buildParagraph(pclass, pdesc, 'middle', regtype)
-                htmlpage += '</' + tag + '>'
-
-
-            elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') or (regtype == 'listitem'):
-                ptype = 'full'
-                # check to see if this is a continution from the previous page
-                if first_para_continued :
-                    ptype = 'end'
-                    first_para_continued = False
-                (pclass, pdesc) = self.getParaDescription(start,end)
-                if pclass and (len(pclass) >= 6) and (ptype == 'full'):
-                    tag = 'p'
-                    if pclass[3:6] == 'h1-' : tag = 'h4'
-                    if pclass[3:6] == 'h2-' : tag = 'h5'
-                    if pclass[3:6] == 'h3-' : tag = 'h6'
-                    htmlpage += '<' + tag + ' class="' + pclass + '">'
+                elif regtype == 'chapterheading' :
+                    (pclass, pdesc) = self.getParaDescription(start,end, regtype)
+                    if not breakSet:
+                        htmlpage += '<div style="page-break-after: always;">&nbsp;</div>\n'
+                        breakSet = True
+                    tag = 'h1'
+                    if pclass and (len(pclass) >= 7):
+                        if pclass[3:7] == 'ch1-' : tag = 'h1'
+                        if pclass[3:7] == 'ch2-' : tag = 'h2'
+                        if pclass[3:7] == 'ch3-' : tag = 'h3'
+                        htmlpage += '<' + tag + ' class="' + pclass + '">'
+                    else:
+                        htmlpage += '<' + tag + '>'
                      htmlpage += self.buildParagraph(pclass, pdesc, 'middle', regtype)
                      htmlpage += '</' + tag + '>'
-                else :
-                    htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
-
-
-            elif (regtype == 'tocentry') :
-                ptype = 'full'
-                if first_para_continued :
-                    ptype = 'end'
-                    first_para_continued = False
-                (pclass, pdesc) = self.getParaDescription(start,end)
-                htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
-
-
-            elif (regtype == 'vertical') :
-                ptype = 'full'
-                if first_para_continued :
-                    ptype = 'end'
-                    first_para_continued = False
-                (pclass, pdesc) = self.getParaDescription(start,end)
-                htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
-
-
-            elif (regtype == 'table') :
-                # translate first and last word into first and last glyphs
-                # and generate table as an image and include a link to it
-                glyphList = []
-                (pos, sfirst) = self.findinDoc('paragraph.firstWord',start,end)
-                (pos, slast) = self.findinDoc('paragraph.lastWord',start,end)
-                firstglyphList = self.getData('word.firstGlyph',0,-1)
-                gidList = self.getData('info.glyph.glyphID',0,-1)
-                if (sfirst != None) and (slast != None) :
-                    first = int(sfirst)
-                    last = int(slast)
-                    firstGlyph = firstglyphList[first]
-                    if last < len(firstglyphList):
-                        lastGlyph = firstglyphList[last]
-                    else :
-                        lastGlyph = len(gidList)
-                    for glyphnum in xrange(firstGlyph, lastGlyph):
-                        glyphList.append(glyphnum)
-                    num = self.svgcount
-                    self.glyphs_to_image(glyphList)
-                    self.svgcount += 1
-                    htmlpage += '<div class="graphic"><img src="img/' + self.id + '_%04d.svg" alt="" /></div>' % num
-                else :
-                    ptype = 'full'
-                    if first_para_continued :
-                        ptype = 'end'
-                        first_para_continued = False
-                        (pclass, pdesc) = self.getParaDescription(start,end)
-                        htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
-                        print " "
-                        print "Warning: - Table Conversions are notoriously poor"
-                        print "    Strongly recommend taking a screen capture image of the "
-                        print "    table in %s.svg and using it to replace this attempt at a table" % self.id
-                        print " "
  
-            elif (regtype == 'synth_fcvr.center') or (regtype == 'synth_text.center'):
-                (pos, simgsrc) = self.findinDoc('img.src',start,end)
-                if simgsrc:
-                    htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
-
-            else :
-                print 'Warning: region type', regtype
-                (pos, temp) = self.findinDoc('paragraph',start,end)
-                if pos != -1:
-                    print '   is a "text" region'
-                    regtype = 'fixed'
+                elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') or (regtype == 'listitem'):
                      ptype = 'full'
                      # check to see if this is a continution from the previous page
                      if first_para_continued :
                          ptype = 'end'
                          first_para_continued = False
-                    (pclass, pdesc) = self.getParaDescription(start,end)
-                    if pclass and (ptype == 'full') and (len(pclass) >= 6):
+                    (pclass, pdesc) = self.getParaDescription(start,end, regtype)
+                    if pclass and (len(pclass) >= 6) and (ptype == 'full'):
                          tag = 'p'
                          if pclass[3:6] == 'h1-' : tag = 'h4'
                          if pclass[3:6] == 'h2-' : tag = 'h5'
@@ -573,12 +603,60 @@ class DocParser(object):
                          htmlpage += '</' + tag + '>'
                      else :
                          htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
-                else :
-                    print '    is a "graphic" region'
+
+                elif (regtype == 'tocentry') :
+                    ptype = 'full'
+                    if first_para_continued :
+                        ptype = 'end'
+                        first_para_continued = False
+                    (pclass, pdesc) = self.getParaDescription(start,end, regtype)
+                    htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
+
+
+                elif (regtype == 'vertical') or (regtype == 'table') :
+                    ptype = 'full'
+                    if inGroup:
+                        ptype = 'middle'
+                    if first_para_continued :
+                        ptype = 'end'
+                        first_para_continued = False
+                    (pclass, pdesc) = self.getParaDescription(start, end, regtype)
+                    htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
+
+
+                elif (regtype == 'synth_fcvr.center') or (regtype == 'synth_text.center'):
                      (pos, simgsrc) = self.findinDoc('img.src',start,end)
                      if simgsrc:
                          htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
  
+                else :
+                    print 'Warning: region type', regtype
+                    (pos, temp) = self.findinDoc('paragraph',start,end)
+                    if pos != -1:
+                        print '   is a "text" region'
+                        regtype = 'fixed'
+                        ptype = 'full'
+                        # check to see if this is a continuation from the previous page
+                        if first_para_continued :
+                            ptype = 'end'
+                            first_para_continued = False
+                        (pclass, pdesc) = self.getParaDescription(start,end, regtype)
+                        if pclass and (ptype == 'full') and (len(pclass) >= 6):
+                            tag = 'p'
+                            if pclass[3:6] == 'h1-' : tag = 'h4'
+                            if pclass[3:6] == 'h2-' : tag = 'h5'
+                            if pclass[3:6] == 'h3-' : tag = 'h6'
+                            htmlpage += '<' + tag + ' class="' + pclass + '">'
+                            htmlpage += self.buildParagraph(pclass, pdesc, 'middle', regtype)
+                            htmlpage += '</' + tag + '>'
+                        else :
+                            htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
+                    else :
+                        print '    is a "graphic" region'
+                        (pos, simgsrc) = self.findinDoc('img.src',start,end)
+                        if simgsrc:
+                            htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
+
  
          if last_para_continued :
              if htmlpage[-4:] == '</p>':
@@ -589,10 +667,10 @@ class DocParser(object):
  
  
  
-def convert2HTML(flatxml, classlst, fileid, bookDir):
+def convert2HTML(flatxml, classlst, fileid, bookDir, fixedimage):
  
      # create a document parser
-    dp = DocParser(flatxml, classlst, fileid, bookDir)
+    dp = DocParser(flatxml, classlst, fileid, bookDir, fixedimage)
  
      htmlpage = dp.process()
  
diff --git a/Topaz_Tools/lib/genhtml.py b/Topaz_Tools/lib/genhtml.py

index 58d9e9ae50847ba63c7c5a4fb13ea28a72e4a22b..5fcdd3851bd3aa19c9d6ee4a212102cbd2b0b010 100644 (file)
--- a/Topaz_Tools/lib/genhtml.py
+++ b/Topaz_Tools/lib/genhtml.py
@@ -1,6 +1,6 @@
  #! /usr/bin/python
  # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-# For use with Topaz Scripts Version 1.8                                                                                                  
+# For use with Topaz Scripts Version 2.0
  
  import os, sys, getopt
  
@@ -14,13 +14,16 @@ import getpagedim
  def usage():
      print 'Usage: '
      print ' '
-    print '   genhtml.py unencryptedBookDir'
+    print '   genhtml.py [--fixed-image] unencryptedBookDir'
+    print '  '
+    print '  Options:  '
+    print '     --fixed-image   : force translation of fixed regions into svg images '
      print '  '
-
  
  
  def main(argv):
      bookDir = ''
+    fixedimage = False
  
      if len(argv) == 0:
          argv = sys.argv
@@ -28,7 +31,7 @@ def main(argv):
          argv = argv.split()
  
      try:
-        opts, args = getopt.getopt(argv[1:], "h:")
+        opts, args = getopt.getopt(argv[1:], "h:",["fixed-image"])
  
      except getopt.GetoptError, err:
          print str(err)
@@ -43,6 +46,8 @@ def main(argv):
          if o =="-h":
              usage()
              sys.exit(0)
+        if o =="--fixed-image":
+            fixedimage = True
  
      bookDir = args[0]
  
@@ -139,7 +144,7 @@ def main(argv):
          print '     ', filename
          fname = os.path.join(pageDir,filename)
          flat_xml = convert2xml.main('convert2xml.py --flat-xml ' + dictFile + ' ' + fname) 
-        htmlstr += flatxml2html.convert2HTML(flat_xml, classlst, fname, bookDir)
+        htmlstr += flatxml2html.convert2HTML(flat_xml, classlst, fname, bookDir, fixedimage)
  
      htmlstr += '</body>\n</html>\n'
  
diff --git a/Topaz_Tools/lib/gensvg.py b/Topaz_Tools/lib/gensvg.py

index fce15b260746e90b30e81dc72d2a2492fd12b186..ea198e4683f344e389bb40ac291bbce26eb07e58 100644 (file)
--- a/Topaz_Tools/lib/gensvg.py
+++ b/Topaz_Tools/lib/gensvg.py
@@ -1,6 +1,6 @@
  #! /usr/bin/python
  # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-# For use with Topaz Scripts Version 1.8                                                                                                  
+# For use with Topaz Scripts Version 2.0
  
  import os, sys, getopt
  
diff --git a/Topaz_Tools/lib/genxml.py b/Topaz_Tools/lib/genxml.py

index cfc53259eea35fe10b4f1015d3e679f9dc782596..299dde2ffc203c7e5976773d4f751c616b355a92 100644 (file)
--- a/Topaz_Tools/lib/genxml.py
+++ b/Topaz_Tools/lib/genxml.py
@@ -1,6 +1,6 @@
  #! /usr/bin/python
  # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-# For use with Topaz Scripts Version 1.8                                                                                                  
+# For use with Topaz Scripts Version 2.0
  
  import os, sys, getopt
  
diff --git a/Topaz_Tools/lib/getpagedim.py b/Topaz_Tools/lib/getpagedim.py

index 33c722a5606488ff11ef8f98cf51fa609ba44553..b2f66c00194deb7f4df2a01efbc27cb38d3346c3 100644 (file)
--- a/Topaz_Tools/lib/getpagedim.py
+++ b/Topaz_Tools/lib/getpagedim.py
@@ -1,6 +1,6 @@
  #! /usr/bin/python
  # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-# For use with Topaz Scripts Version 1.8                                                                                                  
+# For use with Topaz Scripts Version 2.0
  
  from __future__ import with_statement
  import csv
diff --git a/Topaz_Tools/lib/readme.txt b/Topaz_Tools/lib/readme.txt

index eca30f056cc9a819a0da0f8a6b2c8b8284e4d064..6bc17e411d431cb9b80bc311d4cdc4b39a77560c 100644 (file)
--- a/Topaz_Tools/lib/readme.txt
+++ b/Topaz_Tools/lib/readme.txt
@@ -31,9 +31,8 @@ genhtml.py - main program to generate "book.html"
  gensvg.py - (author: clarknova) main program to create an xhmtl page with embedded svg graphics
  
  
-Please note, gensvg.py, genhtml.py, and genxml.py import and use
-decode_meta.py, convert2xml.py, flatxml2html.py, getpagedim.py and stylexml2css.py 
-so please keep all of these python scripts together in the same place.
+Please note, these scripts all import code from each other so please
+keep all of these python scripts together in the same place.
  
  
  
@@ -78,6 +77,12 @@ The step must NOW be done BEFORE attempting conversion to html
  When complete, use a web-browser to open the page*.xhtml files
  in TARGETDIR/svg/ to see what the book really looks like.
  
+If you would prefer pure svg pages, then use the -r option
+as follows:
+
+   gensvg.py -r TARGETDIR
+
+
  All thanks go to CLARKNOVA for this program.  This program is 
  needed to actually see the true image of each page and so that
  the next step can properly create images from glyphs for 
@@ -97,6 +102,16 @@ properly set bold and/or italics, handle font size changes,
  and to fix the sometimes horiffic mistakes in the ocrText
  used to create the html.  
  
+If there are critical pages that need fixed layout in your book
+you might want to consider forcing these fixed regions to
+become svg images by using this command instead
+
+    genhtml.py --fixed-image TARGETDIR
+
+This will convert all fixed regions into svg images at the 
+expense of increased book size, slower loading speed, and 
+a loss of the ability to search for words in those regions.
+
  FYI: Sigil is a wonderful, free cross-
  platform program that can be used to edit the html and 
  create an epub if you so desire.
diff --git a/Topaz_Tools/lib/stylexml2css.py b/Topaz_Tools/lib/stylexml2css.py

index 791d0674b373b4b927a240b4fbba3ee88829c089..f0d07d77b717245b9449eb91a7ef1d1ba2ae59f6 100644 (file)
--- a/Topaz_Tools/lib/stylexml2css.py
+++ b/Topaz_Tools/lib/stylexml2css.py
@@ -1,6 +1,6 @@
  #! /usr/bin/python
  # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-# For use with Topaz Scripts Version 1.8                                                                                                  
+# For use with Topaz Scripts Version 2.0
  
  from __future__ import with_statement
  import csv
author	some_updates <some_updates@gmail.com>
	Tue, 27 Jan 2009 12:20:37 +0000 (12:20 +0000)
committer	Apprentice Alf <apprenticealf@gmail.com>
	Sat, 28 Feb 2015 12:20:58 +0000 (12:20 +0000)
Topaz_Tools/lib/changes.txt		patch \| blob \| blame \| history
Topaz_Tools/lib/cmbtc_dump.py		patch \| blob \| blame \| history
Topaz_Tools/lib/cmbtc_dump_nonK4PC.py		patch \| blob \| blame \| history
Topaz_Tools/lib/convert2xml.py		patch \| blob \| blame \| history
Topaz_Tools/lib/decode_meta.py		patch \| blob \| blame \| history
Topaz_Tools/lib/flatxml2html.py		patch \| blob \| blame \| history
Topaz_Tools/lib/genhtml.py		patch \| blob \| blame \| history
Topaz_Tools/lib/gensvg.py		patch \| blob \| blame \| history
Topaz_Tools/lib/genxml.py		patch \| blob \| blame \| history
Topaz_Tools/lib/getpagedim.py		patch \| blob \| blame \| history
Topaz_Tools/lib/readme.txt		patch \| blob \| blame \| history
Topaz_Tools/lib/stylexml2css.py		patch \| blob \| blame \| history