1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-03-27 13:53:32 +01:00

Initial import

This commit is contained in:
2020-03-31 17:15:23 +02:00
commit d97ea9b0bc
311 changed files with 131419 additions and 0 deletions

View File

@@ -0,0 +1,115 @@
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
"""
This package contains logic to read and write LRF files.
The LRF file format is documented at U{http://www.sven.de/librie/Librie/LrfFormat}.
"""
from calibre.ebooks.lrf.pylrs.pylrs import Book as _Book
from calibre.ebooks.lrf.pylrs.pylrs import TextBlock, Header, \
TextStyle, BlockStyle
from calibre.ebooks.lrf.fonts import FONT_FILE_MAP
from calibre.ebooks import ConversionError
__docformat__ = "epytext"
class LRFParseError(Exception):
    """Error type for failures while parsing LRF data."""
class PRS500_PROFILE(object):
    """Screen geometry and default typography for the 'prs500' profile.

    The class is used as a plain namespace of constants; ``Book`` reads
    these attributes to size pages, headers and default text styles.
    """

    name = 'prs500'

    # Display geometry (pixels) and resolution.
    screen_width = 600
    screen_height = 775
    dpi = 166

    # Number of pixels to subtract from screen_height when calculating
    # the height of the text area.
    fudge = 0

    # Body text defaults.
    font_size = 10    #: Default (in pt)
    parindent = 10    #: Default (in pt)
    line_space = 1.2  #: Default (in pt)

    # Page header defaults.
    header_font_size = 6  #: In pt
    header_height = 30    #: In px

    # Font face used for each generic family when no custom font is given.
    default_fonts = {
        'sans': "Swis721 BT Roman",
        'mono': "Courier10 BT Roman",
        'serif': "Dutch801 Rm BT Roman",
    }
def find_custom_fonts(options, logger):
    """Look up the user-requested serif, sans and mono font families.

    For each of ``options.serif_family``, ``options.sans_family`` and
    ``options.mono_family`` that is set, the last comma-separated entry is
    taken as the family name and resolved through the font scanner. A
    warning is logged when the scanner returns nothing for a family.

    @param options: Object exposing the three ``*_family`` attributes.
    @param logger: Object with a ``warn(message)`` method.
    @return: Dict with keys 'serif', 'sans' and 'mono'; each value is the
             scanner result for that family, or None when not requested.
    """
    from calibre.utils.fonts.scanner import font_scanner

    # (result key, option attribute, warning template)
    wanted = (
        ('serif', 'serif_family', 'Unable to find serif family %s'),
        ('sans', 'sans_family', 'Unable to find sans family %s'),
        ('mono', 'mono_family', 'Unable to find mono family %s'),
    )
    fonts = {'serif' : None, 'sans' : None, 'mono' : None}
    for slot, option_name, warning in wanted:
        requested = getattr(options, option_name)
        if not requested:
            continue
        # Only the last comma-separated entry is used as the family name.
        family_name = requested.split(',')[-1].strip()
        fonts[slot] = font_scanner.legacy_fonts_for_family(family_name)
        if not fonts[slot]:
            logger.warn(warning%family_name)
    return fonts
def Book(options, logger, font_delta=0, header=None,
         profile=PRS500_PROFILE, **settings):
    """Create a pylrs book configured from conversion options and a profile.

    @param options: Conversion options; the margin, header_separation,
                    wordspace and ``*_family`` attributes are read.
    @param logger: Passed to L{find_custom_fonts} for warnings.
    @param font_delta: Added to the profile font size in steps of 20 units.
    @param header: Optional content appended to a header text block; when
                   truthy a page header is created and the top margin is
                   set to 0.
    @param profile: Device profile supplying screen size and defaults.
    @param settings: Extra keyword arguments forwarded to the pylrs Book.
    @return: Tuple ``(book, fonts)`` where ``fonts`` maps 'serif', 'sans'
             and 'mono' to dicts containing at least a 'normal' entry.
    @raise ConversionError: A custom family was found but has no 'normal'
                            variant.
    """
    from uuid import uuid4

    page_style = {
        'topmargin': options.top_margin,
        'evensidemargin': options.left_margin,
        'oddsidemargin': options.left_margin,
        'textwidth': profile.screen_width
                     - (options.left_margin + options.right_margin),
        'textheight': profile.screen_height
                      - (options.top_margin + options.bottom_margin)
                      - profile.fudge,
    }

    if header:
        header_obj = Header()
        header_text_style = TextStyle(
            align='foot', fontsize=int(profile.header_font_size*10))
        header_block_style = BlockStyle(blockwidth=page_style['textwidth'])
        header_block = TextBlock(textStyle=header_text_style,
                                 blockStyle=header_block_style)
        header_block.append(header)
        header_obj.PutObj(header_block)
        page_style['headheight'] = profile.header_height
        page_style['headsep'] = options.header_separation
        page_style['header'] = header_obj
        # With a header the top margin is dropped; the text height is
        # reduced by the header height and separation instead.
        page_style['topmargin'] = 0
        page_style['textheight'] = (
            profile.screen_height
            - (options.bottom_margin + page_style['topmargin'])
            - page_style['headheight'] - page_style['headsep']
            - profile.fudge)

    fontsize = int(10*profile.font_size+font_delta*20)
    baselineskip = fontsize + 20
    fonts = find_custom_fonts(options, logger)

    text_style = dict(fontsize=fontsize,
                      parindent=int(10*profile.parindent),
                      linespace=int(10*profile.line_space),
                      baselineskip=baselineskip,
                      wordspace=10*options.wordspace)
    serif = fonts['serif']
    if serif and 'normal' in serif:
        text_style['fontfacename'] = serif['normal'][1]

    book = _Book(textstyledefault=text_style,
                 pagestyledefault=page_style,
                 blockstyledefault=dict(blockwidth=page_style['textwidth']),
                 bookid=uuid4().hex,
                 **settings)

    # Embed every custom font variant and remember its file for get_font.
    for family in fonts.keys():
        variants = fonts[family]
        if not variants:
            continue
        for font in variants.values():
            book.embed_font(*font)
            FONT_FILE_MAP[font[1]] = font[0]

    # Fall back to the profile's built-in face for families with no custom
    # font; a custom family without a 'normal' variant is an error.
    for family in ['serif', 'sans', 'mono']:
        variants = fonts[family]
        if not variants:
            fonts[family] = {'normal' : (None, profile.default_fonts[family])}
        elif 'normal' not in variants:
            raise ConversionError('Could not find the normal version of the ' + family + ' font')
    return book, fonts

View File

@@ -0,0 +1,33 @@
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
from PIL import ImageFont
'''
Default fonts used in the PRS500
'''
# Device font names mapped to the bundled Liberation font file stems.
LIBERATION_FONT_MAP = {
    'Swis721 BT Roman'     : 'LiberationSans-Regular',
    'Dutch801 Rm BT Roman' : 'LiberationSerif-Regular',
    'Courier10 BT Roman'   : 'LiberationMono-Regular',
}

# Font face name -> font file path; filled in elsewhere for custom fonts.
FONT_FILE_MAP = {}


def get_font(name, size, encoding='unic'):
    '''
    Get an ImageFont object by name.

    Names in LIBERATION_FONT_MAP are loaded from the bundled
    'fonts/liberation' resources; names in FONT_FILE_MAP are loaded from
    the recorded file path.

    @param name: Font face name to look up.
    @param size: Font height in pixels. To convert from pts:
                 sz in pixels = (dpi/72) * size in pts
    @param encoding: Font encoding to use. E.g. 'unic', 'symbol', 'ADOB', 'ADBE', 'aprm'
    @return: The loaded font, or None when the name is in neither map.
    '''
    if name in LIBERATION_FONT_MAP:
        font_path = P('fonts/liberation/%s.ttf' % LIBERATION_FONT_MAP[name])
    elif name in FONT_FILE_MAP:
        font_path = FONT_FILE_MAP[name]
    else:
        return None
    return ImageFont.truetype(font_path, size, encoding=encoding)

View File

@@ -0,0 +1,10 @@
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
"""
This package contains code to convert HTML ebooks to LRF ebooks.
"""
__docformat__ = "epytext"
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"

View File

@@ -0,0 +1,115 @@
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import re
# HTML/CSS colour keywords (lower case) mapped to '#RRGGBB' values.
#
# Corrections relative to the previous table:
#   * 'indianred' and 'indigo' had a trailing space in the key, so they
#     could never match a normally written colour name.
#   * 'lime' duplicated the lightskyblue value; it is #00FF00.
#   * 'sandybrown' duplicated the saddlebrown value; it is #F4A460.
#   * 'mediumpurple' is #9370DB in the CSS colour keyword table.
NAME_MAP = {
    'aliceblue': '#F0F8FF',
    'antiquewhite': '#FAEBD7',
    'aqua': '#00FFFF',
    'aquamarine': '#7FFFD4',
    'azure': '#F0FFFF',
    'beige': '#F5F5DC',
    'bisque': '#FFE4C4',
    'black': '#000000',
    'blanchedalmond': '#FFEBCD',
    'blue': '#0000FF',
    'brown': '#A52A2A',
    'burlywood': '#DEB887',
    'cadetblue': '#5F9EA0',
    'chartreuse': '#7FFF00',
    'chocolate': '#D2691E',
    'coral': '#FF7F50',
    'crimson': '#DC143C',
    'cyan': '#00FFFF',
    'darkblue': '#00008B',
    'darkgoldenrod': '#B8860B',
    'darkgreen': '#006400',
    'darkkhaki': '#BDB76B',
    'darkmagenta': '#8B008B',
    'darkolivegreen': '#556B2F',
    'darkorange': '#FF8C00',
    'darkorchid': '#9932CC',
    'darkred': '#8B0000',
    'darksalmon': '#E9967A',
    'darkslateblue': '#483D8B',
    'darkslategrey': '#2F4F4F',
    'darkviolet': '#9400D3',
    'deeppink': '#FF1493',
    'dodgerblue': '#1E90FF',
    'firebrick': '#B22222',
    'floralwhite': '#FFFAF0',
    'forestgreen': '#228B22',
    'fuchsia': '#FF00FF',
    'gainsboro': '#DCDCDC',
    'ghostwhite': '#F8F8FF',
    'gold': '#FFD700',
    'goldenrod': '#DAA520',
    'indianred': '#CD5C5C',
    'indigo': '#4B0082',
    'khaki': '#F0E68C',
    'lavenderblush': '#FFF0F5',
    'lawngreen': '#7CFC00',
    'lightblue': '#ADD8E6',
    'lightcoral': '#F08080',
    'lightgoldenrodyellow': '#FAFAD2',
    'lightgray': '#D3D3D3',
    'lightgrey': '#D3D3D3',
    'lightskyblue': '#87CEFA',
    'lightslategrey': '#778899',
    'lightsteelblue': '#B0C4DE',
    'lime': '#00FF00',
    'linen': '#FAF0E6',
    'magenta': '#FF00FF',
    'maroon': '#800000',
    'mediumaquamarine': '#66CDAA',
    'mediumblue': '#0000CD',
    'mediumorchid': '#BA55D3',
    'mediumpurple': '#9370DB',
    'mediumseagreen': '#3CB371',
    'mediumslateblue': '#7B68EE',
    'midnightblue': '#191970',
    'moccasin': '#FFE4B5',
    'navajowhite': '#FFDEAD',
    'navy': '#000080',
    'oldlace': '#FDF5E6',
    'olive': '#808000',
    'orange': '#FFA500',
    'orangered': '#FF4500',
    'orchid': '#DA70D6',
    'paleturquoise': '#AFEEEE',
    'papayawhip': '#FFEFD5',
    'peachpuff': '#FFDAB9',
    'powderblue': '#B0E0E6',
    'rosybrown': '#BC8F8F',
    'royalblue': '#4169E1',
    'saddlebrown': '#8B4513',
    'sandybrown': '#F4A460',
    'seashell': '#FFF5EE',
    'sienna': '#A0522D',
    'silver': '#C0C0C0',
    'skyblue': '#87CEEB',
    'slategrey': '#708090',
    'snow': '#FFFAFA',
    'springgreen': '#00FF7F',
    'violet': '#EE82EE',
    'yellowgreen': '#9ACD32',
}
# '#rrggbb' -- hex digits, not just decimal digits (the old pattern used \d
# and therefore rejected any colour containing a-f).
hex_pat = re.compile(r'#([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{2})', re.IGNORECASE)
rgb_pat = re.compile(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', re.IGNORECASE)
# '#rgb' shorthand; the lookahead rejects malformed 4/5 digit values.
_short_hex_pat = re.compile(r'#([0-9a-f])([0-9a-f])([0-9a-f])(?![0-9a-f])',
                            re.IGNORECASE)


def lrs_color(html_color):
    """Convert an HTML/CSS colour to the '0x00rrggbb' form used in LRS.

    Accepted inputs are '#rrggbb', '#rgb', 'rgb(r, g, b)' with decimal
    components, and the colour names in NAME_MAP. Matching is case
    insensitive.

    @param html_color: The colour as written in the HTML/CSS source.
    @return: A string of the form '0x00' followed by six hex digits.
             Unrecognised input yields '0x00000000'.
    """
    hcol = html_color.lower()

    match = hex_pat.search(hcol)
    if match:
        return '0x00' + ''.join(match.groups())

    match = _short_hex_pat.search(hcol)
    if match:
        # CSS shorthand: each digit is doubled (#abc -> #aabbcc).
        return '0x00' + ''.join(digit * 2 for digit in match.groups())

    match = rgb_pat.search(hcol)
    if match:
        # Zero-pad each component to two hex digits and clamp to 255 so
        # the result always has exactly six digits after the '0x00'.
        return '0x00' + ''.join(
            '%02x' % min(int(component), 255) for component in match.groups())

    # Try the name as given first, then with surrounding whitespace removed.
    for name in (hcol, hcol.strip()):
        if name in NAME_MAP:
            return NAME_MAP[name].replace('#', '0x00')
    return '0x00000000'

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,386 @@
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import math, sys, re, numbers
from calibre.ebooks.lrf.fonts import get_font
from calibre.ebooks.lrf.pylrs.pylrs import TextBlock, Text, CR, Span, \
CharButton, Plot, Paragraph, \
LrsTextTag
from polyglot.builtins import string_or_bytes, range, native_string_type
def ceil(num):
    """Return the ceiling of *num* as an ``int``."""
    rounded_up = math.ceil(num)
    return int(rounded_up)
def print_xml(elem):
    """Debug helper: write the XML form of *elem* to stdout.

    *elem* is converted with its ``toElement`` method and serialised by
    pylrs' ElementWriter, followed by a newline.
    """
    from calibre.ebooks.lrf.pylrs.pylrs import ElementWriter
    encoding = native_string_type('utf8')
    xml_element = elem.toElement(encoding)
    writer = ElementWriter(xml_element,
                           sourceEncoding=native_string_type('utf8'))
    writer.write(sys.stdout)
    print()
def cattrs(base, extra):
    """Return a copy of *base* updated with *extra*; *base* is not modified."""
    combined = base.copy()
    combined.update(extra)
    return combined
def tokens(tb):
    '''
    Generate ``(token, attrs)`` pairs for the contents of text block *tb*.

    A token is one of:
      1. The integer 1, for a CR directly inside the text block.
      2. The integer 2, for a CR nested inside a paragraph.
      3. A string: a block of text that has the same style; ``attrs`` is
         a copy of the style attributes in effect.
      4. A Plot object, with ``attrs`` set to None.
    '''
    def process_element(element, attrs):
        if isinstance(element, CR):
            yield 2, None
        elif isinstance(element, Text):
            yield element.text, cattrs(attrs, {})
        elif isinstance(element, string_or_bytes):
            yield element, cattrs(attrs, {})
        elif isinstance(element, (CharButton, LrsTextTag)):
            # Only the first child of these containers is considered.
            if element.contents:
                first = element.contents[0]
                if hasattr(first, 'text'):
                    yield first.text, cattrs(attrs, {})
                elif hasattr(first, 'attrs'):
                    for item in process_element(first, first.attrs):
                        yield item
        elif isinstance(element, Plot):
            yield element, None
        elif isinstance(element, Span):
            # A span's own attributes override the inherited ones.
            attrs = cattrs(attrs, element.attrs)
            for child in element.contents:
                for item in process_element(child, attrs):
                    yield item

    for entry in tb.contents:
        if isinstance(entry, CR):
            yield 1, None
        elif isinstance(entry, Paragraph):
            for child in entry.contents:
                child_attrs = child.attrs if hasattr(child, 'attrs') else {}
                for item in process_element(child, child_attrs):
                    yield item
class Cell(object):
    """A table cell rendered into standalone text blocks for measurement.

    The cell's tag is parsed by the converter onto a scratch page; the
    resulting TextBlocks are collected in ``text_blocks`` and given private
    block/text styles so their dimensions can be changed freely.

    Attributes set by the constructor:
      - ``pwidth``: requested width in percent, or -1 when unspecified.
      - ``colspan`` / ``rowspan``: spans from the tag, defaulting to 1.
      - ``text_blocks``: non-empty list of TextBlock objects.
    """

    def __init__(self, conv, tag, css):
        """
        @param conv: The HTML converter; its ``book``, ``current_page``,
                     ``parse_tag`` and ``end_current_block`` are used.
        @param tag: The cell's tag (must support ``has_attr`` and item access).
        @param css: Mapping of CSS properties computed for the tag.
        """
        self.conv = conv
        self.tag = tag
        self.css = css
        self.text_blocks = []
        self.pwidth = -1.
        # A percentage width may come from the attribute or from CSS; the
        # CSS value wins when both parse.
        if tag.has_attr('width') and '%' in tag['width']:
            try:
                self.pwidth = float(tag['width'].replace('%', ''))
            except ValueError:
                pass
        if 'width' in css and '%' in css['width']:
            try:
                self.pwidth = float(css['width'].replace('%', ''))
            except ValueError:
                pass
        if self.pwidth > 100:
            self.pwidth = -1
        self.rowspan = self.colspan = 1
        try:
            self.colspan = int(tag['colspan']) if tag.has_attr('colspan') else 1
            self.rowspan = int(tag['rowspan']) if tag.has_attr('rowspan') else 1
        except (TypeError, ValueError):
            # Malformed span values fall back to whatever was set so far.
            pass

        # Render the cell onto a scratch page. The converter state is
        # restored even if parsing raises, so a failed cell does not leave
        # the converter pointing at the scratch page.
        pp = conv.current_page
        conv.book.allow_new_page = False
        try:
            conv.current_page = conv.book.create_page()
            conv.parse_tag(tag, css)
            conv.end_current_block()
            for item in conv.current_page.contents:
                if isinstance(item, TextBlock):
                    self.text_blocks.append(item)
        finally:
            conv.current_page = pp
            conv.book.allow_new_page = True

        if not self.text_blocks:
            # Guarantee at least one block so width/height queries work.
            tb = conv.book.create_text_block()
            tb.Paragraph(' ')
            self.text_blocks.append(tb)
        for tb in self.text_blocks:
            tb.parent = None
            tb.objId = 0
            # Needed as we have to eventually change this BlockStyle's width and
            # height attributes. This blockstyle may be shared with other
            # elements, so doing that causes havoc.
            tb.blockStyle = conv.book.create_block_style()
            ts = conv.book.create_text_style(**tb.textStyle.attrs)
            ts.attrs['parindent'] = 0
            tb.textStyle = ts
            if ts.attrs['align'] == 'foot':
                # Guard against a block with no contents at all.
                if tb.contents and isinstance(tb.contents[-1], Paragraph):
                    tb.contents[-1].append(' ')

    def pts_to_pixels(self, pts):
        """Convert a size in tenths of a point to pixels at the profile dpi."""
        pts = int(pts)
        return ceil((float(self.conv.profile.dpi)/72)*(pts/10.0))

    def minimum_width(self):
        """Return the largest minimum width among this cell's text blocks."""
        return max([self.minimum_tb_width(tb) for tb in self.text_blocks])

    def minimum_tb_width(self, tb):
        """Return the narrowest width *tb* can take.

        This is the width of the widest leading word over all text tokens
        plus the paragraph indent and 2 pixels. If a Plot token is met, its
        width is returned immediately.
        """
        ts = tb.textStyle.attrs
        default_font = get_font(ts['fontfacename'], self.pts_to_pixels(ts['fontsize']))
        parindent = self.pts_to_pixels(ts['parindent'])
        mwidth = 0
        for token, attrs in tokens(tb):
            font = default_font
            if isinstance(token, numbers.Integral):  # Handle para and line breaks
                continue
            if isinstance(token, Plot):
                return self.pts_to_pixels(token.xsize)
            ff = attrs.get('fontfacename', ts['fontfacename'])
            fs = attrs.get('fontsize', ts['fontsize'])
            if (ff, fs) != (ts['fontfacename'], ts['fontsize']):
                font = get_font(ff, self.pts_to_pixels(fs))
            if not token.strip():
                continue
            word = token.split()
            word = word[0] if word else ""
            # NOTE(review): ImageFont.getsize is not available in recent
            # Pillow releases -- confirm the Pillow version in use.
            width = font.getsize(word)[0]
            if width > mwidth:
                mwidth = width
        return parindent + mwidth + 2

    def text_block_size(self, tb, maxwidth=sys.maxsize, debug=False):
        """Estimate the size of *tb* when wrapped at *maxwidth* pixels.

        @param tb: The text block to measure.
        @param maxwidth: Width at which words wrap to the next line.
        @param debug: Unused; kept for interface compatibility.
        @return: Tuple ``(width, height)`` in pixels.
        """
        ts = tb.textStyle.attrs
        default_font = get_font(ts['fontfacename'], self.pts_to_pixels(ts['fontsize']))
        parindent = self.pts_to_pixels(ts['parindent'])
        top, bottom, left, right = 0, 0, parindent, parindent

        def add_word(width, height, left, right, top, bottom, ls, ws):
            # Place one word; wrap to a new line when it would overflow.
            if left + width > maxwidth:
                left = width + ws
                top += ls
                bottom = top+ls if top+ls > bottom else bottom
            else:
                left += (width + ws)
                right = left if left > right else right
                bottom = top+ls if top+ls > bottom else bottom
            return left, right, top, bottom

        for token, attrs in tokens(tb):
            if attrs is None:
                attrs = {}
            font = default_font
            ls = self.pts_to_pixels(attrs.get('baselineskip', ts['baselineskip']))+\
                    self.pts_to_pixels(attrs.get('linespace', ts['linespace']))
            ws = self.pts_to_pixels(attrs.get('wordspace', ts['wordspace']))
            if isinstance(token, numbers.Integral):  # Handle para and line breaks
                if top != bottom:  # Previous element not a line break
                    top = bottom
                else:
                    top += ls
                    bottom += ls
                # tokens() yields 1 for a CR between paragraphs and 2 for
                # a CR inside one; only the former starts an indented line.
                # (This previously compared the builtin ``int`` with 1,
                # which is always False.)
                left = parindent if token == 1 else 0
                continue
            if isinstance(token, Plot):
                width, height = self.pts_to_pixels(token.xsize), self.pts_to_pixels(token.ysize)
                left, right, top, bottom = add_word(width, height, left, right, top, bottom, height, ws)
                continue
            ff = attrs.get('fontfacename', ts['fontfacename'])
            fs = attrs.get('fontsize', ts['fontsize'])
            if (ff, fs) != (ts['fontfacename'], ts['fontsize']):
                font = get_font(ff, self.pts_to_pixels(fs))
            for word in token.split():
                width, height = font.getsize(word)
                left, right, top, bottom = add_word(width, height, left, right, top, bottom, ls, ws)
        return right+3+max(parindent, 10), bottom

    def text_block_preferred_width(self, tb, debug=False):
        """Return the width of *tb* when it is laid out without wrapping."""
        return self.text_block_size(tb, sys.maxsize, debug=debug)[0]

    def preferred_width(self, debug=False):
        """Return the largest unwrapped width among this cell's text blocks."""
        return ceil(max([self.text_block_preferred_width(i, debug=debug) for i in self.text_blocks]))

    def height(self, width):
        """Return the summed height of all text blocks wrapped at *width*."""
        return sum([self.text_block_size(i, width)[1] for i in self.text_blocks])
class Row(object):
    """A table row: its cells plus the anchor targets found inside it."""

    def __init__(self, conv, row, css, colpad):
        """
        @param conv: The HTML converter, passed on to each Cell.
        @param row: The row tag; searched for td/th cells and anchors.
        @param css: CSS mapping used as the base for each cell's CSS.
        @param colpad: Column padding, stored on the row.
        """
        self.cells = []
        self.colpad = colpad
        cell_tags = row.findAll(re.compile('td|th', re.IGNORECASE))
        self.targets = []
        for cell_tag in cell_tags:
            cell_css = conv.tag_css(cell_tag, css)[0]
            self.cells.append(Cell(conv, cell_tag, cell_css))
        # Collect link targets; 'name' takes precedence over 'id'.
        anchors = row.findAll(id=True) + row.findAll(name=True)
        for anchor in anchors:
            if anchor.has_attr('name'):
                target = anchor['name']
            elif anchor.has_attr('id'):
                target = anchor['id']
            else:
                target = None
            if target is not None:
                self.targets.append(target.replace('#', ''))

    def number_of_cells(self):
        '''Number of cells in this row. Respects colspan'''
        total = 0
        for cell in self.cells:
            total += cell.colspan
        return total

    def height(self, widths):
        """Return the height of the tallest cell given column *widths*.

        Each cell is measured at the summed width of the columns it spans.
        Returns 0 for a row without cells.
        """
        column = 0
        cell_heights = []
        for cell in self.cells:
            span_width = sum(widths[column:column+cell.colspan])
            cell_heights.append(cell.height(span_width))
            column += cell.colspan
        return max(cell_heights) if cell_heights else 0

    def cell_from_index(self, col):
        """Return the cell covering column *col*.

        Columns are counted with colspan taken into account. If *col* lies
        beyond the last column the last cell is returned; None is returned
        only when the row has no cells.
        """
        reached = -1
        found = None
        for found in self.cells:
            remaining = found.colspan
            # Advance through the columns this cell spans, stopping as
            # soon as the requested column is reached.
            while remaining > 0 and reached != col:
                reached += 1
                remaining -= 1
            if reached == col:
                break
        return found

    def minimum_width(self, col):
        """Return the minimum width of the cell at *col* (0 if none)."""
        cell = self.cell_from_index(col)
        return cell.minimum_width() if cell else 0

    def preferred_width(self, col):
        """Return the preferred width of the cell at *col*.

        Cells spanning several columns, and missing cells, report 0.
        """
        cell = self.cell_from_index(col)
        if not cell or cell.colspan > 1:
            return 0
        return cell.preferred_width()

    def width_percent(self, col):
        """Return the percent width of the cell at *col*.

        Cells spanning several columns, and missing cells, report -1.
        """
        cell = self.cell_from_index(col)
        if not cell or cell.colspan > 1:
            return -1
        return cell.pwidth

    def cell_iterator(self):
        """Yield the row's cells in order."""
        for cell in self.cells:
            yield cell
class Table(object):
    """Layout model for an HTML table: rows of cells with column sizing."""

    def __init__(self, conv, table, css, rowpad=10, colpad=10):
        """
        @param conv: The HTML converter; ``in_table`` is set while rows
                     are being built.
        @param table: The table tag; its 'tr' descendants become rows.
        @param css: CSS mapping used as the base for each row's CSS.
        @param rowpad: Vertical padding between rows.
        @param colpad: Horizontal padding added to every column.
        """
        self.rows = []
        self.conv = conv
        self.rowpad = rowpad
        self.colpad = colpad
        row_tags = table.findAll('tr')
        conv.in_table = True
        for row_tag in row_tags:
            row_css = conv.tag_css(row_tag, css)[0]
            self.rows.append(Row(conv, row_tag, row_css, colpad))
        conv.in_table = False

    def number_of_columns(self):
        """Return the largest cell count (colspan included) of any row."""
        widest = 0
        for row in self.rows:
            count = row.number_of_cells()
            if count > widest:
                widest = count
        return widest

    def number_or_rows(self):
        """Return the number of rows."""
        return len(self.rows)

    def height(self, maxwidth):
        ''' Return row heights + self.rowpad'''
        widths = self.get_widths(maxwidth)
        total = 0
        for row in self.rows:
            total += row.height(widths) + self.rowpad
        # No padding after the last row.
        return total - self.rowpad

    def minimum_width(self, col):
        """Return the largest minimum width any row reports for *col*."""
        return max([row.minimum_width(col) for row in self.rows])

    def width_percent(self, col):
        """Return the largest percent width any row reports for *col*."""
        return max([row.width_percent(col) for row in self.rows])

    def get_widths(self, maxwidth):
        '''
        Return widths of columns + self.colpad
        '''
        nrows, ncols = self.number_or_rows(), self.number_of_columns()

        # Start from the widest preferred width in each column.
        widths = []
        for col in range(ncols):
            preferred = [0] * nrows
            for r in range(nrows):
                try:
                    preferred[r] = self.rows[r].preferred_width(col)
                except IndexError:
                    continue
            widths.append(max(preferred))

        min_widths = [self.minimum_width(col)+10 for col in range(ncols)]

        # Explicit percentage widths override the preferred width, but
        # never go below the column minimum.
        for col in range(len(widths)):
            percent = self.width_percent(col)
            if percent >= 0:
                share = ceil((percent/100) * (maxwidth - (ncols-1)*self.colpad))
                widths[col] = max(min_widths[col], share)

        # Shrink columns by 5% per pass (respecting minimums) until the
        # table fits, giving up after 100 passes.
        passes = 0
        while sum(widths) > maxwidth-((len(widths)-1)*self.colpad) and passes < 100:
            for col in range(ncols):
                shrunk = ceil((95/100)*widths[col])
                if shrunk >= min_widths[col]:
                    widths[col] = shrunk
            passes += 1
        return [width+self.colpad for width in widths]

    def blocks(self, maxwidth, maxheight):
        """Generate the placement of every text block in the table.

        For each row a marker tuple ``(None, 0, row_height, 0, targets)``
        is yielded first, followed by one tuple
        ``(text_block, xpos, ypos_in_row, delta, None)`` per text block,
        where ``delta`` is the unused horizontal space (never negative).
        Each text block receives a fresh fixed-size block style.

        @param maxwidth: Available width for the whole table.
        @param maxheight: Unused by this method.
        """
        nrows, ncols = self.number_or_rows(), self.number_of_columns()
        cellmatrix = [[None] * ncols for _ in range(nrows)]
        rowpos = [0] * nrows
        for r in range(nrows):
            for cell in self.rows[r].cell_iterator():
                cellmatrix[r][rowpos[r]] = cell
                rowpos[r] += cell.colspan
                # Reserve a slot in each following row covered by rowspan.
                for k in range(1, cell.rowspan):
                    try:
                        rowpos[r+k] += 1
                    except IndexError:
                        break

        widths = self.get_widths(maxwidth)
        heights = [row.height(widths) for row in self.rows]
        xpos = [sum(widths[:col]) for col in range(ncols)]
        delta = max(maxwidth - sum(widths), 0)

        for r, matrix_row in enumerate(cellmatrix):
            yield None, 0, heights[r], 0, self.rows[r].targets
            for c, cell in enumerate(matrix_row):
                if not cell:
                    continue
                width = sum(widths[c:c+cell.colspan])-self.colpad*cell.colspan
                sypos = 0
                for tb in cell.text_blocks:
                    tb.blockStyle = self.conv.book.create_block_style(
                        blockwidth=width,
                        blockheight=cell.text_block_size(tb, width)[1],
                        blockrule='horz-fixed')
                    yield tb, xpos[c], sypos, delta, None
                    sypos += tb.blockStyle.attrs['blockheight']

View File

@@ -0,0 +1,7 @@
from __future__ import absolute_import, division, print_function, unicode_literals
"""
This package contains code to generate ebooks in the SONY LRS/F format. It was
originally developed by Mike Higgins and has been extended and modified by Kovid
Goyal.
"""

View File

@@ -0,0 +1,78 @@
from __future__ import absolute_import, division, print_function, unicode_literals
""" elements.py -- replacements and helpers for ElementTree """
from polyglot.builtins import unicode_type, string_or_bytes
class ElementWriter(object):
    """Serialise an ElementTree-style element to XML text.

    The writer only needs the element to expose ``tag``, ``text``,
    ``tail``, ``items()`` and iteration/len over its children.
    """

    def __init__(self, e, header=False, sourceEncoding="ascii",
                 spaceBeforeClose=True, outputEncodingName="UTF-16"):
        """
        @param e: The root element to write.
        @param header: When true, emit an XML declaration first.
        @param sourceEncoding: Encoding used to decode byte strings.
        @param spaceBeforeClose: Write a space before '/>' in empty tags.
        @param outputEncodingName: Encoding name placed in the XML
                                   declaration (output is not re-encoded
                                   by this class).
        """
        self.header = header
        self.e = e
        self.sourceEncoding=sourceEncoding
        self.spaceBeforeClose = spaceBeforeClose
        self.outputEncodingName = outputEncodingName

    def _encodeCdata(self, rawText):
        """Return *rawText* as text with '&', '<' and '>' escaped."""
        if isinstance(rawText, bytes):
            rawText = rawText.decode(self.sourceEncoding)
        # '&' must be replaced first so later entities are not re-escaped.
        text = rawText.replace("&", "&amp;")
        text = text.replace("<", "&lt;")
        text = text.replace(">", "&gt;")
        return text

    def _writeAttribute(self, f, name, value):
        """Write one ``name="value"`` attribute, escaping the value."""
        f.write(' %s="' % unicode_type(name))
        if not isinstance(value, string_or_bytes):
            value = unicode_type(value)
        value = self._encodeCdata(value)
        value = value.replace('"', '&quot;')
        f.write(value)
        f.write('"')

    def _writeText(self, f, rawText):
        """Write escaped character data."""
        text = self._encodeCdata(rawText)
        f.write(text)

    def _write(self, f, e):
        """Recursively write element *e* and its tail text to *f*."""
        f.write('<' + unicode_type(e.tag))
        # sorted() works whether items() returns a list or a dict view;
        # calling .sort() on the result fails for a view and would mutate
        # a returned list in place.
        for name, value in sorted(e.items()):
            self._writeAttribute(f, name, value)
        if e.text is not None or len(e) > 0:
            f.write('>')
            if e.text:
                self._writeText(f, e.text)
            for e2 in e:
                self._write(f, e2)
            f.write('</%s>' % e.tag)
        else:
            if self.spaceBeforeClose:
                f.write(' ')
            f.write('/>')
        if e.tail is not None:
            self._writeText(f, e.tail)

    def toString(self):
        """Return the serialised XML as a single string."""
        class x:
            pass
        # A minimal file-like object whose write() appends to the buffer.
        buffer = []
        x.write = buffer.append
        self.write(x)
        return ''.join(buffer)

    def write(self, f):
        """Write the (optional) XML declaration and the element tree to *f*."""
        if self.header:
            f.write('<?xml version="1.0" encoding="%s"?>\n' % self.outputEncodingName)
        self._write(f, self.e)

View File

@@ -0,0 +1,773 @@
#!/usr/bin/env python2
from __future__ import absolute_import, division, print_function, unicode_literals
"""
pylrf.py -- very low level interface to create lrf files. See pylrs for
higher level interface that can use this module to render books to lrf.
"""
import struct
import zlib
import io
import codecs
import os
from .pylrfopt import tagListOptimizer
from polyglot.builtins import iteritems, string_or_bytes, unicode_type
PYLRF_VERSION = "1.0"
#
# Acknowledgement:
# This software would not have been possible without the pioneering
# efforts of the author of lrf2lrs.py, Igor Skochinsky.
#
# Copyright (c) 2007 Mike Higgins (Falstaff)
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
#
# Change History:
#
# V1.0 06 Feb 2007
# Initial Release.
#
# Current limitations and bugs:
# Never "scrambles" any streams (even if asked to). This does not seem
# to hurt anything.
#
# Not based on any official documentation, so many assumptions had to be made.
#
# Can be used to create lrf files that can lock up an eBook reader.
# This is your only warning.
#
# Unsupported objects: Canvas, Window, PopUpWindow, Sound, Import,
# SoundStream, ObjectInfo
#
# The only button type supported is JumpButton.
#
# Unsupported tags: SoundStop, Wait, pos on BlockSpace (and those used by
# unsupported objects).
#
# Tags supporting Japanese text and Asian layout have not been tested.
#
# Tested on Python 2.4 and 2.5, Windows XP and Sony PRS-500.
#
# Commented even less than pylrs, but not very useful when called directly,
# anyway.
#
class LrfError(Exception):
    """Raised by this module for values or names that cannot be encoded."""
def writeByte(f, byte):
    """Write *byte* to *f* as a single unsigned 8-bit value."""
    packed = struct.pack("<B", byte)
    f.write(packed)
def writeWord(f, word):
    """Write *word* to *f* as an unsigned little-endian 16-bit value.

    *word* is converted with ``int()`` first.

    @raise LrfError: The value is negative or greater than 65535.
    """
    value = int(word)
    if value > 65535:
        raise LrfError('Cannot encode a number greater than 65535 in a word.')
    if value < 0:
        raise LrfError('Cannot encode a number < 0 in a word: '+unicode_type(word))
    f.write(struct.pack("<H", value))
def writeSignedWord(f, sword):
    """Write *sword* to *f* as a signed little-endian 16-bit value.

    The value is passed through ``float()`` and then ``int()``, so numeric
    strings such as '12.7' are accepted and truncated towards zero.
    """
    value = int(float(sword))
    f.write(struct.pack("<h", value))
def writeWords(f, *words):
    """Write each of *words* to *f* as an unsigned little-endian 16-bit value."""
    layout = "<%dH" % len(words)
    f.write(struct.pack(layout, *words))
def writeDWord(f, dword):
    """Write ``int(dword)`` to *f* as an unsigned little-endian 32-bit value."""
    value = int(dword)
    f.write(struct.pack("<I", value))
def writeDWords(f, *dwords):
    """Write each of *dwords* to *f* as an unsigned little-endian 32-bit value."""
    layout = "<%dI" % len(dwords)
    f.write(struct.pack(layout, *dwords))
def writeQWord(f, qword):
    """Write *qword* to *f* as an unsigned little-endian 64-bit value."""
    packed = struct.pack("<Q", qword)
    f.write(packed)
def writeZeros(f, nZeros):
    """Write *nZeros* NUL bytes to *f*."""
    padding = b"\0" * nZeros
    f.write(padding)
def writeString(f, s):
    """Write *s* to *f* unchanged (no length prefix, no encoding step)."""
    f.write(s)
def writeIdList(f, idList):
    """Write *idList* to *f* as a 16-bit count followed by 32-bit ids."""
    count = len(idList)
    writeWord(f, count)
    writeDWords(f, *idList)
def writeColor(f, color):
    """Write *color* to *f* as a big-endian unsigned 32-bit value.

    *color* is a string parsed with ``int(color, 0)``, so a base prefix
    such as '0x' is honoured.
    """
    # TODO: allow color names, web format
    value = int(color, 0)
    f.write(struct.pack(">I", value))
def writeLineWidth(f, width):
    """Write ``int(width)`` to *f* as an unsigned 16-bit word."""
    value = int(width)
    writeWord(f, value)
def writeUnicode(f, string, encoding):
    """Write *string* to *f* as length-prefixed UTF-16-LE data.

    Byte strings are first decoded with *encoding*. The 16-bit prefix
    holds the length of the UTF-16-LE encoded data.

    @raise LrfError: The encoded data is longer than 65535.
    """
    if isinstance(string, bytes):
        string = string.decode(encoding)
    encoded = string.encode("utf-16-le")
    size = len(encoded)
    if size > 65535:
        raise LrfError('Cannot write strings longer than 65535 characters.')
    writeWord(f, size)
    writeString(f, encoded)
def writeRaw(f, string, encoding):
    """Write *string* to *f* as UTF-16-LE data without a length prefix.

    Byte strings are first decoded with *encoding*.
    """
    text = string.decode(encoding) if isinstance(string, bytes) else string
    writeString(f, text.encode("utf-16-le"))
def writeRubyAA(f, rubyAA):
    """Write a ruby align/adjust pair to *f* as one combined word.

    @param rubyAA: Pair ``(align, adjust)`` where align is 'start' or
                   'center' and adjust is 'line-edge' or 'none'.
    @raise KeyError: Either value is not one of the names above.
    """
    ralign, radjust = rubyAA
    # The adjust value is looked up first, then the align value.
    adjust_bits = {"line-edge":0x10, "none":0}[radjust]
    align_bits = {"start":1, "center":2}[ralign]
    writeWord(f, align_bits | adjust_bits)
def writeBgImage(f, bgInfo):
    """Write a background image reference to *f*.

    @param bgInfo: Pair ``(mode, id)`` where mode is one of 'pfix', 'fix',
                   'tile' or 'centering'. The mode is written as a word
                   and the id as a dword.
    @raise KeyError: The mode is not one of the names above.
    """
    imode, iid = bgInfo
    mode_code = {"pfix": 0, "fix":1, "tile":2, "centering":3}[imode]
    writeWord(f, mode_code)
    writeDWord(f, iid)
def writeEmpDots(f, dotsInfo, encoding):
    """Write an emphasis-dots record to *f*.

    @param dotsInfo: Triple ``(refDotsFont, dotsFontName, dotsCode)``; the
                     font reference is written as a dword, the font name
                     as a 'fontfacename' tag, and the code (a string
                     parsed with ``int(code, 0)``) as a word.
    @param encoding: Encoding passed on when writing the font name tag.
    """
    refDotsFont, dotsFontName, dotsCode = dotsInfo
    writeDWord(f, refDotsFont)
    font_tag = LrfTag("fontfacename", dotsFontName)
    font_tag.write(f, encoding)
    writeWord(f, int(dotsCode, 0))
def writeRuledLine(f, lineInfo):
    """Write a ruled-line record to *f*.

    @param lineInfo: Tuple ``(length, type, width, color)``. Length, the
                     encoded line type and width are written as words,
                     followed by the color.
    @raise KeyError: The line type is not in LINE_TYPE_ENCODING.
    """
    lineLength, lineType, lineWidth, lineColor = lineInfo
    writeWord(f, lineLength)
    type_code = LINE_TYPE_ENCODING[lineType]
    writeWord(f, type_code)
    writeWord(f, lineWidth)
    writeColor(f, lineColor)
# Signature bytes: the letters 'L', 'R', 'F' each followed by a NUL byte,
# then two more NUL bytes.
# NOTE(review): presumably written at the start of an LRF file -- the
# code that uses it is not in this part of the module.
LRF_SIGNATURE = b"L\x00R\x00F\x00\x00\x00"
# XOR_KEY = 48
XOR_KEY = 65024 # that's what lrf2lrs says -- not used, anyway...
LRF_VERSION = 1000 # is 999 for librie? lrf2lrs uses 1000
# Image format name -> numeric code. JPEG and JPG share the same code.
IMAGE_TYPE_ENCODING = dict(GIF=0x14, PNG=0x12, BMP=0x13, JPEG=0x11, JPG=0x11)
# Object name -> numeric object type. LrfObject looks its name up here and
# raises LrfError for unknown names. Several names are aliases for the
# same type code (e.g. PageAtr/PageStyle).
OBJECT_TYPE_ENCODING = dict(
PageTree=0x01,
Page=0x02,
Header=0x03,
Footer=0x04,
PageAtr=0x05, PageStyle=0x05,
Block=0x06,
BlockAtr=0x07, BlockStyle=0x07,
MiniPage=0x08,
TextBlock=0x0A, Text=0x0A,
TextAtr=0x0B, TextStyle=0x0B,
ImageBlock=0x0C, Image=0x0C,
Canvas=0x0D,
ESound=0x0E,
ImageStream=0x11,
Import=0x12,
Button=0x13,
Window=0x14,
PopUpWindow=0x15,
Sound=0x16,
SoundStream=0x17,
Font=0x19,
ObjectInfo=0x1A,
BookAtr=0x1C, BookStyle=0x1C,
SimpleTextBlock=0x1D,
TOC=0x1E
)
# Line style name -> numeric code. Used by writeRuledLine and by the
# 'emplinetype' and 'Box' entries of TAG_INFO.
LINE_TYPE_ENCODING = {
'none':0, 'solid':0x10, 'dashed':0x20, 'double':0x30, 'dotted':0x40
}
# Binding direction name -> numeric code.
BINDING_DIRECTION_ENCODING = dict(Lr=1, Rl=16)
# Tag name -> tuple describing how LrfTag serialises that tag.
#
# The first tuple item is the 16-bit tag id (0 means no id is written).
# The remaining items are applied in order to the tag's parameter by
# LrfTag.write:
#   * a dict translates the parameter (e.g. a keyword to its code);
#   * a string is a struct format used to pack the parameter (a tuple
#     parameter is unpacked into several values);
#   * a callable is a writer called as writer(stream, parameter), or
#     writer(stream, parameter, encoding) for writeUnicode, writeRaw and
#     writeEmpDots.
# A tuple with only the id describes a tag that carries no parameter.
TAG_INFO = dict(
rawtext=(0, writeRaw),
ObjectStart=(0xF500, "<IH"),
ObjectEnd=(0xF501,),
# InfoLink (0xF502)
Link=(0xF503, "<I"),
StreamSize=(0xF504, writeDWord),
StreamData=(0xF505, writeString),
StreamEnd=(0xF506,),
oddheaderid=(0xF507, writeDWord),
evenheaderid=(0xF508, writeDWord),
oddfooterid=(0xF509, writeDWord),
evenfooterid=(0xF50A, writeDWord),
ObjectList=(0xF50B, writeIdList),
fontsize=(0xF511, writeSignedWord),
fontwidth=(0xF512, writeSignedWord),
fontescapement=(0xF513, writeSignedWord),
fontorientation=(0xF514, writeSignedWord),
fontweight=(0xF515, writeWord),
fontfacename=(0xF516, writeUnicode),
textcolor=(0xF517, writeColor),
textbgcolor=(0xF518, writeColor),
wordspace=(0xF519, writeSignedWord),
letterspace=(0xF51A, writeSignedWord),
baselineskip=(0xF51B, writeSignedWord),
linespace=(0xF51C, writeSignedWord),
parindent=(0xF51D, writeSignedWord),
parskip=(0xF51E, writeSignedWord),
# F51F, F520
topmargin=(0xF521, writeWord),
headheight=(0xF522, writeWord),
headsep=(0xF523, writeWord),
oddsidemargin=(0xF524, writeWord),
textheight=(0xF525, writeWord),
textwidth=(0xF526, writeWord),
canvaswidth=(0xF551, writeWord),
canvasheight=(0xF552, writeWord),
footspace=(0xF527, writeWord),
footheight=(0xF528, writeWord),
bgimage=(0xF529, writeBgImage),
setemptyview=(0xF52A, {'show':1, 'empty':0}, writeWord),
pageposition=(0xF52B, {'any':0,'upper':1, 'lower':2}, writeWord),
evensidemargin=(0xF52C, writeWord),
framemode=(0xF52E,
{'None':0, 'curve':2, 'square':1}, writeWord),
blockwidth=(0xF531, writeWord),
blockheight=(0xF532, writeWord),
blockrule=(0xF533, {"horz-fixed":0x14, "horz-adjustable":0x12,
"vert-fixed":0x41, "vert-adjustable":0x21,
"block-fixed":0x44, "block-adjustable":0x22},
writeWord),
bgcolor=(0xF534, writeColor),
layout=(0xF535, {'TbRl':0x41, 'LrTb':0x34}, writeWord),
framewidth=(0xF536, writeWord),
framecolor=(0xF537, writeColor),
topskip=(0xF538, writeWord),
sidemargin=(0xF539, writeWord),
footskip=(0xF53A, writeWord),
align=(0xF53C, {'head':1, 'center':4, 'foot':8}, writeWord),
column=(0xF53D, writeWord),
columnsep=(0xF53E, writeSignedWord),
minipagewidth=(0xF541, writeWord),
minipageheight=(0xF542, writeWord),
yspace=(0xF546, writeWord),
xspace=(0xF547, writeWord),
PutObj=(0xF549, "<HHI"),
ImageRect=(0xF54A, "<HHHH"),
ImageSize=(0xF54B, "<HH"),
RefObjId=(0xF54C, "<I"),
PageDiv=(0xF54E, "<HIHI"),
StreamFlags=(0xF554, writeWord),
Comment=(0xF555, writeUnicode),
FontFilename=(0xF559, writeUnicode),
PageList=(0xF55C, writeIdList),
FontFacename=(0xF55D, writeUnicode),
buttonflags=(0xF561, writeWord),
PushButtonStart=(0xF566,),
PushButtonEnd=(0xF567,),
buttonactions=(0xF56A,),
endbuttonactions=(0xF56B,),
jumpto=(0xF56C, "<II"),
RuledLine=(0xF573, writeRuledLine),
rubyaa=(0xF575, writeRubyAA),
rubyoverhang=(0xF576, {'none':0, 'auto':1}, writeWord),
empdotsposition=(0xF577, {'before':1, 'after':2}, writeWord),
empdots=(0xF578, writeEmpDots),
emplineposition=(0xF579, {'before':1, 'after':2}, writeWord),
emplinetype=(0xF57A, LINE_TYPE_ENCODING, writeWord),
ChildPageTree=(0xF57B, "<I"),
ParentPageTree=(0xF57C, "<I"),
Italic=(0xF581,),
ItalicEnd=(0xF582,),
pstart=(0xF5A1, writeDWord), # what goes in the dword? refesound
pend=(0xF5A2,),
CharButton=(0xF5A7, writeDWord),
CharButtonEnd=(0xF5A8,),
Rubi=(0xF5A9,),
RubiEnd=(0xF5AA,),
Oyamoji=(0xF5AB,),
OyamojiEnd=(0xF5AC,),
Rubimoji=(0xF5AD,),
RubimojiEnd=(0xF5AE,),
Yoko=(0xF5B1,),
YokoEnd=(0xF5B2,),
Tate=(0xF5B3,),
TateEnd=(0xF5B4,),
Nekase=(0xF5B5,),
NekaseEnd=(0xF5B6,),
Sup=(0xF5B7,),
SupEnd=(0xF5B8,),
Sub=(0xF5B9,),
SubEnd=(0xF5BA,),
NoBR=(0xF5BB,),
NoBREnd=(0xF5BC,),
EmpDots=(0xF5BD,),
EmpDotsEnd=(0xF5BE,),
EmpLine=(0xF5C1,),
EmpLineEnd=(0xF5C2,),
DrawChar=(0xF5C3, '<H'),
DrawCharEnd=(0xF5C4,),
Box=(0xF5C6, LINE_TYPE_ENCODING, writeWord),
BoxEnd=(0xF5C7,),
Space=(0xF5CA, writeSignedWord),
textstring=(0xF5CC, writeUnicode),
Plot=(0xF5D1, "<HHII"),
CR=(0xF5D2,),
RegisterFont=(0xF5D8, writeDWord),
setwaitprop=(0xF5DA, {'replay':1, 'noreplay':2}, writeWord),
charspace=(0xF5DD, writeSignedWord),
textlinewidth=(0xF5F1, writeLineWidth),
linecolor=(0xF5F2, writeColor)
)
class ObjectTableEntry(object):
    """One record of the object table: an object's id, offset and size."""

    def __init__(self, objId, offset, size):
        self.objId, self.offset, self.size = objId, offset, size

    def write(self, f):
        """Write the entry to *f* as four dwords: id, offset, size and 0."""
        fields = (self.objId, self.offset, self.size, 0)
        writeDWords(f, *fields)
class LrfTag(object):
    """A single LRF tag: a name from TAG_INFO plus at most one parameter."""

    def __init__(self, name, *parameters):
        """
        @param name: Key into TAG_INFO.
        @param parameters: Zero or one parameter value for the tag.
        @raise LrfError: Unknown tag name, or more than one parameter.
        """
        try:
            tagInfo = TAG_INFO[name]
        except KeyError:
            raise LrfError("tag name %s not recognized" % name)
        self.name = name
        self.type = tagInfo[0]
        self.format = tagInfo[1:]
        if len(parameters) > 1:
            raise LrfError("only one parameter allowed on tag %s" % name)
        self.parameter = parameters[0] if parameters else None

    def write(self, lrf, encoding=None):
        """Serialise the tag to the stream *lrf*.

        The tag id is written first unless it is 0. If the tag has a
        parameter, each step in ``self.format`` is then applied to it: a
        dict translates the value, a string is used as a struct format,
        and anything else is called as a writer function.

        @param encoding: Required by the text writers (writeUnicode,
                         writeRaw, writeEmpDots).
        @raise LrfError: A text writer is needed but *encoding* is None.
        """
        if self.type != 0:
            writeWord(lrf, self.type)

        value = self.parameter
        if value is None:
            return

        needs_encoding = (writeUnicode, writeRaw, writeEmpDots)
        for step in self.format:
            if isinstance(step, dict):
                value = step[value]
            elif isinstance(step, string_or_bytes):
                fields = value if isinstance(value, tuple) else (value,)
                writeString(lrf, struct.pack(step, *fields))
            elif step in needs_encoding:
                if encoding is None:
                    raise LrfError("Tag requires encoding")
                step(lrf, value, encoding)
            else:
                step(lrf, value)
# Stream flag bits tested by LrfStreamBase.getStreamTags.
# 0x200: the stream is scrambled (scrambling is never actually performed).
STREAM_SCRAMBLED = 0x200
# 0x100: the stream data is zlib-compressed.
STREAM_COMPRESSED = 0x100
# Includes the compressed bit; when every bit of this value is set,
# getStreamTags always compresses, even if asked to optimize.
STREAM_FORCE_COMPRESSED = 0x8100
# NOTE(review): presumably the flags value for table-of-contents streams;
# its use is not visible in this part of the module.
STREAM_TOC = 0x0051
class LrfStreamBase(object):
    """Base class for object streams: flags plus a block of byte data."""

    def __init__(self, streamFlags, streamData=None):
        self.streamFlags = streamFlags
        self.streamData = streamData

    def setStreamData(self, streamData):
        """Replace the stream's data."""
        self.streamData = streamData

    def getStreamTags(self, optimize=False):
        """Return the list of LrfTags that serialise this stream.

        The tags are, in order: StreamFlags, StreamSize, StreamData and
        StreamEnd.

        If the compressed flag is set the data is zlib-compressed and
        prefixed with its uncompressed length. With *optimize* true,
        compression is dropped (and the flag cleared) when it would not
        save space; the force-compressed flag disables that optimisation.
        Scrambling is not implemented.

        Only the low nine bits of the flags are written.
        """
        # if flags & 0x200, stream is scrambled
        # if flags & 0x100, stream is compressed
        flags = self.streamFlags
        payload = self.streamData

        forced = (flags & STREAM_FORCE_COMPRESSED) == STREAM_FORCE_COMPRESSED
        if forced:
            optimize = False

        if (flags & STREAM_COMPRESSED) == STREAM_COMPRESSED:
            rawLength = len(payload)
            deflated = zlib.compress(payload)
            # The +4 accounts for the length prefix added when compressing.
            if optimize and rawLength <= len(deflated) + 4:
                flags &= ~STREAM_COMPRESSED
            else:
                payload = struct.pack("<I", rawLength) + deflated

        tags = [LrfTag("StreamFlags", flags & 0x01FF)]
        tags.append(LrfTag("StreamSize", len(payload)))
        tags.append(LrfTag("StreamData", payload))
        tags.append(LrfTag("StreamEnd"))
        return tags
class LrfTagStream(LrfStreamBase):
    """A stream whose data is produced by serializing a list of tags."""

    def __init__(self, streamFlags, streamTags=None):
        LrfStreamBase.__init__(self, streamFlags)
        # Work on a copy so the caller's sequence is never modified.
        self.tags = [] if streamTags is None else streamTags[:]

    def appendLrfTag(self, tag):
        """Add ``tag`` to the end of the stream's tag list."""
        self.tags.append(tag)

    def getStreamTags(self, encoding,
            optimizeTags=False, optimizeCompression=False):
        """Serialize the tags and return the tags describing the stream.

        Each tag is written with ``encoding`` into an in-memory buffer,
        the result is stored in ``self.streamData``, and the base class
        builds the stream tags from it.  ``optimizeTags`` runs
        tagListOptimizer over ``self.tags`` first; ``optimizeCompression``
        is forwarded to the base class as ``optimize``.
        """
        if optimizeTags:
            tagListOptimizer(self.tags)

        with io.BytesIO() as buf:
            for item in self.tags:
                item.write(buf, encoding)
            self.streamData = buf.getvalue()

        return LrfStreamBase.getStreamTags(self, optimize=optimizeCompression)
class LrfFileStream(LrfStreamBase):
    """A stream whose data is the complete binary content of a file."""

    def __init__(self, streamFlags, filename):
        LrfStreamBase.__init__(self, streamFlags)
        # The whole file is read into memory when the stream is created.
        with open(filename, "rb") as source:
            contents = source.read()
        self.streamData = contents
class LrfObject(object):
    """A named LRF object: an id, a type code and an ordered list of tags."""

    def __init__(self, name, objId):
        """Create an empty object.

        Raises LrfError if ``objId`` is not positive or if ``name`` has no
        entry in OBJECT_TYPE_ENCODING.
        """
        if objId <= 0:
            raise LrfError("invalid objId for " + name)

        self.name = name
        self.objId = objId
        self.tags = []

        try:
            typeCode = OBJECT_TYPE_ENCODING[name]
        except KeyError:
            raise LrfError("object name %s not recognized" % name)
        self.type = typeCode

    def __str__(self):
        pieces = ('LRFObject: ', self.name, ", ", unicode_type(self.objId))
        return ''.join(pieces)

    def appendLrfTag(self, tag):
        """Append a single tag."""
        self.tags.append(tag)

    def appendLrfTags(self, tagList):
        """Append every tag in ``tagList``, preserving order."""
        self.tags.extend(tagList)

    # deprecated old name
    append = appendLrfTag

    def appendTagDict(self, tagDict, genClass=None):
        """Append one tag per entry of ``tagDict``.

        Most entries become an LrfTag directly.  A few settings are instead
        merged into a combined tag, with defaults filled in for any missing
        part:

            rubyalign, rubyadjust                        -> "rubyaa"
            bgimagemode, bgimageid                       -> "bgimage"
            empdotscode, empdotsfontname, refempdotsfont -> "empdots"

        The 'rubyAlignAndAdjust' key is ignored.  The combined tags are
        appended after all the plain ones.
        """
        #
        # This code does not really belong here, I think.  But it
        # belongs somewhere, so here it is.
        #
        mergedNames = {
            "bgimagemode", "bgimageid", "rubyalign", "rubyadjust",
            "empdotscode", "empdotsfontname", "refempdotsfont"}

        composites = {}
        for name, value in iteritems(tagDict):
            if name == 'rubyAlignAndAdjust':
                continue
            if name not in mergedNames:
                self.append(LrfTag(name, value))
            else:
                composites[name] = value

        def given(*names):
            # True when at least one of the names was supplied.
            return any(n in composites for n in names)

        if given("rubyalign", "rubyadjust"):
            ruby = (composites.get("rubyalign", "none"),
                    composites.get("rubyadjust", "start"))
            self.append(LrfTag("rubyaa", ruby))

        if given("bgimagemode", "bgimageid"):
            mode = composites.get("bgimagemode", "fix")
            imageId = composites.get("bgimageid", 0)
            # for some reason, page style uses 0 for "fix"
            # we call this pfix to differentiate it
            if mode == "fix" and genClass == "PageStyle":
                mode = "pfix"
            self.append(LrfTag("bgimage", (mode, imageId)))

        if given("empdotscode", "empdotsfontname", "refempdotsfont"):
            code = composites.get("empdotscode", "0x002E")
            fontName = composites.get("empdotsfontname",
                                      "Dutch801 Rm BT Roman")
            fontRef = composites.get("refempdotsfont", 0)
            self.append(LrfTag("empdots", (fontRef, fontName, code)))

    def write(self, lrf, encoding=None):
        """Write ObjectStart, every tag in order, then ObjectEnd to ``lrf``.

        ``encoding`` is passed on to each contained tag only; the start and
        end markers are written without it.
        """
        opener = LrfTag("ObjectStart", (self.objId, self.type))
        opener.write(lrf)

        for item in self.tags:
            item.write(lrf, encoding)

        closer = LrfTag("ObjectEnd")
        closer.write(lrf)
class LrfToc(LrfObject):
    """
    Table of contents.  Format of toc is:
    [ (pageid, objid, string)...]
    """

    def __init__(self, objId, toc, se):
        """Build the TOC object.

        ``toc`` is the list of (pageid, objid, label) entries and ``se`` is
        the encoding handed to writeUnicode for each label.  Raises LrfError
        if any page id or object id is not positive.
        """
        LrfObject.__init__(self, "TOC", objId)
        streamData = self._makeTocStream(toc, se)
        self._makeStreamTags(streamData)

    def _makeStreamTags(self, streamData):
        """Wrap ``streamData`` in a STREAM_TOC stream and add its tags."""
        stream = LrfStreamBase(STREAM_TOC, streamData)
        self.tags.extend(stream.getStreamTags())

    def _makeTocStream(self, toc, se):
        """Return the serialized TOC stream as bytes.

        Layout: entry count (dword), one offset (dword) per entry, then the
        entries themselves (page id dword, object id dword, label written
        by writeUnicode).  Offsets are relative to the first entry.

        The offsets are taken from the positions at which the entries are
        actually written rather than estimated as 4 + 4 + 2 + 2*len(label).
        The estimate is only right when every label element ends up as
        exactly two bytes, so it could point into the middle of an entry
        otherwise.
        """
        offsets = []
        with io.BytesIO() as entries:
            for pageId, objId, label in toc:
                # %-formatting keeps the message identical for text labels
                # and still yields an LrfError when the label is bytes.
                if pageId <= 0:
                    raise LrfError("page id invalid in toc: %s" % (label,))
                if objId <= 0:
                    raise LrfError(
                        "textblock id invalid in toc: %s" % (label,))

                offsets.append(entries.tell())
                writeDWord(entries, pageId)
                writeDWord(entries, objId)
                writeUnicode(entries, label, se)
            entryData = entries.getvalue()

        with io.BytesIO() as stream:
            writeDWord(stream, len(offsets))
            # An empty toc has always been written with a single zero
            # offset; keep that so the output does not change.
            for offset in offsets or [0]:
                writeDWord(stream, offset)
            stream.write(entryData)
            return stream.getvalue()
class LrfWriter(object):
    """Collects LRF objects and writes them out as a complete LRF file.

    Typical use: create the writer, register the root object (and
    optionally a TOC object and thumbnail), append every object, then call
    writeFile() with a binary file object that supports seek() and tell().
    """

    def __init__(self, sourceEncoding):
        self.sourceEncoding = sourceEncoding

        # The following flags are just to have a place to remember these
        # values.  The flags must still be passed to the appropriate classes
        # in order to have them work.

        self.saveStreamTags = False  # used only in testing -- hogs memory

        # highly experimental -- set to True at your own risk
        self.optimizeTags = False
        self.optimizeCompression = False

        # End of placeholders

        self.rootObjId = 0
        self.rootObj = None
        self.binding = 1  # 1=front to back, 16=back to front
        self.dpi = 1600
        self.width = 600
        self.height = 800
        self.colorDepth = 24
        self.tocObjId = 0
        self.docInfoXml = ""
        self.thumbnailEncoding = "JPEG"
        self.thumbnailData = b""
        self.objects = []
        self.objectTable = []
        # NOTE: pageTreeId is not initialised here; it only exists after
        # setPageTreeId() has been called.

    def getSourceEncoding(self):
        """Return the encoding given to the constructor."""
        return self.sourceEncoding

    def toUnicode(self, string):
        """Decode ``string`` with the source encoding if it is bytes.

        Anything that is not bytes is returned unchanged.
        """
        if isinstance(string, bytes):
            string = string.decode(self.sourceEncoding)
        return string

    def getDocInfoXml(self):
        """Return the document-info XML text."""
        return self.docInfoXml

    def setPageTreeId(self, objId):
        """Remember the page tree's object id."""
        self.pageTreeId = objId

    def getPageTreeId(self):
        """Return the id stored by setPageTreeId().

        Raises AttributeError if setPageTreeId() was never called.
        """
        return self.pageTreeId

    def setRootObject(self, obj):
        """Register ``obj`` as the root object.

        Raises LrfError if a root object has already been set.
        """
        if self.rootObjId != 0:
            raise LrfError("root object already set")
        self.rootObjId = obj.objId
        self.rootObj = obj

    def registerFontId(self, id):
        """Append a RegisterFont tag for ``id`` to the root object.

        Raises LrfError if no root object has been set.
        """
        if self.rootObj is None:
            raise LrfError("can't register font -- no root object")
        self.rootObj.append(LrfTag("RegisterFont", id))

    def setTocObject(self, obj):
        """Register ``obj`` as the table-of-contents object.

        Raises LrfError if a TOC object has already been set.
        """
        if self.tocObjId != 0:
            raise LrfError("toc object already set")
        self.tocObjId = obj.objId

    def setThumbnailFile(self, filename, encoding=None):
        """Load the thumbnail image from ``filename``.

        ``encoding`` names the image type; when omitted it is taken from
        the file extension.  It is upper-cased and must be a key of
        IMAGE_TYPE_ENCODING, otherwise LrfError is raised.

        The thumbnail data and encoding are only replaced once both have
        been obtained successfully, so a failed call leaves the previous
        thumbnail intact.
        """
        with open(filename, "rb") as f:
            data = f.read()

        if encoding is None:
            encoding = os.path.splitext(filename)[1][1:]
        encoding = encoding.upper()
        if encoding not in IMAGE_TYPE_ENCODING:
            raise LrfError("unknown image type: " + encoding)

        # Commit both values together, after validation has passed.
        self.thumbnailData = data
        self.thumbnailEncoding = encoding

    def append(self, obj):
        """Add ``obj`` to the list of objects to be written."""
        self.objects.append(obj)

    def addLrfObject(self, objId):
        """Placeholder; does nothing."""
        pass

    def writeFile(self, lrf):
        """Write the complete LRF file to ``lrf``.

        The header is written first with placeholder offsets, then the
        objects; the header is patched afterwards, so ``lrf`` must support
        seek() and tell().  Raises LrfError if no root object has been set.
        """
        if self.rootObjId == 0:
            raise LrfError("no root object has been set")

        self.writeHeader(lrf)
        self.writeObjects(lrf)
        self.updateObjectTableOffset(lrf)
        self.updateTocObjectOffset(lrf)
        self.writeObjectTable(lrf)

    def writeHeader(self, lrf):
        """Write the file header, compressed doc info and thumbnail."""
        writeString(lrf, LRF_SIGNATURE)
        writeWord(lrf, LRF_VERSION)
        writeWord(lrf, XOR_KEY)
        writeDWord(lrf, self.rootObjId)
        writeQWord(lrf, len(self.objects))
        writeQWord(lrf, 0)  # 0x18 objectTableOffset -- will be updated
        writeZeros(lrf, 4)  # 0x20 unknown
        writeWord(lrf, self.binding)
        writeDWord(lrf, self.dpi)
        writeWords(lrf, self.width, self.height, self.colorDepth)
        writeZeros(lrf, 20)  # 0x30 unknown
        writeDWord(lrf, self.tocObjId)
        writeDWord(lrf, 0)  # 0x48 tocObjectOffset -- will be updated

        # The doc info is stored zlib-compressed, as UTF-8 with a BOM.
        docInfoXml = codecs.BOM_UTF8 + self.docInfoXml.encode("utf-8")
        compDocInfo = zlib.compress(docInfoXml)
        # The recorded size is 4 larger than the compressed data; the
        # uncompressed length written below is a 4-byte dword.
        writeWord(lrf, len(compDocInfo) + 4)
        writeWord(lrf, IMAGE_TYPE_ENCODING[self.thumbnailEncoding])
        writeDWord(lrf, len(self.thumbnailData))
        writeDWord(lrf, len(docInfoXml))
        writeString(lrf, compDocInfo)
        writeString(lrf, self.thumbnailData)

    def writeObjects(self, lrf):
        """Write every object and rebuild the object table as a side effect."""
        # also appends object entries to the object table
        self.objectTable = []
        for obj in self.objects:
            objStart = lrf.tell()
            obj.write(lrf, self.sourceEncoding)
            objEnd = lrf.tell()
            self.objectTable.append(
                ObjectTableEntry(obj.objId, objStart, objEnd - objStart))

    def updateObjectTableOffset(self, lrf):
        """Patch the header with the current position as the table offset."""
        # update the offset of the object table
        tableOffset = lrf.tell()
        lrf.seek(0x18, 0)
        writeQWord(lrf, tableOffset)
        lrf.seek(0, 2)  # back to the end of the file

    def updateTocObjectOffset(self, lrf):
        """Patch the header with the file offset of the TOC object.

        Does nothing when no TOC object was registered.  Raises LrfError if
        a TOC object was registered but never written.
        """
        if self.tocObjId == 0:
            return

        for entry in self.objectTable:
            if entry.objId == self.tocObjId:
                lrf.seek(0x48, 0)
                writeDWord(lrf, entry.offset)
                lrf.seek(0, 2)  # back to the end of the file
                break
        else:
            # The loop finished without finding the TOC object's entry.
            raise LrfError("toc object not in object table")

    def writeObjectTable(self, lrf):
        """Write every object table entry at the current position."""
        for tableEntry in self.objectTable:
            tableEntry.write(lrf)

View File

@@ -0,0 +1,44 @@
from __future__ import absolute_import, division, print_function, unicode_literals
def _optimize(tagList, tagName, conversion):
    """Remove unnecessary ``tagName`` settings from ``tagList`` in place.

    Three passes are made:

    1. of two ``tagName`` settings with no "rawtext" tag between them, the
       earlier one is removed;
    2. a setting whose converted value equals that of the previous
       remaining setting is removed;
    3. settings at the very end of the list are removed.

    ``conversion`` is applied to each tag's parameter before values are
    compared.
    """
    # Pass 1: look only at the settings and the text they could affect.
    relevant = [t for t in tagList
                if t.name == tagName or t.name == "rawtext"]
    for current, following in zip(relevant, relevant[1:]):
        if current.name == tagName and following.name == tagName:
            tagList.remove(current)

    # Pass 2: compare each surviving setting with the one after it.
    settings = [t for t in tagList if t.name == tagName]
    for current, following in zip(settings, settings[1:]):
        currentValue = conversion(current.parameter)
        followingValue = conversion(following.parameter)
        if currentValue == followingValue:
            tagList.remove(following)

    # Pass 3: a setting at the end of the list has nothing after it.
    while tagList and tagList[-1].name == tagName:
        tagList.pop()


def tagListOptimizer(tagList):
    """Strip redundant "fontsize" and "fontweight" tags from ``tagList``.

    The list is modified in place.  For example

        fontsize=100, fontsize=200, text, fontsize=100, fontsize=200

    becomes

        fontsize=200, text

    Returns the number of tags that were removed.
    """
    sizeBefore = len(tagList)
    for settingName in ("fontsize", "fontweight"):
        _optimize(tagList, settingName, int)
    return sizeBefore - len(tagList)

File diff suppressed because it is too large Load Diff