mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-03-27 13:53:32 +01:00
Initial import
This commit is contained in:
115
ebook_converter/ebooks/lrf/__init__.py
Normal file
115
ebook_converter/ebooks/lrf/__init__.py
Normal file
@@ -0,0 +1,115 @@
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
"""
|
||||
This package contains logic to read and write LRF files.
|
||||
The LRF file format is documented at U{http://www.sven.de/librie/Librie/LrfFormat}.
|
||||
"""
|
||||
|
||||
from calibre.ebooks.lrf.pylrs.pylrs import Book as _Book
|
||||
from calibre.ebooks.lrf.pylrs.pylrs import TextBlock, Header, \
|
||||
TextStyle, BlockStyle
|
||||
from calibre.ebooks.lrf.fonts import FONT_FILE_MAP
|
||||
from calibre.ebooks import ConversionError
|
||||
|
||||
__docformat__ = "epytext"
|
||||
|
||||
|
||||
class LRFParseError(Exception):
    """Exception type for LRF parsing errors."""
|
||||
|
||||
|
||||
class PRS500_PROFILE(object):
    """Screen geometry and default typography for the 'prs500' profile.

    The values are read as plain class attributes; Book() in this module
    uses them to derive page and text style defaults.
    """

    name = 'prs500'

    # Screen geometry.
    screen_width = 600
    screen_height = 775
    dpi = 166
    # Number of pixels to subtract from screen_height when calculating
    # the height of the text area.
    fudge = 0

    # Body text defaults.
    font_size = 10  #: Default (in pt)
    parindent = 10  #: Default (in pt)
    line_space = 1.2  #: Default (in pt)

    # Page header defaults.
    header_font_size = 6  #: In pt
    header_height = 30  #: In px

    # Font face used for each family when no custom font is supplied.
    default_fonts = {
        'sans': "Swis721 BT Roman",
        'mono': "Courier10 BT Roman",
        'serif': "Dutch801 Rm BT Roman",
    }
|
||||
|
||||
|
||||
def find_custom_fonts(options, logger):
    """Look up the custom serif/sans/mono font families requested in options.

    For each of ``options.serif_family``, ``options.sans_family`` and
    ``options.mono_family`` that is set, the text after the last comma is
    taken as the family name and passed to
    ``font_scanner.legacy_fonts_for_family``. A warning is logged when the
    lookup returns nothing.

    Returns a dict with the keys 'serif', 'sans' and 'mono'; a value is
    whatever the scanner returned, or None when the family was not requested.
    """
    from calibre.utils.fonts.scanner import font_scanner

    fonts = {'serif': None, 'sans': None, 'mono': None}
    for kind in ('serif', 'sans', 'mono'):
        spec = getattr(options, kind + '_family')
        if not spec:
            continue
        # Only the last comma-separated component names the family.
        wanted = spec.split(',')[-1].strip()
        found = font_scanner.legacy_fonts_for_family(wanted)
        fonts[kind] = found
        if not found:
            logger.warn('Unable to find %s family %s' % (kind, wanted))
    return fonts
|
||||
|
||||
|
||||
def Book(options, logger, font_delta=0, header=None,
         profile=PRS500_PROFILE, **settings):
    """Create a pylrs book with page/text defaults derived from options and profile.

    @param options: Object providing the margin attributes, header_separation,
                    wordspace and the *_family font options read below.
    @param logger: Passed to find_custom_fonts for warnings.
    @param font_delta: Added (times 20) to the default font size.
    @param header: Optional content appended to a header text block; when
                   truthy, the page gets a header and the top margin is zeroed.
    @param profile: Device profile (screen size, default fonts, ...).
    @param settings: Extra keyword arguments forwarded to the pylrs Book.
    @return: Tuple ``(book, fonts)`` where fonts maps 'serif'/'sans'/'mono'
             to a dict that always has a 'normal' entry.
    @raise ConversionError: If a custom family was found but has no 'normal'
                            variant.
    """
    from uuid import uuid4

    text_width = profile.screen_width - (options.left_margin + options.right_margin)
    text_height = profile.screen_height \
        - (options.top_margin + options.bottom_margin) - profile.fudge
    ps = {
        'topmargin': options.top_margin,
        'evensidemargin': options.left_margin,
        'oddsidemargin': options.left_margin,
        'textwidth': text_width,
        'textheight': text_height,
    }

    if header:
        hdr = Header()
        header_text_style = TextStyle(
            align='foot', fontsize=int(profile.header_font_size*10))
        header_block_style = BlockStyle(blockwidth=ps['textwidth'])
        hb = TextBlock(textStyle=header_text_style, blockStyle=header_block_style)
        hb.append(header)
        hdr.PutObj(hb)
        ps['headheight'] = profile.header_height
        ps['headsep'] = options.header_separation
        ps['header'] = hdr
        # With a header the top margin is dropped and the text area shrinks
        # by the header height and separation instead.
        ps['topmargin'] = 0
        ps['textheight'] = profile.screen_height \
            - (options.bottom_margin + ps['topmargin']) \
            - ps['headheight'] - ps['headsep'] - profile.fudge

    fontsize = int(10*profile.font_size+font_delta*20)
    baselineskip = fontsize + 20
    fonts = find_custom_fonts(options, logger)
    tsd = {
        'fontsize': fontsize,
        'parindent': int(10*profile.parindent),
        'linespace': int(10*profile.line_space),
        'baselineskip': baselineskip,
        'wordspace': 10*options.wordspace,
    }
    serif = fonts['serif']
    if serif and 'normal' in serif:
        tsd['fontfacename'] = serif['normal'][1]

    book = _Book(textstyledefault=tsd,
                 pagestyledefault=ps,
                 blockstyledefault=dict(blockwidth=ps['textwidth']),
                 bookid=uuid4().hex,
                 **settings)

    # Embed every variant of every custom family and remember its file.
    for variants in fonts.values():
        if not variants:
            continue
        for font in variants.values():
            book.embed_font(*font)
            FONT_FILE_MAP[font[1]] = font[0]

    # Families without a custom font fall back to the profile default.
    for family in ['serif', 'sans', 'mono']:
        if not fonts[family]:
            fonts[family] = {'normal': (None, profile.default_fonts[family])}
        elif 'normal' not in fonts[family]:
            raise ConversionError('Could not find the normal version of the ' + family + ' font')
    return book, fonts
|
||||
33
ebook_converter/ebooks/lrf/fonts.py
Normal file
33
ebook_converter/ebooks/lrf/fonts.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
from PIL import ImageFont
|
||||
|
||||
'''
|
||||
Default fonts used in the PRS500
|
||||
'''
|
||||
|
||||
|
||||
# Font name -> Liberation font file stem; get_font() loads
# 'fonts/liberation/<stem>.ttf' for these names.
LIBERATION_FONT_MAP = {
    'Swis721 BT Roman': 'LiberationSans-Regular',
    'Dutch801 Rm BT Roman': 'LiberationSerif-Regular',
    'Courier10 BT Roman': 'LiberationMono-Regular',
}

# Font name -> font file, consulted by get_font() for names that are not in
# LIBERATION_FONT_MAP. Starts out empty.
FONT_FILE_MAP = {}
|
||||
|
||||
|
||||
def get_font(name, size, encoding='unic'):
    '''
    Get an ImageFont object by name.

    @param name: Font name; looked up first in LIBERATION_FONT_MAP, then in
                 FONT_FILE_MAP.
    @param size: Font height in pixels. To convert from pts:
                 sz in pixels = (dpi/72) * size in pts
    @param encoding: Font encoding to use. E.g. 'unic', 'symbol', 'ADOB', 'ADBE', 'aprm'
    @return: The loaded font, or None when name is in neither map.
    '''
    if name in LIBERATION_FONT_MAP:
        # NOTE(review): P is not defined in the visible source; presumably a
        # resource-path helper provided elsewhere -- confirm.
        font_path = P('fonts/liberation/%s.ttf' % LIBERATION_FONT_MAP[name])
    elif name in FONT_FILE_MAP:
        font_path = FONT_FILE_MAP[name]
    else:
        return None
    return ImageFont.truetype(font_path, size, encoding=encoding)
|
||||
10
ebook_converter/ebooks/lrf/html/__init__.py
Normal file
10
ebook_converter/ebooks/lrf/html/__init__.py
Normal file
@@ -0,0 +1,10 @@
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
"""
|
||||
This package contains code to convert HTML ebooks to LRF ebooks.
|
||||
"""
|
||||
|
||||
__docformat__ = "epytext"
|
||||
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
||||
115
ebook_converter/ebooks/lrf/html/color_map.py
Normal file
115
ebook_converter/ebooks/lrf/html/color_map.py
Normal file
@@ -0,0 +1,115 @@
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import re
|
||||
|
||||
# Lower-case HTML/CSS colour name -> '#RRGGBB'.
# Keys must carry no surrounding whitespace: lrs_color() looks names up
# verbatim after lower-casing.
NAME_MAP = {
    'aliceblue': '#F0F8FF',
    'antiquewhite': '#FAEBD7',
    'aqua': '#00FFFF',
    'aquamarine': '#7FFFD4',
    'azure': '#F0FFFF',
    'beige': '#F5F5DC',
    'bisque': '#FFE4C4',
    'black': '#000000',
    'blanchedalmond': '#FFEBCD',
    'blue': '#0000FF',
    'brown': '#A52A2A',
    'burlywood': '#DEB887',
    'cadetblue': '#5F9EA0',
    'chartreuse': '#7FFF00',
    'chocolate': '#D2691E',
    'coral': '#FF7F50',
    'crimson': '#DC143C',
    'cyan': '#00FFFF',
    'darkblue': '#00008B',
    'darkgoldenrod': '#B8860B',
    'darkgreen': '#006400',
    'darkkhaki': '#BDB76B',
    'darkmagenta': '#8B008B',
    'darkolivegreen': '#556B2F',
    'darkorange': '#FF8C00',
    'darkorchid': '#9932CC',
    'darkred': '#8B0000',
    'darksalmon': '#E9967A',
    'darkslateblue': '#483D8B',
    'darkslategrey': '#2F4F4F',
    'darkviolet': '#9400D3',
    'deeppink': '#FF1493',
    'dodgerblue': '#1E90FF',
    'firebrick': '#B22222',
    'floralwhite': '#FFFAF0',
    'forestgreen': '#228B22',
    'fuchsia': '#FF00FF',
    'gainsboro': '#DCDCDC',
    'ghostwhite': '#F8F8FF',
    'gold': '#FFD700',
    'goldenrod': '#DAA520',
    'gray': '#808080',
    'green': '#008000',
    'grey': '#808080',
    'indianred': '#CD5C5C',
    'indigo': '#4B0082',
    'khaki': '#F0E68C',
    'lavenderblush': '#FFF0F5',
    'lawngreen': '#7CFC00',
    'lightblue': '#ADD8E6',
    'lightcoral': '#F08080',
    'lightgoldenrodyellow': '#FAFAD2',
    'lightgray': '#D3D3D3',
    'lightgrey': '#D3D3D3',
    'lightskyblue': '#87CEFA',
    'lightslategrey': '#778899',
    'lightsteelblue': '#B0C4DE',
    'lime': '#00FF00',
    'linen': '#FAF0E6',
    'magenta': '#FF00FF',
    'maroon': '#800000',
    'mediumaquamarine': '#66CDAA',
    'mediumblue': '#0000CD',
    'mediumorchid': '#BA55D3',
    'mediumpurple': '#9370DB',
    'mediumseagreen': '#3CB371',
    'mediumslateblue': '#7B68EE',
    'midnightblue': '#191970',
    'moccasin': '#FFE4B5',
    'navajowhite': '#FFDEAD',
    'navy': '#000080',
    'oldlace': '#FDF5E6',
    'olive': '#808000',
    'orange': '#FFA500',
    'orangered': '#FF4500',
    'orchid': '#DA70D6',
    'paleturquoise': '#AFEEEE',
    'papayawhip': '#FFEFD5',
    'peachpuff': '#FFDAB9',
    'powderblue': '#B0E0E6',
    'purple': '#800080',
    'red': '#FF0000',
    'rosybrown': '#BC8F8F',
    'royalblue': '#4169E1',
    'saddlebrown': '#8B4513',
    'sandybrown': '#F4A460',
    'seashell': '#FFF5EE',
    'sienna': '#A0522D',
    'silver': '#C0C0C0',
    'skyblue': '#87CEEB',
    'slategrey': '#708090',
    'snow': '#FFFAFA',
    'springgreen': '#00FF7F',
    'teal': '#008080',
    'violet': '#EE82EE',
    'white': '#FFFFFF',
    'yellow': '#FFFF00',
    'yellowgreen': '#9ACD32',
}
|
||||
|
||||
# '#RRGGBB' with hexadecimal digits (a-f included, either case).
hex_pat = re.compile(r'#([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{2})', re.IGNORECASE)
# 'rgb(r, g, b)' with decimal channel values.
rgb_pat = re.compile(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', re.IGNORECASE)


def lrs_color(html_color):
    """Convert an HTML colour specification to an LRS colour string.

    Accepted forms, tried in this order: ``#RRGGBB``, ``rgb(r, g, b)`` and a
    colour name present in NAME_MAP. Matching is case-insensitive.

    @param html_color: The colour as written in the HTML/CSS source.
    @return: A string of the form ``'0x00RRGGBB'``; ``'0x00000000'`` when the
             input is not recognised.
    """
    hcol = html_color.lower()
    match = hex_pat.search(hcol)
    if match:
        return '0x00' + ''.join(match.groups())
    match = rgb_pat.search(hcol)
    if match:
        # Every channel must occupy exactly two hex digits; values above 255
        # are clamped so the result stays a well-formed 0x00RRGGBB value.
        channels = [min(int(channel), 255) for channel in match.groups()]
        return '0x00' + ''.join('%02x' % channel for channel in channels)
    if hcol in NAME_MAP:
        return NAME_MAP[hcol].replace('#', '0x00')
    return '0x00000000'
|
||||
1951
ebook_converter/ebooks/lrf/html/convert_from.py
Normal file
1951
ebook_converter/ebooks/lrf/html/convert_from.py
Normal file
File diff suppressed because it is too large
Load Diff
386
ebook_converter/ebooks/lrf/html/table.py
Normal file
386
ebook_converter/ebooks/lrf/html/table.py
Normal file
@@ -0,0 +1,386 @@
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
import math, sys, re, numbers
|
||||
|
||||
from calibre.ebooks.lrf.fonts import get_font
|
||||
from calibre.ebooks.lrf.pylrs.pylrs import TextBlock, Text, CR, Span, \
|
||||
CharButton, Plot, Paragraph, \
|
||||
LrsTextTag
|
||||
from polyglot.builtins import string_or_bytes, range, native_string_type
|
||||
|
||||
|
||||
def ceil(num):
    """Return the smallest integer >= num, as an int."""
    rounded_up = math.ceil(num)
    return int(rounded_up)
|
||||
|
||||
|
||||
def print_xml(elem):
    """Write the XML form of elem to stdout, followed by a newline.

    elem is converted with its ``toElement`` method and serialised with
    ElementWriter, using 'utf8' as the source encoding.
    """
    from calibre.ebooks.lrf.pylrs.pylrs import ElementWriter

    encoding = native_string_type('utf8')
    xml_element = elem.toElement(encoding)
    writer = ElementWriter(xml_element, sourceEncoding=native_string_type('utf8'))
    writer.write(sys.stdout)
    print()
|
||||
|
||||
|
||||
def cattrs(base, extra):
    """Return a copy of base updated with extra; neither argument is modified."""
    merged = base.copy()
    merged.update(extra)
    return merged
|
||||
|
||||
|
||||
def tokens(tb):
    '''
    Generate ``(token, attrs)`` pairs for the contents of the text block tb.

    A token is one of:
        1. The integer 1 for a CR found directly in ``tb.contents``
           (attrs is None)
        2. The integer 2 for a CR found inside a paragraph (attrs is None)
        3. A Plot object (attrs is None)
        4. A string: a block of text that has the same style; attrs is a
           dict holding the attributes that apply to it

    Only CR and Paragraph items of ``tb.contents`` are looked at.
    '''
    def walk(node, style):
        # Yield the tokens of one paragraph-level node, carrying the
        # attributes accumulated so far in style.
        if isinstance(node, CR):
            yield 2, None
        elif isinstance(node, Text):
            yield node.text, cattrs(style, {})
        elif isinstance(node, string_or_bytes):
            yield node, cattrs(style, {})
        elif isinstance(node, (CharButton, LrsTextTag)):
            # Only the first child of these containers is examined.
            if node.contents:
                first = node.contents[0]
                if hasattr(first, 'text'):
                    yield first.text, cattrs(style, {})
                elif hasattr(first, 'attrs'):
                    for pair in walk(first, first.attrs):
                        yield pair
        elif isinstance(node, Plot):
            yield node, None
        elif isinstance(node, Span):
            style = cattrs(style, node.attrs)
            for child in node.contents:
                for pair in walk(child, style):
                    yield pair

    for item in tb.contents:
        if isinstance(item, CR):
            yield 1, None
        elif isinstance(item, Paragraph):
            for child in item.contents:
                style = child.attrs if hasattr(child, 'attrs') else {}
                for pair in walk(child, style):
                    yield pair
|
||||
|
||||
|
||||
class Cell(object):
    """One table cell, rendered into text blocks whose size can be estimated.

    On construction the cell's tag is run through the converter on a scratch
    page and the resulting TextBlock objects are collected in
    ``self.text_blocks``. The remaining methods estimate pixel widths and
    heights of those blocks using the fonts returned by get_font.

    Attributes set by __init__:
        pwidth   -- width in percent from the tag/CSS, or -1 when absent,
                    unparsable or above 100
        colspan  -- value of the 'colspan' attribute (default 1)
        rowspan  -- value of the 'rowspan' attribute (default 1)
    """

    def __init__(self, conv, tag, css):
        """
        @param conv: The HTML converter; its current page is swapped for a
                     scratch page while the cell is parsed.
        @param tag: The cell's tag.
        @param css: CSS properties that apply to the tag.
        """
        self.conv = conv
        self.tag = tag
        self.css = css
        self.text_blocks = []
        self.pwidth = -1.
        if tag.has_attr('width') and '%' in tag['width']:
            try:
                self.pwidth = float(tag['width'].replace('%', ''))
            except ValueError:
                pass
        # A percentage width given in CSS overrides the tag attribute.
        if 'width' in css and '%' in css['width']:
            try:
                self.pwidth = float(css['width'].replace('%', ''))
            except ValueError:
                pass
        if self.pwidth > 100:
            self.pwidth = -1
        self.rowspan = self.colspan = 1
        try:
            self.colspan = int(tag['colspan']) if tag.has_attr('colspan') else 1
            self.rowspan = int(tag['rowspan']) if tag.has_attr('rowspan') else 1
        except (TypeError, ValueError):
            # Malformed span values keep whatever was set so far.
            pass

        pp = conv.current_page
        conv.book.allow_new_page = False
        try:
            conv.current_page = conv.book.create_page()
            conv.parse_tag(tag, css)
            conv.end_current_block()
            for item in conv.current_page.contents:
                if isinstance(item, TextBlock):
                    self.text_blocks.append(item)
        finally:
            # Always hand the converter back its real page, even if parsing
            # the cell raised.
            conv.current_page = pp
            conv.book.allow_new_page = True
        if not self.text_blocks:
            tb = conv.book.create_text_block()
            tb.Paragraph(' ')
            self.text_blocks.append(tb)
        for tb in self.text_blocks:
            tb.parent = None
            tb.objId = 0
            # Needed as we have to eventually change this BlockStyle's width and
            # height attributes. This blockstyle may be shared with other
            # elements, so doing that causes havoc.
            tb.blockStyle = conv.book.create_block_style()
            ts = conv.book.create_text_style(**tb.textStyle.attrs)
            ts.attrs['parindent'] = 0
            tb.textStyle = ts
            if ts.attrs['align'] == 'foot':
                if tb.contents and isinstance(tb.contents[-1], Paragraph):
                    tb.contents[-1].append(' ')

    def pts_to_pixels(self, pts):
        """Convert pts (in tenths of a point, per the division by 10) to
        pixels at the converter profile's dpi, rounded up."""
        pts = int(pts)
        return ceil((float(self.conv.profile.dpi)/72)*(pts/10.))

    def minimum_width(self):
        """Return the largest minimum width among this cell's text blocks."""
        return max([self.minimum_tb_width(tb) for tb in self.text_blocks])

    def minimum_tb_width(self, tb):
        """Return the minimum pixel width for text block tb.

        The width is based on the widest measured word (the first word of
        each text token) plus the paragraph indent and 2 pixels. If a Plot
        is encountered, its width is returned immediately.
        """
        ts = tb.textStyle.attrs
        default_font = get_font(ts['fontfacename'], self.pts_to_pixels(ts['fontsize']))
        parindent = self.pts_to_pixels(ts['parindent'])
        mwidth = 0
        for token, attrs in tokens(tb):
            font = default_font
            if isinstance(token, numbers.Integral):  # Handle para and line breaks
                continue
            if isinstance(token, Plot):
                return self.pts_to_pixels(token.xsize)
            ff = attrs.get('fontfacename', ts['fontfacename'])
            fs = attrs.get('fontsize', ts['fontsize'])
            if (ff, fs) != (ts['fontfacename'], ts['fontsize']):
                font = get_font(ff, self.pts_to_pixels(fs))
            if not token.strip():
                continue
            word = token.split()
            word = word[0] if word else ""
            width = font.getsize(word)[0]
            if width > mwidth:
                mwidth = width
        return parindent + mwidth + 2

    def text_block_size(self, tb, maxwidth=sys.maxsize, debug=False):
        """Estimate the size of text block tb when wrapped at maxwidth.

        @param tb: The text block to measure.
        @param maxwidth: Width in pixels at which words wrap to a new line.
        @param debug: Unused; kept for interface compatibility.
        @return: Tuple ``(width, height)`` in pixels.
        """
        ts = tb.textStyle.attrs
        default_font = get_font(ts['fontfacename'], self.pts_to_pixels(ts['fontsize']))
        parindent = self.pts_to_pixels(ts['parindent'])
        top, bottom, left, right = 0, 0, parindent, parindent

        def add_word(width, height, left, right, top, bottom, ls, ws):
            # Place one word (or plot): wrap to a new line when it does not
            # fit, otherwise advance the cursor; then extend the bounds.
            if left + width > maxwidth:
                left = width + ws
                top += ls
                bottom = top+ls if top+ls > bottom else bottom
            else:
                left += (width + ws)
                right = left if left > right else right
                bottom = top+ls if top+ls > bottom else bottom
            return left, right, top, bottom

        for token, attrs in tokens(tb):
            if attrs is None:
                attrs = {}
            font = default_font
            ls = self.pts_to_pixels(attrs.get('baselineskip', ts['baselineskip']))+\
                self.pts_to_pixels(attrs.get('linespace', ts['linespace']))
            ws = self.pts_to_pixels(attrs.get('wordspace', ts['wordspace']))
            if isinstance(token, numbers.Integral):  # Handle para and line breaks
                if top != bottom:  # Previous element not a line break
                    top = bottom
                else:
                    top += ls
                    bottom += ls
                # Token 1 is a paragraph break (new paragraph gets the
                # indent); token 2 is a plain line break (no indent).
                left = parindent if token == 1 else 0
                continue
            if isinstance(token, Plot):
                width, height = self.pts_to_pixels(token.xsize), self.pts_to_pixels(token.ysize)
                left, right, top, bottom = add_word(width, height, left, right, top, bottom, height, ws)
                continue
            ff = attrs.get('fontfacename', ts['fontfacename'])
            fs = attrs.get('fontsize', ts['fontsize'])
            if (ff, fs) != (ts['fontfacename'], ts['fontsize']):
                font = get_font(ff, self.pts_to_pixels(fs))
            for word in token.split():
                width, height = font.getsize(word)
                left, right, top, bottom = add_word(width, height, left, right, top, bottom, ls, ws)
        return right+3+max(parindent, 10), bottom

    def text_block_preferred_width(self, tb, debug=False):
        """Return the width of tb when it is laid out without wrapping."""
        return self.text_block_size(tb, sys.maxsize, debug=debug)[0]

    def preferred_width(self, debug=False):
        """Return the largest unwrapped width among the text blocks."""
        return ceil(max([self.text_block_preferred_width(i, debug=debug) for i in self.text_blocks]))

    def height(self, width):
        """Return the summed height of all text blocks wrapped at width."""
        return sum([self.text_block_size(i, width)[1] for i in self.text_blocks])
|
||||
|
||||
|
||||
class Row(object):
    """One table row: its cells plus the anchor names found inside it."""

    def __init__(self, conv, row, css, colpad):
        """
        @param conv: The HTML converter, passed on to every Cell.
        @param row: The row's tag.
        @param css: CSS properties inherited by the row's cells.
        @param colpad: Column padding; stored on the instance.
        """
        self.cells = []
        self.colpad = colpad
        cell_tags = row.findAll(re.compile('td|th', re.IGNORECASE))
        self.targets = []
        for cell_tag in cell_tags:
            cell_css = conv.tag_css(cell_tag, css)[0]
            self.cells.append(Cell(conv, cell_tag, cell_css))
        # Collect link targets; 'name' takes precedence over 'id'.
        for anchor in row.findAll(id=True) + row.findAll(name=True):
            if anchor.has_attr('name'):
                target = anchor['name']
            elif anchor.has_attr('id'):
                target = anchor['id']
            else:
                target = None
            if target is not None:
                self.targets.append(target.replace('#', ''))

    def number_of_cells(self):
        '''Number of cells in this row. Respects colspan'''
        total = 0
        for cell in self.cells:
            total += cell.colspan
        return total

    def height(self, widths):
        """Return the height of the tallest cell given the column widths.

        Each cell is measured at the summed width of the columns it spans.
        An empty row has height 0.
        """
        start = 0
        tallest = None
        for cell in self.cells:
            span_width = sum(widths[start:start+cell.colspan])
            cell_height = cell.height(span_width)
            if tallest is None or cell_height > tallest:
                tallest = cell_height
            start += cell.colspan
        return 0 if tallest is None else tallest

    def cell_from_index(self, col):
        """Return the cell covering column col.

        Returns None for a row without cells; when col lies beyond the last
        column, the last cell is returned.
        """
        position = -1
        found = None
        for found in self.cells:
            for _ in range(0, found.colspan):
                if position == col:
                    break
                position += 1
            if position == col:
                break
        return found

    def minimum_width(self, col):
        """Minimum width of the cell at column col (0 when there is none)."""
        cell = self.cell_from_index(col)
        return cell.minimum_width() if cell else 0

    def preferred_width(self, col):
        """Preferred width of the cell at column col; 0 when there is no
        cell or the cell spans several columns."""
        cell = self.cell_from_index(col)
        if not cell or cell.colspan > 1:
            return 0
        return cell.preferred_width()

    def width_percent(self, col):
        """Percentage width of the cell at column col; -1 when there is no
        cell or the cell spans several columns."""
        cell = self.cell_from_index(col)
        if not cell or cell.colspan > 1:
            return -1
        return cell.pwidth

    def cell_iterator(self):
        """Yield the row's cells in order."""
        for cell in self.cells:
            yield cell
|
||||
|
||||
|
||||
class Table(object):
    """A table: a list of Row objects plus the logic to size and place them."""

    def __init__(self, conv, table, css, rowpad=10, colpad=10):
        """
        @param conv: The HTML converter; ``conv.in_table`` is True while the
                     rows are being built.
        @param table: The table's tag; every 'tr' inside it becomes a Row.
        @param css: CSS properties inherited by the rows.
        @param rowpad: Padding added between rows.
        @param colpad: Padding added to every column.
        """
        self.rows = []
        self.conv = conv
        self.rowpad = rowpad
        self.colpad = colpad
        row_tags = table.findAll('tr')
        conv.in_table = True
        for row_tag in row_tags:
            row_css = conv.tag_css(row_tag, css)[0]
            self.rows.append(Row(conv, row_tag, row_css, colpad))
        conv.in_table = False

    def number_of_columns(self):
        """Return the largest cell count (colspan included) of any row."""
        widest = 0
        for row in self.rows:
            count = row.number_of_cells()
            if count > widest:
                widest = count
        return widest

    def number_or_rows(self):
        """Return the number of rows."""
        return len(self.rows)

    def height(self, maxwidth):
        ''' Return row heights + self.rowpad'''
        widths = self.get_widths(maxwidth)
        padded = sum(row.height(widths) + self.rowpad for row in self.rows)
        # No padding after the last row.
        return padded - self.rowpad

    def minimum_width(self, col):
        """Return the largest minimum width any row reports for column col."""
        return max(row.minimum_width(col) for row in self.rows)

    def width_percent(self, col):
        """Return the largest percentage width any row reports for column col."""
        return max(row.width_percent(col) for row in self.rows)

    def get_widths(self, maxwidth):
        '''
        Return widths of columns + self.colpad
        '''
        nrows, ncols = self.number_or_rows(), self.number_of_columns()

        # Start from the widest preferred width found in each column.
        widths = []
        for c in range(ncols):
            preferred = [0] * nrows
            for r, row in enumerate(self.rows):
                try:
                    preferred[r] = row.preferred_width(c)
                except IndexError:
                    continue
            widths.append(max(preferred))

        min_widths = [self.minimum_width(c)+10 for c in range(ncols)]

        # Explicit percentage widths override the preferred width, but never
        # go below the column's minimum.
        for c in range(len(widths)):
            wp = self.width_percent(c)
            if wp >= 0:
                share = ceil((wp/100) * (maxwidth - (ncols-1)*self.colpad))
                widths[c] = max(min_widths[c], share)

        # Shrink columns by 5% per pass (at most 100 passes) until the table
        # fits; a column is left alone once shrinking would take it below
        # its minimum.
        available = maxwidth-((len(widths)-1)*self.colpad)
        for _ in range(100):
            if not sum(widths) > available:
                break
            for c in range(ncols):
                shrunk = ceil((95/100)*widths[c])
                if shrunk >= min_widths[c]:
                    widths[c] = shrunk

        return [w+self.colpad for w in widths]

    def blocks(self, maxwidth, maxheight):
        """Generate the placement of every text block in the table.

        For each row a marker ``(None, 0, row_height, 0, row_targets)`` is
        yielded first, followed by one ``(text_block, x, y, delta, None)``
        tuple per text block of the row's cells. y is the offset of the
        block inside its cell; delta is the unused horizontal space
        (never negative). maxheight is not used.
        """
        nrows, ncols = self.number_or_rows(), self.number_of_columns()
        cellmatrix = [[None for c in range(ncols)] for r in range(nrows)]
        rowpos = [0 for i in range(nrows)]
        for r in range(nrows):
            for cell in self.rows[r].cell_iterator():
                cellmatrix[r][rowpos[r]] = cell
                rowpos[r] += cell.colspan
                # Reserve a slot in the following rows covered by rowspan.
                for k in range(1, cell.rowspan):
                    try:
                        rowpos[r+k] += 1
                    except IndexError:
                        break

        widths = self.get_widths(maxwidth)
        heights = [row.height(widths) for row in self.rows]

        xpos = [sum(widths[:c]) for c in range(ncols)]
        delta = max(maxwidth - sum(widths), 0)
        for r, matrix_row in enumerate(cellmatrix):
            yield None, 0, heights[r], 0, self.rows[r].targets
            for c, cell in enumerate(matrix_row):
                if not cell:
                    continue
                width = sum(widths[c:c+cell.colspan])-self.colpad*cell.colspan
                sypos = 0
                for tb in cell.text_blocks:
                    tb.blockStyle = self.conv.book.create_block_style(
                        blockwidth=width,
                        blockheight=cell.text_block_size(tb, width)[1],
                        blockrule='horz-fixed')

                    yield tb, xpos[c], sypos, delta, None
                    sypos += tb.blockStyle.attrs['blockheight']
|
||||
7
ebook_converter/ebooks/lrf/pylrs/__init__.py
Normal file
7
ebook_converter/ebooks/lrf/pylrs/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
"""
|
||||
This package contains code to generate ebooks in the SONY LRS/F format. It was
|
||||
originally developed by Mike Higgins and has been extended and modified by Kovid
|
||||
Goyal.
|
||||
"""
|
||||
78
ebook_converter/ebooks/lrf/pylrs/elements.py
Normal file
78
ebook_converter/ebooks/lrf/pylrs/elements.py
Normal file
@@ -0,0 +1,78 @@
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
""" elements.py -- replacements and helpers for ElementTree """
|
||||
|
||||
from polyglot.builtins import unicode_type, string_or_bytes
|
||||
|
||||
|
||||
class ElementWriter(object):
    """Serialise an ElementTree-style element tree as XML text.

    Attributes are written in sorted order, and text, tails and attribute
    values are escaped with the predefined XML entities.
    """

    def __init__(self, e, header=False, sourceEncoding="ascii",
                 spaceBeforeClose=True, outputEncodingName="UTF-16"):
        """
        @param e: Root element to serialise.
        @param header: If true, write() emits an XML declaration first.
        @param sourceEncoding: Encoding used to decode bytes values found in
                               the tree.
        @param spaceBeforeClose: If true, empty elements are closed with
                                 ' />' instead of '/>'.
        @param outputEncodingName: Encoding name placed in the XML
                                   declaration. It is only written as text;
                                   this class does no encoding itself.
        """
        self.header = header
        self.e = e
        self.sourceEncoding = sourceEncoding
        self.spaceBeforeClose = spaceBeforeClose
        self.outputEncodingName = outputEncodingName

    def _encodeCdata(self, rawText):
        """Return rawText as text with '&', '<' and '>' escaped.

        bytes input is decoded with self.sourceEncoding first.
        """
        if isinstance(rawText, bytes):
            rawText = rawText.decode(self.sourceEncoding)

        # '&' must be escaped first so the entities added below survive.
        text = rawText.replace("&", "&amp;")
        text = text.replace("<", "&lt;")
        text = text.replace(">", "&gt;")
        return text

    def _writeAttribute(self, f, name, value):
        """Write ' name="value"' to f with the value escaped."""
        f.write(' %s="' % unicode_type(name))
        if not isinstance(value, string_or_bytes):
            value = unicode_type(value)
        value = self._encodeCdata(value)
        # The value is delimited by double quotes, so they need escaping too.
        value = value.replace('"', '&quot;')
        f.write(value)
        f.write('"')

    def _writeText(self, f, rawText):
        """Write rawText to f as escaped character data."""
        text = self._encodeCdata(rawText)
        f.write(text)

    def _write(self, f, e):
        """Recursively write element e, its children and its tail to f."""
        f.write('<' + unicode_type(e.tag))

        # sorted() accepts both a list and a view from items().
        for name, value in sorted(e.items()):
            self._writeAttribute(f, name, value)

        if e.text is not None or len(e) > 0:
            f.write('>')

            if e.text:
                self._writeText(f, e.text)

            for e2 in e:
                self._write(f, e2)

            f.write('</%s>' % e.tag)
        else:
            if self.spaceBeforeClose:
                f.write(' ')
            f.write('/>')

        if e.tail is not None:
            self._writeText(f, e.tail)

    def toString(self):
        """Return the serialised tree as one string."""
        class x:
            pass
        buffer = []
        # x acts as a minimal file-like object whose write() collects chunks.
        x.write = buffer.append
        self.write(x)
        return ''.join(buffer)

    def write(self, f):
        """Write the (optional) XML declaration and the tree to f."""
        if self.header:
            f.write('<?xml version="1.0" encoding="%s"?>\n' % self.outputEncodingName)

        self._write(f, self.e)
|
||||
773
ebook_converter/ebooks/lrf/pylrs/pylrf.py
Normal file
773
ebook_converter/ebooks/lrf/pylrs/pylrf.py
Normal file
@@ -0,0 +1,773 @@
|
||||
#!/usr/bin/env python2
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
"""
|
||||
pylrf.py -- very low level interface to create lrf files. See pylrs for
|
||||
higher level interface that can use this module to render books to lrf.
|
||||
"""
|
||||
import struct
|
||||
import zlib
|
||||
import io
|
||||
import codecs
|
||||
import os
|
||||
|
||||
from .pylrfopt import tagListOptimizer
|
||||
from polyglot.builtins import iteritems, string_or_bytes, unicode_type
|
||||
|
||||
PYLRF_VERSION = "1.0"
|
||||
|
||||
#
|
||||
# Acknowledgement:
|
||||
# This software would not have been possible without the pioneering
|
||||
# efforts of the author of lrf2lrs.py, Igor Skochinsky.
|
||||
#
|
||||
# Copyright (c) 2007 Mike Higgins (Falstaff)
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
# DEALINGS IN THE SOFTWARE.
|
||||
|
||||
#
|
||||
# Change History:
|
||||
#
|
||||
# V1.0 06 Feb 2007
|
||||
# Initial Release.
|
||||
|
||||
#
|
||||
# Current limitations and bugs:
|
||||
# Never "scrambles" any streams (even if asked to). This does not seem
|
||||
# to hurt anything.
|
||||
#
|
||||
# Not based on any official documentation, so many assumptions had to be made.
|
||||
#
|
||||
# Can be used to create lrf files that can lock up an eBook reader.
|
||||
# This is your only warning.
|
||||
#
|
||||
# Unsupported objects: Canvas, Window, PopUpWindow, Sound, Import,
|
||||
# SoundStream, ObjectInfo
|
||||
#
|
||||
# The only button type supported is JumpButton.
|
||||
#
|
||||
# Unsupported tags: SoundStop, Wait, pos on BlockSpace (and those used by
|
||||
# unsupported objects).
|
||||
#
|
||||
# Tags supporting Japanese text and Asian layout have not been tested.
|
||||
#
|
||||
# Tested on Python 2.4 and 2.5, Windows XP and Sony PRS-500.
|
||||
#
|
||||
# Commented even less than pylrs, but not very useful when called directly,
|
||||
# anyway.
|
||||
#
|
||||
|
||||
|
||||
class LrfError(Exception):
    """Error raised by the LRF writing helpers in this module."""
|
||||
|
||||
|
||||
def writeByte(f, byte):
    """Write byte to f as a single unsigned 8-bit value."""
    packed = struct.pack("<B", byte)
    f.write(packed)
|
||||
|
||||
|
||||
def writeWord(f, word):
    """Write word to f as a little-endian unsigned 16-bit value.

    word is converted with int() first. Raises LrfError when the value is
    outside 0..65535.
    """
    value = int(word)
    if value > 65535:
        raise LrfError('Cannot encode a number greater than 65535 in a word.')
    if value < 0:
        raise LrfError('Cannot encode a number < 0 in a word: '+unicode_type(word))
    f.write(struct.pack("<H", value))
|
||||
|
||||
|
||||
def writeSignedWord(f, sword):
    """Write sword to f as a little-endian signed 16-bit value.

    sword goes through float() and then int(), so numeric strings and
    fractional values (truncated towards zero) are accepted.
    """
    value = int(float(sword))
    f.write(struct.pack("<h", value))
|
||||
|
||||
|
||||
def writeWords(f, *words):
    """Write every value in words to f as a little-endian unsigned 16-bit value."""
    layout = "<%dH" % len(words)
    f.write(struct.pack(layout, *words))
|
||||
|
||||
|
||||
def writeDWord(f, dword):
    """Write int(dword) to f as a little-endian unsigned 32-bit value."""
    value = int(dword)
    f.write(struct.pack("<I", value))
|
||||
|
||||
|
||||
def writeDWords(f, *dwords):
    """Write every value in dwords to f as a little-endian unsigned 32-bit value."""
    layout = "<%dI" % len(dwords)
    f.write(struct.pack(layout, *dwords))
|
||||
|
||||
|
||||
def writeQWord(f, qword):
    """Write qword to f as a little-endian unsigned 64-bit value."""
    packed = struct.pack("<Q", qword)
    f.write(packed)
|
||||
|
||||
|
||||
def writeZeros(f, nZeros):
    """Write nZeros zero bytes to f."""
    padding = b"\0" * nZeros
    f.write(padding)
|
||||
|
||||
|
||||
def writeString(f, s):
    """Write s to f unchanged."""
    f.write(s)
|
||||
|
||||
|
||||
def writeIdList(f, idList):
    """Write idList to f: its length as a word, then each id as a dword."""
    count = len(idList)
    writeWord(f, count)
    writeDWords(f, *idList)
|
||||
|
||||
|
||||
def writeColor(f, color):
    """Write color to f as a big-endian unsigned 32-bit value.

    color is a string parsed with ``int(color, 0)``, so a prefix such as
    '0x' selects the base.
    """
    # TODO: allow color names, web format
    value = int(color, 0)
    f.write(struct.pack(">I", value))
|
||||
|
||||
|
||||
def writeLineWidth(f, width):
    """Write int(width) to f as a word."""
    line_width = int(width)
    writeWord(f, line_width)
|
||||
|
||||
|
||||
def writeUnicode(f, string, encoding):
    """Write string to f as a length-prefixed UTF-16-LE string.

    bytes input is decoded with encoding first. The prefix is a word holding
    the length of the encoded data; LrfError is raised when that length
    exceeds 65535.
    """
    text = string.decode(encoding) if isinstance(string, bytes) else string
    payload = text.encode("utf-16-le")
    size = len(payload)
    if size > 65535:
        raise LrfError('Cannot write strings longer than 65535 characters.')
    writeWord(f, size)
    writeString(f, payload)
|
||||
|
||||
|
||||
def writeRaw(f, string, encoding):
    """Write string to f as UTF-16-LE data without a length prefix.

    bytes input is decoded with encoding first.
    """
    text = string.decode(encoding) if isinstance(string, bytes) else string
    writeString(f, text.encode("utf-16-le"))
|
||||
|
||||
|
||||
def writeRubyAA(f, rubyAA):
    """Write a ruby align/adjust pair to ``f`` as one combined word.

    ``rubyAA`` is an ``(align, adjust)`` pair. ``align`` must be ``"start"``
    or ``"center"`` and ``adjust`` must be ``"line-edge"`` or ``"none"``;
    any other value raises ``KeyError``.
    """
    alignName, adjustName = rubyAA
    adjustBits = {"line-edge":0x10, "none":0}[adjustName]
    alignBits = {"start":1, "center":2}[alignName]
    writeWord(f, alignBits | adjustBits)
|
||||
|
||||
|
||||
def writeBgImage(f, bgInfo):
    """Write a background image setting to ``f``: a mode word, then an id dword.

    ``bgInfo`` is a ``(mode, imageId)`` pair; ``mode`` must be one of
    ``"pfix"``, ``"fix"``, ``"tile"`` or ``"centering"``.
    """
    modeName, imageId = bgInfo
    modeCode = {"pfix": 0, "fix":1, "tile":2, "centering":3}[modeName]
    writeWord(f, modeCode)
    writeDWord(f, imageId)
|
||||
|
||||
|
||||
def writeEmpDots(f, dotsInfo, encoding):
    """Write an emphasis-dots setting to ``f``.

    ``dotsInfo`` is ``(refDotsFont, dotsFontName, dotsCode)``. The font
    reference is written as a dword, the font name as a complete
    ``fontfacename`` tag, and ``dotsCode`` (an integer literal string parsed
    with ``int(dotsCode, 0)``) as a word.
    """
    fontRef, fontName, codeText = dotsInfo
    writeDWord(f, fontRef)
    nameTag = LrfTag("fontfacename", fontName)
    nameTag.write(f, encoding)
    writeWord(f, int(codeText, 0))
|
||||
|
||||
|
||||
def writeRuledLine(f, lineInfo):
    """Write a ruled-line description to ``f``.

    ``lineInfo`` is ``(lineLength, lineType, lineWidth, lineColor)``. Length,
    encoded type and width are written as words, followed by the color.
    ``lineType`` must be a key of ``LINE_TYPE_ENCODING``.
    """
    length, kind, width, color = lineInfo
    writeWord(f, length)
    kindCode = LINE_TYPE_ENCODING[kind]
    writeWord(f, kindCode)
    writeWord(f, width)
    writeColor(f, color)
|
||||
|
||||
|
||||
# Eight signature bytes written at the very start of an LRF file.
LRF_SIGNATURE = b"L\x00R\x00F\x00\x00\x00"

# XOR_KEY = 48
XOR_KEY = 0xFE00  # 65024: that's what lrf2lrs says -- not used, anyway...

LRF_VERSION = 1000  # is 999 for librie? lrf2lrs uses 1000

# Image format name -> numeric image type code.
IMAGE_TYPE_ENCODING = {
    "GIF": 0x14,
    "PNG": 0x12,
    "BMP": 0x13,
    "JPEG": 0x11,
    "JPG": 0x11,
}
|
||||
|
||||
# Object name -> numeric object type code. Several names are aliases that
# share one code (for example PageAtr/PageStyle).
OBJECT_TYPE_ENCODING = {
    "PageTree": 0x01,
    "Page": 0x02,
    "Header": 0x03,
    "Footer": 0x04,
    "PageAtr": 0x05,
    "PageStyle": 0x05,
    "Block": 0x06,
    "BlockAtr": 0x07,
    "BlockStyle": 0x07,
    "MiniPage": 0x08,
    "TextBlock": 0x0A,
    "Text": 0x0A,
    "TextAtr": 0x0B,
    "TextStyle": 0x0B,
    "ImageBlock": 0x0C,
    "Image": 0x0C,
    "Canvas": 0x0D,
    "ESound": 0x0E,
    "ImageStream": 0x11,
    "Import": 0x12,
    "Button": 0x13,
    "Window": 0x14,
    "PopUpWindow": 0x15,
    "Sound": 0x16,
    "SoundStream": 0x17,
    "Font": 0x19,
    "ObjectInfo": 0x1A,
    "BookAtr": 0x1C,
    "BookStyle": 0x1C,
    "SimpleTextBlock": 0x1D,
    "TOC": 0x1E,
}
|
||||
|
||||
# Line style name -> numeric code.
LINE_TYPE_ENCODING = dict([
    ('none', 0),
    ('solid', 0x10),
    ('dashed', 0x20),
    ('double', 0x30),
    ('dotted', 0x40),
])

# Binding direction name -> numeric code.
BINDING_DIRECTION_ENCODING = {"Lr": 1, "Rl": 16}
|
||||
|
||||
|
||||
# Tag name -> (tag id, *format). LrfTag stores the first element as the tag
# type and applies the remaining elements, in order, to the tag's parameter:
#   * a dict maps the parameter to its encoded value,
#   * a string is a struct format used to pack the parameter,
#   * anything else is called as a writer function.
# A one-element tuple describes a tag that carries no parameter.
TAG_INFO = {
    "rawtext": (0, writeRaw),
    "ObjectStart": (0xF500, "<IH"),
    "ObjectEnd": (0xF501,),
    # InfoLink (0xF502)
    "Link": (0xF503, "<I"),
    "StreamSize": (0xF504, writeDWord),
    "StreamData": (0xF505, writeString),
    "StreamEnd": (0xF506,),
    "oddheaderid": (0xF507, writeDWord),
    "evenheaderid": (0xF508, writeDWord),
    "oddfooterid": (0xF509, writeDWord),
    "evenfooterid": (0xF50A, writeDWord),
    "ObjectList": (0xF50B, writeIdList),
    "fontsize": (0xF511, writeSignedWord),
    "fontwidth": (0xF512, writeSignedWord),
    "fontescapement": (0xF513, writeSignedWord),
    "fontorientation": (0xF514, writeSignedWord),
    "fontweight": (0xF515, writeWord),
    "fontfacename": (0xF516, writeUnicode),
    "textcolor": (0xF517, writeColor),
    "textbgcolor": (0xF518, writeColor),
    "wordspace": (0xF519, writeSignedWord),
    "letterspace": (0xF51A, writeSignedWord),
    "baselineskip": (0xF51B, writeSignedWord),
    "linespace": (0xF51C, writeSignedWord),
    "parindent": (0xF51D, writeSignedWord),
    "parskip": (0xF51E, writeSignedWord),
    # F51F, F520
    "topmargin": (0xF521, writeWord),
    "headheight": (0xF522, writeWord),
    "headsep": (0xF523, writeWord),
    "oddsidemargin": (0xF524, writeWord),
    "textheight": (0xF525, writeWord),
    "textwidth": (0xF526, writeWord),
    "canvaswidth": (0xF551, writeWord),
    "canvasheight": (0xF552, writeWord),
    "footspace": (0xF527, writeWord),
    "footheight": (0xF528, writeWord),
    "bgimage": (0xF529, writeBgImage),
    "setemptyview": (0xF52A, {'show':1, 'empty':0}, writeWord),
    "pageposition": (0xF52B, {'any':0,'upper':1, 'lower':2}, writeWord),
    "evensidemargin": (0xF52C, writeWord),
    "framemode": (0xF52E,
                  {'None':0, 'curve':2, 'square':1}, writeWord),
    "blockwidth": (0xF531, writeWord),
    "blockheight": (0xF532, writeWord),
    "blockrule": (0xF533, {"horz-fixed":0x14, "horz-adjustable":0x12,
                           "vert-fixed":0x41, "vert-adjustable":0x21,
                           "block-fixed":0x44, "block-adjustable":0x22},
                  writeWord),
    "bgcolor": (0xF534, writeColor),
    "layout": (0xF535, {'TbRl':0x41, 'LrTb':0x34}, writeWord),
    "framewidth": (0xF536, writeWord),
    "framecolor": (0xF537, writeColor),
    "topskip": (0xF538, writeWord),
    "sidemargin": (0xF539, writeWord),
    "footskip": (0xF53A, writeWord),
    "align": (0xF53C, {'head':1, 'center':4, 'foot':8}, writeWord),
    "column": (0xF53D, writeWord),
    "columnsep": (0xF53E, writeSignedWord),
    "minipagewidth": (0xF541, writeWord),
    "minipageheight": (0xF542, writeWord),
    "yspace": (0xF546, writeWord),
    "xspace": (0xF547, writeWord),
    "PutObj": (0xF549, "<HHI"),
    "ImageRect": (0xF54A, "<HHHH"),
    "ImageSize": (0xF54B, "<HH"),
    "RefObjId": (0xF54C, "<I"),
    "PageDiv": (0xF54E, "<HIHI"),
    "StreamFlags": (0xF554, writeWord),
    "Comment": (0xF555, writeUnicode),
    "FontFilename": (0xF559, writeUnicode),
    "PageList": (0xF55C, writeIdList),
    "FontFacename": (0xF55D, writeUnicode),
    "buttonflags": (0xF561, writeWord),
    "PushButtonStart": (0xF566,),
    "PushButtonEnd": (0xF567,),
    "buttonactions": (0xF56A,),
    "endbuttonactions": (0xF56B,),
    "jumpto": (0xF56C, "<II"),
    "RuledLine": (0xF573, writeRuledLine),
    "rubyaa": (0xF575, writeRubyAA),
    "rubyoverhang": (0xF576, {'none':0, 'auto':1}, writeWord),
    "empdotsposition": (0xF577, {'before':1, 'after':2}, writeWord),
    "empdots": (0xF578, writeEmpDots),
    "emplineposition": (0xF579, {'before':1, 'after':2}, writeWord),
    "emplinetype": (0xF57A, LINE_TYPE_ENCODING, writeWord),
    "ChildPageTree": (0xF57B, "<I"),
    "ParentPageTree": (0xF57C, "<I"),
    "Italic": (0xF581,),
    "ItalicEnd": (0xF582,),
    "pstart": (0xF5A1, writeDWord),  # what goes in the dword? refesound
    "pend": (0xF5A2,),
    "CharButton": (0xF5A7, writeDWord),
    "CharButtonEnd": (0xF5A8,),
    "Rubi": (0xF5A9,),
    "RubiEnd": (0xF5AA,),
    "Oyamoji": (0xF5AB,),
    "OyamojiEnd": (0xF5AC,),
    "Rubimoji": (0xF5AD,),
    "RubimojiEnd": (0xF5AE,),
    "Yoko": (0xF5B1,),
    "YokoEnd": (0xF5B2,),
    "Tate": (0xF5B3,),
    "TateEnd": (0xF5B4,),
    "Nekase": (0xF5B5,),
    "NekaseEnd": (0xF5B6,),
    "Sup": (0xF5B7,),
    "SupEnd": (0xF5B8,),
    "Sub": (0xF5B9,),
    "SubEnd": (0xF5BA,),
    "NoBR": (0xF5BB,),
    "NoBREnd": (0xF5BC,),
    "EmpDots": (0xF5BD,),
    "EmpDotsEnd": (0xF5BE,),
    "EmpLine": (0xF5C1,),
    "EmpLineEnd": (0xF5C2,),
    "DrawChar": (0xF5C3, '<H'),
    "DrawCharEnd": (0xF5C4,),
    "Box": (0xF5C6, LINE_TYPE_ENCODING, writeWord),
    "BoxEnd": (0xF5C7,),
    "Space": (0xF5CA, writeSignedWord),
    "textstring": (0xF5CC, writeUnicode),
    "Plot": (0xF5D1, "<HHII"),
    "CR": (0xF5D2,),
    "RegisterFont": (0xF5D8, writeDWord),
    "setwaitprop": (0xF5DA, {'replay':1, 'noreplay':2}, writeWord),
    "charspace": (0xF5DD, writeSignedWord),
    "textlinewidth": (0xF5F1, writeLineWidth),
    "linecolor": (0xF5F2, writeColor),
}
|
||||
|
||||
|
||||
class ObjectTableEntry(object):
    """One object-table row: an object id with the offset and size recorded for it."""

    def __init__(self, objId, offset, size):
        self.objId, self.offset, self.size = objId, offset, size

    def write(self, f):
        """Write the entry to ``f`` as four dwords; the fourth is always 0."""
        fields = (self.objId, self.offset, self.size, 0)
        writeDWords(f, *fields)
|
||||
|
||||
|
||||
class LrfTag(object):
    """A single named LRF tag with at most one parameter.

    The tag id and the way the parameter is encoded come from ``TAG_INFO``.
    """

    def __init__(self, name, *parameters):
        """Look ``name`` up in ``TAG_INFO`` and remember the optional parameter.

        Raises:
            LrfError: if ``name`` is unknown or more than one parameter is given.
        """
        try:
            info = TAG_INFO[name]
        except KeyError:
            raise LrfError("tag name %s not recognized" % name)

        self.name = name
        self.type = info[0]
        self.format = info[1:]

        if len(parameters) > 1:
            raise LrfError("only one parameter allowed on tag %s" % name)

        self.parameter = parameters[0] if parameters else None

    def write(self, lrf, encoding=None):
        """Write the tag id (unless it is 0) and the encoded parameter to ``lrf``.

        Nothing but the id is written when the parameter is ``None``.

        Raises:
            LrfError: if the tag's writer needs an encoding and none was given.
        """
        if self.type != 0:
            writeWord(lrf, self.type)

        value = self.parameter
        if value is None:
            return

        # print " Writing tag", self.name
        for step in self.format:
            if isinstance(step, dict):
                # Translate a symbolic value; later steps see the result.
                value = step[value]
                continue

            if isinstance(step, string_or_bytes):
                # A struct format: tuples supply several fields at once.
                fields = value if isinstance(value, tuple) else (value,)
                writeString(lrf, struct.pack(step, *fields))
                continue

            if step in [writeUnicode, writeRaw, writeEmpDots]:
                if encoding is None:
                    raise LrfError("Tag requires encoding")
                step(lrf, value, encoding)
            else:
                step(lrf, value)
|
||||
|
||||
|
||||
# Stream flag values used by the stream classes below.
STREAM_SCRAMBLED = 1 << 9   # 0x200
STREAM_COMPRESSED = 1 << 8  # 0x100
# Includes the STREAM_COMPRESSED bit.
STREAM_FORCE_COMPRESSED = 0x8000 | STREAM_COMPRESSED  # 0x8100
STREAM_TOC = 0x0051
|
||||
|
||||
|
||||
class LrfStreamBase(object):
    """Holds stream flags plus raw stream data and turns them into stream tags."""

    def __init__(self, streamFlags, streamData=None):
        self.streamFlags = streamFlags
        self.streamData = streamData

    def setStreamData(self, streamData):
        """Replace the stored stream data."""
        self.streamData = streamData

    def getStreamTags(self, optimize=False):
        """Return the tags describing this stream.

        The result is a list of four tags: StreamFlags, StreamSize,
        StreamData and StreamEnd. When the compressed flag is set the data is
        zlib-compressed and prefixed with its uncompressed length. With
        ``optimize`` true, compression is dropped (and the flag cleared) when
        it would not make the data smaller; the force-compressed flag
        disables that optimization. Only the low nine bits of the flags are
        emitted.
        """
        # if flags & 0x200, stream is scrambled
        # if flags & 0x100, stream is compressed
        flags = self.streamFlags
        payload = self.streamData

        # implement scramble? I never scramble anything...

        if (flags & STREAM_FORCE_COMPRESSED) == STREAM_FORCE_COMPRESSED:
            optimize = False

        if (flags & STREAM_COMPRESSED) == STREAM_COMPRESSED:
            rawLength = len(payload)
            deflated = zlib.compress(payload)
            # The "+ 4" accounts for the length prefix added below.
            if optimize and rawLength <= len(deflated) + 4:
                flags &= ~STREAM_COMPRESSED
            else:
                payload = struct.pack("<I", rawLength) + deflated

        tags = [LrfTag("StreamFlags", flags & 0x01FF)]
        tags.append(LrfTag("StreamSize", len(payload)))
        tags.append(LrfTag("StreamData", payload))
        tags.append(LrfTag("StreamEnd"))
        return tags
|
||||
|
||||
|
||||
class LrfTagStream(LrfStreamBase):
    """A stream whose data is produced by serializing a list of tags."""

    def __init__(self, streamFlags, streamTags=None):
        LrfStreamBase.__init__(self, streamFlags)
        # Work on a copy so the caller's sequence is never modified.
        self.tags = [] if streamTags is None else streamTags[:]

    def appendLrfTag(self, tag):
        """Add ``tag`` to the end of the stream's tag list."""
        self.tags.append(tag)

    def getStreamTags(self, encoding,
            optimizeTags=False, optimizeCompression=False):
        """Serialize the tags into stream data and return the stream tags.

        ``encoding`` is handed to each tag's ``write``. With ``optimizeTags``
        true the tag list is first run through ``tagListOptimizer``, which
        modifies it in place. ``optimizeCompression`` is forwarded to
        ``LrfStreamBase.getStreamTags`` as ``optimize``.
        """
        if optimizeTags:
            tagListOptimizer(self.tags)

        with io.BytesIO() as buffer:
            for tag in self.tags:
                tag.write(buffer, encoding)
            self.streamData = buffer.getvalue()

        return LrfStreamBase.getStreamTags(self, optimize=optimizeCompression)
|
||||
|
||||
|
||||
class LrfFileStream(LrfStreamBase):
    """A stream whose data is the complete binary content of a file."""

    def __init__(self, streamFlags, filename):
        LrfStreamBase.__init__(self, streamFlags)
        with open(filename, "rb") as source:
            contents = source.read()
        self.streamData = contents
|
||||
|
||||
|
||||
class LrfObject(object):
    """A typed LRF object: an id, a type code and an ordered list of tags."""

    def __init__(self, name, objId):
        """Create an empty object.

        ``name`` must be a key of ``OBJECT_TYPE_ENCODING`` and ``objId`` must
        be positive.

        Raises:
            LrfError: if ``objId`` is not positive or ``name`` is unknown.
        """
        if objId <= 0:
            raise LrfError("invalid objId for " + name)

        self.name = name
        self.objId = objId
        self.tags = []
        try:
            self.type = OBJECT_TYPE_ENCODING[name]
        except KeyError:
            raise LrfError("object name %s not recognized" % name)

    def __str__(self):
        return 'LRFObject: ' + self.name + ", " + unicode_type(self.objId)

    def appendLrfTag(self, tag):
        """Add a single tag to the end of the tag list."""
        self.tags.append(tag)

    def appendLrfTags(self, tagList):
        """Add every tag in ``tagList`` to the end of the tag list."""
        self.tags.extend(tagList)

    # deprecated old name
    append = appendLrfTag

    def appendTagDict(self, tagDict, genClass=None):
        """Append one tag per entry of ``tagDict`` (tag name -> value).

        Some settings are stored in the file as one combined tag although
        they arrive as separate entries: the ruby align/adjust pair, the
        background image mode/id pair and the three emphasis-dots values.
        Those are collected first and emitted as a single tag each, with a
        default filled in for any member that is missing. A
        ``rubyAlignAndAdjust`` entry is ignored.

        ``genClass`` is only compared against ``"PageStyle"``, which selects
        the "pfix" variant of the "fix" background mode.
        """
        #
        # This code does not really belong here, I think. But it
        # belongs somewhere, so here it is.
        #
        composites = {}
        for name, value in iteritems(tagDict):
            if name == 'rubyAlignAndAdjust':
                continue
            if name in {
                    "bgimagemode", "bgimageid", "rubyalign", "rubyadjust",
                    "empdotscode", "empdotsfontname", "refempdotsfont"}:
                composites[name] = value
            else:
                self.append(LrfTag(name, value))

        if "rubyalign" in composites or "rubyadjust" in composites:
            # The defaults must be values writeRubyAA can encode: align is
            # looked up among "start"/"center" and adjust among
            # "line-edge"/"none". The two defaults used to be swapped, which
            # made writing fail with KeyError whenever only one was given.
            ralign = composites.get("rubyalign", "start")
            radjust = composites.get("rubyadjust", "none")
            self.append(LrfTag("rubyaa", (ralign, radjust)))

        if "bgimagemode" in composites or "bgimageid" in composites:
            imode = composites.get("bgimagemode", "fix")
            iid = composites.get("bgimageid", 0)

            # for some reason, page style uses 0 for "fix"
            # we call this pfix to differentiate it
            if genClass == "PageStyle" and imode == "fix":
                imode = "pfix"

            self.append(LrfTag("bgimage", (imode, iid)))

        if "empdotscode" in composites or "empdotsfontname" in composites or \
                "refempdotsfont" in composites:
            dotscode = composites.get("empdotscode", "0x002E")
            dotsfontname = composites.get("empdotsfontname",
                    "Dutch801 Rm BT Roman")
            refdotsfont = composites.get("refempdotsfont", 0)
            self.append(LrfTag("empdots", (refdotsfont, dotsfontname,
                dotscode)))

    def write(self, lrf, encoding=None):
        """Write ObjectStart, every tag in order, then ObjectEnd to ``lrf``."""
        # print "Writing object", self.name
        LrfTag("ObjectStart", (self.objId, self.type)).write(lrf)

        for tag in self.tags:
            tag.write(lrf, encoding)

        LrfTag("ObjectEnd").write(lrf)
|
||||
|
||||
|
||||
class LrfToc(LrfObject):
    """
    Table of contents. Format of toc is:
    [ (pageid, objid, string)...]
    """

    def __init__(self, objId, toc, se):
        """Build the TOC object; ``se`` is the encoding of byte-string labels."""
        LrfObject.__init__(self, "TOC", objId)
        streamData = self._makeTocStream(toc, se)
        self._makeStreamTags(streamData)

    def _makeStreamTags(self, streamData):
        stream = LrfStreamBase(STREAM_TOC, streamData)
        self.tags.extend(stream.getStreamTags())

    @staticmethod
    def _labelText(label, se):
        """Return ``label`` as text, decoding byte strings with ``se``."""
        if isinstance(label, bytes):
            return label.decode(se)
        return label

    def _makeTocStream(self, toc, se):
        """Serialize ``toc`` and return the resulting bytes.

        Layout: entry count, one dword offset per entry (relative to the
        first entry), then the entries themselves as page id, object id and
        length-prefixed UTF-16-LE label.

        Raises:
            LrfError: if an entry has a non-positive page id or object id.
        """
        stream = io.BytesIO()
        nEntries = len(toc)

        writeDWord(stream, nEntries)

        lastOffset = 0
        writeDWord(stream, lastOffset)
        for i in range(nEntries - 1):
            pageId, objId, label = toc[i]
            # Measure the label exactly as writeUnicode will emit it. Using
            # len(label)*2 is wrong for byte strings in a multi-byte encoding
            # and for characters that need a UTF-16 surrogate pair.
            encodedLabel = self._labelText(label, se).encode("utf-16-le")
            entryLen = 4 + 4 + 2 + len(encodedLabel)
            lastOffset += entryLen
            writeDWord(stream, lastOffset)

        for entry in toc:
            pageId, objId, label = entry
            # Labels may be byte strings; decode them before building the
            # message so the intended LrfError is raised, not a TypeError.
            if pageId <= 0:
                raise LrfError("page id invalid in toc: " +
                               self._labelText(label, se))
            if objId <= 0:
                raise LrfError("textblock id invalid in toc: " +
                               self._labelText(label, se))

            writeDWord(stream, pageId)
            writeDWord(stream, objId)
            writeUnicode(stream, label, se)

        streamData = stream.getvalue()
        stream.close()
        return streamData
|
||||
|
||||
|
||||
class LrfWriter(object):
    """Collects LRF objects and header settings and writes them out as a file."""

    def __init__(self, sourceEncoding):
        """``sourceEncoding`` is used to decode byte strings (see toUnicode)
        and is passed to every object when it is written."""
        self.sourceEncoding = sourceEncoding

        # The following flags are just to have a place to remember these
        # values. The flags must still be passed to the appropriate classes
        # in order to have them work.

        self.saveStreamTags = False  # used only in testing -- hogs memory

        # highly experimental -- set to True at your own risk
        self.optimizeTags = False
        self.optimizeCompression = False

        # End of placeholders

        self.rootObjId = 0
        self.rootObj = None
        self.binding = 1  # 1=front to back, 16=back to front
        self.dpi = 1600
        self.width = 600
        self.height = 800
        self.colorDepth = 24
        self.tocObjId = 0
        self.docInfoXml = ""
        self.thumbnailEncoding = "JPEG"
        self.thumbnailData = b""
        self.objects = []
        self.objectTable = []

    def getSourceEncoding(self):
        return self.sourceEncoding

    def toUnicode(self, string):
        """Return ``string`` as text, decoding bytes with the source encoding."""
        if isinstance(string, bytes):
            string = string.decode(self.sourceEncoding)

        return string

    def getDocInfoXml(self):
        return self.docInfoXml

    def setPageTreeId(self, objId):
        self.pageTreeId = objId

    def getPageTreeId(self):
        # pageTreeId only exists after setPageTreeId() has been called.
        return self.pageTreeId

    def setRootObject(self, obj):
        """Record ``obj`` as the root object.

        Raises:
            LrfError: if a root object has already been set.
        """
        if self.rootObjId != 0:
            raise LrfError("root object already set")

        self.rootObjId = obj.objId
        self.rootObj = obj

    def registerFontId(self, id):
        """Append a RegisterFont tag for ``id`` to the root object.

        Raises:
            LrfError: if no root object has been set yet.
        """
        if self.rootObj is None:
            raise LrfError("can't register font -- no root object")

        self.rootObj.append(LrfTag("RegisterFont", id))

    def setTocObject(self, obj):
        """Record the id of the table-of-contents object.

        Raises:
            LrfError: if a toc object has already been set.
        """
        if self.tocObjId != 0:
            raise LrfError("toc object already set")

        self.tocObjId = obj.objId

    def setThumbnailFile(self, filename, encoding=None):
        """Load the thumbnail from ``filename``.

        ``encoding`` names the image type; when omitted it is taken from the
        file extension. It is upper-cased and must be a key of
        ``IMAGE_TYPE_ENCODING``.

        Raises:
            LrfError: if the image type is not known. The previously stored
                thumbnail data and encoding are left untouched in that case.
        """
        with open(filename, "rb") as f:
            data = f.read()

        if encoding is None:
            encoding = os.path.splitext(filename)[1][1:]

        encoding = encoding.upper()
        if encoding not in IMAGE_TYPE_ENCODING:
            raise LrfError("unknown image type: " + encoding)

        # Commit both attributes together, only after validation succeeded,
        # so a rejected file can never leave new data paired with the old
        # encoding.
        self.thumbnailData = data
        self.thumbnailEncoding = encoding

    def append(self, obj):
        """Add ``obj`` to the list of objects that writeFile will emit."""
        self.objects.append(obj)

    def addLrfObject(self, objId):
        pass

    def writeFile(self, lrf):
        """Write the complete file to ``lrf``.

        ``lrf`` must be seekable: two header fields are patched after the
        objects have been written.

        Raises:
            LrfError: if no root object has been set, or a toc object id was
                set but no appended object has that id.
        """
        if self.rootObjId == 0:
            raise LrfError("no root object has been set")

        self.writeHeader(lrf)
        self.writeObjects(lrf)
        self.updateObjectTableOffset(lrf)
        self.updateTocObjectOffset(lrf)
        self.writeObjectTable(lrf)

    def writeHeader(self, lrf):
        """Write the file header, compressed document info and thumbnail."""
        writeString(lrf, LRF_SIGNATURE)
        writeWord(lrf, LRF_VERSION)
        writeWord(lrf, XOR_KEY)
        writeDWord(lrf, self.rootObjId)
        writeQWord(lrf, len(self.objects))
        writeQWord(lrf, 0)  # 0x18 objectTableOffset -- will be updated
        writeZeros(lrf, 4)  # 0x20 unknown
        writeWord(lrf, self.binding)
        writeDWord(lrf, self.dpi)
        writeWords(lrf, self.width, self.height, self.colorDepth)
        writeZeros(lrf, 20)  # 0x30 unknown
        writeDWord(lrf, self.tocObjId)
        writeDWord(lrf, 0)  # 0x48 tocObjectOffset -- will be updated
        docInfoXml = codecs.BOM_UTF8 + self.docInfoXml.encode("utf-8")
        compDocInfo = zlib.compress(docInfoXml)
        # The "+ 4" covers the uncompressed-length dword written below.
        writeWord(lrf, len(compDocInfo) + 4)
        writeWord(lrf, IMAGE_TYPE_ENCODING[self.thumbnailEncoding])
        writeDWord(lrf, len(self.thumbnailData))
        writeDWord(lrf, len(docInfoXml))
        writeString(lrf, compDocInfo)
        writeString(lrf, self.thumbnailData)

    def writeObjects(self, lrf):
        """Write every appended object and rebuild the object table."""
        # also appends object entries to the object table
        self.objectTable = []
        for obj in self.objects:
            objStart = lrf.tell()
            obj.write(lrf, self.sourceEncoding)
            objEnd = lrf.tell()
            self.objectTable.append(
                ObjectTableEntry(obj.objId, objStart, objEnd-objStart))

    def updateObjectTableOffset(self, lrf):
        """Patch the header field at 0x18 with the current file position."""
        # update the offset of the object table
        tableOffset = lrf.tell()
        lrf.seek(0x18, 0)
        writeQWord(lrf, tableOffset)
        lrf.seek(0, 2)

    def updateTocObjectOffset(self, lrf):
        """Patch the header field at 0x48 with the toc object's offset.

        Does nothing when no toc object was set.

        Raises:
            LrfError: if the toc object id is not in the object table.
        """
        if self.tocObjId == 0:
            return

        for entry in self.objectTable:
            if entry.objId == self.tocObjId:
                lrf.seek(0x48, 0)
                writeDWord(lrf, entry.offset)
                lrf.seek(0, 2)
                break
        else:
            raise LrfError("toc object not in object table")

    def writeObjectTable(self, lrf):
        """Write every object table entry at the current position."""
        for tableEntry in self.objectTable:
            tableEntry.write(lrf)
|
||||
44
ebook_converter/ebooks/lrf/pylrs/pylrfopt.py
Normal file
44
ebook_converter/ebooks/lrf/pylrs/pylrfopt.py
Normal file
@@ -0,0 +1,44 @@
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
|
||||
def _optimize(tagList, tagName, conversion):
|
||||
# copy the tag of interest plus any text
|
||||
newTagList = []
|
||||
for tag in tagList:
|
||||
if tag.name == tagName or tag.name == "rawtext":
|
||||
newTagList.append(tag)
|
||||
|
||||
# now, eliminate any duplicates (leaving the last one)
|
||||
for i, newTag in enumerate(newTagList[:-1]):
|
||||
if newTag.name == tagName and newTagList[i+1].name == tagName:
|
||||
tagList.remove(newTag)
|
||||
|
||||
# eliminate redundant settings to same value across text strings
|
||||
newTagList = []
|
||||
for tag in tagList:
|
||||
if tag.name == tagName:
|
||||
newTagList.append(tag)
|
||||
|
||||
for i, newTag in enumerate(newTagList[:-1]):
|
||||
value = conversion(newTag.parameter)
|
||||
nextValue = conversion(newTagList[i+1].parameter)
|
||||
if value == nextValue:
|
||||
tagList.remove(newTagList[i+1])
|
||||
|
||||
# eliminate any setting that don't have text after them
|
||||
while len(tagList) > 0 and tagList[-1].name == tagName:
|
||||
del tagList[-1]
|
||||
|
||||
|
||||
def tagListOptimizer(tagList):
    """Strip redundant "fontsize" and "fontweight" tags from ``tagList`` in place.

    Settings that are changed again before any text is output are
    unnecessary. For example,
        fontsize=100, fontsize=200, text, fontsize=100, fontsize=200
    becomes
        fontsize=200 text

    Returns the number of tags that were removed.
    """
    sizeBefore = len(tagList)
    for settingName in ("fontsize", "fontweight"):
        _optimize(tagList, settingName, int)
    removed = sizeBefore - len(tagList)
    return removed
|
||||
2442
ebook_converter/ebooks/lrf/pylrs/pylrs.py
Normal file
2442
ebook_converter/ebooks/lrf/pylrs/pylrs.py
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user