mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-02-13 04:15:52 +01:00
397 lines
14 KiB
Python
397 lines
14 KiB
Python
import copy
|
|
import mimetypes
|
|
import operator
|
|
import textwrap
|
|
|
|
from lxml import etree
|
|
|
|
from ebook_converter import polyglot
|
|
|
|
|
|
class Canvas(etree.XSLTExtension):
|
|
|
|
def __init__(self, doc, styles, text_block, log):
|
|
self.doc = doc
|
|
self.styles = styles
|
|
self.text_block = text_block
|
|
self.log = log
|
|
self.processed = set()
|
|
|
|
def execute(self, context, self_node, input_node, output_parent):
|
|
cid = input_node.get('objid', None)
|
|
if cid is None or cid in self.processed:
|
|
return
|
|
self.processed.add(cid)
|
|
input_node = self.doc.xpath('//Canvas[@objid="%s"]'%cid)[0]
|
|
|
|
objects = list(self.get_objects(input_node))
|
|
if len(objects) == 1 and objects[0][0].tag == 'ImageBlock':
|
|
self.image_page(input_node, objects[0][0], output_parent)
|
|
else:
|
|
canvases = [input_node]
|
|
for x in input_node.itersiblings():
|
|
if x.tag == 'Canvas':
|
|
oid = x.get('objid', None)
|
|
if oid is not None:
|
|
canvases.append(x)
|
|
self.processed.add(oid)
|
|
else:
|
|
break
|
|
|
|
table = etree.Element('table')
|
|
table.text = '\n\t'
|
|
for canvas in canvases:
|
|
oid = canvas.get('objid')
|
|
tr = table.makeelement('tr')
|
|
tr.set('id', oid)
|
|
tr.tail = '\n\t'
|
|
table.append(tr)
|
|
for obj, x, y in self.get_objects(canvas):
|
|
if obj.tag != 'TextBlock':
|
|
self.log.warning('%s elements in Canvas not supported',
|
|
obj.tag)
|
|
continue
|
|
td = table.makeelement('td')
|
|
self.text_block.render_block(obj, td)
|
|
tr.append(td)
|
|
output_parent.append(table)
|
|
|
|
def image_page(self, input_node, block, output_parent):
|
|
div = etree.Element('div')
|
|
div.set('id', input_node.get('objid', 'scuzzy'))
|
|
div.set('class', 'image_page')
|
|
width = self.styles.to_num(block.get("xsize", None))
|
|
height = self.styles.to_num(block.get("ysize", None))
|
|
img = div.makeelement('img')
|
|
if width is not None:
|
|
img.set('width', str(int(width)))
|
|
if height is not None:
|
|
img.set('height', str(int(height)))
|
|
ref = block.get('refstream', None)
|
|
if ref is not None:
|
|
imstr = self.doc.xpath('//ImageStream[@objid="%s"]'%ref)
|
|
if imstr:
|
|
src = imstr[0].get('file', None)
|
|
if src:
|
|
img.set('src', src)
|
|
div.append(img)
|
|
output_parent.append(div)
|
|
|
|
def get_objects(self, node):
|
|
for x in node.xpath('descendant::PutObj[@refobj and @x1 and @y1]'):
|
|
objs = node.xpath('//*[@objid="%s"]'%x.get('refobj'))
|
|
x, y = map(self.styles.to_num, (x.get('x1'), x.get('y1')))
|
|
if objs and x is not None and y is not None:
|
|
yield objs[0], int(x), int(y)
|
|
|
|
|
|
class MediaType(etree.XSLTExtension):
|
|
|
|
def execute(self, context, self_node, input_node, output_parent):
|
|
name = input_node.get('file', None)
|
|
typ = mimetypes.guess_type(name)[0]
|
|
if not typ:
|
|
typ = 'application/octet-stream'
|
|
output_parent.text = typ
|
|
|
|
|
|
class ImageBlock(etree.XSLTExtension):
|
|
|
|
def __init__(self, canvas):
|
|
etree.XSLTExtension.__init__(self)
|
|
self.canvas = canvas
|
|
|
|
def execute(self, context, self_node, input_node, output_parent):
|
|
self.canvas.image_page(input_node, input_node, output_parent)
|
|
|
|
|
|
class RuledLine(etree.XSLTExtension):
|
|
|
|
def execute(self, context, self_node, input_node, output_parent):
|
|
hr = etree.Element('hr')
|
|
output_parent.append(hr)
|
|
|
|
|
|
class TextBlock(etree.XSLTExtension):
|
|
|
|
def __init__(self, styles, char_button_map, plot_map, log):
|
|
etree.XSLTExtension.__init__(self)
|
|
self.styles = styles
|
|
self.log = log
|
|
self.char_button_map = char_button_map
|
|
self.plot_map = plot_map
|
|
|
|
def execute(self, context, self_node, input_node, output_parent):
|
|
input_node = copy.deepcopy(input_node)
|
|
div = etree.Element('div')
|
|
self.render_block(input_node, div)
|
|
output_parent.append(div)
|
|
|
|
def render_block(self, node, root):
|
|
ts = node.get('textstyle', None)
|
|
classes = []
|
|
bs = node.get('blockstyle')
|
|
if bs in self.styles.block_style_map:
|
|
classes.append('bs%d'%self.styles.block_style_map[bs])
|
|
if ts in self.styles.text_style_map:
|
|
classes.append('ts%d'%self.styles.text_style_map[ts])
|
|
if classes:
|
|
root.set('class', ' '.join(classes))
|
|
objid = node.get('objid', None)
|
|
if objid:
|
|
root.set('id', objid)
|
|
root.text = node.text
|
|
self.root = root
|
|
self.parent = root
|
|
self.add_text_to = (self.parent, 'text')
|
|
self.fix_deep_nesting(node)
|
|
for child in node:
|
|
self.process_child(child)
|
|
|
|
def fix_deep_nesting(self, node):
|
|
deepest = 1
|
|
|
|
def depth(node):
|
|
parent = node.getparent()
|
|
ans = 1
|
|
while parent is not None:
|
|
ans += 1
|
|
parent = parent.getparent()
|
|
return ans
|
|
|
|
for span in node.xpath('descendant::Span'):
|
|
d = depth(span)
|
|
if d > deepest:
|
|
deepest = d
|
|
if d > 500:
|
|
break
|
|
|
|
if deepest < 500:
|
|
return
|
|
|
|
self.log.warning('Found deeply nested spans. Flattening.')
|
|
# with open('/t/before.xml', 'wb') as f:
|
|
# f.write(etree.tostring(node, method='xml'))
|
|
|
|
spans = [(depth(span), span) for span in node.xpath('descendant::Span')]
|
|
spans.sort(key=operator.itemgetter(0), reverse=True)
|
|
|
|
for depth, span in spans:
|
|
if depth < 3:
|
|
continue
|
|
p = span.getparent()
|
|
gp = p.getparent()
|
|
idx = p.index(span)
|
|
pidx = gp.index(p)
|
|
children = list(p)[idx:]
|
|
t = children[-1].tail
|
|
t = t if t else ''
|
|
children[-1].tail = t + (p.tail if p.tail else '')
|
|
p.tail = ''
|
|
pattrib = dict(**p.attrib) if p.tag == 'Span' else {}
|
|
for child in children:
|
|
p.remove(child)
|
|
if pattrib and child.tag == "Span":
|
|
attrib = copy.copy(pattrib)
|
|
attrib.update(child.attrib)
|
|
child.attrib.update(attrib)
|
|
|
|
for child in reversed(children):
|
|
gp.insert(pidx+1, child)
|
|
|
|
# with open('/t/after.xml', 'wb') as f:
|
|
# f.write(etree.tostring(node, method='xml'))
|
|
|
|
def add_text(self, text):
|
|
if text:
|
|
if getattr(self.add_text_to[0], self.add_text_to[1]) is None:
|
|
setattr(self.add_text_to[0], self.add_text_to[1], '')
|
|
setattr(self.add_text_to[0], self.add_text_to[1],
|
|
getattr(self.add_text_to[0], self.add_text_to[1])+ text)
|
|
|
|
def process_container(self, child, tgt):
|
|
idx = self.styles.get_text_styles(child)
|
|
if idx is not None:
|
|
tgt.set('class', 'ts%d'%idx)
|
|
self.parent.append(tgt)
|
|
orig_parent = self.parent
|
|
self.parent = tgt
|
|
self.add_text_to = (self.parent, 'text')
|
|
self.add_text(child.text)
|
|
for gchild in child:
|
|
self.process_child(gchild)
|
|
self.parent = orig_parent
|
|
self.add_text_to = (tgt, 'tail')
|
|
self.add_text(child.tail)
|
|
|
|
def process_child(self, child):
|
|
if child.tag == 'CR':
|
|
if self.parent == self.root or self.parent.tag == 'p':
|
|
self.parent = self.root.makeelement('p')
|
|
self.root.append(self.parent)
|
|
self.add_text_to = (self.parent, 'text')
|
|
else:
|
|
br = self.parent.makeelement('br')
|
|
self.parent.append(br)
|
|
self.add_text_to = (br, 'tail')
|
|
self.add_text(child.tail)
|
|
elif child.tag in ('P', 'Span', 'EmpLine', 'NoBR'):
|
|
span = self.root.makeelement('span')
|
|
if child.tag == 'EmpLine':
|
|
td = 'underline' if child.get('emplineposition', 'before') == 'before' else 'overline'
|
|
span.set('style', 'text-decoration: '+td)
|
|
self.process_container(child, span)
|
|
elif child.tag == 'Sup':
|
|
sup = self.root.makeelement('sup')
|
|
self.process_container(child, sup)
|
|
elif child.tag == 'Sub':
|
|
sub = self.root.makeelement('sub')
|
|
self.process_container(child, sub)
|
|
elif child.tag == 'Italic':
|
|
sup = self.root.makeelement('i')
|
|
self.process_container(child, sup)
|
|
elif child.tag == 'CharButton':
|
|
a = self.root.makeelement('a')
|
|
oid = child.get('refobj', None)
|
|
if oid in self.char_button_map:
|
|
a.set('href', self.char_button_map[oid])
|
|
self.process_container(child, a)
|
|
elif child.tag == 'Plot':
|
|
xsize = self.styles.to_num(child.get('xsize', None), 166/720)
|
|
ysize = self.styles.to_num(child.get('ysize', None), 166/720)
|
|
img = self.root.makeelement('img')
|
|
if xsize is not None:
|
|
img.set('width', str(int(xsize)))
|
|
if ysize is not None:
|
|
img.set('height', str(int(ysize)))
|
|
ro = child.get('refobj', None)
|
|
if ro in self.plot_map:
|
|
img.set('src', self.plot_map[ro])
|
|
self.parent.append(img)
|
|
self.add_text_to = (img, 'tail')
|
|
self.add_text(child.tail)
|
|
else:
|
|
self.log.warning('Unhandled Text element: %s', child.tag)
|
|
|
|
|
|
class Styles(etree.XSLTExtension):
|
|
|
|
def __init__(self):
|
|
etree.XSLTExtension.__init__(self)
|
|
self.text_styles, self.block_styles = [], []
|
|
self.text_style_map, self.block_style_map = {}, {}
|
|
self.CSS = textwrap.dedent('''
|
|
.image_page { text-align:center }
|
|
''')
|
|
|
|
def write(self, name='styles.css'):
|
|
|
|
def join(style):
|
|
ans = ['%s : %s;'%(k, v) for k, v in style.items()]
|
|
if ans:
|
|
ans[-1] = ans[-1][:-1]
|
|
return '\n\t'.join(ans)
|
|
|
|
with open(name, 'wb') as f:
|
|
f.write(polyglot.as_bytes(self.CSS))
|
|
for (w, sel) in [(self.text_styles, 'ts'), (self.block_styles,
|
|
'bs')]:
|
|
for i, s in enumerate(w):
|
|
if not s:
|
|
continue
|
|
rsel = '.%s%d'%(sel, i)
|
|
s = join(s)
|
|
f.write(polyglot.as_bytes(rsel + ' {\n\t' + s + '\n}\n\n'))
|
|
|
|
def execute(self, context, self_node, input_node, output_parent):
|
|
if input_node.tag == 'TextStyle':
|
|
idx = self.get_text_styles(input_node)
|
|
if idx is not None:
|
|
self.text_style_map[input_node.get('objid')] = idx
|
|
else:
|
|
idx = self.get_block_styles(input_node)
|
|
self.block_style_map[input_node.get('objid')] = idx
|
|
|
|
def px_to_pt(self, px):
|
|
try:
|
|
return px * 72/166
|
|
except:
|
|
return None
|
|
|
|
def color(self, val):
|
|
try:
|
|
val = int(val, 16)
|
|
r, g, b, a = val & 0xFF, (val>>8)&0xFF, (val>>16)&0xFF, (val>>24)&0xFF
|
|
if a == 255:
|
|
return None
|
|
if a == 0:
|
|
return 'rgb(%d,%d,%d)'%(r,g,b)
|
|
return 'rgba(%d,%d,%d,%f)'%(r,g,b,1.-a/255.)
|
|
except:
|
|
return None
|
|
|
|
def get_block_styles(self, node):
|
|
ans = {}
|
|
sm = self.px_to_pt(node.get('sidemargin', None))
|
|
if sm is not None:
|
|
ans['margin-left'] = ans['margin-right'] = '%fpt'%sm
|
|
ts = self.px_to_pt(node.get('topskip', None))
|
|
if ts is not None:
|
|
ans['margin-top'] = '%fpt'%ts
|
|
fs = self.px_to_pt(node.get('footskip', None))
|
|
if fs is not None:
|
|
ans['margin-bottom'] = '%fpt'%fs
|
|
fw = self.px_to_pt(node.get('framewidth', None))
|
|
if fw is not None:
|
|
ans['border-width'] = '%fpt'%fw
|
|
ans['border-style'] = 'solid'
|
|
fc = self.color(node.get('framecolor', None))
|
|
if fc is not None:
|
|
ans['border-color'] = fc
|
|
bc = self.color(node.get('bgcolor', None))
|
|
if bc is not None:
|
|
ans['background-color'] = bc
|
|
if ans not in self.block_styles:
|
|
self.block_styles.append(ans)
|
|
return self.block_styles.index(ans)
|
|
|
|
def to_num(self, val, factor=1.):
|
|
try:
|
|
return float(val)*factor
|
|
except:
|
|
return None
|
|
|
|
def get_text_styles(self, node):
|
|
ans = {}
|
|
fs = self.to_num(node.get('fontsize', None), 0.1)
|
|
if fs is not None:
|
|
ans['font-size'] = '%fpt'%fs
|
|
fw = self.to_num(node.get('fontweight', None))
|
|
if fw is not None:
|
|
ans['font-weight'] = ('bold' if fw >= 700 else 'normal')
|
|
# fn = getattr(obj, 'fontfacename', None)
|
|
# if fn is not None:
|
|
# fn = cls.FONT_MAP[fn]
|
|
# item('font-family: %s;'%fn)
|
|
fg = self.color(node.get('textcolor', None))
|
|
if fg is not None:
|
|
ans['color'] = fg
|
|
bg = self.color(node.get('textbgcolor', None))
|
|
if bg is not None:
|
|
ans['background-color'] = bg
|
|
al = node.get('align', None)
|
|
if al is not None:
|
|
all = dict(head='left', center='center', foot='right')
|
|
ans['text-align'] = all.get(al, 'left')
|
|
# lh = self.to_num(node.get('linespace', None), 0.1)
|
|
# if lh is not None:
|
|
# ans['line-height'] = '%fpt'%lh
|
|
pi = self.to_num(node.get('parindent', None), 0.1)
|
|
if pi is not None:
|
|
ans['text-indent'] = '%fpt'%pi
|
|
if not ans:
|
|
return None
|
|
if ans not in self.text_styles:
|
|
self.text_styles.append(ans)
|
|
return self.text_styles.index(ans)
|