mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-03-22 18:33:34 +01:00
Use the real constants module.
This is an ongoing refactor of the calibre code to make it more readable and to transform it into something more coherent. In this patch, there are changes regarding imports for some modules, to stop polluting the namespace of each module with other modules' symbols, which were often themselves imported from yet other modules. Yuck.
This commit is contained in:
@@ -3,6 +3,7 @@ Based on ideas from comiclrf created by FangornUK.
|
||||
"""
|
||||
import shutil, textwrap, codecs, os
|
||||
|
||||
from ebook_converter import constants as const
|
||||
from ebook_converter.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
from ebook_converter import CurrentDir
|
||||
from ebook_converter.ptempfile import PersistentTemporaryDirectory
|
||||
@@ -245,7 +246,6 @@ class ComicInput(InputFormatPlugin):
|
||||
return os.path.abspath('metadata.opf')
|
||||
|
||||
def create_wrappers(self, pages):
|
||||
from ebook_converter.ebooks.oeb.base import XHTML_NS
|
||||
wrappers = []
|
||||
WRAPPER = textwrap.dedent('''\
|
||||
<html xmlns="%s">
|
||||
@@ -267,7 +267,8 @@ class ComicInput(InputFormatPlugin):
|
||||
''')
|
||||
dir = os.path.dirname(pages[0])
|
||||
for i, page in enumerate(pages):
|
||||
wrapper = WRAPPER%(XHTML_NS, i+1, os.path.basename(page), i+1)
|
||||
wrapper = WRAPPER%(const.XHTML_NS, i+1, os.path.basename(page),
|
||||
i+1)
|
||||
page = os.path.join(dir, 'page_%d.xhtml'%(i+1))
|
||||
with open(page, 'wb') as f:
|
||||
f.write(wrapper.encode('utf-8'))
|
||||
@@ -275,8 +276,6 @@ class ComicInput(InputFormatPlugin):
|
||||
return wrappers
|
||||
|
||||
def create_viewer_wrapper(self, pages):
|
||||
from ebook_converter.ebooks.oeb.base import XHTML_NS
|
||||
|
||||
def page(src):
|
||||
return '<img src="{}"></img>'.format(os.path.basename(src))
|
||||
|
||||
@@ -303,7 +302,7 @@ class ComicInput(InputFormatPlugin):
|
||||
%s
|
||||
</body>
|
||||
</html>
|
||||
''' % (XHTML_NS, pages)
|
||||
''' % (const.XHTML_NS, pages)
|
||||
path = os.path.join(base, 'wrapper.xhtml')
|
||||
with open(path, 'wb') as f:
|
||||
f.write(wrapper.encode('utf-8'))
|
||||
|
||||
@@ -1,14 +1,22 @@
|
||||
from ebook_converter.customize.conversion import OutputFormatPlugin, OptionRecommendation
|
||||
import io
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from ebook_converter import constants as const
|
||||
from ebook_converter.customize import conversion
|
||||
from ebook_converter.ebooks.docx.dump import do_dump
|
||||
from ebook_converter.ebooks.docx.writer.container import DOCX
|
||||
from ebook_converter.ebooks.docx.writer.from_html import Convert
|
||||
from ebook_converter.ebooks.metadata import opf2 as opf_meta
|
||||
from ebook_converter.ebooks.oeb import base
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
PAGE_SIZES = ['a0', 'a1', 'a2', 'a3', 'a4', 'a5', 'a6', 'b0', 'b1',
|
||||
'b2', 'b3', 'b4', 'b5', 'b6', 'legal', 'letter']
|
||||
_OPT = conversion.OptionRecommendation
|
||||
|
||||
|
||||
class DOCXOutput(OutputFormatPlugin):
|
||||
class DOCXOutput(conversion.OutputFormatPlugin):
|
||||
|
||||
name = 'DOCX Output'
|
||||
author = 'Kovid Goyal'
|
||||
@@ -16,75 +24,63 @@ class DOCXOutput(OutputFormatPlugin):
|
||||
commit_name = 'docx_output'
|
||||
ui_data = {'page_sizes': PAGE_SIZES}
|
||||
|
||||
options = {
|
||||
OptionRecommendation(name='docx_page_size', recommended_value='letter',
|
||||
level=OptionRecommendation.LOW, choices=PAGE_SIZES,
|
||||
help='The size of the page. Default is letter. Choices '
|
||||
'are %s' % PAGE_SIZES),
|
||||
|
||||
OptionRecommendation(name='docx_custom_page_size', recommended_value=None,
|
||||
help='Custom size of the document. Use the form widthxheight '
|
||||
'EG. `123x321` to specify the width and height (in pts). '
|
||||
'This overrides any specified page-size.'),
|
||||
|
||||
OptionRecommendation(name='docx_no_cover', recommended_value=False,
|
||||
help='Do not insert the book cover as an image at the start of the document.'
|
||||
' If you use this option, the book cover will be discarded.'),
|
||||
|
||||
OptionRecommendation(name='preserve_cover_aspect_ratio', recommended_value=False,
|
||||
help='Preserve the aspect ratio of the cover image instead of stretching'
|
||||
' it out to cover the entire page.'),
|
||||
|
||||
OptionRecommendation(name='docx_no_toc', recommended_value=False,
|
||||
help='Do not insert the table of contents as a page at the start of the document.'),
|
||||
|
||||
OptionRecommendation(name='extract_to',
|
||||
help='Extract the contents of the generated %s file to the '
|
||||
'specified directory. The contents of the directory are first '
|
||||
'deleted, so be careful.' % 'DOCX'),
|
||||
|
||||
OptionRecommendation(name='docx_page_margin_left', recommended_value=72.0,
|
||||
level=OptionRecommendation.LOW,
|
||||
help='The size of the left page margin, in pts. Default is 72pt.'
|
||||
' Overrides the common left page margin setting.'
|
||||
),
|
||||
|
||||
OptionRecommendation(name='docx_page_margin_top', recommended_value=72.0,
|
||||
level=OptionRecommendation.LOW,
|
||||
help='The size of the top page margin, in pts. Default is 72pt.'
|
||||
' Overrides the common top page margin setting, unless set to zero.'
|
||||
),
|
||||
|
||||
OptionRecommendation(name='docx_page_margin_right', recommended_value=72.0,
|
||||
level=OptionRecommendation.LOW,
|
||||
help='The size of the right page margin, in pts. Default is 72pt.'
|
||||
' Overrides the common right page margin setting, unless set to zero.'
|
||||
),
|
||||
|
||||
OptionRecommendation(name='docx_page_margin_bottom', recommended_value=72.0,
|
||||
level=OptionRecommendation.LOW,
|
||||
help='The size of the bottom page margin, in pts. Default is 72pt.'
|
||||
' Overrides the common bottom page margin setting, unless set to zero.'
|
||||
),
|
||||
|
||||
}
|
||||
options = {_OPT(name='docx_page_size', recommended_value='letter',
|
||||
level=_OPT.LOW, choices=PAGE_SIZES,
|
||||
help='The size of the page. Default is letter. Choices '
|
||||
'are %s' % PAGE_SIZES),
|
||||
_OPT(name='docx_custom_page_size', recommended_value=None,
|
||||
help='Custom size of the document. Use the form '
|
||||
'widthxheight EG. `123x321` to specify the width and '
|
||||
'height (in pts). This overrides any specified '
|
||||
'page-size.'),
|
||||
_OPT(name='docx_no_cover', recommended_value=False,
|
||||
help='Do not insert the book cover as an image at the '
|
||||
'start of the document. If you use this option, the book '
|
||||
'cover will be discarded.'),
|
||||
_OPT(name='preserve_cover_aspect_ratio',
|
||||
recommended_value=False, help='Preserve the aspect ratio '
|
||||
'of the cover image instead of stretching it out to cover '
|
||||
'the entire page.'),
|
||||
_OPT(name='docx_no_toc', recommended_value=False,
|
||||
help='Do not insert the table of contents as a page at '
|
||||
'the start of the document.'),
|
||||
_OPT(name='extract_to', help='Extract the contents of the '
|
||||
'generated DOCX file to the specified directory. The '
|
||||
'contents of the directory are first deleted, so be '
|
||||
'careful.'),
|
||||
_OPT(name='docx_page_margin_left', recommended_value=72.0,
|
||||
level=_OPT.LOW, help='The size of the left page margin, '
|
||||
'in pts. Default is 72pt. Overrides the common left page '
|
||||
'margin setting.'),
|
||||
_OPT(name='docx_page_margin_top', recommended_value=72.0,
|
||||
level=_OPT.LOW, help='The size of the top page margin, '
|
||||
'in pts. Default is 72pt. Overrides the common top page '
|
||||
'margin setting, unless set to zero.'),
|
||||
_OPT(name='docx_page_margin_right', recommended_value=72.0,
|
||||
level=_OPT.LOW, help='The size of the right page margin, '
|
||||
'in pts. Default is 72pt. Overrides the common right page '
|
||||
'margin setting, unless set to zero.'),
|
||||
_OPT(name='docx_page_margin_bottom', recommended_value=72.0,
|
||||
level=_OPT.LOW, help='The size of the bottom page margin, '
|
||||
'in pts. Default is 72pt. Overrides the common bottom '
|
||||
'page margin setting, unless set to zero.')}
|
||||
|
||||
def convert_metadata(self, oeb):
|
||||
from lxml import etree
|
||||
from ebook_converter.ebooks.oeb.base import OPF, OPF2_NS
|
||||
from ebook_converter.ebooks.metadata.opf2 import OPF as ReadOPF
|
||||
from io import BytesIO
|
||||
package = etree.Element(OPF('package'), attrib={'version': '2.0'}, nsmap={None: OPF2_NS})
|
||||
|
||||
package = etree.Element(base.tag('opf', 'package'),
|
||||
attrib={'version': '2.0'},
|
||||
nsmap={None: const.OPF2_NS})
|
||||
oeb.metadata.to_opf2(package)
|
||||
self.mi = ReadOPF(BytesIO(etree.tostring(package, encoding='utf-8')), populate_spine=False, try_to_guess_cover=False).to_book_metadata()
|
||||
self.mi = opf_meta.OPF(io.BytesIO(etree.tostring(package,
|
||||
encoding='utf-8')),
|
||||
populate_spine=False,
|
||||
try_to_guess_cover=False).to_book_metadata()
|
||||
|
||||
def convert(self, oeb, output_path, input_plugin, opts, log):
|
||||
from ebook_converter.ebooks.docx.writer.container import DOCX
|
||||
from ebook_converter.ebooks.docx.writer.from_html import Convert
|
||||
docx = DOCX(opts, log)
|
||||
self.convert_metadata(oeb)
|
||||
Convert(oeb, docx, self.mi, not opts.docx_no_cover, not opts.docx_no_toc)()
|
||||
Convert(oeb, docx, self.mi, not opts.docx_no_cover,
|
||||
not opts.docx_no_toc)()
|
||||
docx.write(output_path, self.mi)
|
||||
if opts.extract_to:
|
||||
from ebook_converter.ebooks.docx.dump import do_dump
|
||||
do_dump(output_path, opts.extract_to)
|
||||
|
||||
@@ -1,14 +1,19 @@
|
||||
import os, re, posixpath
|
||||
from itertools import cycle
|
||||
import hashlib
|
||||
import itertools
|
||||
import os
|
||||
import re
|
||||
import traceback
|
||||
import uuid
|
||||
|
||||
from ebook_converter.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
from lxml import etree
|
||||
|
||||
from ebook_converter.ebooks.metadata import opf2 as opf_meta
|
||||
from ebook_converter.ebooks.oeb import base
|
||||
from ebook_converter.customize.conversion import InputFormatPlugin
|
||||
from ebook_converter.customize.conversion import OptionRecommendation
|
||||
|
||||
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
ADOBE_OBFUSCATION = 'http://ns.adobe.com/pdf/enc#RC'
|
||||
ADOBE_OBFUSCATION = 'http://ns.adobe.com/pdf/enc#RC'
|
||||
IDPF_OBFUSCATION = 'http://www.idpf.org/2008/embedding'
|
||||
|
||||
|
||||
@@ -16,8 +21,8 @@ def decrypt_font_data(key, data, algorithm):
|
||||
is_adobe = algorithm == ADOBE_OBFUSCATION
|
||||
crypt_len = 1024 if is_adobe else 1040
|
||||
crypt = bytearray(data[:crypt_len])
|
||||
key = cycle(iter(bytearray(key)))
|
||||
decrypt = bytes(bytearray(x^next(key) for x in crypt))
|
||||
key = itertools.cycle(iter(bytearray(key)))
|
||||
decrypt = bytes(bytearray(x ^ next(key) for x in crypt))
|
||||
return decrypt + data[crypt_len:]
|
||||
|
||||
|
||||
@@ -29,18 +34,16 @@ def decrypt_font(key, path, algorithm):
|
||||
|
||||
class EPUBInput(InputFormatPlugin):
|
||||
|
||||
name = 'EPUB Input'
|
||||
author = 'Kovid Goyal'
|
||||
name = 'EPUB Input'
|
||||
author = 'Kovid Goyal'
|
||||
description = 'Convert EPUB files (.epub) to HTML'
|
||||
file_types = {'epub'}
|
||||
file_types = {'epub'}
|
||||
output_encoding = None
|
||||
commit_name = 'epub_input'
|
||||
|
||||
recommendations = {('page_breaks_before', '/', OptionRecommendation.MED)}
|
||||
|
||||
def process_encryption(self, encfile, opf, log):
|
||||
from lxml import etree
|
||||
import uuid, hashlib
|
||||
idpf_key = opf.raw_unique_identifier
|
||||
if idpf_key:
|
||||
idpf_key = re.sub('[\u0020\u0009\u000d\u000a]', '', idpf_key)
|
||||
@@ -56,27 +59,28 @@ class EPUBInput(InputFormatPlugin):
|
||||
try:
|
||||
key = item.text.rpartition(':')[-1]
|
||||
key = uuid.UUID(key).bytes
|
||||
except:
|
||||
import traceback
|
||||
except Exception:
|
||||
traceback.print_exc()
|
||||
key = None
|
||||
|
||||
try:
|
||||
root = etree.parse(encfile)
|
||||
for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
|
||||
for em in root.xpath('descendant::*[contains(name(), '
|
||||
'"EncryptionMethod")]'):
|
||||
algorithm = em.get('Algorithm', '')
|
||||
if algorithm not in {ADOBE_OBFUSCATION, IDPF_OBFUSCATION}:
|
||||
return False
|
||||
cr = em.getparent().xpath('descendant::*[contains(name(), "CipherReference")]')[0]
|
||||
cr = em.getparent().xpath('descendant::*[contains(name(), '
|
||||
'"CipherReference")]')[0]
|
||||
uri = cr.get('URI')
|
||||
path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/')))
|
||||
path = os.path.abspath(os.path.join(os.path.dirname(encfile),
|
||||
'..', *uri.split('/')))
|
||||
tkey = (key if algorithm == ADOBE_OBFUSCATION else idpf_key)
|
||||
if (tkey and os.path.exists(path)):
|
||||
self._encrypted_font_uris.append(uri)
|
||||
decrypt_font(tkey, path, algorithm)
|
||||
return True
|
||||
except:
|
||||
import traceback
|
||||
except Exception:
|
||||
traceback.print_exc()
|
||||
return False
|
||||
|
||||
@@ -97,8 +101,11 @@ class EPUBInput(InputFormatPlugin):
|
||||
return t
|
||||
|
||||
def rationalize_cover3(self, opf, log):
|
||||
''' If there is a reference to the cover/titlepage via manifest properties, convert to
|
||||
entries in the <guide> so that the rest of the pipeline picks it up. '''
|
||||
"""
|
||||
If there is a reference to the cover/titlepage via manifest
|
||||
properties, convert to entries in the <guide> so that the rest of the
|
||||
pipeline picks it up.
|
||||
"""
|
||||
from ebook_converter.ebooks.metadata.opf3 import items_with_property
|
||||
removed = guide_titlepage_href = guide_titlepage_id = None
|
||||
|
||||
@@ -128,7 +135,8 @@ class EPUBInput(InputFormatPlugin):
|
||||
titlepage_id, titlepage_href = tid, href.partition('#')[0]
|
||||
break
|
||||
if titlepage_href is None:
|
||||
titlepage_href, titlepage_id = guide_titlepage_href, guide_titlepage_id
|
||||
titlepage_href = guide_titlepage_href
|
||||
titlepage_id = guide_titlepage_id
|
||||
if titlepage_href is not None:
|
||||
self.set_guide_type(opf, 'titlepage', titlepage_href, 'Title Page')
|
||||
spine = list(opf.iterspine())
|
||||
@@ -148,7 +156,6 @@ class EPUBInput(InputFormatPlugin):
|
||||
means, at most one entry with type="cover" that points to a raster
|
||||
cover and at most one entry with type="titlepage" that points to an
|
||||
HTML titlepage. '''
|
||||
from ebook_converter.ebooks.oeb.base import OPF
|
||||
removed = None
|
||||
from lxml import etree
|
||||
guide_cover, guide_elem = None, None
|
||||
@@ -160,12 +167,14 @@ class EPUBInput(InputFormatPlugin):
|
||||
raster_cover = opf.raster_cover
|
||||
if raster_cover:
|
||||
if guide_elem is None:
|
||||
g = opf.root.makeelement(OPF('guide'))
|
||||
g = opf.root.makeelement(base.tag('opf', 'guide'))
|
||||
opf.root.append(g)
|
||||
else:
|
||||
g = guide_elem.getparent()
|
||||
guide_cover = raster_cover
|
||||
guide_elem = g.makeelement(OPF('reference'), attrib={'href':raster_cover, 'type':'cover'})
|
||||
guide_elem = g.makeelement(base.tag('opf', 'reference'),
|
||||
attrib={'href': raster_cover,
|
||||
'type': 'cover'})
|
||||
g.append(guide_elem)
|
||||
return
|
||||
spine = list(opf.iterspine())
|
||||
@@ -186,7 +195,8 @@ class EPUBInput(InputFormatPlugin):
|
||||
# specially
|
||||
if not self.for_viewer:
|
||||
if len(spine) == 1:
|
||||
log.warn('There is only a single spine item and it is marked as the cover. Removing cover marking.')
|
||||
log.warn('There is only a single spine item and it is marked '
|
||||
'as the cover. Removing cover marking.')
|
||||
for guide_elem in tuple(opf.iterguide()):
|
||||
if guide_elem.get('type', '').lower() == 'cover':
|
||||
guide_elem.getparent().remove(guide_elem)
|
||||
@@ -215,8 +225,9 @@ class EPUBInput(InputFormatPlugin):
|
||||
# Render the titlepage to create a raster cover
|
||||
from ebook_converter.ebooks import render_html_svg_workaround
|
||||
guide_elem.set('href', 'calibre_raster_cover.jpg')
|
||||
t = etree.SubElement(
|
||||
elem[0].getparent(), OPF('item'), href=guide_elem.get('href'), id='calibre_raster_cover')
|
||||
t = etree.SubElement(elem[0].getparent(), base.tag('opf', 'item'),
|
||||
href=guide_elem.get('href'),
|
||||
id='calibre_raster_cover')
|
||||
t.set('media-type', 'image/jpeg')
|
||||
if os.path.exists(guide_cover):
|
||||
renderer = render_html_svg_workaround(guide_cover, log)
|
||||
@@ -229,17 +240,16 @@ class EPUBInput(InputFormatPlugin):
|
||||
return removed
|
||||
|
||||
def find_opf(self):
|
||||
from ebook_converter.utils.xml_parse import safe_xml_fromstring
|
||||
|
||||
def attr(n, attr):
|
||||
for k, v in n.attrib.items():
|
||||
if k.endswith(attr):
|
||||
return v
|
||||
try:
|
||||
with open('META-INF/container.xml', 'rb') as f:
|
||||
root = safe_xml_fromstring(f.read())
|
||||
root = etree.fromstring(f.read())
|
||||
for r in root.xpath('//*[local-name()="rootfile"]'):
|
||||
if attr(r, 'media-type') != "application/oebps-package+xml":
|
||||
if (attr(r, 'media-type') !=
|
||||
"application/oebps-package+xml"):
|
||||
continue
|
||||
path = attr(r, 'full-path')
|
||||
if not path:
|
||||
@@ -248,20 +258,18 @@ class EPUBInput(InputFormatPlugin):
|
||||
if os.path.exists(path):
|
||||
return path
|
||||
except Exception:
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
|
||||
def convert(self, stream, options, file_ext, log, accelerators):
|
||||
from ebook_converter.utils.zipfile import ZipFile
|
||||
from ebook_converter import walk
|
||||
from ebook_converter.ebooks import DRMError
|
||||
from ebook_converter.ebooks.metadata.opf2 import OPF
|
||||
try:
|
||||
zf = ZipFile(stream)
|
||||
zf.extractall(os.getcwd())
|
||||
except:
|
||||
except Exception:
|
||||
log.exception('EPUB appears to be invalid ZIP file, trying a'
|
||||
' more forgiving ZIP parser')
|
||||
' more forgiving ZIP parser')
|
||||
from ebook_converter.utils.localunzip import extractall
|
||||
stream.seek(0)
|
||||
extractall(stream)
|
||||
@@ -276,11 +284,12 @@ class EPUBInput(InputFormatPlugin):
|
||||
path = getattr(stream, 'name', 'stream')
|
||||
|
||||
if opf is None:
|
||||
raise ValueError('%s is not a valid EPUB file (could not find opf)'%path)
|
||||
raise ValueError('%s is not a valid EPUB file (could not find '
|
||||
'opf)' % path)
|
||||
|
||||
opf = os.path.relpath(opf, os.getcwd())
|
||||
parts = os.path.split(opf)
|
||||
opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))
|
||||
# parts = os.path.split(opf)
|
||||
opf = opf_meta.OPF(opf, os.path.dirname(os.path.abspath(opf)))
|
||||
|
||||
self._encrypted_font_uris = []
|
||||
if os.path.exists(encfile):
|
||||
@@ -288,18 +297,23 @@ class EPUBInput(InputFormatPlugin):
|
||||
raise DRMError(os.path.basename(path))
|
||||
self.encrypted_fonts = self._encrypted_font_uris
|
||||
|
||||
if len(parts) > 1 and parts[0]:
|
||||
delta = '/'.join(parts[:-1])+'/'
|
||||
# XXX(gryf): this code would fail rather badly, so this part was
|
||||
# never used.
|
||||
# if len(parts) > 1 and parts[0]:
|
||||
# delta = '/'.join(parts[:-1])+'/'
|
||||
|
||||
def normpath(x):
|
||||
return posixpath.normpath(delta + elem.get('href'))
|
||||
# def normpath(x):
|
||||
# return posixpath.normpath(delta + elem.get('href'))
|
||||
|
||||
for elem in opf.itermanifest():
|
||||
elem.set('href', normpath(elem.get('href')))
|
||||
for elem in opf.iterguide():
|
||||
elem.set('href', normpath(elem.get('href')))
|
||||
# for elem in opf.itermanifest():
|
||||
# elem.set('href', normpath(elem.get('href')))
|
||||
# for elem in opf.iterguide():
|
||||
# elem.set('href', normpath(elem.get('href')))
|
||||
|
||||
f = self.rationalize_cover3 if opf.package_version >= 3.0 else self.rationalize_cover2
|
||||
if opf.package_version >= 3.0:
|
||||
f = self.rationalize_cover3
|
||||
else:
|
||||
f = self.rationalize_cover2
|
||||
self.removed_cover = f(opf, log)
|
||||
if self.removed_cover:
|
||||
self.removed_items_to_ignore = (self.removed_cover,)
|
||||
@@ -352,15 +366,18 @@ class EPUBInput(InputFormatPlugin):
|
||||
from lxml import etree
|
||||
from ebook_converter.ebooks.chardet import xml_to_unicode
|
||||
from ebook_converter.ebooks.oeb.polish.parsing import parse
|
||||
from ebook_converter.ebooks.oeb.base import EPUB_NS, XHTML, NCX_MIME, NCX, urlnormalize, urlunquote, serialize
|
||||
from ebook_converter.ebooks.oeb.base import EPUB_NS, XHTML, NCX_MIME, \
|
||||
NCX, urlnormalize, urlunquote, serialize
|
||||
from ebook_converter.ebooks.oeb.polish.toc import first_child
|
||||
from ebook_converter.utils.xml_parse import safe_xml_fromstring
|
||||
from tempfile import NamedTemporaryFile
|
||||
with open(nav_path, 'rb') as f:
|
||||
raw = f.read()
|
||||
raw = xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True)[0]
|
||||
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
||||
assume_utf8=True)[0]
|
||||
root = parse(raw, log=log)
|
||||
ncx = safe_xml_fromstring('<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="eng"><navMap/></ncx>')
|
||||
ncx = etree.fromstring('<ncx xmlns="http://www.daisy.org/z3986/2005/'
|
||||
'ncx/" version="2005-1" xml:lang="eng">'
|
||||
'<navMap/></ncx>')
|
||||
navmap = ncx[0]
|
||||
et = '{%s}type' % EPUB_NS
|
||||
bn = os.path.basename(nav_path)
|
||||
@@ -368,8 +385,8 @@ class EPUBInput(InputFormatPlugin):
|
||||
def add_from_li(li, parent):
|
||||
href = text = None
|
||||
for x in li.iterchildren(XHTML('a'), XHTML('span')):
|
||||
text = etree.tostring(
|
||||
x, method='text', encoding='unicode', with_tail=False).strip() or ' '.join(
|
||||
text = etree.tostring(x, method='text', encoding='unicode',
|
||||
with_tail=False).strip() or ' '.join(
|
||||
x.xpath('descendant-or-self::*/@title')).strip()
|
||||
href = x.get('href')
|
||||
if href:
|
||||
@@ -382,7 +399,7 @@ class EPUBInput(InputFormatPlugin):
|
||||
np[0].append(np.makeelement(NCX('text')))
|
||||
np[0][0].text = text
|
||||
if href:
|
||||
np.append(np.makeelement(NCX('content'), attrib={'src':href}))
|
||||
np.append(np.makeelement(NCX('content'), attrib={'src': href}))
|
||||
return np
|
||||
|
||||
def process_nav_node(node, toc_parent):
|
||||
@@ -401,20 +418,25 @@ class EPUBInput(InputFormatPlugin):
|
||||
else:
|
||||
return
|
||||
|
||||
with NamedTemporaryFile(suffix='.ncx', dir=os.path.dirname(nav_path), delete=False) as f:
|
||||
with NamedTemporaryFile(suffix='.ncx', dir=os.path.dirname(nav_path),
|
||||
delete=False) as f:
|
||||
f.write(etree.tostring(ncx, encoding='utf-8'))
|
||||
ncx_href = os.path.relpath(f.name, os.getcwd()).replace(os.sep, '/')
|
||||
ncx_id = opf.create_manifest_item(ncx_href, NCX_MIME, append=True).get('id')
|
||||
ncx_id = opf.create_manifest_item(ncx_href, NCX_MIME,
|
||||
append=True).get('id')
|
||||
for spine in opf.root.xpath('//*[local-name()="spine"]'):
|
||||
spine.set('toc', ncx_id)
|
||||
opts.epub3_nav_href = urlnormalize(os.path.relpath(nav_path).replace(os.sep, '/'))
|
||||
url = os.path.relpath(nav_path).replace(os.sep, '/')
|
||||
opts.epub3_nav_href = urlnormalize(url)
|
||||
opts.epub3_nav_parsed = root
|
||||
if getattr(self, 'removed_cover', None):
|
||||
changed = False
|
||||
base_path = os.path.dirname(nav_path)
|
||||
for elem in root.xpath('//*[@href]'):
|
||||
href, frag = elem.get('href').partition('#')[::2]
|
||||
link_path = os.path.relpath(os.path.join(base_path, urlunquote(href)), base_path)
|
||||
link_path = os.path.relpath(os.path.join(base_path,
|
||||
urlunquote(href)),
|
||||
base_path)
|
||||
abs_href = urlnormalize(link_path)
|
||||
if abs_href == self.removed_cover:
|
||||
changed = True
|
||||
|
||||
@@ -2,7 +2,11 @@ import os
|
||||
import re
|
||||
import shutil
|
||||
import urllib.parse
|
||||
import uuid
|
||||
|
||||
from ebook_converter import constants as const
|
||||
from ebook_converter.ebooks.oeb import base
|
||||
from ebook_converter.ebooks.oeb import parse_utils
|
||||
from ebook_converter.customize.conversion import OutputFormatPlugin
|
||||
from ebook_converter.customize.conversion import OptionRecommendation
|
||||
|
||||
@@ -132,39 +136,37 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
recommendations = {('pretty_print', True, OptionRecommendation.HIGH)}
|
||||
|
||||
def workaround_webkit_quirks(self): # {{{
|
||||
from ebook_converter.ebooks.oeb.base import XPath
|
||||
for x in self.oeb.spine:
|
||||
root = x.data
|
||||
body = XPath('//h:body')(root)
|
||||
body = base.XPath('//h:body')(root)
|
||||
if body:
|
||||
body = body[0]
|
||||
|
||||
if not hasattr(body, 'xpath'):
|
||||
continue
|
||||
|
||||
for pre in XPath('//h:pre')(body):
|
||||
for pre in base.XPath('//h:pre')(body):
|
||||
if not pre.text and len(pre) == 0:
|
||||
pre.tag = 'div'
|
||||
# }}}
|
||||
|
||||
def upshift_markup(self): # {{{
|
||||
'Upgrade markup to comply with XHTML 1.1 where possible'
|
||||
from ebook_converter.ebooks.oeb.base import XPath, XML
|
||||
for x in self.oeb.spine:
|
||||
root = x.data
|
||||
if (not root.get(XML('lang'))) and (root.get('lang')):
|
||||
root.set(XML('lang'), root.get('lang'))
|
||||
body = XPath('//h:body')(root)
|
||||
if (not root.get(base.tag('xml', 'lang'))) and (root.get('lang')):
|
||||
root.set(base.tag('xml', 'lang'), root.get('lang'))
|
||||
body = base.XPath('//h:body')(root)
|
||||
if body:
|
||||
body = body[0]
|
||||
|
||||
if not hasattr(body, 'xpath'):
|
||||
continue
|
||||
for u in XPath('//h:u')(root):
|
||||
for u in base.XPath('//h:u')(root):
|
||||
u.tag = 'span'
|
||||
|
||||
seen_ids, seen_names = set(), set()
|
||||
for x in XPath('//*[@id or @name]')(root):
|
||||
for x in base.XPath('//*[@id or @name]')(root):
|
||||
eid, name = x.get('id', None), x.get('name', None)
|
||||
if eid:
|
||||
if eid in seen_ids:
|
||||
@@ -223,28 +225,27 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
first = next(iter(self.oeb.spine))
|
||||
self.oeb.toc.add('Start', first.href)
|
||||
|
||||
from ebook_converter.ebooks.oeb.base import OPF
|
||||
identifiers = oeb.metadata['identifier']
|
||||
uuid = None
|
||||
_uuid = None
|
||||
for x in identifiers:
|
||||
if x.get(OPF('scheme'), None).lower() == 'uuid' or str(x).startswith('urn:uuid:'):
|
||||
uuid = str(x).split(':')[-1]
|
||||
if (x.get(base.tag('opf', 'scheme'), None).lower() == 'uuid' or
|
||||
str(x).startswith('urn:uuid:')):
|
||||
_uuid = str(x).split(':')[-1]
|
||||
break
|
||||
encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', [])
|
||||
|
||||
if uuid is None:
|
||||
if _uuid is None:
|
||||
self.log.warn('No UUID identifier found')
|
||||
from uuid import uuid4
|
||||
uuid = str(uuid4())
|
||||
oeb.metadata.add('identifier', uuid, scheme='uuid', id=uuid)
|
||||
_uuid = str(uuid.uuid4())
|
||||
oeb.metadata.add('identifier', _uuid, scheme='uuid', id=_uuid)
|
||||
|
||||
if encrypted_fonts and not uuid.startswith('urn:uuid:'):
|
||||
if encrypted_fonts and not _uuid.startswith('urn:uuid:'):
|
||||
# Apparently ADE requires this value to start with urn:uuid:
|
||||
# for some absurd reason, or it will throw a hissy fit and refuse
|
||||
# to use the obfuscated fonts.
|
||||
for x in identifiers:
|
||||
if str(x) == uuid:
|
||||
x.content = 'urn:uuid:'+uuid
|
||||
if str(x) == _uuid:
|
||||
x.content = 'urn:uuid:' + _uuid
|
||||
|
||||
with TemporaryDirectory('_epub_output') as tdir:
|
||||
from ebook_converter.customize.ui import plugin_for_output_format
|
||||
@@ -264,7 +265,7 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
self.upgrade_to_epub3(tdir, opf)
|
||||
encryption = None
|
||||
if encrypted_fonts:
|
||||
encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)
|
||||
encryption = self.encrypt_fonts(encrypted_fonts, tdir, _uuid)
|
||||
|
||||
from ebook_converter.ebooks.epub import initialize_container
|
||||
with initialize_container(output_path, os.path.basename(opf),
|
||||
@@ -312,12 +313,12 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
except EnvironmentError:
|
||||
pass
|
||||
|
||||
def encrypt_fonts(self, uris, tdir, uuid): # {{{
|
||||
def encrypt_fonts(self, uris, tdir, _uuid): # {{{
|
||||
from ebook_converter.polyglot.binary import from_hex_bytes
|
||||
|
||||
key = re.sub(r'[^a-fA-F0-9]', '', uuid)
|
||||
key = re.sub(r'[^a-fA-F0-9]', '', _uuid)
|
||||
if len(key) < 16:
|
||||
raise ValueError('UUID identifier %r is invalid'%uuid)
|
||||
raise ValueError('UUID identifier %r is invalid'% _uuid)
|
||||
key = bytearray(from_hex_bytes((key + key)[:32]))
|
||||
paths = []
|
||||
with CurrentDir(tdir):
|
||||
@@ -335,7 +336,8 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
if len(data) >= 1024:
|
||||
data = bytearray(data)
|
||||
f.seek(0)
|
||||
f.write(bytes(bytearray(data[i] ^ key[i%16] for i in range(1024))))
|
||||
f.write(bytes(bytearray(data[i] ^ key[i%16]
|
||||
for i in range(1024))))
|
||||
else:
|
||||
self.log.warn('Font', path, 'is invalid, ignoring')
|
||||
if not isinstance(uri, str):
|
||||
@@ -374,11 +376,10 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
# }}}
|
||||
|
||||
def workaround_ade_quirks(self): # {{{
|
||||
'''
|
||||
"""
|
||||
Perform various markup transforms to get the output to render correctly
|
||||
in the quirky ADE.
|
||||
'''
|
||||
from ebook_converter.ebooks.oeb.base import XPath, XHTML, barename, urlunquote
|
||||
"""
|
||||
|
||||
stylesheet = self.oeb.manifest.main_stylesheet
|
||||
|
||||
@@ -388,23 +389,23 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
for node in self.oeb.toc.iter():
|
||||
href = getattr(node, 'href', None)
|
||||
if hasattr(href, 'partition'):
|
||||
base, _, frag = href.partition('#')
|
||||
frag = urlunquote(frag)
|
||||
_base, _, frag = href.partition('#')
|
||||
frag = base.urlunquote(frag)
|
||||
if frag and frag_pat.match(frag) is None:
|
||||
self.log.warn(
|
||||
'Removing fragment identifier %r from TOC as Adobe Digital Editions cannot handle it'%frag)
|
||||
node.href = base
|
||||
node.href = _base
|
||||
|
||||
for x in self.oeb.spine:
|
||||
root = x.data
|
||||
body = XPath('//h:body')(root)
|
||||
body = base.XPath('//h:body')(root)
|
||||
if body:
|
||||
body = body[0]
|
||||
|
||||
if hasattr(body, 'xpath'):
|
||||
# remove <img> tags with empty src elements
|
||||
bad = []
|
||||
for x in XPath('//h:img')(body):
|
||||
for x in base.XPath('//h:img')(body):
|
||||
src = x.get('src', '').strip()
|
||||
if src in ('', '#') or src.startswith('http:'):
|
||||
bad.append(x)
|
||||
@@ -412,7 +413,7 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
img.getparent().remove(img)
|
||||
|
||||
# Add id attribute to <a> tags that have name
|
||||
for x in XPath('//h:a[@name]')(body):
|
||||
for x in base.XPath('//h:a[@name]')(body):
|
||||
if not x.get('id', False):
|
||||
x.set('id', x.get('name'))
|
||||
# The delightful epubcheck has started complaining about <a> tags that
|
||||
@@ -420,19 +421,19 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
x.attrib.pop('name')
|
||||
|
||||
# Replace <br> that are children of <body> as ADE doesn't handle them
|
||||
for br in XPath('./h:br')(body):
|
||||
for br in base.XPath('./h:br')(body):
|
||||
if br.getparent() is None:
|
||||
continue
|
||||
try:
|
||||
prior = next(br.itersiblings(preceding=True))
|
||||
priortag = barename(prior.tag)
|
||||
priortag = parse_utils.barename(prior.tag)
|
||||
priortext = prior.tail
|
||||
except:
|
||||
priortag = 'body'
|
||||
priortext = body.text
|
||||
if priortext:
|
||||
priortext = priortext.strip()
|
||||
br.tag = XHTML('p')
|
||||
br.tag = base.tag('xhtml', 'p')
|
||||
br.text = '\u00a0'
|
||||
style = br.get('style', '').split(';')
|
||||
style = list(filter(None, map(lambda x: x.strip(), style)))
|
||||
@@ -446,44 +447,44 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
style.append('height:0pt')
|
||||
br.set('style', '; '.join(style))
|
||||
|
||||
for tag in XPath('//h:embed')(root):
|
||||
for tag in base.XPath('//h:embed')(root):
|
||||
tag.getparent().remove(tag)
|
||||
for tag in XPath('//h:object')(root):
|
||||
for tag in base.XPath('//h:object')(root):
|
||||
if tag.get('type', '').lower().strip() in {'image/svg+xml', 'application/svg+xml'}:
|
||||
continue
|
||||
tag.getparent().remove(tag)
|
||||
|
||||
for tag in XPath('//h:title|//h:style')(root):
|
||||
for tag in base.XPath('//h:title|//h:style')(root):
|
||||
if not tag.text:
|
||||
tag.getparent().remove(tag)
|
||||
for tag in XPath('//h:script')(root):
|
||||
for tag in base.XPath('//h:script')(root):
|
||||
if (not tag.text and not tag.get('src', False) and tag.get('type', None) != 'text/x-mathjax-config'):
|
||||
tag.getparent().remove(tag)
|
||||
for tag in XPath('//h:body/descendant::h:script')(root):
|
||||
for tag in base.XPath('//h:body/descendant::h:script')(root):
|
||||
tag.getparent().remove(tag)
|
||||
|
||||
formchildren = XPath('./h:input|./h:button|./h:textarea|'
|
||||
formchildren = base.XPath('./h:input|./h:button|./h:textarea|'
|
||||
'./h:label|./h:fieldset|./h:legend')
|
||||
for tag in XPath('//h:form')(root):
|
||||
for tag in base.XPath('//h:form')(root):
|
||||
if formchildren(tag):
|
||||
tag.getparent().remove(tag)
|
||||
else:
|
||||
# Not a real form
|
||||
tag.tag = XHTML('div')
|
||||
tag.tag = base.tag('xhtml', 'div')
|
||||
|
||||
for tag in XPath('//h:center')(root):
|
||||
tag.tag = XHTML('div')
|
||||
for tag in base.XPath('//h:center')(root):
|
||||
tag.tag = base.tag('xhtml', 'div')
|
||||
tag.set('style', 'text-align:center')
|
||||
# ADE can't handle & in an img url
|
||||
for tag in XPath('//h:img[@src]')(root):
|
||||
for tag in base.XPath('//h:img[@src]')(root):
|
||||
tag.set('src', tag.get('src', '').replace('&', ''))
|
||||
|
||||
# ADE whimpers in fright when it encounters a <td> outside a
|
||||
# <table>
|
||||
in_table = XPath('ancestor::h:table')
|
||||
for tag in XPath('//h:td|//h:tr|//h:th')(root):
|
||||
in_table = base.XPath('ancestor::h:table')
|
||||
for tag in base.XPath('//h:td|//h:tr|//h:th')(root):
|
||||
if not in_table(tag):
|
||||
tag.tag = XHTML('div')
|
||||
tag.tag = base.tag('xhtml', 'div')
|
||||
|
||||
# ADE fails to render non breaking hyphens/soft hyphens/zero width spaces
|
||||
special_chars = re.compile('[\u200b\u00ad]')
|
||||
@@ -498,7 +499,7 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
if stylesheet is not None:
|
||||
# ADE doesn't render lists correctly if they have left margins
|
||||
from css_parser.css import CSSRule
|
||||
for lb in XPath('//h:ul[@class]|//h:ol[@class]')(root):
|
||||
for lb in base.XPath('//h:ul[@class]|//h:ol[@class]')(root):
|
||||
sel = '.'+lb.get('class')
|
||||
for rule in stylesheet.data.cssRules.rulesOfType(CSSRule.STYLE_RULE):
|
||||
if sel == rule.selectorList.selectorText:
|
||||
@@ -519,11 +520,10 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
'''
|
||||
Perform toc link transforms to alleviate slow loading.
|
||||
'''
|
||||
from ebook_converter.ebooks.oeb.base import XPath
|
||||
from ebook_converter.ebooks.oeb.polish.toc import item_at_top
|
||||
|
||||
def frag_is_at_top(root, frag):
|
||||
elem = XPath('//*[@id="%s" or @name="%s"]'%(frag, frag))(root)
|
||||
elem = base.XPath('//*[@id="%s" or @name="%s"]'%(frag, frag))(root)
|
||||
if elem:
|
||||
elem = elem[0]
|
||||
else:
|
||||
|
||||
@@ -1,59 +1,57 @@
|
||||
"""
|
||||
Convert .fb2 files to .lrf
|
||||
"""
|
||||
import os, re
|
||||
import os
|
||||
import pkg_resources
|
||||
import re
|
||||
|
||||
from ebook_converter.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
from lxml import etree
|
||||
|
||||
from ebook_converter import constants as const
|
||||
from ebook_converter.customize.conversion import InputFormatPlugin
|
||||
from ebook_converter.customize.conversion import OptionRecommendation
|
||||
from ebook_converter import guess_type
|
||||
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
|
||||
|
||||
FB2NS = 'http://www.gribuser.ru/xml/fictionbook/2.0'
|
||||
FB2NS = 'http://www.gribuser.ru/xml/fictionbook/2.0'
|
||||
FB21NS = 'http://www.gribuser.ru/xml/fictionbook/2.1'
|
||||
|
||||
|
||||
class FB2Input(InputFormatPlugin):
|
||||
|
||||
name = 'FB2 Input'
|
||||
author = 'Anatoly Shipitsin'
|
||||
name = 'FB2 Input'
|
||||
author = 'Anatoly Shipitsin'
|
||||
description = 'Convert FB2 and FBZ files to HTML'
|
||||
file_types = {'fb2', 'fbz'}
|
||||
file_types = {'fb2', 'fbz'}
|
||||
commit_name = 'fb2_input'
|
||||
|
||||
recommendations = {
|
||||
('level1_toc', '//h:h1', OptionRecommendation.MED),
|
||||
('level2_toc', '//h:h2', OptionRecommendation.MED),
|
||||
('level3_toc', '//h:h3', OptionRecommendation.MED),
|
||||
}
|
||||
recommendations = {('level1_toc', '//h:h1', OptionRecommendation.MED),
|
||||
('level2_toc', '//h:h2', OptionRecommendation.MED),
|
||||
('level3_toc', '//h:h3', OptionRecommendation.MED)}
|
||||
|
||||
options = {
|
||||
OptionRecommendation(name='no_inline_fb2_toc',
|
||||
recommended_value=False, level=OptionRecommendation.LOW,
|
||||
help='Do not insert a Table of Contents at the beginning of the book.'
|
||||
)}
|
||||
options = {OptionRecommendation(name='no_inline_fb2_toc',
|
||||
recommended_value=False,
|
||||
level=OptionRecommendation.LOW,
|
||||
help='Do not insert a Table of Contents '
|
||||
'at the beginning of the book.')}
|
||||
|
||||
def convert(self, stream, options, file_ext, log,
|
||||
accelerators):
|
||||
from lxml import etree
|
||||
from ebook_converter.utils.xml_parse import safe_xml_fromstring
|
||||
from ebook_converter.ebooks.metadata.fb2 import ensure_namespace, get_fb2_data
|
||||
from ebook_converter.ebooks.metadata.fb2 import ensure_namespace
|
||||
from ebook_converter.ebooks.metadata.fb2 import get_fb2_data
|
||||
from ebook_converter.ebooks.metadata.opf2 import OPFCreator
|
||||
from ebook_converter.ebooks.metadata.meta import get_metadata
|
||||
from ebook_converter.ebooks.oeb.base import XLINK_NS, XHTML_NS
|
||||
from ebook_converter.ebooks.chardet import xml_to_unicode
|
||||
self.log = log
|
||||
log.debug('Parsing XML...')
|
||||
raw = get_fb2_data(stream)[0]
|
||||
raw = raw.replace(b'\0', b'')
|
||||
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
||||
assume_utf8=True, resolve_entities=True)[0]
|
||||
assume_utf8=True, resolve_entities=True)[0]
|
||||
try:
|
||||
doc = safe_xml_fromstring(raw)
|
||||
doc = etree.fromstring(raw)
|
||||
except etree.XMLSyntaxError:
|
||||
doc = safe_xml_fromstring(raw.replace('& ', '&amp;'))
|
||||
doc = etree.fromstring(raw.replace('& ', '&amp;'))
|
||||
if doc is None:
|
||||
raise ValueError('The FB2 file is not valid XML')
|
||||
doc = ensure_namespace(doc)
|
||||
@@ -62,22 +60,24 @@ class FB2Input(InputFormatPlugin):
|
||||
except Exception:
|
||||
fb_ns = FB2NS
|
||||
|
||||
NAMESPACES = {'f':fb_ns, 'l':XLINK_NS}
|
||||
stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]')
|
||||
NAMESPACES = {'f': fb_ns, 'l': const.XLINK_NS}
|
||||
stylesheets = doc.xpath('//*[local-name() = "stylesheet" and '
|
||||
'@type="text/css"]')
|
||||
css = ''
|
||||
for s in stylesheets:
|
||||
css += etree.tostring(s, encoding='unicode', method='text',
|
||||
with_tail=False) + '\n\n'
|
||||
with_tail=False) + '\n\n'
|
||||
if css:
|
||||
import css_parser, logging
|
||||
import css_parser
|
||||
import logging
|
||||
parser = css_parser.CSSParser(fetcher=None,
|
||||
log=logging.getLogger('calibre.css'))
|
||||
log=logging.getLogger('calibre.css'))
|
||||
|
||||
XHTML_CSS_NAMESPACE = '@namespace "%s";\n' % XHTML_NS
|
||||
XHTML_CSS_NAMESPACE = '@namespace "%s";\n' % const.XHTML_NS
|
||||
text = XHTML_CSS_NAMESPACE + css
|
||||
log.debug('Parsing stylesheet...')
|
||||
stylesheet = parser.parseString(text)
|
||||
stylesheet.namespaces['h'] = XHTML_NS
|
||||
stylesheet.namespaces['h'] = const.XHTML_NS
|
||||
css = stylesheet.cssText
|
||||
if isinstance(css, bytes):
|
||||
css = css.decode('utf-8', 'replace')
|
||||
@@ -92,16 +92,20 @@ class FB2Input(InputFormatPlugin):
|
||||
if options.no_inline_fb2_toc:
|
||||
log('Disabling generation of inline FB2 TOC')
|
||||
ss = re.compile(r'<!-- BUILD TOC -->.*<!-- END BUILD TOC -->',
|
||||
re.DOTALL).sub('', ss)
|
||||
re.DOTALL).sub('', ss)
|
||||
|
||||
styledoc = safe_xml_fromstring(ss)
|
||||
styledoc = etree.fromstring(ss)
|
||||
|
||||
transform = etree.XSLT(styledoc)
|
||||
result = transform(doc)
|
||||
|
||||
# Handle links of type note and cite
|
||||
notes = {a.get('href')[1:]: a for a in result.xpath('//a[@link_note and @href]') if a.get('href').startswith('#')}
|
||||
cites = {a.get('link_cite'): a for a in result.xpath('//a[@link_cite]') if not a.get('href', '')}
|
||||
notes = {a.get('href')[1:]: a
|
||||
for a in result.xpath('//a[@link_note and @href]')
|
||||
if a.get('href').startswith('#')}
|
||||
cites = {a.get('link_cite'): a
|
||||
for a in result.xpath('//a[@link_cite]')
|
||||
if not a.get('href', '')}
|
||||
all_ids = {x for x in result.xpath('//*/@id')}
|
||||
for cite, a in cites.items():
|
||||
note = notes.get(cite, None)
|
||||
@@ -137,8 +141,10 @@ class FB2Input(InputFormatPlugin):
|
||||
f.write(mi.cover_data[1])
|
||||
cpath = os.path.abspath('fb2_cover_calibre_mi.jpg')
|
||||
else:
|
||||
for img in doc.xpath('//f:coverpage/f:image', namespaces=NAMESPACES):
|
||||
href = img.get('{%s}href'%XLINK_NS, img.get('href', None))
|
||||
for img in doc.xpath('//f:coverpage/f:image',
|
||||
namespaces=NAMESPACES):
|
||||
href = img.get('{%s}href' % const.XLINK_NS,
|
||||
img.get('href', None))
|
||||
if href is not None:
|
||||
if href.startswith('#'):
|
||||
href = href[1:]
|
||||
@@ -165,15 +171,15 @@ class FB2Input(InputFormatPlugin):
|
||||
ext = ct.rpartition('/')[-1].lower()
|
||||
if ext in ('png', 'jpeg', 'jpg'):
|
||||
if fname.lower().rpartition('.')[-1] not in {'jpg', 'jpeg',
|
||||
'png'}:
|
||||
'png'}:
|
||||
fname += '.' + ext
|
||||
self.binary_map[elem.get('id')] = fname
|
||||
raw = elem.text.strip()
|
||||
try:
|
||||
data = base64_decode(raw)
|
||||
except TypeError:
|
||||
self.log.exception('Binary data with id=%s is corrupted, ignoring'%(
|
||||
elem.get('id')))
|
||||
self.log.exception('Binary data with id=%s is corrupted, '
|
||||
'ignoring' % elem.get('id'))
|
||||
else:
|
||||
with open(fname, 'wb') as f:
|
||||
f.write(data)
|
||||
|
||||
@@ -1,17 +1,17 @@
|
||||
import copy
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from ebook_converter import constants as const
|
||||
from ebook_converter.customize.conversion import InputFormatPlugin
|
||||
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
|
||||
class LITInput(InputFormatPlugin):
|
||||
|
||||
name = 'LIT Input'
|
||||
author = 'Marshall T. Vandegrift'
|
||||
name = 'LIT Input'
|
||||
author = 'Marshall T. Vandegrift'
|
||||
description = 'Convert LIT files to HTML'
|
||||
file_types = {'lit'}
|
||||
file_types = {'lit'}
|
||||
commit_name = 'lit_input'
|
||||
|
||||
def convert(self, stream, options, file_ext, log,
|
||||
@@ -22,7 +22,7 @@ class LITInput(InputFormatPlugin):
|
||||
return create_oebbook(log, stream, options, reader=LitReader)
|
||||
|
||||
def postprocess_book(self, oeb, opts, log):
|
||||
from ebook_converter.ebooks.oeb.base import XHTML_NS, XPath, XHTML
|
||||
from ebook_converter.ebooks.oeb.base import XPath, XHTML
|
||||
for item in oeb.spine:
|
||||
root = item.data
|
||||
if not hasattr(root, 'xpath'):
|
||||
@@ -37,22 +37,23 @@ class LITInput(InputFormatPlugin):
|
||||
body = body[0]
|
||||
if len(body) == 1 and body[0].tag == XHTML('pre'):
|
||||
pre = body[0]
|
||||
from ebook_converter.ebooks.txt.processor import convert_basic, \
|
||||
separate_paragraphs_single_line
|
||||
from ebook_converter.ebooks.txt.processor import \
|
||||
convert_basic, separate_paragraphs_single_line
|
||||
from ebook_converter.ebooks.chardet import xml_to_unicode
|
||||
from ebook_converter.utils.xml_parse import safe_xml_fromstring
|
||||
import copy
|
||||
self.log('LIT file with all text in singe <pre> tag detected')
|
||||
self.log('LIT file with all text in singe <pre> tag '
|
||||
'detected')
|
||||
html = separate_paragraphs_single_line(pre.text)
|
||||
html = convert_basic(html).replace('<html>',
|
||||
'<html xmlns="%s">'%XHTML_NS)
|
||||
'<html xmlns="%s">' %
|
||||
const.XHTML_NS)
|
||||
html = xml_to_unicode(html, strip_encoding_pats=True,
|
||||
resolve_entities=True)[0]
|
||||
resolve_entities=True)[0]
|
||||
if opts.smarten_punctuation:
|
||||
# SmartyPants skips text inside <pre> tags
|
||||
from ebook_converter.ebooks.conversion.preprocess import smarten_punctuation
|
||||
html = smarten_punctuation(html, self.log)
|
||||
root = safe_xml_fromstring(html)
|
||||
from ebook_converter.ebooks.conversion import \
|
||||
preprocess
|
||||
html = preprocess.smarten_punctuation(html, self.log)
|
||||
root = etree.fromstring(html)
|
||||
body = XPath('//h:body')(root)
|
||||
pre.tag = XHTML('div')
|
||||
pre.text = ''
|
||||
|
||||
@@ -1,54 +1,52 @@
|
||||
import os, sys
|
||||
import os
|
||||
import sys
|
||||
import pkg_resources
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from ebook_converter.customize.conversion import InputFormatPlugin
|
||||
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
|
||||
class LRFInput(InputFormatPlugin):
|
||||
|
||||
name = 'LRF Input'
|
||||
author = 'Kovid Goyal'
|
||||
name = 'LRF Input'
|
||||
author = 'Kovid Goyal'
|
||||
description = 'Convert LRF files to HTML'
|
||||
file_types = {'lrf'}
|
||||
file_types = {'lrf'}
|
||||
commit_name = 'lrf_input'
|
||||
|
||||
def convert(self, stream, options, file_ext, log,
|
||||
accelerators):
|
||||
from ebook_converter.ebooks.lrf.input import (MediaType, Styles, TextBlock,
|
||||
Canvas, ImageBlock, RuledLine)
|
||||
from ebook_converter.ebooks.lrf.input import MediaType, Styles, \
|
||||
TextBlock, Canvas, ImageBlock, RuledLine
|
||||
self.log = log
|
||||
self.log('Generating XML')
|
||||
from ebook_converter.ebooks.lrf.lrfparser import LRFDocument
|
||||
from ebook_converter.utils.xml_parse import safe_xml_fromstring
|
||||
from lxml import etree
|
||||
d = LRFDocument(stream)
|
||||
d.parse()
|
||||
xml = d.to_xml(write_files=True)
|
||||
if options.verbose > 2:
|
||||
open(u'lrs.xml', 'wb').write(xml.encode('utf-8'))
|
||||
doc = safe_xml_fromstring(xml)
|
||||
doc = etree.fromstring(xml)
|
||||
|
||||
char_button_map = {}
|
||||
for x in doc.xpath('//CharButton[@refobj]'):
|
||||
ro = x.get('refobj')
|
||||
jump_button = doc.xpath('//*[@objid="%s"]'%ro)
|
||||
jump_button = doc.xpath('//*[@objid="%s"]' % ro)
|
||||
if jump_button:
|
||||
jump_to = jump_button[0].xpath('descendant::JumpTo[@refpage and @refobj]')
|
||||
jump_to = jump_button[0].xpath('descendant::JumpTo[@refpage '
|
||||
'and @refobj]')
|
||||
if jump_to:
|
||||
char_button_map[ro] = '%s.xhtml#%s'%(jump_to[0].get('refpage'),
|
||||
jump_to[0].get('refobj'))
|
||||
char_button_map[ro] = ('%s.xhtml#%s' %
|
||||
(jump_to[0].get('refpage'),
|
||||
jump_to[0].get('refobj')))
|
||||
plot_map = {}
|
||||
for x in doc.xpath('//Plot[@refobj]'):
|
||||
ro = x.get('refobj')
|
||||
image = doc.xpath('//Image[@objid="%s" and @refstream]'%ro)
|
||||
image = doc.xpath('//Image[@objid="%s" and @refstream]' % ro)
|
||||
if image:
|
||||
imgstr = doc.xpath('//ImageStream[@objid="%s" and @file]'%
|
||||
image[0].get('refstream'))
|
||||
imgstr = doc.xpath('//ImageStream[@objid="%s" and @file]' %
|
||||
image[0].get('refstream'))
|
||||
if imgstr:
|
||||
plot_map[ro] = imgstr[0].get('file')
|
||||
|
||||
@@ -58,21 +56,19 @@ class LRFInput(InputFormatPlugin):
|
||||
resource_filename('ebook_converter',
|
||||
'data/lrf.xsl')) as fobj:
|
||||
# TODO(gryf): change this nonsense to etree.parse() instead.
|
||||
styledoc = safe_xml_fromstring(fobj.read())
|
||||
styledoc = etree.fromstring(fobj.read())
|
||||
media_type = MediaType()
|
||||
styles = Styles()
|
||||
text_block = TextBlock(styles, char_button_map, plot_map, log)
|
||||
canvas = Canvas(doc, styles, text_block, log)
|
||||
image_block = ImageBlock(canvas)
|
||||
ruled_line = RuledLine()
|
||||
extensions = {
|
||||
('calibre', 'media-type') : media_type,
|
||||
('calibre', 'text-block') : text_block,
|
||||
('calibre', 'ruled-line') : ruled_line,
|
||||
('calibre', 'styles') : styles,
|
||||
('calibre', 'canvas') : canvas,
|
||||
('calibre', 'image-block'): image_block,
|
||||
}
|
||||
extensions = {('calibre', 'media-type'): media_type,
|
||||
('calibre', 'text-block'): text_block,
|
||||
('calibre', 'ruled-line'): ruled_line,
|
||||
('calibre', 'styles'): styles,
|
||||
('calibre', 'canvas'): canvas,
|
||||
('calibre', 'image-block'): image_block}
|
||||
transform = etree.XSLT(styledoc, extensions=extensions)
|
||||
try:
|
||||
result = transform(doc)
|
||||
|
||||
@@ -1,57 +1,58 @@
|
||||
import os, glob, re, textwrap
|
||||
import glob
|
||||
import os
|
||||
import pkg_resources
|
||||
import re
|
||||
import textwrap
|
||||
|
||||
from ebook_converter.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
from lxml import etree
|
||||
|
||||
from ebook_converter.customize.conversion import InputFormatPlugin
|
||||
from ebook_converter.customize.conversion import OptionRecommendation
|
||||
from ebook_converter.polyglot.builtins import as_bytes
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
border_style_map = {
|
||||
'single' : 'solid',
|
||||
'double-thickness-border' : 'double',
|
||||
'shadowed-border': 'outset',
|
||||
'double-border': 'double',
|
||||
'dotted-border': 'dotted',
|
||||
'dashed': 'dashed',
|
||||
'hairline': 'solid',
|
||||
'inset': 'inset',
|
||||
'dash-small': 'dashed',
|
||||
'dot-dash': 'dotted',
|
||||
'dot-dot-dash': 'dotted',
|
||||
'outset': 'outset',
|
||||
'tripple': 'double',
|
||||
'triple': 'double',
|
||||
'thick-thin-small': 'solid',
|
||||
'thin-thick-small': 'solid',
|
||||
'thin-thick-thin-small': 'solid',
|
||||
'thick-thin-medium': 'solid',
|
||||
'thin-thick-medium': 'solid',
|
||||
'thin-thick-thin-medium': 'solid',
|
||||
'thick-thin-large': 'solid',
|
||||
'thin-thick-thin-large': 'solid',
|
||||
'wavy': 'ridge',
|
||||
'double-wavy': 'ridge',
|
||||
'striped': 'ridge',
|
||||
'emboss': 'inset',
|
||||
'engrave': 'inset',
|
||||
'frame': 'ridge',
|
||||
}
|
||||
border_style_map = {'single': 'solid',
|
||||
'double-thickness-border': 'double',
|
||||
'shadowed-border': 'outset',
|
||||
'double-border': 'double',
|
||||
'dotted-border': 'dotted',
|
||||
'dashed': 'dashed',
|
||||
'hairline': 'solid',
|
||||
'inset': 'inset',
|
||||
'dash-small': 'dashed',
|
||||
'dot-dash': 'dotted',
|
||||
'dot-dot-dash': 'dotted',
|
||||
'outset': 'outset',
|
||||
'tripple': 'double',
|
||||
'triple': 'double',
|
||||
'thick-thin-small': 'solid',
|
||||
'thin-thick-small': 'solid',
|
||||
'thin-thick-thin-small': 'solid',
|
||||
'thick-thin-medium': 'solid',
|
||||
'thin-thick-medium': 'solid',
|
||||
'thin-thick-thin-medium': 'solid',
|
||||
'thick-thin-large': 'solid',
|
||||
'thin-thick-thin-large': 'solid',
|
||||
'wavy': 'ridge',
|
||||
'double-wavy': 'ridge',
|
||||
'striped': 'ridge',
|
||||
'emboss': 'inset',
|
||||
'engrave': 'inset',
|
||||
'frame': 'ridge'}
|
||||
|
||||
|
||||
class RTFInput(InputFormatPlugin):
|
||||
|
||||
name = 'RTF Input'
|
||||
author = 'Kovid Goyal'
|
||||
name = 'RTF Input'
|
||||
author = 'Kovid Goyal'
|
||||
description = 'Convert RTF files to HTML'
|
||||
file_types = {'rtf'}
|
||||
file_types = {'rtf'}
|
||||
commit_name = 'rtf_input'
|
||||
|
||||
options = {
|
||||
OptionRecommendation(name='ignore_wmf', recommended_value=False,
|
||||
help='Ignore WMF images instead of replacing them with a '
|
||||
'placeholder image.'),
|
||||
}
|
||||
options = {OptionRecommendation(name='ignore_wmf', recommended_value=False,
|
||||
help='Ignore WMF images instead of '
|
||||
'replacing them with a placeholder '
|
||||
'image.')}
|
||||
|
||||
def generate_xml(self, stream):
|
||||
from ebook_converter.ebooks.rtf2xml.ParseRtf import ParseRtf
|
||||
@@ -64,7 +65,7 @@ class RTFInput(InputFormatPlugin):
|
||||
run_lev = 4
|
||||
indent_out = 1
|
||||
self.log('Running RTFParser in debug mode')
|
||||
except:
|
||||
except Exception:
|
||||
self.log.warn('Impossible to run RTFParser in debug mode')
|
||||
parser = ParseRtf(
|
||||
in_file=stream,
|
||||
@@ -108,7 +109,8 @@ class RTFInput(InputFormatPlugin):
|
||||
deb_dir=debug_dir,
|
||||
|
||||
# Default encoding
|
||||
default_encoding=getattr(self.opts, 'input_encoding', 'cp1252') or 'cp1252',
|
||||
default_encoding=getattr(self.opts, 'input_encoding',
|
||||
'cp1252') or 'cp1252',
|
||||
|
||||
# Run level
|
||||
run_level=run_lev,
|
||||
@@ -151,7 +153,7 @@ class RTFInput(InputFormatPlugin):
|
||||
for count, val in imap.items():
|
||||
try:
|
||||
imap[count] = self.convert_image(val)
|
||||
except:
|
||||
except Exception:
|
||||
self.log.exception('Failed to convert', val)
|
||||
return imap
|
||||
|
||||
@@ -161,7 +163,7 @@ class RTFInput(InputFormatPlugin):
|
||||
try:
|
||||
return self.rasterize_wmf(name)
|
||||
except Exception:
|
||||
self.log.exception('Failed to convert WMF image %r'%name)
|
||||
self.log.exception('Failed to convert WMF image %r' % name)
|
||||
return self.replace_wmf(name)
|
||||
|
||||
def replace_wmf(self, name):
|
||||
@@ -170,9 +172,11 @@ class RTFInput(InputFormatPlugin):
|
||||
return '__REMOVE_ME__'
|
||||
from ebook_converter.ebooks.covers import message_image
|
||||
if self.default_img is None:
|
||||
self.default_img = message_image('Conversion of WMF images is not supported.'
|
||||
' Use Microsoft Word or OpenOffice to save this RTF file'
|
||||
' as HTML and convert that in calibre.')
|
||||
self.default_img = message_image('Conversion of WMF images is not '
|
||||
'supported. Use Microsoft Word '
|
||||
'or OpenOffice to save this RTF '
|
||||
'file as HTML and convert that '
|
||||
'in calibre.')
|
||||
name = name.replace('.wmf', '.jpg')
|
||||
with open(name, 'wb') as f:
|
||||
f.write(self.default_img)
|
||||
@@ -189,10 +193,10 @@ class RTFInput(InputFormatPlugin):
|
||||
return name
|
||||
|
||||
def write_inline_css(self, ic, border_styles):
|
||||
font_size_classes = ['span.fs%d { font-size: %spt }'%(i, x) for i, x in
|
||||
enumerate(ic.font_sizes)]
|
||||
color_classes = ['span.col%d { color: %s }'%(i, x) for i, x in
|
||||
enumerate(ic.colors) if x != 'false']
|
||||
font_size_classes = ['span.fs%d { font-size: %spt }' % (i, x)
|
||||
for i, x in enumerate(ic.font_sizes)]
|
||||
color_classes = ['span.col%d { color: %s }' % (i, x)
|
||||
for i, x in enumerate(ic.colors) if x != 'false']
|
||||
css = textwrap.dedent('''
|
||||
span.none {
|
||||
text-decoration: none; font-weight: normal;
|
||||
@@ -210,11 +214,11 @@ class RTFInput(InputFormatPlugin):
|
||||
span.strike-through { text-decoration: line-through }
|
||||
|
||||
''')
|
||||
css += '\n'+'\n'.join(font_size_classes)
|
||||
css += '\n' +'\n'.join(color_classes)
|
||||
css += '\n' + '\n'.join(font_size_classes)
|
||||
css += '\n' + '\n'.join(color_classes)
|
||||
|
||||
for cls, val in border_styles.items():
|
||||
css += '\n\n.%s {\n%s\n}'%(cls, val)
|
||||
css += '\n\n.%s {\n%s\n}' % (cls, val)
|
||||
|
||||
with open(u'styles.css', 'ab') as f:
|
||||
f.write(css.encode('utf-8'))
|
||||
@@ -224,35 +228,34 @@ class RTFInput(InputFormatPlugin):
|
||||
style_map = {}
|
||||
for elem in doc.xpath(r'//*[local-name()="cell"]'):
|
||||
style = ['border-style: hidden', 'border-width: 1px',
|
||||
'border-color: black']
|
||||
'border-color: black']
|
||||
for x in ('bottom', 'top', 'left', 'right'):
|
||||
bs = elem.get('border-cell-%s-style'%x, None)
|
||||
bs = elem.get('border-cell-%s-style' % x, None)
|
||||
if bs:
|
||||
cbs = border_style_map.get(bs, 'solid')
|
||||
style.append('border-%s-style: %s'%(x, cbs))
|
||||
bw = elem.get('border-cell-%s-line-width'%x, None)
|
||||
style.append('border-%s-style: %s' % (x, cbs))
|
||||
bw = elem.get('border-cell-%s-line-width' % x, None)
|
||||
if bw:
|
||||
style.append('border-%s-width: %spt'%(x, bw))
|
||||
bc = elem.get('border-cell-%s-color'%x, None)
|
||||
style.append('border-%s-width: %spt' % (x, bw))
|
||||
bc = elem.get('border-cell-%s-color' % x, None)
|
||||
if bc:
|
||||
style.append('border-%s-color: %s'%(x, bc))
|
||||
style.append('border-%s-color: %s' % (x, bc))
|
||||
style = ';\n'.join(style)
|
||||
if style not in border_styles:
|
||||
border_styles.append(style)
|
||||
idx = border_styles.index(style)
|
||||
cls = 'border_style%d'%idx
|
||||
cls = 'border_style%d' % idx
|
||||
style_map[cls] = style
|
||||
elem.set('class', cls)
|
||||
return style_map
|
||||
|
||||
def convert(self, stream, options, file_ext, log,
|
||||
accelerators):
|
||||
from lxml import etree
|
||||
from ebook_converter.ebooks.metadata.meta import get_metadata
|
||||
from ebook_converter.ebooks.metadata.opf2 import OPFCreator
|
||||
from ebook_converter.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
|
||||
from ebook_converter.ebooks.rtf2xml.ParseRtf import \
|
||||
RtfInvalidCodeException
|
||||
from ebook_converter.ebooks.rtf.input import InlineClass
|
||||
from ebook_converter.utils.xml_parse import safe_xml_fromstring
|
||||
self.opts = options
|
||||
self.log = log
|
||||
self.log('Converting RTF to XML...')
|
||||
@@ -269,14 +272,15 @@ class RTFInput(InputFormatPlugin):
|
||||
imap = {}
|
||||
try:
|
||||
imap = self.extract_images(d[0])
|
||||
except:
|
||||
except Exception:
|
||||
self.log.exception('Failed to extract images...')
|
||||
|
||||
self.log('Parsing XML...')
|
||||
doc = safe_xml_fromstring(xml)
|
||||
doc = etree.fromstring(xml)
|
||||
border_styles = self.convert_borders(doc)
|
||||
for pict in doc.xpath('//rtf:pict[@num]',
|
||||
namespaces={'rtf':'http://rtf2xml.sourceforge.net/'}):
|
||||
namespaces={'rtf':
|
||||
'http://rtf2xml.sourceforge.net/'}):
|
||||
num = int(pict.get('num'))
|
||||
name = imap.get(num, None)
|
||||
if name is not None:
|
||||
@@ -286,8 +290,8 @@ class RTFInput(InputFormatPlugin):
|
||||
inline_class = InlineClass(self.log)
|
||||
with open(pkg_resources.resource_filename('ebook_converter',
|
||||
'data/rtf.xsl')) as fobj:
|
||||
styledoc = safe_xml_fromstring(fobj.read())
|
||||
extensions = {('calibre', 'inline-class') : inline_class}
|
||||
styledoc = etree.fromstring(fobj.read())
|
||||
extensions = {('calibre', 'inline-class'): inline_class}
|
||||
transform = etree.XSLT(styledoc, extensions=extensions)
|
||||
result = transform(doc)
|
||||
html = u'index.xhtml'
|
||||
@@ -296,7 +300,8 @@ class RTFInput(InputFormatPlugin):
|
||||
# res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
|
||||
# clean multiple \n
|
||||
res = re.sub(b'\n+', b'\n', res)
|
||||
# Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines
|
||||
# Replace newlines inserted by the 'empty_paragraphs' option in
|
||||
# rtf2xml with html blank lines
|
||||
# res = re.sub('\s*<body>', '<body>', res)
|
||||
# res = re.sub('(?<=\n)\n{2}',
|
||||
# u'<p>\u00a0</p>\n'.encode('utf-8'), res)
|
||||
@@ -316,7 +321,8 @@ class RTFInput(InputFormatPlugin):
|
||||
|
||||
def postprocess_book(self, oeb, opts, log):
|
||||
for item in oeb.spine:
|
||||
for img in item.data.xpath('//*[local-name()="img" and @src="__REMOVE_ME__"]'):
|
||||
for img in item.data.xpath('//*[local-name()="img" and '
|
||||
'@src="__REMOVE_ME__"]'):
|
||||
p = img.getparent()
|
||||
idx = p.index(img)
|
||||
p.remove(img)
|
||||
|
||||
@@ -1,27 +1,33 @@
|
||||
import os
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from ebook_converter.customize.conversion import InputFormatPlugin
|
||||
from ebook_converter.ptempfile import TemporaryDirectory
|
||||
from ebook_converter.utils.filenames import ascii_filename
|
||||
|
||||
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
HTML_TEMPLATE = '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'
|
||||
HTML_TEMPLATE = ('<html><head><meta http-equiv="Content-Type" '
|
||||
'content="text/html; charset=utf-8"/><title>%s</title>'
|
||||
'</head><body>\n%s\n</body></html>')
|
||||
|
||||
|
||||
def html_encode(s):
|
||||
return s.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&apos;').replace('\n', '<br/>').replace(' ', '&nbsp;') # noqa
|
||||
return (s.replace('&', '&amp;')
|
||||
.replace('<', '&lt;')
|
||||
.replace('>', '&gt;')
|
||||
.replace('"', '&quot;')
|
||||
.replace("'", '&apos;')
|
||||
.replace('\n', '<br/>')
|
||||
.replace(' ', '&nbsp;'))
|
||||
|
||||
|
||||
class SNBInput(InputFormatPlugin):
|
||||
|
||||
name = 'SNB Input'
|
||||
author = 'Li Fanxi'
|
||||
name = 'SNB Input'
|
||||
author = 'Li Fanxi'
|
||||
description = 'Convert SNB files to OEB'
|
||||
file_types = {'snb'}
|
||||
file_types = {'snb'}
|
||||
commit_name = 'snb_input'
|
||||
|
||||
options = set()
|
||||
@@ -32,13 +38,12 @@ class SNBInput(InputFormatPlugin):
|
||||
|
||||
from ebook_converter.ebooks.oeb.base import DirContainer
|
||||
from ebook_converter.ebooks.snb.snbfile import SNBFile
|
||||
from ebook_converter.utils.xml_parse import safe_xml_fromstring
|
||||
|
||||
log.debug("Parsing SNB file...")
|
||||
snbFile = SNBFile()
|
||||
try:
|
||||
snbFile.Parse(stream)
|
||||
except:
|
||||
except Exception:
|
||||
raise ValueError("Invalid SNB file")
|
||||
if not snbFile.IsValid():
|
||||
log.debug("Invalid SNB file")
|
||||
@@ -46,27 +51,28 @@ class SNBInput(InputFormatPlugin):
|
||||
log.debug("Handle meta data ...")
|
||||
from ebook_converter.ebooks.conversion.plumber import create_oebbook
|
||||
oeb = create_oebbook(log, None, options,
|
||||
encoding=options.input_encoding, populate=False)
|
||||
encoding=options.input_encoding, populate=False)
|
||||
meta = snbFile.GetFileStream('snbf/book.snbf')
|
||||
if meta is not None:
|
||||
meta = safe_xml_fromstring(meta)
|
||||
l = {'title' : './/head/name',
|
||||
'creator' : './/head/author',
|
||||
'language' : './/head/language',
|
||||
'generator': './/head/generator',
|
||||
'publisher': './/head/publisher',
|
||||
'cover' : './/head/cover', }
|
||||
meta = etree.fromstring(meta)
|
||||
item_map = {'title': './/head/name',
|
||||
'creator': './/head/author',
|
||||
'language': './/head/language',
|
||||
'generator': './/head/generator',
|
||||
'publisher': './/head/publisher',
|
||||
'cover': './/head/cover'}
|
||||
d = {}
|
||||
for item in l:
|
||||
node = meta.find(l[item])
|
||||
for key, item in item_map.items():
|
||||
node = meta.find(item)
|
||||
if node is not None:
|
||||
d[item] = node.text if node.text is not None else ''
|
||||
d[key] = node.text if node.text is not None else ''
|
||||
else:
|
||||
d[item] = ''
|
||||
d[key] = ''
|
||||
|
||||
oeb.metadata.add('title', d['title'])
|
||||
oeb.metadata.add('creator', d['creator'], attrib={'role':'aut'})
|
||||
oeb.metadata.add('language', d['language'].lower().replace('_', '-'))
|
||||
oeb.metadata.add('creator', d['creator'], attrib={'role': 'aut'})
|
||||
oeb.metadata.add('language',
|
||||
d['language'].lower().replace('_', '-'))
|
||||
oeb.metadata.add('generator', d['generator'])
|
||||
oeb.metadata.add('publisher', d['publisher'])
|
||||
if d['cover'] != '':
|
||||
@@ -84,7 +90,7 @@ class SNBInput(InputFormatPlugin):
|
||||
toc = snbFile.GetFileStream('snbf/toc.snbf')
|
||||
oeb.container = DirContainer(tdir, log)
|
||||
if toc is not None:
|
||||
toc = safe_xml_fromstring(toc)
|
||||
toc = etree.fromstring(toc)
|
||||
i = 1
|
||||
for ch in toc.find('.//body'):
|
||||
chapterName = ch.text
|
||||
@@ -93,18 +99,22 @@ class SNBInput(InputFormatPlugin):
|
||||
data = snbFile.GetFileStream('snbc/' + chapterSrc)
|
||||
if data is None:
|
||||
continue
|
||||
snbc = safe_xml_fromstring(data)
|
||||
snbc = etree.fromstring(data)
|
||||
lines = []
|
||||
for line in snbc.find('.//body'):
|
||||
if line.tag == 'text':
|
||||
lines.append('<p>%s</p>' % html_encode(line.text))
|
||||
elif line.tag == 'img':
|
||||
lines.append('<p><img src="%s" /></p>' % html_encode(line.text))
|
||||
lines.append('<p><img src="%s" /></p>' %
|
||||
html_encode(line.text))
|
||||
with open(os.path.join(tdir, fname), 'wb') as f:
|
||||
f.write((HTML_TEMPLATE % (chapterName, '\n'.join(lines))).encode('utf-8', 'replace'))
|
||||
f.write((HTML_TEMPLATE %
|
||||
(chapterName,
|
||||
'\n'.join(lines))).encode('utf-8',
|
||||
'replace'))
|
||||
oeb.toc.add(ch.text, fname)
|
||||
id, href = oeb.manifest.generate(id='html',
|
||||
href=ascii_filename(fname))
|
||||
id, href = oeb.manifest.generate(
|
||||
id='html', href=ascii_filename(fname))
|
||||
item = oeb.manifest.add(id, href, 'text/html')
|
||||
item.html_input_href = fname
|
||||
oeb.spine.add(item, True)
|
||||
@@ -112,7 +122,7 @@ class SNBInput(InputFormatPlugin):
|
||||
imageFiles = snbFile.OutputImageFiles(tdir)
|
||||
for f, m in imageFiles:
|
||||
id, href = oeb.manifest.generate(id='image',
|
||||
href=ascii_filename(f))
|
||||
href=ascii_filename(f))
|
||||
item = oeb.manifest.add(id, href, m)
|
||||
item.html_input_href = f
|
||||
|
||||
|
||||
Reference in New Issue
Block a user