mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-03-11 20:15:46 +01:00
Use the real constants module.
This is progressing refactor of the calibre code to make it more readable, and transform it to something more coherent. In this patch, there are changes regarding imports for some modules, instead of polluting namespace of each module with some other modules symbols, which often were imported from other modules. Yuck.
This commit is contained in:
@@ -1,59 +1,57 @@
|
||||
"""
|
||||
Convert .fb2 files to .lrf
|
||||
"""
|
||||
import os, re
|
||||
import os
|
||||
import pkg_resources
|
||||
import re
|
||||
|
||||
from ebook_converter.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
from lxml import etree
|
||||
|
||||
from ebook_converter import constants as const
|
||||
from ebook_converter.customize.conversion import InputFormatPlugin
|
||||
from ebook_converter.customize.conversion import OptionRecommendation
|
||||
from ebook_converter import guess_type
|
||||
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
|
||||
|
||||
FB2NS = 'http://www.gribuser.ru/xml/fictionbook/2.0'
|
||||
FB2NS = 'http://www.gribuser.ru/xml/fictionbook/2.0'
|
||||
FB21NS = 'http://www.gribuser.ru/xml/fictionbook/2.1'
|
||||
|
||||
|
||||
class FB2Input(InputFormatPlugin):
|
||||
|
||||
name = 'FB2 Input'
|
||||
author = 'Anatoly Shipitsin'
|
||||
name = 'FB2 Input'
|
||||
author = 'Anatoly Shipitsin'
|
||||
description = 'Convert FB2 and FBZ files to HTML'
|
||||
file_types = {'fb2', 'fbz'}
|
||||
file_types = {'fb2', 'fbz'}
|
||||
commit_name = 'fb2_input'
|
||||
|
||||
recommendations = {
|
||||
('level1_toc', '//h:h1', OptionRecommendation.MED),
|
||||
('level2_toc', '//h:h2', OptionRecommendation.MED),
|
||||
('level3_toc', '//h:h3', OptionRecommendation.MED),
|
||||
}
|
||||
recommendations = {('level1_toc', '//h:h1', OptionRecommendation.MED),
|
||||
('level2_toc', '//h:h2', OptionRecommendation.MED),
|
||||
('level3_toc', '//h:h3', OptionRecommendation.MED)}
|
||||
|
||||
options = {
|
||||
OptionRecommendation(name='no_inline_fb2_toc',
|
||||
recommended_value=False, level=OptionRecommendation.LOW,
|
||||
help='Do not insert a Table of Contents at the beginning of the book.'
|
||||
)}
|
||||
options = {OptionRecommendation(name='no_inline_fb2_toc',
|
||||
recommended_value=False,
|
||||
level=OptionRecommendation.LOW,
|
||||
help='Do not insert a Table of Contents '
|
||||
'at the beginning of the book.')}
|
||||
|
||||
def convert(self, stream, options, file_ext, log,
|
||||
accelerators):
|
||||
from lxml import etree
|
||||
from ebook_converter.utils.xml_parse import safe_xml_fromstring
|
||||
from ebook_converter.ebooks.metadata.fb2 import ensure_namespace, get_fb2_data
|
||||
from ebook_converter.ebooks.metadata.fb2 import ensure_namespace
|
||||
from ebook_converter.ebooks.metadata.fb2 import get_fb2_data
|
||||
from ebook_converter.ebooks.metadata.opf2 import OPFCreator
|
||||
from ebook_converter.ebooks.metadata.meta import get_metadata
|
||||
from ebook_converter.ebooks.oeb.base import XLINK_NS, XHTML_NS
|
||||
from ebook_converter.ebooks.chardet import xml_to_unicode
|
||||
self.log = log
|
||||
log.debug('Parsing XML...')
|
||||
raw = get_fb2_data(stream)[0]
|
||||
raw = raw.replace(b'\0', b'')
|
||||
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
||||
assume_utf8=True, resolve_entities=True)[0]
|
||||
assume_utf8=True, resolve_entities=True)[0]
|
||||
try:
|
||||
doc = safe_xml_fromstring(raw)
|
||||
doc = etree.fromstring(raw)
|
||||
except etree.XMLSyntaxError:
|
||||
doc = safe_xml_fromstring(raw.replace('& ', '&'))
|
||||
doc = etree.fromstring(raw.replace('& ', '&'))
|
||||
if doc is None:
|
||||
raise ValueError('The FB2 file is not valid XML')
|
||||
doc = ensure_namespace(doc)
|
||||
@@ -62,22 +60,24 @@ class FB2Input(InputFormatPlugin):
|
||||
except Exception:
|
||||
fb_ns = FB2NS
|
||||
|
||||
NAMESPACES = {'f':fb_ns, 'l':XLINK_NS}
|
||||
stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]')
|
||||
NAMESPACES = {'f': fb_ns, 'l': const.XLINK_NS}
|
||||
stylesheets = doc.xpath('//*[local-name() = "stylesheet" and '
|
||||
'@type="text/css"]')
|
||||
css = ''
|
||||
for s in stylesheets:
|
||||
css += etree.tostring(s, encoding='unicode', method='text',
|
||||
with_tail=False) + '\n\n'
|
||||
with_tail=False) + '\n\n'
|
||||
if css:
|
||||
import css_parser, logging
|
||||
import css_parser
|
||||
import logging
|
||||
parser = css_parser.CSSParser(fetcher=None,
|
||||
log=logging.getLogger('calibre.css'))
|
||||
log=logging.getLogger('calibre.css'))
|
||||
|
||||
XHTML_CSS_NAMESPACE = '@namespace "%s";\n' % XHTML_NS
|
||||
XHTML_CSS_NAMESPACE = '@namespace "%s";\n' % const.XHTML_NS
|
||||
text = XHTML_CSS_NAMESPACE + css
|
||||
log.debug('Parsing stylesheet...')
|
||||
stylesheet = parser.parseString(text)
|
||||
stylesheet.namespaces['h'] = XHTML_NS
|
||||
stylesheet.namespaces['h'] = const.XHTML_NS
|
||||
css = stylesheet.cssText
|
||||
if isinstance(css, bytes):
|
||||
css = css.decode('utf-8', 'replace')
|
||||
@@ -92,16 +92,20 @@ class FB2Input(InputFormatPlugin):
|
||||
if options.no_inline_fb2_toc:
|
||||
log('Disabling generation of inline FB2 TOC')
|
||||
ss = re.compile(r'<!-- BUILD TOC -->.*<!-- END BUILD TOC -->',
|
||||
re.DOTALL).sub('', ss)
|
||||
re.DOTALL).sub('', ss)
|
||||
|
||||
styledoc = safe_xml_fromstring(ss)
|
||||
styledoc = etree.fromstring(ss)
|
||||
|
||||
transform = etree.XSLT(styledoc)
|
||||
result = transform(doc)
|
||||
|
||||
# Handle links of type note and cite
|
||||
notes = {a.get('href')[1:]: a for a in result.xpath('//a[@link_note and @href]') if a.get('href').startswith('#')}
|
||||
cites = {a.get('link_cite'): a for a in result.xpath('//a[@link_cite]') if not a.get('href', '')}
|
||||
notes = {a.get('href')[1:]: a
|
||||
for a in result.xpath('//a[@link_note and @href]')
|
||||
if a.get('href').startswith('#')}
|
||||
cites = {a.get('link_cite'): a
|
||||
for a in result.xpath('//a[@link_cite]')
|
||||
if not a.get('href', '')}
|
||||
all_ids = {x for x in result.xpath('//*/@id')}
|
||||
for cite, a in cites.items():
|
||||
note = notes.get(cite, None)
|
||||
@@ -137,8 +141,10 @@ class FB2Input(InputFormatPlugin):
|
||||
f.write(mi.cover_data[1])
|
||||
cpath = os.path.abspath('fb2_cover_calibre_mi.jpg')
|
||||
else:
|
||||
for img in doc.xpath('//f:coverpage/f:image', namespaces=NAMESPACES):
|
||||
href = img.get('{%s}href'%XLINK_NS, img.get('href', None))
|
||||
for img in doc.xpath('//f:coverpage/f:image',
|
||||
namespaces=NAMESPACES):
|
||||
href = img.get('{%s}href' % const.XLINK_NS,
|
||||
img.get('href', None))
|
||||
if href is not None:
|
||||
if href.startswith('#'):
|
||||
href = href[1:]
|
||||
@@ -165,15 +171,15 @@ class FB2Input(InputFormatPlugin):
|
||||
ext = ct.rpartition('/')[-1].lower()
|
||||
if ext in ('png', 'jpeg', 'jpg'):
|
||||
if fname.lower().rpartition('.')[-1] not in {'jpg', 'jpeg',
|
||||
'png'}:
|
||||
'png'}:
|
||||
fname += '.' + ext
|
||||
self.binary_map[elem.get('id')] = fname
|
||||
raw = elem.text.strip()
|
||||
try:
|
||||
data = base64_decode(raw)
|
||||
except TypeError:
|
||||
self.log.exception('Binary data with id=%s is corrupted, ignoring'%(
|
||||
elem.get('id')))
|
||||
self.log.exception('Binary data with id=%s is corrupted, '
|
||||
'ignoring' % elem.get('id'))
|
||||
else:
|
||||
with open(fname, 'wb') as f:
|
||||
f.write(data)
|
||||
|
||||
Reference in New Issue
Block a user