1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-03-11 20:15:46 +01:00

Use the real constants module.

This is part of an ongoing refactor of the calibre code to make it more
readable and to transform it into something more coherent.

In this patch, imports in some modules are changed so that each module's
namespace is no longer polluted with symbols from other modules, which
were often themselves imported from yet other modules. Yuck.
This commit is contained in:
2020-05-29 17:04:53 +02:00
parent ee4801228f
commit ce89f5c9d1
54 changed files with 2383 additions and 2081 deletions

View File

@@ -1,59 +1,57 @@
"""
Convert .fb2 files to .lrf
"""
import os, re
import os
import pkg_resources
import re
from ebook_converter.customize.conversion import InputFormatPlugin, OptionRecommendation
from lxml import etree
from ebook_converter import constants as const
from ebook_converter.customize.conversion import InputFormatPlugin
from ebook_converter.customize.conversion import OptionRecommendation
from ebook_converter import guess_type
__license__ = 'GPL v3'
__copyright__ = '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
FB2NS = 'http://www.gribuser.ru/xml/fictionbook/2.0'
FB2NS = 'http://www.gribuser.ru/xml/fictionbook/2.0'
FB21NS = 'http://www.gribuser.ru/xml/fictionbook/2.1'
class FB2Input(InputFormatPlugin):
name = 'FB2 Input'
author = 'Anatoly Shipitsin'
name = 'FB2 Input'
author = 'Anatoly Shipitsin'
description = 'Convert FB2 and FBZ files to HTML'
file_types = {'fb2', 'fbz'}
file_types = {'fb2', 'fbz'}
commit_name = 'fb2_input'
recommendations = {
('level1_toc', '//h:h1', OptionRecommendation.MED),
('level2_toc', '//h:h2', OptionRecommendation.MED),
('level3_toc', '//h:h3', OptionRecommendation.MED),
}
recommendations = {('level1_toc', '//h:h1', OptionRecommendation.MED),
('level2_toc', '//h:h2', OptionRecommendation.MED),
('level3_toc', '//h:h3', OptionRecommendation.MED)}
options = {
OptionRecommendation(name='no_inline_fb2_toc',
recommended_value=False, level=OptionRecommendation.LOW,
help='Do not insert a Table of Contents at the beginning of the book.'
)}
options = {OptionRecommendation(name='no_inline_fb2_toc',
recommended_value=False,
level=OptionRecommendation.LOW,
help='Do not insert a Table of Contents '
'at the beginning of the book.')}
def convert(self, stream, options, file_ext, log,
accelerators):
from lxml import etree
from ebook_converter.utils.xml_parse import safe_xml_fromstring
from ebook_converter.ebooks.metadata.fb2 import ensure_namespace, get_fb2_data
from ebook_converter.ebooks.metadata.fb2 import ensure_namespace
from ebook_converter.ebooks.metadata.fb2 import get_fb2_data
from ebook_converter.ebooks.metadata.opf2 import OPFCreator
from ebook_converter.ebooks.metadata.meta import get_metadata
from ebook_converter.ebooks.oeb.base import XLINK_NS, XHTML_NS
from ebook_converter.ebooks.chardet import xml_to_unicode
self.log = log
log.debug('Parsing XML...')
raw = get_fb2_data(stream)[0]
raw = raw.replace(b'\0', b'')
raw = xml_to_unicode(raw, strip_encoding_pats=True,
assume_utf8=True, resolve_entities=True)[0]
assume_utf8=True, resolve_entities=True)[0]
try:
doc = safe_xml_fromstring(raw)
doc = etree.fromstring(raw)
except etree.XMLSyntaxError:
doc = safe_xml_fromstring(raw.replace('& ', '&amp;'))
doc = etree.fromstring(raw.replace('& ', '&amp;'))
if doc is None:
raise ValueError('The FB2 file is not valid XML')
doc = ensure_namespace(doc)
@@ -62,22 +60,24 @@ class FB2Input(InputFormatPlugin):
except Exception:
fb_ns = FB2NS
NAMESPACES = {'f':fb_ns, 'l':XLINK_NS}
stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]')
NAMESPACES = {'f': fb_ns, 'l': const.XLINK_NS}
stylesheets = doc.xpath('//*[local-name() = "stylesheet" and '
'@type="text/css"]')
css = ''
for s in stylesheets:
css += etree.tostring(s, encoding='unicode', method='text',
with_tail=False) + '\n\n'
with_tail=False) + '\n\n'
if css:
import css_parser, logging
import css_parser
import logging
parser = css_parser.CSSParser(fetcher=None,
log=logging.getLogger('calibre.css'))
log=logging.getLogger('calibre.css'))
XHTML_CSS_NAMESPACE = '@namespace "%s";\n' % XHTML_NS
XHTML_CSS_NAMESPACE = '@namespace "%s";\n' % const.XHTML_NS
text = XHTML_CSS_NAMESPACE + css
log.debug('Parsing stylesheet...')
stylesheet = parser.parseString(text)
stylesheet.namespaces['h'] = XHTML_NS
stylesheet.namespaces['h'] = const.XHTML_NS
css = stylesheet.cssText
if isinstance(css, bytes):
css = css.decode('utf-8', 'replace')
@@ -92,16 +92,20 @@ class FB2Input(InputFormatPlugin):
if options.no_inline_fb2_toc:
log('Disabling generation of inline FB2 TOC')
ss = re.compile(r'<!-- BUILD TOC -->.*<!-- END BUILD TOC -->',
re.DOTALL).sub('', ss)
re.DOTALL).sub('', ss)
styledoc = safe_xml_fromstring(ss)
styledoc = etree.fromstring(ss)
transform = etree.XSLT(styledoc)
result = transform(doc)
# Handle links of type note and cite
notes = {a.get('href')[1:]: a for a in result.xpath('//a[@link_note and @href]') if a.get('href').startswith('#')}
cites = {a.get('link_cite'): a for a in result.xpath('//a[@link_cite]') if not a.get('href', '')}
notes = {a.get('href')[1:]: a
for a in result.xpath('//a[@link_note and @href]')
if a.get('href').startswith('#')}
cites = {a.get('link_cite'): a
for a in result.xpath('//a[@link_cite]')
if not a.get('href', '')}
all_ids = {x for x in result.xpath('//*/@id')}
for cite, a in cites.items():
note = notes.get(cite, None)
@@ -137,8 +141,10 @@ class FB2Input(InputFormatPlugin):
f.write(mi.cover_data[1])
cpath = os.path.abspath('fb2_cover_calibre_mi.jpg')
else:
for img in doc.xpath('//f:coverpage/f:image', namespaces=NAMESPACES):
href = img.get('{%s}href'%XLINK_NS, img.get('href', None))
for img in doc.xpath('//f:coverpage/f:image',
namespaces=NAMESPACES):
href = img.get('{%s}href' % const.XLINK_NS,
img.get('href', None))
if href is not None:
if href.startswith('#'):
href = href[1:]
@@ -165,15 +171,15 @@ class FB2Input(InputFormatPlugin):
ext = ct.rpartition('/')[-1].lower()
if ext in ('png', 'jpeg', 'jpg'):
if fname.lower().rpartition('.')[-1] not in {'jpg', 'jpeg',
'png'}:
'png'}:
fname += '.' + ext
self.binary_map[elem.get('id')] = fname
raw = elem.text.strip()
try:
data = base64_decode(raw)
except TypeError:
self.log.exception('Binary data with id=%s is corrupted, ignoring'%(
elem.get('id')))
self.log.exception('Binary data with id=%s is corrupted, '
'ignoring' % elem.get('id'))
else:
with open(fname, 'wb') as f:
f.write(data)