1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-03-04 07:45:48 +01:00

Added dependency for fb2ml, fixed oeb.base references in fb2 format

This commit is contained in:
2020-07-05 19:47:04 +02:00
parent 5ea3a5156d
commit 40e2cbab98
2 changed files with 19 additions and 24 deletions

View File

@@ -12,10 +12,11 @@ from lxml import etree
from ebook_converter import constants as const from ebook_converter import constants as const
from ebook_converter import prepare_string_for_xml from ebook_converter import prepare_string_for_xml
from ebook_converter.constants_old import __appname__, __version__ from ebook_converter.constants_old import __appname__, __version__
from ebook_converter.utils.localization import lang_as_iso639_1 from ebook_converter.ebooks.oeb import base
from ebook_converter.utils.img import save_cover_data_to from ebook_converter.ebooks.oeb import parse_utils
from ebook_converter.ebooks.oeb.base import urlnormalize
from ebook_converter.polyglot.binary import as_base64_unicode from ebook_converter.polyglot.binary import as_base64_unicode
from ebook_converter.utils.img import save_cover_data_to
from ebook_converter.utils.localization import lang_as_iso639_1
__license__ = 'GPL 3' __license__ = 'GPL 3'
@@ -114,7 +115,6 @@ class FB2MLizer(object):
return text return text
def fb2_header(self): def fb2_header(self):
from ebook_converter.ebooks.oeb.base import OPF
metadata = {} metadata = {}
metadata['title'] = self.oeb_book.metadata.title[0].value metadata['title'] = self.oeb_book.metadata.title[0].value
metadata['appname'] = __appname__ metadata['appname'] = __appname__
@@ -179,7 +179,7 @@ class FB2MLizer(object):
year = publisher = isbn = '' year = publisher = isbn = ''
identifiers = self.oeb_book.metadata['identifier'] identifiers = self.oeb_book.metadata['identifier']
for x in identifiers: for x in identifiers:
if (x.get(OPF('scheme'), None).lower() == 'uuid' or if (x.get(base.tag('opf', 'scheme'), None).lower() == 'uuid' or
str(x).startswith('urn:uuid:')): str(x).startswith('urn:uuid:')):
metadata['id'] = str(x).split(':')[-1] metadata['id'] = str(x).split(':')[-1]
break break
@@ -204,7 +204,7 @@ class FB2MLizer(object):
prepare_string_for_xml(publisher.value)) prepare_string_for_xml(publisher.value))
for x in identifiers: for x in identifiers:
if x.get(OPF('scheme'), None).lower() == 'isbn': if x.get(base.tag('opf', 'scheme'), None).lower() == 'isbn':
isbn = '<isbn>%s</isbn>' % prepare_string_for_xml(x.value) isbn = '<isbn>%s</isbn>' % prepare_string_for_xml(x.value)
metadata['year'] = year metadata['year'] = year
@@ -259,8 +259,6 @@ class FB2MLizer(object):
return '</FictionBook>' return '</FictionBook>'
def get_cover(self): def get_cover(self):
from ebook_converter.ebooks.oeb.base import OEB_RASTER_IMAGES
cover_href = None cover_href = None
# Get the raster cover if it's available. # Get the raster cover if it's available.
@@ -269,7 +267,7 @@ class FB2MLizer(object):
self.oeb_book.manifest.ids): self.oeb_book.manifest.ids):
id = str(self.oeb_book.metadata.cover[0]) id = str(self.oeb_book.metadata.cover[0])
cover_item = self.oeb_book.manifest.ids[id] cover_item = self.oeb_book.manifest.ids[id]
if cover_item.media_type in OEB_RASTER_IMAGES: if cover_item.media_type in base.OEB_RASTER_IMAGES:
cover_href = cover_item.href cover_href = cover_item.href
else: else:
# Figure out if we have a title page or a cover page # Figure out if we have a title page or a cover page
@@ -297,7 +295,6 @@ class FB2MLizer(object):
return '' return ''
def get_text(self): def get_text(self):
from ebook_converter.ebooks.oeb.base import XHTML
from ebook_converter.ebooks.oeb.stylizer import Stylizer from ebook_converter.ebooks.oeb.stylizer import Stylizer
text = ['<body>'] text = ['<body>']
@@ -320,8 +317,8 @@ class FB2MLizer(object):
page_section_open = True page_section_open = True
self.section_level += 1 self.section_level += 1
text += self.dump_text(item.data.find(XHTML('body')), stylizer, text += self.dump_text(item.data.find(base.tag('xhtml', 'body')),
item) stylizer, item)
if page_section_open: if page_section_open:
text.append('</section>') text.append('</section>')
@@ -340,15 +337,13 @@ class FB2MLizer(object):
This function uses the self.image_hrefs dictionary mapping. It is This function uses the self.image_hrefs dictionary mapping. It is
populated by the dump_text function. populated by the dump_text function.
""" """
from ebook_converter.ebooks.oeb.base import OEB_RASTER_IMAGES
images = [] images = []
for item in self.oeb_book.manifest: for item in self.oeb_book.manifest:
# Don't write the image if it's not referenced in the document's # Don't write the image if it's not referenced in the document's
# text. # text.
if item.href not in self.image_hrefs: if item.href not in self.image_hrefs:
continue continue
if item.media_type in OEB_RASTER_IMAGES: if item.media_type in base.OEB_RASTER_IMAGES:
try: try:
if item.media_type not in ('image/jpeg', 'image/png'): if item.media_type not in ('image/jpeg', 'image/png'):
imdata = save_cover_data_to(item.data, imdata = save_cover_data_to(item.data,
@@ -423,7 +418,7 @@ class FB2MLizer(object):
return s_out, s_tags return s_out, s_tags
def dump_text(self, elem_tree, stylizer, page, tag_stack=[]): def dump_text(self, elem_tree, stylizer, page, tag_stack=[]):
''' """
This function is intended to be used in a recursive manner. dump_text This function is intended to be used in a recursive manner. dump_text
will run through all elements in the elem_tree and call itself on each will run through all elements in the elem_tree and call itself on each
element. element.
@@ -437,18 +432,17 @@ class FB2MLizer(object):
@param tag_stack: List of open FB2 tags to take into account. @param tag_stack: List of open FB2 tags to take into account.
@return: List of strings representing the XHTML converted to FB2 markup. @return: List of strings representing the XHTML converted to FB2 markup.
''' """
from ebook_converter.ebooks.oeb.base import barename
from ebook_converter.ebooks.oeb.base import namespace
elem = elem_tree elem = elem_tree
# Ensure what we are converting is not a string and that the first tag # Ensure what we are converting is not a string and that the first tag
# is part of the XHTML namespace. # is part of the XHTML namespace.
if (not isinstance(elem_tree.tag, (str, bytes)) or if (not isinstance(elem_tree.tag, (str, bytes)) or
namespace(elem_tree.tag) != const.XHTML_NS): parse_utils.namespace(elem_tree.tag) != const.XHTML_NS):
p = elem.getparent() p = elem.getparent()
if (p is not None and isinstance(p.tag, (str, bytes)) and if (p is not None and isinstance(p.tag, (str, bytes)) and
namespace(p.tag) == const.XHTML_NS and elem.tail): parse_utils.namespace(p.tag) == const.XHTML_NS and
elem.tail):
return [elem.tail] return [elem.tail]
return [] return []
@@ -465,7 +459,7 @@ class FB2MLizer(object):
# the tags. # the tags.
tags = [] tags = []
# First tag in tree # First tag in tree
tag = barename(elem_tree.tag) tag = parse_utils.barename(elem_tree.tag)
# Number of blank lines above tag # Number of blank lines above tag
try: try:
ems = int(round((float(style.marginTop) / style.fontSize) - 1)) ems = int(round((float(style.marginTop) / style.fontSize) - 1))
@@ -517,7 +511,7 @@ class FB2MLizer(object):
# tag but it can have multiple styles. # tag but it can have multiple styles.
if tag == 'img' and elem_tree.attrib.get('src', None): if tag == 'img' and elem_tree.attrib.get('src', None):
# Only write the image tag if it is in the manifest. # Only write the image tag if it is in the manifest.
ihref = urlnormalize(page.abshref(elem_tree.attrib['src'])) ihref = base.urlnormalize(page.abshref(elem_tree.attrib['src']))
if ihref in self.oeb_book.manifest.hrefs: if ihref in self.oeb_book.manifest.hrefs:
if ihref not in self.image_hrefs: if ihref not in self.image_hrefs:
self.image_hrefs[ihref] = 'img_%s' % len(self.image_hrefs) self.image_hrefs[ihref] = 'img_%s' % len(self.image_hrefs)
@@ -560,7 +554,7 @@ class FB2MLizer(object):
fb2_out += p_txt fb2_out += p_txt
tags += p_tag tags += p_tag
fb2_out.append('<a l:href="%s">' % fb2_out.append('<a l:href="%s">' %
urlnormalize(elem_tree.attrib['href'])) base.urlnormalize(elem_tree.attrib['href']))
tags.append('a') tags.append('a')
if tag == 'b' or style['font-weight'] in ('bold', 'bolder'): if tag == 'b' or style['font-weight'] in ('bold', 'bolder'):
s_out, s_tags = self.handle_simple_tag('strong', tag_stack+tags) s_out, s_tags = self.handle_simple_tag('strong', tag_stack+tags)

View File

@@ -36,6 +36,7 @@ install_requires =
html5-parser html5-parser
odfpy odfpy
setuptools setuptools
html2text
[options.entry_points] [options.entry_points]
console_scripts = console_scripts =