mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-04-28 17:54:06 +02:00
Use the real constants module.
This is part of an ongoing refactor of the calibre code, meant to make it more readable and to transform it into something more coherent. This patch changes the imports in several modules: instead of polluting each module's namespace with symbols taken from other modules (symbols which were often themselves imported from yet other modules), the code now imports the modules and references the symbols through them. Yuck.
This commit is contained in:
@@ -5,8 +5,10 @@ import copy
|
||||
import re
|
||||
import numbers
|
||||
from lxml import etree
|
||||
from ebook_converter.ebooks.oeb.base import namespace, barename
|
||||
from ebook_converter.ebooks.oeb.base import XHTML, XHTML_NS, urlnormalize
|
||||
|
||||
from ebook_converter import constants as const
|
||||
from ebook_converter.ebooks.oeb import base
|
||||
from ebook_converter.ebooks.oeb import parse_utils
|
||||
from ebook_converter.ebooks.oeb.stylizer import Stylizer
|
||||
from ebook_converter.ebooks.oeb.transforms.flatcss import KeyMapper
|
||||
from ebook_converter.ebooks.mobi.utils import convert_color_for_font_tag
|
||||
@@ -23,7 +25,7 @@ def MBP(name):
|
||||
return '{%s}%s' % (MBP_NS, name)
|
||||
|
||||
|
||||
MOBI_NSMAP = {None: XHTML_NS, 'mbp': MBP_NS}
|
||||
MOBI_NSMAP = {None: const.XHTML_NS, 'mbp': const.MBP_NS}
|
||||
INLINE_TAGS = {'span', 'a', 'code', 'u', 's', 'big', 'strike', 'tt', 'font', 'q', 'i', 'b', 'em', 'strong', 'sup', 'sub'}
|
||||
HEADER_TAGS = {'h1', 'h2', 'h3', 'h4', 'h5', 'h6'}
|
||||
# GR: Added 'caption' to both sets
|
||||
@@ -129,9 +131,9 @@ class MobiMLizer(object):
|
||||
'Iterate over the spine and convert it to MOBIML'
|
||||
for item in self.oeb.spine:
|
||||
stylizer = Stylizer(item.data, item.href, self.oeb, self.opts, self.profile)
|
||||
body = item.data.find(XHTML('body'))
|
||||
nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
|
||||
nbody = etree.SubElement(nroot, XHTML('body'))
|
||||
body = item.data.find(base.tag('xhtml', 'body'))
|
||||
nroot = etree.Element(base.tag('xhtml', 'html'), nsmap=MOBI_NSMAP)
|
||||
nbody = etree.SubElement(nroot, base.tag('xhtml', 'body'))
|
||||
self.current_spine_item = item
|
||||
self.mobimlize_elem(body, stylizer, BlockState(nbody),
|
||||
[FormatState()])
|
||||
@@ -162,7 +164,7 @@ class MobiMLizer(object):
|
||||
lines = text.split('\n')
|
||||
result = lines[:1]
|
||||
for line in lines[1:]:
|
||||
result.append(etree.Element(XHTML('br')))
|
||||
result.append(etree.Element(base.tag('xhtml', 'br')))
|
||||
if line:
|
||||
result.append(line)
|
||||
return result
|
||||
@@ -194,7 +196,7 @@ class MobiMLizer(object):
|
||||
indent = (indent / abs(indent)) * self.profile.fbase
|
||||
if tag in NESTABLE_TAGS and not istate.rendered:
|
||||
para = wrapper = etree.SubElement(
|
||||
parent, XHTML(tag), attrib=istate.attrib)
|
||||
parent, base.tag('xhtml', tag), attrib=istate.attrib)
|
||||
bstate.nested.append(para)
|
||||
if tag == 'li' and len(istates) > 1:
|
||||
istates[-2].list_num += 1
|
||||
@@ -203,21 +205,21 @@ class MobiMLizer(object):
|
||||
para = wrapper = bstate.nested[-1]
|
||||
elif not self.opts.mobi_ignore_margins and left > 0 and indent >= 0:
|
||||
ems = self.profile.mobi_ems_per_blockquote
|
||||
para = wrapper = etree.SubElement(parent, XHTML('blockquote'))
|
||||
para = wrapper = etree.SubElement(parent, base.tag('xhtml', 'blockquote'))
|
||||
para = wrapper
|
||||
emleft = int(round(left / self.profile.fbase)) - ems
|
||||
emleft = min((emleft, 10))
|
||||
while emleft > ems / 2:
|
||||
para = etree.SubElement(para, XHTML('blockquote'))
|
||||
para = etree.SubElement(para, base.tag('xhtml', 'blockquote'))
|
||||
emleft -= ems
|
||||
else:
|
||||
para = wrapper = etree.SubElement(parent, XHTML('p'))
|
||||
para = wrapper = etree.SubElement(parent, base.tag('xhtml', 'p'))
|
||||
bstate.inline = bstate.para = para
|
||||
vspace = bstate.vpadding + bstate.vmargin
|
||||
bstate.vpadding = bstate.vmargin = 0
|
||||
if tag not in TABLE_TAGS:
|
||||
if tag in ('ul', 'ol') and vspace > 0:
|
||||
wrapper.addprevious(etree.Element(XHTML('div'),
|
||||
wrapper.addprevious(etree.Element(base.tag('xhtml', 'div'),
|
||||
height=self.mobimlize_measure(vspace)))
|
||||
else:
|
||||
wrapper.attrib['height'] = self.mobimlize_measure(vspace)
|
||||
@@ -225,7 +227,7 @@ class MobiMLizer(object):
|
||||
elif tag == 'table' and vspace > 0:
|
||||
vspace = int(round(vspace / self.profile.fbase))
|
||||
while vspace > 0:
|
||||
wrapper.addprevious(etree.Element(XHTML('br')))
|
||||
wrapper.addprevious(etree.Element(base.tag('xhtml', 'br')))
|
||||
vspace -= 1
|
||||
if istate.halign != 'auto' and isinstance(istate.halign, (bytes, str)):
|
||||
if isinstance(istate.halign, bytes):
|
||||
@@ -237,7 +239,7 @@ class MobiMLizer(object):
|
||||
bstate.inline = para
|
||||
pstate = bstate.istate = None
|
||||
try:
|
||||
etree.SubElement(para, XHTML(tag), attrib=istate.attrib)
|
||||
etree.SubElement(para, base.tag('xhtml', tag), attrib=istate.attrib)
|
||||
except:
|
||||
print('Invalid subelement:', para, tag, istate.attrib)
|
||||
raise
|
||||
@@ -245,7 +247,7 @@ class MobiMLizer(object):
|
||||
para.attrib['valign'] = 'top'
|
||||
if istate.ids:
|
||||
for id_ in istate.ids:
|
||||
anchor = etree.Element(XHTML('a'), attrib={'id': id_})
|
||||
anchor = etree.Element(base.tag('xhtml', 'a'), attrib={'id': id_})
|
||||
if tag == 'li':
|
||||
try:
|
||||
last = bstate.body[-1][-1]
|
||||
@@ -262,7 +264,7 @@ class MobiMLizer(object):
|
||||
# This could potentially break if inserting an anchor at
|
||||
# this point in the markup is illegal, but I cannot think
|
||||
# of such a case offhand.
|
||||
if barename(last.tag) in LEAF_TAGS:
|
||||
if parse_utils.barename(last.tag) in LEAF_TAGS:
|
||||
last.addprevious(anchor)
|
||||
else:
|
||||
last.append(anchor)
|
||||
@@ -279,28 +281,28 @@ class MobiMLizer(object):
|
||||
elif pstate and pstate.href == href:
|
||||
inline = bstate.anchor
|
||||
else:
|
||||
inline = etree.SubElement(inline, XHTML('a'), href=href)
|
||||
inline = etree.SubElement(inline, base.tag('xhtml', 'a'), href=href)
|
||||
bstate.anchor = inline
|
||||
|
||||
if fsize != 3:
|
||||
inline = etree.SubElement(inline, XHTML('font'),
|
||||
inline = etree.SubElement(inline, base.tag('xhtml', 'font'),
|
||||
size=str(fsize))
|
||||
if istate.family == 'monospace':
|
||||
inline = etree.SubElement(inline, XHTML('tt'))
|
||||
inline = etree.SubElement(inline, base.tag('xhtml', 'tt'))
|
||||
if istate.italic:
|
||||
inline = etree.SubElement(inline, XHTML('i'))
|
||||
inline = etree.SubElement(inline, base.tag('xhtml', 'i'))
|
||||
if istate.bold:
|
||||
inline = etree.SubElement(inline, XHTML('b'))
|
||||
inline = etree.SubElement(inline, base.tag('xhtml', 'b'))
|
||||
if istate.bgcolor is not None and istate.bgcolor != 'transparent' :
|
||||
inline = etree.SubElement(inline, XHTML('span'),
|
||||
inline = etree.SubElement(inline, base.tag('xhtml', 'span'),
|
||||
bgcolor=convert_color_for_font_tag(istate.bgcolor))
|
||||
if istate.fgcolor != 'black':
|
||||
inline = etree.SubElement(inline, XHTML('font'),
|
||||
inline = etree.SubElement(inline, base.tag('xhtml', 'font'),
|
||||
color=convert_color_for_font_tag(istate.fgcolor))
|
||||
if istate.strikethrough:
|
||||
inline = etree.SubElement(inline, XHTML('s'))
|
||||
inline = etree.SubElement(inline, base.tag('xhtml', 's'))
|
||||
if istate.underline:
|
||||
inline = etree.SubElement(inline, XHTML('u'))
|
||||
inline = etree.SubElement(inline, base.tag('xhtml', 'u'))
|
||||
bstate.inline = inline
|
||||
bstate.istate = istate
|
||||
inline = bstate.inline
|
||||
@@ -318,7 +320,7 @@ class MobiMLizer(object):
|
||||
def mobimlize_elem(self, elem, stylizer, bstate, istates,
|
||||
ignore_valign=False):
|
||||
if not isinstance(elem.tag, (str, bytes)) \
|
||||
or namespace(elem.tag) != XHTML_NS:
|
||||
or parse_utils.namespace(elem.tag) != const.XHTML_NS:
|
||||
return
|
||||
style = stylizer.style(elem)
|
||||
# <mbp:frame-set/> does not exist lalalala
|
||||
@@ -333,10 +335,10 @@ class MobiMLizer(object):
|
||||
elem.text = None
|
||||
elem.set('id', id_)
|
||||
elem.tail = tail
|
||||
elem.tag = XHTML('a')
|
||||
elem.tag = base.tag('xhtml', 'a')
|
||||
else:
|
||||
return
|
||||
tag = barename(elem.tag)
|
||||
tag = parse_utils.barename(elem.tag)
|
||||
istate = copy.copy(istates[-1])
|
||||
istate.rendered = False
|
||||
istate.list_num = 0
|
||||
@@ -451,7 +453,7 @@ class MobiMLizer(object):
|
||||
if 'width' not in istate.attrib or 'height' not in istate.attrib:
|
||||
href = self.current_spine_item.abshref(elem.attrib['src'])
|
||||
try:
|
||||
item = self.oeb.manifest.hrefs[urlnormalize(href)]
|
||||
item = self.oeb.manifest.hrefs[base.urlnormalize(href)]
|
||||
except:
|
||||
self.oeb.logger.warn('Failed to find image:',
|
||||
href)
|
||||
@@ -534,9 +536,9 @@ class MobiMLizer(object):
|
||||
isinstance(valign, numbers.Number) and valign > 0)
|
||||
vtag = 'sup' if issup else 'sub'
|
||||
if not_baseline and not ignore_valign and tag not in NOT_VTAGS and not isblock:
|
||||
nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
|
||||
vbstate = BlockState(etree.SubElement(nroot, XHTML('body')))
|
||||
vbstate.para = etree.SubElement(vbstate.body, XHTML('p'))
|
||||
nroot = etree.Element(base.tag('xhtml', 'html'), nsmap=MOBI_NSMAP)
|
||||
vbstate = BlockState(etree.SubElement(nroot, base.tag('xhtml', 'body')))
|
||||
vbstate.para = etree.SubElement(vbstate.body, base.tag('xhtml', 'p'))
|
||||
self.mobimlize_elem(elem, stylizer, vbstate, istates,
|
||||
ignore_valign=True)
|
||||
if len(istates) > 0:
|
||||
@@ -548,8 +550,8 @@ class MobiMLizer(object):
|
||||
self.mobimlize_content('span', '', bstate, istates)
|
||||
parent = bstate.para if bstate.inline is None else bstate.inline
|
||||
if parent is not None:
|
||||
vtag = etree.SubElement(parent, XHTML(vtag))
|
||||
vtag = etree.SubElement(vtag, XHTML('small'))
|
||||
vtag = etree.SubElement(parent, base.tag('xhtml', vtag))
|
||||
vtag = etree.SubElement(vtag, base.tag('xhtml', 'small'))
|
||||
# Add anchors
|
||||
for child in vbstate.body:
|
||||
if child is not vbstate.para:
|
||||
@@ -601,7 +603,7 @@ class MobiMLizer(object):
|
||||
para = bstate.para
|
||||
if para is not None and para.text == '\xa0' and len(para) < 1:
|
||||
if style.height > 2:
|
||||
para.getparent().replace(para, etree.Element(XHTML('br')))
|
||||
para.getparent().replace(para, etree.Element(base.tag('xhtml', 'br')))
|
||||
else:
|
||||
# This is too small to be rendered effectively, drop it
|
||||
para.getparent().remove(para)
|
||||
|
||||
@@ -8,6 +8,7 @@ import uuid
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from ebook_converter import constants as const
|
||||
from ebook_converter.ebooks.mobi.reader.headers import NULL_INDEX
|
||||
from ebook_converter.ebooks.mobi.reader.index import read_index
|
||||
from ebook_converter.ebooks.mobi.reader.ncx import read_ncx, build_toc
|
||||
@@ -17,7 +18,7 @@ from ebook_converter.ebooks.metadata.opf2 import Guide, OPFCreator
|
||||
from ebook_converter.ebooks.metadata.toc import TOC
|
||||
from ebook_converter.ebooks.mobi.utils import read_font_record
|
||||
from ebook_converter.ebooks.oeb.parse_utils import parse_html
|
||||
from ebook_converter.ebooks.oeb.base import XPath, XHTML, xml2text
|
||||
from ebook_converter.ebooks.oeb.base import XPath, xml2text
|
||||
from ebook_converter.polyglot.builtins import as_unicode
|
||||
|
||||
|
||||
@@ -553,8 +554,8 @@ class Mobi8Reader(object):
|
||||
seen = set()
|
||||
links = []
|
||||
for elem in root.iterdescendants(etree.Element):
|
||||
if reached and elem.tag == XHTML('a') and elem.get('href',
|
||||
False):
|
||||
if reached and elem.tag == const.XHTML_A and elem.get('href',
|
||||
False):
|
||||
href = elem.get('href')
|
||||
href, frag = urllib.parse.urldefrag(href)
|
||||
href = base_href + '/' + href
|
||||
|
||||
@@ -4,16 +4,11 @@ import re
|
||||
import unicodedata
|
||||
import urllib.parse
|
||||
|
||||
from ebook_converter import constants as const
|
||||
from ebook_converter.ebooks.mobi.mobiml import MBP_NS
|
||||
from ebook_converter.ebooks.mobi.utils import is_guide_ref_start
|
||||
from ebook_converter.ebooks.oeb.base import (
|
||||
OEB_DOCS, XHTML, XHTML_NS, XML_NS, namespace, prefixname, urlnormalize
|
||||
)
|
||||
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
from ebook_converter.ebooks.oeb import base
|
||||
from ebook_converter.ebooks.oeb import parse_utils
|
||||
|
||||
|
||||
class Buf(io.BytesIO):
|
||||
@@ -25,9 +20,14 @@ class Buf(io.BytesIO):
|
||||
|
||||
|
||||
class Serializer(object):
|
||||
NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'}
|
||||
NSRMAP = {'': None,
|
||||
const.XML_NS: 'xml',
|
||||
const.XHTML_NS: '',
|
||||
MBP_NS: 'mbp'} # TODO(gryf): check why this is different than
|
||||
# MBP_NS from const.
|
||||
|
||||
def __init__(self, oeb, images, is_periodical, write_page_breaks_after_item=True):
|
||||
def __init__(self, oeb, images, is_periodical,
|
||||
write_page_breaks_after_item=True):
|
||||
'''
|
||||
Write all the HTML markup in oeb into a single in memory buffer
|
||||
containing a single html document with links replaced by offsets into
|
||||
@@ -157,7 +157,8 @@ class Serializer(object):
|
||||
buf.write(b'<guide>')
|
||||
for ref in self.oeb.guide.values():
|
||||
path = urllib.parse.urldefrag(ref.href)[0]
|
||||
if path not in hrefs or hrefs[path].media_type not in OEB_DOCS:
|
||||
if (path not in hrefs or
|
||||
hrefs[path].media_type not in base.OEB_DOCS):
|
||||
continue
|
||||
|
||||
buf.write(b'<reference type="')
|
||||
@@ -178,28 +179,28 @@ class Serializer(object):
|
||||
|
||||
buf.write(b'</guide>')
|
||||
|
||||
def serialize_href(self, href, base=None):
|
||||
'''
|
||||
def serialize_href(self, href, _base=None):
|
||||
"""
|
||||
Serialize the href attribute of an <a> or <reference> tag. It is
|
||||
serialized as filepos="000000000" and a pointer to its location is
|
||||
stored in self.href_offsets so that the correct value can be filled in
|
||||
at the end.
|
||||
'''
|
||||
"""
|
||||
hrefs = self.oeb.manifest.hrefs
|
||||
try:
|
||||
path, frag = urllib.parse.urldefrag(urlnormalize(href))
|
||||
path, frag = urllib.parse.urldefrag(base.urlnormalize(href))
|
||||
except ValueError:
|
||||
# Unparseable URL
|
||||
return False
|
||||
if path and base:
|
||||
path = base.abshref(path)
|
||||
if path and _base:
|
||||
path = _base.abshref(path)
|
||||
if path and path not in hrefs:
|
||||
return False
|
||||
buf = self.buf
|
||||
item = hrefs[path] if path else None
|
||||
if item and item.spine_position is None:
|
||||
return False
|
||||
path = item.href if item else base.href
|
||||
path = item.href if item else _base.href
|
||||
href = '#'.join((path, frag)) if frag else path
|
||||
buf.write(b'filepos=')
|
||||
self.href_offsets[href].append(buf.tell())
|
||||
@@ -219,7 +220,7 @@ class Serializer(object):
|
||||
if href is not None:
|
||||
# resolve the section url in id_offsets
|
||||
buf.write(b'<mbp:pagebreak />')
|
||||
self.id_offsets[urlnormalize(href)] = buf.tell()
|
||||
self.id_offsets[base.urlnormalize(href)] = buf.tell()
|
||||
|
||||
if tocref.klass == "periodical":
|
||||
buf.write(b'<div> <div height="1em"></div>')
|
||||
@@ -267,7 +268,7 @@ class Serializer(object):
|
||||
|
||||
if self.is_periodical and item.is_section_start:
|
||||
for section_toc in top_toc.nodes:
|
||||
if urlnormalize(item.href) == section_toc.href:
|
||||
if base.urlnormalize(item.href) == section_toc.href:
|
||||
# create section url of the form r'feed_\d+/index.html'
|
||||
section_url = re.sub(r'article_\d+/', '', section_toc.href)
|
||||
serialize_toc_level(section_toc, section_url)
|
||||
@@ -287,12 +288,12 @@ class Serializer(object):
|
||||
buf = self.buf
|
||||
if not item.linear:
|
||||
self.breaks.append(buf.tell() - 1)
|
||||
self.id_offsets[urlnormalize(item.href)] = buf.tell()
|
||||
self.id_offsets[base.urlnormalize(item.href)] = buf.tell()
|
||||
if item.is_section_start:
|
||||
buf.write(b'<a ></a> ')
|
||||
if item.is_article_start:
|
||||
buf.write(b'<a ></a> <a ></a>')
|
||||
for elem in item.data.find(XHTML('body')):
|
||||
for elem in item.data.find(base.tag('xhtml', 'body')):
|
||||
self.serialize_elem(elem, item)
|
||||
if self.write_page_breaks_after_item:
|
||||
buf.write(b'<mbp:pagebreak/>')
|
||||
@@ -306,15 +307,15 @@ class Serializer(object):
|
||||
def serialize_elem(self, elem, item, nsrmap=NSRMAP):
|
||||
buf = self.buf
|
||||
if not isinstance(elem.tag, (str, bytes)) \
|
||||
or namespace(elem.tag) not in nsrmap:
|
||||
or parse_utils.namespace(elem.tag) not in nsrmap:
|
||||
return
|
||||
tag = prefixname(elem.tag, nsrmap)
|
||||
tag = base.prefixname(elem.tag, nsrmap)
|
||||
# Previous layers take care of @name
|
||||
id_ = elem.attrib.pop('id', None)
|
||||
if id_:
|
||||
href = '#'.join((item.href, id_))
|
||||
offset = self.anchor_offset or buf.tell()
|
||||
key = urlnormalize(href)
|
||||
key = base.urlnormalize(href)
|
||||
# Only set this id_offset if it wasn't previously seen
|
||||
self.id_offsets[key] = self.id_offsets.get(key, offset)
|
||||
if self.anchor_offset is not None and \
|
||||
@@ -326,15 +327,15 @@ class Serializer(object):
|
||||
buf.write(tag.encode('utf-8'))
|
||||
if elem.attrib:
|
||||
for attr, val in elem.attrib.items():
|
||||
if namespace(attr) not in nsrmap:
|
||||
if parse_utils.namespace(attr) not in nsrmap:
|
||||
continue
|
||||
attr = prefixname(attr, nsrmap)
|
||||
attr = base.prefixname(attr, nsrmap)
|
||||
buf.write(b' ')
|
||||
if attr == 'href':
|
||||
if self.serialize_href(val, item):
|
||||
continue
|
||||
elif attr == 'src':
|
||||
href = urlnormalize(item.abshref(val))
|
||||
href = base.urlnormalize(item.abshref(val))
|
||||
if href in self.images:
|
||||
index = self.images[href]
|
||||
self.used_images.add(href)
|
||||
|
||||
@@ -2,6 +2,7 @@ import re
|
||||
from struct import pack
|
||||
from io import BytesIO
|
||||
|
||||
from ebook_converter.ebooks.oeb import base
|
||||
from ebook_converter.constants_old import iswindows, isosx
|
||||
from ebook_converter.ebooks.mobi.utils import (utf8_text, to_base)
|
||||
from ebook_converter.utils.localization import lang_as_iso639_1
|
||||
@@ -95,9 +96,8 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
|
||||
|
||||
# Write UUID as ASIN
|
||||
uuid = None
|
||||
from ebook_converter.ebooks.oeb.base import OPF
|
||||
for x in metadata['identifier']:
|
||||
if (x.get(OPF('scheme'), None).lower() == 'uuid' or
|
||||
if (x.get(base.tag('opf', 'scheme'), None).lower() == 'uuid' or
|
||||
str(x).startswith('urn:uuid:')):
|
||||
uuid = str(x).split(':')[-1]
|
||||
break
|
||||
|
||||
Reference in New Issue
Block a user