mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-04-13 00:43:34 +02:00
Moved misc functions from polyglot package to single polyglot module.
This commit is contained in:
@@ -5,17 +5,12 @@ import os
|
|||||||
from lxml import html
|
from lxml import html
|
||||||
from lxml.html import builder
|
from lxml.html import builder
|
||||||
|
|
||||||
from ebook_converter.polyglot.urllib import unquote as _unquote
|
|
||||||
from ebook_converter.ebooks.oeb.base import urlquote
|
from ebook_converter.ebooks.oeb.base import urlquote
|
||||||
from ebook_converter.ebooks.chardet import xml_to_unicode
|
from ebook_converter.ebooks.chardet import xml_to_unicode
|
||||||
from ebook_converter.customize.conversion import InputFormatPlugin
|
from ebook_converter.customize.conversion import InputFormatPlugin
|
||||||
from ebook_converter.ptempfile import TemporaryDirectory
|
from ebook_converter.ptempfile import TemporaryDirectory
|
||||||
from ebook_converter.constants_old import filesystem_encoding
|
from ebook_converter.constants_old import filesystem_encoding
|
||||||
from ebook_converter.polyglot.builtins import as_bytes
|
from ebook_converter import polyglot
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = ('2008, Kovid Goyal <kovid at kovidgoyal.net>, '
|
|
||||||
'and Alex Bramley <a.bramley at gmail.com>.')
|
|
||||||
|
|
||||||
|
|
||||||
class CHMInput(InputFormatPlugin):
|
class CHMInput(InputFormatPlugin):
|
||||||
@@ -133,7 +128,7 @@ class CHMInput(InputFormatPlugin):
|
|||||||
def unquote(x):
|
def unquote(x):
|
||||||
if isinstance(x, str):
|
if isinstance(x, str):
|
||||||
x = x.encode('utf-8')
|
x = x.encode('utf-8')
|
||||||
return _unquote(x).decode('utf-8')
|
return polyglot.unquote(x).decode('utf-8')
|
||||||
|
|
||||||
def unquote_path(x):
|
def unquote_path(x):
|
||||||
y = unquote(x)
|
y = unquote(x)
|
||||||
@@ -175,7 +170,7 @@ class CHMInput(InputFormatPlugin):
|
|||||||
pretty_print=True)
|
pretty_print=True)
|
||||||
f.write(raw)
|
f.write(raw)
|
||||||
else:
|
else:
|
||||||
f.write(as_bytes(hhcdata))
|
f.write(polyglot.as_bytes(hhcdata))
|
||||||
return htmlpath, toc
|
return htmlpath, toc
|
||||||
|
|
||||||
def _read_file(self, name):
|
def _read_file(self, name):
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ from ebook_converter.ebooks.oeb import parse_utils
|
|||||||
from ebook_converter.customize.conversion import OutputFormatPlugin
|
from ebook_converter.customize.conversion import OutputFormatPlugin
|
||||||
from ebook_converter.customize.conversion import OptionRecommendation
|
from ebook_converter.customize.conversion import OptionRecommendation
|
||||||
from ebook_converter.ptempfile import TemporaryDirectory
|
from ebook_converter.ptempfile import TemporaryDirectory
|
||||||
from ebook_converter.polyglot.builtins import as_bytes
|
from ebook_converter import polyglot
|
||||||
from ebook_converter.utils import directory
|
from ebook_converter.utils import directory
|
||||||
|
|
||||||
|
|
||||||
@@ -266,7 +266,8 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
extra_entries=extra_entries) as epub:
|
extra_entries=extra_entries) as epub:
|
||||||
epub.add_dir(tdir)
|
epub.add_dir(tdir)
|
||||||
if encryption is not None:
|
if encryption is not None:
|
||||||
epub.writestr('META-INF/encryption.xml', as_bytes(encryption))
|
epub.writestr('META-INF/encryption.xml',
|
||||||
|
polyglot.as_bytes(encryption))
|
||||||
if metadata_xml is not None:
|
if metadata_xml is not None:
|
||||||
epub.writestr('META-INF/metadata.xml',
|
epub.writestr('META-INF/metadata.xml',
|
||||||
metadata_xml.encode('utf-8'))
|
metadata_xml.encode('utf-8'))
|
||||||
@@ -308,12 +309,10 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
def encrypt_fonts(self, uris, tdir, _uuid): # {{{
|
def encrypt_fonts(self, uris, tdir, _uuid): # {{{
|
||||||
from ebook_converter.polyglot.binary import from_hex_bytes
|
|
||||||
|
|
||||||
key = re.sub(r'[^a-fA-F0-9]', '', _uuid)
|
key = re.sub(r'[^a-fA-F0-9]', '', _uuid)
|
||||||
if len(key) < 16:
|
if len(key) < 16:
|
||||||
raise ValueError('UUID identifier %r is invalid'% _uuid)
|
raise ValueError('UUID identifier %r is invalid'% _uuid)
|
||||||
key = bytearray(from_hex_bytes((key + key)[:32]))
|
key = bytearray(polyglot.from_hex_bytes((key + key)[:32]))
|
||||||
paths = []
|
paths = []
|
||||||
with directory.CurrentDir(tdir):
|
with directory.CurrentDir(tdir):
|
||||||
paths = [os.path.join(*x.split('/')) for x in uris]
|
paths = [os.path.join(*x.split('/')) for x in uris]
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ from lxml import etree
|
|||||||
|
|
||||||
from ebook_converter.customize.conversion import OutputFormatPlugin, OptionRecommendation
|
from ebook_converter.customize.conversion import OutputFormatPlugin, OptionRecommendation
|
||||||
from ebook_converter.ebooks.oeb.base import element
|
from ebook_converter.ebooks.oeb.base import element
|
||||||
from ebook_converter.polyglot.urllib import unquote
|
from ebook_converter import polyglot
|
||||||
from ebook_converter.ptempfile import PersistentTemporaryDirectory
|
from ebook_converter.ptempfile import PersistentTemporaryDirectory
|
||||||
from ebook_converter.utils.cleantext import clean_xml_chars
|
from ebook_converter.utils.cleantext import clean_xml_chars
|
||||||
from ebook_converter.utils import directory
|
from ebook_converter.utils import directory
|
||||||
@@ -56,7 +56,8 @@ class HTMLOutput(OutputFormatPlugin):
|
|||||||
parent = element(parent, ('ul'))
|
parent = element(parent, ('ul'))
|
||||||
for node in current_node.nodes:
|
for node in current_node.nodes:
|
||||||
point = element(parent, 'li')
|
point = element(parent, 'li')
|
||||||
href = relpath(os.path.abspath(unquote(node.href)),
|
href = relpath(os.path.abspath(polyglot
|
||||||
|
.unquote(node.href)),
|
||||||
os.path.dirname(ref_url))
|
os.path.dirname(ref_url))
|
||||||
if isinstance(href, bytes):
|
if isinstance(href, bytes):
|
||||||
href = href.decode('utf-8')
|
href = href.decode('utf-8')
|
||||||
@@ -84,7 +85,6 @@ class HTMLOutput(OutputFormatPlugin):
|
|||||||
from lxml import etree
|
from lxml import etree
|
||||||
from ebook_converter.utils import zipfile
|
from ebook_converter.utils import zipfile
|
||||||
from templite import Templite
|
from templite import Templite
|
||||||
from ebook_converter.polyglot.urllib import unquote
|
|
||||||
from ebook_converter.ebooks.html.meta import EasyMeta
|
from ebook_converter.ebooks.html.meta import EasyMeta
|
||||||
|
|
||||||
# read template files
|
# read template files
|
||||||
@@ -156,7 +156,7 @@ class HTMLOutput(OutputFormatPlugin):
|
|||||||
|
|
||||||
with directory.CurrentDir(output_dir):
|
with directory.CurrentDir(output_dir):
|
||||||
for item in oeb_book.manifest:
|
for item in oeb_book.manifest:
|
||||||
path = os.path.abspath(unquote(item.href))
|
path = os.path.abspath(polyglot.unquote(item.href))
|
||||||
dir = os.path.dirname(path)
|
dir = os.path.dirname(path)
|
||||||
if not os.path.exists(dir):
|
if not os.path.exists(dir):
|
||||||
os.makedirs(dir)
|
os.makedirs(dir)
|
||||||
@@ -169,7 +169,7 @@ class HTMLOutput(OutputFormatPlugin):
|
|||||||
item.unload_data_from_memory(memory=path)
|
item.unload_data_from_memory(memory=path)
|
||||||
|
|
||||||
for item in oeb_book.spine:
|
for item in oeb_book.spine:
|
||||||
path = os.path.abspath(unquote(item.href))
|
path = os.path.abspath(polyglot.unquote(item.href))
|
||||||
dir = os.path.dirname(path)
|
dir = os.path.dirname(path)
|
||||||
root = item.data.getroottree()
|
root = item.data.getroottree()
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ from lxml import etree
|
|||||||
|
|
||||||
from ebook_converter.customize.conversion import (OutputFormatPlugin,
|
from ebook_converter.customize.conversion import (OutputFormatPlugin,
|
||||||
OptionRecommendation)
|
OptionRecommendation)
|
||||||
from ebook_converter.polyglot.urllib import unquote
|
from ebook_converter import polyglot
|
||||||
from ebook_converter.ebooks.oeb.base import OPF_MIME, NCX_MIME, PAGE_MAP_MIME, OEB_STYLES
|
from ebook_converter.ebooks.oeb.base import OPF_MIME, NCX_MIME, PAGE_MAP_MIME, OEB_STYLES
|
||||||
from ebook_converter.ebooks.oeb.normalize_css import condense_sheet
|
from ebook_converter.ebooks.oeb.normalize_css import condense_sheet
|
||||||
from ebook_converter.utils import directory
|
from ebook_converter.utils import directory
|
||||||
@@ -56,7 +56,7 @@ class OEBOutput(OutputFormatPlugin):
|
|||||||
not self.opts.expand_css and item.media_type in OEB_STYLES and hasattr(
|
not self.opts.expand_css and item.media_type in OEB_STYLES and hasattr(
|
||||||
item.data, 'cssText') and 'nook' not in self.opts.output_profile.short_name):
|
item.data, 'cssText') and 'nook' not in self.opts.output_profile.short_name):
|
||||||
condense_sheet(item.data)
|
condense_sheet(item.data)
|
||||||
path = os.path.abspath(unquote(item.href))
|
path = os.path.abspath(polyglot.unquote(item.href))
|
||||||
dir = os.path.dirname(path)
|
dir = os.path.dirname(path)
|
||||||
if not os.path.exists(dir):
|
if not os.path.exists(dir):
|
||||||
os.makedirs(dir)
|
os.makedirs(dir)
|
||||||
|
|||||||
@@ -1,12 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
from ebook_converter.customize.conversion import InputFormatPlugin, OptionRecommendation
|
from ebook_converter.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||||
from ebook_converter.polyglot.builtins import as_bytes
|
from ebook_converter import polyglot
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
|
|
||||||
class PDFInput(InputFormatPlugin):
|
class PDFInput(InputFormatPlugin):
|
||||||
@@ -72,7 +67,8 @@ class PDFInput(InputFormatPlugin):
|
|||||||
ncxid = opf.manifest.id_for_path('toc.ncx')
|
ncxid = opf.manifest.id_for_path('toc.ncx')
|
||||||
if ncxid:
|
if ncxid:
|
||||||
with open('metadata.opf', 'r+b') as f:
|
with open('metadata.opf', 'r+b') as f:
|
||||||
raw = f.read().replace(b'<spine', b'<spine toc="%s"' % as_bytes(ncxid))
|
raw = f.read().replace(b'<spine', b'<spine toc="%s"' %
|
||||||
|
polyglot.as_bytes(ncxid))
|
||||||
f.seek(0)
|
f.seek(0)
|
||||||
f.write(raw)
|
f.write(raw)
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ from lxml import etree
|
|||||||
|
|
||||||
from ebook_converter.customize.conversion import InputFormatPlugin
|
from ebook_converter.customize.conversion import InputFormatPlugin
|
||||||
from ebook_converter.customize.conversion import OptionRecommendation
|
from ebook_converter.customize.conversion import OptionRecommendation
|
||||||
from ebook_converter.polyglot.builtins import as_bytes
|
from ebook_converter import polyglot
|
||||||
|
|
||||||
|
|
||||||
border_style_map = {'single': 'solid',
|
border_style_map = {'single': 'solid',
|
||||||
@@ -296,7 +296,7 @@ class RTFInput(InputFormatPlugin):
|
|||||||
result = transform(doc)
|
result = transform(doc)
|
||||||
html = u'index.xhtml'
|
html = u'index.xhtml'
|
||||||
with open(html, 'wb') as f:
|
with open(html, 'wb') as f:
|
||||||
res = as_bytes(transform.tostring(result))
|
res = polyglot.as_bytes(transform.tostring(result))
|
||||||
# res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
|
# res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
|
||||||
# clean multiple \n
|
# clean multiple \n
|
||||||
res = re.sub(b'\n+', b'\n', res)
|
res = re.sub(b'\n+', b'\n', res)
|
||||||
|
|||||||
@@ -1,22 +1,20 @@
|
|||||||
__license__ = 'GPL v3'
|
import io
|
||||||
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
from ebook_converter import polyglot
|
||||||
|
|
||||||
|
|
||||||
def base64_decode(raw):
|
def base64_decode(raw):
|
||||||
from io import BytesIO
|
|
||||||
from ebook_converter.polyglot.binary import from_base64_bytes
|
|
||||||
|
|
||||||
# First try the python implementation as it is faster
|
# First try the python implementation as it is faster
|
||||||
try:
|
try:
|
||||||
return from_base64_bytes(raw)
|
return polyglot.from_base64_bytes(raw)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Try a more robust version (adapted from FBReader sources)
|
# Try a more robust version (adapted from FBReader sources)
|
||||||
A, Z, a, z, zero, nine, plus, slash, equal = bytearray(b'AZaz09+/=')
|
A, Z, a, z, zero, nine, plus, slash, equal = bytearray(b'AZaz09+/=')
|
||||||
raw = bytearray(raw)
|
raw = bytearray(raw)
|
||||||
out = BytesIO()
|
out = io.BytesIO()
|
||||||
pos = 0
|
pos = 0
|
||||||
while pos < len(raw):
|
while pos < len(raw):
|
||||||
tot = 0
|
tot = 0
|
||||||
@@ -32,7 +30,7 @@ def base64_decode(raw):
|
|||||||
elif zero <= byt <= nine:
|
elif zero <= byt <= nine:
|
||||||
num = byt - zero + 52
|
num = byt - zero + 52
|
||||||
else:
|
else:
|
||||||
num = {plus:62, slash:63, equal:64}.get(byt, None)
|
num = {plus: 62, slash: 63, equal: 64}.get(byt, None)
|
||||||
if num is None:
|
if num is None:
|
||||||
# Ignore this byte
|
# Ignore this byte
|
||||||
continue
|
continue
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ from ebook_converter import constants as const
|
|||||||
from ebook_converter.constants_old import __appname__, __version__
|
from ebook_converter.constants_old import __appname__, __version__
|
||||||
from ebook_converter.ebooks.oeb import base
|
from ebook_converter.ebooks.oeb import base
|
||||||
from ebook_converter.ebooks.oeb import parse_utils
|
from ebook_converter.ebooks.oeb import parse_utils
|
||||||
from ebook_converter.polyglot.binary import as_base64_unicode
|
from ebook_converter import polyglot
|
||||||
from ebook_converter.utils import entities
|
from ebook_converter.utils import entities
|
||||||
from ebook_converter.utils.img import save_cover_data_to
|
from ebook_converter.utils.img import save_cover_data_to
|
||||||
from ebook_converter.utils.localization import lang_as_iso639_1
|
from ebook_converter.utils.localization import lang_as_iso639_1
|
||||||
@@ -355,10 +355,10 @@ class FB2MLizer(object):
|
|||||||
if item.media_type not in ('image/jpeg', 'image/png'):
|
if item.media_type not in ('image/jpeg', 'image/png'):
|
||||||
imdata = save_cover_data_to(item.data,
|
imdata = save_cover_data_to(item.data,
|
||||||
compression_quality=70)
|
compression_quality=70)
|
||||||
raw_data = as_base64_unicode(imdata)
|
raw_data = polyglot.as_base64_unicode(imdata)
|
||||||
content_type = 'image/jpeg'
|
content_type = 'image/jpeg'
|
||||||
else:
|
else:
|
||||||
raw_data = as_base64_unicode(item.data)
|
raw_data = polyglot.as_base64_unicode(item.data)
|
||||||
content_type = item.media_type
|
content_type = item.media_type
|
||||||
# Don't put the encoded image on a single line.
|
# Don't put the encoded image on a single line.
|
||||||
step = 72
|
step = 72
|
||||||
|
|||||||
@@ -14,26 +14,24 @@ from ebook_converter.ebooks.oeb import parse_utils
|
|||||||
from ebook_converter.ebooks.oeb.stylizer import Stylizer
|
from ebook_converter.ebooks.oeb.stylizer import Stylizer
|
||||||
from ebook_converter.utils import entities
|
from ebook_converter.utils import entities
|
||||||
from ebook_converter.utils.logging import default_log
|
from ebook_converter.utils.logging import default_log
|
||||||
from ebook_converter.polyglot.builtins import as_bytes
|
from ebook_converter import polyglot
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
SELF_CLOSING_TAGS = {'area', 'base', 'basefont', 'br', 'hr', 'input', 'img',
|
||||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
'link', 'meta'}
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
SELF_CLOSING_TAGS = {'area', 'base', 'basefont', 'br', 'hr', 'input', 'img', 'link', 'meta'}
|
|
||||||
|
|
||||||
|
|
||||||
class OEB2HTML(object):
|
class OEB2HTML(object):
|
||||||
'''
|
"""
|
||||||
Base class. All subclasses should implement dump_text to actually transform
|
Base class. All subclasses should implement dump_text to actually
|
||||||
content. Also, callers should use oeb2html to get the transformed html.
|
transform content. Also, callers should use oeb2html to get the
|
||||||
links and images can be retrieved after calling oeb2html to get the mapping
|
transformed html links and images can be retrieved after calling oeb2html
|
||||||
of OEB links and images to the new names used in the html returned by oeb2html.
|
to get the mapping of OEB links and images to the new names used in the
|
||||||
Images will always be referenced as if they are in an images directory.
|
html returned by oeb2html. Images will always be referenced as if they are
|
||||||
|
in an images directory.
|
||||||
|
|
||||||
Use get_css to get the CSS classes for the OEB document as a string.
|
Use get_css to get the CSS classes for the OEB document as a string.
|
||||||
'''
|
"""
|
||||||
|
|
||||||
def __init__(self, log=None):
|
def __init__(self, log=None):
|
||||||
self.log = default_log if log is None else log
|
self.log = default_log if log is None else log
|
||||||
@@ -55,16 +53,18 @@ class OEB2HTML(object):
|
|||||||
return self.mlize_spine(oeb_book)
|
return self.mlize_spine(oeb_book)
|
||||||
|
|
||||||
def mlize_spine(self, oeb_book):
|
def mlize_spine(self, oeb_book):
|
||||||
output = [
|
output = ['<html><head><meta http-equiv="Content-Type" '
|
||||||
u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" /><title>%s</title></head><body>' % (
|
'content="text/html;charset=utf-8" />'
|
||||||
entities.prepare_string_for_xml(self.book_title))
|
'<title>%s</title></head>'
|
||||||
]
|
'<body>' % entities.prepare_string_for_xml(self.book_title)]
|
||||||
for item in oeb_book.spine:
|
for item in oeb_book.spine:
|
||||||
self.log.debug('Converting %s to HTML...' % item.href)
|
self.log.debug('Converting %s to HTML...' % item.href)
|
||||||
self.rewrite_ids(item.data, item)
|
self.rewrite_ids(item.data, item)
|
||||||
base.rewrite_links(item.data, partial(self.rewrite_link, page=item))
|
base.rewrite_links(item.data, partial(self.rewrite_link,
|
||||||
|
page=item))
|
||||||
stylizer = Stylizer(item.data, item.href, oeb_book, self.opts)
|
stylizer = Stylizer(item.data, item.href, oeb_book, self.opts)
|
||||||
output += self.dump_text(item.data.find(base.tag('xhtml', 'body')), stylizer, item)
|
output += self.dump_text(item.data.find(base.tag('xhtml', 'body')),
|
||||||
|
stylizer, item)
|
||||||
output.append('\n\n')
|
output.append('\n\n')
|
||||||
output.append('</body></html>')
|
output.append('</body></html>')
|
||||||
return ''.join(output)
|
return ''.join(output)
|
||||||
@@ -126,13 +126,14 @@ class OEB2HTML(object):
|
|||||||
el.attrib['id'] = self.get_link_id(page.href)[1:]
|
el.attrib['id'] = self.get_link_id(page.href)[1:]
|
||||||
continue
|
continue
|
||||||
if 'id' in el.attrib:
|
if 'id' in el.attrib:
|
||||||
el.attrib['id'] = self.get_link_id(page.href, el.attrib['id'])[1:]
|
el.attrib['id'] = self.get_link_id(page.href,
|
||||||
|
el.attrib['id'])[1:]
|
||||||
|
|
||||||
def get_css(self, oeb_book):
|
def get_css(self, oeb_book):
|
||||||
css = b''
|
css = b''
|
||||||
for item in oeb_book.manifest:
|
for item in oeb_book.manifest:
|
||||||
if item.media_type == 'text/css':
|
if item.media_type == 'text/css':
|
||||||
css += as_bytes(item.data.cssText) + b'\n\n'
|
css += polyglot.as_bytes(item.data.cssText) + b'\n\n'
|
||||||
return css
|
return css
|
||||||
|
|
||||||
def prepare_string_for_html(self, raw):
|
def prepare_string_for_html(self, raw):
|
||||||
@@ -157,10 +158,14 @@ class OEB2HTMLNoCSSizer(OEB2HTML):
|
|||||||
|
|
||||||
# We can only processes tags. If there isn't a tag return any text.
|
# We can only processes tags. If there isn't a tag return any text.
|
||||||
if not isinstance(elem.tag, (str, bytes)) \
|
if not isinstance(elem.tag, (str, bytes)) \
|
||||||
or parse_utils.namespace(elem.tag) not in (const.XHTML_NS, const.SVG_NS):
|
or parse_utils.namespace(elem.tag) not in (const.XHTML_NS,
|
||||||
|
const.SVG_NS):
|
||||||
p = elem.getparent()
|
p = elem.getparent()
|
||||||
if p is not None and isinstance(p.tag, (str, bytes)) and parse_utils.namespace(p.tag) in (const.XHTML_NS, const.SVG_NS) \
|
if (p is not None and
|
||||||
and elem.tail:
|
isinstance(p.tag, (str, bytes)) and
|
||||||
|
parse_utils.namespace(p.tag) in (const.XHTML_NS,
|
||||||
|
const.SVG_NS) and
|
||||||
|
elem.tail):
|
||||||
return [elem.tail]
|
return [elem.tail]
|
||||||
return ['']
|
return ['']
|
||||||
|
|
||||||
@@ -176,8 +181,8 @@ class OEB2HTMLNoCSSizer(OEB2HTML):
|
|||||||
tags.append(tag)
|
tags.append(tag)
|
||||||
|
|
||||||
# Ignore anything that is set to not be displayed.
|
# Ignore anything that is set to not be displayed.
|
||||||
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
|
if (style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') or
|
||||||
or style['visibility'] == 'hidden':
|
style['visibility'] == 'hidden'):
|
||||||
return ['']
|
return ['']
|
||||||
|
|
||||||
# Remove attributes we won't want.
|
# Remove attributes we won't want.
|
||||||
@@ -186,11 +191,13 @@ class OEB2HTMLNoCSSizer(OEB2HTML):
|
|||||||
if 'style' in attribs:
|
if 'style' in attribs:
|
||||||
del attribs['style']
|
del attribs['style']
|
||||||
|
|
||||||
# Turn the rest of the attributes into a string we can write with the tag.
|
# Turn the rest of the attributes into a string we can write with the
|
||||||
|
# tag.
|
||||||
at = ''
|
at = ''
|
||||||
for k, v in attribs.items():
|
for key, value in attribs.items():
|
||||||
at += ' %s="%s"' % (k, entities
|
at += (' %s="%s"' %
|
||||||
.prepare_string_for_xml(v, attribute=True))
|
(key, entities.prepare_string_for_xml(value,
|
||||||
|
attribute=True)))
|
||||||
|
|
||||||
# Write the tag.
|
# Write the tag.
|
||||||
text.append('<%s%s' % (tag, at))
|
text.append('<%s%s' % (tag, at))
|
||||||
@@ -246,11 +253,15 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
# We can only processes tags. If there isn't a tag return any text.
|
# We can only processes tags. If there isn't a tag return any text.
|
||||||
if not isinstance(elem.tag, (str, bytes)) \
|
if (not isinstance(elem.tag, (str, bytes)) or
|
||||||
or parse_utils.namespace(elem.tag) not in (const.XHTML_NS, const.SVG_NS):
|
parse_utils.namespace(elem.tag) not in (const.XHTML_NS,
|
||||||
|
const.SVG_NS)):
|
||||||
p = elem.getparent()
|
p = elem.getparent()
|
||||||
if p is not None and isinstance(p.tag, (str, bytes)) and parse_utils.namespace(p.tag) in (const.XHTML_NS, const.SVG_NS) \
|
if (p is not None and
|
||||||
and elem.tail:
|
isinstance(p.tag, (str, bytes)) and
|
||||||
|
parse_utils.namespace(p.tag) in (const.XHTML_NS,
|
||||||
|
const.SVG_NS) and
|
||||||
|
elem.tail):
|
||||||
return [elem.tail]
|
return [elem.tail]
|
||||||
return ['']
|
return ['']
|
||||||
|
|
||||||
@@ -266,9 +277,11 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
|
|||||||
if tag == 'body':
|
if tag == 'body':
|
||||||
# Change the body to a div so we can merge multiple files.
|
# Change the body to a div so we can merge multiple files.
|
||||||
tag = 'div'
|
tag = 'div'
|
||||||
# Add page-break-brefore: always because renders typically treat a new file (we're merging files)
|
# Add page-break-brefore: always because renders typically treat
|
||||||
# as a page break and remove all other page break types that might be set.
|
# a new file (we're merging files) as a page break and remove all
|
||||||
style_a = 'page-break-before: always; %s' % re.sub('page-break-[^:]+:[^;]+;?', '', style_a)
|
# other page break types that might be set.
|
||||||
|
style_a = ('page-break-before: always; %s' %
|
||||||
|
re.sub('page-break-[^:]+:[^;]+;?', '', style_a))
|
||||||
# Remove unnecessary spaces.
|
# Remove unnecessary spaces.
|
||||||
style_a = re.sub(r'\s{2,}', ' ', style_a).strip()
|
style_a = re.sub(r'\s{2,}', ' ', style_a).strip()
|
||||||
tags.append(tag)
|
tags.append(tag)
|
||||||
@@ -279,7 +292,8 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
|
|||||||
if 'style' in attribs:
|
if 'style' in attribs:
|
||||||
del attribs['style']
|
del attribs['style']
|
||||||
|
|
||||||
# Turn the rest of the attributes into a string we can write with the tag.
|
# Turn the rest of the attributes into a string we can write with
|
||||||
|
# the tag.
|
||||||
at = ''
|
at = ''
|
||||||
for k, v in attribs.items():
|
for k, v in attribs.items():
|
||||||
at += ' %s="%s"' % (k, entities
|
at += ' %s="%s"' % (k, entities
|
||||||
@@ -319,43 +333,51 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
|
|||||||
|
|
||||||
|
|
||||||
class OEB2HTMLClassCSSizer(OEB2HTML):
|
class OEB2HTMLClassCSSizer(OEB2HTML):
|
||||||
'''
|
"""
|
||||||
Use CSS classes. css_style option can specify whether to use
|
Use CSS classes. css_style option can specify whether to use inline
|
||||||
inline classes (style tag in the head) or reference an external
|
classes (style tag in the head) or reference an external CSS file called
|
||||||
CSS file called style.css.
|
style.css.
|
||||||
'''
|
"""
|
||||||
|
|
||||||
def mlize_spine(self, oeb_book):
|
def mlize_spine(self, oeb_book):
|
||||||
output = []
|
output = []
|
||||||
for item in oeb_book.spine:
|
for item in oeb_book.spine:
|
||||||
self.log.debug('Converting %s to HTML...' % item.href)
|
self.log.debug('Converting %s to HTML...' % item.href)
|
||||||
self.rewrite_ids(item.data, item)
|
self.rewrite_ids(item.data, item)
|
||||||
base.rewrite_links(item.data, partial(self.rewrite_link, page=item))
|
base.rewrite_links(item.data, partial(self.rewrite_link,
|
||||||
|
page=item))
|
||||||
stylizer = Stylizer(item.data, item.href, oeb_book, self.opts)
|
stylizer = Stylizer(item.data, item.href, oeb_book, self.opts)
|
||||||
output += self.dump_text(item.data.find(base.tag('xhtml', 'body')), stylizer, item)
|
output += self.dump_text(item.data.find(base.tag('xhtml', 'body')),
|
||||||
|
stylizer, item)
|
||||||
output.append('\n\n')
|
output.append('\n\n')
|
||||||
if self.opts.htmlz_class_style == 'external':
|
if self.opts.htmlz_class_style == 'external':
|
||||||
css = u'<link href="style.css" rel="stylesheet" type="text/css" />'
|
css = '<link href="style.css" rel="stylesheet" type="text/css" />'
|
||||||
else:
|
else:
|
||||||
css = u'<style type="text/css">' + self.get_css(oeb_book) + u'</style>'
|
css = ('<style type="text/css">' + self.get_css(oeb_book) +
|
||||||
title = (u'<title>%s</title>' %
|
'</style>')
|
||||||
|
title = ('<title>%s</title>' %
|
||||||
entities.prepare_string_for_xml(self.book_title))
|
entities.prepare_string_for_xml(self.book_title))
|
||||||
output = [u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" />'] + \
|
output = (['<html><head><meta http-equiv="Content-Type" '
|
||||||
[css] + [title, u'</head><body>'] + output + [u'</body></html>']
|
'content="text/html;charset=utf-8" />'] + [css] +
|
||||||
|
[title, '</head><body>'] + output + ['</body></html>'])
|
||||||
return ''.join(output)
|
return ''.join(output)
|
||||||
|
|
||||||
def dump_text(self, elem, stylizer, page):
|
def dump_text(self, elem, stylizer, page):
|
||||||
'''
|
"""
|
||||||
@elem: The element in the etree that we are working on.
|
@elem: The element in the etree that we are working on.
|
||||||
@stylizer: The style information attached to the element.
|
@stylizer: The style information attached to the element.
|
||||||
'''
|
"""
|
||||||
|
|
||||||
# We can only processes tags. If there isn't a tag return any text.
|
# We can only processes tags. If there isn't a tag return any text.
|
||||||
if not isinstance(elem.tag, (str, bytes)) \
|
if (not isinstance(elem.tag, (str, bytes)) or
|
||||||
or parse_utils.namespace(elem.tag) not in (const.XHTML_NS, const.SVG_NS):
|
parse_utils.namespace(elem.tag) not in (const.XHTML_NS,
|
||||||
|
const.SVG_NS)):
|
||||||
p = elem.getparent()
|
p = elem.getparent()
|
||||||
if p is not None and isinstance(p.tag, (str, bytes)) and parse_utils.namespace(p.tag) in (const.XHTML_NS, const.SVG_NS) \
|
if (p is not None and
|
||||||
and elem.tail:
|
isinstance(p.tag, (str, bytes)) and
|
||||||
|
parse_utils.namespace(p.tag) in (const.XHTML_NS,
|
||||||
|
const.SVG_NS) and
|
||||||
|
elem.tail):
|
||||||
return [elem.tail]
|
return [elem.tail]
|
||||||
return ['']
|
return ['']
|
||||||
|
|
||||||
@@ -373,11 +395,12 @@ class OEB2HTMLClassCSSizer(OEB2HTML):
|
|||||||
if 'style' in attribs:
|
if 'style' in attribs:
|
||||||
del attribs['style']
|
del attribs['style']
|
||||||
|
|
||||||
# Turn the rest of the attributes into a string we can write with the tag.
|
# Turn the rest of the attributes into a string we can write with
|
||||||
|
# the tag.
|
||||||
at = ''
|
at = ''
|
||||||
for k, v in attribs.items():
|
for k, v in attribs.items():
|
||||||
at += ' %s="%s"' % (k,
|
at += ' %s="%s"' % (k, entities
|
||||||
entities.prepare_string_for_xml(v, attribute=True))
|
.prepare_string_for_xml(v, attribute=True))
|
||||||
|
|
||||||
# Write the tag.
|
# Write the tag.
|
||||||
text.append('<%s%s' % (tag, at))
|
text.append('<%s%s' % (tag, at))
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ import textwrap
|
|||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
from ebook_converter.polyglot.builtins import as_bytes
|
from ebook_converter import polyglot
|
||||||
|
|
||||||
|
|
||||||
class Canvas(etree.XSLTExtension):
|
class Canvas(etree.XSLTExtension):
|
||||||
@@ -292,7 +292,7 @@ class Styles(etree.XSLTExtension):
|
|||||||
return '\n\t'.join(ans)
|
return '\n\t'.join(ans)
|
||||||
|
|
||||||
with open(name, 'wb') as f:
|
with open(name, 'wb') as f:
|
||||||
f.write(as_bytes(self.CSS))
|
f.write(polyglot.as_bytes(self.CSS))
|
||||||
for (w, sel) in [(self.text_styles, 'ts'), (self.block_styles,
|
for (w, sel) in [(self.text_styles, 'ts'), (self.block_styles,
|
||||||
'bs')]:
|
'bs')]:
|
||||||
for i, s in enumerate(w):
|
for i, s in enumerate(w):
|
||||||
@@ -300,7 +300,7 @@ class Styles(etree.XSLTExtension):
|
|||||||
continue
|
continue
|
||||||
rsel = '.%s%d'%(sel, i)
|
rsel = '.%s%d'%(sel, i)
|
||||||
s = join(s)
|
s = join(s)
|
||||||
f.write(as_bytes(rsel + ' {\n\t' + s + '\n}\n\n'))
|
f.write(polyglot.as_bytes(rsel + ' {\n\t' + s + '\n}\n\n'))
|
||||||
|
|
||||||
def execute(self, context, self_node, input_node, output_parent):
|
def execute(self, context, self_node, input_node, output_parent):
|
||||||
if input_node.tag == 'TextStyle':
|
if input_node.tag == 'TextStyle':
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ import sys
|
|||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
from ebook_converter.utils.config_base import tweaks
|
from ebook_converter.utils.config_base import tweaks
|
||||||
from ebook_converter.polyglot.urllib import unquote
|
from ebook_converter import polyglot
|
||||||
from ebook_converter.utils import encoding as uenc
|
from ebook_converter.utils import encoding as uenc
|
||||||
|
|
||||||
|
|
||||||
@@ -248,9 +248,11 @@ class Resource(object):
|
|||||||
pc = url[2]
|
pc = url[2]
|
||||||
if isinstance(pc, str):
|
if isinstance(pc, str):
|
||||||
pc = pc.encode('utf-8')
|
pc = pc.encode('utf-8')
|
||||||
pc = unquote(pc).decode('utf-8')
|
pc = polyglot.unquote(pc).decode('utf-8')
|
||||||
self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
|
self.path = os.path.abspath(os.path.join(basedir,
|
||||||
self.fragment = unquote(url[-1])
|
pc.replace('/',
|
||||||
|
os.sep)))
|
||||||
|
self.fragment = polyglot.unquote(url[-1])
|
||||||
|
|
||||||
def href(self, basedir=None):
|
def href(self, basedir=None):
|
||||||
'''
|
'''
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ from ebook_converter.utils.img import save_cover_data_to
|
|||||||
from ebook_converter.utils.imghdr import identify
|
from ebook_converter.utils.imghdr import identify
|
||||||
from ebook_converter.ebooks.metadata import MetaInformation, check_isbn
|
from ebook_converter.ebooks.metadata import MetaInformation, check_isbn
|
||||||
from ebook_converter.ebooks.chardet import xml_to_unicode
|
from ebook_converter.ebooks.chardet import xml_to_unicode
|
||||||
from ebook_converter.polyglot.binary import as_base64_unicode
|
from ebook_converter import polyglot
|
||||||
from ebook_converter.utils import encoding as uenc
|
from ebook_converter.utils import encoding as uenc
|
||||||
|
|
||||||
|
|
||||||
@@ -389,7 +389,7 @@ def _rnd_pic_file_name(prefix='calibre_cover_', size=32, ext='jpg'):
|
|||||||
|
|
||||||
def _encode_into_jpeg(data):
|
def _encode_into_jpeg(data):
|
||||||
data = save_cover_data_to(data)
|
data = save_cover_data_to(data)
|
||||||
return as_base64_unicode(data)
|
return polyglot.as_base64_unicode(data)
|
||||||
|
|
||||||
|
|
||||||
def _set_cover(title_info, mi, ctx):
|
def _set_cover(title_info, mi, ctx):
|
||||||
|
|||||||
@@ -30,11 +30,11 @@ from ebook_converter.ebooks.metadata.utils import parse_opf, \
|
|||||||
from ebook_converter.ebooks.metadata import string_to_authors, \
|
from ebook_converter.ebooks.metadata import string_to_authors, \
|
||||||
MetaInformation, check_isbn
|
MetaInformation, check_isbn
|
||||||
from ebook_converter.ebooks.metadata.book.base import Metadata
|
from ebook_converter.ebooks.metadata.book.base import Metadata
|
||||||
|
from ebook_converter import polyglot
|
||||||
from ebook_converter.utils.date import parse_date, isoformat
|
from ebook_converter.utils.date import parse_date, isoformat
|
||||||
from ebook_converter.utils.localization import get_lang, canonicalize_lang
|
from ebook_converter.utils.localization import get_lang, canonicalize_lang
|
||||||
from ebook_converter.utils.cleantext import clean_ascii_chars, clean_xml_chars
|
from ebook_converter.utils.cleantext import clean_ascii_chars, clean_xml_chars
|
||||||
from ebook_converter.utils.config_base import tweaks
|
from ebook_converter.utils.config_base import tweaks
|
||||||
from ebook_converter.polyglot.urllib import unquote
|
|
||||||
|
|
||||||
|
|
||||||
pretty_print_opf = False
|
pretty_print_opf = False
|
||||||
@@ -838,7 +838,7 @@ class OPF(object): # {{{
|
|||||||
|
|
||||||
def unquote_urls(self):
|
def unquote_urls(self):
|
||||||
def get_href(item):
|
def get_href(item):
|
||||||
raw = unquote(item.get('href', ''))
|
raw = polyglot.unquote(item.get('href', ''))
|
||||||
if not isinstance(raw, str):
|
if not isinstance(raw, str):
|
||||||
raw = raw.decode('utf-8')
|
raw = raw.decode('utf-8')
|
||||||
return raw
|
return raw
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ from lxml.builder import ElementMaker
|
|||||||
from ebook_converter.constants_old import __appname__, __version__
|
from ebook_converter.constants_old import __appname__, __version__
|
||||||
from ebook_converter.ebooks.chardet import xml_to_unicode
|
from ebook_converter.ebooks.chardet import xml_to_unicode
|
||||||
from ebook_converter.utils.cleantext import clean_xml_chars
|
from ebook_converter.utils.cleantext import clean_xml_chars
|
||||||
from ebook_converter.polyglot.urllib import unquote
|
from ebook_converter import polyglot
|
||||||
|
|
||||||
|
|
||||||
NCX_NS = "http://www.daisy.org/z3986/2005/ncx/"
|
NCX_NS = "http://www.daisy.org/z3986/2005/ncx/"
|
||||||
@@ -31,7 +31,7 @@ def parse_html_toc(data):
|
|||||||
root = parse(clean_xml_chars(data), maybe_xhtml=True, keep_doctype=False,
|
root = parse(clean_xml_chars(data), maybe_xhtml=True, keep_doctype=False,
|
||||||
sanitize_names=True)
|
sanitize_names=True)
|
||||||
for a in root.xpath('//*[@href and local-name()="a"]'):
|
for a in root.xpath('//*[@href and local-name()="a"]'):
|
||||||
purl = urllib.parse.urlparse(unquote(a.get('href')))
|
purl = urllib.parse.urlparse(polyglot.unquote(a.get('href')))
|
||||||
href, fragment = purl[2], purl[5]
|
href, fragment = purl[2], purl[5]
|
||||||
if not fragment:
|
if not fragment:
|
||||||
fragment = None
|
fragment = None
|
||||||
@@ -149,7 +149,7 @@ class TOC(list):
|
|||||||
|
|
||||||
if toc is not None:
|
if toc is not None:
|
||||||
if toc.lower() not in ('ncx', 'ncxtoc'):
|
if toc.lower() not in ('ncx', 'ncxtoc'):
|
||||||
toc = urllib.parse.urlparse(unquote(toc))[2]
|
toc = urllib.parse.urlparse(polyglot.unquote(toc))[2]
|
||||||
toc = toc.replace('/', os.sep)
|
toc = toc.replace('/', os.sep)
|
||||||
if not os.path.isabs(toc):
|
if not os.path.isabs(toc):
|
||||||
toc = os.path.join(self.base_path, toc)
|
toc = os.path.join(self.base_path, toc)
|
||||||
@@ -219,7 +219,8 @@ class TOC(list):
|
|||||||
content = content[0]
|
content = content[0]
|
||||||
# if get_attr(content, attr='src'):
|
# if get_attr(content, attr='src'):
|
||||||
purl = urllib.parse.urlparse(content.get('src'))
|
purl = urllib.parse.urlparse(content.get('src'))
|
||||||
href, fragment = unquote(purl[2]), unquote(purl[5])
|
href = polyglot.unquote(purl[2])
|
||||||
|
fragment = polyglot.unquote(purl[5])
|
||||||
nd = dest.add_item(href, fragment, text)
|
nd = dest.add_item(href, fragment, text)
|
||||||
nd.play_order = play_order
|
nd.play_order = play_order
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ from io import BytesIO
|
|||||||
from ebook_converter.utils.img import save_cover_data_to, scale_image, image_to_data, image_from_data, resize_image, png_data_to_gif_data
|
from ebook_converter.utils.img import save_cover_data_to, scale_image, image_to_data, image_from_data, resize_image, png_data_to_gif_data
|
||||||
from ebook_converter.utils.imghdr import what
|
from ebook_converter.utils.imghdr import what
|
||||||
from ebook_converter.ebooks import normalize
|
from ebook_converter.ebooks import normalize
|
||||||
from ebook_converter.polyglot.builtins import as_bytes
|
from ebook_converter import polyglot
|
||||||
from ebook_converter.tinycss.color3 import parse_color_string
|
from ebook_converter.tinycss.color3 import parse_color_string
|
||||||
|
|
||||||
|
|
||||||
@@ -61,7 +61,7 @@ def decode_hex_number(raw, codec='utf-8'):
|
|||||||
|
|
||||||
|
|
||||||
def encode_string(raw):
|
def encode_string(raw):
|
||||||
ans = bytearray(as_bytes(raw))
|
ans = bytearray(polyglot.as_bytes(raw))
|
||||||
ans.insert(0, len(ans))
|
ans.insert(0, len(ans))
|
||||||
return bytes(ans)
|
return bytes(ans)
|
||||||
|
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ from odf.namespaces import TEXTNS as odTEXTNS
|
|||||||
|
|
||||||
from ebook_converter.utils import directory
|
from ebook_converter.utils import directory
|
||||||
from ebook_converter.ebooks.oeb.base import _css_logger
|
from ebook_converter.ebooks.oeb.base import _css_logger
|
||||||
from ebook_converter.polyglot.builtins import as_bytes
|
from ebook_converter import polyglot
|
||||||
|
|
||||||
|
|
||||||
class Extract(ODF2XHTML):
|
class Extract(ODF2XHTML):
|
||||||
@@ -292,7 +292,7 @@ class Extract(ODF2XHTML):
|
|||||||
except:
|
except:
|
||||||
log.exception('Failed to filter CSS, conversion may be slow')
|
log.exception('Failed to filter CSS, conversion may be slow')
|
||||||
with open('index.xhtml', 'wb') as f:
|
with open('index.xhtml', 'wb') as f:
|
||||||
f.write(as_bytes(html))
|
f.write(polyglot.as_bytes(html))
|
||||||
zf = ZipFile(stream, 'r')
|
zf = ZipFile(stream, 'r')
|
||||||
self.extract_pictures(zf)
|
self.extract_pictures(zf)
|
||||||
opf = OPFCreator(os.path.abspath(os.getcwd()), mi)
|
opf = OPFCreator(os.path.abspath(os.getcwd()), mi)
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ from ebook_converter.utils.localization import get_lang
|
|||||||
from ebook_converter.ptempfile import TemporaryDirectory
|
from ebook_converter.ptempfile import TemporaryDirectory
|
||||||
from ebook_converter.constants_old import __appname__, __version__
|
from ebook_converter.constants_old import __appname__, __version__
|
||||||
from ebook_converter.utils import entities
|
from ebook_converter.utils import entities
|
||||||
from ebook_converter.polyglot.urllib import unquote
|
from ebook_converter import polyglot
|
||||||
|
|
||||||
|
|
||||||
class OEBReader(object):
|
class OEBReader(object):
|
||||||
@@ -641,7 +641,7 @@ class OEBReader(object):
|
|||||||
with TemporaryDirectory('_html_cover') as tdir:
|
with TemporaryDirectory('_html_cover') as tdir:
|
||||||
writer = OEBWriter()
|
writer = OEBWriter()
|
||||||
writer(self.oeb, tdir)
|
writer(self.oeb, tdir)
|
||||||
path = os.path.join(tdir, unquote(hcover.href))
|
path = os.path.join(tdir, polyglot.unquote(hcover.href))
|
||||||
data = render_html_svg_workaround(path, self.logger)
|
data = render_html_svg_workaround(path, self.logger)
|
||||||
if not data:
|
if not data:
|
||||||
data = b''
|
data = b''
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ import urllib.parse
|
|||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
from ebook_converter.utils.imghdr import identify
|
from ebook_converter.utils.imghdr import identify
|
||||||
from ebook_converter.polyglot.urllib import unquote
|
from ebook_converter import polyglot
|
||||||
|
|
||||||
|
|
||||||
class CoverManager(object):
|
class CoverManager(object):
|
||||||
@@ -113,7 +113,7 @@ class CoverManager(object):
|
|||||||
if href is not None:
|
if href is not None:
|
||||||
templ = self.non_svg_template if self.no_svg_cover \
|
templ = self.non_svg_template if self.no_svg_cover \
|
||||||
else self.svg_template
|
else self.svg_template
|
||||||
tp = templ % unquote(href)
|
tp = templ % polyglot.unquote(href)
|
||||||
id, href = m.generate('titlepage', 'titlepage.xhtml')
|
id, href = m.generate('titlepage', 'titlepage.xhtml')
|
||||||
item = m.add(id, href, mimetypes.guess_type('t.xhtml')[0],
|
item = m.add(id, href, mimetypes.guess_type('t.xhtml')[0],
|
||||||
data=etree.fromstring(tp))
|
data=etree.fromstring(tp))
|
||||||
|
|||||||
@@ -3,8 +3,7 @@ import re
|
|||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
from ebook_converter.ebooks.oeb.base import XPath
|
from ebook_converter.ebooks.oeb.base import XPath
|
||||||
from ebook_converter.polyglot.binary import from_base64_bytes
|
from ebook_converter import polyglot
|
||||||
from ebook_converter.polyglot.builtins import as_bytes
|
|
||||||
|
|
||||||
|
|
||||||
class DataURL(object):
|
class DataURL(object):
|
||||||
@@ -27,14 +26,14 @@ class DataURL(object):
|
|||||||
if ';base64' in header:
|
if ';base64' in header:
|
||||||
data = re.sub(r'\s+', '', data)
|
data = re.sub(r'\s+', '', data)
|
||||||
try:
|
try:
|
||||||
data = from_base64_bytes(data)
|
data = polyglot.from_base64_bytes(data)
|
||||||
except Exception:
|
except Exception:
|
||||||
self.log.error('Found invalid base64 encoded data '
|
self.log.error('Found invalid base64 encoded data '
|
||||||
'URI, ignoring it')
|
'URI, ignoring it')
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
data = urllib.parse.unquote(data)
|
data = urllib.parse.unquote(data)
|
||||||
data = as_bytes(data)
|
data = polyglot.as_bytes(data)
|
||||||
fmt = what(None, data)
|
fmt = what(None, data)
|
||||||
if not fmt:
|
if not fmt:
|
||||||
self.log.warn('Image encoded as data URL has unknown '
|
self.log.warn('Image encoded as data URL has unknown '
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ from ebook_converter import constants as const
|
|||||||
from ebook_converter.ebooks.epub import rules
|
from ebook_converter.ebooks.epub import rules
|
||||||
from ebook_converter.ebooks.oeb import base
|
from ebook_converter.ebooks.oeb import base
|
||||||
from ebook_converter.ebooks.oeb.polish.split import do_split
|
from ebook_converter.ebooks.oeb.polish.split import do_split
|
||||||
from ebook_converter.polyglot.urllib import unquote
|
from ebook_converter import polyglot
|
||||||
from ebook_converter.css_selectors import Select, SelectorError
|
from ebook_converter.css_selectors import Select, SelectorError
|
||||||
from ebook_converter.utils import encoding as uenc
|
from ebook_converter.utils import encoding as uenc
|
||||||
|
|
||||||
@@ -189,7 +189,7 @@ class Split(object):
|
|||||||
nhref = anchor_map[frag if frag else None]
|
nhref = anchor_map[frag if frag else None]
|
||||||
nhref = self.current_item.relhref(nhref)
|
nhref = self.current_item.relhref(nhref)
|
||||||
if frag:
|
if frag:
|
||||||
nhref = '#'.join((unquote(nhref), frag))
|
nhref = '#'.join((polyglot.unquote(nhref), frag))
|
||||||
|
|
||||||
return nhref
|
return nhref
|
||||||
return url
|
return url
|
||||||
|
|||||||
@@ -1,20 +1,18 @@
|
|||||||
import codecs, zlib, numbers
|
import codecs
|
||||||
from io import BytesIO
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
import io
|
||||||
|
import numbers
|
||||||
|
import zlib
|
||||||
|
|
||||||
from ebook_converter.utils.logging import default_log
|
from ebook_converter.utils.logging import default_log
|
||||||
from ebook_converter.polyglot.binary import as_hex_bytes
|
from ebook_converter import polyglot
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
pdf_float = lambda x: f"{x:.1f}"
|
pdf_float = lambda x: f"{x:.1f}"
|
||||||
|
|
||||||
EOL = b'\n'
|
EOL = b'\n'
|
||||||
|
|
||||||
# Sizes {{{
|
# Sizes
|
||||||
inch = 72.0
|
inch = 72.0
|
||||||
cm = inch / 2.54
|
cm = inch / 2.54
|
||||||
mm = cm * 0.1
|
mm = cm * 0.1
|
||||||
@@ -45,10 +43,9 @@ B2 = (_BW*2, _BH*2)
|
|||||||
B1 = (_BH*4, _BW*2)
|
B1 = (_BH*4, _BW*2)
|
||||||
B0 = (_BW*4, _BH*4)
|
B0 = (_BW*4, _BH*4)
|
||||||
|
|
||||||
PAPER_SIZES = {k:globals()[k.upper()] for k in ('a0 a1 a2 a3 a4 a5 a6 b0 b1 b2'
|
PAPER_SIZES = {k: globals()[k.upper()] for k in ('a0 a1 a2 a3 a4 a5 a6 b0 b1 '
|
||||||
' b3 b4 b5 b6 letter legal').split()}
|
'b2 b3 b4 b5 b6 letter '
|
||||||
|
'legal').split()}
|
||||||
# }}}
|
|
||||||
|
|
||||||
|
|
||||||
def fmtnum(o):
|
def fmtnum(o):
|
||||||
@@ -70,12 +67,12 @@ def serialize(o, stream):
|
|||||||
elif o is None:
|
elif o is None:
|
||||||
stream.write_raw(b'null')
|
stream.write_raw(b'null')
|
||||||
elif isinstance(o, datetime):
|
elif isinstance(o, datetime):
|
||||||
val = o.strftime("D:%Y%m%d%H%M%%02d%z")%min(59, o.second)
|
val = o.strftime("D:%Y%m%d%H%M%%02d%z") % min(59, o.second)
|
||||||
if datetime.tzinfo is not None:
|
if datetime.tzinfo is not None:
|
||||||
val = "(%s'%s')"%(val[:-2], val[-2:])
|
val = "(%s'%s')" % (val[:-2], val[-2:])
|
||||||
stream.write(val.encode('ascii'))
|
stream.write(val.encode('ascii'))
|
||||||
else:
|
else:
|
||||||
raise ValueError('Unknown object: %r'%o)
|
raise ValueError('Unknown object: %r' % o)
|
||||||
|
|
||||||
|
|
||||||
class Name(str):
|
class Name(str):
|
||||||
@@ -83,7 +80,7 @@ class Name(str):
|
|||||||
def pdf_serialize(self, stream):
|
def pdf_serialize(self, stream):
|
||||||
raw = self.encode('ascii')
|
raw = self.encode('ascii')
|
||||||
if len(raw) > 126:
|
if len(raw) > 126:
|
||||||
raise ValueError('Name too long: %r'%self)
|
raise ValueError('Name too long: %r' % self)
|
||||||
raw = bytearray(raw)
|
raw = bytearray(raw)
|
||||||
sharp = ord(b'#')
|
sharp = ord(b'#')
|
||||||
buf = (
|
buf = (
|
||||||
@@ -96,7 +93,8 @@ def escape_pdf_string(bytestring):
|
|||||||
indices = []
|
indices = []
|
||||||
bad = []
|
bad = []
|
||||||
ba = bytearray(bytestring)
|
ba = bytearray(bytestring)
|
||||||
bad_map = {10:ord('n'), 13:ord('r'), 12:ord('f'), 8:ord('b'), 9:ord('\t'), 92:ord('\\')}
|
bad_map = {10: ord('n'), 13: ord('r'), 12: ord('f'),
|
||||||
|
8: ord('b'), 9: ord('\t'), 92: ord('\\')}
|
||||||
for i, num in enumerate(ba):
|
for i, num in enumerate(ba):
|
||||||
if num == 40: # (
|
if num == 40: # (
|
||||||
indices.append((i, 40))
|
indices.append((i, 40))
|
||||||
@@ -134,7 +132,7 @@ class UTF16String(str):
|
|||||||
if False:
|
if False:
|
||||||
# Disabled as the parentheses based strings give easier to debug
|
# Disabled as the parentheses based strings give easier to debug
|
||||||
# PDF files
|
# PDF files
|
||||||
stream.write(b'<' + as_hex_bytes(raw) + b'>')
|
stream.write(b'<' + polyglot.as_hex_bytes(raw) + b'>')
|
||||||
else:
|
else:
|
||||||
stream.write(b'('+escape_pdf_string(raw)+b')')
|
stream.write(b'('+escape_pdf_string(raw)+b')')
|
||||||
|
|
||||||
@@ -143,9 +141,9 @@ class Dictionary(dict):
|
|||||||
|
|
||||||
def pdf_serialize(self, stream):
|
def pdf_serialize(self, stream):
|
||||||
stream.write(b'<<' + EOL)
|
stream.write(b'<<' + EOL)
|
||||||
sorted_keys = sorted(self,
|
sorted_keys = sorted(self, key=lambda x: ({'Type': '1',
|
||||||
key=lambda x:({'Type':'1', 'Subtype':'2'}.get(
|
'Subtype': '2'}
|
||||||
x, x)+x))
|
.get(x, x) + x))
|
||||||
for k in sorted_keys:
|
for k in sorted_keys:
|
||||||
serialize(Name(k), stream)
|
serialize(Name(k), stream)
|
||||||
stream.write(b' ')
|
stream.write(b' ')
|
||||||
@@ -177,10 +175,10 @@ class Array(list):
|
|||||||
stream.write(b']')
|
stream.write(b']')
|
||||||
|
|
||||||
|
|
||||||
class Stream(BytesIO):
|
class Stream(io.BytesIO):
|
||||||
|
|
||||||
def __init__(self, compress=False):
|
def __init__(self, compress=False):
|
||||||
BytesIO.__init__(self)
|
io.BytesIO.__init__(self)
|
||||||
self.compress = compress
|
self.compress = compress
|
||||||
self.filters = Array()
|
self.filters = Array()
|
||||||
|
|
||||||
@@ -213,7 +211,7 @@ class Stream(BytesIO):
|
|||||||
raw.encode('ascii'))
|
raw.encode('ascii'))
|
||||||
|
|
||||||
def write_raw(self, raw):
|
def write_raw(self, raw):
|
||||||
BytesIO.write(self, raw)
|
io.BytesIO.write(self, raw)
|
||||||
|
|
||||||
|
|
||||||
class Reference(object):
|
class Reference(object):
|
||||||
@@ -222,11 +220,11 @@ class Reference(object):
|
|||||||
self.num, self.obj = num, obj
|
self.num, self.obj = num, obj
|
||||||
|
|
||||||
def pdf_serialize(self, stream):
|
def pdf_serialize(self, stream):
|
||||||
raw = '%d 0 R'%self.num
|
raw = '%d 0 R' % self.num
|
||||||
stream.write(raw.encode('ascii'))
|
stream.write(raw.encode('ascii'))
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return '%d 0 R'%self.num
|
return '%d 0 R' % self.num
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return repr(self)
|
return repr(self)
|
||||||
|
|||||||
59
ebook_converter/polyglot.py
Normal file
59
ebook_converter/polyglot.py
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
"""
|
||||||
|
Misc converting functions from polyglot module.
|
||||||
|
Most of the have something to do with converting between string and binary
|
||||||
|
"""
|
||||||
|
import base64
|
||||||
|
import binascii
|
||||||
|
import urllib
|
||||||
|
|
||||||
|
|
||||||
|
def as_base64_unicode(x, enc='utf-8'):
|
||||||
|
if isinstance(x, str):
|
||||||
|
x = x.encode(enc)
|
||||||
|
return base64.standard_b64encode(x).decode('ascii')
|
||||||
|
|
||||||
|
|
||||||
|
def from_base64_bytes(x):
|
||||||
|
if isinstance(x, str):
|
||||||
|
x = x.encode('ascii')
|
||||||
|
return base64.standard_b64decode(x)
|
||||||
|
|
||||||
|
|
||||||
|
def as_hex_bytes(x, enc='utf-8'):
|
||||||
|
if isinstance(x, str):
|
||||||
|
x = x.encode(enc)
|
||||||
|
return binascii.hexlify(x)
|
||||||
|
|
||||||
|
|
||||||
|
def from_hex_bytes(x):
|
||||||
|
if isinstance(x, str):
|
||||||
|
x = x.encode('ascii')
|
||||||
|
return binascii.unhexlify(x)
|
||||||
|
|
||||||
|
|
||||||
|
def as_bytes(x, encoding='utf-8'):
|
||||||
|
if isinstance(x, str):
|
||||||
|
return x.encode(encoding)
|
||||||
|
if isinstance(x, bytes):
|
||||||
|
return x
|
||||||
|
if isinstance(x, bytearray):
|
||||||
|
return bytes(x)
|
||||||
|
if isinstance(x, memoryview):
|
||||||
|
return x.tobytes()
|
||||||
|
return str(x).encode(encoding)
|
||||||
|
|
||||||
|
|
||||||
|
def unquote(x, encoding='utf-8', errors='replace'):
|
||||||
|
# TODO(gryf): this works like that: if x is a binary, convert it to
|
||||||
|
# string using encoding and make unquote. After that make it binary again.
|
||||||
|
# If x is string, just pass it to the unquote.
|
||||||
|
# This approach is mostly used within lxml etree strings, which suppose to
|
||||||
|
# be binary because of its inner representation. I'm wondering, if
|
||||||
|
# xml.etree could be used instead - to be checked.
|
||||||
|
binary = isinstance(x, bytes)
|
||||||
|
if binary:
|
||||||
|
x = x.decode(encoding, errors)
|
||||||
|
ans = urllib.parse.unquote(x, encoding, errors)
|
||||||
|
if binary:
|
||||||
|
ans = ans.encode(encoding, errors)
|
||||||
|
return ans
|
||||||
@@ -1,26 +0,0 @@
|
|||||||
from base64 import standard_b64decode, standard_b64encode
|
|
||||||
from binascii import hexlify, unhexlify
|
|
||||||
|
|
||||||
|
|
||||||
def as_base64_unicode(x, enc='utf-8'):
|
|
||||||
if isinstance(x, str):
|
|
||||||
x = x.encode(enc)
|
|
||||||
return standard_b64encode(x).decode('ascii')
|
|
||||||
|
|
||||||
|
|
||||||
def from_base64_bytes(x):
|
|
||||||
if isinstance(x, str):
|
|
||||||
x = x.encode('ascii')
|
|
||||||
return standard_b64decode(x)
|
|
||||||
|
|
||||||
|
|
||||||
def as_hex_bytes(x, enc='utf-8'):
|
|
||||||
if isinstance(x, str):
|
|
||||||
x = x.encode(enc)
|
|
||||||
return hexlify(x)
|
|
||||||
|
|
||||||
|
|
||||||
def from_hex_bytes(x):
|
|
||||||
if isinstance(x, str):
|
|
||||||
x = x.encode('ascii')
|
|
||||||
return unhexlify(x)
|
|
||||||
@@ -1,10 +0,0 @@
|
|||||||
def as_bytes(x, encoding='utf-8'):
|
|
||||||
if isinstance(x, str):
|
|
||||||
return x.encode(encoding)
|
|
||||||
if isinstance(x, bytes):
|
|
||||||
return x
|
|
||||||
if isinstance(x, bytearray):
|
|
||||||
return bytes(x)
|
|
||||||
if isinstance(x, memoryview):
|
|
||||||
return x.tobytes()
|
|
||||||
return str(x).encode(encoding)
|
|
||||||
@@ -1,17 +0,0 @@
|
|||||||
import urllib.parse
|
|
||||||
|
|
||||||
|
|
||||||
def unquote(x, encoding='utf-8', errors='replace'):
|
|
||||||
# TODO(gryf): this works like that: if x is a binary, convert it to
|
|
||||||
# string using encoding and make unquote. After that make it binary again.
|
|
||||||
# If x is string, just pass it to the unquote.
|
|
||||||
# This approach is mostly used within lxml etree strings, which suppose to
|
|
||||||
# be binary because of its inner representation. I'm wondering, if
|
|
||||||
# xml.etree could be used instead - to be checked.
|
|
||||||
binary = isinstance(x, bytes)
|
|
||||||
if binary:
|
|
||||||
x = x.decode(encoding, errors)
|
|
||||||
ans = urllib.parse.unquote(x, encoding, errors)
|
|
||||||
if binary:
|
|
||||||
ans = ans.encode(encoding, errors)
|
|
||||||
return ans
|
|
||||||
@@ -8,6 +8,7 @@ import tempfile
|
|||||||
|
|
||||||
from ebook_converter.constants_old import __version__, __appname__, \
|
from ebook_converter.constants_old import __version__, __appname__, \
|
||||||
filesystem_encoding
|
filesystem_encoding
|
||||||
|
from ebook_converter import polyglot
|
||||||
|
|
||||||
|
|
||||||
def cleanup(path):
|
def cleanup(path):
|
||||||
@@ -90,9 +91,8 @@ def base_dir():
|
|||||||
td = os.environ.get('CALIBRE_WORKER_TEMP_DIR', None)
|
td = os.environ.get('CALIBRE_WORKER_TEMP_DIR', None)
|
||||||
if td is not None:
|
if td is not None:
|
||||||
from ebook_converter.utils.serialize import msgpack_loads
|
from ebook_converter.utils.serialize import msgpack_loads
|
||||||
from ebook_converter.polyglot.binary import from_hex_bytes
|
|
||||||
try:
|
try:
|
||||||
td = msgpack_loads(from_hex_bytes(td))
|
td = msgpack_loads(polyglot.from_hex_bytes(td))
|
||||||
except Exception:
|
except Exception:
|
||||||
td = None
|
td = None
|
||||||
if td and os.path.exists(td):
|
if td and os.path.exists(td):
|
||||||
|
|||||||
@@ -11,7 +11,7 @@
|
|||||||
import operator
|
import operator
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from ebook_converter.polyglot.binary import from_hex_bytes
|
from ebook_converter import polyglot
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['decode'] # Everything else is implementation detail
|
__all__ = ['decode'] # Everything else is implementation detail
|
||||||
@@ -94,7 +94,8 @@ def try_encoding(css_bytes, encoding, fallback=True):
|
|||||||
|
|
||||||
|
|
||||||
def hex2re(hex_data):
|
def hex2re(hex_data):
|
||||||
return re.escape(from_hex_bytes(hex_data.replace(' ', '').encode('ascii')))
|
return re.escape(polyglot.from_hex_bytes(hex_data.replace(' ', '')
|
||||||
|
.encode('ascii')))
|
||||||
|
|
||||||
|
|
||||||
class Slicer(object):
|
class Slicer(object):
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ import struct
|
|||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
from ebook_converter.polyglot.builtins import as_bytes
|
from ebook_converter import polyglot
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
@@ -38,7 +38,7 @@ def get_tables(raw):
|
|||||||
|
|
||||||
def get_table(raw, name):
|
def get_table(raw, name):
|
||||||
''' Get the raw table bytes for the specified table in the font '''
|
''' Get the raw table bytes for the specified table in the font '''
|
||||||
name = as_bytes(name.lower())
|
name = polyglot.as_bytes(name.lower())
|
||||||
for table_tag, table, table_index, table_offset, table_checksum in get_tables(raw):
|
for table_tag, table, table_index, table_offset, table_checksum in get_tables(raw):
|
||||||
if table_tag.lower() == name:
|
if table_tag.lower() == name:
|
||||||
return table, table_index, table_offset, table_checksum
|
return table, table_index, table_offset, table_checksum
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
from ebook_converter import polyglot
|
||||||
|
|
||||||
|
|
||||||
MSGPACK_MIME = 'application/x-msgpack'
|
MSGPACK_MIME = 'application/x-msgpack'
|
||||||
CANARY = 'jPoAv3zOyHvQ5JFNYg4hJ9'
|
CANARY = 'jPoAv3zOyHvQ5JFNYg4hJ9'
|
||||||
|
|
||||||
@@ -56,11 +59,11 @@ def json_dumps(data, **kw):
|
|||||||
|
|
||||||
|
|
||||||
def decode_metadata(x, for_json):
|
def decode_metadata(x, for_json):
|
||||||
from ebook_converter.polyglot.binary import from_base64_bytes
|
|
||||||
from ebook_converter.ebooks.metadata.book.serialize import metadata_from_dict
|
from ebook_converter.ebooks.metadata.book.serialize import metadata_from_dict
|
||||||
obj = metadata_from_dict(x)
|
obj = metadata_from_dict(x)
|
||||||
if for_json and obj.cover_data and obj.cover_data[1]:
|
if for_json and obj.cover_data and obj.cover_data[1]:
|
||||||
obj.cover_data = obj.cover_data[0], from_base64_bytes(obj.cover_data[1])
|
obj.cover_data = (obj.cover_data[0],
|
||||||
|
polyglot.from_base64_bytes(obj.cover_data[1]))
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,9 @@
|
|||||||
import os, sys, re
|
import fcntl
|
||||||
import fcntl, termios, struct
|
import os
|
||||||
|
import re
|
||||||
|
import struct
|
||||||
|
import sys
|
||||||
|
import termios
|
||||||
|
|
||||||
|
|
||||||
def fmt(code):
|
def fmt(code):
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ from tempfile import SpooledTemporaryFile
|
|||||||
from ebook_converter.utils import filenames as fms
|
from ebook_converter.utils import filenames as fms
|
||||||
from ebook_converter.constants_old import filesystem_encoding
|
from ebook_converter.constants_old import filesystem_encoding
|
||||||
from ebook_converter.ebooks.chardet import detect
|
from ebook_converter.ebooks.chardet import detect
|
||||||
from ebook_converter.polyglot.builtins import as_bytes
|
from ebook_converter import polyglot
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import zlib # We may need its compression method
|
import zlib # We may need its compression method
|
||||||
@@ -330,7 +330,7 @@ class ZipInfo (object):
|
|||||||
if os.sep != '/':
|
if os.sep != '/':
|
||||||
os_sep, sep = os.sep, '/'
|
os_sep, sep = os.sep, '/'
|
||||||
if isinstance(filename, bytes):
|
if isinstance(filename, bytes):
|
||||||
os_sep, sep = as_bytes(os_sep), b'/'
|
os_sep, sep = polyglot.as_bytes(os_sep), b'/'
|
||||||
if os_sep in filename:
|
if os_sep in filename:
|
||||||
filename = filename.replace(os_sep, sep)
|
filename = filename.replace(os_sep, sep)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user