Mirror of https://github.com/gryf/ebook-converter.git
Moved misc functions from polyglot package to single polyglot module.
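In code terms, the change swaps per-helper imports from the old polyglot subpackage for a single module import at every call site. A minimal before/after sketch (illustrative only, not a hunk from this commit; `data` and `item_href` are placeholder names):

    # before: helpers imported one by one from submodules
    from ebook_converter.polyglot.builtins import as_bytes
    from ebook_converter.polyglot.urllib import unquote

    raw = as_bytes(data)
    href = unquote(item_href)

    # after: one import, helpers accessed as module attributes
    from ebook_converter import polyglot

    raw = polyglot.as_bytes(data)
    href = polyglot.unquote(item_href)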
@@ -5,17 +5,12 @@ import os
 from lxml import html
 from lxml.html import builder
 
-from ebook_converter.polyglot.urllib import unquote as _unquote
 from ebook_converter.ebooks.oeb.base import urlquote
 from ebook_converter.ebooks.chardet import xml_to_unicode
 from ebook_converter.customize.conversion import InputFormatPlugin
 from ebook_converter.ptempfile import TemporaryDirectory
 from ebook_converter.constants_old import filesystem_encoding
-from ebook_converter.polyglot.builtins import as_bytes
-
-__license__ = 'GPL v3'
-__copyright__ = ('2008, Kovid Goyal <kovid at kovidgoyal.net>, '
-                 'and Alex Bramley <a.bramley at gmail.com>.')
+from ebook_converter import polyglot
 
 
 class CHMInput(InputFormatPlugin):
@@ -133,7 +128,7 @@ class CHMInput(InputFormatPlugin):
         def unquote(x):
             if isinstance(x, str):
                 x = x.encode('utf-8')
-            return _unquote(x).decode('utf-8')
+            return polyglot.unquote(x).decode('utf-8')
 
         def unquote_path(x):
             y = unquote(x)
@@ -175,7 +170,7 @@ class CHMInput(InputFormatPlugin):
                                       pretty_print=True)
                 f.write(raw)
             else:
-                f.write(as_bytes(hhcdata))
+                f.write(polyglot.as_bytes(hhcdata))
         return htmlpath, toc
 
     def _read_file(self, name):
@@ -9,7 +9,7 @@ from ebook_converter.ebooks.oeb import parse_utils
 from ebook_converter.customize.conversion import OutputFormatPlugin
 from ebook_converter.customize.conversion import OptionRecommendation
 from ebook_converter.ptempfile import TemporaryDirectory
-from ebook_converter.polyglot.builtins import as_bytes
+from ebook_converter import polyglot
 from ebook_converter.utils import directory
 
 
@@ -266,7 +266,8 @@ class EPUBOutput(OutputFormatPlugin):
                 extra_entries=extra_entries) as epub:
             epub.add_dir(tdir)
             if encryption is not None:
-                epub.writestr('META-INF/encryption.xml', as_bytes(encryption))
+                epub.writestr('META-INF/encryption.xml',
+                              polyglot.as_bytes(encryption))
             if metadata_xml is not None:
                 epub.writestr('META-INF/metadata.xml',
                               metadata_xml.encode('utf-8'))
@@ -308,12 +309,10 @@ class EPUBOutput(OutputFormatPlugin):
             pass
 
     def encrypt_fonts(self, uris, tdir, _uuid):  # {{{
-        from ebook_converter.polyglot.binary import from_hex_bytes
-
         key = re.sub(r'[^a-fA-F0-9]', '', _uuid)
         if len(key) < 16:
             raise ValueError('UUID identifier %r is invalid'% _uuid)
-        key = bytearray(from_hex_bytes((key + key)[:32]))
+        key = bytearray(polyglot.from_hex_bytes((key + key)[:32]))
         paths = []
         with directory.CurrentDir(tdir):
             paths = [os.path.join(*x.split('/')) for x in uris]
@@ -7,7 +7,7 @@ from lxml import etree
 
 from ebook_converter.customize.conversion import OutputFormatPlugin, OptionRecommendation
 from ebook_converter.ebooks.oeb.base import element
-from ebook_converter.polyglot.urllib import unquote
+from ebook_converter import polyglot
 from ebook_converter.ptempfile import PersistentTemporaryDirectory
 from ebook_converter.utils.cleantext import clean_xml_chars
 from ebook_converter.utils import directory
@@ -56,7 +56,8 @@ class HTMLOutput(OutputFormatPlugin):
         parent = element(parent, ('ul'))
         for node in current_node.nodes:
             point = element(parent, 'li')
-            href = relpath(os.path.abspath(unquote(node.href)),
+            href = relpath(os.path.abspath(polyglot
+                                           .unquote(node.href)),
                            os.path.dirname(ref_url))
             if isinstance(href, bytes):
                 href = href.decode('utf-8')
@@ -84,7 +85,6 @@ class HTMLOutput(OutputFormatPlugin):
         from lxml import etree
         from ebook_converter.utils import zipfile
         from templite import Templite
-        from ebook_converter.polyglot.urllib import unquote
         from ebook_converter.ebooks.html.meta import EasyMeta
 
         # read template files
@@ -156,7 +156,7 @@ class HTMLOutput(OutputFormatPlugin):
 
         with directory.CurrentDir(output_dir):
             for item in oeb_book.manifest:
-                path = os.path.abspath(unquote(item.href))
+                path = os.path.abspath(polyglot.unquote(item.href))
                 dir = os.path.dirname(path)
                 if not os.path.exists(dir):
                     os.makedirs(dir)
@@ -169,7 +169,7 @@ class HTMLOutput(OutputFormatPlugin):
                     item.unload_data_from_memory(memory=path)
 
             for item in oeb_book.spine:
-                path = os.path.abspath(unquote(item.href))
+                path = os.path.abspath(polyglot.unquote(item.href))
                 dir = os.path.dirname(path)
                 root = item.data.getroottree()
@@ -5,7 +5,7 @@ from lxml import etree
 
 from ebook_converter.customize.conversion import (OutputFormatPlugin,
                                                   OptionRecommendation)
-from ebook_converter.polyglot.urllib import unquote
+from ebook_converter import polyglot
 from ebook_converter.ebooks.oeb.base import OPF_MIME, NCX_MIME, PAGE_MAP_MIME, OEB_STYLES
 from ebook_converter.ebooks.oeb.normalize_css import condense_sheet
 from ebook_converter.utils import directory
@@ -56,7 +56,7 @@ class OEBOutput(OutputFormatPlugin):
                     not self.opts.expand_css and item.media_type in OEB_STYLES and hasattr(
                         item.data, 'cssText') and 'nook' not in self.opts.output_profile.short_name):
                 condense_sheet(item.data)
-            path = os.path.abspath(unquote(item.href))
+            path = os.path.abspath(polyglot.unquote(item.href))
             dir = os.path.dirname(path)
             if not os.path.exists(dir):
                 os.makedirs(dir)
@@ -1,12 +1,7 @@
 import os
 
 from ebook_converter.customize.conversion import InputFormatPlugin, OptionRecommendation
-from ebook_converter.polyglot.builtins import as_bytes
-
-
-__license__ = 'GPL 3'
-__copyright__ = '2009, John Schember <john@nachtimwald.com>'
-__docformat__ = 'restructuredtext en'
+from ebook_converter import polyglot
 
 
 class PDFInput(InputFormatPlugin):
@@ -72,7 +67,8 @@ class PDFInput(InputFormatPlugin):
             ncxid = opf.manifest.id_for_path('toc.ncx')
             if ncxid:
                 with open('metadata.opf', 'r+b') as f:
-                    raw = f.read().replace(b'<spine', b'<spine toc="%s"' % as_bytes(ncxid))
+                    raw = f.read().replace(b'<spine', b'<spine toc="%s"' %
+                                           polyglot.as_bytes(ncxid))
                     f.seek(0)
                     f.write(raw)
 
@@ -8,7 +8,7 @@ from lxml import etree
 
 from ebook_converter.customize.conversion import InputFormatPlugin
 from ebook_converter.customize.conversion import OptionRecommendation
-from ebook_converter.polyglot.builtins import as_bytes
+from ebook_converter import polyglot
 
 
 border_style_map = {'single': 'solid',
@@ -296,7 +296,7 @@ class RTFInput(InputFormatPlugin):
         result = transform(doc)
         html = u'index.xhtml'
         with open(html, 'wb') as f:
-            res = as_bytes(transform.tostring(result))
+            res = polyglot.as_bytes(transform.tostring(result))
             # res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
             # clean multiple \n
             res = re.sub(b'\n+', b'\n', res)
@@ -1,22 +1,20 @@
 __license__ = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
+import io
 
+from ebook_converter import polyglot
 
 
 def base64_decode(raw):
-    from io import BytesIO
-    from ebook_converter.polyglot.binary import from_base64_bytes
-
     # First try the python implementation as it is faster
     try:
-        return from_base64_bytes(raw)
+        return polyglot.from_base64_bytes(raw)
     except Exception:
         pass
 
     # Try a more robust version (adapted from FBReader sources)
     A, Z, a, z, zero, nine, plus, slash, equal = bytearray(b'AZaz09+/=')
     raw = bytearray(raw)
-    out = BytesIO()
+    out = io.BytesIO()
     pos = 0
     while pos < len(raw):
         tot = 0
@@ -32,7 +30,7 @@ def base64_decode(raw):
             elif zero <= byt <= nine:
                 num = byt - zero + 52
             else:
-                num = {plus:62, slash:63, equal:64}.get(byt, None)
+                num = {plus: 62, slash: 63, equal: 64}.get(byt, None)
             if num is None:
                 # Ignore this byte
                 continue
@@ -13,7 +13,7 @@ from ebook_converter import constants as const
 from ebook_converter.constants_old import __appname__, __version__
 from ebook_converter.ebooks.oeb import base
 from ebook_converter.ebooks.oeb import parse_utils
-from ebook_converter.polyglot.binary import as_base64_unicode
+from ebook_converter import polyglot
 from ebook_converter.utils import entities
 from ebook_converter.utils.img import save_cover_data_to
 from ebook_converter.utils.localization import lang_as_iso639_1
@@ -355,10 +355,10 @@ class FB2MLizer(object):
                 if item.media_type not in ('image/jpeg', 'image/png'):
                     imdata = save_cover_data_to(item.data,
                                                 compression_quality=70)
-                    raw_data = as_base64_unicode(imdata)
+                    raw_data = polyglot.as_base64_unicode(imdata)
                     content_type = 'image/jpeg'
                 else:
-                    raw_data = as_base64_unicode(item.data)
+                    raw_data = polyglot.as_base64_unicode(item.data)
                     content_type = item.media_type
                 # Don't put the encoded image on a single line.
                 step = 72
@@ -14,26 +14,24 @@ from ebook_converter.ebooks.oeb import parse_utils
 from ebook_converter.ebooks.oeb.stylizer import Stylizer
 from ebook_converter.utils import entities
 from ebook_converter.utils.logging import default_log
-from ebook_converter.polyglot.builtins import as_bytes
+from ebook_converter import polyglot
 
 
 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
-SELF_CLOSING_TAGS = {'area', 'base', 'basefont', 'br', 'hr', 'input', 'img', 'link', 'meta'}
+SELF_CLOSING_TAGS = {'area', 'base', 'basefont', 'br', 'hr', 'input', 'img',
+                     'link', 'meta'}
 
 
 class OEB2HTML(object):
-    '''
-    Base class. All subclasses should implement dump_text to actually transform
-    content. Also, callers should use oeb2html to get the transformed html.
-    links and images can be retrieved after calling oeb2html to get the mapping
-    of OEB links and images to the new names used in the html returned by oeb2html.
-    Images will always be referenced as if they are in an images directory.
+    """
+    Base class. All subclasses should implement dump_text to actually
+    transform content. Also, callers should use oeb2html to get the
+    transformed html links and images can be retrieved after calling oeb2html
+    to get the mapping of OEB links and images to the new names used in the
+    html returned by oeb2html. Images will always be referenced as if they are
+    in an images directory.
 
     Use get_css to get the CSS classes for the OEB document as a string.
-    '''
+    """
 
     def __init__(self, log=None):
         self.log = default_log if log is None else log
@@ -55,16 +53,18 @@ class OEB2HTML(object):
         return self.mlize_spine(oeb_book)
 
     def mlize_spine(self, oeb_book):
-        output = [
-            u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" /><title>%s</title></head><body>' % (
-                entities.prepare_string_for_xml(self.book_title))
-        ]
+        output = ['<html><head><meta http-equiv="Content-Type" '
+                  'content="text/html;charset=utf-8" />'
+                  '<title>%s</title></head>'
+                  '<body>' % entities.prepare_string_for_xml(self.book_title)]
         for item in oeb_book.spine:
             self.log.debug('Converting %s to HTML...' % item.href)
             self.rewrite_ids(item.data, item)
-            base.rewrite_links(item.data, partial(self.rewrite_link, page=item))
+            base.rewrite_links(item.data, partial(self.rewrite_link,
+                                                  page=item))
             stylizer = Stylizer(item.data, item.href, oeb_book, self.opts)
-            output += self.dump_text(item.data.find(base.tag('xhtml', 'body')), stylizer, item)
+            output += self.dump_text(item.data.find(base.tag('xhtml', 'body')),
+                                     stylizer, item)
             output.append('\n\n')
         output.append('</body></html>')
         return ''.join(output)
@@ -126,13 +126,14 @@ class OEB2HTML(object):
                 el.attrib['id'] = self.get_link_id(page.href)[1:]
                 continue
             if 'id' in el.attrib:
-                el.attrib['id'] = self.get_link_id(page.href, el.attrib['id'])[1:]
+                el.attrib['id'] = self.get_link_id(page.href,
+                                                   el.attrib['id'])[1:]
 
     def get_css(self, oeb_book):
         css = b''
         for item in oeb_book.manifest:
             if item.media_type == 'text/css':
-                css += as_bytes(item.data.cssText) + b'\n\n'
+                css += polyglot.as_bytes(item.data.cssText) + b'\n\n'
         return css
 
     def prepare_string_for_html(self, raw):
@@ -157,10 +158,14 @@ class OEB2HTMLNoCSSizer(OEB2HTML):
 
         # We can only processes tags. If there isn't a tag return any text.
         if not isinstance(elem.tag, (str, bytes)) \
-           or parse_utils.namespace(elem.tag) not in (const.XHTML_NS, const.SVG_NS):
+           or parse_utils.namespace(elem.tag) not in (const.XHTML_NS,
+                                                      const.SVG_NS):
             p = elem.getparent()
-            if p is not None and isinstance(p.tag, (str, bytes)) and parse_utils.namespace(p.tag) in (const.XHTML_NS, const.SVG_NS) \
-                    and elem.tail:
+            if (p is not None and
+                    isinstance(p.tag, (str, bytes)) and
+                    parse_utils.namespace(p.tag) in (const.XHTML_NS,
+                                                     const.SVG_NS) and
+                    elem.tail):
                 return [elem.tail]
             return ['']
 
@@ -176,8 +181,8 @@ class OEB2HTMLNoCSSizer(OEB2HTML):
         tags.append(tag)
 
         # Ignore anything that is set to not be displayed.
-        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
-           or style['visibility'] == 'hidden':
+        if (style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') or
+                style['visibility'] == 'hidden'):
             return ['']
 
         # Remove attributes we won't want.
@@ -186,11 +191,13 @@ class OEB2HTMLNoCSSizer(OEB2HTML):
         if 'style' in attribs:
             del attribs['style']
 
-        # Turn the rest of the attributes into a string we can write with the tag.
+        # Turn the rest of the attributes into a string we can write with the
+        # tag.
         at = ''
-        for k, v in attribs.items():
-            at += ' %s="%s"' % (k, entities
-                                .prepare_string_for_xml(v, attribute=True))
+        for key, value in attribs.items():
+            at += (' %s="%s"' %
+                   (key, entities.prepare_string_for_xml(value,
+                                                         attribute=True)))
 
         # Write the tag.
         text.append('<%s%s' % (tag, at))
@@ -246,11 +253,15 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
         '''
 
         # We can only processes tags. If there isn't a tag return any text.
-        if not isinstance(elem.tag, (str, bytes)) \
-           or parse_utils.namespace(elem.tag) not in (const.XHTML_NS, const.SVG_NS):
+        if (not isinstance(elem.tag, (str, bytes)) or
+                parse_utils.namespace(elem.tag) not in (const.XHTML_NS,
+                                                        const.SVG_NS)):
             p = elem.getparent()
-            if p is not None and isinstance(p.tag, (str, bytes)) and parse_utils.namespace(p.tag) in (const.XHTML_NS, const.SVG_NS) \
-                    and elem.tail:
+            if (p is not None and
+                    isinstance(p.tag, (str, bytes)) and
+                    parse_utils.namespace(p.tag) in (const.XHTML_NS,
+                                                     const.SVG_NS) and
+                    elem.tail):
                 return [elem.tail]
             return ['']
 
@@ -266,9 +277,11 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
         if tag == 'body':
             # Change the body to a div so we can merge multiple files.
             tag = 'div'
-            # Add page-break-brefore: always because renders typically treat a new file (we're merging files)
-            # as a page break and remove all other page break types that might be set.
-            style_a = 'page-break-before: always; %s' % re.sub('page-break-[^:]+:[^;]+;?', '', style_a)
+            # Add page-break-brefore: always because renders typically treat
+            # a new file (we're merging files) as a page break and remove all
+            # other page break types that might be set.
+            style_a = ('page-break-before: always; %s' %
+                       re.sub('page-break-[^:]+:[^;]+;?', '', style_a))
         # Remove unnecessary spaces.
         style_a = re.sub(r'\s{2,}', ' ', style_a).strip()
         tags.append(tag)
@@ -279,7 +292,8 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
         if 'style' in attribs:
             del attribs['style']
 
-        # Turn the rest of the attributes into a string we can write with the tag.
+        # Turn the rest of the attributes into a string we can write with
+        # the tag.
         at = ''
         for k, v in attribs.items():
             at += ' %s="%s"' % (k, entities
@@ -319,43 +333,51 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
 
 
 class OEB2HTMLClassCSSizer(OEB2HTML):
-    '''
-    Use CSS classes. css_style option can specify whether to use
-    inline classes (style tag in the head) or reference an external
-    CSS file called style.css.
-    '''
+    """
+    Use CSS classes. css_style option can specify whether to use inline
+    classes (style tag in the head) or reference an external CSS file called
+    style.css.
+    """
 
     def mlize_spine(self, oeb_book):
         output = []
         for item in oeb_book.spine:
             self.log.debug('Converting %s to HTML...' % item.href)
             self.rewrite_ids(item.data, item)
-            base.rewrite_links(item.data, partial(self.rewrite_link, page=item))
+            base.rewrite_links(item.data, partial(self.rewrite_link,
+                                                  page=item))
             stylizer = Stylizer(item.data, item.href, oeb_book, self.opts)
-            output += self.dump_text(item.data.find(base.tag('xhtml', 'body')), stylizer, item)
+            output += self.dump_text(item.data.find(base.tag('xhtml', 'body')),
+                                     stylizer, item)
             output.append('\n\n')
         if self.opts.htmlz_class_style == 'external':
-            css = u'<link href="style.css" rel="stylesheet" type="text/css" />'
+            css = '<link href="style.css" rel="stylesheet" type="text/css" />'
         else:
-            css = u'<style type="text/css">' + self.get_css(oeb_book) + u'</style>'
-        title = (u'<title>%s</title>' %
+            css = ('<style type="text/css">' + self.get_css(oeb_book) +
+                   '</style>')
+        title = ('<title>%s</title>' %
                  entities.prepare_string_for_xml(self.book_title))
-        output = [u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" />'] + \
-            [css] + [title, u'</head><body>'] + output + [u'</body></html>']
+        output = (['<html><head><meta http-equiv="Content-Type" '
+                   'content="text/html;charset=utf-8" />'] + [css] +
+                  [title, '</head><body>'] + output + ['</body></html>'])
         return ''.join(output)
 
     def dump_text(self, elem, stylizer, page):
-        '''
+        """
         @elem: The element in the etree that we are working on.
         @stylizer: The style information attached to the element.
-        '''
+        """
 
         # We can only processes tags. If there isn't a tag return any text.
-        if not isinstance(elem.tag, (str, bytes)) \
-           or parse_utils.namespace(elem.tag) not in (const.XHTML_NS, const.SVG_NS):
+        if (not isinstance(elem.tag, (str, bytes)) or
+                parse_utils.namespace(elem.tag) not in (const.XHTML_NS,
+                                                        const.SVG_NS)):
             p = elem.getparent()
-            if p is not None and isinstance(p.tag, (str, bytes)) and parse_utils.namespace(p.tag) in (const.XHTML_NS, const.SVG_NS) \
-                    and elem.tail:
+            if (p is not None and
+                    isinstance(p.tag, (str, bytes)) and
+                    parse_utils.namespace(p.tag) in (const.XHTML_NS,
+                                                     const.SVG_NS) and
+                    elem.tail):
                 return [elem.tail]
             return ['']
 
@@ -373,11 +395,12 @@ class OEB2HTMLClassCSSizer(OEB2HTML):
         if 'style' in attribs:
             del attribs['style']
 
-        # Turn the rest of the attributes into a string we can write with the tag.
+        # Turn the rest of the attributes into a string we can write with
+        # the tag.
         at = ''
         for k, v in attribs.items():
-            at += ' %s="%s"' % (k,
-                                entities.prepare_string_for_xml(v, attribute=True))
+            at += ' %s="%s"' % (k, entities
+                                .prepare_string_for_xml(v, attribute=True))
 
         # Write the tag.
         text.append('<%s%s' % (tag, at))
@@ -5,7 +5,7 @@ import textwrap
 
 from lxml import etree
 
-from ebook_converter.polyglot.builtins import as_bytes
+from ebook_converter import polyglot
 
 
 class Canvas(etree.XSLTExtension):
@@ -292,7 +292,7 @@ class Styles(etree.XSLTExtension):
             return '\n\t'.join(ans)
 
         with open(name, 'wb') as f:
-            f.write(as_bytes(self.CSS))
+            f.write(polyglot.as_bytes(self.CSS))
             for (w, sel) in [(self.text_styles, 'ts'), (self.block_styles,
                              'bs')]:
                 for i, s in enumerate(w):
@@ -300,7 +300,7 @@ class Styles(etree.XSLTExtension):
                         continue
                     rsel = '.%s%d'%(sel, i)
                     s = join(s)
-                    f.write(as_bytes(rsel + ' {\n\t' + s + '\n}\n\n'))
+                    f.write(polyglot.as_bytes(rsel + ' {\n\t' + s + '\n}\n\n'))
 
     def execute(self, context, self_node, input_node, output_parent):
         if input_node.tag == 'TextStyle':
@@ -9,7 +9,7 @@ import sys
 import urllib.parse
 
 from ebook_converter.utils.config_base import tweaks
-from ebook_converter.polyglot.urllib import unquote
+from ebook_converter import polyglot
 from ebook_converter.utils import encoding as uenc
 
 
@@ -248,9 +248,11 @@ class Resource(object):
             pc = url[2]
             if isinstance(pc, str):
                 pc = pc.encode('utf-8')
-            pc = unquote(pc).decode('utf-8')
-            self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
-            self.fragment = unquote(url[-1])
+            pc = polyglot.unquote(pc).decode('utf-8')
+            self.path = os.path.abspath(os.path.join(basedir,
+                                                     pc.replace('/',
+                                                                os.sep)))
+            self.fragment = polyglot.unquote(url[-1])
 
     def href(self, basedir=None):
         '''
@@ -14,7 +14,7 @@ from ebook_converter.utils.img import save_cover_data_to
 from ebook_converter.utils.imghdr import identify
 from ebook_converter.ebooks.metadata import MetaInformation, check_isbn
 from ebook_converter.ebooks.chardet import xml_to_unicode
-from ebook_converter.polyglot.binary import as_base64_unicode
+from ebook_converter import polyglot
 from ebook_converter.utils import encoding as uenc
 
 
@@ -389,7 +389,7 @@ def _rnd_pic_file_name(prefix='calibre_cover_', size=32, ext='jpg'):
 
 def _encode_into_jpeg(data):
     data = save_cover_data_to(data)
-    return as_base64_unicode(data)
+    return polyglot.as_base64_unicode(data)
 
 
 def _set_cover(title_info, mi, ctx):
@@ -30,11 +30,11 @@ from ebook_converter.ebooks.metadata.utils import parse_opf, \
 from ebook_converter.ebooks.metadata import string_to_authors, \
     MetaInformation, check_isbn
 from ebook_converter.ebooks.metadata.book.base import Metadata
+from ebook_converter import polyglot
 from ebook_converter.utils.date import parse_date, isoformat
 from ebook_converter.utils.localization import get_lang, canonicalize_lang
 from ebook_converter.utils.cleantext import clean_ascii_chars, clean_xml_chars
 from ebook_converter.utils.config_base import tweaks
-from ebook_converter.polyglot.urllib import unquote
 
 
 pretty_print_opf = False
@@ -838,7 +838,7 @@ class OPF(object):  # {{{
 
     def unquote_urls(self):
        def get_href(item):
-            raw = unquote(item.get('href', ''))
+            raw = polyglot.unquote(item.get('href', ''))
            if not isinstance(raw, str):
                raw = raw.decode('utf-8')
            return raw
@@ -11,7 +11,7 @@ from lxml.builder import ElementMaker
 from ebook_converter.constants_old import __appname__, __version__
 from ebook_converter.ebooks.chardet import xml_to_unicode
 from ebook_converter.utils.cleantext import clean_xml_chars
-from ebook_converter.polyglot.urllib import unquote
+from ebook_converter import polyglot
 
 
 NCX_NS = "http://www.daisy.org/z3986/2005/ncx/"
@@ -31,7 +31,7 @@ def parse_html_toc(data):
     root = parse(clean_xml_chars(data), maybe_xhtml=True, keep_doctype=False,
                  sanitize_names=True)
     for a in root.xpath('//*[@href and local-name()="a"]'):
-        purl = urllib.parse.urlparse(unquote(a.get('href')))
+        purl = urllib.parse.urlparse(polyglot.unquote(a.get('href')))
         href, fragment = purl[2], purl[5]
         if not fragment:
             fragment = None
@@ -149,7 +149,7 @@ class TOC(list):
 
         if toc is not None:
             if toc.lower() not in ('ncx', 'ncxtoc'):
-                toc = urllib.parse.urlparse(unquote(toc))[2]
+                toc = urllib.parse.urlparse(polyglot.unquote(toc))[2]
                 toc = toc.replace('/', os.sep)
                 if not os.path.isabs(toc):
                     toc = os.path.join(self.base_path, toc)
@@ -219,7 +219,8 @@ class TOC(list):
                 content = content[0]
                 # if get_attr(content, attr='src'):
                 purl = urllib.parse.urlparse(content.get('src'))
-                href, fragment = unquote(purl[2]), unquote(purl[5])
+                href = polyglot.unquote(purl[2])
+                fragment = polyglot.unquote(purl[5])
                 nd = dest.add_item(href, fragment, text)
                 nd.play_order = play_order
 
@@ -5,7 +5,7 @@ from io import BytesIO
 from ebook_converter.utils.img import save_cover_data_to, scale_image, image_to_data, image_from_data, resize_image, png_data_to_gif_data
 from ebook_converter.utils.imghdr import what
 from ebook_converter.ebooks import normalize
-from ebook_converter.polyglot.builtins import as_bytes
+from ebook_converter import polyglot
 from ebook_converter.tinycss.color3 import parse_color_string
 
 
@@ -61,7 +61,7 @@ def decode_hex_number(raw, codec='utf-8'):
 
 
 def encode_string(raw):
-    ans = bytearray(as_bytes(raw))
+    ans = bytearray(polyglot.as_bytes(raw))
     ans.insert(0, len(ans))
     return bytes(ans)
 
@@ -15,7 +15,7 @@ from odf.namespaces import TEXTNS as odTEXTNS
 
 from ebook_converter.utils import directory
 from ebook_converter.ebooks.oeb.base import _css_logger
-from ebook_converter.polyglot.builtins import as_bytes
+from ebook_converter import polyglot
 
 
 class Extract(ODF2XHTML):
@@ -292,7 +292,7 @@ class Extract(ODF2XHTML):
         except:
             log.exception('Failed to filter CSS, conversion may be slow')
         with open('index.xhtml', 'wb') as f:
-            f.write(as_bytes(html))
+            f.write(polyglot.as_bytes(html))
         zf = ZipFile(stream, 'r')
         self.extract_pictures(zf)
         opf = OPFCreator(os.path.abspath(os.getcwd()), mi)
@@ -24,7 +24,7 @@ from ebook_converter.utils.localization import get_lang
 from ebook_converter.ptempfile import TemporaryDirectory
 from ebook_converter.constants_old import __appname__, __version__
 from ebook_converter.utils import entities
-from ebook_converter.polyglot.urllib import unquote
+from ebook_converter import polyglot
 
 
 class OEBReader(object):
@@ -641,7 +641,7 @@ class OEBReader(object):
         with TemporaryDirectory('_html_cover') as tdir:
             writer = OEBWriter()
             writer(self.oeb, tdir)
-            path = os.path.join(tdir, unquote(hcover.href))
+            path = os.path.join(tdir, polyglot.unquote(hcover.href))
             data = render_html_svg_workaround(path, self.logger)
             if not data:
                 data = b''
@@ -5,7 +5,7 @@ import urllib.parse
 from lxml import etree
 
 from ebook_converter.utils.imghdr import identify
-from ebook_converter.polyglot.urllib import unquote
+from ebook_converter import polyglot
 
 
 class CoverManager(object):
@@ -113,7 +113,7 @@ class CoverManager(object):
         if href is not None:
             templ = self.non_svg_template if self.no_svg_cover \
                 else self.svg_template
-            tp = templ % unquote(href)
+            tp = templ % polyglot.unquote(href)
             id, href = m.generate('titlepage', 'titlepage.xhtml')
             item = m.add(id, href, mimetypes.guess_type('t.xhtml')[0],
                          data=etree.fromstring(tp))
@@ -3,8 +3,7 @@ import re
 import urllib.parse
 
 from ebook_converter.ebooks.oeb.base import XPath
-from ebook_converter.polyglot.binary import from_base64_bytes
-from ebook_converter.polyglot.builtins import as_bytes
+from ebook_converter import polyglot
 
 
 class DataURL(object):
@@ -27,14 +26,14 @@ class DataURL(object):
         if ';base64' in header:
             data = re.sub(r'\s+', '', data)
             try:
-                data = from_base64_bytes(data)
+                data = polyglot.from_base64_bytes(data)
             except Exception:
                 self.log.error('Found invalid base64 encoded data '
                                'URI, ignoring it')
                 continue
         else:
             data = urllib.parse.unquote(data)
-            data = as_bytes(data)
+            data = polyglot.as_bytes(data)
         fmt = what(None, data)
         if not fmt:
             self.log.warn('Image encoded as data URL has unknown '
@@ -17,7 +17,7 @@ from ebook_converter import constants as const
 from ebook_converter.ebooks.epub import rules
 from ebook_converter.ebooks.oeb import base
 from ebook_converter.ebooks.oeb.polish.split import do_split
-from ebook_converter.polyglot.urllib import unquote
+from ebook_converter import polyglot
 from ebook_converter.css_selectors import Select, SelectorError
 from ebook_converter.utils import encoding as uenc
@@ -189,7 +189,7 @@ class Split(object):
             nhref = anchor_map[frag if frag else None]
             nhref = self.current_item.relhref(nhref)
             if frag:
-                nhref = '#'.join((unquote(nhref), frag))
+                nhref = '#'.join((polyglot.unquote(nhref), frag))
 
             return nhref
         return url
@@ -1,20 +1,18 @@
-import codecs, zlib, numbers
-from io import BytesIO
+import codecs
 from datetime import datetime
+import io
+import numbers
+import zlib
 
 from ebook_converter.utils.logging import default_log
-from ebook_converter.polyglot.binary import as_hex_bytes
+from ebook_converter import polyglot
 
 
-__license__ = 'GPL v3'
-__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
-__docformat__ = 'restructuredtext en'
-
 pdf_float = lambda x: f"{x:.1f}"
 
 EOL = b'\n'
 
-# Sizes {{{
+# Sizes
 inch = 72.0
 cm = inch / 2.54
 mm = cm * 0.1
@@ -45,10 +43,9 @@ B2 = (_BW*2, _BH*2)
 B1 = (_BH*4, _BW*2)
 B0 = (_BW*4, _BH*4)
 
-PAPER_SIZES = {k:globals()[k.upper()] for k in ('a0 a1 a2 a3 a4 a5 a6 b0 b1 b2'
-                                                ' b3 b4 b5 b6 letter legal').split()}
-
-# }}}
+PAPER_SIZES = {k: globals()[k.upper()] for k in ('a0 a1 a2 a3 a4 a5 a6 b0 b1 '
+                                                 'b2 b3 b4 b5 b6 letter '
+                                                 'legal').split()}
 
 
 def fmtnum(o):
@@ -70,12 +67,12 @@ def serialize(o, stream):
     elif o is None:
         stream.write_raw(b'null')
     elif isinstance(o, datetime):
-        val = o.strftime("D:%Y%m%d%H%M%%02d%z")%min(59, o.second)
+        val = o.strftime("D:%Y%m%d%H%M%%02d%z") % min(59, o.second)
         if datetime.tzinfo is not None:
-            val = "(%s'%s')"%(val[:-2], val[-2:])
+            val = "(%s'%s')" % (val[:-2], val[-2:])
         stream.write(val.encode('ascii'))
     else:
-        raise ValueError('Unknown object: %r'%o)
+        raise ValueError('Unknown object: %r' % o)
 
 
 class Name(str):
@@ -83,7 +80,7 @@ class Name(str):
     def pdf_serialize(self, stream):
         raw = self.encode('ascii')
         if len(raw) > 126:
-            raise ValueError('Name too long: %r'%self)
+            raise ValueError('Name too long: %r' % self)
         raw = bytearray(raw)
         sharp = ord(b'#')
         buf = (
@@ -96,7 +93,8 @@ def escape_pdf_string(bytestring):
     indices = []
     bad = []
    ba = bytearray(bytestring)
-    bad_map = {10:ord('n'), 13:ord('r'), 12:ord('f'), 8:ord('b'), 9:ord('\t'), 92:ord('\\')}
+    bad_map = {10: ord('n'), 13: ord('r'), 12: ord('f'),
+               8: ord('b'), 9: ord('\t'), 92: ord('\\')}
     for i, num in enumerate(ba):
         if num == 40:  # (
             indices.append((i, 40))
@@ -134,7 +132,7 @@ class UTF16String(str):
         if False:
             # Disabled as the parentheses based strings give easier to debug
             # PDF files
-            stream.write(b'<' + as_hex_bytes(raw) + b'>')
+            stream.write(b'<' + polyglot.as_hex_bytes(raw) + b'>')
         else:
             stream.write(b'('+escape_pdf_string(raw)+b')')
 
@@ -143,9 +141,9 @@ class Dictionary(dict):
 
     def pdf_serialize(self, stream):
         stream.write(b'<<' + EOL)
-        sorted_keys = sorted(self,
-                             key=lambda x:({'Type':'1', 'Subtype':'2'}.get(
-                                 x, x)+x))
+        sorted_keys = sorted(self, key=lambda x: ({'Type': '1',
+                                                   'Subtype': '2'}
                                                  .get(x, x) + x))
         for k in sorted_keys:
             serialize(Name(k), stream)
             stream.write(b' ')
@@ -177,10 +175,10 @@ class Array(list):
         stream.write(b']')
 
 
-class Stream(BytesIO):
+class Stream(io.BytesIO):
 
     def __init__(self, compress=False):
-        BytesIO.__init__(self)
+        io.BytesIO.__init__(self)
         self.compress = compress
         self.filters = Array()
@@ -213,7 +211,7 @@ class Stream(BytesIO):
                         raw.encode('ascii'))
 
     def write_raw(self, raw):
-        BytesIO.write(self, raw)
+        io.BytesIO.write(self, raw)
 
 
 class Reference(object):
@@ -222,11 +220,11 @@ class Reference(object):
         self.num, self.obj = num, obj
 
     def pdf_serialize(self, stream):
-        raw = '%d 0 R'%self.num
+        raw = '%d 0 R' % self.num
         stream.write(raw.encode('ascii'))
 
     def __repr__(self):
-        return '%d 0 R'%self.num
+        return '%d 0 R' % self.num
 
     def __str__(self):
         return repr(self)
 
ebook_converter/polyglot.py (new file, 59 lines)
@@ -0,0 +1,59 @@
+"""
+Misc converting functions from polyglot module.
+Most of the have something to do with converting between string and binary
+"""
+import base64
+import binascii
+import urllib
+
+
+def as_base64_unicode(x, enc='utf-8'):
+    if isinstance(x, str):
+        x = x.encode(enc)
+    return base64.standard_b64encode(x).decode('ascii')
+
+
+def from_base64_bytes(x):
+    if isinstance(x, str):
+        x = x.encode('ascii')
+    return base64.standard_b64decode(x)
+
+
+def as_hex_bytes(x, enc='utf-8'):
+    if isinstance(x, str):
+        x = x.encode(enc)
+    return binascii.hexlify(x)
+
+
+def from_hex_bytes(x):
+    if isinstance(x, str):
+        x = x.encode('ascii')
+    return binascii.unhexlify(x)
+
+
+def as_bytes(x, encoding='utf-8'):
+    if isinstance(x, str):
+        return x.encode(encoding)
+    if isinstance(x, bytes):
+        return x
+    if isinstance(x, bytearray):
+        return bytes(x)
+    if isinstance(x, memoryview):
+        return x.tobytes()
+    return str(x).encode(encoding)
+
+
+def unquote(x, encoding='utf-8', errors='replace'):
+    # TODO(gryf): this works like that: if x is a binary, convert it to
+    # string using encoding and make unquote. After that make it binary again.
+    # If x is string, just pass it to the unquote.
+    # This approach is mostly used within lxml etree strings, which suppose to
+    # be binary because of its inner representation. I'm wondering, if
+    # xml.etree could be used instead - to be checked.
+    binary = isinstance(x, bytes)
+    if binary:
+        x = x.decode(encoding, errors)
+    ans = urllib.parse.unquote(x, encoding, errors)
+    if binary:
+        ans = ans.encode(encoding, errors)
+    return ans
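A quick illustration of the new module's behaviour on sample values, in particular the bytes round trip the TODO comment above describes; this sketch is not part of the commit, and standalone use would need `import urllib.parse` (the module's bare `import urllib` relies on `urllib.parse` having been imported elsewhere in the process):

    from ebook_converter import polyglot

    polyglot.unquote('a%20b')    # str in  -> 'a b' (str out)
    polyglot.unquote(b'a%20b')   # bytes in -> b'a b' (decoded, unquoted, re-encoded)
    polyglot.as_bytes('x')       # -> b'x'
    polyglot.as_bytes(42)        # non-string input falls back to str(): b'42'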
ebook_converter/polyglot/binary.py (deleted file)
@@ -1,26 +0,0 @@
-from base64 import standard_b64decode, standard_b64encode
-from binascii import hexlify, unhexlify
-
-
-def as_base64_unicode(x, enc='utf-8'):
-    if isinstance(x, str):
-        x = x.encode(enc)
-    return standard_b64encode(x).decode('ascii')
-
-
-def from_base64_bytes(x):
-    if isinstance(x, str):
-        x = x.encode('ascii')
-    return standard_b64decode(x)
-
-
-def as_hex_bytes(x, enc='utf-8'):
-    if isinstance(x, str):
-        x = x.encode(enc)
-    return hexlify(x)
-
-
-def from_hex_bytes(x):
-    if isinstance(x, str):
-        x = x.encode('ascii')
-    return unhexlify(x)
ebook_converter/polyglot/builtins.py (deleted file)
@@ -1,10 +0,0 @@
-def as_bytes(x, encoding='utf-8'):
-    if isinstance(x, str):
-        return x.encode(encoding)
-    if isinstance(x, bytes):
-        return x
-    if isinstance(x, bytearray):
-        return bytes(x)
-    if isinstance(x, memoryview):
-        return x.tobytes()
-    return str(x).encode(encoding)
ebook_converter/polyglot/urllib.py (deleted file)
@@ -1,17 +0,0 @@
-import urllib.parse
-
-
-def unquote(x, encoding='utf-8', errors='replace'):
-    # TODO(gryf): this works like that: if x is a binary, convert it to
-    # string using encoding and make unquote. After that make it binary again.
-    # If x is string, just pass it to the unquote.
-    # This approach is mostly used within lxml etree strings, which suppose to
-    # be binary because of its inner representation. I'm wondering, if
-    # xml.etree could be used instead - to be checked.
-    binary = isinstance(x, bytes)
-    if binary:
-        x = x.decode(encoding, errors)
-    ans = urllib.parse.unquote(x, encoding, errors)
-    if binary:
-        ans = ans.encode(encoding, errors)
-    return ans
@@ -8,6 +8,7 @@ import tempfile
 
 from ebook_converter.constants_old import __version__, __appname__, \
     filesystem_encoding
+from ebook_converter import polyglot
 
 
 def cleanup(path):
@@ -90,9 +91,8 @@ def base_dir():
     td = os.environ.get('CALIBRE_WORKER_TEMP_DIR', None)
     if td is not None:
         from ebook_converter.utils.serialize import msgpack_loads
-        from ebook_converter.polyglot.binary import from_hex_bytes
         try:
-            td = msgpack_loads(from_hex_bytes(td))
+            td = msgpack_loads(polyglot.from_hex_bytes(td))
         except Exception:
             td = None
     if td and os.path.exists(td):
@@ -11,7 +11,7 @@
 import operator
 import re
 
-from ebook_converter.polyglot.binary import from_hex_bytes
+from ebook_converter import polyglot
 
 
 __all__ = ['decode']  # Everything else is implementation detail
@@ -94,7 +94,8 @@ def try_encoding(css_bytes, encoding, fallback=True):
 
 
 def hex2re(hex_data):
-    return re.escape(from_hex_bytes(hex_data.replace(' ', '').encode('ascii')))
+    return re.escape(polyglot.from_hex_bytes(hex_data.replace(' ', '')
+                                             .encode('ascii')))
 
 
 class Slicer(object):
@@ -2,7 +2,7 @@ import struct
 from io import BytesIO
 from collections import defaultdict
 
-from ebook_converter.polyglot.builtins import as_bytes
+from ebook_converter import polyglot
 
 
 __license__ = 'GPL v3'
@@ -38,7 +38,7 @@ def get_tables(raw):
 
 
 def get_table(raw, name):
     ''' Get the raw table bytes for the specified table in the font '''
-    name = as_bytes(name.lower())
+    name = polyglot.as_bytes(name.lower())
     for table_tag, table, table_index, table_offset, table_checksum in get_tables(raw):
         if table_tag.lower() == name:
             return table, table_index, table_offset, table_checksum
@@ -1,3 +1,6 @@
+from ebook_converter import polyglot
+
+
 MSGPACK_MIME = 'application/x-msgpack'
 CANARY = 'jPoAv3zOyHvQ5JFNYg4hJ9'
 
@@ -56,11 +59,11 @@ def json_dumps(data, **kw):
 
 
 def decode_metadata(x, for_json):
-    from ebook_converter.polyglot.binary import from_base64_bytes
     from ebook_converter.ebooks.metadata.book.serialize import metadata_from_dict
     obj = metadata_from_dict(x)
     if for_json and obj.cover_data and obj.cover_data[1]:
-        obj.cover_data = obj.cover_data[0], from_base64_bytes(obj.cover_data[1])
+        obj.cover_data = (obj.cover_data[0],
+                          polyglot.from_base64_bytes(obj.cover_data[1]))
     return obj
 
@@ -1,5 +1,9 @@
-import os, sys, re
-import fcntl, termios, struct
+import fcntl
+import os
+import re
+import struct
+import sys
+import termios
 
 
 def fmt(code):
@@ -10,7 +10,7 @@ from tempfile import SpooledTemporaryFile
 from ebook_converter.utils import filenames as fms
 from ebook_converter.constants_old import filesystem_encoding
 from ebook_converter.ebooks.chardet import detect
-from ebook_converter.polyglot.builtins import as_bytes
+from ebook_converter import polyglot
 
 try:
     import zlib  # We may need its compression method
@@ -330,7 +330,7 @@ class ZipInfo (object):
         if os.sep != '/':
             os_sep, sep = os.sep, '/'
             if isinstance(filename, bytes):
-                os_sep, sep = as_bytes(os_sep), b'/'
+                os_sep, sep = polyglot.as_bytes(os_sep), b'/'
             if os_sep in filename:
                 filename = filename.replace(os_sep, sep)