1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-04-13 00:43:34 +02:00

Moved misc functions from polyglot package to single polyglot module.

This commit is contained in:
2021-05-25 19:06:31 +02:00
parent f46984267e
commit f47376830f
32 changed files with 244 additions and 219 deletions

View File

@@ -5,17 +5,12 @@ import os
from lxml import html from lxml import html
from lxml.html import builder from lxml.html import builder
from ebook_converter.polyglot.urllib import unquote as _unquote
from ebook_converter.ebooks.oeb.base import urlquote from ebook_converter.ebooks.oeb.base import urlquote
from ebook_converter.ebooks.chardet import xml_to_unicode from ebook_converter.ebooks.chardet import xml_to_unicode
from ebook_converter.customize.conversion import InputFormatPlugin from ebook_converter.customize.conversion import InputFormatPlugin
from ebook_converter.ptempfile import TemporaryDirectory from ebook_converter.ptempfile import TemporaryDirectory
from ebook_converter.constants_old import filesystem_encoding from ebook_converter.constants_old import filesystem_encoding
from ebook_converter.polyglot.builtins import as_bytes from ebook_converter import polyglot
__license__ = 'GPL v3'
__copyright__ = ('2008, Kovid Goyal <kovid at kovidgoyal.net>, '
'and Alex Bramley <a.bramley at gmail.com>.')
class CHMInput(InputFormatPlugin): class CHMInput(InputFormatPlugin):
@@ -133,7 +128,7 @@ class CHMInput(InputFormatPlugin):
def unquote(x): def unquote(x):
if isinstance(x, str): if isinstance(x, str):
x = x.encode('utf-8') x = x.encode('utf-8')
return _unquote(x).decode('utf-8') return polyglot.unquote(x).decode('utf-8')
def unquote_path(x): def unquote_path(x):
y = unquote(x) y = unquote(x)
@@ -175,7 +170,7 @@ class CHMInput(InputFormatPlugin):
pretty_print=True) pretty_print=True)
f.write(raw) f.write(raw)
else: else:
f.write(as_bytes(hhcdata)) f.write(polyglot.as_bytes(hhcdata))
return htmlpath, toc return htmlpath, toc
def _read_file(self, name): def _read_file(self, name):

View File

@@ -9,7 +9,7 @@ from ebook_converter.ebooks.oeb import parse_utils
from ebook_converter.customize.conversion import OutputFormatPlugin from ebook_converter.customize.conversion import OutputFormatPlugin
from ebook_converter.customize.conversion import OptionRecommendation from ebook_converter.customize.conversion import OptionRecommendation
from ebook_converter.ptempfile import TemporaryDirectory from ebook_converter.ptempfile import TemporaryDirectory
from ebook_converter.polyglot.builtins import as_bytes from ebook_converter import polyglot
from ebook_converter.utils import directory from ebook_converter.utils import directory
@@ -266,7 +266,8 @@ class EPUBOutput(OutputFormatPlugin):
extra_entries=extra_entries) as epub: extra_entries=extra_entries) as epub:
epub.add_dir(tdir) epub.add_dir(tdir)
if encryption is not None: if encryption is not None:
epub.writestr('META-INF/encryption.xml', as_bytes(encryption)) epub.writestr('META-INF/encryption.xml',
polyglot.as_bytes(encryption))
if metadata_xml is not None: if metadata_xml is not None:
epub.writestr('META-INF/metadata.xml', epub.writestr('META-INF/metadata.xml',
metadata_xml.encode('utf-8')) metadata_xml.encode('utf-8'))
@@ -308,12 +309,10 @@ class EPUBOutput(OutputFormatPlugin):
pass pass
def encrypt_fonts(self, uris, tdir, _uuid): # {{{ def encrypt_fonts(self, uris, tdir, _uuid): # {{{
from ebook_converter.polyglot.binary import from_hex_bytes
key = re.sub(r'[^a-fA-F0-9]', '', _uuid) key = re.sub(r'[^a-fA-F0-9]', '', _uuid)
if len(key) < 16: if len(key) < 16:
raise ValueError('UUID identifier %r is invalid'% _uuid) raise ValueError('UUID identifier %r is invalid'% _uuid)
key = bytearray(from_hex_bytes((key + key)[:32])) key = bytearray(polyglot.from_hex_bytes((key + key)[:32]))
paths = [] paths = []
with directory.CurrentDir(tdir): with directory.CurrentDir(tdir):
paths = [os.path.join(*x.split('/')) for x in uris] paths = [os.path.join(*x.split('/')) for x in uris]

View File

@@ -7,7 +7,7 @@ from lxml import etree
from ebook_converter.customize.conversion import OutputFormatPlugin, OptionRecommendation from ebook_converter.customize.conversion import OutputFormatPlugin, OptionRecommendation
from ebook_converter.ebooks.oeb.base import element from ebook_converter.ebooks.oeb.base import element
from ebook_converter.polyglot.urllib import unquote from ebook_converter import polyglot
from ebook_converter.ptempfile import PersistentTemporaryDirectory from ebook_converter.ptempfile import PersistentTemporaryDirectory
from ebook_converter.utils.cleantext import clean_xml_chars from ebook_converter.utils.cleantext import clean_xml_chars
from ebook_converter.utils import directory from ebook_converter.utils import directory
@@ -56,7 +56,8 @@ class HTMLOutput(OutputFormatPlugin):
parent = element(parent, ('ul')) parent = element(parent, ('ul'))
for node in current_node.nodes: for node in current_node.nodes:
point = element(parent, 'li') point = element(parent, 'li')
href = relpath(os.path.abspath(unquote(node.href)), href = relpath(os.path.abspath(polyglot
.unquote(node.href)),
os.path.dirname(ref_url)) os.path.dirname(ref_url))
if isinstance(href, bytes): if isinstance(href, bytes):
href = href.decode('utf-8') href = href.decode('utf-8')
@@ -84,7 +85,6 @@ class HTMLOutput(OutputFormatPlugin):
from lxml import etree from lxml import etree
from ebook_converter.utils import zipfile from ebook_converter.utils import zipfile
from templite import Templite from templite import Templite
from ebook_converter.polyglot.urllib import unquote
from ebook_converter.ebooks.html.meta import EasyMeta from ebook_converter.ebooks.html.meta import EasyMeta
# read template files # read template files
@@ -156,7 +156,7 @@ class HTMLOutput(OutputFormatPlugin):
with directory.CurrentDir(output_dir): with directory.CurrentDir(output_dir):
for item in oeb_book.manifest: for item in oeb_book.manifest:
path = os.path.abspath(unquote(item.href)) path = os.path.abspath(polyglot.unquote(item.href))
dir = os.path.dirname(path) dir = os.path.dirname(path)
if not os.path.exists(dir): if not os.path.exists(dir):
os.makedirs(dir) os.makedirs(dir)
@@ -169,7 +169,7 @@ class HTMLOutput(OutputFormatPlugin):
item.unload_data_from_memory(memory=path) item.unload_data_from_memory(memory=path)
for item in oeb_book.spine: for item in oeb_book.spine:
path = os.path.abspath(unquote(item.href)) path = os.path.abspath(polyglot.unquote(item.href))
dir = os.path.dirname(path) dir = os.path.dirname(path)
root = item.data.getroottree() root = item.data.getroottree()

View File

@@ -5,7 +5,7 @@ from lxml import etree
from ebook_converter.customize.conversion import (OutputFormatPlugin, from ebook_converter.customize.conversion import (OutputFormatPlugin,
OptionRecommendation) OptionRecommendation)
from ebook_converter.polyglot.urllib import unquote from ebook_converter import polyglot
from ebook_converter.ebooks.oeb.base import OPF_MIME, NCX_MIME, PAGE_MAP_MIME, OEB_STYLES from ebook_converter.ebooks.oeb.base import OPF_MIME, NCX_MIME, PAGE_MAP_MIME, OEB_STYLES
from ebook_converter.ebooks.oeb.normalize_css import condense_sheet from ebook_converter.ebooks.oeb.normalize_css import condense_sheet
from ebook_converter.utils import directory from ebook_converter.utils import directory
@@ -56,7 +56,7 @@ class OEBOutput(OutputFormatPlugin):
not self.opts.expand_css and item.media_type in OEB_STYLES and hasattr( not self.opts.expand_css and item.media_type in OEB_STYLES and hasattr(
item.data, 'cssText') and 'nook' not in self.opts.output_profile.short_name): item.data, 'cssText') and 'nook' not in self.opts.output_profile.short_name):
condense_sheet(item.data) condense_sheet(item.data)
path = os.path.abspath(unquote(item.href)) path = os.path.abspath(polyglot.unquote(item.href))
dir = os.path.dirname(path) dir = os.path.dirname(path)
if not os.path.exists(dir): if not os.path.exists(dir):
os.makedirs(dir) os.makedirs(dir)

View File

@@ -1,12 +1,7 @@
import os import os
from ebook_converter.customize.conversion import InputFormatPlugin, OptionRecommendation from ebook_converter.customize.conversion import InputFormatPlugin, OptionRecommendation
from ebook_converter.polyglot.builtins import as_bytes from ebook_converter import polyglot
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
class PDFInput(InputFormatPlugin): class PDFInput(InputFormatPlugin):
@@ -72,7 +67,8 @@ class PDFInput(InputFormatPlugin):
ncxid = opf.manifest.id_for_path('toc.ncx') ncxid = opf.manifest.id_for_path('toc.ncx')
if ncxid: if ncxid:
with open('metadata.opf', 'r+b') as f: with open('metadata.opf', 'r+b') as f:
raw = f.read().replace(b'<spine', b'<spine toc="%s"' % as_bytes(ncxid)) raw = f.read().replace(b'<spine', b'<spine toc="%s"' %
polyglot.as_bytes(ncxid))
f.seek(0) f.seek(0)
f.write(raw) f.write(raw)

View File

@@ -8,7 +8,7 @@ from lxml import etree
from ebook_converter.customize.conversion import InputFormatPlugin from ebook_converter.customize.conversion import InputFormatPlugin
from ebook_converter.customize.conversion import OptionRecommendation from ebook_converter.customize.conversion import OptionRecommendation
from ebook_converter.polyglot.builtins import as_bytes from ebook_converter import polyglot
border_style_map = {'single': 'solid', border_style_map = {'single': 'solid',
@@ -296,7 +296,7 @@ class RTFInput(InputFormatPlugin):
result = transform(doc) result = transform(doc)
html = u'index.xhtml' html = u'index.xhtml'
with open(html, 'wb') as f: with open(html, 'wb') as f:
res = as_bytes(transform.tostring(result)) res = polyglot.as_bytes(transform.tostring(result))
# res = res[:100].replace('xmlns:html', 'xmlns') + res[100:] # res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
# clean multiple \n # clean multiple \n
res = re.sub(b'\n+', b'\n', res) res = re.sub(b'\n+', b'\n', res)

View File

@@ -1,22 +1,20 @@
__license__ = 'GPL v3' import io
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' from ebook_converter import polyglot
def base64_decode(raw): def base64_decode(raw):
from io import BytesIO
from ebook_converter.polyglot.binary import from_base64_bytes
# First try the python implementation as it is faster # First try the python implementation as it is faster
try: try:
return from_base64_bytes(raw) return polyglot.from_base64_bytes(raw)
except Exception: except Exception:
pass pass
# Try a more robust version (adapted from FBReader sources) # Try a more robust version (adapted from FBReader sources)
A, Z, a, z, zero, nine, plus, slash, equal = bytearray(b'AZaz09+/=') A, Z, a, z, zero, nine, plus, slash, equal = bytearray(b'AZaz09+/=')
raw = bytearray(raw) raw = bytearray(raw)
out = BytesIO() out = io.BytesIO()
pos = 0 pos = 0
while pos < len(raw): while pos < len(raw):
tot = 0 tot = 0
@@ -32,7 +30,7 @@ def base64_decode(raw):
elif zero <= byt <= nine: elif zero <= byt <= nine:
num = byt - zero + 52 num = byt - zero + 52
else: else:
num = {plus:62, slash:63, equal:64}.get(byt, None) num = {plus: 62, slash: 63, equal: 64}.get(byt, None)
if num is None: if num is None:
# Ignore this byte # Ignore this byte
continue continue

View File

@@ -13,7 +13,7 @@ from ebook_converter import constants as const
from ebook_converter.constants_old import __appname__, __version__ from ebook_converter.constants_old import __appname__, __version__
from ebook_converter.ebooks.oeb import base from ebook_converter.ebooks.oeb import base
from ebook_converter.ebooks.oeb import parse_utils from ebook_converter.ebooks.oeb import parse_utils
from ebook_converter.polyglot.binary import as_base64_unicode from ebook_converter import polyglot
from ebook_converter.utils import entities from ebook_converter.utils import entities
from ebook_converter.utils.img import save_cover_data_to from ebook_converter.utils.img import save_cover_data_to
from ebook_converter.utils.localization import lang_as_iso639_1 from ebook_converter.utils.localization import lang_as_iso639_1
@@ -355,10 +355,10 @@ class FB2MLizer(object):
if item.media_type not in ('image/jpeg', 'image/png'): if item.media_type not in ('image/jpeg', 'image/png'):
imdata = save_cover_data_to(item.data, imdata = save_cover_data_to(item.data,
compression_quality=70) compression_quality=70)
raw_data = as_base64_unicode(imdata) raw_data = polyglot.as_base64_unicode(imdata)
content_type = 'image/jpeg' content_type = 'image/jpeg'
else: else:
raw_data = as_base64_unicode(item.data) raw_data = polyglot.as_base64_unicode(item.data)
content_type = item.media_type content_type = item.media_type
# Don't put the encoded image on a single line. # Don't put the encoded image on a single line.
step = 72 step = 72

View File

@@ -14,26 +14,24 @@ from ebook_converter.ebooks.oeb import parse_utils
from ebook_converter.ebooks.oeb.stylizer import Stylizer from ebook_converter.ebooks.oeb.stylizer import Stylizer
from ebook_converter.utils import entities from ebook_converter.utils import entities
from ebook_converter.utils.logging import default_log from ebook_converter.utils.logging import default_log
from ebook_converter.polyglot.builtins import as_bytes from ebook_converter import polyglot
__license__ = 'GPL 3' SELF_CLOSING_TAGS = {'area', 'base', 'basefont', 'br', 'hr', 'input', 'img',
__copyright__ = '2011, John Schember <john@nachtimwald.com>' 'link', 'meta'}
__docformat__ = 'restructuredtext en'
SELF_CLOSING_TAGS = {'area', 'base', 'basefont', 'br', 'hr', 'input', 'img', 'link', 'meta'}
class OEB2HTML(object): class OEB2HTML(object):
''' """
Base class. All subclasses should implement dump_text to actually transform Base class. All subclasses should implement dump_text to actually
content. Also, callers should use oeb2html to get the transformed html. transform content. Also, callers should use oeb2html to get the
links and images can be retrieved after calling oeb2html to get the mapping transformed html links and images can be retrieved after calling oeb2html
of OEB links and images to the new names used in the html returned by oeb2html. to get the mapping of OEB links and images to the new names used in the
Images will always be referenced as if they are in an images directory. html returned by oeb2html. Images will always be referenced as if they are
in an images directory.
Use get_css to get the CSS classes for the OEB document as a string. Use get_css to get the CSS classes for the OEB document as a string.
''' """
def __init__(self, log=None): def __init__(self, log=None):
self.log = default_log if log is None else log self.log = default_log if log is None else log
@@ -55,16 +53,18 @@ class OEB2HTML(object):
return self.mlize_spine(oeb_book) return self.mlize_spine(oeb_book)
def mlize_spine(self, oeb_book): def mlize_spine(self, oeb_book):
output = [ output = ['<html><head><meta http-equiv="Content-Type" '
u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" /><title>%s</title></head><body>' % ( 'content="text/html;charset=utf-8" />'
entities.prepare_string_for_xml(self.book_title)) '<title>%s</title></head>'
] '<body>' % entities.prepare_string_for_xml(self.book_title)]
for item in oeb_book.spine: for item in oeb_book.spine:
self.log.debug('Converting %s to HTML...' % item.href) self.log.debug('Converting %s to HTML...' % item.href)
self.rewrite_ids(item.data, item) self.rewrite_ids(item.data, item)
base.rewrite_links(item.data, partial(self.rewrite_link, page=item)) base.rewrite_links(item.data, partial(self.rewrite_link,
page=item))
stylizer = Stylizer(item.data, item.href, oeb_book, self.opts) stylizer = Stylizer(item.data, item.href, oeb_book, self.opts)
output += self.dump_text(item.data.find(base.tag('xhtml', 'body')), stylizer, item) output += self.dump_text(item.data.find(base.tag('xhtml', 'body')),
stylizer, item)
output.append('\n\n') output.append('\n\n')
output.append('</body></html>') output.append('</body></html>')
return ''.join(output) return ''.join(output)
@@ -126,13 +126,14 @@ class OEB2HTML(object):
el.attrib['id'] = self.get_link_id(page.href)[1:] el.attrib['id'] = self.get_link_id(page.href)[1:]
continue continue
if 'id' in el.attrib: if 'id' in el.attrib:
el.attrib['id'] = self.get_link_id(page.href, el.attrib['id'])[1:] el.attrib['id'] = self.get_link_id(page.href,
el.attrib['id'])[1:]
def get_css(self, oeb_book): def get_css(self, oeb_book):
css = b'' css = b''
for item in oeb_book.manifest: for item in oeb_book.manifest:
if item.media_type == 'text/css': if item.media_type == 'text/css':
css += as_bytes(item.data.cssText) + b'\n\n' css += polyglot.as_bytes(item.data.cssText) + b'\n\n'
return css return css
def prepare_string_for_html(self, raw): def prepare_string_for_html(self, raw):
@@ -157,10 +158,14 @@ class OEB2HTMLNoCSSizer(OEB2HTML):
# We can only processes tags. If there isn't a tag return any text. # We can only processes tags. If there isn't a tag return any text.
if not isinstance(elem.tag, (str, bytes)) \ if not isinstance(elem.tag, (str, bytes)) \
or parse_utils.namespace(elem.tag) not in (const.XHTML_NS, const.SVG_NS): or parse_utils.namespace(elem.tag) not in (const.XHTML_NS,
const.SVG_NS):
p = elem.getparent() p = elem.getparent()
if p is not None and isinstance(p.tag, (str, bytes)) and parse_utils.namespace(p.tag) in (const.XHTML_NS, const.SVG_NS) \ if (p is not None and
and elem.tail: isinstance(p.tag, (str, bytes)) and
parse_utils.namespace(p.tag) in (const.XHTML_NS,
const.SVG_NS) and
elem.tail):
return [elem.tail] return [elem.tail]
return [''] return ['']
@@ -176,8 +181,8 @@ class OEB2HTMLNoCSSizer(OEB2HTML):
tags.append(tag) tags.append(tag)
# Ignore anything that is set to not be displayed. # Ignore anything that is set to not be displayed.
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \ if (style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') or
or style['visibility'] == 'hidden': style['visibility'] == 'hidden'):
return [''] return ['']
# Remove attributes we won't want. # Remove attributes we won't want.
@@ -186,11 +191,13 @@ class OEB2HTMLNoCSSizer(OEB2HTML):
if 'style' in attribs: if 'style' in attribs:
del attribs['style'] del attribs['style']
# Turn the rest of the attributes into a string we can write with the tag. # Turn the rest of the attributes into a string we can write with the
# tag.
at = '' at = ''
for k, v in attribs.items(): for key, value in attribs.items():
at += ' %s="%s"' % (k, entities at += (' %s="%s"' %
.prepare_string_for_xml(v, attribute=True)) (key, entities.prepare_string_for_xml(value,
attribute=True)))
# Write the tag. # Write the tag.
text.append('<%s%s' % (tag, at)) text.append('<%s%s' % (tag, at))
@@ -246,11 +253,15 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
''' '''
# We can only processes tags. If there isn't a tag return any text. # We can only processes tags. If there isn't a tag return any text.
if not isinstance(elem.tag, (str, bytes)) \ if (not isinstance(elem.tag, (str, bytes)) or
or parse_utils.namespace(elem.tag) not in (const.XHTML_NS, const.SVG_NS): parse_utils.namespace(elem.tag) not in (const.XHTML_NS,
const.SVG_NS)):
p = elem.getparent() p = elem.getparent()
if p is not None and isinstance(p.tag, (str, bytes)) and parse_utils.namespace(p.tag) in (const.XHTML_NS, const.SVG_NS) \ if (p is not None and
and elem.tail: isinstance(p.tag, (str, bytes)) and
parse_utils.namespace(p.tag) in (const.XHTML_NS,
const.SVG_NS) and
elem.tail):
return [elem.tail] return [elem.tail]
return [''] return ['']
@@ -266,9 +277,11 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
if tag == 'body': if tag == 'body':
# Change the body to a div so we can merge multiple files. # Change the body to a div so we can merge multiple files.
tag = 'div' tag = 'div'
# Add page-break-brefore: always because renders typically treat a new file (we're merging files) # Add page-break-brefore: always because renders typically treat
# as a page break and remove all other page break types that might be set. # a new file (we're merging files) as a page break and remove all
style_a = 'page-break-before: always; %s' % re.sub('page-break-[^:]+:[^;]+;?', '', style_a) # other page break types that might be set.
style_a = ('page-break-before: always; %s' %
re.sub('page-break-[^:]+:[^;]+;?', '', style_a))
# Remove unnecessary spaces. # Remove unnecessary spaces.
style_a = re.sub(r'\s{2,}', ' ', style_a).strip() style_a = re.sub(r'\s{2,}', ' ', style_a).strip()
tags.append(tag) tags.append(tag)
@@ -279,7 +292,8 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
if 'style' in attribs: if 'style' in attribs:
del attribs['style'] del attribs['style']
# Turn the rest of the attributes into a string we can write with the tag. # Turn the rest of the attributes into a string we can write with
# the tag.
at = '' at = ''
for k, v in attribs.items(): for k, v in attribs.items():
at += ' %s="%s"' % (k, entities at += ' %s="%s"' % (k, entities
@@ -319,43 +333,51 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
class OEB2HTMLClassCSSizer(OEB2HTML): class OEB2HTMLClassCSSizer(OEB2HTML):
''' """
Use CSS classes. css_style option can specify whether to use Use CSS classes. css_style option can specify whether to use inline
inline classes (style tag in the head) or reference an external classes (style tag in the head) or reference an external CSS file called
CSS file called style.css. style.css.
''' """
def mlize_spine(self, oeb_book): def mlize_spine(self, oeb_book):
output = [] output = []
for item in oeb_book.spine: for item in oeb_book.spine:
self.log.debug('Converting %s to HTML...' % item.href) self.log.debug('Converting %s to HTML...' % item.href)
self.rewrite_ids(item.data, item) self.rewrite_ids(item.data, item)
base.rewrite_links(item.data, partial(self.rewrite_link, page=item)) base.rewrite_links(item.data, partial(self.rewrite_link,
page=item))
stylizer = Stylizer(item.data, item.href, oeb_book, self.opts) stylizer = Stylizer(item.data, item.href, oeb_book, self.opts)
output += self.dump_text(item.data.find(base.tag('xhtml', 'body')), stylizer, item) output += self.dump_text(item.data.find(base.tag('xhtml', 'body')),
stylizer, item)
output.append('\n\n') output.append('\n\n')
if self.opts.htmlz_class_style == 'external': if self.opts.htmlz_class_style == 'external':
css = u'<link href="style.css" rel="stylesheet" type="text/css" />' css = '<link href="style.css" rel="stylesheet" type="text/css" />'
else: else:
css = u'<style type="text/css">' + self.get_css(oeb_book) + u'</style>' css = ('<style type="text/css">' + self.get_css(oeb_book) +
title = (u'<title>%s</title>' % '</style>')
title = ('<title>%s</title>' %
entities.prepare_string_for_xml(self.book_title)) entities.prepare_string_for_xml(self.book_title))
output = [u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" />'] + \ output = (['<html><head><meta http-equiv="Content-Type" '
[css] + [title, u'</head><body>'] + output + [u'</body></html>'] 'content="text/html;charset=utf-8" />'] + [css] +
[title, '</head><body>'] + output + ['</body></html>'])
return ''.join(output) return ''.join(output)
def dump_text(self, elem, stylizer, page): def dump_text(self, elem, stylizer, page):
''' """
@elem: The element in the etree that we are working on. @elem: The element in the etree that we are working on.
@stylizer: The style information attached to the element. @stylizer: The style information attached to the element.
''' """
# We can only processes tags. If there isn't a tag return any text. # We can only processes tags. If there isn't a tag return any text.
if not isinstance(elem.tag, (str, bytes)) \ if (not isinstance(elem.tag, (str, bytes)) or
or parse_utils.namespace(elem.tag) not in (const.XHTML_NS, const.SVG_NS): parse_utils.namespace(elem.tag) not in (const.XHTML_NS,
const.SVG_NS)):
p = elem.getparent() p = elem.getparent()
if p is not None and isinstance(p.tag, (str, bytes)) and parse_utils.namespace(p.tag) in (const.XHTML_NS, const.SVG_NS) \ if (p is not None and
and elem.tail: isinstance(p.tag, (str, bytes)) and
parse_utils.namespace(p.tag) in (const.XHTML_NS,
const.SVG_NS) and
elem.tail):
return [elem.tail] return [elem.tail]
return [''] return ['']
@@ -373,11 +395,12 @@ class OEB2HTMLClassCSSizer(OEB2HTML):
if 'style' in attribs: if 'style' in attribs:
del attribs['style'] del attribs['style']
# Turn the rest of the attributes into a string we can write with the tag. # Turn the rest of the attributes into a string we can write with
# the tag.
at = '' at = ''
for k, v in attribs.items(): for k, v in attribs.items():
at += ' %s="%s"' % (k, at += ' %s="%s"' % (k, entities
entities.prepare_string_for_xml(v, attribute=True)) .prepare_string_for_xml(v, attribute=True))
# Write the tag. # Write the tag.
text.append('<%s%s' % (tag, at)) text.append('<%s%s' % (tag, at))

View File

@@ -5,7 +5,7 @@ import textwrap
from lxml import etree from lxml import etree
from ebook_converter.polyglot.builtins import as_bytes from ebook_converter import polyglot
class Canvas(etree.XSLTExtension): class Canvas(etree.XSLTExtension):
@@ -292,7 +292,7 @@ class Styles(etree.XSLTExtension):
return '\n\t'.join(ans) return '\n\t'.join(ans)
with open(name, 'wb') as f: with open(name, 'wb') as f:
f.write(as_bytes(self.CSS)) f.write(polyglot.as_bytes(self.CSS))
for (w, sel) in [(self.text_styles, 'ts'), (self.block_styles, for (w, sel) in [(self.text_styles, 'ts'), (self.block_styles,
'bs')]: 'bs')]:
for i, s in enumerate(w): for i, s in enumerate(w):
@@ -300,7 +300,7 @@ class Styles(etree.XSLTExtension):
continue continue
rsel = '.%s%d'%(sel, i) rsel = '.%s%d'%(sel, i)
s = join(s) s = join(s)
f.write(as_bytes(rsel + ' {\n\t' + s + '\n}\n\n')) f.write(polyglot.as_bytes(rsel + ' {\n\t' + s + '\n}\n\n'))
def execute(self, context, self_node, input_node, output_parent): def execute(self, context, self_node, input_node, output_parent):
if input_node.tag == 'TextStyle': if input_node.tag == 'TextStyle':

View File

@@ -9,7 +9,7 @@ import sys
import urllib.parse import urllib.parse
from ebook_converter.utils.config_base import tweaks from ebook_converter.utils.config_base import tweaks
from ebook_converter.polyglot.urllib import unquote from ebook_converter import polyglot
from ebook_converter.utils import encoding as uenc from ebook_converter.utils import encoding as uenc
@@ -248,9 +248,11 @@ class Resource(object):
pc = url[2] pc = url[2]
if isinstance(pc, str): if isinstance(pc, str):
pc = pc.encode('utf-8') pc = pc.encode('utf-8')
pc = unquote(pc).decode('utf-8') pc = polyglot.unquote(pc).decode('utf-8')
self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep))) self.path = os.path.abspath(os.path.join(basedir,
self.fragment = unquote(url[-1]) pc.replace('/',
os.sep)))
self.fragment = polyglot.unquote(url[-1])
def href(self, basedir=None): def href(self, basedir=None):
''' '''

View File

@@ -14,7 +14,7 @@ from ebook_converter.utils.img import save_cover_data_to
from ebook_converter.utils.imghdr import identify from ebook_converter.utils.imghdr import identify
from ebook_converter.ebooks.metadata import MetaInformation, check_isbn from ebook_converter.ebooks.metadata import MetaInformation, check_isbn
from ebook_converter.ebooks.chardet import xml_to_unicode from ebook_converter.ebooks.chardet import xml_to_unicode
from ebook_converter.polyglot.binary import as_base64_unicode from ebook_converter import polyglot
from ebook_converter.utils import encoding as uenc from ebook_converter.utils import encoding as uenc
@@ -389,7 +389,7 @@ def _rnd_pic_file_name(prefix='calibre_cover_', size=32, ext='jpg'):
def _encode_into_jpeg(data): def _encode_into_jpeg(data):
data = save_cover_data_to(data) data = save_cover_data_to(data)
return as_base64_unicode(data) return polyglot.as_base64_unicode(data)
def _set_cover(title_info, mi, ctx): def _set_cover(title_info, mi, ctx):

View File

@@ -30,11 +30,11 @@ from ebook_converter.ebooks.metadata.utils import parse_opf, \
from ebook_converter.ebooks.metadata import string_to_authors, \ from ebook_converter.ebooks.metadata import string_to_authors, \
MetaInformation, check_isbn MetaInformation, check_isbn
from ebook_converter.ebooks.metadata.book.base import Metadata from ebook_converter.ebooks.metadata.book.base import Metadata
from ebook_converter import polyglot
from ebook_converter.utils.date import parse_date, isoformat from ebook_converter.utils.date import parse_date, isoformat
from ebook_converter.utils.localization import get_lang, canonicalize_lang from ebook_converter.utils.localization import get_lang, canonicalize_lang
from ebook_converter.utils.cleantext import clean_ascii_chars, clean_xml_chars from ebook_converter.utils.cleantext import clean_ascii_chars, clean_xml_chars
from ebook_converter.utils.config_base import tweaks from ebook_converter.utils.config_base import tweaks
from ebook_converter.polyglot.urllib import unquote
pretty_print_opf = False pretty_print_opf = False
@@ -838,7 +838,7 @@ class OPF(object): # {{{
def unquote_urls(self): def unquote_urls(self):
def get_href(item): def get_href(item):
raw = unquote(item.get('href', '')) raw = polyglot.unquote(item.get('href', ''))
if not isinstance(raw, str): if not isinstance(raw, str):
raw = raw.decode('utf-8') raw = raw.decode('utf-8')
return raw return raw

View File

@@ -11,7 +11,7 @@ from lxml.builder import ElementMaker
from ebook_converter.constants_old import __appname__, __version__ from ebook_converter.constants_old import __appname__, __version__
from ebook_converter.ebooks.chardet import xml_to_unicode from ebook_converter.ebooks.chardet import xml_to_unicode
from ebook_converter.utils.cleantext import clean_xml_chars from ebook_converter.utils.cleantext import clean_xml_chars
from ebook_converter.polyglot.urllib import unquote from ebook_converter import polyglot
NCX_NS = "http://www.daisy.org/z3986/2005/ncx/" NCX_NS = "http://www.daisy.org/z3986/2005/ncx/"
@@ -31,7 +31,7 @@ def parse_html_toc(data):
root = parse(clean_xml_chars(data), maybe_xhtml=True, keep_doctype=False, root = parse(clean_xml_chars(data), maybe_xhtml=True, keep_doctype=False,
sanitize_names=True) sanitize_names=True)
for a in root.xpath('//*[@href and local-name()="a"]'): for a in root.xpath('//*[@href and local-name()="a"]'):
purl = urllib.parse.urlparse(unquote(a.get('href'))) purl = urllib.parse.urlparse(polyglot.unquote(a.get('href')))
href, fragment = purl[2], purl[5] href, fragment = purl[2], purl[5]
if not fragment: if not fragment:
fragment = None fragment = None
@@ -149,7 +149,7 @@ class TOC(list):
if toc is not None: if toc is not None:
if toc.lower() not in ('ncx', 'ncxtoc'): if toc.lower() not in ('ncx', 'ncxtoc'):
toc = urllib.parse.urlparse(unquote(toc))[2] toc = urllib.parse.urlparse(polyglot.unquote(toc))[2]
toc = toc.replace('/', os.sep) toc = toc.replace('/', os.sep)
if not os.path.isabs(toc): if not os.path.isabs(toc):
toc = os.path.join(self.base_path, toc) toc = os.path.join(self.base_path, toc)
@@ -219,7 +219,8 @@ class TOC(list):
content = content[0] content = content[0]
# if get_attr(content, attr='src'): # if get_attr(content, attr='src'):
purl = urllib.parse.urlparse(content.get('src')) purl = urllib.parse.urlparse(content.get('src'))
href, fragment = unquote(purl[2]), unquote(purl[5]) href = polyglot.unquote(purl[2])
fragment = polyglot.unquote(purl[5])
nd = dest.add_item(href, fragment, text) nd = dest.add_item(href, fragment, text)
nd.play_order = play_order nd.play_order = play_order

View File

@@ -5,7 +5,7 @@ from io import BytesIO
from ebook_converter.utils.img import save_cover_data_to, scale_image, image_to_data, image_from_data, resize_image, png_data_to_gif_data from ebook_converter.utils.img import save_cover_data_to, scale_image, image_to_data, image_from_data, resize_image, png_data_to_gif_data
from ebook_converter.utils.imghdr import what from ebook_converter.utils.imghdr import what
from ebook_converter.ebooks import normalize from ebook_converter.ebooks import normalize
from ebook_converter.polyglot.builtins import as_bytes from ebook_converter import polyglot
from ebook_converter.tinycss.color3 import parse_color_string from ebook_converter.tinycss.color3 import parse_color_string
@@ -61,7 +61,7 @@ def decode_hex_number(raw, codec='utf-8'):
def encode_string(raw): def encode_string(raw):
ans = bytearray(as_bytes(raw)) ans = bytearray(polyglot.as_bytes(raw))
ans.insert(0, len(ans)) ans.insert(0, len(ans))
return bytes(ans) return bytes(ans)

View File

@@ -15,7 +15,7 @@ from odf.namespaces import TEXTNS as odTEXTNS
from ebook_converter.utils import directory from ebook_converter.utils import directory
from ebook_converter.ebooks.oeb.base import _css_logger from ebook_converter.ebooks.oeb.base import _css_logger
from ebook_converter.polyglot.builtins import as_bytes from ebook_converter import polyglot
class Extract(ODF2XHTML): class Extract(ODF2XHTML):
@@ -292,7 +292,7 @@ class Extract(ODF2XHTML):
except: except:
log.exception('Failed to filter CSS, conversion may be slow') log.exception('Failed to filter CSS, conversion may be slow')
with open('index.xhtml', 'wb') as f: with open('index.xhtml', 'wb') as f:
f.write(as_bytes(html)) f.write(polyglot.as_bytes(html))
zf = ZipFile(stream, 'r') zf = ZipFile(stream, 'r')
self.extract_pictures(zf) self.extract_pictures(zf)
opf = OPFCreator(os.path.abspath(os.getcwd()), mi) opf = OPFCreator(os.path.abspath(os.getcwd()), mi)

View File

@@ -24,7 +24,7 @@ from ebook_converter.utils.localization import get_lang
from ebook_converter.ptempfile import TemporaryDirectory from ebook_converter.ptempfile import TemporaryDirectory
from ebook_converter.constants_old import __appname__, __version__ from ebook_converter.constants_old import __appname__, __version__
from ebook_converter.utils import entities from ebook_converter.utils import entities
from ebook_converter.polyglot.urllib import unquote from ebook_converter import polyglot
class OEBReader(object): class OEBReader(object):
@@ -641,7 +641,7 @@ class OEBReader(object):
with TemporaryDirectory('_html_cover') as tdir: with TemporaryDirectory('_html_cover') as tdir:
writer = OEBWriter() writer = OEBWriter()
writer(self.oeb, tdir) writer(self.oeb, tdir)
path = os.path.join(tdir, unquote(hcover.href)) path = os.path.join(tdir, polyglot.unquote(hcover.href))
data = render_html_svg_workaround(path, self.logger) data = render_html_svg_workaround(path, self.logger)
if not data: if not data:
data = b'' data = b''

View File

@@ -5,7 +5,7 @@ import urllib.parse
from lxml import etree from lxml import etree
from ebook_converter.utils.imghdr import identify from ebook_converter.utils.imghdr import identify
from ebook_converter.polyglot.urllib import unquote from ebook_converter import polyglot
class CoverManager(object): class CoverManager(object):
@@ -113,7 +113,7 @@ class CoverManager(object):
if href is not None: if href is not None:
templ = self.non_svg_template if self.no_svg_cover \ templ = self.non_svg_template if self.no_svg_cover \
else self.svg_template else self.svg_template
tp = templ % unquote(href) tp = templ % polyglot.unquote(href)
id, href = m.generate('titlepage', 'titlepage.xhtml') id, href = m.generate('titlepage', 'titlepage.xhtml')
item = m.add(id, href, mimetypes.guess_type('t.xhtml')[0], item = m.add(id, href, mimetypes.guess_type('t.xhtml')[0],
data=etree.fromstring(tp)) data=etree.fromstring(tp))

View File

@@ -3,8 +3,7 @@ import re
import urllib.parse import urllib.parse
from ebook_converter.ebooks.oeb.base import XPath from ebook_converter.ebooks.oeb.base import XPath
from ebook_converter.polyglot.binary import from_base64_bytes from ebook_converter import polyglot
from ebook_converter.polyglot.builtins import as_bytes
class DataURL(object): class DataURL(object):
@@ -27,14 +26,14 @@ class DataURL(object):
if ';base64' in header: if ';base64' in header:
data = re.sub(r'\s+', '', data) data = re.sub(r'\s+', '', data)
try: try:
data = from_base64_bytes(data) data = polyglot.from_base64_bytes(data)
except Exception: except Exception:
self.log.error('Found invalid base64 encoded data ' self.log.error('Found invalid base64 encoded data '
'URI, ignoring it') 'URI, ignoring it')
continue continue
else: else:
data = urllib.parse.unquote(data) data = urllib.parse.unquote(data)
data = as_bytes(data) data = polyglot.as_bytes(data)
fmt = what(None, data) fmt = what(None, data)
if not fmt: if not fmt:
self.log.warn('Image encoded as data URL has unknown ' self.log.warn('Image encoded as data URL has unknown '

View File

@@ -17,7 +17,7 @@ from ebook_converter import constants as const
from ebook_converter.ebooks.epub import rules from ebook_converter.ebooks.epub import rules
from ebook_converter.ebooks.oeb import base from ebook_converter.ebooks.oeb import base
from ebook_converter.ebooks.oeb.polish.split import do_split from ebook_converter.ebooks.oeb.polish.split import do_split
from ebook_converter.polyglot.urllib import unquote from ebook_converter import polyglot
from ebook_converter.css_selectors import Select, SelectorError from ebook_converter.css_selectors import Select, SelectorError
from ebook_converter.utils import encoding as uenc from ebook_converter.utils import encoding as uenc
@@ -189,7 +189,7 @@ class Split(object):
nhref = anchor_map[frag if frag else None] nhref = anchor_map[frag if frag else None]
nhref = self.current_item.relhref(nhref) nhref = self.current_item.relhref(nhref)
if frag: if frag:
nhref = '#'.join((unquote(nhref), frag)) nhref = '#'.join((polyglot.unquote(nhref), frag))
return nhref return nhref
return url return url

View File

@@ -1,20 +1,18 @@
import codecs, zlib, numbers import codecs
from io import BytesIO
from datetime import datetime from datetime import datetime
import io
import numbers
import zlib
from ebook_converter.utils.logging import default_log from ebook_converter.utils.logging import default_log
from ebook_converter.polyglot.binary import as_hex_bytes from ebook_converter import polyglot
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
pdf_float = lambda x: f"{x:.1f}" pdf_float = lambda x: f"{x:.1f}"
EOL = b'\n' EOL = b'\n'
# Sizes {{{ # Sizes
inch = 72.0 inch = 72.0
cm = inch / 2.54 cm = inch / 2.54
mm = cm * 0.1 mm = cm * 0.1
@@ -45,10 +43,9 @@ B2 = (_BW*2, _BH*2)
B1 = (_BH*4, _BW*2) B1 = (_BH*4, _BW*2)
B0 = (_BW*4, _BH*4) B0 = (_BW*4, _BH*4)
PAPER_SIZES = {k:globals()[k.upper()] for k in ('a0 a1 a2 a3 a4 a5 a6 b0 b1 b2' PAPER_SIZES = {k: globals()[k.upper()] for k in ('a0 a1 a2 a3 a4 a5 a6 b0 b1 '
' b3 b4 b5 b6 letter legal').split()} 'b2 b3 b4 b5 b6 letter '
'legal').split()}
# }}}
def fmtnum(o): def fmtnum(o):
@@ -70,12 +67,12 @@ def serialize(o, stream):
elif o is None: elif o is None:
stream.write_raw(b'null') stream.write_raw(b'null')
elif isinstance(o, datetime): elif isinstance(o, datetime):
val = o.strftime("D:%Y%m%d%H%M%%02d%z")%min(59, o.second) val = o.strftime("D:%Y%m%d%H%M%%02d%z") % min(59, o.second)
if datetime.tzinfo is not None: if datetime.tzinfo is not None:
val = "(%s'%s')"%(val[:-2], val[-2:]) val = "(%s'%s')" % (val[:-2], val[-2:])
stream.write(val.encode('ascii')) stream.write(val.encode('ascii'))
else: else:
raise ValueError('Unknown object: %r'%o) raise ValueError('Unknown object: %r' % o)
class Name(str): class Name(str):
@@ -83,7 +80,7 @@ class Name(str):
def pdf_serialize(self, stream): def pdf_serialize(self, stream):
raw = self.encode('ascii') raw = self.encode('ascii')
if len(raw) > 126: if len(raw) > 126:
raise ValueError('Name too long: %r'%self) raise ValueError('Name too long: %r' % self)
raw = bytearray(raw) raw = bytearray(raw)
sharp = ord(b'#') sharp = ord(b'#')
buf = ( buf = (
@@ -96,7 +93,8 @@ def escape_pdf_string(bytestring):
indices = [] indices = []
bad = [] bad = []
ba = bytearray(bytestring) ba = bytearray(bytestring)
bad_map = {10:ord('n'), 13:ord('r'), 12:ord('f'), 8:ord('b'), 9:ord('\t'), 92:ord('\\')} bad_map = {10: ord('n'), 13: ord('r'), 12: ord('f'),
8: ord('b'), 9: ord('\t'), 92: ord('\\')}
for i, num in enumerate(ba): for i, num in enumerate(ba):
if num == 40: # ( if num == 40: # (
indices.append((i, 40)) indices.append((i, 40))
@@ -134,7 +132,7 @@ class UTF16String(str):
if False: if False:
# Disabled as the parentheses based strings give easier to debug # Disabled as the parentheses based strings give easier to debug
# PDF files # PDF files
stream.write(b'<' + as_hex_bytes(raw) + b'>') stream.write(b'<' + polyglot.as_hex_bytes(raw) + b'>')
else: else:
stream.write(b'('+escape_pdf_string(raw)+b')') stream.write(b'('+escape_pdf_string(raw)+b')')
@@ -143,9 +141,9 @@ class Dictionary(dict):
def pdf_serialize(self, stream): def pdf_serialize(self, stream):
stream.write(b'<<' + EOL) stream.write(b'<<' + EOL)
sorted_keys = sorted(self, sorted_keys = sorted(self, key=lambda x: ({'Type': '1',
key=lambda x:({'Type':'1', 'Subtype':'2'}.get( 'Subtype': '2'}
x, x)+x)) .get(x, x) + x))
for k in sorted_keys: for k in sorted_keys:
serialize(Name(k), stream) serialize(Name(k), stream)
stream.write(b' ') stream.write(b' ')
@@ -177,10 +175,10 @@ class Array(list):
stream.write(b']') stream.write(b']')
class Stream(BytesIO): class Stream(io.BytesIO):
def __init__(self, compress=False): def __init__(self, compress=False):
BytesIO.__init__(self) io.BytesIO.__init__(self)
self.compress = compress self.compress = compress
self.filters = Array() self.filters = Array()
@@ -213,7 +211,7 @@ class Stream(BytesIO):
raw.encode('ascii')) raw.encode('ascii'))
def write_raw(self, raw): def write_raw(self, raw):
BytesIO.write(self, raw) io.BytesIO.write(self, raw)
class Reference(object): class Reference(object):
@@ -222,11 +220,11 @@ class Reference(object):
self.num, self.obj = num, obj self.num, self.obj = num, obj
def pdf_serialize(self, stream): def pdf_serialize(self, stream):
raw = '%d 0 R'%self.num raw = '%d 0 R' % self.num
stream.write(raw.encode('ascii')) stream.write(raw.encode('ascii'))
def __repr__(self): def __repr__(self):
return '%d 0 R'%self.num return '%d 0 R' % self.num
def __str__(self): def __str__(self):
return repr(self) return repr(self)

View File

@@ -0,0 +1,59 @@
"""
Miscellaneous conversion helpers from the polyglot module.
Most of them have something to do with converting between strings and
binary data.
"""
import base64
import binascii
import urllib
import urllib.parse
def as_base64_unicode(x, enc='utf-8'):
    """Return *x* base64-encoded as an ASCII ``str``.

    ``str`` input is first encoded with *enc*; bytes-like input is used
    as-is.
    """
    data = x.encode(enc) if isinstance(x, str) else x
    return base64.standard_b64encode(data).decode('ascii')
def from_base64_bytes(x):
    """Decode base64 input (``str`` or ``bytes``) to raw ``bytes``."""
    data = x.encode('ascii') if isinstance(x, str) else x
    return base64.standard_b64decode(data)
def as_hex_bytes(x, enc='utf-8'):
    """Return the hexadecimal representation of *x* as ``bytes``.

    ``str`` input is first encoded with *enc*.
    """
    data = x.encode(enc) if isinstance(x, str) else x
    return binascii.hexlify(data)
def from_hex_bytes(x):
    """Decode a hexadecimal string or bytes back into raw ``bytes``."""
    data = x.encode('ascii') if isinstance(x, str) else x
    return binascii.unhexlify(data)
def as_bytes(x, encoding='utf-8'):
    """Coerce *x* to ``bytes``.

    ``str`` values are encoded with *encoding*; ``bytes`` are returned
    unchanged; ``bytearray`` and ``memoryview`` are copied into a fresh
    ``bytes`` object; anything else goes through ``str()`` first.
    """
    if isinstance(x, bytes):
        return x
    if isinstance(x, str):
        return x.encode(encoding)
    if isinstance(x, (bytearray, memoryview)):
        return bytes(x)
    return str(x).encode(encoding)
def unquote(x, encoding='utf-8', errors='replace'):
    """Percent-decode *x*, preserving its type.

    :param x: the quoted value, either ``str`` or ``bytes``
    :param encoding: codec used to decode/re-encode binary input
    :param errors: error-handling scheme passed to the codecs
    :return: the unquoted value, of the same type as *x*

    If *x* is binary it is decoded with *encoding*, unquoted, and encoded
    back, so callers always get the type they passed in.  This round-trip
    exists mostly for lxml etree strings, which are handed around as bytes
    because of lxml's internal representation.
    TODO(gryf): check whether xml.etree could be used instead to avoid the
    binary dance.
    """
    # NOTE: requires ``import urllib.parse`` at module level -- a bare
    # ``import urllib`` does not guarantee the ``parse`` submodule is loaded.
    binary = isinstance(x, bytes)
    if binary:
        x = x.decode(encoding, errors)
    ans = urllib.parse.unquote(x, encoding, errors)
    if binary:
        ans = ans.encode(encoding, errors)
    return ans

View File

@@ -1,26 +0,0 @@
from base64 import standard_b64decode, standard_b64encode
from binascii import hexlify, unhexlify
def as_base64_unicode(x, enc='utf-8'):
if isinstance(x, str):
x = x.encode(enc)
return standard_b64encode(x).decode('ascii')
def from_base64_bytes(x):
if isinstance(x, str):
x = x.encode('ascii')
return standard_b64decode(x)
def as_hex_bytes(x, enc='utf-8'):
if isinstance(x, str):
x = x.encode(enc)
return hexlify(x)
def from_hex_bytes(x):
if isinstance(x, str):
x = x.encode('ascii')
return unhexlify(x)

View File

@@ -1,10 +0,0 @@
def as_bytes(x, encoding='utf-8'):
if isinstance(x, str):
return x.encode(encoding)
if isinstance(x, bytes):
return x
if isinstance(x, bytearray):
return bytes(x)
if isinstance(x, memoryview):
return x.tobytes()
return str(x).encode(encoding)

View File

@@ -1,17 +0,0 @@
import urllib.parse
def unquote(x, encoding='utf-8', errors='replace'):
# TODO(gryf): this works like that: if x is a binary, convert it to
# string using encoding and make unquote. After that make it binary again.
# If x is string, just pass it to the unquote.
# This approach is mostly used within lxml etree strings, which suppose to
# be binary because of its inner representation. I'm wondering, if
# xml.etree could be used instead - to be checked.
binary = isinstance(x, bytes)
if binary:
x = x.decode(encoding, errors)
ans = urllib.parse.unquote(x, encoding, errors)
if binary:
ans = ans.encode(encoding, errors)
return ans

View File

@@ -8,6 +8,7 @@ import tempfile
from ebook_converter.constants_old import __version__, __appname__, \ from ebook_converter.constants_old import __version__, __appname__, \
filesystem_encoding filesystem_encoding
from ebook_converter import polyglot
def cleanup(path): def cleanup(path):
@@ -90,9 +91,8 @@ def base_dir():
td = os.environ.get('CALIBRE_WORKER_TEMP_DIR', None) td = os.environ.get('CALIBRE_WORKER_TEMP_DIR', None)
if td is not None: if td is not None:
from ebook_converter.utils.serialize import msgpack_loads from ebook_converter.utils.serialize import msgpack_loads
from ebook_converter.polyglot.binary import from_hex_bytes
try: try:
td = msgpack_loads(from_hex_bytes(td)) td = msgpack_loads(polyglot.from_hex_bytes(td))
except Exception: except Exception:
td = None td = None
if td and os.path.exists(td): if td and os.path.exists(td):

View File

@@ -11,7 +11,7 @@
import operator import operator
import re import re
from ebook_converter.polyglot.binary import from_hex_bytes from ebook_converter import polyglot
__all__ = ['decode'] # Everything else is implementation detail __all__ = ['decode'] # Everything else is implementation detail
@@ -94,7 +94,8 @@ def try_encoding(css_bytes, encoding, fallback=True):
def hex2re(hex_data): def hex2re(hex_data):
return re.escape(from_hex_bytes(hex_data.replace(' ', '').encode('ascii'))) return re.escape(polyglot.from_hex_bytes(hex_data.replace(' ', '')
.encode('ascii')))
class Slicer(object): class Slicer(object):

View File

@@ -2,7 +2,7 @@ import struct
from io import BytesIO from io import BytesIO
from collections import defaultdict from collections import defaultdict
from ebook_converter.polyglot.builtins import as_bytes from ebook_converter import polyglot
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -38,7 +38,7 @@ def get_tables(raw):
def get_table(raw, name): def get_table(raw, name):
''' Get the raw table bytes for the specified table in the font ''' ''' Get the raw table bytes for the specified table in the font '''
name = as_bytes(name.lower()) name = polyglot.as_bytes(name.lower())
for table_tag, table, table_index, table_offset, table_checksum in get_tables(raw): for table_tag, table, table_index, table_offset, table_checksum in get_tables(raw):
if table_tag.lower() == name: if table_tag.lower() == name:
return table, table_index, table_offset, table_checksum return table, table_index, table_offset, table_checksum

View File

@@ -1,3 +1,6 @@
from ebook_converter import polyglot
MSGPACK_MIME = 'application/x-msgpack' MSGPACK_MIME = 'application/x-msgpack'
CANARY = 'jPoAv3zOyHvQ5JFNYg4hJ9' CANARY = 'jPoAv3zOyHvQ5JFNYg4hJ9'
@@ -56,11 +59,11 @@ def json_dumps(data, **kw):
def decode_metadata(x, for_json): def decode_metadata(x, for_json):
from ebook_converter.polyglot.binary import from_base64_bytes
from ebook_converter.ebooks.metadata.book.serialize import metadata_from_dict from ebook_converter.ebooks.metadata.book.serialize import metadata_from_dict
obj = metadata_from_dict(x) obj = metadata_from_dict(x)
if for_json and obj.cover_data and obj.cover_data[1]: if for_json and obj.cover_data and obj.cover_data[1]:
obj.cover_data = obj.cover_data[0], from_base64_bytes(obj.cover_data[1]) obj.cover_data = (obj.cover_data[0],
polyglot.from_base64_bytes(obj.cover_data[1]))
return obj return obj

View File

@@ -1,5 +1,9 @@
import os, sys, re import fcntl
import fcntl, termios, struct import os
import re
import struct
import sys
import termios
def fmt(code): def fmt(code):

View File

@@ -10,7 +10,7 @@ from tempfile import SpooledTemporaryFile
from ebook_converter.utils import filenames as fms from ebook_converter.utils import filenames as fms
from ebook_converter.constants_old import filesystem_encoding from ebook_converter.constants_old import filesystem_encoding
from ebook_converter.ebooks.chardet import detect from ebook_converter.ebooks.chardet import detect
from ebook_converter.polyglot.builtins import as_bytes from ebook_converter import polyglot
try: try:
import zlib # We may need its compression method import zlib # We may need its compression method
@@ -330,7 +330,7 @@ class ZipInfo (object):
if os.sep != '/': if os.sep != '/':
os_sep, sep = os.sep, '/' os_sep, sep = os.sep, '/'
if isinstance(filename, bytes): if isinstance(filename, bytes):
os_sep, sep = as_bytes(os_sep), b'/' os_sep, sep = polyglot.as_bytes(os_sep), b'/'
if os_sep in filename: if os_sep in filename:
filename = filename.replace(os_sep, sep) filename = filename.replace(os_sep, sep)