diff --git a/ebook_converter/ebooks/conversion/plugins/epub_input.py b/ebook_converter/ebooks/conversion/plugins/epub_input.py index 9579c7b..1ef7ef7 100644 --- a/ebook_converter/ebooks/conversion/plugins/epub_input.py +++ b/ebook_converter/ebooks/conversion/plugins/epub_input.py @@ -5,6 +5,7 @@ import re import posixpath import traceback import uuid +import urllib.parse from lxml import etree @@ -438,7 +439,8 @@ class EPUBInput(InputFormatPlugin): href, frag = elem.get('href').partition('#')[::2] link_path = (os.path .relpath(os.path - .join(base_path, base.urlunquote(href)), + .join(base_path, + urllib.parse.unquote(href)), base_path)) abs_href = base.urlnormalize(link_path) if abs_href == self.removed_cover: diff --git a/ebook_converter/ebooks/conversion/plugins/epub_output.py b/ebook_converter/ebooks/conversion/plugins/epub_output.py index df7c917..aea5f67 100644 --- a/ebook_converter/ebooks/conversion/plugins/epub_output.py +++ b/ebook_converter/ebooks/conversion/plugins/epub_output.py @@ -4,7 +4,6 @@ import shutil import urllib.parse import uuid -from ebook_converter import constants as const from ebook_converter.ebooks.oeb import base from ebook_converter.ebooks.oeb import parse_utils from ebook_converter.customize.conversion import OutputFormatPlugin @@ -390,7 +389,7 @@ class EPUBOutput(OutputFormatPlugin): href = getattr(node, 'href', None) if hasattr(href, 'partition'): _base, _, frag = href.partition('#') - frag = base.urlunquote(frag) + frag = urllib.parse.unquote(frag) if frag and frag_pat.match(frag) is None: self.log.warn( 'Removing fragment identifier %r from TOC as Adobe Digital Editions cannot handle it'%frag) diff --git a/ebook_converter/ebooks/docx/writer/images.py b/ebook_converter/ebooks/docx/writer/images.py index 311fd80..daaff85 100644 --- a/ebook_converter/ebooks/docx/writer/images.py +++ b/ebook_converter/ebooks/docx/writer/images.py @@ -1,21 +1,18 @@ +import collections +import functools import os import posixpath -from collections import namedtuple -from functools import partial +import urllib.parse from lxml import etree from ebook_converter import fit_image -from ebook_converter.ebooks.oeb.base import urlunquote from ebook_converter.ebooks.docx.images import pt_to_emu from ebook_converter.utils.filenames import ascii_filename from ebook_converter.utils.imghdr import identify -__license__ = 'GPL v3' -__copyright__ = '2015, Kovid Goyal ' - -Image = namedtuple('Image', 'rid fname width height fmt item') +Image = collections.namedtuple('Image', 'rid fname width height fmt item') def as_num(x): @@ -102,7 +99,7 @@ class ImagesManager(object): fake_margins = floating is None self.count += 1 img = self.images[href] - name = urlunquote(posixpath.basename(href)) + name = urllib.parse.unquote(posixpath.basename(href)) width, height = style.img_size(img.width, img.height) scaled, width, height = fit_image(width, height, self.page_width, self.page_height) width, height = map(pt_to_emu, (width, height)) @@ -157,7 +154,7 @@ class ImagesManager(object): makeelement(makeelement(spPr, 'a:prstGeom', prst='rect'), 'a:avLst') def create_filename(self, href, fmt): - fname = ascii_filename(urlunquote(posixpath.basename(href))) + fname = ascii_filename(urllib.parse.unquote(posixpath.basename(href))) fname = posixpath.splitext(fname)[0] fname = fname[:75].rstrip('.') or 'image' num = 0 @@ -171,7 +168,8 @@ class ImagesManager(object): def serialize(self, images_map): for img in self.images.values(): - images_map['word/' + img.fname] = partial(self.get_data, img.item) + images_map['word/' + img.fname] = functools.partial(self.get_data, + img.item) def get_data(self, item): try: diff --git a/ebook_converter/ebooks/html/input.py b/ebook_converter/ebooks/html/input.py index 6baabd4..7c4562c 100644 --- a/ebook_converter/ebooks/html/input.py +++ b/ebook_converter/ebooks/html/input.py @@ -7,7 +7,6 @@ import re import sys import urllib.parse -from ebook_converter.ebooks.oeb.base import urlunquote from ebook_converter.ebooks.chardet import detect_xml_encoding from ebook_converter import unicode_path, replace_entities @@ -23,7 +22,7 @@ class Link(object): isabs = False path = urllib.parse.urlunparse(('', '', path, url.params, url.query, '')) - path = urlunquote(path) + path = urllib.parse.unquote(path) if isabs or os.path.isabs(path): return path return os.path.abspath(os.path.join(base, path)) @@ -41,7 +40,7 @@ class Link(object): self.is_local = self.parsed_url.scheme in ('', 'file') self.is_internal = self.is_local and not bool(self.parsed_url.path) self.path = None - self.fragment = urlunquote(self.parsed_url.fragment) + self.fragment = urllib.parse.unquote(self.parsed_url.fragment) if self.is_local and not self.is_internal: self.path = self.url_to_local_path(self.parsed_url, base) diff --git a/ebook_converter/ebooks/oeb/base.py b/ebook_converter/ebooks/oeb/base.py index dce8242..121897b 100644 --- a/ebook_converter/ebooks/oeb/base.py +++ b/ebook_converter/ebooks/oeb/base.py @@ -24,7 +24,6 @@ from ebook_converter.ebooks.conversion.preprocess import CSSPreProcessor from ebook_converter.ebooks.oeb import parse_utils from ebook_converter.utils.cleantext import clean_xml_chars from ebook_converter.utils.short_uuid import uuid4 -from ebook_converter.polyglot.urllib import unquote as urlunquote def tag(tag_ns, name): @@ -423,7 +422,7 @@ def urlnormalize(href): path, frag = urllib.parse.urldefrag(href) parts = ('', '', path, '', '', frag) parts = (part.replace('\\', '/') for part in parts) - parts = (urlunquote(part) for part in parts) + parts = (urllib.parse.unquote(part) for part in parts) parts = (urlquote(part) for part in parts) return urllib.parse.urlunparse(parts) @@ -516,24 +515,15 @@ class DirContainer(object): self.opfname = path return - def _unquote(self, path): - # unquote must run on a bytestring and will return a bytestring - # If it runs on a unicode object, it returns a double encoded unicode - # string: unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8') - # and the latter is correct - if isinstance(path, str): - path = path.encode('utf-8') - return urlunquote(path).decode('utf-8') - def read(self, path): if path is None: path = self.opfname - path = os.path.join(self.rootdir, self._unquote(path)) + path = os.path.join(self.rootdir, urllib.parse.unquote(path)) with open(path, 'rb') as f: return f.read() def write(self, path, data): - path = os.path.join(self.rootdir, self._unquote(path)) + path = os.path.join(self.rootdir, urllib.parse.unquote(path)) dir = os.path.dirname(path) if not os.path.isdir(dir): os.makedirs(dir) @@ -544,7 +534,7 @@ class DirContainer(object): if not path: return False try: - path = os.path.join(self.rootdir, self._unquote(path)) + path = os.path.join(self.rootdir, urllib.parse.unquote(path)) except ValueError: # Happens if path contains quoted special chars return False try: @@ -913,7 +903,7 @@ class Manifest(object): def _parse_xhtml(self, data): orig_data = data - fname = urlunquote(self.href) + fname = urllib.parse.unquote(self.href) self.oeb.log.debug('Parsing', fname, '...') self.oeb.html_preprocessor.current_href = self.href try: @@ -1212,7 +1202,7 @@ class Manifest(object): media_type = OEB_DOC_MIME elif media_type in OEB_STYLES: media_type = OEB_CSS_MIME - attrib = {'id': item.id, 'href': urlunquote(item.href), + attrib = {'id': item.id, 'href': urllib.parse.unquote(item.href), 'media-type': media_type} if item.fallback: attrib['fallback'] = item.fallback @@ -1227,7 +1217,7 @@ class Manifest(object): media_type = XHTML_MIME elif media_type in OEB_STYLES: media_type = CSS_MIME - attrib = {'id': item.id, 'href': urlunquote(item.href), + attrib = {'id': item.id, 'href': urllib.parse.unquote(item.href), 'media-type': media_type} if item.fallback: attrib['fallback'] = item.fallback @@ -1446,7 +1436,7 @@ class Guide(object): def to_opf1(self, parent=None): elem = element(parent, 'guide') for ref in self.refs.values(): - attrib = {'type': ref.type, 'href': urlunquote(ref.href)} + attrib = {'type': ref.type, 'href': urllib.parse.unquote(ref.href)} if ref.title: attrib['title'] = ref.title element(elem, 'reference', attrib=attrib) @@ -1457,7 +1447,7 @@ class Guide(object): return elem = element(parent, tag('opf', 'guide')) for ref in self.refs.values(): - attrib = {'type': ref.type, 'href': urlunquote(ref.href)} + attrib = {'type': ref.type, 'href': urllib.parse.unquote(ref.href)} if ref.title: attrib['title'] = ref.title element(elem, tag('opf', 'reference'), attrib=attrib) @@ -1594,7 +1584,7 @@ class TOC(object): def to_opf1(self, tour): for node in self.nodes: element(tour, 'site', attrib={ - 'title': node.title, 'href': urlunquote(node.href)}) + 'title': node.title, 'href': urllib.parse.unquote(node.href)}) node.to_opf1(tour) return tour diff --git a/ebook_converter/ebooks/oeb/polish/container.py b/ebook_converter/ebooks/oeb/polish/container.py index 1119f15..e6f2142 100644 --- a/ebook_converter/ebooks/oeb/polish/container.py +++ b/ebook_converter/ebooks/oeb/polish/container.py @@ -106,7 +106,7 @@ def href_to_name(href, root, base=None): return None if purl.scheme or not purl.path: return None - href = oeb_base.urlunquote(purl.path) + href = urllib.parse.unquote(purl.path) fullpath = os.path.join(base, *href.split('/')) return unicodedata.normalize('NFC', abspath_to_name(fullpath, root)) @@ -1171,7 +1171,7 @@ class EpubContainer(Container): ) if not opf_files: raise InvalidEpub('META-INF/container.xml contains no link to OPF file') - opf_path = os.path.join(self.root, *(oeb_base.urlunquote(opf_files[0].get('full-path')).split('/'))) + opf_path = os.path.join(self.root, *(urllib.parse.unquote(opf_files[0].get('full-path')).split('/'))) if not exists(opf_path): raise InvalidEpub('OPF file does not exist at location pointed to' ' by META-INF/container.xml') diff --git a/ebook_converter/ebooks/oeb/transforms/data_url.py b/ebook_converter/ebooks/oeb/transforms/data_url.py index 30b9697..1e0aff1 100644 --- a/ebook_converter/ebooks/oeb/transforms/data_url.py +++ b/ebook_converter/ebooks/oeb/transforms/data_url.py @@ -1,7 +1,8 @@ import mimetypes import re +import urllib.parse -from ebook_converter.ebooks.oeb.base import XPath, urlunquote +from ebook_converter.ebooks.oeb.base import XPath from ebook_converter.polyglot.binary import from_base64_bytes from ebook_converter.polyglot.builtins import as_bytes @@ -32,7 +33,7 @@ class DataURL(object): 'URI, ignoring it') continue else: - data = urlunquote(data) + data = urllib.parse.unquote(data) data = as_bytes(data) fmt = what(None, data) if not fmt: