Removed the urlunquote alias; call urllib.parse.unquote directly

2026-02-24 19:25:47 +01:00 · 2020-07-11 16:24:16 +02:00
parent 5fb02998a1
commit c82c4e456c
7 changed files with 29 additions and 40 deletions
--- a/ebook_converter/ebooks/conversion/plugins/epub_input.py
+++ b/ebook_converter/ebooks/conversion/plugins/epub_input.py
@@ -5,6 +5,7 @@ import re
 import posixpath
 import traceback
 import uuid
+import urllib.parse

 from lxml import etree

@@ -438,7 +439,8 @@ class EPUBInput(InputFormatPlugin):
                href, frag = elem.get('href').partition('#')[::2]
                link_path = (os.path
                             .relpath(os.path
-                                      .join(base_path, base.urlunquote(href)),
+                                      .join(base_path,
+                                            urllib.parse.unquote(href)),
                                      base_path))
                abs_href = base.urlnormalize(link_path)
                if abs_href == self.removed_cover:
--- a/ebook_converter/ebooks/conversion/plugins/epub_output.py
+++ b/ebook_converter/ebooks/conversion/plugins/epub_output.py
@@ -4,7 +4,6 @@ import shutil
 import urllib.parse
 import uuid

-from ebook_converter import constants as const
 from ebook_converter.ebooks.oeb import base
 from ebook_converter.ebooks.oeb import parse_utils
 from ebook_converter.customize.conversion import OutputFormatPlugin
@@ -390,7 +389,7 @@ class EPUBOutput(OutputFormatPlugin):
            href = getattr(node, 'href', None)
            if hasattr(href, 'partition'):
                _base, _, frag = href.partition('#')
-                frag = base.urlunquote(frag)
+                frag = urllib.parse.unquote(frag)
                if frag and frag_pat.match(frag) is None:
                    self.log.warn(
                            'Removing fragment identifier %r from TOC as Adobe Digital Editions cannot handle it'%frag)
--- a/ebook_converter/ebooks/docx/writer/images.py
+++ b/ebook_converter/ebooks/docx/writer/images.py
@@ -1,21 +1,18 @@
+import collections
+import functools
 import os
 import posixpath
-from collections import namedtuple
-from functools import partial
+import urllib.parse

 from lxml import etree

 from ebook_converter import fit_image
-from ebook_converter.ebooks.oeb.base import urlunquote
 from ebook_converter.ebooks.docx.images import pt_to_emu
 from ebook_converter.utils.filenames import ascii_filename
 from ebook_converter.utils.imghdr import identify


-__license__ = 'GPL v3'
-__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
-
-Image = namedtuple('Image', 'rid fname width height fmt item')
+Image = collections.namedtuple('Image', 'rid fname width height fmt item')


 def as_num(x):
@@ -102,7 +99,7 @@ class ImagesManager(object):
        fake_margins = floating is None
        self.count += 1
        img = self.images[href]
-        name = urlunquote(posixpath.basename(href))
+        name = urllib.parse.unquote(posixpath.basename(href))
        width, height = style.img_size(img.width, img.height)
        scaled, width, height = fit_image(width, height, self.page_width, self.page_height)
        width, height = map(pt_to_emu, (width, height))
@@ -157,7 +154,7 @@ class ImagesManager(object):
        makeelement(makeelement(spPr, 'a:prstGeom', prst='rect'), 'a:avLst')

    def create_filename(self, href, fmt):
-        fname = ascii_filename(urlunquote(posixpath.basename(href)))
+        fname = ascii_filename(urllib.parse.unquote(posixpath.basename(href)))
        fname = posixpath.splitext(fname)[0]
        fname = fname[:75].rstrip('.') or 'image'
        num = 0
@@ -171,7 +168,8 @@ class ImagesManager(object):

    def serialize(self, images_map):
        for img in self.images.values():
-            images_map['word/' + img.fname] = partial(self.get_data, img.item)
+            images_map['word/' + img.fname] = functools.partial(self.get_data,
+                                                                img.item)

    def get_data(self, item):
        try:
--- a/ebook_converter/ebooks/html/input.py
+++ b/ebook_converter/ebooks/html/input.py
@@ -7,7 +7,6 @@ import re
 import sys
 import urllib.parse

-from ebook_converter.ebooks.oeb.base import urlunquote
 from ebook_converter.ebooks.chardet import detect_xml_encoding
 from ebook_converter import unicode_path, replace_entities

@@ -23,7 +22,7 @@ class Link(object):
        isabs = False
        path = urllib.parse.urlunparse(('', '', path, url.params, url.query,
                                        ''))
-        path = urlunquote(path)
+        path = urllib.parse.unquote(path)
        if isabs or os.path.isabs(path):
            return path
        return os.path.abspath(os.path.join(base, path))
@@ -41,7 +40,7 @@ class Link(object):
        self.is_local = self.parsed_url.scheme in ('', 'file')
        self.is_internal = self.is_local and not bool(self.parsed_url.path)
        self.path = None
-        self.fragment = urlunquote(self.parsed_url.fragment)
+        self.fragment = urllib.parse.unquote(self.parsed_url.fragment)
        if self.is_local and not self.is_internal:
            self.path = self.url_to_local_path(self.parsed_url, base)

--- a/ebook_converter/ebooks/oeb/base.py
+++ b/ebook_converter/ebooks/oeb/base.py
@@ -24,7 +24,6 @@ from ebook_converter.ebooks.conversion.preprocess import CSSPreProcessor
 from ebook_converter.ebooks.oeb import parse_utils
 from ebook_converter.utils.cleantext import clean_xml_chars
 from ebook_converter.utils.short_uuid import uuid4
-from ebook_converter.polyglot.urllib import unquote as urlunquote


 def tag(tag_ns, name):
@@ -423,7 +422,7 @@ def urlnormalize(href):
        path, frag = urllib.parse.urldefrag(href)
        parts = ('', '', path, '', '', frag)
    parts = (part.replace('\\', '/') for part in parts)
-    parts = (urlunquote(part) for part in parts)
+    parts = (urllib.parse.unquote(part) for part in parts)
    parts = (urlquote(part) for part in parts)
    return urllib.parse.urlunparse(parts)

@@ -516,24 +515,15 @@ class DirContainer(object):
                    self.opfname = path
                    return

-    def _unquote(self, path):
-        # unquote must run on a bytestring and will return a bytestring
-        # If it runs on a unicode object, it returns a double encoded unicode
-        # string: unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8')
-        # and the latter is correct
-        if isinstance(path, str):
-            path = path.encode('utf-8')
-        return urlunquote(path).decode('utf-8')
-
    def read(self, path):
        if path is None:
            path = self.opfname
-        path = os.path.join(self.rootdir, self._unquote(path))
+        path = os.path.join(self.rootdir, urllib.parse.unquote(path))
        with open(path, 'rb') as f:
            return f.read()

    def write(self, path, data):
-        path = os.path.join(self.rootdir, self._unquote(path))
+        path = os.path.join(self.rootdir, urllib.parse.unquote(path))
        dir = os.path.dirname(path)
        if not os.path.isdir(dir):
            os.makedirs(dir)
@@ -544,7 +534,7 @@ class DirContainer(object):
        if not path:
            return False
        try:
-            path = os.path.join(self.rootdir, self._unquote(path))
+            path = os.path.join(self.rootdir, urllib.parse.unquote(path))
        except ValueError:  # Happens if path contains quoted special chars
            return False
        try:
@@ -913,7 +903,7 @@ class Manifest(object):

        def _parse_xhtml(self, data):
            orig_data = data
-            fname = urlunquote(self.href)
+            fname = urllib.parse.unquote(self.href)
            self.oeb.log.debug('Parsing', fname, '...')
            self.oeb.html_preprocessor.current_href = self.href
            try:
@@ -1212,7 +1202,7 @@ class Manifest(object):
                media_type = OEB_DOC_MIME
            elif media_type in OEB_STYLES:
                media_type = OEB_CSS_MIME
-            attrib = {'id': item.id, 'href': urlunquote(item.href),
+            attrib = {'id': item.id, 'href': urllib.parse.unquote(item.href),
                      'media-type': media_type}
            if item.fallback:
                attrib['fallback'] = item.fallback
@@ -1227,7 +1217,7 @@ class Manifest(object):
                media_type = XHTML_MIME
            elif media_type in OEB_STYLES:
                media_type = CSS_MIME
-            attrib = {'id': item.id, 'href': urlunquote(item.href),
+            attrib = {'id': item.id, 'href': urllib.parse.unquote(item.href),
                      'media-type': media_type}
            if item.fallback:
                attrib['fallback'] = item.fallback
@@ -1446,7 +1436,7 @@ class Guide(object):
    def to_opf1(self, parent=None):
        elem = element(parent, 'guide')
        for ref in self.refs.values():
-            attrib = {'type': ref.type, 'href': urlunquote(ref.href)}
+            attrib = {'type': ref.type, 'href': urllib.parse.unquote(ref.href)}
            if ref.title:
                attrib['title'] = ref.title
            element(elem, 'reference', attrib=attrib)
@@ -1457,7 +1447,7 @@ class Guide(object):
            return
        elem = element(parent, tag('opf', 'guide'))
        for ref in self.refs.values():
-            attrib = {'type': ref.type, 'href': urlunquote(ref.href)}
+            attrib = {'type': ref.type, 'href': urllib.parse.unquote(ref.href)}
            if ref.title:
                attrib['title'] = ref.title
            element(elem, tag('opf', 'reference'), attrib=attrib)
@@ -1594,7 +1584,7 @@ class TOC(object):
    def to_opf1(self, tour):
        for node in self.nodes:
            element(tour, 'site', attrib={
-                'title': node.title, 'href': urlunquote(node.href)})
+                'title': node.title, 'href': urllib.parse.unquote(node.href)})
            node.to_opf1(tour)
        return tour

--- a/ebook_converter/ebooks/oeb/polish/container.py
+++ b/ebook_converter/ebooks/oeb/polish/container.py
@@ -106,7 +106,7 @@ def href_to_name(href, root, base=None):
        return None
    if purl.scheme or not purl.path:
        return None
-    href = oeb_base.urlunquote(purl.path)
+    href = urllib.parse.unquote(purl.path)

    fullpath = os.path.join(base, *href.split('/'))
    return unicodedata.normalize('NFC', abspath_to_name(fullpath, root))
@@ -1171,7 +1171,7 @@ class EpubContainer(Container):
        )
        if not opf_files:
            raise InvalidEpub('META-INF/container.xml contains no link to OPF file')
-        opf_path = os.path.join(self.root, *(oeb_base.urlunquote(opf_files[0].get('full-path')).split('/')))
+        opf_path = os.path.join(self.root, *(urllib.parse.unquote(opf_files[0].get('full-path')).split('/')))
        if not exists(opf_path):
            raise InvalidEpub('OPF file does not exist at location pointed to'
                    ' by META-INF/container.xml')
--- a/ebook_converter/ebooks/oeb/transforms/data_url.py
+++ b/ebook_converter/ebooks/oeb/transforms/data_url.py
@@ -1,7 +1,8 @@
 import mimetypes
 import re
+import urllib.parse

-from ebook_converter.ebooks.oeb.base import XPath, urlunquote
+from ebook_converter.ebooks.oeb.base import XPath
 from ebook_converter.polyglot.binary import from_base64_bytes
 from ebook_converter.polyglot.builtins import as_bytes

@@ -32,7 +33,7 @@ class DataURL(object):
                                       'URI, ignoring it')
                        continue
                else:
-                    data = urlunquote(data)
+                    data = urllib.parse.unquote(data)
                data = as_bytes(data)
                fmt = what(None, data)
                if not fmt: