1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-01-26 15:25:45 +01:00

Removed urlunquote

This commit is contained in:
2020-07-11 16:24:16 +02:00
parent 5fb02998a1
commit c82c4e456c
7 changed files with 29 additions and 40 deletions

View File

@@ -5,6 +5,7 @@ import re
 import posixpath
 import traceback
 import uuid
+import urllib.parse
 from lxml import etree
@@ -438,7 +439,8 @@ class EPUBInput(InputFormatPlugin):
 href, frag = elem.get('href').partition('#')[::2]
 link_path = (os.path
 .relpath(os.path
-.join(base_path, base.urlunquote(href)),
+.join(base_path,
+urllib.parse.unquote(href)),
 base_path))
 abs_href = base.urlnormalize(link_path)
 if abs_href == self.removed_cover:

View File

@@ -4,7 +4,6 @@ import shutil
 import urllib.parse
 import uuid
-from ebook_converter import constants as const
 from ebook_converter.ebooks.oeb import base
 from ebook_converter.ebooks.oeb import parse_utils
 from ebook_converter.customize.conversion import OutputFormatPlugin
@@ -390,7 +389,7 @@ class EPUBOutput(OutputFormatPlugin):
 href = getattr(node, 'href', None)
 if hasattr(href, 'partition'):
 _base, _, frag = href.partition('#')
-frag = base.urlunquote(frag)
+frag = urllib.parse.unquote(frag)
 if frag and frag_pat.match(frag) is None:
 self.log.warn(
 'Removing fragment identifier %r from TOC as Adobe Digital Editions cannot handle it'%frag)

View File

@@ -1,21 +1,18 @@
+import collections
+import functools
 import os
 import posixpath
-from collections import namedtuple
+import urllib.parse
-from functools import partial
 from lxml import etree
 from ebook_converter import fit_image
-from ebook_converter.ebooks.oeb.base import urlunquote
 from ebook_converter.ebooks.docx.images import pt_to_emu
 from ebook_converter.utils.filenames import ascii_filename
 from ebook_converter.utils.imghdr import identify
-__license__ = 'GPL v3'
+Image = collections.namedtuple('Image', 'rid fname width height fmt item')
-__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
-Image = namedtuple('Image', 'rid fname width height fmt item')
 def as_num(x):
@@ -102,7 +99,7 @@ class ImagesManager(object):
 fake_margins = floating is None
 self.count += 1
 img = self.images[href]
-name = urlunquote(posixpath.basename(href))
+name = urllib.parse.unquote(posixpath.basename(href))
 width, height = style.img_size(img.width, img.height)
 scaled, width, height = fit_image(width, height, self.page_width, self.page_height)
 width, height = map(pt_to_emu, (width, height))
@@ -157,7 +154,7 @@ class ImagesManager(object):
 makeelement(makeelement(spPr, 'a:prstGeom', prst='rect'), 'a:avLst')
 def create_filename(self, href, fmt):
-fname = ascii_filename(urlunquote(posixpath.basename(href)))
+fname = ascii_filename(urllib.parse.unquote(posixpath.basename(href)))
 fname = posixpath.splitext(fname)[0]
 fname = fname[:75].rstrip('.') or 'image'
 num = 0
@@ -171,7 +168,8 @@ class ImagesManager(object):
 def serialize(self, images_map):
 for img in self.images.values():
-images_map['word/' + img.fname] = partial(self.get_data, img.item)
+images_map['word/' + img.fname] = functools.partial(self.get_data,
+img.item)
 def get_data(self, item):
 try:

View File

@@ -7,7 +7,6 @@ import re
 import sys
 import urllib.parse
-from ebook_converter.ebooks.oeb.base import urlunquote
 from ebook_converter.ebooks.chardet import detect_xml_encoding
 from ebook_converter import unicode_path, replace_entities
@@ -23,7 +22,7 @@ class Link(object):
 isabs = False
 path = urllib.parse.urlunparse(('', '', path, url.params, url.query,
 ''))
-path = urlunquote(path)
+path = urllib.parse.unquote(path)
 if isabs or os.path.isabs(path):
 return path
 return os.path.abspath(os.path.join(base, path))
@@ -41,7 +40,7 @@ class Link(object):
 self.is_local = self.parsed_url.scheme in ('', 'file')
 self.is_internal = self.is_local and not bool(self.parsed_url.path)
 self.path = None
-self.fragment = urlunquote(self.parsed_url.fragment)
+self.fragment = urllib.parse.unquote(self.parsed_url.fragment)
 if self.is_local and not self.is_internal:
 self.path = self.url_to_local_path(self.parsed_url, base)

View File

@@ -24,7 +24,6 @@ from ebook_converter.ebooks.conversion.preprocess import CSSPreProcessor
 from ebook_converter.ebooks.oeb import parse_utils
 from ebook_converter.utils.cleantext import clean_xml_chars
 from ebook_converter.utils.short_uuid import uuid4
-from ebook_converter.polyglot.urllib import unquote as urlunquote
 def tag(tag_ns, name):
@@ -423,7 +422,7 @@ def urlnormalize(href):
 path, frag = urllib.parse.urldefrag(href)
 parts = ('', '', path, '', '', frag)
 parts = (part.replace('\\', '/') for part in parts)
-parts = (urlunquote(part) for part in parts)
+parts = (urllib.parse.unquote(part) for part in parts)
 parts = (urlquote(part) for part in parts)
 return urllib.parse.urlunparse(parts)
@@ -516,24 +515,15 @@ class DirContainer(object):
 self.opfname = path
 return
-def _unquote(self, path):
-# unquote must run on a bytestring and will return a bytestring
-# If it runs on a unicode object, it returns a double encoded unicode
-# string: unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8')
-# and the latter is correct
-if isinstance(path, str):
-path = path.encode('utf-8')
-return urlunquote(path).decode('utf-8')
 def read(self, path):
 if path is None:
 path = self.opfname
-path = os.path.join(self.rootdir, self._unquote(path))
+path = os.path.join(self.rootdir, urllib.parse.unquote(path))
 with open(path, 'rb') as f:
 return f.read()
 def write(self, path, data):
-path = os.path.join(self.rootdir, self._unquote(path))
+path = os.path.join(self.rootdir, urllib.parse.unquote(path))
 dir = os.path.dirname(path)
 if not os.path.isdir(dir):
 os.makedirs(dir)
@@ -544,7 +534,7 @@ class DirContainer(object):
 if not path:
 return False
 try:
-path = os.path.join(self.rootdir, self._unquote(path))
+path = os.path.join(self.rootdir, urllib.parse.unquote(path))
 except ValueError: # Happens if path contains quoted special chars
 return False
 try:
@@ -913,7 +903,7 @@ class Manifest(object):
 def _parse_xhtml(self, data):
 orig_data = data
-fname = urlunquote(self.href)
+fname = urllib.parse.unquote(self.href)
 self.oeb.log.debug('Parsing', fname, '...')
 self.oeb.html_preprocessor.current_href = self.href
 try:
@@ -1212,7 +1202,7 @@ class Manifest(object):
 media_type = OEB_DOC_MIME
 elif media_type in OEB_STYLES:
 media_type = OEB_CSS_MIME
-attrib = {'id': item.id, 'href': urlunquote(item.href),
+attrib = {'id': item.id, 'href': urllib.parse.unquote(item.href),
 'media-type': media_type}
 if item.fallback:
 attrib['fallback'] = item.fallback
@@ -1227,7 +1217,7 @@ class Manifest(object):
 media_type = XHTML_MIME
 elif media_type in OEB_STYLES:
 media_type = CSS_MIME
-attrib = {'id': item.id, 'href': urlunquote(item.href),
+attrib = {'id': item.id, 'href': urllib.parse.unquote(item.href),
 'media-type': media_type}
 if item.fallback:
 attrib['fallback'] = item.fallback
@@ -1446,7 +1436,7 @@ class Guide(object):
 def to_opf1(self, parent=None):
 elem = element(parent, 'guide')
 for ref in self.refs.values():
-attrib = {'type': ref.type, 'href': urlunquote(ref.href)}
+attrib = {'type': ref.type, 'href': urllib.parse.unquote(ref.href)}
 if ref.title:
 attrib['title'] = ref.title
 element(elem, 'reference', attrib=attrib)
@@ -1457,7 +1447,7 @@ class Guide(object):
 return
 elem = element(parent, tag('opf', 'guide'))
 for ref in self.refs.values():
-attrib = {'type': ref.type, 'href': urlunquote(ref.href)}
+attrib = {'type': ref.type, 'href': urllib.parse.unquote(ref.href)}
 if ref.title:
 attrib['title'] = ref.title
 element(elem, tag('opf', 'reference'), attrib=attrib)
@@ -1594,7 +1584,7 @@ class TOC(object):
 def to_opf1(self, tour):
 for node in self.nodes:
 element(tour, 'site', attrib={
-'title': node.title, 'href': urlunquote(node.href)})
+'title': node.title, 'href': urllib.parse.unquote(node.href)})
 node.to_opf1(tour)
 return tour

View File

@@ -106,7 +106,7 @@ def href_to_name(href, root, base=None):
 return None
 if purl.scheme or not purl.path:
 return None
-href = oeb_base.urlunquote(purl.path)
+href = urllib.parse.unquote(purl.path)
 fullpath = os.path.join(base, *href.split('/'))
 return unicodedata.normalize('NFC', abspath_to_name(fullpath, root))
@@ -1171,7 +1171,7 @@ class EpubContainer(Container):
 )
 if not opf_files:
 raise InvalidEpub('META-INF/container.xml contains no link to OPF file')
-opf_path = os.path.join(self.root, *(oeb_base.urlunquote(opf_files[0].get('full-path')).split('/')))
+opf_path = os.path.join(self.root, *(urllib.parse.unquote(opf_files[0].get('full-path')).split('/')))
 if not exists(opf_path):
 raise InvalidEpub('OPF file does not exist at location pointed to'
 ' by META-INF/container.xml')

View File

@@ -1,7 +1,8 @@
 import mimetypes
 import re
+import urllib.parse
-from ebook_converter.ebooks.oeb.base import XPath, urlunquote
+from ebook_converter.ebooks.oeb.base import XPath
 from ebook_converter.polyglot.binary import from_base64_bytes
 from ebook_converter.polyglot.builtins import as_bytes
@@ -32,7 +33,7 @@ class DataURL(object):
 'URI, ignoring it')
 continue
 else:
-data = urlunquote(data)
+data = urllib.parse.unquote(data)
 data = as_bytes(data)
 fmt = what(None, data)
 if not fmt: