mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-01-26 15:25:45 +01:00
Removed urlunquote
This commit is contained in:
@@ -5,6 +5,7 @@ import re
|
|||||||
import posixpath
|
import posixpath
|
||||||
import traceback
|
import traceback
|
||||||
import uuid
|
import uuid
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
@@ -438,7 +439,8 @@ class EPUBInput(InputFormatPlugin):
|
|||||||
href, frag = elem.get('href').partition('#')[::2]
|
href, frag = elem.get('href').partition('#')[::2]
|
||||||
link_path = (os.path
|
link_path = (os.path
|
||||||
.relpath(os.path
|
.relpath(os.path
|
||||||
.join(base_path, base.urlunquote(href)),
|
.join(base_path,
|
||||||
|
urllib.parse.unquote(href)),
|
||||||
base_path))
|
base_path))
|
||||||
abs_href = base.urlnormalize(link_path)
|
abs_href = base.urlnormalize(link_path)
|
||||||
if abs_href == self.removed_cover:
|
if abs_href == self.removed_cover:
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ import shutil
|
|||||||
import urllib.parse
|
import urllib.parse
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from ebook_converter import constants as const
|
|
||||||
from ebook_converter.ebooks.oeb import base
|
from ebook_converter.ebooks.oeb import base
|
||||||
from ebook_converter.ebooks.oeb import parse_utils
|
from ebook_converter.ebooks.oeb import parse_utils
|
||||||
from ebook_converter.customize.conversion import OutputFormatPlugin
|
from ebook_converter.customize.conversion import OutputFormatPlugin
|
||||||
@@ -390,7 +389,7 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
href = getattr(node, 'href', None)
|
href = getattr(node, 'href', None)
|
||||||
if hasattr(href, 'partition'):
|
if hasattr(href, 'partition'):
|
||||||
_base, _, frag = href.partition('#')
|
_base, _, frag = href.partition('#')
|
||||||
frag = base.urlunquote(frag)
|
frag = urllib.parse.unquote(frag)
|
||||||
if frag and frag_pat.match(frag) is None:
|
if frag and frag_pat.match(frag) is None:
|
||||||
self.log.warn(
|
self.log.warn(
|
||||||
'Removing fragment identifier %r from TOC as Adobe Digital Editions cannot handle it'%frag)
|
'Removing fragment identifier %r from TOC as Adobe Digital Editions cannot handle it'%frag)
|
||||||
|
|||||||
@@ -1,21 +1,18 @@
|
|||||||
|
import collections
|
||||||
|
import functools
|
||||||
import os
|
import os
|
||||||
import posixpath
|
import posixpath
|
||||||
from collections import namedtuple
|
import urllib.parse
|
||||||
from functools import partial
|
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
from ebook_converter import fit_image
|
from ebook_converter import fit_image
|
||||||
from ebook_converter.ebooks.oeb.base import urlunquote
|
|
||||||
from ebook_converter.ebooks.docx.images import pt_to_emu
|
from ebook_converter.ebooks.docx.images import pt_to_emu
|
||||||
from ebook_converter.utils.filenames import ascii_filename
|
from ebook_converter.utils.filenames import ascii_filename
|
||||||
from ebook_converter.utils.imghdr import identify
|
from ebook_converter.utils.imghdr import identify
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
Image = collections.namedtuple('Image', 'rid fname width height fmt item')
|
||||||
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
|
|
||||||
|
|
||||||
Image = namedtuple('Image', 'rid fname width height fmt item')
|
|
||||||
|
|
||||||
|
|
||||||
def as_num(x):
|
def as_num(x):
|
||||||
@@ -102,7 +99,7 @@ class ImagesManager(object):
|
|||||||
fake_margins = floating is None
|
fake_margins = floating is None
|
||||||
self.count += 1
|
self.count += 1
|
||||||
img = self.images[href]
|
img = self.images[href]
|
||||||
name = urlunquote(posixpath.basename(href))
|
name = urllib.parse.unquote(posixpath.basename(href))
|
||||||
width, height = style.img_size(img.width, img.height)
|
width, height = style.img_size(img.width, img.height)
|
||||||
scaled, width, height = fit_image(width, height, self.page_width, self.page_height)
|
scaled, width, height = fit_image(width, height, self.page_width, self.page_height)
|
||||||
width, height = map(pt_to_emu, (width, height))
|
width, height = map(pt_to_emu, (width, height))
|
||||||
@@ -157,7 +154,7 @@ class ImagesManager(object):
|
|||||||
makeelement(makeelement(spPr, 'a:prstGeom', prst='rect'), 'a:avLst')
|
makeelement(makeelement(spPr, 'a:prstGeom', prst='rect'), 'a:avLst')
|
||||||
|
|
||||||
def create_filename(self, href, fmt):
|
def create_filename(self, href, fmt):
|
||||||
fname = ascii_filename(urlunquote(posixpath.basename(href)))
|
fname = ascii_filename(urllib.parse.unquote(posixpath.basename(href)))
|
||||||
fname = posixpath.splitext(fname)[0]
|
fname = posixpath.splitext(fname)[0]
|
||||||
fname = fname[:75].rstrip('.') or 'image'
|
fname = fname[:75].rstrip('.') or 'image'
|
||||||
num = 0
|
num = 0
|
||||||
@@ -171,7 +168,8 @@ class ImagesManager(object):
|
|||||||
|
|
||||||
def serialize(self, images_map):
|
def serialize(self, images_map):
|
||||||
for img in self.images.values():
|
for img in self.images.values():
|
||||||
images_map['word/' + img.fname] = partial(self.get_data, img.item)
|
images_map['word/' + img.fname] = functools.partial(self.get_data,
|
||||||
|
img.item)
|
||||||
|
|
||||||
def get_data(self, item):
|
def get_data(self, item):
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -7,7 +7,6 @@ import re
|
|||||||
import sys
|
import sys
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
from ebook_converter.ebooks.oeb.base import urlunquote
|
|
||||||
from ebook_converter.ebooks.chardet import detect_xml_encoding
|
from ebook_converter.ebooks.chardet import detect_xml_encoding
|
||||||
from ebook_converter import unicode_path, replace_entities
|
from ebook_converter import unicode_path, replace_entities
|
||||||
|
|
||||||
@@ -23,7 +22,7 @@ class Link(object):
|
|||||||
isabs = False
|
isabs = False
|
||||||
path = urllib.parse.urlunparse(('', '', path, url.params, url.query,
|
path = urllib.parse.urlunparse(('', '', path, url.params, url.query,
|
||||||
''))
|
''))
|
||||||
path = urlunquote(path)
|
path = urllib.parse.unquote(path)
|
||||||
if isabs or os.path.isabs(path):
|
if isabs or os.path.isabs(path):
|
||||||
return path
|
return path
|
||||||
return os.path.abspath(os.path.join(base, path))
|
return os.path.abspath(os.path.join(base, path))
|
||||||
@@ -41,7 +40,7 @@ class Link(object):
|
|||||||
self.is_local = self.parsed_url.scheme in ('', 'file')
|
self.is_local = self.parsed_url.scheme in ('', 'file')
|
||||||
self.is_internal = self.is_local and not bool(self.parsed_url.path)
|
self.is_internal = self.is_local and not bool(self.parsed_url.path)
|
||||||
self.path = None
|
self.path = None
|
||||||
self.fragment = urlunquote(self.parsed_url.fragment)
|
self.fragment = urllib.parse.unquote(self.parsed_url.fragment)
|
||||||
if self.is_local and not self.is_internal:
|
if self.is_local and not self.is_internal:
|
||||||
self.path = self.url_to_local_path(self.parsed_url, base)
|
self.path = self.url_to_local_path(self.parsed_url, base)
|
||||||
|
|
||||||
|
|||||||
@@ -24,7 +24,6 @@ from ebook_converter.ebooks.conversion.preprocess import CSSPreProcessor
|
|||||||
from ebook_converter.ebooks.oeb import parse_utils
|
from ebook_converter.ebooks.oeb import parse_utils
|
||||||
from ebook_converter.utils.cleantext import clean_xml_chars
|
from ebook_converter.utils.cleantext import clean_xml_chars
|
||||||
from ebook_converter.utils.short_uuid import uuid4
|
from ebook_converter.utils.short_uuid import uuid4
|
||||||
from ebook_converter.polyglot.urllib import unquote as urlunquote
|
|
||||||
|
|
||||||
|
|
||||||
def tag(tag_ns, name):
|
def tag(tag_ns, name):
|
||||||
@@ -423,7 +422,7 @@ def urlnormalize(href):
|
|||||||
path, frag = urllib.parse.urldefrag(href)
|
path, frag = urllib.parse.urldefrag(href)
|
||||||
parts = ('', '', path, '', '', frag)
|
parts = ('', '', path, '', '', frag)
|
||||||
parts = (part.replace('\\', '/') for part in parts)
|
parts = (part.replace('\\', '/') for part in parts)
|
||||||
parts = (urlunquote(part) for part in parts)
|
parts = (urllib.parse.unquote(part) for part in parts)
|
||||||
parts = (urlquote(part) for part in parts)
|
parts = (urlquote(part) for part in parts)
|
||||||
return urllib.parse.urlunparse(parts)
|
return urllib.parse.urlunparse(parts)
|
||||||
|
|
||||||
@@ -516,24 +515,15 @@ class DirContainer(object):
|
|||||||
self.opfname = path
|
self.opfname = path
|
||||||
return
|
return
|
||||||
|
|
||||||
def _unquote(self, path):
|
|
||||||
# unquote must run on a bytestring and will return a bytestring
|
|
||||||
# If it runs on a unicode object, it returns a double encoded unicode
|
|
||||||
# string: unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8')
|
|
||||||
# and the latter is correct
|
|
||||||
if isinstance(path, str):
|
|
||||||
path = path.encode('utf-8')
|
|
||||||
return urlunquote(path).decode('utf-8')
|
|
||||||
|
|
||||||
def read(self, path):
|
def read(self, path):
|
||||||
if path is None:
|
if path is None:
|
||||||
path = self.opfname
|
path = self.opfname
|
||||||
path = os.path.join(self.rootdir, self._unquote(path))
|
path = os.path.join(self.rootdir, urllib.parse.unquote(path))
|
||||||
with open(path, 'rb') as f:
|
with open(path, 'rb') as f:
|
||||||
return f.read()
|
return f.read()
|
||||||
|
|
||||||
def write(self, path, data):
|
def write(self, path, data):
|
||||||
path = os.path.join(self.rootdir, self._unquote(path))
|
path = os.path.join(self.rootdir, urllib.parse.unquote(path))
|
||||||
dir = os.path.dirname(path)
|
dir = os.path.dirname(path)
|
||||||
if not os.path.isdir(dir):
|
if not os.path.isdir(dir):
|
||||||
os.makedirs(dir)
|
os.makedirs(dir)
|
||||||
@@ -544,7 +534,7 @@ class DirContainer(object):
|
|||||||
if not path:
|
if not path:
|
||||||
return False
|
return False
|
||||||
try:
|
try:
|
||||||
path = os.path.join(self.rootdir, self._unquote(path))
|
path = os.path.join(self.rootdir, urllib.parse.unquote(path))
|
||||||
except ValueError: # Happens if path contains quoted special chars
|
except ValueError: # Happens if path contains quoted special chars
|
||||||
return False
|
return False
|
||||||
try:
|
try:
|
||||||
@@ -913,7 +903,7 @@ class Manifest(object):
|
|||||||
|
|
||||||
def _parse_xhtml(self, data):
|
def _parse_xhtml(self, data):
|
||||||
orig_data = data
|
orig_data = data
|
||||||
fname = urlunquote(self.href)
|
fname = urllib.parse.unquote(self.href)
|
||||||
self.oeb.log.debug('Parsing', fname, '...')
|
self.oeb.log.debug('Parsing', fname, '...')
|
||||||
self.oeb.html_preprocessor.current_href = self.href
|
self.oeb.html_preprocessor.current_href = self.href
|
||||||
try:
|
try:
|
||||||
@@ -1212,7 +1202,7 @@ class Manifest(object):
|
|||||||
media_type = OEB_DOC_MIME
|
media_type = OEB_DOC_MIME
|
||||||
elif media_type in OEB_STYLES:
|
elif media_type in OEB_STYLES:
|
||||||
media_type = OEB_CSS_MIME
|
media_type = OEB_CSS_MIME
|
||||||
attrib = {'id': item.id, 'href': urlunquote(item.href),
|
attrib = {'id': item.id, 'href': urllib.parse.unquote(item.href),
|
||||||
'media-type': media_type}
|
'media-type': media_type}
|
||||||
if item.fallback:
|
if item.fallback:
|
||||||
attrib['fallback'] = item.fallback
|
attrib['fallback'] = item.fallback
|
||||||
@@ -1227,7 +1217,7 @@ class Manifest(object):
|
|||||||
media_type = XHTML_MIME
|
media_type = XHTML_MIME
|
||||||
elif media_type in OEB_STYLES:
|
elif media_type in OEB_STYLES:
|
||||||
media_type = CSS_MIME
|
media_type = CSS_MIME
|
||||||
attrib = {'id': item.id, 'href': urlunquote(item.href),
|
attrib = {'id': item.id, 'href': urllib.parse.unquote(item.href),
|
||||||
'media-type': media_type}
|
'media-type': media_type}
|
||||||
if item.fallback:
|
if item.fallback:
|
||||||
attrib['fallback'] = item.fallback
|
attrib['fallback'] = item.fallback
|
||||||
@@ -1446,7 +1436,7 @@ class Guide(object):
|
|||||||
def to_opf1(self, parent=None):
|
def to_opf1(self, parent=None):
|
||||||
elem = element(parent, 'guide')
|
elem = element(parent, 'guide')
|
||||||
for ref in self.refs.values():
|
for ref in self.refs.values():
|
||||||
attrib = {'type': ref.type, 'href': urlunquote(ref.href)}
|
attrib = {'type': ref.type, 'href': urllib.parse.unquote(ref.href)}
|
||||||
if ref.title:
|
if ref.title:
|
||||||
attrib['title'] = ref.title
|
attrib['title'] = ref.title
|
||||||
element(elem, 'reference', attrib=attrib)
|
element(elem, 'reference', attrib=attrib)
|
||||||
@@ -1457,7 +1447,7 @@ class Guide(object):
|
|||||||
return
|
return
|
||||||
elem = element(parent, tag('opf', 'guide'))
|
elem = element(parent, tag('opf', 'guide'))
|
||||||
for ref in self.refs.values():
|
for ref in self.refs.values():
|
||||||
attrib = {'type': ref.type, 'href': urlunquote(ref.href)}
|
attrib = {'type': ref.type, 'href': urllib.parse.unquote(ref.href)}
|
||||||
if ref.title:
|
if ref.title:
|
||||||
attrib['title'] = ref.title
|
attrib['title'] = ref.title
|
||||||
element(elem, tag('opf', 'reference'), attrib=attrib)
|
element(elem, tag('opf', 'reference'), attrib=attrib)
|
||||||
@@ -1594,7 +1584,7 @@ class TOC(object):
|
|||||||
def to_opf1(self, tour):
|
def to_opf1(self, tour):
|
||||||
for node in self.nodes:
|
for node in self.nodes:
|
||||||
element(tour, 'site', attrib={
|
element(tour, 'site', attrib={
|
||||||
'title': node.title, 'href': urlunquote(node.href)})
|
'title': node.title, 'href': urllib.parse.unquote(node.href)})
|
||||||
node.to_opf1(tour)
|
node.to_opf1(tour)
|
||||||
return tour
|
return tour
|
||||||
|
|
||||||
|
|||||||
@@ -106,7 +106,7 @@ def href_to_name(href, root, base=None):
|
|||||||
return None
|
return None
|
||||||
if purl.scheme or not purl.path:
|
if purl.scheme or not purl.path:
|
||||||
return None
|
return None
|
||||||
href = oeb_base.urlunquote(purl.path)
|
href = urllib.parse.unquote(purl.path)
|
||||||
|
|
||||||
fullpath = os.path.join(base, *href.split('/'))
|
fullpath = os.path.join(base, *href.split('/'))
|
||||||
return unicodedata.normalize('NFC', abspath_to_name(fullpath, root))
|
return unicodedata.normalize('NFC', abspath_to_name(fullpath, root))
|
||||||
@@ -1171,7 +1171,7 @@ class EpubContainer(Container):
|
|||||||
)
|
)
|
||||||
if not opf_files:
|
if not opf_files:
|
||||||
raise InvalidEpub('META-INF/container.xml contains no link to OPF file')
|
raise InvalidEpub('META-INF/container.xml contains no link to OPF file')
|
||||||
opf_path = os.path.join(self.root, *(oeb_base.urlunquote(opf_files[0].get('full-path')).split('/')))
|
opf_path = os.path.join(self.root, *(urllib.parse.unquote(opf_files[0].get('full-path')).split('/')))
|
||||||
if not exists(opf_path):
|
if not exists(opf_path):
|
||||||
raise InvalidEpub('OPF file does not exist at location pointed to'
|
raise InvalidEpub('OPF file does not exist at location pointed to'
|
||||||
' by META-INF/container.xml')
|
' by META-INF/container.xml')
|
||||||
|
|||||||
@@ -1,7 +1,8 @@
|
|||||||
import mimetypes
|
import mimetypes
|
||||||
import re
|
import re
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from ebook_converter.ebooks.oeb.base import XPath, urlunquote
|
from ebook_converter.ebooks.oeb.base import XPath
|
||||||
from ebook_converter.polyglot.binary import from_base64_bytes
|
from ebook_converter.polyglot.binary import from_base64_bytes
|
||||||
from ebook_converter.polyglot.builtins import as_bytes
|
from ebook_converter.polyglot.builtins import as_bytes
|
||||||
|
|
||||||
@@ -32,7 +33,7 @@ class DataURL(object):
|
|||||||
'URI, ignoring it')
|
'URI, ignoring it')
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
data = urlunquote(data)
|
data = urllib.parse.unquote(data)
|
||||||
data = as_bytes(data)
|
data = as_bytes(data)
|
||||||
fmt = what(None, data)
|
fmt = what(None, data)
|
||||||
if not fmt:
|
if not fmt:
|
||||||
|
|||||||
Reference in New Issue
Block a user