mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-03-22 18:33:34 +01:00
Removing is_py3 method and duplicated by urllib.
This commit is contained in:
@@ -5,6 +5,7 @@ import os, re, logging, sys, numbers
|
||||
from collections import defaultdict
|
||||
from itertools import count
|
||||
from operator import attrgetter
|
||||
import urllib.parse
|
||||
|
||||
from lxml import etree, html
|
||||
from ebook_converter import force_unicode
|
||||
@@ -17,7 +18,7 @@ from ebook_converter.ebooks.oeb.parse_utils import barename, XHTML_NS, namespace
|
||||
from ebook_converter.utils.cleantext import clean_xml_chars
|
||||
from ebook_converter.utils.short_uuid import uuid4
|
||||
from ebook_converter.polyglot.builtins import iteritems, unicode_type, string_or_bytes, itervalues, codepoint_to_chr
|
||||
from ebook_converter.polyglot.urllib import unquote as urlunquote, urldefrag, urljoin, urlparse, urlunparse
|
||||
from ebook_converter.polyglot.urllib import unquote as urlunquote
|
||||
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
@@ -185,13 +186,13 @@ def iterlinks(root, find_links_in_css=True):
|
||||
if attrib in attribs:
|
||||
value = el.get(attrib)
|
||||
if codebase is not None:
|
||||
value = urljoin(codebase, value)
|
||||
value = urllib.parse.urljoin(codebase, value)
|
||||
yield (el, attrib, value, 0)
|
||||
if 'archive' in attribs:
|
||||
for match in _archive_re.finditer(el.get('archive')):
|
||||
value = match.group(0)
|
||||
if codebase is not None:
|
||||
value = urljoin(codebase, value)
|
||||
value = urllib.parse.urljoin(codebase, value)
|
||||
yield (el, 'archive', value, match.start())
|
||||
else:
|
||||
for attr in attribs:
|
||||
@@ -217,7 +218,7 @@ def make_links_absolute(root, base_url):
|
||||
came from)
|
||||
'''
|
||||
def link_repl(href):
|
||||
return urljoin(base_url, href)
|
||||
return urllib.parse.urljoin(base_url, href)
|
||||
rewrite_links(root, link_repl)
|
||||
|
||||
|
||||
@@ -463,16 +464,16 @@ def urlnormalize(href):
|
||||
characters URL quoted.
|
||||
"""
|
||||
try:
|
||||
parts = urlparse(href)
|
||||
parts = urllib.parse.urlparse(href)
|
||||
except ValueError as e:
|
||||
raise ValueError('Failed to parse the URL: %r with underlying error: %s' % (href, as_unicode(e)))
|
||||
if not parts.scheme or parts.scheme == 'file':
|
||||
path, frag = urldefrag(href)
|
||||
path, frag = urllib.parse.urldefrag(href)
|
||||
parts = ('', '', path, '', '', frag)
|
||||
parts = (part.replace('\\', '/') for part in parts)
|
||||
parts = (urlunquote(part) for part in parts)
|
||||
parts = (urlquote(part) for part in parts)
|
||||
return urlunparse(parts)
|
||||
return urllib.parse.urlunparse(parts)
|
||||
|
||||
|
||||
def extract(elem):
|
||||
@@ -1135,7 +1136,7 @@ class Manifest(object):
|
||||
relative to this manifest item to a book-absolute reference.
|
||||
"""
|
||||
try:
|
||||
purl = urlparse(href)
|
||||
purl = urllib.parse.urlparse(href)
|
||||
except ValueError:
|
||||
return href
|
||||
scheme = purl.scheme
|
||||
@@ -1143,8 +1144,8 @@ class Manifest(object):
|
||||
return href
|
||||
purl = list(purl)
|
||||
purl[0] = ''
|
||||
href = urlunparse(purl)
|
||||
path, frag = urldefrag(href)
|
||||
href = urllib.parse.urlunparse(purl)
|
||||
path, frag = urllib.parse.urldefrag(href)
|
||||
if not path:
|
||||
if frag:
|
||||
return '#'.join((self.href, frag))
|
||||
@@ -1423,7 +1424,7 @@ class Guide(object):
|
||||
@property
|
||||
def item(self):
|
||||
"""The manifest item associated with this reference."""
|
||||
path = urldefrag(self.href)[0]
|
||||
path = urllib.parse.urldefrag(self.href)[0]
|
||||
hrefs = self.oeb.manifest.hrefs
|
||||
return hrefs.get(path, None)
|
||||
|
||||
@@ -1596,7 +1597,7 @@ class TOC(object):
|
||||
"""
|
||||
prev = None
|
||||
for node in list(self.nodes):
|
||||
if prev and urldefrag(prev.href)[0] == urldefrag(node.href)[0]:
|
||||
if prev and urllib.parse.urldefrag(prev.href)[0] == urllib.parse.urldefrag(node.href)[0]:
|
||||
self.nodes.remove(node)
|
||||
prev.nodes.append(node)
|
||||
else:
|
||||
@@ -1988,7 +1989,7 @@ class OEBBook(object):
|
||||
def rel_href(base_href, href):
|
||||
"""Convert the URL provided in :param:`href` to a URL relative to the URL
|
||||
in :param:`base_href` """
|
||||
if urlparse(href).scheme:
|
||||
if urllib.parse.urlparse(href).scheme:
|
||||
return href
|
||||
if '/' not in base_href:
|
||||
return href
|
||||
@@ -2004,7 +2005,7 @@ def rel_href(base_href, href):
|
||||
break
|
||||
if not base:
|
||||
return href
|
||||
target, frag = urldefrag(href)
|
||||
target, frag = urllib.parse.urldefrag(href)
|
||||
target = target.split('/')
|
||||
index = 0
|
||||
for index in range(min(len(base), len(target))):
|
||||
|
||||
@@ -11,6 +11,7 @@ import uuid
|
||||
from collections import defaultdict
|
||||
from io import BytesIO
|
||||
from itertools import count
|
||||
import urllib.parse
|
||||
|
||||
from css_parser import getUrls, replaceUrls
|
||||
|
||||
@@ -49,7 +50,6 @@ from ebook_converter.utils.logging import default_log
|
||||
from ebook_converter.utils.xml_parse import safe_xml_fromstring
|
||||
from ebook_converter.utils.zipfile import ZipFile
|
||||
from ebook_converter.polyglot.builtins import iteritems, unicode_type
|
||||
from ebook_converter.polyglot.urllib import urlparse
|
||||
|
||||
exists, join, relpath = os.path.exists, os.path.join, os.path.relpath
|
||||
|
||||
@@ -107,7 +107,7 @@ def name_to_href(name, root, base=None, quote=urlquote):
|
||||
def href_to_name(href, root, base=None):
|
||||
base = root if base is None else os.path.dirname(name_to_abspath(base, root))
|
||||
try:
|
||||
purl = urlparse(href)
|
||||
purl = urllib.parse.urlparse(href)
|
||||
except ValueError:
|
||||
return None
|
||||
if purl.scheme or not purl.path:
|
||||
|
||||
@@ -2,13 +2,13 @@ import codecs, shutil, os, posixpath
|
||||
from ebook_converter.polyglot.builtins import iteritems, itervalues
|
||||
from functools import partial
|
||||
from collections import Counter, defaultdict
|
||||
import urllib.parse
|
||||
|
||||
from ebook_converter import sanitize_file_name
|
||||
from ebook_converter.ebooks.chardet import strip_encoding_declarations
|
||||
from ebook_converter.ebooks.oeb.base import css_text
|
||||
from ebook_converter.ebooks.oeb.polish.css import iter_declarations, remove_property_value
|
||||
from ebook_converter.ebooks.oeb.polish.utils import extract
|
||||
from ebook_converter.polyglot.urllib import urlparse, urlunparse
|
||||
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
@@ -38,7 +38,7 @@ class LinkReplacer(object):
|
||||
nname = self.link_map.get(name, None)
|
||||
if not nname:
|
||||
return url
|
||||
purl = urlparse(url)
|
||||
purl = urllib.parse.urlparse(url)
|
||||
href = self.container.name_to_href(nname, self.base)
|
||||
if purl.fragment:
|
||||
nfrag = self.frag_map(name, purl.fragment)
|
||||
@@ -68,12 +68,12 @@ class IdReplacer(object):
|
||||
id_map = self.id_map.get(name)
|
||||
if id_map is None:
|
||||
return url
|
||||
purl = urlparse(url)
|
||||
purl = urllib.parse.urlparse(url)
|
||||
nfrag = id_map.get(purl.fragment)
|
||||
if nfrag is None:
|
||||
return url
|
||||
purl = purl._replace(fragment=nfrag)
|
||||
href = urlunparse(purl)
|
||||
href = urllib.parse.urlunparse(purl)
|
||||
if href != url:
|
||||
self.replaced = True
|
||||
return href
|
||||
@@ -89,7 +89,7 @@ class LinkRebaser(object):
|
||||
def __call__(self, url):
|
||||
if url and url.startswith('#'):
|
||||
return url
|
||||
purl = urlparse(url)
|
||||
purl = urllib.parse.urlparse(url)
|
||||
frag = purl.fragment
|
||||
name = self.container.href_to_name(url, self.old_name)
|
||||
if not name:
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
import copy, os, re
|
||||
from ebook_converter.polyglot.builtins import string_or_bytes
|
||||
import urllib.parse
|
||||
|
||||
from ebook_converter.ebooks.oeb.base import barename, XPNSMAP, XPath, OPF, XHTML, OEB_DOCS
|
||||
from ebook_converter.ebooks.oeb.polish.errors import MalformedMarkup
|
||||
from ebook_converter.ebooks.oeb.polish.toc import node_from_loc
|
||||
from ebook_converter.ebooks.oeb.polish.replace import LinkRebaser
|
||||
from ebook_converter.polyglot.builtins import iteritems, unicode_type
|
||||
from ebook_converter.polyglot.urllib import urlparse
|
||||
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
@@ -160,7 +160,7 @@ class SplitLinkReplacer(object):
|
||||
name = self.container.href_to_name(url, self.base)
|
||||
if name != self.top_name:
|
||||
return url
|
||||
purl = urlparse(url)
|
||||
purl = urllib.parse.urlparse(url)
|
||||
if purl.fragment and purl.fragment in self.bottom_anchors:
|
||||
url = self.container.name_to_href(self.bottom_name, self.base) + '#' + purl.fragment
|
||||
self.replaced = True
|
||||
@@ -225,7 +225,7 @@ def split(container, name, loc_or_xpath, before=True, totals=None):
|
||||
else:
|
||||
fname = container.href_to_name(url, name)
|
||||
if fname == name:
|
||||
purl = urlparse(url)
|
||||
purl = urllib.parse.urlparse(url)
|
||||
if purl.fragment in anchors_in_top:
|
||||
if r is root2:
|
||||
a.set('href', '%s#%s' % (container.name_to_href(name, bottom_name), purl.fragment))
|
||||
@@ -310,7 +310,7 @@ class MergeLinkReplacer(object):
|
||||
amap = self.anchor_map.get(name, None)
|
||||
if amap is None:
|
||||
return url
|
||||
purl = urlparse(url)
|
||||
purl = urllib.parse.urlparse(url)
|
||||
frag = purl.fragment or ''
|
||||
frag = amap.get(frag, frag)
|
||||
url = self.container.name_to_href(self.master, self.base) + '#' + frag
|
||||
|
||||
@@ -3,6 +3,7 @@ from collections import Counter, OrderedDict
|
||||
from functools import partial
|
||||
from operator import itemgetter
|
||||
import pkg_resources
|
||||
import urllib.parse
|
||||
|
||||
from lxml import etree
|
||||
from lxml.builder import ElementMaker
|
||||
@@ -16,7 +17,6 @@ from ebook_converter.ebooks.oeb.polish.opf import set_guide_item, get_book_langu
|
||||
from ebook_converter.ebooks.oeb.polish.pretty import pretty_html_tree
|
||||
from ebook_converter.utils.localization import get_lang, canonicalize_lang, lang_as_iso639_1
|
||||
from ebook_converter.polyglot.builtins import iteritems, unicode_type
|
||||
from ebook_converter.polyglot.urllib import urlparse
|
||||
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
@@ -150,7 +150,7 @@ def add_from_navpoint(container, navpoint, parent, ncx_name):
|
||||
href = content.get('src', None)
|
||||
if href:
|
||||
dest = container.href_to_name(href, base=ncx_name)
|
||||
frag = urlparse(href).fragment or None
|
||||
frag = urllib.parse.urlparse(href).fragment or None
|
||||
return parent.add(text or None, dest or None, frag or None)
|
||||
|
||||
|
||||
@@ -183,7 +183,7 @@ def parse_ncx(container, ncx_name):
|
||||
href = pt.xpath('descendant::*[calibre:lower-case(local-name()) = "content"]/@src')
|
||||
if href:
|
||||
dest = container.href_to_name(href[0], base=ncx_name)
|
||||
frag = urlparse(href[0]).fragment or None
|
||||
frag = urllib.parse.urlparse(href[0]).fragment or None
|
||||
toc_root.page_list.append({'dest': dest, 'pagenum': pagenum, 'frag': frag})
|
||||
return toc_root
|
||||
|
||||
@@ -195,7 +195,7 @@ def add_from_li(container, li, parent, nav_name):
|
||||
href = x.get('href')
|
||||
if href:
|
||||
dest = nav_name if href.startswith('#') else container.href_to_name(href, base=nav_name)
|
||||
frag = urlparse(href).fragment or None
|
||||
frag = urllib.parse.urlparse(href).fragment or None
|
||||
break
|
||||
return parent.add(text or None, dest or None, frag or None)
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ Container-/OPF-based input OEBBook reader.
|
||||
"""
|
||||
import sys, os, uuid, copy, re, io
|
||||
from collections import defaultdict
|
||||
import urllib.parse
|
||||
|
||||
from lxml import etree
|
||||
|
||||
@@ -23,7 +24,7 @@ from ebook_converter.ptempfile import TemporaryDirectory
|
||||
from ebook_converter.constants import __appname__, __version__
|
||||
from ebook_converter import guess_type, xml_replace_entities
|
||||
from ebook_converter.polyglot.builtins import unicode_type
|
||||
from ebook_converter.polyglot.urllib import unquote, urldefrag, urlparse
|
||||
from ebook_converter.polyglot.urllib import unquote
|
||||
|
||||
|
||||
__all__ = ['OEBReader']
|
||||
@@ -203,12 +204,12 @@ class OEBReader(object):
|
||||
for href in hrefs:
|
||||
if isinstance(href, bytes):
|
||||
href = href.decode('utf-8')
|
||||
href, _ = urldefrag(href)
|
||||
href, _ = urllib.parse.urldefrag(href)
|
||||
if not href:
|
||||
continue
|
||||
try:
|
||||
href = item.abshref(urlnormalize(href))
|
||||
scheme = urlparse(href).scheme
|
||||
scheme = urllib.parse.urlparse(href).scheme
|
||||
except:
|
||||
self.oeb.log.exception(
|
||||
'Skipping invalid href: %r'%href)
|
||||
@@ -221,9 +222,9 @@ class OEBReader(object):
|
||||
except:
|
||||
urls = []
|
||||
for url in urls:
|
||||
href, _ = urldefrag(url)
|
||||
href, _ = urllib.parse.urldefrag(url)
|
||||
href = item.abshref(urlnormalize(href))
|
||||
scheme = urlparse(href).scheme
|
||||
scheme = urllib.parse.urlparse(href).scheme
|
||||
if not scheme and href not in known:
|
||||
new.add(href)
|
||||
unchecked.clear()
|
||||
@@ -294,7 +295,7 @@ class OEBReader(object):
|
||||
# TODO: handle fallback chains
|
||||
continue
|
||||
for href in selector(item.data):
|
||||
href, _ = urldefrag(href)
|
||||
href, _ = urllib.parse.urldefrag(href)
|
||||
if not href:
|
||||
continue
|
||||
try:
|
||||
@@ -350,7 +351,7 @@ class OEBReader(object):
|
||||
manifest = self.oeb.manifest
|
||||
for elem in xpath(opf, '/o2:package/o2:guide/o2:reference'):
|
||||
ref_href = elem.get('href')
|
||||
path = urlnormalize(urldefrag(ref_href)[0])
|
||||
path = urlnormalize(urllib.parse.urldefrag(ref_href)[0])
|
||||
if path not in manifest.hrefs:
|
||||
corrected_href = None
|
||||
for href in manifest.hrefs:
|
||||
@@ -393,7 +394,7 @@ class OEBReader(object):
|
||||
# This node is useless
|
||||
continue
|
||||
href = item.abshref(urlnormalize(href[0])) if href and href[0] else ''
|
||||
path, _ = urldefrag(href)
|
||||
path, _ = urllib.parse.urldefrag(href)
|
||||
if path and path not in self.oeb.manifest.hrefs:
|
||||
path = urlnormalize(path)
|
||||
if href and path not in self.oeb.manifest.hrefs:
|
||||
@@ -468,7 +469,7 @@ class OEBReader(object):
|
||||
href = site.get('href')
|
||||
if not title or not href:
|
||||
continue
|
||||
path, _ = urldefrag(urlnormalize(href))
|
||||
path, _ = urllib.parse.urldefrag(urlnormalize(href))
|
||||
if path not in self.oeb.manifest.hrefs:
|
||||
self.logger.warn('TOC reference %r not found' % href)
|
||||
continue
|
||||
@@ -480,7 +481,7 @@ class OEBReader(object):
|
||||
if 'toc' not in self.oeb.guide:
|
||||
return False
|
||||
self.log.debug('Reading TOC from HTML...')
|
||||
itempath, frag = urldefrag(self.oeb.guide['toc'].href)
|
||||
itempath, frag = urllib.parse.urldefrag(self.oeb.guide['toc'].href)
|
||||
item = self.oeb.manifest.hrefs[itempath]
|
||||
html = item.data
|
||||
if frag:
|
||||
@@ -496,7 +497,7 @@ class OEBReader(object):
|
||||
for anchor in xpath(html, './/h:a[@href]'):
|
||||
href = anchor.attrib['href']
|
||||
href = item.abshref(urlnormalize(href))
|
||||
path, frag = urldefrag(href)
|
||||
path, frag = urllib.parse.urldefrag(href)
|
||||
if path not in self.oeb.manifest.hrefs:
|
||||
continue
|
||||
title = xml2text(anchor)
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import textwrap
|
||||
import urllib.parse
|
||||
|
||||
from ebook_converter import guess_type
|
||||
from ebook_converter.utils.imghdr import identify
|
||||
@@ -93,7 +94,6 @@ class CoverManager(object):
|
||||
return -1, -1
|
||||
|
||||
def insert_cover(self):
|
||||
from ebook_converter.ebooks.oeb.base import urldefrag
|
||||
g, m = self.oeb.guide, self.oeb.manifest
|
||||
item = None
|
||||
href = None
|
||||
@@ -124,7 +124,7 @@ class CoverManager(object):
|
||||
data=safe_xml_fromstring(tp))
|
||||
else:
|
||||
item = self.oeb.manifest.hrefs[
|
||||
urldefrag(self.oeb.guide['titlepage'].href)[0]]
|
||||
urllib.parse.urldefrag(self.oeb.guide['titlepage'].href)[0]]
|
||||
if item is not None:
|
||||
self.oeb.spine.insert(0, item, True)
|
||||
if 'cover' not in self.oeb.guide.refs:
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import posixpath
|
||||
import urllib.parse
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from ebook_converter.ebooks.oeb.base import rewrite_links, urlnormalize
|
||||
from ebook_converter.polyglot.urllib import urldefrag, urlparse
|
||||
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
@@ -38,7 +38,7 @@ class RenameFiles(object): # {{{
|
||||
if self.oeb.guide:
|
||||
for ref in self.oeb.guide.values():
|
||||
href = urlnormalize(ref.href)
|
||||
href, frag = urldefrag(href)
|
||||
href, frag = urllib.parse.urldefrag(href)
|
||||
replacement = self.rename_map.get(href, None)
|
||||
if replacement is not None:
|
||||
nhref = replacement
|
||||
@@ -52,7 +52,7 @@ class RenameFiles(object): # {{{
|
||||
def fix_toc_entry(self, toc):
|
||||
if toc.href:
|
||||
href = urlnormalize(toc.href)
|
||||
href, frag = urldefrag(href)
|
||||
href, frag = urllib.parse.urldefrag(href)
|
||||
replacement = self.rename_map.get(href, None)
|
||||
|
||||
if replacement is not None:
|
||||
@@ -66,11 +66,11 @@ class RenameFiles(object): # {{{
|
||||
|
||||
def url_replacer(self, orig_url):
|
||||
url = urlnormalize(orig_url)
|
||||
parts = urlparse(url)
|
||||
parts = urllib.parse.urlparse(url)
|
||||
if parts.scheme:
|
||||
# Only rewrite local URLs
|
||||
return orig_url
|
||||
path, frag = urldefrag(url)
|
||||
path, frag = urllib.parse.urldefrag(url)
|
||||
if self.renamed_items_map:
|
||||
orig_item = self.renamed_items_map.get(self.current_item.href, self.current_item)
|
||||
else:
|
||||
|
||||
@@ -2,10 +2,11 @@ import sys, os, re
|
||||
from xml.sax.saxutils import escape
|
||||
from string import Formatter
|
||||
import pkg_resources
|
||||
import urllib.parse
|
||||
|
||||
from ebook_converter import guess_type, strftime
|
||||
from ebook_converter.constants import iswindows
|
||||
from ebook_converter.ebooks.oeb.base import XPath, XHTML_NS, XHTML, xml2text, urldefrag, urlnormalize
|
||||
from ebook_converter.ebooks.oeb.base import XPath, XHTML_NS, XHTML, xml2text, urlnormalize
|
||||
from ebook_converter.library.comments import comments_to_html, markdown
|
||||
from ebook_converter.utils.date import is_date_undefined, as_local_time
|
||||
from ebook_converter.ebooks.chardet import strip_encoding_declarations
|
||||
@@ -73,7 +74,7 @@ class RemoveFirstImage(Base):
|
||||
self.log.warn('Could not find first image to remove')
|
||||
if deleted_item is not None:
|
||||
for item in list(self.oeb.toc):
|
||||
href = urldefrag(item.href)[0]
|
||||
href = urllib.parse.urldefrag(item.href)[0]
|
||||
if href == deleted_item.href:
|
||||
self.oeb.toc.remove(item)
|
||||
self.oeb.guide.remove_by_href(deleted_item.href)
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
"""
|
||||
SVG rasterization transform.
|
||||
"""
|
||||
import os, re
|
||||
import os
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
# from PyQt5.Qt import (
|
||||
# Qt, QByteArray, QBuffer, QIODevice, QColor, QImage, QPainter, QSvgRenderer)
|
||||
@@ -14,7 +16,6 @@ from ebook_converter.ebooks.oeb.stylizer import Stylizer
|
||||
from ebook_converter.ptempfile import PersistentTemporaryFile
|
||||
from ebook_converter.utils.imghdr import what
|
||||
from ebook_converter.polyglot.builtins import unicode_type
|
||||
from ebook_converter.polyglot.urllib import urldefrag
|
||||
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
@@ -114,7 +115,7 @@ class SVGRasterizer(object):
|
||||
hrefs = self.oeb.manifest.hrefs
|
||||
for elem in xpath(svg, '//svg:*[@xl:href]'):
|
||||
href = urlnormalize(elem.attrib[XLINK('href')])
|
||||
path = urldefrag(href)[0]
|
||||
path = urllib.parse.urldefrag(href)[0]
|
||||
if not path:
|
||||
continue
|
||||
abshref = item.abshref(path)
|
||||
|
||||
@@ -5,6 +5,7 @@ assumes a prior call to the flatcss transform.
|
||||
"""
|
||||
import os, functools, collections, re, copy
|
||||
from collections import OrderedDict
|
||||
import urllib.parse
|
||||
|
||||
from lxml.etree import XPath as _XPath
|
||||
from lxml import etree
|
||||
@@ -12,7 +13,7 @@ from lxml import etree
|
||||
from ebook_converter import as_unicode, force_unicode
|
||||
from ebook_converter.ebooks.epub import rules
|
||||
from ebook_converter.ebooks.oeb.base import (OEB_STYLES, XPNSMAP as NAMESPACES,
|
||||
urldefrag, rewrite_links, XHTML, urlnormalize)
|
||||
rewrite_links, XHTML, urlnormalize)
|
||||
from ebook_converter.ebooks.oeb.polish.split import do_split
|
||||
from ebook_converter.polyglot.builtins import iteritems, unicode_type
|
||||
from ebook_converter.polyglot.urllib import unquote
|
||||
@@ -162,7 +163,7 @@ class Split(object):
|
||||
rewrite_links(item.data, self.rewrite_links)
|
||||
|
||||
def rewrite_links(self, url):
|
||||
href, frag = urldefrag(url)
|
||||
href, frag = urllib.parse.urldefrag(url)
|
||||
try:
|
||||
href = self.current_item.abshref(href)
|
||||
except ValueError:
|
||||
@@ -453,7 +454,7 @@ class FlowSplitter(object):
|
||||
|
||||
if self.oeb.guide:
|
||||
for ref in self.oeb.guide.values():
|
||||
href, frag = urldefrag(ref.href)
|
||||
href, frag = urllib.parse.urldefrag(ref.href)
|
||||
if href == self.item.href:
|
||||
nhref = self.anchor_map[frag if frag else None]
|
||||
if frag:
|
||||
@@ -462,7 +463,7 @@ class FlowSplitter(object):
|
||||
|
||||
def fix_toc_entry(toc):
|
||||
if toc.href:
|
||||
href, frag = urldefrag(toc.href)
|
||||
href, frag = urllib.parse.urldefrag(toc.href)
|
||||
if href == self.item.href:
|
||||
nhref = self.anchor_map[frag if frag else None]
|
||||
if frag:
|
||||
@@ -476,7 +477,7 @@ class FlowSplitter(object):
|
||||
|
||||
if self.oeb.pages:
|
||||
for page in self.oeb.pages:
|
||||
href, frag = urldefrag(page.href)
|
||||
href, frag = urllib.parse.urldefrag(page.href)
|
||||
if href == self.item.href:
|
||||
nhref = self.anchor_map[frag if frag else None]
|
||||
if frag:
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
import re, uuid
|
||||
import re
|
||||
import uuid
|
||||
import urllib.parse
|
||||
|
||||
from lxml import etree
|
||||
from collections import OrderedDict, Counter
|
||||
@@ -6,7 +8,6 @@ from collections import OrderedDict, Counter
|
||||
from ebook_converter.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text, barename
|
||||
from ebook_converter.ebooks import ConversionError
|
||||
from ebook_converter.polyglot.builtins import itervalues, unicode_type
|
||||
from ebook_converter.polyglot.urllib import urlparse
|
||||
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
@@ -209,7 +210,7 @@ class DetectStructure(object):
|
||||
for a in XPath('//h:a[@href]')(item.data):
|
||||
href = a.get('href')
|
||||
try:
|
||||
purl = urlparse(href)
|
||||
purl = urllib.parse.urlparse(href)
|
||||
except ValueError:
|
||||
self.log.warning('Ignoring malformed URL:', href)
|
||||
continue
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
"""
|
||||
OPF manifest trimming transform.
|
||||
"""
|
||||
import urllib.parse
|
||||
|
||||
from ebook_converter.ebooks.oeb.base import CSS_MIME, OEB_DOCS
|
||||
from ebook_converter.ebooks.oeb.base import urlnormalize, iterlinks
|
||||
from ebook_converter.polyglot.urllib import urldefrag
|
||||
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
@@ -32,7 +33,7 @@ class ManifestTrimmer(object):
|
||||
elif item.value in oeb.manifest.ids:
|
||||
used.add(oeb.manifest.ids[item.value])
|
||||
for ref in oeb.guide.values():
|
||||
path, _ = urldefrag(ref.href)
|
||||
path, _ = urllib.parse.urldefrag(ref.href)
|
||||
if path in oeb.manifest.hrefs:
|
||||
used.add(oeb.manifest.hrefs[path])
|
||||
# TOC items are required to be in the spine
|
||||
|
||||
Reference in New Issue
Block a user