mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-04-24 23:31:29 +02:00
Removing the is_py3 flag and helper functions duplicated by urllib.
This commit is contained in:
@@ -3,10 +3,18 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import sys, os, re, time, random, warnings
|
import math
|
||||||
|
import os
|
||||||
import pkg_resources
|
import pkg_resources
|
||||||
|
import random
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import urllib.parse
|
||||||
|
import urllib.request
|
||||||
|
import warnings
|
||||||
|
|
||||||
from ebook_converter.polyglot.builtins import codepoint_to_chr, unicode_type, hasenv, native_string_type
|
from ebook_converter.polyglot.builtins import codepoint_to_chr, unicode_type, hasenv, native_string_type
|
||||||
from math import floor
|
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
if not hasenv('CALIBRE_SHOW_DEPRECATION_WARNINGS'):
|
if not hasenv('CALIBRE_SHOW_DEPRECATION_WARNINGS'):
|
||||||
@@ -276,8 +284,7 @@ def extract(path, dir):
|
|||||||
|
|
||||||
|
|
||||||
def get_proxies(debug=True):
|
def get_proxies(debug=True):
|
||||||
from polyglot.urllib import getproxies
|
proxies = urllib.request.getproxies()
|
||||||
proxies = getproxies()
|
|
||||||
for key, proxy in list(proxies.items()):
|
for key, proxy in list(proxies.items()):
|
||||||
if not proxy or '..' in proxy or key == 'auto':
|
if not proxy or '..' in proxy or key == 'auto':
|
||||||
del proxies[key]
|
del proxies[key]
|
||||||
@@ -338,10 +345,9 @@ def get_proxy_info(proxy_scheme, proxy_string):
|
|||||||
is not available in the string. If an exception occurs parsing the string
|
is not available in the string. If an exception occurs parsing the string
|
||||||
this method returns None.
|
this method returns None.
|
||||||
'''
|
'''
|
||||||
from polyglot.urllib import urlparse
|
|
||||||
try:
|
try:
|
||||||
proxy_url = '%s://%s'%(proxy_scheme, proxy_string)
|
proxy_url = '%s://%s'%(proxy_scheme, proxy_string)
|
||||||
urlinfo = urlparse(proxy_url)
|
urlinfo = urllib.parse.urlparse(proxy_url)
|
||||||
ans = {
|
ans = {
|
||||||
'scheme': urlinfo.scheme,
|
'scheme': urlinfo.scheme,
|
||||||
'hostname': urlinfo.hostname,
|
'hostname': urlinfo.hostname,
|
||||||
@@ -414,13 +420,13 @@ def fit_image(width, height, pwidth, pheight):
|
|||||||
scaled = height > pheight or width > pwidth
|
scaled = height > pheight or width > pwidth
|
||||||
if height > pheight:
|
if height > pheight:
|
||||||
corrf = pheight / float(height)
|
corrf = pheight / float(height)
|
||||||
width, height = floor(corrf*width), pheight
|
width, height = math.floor(corrf*width), pheight
|
||||||
if width > pwidth:
|
if width > pwidth:
|
||||||
corrf = pwidth / float(width)
|
corrf = pwidth / float(width)
|
||||||
width, height = pwidth, floor(corrf*height)
|
width, height = pwidth, math.floor(corrf*height)
|
||||||
if height > pheight:
|
if height > pheight:
|
||||||
corrf = pheight / float(height)
|
corrf = pheight / float(height)
|
||||||
width, height = floor(corrf*width), pheight
|
width, height = math.floor(corrf*width), pheight
|
||||||
|
|
||||||
return scaled, int(width), int(height)
|
return scaled, int(width), int(height)
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,11 @@
|
|||||||
CHM File decoding support
|
CHM File decoding support
|
||||||
"""
|
"""
|
||||||
import os
|
import os
|
||||||
|
from lxml import html
|
||||||
|
|
||||||
|
from ebook_converter.polyglot.urllib import unquote as _unquote
|
||||||
|
from ebook_converter.ebooks.oeb.base import urlquote
|
||||||
|
from ebook_converter.ebooks.chardet import xml_to_unicode
|
||||||
from ebook_converter.customize.conversion import InputFormatPlugin
|
from ebook_converter.customize.conversion import InputFormatPlugin
|
||||||
from ebook_converter.ptempfile import TemporaryDirectory
|
from ebook_converter.ptempfile import TemporaryDirectory
|
||||||
from ebook_converter.constants import filesystem_encoding
|
from ebook_converter.constants import filesystem_encoding
|
||||||
@@ -109,10 +113,7 @@ class CHMInput(InputFormatPlugin):
|
|||||||
return oeb
|
return oeb
|
||||||
|
|
||||||
def _create_html_root(self, hhcpath, log, encoding):
|
def _create_html_root(self, hhcpath, log, encoding):
|
||||||
from lxml import html
|
|
||||||
from ebook_converter.polyglot.urllib import unquote as _unquote
|
|
||||||
from ebook_converter.ebooks.oeb.base import urlquote
|
|
||||||
from ebook_converter.ebooks.chardet import xml_to_unicode
|
|
||||||
hhcdata = self._read_file(hhcpath)
|
hhcdata = self._read_file(hhcpath)
|
||||||
hhcdata = hhcdata.decode(encoding)
|
hhcdata = hhcdata.decode(encoding)
|
||||||
hhcdata = xml_to_unicode(hhcdata, verbose=True,
|
hhcdata = xml_to_unicode(hhcdata, verbose=True,
|
||||||
|
|||||||
@@ -1,4 +1,7 @@
|
|||||||
import os, shutil, re
|
import os
|
||||||
|
import re
|
||||||
|
import shutil
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from ebook_converter.customize.conversion import (OutputFormatPlugin,
|
from ebook_converter.customize.conversion import (OutputFormatPlugin,
|
||||||
OptionRecommendation)
|
OptionRecommendation)
|
||||||
@@ -514,7 +517,7 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
'''
|
'''
|
||||||
Perform toc link transforms to alleviate slow loading.
|
Perform toc link transforms to alleviate slow loading.
|
||||||
'''
|
'''
|
||||||
from ebook_converter.ebooks.oeb.base import urldefrag, XPath
|
from ebook_converter.ebooks.oeb.base import XPath
|
||||||
from ebook_converter.ebooks.oeb.polish.toc import item_at_top
|
from ebook_converter.ebooks.oeb.polish.toc import item_at_top
|
||||||
|
|
||||||
def frag_is_at_top(root, frag):
|
def frag_is_at_top(root, frag):
|
||||||
@@ -527,7 +530,7 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
|
|
||||||
def simplify_toc_entry(toc):
|
def simplify_toc_entry(toc):
|
||||||
if toc.href:
|
if toc.href:
|
||||||
href, frag = urldefrag(toc.href)
|
href, frag = urllib.parse.urldefrag(toc.href)
|
||||||
if frag:
|
if frag:
|
||||||
for x in self.oeb.spine:
|
for x in self.oeb.spine:
|
||||||
if x.href == href:
|
if x.href == href:
|
||||||
|
|||||||
@@ -1,5 +1,8 @@
|
|||||||
import re, tempfile, os
|
import functools
|
||||||
from functools import partial
|
import os
|
||||||
|
import re
|
||||||
|
import tempfile
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from ebook_converter.constants import islinux, isbsd
|
from ebook_converter.constants import islinux, isbsd
|
||||||
from ebook_converter.customize.conversion import (InputFormatPlugin,
|
from ebook_converter.customize.conversion import (InputFormatPlugin,
|
||||||
@@ -97,7 +100,7 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
import uuid
|
import uuid
|
||||||
from ebook_converter.ebooks.conversion.plumber import create_oebbook
|
from ebook_converter.ebooks.conversion.plumber import create_oebbook
|
||||||
from ebook_converter.ebooks.oeb.base import (DirContainer,
|
from ebook_converter.ebooks.oeb.base import (DirContainer,
|
||||||
rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES,
|
rewrite_links, urlnormalize, BINARY_MIME, OEB_STYLES,
|
||||||
xpath, urlquote)
|
xpath, urlquote)
|
||||||
from ebook_converter import guess_type
|
from ebook_converter import guess_type
|
||||||
from ebook_converter.ebooks.oeb.transforms.metadata import \
|
from ebook_converter.ebooks.oeb.transforms.metadata import \
|
||||||
@@ -163,7 +166,7 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
path = path.lower()
|
path = path.lower()
|
||||||
self.added_resources[path] = href
|
self.added_resources[path] = href
|
||||||
self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
|
self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
|
||||||
self.urldefrag = urldefrag
|
self.urldefrag = urllib.parse.urldefrag
|
||||||
self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME
|
self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME
|
||||||
|
|
||||||
self.log('Rewriting HTML links')
|
self.log('Rewriting HTML links')
|
||||||
@@ -176,7 +179,8 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
item = oeb.manifest.hrefs[href]
|
item = oeb.manifest.hrefs[href]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
item = oeb.manifest.hrefs[urlnormalize(href)]
|
item = oeb.manifest.hrefs[urlnormalize(href)]
|
||||||
rewrite_links(item.data, partial(self.resource_adder, base=dpath))
|
rewrite_links(item.data,
|
||||||
|
functools.partial(self.resource_adder, base=dpath))
|
||||||
|
|
||||||
for item in oeb.manifest.values():
|
for item in oeb.manifest.values():
|
||||||
if item.media_type in self.OEB_STYLES:
|
if item.media_type in self.OEB_STYLES:
|
||||||
@@ -186,7 +190,7 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
dpath = os.path.dirname(path)
|
dpath = os.path.dirname(path)
|
||||||
break
|
break
|
||||||
css_parser.replaceUrls(item.data,
|
css_parser.replaceUrls(item.data,
|
||||||
partial(self.resource_adder, base=dpath))
|
functools.partial(self.resource_adder, base=dpath))
|
||||||
|
|
||||||
toc = self.oeb.toc
|
toc = self.oeb.toc
|
||||||
self.oeb.auto_generated_toc = True
|
self.oeb.auto_generated_toc = True
|
||||||
@@ -242,7 +246,6 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
return link, frag
|
return link, frag
|
||||||
|
|
||||||
def resource_adder(self, link_, base=None):
|
def resource_adder(self, link_, base=None):
|
||||||
from ebook_converter.polyglot.urllib import quote
|
|
||||||
link, frag = self.link_to_local_path(link_, base=base)
|
link, frag = self.link_to_local_path(link_, base=base)
|
||||||
if link is None:
|
if link is None:
|
||||||
return link_
|
return link_
|
||||||
@@ -287,9 +290,9 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
# file, therefore we quote it here.
|
# file, therefore we quote it here.
|
||||||
if isinstance(bhref, unicode_type):
|
if isinstance(bhref, unicode_type):
|
||||||
bhref = bhref.encode('utf-8')
|
bhref = bhref.encode('utf-8')
|
||||||
item.html_input_href = as_unicode(quote(bhref))
|
item.html_input_href = as_unicode(urllib.parse.quote(bhref))
|
||||||
if guessed in self.OEB_STYLES:
|
if guessed in self.OEB_STYLES:
|
||||||
item.override_css_fetch = partial(
|
item.override_css_fetch = functools.partial(
|
||||||
self.css_import_handler, os.path.dirname(link))
|
self.css_import_handler, os.path.dirname(link))
|
||||||
item.data
|
item.data
|
||||||
self.added_resources[link] = href
|
self.added_resources[link] = href
|
||||||
|
|||||||
@@ -1,12 +1,17 @@
|
|||||||
import os, re, shutil
|
import os
|
||||||
from os.path import dirname, abspath, relpath as _relpath, exists, basename
|
|
||||||
import pkg_resources
|
import pkg_resources
|
||||||
|
import re
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
from ebook_converter.customize.conversion import OutputFormatPlugin, OptionRecommendation
|
|
||||||
from ebook_converter import CurrentDir
|
from ebook_converter import CurrentDir
|
||||||
from ebook_converter.ptempfile import PersistentTemporaryDirectory
|
from ebook_converter.customize.conversion import OutputFormatPlugin, OptionRecommendation
|
||||||
|
from ebook_converter.ebooks.oeb.base import element
|
||||||
from ebook_converter.polyglot.builtins import unicode_type
|
from ebook_converter.polyglot.builtins import unicode_type
|
||||||
|
from ebook_converter.polyglot.urllib import unquote
|
||||||
|
from ebook_converter.ptempfile import PersistentTemporaryDirectory
|
||||||
|
from ebook_converter.utils.cleantext import clean_xml_chars
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2010, Fabian Grassl <fg@jusmeum.de>'
|
__copyright__ = '2010, Fabian Grassl <fg@jusmeum.de>'
|
||||||
@@ -14,7 +19,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
|
|
||||||
def relpath(*args):
|
def relpath(*args):
|
||||||
return _relpath(*args).replace(os.sep, '/')
|
return os.path.relpath(*args).replace(os.sep, '/')
|
||||||
|
|
||||||
|
|
||||||
class HTMLOutput(OutputFormatPlugin):
|
class HTMLOutput(OutputFormatPlugin):
|
||||||
@@ -47,11 +52,7 @@ class HTMLOutput(OutputFormatPlugin):
|
|||||||
'''
|
'''
|
||||||
Generate table of contents
|
Generate table of contents
|
||||||
'''
|
'''
|
||||||
from lxml import etree
|
|
||||||
from ebook_converter.polyglot.urllib import unquote
|
|
||||||
|
|
||||||
from ebook_converter.ebooks.oeb.base import element
|
|
||||||
from ebook_converter.utils.cleantext import clean_xml_chars
|
|
||||||
with CurrentDir(output_dir):
|
with CurrentDir(output_dir):
|
||||||
def build_node(current_node, parent=None):
|
def build_node(current_node, parent=None):
|
||||||
if parent is None:
|
if parent is None:
|
||||||
@@ -60,7 +61,8 @@ class HTMLOutput(OutputFormatPlugin):
|
|||||||
parent = element(parent, ('ul'))
|
parent = element(parent, ('ul'))
|
||||||
for node in current_node.nodes:
|
for node in current_node.nodes:
|
||||||
point = element(parent, 'li')
|
point = element(parent, 'li')
|
||||||
href = relpath(abspath(unquote(node.href)), dirname(ref_url))
|
href = relpath(os.path.abspath(unquote(node.href)),
|
||||||
|
os.path.dirname(ref_url))
|
||||||
if isinstance(href, bytes):
|
if isinstance(href, bytes):
|
||||||
href = href.decode('utf-8')
|
href = href.decode('utf-8')
|
||||||
link = element(point, 'a', href=clean_xml_chars(href))
|
link = element(point, 'a', href=clean_xml_chars(href))
|
||||||
@@ -131,10 +133,10 @@ class HTMLOutput(OutputFormatPlugin):
|
|||||||
|
|
||||||
tempdir = os.path.realpath(PersistentTemporaryDirectory())
|
tempdir = os.path.realpath(PersistentTemporaryDirectory())
|
||||||
output_file = os.path.join(tempdir,
|
output_file = os.path.join(tempdir,
|
||||||
basename(re.sub(r'\.zip', '', output_path)+'.html'))
|
os.path.basename(re.sub(r'\.zip', '', output_path)+'.html'))
|
||||||
output_dir = re.sub(r'\.html', '', output_file)+'_files'
|
output_dir = re.sub(r'\.html', '', output_file)+'_files'
|
||||||
|
|
||||||
if not exists(output_dir):
|
if not os.path.exists(output_dir):
|
||||||
os.makedirs(output_dir)
|
os.makedirs(output_dir)
|
||||||
|
|
||||||
css_path = output_dir+os.sep+'calibreHtmlOutBasicCss.css'
|
css_path = output_dir+os.sep+'calibreHtmlOutBasicCss.css'
|
||||||
@@ -145,9 +147,10 @@ class HTMLOutput(OutputFormatPlugin):
|
|||||||
html_toc = self.generate_html_toc(oeb_book, output_file, output_dir)
|
html_toc = self.generate_html_toc(oeb_book, output_file, output_dir)
|
||||||
templite = Templite(template_html_index_data)
|
templite = Templite(template_html_index_data)
|
||||||
nextLink = oeb_book.spine[0].href
|
nextLink = oeb_book.spine[0].href
|
||||||
nextLink = relpath(output_dir+os.sep+nextLink, dirname(output_file))
|
nextLink = relpath(output_dir+os.sep+nextLink,
|
||||||
cssLink = relpath(abspath(css_path), dirname(output_file))
|
os.path.dirname(output_file))
|
||||||
tocUrl = relpath(output_file, dirname(output_file))
|
cssLink = relpath(os.path.abspath(css_path), os.path.dirname(output_file))
|
||||||
|
tocUrl = relpath(output_file, os.path.dirname(output_file))
|
||||||
t = templite.render(has_toc=bool(oeb_book.toc.count()),
|
t = templite.render(has_toc=bool(oeb_book.toc.count()),
|
||||||
toc=html_toc, meta=meta, nextLink=nextLink,
|
toc=html_toc, meta=meta, nextLink=nextLink,
|
||||||
tocUrl=tocUrl, cssLink=cssLink,
|
tocUrl=tocUrl, cssLink=cssLink,
|
||||||
@@ -158,9 +161,9 @@ class HTMLOutput(OutputFormatPlugin):
|
|||||||
|
|
||||||
with CurrentDir(output_dir):
|
with CurrentDir(output_dir):
|
||||||
for item in oeb_book.manifest:
|
for item in oeb_book.manifest:
|
||||||
path = abspath(unquote(item.href))
|
path = os.path.abspath(unquote(item.href))
|
||||||
dir = dirname(path)
|
dir = os.path.dirname(path)
|
||||||
if not exists(dir):
|
if not os.path.exists(dir):
|
||||||
os.makedirs(dir)
|
os.makedirs(dir)
|
||||||
if item.spine_position is not None:
|
if item.spine_position is not None:
|
||||||
with open(path, 'wb') as f:
|
with open(path, 'wb') as f:
|
||||||
@@ -171,8 +174,8 @@ class HTMLOutput(OutputFormatPlugin):
|
|||||||
item.unload_data_from_memory(memory=path)
|
item.unload_data_from_memory(memory=path)
|
||||||
|
|
||||||
for item in oeb_book.spine:
|
for item in oeb_book.spine:
|
||||||
path = abspath(unquote(item.href))
|
path = os.path.abspath(unquote(item.href))
|
||||||
dir = dirname(path)
|
dir = os.path.dirname(path)
|
||||||
root = item.data.getroottree()
|
root = item.data.getroottree()
|
||||||
|
|
||||||
# get & clean HTML <HEAD>-data
|
# get & clean HTML <HEAD>-data
|
||||||
@@ -191,18 +194,18 @@ class HTMLOutput(OutputFormatPlugin):
|
|||||||
# generate link to next page
|
# generate link to next page
|
||||||
if item.spine_position+1 < len(oeb_book.spine):
|
if item.spine_position+1 < len(oeb_book.spine):
|
||||||
nextLink = oeb_book.spine[item.spine_position+1].href
|
nextLink = oeb_book.spine[item.spine_position+1].href
|
||||||
nextLink = relpath(abspath(nextLink), dir)
|
nextLink = relpath(os.path.abspath(nextLink), dir)
|
||||||
else:
|
else:
|
||||||
nextLink = None
|
nextLink = None
|
||||||
|
|
||||||
# generate link to previous page
|
# generate link to previous page
|
||||||
if item.spine_position > 0:
|
if item.spine_position > 0:
|
||||||
prevLink = oeb_book.spine[item.spine_position-1].href
|
prevLink = oeb_book.spine[item.spine_position-1].href
|
||||||
prevLink = relpath(abspath(prevLink), dir)
|
prevLink = relpath(os.path.abspath(prevLink), dir)
|
||||||
else:
|
else:
|
||||||
prevLink = None
|
prevLink = None
|
||||||
|
|
||||||
cssLink = relpath(abspath(css_path), dir)
|
cssLink = relpath(os.path.abspath(css_path), dir)
|
||||||
tocUrl = relpath(output_file, dir)
|
tocUrl = relpath(output_file, dir)
|
||||||
firstContentPageLink = oeb_book.spine[0].href
|
firstContentPageLink = oeb_book.spine[0].href
|
||||||
|
|
||||||
@@ -222,8 +225,8 @@ class HTMLOutput(OutputFormatPlugin):
|
|||||||
item.unload_data_from_memory(memory=path)
|
item.unload_data_from_memory(memory=path)
|
||||||
|
|
||||||
zfile = zipfile.ZipFile(output_path, "w")
|
zfile = zipfile.ZipFile(output_path, "w")
|
||||||
zfile.add_dir(output_dir, basename(output_dir))
|
zfile.add_dir(output_dir, os.path.basename(output_dir))
|
||||||
zfile.write(output_file, basename(output_file), zipfile.ZIP_DEFLATED)
|
zfile.write(output_file, os.path.basename(output_file), zipfile.ZIP_DEFLATED)
|
||||||
|
|
||||||
if opts.extract_to:
|
if opts.extract_to:
|
||||||
if os.path.exists(opts.extract_to):
|
if os.path.exists(opts.extract_to):
|
||||||
|
|||||||
@@ -1,9 +1,14 @@
|
|||||||
import os, re
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
from ebook_converter.customize.conversion import (OutputFormatPlugin,
|
from ebook_converter.customize.conversion import (OutputFormatPlugin,
|
||||||
OptionRecommendation)
|
OptionRecommendation)
|
||||||
from ebook_converter import CurrentDir
|
from ebook_converter import CurrentDir
|
||||||
|
from ebook_converter.polyglot.urllib import unquote
|
||||||
|
from ebook_converter.ebooks.oeb.base import OPF_MIME, NCX_MIME, PAGE_MAP_MIME, OEB_STYLES
|
||||||
|
from ebook_converter.ebooks.oeb.normalize_css import condense_sheet
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
@@ -21,14 +26,10 @@ class OEBOutput(OutputFormatPlugin):
|
|||||||
recommendations = {('pretty_print', True, OptionRecommendation.HIGH)}
|
recommendations = {('pretty_print', True, OptionRecommendation.HIGH)}
|
||||||
|
|
||||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||||
from ebook_converter.polyglot.urllib import unquote
|
|
||||||
from lxml import etree
|
|
||||||
|
|
||||||
self.log, self.opts = log, opts
|
self.log, self.opts = log, opts
|
||||||
if not os.path.exists(output_path):
|
if not os.path.exists(output_path):
|
||||||
os.makedirs(output_path)
|
os.makedirs(output_path)
|
||||||
from ebook_converter.ebooks.oeb.base import OPF_MIME, NCX_MIME, PAGE_MAP_MIME, OEB_STYLES
|
|
||||||
from ebook_converter.ebooks.oeb.normalize_css import condense_sheet
|
|
||||||
with CurrentDir(output_path):
|
with CurrentDir(output_path):
|
||||||
results = oeb_book.to_opf2(page_map=True)
|
results = oeb_book.to_opf2(page_map=True)
|
||||||
for key in (OPF_MIME, NCX_MIME, PAGE_MAP_MIME):
|
for key in (OPF_MIME, NCX_MIME, PAGE_MAP_MIME):
|
||||||
|
|||||||
@@ -1,9 +1,10 @@
|
|||||||
import posixpath, re
|
import posixpath
|
||||||
from uuid import uuid4
|
import re
|
||||||
|
import urllib.parse
|
||||||
|
import uuid
|
||||||
|
|
||||||
from ebook_converter.utils.filenames import ascii_text
|
from ebook_converter.utils.filenames import ascii_text
|
||||||
from ebook_converter.polyglot.builtins import unicode_type
|
from ebook_converter.polyglot.builtins import unicode_type
|
||||||
from ebook_converter.polyglot.urllib import urlparse
|
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
@@ -67,7 +68,7 @@ class LinksManager(object):
|
|||||||
self.namespace = namespace
|
self.namespace = namespace
|
||||||
self.log = log
|
self.log = log
|
||||||
self.document_relationships = document_relationships
|
self.document_relationships = document_relationships
|
||||||
self.top_anchor = unicode_type(uuid4().hex)
|
self.top_anchor = unicode_type(uuid.uuid4().hex)
|
||||||
self.anchor_map = {}
|
self.anchor_map = {}
|
||||||
self.used_bookmark_names = set()
|
self.used_bookmark_names = set()
|
||||||
self.bmark_id = 0
|
self.bmark_id = 0
|
||||||
@@ -100,7 +101,7 @@ class LinksManager(object):
|
|||||||
|
|
||||||
def serialize_hyperlink(self, parent, link):
|
def serialize_hyperlink(self, parent, link):
|
||||||
item, url, tooltip = link
|
item, url, tooltip = link
|
||||||
purl = urlparse(url)
|
purl = urllib.parse.urlparse(url)
|
||||||
href = purl.path
|
href = purl.path
|
||||||
|
|
||||||
def make_link(parent, anchor=None, id=None, tooltip=None):
|
def make_link(parent, anchor=None, id=None, tooltip=None):
|
||||||
@@ -133,7 +134,7 @@ class LinksManager(object):
|
|||||||
def process_toc_node(self, toc, level=0):
|
def process_toc_node(self, toc, level=0):
|
||||||
href = toc.href
|
href = toc.href
|
||||||
if href:
|
if href:
|
||||||
purl = urlparse(href)
|
purl = urllib.parse.urlparse(href)
|
||||||
href = purl.path
|
href = purl.path
|
||||||
if href in self.document_hrefs:
|
if href in self.document_hrefs:
|
||||||
key = (href, purl.fragment or self.top_anchor)
|
key = (href, purl.fragment or self.top_anchor)
|
||||||
|
|||||||
@@ -1,8 +1,11 @@
|
|||||||
"""
|
"""
|
||||||
Transform OEB content into FB2 markup
|
Transform OEB content into FB2 markup
|
||||||
"""
|
"""
|
||||||
import re, textwrap, uuid
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
import re
|
||||||
|
import textwrap
|
||||||
|
import urllib.parse
|
||||||
|
import uuid
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
@@ -14,7 +17,6 @@ from ebook_converter.utils.img import save_cover_data_to
|
|||||||
from ebook_converter.ebooks.oeb.base import urlnormalize
|
from ebook_converter.ebooks.oeb.base import urlnormalize
|
||||||
from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes
|
from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes
|
||||||
from ebook_converter.polyglot.binary import as_base64_unicode
|
from ebook_converter.polyglot.binary import as_base64_unicode
|
||||||
from ebook_converter.polyglot.urllib import urlparse
|
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
@@ -508,7 +510,7 @@ class FB2MLizer(object):
|
|||||||
tags.append('p')
|
tags.append('p')
|
||||||
if tag == 'a' and elem_tree.attrib.get('href', None):
|
if tag == 'a' and elem_tree.attrib.get('href', None):
|
||||||
# Handle only external links for now
|
# Handle only external links for now
|
||||||
if urlparse(elem_tree.attrib['href']).netloc:
|
if urllib.parse.urlparse(elem_tree.attrib['href']).netloc:
|
||||||
p_txt, p_tag = self.ensure_p()
|
p_txt, p_tag = self.ensure_p()
|
||||||
fb2_out += p_txt
|
fb2_out += p_txt
|
||||||
tags += p_tag
|
tags += p_tag
|
||||||
|
|||||||
@@ -1,14 +1,17 @@
|
|||||||
"""
|
"""
|
||||||
Input plugin for HTML or OPF ebooks.
|
Input plugin for HTML or OPF ebooks.
|
||||||
"""
|
"""
|
||||||
import os, re, sys, errno as gerrno
|
import errno
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from ebook_converter.ebooks.oeb.base import urlunquote
|
from ebook_converter.ebooks.oeb.base import urlunquote
|
||||||
from ebook_converter.ebooks.chardet import detect_xml_encoding
|
from ebook_converter.ebooks.chardet import detect_xml_encoding
|
||||||
from ebook_converter.constants import iswindows
|
from ebook_converter.constants import iswindows
|
||||||
from ebook_converter import unicode_path, as_unicode, replace_entities
|
from ebook_converter import unicode_path, as_unicode, replace_entities
|
||||||
from ebook_converter.polyglot.builtins import is_py3, unicode_type
|
from ebook_converter.polyglot.builtins import unicode_type
|
||||||
from ebook_converter.polyglot.urllib import urlparse, urlunparse
|
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
@@ -29,7 +32,7 @@ class Link(object):
|
|||||||
if iswindows and path.startswith('/'):
|
if iswindows and path.startswith('/'):
|
||||||
path = path[1:]
|
path = path[1:]
|
||||||
isabs = True
|
isabs = True
|
||||||
path = urlunparse(('', '', path, url.params, url.query, ''))
|
path = urllib.parse.urlunparse(('', '', path, url.params, url.query, ''))
|
||||||
path = urlunquote(path)
|
path = urlunquote(path)
|
||||||
if isabs or os.path.isabs(path):
|
if isabs or os.path.isabs(path):
|
||||||
return path
|
return path
|
||||||
@@ -43,7 +46,7 @@ class Link(object):
|
|||||||
'''
|
'''
|
||||||
assert isinstance(url, unicode_type) and isinstance(base, unicode_type)
|
assert isinstance(url, unicode_type) and isinstance(base, unicode_type)
|
||||||
self.url = url
|
self.url = url
|
||||||
self.parsed_url = urlparse(self.url)
|
self.parsed_url = urllib.parse.urlparse(self.url)
|
||||||
self.is_local = self.parsed_url.scheme in ('', 'file')
|
self.is_local = self.parsed_url.scheme in ('', 'file')
|
||||||
self.is_internal = self.is_local and not bool(self.parsed_url.path)
|
self.is_internal = self.is_local and not bool(self.parsed_url.path)
|
||||||
self.path = None
|
self.path = None
|
||||||
@@ -62,16 +65,13 @@ class Link(object):
|
|||||||
def __str__(self):
|
def __str__(self):
|
||||||
return 'Link: %s --> %s'%(self.url, self.path)
|
return 'Link: %s --> %s'%(self.url, self.path)
|
||||||
|
|
||||||
if not is_py3:
|
|
||||||
__unicode__ = __str__
|
|
||||||
|
|
||||||
|
|
||||||
class IgnoreFile(Exception):
|
class IgnoreFile(Exception):
|
||||||
|
|
||||||
def __init__(self, msg, errno):
|
def __init__(self, msg, err_no):
|
||||||
Exception.__init__(self, msg)
|
Exception.__init__(self, msg)
|
||||||
self.doesnt_exist = errno == gerrno.ENOENT
|
self.errno = err_no
|
||||||
self.errno = errno
|
self.doesnt_exist = err_no == errno.ENOENT
|
||||||
|
|
||||||
|
|
||||||
class HTMLFile(object):
|
class HTMLFile(object):
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ Transform OEB content into a single (more or less) HTML file.
|
|||||||
"""
|
"""
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from lxml import html
|
from lxml import html
|
||||||
@@ -13,7 +14,6 @@ from ebook_converter.ebooks.oeb.base import (
|
|||||||
from ebook_converter.ebooks.oeb.stylizer import Stylizer
|
from ebook_converter.ebooks.oeb.stylizer import Stylizer
|
||||||
from ebook_converter.utils.logging import default_log
|
from ebook_converter.utils.logging import default_log
|
||||||
from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes, as_bytes
|
from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes, as_bytes
|
||||||
from ebook_converter.polyglot.urllib import urldefrag
|
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
@@ -101,7 +101,7 @@ class OEB2HTML(object):
|
|||||||
for attr in attribs:
|
for attr in attribs:
|
||||||
if attr in link_attrs:
|
if attr in link_attrs:
|
||||||
href = item.abshref(attribs[attr])
|
href = item.abshref(attribs[attr])
|
||||||
href, id = urldefrag(href)
|
href, id = urllib.parse.urldefrag(href)
|
||||||
if href in self.base_hrefs:
|
if href in self.base_hrefs:
|
||||||
self.get_link_id(href, id)
|
self.get_link_id(href, id)
|
||||||
|
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import os
|
|||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import urllib.parse
|
||||||
from collections import deque
|
from collections import deque
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from itertools import chain
|
from itertools import chain
|
||||||
@@ -37,7 +38,7 @@ from ebook_converter.ebooks.lrf.pylrs.pylrs import (
|
|||||||
)
|
)
|
||||||
from ebook_converter.ptempfile import PersistentTemporaryFile
|
from ebook_converter.ptempfile import PersistentTemporaryFile
|
||||||
from ebook_converter.polyglot.builtins import getcwd, itervalues, string_or_bytes, unicode_type
|
from ebook_converter.polyglot.builtins import getcwd, itervalues, string_or_bytes, unicode_type
|
||||||
from ebook_converter.polyglot.urllib import unquote, urlparse
|
from ebook_converter.polyglot.urllib import unquote
|
||||||
|
|
||||||
from PIL import Image as PILImage
|
from PIL import Image as PILImage
|
||||||
|
|
||||||
@@ -51,7 +52,7 @@ def update_css(ncss, ocss):
|
|||||||
|
|
||||||
|
|
||||||
def munge_paths(basepath, url):
|
def munge_paths(basepath, url):
|
||||||
purl = urlparse(unquote(url),)
|
purl = urllib.parse.urlparse(unquote(url),)
|
||||||
path, fragment = purl[2], purl[5]
|
path, fragment = purl[2], purl[5]
|
||||||
if path:
|
if path:
|
||||||
path = path.replace('/', os.sep)
|
path = path.replace('/', os.sep)
|
||||||
@@ -1471,7 +1472,8 @@ class HTMLConverter(object):
|
|||||||
pass
|
pass
|
||||||
elif tagname == 'a' and self.link_levels >= 0:
|
elif tagname == 'a' and self.link_levels >= 0:
|
||||||
if tag.has_attr('href') and not self.link_exclude.match(tag['href']):
|
if tag.has_attr('href') and not self.link_exclude.match(tag['href']):
|
||||||
if urlparse(tag['href'])[0] not in ('', 'file'):
|
if urllib.parse.urlparse(tag['href'])[0] not in ('',
|
||||||
|
'file'):
|
||||||
self.process_children(tag, tag_css, tag_pseudo_css)
|
self.process_children(tag, tag_css, tag_pseudo_css)
|
||||||
else:
|
else:
|
||||||
path = munge_paths(self.target_prefix, tag['href'])[0]
|
path = munge_paths(self.target_prefix, tag['href'])[0]
|
||||||
@@ -1513,7 +1515,7 @@ class HTMLConverter(object):
|
|||||||
dropcaps = tag.get('class') in ('libprs500_dropcaps', ['libprs500_dropcaps'])
|
dropcaps = tag.get('class') in ('libprs500_dropcaps', ['libprs500_dropcaps'])
|
||||||
self.process_image(path, tag_css, width, height,
|
self.process_image(path, tag_css, width, height,
|
||||||
dropcaps=dropcaps, rescale=True)
|
dropcaps=dropcaps, rescale=True)
|
||||||
elif not urlparse(tag['src'])[0]:
|
elif not urllib.parse.urlparse(tag['src'])[0]:
|
||||||
self.log.warn('Could not find image: '+tag['src'])
|
self.log.warn('Could not find image: '+tag['src'])
|
||||||
else:
|
else:
|
||||||
self.log.debug("Failed to process: %s"%unicode_type(tag))
|
self.log.debug("Failed to process: %s"%unicode_type(tag))
|
||||||
|
|||||||
@@ -2,12 +2,15 @@
|
|||||||
Provides abstraction for metadata reading.writing from a variety of ebook
|
Provides abstraction for metadata reading.writing from a variety of ebook
|
||||||
formats.
|
formats.
|
||||||
"""
|
"""
|
||||||
import os, sys, re
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from ebook_converter import relpath, guess_type, prints, force_unicode
|
from ebook_converter import relpath, guess_type, prints, force_unicode
|
||||||
from ebook_converter.utils.config_base import tweaks
|
from ebook_converter.utils.config_base import tweaks
|
||||||
from ebook_converter.polyglot.builtins import codepoint_to_chr, unicode_type, getcwd, iteritems, itervalues, as_unicode
|
from ebook_converter.polyglot.builtins import codepoint_to_chr, unicode_type, getcwd, iteritems, itervalues, as_unicode
|
||||||
from ebook_converter.polyglot.urllib import quote, unquote, urlparse
|
from ebook_converter.polyglot.urllib import unquote
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
@@ -241,7 +244,7 @@ class Resource(object):
|
|||||||
path = path.decode(sys.getfilesystemencoding())
|
path = path.decode(sys.getfilesystemencoding())
|
||||||
self.path = path
|
self.path = path
|
||||||
else:
|
else:
|
||||||
url = urlparse(href_or_path)
|
url = urllib.parse.urlparse(href_or_path)
|
||||||
if url[0] not in ('', 'file'):
|
if url[0] not in ('', 'file'):
|
||||||
self._href = href_or_path
|
self._href = href_or_path
|
||||||
else:
|
else:
|
||||||
@@ -268,7 +271,7 @@ class Resource(object):
|
|||||||
if self.path is None:
|
if self.path is None:
|
||||||
return self._href
|
return self._href
|
||||||
f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode_type) else self.fragment
|
f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode_type) else self.fragment
|
||||||
frag = '#'+as_unicode(quote(f)) if self.fragment else ''
|
frag = '#'+as_unicode(urllib.parse.quote(f)) if self.fragment else ''
|
||||||
if self.path == basedir:
|
if self.path == basedir:
|
||||||
return ''+frag
|
return ''+frag
|
||||||
try:
|
try:
|
||||||
@@ -277,7 +280,7 @@ class Resource(object):
|
|||||||
rpath = self.path
|
rpath = self.path
|
||||||
if isinstance(rpath, unicode_type):
|
if isinstance(rpath, unicode_type):
|
||||||
rpath = rpath.encode('utf-8')
|
rpath = rpath.encode('utf-8')
|
||||||
return as_unicode(quote(rpath.replace(os.sep, '/')))+frag
|
return as_unicode(urllib.parse.quote(rpath.replace(os.sep, '/')))+frag
|
||||||
|
|
||||||
def set_basedir(self, path):
|
def set_basedir(self, path):
|
||||||
self._basedir = path
|
self._basedir = path
|
||||||
|
|||||||
@@ -1,7 +1,17 @@
|
|||||||
"""
|
"""
|
||||||
lxml based OPF parser.
|
lxml based OPF parser.
|
||||||
"""
|
"""
|
||||||
import re, sys, unittest, functools, os, uuid, glob, io, json, copy
|
import copy
|
||||||
|
import functools
|
||||||
|
import glob
|
||||||
|
import io
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import unittest
|
||||||
|
import urllib.parse
|
||||||
|
import uuid
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
@@ -18,7 +28,7 @@ from ebook_converter.utils.cleantext import clean_ascii_chars, clean_xml_chars
|
|||||||
from ebook_converter.utils.config import tweaks
|
from ebook_converter.utils.config import tweaks
|
||||||
from ebook_converter.utils.xml_parse import safe_xml_fromstring
|
from ebook_converter.utils.xml_parse import safe_xml_fromstring
|
||||||
from ebook_converter.polyglot.builtins import iteritems, unicode_type, getcwd
|
from ebook_converter.polyglot.builtins import iteritems, unicode_type, getcwd
|
||||||
from ebook_converter.polyglot.urllib import unquote, urlparse
|
from ebook_converter.polyglot.urllib import unquote
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
@@ -76,7 +86,7 @@ class Resource(object): # {{{
|
|||||||
self.path = path
|
self.path = path
|
||||||
else:
|
else:
|
||||||
href_or_path = href_or_path
|
href_or_path = href_or_path
|
||||||
url = urlparse(href_or_path)
|
url = urllib.parse.urlparse(href_or_path)
|
||||||
if url[0] not in ('', 'file'):
|
if url[0] not in ('', 'file'):
|
||||||
self._href = href_or_path
|
self._href = href_or_path
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -1,5 +1,9 @@
|
|||||||
import os, glob, re, functools
|
import collections
|
||||||
from collections import Counter
|
import functools
|
||||||
|
import glob
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from lxml.builder import ElementMaker
|
from lxml.builder import ElementMaker
|
||||||
@@ -9,7 +13,7 @@ from ebook_converter.ebooks.chardet import xml_to_unicode
|
|||||||
from ebook_converter.utils.xml_parse import safe_xml_fromstring
|
from ebook_converter.utils.xml_parse import safe_xml_fromstring
|
||||||
from ebook_converter.utils.cleantext import clean_xml_chars
|
from ebook_converter.utils.cleantext import clean_xml_chars
|
||||||
from ebook_converter.polyglot.builtins import unicode_type, getcwd
|
from ebook_converter.polyglot.builtins import unicode_type, getcwd
|
||||||
from ebook_converter.polyglot.urllib import unquote, urlparse
|
from ebook_converter.polyglot.urllib import unquote
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
@@ -30,7 +34,7 @@ def parse_html_toc(data):
|
|||||||
data = xml_to_unicode(data, strip_encoding_pats=True, resolve_entities=True)[0]
|
data = xml_to_unicode(data, strip_encoding_pats=True, resolve_entities=True)[0]
|
||||||
root = parse(clean_xml_chars(data), maybe_xhtml=True, keep_doctype=False, sanitize_names=True)
|
root = parse(clean_xml_chars(data), maybe_xhtml=True, keep_doctype=False, sanitize_names=True)
|
||||||
for a in root.xpath('//*[@href and local-name()="a"]'):
|
for a in root.xpath('//*[@href and local-name()="a"]'):
|
||||||
purl = urlparse(unquote(a.get('href')))
|
purl = urllib.parse.urlparse(unquote(a.get('href')))
|
||||||
href, fragment = purl[2], purl[5]
|
href, fragment = purl[2], purl[5]
|
||||||
if not fragment:
|
if not fragment:
|
||||||
fragment = None
|
fragment = None
|
||||||
@@ -142,7 +146,7 @@ class TOC(list):
|
|||||||
|
|
||||||
if toc is not None:
|
if toc is not None:
|
||||||
if toc.lower() not in ('ncx', 'ncxtoc'):
|
if toc.lower() not in ('ncx', 'ncxtoc'):
|
||||||
toc = urlparse(unquote(toc))[2]
|
toc = urllib.parse.urlparse(unquote(toc))[2]
|
||||||
toc = toc.replace('/', os.sep)
|
toc = toc.replace('/', os.sep)
|
||||||
if not os.path.isabs(toc):
|
if not os.path.isabs(toc):
|
||||||
toc = os.path.join(self.base_path, toc)
|
toc = os.path.join(self.base_path, toc)
|
||||||
@@ -209,7 +213,7 @@ class TOC(list):
|
|||||||
if content and text:
|
if content and text:
|
||||||
content = content[0]
|
content = content[0]
|
||||||
# if get_attr(content, attr='src'):
|
# if get_attr(content, attr='src'):
|
||||||
purl = urlparse(content.get('src'))
|
purl = urllib.parse.urlparse(content.get('src'))
|
||||||
href, fragment = unquote(purl[2]), unquote(purl[5])
|
href, fragment = unquote(purl[2]), unquote(purl[5])
|
||||||
nd = dest.add_item(href, fragment, text)
|
nd = dest.add_item(href, fragment, text)
|
||||||
nd.play_order = play_order
|
nd.play_order = play_order
|
||||||
@@ -253,7 +257,7 @@ class TOC(list):
|
|||||||
navmap = E.navMap()
|
navmap = E.navMap()
|
||||||
root.append(navmap)
|
root.append(navmap)
|
||||||
root.set('{http://www.w3.org/XML/1998/namespace}lang', 'en')
|
root.set('{http://www.w3.org/XML/1998/namespace}lang', 'en')
|
||||||
c = Counter()
|
c = collections.Counter()
|
||||||
|
|
||||||
def navpoint(parent, np):
|
def navpoint(parent, np):
|
||||||
text = np.text
|
text = np.text
|
||||||
|
|||||||
@@ -1,7 +1,10 @@
|
|||||||
import struct, re, os
|
import collections
|
||||||
from collections import namedtuple
|
import itertools
|
||||||
from itertools import repeat
|
import os
|
||||||
from uuid import uuid4
|
import re
|
||||||
|
import struct
|
||||||
|
import urllib.parse
|
||||||
|
import uuid
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
@@ -16,21 +19,20 @@ from ebook_converter.ebooks.mobi.utils import read_font_record
|
|||||||
from ebook_converter.ebooks.oeb.parse_utils import parse_html
|
from ebook_converter.ebooks.oeb.parse_utils import parse_html
|
||||||
from ebook_converter.ebooks.oeb.base import XPath, XHTML, xml2text
|
from ebook_converter.ebooks.oeb.base import XPath, XHTML, xml2text
|
||||||
from ebook_converter.polyglot.builtins import unicode_type, getcwd, as_unicode
|
from ebook_converter.polyglot.builtins import unicode_type, getcwd, as_unicode
|
||||||
from ebook_converter.polyglot.urllib import urldefrag
|
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
Part = namedtuple('Part',
|
Part = collections.namedtuple('Part',
|
||||||
'num type filename start end aid')
|
'num type filename start end aid')
|
||||||
|
|
||||||
Elem = namedtuple('Elem',
|
Elem = collections.namedtuple('Elem',
|
||||||
'insert_pos toc_text file_number sequence_number start_pos '
|
'insert_pos toc_text file_number sequence_number start_pos '
|
||||||
'length')
|
'length')
|
||||||
|
|
||||||
FlowInfo = namedtuple('FlowInfo',
|
FlowInfo = collections.namedtuple('FlowInfo',
|
||||||
'type format dir fname')
|
'type format dir fname')
|
||||||
|
|
||||||
# locate beginning and ending positions of tag with specific aid attribute
|
# locate beginning and ending positions of tag with specific aid attribute
|
||||||
@@ -81,7 +83,7 @@ class Mobi8Reader(object):
|
|||||||
|
|
||||||
def __call__(self):
|
def __call__(self):
|
||||||
self.mobi6_reader.check_for_drm()
|
self.mobi6_reader.check_for_drm()
|
||||||
self.aid_anchor_suffix = uuid4().hex.encode('utf-8')
|
self.aid_anchor_suffix = uuid.uuid4().hex.encode('utf-8')
|
||||||
bh = self.mobi6_reader.book_header
|
bh = self.mobi6_reader.book_header
|
||||||
if self.mobi6_reader.kf8_type == 'joint':
|
if self.mobi6_reader.kf8_type == 'joint':
|
||||||
offset = self.mobi6_reader.kf8_boundary + 2
|
offset = self.mobi6_reader.kf8_boundary + 2
|
||||||
@@ -127,7 +129,7 @@ class Mobi8Reader(object):
|
|||||||
if self.header.skelidx != NULL_INDEX:
|
if self.header.skelidx != NULL_INDEX:
|
||||||
table = read_index(self.kf8_sections, self.header.skelidx,
|
table = read_index(self.kf8_sections, self.header.skelidx,
|
||||||
self.header.codec)[0]
|
self.header.codec)[0]
|
||||||
File = namedtuple('File',
|
File = collections.namedtuple('File',
|
||||||
'file_number name divtbl_count start_position length')
|
'file_number name divtbl_count start_position length')
|
||||||
|
|
||||||
for i, text in enumerate(table):
|
for i, text in enumerate(table):
|
||||||
@@ -149,7 +151,7 @@ class Mobi8Reader(object):
|
|||||||
if self.header.othidx != NULL_INDEX:
|
if self.header.othidx != NULL_INDEX:
|
||||||
table, cncx = read_index(self.kf8_sections, self.header.othidx,
|
table, cncx = read_index(self.kf8_sections, self.header.othidx,
|
||||||
self.header.codec)
|
self.header.codec)
|
||||||
Item = namedtuple('Item',
|
Item = collections.namedtuple('Item',
|
||||||
'type title pos_fid')
|
'type title pos_fid')
|
||||||
|
|
||||||
for i, ref_type in enumerate(table):
|
for i, ref_type in enumerate(table):
|
||||||
@@ -222,7 +224,7 @@ class Mobi8Reader(object):
|
|||||||
self.parts.append(skeleton)
|
self.parts.append(skeleton)
|
||||||
if divcnt < 1:
|
if divcnt < 1:
|
||||||
# Empty file
|
# Empty file
|
||||||
aidtext = unicode_type(uuid4())
|
aidtext = unicode_type(uuid.uuid4())
|
||||||
filename = aidtext + '.html'
|
filename = aidtext + '.html'
|
||||||
self.partinfo.append(Part(skelnum, 'text', filename, skelpos,
|
self.partinfo.append(Part(skelnum, 'text', filename, skelpos,
|
||||||
baseptr, aidtext))
|
baseptr, aidtext))
|
||||||
@@ -293,7 +295,7 @@ class Mobi8Reader(object):
|
|||||||
for part in self.partinfo:
|
for part in self.partinfo:
|
||||||
if pos >= part.start and pos < part.end:
|
if pos >= part.start and pos < part.end:
|
||||||
return part
|
return part
|
||||||
return Part(*repeat(None, len(Part._fields)))
|
return Part(*itertools.repeat(None, len(Part._fields)))
|
||||||
|
|
||||||
def get_id_tag_by_pos_fid(self, posfid, offset):
|
def get_id_tag_by_pos_fid(self, posfid, offset):
|
||||||
# first convert kindle:pos:fid and offset info to position in file
|
# first convert kindle:pos:fid and offset info to position in file
|
||||||
@@ -475,7 +477,7 @@ class Mobi8Reader(object):
|
|||||||
for ref in guide:
|
for ref in guide:
|
||||||
if ref.type == 'toc':
|
if ref.type == 'toc':
|
||||||
href = ref.href()
|
href = ref.href()
|
||||||
href, frag = urldefrag(href)
|
href, frag = urllib.parse.urldefrag(href)
|
||||||
if os.path.exists(href.replace('/', os.sep)):
|
if os.path.exists(href.replace('/', os.sep)):
|
||||||
try:
|
try:
|
||||||
toc = self.read_inline_toc(href, frag)
|
toc = self.read_inline_toc(href, frag)
|
||||||
@@ -554,7 +556,7 @@ class Mobi8Reader(object):
|
|||||||
if reached and elem.tag == XHTML('a') and elem.get('href',
|
if reached and elem.tag == XHTML('a') and elem.get('href',
|
||||||
False):
|
False):
|
||||||
href = elem.get('href')
|
href = elem.get('href')
|
||||||
href, frag = urldefrag(href)
|
href, frag = urllib.parse.urldefrag(href)
|
||||||
href = base_href + '/' + href
|
href = base_href + '/' + href
|
||||||
text = xml2text(elem).strip()
|
text = xml2text(elem).strip()
|
||||||
if (text, href, frag) in seen:
|
if (text, href, frag) in seen:
|
||||||
|
|||||||
@@ -1,7 +1,8 @@
|
|||||||
|
import collections
|
||||||
|
import io
|
||||||
import re
|
import re
|
||||||
import unicodedata
|
import unicodedata
|
||||||
from collections import defaultdict
|
import urllib.parse
|
||||||
from io import BytesIO
|
|
||||||
|
|
||||||
from ebook_converter.ebooks.mobi.mobiml import MBP_NS
|
from ebook_converter.ebooks.mobi.mobiml import MBP_NS
|
||||||
from ebook_converter.ebooks.mobi.utils import is_guide_ref_start
|
from ebook_converter.ebooks.mobi.utils import is_guide_ref_start
|
||||||
@@ -9,7 +10,6 @@ from ebook_converter.ebooks.oeb.base import (
|
|||||||
OEB_DOCS, XHTML, XHTML_NS, XML_NS, namespace, prefixname, urlnormalize
|
OEB_DOCS, XHTML, XHTML_NS, XML_NS, namespace, prefixname, urlnormalize
|
||||||
)
|
)
|
||||||
from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes
|
from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes
|
||||||
from ebook_converter.polyglot.urllib import urldefrag
|
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
@@ -17,12 +17,12 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
|||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
|
||||||
class Buf(BytesIO):
|
class Buf(io.BytesIO):
|
||||||
|
|
||||||
def write(self, x):
|
def write(self, x):
|
||||||
if isinstance(x, unicode_type):
|
if isinstance(x, unicode_type):
|
||||||
x = x.encode('utf-8')
|
x = x.encode('utf-8')
|
||||||
BytesIO.write(self, x)
|
io.BytesIO.write(self, x)
|
||||||
|
|
||||||
|
|
||||||
class Serializer(object):
|
class Serializer(object):
|
||||||
@@ -63,7 +63,7 @@ class Serializer(object):
|
|||||||
# Mapping of hrefs (urlnormalized) to a list of offsets into the buffer
|
# Mapping of hrefs (urlnormalized) to a list of offsets into the buffer
|
||||||
# where filepos="..." elements are written corresponding to links that
|
# where filepos="..." elements are written corresponding to links that
|
||||||
# point to the href. This is used at the end to fill in the correct values.
|
# point to the href. This is used at the end to fill in the correct values.
|
||||||
self.href_offsets = defaultdict(list)
|
self.href_offsets = collections.defaultdict(list)
|
||||||
|
|
||||||
# List of offsets in the buffer of non linear items in the spine. These
|
# List of offsets in the buffer of non linear items in the spine. These
|
||||||
# become uncrossable breaks in the MOBI
|
# become uncrossable breaks in the MOBI
|
||||||
@@ -81,7 +81,7 @@ class Serializer(object):
|
|||||||
item.is_article_start = item.is_article_end = False
|
item.is_article_start = item.is_article_end = False
|
||||||
|
|
||||||
def spine_item(tocitem):
|
def spine_item(tocitem):
|
||||||
href = urldefrag(tocitem.href)[0]
|
href = urllib.parse.urldefrag(tocitem.href)[0]
|
||||||
for item in self.oeb.spine:
|
for item in self.oeb.spine:
|
||||||
if item.href == href:
|
if item.href == href:
|
||||||
return item
|
return item
|
||||||
@@ -157,7 +157,7 @@ class Serializer(object):
|
|||||||
hrefs = self.oeb.manifest.hrefs
|
hrefs = self.oeb.manifest.hrefs
|
||||||
buf.write(b'<guide>')
|
buf.write(b'<guide>')
|
||||||
for ref in self.oeb.guide.values():
|
for ref in self.oeb.guide.values():
|
||||||
path = urldefrag(ref.href)[0]
|
path = urllib.parse.urldefrag(ref.href)[0]
|
||||||
if path not in hrefs or hrefs[path].media_type not in OEB_DOCS:
|
if path not in hrefs or hrefs[path].media_type not in OEB_DOCS:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -188,7 +188,7 @@ class Serializer(object):
|
|||||||
'''
|
'''
|
||||||
hrefs = self.oeb.manifest.hrefs
|
hrefs = self.oeb.manifest.hrefs
|
||||||
try:
|
try:
|
||||||
path, frag = urldefrag(urlnormalize(href))
|
path, frag = urllib.parse.urldefrag(urlnormalize(href))
|
||||||
except ValueError:
|
except ValueError:
|
||||||
# Unparseable URL
|
# Unparseable URL
|
||||||
return False
|
return False
|
||||||
@@ -382,7 +382,7 @@ class Serializer(object):
|
|||||||
if href not in id_offsets:
|
if href not in id_offsets:
|
||||||
self.logger.warn('Hyperlink target %r not found' % href)
|
self.logger.warn('Hyperlink target %r not found' % href)
|
||||||
# Link to the top of the document, better than just ignoring
|
# Link to the top of the document, better than just ignoring
|
||||||
href, _ = urldefrag(href)
|
href, _ = urllib.parse.urldefrag(href)
|
||||||
if href in self.id_offsets:
|
if href in self.id_offsets:
|
||||||
ioff = self.id_offsets[href]
|
ioff = self.id_offsets[href]
|
||||||
if is_start:
|
if is_start:
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import os, re, logging, sys, numbers
|
|||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from itertools import count
|
from itertools import count
|
||||||
from operator import attrgetter
|
from operator import attrgetter
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from lxml import etree, html
|
from lxml import etree, html
|
||||||
from ebook_converter import force_unicode
|
from ebook_converter import force_unicode
|
||||||
@@ -17,7 +18,7 @@ from ebook_converter.ebooks.oeb.parse_utils import barename, XHTML_NS, namespace
|
|||||||
from ebook_converter.utils.cleantext import clean_xml_chars
|
from ebook_converter.utils.cleantext import clean_xml_chars
|
||||||
from ebook_converter.utils.short_uuid import uuid4
|
from ebook_converter.utils.short_uuid import uuid4
|
||||||
from ebook_converter.polyglot.builtins import iteritems, unicode_type, string_or_bytes, itervalues, codepoint_to_chr
|
from ebook_converter.polyglot.builtins import iteritems, unicode_type, string_or_bytes, itervalues, codepoint_to_chr
|
||||||
from ebook_converter.polyglot.urllib import unquote as urlunquote, urldefrag, urljoin, urlparse, urlunparse
|
from ebook_converter.polyglot.urllib import unquote as urlunquote
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
@@ -185,13 +186,13 @@ def iterlinks(root, find_links_in_css=True):
|
|||||||
if attrib in attribs:
|
if attrib in attribs:
|
||||||
value = el.get(attrib)
|
value = el.get(attrib)
|
||||||
if codebase is not None:
|
if codebase is not None:
|
||||||
value = urljoin(codebase, value)
|
value = urllib.parse.urljoin(codebase, value)
|
||||||
yield (el, attrib, value, 0)
|
yield (el, attrib, value, 0)
|
||||||
if 'archive' in attribs:
|
if 'archive' in attribs:
|
||||||
for match in _archive_re.finditer(el.get('archive')):
|
for match in _archive_re.finditer(el.get('archive')):
|
||||||
value = match.group(0)
|
value = match.group(0)
|
||||||
if codebase is not None:
|
if codebase is not None:
|
||||||
value = urljoin(codebase, value)
|
value = urllib.parse.urljoin(codebase, value)
|
||||||
yield (el, 'archive', value, match.start())
|
yield (el, 'archive', value, match.start())
|
||||||
else:
|
else:
|
||||||
for attr in attribs:
|
for attr in attribs:
|
||||||
@@ -217,7 +218,7 @@ def make_links_absolute(root, base_url):
|
|||||||
came from)
|
came from)
|
||||||
'''
|
'''
|
||||||
def link_repl(href):
|
def link_repl(href):
|
||||||
return urljoin(base_url, href)
|
return urllib.parse.urljoin(base_url, href)
|
||||||
rewrite_links(root, link_repl)
|
rewrite_links(root, link_repl)
|
||||||
|
|
||||||
|
|
||||||
@@ -463,16 +464,16 @@ def urlnormalize(href):
|
|||||||
characters URL quoted.
|
characters URL quoted.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
parts = urlparse(href)
|
parts = urllib.parse.urlparse(href)
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
raise ValueError('Failed to parse the URL: %r with underlying error: %s' % (href, as_unicode(e)))
|
raise ValueError('Failed to parse the URL: %r with underlying error: %s' % (href, as_unicode(e)))
|
||||||
if not parts.scheme or parts.scheme == 'file':
|
if not parts.scheme or parts.scheme == 'file':
|
||||||
path, frag = urldefrag(href)
|
path, frag = urllib.parse.urldefrag(href)
|
||||||
parts = ('', '', path, '', '', frag)
|
parts = ('', '', path, '', '', frag)
|
||||||
parts = (part.replace('\\', '/') for part in parts)
|
parts = (part.replace('\\', '/') for part in parts)
|
||||||
parts = (urlunquote(part) for part in parts)
|
parts = (urlunquote(part) for part in parts)
|
||||||
parts = (urlquote(part) for part in parts)
|
parts = (urlquote(part) for part in parts)
|
||||||
return urlunparse(parts)
|
return urllib.parse.urlunparse(parts)
|
||||||
|
|
||||||
|
|
||||||
def extract(elem):
|
def extract(elem):
|
||||||
@@ -1135,7 +1136,7 @@ class Manifest(object):
|
|||||||
relative to this manifest item to a book-absolute reference.
|
relative to this manifest item to a book-absolute reference.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
purl = urlparse(href)
|
purl = urllib.parse.urlparse(href)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return href
|
return href
|
||||||
scheme = purl.scheme
|
scheme = purl.scheme
|
||||||
@@ -1143,8 +1144,8 @@ class Manifest(object):
|
|||||||
return href
|
return href
|
||||||
purl = list(purl)
|
purl = list(purl)
|
||||||
purl[0] = ''
|
purl[0] = ''
|
||||||
href = urlunparse(purl)
|
href = urllib.parse.urlunparse(purl)
|
||||||
path, frag = urldefrag(href)
|
path, frag = urllib.parse.urldefrag(href)
|
||||||
if not path:
|
if not path:
|
||||||
if frag:
|
if frag:
|
||||||
return '#'.join((self.href, frag))
|
return '#'.join((self.href, frag))
|
||||||
@@ -1423,7 +1424,7 @@ class Guide(object):
|
|||||||
@property
|
@property
|
||||||
def item(self):
|
def item(self):
|
||||||
"""The manifest item associated with this reference."""
|
"""The manifest item associated with this reference."""
|
||||||
path = urldefrag(self.href)[0]
|
path = urllib.parse.urldefrag(self.href)[0]
|
||||||
hrefs = self.oeb.manifest.hrefs
|
hrefs = self.oeb.manifest.hrefs
|
||||||
return hrefs.get(path, None)
|
return hrefs.get(path, None)
|
||||||
|
|
||||||
@@ -1596,7 +1597,7 @@ class TOC(object):
|
|||||||
"""
|
"""
|
||||||
prev = None
|
prev = None
|
||||||
for node in list(self.nodes):
|
for node in list(self.nodes):
|
||||||
if prev and urldefrag(prev.href)[0] == urldefrag(node.href)[0]:
|
if prev and urllib.parse.urldefrag(prev.href)[0] == urllib.parse.urldefrag(node.href)[0]:
|
||||||
self.nodes.remove(node)
|
self.nodes.remove(node)
|
||||||
prev.nodes.append(node)
|
prev.nodes.append(node)
|
||||||
else:
|
else:
|
||||||
@@ -1988,7 +1989,7 @@ class OEBBook(object):
|
|||||||
def rel_href(base_href, href):
|
def rel_href(base_href, href):
|
||||||
"""Convert the URL provided in :param:`href` to a URL relative to the URL
|
"""Convert the URL provided in :param:`href` to a URL relative to the URL
|
||||||
in :param:`base_href` """
|
in :param:`base_href` """
|
||||||
if urlparse(href).scheme:
|
if urllib.parse.urlparse(href).scheme:
|
||||||
return href
|
return href
|
||||||
if '/' not in base_href:
|
if '/' not in base_href:
|
||||||
return href
|
return href
|
||||||
@@ -2004,7 +2005,7 @@ def rel_href(base_href, href):
|
|||||||
break
|
break
|
||||||
if not base:
|
if not base:
|
||||||
return href
|
return href
|
||||||
target, frag = urldefrag(href)
|
target, frag = urllib.parse.urldefrag(href)
|
||||||
target = target.split('/')
|
target = target.split('/')
|
||||||
index = 0
|
index = 0
|
||||||
for index in range(min(len(base), len(target))):
|
for index in range(min(len(base), len(target))):
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ import uuid
|
|||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from itertools import count
|
from itertools import count
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from css_parser import getUrls, replaceUrls
|
from css_parser import getUrls, replaceUrls
|
||||||
|
|
||||||
@@ -49,7 +50,6 @@ from ebook_converter.utils.logging import default_log
|
|||||||
from ebook_converter.utils.xml_parse import safe_xml_fromstring
|
from ebook_converter.utils.xml_parse import safe_xml_fromstring
|
||||||
from ebook_converter.utils.zipfile import ZipFile
|
from ebook_converter.utils.zipfile import ZipFile
|
||||||
from ebook_converter.polyglot.builtins import iteritems, unicode_type
|
from ebook_converter.polyglot.builtins import iteritems, unicode_type
|
||||||
from ebook_converter.polyglot.urllib import urlparse
|
|
||||||
|
|
||||||
exists, join, relpath = os.path.exists, os.path.join, os.path.relpath
|
exists, join, relpath = os.path.exists, os.path.join, os.path.relpath
|
||||||
|
|
||||||
@@ -107,7 +107,7 @@ def name_to_href(name, root, base=None, quote=urlquote):
|
|||||||
def href_to_name(href, root, base=None):
|
def href_to_name(href, root, base=None):
|
||||||
base = root if base is None else os.path.dirname(name_to_abspath(base, root))
|
base = root if base is None else os.path.dirname(name_to_abspath(base, root))
|
||||||
try:
|
try:
|
||||||
purl = urlparse(href)
|
purl = urllib.parse.urlparse(href)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return None
|
return None
|
||||||
if purl.scheme or not purl.path:
|
if purl.scheme or not purl.path:
|
||||||
|
|||||||
@@ -2,13 +2,13 @@ import codecs, shutil, os, posixpath
|
|||||||
from ebook_converter.polyglot.builtins import iteritems, itervalues
|
from ebook_converter.polyglot.builtins import iteritems, itervalues
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from collections import Counter, defaultdict
|
from collections import Counter, defaultdict
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from ebook_converter import sanitize_file_name
|
from ebook_converter import sanitize_file_name
|
||||||
from ebook_converter.ebooks.chardet import strip_encoding_declarations
|
from ebook_converter.ebooks.chardet import strip_encoding_declarations
|
||||||
from ebook_converter.ebooks.oeb.base import css_text
|
from ebook_converter.ebooks.oeb.base import css_text
|
||||||
from ebook_converter.ebooks.oeb.polish.css import iter_declarations, remove_property_value
|
from ebook_converter.ebooks.oeb.polish.css import iter_declarations, remove_property_value
|
||||||
from ebook_converter.ebooks.oeb.polish.utils import extract
|
from ebook_converter.ebooks.oeb.polish.utils import extract
|
||||||
from ebook_converter.polyglot.urllib import urlparse, urlunparse
|
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
@@ -38,7 +38,7 @@ class LinkReplacer(object):
|
|||||||
nname = self.link_map.get(name, None)
|
nname = self.link_map.get(name, None)
|
||||||
if not nname:
|
if not nname:
|
||||||
return url
|
return url
|
||||||
purl = urlparse(url)
|
purl = urllib.parse.urlparse(url)
|
||||||
href = self.container.name_to_href(nname, self.base)
|
href = self.container.name_to_href(nname, self.base)
|
||||||
if purl.fragment:
|
if purl.fragment:
|
||||||
nfrag = self.frag_map(name, purl.fragment)
|
nfrag = self.frag_map(name, purl.fragment)
|
||||||
@@ -68,12 +68,12 @@ class IdReplacer(object):
|
|||||||
id_map = self.id_map.get(name)
|
id_map = self.id_map.get(name)
|
||||||
if id_map is None:
|
if id_map is None:
|
||||||
return url
|
return url
|
||||||
purl = urlparse(url)
|
purl = urllib.parse.urlparse(url)
|
||||||
nfrag = id_map.get(purl.fragment)
|
nfrag = id_map.get(purl.fragment)
|
||||||
if nfrag is None:
|
if nfrag is None:
|
||||||
return url
|
return url
|
||||||
purl = purl._replace(fragment=nfrag)
|
purl = purl._replace(fragment=nfrag)
|
||||||
href = urlunparse(purl)
|
href = urllib.parse.urlunparse(purl)
|
||||||
if href != url:
|
if href != url:
|
||||||
self.replaced = True
|
self.replaced = True
|
||||||
return href
|
return href
|
||||||
@@ -89,7 +89,7 @@ class LinkRebaser(object):
|
|||||||
def __call__(self, url):
|
def __call__(self, url):
|
||||||
if url and url.startswith('#'):
|
if url and url.startswith('#'):
|
||||||
return url
|
return url
|
||||||
purl = urlparse(url)
|
purl = urllib.parse.urlparse(url)
|
||||||
frag = purl.fragment
|
frag = purl.fragment
|
||||||
name = self.container.href_to_name(url, self.old_name)
|
name = self.container.href_to_name(url, self.old_name)
|
||||||
if not name:
|
if not name:
|
||||||
|
|||||||
@@ -1,12 +1,12 @@
|
|||||||
import copy, os, re
|
import copy, os, re
|
||||||
from ebook_converter.polyglot.builtins import string_or_bytes
|
from ebook_converter.polyglot.builtins import string_or_bytes
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from ebook_converter.ebooks.oeb.base import barename, XPNSMAP, XPath, OPF, XHTML, OEB_DOCS
|
from ebook_converter.ebooks.oeb.base import barename, XPNSMAP, XPath, OPF, XHTML, OEB_DOCS
|
||||||
from ebook_converter.ebooks.oeb.polish.errors import MalformedMarkup
|
from ebook_converter.ebooks.oeb.polish.errors import MalformedMarkup
|
||||||
from ebook_converter.ebooks.oeb.polish.toc import node_from_loc
|
from ebook_converter.ebooks.oeb.polish.toc import node_from_loc
|
||||||
from ebook_converter.ebooks.oeb.polish.replace import LinkRebaser
|
from ebook_converter.ebooks.oeb.polish.replace import LinkRebaser
|
||||||
from ebook_converter.polyglot.builtins import iteritems, unicode_type
|
from ebook_converter.polyglot.builtins import iteritems, unicode_type
|
||||||
from ebook_converter.polyglot.urllib import urlparse
|
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
@@ -160,7 +160,7 @@ class SplitLinkReplacer(object):
|
|||||||
name = self.container.href_to_name(url, self.base)
|
name = self.container.href_to_name(url, self.base)
|
||||||
if name != self.top_name:
|
if name != self.top_name:
|
||||||
return url
|
return url
|
||||||
purl = urlparse(url)
|
purl = urllib.parse.urlparse(url)
|
||||||
if purl.fragment and purl.fragment in self.bottom_anchors:
|
if purl.fragment and purl.fragment in self.bottom_anchors:
|
||||||
url = self.container.name_to_href(self.bottom_name, self.base) + '#' + purl.fragment
|
url = self.container.name_to_href(self.bottom_name, self.base) + '#' + purl.fragment
|
||||||
self.replaced = True
|
self.replaced = True
|
||||||
@@ -225,7 +225,7 @@ def split(container, name, loc_or_xpath, before=True, totals=None):
|
|||||||
else:
|
else:
|
||||||
fname = container.href_to_name(url, name)
|
fname = container.href_to_name(url, name)
|
||||||
if fname == name:
|
if fname == name:
|
||||||
purl = urlparse(url)
|
purl = urllib.parse.urlparse(url)
|
||||||
if purl.fragment in anchors_in_top:
|
if purl.fragment in anchors_in_top:
|
||||||
if r is root2:
|
if r is root2:
|
||||||
a.set('href', '%s#%s' % (container.name_to_href(name, bottom_name), purl.fragment))
|
a.set('href', '%s#%s' % (container.name_to_href(name, bottom_name), purl.fragment))
|
||||||
@@ -310,7 +310,7 @@ class MergeLinkReplacer(object):
|
|||||||
amap = self.anchor_map.get(name, None)
|
amap = self.anchor_map.get(name, None)
|
||||||
if amap is None:
|
if amap is None:
|
||||||
return url
|
return url
|
||||||
purl = urlparse(url)
|
purl = urllib.parse.urlparse(url)
|
||||||
frag = purl.fragment or ''
|
frag = purl.fragment or ''
|
||||||
frag = amap.get(frag, frag)
|
frag = amap.get(frag, frag)
|
||||||
url = self.container.name_to_href(self.master, self.base) + '#' + frag
|
url = self.container.name_to_href(self.master, self.base) + '#' + frag
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ from collections import Counter, OrderedDict
|
|||||||
from functools import partial
|
from functools import partial
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
import pkg_resources
|
import pkg_resources
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from lxml.builder import ElementMaker
|
from lxml.builder import ElementMaker
|
||||||
@@ -16,7 +17,6 @@ from ebook_converter.ebooks.oeb.polish.opf import set_guide_item, get_book_langu
|
|||||||
from ebook_converter.ebooks.oeb.polish.pretty import pretty_html_tree
|
from ebook_converter.ebooks.oeb.polish.pretty import pretty_html_tree
|
||||||
from ebook_converter.utils.localization import get_lang, canonicalize_lang, lang_as_iso639_1
|
from ebook_converter.utils.localization import get_lang, canonicalize_lang, lang_as_iso639_1
|
||||||
from ebook_converter.polyglot.builtins import iteritems, unicode_type
|
from ebook_converter.polyglot.builtins import iteritems, unicode_type
|
||||||
from ebook_converter.polyglot.urllib import urlparse
|
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
@@ -150,7 +150,7 @@ def add_from_navpoint(container, navpoint, parent, ncx_name):
|
|||||||
href = content.get('src', None)
|
href = content.get('src', None)
|
||||||
if href:
|
if href:
|
||||||
dest = container.href_to_name(href, base=ncx_name)
|
dest = container.href_to_name(href, base=ncx_name)
|
||||||
frag = urlparse(href).fragment or None
|
frag = urllib.parse.urlparse(href).fragment or None
|
||||||
return parent.add(text or None, dest or None, frag or None)
|
return parent.add(text or None, dest or None, frag or None)
|
||||||
|
|
||||||
|
|
||||||
@@ -183,7 +183,7 @@ def parse_ncx(container, ncx_name):
|
|||||||
href = pt.xpath('descendant::*[calibre:lower-case(local-name()) = "content"]/@src')
|
href = pt.xpath('descendant::*[calibre:lower-case(local-name()) = "content"]/@src')
|
||||||
if href:
|
if href:
|
||||||
dest = container.href_to_name(href[0], base=ncx_name)
|
dest = container.href_to_name(href[0], base=ncx_name)
|
||||||
frag = urlparse(href[0]).fragment or None
|
frag = urllib.parse.urlparse(href[0]).fragment or None
|
||||||
toc_root.page_list.append({'dest': dest, 'pagenum': pagenum, 'frag': frag})
|
toc_root.page_list.append({'dest': dest, 'pagenum': pagenum, 'frag': frag})
|
||||||
return toc_root
|
return toc_root
|
||||||
|
|
||||||
@@ -195,7 +195,7 @@ def add_from_li(container, li, parent, nav_name):
|
|||||||
href = x.get('href')
|
href = x.get('href')
|
||||||
if href:
|
if href:
|
||||||
dest = nav_name if href.startswith('#') else container.href_to_name(href, base=nav_name)
|
dest = nav_name if href.startswith('#') else container.href_to_name(href, base=nav_name)
|
||||||
frag = urlparse(href).fragment or None
|
frag = urllib.parse.urlparse(href).fragment or None
|
||||||
break
|
break
|
||||||
return parent.add(text or None, dest or None, frag or None)
|
return parent.add(text or None, dest or None, frag or None)
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ Container-/OPF-based input OEBBook reader.
|
|||||||
"""
|
"""
|
||||||
import sys, os, uuid, copy, re, io
|
import sys, os, uuid, copy, re, io
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
@@ -23,7 +24,7 @@ from ebook_converter.ptempfile import TemporaryDirectory
|
|||||||
from ebook_converter.constants import __appname__, __version__
|
from ebook_converter.constants import __appname__, __version__
|
||||||
from ebook_converter import guess_type, xml_replace_entities
|
from ebook_converter import guess_type, xml_replace_entities
|
||||||
from ebook_converter.polyglot.builtins import unicode_type
|
from ebook_converter.polyglot.builtins import unicode_type
|
||||||
from ebook_converter.polyglot.urllib import unquote, urldefrag, urlparse
|
from ebook_converter.polyglot.urllib import unquote
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['OEBReader']
|
__all__ = ['OEBReader']
|
||||||
@@ -203,12 +204,12 @@ class OEBReader(object):
|
|||||||
for href in hrefs:
|
for href in hrefs:
|
||||||
if isinstance(href, bytes):
|
if isinstance(href, bytes):
|
||||||
href = href.decode('utf-8')
|
href = href.decode('utf-8')
|
||||||
href, _ = urldefrag(href)
|
href, _ = urllib.parse.urldefrag(href)
|
||||||
if not href:
|
if not href:
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
href = item.abshref(urlnormalize(href))
|
href = item.abshref(urlnormalize(href))
|
||||||
scheme = urlparse(href).scheme
|
scheme = urllib.parse.urlparse(href).scheme
|
||||||
except:
|
except:
|
||||||
self.oeb.log.exception(
|
self.oeb.log.exception(
|
||||||
'Skipping invalid href: %r'%href)
|
'Skipping invalid href: %r'%href)
|
||||||
@@ -221,9 +222,9 @@ class OEBReader(object):
|
|||||||
except:
|
except:
|
||||||
urls = []
|
urls = []
|
||||||
for url in urls:
|
for url in urls:
|
||||||
href, _ = urldefrag(url)
|
href, _ = urllib.parse.urldefrag(url)
|
||||||
href = item.abshref(urlnormalize(href))
|
href = item.abshref(urlnormalize(href))
|
||||||
scheme = urlparse(href).scheme
|
scheme = urllib.parse.urlparse(href).scheme
|
||||||
if not scheme and href not in known:
|
if not scheme and href not in known:
|
||||||
new.add(href)
|
new.add(href)
|
||||||
unchecked.clear()
|
unchecked.clear()
|
||||||
@@ -294,7 +295,7 @@ class OEBReader(object):
|
|||||||
# TODO: handle fallback chains
|
# TODO: handle fallback chains
|
||||||
continue
|
continue
|
||||||
for href in selector(item.data):
|
for href in selector(item.data):
|
||||||
href, _ = urldefrag(href)
|
href, _ = urllib.parse.urldefrag(href)
|
||||||
if not href:
|
if not href:
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
@@ -350,7 +351,7 @@ class OEBReader(object):
|
|||||||
manifest = self.oeb.manifest
|
manifest = self.oeb.manifest
|
||||||
for elem in xpath(opf, '/o2:package/o2:guide/o2:reference'):
|
for elem in xpath(opf, '/o2:package/o2:guide/o2:reference'):
|
||||||
ref_href = elem.get('href')
|
ref_href = elem.get('href')
|
||||||
path = urlnormalize(urldefrag(ref_href)[0])
|
path = urlnormalize(urllib.parse.urldefrag(ref_href)[0])
|
||||||
if path not in manifest.hrefs:
|
if path not in manifest.hrefs:
|
||||||
corrected_href = None
|
corrected_href = None
|
||||||
for href in manifest.hrefs:
|
for href in manifest.hrefs:
|
||||||
@@ -393,7 +394,7 @@ class OEBReader(object):
|
|||||||
# This node is useless
|
# This node is useless
|
||||||
continue
|
continue
|
||||||
href = item.abshref(urlnormalize(href[0])) if href and href[0] else ''
|
href = item.abshref(urlnormalize(href[0])) if href and href[0] else ''
|
||||||
path, _ = urldefrag(href)
|
path, _ = urllib.parse.urldefrag(href)
|
||||||
if path and path not in self.oeb.manifest.hrefs:
|
if path and path not in self.oeb.manifest.hrefs:
|
||||||
path = urlnormalize(path)
|
path = urlnormalize(path)
|
||||||
if href and path not in self.oeb.manifest.hrefs:
|
if href and path not in self.oeb.manifest.hrefs:
|
||||||
@@ -468,7 +469,7 @@ class OEBReader(object):
|
|||||||
href = site.get('href')
|
href = site.get('href')
|
||||||
if not title or not href:
|
if not title or not href:
|
||||||
continue
|
continue
|
||||||
path, _ = urldefrag(urlnormalize(href))
|
path, _ = urllib.parse.urldefrag(urlnormalize(href))
|
||||||
if path not in self.oeb.manifest.hrefs:
|
if path not in self.oeb.manifest.hrefs:
|
||||||
self.logger.warn('TOC reference %r not found' % href)
|
self.logger.warn('TOC reference %r not found' % href)
|
||||||
continue
|
continue
|
||||||
@@ -480,7 +481,7 @@ class OEBReader(object):
|
|||||||
if 'toc' not in self.oeb.guide:
|
if 'toc' not in self.oeb.guide:
|
||||||
return False
|
return False
|
||||||
self.log.debug('Reading TOC from HTML...')
|
self.log.debug('Reading TOC from HTML...')
|
||||||
itempath, frag = urldefrag(self.oeb.guide['toc'].href)
|
itempath, frag = urllib.parse.urldefrag(self.oeb.guide['toc'].href)
|
||||||
item = self.oeb.manifest.hrefs[itempath]
|
item = self.oeb.manifest.hrefs[itempath]
|
||||||
html = item.data
|
html = item.data
|
||||||
if frag:
|
if frag:
|
||||||
@@ -496,7 +497,7 @@ class OEBReader(object):
|
|||||||
for anchor in xpath(html, './/h:a[@href]'):
|
for anchor in xpath(html, './/h:a[@href]'):
|
||||||
href = anchor.attrib['href']
|
href = anchor.attrib['href']
|
||||||
href = item.abshref(urlnormalize(href))
|
href = item.abshref(urlnormalize(href))
|
||||||
path, frag = urldefrag(href)
|
path, frag = urllib.parse.urldefrag(href)
|
||||||
if path not in self.oeb.manifest.hrefs:
|
if path not in self.oeb.manifest.hrefs:
|
||||||
continue
|
continue
|
||||||
title = xml2text(anchor)
|
title = xml2text(anchor)
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import textwrap
|
import textwrap
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from ebook_converter import guess_type
|
from ebook_converter import guess_type
|
||||||
from ebook_converter.utils.imghdr import identify
|
from ebook_converter.utils.imghdr import identify
|
||||||
@@ -93,7 +94,6 @@ class CoverManager(object):
|
|||||||
return -1, -1
|
return -1, -1
|
||||||
|
|
||||||
def insert_cover(self):
|
def insert_cover(self):
|
||||||
from ebook_converter.ebooks.oeb.base import urldefrag
|
|
||||||
g, m = self.oeb.guide, self.oeb.manifest
|
g, m = self.oeb.guide, self.oeb.manifest
|
||||||
item = None
|
item = None
|
||||||
href = None
|
href = None
|
||||||
@@ -124,7 +124,7 @@ class CoverManager(object):
|
|||||||
data=safe_xml_fromstring(tp))
|
data=safe_xml_fromstring(tp))
|
||||||
else:
|
else:
|
||||||
item = self.oeb.manifest.hrefs[
|
item = self.oeb.manifest.hrefs[
|
||||||
urldefrag(self.oeb.guide['titlepage'].href)[0]]
|
urllib.parse.urldefrag(self.oeb.guide['titlepage'].href)[0]]
|
||||||
if item is not None:
|
if item is not None:
|
||||||
self.oeb.spine.insert(0, item, True)
|
self.oeb.spine.insert(0, item, True)
|
||||||
if 'cover' not in self.oeb.guide.refs:
|
if 'cover' not in self.oeb.guide.refs:
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
import posixpath
|
import posixpath
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
from ebook_converter.ebooks.oeb.base import rewrite_links, urlnormalize
|
from ebook_converter.ebooks.oeb.base import rewrite_links, urlnormalize
|
||||||
from ebook_converter.polyglot.urllib import urldefrag, urlparse
|
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
@@ -38,7 +38,7 @@ class RenameFiles(object): # {{{
|
|||||||
if self.oeb.guide:
|
if self.oeb.guide:
|
||||||
for ref in self.oeb.guide.values():
|
for ref in self.oeb.guide.values():
|
||||||
href = urlnormalize(ref.href)
|
href = urlnormalize(ref.href)
|
||||||
href, frag = urldefrag(href)
|
href, frag = urllib.parse.urldefrag(href)
|
||||||
replacement = self.rename_map.get(href, None)
|
replacement = self.rename_map.get(href, None)
|
||||||
if replacement is not None:
|
if replacement is not None:
|
||||||
nhref = replacement
|
nhref = replacement
|
||||||
@@ -52,7 +52,7 @@ class RenameFiles(object): # {{{
|
|||||||
def fix_toc_entry(self, toc):
|
def fix_toc_entry(self, toc):
|
||||||
if toc.href:
|
if toc.href:
|
||||||
href = urlnormalize(toc.href)
|
href = urlnormalize(toc.href)
|
||||||
href, frag = urldefrag(href)
|
href, frag = urllib.parse.urldefrag(href)
|
||||||
replacement = self.rename_map.get(href, None)
|
replacement = self.rename_map.get(href, None)
|
||||||
|
|
||||||
if replacement is not None:
|
if replacement is not None:
|
||||||
@@ -66,11 +66,11 @@ class RenameFiles(object): # {{{
|
|||||||
|
|
||||||
def url_replacer(self, orig_url):
|
def url_replacer(self, orig_url):
|
||||||
url = urlnormalize(orig_url)
|
url = urlnormalize(orig_url)
|
||||||
parts = urlparse(url)
|
parts = urllib.parse.urlparse(url)
|
||||||
if parts.scheme:
|
if parts.scheme:
|
||||||
# Only rewrite local URLs
|
# Only rewrite local URLs
|
||||||
return orig_url
|
return orig_url
|
||||||
path, frag = urldefrag(url)
|
path, frag = urllib.parse.urldefrag(url)
|
||||||
if self.renamed_items_map:
|
if self.renamed_items_map:
|
||||||
orig_item = self.renamed_items_map.get(self.current_item.href, self.current_item)
|
orig_item = self.renamed_items_map.get(self.current_item.href, self.current_item)
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -2,10 +2,11 @@ import sys, os, re
|
|||||||
from xml.sax.saxutils import escape
|
from xml.sax.saxutils import escape
|
||||||
from string import Formatter
|
from string import Formatter
|
||||||
import pkg_resources
|
import pkg_resources
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from ebook_converter import guess_type, strftime
|
from ebook_converter import guess_type, strftime
|
||||||
from ebook_converter.constants import iswindows
|
from ebook_converter.constants import iswindows
|
||||||
from ebook_converter.ebooks.oeb.base import XPath, XHTML_NS, XHTML, xml2text, urldefrag, urlnormalize
|
from ebook_converter.ebooks.oeb.base import XPath, XHTML_NS, XHTML, xml2text, urlnormalize
|
||||||
from ebook_converter.library.comments import comments_to_html, markdown
|
from ebook_converter.library.comments import comments_to_html, markdown
|
||||||
from ebook_converter.utils.date import is_date_undefined, as_local_time
|
from ebook_converter.utils.date import is_date_undefined, as_local_time
|
||||||
from ebook_converter.ebooks.chardet import strip_encoding_declarations
|
from ebook_converter.ebooks.chardet import strip_encoding_declarations
|
||||||
@@ -73,7 +74,7 @@ class RemoveFirstImage(Base):
|
|||||||
self.log.warn('Could not find first image to remove')
|
self.log.warn('Could not find first image to remove')
|
||||||
if deleted_item is not None:
|
if deleted_item is not None:
|
||||||
for item in list(self.oeb.toc):
|
for item in list(self.oeb.toc):
|
||||||
href = urldefrag(item.href)[0]
|
href = urllib.parse.urldefrag(item.href)[0]
|
||||||
if href == deleted_item.href:
|
if href == deleted_item.href:
|
||||||
self.oeb.toc.remove(item)
|
self.oeb.toc.remove(item)
|
||||||
self.oeb.guide.remove_by_href(deleted_item.href)
|
self.oeb.guide.remove_by_href(deleted_item.href)
|
||||||
|
|||||||
@@ -1,7 +1,9 @@
|
|||||||
"""
|
"""
|
||||||
SVG rasterization transform.
|
SVG rasterization transform.
|
||||||
"""
|
"""
|
||||||
import os, re
|
import os
|
||||||
|
import re
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
# from PyQt5.Qt import (
|
# from PyQt5.Qt import (
|
||||||
# Qt, QByteArray, QBuffer, QIODevice, QColor, QImage, QPainter, QSvgRenderer)
|
# Qt, QByteArray, QBuffer, QIODevice, QColor, QImage, QPainter, QSvgRenderer)
|
||||||
@@ -14,7 +16,6 @@ from ebook_converter.ebooks.oeb.stylizer import Stylizer
|
|||||||
from ebook_converter.ptempfile import PersistentTemporaryFile
|
from ebook_converter.ptempfile import PersistentTemporaryFile
|
||||||
from ebook_converter.utils.imghdr import what
|
from ebook_converter.utils.imghdr import what
|
||||||
from ebook_converter.polyglot.builtins import unicode_type
|
from ebook_converter.polyglot.builtins import unicode_type
|
||||||
from ebook_converter.polyglot.urllib import urldefrag
|
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
@@ -114,7 +115,7 @@ class SVGRasterizer(object):
|
|||||||
hrefs = self.oeb.manifest.hrefs
|
hrefs = self.oeb.manifest.hrefs
|
||||||
for elem in xpath(svg, '//svg:*[@xl:href]'):
|
for elem in xpath(svg, '//svg:*[@xl:href]'):
|
||||||
href = urlnormalize(elem.attrib[XLINK('href')])
|
href = urlnormalize(elem.attrib[XLINK('href')])
|
||||||
path = urldefrag(href)[0]
|
path = urllib.parse.urldefrag(href)[0]
|
||||||
if not path:
|
if not path:
|
||||||
continue
|
continue
|
||||||
abshref = item.abshref(path)
|
abshref = item.abshref(path)
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ assumes a prior call to the flatcss transform.
|
|||||||
"""
|
"""
|
||||||
import os, functools, collections, re, copy
|
import os, functools, collections, re, copy
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from lxml.etree import XPath as _XPath
|
from lxml.etree import XPath as _XPath
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
@@ -12,7 +13,7 @@ from lxml import etree
|
|||||||
from ebook_converter import as_unicode, force_unicode
|
from ebook_converter import as_unicode, force_unicode
|
||||||
from ebook_converter.ebooks.epub import rules
|
from ebook_converter.ebooks.epub import rules
|
||||||
from ebook_converter.ebooks.oeb.base import (OEB_STYLES, XPNSMAP as NAMESPACES,
|
from ebook_converter.ebooks.oeb.base import (OEB_STYLES, XPNSMAP as NAMESPACES,
|
||||||
urldefrag, rewrite_links, XHTML, urlnormalize)
|
rewrite_links, XHTML, urlnormalize)
|
||||||
from ebook_converter.ebooks.oeb.polish.split import do_split
|
from ebook_converter.ebooks.oeb.polish.split import do_split
|
||||||
from ebook_converter.polyglot.builtins import iteritems, unicode_type
|
from ebook_converter.polyglot.builtins import iteritems, unicode_type
|
||||||
from ebook_converter.polyglot.urllib import unquote
|
from ebook_converter.polyglot.urllib import unquote
|
||||||
@@ -162,7 +163,7 @@ class Split(object):
|
|||||||
rewrite_links(item.data, self.rewrite_links)
|
rewrite_links(item.data, self.rewrite_links)
|
||||||
|
|
||||||
def rewrite_links(self, url):
|
def rewrite_links(self, url):
|
||||||
href, frag = urldefrag(url)
|
href, frag = urllib.parse.urldefrag(url)
|
||||||
try:
|
try:
|
||||||
href = self.current_item.abshref(href)
|
href = self.current_item.abshref(href)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
@@ -453,7 +454,7 @@ class FlowSplitter(object):
|
|||||||
|
|
||||||
if self.oeb.guide:
|
if self.oeb.guide:
|
||||||
for ref in self.oeb.guide.values():
|
for ref in self.oeb.guide.values():
|
||||||
href, frag = urldefrag(ref.href)
|
href, frag = urllib.parse.urldefrag(ref.href)
|
||||||
if href == self.item.href:
|
if href == self.item.href:
|
||||||
nhref = self.anchor_map[frag if frag else None]
|
nhref = self.anchor_map[frag if frag else None]
|
||||||
if frag:
|
if frag:
|
||||||
@@ -462,7 +463,7 @@ class FlowSplitter(object):
|
|||||||
|
|
||||||
def fix_toc_entry(toc):
|
def fix_toc_entry(toc):
|
||||||
if toc.href:
|
if toc.href:
|
||||||
href, frag = urldefrag(toc.href)
|
href, frag = urllib.parse.urldefrag(toc.href)
|
||||||
if href == self.item.href:
|
if href == self.item.href:
|
||||||
nhref = self.anchor_map[frag if frag else None]
|
nhref = self.anchor_map[frag if frag else None]
|
||||||
if frag:
|
if frag:
|
||||||
@@ -476,7 +477,7 @@ class FlowSplitter(object):
|
|||||||
|
|
||||||
if self.oeb.pages:
|
if self.oeb.pages:
|
||||||
for page in self.oeb.pages:
|
for page in self.oeb.pages:
|
||||||
href, frag = urldefrag(page.href)
|
href, frag = urllib.parse.urldefrag(page.href)
|
||||||
if href == self.item.href:
|
if href == self.item.href:
|
||||||
nhref = self.anchor_map[frag if frag else None]
|
nhref = self.anchor_map[frag if frag else None]
|
||||||
if frag:
|
if frag:
|
||||||
|
|||||||
@@ -1,4 +1,6 @@
|
|||||||
import re, uuid
|
import re
|
||||||
|
import uuid
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from collections import OrderedDict, Counter
|
from collections import OrderedDict, Counter
|
||||||
@@ -6,7 +8,6 @@ from collections import OrderedDict, Counter
|
|||||||
from ebook_converter.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text, barename
|
from ebook_converter.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text, barename
|
||||||
from ebook_converter.ebooks import ConversionError
|
from ebook_converter.ebooks import ConversionError
|
||||||
from ebook_converter.polyglot.builtins import itervalues, unicode_type
|
from ebook_converter.polyglot.builtins import itervalues, unicode_type
|
||||||
from ebook_converter.polyglot.urllib import urlparse
|
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
@@ -209,7 +210,7 @@ class DetectStructure(object):
|
|||||||
for a in XPath('//h:a[@href]')(item.data):
|
for a in XPath('//h:a[@href]')(item.data):
|
||||||
href = a.get('href')
|
href = a.get('href')
|
||||||
try:
|
try:
|
||||||
purl = urlparse(href)
|
purl = urllib.parse.urlparse(href)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
self.log.warning('Ignoring malformed URL:', href)
|
self.log.warning('Ignoring malformed URL:', href)
|
||||||
continue
|
continue
|
||||||
|
|||||||
@@ -1,9 +1,10 @@
|
|||||||
"""
|
"""
|
||||||
OPF manifest trimming transform.
|
OPF manifest trimming transform.
|
||||||
"""
|
"""
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from ebook_converter.ebooks.oeb.base import CSS_MIME, OEB_DOCS
|
from ebook_converter.ebooks.oeb.base import CSS_MIME, OEB_DOCS
|
||||||
from ebook_converter.ebooks.oeb.base import urlnormalize, iterlinks
|
from ebook_converter.ebooks.oeb.base import urlnormalize, iterlinks
|
||||||
from ebook_converter.polyglot.urllib import urldefrag
|
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
@@ -32,7 +33,7 @@ class ManifestTrimmer(object):
|
|||||||
elif item.value in oeb.manifest.ids:
|
elif item.value in oeb.manifest.ids:
|
||||||
used.add(oeb.manifest.ids[item.value])
|
used.add(oeb.manifest.ids[item.value])
|
||||||
for ref in oeb.guide.values():
|
for ref in oeb.guide.values():
|
||||||
path, _ = urldefrag(ref.href)
|
path, _ = urllib.parse.urldefrag(ref.href)
|
||||||
if path in oeb.manifest.hrefs:
|
if path in oeb.manifest.hrefs:
|
||||||
used.add(oeb.manifest.hrefs[path])
|
used.add(oeb.manifest.hrefs[path])
|
||||||
# TOC items are required to be in the spine
|
# TOC items are required to be in the spine
|
||||||
|
|||||||
@@ -4,11 +4,12 @@ PyTextile
|
|||||||
A Humane Web Text Generator
|
A Humane Web Text Generator
|
||||||
"""
|
"""
|
||||||
import re
|
import re
|
||||||
|
import urllib.request
|
||||||
|
import urllib.parse
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from ebook_converter.utils.smartypants import smartyPants
|
from ebook_converter.utils.smartypants import smartyPants
|
||||||
from ebook_converter.polyglot.builtins import unicode_type
|
from ebook_converter.polyglot.builtins import unicode_type
|
||||||
from ebook_converter.polyglot.urllib import urlopen, urlparse
|
|
||||||
|
|
||||||
|
|
||||||
# Last upstream version basis
|
# Last upstream version basis
|
||||||
@@ -85,7 +86,7 @@ def getimagesize(url):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
p = ImageFile.Parser()
|
p = ImageFile.Parser()
|
||||||
f = urlopen(url)
|
f = urllib.request.urlopen(url)
|
||||||
while True:
|
while True:
|
||||||
s = f.read(1024)
|
s = f.read(1024)
|
||||||
if not s:
|
if not s:
|
||||||
@@ -777,11 +778,11 @@ class Textile(object):
|
|||||||
True
|
True
|
||||||
|
|
||||||
"""
|
"""
|
||||||
(scheme, netloc) = urlparse(url)[0:2]
|
(scheme, netloc) = urllib.parse.urlparse(url)[0:2]
|
||||||
return not scheme and not netloc
|
return not scheme and not netloc
|
||||||
|
|
||||||
def relURL(self, url):
|
def relURL(self, url):
|
||||||
scheme = urlparse(url)[0]
|
scheme = urllib.parse.urlparse(url)[0]
|
||||||
if self.restricted and scheme and scheme not in self.url_schemes:
|
if self.restricted and scheme and scheme not in self.url_schemes:
|
||||||
return '#'
|
return '#'
|
||||||
return url
|
return url
|
||||||
|
|||||||
@@ -1,8 +0,0 @@
|
|||||||
from ebook_converter.polyglot.builtins import is_py3
|
|
||||||
if is_py3:
|
|
||||||
from functools import lru_cache
|
|
||||||
else:
|
|
||||||
from backports.functools_lru_cache import lru_cache
|
|
||||||
|
|
||||||
|
|
||||||
lru_cache
|
|
||||||
@@ -1,10 +0,0 @@
|
|||||||
#!/usr/bin/env python2
|
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
# License: GPL v3 Copyright: 2019, Eli Schwartz <eschwartz@archlinux.org>
|
|
||||||
|
|
||||||
from ebook_converter.polyglot.builtins import is_py3
|
|
||||||
|
|
||||||
if is_py3:
|
|
||||||
from html.entities import name2codepoint
|
|
||||||
else:
|
|
||||||
from htmlentitydefs import name2codepoint
|
|
||||||
@@ -1,44 +1,19 @@
|
|||||||
from ebook_converter.polyglot.builtins import is_py3
|
from urllib.request import (build_opener, getproxies, install_opener,
|
||||||
|
HTTPBasicAuthHandler, HTTPCookieProcessor, HTTPDigestAuthHandler,
|
||||||
|
url2pathname, urlopen, Request)
|
||||||
|
from urllib.parse import (parse_qs, quote, unquote as uq, quote_plus, urldefrag,
|
||||||
|
urlencode, urljoin, urlparse, urlunparse, urlsplit, urlunsplit)
|
||||||
|
from urllib.error import HTTPError, URLError
|
||||||
|
|
||||||
|
|
||||||
if is_py3:
|
def unquote(x, encoding='utf-8', errors='replace'):
|
||||||
from urllib.request import (build_opener, getproxies, install_opener, # noqa
|
binary = isinstance(x, bytes)
|
||||||
HTTPBasicAuthHandler, HTTPCookieProcessor, HTTPDigestAuthHandler, # noqa
|
if binary:
|
||||||
url2pathname, urlopen, Request) # noqa
|
x = x.decode(encoding, errors)
|
||||||
from urllib.parse import (parse_qs, quote, unquote as uq, quote_plus, urldefrag, # noqa
|
ans = uq(x, encoding, errors)
|
||||||
urlencode, urljoin, urlparse, urlunparse, urlsplit, urlunsplit) # noqa
|
if binary:
|
||||||
from urllib.error import HTTPError, URLError # noqa
|
ans = ans.encode(encoding, errors)
|
||||||
|
return ans
|
||||||
def unquote(x, encoding='utf-8', errors='replace'):
|
|
||||||
binary = isinstance(x, bytes)
|
|
||||||
if binary:
|
|
||||||
x = x.decode(encoding, errors)
|
|
||||||
ans = uq(x, encoding, errors)
|
|
||||||
if binary:
|
|
||||||
ans = ans.encode(encoding, errors)
|
|
||||||
return ans
|
|
||||||
|
|
||||||
else:
|
|
||||||
from urllib import (getproxies, quote, unquote as uq, quote_plus, url2pathname, # noqa
|
|
||||||
urlencode) # noqa
|
|
||||||
from urllib2 import (build_opener, install_opener, HTTPBasicAuthHandler, # noqa
|
|
||||||
HTTPCookieProcessor, HTTPDigestAuthHandler, HTTPError, URLError, # noqa
|
|
||||||
urlopen, Request) # noqa
|
|
||||||
from urlparse import (parse_qs, urldefrag, urljoin, urlparse, urlunparse, # noqa
|
|
||||||
urlsplit, urlunsplit) # noqa
|
|
||||||
|
|
||||||
def unquote(x, encoding='utf-8', errors='replace'):
|
|
||||||
# unquote must run on a bytestring and will return a bytestring
|
|
||||||
# If it runs on a unicode object, it returns a double encoded unicode
|
|
||||||
# string: unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8')
|
|
||||||
# and the latter is correct
|
|
||||||
binary = isinstance(x, bytes)
|
|
||||||
if not binary:
|
|
||||||
x = x.encode(encoding, errors)
|
|
||||||
ans = uq(x)
|
|
||||||
if not binary:
|
|
||||||
ans = ans.decode(encoding, errors)
|
|
||||||
return ans
|
|
||||||
|
|
||||||
|
|
||||||
def unquote_plus(x, encoding='utf-8', errors='replace'):
|
def unquote_plus(x, encoding='utf-8', errors='replace'):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import re
|
import re
|
||||||
|
import html.entities
|
||||||
|
|
||||||
from ebook_converter.polyglot.builtins import codepoint_to_chr
|
from ebook_converter.polyglot.builtins import codepoint_to_chr
|
||||||
from ebook_converter.polyglot.html_entities import name2codepoint
|
|
||||||
from ebook_converter.constants import plugins, preferred_encoding
|
from ebook_converter.constants import plugins, preferred_encoding
|
||||||
|
|
||||||
|
|
||||||
@@ -77,7 +77,8 @@ def unescape(text, rm=False, rchar=''):
|
|||||||
else:
|
else:
|
||||||
# named entity
|
# named entity
|
||||||
try:
|
try:
|
||||||
text = codepoint_to_chr(name2codepoint[text[1:-1]])
|
text = codepoint_to_chr(html.entities
|
||||||
|
.name2codepoint[text[1:-1]])
|
||||||
except KeyError:
|
except KeyError:
|
||||||
pass
|
pass
|
||||||
if rm:
|
if rm:
|
||||||
|
|||||||
@@ -1,10 +1,16 @@
|
|||||||
import os, errno, sys
|
import errno
|
||||||
from threading import Thread
|
import functools
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import threading
|
||||||
|
|
||||||
from ebook_converter import force_unicode
|
from ebook_converter import force_unicode
|
||||||
from ebook_converter.constants import iswindows, get_windows_username, islinux, filesystem_encoding, ispy3
|
from ebook_converter.constants import filesystem_encoding
|
||||||
|
from ebook_converter.constants import get_windows_username
|
||||||
|
from ebook_converter.constants import islinux
|
||||||
|
from ebook_converter.constants import ispy3
|
||||||
|
from ebook_converter.constants import iswindows
|
||||||
from ebook_converter.utils.filenames import ascii_filename
|
from ebook_converter.utils.filenames import ascii_filename
|
||||||
from ebook_converter.polyglot.functools import lru_cache
|
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
@@ -24,7 +30,7 @@ def eintr_retry_call(func, *args, **kwargs):
|
|||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
@lru_cache()
|
@functools.lru_cache()
|
||||||
def socket_address(which):
|
def socket_address(which):
|
||||||
if iswindows:
|
if iswindows:
|
||||||
ans = r'\\.\pipe\Calibre' + which
|
ans = r'\\.\pipe\Calibre' + which
|
||||||
@@ -58,12 +64,12 @@ def viewer_socket_address():
|
|||||||
return socket_address('Viewer' if iswindows else 'viewer')
|
return socket_address('Viewer' if iswindows else 'viewer')
|
||||||
|
|
||||||
|
|
||||||
class RC(Thread):
|
class RC(threading.Thread):
|
||||||
|
|
||||||
def __init__(self, print_error=True, socket_address=None):
|
def __init__(self, print_error=True, socket_address=None):
|
||||||
self.print_error = print_error
|
self.print_error = print_error
|
||||||
self.socket_address = socket_address or gui_socket_address()
|
self.socket_address = socket_address or gui_socket_address()
|
||||||
Thread.__init__(self)
|
threading.Thread.__init__(self)
|
||||||
self.conn = None
|
self.conn = None
|
||||||
self.daemon = True
|
self.daemon = True
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user