mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-03-12 20:45:47 +01:00
Some misc cleanups
This commit is contained in:
@@ -7,12 +7,9 @@ import sys
|
|||||||
import time
|
import time
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
import urllib.request
|
import urllib.request
|
||||||
import warnings
|
|
||||||
|
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
if os.getenv('CALIBRE_SHOW_DEPRECATION_WARNINGS') is None:
|
|
||||||
warnings.simplefilter('ignore', DeprecationWarning)
|
|
||||||
try:
|
try:
|
||||||
os.getcwd()
|
os.getcwd()
|
||||||
except EnvironmentError:
|
except EnvironmentError:
|
||||||
@@ -140,9 +137,6 @@ def sanitize_file_name(name, substitute='_'):
|
|||||||
return one
|
return one
|
||||||
|
|
||||||
|
|
||||||
sanitize_file_name2 = sanitize_file_name_unicode = sanitize_file_name
|
|
||||||
|
|
||||||
|
|
||||||
def prints(*args, **kwargs):
|
def prints(*args, **kwargs):
|
||||||
'''
|
'''
|
||||||
Print unicode arguments safely by encoding them to preferred_encoding
|
Print unicode arguments safely by encoding them to preferred_encoding
|
||||||
@@ -551,12 +545,12 @@ def entity_to_unicode(match, exceptions=[], encoding='cp1252',
|
|||||||
|
|
||||||
|
|
||||||
_ent_pat = re.compile(r'&(\S+?);')
|
_ent_pat = re.compile(r'&(\S+?);')
|
||||||
xml_entity_to_unicode = partial(entity_to_unicode, result_exceptions={
|
xml_entity_to_unicode = partial(entity_to_unicode,
|
||||||
'"' : '&quot;',
|
result_exceptions={'"': '&quot;',
|
||||||
"'" : '&apos;',
|
"'": '&apos;',
|
||||||
'<' : '&lt;',
|
'<': '&lt;',
|
||||||
'>' : '&gt;',
|
'>': '&gt;',
|
||||||
'&' : '&amp;'})
|
'&': '&amp;'})
|
||||||
|
|
||||||
|
|
||||||
def replace_entities(raw, encoding='cp1252'):
|
def replace_entities(raw, encoding='cp1252'):
|
||||||
@@ -586,7 +580,7 @@ def force_unicode(obj, enc=preferred_encoding):
|
|||||||
except Exception:
|
except Exception:
|
||||||
try:
|
try:
|
||||||
obj = obj.decode(filesystem_encoding if enc ==
|
obj = obj.decode(filesystem_encoding if enc ==
|
||||||
preferred_encoding else preferred_encoding)
|
preferred_encoding else preferred_encoding)
|
||||||
except Exception:
|
except Exception:
|
||||||
try:
|
try:
|
||||||
obj = obj.decode('utf-8')
|
obj = obj.decode('utf-8')
|
||||||
@@ -628,11 +622,6 @@ def human_readable(size, sep=' '):
|
|||||||
return size + sep + suffix
|
return size + sep + suffix
|
||||||
|
|
||||||
|
|
||||||
def ipython(user_ns=None):
|
|
||||||
from ebook_converter.utils.ipython import ipython
|
|
||||||
ipython(user_ns=user_ns)
|
|
||||||
|
|
||||||
|
|
||||||
def fsync(fileobj):
|
def fsync(fileobj):
|
||||||
fileobj.flush()
|
fileobj.flush()
|
||||||
os.fsync(fileobj.fileno())
|
os.fsync(fileobj.fileno())
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ from ebook_converter.customize import profiles
|
|||||||
from ebook_converter.customize import builtins
|
from ebook_converter.customize import builtins
|
||||||
from ebook_converter.ebooks import metadata
|
from ebook_converter.ebooks import metadata
|
||||||
from ebook_converter.utils import config as cfg
|
from ebook_converter.utils import config as cfg
|
||||||
|
from ebook_converter import prints
|
||||||
|
|
||||||
|
|
||||||
builtin_names = frozenset(p.name for p in builtins.plugins)
|
builtin_names = frozenset(p.name for p in builtins.plugins)
|
||||||
@@ -348,7 +349,6 @@ def set_file_type_metadata(stream, mi, ftype, report_error=None):
|
|||||||
break
|
break
|
||||||
except Exception:
|
except Exception:
|
||||||
if report_error is None:
|
if report_error is None:
|
||||||
from ebook_converter import prints
|
|
||||||
prints('Failed to set metadata for the', ftype.upper(),
|
prints('Failed to set metadata for the', ftype.upper(),
|
||||||
'format of:', getattr(mi, 'title', ''),
|
'format of:', getattr(mi, 'title', ''),
|
||||||
file=sys.stderr)
|
file=sys.stderr)
|
||||||
@@ -481,8 +481,6 @@ def initialize_plugins():
|
|||||||
_initialized_plugins.append(plugin)
|
_initialized_plugins.append(plugin)
|
||||||
except Exception:
|
except Exception:
|
||||||
print('Failed to initialize plugin:', repr(zfp))
|
print('Failed to initialize plugin:', repr(zfp))
|
||||||
# Prevent a custom plugin from overriding stdout/stderr as this breaks
|
|
||||||
# ipython
|
|
||||||
sys.stdout, sys.stderr = ostdout, ostderr
|
sys.stdout, sys.stderr = ostdout, ostderr
|
||||||
_initialized_plugins.sort(key=lambda x: x.priority, reverse=True)
|
_initialized_plugins.sort(key=lambda x: x.priority, reverse=True)
|
||||||
reread_filetype_plugins()
|
reread_filetype_plugins()
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ CHM File decoding support
|
|||||||
"""
|
"""
|
||||||
import os
|
import os
|
||||||
from lxml import html
|
from lxml import html
|
||||||
|
from lxml.html import builder
|
||||||
|
|
||||||
from ebook_converter.polyglot.urllib import unquote as _unquote
|
from ebook_converter.polyglot.urllib import unquote as _unquote
|
||||||
from ebook_converter.ebooks.oeb.base import urlquote
|
from ebook_converter.ebooks.oeb.base import urlquote
|
||||||
@@ -19,13 +20,16 @@ __copyright__ = ('2008, Kovid Goyal <kovid at kovidgoyal.net>, '
|
|||||||
|
|
||||||
class CHMInput(InputFormatPlugin):
|
class CHMInput(InputFormatPlugin):
|
||||||
|
|
||||||
name = 'CHM Input'
|
name = 'CHM Input'
|
||||||
author = 'Kovid Goyal and Alex Bramley'
|
author = 'Kovid Goyal and Alex Bramley'
|
||||||
description = 'Convert CHM files to OEB'
|
description = 'Convert CHM files to OEB'
|
||||||
file_types = {'chm'}
|
file_types = {'chm'}
|
||||||
commit_name = 'chm_input'
|
commit_name = 'chm_input'
|
||||||
|
|
||||||
def _chmtohtml(self, output_dir, chm_path, no_images, log, debug_dump=False):
|
def _chmtohtml(self, output_dir, chm_path, no_images, log,
|
||||||
|
debug_dump=False):
|
||||||
|
# NOTE(gryf): for some reason, this import cannot be moved to the top
|
||||||
|
# of module.
|
||||||
from ebook_converter.ebooks.chm.reader import CHMReader
|
from ebook_converter.ebooks.chm.reader import CHMReader
|
||||||
log.debug('Opening CHM file')
|
log.debug('Opening CHM file')
|
||||||
rdr = CHMReader(chm_path, log, input_encoding=self.opts.input_encoding)
|
rdr = CHMReader(chm_path, log, input_encoding=self.opts.input_encoding)
|
||||||
@@ -35,6 +39,8 @@ class CHMInput(InputFormatPlugin):
|
|||||||
return rdr.hhc_path
|
return rdr.hhc_path
|
||||||
|
|
||||||
def convert(self, stream, options, file_ext, log, accelerators):
|
def convert(self, stream, options, file_ext, log, accelerators):
|
||||||
|
# NOTE(gryf): for some reason, those import cannot be moved to the top
|
||||||
|
# of module.
|
||||||
from ebook_converter.ebooks.chm.metadata import get_metadata_from_reader
|
from ebook_converter.ebooks.chm.metadata import get_metadata_from_reader
|
||||||
from ebook_converter.customize.ui import plugin_for_input_format
|
from ebook_converter.customize.ui import plugin_for_input_format
|
||||||
self.opts = options
|
self.opts = options
|
||||||
@@ -59,7 +65,7 @@ class CHMInput(InputFormatPlugin):
|
|||||||
if odi:
|
if odi:
|
||||||
debug_dump = os.path.join(odi, 'input')
|
debug_dump = os.path.join(odi, 'input')
|
||||||
mainname = self._chmtohtml(tdir, chm_name, no_images, log,
|
mainname = self._chmtohtml(tdir, chm_name, no_images, log,
|
||||||
debug_dump=debug_dump)
|
debug_dump=debug_dump)
|
||||||
mainpath = os.path.join(tdir, mainname)
|
mainpath = os.path.join(tdir, mainname)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -68,11 +74,9 @@ class CHMInput(InputFormatPlugin):
|
|||||||
log.exception('Failed to read metadata, using filename')
|
log.exception('Failed to read metadata, using filename')
|
||||||
from ebook_converter.ebooks.metadata.book.base import Metadata
|
from ebook_converter.ebooks.metadata.book.base import Metadata
|
||||||
metadata = Metadata(os.path.basename(chm_name))
|
metadata = Metadata(os.path.basename(chm_name))
|
||||||
encoding = self._chm_reader.get_encoding() or options.input_encoding or 'cp1252'
|
encoding = (self._chm_reader.get_encoding() or
|
||||||
|
options.input_encoding or 'cp1252')
|
||||||
self._chm_reader.CloseCHM()
|
self._chm_reader.CloseCHM()
|
||||||
# print((tdir, mainpath))
|
|
||||||
# from ebook_converter import ipython
|
|
||||||
# ipython()
|
|
||||||
|
|
||||||
options.debug_pipeline = None
|
options.debug_pipeline = None
|
||||||
options.input_encoding = 'utf-8'
|
options.input_encoding = 'utf-8'
|
||||||
@@ -80,7 +84,8 @@ class CHMInput(InputFormatPlugin):
|
|||||||
if os.path.abspath(mainpath) in self._chm_reader.re_encoded_files:
|
if os.path.abspath(mainpath) in self._chm_reader.re_encoded_files:
|
||||||
uenc = 'utf-8'
|
uenc = 'utf-8'
|
||||||
htmlpath, toc = self._create_html_root(mainpath, log, uenc)
|
htmlpath, toc = self._create_html_root(mainpath, log, uenc)
|
||||||
oeb = self._create_oebbook_html(htmlpath, tdir, options, log, metadata)
|
oeb = self._create_oebbook_html(htmlpath, tdir, options, log,
|
||||||
|
metadata)
|
||||||
options.debug_pipeline = odi
|
options.debug_pipeline = odi
|
||||||
if toc.count() > 1:
|
if toc.count() > 1:
|
||||||
oeb.toc = self.parse_html_toc(oeb.spine[0])
|
oeb.toc = self.parse_html_toc(oeb.spine[0])
|
||||||
@@ -117,13 +122,10 @@ class CHMInput(InputFormatPlugin):
|
|||||||
hhcdata = self._read_file(hhcpath)
|
hhcdata = self._read_file(hhcpath)
|
||||||
hhcdata = hhcdata.decode(encoding)
|
hhcdata = hhcdata.decode(encoding)
|
||||||
hhcdata = xml_to_unicode(hhcdata, verbose=True,
|
hhcdata = xml_to_unicode(hhcdata, verbose=True,
|
||||||
strip_encoding_pats=True, resolve_entities=True)[0]
|
strip_encoding_pats=True,
|
||||||
|
resolve_entities=True)[0]
|
||||||
hhcroot = html.fromstring(hhcdata)
|
hhcroot = html.fromstring(hhcdata)
|
||||||
toc = self._process_nodes(hhcroot)
|
toc = self._process_nodes(hhcroot)
|
||||||
# print("=============================")
|
|
||||||
# print("Printing hhcroot")
|
|
||||||
# print(etree.tostring(hhcroot, pretty_print=True))
|
|
||||||
# print("=============================")
|
|
||||||
log.debug('Found %d section nodes' % toc.count())
|
log.debug('Found %d section nodes' % toc.count())
|
||||||
htmlpath = os.path.splitext(hhcpath)[0] + ".html"
|
htmlpath = os.path.splitext(hhcpath)[0] + ".html"
|
||||||
base = os.path.dirname(os.path.abspath(htmlpath))
|
base = os.path.dirname(os.path.abspath(htmlpath))
|
||||||
@@ -135,7 +137,8 @@ class CHMInput(InputFormatPlugin):
|
|||||||
|
|
||||||
def unquote_path(x):
|
def unquote_path(x):
|
||||||
y = unquote(x)
|
y = unquote(x)
|
||||||
if (not os.path.exists(os.path.join(base, x)) and os.path.exists(os.path.join(base, y))):
|
if (not os.path.exists(os.path.join(base, x)) and
|
||||||
|
os.path.exists(os.path.join(base, y))):
|
||||||
x = y
|
x = y
|
||||||
return x
|
return x
|
||||||
|
|
||||||
@@ -147,28 +150,29 @@ class CHMInput(InputFormatPlugin):
|
|||||||
raw = unquote_path(child.href or '')
|
raw = unquote_path(child.href or '')
|
||||||
rsrcname = os.path.basename(raw)
|
rsrcname = os.path.basename(raw)
|
||||||
rsrcpath = os.path.join(subpath, rsrcname)
|
rsrcpath = os.path.join(subpath, rsrcname)
|
||||||
if (not os.path.exists(os.path.join(base, rsrcpath)) and os.path.exists(os.path.join(base, raw))):
|
if (not os.path.exists(os.path.join(base, rsrcpath)) and
|
||||||
|
os.path.exists(os.path.join(base, raw))):
|
||||||
rsrcpath = raw
|
rsrcpath = raw
|
||||||
|
|
||||||
if '%' not in rsrcpath:
|
if '%' not in rsrcpath:
|
||||||
rsrcpath = urlquote(rsrcpath)
|
rsrcpath = urlquote(rsrcpath)
|
||||||
if not raw:
|
if not raw:
|
||||||
rsrcpath = ''
|
rsrcpath = ''
|
||||||
c = DIV(A(title, href=rsrcpath))
|
c = builder.DIV(builder.A(title, href=rsrcpath))
|
||||||
donode(child, c, base, subpath)
|
donode(child, c, base, subpath)
|
||||||
parent.append(c)
|
parent.append(c)
|
||||||
|
|
||||||
with open(htmlpath, 'wb') as f:
|
with open(htmlpath, 'wb') as f:
|
||||||
if toc.count() > 1:
|
if toc.count() > 1:
|
||||||
from lxml.html.builder import HTML, BODY, DIV, A
|
|
||||||
path0 = toc[0].href
|
path0 = toc[0].href
|
||||||
path0 = unquote_path(path0)
|
path0 = unquote_path(path0)
|
||||||
subpath = os.path.dirname(path0)
|
subpath = os.path.dirname(path0)
|
||||||
base = os.path.dirname(f.name)
|
base = os.path.dirname(f.name)
|
||||||
root = DIV()
|
root = builder.DIV()
|
||||||
donode(toc, root, base, subpath)
|
donode(toc, root, base, subpath)
|
||||||
raw = html.tostring(HTML(BODY(root)), encoding='utf-8',
|
raw = html.tostring(builder.HTML(builder.BODY(root)),
|
||||||
pretty_print=True)
|
encoding='utf-8',
|
||||||
|
pretty_print=True)
|
||||||
f.write(raw)
|
f.write(raw)
|
||||||
else:
|
else:
|
||||||
f.write(as_bytes(hhcdata))
|
f.write(as_bytes(hhcdata))
|
||||||
|
|||||||
@@ -171,7 +171,7 @@ def convert_markdown_with_metadata(txt, title='', extensions=DEFAULT_MD_EXTENSIO
|
|||||||
|
|
||||||
|
|
||||||
def convert_textile(txt, title=''):
|
def convert_textile(txt, title=''):
|
||||||
from ebook_converter.ebooks.textile import textile
|
from ebook_converter.ebooks.textile.functions import textile
|
||||||
html = textile(txt, encoding='utf-8')
|
html = textile(txt, encoding='utf-8')
|
||||||
return HTML_TEMPLATE % (title, html)
|
return HTML_TEMPLATE % (title, html)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user