1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-02-01 19:45:45 +01:00

Cleanup, cleanup

This commit is contained in:
2020-07-17 18:59:45 +02:00
parent 5ac8451668
commit 45b6bb5b2c
10 changed files with 63 additions and 167 deletions

View File

@@ -18,6 +18,8 @@ To build and run ebook converter, you'll need:
- Python 3.6 or newer
- `Liberation fonts`_
- setuptools
- ``pdftohtml``, ``pdfinfo`` and ``pdftoppm`` from the `poppler`_ project,
available in ``$PATH`` (required for conversion from PDF)
There is no Python 2 support. Even though Calibre can probably still run on
Python 2, I do not intend to support it.
@@ -101,7 +103,7 @@ managers), i.e:
.. code:: shell-session
$ virtualenv venv
$ python -m venv venv
$ . venv/bin/activate
(venv) $ git clone https://github.com/gryf/ebook-converter
(venv) $ cd ebook-converter
@@ -128,3 +130,4 @@ for details.
.. _pypi: https://pypi.python.org
.. _Liberation fonts: https://github.com/liberationfonts/liberation-fonts
.. _Kindle periodical: https://sellercentral.amazon.com/gp/help/external/help.html?itemID=202047960&language=en-US
.. _poppler: https://poppler.freedesktop.org/

View File

@@ -10,17 +10,15 @@ import sys
from functools import partial
from ebook_converter import constants_old
from ebook_converter.constants_old import islinux, isfrozen, \
isbsd, __appname__, __version__, __author__, \
from ebook_converter.constants_old import isfrozen, \
__appname__, __version__, __author__, \
config_dir
from ebook_converter.ebooks.html_entities import html5_entities
if False:
# Prevent pyflakes from complaining
__appname__, islinux, __version__
isfrozen, __author__
isbsd, config_dir
__appname__, __version__, isfrozen, __author__, config_dir
def init_mimetypes():

View File

@@ -6,11 +6,10 @@ import os
import sys
__appname__ = 'calibre'
__appname__ = 'ebook-converter'
numeric_version = (4, 12, 0)
__version__ = '.'.join(map(str, numeric_version))
git_version = None
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
__version__ = '.'.join([str(x) for x in numeric_version])
__author__ = "foobar"
'''
Various run time constants.
@@ -18,16 +17,14 @@ Various run time constants.
_plat = sys.platform.lower()
isosx = 'darwin' in _plat
isnewosx = isosx and getattr(sys, 'new_app_bundle', False)
isosx = 'darwin' in _plat
isnewosx = isosx and getattr(sys, 'new_app_bundle', False)
isfreebsd = 'freebsd' in _plat
isnetbsd = 'netbsd' in _plat
isdragonflybsd = 'dragonfly' in _plat
isbsd = isfreebsd or isnetbsd or isdragonflybsd
ishaiku = 'haiku1' in _plat
islinux = True
isfrozen = hasattr(sys, 'frozen')
isunix = isosx or islinux or ishaiku
isfrozen = hasattr(sys, 'frozen')
isunix = True
isportable = os.getenv('CALIBRE_PORTABLE_BUILD') is not None
isxp = isoldvista = False
is64bit = sys.maxsize > (1 << 32)
@@ -40,7 +37,7 @@ TOC_DIALOG_APP_UID = 'com.calibre-ebook.toc-editor'
try:
preferred_encoding = locale.getpreferredencoding()
codecs.lookup(preferred_encoding)
except:
except Exception:
preferred_encoding = 'utf-8'
fcntl = importlib.import_module('fcntl')
@@ -214,12 +211,9 @@ del dv
def get_version():
'''Return version string for display to user '''
if git_version is not None:
v = git_version
else:
v = __version__
if numeric_version[-1] == 0:
v = v[:-2]
v = __version__
if numeric_version[-1] == 0:
v = v[:-2]
if is_running_from_develop:
v += '*'

View File

@@ -121,7 +121,6 @@ def render_html_data(path_to_html, width, height):
result = {}
def report_error(text=''):
__import__('pdb').set_trace()
print(f'Failed to render {path_to_html}')
# file=sys.stderr)
if text:

View File

@@ -2,10 +2,8 @@ import functools
import mimetypes
import os
import re
import tempfile
import urllib.parse
from ebook_converter.constants_old import islinux, isbsd
from ebook_converter.customize.conversion import InputFormatPlugin
from ebook_converter.customize.conversion import OptionRecommendation
from ebook_converter.utils.localization import get_lang
@@ -55,7 +53,6 @@ class HTMLInput(InputFormatPlugin):
def convert(self, stream, opts, file_ext, log,
accelerators):
self._is_case_sensitive = None
basedir = os.getcwd()
self.opts = opts
@@ -81,14 +78,6 @@ class HTMLInput(InputFormatPlugin):
return create_oebbook(log, stream.name, opts,
encoding=opts.input_encoding)
def is_case_sensitive(self, path):
if getattr(self, '_is_case_sensitive', None) is not None:
return self._is_case_sensitive
if not path or not os.path.exists(path):
return islinux or isbsd
self._is_case_sensitive = not (os.path.exists(path.lower()) and os.path.exists(path.upper()))
return self._is_case_sensitive
def create_oebbook(self, htmlpath, basedir, opts, log, mi):
import uuid
from ebook_converter.ebooks.conversion.plumber import create_oebbook
@@ -154,8 +143,6 @@ class HTMLInput(InputFormatPlugin):
self.log = log
self.log('Normalizing filename cases')
for path, href in htmlfile_map.items():
if not self.is_case_sensitive(path):
path = path.lower()
self.added_resources[path] = href
self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
self.urldefrag = urllib.parse.urldefrag
@@ -252,8 +239,6 @@ class HTMLInput(InputFormatPlugin):
if os.path.isdir(link):
self.log.warn(link_, 'is a link to a directory. Ignoring.')
return link_
if not self.is_case_sensitive(tempfile.gettempdir()):
link = link.lower()
if link not in self.added_resources:
bhref = os.path.basename(link)
id, href = self.oeb.manifest.generate(id='added', href=sanitize_file_name(bhref))

View File

@@ -1,27 +1,17 @@
"""
Read meta information from PDF files
"""
import os, subprocess, shutil, re
from functools import partial
import functools
import os
import re
import shutil
import subprocess
from ebook_converter.ptempfile import TemporaryDirectory
from ebook_converter.ebooks.metadata import (
MetaInformation, string_to_authors, check_isbn, check_doi)
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
def get_tools():
from ebook_converter.ebooks.pdf.pdftohtml import PDFTOHTML
base = os.path.dirname(PDFTOHTML)
suffix = ''
pdfinfo = os.path.join(base, 'pdfinfo') + suffix
pdftoppm = os.path.join(base, 'pdftoppm') + suffix
return pdfinfo, pdftoppm
def read_info(outputdir, get_cover):
''' Read info dict and cover from a pdf file named src.pdf in outputdir.
Note that this function changes the cwd to outputdir and is therefore not
@@ -29,7 +19,8 @@ def read_info(outputdir, get_cover):
way to pass unicode paths via command line arguments. This also ensures
that if poppler crashes, no stale file handles are left for the original
file, only for src.pdf.'''
pdfinfo, pdftoppm = get_tools()
pdfinfo = 'pdfinfo'
pdftoppm = 'pdftoppm'
source_file = os.path.join(outputdir, 'src.pdf')
cover_file = os.path.join(outputdir, 'cover')
ans = {}
@@ -55,8 +46,8 @@ def read_info(outputdir, get_cover):
ans[field] = val.strip()
# Now read XMP metadata
# Versions of poppler before 0.47.0 used to print out both the Info dict and
# XMP metadata packet together. However, since that changed in
# Versions of poppler before 0.47.0 used to print out both the Info dict
# and XMP metadata packet together. However, since that changed in
# https://cgit.freedesktop.org/poppler/poppler/commit/?id=c91483aceb1b640771f572cb3df9ad707e5cad0d
# we can no longer rely on it.
try:
@@ -77,13 +68,14 @@ def read_info(outputdir, get_cover):
subprocess.check_call([pdftoppm, '-singlefile', '-jpeg',
'-cropbox', source_file, cover_file])
except subprocess.CalledProcessError as e:
print('pdftoppm errored out with return code: {e.returncode}')
print(f'pdftoppm errored out with return code: {e.returncode}')
return ans
def page_images(pdfpath, outputdir='.', first=1, last=1, image_format='jpeg', prefix='page-images'):
pdftoppm = get_tools()[1]
def page_images(pdfpath, outputdir='.', first=1, last=1, image_format='jpeg',
prefix='page-images'):
pdftoppm = 'pdftoppm'
outputdir = os.path.abspath(outputdir)
args = {}
try:
@@ -92,11 +84,12 @@ def page_images(pdfpath, outputdir='.', first=1, last=1, image_format='jpeg', pr
'-l', str(last), pdfpath, os.path.join(outputdir, prefix)
], **args)
except subprocess.CalledProcessError as e:
raise ValueError('Failed to render PDF, pdftoppm errorcode: %s'%e.returncode)
raise ValueError('Failed to render PDF, pdftoppm errorcode: %s' %
e.returncode)
def is_pdf_encrypted(path_to_pdf):
pdfinfo = get_tools()[0]
pdfinfo = 'pdfinfo'
raw = subprocess.check_output([pdfinfo, path_to_pdf])
q = re.search(br'^Encrypted:\s*(\S+)', raw, flags=re.MULTILINE)
if q is not None:
@@ -149,7 +142,7 @@ def get_metadata(stream, cover=True):
# Look for recognizable identifiers in the info dict, if they were not
# found in the XMP metadata
for scheme, check_func in {'doi':check_doi, 'isbn':check_isbn}.items():
for scheme, check_func in {'doi': check_doi, 'isbn': check_isbn}.items():
if scheme not in mi.get_identifiers():
for k, v in info.items():
if k != 'xmp_metadata':
@@ -163,9 +156,7 @@ def get_metadata(stream, cover=True):
return mi
get_quick_metadata = partial(get_metadata, cover=False)
#from ebook_converter.utils.podofo import set_metadata as podofo_set_metadata
get_quick_metadata = functools.partial(get_metadata, cover=False)
def set_metadata(stream, mi):

View File

@@ -3,12 +3,10 @@ import os
import re
import shutil
import subprocess
import sys
from lxml import etree
from ebook_converter import CurrentDir, xml_replace_entities
from ebook_converter.constants_old import isbsd, islinux, isosx
from ebook_converter.ebooks import ConversionError, DRMError
from ebook_converter.ebooks.chardet import xml_to_unicode
from ebook_converter.ptempfile import PersistentTemporaryFile
@@ -16,21 +14,10 @@ from ebook_converter.utils.cleantext import clean_xml_chars
from ebook_converter.utils.ipc import eintr_retry_call
PDFTOHTML = 'pdftohtml'
# Thin pass-through to subprocess.Popen: forwards ``cmd`` and every keyword
# argument unchanged and returns the resulting Popen object.
def popen(cmd, **kw):
return subprocess.Popen(cmd, **kw)
if isosx and hasattr(sys, 'frameworks_dir'):
base = os.path.join(os.path.dirname(sys.frameworks_dir), 'utils.app',
'Contents', 'MacOS')
PDFTOHTML = os.path.join(base, PDFTOHTML)
if (islinux or isbsd) and getattr(sys, 'frozen', False):
PDFTOHTML = os.path.join(sys.executables_location, 'bin', 'pdftohtml')
def pdftohtml(output_dir, pdf_path, no_images, as_xml=False):
'''
Convert the pdf into html using the pdftohtml app.
@@ -49,12 +36,9 @@ def pdftohtml(output_dir, pdf_path, no_images, as_xml=False):
def a(x):
return os.path.basename(x)
exe = PDFTOHTML
cmd = [exe, '-enc', 'UTF-8', '-noframes', '-p', '-nomerge',
cmd = ['pdftohtml', '-enc', 'UTF-8', '-noframes', '-p', '-nomerge',
'-nodrm', a(pdfsrc), a(index)]
if isbsd:
cmd.remove('-nodrm')
if no_images:
cmd.append('-i')
if as_xml:
@@ -105,11 +89,9 @@ def pdftohtml(output_dir, pdf_path, no_images, as_xml=False):
i.write(raw.encode('utf-8'))
cmd = [exe, '-f', '1', '-l', '1', '-xml', '-i', '-enc', 'UTF-8',
'-noframes', '-p', '-nomerge', '-nodrm', '-q', '-stdout',
a(pdfsrc)]
if isbsd:
cmd.remove('-nodrm')
cmd = ['pdftohtml', '-f', '1', '-l', '1', '-xml', '-i', '-enc',
'UTF-8', '-noframes', '-p', '-nomerge', '-nodrm', '-q',
'-stdout', a(pdfsrc)]
p = popen(cmd, stdout=subprocess.PIPE)
raw = p.stdout.read().strip()
if p.wait() == 0 and raw:

View File

@@ -5,6 +5,7 @@ import builtins
import fcntl
import locale
import sys
import threading
from ebook_converter import constants_old
@@ -22,7 +23,9 @@ if not _run_once:
try:
base_dir()
except EnvironmentError:
pass # Ignore this error during startup, so we can show a better error message to the user later.
# Ignore this error during startup, so we can show a better error
# message to the user later.
pass
#
# Convert command line arguments to unicode
@@ -51,7 +54,6 @@ if not _run_once:
import ebook_converter.utils.resources as resources
resources
#
# Initialize locale
# Import string as we do not want locale specific
# string.whitespace/printable, on windows especially, this causes problems.
@@ -87,28 +89,26 @@ if not _run_once:
bound_signal.connect(slot, **kw)
builtins.__dict__['connect_lambda'] = connect_lambda
if constants_old.islinux or constants_old.isosx or constants_old.isfreebsd:
# Name all threads at the OS level created using the threading module, see
# http://bugs.python.org/issue15500
import threading
# Name all threads at the OS level created using the threading module, see
# http://bugs.python.org/issue15500
orig_start = threading.Thread.start
orig_start = threading.Thread.start
def new_start(self):
orig_start(self)
try:
name = self.name
if not name or name.startswith('Thread-'):
name = self.__class__.__name__
if name == 'Thread':
name = self.name
if name:
if isinstance(name, str):
name = name.encode('ascii', 'replace').decode('ascii')
constants_old.plugins['speedup'][0].set_thread_name(name[:15])
except Exception:
pass # Don't care about failure to set name
threading.Thread.start = new_start
def new_start(self):
orig_start(self)
try:
name = self.name
if not name or name.startswith('Thread-'):
name = self.__class__.__name__
if name == 'Thread':
name = self.name
if name:
if isinstance(name, str):
name = name.encode('ascii', 'replace').decode('ascii')
constants_old.plugins['speedup'][0].set_thread_name(name[:15])
except Exception:
pass # Don't care about failure to set name
threading.Thread.start = new_start
def test_lopen():

View File

@@ -1,16 +1,4 @@
import errno
import functools
import os
import sys
import threading
from ebook_converter import force_unicode
from ebook_converter.constants_old import filesystem_encoding
from ebook_converter.constants_old import islinux
from ebook_converter.utils.filenames import ascii_filename
VADDRESS = None
def eintr_retry_call(func, *args, **kwargs):
@@ -21,47 +9,3 @@ def eintr_retry_call(func, *args, **kwargs):
if getattr(e, 'errno', None) == errno.EINTR:
continue
raise
@functools.lru_cache()
def socket_address(which):
user = force_unicode(os.environ.get('USER') or os.path.basename(os.path.expanduser('~')), filesystem_encoding)
sock_name = '{}-calibre-{}.socket'.format(ascii_filename(user).replace(' ', '_'), which)
if islinux:
ans = '\0' + sock_name
else:
from tempfile import gettempdir
tmp = force_unicode(gettempdir(), filesystem_encoding)
ans = os.path.join(tmp, sock_name)
return ans
def gui_socket_address():
return socket_address('gui')
def viewer_socket_address():
return socket_address('viewer')
class RC(threading.Thread):
def __init__(self, print_error=True, socket_address=None):
self.print_error = print_error
self.socket_address = socket_address or gui_socket_address()
threading.Thread.__init__(self)
self.conn = None
self.daemon = True
def run(self):
from multiprocessing.connection import Client
self.done = False
try:
self.conn = Client(self.socket_address)
self.done = True
except Exception:
if self.print_error:
print('Failed to connect to address {}', file=sys.stderr).format(repr(self.socket_address))
import traceback
traceback.print_exc()

View File

@@ -1,6 +1,6 @@
[metadata]
name = ebook-converter
version = 4.9.1
version = 4.12.0
summary = Convert ebook between different formats
description-file =
README.rst