From 45b6bb5b2cc151e36c4b299c5b8540ed615cca26 Mon Sep 17 00:00:00 2001
From: gryf <gryf73@gmail.com>
Date: Fri, 17 Jul 2020 18:59:45 +0200
Subject: [PATCH] Cleanup: drop platform checks and unused code

---
 README.rst                                    |  5 +-
 ebook_converter/__init__.py                   |  8 +--
 ebook_converter/constants_old.py              | 28 ++++------
 ebook_converter/ebooks/__init__.py            |  1 -
 .../ebooks/conversion/plugins/html_input.py   | 15 -----
 ebook_converter/ebooks/metadata/pdf.py        | 45 ++++++---------
 ebook_converter/ebooks/pdf/pdftohtml.py       | 26 ++-------
 ebook_converter/startup.py                    | 44 +++++++--------
 ebook_converter/utils/ipc/__init__.py         | 56 -------------------
 setup.cfg                                     |  2 +-
 10 files changed, 63 insertions(+), 167 deletions(-)

diff --git a/README.rst b/README.rst
index 446309c..d125154 100644
--- a/README.rst
+++ b/README.rst
@@ -18,6 +18,8 @@ To build and run ebook converter, you'll need:
 - Python 3.6 or newer
 - `Liberation fonts`_
 - setuptools
+- ``pdftohtml``, ``pdfinfo`` and ``pdftoppm`` from the `poppler`_ project,
+  available in ``$PATH`` (required for conversion from PDF)
 
 No Python2 support. Even if Calibre probably still is able to run on Python2, I
 do not have an intention to support it.
@@ -101,7 +103,7 @@ managers), i.e:
 
 .. code:: shell-session
 
-   $ virtualenv venv
+   $ python -m venv venv
    $ . venv/bin/activate
    (venv) $ git clone https://github.com/gryf/ebook-converter
    (venv) $ cd ebook-converter
@@ -128,3 +130,4 @@ for details.
 .. _pypi: https://pypi.python.org
 .. _Liberation fonts: https://github.com/liberationfonts/liberation-fonts
 .. _Kindle periodical: https://sellercentral.amazon.com/gp/help/external/help.html?itemID=202047960&language=en-US
+.. _poppler: https://poppler.freedesktop.org/
diff --git a/ebook_converter/__init__.py b/ebook_converter/__init__.py
index ae80283..14d6c6e 100644
--- a/ebook_converter/__init__.py
+++ b/ebook_converter/__init__.py
@@ -10,17 +10,15 @@ import sys
 from functools import partial
 
 from ebook_converter import constants_old
-from ebook_converter.constants_old import islinux, isfrozen, \
-    isbsd, __appname__, __version__, __author__, \
+from ebook_converter.constants_old import isfrozen, \
+    __appname__, __version__, __author__, \
     config_dir
 from ebook_converter.ebooks.html_entities import html5_entities
 
 
 if False:
     # Prevent pyflakes from complaining
-    __appname__, islinux, __version__
-    isfrozen, __author__
-    isbsd, config_dir
+    __appname__, __version__, isfrozen, __author__, config_dir
 
 
 def init_mimetypes():
diff --git a/ebook_converter/constants_old.py b/ebook_converter/constants_old.py
index 6526cce..896343b 100644
--- a/ebook_converter/constants_old.py
+++ b/ebook_converter/constants_old.py
@@ -6,11 +6,10 @@ import os
 import sys
 
 
-__appname__   = 'calibre'
+__appname__ = 'ebook-converter'
 numeric_version = (4, 12, 0)
-__version__   = '.'.join(map(str, numeric_version))
-git_version   = None
-__author__    = "Kovid Goyal <kovid@kovidgoyal.net>"
+__version__ = '.'.join([str(x) for x in numeric_version])
+__author__ = "foobar"
 
 '''
 Various run time constants.
@@ -18,16 +17,14 @@ Various run time constants.
 
 
 _plat = sys.platform.lower()
-isosx     = 'darwin' in _plat
-isnewosx  = isosx and getattr(sys, 'new_app_bundle', False)
+isosx = 'darwin' in _plat
+isnewosx = isosx and getattr(sys, 'new_app_bundle', False)
 isfreebsd = 'freebsd' in _plat
 isnetbsd = 'netbsd' in _plat
 isdragonflybsd = 'dragonfly' in _plat
-isbsd = isfreebsd or isnetbsd or isdragonflybsd
 ishaiku = 'haiku1' in _plat
-islinux   = True
-isfrozen  = hasattr(sys, 'frozen')
-isunix = isosx or islinux or ishaiku
+isfrozen = hasattr(sys, 'frozen')
+isunix = True
 isportable = os.getenv('CALIBRE_PORTABLE_BUILD') is not None
 isxp = isoldvista = False
 is64bit = sys.maxsize > (1 << 32)
@@ -40,7 +37,7 @@ TOC_DIALOG_APP_UID = 'com.calibre-ebook.toc-editor'
 try:
     preferred_encoding = locale.getpreferredencoding()
     codecs.lookup(preferred_encoding)
-except:
+except Exception:
     preferred_encoding = 'utf-8'
 
 fcntl = importlib.import_module('fcntl')
@@ -214,12 +211,9 @@ del dv
 
 def get_version():
     '''Return version string for display to user '''
-    if git_version is not None:
-        v = git_version
-    else:
-        v = __version__
-        if numeric_version[-1] == 0:
-            v = v[:-2]
+    v = __version__
+    if numeric_version[-1] == 0:
+        v = v[:-2]
     if is_running_from_develop:
         v += '*'
 
diff --git a/ebook_converter/ebooks/__init__.py b/ebook_converter/ebooks/__init__.py
index e2a4eef..22d6e3b 100644
--- a/ebook_converter/ebooks/__init__.py
+++ b/ebook_converter/ebooks/__init__.py
@@ -121,7 +121,6 @@ def render_html_data(path_to_html, width, height):
     result = {}
 
     def report_error(text=''):
-        __import__('pdb').set_trace()
         print(f'Failed to render {path_to_html}')
         # file=sys.stderr)
         if text:
diff --git a/ebook_converter/ebooks/conversion/plugins/html_input.py b/ebook_converter/ebooks/conversion/plugins/html_input.py
index 637030d..d6f0481 100644
--- a/ebook_converter/ebooks/conversion/plugins/html_input.py
+++ b/ebook_converter/ebooks/conversion/plugins/html_input.py
@@ -2,10 +2,8 @@ import functools
 import mimetypes
 import os
 import re
-import tempfile
 import urllib.parse
 
-from ebook_converter.constants_old import islinux, isbsd
 from ebook_converter.customize.conversion import InputFormatPlugin
 from ebook_converter.customize.conversion import OptionRecommendation
 from ebook_converter.utils.localization import get_lang
@@ -55,7 +53,6 @@ class HTMLInput(InputFormatPlugin):
 
     def convert(self, stream, opts, file_ext, log,
                 accelerators):
-        self._is_case_sensitive = None
         basedir = os.getcwd()
         self.opts = opts
 
@@ -81,14 +78,6 @@ class HTMLInput(InputFormatPlugin):
         return create_oebbook(log, stream.name, opts,
                 encoding=opts.input_encoding)
 
-    def is_case_sensitive(self, path):
-        if getattr(self, '_is_case_sensitive', None) is not None:
-            return self._is_case_sensitive
-        if not path or not os.path.exists(path):
-            return islinux or isbsd
-        self._is_case_sensitive = not (os.path.exists(path.lower()) and os.path.exists(path.upper()))
-        return self._is_case_sensitive
-
     def create_oebbook(self, htmlpath, basedir, opts, log, mi):
         import uuid
         from ebook_converter.ebooks.conversion.plumber import create_oebbook
@@ -154,8 +143,6 @@ class HTMLInput(InputFormatPlugin):
         self.log = log
         self.log('Normalizing filename cases')
         for path, href in htmlfile_map.items():
-            if not self.is_case_sensitive(path):
-                path = path.lower()
             self.added_resources[path] = href
         self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
         self.urldefrag = urllib.parse.urldefrag
@@ -252,8 +239,6 @@ class HTMLInput(InputFormatPlugin):
         if os.path.isdir(link):
             self.log.warn(link_, 'is a link to a directory. Ignoring.')
             return link_
-        if not self.is_case_sensitive(tempfile.gettempdir()):
-            link = link.lower()
         if link not in self.added_resources:
             bhref = os.path.basename(link)
             id, href = self.oeb.manifest.generate(id='added', href=sanitize_file_name(bhref))
diff --git a/ebook_converter/ebooks/metadata/pdf.py b/ebook_converter/ebooks/metadata/pdf.py
index 984cf6f..0b1ac1f 100644
--- a/ebook_converter/ebooks/metadata/pdf.py
+++ b/ebook_converter/ebooks/metadata/pdf.py
@@ -1,27 +1,17 @@
 """
 Read meta information from PDF files
 """
-import os, subprocess, shutil, re
-from functools import partial
+import functools
+import os
+import re
+import shutil
+import subprocess
 
 from ebook_converter.ptempfile import TemporaryDirectory
 from ebook_converter.ebooks.metadata import (
     MetaInformation, string_to_authors, check_isbn, check_doi)
 
 
-__license__ = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-
-
-def get_tools():
-    from ebook_converter.ebooks.pdf.pdftohtml import PDFTOHTML
-    base = os.path.dirname(PDFTOHTML)
-    suffix = ''
-    pdfinfo = os.path.join(base, 'pdfinfo') + suffix
-    pdftoppm = os.path.join(base, 'pdftoppm') + suffix
-    return pdfinfo, pdftoppm
-
-
 def read_info(outputdir, get_cover):
     ''' Read info dict and cover from a pdf file named src.pdf in outputdir.
     Note that this function changes the cwd to outputdir and is therefore not
@@ -29,7 +19,8 @@ def read_info(outputdir, get_cover):
     way to pass unicode paths via command line arguments. This also ensures
     that if poppler crashes, no stale file handles are left for the original
     file, only for src.pdf.'''
-    pdfinfo, pdftoppm = get_tools()
+    pdfinfo = 'pdfinfo'
+    pdftoppm = 'pdftoppm'
     source_file = os.path.join(outputdir, 'src.pdf')
     cover_file = os.path.join(outputdir, 'cover')
     ans = {}
@@ -55,8 +46,8 @@ def read_info(outputdir, get_cover):
             ans[field] = val.strip()
 
     # Now read XMP metadata
-    # Versions of poppler before 0.47.0 used to print out both the Info dict and
-    # XMP metadata packet together. However, since that changed in
+    # Versions of poppler before 0.47.0 used to print out both the Info dict
+    # and XMP metadata packet together. However, since that changed in
     # https://cgit.freedesktop.org/poppler/poppler/commit/?id=c91483aceb1b640771f572cb3df9ad707e5cad0d
     # we can no longer rely on it.
     try:
@@ -77,13 +68,14 @@ def read_info(outputdir, get_cover):
             subprocess.check_call([pdftoppm, '-singlefile', '-jpeg',
                                    '-cropbox', source_file, cover_file])
         except subprocess.CalledProcessError as e:
-            print('pdftoppm errored out with return code: {e.returncode}')
+            print(f'pdftoppm errored out with return code: {e.returncode}')
 
     return ans
 
 
-def page_images(pdfpath, outputdir='.', first=1, last=1, image_format='jpeg', prefix='page-images'):
-    pdftoppm = get_tools()[1]
+def page_images(pdfpath, outputdir='.', first=1, last=1, image_format='jpeg',
+                prefix='page-images'):
+    pdftoppm = 'pdftoppm'
     outputdir = os.path.abspath(outputdir)
     args = {}
     try:
@@ -92,11 +84,12 @@ def page_images(pdfpath, outputdir='.', first=1, last=1, image_format='jpeg', pr
             '-l', str(last), pdfpath, os.path.join(outputdir, prefix)
         ], **args)
     except subprocess.CalledProcessError as e:
-        raise ValueError('Failed to render PDF, pdftoppm errorcode: %s'%e.returncode)
+        raise ValueError('Failed to render PDF, pdftoppm errorcode: %s' %
+                         e.returncode)
 
 
 def is_pdf_encrypted(path_to_pdf):
-    pdfinfo = get_tools()[0]
+    pdfinfo = 'pdfinfo'
     raw = subprocess.check_output([pdfinfo, path_to_pdf])
     q = re.search(br'^Encrypted:\s*(\S+)', raw, flags=re.MULTILINE)
     if q is not None:
@@ -149,7 +142,7 @@ def get_metadata(stream, cover=True):
 
     # Look for recognizable identifiers in the info dict, if they were not
     # found in the XMP metadata
-    for scheme, check_func in {'doi':check_doi, 'isbn':check_isbn}.items():
+    for scheme, check_func in {'doi': check_doi, 'isbn': check_isbn}.items():
         if scheme not in mi.get_identifiers():
             for k, v in info.items():
                 if k != 'xmp_metadata':
@@ -163,9 +156,7 @@ def get_metadata(stream, cover=True):
     return mi
 
 
-get_quick_metadata = partial(get_metadata, cover=False)
-
-#from ebook_converter.utils.podofo import set_metadata as podofo_set_metadata
+get_quick_metadata = functools.partial(get_metadata, cover=False)
 
 
 def set_metadata(stream, mi):
diff --git a/ebook_converter/ebooks/pdf/pdftohtml.py b/ebook_converter/ebooks/pdf/pdftohtml.py
index 1f62166..8c8b03f 100644
--- a/ebook_converter/ebooks/pdf/pdftohtml.py
+++ b/ebook_converter/ebooks/pdf/pdftohtml.py
@@ -3,12 +3,10 @@ import os
 import re
 import shutil
 import subprocess
-import sys
 
 from lxml import etree
 
 from ebook_converter import CurrentDir, xml_replace_entities
-from ebook_converter.constants_old import isbsd, islinux, isosx
 from ebook_converter.ebooks import ConversionError, DRMError
 from ebook_converter.ebooks.chardet import xml_to_unicode
 from ebook_converter.ptempfile import PersistentTemporaryFile
@@ -16,21 +14,10 @@ from ebook_converter.utils.cleantext import clean_xml_chars
 from ebook_converter.utils.ipc import eintr_retry_call
 
 
-PDFTOHTML = 'pdftohtml'
-
-
 def popen(cmd, **kw):
     return subprocess.Popen(cmd, **kw)
 
 
-if isosx and hasattr(sys, 'frameworks_dir'):
-    base = os.path.join(os.path.dirname(sys.frameworks_dir), 'utils.app',
-                        'Contents', 'MacOS')
-    PDFTOHTML = os.path.join(base, PDFTOHTML)
-if (islinux or isbsd) and getattr(sys, 'frozen', False):
-    PDFTOHTML = os.path.join(sys.executables_location, 'bin', 'pdftohtml')
-
-
 def pdftohtml(output_dir, pdf_path, no_images, as_xml=False):
     '''
     Convert the pdf into html using the pdftohtml app.
@@ -49,12 +36,9 @@ def pdftohtml(output_dir, pdf_path, no_images, as_xml=False):
         def a(x):
             return os.path.basename(x)
 
-        exe = PDFTOHTML
-        cmd = [exe, '-enc', 'UTF-8', '-noframes', '-p', '-nomerge',
+        cmd = ['pdftohtml', '-enc', 'UTF-8', '-noframes', '-p', '-nomerge',
                '-nodrm', a(pdfsrc), a(index)]
 
-        if isbsd:
-            cmd.remove('-nodrm')
         if no_images:
             cmd.append('-i')
         if as_xml:
@@ -105,11 +89,9 @@ def pdftohtml(output_dir, pdf_path, no_images, as_xml=False):
 
                 i.write(raw.encode('utf-8'))
 
-            cmd = [exe, '-f', '1', '-l', '1', '-xml', '-i', '-enc', 'UTF-8',
-                   '-noframes', '-p', '-nomerge', '-nodrm', '-q', '-stdout',
-                   a(pdfsrc)]
-            if isbsd:
-                cmd.remove('-nodrm')
+            cmd = ['pdftohtml', '-f', '1', '-l', '1', '-xml', '-i', '-enc',
+                   'UTF-8', '-noframes', '-p', '-nomerge', '-nodrm', '-q',
+                   '-stdout', a(pdfsrc)]
             p = popen(cmd, stdout=subprocess.PIPE)
             raw = p.stdout.read().strip()
             if p.wait() == 0 and raw:
diff --git a/ebook_converter/startup.py b/ebook_converter/startup.py
index b1227ca..a440577 100644
--- a/ebook_converter/startup.py
+++ b/ebook_converter/startup.py
@@ -5,6 +5,7 @@ import builtins
 import fcntl
 import locale
 import sys
+import threading
 
 from ebook_converter import constants_old
 
@@ -22,7 +23,9 @@ if not _run_once:
     try:
         base_dir()
     except EnvironmentError:
-        pass  # Ignore this error during startup, so we can show a better error message to the user later.
+        # Ignore this error during startup, so we can show a better error
+        # message to the user later.
+        pass
 
     #
     # Convert command line arguments to unicode
@@ -51,7 +54,6 @@ if not _run_once:
     import ebook_converter.utils.resources as resources
     resources
 
-    #
     # Initialize locale
     # Import string as we do not want locale specific
     # string.whitespace/printable, on windows especially, this causes problems.
@@ -87,28 +89,26 @@ if not _run_once:
         bound_signal.connect(slot, **kw)
     builtins.__dict__['connect_lambda'] = connect_lambda
 
-    if constants_old.islinux or constants_old.isosx or constants_old.isfreebsd:
-        # Name all threads at the OS level created using the threading module, see
-        # http://bugs.python.org/issue15500
-        import threading
+    # Name all threads at the OS level created using the threading module, see
+    # http://bugs.python.org/issue15500
 
-        orig_start = threading.Thread.start
+    orig_start = threading.Thread.start
 
-        def new_start(self):
-            orig_start(self)
-            try:
-                name = self.name
-                if not name or name.startswith('Thread-'):
-                    name = self.__class__.__name__
-                    if name == 'Thread':
-                        name = self.name
-                if name:
-                    if isinstance(name, str):
-                        name = name.encode('ascii', 'replace').decode('ascii')
-                    constants_old.plugins['speedup'][0].set_thread_name(name[:15])
-            except Exception:
-                pass  # Don't care about failure to set name
-        threading.Thread.start = new_start
+    def new_start(self):
+        orig_start(self)
+        try:
+            name = self.name
+            if not name or name.startswith('Thread-'):
+                name = self.__class__.__name__
+                if name == 'Thread':
+                    name = self.name
+            if name:
+                if isinstance(name, str):
+                    name = name.encode('ascii', 'replace').decode('ascii')
+                constants_old.plugins['speedup'][0].set_thread_name(name[:15])
+        except Exception:
+            pass  # Don't care about failure to set name
+    threading.Thread.start = new_start
 
 
 def test_lopen():
diff --git a/ebook_converter/utils/ipc/__init__.py b/ebook_converter/utils/ipc/__init__.py
index 7285615..add5650 100644
--- a/ebook_converter/utils/ipc/__init__.py
+++ b/ebook_converter/utils/ipc/__init__.py
@@ -1,16 +1,4 @@
 import errno
-import functools
-import os
-import sys
-import threading
-
-from ebook_converter import force_unicode
-from ebook_converter.constants_old import filesystem_encoding
-from ebook_converter.constants_old import islinux
-from ebook_converter.utils.filenames import ascii_filename
-
-
-VADDRESS = None
 
 
 def eintr_retry_call(func, *args, **kwargs):
@@ -21,47 +9,3 @@ def eintr_retry_call(func, *args, **kwargs):
             if getattr(e, 'errno', None) == errno.EINTR:
                 continue
             raise
-
-
-@functools.lru_cache()
-def socket_address(which):
-
-    user = force_unicode(os.environ.get('USER') or os.path.basename(os.path.expanduser('~')), filesystem_encoding)
-    sock_name = '{}-calibre-{}.socket'.format(ascii_filename(user).replace(' ', '_'), which)
-    if islinux:
-        ans = '\0' + sock_name
-    else:
-        from tempfile import gettempdir
-        tmp = force_unicode(gettempdir(), filesystem_encoding)
-        ans = os.path.join(tmp, sock_name)
-    return ans
-
-
-def gui_socket_address():
-    return socket_address('gui')
-
-
-def viewer_socket_address():
-    return socket_address('viewer')
-
-
-class RC(threading.Thread):
-
-    def __init__(self, print_error=True, socket_address=None):
-        self.print_error = print_error
-        self.socket_address = socket_address or gui_socket_address()
-        threading.Thread.__init__(self)
-        self.conn = None
-        self.daemon = True
-
-    def run(self):
-        from multiprocessing.connection import Client
-        self.done = False
-        try:
-            self.conn = Client(self.socket_address)
-            self.done = True
-        except Exception:
-            if self.print_error:
-                print('Failed to connect to address {}', file=sys.stderr).format(repr(self.socket_address))
-                import traceback
-                traceback.print_exc()
diff --git a/setup.cfg b/setup.cfg
index 0f8352b..d41409e 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = ebook-converter
-version = 4.9.1
+version = 4.12.0
 summary = Convert ebook between different formats
 description-file =
     README.rst