From 1465e4267f52e5220977af4eee18dbcfe12cdb61 Mon Sep 17 00:00:00 2001 From: gryf Date: Sun, 14 Jun 2020 15:41:18 +0200 Subject: [PATCH] Sorted out mime initialization. Every mime-related function in the main __init__.py checks a flag to see whether initialization has already been done. This is nonsense, since initialization should be done implicitly, early on, when the converter is starting. This commit straightens things out, and initialization is now done in the cli module. Also, the guess_type function was removed, since it was just a proxy for the mimetypes.guess_type function. --- ebook_converter/__init__.py | 27 ++--------------- ebook_converter/customize/builtins.py | 30 +++++++++++-------- ebook_converter/ebooks/conversion/cli.py | 2 ++ .../ebooks/conversion/plugins/fb2_input.py | 5 ++-- .../ebooks/conversion/plugins/html_input.py | 17 ++++------- .../ebooks/conversion/plugins/htmlz_input.py | 10 ++----- ebook_converter/ebooks/conversion/plumber.py | 3 +- ebook_converter/ebooks/docx/container.py | 5 ++-- ebook_converter/ebooks/docx/to_html.py | 5 ++-- .../ebooks/docx/writer/container.py | 14 +++++---- ebook_converter/ebooks/lrf/input.py | 13 ++++---- ebook_converter/ebooks/metadata/__init__.py | 10 ++----- ebook_converter/ebooks/metadata/fb2.py | 3 +- ebook_converter/ebooks/metadata/opf2.py | 7 +++-- ebook_converter/ebooks/mobi/reader/mobi6.py | 4 ++- ebook_converter/ebooks/oeb/polish/utils.py | 9 ++---- ebook_converter/ebooks/oeb/reader.py | 8 ++--- .../ebooks/oeb/transforms/cover.py | 4 +-- .../ebooks/oeb/transforms/data_url.py | 4 ++- .../ebooks/oeb/transforms/flatcss.py | 9 +++--- .../ebooks/oeb/transforms/jacket.py | 10 +++++-- .../ebooks/oeb/transforms/metadata.py | 7 ++--- 22 files changed, 94 insertions(+), 112 deletions(-) diff --git a/ebook_converter/__init__.py b/ebook_converter/__init__.py index 417ee9c..7c1967d 100644 --- a/ebook_converter/__init__.py +++ b/ebook_converter/__init__.py @@ -1,12 +1,10 @@ import math import os import pkg_resources -import random import re import sys 
import time -import urllib.parse -import urllib.request +import mimetypes from functools import partial @@ -28,35 +26,17 @@ if False: fcntl, win32event, isfrozen, __author__ winerror, win32api, isbsd, config_dir -_mt_inited = False - -def _init_mimetypes(): - global _mt_inited - import mimetypes +def init_mimetypes(): mimetypes.init([pkg_resources.resource_filename('ebook_converter', 'data/mime.types')]) - _mt_inited = True - - -def guess_type(*args, **kwargs): - import mimetypes - if not _mt_inited: - _init_mimetypes() - return mimetypes.guess_type(*args, **kwargs) def guess_all_extensions(*args, **kwargs): - import mimetypes - if not _mt_inited: - _init_mimetypes() return mimetypes.guess_all_extensions(*args, **kwargs) def guess_extension(*args, **kwargs): - import mimetypes - if not _mt_inited: - _init_mimetypes() ext = mimetypes.guess_extension(*args, **kwargs) if not ext and args and args[0] == 'application/x-palmreader': ext = '.pdb' @@ -64,9 +44,6 @@ def guess_extension(*args, **kwargs): def get_types_map(): - import mimetypes - if not _mt_inited: - _init_mimetypes() return mimetypes.types_map diff --git a/ebook_converter/customize/builtins.py b/ebook_converter/customize/builtins.py index df820c9..db347b0 100644 --- a/ebook_converter/customize/builtins.py +++ b/ebook_converter/customize/builtins.py @@ -1,16 +1,19 @@ -import os, glob, re +import glob +import mimetypes +import os +import re -from ebook_converter import guess_type -from ebook_converter.customize import (FileTypePlugin, MetadataReaderPlugin, - MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase, StoreBase) from ebook_converter.constants_old import numeric_version -from ebook_converter.ebooks.metadata.archive import ArchiveExtract, KPFExtract, get_comic_metadata +from ebook_converter.customize import FileTypePlugin +from ebook_converter.customize import InterfaceActionBase +from ebook_converter.customize import MetadataReaderPlugin +from ebook_converter.customize import 
MetadataWriterPlugin from ebook_converter.ebooks.html.to_zip import HTML2ZIP +from ebook_converter.ebooks.metadata.archive import ArchiveExtract +from ebook_converter.ebooks.metadata.archive import KPFExtract +from ebook_converter.ebooks.metadata.archive import get_comic_metadata -__license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' - plugins = [] # To archive plugins {{{ @@ -36,12 +39,13 @@ class PML2PMLZ(FileTypePlugin): pmlz.write(pmlfile, os.path.basename(pmlfile), zipfile.ZIP_DEFLATED) pml_img = os.path.splitext(pmlfile)[0] + '_img' - i_img = os.path.join(os.path.dirname(pmlfile),'images') + i_img = os.path.join(os.path.dirname(pmlfile), 'images') img_dir = pml_img if os.path.isdir(pml_img) else i_img if \ os.path.isdir(i_img) else '' if img_dir: for image in glob.glob(os.path.join(img_dir, '*.png')): - pmlz.write(image, os.path.join('images', (os.path.basename(image)))) + pmlz.write(image, os.path.join('images', + (os.path.basename(image)))) pmlz.close() return of.name @@ -67,13 +71,13 @@ class TXT2TXTZ(FileTypePlugin): # Textile for m in re.finditer(r'(?mu)(?:[\[{])?\!(?:\. 
)?(?P[^\s(!]+)\s?(?:\(([^\)]+)\))?\!(?::(\S+))?(?:[\]}]|(?=\s|$))', txt): path = m.group('path') - if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)): + if path and not os.path.isabs(path) and mimetypes.guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)): images.append(path) # Markdown inline for m in re.finditer(r'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\((?P[^\)]*)\)', txt): # noqa path = m.group('path') - if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)): + if path and not os.path.isabs(path) and mimetypes.guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)): images.append(path) # Markdown reference @@ -83,7 +87,7 @@ class TXT2TXTZ(FileTypePlugin): refs[m.group('id')] = m.group('path') for m in re.finditer(r'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\[(?P[^\]]*)\]', txt): # noqa path = refs.get(m.group('id'), None) - if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)): + if path and not os.path.isabs(path) and mimetypes.guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)): images.append(path) # Remove duplicates diff --git a/ebook_converter/ebooks/conversion/cli.py b/ebook_converter/ebooks/conversion/cli.py index 7bb39d0..3ad1a57 100644 --- a/ebook_converter/ebooks/conversion/cli.py +++ b/ebook_converter/ebooks/conversion/cli.py @@ -11,6 +11,7 @@ from ebook_converter.utils.config import OptionParser from ebook_converter.utils.logging import Log from ebook_converter.customize.conversion import OptionRecommendation from 
ebook_converter import patheq +from ebook_converter import init_mimetypes from ebook_converter.utils.localization import localize_user_manual_link @@ -361,6 +362,7 @@ def read_sr_patterns(path, log=None): def main(args=sys.argv): log = Log() + init_mimetypes() parser, plumber = create_option_parser(args, log) opts, leftover_args = parser.parse_args(args) if len(leftover_args) > 3: diff --git a/ebook_converter/ebooks/conversion/plugins/fb2_input.py b/ebook_converter/ebooks/conversion/plugins/fb2_input.py index 62f7b86..09da836 100644 --- a/ebook_converter/ebooks/conversion/plugins/fb2_input.py +++ b/ebook_converter/ebooks/conversion/plugins/fb2_input.py @@ -1,6 +1,7 @@ """ Convert .fb2 files to .lrf """ +import mimetypes import os import pkg_resources import re @@ -10,7 +11,6 @@ from lxml import etree from ebook_converter import constants as const from ebook_converter.customize.conversion import InputFormatPlugin from ebook_converter.customize.conversion import OptionRecommendation -from ebook_converter import guess_type FB2NS = 'http://www.gribuser.ru/xml/fictionbook/2.0' @@ -152,7 +152,8 @@ class FB2Input(InputFormatPlugin): break opf = OPFCreator(os.getcwd(), mi) - entries = [(f2, guess_type(f2)[0]) for f2 in os.listdir(u'.')] + entries = [(f2, mimetypes.guess_type(f2)[0]) + for f2 in os.listdir(u'.')] opf.create_manifest(entries) opf.create_spine(['index.xhtml']) if cpath: diff --git a/ebook_converter/ebooks/conversion/plugins/html_input.py b/ebook_converter/ebooks/conversion/plugins/html_input.py index 7428231..97c553f 100644 --- a/ebook_converter/ebooks/conversion/plugins/html_input.py +++ b/ebook_converter/ebooks/conversion/plugins/html_input.py @@ -1,23 +1,19 @@ import functools +import mimetypes import os import re import tempfile import urllib.parse from ebook_converter.constants_old import islinux, isbsd -from ebook_converter.customize.conversion import (InputFormatPlugin, - OptionRecommendation) +from ebook_converter.customize.conversion import 
InputFormatPlugin +from ebook_converter.customize.conversion import OptionRecommendation from ebook_converter.utils.localization import get_lang from ebook_converter.utils.filenames import ascii_filename from ebook_converter.utils.imghdr import what from ebook_converter.polyglot.builtins import as_unicode -__license__ = 'GPL v3' -__copyright__ = '2012, Kovid Goyal ' -__docformat__ = 'restructuredtext en' - - def sanitize_file_name(x): ans = re.sub(r'\s+', ' ', re.sub(r'[?&=;#]', '_', ascii_filename(x))).strip().rstrip('.') ans, ext = ans.rpartition('.')[::2] @@ -99,7 +95,6 @@ class HTMLInput(InputFormatPlugin): from ebook_converter.ebooks.oeb.base import (DirContainer, rewrite_links, urlnormalize, BINARY_MIME, OEB_STYLES, xpath, urlquote) - from ebook_converter import guess_type from ebook_converter.ebooks.oeb.transforms.metadata import \ meta_info_to_oeb_metadata from ebook_converter.ebooks.html.input import get_filelist @@ -164,7 +159,7 @@ class HTMLInput(InputFormatPlugin): self.added_resources[path] = href self.urlnormalize, self.DirContainer = urlnormalize, DirContainer self.urldefrag = urllib.parse.urldefrag - self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME + self.BINARY_MIME = BINARY_MIME self.log('Rewriting HTML links') for f in filelist: @@ -262,7 +257,7 @@ class HTMLInput(InputFormatPlugin): if link not in self.added_resources: bhref = os.path.basename(link) id, href = self.oeb.manifest.generate(id='added', href=sanitize_file_name(bhref)) - guessed = self.guess_type(href)[0] + guessed = mimetypes.guess_type(href)[0] media_type = guessed or self.BINARY_MIME if media_type == 'text/plain': self.log.warn('Ignoring link to text file %r'%link_) @@ -275,7 +270,7 @@ class HTMLInput(InputFormatPlugin): pass else: if img: - media_type = self.guess_type('dummy.'+img)[0] or self.BINARY_MIME + media_type = mimetypes.guess_type('dummy.'+img)[0] or self.BINARY_MIME self.oeb.log.debug('Added', link) self.oeb.container = 
self.DirContainer(os.path.dirname(link), diff --git a/ebook_converter/ebooks/conversion/plugins/htmlz_input.py b/ebook_converter/ebooks/conversion/plugins/htmlz_input.py index 6b55ecf..e8d7765 100644 --- a/ebook_converter/ebooks/conversion/plugins/htmlz_input.py +++ b/ebook_converter/ebooks/conversion/plugins/htmlz_input.py @@ -1,14 +1,9 @@ +import mimetypes import os -from ebook_converter import guess_type from ebook_converter.customize.conversion import InputFormatPlugin -__license__ = 'GPL 3' -__copyright__ = '2011, John Schember ' -__docformat__ = 'restructuredtext en' - - class HTMLZInput(InputFormatPlugin): name = 'HTLZ Input' @@ -124,7 +119,8 @@ class HTMLZInput(InputFormatPlugin): cdata = cf.read() cover_name = os.path.basename(cover_path) id, href = oeb.manifest.generate('cover', cover_name) - oeb.manifest.add(id, href, guess_type(cover_name)[0], data=cdata) + oeb.manifest.add(id, href, mimetypes.guess_type(cover_name)[0], + data=cdata) oeb.guide.add('cover', 'Cover', href) return oeb diff --git a/ebook_converter/ebooks/conversion/plumber.py b/ebook_converter/ebooks/conversion/plumber.py index a15a1cf..3b39790 100644 --- a/ebook_converter/ebooks/conversion/plumber.py +++ b/ebook_converter/ebooks/conversion/plumber.py @@ -14,7 +14,7 @@ from ebook_converter.ebooks.conversion.preprocess import HTMLPreProcessor from ebook_converter.ptempfile import PersistentTemporaryDirectory from ebook_converter.utils.date import parse_date from ebook_converter.utils.zipfile import ZipFile -from ebook_converter import extract, walk, filesystem_encoding, get_types_map +from ebook_converter import extract, walk, filesystem_encoding from ebook_converter.constants_old import __version__ @@ -1010,7 +1010,6 @@ OptionRecommendation(name='search_replace', from ebook_converter.utils.fonts.scanner import font_scanner # noqa import css_parser, logging css_parser.log.setLevel(logging.WARN) - get_types_map() # Ensure the mimetypes module is intialized if self.opts.debug_pipeline is not 
None: self.opts.verbose = max(self.opts.verbose, 4) diff --git a/ebook_converter/ebooks/docx/container.py b/ebook_converter/ebooks/docx/container.py index db11f3d..12a6f86 100644 --- a/ebook_converter/ebooks/docx/container.py +++ b/ebook_converter/ebooks/docx/container.py @@ -1,10 +1,11 @@ +import mimetypes import os import shutil import sys from lxml import etree -from ebook_converter import walk, guess_type +from ebook_converter import walk from ebook_converter.ebooks.metadata import authors_to_sort_string from ebook_converter.ebooks.metadata import string_to_authors from ebook_converter.ebooks.metadata.book.base import Metadata @@ -150,7 +151,7 @@ class DOCX(object): ext = name.rpartition('.')[-1].lower() if ext in self.default_content_types: return self.default_content_types[ext] - return guess_type(name)[0] + return mimetypes.guess_type(name)[0] def read_package_relationships(self): try: diff --git a/ebook_converter/ebooks/docx/to_html.py b/ebook_converter/ebooks/docx/to_html.py index 30c2a64..d1c4ed8 100644 --- a/ebook_converter/ebooks/docx/to_html.py +++ b/ebook_converter/ebooks/docx/to_html.py @@ -1,5 +1,6 @@ import sys, os, re, math, errno, uuid, numbers from collections import OrderedDict, defaultdict +import mimetypes from lxml import etree from lxml import html @@ -24,8 +25,6 @@ from ebook_converter.ebooks.metadata.opf2 import OPFCreator from ebook_converter.utils.localization import canonicalize_lang, lang_as_iso639_1 -__license__ = 'GPL v3' -__copyright__ = '2013, Kovid Goyal ' NBSP = '\xa0' @@ -384,7 +383,7 @@ class Convert(object): opf.create_manifest_from_files_in([self.dest_dir]) for item in opf.manifest: if item.media_type == 'text/html': - item.media_type = guess_type('a.xhtml')[0] + item.media_type = mimetypes.guess_type('a.xhtml')[0] opf.create_spine(['index.html']) if self.cover_image is not None: opf.guide.set_cover(self.cover_image) diff --git a/ebook_converter/ebooks/docx/writer/container.py 
b/ebook_converter/ebooks/docx/writer/container.py index 015af68..9af3b0a 100644 --- a/ebook_converter/ebooks/docx/writer/container.py +++ b/ebook_converter/ebooks/docx/writer/container.py @@ -1,9 +1,9 @@ +import mimetypes import textwrap, os from lxml import etree from lxml.builder import ElementMaker -from ebook_converter import guess_type from ebook_converter.constants_old import numeric_version, __appname__ from ebook_converter.ebooks.docx.names import DOCXNamespace from ebook_converter.ebooks.metadata import authors_to_string @@ -179,16 +179,20 @@ class DOCX(object): types.append(E.Override(PartName=partname, ContentType=mt)) added = {'png', 'gif', 'jpeg', 'jpg', 'svg', 'xml'} for ext in added: - types.append(E.Default(Extension=ext, ContentType=guess_type('a.'+ext)[0])) - for ext, mt in {"rels": "application/vnd.openxmlformats-package.relationships+xml", - "odttf": "application/vnd.openxmlformats-officedocument.obfuscatedFont"}.items(): + types.append(E.Default(Extension=ext, + ContentType=mimetypes.guess_type('a.' + + ext)[0])) + for ext, mt in {"rels": "application/vnd.openxmlformats-package" + ".relationships+xml", + "odttf": "application/vnd.openxmlformats-" + "officedocument.obfuscatedFont"}.items(): added.add(ext) types.append(E.Default(Extension=ext, ContentType=mt)) for fname in self.images: ext = fname.rpartition(os.extsep)[-1] if ext not in added: added.add(ext) - mt = guess_type('a.' + ext)[0] + mt = mimetypes.guess_type('a.' 
+ ext)[0] if mt: types.append(E.Default(Extension=ext, ContentType=mt)) return xml2str(types) diff --git a/ebook_converter/ebooks/lrf/input.py b/ebook_converter/ebooks/lrf/input.py index d9ed86f..fd28128 100644 --- a/ebook_converter/ebooks/lrf/input.py +++ b/ebook_converter/ebooks/lrf/input.py @@ -1,9 +1,10 @@ -import textwrap, operator -from copy import deepcopy, copy +import copy +import mimetypes +import operator +import textwrap from lxml import etree -from ebook_converter import guess_type from ebook_converter.polyglot.builtins import as_bytes @@ -87,7 +88,7 @@ class MediaType(etree.XSLTExtension): def execute(self, context, self_node, input_node, output_parent): name = input_node.get('file', None) - typ = guess_type(name)[0] + typ = mimetypes.guess_type(name)[0] if not typ: typ = 'application/octet-stream' output_parent.text = typ @@ -120,7 +121,7 @@ class TextBlock(etree.XSLTExtension): self.plot_map = plot_map def execute(self, context, self_node, input_node, output_parent): - input_node = deepcopy(input_node) + input_node = copy.deepcopy(input_node) div = etree.Element('div') self.render_block(input_node, div) output_parent.append(div) @@ -190,7 +191,7 @@ class TextBlock(etree.XSLTExtension): for child in children: p.remove(child) if pattrib and child.tag == "Span": - attrib = copy(pattrib) + attrib = copy.copy(pattrib) attrib.update(child.attrib) child.attrib.update(attrib) diff --git a/ebook_converter/ebooks/metadata/__init__.py b/ebook_converter/ebooks/metadata/__init__.py index a50093d..3196056 100644 --- a/ebook_converter/ebooks/metadata/__init__.py +++ b/ebook_converter/ebooks/metadata/__init__.py @@ -2,22 +2,18 @@ Provides abstraction for metadata reading.writing from a variety of ebook formats. 
""" +import mimetypes import os import re import sys import urllib.parse -from ebook_converter import relpath, guess_type, prints, force_unicode +from ebook_converter import relpath, prints, force_unicode from ebook_converter.utils.config_base import tweaks from ebook_converter.polyglot.builtins import as_unicode from ebook_converter.polyglot.urllib import unquote -__license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' -__docformat__ = 'restructuredtext en' - - try: _author_pat = re.compile(tweaks['authors_split_regex']) except Exception: @@ -231,7 +227,7 @@ class Resource(object): self.path = None self.fragment = '' try: - self.mime_type = guess_type(href_or_path)[0] + self.mime_type = mimetypes.guess_type(href_or_path)[0] except: self.mime_type = None if self.mime_type is None: diff --git a/ebook_converter/ebooks/metadata/fb2.py b/ebook_converter/ebooks/metadata/fb2.py index 43bc493..6e72e42 100644 --- a/ebook_converter/ebooks/metadata/fb2.py +++ b/ebook_converter/ebooks/metadata/fb2.py @@ -1,6 +1,7 @@ """ Read meta information from fb2 files """ +import mimetypes import functools import os import random @@ -225,7 +226,7 @@ def _parse_cover_data(root, imgid, mi, ctx): mime_extensions = guess_all_extensions(mimetype) if not mime_extensions and mimetype.startswith('image/'): - mimetype_fromid = guess_type(imgid)[0] + mimetype_fromid = mimetypes.guess_type(imgid)[0] if mimetype_fromid and mimetype_fromid.startswith('image/'): mime_extensions = guess_all_extensions(mimetype_fromid) diff --git a/ebook_converter/ebooks/metadata/opf2.py b/ebook_converter/ebooks/metadata/opf2.py index 9c85d23..64b9fa7 100644 --- a/ebook_converter/ebooks/metadata/opf2.py +++ b/ebook_converter/ebooks/metadata/opf2.py @@ -6,6 +6,7 @@ import functools import glob import io import json +import mimetypes import os import re import sys @@ -74,7 +75,7 @@ class Resource(object): # {{{ self.path = None self.fragment = '' try: - self.mime_type = guess_type(href_or_path)[0] 
+ self.mime_type = mimetypes.guess_type(href_or_path)[0] except Exception: self.mime_type = None if self.mime_type is None: @@ -1304,8 +1305,8 @@ class OPF(object): # {{{ 'other.ms-coverimage'): for item in self.guide: if item.type.lower() == t: - self.create_manifest_item(item.href(), - guess_type(path)[0]) + self.create_manifest_item( + item.href(), mimetypes.guess_type(path)[0]) def get_metadata_element(self, name): matches = self.metadata_elem_path(self.metadata, name=name) diff --git a/ebook_converter/ebooks/mobi/reader/mobi6.py b/ebook_converter/ebooks/mobi/reader/mobi6.py index a59a2cd..6087a67 100644 --- a/ebook_converter/ebooks/mobi/reader/mobi6.py +++ b/ebook_converter/ebooks/mobi/reader/mobi6.py @@ -1,5 +1,6 @@ import shutil, os, re, struct, textwrap, io import logging +import mimetypes from lxml import html, etree @@ -674,7 +675,8 @@ class MobiReader(object): for i in getattr(self, 'image_names', []): path = os.path.join(bp, 'images', i) added.add(path) - manifest.append((path, guess_type(path)[0] or 'image/jpeg')) + manifest.append((path, + mimetypes.guess_type(path)[0] or 'image/jpeg')) if cover_copied is not None: manifest.append((cover_copied, 'image/jpeg')) diff --git a/ebook_converter/ebooks/oeb/polish/utils.py b/ebook_converter/ebooks/oeb/polish/utils.py index 3465a57..c3077f5 100644 --- a/ebook_converter/ebooks/oeb/polish/utils.py +++ b/ebook_converter/ebooks/oeb/polish/utils.py @@ -1,12 +1,9 @@ import bisect import os import re +import mimetypes -from ebook_converter import guess_type as _guess_type, replace_entities - - -__license__ = 'GPL v3' -__copyright__ = '2013, Kovid Goyal ' +from ebook_converter import replace_entities def _upper(string): @@ -14,7 +11,7 @@ def _upper(string): def guess_type(x): - return _guess_type(x)[0] or 'application/octet-stream' + return mimetypes.guess_type(x)[0] or 'application/octet-stream' def setup_css_parser_serialization(tab_width=2): diff --git a/ebook_converter/ebooks/oeb/reader.py 
b/ebook_converter/ebooks/oeb/reader.py index f5a02f7..d92403f 100644 --- a/ebook_converter/ebooks/oeb/reader.py +++ b/ebook_converter/ebooks/oeb/reader.py @@ -4,6 +4,7 @@ Container-/OPF-based input OEBBook reader. import collections import copy import io +import mimetypes import os import re import sys @@ -22,7 +23,7 @@ from ebook_converter.utils.cleantext import clean_xml_chars from ebook_converter.utils.localization import get_lang from ebook_converter.ptempfile import TemporaryDirectory from ebook_converter.constants_old import __appname__, __version__ -from ebook_converter import guess_type, xml_replace_entities +from ebook_converter import xml_replace_entities from ebook_converter.polyglot.urllib import unquote @@ -130,7 +131,6 @@ class OEBReader(object): meta_info_to_oeb_metadata stream = io.BytesIO(etree.tostring(opf, xml_declaration=True, encoding='utf-8')) - # o = opf_meta.OPF(stream) o = OPF(stream) pwm = o.primary_writing_mode if pwm: @@ -251,7 +251,7 @@ class OEBReader(object): href) warned.add(href) id, _ = manifest.generate(id='added') - guessed = guess_type(href)[0] + guessed = mimetypes.guess_type(href)[0] media_type = guessed or base.BINARY_MIME added = manifest.add(id, href, media_type) unchecked.add(added) @@ -268,7 +268,7 @@ class OEBReader(object): if media_type is None: media_type = elem.get('mediatype', None) if not media_type or media_type == 'text/xml': - guessed = guess_type(href)[0] + guessed = mimetypes.guess_type(href)[0] media_type = guessed or media_type or base.BINARY_MIME if hasattr(media_type, 'lower'): media_type = media_type.lower() diff --git a/ebook_converter/ebooks/oeb/transforms/cover.py b/ebook_converter/ebooks/oeb/transforms/cover.py index f03e52c..5ebf265 100644 --- a/ebook_converter/ebooks/oeb/transforms/cover.py +++ b/ebook_converter/ebooks/oeb/transforms/cover.py @@ -1,9 +1,9 @@ +import mimetypes import textwrap import urllib.parse from lxml import etree -from ebook_converter import guess_type from 
ebook_converter.utils.imghdr import identify from ebook_converter.polyglot.urllib import unquote @@ -115,7 +115,7 @@ class CoverManager(object): else self.svg_template tp = templ % unquote(href) id, href = m.generate('titlepage', 'titlepage.xhtml') - item = m.add(id, href, guess_type('t.xhtml')[0], + item = m.add(id, href, mimetypes.guess_type('t.xhtml')[0], data=etree.fromstring(tp)) else: key = urllib.parse.urldefrag(self.oeb.guide['titlepage'].href)[0] diff --git a/ebook_converter/ebooks/oeb/transforms/data_url.py b/ebook_converter/ebooks/oeb/transforms/data_url.py index 60f874a..07c0363 100644 --- a/ebook_converter/ebooks/oeb/transforms/data_url.py +++ b/ebook_converter/ebooks/oeb/transforms/data_url.py @@ -1,3 +1,4 @@ +import mimetypes import re from ebook_converter.ebooks.oeb.base import XPath, urlunquote from ebook_converter.polyglot.builtins import as_bytes @@ -45,5 +46,6 @@ class DataURL(object): self.log('Found image encoded as data URI converting it to normal image') from ebook_converter import guess_type item_id, item_href = oeb.manifest.generate('data-url-image', 'data-url-image.' + fmt) - oeb.manifest.add(item_id, item_href, guess_type(item_href)[0], data=data) + oeb.manifest.add(item_id, item_href, + mimetypes.guess_type(item_href)[0], data=data) return item_href diff --git a/ebook_converter/ebooks/oeb/transforms/flatcss.py b/ebook_converter/ebooks/oeb/transforms/flatcss.py index 6d29be3..c336bc4 100644 --- a/ebook_converter/ebooks/oeb/transforms/flatcss.py +++ b/ebook_converter/ebooks/oeb/transforms/flatcss.py @@ -3,6 +3,7 @@ CSS flattening transform. 
""" import collections import math +import mimetypes import numbers import operator import re @@ -13,7 +14,6 @@ import css_parser from css_parser import css as cp_css from ebook_converter import constants as const -from ebook_converter import guess_type from ebook_converter.ebooks import unit_convert from ebook_converter.ebooks.oeb import base from ebook_converter.ebooks.oeb import parse_utils @@ -125,8 +125,9 @@ class EmbedFontsCSSRules(object): rules = [base.css_text(x) for x in self.rules] rules = '\n\n'.join(rules) sheet = css_parser.parseString(rules, validate=False) - self.href = oeb.manifest.add(iid, href, guess_type(href)[0], - data=sheet).href + self.href = oeb.manifest.add(iid, href, + mimetypes.guess_type(href)[0], + data=sheet).href return self.href @@ -244,7 +245,7 @@ class CSSFlattener(object): fid, href = self.oeb.manifest.generate(id=u'font', href='fonts/%s.%s'%(ascii_filename(font['full_name']).replace(' ', '-'), ext)) item = self.oeb.manifest.add(fid, href, - guess_type('dummy.'+ext)[0], + mimetypes.guess_type('dummy.'+ext)[0], data=font_scanner.get_font_data(font)) item.unload_data_from_memory() diff --git a/ebook_converter/ebooks/oeb/transforms/jacket.py b/ebook_converter/ebooks/oeb/transforms/jacket.py index 9786bfb..7f14d66 100644 --- a/ebook_converter/ebooks/oeb/transforms/jacket.py +++ b/ebook_converter/ebooks/oeb/transforms/jacket.py @@ -1,3 +1,4 @@ +import mimetypes import sys, os, re from xml.sax.saxutils import escape from string import Formatter @@ -5,7 +6,7 @@ import pkg_resources import urllib.parse from ebook_converter import constants as const -from ebook_converter import guess_type, strftime +from ebook_converter import strftime from ebook_converter.constants_old import iswindows from ebook_converter.ebooks.oeb import base from ebook_converter.ebooks.oeb.base import XPath, xml2text, urlnormalize @@ -124,7 +125,8 @@ class Jacket(Base): alt_comments=comments, rescale_fonts=True) id, href = self.oeb.manifest.generate('calibre_jacket', 
'jacket.xhtml') - jacket = self.oeb.manifest.add(id, href, guess_type(href)[0], data=root) + jacket = self.oeb.manifest.add(id, href, mimetypes.guess_type(href)[0], + data=root) self.oeb.spine.insert(0, jacket, True) self.oeb.inserted_metadata_jacket = jacket for img, path in referenced_images(root): @@ -132,7 +134,9 @@ class Jacket(Base): ext = path.rpartition('.')[-1].lower() item_id, href = self.oeb.manifest.generate('jacket_image', 'jacket_img.'+ext) with open(path, 'rb') as f: - item = self.oeb.manifest.add(item_id, href, guess_type(href)[0], data=f.read()) + item = self.oeb.manifest.add( + item_id, href, mimetypes.guess_type(href)[0], + data=f.read()) item.unload_data_from_memory() img.set('src', jacket.relhref(item.href)) diff --git a/ebook_converter/ebooks/oeb/transforms/metadata.py b/ebook_converter/ebooks/oeb/transforms/metadata.py index be550b6..2198276 100644 --- a/ebook_converter/ebooks/oeb/transforms/metadata.py +++ b/ebook_converter/ebooks/oeb/transforms/metadata.py @@ -1,9 +1,9 @@ +import mimetypes import os import re from ebook_converter.ebooks.oeb import base from ebook_converter.utils.date import isoformat, now -from ebook_converter import guess_type def meta_info_to_oeb_metadata(mi, m, log, override_input_metadata=False): @@ -156,9 +156,8 @@ class MergeMetadata(object): new_cover_item = None if cdata: id, href = self.oeb.manifest.generate('cover', 'cover.'+ext) - new_cover_item = self.oeb.manifest.add(id, href, - guess_type('cover.'+ext)[0], - data=cdata) + new_cover_item = self.oeb.manifest.add( + id, href, mimetypes.guess_type('cover.'+ext)[0], data=cdata) self.oeb.guide.add('cover', 'Cover', href) if do_remove_old_cover: self.remove_old_cover(item, new_cover_item.href)