mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-02-23 18:45:55 +01:00
Here is the first batch of modules, which are needed for converting several formats to LRF. Some of the logic has been changed; more cleanups will follow.
204 lines
6.6 KiB
Python
204 lines
6.6 KiB
Python
#!/usr/bin/env python2
|
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
|
|
__license__ = 'GPL v3'
|
|
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
|
__docformat__ = 'restructuredtext en'
|
|
|
|
import os
|
|
from contextlib import closing
|
|
|
|
from ebook_converter.customize import FileTypePlugin
|
|
from ebook_converter.utils.localization import canonicalize_lang
|
|
from ebook_converter.polyglot.builtins import filter, unicode_type
|
|
|
|
|
|
def is_comic(list_of_names):
    """Return True when every extension found in *list_of_names* is an image.

    Names without a dot are ignored, as is any entry whose final path
    component (split on '/') is ``thumbs.db`` in any letter case.  The
    remaining extensions must all be one of jpg, jpeg or png.  A listing
    that yields no extensions at all is therefore treated as a comic.
    """
    image_exts = {'jpg', 'jpeg', 'png'}
    found = set()
    for name in list_of_names:
        if '.' not in name:
            continue
        if name.lower().rpartition('/')[-1] == 'thumbs.db':
            continue
        found.add(name.rpartition('.')[-1].lower())
    return found.issubset(image_exts)
|
|
|
|
|
|
def archive_type(stream):
    """Sniff the first four bytes of *stream* to identify an archive.

    Returns ``'zip'`` when the bytes equal ``stringFileHeader`` from the
    project's zipfile module, ``'rar'`` when they start with ``b'Rar'``,
    and ``None`` otherwise.  The stream position is restored afterwards on
    a best-effort basis.
    """
    from ebook_converter.utils.zipfile import stringFileHeader
    try:
        pos = stream.tell()
    except Exception:
        # Stream cannot report its position; rewind to the start afterwards.
        # ``Exception`` (not a bare except) so that KeyboardInterrupt and
        # SystemExit are not swallowed here.
        pos = 0
    id_ = stream.read(4)
    ans = None
    if id_ == stringFileHeader:
        ans = 'zip'
    elif id_.startswith(b'Rar'):
        ans = 'rar'
    try:
        stream.seek(pos)
    except Exception:
        # Non-seekable streams are tolerated; the caller still gets the
        # detection result.
        pass
    return ans
|
|
|
|
|
|
class KPFExtract(FileTypePlugin):
    """File type plugin that pulls the DOCX file out of a KPF archive."""

    name = 'KPF Extract'
    author = 'Kovid Goyal'
    description = _(
        'Extract the source DOCX file from Amazon Kindle Create KPF files.'
        ' Note this will not contain any edits made in the Kindle Create program itself.'
    )
    file_types = {'kpf'}
    supported_platforms = ['windows', 'osx', 'linux']
    on_import = True

    def run(self, archive):
        """Return the path of the extracted DOCX, or *archive* if none exists.

        The KPF file is opened as a ZIP archive; the first member whose name
        ends in ``.docx`` (case-insensitive) is written to a temporary file
        obtained from ``self.temporary_file``.
        """
        from ebook_converter.utils.zipfile import ZipFile

        with ZipFile(archive, 'r') as kpf:
            docx_member = next(
                (member for member in kpf.namelist()
                 if member.lower().endswith('.docx')),
                None,
            )
            if docx_member is None:
                # Nothing to extract: hand the original file back untouched.
                return archive
            out = self.temporary_file('_kpf_extract.docx')
            with closing(out):
                out.write(kpf.read(docx_member))
        return out.name
|
|
|
|
|
|
class ArchiveExtract(FileTypePlugin):
    """File type plugin that unwraps ZIP/RAR archives.

    An archive whose members are all images is copied to a temporary file
    with a ``.cbz``/``.cbr`` extension.  An archive holding exactly one
    file with a recognised e-book extension has that file extracted.  Any
    other archive is returned unchanged.
    """

    name = 'Archive Extract'
    author = 'Kovid Goyal'
    description = _('Extract common e-book formats from archive files '
                    '(ZIP/RAR). Also try to autodetect if they are actually '
                    'CBZ/CBR files.')
    file_types = {'zip', 'rar'}
    supported_platforms = ['windows', 'osx', 'linux']
    on_import = True

    def run(self, archive):
        """Return the path of the file to use in place of *archive*.

        *archive* is a filesystem path; a path ending in ``.rar``
        (case-insensitive) is handled with the unrar helpers, anything else
        is opened as a ZIP file.  The ZIP handle is closed on every return
        path.
        """
        from ebook_converter.utils.zipfile import ZipFile

        is_rar = archive.lower().endswith('.rar')
        if is_rar:
            from ebook_converter.utils.unrar import extract_member, names

            def read_member(fname):
                # extract_member's result is indexed at [1] for the data.
                return extract_member(archive, match=None, name=fname)[1]

            return self._extract_from_listing(
                archive, list(names(archive)), read_member, is_rar)

        # Context manager guarantees the ZIP handle is released even when
        # we return early or an error is raised while extracting.
        with ZipFile(archive, 'r') as zf:
            return self._extract_from_listing(
                archive, zf.namelist(), zf.read, is_rar)

    @staticmethod
    def _fname_ok(fname):
        """Return False for archive members that should be ignored."""
        bn = os.path.basename(fname).lower()
        if bn == 'thumbs.db':
            return False
        if '.' not in bn:
            return False
        if bn.rpartition('.')[-1] in {'diz', 'nfo'}:
            return False
        if '__MACOSX' in fname.split('/'):
            return False
        return True

    def _extract_from_listing(self, archive, fnames, read_member, is_rar):
        """Decide what to do with *archive* given its member names.

        *read_member* is a callable taking a member name and returning that
        member's bytes.
        """
        fnames = [fname for fname in fnames if self._fname_ok(fname)]

        if is_comic(fnames):
            import shutil
            ext = '.cbr' if is_rar else '.cbz'
            of = self.temporary_file('_archive_extract' + ext)
            # Chunked copy so large comic archives are not held in memory;
            # both files are closed even if the copy fails.
            with closing(of), open(archive, 'rb') as f:
                shutil.copyfileobj(f, of)
            return of.name

        if len(fnames) != 1:
            # Empty or multi-file archives are left alone.
            return archive

        fname = fnames[0]
        ext = os.path.splitext(fname)[1][1:]
        if ext.lower() not in {
                'lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf', 'mp3', 'pdb',
                'azw', 'azw1', 'azw3', 'fb2', 'docx', 'doc', 'odt'}:
            return archive

        of = self.temporary_file('_archive_extract.' + ext)
        with closing(of):
            of.write(read_member(fname))
        return of.name
|
|
|
|
|
|
def get_comic_book_info(d, mi, series_index='volume'):
    """Copy fields from a ComicBookInfo dictionary onto a metadata object.

    :param d: dictionary of ComicBookInfo fields (``series``, ``volume``,
        ``issue``, ``language``, ``rating``, ``title``, ``publisher``,
        ``tags``, ``credits``, ``comments``, ``publicationMonth``,
        ``publicationYear``); every key is optional.
    :param mi: object whose attributes are set in place; only attributes
        for which *d* holds a usable value are touched.
    :param series_index: ``'volume'`` or ``'issue'`` - the key preferred
        for the series index; the other one is used as a fallback.
    """
    # See http://code.google.com/p/comicbookinfo/wiki/Example
    series = d.get('series', '')
    if series.strip():
        mi.series = series
        si = d.get(series_index, None)
        if si is None:
            si = d.get('issue' if series_index == 'volume' else 'volume', None)
        if si is not None:
            try:
                mi.series_index = float(si)
            except Exception:
                # Unparseable index: fall back to the first entry.
                mi.series_index = 1
    # The value must be read from the same 'language' key that is tested;
    # looking up 'lang' here meant the language was never picked up.
    language = d.get('language', None)
    if language:
        lang = canonicalize_lang(language)
        if lang:
            mi.languages = [lang]
    if d.get('rating', -1) > -1:
        mi.rating = d['rating']
    for x in ('title', 'publisher'):
        y = d.get(x, '').strip()
        if y:
            setattr(mi, x, y)
    tags = d.get('tags', [])
    if tags:
        mi.tags = tags
    authors = []
    for credit in d.get('credits', []):
        if credit.get('role', '') in ('Writer', 'Artist', 'Cartoonist',
                                      'Creator'):
            x = credit.get('person', '')
            if x:
                # "Last, First" -> "First Last"
                x = ' '.join((reversed(x.split(', '))))
                authors.append(x)
    if authors:
        mi.authors = authors
    comments = d.get('comments', '')
    if comments and comments.strip():
        mi.comments = comments.strip()
    pubm, puby = d.get('publicationMonth', None), d.get('publicationYear', None)
    if puby is not None:
        from ebook_converter.utils.date import parse_only_date
        from datetime import date
        try:
            # Only month/year are available: use mid-month, and June when
            # the month is missing too.
            dt = date(puby, 6 if pubm is None else pubm, 15)
            dt = parse_only_date(unicode_type(dt))
            mi.pubdate = dt
        except Exception:
            # Best effort: an invalid date simply leaves pubdate unset.
            pass
|
|
|
|
|
|
def parse_comic_comment(comment, series_index='volume'):
    """Build a MetaInformation object from a ComicBookInfo JSON comment.

    *comment* is parsed with ``json.loads``.  If the result is a dictionary,
    the first key starting with ``ComicBookInfo`` supplies the fields, which
    are applied through ``get_comic_book_info``.  Otherwise the returned
    metadata object is left as created.
    """
    # See http://code.google.com/p/comicbookinfo/wiki/Example
    from ebook_converter.ebooks.metadata import MetaInformation
    import json

    mi = MetaInformation(None, None)
    payload = json.loads(comment)
    if not isinstance(payload, dict):
        return mi

    info_key = next(
        (key for key in payload if key.startswith('ComicBookInfo')), None)
    if info_key is not None:
        get_comic_book_info(payload[info_key], mi, series_index=series_index)
    return mi
|
|
|
|
|
|
def get_comic_metadata(stream, stream_type, series_index='volume'):
    """Read the archive comment of a comic and parse it into metadata.

    For ``stream_type == 'cbz'`` the comment is taken from a ``ZipFile``
    opened on *stream*; for ``'cbr'`` it comes from the unrar ``comment``
    helper.  Any other type, or a missing/empty comment, is parsed as an
    empty JSON object.  Returns whatever ``parse_comic_comment`` returns.
    """
    raw_comment = None
    if stream_type == 'cbr':
        from ebook_converter.utils.unrar import comment as get_comment
        raw_comment = get_comment(stream)
    elif stream_type == 'cbz':
        from ebook_converter.utils.zipfile import ZipFile
        raw_comment = ZipFile(stream).comment

    if not raw_comment:
        raw_comment = b'{}'
    return parse_comic_comment(raw_comment, series_index=series_index)
|