import os
from contextlib import closing

from ebook_converter.customize import FileTypePlugin
from ebook_converter.utils.localization import canonicalize_lang


__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'


def is_comic(list_of_names):
    extensions = {x.rpartition('.')[-1].lower() for x in list_of_names
                  if '.' in x and x.lower().rpartition('/')[-1] != 'thumbs.db'}
    comic_extensions = {'jpg', 'jpeg', 'png'}
    return len(extensions - comic_extensions) == 0
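
# Illustrative sketch: is_comic() reports True only when every entry that has
# an extension is a JPEG or PNG image; Thumbs.db entries and extension-less
# names are ignored.
#
#   >>> is_comic(['cover.jpg', 'pages/page01.png', 'Thumbs.db'])
#   True
#   >>> is_comic(['cover.jpg', 'metadata.opf'])
#   False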


def archive_type(stream):
    from ebook_converter.utils.zipfile import stringFileHeader
    try:
        pos = stream.tell()
    except Exception:
        pos = 0
    id_ = stream.read(4)
    ans = None
    if id_ == stringFileHeader:
        ans = 'zip'
    elif id_.startswith(b'Rar'):
        ans = 'rar'
    try:
        stream.seek(pos)
    except Exception:
        pass
    return ans
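
# Illustrative sketch: archive_type() sniffs only the first four bytes and
# restores the stream position afterwards. ZIP local file headers start with
# b'PK\x03\x04' and RAR archives with b'Rar!'.
#
#   >>> import io
#   >>> archive_type(io.BytesIO(b'PK\x03\x04' + b'\x00' * 26))
#   'zip'
#   >>> archive_type(io.BytesIO(b'Rar!\x1a\x07\x00'))
#   'rar'
#   >>> archive_type(io.BytesIO(b'%PDF-1.7')) is None
#   True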


class KPFExtract(FileTypePlugin):

    name = 'KPF Extract'
    author = 'Kovid Goyal'
    description = _('Extract the source DOCX file from Amazon Kindle Create KPF files.'
                    ' Note this will not contain any edits made in the Kindle Create program itself.')
    file_types = {'kpf'}
    supported_platforms = ['windows', 'osx', 'linux']
    on_import = True

    def run(self, archive):
        from ebook_converter.utils.zipfile import ZipFile
        with ZipFile(archive, 'r') as zf:
            fnames = zf.namelist()
            candidates = [x for x in fnames if x.lower().endswith('.docx')]
            if not candidates:
                return archive
            of = self.temporary_file('_kpf_extract.docx')
            with closing(of):
                of.write(zf.read(candidates[0]))
        return of.name
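
# Usage note (per the FileTypePlugin contract assumed above): with on_import
# set, the plugin runs when a .kpf file is imported; run() receives the path
# to that file and returns either the path of the DOCX written to a temporary
# file or, when no embedded DOCX is found, the original archive path.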


class ArchiveExtract(FileTypePlugin):
    name = 'Archive Extract'
    author = 'Kovid Goyal'
    description = _('Extract common e-book formats from archive files '
                    '(ZIP/RAR). Also try to autodetect if they are actually '
                    'CBZ/CBR files.')
    file_types = {'zip', 'rar'}
    supported_platforms = ['windows', 'osx', 'linux']
    on_import = True

    def run(self, archive):
        from ebook_converter.utils.zipfile import ZipFile
        is_rar = archive.lower().endswith('.rar')
        if is_rar:
            from ebook_converter.utils.unrar import extract_member, names
        else:
            zf = ZipFile(archive, 'r')

        if is_rar:
            fnames = list(names(archive))
        else:
            fnames = zf.namelist()

        def fname_ok(fname):
            bn = os.path.basename(fname).lower()
            if bn == 'thumbs.db':
                return False
            if '.' not in bn:
                return False
            if bn.rpartition('.')[-1] in {'diz', 'nfo'}:
                return False
            if '__MACOSX' in fname.split('/'):
                return False
            return True

        fnames = list(filter(fname_ok, fnames))
        if is_comic(fnames):
            ext = '.cbr' if is_rar else '.cbz'
            of = self.temporary_file('_archive_extract'+ext)
            with open(archive, 'rb') as f:
                of.write(f.read())
            of.close()
            return of.name
        if len(fnames) > 1 or not fnames:
            return archive
        fname = fnames[0]
        ext = os.path.splitext(fname)[1][1:]
        if ext.lower() not in {
                'lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf', 'mp3', 'pdb',
                'azw', 'azw1', 'azw3', 'fb2', 'docx', 'doc', 'odt'}:
            return archive

        of = self.temporary_file('_archive_extract.'+ext)
        with closing(of):
            if is_rar:
                data = extract_member(archive, match=None, name=fname)[1]
                of.write(data)
            else:
                of.write(zf.read(fname))
        return of.name
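
# Usage note: ArchiveExtract.run() has three outcomes. An archive whose
# remaining entries are all images is copied verbatim and renamed to
# .cbz/.cbr, an archive holding exactly one recognised e-book file has that
# single member extracted to a temporary file, and anything else is returned
# unchanged.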


def get_comic_book_info(d, mi, series_index='volume'):
    # See http://code.google.com/p/comicbookinfo/wiki/Example
    series = d.get('series', '')
    if series.strip():
        mi.series = series
        si = d.get(series_index, None)
        if si is None:
            si = d.get('issue' if series_index == 'volume' else 'volume', None)
        if si is not None:
            try:
                mi.series_index = float(si)
            except Exception:
                mi.series_index = 1
    if d.get('language', None):
        # ComicBookInfo stores the language name under the 'language' key.
        lang = canonicalize_lang(d.get('language'))
        if lang:
            mi.languages = [lang]
    if d.get('rating', -1) > -1:
        mi.rating = d['rating']
    for x in ('title', 'publisher'):
        y = d.get(x, '').strip()
        if y:
            setattr(mi, x, y)
    tags = d.get('tags', [])
    if tags:
        mi.tags = tags
    authors = []
    for credit in d.get('credits', []):
        if credit.get('role', '') in ('Writer', 'Artist', 'Cartoonist',
                                      'Creator'):
            x = credit.get('person', '')
            if x:
                x = ' '.join(reversed(x.split(', ')))
                authors.append(x)
    if authors:
        mi.authors = authors
    comments = d.get('comments', '')
    if comments and comments.strip():
        mi.comments = comments.strip()
    pubm, puby = d.get('publicationMonth', None), d.get('publicationYear', None)
    if puby is not None:
        from ebook_converter.utils.date import parse_only_date
        from datetime import date
        try:
            dt = date(puby, 6 if pubm is None else pubm, 15)
            dt = parse_only_date(str(dt))
            mi.pubdate = dt
        except Exception:
            pass
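
# Illustrative sketch: d is one ComicBookInfo block of the archive comment and
# its fields are copied onto the passed-in metadata object, roughly:
#
#   >>> from ebook_converter.ebooks.metadata import MetaInformation
#   >>> mi = MetaInformation(None, None)
#   >>> get_comic_book_info({'series': 'Watchmen', 'volume': 2,
#   ...                      'credits': [{'role': 'Writer',
#   ...                                   'person': 'Moore, Alan'}]}, mi)
#   >>> mi.series, mi.series_index, mi.authors
#   ('Watchmen', 2.0, ['Alan Moore'])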


def parse_comic_comment(comment, series_index='volume'):
    # See http://code.google.com/p/comicbookinfo/wiki/Example
    from ebook_converter.ebooks.metadata import MetaInformation
    import json
    mi = MetaInformation(None, None)
    m = json.loads(comment)
    if isinstance(m, dict):
        for cat in m:
            if cat.startswith('ComicBookInfo'):
                get_comic_book_info(m[cat], mi, series_index=series_index)
                break
    return mi
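
# Illustrative sketch: the comment blob is a JSON document keyed by schema
# version, e.g.:
#
#   >>> mi = parse_comic_comment(
#   ...     b'{"ComicBookInfo/1.0": {"series": "Watchmen", "volume": 2}}')
#   >>> mi.series
#   'Watchmen'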


def get_comic_metadata(stream, stream_type, series_index='volume'):
    comment = None
    if stream_type == 'cbz':
        from ebook_converter.utils.zipfile import ZipFile
        zf = ZipFile(stream)
        comment = zf.comment
    elif stream_type == 'cbr':
        from ebook_converter.utils.unrar import comment as get_comment
        comment = get_comment(stream)

    return parse_comic_comment(comment or b'{}', series_index=series_index)
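

# Illustrative sketch: callers pass an open file object together with the
# detected container type ('example.cbz' is a hypothetical file name):
#
#   >>> with open('example.cbz', 'rb') as f:
#   ...     mi = get_comic_metadata(f, 'cbz')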