mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-04-14 17:43:31 +02:00
Initial import
This commit is contained in:
203
ebook_converter/ebooks/metadata/archive.py
Normal file
203
ebook_converter/ebooks/metadata/archive.py
Normal file
@@ -0,0 +1,203 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os
|
||||
from contextlib import closing
|
||||
|
||||
from calibre.customize import FileTypePlugin
|
||||
from calibre.utils.localization import canonicalize_lang
|
||||
from polyglot.builtins import filter, unicode_type
|
||||
|
||||
|
||||
def is_comic(list_of_names):
    """Return True when every file extension in *list_of_names* is an image.

    Names without a dot are ignored, as is any entry whose last
    ``/``-separated component is ``thumbs.db`` (compared case-insensitively).
    A list that yields no extensions at all (including an empty list)
    therefore also returns True.
    """
    image_types = {'jpg', 'jpeg', 'png'}
    seen = set()
    for name in list_of_names:
        if '.' not in name:
            continue
        if name.lower().rpartition('/')[-1] == 'thumbs.db':
            continue
        seen.add(name.rpartition('.')[-1].lower())
    # Comic <=> no extension outside the known image types was collected.
    return seen.issubset(image_types)
|
||||
|
||||
|
||||
def archive_type(stream):
    """Sniff the first four bytes of *stream* to detect a ZIP or RAR archive.

    The stream position is restored afterwards on a best-effort basis.

    :param stream: file-like object supporting ``read``; ``tell`` and
        ``seek`` are used when they work. The data read is compared against
        byte strings, so the stream is expected to be opened in binary mode.
    :return: ``'zip'``, ``'rar'``, or ``None`` when neither signature matches.
    """
    from calibre.utils.zipfile import stringFileHeader
    try:
        pos = stream.tell()
    except Exception:
        # No usable tell(): fall back to rewinding to the start afterwards.
        # ``except Exception`` (not a bare ``except``) keeps KeyboardInterrupt
        # and SystemExit propagating.
        pos = 0
    magic = stream.read(4)
    ans = None
    if magic == stringFileHeader:
        ans = 'zip'
    elif magic.startswith(b'Rar'):
        ans = 'rar'
    try:
        stream.seek(pos)
    except Exception:
        # Best effort only: a non-seekable stream stays where read() left it.
        pass
    return ans
|
||||
|
||||
|
||||
class KPFExtract(FileTypePlugin):
    """File type plugin (``on_import``) that pulls the DOCX out of a KPF file."""

    name = 'KPF Extract'
    author = 'Kovid Goyal'
    description = _('Extract the source DOCX file from Amazon Kindle Create KPF files.'
                    ' Note this will not contain any edits made in the Kindle Create program itself.')
    file_types = {'kpf'}
    supported_platforms = ['windows', 'osx', 'linux']
    on_import = True

    def run(self, archive):
        """Extract the first ``.docx`` member of the ZIP file at *archive*.

        :param archive: path of the KPF file.
        :return: path of a temporary file holding the DOCX data, or *archive*
            unchanged when the container has no ``.docx`` member.
        """
        from calibre.utils.zipfile import ZipFile
        with ZipFile(archive, 'r') as zf:
            # First member (in listing order) whose name ends in .docx.
            docx_name = next(
                (member for member in zf.namelist()
                 if member.lower().endswith('.docx')),
                None)
            if docx_name is None:
                return archive
            of = self.temporary_file('_kpf_extract.docx')
            with closing(of):
                of.write(zf.read(docx_name))
        return of.name
|
||||
|
||||
|
||||
class ArchiveExtract(FileTypePlugin):
    """File type plugin (``on_import``) that unwraps ZIP/RAR archives.

    An archive containing only images is copied to a ``.cbz``/``.cbr``
    temporary file; an archive containing exactly one recognised e-book file
    has that file extracted; anything else is passed through untouched.
    """

    name = 'Archive Extract'
    author = 'Kovid Goyal'
    description = _('Extract common e-book formats from archive files '
                    '(ZIP/RAR). Also try to autodetect if they are actually '
                    'CBZ/CBR files.')
    file_types = {'zip', 'rar'}
    supported_platforms = ['windows', 'osx', 'linux']
    on_import = True

    def run(self, archive):
        """Process the archive at path *archive*.

        :param archive: path to the archive; a name ending in ``.rar``
            (case-insensitive) is handled as RAR, anything else as ZIP.
        :return: path of a temporary file (comic copy or extracted e-book),
            or *archive* itself when nothing applicable was found.
        """
        from calibre.utils.zipfile import ZipFile
        is_rar = archive.lower().endswith('.rar')
        if is_rar:
            from calibre.utils.unrar import extract_member, names

            def read_member(fname):
                return extract_member(archive, match=None, name=fname)[1]

            return self._extract_from_names(
                archive, list(names(archive)), read_member, '.cbr')

        # The context manager closes the ZIP handle on every exit path.
        with ZipFile(archive, 'r') as zf:
            return self._extract_from_names(
                archive, zf.namelist(), zf.read, '.cbz')

    def _extract_from_names(self, archive, fnames, read_member, comic_ext):
        """Choose the output for *archive* from its member names.

        :param archive: path of the archive being processed.
        :param fnames: list of member names in the archive.
        :param read_member: callable returning the data of a named member.
        :param comic_ext: extension (with leading dot) for a comic copy.
        :return: path of the temporary output file, or *archive*.
        """

        def fname_ok(fname):
            bn = os.path.basename(fname).lower()
            if bn == 'thumbs.db' or '.' not in bn:
                return False
            if bn.rpartition('.')[-1] in {'diz', 'nfo'}:
                return False
            return '__MACOSX' not in fname.split('/')

        fnames = [x for x in fnames if fname_ok(x)]
        if is_comic(fnames):
            # Stream the copy instead of loading the whole archive in memory,
            # and close the temporary file even if the copy fails.
            import shutil
            of = self.temporary_file('_archive_extract' + comic_ext)
            with closing(of), open(archive, 'rb') as f:
                shutil.copyfileobj(f, of)
            return of.name
        # Only a single remaining member is a candidate for extraction.
        # NOTE: an empty list never gets here, because is_comic() returns
        # True when no extensions were collected.
        if len(fnames) != 1:
            return archive
        fname = fnames[0]
        ext = os.path.splitext(fname)[1][1:]
        if ext.lower() not in {
                'lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf', 'mp3', 'pdb',
                'azw', 'azw1', 'azw3', 'fb2', 'docx', 'doc', 'odt'}:
            return archive

        of = self.temporary_file('_archive_extract.' + ext)
        with closing(of):
            of.write(read_member(fname))
        return of.name
|
||||
|
||||
|
||||
def get_comic_book_info(d, mi, series_index='volume'):
    """Copy fields from a ComicBookInfo dictionary *d* onto *mi*.

    Only fields that are present and non-empty in *d* are set on *mi*.

    :param d: dictionary of ComicBookInfo fields (``series``, ``volume``,
        ``issue``, ``language``, ``rating``, ``title``, ``publisher``,
        ``tags``, ``credits``, ``comments``, ``publicationMonth``,
        ``publicationYear``).
    :param mi: metadata object whose attributes are assigned in place.
    :param series_index: key to prefer for the series index (``'volume'`` by
        default); the other of ``'volume'``/``'issue'`` is the fallback.
    """
    # See http://code.google.com/p/comicbookinfo/wiki/Example
    series = d.get('series', '')
    if series.strip():
        mi.series = series
        si = d.get(series_index, None)
        if si is None:
            si = d.get('issue' if series_index == 'volume' else 'volume', None)
        if si is not None:
            try:
                mi.series_index = float(si)
            except Exception:
                # Unparseable index: fall back to the first entry.
                mi.series_index = 1
    # Read the same key that is tested; looking up 'lang' here would always
    # miss the value stored under 'language'.
    language = d.get('language', None)
    if language:
        lang = canonicalize_lang(language)
        if lang:
            mi.languages = [lang]
    if d.get('rating', -1) > -1:
        mi.rating = d['rating']
    for x in ('title', 'publisher'):
        y = d.get(x, '').strip()
        if y:
            setattr(mi, x, y)
    tags = d.get('tags', [])
    if tags:
        mi.tags = tags
    authors = []
    for credit in d.get('credits', []):
        if credit.get('role', '') in ('Writer', 'Artist', 'Cartoonist',
                                      'Creator'):
            x = credit.get('person', '')
            if x:
                # "Last, First" -> "First Last"
                x = ' '.join((reversed(x.split(', '))))
                authors.append(x)
    if authors:
        mi.authors = authors
    comments = d.get('comments', '')
    if comments and comments.strip():
        mi.comments = comments.strip()
    pubm, puby = d.get('publicationMonth', None), d.get('publicationYear', None)
    if puby is not None:
        from calibre.utils.date import parse_only_date
        from datetime import date
        try:
            # Day 15 is always used; June stands in when the month is missing.
            dt = date(puby, 6 if pubm is None else pubm, 15)
            dt = parse_only_date(unicode_type(dt))
            mi.pubdate = dt
        except Exception:
            # Best effort: an invalid date leaves pubdate unset.
            pass
|
||||
|
||||
|
||||
def parse_comic_comment(comment, series_index='volume'):
    """Build a MetaInformation object from a JSON archive comment.

    The first top-level key starting with ``ComicBookInfo`` (if any) is
    handed to :func:`get_comic_book_info`. When *comment* does not decode to
    a JSON object, or has no such key, the returned metadata is left as
    constructed.

    :param comment: JSON text taken from the archive comment.
    :param series_index: forwarded unchanged to ``get_comic_book_info``.
    :return: the populated ``MetaInformation`` instance.
    """
    # See http://code.google.com/p/comicbookinfo/wiki/Example
    from calibre.ebooks.metadata import MetaInformation
    import json
    mi = MetaInformation(None, None)
    payload = json.loads(comment)
    if not isinstance(payload, dict):
        return mi
    info_key = next(
        (key for key in payload if key.startswith('ComicBookInfo')), None)
    if info_key is not None:
        get_comic_book_info(payload[info_key], mi, series_index=series_index)
    return mi
|
||||
|
||||
|
||||
def get_comic_metadata(stream, stream_type, series_index='volume'):
    """Read the archive comment of a comic file and parse it as metadata.

    :param stream: the comic archive, passed to ``ZipFile`` for ``'cbz'`` or
        to the unrar ``comment`` helper for ``'cbr'``.
    :param stream_type: ``'cbz'`` or ``'cbr'``; any other value is treated as
        having no comment.
    :param series_index: forwarded unchanged to ``parse_comic_comment``.
    :return: whatever ``parse_comic_comment`` returns for the comment, with
        ``b'{}'`` substituted when the comment is missing or empty.
    """
    if stream_type == 'cbz':
        from calibre.utils.zipfile import ZipFile
        raw = ZipFile(stream).comment
    elif stream_type == 'cbr':
        from calibre.utils.unrar import comment as get_comment
        raw = get_comment(stream)
    else:
        raw = None

    if not raw:
        # An empty JSON object yields blank metadata downstream.
        raw = b'{}'
    return parse_comic_comment(raw, series_index=series_index)
|
||||
Reference in New Issue
Block a user