mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-03-03 15:25:48 +01:00
This is an ongoing refactor of the calibre code to make it more readable and to transform it into something more coherent. This patch changes the imports in some modules so that they no longer pollute each module's namespace with symbols from other modules, which were often themselves imported from yet other modules. Yuck.
225 lines
7.1 KiB
Python
225 lines
7.1 KiB
Python
import re
|
|
from struct import pack
|
|
from io import BytesIO
|
|
|
|
from ebook_converter.ebooks.oeb import base
|
|
from ebook_converter.constants_old import iswindows, isosx
|
|
from ebook_converter.ebooks.mobi.utils import (utf8_text, to_base)
|
|
from ebook_converter.utils.localization import lang_as_iso639_1
|
|
from ebook_converter.ebooks.metadata import authors_to_sort_string
|
|
|
|
|
|
__license__ = 'GPL v3'
|
|
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
|
__docformat__ = 'restructuredtext en'
|
|
|
|
# Numeric EXTH record type for each metadata term / header field name.
# build_exth() looks metadata terms up in this table (terms that are not
# listed are skipped) and also reads the fixed entries such as 'pubdate',
# 'coveroffset' or 'startreading' directly by key.
EXTH_CODES = {
    # 1xx codes
    'creator': 100,
    'publisher': 101,
    'description': 103,
    'identifier': 104,
    'subject': 105,
    'pubdate': 106,
    'review': 107,
    'contributor': 108,
    'rights': 109,
    'type': 111,
    'source': 112,
    'versionnumber': 114,
    'startreading': 116,
    'kf8_header_index': 121,
    'num_of_resources': 125,
    'kf8_thumbnail_uri': 129,
    'kf8_unknown_count': 131,

    # 2xx codes
    'coveroffset': 201,
    'thumboffset': 202,
    'hasfakecover': 203,

    # 5xx codes
    'lastupdatetime': 502,
    'title': 503,
    'language': 524,
    'primary_writing_mode': 525,
    'page_progression_direction': 527,
}
|
|
|
|
# Matches one or more consecutive spaces, tabs, carriage returns, newlines or
# vertical tabs. build_exth() replaces every such run with a single space in
# the text of each metadata item, except for the 'description' term.
COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
|
|
|
|
|
|
def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
               share_not_sync=True, cover_offset=None, thumbnail_offset=None,
               start_offset=None, mobi_doctype=2, num_of_resources=None,
               kf8_unknown_count=0, be_kindlegen2=False,
               kf8_header_index=None, page_progression_direction=None,
               primary_writing_mode=None):
    """Serialize book metadata into an EXTH block and return it as bytes.

    The result is ``b'EXTH'``, followed by two big-endian unsigned 32-bit
    integers (length of the records plus 12, and the record count), the
    records themselves and finally 1-4 NUL padding bytes.

    Every record starts with ``pack('>II', code, payload_length + 8)``.
    Textual records append the encoded payload; integer records are written
    in one go as ``pack('>III', code, 12, value)``.

    :param metadata: Object that yields term names when iterated, returns
        the items of a term via ``metadata[term]`` and exposes ``.rights``.
        NOTE(review): presumably the OEB metadata container - confirm with
        the callers.
    :param prefer_author_sort: If true, every creator is passed through
        ``authors_to_sort_string`` before being written.
    :param is_periodical: If true, the cdetype record is derived from
        ``mobi_doctype``, a 'lastupdatetime' record is written and the
        periodical set of 204-207 values is used (unless ``be_kindlegen2``).
    :param share_not_sync: If false, record 113 (the UUID) is written and,
        for non periodicals, record 501 with the value ``b'EBOK'``.
    :param cover_offset: Value for the 'coveroffset' record; a
        'hasfakecover' record with value 0 is written alongside it.
    :param thumbnail_offset: Value for the 'thumboffset' record; a
        'kf8_thumbnail_uri' record is generated from it as well.
    :param start_offset: A single value or a sized collection of values,
        one 'startreading' record is written per non-None value.
    :param mobi_doctype: Only consulted for periodicals: 0x101 selects
        ``b'NWPR'`` and 0x103 selects ``b'MAGZ'`` for record 501.
    :param num_of_resources: Value for the 'num_of_resources' record.
    :param kf8_unknown_count: Value for the 'kf8_unknown_count' record.
    :param be_kindlegen2: If true, use the kindlegen 2 set of 204-207
        values and additionally write record 535.
    :param kf8_header_index: Value for the 'kf8_header_index' record.
    :param page_progression_direction: Written only when it is one of
        'rtl', 'ltr' or 'default'.
    :param primary_writing_mode: Written (UTF-8 encoded) when truthy.
    :raises ValueError: If the metadata has neither a 'date' nor a
        'timestamp' entry.
    """
    exth = BytesIO()
    nrecs = 0

    for term in metadata:
        if term not in EXTH_CODES:
            continue
        code = EXTH_CODES[term]
        items = metadata[term]
        if term == 'creator':
            if prefer_author_sort:
                creators = [authors_to_sort_string([str(c)]) for c in items]
            else:
                creators = [str(c) for c in items]
            items = creators
        elif term == 'rights':
            # Only the first rights entry is written; fall back to a
            # placeholder when it cannot be obtained or converted.
            try:
                rights = utf8_text(str(metadata.rights[0]))
            except Exception:
                rights = b'Unknown'
            exth.write(pack(b'>II', EXTH_CODES['rights'], len(rights) + 8))
            exth.write(rights)
            nrecs += 1
            continue

        for item in items:
            data = str(item)
            if term != 'description':
                data = COLLAPSE_RE.sub(' ', data)
            if term == 'identifier':
                # Only ISBN identifiers are written from this loop.
                if data.lower().startswith('urn:isbn:'):
                    data = data[9:]
                elif (item.scheme or '').lower() == 'isbn':
                    # An identifier without a scheme is treated as non ISBN
                    # instead of failing on None.lower().
                    pass
                else:
                    continue
            if term == 'language':
                d2 = lang_as_iso639_1(data)
                if d2:
                    data = d2
            data = utf8_text(data)
            exth.write(pack(b'>II', code, len(data) + 8))
            exth.write(data)
            nrecs += 1

    # Write UUID as ASIN
    uuid = None
    for x in metadata['identifier']:
        # The scheme attribute may be missing, in which case get() returns
        # None; treat that like an empty scheme.
        scheme = x.get(base.tag('opf', 'scheme'), None) or ''
        if scheme.lower() == 'uuid' or str(x).startswith('urn:uuid:'):
            uuid = str(x).split(':')[-1]
            break
    if uuid is None:
        from uuid import uuid4
        uuid = str(uuid4())

    if isinstance(uuid, str):
        uuid = uuid.encode('utf-8')
    if not share_not_sync:
        exth.write(pack(b'>II', 113, len(uuid) + 8))
        exth.write(uuid)
        nrecs += 1

    # Write UUID as SOURCE (112 is the same code as EXTH_CODES['source'])
    c_uuid = b'calibre:%s' % uuid
    exth.write(pack(b'>II', 112, len(c_uuid) + 8))
    exth.write(c_uuid)
    nrecs += 1

    # Write cdetype
    if not is_periodical:
        if not share_not_sync:
            exth.write(pack(b'>II', 501, 12))
            exth.write(b'EBOK')
            nrecs += 1
    else:
        ids = {0x101: b'NWPR', 0x103: b'MAGZ'}.get(mobi_doctype, None)
        if ids:
            exth.write(pack(b'>II', 501, 12))
            exth.write(ids)
            nrecs += 1

    # Add a publication date entry
    # datestr has to exist before the check below, otherwise metadata with
    # neither date nor timestamp fails with UnboundLocalError instead of
    # the intended ValueError.
    datestr = None
    if metadata['date']:
        datestr = str(metadata['date'][0])
    elif metadata['timestamp']:
        datestr = str(metadata['timestamp'][0])

    if datestr is None:
        raise ValueError("missing date or timestamp")

    datestr = datestr.encode('utf-8')
    exth.write(pack(b'>II', EXTH_CODES['pubdate'], len(datestr) + 8))
    exth.write(datestr)
    nrecs += 1
    if is_periodical:
        exth.write(pack(b'>II', EXTH_CODES['lastupdatetime'],
                        len(datestr) + 8))
        exth.write(datestr)
        nrecs += 1

    if be_kindlegen2:
        mv = 200 if iswindows else 202 if isosx else 201
        vals = {204: mv, 205: 2, 206: 9, 207: 0}
    elif is_periodical:
        # Pretend to be amazon's super secret periodical generator
        vals = {204: 201, 205: 2, 206: 0, 207: 101}
    else:
        # Pretend to be kindlegen 1.2
        vals = {204: 201, 205: 1, 206: 2, 207: 33307}
    for code, val in vals.items():
        exth.write(pack(b'>III', code, 12, val))
        nrecs += 1
    if be_kindlegen2:
        revnum = b'0730-890adc2'
        exth.write(pack(b'>II', 535, 8 + len(revnum)) + revnum)
        nrecs += 1

    if cover_offset is not None:
        exth.write(pack(b'>III', EXTH_CODES['coveroffset'], 12,
                        cover_offset))
        exth.write(pack(b'>III', EXTH_CODES['hasfakecover'], 12, 0))
        nrecs += 2
    if thumbnail_offset is not None:
        exth.write(pack(b'>III', EXTH_CODES['thumboffset'], 12,
                        thumbnail_offset))
        thumbnail_uri_str = (
            'kindle:embed:%s' % (to_base(thumbnail_offset, base=32,
                                         min_num_digits=4))
        ).encode('utf-8')
        exth.write(pack(b'>II', EXTH_CODES['kf8_thumbnail_uri'],
                        len(thumbnail_uri_str) + 8))
        exth.write(thumbnail_uri_str)
        nrecs += 2

    if start_offset is not None:
        # Accept both a single offset and a collection of offsets.
        try:
            len(start_offset)
        except TypeError:
            start_offset = [start_offset]
        for so in start_offset:
            if so is not None:
                exth.write(pack(b'>III', EXTH_CODES['startreading'], 12,
                                so))
                nrecs += 1

    if kf8_header_index is not None:
        exth.write(pack(b'>III', EXTH_CODES['kf8_header_index'], 12,
                        kf8_header_index))
        nrecs += 1

    if num_of_resources is not None:
        exth.write(pack(b'>III', EXTH_CODES['num_of_resources'], 12,
                        num_of_resources))
        nrecs += 1

    if kf8_unknown_count is not None:
        exth.write(pack(b'>III', EXTH_CODES['kf8_unknown_count'], 12,
                        kf8_unknown_count))
        nrecs += 1

    if primary_writing_mode:
        pwm = primary_writing_mode.encode('utf-8')
        exth.write(pack(b'>II', EXTH_CODES['primary_writing_mode'],
                        len(pwm) + 8))
        exth.write(pwm)
        nrecs += 1

    if page_progression_direction in {'rtl', 'ltr', 'default'}:
        ppd = page_progression_direction.encode('ascii')
        exth.write(pack(b'>II', EXTH_CODES['page_progression_direction'],
                        len(ppd) + 8))
        exth.write(ppd)
        nrecs += 1

    exth = exth.getvalue()
    trail = len(exth) % 4
    # Always pad w/ at least 1 byte: an already aligned block gets 4 bytes.
    # The length stored in the header below does not include the padding.
    pad = b'\0' * (4 - trail)
    exth = [b'EXTH', pack(b'>II', len(exth) + 12, nrecs), exth, pad]
    return b''.join(exth)
|