mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-03-04 15:55:52 +01:00
Here is the first batch of modules, which are needed for converting several formats to LRF. Some of the logic has been changed; more cleanups will follow.
53 lines
1.8 KiB
Python
53 lines
1.8 KiB
Python
# -*- coding: utf-8 -*-
|
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
|
|
__license__ = 'GPL 3'
|
|
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
|
__docformat__ = 'restructuredtext en'
|
|
__all__ = ["Unihandecoder"]
|
|
|
|
'''
|
|
Decode unicode text to an ASCII representation of the text.
|
|
Translate unicode characters to ASCII.
|
|
|
|
Inspired from John Schember's unidecode library which was created as part
|
|
of calibre.
|
|
|
|
Copyright(c) 2009, John Schember
|
|
|
|
Transliterate the string from unicode characters to ASCII in Chinese and others.
|
|
|
|
'''
|
|
import unicodedata
|
|
|
|
|
|
class Unihandecoder(object):
    """Convert unicode text to an ASCII representation.

    The heavy lifting is delegated to a language-specific decoder object
    (``Jadecoder``, ``Krdecoder``, ``Vndecoder`` or ``Unidecoder``) chosen
    from the ``lang`` argument; this class only selects the decoder and
    prepares the input (byte decoding and unicode normalization).
    """

    # Encoding tried first when decode() is handed a byte string; set per
    # instance in __init__.
    preferred_encoding = None
    # Language-specific decoder instance; set per instance in __init__.
    decoder = None

    def __init__(self, lang="zh", encoding='utf-8'):
        """Select the decoder for *lang*.

        :param lang: language or locale identifier (case-insensitive), for
            example ``'ja'``, ``'ko_KR'``, ``'vi'`` or ``'zh'``. Anything
            not recognised as Japanese, Korean or Vietnamese, including an
            empty value or ``None``, selects the generic ``Unidecoder``.
        :param encoding: encoding tried first when :meth:`decode` receives
            a byte string.
        """
        self.preferred_encoding = encoding
        key = self._language_key(lang)
        # The decoder modules are imported lazily so that only the one
        # actually needed is loaded.
        if key == 'ja':
            from ebook_converter.ebooks.unihandecode.jadecoder import Jadecoder
            self.decoder = Jadecoder()
        elif key == 'kr':
            from ebook_converter.ebooks.unihandecode.krdecoder import Krdecoder
            self.decoder = Krdecoder()
        elif key == 'vn':
            from ebook_converter.ebooks.unihandecode.vndecoder import Vndecoder
            self.decoder = Vndecoder()
        else:  # zh and others
            from ebook_converter.ebooks.unihandecode.unidecoder import Unidecoder
            self.decoder = Unidecoder()

    @staticmethod
    def _language_key(lang):
        """Map a language/locale identifier to ``'ja'``, ``'kr'``, ``'vn'``
        or ``'zh'`` (the default).

        Every spelling accepted historically still matches (``ja*``,
        ``kr*``, ``korean``, ``vn*``, ``vietnum``). In addition the ISO 639
        language codes ``ko``/``kor`` and ``vi``/``vie`` are recognised,
        because ``kr`` and ``vn`` are country codes and never match locale
        strings such as ``ko_KR`` or ``vi_VN``.
        """
        lang = (lang or 'zh').lower()
        prefix = lang[:2]
        # Primary subtag of identifiers such as "ko_KR" or "vi-VN". Matching
        # it exactly avoids catching unrelated codes that merely share the
        # first two letters (e.g. "kok").
        primary = lang.replace('-', '_').split('_', 1)[0]
        if prefix == 'ja':
            return 'ja'
        if prefix == 'kr' or primary in ('ko', 'kor', 'korean'):
            return 'kr'
        if prefix == 'vn' or primary in ('vi', 'vie', 'vietnum', 'vietnam',
                                         'vietnamese'):
            return 'vn'
        return 'zh'

    def decode(self, text):
        """Return the result of the selected decoder for *text*.

        :param text: unicode string, or a byte string that is decoded with
            ``preferred_encoding`` first.
        :return: whatever the selected decoder's ``decode`` returns for the
            NFKC-normalized text.
        """
        if isinstance(text, bytes):
            try:
                text = text.decode(self.preferred_encoding)
            except Exception:
                # Deliberate best-effort: an unknown or unset encoding, or
                # bytes invalid in it, must not abort the conversion. Fall
                # back to UTF-8 and substitute undecodable bytes.
                text = text.decode('utf-8', 'replace')
        # Normalize first so compatibility forms are folded to their
        # canonical equivalents before the decoder sees them.
        normalized = unicodedata.normalize('NFKC', text)
        return self.decoder.decode(normalized)
|