1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-01-30 18:25:44 +01:00
Files
ebook-converter/ebook_converter/ebooks/unihandecode/__init__.py
gryf 48fedea799 Removing unneeded from __future__ import statements.
Since we are on Python 3.6 and up, we don't need those anymore.
2020-04-19 17:39:02 +02:00

50 lines
1.7 KiB
Python

"""
Decode unicode text to an ASCII representation of the text.
Translate unicode characters to ASCII.
Inspired by John Schember's unidecode library, which was created as part
of calibre.
Copyright(c) 2009, John Schember
Transliterate strings of Unicode characters, in Chinese and other languages, to ASCII.
"""
import unicodedata
# Package metadata, kept as module-level dunder names.
__license__ = 'GPL 3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
# Declares the markup format used by this module's docstrings.
__docformat__ = 'restructuredtext en'
# Only the Unihandecoder class is exported by ``from ... import *``.
__all__ = ["Unihandecoder"]
class Unihandecoder(object):
    """Front end that hands text to a language-specific decoder.

    The decoder object is chosen once, at construction time, from the
    ``lang`` argument.  :meth:`decode` then turns its input into ``str``,
    applies NFKC normalization and delegates to that decoder.
    """

    # Codec name tried first when ``decode`` is given bytes.
    preferred_encoding = None
    # Decoder instance selected by ``__init__``.
    decoder = None

    def __init__(self, lang="zh", encoding='utf-8'):
        """Pick the decoder matching *lang*.

        :param lang: language identifier, case-insensitive.  Identifiers
            starting with ``ja`` select ``Jadecoder``; ``kr...``, ``ko``,
            ``kor`` and ``korean`` select ``Krdecoder``; ``vn...``, ``vi``,
            ``vie``, ``vietnam``, ``vietnamese`` (and the historical
            spelling ``vietnum``) select ``Vndecoder``.  Locale suffixes
            such as ``ko_KR`` or ``vi-VN`` are accepted.  Anything else,
            including ``None``, selects ``Unidecoder``.
        :param encoding: codec name tried first when :meth:`decode`
            receives bytes.
        """
        self.preferred_encoding = encoding
        key = self._decoder_key(lang)
        # The imports stay inside the branches so that only the module of
        # the selected decoder is imported.
        if key == 'ja':
            from ebook_converter.ebooks.unihandecode.jadecoder import Jadecoder
            self.decoder = Jadecoder()
        elif key == 'kr':
            from ebook_converter.ebooks.unihandecode.krdecoder import Krdecoder
            self.decoder = Krdecoder()
        elif key == 'vn':
            from ebook_converter.ebooks.unihandecode.vndecoder import Vndecoder
            self.decoder = Vndecoder()
        else:  # zh and others
            from ebook_converter.ebooks.unihandecode.unidecoder import Unidecoder
            self.decoder = Unidecoder()

    @staticmethod
    def _decoder_key(lang):
        """Map a language identifier to ``'ja'``, ``'kr'``, ``'vn'`` or ``'zh'``.

        ``'zh'`` is the fallback for every identifier that is not
        recognised, as well as for ``None`` and the empty string.
        """
        lang = (lang or '').lower()
        # Primary subtag of locale-style identifiers ("ko_KR" -> "ko").
        primary = lang.replace('-', '_').split('_', 1)[0]
        if lang[:2] == 'ja':
            return 'ja'
        # "kr" and "vn" are country codes; they are kept for backward
        # compatibility next to the ISO 639 language codes.
        if lang[:2] == 'kr' or primary in ('ko', 'kor', 'korean'):
            return 'kr'
        if lang[:2] == 'vn' or primary in ('vi', 'vie', 'vietnum', 'vietnam',
                                           'vietnamese'):
            return 'vn'
        return 'zh'

    def decode(self, text):
        """Return the selected decoder's output for *text*.

        :param text: ``str``, ``bytes`` or ``bytearray``.  Binary input is
            decoded with ``preferred_encoding``; if that fails for any
            reason it is decoded as UTF-8 with undecodable bytes replaced.
        :returns: whatever ``self.decoder.decode`` returns for the
            NFKC-normalized text.
        """
        if isinstance(text, (bytes, bytearray)):
            try:
                text = text.decode(self.preferred_encoding)
            except Exception:
                # Deliberate best effort: a bad codec name or undecodable
                # input must not abort the conversion.
                text = text.decode('utf-8', 'replace')
        # at first unicode normalize it. (see Unicode standards)
        ntext = unicodedata.normalize('NFKC', text)
        return self.decoder.decode(ntext)