1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-03-04 15:55:52 +01:00
Files
ebook-converter/ebook_converter/ebooks/unihandecode/__init__.py
gryf 0f9792df36 Convert calibre modules to ebook_converter.
Here is the first batch of modules, which are needed for converting
several formats to LRF. Some of the logic has been change, more cleanups
will follow.
2020-04-19 15:16:48 +02:00

53 lines
1.8 KiB
Python

# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL 3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en'
__all__ = ["Unihandecoder"]
'''
Decode unicode text to an ASCII representation of the text.
Translate unicode characters to ASCII.
Inspired from John Schember's unidecode library which was created as part
of calibre.
Copyright(c) 2009, John Schember
Transliterate strings from Unicode characters to ASCII, for Chinese and other languages.
'''
import unicodedata
class Unihandecoder(object):
    """Decode Unicode text through a language-specific decoder.

    The language passed to the constructor selects which decoder class is
    instantiated. ``decode`` then normalises its input and hands it to that
    decoder.
    """

    # Encoding tried first when ``decode`` receives bytes; set in __init__.
    preferred_encoding = None
    # Decoder instance chosen from the language; set in __init__.
    decoder = None

    def __init__(self, lang="zh", encoding='utf-8'):
        """Pick the decoder implementation for *lang*.

        :param lang: language name or code, compared case-insensitively.
            Values starting with ``ja`` select ``Jadecoder``. Korean is
            selected by the ISO 639 codes ``ko``/``kor`` (optionally followed
            by a region such as ``ko_KR``), by ``korean``, or by the legacy
            ``kr`` prefix. Vietnamese is selected by ``vi``/``vie``,
            ``vietnam``, ``vietnamese``, the legacy misspelling ``vietnum``,
            or the legacy ``vn`` prefix. Anything else, including ``None``
            or an empty string, selects ``Unidecoder``.
        :param encoding: encoding tried first when ``decode`` is given bytes.
        """
        self.preferred_encoding = encoding

        # A missing language must not crash on .lower(); it simply falls
        # through to the default decoder below.
        lang = (lang or '').lower()
        prefix = lang[:2]
        # Primary subtag of codes such as 'ko_KR' or 'vi-VN'.
        primary = lang.replace('-', '_').split('_', 1)[0]

        # Decoder modules are imported lazily so that only the selected
        # implementation is loaded.
        if prefix == 'ja':
            from ebook_converter.ebooks.unihandecode.jadecoder import Jadecoder
            self.decoder = Jadecoder()
        elif prefix == 'kr' or primary in ('ko', 'kor', 'korean'):
            # 'kr' is a country code kept for backward compatibility; the
            # ISO 639 language codes for Korean are 'ko' / 'kor'.
            from ebook_converter.ebooks.unihandecode.krdecoder import Krdecoder
            self.decoder = Krdecoder()
        elif prefix == 'vn' or primary in ('vi', 'vie', 'vietnum', 'vietnam',
                                           'vietnamese'):
            # 'vn' and 'vietnum' are legacy spellings kept for backward
            # compatibility; the ISO 639 codes are 'vi' / 'vie'.
            from ebook_converter.ebooks.unihandecode.vndecoder import Vndecoder
            self.decoder = Vndecoder()
        else:  # zh and others
            from ebook_converter.ebooks.unihandecode.unidecoder import Unidecoder
            self.decoder = Unidecoder()

    def decode(self, text):
        """Return *text* processed by the selected decoder.

        :param text: ``str``, ``bytes`` or ``bytearray``. Byte input is
            decoded with ``preferred_encoding`` first; if that fails it is
            decoded as UTF-8 with undecodable bytes replaced.
        :return: whatever ``self.decoder.decode`` returns for the
            NFKC-normalised text.
        """
        if isinstance(text, (bytes, bytearray)):
            try:
                text = text.decode(self.preferred_encoding)
            except (ValueError, LookupError, TypeError):
                # ValueError covers UnicodeDecodeError (bad data),
                # LookupError an unknown codec name, TypeError a non-string
                # encoding such as None. Fall back to lossy UTF-8 so that
                # decoding stays best-effort.
                text = text.decode('utf-8', 'replace')
        # Apply Unicode compatibility normalisation (NFKC) before handing
        # the text to the decoder.
        ntext = unicodedata.normalize('NFKC', text)
        return self.decoder.decode(ntext)