1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-01-30 02:05:45 +01:00
Files
ebook-converter/ebook_converter/ebooks/unihandecode/jadecoder.py
gryf 48fedea799 Removing unneeded from __future__ import statements.
Since we are on Python 3.6 and up, we don't need those anymore.
2020-04-19 17:39:02 +02:00

42 lines
1.3 KiB
Python

"""
Decode Unicode text to an ASCII representation of the text for Japanese.
Translates a Unicode string into an ASCII (romanized) string.
The API is based on the Python unidecode module,
which is in turn based on the Ruby gem (http://rubyforge.org/projects/unidecode/)
and the Perl module Text::Unidecode
(http://search.cpan.org/~sburke/Text-Unidecode-0.04/).
The Japanese conversion itself relies on the Kakasi Japanese processing engine.
Copyright (c) 2010 Hiroshi Miura
"""
import re
from ebook_converter.ebooks.unihandecode.unidecoder import Unidecoder
from ebook_converter.ebooks.unihandecode.unicodepoints import CODEPOINTS
from ebook_converter.ebooks.unihandecode.jacodepoints import CODEPOINTS as JACODES
from ebook_converter.ebooks.unihandecode.pykakasi.kakasi import kakasi
# Module metadata: license, copyright holder and docstring markup format.
__license__ = 'GPL 3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en'
class Jadecoder(Unidecoder):
    """Unidecoder variant that runs text through kakasi before transliterating.

    The lookup table used for the final character replacement is the
    generic ``CODEPOINTS`` table overlaid with the Japanese-specific
    ``JACODES`` entries.
    """

    # Class-level placeholders; both are replaced per instance in __init__.
    kakasi = None
    codepoints = {}

    def __init__(self):
        """Build the merged code point table and create the kakasi engine."""
        # Merge into a copy: CODEPOINTS is a module-level dict, and updating
        # it in place would leak the Japanese overrides into every other
        # user of that shared table.
        self.codepoints = CODEPOINTS.copy()
        self.codepoints.update(JACODES)
        self.kakasi = kakasi()

    def decode(self, text):
        """Return *text* with every non-ASCII character replaced.

        The text is first passed through ``self.kakasi.do`` (presumably
        converting Japanese script to a romanized form -- confirm against
        pykakasi), then each remaining character outside ``\\x00-\\x7f`` is
        replaced with the result of ``self.replace_point``, which is not
        defined in this class and is expected to come from ``Unidecoder``.

        If the kakasi pass, or the replacement of its output, raises an
        ordinary exception, the original *text* is transliterated directly
        instead (best effort).
        """
        def to_ascii(value):
            # Substitute each non-ASCII character individually.
            return re.sub('[^\x00-\x7f]',
                          lambda match: self.replace_point(match.group()),
                          value)

        try:
            return to_ascii(self.kakasi.do(text))
        except Exception:
            # Deliberate best-effort fallback; unlike a bare ``except`` this
            # lets KeyboardInterrupt and SystemExit propagate.
            return to_ascii(text)