1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-02-19 07:55:52 +01:00
Files
ebook-converter/ebook_converter/ebooks/unihandecode/jadecoder.py
gryf 0f9792df36 Convert calibre modules to ebook_converter.
Here is the first batch of modules, which are needed for converting
several formats to LRF. Some of the logic has been changed; more cleanups
will follow.
2020-04-19 15:16:48 +02:00

44 lines
1.4 KiB
Python

# coding:utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL 3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en'
'''
Decode Unicode text to an ASCII representation of the text for Japanese.
Translate a Unicode string to an ASCII roman string.
The API is based on the Python unidecode module,
which is based on the Ruby gem (http://rubyforge.org/projects/unidecode/)
and the Perl module Text::Unidecode
(http://search.cpan.org/~sburke/Text-Unidecode-0.04/).
The Japanese conversion itself is handled by the Kakasi Japanese processing engine.
Copyright (c) 2010 Hiroshi Miura
'''
import re
from ebook_converter.ebooks.unihandecode.unidecoder import Unidecoder
from ebook_converter.ebooks.unihandecode.unicodepoints import CODEPOINTS
from ebook_converter.ebooks.unihandecode.jacodepoints import CODEPOINTS as JACODES
from ebook_converter.ebooks.unihandecode.pykakasi.kakasi import kakasi
class Jadecoder(Unidecoder):
    """Transliterate Japanese Unicode text to ASCII.

    Text is first passed through the kakasi converter; every non-ASCII
    character that remains is then replaced via ``replace_point``, which
    is not defined in this class (presumably inherited from
    ``Unidecoder`` -- confirm there).
    """

    # Class-level placeholders; both are replaced per instance in __init__.
    kakasi = None
    codepoints = {}

    def __init__(self):
        """Build the per-instance code point table and the kakasi converter."""
        # Merge into a copy: calling update() directly on CODEPOINTS would
        # modify the shared module-level table in place, so the Japanese
        # overrides would become visible to every other user of that table.
        self.codepoints = dict(CODEPOINTS)
        self.codepoints.update(JACODES)
        self.kakasi = kakasi()

    def decode(self, text):
        """Return ``text`` with every non-ASCII character replaced.

        The text is converted with ``self.kakasi.do`` first. If that
        conversion (or the substitution on its result) raises, the
        substitution is applied to the original ``text`` instead.
        """
        def to_ascii(value):
            # Replace each remaining non-ASCII character individually.
            return re.sub('[^\x00-\x7f]',
                          lambda match: self.replace_point(match.group()),
                          value)

        try:
            return to_ascii(self.kakasi.do(text))
        except Exception:
            # Best-effort fallback. ``Exception`` rather than a bare
            # ``except`` so KeyboardInterrupt/SystemExit still propagate.
            return to_ascii(text)