mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-02-22 18:15:49 +01:00
Initial import
This commit is contained in:
43
ebook_converter/ebooks/unihandecode/jadecoder.py
Normal file
43
ebook_converter/ebooks/unihandecode/jadecoder.py
Normal file
@@ -0,0 +1,43 @@
|
||||
# coding:utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Decode unicode text to an ASCII representation of the text for Japanese.
|
||||
Translate unicode string to ASCII roman string.
|
||||
|
||||
API is based on the python unidecode,
|
||||
which is based on Ruby gem (http://rubyforge.org/projects/unidecode/)
|
||||
and perl module Text::Unidecode
|
||||
(http://search.cpan.org/~sburke/Text-Unidecode-0.04/).
|
||||
|
||||
This functionality is owned by Kakasi Japanese processing engine.
|
||||
|
||||
Copyright (c) 2010 Hiroshi Miura
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.ebooks.unihandecode.unidecoder import Unidecoder
|
||||
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
|
||||
from calibre.ebooks.unihandecode.jacodepoints import CODEPOINTS as JACODES
|
||||
from calibre.ebooks.unihandecode.pykakasi.kakasi import kakasi
|
||||
|
||||
|
||||
class Jadecoder(Unidecoder):
|
||||
kakasi = None
|
||||
codepoints = {}
|
||||
|
||||
def __init__(self):
|
||||
self.codepoints = CODEPOINTS
|
||||
self.codepoints.update(JACODES)
|
||||
self.kakasi = kakasi()
|
||||
|
||||
def decode(self, text):
|
||||
try:
|
||||
result=self.kakasi.do(text)
|
||||
return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),result)
|
||||
except:
|
||||
return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),text)
|
||||
Reference in New Issue
Block a user