mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-02-19 07:55:52 +01:00
Here is the first batch of modules, which are needed for converting several formats to LRF. Some of the logic has been changed; more cleanups will follow.
44 lines
1.4 KiB
Python
44 lines
1.4 KiB
Python
# coding:utf-8
|
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
|
|
__license__ = 'GPL 3'
|
|
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
|
__docformat__ = 'restructuredtext en'
|
|
|
|
'''
|
|
Decode unicode text to an ASCII representation of the text for Japanese.
|
|
Translate unicode string to ASCII roman string.
|
|
|
|
API is based on the python unidecode,
|
|
which is based on Ruby gem (http://rubyforge.org/projects/unidecode/)
|
|
and perl module Text::Unidecode
|
|
(http://search.cpan.org/~sburke/Text-Unidecode-0.04/).
|
|
|
|
This functionality is provided by the Kakasi Japanese processing engine.
|
|
|
|
Copyright (c) 2010 Hiroshi Miura
|
|
'''
|
|
|
|
import re
|
|
from ebook_converter.ebooks.unihandecode.unidecoder import Unidecoder
|
|
from ebook_converter.ebooks.unihandecode.unicodepoints import CODEPOINTS
|
|
from ebook_converter.ebooks.unihandecode.jacodepoints import CODEPOINTS as JACODES
|
|
from ebook_converter.ebooks.unihandecode.pykakasi.kakasi import kakasi
|
|
|
|
|
|
class Jadecoder(Unidecoder):
    """Decode Japanese unicode text to an ASCII (roman) representation.

    The text is first run through the kakasi converter; every character
    that is still outside the ASCII range afterwards is substituted via
    ``self.replace_point`` (not defined here -- expected to be provided by
    the ``Unidecoder`` base class).
    """

    # Matches any single character outside the 7-bit ASCII range.
    _NON_ASCII_RE = re.compile('[^\x00-\x7f]')

    kakasi = None
    codepoints = {}

    def __init__(self):
        # Merge the Japanese table into a private copy: updating CODEPOINTS
        # in place would modify the module-level table imported from
        # ``unicodepoints`` for every other user of that module.
        self.codepoints = dict(CODEPOINTS)
        self.codepoints.update(JACODES)
        self.kakasi = kakasi()

    def _replace_non_ascii(self, text):
        """Return *text* with each non-ASCII character replaced through
        ``self.replace_point``."""
        return self._NON_ASCII_RE.sub(
            lambda match: self.replace_point(match.group()), text)

    def decode(self, text):
        """Return an ASCII rendering of *text*.

        The kakasi conversion is attempted first. If anything in that path
        fails, the method deliberately falls back to substituting the
        non-ASCII characters of the unconverted input instead of raising.
        """
        try:
            return self._replace_non_ascii(self.kakasi.do(text))
        except Exception:
            # Best-effort fallback; ``Exception`` (rather than a bare
            # ``except``) lets KeyboardInterrupt/SystemExit propagate.
            return self._replace_non_ascii(text)
|