1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-02-09 02:15:46 +01:00

Fix unsmarten text option (#16)

* Create unsmarten.py

* Update unsmarten.py

* Update unsmarten.py

* Create unsmarten.py
This commit is contained in:
Vitaliy Krasnoperov
2026-02-06 10:06:12 +02:00
committed by GitHub
parent 8b8a92e9fd
commit c89fc132b8
2 changed files with 68 additions and 0 deletions

View File

@@ -0,0 +1,28 @@
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from ebook_converter.ebooks.oeb.base import OEB_DOCS, XPath
from ebook_converter.ebooks.oeb.parse_utils import barename
from ebook_converter.utils.unsmarten import unsmarten_text
class UnsmartenPunctuation:
def __init__(self):
self.html_tags = XPath('descendant::h:*')
def unsmarten(self, root):
for x in self.html_tags(root):
if not barename(x.tag) == 'pre':
if getattr(x, 'text', None):
x.text = unsmarten_text(x.text)
if getattr(x, 'tail', None) and x.tail:
x.tail = unsmarten_text(x.tail)
def __call__(self, oeb, context):
bx = XPath('//h:body')
for x in oeb.manifest.items:
if x.media_type in OEB_DOCS:
for body in bx(x.data):
self.unsmarten(body)

View File

@@ -0,0 +1,40 @@
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from ebook_converter.utils.mreplace import MReplace
_mreplace = MReplace({
'&#8211;': '--',
'&ndash;': '--',
'': '--',
'&#8212;': '---',
'&mdash;': '---',
'': '---',
'&#8230;': '...',
'&hellip;': '...',
'': '...',
'&#8220;': '"',
'&#8221;': '"',
'&#8222;': '"',
'&#8243;': '"',
'&ldquo;': '"',
'&rdquo;': '"',
'&bdquo;': '"',
'&Prime;': '"',
'':'"',
'':'"',
'':'"',
'':'"',
'&#8216;':"'",
'&#8217;':"'",
'&#8242;':"'",
'&lsquo;':"'",
'&rsquo;':"'",
'&prime;':"'",
'':"'",
'':"'",
'':"'",
})
unsmarten_text = _mreplace.mreplace