mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-03-13 04:55:49 +01:00
Moved prepare_string_for_xml to utils.entities.
This commit is contained in:
@@ -24,14 +24,3 @@ class CurrentDir(object):
|
|||||||
except EnvironmentError:
|
except EnvironmentError:
|
||||||
# The previous CWD no longer exists
|
# The previous CWD no longer exists
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
_ent_pat = re.compile(r'&(\S+?);')
|
|
||||||
|
|
||||||
|
|
||||||
def prepare_string_for_xml(raw, attribute=False):
|
|
||||||
raw = _ent_pat.sub(entities.entity_to_unicode, raw)
|
|
||||||
raw = raw.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
|
|
||||||
if attribute:
|
|
||||||
raw = raw.replace('"', '&quot;').replace("'", '&apos;')
|
|
||||||
return raw
|
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
from ebook_converter import _ent_pat
|
|
||||||
from ebook_converter.customize.conversion import InputFormatPlugin
|
from ebook_converter.customize.conversion import InputFormatPlugin
|
||||||
from ebook_converter.customize.conversion import OptionRecommendation
|
from ebook_converter.customize.conversion import OptionRecommendation
|
||||||
from ebook_converter.utils import entities
|
from ebook_converter.utils import entities
|
||||||
@@ -198,7 +197,7 @@ class TXTInput(InputFormatPlugin):
|
|||||||
txt = txt.decode(ienc, 'replace')
|
txt = txt.decode(ienc, 'replace')
|
||||||
|
|
||||||
# Replace entities
|
# Replace entities
|
||||||
txt = _ent_pat.sub(entities.xml_entity_to_unicode, txt)
|
txt = entities.ENT_PAT.sub(entities.xml_entity_to_unicode, txt)
|
||||||
|
|
||||||
# Normalize line endings
|
# Normalize line endings
|
||||||
txt = normalize_line_endings(txt)
|
txt = normalize_line_endings(txt)
|
||||||
|
|||||||
@@ -10,11 +10,11 @@ import uuid
|
|||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
from ebook_converter import constants as const
|
from ebook_converter import constants as const
|
||||||
from ebook_converter import prepare_string_for_xml
|
|
||||||
from ebook_converter.constants_old import __appname__, __version__
|
from ebook_converter.constants_old import __appname__, __version__
|
||||||
from ebook_converter.ebooks.oeb import base
|
from ebook_converter.ebooks.oeb import base
|
||||||
from ebook_converter.ebooks.oeb import parse_utils
|
from ebook_converter.ebooks.oeb import parse_utils
|
||||||
from ebook_converter.polyglot.binary import as_base64_unicode
|
from ebook_converter.polyglot.binary import as_base64_unicode
|
||||||
|
from ebook_converter.utils import entities
|
||||||
from ebook_converter.utils.img import save_cover_data_to
|
from ebook_converter.utils.img import save_cover_data_to
|
||||||
from ebook_converter.utils.localization import lang_as_iso639_1
|
from ebook_converter.utils.localization import lang_as_iso639_1
|
||||||
|
|
||||||
@@ -149,13 +149,16 @@ class FB2MLizer(object):
|
|||||||
author_middle = ' '.join(author_parts[1:-1])
|
author_middle = ' '.join(author_parts[1:-1])
|
||||||
author_last = author_parts[-1]
|
author_last = author_parts[-1]
|
||||||
metadata['author'] += '<author>'
|
metadata['author'] += '<author>'
|
||||||
metadata['author'] += ('<first-name>%s</first-name>' %
|
metadata['author'] += (
|
||||||
prepare_string_for_xml(author_first))
|
'<first-name>%s</first-name>' %
|
||||||
|
entities.prepare_string_for_xml(author_first))
|
||||||
if author_middle:
|
if author_middle:
|
||||||
metadata['author'] += ('<middle-name>%s</middle-name>' %
|
metadata['author'] += (
|
||||||
prepare_string_for_xml(author_middle))
|
'<middle-name>%s</middle-name>' %
|
||||||
metadata['author'] += ('<last-name>%s</last-name>' %
|
entities.prepare_string_for_xml(author_middle))
|
||||||
prepare_string_for_xml(author_last))
|
metadata['author'] += (
|
||||||
|
'<last-name>%s</last-name>' %
|
||||||
|
entities.prepare_string_for_xml(author_last))
|
||||||
metadata['author'] += '</author>'
|
metadata['author'] += '</author>'
|
||||||
if not metadata['author']:
|
if not metadata['author']:
|
||||||
metadata['author'] = ('<author><first-name></first-name>'
|
metadata['author'] = ('<author><first-name></first-name>'
|
||||||
@@ -164,7 +167,7 @@ class FB2MLizer(object):
|
|||||||
metadata['keywords'] = ''
|
metadata['keywords'] = ''
|
||||||
tags = list(map(str, self.oeb_book.metadata.subject))
|
tags = list(map(str, self.oeb_book.metadata.subject))
|
||||||
if tags:
|
if tags:
|
||||||
tags = ', '.join(prepare_string_for_xml(x) for x in tags)
|
tags = ', '.join(entities.prepare_string_for_xml(x) for x in tags)
|
||||||
metadata['keywords'] = '<keywords>%s</keywords>' % tags
|
metadata['keywords'] = '<keywords>%s</keywords>' % tags
|
||||||
|
|
||||||
metadata['sequence'] = ''
|
metadata['sequence'] = ''
|
||||||
@@ -172,7 +175,8 @@ class FB2MLizer(object):
|
|||||||
index = '1'
|
index = '1'
|
||||||
if self.oeb_book.metadata.series_index:
|
if self.oeb_book.metadata.series_index:
|
||||||
index = self.oeb_book.metadata.series_index[0]
|
index = self.oeb_book.metadata.series_index[0]
|
||||||
seq = prepare_string_for_xml(str(self.oeb_book.metadata.series[0]))
|
seq = entities.prepare_string_for_xml(str(self.oeb_book.metadata
|
||||||
|
.series[0]))
|
||||||
metadata['sequence'] = ('<sequence name="%s" number="%s"/>' %
|
metadata['sequence'] = ('<sequence name="%s" number="%s"/>' %
|
||||||
(seq, index))
|
(seq, index))
|
||||||
|
|
||||||
@@ -193,7 +197,8 @@ class FB2MLizer(object):
|
|||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
year = ('<year>%s</year>' %
|
year = ('<year>%s</year>' %
|
||||||
prepare_string_for_xml(date.value.partition('-')[0]))
|
entities.prepare_string_for_xml(date.value
|
||||||
|
.partition('-')[0]))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
publisher = self.oeb_book.metadata['publisher'][0]
|
publisher = self.oeb_book.metadata['publisher'][0]
|
||||||
@@ -201,11 +206,12 @@ class FB2MLizer(object):
|
|||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
publisher = ('<publisher>%s</publisher>' %
|
publisher = ('<publisher>%s</publisher>' %
|
||||||
prepare_string_for_xml(publisher.value))
|
entities.prepare_string_for_xml(publisher.value))
|
||||||
|
|
||||||
for x in identifiers:
|
for x in identifiers:
|
||||||
if x.get(base.tag('opf', 'scheme'), None).lower() == 'isbn':
|
if x.get(base.tag('opf', 'scheme'), None).lower() == 'isbn':
|
||||||
isbn = '<isbn>%s</isbn>' % prepare_string_for_xml(x.value)
|
isbn = ('<isbn>%s</isbn>' %
|
||||||
|
entities.prepare_string_for_xml(x.value))
|
||||||
|
|
||||||
metadata['year'] = year
|
metadata['year'] = year
|
||||||
metadata['isbn'] = isbn
|
metadata['isbn'] = isbn
|
||||||
@@ -213,7 +219,7 @@ class FB2MLizer(object):
|
|||||||
for key, value in metadata.items():
|
for key, value in metadata.items():
|
||||||
if key not in ('author', 'cover', 'sequence', 'keywords', 'year',
|
if key not in ('author', 'cover', 'sequence', 'keywords', 'year',
|
||||||
'publisher', 'isbn'):
|
'publisher', 'isbn'):
|
||||||
metadata[key] = prepare_string_for_xml(value)
|
metadata[key] = entities.prepare_string_for_xml(value)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
comments = self.oeb_book.metadata['description'][0]
|
comments = self.oeb_book.metadata['description'][0]
|
||||||
@@ -221,7 +227,8 @@ class FB2MLizer(object):
|
|||||||
metadata['comments'] = ''
|
metadata['comments'] = ''
|
||||||
else:
|
else:
|
||||||
from ebook_converter.utils.html2text import html2text
|
from ebook_converter.utils.html2text import html2text
|
||||||
annot = prepare_string_for_xml(html2text(comments.value).strip())
|
annot = entities.prepare_string_for_xml(html2text(comments
|
||||||
|
.value).strip())
|
||||||
metadata['comments'] = f'<annotation><p>{annot}</p></annotation>'
|
metadata['comments'] = f'<annotation><p>{annot}</p></annotation>'
|
||||||
|
|
||||||
# Keep the indentation level of the description the same as the body.
|
# Keep the indentation level of the description the same as the body.
|
||||||
@@ -583,7 +590,7 @@ class FB2MLizer(object):
|
|||||||
if hasattr(elem_tree, 'text') and elem_tree.text:
|
if hasattr(elem_tree, 'text') and elem_tree.text:
|
||||||
if not self.in_p:
|
if not self.in_p:
|
||||||
fb2_out.append('<p>')
|
fb2_out.append('<p>')
|
||||||
fb2_out.append(prepare_string_for_xml(elem_tree.text))
|
fb2_out.append(entities.prepare_string_for_xml(elem_tree.text))
|
||||||
if not self.in_p:
|
if not self.in_p:
|
||||||
fb2_out.append('</p>')
|
fb2_out.append('</p>')
|
||||||
|
|
||||||
@@ -600,7 +607,7 @@ class FB2MLizer(object):
|
|||||||
if hasattr(elem_tree, 'tail') and elem_tree.tail:
|
if hasattr(elem_tree, 'tail') and elem_tree.tail:
|
||||||
if not self.in_p:
|
if not self.in_p:
|
||||||
fb2_out.append('<p>')
|
fb2_out.append('<p>')
|
||||||
fb2_out.append(prepare_string_for_xml(elem_tree.tail))
|
fb2_out.append(entities.prepare_string_for_xml(elem_tree.tail))
|
||||||
if not self.in_p:
|
if not self.in_p:
|
||||||
fb2_out.append('</p>')
|
fb2_out.append('</p>')
|
||||||
|
|
||||||
|
|||||||
@@ -8,11 +8,11 @@ import urllib.parse
|
|||||||
from functools import partial
|
from functools import partial
|
||||||
from lxml import html
|
from lxml import html
|
||||||
|
|
||||||
from ebook_converter import prepare_string_for_xml
|
|
||||||
from ebook_converter import constants as const
|
from ebook_converter import constants as const
|
||||||
from ebook_converter.ebooks.oeb import base
|
from ebook_converter.ebooks.oeb import base
|
||||||
from ebook_converter.ebooks.oeb import parse_utils
|
from ebook_converter.ebooks.oeb import parse_utils
|
||||||
from ebook_converter.ebooks.oeb.stylizer import Stylizer
|
from ebook_converter.ebooks.oeb.stylizer import Stylizer
|
||||||
|
from ebook_converter.utils import entities
|
||||||
from ebook_converter.utils.logging import default_log
|
from ebook_converter.utils.logging import default_log
|
||||||
from ebook_converter.polyglot.builtins import as_bytes
|
from ebook_converter.polyglot.builtins import as_bytes
|
||||||
|
|
||||||
@@ -57,7 +57,7 @@ class OEB2HTML(object):
|
|||||||
def mlize_spine(self, oeb_book):
|
def mlize_spine(self, oeb_book):
|
||||||
output = [
|
output = [
|
||||||
u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" /><title>%s</title></head><body>' % (
|
u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" /><title>%s</title></head><body>' % (
|
||||||
prepare_string_for_xml(self.book_title))
|
entities.prepare_string_for_xml(self.book_title))
|
||||||
]
|
]
|
||||||
for item in oeb_book.spine:
|
for item in oeb_book.spine:
|
||||||
self.log.debug('Converting %s to HTML...' % item.href)
|
self.log.debug('Converting %s to HTML...' % item.href)
|
||||||
@@ -136,7 +136,7 @@ class OEB2HTML(object):
|
|||||||
return css
|
return css
|
||||||
|
|
||||||
def prepare_string_for_html(self, raw):
|
def prepare_string_for_html(self, raw):
|
||||||
raw = prepare_string_for_xml(raw)
|
raw = entities.prepare_string_for_xml(raw)
|
||||||
raw = raw.replace(u'\u00ad', '&shy;')
|
raw = raw.replace(u'\u00ad', '&shy;')
|
||||||
raw = raw.replace(u'\u2014', '&mdash;')
|
raw = raw.replace(u'\u2014', '&mdash;')
|
||||||
raw = raw.replace(u'\u2013', '&ndash;')
|
raw = raw.replace(u'\u2013', '&ndash;')
|
||||||
@@ -189,7 +189,8 @@ class OEB2HTMLNoCSSizer(OEB2HTML):
|
|||||||
# Turn the rest of the attributes into a string we can write with the tag.
|
# Turn the rest of the attributes into a string we can write with the tag.
|
||||||
at = ''
|
at = ''
|
||||||
for k, v in attribs.items():
|
for k, v in attribs.items():
|
||||||
at += ' %s="%s"' % (k, prepare_string_for_xml(v, attribute=True))
|
at += ' %s="%s"' % (k, entities
|
||||||
|
.prepare_string_for_xml(v, attribute=True))
|
||||||
|
|
||||||
# Write the tag.
|
# Write the tag.
|
||||||
text.append('<%s%s' % (tag, at))
|
text.append('<%s%s' % (tag, at))
|
||||||
@@ -281,7 +282,8 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
|
|||||||
# Turn the rest of the attributes into a string we can write with the tag.
|
# Turn the rest of the attributes into a string we can write with the tag.
|
||||||
at = ''
|
at = ''
|
||||||
for k, v in attribs.items():
|
for k, v in attribs.items():
|
||||||
at += ' %s="%s"' % (k, prepare_string_for_xml(v, attribute=True))
|
at += ' %s="%s"' % (k, entities
|
||||||
|
.prepare_string_for_xml(v, attribute=True))
|
||||||
|
|
||||||
# Turn style into strings for putting in the tag.
|
# Turn style into strings for putting in the tag.
|
||||||
style_t = ''
|
style_t = ''
|
||||||
@@ -336,7 +338,8 @@ class OEB2HTMLClassCSSizer(OEB2HTML):
|
|||||||
css = u'<link href="style.css" rel="stylesheet" type="text/css" />'
|
css = u'<link href="style.css" rel="stylesheet" type="text/css" />'
|
||||||
else:
|
else:
|
||||||
css = u'<style type="text/css">' + self.get_css(oeb_book) + u'</style>'
|
css = u'<style type="text/css">' + self.get_css(oeb_book) + u'</style>'
|
||||||
title = u'<title>%s</title>' % prepare_string_for_xml(self.book_title)
|
title = (u'<title>%s</title>' %
|
||||||
|
entities.prepare_string_for_xml(self.book_title))
|
||||||
output = [u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" />'] + \
|
output = [u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" />'] + \
|
||||||
[css] + [title, u'</head><body>'] + output + [u'</body></html>']
|
[css] + [title, u'</head><body>'] + output + [u'</body></html>']
|
||||||
return ''.join(output)
|
return ''.join(output)
|
||||||
@@ -373,7 +376,8 @@ class OEB2HTMLClassCSSizer(OEB2HTML):
|
|||||||
# Turn the rest of the attributes into a string we can write with the tag.
|
# Turn the rest of the attributes into a string we can write with the tag.
|
||||||
at = ''
|
at = ''
|
||||||
for k, v in attribs.items():
|
for k, v in attribs.items():
|
||||||
at += ' %s="%s"' % (k, prepare_string_for_xml(v, attribute=True))
|
at += ' %s="%s"' % (k,
|
||||||
|
entities.prepare_string_for_xml(v, attribute=True))
|
||||||
|
|
||||||
# Write the tag.
|
# Write the tag.
|
||||||
text.append('<%s%s' % (tag, at))
|
text.append('<%s%s' % (tag, at))
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ import struct
|
|||||||
import zlib
|
import zlib
|
||||||
|
|
||||||
from ebook_converter.ebooks.lrf import LRFParseError, PRS500_PROFILE
|
from ebook_converter.ebooks.lrf import LRFParseError, PRS500_PROFILE
|
||||||
from ebook_converter import prepare_string_for_xml
|
from ebook_converter.utils import entities
|
||||||
from ebook_converter.ebooks.lrf.tags import Tag
|
from ebook_converter.ebooks.lrf.tags import Tag
|
||||||
from ebook_converter.utils import entities
|
from ebook_converter.utils import entities
|
||||||
|
|
||||||
@@ -877,7 +877,7 @@ class Text(LRFStream):
|
|||||||
open_containers = collections.deque()
|
open_containers = collections.deque()
|
||||||
for c in self.content:
|
for c in self.content:
|
||||||
if isinstance(c, str):
|
if isinstance(c, str):
|
||||||
s += prepare_string_for_xml(c).replace('\0', '')
|
s += entities.prepare_string_for_xml(c).replace('\0', '')
|
||||||
elif c is None:
|
elif c is None:
|
||||||
if open_containers:
|
if open_containers:
|
||||||
p = open_containers.pop()
|
p = open_containers.pop()
|
||||||
|
|||||||
@@ -1,19 +1,15 @@
|
|||||||
"""
|
"""
|
||||||
Read content from Haodoo.net pdb file.
|
Read content from Haodoo.net pdb file.
|
||||||
"""
|
"""
|
||||||
import struct
|
|
||||||
import os
|
import os
|
||||||
|
import struct
|
||||||
|
|
||||||
from ebook_converter import prepare_string_for_xml
|
|
||||||
from ebook_converter.ebooks.pdb.formatreader import FormatReader
|
from ebook_converter.ebooks.pdb.formatreader import FormatReader
|
||||||
from ebook_converter.ebooks.metadata import MetaInformation
|
from ebook_converter.ebooks.metadata import MetaInformation
|
||||||
from ebook_converter.ebooks.txt.processor import opf_writer, HTML_TEMPLATE
|
from ebook_converter.ebooks.txt.processor import opf_writer, HTML_TEMPLATE
|
||||||
|
from ebook_converter.utils import entities
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2012, Kan-Ru Chen <kanru@kanru.info>'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
BPDB_IDENT = b'BOOKMTIT'
|
BPDB_IDENT = b'BOOKMTIT'
|
||||||
UPDB_IDENT = b'BOOKMTIU'
|
UPDB_IDENT = b'BOOKMTIU'
|
||||||
|
|
||||||
@@ -133,7 +129,7 @@ class Reader(FormatReader):
|
|||||||
line = '<h1 class="chapter">' + line + '</h1>\n'
|
line = '<h1 class="chapter">' + line + '</h1>\n'
|
||||||
title_added = True
|
title_added = True
|
||||||
else:
|
else:
|
||||||
line = prepare_string_for_xml(line)
|
line = entities.prepare_string_for_xml(line)
|
||||||
lines.append('<p>%s</p>' % line)
|
lines.append('<p>%s</p>' % line)
|
||||||
if not title_added:
|
if not title_added:
|
||||||
lines.insert(0, '<h1 class="chapter">' + title + '</h1>\n')
|
lines.insert(0, '<h1 class="chapter">' + title + '</h1>\n')
|
||||||
|
|||||||
@@ -3,11 +3,11 @@ Read content from txt file.
|
|||||||
"""
|
"""
|
||||||
import os, re
|
import os, re
|
||||||
|
|
||||||
from ebook_converter import prepare_string_for_xml
|
|
||||||
from ebook_converter.ebooks.metadata.opf2 import OPFCreator
|
from ebook_converter.ebooks.metadata.opf2 import OPFCreator
|
||||||
|
|
||||||
from ebook_converter.ebooks.conversion.preprocess import DocAnalysis
|
from ebook_converter.ebooks.conversion.preprocess import DocAnalysis
|
||||||
from ebook_converter.utils.cleantext import clean_ascii_chars
|
from ebook_converter.utils.cleantext import clean_ascii_chars
|
||||||
|
from ebook_converter.utils import entities
|
||||||
|
|
||||||
|
|
||||||
HTML_TEMPLATE = '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s </title></head><body>\n%s\n</body></html>'
|
HTML_TEMPLATE = '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s </title></head><body>\n%s\n</body></html>'
|
||||||
@@ -87,7 +87,8 @@ def convert_basic(txt, title='', epub_split_size_kb=0):
|
|||||||
for line in txt.split('\n'):
|
for line in txt.split('\n'):
|
||||||
if line.strip():
|
if line.strip():
|
||||||
blank_count = 0
|
blank_count = 0
|
||||||
lines.append(u'<p>%s</p>' % prepare_string_for_xml(line.replace('\n', ' ')))
|
lines.append(u'<p>%s</p>' % entities
|
||||||
|
.prepare_string_for_xml(line.replace('\n', ' ')))
|
||||||
else:
|
else:
|
||||||
blank_count += 1
|
blank_count += 1
|
||||||
if blank_count == 2:
|
if blank_count == 2:
|
||||||
|
|||||||
@@ -2,10 +2,10 @@ import re
|
|||||||
|
|
||||||
import bs4
|
import bs4
|
||||||
|
|
||||||
from ebook_converter import prepare_string_for_xml
|
|
||||||
from ebook_converter.constants_old import preferred_encoding
|
from ebook_converter.constants_old import preferred_encoding
|
||||||
from ebook_converter.ebooks.BeautifulSoup import html5_parser
|
from ebook_converter.ebooks.BeautifulSoup import html5_parser
|
||||||
from ebook_converter.utils.html2text import html2text
|
from ebook_converter.utils.html2text import html2text
|
||||||
|
from ebook_converter.utils import entities
|
||||||
|
|
||||||
|
|
||||||
# Hackish - ignoring sentences ending or beginning in numbers to avoid
|
# Hackish - ignoring sentences ending or beginning in numbers to avoid
|
||||||
@@ -51,7 +51,7 @@ def comments_to_html(comments):
|
|||||||
return comments
|
return comments
|
||||||
|
|
||||||
if '<' not in comments:
|
if '<' not in comments:
|
||||||
comments = prepare_string_for_xml(comments)
|
comments = entities.prepare_string_for_xml(comments)
|
||||||
parts = [u'<p class="description">%s</p>'%x.replace(u'\n', u'<br />')
|
parts = [u'<p class="description">%s</p>'%x.replace(u'\n', u'<br />')
|
||||||
for x in comments.split('\n\n')]
|
for x in comments.split('\n\n')]
|
||||||
return '\n'.join(parts)
|
return '\n'.join(parts)
|
||||||
|
|||||||
Reference in New Issue
Block a user