1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-04-24 23:31:29 +02:00

Move force_unicode to utils package

This commit is contained in:
2020-11-11 19:30:50 +01:00
parent 35c34c3b45
commit 3152c52839
16 changed files with 72 additions and 60 deletions
+2 -2
View File
@@ -17,12 +17,12 @@ from lxml import etree
from lxml import html
from ebook_converter import constants as const
from ebook_converter import force_unicode
from ebook_converter.constants_old import filesystem_encoding, __version__
from ebook_converter.ebooks.chardet import xml_to_unicode
from ebook_converter.ebooks.conversion.preprocess import CSSPreProcessor
from ebook_converter.ebooks.oeb import parse_utils
from ebook_converter.utils.cleantext import clean_xml_chars
from ebook_converter.utils import encoding as uenc
from ebook_converter.utils.short_uuid import uuid4
@@ -1074,7 +1074,7 @@ class Manifest(object):
def sort_key(self):
href = self.href
if isinstance(href, bytes):
href = force_unicode(href)
href = uenc.force_unicode(href)
if isinstance(self.spine_position, numbers.Number):
sp = self.spine_position
+3 -2
View File
@@ -5,9 +5,10 @@ from lxml import etree
from lxml import html
from ebook_converter import constants as const
from ebook_converter import xml_replace_entities, force_unicode
from ebook_converter import xml_replace_entities
from ebook_converter.constants_old import filesystem_encoding
from ebook_converter.ebooks.chardet import xml_to_unicode, strip_encoding_declarations
from ebook_converter.utils import encoding as uenc
RECOVER_PARSER = etree.XMLParser(recover=True, no_network=True,
@@ -159,7 +160,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
if log is None:
log = LOG
filename = force_unicode(filename, enc=filesystem_encoding)
filename = uenc.force_unicode(filename, enc=filesystem_encoding)
if not isinstance(data, str):
if decoder is not None:
+6 -5
View File
@@ -3,12 +3,12 @@ import functools
from css_parser.css import CSSRule, CSSStyleDeclaration
from ebook_converter import force_unicode
from ebook_converter.css_selectors import parse, SelectorSyntaxError
from ebook_converter.ebooks.oeb import base
from ebook_converter.ebooks.oeb.polish import pretty
from ebook_converter.utils.icu import numeric_sort_key
from ebook_converter.css_selectors import Select, SelectorError
from ebook_converter.utils import encoding as uenc
def filter_used_rules(rules, log, select):
@@ -137,7 +137,7 @@ def remove_unused_css(container, report=None, remove_unused_classes=False,
if unused_rules:
num_of_removed_rules += len(unused_rules)
[sheet.cssRules.remove(r) for r in unused_rules]
style.text = force_unicode(sheet.cssText, 'utf-8')
style.text = uenc.force_unicode(sheet.cssText, 'utf-8')
pretty.pretty_script_or_style(container, style)
container.dirty(name)
@@ -241,7 +241,7 @@ def transform_inline_styles(container, name, transform_sheet, transform_style):
sheet = container.parse_css(style.text)
if transform_sheet(sheet):
changed = True
style.text = force_unicode(sheet.cssText, 'utf-8')
style.text = uenc.force_unicode(sheet.cssText, 'utf-8')
pretty.pretty_script_or_style(container, style)
for elem in root.xpath('//*[@style]'):
text = elem.get('style', None)
@@ -253,8 +253,9 @@ def transform_inline_styles(container, name, transform_sheet, transform_style):
del elem.attrib['style']
else:
elem.set('style',
force_unicode(style.getCssText(separator=' '),
'utf-8'))
uenc.force_unicode(style
.getCssText(separator=' '),
'utf-8'))
return changed
+4 -3
View File
@@ -1,10 +1,10 @@
import textwrap
from ebook_converter import constants as const
from ebook_converter import force_unicode
from ebook_converter.ebooks.oeb import parse_utils
from ebook_converter.ebooks.oeb import base
from ebook_converter.ebooks.oeb.polish.utils import guess_type
from ebook_converter.utils import encoding as uenc
from ebook_converter.utils.icu import sort_key
@@ -157,8 +157,9 @@ def pretty_script_or_style(container, child):
if child.text:
indent = indent_for_tag(child)
if child.tag.endswith('style'):
child.text = force_unicode(pretty_css(container, '', child.text),
'utf-8')
child.text = uenc.force_unicode(pretty_css(container, '',
child.text),
'utf-8')
child.text = textwrap.dedent(child.text)
child.text = '\n' + '\n'.join([(indent + x) if x else ''
for x in child.text.splitlines()])
+3 -3
View File
@@ -12,12 +12,12 @@ from css_parser import (profile as cssprofiles, parseString, parseStyle, log as
css_parser_log, CSSParser, profiles, replaceUrls)
from ebook_converter import constants as const
from ebook_converter import force_unicode
from ebook_converter.ebooks import unit_convert
from ebook_converter.ebooks.oeb import base
from ebook_converter.ebooks.oeb.normalize_css import DEFAULTS, normalizers
from ebook_converter.css_selectors import Select, SelectorError, INAPPROPRIATE_PSEUDO_CLASSES
from ebook_converter.tinycss.media3 import CSSMedia3Parser
from ebook_converter.utils import encoding as uenc
css_parser_log.setLevel(logging.WARN)
@@ -223,10 +223,10 @@ class Stylizer(object):
for x in elem:
t = getattr(x, 'text', None)
if t:
text += '\n\n' + force_unicode(t, 'utf-8')
text += '\n\n' + uenc.force_unicode(t, 'utf-8')
t = getattr(x, 'tail', None)
if t:
text += '\n\n' + force_unicode(t, 'utf-8')
text += '\n\n' + uenc.force_unicode(t, 'utf-8')
if text:
text = oeb.css_preprocessor(text)
# We handle @import rules separately
@@ -14,12 +14,12 @@ from lxml.etree import XPath as _XPath
from lxml import etree
from ebook_converter import constants as const
from ebook_converter import force_unicode
from ebook_converter.ebooks.epub import rules
from ebook_converter.ebooks.oeb import base
from ebook_converter.ebooks.oeb.polish.split import do_split
from ebook_converter.polyglot.urllib import unquote
from ebook_converter.css_selectors import Select, SelectorError
from ebook_converter.utils import encoding as uenc
__license__ = 'GPL v3'
@@ -86,10 +86,12 @@ class Split(object):
stylesheets = [x.data for x in self.oeb.manifest if x.media_type in
base.OEB_STYLES]
for rule in rules(stylesheets):
before = force_unicode(getattr(rule.style.getPropertyCSSValue(
'page-break-before'), 'cssText', '').strip().lower())
after = force_unicode(getattr(rule.style.getPropertyCSSValue(
'page-break-after'), 'cssText', '').strip().lower())
before = uenc.force_unicode(
getattr(rule.style.getPropertyCSSValue(
'page-break-before'), 'cssText', '').strip().lower())
after = uenc.force_unicode(
getattr(rule.style.getPropertyCSSValue(
'page-break-after'), 'cssText', '').strip().lower())
try:
if before and before not in {'avoid', 'auto', 'inherit'}:
self.page_break_selectors.add((rule.selectorText,