mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-04-28 09:34:05 +02:00
Get rid of icu string functions in favor of native ones.
This commit is contained in:
@@ -6,6 +6,7 @@ from collections import defaultdict
|
||||
from itertools import count
|
||||
from operator import attrgetter
|
||||
import urllib.parse
|
||||
import string
|
||||
|
||||
from lxml import etree, html
|
||||
from ebook_converter import force_unicode
|
||||
@@ -763,7 +764,7 @@ class Metadata(object):
|
||||
key = barename(key)
|
||||
attrib[key] = prefixname(value, nsrmap)
|
||||
if namespace(self.term) == DC11_NS:
|
||||
name = DC(icu_title(barename(self.term)))
|
||||
name = DC(string.capwords(barename(self.term)))
|
||||
elem = element(dcmeta, name, attrib=attrib)
|
||||
elem.text = self.value
|
||||
else:
|
||||
|
||||
@@ -93,10 +93,14 @@ def remove_unused_css(container, report=None, remove_unused_classes=False, merge
|
||||
if num:
|
||||
container.dirty(name)
|
||||
num_merged += num
|
||||
import_map = {name:get_imported_sheets(name, container, sheets) for name in sheets}
|
||||
import_map = {name: get_imported_sheets(name, container, sheets)
|
||||
for name in sheets}
|
||||
if remove_unused_classes:
|
||||
class_map = {name:{icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)} for name, sheet in sheets.items()}
|
||||
style_rules = {name:tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE)) for name, sheet in sheets.items()}
|
||||
class_map = {name: {x.lower() for x in
|
||||
classes_in_rule_list(sheet.cssRules)}
|
||||
for name, sheet in sheets.items()}
|
||||
style_rules = {name: tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
|
||||
for name, sheet in sheets.items()}
|
||||
|
||||
num_of_removed_rules = num_of_removed_classes = 0
|
||||
|
||||
@@ -115,7 +119,7 @@ def remove_unused_css(container, report=None, remove_unused_classes=False, merge
|
||||
num_merged += num
|
||||
container.dirty(name)
|
||||
if remove_unused_classes:
|
||||
used_classes |= {icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)}
|
||||
used_classes |= {x.lower() for x in classes_in_rule_list(sheet.cssRules)}
|
||||
imports = get_imported_sheets(name, container, sheets, sheet=sheet)
|
||||
for imported_sheet in imports:
|
||||
style_rules[imported_sheet] = tuple(filter_used_rules(style_rules[imported_sheet], container.log, select))
|
||||
@@ -147,7 +151,7 @@ def remove_unused_css(container, report=None, remove_unused_classes=False, merge
|
||||
for elem in root.xpath('//*[@class]'):
|
||||
original_classes, classes = elem.get('class', '').split(), []
|
||||
for x in original_classes:
|
||||
if icu_lower(x) in used_classes:
|
||||
if x.lower() in used_classes:
|
||||
classes.append(x)
|
||||
if len(classes) != len(original_classes):
|
||||
if classes:
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import re, os
|
||||
from bisect import bisect
|
||||
import bisect
|
||||
import os
|
||||
import re
|
||||
|
||||
from ebook_converter import guess_type as _guess_type, replace_entities
|
||||
|
||||
@@ -8,6 +9,10 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
|
||||
def _upper(string):
|
||||
return string.upper()
|
||||
|
||||
|
||||
def guess_type(x):
|
||||
return _guess_type(x)[0] or 'application/octet-stream'
|
||||
|
||||
@@ -23,7 +28,8 @@ def setup_css_parser_serialization(tab_width=2):
|
||||
def actual_case_for_name(container, name):
|
||||
from ebook_converter.utils.filenames import samefile
|
||||
if not container.exists(name):
|
||||
raise ValueError('Cannot get actual case for %s as it does not exist' % name)
|
||||
raise ValueError('Cannot get actual case for %s as it does not '
|
||||
'exist' % name)
|
||||
parts = name.split('/')
|
||||
base = ''
|
||||
ans = []
|
||||
@@ -55,9 +61,12 @@ def corrected_case_for_name(container, name):
|
||||
correctx = x
|
||||
else:
|
||||
try:
|
||||
candidates = {q for q in os.listdir(os.path.dirname(container.name_to_abspath(base)))}
|
||||
dirname = os.path.dirname(container.name_to_abspath(base))
|
||||
candidates = {q for q in os.listdir(dirname)}
|
||||
except EnvironmentError:
|
||||
return None # one of the non-terminal components of name is a file instead of a directory
|
||||
# one of the non-terminal components of name is a file instead
|
||||
# of a directory
|
||||
return None
|
||||
for q in candidates:
|
||||
if q.lower() == x.lower():
|
||||
correctx = q
|
||||
@@ -75,7 +84,7 @@ class PositionFinder(object):
|
||||
self.new_lines = tuple(m.start() + 1 for m in re.finditer(pat, raw))
|
||||
|
||||
def __call__(self, pos):
|
||||
lnum = bisect(self.new_lines, pos)
|
||||
lnum = bisect.bisect(self.new_lines, pos)
|
||||
try:
|
||||
offset = abs(pos - self.new_lines[lnum - 1])
|
||||
except IndexError:
|
||||
@@ -94,7 +103,7 @@ class CommentFinder(object):
|
||||
def __call__(self, offset):
|
||||
if not self.starts:
|
||||
return False
|
||||
q = bisect(self.starts, offset) - 1
|
||||
q = bisect.bisect(self.starts, offset) - 1
|
||||
return q >= 0 and self.starts[q] <= offset <= self.ends[q]
|
||||
|
||||
|
||||
@@ -182,13 +191,16 @@ def handle_entities(text, func):
|
||||
return func(replace_entities(text))
|
||||
|
||||
|
||||
def apply_func_to_match_groups(match, func=icu_upper, handle_entities=handle_entities):
|
||||
'''Apply the specified function to individual groups in the match object (the result of re.search() or
|
||||
the whole match if no groups were defined. Returns the replaced string.'''
|
||||
def apply_func_to_match_groups(match, func=_upper,
|
||||
handle_entities=handle_entities):
|
||||
"""
|
||||
Apply the specified function to individual groups in the match object (the
|
||||
result of re.search()) or
|
||||
the whole match if no groups were defined. Returns the replaced string.
|
||||
"""
|
||||
found_groups = False
|
||||
i = 0
|
||||
parts, pos = [], match.start()
|
||||
f = lambda text:handle_entities(text, func)
|
||||
while True:
|
||||
i += 1
|
||||
try:
|
||||
@@ -198,19 +210,22 @@ def apply_func_to_match_groups(match, func=icu_upper, handle_entities=handle_ent
|
||||
found_groups = True
|
||||
if start > -1:
|
||||
parts.append(match.string[pos:start])
|
||||
parts.append(f(match.string[start:end]))
|
||||
parts.append(handle_entities(match.string[start:end], func))
|
||||
pos = end
|
||||
if not found_groups:
|
||||
return f(match.group())
|
||||
return handle_entities(match.group(), func)
|
||||
parts.append(match.string[pos:match.end()])
|
||||
return ''.join(parts)
|
||||
|
||||
|
||||
def apply_func_to_html_text(match, func=icu_upper, handle_entities=handle_entities):
|
||||
''' Apply the specified function only to text between HTML tag definitions. '''
|
||||
f = lambda text:handle_entities(text, func)
|
||||
def apply_func_to_html_text(match, func=_upper,
|
||||
handle_entities=handle_entities):
|
||||
"""
|
||||
Apply the specified function only to text between HTML tag definitions.
|
||||
"""
|
||||
parts = re.split(r'(<[^>]+>)', match.group())
|
||||
parts = (x if x.startswith('<') else f(x) for x in parts)
|
||||
parts = (x if x.startswith('<') else handle_entities(x, func)
|
||||
for x in parts)
|
||||
return ''.join(parts)
|
||||
|
||||
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
"""
|
||||
CSS case-mangling transform.
|
||||
"""
|
||||
import string
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from ebook_converter.ebooks.oeb.base import XHTML, XHTML_NS
|
||||
from ebook_converter.ebooks.oeb.base import CSS_MIME
|
||||
from ebook_converter.ebooks.oeb.base import namespace
|
||||
@@ -46,16 +49,17 @@ class CaseMangler(object):
|
||||
relhref = item.relhref(href)
|
||||
etree.SubElement(html.find(XHTML('head')), XHTML('link'),
|
||||
rel='stylesheet', href=relhref, type=CSS_MIME)
|
||||
stylizer = Stylizer(html, item.href, self.oeb, self.opts, self.profile)
|
||||
stylizer = Stylizer(html, item.href, self.oeb, self.opts,
|
||||
self.profile)
|
||||
self.mangle_elem(html.find(XHTML('body')), stylizer)
|
||||
|
||||
def text_transform(self, transform, text):
|
||||
if transform == 'capitalize':
|
||||
return icu_title(text)
|
||||
return string.capwords(text)
|
||||
elif transform == 'uppercase':
|
||||
return icu_upper(text)
|
||||
return text.upper()
|
||||
elif transform == 'lowercase':
|
||||
return icu_lower(text)
|
||||
return text.lower()
|
||||
return text
|
||||
|
||||
def split_text(self, text):
|
||||
|
||||
Reference in New Issue
Block a user