Get rid of icu string functions in favor of native ones.

2026-04-28 17:54:06 +02:00 · 2020-05-03 20:19:11 +02:00
parent 212cb56d42
commit da010d7841
10 changed files with 138 additions and 185 deletions
@@ -6,6 +6,7 @@ from collections import defaultdict
 from itertools import count
 from operator import attrgetter
 import urllib.parse
+import string

 from lxml import etree, html
 from ebook_converter import force_unicode
@@ -763,7 +764,7 @@ class Metadata(object):
                    key = barename(key)
                attrib[key] = prefixname(value, nsrmap)
            if namespace(self.term) == DC11_NS:
-                name = DC(icu_title(barename(self.term)))
+                name = DC(string.capwords(barename(self.term)))
                elem = element(dcmeta, name, attrib=attrib)
                elem.text = self.value
            else:
@@ -93,10 +93,14 @@ def remove_unused_css(container, report=None, remove_unused_classes=False, merge
            if num:
                container.dirty(name)
                num_merged += num
-    import_map = {name:get_imported_sheets(name, container, sheets) for name in sheets}
+    import_map = {name: get_imported_sheets(name, container, sheets)
+                  for name in sheets}
    if remove_unused_classes:
-        class_map = {name:{icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)} for name, sheet in sheets.items()}
-    style_rules = {name:tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE)) for name, sheet in sheets.items()}
+        class_map = {name: {x.lower() for x in
+                            classes_in_rule_list(sheet.cssRules)}
+                     for name, sheet in sheets.items()}
+    style_rules = {name: tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
+                   for name, sheet in sheets.items()}

    num_of_removed_rules = num_of_removed_classes = 0

@@ -115,7 +119,7 @@ def remove_unused_css(container, report=None, remove_unused_classes=False, merge
                        num_merged += num
                        container.dirty(name)
                if remove_unused_classes:
-                    used_classes |= {icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)}
+                    used_classes |= {x.lower() for x in classes_in_rule_list(sheet.cssRules)}
                imports = get_imported_sheets(name, container, sheets, sheet=sheet)
                for imported_sheet in imports:
                    style_rules[imported_sheet] = tuple(filter_used_rules(style_rules[imported_sheet], container.log, select))
@@ -147,7 +151,7 @@ def remove_unused_css(container, report=None, remove_unused_classes=False, merge
            for elem in root.xpath('//*[@class]'):
                original_classes, classes = elem.get('class', '').split(), []
                for x in original_classes:
-                    if icu_lower(x) in used_classes:
+                    if x.lower() in used_classes:
                        classes.append(x)
                if len(classes) != len(original_classes):
                    if classes:
@@ -1,5 +1,6 @@
-import re, os
-from bisect import bisect
+import bisect
+import os
+import re

 from ebook_converter import guess_type as _guess_type, replace_entities

@@ -8,6 +9,10 @@ __license__ = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'


+def _upper(string):
+    return string.upper()
+
+
 def guess_type(x):
    return _guess_type(x)[0] or 'application/octet-stream'

@@ -23,7 +28,8 @@ def setup_css_parser_serialization(tab_width=2):
 def actual_case_for_name(container, name):
    from ebook_converter.utils.filenames import samefile
    if not container.exists(name):
-        raise ValueError('Cannot get actual case for %s as it does not exist' % name)
+        raise ValueError('Cannot get actual case for %s as it does not '
+                         'exist' % name)
    parts = name.split('/')
    base = ''
    ans = []
@@ -55,9 +61,12 @@ def corrected_case_for_name(container, name):
            correctx = x
        else:
            try:
-                candidates = {q for q in os.listdir(os.path.dirname(container.name_to_abspath(base)))}
+                dirname = os.path.dirname(container.name_to_abspath(base))
+                candidates = {q for q in os.listdir(dirname)}
            except EnvironmentError:
-                return None  # one of the non-terminal components of name is a file instead of a directory
+                # one of the non-terminal components of name is a file instead
+                # of a directory
+                return None
            for q in candidates:
                if q.lower() == x.lower():
                    correctx = q
@@ -75,7 +84,7 @@ class PositionFinder(object):
        self.new_lines = tuple(m.start() + 1 for m in re.finditer(pat, raw))

    def __call__(self, pos):
-        lnum = bisect(self.new_lines, pos)
+        lnum = bisect.bisect(self.new_lines, pos)
        try:
            offset = abs(pos - self.new_lines[lnum - 1])
        except IndexError:
@@ -94,7 +103,7 @@ class CommentFinder(object):
    def __call__(self, offset):
        if not self.starts:
            return False
-        q = bisect(self.starts, offset) - 1
+        q = bisect.bisect(self.starts, offset) - 1
        return q >= 0 and self.starts[q] <= offset <= self.ends[q]


@@ -182,13 +191,16 @@ def handle_entities(text, func):
    return func(replace_entities(text))


-def apply_func_to_match_groups(match, func=icu_upper, handle_entities=handle_entities):
-    '''Apply the specified function to individual groups in the match object (the result of re.search() or
-    the whole match if no groups were defined. Returns the replaced string.'''
+def apply_func_to_match_groups(match, func=_upper,
+                               handle_entities=handle_entities):
+    """
+    Apply the specified function to individual groups in the match object (the
+    result of re.search()), or to the whole match if no groups were defined.
+    Returns the replaced string.
+    """
    found_groups = False
    i = 0
    parts, pos = [], match.start()
-    f = lambda text:handle_entities(text, func)
    while True:
        i += 1
        try:
@@ -198,19 +210,22 @@ def apply_func_to_match_groups(match, func=icu_upper, handle_entities=handle_ent
        found_groups = True
        if start > -1:
            parts.append(match.string[pos:start])
-            parts.append(f(match.string[start:end]))
+            parts.append(handle_entities(match.string[start:end], func))
            pos = end
    if not found_groups:
-        return f(match.group())
+        return handle_entities(match.group(), func)
    parts.append(match.string[pos:match.end()])
    return ''.join(parts)


-def apply_func_to_html_text(match, func=icu_upper, handle_entities=handle_entities):
-    ''' Apply the specified function only to text between HTML tag definitions. '''
-    f = lambda text:handle_entities(text, func)
+def apply_func_to_html_text(match, func=_upper,
+                            handle_entities=handle_entities):
+    """
+    Apply the specified function only to text between HTML tag definitions.
+    """
    parts = re.split(r'(<[^>]+>)', match.group())
-    parts = (x if x.startswith('<') else f(x) for x in parts)
+    parts = (x if x.startswith('<') else handle_entities(x, func)
+             for x in parts)
    return ''.join(parts)


@@ -1,7 +1,10 @@
 """
 CSS case-mangling transform.
 """
+import string
+
 from lxml import etree
+
 from ebook_converter.ebooks.oeb.base import XHTML, XHTML_NS
 from ebook_converter.ebooks.oeb.base import CSS_MIME
 from ebook_converter.ebooks.oeb.base import namespace
@@ -46,16 +49,17 @@ class CaseMangler(object):
            relhref = item.relhref(href)
            etree.SubElement(html.find(XHTML('head')), XHTML('link'),
                             rel='stylesheet', href=relhref, type=CSS_MIME)
-            stylizer = Stylizer(html, item.href, self.oeb, self.opts, self.profile)
+            stylizer = Stylizer(html, item.href, self.oeb, self.opts,
+                                self.profile)
            self.mangle_elem(html.find(XHTML('body')), stylizer)

    def text_transform(self, transform, text):
        if transform == 'capitalize':
-            return icu_title(text)
+            return string.capwords(text)
        elif transform == 'uppercase':
-            return icu_upper(text)
+            return text.upper()
        elif transform == 'lowercase':
-            return icu_lower(text)
+            return text.lower()
        return text

    def split_text(self, text):