Get rid of icu string functions in favor of native ones.

2026-04-27 01:01:28 +02:00 · 2020-05-03 20:19:11 +02:00
parent 212cb56d42
commit da010d7841
10 changed files with 138 additions and 185 deletions
@@ -1220,10 +1220,10 @@ class BuiltinListUnion(BuiltinFormatterFunction):
    def evaluate(self, formatter, kwargs, mi, locals, list1, list2, separator):
        res = [l.strip() for l in list1.split(separator) if l.strip()]
        l2 = [l.strip() for l in list2.split(separator) if l.strip()]
-        lcl1 = {icu_lower(l) for l in res}
+        lcl1 = {l.lower() for l in res}

        for i in l2:
-            if icu_lower(i) not in lcl1 and i not in res:
+            if i.lower() not in lcl1 and i not in res:
                res.append(i)
        if separator == ',':
            return ', '.join(res)
@@ -1241,11 +1241,11 @@ class BuiltinListDifference(BuiltinFormatterFunction):

    def evaluate(self, formatter, kwargs, mi, locals, list1, list2, separator):
        l1 = [l.strip() for l in list1.split(separator) if l.strip()]
-        l2 = {icu_lower(l.strip()) for l in list2.split(separator) if l.strip()}
+        l2 = {l.strip().lower() for l in list2.split(separator) if l.strip()}

        res = []
        for i in l1:
-            if icu_lower(i) not in l2 and i not in res:
+            if i.lower() not in l2 and i not in res:
                res.append(i)
        if separator == ',':
            return ', '.join(res)
@@ -1263,11 +1263,11 @@ class BuiltinListIntersection(BuiltinFormatterFunction):

    def evaluate(self, formatter, kwargs, mi, locals, list1, list2, separator):
        l1 = [l.strip() for l in list1.split(separator) if l.strip()]
-        l2 = {icu_lower(l.strip()) for l in list2.split(separator) if l.strip()}
+        l2 = {l.strip().lower() for l in list2.split(separator) if l.strip()}

        res = []
        for i in l1:
-            if icu_lower(i) in l2 and i not in res:
+            if i.lower() in l2 and i not in res:
                res.append(i)
        if separator == ',':
            return ', '.join(res)
@@ -1302,8 +1302,8 @@ class BuiltinListEquals(BuiltinFormatterFunction):
            'The comparison is case insensitive.')

    def evaluate(self, formatter, kwargs, mi, locals, list1, sep1, list2, sep2, yes_val, no_val):
-        s1 = {icu_lower(l.strip()) for l in list1.split(sep1) if l.strip()}
-        s2 = {icu_lower(l.strip()) for l in list2.split(sep2) if l.strip()}
+        s1 = {l.strip().lower() for l in list1.split(sep1) if l.strip()}
+        s2 = {l.strip().lower() for l in list2.split(sep2) if l.strip()}
        if s1 == s2:
            return yes_val
        return no_val
@@ -1426,7 +1426,7 @@ class BuiltinLanguageStrings(BuiltinFormatterFunction):
        retval = []
        for c in [c.strip() for c in lang_codes.split(',') if c.strip()]:
            try:
-                n = calibre_langcode_to_name(c, localize != '0')
+                n = calibre_langcode_to_name(c)
                if n:
                    retval.append(n)
            except:
@@ -292,7 +292,7 @@ def partition_by_first_letter(items, reverse=False, key=lambda x:x):
    ans = OrderedDict()
    last_c, last_ordnum = ' ', 0
    for item in items:
-        c = icu_upper(key(item) or ' ')
+        c = (key(item) or ' ').upper()
        ordnum, ordlen = collation_order(c)
        if last_ordnum != ordnum:
            if not is_narrow_build:
@@ -1,27 +1,6 @@
-__license__ = 'GPL v3'
-__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
-__docformat__ = 'restructuredtext en'
-
-import re
-import io
-import sys
 import json
 import pkg_resources

-_available_translations = None
-
-
-def sanitize_lang(lang):
-    if lang:
-        match = re.match('[a-z]{2,3}(_[A-Z]{2}){0,1}', lang)
-        if match:
-            lang = match.group()
-    if lang == 'zh':
-        lang = 'zh_CN'
-    if not lang:
-        lang = 'en'
-    return lang
-

 def get_lang():
    return 'en_US'
@@ -34,121 +13,78 @@ def is_rtl():
 _lang_trans = None


-lcdata = {
-    'abday': ('Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'),
-    'abmon': ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'),
-    'd_fmt': '%m/%d/%Y',
-    'd_t_fmt': '%a %d %b %Y %r %Z',
-    'day': ('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'),
-    'mon': ('January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'),
-    'noexpr': '^[nN].*',
-    'radixchar': '.',
-    't_fmt': '%r',
-    't_fmt_ampm': '%I:%M:%S %p',
-    'thousep': ',',
-    'yesexpr': '^[yY].*'
-}
-
-
-def load_po(path):
-    from ebook_converter.translations.msgfmt import make
-    buf = io.BytesIO()
-    try:
-        make(path, buf)
-    except Exception:
-        print(('Failed to compile translations file: %s, ignoring') % path)
-        buf = None
-    else:
-        buf = io.BytesIO(buf.getvalue())
-    return buf
+lcdata = {'abday': ('Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'),
+          'abmon': ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
+                    'Sep', 'Oct', 'Nov', 'Dec'),
+          'd_fmt': '%m/%d/%Y',
+          'd_t_fmt': '%a %d %b %Y %r %Z',
+          'day': ('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday',
+                  'Friday', 'Saturday'),
+          'mon': ('January', 'February', 'March', 'April', 'May', 'June',
+                  'July', 'August', 'September', 'October', 'November',
+                  'December'),
+          'noexpr': '^[nN].*',
+          'radixchar': '.',
+          't_fmt': '%r',
+          't_fmt_ampm': '%I:%M:%S %p',
+          'thousep': ',',
+          'yesexpr': '^[yY].*'}


 _iso639 = None
-_extra_lang_codes = {
-        'pt_BR' : 'Brazilian Portuguese',
-        'en_GB' : 'English (UK)',
-        'zh_CN' : 'Simplified Chinese',
-        'zh_TW' : 'Traditional Chinese',
-        'en'    : 'English',
-        'en_US' : 'English (United States)',
-        'en_AR' : 'English (Argentina)',
-        'en_AU' : 'English (Australia)',
-        'en_JP' : 'English (Japan)',
-        'en_DE' : 'English (Germany)',
-        'en_BG' : 'English (Bulgaria)',
-        'en_EG' : 'English (Egypt)',
-        'en_NZ' : 'English (New Zealand)',
-        'en_CA' : 'English (Canada)',
-        'en_GR' : 'English (Greece)',
-        'en_IN' : 'English (India)',
-        'en_NP' : 'English (Nepal)',
-        'en_TH' : 'English (Thailand)',
-        'en_TR' : 'English (Turkey)',
-        'en_CY' : 'English (Cyprus)',
-        'en_CZ' : 'English (Czech Republic)',
-        'en_PH' : 'English (Philippines)',
-        'en_PK' : 'English (Pakistan)',
-        'en_PL' : 'English (Poland)',
-        'en_HR' : 'English (Croatia)',
-        'en_HU' : 'English (Hungary)',
-        'en_ID' : 'English (Indonesia)',
-        'en_IL' : 'English (Israel)',
-        'en_RU' : 'English (Russia)',
-        'en_SG' : 'English (Singapore)',
-        'en_YE' : 'English (Yemen)',
-        'en_IE' : 'English (Ireland)',
-        'en_CN' : 'English (China)',
-        'en_TW' : 'English (Taiwan)',
-        'en_ZA' : 'English (South Africa)',
-        'es_PY' : 'Spanish (Paraguay)',
-        'es_UY' : 'Spanish (Uruguay)',
-        'es_AR' : 'Spanish (Argentina)',
-        'es_CR' : 'Spanish (Costa Rica)',
-        'es_MX' : 'Spanish (Mexico)',
-        'es_CU' : 'Spanish (Cuba)',
-        'es_CL' : 'Spanish (Chile)',
-        'es_EC' : 'Spanish (Ecuador)',
-        'es_HN' : 'Spanish (Honduras)',
-        'es_VE' : 'Spanish (Venezuela)',
-        'es_BO' : 'Spanish (Bolivia)',
-        'es_NI' : 'Spanish (Nicaragua)',
-        'es_CO' : 'Spanish (Colombia)',
-        'de_AT' : 'German (AT)',
-        'fr_BE' : 'French (BE)',
-        'nl'    : 'Dutch (NL)',
-        'nl_BE' : 'Dutch (BE)',
-        'und'   : 'Unknown'
-        }
-
-if False:
-    # Extra strings needed for Qt
-
-    # NOTE: Ante Meridian (i.e. like 10:00 AM)
-    'AM'
-    # NOTE: Post Meridian (i.e. like 10:00 PM)
-    'PM'
-    # NOTE: Ante Meridian (i.e. like 10:00 am)
-    'am'
-    # NOTE: Post Meridian (i.e. like 10:00 pm)
-    'pm'
-    '&Copy'
-    'Select All'
-    'Copy Link'
-    '&Select All'
-    'Copy &Link Location'
-    '&Undo'
-    '&Redo'
-    'Cu&t'
-    '&Paste'
-    'Paste and Match Style'
-    'Directions'
-    'Left to Right'
-    'Right to Left'
-    'Fonts'
-    '&Step up'
-    'Step &down'
-    'Close without Saving'
-    'Close Tab'
+_extra_lang_codes = {'pt_BR': 'Brazilian Portuguese',
+                     'en_GB': 'English (UK)',
+                     'zh_CN': 'Simplified Chinese',
+                     'zh_TW': 'Traditional Chinese',
+                     'en': 'English',
+                     'en_US': 'English (United States)',
+                     'en_AR': 'English (Argentina)',
+                     'en_AU': 'English (Australia)',
+                     'en_JP': 'English (Japan)',
+                     'en_DE': 'English (Germany)',
+                     'en_BG': 'English (Bulgaria)',
+                     'en_EG': 'English (Egypt)',
+                     'en_NZ': 'English (New Zealand)',
+                     'en_CA': 'English (Canada)',
+                     'en_GR': 'English (Greece)',
+                     'en_IN': 'English (India)',
+                     'en_NP': 'English (Nepal)',
+                     'en_TH': 'English (Thailand)',
+                     'en_TR': 'English (Turkey)',
+                     'en_CY': 'English (Cyprus)',
+                     'en_CZ': 'English (Czech Republic)',
+                     'en_PH': 'English (Philippines)',
+                     'en_PK': 'English (Pakistan)',
+                     'en_PL': 'English (Poland)',
+                     'en_HR': 'English (Croatia)',
+                     'en_HU': 'English (Hungary)',
+                     'en_ID': 'English (Indonesia)',
+                     'en_IL': 'English (Israel)',
+                     'en_RU': 'English (Russia)',
+                     'en_SG': 'English (Singapore)',
+                     'en_YE': 'English (Yemen)',
+                     'en_IE': 'English (Ireland)',
+                     'en_CN': 'English (China)',
+                     'en_TW': 'English (Taiwan)',
+                     'en_ZA': 'English (South Africa)',
+                     'es_PY': 'Spanish (Paraguay)',
+                     'es_UY': 'Spanish (Uruguay)',
+                     'es_AR': 'Spanish (Argentina)',
+                     'es_CR': 'Spanish (Costa Rica)',
+                     'es_MX': 'Spanish (Mexico)',
+                     'es_CU': 'Spanish (Cuba)',
+                     'es_CL': 'Spanish (Chile)',
+                     'es_EC': 'Spanish (Ecuador)',
+                     'es_HN': 'Spanish (Honduras)',
+                     'es_VE': 'Spanish (Venezuela)',
+                     'es_BO': 'Spanish (Bolivia)',
+                     'es_NI': 'Spanish (Nicaragua)',
+                     'es_CO': 'Spanish (Colombia)',
+                     'de_AT': 'German (AT)',
+                     'fr_BE': 'French (BE)',
+                     'nl': 'Dutch (NL)',
+                     'nl_BE': 'Dutch (BE)',
+                     'und': 'Unknown'}

 _lcase_map = {}
 for k in _extra_lang_codes:
@@ -219,11 +155,9 @@ def get_iso_language(lang_trans, lang):
    return lang_trans(ans)


-
-
 def calibre_langcode_to_name(lc, localize=True):
    iso639 = _load_iso639()
-    translate = _ if localize else lambda x: x
+    translate = lambda x: x
    try:
        return translate(iso639['by_3'][lc])
    except:
@@ -64,23 +64,23 @@ def titlecase(text):
                line.append(word)
                continue
            else:
-                word = icu_lower(word)
+                word = word.lower()

        if APOS_SECOND.match(word):
-            word = word.replace(word[0], icu_upper(word[0]), 1)
-            word = word[:2] + icu_upper(word[2]) + word[3:]
+            word = word.replace(word[0], word[0].upper(), 1)
+            word = word[:2] + word[2].upper() + word[3:]
            line.append(word)
            continue
        if INLINE_PERIOD.search(word) or UC_ELSEWHERE.match(word):
            line.append(word)
            continue
        if SMALL_WORDS.match(word):
-            line.append(icu_lower(word))
+            line.append(word.lower())
            continue

        hyphenated = []
        for item in word.split('-'):
-            hyphenated.append(CAPFIRST.sub(lambda m: icu_upper(m.group(0)), item))
+            hyphenated.append(CAPFIRST.sub(lambda m: m.group(0).upper(), item))
        line.append("-".join(hyphenated))

    result = "".join(line)