1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-04-10 15:13:35 +02:00

Get rid of icu string functions in favor of native ones.

This commit is contained in:
2020-05-03 20:19:11 +02:00
parent 212cb56d42
commit da010d7841
10 changed files with 138 additions and 185 deletions

View File

@@ -1220,10 +1220,10 @@ class BuiltinListUnion(BuiltinFormatterFunction):
def evaluate(self, formatter, kwargs, mi, locals, list1, list2, separator):
res = [l.strip() for l in list1.split(separator) if l.strip()]
l2 = [l.strip() for l in list2.split(separator) if l.strip()]
lcl1 = {icu_lower(l) for l in res}
lcl1 = {l.lower() for l in res}
for i in l2:
if icu_lower(i) not in lcl1 and i not in res:
if i.lower() not in lcl1 and i not in res:
res.append(i)
if separator == ',':
return ', '.join(res)
@@ -1241,11 +1241,11 @@ class BuiltinListDifference(BuiltinFormatterFunction):
def evaluate(self, formatter, kwargs, mi, locals, list1, list2, separator):
l1 = [l.strip() for l in list1.split(separator) if l.strip()]
l2 = {icu_lower(l.strip()) for l in list2.split(separator) if l.strip()}
l2 = {l.strip().lower() for l in list2.split(separator) if l.strip()}
res = []
for i in l1:
if icu_lower(i) not in l2 and i not in res:
if i.lower() not in l2 and i not in res:
res.append(i)
if separator == ',':
return ', '.join(res)
@@ -1263,11 +1263,11 @@ class BuiltinListIntersection(BuiltinFormatterFunction):
def evaluate(self, formatter, kwargs, mi, locals, list1, list2, separator):
l1 = [l.strip() for l in list1.split(separator) if l.strip()]
l2 = {icu_lower(l.strip()) for l in list2.split(separator) if l.strip()}
l2 = {l.strip().lower() for l in list2.split(separator) if l.strip()}
res = []
for i in l1:
if icu_lower(i) in l2 and i not in res:
if i.lower() in l2 and i not in res:
res.append(i)
if separator == ',':
return ', '.join(res)
@@ -1302,8 +1302,8 @@ class BuiltinListEquals(BuiltinFormatterFunction):
'The comparison is case insensitive.')
def evaluate(self, formatter, kwargs, mi, locals, list1, sep1, list2, sep2, yes_val, no_val):
s1 = {icu_lower(l.strip()) for l in list1.split(sep1) if l.strip()}
s2 = {icu_lower(l.strip()) for l in list2.split(sep2) if l.strip()}
s1 = {l.strip().lower() for l in list1.split(sep1) if l.strip()}
s2 = {l.strip().lower() for l in list2.split(sep2) if l.strip()}
if s1 == s2:
return yes_val
return no_val
@@ -1426,7 +1426,7 @@ class BuiltinLanguageStrings(BuiltinFormatterFunction):
retval = []
for c in [c.strip() for c in lang_codes.split(',') if c.strip()]:
try:
n = calibre_langcode_to_name(c, localize != '0')
n = calibre_langcode_to_name(c)
if n:
retval.append(n)
except:

View File

@@ -292,7 +292,7 @@ def partition_by_first_letter(items, reverse=False, key=lambda x:x):
ans = OrderedDict()
last_c, last_ordnum = ' ', 0
for item in items:
c = icu_upper(key(item) or ' ')
c = (key(item) or ' ').upper()
ordnum, ordlen = collation_order(c)
if last_ordnum != ordnum:
if not is_narrow_build:

View File

@@ -1,27 +1,6 @@
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re
import io
import sys
import json
import pkg_resources
_available_translations = None
def sanitize_lang(lang):
if lang:
match = re.match('[a-z]{2,3}(_[A-Z]{2}){0,1}', lang)
if match:
lang = match.group()
if lang == 'zh':
lang = 'zh_CN'
if not lang:
lang = 'en'
return lang
def get_lang():
return 'en_US'
@@ -34,121 +13,78 @@ def is_rtl():
_lang_trans = None
lcdata = {
'abday': ('Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'),
'abmon': ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'),
'd_fmt': '%m/%d/%Y',
'd_t_fmt': '%a %d %b %Y %r %Z',
'day': ('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'),
'mon': ('January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'),
'noexpr': '^[nN].*',
'radixchar': '.',
't_fmt': '%r',
't_fmt_ampm': '%I:%M:%S %p',
'thousep': ',',
'yesexpr': '^[yY].*'
}
def load_po(path):
from ebook_converter.translations.msgfmt import make
buf = io.BytesIO()
try:
make(path, buf)
except Exception:
print(('Failed to compile translations file: %s, ignoring') % path)
buf = None
else:
buf = io.BytesIO(buf.getvalue())
return buf
lcdata = {'abday': ('Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'),
'abmon': ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
'Sep', 'Oct', 'Nov', 'Dec'),
'd_fmt': '%m/%d/%Y',
'd_t_fmt': '%a %d %b %Y %r %Z',
'day': ('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday',
'Friday', 'Saturday'),
'mon': ('January', 'February', 'March', 'April', 'May', 'June',
'July', 'August', 'September', 'October', 'November',
'December'),
'noexpr': '^[nN].*',
'radixchar': '.',
't_fmt': '%r',
't_fmt_ampm': '%I:%M:%S %p',
'thousep': ',',
'yesexpr': '^[yY].*'}
_iso639 = None
_extra_lang_codes = {
'pt_BR' : 'Brazilian Portuguese',
'en_GB' : 'English (UK)',
'zh_CN' : 'Simplified Chinese',
'zh_TW' : 'Traditional Chinese',
'en' : 'English',
'en_US' : 'English (United States)',
'en_AR' : 'English (Argentina)',
'en_AU' : 'English (Australia)',
'en_JP' : 'English (Japan)',
'en_DE' : 'English (Germany)',
'en_BG' : 'English (Bulgaria)',
'en_EG' : 'English (Egypt)',
'en_NZ' : 'English (New Zealand)',
'en_CA' : 'English (Canada)',
'en_GR' : 'English (Greece)',
'en_IN' : 'English (India)',
'en_NP' : 'English (Nepal)',
'en_TH' : 'English (Thailand)',
'en_TR' : 'English (Turkey)',
'en_CY' : 'English (Cyprus)',
'en_CZ' : 'English (Czech Republic)',
'en_PH' : 'English (Philippines)',
'en_PK' : 'English (Pakistan)',
'en_PL' : 'English (Poland)',
'en_HR' : 'English (Croatia)',
'en_HU' : 'English (Hungary)',
'en_ID' : 'English (Indonesia)',
'en_IL' : 'English (Israel)',
'en_RU' : 'English (Russia)',
'en_SG' : 'English (Singapore)',
'en_YE' : 'English (Yemen)',
'en_IE' : 'English (Ireland)',
'en_CN' : 'English (China)',
'en_TW' : 'English (Taiwan)',
'en_ZA' : 'English (South Africa)',
'es_PY' : 'Spanish (Paraguay)',
'es_UY' : 'Spanish (Uruguay)',
'es_AR' : 'Spanish (Argentina)',
'es_CR' : 'Spanish (Costa Rica)',
'es_MX' : 'Spanish (Mexico)',
'es_CU' : 'Spanish (Cuba)',
'es_CL' : 'Spanish (Chile)',
'es_EC' : 'Spanish (Ecuador)',
'es_HN' : 'Spanish (Honduras)',
'es_VE' : 'Spanish (Venezuela)',
'es_BO' : 'Spanish (Bolivia)',
'es_NI' : 'Spanish (Nicaragua)',
'es_CO' : 'Spanish (Colombia)',
'de_AT' : 'German (AT)',
'fr_BE' : 'French (BE)',
'nl' : 'Dutch (NL)',
'nl_BE' : 'Dutch (BE)',
'und' : 'Unknown'
}
if False:
# Extra strings needed for Qt
# NOTE: Ante Meridian (i.e. like 10:00 AM)
'AM'
# NOTE: Post Meridian (i.e. like 10:00 PM)
'PM'
# NOTE: Ante Meridian (i.e. like 10:00 am)
'am'
# NOTE: Post Meridian (i.e. like 10:00 pm)
'pm'
'&Copy'
'Select All'
'Copy Link'
'&Select All'
'Copy &Link Location'
'&Undo'
'&Redo'
'Cu&t'
'&Paste'
'Paste and Match Style'
'Directions'
'Left to Right'
'Right to Left'
'Fonts'
'&Step up'
'Step &down'
'Close without Saving'
'Close Tab'
_extra_lang_codes = {'pt_BR': 'Brazilian Portuguese',
'en_GB': 'English (UK)',
'zh_CN': 'Simplified Chinese',
'zh_TW': 'Traditional Chinese',
'en': 'English',
'en_US': 'English (United States)',
'en_AR': 'English (Argentina)',
'en_AU': 'English (Australia)',
'en_JP': 'English (Japan)',
'en_DE': 'English (Germany)',
'en_BG': 'English (Bulgaria)',
'en_EG': 'English (Egypt)',
'en_NZ': 'English (New Zealand)',
'en_CA': 'English (Canada)',
'en_GR': 'English (Greece)',
'en_IN': 'English (India)',
'en_NP': 'English (Nepal)',
'en_TH': 'English (Thailand)',
'en_TR': 'English (Turkey)',
'en_CY': 'English (Cyprus)',
'en_CZ': 'English (Czech Republic)',
'en_PH': 'English (Philippines)',
'en_PK': 'English (Pakistan)',
'en_PL': 'English (Poland)',
'en_HR': 'English (Croatia)',
'en_HU': 'English (Hungary)',
'en_ID': 'English (Indonesia)',
'en_IL': 'English (Israel)',
'en_RU': 'English (Russia)',
'en_SG': 'English (Singapore)',
'en_YE': 'English (Yemen)',
'en_IE': 'English (Ireland)',
'en_CN': 'English (China)',
'en_TW': 'English (Taiwan)',
'en_ZA': 'English (South Africa)',
'es_PY': 'Spanish (Paraguay)',
'es_UY': 'Spanish (Uruguay)',
'es_AR': 'Spanish (Argentina)',
'es_CR': 'Spanish (Costa Rica)',
'es_MX': 'Spanish (Mexico)',
'es_CU': 'Spanish (Cuba)',
'es_CL': 'Spanish (Chile)',
'es_EC': 'Spanish (Ecuador)',
'es_HN': 'Spanish (Honduras)',
'es_VE': 'Spanish (Venezuela)',
'es_BO': 'Spanish (Bolivia)',
'es_NI': 'Spanish (Nicaragua)',
'es_CO': 'Spanish (Colombia)',
'de_AT': 'German (AT)',
'fr_BE': 'French (BE)',
'nl': 'Dutch (NL)',
'nl_BE': 'Dutch (BE)',
'und': 'Unknown'}
_lcase_map = {}
for k in _extra_lang_codes:
@@ -219,11 +155,9 @@ def get_iso_language(lang_trans, lang):
return lang_trans(ans)
def calibre_langcode_to_name(lc, localize=True):
iso639 = _load_iso639()
translate = _ if localize else lambda x: x
translate = lambda x: x
try:
return translate(iso639['by_3'][lc])
except:

View File

@@ -64,23 +64,23 @@ def titlecase(text):
line.append(word)
continue
else:
word = icu_lower(word)
word = word.lower()
if APOS_SECOND.match(word):
word = word.replace(word[0], icu_upper(word[0]), 1)
word = word[:2] + icu_upper(word[2]) + word[3:]
# Upper-case the first character and the character at index 2 of the
# already lower-cased word, using the native str.upper().
word = word.replace(word[0], word[0].upper(), 1)
word = word[:2] + word[2].upper() + word[3:]
line.append(word)
continue
if INLINE_PERIOD.search(word) or UC_ELSEWHERE.match(word):
line.append(word)
continue
if SMALL_WORDS.match(word):
line.append(icu_lower(word))
line.append(word.lower())
continue
hyphenated = []
for item in word.split('-'):
hyphenated.append(CAPFIRST.sub(lambda m: icu_upper(m.group(0)), item))
hyphenated.append(CAPFIRST.sub(lambda m: m.group(0).upper(), item))
line.append("-".join(hyphenated))
result = "".join(line)