diff --git a/ebook_converter/ebooks/oeb/base.py b/ebook_converter/ebooks/oeb/base.py index 3aada07..f7a5193 100644 --- a/ebook_converter/ebooks/oeb/base.py +++ b/ebook_converter/ebooks/oeb/base.py @@ -6,6 +6,7 @@ from collections import defaultdict from itertools import count from operator import attrgetter import urllib.parse +import string from lxml import etree, html from ebook_converter import force_unicode @@ -763,7 +764,7 @@ class Metadata(object): key = barename(key) attrib[key] = prefixname(value, nsrmap) if namespace(self.term) == DC11_NS: - name = DC(icu_title(barename(self.term))) + name = DC(string.capwords(barename(self.term))) elem = element(dcmeta, name, attrib=attrib) elem.text = self.value else: diff --git a/ebook_converter/ebooks/oeb/polish/css.py b/ebook_converter/ebooks/oeb/polish/css.py index fe75684..ef7a5b7 100644 --- a/ebook_converter/ebooks/oeb/polish/css.py +++ b/ebook_converter/ebooks/oeb/polish/css.py @@ -93,10 +93,14 @@ def remove_unused_css(container, report=None, remove_unused_classes=False, merge if num: container.dirty(name) num_merged += num - import_map = {name:get_imported_sheets(name, container, sheets) for name in sheets} + import_map = {name: get_imported_sheets(name, container, sheets) + for name in sheets} if remove_unused_classes: - class_map = {name:{icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)} for name, sheet in sheets.items()} - style_rules = {name:tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE)) for name, sheet in sheets.items()} + class_map = {name: {x.lower() for x in + classes_in_rule_list(sheet.cssRules)} + for name, sheet in sheets.items()} + style_rules = {name: tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE)) + for name, sheet in sheets.items()} num_of_removed_rules = num_of_removed_classes = 0 @@ -115,7 +119,7 @@ def remove_unused_css(container, report=None, remove_unused_classes=False, merge num_merged += num container.dirty(name) if remove_unused_classes: - used_classes |= 
{icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)} + used_classes |= {x.lower() for x in classes_in_rule_list(sheet.cssRules)} imports = get_imported_sheets(name, container, sheets, sheet=sheet) for imported_sheet in imports: style_rules[imported_sheet] = tuple(filter_used_rules(style_rules[imported_sheet], container.log, select)) @@ -147,7 +151,7 @@ def remove_unused_css(container, report=None, remove_unused_classes=False, merge for elem in root.xpath('//*[@class]'): original_classes, classes = elem.get('class', '').split(), [] for x in original_classes: - if icu_lower(x) in used_classes: + if x.lower() in used_classes: classes.append(x) if len(classes) != len(original_classes): if classes: diff --git a/ebook_converter/ebooks/oeb/polish/utils.py b/ebook_converter/ebooks/oeb/polish/utils.py index e653d26..3465a57 100644 --- a/ebook_converter/ebooks/oeb/polish/utils.py +++ b/ebook_converter/ebooks/oeb/polish/utils.py @@ -1,5 +1,6 @@ -import re, os -from bisect import bisect +import bisect +import os +import re from ebook_converter import guess_type as _guess_type, replace_entities @@ -8,6 +9,10 @@ __license__ = 'GPL v3' __copyright__ = '2013, Kovid Goyal ' +def _upper(string): + return string.upper() + + def guess_type(x): return _guess_type(x)[0] or 'application/octet-stream' @@ -23,7 +28,8 @@ def setup_css_parser_serialization(tab_width=2): def actual_case_for_name(container, name): from ebook_converter.utils.filenames import samefile if not container.exists(name): - raise ValueError('Cannot get actual case for %s as it does not exist' % name) + raise ValueError('Cannot get actual case for %s as it does not ' + 'exist' % name) parts = name.split('/') base = '' ans = [] @@ -55,9 +61,12 @@ def corrected_case_for_name(container, name): correctx = x else: try: - candidates = {q for q in os.listdir(os.path.dirname(container.name_to_abspath(base)))} + dirname = os.path.dirname(container.name_to_abspath(base)) + candidates = {q for q in os.listdir(dirname)} 
except EnvironmentError: - return None # one of the non-terminal components of name is a file instead of a directory + # one of the non-terminal components of name is a file instead + # of a directory + return None for q in candidates: if q.lower() == x.lower(): correctx = q @@ -75,7 +84,7 @@ class PositionFinder(object): self.new_lines = tuple(m.start() + 1 for m in re.finditer(pat, raw)) def __call__(self, pos): - lnum = bisect(self.new_lines, pos) + lnum = bisect.bisect(self.new_lines, pos) try: offset = abs(pos - self.new_lines[lnum - 1]) except IndexError: @@ -94,7 +103,7 @@ class CommentFinder(object): def __call__(self, offset): if not self.starts: return False - q = bisect(self.starts, offset) - 1 + q = bisect.bisect(self.starts, offset) - 1 return q >= 0 and self.starts[q] <= offset <= self.ends[q] @@ -182,13 +191,16 @@ def handle_entities(text, func): return func(replace_entities(text)) -def apply_func_to_match_groups(match, func=icu_upper, handle_entities=handle_entities): - '''Apply the specified function to individual groups in the match object (the result of re.search() or - the whole match if no groups were defined. Returns the replaced string.''' +def apply_func_to_match_groups(match, func=_upper, + handle_entities=handle_entities): + """ + Apply the specified function to individual groups in the match object (the + result of re.search() or + the whole match if no groups were defined. Returns the replaced string. 
+ """ found_groups = False i = 0 parts, pos = [], match.start() - f = lambda text:handle_entities(text, func) while True: i += 1 try: @@ -198,19 +210,22 @@ def apply_func_to_match_groups(match, func=icu_upper, handle_entities=handle_ent found_groups = True if start > -1: parts.append(match.string[pos:start]) - parts.append(f(match.string[start:end])) + parts.append(handle_entities(match.string[start:end], func)) pos = end if not found_groups: - return f(match.group()) + return handle_entities(match.group(), func) parts.append(match.string[pos:match.end()]) return ''.join(parts) -def apply_func_to_html_text(match, func=icu_upper, handle_entities=handle_entities): - ''' Apply the specified function only to text between HTML tag definitions. ''' - f = lambda text:handle_entities(text, func) +def apply_func_to_html_text(match, func=_upper, + handle_entities=handle_entities): + """ + Apply the specified function only to text between HTML tag definitions. + """ parts = re.split(r'(<[^>]+>)', match.group()) - parts = (x if x.startswith('<') else f(x) for x in parts) + parts = (x if x.startswith('<') else handle_entities(x, func) + for x in parts) return ''.join(parts) diff --git a/ebook_converter/ebooks/oeb/transforms/manglecase.py b/ebook_converter/ebooks/oeb/transforms/manglecase.py index 4e55696..67b6493 100644 --- a/ebook_converter/ebooks/oeb/transforms/manglecase.py +++ b/ebook_converter/ebooks/oeb/transforms/manglecase.py @@ -1,7 +1,10 @@ """ CSS case-mangling transform. 
""" +import string + from lxml import etree + from ebook_converter.ebooks.oeb.base import XHTML, XHTML_NS from ebook_converter.ebooks.oeb.base import CSS_MIME from ebook_converter.ebooks.oeb.base import namespace @@ -46,16 +49,17 @@ class CaseMangler(object): relhref = item.relhref(href) etree.SubElement(html.find(XHTML('head')), XHTML('link'), rel='stylesheet', href=relhref, type=CSS_MIME) - stylizer = Stylizer(html, item.href, self.oeb, self.opts, self.profile) + stylizer = Stylizer(html, item.href, self.oeb, self.opts, + self.profile) self.mangle_elem(html.find(XHTML('body')), stylizer) def text_transform(self, transform, text): if transform == 'capitalize': - return icu_title(text) + return string.capwords(text) elif transform == 'uppercase': - return icu_upper(text) + return text.upper() elif transform == 'lowercase': - return icu_lower(text) + return text.lower() return text def split_text(self, text): diff --git a/ebook_converter/library/field_metadata.py b/ebook_converter/library/field_metadata.py index f604efe..80b245f 100644 --- a/ebook_converter/library/field_metadata.py +++ b/ebook_converter/library/field_metadata.py @@ -620,8 +620,8 @@ class FieldMetadata(object): if label in self._tb_cats: raise ValueError('Duplicate user field [%s]'%(label)) st = [label] - if icu_lower(label) != label: - st.append(icu_lower(label)) + if label.lower() != label: + st.append(label.lower()) self._tb_cats[label] = {'table':None, 'column':None, 'datatype':None, 'is_multiple':{}, 'kind':'user', 'name':name, diff --git a/ebook_converter/startup.py b/ebook_converter/startup.py index a2c5fa0..bfa6ce8 100644 --- a/ebook_converter/startup.py +++ b/ebook_converter/startup.py @@ -104,11 +104,6 @@ if not _run_once: except: pass - from ebook_converter.utils.icu import lower as icu_lower, upper as icu_upper - builtins.__dict__['icu_lower'] = icu_lower - builtins.__dict__['icu_upper'] = icu_upper - builtins.__dict__['icu_title'] = lambda s: ' '.join([x.capitalize() for x in s.split(' 
')]) - def connect_lambda(bound_signal, self, func, **kw): import weakref r = weakref.ref(self) diff --git a/ebook_converter/utils/formatter_functions.py b/ebook_converter/utils/formatter_functions.py index 3ccf6e7..d3531b0 100644 --- a/ebook_converter/utils/formatter_functions.py +++ b/ebook_converter/utils/formatter_functions.py @@ -1220,10 +1220,10 @@ class BuiltinListUnion(BuiltinFormatterFunction): def evaluate(self, formatter, kwargs, mi, locals, list1, list2, separator): res = [l.strip() for l in list1.split(separator) if l.strip()] l2 = [l.strip() for l in list2.split(separator) if l.strip()] - lcl1 = {icu_lower(l) for l in res} + lcl1 = {l.lower() for l in res} for i in l2: - if icu_lower(i) not in lcl1 and i not in res: + if i.lower() not in lcl1 and i not in res: res.append(i) if separator == ',': return ', '.join(res) @@ -1241,11 +1241,11 @@ class BuiltinListDifference(BuiltinFormatterFunction): def evaluate(self, formatter, kwargs, mi, locals, list1, list2, separator): l1 = [l.strip() for l in list1.split(separator) if l.strip()] - l2 = {icu_lower(l.strip()) for l in list2.split(separator) if l.strip()} + l2 = {l.strip().lower() for l in list2.split(separator) if l.strip()} res = [] for i in l1: - if icu_lower(i) not in l2 and i not in res: + if i.lower() not in l2 and i not in res: res.append(i) if separator == ',': return ', '.join(res) @@ -1263,11 +1263,11 @@ class BuiltinListIntersection(BuiltinFormatterFunction): def evaluate(self, formatter, kwargs, mi, locals, list1, list2, separator): l1 = [l.strip() for l in list1.split(separator) if l.strip()] - l2 = {icu_lower(l.strip()) for l in list2.split(separator) if l.strip()} + l2 = {l.strip().lower() for l in list2.split(separator) if l.strip()} res = [] for i in l1: - if icu_lower(i) in l2 and i not in res: + if i.lower() in l2 and i not in res: res.append(i) if separator == ',': return ', '.join(res) @@ -1302,8 +1302,8 @@ class BuiltinListEquals(BuiltinFormatterFunction): 'The comparison is case 
insensitive.') def evaluate(self, formatter, kwargs, mi, locals, list1, sep1, list2, sep2, yes_val, no_val): - s1 = {icu_lower(l.strip()) for l in list1.split(sep1) if l.strip()} - s2 = {icu_lower(l.strip()) for l in list2.split(sep2) if l.strip()} + s1 = {l.strip().lower() for l in list1.split(sep1) if l.strip()} + s2 = {l.strip().lower() for l in list2.split(sep2) if l.strip()} if s1 == s2: return yes_val return no_val @@ -1426,7 +1426,7 @@ class BuiltinLanguageStrings(BuiltinFormatterFunction): retval = [] for c in [c.strip() for c in lang_codes.split(',') if c.strip()]: try: - n = calibre_langcode_to_name(c, localize != '0') + n = calibre_langcode_to_name(c) if n: retval.append(n) except: diff --git a/ebook_converter/utils/icu.py b/ebook_converter/utils/icu.py index 14ea8cf..27f5643 100644 --- a/ebook_converter/utils/icu.py +++ b/ebook_converter/utils/icu.py @@ -292,7 +292,7 @@ def partition_by_first_letter(items, reverse=False, key=lambda x:x): ans = OrderedDict() last_c, last_ordnum = ' ', 0 for item in items: - c = icu_upper(key(item) or ' ') + c = (key(item) or ' ').upper() ordnum, ordlen = collation_order(c) if last_ordnum != ordnum: if not is_narrow_build: diff --git a/ebook_converter/utils/localization.py b/ebook_converter/utils/localization.py index 2f99631..f09a695 100644 --- a/ebook_converter/utils/localization.py +++ b/ebook_converter/utils/localization.py @@ -1,27 +1,6 @@ -__license__ = 'GPL v3' -__copyright__ = '2009, Kovid Goyal ' -__docformat__ = 'restructuredtext en' - -import re -import io -import sys import json import pkg_resources -_available_translations = None - - -def sanitize_lang(lang): - if lang: - match = re.match('[a-z]{2,3}(_[A-Z]{2}){0,1}', lang) - if match: - lang = match.group() - if lang == 'zh': - lang = 'zh_CN' - if not lang: - lang = 'en' - return lang - def get_lang(): return 'en_US' @@ -34,121 +13,78 @@ def is_rtl(): _lang_trans = None -lcdata = { - 'abday': ('Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'), - 'abmon': 
('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'), - 'd_fmt': '%m/%d/%Y', - 'd_t_fmt': '%a %d %b %Y %r %Z', - 'day': ('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'), - 'mon': ('January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'), - 'noexpr': '^[nN].*', - 'radixchar': '.', - 't_fmt': '%r', - 't_fmt_ampm': '%I:%M:%S %p', - 'thousep': ',', - 'yesexpr': '^[yY].*' -} - - -def load_po(path): - from ebook_converter.translations.msgfmt import make - buf = io.BytesIO() - try: - make(path, buf) - except Exception: - print(('Failed to compile translations file: %s, ignoring') % path) - buf = None - else: - buf = io.BytesIO(buf.getvalue()) - return buf +lcdata = {'abday': ('Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'), + 'abmon': ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', + 'Sep', 'Oct', 'Nov', 'Dec'), + 'd_fmt': '%m/%d/%Y', + 'd_t_fmt': '%a %d %b %Y %r %Z', + 'day': ('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', + 'Friday', 'Saturday'), + 'mon': ('January', 'February', 'March', 'April', 'May', 'June', + 'July', 'August', 'September', 'October', 'November', + 'December'), + 'noexpr': '^[nN].*', + 'radixchar': '.', + 't_fmt': '%r', + 't_fmt_ampm': '%I:%M:%S %p', + 'thousep': ',', + 'yesexpr': '^[yY].*'} _iso639 = None -_extra_lang_codes = { - 'pt_BR' : 'Brazilian Portuguese', - 'en_GB' : 'English (UK)', - 'zh_CN' : 'Simplified Chinese', - 'zh_TW' : 'Traditional Chinese', - 'en' : 'English', - 'en_US' : 'English (United States)', - 'en_AR' : 'English (Argentina)', - 'en_AU' : 'English (Australia)', - 'en_JP' : 'English (Japan)', - 'en_DE' : 'English (Germany)', - 'en_BG' : 'English (Bulgaria)', - 'en_EG' : 'English (Egypt)', - 'en_NZ' : 'English (New Zealand)', - 'en_CA' : 'English (Canada)', - 'en_GR' : 'English (Greece)', - 'en_IN' : 'English (India)', - 'en_NP' : 'English (Nepal)', - 'en_TH' : 'English (Thailand)', - 
'en_TR' : 'English (Turkey)', - 'en_CY' : 'English (Cyprus)', - 'en_CZ' : 'English (Czech Republic)', - 'en_PH' : 'English (Philippines)', - 'en_PK' : 'English (Pakistan)', - 'en_PL' : 'English (Poland)', - 'en_HR' : 'English (Croatia)', - 'en_HU' : 'English (Hungary)', - 'en_ID' : 'English (Indonesia)', - 'en_IL' : 'English (Israel)', - 'en_RU' : 'English (Russia)', - 'en_SG' : 'English (Singapore)', - 'en_YE' : 'English (Yemen)', - 'en_IE' : 'English (Ireland)', - 'en_CN' : 'English (China)', - 'en_TW' : 'English (Taiwan)', - 'en_ZA' : 'English (South Africa)', - 'es_PY' : 'Spanish (Paraguay)', - 'es_UY' : 'Spanish (Uruguay)', - 'es_AR' : 'Spanish (Argentina)', - 'es_CR' : 'Spanish (Costa Rica)', - 'es_MX' : 'Spanish (Mexico)', - 'es_CU' : 'Spanish (Cuba)', - 'es_CL' : 'Spanish (Chile)', - 'es_EC' : 'Spanish (Ecuador)', - 'es_HN' : 'Spanish (Honduras)', - 'es_VE' : 'Spanish (Venezuela)', - 'es_BO' : 'Spanish (Bolivia)', - 'es_NI' : 'Spanish (Nicaragua)', - 'es_CO' : 'Spanish (Colombia)', - 'de_AT' : 'German (AT)', - 'fr_BE' : 'French (BE)', - 'nl' : 'Dutch (NL)', - 'nl_BE' : 'Dutch (BE)', - 'und' : 'Unknown' - } - -if False: - # Extra strings needed for Qt - - # NOTE: Ante Meridian (i.e. like 10:00 AM) - 'AM' - # NOTE: Post Meridian (i.e. like 10:00 PM) - 'PM' - # NOTE: Ante Meridian (i.e. like 10:00 am) - 'am' - # NOTE: Post Meridian (i.e. 
like 10:00 pm) - 'pm' - '&Copy' - 'Select All' - 'Copy Link' - '&Select All' - 'Copy &Link Location' - '&Undo' - '&Redo' - 'Cu&t' - '&Paste' - 'Paste and Match Style' - 'Directions' - 'Left to Right' - 'Right to Left' - 'Fonts' - '&Step up' - 'Step &down' - 'Close without Saving' - 'Close Tab' +_extra_lang_codes = {'pt_BR': 'Brazilian Portuguese', + 'en_GB': 'English (UK)', + 'zh_CN': 'Simplified Chinese', + 'zh_TW': 'Traditional Chinese', + 'en': 'English', + 'en_US': 'English (United States)', + 'en_AR': 'English (Argentina)', + 'en_AU': 'English (Australia)', + 'en_JP': 'English (Japan)', + 'en_DE': 'English (Germany)', + 'en_BG': 'English (Bulgaria)', + 'en_EG': 'English (Egypt)', + 'en_NZ': 'English (New Zealand)', + 'en_CA': 'English (Canada)', + 'en_GR': 'English (Greece)', + 'en_IN': 'English (India)', + 'en_NP': 'English (Nepal)', + 'en_TH': 'English (Thailand)', + 'en_TR': 'English (Turkey)', + 'en_CY': 'English (Cyprus)', + 'en_CZ': 'English (Czech Republic)', + 'en_PH': 'English (Philippines)', + 'en_PK': 'English (Pakistan)', + 'en_PL': 'English (Poland)', + 'en_HR': 'English (Croatia)', + 'en_HU': 'English (Hungary)', + 'en_ID': 'English (Indonesia)', + 'en_IL': 'English (Israel)', + 'en_RU': 'English (Russia)', + 'en_SG': 'English (Singapore)', + 'en_YE': 'English (Yemen)', + 'en_IE': 'English (Ireland)', + 'en_CN': 'English (China)', + 'en_TW': 'English (Taiwan)', + 'en_ZA': 'English (South Africa)', + 'es_PY': 'Spanish (Paraguay)', + 'es_UY': 'Spanish (Uruguay)', + 'es_AR': 'Spanish (Argentina)', + 'es_CR': 'Spanish (Costa Rica)', + 'es_MX': 'Spanish (Mexico)', + 'es_CU': 'Spanish (Cuba)', + 'es_CL': 'Spanish (Chile)', + 'es_EC': 'Spanish (Ecuador)', + 'es_HN': 'Spanish (Honduras)', + 'es_VE': 'Spanish (Venezuela)', + 'es_BO': 'Spanish (Bolivia)', + 'es_NI': 'Spanish (Nicaragua)', + 'es_CO': 'Spanish (Colombia)', + 'de_AT': 'German (AT)', + 'fr_BE': 'French (BE)', + 'nl': 'Dutch (NL)', + 'nl_BE': 'Dutch (BE)', + 'und': 'Unknown'} _lcase_map = {} 
for k in _extra_lang_codes: @@ -219,11 +155,9 @@ def get_iso_language(lang_trans, lang): return lang_trans(ans) - - def calibre_langcode_to_name(lc, localize=True): iso639 = _load_iso639() - translate = _ if localize else lambda x: x + translate = lambda x: x try: return translate(iso639['by_3'][lc]) except: diff --git a/ebook_converter/utils/titlecase.py b/ebook_converter/utils/titlecase.py index 002ce40..64f5613 100644 --- a/ebook_converter/utils/titlecase.py +++ b/ebook_converter/utils/titlecase.py @@ -64,23 +64,23 @@ def titlecase(text): line.append(word) continue else: - word = icu_lower(word) + word = word.lower() if APOS_SECOND.match(word): - word = word.replace(word[0], icu_upper(word[0]), 1) - word = word[:2] + icu_upper(word[2]) + word[3:] + word = word.replace(word[0], word[0].upper(), 1) + word = word[:2] + word[2].upper() + word[3:] line.append(word) continue if INLINE_PERIOD.search(word) or UC_ELSEWHERE.match(word): line.append(word) continue if SMALL_WORDS.match(word): - line.append(icu_lower(word)) + line.append(word.lower()) continue hyphenated = [] for item in word.split('-'): - hyphenated.append(CAPFIRST.sub(lambda m: icu_upper(m.group(0)), item)) + hyphenated.append(CAPFIRST.sub(lambda m: m.group(0).upper(), item)) line.append("-".join(hyphenated)) result = "".join(line)