Mirror of https://github.com/gryf/ebook-converter.git
Removed polyglot unicode_type usage
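The change is mechanical: under Python 3 the polyglot alias unicode_type resolves to the built-in str, so every call site can be rewritten one-for-one with no behavior change. A minimal sketch of the kind of shim ebook_converter.polyglot.builtins presumably provided (the exact module contents are an assumption, not quoted from the repo):

    # Hypothetical reconstruction of the removed compatibility shim;
    # the real polyglot.builtins module may have differed in detail.
    import sys

    if sys.version_info[0] >= 3:
        unicode_type = str        # Python 3: all text is str
    else:
        unicode_type = unicode    # noqa: F821  (Python 2 only)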
@@ -2,7 +2,6 @@ import functools, re, json
 from math import ceil

 from ebook_converter import entity_to_unicode, as_unicode
-from ebook_converter.polyglot.builtins import unicode_type


 __license__ = 'GPL v3'
@@ -72,8 +71,8 @@ def smarten_punctuation(html, log=None):
     from ebook_converter.ebooks.conversion.utils import HeuristicProcessor
     preprocessor = HeuristicProcessor(log=log)
     from uuid import uuid4
-    start = 'calibre-smartypants-'+unicode_type(uuid4())
-    stop = 'calibre-smartypants-'+unicode_type(uuid4())
+    start = 'calibre-smartypants-'+str(uuid4())
+    stop = 'calibre-smartypants-'+str(uuid4())
     html = html.replace('<!--', start)
     html = html.replace('-->', stop)
     html = preprocessor.fix_nbsp_indents(html)
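Besides the str() swap, this hunk shows the sentinel trick smarten_punctuation relies on: the comment delimiters are swapped for unique uuid4-based placeholders so a later pass that rewrites dashes cannot corrupt '<!--' and '-->'. A standalone sketch of the pattern (protect_comments and the sample transform are illustrative names, not part of the module):

    # Illustrative sketch: hide the dash-bearing delimiters behind
    # unique placeholders, run the transform, then restore them.
    # uuid4 hex groups are joined by single hyphens, so a transform
    # that rewrites "--" can never touch a placeholder.
    from uuid import uuid4

    def protect_comments(html, transform):
        start = 'calibre-smartypants-' + str(uuid4())
        stop = 'calibre-smartypants-' + str(uuid4())
        html = html.replace('<!--', start).replace('-->', stop)
        html = transform(html)
        return html.replace(start, '<!--').replace(stop, '-->')

    demo = protect_comments('a -- b <!-- note -->',
                            lambda h: h.replace('--', '\u2014'))
    print(demo)  # the "--" in prose became an em dash; "<!-- note -->" survived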
@@ -149,20 +148,20 @@ class DocAnalysis(object):
         maxLineLength=1900 # Discard larger than this to stay in range
         buckets=20 # Each line is divided into a bucket based on length

-        # print("there are "+unicode_type(len(lines))+" lines")
+        # print("there are "+str(len(lines))+" lines")
         # max = 0
         # for line in self.lines:
         #     l = len(line)
         #     if l > max:
         #         max = l
-        # print("max line found is "+unicode_type(max))
+        # print("max line found is "+str(max))
         # Build the line length histogram
         hRaw = [0 for i in range(0,buckets)]
         for line in self.lines:
             l = len(line)
             if l > minLineLength and l < maxLineLength:
                 l = int(l // 100)
-                # print("adding "+unicode_type(l))
+                # print("adding "+str(l))
                 hRaw[l]+=1

         # Normalize the histogram into percents
@@ -171,8 +170,8 @@ class DocAnalysis(object):
             h = [float(count)/totalLines for count in hRaw]
         else:
             h = []
-        # print("\nhRaw histogram lengths are: "+unicode_type(hRaw))
-        # print(" percents are: "+unicode_type(h)+"\n")
+        # print("\nhRaw histogram lengths are: "+str(hRaw))
+        # print(" percents are: "+str(h)+"\n")

         # Find the biggest bucket
         maxValue = 0
@@ -184,7 +183,7 @@ class DocAnalysis(object):
             # print("Line lengths are too variable. Not unwrapping.")
             return False
         else:
-            # print(unicode_type(maxValue)+" of the lines were in one bucket")
+            # print(str(maxValue)+" of the lines were in one bucket")
             return True


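The three DocAnalysis hunks above all touch the same line-length histogram heuristic: lines are bucketed by length in 100-character steps, and unwrapping is only attempted when one bucket dominates. A self-contained sketch of that logic, assuming minLineLength is 20 as the comparison in the code suggests (names and thresholds mirror the diff, but this is an illustration, not the module's code):

    # Bucket lines by length; hard-wrapped text clusters in one bucket.
    def looks_hard_wrapped(lines, percent=0.4, buckets=20):
        hRaw = [0] * buckets
        for line in lines:
            l = len(line)
            if 20 < l < 1900:          # skip spacing lines, cap outliers
                hRaw[l // 100] += 1
        total = len(lines)
        h = [count / total for count in hRaw] if total else []
        return bool(h) and max(h) >= percent

    # 80 lines of nearly uniform length land in a single bucket:
    print(looks_hard_wrapped(['x' * 72] * 50 + ['x' * 68] * 30))  # True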
@@ -220,8 +219,8 @@ class Dehyphenator(object):
             wraptags = match.group('wraptags')
         except:
             wraptags = ''
-        hyphenated = unicode_type(firsthalf) + "-" + unicode_type(secondhalf)
-        dehyphenated = unicode_type(firsthalf) + unicode_type(secondhalf)
+        hyphenated = str(firsthalf) + "-" + str(secondhalf)
+        dehyphenated = str(firsthalf) + str(secondhalf)
         if self.suffixes.match(secondhalf) is None:
             lookupword = self.removesuffixes.sub('', dehyphenated)
         else:
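In the Dehyphenator hunk, the two halves of a word broken across a line are joined both with and without the hyphen, and a dictionary lookup key is derived by stripping common suffixes unless the second half is itself just a suffix. A toy walkthrough with stand-in suffix regexes (the class's real patterns are considerably longer):

    # Stand-in patterns for illustration; not the class's actual regexes.
    import re

    suffixes = re.compile(r"((ed)?ly|'?e?s|a?(t|s)?ion(s|al(ly)?)?|ings?)$")
    removesuffixes = re.compile(r"((ed)?ly|'?e?s|a?(t|s)?ion(s|al(ly)?)?|ings?)$")

    firsthalf, secondhalf = 'whis', 'pering'
    hyphenated = str(firsthalf) + "-" + str(secondhalf)    # 'whis-pering'
    dehyphenated = str(firsthalf) + str(secondhalf)        # 'whispering'
    if suffixes.match(secondhalf) is None:
        # second half is a real fragment: strip 'ing' for the lookup
        lookupword = removesuffixes.sub('', dehyphenated)  # 'whisper'
    else:
        lookupword = dehyphenated
    print(hyphenated, dehyphenated, lookupword)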
@@ -327,7 +326,7 @@ class CSSPreProcessor(object):
         # are commented lines before the first @import or @charset rule. Since
         # the conversion will remove all stylesheets anyway, we don't lose
         # anything
-        data = re.sub(unicode_type(r'/\*.*?\*/'), '', data, flags=re.DOTALL)
+        data = re.sub(str(r'/\*.*?\*/'), '', data, flags=re.DOTALL)

         ans, namespaced = [], False
         for line in data.splitlines():
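Worth noting in the CSSPreProcessor hunk: wrapping a string literal in unicode_type() or str() is a no-op, so this change is purely cosmetic. The actual work is done by re.DOTALL, which lets '.' cross newlines so multi-line comments are removed in one non-greedy pass. A quick demonstration:

    # With re.DOTALL, '.' matches newlines, so a /* ... */ block that
    # spans lines is stripped; '.*?' stays non-greedy so two separate
    # comments are not merged into one match.
    import re

    css = "/* banner\n   spans lines */\n@import url(x.css);\np { color: red }"
    print(re.sub(str(r'/\*.*?\*/'), '', css, flags=re.DOTALL))
    # prints the @import and rule with the comment gone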
@@ -535,7 +534,7 @@ class HTMLPreProcessor(object):
                 docanalysis = DocAnalysis('pdf', html)
                 length = docanalysis.line_length(getattr(self.extra_opts, 'unwrap_factor'))
                 if length:
-                    # print("The pdf line length returned is " + unicode_type(length))
+                    # print("The pdf line length returned is " + str(length))
                     # unwrap em/en dashes
                     end_rules.append((re.compile(
                         r'(?<=.{%i}[–—])\s*<p>\s*(?=[\[a-z\d])' % length), lambda match: ''))
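The appended rule removes the paragraph break after a line that runs at least `length` characters and ends in an en or em dash, rejoining text that a PDF layout split mid-clause. Python's re accepts the lookbehind because .{%i} expands to a fixed width. A sketch with an arbitrary small length (the sample values are for demonstration only):

    # The lookbehind requires `length` characters plus a trailing dash
    # before the <p>; the lookahead requires the next chunk to start
    # with a bracket, lowercase letter, or digit.
    import re

    length = 10
    rule = re.compile(r'(?<=.{%i}[–—])\s*<p>\s*(?=[\[a-z\d])' % length)
    html = 'a long line of text\u2014<p>continued here'
    print(rule.sub('', html))  # the <p> between the dash and 'continued' is gone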