mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-01-02 16:54:12 +01:00
Here is the first batch of modules, which are needed for converting several formats to LRF. Some of the logic has been changed; more cleanups will follow.
43 lines
1.3 KiB
Python
43 lines
1.3 KiB
Python
#!/usr/bin/env python2
|
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
|
|
import sys
|
|
|
|
from ebook_converter.polyglot.builtins import unicode_type
|
|
|
|
|
|
class CheckEncoding:
    """Check whether a file's raw bytes can be decoded with an encoding."""

    def __init__(self, bug_handler=None):
        """Remember ``bug_handler``.

        The handler is only stored; no method of this class reads it.
        """
        self.__bug_handler = bug_handler

    def __get_position_error(self, line, encoding, line_num):
        """Report on stderr every byte of ``line`` that fails to decode.

        ``line`` is a bytes object and ``line_num`` its 1-based line number.
        Each byte is decoded on its own, so with a multi-byte encoding the
        individual bytes of a multi-byte sequence are reported as well.
        """
        for char_position in range(1, len(line) + 1):
            # Slice rather than index/iterate: iterating over bytes yields
            # ints on Python 3, and ints have no ``decode`` method.
            char = line[char_position - 1:char_position]
            try:
                char.decode(encoding)
            except ValueError as msg:
                # ``%s`` already converts the exception to text.
                sys.stderr.write('line: %s char: %s\n%s\n'
                                 % (line_num, char_position, msg))

    def check_encoding(self, path, encoding='us-ascii', verbose=True):
        """Return True if ``path`` contains a line that cannot be decoded.

        The file is read in binary mode line by line and checking stops at
        the first line that fails to decode with ``encoding``.  Returns
        False when every line decodes.

        When ``verbose`` is true a diagnostic is written to stderr: the
        position of each offending byte for lines shorter than 1000 bytes,
        or just the line number for longer lines.
        """
        with open(path, 'rb') as read_obj:
            for line_num, line in enumerate(read_obj, 1):
                try:
                    line.decode(encoding)
                except ValueError:
                    if verbose:
                        if len(line) < 1000:
                            self.__get_position_error(line, encoding,
                                                      line_num)
                        else:
                            sys.stderr.write('line: %d has bad encoding\n'
                                             % line_num)
                    return True
        return False
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: check the file named on the command line using
    # check_encoding's default encoding and verbose reporting.
    if len(sys.argv) < 2:
        sys.stderr.write('usage: %s <path>\n' % sys.argv[0])
        sys.exit(2)
    # No bug handler exists when run as a script; pass None explicitly
    # (the class only stores the value).
    check_encoding_obj = CheckEncoding(bug_handler=None)
    check_encoding_obj.check_encoding(sys.argv[1])
|