mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-02-23 10:35:49 +01:00
Initial import
This commit is contained in:
42
ebook_converter/ebooks/rtf2xml/check_encoding.py
Normal file
42
ebook_converter/ebooks/rtf2xml/check_encoding.py
Normal file
@@ -0,0 +1,42 @@
|
||||
#!/usr/bin/env python2
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import sys
|
||||
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
class CheckEncoding:
    """Check whether the raw bytes of a file decode cleanly with an encoding.

    Problems are reported on ``sys.stderr``; undecodable data never raises
    out of :meth:`check_encoding`.
    """

    def __init__(self, bug_handler):
        """Store *bug_handler* on the instance.

        The methods of this class do not call the handler themselves.
        """
        self.__bug_handler = bug_handler

    def __get_position_error(self, line, encoding, line_num):
        """Write one diagnostic per byte of *line* that fails to decode.

        Each byte is decoded on its own, so the reported ``char`` value is
        the 1-based byte offset within the line.

        :param line: the raw line (a byte string) that failed to decode.
        :param encoding: name of the codec to test each byte against.
        :param line_num: 1-based line number, used only in the message.
        """
        for char_position in range(1, len(line) + 1):
            # Take a one-byte slice instead of iterating over the line:
            # iterating bytes on Python 3 yields ints, which have no
            # decode() method, while a slice is a byte string on both
            # Python 2 and Python 3.
            char = line[char_position - 1:char_position]
            try:
                char.decode(encoding)
            except ValueError as msg:
                # '%s' already converts the exception to text, so no
                # explicit string conversion is required here.
                sys.stderr.write(
                    'line: %s char: %s\n%s\n' % (line_num, char_position, msg))

    def check_encoding(self, path, encoding='us-ascii', verbose=True):
        """Tell whether the file at *path* contains undecodable bytes.

        The file is read in binary mode line by line and scanning stops at
        the first line that cannot be decoded.

        :param path: path of the file to check.
        :param encoding: codec name to decode each line with.
        :param verbose: when true, describe the first bad line on
            ``sys.stderr`` (per-byte detail for lines shorter than 1000
            bytes, a single summary message otherwise).
        :return: ``True`` if a line failed to decode, ``False`` if the whole
            file decoded cleanly.
        """
        with open(path, 'rb') as read_obj:
            for line_num, line in enumerate(read_obj, 1):
                try:
                    line.decode(encoding)
                except ValueError:
                    if verbose:
                        if len(line) < 1000:
                            self.__get_position_error(
                                line, encoding, line_num)
                        else:
                            # Per-byte reporting on a very long line would
                            # flood stderr; emit a single summary instead.
                            sys.stderr.write(
                                'line: %d has bad encoding\n' % line_num)
                    return True
        return False
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line use: check the file named by the first argument against
    # the default encoding and report problems on stderr.
    # CheckEncoding requires a bug_handler argument; the class only stores
    # it, so None is sufficient for stand-alone use.
    check_encoding_obj = CheckEncoding(bug_handler=None)
    check_encoding_obj.check_encoding(sys.argv[1])
|
||||
Reference in New Issue
Block a user