mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-02-01 11:35:45 +01:00
Here is the first batch of modules, which are needed for converting several formats to LRF. Some of the logic has been changed; more cleanups will follow.
202 lines
7.7 KiB
Python
202 lines
7.7 KiB
Python
from __future__ import unicode_literals, absolute_import, print_function, division
|
|
#########################################################################
|
|
# #
|
|
# #
|
|
# copyright 2002 Paul Henry Tremblay #
|
|
# #
|
|
# This program is distributed in the hope that it will be useful, #
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
|
|
# General Public License for more details. #
|
|
# #
|
|
# #
|
|
#########################################################################
|
|
import os
|
|
from ebook_converter.ebooks.rtf2xml import copy
|
|
from ebook_converter.ptempfile import better_mktemp
|
|
from . import open_for_read, open_for_write
|
|
|
|
|
|
class ListNumbers:
    """
    RTF puts list numbers outside of the paragraph. The public method
    in this class puts the list numbers inside the paragraphs.

    The work is done by a small line-oriented state machine with the states
    'default', 'after_ob', 'list_text' and 'after_list_text'.
    """

    def __init__(self,
                 in_file,
                 bug_handler,
                 copy=None,
                 run_level=1,
                 ):
        """
        Required:
            'in_file' -- path of the tokenized file to process
            'bug_handler' -- handed to copy.Copy when the result is moved
                back over the input file
        Optional:
            'copy' -- whether to make a copy of result for debugging
            'run_level' -- accepted for interface compatibility; not used
                by this class
        Returns:
            nothing
        """
        self.__file = in_file
        self.__bug_handler = bug_handler
        self.__copy = copy
        # Results are written to a temporary file first.
        self.__write_to = better_mktemp()

    def __initiate_values(self):
        """
        initiate values for fix_list_numbers.
        Required:
            Nothing
        Return:
            Nothing
        """
        self.__state = "default"
        # Lines collected for the list text currently being read.
        self.__list_chunk = ''
        # The open-bracket line held back until the next line is known.
        self.__previous_line = ''
        # Bracket number of the open bracket that started the list text.
        self.__list_text_ob = ''
        # Numbers of the most recent open / close bracket lines.
        self.__ob_count = ''
        self.__cb_count = ''
        # First 16 characters of the line being processed.
        self.__token_info = ''
        self.__state_dict = {
            'default': self.__default_func,
            'after_ob': self.__after_ob_func,
            'list_text': self.__list_text_func,
            'after_list_text': self.__after_list_text_func,
        }

    def __after_ob_func(self, line):
        """
        Handle the line immediately after an open bracket.
        Required:
            self, line
        Returns:
            Nothing
        Logic:
            If the line is a list-text token, start collecting the list
            text (open bracket included) and remember which bracket opened
            it. Otherwise write out the held-back bracket line and this
            line, and go back to the default state.
        """
        if self.__token_info == 'cw<ls<list-text_':
            self.__state = 'list_text'
            self.__list_chunk = (
                self.__list_chunk + self.__previous_line + line)
            self.__list_text_ob = self.__ob_count
            # Reset so a stale close-bracket number cannot match.
            self.__cb_count = 0
        else:
            # NOTE(review): if this line is itself an open bracket it is
            # written straight out and not examined for list text.
            self.__write_obj.write(self.__previous_line)
            self.__write_obj.write(line)
            self.__state = 'default'

    def __after_list_text_func(self, line):
        """
        Look for an open bracket or a line of text, and then print out the
        self.__list_chunk wrapped in list-text markers. The line itself is
        always printed out.
        """
        if line[0:2] in ('ob', 'tx'):
            self.__state = 'default'
            write = self.__write_obj.write
            write('mi<mk<lst-txbeg_\n')
            write('mi<mk<para-beg__\n')
            write('mi<mk<lst-tx-beg\n')
            write('mi<tg<open-att__<list-text\n')
            write(self.__list_chunk)
            write('mi<tg<close_____<list-text\n')
            write('mi<mk<lst-tx-end\n')
            self.__list_chunk = ''
        self.__write_obj.write(line)

    def __determine_list_type(self, chunk):
        """
        Determine if the list is ordered or itemized
        Required:
            chunk -- the collected list text, one token per line
        Returns:
            'unordered' if a hex text token with the value 'B7 is found
            before anything else decides, 'ordered' if the text tokens
            (ignoring '.', '(' and ')') consist only of digits, and
            'unordered' otherwise.
        """
        pieces = []
        for line in chunk.split('\n'):
            if line.startswith('tx<hx'):
                if line[17:] == '\'B7':
                    return "unordered"
            elif line.startswith('tx<nu'):
                pieces.append(line[17:])
        text_string = ''.join(pieces)
        for punctuation in '.()':
            text_string = text_string.replace(punctuation, '')
        if text_string.isdigit():
            return 'ordered'
        # The type could not be determined; 'unordered' is just a guess.
        return 'unordered'

    def __list_text_func(self, line):
        """
        Handle lines that are part of the list text. If the end of the list
        text is found (the closing bracket matches the self.__list_text_ob),
        then change the state and write out the list type. Add the line to
        the self.__list_chunk unless it is a paragraph definition token.
        Required:
            self, line
        Returns:
            Nothing
        """
        if self.__list_text_ob == self.__cb_count:
            self.__state = 'after_list_text'
            self.__right_after_list_text = 1
            # The type is worked out before the closing line is appended.
            self.__list_type = self.__determine_list_type(self.__list_chunk)
            self.__write_obj.write(
                'mi<mk<list-type_<%s\n' % self.__list_type)
        if self.__token_info != 'cw<pf<par-def___':
            self.__list_chunk = self.__list_chunk + line

    def __default_func(self, line):
        """
        Handle the lines that are not part of any special state. Look for an
        opening bracket. If an open bracket is found, add this line to a
        temporary self.__previous line, which other methods need. Otherwise,
        print out the line.
        Required:
            self, line
        Returns:
            Nothing
        """
        if self.__token_info == 'ob<nu<open-brack':
            self.__state = 'after_ob'
            self.__previous_line = line
        else:
            self.__write_obj.write(line)

    def __process_line(self, line):
        """
        Record the token and bracket numbers of one line, then hand the
        line to the handler for the current state.
        Required:
            self, line
        Returns:
            Nothing
        """
        self.__token_info = line[:16]
        if self.__token_info == 'ob<nu<open-brack':
            self.__ob_count = line[-5:-1]
        if self.__token_info == 'cb<nu<clos-brack':
            self.__cb_count = line[-5:-1]
        self.__state_dict[self.__state](line)

    def fix_list_numbers(self):
        """
        Required:
            nothing
        Returns:
            nothing; the result is moved over the original file with
            copy.Copy.rename and the temporary file is removed
        Logic:
            Read in one line a time from the file. Keep track of opening and
            closing brackets. Determine the method ('action') by passing the
            state to the self.__state_dict.
            Simply print out the line to a temp file until an open bracket
            is found. Check the next line. If it is list-text, then start
            adding to the self.__list_chunk until the closing bracket is
            found.
            Next, look for an open bracket or text. When either is found,
            print out self.__list_chunk and the line.
        """
        self.__initiate_values()
        read_obj = open_for_read(self.__file)
        try:
            self.__write_obj = open_for_write(self.__write_to)
            try:
                while True:
                    line = read_obj.readline()
                    # The empty line returned at end of file is processed
                    # too: it flushes an open-bracket line that is still
                    # being held back in the 'after_ob' state.
                    self.__process_line(line)
                    if not line:
                        break
            finally:
                self.__write_obj.close()
        finally:
            read_obj.close()
        copy_obj = copy.Copy(bug_handler=self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "list_numbers.data")
        copy_obj.rename(self.__write_to, self.__file)
        os.remove(self.__write_to)
|