mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-01-31 10:55:44 +01:00
Here is the first batch of modules, which are needed for converting several formats to LRF. Some of the logic has been change, more cleanups will follow.
228 lines
8.2 KiB
Python
228 lines
8.2 KiB
Python
from __future__ import unicode_literals, absolute_import, print_function, division
|
|
#########################################################################
|
|
# #
|
|
# #
|
|
# copyright 2002 Paul Henry Tremblay #
|
|
# #
|
|
# This program is distributed in the hope that it will be useful, #
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
|
|
# General Public License for more details. #
|
|
# #
|
|
# #
|
|
#########################################################################
|
|
import os, re
|
|
from ebook_converter.ebooks.rtf2xml import copy
|
|
from ebook_converter.ptempfile import better_mktemp
|
|
from . import open_for_read, open_for_write
|
|
|
|
|
|
class HeadingsToSections:
|
|
"""
|
|
"""
|
|
|
|
def __init__(self,
|
|
in_file,
|
|
bug_handler,
|
|
copy=None,
|
|
run_level=1,
|
|
):
|
|
"""
|
|
Required:
|
|
'file'
|
|
Optional:
|
|
'copy'-- whether to make a copy of result for debugging
|
|
'temp_dir' --where to output temporary results (default is
|
|
directory from which the script is run.)
|
|
Returns:
|
|
nothing
|
|
"""
|
|
self.__file = in_file
|
|
self.__bug_handler = bug_handler
|
|
self.__copy = copy
|
|
self.__write_to = better_mktemp()
|
|
|
|
def __initiate_values(self):
|
|
"""
|
|
Required:
|
|
Nothing
|
|
Return:
|
|
Nothing
|
|
Logic:
|
|
The self.__end_list is a list of tokens that will force a list to end.
|
|
Likewise, the self.__end_lines is a list of lines that forces a list to end.
|
|
"""
|
|
self.__state = "default"
|
|
self.__all_sections = []
|
|
self.__chunk = ''
|
|
self.__state_dict={
|
|
'default' : self.__default_func,
|
|
'in_table' : self.__in_table_func,
|
|
'in_list' : self.__in_list_func,
|
|
'after_body' : self.__after_body_func,
|
|
}
|
|
self.__list_depth = 0
|
|
self.__end_list = [
|
|
'mi<mk<body-close',
|
|
# changed 2004-04-26
|
|
# 'mi<mk<par-in-fld',
|
|
'mi<mk<sect-close', # right before close of section
|
|
'mi<mk<sect-start', # right before section start
|
|
# this should be sect-close!
|
|
# 'mi<mk<header-beg',
|
|
# 'mi<mk<header-end',
|
|
# 'mi<mk<head___clo',
|
|
#
|
|
# changed 2004-04-26
|
|
# 'mi<mk<fldbk-end_',
|
|
# 'mi<mk<sec-fd-beg',
|
|
]
|
|
self.__headings = [
|
|
'heading 1', 'heading 2', 'heading 3', 'heading 4',
|
|
'heading 5', 'heading 6', 'heading 7', 'heading 8',
|
|
'heading 9'
|
|
]
|
|
self.__section_num = [0]
|
|
self.__id_regex = re.compile(r'\<list-id\>(\d+)')
|
|
|
|
def __close_lists(self):
|
|
"""
|
|
Required:
|
|
Nothing
|
|
Return:
|
|
Nothing
|
|
Logic:
|
|
Reverse the list of dictionaries. Iterate through the list and
|
|
get the indent for each list. If the current indent is less than
|
|
or equal to the indent in the dictionary, close that level.
|
|
Keep track of how many levels you close. Reduce the list by that
|
|
many levels.
|
|
Reverse the list again.
|
|
"""
|
|
current_indent = self.__left_indent
|
|
self.__all_lists.reverse()
|
|
num_levels_closed = 0
|
|
for the_dict in self.__all_lists:
|
|
list_indent = the_dict.get('left-indent')
|
|
if current_indent <= list_indent:
|
|
self.__write_end_item()
|
|
self.__write_end_list()
|
|
num_levels_closed += 1
|
|
self.__all_lists = self.__all_lists[num_levels_closed:]
|
|
self.__all_lists.reverse()
|
|
|
|
def __close_sections(self, current_level):
|
|
self.__all_sections.reverse()
|
|
num_levels_closed = 0
|
|
for level in self.__all_sections:
|
|
if current_level <= level:
|
|
self.__write_end_section()
|
|
num_levels_closed += 1
|
|
self.__all_sections = self.__all_sections[num_levels_closed:]
|
|
self.__all_sections.reverse()
|
|
|
|
def __write_start_section(self, current_level, name):
|
|
section_num = ''
|
|
for the_num in self.__section_num:
|
|
section_num += '%s.' % the_num
|
|
section_num = section_num[:-1]
|
|
num_in_level = len(self.__all_sections)
|
|
num_in_level = self.__section_num[num_in_level]
|
|
level = len(self.__all_sections)
|
|
self.__write_obj.write(
|
|
'mi<mk<sect-start\n'
|
|
)
|
|
self.__write_obj.write(
|
|
'mi<tg<open-att__<section<num>%s<num-in-level>%s<level>%s'
|
|
'<type>%s\n'
|
|
% (section_num, num_in_level, level, name)
|
|
)
|
|
|
|
def __write_end_section(self):
|
|
self.__write_obj.write('mi<mk<sect-close\n')
|
|
self.__write_obj.write('mi<tg<close_____<section\n')
|
|
|
|
def __default_func(self, line):
|
|
"""
|
|
Required:
|
|
self, line
|
|
Returns:
|
|
Nothing
|
|
Logic
|
|
Look for the start of a paragraph defintion. If one is found, check if
|
|
it contains a list-id. If it does, start a list. Change the state to
|
|
in_pard.
|
|
"""
|
|
if self.__token_info == 'mi<mk<sect-start':
|
|
self.__section_num[0] += 1
|
|
self.__section_num = self.__section_num[0:1]
|
|
if self.__token_info == 'mi<mk<tabl-start':
|
|
self.__state = 'in_table'
|
|
elif self.__token_info == 'mi<mk<list_start':
|
|
self.__state = 'in_list'
|
|
self.__list_depth += 1
|
|
elif self.__token_info in self.__end_list:
|
|
self.__close_sections(0)
|
|
elif self.__token_info == 'mi<mk<style-name':
|
|
name = line[17:-1]
|
|
if name in self.__headings:
|
|
self.__handle_heading(name)
|
|
if self.__token_info == 'mi<mk<body-close':
|
|
self.__state = 'after_body'
|
|
self.__write_obj.write(line)
|
|
|
|
def __handle_heading(self, name):
|
|
num = self.__headings.index(name) + 1
|
|
self.__close_sections(num)
|
|
self.__all_sections.append(num)
|
|
level_depth = len(self.__all_sections) + 1
|
|
self.__section_num = self.__section_num[:level_depth]
|
|
if len(self.__section_num) < level_depth:
|
|
self.__section_num.append(1)
|
|
else:
|
|
self.__section_num[-1] += 1
|
|
self.__write_start_section(num, name)
|
|
|
|
def __in_table_func(self, line):
|
|
if self.__token_info == 'mi<mk<table-end_':
|
|
self.__state = 'default'
|
|
self.__write_obj.write(line)
|
|
|
|
def __in_list_func(self, line):
|
|
if self.__token_info == 'mi<mk<list_close':
|
|
self.__list_depth -= 1
|
|
elif self.__token_info == 'mi<mk<list_start':
|
|
self.__list_depth += 1
|
|
if self.__list_depth == 0:
|
|
self.__state = 'default'
|
|
self.__write_obj.write(line)
|
|
|
|
def __after_body_func(self, line):
|
|
self.__write_obj.write(line)
|
|
|
|
def make_sections(self):
|
|
"""
|
|
Required:
|
|
nothing
|
|
Returns:
|
|
original file will be changed
|
|
Logic:
|
|
"""
|
|
self.__initiate_values()
|
|
read_obj = open_for_read(self.__file)
|
|
self.__write_obj = open_for_write(self.__write_to)
|
|
line_to_read = 1
|
|
while line_to_read:
|
|
line_to_read = read_obj.readline()
|
|
line = line_to_read
|
|
self.__token_info = line[:16]
|
|
action = self.__state_dict.get(self.__state)
|
|
action(line)
|
|
read_obj.close()
|
|
self.__write_obj.close()
|
|
copy_obj = copy.Copy(bug_handler=self.__bug_handler)
|
|
if self.__copy:
|
|
copy_obj.copy_file(self.__write_to, "sections_to_headings.data")
|
|
copy_obj.rename(self.__write_to, self.__file)
|
|
os.remove(self.__write_to)
|