mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-01-02 08:32:26 +01:00
Here is the first batch of modules, which are needed for converting several formats to LRF. Some of the logic has been changed; more cleanups will follow.
262 lines
10 KiB
Python
262 lines
10 KiB
Python
from __future__ import unicode_literals, absolute_import, print_function, division
|
|
#########################################################################
|
|
# #
|
|
# #
|
|
# copyright 2002 Paul Henry Tremblay #
|
|
# #
|
|
# This program is distributed in the hope that it will be useful, #
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
|
|
# General Public License for more details. #
|
|
# #
|
|
# #
|
|
#########################################################################
|
|
import sys, os
|
|
|
|
from ebook_converter.ebooks.rtf2xml import copy
|
|
from ebook_converter.ptempfile import better_mktemp
|
|
from . import open_for_read, open_for_write
|
|
|
|
|
|
class Header:
    """
    Separate RTF headers and footers from the body, and join them again.

    Two public methods are available. ``separate_headers`` moves all of the
    headers and footers out of the body and puts them at the bottom of the
    token file, where they are easier to process. ``join_headers`` is meant
    to put those headers and footers back at their places in the body (see
    the review note in that method about which file is actually kept).
    """

    # Maps the ten-character token name (``line[6:16]``) of a header/footer
    # token to the value written as the ``type`` attribute of the
    # ``header-or-footer`` tag.
    __head_dict = {
        'head-left_': 'header-left',
        'head-right': 'header-right',
        'foot-left_': 'footer-left',
        'foot-right': 'footer-right',
        'head-first': 'header-first',
        'foot-first': 'footer-first',
        'header____': 'header',
        'footer____': 'footer',
    }

    def __init__(self,
                 in_file,
                 bug_handler,
                 copy=None,
                 run_level=1,
                 ):
        """
        Store the settings for later processing.

        in_file -- path of the token file; it is read and then overwritten
            by ``separate_headers`` and ``join_headers``.
        bug_handler -- passed on to ``copy.Copy``.
        copy -- if true, intermediate results are also saved through
            ``copy.Copy.copy_file`` for debugging.
        run_level -- accepted for interface compatibility; not used by the
            code in this class.
        """
        self.__file = in_file
        self.__bug_handler = bug_handler
        self.__copy = copy
        # presumably the path of a fresh temporary file -- see ptempfile
        self.__write_to = better_mktemp()
        self.__found_a_header = False

    def __in_header_func(self, line):
        """
        Handle all tokens that are part of a header or footer.

        When the closing bracket that matches the opening bracket of the
        header has been seen, the line goes to the body file and the closing
        tags go to the header file. Every other line is header content and
        goes to the header file.
        """
        if self.__cb_count == self.__header_bracket_count:
            self.__in_header = False
            self.__write_obj.write(line)
            self.__write_to_head_obj.write(
                'mi<mk<head___clo\n'
                'mi<tg<close_____<header-or-footer\n'
                'mi<mk<header-clo\n')
        else:
            self.__write_to_head_obj.write(line)

    def __found_header(self, line):
        """
        Start a new header or footer.

        Writes a numbered placeholder to the body file and the matching
        numbered opening marker plus the opening tag to the header file.
        """
        # but this could be header or footer
        self.__found_a_header = True
        self.__in_header = True
        self.__header_count += 1
        # temporarily set this to zero so I can enter loop
        self.__cb_count = 0
        self.__header_bracket_count = self.__ob_count
        self.__write_obj.write(
            'mi<mk<header-ind<%04d\n' % self.__header_count)
        self.__write_to_head_obj.write(
            'mi<mk<header-ope<%04d\n' % self.__header_count)
        header_type = self.__head_dict.get(line[6:16])
        if header_type:
            self.__write_to_head_obj.write(
                'mi<tg<open-att__<header-or-footer<type>%s\n' % header_type
            )
        else:
            # Unknown kind of header: report it, but keep going with a
            # placeholder type so the open/close tags stay balanced.
            sys.stderr.write(
                'module is header\n'
                'method is __found_header\n'
                'no dict entry\n'
                'line is %s' % line)
            self.__write_to_head_obj.write(
                'mi<tg<open-att__<header-or-footer<type>none\n'
            )

    def __default_sep(self, line):
        """
        Handle all tokens that are not inside a header or footer.

        A token whose characters 3-4 are ``hf`` starts a header; the line
        itself is always written to the body file.
        """
        if self.__token_info[3:5] == 'hf':
            self.__found_header(line)
        self.__write_obj.write(line)

    def __initiate_sep_values(self):
        """
        Initiate the counters and flags used by ``separate_headers``.
        """
        self.__bracket_count = 0
        self.__ob_count = 0
        self.__cb_count = 0
        self.__header_bracket_count = 0
        self.__in_header = False
        self.__header_count = 0

    def separate_headers(self):
        """
        Separate all the headers and footers in the token file and put them
        at the bottom, where they are easier to process.

        Each time a header is found, its contents are written to a temporary
        file while the body is written to another one. Afterwards the
        headers are appended to the body between ``mi<mk<header-beg`` and
        ``mi<mk<header-end`` markers, and the result replaces the input
        file.
        """
        self.__initiate_sep_values()
        self.__header_holder = better_mktemp()
        with open_for_read(self.__file) as read_obj:
            with open_for_write(self.__write_to) as self.__write_obj:
                with open_for_write(self.__header_holder) as \
                        self.__write_to_head_obj:
                    for line in read_obj:
                        self.__token_info = line[:16]
                        # keep track of opening and closing brackets; the
                        # bracket number is the last four characters before
                        # the newline
                        if self.__token_info == 'ob<nu<open-brack':
                            self.__ob_count = line[-5:-1]
                        if self.__token_info == 'cb<nu<clos-brack':
                            self.__cb_count = line[-5:-1]
                        if self.__in_header:
                            # in the middle of header text
                            self.__in_header_func(line)
                        else:
                            # not in the middle of header text
                            self.__default_sep(line)

        # Append the collected headers to the end of the body.
        with open_for_read(self.__header_holder) as read_obj:
            with open_for_write(self.__write_to, append=True) as write_obj:
                write_obj.write('mi<mk<header-beg\n')
                for line in read_obj:
                    write_obj.write(line)
                write_obj.write('mi<mk<header-end\n')
        os.remove(self.__header_holder)

        copy_obj = copy.Copy(bug_handler=self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "header_separate.data")
        copy_obj.rename(self.__write_to, self.__file)
        os.remove(self.__write_to)

    def update_info(self, file, copy):
        """
        Unused method. Replaces the stored input path and copy flag.
        """
        self.__file = file
        self.__copy = copy

    def __get_head_body_func(self, line):
        """
        Process lines in the main body and look for the beginning of the
        header section at the bottom of the file.
        """
        if self.__token_info == 'mi<mk<header-beg':
            self.__state = 'head'
        else:
            self.__write_obj.write(line)

    def __get_head_head_func(self, line):
        """
        Copy headers and footers from the bottom of the file to a separate,
        temporary file until the end marker is reached.
        """
        if self.__token_info == 'mi<mk<header-end':
            self.__state = 'body'
        else:
            self.__write_to_head_obj.write(line)

    def __get_headers(self):
        """
        Split the main file into a body file and a header file.

        Reads one line at a time. While the state is 'body' the line is
        handled by ``__get_head_body_func``; while it is 'head' by
        ``__get_head_head_func``. The marker lines themselves are dropped.
        """
        with open_for_read(self.__file) as read_obj:
            with open_for_write(self.__write_to) as self.__write_obj:
                with open_for_write(self.__header_holder) as \
                        self.__write_to_head_obj:
                    for line in read_obj:
                        self.__token_info = line[:16]
                        if self.__state == 'body':
                            self.__get_head_body_func(line)
                        elif self.__state == 'head':
                            self.__get_head_head_func(line)

    def __get_head_from_temp(self, num):
        """
        Return the tokens of header number ``num`` as one string.

        Reads on from the current position of the header file until the
        opening marker for ``num`` is found, then collects every line up to
        (not including) the ``mi<mk<header-clo`` line. The file is only read
        forwards, so headers have to be requested in the order they were
        written.

        If the marker is never found an empty string is returned, and if
        the header is not terminated the text collected so far is returned,
        so that the caller always gets something it can write.
        """
        look_for = 'mi<mk<header-ope<' + num + '\n'
        found_head = False
        collected = []
        for line in self.__read_from_head_obj:
            if found_head:
                if line == 'mi<mk<header-clo\n':
                    break
                collected.append(line)
            elif line == look_for:
                found_head = True
        return ''.join(collected)

    def __join_from_temp(self):
        """
        Rejoin the headers and footers with the body.

        Reads the temporary body file. Each ``mi<mk<header-ind`` placeholder
        is replaced by the text ``__get_head_from_temp`` returns for its
        number; every other line is copied unchanged. The result is written
        to a third temporary file. All files are closed on return, also
        when an error occurs.
        """
        with open_for_read(self.__header_holder) as \
                self.__read_from_head_obj:
            with open_for_write(self.__write_to2) as self.__write_obj:
                with open_for_read(self.__write_to) as read_obj:
                    for line in read_obj:
                        if line[:16] == 'mi<mk<header-ind':
                            # line[17:-1] is the number after the marker
                            line = self.__get_head_from_temp(line[17:-1])
                        self.__write_obj.write(line)

    def join_headers(self):
        """
        Join the headers and footers from the bottom of the file with the
        body.

        First the header section is removed from the bottom of the input
        file and written to a temporary file, which gives one file without
        headers and one with only headers. Then both are read, and wherever
        a placeholder is found in the body the corresponding header is
        inserted; that mix is written to a third file.

        Does nothing if ``separate_headers`` did not find any header.
        """
        if not self.__found_a_header:
            return
        self.__write_to2 = better_mktemp()
        self.__state = 'body'
        self.__get_headers()
        self.__join_from_temp()
        copy_obj = copy.Copy(bug_handler=self.__bug_handler)
        # NOTE(review): the joined text is in self.__write_to2, yet the file
        # copied back below is self.__write_to (the body that still has the
        # header-ind placeholders instead of the headers). This looks
        # unintended, but it is kept as is because switching files changes
        # what later processing steps receive -- confirm before changing.
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "header_join.data")
        copy_obj.rename(self.__write_to, self.__file)
        os.remove(self.__write_to)
        # the third file is not used after this point; do not leave it behind
        os.remove(self.__write_to2)
        os.remove(self.__header_holder)