mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-01-05 02:24:11 +01:00
Here is the first batch of modules, which are needed for converting several formats to LRF. Some of the logic has been changed; more cleanups will follow.
724 lines
27 KiB
Python
724 lines
27 KiB
Python
from __future__ import unicode_literals, absolute_import, print_function, division
|
|
#########################################################################
|
|
# #
|
|
# #
|
|
# copyright 2002 Paul Henry Tremblay #
|
|
# #
|
|
# This program is distributed in the hope that it will be useful, #
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
|
|
# General Public License for more details. #
|
|
# #
|
|
# #
|
|
#########################################################################
|
|
import sys, os
|
|
from ebook_converter.ebooks.rtf2xml import copy, border_parse
|
|
from ebook_converter.ptempfile import better_mktemp
|
|
from . import open_for_read, open_for_write
|
|
|
|
|
|
class Styles:
    """
    Change lines with style numbers to actual style names.

    The input is a tokenized file in which every line starts with a
    16-character token (for example ``cw<ss<para-style``).  The class reads
    that file line by line with a small state machine:

    * before the styles table, lines are copied through unchanged;
    * inside the styles table, each individual style is collected into a
      dictionary keyed by style type ('par' or 'char') and style number;
    * when the table ends, it is written out as ``mi<tg<...`` lines;
    * after the table, style references in the body have their number
      replaced by the style name.
    """

    def __init__(self,
                 in_file,
                 bug_handler,
                 copy=None,
                 run_level=1,
                 ):
        """
        Required:
            'in_file' -- path of the file to parse (it is overwritten with
            the result by convert_styles)
            'bug_handler' -- exception class raised when an inconsistency is
            found and the run level is high enough
        Optional:
            'copy' -- whether to make a copy of the result for debugging
            'run_level' -- strictness; higher values turn more
            inconsistencies into raised errors
        Returns:
            nothing
        """
        self.__file = in_file
        self.__bug_handler = bug_handler
        self.__copy = copy
        self.__write_to = better_mktemp()
        self.__run_level = run_level

    def __initiate_values(self):
        """
        Initiate all values: parser state, the (empty) styles dictionary and
        the dispatch/lookup tables used by the handlers.
        """
        self.__border_obj = border_parse.BorderParse()
        self.__styles_dict = {'par': {}, 'char': {}}
        self.__styles_num = '0'
        self.__type_of_style = 'par'
        self.__text_string = ''
        self.__state = 'before_styles_table'
        # Holds both the per-state handlers (looked up by self.__state) and
        # the per-token handlers (looked up by the 16-character token).
        self.__state_dict = {
            'before_styles_table': self.__before_styles_func,
            'in_styles_table': self.__in_styles_func,
            'in_individual_style': self.__in_individual_style_func,
            'after_styles_table': self.__after_styles_func,
            'mi<mk<styles-beg': self.__found_styles_table_func,
            'mi<mk<styles-end': self.__found_end_styles_table_func,
            'mi<mk<stylei-beg': self.__found_beg_ind_style_func,
            'mi<mk<stylei-end': self.__found_end_ind_style_func,
            'cw<ss<para-style': self.__para_style_func,
            'cw<ss<char-style': self.__char_style_func,
        }
        # A separate dictionary for parsing the body text
        self.__body_dict = {
            'cw<ss<para-style': (self.__para_style_in_body_func, 'par'),
            'cw<ss<char-style': (self.__para_style_in_body_func, 'char'),
        }
        # Dictionary needed to convert shortened style names to readable
        # names.  The values are emitted into the output file, so they are
        # kept exactly as they are (including historical spellings).
        self.__token_dict = {
            # paragraph formatting => pf
            'par-end___': 'para',
            'par-def___': 'paragraph-definition',
            'keep-w-nex': 'keep-with-next',
            'widow-cntl': 'widow-control',
            'adjust-rgt': 'adjust-right',
            'language__': 'language',
            'right-inde': 'right-indent',
            'fir-ln-ind': 'first-line-indent',
            'left-inden': 'left-indent',
            'space-befo': 'space-before',
            'space-afte': 'space-after',
            'line-space': 'line-spacing',
            'default-ta': 'default-tab',
            'align_____': 'align',
            'widow-cntr': 'widow-control',
            # page formatting mixed in! (Just in older RTF?)
            'margin-lef': 'left-indent',
            'margin-rig': 'right-indent',
            'margin-bot': 'space-after',
            'margin-top': 'space-before',
            # stylesheet = > ss
            'style-shet': 'stylesheet',
            'based-on__': 'based-on-style',
            'next-style': 'next-style',
            'char-style': 'character-style',
            'para-style': 'paragraph-style',
            # graphics => gr
            'picture___': 'pict',
            'obj-class_': 'obj_class',
            'mac-pic___': 'mac-pict',
            # section => sc
            'section___': 'section-new',
            'sect-defin': 'section-reset',
            'sect-note_': 'endnotes-in-section',
            # list=> ls
            'list-text_': 'list-text',
            'list______': 'list',
            'list-lev-d': 'list-level-definition',
            'list-cardi': 'list-cardinal-numbering',
            'list-decim': 'list-decimal-numbering',
            'list-up-al': 'list-uppercase-alphabetic-numbering',
            'list-up-ro': 'list-uppercae-roman-numbering',
            'list-ord__': 'list-ordinal-numbering',
            'list-ordte': 'list-ordinal-text-numbering',
            'list-bulli': 'list-bullet',
            'list-simpi': 'list-simple',
            'list-conti': 'list-continue',
            'list-hang_': 'list-hang',
            # 'list-tebef' : 'list-text-before',
            # 'list-level' : 'level',
            'list-id___': 'list-id',
            'list-start': 'list-start',
            'nest-level': 'nest-level',
            # duplicate
            'list-level': 'list-level',
            # notes => nt
            'footnote__': 'footnote',
            'type______': 'type',
            # anchor => an
            'toc_______': 'anchor-toc',
            'book-mk-st': 'bookmark-start',
            'book-mk-en': 'bookmark-end',
            'index-mark': 'anchor-index',
            'place_____': 'place',
            # field => fd
            'field_____': 'field',
            'field-inst': 'field-instruction',
            'field-rslt': 'field-result',
            'datafield_': 'data-field',
            # info-tables => it
            'font-table': 'font-table',
            'colr-table': 'color-table',
            'lovr-table': 'list-override-table',
            'listtable_': 'list-table',
            'revi-table': 'revision-table',
            # character info => ci
            'hidden____': 'hidden',
            'italics___': 'italics',
            'bold______': 'bold',
            'strike-thr': 'strike-through',
            'shadow____': 'shadow',
            'outline___': 'outline',
            'small-caps': 'small-caps',
            'dbl-strike': 'double-strike-through',
            'emboss____': 'emboss',
            'engrave___': 'engrave',
            'subscript_': 'subscript',
            'superscrip': 'superscript',
            'plain_____': 'plain',
            'font-style': 'font-style',
            'font-color': 'font-color',
            'font-size_': 'font-size',
            'font-up___': 'superscript',
            'font-down_': 'subscript',
            'red_______': 'red',
            'blue______': 'blue',
            'green_____': 'green',
            'caps______': 'caps',
            # table => tb
            'row-def___': 'row-definition',
            'cell______': 'cell',
            'row_______': 'row',
            'in-table__': 'in-table',
            'columns___': 'columns',
            'row-pos-le': 'row-position-left',
            'cell-posit': 'cell-position',
            # preamble => pr
            # underline
            'underlined': 'underlined',
            # border => bd
            'bor-t-r-hi': 'border-table-row-horizontal-inside',
            'bor-t-r-vi': 'border-table-row-vertical-inside',
            'bor-t-r-to': 'border-table-row-top',
            'bor-t-r-le': 'border-table-row-left',
            'bor-t-r-bo': 'border-table-row-bottom',
            'bor-t-r-ri': 'border-table-row-right',
            'bor-cel-bo': 'border-cell-bottom',
            'bor-cel-to': 'border-cell-top',
            'bor-cel-le': 'border-cell-left',
            'bor-cel-ri': 'border-cell-right',
            # 'bor-par-bo' : 'border-paragraph-bottom',
            'bor-par-to': 'border-paragraph-top',
            'bor-par-le': 'border-paragraph-left',
            'bor-par-ri': 'border-paragraph-right',
            'bor-par-bo': 'border-paragraph-box',
            'bor-for-ev': 'border-for-every-paragraph',
            'bor-outsid': 'border-outisde',
            'bor-none__': 'border',
            # border type => bt
            'bdr-single': 'single',
            'bdr-doubtb': 'double-thickness-border',
            'bdr-shadow': 'shadowed-border',
            'bdr-double': 'double-border',
            'bdr-dotted': 'dotted-border',
            'bdr-dashed': 'dashed',
            'bdr-hair__': 'hairline',
            'bdr-inset_': 'inset',
            'bdr-das-sm': 'dash-small',
            'bdr-dot-sm': 'dot-dash',
            'bdr-dot-do': 'dot-dot-dash',
            'bdr-outset': 'outset',
            'bdr-trippl': 'tripple',
            'bdr-thsm__': 'thick-thin-small',
            'bdr-htsm__': 'thin-thick-small',
            'bdr-hthsm_': 'thin-thick-thin-small',
            'bdr-thm__': 'thick-thin-medium',
            'bdr-htm__': 'thin-thick-medium',
            'bdr-hthm_': 'thin-thick-thin-medium',
            'bdr-thl__': 'thick-thin-large',
            'bdr-hthl_': 'think-thick-think-large',
            'bdr-wavy_': 'wavy',
            'bdr-d-wav': 'double-wavy',
            'bdr-strip': 'striped',
            'bdr-embos': 'emboss',
            'bdr-engra': 'engrave',
            'bdr-frame': 'frame',
            'bdr-li-wid': 'line-width',
            # tabs
            'tab-center': 'center',
            'tab-right_': 'right',
            'tab-dec___': 'decimal',
            'leader-dot': 'leader-dot',
            'leader-hyp': 'leader-hyphen',
            'leader-und': 'leader-underline',
        }
        self.__tabs_dict = {
            'cw<pf<tab-stop__': self.__tab_stop_func,
            'cw<pf<tab-center': self.__tab_type_func,
            'cw<pf<tab-right_': self.__tab_type_func,
            'cw<pf<tab-dec___': self.__tab_type_func,
            'cw<pf<leader-dot': self.__tab_leader_func,
            'cw<pf<leader-hyp': self.__tab_leader_func,
            'cw<pf<leader-und': self.__tab_leader_func,
            'cw<pf<tab-bar-st': self.__tab_bar_func,
        }
        self.__tab_type_dict = {
            'cw<pf<tab-center': 'center',
            'cw<pf<tab-right_': 'right',
            'cw<pf<tab-dec___': 'decimal',
            'cw<pf<leader-dot': 'leader-dot',
            'cw<pf<leader-hyp': 'leader-hyphen',
            'cw<pf<leader-und': 'leader-underline',
        }
        # Control words that have no readable name but must not be reported
        # as errors.
        self.__ignore_list = [
            'list-tebef',
        ]
        self.__tab_type = 'left'
        self.__leader_found = 0

    def __in_individual_style_func(self, line):
        """
        Required:
            line
        Returns:
            nothing
        Logic:
            Check if the token has a handler in the state dictionary (end of
            the individual style, end of the table, style number lines).
            Otherwise, border lines are parsed by the border module, tab
            tokens go to the tab handlers, any other control word is looked
            up in the tokens dictionary so the abbreviated name is replaced
            with a longer, more readable one, and text lines are added to
            the text string, which becomes the name of the style.
            Raise an error if no readable name is found for a control word
            (only when the run level is above 3 and the word is not in the
            ignore list).
        """
        state_action = self.__state_dict.get(self.__token_info)
        if state_action:
            state_action(line)
            return
        # have to parse border lines with external module
        if line[0:5] == 'cw<bd':
            border_dict = self.__border_obj.parse_border(line)
            for border_att, border_value in border_dict.items():
                self.__enter_dict_entry(border_att, border_value)
            return
        tab_action = self.__tabs_dict.get(self.__token_info)
        if tab_action is not None:
            tab_action(line)
            return
        if line[0:2] == 'cw':
            # cw<pf<widow-cntl<nu<true
            info = line[6:16]
            att = self.__token_dict.get(info)
            if att is not None:
                self.__enter_dict_entry(att, line[20:-1])
            elif info not in self.__ignore_list and self.__run_level > 3:
                msg = 'no value for key %s\n' % info
                raise self.__bug_handler(msg)
        elif line[0:2] == 'tx':
            self.__text_string += line[17:-1]

    def __append_to_tabs(self, text):
        """
        Required:
            text -- the string to append
        Returns:
            nothing
        Logic:
            Append text to the 'tabs' entry of the current style number in
            the paragraph styles. If the entry doesn't exist yet, create it
            (empty) through __enter_dict_entry and then append.
        """
        par_styles = self.__styles_dict['par']
        try:
            par_styles[self.__styles_num]['tabs'] += text
        except KeyError:
            self.__enter_dict_entry('tabs', '')
            par_styles[self.__styles_num]['tabs'] += text

    def __tab_stop_func(self, line):
        """
        Requires:
            line -- line to parse
        Returns:
            nothing
        Logic:
            Add "<tab type>:<position>;" to the 'tabs' entry of the current
            paragraph style, creating the entry if it doesn't exist. Then
            reset the tab type to 'left' and clear the leader flag, ready
            for the next tab stop.
        """
        self.__append_to_tabs('%s:%s;' % (self.__tab_type, line[20:-1]))
        self.__tab_type = 'left'
        self.__leader_found = 0

    def __tab_type_func(self, line):
        """
        Requires:
            line -- line to parse (only the current token is used)
        Returns:
            nothing
        Logic:
            Remember the tab type named by the current token so that the
            next tab stop is recorded with it. Raise an error for an unknown
            token when the run level is above 3.
        """
        tab_type = self.__tab_type_dict.get(self.__token_info)
        if tab_type is not None:
            self.__tab_type = tab_type
        elif self.__run_level > 3:
            msg = 'no entry for %s\n' % self.__token_info
            raise self.__bug_handler(msg)

    def __tab_leader_func(self, line):
        """
        Requires:
            line -- line to parse (only the current token is used)
        Returns:
            nothing
        Logic:
            Mark that a leader has been found and add the leader name,
            followed by '^', to the 'tabs' entry of the current paragraph
            style. If the entry doesn't exist, create one. Raise an error
            for an unknown token when the run level is above 3.
        """
        self.__leader_found = 1
        leader = self.__tab_type_dict.get(self.__token_info)
        if leader is None:
            if self.__run_level > 3:
                msg = 'no entry for %s\n' % self.__token_info
                raise self.__bug_handler(msg)
            return
        leader += '^'
        par_styles = self.__styles_dict['par']
        try:
            # An existing entry gets a leading ':' ...
            par_styles[self.__styles_num]['tabs'] += ':%s;' % leader
        except KeyError:
            # ... a freshly created one does not (kept as in the original
            # output format).
            self.__enter_dict_entry('tabs', '')
            par_styles[self.__styles_num]['tabs'] += '%s;' % leader

    def __tab_bar_func(self, line):
        """
        Requires:
            line -- line to parse
        Returns:
            nothing
        Logic:
            Add "bar:<position>;" to the 'tabs' entry of the current
            paragraph style, creating the entry if it doesn't exist, and
            reset the tab type to 'left'.
        """
        self.__append_to_tabs('%s:%s;' % ('bar', line[20:-1]))
        self.__tab_type = 'left'

    def __enter_dict_entry(self, att, value):
        """
        Required:
            att -- the attribute
            value -- the value
        Returns:
            nothing
        Logic:
            Try to add the attribute value directly to the styles dictionary.
            If a keyerror is found, that means I have to build the "branches"
            of the dictionary before I can add the key value pair.
        """
        try:
            self.__styles_dict[self.__type_of_style][self.__styles_num][att] = value
        except KeyError:
            self.__add_dict_entry(att, value)

    def __add_dict_entry(self, att, value):
        """
        Required:
            att --the attribute
            value --the value
        Returns:
            nothing
        Logic:
            Build the branch for the current style number: store a new
            dictionary holding just the attribute and value under the
            current style number, inside the dictionary of the current
            style type ('par' or 'char').
            If the style type is neither of those, raise an error when the
            run level is above 3; otherwise do nothing.
        """
        type_dict = self.__styles_dict.get(self.__type_of_style)
        if type_dict is None:
            if self.__run_level > 3:
                msg = self.__type_of_style + 'error\n'
                raise self.__bug_handler(msg)
            # Nothing sensible to store the attribute under.
            return
        type_dict[self.__styles_num] = {att: value}

    def __para_style_func(self, line):
        """
        Required:
            line
        Returns:
            nothing
        Logic:
            Set the type of style to paragraph.
            Extract the number for a line such as "cw<ss<para-style<nu<15".
        """
        self.__type_of_style = 'par'
        self.__styles_num = line[20:-1]

    def __char_style_func(self, line):
        """
        Required:
            line
        Returns:
            nothing
        Logic:
            Set the type of style to character.
            Extract the number for a line such as "cw<ss<char-style<nu<15".
        """
        self.__type_of_style = 'char'
        self.__styles_num = line[20:-1]

    def __found_beg_ind_style_func(self, line):
        """
        Required:
            line
        Returns:
            nothing
        Logic:
            The beginning of an individual style has been found; change the
            state so the following lines are parsed as part of that style.
        """
        self.__state = 'in_individual_style'

    def __found_end_ind_style_func(self, line):
        """
        Required:
            line
        Returns:
            nothing
        Logic:
            The collected text string is the name of the style. Strip the
            surrounding space and get rid of the trailing semicolon (only if
            there is one, so a name without it keeps its last character).
            Add the result with 'name' as the key in the style dictionary
            and reset the text string for the next style.
        """
        name = self.__text_string.strip()
        if name.endswith(';'):
            name = name[:-1].strip()
        self.__enter_dict_entry('name', name)
        self.__text_string = ''

    def __found_end_styles_table_func(self, line):
        """
        Required:
            line
        Returns:
            nothing
        Logic:
            Set the state to after the styles table.
            Fix the styles (replace style numbers in 'next-style' and
            'based-on-style' with style names).
            Print out the style table.
        """
        self.__state = 'after_styles_table'
        self.__fix_based_on()
        self.__print_style_table()

    def __fix_based_on(self):
        """
        Requires:
            nothing
        Returns:
            nothing
        Logic:
            The styles dictionary may contain a pair of key values such as
            'next-style' => '15'. I want to change the 15 to the name of the
            style. I accomplish this by simply looking up the value of 15 in
            the styles table.
            Use two loops. First, check all the paragraph styles. Then check
            all the character styles.
            The inner loop: first check 'next-style', then check
            'based-on-style'.
            If the referenced style does not exist, the reference is removed.
            When it is not style 0 and the run level is above 4, an error
            is raised instead.
        """
        for style_type in ('par', 'char'):
            type_dict = self.__styles_dict[style_type]
            for num, style_atts in type_dict.items():
                for ref_att in ('next-style', 'based-on-style'):
                    ref_num = style_atts.get(ref_att)
                    if ref_num is None:
                        continue
                    ref_style = type_dict.get(ref_num)
                    if ref_style:
                        ref_name = ref_style.get('name')
                        if ref_name:
                            style_atts[ref_att] = ref_name
                        continue
                    if ref_num not in (0, '0') and self.__run_level > 4:
                        msg = '%s %s is based on %s\n' % (
                            style_type, num, ref_num)
                        msg += 'There is no style with %s\n' % ref_num
                        raise self.__bug_handler(msg)
                    # Dangling reference: only an inner dictionary is
                    # changed, so the outer iteration stays valid.
                    del style_atts[ref_att]

    def __print_style_table(self):
        """
        Required:
            nothing
        Returns:
            nothing
        Logic:
            This function prints out the style table.
            The outer loop prints out the paragraphs styles, then the
            character styles. The next loop iterates through the style
            numbers; each style becomes one line made of the style number
            followed by all its pairs of attributes and values.
        """
        write = self.__write_obj.write
        for style_type, prefix in (('par', 'paragraph'),
                                   ('char', 'character')):
            write('mi<tg<open______<%s-styles\n' % prefix)
            for num, style_atts in self.__styles_dict[style_type].items():
                parts = ['mi<tg<empty-att_<%s-style-in-table<num>%s'
                         % (prefix, num)]
                parts.extend('<%s>%s' % (att, this_value)
                             for att, this_value in style_atts.items())
                parts.append('\n')
                write(''.join(parts))
            write('mi<tg<close_____<%s-styles\n' % prefix)

    def __found_styles_table_func(self, line):
        """
        Required:
            line
        Returns:
            nothing
        Logic:
            Change the state to in the style table when the marker has been
            found.
        """
        self.__state = 'in_styles_table'

    def __before_styles_func(self, line):
        """
        Required:
            line
        Returns:
            nothing.
        Logic:
            Check the line info in the state dictionary. When the beginning
            of the styles table is found, change the state to in the styles
            table. Any line without a handler is written out unchanged.
        """
        action = self.__state_dict.get(self.__token_info)
        if action:
            action(line)
        else:
            self.__write_obj.write(line)

    def __in_styles_func(self, line):
        """
        Required:
            line
        Returns:
            nothing
        Logic:
            Check the line for the beginning of an individual style (or any
            other token with a handler). If it is not found, simply print
            out the line.
        """
        action = self.__state_dict.get(self.__token_info)
        if action is None:
            self.__write_obj.write(line)
        else:
            action(line)

    def __para_style_in_body_func(self, line, type):
        """
        Required:
            line-- the line
            type -- whether a character ('char') or paragraph ('par')
        Returns:
            nothing
        Logic:
            Determine the prefix by whether the type is "par" or "char".
            Extract the number from a line such as "cw<ss<para-style<nu<15".
            Look up that number in the styles dictionary and write the line
            with the name in place of the number. If the style has no name
            or is not defined, write a "not-defined" line instead.
        """
        prefix = 'para' if type == 'par' else 'char'
        num = line[20:-1]
        # may be invalid RTF--a style down below not defined above!
        try:
            value = self.__styles_dict[type][num]['name']
        except KeyError:
            value = None
        if value:
            self.__write_obj.write(
                'cw<ss<%s-style<nu<%s\n' % (prefix, value)
            )
        else:
            # NOTE(review): this token uses "_style" rather than "-style";
            # kept as is because later processing may rely on it -- confirm.
            self.__write_obj.write(
                'cw<ss<%s_style<nu<not-defined\n' % prefix
            )

    def __after_styles_func(self, line):
        """
        Required:
            line
        Returns:
            nothing
        Logic:
            Determine if a line with either character or paragraph style
            info has been found. If so, then use the appropriate method to
            parse the line. Otherwise, write the line to a file.
        """
        action, style_type = self.__body_dict.get(
            self.__token_info, (None, None))
        if action:
            action(line, style_type)
        else:
            self.__write_obj.write(line)

    def convert_styles(self):
        """
        Requires:
            nothing
        Returns:
            nothing (changes the original file)
        Raises:
            the bug handler, if the parser is in a state that has no
            handler, or from any handler as described in its own docstring
        Logic:
            Read one line in at a time. Determine what action to take based
            on the state. If the state is before the style table, look for
            the beginning of the style table.
            If the state is in the style table, create the style dictionary
            and print out the tags.
            If the state is after the style table, look for lines with style
            info, and substitute the number with the name of the style.
            Finally, optionally keep a debugging copy of the result, put the
            result in place of the original file and remove the temporary
            file.
        """
        self.__initiate_values()
        read_obj = open_for_read(self.__file)
        try:
            self.__write_obj = open_for_write(self.__write_to)
            try:
                while True:
                    line = read_obj.readline()
                    if not line:
                        break
                    self.__token_info = line[:16]
                    action = self.__state_dict.get(self.__state)
                    if action is None:
                        # Report and stop: there is no handler to call.
                        sys.stderr.write(
                            'no matching state in module styles.py\n')
                        sys.stderr.write(self.__state + '\n')
                        raise self.__bug_handler(
                            'no matching state in module styles.py\n'
                            + self.__state + '\n')
                    action(line)
            finally:
                # Close the files even when a handler raises.
                self.__write_obj.close()
        finally:
            read_obj.close()
        copy_obj = copy.Copy(bug_handler=self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "styles.data")
        copy_obj.rename(self.__write_to, self.__file)
        os.remove(self.__write_to)
|