######################################################################### # # # # # copyright 2002 Paul Henry Tremblay # # # # This program is distributed in the hope that it will be useful, # # but WITHOUT ANY WARRANTY; without even the implied warranty of # # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # # General Public License for more details. # # # # # ######################################################################### import sys, os from ebook_converter.ebooks.rtf2xml import copy, border_parse from ebook_converter.ptempfile import better_mktemp from . import open_for_read, open_for_write class ParagraphDef: """ ================= Purpose ================= Write paragraph definition tags. States: 1. before_1st_para_def. Before any para_def token is found. This means all the text in the preamble. Look for the token 'cw pf 'par-end___' : 'para', 'par-def___' : 'paragraph-definition', 'keep-w-nex' : 'keep-with-next', 'widow-cntl' : 'widow-control', 'adjust-rgt' : 'adjust-right', 'language__' : 'language', 'right-inde' : 'right-indent', 'fir-ln-ind' : 'first-line-indent', 'left-inden' : 'left-indent', 'space-befo' : 'space-before', 'space-afte' : 'space-after', 'line-space' : 'line-spacing', 'default-ta' : 'default-tab', 'align_____' : 'align', 'widow-cntr' : 'widow-control', # stylesheet = > ss 'style-shet' : 'stylesheet', 'based-on__' : 'based-on-style', 'next-style' : 'next-style', 'char-style' : 'character-style', # this is changed to get a nice attribute 'para-style' : 'name', # graphics => gr 'picture___' : 'pict', 'obj-class_' : 'obj_class', 'mac-pic___' : 'mac-pict', # section => sc 'section___' : 'section-new', 'sect-defin' : 'section-reset', 'sect-note_' : 'endnotes-in-section', # list=> ls 'list-text_' : 'list-text', # this line must be wrong because it duplicates an earlier one 'list-text_' : 'list-text', 'list______' : 'list', 'list-lev-d' : 'list-level-definition', 'list-cardi' : 'list-cardinal-numbering', 'list-decim' : 
'list-decimal-numbering', 'list-up-al' : 'list-uppercase-alphabetic-numbering', 'list-up-ro' : 'list-uppercae-roman-numbering', 'list-ord__' : 'list-ordinal-numbering', 'list-ordte' : 'list-ordinal-text-numbering', 'list-bulli' : 'list-bullet', 'list-simpi' : 'list-simple', 'list-conti' : 'list-continue', 'list-hang_' : 'list-hang', # 'list-tebef' : 'list-text-before', # 'list-level' : 'level', 'list-id___' : 'list-id', 'list-start' : 'list-start', 'nest-level' : 'nest-level', # duplicate 'list-level' : 'list-level', # notes => nt 'footnote__' : 'footnote', 'type______' : 'type', # anchor => an 'toc_______' : 'anchor-toc', 'book-mk-st' : 'bookmark-start', 'book-mk-en' : 'bookmark-end', 'index-mark' : 'anchor-index', 'place_____' : 'place', # field => fd 'field_____' : 'field', 'field-inst' : 'field-instruction', 'field-rslt' : 'field-result', 'datafield_' : 'data-field', # info-tables => it 'font-table' : 'font-table', 'colr-table' : 'color-table', 'lovr-table' : 'list-override-table', 'listtable_' : 'list-table', 'revi-table' : 'revision-table', # character info => ci 'hidden____' : 'hidden', 'italics___' : 'italics', 'bold______' : 'bold', 'strike-thr' : 'strike-through', 'shadow____' : 'shadow', 'outline___' : 'outline', 'small-caps' : 'small-caps', 'caps______' : 'caps', 'dbl-strike' : 'double-strike-through', 'emboss____' : 'emboss', 'engrave___' : 'engrave', 'subscript_' : 'subscript', 'superscrip' : 'superscipt', 'font-style' : 'font-style', 'font-color' : 'font-color', 'font-size_' : 'font-size', 'font-up___' : 'superscript', 'font-down_' : 'subscript', 'red_______' : 'red', 'blue______' : 'blue', 'green_____' : 'green', # table => tb 'row-def___' : 'row-definition', 'cell______' : 'cell', 'row_______' : 'row', 'in-table__' : 'in-table', 'columns___' : 'columns', 'row-pos-le' : 'row-position-left', 'cell-posit' : 'cell-position', # preamble => pr # underline 'underlined' : 'underlined', # border => bd 'bor-t-r-hi' : 'border-table-row-horizontal-inside', 
'bor-t-r-vi' : 'border-table-row-vertical-inside', 'bor-t-r-to' : 'border-table-row-top', 'bor-t-r-le' : 'border-table-row-left', 'bor-t-r-bo' : 'border-table-row-bottom', 'bor-t-r-ri' : 'border-table-row-right', 'bor-cel-bo' : 'border-cell-bottom', 'bor-cel-to' : 'border-cell-top', 'bor-cel-le' : 'border-cell-left', 'bor-cel-ri' : 'border-cell-right', # 'bor-par-bo' : 'border-paragraph-bottom', 'bor-par-to' : 'border-paragraph-top', 'bor-par-le' : 'border-paragraph-left', 'bor-par-ri' : 'border-paragraph-right', 'bor-par-bo' : 'border-paragraph-box', 'bor-for-ev' : 'border-for-every-paragraph', 'bor-outsid' : 'border-outisde', 'bor-none__' : 'border', # border type => bt 'bdr-single' : 'single', 'bdr-doubtb' : 'double-thickness-border', 'bdr-shadow' : 'shadowed-border', 'bdr-double' : 'double-border', 'bdr-dotted' : 'dotted-border', 'bdr-dashed' : 'dashed', 'bdr-hair__' : 'hairline', 'bdr-inset_' : 'inset', 'bdr-das-sm' : 'dash-small', 'bdr-dot-sm' : 'dot-dash', 'bdr-dot-do' : 'dot-dot-dash', 'bdr-outset' : 'outset', 'bdr-trippl' : 'tripple', 'bdr-thsm__' : 'thick-thin-small', 'bdr-htsm__' : 'thin-thick-small', 'bdr-hthsm_' : 'thin-thick-thin-small', 'bdr-thm__' : 'thick-thin-medium', 'bdr-htm__' : 'thin-thick-medium', 'bdr-hthm_' : 'thin-thick-thin-medium', 'bdr-thl__' : 'thick-thin-large', 'bdr-hthl_' : 'think-thick-think-large', 'bdr-wavy_' : 'wavy', 'bdr-d-wav' : 'double-wavy', 'bdr-strip' : 'striped', 'bdr-embos' : 'emboss', 'bdr-engra' : 'engrave', 'bdr-frame' : 'frame', 'bdr-li-wid' : 'line-width', } self.__tabs_dict = { 'cw 3: msg = 'no entry for %s\n' % self.__token_info raise self.__bug_handler(msg) def __tab_leader_func(self, line): """ """ leader = self.__tab_type_dict.get(self.__token_info) if leader is not None: self.__att_val_dict['tabs'] += '%s^' % leader else: if self.__run_level > 3: msg = 'no entry for %s\n' % self.__token_info raise self.__bug_handler(msg) def __tab_bar_func(self, line): """ """ # self.__att_val_dict['tabs-bar'] += '%s:' % 
def __start_para_after_def_func(self, line):
    """
    Requires:
        line -- the line that marks the start of a paragraph
    Returns:
        Nothing
    Logic:
        The state is after_para_def, and I have found the start of a
        paragraph. I want to output the definition tag; output the
        line itself (telling me of the beginning of a paragraph); and
        change the state to 'in_paragraphs'.
        This is exactly what __end_para_def_func does when the same
        thing happens in the collect-tokens state, so delegate to it
        instead of keeping a second copy of the same three statements.
    """
    self.__end_para_def_func(line)
""" action = self.__after_para_def_dict.get(self.__token_info) if self.__token_info == 'cw%s' % ('tabs', the_value)) exclude = frozenset(['name', 'style-num', 'in-table'] + tabs_list) for k in sorted(self.__att_val_dict): if k not in exclude: style_string += ('<%s>%s' % (k, self.__att_val_dict[k])) style_string += '\n' self.__body_style_strings.append(style_string) def __write_para_def_beg(self): """ Requires: nothing Returns: nothing Logic: Print out the beginning of the pargraph definition tag, and the markers that let me know when I have reached this tag. (These markers are used for later parsing.) """ self.__get_num_of_style() table = self.__att_val_dict.get('in-table') if table: # del self.__att_val_dict['in-table'] self.__write_obj.write('mi%s' % self.__att_val_dict['name']) self.__write_obj.write('%s' % self.__att_val_dict['style-num']) tabs_list = ['tabs-left', 'tabs-right', 'tabs-decimal', 'tabs-center', 'tabs-bar', 'tabs'] """ for tab_item in tabs_list: if self.__att_val_dict[tab_item] != '': the_value = self.__att_val_dict[tab_item] the_value = the_value[:-1] self.__write_obj.write('<%s>%s' % (tab_item, the_value)) """ if self.__att_val_dict['tabs'] != '': the_value = self.__att_val_dict['tabs'] # the_value = the_value[:-1] self.__write_obj.write('<%s>%s' % ('tabs', the_value)) keys = sorted(self.__att_val_dict) exclude = frozenset(['name', 'style-num', 'in-table'] + tabs_list) for key in keys: if key not in exclude: self.__write_obj.write('<%s>%s' % (key, self.__att_val_dict[key])) self.__write_obj.write('\n') self.__write_obj.write(self.__start2_marker) if 'font-style' in keys: face = self.__att_val_dict['font-style'] self.__write_obj.write('mi