######################################################################### # # # # # copyright 2002 Paul Henry Tremblay # # # # This program is distributed in the hope that it will be useful, # # but WITHOUT ANY WARRANTY; without even the implied warranty of # # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # # General Public License for more details. # # # # # ######################################################################### import sys, os from ebook_converter.ebooks.rtf2xml import field_strings, copy from ebook_converter.ptempfile import better_mktemp from . import open_for_read, open_for_write class FieldsLarge: r""" ========================= Logic ========================= Make tags for fields. -Fields reflect text that Microsoft Word automatically generates. -Each file contains (or should contain) an inner group called field instructions. -Fields can be nested. -------------- Logic -------------- 1. As soon as a field is found, make a new text string by appending an empty text string to the field list. Collect all the lines in this string until the field instructions are found. 2. Collect all the tokens and text in the field instructions. When the end of the field instructions is found, process the string of text with the field_strings module. Append the processed string to the field instructins list. 3. Continue collecting tokens. Check for paragraphs or sections. If either is found, add to the paragraph or section list. 4. Continue collecting tokens and text either the beginning of a new field is found, or the end of this field is found. 5. If a new field is found, repeat steps 1-3. 6. If the end of the field is found, process the last text string of the field list. 7. If the field list is empty (after removing the last text string), there are no more fields. Print out the final string. If the list contains other strings, add the processed string to the last string in the field list. 
def __init__(self, in_file, bug_handler, copy=None, run_level=1):
    """
    Record the conversion settings and reserve a temporary output path.

    Required:
        'in_file'--file to parse
        'bug_handler'--stored unchanged on the instance; it is not used
            inside this method (presumably the error type raised when the
            parser hits an internal problem -- TODO confirm)
    Optional:
        'copy'--whether to make a copy of result for debugging
        'run_level'--stored unchanged on the instance, default 1
            (presumably controls how strict the run is -- TODO confirm)
    Returns:
        nothing
    """
    # Plain bookkeeping: keep every argument exactly as it was passed in.
    self.__file, self.__bug_handler = in_file, bug_handler
    self.__copy, self.__run_level = copy, run_level
    # better_mktemp() is imported from ebook_converter.ptempfile; its
    # result is kept as the place this object writes to (presumably a
    # temporary file path -- verify against ptempfile).
    self.__write_to = better_mktemp()
Write the paragraph or section markers for later parsing of the file. If the filed list contains more strings, add the latest (processed) string to the last string in the list. Otherwise, write the string to the output file. """ last_bracket = self.__field_count.pop() instruction = self.__field_instruction.pop() inner_field_string = self.__field_string.pop() sec_in_field = self.__sec_in_field.pop() par_in_field = self.__par_in_field.pop() # add a closing bracket, since the closing bracket is not included in # the field string if self.__symbol: inner_field_string = '%scb%s\n%s'\ 'mi%s\n%s'\ 'mi