"""
Code to convert HTML ebooks into LRF ebooks.
I am indebted to esperanc for the initial CSS->Xylog Style conversion code
and to Falstaff for pylrs.
License: GPLv3 Copyright: 2008, Kovid Goyal
"""
import copy
import glob
import os
import re
import sys
import tempfile
import urllib.parse
import collections
import functools
import itertools
import math
import bs4
from PIL import Image as PILImage
from ebook_converter.constants_old import __appname__, filesystem_encoding, \
preferred_encoding
from ebook_converter.devices.interface import DevicePlugin as Device
from ebook_converter.ebooks import ConversionError
from ebook_converter.ebooks.BeautifulSoup import html5_parser
from ebook_converter.ebooks.chardet import xml_to_unicode
from ebook_converter.ebooks.lrf import Book
from ebook_converter.ebooks.lrf.html.color_map import lrs_color
from ebook_converter.ebooks.lrf.html.table import Table
from ebook_converter.ebooks.lrf.pylrs.pylrs import (
CR, BlockSpace, BookSetting, Canvas, CharButton, DropCaps, EmpLine, Image,
ImageBlock, ImageStream, Italic, JumpButton, LrsError, Paragraph, Plot,
RuledLine, Span, Sub, Sup, TextBlock
)
from ebook_converter.ptempfile import PersistentTemporaryFile
from ebook_converter.utils import encoding as uenc
from ebook_converter.utils import img as uimg
from ebook_converter.utils import entities
def strip_style_comments(match):
    """Return the text of *match* with CSS ``/* ... */`` comments removed.

    Written as a replacement callback for ``re.sub``: it receives a match
    object and returns the replacement string.

    The text is scanned once, left to right:

    * Every ``/* ... */`` span is dropped. The terminating ``*/`` is only
      looked for *after* the two characters of the opening ``/*``, so the
      sequence ``/*/`` opens a comment rather than being a complete one.
    * An unterminated comment discards everything from its ``/*`` to the
      end of the matched text.
    * Text outside comments is copied through unchanged; pieces on either
      side of a removed comment are never re-scanned as if they were
      adjacent in the input.

    :param match: match object whose whole matched text (``match.group()``)
                  is to be cleaned.
    :return: the matched text without comments.
    """
    src = match.group()
    kept = []
    pos = 0
    while True:
        start = src.find('/*', pos)
        if start < 0:
            # No further comments: keep the remaining tail verbatim.
            kept.append(src[pos:])
            break
        kept.append(src[pos:start])
        # Skip the opener itself so its '*' cannot double as the closer.
        end = src.find('*/', start + 2)
        if end < 0:
            # Unterminated comment: everything after the opener is dropped.
            break
        pos = end + 2
    return ''.join(kept)
# Matches one CSS-style rule of the form ``selector { declarations }``.
# Group 1 is the selector text: it must start with letters, digits, '-',
# '_', ':' or '.', and may continue with those characters plus whitespace
# and commas. Group 2 is everything between the braces up to the first '}'.
SELECTOR_PAT = re.compile(r"([A-Za-z0-9\-\_\:\.]+"
r"[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}")
# Matches a ``page-break-after`` or ``page-break-before`` declaration,
# case-insensitively; group 1 captures the value word after the colon.
PAGE_BREAK_PAT = re.compile(r'page-break-(?:after|before)\s*:\s*(\w+)',
re.IGNORECASE)
# BeautifulSoup node classes for comments, declarations and processing
# instructions. NOTE(review): presumably nodes of these types are skipped
# during conversion -- confirm against the code that uses this tuple.
IGNORED_TAGS = (bs4.Comment, bs4.Declaration, bs4.ProcessingInstruction)
MARKUP_MASSAGE = [ # Close tags
(re.compile(r']*)?/>', re.IGNORECASE),
lambda match: ''),
# Strip comments from )',
re.IGNORECASE | re.DOTALL),
strip_style_comments),
# Remove self closing script tags as they also mess up
# BeautifulSoup
(re.compile(r'(?i)