mirror of https://github.com/gryf/ebook-converter.git synced 2026-01-15 00:04:10 +01:00

Removed 'browser' related functions

2020-06-14 12:28:23 +02:00
parent be671ef2d8
commit ebb2e810eb
2 changed files with 2 additions and 141 deletions

View File

@@ -267,131 +267,6 @@ def extract(path, dir):
     extractor(path, dir)
 
 
-def get_proxies(debug=True):
-    proxies = urllib.request.getproxies()
-    for key, proxy in list(proxies.items()):
-        if not proxy or '..' in proxy or key == 'auto':
-            del proxies[key]
-            continue
-        if proxy.startswith(key+'://'):
-            proxy = proxy[len(key)+3:]
-        if key == 'https' and proxy.startswith('http://'):
-            proxy = proxy[7:]
-        if proxy.endswith('/'):
-            proxy = proxy[:-1]
-        if len(proxy) > 4:
-            proxies[key] = proxy
-        else:
-            prints('Removing invalid', key, 'proxy:', proxy)
-            del proxies[key]
-    if proxies and debug:
-        prints('Using proxies:', proxies)
-    return proxies
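
For reference, this is what the removed get_proxies() produced for typical environment values (a minimal sketch; the proxy host is hypothetical):

    import os

    # Hypothetical environment, as seen by urllib.request.getproxies()
    os.environ['http_proxy'] = 'http://proxy.example.com:3128/'
    os.environ['https_proxy'] = 'http://proxy.example.com:3128'

    # The scheme prefix and any trailing slash are stripped, so both
    # entries come back as bare 'host:port' strings.
    print(get_proxies(debug=False))
    # {'http': 'proxy.example.com:3128', 'https': 'proxy.example.com:3128'}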
-
-
-def get_parsed_proxy(typ='http', debug=True):
-    proxies = get_proxies(debug)
-    proxy = proxies.get(typ, None)
-    if proxy:
-        pattern = re.compile((
-            '(?:ptype://)?'
-            '(?:(?P<user>\\w+):(?P<pass>.*)@)?'
-            '(?P<host>[\\w\\-\\.]+)'
-            '(?::(?P<port>\\d+))?').replace('ptype', typ)
-        )
-        match = pattern.match(proxies[typ])
-        if match:
-            try:
-                ans = {
-                    'host': match.group('host'),
-                    'port': match.group('port'),
-                    'user': match.group('user'),
-                    'pass': match.group('pass')
-                }
-                if ans['port']:
-                    ans['port'] = int(ans['port'])
-            except Exception:
-                if debug:
-                    import traceback
-                    traceback.print_exc()
-            else:
-                if debug:
-                    prints('Using http proxy', str(ans))
-                return ans
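
Applied to a credentialed proxy string (hypothetical values), the pattern above splits out each component:

    import re

    typ = 'http'
    pattern = re.compile((
        '(?:ptype://)?'
        '(?:(?P<user>\\w+):(?P<pass>.*)@)?'
        '(?P<host>[\\w\\-\\.]+)'
        '(?::(?P<port>\\d+))?').replace('ptype', typ))

    match = pattern.match('jdoe:s3cret@proxy.example.com:8080')
    print(match.groupdict())
    # {'user': 'jdoe', 'pass': 's3cret', 'host': 'proxy.example.com',
    #  'port': '8080'}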
-
-
-def get_proxy_info(proxy_scheme, proxy_string):
-    '''
-    Parse all proxy information from a proxy string (as returned by
-    get_proxies). The returned dict will have members set to None when the
-    info is not available in the string. If an exception occurs while parsing
-    the string, this method returns None.
-    '''
-    try:
-        proxy_url = '%s://%s' % (proxy_scheme, proxy_string)
-        urlinfo = urllib.parse.urlparse(proxy_url)
-        ans = {
-            'scheme': urlinfo.scheme,
-            'hostname': urlinfo.hostname,
-            'port': urlinfo.port,
-            'username': urlinfo.username,
-            'password': urlinfo.password,
-        }
-    except Exception:
-        return None
-    return ans
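
Unlike get_parsed_proxy(), this helper delegates the parsing to urllib.parse; for the same hypothetical string it returns a typed result (note the port comes back as an int):

    info = get_proxy_info('http', 'jdoe:s3cret@proxy.example.com:8080')
    # {'scheme': 'http', 'hostname': 'proxy.example.com', 'port': 8080,
    #  'username': 'jdoe', 'password': 's3cret'}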
-
-
-# IE 11 on Windows 7
-USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko'
-USER_AGENT_MOBILE = 'Mozilla/5.0 (Windows; U; Windows CE 5.1; rv:1.8.1a3) Gecko/20060610 Minimo/0.016'
-
-
-def is_mobile_ua(ua):
-    return 'Mobile/' in ua or 'Mobile ' in ua
-
-
-def random_user_agent(choose=None, allow_ie=True):
-    from ebook_converter.utils.random_ua import common_user_agents
-    ua_list = common_user_agents()
-    ua_list = [x for x in ua_list if not is_mobile_ua(x)]
-    if not allow_ie:
-        ua_list = [x for x in ua_list if 'Trident/' not in x and 'Edge/' not in x]
-    return random.choice(ua_list) if choose is None else ua_list[choose]
-
-
-def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None, verify_ssl_certificates=True, handle_refresh=True):
-    '''
-    Create a mechanize browser for web scraping. The browser handles cookies,
-    refresh requests and ignores robots.txt. It also uses a proxy if one is
-    available.
-
-    :param honor_time: If True, honors the pause time in refresh requests
-    :param max_time: Maximum time in seconds to wait during a refresh request
-    :param verify_ssl_certificates: If False, SSL certificate errors are ignored
-    '''
-    from ebook_converter.utils.browser import Browser
-    opener = Browser(verify_ssl=verify_ssl_certificates)
-    opener.set_handle_refresh(handle_refresh, max_time=max_time, honor_time=honor_time)
-    opener.set_handle_robots(False)
-    if user_agent is None:
-        user_agent = USER_AGENT_MOBILE if mobile_browser else USER_AGENT
-    opener.addheaders = [('User-agent', user_agent)]
-    proxies = get_proxies()
-    to_add = {}
-    http_proxy = proxies.get('http', None)
-    if http_proxy:
-        to_add['http'] = http_proxy
-    https_proxy = proxies.get('https', None)
-    if https_proxy:
-        to_add['https'] = https_proxy
-    if to_add:
-        opener.set_proxies(to_add)
-    return opener
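
Since this commit removes the mechanize-based browser(), a rough requests-based stand-in for the common case could look like the sketch below. This is only an illustration: make_session is a hypothetical helper, requests is not a dependency of this code, and nothing here reproduces mechanize's refresh handling.

    import requests

    def make_session(user_agent=USER_AGENT, verify_ssl_certificates=True):
        # Cookies are kept automatically by requests.Session, and
        # robots.txt is never consulted, matching set_handle_robots(False).
        session = requests.Session()
        session.headers['User-Agent'] = user_agent
        session.verify = verify_ssl_certificates
        # get_proxies() strips the scheme, but requests wants full URLs
        # in its proxy map, so put it back.
        session.proxies = {key: 'http://' + value
                           for key, value in get_proxies(debug=False).items()
                           if key in ('http', 'https')}
        return session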
-
-
 def fit_image(width, height, pwidth, pheight):
     '''
     Fit image in box of width pwidth and height pheight.

View File

@@ -889,21 +889,6 @@ OptionRecommendation(name='search_replace',
                 continue
             setattr(mi, x, val)
 
-    def download_cover(self, url):
-        from ebook_converter import browser
-        from PIL import Image
-        import io
-        from ebook_converter.ptempfile import PersistentTemporaryFile
-        self.log('Downloading cover from %r' % url)
-        br = browser()
-        raw = br.open_novisit(url).read()
-        buf = io.BytesIO(raw)
-        pt = PersistentTemporaryFile('.jpg')
-        pt.close()
-        img = Image.open(buf)
-        img.convert('RGB').save(pt.name)
-        return pt.name
-
     def read_user_metadata(self):
         '''
         Read all metadata specified by the user. Command line options override
@@ -921,7 +906,8 @@ OptionRecommendation(name='search_replace',
         self.opts_to_mi(mi)
         if mi.cover:
             if mi.cover.startswith('http:') or mi.cover.startswith('https:'):
-                mi.cover = self.download_cover(mi.cover)
+                self.log.warn("TODO: Cover image is on a remote server; "
+                              "implement downloading using requests")
             ext = mi.cover.rpartition('.')[-1].lower().strip()
             if ext not in ('png', 'jpg', 'jpeg', 'gif'):
                 ext = 'jpg'
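
For the TODO above, a requests-based replacement for the removed download_cover() might look roughly like this (a sketch assuming requests is added as a dependency; the Pillow and temporary-file handling mirrors the removed method):

    import io

    import requests
    from PIL import Image

    from ebook_converter.ptempfile import PersistentTemporaryFile

    def download_cover(self, url):
        # Fetch the remote cover and normalize it to an RGB JPEG on disk,
        # as the removed mechanize-based version did.
        self.log('Downloading cover from %r' % url)
        response = requests.get(url, timeout=60)
        response.raise_for_status()
        pt = PersistentTemporaryFile('.jpg')
        pt.close()
        img = Image.open(io.BytesIO(response.content))
        img.convert('RGB').save(pt.name)
        return pt.name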