Mirror of https://github.com/gryf/ebook-converter.git, synced 2026-01-15 00:04:10 +01:00
Removed 'browser' related functions
@@ -267,131 +267,6 @@ def extract(path, dir):
     extractor(path, dir)
 
 
-def get_proxies(debug=True):
-    proxies = urllib.request.getproxies()
-    for key, proxy in list(proxies.items()):
-        if not proxy or '..' in proxy or key == 'auto':
-            del proxies[key]
-            continue
-        if proxy.startswith(key+'://'):
-            proxy = proxy[len(key)+3:]
-        if key == 'https' and proxy.startswith('http://'):
-            proxy = proxy[7:]
-        if proxy.endswith('/'):
-            proxy = proxy[:-1]
-        if len(proxy) > 4:
-            proxies[key] = proxy
-        else:
-            prints('Removing invalid', key, 'proxy:', proxy)
-            del proxies[key]
-
-    if proxies and debug:
-        prints('Using proxies:', proxies)
-    return proxies
-
-
-def get_parsed_proxy(typ='http', debug=True):
-    proxies = get_proxies(debug)
-    proxy = proxies.get(typ, None)
-    if proxy:
-        pattern = re.compile((
-            '(?:ptype://)?'
-            '(?:(?P<user>\\w+):(?P<pass>.*)@)?'
-            '(?P<host>[\\w\\-\\.]+)'
-            '(?::(?P<port>\\d+))?').replace('ptype', typ)
-        )
-
-        match = pattern.match(proxies[typ])
-        if match:
-            try:
-                ans = {
-                    'host' : match.group('host'),
-                    'port' : match.group('port'),
-                    'user' : match.group('user'),
-                    'pass' : match.group('pass')
-                }
-                if ans['port']:
-                    ans['port'] = int(ans['port'])
-            except:
-                if debug:
-                    import traceback
-                    traceback.print_exc()
-            else:
-                if debug:
-                    prints('Using http proxy', str(ans))
-                return ans
-
-
-def get_proxy_info(proxy_scheme, proxy_string):
-    '''
-    Parse all proxy information from a proxy string (as returned by
-    get_proxies). The returned dict will have members set to None when the info
-    is not available in the string. If an exception occurs parsing the string
-    this method returns None.
-    '''
-    try:
-        proxy_url = '%s://%s'%(proxy_scheme, proxy_string)
-        urlinfo = urllib.parse.urlparse(proxy_url)
-        ans = {
-            'scheme': urlinfo.scheme,
-            'hostname': urlinfo.hostname,
-            'port': urlinfo.port,
-            'username': urlinfo.username,
-            'password': urlinfo.password,
-        }
-    except Exception:
-        return None
-    return ans
-
-
-# IE 11 on windows 7
-USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko'
-USER_AGENT_MOBILE = 'Mozilla/5.0 (Windows; U; Windows CE 5.1; rv:1.8.1a3) Gecko/20060610 Minimo/0.016'
-
-
-def is_mobile_ua(ua):
-    return 'Mobile/' in ua or 'Mobile ' in ua
-
-
-def random_user_agent(choose=None, allow_ie=True):
-    from ebook_converter.utils.random_ua import common_user_agents
-    ua_list = common_user_agents()
-    ua_list = [x for x in ua_list if not is_mobile_ua(x)]
-    if not allow_ie:
-        ua_list = [x for x in ua_list if 'Trident/' not in x and 'Edge/' not in x]
-    return random.choice(ua_list) if choose is None else ua_list[choose]
-
-
-def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None, verify_ssl_certificates=True, handle_refresh=True):
-    '''
-    Create a mechanize browser for web scraping. The browser handles cookies,
-    refresh requests and ignores robots.txt. Also uses proxy if available.
-
-    :param honor_time: If True honors pause time in refresh requests
-    :param max_time: Maximum time in seconds to wait during a refresh request
-    :param verify_ssl_certificates: If false SSL certificates errors are ignored
-    '''
-    from ebook_converter.utils.browser import Browser
-    opener = Browser(verify_ssl=verify_ssl_certificates)
-    opener.set_handle_refresh(handle_refresh, max_time=max_time, honor_time=honor_time)
-    opener.set_handle_robots(False)
-    if user_agent is None:
-        user_agent = USER_AGENT_MOBILE if mobile_browser else USER_AGENT
-    opener.addheaders = [('User-agent', user_agent)]
-    proxies = get_proxies()
-    to_add = {}
-    http_proxy = proxies.get('http', None)
-    if http_proxy:
-        to_add['http'] = http_proxy
-    https_proxy = proxies.get('https', None)
-    if https_proxy:
-        to_add['https'] = https_proxy
-    if to_add:
-        opener.set_proxies(to_add)
-
-    return opener
-
-
 def fit_image(width, height, pwidth, pheight):
     '''
     Fit image in box of width pwidth and height pheight.
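
Aside: the removed get_proxy_info() is essentially a thin wrapper around urllib.parse.urlparse. A minimal standalone sketch of the same parsing, for anyone who still needs it (parse_proxy is an illustrative name, not something from this repository):

import urllib.parse


def parse_proxy(scheme, proxy_string):
    """Split 'user:pass@host:port' into parts; missing fields become None."""
    try:
        info = urllib.parse.urlparse('%s://%s' % (scheme, proxy_string))
        return {'scheme': info.scheme, 'hostname': info.hostname,
                'port': info.port, 'username': info.username,
                'password': info.password}
    except Exception:  # e.g. accessing .port raises ValueError on a bad port
        return None


# parse_proxy('http', 'jane:secret@proxy.example.com:8080') returns
# {'scheme': 'http', 'hostname': 'proxy.example.com', 'port': 8080,
#  'username': 'jane', 'password': 'secret'}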
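With the mechanize-based browser() gone, downstream code needs a stand-in. A rough requests-based equivalent covering cookies, User-Agent and proxies, sketched under the assumption that the requests package is available (make_session is a hypothetical helper, not part of this commit):

import requests

# UA string carried over from the removed module (IE 11 on Windows 7).
USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko'


def make_session(user_agent=USER_AGENT, verify_ssl=True, proxies=None):
    """Hypothetical replacement for browser(): a Session keeps cookies
    across requests and lets us pin the User-Agent, TLS checking and
    proxies up front."""
    session = requests.Session()
    session.headers['User-Agent'] = user_agent
    session.verify = verify_ssl              # False ignores certificate errors
    if proxies:                              # e.g. {'http': 'http://host:8080'}
        session.proxies.update(proxies)
    return session

Two behavioral gaps to keep in mind: requests follows HTTP redirects but does not honor meta-refresh pauses (mechanize's set_handle_refresh), and it never consults robots.txt, so no set_handle_robots(False) equivalent is needed.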
@@ -889,21 +889,6 @@ OptionRecommendation(name='search_replace',
                 continue
             setattr(mi, x, val)
 
-    def download_cover(self, url):
-        from ebook_converter import browser
-        from PIL import Image
-        import io
-        from ebook_converter.ptempfile import PersistentTemporaryFile
-        self.log('Downloading cover from %r'%url)
-        br = browser()
-        raw = br.open_novisit(url).read()
-        buf = io.BytesIO(raw)
-        pt = PersistentTemporaryFile('.jpg')
-        pt.close()
-        img = Image.open(buf)
-        img.convert('RGB').save(pt.name)
-        return pt.name
-
     def read_user_metadata(self):
         '''
         Read all metadata specified by the user. Command line options override
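The next hunk swaps the download_cover() call for a TODO that points at requests. One possible shape for that reimplementation, sketched assuming requests and Pillow are available (the function body is illustrative, not part of this commit):

import io

import requests
from PIL import Image

from ebook_converter.ptempfile import PersistentTemporaryFile


def download_cover(url, timeout=30):
    """Fetch a remote cover and normalize it to an RGB JPEG on disk,
    mirroring what the removed mechanize-based method did."""
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()      # surface HTTP errors instead of bad data
    pt = PersistentTemporaryFile('.jpg')
    pt.close()                       # close so Image.save can reopen by name
    Image.open(io.BytesIO(response.content)).convert('RGB').save(pt.name)
    return pt.name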
@@ -921,7 +906,8 @@ OptionRecommendation(name='search_replace',
         self.opts_to_mi(mi)
         if mi.cover:
             if mi.cover.startswith('http:') or mi.cover.startswith('https:'):
-                mi.cover = self.download_cover(mi.cover)
+                self.log.warn("TODO: Cover image is on remote server, "
+                              "implement downloading using requests")
             ext = mi.cover.rpartition('.')[-1].lower().strip()
             if ext not in ('png', 'jpg', 'jpeg', 'gif'):
                 ext = 'jpg'
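A note on the extension sniffing that survives above: str.rpartition returns the whole string as its final element when the separator is missing, and for URLs the tail after the last dot is rarely a real extension, which is why the fallback to 'jpg' matters. A few quick cases:

ext = 'covers/front.JPG'.rpartition('.')[-1].lower().strip()   # 'jpg'
ext = 'cover'.rpartition('.')[-1]                              # 'cover' (no dot)
ext = 'http://example.com/cover'.rpartition('.')[-1]           # 'com/cover'

The last two fail the ('png', 'jpg', 'jpeg', 'gif') membership test, so the code defaults to 'jpg'.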