mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-03-01 22:25:46 +01:00
Removed 'browser' related functions
This commit is contained in:
@@ -267,131 +267,6 @@ def extract(path, dir):
|
|||||||
extractor(path, dir)
|
extractor(path, dir)
|
||||||
|
|
||||||
|
|
||||||
def get_proxies(debug=True):
    """
    Return the proxies reported by ``urllib.request.getproxies()`` after
    normalising them to bare ``host[:port]`` style strings.

    Entries are dropped when the value is empty, contains ``'..'``, belongs
    to the ``'auto'`` key, or is four characters or shorter once cleaned up.

    :param debug: If True, print the resulting mapping when it is not empty.
    :return: dict mapping proxy type (e.g. ``'http'``) to the cleaned string.
    """
    cleaned = {}
    for scheme, address in urllib.request.getproxies().items():
        if scheme == 'auto' or not address or '..' in address:
            continue

        # Strip a leading "<scheme>://" so only the address part remains.
        own_prefix = scheme + '://'
        if address.startswith(own_prefix):
            address = address[len(own_prefix):]
        # An https proxy may itself be declared with an http:// URL.
        if scheme == 'https' and address.startswith('http://'):
            address = address[len('http://'):]
        if address.endswith('/'):
            address = address[:-1]

        if len(address) <= 4:
            prints('Removing invalid', scheme, 'proxy:', address)
            continue
        cleaned[scheme] = address

    if debug and cleaned:
        prints('Using proxies:', cleaned)
    return cleaned
|
|
||||||
|
|
||||||
|
|
||||||
def get_parsed_proxy(typ='http', debug=True):
    """
    Look up the proxy configured for ``typ`` and split it into its parts.

    :param typ: Proxy type to look up in the mapping from ``get_proxies``.
    :param debug: Passed on to ``get_proxies``; when True the parsed result
        (or the traceback of a parsing failure) is also printed.
    :return: dict with the keys ``'host'``, ``'port'``, ``'user'`` and
        ``'pass'`` (``'port'`` is an int when present, missing parts are
        None), or None when no proxy is configured for ``typ`` or the proxy
        string cannot be parsed.
    """
    proxy = get_proxies(debug).get(typ)
    if not proxy:
        return None

    # typ is escaped so that it is always matched literally, even if it
    # happens to contain regular expression metacharacters.
    pattern = re.compile(''.join((
        '(?:', re.escape(typ), '://)?',
        r'(?:(?P<user>\w+):(?P<pass>.*)@)?',
        r'(?P<host>[\w\-\.]+)',
        r'(?::(?P<port>\d+))?',
    )))
    match = pattern.match(proxy)
    if not match:
        return None

    ans = {
        'host': match.group('host'),
        'port': match.group('port'),
        'user': match.group('user'),
        'pass': match.group('pass'),
    }
    try:
        if ans['port']:
            ans['port'] = int(ans['port'])
    except Exception:
        # Best effort: report the problem in debug mode and give up, but do
        # not swallow KeyboardInterrupt/SystemExit like a bare except would.
        if debug:
            import traceback
            traceback.print_exc()
        return None

    if debug:
        # Message text kept as-is; it says "http" regardless of typ.
        prints('Using http proxy', str(ans))
    return ans
|
|
||||||
|
|
||||||
|
|
||||||
def get_proxy_info(proxy_scheme, proxy_string):
    """
    Split a proxy string (as returned by get_proxies) into its components.

    :param proxy_scheme: Scheme to prepend to the string before parsing.
    :param proxy_string: Proxy address, without a scheme prefix.
    :return: dict with the keys ``'scheme'``, ``'hostname'``, ``'port'``,
        ``'username'`` and ``'password'``; a value is None when the string
        does not carry that piece of information. Returns None if parsing
        raises an exception (for example a non-numeric port).
    """
    try:
        parts = urllib.parse.urlparse('%s://%s' % (proxy_scheme, proxy_string))
        # Accessing .port may raise for a malformed port, hence it is done
        # inside the try block.
        return {
            'scheme': parts.scheme,
            'hostname': parts.hostname,
            'port': parts.port,
            'username': parts.username,
            'password': parts.password,
        }
    except Exception:
        return None
|
|
||||||
|
|
||||||
|
|
||||||
# Default desktop user agent string: IE 11 on Windows 7.
USER_AGENT = (
    'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko'
)
# Default mobile user agent string (identifies as Minimo on Windows CE).
USER_AGENT_MOBILE = (
    'Mozilla/5.0 (Windows; U; Windows CE 5.1; rv:1.8.1a3) '
    'Gecko/20060610 Minimo/0.016'
)
|
|
||||||
|
|
||||||
|
|
||||||
def is_mobile_ua(ua):
    """
    Return True if the user agent string ``ua`` contains one of the
    markers ``'Mobile/'`` or ``'Mobile '``.
    """
    return any(marker in ua for marker in ('Mobile/', 'Mobile '))
|
|
||||||
|
|
||||||
|
|
||||||
def random_user_agent(choose=None, allow_ie=True):
    """
    Pick a non-mobile user agent string from ``common_user_agents()``.

    :param choose: Index into the filtered list; when None a random entry
        is returned instead.
    :param allow_ie: If False, entries containing ``'Trident/'`` or
        ``'Edge/'`` are excluded as well.
    """
    from ebook_converter.utils.random_ua import common_user_agents

    candidates = [ua for ua in common_user_agents() if not is_mobile_ua(ua)]
    if not allow_ie:
        candidates = [
            ua for ua in candidates
            if not ('Trident/' in ua or 'Edge/' in ua)
        ]
    if choose is None:
        return random.choice(candidates)
    return candidates[choose]
|
|
||||||
|
|
||||||
|
|
||||||
def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None, verify_ssl_certificates=True, handle_refresh=True):
    '''
    Create a mechanize browser for web scraping. Refresh handling is
    configured from the arguments, robots.txt is ignored and the http/https
    proxies reported by get_proxies() are used if available.

    :param honor_time: If True honors pause time in refresh requests
    :param max_time: Maximum time in seconds to wait during a refresh request
    :param mobile_browser: If True and no user_agent is given, use
        USER_AGENT_MOBILE instead of USER_AGENT
    :param user_agent: User agent string to send; None selects a default
    :param verify_ssl_certificates: If false SSL certificates errors are ignored
    :param handle_refresh: Passed to set_handle_refresh() of the browser
    '''
    from ebook_converter.utils.browser import Browser

    br = Browser(verify_ssl=verify_ssl_certificates)
    br.set_handle_refresh(handle_refresh, max_time=max_time, honor_time=honor_time)
    br.set_handle_robots(False)

    if user_agent is None:
        if mobile_browser:
            user_agent = USER_AGENT_MOBILE
        else:
            user_agent = USER_AGENT
    br.addheaders = [('User-agent', user_agent)]

    # Only the http and https entries are handed to the browser.
    detected = get_proxies()
    usable = {
        scheme: detected[scheme]
        for scheme in ('http', 'https')
        if detected.get(scheme)
    }
    if usable:
        br.set_proxies(usable)

    return br
|
|
||||||
|
|
||||||
|
|
||||||
def fit_image(width, height, pwidth, pheight):
|
def fit_image(width, height, pwidth, pheight):
|
||||||
'''
|
'''
|
||||||
Fit image in box of width pwidth and height pheight.
|
Fit image in box of width pwidth and height pheight.
|
||||||
|
|||||||
@@ -889,21 +889,6 @@ OptionRecommendation(name='search_replace',
|
|||||||
continue
|
continue
|
||||||
setattr(mi, x, val)
|
setattr(mi, x, val)
|
||||||
|
|
||||||
def download_cover(self, url):
    """
    Download the image at ``url``, convert it to RGB and save it to a
    temporary file created with the ``.jpg`` suffix.

    :param url: Location of the cover image.
    :return: Path (``name``) of the temporary file holding the image.
    """
    from ebook_converter import browser
    from PIL import Image
    import io
    from ebook_converter.ptempfile import PersistentTemporaryFile

    self.log('Downloading cover from %r'%url)
    payload = browser().open_novisit(url).read()
    stream = io.BytesIO(payload)

    # The temporary file is only needed for its name; it is closed right
    # away and then written by PIL.
    target = PersistentTemporaryFile('.jpg')
    target.close()

    picture = Image.open(stream)
    picture.convert('RGB').save(target.name)
    return target.name
|
|
||||||
|
|
||||||
def read_user_metadata(self):
|
def read_user_metadata(self):
|
||||||
'''
|
'''
|
||||||
Read all metadata specified by the user. Command line options override
|
Read all metadata specified by the user. Command line options override
|
||||||
@@ -921,7 +906,8 @@ OptionRecommendation(name='search_replace',
|
|||||||
self.opts_to_mi(mi)
|
self.opts_to_mi(mi)
|
||||||
if mi.cover:
|
if mi.cover:
|
||||||
if mi.cover.startswith('http:') or mi.cover.startswith('https:'):
|
if mi.cover.startswith('http:') or mi.cover.startswith('https:'):
|
||||||
mi.cover = self.download_cover(mi.cover)
|
self.log.warn("TODO: Cover image is on remote server, "
|
||||||
|
"implement downloading using requests")
|
||||||
ext = mi.cover.rpartition('.')[-1].lower().strip()
|
ext = mi.cover.rpartition('.')[-1].lower().strip()
|
||||||
if ext not in ('png', 'jpg', 'jpeg', 'gif'):
|
if ext not in ('png', 'jpg', 'jpeg', 'gif'):
|
||||||
ext = 'jpg'
|
ext = 'jpg'
|
||||||
|
|||||||
Reference in New Issue
Block a user