Removed 'browser' related functions

2020-06-14 12:28:23 +02:00
parent be671ef2d8
commit ebb2e810eb
2 changed files with 2 additions and 141 deletions
@@ -267,131 +267,6 @@ def extract(path, dir):
    extractor(path, dir)
 def get_proxies(debug=True):
    '''
    Return the system proxy settings as a dict mapping a scheme name to a
    cleaned up proxy address.

    The settings come from urllib.request.getproxies(). Entries that are
    empty, contain '..' or belong to the 'auto' key are dropped silently. A
    leading "<scheme>://" prefix (and "http://" for the https entry) and one
    trailing slash are stripped from the address. Addresses of four
    characters or fewer after stripping are reported and dropped.

    :param debug: If True and at least one proxy remains, print the result
    '''
    cleaned = {}
    for scheme, address in urllib.request.getproxies().items():
        if scheme == 'auto' or not address or '..' in address:
            continue
        own_prefix = scheme + '://'
        if address.startswith(own_prefix):
            address = address[len(own_prefix):]
        # An https proxy is frequently declared with a plain http:// URL
        if scheme == 'https' and address.startswith('http://'):
            address = address[len('http://'):]
        if address.endswith('/'):
            address = address[:-1]
        if len(address) <= 4:
            prints('Removing invalid', scheme, 'proxy:', address)
            continue
        cleaned[scheme] = address
    if debug and cleaned:
        prints('Using proxies:', cleaned)
    return cleaned
 def get_parsed_proxy(typ='http', debug=True):
    '''
    Look up the proxy configured for the scheme `typ` (via get_proxies) and
    split it into its components.

    :param typ: Proxy scheme to look up, for example 'http' or 'https'
    :param debug: If True print the parsed proxy, or the traceback when
                  parsing fails. Also passed on to get_proxies.
    :return: A dict with the keys 'host', 'port', 'user' and 'pass'. The port
             is an int when present; parts missing from the proxy string are
             None. Returns None when no proxy is set for `typ` or the proxy
             string cannot be parsed.
    '''
    proxy = get_proxies(debug).get(typ, None)
    if not proxy:
        return None
    # Escape the scheme so that it is always matched literally, whatever
    # characters the caller passes in.
    pattern = re.compile((
        '(?:ptype://)?'
        '(?:(?P<user>\\w+):(?P<pass>.*)@)?'
        '(?P<host>[\\w\\-\\.]+)'
        '(?::(?P<port>\\d+))?').replace('ptype', re.escape(typ))
    )
    match = pattern.match(proxy)
    if match is None:
        return None
    try:
        ans = {
            'host': match.group('host'),
            'port': match.group('port'),
            'user': match.group('user'),
            'pass': match.group('pass'),
        }
        if ans['port']:
            ans['port'] = int(ans['port'])
    except Exception:
        # Best effort: a proxy that cannot be parsed is treated as absent.
        # Only Exception is caught so that KeyboardInterrupt and SystemExit
        # still propagate.
        if debug:
            import traceback
            traceback.print_exc()
        return None
    if debug:
        prints('Using http proxy', str(ans))
    return ans
 def get_proxy_info(proxy_scheme, proxy_string):
    '''
    Break a proxy string (in the form returned by get_proxies) into its
    parts.

    The scheme and the string are joined into a URL and handed to
    urllib.parse.urlparse. The result is a dict with the keys 'scheme',
    'hostname', 'port', 'username' and 'password'; a part that is absent from
    the string has the value None. If anything goes wrong while parsing, None
    is returned instead of a dict.
    '''
    wanted = ('scheme', 'hostname', 'port', 'username', 'password')
    try:
        parsed = urllib.parse.urlparse('%s://%s' % (proxy_scheme, proxy_string))
        # Attribute access is what triggers validation (e.g. of the port), so
        # it has to happen inside the try block.
        return {part: getattr(parsed, part) for part in wanted}
    except Exception:
        return None
 # Default desktop user agent string: IE 11 on Windows 7. Used by browser()
 # when no explicit user agent is given.
 USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko'
 # Default mobile user agent string (the value identifies itself as Minimo on
 # Windows CE). Used by browser() when a mobile browser is requested.
 USER_AGENT_MOBILE = 'Mozilla/5.0 (Windows; U; Windows CE 5.1; rv:1.8.1a3) Gecko/20060610 Minimo/0.016'
 def is_mobile_ua(ua):
    '''
    Return True if the user agent string `ua` contains one of the markers
    'Mobile/' or 'Mobile ' (case sensitive), False otherwise.
    '''
    mobile_markers = ('Mobile/', 'Mobile ')
    return any(marker in ua for marker in mobile_markers)
 def random_user_agent(choose=None, allow_ie=True):
    '''
    Return one of the common, non-mobile user agent strings.

    :param choose: If None a random entry is returned, otherwise this value
                   is used as an index into the filtered list
    :param allow_ie: If False, user agents containing 'Trident/' or 'Edge/'
                     are excluded as well
    '''
    from ebook_converter.utils.random_ua import common_user_agents

    def acceptable(ua):
        if is_mobile_ua(ua):
            return False
        return allow_ie or ('Trident/' not in ua and 'Edge/' not in ua)

    candidates = [ua for ua in common_user_agents() if acceptable(ua)]
    if choose is None:
        return random.choice(candidates)
    return candidates[choose]
 def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None, verify_ssl_certificates=True, handle_refresh=True):
    '''
    Build a mechanize browser suitable for web scraping. robots.txt handling
    is switched off and the http/https proxies reported by get_proxies are
    applied when present.

    :param honor_time: If True honors pause time in refresh requests
    :param max_time: Maximum time in seconds to wait during a refresh request
    :param mobile_browser: If True and no user_agent is given, send
                           USER_AGENT_MOBILE instead of USER_AGENT
    :param user_agent: User agent string to send; None selects one of the
                       module defaults
    :param verify_ssl_certificates: If false SSL certificates errors are ignored
    :param handle_refresh: Passed to the browser's set_handle_refresh
    '''
    from ebook_converter.utils.browser import Browser

    br = Browser(verify_ssl=verify_ssl_certificates)
    br.set_handle_refresh(handle_refresh, max_time=max_time, honor_time=honor_time)
    br.set_handle_robots(False)

    if user_agent is None:
        if mobile_browser:
            user_agent = USER_AGENT_MOBILE
        else:
            user_agent = USER_AGENT
    br.addheaders = [('User-agent', user_agent)]

    # Only the http and https entries are forwarded to the browser
    available = get_proxies()
    selected = {
        scheme: available[scheme]
        for scheme in ('http', 'https')
        if available.get(scheme, None)
    }
    if selected:
        br.set_proxies(selected)
    return br
 def fit_image(width, height, pwidth, pheight):
    '''
    Fit image in box of width pwidth and height pheight.
@@ -889,21 +889,6 @@ OptionRecommendation(name='search_replace',
                        continue
                setattr(mi, x, val)
    def download_cover(self, url):
        '''
        Fetch the image at `url`, convert it to RGB and store it in a
        persistent temporary file with a .jpg suffix.

        :param url: Location of the cover image
        :return: Path of the temporary file holding the saved image
        '''
        from ebook_converter import browser
        from PIL import Image
        import io
        from ebook_converter.ptempfile import PersistentTemporaryFile

        self.log('Downloading cover from %r'%url)
        payload = browser().open_novisit(url).read()
        # The temporary file is only needed for its name; close it right away
        # so the image can be written to that path.
        target = PersistentTemporaryFile('.jpg')
        target.close()
        cover = Image.open(io.BytesIO(payload))
        cover.convert('RGB').save(target.name)
        return target.name
    def read_user_metadata(self):
        '''
        Read all metadata specified by the user. Command line options override
@@ -921,7 +906,8 @@ OptionRecommendation(name='search_replace',
        self.opts_to_mi(mi)
        if mi.cover:
            if mi.cover.startswith('http:') or mi.cover.startswith('https:'):
-                mi.cover = self.download_cover(mi.cover)
+                self.log.warn("TODO: Cover image is on remote server, "
                              "implement downloading using requests")
            ext = mi.cover.rpartition('.')[-1].lower().strip()
            if ext not in ('png', 'jpg', 'jpeg', 'gif'):
                ext = 'jpg'