mirror of https://github.com/gryf/ebook-converter.git synced 2026-01-15 00:04:10 +01:00

Removed 'browser' related functions

2020-06-14 12:28:23 +02:00
parent be671ef2d8
commit ebb2e810eb
2 changed files with 2 additions and 141 deletions

View File

@@ -267,131 +267,6 @@ def extract(path, dir):
     extractor(path, dir)
 
 
-def get_proxies(debug=True):
-    proxies = urllib.request.getproxies()
-    for key, proxy in list(proxies.items()):
-        if not proxy or '..' in proxy or key == 'auto':
-            del proxies[key]
-            continue
-        if proxy.startswith(key+'://'):
-            proxy = proxy[len(key)+3:]
-        if key == 'https' and proxy.startswith('http://'):
-            proxy = proxy[7:]
-        if proxy.endswith('/'):
-            proxy = proxy[:-1]
-        if len(proxy) > 4:
-            proxies[key] = proxy
-        else:
-            prints('Removing invalid', key, 'proxy:', proxy)
-            del proxies[key]
-    if proxies and debug:
-        prints('Using proxies:', proxies)
-    return proxies
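
For reference, this is what the removed get_proxies() produced for typical environment values (a minimal sketch; the proxy host is hypothetical):

    import os

    # Hypothetical environment, as seen by urllib.request.getproxies()
    os.environ['http_proxy'] = 'http://proxy.example.com:3128/'
    os.environ['https_proxy'] = 'http://proxy.example.com:3128'

    # The scheme prefix and any trailing slash are stripped, so both
    # entries come back as bare 'host:port' strings.
    print(get_proxies(debug=False))
    # {'http': 'proxy.example.com:3128', 'https': 'proxy.example.com:3128'}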
-
-
-def get_parsed_proxy(typ='http', debug=True):
-    proxies = get_proxies(debug)
-    proxy = proxies.get(typ, None)
-    if proxy:
-        pattern = re.compile((
-            '(?:ptype://)?'
-            '(?:(?P<user>\\w+):(?P<pass>.*)@)?'
-            '(?P<host>[\\w\\-\\.]+)'
-            '(?::(?P<port>\\d+))?').replace('ptype', typ)
-        )
-        match = pattern.match(proxies[typ])
-        if match:
-            try:
-                ans = {
-                    'host': match.group('host'),
-                    'port': match.group('port'),
-                    'user': match.group('user'),
-                    'pass': match.group('pass')
-                }
-                if ans['port']:
-                    ans['port'] = int(ans['port'])
-            except Exception:
-                if debug:
-                    import traceback
-                    traceback.print_exc()
-            else:
-                if debug:
-                    prints('Using http proxy', str(ans))
-                return ans
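
Applied to a credentialed proxy string (hypothetical values), the pattern above splits out each component:

    import re

    typ = 'http'
    pattern = re.compile((
        '(?:ptype://)?'
        '(?:(?P<user>\\w+):(?P<pass>.*)@)?'
        '(?P<host>[\\w\\-\\.]+)'
        '(?::(?P<port>\\d+))?').replace('ptype', typ))

    match = pattern.match('jdoe:s3cret@proxy.example.com:8080')
    print(match.groupdict())
    # {'user': 'jdoe', 'pass': 's3cret', 'host': 'proxy.example.com',
    #  'port': '8080'}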
-
-
-def get_proxy_info(proxy_scheme, proxy_string):
-    '''
-    Parse all proxy information from a proxy string (as returned by
-    get_proxies). The returned dict will have members set to None when the
-    info is not available in the string. If an exception occurs while parsing
-    the string, this method returns None.
-    '''
-    try:
-        proxy_url = '%s://%s' % (proxy_scheme, proxy_string)
-        urlinfo = urllib.parse.urlparse(proxy_url)
-        ans = {
-            'scheme': urlinfo.scheme,
-            'hostname': urlinfo.hostname,
-            'port': urlinfo.port,
-            'username': urlinfo.username,
-            'password': urlinfo.password,
-        }
-    except Exception:
-        return None
-    return ans
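
Unlike get_parsed_proxy(), this helper delegates the parsing to urllib.parse; for the same hypothetical string it returns a typed result (note the port comes back as an int):

    info = get_proxy_info('http', 'jdoe:s3cret@proxy.example.com:8080')
    # {'scheme': 'http', 'hostname': 'proxy.example.com', 'port': 8080,
    #  'username': 'jdoe', 'password': 's3cret'}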
-
-
-# IE 11 on Windows 7
-USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko'
-USER_AGENT_MOBILE = 'Mozilla/5.0 (Windows; U; Windows CE 5.1; rv:1.8.1a3) Gecko/20060610 Minimo/0.016'
-
-
-def is_mobile_ua(ua):
-    return 'Mobile/' in ua or 'Mobile ' in ua
-
-
-def random_user_agent(choose=None, allow_ie=True):
-    from ebook_converter.utils.random_ua import common_user_agents
-    ua_list = common_user_agents()
-    ua_list = [x for x in ua_list if not is_mobile_ua(x)]
-    if not allow_ie:
-        ua_list = [x for x in ua_list if 'Trident/' not in x and 'Edge/' not in x]
-    return random.choice(ua_list) if choose is None else ua_list[choose]
-
-
-def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None, verify_ssl_certificates=True, handle_refresh=True):
-    '''
-    Create a mechanize browser for web scraping. The browser handles cookies,
-    refresh requests and ignores robots.txt. It also uses a proxy if one is
-    available.
-
-    :param honor_time: If True, honors the pause time in refresh requests
-    :param max_time: Maximum time in seconds to wait during a refresh request
-    :param verify_ssl_certificates: If False, SSL certificate errors are ignored
-    '''
-    from ebook_converter.utils.browser import Browser
-    opener = Browser(verify_ssl=verify_ssl_certificates)
-    opener.set_handle_refresh(handle_refresh, max_time=max_time, honor_time=honor_time)
-    opener.set_handle_robots(False)
-    if user_agent is None:
-        user_agent = USER_AGENT_MOBILE if mobile_browser else USER_AGENT
-    opener.addheaders = [('User-agent', user_agent)]
-    proxies = get_proxies()
-    to_add = {}
-    http_proxy = proxies.get('http', None)
-    if http_proxy:
-        to_add['http'] = http_proxy
-    https_proxy = proxies.get('https', None)
-    if https_proxy:
-        to_add['https'] = https_proxy
-    if to_add:
-        opener.set_proxies(to_add)
-    return opener
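
Since this commit removes the mechanize-based browser(), a rough requests-based stand-in for the common case could look like the sketch below. This is only an illustration: make_session is a hypothetical helper, requests is not a dependency of this code, and nothing here reproduces mechanize's refresh handling.

    import requests

    def make_session(user_agent=USER_AGENT, verify_ssl_certificates=True):
        # Cookies are kept automatically by requests.Session, and
        # robots.txt is never consulted, matching set_handle_robots(False).
        session = requests.Session()
        session.headers['User-Agent'] = user_agent
        session.verify = verify_ssl_certificates
        # get_proxies() strips the scheme, but requests wants full URLs
        # in its proxy map, so put it back.
        session.proxies = {key: 'http://' + value
                           for key, value in get_proxies(debug=False).items()
                           if key in ('http', 'https')}
        return session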
-
-
 def fit_image(width, height, pwidth, pheight):
     '''
     Fit image in box of width pwidth and height pheight.

View File

@@ -889,21 +889,6 @@ OptionRecommendation(name='search_replace',
                 continue
             setattr(mi, x, val)
 
-    def download_cover(self, url):
-        from ebook_converter import browser
-        from PIL import Image
-        import io
-        from ebook_converter.ptempfile import PersistentTemporaryFile
-        self.log('Downloading cover from %r' % url)
-        br = browser()
-        raw = br.open_novisit(url).read()
-        buf = io.BytesIO(raw)
-        pt = PersistentTemporaryFile('.jpg')
-        pt.close()
-        img = Image.open(buf)
-        img.convert('RGB').save(pt.name)
-        return pt.name
-
     def read_user_metadata(self):
         '''
         Read all metadata specified by the user. Command line options override
@@ -921,7 +906,8 @@ OptionRecommendation(name='search_replace',
         self.opts_to_mi(mi)
         if mi.cover:
             if mi.cover.startswith('http:') or mi.cover.startswith('https:'):
-                mi.cover = self.download_cover(mi.cover)
+                self.log.warn("TODO: Cover image is on a remote server; "
+                              "implement downloading using requests")
             ext = mi.cover.rpartition('.')[-1].lower().strip()
             if ext not in ('png', 'jpg', 'jpeg', 'gif'):
                 ext = 'jpg'
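
For the TODO above, a requests-based replacement for the removed download_cover() might look roughly like this (a sketch assuming requests is added as a dependency; the Pillow and temporary-file handling mirrors the removed method):

    import io

    import requests
    from PIL import Image

    from ebook_converter.ptempfile import PersistentTemporaryFile

    def download_cover(self, url):
        # Fetch the remote cover and normalize it to an RGB JPEG on disk,
        # as the removed mechanize-based version did.
        self.log('Downloading cover from %r' % url)
        response = requests.get(url, timeout=60)
        response.raise_for_status()
        pt = PersistentTemporaryFile('.jpg')
        pt.close()
        img = Image.open(io.BytesIO(response.content))
        img.convert('RGB').save(pt.name)
        return pt.name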