diff --git a/rtv/mime_parsers.py b/rtv/mime_parsers.py
index 3475c4d..59c28ab 100644
--- a/rtv/mime_parsers.py
+++ b/rtv/mime_parsers.py
@@ -130,64 +130,40 @@ class RedditUploadsMIMEParser(BaseMIMEParser):
class ImgurMIMEParser(BaseMIMEParser):
"""
- The majority of imgur links don't point directly to the image, so we need
- to open the provided url and scrape the page for the link.
+ Imgur provides a JSON API exposing its entire infrastructure. Each imgur
+ page has an associated hash and can contain an album, a gallery, or a single image.
- Scrape the actual image url from an imgur landing page. Imgur intentionally
- obscures this on most reddit links in order to draw more traffic for their
- advertisements.
-
- There are a couple of tags that supply the relevant info:
-
-
-
+ see https://apidocs.imgur.com
"""
pattern = re.compile(r'https?://(w+\.)?(m\.)?imgur\.com/[^.]+$')
@staticmethod
def get_mimetype(url):
- page = requests.get(url)
- soup = BeautifulSoup(page.content, 'html.parser')
- tag = soup.find('meta', attrs={'name': 'twitter:image'})
- if tag:
- url = tag.get('content')
- if GifvMIMEParser.pattern.match(url):
- return GifvMIMEParser.get_mimetype(url)
- return BaseMIMEParser.get_mimetype(url)
+ endpoint = 'https://api.imgur.com/3/{domain}/{page_hash}'
+ header = {'authorization': 'Client-ID {}'.format('d8842d573e8b9dd')}
+ pattern = re.compile(r'https?://(w+\.)?(m\.)?imgur\.com/((?P<domain>a|album|gallery)/)?(?P<hash>.+)$')
+ m = pattern.match(url)
+ page_hash = m.group('hash')
+ domain = 'album' if m.group('domain') in ['a', 'album'] else 'gallery'
-class ImgurAlbumMIMEParser(BaseMIMEParser):
- """
- Imgur albums can contain several images, which need to be scraped from the
- landing page. Assumes the following html structure:
+ r = requests.get(endpoint.format(domain=domain, page_hash=page_hash),
+ headers=header)
+ if r.status_code != 200:
+ r = requests.get(endpoint.format(domain='image',
+ page_hash=page_hash), headers=header)
+ if r.status_code != 200:
+ return url, None
-