Adding tests and some minor tweaks to structure.

This commit is contained in:
Michael Lazar
2018-10-01 00:45:09 -04:00
parent 67fb93b2ef
commit e4cced27eb
5 changed files with 158 additions and 49 deletions

View File

@@ -8,6 +8,7 @@ from datetime import datetime
from timeit import default_timer as timer
import six
from bs4 import BeautifulSoup
from kitchen.text.display import wrap
from . import exceptions
@@ -317,6 +318,22 @@ class Content(object):
out.extend(lines)
return out
@staticmethod
def extract_links(html):
    """
    Extract a list of hyperlinks from an HTML document.

    Anchors with a missing or empty ``href`` are skipped. Protocol-relative
    links (``//host/path``) are given the ``https:`` scheme, and any other
    link starting with ``/`` is treated as a reddit path and prefixed with
    ``https://www.reddit.com``. All other hrefs are returned unchanged.

    Params:
        html: The HTML markup to search for ``<a>`` tags.

    Returns:
        list: One ``{'text': ..., 'href': ...}`` dict for every anchor
            that has a non-empty href.
    """
    links = []
    soup = BeautifulSoup(html, 'html.parser')
    for link in soup.find_all('a'):
        href = link.get('href')
        if not href:
            continue

        if href.startswith('//'):
            # Protocol-relative URL: it already names its own host, so it
            # only needs a scheme. Prefixing the reddit domain here would
            # produce a broken "https://www.reddit.com//host/..." link.
            href = 'https:' + href
        elif href.startswith('/'):
            # Site-relative path, resolve it against the reddit domain
            href = 'https://www.reddit.com' + href

        links.append({'text': link.text, 'href': href})
    return links
class SubmissionContent(Content):
"""