Fixing edge cases in comment order when unfolding more comments
This commit is contained in:
@@ -2,6 +2,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import logging
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
import six
|
import six
|
||||||
@@ -11,6 +12,8 @@ from kitchen.text.display import wrap
|
|||||||
|
|
||||||
from . import exceptions
|
from . import exceptions
|
||||||
|
|
||||||
|
_logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class Content(object):
|
class Content(object):
|
||||||
|
|
||||||
@@ -50,46 +53,63 @@ class Content(object):
|
|||||||
"""
|
"""
|
||||||
Flatten a PRAW comment tree while preserving the nested level of each
|
Flatten a PRAW comment tree while preserving the nested level of each
|
||||||
comment via the `nested_level` attribute.
|
comment via the `nested_level` attribute.
|
||||||
|
|
||||||
|
There are a couple of different ways that the input comment list can be
|
||||||
|
organized depending on its source:
|
||||||
|
|
||||||
|
1. Comments that are returned from the get_submission() api call.
|
||||||
|
In this case, the comments list will contain only top level
|
||||||
|
comments and replies will be attached to those comments via
|
||||||
|
the `comment.replies` property.
|
||||||
|
|
||||||
|
2. Comments that are returned from the comments() method on a
|
||||||
|
MoreComments object. In this case, the api returns all of the
|
||||||
|
comments and replies as a flat list. We need to sort out which
|
||||||
|
ones are replies to other comments by looking at the parent_id
|
||||||
|
parameter and checking if the id matches another comment.
|
||||||
|
|
||||||
|
In addition, there is a bug in praw where a MoreComments object that is
|
||||||
|
also a reply will be added below the comment as a sibling instead of
|
||||||
|
a child. So it is especially important that this method is robust and
|
||||||
|
double-checks all of the parent_id's of the comments.
|
||||||
|
|
||||||
|
Reference:
|
||||||
|
https://github.com/praw-dev/praw/issues/391
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
stack = comments[:]
|
stack = comments[:]
|
||||||
for item in stack:
|
for item in stack:
|
||||||
item.nested_level = root_level
|
item.nested_level = root_level
|
||||||
|
|
||||||
retval = []
|
retval, parent_candidates = [], {}
|
||||||
while stack:
|
while stack:
|
||||||
item = stack.pop(0)
|
item = stack.pop(0)
|
||||||
|
|
||||||
# MoreComments item count should never be zero, but if it is then
|
# The MoreComments item count should never be zero, discard it if
|
||||||
# discard the MoreComment object. Need to look into this further.
|
# it is. Need to look into this further.
|
||||||
if isinstance(item, praw.objects.MoreComments) and item.count == 0:
|
if isinstance(item, praw.objects.MoreComments) and item.count == 0:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# https://github.com/praw-dev/praw/issues/391
|
|
||||||
# Attach children replies to parents. Children will have the
|
|
||||||
# same parent_id, but with a suffix attached.
|
|
||||||
# E.g.
|
|
||||||
# parent_comment.id = c0tprcm
|
|
||||||
# comment.parent_id = t1_c0tprcm
|
|
||||||
if item.parent_id:
|
if item.parent_id:
|
||||||
level = None
|
# Search the list of previous comments for a possible parent
|
||||||
# Search through previous comments for a possible parent
|
# The match is based off of the parent_id parameter E.g.
|
||||||
for parent in retval[::-1]:
|
# parent.id = c0tprcm
|
||||||
if level and parent.nested_level >= level:
|
# child.parent_id = t1_c0tprcm
|
||||||
# Stop if we reach a sibling or a child, we know that
|
parent = parent_candidates.get(item.parent_id[3:])
|
||||||
# nothing before this point is a candidate for parent.
|
if parent:
|
||||||
break
|
item.nested_level = parent.nested_level + 1
|
||||||
level = parent.nested_level
|
|
||||||
if item.parent_id.endswith(parent.id):
|
|
||||||
item.nested_level = parent.nested_level + 1
|
|
||||||
|
|
||||||
# Otherwise, grab all of the attached replies and add them back to
|
# Add all of the attached replies to the front of the stack to be
|
||||||
# the list of comments to parse
|
# parsed separately
|
||||||
if hasattr(item, 'replies'):
|
if hasattr(item, 'replies'):
|
||||||
for n in item.replies:
|
for n in item.replies:
|
||||||
n.nested_level = item.nested_level + 1
|
n.nested_level = item.nested_level + 1
|
||||||
stack[0:0] = item.replies
|
stack[0:0] = item.replies
|
||||||
|
|
||||||
|
# The comment is now a potential parent for the rest of the items
|
||||||
|
parent_candidates[item.id] = item
|
||||||
|
|
||||||
retval.append(item)
|
retval.append(item)
|
||||||
return retval
|
return retval
|
||||||
|
|
||||||
|
|||||||
1593
tests/cassettes/test_content_flatten_comments_2.yaml
Normal file
1593
tests/cassettes/test_content_flatten_comments_2.yaml
Normal file
File diff suppressed because one or more lines are too long
@@ -154,6 +154,65 @@ def test_content_flatten_comments(reddit):
|
|||||||
assert comment.nested_level > 2
|
assert comment.nested_level > 2
|
||||||
|
|
||||||
|
|
||||||
|
def test_content_flatten_comments_2(reddit):
    """
    Check nesting levels after expanding a large MoreComments object.

    The comments returned by ``MoreComments.comments()`` are passed through
    ``Content.flatten_comments`` and every item is then compared against the
    items that precede it in the flattened list: if one of them is its parent
    (matched on ``parent_id``), the item must sit exactly one level below it;
    otherwise the item must be at the root level.

    ``reddit`` is used as a PRAW session (presumably a fixture replaying
    recorded HTTP traffic -- confirm against the test configuration).
    """

    # Grab a large MoreComments instance to test
    url = 'https://www.reddit.com/r/CollegeBasketball/comments/31owr1'
    submission = reddit.get_submission(url, comment_sort='top')
    more_comment = submission.comments[-1]
    assert isinstance(more_comment, praw.objects.MoreComments)

    # Make sure that all comments are displayed one level below their parents
    comments = more_comment.comments()
    flattened = Content.flatten_comments(comments)
    for i, item in enumerate(flattened):
        # Walk backwards so the nearest preceding match is treated as the
        # parent, mirroring how the comment tree is displayed.
        for prev in reversed(flattened[:i]):
            if item.parent_id and item.parent_id.endswith(prev.id):
                assert item.nested_level == prev.nested_level + 1, (
                    '%r is at level %s, expected one below parent %r '
                    '(level %s)' % (item, item.nested_level,
                                    prev, prev.nested_level))
                break
        else:
            # No preceding item is the parent, so this must be a root item.
            assert item.nested_level == 0, (
                '%r has no parent in the list but is at level %s'
                % (item, item.nested_level))
|
||||||
|
|
||||||
|
|
||||||
|
def test_content_flatten_comments_3(reddit):
    """
    Regression test for the comment structure described in
    https://github.com/michael-lazar/rtv/issues/327

    A flat list of mock comments (as might be returned after expanding a
    "More comments" link) is passed to ``Content.flatten_comments``. The test
    checks that the order is preserved and that every reply ends up exactly
    one level below its parent, including replies that are separated from
    their parent by a deeper sub-thread.

    ``reddit`` is not used by the body; it is kept so the signature matches
    the other tests in this module.
    """

    class MockComment(object):
        """Minimal stand-in exposing the attributes read by the flattener."""

        def __init__(self, comment_id, parent_id='t3_xxxxx'):
            self.id = comment_id
            self.parent_id = parent_id
            self.replies = []

        def __repr__(self):
            return '%s (%s)' % (self.id, self.parent_id)

    # This is an example of something that might be returned by PRAW after
    # clicking to expand a "More comments [6]" link.
    comments = [
        MockComment('axxxx'),
        MockComment('a1xxx', parent_id='t1_axxxx'),
        MockComment('a11xx', parent_id='t1_a1xxx'),
        MockComment('a12xx', parent_id='t1_a1xxx'),
        MockComment('a2xxx', parent_id='t1_axxxx'),
        MockComment('a3xxx', parent_id='t1_axxxx'),
        MockComment('bxxxx'),
    ]

    flattened = Content.flatten_comments(comments)

    # The input is already in display order with no attached replies, so the
    # flattened list should keep the same sequence.
    assert [c.id for c in flattened] == [c.id for c in comments]

    # a2xxx / a3xxx are the interesting cases: they follow a deeper
    # sub-thread but must still be placed directly under axxxx.
    assert [c.nested_level for c in flattened] == [0, 1, 2, 2, 1, 1, 0]

    # Make sure that all comments are displayed one level below their parents
    for i, item in enumerate(flattened):
        # Walk backwards so the nearest preceding match is treated as the
        # parent.
        for prev in reversed(flattened[:i]):
            if item.parent_id and item.parent_id.endswith(prev.id):
                assert item.nested_level == prev.nested_level + 1, (
                    '%r is at level %s, expected one below parent %r '
                    '(level %s)' % (item, item.nested_level,
                                    prev, prev.nested_level))
                break
        else:
            # No preceding item is the parent, so this must be a root item.
            assert item.nested_level == 0, (
                '%r has no parent in the list but is at level %s'
                % (item, item.nested_level))
|
||||||
|
|
||||||
|
|
||||||
def test_content_submission_initialize(reddit, terminal):
|
def test_content_submission_initialize(reddit, terminal):
|
||||||
|
|
||||||
url = 'https://www.reddit.com/r/Python/comments/2xmo63/'
|
url = 'https://www.reddit.com/r/Python/comments/2xmo63/'
|
||||||
|
|||||||
Reference in New Issue
Block a user