Fixing edge cases in comment order when unfolding more comments

2017-03-04 23:54:02 -08:00
parent aa2c6d5728
commit 08c9af03fa
3 changed files with 1693 additions and 21 deletions
--- a/rtv/content.py
+++ b/rtv/content.py
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 import re
 import logging
 from datetime import datetime
 import six
@@ -11,6 +12,8 @@ from kitchen.text.display import wrap
 from . import exceptions
 _logger = logging.getLogger(__name__)
 class Content(object):
@@ -50,46 +53,63 @@ class Content(object):
        """
        Flatten a PRAW comment tree while preserving the nested level of each
        comment via the `nested_level` attribute.
        There are a couple of different ways that the input comment list can be
        organized depending on its source:
            1. Comments that are returned from the get_submission() api call.
               In this case, the comments list will contain only top level
               comments and replies will be attached to those comments via
               the `comment.replies` property.
            2. Comments that are returned from the comments() method on a
               MoreComments object. In this case, the api returns all of the
               comments and replies as a flat list. We need to sort out which
               ones are replies to other comments by looking at the parent_id
               parameter and checking if the id matches another comment.
        In addition, there is a bug in praw where a MoreComments object that is
        also a reply will be added below the comment as a sibling instead of
        a child. So it is especially important that this method is robust and
        double-checks all of the parent_id's of the comments.
        Reference:
            https://github.com/praw-dev/praw/issues/391
        """
        stack = comments[:]
        for item in stack:
            item.nested_level = root_level
-        retval = []
+        retval, parent_candidates = [], {}
        while stack:
            item = stack.pop(0)
-            # MoreComments item count should never be zero, but if it is then
+            # The MoreComments item count should never be zero, discard it if
-            # discard the MoreComment object. Need to look into this further.
+            # it is. Need to look into this further.
            if isinstance(item, praw.objects.MoreComments) and item.count == 0:
                continue
            # https://github.com/praw-dev/praw/issues/391
            # Attach children replies to parents. Children will have the
            # same parent_id, but with a suffix attached.
            # E.g.
            #   parent_comment.id = c0tprcm
            #   comment.parent_id = t1_c0tprcm
            if item.parent_id:
-                level = None
+                # Search the list of previous comments for a possible parent
-                # Search through previous comments for a possible parent
+                # The match is based off of the parent_id parameter E.g.
-                for parent in retval[::-1]:
+                #   parent.id = c0tprcm
-                    if level and parent.nested_level >= level:
+                #   child.parent_id = t1_c0tprcm
-                        # Stop if we reach a sibling or a child, we know that
+                parent = parent_candidates.get(item.parent_id[3:])
-                        # nothing before this point is a candidate for parent.
+                if parent:
-                        break
+                    item.nested_level = parent.nested_level + 1
                    level = parent.nested_level
                    if item.parent_id.endswith(parent.id):
                        item.nested_level = parent.nested_level + 1
-            # Otherwise, grab all of the attached replies and add them back to
+            # Add all of the attached replies to the front of the stack to be
-            # the list of comments to parse
+            # parsed separately
            if hasattr(item, 'replies'):
                for n in item.replies:
                    n.nested_level = item.nested_level + 1
                stack[0:0] = item.replies
            # The comment is now a potential parent for the rest of the items
            parent_candidates[item.id] = item
            retval.append(item)
        return retval
--- a/tests/cassettes/test_content_flatten_comments_2.yaml
+++ b/tests/cassettes/test_content_flatten_comments_2.yaml
--- a/tests/test_content.py
+++ b/tests/test_content.py
@@ -154,6 +154,65 @@ def test_content_flatten_comments(reddit):
            assert comment.nested_level > 2
 def test_content_flatten_comments_2(reddit):
    """
    Flatten the comments loaded from a large MoreComments object and check
    that every comment sits exactly one level below its parent.
    """
    # Grab a large MoreComments instance to test
    url = 'https://www.reddit.com/r/CollegeBasketball/comments/31owr1'
    submission = reddit.get_submission(url, comment_sort='top')
    more_comment = submission.comments[-1]
    assert isinstance(more_comment, praw.objects.MoreComments)
    # Make sure that all comments are displayed one level below their parents
    comments = more_comment.comments()
    flattened = Content.flatten_comments(comments)
    for i, item in enumerate(flattened):
        # Walk backwards through the preceding items and stop at the first
        # one whose id matches the tail of this item's parent_id.
        for prev in reversed(flattened[:i]):
            if item.parent_id and item.parent_id.endswith(prev.id):
                assert item.nested_level == prev.nested_level + 1
                break
        else:
            # No preceding item is the parent, so the item must be at the
            # root level of the flattened list.
            assert item.nested_level == 0
 def test_content_flatten_comments_3(reddit):
    """
    Flatten a hand-built flat list of mock comments and check that every
    comment sits exactly one level below its parent.
    """
    # Build the comment structure as described in issue
    # https://github.com/michael-lazar/rtv/issues/327
    class MockComment(object):
        """Minimal stand-in exposing only id, parent_id and replies."""
        def __init__(self, comment_id, parent_id='t3_xxxxx'):
            self.id = comment_id
            self.parent_id = parent_id
            self.replies = []
        def __repr__(self):
            return '%s (%s)' % (self.id, self.parent_id)
    # This is an example of something that might be returned by PRAW after
    # clicking to expand a "More comments [6]" link.
    comments = [
        MockComment('axxxx'),
        MockComment('a1xxx', parent_id='t1_axxxx'),
        MockComment('a11xx', parent_id='t1_a1xxx'),
        MockComment('a12xx', parent_id='t1_a1xxx'),
        MockComment('a2xxx', parent_id='t1_axxxx'),
        MockComment('a3xxx', parent_id='t1_axxxx'),
        MockComment('bxxxx'),
    ]
    # Make sure that all comments are displayed one level below their parents
    flattened = Content.flatten_comments(comments)
    for i, item in enumerate(flattened):
        # Walk backwards through the preceding items and stop at the first
        # one whose id matches the tail of this item's parent_id.
        for prev in reversed(flattened[:i]):
            if item.parent_id and item.parent_id.endswith(prev.id):
                assert item.nested_level == prev.nested_level + 1
                break
        else:
            # No preceding item is the parent, so the item must be at the
            # root level of the flattened list.
            assert item.nested_level == 0
 def test_content_submission_initialize(reddit, terminal):
    url = 'https://www.reddit.com/r/Python/comments/2xmo63/'