Merge pull request #440 from michael-lazar/new_rate_limiting

New rate limiting
This commit is contained in:
Michael Lazar
2017-09-13 23:58:36 -04:00
committed by GitHub
12 changed files with 2743 additions and 12 deletions

View File

@@ -38,6 +38,7 @@ from .config import Config, copy_default_config, copy_default_mailcap
from .theme import Theme
from .oauth import OAuthHelper
from .terminal import Terminal
from .content import RequestHeaderRateLimiter
from .objects import curses_session, patch_webbrowser
from .subreddit_page import SubredditPage
from .exceptions import ConfigError
@@ -176,7 +177,13 @@ def main():
    with term.loader('Initializing', catch_exception=False):
        reddit = praw.Reddit(user_agent=user_agent,
                             decode_html_entities=False,
                             disable_update_check=True)
                             disable_update_check=True,
                             handler=RequestHeaderRateLimiter())

    # Dial the request cache up from 30 seconds to 5 minutes.
    # I'm trying this out to make navigation back and forth between
    # pages quicker; it may still need to be fine-tuned.
    reddit.config.cache_timeout = 300

    # Authorize on launch if the refresh token is present
    oauth = OAuthHelper(reddit, term, config)
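Pieced together, the initialization now looks roughly like this (a condensed sketch rather than the literal file contents; RTV bundles its own praw 3 fork under rtv.packages, and the user_agent string below is a placeholder):

from rtv.packages import praw
from rtv.content import RequestHeaderRateLimiter

# The custom handler replaces PRAW 3's DefaultHandler, so request pacing
# is driven by Reddit's X-Ratelimit-* response headers instead of a fixed
# delay between API calls.
reddit = praw.Reddit(user_agent='rtv example (placeholder)',
                     decode_html_entities=False,
                     disable_update_check=True,
                     handler=RequestHeaderRateLimiter())

# Cache GET responses for 5 minutes instead of PRAW's default 30 seconds
reddit.config.cache_timeout = 300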

View File

@@ -2,8 +2,10 @@
from __future__ import unicode_literals
import re
import time
import logging
from datetime import datetime
from timeit import default_timer as timer
import six
from kitchen.text.display import wrap
@@ -11,6 +13,8 @@ from kitchen.text.display import wrap
from . import exceptions
from .packages import praw
from .packages.praw.errors import InvalidSubreddit
from .packages.praw.helpers import normalize_url
from .packages.praw.handlers import DefaultHandler
_logger = logging.getLogger(__name__)
@@ -777,3 +781,169 @@ class SubscriptionContent(Content):
        data['h_offset'] = 0

        return data


class RequestHeaderRateLimiter(DefaultHandler):
    """Custom PRAW request handler for rate-limiting requests.

    This is an alternative to PRAW 3's DefaultHandler that uses Reddit's
    modern API guidelines to rate-limit requests based on the
    X-Ratelimit-* headers returned from Reddit. Most of these methods are
    copied or derived from the DefaultHandler.

    References:
        https://github.com/reddit/reddit/wiki/API
        https://github.com/praw-dev/prawcore/blob/master/prawcore/rate_limit.py
    """

    def __init__(self):
        # In PRAW's convention, these variables were bound to the class so
        # the cache could be shared among all of the ``reddit`` instances.
        # In RTV's use case there is only ever a single reddit instance, so
        # it made sense to clean up the globals and turn them into instance
        # variables.
        self.cache = {}
        self.timeouts = {}

        # These are used for the header rate-limiting
        self.used = None
        self.remaining = None
        self.seconds_to_reset = None
        self.next_request_timestamp = None

        super(RequestHeaderRateLimiter, self).__init__()

    def _delay(self):
        """
        Pause before making the next HTTP request.
        """
        if self.next_request_timestamp is None:
            return

        sleep_seconds = self.next_request_timestamp - time.time()
        if sleep_seconds <= 0:
            return
        time.sleep(sleep_seconds)

    def _update(self, response_headers):
        """
        Update the state of the rate limiter from the response headers:

            X-Ratelimit-Used: Approximate number of requests used this period
            X-Ratelimit-Remaining: Approximate number of requests left to use
            X-Ratelimit-Reset: Approximate number of seconds to end of period

        PRAW 5's rate limiting logic is structured for making hundreds of
        evenly-spaced API requests, which makes sense for running something
        like a bot or a crawler.

        This handler's logic, on the other hand, is geared more towards
        interactive usage. It allows for short, sporadic bursts of requests.
        The assumption is that actual users browsing reddit shouldn't ever
        be in danger of hitting the rate limit. If they do hit the limit,
        they will be cut off until the period resets.
        """
        if 'x-ratelimit-remaining' not in response_headers:
            # This could be because the API returned an error response, or
            # because we're using something like read-only credentials,
            # which Reddit doesn't appear to rate limit.
            return

        self.used = float(response_headers['x-ratelimit-used'])
        self.remaining = float(response_headers['x-ratelimit-remaining'])
        self.seconds_to_reset = int(response_headers['x-ratelimit-reset'])
        _logger.debug('Rate limit: %s used, %s remaining, %s reset',
                      self.used, self.remaining, self.seconds_to_reset)

        if self.remaining <= 0:
            self.next_request_timestamp = time.time() + self.seconds_to_reset
        else:
            self.next_request_timestamp = None

    def _clear_timeouts(self, cache_timeout):
        """
        Clear the cache of timed out results.
        """
        for key in list(self.timeouts):
            if timer() - self.timeouts[key] > cache_timeout:
                del self.timeouts[key]
                del self.cache[key]

    def clear_cache(self):
        """Remove all items from the cache."""
        self.cache = {}
        self.timeouts = {}

    def evict(self, urls):
        """Remove items from the cache that match the given URLs.

        Return the number of items removed.
        """
        if isinstance(urls, six.text_type):
            urls = [urls]
        urls = set(normalize_url(url) for url in urls)
        retval = 0
        for key in list(self.cache):
            if key[0] in urls:
                retval += 1
                del self.cache[key]
                del self.timeouts[key]
        return retval

    def request(self, _cache_key, _cache_ignore, _cache_timeout, **kwargs):
        """
        This is a wrapper function that handles caching of the request.

        See DefaultHandler.with_cache for reference.
        """
        if _cache_key:
            # Pop the request's session cookies from the cache key.
            # These appear to be unreliable and change with every request.
            # Also, with the introduction of OAuth I don't think that
            # cookies are being used to store anything that differentiates
            # API requests anyway.
            url, items = _cache_key
            _cache_key = (url, (items[0], items[1], items[3], items[4]))

        if kwargs['request'].method != 'GET':
            # I added this check for RTV; I have no idea why PRAW would
            # ever want to cache POST/PUT/DELETE requests.
            _cache_ignore = True

        if _cache_ignore:
            return self._request(**kwargs)

        self._clear_timeouts(_cache_timeout)
        if _cache_key in self.cache:
            return self.cache[_cache_key]

        result = self._request(**kwargs)

        # The handlers don't call `raise_for_status`, so make sure we don't
        # cache responses whose status codes will later be raised as
        # exceptions.
        if result.status_code not in (200, 302):
            return result

        self.timeouts[_cache_key] = timer()
        self.cache[_cache_key] = result
        return result

    def _request(self, request, proxies, timeout, verify, **_):
        """
        This is where we apply rate limiting and make the HTTP request.
        """
        settings = self.http.merge_environment_settings(
            request.url, proxies, False, verify, None)

        self._delay()
        response = self.http.send(
            request, timeout=timeout, allow_redirects=False, **settings)
        self._update(response.headers)

        return response
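To make the header logic above concrete, here is a minimal sketch of what happens when a response reports that the quota is exhausted (the header values are invented for the example; in normal operation request()/_request() call these methods internally):

import time
from rtv.content import RequestHeaderRateLimiter

handler = RequestHeaderRateLimiter()

# Hypothetical headers from a response that used the last request in the
# current period, with 3 seconds remaining until the period resets
headers = {
    'x-ratelimit-used': '600',
    'x-ratelimit-remaining': '0',
    'x-ratelimit-reset': '3',
}

# _update() records the quota state; because nothing is remaining, it
# schedules the next request for when the period resets (now + 3 seconds)
handler._update(headers)

# _delay() blocks until that timestamp, so the next request waits out the
# rest of the period instead of triggering a 429 from Reddit
start = time.time()
handler._delay()
print('waited %.1f seconds' % (time.time() - start))

When x-ratelimit-remaining is greater than zero, _update() clears next_request_timestamp and _delay() returns immediately, which is what allows the short interactive bursts described in the docstring.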

View File

@@ -81,6 +81,11 @@ class Page(object):
            ch = self.term.stdscr.getch()
            self.controller.trigger(ch)

    @PageController.register(Command('REFRESH'))
    def reload_page(self):
        self.reddit.handler.clear_cache()
        self.refresh_content()

    @PageController.register(Command('EXIT'))
    def exit(self):
        if self.term.prompt_y_or_n('Do you really want to quit? (y/n): '):
@@ -255,7 +260,7 @@ class Page(object):
        # Give reddit time to process the request
        time.sleep(2.0)

        if self.term.loader.exception is None:
            self.refresh_content()
            self.reload_page()

    @PageController.register(Command('EDIT'))
    @logged_in
@@ -291,7 +296,7 @@ class Page(object):
        time.sleep(2.0)
        if self.term.loader.exception is None:
            self.refresh_content()
            self.reload_page()
        else:
            raise TemporaryFileError()
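The reason REFRESH and the post-action paths were re-pointed: with the response cache raised to 5 minutes, a bare refresh_content() may simply be answered from the handler's cache, while reload_page() empties the cache first so the page is genuinely re-fetched after the user votes, comments, or presses refresh. A rough sketch of the difference, assuming a reddit instance wired up as in __main__ above:

# May be answered from the handler's 5-minute response cache
submissions = list(reddit.get_subreddit('python').get_hot(limit=25))

# Drop the cache first so the next call goes back to the Reddit API
reddit.handler.clear_cache()
submissions = list(reddit.get_subreddit('python').get_hot(limit=25))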

View File

@@ -66,7 +66,6 @@ class SubmissionPage(Page):
        self.active = False

    @SubmissionController.register(Command('REFRESH'))
    def refresh_content(self, order=None, name=None):
        """
        Re-download comments and reset the page index
@@ -188,7 +187,7 @@ class SubmissionPage(Page):
        time.sleep(2.0)
        if self.term.loader.exception is None:
            self.refresh_content()
            self.reload_page()
        else:
            raise TemporaryFileError()

View File

@@ -34,7 +34,6 @@ class SubredditPage(Page):
        self.nav = Navigator(self.content.get)
        self.toggled_subreddit = None

    @SubredditController.register(Command('REFRESH'))
    def refresh_content(self, order=None, name=None):
        """
        Re-download all submissions and reset the page index
@@ -209,7 +208,7 @@ class SubredditPage(Page):
            self.content = page.selected_subreddit
            self.nav = Navigator(self.content.get)
        else:
            self.refresh_content()
            self.reload_page()

    @SubredditController.register(Command('SUBREDDIT_OPEN_SUBSCRIPTIONS'))
    @logged_in

View File

@@ -25,7 +25,6 @@ class SubscriptionPage(Page):
        self.content_type = content_type
        self.selected_subreddit = None

    @SubscriptionController.register(Command('REFRESH'))
    def refresh_content(self, order=None, name=None):
        """
        Re-download all subscriptions and reset the page index