Commit bbc7094c authored by Jed Simson

Add some documentation about searching

parent 59dc4dce
......@@ -26,6 +26,17 @@ class Search:
def generate(self, subreddit, low_timestamp, high_timestamp,
limit=100, sort='hot', use_sliding_window=False):
'''Search for submissions within a given range.
If ``limit`` is less than 1000 (i.e. we are searching for
< 1000 posts), a generic reddit search using the 'cloudsearch'
functionality is performed. This search may omit posts, as it is
not an exhaustive search of all posts within the range.
If ``limit`` is greater than 1000, a sliding window search
(see _generate()) is used, which is guaranteed to fetch
``limit`` posts if that many posts exist.
'''
if sort not in self.sorts:
raise ValueError('Invalid sort parameter `{}`... Please use one of {}' \
......@@ -36,7 +47,8 @@ class Search:
# will get pretty slow as the rate limiting catches up with us.
# TODO: Inform the user and add option to override.
return self._generate(subreddit, low_timestamp, high_timestamp, limit=limit, sort=sort)
return self._generate(subreddit, low_timestamp, high_timestamp,
limit=limit, sort=sort)
# We assume that `low` and `high` timestamps are given
if low_timestamp is None or high_timestamp is None:
......@@ -55,8 +67,8 @@ class Search:
high_timestamp += self.OUT_OF_ORDER_OFFSET
# Create a reddit session to use
r = Reddit('Reddit Slider by /u/oracular_demon - v{}'.format(self.version))
backoff = self.BACKOFF_START
r = Reddit('Reddit Slider by /u/oracular_demon - v{}'
.format(self.version))
try:
t1 = low_timestamp
......@@ -69,11 +81,10 @@ class Search:
syntax='cloudsearch',
sort=sort))
except HTTPException as e:
time.sleep(backoff)
backoff *= 2
pass
except InvalidSubreddit as e:
raise ValueError('Invalid Subreddit provided... Subreddit probably does not exist.')
raise ValueError('Invalid Subreddit provided... ' +
'Subreddit probably does not exist.')
search_results = [s for s in results
if original_lowest_timestamp <= s.created and
......@@ -82,21 +93,31 @@ class Search:
# Give results as a generator
return (submission for submission in search_results)
def _generate(self, subreddit, low_timestamp, high_timestamp, limit=100, sort='hot'):
def _generate(self, subreddit, low_timestamp, high_timestamp,
limit=100, sort='hot'):
''' Yield all submissions between ``low_timestamp``
and ``high_timestamp``.
# Create a reddit session to use
r = Reddit('Reddit Slider by /u/oracular_demon - v{}'.format(self.version))
Uses the submissions_between helper method provided
by PRAW, which internally uses a sliding window based
method to find all posts with matching criteria within
the range given.
processed = 0
generator = submissions_between(r, subreddit, low_timestamp, high_timestamp)
submissions = []
Note that this method does not yield a list of submissions
sorted according to ``sort``, and this needs to be done at a
higher layer once the generator has been exhausted.
Also, ``limit`` is not taken into account: this method
will generate all submissions within the range, and the
results will need to be truncated accordingly.
'''
# Create a reddit session to use
r = Reddit('Reddit Slider by /u/oracular_demon - v{}'
.format(self.version))
generator = submissions_between(r,
subreddit,
low_timestamp,
high_timestamp)
for submission in generator:
yield submission
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment