Commit 612d1c17 authored by Jed Simson

Add Sliding Window search option

parent 5f2f5af2
......@@ -19,11 +19,18 @@ def subreddit(subreddit):
delta = request.args.get('delta', None)
sort = request.args.get('sort', 'hot')
limit = int(request.args.get('limit', 100))
sliding_window = request.args.get('use_sliding_window', False)
sliding_window = True if sliding_window == 'true' else False
t0 = time.time()
try:
submissions = slider.get_subreddit_posts(subreddit, dimension, delta, sort, limit)
submissions = slider.get_subreddit_posts(subreddit,
dimension,
delta,
sort,
limit,
sliding_window=sliding_window)
except ValueError as e:
t1 = time.time()
t = round(t1 - t0, 4)
......
from praw import Reddit
from praw.errors import HTTPException, InvalidSubreddit
from praw.helpers import submissions_between
from .util import top_sort, hot_sort, new_sort, controversial_sort
import time
......@@ -22,23 +25,24 @@ class Search:
# To be sure, let's set the workaround offset to 2 hours
self.OUT_OF_ORDER_OFFSET = 7200
self.sort_fns = {'hot': hot_sort,
'top': top_sort,
'new': new_sort,
'controversial': controversial_sort}
def generate(self, subreddit, low_timestamp, high_timestamp,
limit=100, sort='hot'):
limit=100, sort='hot', use_sliding_window=False):
if not sort in self.sorts:
raise ValueError('Invalid sort parameter `{}`... Please use one of {}'\
.format(sort, self.sorts))
if limit > 1000:
if limit > 1000 or use_sliding_window:
# If we're retrieving more than 1000 posts, it is likely things
# will get pretty slow as the rate limiting catches up with us.
# TODO: Inform the user and add option to override.
return self._generate(subreddit, low_timestamp, high_timestamp, limit=limit, sort=sort)
#raise ValueError('Limit is too high ({} > 1000)... query would take too long.\n' \
# 'There is an alternative query API that can be used through the ' \
# '/search/slow/ route.'
# .format(limit))
# We assume that `low` and `high` timestamps are given
if low_timestamp is None or high_timestamp is None:
......@@ -85,109 +89,30 @@ class Search:
# Give results as a generator
return (submission for submission in search_results)
def _generate(self, subreddit, low_timestamp, high_timestamp,
limit=100, sort='hot'):
if not sort in self.sorts:
raise ValueError('Invalid sort parameter `{}`... Please use one of {}'\
.format(sort, self.sorts))
# We assume that `low` and `high` timestamps are given
if low_timestamp is None or high_timestamp is None:
raise ValueError('Timestamp must be given...')
# Take into account the broken reddit/unix timestamps
low_timestamp += self.REDDIT_TIMESTAMP_OFFSET
high_timestamp += self.REDDIT_TIMESTAMP_OFFSET
original_lowest_timestamp = low_timestamp
original_highest_timestamp = high_timestamp
# Take into account the fact that reddit can miss submissions in the
# timestamp range by ~1 hour.
low_timestamp -= self.OUT_OF_ORDER_OFFSET
high_timestamp += self.OUT_OF_ORDER_OFFSET
def _generate(self, subreddit, low_timestamp, high_timestamp, limit=100, sort='hot'):
# Create a reddit session to use
r = Reddit('Reddit Slider by /u/oracular_demon - v{}'.format(self.version))
window_size = 60 * 60
search_limit = 100
min_search_results_in_window = 50
window_adjustment_ratio = 1.25
backoff = self.BACKOFF_START
processed_submissions = 0
prev_win_increased = False
prev_win_decreased = False
about_to_hit_limit = False
generator = submissions_between(r, subreddit, low_timestamp, high_timestamp)
submissions = []
count = 0
while high_timestamp >= low_timestamp:
while count <= limit:
try:
t1 = max(high_timestamp - window_size, low_timestamp)
t2 = high_timestamp
query = 'timestamp:{}..{}'.format(t1, t2)
results = list(r.search(query,
subreddit=subreddit,
limit=search_limit,
syntax='cloudsearch',
sort=sort))
except HTTPException as exc:
time.sleep(backoff)
backoff *= 2
continue
if len(results) >= search_limit:
# Decrease the window size as we've got too many results in our window
power = 2 if prev_win_decreased else 1
window_size = int(window_size / window_adjustment_ratio**power)
prev_win_decreased = True
# Since it is possible that there are more submissions
# in the current window, we have to re-do the request
# with reduced window (i.e. don't yield results yet)
continue
else:
prev_win_decreased = False
results = [s for s in results
if original_lowest_timestamp <= s.created and
s.created <= original_highest_timestamp]
# If we're going to go over the user-specified limit, only
# generate submissions until the limit is hit (i.e. not the
# entire search results - just the difference)
if (processed_submissions + len(results)):
# Number of submissions we can generate before hitting
# the limit
diff = limit - processed_submissions
# Set a flag
about_to_hit_limit = True
else:
diff = 0
for i, submission in enumerate(results):
# If we reach the user specified limit, don't generate
# any more results
if about_to_hit_limit and i == diff:
return
else:
yield submission
processed_submissions += len(results)
high_timestamp -= (window_size + 1)
if len(results) < min_search_results_in_window:
power = 2 if prev_win_increased else 1
window_size = int(window_size * window_adjustment_ratio**power)
prev_win_increased = True
else:
prev_win_increased = False
submission = next(generator)
count += 1
submissions.append(submission)
except StopIteration:
print('Submission generator has been exhausted: count = {}, limit = {}' \
.format(count, limit))
break
sorted_submissions = sorted(submissions, key=self.sort_fns[sort])
for submission in sorted_submissions:
yield submission
......
......@@ -24,7 +24,7 @@ class Slider:
def get_all_posts(self, filter):
pass
def get_subreddit_posts(self, name, dimension, delta, sort, limit):
def get_subreddit_posts(self, name, dimension, delta, sort, limit, sliding_window=False):
subreddit = name
# Can't have a dimension and a delta
......@@ -60,7 +60,8 @@ class Slider:
start,
end,
sort=sort,
limit=limit)
limit=limit,
use_sliding_window=sliding_window)
except ValueError as e:
raise e
......
from datetime import datetime, timedelta
from math import log
# Copying the functionality from the reddit
# sorting algorithms so we can 'fake' it
# Reference instant for timestamp arithmetic: the Unix epoch as a naive datetime.
epoch = datetime(1970, 1, 1)


def epoch_seconds(date):
    """Return the seconds elapsed between the Unix epoch and `date`.

    `date` must be a naive datetime (it is subtracted from the naive
    `epoch`). The result is a float that includes the microsecond part.
    """
    elapsed = date - epoch
    whole_seconds = elapsed.days * 86400 + elapsed.seconds
    fraction = float(elapsed.microseconds) / 1000000
    return whole_seconds + fraction
def score(ups, downs):
    """Return the net vote score: upvotes minus downvotes."""
    net = ups - downs
    return net
def hot(ups, downs, timestamp):
    """Return the 'hot' ranking value for a post created at `timestamp`.

    `timestamp` is converted to a UTC datetime and passed to `_hot`
    together with the vote counts.
    """
    return _hot(ups, downs, datetime.utcfromtimestamp(timestamp))
def _hot(ups, downs, date):
    """Return the 'hot' ranking value from vote counts and a creation datetime.

    The value is the base-10 log of the absolute net score (at least 1),
    signed by the score's direction, plus an age term derived from `date`,
    rounded to 7 decimal places.
    """
    net = score(ups, downs)
    magnitude = log(max(abs(net), 1), 10)
    if net > 0:
        direction = 1
    elif net < 0:
        direction = -1
    else:
        direction = 0
    # Age is measured in seconds from the fixed reference instant 1134028003.
    age = epoch_seconds(date) - 1134028003
    return round(direction * magnitude + age / 45000, 7)
def controversy(ups, downs):
    """Return a controversy value for the given vote counts.

    Returns 0 when either count is zero or negative. Otherwise the total
    number of votes is raised to the ratio of the smaller count to the
    larger one, so heavily voted, evenly split posts score highest.
    """
    if downs <= 0 or ups <= 0:
        return 0
    magnitude = ups + downs
    balance = float(downs) / ups if ups > downs else float(ups) / downs
    return magnitude ** balance


# Sort keys for `sorted(submissions, key=...)`. Every key is multiplied by -1
# as an easy way of reversing the lists: a plain ascending sort then puts the
# highest-ranked submission first.
def top_sort(s):
    """Sort key: most upvoted submissions first."""
    return -1 * s.ups


def hot_sort(s):
    """Sort key: 'hottest' submissions first."""
    return -1 * hot(s.ups, s.downs, s.created_utc)


def new_sort(s):
    """Sort key: most recently created submissions first."""
    return -1 * s.created_utc


def controversial_sort(s):
    """Sort key: most controversial submissions first.

    Negated like the other keys; without the negation an ascending sort
    would list the least controversial submissions first.
    """
    return -1 * controversy(s.ups, s.downs)
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
const InformationSection = React.createClass({
render: function() {
return (
<div>
<div id="information-section">
<br/>
<h1>Reddit Slider</h1>
<hr/>
......
......@@ -3,7 +3,10 @@ const OptionsPanel = React.createClass({
return {datePickerInfo: 'Using the Date Picker allows for more fine-grained range choosing'
+ ' instead of using fuzzy string matching.',
slidingWindowInfo: 'Using a Sliding Window searching method may provide more results'
+ ' as the default search method doesn\'t ensure that the limit is reached.'};
+ ' as the default search method doesn\'t ensure that the limit is reached.'
+ '\n\n<strong>NOTE</strong>: Although the Sliding Window search will likely provide more accurate'
+ ' and comprehensive results for a given query, this comes at a cost of the time it'
+ ' takes to perform the search (be prepared to be patient!).'};
},
componentDidMount: function() {
$('[data-toggle="popover"]').popover({
......@@ -25,6 +28,15 @@ const OptionsPanel = React.createClass({
this.props.onChange({checkbox: checkbox, checked: checked,
name: name, type: optionType});
},
handleSlidingWindowOptionChange: function(event) {
var checkbox = $(event.target);
var checked = checkbox.is(':checked');
var name = checkbox.attr('name');
var optionType = 'slidingWindow';
this.props.onChange({checkbox: checkbox, checked: checked,
name: name, type: optionType});
},
render: function() {
return (
<div style={{'padding-bottom': '1em'}}>
......@@ -49,13 +61,14 @@ const OptionsPanel = React.createClass({
</a>
</label>
<label className="checkbox-inline">
<input type="checkbox" id="optionUseDatePicker" value="" />
<input onChange={this.handleSlidingWindowOptionChange} type="checkbox" id="optionUseSlidingWindow" name="useSlidingWindow" />
Use Sliding Window Search
&nbsp;
<a href="#"
data-toggle="popover"
title="Sliding Window Search"
data-content={this.state.slidingWindowInfo}>
data-content={this.state.slidingWindowInfo}
data-html="true">
<i className="fa fa-question-circle-o" aria-hidden="true"></i></a>
</label>
</div>
......
......@@ -12,6 +12,11 @@ const ResultsPanel = React.createClass({
showResults: true
});
},
position: function() {
var me = $(ReactDOM.findDOMNode(this));
return me.offset();
},
render: function() {
let noSubmissions = this.state.submissions.length == 0;
......
......@@ -7,15 +7,18 @@ const SearchForm = React.createClass({
let queryParams = {subreddit: 'programming', range: 'today',
sort: 'hot', limit: '100'};
let query = this.serializeQuery(queryParams);
//let query = this.serializeQuery(queryParams);
let urlBase = '/api/search/';
return {params: queryParams, query: query, url: urlBase};
return {params: queryParams, query: '', url: urlBase,
useSlidingWindow: false};
},
serializeQuery: function(params) {
let query = params.subreddit + '/?range=' + params.range.split(' ').join('+') +
'&sort=' + params.sort + '&limit=' + params.limit;
query = this.state.useSlidingWindow ? query + '&use_sliding_window=true' : query;
return query;
},
fetchResults: function() {
......@@ -63,6 +66,14 @@ const SearchForm = React.createClass({
loading: false
});
// We have to do this after the results are loaded
// due to the way the component is rendered
var offset = resultsPanel.position();
$("html,body").animate({
scrollTop: offset.top
}, 500);
}.bind(this));
},
handleSubmit: function(event) {
......@@ -86,6 +97,23 @@ const SearchForm = React.createClass({
useDateRangePicker: option.checked
});
}
else if (option.type == 'slidingWindow') {
this.setState({
useSlidingWindow: option.checked
});
this.serializeQuery(this.state.params);
}
else {
console.log('Unexpected option changed...');
}
},
componentDidMount: function() {
var query = this.serializeQuery(this.state.params);
this.setState({
query: query
});
},
render: function() {
return (
......
......@@ -2,6 +2,8 @@
<head>
<title>Reddit Slider | Search</title>
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">
<script src="{{ url_for('static', filename='bower_components/react/react.min.js') }}"></script>
<script src="{{ url_for('static', filename='bower_components/react/react-dom.min.js') }}"></script>
......@@ -17,6 +19,12 @@
<script src="{{ url_for('static', filename='bower_components/bootstrap-daterangepicker/daterangepicker.js') }}"></script>
<link rel="stylesheet" href="{{ url_for('static', filename='bower_components/bootstrap-daterangepicker/daterangepicker.css') }}">
<style type="text/css">
.popover {
white-space: pre-line;
}
</style>
</head>
<body>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment