Commit 742bdd07 authored by Michael Rose

hpff: add crawler for story content

parent d95230fd
require 'pp'
# Sidekiq worker that downloads the printer-friendly page of one HPFF story
# and stores the raw response body in the story's `content` attribute.
class IndexHpffStoryContent
  include Sidekiq::Worker
  sidekiq_options backtrace: true

  # Browser-like headers sent with every request. The Referer header is added
  # per request because it depends on the story being fetched.
  # NOTE: 'Accept-Encoding' => 'gzip, deflate, br' was deliberately left
  # disabled in the original code, so it is not sent here either.
  REQUEST_HEADERS = {
    'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Language' => 'en-US,en;q=0.9',
    'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36'
  }.freeze

  # Fetches and persists the content of a single story.
  #
  # Does nothing when no HpffStory row with the given id exists.
  #
  # @param story_id [Integer] primary key of the HpffStory record to fill in
  # @return [void]
  # @raise [RuntimeError] when the site answers with a non-2xx status; raising
  #   lets Sidekiq retry the job instead of persisting an error page
  # @raise [ActiveRecord::RecordInvalid] when saving the story fails validation
  def perform(story_id)
    story = HpffStory.find_by(id: story_id)
    return unless story

    # The regular story page is only used as the Referer of the request.
    prev_url = "https://www.harrypotterfanfiction.com/viewstory.php?psid=#{story.hpff_id}"
    url = "https://www.harrypotterfanfiction.com/printerfriendly.php?mode=story&object=#{story.hpff_id}"
    logger.info "HPFF: Pulling story #{url}..."

    response = Faraday.get(url) do |req|
      REQUEST_HEADERS.each { |name, value| req.headers[name] = value }
      req.headers['Referer'] = prev_url
    end

    # Never overwrite the stored content with an error page or redirect stub.
    unless response.success?
      raise "HPFF: fetching #{url} failed with HTTP status #{response.status}"
    end

    story.content = response.body
    story.save!

    # Self-chaining to the next story is currently disabled. The original
    # code was:
    #   IndexHpffStoryContent.perform_async(story_id + 2) if story_id + 2 <= 83478
  end
end
# Migration that adds a `content` column of type text to the `hpff_stories`
# table.
class AddContentToHpffStory < ActiveRecord::Migration[5.1]
  # Reversible schema change: adds the column on migrate, drops it on rollback.
  def change
    change_table :hpff_stories do |table|
      table.text :content
    end
  end
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment