Commit 8c0a40ce authored by Aloshi

Refactored scraper code to support multiple requests, even multiple requests mixed between scrapers.
parent 40ca44e5
cmake_minimum_required(VERSION 2.6)
INCLUDE(CPack)
project(emulationstation)
#-------------------------------------------------------------------------------
......
@@ -3,7 +3,6 @@
#include "pugiXML/pugixml.hpp"
#include "platform.h"
#include <boost/filesystem.hpp>
#include "scrapers/GamesDBScraper.h"
Settings* Settings::sInstance = NULL;
@@ -50,8 +49,7 @@ void Settings::setDefaults()
mStringMap["TransitionStyle"] = "fade";
mStringMap["ThemeSet"] = "";
mStringMap["ScreenSaverBehavior"] = "dim";
mScraper = std::shared_ptr<Scraper>(new GamesDBScraper());
mStringMap["Scraper"] = "TheGamesDB";
}
template <typename K, typename V>
@@ -83,9 +81,6 @@ void Settings::saveFile()
node.append_attribute("value").set_value(iter->second.c_str());
}
pugi::xml_node scraperNode = doc.append_child("scraper");
scraperNode.append_attribute("value").set_value(mScraper->getName());
doc.save_file(path.c_str());
}
@@ -112,23 +107,6 @@ void Settings::loadFile()
setFloat(node.attribute("name").as_string(), node.attribute("value").as_float());
for(pugi::xml_node node = doc.child("string"); node; node = node.next_sibling("string"))
setString(node.attribute("name").as_string(), node.attribute("value").as_string());
if(doc.child("scraper"))
{
std::shared_ptr<Scraper> scr = createScraperByName(doc.child("scraper").attribute("value").as_string());
if(scr)
mScraper = scr;
}
}
std::shared_ptr<Scraper> Settings::getScraper()
{
return mScraper;
}
void Settings::setScraper(std::shared_ptr<Scraper> scraper)
{
mScraper = scraper;
}
//Print a warning message if the setting we're trying to get doesn't already exist in the map, then return the value in the map.
......
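With the dedicated accessors gone, the chosen scraper is just another entry in mStringMap, saved and loaded like any other <string> setting. A minimal sketch of the new access pattern, using only calls that appear in this diff:

std::string scraperName = Settings::getInstance()->getString("Scraper"); // seeded with "TheGamesDB" by setDefaults()

Settings::getInstance()->setString("Scraper", "TheArchive"); // switching scrapers is now a plain string write
Settings::getInstance()->saveFile(); // serialized as an ordinary <string name="Scraper" value="..."/> node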
#ifndef _SETTINGS_H_
#define _SETTINGS_H_
#pragma once
#include <string>
#include <map>
#include "scrapers/Scraper.h"
//This is a singleton for storing settings.
class Settings
@@ -25,9 +22,6 @@ public:
void setFloat(const std::string& name, float value);
void setString(const std::string& name, const std::string& value);
std::shared_ptr<Scraper> getScraper();
void setScraper(std::shared_ptr<Scraper> scraper);
private:
static Settings* sInstance;
@@ -41,8 +35,5 @@ private:
std::map<std::string, float> mFloatMap;
std::map<std::string, std::string> mStringMap;
std::shared_ptr<Scraper> mScraper;
std::string mHomePathOverride;
};
#endif
@@ -212,7 +212,7 @@ void ScraperSearchComponent::search(const ScraperSearchParams& params)
updateInfoPane();
mLastSearch = params;
mSearchHandle = Settings::getInstance()->getScraper()->getResultsAsync(params);
mSearchHandle = startScraperSearch(params);
}
void ScraperSearchComponent::stop()
......
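For context, the handle returned by startScraperSearch() does nothing on its own; whoever owns it has to pump it. A hedged sketch of the polling side, assuming AsyncHandle exposes a status() accessor for the ASYNC_* states seen in this diff; the ASYNC_ERROR state and the onSearchDone()/onSearchError() handlers are hypothetical:

void ScraperSearchComponent::update(int deltaTime)
{
	if(mSearchHandle)
	{
		mSearchHandle->update(); // drives the handle's request queue forward

		if(mSearchHandle->status() == ASYNC_DONE)
		{
			onSearchDone(mSearchHandle->getResults()); // hypothetical handler
			mSearchHandle.reset();
		}else if(mSearchHandle->status() == ASYNC_ERROR)
		{
			onSearchError(); // hypothetical handler
			mSearchHandle.reset();
		}
	}

	GuiComponent::update(deltaTime);
}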
@@ -38,16 +38,13 @@ GuiMenu::GuiMenu(Window* window) : GuiComponent(window), mMenu(window, "MAIN MEN
auto s = new GuiSettings(mWindow, "SCRAPER");
// scrape from
auto scraper_list = std::make_shared< OptionListComponent< std::shared_ptr<Scraper> > >(mWindow, "SCRAPE FROM", false);
std::vector< std::shared_ptr<Scraper> > scrapers;
scrapers.push_back(std::make_shared<GamesDBScraper>());
scrapers.push_back(std::make_shared<TheArchiveScraper>());
auto scraper_list = std::make_shared< OptionListComponent< std::string > >(mWindow, "SCRAPE FROM", false);
std::vector<std::string> scrapers = getScraperList();
for(auto it = scrapers.begin(); it != scrapers.end(); it++)
scraper_list->add((*it)->getName(), *it, (*it)->getName() == Settings::getInstance()->getScraper()->getName());
scraper_list->add(*it, *it, *it == Settings::getInstance()->getString("Scraper"));
s->addWithLabel("SCRAPE FROM", scraper_list);
s->addSaveFunc([scraper_list] { Settings::getInstance()->setScraper(scraper_list->getSelected()); });
s->addSaveFunc([scraper_list] { Settings::getInstance()->setString("Scraper", scraper_list->getSelected()); });
// scrape ratings
auto scrape_ratings = std::make_shared<SwitchComponent>(mWindow);
......
#include "GuiSettings.h"
#include "../Window.h"
#include "../Settings.h"
#include "../views/ViewController.h"
......
#include "GamesDBScraper.h"
#include "../components/ScraperSearchComponent.h"
#include "../components/AsyncReqComponent.h"
#include "Scraper.h"
#include "../Log.h"
#include "../pugiXML/pugixml.hpp"
#include "../MetaData.h"
#include "../Settings.h"
#include <boost/assign.hpp>
const char* GamesDBScraper::getName() { return "TheGamesDB"; }
using namespace PlatformIds;
const std::map<PlatformId, const char*> gamesdb_platformid_map = boost::assign::map_list_of
(THREEDO, "3DO")
@@ -61,14 +59,15 @@ const std::map<PlatformId, const char*> gamesdb_platformid_map = boost::assign::
(ZX_SPECTRUM, "Sinclair ZX Spectrum");
std::unique_ptr<ScraperSearchHandle> GamesDBScraper::getResultsAsync(const ScraperSearchParams& params)
void thegamesdb_generate_scraper_requests(const ScraperSearchParams& params, std::queue< std::unique_ptr<ScraperRequest> >& requests,
std::vector<ScraperSearchResult>& results)
{
std::string path = "/api/GetGame.php?";
std::string path = "thegamesdb.net/api/GetGame.php?";
std::string cleanName = params.nameOverride;
if(cleanName.empty())
cleanName = params.game->getCleanName();
path += "name=" + HttpReq::urlEncode(cleanName);
if(params.system->getPlatformId() != PLATFORM_UNKNOWN)
@@ -78,58 +77,33 @@ std::unique_ptr<ScraperSearchHandle> GamesDBScraper::getResultsAsync(const Scrap
{
path += "&platform=";
path += HttpReq::urlEncode(platformIt->second);
}else{
}
else{
LOG(LogWarning) << "TheGamesDB scraper warning - no support for platform " << getPlatformName(params.system->getPlatformId());
}
}
path = "thegamesdb.net" + path;
return std::unique_ptr<ScraperSearchHandle>(new GamesDBHandle(params, path));
}
GamesDBHandle::GamesDBHandle(const ScraperSearchParams& params, const std::string& url) :
mReq(std::unique_ptr<HttpReq>(new HttpReq(url)))
{
setStatus(ASYNC_IN_PROGRESS);
requests.push(std::unique_ptr<ScraperRequest>(new ScraperHttpRequest(results, path, &thegamesdb_process_httpreq)));
}
void GamesDBHandle::update()
void thegamesdb_process_httpreq(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results)
{
if(mStatus == ASYNC_DONE)
return;
if(mReq->status() == HttpReq::REQ_IN_PROGRESS)
return;
if(mReq->status() != HttpReq::REQ_SUCCESS)
{
std::stringstream ss;
ss << "Network error - " << mReq->getErrorMsg();
setError(ss.str());
return;
}
// our HTTP request was successful
// try to build our result list
std::vector<ScraperSearchResult> results;
assert(req->status() == HttpReq::REQ_SUCCESS);
pugi::xml_document doc;
pugi::xml_parse_result parseResult = doc.load(mReq->getContent().c_str());
pugi::xml_parse_result parseResult = doc.load(req->getContent().c_str());
if(!parseResult)
{
setError("Error parsing XML");
LOG(LogError) << "GamesDBRequest - Error parsing XML. \n\t" << parseResult.description() << "";
return;
}
pugi::xml_node data = doc.child("Data");
std::string baseImageUrl = data.child("baseImgUrl").text().get();
unsigned int resultNum = 0;
pugi::xml_node game = data.child("Game");
while(game && resultNum < MAX_SCRAPER_RESULTS)
while(game && results.size() < MAX_SCRAPER_RESULTS)
{
ScraperSearchResult result;
@@ -166,12 +140,6 @@ void GamesDBHandle::update()
}
results.push_back(result);
resultNum++;
game = game.next_sibling("Game");
}
setStatus(ASYNC_DONE);
setResults(results);
return;
}
#pragma once
#include "Scraper.h"
#include "../HttpReq.h"
class GamesDBHandle : public ScraperSearchHandle
{
public:
GamesDBHandle(const ScraperSearchParams& params, const std::string& url);
void thegamesdb_generate_scraper_requests(const ScraperSearchParams& params, std::queue< std::unique_ptr<ScraperRequest> >& requests,
std::vector<ScraperSearchResult>& results);
void update() override;
private:
std::unique_ptr<HttpReq> mReq;
ScraperSearchParams mParams;
};
class GamesDBScraper : public Scraper
{
public:
std::unique_ptr<ScraperSearchHandle> getResultsAsync(const ScraperSearchParams& params) override;
const char* getName();
};
void thegamesdb_process_httpreq(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results);
@@ -4,21 +4,90 @@
#include "../Settings.h"
#include <FreeImage.h>
#include <boost/filesystem.hpp>
#include <boost/regex.hpp>
#include <boost/assign.hpp>
#include "GamesDBScraper.h"
#include "TheArchiveScraper.h"
std::shared_ptr<Scraper> createScraperByName(const std::string& name)
const std::map<std::string, generate_scraper_requests_func> scraper_request_funcs = boost::assign::map_list_of
("TheGamesDB", &thegamesdb_generate_scraper_requests)
("TheArchive", &thearchive_generate_scraper_requests);
std::unique_ptr<ScraperSearchHandle> startScraperSearch(const ScraperSearchParams& params)
{
if(name == "TheGamesDB")
return std::shared_ptr<Scraper>(new GamesDBScraper());
else if(name == "TheArchive")
return std::shared_ptr<Scraper>(new TheArchiveScraper());
const std::string& name = Settings::getInstance()->getString("Scraper");
return nullptr;
std::unique_ptr<ScraperSearchHandle> handle(new ScraperSearchHandle());
scraper_request_funcs.at(name)(params, handle->mRequestQueue, handle->mResults);
return handle;
}
std::vector<std::string> getScraperList()
{
std::vector<std::string> list;
for(auto it = scraper_request_funcs.begin(); it != scraper_request_funcs.end(); it++)
{
list.push_back(it->first);
}
return list;
}
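Under this scheme, adding a scraper means writing two free functions and one map entry - there is no class to subclass. A sketch where every myscraper_* name (and the URL) is hypothetical:

// turn a finished HTTP response into results
void myscraper_process_httpreq(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results)
{
	// parse req->getContent() and push_back up to MAX_SCRAPER_RESULTS results
}

// queue up the request(s) one search needs - here, a single HTTP request
void myscraper_generate_scraper_requests(const ScraperSearchParams& params,
	std::queue< std::unique_ptr<ScraperRequest> >& requests, std::vector<ScraperSearchResult>& results)
{
	std::string cleanName = params.nameOverride.empty() ? params.game->getCleanName() : params.nameOverride;
	std::string path = "example.com/api/search?name=" + HttpReq::urlEncode(cleanName);
	requests.push(std::unique_ptr<ScraperRequest>(new ScraperHttpRequest(results, path, &myscraper_process_httpreq)));
}

// ...plus one more entry in scraper_request_funcs:
//   ("MyScraper", &myscraper_generate_scraper_requests)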
// ScraperSearchHandle
ScraperSearchHandle::ScraperSearchHandle()
{
setStatus(ASYNC_IN_PROGRESS);
}
void ScraperSearchHandle::update()
{
if(mStatus == ASYNC_DONE)
return;
while(!mRequestQueue.empty() && mRequestQueue.front()->update())
mRequestQueue.pop();
if(mRequestQueue.empty())
{
setStatus(ASYNC_DONE);
return;
}
}
// ScraperRequest
ScraperRequest::ScraperRequest(std::vector<ScraperSearchResult>& resultsWrite) : mResults(resultsWrite)
{
}
// ScraperHttpRequest
ScraperHttpRequest::ScraperHttpRequest(std::vector<ScraperSearchResult>& resultsWrite, const std::string& url, scraper_process_httpreq processFunc)
: ScraperRequest(resultsWrite), mProcessFunc(processFunc)
{
mReq = std::unique_ptr<HttpReq>(new HttpReq(url));
}
bool ScraperHttpRequest::update()
{
if(mReq->status() == HttpReq::REQ_SUCCESS)
{
mProcessFunc(mReq, mResults);
return true;
}
if(mReq->status() == HttpReq::REQ_IN_PROGRESS)
return false;
// everything else is some sort of error
LOG(LogError) << "ScraperHttpRequest network error - " << mReq->getErrorMsg();
return true;
}
// metadata resolving stuff
std::unique_ptr<MDResolveHandle> resolveMetaDataAssets(const ScraperSearchResult& result, const ScraperSearchParams& search)
{
return std::unique_ptr<MDResolveHandle>(new MDResolveHandle(result, search));
......
@@ -6,6 +6,7 @@
#include "../AsyncHandle.h"
#include <vector>
#include <functional>
#include <queue>
struct ScraperSearchParams
{
@@ -24,29 +25,87 @@ struct ScraperSearchResult
std::string thumbnailUrl;
};
class ScraperSearchHandle : public AsyncHandle
// So let me explain why I've abstracted this so heavily.
// There are two ways I can think of that you'd want to write a scraper.
// 1. Do some HTTP request(s) -> process it -> return the results
// 2. Do some local filesystem queries (an offline scraper) -> return the results
// The first way needs to be asynchronous while it's waiting for the HTTP request to return.
// The second doesn't.
// It would be nice if we could write it like this:
// search = generate_http_request(searchparams);
// wait_until_done(search);
// ... process search ...
// return results;
// We could do this if we used threads. Right now ES doesn't because I'm pretty sure I'll fuck it up,
// and I'm not sure of the performance of threads on the Pi (single-core ARM).
// We could also do this if we used coroutines.
// I can't find a really good cross-platform coroutine library (x86/64/ARM Linux + Windows),
// and I don't want to spend more time chasing libraries than just writing it the long way once.
// So, I did it the "long" way.
// ScraperSearchHandle - one logical search, e.g. "search for mario"
// ScraperRequest - encapsulates some sort of asynchronous request that will ultimately return some results
// ScraperHttpRequest - implementation of ScraperRequest that waits on an HttpReq, then processes it with some processing function.
// a scraper search gathers results from (potentially multiple) ScraperRequests
class ScraperRequest
{
public:
virtual void update() = 0;
inline const std::vector<ScraperSearchResult>& getResults() const { assert(mStatus != ASYNC_IN_PROGRESS); return mResults; }
ScraperRequest(std::vector<ScraperSearchResult>& resultsWrite);
// returns "true" once we're done
virtual bool update() = 0;
protected:
inline void setResults(const std::vector<ScraperSearchResult>& results) { mResults = results; }
std::vector<ScraperSearchResult>& mResults;
};
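The offline case from the long comment above would subclass ScraperRequest directly and finish in a single pass - no HttpReq to wait on. A hypothetical sketch:

// hypothetical: results come from a local file, so update() can finish immediately
class LocalXmlRequest : public ScraperRequest
{
public:
	LocalXmlRequest(std::vector<ScraperSearchResult>& resultsWrite, const std::string& path)
		: ScraperRequest(resultsWrite), mPath(path) {}

	bool update() override
	{
		// read mPath and fill mResults with ScraperSearchResults here
		return true; // done - ScraperSearchHandle will pop this request on its next update()
	}

private:
	std::string mPath;
};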
typedef void (*scraper_process_httpreq)(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results);
// a single HTTP request that needs to be processed to get the results
class ScraperHttpRequest : public ScraperRequest
{
public:
ScraperHttpRequest(std::vector<ScraperSearchResult>& resultsWrite, const std::string& url, scraper_process_httpreq processFunc);
bool update() override;
private:
std::vector<ScraperSearchResult> mResults;
scraper_process_httpreq mProcessFunc;
std::unique_ptr<HttpReq> mReq;
};
class Scraper
// a request to get a list of results
class ScraperSearchHandle : public AsyncHandle
{
public:
//Get a list of potential results.
virtual std::unique_ptr<ScraperSearchHandle> getResultsAsync(const ScraperSearchParams& params) = 0;
ScraperSearchHandle();
void update();
inline const std::vector<ScraperSearchResult>& getResults() const { assert(mStatus != ASYNC_IN_PROGRESS); return mResults; }
virtual const char* getName() = 0;
protected:
friend std::unique_ptr<ScraperSearchHandle> startScraperSearch(const ScraperSearchParams& params);
std::queue< std::unique_ptr<ScraperRequest> > mRequestQueue;
std::vector<ScraperSearchResult> mResults;
};
std::shared_ptr<Scraper> createScraperByName(const std::string& name);
// will use the current scraper settings to pick the result source
std::unique_ptr<ScraperSearchHandle> startScraperSearch(const ScraperSearchParams& params);
// returns a list of valid scraper names
std::vector<std::string> getScraperList();
typedef void (*generate_scraper_requests_func)(const ScraperSearchParams& params, std::queue< std::unique_ptr<ScraperRequest> >& requests, std::vector<ScraperSearchResult>& results);
// -------------------------------------------------------------------------
// Meta data asset downloading stuff.
......
@@ -4,64 +4,37 @@
#include "../Log.h"
#include "../pugiXML/pugixml.hpp"
const char* TheArchiveScraper::getName() { return "TheArchive"; }
std::unique_ptr<ScraperSearchHandle> TheArchiveScraper::getResultsAsync(const ScraperSearchParams& params)
void thearchive_generate_scraper_requests(const ScraperSearchParams& params, std::queue< std::unique_ptr<ScraperRequest> >& requests,
std::vector<ScraperSearchResult>& results)
{
std::string path = "/2.0/Archive.search/xml/7TTRM4MNTIKR2NNAGASURHJOZJ3QXQC5/";
std::string path = "api.archive.vg/2.0/Archive.search/xml/7TTRM4MNTIKR2NNAGASURHJOZJ3QXQC5/";
std::string cleanName = params.nameOverride;
if(cleanName.empty())
cleanName = params.game->getCleanName();
path += HttpReq::urlEncode(cleanName);
// platform TODO: should use a params.system getter
path = "api.archive.vg" + path;
return std::unique_ptr<ScraperSearchHandle>(new TheArchiveHandle(params, path));
}
TheArchiveHandle::TheArchiveHandle(const ScraperSearchParams& params, const std::string& url) :
mReq(std::unique_ptr<HttpReq>(new HttpReq(url)))
{
setStatus(ASYNC_IN_PROGRESS);
requests.push(std::unique_ptr<ScraperRequest>(new ScraperHttpRequest(results, path, &thearchive_process_httpreq)));
}
void TheArchiveHandle::update()
void thearchive_process_httpreq(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results)
{
if(mStatus == ASYNC_DONE)
return;
if(mReq->status() == HttpReq::REQ_IN_PROGRESS)
return;
if(mReq->status() != HttpReq::REQ_SUCCESS)
{
std::stringstream ss;
ss << "Network error: " << mReq->getErrorMsg();
setError(ss.str());
return;
}
// if we're here, our HTTP request finished successfully
// so, let's try building our result list
std::vector<ScraperSearchResult> results;
assert(req->status() == HttpReq::REQ_SUCCESS);
pugi::xml_document doc;
pugi::xml_parse_result parseResult = doc.load(mReq->getContent().c_str());
pugi::xml_parse_result parseResult = doc.load(req->getContent().c_str());
if(!parseResult)
{
setError("Error parsing XML");
LOG(LogError) << "TheArchiveRequest - error parsing XML.\n\t" << parseResult.description();
return;
}
pugi::xml_node data = doc.child("OpenSearchDescription").child("games");
unsigned int resultNum = 0;
pugi::xml_node game = data.child("game");
while(game && resultNum < MAX_SCRAPER_RESULTS)
while(game && results.size() < MAX_SCRAPER_RESULTS)
{
ScraperSearchResult result;
@@ -86,11 +59,6 @@ void TheArchiveHandle::update()
result.thumbnailUrl = thumbnail.text().get();
results.push_back(result);
resultNum++;
game = game.next_sibling("game");
}
setStatus(ASYNC_DONE);
setResults(results);
}
#pragma once
#include "Scraper.h"
#include "../HttpReq.h"
class TheArchiveHandle : public ScraperSearchHandle
{
public:
TheArchiveHandle(const ScraperSearchParams& params, const std::string& url);
void thearchive_generate_scraper_requests(const ScraperSearchParams& params, std::queue< std::unique_ptr<ScraperRequest> >& requests,
std::vector<ScraperSearchResult>& results);
void update() override;
private:
std::unique_ptr<HttpReq> mReq;
ScraperSearchParams mParams;
};
class TheArchiveScraper : public Scraper
{
public:
std::unique_ptr<ScraperSearchHandle> getResultsAsync(const ScraperSearchParams& params) override;
const char* getName();
};
void thearchive_process_httpreq(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results);