Skip to content
Snippets Groups Projects
Commit f6654f4a authored by Julian Schmidhuber's avatar Julian Schmidhuber
Browse files

provider(duckduckgo): Add duckduckgo search results

Also made dependencies in `mastersearch-all` optional and enabled with
features.
parent e77c2162
Branches
No related tags found
No related merge requests found
......@@ -5,6 +5,7 @@ members = [
"mastersearch-wikimedia",
"mastersearch-crates-io",
"mastersearch-github",
"mastersearch-duckduckgo",
"mastersearch-all",
"mastersearch-cli",
]
......@@ -10,19 +10,19 @@ Mastersearch combines the search functionality of some popular websites into one
- [StackExchange](https://stackexchange.com/) for [Stackoverflow](https://stackoverflow.com) and similar websites (id: `stackexchange`).
- [crates.io](https://crates.io) (id: `crates.io`).
- [github.com](https://github.com) (id: `github`)
- [DuckDuckGo](https://duckduckgo.com) (id: `duckduckgo`)
For more details, please see the README of the respective module.
## Planned Websites
- [gitlab.com](https://gitlab.com) (TODO: Needs authentication)
- Some popular "normal" search engine ([Brave Search](https://search.brave.com) or [DuckDuckGo](https://duckduckgo.com) or similar)
If you want to see other websites supported, please create an issue, or consider working on it yourself and opening a PR.
## Usage
Build `mastersearch-cli` (`cargo build --release -p mastersearch-cli`) and install the resulting binary from `target/release/mastersearch-cli` somewhere in your `$PATH`. You should also consider creating an alias.
Build `mastersearch-cli` (`cargo build --release -p mastersearch-cli`; this needs a fairly recent Rust version) and install the resulting binary from `target/release/mastersearch-cli` somewhere in your `$PATH`. You should also consider creating an alias.
Now just type `mastersearch-cli <Your Search Goes Here>` to search. By default, [en.wikipedia.org](https://en.wikipedia.org) and [stackoverflow.com](https://stackoverflow.com) will be searched. You can change your searchers in the settings. For more information, see the `Settings`-section.
......
......@@ -7,14 +7,16 @@ edition = "2021"
[dependencies]
ms = { package = "mastersearch", path = "../mastersearch" }
ms_stackexchange = { package = "mastersearch-stackexchange", path = "../mastersearch-stackexchange" }
ms_wikimedia = { package = "mastersearch-wikimedia", path = "../mastersearch-wikimedia" }
ms_crates_io = { package = "mastersearch-crates-io", path = "../mastersearch-crates-io" }
ms_github = { package = "mastersearch-github", path = "../mastersearch-github" }
ms_stackexchange = { package = "mastersearch-stackexchange", path = "../mastersearch-stackexchange", optional = true }
ms_wikimedia = { package = "mastersearch-wikimedia", path = "../mastersearch-wikimedia", optional = true }
ms_crates_io = { package = "mastersearch-crates-io", path = "../mastersearch-crates-io", optional = true }
ms_github = { package = "mastersearch-github", path = "../mastersearch-github", optional = true }
ms_duckduckgo = { package = "mastersearch-duckduckgo", path = "../mastersearch-duckduckgo", optional = true }
[features]
default = ["stackexchange", "wikimedia", "crates_io", "github"]
stackexchange = []
wikimedia = []
crates_io = []
github = []
default = ["stackexchange", "wikimedia", "crates_io", "github", "duckduckgo"]
stackexchange = ["dep:ms_stackexchange"]
wikimedia = ["dep:ms_wikimedia"]
crates_io = ["dep:ms_crates_io"]
github = ["dep:ms_github"]
duckduckgo = ["dep:ms_duckduckgo"]
......@@ -9,6 +9,8 @@ pub fn all_creators(config: MasterConfig) -> SearchCreatorResolver {
#[cfg(feature = "crates_io")]
all.insert(ms_crates_io::CratesIOCreator::from(config.clone()));
#[cfg(feature = "github")]
all.insert(ms_github::GithubCreator::from(config));
all.insert(ms_github::GithubCreator::from(config.clone()));
#[cfg(feature = "duckduckgo")]
all.insert(ms_duckduckgo::DuckDuckGoCreator::from(config));
all
}
[package]
name = "mastersearch-duckduckgo"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
async-trait = "^0.1"
reqwest = { version = "^0.11", features = ["gzip", "json"] }
ms = { package = "mastersearch", path = "../mastersearch" }
scraper = "^0.12"
urlencoding = "^2.1"
[dev-dependencies]
tokio = { version = "1.17", features = ["full"] }
env_logger = "0.9"
# Mastersearch-DuckDuckGo (ID: `duckduckgo`)
The mastersearch interface to DuckDuckGo search results.
## Configuration
This provider has no provider-specific configuration options.
use std::collections::HashMap;
use mastersearch_duckduckgo::DuckDuckGoCreator;
use ms::{SearchCreator, SearchQuery};
/// Example binary: runs one DuckDuckGo search for "hello" and prints each hit
/// (title, URL and, when present, the snippet on an indented line).
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    env_logger::init();

    // Assemble the HTTP client that the provider will use for its requests.
    let builder = reqwest::ClientBuilder::new()
        .gzip(true)
        .user_agent("Mastersearch/0.1.0 (mastersearch@schmiddi.anonaddy.com) reqwest/0.11");
    let http = builder.build().expect("Failed to build reqwest::Client");

    // An empty settings map is passed to the creator.
    let settings = HashMap::new();
    let creator = DuckDuckGoCreator { client: http };
    let searcher = creator
        .create(&settings)
        .expect("Failed to create DuckDuckGoProvider");

    let query = SearchQuery("hello".to_string());
    let hits = searcher.search(&query).await?;
    for hit in hits {
        println!("Result: {} ({})", hit.title, hit.url);
        if let Some(snippet) = hit.description {
            println!("\t{}", snippet)
        }
    }

    Ok(())
}
mod scraper;
use std::collections::HashMap;
use async_trait::async_trait;
use ms::{
MasterConfig, ProviderCreationError, SearchCreator, SearchProvider, SearchProviderWrapper,
};
/// Creator (factory) for [`DuckDuckGoProvider`] instances, registered under the ID `duckduckgo`.
pub struct DuckDuckGoCreator {
/// HTTP client; a clone of it is handed to every provider this creator builds.
pub client: reqwest::Client,
}
impl From<MasterConfig> for DuckDuckGoCreator {
fn from(c: MasterConfig) -> Self {
Self { client: c.client }
}
}
/// Search provider that fetches DuckDuckGo's HTML results page and scrapes the hits from it.
pub struct DuckDuckGoProvider {
/// HTTP client used to send the search requests.
client: reqwest::Client,
}
impl SearchCreator for DuckDuckGoCreator {
    /// Returns the provider ID, `duckduckgo`.
    fn id(&self) -> &'static str {
        "duckduckgo"
    }

    /// Creates a new DuckDuckGo provider sharing this creator's HTTP client.
    ///
    /// The provider is passed through [`SearchProviderWrapper::wrap`] together with
    /// `config` before being boxed. This implementation never returns an error itself.
    fn create(
        &self,
        config: &HashMap<String, String>,
    ) -> Result<Box<dyn SearchProvider>, ProviderCreationError> {
        let provider = DuckDuckGoProvider {
            client: self.client.clone(),
        };
        let wrapped = SearchProviderWrapper::wrap(provider, config);
        Ok(Box::new(wrapped))
    }
}
#[async_trait(?Send)]
impl SearchProvider for DuckDuckGoProvider {
    /// Searches DuckDuckGo for `query` and returns the scraped results.
    ///
    /// The request goes to the JavaScript-free HTML frontend
    /// (`https://html.duckduckgo.com/html/`) with the search text passed as the `q`
    /// query parameter; the response body is then handed to `scraper::extract`.
    ///
    /// # Errors
    ///
    /// Returns an [`ms::SearchError`] if sending the request or reading the response
    /// body fails. The HTTP status code is not inspected: whatever body comes back is
    /// scraped, so an error page simply yields no results.
    async fn search(
        &self,
        query: &ms::SearchQuery,
    ) -> Result<Vec<ms::SearchResult>, ms::SearchError> {
        let text = self
            .client
            .get("https://html.duckduckgo.com/html/")
            // Let reqwest build the query string so that the search text is
            // percent-encoded; spaces, `&`, `#`, `+` etc. would otherwise corrupt the URL.
            .query(&[("q", query.0.as_str())])
            .send()
            .await?
            .text()
            .await?;
        Ok(scraper::extract(&text))
    }
}
use ms::SearchResult;
use scraper::{Html, Selector};
/// Scrapes the search hits out of a DuckDuckGo HTML results page.
///
/// Every element matching `.web-result` becomes one [`SearchResult`]:
/// * `title` – trimmed text of the first `.result__title` descendant, or `""` if absent;
/// * `url` – `href` of the first `.result__a` descendant passed through `convert_url`,
///   or `""` if there is no such element;
/// * `description` – trimmed text of the first `.result__snippet` descendant, if any.
///
/// # Panics
///
/// Panics only if one of the hard-coded CSS selectors fails to parse.
pub fn extract(string: &str) -> Vec<SearchResult> {
    let document = Html::parse_document(string);

    let hit_selector =
        Selector::parse(".web-result").expect("Failed to parse `selector_web_result`");
    let title_selector =
        Selector::parse(".result__title").expect("Failed to parse `selector_title`");
    let link_selector = Selector::parse(".result__a").expect("Failed to parse `selector_url`");
    let snippet_selector =
        Selector::parse(".result__snippet").expect("Failed to parse `selector_snippet`");

    let mut results = Vec::new();
    for hit in document.select(&hit_selector) {
        let title = hit
            .select(&title_selector)
            .next()
            .map(|node| node.text().collect::<String>().trim().to_owned())
            .unwrap_or_default();

        let url = hit
            .select(&link_selector)
            .next()
            .map(|node| convert_url(node.value().attr("href").unwrap_or("")))
            .unwrap_or_default();

        let description = hit
            .select(&snippet_selector)
            .next()
            .map(|node| node.text().collect::<String>().trim().to_owned());

        results.push(SearchResult {
            title,
            url,
            description,
        });
    }
    results
}
/// Turns a DuckDuckGo redirect link into the URL of the actual search hit.
///
/// Result links have the form `//duckduckgo.com/l/?uddg=<percent-encoded target>&rut=<token>`.
/// This strips the redirect prefix (when present), cuts off everything from the last
/// `&rut=` onwards (when present; presumably a tracking token, it is not part of the
/// target) and percent-decodes the remainder.
///
/// If the remainder does not decode to valid UTF-8, it is returned still encoded, but
/// without the `&rut=` suffix.
fn convert_url(url: &str) -> String {
    let without_prefix = url.strip_prefix("//duckduckgo.com/l/?uddg=").unwrap_or(url);
    let target = without_prefix
        .rsplit_once("&rut=")
        .map_or(without_prefix, |(target, _token)| target);
    urlencoding::decode(target)
        .map(|decoded| decoded.into_owned())
        // Fall back to the stripped (not the suffixed) form, and only allocate it
        // when decoding actually failed.
        .unwrap_or_else(|_| target.to_owned())
}
......@@ -22,6 +22,12 @@ impl Display for SearchError {
// Empty impl: `SearchError` uses the default methods of `std::error::Error` unchanged.
impl std::error::Error for SearchError {}
impl From<reqwest::Error> for SearchError {
fn from(e: reqwest::Error) -> Self {
Self::Request(e)
}
}
/// Failed to create the [SearchProvider][crate::SearchProvider].
#[derive(Debug)]
pub struct ProviderCreationError {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment