diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..1cada4a --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "rust-analyzer.linkedProjects": [ + "./tatoeba/Cargo.toml", + "./images/Cargo.toml", + "./utils/Cargo.toml" + ] +} \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 94073b2..fd33e89 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -320,6 +320,12 @@ version = "3.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" +[[package]] +name = "byteorder" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" + [[package]] name = "bytes" version = "1.4.0" @@ -412,6 +418,29 @@ dependencies = [ "typenum", ] +[[package]] +name = "cssparser" +version = "0.31.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b3df4f93e5fbbe73ec01ec8d3f68bba73107993a5b1e7519273c32db9b0d5be" +dependencies = [ + "cssparser-macros", + "dtoa-short", + "itoa", + "phf 0.11.2", + "smallvec", +] + +[[package]] +name = "cssparser-macros" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" +dependencies = [ + "quote", + "syn 2.0.28", +] + [[package]] name = "deranged" version = "0.3.7" @@ -441,6 +470,27 @@ dependencies = [ "crypto-common", ] +[[package]] +name = "dtoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcbb2bf8e87535c23f7a8a321e364ce21462d0ff10cb6407820e8e96dfff6653" + +[[package]] +name = "dtoa-short" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbaceec3c6e4211c79e7b1800fb9680527106beb2f9c51904a3210c03a448c74" +dependencies = [ + "dtoa", +] + +[[package]] +name = 
"ego-tree" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a68a4904193147e0a8dec3314640e6db742afd5f6e634f428a6af230d9b3591" + [[package]] name = "encoding_rs" version = "0.8.32" @@ -517,6 +567,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" +dependencies = [ + "mac", + "new_debug_unreachable", +] + [[package]] name = "futures-channel" version = "0.3.28" @@ -556,6 +616,15 @@ dependencies = [ "pin-utils", ] +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -566,6 +635,15 @@ dependencies = [ "version_check", ] +[[package]] +name = "getopts" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" +dependencies = [ + "unicode-width", +] + [[package]] name = "getrandom" version = "0.2.10" @@ -614,6 +692,20 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" +[[package]] +name = "html5ever" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7" +dependencies = [ + "log", + "mac", + "markup5ever", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "http" version = "0.2.9" @@ -695,6 +787,20 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "images" +version = "0.1.0" +dependencies = [ + "actix-web", + "lazy_static", + "mime", + "rand", + 
"reqwest", + "scraper", + "serde_json", + "utils", +] + [[package]] name = "indexmap" version = "1.9.3" @@ -793,6 +899,26 @@ version = "0.4.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + +[[package]] +name = "markup5ever" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016" +dependencies = [ + "log", + "phf 0.10.1", + "phf_codegen", + "string_cache", + "string_cache_codegen", + "tendril", +] + [[package]] name = "memchr" version = "2.5.0" @@ -844,6 +970,12 @@ dependencies = [ "tempfile", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" + [[package]] name = "num_cpus" version = "1.16.0" @@ -948,6 +1080,86 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" +[[package]] +name = "phf" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259" +dependencies = [ + "phf_shared 0.10.0", +] + +[[package]] +name = "phf" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +dependencies = [ + "phf_macros", + "phf_shared 0.11.2", +] + +[[package]] +name = "phf_codegen" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd" +dependencies = [ + "phf_generator 0.10.0", + "phf_shared 0.10.0", +] + +[[package]] +name = "phf_generator" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" +dependencies = [ + "phf_shared 0.10.0", + "rand", +] + +[[package]] +name = "phf_generator" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +dependencies = [ + "phf_shared 0.11.2", + "rand", +] + +[[package]] +name = "phf_macros" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b" +dependencies = [ + "phf_generator 0.11.2", + "phf_shared 0.11.2", + "proc-macro2", + "quote", + "syn 2.0.28", +] + +[[package]] +name = "phf_shared" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +dependencies = [ + "siphasher", +] + +[[package]] +name = "phf_shared" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project-lite" version = "0.2.11" @@ -972,6 +1184,12 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "proc-macro2" version = "1.0.66" @@ -1144,6 +1362,23 @@ version = "1.2.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "scraper" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c95a930e03325234c18c7071fd2b60118307e025d6fff3e12745ffbf63a3d29c" +dependencies = [ + "ahash 0.8.3", + "cssparser", + "ego-tree", + "getopts", + "html5ever", + "once_cell", + "selectors", + "smallvec", + "tendril", +] + [[package]] name = "security-framework" version = "2.9.2" @@ -1167,6 +1402,25 @@ dependencies = [ "libc", ] +[[package]] +name = "selectors" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eb30575f3638fc8f6815f448d50cb1a2e255b0897985c8c59f4d37b72a07b06" +dependencies = [ + "bitflags 2.3.3", + "cssparser", + "derive_more", + "fxhash", + "log", + "new_debug_unreachable", + "phf 0.10.1", + "phf_codegen", + "precomputed-hash", + "servo_arc", + "smallvec", +] + [[package]] name = "semver" version = "1.0.18" @@ -1202,6 +1456,15 @@ dependencies = [ "serde", ] +[[package]] +name = "servo_arc" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d036d71a959e00c77a63538b90a6c2390969f9772b096ea837205c6bd0491a44" +dependencies = [ + "stable_deref_trait", +] + [[package]] name = "sha1" version = "0.10.5" @@ -1222,6 +1485,12 @@ dependencies = [ "libc", ] +[[package]] +name = "siphasher" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" + [[package]] name = "slab" version = "0.4.8" @@ -1247,6 +1516,38 @@ dependencies = [ "winapi", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "string_cache" +version = "0.8.7" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b" +dependencies = [ + "new_debug_unreachable", + "once_cell", + "parking_lot", + "phf_shared 0.10.0", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988" +dependencies = [ + "phf_generator 0.10.0", + "phf_shared 0.10.0", + "proc-macro2", + "quote", +] + [[package]] name = "syn" version = "1.0.109" @@ -1277,6 +1578,7 @@ dependencies = [ "derive_more", "mime", "reqwest", + "utils", ] [[package]] @@ -1292,6 +1594,17 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "tendril" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" +dependencies = [ + "futf", + "mac", + "utf-8", +] + [[package]] name = "time" version = "0.3.25" @@ -1437,6 +1750,12 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unicode-width" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" + [[package]] name = "url" version = "2.4.0" @@ -1448,6 +1767,21 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + +[[package]] +name = "utils" +version = "0.1.0" +dependencies = [ + "actix-web", + "derive_more", + "reqwest", +] + [[package]] name = "vcpkg" version = "0.2.15" diff --git a/Cargo.toml b/Cargo.toml index 4d9c0bb..3e1e467 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,12 +1,2 @@ -[package] -name = "tatoeba-api" -version = "0.1.0" -edition = "2021" - -# See 
more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-
-[dependencies]
-actix-web = "4.3.1"
-derive_more = "0.99.17"
-mime = "0.3.17"
-reqwest = "0.11.18"
+[workspace]
+members = ["tatoeba", "images", "utils"]
diff --git a/images/Cargo.toml b/images/Cargo.toml
new file mode 100644
index 0000000..15a5ab9
--- /dev/null
+++ b/images/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "images"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+utils = { path = "../utils" }
+actix-web = "4.3.1"
+mime = "0.3.17"
+reqwest = "0.11.18"
+scraper = "0.17.1"
+lazy_static = "1.4.0"
+rand = "0.8.5"
+serde_json = "1.0.104"
diff --git a/images/src/main.rs b/images/src/main.rs
new file mode 100644
index 0000000..91f26e4
--- /dev/null
+++ b/images/src/main.rs
@@ -0,0 +1,102 @@
+use utils::error::Result;
+
+use actix_web::{get, http::header, web, App, HttpResponse, HttpServer, Responder};
+use rand::seq::SliceRandom;
+
+/// Bing image-search endpoint that every route scrapes.
+pub const BING: &str = "https://www.bing.com/images/search";
+
+/// Fetches the Bing results page for `query` and returns the `src` of every
+/// `img.mimg` element whose `src` starts with `http`.
+///
+/// # Errors
+///
+/// Returns the [`reqwest::Error`] raised while sending the request or reading its body.
+async fn get_images(query: &str) -> reqwest::Result<Vec<String>> {
+    use lazy_static::lazy_static;
+    use scraper::{Html, Selector};
+    lazy_static! {
+        static ref MIMG: Selector = Selector::parse("img.mimg").unwrap();
+    }
+    // `query` comes straight from the request path, so let reqwest percent-encode it
+    // instead of splicing it into the URL, where `&`, `#` or spaces would corrupt it.
+    let page = reqwest::Client::builder()
+        .build()?
+        .get(BING)
+        .query(&[("q", query)])
+        .send()
+        .await?
+        .text()
+        .await?;
+    let document = Html::parse_document(&page);
+    Ok(document
+        .select(&MIMG)
+        .filter_map(|element| element.value().attr("src"))
+        .filter(|src| src.starts_with("http"))
+        .map(str::to_owned)
+        .collect())
+}
+
+/// Scrapes the images for `query`, lets `handler` pick one and redirects to it.
+///
+/// Responds `302 Found` with the pick in `Location`, or `200 OK` with the body
+/// `No results` when `handler` returns `None`.
+async fn route<F>(query: &str, handler: F) -> Result<HttpResponse>
+where
+    F: Fn(&Vec<String>) -> Option<&String>,
+{
+    let images = get_images(query).await?;
+    Ok(match handler(&images) {
+        Some(image) => HttpResponse::Found()
+            .append_header((header::LOCATION, image.as_str()))
+            .finish(),
+        None => HttpResponse::Ok().body("No results"),
+    })
+}
+
+/// `GET /{query}/list`: every scraped image URL as a JSON array.
+#[get("/{query}/list")]
+async fn route_query_list(path: web::Path<String>) -> Result<impl Responder> {
+    let query = path.into_inner();
+    let images = get_images(&query).await?;
+    Ok(HttpResponse::Ok()
+        .append_header(header::ContentType(mime::APPLICATION_JSON))
+        .body(serde_json::to_string(&images).unwrap_or_else(|_| "[]".to_string())))
+}
+
+/// `GET /{query}`: redirect to the first scraped image.
+#[get("/{query}")]
+async fn route_query(path: web::Path<String>) -> Result<impl Responder> {
+    let query = path.into_inner();
+    route(&query, |images| images.first()).await
+}
+
+/// `GET /{query}/random`: redirect to a randomly chosen scraped image.
+#[get("/{query}/random")]
+async fn route_query_random(path: web::Path<String>) -> Result<impl Responder> {
+    let query = path.into_inner();
+    route(&query, |images| images.choose(&mut rand::thread_rng())).await
+}
+
+/// `GET /{query}/{index}`: redirect to image number `index`, wrapping around the result count.
+#[get("/{query}/{index}")]
+async fn route_query_index(path: web::Path<(String, usize)>) -> Result<impl Responder> {
+    let (query, index) = path.into_inner();
+    // `checked_rem` is `None` for an empty result list, where `index % 0` would panic.
+    route(&query, move |images| images.get(index.checked_rem(images.len())?)).await
+}
+
+/// Serves the image routes on `127.0.0.1:3002`.
+#[actix_web::main]
+async fn main() -> std::io::Result<()> {
+    HttpServer::new(|| {
+        App::new()
+            .service(route_query_list)
+            .service(route_query)
+            .service(route_query_random)
+            .service(route_query_index)
+    })
+    .bind(("127.0.0.1", 3002))?
+    .run()
+    .await
+}
diff --git a/tatoeba/Cargo.toml b/tatoeba/Cargo.toml
new file mode 100644
index 0000000..731c104
--- /dev/null
+++ b/tatoeba/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+name = "tatoeba-api"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+utils = { path = "../utils", features = [ "nothuman" ] }
+actix-web = "4.3.1"
+derive_more = "0.99.17"
+mime = "0.3.17"
+reqwest = "0.11.18"
diff --git a/src/main.rs b/tatoeba/src/main.rs
similarity index 90%
rename from src/main.rs
rename to tatoeba/src/main.rs
index 368871c..c39817a 100644
--- a/src/main.rs
+++ b/tatoeba/src/main.rs
@@ -1,8 +1,4 @@
-mod error;
-pub use error::{Error, Result};
-
-pub mod utils;
-use utils::is_human;
+use utils::{error::{Error, Result}, is_human};
 
 use actix_web::{get, http::header, App, HttpRequest, HttpResponse, HttpServer, Responder};
 
diff --git a/utils/Cargo.toml b/utils/Cargo.toml
new file mode 100644
index 0000000..1ffca7a
--- /dev/null
+++ b/utils/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "utils"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+actix-web = "4.3.1"
+derive_more = "0.99.17"
+reqwest = "0.11.18"
+
+[features]
+nothuman = []
\ No newline at end of file
diff --git a/src/error.rs b/utils/src/error.rs
similarity index 69%
rename from src/error.rs
rename to utils/src/error.rs
index bff6131..0f2502b 100644
--- a/src/error.rs
+++ b/utils/src/error.rs
@@ -1,4 +1,7 @@
-use actix_web::{HttpResponse, ResponseError};
+#[cfg(feature = "nothuman")]
+use actix_web::HttpResponse;
+
+use actix_web::ResponseError;
 use derive_more::From;
 use std::fmt::{self, Display};
 
@@ -6,7 +9,8 @@ pub type Result<T> = std::result::Result<T, Error>;
 
 #[derive(From, Debug)]
 pub enum Error {
-    TatoebaApi(reqwest::Error),
+    Reqwest(reqwest::Error),
+    #[cfg(feature = "nothuman")]
     NotHuman { target: String },
 }
 
@@ -21,13 +25,16 @@ impl ResponseError for Error {
         use reqwest::StatusCode;
         use Error::*;
         match self {
-            // 503 Service Unavailable
-            TatoebaApi(error) => error.status().unwrap_or(StatusCode::SERVICE_UNAVAILABLE),
+            // Theoretically could be 503 Service Unavailable in case remote is actually down,
+            // but will also throw for network errors, so a generic 500 Internal Server Error is more appropriate.
+            Reqwest(error) => error.status().unwrap_or(StatusCode::INTERNAL_SERVER_ERROR),
             // 403 Forbidden
+            #[cfg(feature = "nothuman")]
             NotHuman { .. } => StatusCode::FORBIDDEN,
         }
     }
 
+    #[cfg(feature = "nothuman")]
     fn error_response(&self) -> HttpResponse {
         HttpResponse::build(self.status_code()).body(match self {
             Self::NotHuman { target } => format!(
@@ -40,3 +47,4 @@ impl ResponseError for Error {
         })
     }
 }
+
diff --git a/src/utils.rs b/utils/src/is_human.rs
similarity index 99%
rename from src/utils.rs
rename to utils/src/is_human.rs
index 034c0ba..b9ba6eb 100644
--- a/src/utils.rs
+++ b/utils/src/is_human.rs
@@ -18,4 +18,4 @@ pub fn is_human(request: &HttpRequest) -> bool {
                 .any(|&human| ua.contains(human))
         })
         .unwrap_or(false)
-}
+}
\ No newline at end of file
diff --git a/utils/src/lib.rs b/utils/src/lib.rs
new file mode 100644
index 0000000..f5844d3
--- /dev/null
+++ b/utils/src/lib.rs
@@ -0,0 +1,6 @@
+pub mod error;
+
+#[cfg(feature = "nothuman")]
+mod is_human;
+#[cfg(feature = "nothuman")]
+pub use is_human::is_human;