mod comment; pub use comment::*; mod database; pub use database::*; mod error; pub use error::*; use actix_cors::Cors; use actix_web::{get, post, web, App, HttpRequest, HttpResponse, HttpServer}; use clap::Parser; use sanitize_html::{rules::predefined::DEFAULT, sanitize_str}; use scraper::{Html, Selector}; use serde::Deserialize; use std::fs::File; use std::sync::Mutex; use std::{collections::HashMap, sync::MutexGuard}; use validator::Validate; use webhook::client::WebhookClient; struct AppState { databases: HashMap>, } impl AppState { fn get_db<'a>(&'a self, origin: &str) -> Result, Error> { match self.databases.get(origin) { Some(database) => Ok(database.lock()?), None => return Err(Error::InvalidOrigin), } } } fn trim_protocol(url: &str) -> String { url.replace("http://", "").replace("https://", "") } fn get_request_origin(request: &HttpRequest) -> Result { match request.head().headers().get("Origin") { Some(origin) => match origin.to_str() { Ok(origin) => Ok(trim_protocol(origin)), Err(_) => Err(Error::InvalidOrigin), }, None => Err(Error::InvalidOrigin), } } #[derive(Default, Parser)] #[clap(author, version, about)] struct Arguments { #[clap(default_value = "soudan.yaml", help = "Set configuration file")] config: String, #[clap( short, long, default_value = "8080", help = "Set port where HTTP requests will be received" )] port: u16, #[clap( short, long, help = "Run in testing mode, with in-memory database(s) and permissive CORS policy" )] testing: bool, } async fn _get_comments( data: web::Data, request: HttpRequest, content_id: web::Path, ) -> Result, Error> { let origin = get_request_origin(&request)?; web::block(move || Ok(data.get_db(&origin)?.get_comments(&content_id)?)).await? } #[get("/{content_id}")] async fn get_comments( data: web::Data, request: HttpRequest, content_id: web::Path, ) -> HttpResponse { match _get_comments(data, request, content_id).await { Ok(comments) => HttpResponse::Ok().json(comments), Err(err) => err.to_http_response(), } } #[derive(Deserialize)] struct PostCommentsRequest { url: String, comment: Comment, } async fn _post_comment( data: web::Data, request: HttpRequest, bytes: web::Bytes, ) -> Result<(), Error> { let PostCommentsRequest { url, comment } = { let mut req = serde_json::from_str::(&String::from_utf8(bytes.to_vec())?)?; req.comment.text = sanitize_str(&DEFAULT, &req.comment.text)?.replace(">", ">"); // required for markdown quotes if let Some(ref mut author) = req.comment.author { *author = sanitize_str(&DEFAULT, &author)?; } req }; comment.validate()?; let origin = get_request_origin(&request)?; // Check to see if provided URL is in scope. // This is to prevent malicious requests that try to get server to fetch external websites. // (requires loop because "labels on blocks are unstable") // https://github.com/rust-lang/rust/issues/48594 'outer: loop { for site_root in data.databases.keys() { if site_root.eq(&origin) && trim_protocol(&url).starts_with(site_root) { break 'outer; } } return Err(Error::InvalidUrl); } let page_data = match get_page_data(&url).await? { Some(page_data) => { if page_data.content_id != comment.content_id { return Err(Error::InvalidContentId); } page_data } None => return Err(Error::InvalidUrl), // e.g. 404 }; // TODO: Use web::block // Create comment in database let database = data.get_db(&origin)?; if comment.author.is_none() && database.settings.name_required { return Err(Error::NameRequired); } if comment.email.is_none() && database.settings.email_required { return Err(Error::EmailRequired); } if let Some(parent) = comment.parent { 'outer2: loop { let comments = database.get_comments(&comment.content_id)?; for other_comment in comments.iter() { if other_comment.id.unwrap() == parent { if other_comment.parent.is_none() { break 'outer2; } break; } } return Err(Error::InvalidParent); } } database.create_comment(&comment)?; // Send notification webhook if let Some(webhook) = &database.settings.webhook { let client = WebhookClient::new(&webhook); client.send(|message| { let author = match &comment.author { Some(author) => &author, None => "Annonymous", }; message .username(&author) .avatar_url(&format!( "https://www.gravatar.com/avatar/{}?d=mp", get_gravatar(&comment.email) )) .embed(|embed| embed .title(&format!("New comment on {}", page_data.content_id)) .description(&comment.text) .field("Link", &format!("{}#{}", &url // Remove any trailing hash from URL .split("#") .next() .unwrap(), &database // Get ID of just created comment .get_comments(&page_data.content_id) .unwrap() .first() // returned in reverse chronological order .unwrap() .id .unwrap() ), true) .field("Content ID", &page_data.content_id, true) .field("Email", match &comment.email { Some(email) => email, None => "None", }, false) ) } ).await.unwrap(); } Ok(()) } #[post("/")] async fn post_comment( data: web::Data, request: HttpRequest, bytes: web::Bytes, ) -> HttpResponse { match _post_comment(data, request, bytes).await { Ok(_) => HttpResponse::Ok().finish(), Err(err) => err.to_http_response(), } } // Contains all page details stored in meta tags. // Currently, only content_id, but this is wrapped in this struct // to make adding other meta tags, such as locked comments, in the future struct PageData { content_id: String, } async fn get_page_data(url: &str) -> Result, reqwest::Error> { let response = reqwest::get(url).await?; if !response.status().is_success() { return Ok(None); } let content = response.text_with_charset("utf-8").await?; let document = Html::parse_document(&content); let get_meta = |name: &str| -> Option { let selector = Selector::parse(&format!("meta[name=\"{}\"]", name)).unwrap(); match document.select(&selector).next() { Some(element) => match element.value().attr("content") { Some(value) => Some(value.to_owned()), None => return None, }, None => return None, } }; return Ok(Some(PageData { content_id: match get_meta("soudan-content-id") { Some(id) => id, None => return Ok(None), }, })); } #[actix_web::main] async fn main() -> std::io::Result<()> { let arguments = Arguments::parse(); let database_settings: HashMap = match serde_yaml::from_reader(File::open(arguments.config)?) { Ok(settings) => settings, Err(_) => { return Err(std::io::Error::new( std::io::ErrorKind::Other, "invalid config file", )) } }; let mut databases = HashMap::new(); for (site, settings) in database_settings.iter() { databases.insert( site.to_owned(), Mutex::new(Database::new(arguments.testing, site, settings.clone()).unwrap()), ); } let port = arguments.port; let state = web::Data::new(AppState { databases }); HttpServer::new(move || { App::new() .service(get_comments) .service(post_comment) .app_data(state.clone()) // Issue with CORS on POST requests, // keeping permissive for now .wrap( Cors::permissive(), /* if arguments.testing { Cors::permissive() } else { let mut cors = Cors::default() .allowed_methods(vec!["GET", "POST"]); for domain in arguments.sites.iter() { cors = cors.allowed_origin(domain); } cors } */ ) }) .bind(("127.0.0.1", port))? .run() .await }