diff --git a/Cargo.lock b/Cargo.lock index 1333daf..2511b48 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -100,7 +100,6 @@ dependencies = [ "rust-embed", "rustls", "rustyline", - "scraper", "serde", "serde_json", "signal-hook", @@ -298,12 +297,6 @@ version = "1.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9134a6ef01ce4b366b50689c94f82c14bc72bc5d0386829828a2e2752ef7958c" -[[package]] -name = "byteorder" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" - [[package]] name = "bytes" version = "1.10.1" @@ -519,46 +512,12 @@ dependencies = [ "typenum", ] -[[package]] -name = "cssparser" -version = "0.34.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7c66d1cd8ed61bf80b38432613a7a2f09401ab8d0501110655f8b341484a3e3" -dependencies = [ - "cssparser-macros", - "dtoa-short", - "itoa", - "phf", - "smallvec", -] - -[[package]] -name = "cssparser-macros" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" -dependencies = [ - "quote", - "syn", -] - [[package]] name = "data-encoding" version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" -[[package]] -name = "derive_more" -version = "0.99.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6edb4b64a43d977b8e99788fe3a04d483834fba1215a7e02caa415b626497f7f" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "digest" version = "0.10.7" @@ -589,33 +548,12 @@ dependencies = [ "const-random", ] -[[package]] -name = "dtoa" -version = "1.0.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6add3b8cff394282be81f3fc1a0605db594ed69890078ca6e2cab1c408bcf04" 
- -[[package]] -name = "dtoa-short" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87" -dependencies = [ - "dtoa", -] - [[package]] name = "dunce" version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" -[[package]] -name = "ego-tree" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2972feb8dffe7bc8c5463b1dacda1b0dfbed3710e50f977d965429692d74cd8" - [[package]] name = "either" version = "1.15.0" @@ -698,16 +636,6 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" -[[package]] -name = "futf" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" -dependencies = [ - "mac", - "new_debug_unreachable", -] - [[package]] name = "futures-channel" version = "0.3.31" @@ -770,15 +698,6 @@ dependencies = [ "slab", ] -[[package]] -name = "fxhash" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" -dependencies = [ - "byteorder", -] - [[package]] name = "generic-array" version = "0.14.7" @@ -855,18 +774,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "html5ever" -version = "0.29.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b7410cae13cbc75623c98ac4cbfd1f0bedddf3227afc24f370cf0f50a44a11c" -dependencies = [ - "log", - "mac", - "markup5ever", - "match_token", -] - [[package]] name = "http" version = "1.3.1" @@ -1240,37 +1147,6 @@ version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" -[[package]] -name = "mac" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" - -[[package]] -name = "markup5ever" -version = "0.14.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7a7213d12e1864c0f002f52c2923d4556935a43dec5e71355c2760e0f6e7a18" -dependencies = [ - "log", - "phf", - "phf_codegen", - "string_cache", - "string_cache_codegen", - "tendril", -] - -[[package]] -name = "match_token" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88a9689d8d44bf9964484516275f5cd4c9b59457a6940c1d5d0ecbb94510a36b" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "matchers" version = "0.1.0" @@ -1334,12 +1210,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "new_debug_unreachable" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" - [[package]] name = "nibble_vec" version = "0.1.0" @@ -1470,58 +1340,6 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" -[[package]] -name = "phf" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" -dependencies = [ - "phf_macros", - "phf_shared", -] - -[[package]] -name = "phf_codegen" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" -dependencies = [ - "phf_generator", - "phf_shared", -] - -[[package]] -name = "phf_generator" -version = "0.11.3" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" -dependencies = [ - "phf_shared", - "rand 0.8.5", -] - -[[package]] -name = "phf_macros" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216" -dependencies = [ - "phf_generator", - "phf_shared", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "phf_shared" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" -dependencies = [ - "siphasher", -] - [[package]] name = "pin-project-lite" version = "0.2.16" @@ -1543,12 +1361,6 @@ dependencies = [ "zerocopy", ] -[[package]] -name = "precomputed-hash" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" - [[package]] name = "prettyplease" version = "0.2.32" @@ -1596,7 +1408,7 @@ checksum = "bcbafbbdbb0f638fe3f35f3c56739f77a8a1d070cb25603226c83339b391472b" dependencies = [ "bytes", "getrandom 0.3.2", - "rand 0.9.1", + "rand", "ring", "rustc-hash 2.1.1", "rustls", @@ -1647,15 +1459,6 @@ dependencies = [ "nibble_vec", ] -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "rand_core 0.6.4", -] - [[package]] name = "rand" version = "0.9.1" @@ -1663,7 +1466,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" dependencies = [ "rand_chacha", - "rand_core 0.9.3", + "rand_core", ] [[package]] @@ -1673,15 +1476,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core 0.9.3", + "rand_core", ] -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" - [[package]] name = "rand_core" version = "0.9.3" @@ -2039,20 +1836,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" -[[package]] -name = "scraper" -version = "0.23.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "527e65d9d888567588db4c12da1087598d0f6f8b346cc2c5abc91f05fc2dffe2" -dependencies = [ - "cssparser", - "ego-tree", - "html5ever", - "precomputed-hash", - "selectors", - "tendril", -] - [[package]] name = "security-framework" version = "3.2.0" @@ -2076,25 +1859,6 @@ dependencies = [ "libc", ] -[[package]] -name = "selectors" -version = "0.26.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd568a4c9bb598e291a08244a5c1f5a8a6650bee243b5b0f8dbb3d9cc1d87fe8" -dependencies = [ - "bitflags 2.9.0", - "cssparser", - "derive_more", - "fxhash", - "log", - "new_debug_unreachable", - "phf", - "phf_codegen", - "precomputed-hash", - "servo_arc", - "smallvec", -] - [[package]] name = "serde" version = "1.0.219" @@ -2148,15 +1912,6 @@ dependencies = [ "serde", ] -[[package]] -name = "servo_arc" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae65c4249478a2647db249fb43e23cec56a2c8974a427e7bd8cb5a1d0964921a" -dependencies = [ - "stable_deref_trait", -] - [[package]] name = "sha1" version = "0.10.6" @@ -2213,12 +1968,6 @@ dependencies = [ "libc", ] -[[package]] -name = "siphasher" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" - [[package]] name = "slab" version = "0.4.9" @@ -2250,31 +1999,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" -[[package]] -name = "string_cache" -version = "0.8.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f" -dependencies = [ - "new_debug_unreachable", - "parking_lot", - "phf_shared", - "precomputed-hash", - "serde", -] - -[[package]] -name = "string_cache_codegen" -version = "0.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0" -dependencies = [ - "phf_generator", - "phf_shared", - "proc-macro2", - "quote", -] - [[package]] name = "strsim" version = "0.11.1" @@ -2331,17 +2055,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "tendril" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" -dependencies = [ - "futf", - "mac", - "utf-8", -] - [[package]] name = "termion" version = "3.0.0" @@ -2659,7 +2372,7 @@ dependencies = [ "http", "httparse", "log", - "rand 0.9.1", + "rand", "rustls", "rustls-pki-types", "sha1", diff --git a/Cargo.toml b/Cargo.toml index bb36ae6..5f7ee87 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,7 +27,6 @@ rustyline = "13.0.0" config = { version = "0.15.11", default-features = false, features = ["toml", "ini"] } which = "6.0.3" tempfile = "3.9.0" -scraper = { version = "0.23.1", default-features = false } avt = "0.15.1" axum = { version = "0.8.4", default-features = false, features = ["http1", "ws"] } tokio = { version = "1.44.2", features = ["full"] } diff --git a/src/html.rs b/src/html.rs new file mode 100644 index 0000000..c054fed 
/// Extracts the asciicast URL advertised by an HTML page.
///
/// Scans the document's `<head>` element for the first `<link>` tag whose
/// `rel` attribute contains the `alternate` token, whose `type` attribute is
/// `application/x-asciicast` or `application/asciicast+json`, and which has an
/// `href` attribute. Element names, attribute names, `rel` tokens and the
/// media type are all matched ASCII case-insensitively.
///
/// Returns the `href` value exactly as written in the markup (character
/// references are not decoded and relative URLs are not resolved), or `None`
/// when the page has no `<head>…</head>` section or no matching link.
///
/// This is a deliberately small scanner, not an HTML parser: a tag is assumed
/// to end at the first `>` after its opening `<`, so a `>` inside a quoted
/// attribute value terminates the tag early.
pub fn extract_asciicast_link(html: &str) -> Option<String> {
    // ASCII lower-casing never changes byte lengths, so offsets found in the
    // lower-cased copy are valid (and on char boundaries) in `html` as well.
    let html_lc = html.to_ascii_lowercase();
    let head_start = find_tag_open(&html_lc, "<head", 0)?;
    let head_end = head_start + html_lc[head_start..].find("</head>")?;
    let head = &html[head_start..head_end];
    // Reuse the lower-cased document instead of lower-casing the head again.
    let head_lc = &html_lc[head_start..head_end];
    let mut offset = 0;

    while let Some(link_start) = find_tag_open(head_lc, "<link", offset) {
        // Include the closing '>' in the tag slice.
        let link_end = link_start + head_lc[link_start..].find('>')? + 1;
        let link = &head[link_start..link_end];
        offset = link_end;

        if is_asciicast_alternate(link) {
            // A matching link without an href is skipped; a later one may have it.
            if let Some(href) = attr(link, "href") {
                return Some(href.to_string());
            }
        }
    }

    None
}

/// Returns true when `link` (a complete `<link …>` tag) declares
/// `rel="alternate"` together with one of the asciicast media types.
fn is_asciicast_alternate(link: &str) -> bool {
    const ASCIICAST_MEDIA_TYPES: [&str; 2] =
        ["application/x-asciicast", "application/asciicast+json"];

    // `rel` is a whitespace-separated token list, e.g. "alternate nofollow".
    let rel_matches = attr(link, "rel").map_or(false, |rel| {
        rel.split_whitespace()
            .any(|token| token.eq_ignore_ascii_case("alternate"))
    });

    let type_matches = attr(link, "type").map_or(false, |media_type| {
        ASCIICAST_MEDIA_TYPES
            .iter()
            .any(|known| media_type.eq_ignore_ascii_case(known))
    });

    rel_matches && type_matches
}

/// Finds the byte offset of the first occurrence of `open` (for example
/// `"<link"`) in `haystack` at or after `from`, requiring that the element
/// name really ends there, so that `<head` does not match `<header`.
///
/// `haystack` and `open` must use the same letter case, and `from` must lie on
/// a char boundary of `haystack`.
fn find_tag_open(haystack: &str, open: &str, from: usize) -> Option<usize> {
    let bytes = haystack.as_bytes();
    let mut offset = from;

    while let Some(pos) = haystack.get(offset..)?.find(open) {
        let start = offset + pos;
        let after = start + open.len();

        match bytes.get(after) {
            // `open` is the last thing in the input.
            None => return Some(start),
            Some(&next) if is_name_end(next) => return Some(start),
            // A longer element name such as `<header`; keep searching after it.
            Some(_) => offset = after,
        }
    }

    None
}

/// Returns true for bytes that terminate an element or attribute name:
/// ASCII whitespace, `/` and `>`.
fn is_name_end(byte: u8) -> bool {
    byte.is_ascii_whitespace() || byte == b'/' || byte == b'>'
}

/// Looks up the value of attribute `name` in `tag`, a single start tag such as
/// `<link rel="alternate" href=x>`.
///
/// The tag is tokenized attribute by attribute, so `name` is only compared
/// against real attribute names (ASCII case-insensitively) and never against
/// text inside another attribute's name or value. Whitespace around `=` is
/// accepted. Values may be double-quoted, single-quoted or unquoted; an
/// unquoted value runs until whitespace or `>` and may contain `/`, except
/// that a single `/` directly before the closing `>` is treated as the
/// self-closing marker and dropped.
///
/// Returns `None` when no attribute called `name` has a value, or when an
/// unterminated quoted value is reached before finding it. Attributes written
/// without a value (for example `<link disabled>`) are ignored.
fn attr<'a>(tag: &'a str, name: &str) -> Option<&'a str> {
    let bytes = tag.as_bytes();
    let len = bytes.len();
    let mut i = 0;

    // Skip the leading '<' and the element name, if present.
    if bytes.first() == Some(&b'<') {
        i = 1;
        while i < len && !is_name_end(bytes[i]) {
            i += 1;
        }
    }

    loop {
        // Skip separators between attributes.
        while i < len && (bytes[i].is_ascii_whitespace() || bytes[i] == b'/') {
            i += 1;
        }

        if i >= len || bytes[i] == b'>' {
            return None;
        }

        // Attribute name. The first byte is always consumed so that every
        // iteration makes progress, even on malformed input like a stray '='.
        let name_start = i;
        i += 1;
        while i < len && !is_name_end(bytes[i]) && bytes[i] != b'=' {
            i += 1;
        }
        let attr_name = &tag[name_start..i];

        while i < len && bytes[i].is_ascii_whitespace() {
            i += 1;
        }

        // No '=' follows: valueless attribute, move on to the next one.
        if i >= len || bytes[i] != b'=' {
            continue;
        }
        i += 1;

        while i < len && bytes[i].is_ascii_whitespace() {
            i += 1;
        }

        if i >= len {
            return None;
        }

        let value = if bytes[i] == b'"' || bytes[i] == b'\'' {
            let start = i + 1;
            let end = start + tag[start..].find(bytes[i] as char)?;
            i = end + 1;
            &tag[start..end]
        } else {
            let start = i;
            while i < len && !bytes[i].is_ascii_whitespace() && bytes[i] != b'>' {
                i += 1;
            }

            // `href=foo/>`: the trailing '/' belongs to the self-closing
            // marker, not to the value.
            let mut end = i;
            if i < len && bytes[i] == b'>' && end > start && bytes[end - 1] == b'/' {
                end -= 1;
            }

            &tag[start..end]
        };

        if attr_name.eq_ignore_ascii_case(name) {
            return Some(value);
        }
    }
}