From 19b584cc5d92be4028a5586af3f850982df582a3 Mon Sep 17 00:00:00 2001 From: Jordan Doyle Date: Fri, 14 Feb 2020 13:35:58 +0000 Subject: [PATCH] Add a bit of sophistication to the storkcli with a --max-depth flag --- README.md | 34 +++++++++++++++++++++++++++++++++- storkcli/Cargo.toml | 4 ++++ storkcli/src/main.rs | 47 +++++++++++++++++++++++++++++++---------------- 3 files changed, 68 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 829a6a9..8168170 100644 --- a/README.md +++ b/README.md @@ -17,4 +17,36 @@ View the docs for examples of how to use `stork`: - [stork](https://docs.rs/stork/) - [stork_http](https://docs.rs/stork_http/) -or look in the [examples/](https://github.com/w4/stork/tree/master/examples) directory for some real-world examples! \ No newline at end of file +or look in the [examples/](https://github.com/w4/stork/tree/master/examples) directory for some real-world examples! + +## storkcli + +`storkcli` is built off the back of stork. It can be used to scrape websites for links using various +filters, though basic right now `stork` gives us the ability to make this CLI as sophisticated as we like. + +Usage: + +``` +Usage: ./storkcli <url> [--max-depth <max-depth>] + +Link hunter with a little bit of magic. + +Options: + --max-depth specifies how deep we should go from the origin, leave this + value unspecified to recurse until there's nothing left to + follow. 
+ --help display usage information +``` + +Example: + +``` +$ ./storkcli "https://doyle.la/" --max-depth 0 +↳ https://instagram.com/doyl_e +↳ https://linkedin.com/in/jordanjdoyle +↳ https://stackoverflow.com/users/2132800/jordan-doyle +↳ https://last.fm/user/doyle- +↳ https://github.com/w4 +↳ mailto:jordan@doyle.la +↳ https://keybase.io/jrd +``` diff --git a/storkcli/Cargo.toml b/storkcli/Cargo.toml index 3758c2b..50d9622 100644 --- a/storkcli/Cargo.toml +++ b/storkcli/Cargo.toml @@ -13,4 +13,8 @@ stork_http = { path = "../stork_http", version = "0.0.3" } tokio = { version = "0.2", features = ["full"] } futures = "0.3" +argh = "" + +twox-hash = "" + failure = "" \ No newline at end of file diff --git a/storkcli/src/main.rs b/storkcli/src/main.rs index ed229e7..90895ad 100644 --- a/storkcli/src/main.rs +++ b/storkcli/src/main.rs @@ -1,26 +1,29 @@ use futures::{pin_mut, StreamExt}; use std::collections::VecDeque; -use stork_http::HttpStorkable; +use stork_http::{HttpStorkable, Link}; -#[tokio::main] -async fn main() -> failure::Fallible<()> { - let args: Vec<String> = std::env::args().collect(); - let url = args - .get(1) - .expect("Expecting URL parameter") - .parse() - .unwrap(); - - traverse(HttpStorkable::new(url)).await?; +#[derive(argh::FromArgs)] +/// Link hunter with a little bit of magic. +struct Args { + #[argh(option)] + /// specifies how deep we should go from the origin, leave this + /// value unspecified to recurse until there's nothing left to + /// follow. 
+ max_depth: Option<usize>, - Ok(()) + #[argh(positional)] + url: Link, } -async fn traverse(storkable: HttpStorkable) -> failure::Fallible<()> { - let stream = storkable.exec(); +#[tokio::main] +async fn main() -> failure::Fallible<()> { + let args: Args = argh::from_env(); + let url = args.url; + + let stream = HttpStorkable::new(url).exec(); pin_mut!(stream); // needed for iteration - let mut queue: VecDeque<_> = stream.map(|v| (v, 0)).collect::<VecDeque<_>>().await; + let mut queue = stream.map(|v| (v, 0)).collect::<VecDeque<_>>().await; if queue.is_empty() { panic!("Failed to find any links on the page!"); @@ -33,10 +36,22 @@ async fn traverse(storkable: HttpStorkable) -> failure::Fallible<()> { } let (link, depth) = queue.pop_front().unwrap(); - let link: HttpStorkable = link?; + + if let Err(e) = link { + eprintln!("Failed to grab a link: {}", e); + continue; + } + + let link = link.unwrap(); println!("{}↳ {}", " ".repeat(depth), link.val().url()); + if let Some(max_depth) = args.max_depth { + if depth >= max_depth { + continue; + } + } + // add children of this storkable to the front of the queue with // 1 depth added on let children = link.exec(); -- libgit2 1.7.2