Add a bit of sophistication to the storkcli with a --max-depth flag
Diff
README.md | 34 +++++++++++++++++++++++++++++++++-
storkcli/Cargo.toml | 4 ++++
storkcli/src/main.rs | 47 +++++++++++++++++++++++++++++++----------------
3 files changed, 68 insertions(+), 17 deletions(-)
@@ -17,4 +17,36 @@ View the docs for examples of how to use `stork`:
- [stork](https://docs.rs/stork/)
- [stork_http](https://docs.rs/stork_http/)
or look in the [examples/](https://github.com/w4/stork/tree/master/examples) directory for some real-world examples!
\ No newline at end of file
or look in the [examples/](https://github.com/w4/stork/tree/master/examples) directory for some real-world examples!
## storkcli
`storkcli` is built off the back of `stork`. It can be used to scrape websites for links using various
filters. It is basic right now, but `stork` gives us the ability to make this CLI as sophisticated as we like.
Usage:
```
Usage: ./storkcli <url> [--max-depth <max-depth>]
Link hunter with a little bit of magic.
Options:
--max-depth specifies how deep we should go from the origin, leave this
value unspecified to recurse until there's nothing left to
follow.
--help display usage information
```
Example:
```
$ ./storkcli "https://doyle.la/" --max-depth 0
↳ https://instagram.com/doyl_e
↳ https://linkedin.com/in/jordanjdoyle
↳ https://stackoverflow.com/users/2132800/jordan-doyle
↳ https://last.fm/user/doyle-
↳ https://github.com/w4
↳ mailto:jordan@doyle.la
↳ https://keybase.io/jrd
```
@@ -13,4 +13,8 @@ stork_http = { path = "../stork_http", version = "0.0.3" }
tokio = { version = "0.2", features = ["full"] }
futures = "0.3"
argh = ""
twox-hash = ""
failure = ""
\ No newline at end of file
@@ -1,26 +1,29 @@
use futures::{pin_mut, StreamExt};
use std::collections::VecDeque;
use stork_http::HttpStorkable;
use stork_http::{HttpStorkable, Link};
#[tokio::main]
async fn main() -> failure::Fallible<()> {
let args: Vec<String> = std::env::args().collect();
let url = args
.get(1)
.expect("Expecting URL parameter")
.parse()
.unwrap();
traverse(HttpStorkable::new(url)).await?;
#[derive(argh::FromArgs)]
struct Args {
#[argh(option)]
max_depth: Option<usize>,
Ok(())
#[argh(positional)]
url: Link,
}
async fn traverse(storkable: HttpStorkable) -> failure::Fallible<()> {
let stream = storkable.exec();
#[tokio::main]
async fn main() -> failure::Fallible<()> {
let args: Args = argh::from_env();
let url = args.url;
let stream = HttpStorkable::new(url).exec();
pin_mut!(stream);
let mut queue: VecDeque<_> = stream.map(|v| (v, 0)).collect::<VecDeque<_>>().await;
let mut queue = stream.map(|v| (v, 0)).collect::<VecDeque<_>>().await;
if queue.is_empty() {
panic!("Failed to find any links on the page!");
@@ -33,10 +36,22 @@ async fn traverse(storkable: HttpStorkable) -> failure::Fallible<()> {
}
let (link, depth) = queue.pop_front().unwrap();
let link: HttpStorkable = link?;
if let Err(e) = link {
eprintln!("Failed to grab a link: {}", e);
continue;
}
let link = link.unwrap();
println!("{}↳ {}", " ".repeat(depth), link.val().url());
if let Some(max_depth) = args.max_depth {
if depth >= max_depth {
continue;
}
}
let children = link.exec();