🏡 index : ~doyle/stork.git

author Jordan Doyle <jordan@doyle.la> 2020-02-14 13:35:58.0 +00:00:00
committer Jordan Doyle <jordan@doyle.la> 2020-02-14 13:37:50.0 +00:00:00
commit
19b584cc5d92be4028a5586af3f850982df582a3 [patch]
tree
0fa2ae60db7a9390ad7b10d66d44df1d302154d1
parent
5d3d72522eb212a2771085f1f0fac2d8fb5f7e9e
download
19b584cc5d92be4028a5586af3f850982df582a3.tar.gz

Add a bit of sophistication to the storkcli with a --max-depth flag



Diff

 README.md            | 34 +++++++++++++++++++++++++++++++++-
 storkcli/Cargo.toml  |  4 ++++
 storkcli/src/main.rs | 47 +++++++++++++++++++++++++++++++----------------
 3 files changed, 68 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index 829a6a9..8168170 100644
--- a/README.md
+++ b/README.md
@@ -17,4 +17,36 @@ View the docs for examples of how to use `stork`:
- [stork](https://docs.rs/stork/)
- [stork_http](https://docs.rs/stork_http/)

or look in the [examples/](https://github.com/w4/stork/tree/master/examples) directory for some real-world examples!
\ No newline at end of file
or look in the [examples/](https://github.com/w4/stork/tree/master/examples) directory for some real-world examples!

## storkcli

`storkcli` is built off the back of stork. It can be used to scrape websites for links using various
filters. It is basic right now, but `stork` gives us the ability to make this CLI as sophisticated as we like.

Usage:

```
Usage: ./storkcli <url> [--max-depth <max-depth>]

Link hunter with a little bit of magic.

Options:
  --max-depth       specifies how deep we should go from the origin, leave this
                    value unspecified to recurse until there's nothing left to
                    follow.
  --help            display usage information
```

Example:

```
$ ./storkcli "https://doyle.la/" --max-depth 0
https://instagram.com/doyl_e
https://linkedin.com/in/jordanjdoyle
https://stackoverflow.com/users/2132800/jordan-doyle
https://last.fm/user/doyle-
https://github.com/w4
↳ mailto:jordan@doyle.la
https://keybase.io/jrd
```
diff --git a/storkcli/Cargo.toml b/storkcli/Cargo.toml
index 3758c2b..50d9622 100644
--- a/storkcli/Cargo.toml
+++ b/storkcli/Cargo.toml
@@ -13,4 +13,8 @@ stork_http = { path = "../stork_http", version = "0.0.3" }
tokio = { version = "0.2", features = ["full"] }
futures = "0.3"

argh = ""

twox-hash = ""

failure = ""
\ No newline at end of file
diff --git a/storkcli/src/main.rs b/storkcli/src/main.rs
index ed229e7..90895ad 100644
--- a/storkcli/src/main.rs
+++ b/storkcli/src/main.rs
@@ -1,26 +1,29 @@
use futures::{pin_mut, StreamExt};
use std::collections::VecDeque;
use stork_http::HttpStorkable;
use stork_http::{HttpStorkable, Link};

#[tokio::main]
async fn main() -> failure::Fallible<()> {
    let args: Vec<String> = std::env::args().collect();
    let url = args
        .get(1)
        .expect("Expecting URL parameter")
        .parse()
        .unwrap();

    traverse(HttpStorkable::new(url)).await?;
#[derive(argh::FromArgs)]
/// Link hunter with a little bit of magic.
struct Args {
    #[argh(option)]
    /// specifies how deep we should go from the origin, leave this
    /// value unspecified to recurse until there's nothing left to
    /// follow.
    max_depth: Option<usize>,

    Ok(())
    #[argh(positional)]
    url: Link,
}

async fn traverse(storkable: HttpStorkable) -> failure::Fallible<()> {
    let stream = storkable.exec();
#[tokio::main]
async fn main() -> failure::Fallible<()> {
    let args: Args = argh::from_env();
    let url = args.url;

    let stream = HttpStorkable::new(url).exec();
    pin_mut!(stream); // needed for iteration

    let mut queue: VecDeque<_> = stream.map(|v| (v, 0)).collect::<VecDeque<_>>().await;
    let mut queue = stream.map(|v| (v, 0)).collect::<VecDeque<_>>().await;

    if queue.is_empty() {
        panic!("Failed to find any links on the page!");
@@ -33,10 +36,22 @@ async fn traverse(storkable: HttpStorkable) -> failure::Fallible<()> {
        }

        let (link, depth) = queue.pop_front().unwrap();
        let link: HttpStorkable = link?;

        if let Err(e) = link {
            eprintln!("Failed to grab a link: {}", e);
            continue;
        }

        let link = link.unwrap();

        println!("{}{}", " ".repeat(depth), link.val().url());

        if let Some(max_depth) = args.max_depth {
            if depth >= max_depth {
                continue;
            }
        }

        // add children of this storkable to the front of the queue with
        // 1 depth added on
        let children = link.exec();