use std::hash::{Hash, Hasher};
use futures::{pin_mut, StreamExt};
use failure::Fallible;
use stork::FilterSet;
use stork_http::{filters::*, HttpStorkable, Link};
/// Crawl links starting from a URL and print each distinct link found.
#[derive(argh::FromArgs)]
struct Args {
    /// stop following links once this depth is reached (links yielded by the
    /// start URL are depth 0); unlimited when omitted
    #[argh(option)]
    max_depth: Option<usize>,

    /// restrict the crawl with a domain filter built from the start URL's host
    #[argh(switch, short = 'o')]
    same_origin: bool,

    /// link to start crawling from
    #[argh(positional)]
    url: Link,
}
fn make_tuple_fn(
depth: usize,
) -> impl Fn(failure::Fallible<HttpStorkable>) -> (Fallible<HttpStorkable>, usize) {
move |v| (v, depth)
}
/// Crawls outward from the URL given on the command line and prints every
/// distinct link it encounters to stdout.
///
/// Each link stream is tagged with its depth and merged into a single
/// `SelectAll` queue. Links whose hash has already been seen are skipped,
/// failures to fetch a link are reported on stderr without stopping the
/// crawl, and links at or beyond `--max-depth` are printed but not followed.
///
/// # Errors
///
/// Returns an error if `--same-origin` is requested but the start URL has no
/// host to build the domain filter from.
#[tokio::main]
async fn main() -> failure::Fallible<()> {
    let args: Args = argh::from_env();
    let url = args.url;

    let mut filters = FilterSet::default();
    if args.same_origin {
        // The URL is user input, so a host-less URL is reported as an error
        // instead of panicking.
        let host = url
            .url()
            .host()
            .ok_or_else(|| {
                failure::err_msg("--same-origin requires a start URL that has a host")
            })?
            .to_string();
        filters = filters.add_filter(DomainFilter::new(host));
    }

    let queue = futures::stream::SelectAll::new();
    pin_mut!(queue);

    // Seed the queue with the links found on the start URL (depth 0).
    queue.push(Box::pin(
        HttpStorkable::new(url)
            .with_filters(filters)
            .exec()
            .map(make_tuple_fn(0)),
    ));

    // Hashes of links already printed; a set keeps the membership test cheap
    // no matter how many links have been visited.
    let mut seen = std::collections::HashSet::new();

    while let Some((link, depth)) = queue.next().await {
        let link = match link {
            Ok(link) => link,
            Err(e) => {
                eprintln!("Failed to grab a link: {}", e);
                continue;
            }
        };

        let hash = {
            let mut hasher = twox_hash::XxHash64::default();
            link.val().hash(&mut hasher);
            hasher.finish()
        };

        // `insert` returns false when the hash was already present.
        if !seen.insert(hash) {
            continue;
        }

        println!("{}", link.val().url());

        // Links at the depth limit are still printed, just not expanded.
        if args.max_depth.map_or(false, |max_depth| depth >= max_depth) {
            continue;
        }

        queue.push(Box::pin(link.exec().map(make_tuple_fn(depth + 1))));
    }

    Ok(())
}