Fixes #1: stops the same Storkable from being yielded multiple times by a single parent Storkable
Diff
stork/Cargo.toml | 2 ++
stork/src/lib.rs | 30 ++++++++++++++++++++++++++++--
stork_http/src/lib.rs | 11 +++++++++--
3 files changed, 39 insertions(+), 4 deletions(-)
@@ -16,5 +16,7 @@ dyn-clone = "1.0.1"
futures = "0.3.4"
async-stream = "0.2.1"
twox-hash = ""
[dev-dependencies]
tokio = { version = "0.2", features = ["full"] }
\ No newline at end of file
@@ -27,10 +27,11 @@ use async_stream::try_stream;
use futures::prelude::*;
use std::pin::Pin;
use std::sync::Arc;
use std::sync::{Arc, RwLock};
use failure::Error;
use failure::ResultExt;
use std::hash::{Hash, Hasher};
@@ -53,9 +54,10 @@ pub struct Storkable<T: Unpin + PartialEq + Hash, C: StorkClient<T>> {
filters: FilterSet<T>,
client: Arc<C>,
parent: Option<Arc<Storkable<T, C>>>,
seen: Arc<RwLock<Vec<u64>>>,
}
impl<'a, T: Unpin + PartialEq + 'a, C: StorkClient<T> + 'a> Storkable<T, C> {
impl<'a, T: Unpin + PartialEq + Hash + 'a, C: StorkClient<T> + 'a> Storkable<T, C> {
pub fn new(val: T) -> Self {
@@ -64,6 +66,7 @@ impl<'a, T: Unpin + PartialEq + 'a, C: StorkClient<T> + 'a> Storkable<T, C> {
filters: FilterSet::default(),
client: Arc::new(C::default()),
parent: None,
seen: Arc::new(RwLock::new(Vec::new())),
}
}
@@ -113,6 +116,22 @@ impl<'a, T: Unpin + PartialEq + 'a, C: StorkClient<T> + 'a> Storkable<T, C> {
false
}
/// Records `value` as seen and reports whether it had already been seen.
///
/// The value is reduced to a 64-bit `XxHash64` digest and looked up in
/// `self.seen`. Returns `true` when the digest is already present;
/// otherwise stores the digest and returns `false`.
///
/// # Panics
///
/// Panics if the `seen` lock has been poisoned.
fn check_has_seen(&self, value: &T) -> bool {
    let mut hasher = twox_hash::XxHash64::default();
    value.hash(&mut hasher);
    let hash = hasher.finish();

    // Hold a single write guard across both the lookup and the insert.
    // Checking under a read guard and then re-locking for the write leaves
    // a window in which two callers can both miss and both push the hash.
    let mut seen = self.seen.write().unwrap();
    if seen.contains(&hash) {
        true
    } else {
        seen.push(hash);
        false
    }
}
@@ -131,6 +150,12 @@ impl<'a, T: Unpin + PartialEq + 'a, C: StorkClient<T> + 'a> Storkable<T, C> {
continue;
}
if this.check_has_seen(&child) {
continue;
}
@@ -143,6 +168,7 @@ impl<'a, T: Unpin + PartialEq + 'a, C: StorkClient<T> + 'a> Storkable<T, C> {
client: Arc::clone(&this.client),
filters: this.filters.clone(),
parent: Some(Arc::clone(&this)),
seen: Arc::new(RwLock::new(Vec::new())),
};
}
}
@@ -88,6 +88,7 @@ use failure::ResultExt;
use std::sync::Arc;
pub use reqwest::Client as ReqwestClient;
use std::hash::{Hash, Hasher};
pub type HttpStorkable = Storkable<Link, HttpStorkClient>;
@@ -107,7 +108,12 @@ impl Link {
}
/// Equality for `Link` is decided by comparing what `url()` returns for each side.
// NOTE(review): the two comparison lines below look like the removed and added
// sides of a diff hunk (string comparison vs. direct comparison) — confirm
// which one the final file keeps.
impl PartialEq for Link {
fn eq(&self, other: &Self) -> bool {
self.url().as_str() == other.url().as_str()
self.url() == other.url()
}
}
/// Hashes a `Link` by feeding the value returned by `url()` into the hasher,
/// so the hash is derived from the same data that `PartialEq` compares.
impl Hash for Link {
    fn hash<H: Hasher>(&self, hasher: &mut H) {
        let url = self.url();
        url.hash(hasher)
    }
}
impl std::str::FromStr for Link {
@@ -174,11 +180,12 @@ impl StorkClient<Link> for HttpStorkClient {
if let Some(href) = href {
let href = if href.starts_with('/') || !href.contains("://") {
let mut href = if href.starts_with('/') || !href.contains("://") {
root.join(href).context(StorkHttpError::UrlParseError)?
} else {
Url::parse(href).context(StorkHttpError::UrlParseError)?
};
href.set_fragment(None);
yield Link {
url: href,