🏡 index : ~doyle/stork.git

author Jordan Doyle <jordan@doyle.la> 2020-02-14 13:08:42.0 +00:00:00
committer Jordan Doyle <jordan@doyle.la> 2020-02-14 13:08:42.0 +00:00:00
commit
5d3d72522eb212a2771085f1f0fac2d8fb5f7e9e [patch]
tree
75d5270368244630f948832827e93f8aa889b9f6
parent
cb677ad04c59235ce73e1ba4b953b7d87fd405a8
download
5d3d72522eb212a2771085f1f0fac2d8fb5f7e9e.tar.gz

Fixes #1, stops the same Storkable being yielded multiple times by a single Storkable



Diff

 stork/Cargo.toml      |  2 ++
 stork/src/lib.rs      | 30 +++++++++++++++++++++++++++++-
 stork_http/src/lib.rs | 11 +++++++++++
 3 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/stork/Cargo.toml b/stork/Cargo.toml
index 8982023..8394ade 100644
--- a/stork/Cargo.toml
+++ a/stork/Cargo.toml
@@ -16,5 +16,7 @@
futures = "0.3.4"
async-stream = "0.2.1"

twox-hash = ""

[dev-dependencies]
tokio = { version = "0.2", features = ["full"] }
diff --git a/stork/src/lib.rs b/stork/src/lib.rs
index 364722b..4f3212a 100644
--- a/stork/src/lib.rs
+++ a/stork/src/lib.rs
@@ -27,10 +27,11 @@
use futures::prelude::*;

use std::pin::Pin;
use std::sync::Arc;
use std::sync::{Arc, RwLock};

use failure::Error;
use failure::ResultExt;
use std::hash::{Hash, Hasher};

/// A [Storkable] represents a "thing" which is traversable ("storkable").

///

@@ -53,9 +54,10 @@
    filters: FilterSet<T>,
    client: Arc<C>,
    parent: Option<Arc<Storkable<T, C>>>,
    seen: Arc<RwLock<Vec<u64>>>,
}

impl<'a, T: Unpin + PartialEq + 'a, C: StorkClient<T> + 'a> Storkable<T, C> {
impl<'a, T: Unpin + PartialEq + Hash + 'a, C: StorkClient<T> + 'a> Storkable<T, C> {
    /// Instantiates a new [Storkable] from a T, storking can then

    /// begin on the given entrypoint using the [Storkable::exec] method.

    pub fn new(val: T) -> Self {
@@ -64,6 +66,7 @@
            filters: FilterSet::default(),
            client: Arc::new(C::default()),
            parent: None,
            seen: Arc::new(RwLock::new(Vec::new())),
        }
    }

@@ -113,6 +116,22 @@
        false
    }

    /// Checks if this Storkable has seen this `value` before. If it
    /// hasn't, this method will return false but any subsequent calls
    /// with the same value will return true.
    ///
    /// Values are tracked by their 64-bit XxHash64 digest rather than by
    /// the value itself, so two distinct values whose digests collide are
    /// treated as the same value.
    ///
    /// # Panics
    ///
    /// Panics if the lock guarding the seen-list has been poisoned by a
    /// panic in another thread.
    fn check_has_seen(&self, value: &T) -> bool {
        let mut hasher = twox_hash::XxHash64::default();
        value.hash(&mut hasher);
        let hash = hasher.finish();

        // Hold a single write guard across both the lookup and the insert.
        // Checking under a read lock and then re-locking for the write would
        // let two concurrent callers both observe "not seen" for the same
        // value and both return false.
        let mut seen = self
            .seen
            .write()
            .expect("seen-list lock poisoned by a panic in another thread");

        if seen.contains(&hash) {
            true
        } else {
            seen.push(hash);
            false
        }
    }

    /// Start storking this [Storkable].

    ///

    /// Finds all the followable links on this [Storkable] and returns

@@ -128,6 +147,12 @@
                let child = child.context(StorkError::ClientError)?;

                if !this.filters.matches(&child) {
                    continue;
                }

                // ensure we haven't returned this link before from this
                // Storkable
                if this.check_has_seen(&child) {
                    continue;
                }

@@ -143,6 +168,7 @@
                    client: Arc::clone(&this.client),
                    filters: this.filters.clone(),
                    parent: Some(Arc::clone(&this)),
                    seen: Arc::new(RwLock::new(Vec::new())),
                };
            }
        }
diff --git a/stork_http/src/lib.rs b/stork_http/src/lib.rs
index 5de75ef..246f4b1 100644
--- a/stork_http/src/lib.rs
+++ a/stork_http/src/lib.rs
@@ -88,6 +88,7 @@
use std::sync::Arc;

pub use reqwest::Client as ReqwestClient;
use std::hash::{Hash, Hasher};

pub type HttpStorkable = Storkable<Link, HttpStorkClient>;

@@ -107,9 +108,14 @@
}
impl PartialEq for Link {
    fn eq(&self, other: &Self) -> bool {
        self.url().as_str() == other.url().as_str()
        self.url() == other.url()
    }
}
/// Hashes a [Link] by feeding the value returned from `url()` into the
/// hasher — the same value the `PartialEq` implementation compares.
impl Hash for Link {
    fn hash<H: Hasher>(&self, state: &mut H) {
        let url = self.url();
        url.hash(state);
    }
}
impl std::str::FromStr for Link {
    type Err = failure::Error;

@@ -174,11 +180,12 @@

                if let Some(href) = href {
                    // if this looks like a relative url append it to the root
                    let href = if href.starts_with('/') || !href.contains("://") {
                    let mut href = if href.starts_with('/') || !href.contains("://") {
                        root.join(href).context(StorkHttpError::UrlParseError)?
                    } else {
                        Url::parse(href).context(StorkHttpError::UrlParseError)?
                    };
                    href.set_fragment(None);

                    yield Link {
                        url: href,