🏡 index : ~doyle/stork.git

author Jordan Doyle <jordan@doyle.la> 2020-02-12 21:01:08.0 +00:00:00
committer Jordan Doyle <jordan@doyle.la> 2020-02-13 5:28:42.0 +00:00:00
commit
93eff45e530f336cc169b57c7e12904d18c69fa1 [patch]
tree
f73cb9f77718b1fa29d8a3a7c2cb478033e19f73
parent
c8713fa1f7e5f78075308a5568b6bafaecf2563b
download
93eff45e530f336cc169b57c7e12904d18c69fa1.tar.gz

Refactor Filters to not require private interfaces from the main lib

Also renames Filters to FilterSet to make the struct's role easier to
understand.

Diff

 crawler/src/errors.rs  |  2 +-
 crawler/src/filters.rs | 15 +++++++++++++--
 crawler/src/lib.rs     | 15 +++++++++++----
 3 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/crawler/src/errors.rs b/crawler/src/errors.rs
index 101f4fa..d1cdc5d 100644
--- a/crawler/src/errors.rs
+++ a/crawler/src/errors.rs
@@ -11,4 +11,4 @@
    HtmlParseError,
    #[fail(display = "failed to send http request")]
    HttpError,
}
}
diff --git a/crawler/src/filters.rs b/crawler/src/filters.rs
index a93545a..3011410 100644
--- a/crawler/src/filters.rs
+++ a/crawler/src/filters.rs
@@ -1,10 +1,14 @@
use url::Url;

#[derive(Clone)]
pub struct Filters {
/// List of filters that can be used to filter down results from a

/// [Storkable](crate::Storkable). Once constructed, these can be

/// attached using [Storkable::with_filters](crate::Storkable::with_filters).

#[derive(Debug, Clone)]
pub struct FilterSet {
    url: Option<Vec<UrlFilter>>,
}
impl Filters {
impl FilterSet {
    /// Filter results by a URL predicate.

    pub fn add_url_filter(mut self, filter: UrlFilter) -> Self {
        if self.url.is_none() {
            self.url = Some(Vec::new());
@@ -16,10 +20,11 @@
        self
    }

    pub(crate) fn matches_url(&self, link: &crate::PageLink) -> bool {
    /// Check if this `FilterSet` matches the given `link`.

    pub(crate) fn matches_url(&self, link: &Url) -> bool {
        if let Some(filters) = &self.url {
            for filter in filters.iter() {
                if !filter.matches(&link.url) {
                if !filter.matches(&link) {
                    return false;
                }
            }
diff --git a/crawler/src/lib.rs b/crawler/src/lib.rs
index d09869b..e046c81 100644
--- a/crawler/src/lib.rs
+++ a/crawler/src/lib.rs
@@ -5,8 +5,8 @@
pub mod filters;
pub mod errors;

pub use filters::Filters;
pub use errors::StorkError;
pub use filters::FilterSet;

pub use url::Url;

@@ -24,7 +24,7 @@
/// A `Storkable` represents a website link which is traversable.

pub struct Storkable {
    url: Url,
    filters: Arc<Filters>,
    filters: Arc<FilterSet>,
    client: Arc<reqwest::Client>,
    parent: Option<Arc<Storkable>>,
}
@@ -32,7 +32,7 @@
    pub fn new(url: Url) -> Self {
        Self {
            url,
            filters: Arc::new(Filters::default()),
            filters: Arc::new(FilterSet::default()),
            client: Arc::new(
                reqwest::Client::builder()
                    .user_agent(concat!(
@@ -47,7 +47,8 @@
        }
    }

    pub fn with_filters(mut self, filters: Filters) -> Self {
    /// Attaches a [FilterSet] to this [Storkable] and its child [Storkable]s.

    pub fn with_filters(mut self, filters: FilterSet) -> Self {
        self.filters = Arc::new(filters);
        self
    }
@@ -71,7 +72,7 @@
            while let Some(link) = links.next().await {
                let link = link?;

                if !this.filters.matches_url(&link) {
                if !this.filters.matches_url(&link.url) {
                    continue;
                }

@@ -86,7 +87,7 @@
    }
}

pub(crate) struct PageLink {
struct PageLink {
    pub name: String,
    pub url: Url
}
@@ -120,4 +121,4 @@
            }
        }
    }
}
}