From 93eff45e530f336cc169b57c7e12904d18c69fa1 Mon Sep 17 00:00:00 2001 From: Jordan Doyle Date: Wed, 12 Feb 2020 21:01:08 +0000 Subject: [PATCH] Refactor Filters to not require private interfaces from the main lib Also renames Filters to a FilterSet to make the struct more understandable in its role. --- crawler/src/errors.rs | 2 +- crawler/src/filters.rs | 15 ++++++++++----- crawler/src/lib.rs | 15 ++++++++------- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/crawler/src/errors.rs b/crawler/src/errors.rs index 101f4fa..d1cdc5d 100644 --- a/crawler/src/errors.rs +++ b/crawler/src/errors.rs @@ -11,4 +11,4 @@ pub enum StorkError { HtmlParseError, #[fail(display = "failed to send http request")] HttpError, -} \ No newline at end of file +} diff --git a/crawler/src/filters.rs b/crawler/src/filters.rs index a93545a..3011410 100644 --- a/crawler/src/filters.rs +++ b/crawler/src/filters.rs @@ -1,10 +1,14 @@ use url::Url; -#[derive(Clone)] -pub struct Filters { +/// List of filters that can be used to filter down results from a +/// [Storkable](crate::Storkable). Once constructed, these can be +/// attached using [Storkable::with_filters](crate::Storkable::with_filters). +#[derive(Debug, Clone)] +pub struct FilterSet { url: Option>, } -impl Filters { +impl FilterSet { + /// Filter results by a URL predicate. pub fn add_url_filter(mut self, filter: UrlFilter) -> Self { if self.url.is_none() { self.url = Some(Vec::new()); @@ -16,10 +20,11 @@ impl Filters { self } - pub(crate) fn matches_url(&self, link: &crate::PageLink) -> bool { + /// Check if this `FilterSet` matches the given `link`. 
+ pub(crate) fn matches_url(&self, link: &Url) -> bool { if let Some(filters) = &self.url { for filter in filters.iter() { - if !filter.matches(&link.url) { + if !filter.matches(&link) { return false; } } diff --git a/crawler/src/lib.rs b/crawler/src/lib.rs index d09869b..e046c81 100644 --- a/crawler/src/lib.rs +++ b/crawler/src/lib.rs @@ -5,8 +5,8 @@ pub mod filters; pub mod errors; -pub use filters::Filters; pub use errors::StorkError; +pub use filters::FilterSet; pub use url::Url; @@ -24,7 +24,7 @@ use failure::ResultExt; /// A `Storkable` represents a website link which is traversable. pub struct Storkable { url: Url, - filters: Arc, + filters: Arc, client: Arc, parent: Option>, } @@ -32,7 +32,7 @@ impl Storkable { pub fn new(url: Url) -> Self { Self { url, - filters: Arc::new(Filters::default()), + filters: Arc::new(FilterSet::default()), client: Arc::new( reqwest::Client::builder() .user_agent(concat!( @@ -47,7 +47,8 @@ impl Storkable { } } - pub fn with_filters(mut self, filters: Filters) -> Self { + /// Attaches a [FilterSet] to this, and child, [Storkable]s. + pub fn with_filters(mut self, filters: FilterSet) -> Self { self.filters = Arc::new(filters); self } @@ -71,7 +72,7 @@ impl Storkable { while let Some(link) = links.next().await { let link = link?; - if !this.filters.matches_url(&link) { + if !this.filters.matches_url(&link.url) { continue; } @@ -86,7 +87,7 @@ impl Storkable { } } -pub(crate) struct PageLink { +struct PageLink { pub name: String, pub url: Url } @@ -120,4 +121,4 @@ fn get_all_links_from_page<'a>(storkable: &'a Storkable) -> impl futures::Stream } } } -} \ No newline at end of file +} -- libgit2 1.7.2