Refactor Filters to not require private interfaces from the main lib
Also rename Filters to FilterSet so that the struct's role is easier
to understand.
Diff
crawler/src/errors.rs | 2 +-
crawler/src/filters.rs | 15 +++++++++++++--
crawler/src/lib.rs | 15 +++++++++++----
3 files changed, 19 insertions(+), 13 deletions(-)
@@ -11,4 +11,4 @@
HtmlParseError,
#[fail(display = "failed to send http request")]
HttpError,
}
}
@@ -1,10 +1,14 @@
use url::Url;
#[derive(Clone)]
pub struct Filters {
#[derive(Debug, Clone)]
pub struct FilterSet {
url: Option<Vec<UrlFilter>>,
}
impl Filters {
impl FilterSet {
pub fn add_url_filter(mut self, filter: UrlFilter) -> Self {
if self.url.is_none() {
self.url = Some(Vec::new());
@@ -16,10 +20,11 @@
self
}
pub(crate) fn matches_url(&self, link: &crate::PageLink) -> bool {
pub(crate) fn matches_url(&self, link: &Url) -> bool {
if let Some(filters) = &self.url {
for filter in filters.iter() {
if !filter.matches(&link.url) {
if !filter.matches(&link) {
return false;
}
}
@@ -5,8 +5,8 @@
pub mod filters;
pub mod errors;
pub use filters::Filters;
pub use errors::StorkError;
pub use filters::FilterSet;
pub use url::Url;
@@ -24,7 +24,7 @@
pub struct Storkable {
url: Url,
filters: Arc<Filters>,
filters: Arc<FilterSet>,
client: Arc<reqwest::Client>,
parent: Option<Arc<Storkable>>,
}
@@ -32,7 +32,7 @@
pub fn new(url: Url) -> Self {
Self {
url,
filters: Arc::new(Filters::default()),
filters: Arc::new(FilterSet::default()),
client: Arc::new(
reqwest::Client::builder()
.user_agent(concat!(
@@ -47,7 +47,8 @@
}
}
pub fn with_filters(mut self, filters: Filters) -> Self {
pub fn with_filters(mut self, filters: FilterSet) -> Self {
self.filters = Arc::new(filters);
self
}
@@ -71,7 +72,7 @@
while let Some(link) = links.next().await {
let link = link?;
if !this.filters.matches_url(&link) {
if !this.filters.matches_url(&link.url) {
continue;
}
@@ -86,7 +87,7 @@
}
}
pub(crate) struct PageLink {
struct PageLink {
pub name: String,
pub url: Url
}
@@ -120,4 +121,4 @@
}
}
}
}
}