🏡 index : ~doyle/stork.git

author Jordan Doyle <jordan@doyle.la> 2020-02-12 21:06:27.0 +00:00:00
committer Jordan Doyle <jordan@doyle.la> 2020-02-13 05:28:43.0 +00:00:00
commit
1b69b24a8f8aa604f999390c56a7d9f7be70bfbd [patch]
tree
7d98686dd623d18e023eee60c2c29c94746e6819
parent
93eff45e530f336cc169b57c7e12904d18c69fa1
download
1b69b24a8f8aa604f999390c56a7d9f7be70bfbd.tar.gz

Add a Scheme filter for links



Diff

 crawler/src/filters.rs | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/crawler/src/filters.rs b/crawler/src/filters.rs
index 3011410..b2dd894 100644
--- a/crawler/src/filters.rs
+++ b/crawler/src/filters.rs
@@ -33,25 +33,28 @@
        true
    }
}
impl Default for Filters {
impl Default for FilterSet {
    /// Creates an empty filter set.

    fn default() -> Self {
        Filters {
            url: None,
        }
        FilterSet { url: None }
    }
}

#[derive(Clone)]
#[derive(Debug, Clone)]
pub enum FilterType {
    StartsWith, EndsWith, Contains
    StartsWith,
    EndsWith,
    Contains,
}

#[derive(Clone)]
#[derive(Debug, Clone)]
pub enum UrlFilterType {
    Path(FilterType), Domain
    Path(FilterType),
    Domain,
    Scheme,
}

#[derive(Clone)]
#[derive(Debug, Clone)]
pub struct UrlFilter {
    kind: UrlFilterType,
    value: String,
@@ -76,12 +79,13 @@
            UrlFilterType::Path(FilterType::StartsWith) => url.path().starts_with(&self.value),
            UrlFilterType::Path(FilterType::EndsWith) => url.path().ends_with(&self.value),
            UrlFilterType::Path(FilterType::Contains) => url.path().contains(&self.value),
            UrlFilterType::Domain => url.host_str().map_or(false, |v| v == &self.value)
            UrlFilterType::Domain => url.host_str().map_or(false, |v| v == &self.value),
            UrlFilterType::Scheme => url.scheme() == &self.value,
        };

        match self.negated {
            true => !matches,
            false => matches
            false => matches,
        }
    }
}
}