Add some basic rustdocs and run clippy over the codebase
Diff
README.md | 2 +-
crawler/Cargo.toml | 5 ++-
crawler/src/lib.rs | 94 +++++++++++++++++++++++++++++++++++++++++++++++--------
src/main.rs | 21 +-----------
4 files changed, 91 insertions(+), 31 deletions(-)
@@ -0,0 +1,2 @@
# stork.rs
`stork` is a basic website scraper written in Rust.
\ No newline at end of file
@@ -17,4 +17,7 @@ async-stream = ""
digest = ""
meowhash = ""
generic-array = ""
\ No newline at end of file
generic-array = ""
[dev-dependencies]
tokio = { version = "0.2", features = ["full"] }
\ No newline at end of file
@@ -1,9 +1,17 @@
#![recursion_limit="512"]
#![recursion_limit = "512"]
#[macro_use]
extern crate failure_derive;
#[macro_use] extern crate failure_derive;
pub mod filters;
pub mod errors;
pub mod filters;
pub use errors::StorkError;
pub use filters::FilterSet;
@@ -11,17 +19,58 @@ pub use filters::FilterSet;
pub use url::Url;
use select::document::Document;
use select::predicate::{Attr, Name, And, Not};
use select::predicate::{And, Attr, Name, Not};
use futures::prelude::*;
use futures::pin_mut;
use async_stream::try_stream;
use futures::pin_mut;
use futures::prelude::*;
use std::sync::Arc;
use failure::Error;
use failure::ResultExt;
#[derive(Debug, Clone)]
pub struct Storkable {
url: Url,
filters: Arc<FilterSet>,
@@ -29,6 +78,8 @@ pub struct Storkable {
parent: Option<Arc<Storkable>>,
}
impl Storkable {
pub fn new(url: Url) -> Self {
Self {
url,
@@ -53,15 +104,29 @@ impl Storkable {
self
}
pub fn with_client(mut self, client: reqwest::Client) -> Self {
self.client = Arc::new(client);
self
}
pub fn url(&self) -> &Url {
&self.url
}
pub fn parent(&self) -> Option<&Storkable> {
self.parent.as_ref().map(Arc::as_ref)
}
pub fn exec<'a>(self) -> impl futures::Stream<Item = Result<Storkable, Error>> + 'a {
let this = Arc::new(self);
@@ -89,14 +154,19 @@ impl Storkable {
struct PageLink {
pub name: String,
pub url: Url
pub url: Url,
}
fn get_all_links_from_page<'a>(storkable: &'a Storkable) -> impl futures::Stream<Item = Result<PageLink, Error>> + 'a {
fn get_all_links_from_page<'a>(
storkable: &'a Storkable,
) -> impl futures::Stream<Item = Result<PageLink, Error>> + 'a {
try_stream! {
let root = storkable.url.clone();
let doc = storkable.client.get(root.clone())
.send().await.context(StorkError::HttpError)?
.bytes().await.context(StorkError::HttpError)?;
@@ -7,22 +7,11 @@ async fn main() -> failure::Fallible<()> {
let args: Vec<String> = std::env::args().collect();
let url = args.get(1).expect("Expecting URL parameter").parse().unwrap();
let stream = stork::Storkable::new(url)
.exec();
let stream = stork::Storkable::new(url).exec();
pin_mut!(stream);
while let Some(link) = stream.next().await {
if let Err(err) = link {
eprintln!("{:#?}", err);
continue;
}
let link = link.unwrap();
let link = link?;
println!("{}", link.url());
@@ -30,11 +19,7 @@ async fn main() -> failure::Fallible<()> {
pin_mut!(stream);
while let Some(link) = stream.next().await {
if let Err(err) = link {
eprintln!("{:#?}", err);
continue;
}
println!("> {}", link.unwrap().url());
println!("> {}", link?.url());
}
}