From 478bde40d1ca2fda1007d4d80a86e7d058554565 Mon Sep 17 00:00:00 2001 From: Jordan Doyle Date: Thu, 21 Jul 2022 20:32:26 +0100 Subject: [PATCH] Add markdown parsing to README with syntax highlighting --- Cargo.lock | 315 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 1 + src/git.rs | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------- src/git_cgi.rs | 1 + src/main.rs | 1 + src/syntax_highlight.rs | 39 +++++++++++++++++++++++++++++++++++++++ src/methods/filters.rs | 8 ++++++++ src/methods/repo.rs | 4 ++-- templates/repo/about.html | 15 ++++++++++++++- 9 files changed, 429 insertions(+), 23 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 459df44..58c6bad 100644 --- a/Cargo.lock +++ a/Cargo.lock @@ -227,7 +227,7 @@ "semver", "serde", "serde_yaml", - "syntect", + "syntect 5.0.0", "thiserror", "unicode-width", "walkdir", @@ -243,10 +243,46 @@ ] [[package]] +name = "bit-set" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e11e16035ea35e4e5997b393eacbf6f63983188f7a2ad25bfb13465f5ad59de" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + +[[package]] name = "bitflags" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "block-buffer" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0940dc441f31689269e10ac70eb1002a3a1d3ad1390e030043662eb7fe4688b" +dependencies = [ + "block-padding", + "byte-tools", + "byteorder", + "generic-array", +] + +[[package]] +name = "block-padding" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fa79dedbb091f449f1f39e53edf88d5dbe95f895dae6135a8d7b881fb5af73f5" +dependencies = [ + "byte-tools", +] [[package]] name = "bstr" @@ -262,6 +298,12 @@ version = "3.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37ccbd214614c6783386c1af30caf03192f17891059cecc394b4fb119e363de3" + +[[package]] +name = "byte-tools" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7" [[package]] name = "bytecount" @@ -362,6 +404,21 @@ "num-traits", "time 0.1.43", "winapi", +] + +[[package]] +name = "clap" +version = "2.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +dependencies = [ + "ansi_term", + "atty", + "bitflags", + "strsim 0.8.0", + "textwrap 0.11.0", + "unicode-width", + "vec_map", ] [[package]] @@ -375,9 +432,9 @@ "clap_lex", "indexmap", "once_cell", - "strsim", + "strsim 0.10.0", "termcolor", - "textwrap", + "textwrap 0.15.0", ] [[package]] @@ -411,6 +468,26 @@ ] [[package]] +name = "comrak" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15bf1e432b302dc6236dd0db580d182ce520bb24af82d6462e2d7a5e0a31c50d" +dependencies = [ + "clap 2.34.0", + "entities", + "lazy_static", + "memchr", + "pest", + "pest_derive", + "regex", + "shell-words", + "syntect 4.6.0", + "typed-arena", + "unicode_categories", + "xdg", +] + +[[package]] name = "concurrent-queue" version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -510,6 +587,35 @@ dependencies = [ "cfg-if 1.0.0", "once_cell", +] + +[[package]] +name = "digest" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5" +dependencies = [ + "generic-array", +] + +[[package]] +name = "dirs" +version = "4.0.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059" +dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6" +dependencies = [ + "libc", + "redox_users", + "winapi", ] [[package]] @@ -581,6 +687,12 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569" + +[[package]] +name = "entities" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5320ae4c3782150d900b79807611a59a99fc9a1d61d686faafc24b93fc8d7ca" [[package]] name = "error-chain" @@ -596,6 +708,22 @@ version = "2.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77f3309417938f28bf8228fcff79a4a37103981e3e186d2ccd19c74b38f4eb71" + +[[package]] +name = "fake-simd" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed" + +[[package]] +name = "fancy-regex" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d6b8560a05112eb52f04b00e5d3790c0dd75d9d980eb8a122fb23b92a623ccf" +dependencies = [ + "bit-set", + "regex", +] [[package]] name = "fastrand" @@ -759,6 +887,15 @@ checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" dependencies = [ "byteorder", +] + +[[package]] +name = "generic-array" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffdf9f34f1447443d37393cc6c2b8313aebddcd96906caf34e54c68d8e57d7bd" +dependencies = [ + "typenum", ] [[package]] @@ -971,6 +1108,12 @@ version = "1.4.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" @@ -1060,6 +1203,12 @@ dependencies = [ "libc", ] + +[[package]] +name = "maplit" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" [[package]] name = "matches" @@ -1287,6 +1436,12 @@ ] [[package]] +name = "opaque-debug" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c" + +[[package]] name = "openssl-probe" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1385,8 +1540,51 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" + +[[package]] +name = "pest" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53" +dependencies = [ + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "833d1ae558dc601e9a60366421196a8d94bc0ac980476d0b67e1d0988d72b2d0" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99b8db626e31e5b81787b9783425769681b347011cc59471e33ea46d2ea0cf55" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn", +] [[package]] +name = "pest_meta" +version = "2.1.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "54be6e404f5317079812fc8f9f5279de376d8856929e21c184ecf6bbd692a11d" +dependencies = [ + "maplit", + "pest", + "sha-1", +] + +[[package]] name = "phf" version = "0.7.24" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1665,6 +1863,17 @@ checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" dependencies = [ "bitflags", +] + +[[package]] +name = "redox_users" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" +dependencies = [ + "getrandom", + "redox_syscall", + "thiserror", ] [[package]] @@ -1712,7 +1921,8 @@ "bat", "bincode", "bytes", - "clap", + "clap 3.2.8", + "comrak", "futures", "git2", "hex", @@ -1726,7 +1936,7 @@ "rsass", "serde", "sled", - "syntect", + "syntect 5.0.0", "time 0.3.11", "timeago", "tokio", @@ -1855,6 +2065,18 @@ "ryu", "serde", "yaml-rust", +] + +[[package]] +name = "sha-1" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7d94d0bede923b3cea61f3f1ff57ff8cdfd77b400fb8f9998949e0cf04163df" +dependencies = [ + "block-buffer", + "digest", + "fake-simd", + "opaque-debug", ] [[package]] @@ -1865,6 +2087,12 @@ dependencies = [ "lazy_static", ] + +[[package]] +name = "shell-words" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde" [[package]] name = "signal-hook-registry" @@ -1949,6 +2177,12 @@ [[package]] name = "strsim" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" + +[[package]] +name = "strsim" version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" 
@@ -1980,6 +2214,29 @@ "quote", "syn", "unicode-xid", +] + +[[package]] +name = "syntect" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b20815bbe80ee0be06e6957450a841185fcf690fe0178f14d77a05ce2caa031" +dependencies = [ + "bincode", + "bitflags", + "fancy-regex", + "flate2", + "fnv", + "lazy_static", + "lazycell", + "onig", + "plist", + "regex-syntax", + "serde", + "serde_derive", + "serde_json", + "walkdir", + "yaml-rust", ] [[package]] @@ -2042,6 +2299,15 @@ dependencies = [ "libc", "winapi", +] + +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", ] [[package]] @@ -2285,6 +2551,24 @@ checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642" [[package]] +name = "typed-arena" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9b2228007eba4120145f785df0f6c92ea538f5a3635a612ecf4e334c8c1446d" + +[[package]] +name = "typenum" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" + +[[package]] +name = "ucd-trie" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89570599c4fe5585de2b388aab47e99f7fa4e9238a1399f707a02e356058141c" + +[[package]] name = "unicase" version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -2325,6 +2609,12 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "957e51f3646910546462e67d5f7599b9e4fb8acdd304b087a6494730f9eebf04" + +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" [[package]] name = "unix_mode" @@ -2364,6 +2654,12 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "vec_map" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" [[package]] name = "version_check" @@ -2556,6 +2852,15 @@ version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" + +[[package]] +name = "xdg" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4583db5cbd4c4c0303df2d15af80f0539db703fa1c68802d4cbbd2dd0f88f6" +dependencies = [ + "dirs", +] [[package]] name = "xml-rs" diff --git a/Cargo.toml b/Cargo.toml index d234d9c..2577212 100644 --- a/Cargo.toml +++ a/Cargo.toml @@ -11,6 +11,7 @@ bat = { version = "0.21", default-features = false, features = ["build-assets"] } bytes = "1.1" bincode = "1.3" +comrak = "0.14" clap = { version = "3.2", features = ["cargo"] } futures = "0.3" git2 = "0.14" diff --git a/src/git.rs b/src/git.rs index 6468db9..048b73c 100644 --- a/src/git.rs +++ a/src/git.rs @@ -1,6 +1,10 @@ +use std::ffi::OsStr; +use std::path::Path; use std::{borrow::Cow, fmt::Write, path::PathBuf, sync::Arc, time::Duration}; +use crate::syntax_highlight::ComrakSyntectAdapter; use bytes::{Bytes, BytesMut}; +use comrak::{ComrakOptions, ComrakPlugins}; use git2::{ BranchType, DiffFormat, DiffLineType, DiffOptions, DiffStatsFormat, ObjectType, Oid, Signature, }; @@ -14,7 +18,7 @@ pub struct Git { commits: Cache>, - readme_cache: Cache>>, + readme_cache: Cache)>>, refs: Cache>, syntax_set: SyntaxSet, } @@ -230,7 +234,7 @@ } #[instrument(skip(self))] - pub async fn readme(self: Arc) -> Option> { + pub 
async fn readme(self: Arc) -> Option<(ReadmeFormat, Arc)> { const README_FILES: &[&str] = &["README.md", "README", "README.txt"]; let git = self.git.clone(); @@ -244,19 +248,38 @@ let commit = head.peel_to_commit().unwrap(); let tree = commit.tree().unwrap(); - let blob = README_FILES - .iter() - .map(|file| tree.get_name(file)) - .next() - .flatten()? - .to_object(&repo) - .unwrap() - .into_blob() - .unwrap(); - - Some(Arc::from( - String::from_utf8(blob.content().to_vec()).unwrap(), - )) + for name in README_FILES { + let tree_entry = if let Some(file) = tree.get_name(name) { + file + } else { + continue; + }; + + let blob = if let Some(blob) = tree_entry + .to_object(&repo) + .ok() + .and_then(|v| v.into_blob().ok()) + { + blob + } else { + continue; + }; + + let content = if let Ok(content) = std::str::from_utf8(blob.content()) { + content + } else { + continue; + }; + + if Path::new(name).extension().and_then(OsStr::to_str) == Some("md") { + let value = parse_and_transform_markdown(content, &self.git.syntax_set); + return Some((ReadmeFormat::Markdown, Arc::from(value))); + } + + return Some((ReadmeFormat::Plaintext, Arc::from(content))); + } + + None }) .await .unwrap() @@ -311,6 +334,21 @@ }) .await } +} + +fn parse_and_transform_markdown(s: &str, syntax_set: &SyntaxSet) -> String { + let mut plugins = ComrakPlugins::default(); + + let highlighter = ComrakSyntectAdapter { syntax_set }; + plugins.render.codefence_syntax_highlighter = Some(&highlighter); + + comrak::markdown_to_html_with_plugins(s, &ComrakOptions::default(), &plugins) +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum ReadmeFormat { + Markdown, + Plaintext, } pub enum PathDestination { diff --git a/src/git_cgi.rs b/src/git_cgi.rs index c0443b2..029971a 100644 --- a/src/git_cgi.rs +++ a/src/git_cgi.rs @@ -1,9 +1,10 @@ use axum::body::{boxed, Body}; use axum::http::header::HeaderName; use axum::http::HeaderValue; use axum::response::Response; use std::str::FromStr; +// 
https://en.wikipedia.org/wiki/Common_Gateway_Interface pub fn cgi_to_response(buffer: &[u8]) -> Response { let mut headers = [httparse::EMPTY_HEADER; 10]; let (body_offset, headers) = httparse::parse_headers(buffer, &mut headers) diff --git a/src/main.rs b/src/main.rs index e5cb6ac..e205371 100644 --- a/src/main.rs +++ a/src/main.rs @@ -21,6 +21,7 @@ mod git_cgi; mod layers; mod methods; +mod syntax_highlight; const CRATE_VERSION: &str = clap::crate_version!(); diff --git a/src/syntax_highlight.rs b/src/syntax_highlight.rs new file mode 100644 index 0000000..c75068d 100644 --- /dev/null +++ a/src/syntax_highlight.rs @@ -1,0 +1,39 @@ +use comrak::adapters::SyntaxHighlighterAdapter; +use std::collections::HashMap; +use syntect::html::{ClassStyle, ClassedHTMLGenerator}; +use syntect::parsing::SyntaxSet; +use syntect::util::LinesWithEndings; + +pub struct ComrakSyntectAdapter<'a> { + pub(crate) syntax_set: &'a SyntaxSet, +} + +impl SyntaxHighlighterAdapter for ComrakSyntectAdapter<'_> { + fn highlight(&self, lang: Option<&str>, code: &str) -> String { + let syntax = lang + .and_then(|v| self.syntax_set.find_syntax_by_token(v)) + .unwrap_or_else(|| self.syntax_set.find_syntax_plain_text()); + + let mut html_generator = + ClassedHTMLGenerator::new_with_class_style(syntax, self.syntax_set, ClassStyle::Spaced); + + for line in LinesWithEndings::from(code) { + html_generator + .parse_html_for_line_which_includes_newline(line) + .unwrap(); + } + + format!( + "{}", + html_generator.finalize().replace('\n', "\n") + ) + } + + fn build_pre_tag(&self, _attributes: &HashMap) -> String { + r#"
"#.to_string()
+    }
+
+    fn build_code_tag(&self, _attributes: &HashMap<String, String>) -> String {
+        String::with_capacity(0)
+    }
+}
diff --git a/src/methods/filters.rs b/src/methods/filters.rs
index 95b52a1..f524809 100644
--- a/src/methods/filters.rs
+++ a/src/methods/filters.rs
@@ -16,3 +16,11 @@
 pub fn md5(s: &str) -> Result {
     Ok(hex::encode(&md5::compute(s).0))
 }
+
+#[allow(dead_code)]
+pub fn md(md: &str) -> Result {
+    Ok(comrak::markdown_to_html(
+        md,
+        &comrak::ComrakOptions::default(),
+    ))
+}
diff --git a/src/methods/repo.rs b/src/methods/repo.rs
index 5d6422e..4be9dcd 100644
--- a/src/methods/repo.rs
+++ a/src/methods/repo.rs
@@ -25,7 +25,7 @@
 use yoke::Yoke;
 
 use super::filters;
-use crate::git::{DetailedTag, FileWithContent, PathDestination, Refs, TreeItem};
+use crate::git::{DetailedTag, FileWithContent, PathDestination, ReadmeFormat, Refs, TreeItem};
 use crate::{git::Commit, into_response, layers::UnwrapInfallible, Git};
 
 #[derive(Clone)]
@@ -295,7 +295,7 @@
 #[template(path = "repo/about.html")]
 pub struct AboutView {
     repo: Repository,
-    readme: Option<Arc<str>>,
+    readme: Option<(ReadmeFormat, Arc<str>)>,
 }
 
 pub async fn handle_about(
diff --git a/templates/repo/about.html b/templates/repo/about.html
index 3f899d0..9fe7e67 100644
--- a/templates/repo/about.html
+++ a/templates/repo/about.html
@@ -1,10 +1,23 @@
 {% extends "repo/base.html" %}
 
+{% block head %}
+{% if let Some(readme) = readme %}
+    {% if readme.0 == crate::git::ReadmeFormat::Markdown %}
+        
+    {% endif %}
+{% endif %}
+{% endblock %}
+
 {% block about_nav_class %}active{% endblock %}
 
 {% block content %}
 {% if let Some(readme) = readme %}
-    
{{ readme }}
+ {% match readme.0 %} + {% when crate::git::ReadmeFormat::Markdown %} + {{ readme.1|safe }} + {% when crate::git::ReadmeFormat::Plaintext %} +
{{ readme.1 }}
+ {% endmatch %} {% else %} No README in repository HEAD. {% endif %} -- rgit 0.1.3