From c5fd07bab831e014a9859393f71ddf5ca19cf52d Mon Sep 17 00:00:00 2001 From: Jordan Doyle Date: Sun, 29 Sep 2024 04:07:03 +0400 Subject: [PATCH] Move from syntect to tree-sitter --- Cargo.lock | 396 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------- Cargo.toml | 31 +++++++++++++++++++++++++------ src/git.rs | 224 ++++++++++++++++++++++++-------------------------------------------------------- src/main.rs | 55 ++++++++++++++++++++++++++++++------------------------- src/syntax_highlight.rs | 294 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- src/theme.rs | 130 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ themes/README | 3 +++ themes/github_light.toml | 232 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ themes/onedark.toml | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ grammar/html/highlights.scm | 20 ++++++++++++++++++++ grammar/html/injections.scm | 10 ++++++++++ 11 files changed, 1091 insertions(+), 424 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b529169..2c4bd6c 100644 --- a/Cargo.lock +++ a/Cargo.lock @@ -46,15 +46,6 @@ checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" [[package]] -name = "ansi_colours" -version = "1.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14eec43e0298190790f41679fe69ef7a829d2a2ddd78c8c00339e84710e435fe" -dependencies = [ - "rgb", -] - -[[package]] name = "anstream" version = "0.6.15" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -314,38 +305,8 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "823388e228f614e9558c6804262db37960ec8821856535f5c3f59913140558f8" -dependencies = [ - "serde", -] - -[[package]] -name = "bat" -version = "0.24.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9dcc9e5637c2330d8eb7b920f2aa5d9e184446c258466f825ea1412c7614cc86" dependencies = [ - "ansi_colours", - "bincode", - "bytesize", - "clircle", - "console", - "content_inspector", - "encoding_rs", - "flate2", - "globset", - "home", - "nu-ansi-term 0.49.0", - "once_cell", - "path_abs", - "plist", - "regex", - "semver", "serde", - "serde_yaml", - "syntect", - "thiserror", - "unicode-width", - "walkdir", ] [[package]] @@ -425,16 +386,16 @@ ] [[package]] -name = "bumpalo" -version = "3.16.0" +name = "buf-min" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" +checksum = "22d5698cf6842742ed64805705798f8b351fff53fa546fd45c52184bee58dc90" [[package]] -name = "bytemuck" -version = "1.18.0" +name = "bumpalo" +version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94bbb0ad554ad961ddc5da507a12a29b14e4ae5bda06b19f575a3e6079d2e2ae" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "byteorder" @@ -447,12 +408,6 @@ version = "1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "428d9aa8fbc0670b7b8d6030a7fadd0f86151cae55e4dbbece15f3780a3dfaf3" - -[[package]] -name = "bytesize" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3e368af43e418a04d52505cf3dbc23dda4e3407ae2fa99fd0e4f308ce546acc" [[package]] name = "bzip2-sys" @@ -552,18 +507,6 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" - -[[package]] -name = "clircle" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8e87cbed5354f17bd8ca8821a097fb62599787fe8f611743fad7ee156a0a600" -dependencies = [ - "cfg-if", - "libc", - "serde", - "winapi", -] [[package]] name = "clru" @@ -614,19 +557,6 @@ 
checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" dependencies = [ "crossbeam-utils", -] - -[[package]] -name = "console" -version = "0.15.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" -dependencies = [ - "encode_unicode", - "lazy_static", - "libc", - "unicode-width", - "windows-sys 0.52.0", ] [[package]] @@ -723,15 +653,6 @@ ] [[package]] -name = "content_inspector" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7bda66e858c683005a53a9a60c69a4aca7eeaa45d124526e389f7aec8e62f38" -dependencies = [ - "memchr", -] - -[[package]] name = "cpufeatures" version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -915,12 +836,6 @@ version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" - -[[package]] -name = "encode_unicode" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" [[package]] name = "encoding_rs" @@ -1791,19 +1706,6 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" - -[[package]] -name = "globset" -version = "0.4.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15f1ce686646e7f1e19bf7d5533fe443a45dbfb990e00629110797578b42fb19" -dependencies = [ - "aho-corasick", - "bstr", - "log", - "regex-automata 0.4.7", - "regex-syntax 0.8.4", -] [[package]] name = "h2" @@ -2351,15 +2253,6 @@ dependencies = [ "overload", "winapi", -] - -[[package]] -name = "nu-ansi-term" -version = "0.49.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c073d3c1930d0751774acf49e66653acecb416c3a54c6ec095a9b11caddb5a68" -dependencies = [ - "windows-sys 0.48.0", ] [[package]] @@ -2495,15 +2388,6 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17359afc20d7ab31fdb42bb844c8b3bb1dabd7dcf7e68428492da7f16966fcef" - -[[package]] -name = "path_abs" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05ef02f6342ac01d8a93b65f96db53fe68a92a15f41144f97fb00a9e669633c3" -dependencies = [ - "std_prelude", -] [[package]] name = "percent-encoding" @@ -2773,15 +2657,6 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" - -[[package]] -name = "rgb" -version = "0.8.50" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57397d16646700483b67d2dd6511d79318f9d057fdbd21a4066aeac8b41d310a" -dependencies = [ - "bytemuck", -] [[package]] name = "rgit" @@ -2792,7 +2667,6 @@ "askama", "axum", "axum-macros", - "bat", "bincode", "bytes", "clap", @@ -2815,21 +2689,36 @@ "rust-ini", "serde", "simdutf8", - "syntect", "tar", "time", "timeago", "tokio", "tokio-stream", "tokio-util", + "toml", "tower 0.5.1", "tower-http", "tower-layer", "tower-service", "tracing", "tracing-subscriber", + "tree-sitter-css", + "tree-sitter-fortran", + "tree-sitter-haskell", + "tree-sitter-highlight", + "tree-sitter-html", + "tree-sitter-java", + "tree-sitter-javascript", + "tree-sitter-md", + "tree-sitter-rust", + "tree-sitter-scss", + "tree-sitter-svelte-ng", + "tree-sitter-toml-ng", + "tree-sitter-typescript", + "tree-sitter-yaml", "unix_mode", "uuid", + "v_htmlescape", "xxhash-rust", "yoke", ] @@ -2982,28 +2871,24 @@ ] [[package]] -name = "serde_urlencoded" -version = "0.7.1" +name = "serde_spanned" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1" dependencies = [ - "form_urlencoded", - "itoa", - "ryu", "serde", ] [[package]] -name = "serde_yaml" -version = "0.9.34+deprecated" +name = "serde_urlencoded" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" dependencies = [ - "indexmap 2.5.0", + "form_urlencoded", "itoa", "ryu", "serde", - "unsafe-libyaml", ] [[package]] @@ -3117,12 +3002,6 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] -name = "std_prelude" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8207e78455ffdf55661170876f88daf85356e4edd54e0a3dbc79586ca1e50cbe" - -[[package]] name = "strsim" version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -3368,6 +3247,40 @@ "futures-sink", "pin-project-lite", "tokio", +] + +[[package]] +name = "toml" +version = "0.8.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1ed1f98e3fdc28d6d910e6737ae6ab1a93bf1985935a1193e68f93eeb68d24e" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.22.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" +dependencies = [ + "indexmap 2.5.0", + "serde", + "serde_spanned", + "toml_datetime", + "winnow", ] [[package]] @@ -3515,7 +3428,7 @@ 
checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" dependencies = [ "matchers", - "nu-ansi-term 0.46.0", + "nu-ansi-term", "once_cell", "parking_lot", "regex", @@ -3525,6 +3438,166 @@ "tracing", "tracing-core", "tracing-log", +] + +[[package]] +name = "tree-sitter" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20f4cd3642c47a85052a887d86704f4eac272969f61b686bdd3f772122aabaff" +dependencies = [ + "cc", + "regex", + "regex-syntax 0.8.4", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-css" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d0018d6b1692a806f9cddaa1e5616951fd58840c39a0b21401b55ab3df12292" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-fortran" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50d655214a848bfb63dfdc2e7eeef5c3c323807a220b3117a1aef46b2bb95a12" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-haskell" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b92c8a4c4ceaae105621b00624ee8d9029fb23116f400832e4be30d0639d054" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-highlight" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "395d7a477a4504fd7d5e4d003e0dd41bd5b9c4985d53592a943a8354ec452dae" +dependencies = [ + "lazy_static", + "regex", + "thiserror", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-html" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d52d710a3723360ebade986d3f0ae2aa2c3bcfb87bb1cdf60988ec51c81c40d" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-java" +version = "0.23.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b38b26736e6e97421760201f7a91c859f3b0d44382d48ac18aa963828f784ebf" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-javascript" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59e1f62f8babb640b909f30675d1addeb1f17802f2a4d2af287569753b243977" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-language" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2545046bd1473dac6c626659cc2567c6c0ff302fc8b84a56c4243378276f7f57" + +[[package]] +name = "tree-sitter-md" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17f968c22a01010b83fc960455ae729db08dbeb6388617d9113897cb9204b030" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-rust" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cffbbcb780348fbae8395742ae5b34c1fd794e4085d43aac9f259387f9a84dc8" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-scss" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33909a9ca86390ebbf3461e9949c4bbe2767d2d024b486306d27616641d4ba24" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-svelte-ng" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef0a71f9cf5e94373cc86c64893630c8a29bb25d3390a248268d08af2165fa37" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-toml-ng" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "695d20cd83acf16c02c773f03e76d7b43b19883d4e2ce3652a8f06b5e0da7455" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = 
"tree-sitter-typescript" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aecf1585ae2a9dddc2b1d4c0e2140b2ec9876e2a25fd79de47fcf7dae0384685" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-yaml" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aad27ec46ad343d8b514f64dd3fdffb478c592ece561b6c935d90ef55589c6b6" +dependencies = [ + "cc", + "tree-sitter", ] [[package]] @@ -3598,12 +3671,6 @@ dependencies = [ "tinyvec", ] - -[[package]] -name = "unicode-width" -version = "0.1.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" [[package]] name = "unicode-xid" @@ -3622,12 +3689,6 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b55eedc365f81a3c32aea49baf23fa965e3cd85bcc28fb8045708c7388d124ef" - -[[package]] -name = "unsafe-libyaml" -version = "0.2.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" [[package]] name = "url" @@ -3653,6 +3714,15 @@ checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" dependencies = [ "getrandom", +] + +[[package]] +name = "v_htmlescape" +version = "0.15.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e8257fbc510f0a46eb602c10215901938b5c2a7d5e70fc11483b1d3c9b5b18c" +dependencies = [ + "buf-min", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index cee9e25..0f09299 100644 --- a/Cargo.toml +++ a/Cargo.toml @@ -14,9 +14,6 @@ askama = "0.12.0" axum = { version = "0.7", features = ["macros"] } axum-macros = "0.4.1" -bat = { version = "0.24.0", default-features = false, features = [ - "build-assets", -] } bincode = "1.3" bytes = "1.5" clap = { version = "4.4.10", features = ["cargo", "derive"] } @@ -26,7 
+23,14 @@ const_format = "0.2" flate2 = { version = "1.0", default-features = false, features = ["zlib-ng"] } futures = "0.3" -gix = { version = "0.66", default-features = false, features = ["fast-sha1", "zlib-ng", "tracing", "parallel", "blob-diff", "revision"] } +gix = { version = "0.66", default-features = false, features = [ + "fast-sha1", + "zlib-ng", + "tracing", + "parallel", + "blob-diff", + "revision", +] } httparse = "1.7" humantime = "2.1" itertools = "0.13.0" @@ -38,21 +42,36 @@ rust-ini = "0.21.1" serde = { version = "1.0", features = ["derive", "rc"] } simdutf8 = "0.1.5" -syntect = "5" tar = "0.4" -time = { version = "0.3", features = ["serde"] } +time = { version = "0.3", features = ["serde", "formatting"] } timeago = { version = "0.4.2", default-features = false } tokio = { version = "1.19", features = ["full", "tracing"] } tokio-stream = "0.1" tokio-util = { version = "0.7.10", features = ["io"] } +toml = "0.8" tower = "0.5" tower-http = { version = "0.6", features = ["cors", "timeout"] } tower-layer = "0.3" tower-service = "0.3" tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["env-filter"] } +tree-sitter-css = "0.23" +tree-sitter-fortran = "0.1" +tree-sitter-haskell = "0.23" +tree-sitter-highlight = "0.23" +tree-sitter-html = "0.23" +tree-sitter-java = "0.23" +tree-sitter-javascript = "0.23" +tree-sitter-md = "0.3" +tree-sitter-rust = "0.23" +tree-sitter-scss = "1.0" +tree-sitter-svelte-ng = "1.0" +tree-sitter-toml-ng = "0.6.0" +tree-sitter-typescript = "0.23" +tree-sitter-yaml = "0.6" unix_mode = "0.1" uuid = { version = "1.7", features = ["v4"] } +v_htmlescape = { version = "0.15", features = ["bytes-buf"] } xxhash-rust = { version = "0.8.12", features = ["const_xxh3"] } yoke = { version = "0.7.1", features = ["derive"] } diff --git a/src/git.rs b/src/git.rs index 9086544..b091838 100644 --- a/src/git.rs +++ a/src/git.rs @@ -26,16 +26,12 @@ ObjectId, ThreadSafeRepository, }; use moka::future::Cache; -use syntect::{ - 
parsing::{BasicScopeStackOp, ParseState, Scope, ScopeStack, SyntaxSet, SCOPE_REPO}, - util::LinesWithEndings, -}; use tar::Builder; use time::{OffsetDateTime, UtcOffset}; use tracing::{error, instrument, warn}; use crate::{ - syntax_highlight::ComrakSyntectAdapter, + syntax_highlight::{format_file, format_file_inner, ComrakHighlightAdapter, FileIdentifier}, unified_diff_builder::{Callback, UnifiedDiffBuilder}, }; @@ -45,12 +41,11 @@ commits: Cache<(ObjectId, bool), Arc>, readme_cache: Cache)>>, open_repositories: Cache, - syntax_set: SyntaxSet, } impl Git { - #[instrument(skip(syntax_set))] - pub fn new(syntax_set: SyntaxSet) -> Self { + #[instrument] + pub fn new() -> Self { Self { commits: Cache::builder() .time_to_live(Duration::from_secs(30)) @@ -64,7 +59,6 @@ .time_to_idle(Duration::from_secs(120)) .max_capacity(100) .build(), - syntax_set, } } } @@ -155,14 +149,14 @@ let extension = path .extension() .or_else(|| path.file_name()) - .map_or_else(|| Cow::Borrowed(""), OsStr::to_string_lossy); + .and_then(OsStr::to_str) + .unwrap_or_default(); let content = match (formatted, simdutf8::basic::from_utf8(&blob.data)) { (true, Err(_)) => Content::Binary(vec![]), (true, Ok(data)) => Content::Text(Cow::Owned(format_file( data, - &extension, - &self.git.syntax_set, + FileIdentifier::Extension(extension), )?)), (false, Err(_)) => Content::Binary(blob.take_data()), (false, Ok(_data)) => Content::Text(Cow::Owned(unsafe { @@ -302,7 +296,7 @@ }; if Path::new(name).extension().and_then(OsStr::to_str) == Some("md") { - let value = parse_and_transform_markdown(content, &self.git.syntax_set); + let value = parse_and_transform_markdown(content); return Ok(Some((ReadmeFormat::Markdown, Arc::from(value)))); } @@ -342,8 +336,7 @@ let commit = head .peel_to_commit() .context("Couldn't find commit HEAD of repository refers to")?; - let (diff_output, diff_stats) = - fetch_diff_and_stats(&repo, &commit, highlighted.then_some(&self.git.syntax_set))?; + let (diff_output, diff_stats) = 
fetch_diff_and_stats(&repo, &commit, highlighted)?; let mut commit = Commit::try_from(commit)?; commit.diff_stats = diff_stats; @@ -428,11 +421,8 @@ let commit = repo.find_commit(commit)?; - let (diff_output, diff_stats) = fetch_diff_and_stats( - &repo, - &commit, - highlighted.then_some(&self.git.syntax_set), - )?; + let (diff_output, diff_stats) = + fetch_diff_and_stats(&repo, &commit, highlighted)?; let mut commit = Commit::try_from(commit)?; commit.diff_stats = diff_stats; @@ -539,11 +529,10 @@ } } -fn parse_and_transform_markdown(s: &str, syntax_set: &SyntaxSet) -> String { +fn parse_and_transform_markdown(s: &str) -> String { let mut plugins = ComrakPlugins::default(); - let highlighter = ComrakSyntectAdapter { syntax_set }; - plugins.render.codefence_syntax_highlighter = Some(&highlighter); + plugins.render.codefence_syntax_highlighter = Some(&ComrakHighlightAdapter); // enable gfm extensions // https://github.github.com/gfm/ @@ -738,11 +727,11 @@ } } -#[instrument(skip(repo, commit, syntax_set))] +#[instrument(skip(repo, commit))] fn fetch_diff_and_stats( repo: &gix::Repository, commit: &gix::Commit<'_>, - syntax_set: Option<&SyntaxSet>, + highlight: bool, ) -> Result<(String, String)> { const WIDTH: usize = 80; @@ -770,14 +759,13 @@ ¤t_tree, &mut repo.diff_resource_cache_for_tree_diff()?, |change| { - if let Some(syntax_set) = syntax_set { + if highlight { DiffBuilder { output: &mut diff_output, resource_cache: &mut resource_cache, diffs: &mut diffs, formatter: SyntaxHighlightedDiffFormatter::new( change.location.to_path().unwrap(), - syntax_set, ), } .handle(change) @@ -870,161 +858,6 @@ path: String, insertions: usize, deletions: usize, -} - -fn format_file(content: &str, extension: &str, syntax_set: &SyntaxSet) -> Result { - let mut out = String::new(); - format_file_inner(&mut out, content, extension, syntax_set, true)?; - Ok(out) -} - -// TODO: this is in some serious need of refactoring -fn format_file_inner( - out: &mut String, - content: &str, - 
extension: &str, - syntax_set: &SyntaxSet, - code_tag: bool, -) -> Result<()> { - let syntax = syntax_set - .find_syntax_by_extension(extension) - .unwrap_or_else(|| syntax_set.find_syntax_plain_text()); - let mut parse_state = ParseState::new(syntax); - - let mut scope_stack = ScopeStack::new(); - let mut span_empty = false; - let mut span_start = 0; - let mut open_spans = Vec::new(); - - for line in LinesWithEndings::from(content) { - if code_tag { - out.push_str(""); - } - - if line.len() > 2048 { - // avoid highlighting overly complex lines - let line = if code_tag { line.trim_end() } else { line }; - write!(out, "{}", Escape(line))?; - } else { - let mut cur_index = 0; - let ops = parse_state.parse_line(line, syntax_set)?; - out.reserve(line.len() + ops.len() * 8); - - if code_tag { - for scope in &open_spans { - out.push_str(""); - } - } - - // mostly copied from syntect, but slightly modified to keep track - // of open spans, so we can open and close them for each line - for &(i, ref op) in &ops { - if i > cur_index { - let prefix = &line[cur_index..i]; - let prefix = if code_tag { - prefix.trim_end_matches('\n') - } else { - prefix - }; - write!(out, "{}", Escape(prefix))?; - - span_empty = false; - cur_index = i; - } - - scope_stack.apply_with_hook(op, |basic_op, _| match basic_op { - BasicScopeStackOp::Push(scope) => { - span_start = out.len(); - span_empty = true; - out.push_str(""); - } - BasicScopeStackOp::Pop => { - open_spans.pop(); - if span_empty { - out.truncate(span_start); - } else { - out.push_str(""); - } - span_empty = false; - } - })?; - } - - let line = if code_tag { line.trim_end() } else { line }; - if line.len() > cur_index { - write!(out, "{}", Escape(&line[cur_index..]))?; - } - - if code_tag { - for _scope in &open_spans { - out.push_str(""); - } - } - } - - if code_tag { - out.push_str("\n"); - } - } - - if !code_tag { - for _scope in &open_spans { - out.push_str(""); - } - } - - Ok(()) -} - -fn scope_to_classes(s: &mut String, 
scope: Scope) { - let repo = SCOPE_REPO.lock().unwrap(); - for i in 0..(scope.len()) { - let atom = scope.atom_at(i as usize); - let atom_s = repo.atom_str(atom); - if i != 0 { - s.push(' '); - } - s.push_str(atom_s); - } -} - -// Copied from syntect as it isn't exposed from there. -pub struct Escape<'a>(pub &'a str); - -impl<'a> fmt::Display for Escape<'a> { - fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { - let Escape(s) = *self; - let pile_o_bits = s; - let mut last = 0; - for (i, ch) in s.bytes().enumerate() { - match ch as char { - '<' | '>' | '&' | '\'' | '"' => { - fmt.write_str(&pile_o_bits[last..i])?; - let s = match ch as char { - '>' => ">", - '<' => "<", - '&' => "&", - '\'' => "'", - '"' => """, - _ => unreachable!(), - }; - fmt.write_str(s)?; - last = i + 1; - } - _ => {} - } - } - - if last < s.len() { - fmt.write_str(&pile_o_bits[last..])?; - } - Ok(()) - } } trait DiffFormatter { @@ -1217,21 +1050,18 @@ } struct SyntaxHighlightedDiffFormatter<'a> { - syntax_set: &'a SyntaxSet, - extension: Cow<'a, str>, + extension: &'a str, } impl<'a> SyntaxHighlightedDiffFormatter<'a> { - fn new(path: &'a Path, syntax_set: &'a SyntaxSet) -> Self { + fn new(path: &'a Path) -> Self { let extension = path .extension() .or_else(|| path.file_name()) - .map_or_else(|| Cow::Borrowed(""), OsStr::to_string_lossy); + .and_then(OsStr::to_str) + .unwrap_or_default(); - Self { - syntax_set, - extension, - } + Self { extension } } fn write(&self, output: &mut String, class: &str, data: &str) { @@ -1239,8 +1069,7 @@ format_file_inner( output, data, - self.extension.as_ref(), - self.syntax_set, + FileIdentifier::Extension(self.extension), false, ) .unwrap(); @@ -1251,7 +1080,7 @@ impl<'a> DiffFormatter for SyntaxHighlightedDiffFormatter<'a> { fn file_header(&self, output: &mut String, data: Arguments<'_>) { write!(output, r#""#).unwrap(); - format_file_inner(output, &data.to_string(), "patch", self.syntax_set, false).unwrap(); + write!(output, "{data}").unwrap(); 
writeln!(output, r#""#).unwrap(); } @@ -1263,14 +1092,7 @@ _left_content: &[u8], _right_content: &[u8], ) { - format_file_inner( - output, - &format!("Binary files {left} and {right} differ"), - "patch", - self.syntax_set, - false, - ) - .unwrap(); + write!(output, "Binary files {left} and {right} differ").unwrap(); } } diff --git a/src/main.rs b/src/main.rs index 566cf86..d33fa01 100644 --- a/src/main.rs +++ a/src/main.rs @@ -21,12 +21,10 @@ routing::get, Extension, Router, }; -use bat::assets::HighlightingAssets; use clap::Parser; use const_format::formatcp; use database::schema::SCHEMA_VERSION; use rocksdb::{Options, SliceTransform}; -use syntect::html::ClassStyle; use tokio::{ net::TcpListener, signal::unix::{signal, SignalKind}, @@ -44,6 +42,7 @@ }, git::Git, layers::logger::LoggingMiddleware, + theme::Theme, }; mod database; @@ -51,6 +50,7 @@ mod layers; mod methods; mod syntax_highlight; +mod theme; mod unified_diff_builder; const CRATE_VERSION: &str = clap::crate_version!(); @@ -113,6 +113,7 @@ } #[tokio::main] +#[allow(clippy::too_many_lines)] async fn main() -> Result<(), anyhow::Error> { let args: Args = Args::parse(); @@ -141,29 +142,31 @@ let indexer_wakeup_task = run_indexer(db.clone(), args.scan_path.clone(), args.refresh_interval); - let bat_assets = HighlightingAssets::from_binary(); - let syntax_set = bat_assets.get_syntax_set().unwrap().clone(); - - let theme = bat_assets.get_theme("GitHub"); - let css = syntect::html::css_for_theme_with_class_style(theme, ClassStyle::Spaced).unwrap(); - let css = Box::leak( - format!(r#"@media (prefers-color-scheme: light){{{css}}}"#) - .into_boxed_str() - .into_boxed_bytes(), - ); - HIGHLIGHT_CSS_HASH.set(build_asset_hash(css)).unwrap(); - - let dark_theme = bat_assets.get_theme("TwoDark"); - let dark_css = - syntect::html::css_for_theme_with_class_style(dark_theme, ClassStyle::Spaced).unwrap(); - let dark_css = Box::leak( - format!(r#"@media (prefers-color-scheme: dark){{{dark_css}}}"#) - .into_boxed_str() - 
.into_boxed_bytes(), - ); - DARK_HIGHLIGHT_CSS_HASH - .set(build_asset_hash(dark_css)) - .unwrap(); + let css = { + let theme = toml::from_str::(include_str!("../themes/github_light.toml")) + .unwrap() + .build_css(); + let css = Box::leak( + format!(r#"@media (prefers-color-scheme: light){{{theme}}}"#) + .into_boxed_str() + .into_boxed_bytes(), + ); + HIGHLIGHT_CSS_HASH.set(build_asset_hash(css)).unwrap(); + css + }; + + let dark_css = { + let theme = toml::from_str::(include_str!("../themes/onedark.toml")) + .unwrap() + .build_css(); + let css = Box::leak( + format!(r#"@media (prefers-color-scheme: dark){{{theme}}}"#) + .into_boxed_str() + .into_boxed_bytes(), + ); + DARK_HIGHLIGHT_CSS_HASH.set(build_asset_hash(css)).unwrap(); + css + }; let static_favicon = |content: &'static [u8]| { move || async move { @@ -211,7 +214,7 @@ .fallback(methods::repo::service) .layer(TimeoutLayer::new(args.request_timeout.into())) .layer(layer_fn(LoggingMiddleware)) - .layer(Extension(Arc::new(Git::new(syntax_set)))) + .layer(Extension(Arc::new(Git::new()))) .layer(Extension(db)) .layer(Extension(Arc::new(args.scan_path))) .layer(CorsLayer::new()); diff --git a/src/syntax_highlight.rs b/src/syntax_highlight.rs index f197351..8b427a4 100644 --- a/src/syntax_highlight.rs +++ a/src/syntax_highlight.rs @@ -1,44 +1,189 @@ -use std::{collections::HashMap, io::Write}; +use std::{ + cell::RefCell, + collections::HashMap, + fmt::Write as FmtWrite, + io::{ErrorKind, Write as IoWrite}, + sync::LazyLock, +}; use comrak::adapters::SyntaxHighlighterAdapter; -use syntect::{ - html::{ClassStyle, ClassedHTMLGenerator}, - parsing::SyntaxSet, - util::LinesWithEndings, -}; +use tracing::debug; +use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter}; + +thread_local! { + static HIGHLIGHTER: RefCell = RefCell::new(Highlighter::new()); +} + +macro_rules! count { + () => (0); + ($e:expr) => (1); + ($e:expr, $($rest:expr),*) => (1 + count!($($rest),*)); +} + +macro_rules! 
define_classes { + ($($name:literal => $class:literal),*,) => { + static HIGHLIGHT_NAMES: [&str; count!($($name),*)] = [ + $($name),* + ]; + + static HIGHLIGHT_CLASSES: [&str; count!($($name),*)] = [ + $($class),* + ]; + }; +} + +define_classes! { + "keyword.directive" => "keyword directive", + "markup.strikethrough" => "markup strikethrough", + "markup.link" => "markup link", + "keyword.control.conditional" => "keyword control conditional", + "markup.bold" => "markup bold", + "diff.plus" => "diff plus", + "markup.heading.2" => "markup heading 2", + "markup" => "markup", + "diff.delta" => "diff delta", + "variable.other.member" => "variable other member", + "namespace" => "namespace", + "comment.line" => "comment line", + "function" => "function", + "keyword.operator" => "keyword operator", + "punctuation.bracket" => "punctuation bracket", + "markup.list" => "markup list", + "type.builtin" => "type builtin", + "keyword.storage.modifier" => "keyword storage modifier", + "constant" => "constant", + "markup.italic" => "markup italic", + "variable" => "variable", + "keyword" => "keyword", + "punctuation.special" => "punctuation special", + "string.special.path" => "string special path", + "keyword.storage.type" => "keyword storage type", + "markup.heading.5" => "markup heading 5", + "markup.heading.6" => "markup heading 6", + "markup.link.label" => "markup link label", + "markup.list.numbered" => "markup list numbered", + "diff.delta.moved" => "diff delta moved", + "constant.numeric" => "constant numeric", + "markup.heading" => "markup heading", + "markup.link.text" => "markup link text", + "keyword.function" => "keyword function", + "string.special.url" => "string special url", + "keyword.control.return" => "keyword control return", + "keyword.control.repeat" => "keyword control repeat", + "constant.builtin" => "constant builtin", + "type.enum.variant" => "type enum variant", + "markup.raw.block" => "markup raw block", + "markup.heading.3" => "markup heading 3", + 
"escape" => "escape", + "comment.block" => "comment block", + "constant.numeric.integer" => "constant numeric integer", + "punctuation.delimiter" => "punctuation delimiter", + "constructor" => "constructor", + "type" => "type", + "string.regexp" => "string regexp", + "variable.parameter" => "variable parameter", + "markup.quote" => "markup quote", + "string.special" => "string special", + "constant.numeric.float" => "constant numeric float", + "constant.character.escape" => "constant character escape", + "tag" => "tag", + "keyword.storage" => "keyword storage", + "string" => "string", + "function.macro" => "function macro", + "markup.list.unnumbered" => "markup list unnumbered", + "diff.minus" => "diff minus", + "punctuation" => "punctuation", + "markup.link.url" => "markup link url", + "function.method" => "function method", + "markup.raw" => "markup raw", + "function.special" => "function special", + "attribute" => "attribute", + "operator" => "operator", + "special" => "special", + "function.builtin" => "function builtin", + "diff" => "diff", + "markup.heading.4" => "markup heading 4", + "keyword.control" => "keyword control", + "markup.list.unchecked" => "markup list unchecked", + "keyword.control.exception" => "keyword control exception", + "constant.builtin.boolean" => "constant builtin boolean", + "markup.heading.1" => "markup heading 1", + "markup.heading.marker" => "markup heading marker", + "constant.character" => "constant character", + "markup.raw.inline" => "markup raw inline", + "variable.builtin" => "variable builtin", + "variable.other" => "variable other", + "tag.builtin" => "tag builtin", + "type.enum" => "type enum", + "comment.block.documentation" => "comment block documentation", + "comment" => "comment", + "string.special.symbol" => "string special symbol", + "label" => "label", + "keyword.control.import" => "keyword control import", + "markup.list.checked" => "markup list checked", +} + +macro_rules! 
build_highlighter_configs { + ($($i:literal => $($extension:literal)|* => $($token:literal)|* => $config:expr),*,) => { + static BUILD_HIGHLIGHTER_CONFIGS: LazyLock<[HighlightConfiguration; count!($($config),*)]> = LazyLock::new(|| [ + $({ + let mut config = $config.unwrap(); + config.configure(&HIGHLIGHT_NAMES); + config + }),* + ]); + + pub fn fetch_highlighter_config(extension: &str) -> Option<&'static HighlightConfiguration> { + match extension { + $($($extension)|* => Some(&BUILD_HIGHLIGHTER_CONFIGS[$i])),*, + _ => None, + } + } + + pub fn fetch_highlighter_config_by_token(extension: &str) -> Option<&'static HighlightConfiguration> { + match extension { + $($($token)|* => Some(&BUILD_HIGHLIGHTER_CONFIGS[$i])),*, + _ => None, + } + } + }; +} -pub struct ComrakSyntectAdapter<'a> { - pub(crate) syntax_set: &'a SyntaxSet, +build_highlighter_configs! { + 0 => "java" => "java" => HighlightConfiguration::new(tree_sitter_java::LANGUAGE.into(), "java", tree_sitter_java::HIGHLIGHTS_QUERY, "", ""), + 1 => "html" => "html" => HighlightConfiguration::new(tree_sitter_html::LANGUAGE.into(), "html", include_str!("../grammar/html/highlights.scm"), include_str!("../grammar/html/injections.scm"), ""), + 2 => "md" => "markdown" => HighlightConfiguration::new(tree_sitter_md::LANGUAGE.into(), "markdown", tree_sitter_md::HIGHLIGHT_QUERY_BLOCK, tree_sitter_md::INJECTION_QUERY_BLOCK, ""), + 3 => "rs" => "rust" => HighlightConfiguration::new(tree_sitter_rust::LANGUAGE.into(), "rust", tree_sitter_rust::HIGHLIGHTS_QUERY, tree_sitter_rust::INJECTIONS_QUERY, tree_sitter_rust::TAGS_QUERY), + 4 => "toml" => "toml" => HighlightConfiguration::new(tree_sitter_toml_ng::language(), "toml", tree_sitter_toml_ng::HIGHLIGHTS_QUERY, "", ""), + 5 => "yaml" | "yml" => "yaml" | "yml" => HighlightConfiguration::new(tree_sitter_yaml::language(), "yaml", tree_sitter_yaml::HIGHLIGHTS_QUERY, "", ""), + 6 => "hs" => "haskell" => HighlightConfiguration::new(tree_sitter_haskell::LANGUAGE.into(), "haskell", 
tree_sitter_haskell::HIGHLIGHTS_QUERY, tree_sitter_haskell::INJECTIONS_QUERY, tree_sitter_haskell::LOCALS_QUERY), + 7 => "f" | "f90" | "for" => "fortran" => HighlightConfiguration::new(tree_sitter_fortran::LANGUAGE.into(), "fortran", "", "", ""), + 8 => "svelte" => "svelte" => HighlightConfiguration::new(tree_sitter_svelte_ng::LANGUAGE.into(), "svelte", tree_sitter_svelte_ng::HIGHLIGHTS_QUERY, tree_sitter_svelte_ng::INJECTIONS_QUERY, tree_sitter_svelte_ng::LOCALS_QUERY), + 9 => "js" => "js" | "javascript" => HighlightConfiguration::new(tree_sitter_javascript::LANGUAGE.into(), "javascript", tree_sitter_javascript::HIGHLIGHT_QUERY, tree_sitter_javascript::INJECTIONS_QUERY, tree_sitter_javascript::LOCALS_QUERY), + 10 => "jsx" => "jsx" => HighlightConfiguration::new(tree_sitter_javascript::LANGUAGE.into(), "jsx", tree_sitter_javascript::JSX_HIGHLIGHT_QUERY, tree_sitter_javascript::INJECTIONS_QUERY, tree_sitter_javascript::LOCALS_QUERY), + 11 => "ts" => "ts" | "typescript" => HighlightConfiguration::new(tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(), "typescript", tree_sitter_typescript::HIGHLIGHTS_QUERY, "", tree_sitter_typescript::TAGS_QUERY), + 12 => "tsx" => "tsx" => HighlightConfiguration::new(tree_sitter_typescript::LANGUAGE_TSX.into(), "tsx", tree_sitter_typescript::HIGHLIGHTS_QUERY, "", tree_sitter_typescript::TAGS_QUERY), + 13 => "scss" => "scss" => HighlightConfiguration::new(tree_sitter_scss::language(), "scss", tree_sitter_scss::HIGHLIGHTS_QUERY, "", ""), + 14 => "css" => "css" => HighlightConfiguration::new(tree_sitter_css::LANGUAGE.into(), "css", tree_sitter_css::HIGHLIGHTS_QUERY, "", ""), } + +pub struct ComrakHighlightAdapter; -impl SyntaxHighlighterAdapter for ComrakSyntectAdapter<'_> { +impl SyntaxHighlighterAdapter for ComrakHighlightAdapter { fn write_highlighted( &self, - output: &mut dyn Write, + output: &mut dyn IoWrite, lang: Option<&str>, code: &str, ) -> std::io::Result<()> { - let syntax = lang - .and_then(|v| 
self.syntax_set.find_syntax_by_token(v)) - .unwrap_or_else(|| self.syntax_set.find_syntax_plain_text()); - - let mut html_generator = - ClassedHTMLGenerator::new_with_class_style(syntax, self.syntax_set, ClassStyle::Spaced); - - for line in LinesWithEndings::from(code) { - let _res = html_generator.parse_html_for_line_which_includes_newline(line); - } - - write!( - output, - "{}", - html_generator.finalize().replace('\n', "\n") - ) + let out = format_file(code, FileIdentifier::Token(lang.unwrap_or_default())) + .map_err(|e| std::io::Error::new(ErrorKind::Other, e))?; + output.write_all(out.as_bytes()) } fn write_pre_tag( &self, - output: &mut dyn Write, + output: &mut dyn IoWrite, _attributes: HashMap, ) -> std::io::Result<()> { write!(output, r#"
"#)
@@ -46,9 +191,102 @@
 
     fn write_code_tag(
         &self,
-        _output: &mut dyn Write,
+        _output: &mut dyn IoWrite,
         _attributes: HashMap,
     ) -> std::io::Result<()> {
         Ok(())
     }
+}
+
+/// Selects which lookup is used to find the highlighter configuration for a
+/// piece of content.
+#[derive(Copy, Clone)]
+pub enum FileIdentifier<'a> {
+    /// Resolved through `fetch_highlighter_config`.
+    Extension(&'a str),
+    /// Resolved through `fetch_highlighter_config_by_token`.
+    Token(&'a str),
+}
+
+/// Renders `content` into a freshly allocated string.
+///
+/// Thin wrapper around `format_file_inner` that always passes
+/// `code_tag = true`.
+///
+/// # Errors
+///
+/// Propagates any error returned by `format_file_inner`.
+pub fn format_file(content: &str, identifier: FileIdentifier<'_>) -> anyhow::Result<String> {
+    let mut out = String::new();
+    format_file_inner(&mut out, content, identifier, true)?;
+    Ok(out)
+}
+
+/// Appends an HTML-escaped rendering of `content` to `out`, one output line
+/// per input line.
+///
+/// The highlighter configuration is chosen from `identifier`. When no
+/// configuration matches, every line is escaped and written without any
+/// highlight markup. Otherwise each highlight reported by the highlighter is
+/// wrapped in a span whose classes come from `HIGHLIGHT_CLASSES`.
+///
+/// `code_tag` selects the per-line prefix and suffix written around each line.
+///
+/// # Errors
+///
+/// Returns an error if the highlighter fails to start, if it yields a failed
+/// event, or if formatting into `out` fails.
+pub fn format_file_inner(
+    out: &mut String,
+    content: &str,
+    identifier: FileIdentifier<'_>,
+    code_tag: bool,
+) -> anyhow::Result<()> {
+    let config = match identifier {
+        FileIdentifier::Extension(v) => fetch_highlighter_config(v),
+        FileIdentifier::Token(v) => fetch_highlighter_config_by_token(v),
+    };
+
+    // NOTE(review): the tag literals in this function were blank in the text
+    // under review (markup had been stripped from it). `<code>`/`</code>` and
+    // the `<span>` pair below are reconstructed from the `code_tag` flag and
+    // the `.highlight.*` selectors built in theme.rs - confirm against upstream.
+    let line_prefix = if code_tag { "<code>" } else { "" };
+
+    let line_suffix = if code_tag { "</code>\n" } else { "\n" };
+
+    let Some(config) = config else {
+        // No configuration for this identifier: emit escaped plain text.
+        for line in content.lines() {
+            out.push_str(line_prefix);
+            v_htmlescape::b_escape(line.as_bytes(), out);
+            out.push_str(line_suffix);
+        }
+
+        return Ok(());
+    };
+
+    HIGHLIGHTER.with_borrow_mut(|highlighter| {
+        let events = highlighter.highlight(config, content.as_bytes(), None, |extension| {
+            debug!(extension, "Highlighter switch requested");
+            // Consult the token table only when the extension table has no match.
+            fetch_highlighter_config(extension)
+                .or_else(|| fetch_highlighter_config_by_token(extension))
+        })?;
+
+        // True while a line prefix has been written without its matching suffix.
+        let mut line_open = true;
+        out.push_str(line_prefix);
+
+        for event in events {
+            let event = event?;
+
+            if !line_open {
+                out.push_str(line_prefix);
+                line_open = true;
+            }
+
+            match event {
+                HighlightEvent::Source { start, end } => {
+                    let chunk = &content[start..end];
+
+                    // A chunk may cover several lines; close and reopen the
+                    // line wrapper at every interior line break.
+                    // NOTE(review): highlight spans opened earlier are not
+                    // closed here, so a multi-line highlight straddles the
+                    // line wrapper - verify the resulting nesting is intended.
+                    for (i, line) in chunk.lines().enumerate() {
+                        if i != 0 {
+                            out.push_str(line_suffix);
+                            out.push_str(line_prefix);
+                        }
+
+                        v_htmlescape::b_escape(line.as_bytes(), out);
+                    }
+
+                    // `lines()` drops a trailing newline, so finish that line here.
+                    if chunk.ends_with('\n') {
+                        out.push_str(line_suffix);
+                        line_open = false;
+                    }
+                }
+                HighlightEvent::HighlightStart(highlight) => {
+                    write!(
+                        out,
+                        r#"<span class="highlight {}">"#,
+                        HIGHLIGHT_CLASSES[highlight.0]
+                    )?;
+                }
+                HighlightEvent::HighlightEnd => {
+                    out.push_str("</span>");
+                }
+            }
+        }
+
+        // Content that does not end in a newline leaves the last line open.
+        if line_open {
+            out.push_str(line_suffix);
+        }
+
+        Ok::<_, anyhow::Error>(())
+    })?;
+
+    Ok(())
 }
diff --git a/src/theme.rs b/src/theme.rs
new file mode 100644
index 0000000..11aa6d1 100644
--- /dev/null
+++ a/src/theme.rs
@@ -1,0 +1,130 @@
+use std::{
+    collections::HashMap,
+    fmt::{Formatter, Write},
+};
+
+use serde::{
+    de::{value::MapAccessDeserializer, Error, MapAccess, Visitor},
+    Deserialize, Deserializer,
+};
+
+/// A colour theme: a named palette plus the style attached to each theme key.
+// NOTE(review): the map type parameters were missing from the text under
+// review; they are restored from how `get_color` and `build_css` use the fields.
+#[derive(Deserialize)]
+pub struct Theme {
+    /// Named colours, looked up by `get_color`.
+    palette: HashMap<String, String>,
+    /// Every other top-level key of the theme, with its style.
+    #[serde(flatten)]
+    definitions: HashMap<String, PaletteReference>,
+}
+
+/// The style attached to a single theme key.
+pub enum PaletteReference {
+    /// Produced from a bare string value; `build_css` emits it as `color`.
+    Foreground(String),
+    /// Produced from a table value.
+    WithModifiers(PaletteReferenceWithModifiers),
+}
+
+/// Accepts either a bare string (`Foreground`) or a table (`WithModifiers`).
+// NOTE(review): the generic parameters and `Result` type arguments were missing
+// from the text under review; the standard serde signatures are restored here.
+impl<'de> Deserialize<'de> for PaletteReference {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        struct PaletteReferenceVisitor;
+
+        impl<'de> Visitor<'de> for PaletteReferenceVisitor {
+            type Value = PaletteReference;
+
+            fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result {
+                formatter.write_str("palette reference")
+            }
+
+            // A string value is stored as the foreground reference.
+            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
+            where
+                E: Error,
+            {
+                Ok(PaletteReference::Foreground(v.to_string()))
+            }
+
+            // A table value is handed to the derived deserializer of
+            // `PaletteReferenceWithModifiers`.
+            fn visit_map<A>(self, map: A) -> Result<Self::Value, A::Error>
+            where
+                A: MapAccess<'de>,
+            {
+                PaletteReferenceWithModifiers::deserialize(MapAccessDeserializer::new(map))
+                    .map(PaletteReference::WithModifiers)
+            }
+        }
+
+        deserializer.deserialize_any(PaletteReferenceVisitor)
+    }
+}
+
+/// Table form of a theme entry.
+// NOTE(review): the `Option`/`Vec` type parameters were missing from the text
+// under review; they are restored from how `build_css` uses the fields.
+#[derive(Deserialize)]
+pub struct PaletteReferenceWithModifiers {
+    /// Background colour reference, emitted by `build_css` as `background`.
+    bg: Option<String>,
+    /// Foreground colour reference, emitted by `build_css` as `color`.
+    fg: Option<String>,
+    /// Text modifiers; empty when the key is absent.
+    #[serde(default)]
+    modifiers: Vec<Modifiers>,
+}
+
+/// Text modifiers a theme entry may list under `modifiers`.
+///
+/// Deserialized from their `snake_case` names (for example `crossed_out`).
+#[derive(Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum Modifiers {
+    /// Rendered by `build_css` as an underline text decoration.
+    Underlined,
+    /// Rendered by `build_css` as `font-weight:bold`.
+    Bold,
+    /// Rendered by `build_css` as `font-style:italic`.
+    Italic,
+    /// Rendered by `build_css` as a line-through text decoration.
+    CrossedOut,
+    /// Accepted when parsing; `build_css` emits nothing for it.
+    Reversed,
+    /// Accepted when parsing; `build_css` emits nothing for it.
+    Dim,
+}
+
+impl Theme {
+    /// Resolves `reference` to a CSS colour value.
+    ///
+    /// A reference starting with `#` is returned unchanged; anything else is
+    /// looked up in the palette.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `reference` does not start with `#` and is not a palette key.
+    fn get_color<'a>(&'a self, reference: &'a str) -> &'a str {
+        if reference.starts_with('#') {
+            reference
+        } else {
+            self.palette
+                .get(reference)
+                .unwrap_or_else(|| panic!("bad palette ref {reference}"))
+        }
+    }
+
+    /// Builds one CSS rule per theme definition and returns them concatenated.
+    ///
+    /// Each rule uses the selector `.highlight.<key>`, so a dotted key becomes
+    /// a chain of class selectors.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a definition names a colour that `get_color` cannot resolve.
+    pub fn build_css(&self) -> String {
+        let mut out = String::new();
+
+        for (kind, palette_ref) in &self.definitions {
+            write!(out, ".highlight.{kind} {{").unwrap();
+
+            match palette_ref {
+                PaletteReference::Foreground(color) => {
+                    let color = self.get_color(color);
+                    write!(out, "color:{color};").unwrap();
+                }
+                PaletteReference::WithModifiers(PaletteReferenceWithModifiers {
+                    bg,
+                    fg,
+                    modifiers,
+                }) => {
+                    if let Some(color) = bg {
+                        let color = self.get_color(color);
+                        write!(out, "background:{color};").unwrap();
+                    }
+
+                    if let Some(color) = fg {
+                        let color = self.get_color(color);
+                        write!(out, "color:{color};").unwrap();
+                    }
+
+                    // Underline and line-through share the `text-decoration`
+                    // property; two separate declarations would let the later
+                    // one replace the earlier, so they are merged into one.
+                    let mut underline = false;
+                    let mut line_through = false;
+
+                    for modifier in modifiers {
+                        match modifier {
+                            Modifiers::Underlined => underline = true,
+                            Modifiers::Bold => out.push_str("font-weight:bold;"),
+                            Modifiers::Italic => out.push_str("font-style:italic;"),
+                            Modifiers::CrossedOut => line_through = true,
+                            Modifiers::Reversed | Modifiers::Dim => {}
+                        }
+                    }
+
+                    out.push_str(match (underline, line_through) {
+                        (true, true) => "text-decoration:underline line-through;",
+                        (true, false) => "text-decoration:underline;",
+                        (false, true) => "text-decoration:line-through;",
+                        (false, false) => "",
+                    });
+                }
+            }
+
+            out.push('}');
+        }
+
+        out
+    }
+}
diff --git a/themes/README b/themes/README
new file mode 100644
index 0000000..bed8274 100644
--- /dev/null
+++ a/themes/README
@@ -1,0 +1,3 @@
+These themes are sourced from Helix in the same format they publish them in.
+
+https://github.com/helix-editor/helix/tree/82dd96369302f60a9c83a2d54d021458f82bcd36/runtime/themes
diff --git a/themes/github_light.toml b/themes/github_light.toml
new file mode 100644
index 0000000..f8b5180 100644
--- /dev/null
+++ a/themes/github_light.toml
@@ -1,0 +1,232 @@
+# Author : OwOSwordsman 
+# An unofficial GitHub theme, generated using colors from: https://primer.style/primitives/colors
+# Credit goes to the original VSCode theme: https://github.com/primer/github-vscode-theme
+# Only the Light and Dark variants were specifically tested
+
+attribute = "fg.default"
+keyword = "scale.red.5"
+"keyword.directive" = "scale.red.5"          # -- preprocessor comments (#if in C)
+namespace = "scale.orange.6"
+punctuation = "fg.default"
+"punctuation.delimiter" = "fg.default"
+operator = "scale.blue.8"
+special = "scale.blue.8"
+"variable.other.member" = "scale.blue.8"
+variable = "fg.default"
+"variable.parameter" = "scale.orange.6"
+"variable.builtin" = "scale.red.5"
+type = "scale.orange.6"
+"type.builtin" = "scale.blue.6"
+constructor = "done.fg"
+function = "done.fg"
+"function.macro" = "done.fg"
+tag = "scale.green.6"
+comment = "fg.muted"
+constant = "scale.blue.6"
+"constant.builtin" = "scale.blue.6"
+string = "scale.blue.8"
+"constant.numeric" = "scale.blue.6"
+"constant.character.escape" = "scale.blue.6"
+# used for lifetimes
+label = "scale.red.5"
+
+"markup.heading" = "scale.blue.6"
+"markup.bold" = { modifiers = ["bold"] }
+"markup.italic" = { modifiers = ["italic"] }
+"markup.strikethrough" = { modifiers = ["crossed_out"] }
+"markup.link.url" = { modifiers = ["underlined"] }
+"markup.link.text" = { fg = "scale.blue.8", modifiers = ["underlined"] }
+"markup.raw" = "scale.blue.6"
+
+"diff.plus" = "open.fg"
+"diff.minus" = "closed.fg"
+"diff.delta" = "attention.fg"
+
+"ui.background" = { bg = "canvas.default" }
+"ui.background.separator" = { fg = "fg.subtle" }
+"ui.linenr" = { fg = "fg.subtle" }
+"ui.linenr.selected" = { fg = "fg.default" }
+"ui.statusline" = { fg = "fg.muted", bg = "neutral.subtle" }
+"ui.statusline.active" = { fg = "fg.default", bg = "canvas.default", underline = { color = "scale.coral.3", style = "line" } }
+"ui.statusline.normal" = { fg = "fg.default", bg = "accent.muted" }
+"ui.statusline.insert" = { fg = "fg.default", bg = "attention.muted" }
+"ui.statusline.select" = { fg = "fg.default", bg = "sponsors.muted" }
+"ui.popup" = { bg = "scale.gray.0" }
+"ui.popup.info" = { fg = "fg.default", bg = "scale.gray.0" }
+"ui.window" = { fg = "border.default" }
+"ui.help" = { fg = "fg.default", bg = "scale.gray.0" }
+
+"ui.text" = { fg = "fg.muted" }
+"ui.text.focus" = { fg = "fg.default" }
+"ui.text.inactive" = "fg.subtle"
+"ui.virtual" = { fg = "scale.gray.2" }
+"ui.virtual.ruler" = { bg = "canvas.subtle" }
+
+"ui.selection" = { bg = "scale.blue.0" }
+"ui.selection.primary" = { bg = "scale.blue.1" }
+"ui.cursor.match" = { fg = "attention.fg", modifiers = [
+  "bold",
+], underline = { style = "line" } }
+"ui.cursor" = { modifiers = ["reversed"] }
+"ui.cursorline.primary" = { bg = "canvas.subtle" }
+
+"ui.menu" = { fg = "fg.default", bg = "scale.gray.0" }
+"ui.menu.selected" = { bg = "scale.gray.1" }
+"ui.menu.scroll" = { fg = "scale.gray.2", bg = "scale.gray.0" }
+
+"diagnostic.hint" = { underline = { color = "success.fg", style = "curl" } }
+"diagnostic.info" = { underline = { color = "accent.fg", style = "curl" } }
+"diagnostic.warning" = { underline = { color = "attention.fg", style = "curl" } }
+"diagnostic.error" = { underline = { color = "danger.fg", style = "curl" } }
+"diagnostic.unnecessary" = { modifiers = ["dim"] }
+"diagnostic.deprecated" = { modifiers = ["crossed_out"] }
+
+hint = "success.fg"
+info = "accent.fg"
+warning = "attention.fg"
+error = "danger.fg"
+
+[palette]
+"accent.emphasis" = "#0969da"
+"accent.fg" = "#0969da"
+"accent.muted" = "#54aeff66"
+"accent.subtle" = "#ddf4ff"
+"attention.emphasis" = "#bf8700"
+"attention.fg" = "#9a6700"
+"attention.muted" = "#d4a72c66"
+"attention.subtle" = "#fff8c5"
+"border.default" = "#d0d7de"
+"border.muted" = "#d8dee4"
+"border.subtle" = "#1b1f2426"
+"canvas.default" = "#ffffff"
+"canvas.inset" = "#f6f8fa"
+"canvas.overlay" = "#ffffff"
+"canvas.subtle" = "#f6f8fa"
+"closed.emphasis" = "#cf222e"
+"closed.fg" = "#cf222e"
+"closed.muted" = "#ff818266"
+"closed.subtle" = "#ffebe9"
+"danger.emphasis" = "#cf222e"
+"danger.fg" = "#cf222e"
+"danger.muted" = "#ff818266"
+"danger.subtle" = "#ffebe9"
+"done.emphasis" = "#8250df"
+"done.fg" = "#8250df"
+"done.muted" = "#c297ff66"
+"done.subtle" = "#fbefff"
+"fg.default" = "#24292f"
+"fg.muted" = "#57606a"
+"fg.onEmphasis" = "#ffffff"
+"fg.subtle" = "#6e7781"
+"neutral.emphasis" = "#6e7781"
+"neutral.emphasisPlus" = "#24292f"
+"neutral.muted" = "#afb8c133"
+"neutral.subtle" = "#eaeef280"
+"open.emphasis" = "#2da44e"
+"open.fg" = "#1a7f37"
+"open.muted" = "#4ac26b66"
+"open.subtle" = "#dafbe1"
+"scale.black" = "#1b1f24"
+"scale.blue.0" = "#ddf4ff"
+"scale.blue.1" = "#b6e3ff"
+"scale.blue.2" = "#80ccff"
+"scale.blue.3" = "#54aeff"
+"scale.blue.4" = "#218bff"
+"scale.blue.5" = "#0969da"
+"scale.blue.6" = "#0550ae"
+"scale.blue.7" = "#033d8b"
+"scale.blue.8" = "#0a3069"
+"scale.blue.9" = "#002155"
+"scale.coral.0" = "#fff0eb"
+"scale.coral.1" = "#ffd6cc"
+"scale.coral.2" = "#ffb4a1"
+"scale.coral.3" = "#fd8c73"
+"scale.coral.4" = "#ec6547"
+"scale.coral.5" = "#c4432b"
+"scale.coral.6" = "#9e2f1c"
+"scale.coral.7" = "#801f0f"
+"scale.coral.8" = "#691105"
+"scale.coral.9" = "#510901"
+"scale.gray.0" = "#f6f8fa"
+"scale.gray.1" = "#eaeef2"
+"scale.gray.2" = "#d0d7de"
+"scale.gray.3" = "#afb8c1"
+"scale.gray.4" = "#8c959f"
+"scale.gray.5" = "#6e7781"
+"scale.gray.6" = "#57606a"
+"scale.gray.7" = "#424a53"
+"scale.gray.8" = "#32383f"
+"scale.gray.9" = "#24292f"
+"scale.green.0" = "#dafbe1"
+"scale.green.1" = "#aceebb"
+"scale.green.2" = "#6fdd8b"
+"scale.green.3" = "#4ac26b"
+"scale.green.4" = "#2da44e"
+"scale.green.5" = "#1a7f37"
+"scale.green.6" = "#116329"
+"scale.green.7" = "#044f1e"
+"scale.green.8" = "#003d16"
+"scale.green.9" = "#002d11"
+"scale.orange.0" = "#fff1e5"
+"scale.orange.1" = "#ffd8b5"
+"scale.orange.2" = "#ffb77c"
+"scale.orange.3" = "#fb8f44"
+"scale.orange.4" = "#e16f24"
+"scale.orange.5" = "#bc4c00"
+"scale.orange.6" = "#953800"
+"scale.orange.7" = "#762c00"
+"scale.orange.8" = "#5c2200"
+"scale.orange.9" = "#471700"
+"scale.pink.0" = "#ffeff7"
+"scale.pink.1" = "#ffd3eb"
+"scale.pink.2" = "#ffadda"
+"scale.pink.3" = "#ff80c8"
+"scale.pink.4" = "#e85aad"
+"scale.pink.5" = "#bf3989"
+"scale.pink.6" = "#99286e"
+"scale.pink.7" = "#772057"
+"scale.pink.8" = "#611347"
+"scale.pink.9" = "#4d0336"
+"scale.purple.0" = "#fbefff"
+"scale.purple.1" = "#ecd8ff"
+"scale.purple.2" = "#d8b9ff"
+"scale.purple.3" = "#c297ff"
+"scale.purple.4" = "#a475f9"
+"scale.purple.5" = "#8250df"
+"scale.purple.6" = "#6639ba"
+"scale.purple.7" = "#512a97"
+"scale.purple.8" = "#3e1f79"
+"scale.purple.9" = "#2e1461"
+"scale.red.0" = "#ffebe9"
+"scale.red.1" = "#ffcecb"
+"scale.red.2" = "#ffaba8"
+"scale.red.3" = "#ff8182"
+"scale.red.4" = "#fa4549"
+"scale.red.5" = "#cf222e"
+"scale.red.6" = "#a40e26"
+"scale.red.7" = "#82071e"
+"scale.red.8" = "#660018"
+"scale.red.9" = "#4c0014"
+"scale.white" = "#ffffff"
+"scale.yellow.0" = "#fff8c5"
+"scale.yellow.1" = "#fae17d"
+"scale.yellow.2" = "#eac54f"
+"scale.yellow.3" = "#d4a72c"
+"scale.yellow.4" = "#bf8700"
+"scale.yellow.5" = "#9a6700"
+"scale.yellow.6" = "#7d4e00"
+"scale.yellow.7" = "#633c01"
+"scale.yellow.8" = "#4d2d00"
+"scale.yellow.9" = "#3b2300"
+"severe.emphasis" = "#bc4c00"
+"severe.fg" = "#bc4c00"
+"severe.muted" = "#fb8f4466"
+"severe.subtle" = "#fff1e5"
+"sponsors.emphasis" = "#bf3989"
+"sponsors.fg" = "#bf3989"
+"sponsors.muted" = "#ff80c866"
+"sponsors.subtle" = "#ffeff7"
+"success.emphasis" = "#2da44e"
+"success.fg" = "#1a7f37"
+"success.muted" = "#4ac26b66"
+"success.subtle" = "#dafbe1"
diff --git a/themes/onedark.toml b/themes/onedark.toml
new file mode 100644
index 0000000..cc9c88c 100644
--- /dev/null
+++ a/themes/onedark.toml
@@ -1,0 +1,120 @@
+# Author : Gokul Soumya 
+
+"tag" = { fg = "red" }
+"attribute" = { fg = "yellow" }
+"comment" = { fg = "light-gray", modifiers = ["italic"] }
+"constant" = { fg = "cyan" }
+"constant.numeric" = { fg = "gold" }
+"constant.builtin" = { fg = "gold" }
+"constant.character.escape" = { fg = "gold" }
+"constructor" = { fg = "blue" }
+"function" = { fg = "blue" }
+"function.builtin" = { fg = "blue" }
+"function.macro" = { fg = "purple" }
+"keyword" = { fg = "red" }
+"keyword.control" = { fg = "purple" }
+"keyword.control.import" = { fg = "red" }
+"keyword.directive" = { fg = "purple" }
+"label" = { fg = "purple" }
+"namespace" = { fg = "blue" }
+"operator" = { fg = "purple" }
+"keyword.operator" = { fg = "purple" }
+"special" = { fg = "blue" }
+"string" = { fg = "green" }
+"type" = { fg = "yellow" }
+# "variable" = { fg = "blue" }
+"variable.builtin" = { fg = "blue" }
+"variable.parameter" = { fg = "red" }
+"variable.other.member" = { fg = "red" }
+
+"markup.heading" = { fg = "red" }
+"markup.raw.inline" = { fg = "green" }
+"markup.bold" = { fg = "gold", modifiers = ["bold"] }
+"markup.italic" = { fg = "purple", modifiers = ["italic"] }
+"markup.strikethrough" = { modifiers = ["crossed_out"] }
+"markup.list" = { fg = "red" }
+"markup.quote" = { fg = "yellow" }
+"markup.link.url" = { fg = "cyan", modifiers = ["underlined"] }
+"markup.link.text" = { fg = "purple" }
+
+"diff.plus" = "green"
+"diff.delta" = "gold"
+"diff.minus" = "red"
+
+"diagnostic.info".underline = { color = "blue", style = "curl" }
+"diagnostic.hint".underline = { color = "green", style = "curl" }
+"diagnostic.warning".underline = { color = "yellow", style = "curl" }
+"diagnostic.error".underline = { color = "red", style = "curl" }
+"diagnostic.unnecessary" = { modifiers = ["dim"] }
+"diagnostic.deprecated" = { modifiers = ["crossed_out"] }
+"info" = { fg = "blue", modifiers = ["bold"] }
+"hint" = { fg = "green", modifiers = ["bold"] }
+"warning" = { fg = "yellow", modifiers = ["bold"] }
+"error" = { fg = "red", modifiers = ["bold"] }
+
+"ui.background" = { bg = "black" }
+"ui.virtual" = { fg = "faint-gray" }
+"ui.virtual.indent-guide" = { fg = "faint-gray" }
+"ui.virtual.whitespace" = { fg = "light-gray" }
+"ui.virtual.ruler" = { bg = "gray" }
+"ui.virtual.inlay-hint" = { fg = "light-gray" }
+"ui.virtual.jump-label" = { fg = "light-gray", modifiers = ["bold"] }
+
+"ui.cursor" = { fg = "white", modifiers = ["reversed"] }
+"ui.cursor.primary" = { fg = "white", modifiers = ["reversed"] }
+"ui.cursor.match" = { fg = "blue", modifiers = ["underlined"] }
+
+"ui.selection" = { bg = "faint-gray" }
+"ui.selection.primary" = { bg = "gray" }
+"ui.cursorline.primary" = { bg = "light-black" }
+
+"ui.highlight" = { bg = "gray" }
+"ui.highlight.frameline" = { bg = "#97202a" }
+
+"ui.linenr" = { fg = "linenr" }
+"ui.linenr.selected" = { fg = "white" }
+
+"ui.statusline" = { fg = "white", bg = "light-black" }
+"ui.statusline.inactive" = { fg = "light-gray", bg = "light-black" }
+"ui.statusline.normal" = { fg = "light-black", bg = "blue", modifiers = [
+  "bold",
+] }
+"ui.statusline.insert" = { fg = "light-black", bg = "green", modifiers = [
+  "bold",
+] }
+"ui.statusline.select" = { fg = "light-black", bg = "purple", modifiers = [
+  "bold",
+] }
+
+"ui.bufferline" = { fg = "light-gray", bg = "light-black" }
+"ui.bufferline.active" = { fg = "light-black", bg = "blue", underline = { color = "light-black", style = "line" } }
+"ui.bufferline.background" = { bg = "light-black" }
+
+"ui.text" = { fg = "white" }
+"ui.text.focus" = { fg = "white", bg = "light-black", modifiers = ["bold"] }
+
+"ui.help" = { fg = "white", bg = "gray" }
+"ui.popup" = { bg = "gray" }
+"ui.window" = { fg = "gray" }
+"ui.menu" = { fg = "white", bg = "gray" }
+"ui.menu.selected" = { fg = "black", bg = "blue" }
+"ui.menu.scroll" = { fg = "white", bg = "light-gray" }
+
+"ui.debug" = { fg = "red" }
+
+[palette]
+
+yellow = "#E5C07B"
+blue = "#61AFEF"
+red = "#E06C75"
+purple = "#C678DD"
+green = "#98C379"
+gold = "#D19A66"
+cyan = "#56B6C2"
+white = "#ABB2BF"
+black = "#282C34"
+light-black = "#2C323C"
+gray = "#3E4452"
+faint-gray = "#3B4048"
+light-gray = "#5C6370"
+linenr = "#4B5263"
diff --git a/grammar/html/highlights.scm b/grammar/html/highlights.scm
new file mode 100644
index 0000000..99f39c9 100644
--- /dev/null
+++ a/grammar/html/highlights.scm
@@ -1,0 +1,20 @@
+(tag_name) @tag
+(erroneous_end_tag_name) @tag.error
+(doctype) @constant
+(attribute_name) @attribute
+(comment) @comment
+
+[
+  "\""
+  (attribute_value)
+] @string
+
+[
+  "<"
+  ">"
+  ""
+  "