🏡 index : ~doyle/rgit.git

author Jordan Doyle <jordan@doyle.la> 2024-09-29 22:11:42.0 +04:00:00
committer Jordan Doyle <jordan@doyle.la> 2024-09-29 23:39:17.0 +04:00:00
commit
1b34a74891423e118c567368c95b7e04f3b9ca63 [patch]
tree
2f8bfb5f884e91712f056b3feec34373434fefda
parent
755876c268f288e104abd126ee7c2c3e8ac4fcb3
download
1b34a74891423e118c567368c95b7e04f3b9ca63.tar.gz

Use Helix's tree-sitter grammar registry



Diff

 Cargo.lock                                | 352 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------
 Cargo.toml                                |  29 ++++-------------------------
 flake.lock                                |  17 +++++++++++++++++
 flake.nix                                 |  18 +++++++++++++++++-
 src/git.rs                                |  52 +++++++++++++++++-----------------------------------
 src/main.rs                               |   5 +++++
 src/syntax_highlight.rs                   | 242 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
 tree-sitter-grammar-repository/Cargo.toml |  26 ++++++++++++++++++++++++++
 tree-sitter-grammar-repository/build.rs   | 518 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 tree-sitter-grammar-repository/src/lib.rs |  66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 10 files changed, 833 insertions(+), 492 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index fefe4df..a1da4ba 100644
--- a/Cargo.lock
+++ a/Cargo.lock
@@ -381,7 +381,7 @@
checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c"
dependencies = [
 "memchr",
 "regex-automata 0.4.7",
 "regex-automata 0.4.8",
 "serde",
]

@@ -1708,6 +1708,19 @@
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"

[[package]]
name = "globset"
version = "0.4.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15f1ce686646e7f1e19bf7d5533fe443a45dbfb990e00629110797578b42fb19"
dependencies = [
 "aho-corasick",
 "bstr",
 "log",
 "regex-automata 0.4.8",
 "regex-syntax 0.8.5",
]

[[package]]
name = "h2"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -2299,6 +2312,16 @@
dependencies = [
 "autocfg",
 "libm",
]

[[package]]
name = "num_cpus"
version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
dependencies = [
 "hermit-abi",
 "libc",
]

[[package]]
@@ -2459,6 +2482,16 @@
checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04"
dependencies = [
 "zerocopy",
]

[[package]]
name = "prettyplease"
version = "0.2.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "479cf940fbbb3426c32c5d5176f62ad57549a0bb84773423ba8be9d089f5faba"
dependencies = [
 "proc-macro2",
 "syn",
]

[[package]]
@@ -2488,7 +2521,7 @@
 "rand",
 "rand_chacha",
 "rand_xorshift",
 "regex-syntax 0.8.4",
 "regex-syntax 0.8.5",
 "unarray",
]

@@ -2616,14 +2649,14 @@

[[package]]
name = "regex"
version = "1.10.6"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619"
checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8"
dependencies = [
 "aho-corasick",
 "memchr",
 "regex-automata 0.4.7",
 "regex-syntax 0.8.4",
 "regex-automata 0.4.8",
 "regex-syntax 0.8.5",
]

[[package]]
@@ -2637,13 +2670,13 @@

[[package]]
name = "regex-automata"
version = "0.4.7"
version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3"
dependencies = [
 "aho-corasick",
 "memchr",
 "regex-syntax 0.8.4",
 "regex-syntax 0.8.5",
]

[[package]]
@@ -2654,9 +2687,9 @@

[[package]]
name = "regex-syntax"
version = "0.8.4"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"

[[package]]
name = "rgit"
@@ -2702,32 +2735,8 @@
 "tower-service",
 "tracing",
 "tracing-subscriber",
 "tree-sitter-bash",
 "tree-sitter-c",
 "tree-sitter-c-sharp",
 "tree-sitter-cpp",
 "tree-sitter-css",
 "tree-sitter-elixir",
 "tree-sitter-fortran",
 "tree-sitter-go",
 "tree-sitter-haskell",
 "tree-sitter-grammar-repository",
 "tree-sitter-highlight",
 "tree-sitter-html",
 "tree-sitter-java",
 "tree-sitter-javascript",
 "tree-sitter-json",
 "tree-sitter-md",
 "tree-sitter-ocaml",
 "tree-sitter-php",
 "tree-sitter-python",
 "tree-sitter-regex",
 "tree-sitter-ruby",
 "tree-sitter-rust",
 "tree-sitter-scss",
 "tree-sitter-svelte-ng",
 "tree-sitter-toml-ng",
 "tree-sitter-typescript",
 "tree-sitter-yaml",
 "unix_mode",
 "uuid",
 "v_htmlescape",
@@ -3067,7 +3076,7 @@
 "once_cell",
 "onig",
 "plist",
 "regex-syntax 0.8.4",
 "regex-syntax 0.8.5",
 "serde",
 "serde_derive",
 "serde_json",
@@ -3144,6 +3153,15 @@
dependencies = [
 "cfg-if",
 "once_cell",
]

[[package]]
name = "threadpool"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa"
dependencies = [
 "num_cpus",
]

[[package]]
@@ -3460,97 +3478,27 @@
dependencies = [
 "cc",
 "regex",
 "regex-syntax 0.8.4",
 "tree-sitter-language",
]

[[package]]
name = "tree-sitter-bash"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3aa5e1c6bd02c0053f3f68edcf5d8866b38a8640584279e30fca88149ce14dda"
dependencies = [
 "cc",
 "tree-sitter-language",
]

[[package]]
name = "tree-sitter-c"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8b3fb515e498e258799a31d78e6603767cd6892770d9e2290ec00af5c3ad80b"
dependencies = [
 "cc",
 "tree-sitter-language",
]

[[package]]
name = "tree-sitter-c-sharp"
version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04c0f6d2209a3cd6d0bb9d2934715da15a15710d3c09c7c1ecd4c9804c3ecd10"
dependencies = [
 "cc",
 "tree-sitter-language",
]

[[package]]
name = "tree-sitter-cpp"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d67e862242878d6ee50e1e5814f267ee3eea0168aea2cdbd700ccfb4c74b6d3"
dependencies = [
 "cc",
 "tree-sitter-language",
]

[[package]]
name = "tree-sitter-css"
version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d0018d6b1692a806f9cddaa1e5616951fd58840c39a0b21401b55ab3df12292"
dependencies = [
 "cc",
 "tree-sitter-language",
]

[[package]]
name = "tree-sitter-elixir"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97bf0efa4be41120018f23305b105ad4dfd3be1b7f302dc4071d0e6c2dec3a32"
dependencies = [
 "cc",
 "tree-sitter-language",
]

[[package]]
name = "tree-sitter-fortran"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50d655214a848bfb63dfdc2e7eeef5c3c323807a220b3117a1aef46b2bb95a12"
dependencies = [
 "cc",
 "tree-sitter-language",
]

[[package]]
name = "tree-sitter-go"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "caf57626e4c9b6d6efaf8a8d5ee1241c5f178ae7bfdf693713ae6a774f01424e"
dependencies = [
 "cc",
 "regex-syntax 0.8.5",
 "tree-sitter-language",
]

[[package]]
name = "tree-sitter-haskell"
version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b92c8a4c4ceaae105621b00624ee8d9029fb23116f400832e4be30d0639d054"
name = "tree-sitter-grammar-repository"
version = "0.0.1"
dependencies = [
 "anyhow",
 "cc",
 "globset",
 "heck",
 "prettyplease",
 "proc-macro2",
 "quote",
 "regex",
 "serde",
 "serde_json",
 "syn",
 "threadpool",
 "toml",
 "tree-sitter-language",
]

@@ -3564,46 +3512,6 @@
 "regex",
 "thiserror",
 "tree-sitter",
]

[[package]]
name = "tree-sitter-html"
version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d52d710a3723360ebade986d3f0ae2aa2c3bcfb87bb1cdf60988ec51c81c40d"
dependencies = [
 "cc",
 "tree-sitter-language",
]

[[package]]
name = "tree-sitter-java"
version = "0.23.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b38b26736e6e97421760201f7a91c859f3b0d44382d48ac18aa963828f784ebf"
dependencies = [
 "cc",
 "tree-sitter-language",
]

[[package]]
name = "tree-sitter-javascript"
version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59e1f62f8babb640b909f30675d1addeb1f17802f2a4d2af287569753b243977"
dependencies = [
 "cc",
 "tree-sitter-language",
]

[[package]]
name = "tree-sitter-json"
version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86a5d6b3ea17e06e7a34aabeadd68f5866c0d0f9359155d432095f8b751865e4"
dependencies = [
 "cc",
 "tree-sitter-language",
]

[[package]]
@@ -3611,126 +3519,6 @@
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2545046bd1473dac6c626659cc2567c6c0ff302fc8b84a56c4243378276f7f57"

[[package]]
name = "tree-sitter-md"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17f968c22a01010b83fc960455ae729db08dbeb6388617d9113897cb9204b030"
dependencies = [
 "cc",
 "tree-sitter-language",
]

[[package]]
name = "tree-sitter-ocaml"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0534f94f006cf4d4994e964212e91d4626efcaf6769b023d3f17530399a4d6e1"
dependencies = [
 "cc",
 "tree-sitter-language",
]

[[package]]
name = "tree-sitter-php"
version = "0.23.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e0470ddcab3cab948615d50b0395da28e4ab886c0f78363e607cf7f0724cf4a"
dependencies = [
 "cc",
 "tree-sitter-language",
]

[[package]]
name = "tree-sitter-python"
version = "0.23.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "65661b1a3e24139e2e54207e47d910ab07e28790d78efc7d5dc3a11ce2a110eb"
dependencies = [
 "cc",
 "tree-sitter-language",
]

[[package]]
name = "tree-sitter-regex"
version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b9a7087b1cf769c96b7e74414947df067fb6135f04d176fd23be08b9396cc0e"
dependencies = [
 "cc",
 "tree-sitter-language",
]

[[package]]
name = "tree-sitter-ruby"
version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ec5ee842e27791e0adffa0b2a177614de51d2a26e5c7e84d014ed7f097e5ed0"
dependencies = [
 "cc",
 "tree-sitter-language",
]

[[package]]
name = "tree-sitter-rust"
version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cffbbcb780348fbae8395742ae5b34c1fd794e4085d43aac9f259387f9a84dc8"
dependencies = [
 "cc",
 "tree-sitter-language",
]

[[package]]
name = "tree-sitter-scss"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33909a9ca86390ebbf3461e9949c4bbe2767d2d024b486306d27616641d4ba24"
dependencies = [
 "cc",
 "tree-sitter",
]

[[package]]
name = "tree-sitter-svelte-ng"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef0a71f9cf5e94373cc86c64893630c8a29bb25d3390a248268d08af2165fa37"
dependencies = [
 "cc",
 "tree-sitter-language",
]

[[package]]
name = "tree-sitter-toml-ng"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "695d20cd83acf16c02c773f03e76d7b43b19883d4e2ce3652a8f06b5e0da7455"
dependencies = [
 "cc",
 "tree-sitter",
]

[[package]]
name = "tree-sitter-typescript"
version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aecf1585ae2a9dddc2b1d4c0e2140b2ec9876e2a25fd79de47fcf7dae0384685"
dependencies = [
 "cc",
 "tree-sitter-language",
]

[[package]]
name = "tree-sitter-yaml"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aad27ec46ad343d8b514f64dd3fdffb478c592ece561b6c935d90ef55589c6b6"
dependencies = [
 "cc",
 "tree-sitter",
]

[[package]]
name = "trim-in-place"
diff --git a/Cargo.toml b/Cargo.toml
index f578351..a997f3e 100644
--- a/Cargo.toml
+++ a/Cargo.toml
@@ -6,6 +6,9 @@
authors = ["Jordan Doyle <jordan@doyle.la>"]
license = "WTFPL"

[workspace]
members = ["tree-sitter-grammar-repository"]

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
@@ -55,32 +58,8 @@
tower-service = "0.3"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
tree-sitter-bash = "0.23"
tree-sitter-c = "0.23"
tree-sitter-cpp = "0.23"
tree-sitter-c-sharp = "0.23"
tree-sitter-elixir = "0.3"
tree-sitter-go = "0.23"
tree-sitter-php = "0.23"
tree-sitter-json = "0.23"
tree-sitter-ocaml = "0.23"
tree-sitter-python = "0.23"
tree-sitter-regex = "0.23"
tree-sitter-ruby = "0.23"
tree-sitter-css = "0.23"
tree-sitter-fortran = "0.1"
tree-sitter-haskell = "0.23"
tree-sitter-grammar-repository = { path = "./tree-sitter-grammar-repository" }
tree-sitter-highlight = "0.23"
tree-sitter-html = "0.23"
tree-sitter-java = "0.23"
tree-sitter-javascript = "0.23"
tree-sitter-md = "0.3"
tree-sitter-rust = "0.23"
tree-sitter-scss = "1.0"
tree-sitter-svelte-ng = "1.0"
tree-sitter-toml-ng = "0.6.0"
tree-sitter-typescript = "0.23"
tree-sitter-yaml = "0.6"
unix_mode = "0.1"
uuid = { version = "1.7", features = ["v4"] }
v_htmlescape = { version = "0.15", features = ["bytes-buf"] }
diff --git a/flake.lock b/flake.lock
index 4041d5d..3c29400 100644
--- a/flake.lock
+++ a/flake.lock
@@ -31,6 +31,22 @@
        "type": "github"
      }

    },

    "helix": {
      "flake": false,
      "locked": {
        "lastModified": 1727613050,
        "narHash": "sha256-vxf/5aCNjy0OKzkkkNoeUnjr1lWQDmcKW+UXKpU4weE=",
        "owner": "helix-editor",
        "repo": "helix",
        "rev": "2ce4c6d5fa3e50464b41a3d0190ad0e5ada2fc3c",
        "type": "github"
      },

      "original": {
        "owner": "helix-editor",
        "repo": "helix",
        "type": "github"
      }

    },

    "nixpkgs": {
      "locked": {
        "lastModified": 1727335715,
@@ -65,6 +81,7 @@
      "inputs": {
        "advisory-db": "advisory-db",
        "crane": "crane",
        "helix": "helix",
        "nixpkgs": "nixpkgs",
        "treefmt-nix": "treefmt-nix",
        "utils": "utils"
diff --git a/flake.nix b/flake.nix
index 0303b38..0160f3c 100644
--- a/flake.nix
+++ a/flake.nix
@@ -8,14 +8,28 @@
      url = "github:rustsec/advisory-db";
      flake = false;
    };

    helix = {
      url = "github:helix-editor/helix";
      flake = false;
    };
  };

  outputs = { self, nixpkgs, utils, crane, advisory-db, treefmt-nix }:
  outputs = { self, nixpkgs, utils, crane, advisory-db, treefmt-nix, helix }:
    utils.lib.eachDefaultSystem (system:
      let
        pkgs = import nixpkgs { inherit system; };
        craneLib = crane.mkLib pkgs;
        src = craneLib.cleanCargoSource ./.;
        helix-grammar = pkgs.callPackage "${helix}/grammars.nix" { inherit pkgs; };
        rgit-grammar = pkgs.runCommand "consolidated-rgit-grammars" { } ''
          mkdir -p $out
          for file in ${helix-grammar}/*; do
            ln -s "$file" "$out/libtree-sitter-$(basename "$file")"
          done
          ln -s "${helix}/languages.toml" "$out/languages.toml"
          ln -s "${helix}/runtime/queries" "$out/queries"
        '';
        commonArgs = {
          inherit src;
          strictDeps = true;
@@ -23,6 +37,7 @@
          nativeBuildInputs = with pkgs; [ cmake clang ];
          LIBCLANG_PATH = "${pkgs.clang.cc.lib}/lib";
          ROCKSDB_LIB_DIR = "${pkgs.rocksdb}/lib";
          TREE_SITTER_GRAMMAR_LIB_DIR = "${rgit-grammar}";
        };
        cargoArtifacts = craneLib.buildDepsOnly commonArgs;
        rgit = craneLib.buildPackage (commonArgs // {
@@ -33,6 +48,7 @@
            fileset = pkgs.lib.fileset.unions [
              ./Cargo.toml
              ./Cargo.lock
              ./tree-sitter-grammar-repository
              ./src
              ./statics
              ./templates
diff --git a/src/git.rs b/src/git.rs
index 67eda1b..1ab96a7 100644
--- a/src/git.rs
+++ a/src/git.rs
@@ -1,3 +1,15 @@
use std::{
    borrow::Cow,
    collections::{BTreeMap, VecDeque},
    ffi::OsStr,
    fmt::{self, Arguments, Write},
    io::ErrorKind,
    path::{Path, PathBuf},
    str::FromStr,
    sync::Arc,
    time::Duration,
};

use anyhow::{anyhow, Context, Result};
use axum::response::IntoResponse;
use bytes::{buf::Writer, BufMut, Bytes, BytesMut};
@@ -7,8 +19,7 @@
    actor::SignatureRef,
    bstr::{BStr, BString, ByteSlice, ByteVec},
    diff::blob::{platform::prepare_diff::Operation, Sink},
    object::tree::EntryKind,
    object::Kind,
    object::{tree::EntryKind, Kind},
    objs::tree::EntryRef,
    prelude::TreeEntryRefExt,
    traverse::tree::visit::Action,
@@ -16,17 +27,6 @@
    ObjectId, ThreadSafeRepository, Url,
};
use moka::future::Cache;
use std::{
    borrow::Cow,
    collections::{BTreeMap, VecDeque},
    ffi::OsStr,
    fmt::{self, Arguments, Write},
    io::ErrorKind,
    path::{Path, PathBuf},
    str::FromStr,
    sync::Arc,
    time::Duration,
};
use tar::Builder;
use time::{OffsetDateTime, UtcOffset};
use tracing::{error, instrument, warn};
@@ -144,21 +144,15 @@

                match object.kind {
                    Kind::Blob => {
                        let path = path.join(item.filename().to_path_lossy());
                        let mut blob = object.into_blob();

                        let size = blob.data.len();
                        let extension = path
                            .extension()
                            .or_else(|| path.file_name())
                            .and_then(OsStr::to_str)
                            .unwrap_or_default();

                        let content = match (formatted, simdutf8::basic::from_utf8(&blob.data)) {
                            (true, Err(_)) => Content::Binary(vec![]),
                            (true, Ok(data)) => Content::Text(Cow::Owned(format_file(
                                data,
                                FileIdentifier::Extension(extension),
                                FileIdentifier::Path(path.as_path()),
                            )?)),
                            (false, Err(_)) => Content::Binary(blob.take_data()),
                            (false, Ok(_data)) => Content::Text(Cow::Owned(unsafe {
@@ -1091,29 +1085,17 @@
}

struct SyntaxHighlightedDiffFormatter<'a> {
    extension: &'a str,
    path: &'a Path,
}

impl<'a> SyntaxHighlightedDiffFormatter<'a> {
    fn new(path: &'a Path) -> Self {
        let extension = path
            .extension()
            .or_else(|| path.file_name())
            .and_then(OsStr::to_str)
            .unwrap_or_default();

        Self { extension }
        Self { path }
    }

    fn write(&self, output: &mut String, class: &str, data: &str) {
        write!(output, r#"<span class="diff-{class}">"#).unwrap();
        format_file_inner(
            output,
            data,
            FileIdentifier::Extension(self.extension),
            false,
        )
        .unwrap();
        format_file_inner(output, data, FileIdentifier::Path(self.path), false).unwrap();
        write!(output, r#"</span>"#).unwrap();
    }
}
diff --git a/src/main.rs b/src/main.rs
index d33fa01..e55d0d1 100644
--- a/src/main.rs
+++ a/src/main.rs
@@ -42,6 +42,7 @@
    },
    git::Git,
    layers::logger::LoggingMiddleware,
    syntax_highlight::prime_highlighters,
    theme::Theme,
};

@@ -189,6 +190,10 @@
            resp
        }
    };

    info!("Priming highlighters...");
    prime_highlighters();
    info!("Server starting up...");

    let app = Router::new()
        .route("/", get(methods::index::handle))
diff --git a/src/syntax_highlight.rs b/src/syntax_highlight.rs
index d75713f..139e598 100644
--- a/src/syntax_highlight.rs
+++ a/src/syntax_highlight.rs
@@ -1,13 +1,15 @@
use std::{
    cell::RefCell,
    collections::HashMap,
    fmt::Write as FmtWrite,
    io::{ErrorKind, Write as IoWrite},
    path::Path,
    sync::LazyLock,
};

use comrak::adapters::SyntaxHighlighterAdapter;
use tracing::debug;
use tree_sitter_grammar_repository::{Grammar, Language};
use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter};

thread_local! {
@@ -33,152 +35,95 @@
}

define_classes! {
    "keyword.directive" => "keyword directive",
    "markup.strikethrough" => "markup strikethrough",
    "markup.link" => "markup link",
    "keyword.control.conditional" => "keyword control conditional",
    "markup.bold" => "markup bold",
    "diff.plus" => "diff plus",
    "markup.heading.2" => "markup heading 2",
    "markup" => "markup",
    "diff.delta" => "diff delta",
    "variable.other.member" => "variable other member",
    "namespace" => "namespace",
    "comment.line" => "comment line",
    "function" => "function",
    "keyword.operator" => "keyword operator",
    "punctuation.bracket" => "punctuation bracket",
    "markup.list" => "markup list",
    "type.builtin" => "type builtin",
    "keyword.storage.modifier" => "keyword storage modifier",
    "constant" => "constant",
    "markup.italic" => "markup italic",
    "variable" => "variable",
    "keyword" => "keyword",
    "punctuation.special" => "punctuation special",
    "string.special.path" => "string special path",
    "keyword.storage.type" => "keyword storage type",
    "markup.heading.5" => "markup heading 5",
    "markup.heading.6" => "markup heading 6",
    "markup.link.label" => "markup link label",
    "markup.list.numbered" => "markup list numbered",
    "diff.delta.moved" => "diff delta moved",
    "constant.numeric" => "constant numeric",
    "markup.heading" => "markup heading",
    "markup.link.text" => "markup link text",
    "keyword.function" => "keyword function",
    "string.special.url" => "string special url",
    "keyword.control.return" => "keyword control return",
    "keyword.control.repeat" => "keyword control repeat",
    "constant.builtin" => "constant builtin",
    "type.enum.variant" => "type enum variant",
    "markup.raw.block" => "markup raw block",
    "markup.heading.3" => "markup heading 3",
    "escape" => "escape",
    "comment.block" => "comment block",
    "constant.numeric.integer" => "constant numeric integer",
    "punctuation.delimiter" => "punctuation delimiter",
    "constructor" => "constructor",
    "type" => "type",
    "string.regexp" => "string regexp",
    "variable.parameter" => "variable parameter",
    "markup.quote" => "markup quote",
    "string.special" => "string special",
    "constant.numeric.float" => "constant numeric float",
    "constant.character.escape" => "constant character escape",
    "tag" => "tag",
    "keyword.storage" => "keyword storage",
    "string" => "string",
    "function.macro" => "function macro",
    "markup.list.unnumbered" => "markup list unnumbered",
    "diff.minus" => "diff minus",
    "punctuation" => "punctuation",
    "markup.link.url" => "markup link url",
    "function.method" => "function method",
    "markup.raw" => "markup raw",
    "function.special" => "function special",
    "attribute" => "attribute",
    "operator" => "operator",
    "special" => "special",
    "function.builtin" => "function builtin",
    "diff" => "diff",
    "markup.heading.4" => "markup heading 4",
    "keyword.control" => "keyword control",
    "markup.list.unchecked" => "markup list unchecked",
    "keyword.control.exception" => "keyword control exception",
    "constant.builtin.boolean" => "constant builtin boolean",
    "markup.heading.1" => "markup heading 1",
    "markup.heading.marker" => "markup heading marker",
    "constant.character" => "constant character",
    "markup.raw.inline" => "markup raw inline",
    "variable.builtin" => "variable builtin",
    "variable.other" => "variable other",
    "tag.builtin" => "tag builtin",
    "type.enum" => "type enum",
    "comment.block.documentation" => "comment block documentation",
    "comment" => "comment",
    "string.special.symbol" => "string special symbol",
    "label" => "label",
    "keyword.control.import" => "keyword control import",
    "markup.list.checked" => "markup list checked",
"attribute" => "attribute",
"boolean" => "boolean",
"carriage-return" => "carriage-return",
"comment" => "comment",
"comment.documentation" => "comment documentation",
"constant" => "constant",
"constant.builtin" => "constant builtin",
"constructor" => "constructor",
"constructor.builtin" => "constructor builtin",
"embedded" => "embedded",
"error" => "error",
"escape" => "escape",
"function" => "function",
"function.builtin" => "function builtin",
"keyword" => "keyword",
"markup" => "markup",
"markup.bold" => "markup bold",
"markup.heading" => "markup heading",
"markup.italic" => "markup italic",
"markup.link" => "markup link",
"markup.link.url" => "markup link url",
"markup.list" => "markup list",
"markup.list.checked" => "markup list checked",
"markup.list.numbered" => "markup list numbered",
"markup.list.unchecked" => "markup list unchecked",
"markup.list.unnumbered" => "markup list unnumbered",
"markup.quote" => "markup quote",
"markup.raw" => "markup raw",
"markup.raw.block" => "markup raw block",
"markup.raw.inline" => "markup raw inline",
"markup.strikethrough" => "markup strikethrough",
"module" => "module",
"number" => "number",
"operator" => "operator",
"property" => "property",
"property.builtin" => "property builtin",
"punctuation" => "punctuation",
"punctuation.bracket" => "punctuation bracket",
"punctuation.delimiter" => "punctuation delimiter",
"punctuation.special" => "punctuation special",
"string" => "string",
"string.escape" => "string escape",
"string.regexp" => "string regexp",
"string.special" => "string special",
"string.special.symbol" => "string special symbol",
"tag" => "tag",
"type" => "type",
"type.builtin" => "type builtin",
"variable" => "variable",
"variable.builtin" => "variable builtin",
"variable.member" => "variable member",
"variable.parameter" => "variable parameter",}

pub fn prime_highlighters() {
    let _res = HIGHLIGHTER_CONFIGS.len();
}

macro_rules! build_highlighter_configs {
    ($($i:literal => $($extension:literal)|* => $($token:literal)|* => $config:expr),*,) => {
        static BUILD_HIGHLIGHTER_CONFIGS: LazyLock<[HighlightConfiguration; count!($($config),*)]> = LazyLock::new(|| [
            $({
                let mut config = $config.unwrap();
                config.configure(&HIGHLIGHT_NAMES);
                config
            }),*
        ]);

        pub fn fetch_highlighter_config(extension: &str) -> Option<&'static HighlightConfiguration> {
            match extension {
                $($($extension)|* => Some(&BUILD_HIGHLIGHTER_CONFIGS[$i])),*,
                _ => None,
            }
        }

        pub fn fetch_highlighter_config_by_token(extension: &str) -> Option<&'static HighlightConfiguration> {
            match extension {
                $($($token)|* => Some(&BUILD_HIGHLIGHTER_CONFIGS[$i])),*,
                _ => None,
            }
        }
    };
static HIGHLIGHTER_CONFIGS: LazyLock<Vec<HighlightConfiguration>> = LazyLock::new(|| {
    Grammar::VARIANTS
        .iter()
        .copied()
        .map(Grammar::highlight_configuration_params)
        .map(|v| {
            let mut configuration = HighlightConfiguration::new(
                v.language.into(),
                v.name,
                v.highlights_query,
                v.injection_query,
                v.locals_query,
            )
            .unwrap_or_else(|e| panic!("bad query for {}: {e}", v.name));
            configuration.configure(&HIGHLIGHT_NAMES);
            configuration
        })
        .collect()
});

/// Returns the highlight configuration for the language that
/// [`Language::from_file_name`] resolves from `file`.
///
/// Yields `None` when no language is resolved for the file.
pub fn fetch_highlighter_config(file: &Path) -> Option<&'static HighlightConfiguration> {
    let language = Language::from_file_name(file)?;
    let grammar = Language::grammar(language);
    // The grammar's index selects its entry in the configuration table.
    let idx = Grammar::idx(grammar);

    Some(&HIGHLIGHTER_CONFIGS[idx])
}

build_highlighter_configs! {
    // #   extensions             name/aliases
    0  => "java"              => "java"                    => HighlightConfiguration::new(tree_sitter_java::LANGUAGE.into(), "java", tree_sitter_java::HIGHLIGHTS_QUERY, "", ""),
    1  => "html"              => "html"                    => HighlightConfiguration::new(tree_sitter_html::LANGUAGE.into(), "html", include_str!("../grammar/html/highlights.scm"), include_str!("../grammar/html/injections.scm"), ""),
    2  => "md"                => "markdown"                => HighlightConfiguration::new(tree_sitter_md::LANGUAGE.into(), "markdown", tree_sitter_md::HIGHLIGHT_QUERY_BLOCK, tree_sitter_md::INJECTION_QUERY_BLOCK, ""),
    3  => "rs"                => "rust"                    => HighlightConfiguration::new(tree_sitter_rust::LANGUAGE.into(), "rust", tree_sitter_rust::HIGHLIGHTS_QUERY, tree_sitter_rust::INJECTIONS_QUERY, ""),
    4  => "toml"              => "toml"                    => HighlightConfiguration::new(tree_sitter_toml_ng::language(), "toml", tree_sitter_toml_ng::HIGHLIGHTS_QUERY, "", ""),
    5  => "yaml" | "yml"      => "yaml" | "yml"            => HighlightConfiguration::new(tree_sitter_yaml::language(), "yaml", tree_sitter_yaml::HIGHLIGHTS_QUERY, "", ""),
    6  => "hs"                => "haskell"                 => HighlightConfiguration::new(tree_sitter_haskell::LANGUAGE.into(), "haskell", tree_sitter_haskell::HIGHLIGHTS_QUERY, tree_sitter_haskell::INJECTIONS_QUERY, tree_sitter_haskell::LOCALS_QUERY),
    7  => "f" | "f90" | "for" => "fortran"                 => HighlightConfiguration::new(tree_sitter_fortran::LANGUAGE.into(), "fortran", include_str!("../grammar/fortran/highlights.scm"), "", ""),
    8  => "svelte"            => "svelte"                  => HighlightConfiguration::new(tree_sitter_svelte_ng::LANGUAGE.into(), "svelte", tree_sitter_svelte_ng::HIGHLIGHTS_QUERY, tree_sitter_svelte_ng::INJECTIONS_QUERY, tree_sitter_svelte_ng::LOCALS_QUERY),
    9  => "js"                => "js" | "javascript"       => HighlightConfiguration::new(tree_sitter_javascript::LANGUAGE.into(), "javascript", tree_sitter_javascript::HIGHLIGHT_QUERY, tree_sitter_javascript::INJECTIONS_QUERY, tree_sitter_javascript::LOCALS_QUERY),
    10 => "jsx"               => "jsx"                     => HighlightConfiguration::new(tree_sitter_javascript::LANGUAGE.into(), "jsx", tree_sitter_javascript::JSX_HIGHLIGHT_QUERY, tree_sitter_javascript::INJECTIONS_QUERY, tree_sitter_javascript::LOCALS_QUERY),
    11 => "ts"                => "ts" | "typescript"       => HighlightConfiguration::new(tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(), "typescript", tree_sitter_typescript::HIGHLIGHTS_QUERY, "", ""),
    12 => "tsx"               => "tsx"                     => HighlightConfiguration::new(tree_sitter_typescript::LANGUAGE_TSX.into(), "tsx", tree_sitter_typescript::HIGHLIGHTS_QUERY, "", ""),
    13 => "scss"              => "scss"                    => HighlightConfiguration::new(tree_sitter_scss::language(), "scss", tree_sitter_scss::HIGHLIGHTS_QUERY, "", ""),
    14 => "css"               => "css"                     => HighlightConfiguration::new(tree_sitter_css::LANGUAGE.into(), "css", tree_sitter_css::HIGHLIGHTS_QUERY, "", ""),
    15 => "bash" | "sh"       => "bash" | "shell" | "sh"   => HighlightConfiguration::new(tree_sitter_bash::LANGUAGE.into(), "css", tree_sitter_bash::HIGHLIGHT_QUERY, "", ""),
    16 => "c"                 => "c"                       => HighlightConfiguration::new(tree_sitter_c::LANGUAGE.into(), "c", tree_sitter_c::HIGHLIGHT_QUERY, "", ""),
    17 => "cpp" | "c++"       => "cpp" | "c++"             => HighlightConfiguration::new(tree_sitter_cpp::LANGUAGE.into(), "c++", tree_sitter_cpp::HIGHLIGHT_QUERY, "", ""),
    18 => "cs"                => "c#" | "cs" | "csharp"    => HighlightConfiguration::new(tree_sitter_c_sharp::LANGUAGE.into(), "c#", tree_sitter_c_sharp::HIGHLIGHTS_QUERY, "", ""),
    19 => "ex" | "exs"        => "elixir"                  => HighlightConfiguration::new(tree_sitter_elixir::LANGUAGE.into(), "elixir", tree_sitter_elixir::HIGHLIGHTS_QUERY, tree_sitter_elixir::INJECTIONS_QUERY, ""),
    21 => "go"                => "go" | "golang"           => HighlightConfiguration::new(tree_sitter_go::LANGUAGE.into(), "go", tree_sitter_go::HIGHLIGHTS_QUERY, "", ""),
    22 => "php"               => "php"                     => HighlightConfiguration::new(tree_sitter_php::LANGUAGE_PHP.into(), "php", tree_sitter_php::HIGHLIGHTS_QUERY, tree_sitter_php::INJECTIONS_QUERY, ""),
    23 => "json"              => "json"                    => HighlightConfiguration::new(tree_sitter_json::LANGUAGE.into(), "json", tree_sitter_json::HIGHLIGHTS_QUERY, "", ""),
    24 => "ml"                => "ml" | "ocaml"            => HighlightConfiguration::new(tree_sitter_ocaml::LANGUAGE_OCAML.into(), "ocaml", tree_sitter_ocaml::HIGHLIGHTS_QUERY, "", tree_sitter_ocaml::LOCALS_QUERY),
    25 => "mli"               => "mli" | "ocaml-interface" => HighlightConfiguration::new(tree_sitter_ocaml::LANGUAGE_OCAML_INTERFACE.into(), "ocaml", tree_sitter_ocaml::HIGHLIGHTS_QUERY, "", tree_sitter_ocaml::LOCALS_QUERY),
    26 => "py"                => "py" | "python"           => HighlightConfiguration::new(tree_sitter_python::LANGUAGE.into(), "python", tree_sitter_python::HIGHLIGHTS_QUERY, "", ""),
    27 => "regex"             => "regex"                   => HighlightConfiguration::new(tree_sitter_regex::LANGUAGE.into(), "regex", tree_sitter_regex::HIGHLIGHTS_QUERY, "", ""),
    28 => "rb"                 => "rb" | "ruby"            => HighlightConfiguration::new(tree_sitter_ruby::LANGUAGE.into(), "ruby", tree_sitter_ruby::HIGHLIGHTS_QUERY, "", tree_sitter_ruby::LOCALS_QUERY),
/// Returns the highlight configuration for the language that
/// [`Language::from_injection`] resolves from `token`.
///
/// Yields `None` when the token does not resolve to a language.
pub fn fetch_highlighter_config_by_token(token: &str) -> Option<&'static HighlightConfiguration> {
    let language = Language::from_injection(token)?;
    let grammar = Language::grammar(language);
    // The grammar's index selects its entry in the configuration table.
    let idx = Grammar::idx(grammar);

    Some(&HIGHLIGHTER_CONFIGS[idx])
}

pub struct ComrakHighlightAdapter;
@@ -212,9 +157,9 @@
    }
}

#[derive(Copy, Clone)]
#[derive(Copy, Clone, Debug)]
pub enum FileIdentifier<'a> {
    Extension(&'a str),
    Path(&'a Path),
    Token(&'a str),
}

@@ -231,12 +176,11 @@
    code_tag: bool,
) -> anyhow::Result<()> {
    let config = match identifier {
        FileIdentifier::Extension(v) => fetch_highlighter_config(v),
        FileIdentifier::Path(v) => fetch_highlighter_config(v),
        FileIdentifier::Token(v) => fetch_highlighter_config_by_token(v),
    };

    let line_prefix = if code_tag { "<code>" } else { "" };

    let line_suffix = if code_tag { "</code>\n" } else { "\n" };

    let Some(config) = config else {
@@ -250,9 +194,9 @@
    };

    HIGHLIGHTER.with_borrow_mut(|highlighter| {
        let mut spans = highlighter.highlight(config, content.as_bytes(), None, |extension| {
            debug!(extension, "Highlighter switch requested");
            fetch_highlighter_config(extension).or(fetch_highlighter_config_by_token(extension))
        let mut spans = highlighter.highlight(config, content.as_bytes(), None, |injection| {
            debug!(injection, "Highlighter switch requested");
            fetch_highlighter_config_by_token(injection)
        })?;

        let mut tag_open = true;
diff --git a/tree-sitter-grammar-repository/Cargo.toml b/tree-sitter-grammar-repository/Cargo.toml
new file mode 100644
index 0000000..7b8d9a3 100644
--- /dev/null
+++ a/tree-sitter-grammar-repository/Cargo.toml
@@ -1,0 +1,26 @@
[package]
name = "tree-sitter-grammar-repository"
description = "tree-sitter grammars built from Helix with support for dynamic linking"
version = "0.0.1"
edition = "2021"
authors = ["Jordan Doyle <jordan@doyle.la>"]
license = "WTFPL"

[dependencies]
globset = "0.4"
regex = "1.11"
tree-sitter-language = "0.1"

[build-dependencies]
anyhow = "1.0"
cc = "1.1"
serde = { version = "1.0", features = ["derive"] }
toml = "0.8"
threadpool = "1.8"
quote = "1.0"
proc-macro2 = "1.0"
prettyplease = "0.2"
heck = "0.5"
syn = "2.0"
serde_json = "1.0"
regex = "1.11"
diff --git a/tree-sitter-grammar-repository/build.rs b/tree-sitter-grammar-repository/build.rs
new file mode 100644
index 0000000..8ef7664 100644
--- /dev/null
+++ a/tree-sitter-grammar-repository/build.rs
@@ -1,0 +1,518 @@
use std::{
    borrow::Cow,
    ffi::OsStr,
    fmt::Write,
    fs,
    path::{Path, PathBuf},
    process::Command,
    sync::LazyLock,
};

use anyhow::{bail, Context};
use heck::{ToSnakeCase, ToUpperCamelCase};
use quote::{format_ident, quote};
use serde::Deserialize;
use threadpool::ThreadPool;

/// Git repository that provides `languages.toml` and the `runtime/queries` directory.
const GRAMMAR_REPOSITORY_URL: &str = "https://github.com/helix-editor/helix";
/// Revision of [`GRAMMAR_REPOSITORY_URL`] that gets fetched and checked out.
const GRAMMAR_REPOSITORY_REF: &str = "82dd96369302f60a9c83a2d54d021458f82bcd36";
/// Path of the language/grammar configuration, relative to the checkout root.
const GRAMMAR_REPOSITORY_CONFIG_PATH: &str = "languages.toml";

/// Language and grammar names that are skipped by every fetch, build and
/// code-generation step of this build script.
static BLACKLISTED_MODULES: &[&str] = &[
    // these languages all don't have corresponding grammars
    "cabal",
    "idris",
    "llvm-mir-yaml",
    "prolog",
    "mint",
    "hare",
    "wren",
    // doesn't compile on macos
    "gemini",
];

fn main() -> anyhow::Result<()> {
    let out_dir = PathBuf::from(std::env::var("OUT_DIR").context("OUT_DIR not set by rustc")?);

    let root = std::env::var("TREE_SITTER_GRAMMAR_LIB_DIR").ok();
    println!("cargo::rerun-if-env-changed=TREE_SITTER_GRAMMAR_LIB_DIR");

    let (root, dylib) = if let Some(root) = root.as_deref() {
        (Path::new(root), true)
    } else {
        (out_dir.as_path(), false)
    };

    let (config, query_path) = if dylib {
        let config: HelixLanguages = toml::from_str(
            &fs::read_to_string(root.join("languages.toml"))
                .context("failed to read languages.toml")?,
        )
        .context("failed to parse helix languages.toml")?;

        println!("cargo::rustc-link-search=native={}", root.display());

        for grammar in &config.grammar {
            if BLACKLISTED_MODULES.contains(&grammar.name.as_str()) {
                continue;
            }

            println!("cargo::rustc-link-lib=dylib=tree-sitter-{}", grammar.name);
        }

        (config, root.join("queries"))
    } else {
        let sources = out_dir.join("sources");
        fs::create_dir_all(&sources)?;

        let helix_root = sources.join("helix");

        fetch_git_repository(GRAMMAR_REPOSITORY_URL, GRAMMAR_REPOSITORY_REF, &helix_root)
            .context(GRAMMAR_REPOSITORY_URL)?;

        let config: HelixLanguages = toml::from_str(
            &fs::read_to_string(helix_root.join(GRAMMAR_REPOSITORY_CONFIG_PATH))
                .context("failed to read helix languages.toml")?,
        )
        .context("failed to parse helix languages.toml")?;

        fetch_and_build_grammar(config.grammar.clone(), &sources)?;

        (config, helix_root.join("runtime/queries"))
    };

    let mut grammar_defs = Vec::new();
    for grammar in &config.grammar {
        let name = &grammar.name;
        if let Some(tokens) =
            build_language_module(name, query_path.as_path()).with_context(|| name.to_string())?
        {
            grammar_defs.push(tokens);
        }
    }
    fs::write(
        &out_dir.join("grammar.defs.rs"),
        prettyplease::unparse(
            &syn::parse2(quote!(#(#grammar_defs)*)).context("failed to parse grammar defs")?,
        ),
    )
    .context("failed to write grammar defs")?;

    let registry = build_grammar_registry(config.grammar.iter().map(|v| v.name.clone()));
    fs::write(
        &out_dir.join("grammar.registry.rs"),
        prettyplease::unparse(&syn::parse2(registry).context("failed to parse grammar registry")?),
    )
    .context("failed to write grammar registry")?;

    let language = build_language_registry(config.language)?;
    fs::write(
        &out_dir.join("language.registry.rs"),
        prettyplease::unparse(&syn::parse2(language)?),
    )?;

    Ok(())
}

/// Generates the `Language` enum and its lookup functions.
///
/// One variant is emitted per non-blacklisted language. The generated code
/// provides:
///
/// * `Language::VARIANTS` - every variant, in declaration order;
/// * `Language::grammar` - the grammar backing a language (the `grammar` key,
///   falling back to the language name);
/// * `Language::from_file_name` - lookup through the language's `file-types`
///   (bare extensions are turned into `*.ext` globs);
/// * `Language::from_injection` - lookup through the language's
///   `injection-regex`.
///
/// When several patterns match, both lookups pick the one whose pattern
/// *source text* is shortest.
///
/// # Errors
///
/// Currently always returns `Ok`; the `Result` is kept for interface stability.
fn build_language_registry(
    language_definition: Vec<LanguageDefinition>,
) -> anyhow::Result<proc_macro2::TokenStream> {
    let mut camel = Vec::new();
    let mut grammars = Vec::new();

    let mut globs = Vec::new();
    let mut globs_to_camel = Vec::new();

    let mut injection_regex = Vec::new();
    let mut injection_regex_str_len = Vec::new();
    let mut injection_regex_to_camel = Vec::new();

    for language in &language_definition {
        let grammar = language
            .grammar
            .as_deref()
            .unwrap_or(language.name.as_str());

        // A language is unusable if either itself or the grammar it points at
        // is blacklisted: the latter has no `Grammar` variant to refer to.
        if BLACKLISTED_MODULES.contains(&language.name.as_str())
            || BLACKLISTED_MODULES.contains(&grammar)
        {
            continue;
        }

        let camel_cased_name = format_ident!("{}", language.name.to_upper_camel_case());
        camel.push(camel_cased_name.clone());
        grammars.push(format_ident!("{}", grammar.to_upper_camel_case()));

        for ty in &language.file_types {
            match ty {
                FileType::Glob { glob } => globs.push(Cow::Borrowed(glob)),
                FileType::Extension(ext) => globs.push(Cow::Owned(format!("*.{ext}"))),
            }

            globs_to_camel.push(camel_cased_name.clone());
        }

        if let Some(regex) = language.injection_regex.as_deref() {
            injection_regex.push(regex);
            injection_regex_str_len.push(regex.len());
            // Not every language has an injection regex, so positions in the
            // regex set do not line up with `Language::VARIANTS`; remember
            // which variant each pattern belongs to.
            injection_regex_to_camel.push(camel_cased_name.clone());
        }
    }

    let injection_regex_len = injection_regex.len();
    let globs_array_len = globs.len();
    let globs_string_len = globs.iter().map(|v| v.len()).collect::<Vec<_>>();

    Ok(quote! {
        #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
        pub enum Language {
            #(#camel),*
        }

        impl Language {
            pub const VARIANTS: &[Self] = &[
                #(Self::#camel),*
            ];

            pub const fn grammar(self) -> Grammar {
                match self {
                    #(Self::#camel => Grammar::#grammars),*
                }
            }

            pub fn from_file_name<P: AsRef<::std::path::Path>>(name: P) -> Option<Self> {
                const LENGTHS: [usize; #globs_array_len] = [#(#globs_string_len),*];
                const GLOB_TO_VARIANT: [Language; #globs_array_len] = [#(Language::#globs_to_camel),*];

                thread_local! {
                    static GLOB: ::std::cell::LazyCell<::globset::GlobSet> = ::std::cell::LazyCell::new(|| {
                        ::globset::GlobSetBuilder::new()
                            #(.add(::globset::Glob::new(#globs).unwrap()))*
                            .build()
                            .unwrap()
                    });
                }

                // NOTE(review): the shortest glob wins; confirm that this is
                // preferred over the most specific (longest) glob.
                let mut best_length = usize::MAX;
                let mut best = None;

                GLOB.with(|glob| {
                    for m in glob.matches(name) {
                        let curr_length = LENGTHS[m];

                        if curr_length < best_length {
                            best_length = curr_length;
                            best = Some(GLOB_TO_VARIANT[m]);
                        }
                    }
                });

                best
            }

            pub fn from_injection(name: &str) -> Option<Self> {
                const LENGTHS: [usize; #injection_regex_len] = [#(#injection_regex_str_len),*];
                const REGEX_TO_VARIANT: [Language; #injection_regex_len] = [#(Language::#injection_regex_to_camel),*];

                thread_local! {
                    static REGEX: ::std::cell::LazyCell<::regex::RegexSet> = ::std::cell::LazyCell::new(|| {
                        ::regex::RegexSet::new(&[
                            #(#injection_regex),*
                        ])
                        .unwrap()
                    });
                }

                // NOTE(review): patterns are unanchored and the shortest
                // pattern wins; confirm this matches the intended semantics.
                let mut best_length = usize::MAX;
                let mut best = None;

                REGEX.with(|regex| {
                    for m in regex.matches(name) {
                        let curr_length = LENGTHS[m];

                        if curr_length < best_length {
                            best_length = curr_length;
                            // `m` indexes the regex set, not `VARIANTS`.
                            best = Some(REGEX_TO_VARIANT[m]);
                        }
                    }
                });

                best
            }
        }
    })
}

fn build_grammar_registry(names: impl Iterator<Item = String>) -> proc_macro2::TokenStream {
    let (ids, plain, camel, snake) = names
        .filter(|name| !BLACKLISTED_MODULES.contains(&name.as_str()))
        .enumerate()
        .fold(
            (Vec::new(), Vec::new(), Vec::new(), Vec::new()),
            |(mut ids, mut plain_acc, mut camel_acc, mut snake_acc), (i, name)| {
                camel_acc.push(format_ident!("{}", name.to_upper_camel_case()));

                if name == "move" {
                    snake_acc.push(format_ident!("r#{}", name.to_snake_case()));
                } else {
                    snake_acc.push(format_ident!("{}", name.to_snake_case()));
                }

                plain_acc.push(name);

                ids.push(i);
                (ids, plain_acc, camel_acc, snake_acc)
            },
        );

    quote! {
        #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
        pub enum Grammar {
            #(#camel),*
        }

        impl Grammar {
            pub const VARIANTS: &[Self] = &[
                #(Self::#camel),*
            ];

            pub const fn highlight_configuration_params(self) -> crate::HighlightConfigurationParams {
                match self {
                    #(Self::#camel => crate::HighlightConfigurationParams {
                        language: crate::grammar::#snake::LANGUAGE,
                        name: #plain,
                        highlights_query: crate::grammar::#snake::HIGHLIGHTS_QUERY,
                        injection_query: crate::grammar::#snake::INJECTIONS_QUERY,
                        locals_query: crate::grammar::#snake::LOCALS_QUERY,
                    }),*
                }
            }

            pub const fn idx(self) -> usize {
                match self {
                    #(Self::#camel => #ids),*
                }
            }
        }
    }
}

fn build_language_module(
    name: &str,
    query_path: &Path,
) -> anyhow::Result<Option<proc_macro2::TokenStream>> {
    if BLACKLISTED_MODULES.contains(&name) {
        return Ok(None);
    }

    let highlights_query = read_local_query(query_path, name, "highlights.scm");
    let injections_query = read_local_query(query_path, name, "injections.scm");
    let locals_query = read_local_query(query_path, name, "locals.scm");

    let ffi = format_ident!("tree_sitter_{}", name.to_snake_case());
    let name = if name == "move" {
        format_ident!("r#{}", name.to_snake_case())
    } else {
        format_ident!("{}", name.to_snake_case())
    };

    Ok(Some(quote! {
        pub mod #name {
            extern "C" {
                fn #ffi() -> *const ();
            }

            pub const LANGUAGE: tree_sitter_language::LanguageFn = unsafe { tree_sitter_language::LanguageFn::from_raw(#ffi) };
            pub const HIGHLIGHTS_QUERY: &str = #highlights_query;
            pub const INJECTIONS_QUERY: &str = #injections_query;
            pub const LOCALS_QUERY: &str = #locals_query;
        }
    }))
}

// taken from https://github.com/helix-editor/helix/blob/2ce4c6d5fa3e50464b41a3d0190ad0e5ada2fc3c/helix-core/src/syntax.rs#L721
/// Reads `query_path/<language>/<filename>` and expands its `; inherits:` directives.
///
/// Every directive is replaced by the (recursively expanded) query of the same
/// `filename` for each comma-separated language it lists, each wrapped in
/// newlines. A query file that does not exist yields an empty string.
///
/// # Panics
///
/// Panics if the file exists but cannot be read.
fn read_local_query(query_path: &Path, language: &str, filename: &str) -> String {
    static INHERITS_REGEX: LazyLock<regex::Regex> =
        LazyLock::new(|| regex::Regex::new(r";+\s*inherits\s*:?\s*([a-z_,()-]+)\s*").unwrap());

    let path = query_path.join(language).join(filename);
    if !path.exists() {
        return String::new();
    }

    let query = match fs::read_to_string(&path) {
        Ok(query) => query,
        Err(e) => panic!("failed to fetch {path:?}: {e:?}"),
    };

    INHERITS_REGEX
        .replace_all(&query, |captures: &regex::Captures| {
            let mut inherited = String::new();

            for parent in captures[1].split(',') {
                // `write!` to a String cannot fail.
                write!(
                    inherited,
                    "\n{}\n",
                    read_local_query(query_path, parent, filename)
                )
                .unwrap();
            }

            inherited
        })
        .into_owned()
}

fn fetch_and_build_grammar(
    grammars: Vec<GrammarDefinition>,
    source_dir: &Path,
) -> anyhow::Result<()> {
    let pool = ThreadPool::new(std::thread::available_parallelism()?.get());

    for grammar in grammars {
        if BLACKLISTED_MODULES.contains(&grammar.name.as_str()) {
            continue;
        }

        let mut grammar_root = source_dir.join(&grammar.name);

        pool.execute(move || {
            let grammar_root = match grammar.source {
                GrammarSource::Git {
                    remote,
                    revision,
                    subpath,
                } => {
                    fetch_git_repository(&remote, &revision, &grammar_root)
                        .context(GRAMMAR_REPOSITORY_URL)
                        .expect("failed to fetch git repository");

                    if let Some(subpath) = subpath {
                        grammar_root.push(subpath);
                    }

                    grammar_root
                }
                GrammarSource::Local { path } => path,
            };

            let grammar_src = grammar_root.join("src");

            let parser_file = Some(grammar_src.join("parser.c"))
                .filter(|s| s.exists())
                .or_else(|| Some(grammar_src.join("parser.cc")))
                .filter(|s| s.exists());
            let scanner_file = Some(grammar_src.join("scanner.c"))
                .filter(|s| s.exists())
                .or_else(|| Some(grammar_src.join("scanner.cc")))
                .filter(|s| s.exists());

            if let Some(parser_file) = parser_file {
                cc::Build::new()
                    .cpp(parser_file.extension() == Some(OsStr::new("cc")))
                    .file(parser_file)
                    .flag_if_supported("-w")
                    .flag_if_supported("-s")
                    .include(&grammar_src)
                    .compile(&format!("{}-parser", grammar.name));
            }

            if let Some(scanner_file) = scanner_file {
                cc::Build::new()
                    .cpp(scanner_file.extension() == Some(OsStr::new("cc")))
                    .file(scanner_file)
                    .flag_if_supported("-w")
                    .flag_if_supported("-s")
                    .include(&grammar_src)
                    .compile(&format!("{}-scanner", grammar.name));
            }
        });
    }

    pool.join();

    Ok(())
}

/// Makes sure `destination` is a git checkout of `url` at revision `ref_`.
///
/// A missing `destination` is initialised as an empty repository with `url`
/// as its `origin` remote. If `HEAD` already resolves to `ref_` nothing else
/// happens; otherwise `ref_` is fetched with depth 1 and the work tree is
/// hard-reset to it.
///
/// The early exit only triggers when `ref_` is spelled exactly as
/// `git rev-parse HEAD` prints it (a full object id).
///
/// # Errors
///
/// Fails when `git` cannot be spawned or when `init`, `remote add`, `fetch`
/// or `reset` exits unsuccessfully.
fn fetch_git_repository(url: &str, ref_: &str, destination: &Path) -> anyhow::Result<()> {
    if !destination.exists() {
        let res = Command::new("git").arg("init").arg(destination).status()?;
        if !res.success() {
            bail!("git init failed with exit code {res}");
        }

        let res = Command::new("git")
            .args(["remote", "add", "origin", url])
            .current_dir(destination)
            .status()?;
        if !res.success() {
            bail!("git remote failed with exit code {res}");
        }
    }

    let head = Command::new("git")
        .args(["rev-parse", "HEAD"])
        .current_dir(destination)
        .output()?
        .stdout;
    // `rev-parse` terminates its output with a newline; without trimming the
    // comparison can never succeed and every build would fetch again.
    if head.trim_ascii() == ref_.as_bytes() {
        return Ok(());
    }

    let res = Command::new("git")
        .args(["fetch", "--depth", "1", "origin", ref_])
        .current_dir(destination)
        .status()?;
    if !res.success() {
        bail!("git fetch failed with exit code {res}");
    }

    let res = Command::new("git")
        .args(["reset", "--hard", ref_])
        .current_dir(destination)
        .status()?;
    if !res.success() {
        bail!("git reset failed with exit code {res}");
    }

    Ok(())
}

/// One entry of the `language` array in `languages.toml`, reduced to the keys
/// this build script reads.
#[derive(Deserialize)]
#[serde(rename_all = "kebab-case")]
struct LanguageDefinition {
    /// Language name; doubles as the grammar name when `grammar` is absent.
    name: String,
    /// Regex matched against injection tokens (`injection-regex` in the TOML).
    injection_regex: Option<String>,
    /// File-name matchers for this language (`file-types` in the TOML).
    file_types: Vec<FileType>,
    /// Explicit grammar name, overriding `name`.
    grammar: Option<String>,
}

/// A single `file-types` element. Deserialized untagged: a table with a `glob`
/// key becomes [`FileType::Glob`], a bare string becomes [`FileType::Extension`].
#[derive(Deserialize)]
#[serde(untagged)]
pub enum FileType {
    /// A glob pattern, used verbatim.
    Glob { glob: String },
    /// A file extension without the dot; matched as `*.<extension>`.
    Extension(String),
}

/// One entry of the `grammar` array in `languages.toml`.
#[derive(Deserialize, Clone)]
#[serde(rename_all = "kebab-case")]
pub struct GrammarDefinition {
    /// Grammar name; determines the source directory, the library names and
    /// the `tree_sitter_<name>` symbol that is declared for it.
    name: String,
    /// Where the grammar's sources are obtained from.
    source: GrammarSource,
}

/// Origin of a grammar's sources. Deserialized untagged, so the variant is
/// chosen by which keys are present, trying `Git` before `Local`.
#[derive(Deserialize, Clone)]
#[serde(rename_all = "lowercase", untagged)]
enum GrammarSource {
    /// Sources that live in a remote git repository.
    Git {
        /// Repository URL (`git` key).
        #[serde(rename = "git")]
        remote: String,
        /// Revision to fetch and check out (`rev` key).
        #[serde(rename = "rev")]
        revision: String,
        /// Optional directory inside the repository that holds the grammar.
        subpath: Option<String>,
    },
    /// Sources that are already on disk.
    Local {
        /// Directory whose `src/` folder holds the parser and scanner sources.
        path: PathBuf,
    },
}

/// The parts of `languages.toml` consumed by this build script.
#[derive(Deserialize)]
#[serde(rename_all = "kebab-case")]
struct HelixLanguages {
    /// Language definitions; drive the generated `Language` enum.
    language: Vec<LanguageDefinition>,
    /// Grammar definitions; drive the generated `Grammar` enum and modules.
    grammar: Vec<GrammarDefinition>,
}
diff --git a/tree-sitter-grammar-repository/src/lib.rs b/tree-sitter-grammar-repository/src/lib.rs
new file mode 100644
index 0000000..ab1047a 100644
--- /dev/null
+++ a/tree-sitter-grammar-repository/src/lib.rs
@@ -1,0 +1,66 @@
//! # tree-sitter-grammar-repository

//!

//! This crate loads in all known languages and grammars from `helix`'s

//! `languages.toml` at compile time and provides an easy way for you

//! to map the language to a highlighter configuration.

//!

//! `tree-sitter` grammars can be dynamically linked by setting the

//! `TREE_SITTER_GRAMMAR_LIB_DIR` environment variable. If set, this library

//! expects a directory of the format:

//!

//! ```text

//! - TREE_SITTER_GRAMMAR_LIB_DIR

//!   - sources/

//!     - html/

//!       - queries/

//!         - highlights.scm

//!         - injections.scm

//!       - package.json

//!     - javascript/

//!       - queries/

//!         - highlights.scm

//!         - injections.scm

//!       - package.json

//!   - libhtml-parser.so

//!   - libhtml-scanner.so

//!   - libjavascript-scanner.so

//!   - ...

//! ```

//!

//! Usage:

//!

//! ```ignore

//! use std::collections::HashMap;

//! use tree_sitter_grammar_repository::Grammar;

//! use tree_sitter_highlight::HighlightConfiguration;

//!

//! let highlighter_configurations = Grammar::VARIANTS

//!     .iter()

//!     .copied()

//!     .map(Grammar::highlight_configuration_params)

//!     .map(|v| (v, HighlightConfiguration::new(

//!         v.language.into(),

//!         v.name,

//!         v.highlights_query,

//!         v.injection_query,

//!         v.locals_query

//!     )))

//!     .collect::<HashMap<Grammar, HighlightConfiguration>>();

//!

//! let highlighter_configuration = highlighter_configurations

//!     .get(&Language::from_file_name("hello_world.toml").grammar());

//! ```


include!(concat!(env!("OUT_DIR"), "/grammar.registry.rs"));
include!(concat!(env!("OUT_DIR"), "/language.registry.rs"));
/// Build-script generated modules, one per grammar, each exposing `LANGUAGE`,
/// `HIGHLIGHTS_QUERY`, `INJECTIONS_QUERY` and `LOCALS_QUERY`.
pub mod grammar {
    include!(concat!(env!("OUT_DIR"), "/grammar.defs.rs"));
}

/// Everything needed to build a highlight configuration for one grammar, as
/// returned by `Grammar::highlight_configuration_params`.
pub struct HighlightConfigurationParams {
    /// Entry point of the grammar's tree-sitter language.
    pub language: tree_sitter_language::LanguageFn,
    /// Grammar name as spelled in `languages.toml`.
    pub name: &'static str,
    /// Contents of the grammar's `highlights.scm`; empty when the file is absent.
    pub highlights_query: &'static str,
    /// Contents of the grammar's `injections.scm`; empty when the file is absent.
    pub injection_query: &'static str,
    /// Contents of the grammar's `locals.scm`; empty when the file is absent.
    pub locals_query: &'static str,
}