Use Helix's tree-sitter grammar registry
Diff
Cargo.lock | 352 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------
Cargo.toml | 29 ++++-------------------------
flake.lock | 17 +++++++++++++++++
flake.nix | 18 +++++++++++++++++-
src/git.rs | 52 +++++++++++++++++-----------------------------------
src/main.rs | 5 +++++
src/syntax_highlight.rs | 242 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
tree-sitter-grammar-repository/Cargo.toml | 26 ++++++++++++++++++++++++++
tree-sitter-grammar-repository/build.rs | 518 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
tree-sitter-grammar-repository/src/lib.rs | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
10 files changed, 833 insertions(+), 492 deletions(-)
@@ -381,7 +381,7 @@
checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c"
dependencies = [
"memchr",
"regex-automata 0.4.7",
"regex-automata 0.4.8",
"serde",
]
@@ -1708,6 +1708,19 @@
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "globset"
version = "0.4.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15f1ce686646e7f1e19bf7d5533fe443a45dbfb990e00629110797578b42fb19"
dependencies = [
"aho-corasick",
"bstr",
"log",
"regex-automata 0.4.8",
"regex-syntax 0.8.5",
]
[[package]]
name = "h2"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -2299,6 +2312,16 @@
dependencies = [
"autocfg",
"libm",
]
[[package]]
name = "num_cpus"
version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
dependencies = [
"hermit-abi",
"libc",
]
[[package]]
@@ -2459,6 +2482,16 @@
checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04"
dependencies = [
"zerocopy",
]
[[package]]
name = "prettyplease"
version = "0.2.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "479cf940fbbb3426c32c5d5176f62ad57549a0bb84773423ba8be9d089f5faba"
dependencies = [
"proc-macro2",
"syn",
]
[[package]]
@@ -2488,7 +2521,7 @@
"rand",
"rand_chacha",
"rand_xorshift",
"regex-syntax 0.8.4",
"regex-syntax 0.8.5",
"unarray",
]
@@ -2616,14 +2649,14 @@
[[package]]
name = "regex"
version = "1.10.6"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619"
checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata 0.4.7",
"regex-syntax 0.8.4",
"regex-automata 0.4.8",
"regex-syntax 0.8.5",
]
[[package]]
@@ -2637,13 +2670,13 @@
[[package]]
name = "regex-automata"
version = "0.4.7"
version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax 0.8.4",
"regex-syntax 0.8.5",
]
[[package]]
@@ -2654,9 +2687,9 @@
[[package]]
name = "regex-syntax"
version = "0.8.4"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
[[package]]
name = "rgit"
@@ -2702,32 +2735,8 @@
"tower-service",
"tracing",
"tracing-subscriber",
"tree-sitter-bash",
"tree-sitter-c",
"tree-sitter-c-sharp",
"tree-sitter-cpp",
"tree-sitter-css",
"tree-sitter-elixir",
"tree-sitter-fortran",
"tree-sitter-go",
"tree-sitter-haskell",
"tree-sitter-grammar-repository",
"tree-sitter-highlight",
"tree-sitter-html",
"tree-sitter-java",
"tree-sitter-javascript",
"tree-sitter-json",
"tree-sitter-md",
"tree-sitter-ocaml",
"tree-sitter-php",
"tree-sitter-python",
"tree-sitter-regex",
"tree-sitter-ruby",
"tree-sitter-rust",
"tree-sitter-scss",
"tree-sitter-svelte-ng",
"tree-sitter-toml-ng",
"tree-sitter-typescript",
"tree-sitter-yaml",
"unix_mode",
"uuid",
"v_htmlescape",
@@ -3067,7 +3076,7 @@
"once_cell",
"onig",
"plist",
"regex-syntax 0.8.4",
"regex-syntax 0.8.5",
"serde",
"serde_derive",
"serde_json",
@@ -3144,6 +3153,15 @@
dependencies = [
"cfg-if",
"once_cell",
]
[[package]]
name = "threadpool"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa"
dependencies = [
"num_cpus",
]
[[package]]
@@ -3460,97 +3478,27 @@
dependencies = [
"cc",
"regex",
"regex-syntax 0.8.4",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-bash"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3aa5e1c6bd02c0053f3f68edcf5d8866b38a8640584279e30fca88149ce14dda"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-c"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8b3fb515e498e258799a31d78e6603767cd6892770d9e2290ec00af5c3ad80b"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-c-sharp"
version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04c0f6d2209a3cd6d0bb9d2934715da15a15710d3c09c7c1ecd4c9804c3ecd10"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-cpp"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d67e862242878d6ee50e1e5814f267ee3eea0168aea2cdbd700ccfb4c74b6d3"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-css"
version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d0018d6b1692a806f9cddaa1e5616951fd58840c39a0b21401b55ab3df12292"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-elixir"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97bf0efa4be41120018f23305b105ad4dfd3be1b7f302dc4071d0e6c2dec3a32"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-fortran"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50d655214a848bfb63dfdc2e7eeef5c3c323807a220b3117a1aef46b2bb95a12"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-go"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "caf57626e4c9b6d6efaf8a8d5ee1241c5f178ae7bfdf693713ae6a774f01424e"
dependencies = [
"cc",
"regex-syntax 0.8.5",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-haskell"
version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b92c8a4c4ceaae105621b00624ee8d9029fb23116f400832e4be30d0639d054"
name = "tree-sitter-grammar-repository"
version = "0.0.1"
dependencies = [
"anyhow",
"cc",
"globset",
"heck",
"prettyplease",
"proc-macro2",
"quote",
"regex",
"serde",
"serde_json",
"syn",
"threadpool",
"toml",
"tree-sitter-language",
]
@@ -3564,46 +3512,6 @@
"regex",
"thiserror",
"tree-sitter",
]
[[package]]
name = "tree-sitter-html"
version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d52d710a3723360ebade986d3f0ae2aa2c3bcfb87bb1cdf60988ec51c81c40d"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-java"
version = "0.23.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b38b26736e6e97421760201f7a91c859f3b0d44382d48ac18aa963828f784ebf"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-javascript"
version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59e1f62f8babb640b909f30675d1addeb1f17802f2a4d2af287569753b243977"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-json"
version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86a5d6b3ea17e06e7a34aabeadd68f5866c0d0f9359155d432095f8b751865e4"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
@@ -3611,126 +3519,6 @@
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2545046bd1473dac6c626659cc2567c6c0ff302fc8b84a56c4243378276f7f57"
[[package]]
name = "tree-sitter-md"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17f968c22a01010b83fc960455ae729db08dbeb6388617d9113897cb9204b030"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-ocaml"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0534f94f006cf4d4994e964212e91d4626efcaf6769b023d3f17530399a4d6e1"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-php"
version = "0.23.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e0470ddcab3cab948615d50b0395da28e4ab886c0f78363e607cf7f0724cf4a"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-python"
version = "0.23.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "65661b1a3e24139e2e54207e47d910ab07e28790d78efc7d5dc3a11ce2a110eb"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-regex"
version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b9a7087b1cf769c96b7e74414947df067fb6135f04d176fd23be08b9396cc0e"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-ruby"
version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ec5ee842e27791e0adffa0b2a177614de51d2a26e5c7e84d014ed7f097e5ed0"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-rust"
version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cffbbcb780348fbae8395742ae5b34c1fd794e4085d43aac9f259387f9a84dc8"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-scss"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33909a9ca86390ebbf3461e9949c4bbe2767d2d024b486306d27616641d4ba24"
dependencies = [
"cc",
"tree-sitter",
]
[[package]]
name = "tree-sitter-svelte-ng"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef0a71f9cf5e94373cc86c64893630c8a29bb25d3390a248268d08af2165fa37"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-toml-ng"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "695d20cd83acf16c02c773f03e76d7b43b19883d4e2ce3652a8f06b5e0da7455"
dependencies = [
"cc",
"tree-sitter",
]
[[package]]
name = "tree-sitter-typescript"
version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aecf1585ae2a9dddc2b1d4c0e2140b2ec9876e2a25fd79de47fcf7dae0384685"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-yaml"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aad27ec46ad343d8b514f64dd3fdffb478c592ece561b6c935d90ef55589c6b6"
dependencies = [
"cc",
"tree-sitter",
]
[[package]]
name = "trim-in-place"
@@ -6,6 +6,9 @@
authors = ["Jordan Doyle <jordan@doyle.la>"]
license = "WTFPL"
[workspace]
members = ["tree-sitter-grammar-repository"]
[dependencies]
@@ -55,32 +58,8 @@
tower-service = "0.3"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
tree-sitter-bash = "0.23"
tree-sitter-c = "0.23"
tree-sitter-cpp = "0.23"
tree-sitter-c-sharp = "0.23"
tree-sitter-elixir = "0.3"
tree-sitter-go = "0.23"
tree-sitter-php = "0.23"
tree-sitter-json = "0.23"
tree-sitter-ocaml = "0.23"
tree-sitter-python = "0.23"
tree-sitter-regex = "0.23"
tree-sitter-ruby = "0.23"
tree-sitter-css = "0.23"
tree-sitter-fortran = "0.1"
tree-sitter-haskell = "0.23"
tree-sitter-grammar-repository = { path = "./tree-sitter-grammar-repository" }
tree-sitter-highlight = "0.23"
tree-sitter-html = "0.23"
tree-sitter-java = "0.23"
tree-sitter-javascript = "0.23"
tree-sitter-md = "0.3"
tree-sitter-rust = "0.23"
tree-sitter-scss = "1.0"
tree-sitter-svelte-ng = "1.0"
tree-sitter-toml-ng = "0.6.0"
tree-sitter-typescript = "0.23"
tree-sitter-yaml = "0.6"
unix_mode = "0.1"
uuid = { version = "1.7", features = ["v4"] }
v_htmlescape = { version = "0.15", features = ["bytes-buf"] }
@@ -31,6 +31,22 @@
"type": "github"
}
},
"helix": {
"flake": false,
"locked": {
"lastModified": 1727613050,
"narHash": "sha256-vxf/5aCNjy0OKzkkkNoeUnjr1lWQDmcKW+UXKpU4weE=",
"owner": "helix-editor",
"repo": "helix",
"rev": "2ce4c6d5fa3e50464b41a3d0190ad0e5ada2fc3c",
"type": "github"
},
"original": {
"owner": "helix-editor",
"repo": "helix",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1727335715,
@@ -65,6 +81,7 @@
"inputs": {
"advisory-db": "advisory-db",
"crane": "crane",
"helix": "helix",
"nixpkgs": "nixpkgs",
"treefmt-nix": "treefmt-nix",
"utils": "utils"
@@ -8,14 +8,28 @@
url = "github:rustsec/advisory-db";
flake = false;
};
helix = {
url = "github:helix-editor/helix";
flake = false;
};
};
outputs = { self, nixpkgs, utils, crane, advisory-db, treefmt-nix }:
outputs = { self, nixpkgs, utils, crane, advisory-db, treefmt-nix, helix }:
utils.lib.eachDefaultSystem (system:
let
pkgs = import nixpkgs { inherit system; };
craneLib = crane.mkLib pkgs;
src = craneLib.cleanCargoSource ./.;
helix-grammar = pkgs.callPackage "${helix}/grammars.nix" { inherit pkgs; };
rgit-grammar = pkgs.runCommand "consolidated-rgit-grammars" { } ''
mkdir -p $out
for file in ${helix-grammar}/*; do
ln -s "$file" "$out/libtree-sitter-$(basename "$file")"
done
ln -s "${helix}/languages.toml" "$out/languages.toml"
ln -s "${helix}/runtime/queries" "$out/queries"
'';
commonArgs = {
inherit src;
strictDeps = true;
@@ -23,6 +37,7 @@
nativeBuildInputs = with pkgs; [ cmake clang ];
LIBCLANG_PATH = "${pkgs.clang.cc.lib}/lib";
ROCKSDB_LIB_DIR = "${pkgs.rocksdb}/lib";
TREE_SITTER_GRAMMAR_LIB_DIR = "${rgit-grammar}";
};
cargoArtifacts = craneLib.buildDepsOnly commonArgs;
rgit = craneLib.buildPackage (commonArgs // {
@@ -33,6 +48,7 @@
fileset = pkgs.lib.fileset.unions [
./Cargo.toml
./Cargo.lock
./tree-sitter-grammar-repository
./src
./statics
./templates
@@ -1,3 +1,15 @@
use std::{
borrow::Cow,
collections::{BTreeMap, VecDeque},
ffi::OsStr,
fmt::{self, Arguments, Write},
io::ErrorKind,
path::{Path, PathBuf},
str::FromStr,
sync::Arc,
time::Duration,
};
use anyhow::{anyhow, Context, Result};
use axum::response::IntoResponse;
use bytes::{buf::Writer, BufMut, Bytes, BytesMut};
@@ -7,8 +19,7 @@
actor::SignatureRef,
bstr::{BStr, BString, ByteSlice, ByteVec},
diff::blob::{platform::prepare_diff::Operation, Sink},
object::tree::EntryKind,
object::Kind,
object::{tree::EntryKind, Kind},
objs::tree::EntryRef,
prelude::TreeEntryRefExt,
traverse::tree::visit::Action,
@@ -16,17 +27,6 @@
ObjectId, ThreadSafeRepository, Url,
};
use moka::future::Cache;
use std::{
borrow::Cow,
collections::{BTreeMap, VecDeque},
ffi::OsStr,
fmt::{self, Arguments, Write},
io::ErrorKind,
path::{Path, PathBuf},
str::FromStr,
sync::Arc,
time::Duration,
};
use tar::Builder;
use time::{OffsetDateTime, UtcOffset};
use tracing::{error, instrument, warn};
@@ -144,21 +144,15 @@
match object.kind {
Kind::Blob => {
let path = path.join(item.filename().to_path_lossy());
let mut blob = object.into_blob();
let size = blob.data.len();
let extension = path
.extension()
.or_else(|| path.file_name())
.and_then(OsStr::to_str)
.unwrap_or_default();
let content = match (formatted, simdutf8::basic::from_utf8(&blob.data)) {
(true, Err(_)) => Content::Binary(vec![]),
(true, Ok(data)) => Content::Text(Cow::Owned(format_file(
data,
FileIdentifier::Extension(extension),
FileIdentifier::Path(path.as_path()),
)?)),
(false, Err(_)) => Content::Binary(blob.take_data()),
(false, Ok(_data)) => Content::Text(Cow::Owned(unsafe {
@@ -1091,29 +1085,17 @@
}
struct SyntaxHighlightedDiffFormatter<'a> {
extension: &'a str,
path: &'a Path,
}
impl<'a> SyntaxHighlightedDiffFormatter<'a> {
fn new(path: &'a Path) -> Self {
let extension = path
.extension()
.or_else(|| path.file_name())
.and_then(OsStr::to_str)
.unwrap_or_default();
Self { extension }
Self { path }
}
fn write(&self, output: &mut String, class: &str, data: &str) {
write!(output, r#"<span class="diff-{class}">"#).unwrap();
format_file_inner(
output,
data,
FileIdentifier::Extension(self.extension),
false,
)
.unwrap();
format_file_inner(output, data, FileIdentifier::Path(self.path), false).unwrap();
write!(output, r#"</span>"#).unwrap();
}
}
@@ -42,6 +42,7 @@
},
git::Git,
layers::logger::LoggingMiddleware,
syntax_highlight::prime_highlighters,
theme::Theme,
};
@@ -189,6 +190,10 @@
resp
}
};
info!("Priming highlighters...");
prime_highlighters();
info!("Server starting up...");
let app = Router::new()
.route("/", get(methods::index::handle))
@@ -1,13 +1,15 @@
use std::{
cell::RefCell,
collections::HashMap,
fmt::Write as FmtWrite,
io::{ErrorKind, Write as IoWrite},
path::Path,
sync::LazyLock,
};
use comrak::adapters::SyntaxHighlighterAdapter;
use tracing::debug;
use tree_sitter_grammar_repository::{Grammar, Language};
use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter};
thread_local! {
@@ -33,152 +35,95 @@
}
define_classes! {
"keyword.directive" => "keyword directive",
"markup.strikethrough" => "markup strikethrough",
"markup.link" => "markup link",
"keyword.control.conditional" => "keyword control conditional",
"markup.bold" => "markup bold",
"diff.plus" => "diff plus",
"markup.heading.2" => "markup heading 2",
"markup" => "markup",
"diff.delta" => "diff delta",
"variable.other.member" => "variable other member",
"namespace" => "namespace",
"comment.line" => "comment line",
"function" => "function",
"keyword.operator" => "keyword operator",
"punctuation.bracket" => "punctuation bracket",
"markup.list" => "markup list",
"type.builtin" => "type builtin",
"keyword.storage.modifier" => "keyword storage modifier",
"constant" => "constant",
"markup.italic" => "markup italic",
"variable" => "variable",
"keyword" => "keyword",
"punctuation.special" => "punctuation special",
"string.special.path" => "string special path",
"keyword.storage.type" => "keyword storage type",
"markup.heading.5" => "markup heading 5",
"markup.heading.6" => "markup heading 6",
"markup.link.label" => "markup link label",
"markup.list.numbered" => "markup list numbered",
"diff.delta.moved" => "diff delta moved",
"constant.numeric" => "constant numeric",
"markup.heading" => "markup heading",
"markup.link.text" => "markup link text",
"keyword.function" => "keyword function",
"string.special.url" => "string special url",
"keyword.control.return" => "keyword control return",
"keyword.control.repeat" => "keyword control repeat",
"constant.builtin" => "constant builtin",
"type.enum.variant" => "type enum variant",
"markup.raw.block" => "markup raw block",
"markup.heading.3" => "markup heading 3",
"escape" => "escape",
"comment.block" => "comment block",
"constant.numeric.integer" => "constant numeric integer",
"punctuation.delimiter" => "punctuation delimiter",
"constructor" => "constructor",
"type" => "type",
"string.regexp" => "string regexp",
"variable.parameter" => "variable parameter",
"markup.quote" => "markup quote",
"string.special" => "string special",
"constant.numeric.float" => "constant numeric float",
"constant.character.escape" => "constant character escape",
"tag" => "tag",
"keyword.storage" => "keyword storage",
"string" => "string",
"function.macro" => "function macro",
"markup.list.unnumbered" => "markup list unnumbered",
"diff.minus" => "diff minus",
"punctuation" => "punctuation",
"markup.link.url" => "markup link url",
"function.method" => "function method",
"markup.raw" => "markup raw",
"function.special" => "function special",
"attribute" => "attribute",
"operator" => "operator",
"special" => "special",
"function.builtin" => "function builtin",
"diff" => "diff",
"markup.heading.4" => "markup heading 4",
"keyword.control" => "keyword control",
"markup.list.unchecked" => "markup list unchecked",
"keyword.control.exception" => "keyword control exception",
"constant.builtin.boolean" => "constant builtin boolean",
"markup.heading.1" => "markup heading 1",
"markup.heading.marker" => "markup heading marker",
"constant.character" => "constant character",
"markup.raw.inline" => "markup raw inline",
"variable.builtin" => "variable builtin",
"variable.other" => "variable other",
"tag.builtin" => "tag builtin",
"type.enum" => "type enum",
"comment.block.documentation" => "comment block documentation",
"comment" => "comment",
"string.special.symbol" => "string special symbol",
"label" => "label",
"keyword.control.import" => "keyword control import",
"markup.list.checked" => "markup list checked",
"attribute" => "attribute",
"boolean" => "boolean",
"carriage-return" => "carriage-return",
"comment" => "comment",
"comment.documentation" => "comment documentation",
"constant" => "constant",
"constant.builtin" => "constant builtin",
"constructor" => "constructor",
"constructor.builtin" => "constructor builtin",
"embedded" => "embedded",
"error" => "error",
"escape" => "escape",
"function" => "function",
"function.builtin" => "function builtin",
"keyword" => "keyword",
"markup" => "markup",
"markup.bold" => "markup bold",
"markup.heading" => "markup heading",
"markup.italic" => "markup italic",
"markup.link" => "markup link",
"markup.link.url" => "markup link url",
"markup.list" => "markup list",
"markup.list.checked" => "markup list checked",
"markup.list.numbered" => "markup list numbered",
"markup.list.unchecked" => "markup list unchecked",
"markup.list.unnumbered" => "markup list unnumbered",
"markup.quote" => "markup quote",
"markup.raw" => "markup raw",
"markup.raw.block" => "markup raw block",
"markup.raw.inline" => "markup raw inline",
"markup.strikethrough" => "markup strikethrough",
"module" => "module",
"number" => "number",
"operator" => "operator",
"property" => "property",
"property.builtin" => "property builtin",
"punctuation" => "punctuation",
"punctuation.bracket" => "punctuation bracket",
"punctuation.delimiter" => "punctuation delimiter",
"punctuation.special" => "punctuation special",
"string" => "string",
"string.escape" => "string escape",
"string.regexp" => "string regexp",
"string.special" => "string special",
"string.special.symbol" => "string special symbol",
"tag" => "tag",
"type" => "type",
"type.builtin" => "type builtin",
"variable" => "variable",
"variable.builtin" => "variable builtin",
"variable.member" => "variable member",
"variable.parameter" => "variable parameter",}
pub fn prime_highlighters() {
let _res = HIGHLIGHTER_CONFIGS.len();
}
macro_rules! build_highlighter_configs {
($($i:literal => $($extension:literal)|* => $($token:literal)|* => $config:expr),*,) => {
static BUILD_HIGHLIGHTER_CONFIGS: LazyLock<[HighlightConfiguration; count!($($config),*)]> = LazyLock::new(|| [
$({
let mut config = $config.unwrap();
config.configure(&HIGHLIGHT_NAMES);
config
}),*
]);
pub fn fetch_highlighter_config(extension: &str) -> Option<&'static HighlightConfiguration> {
match extension {
$($($extension)|* => Some(&BUILD_HIGHLIGHTER_CONFIGS[$i])),*,
_ => None,
}
}
pub fn fetch_highlighter_config_by_token(extension: &str) -> Option<&'static HighlightConfiguration> {
match extension {
$($($token)|* => Some(&BUILD_HIGHLIGHTER_CONFIGS[$i])),*,
_ => None,
}
}
};
/// One `HighlightConfiguration` per entry of `Grammar::VARIANTS`, stored in
/// the same order as `Grammar::VARIANTS`.
///
/// Built on first access. Panics with `bad query for <name>: <error>` if a
/// configuration cannot be constructed from a grammar's queries.
static HIGHLIGHTER_CONFIGS: LazyLock<Vec<HighlightConfiguration>> = LazyLock::new(|| {
    let mut configs = Vec::new();

    for &grammar in Grammar::VARIANTS.iter() {
        let params = grammar.highlight_configuration_params();

        let mut config = match HighlightConfiguration::new(
            params.language.into(),
            params.name,
            params.highlights_query,
            params.injection_query,
            params.locals_query,
        ) {
            Ok(config) => config,
            Err(e) => panic!("bad query for {}: {e}", params.name),
        };

        // Register the recognised highlight names with this configuration.
        config.configure(&HIGHLIGHT_NAMES);
        configs.push(config);
    }

    configs
});
/// Looks up the highlight configuration for a file path.
///
/// Returns `None` when `Language::from_file_name` does not recognise `file`;
/// otherwise returns the entry of `HIGHLIGHTER_CONFIGS` at the index of the
/// language's grammar.
pub fn fetch_highlighter_config(file: &Path) -> Option<&'static HighlightConfiguration> {
    let language = Language::from_file_name(file)?;
    let idx = language.grammar().idx();
    Some(&HIGHLIGHTER_CONFIGS[idx])
}
build_highlighter_configs! {
0 => "java" => "java" => HighlightConfiguration::new(tree_sitter_java::LANGUAGE.into(), "java", tree_sitter_java::HIGHLIGHTS_QUERY, "", ""),
1 => "html" => "html" => HighlightConfiguration::new(tree_sitter_html::LANGUAGE.into(), "html", include_str!("../grammar/html/highlights.scm"), include_str!("../grammar/html/injections.scm"), ""),
2 => "md" => "markdown" => HighlightConfiguration::new(tree_sitter_md::LANGUAGE.into(), "markdown", tree_sitter_md::HIGHLIGHT_QUERY_BLOCK, tree_sitter_md::INJECTION_QUERY_BLOCK, ""),
3 => "rs" => "rust" => HighlightConfiguration::new(tree_sitter_rust::LANGUAGE.into(), "rust", tree_sitter_rust::HIGHLIGHTS_QUERY, tree_sitter_rust::INJECTIONS_QUERY, ""),
4 => "toml" => "toml" => HighlightConfiguration::new(tree_sitter_toml_ng::language(), "toml", tree_sitter_toml_ng::HIGHLIGHTS_QUERY, "", ""),
5 => "yaml" | "yml" => "yaml" | "yml" => HighlightConfiguration::new(tree_sitter_yaml::language(), "yaml", tree_sitter_yaml::HIGHLIGHTS_QUERY, "", ""),
6 => "hs" => "haskell" => HighlightConfiguration::new(tree_sitter_haskell::LANGUAGE.into(), "haskell", tree_sitter_haskell::HIGHLIGHTS_QUERY, tree_sitter_haskell::INJECTIONS_QUERY, tree_sitter_haskell::LOCALS_QUERY),
7 => "f" | "f90" | "for" => "fortran" => HighlightConfiguration::new(tree_sitter_fortran::LANGUAGE.into(), "fortran", include_str!("../grammar/fortran/highlights.scm"), "", ""),
8 => "svelte" => "svelte" => HighlightConfiguration::new(tree_sitter_svelte_ng::LANGUAGE.into(), "svelte", tree_sitter_svelte_ng::HIGHLIGHTS_QUERY, tree_sitter_svelte_ng::INJECTIONS_QUERY, tree_sitter_svelte_ng::LOCALS_QUERY),
9 => "js" => "js" | "javascript" => HighlightConfiguration::new(tree_sitter_javascript::LANGUAGE.into(), "javascript", tree_sitter_javascript::HIGHLIGHT_QUERY, tree_sitter_javascript::INJECTIONS_QUERY, tree_sitter_javascript::LOCALS_QUERY),
10 => "jsx" => "jsx" => HighlightConfiguration::new(tree_sitter_javascript::LANGUAGE.into(), "jsx", tree_sitter_javascript::JSX_HIGHLIGHT_QUERY, tree_sitter_javascript::INJECTIONS_QUERY, tree_sitter_javascript::LOCALS_QUERY),
11 => "ts" => "ts" | "typescript" => HighlightConfiguration::new(tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(), "typescript", tree_sitter_typescript::HIGHLIGHTS_QUERY, "", ""),
12 => "tsx" => "tsx" => HighlightConfiguration::new(tree_sitter_typescript::LANGUAGE_TSX.into(), "tsx", tree_sitter_typescript::HIGHLIGHTS_QUERY, "", ""),
13 => "scss" => "scss" => HighlightConfiguration::new(tree_sitter_scss::language(), "scss", tree_sitter_scss::HIGHLIGHTS_QUERY, "", ""),
14 => "css" => "css" => HighlightConfiguration::new(tree_sitter_css::LANGUAGE.into(), "css", tree_sitter_css::HIGHLIGHTS_QUERY, "", ""),
15 => "bash" | "sh" => "bash" | "shell" | "sh" => HighlightConfiguration::new(tree_sitter_bash::LANGUAGE.into(), "css", tree_sitter_bash::HIGHLIGHT_QUERY, "", ""),
16 => "c" => "c" => HighlightConfiguration::new(tree_sitter_c::LANGUAGE.into(), "c", tree_sitter_c::HIGHLIGHT_QUERY, "", ""),
17 => "cpp" | "c++" => "cpp" | "c++" => HighlightConfiguration::new(tree_sitter_cpp::LANGUAGE.into(), "c++", tree_sitter_cpp::HIGHLIGHT_QUERY, "", ""),
18 => "cs" => "c#" | "cs" | "csharp" => HighlightConfiguration::new(tree_sitter_c_sharp::LANGUAGE.into(), "c#", tree_sitter_c_sharp::HIGHLIGHTS_QUERY, "", ""),
19 => "ex" | "exs" => "elixir" => HighlightConfiguration::new(tree_sitter_elixir::LANGUAGE.into(), "elixir", tree_sitter_elixir::HIGHLIGHTS_QUERY, tree_sitter_elixir::INJECTIONS_QUERY, ""),
21 => "go" => "go" | "golang" => HighlightConfiguration::new(tree_sitter_go::LANGUAGE.into(), "go", tree_sitter_go::HIGHLIGHTS_QUERY, "", ""),
22 => "php" => "php" => HighlightConfiguration::new(tree_sitter_php::LANGUAGE_PHP.into(), "php", tree_sitter_php::HIGHLIGHTS_QUERY, tree_sitter_php::INJECTIONS_QUERY, ""),
23 => "json" => "json" => HighlightConfiguration::new(tree_sitter_json::LANGUAGE.into(), "json", tree_sitter_json::HIGHLIGHTS_QUERY, "", ""),
24 => "ml" => "ml" | "ocaml" => HighlightConfiguration::new(tree_sitter_ocaml::LANGUAGE_OCAML.into(), "ocaml", tree_sitter_ocaml::HIGHLIGHTS_QUERY, "", tree_sitter_ocaml::LOCALS_QUERY),
25 => "mli" => "mli" | "ocaml-interface" => HighlightConfiguration::new(tree_sitter_ocaml::LANGUAGE_OCAML_INTERFACE.into(), "ocaml", tree_sitter_ocaml::HIGHLIGHTS_QUERY, "", tree_sitter_ocaml::LOCALS_QUERY),
26 => "py" => "py" | "python" => HighlightConfiguration::new(tree_sitter_python::LANGUAGE.into(), "python", tree_sitter_python::HIGHLIGHTS_QUERY, "", ""),
27 => "regex" => "regex" => HighlightConfiguration::new(tree_sitter_regex::LANGUAGE.into(), "regex", tree_sitter_regex::HIGHLIGHTS_QUERY, "", ""),
28 => "rb" => "rb" | "ruby" => HighlightConfiguration::new(tree_sitter_ruby::LANGUAGE.into(), "ruby", tree_sitter_ruby::HIGHLIGHTS_QUERY, "", tree_sitter_ruby::LOCALS_QUERY),
/// Looks up the highlight configuration for an injection token.
///
/// Returns `None` when `Language::from_injection` does not recognise `token`;
/// otherwise returns the entry of `HIGHLIGHTER_CONFIGS` at the index of the
/// language's grammar.
pub fn fetch_highlighter_config_by_token(token: &str) -> Option<&'static HighlightConfiguration> {
    let language = Language::from_injection(token)?;
    let idx = language.grammar().idx();
    Some(&HIGHLIGHTER_CONFIGS[idx])
}
pub struct ComrakHighlightAdapter;
@@ -212,9 +157,9 @@
}
}
#[derive(Copy, Clone)]
#[derive(Copy, Clone, Debug)]
pub enum FileIdentifier<'a> {
Extension(&'a str),
Path(&'a Path),
Token(&'a str),
}
@@ -231,12 +176,11 @@
code_tag: bool,
) -> anyhow::Result<()> {
let config = match identifier {
FileIdentifier::Extension(v) => fetch_highlighter_config(v),
FileIdentifier::Path(v) => fetch_highlighter_config(v),
FileIdentifier::Token(v) => fetch_highlighter_config_by_token(v),
};
let line_prefix = if code_tag { "<code>" } else { "" };
let line_suffix = if code_tag { "</code>\n" } else { "\n" };
let Some(config) = config else {
@@ -250,9 +194,9 @@
};
HIGHLIGHTER.with_borrow_mut(|highlighter| {
let mut spans = highlighter.highlight(config, content.as_bytes(), None, |extension| {
debug!(extension, "Highlighter switch requested");
fetch_highlighter_config(extension).or(fetch_highlighter_config_by_token(extension))
let mut spans = highlighter.highlight(config, content.as_bytes(), None, |injection| {
debug!(injection, "Highlighter switch requested");
fetch_highlighter_config_by_token(injection)
})?;
let mut tag_open = true;
@@ -1,0 +1,26 @@
[package]
name = "tree-sitter-grammar-repository"
description = "tree-sitter grammars built from Helix with support for dynamic linking"
version = "0.0.1"
edition = "2021"
authors = ["Jordan Doyle <jordan@doyle.la>"]
license = "WTFPL"
[dependencies]
globset = "0.4"
regex = "1.11"
tree-sitter-language = "0.1"
[build-dependencies]
anyhow = "1.0"
cc = "1.1"
serde = { version = "1.0", features = ["derive"] }
toml = "0.8"
threadpool = "1.8"
quote = "1.0"
proc-macro2 = "1.0"
prettyplease = "0.2"
heck = "0.5"
syn = "2.0"
serde_json = "1.0"
regex = "1.11"
@@ -1,0 +1,518 @@
use std::{
borrow::Cow,
ffi::OsStr,
fmt::Write,
fs,
path::{Path, PathBuf},
process::Command,
sync::LazyLock,
};
use anyhow::{bail, Context};
use heck::{ToSnakeCase, ToUpperCamelCase};
use quote::{format_ident, quote};
use serde::Deserialize;
use threadpool::ThreadPool;
/// Git URL of the Helix repository, fetched when no prebuilt grammar
/// directory is supplied via `TREE_SITTER_GRAMMAR_LIB_DIR`.
const GRAMMAR_REPOSITORY_URL: &str = "https://github.com/helix-editor/helix";
/// Git ref of the Helix repository that is passed to the fetch.
const GRAMMAR_REPOSITORY_REF: &str = "82dd96369302f60a9c83a2d54d021458f82bcd36";
/// Path of the language configuration file, relative to the root of the
/// fetched Helix checkout.
const GRAMMAR_REPOSITORY_CONFIG_PATH: &str = "languages.toml";
/// Grammar names that are excluded from processing.
///
/// When linking against prebuilt grammars, no `rustc-link-lib` directive is
/// emitted for any grammar whose name appears here.
/// NOTE(review): the reason each grammar is excluded is not recorded in this
/// file — confirm before adding or removing entries.
static BLACKLISTED_MODULES: &[&str] = &[
"cabal",
"idris",
"llvm-mir-yaml",
"prolog",
"mint",
"hare",
"wren",
"gemini",
];
fn main() -> anyhow::Result<()> {
let out_dir = PathBuf::from(std::env::var("OUT_DIR").context("OUT_DIR not set by rustc")?);
let root = std::env::var("TREE_SITTER_GRAMMAR_LIB_DIR").ok();
println!("cargo::rerun-if-env-changed=TREE_SITTER_GRAMMAR_LIB_DIR");
let (root, dylib) = if let Some(root) = root.as_deref() {
(Path::new(root), true)
} else {
(out_dir.as_path(), false)
};
let (config, query_path) = if dylib {
let config: HelixLanguages = toml::from_str(
&fs::read_to_string(root.join("languages.toml"))
.context("failed to read languages.toml")?,
)
.context("failed to parse helix languages.toml")?;
println!("cargo::rustc-link-search=native={}", root.display());
for grammar in &config.grammar {
if BLACKLISTED_MODULES.contains(&grammar.name.as_str()) {
continue;
}
println!("cargo::rustc-link-lib=dylib=tree-sitter-{}", grammar.name);
}
(config, root.join("queries"))
} else {
let sources = out_dir.join("sources");
fs::create_dir_all(&sources)?;
let helix_root = sources.join("helix");
fetch_git_repository(GRAMMAR_REPOSITORY_URL, GRAMMAR_REPOSITORY_REF, &helix_root)
.context(GRAMMAR_REPOSITORY_URL)?;
let config: HelixLanguages = toml::from_str(
&fs::read_to_string(helix_root.join(GRAMMAR_REPOSITORY_CONFIG_PATH))
.context("failed to read helix languages.toml")?,
)
.context("failed to parse helix languages.toml")?;
fetch_and_build_grammar(config.grammar.clone(), &sources)?;
(config, helix_root.join("runtime/queries"))
};
let mut grammar_defs = Vec::new();
for grammar in &config.grammar {
let name = &grammar.name;
if let Some(tokens) =
build_language_module(name, query_path.as_path()).with_context(|| name.to_string())?
{
grammar_defs.push(tokens);
}
}
fs::write(
&out_dir.join("grammar.defs.rs"),
prettyplease::unparse(
&syn::parse2(quote!(#(#grammar_defs)*)).context("failed to parse grammar defs")?,
),
)
.context("failed to write grammar defs")?;
let registry = build_grammar_registry(config.grammar.iter().map(|v| v.name.clone()));
fs::write(
&out_dir.join("grammar.registry.rs"),
prettyplease::unparse(&syn::parse2(registry).context("failed to parse grammar registry")?),
)
.context("failed to write grammar registry")?;
let language = build_language_registry(config.language)?;
fs::write(
&out_dir.join("language.registry.rs"),
prettyplease::unparse(&syn::parse2(language)?),
)?;
Ok(())
}
/// Generates the `Language` enum and its lookup helpers.
///
/// For every non-blacklisted language definition this emits:
///
/// * a `Language` variant named after the language (UpperCamelCase),
/// * a `Language::grammar` arm mapping it to its `Grammar` variant (the
///   explicit `grammar` key, or the language name when absent),
/// * glob entries for `Language::from_file_name` (bare extensions become
///   `*.ext`),
/// * a regex entry for `Language::from_injection` when the language declares
///   an injection regex.
///
/// When several globs/regexes match, the generated lookups pick the entry
/// whose pattern string is shortest.
///
/// The function currently never fails; the `Result` return type is kept for
/// the caller's `?`.
fn build_language_registry(
    language_definition: Vec<LanguageDefinition>,
) -> anyhow::Result<proc_macro2::TokenStream> {
    let mut camel = Vec::new();
    let mut grammars = Vec::new();
    let mut globs = Vec::new();
    let mut globs_to_camel = Vec::new();
    let mut injection_regex = Vec::new();
    let mut injection_regex_str_len = Vec::new();
    // Parallel to `injection_regex`: the variant each regex belongs to. Not
    // every language declares an injection regex, so a regex's index in the
    // set is NOT the index of its language in `Language::VARIANTS`.
    let mut injection_regex_to_camel = Vec::new();

    for language in &language_definition {
        if BLACKLISTED_MODULES.contains(&language.name.as_str()) {
            continue;
        }

        let camel_cased_name = format_ident!("{}", language.name.to_upper_camel_case());
        camel.push(camel_cased_name.clone());

        let grammar = language
            .grammar
            .as_deref()
            .unwrap_or(language.name.as_str());
        grammars.push(format_ident!("{}", grammar.to_upper_camel_case()));

        for ty in &language.file_types {
            match ty {
                FileType::Glob { glob } => globs.push(Cow::Borrowed(glob)),
                FileType::Extension(ext) => globs.push(Cow::Owned(format!("*.{ext}"))),
            }
            globs_to_camel.push(camel_cased_name.clone());
        }

        if let Some(regex) = language.injection_regex.as_deref() {
            injection_regex.push(regex);
            injection_regex_str_len.push(regex.len());
            injection_regex_to_camel.push(camel_cased_name.clone());
        }
    }

    let injection_regex_len = injection_regex.len();
    let globs_array_len = globs.len();
    let globs_string_len = globs.iter().map(|v| v.len()).collect::<Vec<_>>();

    Ok(quote! {
        #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
        pub enum Language {
            #(#camel),*
        }

        impl Language {
            pub const VARIANTS: &[Self] = &[
                #(Self::#camel),*
            ];

            pub const fn grammar(self) -> Grammar {
                match self {
                    #(Self::#camel => Grammar::#grammars),*
                }
            }

            pub fn from_file_name<P: AsRef<::std::path::Path>>(name: P) -> Option<Self> {
                const LENGTHS: [usize; #globs_array_len] = [#(#globs_string_len),*];
                const GLOB_TO_VARIANT: [Language; #globs_array_len] = [#(Language::#globs_to_camel),*];

                thread_local! {
                    static GLOB: ::std::cell::LazyCell<::globset::GlobSet> = ::std::cell::LazyCell::new(|| {
                        ::globset::GlobSetBuilder::new()
                            #(.add(::globset::Glob::new(#globs).unwrap()))*
                            .build()
                            .unwrap()
                    });
                }

                // `max` holds the length of the shortest matching pattern so far.
                let mut max = usize::MAX;
                let mut curr = None;

                GLOB.with(|glob| {
                    for m in glob.matches(name) {
                        let curr_length = LENGTHS[m];
                        if curr_length < max {
                            max = curr_length;
                            curr = Some(GLOB_TO_VARIANT[m]);
                        }
                    }
                });

                curr
            }

            pub fn from_injection(name: &str) -> Option<Self> {
                const LENGTHS: [usize; #injection_regex_len] = [#(#injection_regex_str_len),*];
                const REGEX_TO_VARIANT: [Language; #injection_regex_len] = [#(Language::#injection_regex_to_camel),*];

                thread_local! {
                    static REGEX: ::std::cell::LazyCell<::regex::RegexSet> = ::std::cell::LazyCell::new(|| {
                        ::regex::RegexSet::new(&[
                            #(#injection_regex),*
                        ])
                        .unwrap()
                    });
                }

                // `max` holds the length of the shortest matching pattern so far.
                let mut max = usize::MAX;
                let mut curr = None;

                REGEX.with(|regex| {
                    for m in regex.matches(name) {
                        let curr_length = LENGTHS[m];
                        if curr_length < max {
                            max = curr_length;
                            // `m` indexes the regex set, so translate it via
                            // the regex-specific table rather than VARIANTS.
                            curr = Some(REGEX_TO_VARIANT[m]);
                        }
                    }
                });

                curr
            }
        }
    })
}
/// Generates the `Grammar` enum from the given grammar names.
///
/// Blacklisted names are dropped. Each remaining grammar gets an
/// UpperCamelCase variant, a zero-based index (in iteration order, counted
/// after filtering) returned by `Grammar::idx`, and an arm in
/// `Grammar::highlight_configuration_params` that reads the constants from
/// its generated `crate::grammar::<snake_case>` module.
fn build_grammar_registry(names: impl Iterator<Item = String>) -> proc_macro2::TokenStream {
    let mut ids = Vec::new();
    let mut plain = Vec::new();
    let mut camel = Vec::new();
    let mut snake = Vec::new();

    let kept = names.filter(|name| !BLACKLISTED_MODULES.contains(&name.as_str()));
    for (index, name) in kept.enumerate() {
        camel.push(format_ident!("{}", name.to_upper_camel_case()));

        // `move` is a Rust keyword, so its module must be a raw identifier.
        let module = match name.as_str() {
            "move" => format_ident!("r#{}", name.to_snake_case()),
            _ => format_ident!("{}", name.to_snake_case()),
        };
        snake.push(module);

        plain.push(name);
        ids.push(index);
    }

    quote! {
        #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
        pub enum Grammar {
            #(#camel),*
        }

        impl Grammar {
            pub const VARIANTS: &[Self] = &[
                #(Self::#camel),*
            ];

            pub const fn highlight_configuration_params(self) -> crate::HighlightConfigurationParams {
                match self {
                    #(Self::#camel => crate::HighlightConfigurationParams {
                        language: crate::grammar::#snake::LANGUAGE,
                        name: #plain,
                        highlights_query: crate::grammar::#snake::HIGHLIGHTS_QUERY,
                        injection_query: crate::grammar::#snake::INJECTIONS_QUERY,
                        locals_query: crate::grammar::#snake::LOCALS_QUERY,
                    }),*
                }
            }

            pub const fn idx(self) -> usize {
                match self {
                    #(Self::#camel => #ids),*
                }
            }
        }
    }
}
fn build_language_module(
name: &str,
query_path: &Path,
) -> anyhow::Result<Option<proc_macro2::TokenStream>> {
if BLACKLISTED_MODULES.contains(&name) {
return Ok(None);
}
let highlights_query = read_local_query(query_path, name, "highlights.scm");
let injections_query = read_local_query(query_path, name, "injections.scm");
let locals_query = read_local_query(query_path, name, "locals.scm");
let ffi = format_ident!("tree_sitter_{}", name.to_snake_case());
let name = if name == "move" {
format_ident!("r#{}", name.to_snake_case())
} else {
format_ident!("{}", name.to_snake_case())
};
Ok(Some(quote! {
pub mod #name {
extern "C" {
fn #ffi() -> *const ();
}
pub const LANGUAGE: tree_sitter_language::LanguageFn = unsafe { tree_sitter_language::LanguageFn::from_raw(#ffi) };
pub const HIGHLIGHTS_QUERY: &str = #highlights_query;
pub const INJECTIONS_QUERY: &str = #injections_query;
pub const LOCALS_QUERY: &str = #locals_query;
}
}))
}
/// Loads `query_path/<language>/<filename>` and expands `; inherits: a,b`
/// directives in place.
///
/// Each directive is replaced by the (recursively expanded) contents of the
/// same `filename` for every listed language, each wrapped in newlines.
/// A missing query file yields an empty string.
///
/// # Panics
///
/// Panics if a query file exists but cannot be read.
fn read_local_query(query_path: &Path, language: &str, filename: &str) -> String {
    static INHERITS_REGEX: LazyLock<regex::Regex> =
        LazyLock::new(|| regex::Regex::new(r";+\s*inherits\s*:?\s*([a-z_,()-]+)\s*").unwrap());

    let path = query_path.join(language).join(filename);
    if !path.exists() {
        return String::new();
    }

    let query =
        fs::read_to_string(&path).unwrap_or_else(|e| panic!("failed to fetch {path:?}: {e:?}"));

    INHERITS_REGEX
        .replace_all(&query, |captures: &regex::Captures| {
            // Capture group 1 is the comma-separated list of parent languages.
            captures[1]
                .split(',')
                .fold(String::new(), |mut output, language| {
                    write!(
                        output,
                        "\n{}\n",
                        read_local_query(query_path, language, filename)
                    )
                    .unwrap();
                    output
                })
        })
        .into_owned()
}
fn fetch_and_build_grammar(
grammars: Vec<GrammarDefinition>,
source_dir: &Path,
) -> anyhow::Result<()> {
let pool = ThreadPool::new(std::thread::available_parallelism()?.get());
for grammar in grammars {
if BLACKLISTED_MODULES.contains(&grammar.name.as_str()) {
continue;
}
let mut grammar_root = source_dir.join(&grammar.name);
pool.execute(move || {
let grammar_root = match grammar.source {
GrammarSource::Git {
remote,
revision,
subpath,
} => {
fetch_git_repository(&remote, &revision, &grammar_root)
.context(GRAMMAR_REPOSITORY_URL)
.expect("failed to fetch git repository");
if let Some(subpath) = subpath {
grammar_root.push(subpath);
}
grammar_root
}
GrammarSource::Local { path } => path,
};
let grammar_src = grammar_root.join("src");
let parser_file = Some(grammar_src.join("parser.c"))
.filter(|s| s.exists())
.or_else(|| Some(grammar_src.join("parser.cc")))
.filter(|s| s.exists());
let scanner_file = Some(grammar_src.join("scanner.c"))
.filter(|s| s.exists())
.or_else(|| Some(grammar_src.join("scanner.cc")))
.filter(|s| s.exists());
if let Some(parser_file) = parser_file {
cc::Build::new()
.cpp(parser_file.extension() == Some(OsStr::new("cc")))
.file(parser_file)
.flag_if_supported("-w")
.flag_if_supported("-s")
.include(&grammar_src)
.compile(&format!("{}-parser", grammar.name));
}
if let Some(scanner_file) = scanner_file {
cc::Build::new()
.cpp(scanner_file.extension() == Some(OsStr::new("cc")))
.file(scanner_file)
.flag_if_supported("-w")
.flag_if_supported("-s")
.include(&grammar_src)
.compile(&format!("{}-scanner", grammar.name));
}
});
}
pool.join();
Ok(())
}
/// Ensures `destination` is a git checkout of `url` at `ref_`.
///
/// If `destination` does not exist it is initialised as a new repository
/// with `url` as `origin`. If `HEAD` already equals `ref_` nothing else is
/// done; otherwise `ref_` is shallow-fetched and the work tree is hard-reset
/// to it.
///
/// # Errors
///
/// Returns an error if `git` cannot be spawned or if `init`, `remote add`,
/// `fetch` or `reset` exits unsuccessfully.
fn fetch_git_repository(url: &str, ref_: &str, destination: &Path) -> anyhow::Result<()> {
    if !destination.exists() {
        let res = Command::new("git").arg("init").arg(destination).status()?;
        if !res.success() {
            bail!("git init failed with exit code {res}");
        }

        let res = Command::new("git")
            .args(["remote", "add", "origin", url])
            .current_dir(destination)
            .status()?;
        if !res.success() {
            bail!("git remote failed with exit code {res}");
        }
    }

    let head = Command::new("git")
        .args(["rev-parse", "HEAD"])
        .current_dir(destination)
        .output()?
        .stdout;
    // `git rev-parse` terminates its output with a newline; strip it so an
    // up-to-date checkout is actually recognised and the fetch is skipped.
    if head.trim_ascii() == ref_.as_bytes() {
        return Ok(());
    }

    let res = Command::new("git")
        .args(["fetch", "--depth", "1", "origin", ref_])
        .current_dir(destination)
        .status()?;
    if !res.success() {
        bail!("git fetch failed with exit code {res}");
    }

    let res = Command::new("git")
        .args(["reset", "--hard", ref_])
        .current_dir(destination)
        .status()?;
    if !res.success() {
        bail!("git reset failed with exit code {res}");
    }

    Ok(())
}
/// One entry of the `language` list in the parsed `languages.toml`.
/// Keys are read in kebab-case (e.g. `injection-regex`, `file-types`).
#[derive(Deserialize)]
#[serde(rename_all = "kebab-case")]
struct LanguageDefinition {
/// Language name; becomes the UpperCamelCase `Language` variant.
name: String,
/// Optional regex matched against injection names in the generated
/// `Language::from_injection`.
injection_regex: Option<String>,
/// Globs / extensions used by the generated `Language::from_file_name`.
file_types: Vec<FileType>,
/// Name of the grammar to use; the language `name` is used when absent.
grammar: Option<String>,
}
/// A single `file-types` entry: either a table with a `glob` key or a bare
/// string holding a file extension.
#[derive(Deserialize)]
#[serde(untagged)]
pub enum FileType {
/// An explicit glob pattern, used verbatim.
Glob { glob: String },
/// A file extension; turned into the glob `*.<ext>` by the registry builder.
Extension(String),
}
/// One entry of the `grammar` list in the parsed `languages.toml`.
#[derive(Deserialize, Clone)]
#[serde(rename_all = "kebab-case")]
pub struct GrammarDefinition {
/// Grammar name; used for the checkout directory, the compiled/linked
/// library names and the generated module and `Grammar` variant.
name: String,
/// Where the grammar's sources come from.
source: GrammarSource,
}
/// Origin of a grammar's sources, distinguished by which keys are present.
#[derive(Deserialize, Clone)]
#[serde(rename_all = "lowercase", untagged)]
enum GrammarSource {
/// Sources fetched from a git repository.
Git {
/// Repository URL (config key `git`).
#[serde(rename = "git")]
remote: String,
/// Revision to fetch and check out (config key `rev`).
#[serde(rename = "rev")]
revision: String,
/// Optional directory inside the checkout that holds the grammar.
subpath: Option<String>,
},
/// Sources already present on disk; `path` is used as the grammar root.
Local {
path: PathBuf,
},
}
/// Top-level structure deserialised from `languages.toml`.
#[derive(Deserialize)]
#[serde(rename_all = "kebab-case")]
struct HelixLanguages {
/// All language definitions, in file order.
language: Vec<LanguageDefinition>,
/// All grammar definitions, in file order.
grammar: Vec<GrammarDefinition>,
}
@@ -1,0 +1,66 @@
// Generated by build.rs: the `Grammar` enum and its accessors.
include!(concat!(env!("OUT_DIR"), "/grammar.registry.rs"));
// Generated by build.rs: the `Language` enum and its lookup helpers.
include!(concat!(env!("OUT_DIR"), "/language.registry.rs"));
// Generated by build.rs: one module per grammar, each exposing `LANGUAGE`,
// `HIGHLIGHTS_QUERY`, `INJECTIONS_QUERY` and `LOCALS_QUERY`.
pub mod grammar {
include!(concat!(env!("OUT_DIR"), "/grammar.defs.rs"));
}
/// Everything the generated `Grammar::highlight_configuration_params`
/// returns for one grammar.
pub struct HighlightConfigurationParams {
/// Entry point of the grammar's tree-sitter language.
pub language: tree_sitter_language::LanguageFn,
/// Grammar name as written in the configuration.
pub name: &'static str,
/// Contents of the grammar's `highlights.scm` (empty if it has none).
pub highlights_query: &'static str,
/// Contents of the grammar's `injections.scm` (empty if it has none).
pub injection_query: &'static str,
/// Contents of the grammar's `locals.scm` (empty if it has none).
pub locals_query: &'static str,
}