🏡 index : ~doyle/rgit.git

author Jordan Doyle <jordan@doyle.la> 2025-04-28 1:16:54.0 +07:00:00
committer Jordan Doyle <jordan@doyle.la> 2025-04-28 3:14:17.0 +07:00:00
commit   1a476f691146854c0afb059d625fd3faac19b924 [patch]
tree     a990d24868a885bcb2acf75b5a6ce042bbe556ad
parent   0eda73f2dc056efd20f2dace9a985f71b932879f
download 1a476f691146854c0afb059d625fd3faac19b924.tar.gz

Index trees into RocksDB



Diff

 Cargo.toml                      |   2 +-
 src/git.rs                      | 271 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------
 src/main.rs                     |  12 +++++++++++-
 src/database/indexer.rs         | 193 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 src/methods/filters.rs          |   4 ++--
 templates/repo/tree.html        |  59 ++++++++++++++++++++++++++++-------------------------------
 src/database/schema/commit.rs   |   3 +++
 src/database/schema/mod.rs      |   3 ++-
 src/database/schema/prefixes.rs |   2 ++
 src/database/schema/tag.rs      |   9 +++++++--
 src/database/schema/tree.rs     | 191 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/methods/repo/mod.rs         |   6 ++++++
 src/methods/repo/tree.rs        | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 13 files changed, 608 insertions(+), 261 deletions(-)
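
In short: rather than walking the git object database for every tree page, the indexer now computes an xxh3 digest over every (path, mode) pair in a commit's tree and writes the tree's entries into a new tree_item column family keyed by that digest, so commits whose trees share the same shape share one set of rows; the digest itself is stored on the commit (and tag) rows. A minimal sketch of the key layout assumed throughout the diff (the helper name is hypothetical; the real encoding lives in TreeItem::insert below):

// Key layout: digest (u64, native-endian) | slash count (usize, big-endian) | path bytes.
// The fixed-width (digest, slash count) prefix is exactly what the RocksDB prefix
// extractor added in main.rs covers, so listing one directory level is a prefix seek.
fn tree_item_key(digest: u64, path: &[u8]) -> Vec<u8> {
    let depth = path.iter().filter(|&&b| b == b'/').count();
    let mut key = Vec::with_capacity(
        std::mem::size_of::<u64>() + std::mem::size_of::<usize>() + path.len(),
    );
    key.extend_from_slice(&digest.to_ne_bytes());
    key.extend_from_slice(&depth.to_be_bytes());
    key.extend_from_slice(path);
    key
}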

diff --git a/Cargo.toml b/Cargo.toml
index fac855b..777cf65 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -84,7 +84,7 @@
unix_mode = "0.1"
uuid = { version = "1.11", features = ["v4"] }
v_htmlescape = { version = "0.15", features = ["bytes-buf"] }
xxhash-rust = { version = "0.8.15", features = ["const_xxh3"] }
xxhash-rust = { version = "0.8.15", features = ["const_xxh3", "xxh3"] }
yoke = { version = "0.7.5", features = ["derive"] }

[features]
diff --git a/src/git.rs b/src/git.rs
index f682517..f3ed9c4 100644
--- a/src/git.rs
+++ b/src/git.rs
@@ -7,18 +7,17 @@
    actor::SignatureRef,
    bstr::{BStr, BString, ByteSlice, ByteVec},
    diff::blob::{platform::prepare_diff::Operation, Sink},
    object::{tree::EntryKind, Kind},
    object::Kind,
    objs::{tree::EntryRef, CommitRef, TagRef},
    prelude::TreeEntryRefExt,
    traverse::tree::visit::Action,
    url::Scheme,
    ObjectId, ThreadSafeRepository, Url,
    ObjectId, ThreadSafeRepository,
};
use itertools::{Either, Itertools};
use itertools::Either;
use moka::future::Cache;
use std::{
    borrow::Cow,
    collections::{BTreeMap, VecDeque},
    collections::VecDeque,
    ffi::OsStr,
    fmt::{self, Arguments, Write},
    io::ErrorKind,
@@ -121,7 +120,7 @@
        path: Option<PathBuf>,
        tree_id: Option<&str>,
        formatted: bool,
    ) -> Result<PathDestination> {
    ) -> Result<FileWithContent> {
        let tree_id = tree_id
            .map(ObjectId::from_str)
            .transpose()
@@ -154,8 +153,6 @@
                    Kind::Blob => {
                        let mut blob = object.into_blob();

                        let size = blob.data.len();

                        let content = match (formatted, simdutf8::basic::from_utf8(&blob.data)) {
                            (true, Err(_)) => Content::Binary(vec![]),
                            (true, Ok(data)) => Content::Text(Cow::Owned(format_file(
@@ -168,107 +165,13 @@
                            })),
                        };

                        return Ok(PathDestination::File(FileWithContent {
                            metadata: File {
                                mode: item.mode().0,
                                size,
                                path: path.clone(),
                                name: item.filename().to_string(),
                            },
                            content,
                        }));
                        return Ok(FileWithContent { content });
                    }
                    Kind::Tree => {
                        tree = object.into_tree();
                    }
                    _ => anyhow::bail!("bad object of type {:?}", object.kind),
                }
            }

            let mut tree_items = Vec::new();
            let submodules = repo
                .submodules()?
                .into_iter()
                .flatten()
                .filter_map(|v| Some((v.name().to_path_lossy().to_path_buf(), v.url().ok()?)))
                .collect::<BTreeMap<_, _>>();

            for item in tree.iter() {
                let item = item?;

                let path = path
                    .clone()
                    .unwrap_or_default()
                    .join(item.filename().to_path_lossy());

                match item.mode().kind() {
                    EntryKind::Tree
                    | EntryKind::Blob
                    | EntryKind::BlobExecutable
                    | EntryKind::Link => {
                        let mut object = item
                            .object()
                            .context("Expected item in tree to be object but it wasn't")?;

                        tree_items.push(match object.kind {
                            Kind::Blob => TreeItem::File(File {
                                mode: item.mode().0,
                                size: object.into_blob().data.len(),
                                path,
                                name: item.filename().to_string(),
                            }),
                            Kind::Tree => {
                                let mut children = PathBuf::new();

                                // if the tree only has one child, flatten it down
                                while let Ok(Some(Ok(item))) = object
                                    .try_into_tree()
                                    .iter()
                                    .flat_map(gix::Tree::iter)
                                    .at_most_one()
                                {
                                    let nested_object = item.object().context(
                                        "Expected item in tree to be object but it wasn't",
                                    )?;

                                    if nested_object.kind != Kind::Tree {
                                        break;
                                    }

                                    object = nested_object;
                                    children.push(item.filename().to_path_lossy());
                                }

                                TreeItem::Tree(Tree {
                                    mode: item.mode().0,
                                    path,
                                    children,
                                    name: item.filename().to_string(),
                                })
                            }
                            _ => continue,
                        });
                    }
                    EntryKind::Commit => {
                        if let Some(mut url) = submodules.get(path.as_path()).cloned() {
                            if matches!(url.scheme, Scheme::Git | Scheme::Ssh) {
                                url.scheme = Scheme::Https;
                            }

                            tree_items.push(TreeItem::Submodule(Submodule {
                                mode: item.mode().0,
                                name: item.filename().to_string(),
                                url,
                                oid: item.object_id(),
                            }));

                            continue;
                        }
                    }
                }
            }

            Ok(PathDestination::Tree(tree_items))
            anyhow::bail!("bad object");
        })
        .await
        .context("Failed to join Tokio task")?
@@ -442,16 +345,16 @@
            }

            let buffer = BytesMut::with_capacity(BUFFER_CAP + 1024);
            let mut visitor = ArchivalVisitor {
            let mut visitor = PathVisitor::new(ArchivalVisitor {
                repository: &repo,
                res,
                archive: Builder::new(GzEncoder::new(buffer.writer(), flate2::Compression::fast())),
                path_deque: VecDeque::new(),
                path: BString::default(),
            };
            });

            tree.traverse().breadthfirst(&mut visitor)?;

            let visitor = visitor.into_inner();

            visitor.res.blocking_send(Ok(visitor
                .archive
                .into_inner()?
@@ -514,16 +417,77 @@
}

const BUFFER_CAP: usize = 512 * 1024;

pub trait PathVisitorHandler {
    fn visit(&mut self, entry: &EntryRef<'_>, path: &BStr) -> Action;
}

pub struct ArchivalVisitor<'a> {
struct ArchivalVisitor<'a> {
    repository: &'a gix::Repository,
    res: tokio::sync::mpsc::Sender<Result<Bytes, anyhow::Error>>,
    archive: Builder<GzEncoder<Writer<BytesMut>>>,
}

impl PathVisitorHandler for ArchivalVisitor<'_> {
    fn visit(&mut self, entry: &EntryRef<'_>, path: &BStr) -> Action {
        let entry = entry.attach(self.repository);

        let Ok(object) = entry.object() else {
            return Action::Continue;
        };

        if object.kind != Kind::Blob {
            return Action::Continue;
        }

        let blob = object.into_blob();

        let mut header = tar::Header::new_gnu();
        if let Err(error) = header.set_path(path.to_path_lossy()) {
            warn!(%error, "Attempted to write invalid path to archive");
            return Action::Continue;
        }
        header.set_size(blob.data.len() as u64);
        #[allow(clippy::cast_sign_loss)]
        header.set_mode(entry.mode().0.into());
        header.set_cksum();

        if let Err(error) = self.archive.append(&header, blob.data.as_slice()) {
            warn!(%error, "Failed to append to archive");
            return Action::Cancel;
        }

        if self.archive.get_ref().get_ref().get_ref().len() >= BUFFER_CAP {
            let b = self.archive.get_mut().get_mut().get_mut().split().freeze();

            if self.res.blocking_send(Ok(b)).is_err() {
                return Action::Cancel;
            }
        }

        Action::Continue
    }
}

pub struct PathVisitor<T> {
    path_deque: VecDeque<BString>,
    path: BString,
    inner: T,
}

impl<T> PathVisitor<T> {
    pub fn new(inner: T) -> Self {
        Self {
            path_deque: VecDeque::new(),
            path: BString::default(),
            inner,
        }
    }

    pub fn into_inner(self) -> T {
        self.inner
    }

impl ArchivalVisitor<'_> {
    fn pop_element(&mut self) {
        if let Some(pos) = memchr::memrchr(b'/', &self.path) {
            self.path.resize(pos, 0);
@@ -533,6 +497,9 @@
    }

    fn push_element(&mut self, name: &BStr) {
        if name.is_empty() {
            return;
        }
        if !self.path.is_empty() {
            self.path.push(b'/');
        }
@@ -540,7 +507,7 @@
    }
}

impl gix::traverse::tree::Visit for ArchivalVisitor<'_> {
impl<T: PathVisitorHandler> gix::traverse::tree::Visit for PathVisitor<T> {
    fn pop_front_tracked_path_and_set_current(&mut self) {
        self.path = self
            .path_deque
@@ -549,10 +516,7 @@
    }

    fn pop_back_tracked_path_and_set_current(&mut self) {
        self.path = self
            .path_deque
            .pop_back()
            .expect("every call is matched with push_tracked_path_component");
        self.path = self.path_deque.pop_back().unwrap_or_default();
    }

    fn push_back_tracked_path_component(&mut self, component: &BStr) {
@@ -568,47 +532,12 @@
        self.pop_element();
    }

    fn visit_tree(&mut self, _entry: &EntryRef<'_>) -> Action {
        Action::Continue
    fn visit_tree(&mut self, entry: &EntryRef<'_>) -> Action {
        self.inner.visit(entry, self.path.as_ref())
    }

    fn visit_nontree(&mut self, entry: &EntryRef<'_>) -> Action {
        let entry = entry.attach(self.repository);

        let Ok(object) = entry.object() else {
            return Action::Continue;
        };

        if object.kind != Kind::Blob {
            return Action::Continue;
        }

        let blob = object.into_blob();

        let mut header = tar::Header::new_gnu();
        if let Err(error) = header.set_path(self.path.to_path_lossy()) {
            warn!(%error, "Attempted to write invalid path to archive");
            return Action::Continue;
        }
        header.set_size(blob.data.len() as u64);
        #[allow(clippy::cast_sign_loss)]
        header.set_mode(entry.mode().0.into());
        header.set_cksum();

        if let Err(error) = self.archive.append(&header, blob.data.as_slice()) {
            warn!(%error, "Failed to append to archive");
            return Action::Cancel;
        }

        if self.archive.get_ref().get_ref().get_ref().len() >= BUFFER_CAP {
            let b = self.archive.get_mut().get_mut().get_mut().split().freeze();

            if self.res.blocking_send(Ok(b)).is_err() {
                return Action::Cancel;
            }
        }

        Action::Continue
        self.inner.visit(entry, self.path.as_ref())
    }
}

@@ -634,47 +563,11 @@
pub enum ReadmeFormat {
    Markdown,
    Plaintext,
}

pub enum PathDestination {
    Tree(Vec<TreeItem>),
    File(FileWithContent),
}

pub enum TreeItem {
    Tree(Tree),
    File(File),
    Submodule(Submodule),
}

#[derive(Debug)]
pub struct Submodule {
    pub mode: u16,
    pub name: String,
    pub url: Url,
    pub oid: ObjectId,
}

#[derive(Debug)]
pub struct Tree {
    pub mode: u16,
    pub name: String,
    pub children: PathBuf,
    pub path: PathBuf,
}

#[derive(Debug)]
pub struct File {
    pub mode: u16,
    pub size: usize,
    pub name: String,
    pub path: PathBuf,
}

#[derive(Debug)]
#[allow(unused)]
pub struct FileWithContent {
    pub metadata: File,
    pub content: Content,
}
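
The PathVisitor split above is what lets the archive writer and the new indexer share path bookkeeping: PathVisitor owns the deque and current-path state that gix's traverse::tree::Visit trait demands, and hands every entry plus its full path to a small PathVisitorHandler. A hedged sketch of a trivial handler (a blob counter, not part of this patch) just to show the shape of the trait:

use gix::{
    bstr::BStr,
    objs::tree::{EntryKind, EntryRef},
    traverse::tree::visit::Action,
};

// Relies on the PathVisitorHandler trait introduced above; only the counter is new.
struct BlobCounter {
    blobs: usize,
}

impl PathVisitorHandler for BlobCounter {
    fn visit(&mut self, entry: &EntryRef<'_>, path: &BStr) -> Action {
        if matches!(entry.mode.kind(), EntryKind::Blob | EntryKind::BlobExecutable) {
            println!("blob at {path}");
            self.blobs += 1;
        }
        Action::Continue
    }
}

It would be driven the same way as the real handlers: tree.traverse().breadthfirst(&mut PathVisitor::new(BlobCounter { blobs: 0 })).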

diff --git a/src/main.rs b/src/main.rs
index 6e8a4b1..9105436 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -23,7 +23,10 @@
};
use clap::Parser;
use const_format::formatcp;
use database::schema::SCHEMA_VERSION;
use database::schema::{
    prefixes::{TREE_FAMILY, TREE_ITEM_FAMILY},
    SCHEMA_VERSION,
};
use rocksdb::{Options, SliceTransform};
use tokio::{
    net::TcpListener,
@@ -257,6 +260,11 @@
        tag_family_options.set_prefix_extractor(SliceTransform::create_fixed_prefix(
            std::mem::size_of::<u64>(),
        )); // repository id prefix

        let mut tree_item_family_options = Options::default();
        tree_item_family_options.set_prefix_extractor(SliceTransform::create_fixed_prefix(
            std::mem::size_of::<u64>() + std::mem::size_of::<usize>(),
        ));

        let db = rocksdb::DB::open_cf_with_opts(
            &db_options,
@@ -267,6 +275,8 @@
                (TAG_FAMILY, tag_family_options),
                (REFERENCE_FAMILY, Options::default()),
                (COMMIT_COUNT_FAMILY, Options::default()),
                (TREE_FAMILY, Options::default()),
                (TREE_ITEM_FAMILY, tree_item_family_options),
            ],
        )?;
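
Worth noting why the prefix length here is size_of::<u64>() + size_of::<usize>(): it has to match the key prefix the indexer writes for tree_item rows (tree digest followed by the path's slash count), otherwise prefix seeks against the column family cannot target a single directory level. A hedged sketch of opening just that column family in isolation (path and column-family set trimmed down; the option calls mirror the diff):

use rocksdb::{Options, SliceTransform, DB};

fn open_tree_item_db(path: &str) -> Result<DB, rocksdb::Error> {
    let mut db_options = Options::default();
    db_options.create_if_missing(true);
    db_options.create_missing_column_families(true);

    // Fixed-size prefix over `digest (u64) || slash count (usize)`, matching the
    // keys produced by TreeItem::insert so prefix_iterator_cf can seek on them.
    let mut tree_item_options = Options::default();
    tree_item_options.set_prefix_extractor(SliceTransform::create_fixed_prefix(
        std::mem::size_of::<u64>() + std::mem::size_of::<usize>(),
    ));

    DB::open_cf_with_opts(&db_options, path, [("tree_item", tree_item_options)])
}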

diff --git a/src/database/indexer.rs b/src/database/indexer.rs
index 7311c32..9f9b007 100644
--- a/src/database/indexer.rs
+++ b/src/database/indexer.rs
@@ -1,5 +1,5 @@
use std::{
    collections::HashSet,
    collections::{BTreeMap, HashSet},
    ffi::OsStr,
    fmt::Debug,
    io::{BufRead, BufReader},
@@ -8,18 +8,30 @@
};

use anyhow::Context;
use gix::{bstr::ByteSlice, refs::Category, Reference};
use gix::{
    bstr::{BStr, ByteSlice},
    objs::tree::EntryKind,
    refs::Category,
    url::Scheme,
    ObjectId, Reference, Url,
};
use itertools::{Either, Itertools};
use rocksdb::WriteBatch;
use time::{OffsetDateTime, UtcOffset};
use tracing::{error, info, info_span, instrument, warn};

use crate::database::schema::{
    commit::Commit,
    repository::{ArchivedRepository, Repository, RepositoryId},
    tag::{Tag, TagTree},
use xxhash_rust::xxh3::Xxh3;

use crate::{
    database::schema::{
        commit::Commit,
        repository::{ArchivedRepository, Repository, RepositoryId},
        tag::{Tag, TagTree},
    },
    git::{PathVisitor, PathVisitorHandler},
};

use super::schema::tree::{Tree, TreeItem, TreeItemKind};

pub fn run(scan_path: &Path, repository_list: Option<&Path>, db: &Arc<rocksdb::DB>) {
    let span = info_span!("index_update");
    let _entered = span.enter();
@@ -157,6 +169,18 @@
            Ok(v) => v,
            Err(error) => {
                error!(%error, "Failed to read references for {relative_path}");
                continue;
            }
        };

        let submodules = match git_repository.submodules() {
            Ok(submodules) => submodules
                .into_iter()
                .flatten()
                .filter_map(|v| Some((v.name().to_path_lossy().to_path_buf(), v.url().ok()?)))
                .collect::<BTreeMap<_, _>>(),
            Err(error) => {
                error!(%error, "Failed to read submodules for {relative_path}");
                continue;
            }
        };
@@ -189,6 +213,7 @@
                db.clone(),
                &git_repository,
                false,
                &submodules,
            ) {
                error!(%error, "Failed to update reflog for {relative_path}@{:?}", valid_references.last());
            }
@@ -208,6 +233,7 @@
    db: Arc<rocksdb::DB>,
    git_repository: &gix::Repository,
    force_reindex: bool,
    submodules: &BTreeMap<PathBuf, Url>,
) -> Result<(), anyhow::Error> {
    info!("Refreshing indexes");

@@ -238,6 +264,8 @@
        .into_iter()
        .rev();

    let mut hasher = Xxh3::new();

    let tree_len = commit_tree.len()?;
    let mut seen = false;
    let mut i = 0;
@@ -266,12 +294,16 @@
            let commit = commit.decode()?;
            let author = commit.author();
            let committer = commit.committer();

            let tree = git_repository.find_tree(commit.tree())?;
            let tree_id = index_tree(&db, &mut batch, &tree, &mut hasher, submodules)?;

            Commit::new(oid, &commit, author, committer)?.insert(
            Commit::new(oid, &commit, author, committer, tree_id)?.insert(
                &commit_tree,
                tree_len + i,
                &mut batch,
            )?;

            i += 1;
        }

@@ -289,10 +321,117 @@
            db,
            git_repository,
            true,
            submodules,
        );
    }

    Ok(())
}

fn index_tree(
    database: &rocksdb::DB,
    batch: &mut WriteBatch,
    tree: &gix::Tree<'_>,
    hasher: &mut Xxh3,
    submodules: &BTreeMap<PathBuf, Url>,
) -> Result<u64, anyhow::Error> {
    hasher.reset();
    tree.traverse()
        .breadthfirst(&mut PathVisitor::new(TreeHasherVisitor { hasher }))?;
    let digest = hasher.digest();

    if !TreeItem::contains(database, digest)? {
        tree.traverse()
            .breadthfirst(&mut PathVisitor::new(TreeItemIndexerVisitor {
                buffer: Vec::new(),
                digest,
                database,
                batch,
                submodules,
            }))?;
    }

    Tree {
        indexed_tree_id: digest,
    }
    .insert(database, batch, tree.id)?;

    Ok(digest)
}

/// Walks the entire tree and hashes all the (path, mode)s so trees can be deduplicated.
///
/// Note: unlike git's tree oid, this does not take into account blob contents.
struct TreeHasherVisitor<'a> {
    hasher: &'a mut Xxh3,
}

impl PathVisitorHandler for TreeHasherVisitor<'_> {
    fn visit(
        &mut self,
        entry: &gix::objs::tree::EntryRef<'_>,
        path: &BStr,
    ) -> gix::traverse::tree::visit::Action {
        self.hasher.update(path);
        self.hasher.update(&entry.mode.to_ne_bytes());
        gix::traverse::tree::visit::Action::Continue
    }
}

struct TreeItemIndexerVisitor<'a> {
    digest: u64,
    buffer: Vec<u8>,
    database: &'a rocksdb::DB,
    batch: &'a mut WriteBatch,
    submodules: &'a BTreeMap<PathBuf, Url>,
}

impl PathVisitorHandler for TreeItemIndexerVisitor<'_> {
    fn visit(
        &mut self,
        entry: &gix::objs::tree::EntryRef<'_>,
        path: &BStr,
    ) -> gix::traverse::tree::visit::Action {
        let kind = match entry.mode.kind() {
            EntryKind::Blob | EntryKind::BlobExecutable | EntryKind::Link => TreeItemKind::File,
            EntryKind::Commit => {
                let Some(mut url) = self
                    .submodules
                    .get(&path.to_path_lossy().into_owned())
                    .cloned()
                else {
                    return gix::traverse::tree::visit::Action::Continue;
                };

                if matches!(url.scheme, Scheme::Git | Scheme::Ssh) {
                    url.scheme = Scheme::Https;
                }

                TreeItemKind::Submodule(match entry.oid.to_owned() {
                    ObjectId::Sha1(oid) => super::schema::tree::Submodule {
                        url: url.to_string(),
                        oid,
                    },
                })
            }
            EntryKind::Tree => TreeItemKind::Tree,
        };

        TreeItem {
            mode: entry.mode.0,
            kind,
        }
        .insert(
            &mut self.buffer,
            self.digest,
            path,
            self.database,
            self.batch,
        )
        .expect("failed to insert TreeItem");

        gix::traverse::tree::visit::Action::Continue
    }
}

#[instrument(skip(db))]
@@ -309,6 +448,18 @@
        let Some(git_repository) = open_repo(scan_path, &relative_path, db_repository.get(), &db)
        else {
            continue;
        };

        let submodules = match git_repository.submodules() {
            Ok(submodules) => submodules
                .into_iter()
                .flatten()
                .filter_map(|v| Some((v.name().to_path_lossy().to_path_buf(), v.url().ok()?)))
                .collect::<BTreeMap<_, _>>(),
            Err(error) => {
                error!(%error, "Failed to read submodules for {relative_path}");
                continue;
            }
        };

        if let Err(error) = tag_index_scan(
@@ -316,6 +467,7 @@
            db_repository.get(),
            db.clone(),
            &git_repository,
            &submodules,
        ) {
            error!(%error, "Failed to update tags for {relative_path}");
        }
@@ -328,6 +480,7 @@
    db_repository: &ArchivedRepository,
    db: Arc<rocksdb::DB>,
    git_repository: &gix::Repository,
    submodules: &BTreeMap<PathBuf, Url>,
) -> Result<(), anyhow::Error> {
    let tag_tree = db_repository.tag_tree(db);

@@ -343,7 +496,7 @@

    // insert any git tags that are missing from the index
    for tag_name in git_tags.difference(&indexed_tags) {
        tag_index_update(tag_name, git_repository, &tag_tree)?;
        tag_index_update(tag_name, git_repository, &tag_tree, submodules)?;
    }

    // remove any extra tags that the index has
@@ -360,15 +513,31 @@
    tag_name: &str,
    git_repository: &gix::Repository,
    tag_tree: &TagTree,
    submodules: &BTreeMap<PathBuf, Url>,
) -> Result<(), anyhow::Error> {
    let mut reference = git_repository
        .find_reference(tag_name)
        .context("Failed to read newly discovered tag")?;

    let tree_id = if let Ok(tree) = reference.peel_to_tree() {
        let mut batch = WriteBatch::default();
        let tree_id = index_tree(
            &tag_tree.db,
            &mut batch,
            &tree,
            &mut Xxh3::new(),
            submodules,
        )?;
        tag_tree.db.write_without_wal(batch)?;
        Some(tree_id)
    } else {
        None
    };

    if let Ok(tag) = reference.peel_to_tag() {
        info!("Inserting newly discovered tag to index");

        Tag::new(tag.tagger()?)?.insert(tag_tree, tag_name)?;
        Tag::new(tag.tagger()?, tree_id)?.insert(tag_tree, tag_name)?;
    }

    Ok(())
@@ -420,7 +589,7 @@
    discovered_repos: &mut Vec<(PathBuf, gix::Repository)>,
) {
    let dirs = if let Some(repo_list) = repository_list {
        let mut repo_list = match std::fs::File::open(&repo_list) {
        let repo_list = match std::fs::File::open(repo_list) {
            Ok(v) => BufReader::new(v).lines(),
            Err(error) => {
                error!(%error, "Failed to open repository list file");
@@ -430,7 +599,7 @@

        let mut out = Vec::new();

        while let Some(line) = repo_list.next() {
        for line in repo_list {
            let line = match line {
                Ok(v) => v,
                Err(error) => {
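
The two-pass structure of index_tree above is the heart of the dedup: a cheap walk that only hashes, then the expensive walk that writes TreeItem rows runs only when TreeItem::contains reports the digest as unseen. A self-contained sketch of the hashing half, using a flat list of entries instead of a gix traversal (the Entry type here is hypothetical):

use xxhash_rust::xxh3::Xxh3;

// Hypothetical flattened tree entry; the real indexer feeds these in from a
// breadth-first gix traversal via PathVisitor/TreeHasherVisitor.
struct Entry<'a> {
    path: &'a [u8],
    mode: u16,
}

// The digest covers only (path, mode), not blob contents, so two commits that
// differ only in file contents map to the same indexed tree and its item rows
// are written once.
fn tree_digest(entries: &[Entry<'_>]) -> u64 {
    let mut hasher = Xxh3::new();
    for entry in entries {
        hasher.update(entry.path);
        hasher.update(&entry.mode.to_ne_bytes());
    }
    hasher.digest()
}
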
diff --git a/src/methods/filters.rs b/src/methods/filters.rs
index a0e15f9..e39b3d2 100644
--- a/src/methods/filters.rs
+++ b/src/methods/filters.rs
@@ -37,8 +37,8 @@
        .convert((OffsetDateTime::now_utc() - s.into().0).try_into().unwrap()))
}

pub fn file_perms(s: &u16) -> Result<String, askama::Error> {
    Ok(unix_mode::to_string(u32::from(*s)))
pub fn file_perms(s: u16) -> Result<String, askama::Error> {
    Ok(unix_mode::to_string(u32::from(s)))
}

pub struct DisplayHexBuffer<const N: usize>(pub const_hex::Buffer<N>);
diff --git a/templates/repo/tree.html b/templates/repo/tree.html
index 1d87346..4f69b3c 100644
--- a/templates/repo/tree.html
+++ b/templates/repo/tree.html
@@ -1,52 +1,47 @@
{% import "macros/breadcrumbs.html" as breadcrumbs %}
{% extends "repo/base.html" %}

{% block tree_nav_class %}active{% endblock %}

{% block subnav %}
    {% call breadcrumbs::breadcrumbs(repo_path, query) %}
{% call breadcrumbs::breadcrumbs(repo_path, query) %}
{% endblock %}

{% block content %}
<div class="table-responsive">
<table class="repositories">
  <table class="repositories">
    <thead>
    <tr>
      <tr>
        <th style="width: 10rem;">Mode</th>
        <th>Name</th>
        <th>Size</th>
    </tr>
      </tr>
    </thead>

    <tbody>
    {% for item in items -%}
    <tr>
        {% match item -%}
        {%- when crate::git::TreeItem::Tree with (tree) -%}
        <td><pre>{{ tree.mode|file_perms }}</pre></td>
        <td><pre><a class="nested-tree" href="/{{ repo.display() }}/tree/{{ tree.path.display() }}{{ query }}">{{ tree.name }}</a>
            {%- for child in tree.children.ancestors().collect_vec().into_iter().rev() -%}
                {%- if let Some(file_name) = child.file_name() %} / <a class="nested-tree" href="/{{ repo.display() }}/tree/{{ tree.path.display() }}/{{ child.display() }}{{ query }}">{{ file_name.to_string_lossy() }}</a>{%- endif -%}
            {%- endfor -%}
        </pre></td>
        <td></td>
        <td></td>

        {%- when crate::git::TreeItem::File with (file) -%}
        <td><pre>{{ file.mode|file_perms }}</pre></td>
        <td><pre><a href="/{{ repo.display() }}/tree/{{ file.path.display() }}{{ query }}">{{ file.name }}</a></pre></td>
        <td><pre>{{ file.size }}</pre></td>
        <td></td>

        {%- when crate::git::TreeItem::Submodule with (submodule) -%}
        <td><pre>{{ submodule.mode|file_perms }}</pre></td>
        <td><pre>🔗 <a href="{{ submodule.url }}">{{ submodule.name }}</a> @ {{ submodule.oid.to_hex_with_len(7) }}</pre></td>
        <td></td>
        <td></td>
      {% for (name, name_split, item) in items -%}
      <tr>
        <td>
          <pre>{{ item.get().mode.to_native()|file_perms }}</pre>
        </td>
        {% set local_name = name.get()[*name_split..] -%}
        {% set local_name = local_name.strip_prefix('/').unwrap_or(local_name) -%}
        {% match item.get().kind -%}
        {%- when ArchivedTreeItemKind::Tree -%}
        <td>
          <pre><a class="nested-tree" href="/{{ repo.display() }}/tree/{{ name.get() }}{{ query }}">{{ local_name }}</a></pre>
        </td>
        {%- when ArchivedTreeItemKind::File -%}
        <td>
          <pre><a href="/{{ repo.display() }}/tree/{{ name.get() }}{{ query }}">{{ local_name }}</a></pre>
        </td>
        {%- when ArchivedTreeItemKind::Submodule with (submodule) -%}
        <td>
          <pre>🔗 <a href="{{ submodule.url }}">{{ local_name }}</a> @ {{ submodule.oid|hex }}</pre>
        </td>
        {%- endmatch %}
    </tr>
    {% endfor -%}
      </tr>
      {% endfor -%}
    </tbody>
</table>
  </table>
</div>
{% endblock %}
diff --git a/src/database/schema/commit.rs b/src/database/schema/commit.rs
index 6ebbb4a..12307b4 100644
--- a/src/database/schema/commit.rs
+++ b/src/database/schema/commit.rs
@@ -21,6 +21,7 @@
    pub author: Author,
    pub committer: Author,
    pub hash: [u8; 20],
    pub tree: u64,
}

impl Commit {
@@ -29,6 +30,7 @@
        commit: &CommitRef<'_>,
        author: SignatureRef<'_>,
        committer: SignatureRef<'_>,
        tree: u64,
    ) -> Result<Self, anyhow::Error> {
        let message = commit.message();

@@ -40,6 +42,7 @@
            hash: match oid {
                ObjectId::Sha1(d) => d,
            },
            tree,
        })
    }

diff --git a/src/database/schema/mod.rs b/src/database/schema/mod.rs
index 3e6f177..2e8a91b 100644
--- a/src/database/schema/mod.rs
+++ b/src/database/schema/mod.rs
@@ -6,7 +6,8 @@
pub mod prefixes;
pub mod repository;
pub mod tag;
pub mod tree;

pub type Yoked<T> = Yoke<T, Box<[u8]>>;

pub const SCHEMA_VERSION: &str = "3";
pub const SCHEMA_VERSION: &str = "4";
diff --git a/src/database/schema/prefixes.rs b/src/database/schema/prefixes.rs
index 299364b..d6cd311 100644
--- a/src/database/schema/prefixes.rs
+++ b/src/database/schema/prefixes.rs
@@ -1,5 +1,7 @@
pub const COMMIT_FAMILY: &str = "commit";
pub const COMMIT_COUNT_FAMILY: &str = "commit_count";
pub const REPOSITORY_FAMILY: &str = "repository";
pub const TAG_FAMILY: &str = "tag";
pub const REFERENCE_FAMILY: &str = "repository_refs";
pub const TREE_FAMILY: &str = "tree";
pub const TREE_ITEM_FAMILY: &str = "tree_item";
diff --git a/src/database/schema/tag.rs b/src/database/schema/tag.rs
index e57dfa9..b2db248 100644
--- a/src/database/schema/tag.rs
+++ b/src/database/schema/tag.rs
@@ -15,12 +15,17 @@
#[derive(Serialize, Archive, Debug, Yokeable)]
pub struct Tag {
    pub tagger: Option<Author>,
    pub tree_id: Option<u64>,
}

impl Tag {
    pub fn new(tagger: Option<SignatureRef<'_>>) -> Result<Self, anyhow::Error> {
    pub fn new(
        tagger: Option<SignatureRef<'_>>,
        tree_id: Option<u64>,
    ) -> Result<Self, anyhow::Error> {
        Ok(Self {
            tagger: tagger.map(TryFrom::try_from).transpose()?,
            tree_id,
        })
    }

@@ -30,7 +35,7 @@
}

pub struct TagTree {
    db: Arc<rocksdb::DB>,
    pub db: Arc<rocksdb::DB>,
    prefix: RepositoryId,
}

diff --git a/src/database/schema/tree.rs b/src/database/schema/tree.rs
new file mode 100644
index 0000000..2cb67c9 100644
--- /dev/null
+++ b/src/database/schema/tree.rs
@@ -0,0 +1,191 @@
use anyhow::Context;
use gix::{bstr::BStr, ObjectId};
use itertools::{Either, Itertools};
use rkyv::{Archive, Serialize};
use rocksdb::{WriteBatch, DB};
use yoke::{Yoke, Yokeable};

use super::{
    prefixes::{TREE_FAMILY, TREE_ITEM_FAMILY},
    Yoked,
};

#[derive(Serialize, Archive, Debug, PartialEq, Eq, Hash)]
pub struct Tree {
    pub indexed_tree_id: u64,
}

impl Tree {
    pub fn insert(
        &self,
        database: &DB,
        batch: &mut WriteBatch,
        tree_oid: ObjectId,
    ) -> Result<(), anyhow::Error> {
        let cf = database
            .cf_handle(TREE_FAMILY)
            .context("tree column family missing")?;

        batch.put_cf(
            cf,
            tree_oid.as_slice(),
            rkyv::to_bytes::<rkyv::rancor::Error>(self)?,
        );

        Ok(())
    }

    pub fn find(database: &DB, tree_oid: ObjectId) -> Result<Option<u64>, anyhow::Error> {
        let cf = database
            .cf_handle(TREE_FAMILY)
            .context("tree column family missing")?;

        let Some(data) = database.get_pinned_cf(cf, tree_oid.as_slice())? else {
            return Ok(None);
        };

        let data = rkyv::access::<<Self as Archive>::Archived, rkyv::rancor::Error>(data.as_ref())?;

        Ok(Some(data.indexed_tree_id.to_native()))
    }
}

#[derive(Serialize, Archive, Debug, PartialEq, Eq, Hash)]
pub struct Submodule {
    pub url: String,
    pub oid: [u8; 20],
}

#[derive(Serialize, Archive, Debug, PartialEq, Eq, Hash)]
pub enum TreeItemKind {
    Submodule(Submodule),
    Tree,
    File,
}

#[derive(Serialize, Archive, Debug, PartialEq, Eq, Hash, Yokeable)]
pub struct TreeItem {
    pub mode: u16,
    pub kind: TreeItemKind,
}

pub type YokedTreeItem = Yoked<&'static <TreeItem as Archive>::Archived>;
pub type YokedTreeItemKey = Yoked<&'static [u8]>;
pub type YokedTreeItemKeyUtf8 = Yoked<&'static str>;

impl TreeItem {
    pub fn insert(
        &self,
        buffer: &mut Vec<u8>,
        digest: u64,
        path: &BStr,
        database: &DB,
        batch: &mut WriteBatch,
    ) -> Result<(), anyhow::Error> {
        let cf = database
            .cf_handle(TREE_ITEM_FAMILY)
            .context("tree column family missing")?;

        buffer.clear();
        buffer.reserve(std::mem::size_of::<u64>() + path.len() + std::mem::size_of::<usize>());
        buffer.extend_from_slice(&digest.to_ne_bytes());
        buffer.extend_from_slice(&memchr::memchr_iter(b'/', path).count().to_be_bytes());
        buffer.extend_from_slice(path.as_ref());

        batch.put_cf(cf, &buffer, rkyv::to_bytes::<rkyv::rancor::Error>(self)?);

        Ok(())
    }

    pub fn find_exact(
        database: &DB,
        digest: u64,
        path: &[u8],
    ) -> Result<Option<YokedTreeItem>, anyhow::Error> {
        let cf = database
            .cf_handle(TREE_ITEM_FAMILY)
            .expect("tree column family missing");

        let mut buffer = Vec::with_capacity(std::mem::size_of::<u64>() + path.len());
        buffer.extend_from_slice(&digest.to_ne_bytes());
        buffer.extend_from_slice(&memchr::memchr_iter(b'/', path).count().to_be_bytes());
        buffer.extend_from_slice(path);

        database
            .get_cf(cf, buffer)?
            .map(|data| {
                Yoke::try_attach_to_cart(data.into_boxed_slice(), |data| {
                    rkyv::access::<_, rkyv::rancor::Error>(data)
                })
            })
            .transpose()
            .context("failed to parse tree item")
    }

    pub fn find_prefix<'a>(
        database: &'a DB,
        digest: u64,
        prefix: &[u8],
    ) -> impl Iterator<Item = Result<(YokedTreeItemKey, YokedTreeItem), anyhow::Error>> + use<'a>
    {
        let cf = database
            .cf_handle(TREE_ITEM_FAMILY)
            .expect("tree column family missing");

        let (iterator, key) = if prefix.is_empty() {
            let mut buffer = [0_u8; std::mem::size_of::<u64>() + std::mem::size_of::<usize>()];
            buffer[..std::mem::size_of::<u64>()].copy_from_slice(&digest.to_ne_bytes());
            buffer[std::mem::size_of::<u64>()..].copy_from_slice(&0_usize.to_be_bytes());

            let iterator = database.prefix_iterator_cf(cf, buffer);

            (iterator, Either::Left(buffer))
        } else {
            let mut buffer = Vec::with_capacity(
                std::mem::size_of::<u64>() + prefix.len() + std::mem::size_of::<usize>(),
            );
            buffer.extend_from_slice(&digest.to_ne_bytes());
            buffer
                .extend_from_slice(&(memchr::memchr_iter(b'/', prefix).count() + 1).to_be_bytes());
            buffer.extend_from_slice(prefix);
            buffer.push(b'/');

            let iterator = database.prefix_iterator_cf(cf, &buffer);

            (iterator, Either::Right(buffer))
        };

        iterator
            .take_while(move |v| {
                v.as_ref().is_ok_and(|(k, _)| {
                    k.starts_with(match key.as_ref() {
                        Either::Left(v) => v.as_ref(),
                        Either::Right(v) => v.as_ref(),
                    })
                })
            })
            .map_ok(|(key, value)| {
                let key = Yoke::attach_to_cart(key, |data| {
                    &data[std::mem::size_of::<u64>() + std::mem::size_of::<usize>()..]
                });
                let value = Yoke::try_attach_to_cart(value, |data| {
                    rkyv::access::<_, rkyv::rancor::Error>(data)
                })
                .context("Failed to open repository")?;
                Ok((key, value))
            })
            .flatten()
    }

    pub fn contains(database: &DB, digest: u64) -> Result<bool, anyhow::Error> {
        let cf = database
            .cf_handle(TREE_ITEM_FAMILY)
            .context("tree column family missing")?;

        Ok(database
            .prefix_iterator_cf(cf, digest.to_ne_bytes())
            .next()
            .transpose()?
            .is_some())
    }
}
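
One detail of find_prefix that is easy to miss: listing the direct children of a directory never scans the whole subtree, because the slash count sits in the key ahead of the path. A toy illustration of the same seek, with an in-memory BTreeMap standing in for the tree_item column family and a range scan standing in for prefix_iterator_cf (the key function repeats the layout from TreeItem::insert):

use std::collections::BTreeMap;

fn key(digest: u64, path: &str) -> Vec<u8> {
    let depth = path.bytes().filter(|&b| b == b'/').count();
    let mut k = digest.to_ne_bytes().to_vec();
    k.extend_from_slice(&depth.to_be_bytes());
    k.extend_from_slice(path.as_bytes());
    k
}

fn main() {
    let digest = 0xdead_beef_u64;
    let mut index = BTreeMap::new();
    for path in ["README.md", "src", "src/git.rs", "src/database", "src/database/indexer.rs"] {
        index.insert(key(digest, path), path);
    }

    // Direct children of "src" sit at slash count 1 and start with "src/", so seeking
    // to `digest || 1 || "src/"` and stopping once the prefix no longer matches yields
    // src/database and src/git.rs but never src/database/indexer.rs (slash count 2).
    let seek = key(digest, "src/");
    let children: Vec<_> = index
        .range(seek.clone()..)
        .take_while(|(k, _)| k.starts_with(&seek))
        .map(|(_, path)| *path)
        .collect();
    assert_eq!(children, ["src/database", "src/git.rs"]);
}
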
diff --git a/src/methods/repo/mod.rs b/src/methods/repo/mod.rs
index 34d8c9e..c108ba3 100644
--- a/src/methods/repo/mod.rs
+++ b/src/methods/repo/mod.rs
@@ -275,6 +275,12 @@
    }
}

impl From<Error> for anyhow::Error {
    fn from(value: Error) -> Self {
        value.0
    }
}

impl IntoResponse for Error {
    fn into_response(self) -> Response {
        (StatusCode::INTERNAL_SERVER_ERROR, format!("{:?}", self.0)).into_response()
diff --git a/src/methods/repo/tree.rs b/src/methods/repo/tree.rs
index 2a90019..67c1f56 100644
--- a/src/methods/repo/tree.rs
+++ b/src/methods/repo/tree.rs
@@ -1,5 +1,7 @@
use anyhow::{bail, Context};
use askama::Template;
use axum::{extract::Query, response::IntoResponse, Extension};
use gix::ObjectId;
use itertools::Itertools;
use serde::Deserialize;
use std::path::PathBuf;
@@ -8,8 +10,11 @@
    sync::Arc,
};

use crate::database::schema::tree::{
    ArchivedTreeItemKind, Tree, TreeItem, YokedTreeItem, YokedTreeItemKeyUtf8,
};
use crate::{
    git::{FileWithContent, PathDestination, TreeItem},
    git::FileWithContent,
    into_response,
    methods::{
        filters,
@@ -17,6 +22,8 @@
    },
    Git, ResponseEither,
};

use super::log::get_branch_commits;

#[derive(Deserialize)]
pub struct UriQuery {
@@ -49,7 +56,7 @@
#[allow(clippy::module_name_repetitions)]
pub struct TreeView {
    pub repo: Repository,
    pub items: Vec<TreeItem>,
    pub items: Vec<(YokedTreeItemKeyUtf8, usize, YokedTreeItem)>,
    pub query: UriQuery,
    pub repo_path: PathBuf,
    pub branch: Option<Arc<str>>,
@@ -62,6 +69,11 @@
    pub repo_path: PathBuf,
    pub file: FileWithContent,
    pub branch: Option<Arc<str>>,
}

enum LookupResult {
    RealPath,
    Children(Vec<(YokedTreeItemKeyUtf8, usize, YokedTreeItem)>),
}

pub async fn handle(
@@ -69,26 +81,77 @@
    Extension(RepositoryPath(repository_path)): Extension<RepositoryPath>,
    Extension(ChildPath(child_path)): Extension<ChildPath>,
    Extension(git): Extension<Arc<Git>>,
    Extension(db): Extension<Arc<rocksdb::DB>>,
    Query(query): Query<UriQuery>,
) -> Result<impl IntoResponse> {
    let open_repo = git.repo(repository_path, query.branch.clone()).await?;

    Ok(
        match open_repo
            .path(child_path.clone(), query.id.as_deref(), !query.raw)
            .await?
        {
            PathDestination::Tree(items) => {
                ResponseEither::Left(ResponseEither::Left(into_response(TreeView {
                    repo,
                    items,
                    branch: query.branch.clone(),
                    query,
                    repo_path: child_path.unwrap_or_default(),
                })))
    // TODO: bit messy
    let (repo, query, child_path, lookup_result) = tokio::task::spawn_blocking(move || {
        let tree_id = if let Some(id) = query.id.as_deref() {
            let hex = const_hex::decode_to_array(id).context("Failed to parse tree hash")?;
            Tree::find(&db, ObjectId::Sha1(hex))
                .context("Failed to lookup tree")?
                .context("Couldn't find tree with given id")?
        } else {
            let repository = crate::database::schema::repository::Repository::open(&db, &*repo)?
                .context("Repository does not exist")?;
            let commit = get_branch_commits(&repository, &db, query.branch.as_deref(), 1, 0)?
                .into_iter()
                .next()
                .context("Branch not found")?;
            commit.get().tree.to_native()
        };

        if let Some(path) = &child_path {
            if let Some(item) =
                TreeItem::find_exact(&db, tree_id, path.as_os_str().as_encoded_bytes())?
            {
                if let ArchivedTreeItemKind::File = item.get().kind {
                    return Ok((repo, query, child_path, LookupResult::RealPath));
                }
            }
            PathDestination::File(file) if query.raw => ResponseEither::Right(file.content),
            PathDestination::File(file) => {
        }

        let path = child_path
            .as_ref()
            .map(|v| v.as_os_str().as_encoded_bytes())
            .unwrap_or_default();

        let tree_items = TreeItem::find_prefix(&db, tree_id, path)
            // don't take the current path the user is on
            .filter_ok(|(k, _)| !k.get()[path.len()..].is_empty())
            // only take direct descendants
            .filter_ok(|(k, _)| {
                memchr::memrchr(b'/', &k.get()[path.len()..]).is_none_or(|v| v == 0)
            })
            .map_ok(|(k, v)| {
                (
                    k.try_map_project(|v, _| simdutf8::basic::from_utf8(v))
                        .expect("invalid utf8"),
                    path.len(),
                    v,
                )
            })
            .try_collect::<_, Vec<_>, _>()?;

        if tree_items.is_empty() {
            bail!("Path doesn't exist in tree");
        }

        Ok::<_, anyhow::Error>((repo, query, child_path, LookupResult::Children(tree_items)))
    })
    .await
    .context("Failed to join on task")??;

    Ok(match lookup_result {
        LookupResult::RealPath => {
            let open_repo = git.repo(repository_path, query.branch.clone()).await?;
            let file = open_repo
                .path(child_path.clone(), query.id.as_deref(), !query.raw)
                .await?;

            if query.raw {
                ResponseEither::Right(file.content)
            } else {
                ResponseEither::Left(ResponseEither::Right(into_response(FileView {
                    repo,
                    file,
@@ -96,6 +159,15 @@
                    repo_path: child_path.unwrap_or_default(),
                })))
            }
        },
    )
        }
        LookupResult::Children(items) => {
            ResponseEither::Left(ResponseEither::Left(into_response(TreeView {
                repo,
                items,
                branch: query.branch.clone(),
                query,
                repo_path: child_path.unwrap_or_default(),
            })))
        }
    })
}
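
A last note on the listing filter in handle above: after the prefix scan, an entry counts as a direct child only when the part of its path past the parent contains no '/' except possibly the one at index 0 (the separator itself). A toy check of that predicate, built from the same memchr::memrchr call the handler uses:

// `prefix_len` is the byte length of the parent path; an empty remainder means
// the entry is the parent itself and is also dropped.
fn is_direct_child(full: &[u8], prefix_len: usize) -> bool {
    let rest = &full[prefix_len..];
    !rest.is_empty() && memchr::memrchr(b'/', rest).is_none_or(|v| v == 0)
}

fn main() {
    assert!(is_direct_child(b"src/git.rs", "src".len()));
    assert!(is_direct_child(b"src/database", "src".len()));
    assert!(!is_direct_child(b"src/database/indexer.rs", "src".len()));
    assert!(!is_direct_child(b"src", "src".len()));
}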