Index trees into RocksDB
Diff
Cargo.toml | 2 +-
src/git.rs | 271 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------
src/main.rs | 12 +++++++++++-
src/database/indexer.rs | 193 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
src/methods/filters.rs | 4 ++--
templates/repo/tree.html | 59 ++++++++++++++++++++++++++++-------------------------------
src/database/schema/commit.rs | 3 +++
src/database/schema/mod.rs | 3 ++-
src/database/schema/prefixes.rs | 2 ++
src/database/schema/tag.rs | 9 +++++++--
src/database/schema/tree.rs | 191 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
src/methods/repo/mod.rs | 6 ++++++
src/methods/repo/tree.rs | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
13 files changed, 608 insertions(+), 261 deletions(-)
@@ -84,7 +84,7 @@
unix_mode = "0.1"
uuid = { version = "1.11", features = ["v4"] }
v_htmlescape = { version = "0.15", features = ["bytes-buf"] }
xxhash-rust = { version = "0.8.15", features = ["const_xxh3"] }
xxhash-rust = { version = "0.8.15", features = ["const_xxh3", "xxh3"] }
yoke = { version = "0.7.5", features = ["derive"] }
[features]
@@ -7,18 +7,17 @@
actor::SignatureRef,
bstr::{BStr, BString, ByteSlice, ByteVec},
diff::blob::{platform::prepare_diff::Operation, Sink},
object::{tree::EntryKind, Kind},
object::Kind,
objs::{tree::EntryRef, CommitRef, TagRef},
prelude::TreeEntryRefExt,
traverse::tree::visit::Action,
url::Scheme,
ObjectId, ThreadSafeRepository, Url,
ObjectId, ThreadSafeRepository,
};
use itertools::{Either, Itertools};
use itertools::Either;
use moka::future::Cache;
use std::{
borrow::Cow,
collections::{BTreeMap, VecDeque},
collections::VecDeque,
ffi::OsStr,
fmt::{self, Arguments, Write},
io::ErrorKind,
@@ -121,7 +120,7 @@
path: Option<PathBuf>,
tree_id: Option<&str>,
formatted: bool,
) -> Result<PathDestination> {
) -> Result<FileWithContent> {
let tree_id = tree_id
.map(ObjectId::from_str)
.transpose()
@@ -154,8 +153,6 @@
Kind::Blob => {
let mut blob = object.into_blob();
let size = blob.data.len();
let content = match (formatted, simdutf8::basic::from_utf8(&blob.data)) {
(true, Err(_)) => Content::Binary(vec![]),
(true, Ok(data)) => Content::Text(Cow::Owned(format_file(
@@ -168,107 +165,13 @@
})),
};
return Ok(PathDestination::File(FileWithContent {
metadata: File {
mode: item.mode().0,
size,
path: path.clone(),
name: item.filename().to_string(),
},
content,
}));
return Ok(FileWithContent { content });
}
Kind::Tree => {
tree = object.into_tree();
}
_ => anyhow::bail!("bad object of type {:?}", object.kind),
}
}
let mut tree_items = Vec::new();
let submodules = repo
.submodules()?
.into_iter()
.flatten()
.filter_map(|v| Some((v.name().to_path_lossy().to_path_buf(), v.url().ok()?)))
.collect::<BTreeMap<_, _>>();
for item in tree.iter() {
let item = item?;
let path = path
.clone()
.unwrap_or_default()
.join(item.filename().to_path_lossy());
match item.mode().kind() {
EntryKind::Tree
| EntryKind::Blob
| EntryKind::BlobExecutable
| EntryKind::Link => {
let mut object = item
.object()
.context("Expected item in tree to be object but it wasn't")?;
tree_items.push(match object.kind {
Kind::Blob => TreeItem::File(File {
mode: item.mode().0,
size: object.into_blob().data.len(),
path,
name: item.filename().to_string(),
}),
Kind::Tree => {
let mut children = PathBuf::new();
while let Ok(Some(Ok(item))) = object
.try_into_tree()
.iter()
.flat_map(gix::Tree::iter)
.at_most_one()
{
let nested_object = item.object().context(
"Expected item in tree to be object but it wasn't",
)?;
if nested_object.kind != Kind::Tree {
break;
}
object = nested_object;
children.push(item.filename().to_path_lossy());
}
TreeItem::Tree(Tree {
mode: item.mode().0,
path,
children,
name: item.filename().to_string(),
})
}
_ => continue,
});
}
EntryKind::Commit => {
if let Some(mut url) = submodules.get(path.as_path()).cloned() {
if matches!(url.scheme, Scheme::Git | Scheme::Ssh) {
url.scheme = Scheme::Https;
}
tree_items.push(TreeItem::Submodule(Submodule {
mode: item.mode().0,
name: item.filename().to_string(),
url,
oid: item.object_id(),
}));
continue;
}
}
}
}
Ok(PathDestination::Tree(tree_items))
anyhow::bail!("bad object");
})
.await
.context("Failed to join Tokio task")?
@@ -442,16 +345,16 @@
}
let buffer = BytesMut::with_capacity(BUFFER_CAP + 1024);
let mut visitor = ArchivalVisitor {
let mut visitor = PathVisitor::new(ArchivalVisitor {
repository: &repo,
res,
archive: Builder::new(GzEncoder::new(buffer.writer(), flate2::Compression::fast())),
path_deque: VecDeque::new(),
path: BString::default(),
};
});
tree.traverse().breadthfirst(&mut visitor)?;
let visitor = visitor.into_inner();
visitor.res.blocking_send(Ok(visitor
.archive
.into_inner()?
@@ -514,16 +417,77 @@
}
const BUFFER_CAP: usize = 512 * 1024;
pub trait PathVisitorHandler {
fn visit(&mut self, entry: &EntryRef<'_>, path: &BStr) -> Action;
}
pub struct ArchivalVisitor<'a> {
struct ArchivalVisitor<'a> {
repository: &'a gix::Repository,
res: tokio::sync::mpsc::Sender<Result<Bytes, anyhow::Error>>,
archive: Builder<GzEncoder<Writer<BytesMut>>>,
}
/// Streams every blob reachable from the traversed tree into a gzipped tar
/// archive, flushing compressed chunks to the response channel whenever the
/// in-memory buffer fills past `BUFFER_CAP`.
impl PathVisitorHandler for ArchivalVisitor<'_> {
    fn visit(&mut self, entry: &EntryRef<'_>, path: &BStr) -> Action {
        let entry = entry.attach(self.repository);
        // Unreadable objects are skipped rather than failing the whole archive.
        let Ok(object) = entry.object() else {
            return Action::Continue;
        };
        // Only blobs carry file content; other object kinds contribute nothing.
        if object.kind != Kind::Blob {
            return Action::Continue;
        }
        let blob = object.into_blob();
        let mut header = tar::Header::new_gnu();
        // Paths that can't be encoded in a tar header are skipped with a warning.
        if let Err(error) = header.set_path(path.to_path_lossy()) {
            warn!(%error, "Attempted to write invalid path to archive");
            return Action::Continue;
        }
        header.set_size(blob.data.len() as u64);
        #[allow(clippy::cast_sign_loss)]
        header.set_mode(entry.mode().0.into());
        header.set_cksum();
        // Append failures abort the traversal; the stream cannot recover.
        if let Err(error) = self.archive.append(&header, blob.data.as_slice()) {
            warn!(%error, "Failed to append to archive");
            return Action::Cancel;
        }
        // Once the compressed output grows past BUFFER_CAP, ship the filled
        // portion to the client; a closed channel means the client went away,
        // so cancel the traversal.
        if self.archive.get_ref().get_ref().get_ref().len() >= BUFFER_CAP {
            let b = self.archive.get_mut().get_mut().get_mut().split().freeze();
            if self.res.blocking_send(Ok(b)).is_err() {
                return Action::Cancel;
            }
        }
        Action::Continue
    }
}
pub struct PathVisitor<T> {
path_deque: VecDeque<BString>,
path: BString,
inner: T,
}
impl<T> PathVisitor<T> {
pub fn new(inner: T) -> Self {
Self {
path_deque: VecDeque::new(),
path: BString::default(),
inner,
}
}
pub fn into_inner(self) -> T {
self.inner
}
impl ArchivalVisitor<'_> {
fn pop_element(&mut self) {
if let Some(pos) = memchr::memrchr(b'/', &self.path) {
self.path.resize(pos, 0);
@@ -533,6 +497,9 @@
}
fn push_element(&mut self, name: &BStr) {
if name.is_empty() {
return;
}
if !self.path.is_empty() {
self.path.push(b'/');
}
@@ -540,7 +507,7 @@
}
}
impl gix::traverse::tree::Visit for ArchivalVisitor<'_> {
impl<T: PathVisitorHandler> gix::traverse::tree::Visit for PathVisitor<T> {
fn pop_front_tracked_path_and_set_current(&mut self) {
self.path = self
.path_deque
@@ -549,10 +516,7 @@
}
fn pop_back_tracked_path_and_set_current(&mut self) {
self.path = self
.path_deque
.pop_back()
.expect("every call is matched with push_tracked_path_component");
self.path = self.path_deque.pop_back().unwrap_or_default();
}
fn push_back_tracked_path_component(&mut self, component: &BStr) {
@@ -568,47 +532,12 @@
self.pop_element();
}
fn visit_tree(&mut self, _entry: &EntryRef<'_>) -> Action {
Action::Continue
fn visit_tree(&mut self, entry: &EntryRef<'_>) -> Action {
self.inner.visit(entry, self.path.as_ref())
}
fn visit_nontree(&mut self, entry: &EntryRef<'_>) -> Action {
let entry = entry.attach(self.repository);
let Ok(object) = entry.object() else {
return Action::Continue;
};
if object.kind != Kind::Blob {
return Action::Continue;
}
let blob = object.into_blob();
let mut header = tar::Header::new_gnu();
if let Err(error) = header.set_path(self.path.to_path_lossy()) {
warn!(%error, "Attempted to write invalid path to archive");
return Action::Continue;
}
header.set_size(blob.data.len() as u64);
#[allow(clippy::cast_sign_loss)]
header.set_mode(entry.mode().0.into());
header.set_cksum();
if let Err(error) = self.archive.append(&header, blob.data.as_slice()) {
warn!(%error, "Failed to append to archive");
return Action::Cancel;
}
if self.archive.get_ref().get_ref().get_ref().len() >= BUFFER_CAP {
let b = self.archive.get_mut().get_mut().get_mut().split().freeze();
if self.res.blocking_send(Ok(b)).is_err() {
return Action::Cancel;
}
}
Action::Continue
self.inner.visit(entry, self.path.as_ref())
}
}
@@ -634,47 +563,11 @@
pub enum ReadmeFormat {
Markdown,
Plaintext,
}
pub enum PathDestination {
Tree(Vec<TreeItem>),
File(FileWithContent),
}
pub enum TreeItem {
Tree(Tree),
File(File),
Submodule(Submodule),
}
#[derive(Debug)]
pub struct Submodule {
pub mode: u16,
pub name: String,
pub url: Url,
pub oid: ObjectId,
}
#[derive(Debug)]
pub struct Tree {
pub mode: u16,
pub name: String,
pub children: PathBuf,
pub path: PathBuf,
}
#[derive(Debug)]
pub struct File {
pub mode: u16,
pub size: usize,
pub name: String,
pub path: PathBuf,
}
#[derive(Debug)]
#[allow(unused)]
pub struct FileWithContent {
pub metadata: File,
pub content: Content,
}
@@ -23,7 +23,10 @@
};
use clap::Parser;
use const_format::formatcp;
use database::schema::SCHEMA_VERSION;
use database::schema::{
prefixes::{TREE_FAMILY, TREE_ITEM_FAMILY},
SCHEMA_VERSION,
};
use rocksdb::{Options, SliceTransform};
use tokio::{
net::TcpListener,
@@ -257,6 +260,11 @@
tag_family_options.set_prefix_extractor(SliceTransform::create_fixed_prefix(
std::mem::size_of::<u64>(),
));
let mut tree_item_family_options = Options::default();
tree_item_family_options.set_prefix_extractor(SliceTransform::create_fixed_prefix(
std::mem::size_of::<u64>() + std::mem::size_of::<usize>(),
));
let db = rocksdb::DB::open_cf_with_opts(
&db_options,
@@ -267,6 +275,8 @@
(TAG_FAMILY, tag_family_options),
(REFERENCE_FAMILY, Options::default()),
(COMMIT_COUNT_FAMILY, Options::default()),
(TREE_FAMILY, Options::default()),
(TREE_ITEM_FAMILY, tree_item_family_options),
],
)?;
@@ -1,5 +1,5 @@
use std::{
collections::HashSet,
collections::{BTreeMap, HashSet},
ffi::OsStr,
fmt::Debug,
io::{BufRead, BufReader},
@@ -8,18 +8,30 @@
};
use anyhow::Context;
use gix::{bstr::ByteSlice, refs::Category, Reference};
use gix::{
bstr::{BStr, ByteSlice},
objs::tree::EntryKind,
refs::Category,
url::Scheme,
ObjectId, Reference, Url,
};
use itertools::{Either, Itertools};
use rocksdb::WriteBatch;
use time::{OffsetDateTime, UtcOffset};
use tracing::{error, info, info_span, instrument, warn};
use crate::database::schema::{
commit::Commit,
repository::{ArchivedRepository, Repository, RepositoryId},
tag::{Tag, TagTree},
use xxhash_rust::xxh3::Xxh3;
use crate::{
database::schema::{
commit::Commit,
repository::{ArchivedRepository, Repository, RepositoryId},
tag::{Tag, TagTree},
},
git::{PathVisitor, PathVisitorHandler},
};
use super::schema::tree::{Tree, TreeItem, TreeItemKind};
pub fn run(scan_path: &Path, repository_list: Option<&Path>, db: &Arc<rocksdb::DB>) {
let span = info_span!("index_update");
let _entered = span.enter();
@@ -157,6 +169,18 @@
Ok(v) => v,
Err(error) => {
error!(%error, "Failed to read references for {relative_path}");
continue;
}
};
let submodules = match git_repository.submodules() {
Ok(submodules) => submodules
.into_iter()
.flatten()
.filter_map(|v| Some((v.name().to_path_lossy().to_path_buf(), v.url().ok()?)))
.collect::<BTreeMap<_, _>>(),
Err(error) => {
error!(%error, "Failed to read submodules for {relative_path}");
continue;
}
};
@@ -189,6 +213,7 @@
db.clone(),
&git_repository,
false,
&submodules,
) {
error!(%error, "Failed to update reflog for {relative_path}@{:?}", valid_references.last());
}
@@ -208,6 +233,7 @@
db: Arc<rocksdb::DB>,
git_repository: &gix::Repository,
force_reindex: bool,
submodules: &BTreeMap<PathBuf, Url>,
) -> Result<(), anyhow::Error> {
info!("Refreshing indexes");
@@ -238,6 +264,8 @@
.into_iter()
.rev();
let mut hasher = Xxh3::new();
let tree_len = commit_tree.len()?;
let mut seen = false;
let mut i = 0;
@@ -266,12 +294,16 @@
let commit = commit.decode()?;
let author = commit.author();
let committer = commit.committer();
let tree = git_repository.find_tree(commit.tree())?;
let tree_id = index_tree(&db, &mut batch, &tree, &mut hasher, submodules)?;
Commit::new(oid, &commit, author, committer)?.insert(
Commit::new(oid, &commit, author, committer, tree_id)?.insert(
&commit_tree,
tree_len + i,
&mut batch,
)?;
i += 1;
}
@@ -289,10 +321,117 @@
db,
git_repository,
true,
submodules,
);
}
Ok(())
}
/// Indexes `tree` into the database and returns the xxh3 digest identifying
/// its contents.
///
/// Two traversals are performed: the first folds every entry's path and mode
/// into `hasher` to derive a content digest; the second materialises one
/// `TreeItem` row per entry, but only when that digest has never been indexed
/// before (identical trees share a single set of rows). Finally the tree's
/// git OID is mapped to the digest so later lookups can resolve it.
fn index_tree(
    database: &rocksdb::DB,
    batch: &mut WriteBatch,
    tree: &gix::Tree<'_>,
    hasher: &mut Xxh3,
    submodules: &BTreeMap<PathBuf, Url>,
) -> Result<u64, anyhow::Error> {
    // The hasher is caller-provided and reused across trees, so reset first.
    hasher.reset();
    let mut hash_visitor = PathVisitor::new(TreeHasherVisitor { hasher });
    tree.traverse().breadthfirst(&mut hash_visitor)?;
    let digest = hasher.digest();

    // Only write per-entry rows for digests we have never seen before.
    let already_indexed = TreeItem::contains(database, digest)?;
    if !already_indexed {
        let mut index_visitor = PathVisitor::new(TreeItemIndexerVisitor {
            buffer: Vec::new(),
            digest,
            database,
            batch,
            submodules,
        });
        tree.traverse().breadthfirst(&mut index_visitor)?;
    }

    // Record the OID -> digest mapping regardless, so lookups by tree id work.
    let mapping = Tree {
        indexed_tree_id: digest,
    };
    mapping.insert(database, batch, tree.id)?;

    Ok(digest)
}
/// Tree visitor that folds every entry's path and mode into the shared xxh3
/// hasher, producing a content digest for the whole tree.
struct TreeHasherVisitor<'a> {
    // Borrowed so the caller can read the digest after traversal.
    hasher: &'a mut Xxh3,
}

impl PathVisitorHandler for TreeHasherVisitor<'_> {
    fn visit(
        &mut self,
        entry: &gix::objs::tree::EntryRef<'_>,
        path: &BStr,
    ) -> gix::traverse::tree::visit::Action {
        self.hasher.update(path);
        // NOTE(review): `to_ne_bytes` makes the persisted digest depend on the
        // host's endianness, so the database is not portable across
        // architectures — confirm this is intentional.
        self.hasher.update(&entry.mode.to_ne_bytes());
        gix::traverse::tree::visit::Action::Continue
    }
}
/// Tree visitor that writes one row per tree entry into the tree-item column
/// family, keyed by the owning tree's digest plus the entry's depth and path.
struct TreeItemIndexerVisitor<'a> {
    /// Digest of the tree currently being indexed; forms the row-key prefix.
    digest: u64,
    /// Scratch buffer reused across entries when building each row key.
    buffer: Vec<u8>,
    database: &'a rocksdb::DB,
    batch: &'a mut WriteBatch,
    /// Submodule path -> URL mapping, sourced from the repo's `.gitmodules`.
    submodules: &'a BTreeMap<PathBuf, Url>,
}

impl PathVisitorHandler for TreeItemIndexerVisitor<'_> {
    fn visit(
        &mut self,
        entry: &gix::objs::tree::EntryRef<'_>,
        path: &BStr,
    ) -> gix::traverse::tree::visit::Action {
        let kind = match entry.mode.kind() {
            EntryKind::Blob | EntryKind::BlobExecutable | EntryKind::Link => TreeItemKind::File,
            EntryKind::Commit => {
                // Gitlink entries are only indexed when a matching submodule
                // definition exists. Borrow the lossily-converted path for the
                // lookup instead of allocating an owned PathBuf per entry.
                let Some(mut url) = self.submodules.get(&*path.to_path_lossy()).cloned() else {
                    return gix::traverse::tree::visit::Action::Continue;
                };
                // Rewrite fetch-only schemes to something a browser can open.
                if matches!(url.scheme, Scheme::Git | Scheme::Ssh) {
                    url.scheme = Scheme::Https;
                }
                TreeItemKind::Submodule(match entry.oid.to_owned() {
                    ObjectId::Sha1(oid) => super::schema::tree::Submodule {
                        url: url.to_string(),
                        oid,
                    },
                })
            }
            EntryKind::Tree => TreeItemKind::Tree,
        };
        TreeItem {
            mode: entry.mode.0,
            kind,
        }
        .insert(
            &mut self.buffer,
            self.digest,
            path,
            self.database,
            self.batch,
        )
        // The `Visit` trait offers no way to propagate an error from here;
        // serialization into the batch failing indicates a bug, so panic.
        .expect("failed to insert TreeItem");
        gix::traverse::tree::visit::Action::Continue
    }
}
#[instrument(skip(db))]
@@ -309,6 +448,18 @@
let Some(git_repository) = open_repo(scan_path, &relative_path, db_repository.get(), &db)
else {
continue;
};
let submodules = match git_repository.submodules() {
Ok(submodules) => submodules
.into_iter()
.flatten()
.filter_map(|v| Some((v.name().to_path_lossy().to_path_buf(), v.url().ok()?)))
.collect::<BTreeMap<_, _>>(),
Err(error) => {
error!(%error, "Failed to read submodules for {relative_path}");
continue;
}
};
if let Err(error) = tag_index_scan(
@@ -316,6 +467,7 @@
db_repository.get(),
db.clone(),
&git_repository,
&submodules,
) {
error!(%error, "Failed to update tags for {relative_path}");
}
@@ -328,6 +480,7 @@
db_repository: &ArchivedRepository,
db: Arc<rocksdb::DB>,
git_repository: &gix::Repository,
submodules: &BTreeMap<PathBuf, Url>,
) -> Result<(), anyhow::Error> {
let tag_tree = db_repository.tag_tree(db);
@@ -343,7 +496,7 @@
for tag_name in git_tags.difference(&indexed_tags) {
tag_index_update(tag_name, git_repository, &tag_tree)?;
tag_index_update(tag_name, git_repository, &tag_tree, submodules)?;
}
@@ -360,15 +513,31 @@
tag_name: &str,
git_repository: &gix::Repository,
tag_tree: &TagTree,
submodules: &BTreeMap<PathBuf, Url>,
) -> Result<(), anyhow::Error> {
let mut reference = git_repository
.find_reference(tag_name)
.context("Failed to read newly discovered tag")?;
let tree_id = if let Ok(tree) = reference.peel_to_tree() {
let mut batch = WriteBatch::default();
let tree_id = index_tree(
&tag_tree.db,
&mut batch,
&tree,
&mut Xxh3::new(),
submodules,
)?;
tag_tree.db.write_without_wal(batch)?;
Some(tree_id)
} else {
None
};
if let Ok(tag) = reference.peel_to_tag() {
info!("Inserting newly discovered tag to index");
Tag::new(tag.tagger()?)?.insert(tag_tree, tag_name)?;
Tag::new(tag.tagger()?, tree_id)?.insert(tag_tree, tag_name)?;
}
Ok(())
@@ -420,7 +589,7 @@
discovered_repos: &mut Vec<(PathBuf, gix::Repository)>,
) {
let dirs = if let Some(repo_list) = repository_list {
let mut repo_list = match std::fs::File::open(&repo_list) {
let repo_list = match std::fs::File::open(repo_list) {
Ok(v) => BufReader::new(v).lines(),
Err(error) => {
error!(%error, "Failed to open repository list file");
@@ -430,7 +599,7 @@
let mut out = Vec::new();
while let Some(line) = repo_list.next() {
for line in repo_list {
let line = match line {
Ok(v) => v,
Err(error) => {
@@ -37,8 +37,8 @@
.convert((OffsetDateTime::now_utc() - s.into().0).try_into().unwrap()))
}
pub fn file_perms(s: &u16) -> Result<String, askama::Error> {
Ok(unix_mode::to_string(u32::from(*s)))
/// Askama template filter: renders raw unix mode bits (e.g. `0o100644`) as an
/// `ls -l`-style permission string such as `-rw-r--r--`.
pub fn file_perms(s: u16) -> Result<String, askama::Error> {
    Ok(unix_mode::to_string(u32::from(s)))
}
pub struct DisplayHexBuffer<const N: usize>(pub const_hex::Buffer<N>);
@@ -1,52 +1,47 @@
{% import "macros/breadcrumbs.html" as breadcrumbs %}
{% extends "repo/base.html" %}
{% block tree_nav_class %}active{% endblock %}
{% block subnav %}
{% call breadcrumbs::breadcrumbs(repo_path, query) %}
{% call breadcrumbs::breadcrumbs(repo_path, query) %}
{% endblock %}
{% block content %}
<div class="table-responsive">
<table class="repositories">
<table class="repositories">
<thead>
<tr>
<tr>
<th style="width: 10rem;">Mode</th>
<th>Name</th>
<th>Size</th>
</tr>
</tr>
</thead>
<tbody>
{% for item in items -%}
<tr>
{% match item -%}
{%- when crate::git::TreeItem::Tree with (tree) -%}
<td><pre>{{ tree.mode|file_perms }}</pre></td>
<td><pre><a class="nested-tree" href="/{{ repo.display() }}/tree/{{ tree.path.display() }}{{ query }}">{{ tree.name }}</a>
{%- for child in tree.children.ancestors().collect_vec().into_iter().rev() -%}
{%- if let Some(file_name) = child.file_name() %} / <a class="nested-tree" href="/{{ repo.display() }}/tree/{{ tree.path.display() }}/{{ child.display() }}{{ query }}">{{ file_name.to_string_lossy() }}</a>{%- endif -%}
{%- endfor -%}
</pre></td>
<td></td>
<td></td>
{%- when crate::git::TreeItem::File with (file) -%}
<td><pre>{{ file.mode|file_perms }}</pre></td>
<td><pre><a href="/{{ repo.display() }}/tree/{{ file.path.display() }}{{ query }}">{{ file.name }}</a></pre></td>
<td><pre>{{ file.size }}</pre></td>
<td></td>
{%- when crate::git::TreeItem::Submodule with (submodule) -%}
<td><pre>{{ submodule.mode|file_perms }}</pre></td>
<td><pre>🔗 <a href="{{ submodule.url }}">{{ submodule.name }}</a> @ {{ submodule.oid.to_hex_with_len(7) }}</pre></td>
<td></td>
<td></td>
{% for (name, name_split, item) in items -%}
<tr>
<td>
<pre>{{ item.get().mode.to_native()|file_perms }}</pre>
</td>
{% set local_name = name.get()[*name_split..] -%}
{% set local_name = local_name.strip_prefix('/').unwrap_or(local_name) -%}
{% match item.get().kind -%}
{%- when ArchivedTreeItemKind::Tree -%}
<td>
<pre><a class="nested-tree" href="/{{ repo.display() }}/tree/{{ name.get() }}{{ query }}">{{ local_name }}</a></pre>
</td>
{%- when ArchivedTreeItemKind::File -%}
<td>
<pre><a href="/{{ repo.display() }}/tree/{{ name.get() }}{{ query }}">{{ local_name }}</a></pre>
</td>
{%- when ArchivedTreeItemKind::Submodule with (submodule) -%}
<td>
<pre>🔗 <a href="{{ submodule.url }}">{{ local_name }}</a> @ {{ submodule.oid|hex }}</pre>
</td>
{%- endmatch %}
</tr>
{% endfor -%}
</tr>
{% endfor -%}
</tbody>
</table>
</table>
</div>
{% endblock %}
@@ -21,6 +21,7 @@
pub author: Author,
pub committer: Author,
pub hash: [u8; 20],
pub tree: u64,
}
impl Commit {
@@ -29,6 +30,7 @@
commit: &CommitRef<'_>,
author: SignatureRef<'_>,
committer: SignatureRef<'_>,
tree: u64,
) -> Result<Self, anyhow::Error> {
let message = commit.message();
@@ -40,6 +42,7 @@
hash: match oid {
ObjectId::Sha1(d) => d,
},
tree,
})
}
@@ -6,7 +6,8 @@
pub mod prefixes;
pub mod repository;
pub mod tag;
pub mod tree;
pub type Yoked<T> = Yoke<T, Box<[u8]>>;
pub const SCHEMA_VERSION: &str = "3";
pub const SCHEMA_VERSION: &str = "4";
@@ -1,5 +1,7 @@
pub const COMMIT_FAMILY: &str = "commit";
pub const COMMIT_COUNT_FAMILY: &str = "commit_count";
pub const REPOSITORY_FAMILY: &str = "repository";
pub const TAG_FAMILY: &str = "tag";
pub const REFERENCE_FAMILY: &str = "repository_refs";
pub const TREE_FAMILY: &str = "tree";
pub const TREE_ITEM_FAMILY: &str = "tree_item";
@@ -15,12 +15,17 @@
#[derive(Serialize, Archive, Debug, Yokeable)]
pub struct Tag {
pub tagger: Option<Author>,
pub tree_id: Option<u64>,
}
impl Tag {
pub fn new(tagger: Option<SignatureRef<'_>>) -> Result<Self, anyhow::Error> {
pub fn new(
tagger: Option<SignatureRef<'_>>,
tree_id: Option<u64>,
) -> Result<Self, anyhow::Error> {
Ok(Self {
tagger: tagger.map(TryFrom::try_from).transpose()?,
tree_id,
})
}
@@ -30,7 +35,7 @@
}
pub struct TagTree {
db: Arc<rocksdb::DB>,
pub db: Arc<rocksdb::DB>,
prefix: RepositoryId,
}
@@ -1,0 +1,191 @@
use anyhow::Context;
use gix::{bstr::BStr, ObjectId};
use itertools::{Either, Itertools};
use rkyv::{Archive, Serialize};
use rocksdb::{WriteBatch, DB};
use yoke::{Yoke, Yokeable};
use super::{
prefixes::{TREE_FAMILY, TREE_ITEM_FAMILY},
Yoked,
};
/// Persistent mapping from a git tree object id to the xxh3 digest under
/// which that tree's entries are stored in the tree-item column family.
#[derive(Serialize, Archive, Debug, PartialEq, Eq, Hash)]
pub struct Tree {
    // Digest derived from the tree's entry paths and modes.
    pub indexed_tree_id: u64,
}
impl Tree {
pub fn insert(
&self,
database: &DB,
batch: &mut WriteBatch,
tree_oid: ObjectId,
) -> Result<(), anyhow::Error> {
let cf = database
.cf_handle(TREE_FAMILY)
.context("tree column family missing")?;
batch.put_cf(
cf,
tree_oid.as_slice(),
rkyv::to_bytes::<rkyv::rancor::Error>(self)?,
);
Ok(())
}
pub fn find(database: &DB, tree_oid: ObjectId) -> Result<Option<u64>, anyhow::Error> {
let cf = database
.cf_handle(TREE_FAMILY)
.context("tree column family missing")?;
let Some(data) = database.get_pinned_cf(cf, tree_oid.as_slice())? else {
return Ok(None);
};
let data = rkyv::access::<<Self as Archive>::Archived, rkyv::rancor::Error>(data.as_ref())?;
Ok(Some(data.indexed_tree_id.to_native()))
}
}
/// Indexed representation of a submodule (gitlink) tree entry.
#[derive(Serialize, Archive, Debug, PartialEq, Eq, Hash)]
pub struct Submodule {
    // Browsable URL of the submodule repository.
    pub url: String,
    // SHA-1 of the commit the submodule points at.
    pub oid: [u8; 20],
}

/// Discriminates what kind of entry a `TreeItem` row describes.
#[derive(Serialize, Archive, Debug, PartialEq, Eq, Hash)]
pub enum TreeItemKind {
    Submodule(Submodule),
    Tree,
    File,
}

/// One row in the tree-item column family: a single entry of an indexed tree.
#[derive(Serialize, Archive, Debug, PartialEq, Eq, Hash, Yokeable)]
pub struct TreeItem {
    // Raw git mode bits of the entry.
    pub mode: u16,
    pub kind: TreeItemKind,
}

// Archived `TreeItem` borrowing from the backing RocksDB value allocation.
pub type YokedTreeItem = Yoked<&'static <TreeItem as Archive>::Archived>;
// Raw row-key bytes borrowed from the backing allocation.
pub type YokedTreeItemKey = Yoked<&'static [u8]>;
// Row key validated as UTF-8.
pub type YokedTreeItemKeyUtf8 = Yoked<&'static str>;
impl TreeItem {
pub fn insert(
&self,
buffer: &mut Vec<u8>,
digest: u64,
path: &BStr,
database: &DB,
batch: &mut WriteBatch,
) -> Result<(), anyhow::Error> {
let cf = database
.cf_handle(TREE_ITEM_FAMILY)
.context("tree column family missing")?;
buffer.clear();
buffer.reserve(std::mem::size_of::<u64>() + path.len() + std::mem::size_of::<usize>());
buffer.extend_from_slice(&digest.to_ne_bytes());
buffer.extend_from_slice(&memchr::memchr_iter(b'/', path).count().to_be_bytes());
buffer.extend_from_slice(path.as_ref());
batch.put_cf(cf, &buffer, rkyv::to_bytes::<rkyv::rancor::Error>(self)?);
Ok(())
}
pub fn find_exact(
database: &DB,
digest: u64,
path: &[u8],
) -> Result<Option<YokedTreeItem>, anyhow::Error> {
let cf = database
.cf_handle(TREE_ITEM_FAMILY)
.expect("tree column family missing");
let mut buffer = Vec::with_capacity(std::mem::size_of::<u64>() + path.len());
buffer.extend_from_slice(&digest.to_ne_bytes());
buffer.extend_from_slice(&memchr::memchr_iter(b'/', path).count().to_be_bytes());
buffer.extend_from_slice(path);
database
.get_cf(cf, buffer)?
.map(|data| {
Yoke::try_attach_to_cart(data.into_boxed_slice(), |data| {
rkyv::access::<_, rkyv::rancor::Error>(data)
})
})
.transpose()
.context("failed to parse tree item")
}
pub fn find_prefix<'a>(
database: &'a DB,
digest: u64,
prefix: &[u8],
) -> impl Iterator<Item = Result<(YokedTreeItemKey, YokedTreeItem), anyhow::Error>> + use<'a>
{
let cf = database
.cf_handle(TREE_ITEM_FAMILY)
.expect("tree column family missing");
let (iterator, key) = if prefix.is_empty() {
let mut buffer = [0_u8; std::mem::size_of::<u64>() + std::mem::size_of::<usize>()];
buffer[..std::mem::size_of::<u64>()].copy_from_slice(&digest.to_ne_bytes());
buffer[std::mem::size_of::<u64>()..].copy_from_slice(&0_usize.to_be_bytes());
let iterator = database.prefix_iterator_cf(cf, buffer);
(iterator, Either::Left(buffer))
} else {
let mut buffer = Vec::with_capacity(
std::mem::size_of::<u64>() + prefix.len() + std::mem::size_of::<usize>(),
);
buffer.extend_from_slice(&digest.to_ne_bytes());
buffer
.extend_from_slice(&(memchr::memchr_iter(b'/', prefix).count() + 1).to_be_bytes());
buffer.extend_from_slice(prefix);
buffer.push(b'/');
let iterator = database.prefix_iterator_cf(cf, &buffer);
(iterator, Either::Right(buffer))
};
iterator
.take_while(move |v| {
v.as_ref().is_ok_and(|(k, _)| {
k.starts_with(match key.as_ref() {
Either::Left(v) => v.as_ref(),
Either::Right(v) => v.as_ref(),
})
})
})
.map_ok(|(key, value)| {
let key = Yoke::attach_to_cart(key, |data| {
&data[std::mem::size_of::<u64>() + std::mem::size_of::<usize>()..]
});
let value = Yoke::try_attach_to_cart(value, |data| {
rkyv::access::<_, rkyv::rancor::Error>(data)
})
.context("Failed to open repository")?;
Ok((key, value))
})
.flatten()
}
pub fn contains(database: &DB, digest: u64) -> Result<bool, anyhow::Error> {
let cf = database
.cf_handle(TREE_ITEM_FAMILY)
.context("tree column family missing")?;
Ok(database
.prefix_iterator_cf(cf, digest.to_ne_bytes())
.next()
.transpose()?
.is_some())
}
}
@@ -275,6 +275,12 @@
}
}
/// Unwraps the web-layer `Error` newtype back into its underlying
/// `anyhow::Error`, enabling `?` conversions in handlers.
impl From<Error> for anyhow::Error {
    fn from(value: Error) -> Self {
        value.0
    }
}
impl IntoResponse for Error {
fn into_response(self) -> Response {
(StatusCode::INTERNAL_SERVER_ERROR, format!("{:?}", self.0)).into_response()
@@ -1,5 +1,7 @@
use anyhow::{bail, Context};
use askama::Template;
use axum::{extract::Query, response::IntoResponse, Extension};
use gix::ObjectId;
use itertools::Itertools;
use serde::Deserialize;
use std::path::PathBuf;
@@ -8,8 +10,11 @@
sync::Arc,
};
use crate::database::schema::tree::{
ArchivedTreeItemKind, Tree, TreeItem, YokedTreeItem, YokedTreeItemKeyUtf8,
};
use crate::{
git::{FileWithContent, PathDestination, TreeItem},
git::FileWithContent,
into_response,
methods::{
filters,
@@ -17,6 +22,8 @@
},
Git, ResponseEither,
};
use super::log::get_branch_commits;
#[derive(Deserialize)]
pub struct UriQuery {
@@ -49,7 +56,7 @@
#[allow(clippy::module_name_repetitions)]
pub struct TreeView {
pub repo: Repository,
pub items: Vec<TreeItem>,
pub items: Vec<(YokedTreeItemKeyUtf8, usize, YokedTreeItem)>,
pub query: UriQuery,
pub repo_path: PathBuf,
pub branch: Option<Arc<str>>,
@@ -62,6 +69,11 @@
pub repo_path: PathBuf,
pub file: FileWithContent,
pub branch: Option<Arc<str>>,
}
enum LookupResult {
RealPath,
Children(Vec<(YokedTreeItemKeyUtf8, usize, YokedTreeItem)>),
}
pub async fn handle(
@@ -69,26 +81,77 @@
Extension(RepositoryPath(repository_path)): Extension<RepositoryPath>,
Extension(ChildPath(child_path)): Extension<ChildPath>,
Extension(git): Extension<Arc<Git>>,
Extension(db): Extension<Arc<rocksdb::DB>>,
Query(query): Query<UriQuery>,
) -> Result<impl IntoResponse> {
let open_repo = git.repo(repository_path, query.branch.clone()).await?;
Ok(
match open_repo
.path(child_path.clone(), query.id.as_deref(), !query.raw)
.await?
{
PathDestination::Tree(items) => {
ResponseEither::Left(ResponseEither::Left(into_response(TreeView {
repo,
items,
branch: query.branch.clone(),
query,
repo_path: child_path.unwrap_or_default(),
})))
let (repo, query, child_path, lookup_result) = tokio::task::spawn_blocking(move || {
let tree_id = if let Some(id) = query.id.as_deref() {
let hex = const_hex::decode_to_array(id).context("Failed to parse tree hash")?;
Tree::find(&db, ObjectId::Sha1(hex))
.context("Failed to lookup tree")?
.context("Couldn't find tree with given id")?
} else {
let repository = crate::database::schema::repository::Repository::open(&db, &*repo)?
.context("Repository does not exist")?;
let commit = get_branch_commits(&repository, &db, query.branch.as_deref(), 1, 0)?
.into_iter()
.next()
.context("Branch not found")?;
commit.get().tree.to_native()
};
if let Some(path) = &child_path {
if let Some(item) =
TreeItem::find_exact(&db, tree_id, path.as_os_str().as_encoded_bytes())?
{
if let ArchivedTreeItemKind::File = item.get().kind {
return Ok((repo, query, child_path, LookupResult::RealPath));
}
}
PathDestination::File(file) if query.raw => ResponseEither::Right(file.content),
PathDestination::File(file) => {
}
let path = child_path
.as_ref()
.map(|v| v.as_os_str().as_encoded_bytes())
.unwrap_or_default();
let tree_items = TreeItem::find_prefix(&db, tree_id, path)
.filter_ok(|(k, _)| !k.get()[path.len()..].is_empty())
.filter_ok(|(k, _)| {
memchr::memrchr(b'/', &k.get()[path.len()..]).is_none_or(|v| v == 0)
})
.map_ok(|(k, v)| {
(
k.try_map_project(|v, _| simdutf8::basic::from_utf8(v))
.expect("invalid utf8"),
path.len(),
v,
)
})
.try_collect::<_, Vec<_>, _>()?;
if tree_items.is_empty() {
bail!("Path doesn't exist in tree");
}
Ok::<_, anyhow::Error>((repo, query, child_path, LookupResult::Children(tree_items)))
})
.await
.context("Failed to join on task")??;
Ok(match lookup_result {
LookupResult::RealPath => {
let open_repo = git.repo(repository_path, query.branch.clone()).await?;
let file = open_repo
.path(child_path.clone(), query.id.as_deref(), !query.raw)
.await?;
if query.raw {
ResponseEither::Right(file.content)
} else {
ResponseEither::Left(ResponseEither::Right(into_response(FileView {
repo,
file,
@@ -96,6 +159,15 @@
repo_path: child_path.unwrap_or_default(),
})))
}
},
)
}
LookupResult::Children(items) => {
ResponseEither::Left(ResponseEither::Left(into_response(TreeView {
repo,
items,
branch: query.branch.clone(),
query,
repo_path: child_path.unwrap_or_default(),
})))
}
})
}