From 74e31cb660c6126a1f913c3c28c727cf4b3feb40 Mon Sep 17 00:00:00 2001 From: Jordan Doyle Date: Sat, 18 May 2024 19:44:28 +0200 Subject: [PATCH] Avoid syntax highlighting overly long lines --- src/git.rs | 173 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 151 insertions(+), 22 deletions(-) diff --git a/src/git.rs b/src/git.rs index 7f40cf3..6055553 100644 --- a/src/git.rs +++ a/src/git.rs @@ -1,6 +1,7 @@ use std::{ borrow::Cow, ffi::OsStr, + fmt, fmt::Write, path::{Path, PathBuf}, sync::Arc, @@ -18,8 +19,8 @@ use moka::future::Cache; use parking_lot::Mutex; use syntect::{ - html::{ClassStyle, ClassedHTMLGenerator}, parsing::SyntaxSet, + parsing::{BasicScopeStackOp, ParseState, Scope, ScopeStack, SCOPE_REPO}, util::LinesWithEndings, }; use time::OffsetDateTime; @@ -133,7 +134,12 @@ let content = match (formatted, blob.is_binary()) { (true, true) => Content::Binary(vec![]), (true, false) => Content::Text( - format_file(blob.content(), &extension, &self.git.syntax_set)?.into(), + format_file( + &String::from_utf8_lossy(blob.content()), + &extension, + &self.git.syntax_set, + )? 
+ .into(), ), (false, true) => Content::Binary(blob.content().to_vec()), (false, false) => Content::Text( @@ -673,25 +679,151 @@ Ok((diff_plain.freeze(), diff_output, diff_stats)) } -fn format_file(content: &[u8], extension: &str, syntax_set: &SyntaxSet) -> Result<String> { - let content = String::from_utf8_lossy(content); +fn format_file(content: &str, extension: &str, syntax_set: &SyntaxSet) -> Result<String> { + let mut out = String::new(); + format_file_inner(&mut out, content, extension, syntax_set, true)?; + Ok(out) +} +// TODO: this is in some serious need of refactoring +fn format_file_inner( + out: &mut String, + content: &str, + extension: &str, + syntax_set: &SyntaxSet, + code_tag: bool, +) -> Result<()> { let syntax = syntax_set .find_syntax_by_extension(extension) .unwrap_or_else(|| syntax_set.find_syntax_plain_text()); - let mut html_generator = - ClassedHTMLGenerator::new_with_class_style(syntax, syntax_set, ClassStyle::Spaced); - - for line in LinesWithEndings::from(&content) { - html_generator - .parse_html_for_line_which_includes_newline(line) - .context("Couldn't parse line of file")?; + let mut parse_state = ParseState::new(syntax); + + let mut scope_stack = ScopeStack::new(); + let mut span_empty = false; + let mut span_start = 0; + let mut open_spans = Vec::new(); + + for line in LinesWithEndings::from(content) { + if code_tag { + out.push_str("<code>"); + } + + if line.len() > 2048 { + // avoid highlighting overly complex lines + write!(out, "{}", Escape(line.trim_end()))?; + } else { + let mut cur_index = 0; + let ops = parse_state.parse_line(line, syntax_set)?; + out.reserve(line.len() + ops.len() * 8); + + if code_tag { + for scope in &open_spans { + out.push_str("<span class=\""); + scope_to_classes(out, *scope); + out.push_str("\">"); + } + } + + // mostly copied from syntect, but slightly modified to keep track + // of open spans, so we can open and close them for each line + for &(i, ref op) in &ops { + if i > cur_index { + span_empty = false; + write!(out, "{}", Escape(&line[cur_index..i]))?; + cur_index = i; + } + 
scope_stack.apply_with_hook(op, |basic_op, _| match basic_op { + BasicScopeStackOp::Push(scope) => { + span_start = out.len(); + span_empty = true; + out.push_str("<span class=\""); + open_spans.push(scope); + scope_to_classes(out, scope); + out.push_str("\">"); + } + BasicScopeStackOp::Pop => { + open_spans.pop(); + if span_empty { + out.truncate(span_start); + } else { + out.push_str("</span>"); + } + span_empty = false; + } + })?; + } + + let line = line.trim_end(); + if line.len() > cur_index { + write!(out, "{}", Escape(&line[cur_index..]))?; + } + + if code_tag { + for _scope in &open_spans { + out.push_str("</span>"); + } + } + } + + if code_tag { + out.push_str("</code>\n"); + } + } + + if !code_tag { + for _scope in &open_spans { + out.push_str("</span>"); + } + } + + Ok(()) +} + +fn scope_to_classes(s: &mut String, scope: Scope) { + let repo = SCOPE_REPO.lock().unwrap(); + for i in 0..(scope.len()) { + let atom = scope.atom_at(i as usize); + let atom_s = repo.atom_str(atom); + if i != 0 { + s.push(' '); + } + s.push_str(atom_s); } +} - Ok(format!( - "<code>{}</code>", - html_generator.finalize().replace('\n', "</code>\n<code>") - )) +// Copied from syntect as it isn't exposed from there. 
+pub struct Escape<'a>(pub &'a str); + +impl<'a> fmt::Display for Escape<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + let Escape(s) = *self; + let pile_o_bits = s; + let mut last = 0; + for (i, ch) in s.bytes().enumerate() { + match ch as char { + '<' | '>' | '&' | '\'' | '"' => { + fmt.write_str(&pile_o_bits[last..i])?; + let s = match ch as char { + '>' => "&gt;", + '<' => "&lt;", + '&' => "&amp;", + '\'' => "&#39;", + '"' => "&quot;", + _ => unreachable!(), + }; + fmt.write_str(s)?; + last = i + 1; + } + _ => {} + } + } + + if last < s.len() { + fmt.write_str(&pile_o_bits[last..])?; + } + Ok(()) + } } #[instrument(skip(diff, syntax_set))] @@ -722,16 +854,13 @@ } else { Cow::Borrowed("patch") }; - let syntax = syntax_set - .find_syntax_by_extension(&extension) - .unwrap_or_else(|| syntax_set.find_syntax_plain_text()); - let mut html_generator = - ClassedHTMLGenerator::new_with_class_style(syntax, syntax_set, ClassStyle::Spaced); - let _res = html_generator.parse_html_for_line_which_includes_newline(&line); + if let Some(class) = class { let _ = write!(diff_output, r#""#); } - diff_output.push_str(&html_generator.finalize()); + + let _res = format_file_inner(&mut diff_output, &line, &extension, syntax_set, false); + if class.is_some() { diff_output.push_str(""); } -- rgit 0.1.3