Skip to content

Commit

Permalink
diff: add option to display complex color-words diffs without inlining
Browse files Browse the repository at this point in the history
In this patch, I use the number of adds<->removes alternation as a threshold,
which approximates the visual complexity of diff hunks. I don't think user can
choose the threshold intuitively, but we need a config knob to try out some.
I set `max-inline-alternation = 3` locally. 0 and 1 mean "disable inlining"
and "inline adds-only/removes-only lines" respectively.

I've added "diff.<format>" config namespace assuming "ui.diff" will be
reorganized as "ui.diff-formatter" or something. #3327

Some other metrics I've tried:
```
// Per-line alternation. This also works well, but can't measure complexity of
// changes across lines.
fn count_max_diff_alternation_per_line(diff_lines: &[DiffLine]) -> usize {
    diff_lines
        .iter()
        .map(|line| {
            let sides = line.hunks.iter().map(|&(side, _)| side);
            sides
                .filter(|&side| side != DiffLineHunkSide::Both)
                .dedup() // omit e.g. left->both->left
                .count()
        })
        .max()
        .unwrap_or(0)
}

// Per-line occupancy of changes. Large diffs don't always look complex.
fn max_diff_token_ratio_per_line(diff_lines: &[DiffLine]) -> f32 {
    diff_lines
        .iter()
        .filter_map(|line| {
            let [both_len, left_len, right_len] =
                line.hunks.iter().fold([0, 0, 0], |mut acc, (side, data)| {
                    let index = match side {
                        DiffLineHunkSide::Both => 0,
                        DiffLineHunkSide::Left => 1,
                        DiffLineHunkSide::Right => 2,
                    };
                    acc[index] += data.len();
                    acc
                });
            // left/right-only change is readable
            (left_len != 0 && right_len != 0).then(|| {
                let diff_len = left_len + right_len;
                let total_len = both_len + left_len + right_len;
                (diff_len as f32) / (total_len as f32)
            })
        })
        .reduce(f32::max)
        .unwrap_or(0.0)
}

// Total occupancy of changes. Large diffs don't always look complex.
fn total_change_ratio(diff_lines: &[DiffLine]) -> f32 {
    let (diff_len, total_len) = diff_lines
        .iter()
        .flat_map(|line| &line.hunks)
        .fold((0, 0), |(diff_len, total_len), (side, data)| {
            let l = data.len();
            match side {
                DiffLineHunkSide::Both => (diff_len, total_len + l),
                DiffLineHunkSide::Left => (diff_len + l, total_len + l),
                DiffLineHunkSide::Right => (diff_len + l, total_len + l),
            }
        });
    (diff_len as f32) / (total_len as f32)
}
```
  • Loading branch information
yuja committed Aug 21, 2024
1 parent be9b7ed commit a83dadd
Show file tree
Hide file tree
Showing 7 changed files with 725 additions and 7 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
`--color-words`, `--git`, `--stat`, `--summary`, `--types`, and external diff
tools in file-by-file mode.

* Color-words diff has gained [an option to display complex changes as separate
lines](docs/config.md#color-words-diff-options).

* A tilde (`~`) at the start of the path will now be expanded to the user's home
directory when configuring a `signing.key` for SSH commit signing.

Expand Down
2 changes: 2 additions & 0 deletions cli/src/commit_templater.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1345,8 +1345,10 @@ fn builtin_tree_diff_methods<'repo>() -> CommitTemplateBuildMethodFnMap<'repo, T
let path_converter = language.path_converter;
let template = (self_property, context_property)
.map(move |(diff, context)| {
// TODO: load defaults from UserSettings?
let options = diff_util::ColorWordsOptions {
context: context.unwrap_or(diff_util::DEFAULT_CONTEXT_LINES),
max_inline_alternation: None,
};
diff.into_formatted(move |formatter, store, tree_diff| {
diff_util::show_color_words_diff(
Expand Down
16 changes: 16 additions & 0 deletions cli/src/config-schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,22 @@
]
}
},
"diff": {
"type": "object",
"description": "Builtin diff formats settings",
"properties": {
"color-words": {
"type": "object",
"description": "Options for color-words diffs",
"properties": {
"max-inline-alternation": {
"type": "integer",
"description": "Maximum number of removed/added word alternation to inline"
}
}
}
}
},
"git": {
"type": "object",
"description": "Settings for git behavior (when using git backend)",
Expand Down
3 changes: 3 additions & 0 deletions cli/src/config/misc.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ amend = ["squash"]
co = ["checkout"]
unamend = ["unsquash"]

[diff.color-words]
max-inline-alternation = -1

[ui]
# TODO: delete ui.allow-filesets in jj 0.26+
allow-filesets = true
Expand Down
100 changes: 93 additions & 7 deletions cli/src/diff_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -407,15 +407,28 @@ fn collect_copied_sources<'a>(
pub struct ColorWordsOptions {
/// Number of context lines to show.
pub context: usize,
/// Maximum number of removed/added word alternation to inline.
pub max_inline_alternation: Option<usize>,
}

impl ColorWordsOptions {
fn from_settings_and_args(
_settings: &UserSettings,
settings: &UserSettings,
args: &DiffFormatArgs,
) -> Result<Self, config::ConfigError> {
let config = settings.config();
let max_inline_alternation = {
let key = "diff.color-words.max-inline-alternation";
match config.get_int(key)? {
-1 => None, // unlimited
n => Some(usize::try_from(n).map_err(|err| {
config::ConfigError::Message(format!("invalid {key}: {err}"))
})?),
}
};
Ok(ColorWordsOptions {
context: args.context.unwrap_or(DEFAULT_CONTEXT_LINES),
max_inline_alternation,
})
}
}
Expand Down Expand Up @@ -467,13 +480,35 @@ fn show_color_words_diff_hunks(
)?;
}
DiffHunk::Different(contents) => {
let word_diff = Diff::by_word(&contents);
let mut diff_line_iter =
DiffLineIterator::with_line_number(word_diff.hunks(), line_number);
for diff_line in diff_line_iter.by_ref() {
show_color_words_diff_line(formatter, &diff_line)?;
let word_diff_hunks = Diff::by_word(&contents).hunks().collect_vec();
let can_inline = match options.max_inline_alternation {
None => true, // unlimited
Some(0) => false, // no need to count alternation
Some(max_num) => {
let groups = split_diff_hunks_by_matching_newline(&word_diff_hunks);
groups.map(count_diff_alternation).max().unwrap_or(0) <= max_num
}
};
if can_inline {
let mut diff_line_iter =
DiffLineIterator::with_line_number(word_diff_hunks.iter(), line_number);
for diff_line in diff_line_iter.by_ref() {
show_color_words_diff_line(formatter, &diff_line)?;
}
line_number = diff_line_iter.next_line_number();
} else {
let (left_lines, right_lines) = unzip_diff_hunks_to_lines(&word_diff_hunks);
for tokens in &left_lines {
show_color_words_line_number(formatter, Some(line_number.left), None)?;
show_color_words_single_sided_line(formatter, tokens, "removed")?;
line_number.left += 1;
}
for tokens in &right_lines {
show_color_words_line_number(formatter, None, Some(line_number.right))?;
show_color_words_single_sided_line(formatter, tokens, "added")?;
line_number.right += 1;
}
}
line_number = diff_line_iter.next_line_number();
}
}
}
Expand Down Expand Up @@ -544,6 +579,7 @@ fn show_color_words_line_number(
Ok(())
}

/// Prints `diff_line` which may contain tokens originating from both sides.
fn show_color_words_diff_line(
formatter: &mut dyn Formatter,
diff_line: &DiffLine,
Expand Down Expand Up @@ -578,6 +614,56 @@ fn show_color_words_diff_line(
Ok(())
}

/// Prints left/right-only line tokens with the given label.
fn show_color_words_single_sided_line(
formatter: &mut dyn Formatter,
tokens: &[(DiffTokenType, &[u8])],
label: &str,
) -> io::Result<()> {
formatter.with_label(label, |formatter| show_diff_line_tokens(formatter, tokens))?;
let (_, data) = tokens.last().expect("diff line must not be empty");
if !data.ends_with(b"\n") {
writeln!(formatter)?;
};
Ok(())
}

/// Counts number of diff-side alternation, ignoring matching hunks.
///
/// This function is meant to measure visual complexity of diff hunks. It's easy
/// to read hunks containing some removed or added words, but is getting harder
/// as more removes and adds interleaved.
///
/// For example,
/// - `[matching]` -> 0
/// - `[left]` -> 1
/// - `[left, matching, left]` -> 1
/// - `[matching, left, right, matching, right]` -> 2
/// - `[left, right, matching, right, left]` -> 3
fn count_diff_alternation(diff_hunks: &[DiffHunk]) -> usize {
diff_hunks
.iter()
.filter_map(|hunk| match hunk {
DiffHunk::Matching(_) => None,
DiffHunk::Different(contents) => Some(contents),
})
// Map non-empty diff side to index (0: left, 1: right)
.flat_map(|contents| contents.iter().positions(|content| !content.is_empty()))
// Omit e.g. left->(matching->)*left
.dedup()
.count()
}

/// Splits hunks into slices of contiguous changed lines.
fn split_diff_hunks_by_matching_newline<'a, 'b>(
diff_hunks: &'a [DiffHunk<'b>],
) -> impl Iterator<Item = &'a [DiffHunk<'b>]> {
diff_hunks.split_inclusive(|hunk| match hunk {
DiffHunk::Matching(content) => content.contains(&b'\n'),
DiffHunk::Different(_) => false,
})
}

struct FileContent {
/// false if this file is likely text; true if it is likely binary.
is_binary: bool,
Expand Down
Loading

0 comments on commit a83dadd

Please sign in to comment.