Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

diff: minor cleanup of unchanged regions #4629

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 68 additions & 83 deletions lib/src/diff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

#![allow(missing_docs)]

use std::cmp::Ordering;
use std::collections::BTreeMap;
use std::hash::BuildHasher;
use std::hash::Hash;
Expand Down Expand Up @@ -528,20 +527,9 @@ impl UnchangedRange {
.collect();
UnchangedRange { base, others }
}
}

impl PartialOrd for UnchangedRange {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}

impl Ord for UnchangedRange {
fn cmp(&self, other: &Self) -> Ordering {
self.base
.start
.cmp(&other.base.start)
.then_with(|| self.base.end.cmp(&other.base.end))
fn is_all_empty(&self) -> bool {
self.base.is_empty() && self.others.iter().all(|r| r.is_empty())
}
}

Expand All @@ -551,6 +539,10 @@ impl Ord for UnchangedRange {
pub struct Diff<'input> {
base_input: &'input BStr,
other_inputs: Vec<&'input BStr>,
/// Sorted list of ranges of unchanged regions in bytes.
///
/// The list should never be empty. The first and the last region may be
/// empty if inputs start/end with changes.
unchanged_regions: Vec<UnchangedRange>,
}

Expand Down Expand Up @@ -602,7 +594,7 @@ impl<'input> Diff<'input> {
let other_sources = iter::zip(&other_inputs, other_token_ranges)
.map(|(input, token_ranges)| DiffSource::new(input, token_ranges))
.collect_vec();
let mut unchanged_regions = match &*other_sources {
let unchanged_regions = match &*other_sources {
// Consider the whole range of the base input as unchanged compared
// to itself.
[] => {
Expand All @@ -615,6 +607,12 @@ impl<'input> Diff<'input> {
// Diff each other input against the base. Intersect the previously
// found ranges with the ranges in the diff.
[first_other_source, tail_other_sources @ ..] => {
let mut unchanged_regions = Vec::new();
// Add an empty range at the start to make life easier for hunks().
unchanged_regions.push(UnchangedRange {
base: 0..0,
others: vec![0..0; other_inputs.len()],
});
let mut first_positions = Vec::new();
collect_unchanged_words(
&mut first_positions,
Expand All @@ -623,17 +621,16 @@ impl<'input> Diff<'input> {
&comp,
);
if tail_other_sources.is_empty() {
first_positions
.iter()
.map(|&(base_pos, other_pos)| {
unchanged_regions.extend(first_positions.iter().map(
|&(base_pos, other_pos)| {
UnchangedRange::from_word_positions(
&base_source,
&other_sources,
base_pos,
&[other_pos],
)
})
.collect()
},
));
} else {
let first_positions = first_positions
.iter()
Expand All @@ -652,28 +649,28 @@ impl<'input> Diff<'input> {
intersect_unchanged_words(current_positions, &new_positions)
},
);
intersected_positions
.iter()
.map(|(base_pos, other_positions)| {
unchanged_regions.extend(intersected_positions.iter().map(
|(base_pos, other_positions)| {
UnchangedRange::from_word_positions(
&base_source,
&other_sources,
*base_pos,
other_positions,
)
})
.collect()
}
},
));
};
// Add an empty range at the end to make life easier for hunks().
unchanged_regions.push(UnchangedRange {
base: base_input.len()..base_input.len(),
others: other_inputs
.iter()
.map(|input| input.len()..input.len())
.collect(),
});
unchanged_regions
}
};
// Add an empty range at the end to make life easier for hunks().
unchanged_regions.push(UnchangedRange {
base: base_input.len()..base_input.len(),
others: other_inputs
.iter()
.map(|input| input.len()..input.len())
.collect(),
});

let mut diff = Self {
base_input,
Expand Down Expand Up @@ -709,16 +706,8 @@ impl<'input> Diff<'input> {
diff
}

pub fn hunks<'diff>(&'diff self) -> DiffHunkIterator<'diff, 'input> {
DiffHunkIterator {
diff: self,
previous: UnchangedRange {
base: 0..0,
others: vec![0..0; self.other_inputs.len()],
},
unchanged_emitted: true,
unchanged_iter: self.unchanged_regions.iter(),
}
pub fn hunks(&self) -> DiffHunkIterator<'_, 'input> {
DiffHunkIterator::new(self)
}

/// Returns contents at the unchanged `range`.
Expand Down Expand Up @@ -749,18 +738,15 @@ impl<'input> Diff<'input> {
tokenizer: impl Fn(&[u8]) -> Vec<Range<usize>>,
compare: impl CompareBytes,
) {
let mut previous = UnchangedRange {
base: 0..0,
others: vec![0..0; self.other_inputs.len()],
};
let mut new_unchanged_ranges = vec![];
for current in &self.unchanged_regions {
let mut new_unchanged_ranges = vec![self.unchanged_regions[0].clone()];
for window in self.unchanged_regions.windows(2) {
let [previous, current]: &[_; 2] = window.try_into().unwrap();
// For the changed region between the previous region and the current one,
// create a new Diff instance. Then adjust the start positions and
// offsets to be valid in the context of the larger Diff instance
// (`self`).
let refined_diff =
Diff::for_tokenizer(self.hunk_between(&previous, current), &tokenizer, &compare);
Diff::for_tokenizer(self.hunk_between(previous, current), &tokenizer, &compare);
for refined in &refined_diff.unchanged_regions {
let new_base_start = refined.base.start + previous.base.end;
let new_base_end = refined.base.end + previous.base.end;
Expand All @@ -772,14 +758,9 @@ impl<'input> Diff<'input> {
others: new_others,
});
}
previous = current.clone();
new_unchanged_ranges.push(current.clone());
}
self.unchanged_regions = self
.unchanged_regions
.iter()
.cloned()
.merge(new_unchanged_ranges)
.collect_vec();
self.unchanged_regions = new_unchanged_ranges;
self.compact_unchanged_regions();
}

Expand Down Expand Up @@ -845,40 +826,44 @@ pub enum DiffHunkKind {

pub struct DiffHunkIterator<'diff, 'input> {
diff: &'diff Diff<'input>,
previous: UnchangedRange,
previous: &'diff UnchangedRange,
unchanged_emitted: bool,
unchanged_iter: slice::Iter<'diff, UnchangedRange>,
}

impl<'diff, 'input> DiffHunkIterator<'diff, 'input> {
fn new(diff: &'diff Diff<'input>) -> Self {
let mut unchanged_iter = diff.unchanged_regions.iter();
let previous = unchanged_iter.next().unwrap();
DiffHunkIterator {
diff,
previous,
unchanged_emitted: previous.is_all_empty(),
unchanged_iter,
}
}
}

impl<'diff, 'input> Iterator for DiffHunkIterator<'diff, 'input> {
type Item = DiffHunk<'input>;

fn next(&mut self) -> Option<Self::Item> {
loop {
if !self.unchanged_emitted {
self.unchanged_emitted = true;
let contents = self.diff.hunk_at(&self.previous).collect_vec();
if contents.iter().any(|content| !content.is_empty()) {
let kind = DiffHunkKind::Matching;
return Some(DiffHunk { kind, contents });
}
}
if let Some(current) = self.unchanged_iter.next() {
let contents = self
.diff
.hunk_between(&self.previous, current)
.collect_vec();
self.previous = current.clone();
self.unchanged_emitted = false;
if contents.iter().any(|content| !content.is_empty()) {
let kind = DiffHunkKind::Different;
return Some(DiffHunk { kind, contents });
}
} else {
break;
}
if !self.unchanged_emitted {
self.unchanged_emitted = true;
let contents = self.diff.hunk_at(self.previous).collect_vec();
let kind = DiffHunkKind::Matching;
return Some(DiffHunk { kind, contents });
}
None
let current = self.unchanged_iter.next()?;
let contents = self.diff.hunk_between(self.previous, current).collect_vec();
debug_assert!(
contents.iter().any(|content| !content.is_empty()),
"unchanged regions should have been compacted"
);
self.previous = current;
self.unchanged_emitted = self.previous.is_all_empty();
let kind = DiffHunkKind::Different;
Some(DiffHunk { kind, contents })
}
}

Expand Down