From 10c9a133b7a02f3d81ee87e85196e49626ca3c2b Mon Sep 17 00:00:00 2001 From: Utkarsh Kukreti Date: Sun, 4 Feb 2024 12:50:24 +0530 Subject: [PATCH 1/9] first implementation of myers diff [1] [1]: https://www.xmailserver.org/diff2.pdf --- benches/benches.rs | 32 ++++ src/lib.rs | 397 +++++++++++++++++++++++++++++++++++++-------- tests/tests.rs | 50 ++++++ 3 files changed, 407 insertions(+), 72 deletions(-) diff --git a/benches/benches.rs b/benches/benches.rs index 691d44c..bad40de 100644 --- a/benches/benches.rs +++ b/benches/benches.rs @@ -70,6 +70,22 @@ fn bench_real_world(c: &mut Criterion) { }) }); + c.bench_function( + "diff::myers::lines on gitignore files from rust-lang/rust", + |b| { + b.iter(|| { + for (i, left) in gitignores.iter().enumerate() { + // diff with previous 3, itself, and next 3 + for right in + gitignores[i.saturating_sub(3)..(i + 3).min(gitignores.len())].iter() + { + ::diff::myers::lines(&left, &right); + } + } + }) + }, + ); + c.bench_function("diff::chars on gitignore files from rust-lang/rust", |b| { b.iter(|| { for (i, left) in gitignores.iter().enumerate() { @@ -80,4 +96,20 @@ fn bench_real_world(c: &mut Criterion) { } }) }); + + c.bench_function( + "diff::myers::chars on gitignore files from rust-lang/rust", + |b| { + b.iter(|| { + for (i, left) in gitignores.iter().enumerate() { + // diff with previous 2, itself, and next 2 + for right in + gitignores[i.saturating_sub(2)..(i + 2).min(gitignores.len())].iter() + { + ::diff::myers::chars(&left, &right); + } + } + }) + }, + ); } diff --git a/src/lib.rs b/src/lib.rs index 5150744..29f168c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,7 @@ #![forbid(unsafe_code)] +use std::marker::PhantomData; + /// A fragment of a computed diff. #[derive(Clone, Debug, PartialEq, Eq)] pub enum Result { @@ -13,12 +15,54 @@ pub enum Result { /// Computes the diff between two slices. pub fn slice<'a, T: PartialEq>(left: &'a [T], right: &'a [T]) -> Vec> { - do_diff(left, right, |t| t) + do_slice::<_, LeadingTrailing>(left, right) } /// Computes the diff between the lines of two strings. pub fn lines<'a>(left: &'a str, right: &'a str) -> Vec> { - let mut diff = do_diff( + do_lines::>(left, right) +} + +/// Computes the diff between the chars of two strings. +pub fn chars<'a>(left: &'a str, right: &'a str) -> Vec> { + do_chars::>(left, right) +} + +/// Computes diffs using the Myers Diff algorithm [1]. +/// This algorithm uses O(M + N) memory and runs in expected O(M + N + D^2) +/// time, where N and M are the lengths of the inputs and D is the number of +/// differences. +/// In practice, this algorithm is faster than the naive algorithm if the inputs +/// have few differences, and always uses much less memory. +/// It is slower than the naive algorithm if the inputs have many differences, +/// which is why this implementation is provided in a separate module and not +/// the default. +/// [1]: https://www.xmailserver.org/diff2.pdf +pub mod myers { + use super::{LeadingTrailing, Myers, Result}; + + /// Computes the diff between two slices. + pub fn slice<'a, T: PartialEq>(left: &'a [T], right: &'a [T]) -> Vec> { + super::do_slice::<_, LeadingTrailing>(left, right) + } + + /// Computes the diff between the lines of two strings. + pub fn lines<'a>(left: &'a str, right: &'a str) -> Vec> { + super::do_lines::>(left, right) + } + + /// Computes the diff between the chars of two strings. + pub fn chars<'a>(left: &'a str, right: &'a str) -> Vec> { + super::do_chars::>(left, right) + } +} + +fn do_slice<'a, T: PartialEq, D: Diff>(left: &'a [T], right: &'a [T]) -> Vec> { + diff::<_, _, D>(left, right, |t| t) +} + +fn do_lines<'a, D: Diff>(left: &'a str, right: &'a str) -> Vec> { + let mut diff = diff::<_, _, D>( &left.lines().collect::>(), &right.lines().collect::>(), |str| *str, @@ -41,99 +85,308 @@ pub fn lines<'a>(left: &'a str, right: &'a str) -> Vec> { diff } -/// Computes the diff between the chars of two strings. -pub fn chars<'a>(left: &'a str, right: &'a str) -> Vec> { - do_diff( +fn do_chars<'a, D: Diff>(left: &'a str, right: &'a str) -> Vec> { + diff::<_, _, D>( &left.chars().collect::>(), &right.chars().collect::>(), |char| *char, ) } -fn do_diff<'a, T, F, U>(left: &'a [T], right: &'a [T], mapper: F) -> Vec> +trait Diff { + fn diff<'a, T, U, Mapper>( + left: &'a [T], + right: &'a [T], + mapper: &'_ Mapper, + diff: &'_ mut Vec>, + ) where + T: PartialEq, + Mapper: Fn(&'a T) -> U; +} + +fn diff<'a, T, U, D>(left: &'a [T], right: &'a [T], mapper: impl Fn(&'a T) -> U) -> Vec> where T: PartialEq, - F: Fn(&'a T) -> U, + D: Diff, { - let leading_equals = left - .iter() - .zip(right.iter()) - .take_while(|(l, r)| l == r) - .count(); - let trailing_equals = left[leading_equals..] - .iter() - .rev() - .zip(right[leading_equals..].iter().rev()) - .take_while(|(l, r)| l == r) - .count(); - let mut diff = Vec::with_capacity(left.len().max(right.len())); + D::diff(left, right, &mapper, &mut diff); + diff +} - diff.extend( - left[..leading_equals] - .iter() - .zip(&right[..leading_equals]) - .map(|(l, r)| Result::Both(mapper(l), mapper(r))), - ); +struct LeadingTrailing { + phantom: PhantomData, +} - do_naive_diff( - &left[leading_equals..left.len() - trailing_equals], - &right[leading_equals..right.len() - trailing_equals], - &mapper, - &mut diff, - ); +impl Diff for LeadingTrailing { + fn diff<'a, T, U, Mapper>( + left: &'a [T], + right: &'a [T], + mapper: &'_ Mapper, + diff: &'_ mut Vec>, + ) where + T: PartialEq, + Mapper: Fn(&'a T) -> U, + { + let leading_equals = left.iter().zip(right).take_while(|(l, r)| l == r).count(); - diff.extend( - left[left.len() - trailing_equals..] + let trailing_equals = left[leading_equals..] .iter() - .zip(&right[right.len() - trailing_equals..]) - .map(|(l, r)| Result::Both(mapper(l), mapper(r))), - ); + .rev() + .zip(right[leading_equals..].iter().rev()) + .take_while(|(l, r)| l == r) + .count(); - diff + diff.extend( + left[..leading_equals] + .iter() + .zip(right) + .map(|(l, r)| Result::Both(mapper(l), mapper(r))), + ); + + Inner::diff( + &left[leading_equals..left.len() - trailing_equals], + &right[leading_equals..right.len() - trailing_equals], + &mapper, + diff, + ); + + diff.extend( + left[left.len() - trailing_equals..] + .iter() + .zip(&right[right.len() - trailing_equals..]) + .map(|(l, r)| Result::Both(mapper(l), mapper(r))), + ); + } } -fn do_naive_diff<'a, T, F, U>(left: &'a [T], right: &'a [T], mapper: F, diff: &mut Vec>) -where - T: PartialEq, - F: Fn(&'a T) -> U, -{ - let mut table = Vec2::new(0u32, [left.len() + 1, right.len() + 1]); - - for (i, l) in left.iter().enumerate() { - for (j, r) in right.iter().enumerate() { - table.set( - [i + 1, j + 1], - if l == r { - table.get([i, j]) + 1 - } else { - *table.get([i, j + 1]).max(table.get([i + 1, j])) - }, - ); +struct Naive {} + +impl Diff for Naive { + fn diff<'a, T, U, Mapper>( + left: &'a [T], + right: &'a [T], + mapper: &'_ Mapper, + diff: &'_ mut Vec>, + ) where + T: PartialEq, + Mapper: Fn(&'a T) -> U, + { + let mut table = Vec2::new(0u32, [left.len() + 1, right.len() + 1]); + + for (i, l) in left.iter().enumerate() { + for (j, r) in right.iter().enumerate() { + table.set( + [i + 1, j + 1], + if l == r { + table.get([i, j]) + 1 + } else { + *table.get([i, j + 1]).max(table.get([i + 1, j])) + }, + ); + } } + + let start = diff.len(); + + let mut i = table.len[0] - 1; + let mut j = table.len[1] - 1; + loop { + if j > 0 && (i == 0 || table.get([i, j]) == table.get([i, j - 1])) { + j -= 1; + diff.push(Result::Right(mapper(&right[j]))); + } else if i > 0 && (j == 0 || table.get([i, j]) == table.get([i - 1, j])) { + i -= 1; + diff.push(Result::Left(mapper(&left[i]))); + } else if i > 0 && j > 0 { + i -= 1; + j -= 1; + diff.push(Result::Both(mapper(&left[i]), mapper(&right[j]))); + } else { + break; + } + } + + diff[start..].reverse(); } +} + +struct Snake { + d: usize, + x: usize, + y: usize, + u: usize, + v: usize, +} + +impl Snake { + fn find(left: &[T], right: &[T], forward: &mut [isize], backward: &mut [isize]) -> Self + where + T: PartialEq, + { + let (n, m) = (left.len() as isize, right.len() as isize); + + debug_assert!(forward.len() >= (n + m + 1) as usize); + debug_assert!(backward.len() >= (n + m + 1) as usize); + + let delta = n - m; + + let index = |k: isize| (k - 1).rem_euclid(n + m + 1) as usize; + + forward[index(1)] = 0; + backward[index(1)] = n; + + for d in 0..=((n + m + 1) / 2) { + for k in (-d..=d).step_by(2) { + let x = if k == -d || k != d && forward[index(k - 1)] < forward[index(k + 1)] { + forward[index(k + 1)] + } else { + forward[index(k - 1)] + 1 + }; + + let y = x - k; + + let (mut u, mut v) = (x, y); + + while u < n && v < m && left[u as usize] == right[v as usize] { + u += 1; + v += 1; + } + + forward[index(k)] = u; + + if delta.rem_euclid(2) == 1 + && (delta - (d - 1)..=delta + (d - 1)).contains(&k) + && backward[index(delta - k)] <= u + { + debug_assert!(d >= 1 && x >= 0 && y >= 0 && u >= 0 && v >= 0); - let start = diff.len(); - - let mut i = table.len[0] - 1; - let mut j = table.len[1] - 1; - loop { - if j > 0 && (i == 0 || table.get([i, j]) == table.get([i, j - 1])) { - j -= 1; - diff.push(Result::Right(mapper(&right[j]))); - } else if i > 0 && (j == 0 || table.get([i, j]) == table.get([i - 1, j])) { - i -= 1; - diff.push(Result::Left(mapper(&left[i]))); - } else if i > 0 && j > 0 { - i -= 1; - j -= 1; - diff.push(Result::Both(mapper(&left[i]), mapper(&right[j]))); - } else { - break; + return Snake { + d: (2 * d - 1) as usize, + x: x as usize, + y: y as usize, + u: u as usize, + v: v as usize, + }; + } + } + + for k in (-d..=d).step_by(2) { + let u = if k == -d || k != d && backward[index(k - 1)] > backward[index(k + 1)] { + backward[index(k + 1)] + } else { + backward[index(k - 1)] - 1 + }; + + let v = u + k - delta; + + let (mut x, mut y) = (u, v); + + while x > 0 && y > 0 && left[(x - 1) as usize] == right[(y - 1) as usize] { + x -= 1; + y -= 1; + } + + backward[index(k)] = x; + + if delta.rem_euclid(2) == 0 + && (-d..=d).contains(&(k - delta)) + && forward[index(delta - k)] >= x + { + debug_assert!(d >= 0 && x >= 0 && y >= 0 && u >= 0 && v >= 0); + + return Snake { + d: (2 * d) as usize, + x: x as usize, + y: y as usize, + u: u as usize, + v: v as usize, + }; + } + } } + + unreachable!() } +} - diff[start..].reverse(); +struct Myers {} + +impl Diff for Myers { + fn diff<'a, T, U, Mapper>( + left: &'a [T], + right: &'a [T], + mapper: &'_ Mapper, + diff: &'_ mut Vec>, + ) where + T: PartialEq, + Mapper: Fn(&'a T) -> U, + { + let mut buffer = vec![0; 2 * (left.len() + right.len() + 1)]; + + let (forward, backward) = buffer.split_at_mut(left.len() + right.len() + 1); + + recur(left, right, mapper, diff, forward, backward); + + fn recur<'a, T, U, Mapper>( + left: &'a [T], + right: &'a [T], + mapper: &Mapper, + diff: &mut Vec>, + forward: &mut [isize], + backward: &mut [isize], + ) where + T: PartialEq, + Mapper: Fn(&'a T) -> U, + { + let (n, m) = (left.len(), right.len()); + if n > 0 && m > 0 { + let snake = Snake::find(left, right, forward, backward); + + debug_assert_eq!(snake.u - snake.x, snake.v - snake.y); + + if snake.d > 1 { + recur( + &left[..snake.x], + &right[..snake.y], + mapper, + diff, + forward, + backward, + ); + diff.extend( + left[snake.x..snake.u] + .iter() + .zip(&right[snake.y..snake.v]) + .map(|(l, r)| Result::Both(mapper(l), mapper(r))), + ); + recur( + &left[snake.u..], + &right[snake.v..], + mapper, + diff, + forward, + backward, + ); + } else { + diff.extend( + left.iter() + .zip(right) + .map(|(l, r)| Result::Both(mapper(l), mapper(r))), + ); + if m > n { + diff.extend(right[n..].iter().map(|r| Result::Right(mapper(r)))); + } else { + diff.extend(left[m..].iter().map(|l| Result::Left(mapper(l)))); + } + } + } else if n > 0 { + diff.extend(left.iter().map(|l| Result::Left(mapper(l)))); + } else if m > 0 { + diff.extend(right.iter().map(|r| Result::Right(mapper(r)))); + } + } + } } struct Vec2 { diff --git a/tests/tests.rs b/tests/tests.rs index 7e5f28c..08994f5 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -59,6 +59,14 @@ fn test_slice() { let (left_, right_) = undiff(&diff); assert_eq!(left, &left_[..]); assert_eq!(right, &right_[..]); + + let diff2 = ::diff::myers::slice(left, right); + assert_eq!(diff2.len(), len); + let (left_, right_) = undiff(&diff2); + assert_eq!(left, &left_[..]); + assert_eq!(right, &right_[..]); + + assert_eq!(diff.len(), diff2.len()); } let slice: &[()] = &[]; @@ -95,6 +103,14 @@ fn test_slice_quickcheck() { } ::quickcheck::quickcheck(prop as fn(Vec, Vec) -> bool); + + fn prop_myers(left: Vec, right: Vec) -> bool { + let diff = ::diff::myers::slice(&left, &right); + let (left_, right_) = undiff(&diff); + left == left_[..] && right == right_[..] + } + + ::quickcheck::quickcheck(prop_myers as fn(Vec, Vec) -> bool); } #[test] @@ -105,6 +121,14 @@ fn test_lines() { let (left_, right_) = undiff_lines(&diff); assert_eq!(left, left_); assert_eq!(right, right_); + + let diff2 = ::diff::myers::lines(left, right); + assert_eq!(diff2.len(), len); + let (left_, right_) = undiff_lines(&diff2); + assert_eq!(left, left_); + assert_eq!(right, right_); + + assert_eq!(diff.len(), diff2.len()); } go("", "", 0); @@ -133,6 +157,14 @@ fn test_chars() { let (left_, right_) = undiff_chars(&diff); assert_eq!(left, left_); assert_eq!(right, right_); + + let diff2 = ::diff::myers::chars(left, right); + assert_eq!(diff2.len(), len); + let (left_, right_) = undiff_chars(&diff2); + assert_eq!(left, left_); + assert_eq!(right, right_); + + assert_eq!(diff.len(), diff2.len()); } go("", "", 0); @@ -149,6 +181,9 @@ fn test_chars() { fn test_issue_4() { assert_eq!(::diff::slice(&[1], &[2]), vec![Left(&1), Right(&2)]); assert_eq!(::diff::lines("a", "b"), vec![Left("a"), Right("b")]); + + assert_eq!(::diff::myers::slice(&[1], &[2]), vec![Left(&1), Right(&2)]); + assert_eq!(::diff::myers::lines("a", "b"), vec![Left("a"), Right("b")]); } #[test] @@ -190,6 +225,7 @@ BacktraceNode { ] }"#; ::diff::lines(actual, expected); + ::diff::myers::lines(actual, expected); } #[test] @@ -216,6 +252,20 @@ fn gitignores() { "tests/data/gitignores.chars.txt", ); + go( + &all, + ::diff::myers::lines, + undiff_lines, + "tests/data/gitignores.lines.txt", + ); + + go( + &all, + ::diff::myers::chars, + undiff_chars, + "tests/data/gitignores.chars.txt", + ); + fn go<'a, T, Diff, Undiff>(all: &'a [String], diff: Diff, undiff: Undiff, path: &str) where Diff: Fn(&'a str, &'a str) -> Vec<::diff::Result>, From eb843e55bc7d80add5bc0106ccbd329ddbceeb59 Mon Sep 17 00:00:00 2001 From: Utkarsh Kukreti Date: Sun, 4 Feb 2024 17:00:35 +0530 Subject: [PATCH 2/9] allow diffing using myers with --myers in diff example --- examples/diff.rs | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/examples/diff.rs b/examples/diff.rs index 849c93c..04459e4 100644 --- a/examples/diff.rs +++ b/examples/diff.rs @@ -1,18 +1,28 @@ extern crate diff; fn main() { - let args = std::env::args().collect::>(); + let mut args = std::env::args().skip(1).collect::>(); - if args.len() != 3 { - println!("usage: cargo run --example diff "); + let myers = args.iter().any(|arg| arg == "--myers"); + + args.retain(|arg| arg != "--myers"); + + if args.len() != 2 { + println!("usage: cargo run --example diff [--myers] "); std::process::exit(1); } - let left = std::fs::read_to_string(&args[1]).unwrap(); - let right = std::fs::read_to_string(&args[2]).unwrap(); + let left = std::fs::read_to_string(&args[0]).unwrap(); + let right = std::fs::read_to_string(&args[1]).unwrap(); + + let diff = if myers { + diff::myers::lines(&left, &right) + } else { + diff::lines(&left, &right) + }; - for diff in diff::lines(&left, &right) { - match diff { + for d in diff { + match d { diff::Result::Left(l) => println!("-{}", l), diff::Result::Both(l, _) => println!(" {}", l), diff::Result::Right(r) => println!("+{}", r), From a4dfbfd8b2a8b65c892fd03bb19b13f0a732f8c1 Mon Sep 17 00:00:00 2001 From: Utkarsh Kukreti Date: Sun, 4 Feb 2024 17:12:42 +0530 Subject: [PATCH 3/9] add char diffing to diff example with --chars --- examples/diff.rs | 54 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/examples/diff.rs b/examples/diff.rs index 04459e4..5d6b530 100644 --- a/examples/diff.rs +++ b/examples/diff.rs @@ -5,7 +5,9 @@ fn main() { let myers = args.iter().any(|arg| arg == "--myers"); - args.retain(|arg| arg != "--myers"); + let chars = args.iter().any(|arg| arg == "--chars"); + + args.retain(|arg| arg != "--myers" && arg != "--chars"); if args.len() != 2 { println!("usage: cargo run --example diff [--myers] "); @@ -15,17 +17,47 @@ fn main() { let left = std::fs::read_to_string(&args[0]).unwrap(); let right = std::fs::read_to_string(&args[1]).unwrap(); - let diff = if myers { - diff::myers::lines(&left, &right) + if chars { + let diff = if myers { + diff::myers::chars(&left, &right) + } else { + diff::chars(&left, &right) + }; + + let mut open = None; + + for d in diff { + match (d, open) { + (diff::Result::Left(l), Some("-]")) => print!("{}", l), + (diff::Result::Left(l), open_) => { + print!("{}[-{}", open_.unwrap_or(""), l); + open = Some("-]"); + } + (diff::Result::Right(r), Some("+}")) => print!("{}", r), + (diff::Result::Right(r), open_) => { + print!("{}{{+{}", open_.unwrap_or(""), r); + open = Some("+}"); + } + (diff::Result::Both(l, _), Some(open_)) => { + print!("{}{}", open_, l); + open = None; + } + (diff::Result::Both(l, _), None) => print!("{}", l), + } + } } else { - diff::lines(&left, &right) - }; - - for d in diff { - match d { - diff::Result::Left(l) => println!("-{}", l), - diff::Result::Both(l, _) => println!(" {}", l), - diff::Result::Right(r) => println!("+{}", r), + let diff = if myers { + diff::myers::lines(&left, &right) + } else { + diff::lines(&left, &right) + }; + + for d in diff { + match d { + diff::Result::Left(l) => println!("-{}", l), + diff::Result::Both(l, _) => println!(" {}", l), + diff::Result::Right(r) => println!("+{}", r), + } } } } From 704cd6f7bc326e0aa615b0dcf7d462dba85b03bd Mon Sep 17 00:00:00 2001 From: Utkarsh Kukreti Date: Sun, 4 Feb 2024 17:38:48 +0530 Subject: [PATCH 4/9] "close" is a better name for this --- examples/diff.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/diff.rs b/examples/diff.rs index 5d6b530..561b358 100644 --- a/examples/diff.rs +++ b/examples/diff.rs @@ -24,23 +24,23 @@ fn main() { diff::chars(&left, &right) }; - let mut open = None; + let mut close = None; for d in diff { - match (d, open) { + match (d, close) { (diff::Result::Left(l), Some("-]")) => print!("{}", l), (diff::Result::Left(l), open_) => { print!("{}[-{}", open_.unwrap_or(""), l); - open = Some("-]"); + close = Some("-]"); } (diff::Result::Right(r), Some("+}")) => print!("{}", r), (diff::Result::Right(r), open_) => { print!("{}{{+{}", open_.unwrap_or(""), r); - open = Some("+}"); + close = Some("+}"); } (diff::Result::Both(l, _), Some(open_)) => { print!("{}{}", open_, l); - open = None; + close = None; } (diff::Result::Both(l, _), None) => print!("{}", l), } From a3c3ea66260c02b5554063257543b6accf63e454 Mon Sep 17 00:00:00 2001 From: Utkarsh Kukreti Date: Sun, 4 Feb 2024 17:58:42 +0530 Subject: [PATCH 5/9] add better slice benchmarks that show the benefits of myers --- Cargo.toml | 3 ++- benches/benches.rs | 60 ++++++++++++++++++++++++---------------------- 2 files changed, 34 insertions(+), 29 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 29736db..41254cd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,8 +9,9 @@ homepage = "https://github.com/utkarshkukreti/diff.rs" repository = "https://github.com/utkarshkukreti/diff.rs" [dev-dependencies] -quickcheck = "1.0.3" criterion = "0.5.1" +fastrand = "2.0.1" +quickcheck = "1.0.3" [[bench]] name = "benches" diff --git a/benches/benches.rs b/benches/benches.rs index bad40de..d8909de 100644 --- a/benches/benches.rs +++ b/benches/benches.rs @@ -7,40 +7,44 @@ criterion::criterion_group!(benches, bench_slice, bench_chars, bench_real_world) criterion::criterion_main!(benches); fn bench_slice(c: &mut Criterion) { - c.bench_function("empty", |b| { - let slice = [0u8; 0]; - b.iter(|| ::diff::slice(&slice, &slice)); - }); + let mut rng = fastrand::Rng::with_seed(0); - c.bench_function("10 equal items", |b| { - let slice = [0u8; 10]; - b.iter(|| ::diff::slice(&slice, &slice)); - }); + let left = (0..1000).map(|_| rng.u8(..)).collect::>(); - c.bench_function("10 non-equal items", |b| { - let (left, right) = ([0u8; 10], [1u8; 10]); - b.iter(|| ::diff::slice(&left, &right)); - }); + let swap_10 = swap(&left, 10, &mut rng); + let swap_50 = swap(&left, 50, &mut rng); + let swap_100 = swap(&left, 100, &mut rng); + let swap_500 = swap(&left, 500, &mut rng); + let swap_1000 = swap(&left, 1000, &mut rng); - c.bench_function("100 equal items", |b| { - let slice = [0u8; 100]; - b.iter(|| ::diff::slice(&slice, &slice)); - }); + for (name, vec) in [ + ("swap_10", &swap_10), + ("swap_50", &swap_50), + ("swap_100", &swap_100), + ("swap_500", &swap_500), + ("swap_1000", &swap_1000), + ] { + assert_eq!( + ::diff::slice(&left, vec).len(), + ::diff::myers::slice(&left, vec).len() + ); - c.bench_function("100 non-equal items", |b| { - let (left, right) = ([0u8; 100], [1u8; 100]); - b.iter(|| ::diff::slice(&left, &right)); - }); + c.bench_function(&format!("diff::slice {}", name), |b| { + b.iter(|| ::diff::slice(&left, &vec)); + }); - c.bench_function("1000 equal items", |b| { - let slice = [0u8; 1000]; - b.iter(|| ::diff::slice(&slice, &slice)); - }); + c.bench_function(&format!("diff::myers::slice {}", name), |b| { + b.iter(|| ::diff::myers::slice(&left, &vec)); + }); + } - c.bench_function("1000 non-equal items", |b| { - let (left, right) = ([0u8; 1000], [1u8; 1000]); - b.iter(|| ::diff::slice(&left, &right)); - }); + fn swap(slice: &[T], swaps: usize, rng: &mut fastrand::Rng) -> Vec { + let mut vec = slice.to_vec(); + for _ in 0..swaps { + vec.swap(rng.usize(..slice.len()), rng.usize(..slice.len())); + } + vec + } } fn bench_chars(c: &mut Criterion) { From 0abcbd5034fd1b8ea53e30e4d7fcc3d2a6d731f0 Mon Sep 17 00:00:00 2001 From: Utkarsh Kukreti Date: Sun, 4 Feb 2024 18:00:45 +0530 Subject: [PATCH 6/9] update github workflows --- .github/workflows/main.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c601bff..627b565 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -11,11 +11,10 @@ jobs: os: [ubuntu-latest, macos-latest, windows-latest] rust: [stable, beta, nightly] steps: - - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@master with: toolchain: ${{ matrix.rust }} - override: true components: clippy, rustfmt - run: cargo build - run: cargo test From 2206b48b53a9e5b25f97b0aa1dbe069613a38a10 Mon Sep 17 00:00:00 2001 From: Utkarsh Kukreti Date: Sun, 4 Feb 2024 18:01:18 +0530 Subject: [PATCH 7/9] run benchmarks on github workflow on ubuntu --- .github/workflows/main.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 627b565..3cb12c9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -22,3 +22,5 @@ jobs: if: ${{ matrix.os == 'ubuntu-latest' }} - run: cargo clippy continue-on-error: true + - run: cargo bench + if: ${{ matrix.os == 'ubuntu-latest' }} From 9c929fa66192af39cbf518dcdebfc5f17f4eb296 Mon Sep 17 00:00:00 2001 From: Utkarsh Kukreti Date: Sun, 4 Feb 2024 18:11:07 +0530 Subject: [PATCH 8/9] remove not useful diff::chars benchmark --- benches/benches.rs | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/benches/benches.rs b/benches/benches.rs index d8909de..00fbce0 100644 --- a/benches/benches.rs +++ b/benches/benches.rs @@ -3,7 +3,7 @@ extern crate diff; use criterion::Criterion; -criterion::criterion_group!(benches, bench_slice, bench_chars, bench_real_world); +criterion::criterion_group!(benches, bench_slice, bench_real_world); criterion::criterion_main!(benches); fn bench_slice(c: &mut Criterion) { @@ -47,15 +47,6 @@ fn bench_slice(c: &mut Criterion) { } } -fn bench_chars(c: &mut Criterion) { - c.bench_function("1024 byte string, last 256 different", |b| { - let left = "?".repeat(768) + &"_".repeat(256); - let right = "?".repeat(768) + &"!".repeat(256); - assert_eq!(left.len(), right.len()); - b.iter(|| ::diff::chars(&left, &right)); - }); -} - fn bench_real_world(c: &mut Criterion) { let gitignores = std::fs::read_to_string("tests/data/gitignores.txt") .unwrap() From 9216dbfddf8a564f0227a32678522b306d60536a Mon Sep 17 00:00:00 2001 From: Utkarsh Kukreti Date: Sun, 4 Feb 2024 20:05:56 +0530 Subject: [PATCH 9/9] tweak benchmark name --- benches/benches.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/benches/benches.rs b/benches/benches.rs index 00fbce0..bc065b7 100644 --- a/benches/benches.rs +++ b/benches/benches.rs @@ -29,13 +29,18 @@ fn bench_slice(c: &mut Criterion) { ::diff::myers::slice(&left, vec).len() ); - c.bench_function(&format!("diff::slice {}", name), |b| { + let diffs = ::diff::slice(&left, vec).len() - left.len(); + + c.bench_function(&format!("diff::slice {} ({} diffs)", name, diffs), |b| { b.iter(|| ::diff::slice(&left, &vec)); }); - c.bench_function(&format!("diff::myers::slice {}", name), |b| { - b.iter(|| ::diff::myers::slice(&left, &vec)); - }); + c.bench_function( + &format!("diff::myers::slice {} ({} diffs)", name, diffs), + |b| { + b.iter(|| ::diff::myers::slice(&left, &vec)); + }, + ); } fn swap(slice: &[T], swaps: usize, rng: &mut fastrand::Rng) -> Vec {