diff --git a/crates/edit/benches/lib.rs b/crates/edit/benches/lib.rs index 57ccc73ef32..56b92c067ee 100644 --- a/crates/edit/benches/lib.rs +++ b/crates/edit/benches/lib.rs @@ -10,9 +10,8 @@ use std::{mem, vec}; use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main}; use edit::helpers::*; use edit::simd::MemsetSafe; -use edit::{buffer, hash, json, oklab, simd, unicode}; -use stdext::arena; -use stdext::arena::{Arena, scratch_arena}; +use edit::{buffer, glob, hash, json, oklab, simd, unicode}; +use stdext::arena::{self, Arena, scratch_arena}; struct EditingTracePatch<'a>(usize, usize, &'a str); @@ -136,6 +135,15 @@ fn bench_buffer(c: &mut Criterion) { }); } +fn bench_glob(c: &mut Criterion) { + // Same benchmark as in glob-match + const PATH: &str = "foo/bar/foo/bar/foo/bar/foo/bar/foo/bar.txt"; + const GLOB: &str = "foo/**/bar.txt"; + + c.benchmark_group("glob") + .bench_function("glob_match", |b| b.iter(|| assert!(glob::glob_match(GLOB, PATH)))); +} + fn bench_hash(c: &mut Criterion) { c.benchmark_group("hash") .throughput(Throughput::Bytes(8)) @@ -275,6 +283,7 @@ fn bench(c: &mut Criterion) { arena::init(128 * MEBI).unwrap(); bench_buffer(c); + bench_glob(c); bench_hash(c); bench_json(c); bench_oklab(c); diff --git a/crates/edit/src/glob.rs b/crates/edit/src/glob.rs new file mode 100644 index 00000000000..2a9a75adb4d --- /dev/null +++ b/crates/edit/src/glob.rs @@ -0,0 +1,273 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Simple glob matching. +//! +//! Supported patterns: +//! - `*` matches any characters except for path separators, including an empty string. +//! - `**` matches any characters, including an empty string. +//! For convenience, `/**/` also matches `/`. + +use std::path::is_separator; + +#[inline] +pub fn glob_match, N: AsRef<[u8]>>(pattern: P, name: N) -> bool { + glob(pattern.as_ref(), name.as_ref()) +} + +fn glob(pattern: &[u8], name: &[u8]) -> bool { + fast_path(pattern, name).unwrap_or_else(|| slow_path(pattern, name)) +} + +// Fast-pass for the most common patterns: +// * Matching files by extension (e.g., **/*.rs) +// * Matching files by name (e.g., **/Cargo.toml) +fn fast_path(pattern: &[u8], name: &[u8]) -> Option { + // In either case, the glob must start with "**/". + let mut suffix = pattern.strip_prefix(b"**/")?; + if suffix.is_empty() { + return None; + } + + // Determine whether it's "**/" or "**/*". + let mut needs_dir_anchor = true; + if let Some(s) = suffix.strip_prefix(b"*") { + suffix = s; + needs_dir_anchor = false; + } + + // Restrict down to anything we can handle with a suffix check. + if suffix.is_empty() || contains_magic(suffix) { + return None; + } + + Some( + match_path_suffix(name, suffix) + && ( + // In case of "**/*extension" a simple suffix match is sufficient. + !needs_dir_anchor + // But for "**/filename" we need to ensure that path is either "filename"... + || name.len() == suffix.len() + // ...or that it is ".../filename". + || is_separator(name[name.len() - suffix.len() - 1] as char) + ), + ) +} + +fn contains_magic(pattern: &[u8]) -> bool { + pattern.contains(&b'*') +} + +fn match_path_suffix(path: &[u8], suffix: &[u8]) -> bool { + if path.len() < suffix.len() { + return false; + } + + let path = &path[path.len() - suffix.len()..]; + + #[cfg(windows)] + { + path.iter().zip(suffix.iter()).all(|(a, b)| { + let a = if *a == b'\\' { b'/' } else { *a }; + let b = if *b == b'\\' { b'/' } else { *b }; + a.eq_ignore_ascii_case(&b) + }) + } + + #[cfg(not(windows))] + path.eq_ignore_ascii_case(suffix) +} + +// This code is based on https://research.swtch.com/glob.go +// It's not particularly fast, but it doesn't need to be. It doesn't run often. +#[cold] +fn slow_path(pattern: &[u8], name: &[u8]) -> bool { + let mut px = 0; + let mut nx = 0; + let mut next_px = 0; + let mut next_nx = 0; + let mut is_double_star = false; + + while px < pattern.len() || nx < name.len() { + if px < pattern.len() { + match pattern[px] { + b'*' => { + // Try to match at nx. If that doesn't work out, restart at nx+1 next. + next_px = px; + next_nx = nx + 1; + px += 1; + is_double_star = false; + + if px < pattern.len() && pattern[px] == b'*' { + px += 1; + is_double_star = true; + + // For convenience, /**/ also matches / + if px >= 3 + && px < pattern.len() + && pattern[px] == b'/' + && pattern[px - 3] == b'/' + { + px += 1; + } + } + continue; + } + c => { + if nx < name.len() && name[nx].eq_ignore_ascii_case(&c) { + px += 1; + nx += 1; + continue; + } + } + } + } + + // Mismatch. Maybe restart. + if next_nx > 0 + && next_nx <= name.len() + && (is_double_star || !is_separator(name[next_nx - 1] as char)) + { + px = next_px; + nx = next_nx; + continue; + } + + return false; + } + + true +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_glob_match() { + let tests = [ + // Test cases from https://research.swtch.com/glob.go + ("", "", true), + ("x", "", false), + ("", "x", false), + ("abc", "abc", true), + ("*", "abc", true), + ("*c", "abc", true), + ("*b", "abc", false), + ("a*", "abc", true), + ("b*", "abc", false), + ("a*", "a", true), + ("*a", "a", true), + ("a*b*c*d*e*", "axbxcxdxe", true), + ("a*b*c*d*e*", "axbxcxdxexxx", true), + ("*x", "xxx", true), + // Test cases from https://github.com/golang/go/blob/master/src/path/filepath/match_test.go + ("a*", "ab/c", false), + ("a*b", "a/b", false), + ("a*/b", "abc/b", true), + ("a*/b", "a/c/b", false), + ("a*b*c*d*e*/f", "axbxcxdxe/f", true), + ("a*b*c*d*e*/f", "axbxcxdxexxx/f", true), + ("a*b*c*d*e*/f", "axbxcxdxe/xxx/f", false), + ("a*b*c*d*e*/f", "axbxcxdxexxx/fff", false), + // Single star (*) + // - Empty string + ("*", "", true), + // - Anything else is covered above + // Double star (**) + // - Empty string + ("**", "", true), + ("a**", "a", true), + ("**a", "a", true), + // - Prefix + ("**", "abc", true), + ("**", "foo/baz/bar", true), + ("**c", "abc", true), + ("**b", "abc", false), + // - Infix + ("a**c", "ac", true), + ("a**c", "abc", true), + ("a**c", "abd", false), + ("a**d", "abc", false), + ("a**c", "a/bc", true), + ("a**c", "ab/c", true), + ("a**c", "a/b/c", true), + // -- Infix with left separator + ("a/**c", "ac", false), + ("a/**c", "a/c", true), + ("a/**c", "b/c", false), + ("a/**c", "a/d", false), + ("a/**c", "a/b/c", true), + ("a/**c", "a/b/d", false), + ("a/**c", "d/b/c", false), + // -- Infix with right separator + ("a**/c", "ac", false), + ("a**/c", "a/c", true), + ("a**/c", "b/c", false), + ("a**/c", "a/d", false), + ("a**/c", "a/b/c", true), + ("a**/c", "a/b/d", false), + ("a**/c", "d/b/c", false), + // - Infix with two separators + ("a/**/c", "ac", false), + ("a/**/c", "a/c", true), + ("a/**/c", "b/c", false), + ("a/**/c", "a/d", false), + ("a/**/c", "a/b/c", true), + ("a/**/c", "a/b/d", false), + ("a/**/c", "d/b/c", false), + // - * + * is covered above + // - * + ** + ("a*b**c", "abc", true), + ("a*b**c", "aXbYc", true), + ("a*b**c", "aXb/Yc", true), + ("a*b**c", "aXbY/Yc", true), + ("a*b**c", "aXb/Y/c", true), + ("a*b**c", "a/XbYc", false), + ("a*b**c", "aX/XbYc", false), + ("a*b**c", "a/X/bYc", false), + // - ** + * + ("a**b*c", "abc", true), + ("a**b*c", "aXbYc", true), + ("a**b*c", "aXb/Yc", false), + ("a**b*c", "aXbY/Yc", false), + ("a**b*c", "aXb/Y/c", false), + ("a**b*c", "a/XbYc", true), + ("a**b*c", "aX/XbYc", true), + ("a**b*c", "a/X/bYc", true), + // - ** + ** + ("a**b**c", "abc", true), + ("a**b**c", "aXbYc", true), + ("a**b**c", "aXb/Yc", true), + ("a**b**c", "aXbY/Yc", true), + ("a**b**c", "aXb/Y/c", true), + ("a**b**c", "aXbYc", true), + ("a**b**c", "a/XbYc", true), + ("a**b**c", "aX/XbYc", true), + ("a**b**c", "a/X/bYc", true), + // Case insensitivity + ("*.txt", "file.TXT", true), + ("**/*.rs", "dir/file.RS", true), + // Optimized patterns: **/*.ext and **/name + ("**/*.rs", "foo.rs", true), + ("**/*.rs", "dir/foo.rs", true), + ("**/*.rs", "dir/sub/foo.rs", true), + ("**/*.rs", "foo.txt", false), + ("**/*.rs", "dir/foo.txt", false), + ("**/Cargo.toml", "Cargo.toml", true), + ("**/Cargo.toml", "dir/Cargo.toml", true), + ("**/Cargo.toml", "dir/sub/Cargo.toml", true), + ("**/Cargo.toml", "Cargo.lock", false), + ("**/Cargo.toml", "dir/Cargo.lock", false), + ]; + + for (pattern, name, expected) in tests { + let result = glob_match(pattern, name); + assert_eq!( + result, expected, + "test case ({:?}, {:?}, {}) failed, got {}", + pattern, name, expected, result + ); + } + } +} diff --git a/crates/edit/src/lib.rs b/crates/edit/src/lib.rs index 126cbce525f..ca19efca1a1 100644 --- a/crates/edit/src/lib.rs +++ b/crates/edit/src/lib.rs @@ -25,6 +25,7 @@ pub mod clipboard; pub mod document; pub mod framebuffer; pub mod fuzzy; +pub mod glob; pub mod hash; pub mod helpers; pub mod icu;