From a5861872b14d0ff928e4cdaa87942ddf41dc3e0d Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Tue, 20 Jan 2026 19:18:35 +0100 Subject: [PATCH 1/5] Add a glob matcher --- crates/edit/src/glob.rs | 234 ++++++++++++++++++++++++++++++++++++++++ crates/edit/src/lib.rs | 1 + 2 files changed, 235 insertions(+) create mode 100644 crates/edit/src/glob.rs diff --git a/crates/edit/src/glob.rs b/crates/edit/src/glob.rs new file mode 100644 index 00000000000..dd56b9bc2cd --- /dev/null +++ b/crates/edit/src/glob.rs @@ -0,0 +1,234 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use std::path::is_separator; + +pub fn glob_match(glob: &[u8], path: &[u8]) -> bool { + fast_path(glob, path).unwrap_or_else(|| slow_path(glob, path)) +} + +// Fast-pass for the most common patterns: +// * Matching files by extension (e.g., **/*.rs) +// * Matching files by name (e.g., **/Cargo.toml) +fn fast_path(glob: &[u8], path: &[u8]) -> Option { + // In either case, the glob must start with "**/". + let mut suffix = glob.strip_prefix(b"**/")?; + if suffix.is_empty() { + return None; + } + + // Determine whether it's "**/" or "**/*". + let mut needs_dir_anchor = true; + if let Some(s) = suffix.strip_prefix(b"*") { + suffix = s; + needs_dir_anchor = false; + } + + // Restrict down to anything we can handle with a suffix check. + if suffix.is_empty() || contains_magic(suffix) { + return None; + } + + Some( + match_path_suffix(path, suffix) + && ( + // In case of "**/*extension" a simple suffix match is sufficient. + !needs_dir_anchor + // But for "**/filename" we need to ensure that path is either "filename"... + || path.len() == suffix.len() + // ...or that it is ".../filename". + || is_separator(path[path.len() - suffix.len() - 1] as char) + ), + ) +} + +fn contains_magic(glob: &[u8]) -> bool { + glob.iter().any(|&b| b == b'*' || b == b'?') +} + +fn match_path_suffix(path: &[u8], suffix: &[u8]) -> bool { + if path.len() < suffix.len() { + return false; + } + + let path = &path[path.len() - suffix.len()..]; + + #[cfg(windows)] + { + path.iter().zip(suffix.iter()).all(|(a, b)| { + let a = if *a == b'\\' { b'/' } else { *a }; + let b = if *b == b'\\' { b'/' } else { *b }; + a.eq_ignore_ascii_case(&b) + }) + } + + #[cfg(not(windows))] + path.eq_ignore_ascii_case(suffix) +} + +// This code is based on https://research.swtch.com/glob.go +// It's not particularly fast, but it doesn't need to be. It doesn't run often. +#[cold] +fn slow_path(glob: &[u8], path: &[u8]) -> bool { + let mut px = 0; + let mut nx = 0; + let mut next_px = 0; + let mut next_nx = 0; + let mut next_double_px = 0; + let mut next_double_nx = 0; + + while px < glob.len() || nx < path.len() { + if px < glob.len() { + match glob[px] { + b'?' => { + // single-character wildcard + if nx < path.len() && !is_separator(path[nx] as char) { + px += 1; + nx += 1; + continue; + } + } + b'*' => { + // Check for doublestar + if px + 1 < glob.len() && glob[px + 1] == b'*' { + // doublestar - matches across path separators + // Handle trailing slash after ** (e.g., **/ should skip the slash) + let skip = if px + 2 < glob.len() && glob[px + 2] == b'/' { 3 } else { 2 }; + // Try to match at nx first (zero-length match). If that doesn't work, restart at nx+1. + next_double_px = px; + next_double_nx = nx + 1; + px += skip; + } else { + // single star - does not match path separators + // Try to match at nx. If that doesn't work out, restart at nx+1 next. + next_px = px; + next_nx = nx + 1; + px += 1; + } + continue; + } + c => { + // ordinary character + if nx < path.len() && path[nx].eq_ignore_ascii_case(&c) { + px += 1; + nx += 1; + continue; + } + } + } + } + + // Mismatch. Maybe restart. + // Try single-star backtracking first, but only if we don't cross a separator + if 0 < next_nx && next_nx <= path.len() && !is_separator(path[next_nx - 1] as char) { + px = next_px; + nx = next_nx; + continue; + } + + // Try doublestar backtracking + if 0 < next_double_nx && next_double_nx <= path.len() { + px = next_double_px; + nx = next_double_nx; + continue; + } + + return false; + } + + // Matched all of pattern to all of name. Success. + true +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_glob_match() { + let tests = [ + // Test cases from https://research.swtch.com/glob.go + ("", "", true), + ("x", "", false), + ("", "x", false), + ("abc", "abc", true), + ("*", "abc", true), + ("*c", "abc", true), + ("*b", "abc", false), + ("a*", "abc", true), + ("b*", "abc", false), + ("a*", "a", true), + ("*a", "a", true), + ("a*b*c*d*e*", "axbxcxdxe", true), + ("a*b*c*d*e*", "axbxcxdxexxx", true), + ("a*b?c*x", "abxbbxdbxebxczzx", true), + ("a*b?c*x", "abxbbxdbxebxczzy", false), + ("*x", "xxx", true), + // Test cases from https://github.com/golang/go/blob/master/src/path/filepath/match_test.go + ("abc", "abc", true), + ("*", "abc", true), + ("*c", "abc", true), + ("a*", "a", true), + ("a*", "abc", true), + ("a*", "ab/c", false), + ("a*/b", "abc/b", true), + ("a*/b", "a/c/b", false), + ("a*b*c*d*e*/f", "axbxcxdxe/f", true), + ("a*b*c*d*e*/f", "axbxcxdxexxx/f", true), + ("a*b*c*d*e*/f", "axbxcxdxe/xxx/f", false), + ("a*b*c*d*e*/f", "axbxcxdxexxx/fff", false), + ("a*b?c*x", "abxbbxdbxebxczzx", true), + ("a*b?c*x", "abxbbxdbxebxczzy", false), + ("a?b", "a/b", false), + ("a*b", "a/b", false), + ("*x", "xxx", true), + // Basic doublestar tests + ("**", "foo", true), + ("**", "foo/bar", true), + ("**", "foo/bar/baz", true), + ("**/foo", "foo", true), + ("**/foo", "bar/foo", true), + ("**/foo", "bar/baz/foo", true), + ("foo/**", "foo/bar", true), + ("foo/**", "foo/bar/baz", true), + ("foo/**/baz", "foo/baz", true), + ("foo/**/baz", "foo/bar/baz", true), + ("foo/**/baz", "foo/bar/qux/baz", true), + // Doublestar should not match if literal parts don't match + ("**/foo", "bar", false), + ("foo/**", "bar/baz", false), + ("foo/**/baz", "foo/bar/qux", false), + // Single star should NOT match separators + ("foo/*/bar", "foo/bar", false), + ("foo/*/bar", "foo/baz/bar", true), + ("foo/*/bar", "foo/baz/qux/bar", false), + // Mix of single and double star + ("foo/*/baz/**", "foo/bar/baz/qux", true), + ("foo/*/baz/**", "foo/bar/qux/baz/test", false), + // Edge cases + ("**/**", "foo/bar", true), + ("**/*/foo", "bar/baz/foo", true), + ("**/*/foo", "bar/foo", true), + // Optimized patterns: **/*.ext and **/name + ("**/*.rs", "foo.rs", true), + ("**/*.rs", "dir/foo.rs", true), + ("**/*.rs", "dir/sub/foo.rs", true), + ("**/*.rs", "foo.txt", false), + ("**/*.rs", "dir/foo.txt", false), + ("**/Cargo.toml", "Cargo.toml", true), + ("**/Cargo.toml", "dir/Cargo.toml", true), + ("**/Cargo.toml", "dir/sub/Cargo.toml", true), + ("**/Cargo.toml", "Cargo.lock", false), + ("**/Cargo.toml", "dir/Cargo.lock", false), + ]; + + for (pattern, name, expected) in tests { + let result = glob_match(pattern.as_bytes(), name.as_bytes()); + assert_eq!( + result, expected, + "glob_match({:?}, {:?}) = {}, want {}", + pattern, name, result, expected + ); + } + } +} diff --git a/crates/edit/src/lib.rs b/crates/edit/src/lib.rs index 59e3df0ed02..dab5177c3c0 100644 --- a/crates/edit/src/lib.rs +++ b/crates/edit/src/lib.rs @@ -25,6 +25,7 @@ pub mod clipboard; pub mod document; pub mod framebuffer; pub mod fuzzy; +pub mod glob; pub mod hash; pub mod helpers; pub mod icu; From 07ac98cf6e975f62f9eab95e83d71cdc07681621 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Wed, 21 Jan 2026 17:15:39 +0100 Subject: [PATCH 2/5] Fix /**/ matching, More unit tests --- crates/edit/src/glob.rs | 128 ++++++++++++++++++++++------------------ 1 file changed, 70 insertions(+), 58 deletions(-) diff --git a/crates/edit/src/glob.rs b/crates/edit/src/glob.rs index dd56b9bc2cd..1db47881a06 100644 --- a/crates/edit/src/glob.rs +++ b/crates/edit/src/glob.rs @@ -1,18 +1,30 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. +//! Simple glob matching. +//! +//! Supported patterns: +//! - `*` matches any characters except for path separators, including an empty string. +//! - `**` matches any characters, including an empty string. +//! For convenience, `/**/` also matches `/`. + use std::path::is_separator; -pub fn glob_match(glob: &[u8], path: &[u8]) -> bool { - fast_path(glob, path).unwrap_or_else(|| slow_path(glob, path)) +#[inline] +pub fn glob_match, N: AsRef<[u8]>>(pattern: P, needle: N) -> bool { + glob(pattern.as_ref(), needle.as_ref()) +} + +fn glob(pattern: &[u8], needle: &[u8]) -> bool { + fast_path(pattern, needle).unwrap_or_else(|| slow_path(pattern, needle)) } // Fast-pass for the most common patterns: // * Matching files by extension (e.g., **/*.rs) // * Matching files by name (e.g., **/Cargo.toml) -fn fast_path(glob: &[u8], path: &[u8]) -> Option { +fn fast_path(pattern: &[u8], needle: &[u8]) -> Option { // In either case, the glob must start with "**/". - let mut suffix = glob.strip_prefix(b"**/")?; + let mut suffix = pattern.strip_prefix(b"**/")?; if suffix.is_empty() { return None; } @@ -30,20 +42,20 @@ fn fast_path(glob: &[u8], path: &[u8]) -> Option { } Some( - match_path_suffix(path, suffix) + match_path_suffix(needle, suffix) && ( // In case of "**/*extension" a simple suffix match is sufficient. !needs_dir_anchor // But for "**/filename" we need to ensure that path is either "filename"... - || path.len() == suffix.len() + || needle.len() == suffix.len() // ...or that it is ".../filename". - || is_separator(path[path.len() - suffix.len() - 1] as char) + || is_separator(needle[needle.len() - suffix.len() - 1] as char) ), ) } -fn contains_magic(glob: &[u8]) -> bool { - glob.iter().any(|&b| b == b'*' || b == b'?') +fn contains_magic(pattern: &[u8]) -> bool { + pattern.contains(&b'*') } fn match_path_suffix(path: &[u8], suffix: &[u8]) -> bool { @@ -69,7 +81,7 @@ fn match_path_suffix(path: &[u8], suffix: &[u8]) -> bool { // This code is based on https://research.swtch.com/glob.go // It's not particularly fast, but it doesn't need to be. It doesn't run often. #[cold] -fn slow_path(glob: &[u8], path: &[u8]) -> bool { +fn slow_path(pattern: &[u8], needle: &[u8]) -> bool { let mut px = 0; let mut nx = 0; let mut next_px = 0; @@ -77,27 +89,24 @@ fn slow_path(glob: &[u8], path: &[u8]) -> bool { let mut next_double_px = 0; let mut next_double_nx = 0; - while px < glob.len() || nx < path.len() { - if px < glob.len() { - match glob[px] { - b'?' => { - // single-character wildcard - if nx < path.len() && !is_separator(path[nx] as char) { - px += 1; - nx += 1; - continue; - } - } + while px < pattern.len() || nx < needle.len() { + if px < pattern.len() { + match pattern[px] { b'*' => { - // Check for doublestar - if px + 1 < glob.len() && glob[px + 1] == b'*' { - // doublestar - matches across path separators - // Handle trailing slash after ** (e.g., **/ should skip the slash) - let skip = if px + 2 < glob.len() && glob[px + 2] == b'/' { 3 } else { 2 }; - // Try to match at nx first (zero-length match). If that doesn't work, restart at nx+1. + if pattern.get(px + 1) == Some(&b'*') { + // doublestar - matches any characters including / next_double_px = px; next_double_nx = nx + 1; - px += skip; + px += 2; + + // For convenience, /**/ also matches /. + if px >= 3 + && px < pattern.len() + && pattern[px] == b'/' + && pattern[px - 3] == b'/' + { + px += 1; + } } else { // single star - does not match path separators // Try to match at nx. If that doesn't work out, restart at nx+1 next. @@ -109,7 +118,7 @@ fn slow_path(glob: &[u8], path: &[u8]) -> bool { } c => { // ordinary character - if nx < path.len() && path[nx].eq_ignore_ascii_case(&c) { + if nx < needle.len() && needle[nx].eq_ignore_ascii_case(&c) { px += 1; nx += 1; continue; @@ -119,15 +128,15 @@ fn slow_path(glob: &[u8], path: &[u8]) -> bool { } // Mismatch. Maybe restart. - // Try single-star backtracking first, but only if we don't cross a separator - if 0 < next_nx && next_nx <= path.len() && !is_separator(path[next_nx - 1] as char) { + // Try single-star backtracking first, but only if we don't cross a separator. + if next_nx > 0 && next_nx <= needle.len() && !is_separator(needle[next_nx - 1] as char) { px = next_px; nx = next_nx; continue; } // Try doublestar backtracking - if 0 < next_double_nx && next_double_nx <= path.len() { + if next_double_nx > 0 && next_double_nx <= needle.len() { px = next_double_px; nx = next_double_nx; continue; @@ -136,7 +145,6 @@ fn slow_path(glob: &[u8], path: &[u8]) -> bool { return false; } - // Matched all of pattern to all of name. Success. true } @@ -161,54 +169,58 @@ mod tests { ("*a", "a", true), ("a*b*c*d*e*", "axbxcxdxe", true), ("a*b*c*d*e*", "axbxcxdxexxx", true), - ("a*b?c*x", "abxbbxdbxebxczzx", true), - ("a*b?c*x", "abxbbxdbxebxczzy", false), ("*x", "xxx", true), // Test cases from https://github.com/golang/go/blob/master/src/path/filepath/match_test.go - ("abc", "abc", true), - ("*", "abc", true), - ("*c", "abc", true), - ("a*", "a", true), - ("a*", "abc", true), ("a*", "ab/c", false), + ("a*b", "a/b", false), ("a*/b", "abc/b", true), ("a*/b", "a/c/b", false), ("a*b*c*d*e*/f", "axbxcxdxe/f", true), ("a*b*c*d*e*/f", "axbxcxdxexxx/f", true), ("a*b*c*d*e*/f", "axbxcxdxe/xxx/f", false), ("a*b*c*d*e*/f", "axbxcxdxexxx/fff", false), - ("a*b?c*x", "abxbbxdbxebxczzx", true), - ("a*b?c*x", "abxbbxdbxebxczzy", false), - ("a?b", "a/b", false), - ("a*b", "a/b", false), - ("*x", "xxx", true), - // Basic doublestar tests + // Single star (*) + ("*", "", true), + ("foo/*/bar", "foo/bar", false), + ("foo/*/bar", "foo/baz/bar", true), + ("foo/*/bar", "foo/baz/qux/bar", false), + // Double star (**) + ("**", "", true), ("**", "foo", true), ("**", "foo/bar", true), ("**", "foo/bar/baz", true), ("**/foo", "foo", true), ("**/foo", "bar/foo", true), ("**/foo", "bar/baz/foo", true), + ("**/foo", "bar", false), ("foo/**", "foo/bar", true), ("foo/**", "foo/bar/baz", true), + ("foo/**", "bar/baz", false), ("foo/**/baz", "foo/baz", true), ("foo/**/baz", "foo/bar/baz", true), ("foo/**/baz", "foo/bar/qux/baz", true), - // Doublestar should not match if literal parts don't match - ("**/foo", "bar", false), - ("foo/**", "bar/baz", false), ("foo/**/baz", "foo/bar/qux", false), - // Single star should NOT match separators - ("foo/*/bar", "foo/bar", false), - ("foo/*/bar", "foo/baz/bar", true), - ("foo/*/bar", "foo/baz/qux/bar", false), - // Mix of single and double star + ("**/**", "foo/bar", true), + ("foo**bar", "foobar", true), + ("foo**bar", "fooxbar", true), + ("foo**bar", "foo/bar", true), + ("foo**/bar", "foobar", false), + ("foo/**bar", "foobar", false), + ("**/", "foo/", true), + ("/**", "/", true), + ("/**", "/foo", true), + ("foo/**", "foo/", true), + ("a/**/b/**/c", "a/b/c", true), + ("a/**/b/**/c", "a/x/b/y/c", true), + ("a/**/b/**/c", "a/x/y/b/z/w/c", true), + // Mix of * and ** ("foo/*/baz/**", "foo/bar/baz/qux", true), ("foo/*/baz/**", "foo/bar/qux/baz/test", false), - // Edge cases - ("**/**", "foo/bar", true), ("**/*/foo", "bar/baz/foo", true), ("**/*/foo", "bar/foo", true), + // Case insensitivity + ("*.txt", "file.TXT", true), + ("**/*.rs", "dir/file.RS", true), // Optimized patterns: **/*.ext and **/name ("**/*.rs", "foo.rs", true), ("**/*.rs", "dir/foo.rs", true), @@ -223,10 +235,10 @@ mod tests { ]; for (pattern, name, expected) in tests { - let result = glob_match(pattern.as_bytes(), name.as_bytes()); + let result = glob_match(pattern, name); assert_eq!( result, expected, - "glob_match({:?}, {:?}) = {}, want {}", + "glob_match({:?}, {:?}), got {}, expected {}", pattern, name, result, expected ); } From 1e7952b96234f398ca3a878d2cd16d78a1544ff2 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Wed, 21 Jan 2026 20:36:08 +0100 Subject: [PATCH 3/5] It's name, not needle, duh --- crates/edit/src/glob.rs | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/crates/edit/src/glob.rs b/crates/edit/src/glob.rs index 1db47881a06..274c91de622 100644 --- a/crates/edit/src/glob.rs +++ b/crates/edit/src/glob.rs @@ -11,18 +11,18 @@ use std::path::is_separator; #[inline] -pub fn glob_match, N: AsRef<[u8]>>(pattern: P, needle: N) -> bool { - glob(pattern.as_ref(), needle.as_ref()) +pub fn glob_match, N: AsRef<[u8]>>(pattern: P, name: N) -> bool { + glob(pattern.as_ref(), name.as_ref()) } -fn glob(pattern: &[u8], needle: &[u8]) -> bool { - fast_path(pattern, needle).unwrap_or_else(|| slow_path(pattern, needle)) +fn glob(pattern: &[u8], name: &[u8]) -> bool { + fast_path(pattern, name).unwrap_or_else(|| slow_path(pattern, name)) } // Fast-pass for the most common patterns: // * Matching files by extension (e.g., **/*.rs) // * Matching files by name (e.g., **/Cargo.toml) -fn fast_path(pattern: &[u8], needle: &[u8]) -> Option { +fn fast_path(pattern: &[u8], name: &[u8]) -> Option { // In either case, the glob must start with "**/". let mut suffix = pattern.strip_prefix(b"**/")?; if suffix.is_empty() { @@ -42,14 +42,14 @@ fn fast_path(pattern: &[u8], needle: &[u8]) -> Option { } Some( - match_path_suffix(needle, suffix) + match_path_suffix(name, suffix) && ( // In case of "**/*extension" a simple suffix match is sufficient. !needs_dir_anchor // But for "**/filename" we need to ensure that path is either "filename"... - || needle.len() == suffix.len() + || name.len() == suffix.len() // ...or that it is ".../filename". - || is_separator(needle[needle.len() - suffix.len() - 1] as char) + || is_separator(name[name.len() - suffix.len() - 1] as char) ), ) } @@ -81,7 +81,7 @@ fn match_path_suffix(path: &[u8], suffix: &[u8]) -> bool { // This code is based on https://research.swtch.com/glob.go // It's not particularly fast, but it doesn't need to be. It doesn't run often. #[cold] -fn slow_path(pattern: &[u8], needle: &[u8]) -> bool { +fn slow_path(pattern: &[u8], name: &[u8]) -> bool { let mut px = 0; let mut nx = 0; let mut next_px = 0; @@ -89,7 +89,7 @@ fn slow_path(pattern: &[u8], needle: &[u8]) -> bool { let mut next_double_px = 0; let mut next_double_nx = 0; - while px < pattern.len() || nx < needle.len() { + while px < pattern.len() || nx < name.len() { if px < pattern.len() { match pattern[px] { b'*' => { @@ -118,7 +118,7 @@ fn slow_path(pattern: &[u8], needle: &[u8]) -> bool { } c => { // ordinary character - if nx < needle.len() && needle[nx].eq_ignore_ascii_case(&c) { + if nx < name.len() && name[nx].eq_ignore_ascii_case(&c) { px += 1; nx += 1; continue; @@ -129,14 +129,14 @@ fn slow_path(pattern: &[u8], needle: &[u8]) -> bool { // Mismatch. Maybe restart. // Try single-star backtracking first, but only if we don't cross a separator. - if next_nx > 0 && next_nx <= needle.len() && !is_separator(needle[next_nx - 1] as char) { + if next_nx > 0 && next_nx <= name.len() && !is_separator(name[next_nx - 1] as char) { px = next_px; nx = next_nx; continue; } // Try doublestar backtracking - if next_double_nx > 0 && next_double_nx <= needle.len() { + if next_double_nx > 0 && next_double_nx <= name.len() { px = next_double_px; nx = next_double_nx; continue; From 9869b707fbe790153824630ffe26a80325fbe07a Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Wed, 21 Jan 2026 20:59:14 +0100 Subject: [PATCH 4/5] It's O(n) now --- crates/edit/benches/lib.rs | 12 +++++++++++- crates/edit/src/glob.rs | 21 ++++++++------------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/crates/edit/benches/lib.rs b/crates/edit/benches/lib.rs index 4c8fcc37df3..9f0cc3fb206 100644 --- a/crates/edit/benches/lib.rs +++ b/crates/edit/benches/lib.rs @@ -8,7 +8,7 @@ use std::{mem, vec}; use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main}; use edit::helpers::*; use edit::simd::MemsetSafe; -use edit::{buffer, hash, oklab, simd, unicode}; +use edit::{buffer, glob, hash, oklab, simd, unicode}; use serde::Deserialize; use stdext::arena; @@ -107,6 +107,15 @@ fn bench_buffer(c: &mut Criterion) { }); } +fn bench_glob(c: &mut Criterion) { + // Same benchmark as in glob-match + const PATH: &str = "foo/bar/foo/bar/foo/bar/foo/bar/foo/bar.txt"; + const GLOB: &str = "foo/**/bar.txt"; + + c.benchmark_group("glob") + .bench_function("glob_match", |b| b.iter(|| assert!(glob::glob_match(GLOB, PATH)))); +} + fn bench_hash(c: &mut Criterion) { c.benchmark_group("hash") .throughput(Throughput::Bytes(8)) @@ -231,6 +240,7 @@ fn bench(c: &mut Criterion) { arena::init(128 * MEBI).unwrap(); bench_buffer(c); + bench_glob(c); bench_hash(c); bench_oklab(c); bench_simd_lines_fwd(c); diff --git a/crates/edit/src/glob.rs b/crates/edit/src/glob.rs index 274c91de622..b9123724d08 100644 --- a/crates/edit/src/glob.rs +++ b/crates/edit/src/glob.rs @@ -86,8 +86,7 @@ fn slow_path(pattern: &[u8], name: &[u8]) -> bool { let mut nx = 0; let mut next_px = 0; let mut next_nx = 0; - let mut next_double_px = 0; - let mut next_double_nx = 0; + let mut is_double_star = false; while px < pattern.len() || nx < name.len() { if px < pattern.len() { @@ -95,8 +94,9 @@ fn slow_path(pattern: &[u8], name: &[u8]) -> bool { b'*' => { if pattern.get(px + 1) == Some(&b'*') { // doublestar - matches any characters including / - next_double_px = px; - next_double_nx = nx + 1; + next_px = px; + next_nx = nx + 1; + is_double_star = true; px += 2; // For convenience, /**/ also matches /. @@ -128,20 +128,15 @@ fn slow_path(pattern: &[u8], name: &[u8]) -> bool { } // Mismatch. Maybe restart. - // Try single-star backtracking first, but only if we don't cross a separator. - if next_nx > 0 && next_nx <= name.len() && !is_separator(name[next_nx - 1] as char) { + if next_nx > 0 + && next_nx <= name.len() + && (is_double_star || !is_separator(name[next_nx - 1] as char)) + { px = next_px; nx = next_nx; continue; } - // Try doublestar backtracking - if next_double_nx > 0 && next_double_nx <= name.len() { - px = next_double_px; - nx = next_double_nx; - continue; - } - return false; } From 2cc14491a706950c2b977050153ed03dbc6769bd Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Thu, 22 Jan 2026 14:18:20 +0100 Subject: [PATCH 5/5] Improve tests, fix is_double_star --- crates/edit/src/glob.rs | 132 +++++++++++++++++++++++++--------------- 1 file changed, 82 insertions(+), 50 deletions(-) diff --git a/crates/edit/src/glob.rs b/crates/edit/src/glob.rs index b9123724d08..2a9a75adb4d 100644 --- a/crates/edit/src/glob.rs +++ b/crates/edit/src/glob.rs @@ -92,14 +92,17 @@ fn slow_path(pattern: &[u8], name: &[u8]) -> bool { if px < pattern.len() { match pattern[px] { b'*' => { - if pattern.get(px + 1) == Some(&b'*') { - // doublestar - matches any characters including / - next_px = px; - next_nx = nx + 1; + // Try to match at nx. If that doesn't work out, restart at nx+1 next. + next_px = px; + next_nx = nx + 1; + px += 1; + is_double_star = false; + + if px < pattern.len() && pattern[px] == b'*' { + px += 1; is_double_star = true; - px += 2; - // For convenience, /**/ also matches /. + // For convenience, /**/ also matches / if px >= 3 && px < pattern.len() && pattern[px] == b'/' @@ -107,17 +110,10 @@ fn slow_path(pattern: &[u8], name: &[u8]) -> bool { { px += 1; } - } else { - // single star - does not match path separators - // Try to match at nx. If that doesn't work out, restart at nx+1 next. - next_px = px; - next_nx = nx + 1; - px += 1; } continue; } c => { - // ordinary character if nx < name.len() && name[nx].eq_ignore_ascii_case(&c) { px += 1; nx += 1; @@ -175,44 +171,80 @@ mod tests { ("a*b*c*d*e*/f", "axbxcxdxe/xxx/f", false), ("a*b*c*d*e*/f", "axbxcxdxexxx/fff", false), // Single star (*) + // - Empty string ("*", "", true), - ("foo/*/bar", "foo/bar", false), - ("foo/*/bar", "foo/baz/bar", true), - ("foo/*/bar", "foo/baz/qux/bar", false), + // - Anything else is covered above // Double star (**) + // - Empty string ("**", "", true), - ("**", "foo", true), - ("**", "foo/bar", true), - ("**", "foo/bar/baz", true), - ("**/foo", "foo", true), - ("**/foo", "bar/foo", true), - ("**/foo", "bar/baz/foo", true), - ("**/foo", "bar", false), - ("foo/**", "foo/bar", true), - ("foo/**", "foo/bar/baz", true), - ("foo/**", "bar/baz", false), - ("foo/**/baz", "foo/baz", true), - ("foo/**/baz", "foo/bar/baz", true), - ("foo/**/baz", "foo/bar/qux/baz", true), - ("foo/**/baz", "foo/bar/qux", false), - ("**/**", "foo/bar", true), - ("foo**bar", "foobar", true), - ("foo**bar", "fooxbar", true), - ("foo**bar", "foo/bar", true), - ("foo**/bar", "foobar", false), - ("foo/**bar", "foobar", false), - ("**/", "foo/", true), - ("/**", "/", true), - ("/**", "/foo", true), - ("foo/**", "foo/", true), - ("a/**/b/**/c", "a/b/c", true), - ("a/**/b/**/c", "a/x/b/y/c", true), - ("a/**/b/**/c", "a/x/y/b/z/w/c", true), - // Mix of * and ** - ("foo/*/baz/**", "foo/bar/baz/qux", true), - ("foo/*/baz/**", "foo/bar/qux/baz/test", false), - ("**/*/foo", "bar/baz/foo", true), - ("**/*/foo", "bar/foo", true), + ("a**", "a", true), + ("**a", "a", true), + // - Prefix + ("**", "abc", true), + ("**", "foo/baz/bar", true), + ("**c", "abc", true), + ("**b", "abc", false), + // - Infix + ("a**c", "ac", true), + ("a**c", "abc", true), + ("a**c", "abd", false), + ("a**d", "abc", false), + ("a**c", "a/bc", true), + ("a**c", "ab/c", true), + ("a**c", "a/b/c", true), + // -- Infix with left separator + ("a/**c", "ac", false), + ("a/**c", "a/c", true), + ("a/**c", "b/c", false), + ("a/**c", "a/d", false), + ("a/**c", "a/b/c", true), + ("a/**c", "a/b/d", false), + ("a/**c", "d/b/c", false), + // -- Infix with right separator + ("a**/c", "ac", false), + ("a**/c", "a/c", true), + ("a**/c", "b/c", false), + ("a**/c", "a/d", false), + ("a**/c", "a/b/c", true), + ("a**/c", "a/b/d", false), + ("a**/c", "d/b/c", false), + // - Infix with two separators + ("a/**/c", "ac", false), + ("a/**/c", "a/c", true), + ("a/**/c", "b/c", false), + ("a/**/c", "a/d", false), + ("a/**/c", "a/b/c", true), + ("a/**/c", "a/b/d", false), + ("a/**/c", "d/b/c", false), + // - * + * is covered above + // - * + ** + ("a*b**c", "abc", true), + ("a*b**c", "aXbYc", true), + ("a*b**c", "aXb/Yc", true), + ("a*b**c", "aXbY/Yc", true), + ("a*b**c", "aXb/Y/c", true), + ("a*b**c", "a/XbYc", false), + ("a*b**c", "aX/XbYc", false), + ("a*b**c", "a/X/bYc", false), + // - ** + * + ("a**b*c", "abc", true), + ("a**b*c", "aXbYc", true), + ("a**b*c", "aXb/Yc", false), + ("a**b*c", "aXbY/Yc", false), + ("a**b*c", "aXb/Y/c", false), + ("a**b*c", "a/XbYc", true), + ("a**b*c", "aX/XbYc", true), + ("a**b*c", "a/X/bYc", true), + // - ** + ** + ("a**b**c", "abc", true), + ("a**b**c", "aXbYc", true), + ("a**b**c", "aXb/Yc", true), + ("a**b**c", "aXbY/Yc", true), + ("a**b**c", "aXb/Y/c", true), + ("a**b**c", "aXbYc", true), + ("a**b**c", "a/XbYc", true), + ("a**b**c", "aX/XbYc", true), + ("a**b**c", "a/X/bYc", true), // Case insensitivity ("*.txt", "file.TXT", true), ("**/*.rs", "dir/file.RS", true), @@ -233,8 +265,8 @@ mod tests { let result = glob_match(pattern, name); assert_eq!( result, expected, - "glob_match({:?}, {:?}), got {}, expected {}", - pattern, name, result, expected + "test case ({:?}, {:?}, {}) failed, got {}", + pattern, name, expected, result ); } }