Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions crates/edit/benches/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,8 @@ use std::{mem, vec};
use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
use edit::helpers::*;
use edit::simd::MemsetSafe;
use edit::{buffer, hash, json, oklab, simd, unicode};
use stdext::arena;
use stdext::arena::{Arena, scratch_arena};
use edit::{buffer, glob, hash, json, oklab, simd, unicode};
use stdext::arena::{self, Arena, scratch_arena};

struct EditingTracePatch<'a>(usize, usize, &'a str);

Expand Down Expand Up @@ -136,6 +135,15 @@ fn bench_buffer(c: &mut Criterion) {
});
}

fn bench_glob(c: &mut Criterion) {
// Same benchmark as in glob-match
const PATH: &str = "foo/bar/foo/bar/foo/bar/foo/bar/foo/bar.txt";
const GLOB: &str = "foo/**/bar.txt";

c.benchmark_group("glob")
.bench_function("glob_match", |b| b.iter(|| assert!(glob::glob_match(GLOB, PATH))));
}

fn bench_hash(c: &mut Criterion) {
c.benchmark_group("hash")
.throughput(Throughput::Bytes(8))
Expand Down Expand Up @@ -275,6 +283,7 @@ fn bench(c: &mut Criterion) {
arena::init(128 * MEBI).unwrap();

bench_buffer(c);
bench_glob(c);
bench_hash(c);
bench_json(c);
bench_oklab(c);
Expand Down
273 changes: 273 additions & 0 deletions crates/edit/src/glob.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,273 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! Simple glob matching.
//!
//! Supported patterns:
//! - `*` matches any characters except for path separators, including an empty string.
//! - `**` matches any characters, including an empty string.
//! For convenience, `/**/` also matches `/`.

use std::path::is_separator;

#[inline]
pub fn glob_match<P: AsRef<[u8]>, N: AsRef<[u8]>>(pattern: P, name: N) -> bool {
glob(pattern.as_ref(), name.as_ref())
}

fn glob(pattern: &[u8], name: &[u8]) -> bool {
fast_path(pattern, name).unwrap_or_else(|| slow_path(pattern, name))
}

// Fast-pass for the most common patterns:
// * Matching files by extension (e.g., **/*.rs)
// * Matching files by name (e.g., **/Cargo.toml)
fn fast_path(pattern: &[u8], name: &[u8]) -> Option<bool> {
// In either case, the glob must start with "**/".
let mut suffix = pattern.strip_prefix(b"**/")?;
if suffix.is_empty() {
return None;
}

// Determine whether it's "**/" or "**/*".
let mut needs_dir_anchor = true;
if let Some(s) = suffix.strip_prefix(b"*") {
suffix = s;
needs_dir_anchor = false;
}

// Restrict down to anything we can handle with a suffix check.
if suffix.is_empty() || contains_magic(suffix) {
return None;
}

Some(
match_path_suffix(name, suffix)
&& (
// In case of "**/*extension" a simple suffix match is sufficient.
!needs_dir_anchor
// But for "**/filename" we need to ensure that path is either "filename"...
|| name.len() == suffix.len()
// ...or that it is ".../filename".
|| is_separator(name[name.len() - suffix.len() - 1] as char)
),
)
}

fn contains_magic(pattern: &[u8]) -> bool {
pattern.contains(&b'*')
}

fn match_path_suffix(path: &[u8], suffix: &[u8]) -> bool {
if path.len() < suffix.len() {
return false;
}

let path = &path[path.len() - suffix.len()..];

#[cfg(windows)]
{
path.iter().zip(suffix.iter()).all(|(a, b)| {
let a = if *a == b'\\' { b'/' } else { *a };
let b = if *b == b'\\' { b'/' } else { *b };
a.eq_ignore_ascii_case(&b)
})
}

#[cfg(not(windows))]
path.eq_ignore_ascii_case(suffix)
}

// This code is based on https://research.swtch.com/glob.go
// It's not particularly fast, but it doesn't need to be. It doesn't run often.
#[cold]
fn slow_path(pattern: &[u8], name: &[u8]) -> bool {
let mut px = 0;
let mut nx = 0;
let mut next_px = 0;
let mut next_nx = 0;
let mut is_double_star = false;

while px < pattern.len() || nx < name.len() {
if px < pattern.len() {
match pattern[px] {
b'*' => {
// Try to match at nx. If that doesn't work out, restart at nx+1 next.
next_px = px;
next_nx = nx + 1;
px += 1;
is_double_star = false;

if px < pattern.len() && pattern[px] == b'*' {
px += 1;
is_double_star = true;

// For convenience, /**/ also matches /
if px >= 3
&& px < pattern.len()
&& pattern[px] == b'/'
&& pattern[px - 3] == b'/'
{
px += 1;
}
}
continue;
}
c => {
if nx < name.len() && name[nx].eq_ignore_ascii_case(&c) {
px += 1;
nx += 1;
continue;
}
}
}
}

// Mismatch. Maybe restart.
if next_nx > 0
&& next_nx <= name.len()
&& (is_double_star || !is_separator(name[next_nx - 1] as char))
{
px = next_px;
nx = next_nx;
continue;
}

return false;
}

true
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_glob_match() {
let tests = [
// Test cases from https://research.swtch.com/glob.go
("", "", true),
("x", "", false),
("", "x", false),
("abc", "abc", true),
("*", "abc", true),
("*c", "abc", true),
("*b", "abc", false),
("a*", "abc", true),
("b*", "abc", false),
("a*", "a", true),
("*a", "a", true),
("a*b*c*d*e*", "axbxcxdxe", true),
("a*b*c*d*e*", "axbxcxdxexxx", true),
("*x", "xxx", true),
// Test cases from https://github.com/golang/go/blob/master/src/path/filepath/match_test.go
("a*", "ab/c", false),
("a*b", "a/b", false),
("a*/b", "abc/b", true),
("a*/b", "a/c/b", false),
("a*b*c*d*e*/f", "axbxcxdxe/f", true),
("a*b*c*d*e*/f", "axbxcxdxexxx/f", true),
("a*b*c*d*e*/f", "axbxcxdxe/xxx/f", false),
("a*b*c*d*e*/f", "axbxcxdxexxx/fff", false),
// Single star (*)
// - Empty string
("*", "", true),
// - Anything else is covered above
// Double star (**)
// - Empty string
("**", "", true),
("a**", "a", true),
("**a", "a", true),
// - Prefix
("**", "abc", true),
("**", "foo/baz/bar", true),
("**c", "abc", true),
("**b", "abc", false),
// - Infix
("a**c", "ac", true),
("a**c", "abc", true),
("a**c", "abd", false),
("a**d", "abc", false),
("a**c", "a/bc", true),
("a**c", "ab/c", true),
("a**c", "a/b/c", true),
// -- Infix with left separator
("a/**c", "ac", false),
("a/**c", "a/c", true),
("a/**c", "b/c", false),
("a/**c", "a/d", false),
("a/**c", "a/b/c", true),
("a/**c", "a/b/d", false),
("a/**c", "d/b/c", false),
// -- Infix with right separator
("a**/c", "ac", false),
("a**/c", "a/c", true),
("a**/c", "b/c", false),
("a**/c", "a/d", false),
("a**/c", "a/b/c", true),
("a**/c", "a/b/d", false),
("a**/c", "d/b/c", false),
// - Infix with two separators
("a/**/c", "ac", false),
("a/**/c", "a/c", true),
("a/**/c", "b/c", false),
("a/**/c", "a/d", false),
("a/**/c", "a/b/c", true),
("a/**/c", "a/b/d", false),
("a/**/c", "d/b/c", false),
// - * + * is covered above
// - * + **
("a*b**c", "abc", true),
("a*b**c", "aXbYc", true),
("a*b**c", "aXb/Yc", true),
("a*b**c", "aXbY/Yc", true),
("a*b**c", "aXb/Y/c", true),
("a*b**c", "a/XbYc", false),
("a*b**c", "aX/XbYc", false),
("a*b**c", "a/X/bYc", false),
// - ** + *
("a**b*c", "abc", true),
("a**b*c", "aXbYc", true),
("a**b*c", "aXb/Yc", false),
("a**b*c", "aXbY/Yc", false),
("a**b*c", "aXb/Y/c", false),
("a**b*c", "a/XbYc", true),
("a**b*c", "aX/XbYc", true),
("a**b*c", "a/X/bYc", true),
// - ** + **
("a**b**c", "abc", true),
("a**b**c", "aXbYc", true),
("a**b**c", "aXb/Yc", true),
("a**b**c", "aXbY/Yc", true),
("a**b**c", "aXb/Y/c", true),
("a**b**c", "aXbYc", true),
("a**b**c", "a/XbYc", true),
("a**b**c", "aX/XbYc", true),
("a**b**c", "a/X/bYc", true),
// Case insensitivity
("*.txt", "file.TXT", true),
("**/*.rs", "dir/file.RS", true),
// Optimized patterns: **/*.ext and **/name
("**/*.rs", "foo.rs", true),
("**/*.rs", "dir/foo.rs", true),
("**/*.rs", "dir/sub/foo.rs", true),
("**/*.rs", "foo.txt", false),
("**/*.rs", "dir/foo.txt", false),
("**/Cargo.toml", "Cargo.toml", true),
("**/Cargo.toml", "dir/Cargo.toml", true),
("**/Cargo.toml", "dir/sub/Cargo.toml", true),
("**/Cargo.toml", "Cargo.lock", false),
("**/Cargo.toml", "dir/Cargo.lock", false),
];

for (pattern, name, expected) in tests {
let result = glob_match(pattern, name);
assert_eq!(
result, expected,
"test case ({:?}, {:?}, {}) failed, got {}",
pattern, name, expected, result
);
}
}
}
1 change: 1 addition & 0 deletions crates/edit/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ pub mod clipboard;
pub mod document;
pub mod framebuffer;
pub mod fuzzy;
pub mod glob;
pub mod hash;
pub mod helpers;
pub mod icu;
Expand Down