globbing by regex
This commit is contained in:
122
benches/bench.rs
Normal file
122
benches/bench.rs
Normal file
@@ -0,0 +1,122 @@
|
||||
#![feature(test)]
|
||||
|
||||
extern crate glob;
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
extern crate regex;
|
||||
extern crate test;
|
||||
|
||||
const SHORT: &'static str = "some/needle.txt";
|
||||
const SHORT_PAT: &'static str = "some/**/needle.txt";
|
||||
|
||||
const LONG: &'static str = "some/a/bigger/path/to/the/crazy/needle.txt";
|
||||
const LONG_PAT: &'static str = "some/**/needle.txt";
|
||||
|
||||
#[allow(dead_code, unused_variables)]
|
||||
#[path = "../src/glob.rs"]
|
||||
mod reglob;
|
||||
|
||||
fn new_glob(pat: &str) -> glob::Pattern {
|
||||
glob::Pattern::new(pat).unwrap()
|
||||
}
|
||||
|
||||
fn new_reglob(pat: &str) -> reglob::Set {
|
||||
let mut builder = reglob::SetBuilder::new();
|
||||
builder.add(pat).unwrap();
|
||||
builder.build().unwrap()
|
||||
}
|
||||
|
||||
fn new_reglob_many(pats: &[&str]) -> reglob::Set {
|
||||
let mut builder = reglob::SetBuilder::new();
|
||||
for pat in pats {
|
||||
builder.add(pat).unwrap();
|
||||
}
|
||||
builder.build().unwrap()
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn short_glob(b: &mut test::Bencher) {
|
||||
let pat = new_glob(SHORT_PAT);
|
||||
b.iter(|| assert!(pat.matches(SHORT)));
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn short_regex(b: &mut test::Bencher) {
|
||||
let set = new_reglob(SHORT_PAT);
|
||||
b.iter(|| assert!(set.is_match(SHORT)));
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn long_glob(b: &mut test::Bencher) {
|
||||
let pat = new_glob(LONG_PAT);
|
||||
b.iter(|| assert!(pat.matches(LONG)));
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn long_regex(b: &mut test::Bencher) {
|
||||
let set = new_reglob(LONG_PAT);
|
||||
b.iter(|| assert!(set.is_match(LONG)));
|
||||
}
|
||||
|
||||
const MANY_SHORT_GLOBS: &'static [&'static str] = &[
|
||||
// Taken from a random .gitignore on my system.
|
||||
".*.swp",
|
||||
"tags",
|
||||
"target",
|
||||
"*.lock",
|
||||
"tmp",
|
||||
"*.csv",
|
||||
"*.fst",
|
||||
"*-got",
|
||||
"*.csv.idx",
|
||||
"words",
|
||||
"98m*",
|
||||
"dict",
|
||||
"test",
|
||||
"months",
|
||||
];
|
||||
|
||||
const MANY_SHORT_SEARCH: &'static str = "98m-blah.csv.idx";
|
||||
|
||||
#[bench]
|
||||
fn many_short_glob(b: &mut test::Bencher) {
|
||||
let pats: Vec<_> = MANY_SHORT_GLOBS.iter().map(|&s| new_glob(s)).collect();
|
||||
b.iter(|| {
|
||||
let mut count = 0;
|
||||
for pat in &pats {
|
||||
if pat.matches(MANY_SHORT_SEARCH) {
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
assert_eq!(2, count);
|
||||
})
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn many_short_regex_set(b: &mut test::Bencher) {
|
||||
let set = new_reglob_many(MANY_SHORT_GLOBS);
|
||||
b.iter(|| assert_eq!(2, set.matches(MANY_SHORT_SEARCH).iter().count()));
|
||||
}
|
||||
|
||||
// This is the fastest on my system (beating many_glob by about 2x). This
|
||||
// suggests that a RegexSet needs quite a few regexes (or a larger haystack)
|
||||
// in order for it to scale.
|
||||
//
|
||||
// TODO(burntsushi): come up with a benchmark that uses more complex patterns
|
||||
// or a longer haystack.
|
||||
#[bench]
|
||||
fn many_short_regex_pattern(b: &mut test::Bencher) {
|
||||
let pats: Vec<_> = MANY_SHORT_GLOBS.iter().map(|&s| {
|
||||
let pat = reglob::Pattern::new(s).unwrap();
|
||||
regex::Regex::new(&pat.to_regex()).unwrap()
|
||||
}).collect();
|
||||
b.iter(|| {
|
||||
let mut count = 0;
|
||||
for pat in &pats {
|
||||
if pat.is_match(MANY_SHORT_SEARCH) {
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
assert_eq!(2, count);
|
||||
})
|
||||
}
|
||||
Reference in New Issue
Block a user