globset: add allow_unclosed_class toggle
When enabled, patterns like `[abc`, `[]`, `[!]` are treated as if the opening `[` is just a literal. This is in contrast the default behavior, which prioritizes better error messages, of returning a parse error. Fixes #3127, Closes #3145
This commit is contained in:
@@ -229,6 +229,11 @@ struct GlobOptions {
|
|||||||
/// Whether or not an empty case in an alternate will be removed.
|
/// Whether or not an empty case in an alternate will be removed.
|
||||||
/// e.g., when enabled, `{,a}` will match "" and "a".
|
/// e.g., when enabled, `{,a}` will match "" and "a".
|
||||||
empty_alternates: bool,
|
empty_alternates: bool,
|
||||||
|
/// Whether or not an unclosed character class is allowed. When an unclosed
|
||||||
|
/// character class is found, the opening `[` is treated as a literal `[`.
|
||||||
|
/// When this isn't enabled, an opening `[` without a corresponding `]` is
|
||||||
|
/// treated as an error.
|
||||||
|
allow_unclosed_class: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl GlobOptions {
|
impl GlobOptions {
|
||||||
@@ -238,6 +243,7 @@ impl GlobOptions {
|
|||||||
literal_separator: false,
|
literal_separator: false,
|
||||||
backslash_escape: !is_separator('\\'),
|
backslash_escape: !is_separator('\\'),
|
||||||
empty_alternates: false,
|
empty_alternates: false,
|
||||||
|
allow_unclosed_class: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -594,6 +600,7 @@ impl<'a> GlobBuilder<'a> {
|
|||||||
chars: self.glob.chars().peekable(),
|
chars: self.glob.chars().peekable(),
|
||||||
prev: None,
|
prev: None,
|
||||||
cur: None,
|
cur: None,
|
||||||
|
found_unclosed_class: false,
|
||||||
opts: &self.opts,
|
opts: &self.opts,
|
||||||
};
|
};
|
||||||
p.parse()?;
|
p.parse()?;
|
||||||
@@ -657,6 +664,22 @@ impl<'a> GlobBuilder<'a> {
|
|||||||
self.opts.empty_alternates = yes;
|
self.opts.empty_alternates = yes;
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Toggle whether unclosed character classes are allowed. When allowed,
|
||||||
|
/// a `[` without a matching `]` is treated literally instead of resulting
|
||||||
|
/// in a parse error.
|
||||||
|
///
|
||||||
|
/// For example, if this is set then the glob `[abc` will be treated as the
|
||||||
|
/// literal string `[abc` instead of returning an error.
|
||||||
|
///
|
||||||
|
/// By default, this is false. Generally speaking, enabling this leads to
|
||||||
|
/// worse failure modes since the glob parser becomes more permissive. You
|
||||||
|
/// might want to enable this when compatibility (e.g., with POSIX glob
|
||||||
|
/// implementations) is more important than good error messages.
|
||||||
|
pub fn allow_unclosed_class(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
|
||||||
|
self.opts.allow_unclosed_class = yes;
|
||||||
|
self
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Tokens {
|
impl Tokens {
|
||||||
@@ -782,15 +805,29 @@ fn bytes_to_escaped_literal(bs: &[u8]) -> String {
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct Parser<'a> {
|
struct Parser<'a> {
|
||||||
|
/// The glob to parse.
|
||||||
glob: &'a str,
|
glob: &'a str,
|
||||||
// Marks the index in `stack` where the alternation started.
|
/// Marks the index in `stack` where the alternation started.
|
||||||
alternates_stack: Vec<usize>,
|
alternates_stack: Vec<usize>,
|
||||||
// The set of active alternation branches being parsed.
|
/// The set of active alternation branches being parsed.
|
||||||
// Tokens are added to the end of the last one.
|
/// Tokens are added to the end of the last one.
|
||||||
branches: Vec<Tokens>,
|
branches: Vec<Tokens>,
|
||||||
|
/// A character iterator over the glob pattern to parse.
|
||||||
chars: std::iter::Peekable<std::str::Chars<'a>>,
|
chars: std::iter::Peekable<std::str::Chars<'a>>,
|
||||||
|
/// The previous character seen.
|
||||||
prev: Option<char>,
|
prev: Option<char>,
|
||||||
|
/// The current character.
|
||||||
cur: Option<char>,
|
cur: Option<char>,
|
||||||
|
/// Whether we failed to find a closing `]` for a character
|
||||||
|
/// class. This can only be true when `GlobOptions::allow_unclosed_class`
|
||||||
|
/// is enabled. When enabled, it is impossible to ever parse another
|
||||||
|
/// character class with this glob. That's because classes cannot be
|
||||||
|
/// nested *and* the only way this happens is when there is never a `]`.
|
||||||
|
///
|
||||||
|
/// We track this state so that we don't end up spending quadratic time
|
||||||
|
/// trying to parse something like `[[[[[[[[[[[[[[[[[[[[[[[...`.
|
||||||
|
found_unclosed_class: bool,
|
||||||
|
/// Glob options, which may influence parsing.
|
||||||
opts: &'a GlobOptions,
|
opts: &'a GlobOptions,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -804,7 +841,7 @@ impl<'a> Parser<'a> {
|
|||||||
match c {
|
match c {
|
||||||
'?' => self.push_token(Token::Any)?,
|
'?' => self.push_token(Token::Any)?,
|
||||||
'*' => self.parse_star()?,
|
'*' => self.parse_star()?,
|
||||||
'[' => self.parse_class()?,
|
'[' if !self.found_unclosed_class => self.parse_class()?,
|
||||||
'{' => self.push_alternate()?,
|
'{' => self.push_alternate()?,
|
||||||
'}' => self.pop_alternate()?,
|
'}' => self.pop_alternate()?,
|
||||||
',' => self.parse_comma()?,
|
',' => self.parse_comma()?,
|
||||||
@@ -939,6 +976,11 @@ impl<'a> Parser<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn parse_class(&mut self) -> Result<(), Error> {
|
fn parse_class(&mut self) -> Result<(), Error> {
|
||||||
|
// Save parser state for potential rollback to literal '[' parsing.
|
||||||
|
let saved_chars = self.chars.clone();
|
||||||
|
let saved_prev = self.prev;
|
||||||
|
let saved_cur = self.cur;
|
||||||
|
|
||||||
fn add_to_last_range(
|
fn add_to_last_range(
|
||||||
glob: &str,
|
glob: &str,
|
||||||
r: &mut (char, char),
|
r: &mut (char, char),
|
||||||
@@ -966,11 +1008,17 @@ impl<'a> Parser<'a> {
|
|||||||
let mut first = true;
|
let mut first = true;
|
||||||
let mut in_range = false;
|
let mut in_range = false;
|
||||||
loop {
|
loop {
|
||||||
let c = match self.bump() {
|
let Some(c) = self.bump() else {
|
||||||
Some(c) => c,
|
return if self.opts.allow_unclosed_class == true {
|
||||||
// The only way to successfully break this loop is to observe
|
self.chars = saved_chars;
|
||||||
// a ']'.
|
self.cur = saved_cur;
|
||||||
None => return Err(self.error(ErrorKind::UnclosedClass)),
|
self.prev = saved_prev;
|
||||||
|
self.found_unclosed_class = true;
|
||||||
|
|
||||||
|
self.push_token(Token::Literal('['))
|
||||||
|
} else {
|
||||||
|
Err(self.error(ErrorKind::UnclosedClass))
|
||||||
|
};
|
||||||
};
|
};
|
||||||
match c {
|
match c {
|
||||||
']' => {
|
']' => {
|
||||||
@@ -1055,6 +1103,7 @@ mod tests {
|
|||||||
litsep: Option<bool>,
|
litsep: Option<bool>,
|
||||||
bsesc: Option<bool>,
|
bsesc: Option<bool>,
|
||||||
ealtre: Option<bool>,
|
ealtre: Option<bool>,
|
||||||
|
unccls: Option<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
macro_rules! syntax {
|
macro_rules! syntax {
|
||||||
@@ -1097,6 +1146,10 @@ mod tests {
|
|||||||
if let Some(ealtre) = $options.ealtre {
|
if let Some(ealtre) = $options.ealtre {
|
||||||
builder.empty_alternates(ealtre);
|
builder.empty_alternates(ealtre);
|
||||||
}
|
}
|
||||||
|
if let Some(unccls) = $options.unccls {
|
||||||
|
builder.allow_unclosed_class(unccls);
|
||||||
|
}
|
||||||
|
|
||||||
let pat = builder.build().unwrap();
|
let pat = builder.build().unwrap();
|
||||||
assert_eq!(format!("(?-u){}", $re), pat.regex());
|
assert_eq!(format!("(?-u){}", $re), pat.regex());
|
||||||
}
|
}
|
||||||
@@ -1242,24 +1295,73 @@ mod tests {
|
|||||||
syntaxerr!(err_alt3, "a,b}", ErrorKind::UnopenedAlternates);
|
syntaxerr!(err_alt3, "a,b}", ErrorKind::UnopenedAlternates);
|
||||||
syntaxerr!(err_alt4, "{a,b}}", ErrorKind::UnopenedAlternates);
|
syntaxerr!(err_alt4, "{a,b}}", ErrorKind::UnopenedAlternates);
|
||||||
|
|
||||||
const CASEI: Options =
|
const CASEI: Options = Options {
|
||||||
Options { casei: Some(true), litsep: None, bsesc: None, ealtre: None };
|
casei: Some(true),
|
||||||
const SLASHLIT: Options =
|
litsep: None,
|
||||||
Options { casei: None, litsep: Some(true), bsesc: None, ealtre: None };
|
bsesc: None,
|
||||||
|
ealtre: None,
|
||||||
|
unccls: None,
|
||||||
|
};
|
||||||
|
const SLASHLIT: Options = Options {
|
||||||
|
casei: None,
|
||||||
|
litsep: Some(true),
|
||||||
|
bsesc: None,
|
||||||
|
ealtre: None,
|
||||||
|
unccls: None,
|
||||||
|
};
|
||||||
const NOBSESC: Options = Options {
|
const NOBSESC: Options = Options {
|
||||||
casei: None,
|
casei: None,
|
||||||
litsep: None,
|
litsep: None,
|
||||||
bsesc: Some(false),
|
bsesc: Some(false),
|
||||||
ealtre: None,
|
ealtre: None,
|
||||||
|
unccls: None,
|
||||||
|
};
|
||||||
|
const BSESC: Options = Options {
|
||||||
|
casei: None,
|
||||||
|
litsep: None,
|
||||||
|
bsesc: Some(true),
|
||||||
|
ealtre: None,
|
||||||
|
unccls: None,
|
||||||
};
|
};
|
||||||
const BSESC: Options =
|
|
||||||
Options { casei: None, litsep: None, bsesc: Some(true), ealtre: None };
|
|
||||||
const EALTRE: Options = Options {
|
const EALTRE: Options = Options {
|
||||||
casei: None,
|
casei: None,
|
||||||
litsep: None,
|
litsep: None,
|
||||||
bsesc: Some(true),
|
bsesc: Some(true),
|
||||||
ealtre: Some(true),
|
ealtre: Some(true),
|
||||||
|
unccls: None,
|
||||||
};
|
};
|
||||||
|
const UNCCLS: Options = Options {
|
||||||
|
casei: None,
|
||||||
|
litsep: None,
|
||||||
|
bsesc: None,
|
||||||
|
ealtre: None,
|
||||||
|
unccls: Some(true),
|
||||||
|
};
|
||||||
|
|
||||||
|
toregex!(allow_unclosed_class_single, r"[", r"^\[$", &UNCCLS);
|
||||||
|
toregex!(allow_unclosed_class_many, r"[abc", r"^\[abc$", &UNCCLS);
|
||||||
|
toregex!(allow_unclosed_class_empty1, r"[]", r"^\[\]$", &UNCCLS);
|
||||||
|
toregex!(allow_unclosed_class_empty2, r"[][", r"^\[\]\[$", &UNCCLS);
|
||||||
|
toregex!(allow_unclosed_class_negated_unclosed, r"[!", r"^\[!$", &UNCCLS);
|
||||||
|
toregex!(allow_unclosed_class_negated_empty, r"[!]", r"^\[!\]$", &UNCCLS);
|
||||||
|
toregex!(
|
||||||
|
allow_unclosed_class_brace1,
|
||||||
|
r"{[abc,xyz}",
|
||||||
|
r"^(?:\[abc|xyz)$",
|
||||||
|
&UNCCLS
|
||||||
|
);
|
||||||
|
toregex!(
|
||||||
|
allow_unclosed_class_brace2,
|
||||||
|
r"{[abc,[xyz}",
|
||||||
|
r"^(?:\[abc|\[xyz)$",
|
||||||
|
&UNCCLS
|
||||||
|
);
|
||||||
|
toregex!(
|
||||||
|
allow_unclosed_class_brace3,
|
||||||
|
r"{[abc],[xyz}",
|
||||||
|
r"^(?:[abc]|\[xyz)$",
|
||||||
|
&UNCCLS
|
||||||
|
);
|
||||||
|
|
||||||
toregex!(re_empty, "", "^$");
|
toregex!(re_empty, "", "^$");
|
||||||
|
|
||||||
|
|||||||
@@ -308,6 +308,7 @@ pub struct GitignoreBuilder {
|
|||||||
root: PathBuf,
|
root: PathBuf,
|
||||||
globs: Vec<Glob>,
|
globs: Vec<Glob>,
|
||||||
case_insensitive: bool,
|
case_insensitive: bool,
|
||||||
|
allow_unclosed_class: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl GitignoreBuilder {
|
impl GitignoreBuilder {
|
||||||
@@ -324,6 +325,7 @@ impl GitignoreBuilder {
|
|||||||
root: strip_prefix("./", root).unwrap_or(root).to_path_buf(),
|
root: strip_prefix("./", root).unwrap_or(root).to_path_buf(),
|
||||||
globs: vec![],
|
globs: vec![],
|
||||||
case_insensitive: false,
|
case_insensitive: false,
|
||||||
|
allow_unclosed_class: true,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -511,6 +513,7 @@ impl GitignoreBuilder {
|
|||||||
.literal_separator(true)
|
.literal_separator(true)
|
||||||
.case_insensitive(self.case_insensitive)
|
.case_insensitive(self.case_insensitive)
|
||||||
.backslash_escape(true)
|
.backslash_escape(true)
|
||||||
|
.allow_unclosed_class(self.allow_unclosed_class)
|
||||||
.build()
|
.build()
|
||||||
.map_err(|err| Error::Glob {
|
.map_err(|err| Error::Glob {
|
||||||
glob: Some(glob.original.clone()),
|
glob: Some(glob.original.clone()),
|
||||||
@@ -536,6 +539,26 @@ impl GitignoreBuilder {
|
|||||||
self.case_insensitive = yes;
|
self.case_insensitive = yes;
|
||||||
Ok(self)
|
Ok(self)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Toggle whether unclosed character classes are allowed. When allowed,
|
||||||
|
/// a `[` without a matching `]` is treated literally instead of resulting
|
||||||
|
/// in a parse error.
|
||||||
|
///
|
||||||
|
/// For example, if this is set then the glob `[abc` will be treated as the
|
||||||
|
/// literal string `[abc` instead of returning an error.
|
||||||
|
///
|
||||||
|
/// By default, this is true in order to match established `gitignore`
|
||||||
|
/// semantics. Generally speaking, enabling this leads to worse failure
|
||||||
|
/// modes since the glob parser becomes more permissive. You might want to
|
||||||
|
/// enable this when compatibility (e.g., with POSIX glob implementations)
|
||||||
|
/// is more important than good error messages.
|
||||||
|
pub fn allow_unclosed_class(
|
||||||
|
&mut self,
|
||||||
|
yes: bool,
|
||||||
|
) -> &mut GitignoreBuilder {
|
||||||
|
self.allow_unclosed_class = yes;
|
||||||
|
self
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the file path of the current environment's global gitignore file.
|
/// Return the file path of the current environment's global gitignore file.
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
/*!
|
/*!
|
||||||
The overrides module provides a way to specify a set of override globs.
|
The overrides module provides a way to specify a set of override globs.
|
||||||
|
|
||||||
This provides functionality similar to `--include` or `--exclude` in command
|
This provides functionality similar to `--include` or `--exclude` in command
|
||||||
line tools.
|
line tools.
|
||||||
*/
|
*/
|
||||||
@@ -120,7 +121,9 @@ impl OverrideBuilder {
|
|||||||
///
|
///
|
||||||
/// Matching is done relative to the directory path provided.
|
/// Matching is done relative to the directory path provided.
|
||||||
pub fn new<P: AsRef<Path>>(path: P) -> OverrideBuilder {
|
pub fn new<P: AsRef<Path>>(path: P) -> OverrideBuilder {
|
||||||
OverrideBuilder { builder: GitignoreBuilder::new(path) }
|
let mut builder = GitignoreBuilder::new(path);
|
||||||
|
builder.allow_unclosed_class(false);
|
||||||
|
OverrideBuilder { builder }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Builds a new override matcher from the globs added so far.
|
/// Builds a new override matcher from the globs added so far.
|
||||||
@@ -143,7 +146,8 @@ impl OverrideBuilder {
|
|||||||
|
|
||||||
/// Toggle whether the globs should be matched case insensitively or not.
|
/// Toggle whether the globs should be matched case insensitively or not.
|
||||||
///
|
///
|
||||||
/// When this option is changed, only globs added after the change will be affected.
|
/// When this option is changed, only globs added after the change will be
|
||||||
|
/// affected.
|
||||||
///
|
///
|
||||||
/// This is disabled by default.
|
/// This is disabled by default.
|
||||||
pub fn case_insensitive(
|
pub fn case_insensitive(
|
||||||
@@ -155,6 +159,28 @@ impl OverrideBuilder {
|
|||||||
self.builder.case_insensitive(yes)?;
|
self.builder.case_insensitive(yes)?;
|
||||||
Ok(self)
|
Ok(self)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Toggle whether unclosed character classes are allowed. When allowed,
|
||||||
|
/// a `[` without a matching `]` is treated literally instead of resulting
|
||||||
|
/// in a parse error.
|
||||||
|
///
|
||||||
|
/// For example, if this is set then the glob `[abc` will be treated as the
|
||||||
|
/// literal string `[abc` instead of returning an error.
|
||||||
|
///
|
||||||
|
/// By default, this is false. Generally speaking, enabling this leads to
|
||||||
|
/// worse failure modes since the glob parser becomes more permissive. You
|
||||||
|
/// might want to enable this when compatibility (e.g., with POSIX glob
|
||||||
|
/// implementations) is more important than good error messages.
|
||||||
|
///
|
||||||
|
/// This default is different from the default for [`Gitignore`]. Namely,
|
||||||
|
/// [`Gitignore`] is intended to match git's behavior as-is. But this
|
||||||
|
/// abstraction for "override" globs does not necessarily conform to any
|
||||||
|
/// other known specification and instead prioritizes better error
|
||||||
|
/// messages.
|
||||||
|
pub fn allow_unclosed_class(&mut self, yes: bool) -> &mut OverrideBuilder {
|
||||||
|
self.builder.allow_unclosed_class(yes);
|
||||||
|
self
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|||||||
@@ -1519,3 +1519,28 @@ rgtest!(r3108_files_without_match_quiet_exit, |dir: Dir, _: TestCommand| {
|
|||||||
.stdout();
|
.stdout();
|
||||||
eqnice!("", got);
|
eqnice!("", got);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// See: https://github.com/BurntSushi/ripgrep/issues/3127
|
||||||
|
rgtest!(
|
||||||
|
r3127_gitignore_allow_unclosed_class,
|
||||||
|
|dir: Dir, mut cmd: TestCommand| {
|
||||||
|
dir.create_dir(".git");
|
||||||
|
dir.create(".gitignore", "[abc");
|
||||||
|
dir.create("[abc", "");
|
||||||
|
dir.create("test", "");
|
||||||
|
|
||||||
|
let got = cmd.args(&["--files"]).stdout();
|
||||||
|
eqnice!("test\n", got);
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
// See: https://github.com/BurntSushi/ripgrep/issues/3127
|
||||||
|
rgtest!(
|
||||||
|
r3127_glob_flag_not_allow_unclosed_class,
|
||||||
|
|dir: Dir, mut cmd: TestCommand| {
|
||||||
|
dir.create("[abc", "");
|
||||||
|
dir.create("test", "");
|
||||||
|
|
||||||
|
cmd.args(&["--files", "-g", "[abc"]).assert_err();
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|||||||
Reference in New Issue
Block a user