diff --git a/CHANGELOG.md b/CHANGELOG.md
index d0d294d..aa92d4f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -30,6 +30,8 @@ Bug fixes:
 
 * [BUG #1291](https://github.com/BurntSushi/ripgrep/issues/1291):
   ripgrep now works in non-existent directories.
+* [BUG #1319](https://github.com/BurntSushi/ripgrep/issues/1319):
+  Fix match bug due to errant literal detection.
 * [**BUG #1335**](https://github.com/BurntSushi/ripgrep/issues/1335):
   Fixes a performance bug when searching plain text files with very long lines.
   This was a serious performance regression in some cases.
diff --git a/grep-regex/src/literal.rs b/grep-regex/src/literal.rs
index e4c8e65..52f0bc7 100644
--- a/grep-regex/src/literal.rs
+++ b/grep-regex/src/literal.rs
@@ -5,8 +5,6 @@ the regex engine doesn't look for inner literals. Since we're doing line based
 searching, we can use them, so we need to do it ourselves.
 */
 
-use std::cmp;
-
 use regex_syntax::hir::{self, Hir, HirKind};
 use regex_syntax::hir::literal::{Literal, Literals};
 
@@ -248,7 +246,7 @@ fn union_required(expr: &Hir, lits: &mut Literals) {
 fn repeat_range_literals<F: FnMut(&Hir, &mut Literals)>(
     e: &Hir,
     min: u32,
-    max: Option<u32>,
+    _max: Option<u32>,
     _greedy: bool,
     lits: &mut Literals,
     mut f: F,
@@ -259,19 +257,13 @@ fn repeat_range_literals<F: FnMut(&Hir, &mut Literals)>(
         // just treat it as `e*`.
         lits.cut();
     } else {
-        let n = cmp::min(lits.limit_size(), min as usize);
         // We only extract literals from a single repetition, even though
         // we could do more. e.g., `a{3}` will have `a` extracted instead of
         // `aaa`. The reason is that inner literal extraction can't be unioned
         // across repetitions. e.g., extracting `foofoofoo` from `(\w+foo){3}`
         // is wrong.
         f(e, lits);
-        if n < min as usize {
-            lits.cut();
-        }
-        if max.map_or(true, |max| min < max) {
-            lits.cut();
-        }
+        lits.cut();
     }
 }
 
@@ -383,4 +375,13 @@ mod tests {
         // assert_eq!(one_regex(r"a.*c"), pat("a"));
         assert_eq!(one_regex(r"a(.*c)"), pat("a"));
     }
+
+    #[test]
+    fn regression_1319() {
+        // Regression from:
+        // https://github.com/BurntSushi/ripgrep/issues/1319
+        assert_eq!(one_regex(r"TTGAGTCCAGGAG[ATCG]{2}C"),
+            pat("TTGAGTCCAGGAGA|TTGAGTCCAGGAGC|\
+                 TTGAGTCCAGGAGG|TTGAGTCCAGGAGT"));
+    }
 }
diff --git a/tests/regression.rs b/tests/regression.rs
index 6d92574..72f5859 100644
--- a/tests/regression.rs
+++ b/tests/regression.rs
@@ -729,6 +729,14 @@ rgtest!(r1259_drop_last_byte_nonl, |dir: Dir, mut cmd: TestCommand| {
     eqnice!("fz\n", cmd.arg("-f").arg("patterns-nl").arg("test").stdout());
 });
 
+// See: https://github.com/BurntSushi/ripgrep/issues/1319
+rgtest!(r1319, |dir: Dir, mut cmd: TestCommand| {
+    dir.create("input", "CCAGCTACTCGGGAGGCTGAGGCTGGAGGATCGCTTGAGTCCAGGAGTTC");
+    eqnice!(
+        "input:CCAGCTACTCGGGAGGCTGAGGCTGGAGGATCGCTTGAGTCCAGGAGTTC\n",
+        cmd.arg("TTGAGTCCAGGAG[ATCG]{2}C").stdout());
+});
+
 // See: https://github.com/BurntSushi/ripgrep/issues/1334
 rgtest!(r1334_crazy_literals, |dir: Dir, mut cmd: TestCommand| {
     dir.create("patterns", &"1.208.0.0/12\n".repeat(40));