searcher: move "max matches" from printer to searcher
This is a bit of a brutal change, but I believe it is necessary in order to fix a bug in how we handle the "max matches" limit in multi-line mode while simultaneously handling context lines correctly. The main problem here is that "max matches" refers to the shorter of "one match per line" or "a single match." In typical grep, matches *can't* span multiple lines, so there's never a difference. But in multi-line mode, they can. So match counts necessarily must be handled differently for multi-line mode. The printer was previously responsible for this. But for $reasons, the printer is fundamentally not in charge of how matches are found and reported. See my comments in #3094 for even more context. This is a breaking change for `grep-printer`. Fixes #3076, Closes #3094
This commit is contained in:
committed by
Andrew Gallant
parent
a60e62d9ac
commit
a6e0be3c90
@@ -25,6 +25,8 @@ Bug fixes:
|
||||
Fix a bug where the "bytes searched" in `--stats` output could be incorrect.
|
||||
* [BUG #2990](https://github.com/BurntSushi/ripgrep/issues/2990):
|
||||
Fix a bug where ripgrep would mishandle globs that ended with a `.`.
|
||||
* [BUG #3076](https://github.com/BurntSushi/ripgrep/issues/3076):
|
||||
Fix bug with `-m/--max-count` and `-U/--multiline` showing too many matches.
|
||||
* [BUG #3108](https://github.com/BurntSushi/ripgrep/issues/3108):
|
||||
Fix a bug where `-q --files-without-match` inverted the exit code.
|
||||
* [BUG #3140](https://github.com/BurntSushi/ripgrep/issues/3140):
|
||||
|
||||
@@ -3877,6 +3877,10 @@ impl Flag for MaxCount {
|
||||
r"
|
||||
Limit the number of matching lines per file searched to \fINUM\fP.
|
||||
.sp
|
||||
When \flag{multiline} is used, a single match that spans multiple lines is only
|
||||
counted once for the purposes of this limit. Multiple matches in a single line
|
||||
are counted only once, as they would be in non-multiline mode.
|
||||
.sp
|
||||
Note that \fB0\fP is a legal value but not likely to be useful. When used,
|
||||
ripgrep won't search anything.
|
||||
"
|
||||
|
||||
@@ -617,7 +617,6 @@ impl HiArgs {
|
||||
.hyperlink(self.hyperlink_config.clone())
|
||||
.max_columns_preview(self.max_columns_preview)
|
||||
.max_columns(self.max_columns)
|
||||
.max_matches(self.max_count)
|
||||
.only_matching(self.only_matching)
|
||||
.path(self.with_filename)
|
||||
.path_terminator(self.path_terminator.clone())
|
||||
@@ -719,6 +718,7 @@ impl HiArgs {
|
||||
};
|
||||
let mut builder = grep::searcher::SearcherBuilder::new();
|
||||
builder
|
||||
.max_matches(self.max_count)
|
||||
.line_terminator(line_term)
|
||||
.invert_match(self.invert_match)
|
||||
.line_number(self.line_number)
|
||||
|
||||
@@ -11,8 +11,7 @@ use {
|
||||
bstr::ByteSlice,
|
||||
grep_matcher::{Match, Matcher},
|
||||
grep_searcher::{
|
||||
LineStep, Searcher, Sink, SinkContext, SinkContextKind, SinkFinish,
|
||||
SinkMatch,
|
||||
LineStep, Searcher, Sink, SinkContext, SinkFinish, SinkMatch,
|
||||
},
|
||||
termcolor::{ColorSpec, NoColor, WriteColor},
|
||||
};
|
||||
@@ -46,7 +45,6 @@ struct Config {
|
||||
replacement: Arc<Option<Vec<u8>>>,
|
||||
max_columns: Option<u64>,
|
||||
max_columns_preview: bool,
|
||||
max_matches: Option<u64>,
|
||||
column: bool,
|
||||
byte_offset: bool,
|
||||
trim_ascii: bool,
|
||||
@@ -72,7 +70,6 @@ impl Default for Config {
|
||||
replacement: Arc::new(None),
|
||||
max_columns: None,
|
||||
max_columns_preview: false,
|
||||
max_matches: None,
|
||||
column: false,
|
||||
byte_offset: false,
|
||||
trim_ascii: false,
|
||||
@@ -326,16 +323,6 @@ impl StandardBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the maximum amount of matching lines that are printed.
|
||||
///
|
||||
/// If multi line search is enabled and a match spans multiple lines, then
|
||||
/// that match is counted exactly once for the purposes of enforcing this
|
||||
/// limit, regardless of how many lines it spans.
|
||||
pub fn max_matches(&mut self, limit: Option<u64>) -> &mut StandardBuilder {
|
||||
self.config.max_matches = limit;
|
||||
self
|
||||
}
|
||||
|
||||
/// Print the column number of the first match in a line.
|
||||
///
|
||||
/// This option is convenient for use with `per_match` which will print a
|
||||
@@ -541,7 +528,6 @@ impl<W: WriteColor> Standard<W> {
|
||||
path: None,
|
||||
start_time: Instant::now(),
|
||||
match_count: 0,
|
||||
after_context_remaining: 0,
|
||||
binary_byte_offset: None,
|
||||
stats,
|
||||
needs_match_granularity,
|
||||
@@ -578,7 +564,6 @@ impl<W: WriteColor> Standard<W> {
|
||||
path: Some(ppath),
|
||||
start_time: Instant::now(),
|
||||
match_count: 0,
|
||||
after_context_remaining: 0,
|
||||
binary_byte_offset: None,
|
||||
stats,
|
||||
needs_match_granularity,
|
||||
@@ -659,7 +644,6 @@ pub struct StandardSink<'p, 's, M: Matcher, W> {
|
||||
path: Option<PrinterPath<'p>>,
|
||||
start_time: Instant,
|
||||
match_count: u64,
|
||||
after_context_remaining: u64,
|
||||
binary_byte_offset: Option<u64>,
|
||||
stats: Option<Stats>,
|
||||
needs_match_granularity: bool,
|
||||
@@ -774,32 +758,6 @@ impl<'p, 's, M: Matcher, W: WriteColor> StandardSink<'p, 's, M, W> {
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns true if this printer should quit.
|
||||
///
|
||||
/// This implements the logic for handling quitting after seeing a certain
|
||||
/// amount of matches. In most cases, the logic is simple, but we must
|
||||
/// permit all "after" contextual lines to print after reaching the limit.
|
||||
fn should_quit(&self) -> bool {
|
||||
let limit = match self.standard.config.max_matches {
|
||||
None => return false,
|
||||
Some(limit) => limit,
|
||||
};
|
||||
if self.match_count < limit {
|
||||
return false;
|
||||
}
|
||||
self.after_context_remaining == 0
|
||||
}
|
||||
|
||||
/// Returns whether the current match count exceeds the configured limit.
|
||||
/// If there is no limit, then this always returns false.
|
||||
fn match_more_than_limit(&self) -> bool {
|
||||
let limit = match self.standard.config.max_matches {
|
||||
None => return false,
|
||||
Some(limit) => limit,
|
||||
};
|
||||
self.match_count > limit
|
||||
}
|
||||
}
|
||||
|
||||
impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> {
|
||||
@@ -811,19 +769,6 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> {
|
||||
mat: &SinkMatch<'_>,
|
||||
) -> Result<bool, io::Error> {
|
||||
self.match_count += 1;
|
||||
// When we've exceeded our match count, then the remaining context
|
||||
// lines should not be reset, but instead, decremented. This avoids a
|
||||
// bug where we display more matches than a configured limit. The main
|
||||
// idea here is that 'matched' might be called again while printing
|
||||
// an after-context line. In that case, we should treat this as a
|
||||
// contextual line rather than a matching line for the purposes of
|
||||
// termination.
|
||||
if self.match_more_than_limit() {
|
||||
self.after_context_remaining =
|
||||
self.after_context_remaining.saturating_sub(1);
|
||||
} else {
|
||||
self.after_context_remaining = searcher.after_context() as u64;
|
||||
}
|
||||
|
||||
self.record_matches(
|
||||
searcher,
|
||||
@@ -841,9 +786,8 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> {
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
|
||||
StandardImpl::from_match(searcher, self, mat).sink()?;
|
||||
Ok(!self.should_quit())
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
fn context(
|
||||
@@ -854,10 +798,6 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> {
|
||||
self.standard.matches.clear();
|
||||
self.replacer.clear();
|
||||
|
||||
if ctx.kind() == &SinkContextKind::After {
|
||||
self.after_context_remaining =
|
||||
self.after_context_remaining.saturating_sub(1);
|
||||
}
|
||||
if searcher.invert_match() {
|
||||
self.record_matches(searcher, ctx.bytes(), 0..ctx.bytes().len())?;
|
||||
self.replace(searcher, ctx.bytes(), 0..ctx.bytes().len())?;
|
||||
@@ -869,7 +809,7 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> {
|
||||
}
|
||||
|
||||
StandardImpl::from_context(searcher, self, ctx).sink()?;
|
||||
Ok(!self.should_quit())
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
fn context_break(
|
||||
@@ -902,11 +842,7 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> {
|
||||
self.standard.wtr.borrow_mut().reset_count();
|
||||
self.start_time = Instant::now();
|
||||
self.match_count = 0;
|
||||
self.after_context_remaining = 0;
|
||||
self.binary_byte_offset = None;
|
||||
if self.standard.config.max_matches == Some(0) {
|
||||
return Ok(false);
|
||||
}
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
@@ -1450,7 +1386,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
||||
}
|
||||
|
||||
fn write_binary_message(&self, offset: u64) -> io::Result<()> {
|
||||
if self.sink.match_count == 0 {
|
||||
if !self.sink.has_match() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
@@ -2742,11 +2678,10 @@ and exhibited clearly, with a label attached.
|
||||
#[test]
|
||||
fn max_matches() {
|
||||
let matcher = RegexMatcher::new("Sherlock").unwrap();
|
||||
let mut printer = StandardBuilder::new()
|
||||
.max_matches(Some(1))
|
||||
.build(NoColor::new(vec![]));
|
||||
let mut printer = StandardBuilder::new().build(NoColor::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.line_number(false)
|
||||
.max_matches(Some(1))
|
||||
.build()
|
||||
.search_reader(
|
||||
&matcher,
|
||||
@@ -2766,10 +2701,9 @@ For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
fn max_matches_context() {
|
||||
// after context: 1
|
||||
let matcher = RegexMatcher::new("Doctor Watsons").unwrap();
|
||||
let mut printer = StandardBuilder::new()
|
||||
.max_matches(Some(1))
|
||||
.build(NoColor::new(vec![]));
|
||||
let mut printer = StandardBuilder::new().build(NoColor::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.max_matches(Some(1))
|
||||
.line_number(false)
|
||||
.after_context(1)
|
||||
.build()
|
||||
@@ -2788,10 +2722,9 @@ Holmeses, success in the province of detective work must always
|
||||
assert_eq_printed!(expected, got);
|
||||
|
||||
// after context: 4
|
||||
let mut printer = StandardBuilder::new()
|
||||
.max_matches(Some(1))
|
||||
.build(NoColor::new(vec![]));
|
||||
let mut printer = StandardBuilder::new().build(NoColor::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.max_matches(Some(1))
|
||||
.line_number(false)
|
||||
.after_context(4)
|
||||
.build()
|
||||
@@ -2814,10 +2747,9 @@ but Doctor Watson has to have it taken out for him and dusted,
|
||||
|
||||
// after context: 1, max matches: 2
|
||||
let matcher = RegexMatcher::new("Doctor Watsons|but Doctor").unwrap();
|
||||
let mut printer = StandardBuilder::new()
|
||||
.max_matches(Some(2))
|
||||
.build(NoColor::new(vec![]));
|
||||
let mut printer = StandardBuilder::new().build(NoColor::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.max_matches(Some(2))
|
||||
.line_number(false)
|
||||
.after_context(1)
|
||||
.build()
|
||||
@@ -2839,10 +2771,114 @@ and exhibited clearly, with a label attached.
|
||||
assert_eq_printed!(expected, got);
|
||||
|
||||
// after context: 4, max matches: 2
|
||||
let mut printer = StandardBuilder::new()
|
||||
.max_matches(Some(2))
|
||||
.build(NoColor::new(vec![]));
|
||||
let mut printer = StandardBuilder::new().build(NoColor::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.max_matches(Some(2))
|
||||
.line_number(false)
|
||||
.after_context(4)
|
||||
.build()
|
||||
.search_reader(
|
||||
&matcher,
|
||||
SHERLOCK.as_bytes(),
|
||||
printer.sink(&matcher),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let got = printer_contents(&mut printer);
|
||||
let expected = "\
|
||||
For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
Holmeses, success in the province of detective work must always
|
||||
be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
can extract a clew from a wisp of straw or a flake of cigar ash;
|
||||
but Doctor Watson has to have it taken out for him and dusted,
|
||||
and exhibited clearly, with a label attached.
|
||||
";
|
||||
assert_eq_printed!(expected, got);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn max_matches_context_invert() {
|
||||
// after context: 1
|
||||
let matcher =
|
||||
RegexMatcher::new("success|extent|clew|dusted|exhibited").unwrap();
|
||||
let mut printer = StandardBuilder::new().build(NoColor::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.invert_match(true)
|
||||
.max_matches(Some(1))
|
||||
.line_number(false)
|
||||
.after_context(1)
|
||||
.build()
|
||||
.search_reader(
|
||||
&matcher,
|
||||
SHERLOCK.as_bytes(),
|
||||
printer.sink(&matcher),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let got = printer_contents(&mut printer);
|
||||
let expected = "\
|
||||
For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
Holmeses, success in the province of detective work must always
|
||||
";
|
||||
assert_eq_printed!(expected, got);
|
||||
|
||||
// after context: 4
|
||||
let mut printer = StandardBuilder::new().build(NoColor::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.invert_match(true)
|
||||
.max_matches(Some(1))
|
||||
.line_number(false)
|
||||
.after_context(4)
|
||||
.build()
|
||||
.search_reader(
|
||||
&matcher,
|
||||
SHERLOCK.as_bytes(),
|
||||
printer.sink(&matcher),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let got = printer_contents(&mut printer);
|
||||
let expected = "\
|
||||
For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
Holmeses, success in the province of detective work must always
|
||||
be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
can extract a clew from a wisp of straw or a flake of cigar ash;
|
||||
but Doctor Watson has to have it taken out for him and dusted,
|
||||
";
|
||||
assert_eq_printed!(expected, got);
|
||||
|
||||
// after context: 1, max matches: 2
|
||||
let matcher =
|
||||
RegexMatcher::new("success|extent|clew|exhibited").unwrap();
|
||||
let mut printer = StandardBuilder::new().build(NoColor::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.invert_match(true)
|
||||
.max_matches(Some(2))
|
||||
.line_number(false)
|
||||
.after_context(1)
|
||||
.build()
|
||||
.search_reader(
|
||||
&matcher,
|
||||
SHERLOCK.as_bytes(),
|
||||
printer.sink(&matcher),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let got = printer_contents(&mut printer);
|
||||
let expected = "\
|
||||
For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
Holmeses, success in the province of detective work must always
|
||||
--
|
||||
but Doctor Watson has to have it taken out for him and dusted,
|
||||
and exhibited clearly, with a label attached.
|
||||
";
|
||||
assert_eq_printed!(expected, got);
|
||||
|
||||
// after context: 4, max matches: 2
|
||||
let mut printer = StandardBuilder::new().build(NoColor::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.invert_match(true)
|
||||
.max_matches(Some(2))
|
||||
.line_number(false)
|
||||
.after_context(4)
|
||||
.build()
|
||||
@@ -2868,12 +2904,11 @@ and exhibited clearly, with a label attached.
|
||||
#[test]
|
||||
fn max_matches_multi_line1() {
|
||||
let matcher = RegexMatcher::new("(?s:.{0})Sherlock").unwrap();
|
||||
let mut printer = StandardBuilder::new()
|
||||
.max_matches(Some(1))
|
||||
.build(NoColor::new(vec![]));
|
||||
let mut printer = StandardBuilder::new().build(NoColor::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.line_number(false)
|
||||
.multi_line(true)
|
||||
.max_matches(Some(1))
|
||||
.build()
|
||||
.search_reader(
|
||||
&matcher,
|
||||
@@ -2893,12 +2928,11 @@ For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
fn max_matches_multi_line2() {
|
||||
let matcher =
|
||||
RegexMatcher::new(r"(?s)Watson.+?(Holmeses|clearly)").unwrap();
|
||||
let mut printer = StandardBuilder::new()
|
||||
.max_matches(Some(1))
|
||||
.build(NoColor::new(vec![]));
|
||||
let mut printer = StandardBuilder::new().build(NoColor::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.line_number(false)
|
||||
.multi_line(true)
|
||||
.max_matches(Some(1))
|
||||
.build()
|
||||
.search_reader(
|
||||
&matcher,
|
||||
@@ -2915,6 +2949,55 @@ Holmeses, success in the province of detective work must always
|
||||
assert_eq_printed!(expected, got);
|
||||
}
|
||||
|
||||
// Multi-line search with the limit set on the searcher (`max_matches(Some(1))`).
// The pattern `line 2\nline 3` first matches at the start of the haystack,
// where the second line is `line 3 x`. The expected output is the two lines
// touched by that first match; the later `line 2\nline 3` occurrence must not
// be printed.
#[test]
|
||||
fn max_matches_multi_line3() {
|
||||
let matcher = RegexMatcher::new(r"line 2\nline 3").unwrap();
|
||||
let mut printer = StandardBuilder::new().build(NoColor::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.line_number(false)
|
||||
.multi_line(true)
|
||||
.max_matches(Some(1))
|
||||
.build()
|
||||
.search_reader(
|
||||
&matcher,
|
||||
"line 2\nline 3 x\nline 2\nline 3\n".as_bytes(),
|
||||
printer.sink(&matcher),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let got = printer_contents(&mut printer);
|
||||
let expected = "\
|
||||
line 2
|
||||
line 3 x
|
||||
";
|
||||
assert_eq_printed!(expected, got);
|
||||
}
|
||||
|
||||
// Multi-line search with the limit set on the searcher (`max_matches(Some(1))`).
// The first alternative, `line 2\nline 3`, matches at the start of the
// haystack and ends partway through the second line. The second alternative,
// `x\nline 2\n`, could then match starting at the `x` on that same line and
// run onto the third line. The expected output contains only the first two
// lines, i.e. nothing from that second match is printed.
#[test]
|
||||
fn max_matches_multi_line4() {
|
||||
let matcher =
|
||||
RegexMatcher::new(r"line 2\nline 3|x\nline 2\n").unwrap();
|
||||
let mut printer = StandardBuilder::new().build(NoColor::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.line_number(false)
|
||||
.multi_line(true)
|
||||
.max_matches(Some(1))
|
||||
.build()
|
||||
.search_reader(
|
||||
&matcher,
|
||||
"line 2\nline 3 x\nline 2\nline 3 x\n".as_bytes(),
|
||||
printer.sink(&matcher),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let got = printer_contents(&mut printer);
|
||||
let expected = "\
|
||||
line 2
|
||||
line 3 x
|
||||
";
|
||||
assert_eq_printed!(expected, got);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn only_matching() {
|
||||
let matcher = RegexMatcher::new("Doctor Watsons|Sherlock").unwrap();
|
||||
@@ -3847,10 +3930,9 @@ e
|
||||
";
|
||||
|
||||
let matcher = RegexMatcherBuilder::new().build(r"d").unwrap();
|
||||
let mut printer = StandardBuilder::new()
|
||||
.max_matches(Some(1))
|
||||
.build(NoColor::new(vec![]));
|
||||
let mut printer = StandardBuilder::new().build(NoColor::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.max_matches(Some(1))
|
||||
.line_number(true)
|
||||
.after_context(2)
|
||||
.build()
|
||||
|
||||
@@ -33,6 +33,7 @@ pub(crate) struct Core<'s, M: 's, S> {
|
||||
after_context_left: usize,
|
||||
has_sunk: bool,
|
||||
has_matched: bool,
|
||||
count: u64,
|
||||
}
|
||||
|
||||
impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
@@ -59,6 +60,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
after_context_left: 0,
|
||||
has_sunk: false,
|
||||
has_matched: false,
|
||||
count: 0,
|
||||
};
|
||||
if !core.searcher.multi_line_with_matcher(&core.matcher) {
|
||||
if core.is_line_by_line_fast() {
|
||||
@@ -78,6 +80,14 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
self.pos = pos;
|
||||
}
|
||||
|
||||
/// Returns the current value of this core's `count` field.
///
/// `has_exceeded_match_limit` compares this value against
/// `config.max_matches`.
fn count(&self) -> u64 {
|
||||
self.count
|
||||
}
|
||||
|
||||
/// Adds one to this core's `count` field.
///
/// `find` calls this when the matcher reports a match, and the line
/// oriented search paths call it when a line is counted as a match.
fn increment_count(&mut self) {
|
||||
self.count += 1;
|
||||
}
|
||||
|
||||
pub(crate) fn binary_byte_offset(&self) -> Option<u64> {
|
||||
self.binary_byte_offset.map(|offset| offset as u64)
|
||||
}
|
||||
@@ -101,6 +111,47 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
self.sink.binary_data(&self.searcher, binary_byte_offset)
|
||||
}
|
||||
|
||||
/// Reports whether the matcher matches `line` once its line terminator
/// has been removed.
///
/// Errors from the matcher are converted with `S::Error::error_message`.
/// This does not consult or update the match count.
fn is_match(&self, line: &[u8]) -> Result<bool, S::Error> {
|
||||
// We need to strip the line terminator here to match the
|
||||
// semantics of line-by-line searching. Namely, regexes
|
||||
// like `(?m)^$` can match at the final position beyond a
|
||||
// line terminator, which is non-sensical in line oriented
|
||||
// matching.
|
||||
let line = lines::without_terminator(line, self.config.line_term);
|
||||
self.matcher.is_match(line).map_err(S::Error::error_message)
|
||||
}
|
||||
|
||||
/// Runs the matcher's `find` over `slice` and returns the match it
/// reports, if any.
///
/// If `has_exceeded_match_limit` is already true, the matcher is not
/// called and `Ok(None)` is returned. Each reported match increments the
/// match count. The returned range is exactly what the matcher produced
/// for `slice`; no offset is applied here.
///
/// Errors from the matcher are converted with `S::Error::error_message`.
pub(crate) fn find(
|
||||
&mut self,
|
||||
slice: &[u8],
|
||||
) -> Result<Option<Range>, S::Error> {
|
||||
if self.has_exceeded_match_limit() {
|
||||
return Ok(None);
|
||||
}
|
||||
match self.matcher().find(slice) {
|
||||
Err(err) => Err(S::Error::error_message(err)),
|
||||
Ok(None) => Ok(None),
|
||||
Ok(Some(m)) => {
|
||||
self.increment_count();
|
||||
Ok(Some(m))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Runs the matcher's `shortest_match` over `slice` and returns its
/// result.
///
/// If `has_exceeded_match_limit` is already true, the matcher is not
/// called and `Ok(None)` is returned. Unlike `find`, this does not
/// increment the match count itself.
///
/// Errors from the matcher are converted with `S::Error::error_message`.
fn shortest_match(
|
||||
&mut self,
|
||||
slice: &[u8],
|
||||
) -> Result<Option<usize>, S::Error> {
|
||||
if self.has_exceeded_match_limit() {
|
||||
return Ok(None);
|
||||
}
|
||||
match self.matcher.shortest_match(slice) {
|
||||
Err(err) => return Err(S::Error::error_message(err)),
|
||||
Ok(None) => return Ok(None),
|
||||
Ok(Some(m)) => Ok(Some(m)),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn begin(&mut self) -> Result<bool, S::Error> {
|
||||
self.sink.begin(&self.searcher)
|
||||
}
|
||||
@@ -226,6 +277,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
if self.after_context_left == 0 {
|
||||
return Ok(true);
|
||||
}
|
||||
let exceeded_match_limit = self.has_exceeded_match_limit();
|
||||
let range = Range::new(self.last_line_visited, upto);
|
||||
let mut stepper = LineStep::new(
|
||||
self.config.line_term.as_byte(),
|
||||
@@ -233,7 +285,16 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
range.end(),
|
||||
);
|
||||
while let Some(line) = stepper.next_match(buf) {
|
||||
if !self.sink_after_context(buf, &line)? {
|
||||
if exceeded_match_limit
|
||||
&& self.is_match(&buf[line])? != self.config.invert_match
|
||||
{
|
||||
let after_context_left = self.after_context_left;
|
||||
self.set_pos(line.end());
|
||||
if !self.sink_matched(buf, &line)? {
|
||||
return Ok(false);
|
||||
}
|
||||
self.after_context_left = after_context_left - 1;
|
||||
} else if !self.sink_after_context(buf, &line)? {
|
||||
return Ok(false);
|
||||
}
|
||||
if self.after_context_left == 0 {
|
||||
@@ -272,6 +333,12 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
range.end(),
|
||||
);
|
||||
while let Some(line) = stepper.next_match(buf) {
|
||||
if self.has_exceeded_match_limit()
|
||||
&& !self.config.passthru
|
||||
&& self.after_context_left == 0
|
||||
{
|
||||
return Ok(false);
|
||||
}
|
||||
let matched = {
|
||||
// Stripping the line terminator is necessary to prevent some
|
||||
// classes of regexes from matching the empty position *after*
|
||||
@@ -281,15 +348,14 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
&buf[line],
|
||||
self.config.line_term,
|
||||
);
|
||||
match self.matcher.shortest_match(slice) {
|
||||
Err(err) => return Err(S::Error::error_message(err)),
|
||||
Ok(result) => result.is_some(),
|
||||
}
|
||||
self.shortest_match(slice)?.is_some()
|
||||
};
|
||||
self.set_pos(line.end());
|
||||
|
||||
let success = matched != self.config.invert_match;
|
||||
if success {
|
||||
self.has_matched = true;
|
||||
self.increment_count();
|
||||
if !self.before_context_by_line(buf, line.start())? {
|
||||
return Ok(false);
|
||||
}
|
||||
@@ -325,10 +391,11 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
}
|
||||
if self.config.invert_match {
|
||||
if !self.match_by_line_fast_invert(buf)? {
|
||||
return Ok(Stop);
|
||||
break;
|
||||
}
|
||||
} else if let Some(line) = self.find_by_line_fast(buf)? {
|
||||
self.has_matched = true;
|
||||
self.increment_count();
|
||||
if self.config.max_context() > 0 {
|
||||
if !self.after_context_by_line(buf, line.start())? {
|
||||
return Ok(Stop);
|
||||
@@ -348,6 +415,9 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
if !self.after_context_by_line(buf, buf.len())? {
|
||||
return Ok(Stop);
|
||||
}
|
||||
if self.has_exceeded_match_limit() && self.after_context_left == 0 {
|
||||
return Ok(Stop);
|
||||
}
|
||||
self.set_pos(buf.len());
|
||||
Ok(Continue)
|
||||
}
|
||||
@@ -387,16 +457,20 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
invert_match.end(),
|
||||
);
|
||||
while let Some(line) = stepper.next_match(buf) {
|
||||
self.increment_count();
|
||||
if !self.sink_matched(buf, &line)? {
|
||||
return Ok(false);
|
||||
}
|
||||
if self.has_exceeded_match_limit() {
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn find_by_line_fast(
|
||||
&self,
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
) -> Result<Option<Range>, S::Error> {
|
||||
debug_assert!(!self.searcher.multi_line_with_matcher(&self.matcher));
|
||||
@@ -404,6 +478,9 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
|
||||
let mut pos = self.pos();
|
||||
while !buf[pos..].is_empty() {
|
||||
if self.has_exceeded_match_limit() {
|
||||
return Ok(None);
|
||||
}
|
||||
match self.matcher.find_candidate_line(&buf[pos..]) {
|
||||
Err(err) => return Err(S::Error::error_message(err)),
|
||||
Ok(None) => return Ok(None),
|
||||
@@ -427,23 +504,10 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
self.config.line_term.as_byte(),
|
||||
Range::zero(i).offset(pos),
|
||||
);
|
||||
// We need to strip the line terminator here to match the
|
||||
// semantics of line-by-line searching. Namely, regexes
|
||||
// like `(?m)^$` can match at the final position beyond a
|
||||
// line terminator, which is non-sensical in line oriented
|
||||
// matching.
|
||||
let slice = lines::without_terminator(
|
||||
&buf[line],
|
||||
self.config.line_term,
|
||||
);
|
||||
match self.matcher.is_match(slice) {
|
||||
Err(err) => return Err(S::Error::error_message(err)),
|
||||
Ok(true) => return Ok(Some(line)),
|
||||
Ok(false) => {
|
||||
pos = line.end();
|
||||
continue;
|
||||
}
|
||||
if self.is_match(&buf[line])? {
|
||||
return Ok(Some(line));
|
||||
}
|
||||
pos = line.end();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -638,4 +702,8 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Returns true when a match limit is configured and the match count has
/// reached it.
///
/// The comparison is `>=`, so this becomes true as soon as the count
/// equals the limit (and is immediately true for a limit of `0`). When
/// `config.max_matches` is `None`, this always returns false.
fn has_exceeded_match_limit(&self) -> bool {
|
||||
self.config.max_matches.map_or(false, |limit| self.count() >= limit)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -325,11 +325,9 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
|
||||
}
|
||||
|
||||
fn find(&mut self) -> Result<Option<Range>, S::Error> {
|
||||
match self.core.matcher().find(&self.slice[self.core.pos()..]) {
|
||||
Err(err) => Err(S::Error::error_message(err)),
|
||||
Ok(None) => Ok(None),
|
||||
Ok(Some(m)) => Ok(Some(m.offset(self.core.pos()))),
|
||||
}
|
||||
self.core
|
||||
.find(&self.slice[self.core.pos()..])
|
||||
.map(|m| m.map(|m| m.offset(self.core.pos())))
|
||||
}
|
||||
|
||||
/// Advance the search position based on the previous match.
|
||||
|
||||
@@ -180,6 +180,8 @@ pub struct Config {
|
||||
/// Whether to stop searching when a non-matching line is found after a
|
||||
/// matching line.
|
||||
stop_on_nonmatch: bool,
|
||||
/// The maximum number of matches this searcher should emit.
|
||||
max_matches: Option<u64>,
|
||||
}
|
||||
|
||||
impl Default for Config {
|
||||
@@ -198,6 +200,7 @@ impl Default for Config {
|
||||
encoding: None,
|
||||
bom_sniffing: true,
|
||||
stop_on_nonmatch: false,
|
||||
max_matches: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -564,6 +567,23 @@ impl SearcherBuilder {
|
||||
self.config.stop_on_nonmatch = stop_on_nonmatch;
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the maximum number of matches that should be emitted by this
|
||||
/// searcher.
|
||||
///
|
||||
/// If multi line search is enabled and a match spans multiple lines, then
|
||||
/// that match is counted exactly once for the purposes of enforcing this
|
||||
/// limit, regardless of how many lines it spans.
|
||||
///
|
||||
/// Note that `0` is a legal value. This will cause the searcher to
|
||||
/// immediately quit without searching anything.
|
||||
///
|
||||
/// By default, no limit is set.
|
||||
#[inline]
|
||||
pub fn max_matches(&mut self, limit: Option<u64>) -> &mut SearcherBuilder {
|
||||
self.config.max_matches = limit;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// A searcher executes searches over a haystack and writes results to a caller
|
||||
@@ -845,13 +865,27 @@ impl Searcher {
|
||||
self.config.multi_line
|
||||
}
|
||||
|
||||
/// Returns true if and only if this searcher is configured to stop when in
|
||||
/// Returns true if and only if this searcher is configured to stop when it
|
||||
/// finds a non-matching line after a matching one.
|
||||
#[inline]
|
||||
pub fn stop_on_nonmatch(&self) -> bool {
|
||||
self.config.stop_on_nonmatch
|
||||
}
|
||||
|
||||
/// Returns the maximum number of matches emitted by this searcher, if
|
||||
/// such a limit was set.
|
||||
///
|
||||
/// If multi line search is enabled and a match spans multiple lines, then
|
||||
/// that match is counted exactly once for the purposes of enforcing this
|
||||
/// limit, regardless of how many lines it spans.
|
||||
///
|
||||
/// Note that `0` is a legal value. This will cause the searcher to
|
||||
/// immediately quit without searching anything.
|
||||
#[inline]
|
||||
pub fn max_matches(&self) -> Option<u64> {
|
||||
self.config.max_matches
|
||||
}
|
||||
|
||||
/// Returns true if and only if this searcher will choose a multi-line
|
||||
/// strategy given the provided matcher.
|
||||
///
|
||||
|
||||
@@ -1459,6 +1459,7 @@ rgtest!(r2658_null_data_line_regexp, |dir: Dir, mut cmd: TestCommand| {
|
||||
rgtest!(r2944_incorrect_bytes_searched, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create("haystack", "foo1\nfoo2\nfoo3\nfoo4\nfoo5\n");
|
||||
let got = cmd.args(&["--stats", "-m2", "foo", "."]).stdout();
|
||||
println!("{got}");
|
||||
assert!(got.contains("10 bytes searched\n"));
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user