diff --git a/CHANGELOG.md b/CHANGELOG.md index 89a9639..dcc23d0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,8 @@ Bug fixes: Fix a bug where the "bytes searched" in `--stats` output could be incorrect. * [BUG #2990](https://github.com/BurntSushi/ripgrep/issues/2990): Fix a bug where ripgrep would mishandle globs that ended with a `.`. +* [BUG #3076](https://github.com/BurntSushi/ripgrep/issues/3076): + Fix bug with `-m/--max-count` and `-U/--multiline` showing too many matches. * [BUG #3108](https://github.com/BurntSushi/ripgrep/issues/3108): Fix a bug where `-q --files-without-match` inverted the exit code. * [BUG #3140](https://github.com/BurntSushi/ripgrep/issues/3140): diff --git a/crates/core/flags/defs.rs b/crates/core/flags/defs.rs index 6e53a4f..c2320d7 100644 --- a/crates/core/flags/defs.rs +++ b/crates/core/flags/defs.rs @@ -3877,6 +3877,10 @@ impl Flag for MaxCount { r" Limit the number of matching lines per file searched to \fINUM\fP. .sp +When \flag{multiline} is used, a single match that spans multiple lines is only +counted once for the purposes of this limit. Multiple matches in a single line +are counted only once, as they would be in non-multiline mode. +.sp Note that \fB0\fP is a legal value but not likely to be useful. When used, ripgrep won't search anything. 
" diff --git a/crates/core/flags/hiargs.rs b/crates/core/flags/hiargs.rs index 7bf63e7..6f440ff 100644 --- a/crates/core/flags/hiargs.rs +++ b/crates/core/flags/hiargs.rs @@ -617,7 +617,6 @@ impl HiArgs { .hyperlink(self.hyperlink_config.clone()) .max_columns_preview(self.max_columns_preview) .max_columns(self.max_columns) - .max_matches(self.max_count) .only_matching(self.only_matching) .path(self.with_filename) .path_terminator(self.path_terminator.clone()) @@ -719,6 +718,7 @@ impl HiArgs { }; let mut builder = grep::searcher::SearcherBuilder::new(); builder + .max_matches(self.max_count) .line_terminator(line_term) .invert_match(self.invert_match) .line_number(self.line_number) diff --git a/crates/printer/src/standard.rs b/crates/printer/src/standard.rs index b6f62ff..494901a 100644 --- a/crates/printer/src/standard.rs +++ b/crates/printer/src/standard.rs @@ -11,8 +11,7 @@ use { bstr::ByteSlice, grep_matcher::{Match, Matcher}, grep_searcher::{ - LineStep, Searcher, Sink, SinkContext, SinkContextKind, SinkFinish, - SinkMatch, + LineStep, Searcher, Sink, SinkContext, SinkFinish, SinkMatch, }, termcolor::{ColorSpec, NoColor, WriteColor}, }; @@ -46,7 +45,6 @@ struct Config { replacement: Arc>>, max_columns: Option, max_columns_preview: bool, - max_matches: Option, column: bool, byte_offset: bool, trim_ascii: bool, @@ -72,7 +70,6 @@ impl Default for Config { replacement: Arc::new(None), max_columns: None, max_columns_preview: false, - max_matches: None, column: false, byte_offset: false, trim_ascii: false, @@ -326,16 +323,6 @@ impl StandardBuilder { self } - /// Set the maximum amount of matching lines that are printed. - /// - /// If multi line search is enabled and a match spans multiple lines, then - /// that match is counted exactly once for the purposes of enforcing this - /// limit, regardless of how many lines it spans. 
- pub fn max_matches(&mut self, limit: Option) -> &mut StandardBuilder { - self.config.max_matches = limit; - self - } - /// Print the column number of the first match in a line. /// /// This option is convenient for use with `per_match` which will print a @@ -541,7 +528,6 @@ impl Standard { path: None, start_time: Instant::now(), match_count: 0, - after_context_remaining: 0, binary_byte_offset: None, stats, needs_match_granularity, @@ -578,7 +564,6 @@ impl Standard { path: Some(ppath), start_time: Instant::now(), match_count: 0, - after_context_remaining: 0, binary_byte_offset: None, stats, needs_match_granularity, @@ -659,7 +644,6 @@ pub struct StandardSink<'p, 's, M: Matcher, W> { path: Option>, start_time: Instant, match_count: u64, - after_context_remaining: u64, binary_byte_offset: Option, stats: Option, needs_match_granularity: bool, @@ -774,32 +758,6 @@ impl<'p, 's, M: Matcher, W: WriteColor> StandardSink<'p, 's, M, W> { } Ok(()) } - - /// Returns true if this printer should quit. - /// - /// This implements the logic for handling quitting after seeing a certain - /// amount of matches. In most cases, the logic is simple, but we must - /// permit all "after" contextual lines to print after reaching the limit. - fn should_quit(&self) -> bool { - let limit = match self.standard.config.max_matches { - None => return false, - Some(limit) => limit, - }; - if self.match_count < limit { - return false; - } - self.after_context_remaining == 0 - } - - /// Returns whether the current match count exceeds the configured limit. - /// If there is no limit, then this always returns false. 
- fn match_more_than_limit(&self) -> bool { - let limit = match self.standard.config.max_matches { - None => return false, - Some(limit) => limit, - }; - self.match_count > limit - } } impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> { @@ -811,19 +769,6 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> { mat: &SinkMatch<'_>, ) -> Result { self.match_count += 1; - // When we've exceeded our match count, then the remaining context - // lines should not be reset, but instead, decremented. This avoids a - // bug where we display more matches than a configured limit. The main - // idea here is that 'matched' might be called again while printing - // an after-context line. In that case, we should treat this as a - // contextual line rather than a matching line for the purposes of - // termination. - if self.match_more_than_limit() { - self.after_context_remaining = - self.after_context_remaining.saturating_sub(1); - } else { - self.after_context_remaining = searcher.after_context() as u64; - } self.record_matches( searcher, @@ -841,9 +786,8 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> { return Ok(false); } } - StandardImpl::from_match(searcher, self, mat).sink()?; - Ok(!self.should_quit()) + Ok(true) } fn context( @@ -854,10 +798,6 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> { self.standard.matches.clear(); self.replacer.clear(); - if ctx.kind() == &SinkContextKind::After { - self.after_context_remaining = - self.after_context_remaining.saturating_sub(1); - } if searcher.invert_match() { self.record_matches(searcher, ctx.bytes(), 0..ctx.bytes().len())?; self.replace(searcher, ctx.bytes(), 0..ctx.bytes().len())?; @@ -869,7 +809,7 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> { } StandardImpl::from_context(searcher, self, ctx).sink()?; - Ok(!self.should_quit()) + Ok(true) } fn context_break( @@ -902,11 +842,7 @@ 
impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> { self.standard.wtr.borrow_mut().reset_count(); self.start_time = Instant::now(); self.match_count = 0; - self.after_context_remaining = 0; self.binary_byte_offset = None; - if self.standard.config.max_matches == Some(0) { - return Ok(false); - } Ok(true) } @@ -1450,7 +1386,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { } fn write_binary_message(&self, offset: u64) -> io::Result<()> { - if self.sink.match_count == 0 { + if !self.sink.has_match() { return Ok(()); } @@ -2742,11 +2678,10 @@ and exhibited clearly, with a label attached. #[test] fn max_matches() { let matcher = RegexMatcher::new("Sherlock").unwrap(); - let mut printer = StandardBuilder::new() - .max_matches(Some(1)) - .build(NoColor::new(vec![])); + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) + .max_matches(Some(1)) .build() .search_reader( &matcher, @@ -2766,10 +2701,9 @@ For the Doctor Watsons of this world, as opposed to the Sherlock fn max_matches_context() { // after context: 1 let matcher = RegexMatcher::new("Doctor Watsons").unwrap(); - let mut printer = StandardBuilder::new() - .max_matches(Some(1)) - .build(NoColor::new(vec![])); + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); SearcherBuilder::new() + .max_matches(Some(1)) .line_number(false) .after_context(1) .build() @@ -2788,10 +2722,9 @@ Holmeses, success in the province of detective work must always assert_eq_printed!(expected, got); // after context: 4 - let mut printer = StandardBuilder::new() - .max_matches(Some(1)) - .build(NoColor::new(vec![])); + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); SearcherBuilder::new() + .max_matches(Some(1)) .line_number(false) .after_context(4) .build() @@ -2814,10 +2747,9 @@ but Doctor Watson has to have it taken out for him and dusted, // after context: 1, max matches: 2 let matcher = 
RegexMatcher::new("Doctor Watsons|but Doctor").unwrap(); - let mut printer = StandardBuilder::new() - .max_matches(Some(2)) - .build(NoColor::new(vec![])); + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); SearcherBuilder::new() + .max_matches(Some(2)) .line_number(false) .after_context(1) .build() @@ -2839,10 +2771,114 @@ and exhibited clearly, with a label attached. assert_eq_printed!(expected, got); // after context: 4, max matches: 2 - let mut printer = StandardBuilder::new() - .max_matches(Some(2)) - .build(NoColor::new(vec![])); + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); SearcherBuilder::new() + .max_matches(Some(2)) + .line_number(false) + .after_context(4) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +be, to a very large extent, the result of luck. Sherlock Holmes +can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached. 
+"; + assert_eq_printed!(expected, got); + } + + #[test] + fn max_matches_context_invert() { + // after context: 1 + let matcher = + RegexMatcher::new("success|extent|clew|dusted|exhibited").unwrap(); + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); + SearcherBuilder::new() + .invert_match(true) + .max_matches(Some(1)) + .line_number(false) + .after_context(1) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +"; + assert_eq_printed!(expected, got); + + // after context: 4 + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); + SearcherBuilder::new() + .invert_match(true) + .max_matches(Some(1)) + .line_number(false) + .after_context(4) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +be, to a very large extent, the result of luck. 
Sherlock Holmes +can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, +"; + assert_eq_printed!(expected, got); + + // after context: 1, max matches: 2 + let matcher = + RegexMatcher::new("success|extent|clew|exhibited").unwrap(); + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); + SearcherBuilder::new() + .invert_match(true) + .max_matches(Some(2)) + .line_number(false) + .after_context(1) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +-- +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached. +"; + assert_eq_printed!(expected, got); + + // after context: 4, max matches: 2 + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); + SearcherBuilder::new() + .invert_match(true) + .max_matches(Some(2)) .line_number(false) .after_context(4) .build() @@ -2868,12 +2904,11 @@ and exhibited clearly, with a label attached. 
#[test] fn max_matches_multi_line1() { let matcher = RegexMatcher::new("(?s:.{0})Sherlock").unwrap(); - let mut printer = StandardBuilder::new() - .max_matches(Some(1)) - .build(NoColor::new(vec![])); + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .multi_line(true) + .max_matches(Some(1)) .build() .search_reader( &matcher, @@ -2893,12 +2928,11 @@ For the Doctor Watsons of this world, as opposed to the Sherlock fn max_matches_multi_line2() { let matcher = RegexMatcher::new(r"(?s)Watson.+?(Holmeses|clearly)").unwrap(); - let mut printer = StandardBuilder::new() - .max_matches(Some(1)) - .build(NoColor::new(vec![])); + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .multi_line(true) + .max_matches(Some(1)) .build() .search_reader( &matcher, @@ -2915,6 +2949,55 @@ Holmeses, success in the province of detective work must always assert_eq_printed!(expected, got); } + #[test] + fn max_matches_multi_line3() { + let matcher = RegexMatcher::new(r"line 2\nline 3").unwrap(); + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(false) + .multi_line(true) + .max_matches(Some(1)) + .build() + .search_reader( + &matcher, + "line 2\nline 3 x\nline 2\nline 3\n".as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +line 2 +line 3 x +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn max_matches_multi_line4() { + let matcher = + RegexMatcher::new(r"line 2\nline 3|x\nline 2\n").unwrap(); + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(false) + .multi_line(true) + .max_matches(Some(1)) + .build() + .search_reader( + &matcher, + "line 2\nline 3 x\nline 2\nline 3 x\n".as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut 
printer); + let expected = "\ +line 2 +line 3 x +"; + assert_eq_printed!(expected, got); + } + #[test] fn only_matching() { let matcher = RegexMatcher::new("Doctor Watsons|Sherlock").unwrap(); @@ -3847,10 +3930,9 @@ e "; let matcher = RegexMatcherBuilder::new().build(r"d").unwrap(); - let mut printer = StandardBuilder::new() - .max_matches(Some(1)) - .build(NoColor::new(vec![])); + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); SearcherBuilder::new() + .max_matches(Some(1)) .line_number(true) .after_context(2) .build() diff --git a/crates/searcher/src/searcher/core.rs b/crates/searcher/src/searcher/core.rs index 7d7e5cd..87eaf7f 100644 --- a/crates/searcher/src/searcher/core.rs +++ b/crates/searcher/src/searcher/core.rs @@ -33,6 +33,7 @@ pub(crate) struct Core<'s, M: 's, S> { after_context_left: usize, has_sunk: bool, has_matched: bool, + count: u64, } impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { @@ -59,6 +60,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { after_context_left: 0, has_sunk: false, has_matched: false, + count: 0, }; if !core.searcher.multi_line_with_matcher(&core.matcher) { if core.is_line_by_line_fast() { @@ -78,6 +80,14 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { self.pos = pos; } + fn count(&self) -> u64 { + self.count + } + + fn increment_count(&mut self) { + self.count += 1; + } + pub(crate) fn binary_byte_offset(&self) -> Option { self.binary_byte_offset.map(|offset| offset as u64) } @@ -101,6 +111,47 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { self.sink.binary_data(&self.searcher, binary_byte_offset) } + fn is_match(&self, line: &[u8]) -> Result { + // We need to strip the line terminator here to match the + // semantics of line-by-line searching. Namely, regexes + // like `(?m)^$` can match at the final position beyond a + // line terminator, which is non-sensical in line oriented + // matching. 
+ let line = lines::without_terminator(line, self.config.line_term); + self.matcher.is_match(line).map_err(S::Error::error_message) + } + + pub(crate) fn find( + &mut self, + slice: &[u8], + ) -> Result, S::Error> { + if self.has_exceeded_match_limit() { + return Ok(None); + } + match self.matcher().find(slice) { + Err(err) => Err(S::Error::error_message(err)), + Ok(None) => Ok(None), + Ok(Some(m)) => { + self.increment_count(); + Ok(Some(m)) + } + } + } + + fn shortest_match( + &mut self, + slice: &[u8], + ) -> Result, S::Error> { + if self.has_exceeded_match_limit() { + return Ok(None); + } + match self.matcher.shortest_match(slice) { + Err(err) => return Err(S::Error::error_message(err)), + Ok(None) => return Ok(None), + Ok(Some(m)) => Ok(Some(m)), + } + } + pub(crate) fn begin(&mut self) -> Result { self.sink.begin(&self.searcher) } @@ -226,6 +277,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { if self.after_context_left == 0 { return Ok(true); } + let exceeded_match_limit = self.has_exceeded_match_limit(); let range = Range::new(self.last_line_visited, upto); let mut stepper = LineStep::new( self.config.line_term.as_byte(), @@ -233,7 +285,16 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { range.end(), ); while let Some(line) = stepper.next_match(buf) { - if !self.sink_after_context(buf, &line)? { + if exceeded_match_limit + && self.is_match(&buf[line])? != self.config.invert_match + { + let after_context_left = self.after_context_left; + self.set_pos(line.end()); + if !self.sink_matched(buf, &line)? { + return Ok(false); + } + self.after_context_left = after_context_left - 1; + } else if !self.sink_after_context(buf, &line)? 
{ return Ok(false); } if self.after_context_left == 0 { @@ -272,6 +333,12 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { range.end(), ); while let Some(line) = stepper.next_match(buf) { + if self.has_exceeded_match_limit() + && !self.config.passthru + && self.after_context_left == 0 + { + return Ok(false); + } let matched = { // Stripping the line terminator is necessary to prevent some // classes of regexes from matching the empty position *after* @@ -281,15 +348,14 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { &buf[line], self.config.line_term, ); - match self.matcher.shortest_match(slice) { - Err(err) => return Err(S::Error::error_message(err)), - Ok(result) => result.is_some(), - } + self.shortest_match(slice)?.is_some() }; self.set_pos(line.end()); + let success = matched != self.config.invert_match; if success { self.has_matched = true; + self.increment_count(); if !self.before_context_by_line(buf, line.start())? { return Ok(false); } @@ -325,10 +391,11 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { } if self.config.invert_match { if !self.match_by_line_fast_invert(buf)? { - return Ok(Stop); + break; } } else if let Some(line) = self.find_by_line_fast(buf)? { self.has_matched = true; + self.increment_count(); if self.config.max_context() > 0 { if !self.after_context_by_line(buf, line.start())? { return Ok(Stop); @@ -348,6 +415,9 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { if !self.after_context_by_line(buf, buf.len())? { return Ok(Stop); } + if self.has_exceeded_match_limit() && self.after_context_left == 0 { + return Ok(Stop); + } self.set_pos(buf.len()); Ok(Continue) } @@ -387,16 +457,20 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { invert_match.end(), ); while let Some(line) = stepper.next_match(buf) { + self.increment_count(); if !self.sink_matched(buf, &line)? 
{ return Ok(false); } + if self.has_exceeded_match_limit() { + return Ok(false); + } } Ok(true) } #[inline(always)] fn find_by_line_fast( - &self, + &mut self, buf: &[u8], ) -> Result, S::Error> { debug_assert!(!self.searcher.multi_line_with_matcher(&self.matcher)); @@ -404,6 +478,9 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { let mut pos = self.pos(); while !buf[pos..].is_empty() { + if self.has_exceeded_match_limit() { + return Ok(None); + } match self.matcher.find_candidate_line(&buf[pos..]) { Err(err) => return Err(S::Error::error_message(err)), Ok(None) => return Ok(None), @@ -427,23 +504,10 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { self.config.line_term.as_byte(), Range::zero(i).offset(pos), ); - // We need to strip the line terminator here to match the - // semantics of line-by-line searching. Namely, regexes - // like `(?m)^$` can match at the final position beyond a - // line terminator, which is non-sensical in line oriented - // matching. - let slice = lines::without_terminator( - &buf[line], - self.config.line_term, - ); - match self.matcher.is_match(slice) { - Err(err) => return Err(S::Error::error_message(err)), - Ok(true) => return Ok(Some(line)), - Ok(false) => { - pos = line.end(); - continue; - } + if self.is_match(&buf[line])? 
{ + return Ok(Some(line)); } + pos = line.end(); } } } @@ -638,4 +702,8 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { } false } + + fn has_exceeded_match_limit(&self) -> bool { + self.config.max_matches.map_or(false, |limit| self.count() >= limit) + } } diff --git a/crates/searcher/src/searcher/glue.rs b/crates/searcher/src/searcher/glue.rs index 5db5701..8c15bb3 100644 --- a/crates/searcher/src/searcher/glue.rs +++ b/crates/searcher/src/searcher/glue.rs @@ -325,11 +325,9 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> { } fn find(&mut self) -> Result, S::Error> { - match self.core.matcher().find(&self.slice[self.core.pos()..]) { - Err(err) => Err(S::Error::error_message(err)), - Ok(None) => Ok(None), - Ok(Some(m)) => Ok(Some(m.offset(self.core.pos()))), - } + self.core + .find(&self.slice[self.core.pos()..]) + .map(|m| m.map(|m| m.offset(self.core.pos()))) } /// Advance the search position based on the previous match. diff --git a/crates/searcher/src/searcher/mod.rs b/crates/searcher/src/searcher/mod.rs index c7d3b1d..ccd8945 100644 --- a/crates/searcher/src/searcher/mod.rs +++ b/crates/searcher/src/searcher/mod.rs @@ -180,6 +180,8 @@ pub struct Config { /// Whether to stop searching when a non-matching line is found after a /// matching line. stop_on_nonmatch: bool, + /// The maximum number of matches this searcher should emit. + max_matches: Option, } impl Default for Config { @@ -198,6 +200,7 @@ impl Default for Config { encoding: None, bom_sniffing: true, stop_on_nonmatch: false, + max_matches: None, } } } @@ -564,6 +567,23 @@ impl SearcherBuilder { self.config.stop_on_nonmatch = stop_on_nonmatch; self } + + /// Sets the maximum number of matches that should be emitted by this + /// searcher. + /// + /// If multi line search is enabled and a match spans multiple lines, then + /// that match is counted exactly once for the purposes of enforcing this + /// limit, regardless of how many lines it spans. + /// + /// Note that `0` is a legal value. 
This will cause the searcher to + /// immediately quit without searching anything. + /// + /// By default, no limit is set. + #[inline] + pub fn max_matches(&mut self, limit: Option) -> &mut SearcherBuilder { + self.config.max_matches = limit; + self + } } /// A searcher executes searches over a haystack and writes results to a caller @@ -845,13 +865,27 @@ impl Searcher { self.config.multi_line } - /// Returns true if and only if this searcher is configured to stop when in + /// Returns true if and only if this searcher is configured to stop when it /// finds a non-matching line after a matching one. #[inline] pub fn stop_on_nonmatch(&self) -> bool { self.config.stop_on_nonmatch } + /// Returns the maximum number of matches emitted by this searcher, if + /// such a limit was set. + /// + /// If multi line search is enabled and a match spans multiple lines, then + /// that match is counted exactly once for the purposes of enforcing this + /// limit, regardless of how many lines it spans. + /// + /// Note that `0` is a legal value. This will cause the searcher to + /// immediately quit without searching anything. + #[inline] + pub fn max_matches(&self) -> Option { + self.config.max_matches + } + /// Returns true if and only if this searcher will choose a multi-line /// strategy given the provided matcher. /// diff --git a/tests/regression.rs b/tests/regression.rs index 44675c4..ac806ae 100644 --- a/tests/regression.rs +++ b/tests/regression.rs @@ -1459,6 +1459,7 @@ rgtest!(r2658_null_data_line_regexp, |dir: Dir, mut cmd: TestCommand| { rgtest!(r2944_incorrect_bytes_searched, |dir: Dir, mut cmd: TestCommand| { dir.create("haystack", "foo1\nfoo2\nfoo3\nfoo4\nfoo5\n"); let got = cmd.args(&["--stats", "-m2", "foo", "."]).stdout(); + println!("{got}"); assert!(got.contains("10 bytes searched\n")); });