From a6e0be3c909c5c09e5fb402c907f3beb88cfb4c4 Mon Sep 17 00:00:00 2001 From: Pavel Safronov Date: Sun, 6 Jul 2025 17:37:07 +0000 Subject: [PATCH] searcher: move "max matches" from printer to searcher This is a bit of a brutal change, but I believe is necessary in order to fix a bug in how we handle the "max matches" limit in multi-line mode while simultaneously handling context lines correctly. The main problem here is that "max matches" refers to the shorter of "one match per line" or "a single match." In typical grep, matches *can't* span multiple lines, so there's never a difference. But in multi-line mode, they can. So match counts necessarily must be handled differently for multi-line mode. The printer was previously responsible for this. But for $reasons, the printer is fundamentally not in charge of how matches are found and reported. See my comments in #3094 for even more context. This is a breaking change for `grep-printer`. Fixes #3076, Closes #3094 --- CHANGELOG.md | 2 + crates/core/flags/defs.rs | 4 + crates/core/flags/hiargs.rs | 2 +- crates/printer/src/standard.rs | 266 ++++++++++++++++++--------- crates/searcher/src/searcher/core.rs | 114 +++++++++--- crates/searcher/src/searcher/glue.rs | 8 +- crates/searcher/src/searcher/mod.rs | 36 +++- tests/regression.rs | 1 + 8 files changed, 311 insertions(+), 122 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 89a9639..dcc23d0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,8 @@ Bug fixes: Fix a bug where the "bytes searched" in `--stats` output could be incorrect. * [BUG #2990](https://github.com/BurntSushi/ripgrep/issues/2990): Fix a bug where ripgrep would mishandle globs that ended with a `.`. +* [BUG #3076](https://github.com/BurntSushi/ripgrep/issues/3076): + Fix bug with `-m/--max-count` and `-U/--multiline` showing too many matches. * [BUG #3108](https://github.com/BurntSushi/ripgrep/issues/3108): Fix a bug where `-q --files-without-match` inverted the exit code. 
* [BUG #3140](https://github.com/BurntSushi/ripgrep/issues/3140): diff --git a/crates/core/flags/defs.rs b/crates/core/flags/defs.rs index 6e53a4f..c2320d7 100644 --- a/crates/core/flags/defs.rs +++ b/crates/core/flags/defs.rs @@ -3877,6 +3877,10 @@ impl Flag for MaxCount { r" Limit the number of matching lines per file searched to \fINUM\fP. .sp +When \flag{multiline} is used, a single match that spans multiple lines is only +counted once for the purposes of this limit. Multiple matches in a single line +are counted only once, as they would be in non-multiline mode. +.sp Note that \fB0\fP is a legal value but not likely to be useful. When used, ripgrep won't search anything. " diff --git a/crates/core/flags/hiargs.rs b/crates/core/flags/hiargs.rs index 7bf63e7..6f440ff 100644 --- a/crates/core/flags/hiargs.rs +++ b/crates/core/flags/hiargs.rs @@ -617,7 +617,6 @@ impl HiArgs { .hyperlink(self.hyperlink_config.clone()) .max_columns_preview(self.max_columns_preview) .max_columns(self.max_columns) - .max_matches(self.max_count) .only_matching(self.only_matching) .path(self.with_filename) .path_terminator(self.path_terminator.clone()) @@ -719,6 +718,7 @@ impl HiArgs { }; let mut builder = grep::searcher::SearcherBuilder::new(); builder + .max_matches(self.max_count) .line_terminator(line_term) .invert_match(self.invert_match) .line_number(self.line_number) diff --git a/crates/printer/src/standard.rs b/crates/printer/src/standard.rs index b6f62ff..494901a 100644 --- a/crates/printer/src/standard.rs +++ b/crates/printer/src/standard.rs @@ -11,8 +11,7 @@ use { bstr::ByteSlice, grep_matcher::{Match, Matcher}, grep_searcher::{ - LineStep, Searcher, Sink, SinkContext, SinkContextKind, SinkFinish, - SinkMatch, + LineStep, Searcher, Sink, SinkContext, SinkFinish, SinkMatch, }, termcolor::{ColorSpec, NoColor, WriteColor}, }; @@ -46,7 +45,6 @@ struct Config { replacement: Arc>>, max_columns: Option, max_columns_preview: bool, - max_matches: Option, column: bool, byte_offset: 
bool, trim_ascii: bool, @@ -72,7 +70,6 @@ impl Default for Config { replacement: Arc::new(None), max_columns: None, max_columns_preview: false, - max_matches: None, column: false, byte_offset: false, trim_ascii: false, @@ -326,16 +323,6 @@ impl StandardBuilder { self } - /// Set the maximum amount of matching lines that are printed. - /// - /// If multi line search is enabled and a match spans multiple lines, then - /// that match is counted exactly once for the purposes of enforcing this - /// limit, regardless of how many lines it spans. - pub fn max_matches(&mut self, limit: Option) -> &mut StandardBuilder { - self.config.max_matches = limit; - self - } - /// Print the column number of the first match in a line. /// /// This option is convenient for use with `per_match` which will print a @@ -541,7 +528,6 @@ impl Standard { path: None, start_time: Instant::now(), match_count: 0, - after_context_remaining: 0, binary_byte_offset: None, stats, needs_match_granularity, @@ -578,7 +564,6 @@ impl Standard { path: Some(ppath), start_time: Instant::now(), match_count: 0, - after_context_remaining: 0, binary_byte_offset: None, stats, needs_match_granularity, @@ -659,7 +644,6 @@ pub struct StandardSink<'p, 's, M: Matcher, W> { path: Option>, start_time: Instant, match_count: u64, - after_context_remaining: u64, binary_byte_offset: Option, stats: Option, needs_match_granularity: bool, @@ -774,32 +758,6 @@ impl<'p, 's, M: Matcher, W: WriteColor> StandardSink<'p, 's, M, W> { } Ok(()) } - - /// Returns true if this printer should quit. - /// - /// This implements the logic for handling quitting after seeing a certain - /// amount of matches. In most cases, the logic is simple, but we must - /// permit all "after" contextual lines to print after reaching the limit. 
- fn should_quit(&self) -> bool { - let limit = match self.standard.config.max_matches { - None => return false, - Some(limit) => limit, - }; - if self.match_count < limit { - return false; - } - self.after_context_remaining == 0 - } - - /// Returns whether the current match count exceeds the configured limit. - /// If there is no limit, then this always returns false. - fn match_more_than_limit(&self) -> bool { - let limit = match self.standard.config.max_matches { - None => return false, - Some(limit) => limit, - }; - self.match_count > limit - } } impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> { @@ -811,19 +769,6 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> { mat: &SinkMatch<'_>, ) -> Result { self.match_count += 1; - // When we've exceeded our match count, then the remaining context - // lines should not be reset, but instead, decremented. This avoids a - // bug where we display more matches than a configured limit. The main - // idea here is that 'matched' might be called again while printing - // an after-context line. In that case, we should treat this as a - // contextual line rather than a matching line for the purposes of - // termination. 
- if self.match_more_than_limit() { - self.after_context_remaining = - self.after_context_remaining.saturating_sub(1); - } else { - self.after_context_remaining = searcher.after_context() as u64; - } self.record_matches( searcher, @@ -841,9 +786,8 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> { return Ok(false); } } - StandardImpl::from_match(searcher, self, mat).sink()?; - Ok(!self.should_quit()) + Ok(true) } fn context( @@ -854,10 +798,6 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> { self.standard.matches.clear(); self.replacer.clear(); - if ctx.kind() == &SinkContextKind::After { - self.after_context_remaining = - self.after_context_remaining.saturating_sub(1); - } if searcher.invert_match() { self.record_matches(searcher, ctx.bytes(), 0..ctx.bytes().len())?; self.replace(searcher, ctx.bytes(), 0..ctx.bytes().len())?; @@ -869,7 +809,7 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> { } StandardImpl::from_context(searcher, self, ctx).sink()?; - Ok(!self.should_quit()) + Ok(true) } fn context_break( @@ -902,11 +842,7 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> { self.standard.wtr.borrow_mut().reset_count(); self.start_time = Instant::now(); self.match_count = 0; - self.after_context_remaining = 0; self.binary_byte_offset = None; - if self.standard.config.max_matches == Some(0) { - return Ok(false); - } Ok(true) } @@ -1450,7 +1386,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { } fn write_binary_message(&self, offset: u64) -> io::Result<()> { - if self.sink.match_count == 0 { + if !self.sink.has_match() { return Ok(()); } @@ -2742,11 +2678,10 @@ and exhibited clearly, with a label attached. 
#[test] fn max_matches() { let matcher = RegexMatcher::new("Sherlock").unwrap(); - let mut printer = StandardBuilder::new() - .max_matches(Some(1)) - .build(NoColor::new(vec![])); + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) + .max_matches(Some(1)) .build() .search_reader( &matcher, @@ -2766,10 +2701,9 @@ For the Doctor Watsons of this world, as opposed to the Sherlock fn max_matches_context() { // after context: 1 let matcher = RegexMatcher::new("Doctor Watsons").unwrap(); - let mut printer = StandardBuilder::new() - .max_matches(Some(1)) - .build(NoColor::new(vec![])); + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); SearcherBuilder::new() + .max_matches(Some(1)) .line_number(false) .after_context(1) .build() @@ -2788,10 +2722,9 @@ Holmeses, success in the province of detective work must always assert_eq_printed!(expected, got); // after context: 4 - let mut printer = StandardBuilder::new() - .max_matches(Some(1)) - .build(NoColor::new(vec![])); + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); SearcherBuilder::new() + .max_matches(Some(1)) .line_number(false) .after_context(4) .build() @@ -2814,10 +2747,9 @@ but Doctor Watson has to have it taken out for him and dusted, // after context: 1, max matches: 2 let matcher = RegexMatcher::new("Doctor Watsons|but Doctor").unwrap(); - let mut printer = StandardBuilder::new() - .max_matches(Some(2)) - .build(NoColor::new(vec![])); + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); SearcherBuilder::new() + .max_matches(Some(2)) .line_number(false) .after_context(1) .build() @@ -2839,10 +2771,114 @@ and exhibited clearly, with a label attached. 
assert_eq_printed!(expected, got); // after context: 4, max matches: 2 - let mut printer = StandardBuilder::new() - .max_matches(Some(2)) - .build(NoColor::new(vec![])); + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); SearcherBuilder::new() + .max_matches(Some(2)) + .line_number(false) + .after_context(4) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +be, to a very large extent, the result of luck. Sherlock Holmes +can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached. +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn max_matches_context_invert() { + // after context: 1 + let matcher = + RegexMatcher::new("success|extent|clew|dusted|exhibited").unwrap(); + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); + SearcherBuilder::new() + .invert_match(true) + .max_matches(Some(1)) + .line_number(false) + .after_context(1) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +"; + assert_eq_printed!(expected, got); + + // after context: 4 + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); + SearcherBuilder::new() + .invert_match(true) + .max_matches(Some(1)) + .line_number(false) + .after_context(4) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +For the 
Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +be, to a very large extent, the result of luck. Sherlock Holmes +can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, +"; + assert_eq_printed!(expected, got); + + // after context: 1, max matches: 2 + let matcher = + RegexMatcher::new("success|extent|clew|exhibited").unwrap(); + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); + SearcherBuilder::new() + .invert_match(true) + .max_matches(Some(2)) + .line_number(false) + .after_context(1) + .build() + .search_reader( + &matcher, + SHERLOCK.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +-- +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached. +"; + assert_eq_printed!(expected, got); + + // after context: 4, max matches: 2 + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); + SearcherBuilder::new() + .invert_match(true) + .max_matches(Some(2)) .line_number(false) .after_context(4) .build() @@ -2868,12 +2904,11 @@ and exhibited clearly, with a label attached. 
#[test] fn max_matches_multi_line1() { let matcher = RegexMatcher::new("(?s:.{0})Sherlock").unwrap(); - let mut printer = StandardBuilder::new() - .max_matches(Some(1)) - .build(NoColor::new(vec![])); + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .multi_line(true) + .max_matches(Some(1)) .build() .search_reader( &matcher, @@ -2893,12 +2928,11 @@ For the Doctor Watsons of this world, as opposed to the Sherlock fn max_matches_multi_line2() { let matcher = RegexMatcher::new(r"(?s)Watson.+?(Holmeses|clearly)").unwrap(); - let mut printer = StandardBuilder::new() - .max_matches(Some(1)) - .build(NoColor::new(vec![])); + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .multi_line(true) + .max_matches(Some(1)) .build() .search_reader( &matcher, @@ -2915,6 +2949,55 @@ Holmeses, success in the province of detective work must always assert_eq_printed!(expected, got); } + #[test] + fn max_matches_multi_line3() { + let matcher = RegexMatcher::new(r"line 2\nline 3").unwrap(); + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(false) + .multi_line(true) + .max_matches(Some(1)) + .build() + .search_reader( + &matcher, + "line 2\nline 3 x\nline 2\nline 3\n".as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "\ +line 2 +line 3 x +"; + assert_eq_printed!(expected, got); + } + + #[test] + fn max_matches_multi_line4() { + let matcher = + RegexMatcher::new(r"line 2\nline 3|x\nline 2\n").unwrap(); + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(false) + .multi_line(true) + .max_matches(Some(1)) + .build() + .search_reader( + &matcher, + "line 2\nline 3 x\nline 2\nline 3 x\n".as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut 
printer); + let expected = "\ +line 2 +line 3 x +"; + assert_eq_printed!(expected, got); + } + #[test] fn only_matching() { let matcher = RegexMatcher::new("Doctor Watsons|Sherlock").unwrap(); @@ -3847,10 +3930,9 @@ e "; let matcher = RegexMatcherBuilder::new().build(r"d").unwrap(); - let mut printer = StandardBuilder::new() - .max_matches(Some(1)) - .build(NoColor::new(vec![])); + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); SearcherBuilder::new() + .max_matches(Some(1)) .line_number(true) .after_context(2) .build() diff --git a/crates/searcher/src/searcher/core.rs b/crates/searcher/src/searcher/core.rs index 7d7e5cd..87eaf7f 100644 --- a/crates/searcher/src/searcher/core.rs +++ b/crates/searcher/src/searcher/core.rs @@ -33,6 +33,7 @@ pub(crate) struct Core<'s, M: 's, S> { after_context_left: usize, has_sunk: bool, has_matched: bool, + count: u64, } impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { @@ -59,6 +60,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { after_context_left: 0, has_sunk: false, has_matched: false, + count: 0, }; if !core.searcher.multi_line_with_matcher(&core.matcher) { if core.is_line_by_line_fast() { @@ -78,6 +80,14 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { self.pos = pos; } + fn count(&self) -> u64 { + self.count + } + + fn increment_count(&mut self) { + self.count += 1; + } + pub(crate) fn binary_byte_offset(&self) -> Option { self.binary_byte_offset.map(|offset| offset as u64) } @@ -101,6 +111,47 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { self.sink.binary_data(&self.searcher, binary_byte_offset) } + fn is_match(&self, line: &[u8]) -> Result { + // We need to strip the line terminator here to match the + // semantics of line-by-line searching. Namely, regexes + // like `(?m)^$` can match at the final position beyond a + // line terminator, which is non-sensical in line oriented + // matching. 
+ let line = lines::without_terminator(line, self.config.line_term); + self.matcher.is_match(line).map_err(S::Error::error_message) + } + + pub(crate) fn find( + &mut self, + slice: &[u8], + ) -> Result, S::Error> { + if self.has_exceeded_match_limit() { + return Ok(None); + } + match self.matcher().find(slice) { + Err(err) => Err(S::Error::error_message(err)), + Ok(None) => Ok(None), + Ok(Some(m)) => { + self.increment_count(); + Ok(Some(m)) + } + } + } + + fn shortest_match( + &mut self, + slice: &[u8], + ) -> Result, S::Error> { + if self.has_exceeded_match_limit() { + return Ok(None); + } + match self.matcher.shortest_match(slice) { + Err(err) => return Err(S::Error::error_message(err)), + Ok(None) => return Ok(None), + Ok(Some(m)) => Ok(Some(m)), + } + } + pub(crate) fn begin(&mut self) -> Result { self.sink.begin(&self.searcher) } @@ -226,6 +277,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { if self.after_context_left == 0 { return Ok(true); } + let exceeded_match_limit = self.has_exceeded_match_limit(); let range = Range::new(self.last_line_visited, upto); let mut stepper = LineStep::new( self.config.line_term.as_byte(), @@ -233,7 +285,16 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { range.end(), ); while let Some(line) = stepper.next_match(buf) { - if !self.sink_after_context(buf, &line)? { + if exceeded_match_limit + && self.is_match(&buf[line])? != self.config.invert_match + { + let after_context_left = self.after_context_left; + self.set_pos(line.end()); + if !self.sink_matched(buf, &line)? { + return Ok(false); + } + self.after_context_left = after_context_left - 1; + } else if !self.sink_after_context(buf, &line)? 
{ return Ok(false); } if self.after_context_left == 0 { @@ -272,6 +333,12 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { range.end(), ); while let Some(line) = stepper.next_match(buf) { + if self.has_exceeded_match_limit() + && !self.config.passthru + && self.after_context_left == 0 + { + return Ok(false); + } let matched = { // Stripping the line terminator is necessary to prevent some // classes of regexes from matching the empty position *after* @@ -281,15 +348,14 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { &buf[line], self.config.line_term, ); - match self.matcher.shortest_match(slice) { - Err(err) => return Err(S::Error::error_message(err)), - Ok(result) => result.is_some(), - } + self.shortest_match(slice)?.is_some() }; self.set_pos(line.end()); + let success = matched != self.config.invert_match; if success { self.has_matched = true; + self.increment_count(); if !self.before_context_by_line(buf, line.start())? { return Ok(false); } @@ -325,10 +391,11 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { } if self.config.invert_match { if !self.match_by_line_fast_invert(buf)? { - return Ok(Stop); + break; } } else if let Some(line) = self.find_by_line_fast(buf)? { self.has_matched = true; + self.increment_count(); if self.config.max_context() > 0 { if !self.after_context_by_line(buf, line.start())? { return Ok(Stop); @@ -348,6 +415,9 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { if !self.after_context_by_line(buf, buf.len())? { return Ok(Stop); } + if self.has_exceeded_match_limit() && self.after_context_left == 0 { + return Ok(Stop); + } self.set_pos(buf.len()); Ok(Continue) } @@ -387,16 +457,20 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { invert_match.end(), ); while let Some(line) = stepper.next_match(buf) { + self.increment_count(); if !self.sink_matched(buf, &line)? 
{ return Ok(false); } + if self.has_exceeded_match_limit() { + return Ok(false); + } } Ok(true) } #[inline(always)] fn find_by_line_fast( - &self, + &mut self, buf: &[u8], ) -> Result, S::Error> { debug_assert!(!self.searcher.multi_line_with_matcher(&self.matcher)); @@ -404,6 +478,9 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { let mut pos = self.pos(); while !buf[pos..].is_empty() { + if self.has_exceeded_match_limit() { + return Ok(None); + } match self.matcher.find_candidate_line(&buf[pos..]) { Err(err) => return Err(S::Error::error_message(err)), Ok(None) => return Ok(None), @@ -427,23 +504,10 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { self.config.line_term.as_byte(), Range::zero(i).offset(pos), ); - // We need to strip the line terminator here to match the - // semantics of line-by-line searching. Namely, regexes - // like `(?m)^$` can match at the final position beyond a - // line terminator, which is non-sensical in line oriented - // matching. - let slice = lines::without_terminator( - &buf[line], - self.config.line_term, - ); - match self.matcher.is_match(slice) { - Err(err) => return Err(S::Error::error_message(err)), - Ok(true) => return Ok(Some(line)), - Ok(false) => { - pos = line.end(); - continue; - } + if self.is_match(&buf[line])? 
{ + return Ok(Some(line)); } + pos = line.end(); } } } @@ -638,4 +702,8 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { } false } + + fn has_exceeded_match_limit(&self) -> bool { + self.config.max_matches.map_or(false, |limit| self.count() >= limit) + } } diff --git a/crates/searcher/src/searcher/glue.rs b/crates/searcher/src/searcher/glue.rs index 5db5701..8c15bb3 100644 --- a/crates/searcher/src/searcher/glue.rs +++ b/crates/searcher/src/searcher/glue.rs @@ -325,11 +325,9 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> { } fn find(&mut self) -> Result, S::Error> { - match self.core.matcher().find(&self.slice[self.core.pos()..]) { - Err(err) => Err(S::Error::error_message(err)), - Ok(None) => Ok(None), - Ok(Some(m)) => Ok(Some(m.offset(self.core.pos()))), - } + self.core + .find(&self.slice[self.core.pos()..]) + .map(|m| m.map(|m| m.offset(self.core.pos()))) } /// Advance the search position based on the previous match. diff --git a/crates/searcher/src/searcher/mod.rs b/crates/searcher/src/searcher/mod.rs index c7d3b1d..ccd8945 100644 --- a/crates/searcher/src/searcher/mod.rs +++ b/crates/searcher/src/searcher/mod.rs @@ -180,6 +180,8 @@ pub struct Config { /// Whether to stop searching when a non-matching line is found after a /// matching line. stop_on_nonmatch: bool, + /// The maximum number of matches this searcher should emit. + max_matches: Option, } impl Default for Config { @@ -198,6 +200,7 @@ impl Default for Config { encoding: None, bom_sniffing: true, stop_on_nonmatch: false, + max_matches: None, } } } @@ -564,6 +567,23 @@ impl SearcherBuilder { self.config.stop_on_nonmatch = stop_on_nonmatch; self } + + /// Sets the maximum number of matches that should be emitted by this + /// searcher. + /// + /// If multi line search is enabled and a match spans multiple lines, then + /// that match is counted exactly once for the purposes of enforcing this + /// limit, regardless of how many lines it spans. + /// + /// Note that `0` is a legal value. 
This will cause the searcher to + /// immediately quit without searching anything. + /// + /// By default, no limit is set. + #[inline] + pub fn max_matches(&mut self, limit: Option) -> &mut SearcherBuilder { + self.config.max_matches = limit; + self + } } /// A searcher executes searches over a haystack and writes results to a caller @@ -845,13 +865,27 @@ impl Searcher { self.config.multi_line } - /// Returns true if and only if this searcher is configured to stop when in + /// Returns true if and only if this searcher is configured to stop when it /// finds a non-matching line after a matching one. #[inline] pub fn stop_on_nonmatch(&self) -> bool { self.config.stop_on_nonmatch } + /// Returns the maximum number of matches emitted by this searcher, if + /// such a limit was set. + /// + /// If multi line search is enabled and a match spans multiple lines, then + /// that match is counted exactly once for the purposes of enforcing this + /// limit, regardless of how many lines it spans. + /// + /// Note that `0` is a legal value. This will cause the searcher to + /// immediately quit without searching anything. + #[inline] + pub fn max_matches(&self) -> Option { + self.config.max_matches + } + /// Returns true if and only if this searcher will choose a multi-line /// strategy given the provided matcher. /// diff --git a/tests/regression.rs b/tests/regression.rs index 44675c4..ac806ae 100644 --- a/tests/regression.rs +++ b/tests/regression.rs @@ -1459,6 +1459,7 @@ rgtest!(r2658_null_data_line_regexp, |dir: Dir, mut cmd: TestCommand| { rgtest!(r2944_incorrect_bytes_searched, |dir: Dir, mut cmd: TestCommand| { dir.create("haystack", "foo1\nfoo2\nfoo3\nfoo4\nfoo5\n"); let got = cmd.args(&["--stats", "-m2", "foo", "."]).stdout(); + println!("{got}"); assert!(got.contains("10 bytes searched\n")); });