From 7e5a59027638952ae55a1cc3d9d3b2ce7d69be98 Mon Sep 17 00:00:00 2001
From: Andrew Gallant
Date: Tue, 17 Jul 2018 20:23:31 -0400
Subject: [PATCH] grep: small literal detection fix

This commit tweaks the inner literal detection heuristic such that if it
comes up with any literal that is all whitespace, then it's likely a bad
literal to look for since it's so common. Therefore, we simply reject the
inner literal optimization in this case and let the regex engine do its
thang.
---
 grep/src/literals.rs | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/grep/src/literals.rs b/grep/src/literals.rs
index 3e1c385..5e3dc8e 100644
--- a/grep/src/literals.rs
+++ b/grep/src/literals.rs
@@ -67,6 +67,16 @@ impl LiteralSets {
             lit = req;
         }
 
+        // Special case: if we have any literals that are all whitespace,
+        // then this is probably a failing of the literal detection since
+        // whitespace is typically pretty common. In this case, don't bother
+        // with inner literal scanning at all and just defer to the regex.
+        let any_all_white = req_lits.iter()
+            .any(|lit| lit.iter().all(|&b| (b as char).is_whitespace()));
+        if any_all_white {
+            return None;
+        }
+
         // Special case: if we detected an alternation of inner required
         // literals and its longest literal is bigger than the longest
         // prefix/suffix, then choose the alternation. In practice, this