Don't search stdout redirected file.
When running ripgrep like this:
rg foo > output
we must be careful not to search `output` since ripgrep is actively writing
to it. Searching it can cause massive blowups where the file grows without
bound.
While this is conceptually easy to fix (compare the inode of the redirection
target with the inode of the file you're about to search), there are a few
problems with it.
First, inodes are a Unix thing, so we need a Windows specific solution to
this as well. To resolve this concern, I created a new crate, `same-file`,
which provides a cross platform abstraction.
Second, stat'ing every file is costly. This is not avoidable on Windows,
but on Unix, we can get the inode number directly from directory traversal.
This information wasn't previously exposed, but now it is (through both the
ignore and walkdir crates).
Fixes #286
This commit is contained in:
36
src/args.rs
36
src/args.rs
@@ -15,6 +15,7 @@ use grep::{Grep, GrepBuilder};
|
||||
use log;
|
||||
use num_cpus;
|
||||
use regex;
|
||||
use same_file;
|
||||
use termcolor;
|
||||
|
||||
use app;
|
||||
@@ -65,6 +66,7 @@ pub struct Args {
|
||||
quiet_matched: QuietMatched,
|
||||
replace: Option<Vec<u8>>,
|
||||
sort_files: bool,
|
||||
stdout_handle: Option<same_file::Handle>,
|
||||
text: bool,
|
||||
threads: usize,
|
||||
type_list: bool,
|
||||
@@ -182,6 +184,17 @@ impl Args {
|
||||
termcolor::Stdout::new(self.color_choice)
|
||||
}
|
||||
|
||||
/// Returns a handle to stdout for filtering search.
|
||||
///
|
||||
/// A handle is returned if and only if ripgrep's stdout is being
|
||||
/// redirected to a file. The handle returned corresponds to that file.
|
||||
///
|
||||
/// This can be used to ensure that we do not attempt to search a file
|
||||
/// that ripgrep is writing to.
|
||||
pub fn stdout_handle(&self) -> Option<&same_file::Handle> {
|
||||
self.stdout_handle.as_ref()
|
||||
}
|
||||
|
||||
/// Create a new buffer writer for multi-threaded searching with color
|
||||
/// support.
|
||||
pub fn buffer_writer(&self) -> termcolor::BufferWriter {
|
||||
@@ -338,6 +351,7 @@ impl<'a> ArgMatches<'a> {
|
||||
quiet_matched: QuietMatched::new(quiet),
|
||||
replace: self.replace(),
|
||||
sort_files: self.is_present("sort-files"),
|
||||
stdout_handle: self.stdout_handle(),
|
||||
text: self.text(),
|
||||
threads: try!(self.threads()),
|
||||
type_list: self.is_present("type-list"),
|
||||
@@ -518,6 +532,28 @@ impl<'a> ArgMatches<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a handle to stdout for filtering search.
|
||||
///
|
||||
/// A handle is returned if and only if ripgrep's stdout is being
|
||||
/// redirected to a file. The handle returned corresponds to that file.
|
||||
///
|
||||
/// This can be used to ensure that we do not attempt to search a file
|
||||
/// that ripgrep is writing to.
|
||||
fn stdout_handle(&self) -> Option<same_file::Handle> {
|
||||
let h = match same_file::Handle::stdout() {
|
||||
Err(_) => return None,
|
||||
Ok(h) => h,
|
||||
};
|
||||
let md = match h.as_file().metadata() {
|
||||
Err(_) => return None,
|
||||
Ok(md) => md,
|
||||
};
|
||||
if !md.is_file() {
|
||||
return None;
|
||||
}
|
||||
Some(h)
|
||||
}
|
||||
|
||||
/// Returns true if and only if memory map searching should be tried.
|
||||
///
|
||||
/// `paths` should be a slice of all top-level file paths that ripgrep
|
||||
|
||||
53
src/atty.rs
53
src/atty.rs
@@ -11,15 +11,10 @@ use winapi::winnt::HANDLE;
|
||||
|
||||
#[cfg(unix)]
|
||||
pub fn stdin_is_readable() -> bool {
|
||||
use std::fs::File;
|
||||
use std::os::unix::fs::FileTypeExt;
|
||||
use std::os::unix::io::{FromRawFd, IntoRawFd};
|
||||
use libc;
|
||||
use same_file::Handle;
|
||||
|
||||
let file = unsafe { File::from_raw_fd(libc::STDIN_FILENO) };
|
||||
let md = file.metadata();
|
||||
let _ = file.into_raw_fd();
|
||||
let ft = match md {
|
||||
let ft = match Handle::stdin().and_then(|h| h.as_file().metadata()) {
|
||||
Err(_) => return false,
|
||||
Ok(md) => md.file_type(),
|
||||
};
|
||||
@@ -101,7 +96,7 @@ pub fn on_stdout() -> bool {
|
||||
|
||||
/// Returns true if there is an MSYS tty on the given handle.
|
||||
#[cfg(windows)]
|
||||
fn msys_tty_on_handle(handle: HANDLE) -> bool {
|
||||
unsafe fn msys_tty_on_handle(handle: HANDLE) -> bool {
|
||||
use std::ffi::OsString;
|
||||
use std::mem;
|
||||
use std::os::raw::c_void;
|
||||
@@ -113,27 +108,25 @@ fn msys_tty_on_handle(handle: HANDLE) -> bool {
|
||||
use winapi::minwinbase::FileNameInfo;
|
||||
use winapi::minwindef::MAX_PATH;
|
||||
|
||||
unsafe {
|
||||
let size = mem::size_of::<FILE_NAME_INFO>();
|
||||
let mut name_info_bytes = vec![0u8; size + MAX_PATH];
|
||||
let res = GetFileInformationByHandleEx(
|
||||
handle,
|
||||
FileNameInfo,
|
||||
&mut *name_info_bytes as *mut _ as *mut c_void,
|
||||
name_info_bytes.len() as u32);
|
||||
if res == 0 {
|
||||
return true;
|
||||
}
|
||||
let name_info: FILE_NAME_INFO =
|
||||
*(name_info_bytes[0..size].as_ptr() as *const FILE_NAME_INFO);
|
||||
let name_bytes =
|
||||
&name_info_bytes[size..size + name_info.FileNameLength as usize];
|
||||
let name_u16 = slice::from_raw_parts(
|
||||
name_bytes.as_ptr() as *const u16, name_bytes.len() / 2);
|
||||
let name = OsString::from_wide(name_u16)
|
||||
.as_os_str().to_string_lossy().into_owned();
|
||||
name.contains("msys-") || name.contains("-pty")
|
||||
let size = mem::size_of::<FILE_NAME_INFO>();
|
||||
let mut name_info_bytes = vec![0u8; size + MAX_PATH];
|
||||
let res = GetFileInformationByHandleEx(
|
||||
handle,
|
||||
FileNameInfo,
|
||||
&mut *name_info_bytes as *mut _ as *mut c_void,
|
||||
name_info_bytes.len() as u32);
|
||||
if res == 0 {
|
||||
return true;
|
||||
}
|
||||
let name_info: FILE_NAME_INFO =
|
||||
*(name_info_bytes[0..size].as_ptr() as *const FILE_NAME_INFO);
|
||||
let name_bytes =
|
||||
&name_info_bytes[size..size + name_info.FileNameLength as usize];
|
||||
let name_u16 = slice::from_raw_parts(
|
||||
name_bytes.as_ptr() as *const u16, name_bytes.len() / 2);
|
||||
let name = OsString::from_wide(name_u16)
|
||||
.as_os_str().to_string_lossy().into_owned();
|
||||
name.contains("msys-") || name.contains("-pty")
|
||||
}
|
||||
|
||||
/// Returns true if there is a console on the given file descriptor.
|
||||
@@ -145,8 +138,8 @@ unsafe fn console_on_fd(fd: DWORD) -> bool {
|
||||
|
||||
/// Returns true if there is a console on the given handle.
|
||||
#[cfg(windows)]
|
||||
fn console_on_handle(handle: HANDLE) -> bool {
|
||||
unsafe fn console_on_handle(handle: HANDLE) -> bool {
|
||||
use kernel32::GetConsoleMode;
|
||||
let mut out = 0;
|
||||
unsafe { GetConsoleMode(handle, &mut out) != 0 }
|
||||
GetConsoleMode(handle, &mut out) != 0
|
||||
}
|
||||
|
||||
80
src/main.rs
80
src/main.rs
@@ -15,6 +15,7 @@ extern crate memchr;
|
||||
extern crate memmap;
|
||||
extern crate num_cpus;
|
||||
extern crate regex;
|
||||
extern crate same_file;
|
||||
extern crate termcolor;
|
||||
#[cfg(windows)]
|
||||
extern crate winapi;
|
||||
@@ -106,7 +107,11 @@ fn run_parallel(args: Arc<Args>) -> Result<u64> {
|
||||
if quiet_matched.has_match() {
|
||||
return Quit;
|
||||
}
|
||||
let dent = match get_or_log_dir_entry(result, args.no_messages()) {
|
||||
let dent = match get_or_log_dir_entry(
|
||||
result,
|
||||
args.stdout_handle(),
|
||||
args.no_messages(),
|
||||
) {
|
||||
None => return Continue,
|
||||
Some(dent) => dent,
|
||||
};
|
||||
@@ -148,7 +153,11 @@ fn run_one_thread(args: Arc<Args>) -> Result<u64> {
|
||||
let mut paths_searched: u64 = 0;
|
||||
let mut match_count = 0;
|
||||
for result in args.walker() {
|
||||
let dent = match get_or_log_dir_entry(result, args.no_messages()) {
|
||||
let dent = match get_or_log_dir_entry(
|
||||
result,
|
||||
args.stdout_handle(),
|
||||
args.no_messages(),
|
||||
) {
|
||||
None => continue,
|
||||
Some(dent) => dent,
|
||||
};
|
||||
@@ -190,11 +199,15 @@ fn run_files_parallel(args: Arc<Args>) -> Result<u64> {
|
||||
}
|
||||
file_count
|
||||
});
|
||||
let no_messages = args.no_messages();
|
||||
args.walker_parallel().run(move || {
|
||||
let args = args.clone();
|
||||
let tx = tx.clone();
|
||||
Box::new(move |result| {
|
||||
if let Some(dent) = get_or_log_dir_entry(result, no_messages) {
|
||||
if let Some(dent) = get_or_log_dir_entry(
|
||||
result,
|
||||
args.stdout_handle(),
|
||||
args.no_messages(),
|
||||
) {
|
||||
tx.send(dent).unwrap();
|
||||
}
|
||||
ignore::WalkState::Continue
|
||||
@@ -208,7 +221,11 @@ fn run_files_one_thread(args: Arc<Args>) -> Result<u64> {
|
||||
let mut printer = args.printer(stdout.lock());
|
||||
let mut file_count = 0;
|
||||
for result in args.walker() {
|
||||
let dent = match get_or_log_dir_entry(result, args.no_messages()) {
|
||||
let dent = match get_or_log_dir_entry(
|
||||
result,
|
||||
args.stdout_handle(),
|
||||
args.no_messages(),
|
||||
) {
|
||||
None => continue,
|
||||
Some(dent) => dent,
|
||||
};
|
||||
@@ -231,6 +248,7 @@ fn run_types(args: Arc<Args>) -> Result<u64> {
|
||||
|
||||
fn get_or_log_dir_entry(
|
||||
result: result::Result<ignore::DirEntry, ignore::Error>,
|
||||
stdout_handle: Option<&same_file::Handle>,
|
||||
no_messages: bool,
|
||||
) -> Option<ignore::DirEntry> {
|
||||
match result {
|
||||
@@ -253,16 +271,58 @@ fn get_or_log_dir_entry(
|
||||
// A depth of 0 means the user gave the path explicitly, so we
|
||||
// should always try to search it.
|
||||
if dent.depth() == 0 && !ft.is_dir() {
|
||||
Some(dent)
|
||||
} else if ft.is_file() {
|
||||
Some(dent)
|
||||
} else {
|
||||
None
|
||||
return Some(dent);
|
||||
} else if !ft.is_file() {
|
||||
return None;
|
||||
}
|
||||
// If we are redirecting stdout to a file, then don't search that
|
||||
// file.
|
||||
if is_stdout_file(&dent, stdout_handle, no_messages) {
|
||||
return None;
|
||||
}
|
||||
Some(dent)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn is_stdout_file(
|
||||
dent: &ignore::DirEntry,
|
||||
stdout_handle: Option<&same_file::Handle>,
|
||||
no_messages: bool,
|
||||
) -> bool {
|
||||
let stdout_handle = match stdout_handle {
|
||||
None => return false,
|
||||
Some(stdout_handle) => stdout_handle,
|
||||
};
|
||||
// If we know for sure that these two things aren't equal, then avoid
|
||||
// the costly extra stat call to determine equality.
|
||||
if !maybe_dent_eq_handle(dent, stdout_handle) {
|
||||
return false;
|
||||
}
|
||||
match same_file::Handle::from_path(dent.path()) {
|
||||
Ok(h) => stdout_handle == &h,
|
||||
Err(err) => {
|
||||
if !no_messages {
|
||||
eprintln!("{}: {}", dent.path().display(), err);
|
||||
}
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
fn maybe_dent_eq_handle(
|
||||
dent: &ignore::DirEntry,
|
||||
handle: &same_file::Handle,
|
||||
) -> bool {
|
||||
dent.ino() == Some(handle.ino())
|
||||
}
|
||||
|
||||
/// Cheap pre-check (non-Unix): could the entry and the handle be the same
/// file?
///
/// No shortcut is implemented for these targets, so this always answers
/// `true` and leaves the decision to the caller's full handle comparison.
#[cfg(not(unix))]
fn maybe_dent_eq_handle(
    _dent: &ignore::DirEntry,
    _handle: &same_file::Handle,
) -> bool {
    true
}
|
||||
|
||||
fn eprint_nothing_searched() {
|
||||
eprintln!("No files were searched, which means ripgrep probably \
|
||||
applied a filter you didn't expect. \
|
||||
|
||||
Reference in New Issue
Block a user