Move all gitignore matching to separate crate.
This PR introduces a new sub-crate, `ignore`, which primarily provides a fast recursive directory iterator that respects ignore files like gitignore and other configurable filtering rules based on globs or even file types. This results in a substantial source of complexity moved out of ripgrep's core and into a reusable component that others can now (hopefully) benefit from. While much of the ignore code carried over from ripgrep's core, a substantial portion of it was rewritten with the following goals in mind: 1. Reuse matchers built from gitignore files across directory iteration. 2. Design the matcher data structure to be amenable for parallelizing directory iteration. (Indeed, writing the parallel iterator is the next step.) Fixes #9, #44, #45
This commit is contained in:
803
ignore/src/dir.rs
Normal file
803
ignore/src/dir.rs
Normal file
@@ -0,0 +1,803 @@
|
||||
// This module provides a data structure, `Ignore`, that connects "directory
|
||||
// traversal" with "ignore matchers." Specifically, it knows about gitignore
|
||||
// semantics and precedence, and is organized based on directory hierarchy.
|
||||
// Namely, every matcher logically corresponds to ignore rules from a single
|
||||
// directory, and points to the matcher for its corresponding parent directory.
|
||||
// In this sense, `Ignore` is a *persistent* data structure.
|
||||
//
|
||||
// This design was specifically chosen to make it possible to use this data
|
||||
// structure in a parallel directory iterator.
|
||||
//
|
||||
// My initial intention was to expose this module as part of this crate's
|
||||
// public API, but I think the data structure's public API is too complicated
|
||||
// with non-obvious failure modes. Alas, such things haven't been documented
|
||||
// well.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::OsString;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use gitignore::{self, Gitignore, GitignoreBuilder};
|
||||
use pathutil::{is_hidden, strip_prefix};
|
||||
use overrides::{self, Override};
|
||||
use types::{self, Types};
|
||||
use {Error, Match, PartialErrorBuilder};
|
||||
|
||||
/// IgnoreMatch represents information about where a match came from when using
|
||||
/// the `Ignore` matcher.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct IgnoreMatch<'a>(IgnoreMatchInner<'a>);
|
||||
|
||||
/// IgnoreMatchInner describes precisely where the match information came from.
|
||||
/// This is private to allow expansion to more matchers in the future.
|
||||
#[derive(Clone, Debug)]
|
||||
enum IgnoreMatchInner<'a> {
|
||||
Override(overrides::Glob<'a>),
|
||||
Gitignore(&'a gitignore::Glob),
|
||||
Types(types::Glob<'a>),
|
||||
Hidden,
|
||||
}
|
||||
|
||||
impl<'a> IgnoreMatch<'a> {
|
||||
fn overrides(x: overrides::Glob<'a>) -> IgnoreMatch<'a> {
|
||||
IgnoreMatch(IgnoreMatchInner::Override(x))
|
||||
}
|
||||
|
||||
fn gitignore(x: &'a gitignore::Glob) -> IgnoreMatch<'a> {
|
||||
IgnoreMatch(IgnoreMatchInner::Gitignore(x))
|
||||
}
|
||||
|
||||
fn types(x: types::Glob<'a>) -> IgnoreMatch<'a> {
|
||||
IgnoreMatch(IgnoreMatchInner::Types(x))
|
||||
}
|
||||
|
||||
fn hidden() -> IgnoreMatch<'static> {
|
||||
IgnoreMatch(IgnoreMatchInner::Hidden)
|
||||
}
|
||||
}
|
||||
|
||||
/// Configuration flags for the ignore matcher. The same set of flags is
/// carried by both the builder and every matcher built from it.
#[derive(Clone, Copy, Debug)]
struct IgnoreOptions {
    /// When true, hidden file paths are ignored.
    hidden: bool,
    /// When true, `.ignore` files are read.
    ignore: bool,
    /// When true, git's global gitignore file is read.
    git_global: bool,
    /// When true, `.gitignore` files are read.
    git_ignore: bool,
    /// When true, `.git/info/exclude` files are read.
    git_exclude: bool,
}

impl IgnoreOptions {
    /// Reports whether any source of ignore rules is enabled.
    ///
    /// The `hidden` flag is not a rule source and is not considered here.
    fn should_ignores(&self) -> bool {
        let sources = [
            self.ignore,
            self.git_global,
            self.git_ignore,
            self.git_exclude,
        ];
        sources.iter().any(|&enabled| enabled)
    }
}
|
||||
|
||||
/// Ignore is a matcher useful for recursively walking one or more directories.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Ignore(Arc<IgnoreInner>);
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct IgnoreInner {
|
||||
/// A map of all existing directories that have already been
|
||||
/// compiled into matchers.
|
||||
///
|
||||
/// Note that this is never used during matching, only when adding new
|
||||
/// parent directory matchers. This avoids needing to rebuild glob sets for
|
||||
/// parent directories if many paths are being searched.
|
||||
compiled: Arc<RwLock<HashMap<OsString, Ignore>>>,
|
||||
/// The path to the directory that this matcher was built from.
|
||||
dir: PathBuf,
|
||||
/// An override matcher (default is empty).
|
||||
overrides: Arc<Override>,
|
||||
/// A file type matcher.
|
||||
types: Arc<Types>,
|
||||
/// The parent directory to match next.
|
||||
///
|
||||
/// If this is the root directory or there are otherwise no more
|
||||
/// directories to match, then `parent` is `None`.
|
||||
parent: Option<Ignore>,
|
||||
/// Whether this is an absolute parent matcher, as added by add_parent.
|
||||
is_absolute_parent: bool,
|
||||
/// The absolute base path of this matcher. Populated only if parent
|
||||
/// directories are added.
|
||||
absolute_base: Option<Arc<PathBuf>>,
|
||||
/// Explicit ignore matchers specified by the caller.
|
||||
explicit_ignores: Arc<Vec<Gitignore>>,
|
||||
/// The matcher for .ignore files.
|
||||
ignore_matcher: Gitignore,
|
||||
/// A global gitignore matcher, usually from $XDG_CONFIG_HOME/git/ignore.
|
||||
git_global_matcher: Arc<Gitignore>,
|
||||
/// The matcher for .gitignore files.
|
||||
git_ignore_matcher: Gitignore,
|
||||
/// Special matcher for `.git/info/exclude` files.
|
||||
git_exclude_matcher: Gitignore,
|
||||
/// Whether this directory contains a .git sub-directory.
|
||||
has_git: bool,
|
||||
/// Ignore config.
|
||||
opts: IgnoreOptions,
|
||||
}
|
||||
|
||||
impl Ignore {
|
||||
/// Return the directory path of this matcher.
|
||||
#[allow(dead_code)]
|
||||
pub fn path(&self) -> &Path {
|
||||
&self.0.dir
|
||||
}
|
||||
|
||||
/// Return true if this matcher has no parent.
|
||||
pub fn is_root(&self) -> bool {
|
||||
self.0.parent.is_none()
|
||||
}
|
||||
|
||||
/// Return this matcher's parent, if one exists.
|
||||
pub fn parent(&self) -> Option<Ignore> {
|
||||
self.0.parent.clone()
|
||||
}
|
||||
|
||||
/// Create a new `Ignore` matcher with the parent directories of `dir`.
|
||||
///
|
||||
/// Note that this can only be called on an `Ignore` matcher with no
|
||||
/// parents (i.e., `is_root` returns `true`). This will panic otherwise.
|
||||
pub fn add_parents<P: AsRef<Path>>(
|
||||
&self,
|
||||
path: P,
|
||||
) -> (Ignore, Option<Error>) {
|
||||
if !self.is_root() {
|
||||
panic!("Ignore::add_parents called on non-root matcher");
|
||||
}
|
||||
let absolute_base = match path.as_ref().canonicalize() {
|
||||
Ok(path) => Arc::new(path),
|
||||
Err(_) => {
|
||||
// There's not much we can do here, so just return our
|
||||
// existing matcher. We drop the error to be consistent
|
||||
// with our general pattern of ignoring I/O errors when
|
||||
// processing ignore files.
|
||||
return (self.clone(), None);
|
||||
}
|
||||
};
|
||||
// List of parents, from child to root.
|
||||
let mut parents = vec![];
|
||||
let mut path = &**absolute_base;
|
||||
while let Some(parent) = path.parent() {
|
||||
parents.push(parent);
|
||||
path = parent;
|
||||
}
|
||||
let mut errs = PartialErrorBuilder::default();
|
||||
let mut ig = self.clone();
|
||||
for parent in parents.into_iter().rev() {
|
||||
let mut compiled = self.0.compiled.write().unwrap();
|
||||
if let Some(prebuilt) = compiled.get(parent.as_os_str()) {
|
||||
ig = prebuilt.clone();
|
||||
continue;
|
||||
}
|
||||
let (mut igtmp, err) = ig.add_child_path(parent);
|
||||
errs.maybe_push(err);
|
||||
igtmp.is_absolute_parent = true;
|
||||
igtmp.absolute_base = Some(absolute_base.clone());
|
||||
ig = Ignore(Arc::new(igtmp));
|
||||
compiled.insert(parent.as_os_str().to_os_string(), ig.clone());
|
||||
}
|
||||
(ig, errs.into_error_option())
|
||||
}
|
||||
|
||||
/// Create a new `Ignore` matcher for the given child directory.
|
||||
///
|
||||
/// Since building the matcher may require reading from multiple
|
||||
/// files, it's possible that this method partially succeeds. Therefore,
|
||||
/// a matcher is always returned (which may match nothing) and an error is
|
||||
/// returned if it exists.
|
||||
///
|
||||
/// Note that all I/O errors are completely ignored.
|
||||
pub fn add_child<P: AsRef<Path>>(
|
||||
&self,
|
||||
dir: P,
|
||||
) -> (Ignore, Option<Error>) {
|
||||
let (ig, err) = self.add_child_path(dir.as_ref());
|
||||
(Ignore(Arc::new(ig)), err)
|
||||
}
|
||||
|
||||
/// Like add_child, but takes a full path and returns an IgnoreInner.
|
||||
fn add_child_path(&self, dir: &Path) -> (IgnoreInner, Option<Error>) {
|
||||
static IG_NAMES: &'static [&'static str] = &[".rgignore", ".ignore"];
|
||||
|
||||
let mut errs = PartialErrorBuilder::default();
|
||||
let ig_matcher =
|
||||
if !self.0.opts.ignore {
|
||||
Gitignore::empty()
|
||||
} else {
|
||||
let (m, err) = create_gitignore(&dir, IG_NAMES);
|
||||
errs.maybe_push(err);
|
||||
m
|
||||
};
|
||||
let gi_matcher =
|
||||
if !self.0.opts.git_ignore {
|
||||
Gitignore::empty()
|
||||
} else {
|
||||
let (m, err) = create_gitignore(&dir, &[".gitignore"]);
|
||||
errs.maybe_push(err);
|
||||
m
|
||||
};
|
||||
let gi_exclude_matcher =
|
||||
if !self.0.opts.git_exclude {
|
||||
Gitignore::empty()
|
||||
} else {
|
||||
let (m, err) = create_gitignore(&dir, &[".git/info/exclude"]);
|
||||
errs.maybe_push(err);
|
||||
m
|
||||
};
|
||||
let ig = IgnoreInner {
|
||||
compiled: self.0.compiled.clone(),
|
||||
dir: dir.to_path_buf(),
|
||||
overrides: self.0.overrides.clone(),
|
||||
types: self.0.types.clone(),
|
||||
parent: Some(self.clone()),
|
||||
is_absolute_parent: false,
|
||||
absolute_base: self.0.absolute_base.clone(),
|
||||
explicit_ignores: self.0.explicit_ignores.clone(),
|
||||
ignore_matcher: ig_matcher,
|
||||
git_global_matcher: self.0.git_global_matcher.clone(),
|
||||
git_ignore_matcher: gi_matcher,
|
||||
git_exclude_matcher: gi_exclude_matcher,
|
||||
has_git: dir.join(".git").is_dir(),
|
||||
opts: self.0.opts,
|
||||
};
|
||||
(ig, errs.into_error_option())
|
||||
}
|
||||
|
||||
/// Returns a match indicating whether the given file path should be
|
||||
/// ignored or not.
|
||||
///
|
||||
/// The match contains information about its origin.
|
||||
pub fn matched<'a, P: AsRef<Path>>(
|
||||
&'a self,
|
||||
path: P,
|
||||
is_dir: bool,
|
||||
) -> Match<IgnoreMatch<'a>> {
|
||||
// We need to be careful with our path. If it has a leading ./, then
|
||||
// strip it because it causes nothing but trouble.
|
||||
let mut path = path.as_ref();
|
||||
if let Some(p) = strip_prefix("./", path) {
|
||||
path = p;
|
||||
}
|
||||
// Match against the override patterns. If an override matches
|
||||
// regardless of whether it's whitelist/ignore, then we quit and
|
||||
// return that result immediately. Overrides have the highest
|
||||
// precedence.
|
||||
if !self.0.overrides.is_empty() {
|
||||
let mat =
|
||||
self.0.overrides.matched(path, is_dir)
|
||||
.map(IgnoreMatch::overrides);
|
||||
if !mat.is_none() {
|
||||
return mat;
|
||||
}
|
||||
}
|
||||
let mut whitelisted = Match::None;
|
||||
if self.0.opts.should_ignores() {
|
||||
let mat = self.matched_ignore(path, is_dir);
|
||||
if mat.is_ignore() {
|
||||
return mat;
|
||||
} else if mat.is_whitelist() {
|
||||
whitelisted = mat;
|
||||
}
|
||||
}
|
||||
if !self.0.types.is_empty() {
|
||||
let mat =
|
||||
self.0.types.matched(path, is_dir).map(IgnoreMatch::types);
|
||||
if mat.is_ignore() {
|
||||
return mat;
|
||||
} else if mat.is_whitelist() {
|
||||
whitelisted = mat;
|
||||
}
|
||||
}
|
||||
if whitelisted.is_none() && self.0.opts.hidden && is_hidden(path) {
|
||||
return Match::Ignore(IgnoreMatch::hidden());
|
||||
}
|
||||
whitelisted
|
||||
}
|
||||
|
||||
/// Performs matching only on the ignore files for this directory and
|
||||
/// all parent directories.
|
||||
fn matched_ignore<'a>(
|
||||
&'a self,
|
||||
path: &Path,
|
||||
is_dir: bool,
|
||||
) -> Match<IgnoreMatch<'a>> {
|
||||
let (mut m_ignore, mut m_gi, mut m_gi_exclude, mut m_explicit) =
|
||||
(Match::None, Match::None, Match::None, Match::None);
|
||||
let mut saw_git = false;
|
||||
for ig in self.parents().take_while(|ig| !ig.0.is_absolute_parent) {
|
||||
if m_ignore.is_none() {
|
||||
m_ignore =
|
||||
ig.0.ignore_matcher.matched(path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
}
|
||||
if !saw_git && m_gi.is_none() {
|
||||
m_gi =
|
||||
ig.0.git_ignore_matcher.matched(path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
}
|
||||
if !saw_git && m_gi_exclude.is_none() {
|
||||
m_gi_exclude =
|
||||
ig.0.git_exclude_matcher.matched(path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
}
|
||||
saw_git = saw_git || ig.0.has_git;
|
||||
}
|
||||
if let Some(abs_parent_path) = self.absolute_base() {
|
||||
let path = abs_parent_path.join(path);
|
||||
for ig in self.parents().skip_while(|ig|!ig.0.is_absolute_parent) {
|
||||
if m_ignore.is_none() {
|
||||
m_ignore =
|
||||
ig.0.ignore_matcher.matched(&path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
}
|
||||
if !saw_git && m_gi.is_none() {
|
||||
m_gi =
|
||||
ig.0.git_ignore_matcher.matched(&path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
}
|
||||
if !saw_git && m_gi_exclude.is_none() {
|
||||
m_gi_exclude =
|
||||
ig.0.git_exclude_matcher.matched(&path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
}
|
||||
saw_git = saw_git || ig.0.has_git;
|
||||
}
|
||||
}
|
||||
for gi in self.0.explicit_ignores.iter().rev() {
|
||||
if !m_explicit.is_none() {
|
||||
break;
|
||||
}
|
||||
m_explicit = gi.matched(&path, is_dir).map(IgnoreMatch::gitignore);
|
||||
}
|
||||
let m_global = self.0.git_global_matcher.matched(&path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
if !m_ignore.is_none() {
|
||||
m_ignore
|
||||
} else if !m_gi.is_none() {
|
||||
m_gi
|
||||
} else if !m_gi_exclude.is_none() {
|
||||
m_gi_exclude
|
||||
} else if !m_global.is_none() {
|
||||
m_global
|
||||
} else if !m_explicit.is_none() {
|
||||
m_explicit
|
||||
} else {
|
||||
Match::None
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns an iterator over parent ignore matchers, including this one.
|
||||
fn parents(&self) -> Parents {
|
||||
Parents(Some(self))
|
||||
}
|
||||
|
||||
/// Returns the first absolute path of the first absolute parent, if
|
||||
/// one exists.
|
||||
fn absolute_base(&self) -> Option<&Path> {
|
||||
self.0.absolute_base.as_ref().map(|p| &***p)
|
||||
}
|
||||
}
|
||||
|
||||
struct Parents<'a>(Option<&'a Ignore>);
|
||||
|
||||
impl<'a> Iterator for Parents<'a> {
|
||||
type Item = &'a Ignore;
|
||||
|
||||
fn next(&mut self) -> Option<&'a Ignore> {
|
||||
match self.0.take() {
|
||||
None => None,
|
||||
Some(ig) => {
|
||||
self.0 = ig.0.parent.as_ref();
|
||||
Some(ig)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A builder for creating an Ignore matcher.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct IgnoreBuilder {
|
||||
/// The root directory path for this ignore matcher.
|
||||
dir: PathBuf,
|
||||
/// An override matcher (default is empty).
|
||||
overrides: Arc<Override>,
|
||||
/// A type matcher (default is empty).
|
||||
types: Arc<Types>,
|
||||
/// Explicit ignore matchers.
|
||||
explicit_ignores: Vec<Gitignore>,
|
||||
/// Ignore config.
|
||||
opts: IgnoreOptions,
|
||||
}
|
||||
|
||||
impl IgnoreBuilder {
|
||||
/// Create a new builder for an `Ignore` matcher.
|
||||
///
|
||||
/// All relative file paths are resolved with respect to the current
|
||||
/// working directory.
|
||||
pub fn new() -> IgnoreBuilder {
|
||||
IgnoreBuilder {
|
||||
dir: Path::new("").to_path_buf(),
|
||||
overrides: Arc::new(Override::empty()),
|
||||
types: Arc::new(Types::empty()),
|
||||
explicit_ignores: vec![],
|
||||
opts: IgnoreOptions {
|
||||
hidden: true,
|
||||
ignore: true,
|
||||
git_global: true,
|
||||
git_ignore: true,
|
||||
git_exclude: true,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a new `Ignore` matcher.
|
||||
///
|
||||
/// The matcher returned won't match anything until ignore rules from
|
||||
/// directories are added to it.
|
||||
pub fn build(&self) -> Ignore {
|
||||
let git_global_matcher =
|
||||
if !self.opts.git_global {
|
||||
Gitignore::empty()
|
||||
} else {
|
||||
let (gi, err) = Gitignore::global();
|
||||
if let Some(err) = err {
|
||||
debug!("{}", err);
|
||||
}
|
||||
gi
|
||||
};
|
||||
Ignore(Arc::new(IgnoreInner {
|
||||
compiled: Arc::new(RwLock::new(HashMap::new())),
|
||||
dir: self.dir.clone(),
|
||||
overrides: self.overrides.clone(),
|
||||
types: self.types.clone(),
|
||||
parent: None,
|
||||
is_absolute_parent: true,
|
||||
absolute_base: None,
|
||||
explicit_ignores: Arc::new(self.explicit_ignores.clone()),
|
||||
ignore_matcher: Gitignore::empty(),
|
||||
git_global_matcher: Arc::new(git_global_matcher),
|
||||
git_ignore_matcher: Gitignore::empty(),
|
||||
git_exclude_matcher: Gitignore::empty(),
|
||||
has_git: false,
|
||||
opts: self.opts,
|
||||
}))
|
||||
}
|
||||
|
||||
/// Add an override matcher.
|
||||
///
|
||||
/// By default, no override matcher is used.
|
||||
///
|
||||
/// This overrides any previous setting.
|
||||
pub fn overrides(&mut self, overrides: Override) -> &mut IgnoreBuilder {
|
||||
self.overrides = Arc::new(overrides);
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a file type matcher.
|
||||
///
|
||||
/// By default, no file type matcher is used.
|
||||
///
|
||||
/// This overrides any previous setting.
|
||||
pub fn types(&mut self, types: Types) -> &mut IgnoreBuilder {
|
||||
self.types = Arc::new(types);
|
||||
self
|
||||
}
|
||||
|
||||
/// Adds a new global ignore matcher from the ignore file path given.
|
||||
pub fn add_ignore(&mut self, ig: Gitignore) -> &mut IgnoreBuilder {
|
||||
self.explicit_ignores.push(ig);
|
||||
self
|
||||
}
|
||||
|
||||
/// Enables ignoring hidden files.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn hidden(&mut self, yes: bool) -> &mut IgnoreBuilder {
|
||||
self.opts.hidden = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Enables reading `.ignore` files.
|
||||
///
|
||||
/// `.ignore` files have the same semantics as `gitignore` files and are
|
||||
/// supported by search tools such as ripgrep and The Silver Searcher.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn ignore(&mut self, yes: bool) -> &mut IgnoreBuilder {
|
||||
self.opts.ignore = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a global gitignore matcher.
|
||||
///
|
||||
/// Its precedence is lower than both normal `.gitignore` files and
|
||||
/// `.git/info/exclude` files.
|
||||
///
|
||||
/// This overwrites any previous global gitignore setting.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn git_global(&mut self, yes: bool) -> &mut IgnoreBuilder {
|
||||
self.opts.git_global = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Enables reading `.gitignore` files.
|
||||
///
|
||||
/// `.gitignore` files have match semantics as described in the `gitignore`
|
||||
/// man page.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn git_ignore(&mut self, yes: bool) -> &mut IgnoreBuilder {
|
||||
self.opts.git_ignore = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Enables reading `.git/info/exclude` files.
|
||||
///
|
||||
/// `.git/info/exclude` files have match semantics as described in the
|
||||
/// `gitignore` man page.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn git_exclude(&mut self, yes: bool) -> &mut IgnoreBuilder {
|
||||
self.opts.git_exclude = yes;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new gitignore matcher for the directory given.
|
||||
///
|
||||
/// Ignore globs are extracted from each of the file names in `dir` in the
|
||||
/// order given (earlier names have lower precedence than later names).
|
||||
///
|
||||
/// I/O errors are ignored.
|
||||
pub fn create_gitignore(
|
||||
dir: &Path,
|
||||
names: &[&str],
|
||||
) -> (Gitignore, Option<Error>) {
|
||||
let mut builder = GitignoreBuilder::new(dir);
|
||||
let mut errs = PartialErrorBuilder::default();
|
||||
for name in names {
|
||||
let gipath = dir.join(name);
|
||||
errs.maybe_push_ignore_io(builder.add(gipath));
|
||||
}
|
||||
let gi = match builder.build() {
|
||||
Ok(gi) => gi,
|
||||
Err(err) => {
|
||||
errs.push(err);
|
||||
GitignoreBuilder::new(dir).build().unwrap()
|
||||
}
|
||||
};
|
||||
(gi, errs.into_error_option())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::fs::{self, File};
    use std::io::Write;
    use std::path::Path;

    use tempdir::TempDir;

    use dir::IgnoreBuilder;
    use gitignore::Gitignore;
    use Error;

    /// Creates a fresh temporary directory for a single test.
    fn tmpdir() -> TempDir {
        TempDir::new("ignore-test-").unwrap()
    }

    /// Writes `contents` to a newly created file at `path`.
    fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
        File::create(path).unwrap().write_all(contents.as_bytes()).unwrap();
    }

    /// Creates `path` along with any missing parent directories.
    fn mkdirp<P: AsRef<Path>>(path: P) {
        fs::create_dir_all(path).unwrap();
    }

    /// Unwraps a partial error into its list of errors, panicking on any
    /// other kind of error.
    fn partial(err: Error) -> Vec<Error> {
        match err {
            Error::Partial(errs) => errs,
            other => panic!("expected partial error but got {:?}", other),
        }
    }

    #[test]
    fn explicit_ignore() {
        let tmp = tmpdir();
        let rules = tmp.path().join("not-an-ignore");
        wfile(&rules, "foo\n!bar");

        let (gi, err) = Gitignore::new(&rules);
        assert!(err.is_none());
        let (ig, err) = IgnoreBuilder::new()
            .add_ignore(gi).build().add_child(tmp.path());
        assert!(err.is_none());
        assert!(ig.matched("foo", false).is_ignore());
        assert!(ig.matched("bar", false).is_whitelist());
        assert!(ig.matched("baz", false).is_none());
    }

    #[test]
    fn git_exclude() {
        let tmp = tmpdir();
        mkdirp(tmp.path().join(".git/info"));
        wfile(tmp.path().join(".git/info/exclude"), "foo\n!bar");

        let (ig, err) = IgnoreBuilder::new().build().add_child(tmp.path());
        assert!(err.is_none());
        assert!(ig.matched("foo", false).is_ignore());
        assert!(ig.matched("bar", false).is_whitelist());
        assert!(ig.matched("baz", false).is_none());
    }

    #[test]
    fn gitignore() {
        let tmp = tmpdir();
        wfile(tmp.path().join(".gitignore"), "foo\n!bar");

        let (ig, err) = IgnoreBuilder::new().build().add_child(tmp.path());
        assert!(err.is_none());
        assert!(ig.matched("foo", false).is_ignore());
        assert!(ig.matched("bar", false).is_whitelist());
        assert!(ig.matched("baz", false).is_none());
    }

    #[test]
    fn ignore() {
        let tmp = tmpdir();
        wfile(tmp.path().join(".ignore"), "foo\n!bar");

        let (ig, err) = IgnoreBuilder::new().build().add_child(tmp.path());
        assert!(err.is_none());
        assert!(ig.matched("foo", false).is_ignore());
        assert!(ig.matched("bar", false).is_whitelist());
        assert!(ig.matched("baz", false).is_none());
    }

    // An .ignore file takes priority over a .gitignore file.
    #[test]
    fn ignore_over_gitignore() {
        let tmp = tmpdir();
        wfile(tmp.path().join(".gitignore"), "foo");
        wfile(tmp.path().join(".ignore"), "!foo");

        let (ig, err) = IgnoreBuilder::new().build().add_child(tmp.path());
        assert!(err.is_none());
        assert!(ig.matched("foo", false).is_whitelist());
    }

    // The exclude file has lower precedence than both .ignore and
    // .gitignore.
    #[test]
    fn exclude_lowest() {
        let tmp = tmpdir();
        wfile(tmp.path().join(".gitignore"), "!foo");
        wfile(tmp.path().join(".ignore"), "!bar");
        mkdirp(tmp.path().join(".git/info"));
        wfile(tmp.path().join(".git/info/exclude"), "foo\nbar\nbaz");

        let (ig, err) = IgnoreBuilder::new().build().add_child(tmp.path());
        assert!(err.is_none());
        assert!(ig.matched("baz", false).is_ignore());
        assert!(ig.matched("foo", false).is_whitelist());
        assert!(ig.matched("bar", false).is_whitelist());
    }

    #[test]
    fn errored() {
        let tmp = tmpdir();
        wfile(tmp.path().join(".gitignore"), "f**oo");

        let (_, err) = IgnoreBuilder::new().build().add_child(tmp.path());
        assert!(err.is_some());
    }

    #[test]
    fn errored_both() {
        let tmp = tmpdir();
        wfile(tmp.path().join(".gitignore"), "f**oo");
        wfile(tmp.path().join(".ignore"), "fo**o");

        let (_, err) = IgnoreBuilder::new().build().add_child(tmp.path());
        assert_eq!(2, partial(err.expect("an error")).len());
    }

    #[test]
    fn errored_partial() {
        let tmp = tmpdir();
        wfile(tmp.path().join(".gitignore"), "f**oo\nbar");

        let (ig, err) = IgnoreBuilder::new().build().add_child(tmp.path());
        assert!(err.is_some());
        assert!(ig.matched("bar", false).is_ignore());
    }

    #[test]
    fn errored_partial_and_ignore() {
        let tmp = tmpdir();
        wfile(tmp.path().join(".gitignore"), "f**oo\nbar");
        wfile(tmp.path().join(".ignore"), "!bar");

        let (ig, err) = IgnoreBuilder::new().build().add_child(tmp.path());
        assert!(err.is_some());
        assert!(ig.matched("bar", false).is_whitelist());
    }

    #[test]
    fn not_present_empty() {
        let tmp = tmpdir();

        let (_, err) = IgnoreBuilder::new().build().add_child(tmp.path());
        assert!(err.is_none());
    }

    #[test]
    fn stops_at_git_dir() {
        // .gitignore files on the far side of a .git barrier must not be
        // matched, whereas .ignore files still are.
        let tmp = tmpdir();
        mkdirp(tmp.path().join(".git"));
        mkdirp(tmp.path().join("foo/.git"));
        wfile(tmp.path().join(".gitignore"), "foo");
        wfile(tmp.path().join(".ignore"), "bar");

        let root = IgnoreBuilder::new().build();
        let (outer, err) = root.add_child(tmp.path());
        assert!(err.is_none());
        let (inner, err) = outer.add_child(outer.path().join("foo"));
        assert!(err.is_none());

        assert!(outer.matched("foo", false).is_ignore());
        assert!(inner.matched("foo", false).is_none());

        assert!(outer.matched("bar", false).is_ignore());
        assert!(inner.matched("bar", false).is_ignore());
    }

    #[test]
    fn absolute_parent() {
        let tmp = tmpdir();
        mkdirp(tmp.path().join(".git"));
        mkdirp(tmp.path().join("foo"));
        wfile(tmp.path().join(".gitignore"), "bar");

        // Baseline: when the parent is not added, its gitignore file is not
        // picked up.
        let root = IgnoreBuilder::new().build();
        let (child, err) = root.add_child(tmp.path().join("foo"));
        assert!(err.is_none());
        assert!(child.matched("bar", false).is_none());

        // Adding the parent directories makes the parent's rules apply.
        let root = IgnoreBuilder::new().build();
        let (with_parents, err) = root.add_parents(tmp.path().join("foo"));
        assert!(err.is_none());
        let (child, err) = with_parents.add_child(tmp.path().join("foo"));
        assert!(err.is_none());
        assert!(child.matched("bar", false).is_ignore());
    }

    #[test]
    fn absolute_parent_anchored() {
        let tmp = tmpdir();
        mkdirp(tmp.path().join(".git"));
        mkdirp(tmp.path().join("src/llvm"));
        wfile(tmp.path().join(".gitignore"), "/llvm/\nfoo");

        let root = IgnoreBuilder::new().build();
        let (with_parents, err) = root.add_parents(tmp.path().join("src"));
        assert!(err.is_none());
        let (child, err) = with_parents.add_child("src");
        assert!(err.is_none());

        assert!(with_parents.matched("llvm", true).is_none());
        assert!(child.matched("llvm", true).is_none());
        assert!(child.matched("src/llvm", true).is_none());
        assert!(child.matched("foo", false).is_ignore());
        assert!(child.matched("src/foo", false).is_ignore());
    }
}
|
||||
607
ignore/src/gitignore.rs
Normal file
607
ignore/src/gitignore.rs
Normal file
@@ -0,0 +1,607 @@
|
||||
/*!
|
||||
The gitignore module provides a way to match globs from a gitignore file
|
||||
against file paths.
|
||||
|
||||
Note that this module implements the specification as described in the
|
||||
`gitignore` man page from scratch. That is, this module does *not* shell out to
|
||||
the `git` command line tool.
|
||||
*/
|
||||
|
||||
use std::cell::RefCell;
|
||||
use std::env;
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufRead, Read};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::str;
|
||||
use std::sync::Arc;
|
||||
|
||||
use globset::{Candidate, GlobBuilder, GlobSet, GlobSetBuilder};
|
||||
use regex::bytes::Regex;
|
||||
use thread_local::ThreadLocal;
|
||||
|
||||
use pathutil::{is_file_name, strip_prefix};
|
||||
use {Error, Match, PartialErrorBuilder};
|
||||
|
||||
/// A single glob taken from a gitignore file.
///
/// This is used to report information about the highest precedent glob that
/// matched in one or more gitignore files.
#[derive(Clone, Debug)]
pub struct Glob {
    /// The path of the file this glob was extracted from, if any.
    from: Option<PathBuf>,
    /// The glob string as originally written.
    original: String,
    /// The glob string actually used to convert to a regex.
    actual: String,
    /// Whether this glob whitelists rather than ignores.
    is_whitelist: bool,
    /// Whether this glob should only match directories.
    is_only_dir: bool,
}

impl Glob {
    /// Returns the path of the file that defined this glob, if there is one.
    pub fn from(&self) -> Option<&Path> {
        match self.from {
            Some(ref origin) => Some(origin.as_path()),
            None => None,
        }
    }

    /// Returns the glob exactly as it was defined in a gitignore file.
    pub fn original(&self) -> &str {
        self.original.as_str()
    }

    /// Returns the glob that was actually compiled to respect gitignore
    /// semantics.
    pub fn actual(&self) -> &str {
        self.actual.as_str()
    }

    /// Returns true if this is a whitelist glob.
    pub fn is_whitelist(&self) -> bool {
        self.is_whitelist
    }

    /// Returns true if this glob must match a directory.
    pub fn is_only_dir(&self) -> bool {
        self.is_only_dir
    }
}
|
||||
|
||||
/// Gitignore is a matcher for the globs in one or more gitignore files
|
||||
/// in the same directory.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Gitignore {
|
||||
set: GlobSet,
|
||||
root: PathBuf,
|
||||
globs: Vec<Glob>,
|
||||
num_ignores: u64,
|
||||
num_whitelists: u64,
|
||||
matches: Arc<ThreadLocal<RefCell<Vec<usize>>>>,
|
||||
}
|
||||
|
||||
impl Gitignore {
|
||||
/// Creates a new gitignore matcher from the gitignore file path given.
|
||||
///
|
||||
/// If it's desirable to include multiple gitignore files in a single
|
||||
/// matcher, or read gitignore globs from a different source, then
|
||||
/// use `GitignoreBuilder`.
|
||||
///
|
||||
/// This always returns a valid matcher, even if it's empty. In particular,
|
||||
/// a Gitignore file can be partially valid, e.g., when one glob is invalid
|
||||
/// but the rest aren't.
|
||||
///
|
||||
/// Note that I/O errors are ignored. For more granular control over
|
||||
/// errors, use `GitignoreBuilder`.
|
||||
pub fn new<P: AsRef<Path>>(
|
||||
gitignore_path: P,
|
||||
) -> (Gitignore, Option<Error>) {
|
||||
let path = gitignore_path.as_ref();
|
||||
let parent = path.parent().unwrap_or(Path::new("/"));
|
||||
let mut builder = GitignoreBuilder::new(parent);
|
||||
let mut errs = PartialErrorBuilder::default();
|
||||
errs.maybe_push_ignore_io(builder.add(path));
|
||||
match builder.build() {
|
||||
Ok(gi) => (gi, errs.into_error_option()),
|
||||
Err(err) => {
|
||||
errs.push(err);
|
||||
(Gitignore::empty(), errs.into_error_option())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new gitignore matcher from the global ignore file, if one
|
||||
/// exists.
|
||||
///
|
||||
/// The global config file path is specified by git's `core.excludesFile`
|
||||
/// config option.
|
||||
///
|
||||
/// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig`
|
||||
/// does not exist or does not specify `core.excludesFile`, then
|
||||
/// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not
|
||||
/// set or is empty, then `$HOME/.config/git/ignore` is used instead.
|
||||
pub fn global() -> (Gitignore, Option<Error>) {
|
||||
match gitconfig_excludes_path() {
|
||||
None => (Gitignore::empty(), None),
|
||||
Some(path) => {
|
||||
if !path.is_file() {
|
||||
(Gitignore::empty(), None)
|
||||
} else {
|
||||
Gitignore::new(path)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new empty gitignore matcher that never matches anything.
|
||||
///
|
||||
/// Its path is empty.
|
||||
pub fn empty() -> Gitignore {
|
||||
GitignoreBuilder::new("").build().unwrap()
|
||||
}
|
||||
|
||||
/// Returns the directory containing this gitignore matcher.
|
||||
///
|
||||
/// All matches are done relative to this path.
|
||||
pub fn path(&self) -> &Path {
|
||||
&*self.root
|
||||
}
|
||||
|
||||
/// Returns true if and only if this gitignore has zero globs, and
|
||||
/// therefore never matches any file path.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.set.is_empty()
|
||||
}
|
||||
|
||||
/// Returns the total number of globs, which should be equivalent to
|
||||
/// `num_ignores + num_whitelists`.
|
||||
pub fn len(&self) -> usize {
|
||||
self.set.len()
|
||||
}
|
||||
|
||||
/// Returns the total number of ignore globs.
|
||||
pub fn num_ignores(&self) -> u64 {
|
||||
self.num_ignores
|
||||
}
|
||||
|
||||
/// Returns the total number of whitelisted globs.
|
||||
pub fn num_whitelists(&self) -> u64 {
|
||||
self.num_whitelists
|
||||
}
|
||||
|
||||
/// Returns whether the given file path matched a pattern in this gitignore
|
||||
/// matcher.
|
||||
///
|
||||
/// `is_dir` should be true if the path refers to a directory and false
|
||||
/// otherwise.
|
||||
///
|
||||
/// The given path is matched relative to the path given when building
|
||||
/// the matcher. Specifically, before matching `path`, its prefix (as
|
||||
/// determined by a common suffix of the directory containing this
|
||||
/// gitignore) is stripped. If there is no common suffix/prefix overlap,
|
||||
/// then `path` is assumed to be relative to this matcher.
|
||||
pub fn matched<P: AsRef<Path>>(
|
||||
&self,
|
||||
path: P,
|
||||
is_dir: bool,
|
||||
) -> Match<&Glob> {
|
||||
if self.is_empty() {
|
||||
return Match::None;
|
||||
}
|
||||
self.matched_stripped(self.strip(path.as_ref()), is_dir)
|
||||
}
|
||||
|
||||
/// Like matched, but takes a path that has already been stripped.
|
||||
fn matched_stripped<P: AsRef<Path>>(
|
||||
&self,
|
||||
path: P,
|
||||
is_dir: bool,
|
||||
) -> Match<&Glob> {
|
||||
if self.is_empty() {
|
||||
return Match::None;
|
||||
}
|
||||
let path = path.as_ref();
|
||||
let _matches = self.matches.get_default();
|
||||
let mut matches = _matches.borrow_mut();
|
||||
let candidate = Candidate::new(path);
|
||||
self.set.matches_candidate_into(&candidate, &mut *matches);
|
||||
for &i in matches.iter().rev() {
|
||||
let glob = &self.globs[i];
|
||||
if !glob.is_only_dir() || is_dir {
|
||||
return if glob.is_whitelist() {
|
||||
Match::Whitelist(glob)
|
||||
} else {
|
||||
Match::Ignore(glob)
|
||||
};
|
||||
}
|
||||
}
|
||||
Match::None
|
||||
}
|
||||
|
||||
/// Strips the given path such that it's suitable for matching with this
|
||||
/// gitignore matcher.
|
||||
fn strip<'a, P: 'a + AsRef<Path> + ?Sized>(
|
||||
&'a self,
|
||||
path: &'a P,
|
||||
) -> &'a Path {
|
||||
let mut path = path.as_ref();
|
||||
// A leading ./ is completely superfluous. We also strip it from
|
||||
// our gitignore root path, so we need to strip it from our candidate
|
||||
// path too.
|
||||
if let Some(p) = strip_prefix("./", path) {
|
||||
path = p;
|
||||
}
|
||||
// Strip any common prefix between the candidate path and the root
|
||||
// of the gitignore, to make sure we get relative matching right.
|
||||
// BUT, a file name might not have any directory components to it,
|
||||
// in which case, we don't want to accidentally strip any part of the
|
||||
// file name.
|
||||
if !is_file_name(path) {
|
||||
if let Some(p) = strip_prefix(&self.root, path) {
|
||||
path = p;
|
||||
// If we're left with a leading slash, get rid of it.
|
||||
if let Some(p) = strip_prefix("/", path) {
|
||||
path = p;
|
||||
}
|
||||
}
|
||||
}
|
||||
path
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a matcher for a single set of globs from a .gitignore file.
|
||||
pub struct GitignoreBuilder {
|
||||
builder: GlobSetBuilder,
|
||||
root: PathBuf,
|
||||
globs: Vec<Glob>,
|
||||
}
|
||||
|
||||
impl GitignoreBuilder {
|
||||
/// Create a new builder for a gitignore file.
|
||||
///
|
||||
/// The path given should be the path at which the globs for this gitignore
|
||||
/// file should be matched. Note that paths are always matched relative
|
||||
/// to the root path given here. Generally, the root path should correspond
|
||||
/// to the *directory* containing a `.gitignore` file.
|
||||
pub fn new<P: AsRef<Path>>(root: P) -> GitignoreBuilder {
|
||||
let root = root.as_ref();
|
||||
GitignoreBuilder {
|
||||
builder: GlobSetBuilder::new(),
|
||||
root: strip_prefix("./", root).unwrap_or(root).to_path_buf(),
|
||||
globs: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a new matcher from the globs added so far.
|
||||
///
|
||||
/// Once a matcher is built, no new globs can be added to it.
|
||||
pub fn build(&self) -> Result<Gitignore, Error> {
|
||||
let nignore = self.globs.iter().filter(|g| !g.is_whitelist()).count();
|
||||
let nwhite = self.globs.iter().filter(|g| g.is_whitelist()).count();
|
||||
let set = try!(
|
||||
self.builder.build().map_err(|err| Error::Glob(err.to_string())));
|
||||
Ok(Gitignore {
|
||||
set: set,
|
||||
root: self.root.clone(),
|
||||
globs: self.globs.clone(),
|
||||
num_ignores: nignore as u64,
|
||||
num_whitelists: nwhite as u64,
|
||||
matches: Arc::new(ThreadLocal::default()),
|
||||
})
|
||||
}
|
||||
|
||||
/// Add each glob from the file path given.
|
||||
///
|
||||
/// The file given should be formatted as a `gitignore` file.
|
||||
///
|
||||
/// Note that partial errors can be returned. For example, if there was
|
||||
/// a problem adding one glob, an error for that will be returned, but
|
||||
/// all other valid globs will still be added.
|
||||
pub fn add<P: AsRef<Path>>(&mut self, path: P) -> Option<Error> {
|
||||
let path = path.as_ref();
|
||||
let file = match File::open(path) {
|
||||
Err(err) => return Some(Error::Io(err).with_path(path)),
|
||||
Ok(file) => file,
|
||||
};
|
||||
let rdr = io::BufReader::new(file);
|
||||
let mut errs = PartialErrorBuilder::default();
|
||||
for (i, line) in rdr.lines().enumerate() {
|
||||
let lineno = (i + 1) as u64;
|
||||
let line = match line {
|
||||
Ok(line) => line,
|
||||
Err(err) => {
|
||||
errs.push(Error::Io(err).tagged(path, lineno));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
if let Err(err) = self.add_line(Some(path.to_path_buf()), &line) {
|
||||
errs.push(err.tagged(path, lineno));
|
||||
}
|
||||
}
|
||||
errs.into_error_option()
|
||||
}
|
||||
|
||||
/// Add each glob line from the string given.
|
||||
///
|
||||
/// If this string came from a particular `gitignore` file, then its path
|
||||
/// should be provided here.
|
||||
///
|
||||
/// The string given should be formatted as a `gitignore` file.
|
||||
#[cfg(test)]
|
||||
fn add_str(
|
||||
&mut self,
|
||||
from: Option<PathBuf>,
|
||||
gitignore: &str,
|
||||
) -> Result<&mut GitignoreBuilder, Error> {
|
||||
for line in gitignore.lines() {
|
||||
try!(self.add_line(from.clone(), line));
|
||||
}
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Add a line from a gitignore file to this builder.
|
||||
///
|
||||
/// If this line came from a particular `gitignore` file, then its path
|
||||
/// should be provided here.
|
||||
///
|
||||
/// If the line could not be parsed as a glob, then an error is returned.
|
||||
pub fn add_line(
|
||||
&mut self,
|
||||
from: Option<PathBuf>,
|
||||
mut line: &str,
|
||||
) -> Result<&mut GitignoreBuilder, Error> {
|
||||
if line.starts_with("#") {
|
||||
return Ok(self);
|
||||
}
|
||||
if !line.ends_with("\\ ") {
|
||||
line = line.trim_right();
|
||||
}
|
||||
if line.is_empty() {
|
||||
return Ok(self);
|
||||
}
|
||||
let mut glob = Glob {
|
||||
from: from,
|
||||
original: line.to_string(),
|
||||
actual: String::new(),
|
||||
is_whitelist: false,
|
||||
is_only_dir: false,
|
||||
};
|
||||
let mut literal_separator = false;
|
||||
let has_slash = line.chars().any(|c| c == '/');
|
||||
let is_absolute = line.chars().nth(0).unwrap() == '/';
|
||||
if line.starts_with("\\!") || line.starts_with("\\#") {
|
||||
line = &line[1..];
|
||||
} else {
|
||||
if line.starts_with("!") {
|
||||
glob.is_whitelist = true;
|
||||
line = &line[1..];
|
||||
}
|
||||
if line.starts_with("/") {
|
||||
// `man gitignore` says that if a glob starts with a slash,
|
||||
// then the glob can only match the beginning of a path
|
||||
// (relative to the location of gitignore). We achieve this by
|
||||
// simply banning wildcards from matching /.
|
||||
literal_separator = true;
|
||||
line = &line[1..];
|
||||
}
|
||||
}
|
||||
// If it ends with a slash, then this should only match directories,
|
||||
// but the slash should otherwise not be used while globbing.
|
||||
if let Some((i, c)) = line.char_indices().rev().nth(0) {
|
||||
if c == '/' {
|
||||
glob.is_only_dir = true;
|
||||
line = &line[..i];
|
||||
}
|
||||
}
|
||||
// If there is a literal slash, then we note that so that globbing
|
||||
// doesn't let wildcards match slashes.
|
||||
glob.actual = line.to_string();
|
||||
if has_slash {
|
||||
literal_separator = true;
|
||||
}
|
||||
// If there was a leading slash, then this is a glob that must
|
||||
// match the entire path name. Otherwise, we should let it match
|
||||
// anywhere, so use a **/ prefix.
|
||||
if !is_absolute {
|
||||
// ... but only if we don't already have a **/ prefix.
|
||||
if !glob.actual.starts_with("**/") {
|
||||
glob.actual = format!("**/{}", glob.actual);
|
||||
}
|
||||
}
|
||||
// If the glob ends with `/**`, then we should only match everything
|
||||
// inside a directory, but not the directory itself. Standard globs
|
||||
// will match the directory. So we add `/*` to force the issue.
|
||||
if glob.actual.ends_with("/**") {
|
||||
glob.actual = format!("{}/*", glob.actual);
|
||||
}
|
||||
let parsed = try!(
|
||||
GlobBuilder::new(&glob.actual)
|
||||
.literal_separator(literal_separator)
|
||||
.build()
|
||||
.map_err(|err| Error::Glob(err.to_string())));
|
||||
self.builder.add(parsed);
|
||||
self.globs.push(glob);
|
||||
Ok(self)
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the file path of the current environment's global gitignore file.
|
||||
///
|
||||
/// Note that the file path returned may not exist.
|
||||
fn gitconfig_excludes_path() -> Option<PathBuf> {
|
||||
gitconfig_contents()
|
||||
.and_then(|data| parse_excludes_file(&data))
|
||||
.or_else(excludes_file_default)
|
||||
}
|
||||
|
||||
/// Reads the raw bytes of `$HOME/.gitconfig`.
///
/// Returns `None` when `HOME` is not set, or when the file cannot be opened
/// or read.
fn gitconfig_contents() -> Option<Vec<u8>> {
    env::var_os("HOME")
        .map(|home| PathBuf::from(home).join(".gitconfig"))
        .and_then(|path| File::open(path).ok())
        .and_then(|file| {
            let mut rdr = io::BufReader::new(file);
            let mut buf = vec![];
            match rdr.read_to_end(&mut buf) {
                Ok(_) => Some(buf),
                Err(_) => None,
            }
        })
}
|
||||
|
||||
/// Returns the default file path for a global .gitignore file.
///
/// This is `$XDG_CONFIG_HOME/git/ignore`. When `XDG_CONFIG_HOME` is unset or
/// empty, the config directory falls back to `$HOME/.config`, giving
/// `$HOME/.config/git/ignore`. Returns `None` when neither variable yields a
/// directory.
///
/// Note that the file path returned may not exist.
fn excludes_file_default() -> Option<PathBuf> {
    env::var_os("XDG_CONFIG_HOME")
        .and_then(|x| if x.is_empty() { None } else { Some(PathBuf::from(x)) })
        // The fallback config directory is `$HOME/.config`, not `$HOME`.
        .or_else(|| env::var_os("HOME").map(|h| PathBuf::from(h).join(".config")))
        .map(|dir| dir.join("git/ignore"))
}
|
||||
|
||||
/// Extract git's `core.excludesfile` config setting from the raw file contents
|
||||
/// given.
|
||||
fn parse_excludes_file(data: &[u8]) -> Option<PathBuf> {
|
||||
// N.B. This is the lazy approach, and isn't technically correct, but
|
||||
// probably works in more circumstances. I guess we would ideally have
|
||||
// a full INI parser. Yuck.
|
||||
lazy_static! {
|
||||
static ref RE: Regex = Regex::new(
|
||||
r"(?ium)^\s*excludesfile\s*=\s*(.+)\s*$").unwrap();
|
||||
};
|
||||
let caps = match RE.captures(data) {
|
||||
None => return None,
|
||||
Some(caps) => caps,
|
||||
};
|
||||
str::from_utf8(&caps[1]).ok().map(|s| PathBuf::from(expand_tilde(s)))
|
||||
}
|
||||
|
||||
/// Expands a leading `~` in a file path to the value of `$HOME`.
///
/// Only a path that is exactly `~` or that starts with `~/` is expanded. A
/// tilde anywhere else (for example in `/srv/backup~/ignore`) is part of a
/// file name and is left alone, as is the `~user` form. If `HOME` is unset
/// or not valid Unicode, the path is returned unchanged.
fn expand_tilde(path: &str) -> String {
    if path != "~" && !path.starts_with("~/") {
        return path.to_string();
    }
    match env::var("HOME") {
        // `path[1..]` is either empty or begins with the `/` after the `~`.
        Ok(home) => format!("{}{}", home, &path[1..]),
        Err(_) => path.to_string(),
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::path::Path;
    use super::{Gitignore, GitignoreBuilder};

    /// Builds a matcher rooted at `root` from gitignore-formatted text.
    fn gi_from_str<P: AsRef<Path>>(root: P, s: &str) -> Gitignore {
        let mut builder = GitignoreBuilder::new(root);
        builder.add_str(None, s).unwrap();
        builder.build().unwrap()
    }

    /// Reports whether a matcher built from `gi` at `root` ignores `path`.
    fn is_ignored<R: AsRef<Path>, P: AsRef<Path>>(
        root: R,
        gi: &str,
        path: P,
        is_dir: bool,
    ) -> bool {
        gi_from_str(root, gi).matched(path, is_dir).is_ignore()
    }

    // Defines a test asserting that the path is ignored. The path is
    // treated as a file unless a fifth argument says otherwise.
    macro_rules! ignored {
        ($name:ident, $root:expr, $gi:expr, $path:expr) => {
            ignored!($name, $root, $gi, $path, false);
        };
        ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => {
            #[test]
            fn $name() {
                assert!(is_ignored($root, $gi, $path, $is_dir));
            }
        };
    }

    // Defines a test asserting that the path is NOT ignored.
    macro_rules! not_ignored {
        ($name:ident, $root:expr, $gi:expr, $path:expr) => {
            not_ignored!($name, $root, $gi, $path, false);
        };
        ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => {
            #[test]
            fn $name() {
                assert!(!is_ignored($root, $gi, $path, $is_dir));
            }
        };
    }

    const ROOT: &'static str = "/home/foobar/rust/rg";

    ignored!(ig1, ROOT, "months", "months");
    ignored!(ig2, ROOT, "*.lock", "Cargo.lock");
    ignored!(ig3, ROOT, "*.rs", "src/main.rs");
    ignored!(ig4, ROOT, "src/*.rs", "src/main.rs");
    ignored!(ig5, ROOT, "/*.c", "cat-file.c");
    ignored!(ig6, ROOT, "/src/*.rs", "src/main.rs");
    ignored!(ig7, ROOT, "!src/main.rs\n*.rs", "src/main.rs");
    ignored!(ig8, ROOT, "foo/", "foo", true);
    ignored!(ig9, ROOT, "**/foo", "foo");
    ignored!(ig10, ROOT, "**/foo", "src/foo");
    ignored!(ig11, ROOT, "**/foo/**", "src/foo/bar");
    ignored!(ig12, ROOT, "**/foo/**", "wat/src/foo/bar/baz");
    ignored!(ig13, ROOT, "**/foo/bar", "foo/bar");
    ignored!(ig14, ROOT, "**/foo/bar", "src/foo/bar");
    ignored!(ig15, ROOT, "abc/**", "abc/x");
    ignored!(ig16, ROOT, "abc/**", "abc/x/y");
    ignored!(ig17, ROOT, "abc/**", "abc/x/y/z");
    ignored!(ig18, ROOT, "a/**/b", "a/b");
    ignored!(ig19, ROOT, "a/**/b", "a/x/b");
    ignored!(ig20, ROOT, "a/**/b", "a/x/y/b");
    ignored!(ig21, ROOT, r"\!xy", "!xy");
    ignored!(ig22, ROOT, r"\#foo", "#foo");
    ignored!(ig23, ROOT, "foo", "./foo");
    ignored!(ig24, ROOT, "target", "grep/target");
    ignored!(ig25, ROOT, "Cargo.lock", "./tabwriter-bin/Cargo.lock");
    ignored!(ig26, ROOT, "/foo/bar/baz", "./foo/bar/baz");
    ignored!(ig27, ROOT, "foo/", "xyz/foo", true);
    ignored!(ig28, ROOT, "src/*.rs", "src/grep/src/main.rs");
    ignored!(ig29, "./src", "/llvm/", "./src/llvm", true);
    ignored!(ig30, ROOT, "node_modules/ ", "node_modules", true);

    not_ignored!(ignot1, ROOT, "amonths", "months");
    not_ignored!(ignot2, ROOT, "monthsa", "months");
    not_ignored!(ignot3, ROOT, "/src/*.rs", "src/grep/src/main.rs");
    not_ignored!(ignot4, ROOT, "/*.c", "mozilla-sha1/sha1.c");
    not_ignored!(ignot5, ROOT, "/src/*.rs", "src/grep/src/main.rs");
    not_ignored!(ignot6, ROOT, "*.rs\n!src/main.rs", "src/main.rs");
    not_ignored!(ignot7, ROOT, "foo/", "foo", false);
    not_ignored!(ignot8, ROOT, "**/foo/**", "wat/src/afoo/bar/baz");
    not_ignored!(ignot9, ROOT, "**/foo/**", "wat/src/fooa/bar/baz");
    not_ignored!(ignot10, ROOT, "**/foo/bar", "foo/src/bar");
    not_ignored!(ignot11, ROOT, "#foo", "#foo");
    not_ignored!(ignot12, ROOT, "\n\n\n", "foo");
    not_ignored!(ignot13, ROOT, "foo/**", "foo", true);
    not_ignored!(
        ignot14, "./third_party/protobuf", "m4/ltoptions.m4",
        "./third_party/protobuf/csharp/src/packages/repositories.config");

    /// Copies a string's bytes into an owned buffer.
    fn bytes(s: &str) -> Vec<u8> {
        s.as_bytes().to_vec()
    }

    /// Renders a path as an owned string; panics if it is not valid UTF-8.
    fn path_string<P: AsRef<Path>>(path: P) -> String {
        path.as_ref().to_str().unwrap().to_string()
    }

    #[test]
    fn parse_excludes_file1() {
        let data = bytes("[core]\nexcludesFile = /foo/bar");
        let got = super::parse_excludes_file(&data).unwrap();
        assert_eq!(path_string(got), "/foo/bar");
    }

    #[test]
    fn parse_excludes_file2() {
        let data = bytes("[core]\nexcludesFile = ~/foo/bar");
        let got = super::parse_excludes_file(&data).unwrap();
        assert_eq!(path_string(got), super::expand_tilde("~/foo/bar"));
    }

    #[test]
    fn parse_excludes_file3() {
        // `excludeFile` is a different key, so nothing should be found.
        let data = bytes("[core]\nexcludeFile = /foo/bar");
        assert!(super::parse_excludes_file(&data).is_none());
    }

    // See: https://github.com/BurntSushi/ripgrep/issues/106
    #[test]
    fn regression_106() {
        // Building from a whitespace-only line must not panic.
        gi_from_str("/", " ");
    }
}
|
||||
300
ignore/src/lib.rs
Normal file
300
ignore/src/lib.rs
Normal file
@@ -0,0 +1,300 @@
|
||||
/*!
|
||||
The ignore crate provides a fast recursive directory iterator that respects
|
||||
various filters such as globs, file types and `.gitignore` files. The precise
|
||||
matching rules and precedence are explained in the documentation for
|
||||
`WalkBuilder`.
|
||||
|
||||
Secondarily, this crate exposes gitignore and file type matchers for use cases
|
||||
that demand more fine-grained control.
|
||||
|
||||
# Example
|
||||
|
||||
This example shows the most basic usage of this crate. This code will
|
||||
recursively traverse the current directory while automatically filtering out
|
||||
files and directories according to ignore globs found in files like
|
||||
`.ignore` and `.gitignore`:
|
||||
|
||||
|
||||
```rust,no_run
|
||||
use ignore::Walk;
|
||||
|
||||
for result in Walk::new("./") {
|
||||
// Each item yielded by the iterator is either a directory entry or an
|
||||
// error, so either print the path or the error.
|
||||
match result {
|
||||
Ok(entry) => println!("{}", entry.path().display()),
|
||||
Err(err) => println!("ERROR: {}", err),
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
# Example: advanced
|
||||
|
||||
By default, the recursive directory iterator will ignore hidden files and
|
||||
directories. This can be disabled by building the iterator with `WalkBuilder`:
|
||||
|
||||
```rust,no_run
|
||||
use ignore::WalkBuilder;
|
||||
|
||||
for result in WalkBuilder::new("./").hidden(false).build() {
|
||||
println!("{:?}", result);
|
||||
}
|
||||
```
|
||||
|
||||
See the documentation for `WalkBuilder` for many other options.
|
||||
*/
|
||||
|
||||
extern crate globset;
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate memchr;
|
||||
extern crate regex;
|
||||
#[cfg(test)]
|
||||
extern crate tempdir;
|
||||
extern crate thread_local;
|
||||
extern crate walkdir;
|
||||
|
||||
use std::error;
|
||||
use std::fmt;
|
||||
use std::io;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
pub use walk::{DirEntry, Walk, WalkBuilder};
|
||||
|
||||
mod dir;
|
||||
pub mod gitignore;
|
||||
mod pathutil;
|
||||
pub mod overrides;
|
||||
pub mod types;
|
||||
mod walk;
|
||||
|
||||
/// The error type for building and using ignore matchers, e.g. when a
/// gitignore file is parsed.
#[derive(Debug)]
pub enum Error {
    /// A group of "soft" errors, produced when adding an ignore file only
    /// partially succeeded.
    Partial(Vec<Error>),
    /// An error tied to a specific line number.
    WithLineNumber { line: u64, err: Box<Error> },
    /// An error tied to a particular file path.
    WithPath { path: PathBuf, err: Box<Error> },
    /// An I/O failure, such as one hit while reading an ignore file.
    Io(io::Error),
    /// A glob that could not be parsed.
    Glob(String),
    /// A file type was selected that has no definition.
    UnrecognizedFileType(String),
    /// A user supplied file type definition could not be parsed.
    InvalidDefinition,
}

impl Error {
    /// Reports whether this is a partial error.
    ///
    /// A partial error means some operations failed while others may have
    /// succeeded, e.g. an ignore file with one invalid glob among otherwise
    /// valid globs. Line number and path wrappers are looked through.
    pub fn is_partial(&self) -> bool {
        match *self {
            Error::Partial(_) => true,
            Error::WithLineNumber { ref err, .. }
            | Error::WithPath { ref err, .. } => err.is_partial(),
            _ => false,
        }
    }

    /// Reports whether this error is exclusively an I/O error.
    ///
    /// Line number and path wrappers are looked through. A partial error
    /// counts only when it holds exactly one error and that error is an
    /// I/O error.
    pub fn is_io(&self) -> bool {
        match *self {
            Error::Io(_) => true,
            Error::WithLineNumber { ref err, .. }
            | Error::WithPath { ref err, .. } => err.is_io(),
            Error::Partial(ref errs) => errs.len() == 1 && errs[0].is_io(),
            Error::Glob(_)
            | Error::UnrecognizedFileType(_)
            | Error::InvalidDefinition => false,
        }
    }

    /// Wraps this error so that it carries the given file path.
    fn with_path<P: AsRef<Path>>(self, path: P) -> Error {
        let path = path.as_ref().to_path_buf();
        Error::WithPath { path: path, err: Box::new(self) }
    }

    /// Wraps this error with a line number and then with the given file
    /// path. An empty path is left out of the result.
    fn tagged<P: AsRef<Path>>(self, path: P, lineno: u64) -> Error {
        let with_line = Error::WithLineNumber {
            line: lineno,
            err: Box::new(self),
        };
        if path.as_ref().as_os_str().is_empty() {
            with_line
        } else {
            with_line.with_path(path)
        }
    }
}

impl error::Error for Error {
    fn description(&self) -> &str {
        match *self {
            Error::Partial(_) => "partial error",
            // Wrappers defer to the error they carry.
            Error::WithLineNumber { ref err, .. }
            | Error::WithPath { ref err, .. } => err.description(),
            Error::Io(ref err) => err.description(),
            Error::Glob(ref msg) => msg,
            Error::UnrecognizedFileType(_) => "unrecognized file type",
            Error::InvalidDefinition => "invalid definition",
        }
    }
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            Error::Partial(ref errs) => {
                // One error per line, with no trailing newline.
                let mut sep = "";
                for err in errs {
                    if let Err(e) = write!(f, "{}{}", sep, err) {
                        return Err(e);
                    }
                    sep = "\n";
                }
                Ok(())
            }
            Error::WithLineNumber { line, ref err } => {
                write!(f, "line {}: {}", line, err)
            }
            Error::WithPath { ref path, ref err } => {
                write!(f, "{}: {}", path.display(), err)
            }
            Error::Io(ref err) => err.fmt(f),
            Error::Glob(ref msg) => write!(f, "{}", msg),
            Error::UnrecognizedFileType(ref ty) => {
                write!(f, "unrecognized file type: {}", ty)
            }
            Error::InvalidDefinition => {
                write!(f, "invalid definition (format is type:glob, e.g., \
                           html:*.html)")
            }
        }
    }
}

impl From<io::Error> for Error {
    fn from(err: io::Error) -> Error {
        Error::Io(err)
    }
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
struct PartialErrorBuilder(Vec<Error>);
|
||||
|
||||
impl PartialErrorBuilder {
|
||||
fn push(&mut self, err: Error) {
|
||||
self.0.push(err);
|
||||
}
|
||||
|
||||
fn push_ignore_io(&mut self, err: Error) {
|
||||
if !err.is_io() {
|
||||
self.push(err);
|
||||
}
|
||||
}
|
||||
|
||||
fn maybe_push(&mut self, err: Option<Error>) {
|
||||
if let Some(err) = err {
|
||||
self.push(err);
|
||||
}
|
||||
}
|
||||
|
||||
fn maybe_push_ignore_io(&mut self, err: Option<Error>) {
|
||||
if let Some(err) = err {
|
||||
self.push_ignore_io(err);
|
||||
}
|
||||
}
|
||||
|
||||
fn into_error_option(mut self) -> Option<Error> {
|
||||
if self.0.is_empty() {
|
||||
None
|
||||
} else if self.0.len() == 1 {
|
||||
Some(self.0.pop().unwrap())
|
||||
} else {
|
||||
Some(Error::Partial(self.0))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The outcome of matching a path against a set of globs.
///
/// The type parameter `T` usually carries extra detail about the match,
/// such as which gitignore file and which glob pattern produced it.
#[derive(Clone, Debug)]
pub enum Match<T> {
    /// No glob matched the path.
    None,
    /// The highest precedence glob that matched says the path should be
    /// ignored.
    Ignore(T),
    /// The highest precedence glob that matched says the path should be
    /// whitelisted.
    Whitelist(T),
}

impl<T> Match<T> {
    /// True when no glob matched.
    pub fn is_none(&self) -> bool {
        self.inner().is_none()
    }

    /// True when the match says the path should be ignored.
    pub fn is_ignore(&self) -> bool {
        if let Match::Ignore(_) = *self {
            true
        } else {
            false
        }
    }

    /// True when the match says the path should be whitelisted.
    pub fn is_whitelist(&self) -> bool {
        if let Match::Whitelist(_) = *self {
            true
        } else {
            false
        }
    }

    /// Swaps `Ignore` and `Whitelist`, keeping the carried value. A
    /// non-match stays a non-match.
    pub fn invert(self) -> Match<T> {
        match self {
            Match::Ignore(value) => Match::Whitelist(value),
            Match::Whitelist(value) => Match::Ignore(value),
            Match::None => Match::None,
        }
    }

    /// Borrows the value carried by this match, if it has one.
    pub fn inner(&self) -> Option<&T> {
        match *self {
            Match::Ignore(ref value) | Match::Whitelist(ref value) => {
                Some(value)
            }
            Match::None => None,
        }
    }

    /// Transforms the carried value with `f`, keeping the kind of match.
    ///
    /// A non-match carries no value, so `f` is not called for it.
    pub fn map<U, F: FnOnce(T) -> U>(self, f: F) -> Match<U> {
        match self {
            Match::Ignore(value) => Match::Ignore(f(value)),
            Match::Whitelist(value) => Match::Whitelist(f(value)),
            Match::None => Match::None,
        }
    }
}
|
||||
202
ignore/src/overrides.rs
Normal file
202
ignore/src/overrides.rs
Normal file
@@ -0,0 +1,202 @@
|
||||
/*!
|
||||
The overrides module provides a way to specify a set of override globs.
|
||||
This provides functionality similar to `--include` or `--exclude` in command
|
||||
line tools.
|
||||
*/
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
use gitignore::{self, Gitignore, GitignoreBuilder};
|
||||
use {Error, Match};
|
||||
|
||||
/// Glob represents a single glob in an override matcher.
|
||||
///
|
||||
/// This is used to report information about the highest precedent glob
|
||||
/// that matched.
|
||||
///
|
||||
/// Note that not all matches necessarily correspond to a specific glob. For
|
||||
/// example, if there are one or more whitelist globs and a file path doesn't
|
||||
/// match any glob in the set, then the file path is considered to be ignored.
|
||||
///
|
||||
/// The lifetime `'a` refers to the lifetime of the matcher that produced
|
||||
/// this glob.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Glob<'a>(GlobInner<'a>);
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
enum GlobInner<'a> {
|
||||
/// No glob matched, but the file path should still be ignored.
|
||||
UnmatchedIgnore,
|
||||
/// A glob matched.
|
||||
Matched(&'a gitignore::Glob),
|
||||
}
|
||||
|
||||
impl<'a> Glob<'a> {
|
||||
fn unmatched() -> Glob<'a> {
|
||||
Glob(GlobInner::UnmatchedIgnore)
|
||||
}
|
||||
}
|
||||
|
||||
/// Manages a set of overrides provided explicitly by the end user.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Override(Gitignore);
|
||||
|
||||
impl Override {
|
||||
/// Returns an empty matcher that never matches any file path.
|
||||
pub fn empty() -> Override {
|
||||
Override(Gitignore::empty())
|
||||
}
|
||||
|
||||
/// Returns the directory of this override set.
|
||||
///
|
||||
/// All matches are done relative to this path.
|
||||
pub fn path(&self) -> &Path {
|
||||
self.0.path()
|
||||
}
|
||||
|
||||
/// Returns true if and only if this matcher is empty.
|
||||
///
|
||||
/// When a matcher is empty, it will never match any file path.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.0.is_empty()
|
||||
}
|
||||
|
||||
/// Returns the total number of ignore globs.
|
||||
pub fn num_ignores(&self) -> u64 {
|
||||
self.0.num_whitelists()
|
||||
}
|
||||
|
||||
/// Returns the total number of whitelisted globs.
|
||||
pub fn num_whitelists(&self) -> u64 {
|
||||
self.0.num_ignores()
|
||||
}
|
||||
|
||||
/// Returns whether the given file path matched a pattern in this override
|
||||
/// matcher.
|
||||
///
|
||||
/// `is_dir` should be true if the path refers to a directory and false
|
||||
/// otherwise.
|
||||
///
|
||||
/// If there are no overrides, then this always returns `Match::None`.
|
||||
///
|
||||
/// If there is at least one whitelist override, then this never returns
|
||||
/// `Match::None`, since non-matches are interpreted as ignored.
|
||||
///
|
||||
/// The given path is matched to the globs relative to the path given
|
||||
/// when building the override matcher. Specifically, before matching
|
||||
/// `path`, its prefix (as determined by a common suffix of the directory
|
||||
/// given) is stripped. If there is no common suffix/prefix overlap, then
|
||||
/// `path` is assumed to reside in the same directory as the root path for
|
||||
/// this set of overrides.
|
||||
pub fn matched<'a, P: AsRef<Path>>(
|
||||
&'a self,
|
||||
path: P,
|
||||
is_dir: bool,
|
||||
) -> Match<Glob<'a>> {
|
||||
if self.is_empty() {
|
||||
return Match::None;
|
||||
}
|
||||
let mat = self.0.matched(path, is_dir).invert();
|
||||
if mat.is_none() && self.num_whitelists() > 0 {
|
||||
return Match::Ignore(Glob::unmatched());
|
||||
}
|
||||
mat.map(move |giglob| Glob(GlobInner::Matched(giglob)))
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a matcher for a set of glob overrides.
|
||||
pub struct OverrideBuilder {
|
||||
builder: GitignoreBuilder,
|
||||
}
|
||||
|
||||
impl OverrideBuilder {
|
||||
/// Create a new override builder.
|
||||
///
|
||||
/// Matching is done relative to the directory path provided.
|
||||
pub fn new<P: AsRef<Path>>(path: P) -> OverrideBuilder {
|
||||
OverrideBuilder {
|
||||
builder: GitignoreBuilder::new(path),
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a new override matcher from the globs added so far.
|
||||
///
|
||||
/// Once a matcher is built, no new globs can be added to it.
|
||||
pub fn build(&self) -> Result<Override, Error> {
|
||||
Ok(Override(try!(self.builder.build())))
|
||||
}
|
||||
|
||||
/// Add a glob to the set of overrides.
|
||||
///
|
||||
/// Globs provided here have precisely the same semantics as a single
|
||||
/// line in a `gitignore` file, where the meaning of `!` is inverted:
|
||||
/// namely, `!` at the beginning of a glob will ignore a file. Without `!`,
|
||||
/// all matches of the glob provided are treated as whitelist matches.
|
||||
pub fn add(&mut self, glob: &str) -> Result<&mut OverrideBuilder, Error> {
|
||||
try!(self.builder.add_line(None, glob));
|
||||
Ok(self)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{Override, OverrideBuilder};

    // Directory that every override matcher in these tests is rooted at.
    const ROOT: &'static str = "/home/andrew/foo";

    // Builds an override matcher rooted at `ROOT` from the given globs.
    fn ov(globs: &[&str]) -> Override {
        let mut builder = OverrideBuilder::new(ROOT);
        for glob in globs.iter() {
            builder.add(glob).unwrap();
        }
        builder.build().unwrap()
    }

    // With no globs at all, nothing ever matches.
    #[test]
    fn empty() {
        let ov = ov(&[]);
        assert!(ov.matched("a.foo", false).is_none());
        assert!(ov.matched("a", false).is_none());
        assert!(ov.matched("", false).is_none());
    }

    // A whitelist glob turns every non-matching path into an ignore.
    #[test]
    fn simple() {
        let ov = ov(&["*.foo", "!*.bar"]);
        assert!(ov.matched("a.foo", false).is_whitelist());
        assert!(ov.matched("a.foo", true).is_whitelist());
        assert!(ov.matched("a.rs", false).is_ignore());
        assert!(ov.matched("a.rs", true).is_ignore());
        assert!(ov.matched("a.bar", false).is_ignore());
        assert!(ov.matched("a.bar", true).is_ignore());
    }

    // With only `!` globs, non-matching paths are left undecided.
    #[test]
    fn only_ignores() {
        let ov = ov(&["!*.bar"]);
        assert!(ov.matched("a.rs", false).is_none());
        assert!(ov.matched("a.rs", true).is_none());
        assert!(ov.matched("a.bar", false).is_ignore());
        assert!(ov.matched("a.bar", true).is_ignore());
    }

    // A later `!` glob wins over an earlier whitelist glob.
    #[test]
    fn precedence() {
        let ov = ov(&["*.foo", "!*.bar.foo"]);
        assert!(ov.matched("a.foo", false).is_whitelist());
        assert!(ov.matched("a.baz", false).is_ignore());
        assert!(ov.matched("a.bar.foo", false).is_ignore());
    }

    // Override globs follow gitignore-style anchoring and `**` rules.
    #[test]
    fn gitignore() {
        let ov = ov(&["/foo", "bar/*.rs", "baz/**"]);
        assert!(ov.matched("bar/wat/lib.rs", false).is_ignore());
        assert!(ov.matched("wat/bar/lib.rs", false).is_whitelist());
        assert!(ov.matched("foo", false).is_whitelist());
        assert!(ov.matched("wat/foo", false).is_ignore());
        assert!(ov.matched("baz", false).is_ignore());
        assert!(ov.matched("baz/a", false).is_whitelist());
        assert!(ov.matched("baz/a/b", false).is_whitelist());
    }
}
|
||||
108
ignore/src/pathutil.rs
Normal file
108
ignore/src/pathutil.rs
Normal file
@@ -0,0 +1,108 @@
|
||||
use std::ffi::OsStr;
|
||||
use std::path::Path;
|
||||
|
||||
/// Returns true if and only if this file path is considered to be hidden.
|
||||
#[cfg(unix)]
|
||||
pub fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
|
||||
if let Some(name) = file_name(path.as_ref()) {
|
||||
name.as_bytes().get(0) == Some(&b'.')
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if and only if this file path is considered to be hidden.
|
||||
#[cfg(not(unix))]
|
||||
pub fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
|
||||
if let Some(name) = file_name(path.as_ref()) {
|
||||
name.to_str().map(|s| s.starts_with(".")).unwrap_or(false)
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Strip `prefix` from the `path` and return the remainder.
///
/// If `path` doesn't have a prefix `prefix`, then return `None`.
///
/// This variant compares the raw bytes of both paths, so `prefix` does not
/// need to end on a path component boundary.
#[cfg(unix)]
pub fn strip_prefix<'a, P: AsRef<Path> + ?Sized>(
    prefix: &'a P,
    path: &'a Path,
) -> Option<&'a Path> {
    use std::os::unix::ffi::OsStrExt;

    let prefix_bytes = prefix.as_ref().as_os_str().as_bytes();
    let path_bytes = path.as_os_str().as_bytes();
    if path_bytes.starts_with(prefix_bytes) {
        let rest = &path_bytes[prefix_bytes.len()..];
        Some(Path::new(OsStr::from_bytes(rest)))
    } else {
        None
    }
}

/// Strip `prefix` from the `path` and return the remainder.
///
/// If `path` doesn't have a prefix `prefix`, then return `None`.
///
/// This variant delegates to `Path::strip_prefix`.
#[cfg(not(unix))]
pub fn strip_prefix<'a, P: AsRef<Path> + ?Sized>(
    prefix: &'a P,
    path: &'a Path,
) -> Option<&'a Path> {
    match path.strip_prefix(prefix) {
        Ok(rest) => Some(rest),
        Err(_) => None,
    }
}
|
||||
|
||||
/// Returns true if this file path is just a file name. i.e., Its parent is
|
||||
/// the empty string.
|
||||
#[cfg(unix)]
|
||||
pub fn is_file_name<P: AsRef<Path>>(path: P) -> bool {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
use memchr::memchr;
|
||||
|
||||
let path = path.as_ref().as_os_str().as_bytes();
|
||||
memchr(b'/', path).is_none()
|
||||
}
|
||||
|
||||
/// Returns true if this file path is just a file name. i.e., Its parent is
|
||||
/// the empty string.
|
||||
#[cfg(not(unix))]
|
||||
pub fn is_file_name<P: AsRef<Path>>(path: P) -> bool {
|
||||
path.as_ref().parent().map(|p| p.as_os_str().is_empty()).unwrap_or(false)
|
||||
}
|
||||
|
||||
/// The final component of the path, if it is a normal file.
|
||||
///
|
||||
/// If the path terminates in ., .., or consists solely of a root of prefix,
|
||||
/// file_name will return None.
|
||||
#[cfg(unix)]
|
||||
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
|
||||
path: &'a P,
|
||||
) -> Option<&'a OsStr> {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
use memchr::memrchr;
|
||||
|
||||
let path = path.as_ref().as_os_str().as_bytes();
|
||||
if path.is_empty() {
|
||||
return None;
|
||||
} else if path.len() == 1 && path[0] == b'.' {
|
||||
return None;
|
||||
} else if path.last() == Some(&b'.') {
|
||||
return None;
|
||||
} else if path.len() >= 2 && &path[path.len() - 2..] == &b".."[..] {
|
||||
return None;
|
||||
}
|
||||
let last_slash = memrchr(b'/', path).map(|i| i + 1).unwrap_or(0);
|
||||
Some(OsStr::from_bytes(&path[last_slash..]))
|
||||
}
|
||||
|
||||
/// The final component of the path, if it is a normal file.
|
||||
///
|
||||
/// If the path terminates in ., .., or consists solely of a root of prefix,
|
||||
/// file_name will return None.
|
||||
#[cfg(not(unix))]
|
||||
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
|
||||
path: &'a P,
|
||||
) -> Option<&'a OsStr> {
|
||||
path.as_ref().file_name()
|
||||
}
|
||||
568
ignore/src/types.rs
Normal file
568
ignore/src/types.rs
Normal file
@@ -0,0 +1,568 @@
|
||||
/*!
|
||||
The types module provides a way of associating globs on file names to file
|
||||
types.
|
||||
|
||||
This can be used to match specific types of files. For example, among
|
||||
the default file types provided, the Rust file type is defined to be `*.rs`
|
||||
with name `rust`. Similarly, the C file type is defined to be `*.{c,h}` with
|
||||
name `c`.
|
||||
|
||||
Note that the set of default types may change over time.
|
||||
|
||||
# Example
|
||||
|
||||
This shows how to create and use a simple file type matcher using the default
|
||||
file types defined in this crate.
|
||||
|
||||
```
|
||||
use ignore::types::TypesBuilder;
|
||||
|
||||
let mut builder = TypesBuilder::new();
|
||||
builder.add_defaults();
|
||||
builder.select("rust");
|
||||
let matcher = builder.build().unwrap();
|
||||
|
||||
assert!(matcher.matched("foo.rs", false).is_whitelist());
|
||||
assert!(matcher.matched("foo.c", false).is_ignore());
|
||||
```
|
||||
|
||||
# Example: negation
|
||||
|
||||
This is like the previous example, but shows how negating a file type works.
|
||||
That is, this will let us match file paths that *don't* correspond to a
|
||||
particular file type.
|
||||
|
||||
```
|
||||
use ignore::types::TypesBuilder;
|
||||
|
||||
let mut builder = TypesBuilder::new();
|
||||
builder.add_defaults();
|
||||
builder.negate("c");
|
||||
let matcher = builder.build().unwrap();
|
||||
|
||||
assert!(matcher.matched("foo.rs", false).is_none());
|
||||
assert!(matcher.matched("foo.c", false).is_ignore());
|
||||
```
|
||||
|
||||
# Example: custom file type definitions
|
||||
|
||||
This shows how to extend this library's default file type definitions with
|
||||
your own.
|
||||
|
||||
```
|
||||
use ignore::types::TypesBuilder;
|
||||
|
||||
let mut builder = TypesBuilder::new();
|
||||
builder.add_defaults();
|
||||
builder.add("foo", "*.foo");
|
||||
// Another way of adding a file type definition.
|
||||
// This is useful when accepting input from an end user.
|
||||
builder.add_def("bar:*.bar");
|
||||
// Note: we only select `foo`, not `bar`.
|
||||
builder.select("foo");
|
||||
let matcher = builder.build().unwrap();
|
||||
|
||||
assert!(matcher.matched("x.foo", false).is_whitelist());
|
||||
// This is ignored because we only selected the `foo` file type.
|
||||
assert!(matcher.matched("x.bar", false).is_ignore());
|
||||
```
|
||||
*/
|
||||
|
||||
use std::cell::RefCell;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use globset::{GlobBuilder, GlobSet, GlobSetBuilder};
|
||||
use thread_local::ThreadLocal;
|
||||
|
||||
use pathutil::file_name;
|
||||
use {Error, Match};
|
||||
|
||||
/// The built-in file type definitions.
///
/// Each entry pairs a file type name with the globs that recognize it.
/// `TypesBuilder::add_defaults` registers every pair found here.
const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
    ("asm", &["*.asm", "*.s", "*.S"]),
    ("awk", &["*.awk"]),
    ("c", &["*.c", "*.h", "*.H"]),
    ("cbor", &["*.cbor"]),
    ("clojure", &["*.clj", "*.cljc", "*.cljs", "*.cljx"]),
    ("cmake", &["*.cmake", "CMakeLists.txt"]),
    ("coffeescript", &["*.coffee"]),
    ("config", &["*.config"]),
    (
        "cpp",
        &["*.C", "*.cc", "*.cpp", "*.cxx", "*.h", "*.H", "*.hh", "*.hpp"],
    ),
    ("csharp", &["*.cs"]),
    ("css", &["*.css"]),
    ("cython", &["*.pyx"]),
    ("dart", &["*.dart"]),
    ("d", &["*.d"]),
    ("elisp", &["*.el"]),
    ("erlang", &["*.erl", "*.hrl"]),
    (
        "fortran",
        &[
            "*.f", "*.F", "*.f77", "*.F77", "*.pfo",
            "*.f90", "*.F90", "*.f95", "*.F95",
        ],
    ),
    ("fsharp", &["*.fs", "*.fsx", "*.fsi"]),
    ("go", &["*.go"]),
    ("groovy", &["*.groovy", "*.gradle"]),
    ("hbs", &["*.hbs"]),
    ("haskell", &["*.hs", "*.lhs"]),
    ("html", &["*.htm", "*.html"]),
    ("java", &["*.java"]),
    ("jinja", &["*.jinja", "*.jinja2"]),
    ("js", &["*.js", "*.jsx", "*.vue"]),
    ("json", &["*.json"]),
    ("jsonl", &["*.jsonl"]),
    ("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
    ("lua", &["*.lua"]),
    ("m4", &["*.ac", "*.m4"]),
    (
        "make",
        &["gnumakefile", "Gnumakefile", "makefile", "Makefile", "*.mk"],
    ),
    ("markdown", &["*.md"]),
    ("md", &["*.md"]),
    ("matlab", &["*.m"]),
    ("mk", &["mkfile"]),
    ("ml", &["*.ml"]),
    ("nim", &["*.nim"]),
    ("objc", &["*.h", "*.m"]),
    ("objcpp", &["*.h", "*.mm"]),
    ("ocaml", &["*.ml", "*.mli", "*.mll", "*.mly"]),
    ("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm"]),
    ("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]),
    ("py", &["*.py"]),
    ("readme", &["README*", "*README"]),
    ("r", &["*.R", "*.r", "*.Rmd", "*.Rnw"]),
    ("rst", &["*.rst"]),
    ("ruby", &["*.rb"]),
    ("rust", &["*.rs"]),
    ("scala", &["*.scala"]),
    ("sh", &["*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh"]),
    ("spark", &["*.spark"]),
    ("sql", &["*.sql"]),
    ("sv", &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]),
    ("swift", &["*.swift"]),
    ("tcl", &["*.tcl"]),
    ("tex", &["*.tex", "*.cls", "*.sty"]),
    ("ts", &["*.ts", "*.tsx"]),
    ("txt", &["*.txt"]),
    ("toml", &["*.toml", "Cargo.lock"]),
    ("vala", &["*.vala"]),
    ("vb", &["*.vb"]),
    ("vimscript", &["*.vim"]),
    ("xml", &["*.xml"]),
    ("yacc", &["*.y"]),
    ("yaml", &["*.yaml", "*.yml"]),
];
|
||||
|
||||
/// Glob represents a single glob in a set of file type definitions.
|
||||
///
|
||||
/// There may be more than one glob for a particular file type.
|
||||
///
|
||||
/// This is used to report information about the highest precedent glob
|
||||
/// that matched.
|
||||
///
|
||||
/// Note that not all matches necessarily correspond to a specific glob.
|
||||
/// For example, if there are one or more selections and a file path doesn't
|
||||
/// match any of those selections, then the file path is considered to be
|
||||
/// ignored.
|
||||
///
|
||||
/// The lifetime `'a` refers to the lifetime of the underlying file type
|
||||
/// definition, which corresponds to the lifetime of the file type matcher.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Glob<'a>(GlobInner<'a>);
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
enum GlobInner<'a> {
|
||||
/// No glob matched, but the file path should still be ignored.
|
||||
UnmatchedIgnore,
|
||||
/// A glob matched.
|
||||
Matched {
|
||||
/// The file type definition which provided the glob.
|
||||
def: &'a FileTypeDef,
|
||||
/// The index of the glob that matched inside the file type definition.
|
||||
which: usize,
|
||||
/// Whether the selection was negated or not.
|
||||
negated: bool,
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Glob<'a> {
|
||||
fn unmatched() -> Glob<'a> {
|
||||
Glob(GlobInner::UnmatchedIgnore)
|
||||
}
|
||||
}
|
||||
|
||||
/// A single file type definition.
///
/// File type definitions can be retrieved in aggregate from a file type
/// matcher. A file type definition is also reported when it is responsible
/// for a match.
#[derive(Clone, Debug)]
pub struct FileTypeDef {
    /// The name of the file type, e.g. `rust`.
    name: String,
    /// The globs that recognize files of this type.
    globs: Vec<String>,
}

impl FileTypeDef {
    /// Return the name of this file type.
    pub fn name(&self) -> &str {
        self.name.as_str()
    }

    /// Return the globs used to recognize this file type.
    pub fn globs(&self) -> &[String] {
        &self.globs[..]
    }
}
|
||||
|
||||
/// Types is a file type matcher.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Types {
|
||||
/// All of the file type definitions, sorted lexicographically by name.
|
||||
defs: Vec<FileTypeDef>,
|
||||
/// All of the selections made by the user.
|
||||
selections: Vec<Selection<FileTypeDef>>,
|
||||
/// Whether there is at least one Selection::Select in our selections.
|
||||
/// When this is true, a Match::None is converted to Match::Ignore.
|
||||
has_selected: bool,
|
||||
/// A mapping from glob index in the set to two indices. The first is an
|
||||
/// index into `selections` and the second is an index into the
|
||||
/// corresponding file type definition's list of globs.
|
||||
glob_to_selection: Vec<(usize, usize)>,
|
||||
/// The set of all glob selections, used for actual matching.
|
||||
set: GlobSet,
|
||||
/// Temporary storage for globs that match.
|
||||
matches: Arc<ThreadLocal<RefCell<Vec<usize>>>>,
|
||||
}
|
||||
|
||||
/// Indicates the type of a selection for a particular file type.
///
/// Both variants carry the name of the file type together with a payload of
/// type `T`.
#[derive(Clone, Debug)]
enum Selection<T> {
    /// The file type was selected.
    Select(String, T),
    /// The file type was negated.
    Negate(String, T),
}

impl<T> Selection<T> {
    /// Returns true if and only if this is a `Negate` selection.
    fn is_negated(&self) -> bool {
        match *self {
            Selection::Negate(..) => true,
            Selection::Select(..) => false,
        }
    }

    /// Returns the file type name this selection refers to.
    fn name(&self) -> &str {
        match *self {
            Selection::Select(ref name, _)
            | Selection::Negate(ref name, _) => name,
        }
    }

    /// Converts the payload with `f`, keeping the variant and the name.
    fn map<U, F: FnOnce(T) -> U>(self, f: F) -> Selection<U> {
        match self {
            Selection::Select(name, inner) => Selection::Select(name, f(inner)),
            Selection::Negate(name, inner) => Selection::Negate(name, f(inner)),
        }
    }

    /// Returns a reference to the payload of this selection.
    fn inner(&self) -> &T {
        match *self {
            Selection::Select(_, ref inner)
            | Selection::Negate(_, ref inner) => inner,
        }
    }
}
|
||||
|
||||
impl Types {
|
||||
/// Creates a new file type matcher that never matches any path and
|
||||
/// contains no file type definitions.
|
||||
pub fn empty() -> Types {
|
||||
Types {
|
||||
defs: vec![],
|
||||
selections: vec![],
|
||||
has_selected: false,
|
||||
glob_to_selection: vec![],
|
||||
set: GlobSetBuilder::new().build().unwrap(),
|
||||
matches: Arc::new(ThreadLocal::default()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if and only if this matcher has zero selections.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.selections.is_empty()
|
||||
}
|
||||
|
||||
/// Returns the number of selections used in this matcher.
|
||||
pub fn len(&self) -> usize {
|
||||
self.selections.len()
|
||||
}
|
||||
|
||||
/// Return the set of current file type definitions.
|
||||
///
|
||||
/// Definitions and globs are sorted.
|
||||
pub fn definitions(&self) -> &[FileTypeDef] {
|
||||
&self.defs
|
||||
}
|
||||
|
||||
/// Returns a match for the given path against this file type matcher.
|
||||
///
|
||||
/// The path is considered whitelisted if it matches a selected file type.
|
||||
/// The path is considered ignored if it matches a negated file type.
|
||||
/// If at least one file type is selected and `path` doesn't match, then
|
||||
/// the path is also considered ignored.
|
||||
pub fn matched<'a, P: AsRef<Path>>(
|
||||
&'a self,
|
||||
path: P,
|
||||
is_dir: bool,
|
||||
) -> Match<Glob<'a>> {
|
||||
// File types don't apply to directories, and we can't do anything
|
||||
// if our glob set is empty.
|
||||
if is_dir || self.set.is_empty() {
|
||||
return Match::None;
|
||||
}
|
||||
// We only want to match against the file name, so extract it.
|
||||
// If one doesn't exist, then we can't match it.
|
||||
let name = match file_name(path.as_ref()) {
|
||||
Some(name) => name,
|
||||
None if self.has_selected => {
|
||||
return Match::Ignore(Glob::unmatched());
|
||||
}
|
||||
None => {
|
||||
return Match::None;
|
||||
}
|
||||
};
|
||||
let mut matches = self.matches.get_default().borrow_mut();
|
||||
self.set.matches_into(name, &mut *matches);
|
||||
// The highest precedent match is the last one.
|
||||
if let Some(&i) = matches.last() {
|
||||
let (isel, iglob) = self.glob_to_selection[i];
|
||||
let sel = &self.selections[isel];
|
||||
let glob = Glob(GlobInner::Matched {
|
||||
def: sel.inner(),
|
||||
which: iglob,
|
||||
negated: sel.is_negated(),
|
||||
});
|
||||
return if sel.is_negated() {
|
||||
Match::Ignore(glob)
|
||||
} else {
|
||||
Match::Whitelist(glob)
|
||||
};
|
||||
}
|
||||
if self.has_selected {
|
||||
Match::Ignore(Glob::unmatched())
|
||||
} else {
|
||||
Match::None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// TypesBuilder builds a type matcher from a set of file type definitions and
|
||||
/// a set of file type selections.
|
||||
pub struct TypesBuilder {
|
||||
types: HashMap<String, FileTypeDef>,
|
||||
selections: Vec<Selection<()>>,
|
||||
}
|
||||
|
||||
impl TypesBuilder {
|
||||
/// Create a new builder for a file type matcher.
|
||||
///
|
||||
/// The builder contains *no* type definitions to start with. A set
|
||||
/// of default type definitions can be added with `add_defaults`, and
|
||||
/// additional type definitions can be added with `select` and `negate`.
|
||||
pub fn new() -> TypesBuilder {
|
||||
TypesBuilder {
|
||||
types: HashMap::new(),
|
||||
selections: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
/// Build the current set of file type definitions *and* selections into
|
||||
/// a file type matcher.
|
||||
pub fn build(&self) -> Result<Types, Error> {
|
||||
let defs = self.definitions();
|
||||
let has_selected = self.selections.iter().any(|s| !s.is_negated());
|
||||
|
||||
let mut selections = vec![];
|
||||
let mut glob_to_selection = vec![];
|
||||
let mut build_set = GlobSetBuilder::new();
|
||||
for (isel, selection) in self.selections.iter().enumerate() {
|
||||
let def = match self.types.get(selection.name()) {
|
||||
Some(def) => def.clone(),
|
||||
None => {
|
||||
let name = selection.name().to_string();
|
||||
return Err(Error::UnrecognizedFileType(name));
|
||||
}
|
||||
};
|
||||
for (iglob, glob) in def.globs.iter().enumerate() {
|
||||
build_set.add(try!(
|
||||
GlobBuilder::new(glob)
|
||||
.literal_separator(true)
|
||||
.build()
|
||||
.map_err(|err| Error::Glob(err.to_string()))));
|
||||
glob_to_selection.push((isel, iglob));
|
||||
}
|
||||
selections.push(selection.clone().map(move |_| def));
|
||||
}
|
||||
let set = try!(build_set.build().map_err(|err| {
|
||||
Error::Glob(err.to_string())
|
||||
}));
|
||||
Ok(Types {
|
||||
defs: defs,
|
||||
selections: selections,
|
||||
has_selected: has_selected,
|
||||
glob_to_selection: glob_to_selection,
|
||||
set: set,
|
||||
matches: Arc::new(ThreadLocal::default()),
|
||||
})
|
||||
}
|
||||
|
||||
/// Return the set of current file type definitions.
|
||||
///
|
||||
/// Definitions and globs are sorted.
|
||||
pub fn definitions(&self) -> Vec<FileTypeDef> {
|
||||
let mut defs = vec![];
|
||||
for def in self.types.values() {
|
||||
let mut def = def.clone();
|
||||
def.globs.sort();
|
||||
defs.push(def);
|
||||
}
|
||||
defs.sort_by(|def1, def2| def1.name().cmp(def2.name()));
|
||||
defs
|
||||
}
|
||||
|
||||
/// Select the file type given by `name`.
|
||||
///
|
||||
/// If `name` is `all`, then all file types currently defined are selected.
|
||||
pub fn select(&mut self, name: &str) -> &mut TypesBuilder {
|
||||
if name == "all" {
|
||||
for name in self.types.keys() {
|
||||
self.selections.push(Selection::Select(name.to_string(), ()));
|
||||
}
|
||||
} else {
|
||||
self.selections.push(Selection::Select(name.to_string(), ()));
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
/// Ignore the file type given by `name`.
|
||||
///
|
||||
/// If `name` is `all`, then all file types currently defined are negated.
|
||||
pub fn negate(&mut self, name: &str) -> &mut TypesBuilder {
|
||||
if name == "all" {
|
||||
for name in self.types.keys() {
|
||||
self.selections.push(Selection::Negate(name.to_string(), ()));
|
||||
}
|
||||
} else {
|
||||
self.selections.push(Selection::Negate(name.to_string(), ()));
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
/// Clear any file type definitions for the type name given.
|
||||
pub fn clear(&mut self, name: &str) -> &mut TypesBuilder {
|
||||
self.types.remove(name);
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a new file type definition. `name` can be arbitrary and `pat`
|
||||
/// should be a glob recognizing file paths belonging to the `name` type.
|
||||
///
|
||||
/// If `name` is `all` or otherwise contains a `:`, then an error is
|
||||
/// returned.
|
||||
pub fn add(&mut self, name: &str, glob: &str) -> Result<(), Error> {
|
||||
if name == "all" || name.contains(':') {
|
||||
return Err(Error::InvalidDefinition);
|
||||
}
|
||||
let (key, glob) = (name.to_string(), glob.to_string());
|
||||
self.types.entry(key).or_insert_with(|| {
|
||||
FileTypeDef { name: name.to_string(), globs: vec![] }
|
||||
}).globs.push(glob);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Add a new file type definition specified in string form. The format
|
||||
/// is `name:glob`. Names may not include a colon.
|
||||
pub fn add_def(&mut self, def: &str) -> Result<(), Error> {
|
||||
let name: String = def.chars().take_while(|&c| c != ':').collect();
|
||||
let pat: String = def.chars().skip(name.chars().count() + 1).collect();
|
||||
if name.is_empty() || pat.is_empty() {
|
||||
return Err(Error::InvalidDefinition);
|
||||
}
|
||||
self.add(&name, &pat)
|
||||
}
|
||||
|
||||
/// Add a set of default file type definitions.
|
||||
pub fn add_defaults(&mut self) -> &mut TypesBuilder {
|
||||
static MSG: &'static str = "adding a default type should never fail";
|
||||
for &(name, exts) in DEFAULT_TYPES {
|
||||
for ext in exts {
|
||||
self.add(name, ext).expect(MSG);
|
||||
}
|
||||
}
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::TypesBuilder;

    // Generates one `#[test]` function per invocation.
    //
    // `$types` are `name:glob` definitions, `$sel` the selected type names,
    // `$selnot` the negated type names and `$path` the path under test. The
    // plain form expects the path not to be ignored; the `not` form expects
    // it to be ignored.
    macro_rules! matched {
        ($name:ident, $types:expr, $sel:expr, $selnot:expr,
         $path:expr) => {
            matched!($name, $types, $sel, $selnot, $path, true);
        };
        (not, $name:ident, $types:expr, $sel:expr, $selnot:expr,
         $path:expr) => {
            matched!($name, $types, $sel, $selnot, $path, false);
        };
        ($name:ident, $types:expr, $sel:expr, $selnot:expr,
         $path:expr, $matched:expr) => {
            #[test]
            fn $name() {
                let mut btypes = TypesBuilder::new();
                for tydef in $types {
                    btypes.add_def(tydef).unwrap();
                }
                for sel in $sel {
                    btypes.select(sel);
                }
                for selnot in $selnot {
                    btypes.negate(selnot);
                }
                let types = btypes.build().unwrap();
                let mat = types.matched($path, false);
                assert_eq!($matched, !mat.is_ignore());
            }
        };
    }

    // The file type definitions shared by every test case below.
    fn types() -> Vec<&'static str> {
        vec![
            "html:*.html",
            "html:*.htm",
            "rust:*.rs",
            "js:*.js",
            "foo:*.{rs,foo}",
        ]
    }

    // Paths that must not be ignored.
    matched!(match1, types(), vec!["rust"], vec![], "lib.rs");
    matched!(match2, types(), vec!["html"], vec![], "index.html");
    matched!(match3, types(), vec!["html"], vec![], "index.htm");
    matched!(match4, types(), vec!["html", "rust"], vec![], "main.rs");
    matched!(match5, types(), vec![], vec![], "index.html");
    matched!(match6, types(), vec![], vec!["rust"], "index.html");
    matched!(match7, types(), vec!["foo"], vec!["rust"], "main.foo");

    // Paths that must be ignored.
    matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html");
    matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs");
    matched!(not, matchnot3, types(), vec!["foo"], vec!["rust"], "main.rs");
    matched!(not, matchnot4, types(), vec!["rust"], vec!["foo"], "main.rs");
    matched!(not, matchnot5, types(), vec!["rust"], vec!["foo"], "main.foo");
}
|
||||
592
ignore/src/walk.rs
Normal file
592
ignore/src/walk.rs
Normal file
@@ -0,0 +1,592 @@
|
||||
use std::ffi::OsStr;
|
||||
use std::fs::{FileType, Metadata};
|
||||
use std::io;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::vec;
|
||||
|
||||
use walkdir::{self, WalkDir, WalkDirIterator};
|
||||
|
||||
use dir::{Ignore, IgnoreBuilder};
|
||||
use gitignore::GitignoreBuilder;
|
||||
use overrides::Override;
|
||||
use types::Types;
|
||||
use {Error, PartialErrorBuilder};
|
||||
|
||||
/// WalkBuilder builds a recursive directory iterator.
|
||||
///
|
||||
/// The builder supports a large number of configurable options. This includes
|
||||
/// specific glob overrides, file type matching, toggling whether hidden
|
||||
/// files are ignored or not, and of course, support for respecting gitignore
|
||||
/// files.
|
||||
///
|
||||
/// By default, all ignore files found are respected. This includes `.ignore`,
|
||||
/// `.gitignore`, `.git/info/exclude` and even your global gitignore
|
||||
/// globs, usually found in `$XDG_CONFIG_HOME/git/ignore`.
|
||||
///
|
||||
/// Some standard recursive directory options are also supported, such as
|
||||
/// limiting the recursive depth or whether to follow symbolic links (disabled
|
||||
/// by default).
|
||||
///
|
||||
/// # Ignore rules
|
||||
///
|
||||
/// There are many rules that influence whether a particular file or directory
|
||||
/// is skipped by this iterator. Those rules are documented here. Note that
|
||||
/// the rules assume a default configuration.
|
||||
///
|
||||
/// * First, glob overrides are checked. If a path matches a glob override,
|
||||
/// then matching stops. The path is then only skipped if the glob that matched
|
||||
/// the path is an ignore glob. (An override glob is a whitelist glob unless it
|
||||
/// starts with a `!`, in which case it is an ignore glob.)
|
||||
/// * Second, ignore files are checked. Ignore files currently only come from
|
||||
/// git ignore files (`.gitignore`, `.git/info/exclude` and the configured
|
||||
/// global gitignore file), plain `.ignore` files, which have the same format
|
||||
/// as gitignore files, or explicitly added ignore files. The precedence order
|
||||
/// is: `.ignore`, `.gitignore`, `.git/info/exclude`, global gitignore and
|
||||
/// finally explicitly added ignore files. Note that precedence between
|
||||
/// different types of ignore files is not impacted by the directory hierarchy;
|
||||
/// any `.ignore` file overrides all `.gitignore` files. Within each
|
||||
/// precedence level, more nested ignore files have a higher precedence over
|
||||
/// less nested ignore files.
|
||||
/// * Third, if the previous step yields an ignore match, than all matching
|
||||
/// is stopped and the path is skipped.. If it yields a whitelist match, then
|
||||
/// process continues. A whitelist match can be overridden by a later matcher.
|
||||
/// * Fourth, unless the path is a directory, the file type matcher is run on
|
||||
/// the path. As above, if it's an ignore match, then all matching is stopped
|
||||
/// and the path is skipped. If it's a whitelist match, then matching
|
||||
/// continues.
|
||||
/// * Fifth, if the path hasn't been whitelisted and it is hidden, then the
|
||||
/// path is skipped.
|
||||
/// * Sixth, if the path has made it this far then it is yielded in the
|
||||
/// iterator.
|
||||
pub struct WalkBuilder {
|
||||
paths: Vec<PathBuf>,
|
||||
ig_builder: IgnoreBuilder,
|
||||
parents: bool,
|
||||
max_depth: Option<usize>,
|
||||
follow_links: bool,
|
||||
}
|
||||
|
||||
impl WalkBuilder {
|
||||
/// Create a new builder for a recursive directory iterator for the
|
||||
/// directory given.
|
||||
///
|
||||
/// Note that if you want to traverse multiple different directories, it
|
||||
/// is better to call `add` on this builder than to create multiple
|
||||
/// `Walk` values.
|
||||
pub fn new<P: AsRef<Path>>(path: P) -> WalkBuilder {
|
||||
WalkBuilder {
|
||||
paths: vec![path.as_ref().to_path_buf()],
|
||||
ig_builder: IgnoreBuilder::new(),
|
||||
parents: true,
|
||||
max_depth: None,
|
||||
follow_links: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a new `Walk` iterator.
|
||||
pub fn build(&self) -> Walk {
|
||||
let follow_links = self.follow_links;
|
||||
let max_depth = self.max_depth;
|
||||
let its = self.paths.iter().map(move |p| {
|
||||
if p == Path::new("-") {
|
||||
(p.to_path_buf(), None)
|
||||
} else {
|
||||
let mut wd = WalkDir::new(p);
|
||||
wd = wd.follow_links(follow_links || p.is_file());
|
||||
if let Some(max_depth) = max_depth {
|
||||
wd = wd.max_depth(max_depth);
|
||||
}
|
||||
(p.to_path_buf(), Some(WalkEventIter::from(wd)))
|
||||
}
|
||||
}).collect::<Vec<_>>().into_iter();
|
||||
let ig_root = self.ig_builder.build();
|
||||
Walk {
|
||||
its: its,
|
||||
it: None,
|
||||
ig_root: ig_root.clone(),
|
||||
ig: ig_root.clone(),
|
||||
parents: self.parents,
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a file path to the iterator.
|
||||
///
|
||||
/// Each additional file path added is traversed recursively. This should
|
||||
/// be preferred over building multiple `Walk` iterators since this
|
||||
/// enables reusing resources across iteration.
|
||||
pub fn add<P: AsRef<Path>>(&mut self, path: P) -> &mut WalkBuilder {
|
||||
self.paths.push(path.as_ref().to_path_buf());
|
||||
self
|
||||
}
|
||||
|
||||
/// The maximum depth to recurse.
|
||||
///
|
||||
/// The default, `None`, imposes no depth restriction.
|
||||
pub fn max_depth(&mut self, depth: Option<usize>) -> &mut WalkBuilder {
|
||||
self.max_depth = depth;
|
||||
self
|
||||
}
|
||||
|
||||
/// Whether to follow symbolic links or not.
|
||||
pub fn follow_links(&mut self, yes: bool) -> &mut WalkBuilder {
|
||||
self.follow_links = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Add an ignore file to the matcher.
|
||||
///
|
||||
/// This has lower precedence than all other sources of ignore rules.
|
||||
///
|
||||
/// If there was a problem adding the ignore file, then an error is
|
||||
/// returned. Note that the error may indicate *partial* failure. For
|
||||
/// example, if an ignore file contains an invalid glob, all other globs
|
||||
/// are still applied.
|
||||
pub fn add_ignore<P: AsRef<Path>>(&mut self, path: P) -> Option<Error> {
|
||||
let mut builder = GitignoreBuilder::new("");
|
||||
let mut errs = PartialErrorBuilder::default();
|
||||
errs.maybe_push_ignore_io(builder.add(path));
|
||||
match builder.build() {
|
||||
Ok(gi) => { self.ig_builder.add_ignore(gi); }
|
||||
Err(err) => { errs.push(err); }
|
||||
}
|
||||
errs.into_error_option()
|
||||
}
|
||||
|
||||
/// Add an override matcher.
|
||||
///
|
||||
/// By default, no override matcher is used.
|
||||
///
|
||||
/// This overrides any previous setting.
|
||||
pub fn overrides(&mut self, overrides: Override) -> &mut WalkBuilder {
|
||||
self.ig_builder.overrides(overrides);
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a file type matcher.
|
||||
///
|
||||
/// By default, no file type matcher is used.
|
||||
///
|
||||
/// This overrides any previous setting.
|
||||
pub fn types(&mut self, types: Types) -> &mut WalkBuilder {
|
||||
self.ig_builder.types(types);
|
||||
self
|
||||
}
|
||||
|
||||
/// Enables ignoring hidden files.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn hidden(&mut self, yes: bool) -> &mut WalkBuilder {
|
||||
self.ig_builder.hidden(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// Enables reading ignore files from parent directories.
|
||||
///
|
||||
/// If this is enabled, then the parent directories of each file path given
|
||||
/// are traversed for ignore files (subject to the ignore settings on
|
||||
/// this builder). Note that file paths are canonicalized with respect to
|
||||
/// the current working directory in order to determine parent directories.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn parents(&mut self, yes: bool) -> &mut WalkBuilder {
|
||||
self.parents = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Enables reading `.ignore` files.
|
||||
///
|
||||
/// `.ignore` files have the same semantics as `gitignore` files and are
|
||||
/// supported by search tools such as ripgrep and The Silver Searcher.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn ignore(&mut self, yes: bool) -> &mut WalkBuilder {
|
||||
self.ig_builder.ignore(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// Enables reading a global gitignore file, whose path is specified in
|
||||
/// git's `core.excludesFile` config option.
|
||||
///
|
||||
/// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig`
|
||||
/// does not exist or does not specify `core.excludesFile`, then
|
||||
/// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not
|
||||
/// set or is empty, then `$HOME/.config/git/ignore` is used instead.
|
||||
pub fn git_global(&mut self, yes: bool) -> &mut WalkBuilder {
|
||||
self.ig_builder.git_global(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// Enables reading `.gitignore` files.
|
||||
///
|
||||
/// `.gitignore` files have match semantics as described in the `gitignore`
|
||||
/// man page.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn git_ignore(&mut self, yes: bool) -> &mut WalkBuilder {
|
||||
self.ig_builder.git_ignore(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// Enables reading `.git/info/exclude` files.
|
||||
///
|
||||
/// `.git/info/exclude` files have match semantics as described in the
|
||||
/// `gitignore` man page.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn git_exclude(&mut self, yes: bool) -> &mut WalkBuilder {
|
||||
self.ig_builder.git_exclude(yes);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Walk is a recursive directory iterator over file paths in a directory.
|
||||
///
|
||||
/// Only file and directory paths matching the rules are returned. By default,
|
||||
/// ignore files like `.gitignore` are respected. The precise matching rules
|
||||
/// and precedence is explained in the documentation for `WalkBuilder`.
|
||||
pub struct Walk {
|
||||
its: vec::IntoIter<(PathBuf, Option<WalkEventIter>)>,
|
||||
it: Option<WalkEventIter>,
|
||||
ig_root: Ignore,
|
||||
ig: Ignore,
|
||||
parents: bool,
|
||||
}
|
||||
|
||||
impl Walk {
|
||||
/// Creates a new recursive directory iterator for the file path given.
|
||||
///
|
||||
/// Note that this uses default settings, which include respecting
|
||||
/// `.gitignore` files. To configure the iterator, use `WalkBuilder`
|
||||
/// instead.
|
||||
pub fn new<P: AsRef<Path>>(path: P) -> Walk {
|
||||
WalkBuilder::new(path).build()
|
||||
}
|
||||
|
||||
fn skip_entry(&self, ent: &walkdir::DirEntry) -> bool {
|
||||
if ent.depth() == 0 {
|
||||
// Never skip the root directory.
|
||||
return false;
|
||||
}
|
||||
let m = self.ig.matched(ent.path(), ent.file_type().is_dir());
|
||||
if m.is_ignore() {
|
||||
debug!("ignoring {}: {:?}", ent.path().display(), m);
|
||||
return true;
|
||||
} else if m.is_whitelist() {
|
||||
debug!("whitelisting {}: {:?}", ent.path().display(), m);
|
||||
}
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for Walk {
|
||||
type Item = Result<DirEntry, Error>;
|
||||
|
||||
#[inline(always)]
|
||||
fn next(&mut self) -> Option<Result<DirEntry, Error>> {
|
||||
loop {
|
||||
let ev = match self.it.as_mut().and_then(|it| it.next()) {
|
||||
Some(ev) => ev,
|
||||
None => {
|
||||
match self.its.next() {
|
||||
None => return None,
|
||||
Some((_, None)) => {
|
||||
return Some(Ok(DirEntry {
|
||||
dent: None,
|
||||
err: None,
|
||||
}));
|
||||
}
|
||||
Some((path, Some(it))) => {
|
||||
self.it = Some(it);
|
||||
if self.parents && path.is_dir() {
|
||||
let (ig, err) = self.ig_root.add_parents(path);
|
||||
self.ig = ig;
|
||||
if let Some(err) = err {
|
||||
return Some(Err(err));
|
||||
}
|
||||
} else {
|
||||
self.ig = self.ig_root.clone();
|
||||
}
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
};
|
||||
match ev {
|
||||
Err(err) => {
|
||||
let path = err.path().map(|p| p.to_path_buf());
|
||||
let mut ig_err = Error::Io(io::Error::from(err));
|
||||
if let Some(path) = path {
|
||||
ig_err = Error::WithPath {
|
||||
path: path.to_path_buf(),
|
||||
err: Box::new(ig_err),
|
||||
};
|
||||
}
|
||||
return Some(Err(ig_err));
|
||||
}
|
||||
Ok(WalkEvent::Exit) => {
|
||||
self.ig = self.ig.parent().unwrap();
|
||||
}
|
||||
Ok(WalkEvent::Dir(ent)) => {
|
||||
if self.skip_entry(&ent) {
|
||||
self.it.as_mut().unwrap().it.skip_current_dir();
|
||||
// Still need to push this on the stack because
|
||||
// we'll get a WalkEvent::Exit event for this dir.
|
||||
// We don't care if it errors though.
|
||||
let (igtmp, _) = self.ig.add_child(ent.path());
|
||||
self.ig = igtmp;
|
||||
continue;
|
||||
}
|
||||
let (igtmp, err) = self.ig.add_child(ent.path());
|
||||
self.ig = igtmp;
|
||||
return Some(Ok(DirEntry { dent: Some(ent), err: err }));
|
||||
}
|
||||
Ok(WalkEvent::File(ent)) => {
|
||||
if self.skip_entry(&ent) {
|
||||
continue;
|
||||
}
|
||||
// If this isn't actually a file (e.g., a symlink),
|
||||
// then skip it.
|
||||
if !ent.file_type().is_file() {
|
||||
continue;
|
||||
}
|
||||
return Some(Ok(DirEntry { dent: Some(ent), err: None }));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A directory entry with a possible error attached.
|
||||
///
|
||||
/// The error typically refers to a problem parsing ignore files in a
|
||||
/// particular directory.
|
||||
#[derive(Debug)]
|
||||
pub struct DirEntry {
|
||||
dent: Option<walkdir::DirEntry>,
|
||||
err: Option<Error>,
|
||||
}
|
||||
|
||||
impl DirEntry {
|
||||
/// The full path that this entry represents.
|
||||
pub fn path(&self) -> &Path {
|
||||
self.dent.as_ref().map_or(Path::new("<stdin>"), |x| x.path())
|
||||
}
|
||||
|
||||
/// Whether this entry corresponds to a symbolic link or not.
|
||||
pub fn path_is_symbolic_link(&self) -> bool {
|
||||
self.dent.as_ref().map_or(false, |x| x.path_is_symbolic_link())
|
||||
}
|
||||
|
||||
/// Returns true if and only if this entry corresponds to stdin.
|
||||
///
|
||||
/// i.e., The entry has depth 0 and its file name is `-`.
|
||||
pub fn is_stdin(&self) -> bool {
|
||||
self.dent.is_none()
|
||||
}
|
||||
|
||||
/// Return the metadata for the file that this entry points to.
|
||||
pub fn metadata(&self) -> Result<Metadata, Error> {
|
||||
if let Some(dent) = self.dent.as_ref() {
|
||||
dent.metadata().map_err(|err| Error::WithPath {
|
||||
path: self.path().to_path_buf(),
|
||||
err: Box::new(Error::Io(io::Error::from(err))),
|
||||
})
|
||||
} else {
|
||||
let ioerr = io::Error::new(
|
||||
io::ErrorKind::Other, "stdin has no metadata");
|
||||
Err(Error::WithPath {
|
||||
path: Path::new("<stdin>").to_path_buf(),
|
||||
err: Box::new(Error::Io(ioerr)),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the file type for the file that this entry points to.
|
||||
///
|
||||
/// This entry doesn't have a file type if it corresponds to stdin.
|
||||
pub fn file_type(&self) -> Option<FileType> {
|
||||
self.dent.as_ref().map(|x| x.file_type())
|
||||
}
|
||||
|
||||
/// Return the file name of this entry.
|
||||
///
|
||||
/// If this entry has no file name (e.g., `/`), then the full path is
|
||||
/// returned.
|
||||
pub fn file_name(&self) -> &OsStr {
|
||||
self.dent.as_ref().map_or(OsStr::new("<stdin>"), |x| x.file_name())
|
||||
}
|
||||
|
||||
/// Returns the depth at which this entry was created relative to the root.
|
||||
pub fn depth(&self) -> usize {
|
||||
self.dent.as_ref().map_or(0, |x| x.depth())
|
||||
}
|
||||
|
||||
/// Returns an error, if one exists, associated with processing this entry.
|
||||
///
|
||||
/// An example of an error is one that occurred while parsing an ignore
|
||||
/// file.
|
||||
pub fn error(&self) -> Option<&Error> {
|
||||
self.err.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
/// WalkEventIter transforms a WalkDir iterator into an iterator that more
|
||||
/// accurately describes the directory tree. Namely, it emits events that are
|
||||
/// one of three types: directory, file or "exit." An "exit" event means that
|
||||
/// the entire contents of a directory have been enumerated.
|
||||
struct WalkEventIter {
|
||||
depth: usize,
|
||||
it: walkdir::Iter,
|
||||
next: Option<Result<walkdir::DirEntry, walkdir::Error>>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
enum WalkEvent {
|
||||
Dir(walkdir::DirEntry),
|
||||
File(walkdir::DirEntry),
|
||||
Exit,
|
||||
}
|
||||
|
||||
impl From<WalkDir> for WalkEventIter {
|
||||
fn from(it: WalkDir) -> WalkEventIter {
|
||||
WalkEventIter { depth: 0, it: it.into_iter(), next: None }
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for WalkEventIter {
|
||||
type Item = walkdir::Result<WalkEvent>;
|
||||
|
||||
#[inline(always)]
|
||||
fn next(&mut self) -> Option<walkdir::Result<WalkEvent>> {
|
||||
let dent = self.next.take().or_else(|| self.it.next());
|
||||
let depth = match dent {
|
||||
None => 0,
|
||||
Some(Ok(ref dent)) => dent.depth(),
|
||||
Some(Err(ref err)) => err.depth(),
|
||||
};
|
||||
if depth < self.depth {
|
||||
self.depth -= 1;
|
||||
self.next = dent;
|
||||
return Some(Ok(WalkEvent::Exit));
|
||||
}
|
||||
self.depth = depth;
|
||||
match dent {
|
||||
None => None,
|
||||
Some(Err(err)) => Some(Err(err)),
|
||||
Some(Ok(dent)) => {
|
||||
if dent.file_type().is_dir() {
|
||||
self.depth += 1;
|
||||
Some(Ok(WalkEvent::Dir(dent)))
|
||||
} else {
|
||||
Some(Ok(WalkEvent::File(dent)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::fs::{self, File};
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
|
||||
use tempdir::TempDir;
|
||||
|
||||
use super::{Walk, WalkBuilder};
|
||||
|
||||
/// Create (or truncate) the file at `path` and write `contents` to it,
/// panicking on any I/O failure.
fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
    let bytes = contents.as_bytes();
    File::create(path).unwrap().write_all(bytes).unwrap();
}
|
||||
|
||||
/// Create `path` together with any missing parent directories, panicking
/// on failure.
fn mkdirp<P: AsRef<Path>>(path: P) {
    let outcome = fs::create_dir_all(path);
    outcome.unwrap();
}
|
||||
|
||||
/// Return the path with `/` separators: when compiled for Windows,
/// backslashes are replaced by forward slashes; elsewhere the input is
/// copied unchanged.
fn normal_path(unix: &str) -> String {
    if !cfg!(windows) {
        return unix.to_string();
    }
    unix.replace("\\", "/")
}
|
||||
|
||||
fn walk_collect(prefix: &Path, walk: Walk) -> Vec<String> {
|
||||
let mut paths = vec![];
|
||||
for dent in walk {
|
||||
let dent = dent.unwrap();
|
||||
let path = dent.path().strip_prefix(prefix).unwrap();
|
||||
if path.as_os_str().is_empty() {
|
||||
continue;
|
||||
}
|
||||
paths.push(normal_path(path.to_str().unwrap()));
|
||||
}
|
||||
paths.sort();
|
||||
paths
|
||||
}
|
||||
|
||||
/// Turn a list of string slices into a sorted vector of owned strings.
fn mkpaths(paths: &[&str]) -> Vec<String> {
    let mut owned = Vec::with_capacity(paths.len());
    for p in paths {
        owned.push(p.to_string());
    }
    owned.sort();
    owned
}
|
||||
|
||||
// With no ignore files present, every directory and file below the root is
// yielded.
#[test]
fn no_ignores() {
    let td = TempDir::new("walk-test-").unwrap();
    let root = td.path();
    for dir in &["a/b/c", "x/y"] {
        mkdirp(root.join(dir));
    }
    for file in &["a/b/foo", "x/y/foo"] {
        wfile(root.join(file), "");
    }

    let expected = mkpaths(&[
        "x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c",
    ]);
    let got = walk_collect(root, Walk::new(root));
    assert_eq!(got, expected);
}
|
||||
|
||||
// A `.gitignore` in the root hides matching files at every level.
#[test]
fn gitignore() {
    let td = TempDir::new("walk-test-").unwrap();
    let root = td.path();
    mkdirp(root.join("a"));
    wfile(root.join(".gitignore"), "foo");
    for file in &["foo", "a/foo", "bar", "a/bar"] {
        wfile(root.join(file), "");
    }

    let expected = mkpaths(&["bar", "a", "a/bar"]);
    let got = walk_collect(root, Walk::new(root));
    assert_eq!(got, expected);
}
|
||||
|
||||
// An ignore file registered through `add_ignore` is applied even though its
// name is not one of the recognised ignore file names.
#[test]
fn explicit_ignore() {
    let td = TempDir::new("walk-test-").unwrap();
    let root = td.path();
    let igpath = root.join(".not-an-ignore");
    mkdirp(root.join("a"));
    wfile(&igpath, "foo");
    for file in &["foo", "a/foo", "bar", "a/bar"] {
        wfile(root.join(file), "");
    }

    let mut builder = WalkBuilder::new(root);
    let add_err = builder.add_ignore(&igpath);
    assert!(add_err.is_none());
    let expected = mkpaths(&["bar", "a", "a/bar"]);
    let got = walk_collect(root, builder.build());
    assert_eq!(got, expected);
}
|
||||
|
||||
// A `.gitignore` in the parent of the walked directory is still honoured.
#[test]
fn gitignore_parent() {
    let td = TempDir::new("walk-test-").unwrap();
    let top = td.path();
    mkdirp(top.join("a"));
    wfile(top.join(".gitignore"), "foo");
    for file in &["a/foo", "a/bar"] {
        wfile(top.join(file), "");
    }

    let root = top.join("a");
    let expected = mkpaths(&["bar"]);
    let got = walk_collect(&root, Walk::new(&root));
    assert_eq!(got, expected);
}
|
||||
}
|
||||
Reference in New Issue
Block a user