diff --git a/globset/src/glob.rs b/globset/src/glob.rs index 05467d5..0bdb9b4 100644 --- a/globset/src/glob.rs +++ b/globset/src/glob.rs @@ -1,4 +1,3 @@ -use std::ffi::{OsStr, OsString}; use std::fmt; use std::hash; use std::iter; @@ -28,7 +27,7 @@ pub enum MatchStrategy { BasenameLiteral(String), /// A pattern matches if and only if the file path's extension matches this /// literal string. - Extension(OsString), + Extension(String), /// A pattern matches if and only if this prefix literal is a prefix of the /// candidate file path. Prefix(String), @@ -47,7 +46,7 @@ pub enum MatchStrategy { /// extension. Note that this is a necessary but NOT sufficient criterion. /// Namely, if the extension matches, then a full regex search is still /// required. - RequiredExtension(OsString), + RequiredExtension(String), /// A regex needs to be used for matching. Regex, } @@ -154,7 +153,7 @@ impl GlobStrategic { lit.as_bytes() == &*candidate.basename } MatchStrategy::Extension(ref ext) => { - candidate.ext == ext + ext.as_bytes() == &*candidate.ext } MatchStrategy::Prefix(ref pre) => { starts_with(pre.as_bytes(), byte_path) @@ -166,7 +165,8 @@ impl GlobStrategic { ends_with(suffix.as_bytes(), byte_path) } MatchStrategy::RequiredExtension(ref ext) => { - candidate.ext == ext && self.re.is_match(byte_path) + let ext = ext.as_bytes(); + &*candidate.ext == ext && self.re.is_match(byte_path) } MatchStrategy::Regex => self.re.is_match(byte_path), } @@ -295,7 +295,7 @@ impl Glob { /// std::path::Path::extension returns. Namely, this extension includes /// the '.'. Also, paths like `.rs` are considered to have an extension /// of `.rs`. - fn ext(&self) -> Option { + fn ext(&self) -> Option { if self.opts.case_insensitive { return None; } @@ -319,11 +319,11 @@ impl Glob { Some(&Token::Literal('.')) => {} _ => return None, } - let mut lit = OsStr::new(".").to_os_string(); + let mut lit = ".".to_string(); for t in self.tokens[start + 2..].iter() { match *t { Token::Literal('.') | Token::Literal('/') => return None, - Token::Literal(c) => lit.push(c.to_string()), + Token::Literal(c) => lit.push(c), _ => return None, } } @@ -337,7 +337,7 @@ impl Glob { /// This is like `ext`, but returns an extension even if it isn't sufficent /// to imply a match. Namely, if an extension is returned, then it is /// necessary but not sufficient for a match. - fn required_ext(&self) -> Option { + fn required_ext(&self) -> Option { if self.opts.case_insensitive { return None; } @@ -360,7 +360,7 @@ impl Glob { None } else { ext.reverse(); - Some(OsString::from(ext.into_iter().collect::())) + Some(ext.into_iter().collect()) } } @@ -927,8 +927,6 @@ fn ends_with(needle: &[u8], haystack: &[u8]) -> bool { #[cfg(test)] mod tests { - use std::ffi::{OsStr, OsString}; - use {GlobSetBuilder, ErrorKind}; use super::{Glob, GlobBuilder, Token}; use super::Token::*; @@ -1021,7 +1019,6 @@ mod tests { } fn s(string: &str) -> String { string.to_string() } - fn os(string: &str) -> OsString { OsStr::new(string).to_os_string() } fn class(s: char, e: char) -> Token { Class { negated: false, ranges: vec![(s, e)] } @@ -1319,19 +1316,19 @@ mod tests { Literal('f'), Literal('o'), ZeroOrMore, Literal('o'), ]), SLASHLIT); - ext!(extract_ext1, "**/*.rs", Some(os(".rs"))); + ext!(extract_ext1, "**/*.rs", Some(s(".rs"))); ext!(extract_ext2, "**/*.rs.bak", None); - ext!(extract_ext3, "*.rs", Some(os(".rs"))); + ext!(extract_ext3, "*.rs", Some(s(".rs"))); ext!(extract_ext4, "a*.rs", None); ext!(extract_ext5, "/*.c", None); ext!(extract_ext6, "*.c", None, SLASHLIT); - ext!(extract_ext7, "*.c", Some(os(".c"))); + ext!(extract_ext7, "*.c", Some(s(".c"))); - required_ext!(extract_req_ext1, "*.rs", Some(os(".rs"))); - required_ext!(extract_req_ext2, "/foo/bar/*.rs", Some(os(".rs"))); - required_ext!(extract_req_ext3, "/foo/bar/*.rs", Some(os(".rs"))); - required_ext!(extract_req_ext4, "/foo/bar/.rs", Some(os(".rs"))); - required_ext!(extract_req_ext5, ".rs", Some(os(".rs"))); + required_ext!(extract_req_ext1, "*.rs", Some(s(".rs"))); + required_ext!(extract_req_ext2, "/foo/bar/*.rs", Some(s(".rs"))); + required_ext!(extract_req_ext3, "/foo/bar/*.rs", Some(s(".rs"))); + required_ext!(extract_req_ext4, "/foo/bar/.rs", Some(s(".rs"))); + required_ext!(extract_req_ext5, ".rs", Some(s(".rs"))); required_ext!(extract_req_ext6, "./rs", None); required_ext!(extract_req_ext7, "foo", None); required_ext!(extract_req_ext8, ".foo/", None); diff --git a/globset/src/lib.rs b/globset/src/lib.rs index dd6922e..af11dff 100644 --- a/globset/src/lib.rs +++ b/globset/src/lib.rs @@ -108,7 +108,7 @@ extern crate regex; use std::borrow::Cow; use std::collections::{BTreeMap, HashMap}; use std::error::Error as StdError; -use std::ffi::{OsStr, OsString}; +use std::ffi::OsStr; use std::fmt; use std::hash; use std::path::Path; @@ -458,7 +458,7 @@ impl GlobSetBuilder { pub struct Candidate<'a> { path: Cow<'a, [u8]>, basename: Cow<'a, [u8]>, - ext: &'a OsStr, + ext: Cow<'a, [u8]>, } impl<'a> Candidate<'a> { @@ -469,7 +469,7 @@ impl<'a> Candidate<'a> { Candidate { path: normalize_path(path_bytes(path)), basename: os_str_bytes(basename), - ext: file_name_ext(basename).unwrap_or(OsStr::new("")), + ext: file_name_ext(basename).unwrap_or(Cow::Borrowed(b"")), } } @@ -584,22 +584,22 @@ impl BasenameLiteralStrategy { } #[derive(Clone, Debug)] -struct ExtensionStrategy(HashMap, Fnv>); +struct ExtensionStrategy(HashMap, Vec, Fnv>); impl ExtensionStrategy { fn new() -> ExtensionStrategy { ExtensionStrategy(HashMap::with_hasher(Fnv::default())) } - fn add(&mut self, global_index: usize, ext: OsString) { - self.0.entry(ext).or_insert(vec![]).push(global_index); + fn add(&mut self, global_index: usize, ext: String) { + self.0.entry(ext.into_bytes()).or_insert(vec![]).push(global_index); } fn is_match(&self, candidate: &Candidate) -> bool { if candidate.ext.is_empty() { return false; } - self.0.contains_key(candidate.ext) + self.0.contains_key(&*candidate.ext) } #[inline(never)] @@ -607,7 +607,7 @@ impl ExtensionStrategy { if candidate.ext.is_empty() { return; } - if let Some(hits) = self.0.get(candidate.ext) { + if let Some(hits) = self.0.get(&*candidate.ext) { matches.extend(hits); } } @@ -670,14 +670,14 @@ impl SuffixStrategy { } #[derive(Clone, Debug)] -struct RequiredExtensionStrategy(HashMap, Fnv>); +struct RequiredExtensionStrategy(HashMap, Vec<(usize, Regex)>, Fnv>); impl RequiredExtensionStrategy { fn is_match(&self, candidate: &Candidate) -> bool { if candidate.ext.is_empty() { return false; } - match self.0.get(candidate.ext) { + match self.0.get(&*candidate.ext) { None => false, Some(regexes) => { for &(_, ref re) in regexes { @@ -695,7 +695,7 @@ impl RequiredExtensionStrategy { if candidate.ext.is_empty() { return; } - if let Some(regexes) = self.0.get(candidate.ext) { + if let Some(regexes) = self.0.get(&*candidate.ext) { for &(global_index, ref re) in regexes { if re.is_match(&*candidate.path) { matches.push(global_index); @@ -775,7 +775,7 @@ impl MultiStrategyBuilder { #[derive(Clone, Debug)] struct RequiredExtensionStrategyBuilder( - HashMap>, + HashMap, Vec<(usize, String)>>, ); impl RequiredExtensionStrategyBuilder { @@ -783,8 +783,11 @@ impl RequiredExtensionStrategyBuilder { RequiredExtensionStrategyBuilder(HashMap::new()) } - fn add(&mut self, global_index: usize, ext: OsString, regex: String) { - self.0.entry(ext).or_insert(vec![]).push((global_index, regex)); + fn add(&mut self, global_index: usize, ext: String, regex: String) { + self.0 + .entry(ext.into_bytes()) + .or_insert(vec![]) + .push((global_index, regex)); } fn build(self) -> Result { diff --git a/globset/src/pathutil.rs b/globset/src/pathutil.rs index 16bd16f..4b808e8 100644 --- a/globset/src/pathutil.rs +++ b/globset/src/pathutil.rs @@ -54,34 +54,28 @@ pub fn file_name<'a, P: AsRef + ?Sized>( /// a pattern like `*.rs` is obviously trying to match files with a `rs` /// extension, but it also matches files like `.rs`, which doesn't have an /// extension according to std::path::Path::extension. -pub fn file_name_ext(name: &OsStr) -> Option<&OsStr> { - // Yes, these functions are awful, and yes, we are completely violating - // the abstraction barrier of std::ffi. The barrier we're violating is - // that an OsStr's encoding is *ASCII compatible*. While this is obviously - // true on Unix systems, it's also true on Windows because an OsStr uses - // WTF-8 internally: https://simonsapin.github.io/wtf-8/ - // - // We should consider doing the same for the other path utility functions. - // Right now, we don't break any barriers, but Windows users are paying - // for it. - // - // Got any better ideas that don't cost anything? Hit me up. ---AG - unsafe fn os_str_as_u8_slice(s: &OsStr) -> &[u8] { - ::std::mem::transmute(s) - } - unsafe fn u8_slice_as_os_str(s: &[u8]) -> &OsStr { - ::std::mem::transmute(s) - } +pub fn file_name_ext(name: &OsStr) -> Option> { if name.is_empty() { return None; } - let name = unsafe { os_str_as_u8_slice(name) }; - for (i, &b) in name.iter().enumerate().rev() { - if b == b'.' { - return Some(unsafe { u8_slice_as_os_str(&name[i..]) }); + let name = os_str_bytes(name); + let last_dot_at = { + let result = name + .iter().enumerate().rev() + .find(|&(_, &b)| b == b'.') + .map(|(i, _)| i); + match result { + None => return None, + Some(i) => i, } - } - None + }; + Some(match name { + Cow::Borrowed(name) => Cow::Borrowed(&name[last_dot_at..]), + Cow::Owned(mut name) => { + name.drain(..last_dot_at); + Cow::Owned(name) + } + }) } /// Return raw bytes of a path, transcoded to UTF-8 if necessary. @@ -144,7 +138,7 @@ mod tests { #[test] fn $name() { let got = file_name_ext(OsStr::new($file_name)); - assert_eq!($ext.map(OsStr::new), got); + assert_eq!($ext.map(|s| Cow::Borrowed(s.as_bytes())), got); } }; }