diff --git a/Cargo.lock b/Cargo.lock index 329fdf9..f2213e7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,15 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + [[package]] name = "anyhow" version = "1.0.75" @@ -50,6 +59,16 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" +[[package]] +name = "bstr" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "cc" version = "1.0.86" @@ -62,6 +81,31 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + [[package]] name = "deno_config" version = "0.19.1" @@ -69,6 +113,7 @@ 
dependencies = [ "anyhow", "deno_semver", "glob", + "ignore", "import_map", "indexmap", "jsonc-parser", @@ -145,6 +190,19 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +[[package]] +name = "globset" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57da3b9b5b85bd66f31093f8c408b90a74431672542466497dcbdfdc02034be1" +dependencies = [ + "aho-corasick", + "bstr", + "log", + "regex-automata", + "regex-syntax", +] + [[package]] name = "hashbrown" version = "0.14.3" @@ -167,6 +225,22 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "ignore" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b46810df39e66e925525d6e38ce1e7f6e1d208f72dc39757880fcb66e2c58af1" +dependencies = [ + "crossbeam-deque", + "globset", + "log", + "memchr", + "regex-automata", + "same-file", + "walkdir", + "winapi-util", +] + [[package]] name = "import_map" version = "0.20.0" @@ -320,6 +394,23 @@ dependencies = [ "bitflags 1.3.2", ] +[[package]] +name = "regex-automata" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" + [[package]] name = "rustc-demangle" version = "0.1.23" @@ -345,6 +436,15 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "serde" version = "1.0.197" @@ -492,6 +592,25 @@ dependencies = [ "serde", ] +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "winapi-util" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" +dependencies = [ + "windows-sys", +] + [[package]] name = "windows-sys" version = "0.52.0" diff --git a/Cargo.toml b/Cargo.toml index eb2df69..9529fe8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,7 @@ repository = "https://github.com/denoland/deno_config" [features] default = ["workspace"] -deno_json = ["jsonc-parser", "glob", "import_map"] +deno_json = ["jsonc-parser", "glob", "ignore", "import_map"] package_json = ["deno_semver"] sync = [] workspace = ["deno_json", "package_json"] @@ -20,6 +20,7 @@ indexmap = { version = "2", features = ["serde"] } jsonc-parser = { version = "0.23.0", features = ["serde"], optional = true } log = "0.4.20" glob = { version = "0.3.1", optional = true } +ignore = { version = "0.4", optional = true } percent-encoding = "2.3.0" serde = { version = "1.0.149", features = ["derive"] } serde_json = "1.0.85" diff --git a/src/deno_json/mod.rs b/src/deno_json/mod.rs index 9df4adc..0ef5746 100644 --- a/src/deno_json/mod.rs +++ b/src/deno_json/mod.rs @@ -727,7 +727,7 @@ impl ConfigFile { config_path: &Path, parse_options: &ConfigParseOptions, ) -> Result { - let text = fs.read_to_string(config_path).map_err(|err| { + let text = fs.read_to_string_lossy(config_path).map_err(|err| { ConfigFileReadError::FailedReading { specifier: specifier.clone(), source: err, diff --git a/src/fs.rs b/src/fs.rs 
index 857ea24..13f5100 100644 --- a/src/fs.rs +++ b/src/fs.rs @@ -1,19 +1,81 @@ // Copyright 2018-2024 the Deno authors. MIT license. +use std::borrow::Cow; use std::path::Path; +use std::path::PathBuf; + +#[derive(Debug, Default, Clone)] +pub struct FsMetadata { + pub is_file: bool, + pub is_directory: bool, + pub is_symlink: bool, +} + +#[derive(Debug, Clone)] +pub struct FsDirEntry { + pub path: PathBuf, + pub metadata: FsMetadata, +} pub trait DenoConfigFs { - fn read_to_string(&self, path: &Path) -> Result; + fn stat_sync(&self, path: &Path) -> Result; + fn read_to_string_lossy(&self, path: &Path) + -> Result; + fn read_dir(&self, path: &Path) -> Result, std::io::Error>; + + fn exists(&self, path: &Path) -> bool { + self.stat_sync(path).is_ok() + } } #[derive(Debug, Clone, Copy)] pub struct RealDenoConfigFs; impl DenoConfigFs for RealDenoConfigFs { - fn read_to_string(&self, path: &Path) -> Result { + fn stat_sync(&self, path: &Path) -> Result { + // allowed here for the real fs + #[allow(clippy::disallowed_methods)] + std::fs::metadata(path).map(|metadata| FsMetadata { + is_file: metadata.is_file(), + is_directory: metadata.is_dir(), + is_symlink: metadata.file_type().is_symlink(), + }) + } + + fn read_to_string_lossy( + &self, + path: &Path, + ) -> Result { + // allowed here for the real fs + #[allow(clippy::disallowed_methods)] + let bytes = std::fs::read(path)?; + Ok(string_from_utf8_lossy(bytes)) + } + + fn read_dir(&self, path: &Path) -> Result, std::io::Error> { // allowed here for the real fs #[allow(clippy::disallowed_methods)] - std::fs::read_to_string(path) + let entries = std::fs::read_dir(path)?; + let mut result = Vec::new(); + for entry in entries { + let Ok(entry) = entry else { + continue; + }; + let path = entry.path(); + let Ok(metadata) = entry.metadata() else { + continue; + }; + let stat = FsMetadata { + is_file: metadata.is_file(), + is_directory: metadata.is_dir(), + is_symlink: metadata.file_type().is_symlink(), + }; + 
result.push(FsDirEntry { + path, + metadata: stat, + }); + } + Ok(result) } } @@ -23,11 +85,55 @@ impl<'a> Default for &'a dyn DenoConfigFs { } } +// Like String::from_utf8_lossy but operates on owned values +#[inline(always)] +fn string_from_utf8_lossy(buf: Vec) -> String { + match String::from_utf8_lossy(&buf) { + // buf contained non-utf8 chars that have been patched + Cow::Owned(s) => s, + // SAFETY: if Borrowed then the buf only contains utf8 chars, + // we do this instead of .into_owned() to avoid copying the input buf + Cow::Borrowed(_) => unsafe { String::from_utf8_unchecked(buf) }, + } +} + +#[cfg(test)] +#[derive(Debug)] +enum DirEntry { + // todo(dsherret): add symlink here in the future + Directory, + File(String), +} + +#[cfg(test)] +impl DirEntry { + pub fn as_metadata(&self) -> FsMetadata { + match self { + DirEntry::Directory => FsMetadata { + is_file: false, + is_directory: true, + is_symlink: false, + }, + DirEntry::File(_) => FsMetadata { + is_file: true, + is_directory: false, + is_symlink: false, + }, + } + } +} + #[cfg(test)] #[derive(Debug, Default)] -pub(crate) struct TestFileSystem( - pub std::collections::HashMap, -); +struct Dir { + pub entries: std::collections::BTreeMap, +} + +#[cfg(test)] +#[derive(Debug, Default)] +pub(crate) struct TestFileSystem { + directories: std::collections::BTreeMap, +} #[cfg(test)] impl TestFileSystem { @@ -40,17 +146,87 @@ impl TestFileSystem { } pub fn insert(&mut self, path: impl AsRef, contents: impl AsRef) { - self - .0 - .insert(path.as_ref().to_path_buf(), contents.as_ref().to_string()); + let path = path.as_ref(); + self.get_dir_mut(path.parent().unwrap()).entries.insert( + path.file_name().unwrap().to_string_lossy().to_string(), + DirEntry::File(contents.as_ref().to_string()), + ); + } + + fn get_dir_mut(&mut self, path: &Path) -> &mut Dir { + let path = crate::util::normalize_path(path); + if !self.directories.contains_key(&path) { + if let Some(parent) = path.parent() { + let parent_dir = 
self.get_dir_mut(parent); + let file_name = path.file_name().unwrap().to_string_lossy().to_string(); + parent_dir.entries.insert(file_name, DirEntry::Directory); + } + self.directories.insert( + path.clone(), + Dir { + entries: Default::default(), + }, + ); + } + self.directories.get_mut(&path).unwrap() } } #[cfg(test)] impl DenoConfigFs for TestFileSystem { - fn read_to_string(&self, path: &Path) -> Result { - self.0.get(path).cloned().ok_or_else(|| { - std::io::Error::new(std::io::ErrorKind::NotFound, "file not found") - }) + fn read_to_string_lossy( + &self, + path: &Path, + ) -> Result { + let path = crate::util::normalize_path(path); + path + .parent() + .and_then(|parent| { + self + .directories + .get(parent) + .and_then(|d| d.entries.get(path.file_name()?.to_str()?)) + .and_then(|e| match e { + DirEntry::Directory => None, + DirEntry::File(text) => Some(text.clone()), + }) + }) + .ok_or_else(|| { + std::io::Error::new(std::io::ErrorKind::NotFound, "file not found") + }) + } + + fn stat_sync(&self, path: &Path) -> Result { + path + .parent() + .and_then(|parent| { + self + .directories + .get(parent) + .and_then(|d| d.entries.get(path.file_name()?.to_str()?)) + .map(|e| e.as_metadata()) + }) + .ok_or_else(|| { + std::io::Error::new(std::io::ErrorKind::NotFound, "not found") + }) + } + + fn read_dir(&self, path: &Path) -> Result, std::io::Error> { + self + .directories + .get(path) + .map(|dir| { + dir + .entries + .iter() + .map(|(name, entry)| FsDirEntry { + path: path.join(name), + metadata: entry.as_metadata(), + }) + .collect::>() + }) + .ok_or_else(|| { + std::io::Error::new(std::io::ErrorKind::NotFound, "not found") + }) } } diff --git a/src/glob/collector.rs b/src/glob/collector.rs new file mode 100644 index 0000000..f592a26 --- /dev/null +++ b/src/glob/collector.rs @@ -0,0 +1,368 @@ +// Copyright 2018-2024 the Deno authors. MIT license. 
+ +use std::collections::VecDeque; +use std::path::Path; +use std::path::PathBuf; + +use crate::fs::FsMetadata; +use crate::glob::gitignore::DirGitIgnores; +use crate::glob::gitignore::GitIgnoreTree; +use crate::glob::FilePatternsMatch; +use crate::glob::PathKind; +use crate::glob::PathOrPattern; +use crate::util::normalize_path; +use crate::util::CheckedSet; + +use super::FilePatterns; + +#[derive(Debug, Clone)] +pub struct WalkEntry<'a> { + pub path: &'a Path, + pub metadata: &'a FsMetadata, + pub patterns: &'a FilePatterns, +} + +/// Collects file paths that satisfy the given predicate by walking the directory tree under each base of the provided `FilePatterns`. +/// If the walker visits a directory that the patterns exclude (or that a `.gitignore` ignores when enabled), it skips descending into it. +pub struct FileCollector bool> { + file_filter: TFilter, + ignore_git_folder: bool, + ignore_node_modules: bool, + vendor_folder: Option, + use_gitignore: bool, +} + +impl bool> FileCollector { + pub fn new(file_filter: TFilter) -> Self { + Self { + file_filter, + ignore_git_folder: false, + ignore_node_modules: false, + vendor_folder: None, + use_gitignore: false, + } + } + + pub fn ignore_node_modules(mut self) -> Self { + self.ignore_node_modules = true; + self + } + + pub fn set_vendor_folder(mut self, vendor_folder: Option) -> Self { + self.vendor_folder = vendor_folder; + self + } + + pub fn ignore_git_folder(mut self) -> Self { + self.ignore_git_folder = true; + self + } + + pub fn use_gitignore(mut self) -> Self { + self.use_gitignore = true; + self + } + + pub fn collect_file_patterns( + &self, + fs: &dyn crate::fs::DenoConfigFs, + file_patterns: FilePatterns, + ) -> Result, anyhow::Error> { + fn is_pattern_matched( + maybe_git_ignore: Option<&DirGitIgnores>, + path: &Path, + is_dir: bool, + file_patterns: &FilePatterns, + ) -> bool { + let path_kind = match is_dir { + true => PathKind::Directory, + false => PathKind::File, + }; + match file_patterns.matches_path_detail(path, path_kind) { + FilePatternsMatch::Passed => { + // check 
gitignore + let is_gitignored = maybe_git_ignore + .as_ref() + .map(|git_ignore| git_ignore.is_ignored(path, is_dir)) + .unwrap_or(false); + !is_gitignored + } + FilePatternsMatch::PassedOptedOutExclude => true, + FilePatternsMatch::Excluded => false, + } + } + + let mut maybe_git_ignores = if self.use_gitignore { + // Override explicitly specified include paths in the + // .gitignore file. This does not apply to globs because + // that is way too complicated to reason about. + let include_paths = file_patterns + .include + .as_ref() + .map(|include| { + include + .inner() + .iter() + .filter_map(|path_or_pattern| { + if let PathOrPattern::Path(p) = path_or_pattern { + Some(p.clone()) + } else { + None + } + }) + .collect::>() + }) + .unwrap_or_default(); + Some(GitIgnoreTree::new(fs, include_paths)) + } else { + None + }; + let mut target_files = Vec::new(); + let mut visited_paths: CheckedSet = CheckedSet::default(); + let file_patterns_by_base = file_patterns.split_by_base(); + for file_patterns in file_patterns_by_base { + let specified_path = normalize_path(&file_patterns.base); + let mut pending_dirs = VecDeque::new(); + let mut handle_entry = + |path: PathBuf, + metadata: &FsMetadata, + pending_dirs: &mut VecDeque| { + let maybe_gitignore = + maybe_git_ignores.as_mut().and_then(|git_ignores| { + if metadata.is_directory { + git_ignores.get_resolved_git_ignore_for_dir(&path) + } else { + git_ignores.get_resolved_git_ignore_for_file(&path) + } + }); + if !is_pattern_matched( + maybe_gitignore.as_deref(), + &path, + metadata.is_directory, + &file_patterns, + ) { + // ignore + } else if metadata.is_directory { + // allow the user to opt out of ignoring by explicitly specifying the dir + let opt_out_ignore = specified_path == path; + let should_ignore_dir = + !opt_out_ignore && self.is_ignored_dir(&path); + if !should_ignore_dir && visited_paths.insert(&path) { + pending_dirs.push_back(path); + } + } else if (self.file_filter)(WalkEntry { + path: &path, + 
metadata, + patterns: &file_patterns, + }) && visited_paths.insert(&path) + { + target_files.push(path); + } + }; + + if let Ok(metadata) = fs.stat_sync(&specified_path) { + handle_entry(specified_path.clone(), &metadata, &mut pending_dirs); + } + + // use an iterator in order to minimize the number of file system operations + while let Some(next_dir) = pending_dirs.pop_front() { + let Ok(entries) = fs.read_dir(&next_dir) else { + continue; + }; + for entry in entries { + handle_entry(entry.path, &entry.metadata, &mut pending_dirs) + } + } + } + Ok(target_files) + } + + fn is_ignored_dir(&self, path: &Path) -> bool { + path + .file_name() + .map(|dir_name| { + let dir_name = dir_name.to_string_lossy().to_lowercase(); + let is_ignored_file = match dir_name.as_str() { + "node_modules" => self.ignore_node_modules, + ".git" => self.ignore_git_folder, + _ => false, + }; + is_ignored_file + }) + .unwrap_or(false) + || self.is_vendor_folder(path) + } + + fn is_vendor_folder(&self, path: &Path) -> bool { + self + .vendor_folder + .as_ref() + .map(|vendor_folder| path == *vendor_folder) + .unwrap_or(false) + } +} + +#[cfg(test)] +mod test { + use std::path::PathBuf; + + use tempfile::TempDir; + + use super::*; + use crate::fs::RealDenoConfigFs; + use crate::glob::FilePatterns; + use crate::glob::PathOrPattern; + use crate::glob::PathOrPatternSet; + + #[allow(clippy::disallowed_methods)] // allow fs methods + #[test] + fn test_collect_files() { + fn create_files(dir_path: &PathBuf, files: &[&str]) { + std::fs::create_dir_all(dir_path).unwrap(); + for f in files { + std::fs::write(dir_path.join(f), "").unwrap(); + } + } + + // dir.ts + // ├── a.ts + // ├── b.js + // ├── child + // | ├── git + // | | └── git.js + // | ├── node_modules + // | | └── node_modules.js + // | ├── vendor + // | | └── vendor.js + // │ ├── e.mjs + // │ ├── f.mjsx + // │ ├── .foo.TS + // │ └── README.md + // ├── c.tsx + // ├── d.jsx + // └── ignore + // ├── g.d.ts + // └── .gitignore + + let t = 
TempDir::new().unwrap(); + + let root_dir_path = t.path().join("dir.ts"); + let root_dir_files = ["a.ts", "b.js", "c.tsx", "d.jsx"]; + create_files(&root_dir_path, &root_dir_files); + + let child_dir_path = root_dir_path.join("child"); + let child_dir_files = ["e.mjs", "f.mjsx", ".foo.TS", "README.md"]; + create_files(&child_dir_path, &child_dir_files); + + std::fs::create_dir_all(t.path().join("dir.ts/child/node_modules")) + .unwrap(); + std::fs::write( + t.path().join("dir.ts/child/node_modules/node_modules.js"), + "", + ) + .unwrap(); + std::fs::create_dir_all(t.path().join("dir.ts/child/.git")).unwrap(); + std::fs::write(t.path().join("dir.ts/child/.git/git.js"), "").unwrap(); + std::fs::create_dir_all(t.path().join("dir.ts/child/vendor")).unwrap(); + std::fs::write(t.path().join("dir.ts/child/vendor/vendor.js"), "").unwrap(); + + let ignore_dir_path = root_dir_path.join("ignore"); + let ignore_dir_files = ["g.d.ts", ".gitignore"]; + create_files(&ignore_dir_path, &ignore_dir_files); + + let file_patterns = FilePatterns { + base: root_dir_path.to_path_buf(), + include: None, + exclude: PathOrPatternSet::new(vec![PathOrPattern::Path( + ignore_dir_path.to_path_buf(), + )]), + }; + let file_collector = FileCollector::new(|e| { + // exclude dotfiles + e.path + .file_name() + .and_then(|f| f.to_str()) + .map(|f| !f.starts_with('.')) + .unwrap_or(false) + }); + + let result = file_collector + .collect_file_patterns(&RealDenoConfigFs, file_patterns.clone()) + .unwrap(); + let expected = [ + "README.md", + "a.ts", + "b.js", + "c.tsx", + "d.jsx", + "e.mjs", + "f.mjsx", + "git.js", + "node_modules.js", + "vendor.js", + ]; + let mut file_names = result + .into_iter() + .map(|r| r.file_name().unwrap().to_string_lossy().to_string()) + .collect::>(); + file_names.sort(); + assert_eq!(file_names, expected); + + // test ignoring the .git and node_modules folder + let file_collector = file_collector + .ignore_git_folder() + .ignore_node_modules() + 
.set_vendor_folder(Some(child_dir_path.join("vendor").to_path_buf())); + let result = file_collector + .collect_file_patterns(&RealDenoConfigFs, file_patterns.clone()) + .unwrap(); + let expected = [ + "README.md", + "a.ts", + "b.js", + "c.tsx", + "d.jsx", + "e.mjs", + "f.mjsx", + ]; + let mut file_names = result + .into_iter() + .map(|r| r.file_name().unwrap().to_string_lossy().to_string()) + .collect::>(); + file_names.sort(); + assert_eq!(file_names, expected); + + // test opting out of ignoring by specifying the dir + let file_patterns = FilePatterns { + base: root_dir_path.to_path_buf(), + include: Some(PathOrPatternSet::new(vec![ + PathOrPattern::Path(root_dir_path.to_path_buf()), + PathOrPattern::Path( + root_dir_path.to_path_buf().join("child/node_modules/"), + ), + ])), + exclude: PathOrPatternSet::new(vec![PathOrPattern::Path( + ignore_dir_path.to_path_buf(), + )]), + }; + let result = file_collector + .collect_file_patterns(&RealDenoConfigFs, file_patterns) + .unwrap(); + let expected = [ + "README.md", + "a.ts", + "b.js", + "c.tsx", + "d.jsx", + "e.mjs", + "f.mjsx", + "node_modules.js", + ]; + let mut file_names = result + .into_iter() + .map(|r| r.file_name().unwrap().to_string_lossy().to_string()) + .collect::>(); + file_names.sort(); + assert_eq!(file_names, expected); + } +} diff --git a/src/glob/gitignore.rs b/src/glob/gitignore.rs new file mode 100644 index 0000000..2774f20 --- /dev/null +++ b/src/glob/gitignore.rs @@ -0,0 +1,172 @@ +// Copyright 2018-2024 the Deno authors. MIT license. + +use std::collections::HashMap; +use std::path::Path; +use std::path::PathBuf; +use std::rc::Rc; + +/// Resolved gitignore for a directory. 
+pub struct DirGitIgnores { + current: Option>, + parent: Option>, +} + +impl DirGitIgnores { + pub fn is_ignored(&self, path: &Path, is_dir: bool) -> bool { + let mut is_ignored = false; + if let Some(parent) = &self.parent { + is_ignored = parent.is_ignored(path, is_dir); + } + if let Some(current) = &self.current { + match current.matched(path, is_dir) { + ignore::Match::None => {} + ignore::Match::Ignore(_) => { + is_ignored = true; + } + ignore::Match::Whitelist(_) => { + is_ignored = false; + } + } + } + is_ignored + } +} + +/// Resolves gitignores in a directory tree taking into account +/// ancestor gitignores that may be found in a directory. +pub struct GitIgnoreTree<'a> { + fs: &'a dyn crate::fs::DenoConfigFs, + ignores: HashMap>>, + include_paths: Vec, +} + +impl<'a> GitIgnoreTree<'a> { + pub fn new( + fs: &'a dyn crate::fs::DenoConfigFs, + // paths that should override what's in the gitignore + include_paths: Vec, + ) -> Self { + Self { + fs, + ignores: Default::default(), + include_paths, + } + } + + pub fn get_resolved_git_ignore_for_dir( + &mut self, + dir_path: &Path, + ) -> Option> { + // for directories, provide itself in order to tell + // if it should stop searching for gitignores because + // this dir_path may itself contain a .git directory + let parent = dir_path.parent()?; + self.get_resolved_git_ignore_inner(parent, Some(dir_path)) + } + + pub fn get_resolved_git_ignore_for_file( + &mut self, + file_path: &Path, + ) -> Option> { + let dir_path = file_path.parent()?; + self.get_resolved_git_ignore_inner(dir_path, None) + } + + fn get_resolved_git_ignore_inner( + &mut self, + dir_path: &Path, + maybe_parent: Option<&Path>, + ) -> Option> { + let maybe_resolved = self.ignores.get(dir_path).cloned(); + if let Some(resolved) = maybe_resolved { + resolved + } else { + let resolved = self.resolve_gitignore_in_dir(dir_path, maybe_parent); + self.ignores.insert(dir_path.to_owned(), resolved.clone()); + resolved + } + } + + fn resolve_gitignore_in_dir( + &mut 
self, + dir_path: &Path, + maybe_parent: Option<&Path>, + ) -> Option> { + if let Some(parent) = maybe_parent { + // stop searching if the parent dir had a .git directory in it + if self.fs.exists(&parent.join(".git")) { + return None; + } + } + + let parent = dir_path.parent().and_then(|parent| { + self.get_resolved_git_ignore_inner(parent, Some(dir_path)) + }); + let current = self + .fs + .read_to_string_lossy(&dir_path.join(".gitignore")) + .ok() + .and_then(|text| { + let mut builder = ignore::gitignore::GitignoreBuilder::new(dir_path); + for line in text.lines() { + builder.add_line(None, line).ok()?; + } + // override the gitignore contents to include these paths + for path in &self.include_paths { + if let Ok(suffix) = path.strip_prefix(dir_path) { + let suffix = suffix.to_string_lossy().replace('\\', "/"); + let _ignore = builder.add_line(None, &format!("!/{}", suffix)); + if !suffix.ends_with('/') { + let _ignore = builder.add_line(None, &format!("!/{}/", suffix)); + } + } + } + let gitignore = builder.build().ok()?; + Some(Rc::new(gitignore)) + }); + if parent.is_none() && current.is_none() { + None + } else { + Some(Rc::new(DirGitIgnores { current, parent })) + } + } +} + +#[cfg(test)] +mod test { + use crate::fs::TestFileSystem; + + use super::*; + + #[test] + fn git_ignore_tree() { + let mut fs = TestFileSystem::default(); + fs.insert("/.gitignore", "file.txt"); + fs.insert("/sub_dir/.gitignore", "data.txt"); + fs.insert("/sub_dir/sub_dir/.gitignore", "!file.txt\nignore.txt"); + let mut ignore_tree = GitIgnoreTree::new(&fs, Vec::new()); + let mut run_test = |path: &str, expected: bool| { + let path = PathBuf::from(path); + let gitignore = + ignore_tree.get_resolved_git_ignore_for_file(&path).unwrap(); + assert_eq!( + gitignore.is_ignored(&path, /* is_dir */ false), + expected, + "Path: {}", + path.display() + ); + }; + run_test("/file.txt", true); + run_test("/other.txt", false); + run_test("/data.txt", false); + run_test("/sub_dir/file.txt", true); + 
run_test("/sub_dir/other.txt", false); + run_test("/sub_dir/data.txt", true); + run_test("/sub_dir/sub_dir/file.txt", false); // unignored up here + run_test("/sub_dir/sub_dir/sub_dir/file.txt", false); + run_test("/sub_dir/sub_dir/sub_dir/ignore.txt", true); + run_test("/sub_dir/sub_dir/ignore.txt", true); + run_test("/sub_dir/ignore.txt", false); + run_test("/ignore.txt", false); + } +} diff --git a/src/glob.rs b/src/glob/mod.rs similarity index 99% rename from src/glob.rs rename to src/glob/mod.rs index 96ab19d..96c8987 100644 --- a/src/glob.rs +++ b/src/glob/mod.rs @@ -12,6 +12,12 @@ use url::Url; use crate::util::normalize_path; use crate::util::specifier_to_file_path; +mod collector; +mod gitignore; + +pub use collector::FileCollector; +pub use collector::WalkEntry; + #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum FilePatternsMatch { /// File passes as matching, but further exclude matching (ex. .gitignore) diff --git a/src/package_json.rs b/src/package_json.rs index 5572664..b92b6af 100644 --- a/src/package_json.rs +++ b/src/package_json.rs @@ -87,7 +87,7 @@ impl PackageJson { if let Some(item) = maybe_cache.and_then(|c| c.get(path)) { Ok(item) } else { - match fs.read_to_string(path) { + match fs.read_to_string_lossy(path) { Ok(file_text) => { let pkg_json = PackageJson::load_from_string(path.to_path_buf(), file_text)?;