Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ regex = "1.10.6"
serde_json = "1.0.125"
etcetera = "0.8.0"
table_formatter = "0.6.1"
gix-attributes = "0.28.1"

[dependencies.env_logger]
optional = true
Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -251,11 +251,13 @@ FLAGS:
--hidden Count hidden files.
-l, --languages Prints out supported languages and their extensions.
--no-ignore Don't respect ignore files (.gitignore, .ignore, etc.). This implies --no-ignore-parent,
--no-ignore-dot, and --no-ignore-vcs.
--no-ignore-dot, --no-ignore-vcs, and --no-ignore-linguist.
--no-ignore-dot Don't respect .ignore and .tokeignore files, including those in parent directories.
--no-ignore-parent Don't respect ignore files (.gitignore, .ignore, etc.) in parent directories.
--no-ignore-vcs Don't respect VCS ignore files (.gitignore, .hgignore, etc.), including those in parent
directories.
--no-ignore-linguist Don't respect linguist-vendored, linguist-generated, and linguist-documentation statements
in .gitattributes files.
-V, --version Prints version information
-v, --verbose Set log output level:
1: to show unknown file extensions,
Expand Down
1 change: 1 addition & 0 deletions fuzz/fuzz_targets/parse_from_slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ pub fn parse_from_slice(input: FuzzInput, check_total: bool) {
no_ignore_parent: None,
no_ignore_dot: None,
no_ignore_vcs: None,
no_ignore_linguist: None,
sort: None,
types: None,
for_each_fn: None,
Expand Down
19 changes: 19 additions & 0 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ pub struct Cli {
pub no_ignore_parent: bool,
pub no_ignore_dot: bool,
pub no_ignore_vcs: bool,
pub no_ignore_linguist: bool,
pub output: Option<Format>,
pub streaming: Option<Streaming>,
pub print_languages: bool,
Expand Down Expand Up @@ -154,6 +155,15 @@ impl Cli {
those in parent directories.\
",
))
.arg(Arg::new("no_ignore_linguist")
.long("no-ignore-linguist")
.action(ArgAction::SetTrue)
.help(
"\
Don't respect linguist-vendored, linguist-generated, and linguist-documentation \
statements in .gitattributes files, including those in parent directories \
"
))
.arg(
Arg::new("output")
.long("output")
Expand Down Expand Up @@ -242,6 +252,7 @@ impl Cli {
let no_ignore_parent = matches.get_flag("no_ignore_parent");
let no_ignore_dot = matches.get_flag("no_ignore_dot");
let no_ignore_vcs = matches.get_flag("no_ignore_vcs");
let no_ignore_linguist = matches.get_flag("no_ignore_linguist");
let print_languages = matches.get_flag("languages");
let verbose = matches.get_count("verbose") as u64;
let compact = matches.get_flag("compact");
Expand Down Expand Up @@ -304,6 +315,7 @@ impl Cli {
no_ignore_parent,
no_ignore_dot,
no_ignore_vcs,
no_ignore_linguist,
output,
streaming,
print_languages,
Expand Down Expand Up @@ -408,6 +420,7 @@ impl Cli {
/// * `no_ignore_parent`
/// * `no_ignore_dot`
/// * `no_ignore_vcs`
/// * `no_ignore_linguist`
/// * `types`
pub fn override_config(&mut self, mut config: Config) -> Config {
config.hidden = if self.hidden {
Expand Down Expand Up @@ -440,6 +453,12 @@ impl Cli {
config.no_ignore_vcs
};

config.no_ignore_linguist = if self.no_ignore_linguist {
Some(true)
} else {
config.no_ignore_linguist
};

config.for_each_fn = match self.streaming {
Some(Streaming::Json) => Some(|l: LanguageType, e| {
println!("{}", serde_json::json!({"language": l.name(), "stats": e}));
Expand Down
6 changes: 6 additions & 0 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ pub struct Config {
/// Don't respect VCS ignore files (.gitignore, .hgignore, etc.), including those in
/// parent directories. *Default:* `false`.
pub no_ignore_vcs: Option<bool>,
/// Don't respect linguist-vendored, linguist-generated, and linguist-documentation statements
/// in .gitattributes files, including those in parent directories. *Default:* `false`.
pub no_ignore_linguist: Option<bool>,
/// Whether to treat doc strings in languages as comments. *Default:*
/// `false`.
pub treat_doc_strings_as_comments: Option<bool>,
Expand Down Expand Up @@ -135,6 +138,9 @@ impl Config {
no_ignore_vcs: current_dir
.no_ignore_vcs
.or(home_dir.no_ignore_vcs.or(conf_dir.no_ignore_vcs)),
no_ignore_linguist: current_dir
.no_ignore_linguist
.or(home_dir.no_ignore_linguist.or(conf_dir.no_ignore_linguist)),
}
}
}
Expand Down
127 changes: 112 additions & 15 deletions src/utils/fs.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
use std::{collections::BTreeMap, path::Path};
use std::{
collections::BTreeMap,
path::{Path, PathBuf},
};

use gix_attributes::{self, parse::Kind};

use ignore::{overrides::OverrideBuilder, DirEntry, WalkBuilder, WalkState::Continue};

Expand All @@ -10,6 +15,12 @@ use crate::{
};

const IGNORE_FILE: &str = ".tokeignore";
const GITATTRIBUTES: &str = ".gitattributes";
const LINGUIST_IGNORES: &[&str] = &[
"linguist-vendored",
"linguist-generated",
"linguist-documentation",
];

pub fn get_all_files<A: AsRef<Path>>(
paths: &[A],
Expand All @@ -20,26 +31,36 @@ pub fn get_all_files<A: AsRef<Path>>(
let languages = parking_lot::Mutex::new(languages);
let (tx, rx) = crossbeam_channel::unbounded();

let mut paths = paths.iter();
let mut walker = WalkBuilder::new(paths.next().unwrap());
let mut paths_iter = paths.iter();
let mut walker = WalkBuilder::new(paths_iter.next().unwrap());

for path in paths {
for path in paths_iter {
walker.add(path);
}

if !ignored_directories.is_empty() {
let mut overrides = OverrideBuilder::new(".");
let ignore = config.no_ignore.map(|b| !b).unwrap_or(true);
let ignore_dot = ignore && config.no_ignore_dot.map(|b| !b).unwrap_or(true);
let ignore_parent = ignore && config.no_ignore_parent.map(|b| !b).unwrap_or(true);
let ignore_vcs = ignore && config.no_ignore_vcs.map(|b| !b).unwrap_or(true);
let ignore_linguist = ignore && config.no_ignore_linguist.map(|b| !b).unwrap_or(true);

let mut overrides = OverrideBuilder::new(".");
if !ignored_directories.is_empty() {
for ignored in ignored_directories {
rs_error!(overrides.add(&format!("!{}", ignored)));
rs_error!(overrides.add(&flip_rule(ignored)));
}

walker.overrides(overrides.build().expect("Excludes provided were invalid"));
}

let ignore = config.no_ignore.map(|b| !b).unwrap_or(true);
let ignore_dot = ignore && config.no_ignore_dot.map(|b| !b).unwrap_or(true);
let ignore_vcs = ignore && config.no_ignore_vcs.map(|b| !b).unwrap_or(true);
if ignore_linguist {
get_linguist_overrides(&mut overrides, paths, ignore_parent);
}
match overrides.build() {
Ok(overrides) => {
walker.overrides(overrides);
}
Err(err) => {
error!("Error reading overrides: {err}");
}
};

// Custom ignore files always work even if the `ignore` option is false,
// so we only add if that option is not present.
Expand All @@ -53,7 +74,7 @@ pub fn get_all_files<A: AsRef<Path>>(
.git_ignore(ignore_vcs)
.hidden(config.hidden.map(|b| !b).unwrap_or(true))
.ignore(ignore_dot)
.parents(ignore && config.no_ignore_parent.map(|b| !b).unwrap_or(true));
.parents(ignore_parent);

walker.build_parallel().run(move || {
let tx = tx.clone();
Expand Down Expand Up @@ -116,6 +137,41 @@ pub fn get_all_files<A: AsRef<Path>>(
}
}

/// Adds an exclusion rule to `overrides` for every `.gitattributes` pattern
/// that enables one of the [`LINGUIST_IGNORES`] attributes.
///
/// * `overrides` - builder the generated rules are appended to.
/// * `paths` - the directories being walked; each is searched for a
///   `.gitattributes` file.
/// * `ignore_parent` - when `true` (parent ignore files are respected), the
///   ancestors of each path are searched as well; when `false` only the path
///   itself is searched.
///
/// Files that cannot be read, lines that fail to parse, and patterns that are
/// not valid UTF-8 are logged through `rs_error!` and skipped.
///
/// NOTE(review): rules are added to the caller's builder unchanged, so they
/// are matched relative to that builder's root rather than to the directory
/// holding the `.gitattributes` file — confirm this is acceptable for
/// attributes found in parent directories.
pub(crate) fn get_linguist_overrides<A: AsRef<Path>>(
    overrides: &mut OverrideBuilder,
    paths: &[A],
    ignore_parent: bool,
) {
    // `ancestors()` yields the path itself first, so taking a single item
    // restricts the search to the given directory.
    let depth = if ignore_parent { usize::MAX } else { 1 };

    let gitattribute_files: Vec<PathBuf> = paths
        .iter()
        .flat_map(|path| path.as_ref().ancestors().take(depth))
        .map(|dir| dir.join(GITATTRIBUTES))
        .filter(|candidate| candidate.exists())
        .collect();

    for file in gitattribute_files {
        let content = rs_error!(std::fs::read(&file));
        for assignment in gix_attributes::parse(&content) {
            let (kind, attributes, _line_number) = rs_error!(assignment);

            // A pattern counts only when a linguist attribute is actually
            // enabled: `-linguist-generated`, `!linguist-generated` and
            // `linguist-generated=false` must not exclude anything.
            let marked = attributes.filter_map(Result::ok).any(|attr| {
                let name = attr.name.as_str();
                let is_linguist = LINGUIST_IGNORES.iter().any(|lin| *lin == name);
                is_linguist
                    && match attr.state {
                        gix_attributes::StateRef::Set => true,
                        gix_attributes::StateRef::Value(value) => value.as_bstr() != "false",
                        _ => false,
                    }
            });
            if !marked {
                continue;
            }

            // Macro definitions (`[attr]name ...`) carry no path pattern.
            if let Kind::Pattern(pattern) = kind {
                let rule = rs_error!(std::str::from_utf8(&pattern.text));
                rs_error!(overrides.add(&flip_rule(rule)));
            }
        }
    }
}

/// Returns the extension of `path` converted to lowercase, or `None` when the
/// path has no extension. Non-UTF-8 bytes are replaced lossily.
pub(crate) fn get_extension(path: &Path) -> Option<String> {
    let extension = path.extension()?;
    Some(extension.to_string_lossy().to_lowercase())
}
Expand All @@ -124,13 +180,19 @@ pub(crate) fn get_filename(path: &Path) -> Option<String> {
path.file_name().map(|e| e.to_string_lossy().to_lowercase())
}

/// Inverts the polarity of a glob rule.
///
/// A rule starting with `!` has that single leading `!` removed; any other
/// rule gets a `!` prepended. Only one `!` is ever stripped, so `"!!a"`
/// becomes `"!a"`.
pub(crate) fn flip_rule(rule: &str) -> String {
    // A `match` keeps the `format!` allocation off the path where the prefix
    // was stripped (the previous `unwrap_or(format!(..))` always built it).
    match rule.strip_prefix('!') {
        Some(stripped) => stripped.to_owned(),
        None => format!("!{rule}"),
    }
}

#[cfg(test)]
mod tests {
use std::fs;

use tempfile::TempDir;

use super::IGNORE_FILE;
use super::{GITATTRIBUTES, IGNORE_FILE};
use crate::{
config::Config,
language::{languages::Languages, LanguageType},
Expand Down Expand Up @@ -452,6 +514,41 @@ mod tests {
assert!(languages.get(LANGUAGE).is_some());
}

/// Files matched by a `linguist-generated` pattern in `.gitattributes` are
/// skipped by default and counted once `no_ignore_linguist` is enabled.
#[test]
fn no_ignore_linguist() {
    let dir = TempDir::new().expect("Couldn't create temp dir.");
    let mut config = Config::default();
    let mut languages = Languages::new();

    fs::write(
        dir.path().join(GITATTRIBUTES),
        format!("{} linguist-generated", IGNORE_PATTERN),
    )
    .unwrap();
    fs::write(dir.path().join(FILE_NAME), FILE_CONTENTS).unwrap();

    // Default configuration: the linguist attribute hides the file.
    super::get_all_files(
        &[dir.path().to_str().unwrap()],
        &[],
        &mut languages,
        &config,
    );

    assert!(languages.get(LANGUAGE).is_none());

    // Opting out of linguist handling makes the same file visible again.
    config.no_ignore_linguist = Some(true);

    super::get_all_files(
        &[dir.path().to_str().unwrap()],
        &[],
        &mut languages,
        &config,
    );

    assert!(languages.get(LANGUAGE).is_some());
}

#[test]
fn custom_ignore() {
let dir = TempDir::new().expect("Couldn't create temp dir.");
Expand Down
Loading