Skip to content

Commit 629d326

Browse files
added path allow list (modified format)
1 parent 000a7bb commit 629d326

File tree

6 files changed

+108
-57
lines changed

6 files changed

+108
-57
lines changed

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,4 @@
2020
- includes a default allowlist when none is specified
2121
- if the pattern name "<GLOBAL>" is used it will be checked against all patterns
2222
- moved the allowlist code into lib.rs so that all hogs will use it by default
23-
23+
- added a new allowlist format that also supports checks against file paths

README.md

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -392,22 +392,37 @@ As of version 1.0.5, the current default regex JSON used is as follows:
392392

393393
## Allowlist JSON file format
394394

395-
Some of the scanners provide a allowlist feature. This allows you to specific a allowlist file that identifies exceptions
396-
to each regex pattern that should be excluded from the final output.
395+
Scanners provide an allowlist feature. This allows you to specify, for each pattern, a list of regular expressions; any
396+
finding matched by one of them is ignored by the scanner. You can now optionally supply a list of regular expressions that are evaluated against
397+
the file path as well.
397398

398399
The format for this allowlist file should be a single json object. Each key in the allowlist should match a key in the
399-
regex json, and the value should be an array of strings that are exceptions for that regex pattern. For example:
400+
regex json, and the value can be one of two things:
401+
1) An array of strings (regular expressions) that are exceptions for that regex pattern.
402+
2) An object with a required `patterns` key and an optional `paths` key, each holding an array of regular expression strings.
403+
404+
In addition, you can specify the key `<GLOBAL>` which is evaluated against all patterns.
405+
406+
The following is the default allowlist included in all scans:
407+
400408

401409
```json
402410
{
403-
"Email address": [
404-
"username@mail.com",
405-
"admin@mail.com"
411+
"Email address": {
412+
"patterns": [
413+
"(?i).*@newrelic.com"
406414
],
407-
"New Relic Account IDs in URL": [
408-
"newrelic.com/accounts/some-unoffensive-account-number",
409-
"newrelic.com/accounts/an-account-that-doesn't-exist-like-this-one",
415+
"paths": [
416+
"(?i)authors",
417+
"(?i)contributors",
418+
"(?i)license",
419+
"(?i)maintainers",
420+
"(?i)third_party_notices"
410421
]
422+
},
423+
"<GLOBAL>": [
424+
"(?i)example"
425+
]
411426
}
412427
```
413428

src/bin/duroc_hog.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,8 @@ fn run(arg_matches: &ArgMatches) -> Result<(), SimpleError> {
128128
output.extend(scan_file(fspath, &secret_scanner, f, "", unzip));
129129
}
130130

131+
let output: HashSet<FileFinding> = output.into_iter().filter(|ff| !secret_scanner.is_allowlisted_path(&ff.reason, ff.path.as_bytes())).collect();
132+
131133
info!("Found {} secrets", output.len());
132134
match secret_scanner.output_findings(&output) {
133135
Ok(_) => Ok(()),

src/default_allowlist.json

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,17 @@
11
{
2-
"Email address":[
3-
".*@newrelic.com"
4-
],
5-
"<GLOBAL>": ["(?i)example"]
6-
}
2+
"Email address": {
3+
"patterns": [
4+
"(?i).*@newrelic.com"
5+
],
6+
"paths": [
7+
"(?i)authors",
8+
"(?i)contributors",
9+
"(?i)license",
10+
"(?i)maintainers",
11+
"(?i)third_party_notices"
12+
]
13+
},
14+
"<GLOBAL>": [
15+
"(?i)example"
16+
]
17+
}

src/git_scanning.rs

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -223,8 +223,10 @@ impl GitScanner {
223223
);
224224
}
225225
if !secrets.is_empty() {
226-
let create_finding = self.secret_scanner.check_entropy(&reason, new_line);
227-
if create_finding {
226+
let path = delta.new_file().path().unwrap().to_str().unwrap().to_string();
227+
let enough_entropy = self.secret_scanner.check_entropy(&reason, new_line);
228+
let valid_path = !self.secret_scanner.is_allowlisted_path(&reason, path.as_bytes());
229+
if enough_entropy && valid_path {
228230
findings.insert(GitFinding {
229231
commit_hash: commit.id().to_string(),
230232
commit: commit.message().unwrap().to_string(),
@@ -234,13 +236,7 @@ impl GitScanner {
234236
date: NaiveDateTime::from_timestamp(commit.time().seconds(), 0)
235237
.to_string(),
236238
strings_found: secrets.clone(),
237-
path: delta
238-
.new_file()
239-
.path()
240-
.unwrap()
241-
.to_str()
242-
.unwrap()
243-
.to_string(),
239+
path,
244240
reason: reason.clone(),
245241
old_file_id: old_file_id.to_string(),
246242
new_file_id: new_file_id.to_string(),

src/lib.rs

Lines changed: 60 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ const ENTROPY_MAX_WORD_LEN: usize = 40;
217217
#[derive(Debug, Clone)]
218218
pub struct SecretScanner {
219219
pub regex_map: BTreeMap<String, EntropyRegex>,
220-
pub allowlist_map: BTreeMap<String, Vec<Regex>>,
220+
pub allowlist_map: BTreeMap<String, AllowList>,
221221
pub pretty_print: bool,
222222
pub output_path: Option<String>,
223223
pub entropy_min_word_len: usize,
@@ -255,6 +255,22 @@ pub enum PatternEntropy {
255255
},
256256
}
257257

258+
#[derive(Deserialize, Debug)]
259+
#[serde(untagged)]
260+
pub enum AllowListEnum {
261+
PatternList(Vec<String>),
262+
AllowListJson {
263+
patterns: Vec<String>,
264+
paths: Option<Vec<String>>
265+
}
266+
}
267+
268+
#[derive(Debug, Clone)]
269+
pub struct AllowList {
270+
pub pattern_list: Vec<Regex>,
271+
pub path_list: Vec<Regex>
272+
}
273+
258274
/// Used to instantiate the `SecretScanner` object with user-supplied options
259275
///
260276
/// Use the `new()` function to create a builder object, perform configurations as needed, then
@@ -630,32 +646,40 @@ impl SecretScannerBuilder {
630646
.collect()
631647
}
632648

633-
fn build_allowlist_from_str(input: &str) -> Result<BTreeMap<String, Vec<Regex>>, SimpleError> {
649+
fn vec_string_to_vec_regex(incoming_array: Vec<String>) -> Vec<Regex> {
650+
incoming_array
651+
.into_iter()
652+
.filter_map(|x| match Regex::new(&x) {
653+
Ok(r) => Some(r),
654+
Err(e) => {
655+
error!("Failed to parse regex: {}", e);
656+
None
657+
}
658+
})
659+
.collect()
660+
}
661+
662+
fn build_allowlist_from_str(input: &str) -> Result<BTreeMap<String, AllowList>, SimpleError> {
634663
info!("Attempting to parse JSON allowlist string");
635-
let allowlist: BTreeMap<String, Value> = match serde_json::from_str(input) {
664+
let allowlist: BTreeMap<String, AllowListEnum> = match serde_json::from_str(input) {
636665
Ok(m) => Ok(m),
637666
Err(e) => Err(SimpleError::with("Failed to parse allowlist JSON", e)),
638667
}?;
639668
allowlist
640669
.into_iter()
641-
.map(|(p, list)| match list {
642-
Value::Array(v) => {
643-
let l = v
644-
.into_iter()
645-
.filter_map(|v| match v {
646-
Value::String(s) => match Regex::new(&s) {
647-
Ok(r) => Some(r),
648-
Err(e) => {
649-
error!("Failed to parse regex in allowlist JSON: {}", e);
650-
None
651-
}
652-
},
653-
_ => None,
654-
})
655-
.collect();
656-
Ok((p, l))
670+
.map(|(p, allowlistobj)| match allowlistobj {
671+
AllowListEnum::PatternList(v) => {
672+
let l = SecretScannerBuilder::vec_string_to_vec_regex(v);
673+
Ok((p, AllowList { pattern_list: l, path_list: vec![] }))
674+
}
675+
AllowListEnum::AllowListJson { patterns: pattern_list, paths: path_list } => {
676+
let l1 = SecretScannerBuilder::vec_string_to_vec_regex(pattern_list);
677+
let l2 = match path_list {
678+
Some(v) => SecretScannerBuilder::vec_string_to_vec_regex(v),
679+
None => Vec::new()
680+
};
681+
Ok((p, AllowList { pattern_list: l1, path_list: l2 }))
657682
}
658-
_ => Err(SimpleError::new("Invalid allowlist JSON format")),
659683
})
660684
.collect()
661685
}
@@ -696,7 +720,7 @@ impl SecretScanner {
696720
let matches = x.1.pattern.find_iter(line);
697721
let matches_filtered: Vec<RustyHogMatch> = matches
698722
.filter(|m| self.check_entropy(x.0, &line[m.start()..m.end()]))
699-
.filter(|m| !self.is_allowlisted(x.0, &line[m.start()..m.end()]))
723+
.filter(|m| !self.is_allowlisted_pattern(x.0, &line[m.start()..m.end()]))
700724
.map(RustyHogMatch::from)
701725
.inspect(|x| debug!("RustyHogMatch: {:?}", x))
702726
.collect();
@@ -947,21 +971,24 @@ impl SecretScanner {
947971
Ok(())
948972
}
949973

950-
/// Checks if any of the provided tokens is allowlisted
951-
pub fn is_allowlisted(&self, pattern: &str, token: &[u8]) -> bool {
974+
/// Checks if the provided path name is allowlisted
975+
pub fn is_allowlisted_path(&self, pattern: &str, path: &[u8]) -> bool {
952976
if let Some(allowlist) = self.allowlist_map.get(pattern) {
953-
for allow_regex in allowlist {
954-
if allow_regex.find(token).is_some() {
955-
return true;
956-
}
957-
}
977+
if allowlist.path_list.iter().any(|x| x.find(path).is_some()) { return true }
958978
}
959979
if let Some(allowlist) = self.allowlist_map.get("<GLOBAL>") {
960-
for allow_regex in allowlist {
961-
if allow_regex.find(token).is_some() {
962-
return true;
963-
}
964-
}
980+
if allowlist.path_list.iter().any(|x| x.find(path).is_some()) { return true }
981+
}
982+
false
983+
}
984+
985+
/// Checks if the provided token is allowlisted
986+
pub fn is_allowlisted_pattern(&self, pattern: &str, token: &[u8]) -> bool {
987+
if let Some(allowlist) = self.allowlist_map.get(pattern) {
988+
if allowlist.pattern_list.iter().any(|x| x.find(token).is_some()) { return true }
989+
}
990+
if let Some(allowlist) = self.allowlist_map.get("<GLOBAL>") {
991+
if allowlist.pattern_list.iter().any(|x| x.find(token).is_some()) { return true }
965992
}
966993
false
967994
}

0 commit comments

Comments
 (0)