diff --git a/.ci/benchmark.txt b/.ci/benchmark.txt index 72e2d1465..4ee4a90d8 100644 --- a/.ci/benchmark.txt +++ b/.ci/benchmark.txt @@ -1,4 +1,4 @@ -META MD5 f109024fe081775340b7129cfe237ba4 +META MD5 79f7c389d3a6a9e6563bf51e2947114b DATA MD5 608e4361530d18b37b38832a965f3626 DATA: 16995334 interested lines. MARKUP: 63711 items FileType FileNumber ValidLines Positives Negatives @@ -164,7 +164,7 @@ FileType FileNumber ValidLines Positives Negatives .pyx 2 1094 24 .r 4 62 5 2 .rake 2 51 4 -.rb 834 127133 383 3069 +.rb 834 127133 384 3070 .re 1 31 1 .red 1 159 1 .release 1 13 4 @@ -225,13 +225,13 @@ FileType FileNumber ValidLines Positives Negatives .yml 560 56585 1897 1386 .zsh 6 872 11 .zsh-theme 1 97 1 -TOTAL: 11361 16995334 17153 53612 +TOTAL: 11361 16995334 17154 53613 credsweeper result_cnt : 0, lost_cnt : 0, true_cnt : 0, false_cnt : 0 Rules Positives Negatives Reported TP FP TN FN FPR FNR ACC PRC RCL F1 ------------------------------ ----------- ----------- ---------- ---- ---- ----- ----- -------- -------- -------- ----- -------- ---- API 242 4002 0 0 4002 242 0.000000 1.000000 0.942978 0.000000 AWS Client ID 205 19 0 0 19 205 0.000000 1.000000 0.084821 0.000000 -AWS Multi 82 11 0 0 11 82 0.000000 1.000000 0.118280 0.000000 +AWS Multi 83 12 0 0 12 83 0.000000 1.000000 0.126316 0.000000 AWS S3 Bucket 92 0 0 0 0 92 1.000000 0.000000 0.000000 Akamai Credentials 6 2 0 0 2 6 0.000000 1.000000 0.250000 0.000000 Anthropic API Key 1 0 0 0 0 1 1.000000 0.000000 0.000000 @@ -283,4 +283,4 @@ Token 1140 5268 0 0 Twilio Credentials 30 39 0 0 39 30 0.000000 1.000000 0.565217 0.000000 URL Credentials 225 401 0 0 401 225 0.000000 1.000000 0.640575 0.000000 UUID 2517 3716 0 0 3716 2517 0.000000 1.000000 0.596182 0.000000 - 17153 53612 0 0 0 53612 17153 0.000000 1.000000 0.757606 0.000000 + 17154 53613 0 0 0 53613 17154 0.000000 1.000000 0.757599 0.000000 diff --git a/constants.py b/constants.py index 6b0b5c694..5b74efaf4 100644 --- a/constants.py +++ b/constants.py @@ -3,3 +3,4 @@ LABEL_OTHER = 'X' ALLOWED_LABELS = (LABEL_TRUE, LABEL_FALSE, LABEL_OTHER) PRIVATE_KEY_CATEGORY = "PEM Private Key" +MULTI_PATTERN_RULES = ("AWS Multi", "Google Multi", "JWK") diff --git a/meta/39def7b4.csv b/meta/39def7b4.csv index d1fb4d8cd..e89161877 100644 --- a/meta/39def7b4.csv +++ b/meta/39def7b4.csv @@ -126,11 +126,11 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,ValueStart,Valu 8971,fe313ddd,GitHub,39def7b4,data/39def7b4/lib/client/fe313ddd.rb,18,18,F,,,,,Auth 8972,b5f11327,GitHub,39def7b4,data/39def7b4/lib/b5f11327.rb,8,8,F,,,,,Secret 10958,b5ca0850,GitHub,39def7b4,data/39def7b4/spec/b5ca0850.rb,89,89,F,,,,,Credential -17247,d97e408a,GitHub,39def7b4,data/39def7b4/spec/d97e408a.rb,91,91,T,51,112,,,Credential +17247,d97e408a,GitHub,39def7b4,data/39def7b4/spec/d97e408a.rb,91,91,T,51,112,,,Credential:AWS Multi 1338229,d97e408a,GitHub,39def7b4,data/39def7b4/spec/d97e408a.rb,91,91,T,51,71,,,AWS Client ID 1238230,d97e408a,GitHub,39def7b4,data/39def7b4/spec/d97e408a.rb,91,94,T,51,93,,,AWS Multi -1338230,d97e408a,GitHub,39def7b4,data/39def7b4/spec/d97e408a.rb,91,94,T,72,52,,,AWS Multi -11037,d97e408a,GitHub,39def7b4,data/39def7b4/spec/d97e408a.rb,94,94,T,32,93,,,Credential +1338230,d97e408a,GitHub,39def7b4,data/39def7b4/spec/d97e408a.rb,91,94,F,72,52,,unpair,AWS Multi +11037,d97e408a,GitHub,39def7b4,data/39def7b4/spec/d97e408a.rb,94,94,T,32,93,,,Credential:AWS Multi 1238231,d97e408a,GitHub,39def7b4,data/39def7b4/spec/d97e408a.rb,94,94,T,32,52,,,AWS Client ID 12188,e84ecee2,GitHub,39def7b4,data/39def7b4/lib/e84ecee2.rb,45,45,F,,,,,Key 14554,2010d338,GitHub,39def7b4,data/39def7b4/lib/rest/2010d338.rb,380,380,F,,,,,Key @@ -288,7 +288,7 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,ValueStart,Valu 20955,d225a7ed,GitHub,39def7b4,data/39def7b4/lib/rest/d225a7ed.rb,34,34,F,,,,,API 20956,eaf6157d,GitHub,39def7b4,data/39def7b4/lib/rest/eaf6157d.rb,26,26,F,,,,,API 20957,90d44edc,GitHub,39def7b4,data/39def7b4/lib/api/rest/90d44edc.rb,40,40,F,,,,,API -21310,076b849a,GitHub,39def7b4,data/39def7b4/lib/rest/076b849a.rb,111,111,F,,,,,AWS Client ID:AWS Multi +21310,076b849a,GitHub,39def7b4,data/39def7b4/lib/rest/076b849a.rb,111,111,F,17,,,example,AWS Client ID:AWS Multi 22029,2d217115,GitHub,39def7b4,data/39def7b4/spec/api/2d217115.rb,36,36,X,21,27,,,Secret 22036,f118eccf,GitHub,39def7b4,data/39def7b4/spec/f118eccf.rb,47,47,F,,,,,Credential 22037,ad318ac0,GitHub,39def7b4,data/39def7b4/spec/api/ad318ac0.rb,58,58,F,,,,,API @@ -388,7 +388,7 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,ValueStart,Valu 42649,52694a75,GitHub,39def7b4,data/39def7b4/spec/api/52694a75.rb,46,46,T,33,51,,,Credential 42650,52694a75,GitHub,39def7b4,data/39def7b4/spec/api/52694a75.rb,52,52,T,33,51,,,Credential 42652,d97e408a,GitHub,39def7b4,data/39def7b4/spec/d97e408a.rb,119,119,T,58,78,,AWS Client ID,AWS Client ID -1238213,d97e408a,GitHub,39def7b4,data/39def7b4/spec/d97e408a.rb,119,119,T,58,119,,,AWS Multi:Credential +1238213,d97e408a,GitHub,39def7b4,data/39def7b4/spec/d97e408a.rb,119,119,T,58,119,,,Credential:AWS Multi 42659,f149a41a,GitHub,39def7b4,data/39def7b4/spec/f149a41a.rb,127,127,F,29,63,,,Credential:Twilio Credentials 42661,1f4966ff,GitHub,39def7b4,data/39def7b4/spec/api/1f4966ff.rb,33,33,F,,,,,API 42662,1f4966ff,GitHub,39def7b4,data/39def7b4/spec/api/1f4966ff.rb,86,86,F,,,,,API diff --git a/obfuscate_creds.py b/obfuscate_creds.py index 13cf69d28..362e4289c 100644 --- a/obfuscate_creds.py +++ b/obfuscate_creds.py @@ -8,7 +8,7 @@ from argparse import Namespace, ArgumentParser from typing import List -from constants import PRIVATE_KEY_CATEGORY, LABEL_TRUE +from constants import PRIVATE_KEY_CATEGORY, LABEL_TRUE, MULTI_PATTERN_RULES from meta_row import read_meta, MetaRow logging.basicConfig( @@ -537,7 +537,7 @@ def obfuscate_creds(meta_dir: str, dataset_dir: str, noise: int = 0): if LABEL_TRUE != row.GroundTruth: # obfuscation is only for True cases continue - elif row.Category in ["AWS Multi", "Google Multi"]: + elif row.Category in MULTI_PATTERN_RULES: # skip obfuscation for the categories which are multi pattern continue elif PRIVATE_KEY_CATEGORY == row.Category and row.LineStart < row.LineEnd: