diff --git a/.ci/benchmark.txt b/.ci/benchmark.txt index f56b27dcd..2b2630063 100644 --- a/.ci/benchmark.txt +++ b/.ci/benchmark.txt @@ -1,9 +1,9 @@ -META MD5 346719990276f1c7ab597e7ea85f5b21 -DATA MD5 d9a50a89fa4ce3c5bf3cdc5d1958ee2a -DATA: 16999171 interested lines. MARKUP: 63795 items +META MD5 8df9d057ad2c7b1c7ae03bf56d5bdd4f +DATA MD5 b3618d25df84730a5d0f262ed4a19f28 +DATA: 17002898 interested lines. MARKUP: 63818 items FileType FileNumber ValidLines Positives Negatives --------------- ------------ ------------ ----------- ----------- - 685 567300 136 695 + 686 567302 138 695 .04 2 149 4 .1 2 641 2 10 .admx 1 26 1 @@ -12,7 +12,7 @@ FileType FileNumber ValidLines Positives Negatives .asciidoc 101 15394 68 350 .axaml 5 286 9 .backup 1 62 2 -.bash 2 2158 5 +.bash 3 2161 1 5 .bat 5 248 2 16 .bats 15 2804 14 64 .bazel 3 424 14 @@ -31,13 +31,13 @@ FileType FileNumber ValidLines Positives Negatives .cmd 4 401 2 3 .cnf 8 858 21 34 .coffee 1 585 3 -.conf 60 4769 65 104 -.config 20 492 7 43 +.conf 61 4771 67 104 +.config 21 494 9 43 .cpp 22 7300 20 77 .creds 1 10 2 1 .crlf 1 27 1 .crt 2 4979 119 -.cs 262 81986 250 1032 +.cs 264 83360 257 1035 .csp 3 379 8 .csproj 1 14 1 .csv 1 109 84 @@ -84,7 +84,7 @@ FileType FileNumber ValidLines Positives Negatives .ipynb 6 4804 10 10 .j 1 241 4 .j2 32 6043 7 209 -.java 650 141112 478 1455 +.java 651 141256 479 1455 .jenkinsfile 1 58 2 6 .jinja2 1 64 2 .js 640 530803 859 3134 @@ -94,6 +94,7 @@ FileType FileNumber ValidLines Positives Negatives .jwt 1 1 2 .key 115 3067 105 11 .ks 1 25 1 +.ksh 1 3 1 .kt 120 19864 69 377 .l 1 982 1 .las 1 6656 36 @@ -138,7 +139,7 @@ FileType FileNumber ValidLines Positives Negatives .pan 2 48 4 .patch 3 109384 4 25 .pbxproj 1 941 1 -.pem 65 1467 64 3 +.pem 66 1469 66 3 .php 394 81495 167 1487 .pl 16 14727 7 37 .pm 10 5224 1 30 @@ -150,9 +151,9 @@ FileType FileNumber ValidLines Positives Negatives .ppk 1 45 1 .private 1 15 1 .proj 1 85 5 -.properties 55 1637 68 54 +.properties 56 1640 69 54 .proto 5 5768 2 63 -.ps1 16 8509 15 86 +.ps1 17 8511 16 86 .ps1xml 1 5022 1 .pug 2 193 2 .purs 1 69 4 @@ -185,7 +186,7 @@ FileType FileNumber ValidLines Positives Negatives .sbt 3 570 7 .scala 52 5600 38 95 .secrets 1 11 1 -.sh 143 23115 75 478 +.sh 144 23118 76 478 .slim 1 153 3 .smali 1 775 12 .snap 3 1708 7 11 @@ -213,7 +214,7 @@ FileType FileNumber ValidLines Positives Negatives .travis 1 34 2 4 .ts 607 107776 265 1991 .tsx 54 7914 1 125 -.txt 322 89402 5258 5784 +.txt 428 91582 5258 5784 .utf8 1 77 1 .vsmdi 1 6 2 2 .vue 50 8736 1 165 @@ -221,11 +222,11 @@ FileType FileNumber ValidLines Positives Negatives .xcscheme 1 109 1 .xib 11 503 164 .xsl 1 311 1 -.yaml 171 31958 207 395 -.yml 560 56585 1897 1386 -.zsh 6 872 11 +.yaml 172 31960 209 395 +.yml 561 56587 1899 1386 +.zsh 7 875 1 11 .zsh-theme 1 97 1 -TOTAL: 11375 16999171 17141 53736 +TOTAL: 11496 17002898 17167 53739 credsweeper result_cnt : 0, lost_cnt : 0, true_cnt : 0, false_cnt : 0 Rules Positives Negatives Reported TP FP TN FN FPR FNR ACC PRC RCL F1 ------------------------------ ----------- ----------- ---------- ---- ---- ----- ----- -------- -------- -------- ----- -------- ---- @@ -239,7 +240,7 @@ Auth 1166 3616 0 0 Azure Access Token 24 0 0 0 0 24 1.000000 0.000000 0.000000 BASE64 Private Key 22 4 0 0 4 22 0.000000 1.000000 0.153846 0.000000 BASE64 encoded PEM Private Key 12 0 0 0 0 12 1.000000 0.000000 0.000000 -Basic Authorization 688 555 0 0 555 688 0.000000 1.000000 0.446500 0.000000 +Basic Authorization 689 555 0 0 555 689 0.000000 1.000000 0.446141 0.000000 Bearer Authorization 182 0 0 0 0 182 1.000000 0.000000 0.000000 CMD ConvertTo-SecureString 13 4 0 0 4 13 0.000000 1.000000 0.235294 0.000000 CMD Password 33 137 0 0 137 33 0.000000 1.000000 0.805882 0.000000 @@ -270,7 +271,7 @@ Nonce 131 109 0 0 OTP / 2FA Secret 64 3 0 0 3 64 0.000000 1.000000 0.044776 0.000000 Other 0 20 0 0 20 0 0.000000 1.000000 PEM Private Key 1157 72 0 0 72 1157 0.000000 1.000000 0.058584 0.000000 -Password 2595 11366 0 0 11366 2595 0.000000 1.000000 0.814125 0.000000 +Password 2603 11369 0 0 11369 2603 0.000000 1.000000 0.813699 0.000000 Perplexity API Key 2 0 0 0 0 2 1.000000 0.000000 0.000000 Postman Credentials 2 0 0 0 0 2 1.000000 0.000000 0.000000 SQL Password 44 14 0 0 14 44 0.000000 1.000000 0.241379 0.000000 @@ -278,10 +279,11 @@ Salesforce Credentials 6 0 0 0 Salt 90 130 0 0 130 90 0.000000 1.000000 0.590909 0.000000 Secret 1525 2492 0 0 2492 1525 0.000000 1.000000 0.620363 0.000000 Slack Token 15 1 0 0 1 15 0.000000 1.000000 0.062500 0.000000 +SonarQube Credentials 11 0 0 0 0 11 1.000000 0.000000 0.000000 Stripe Credentials 2 0 0 0 0 2 1.000000 0.000000 0.000000 Tencent WeChat API App ID 47 0 0 0 0 47 1.000000 0.000000 0.000000 -Token 1144 5285 0 0 5285 1144 0.000000 1.000000 0.822056 0.000000 +Token 1150 5285 0 0 5285 1150 0.000000 1.000000 0.821290 0.000000 Twilio Credentials 30 39 0 0 39 30 0.000000 1.000000 0.565217 0.000000 URL Credentials 225 401 0 0 401 225 0.000000 1.000000 0.640575 0.000000 UUID 2517 3716 0 0 3716 2517 0.000000 1.000000 0.596182 0.000000 - 17141 53736 0 0 0 53736 17141 0.000000 1.000000 0.758158 0.000000 + 17167 53739 0 0 0 53739 17167 0.000000 1.000000 0.757891 0.000000 diff --git a/meta/2e00b328.csv b/meta/2e00b328.csv new file mode 100644 index 000000000..585fd8733 --- /dev/null +++ b/meta/2e00b328.csv @@ -0,0 +1,24 @@ +Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,ValueStart,ValueEnd,CryptographyKey,PredefinedPattern,Category +11536745,a288500e,GitHub,2e00b328,data/2e00b328/src/script/a288500e.ksh,4,4,T,5,49,,,SonarQube Credentials +11536746,6bd02b7c,GitHub,2e00b328,data/2e00b328/src/script/6bd02b7c.sh,4,4,T,5,49,,,SonarQube Credentials +11536747,c85ce304,GitHub,2e00b328,data/2e00b328/src/script/c85ce304.ps1,2,2,T,12,56,,,SonarQube Credentials +11536748,038ff36f,GitHub,2e00b328,data/2e00b328/src/script/038ff36f.bash,4,4,T,5,49,,,SonarQube Credentials +11536749,0743c9c0,GitHub,2e00b328,data/2e00b328/test/client/build/0743c9c0.cs,54,54,T,41,61,,,Basic Authorization +11536750,ba458cdb,GitHub,2e00b328,data/2e00b328/src/script/ba458cdb.zsh,4,4,T,5,49,,,SonarQube Credentials +11536751,8ebb233e,GitHub,2e00b328,data/2e00b328/test/src/build/8ebb233e.java,65,65,T,48,53,,,Password +11536755,602fdca4,GitHub,2e00b328,data/2e00b328/src/602fdca4.conf,2,2,T,13,57,,,SonarQube Credentials:Token +11536756,83d57937,GitHub,2e00b328,data/2e00b328/src/resource/app/83d57937.properties,3,3,T,27,37,,,Password +11536757,97f01c1d,GitHub,2e00b328,data/2e00b328/conf/97f01c1d,2,2,T,12,56,,,SonarQube Credentials:Token +11536758,23529dd5,GitHub,2e00b328,data/2e00b328/src/resource/app/23529dd5.yaml,3,3,T,9,53,,,SonarQube Credentials:Token +11536759,6841cbe9,GitHub,2e00b328,data/2e00b328/src/6841cbe9.config,2,2,T,13,57,,,SonarQube Credentials:Token +11536760,7bee59bf,GitHub,2e00b328,data/2e00b328/src/7bee59bf.pem,2,2,T,13,57,,,SonarQube Credentials:Token +11536761,236e86fb,GitHub,2e00b328,data/2e00b328/src/resource/app/236e86fb.yml,3,3,T,9,53,,,SonarQube Credentials:Token +11536762,26c9c814,GitHub,2e00b328,data/2e00b328/test/build/26c9c814.cs,794,794,T,53,68,,,Password +11536763,26c9c814,GitHub,2e00b328,data/2e00b328/test/build/26c9c814.cs,382,382,F,181,,,,Password +11536764,26c9c814,GitHub,2e00b328,data/2e00b328/test/build/26c9c814.cs,786,786,T,77,85,,,Password +11536765,26c9c814,GitHub,2e00b328,data/2e00b328/test/build/26c9c814.cs,790,790,T,51,59,,,Password +11536766,26c9c814,GitHub,2e00b328,data/2e00b328/test/build/26c9c814.cs,791,791,F,51,,,,Password +11536767,26c9c814,GitHub,2e00b328,data/2e00b328/test/build/26c9c814.cs,793,793,F,53,65,,,Password +11536768,26c9c814,GitHub,2e00b328,data/2e00b328/test/build/26c9c814.cs,839,839,T,101,109,,,Password +11536769,26c9c814,GitHub,2e00b328,data/2e00b328/test/build/26c9c814.cs,877,877,T,87,95,,,Password +11536770,0743c9c0,GitHub,2e00b328,data/2e00b328/test/client/build/0743c9c0.cs,39,39,T,48,55,,,Password diff --git a/meta/48fd3902.csv b/meta/48fd3902.csv index bceef75ef..d2c01f1c9 100644 --- a/meta/48fd3902.csv +++ b/meta/48fd3902.csv @@ -231,8 +231,8 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,ValueStart,Valu 1339606,c709b566,GitHub,48fd3902,data/48fd3902/test/src/util/c709b566.kt,135,135,T,44,80,,,UUID 1479593,4799bb8c,GitHub,48fd3902,data/48fd3902/test/src/4799bb8c.kt,565,565,F,46,89,,,Key 1480755,eaeb2b79,GitHub,48fd3902,data/48fd3902/test/src/eaeb2b79.java,853,853,F,94,104,,,Credential:Password -1509733,c6175528,GitHub,48fd3902,data/48fd3902/test/src/c6175528.kt,215,241,T,12,41,,,PEM Private Key -1509734,e9b76075,GitHub,48fd3902,data/48fd3902/test/src/util/e9b76075.kt,9,35,T,62,42,,,PEM Private Key +1509733,c6175528,GitHub,48fd3902,data/48fd3902/test/src/c6175528.kt,215,241,T,12,41,,broken-with-fake,PEM Private Key +1509734,e9b76075,GitHub,48fd3902,data/48fd3902/test/src/util/e9b76075.kt,9,35,T,62,42,,broken-with-fake,PEM Private Key 11527153,e33c22c3,GitHub,48fd3902,data/48fd3902/conf/e33c22c3.yml,20,20,F,19,31,,,Auth 11527154,28858c37,GitHub,48fd3902,data/48fd3902/docs/28858c37.md,88,88,F,18,25,,,Key 11527155,95ce83e8,GitHub,48fd3902,data/48fd3902/docs/95ce83e8.md,28,28,F,21,25,,,Key diff --git a/obfuscate_creds.py b/obfuscate_creds.py index 868053b73..ff80a8c69 100644 --- a/obfuscate_creds.py +++ b/obfuscate_creds.py @@ -155,6 +155,7 @@ def get_obfuscated_value(value, meta_row: MetaRow): "ANVA", "AROA", "APKA", "ASCA", "ASIA", "AIza", "AKGP"]) \ or value.startswith('1//0') and GOOGLEAPI_PATTERN.match(value) \ or value.startswith('key-') and 36 == len(value) \ + or value.startswith('squ_') and 44 == len(value) \ or value.startswith("xox") and 15 <= len(value) and value[3] in "abeoprst" and '-' == value[4]: obfuscated_value = value[:4] + generate_value(value[4:]) elif any(value.startswith(x) for x in ["ya29.", "pass:", "salt:", "akab-", "PMAK-", "PMAT-", "xapp-", "pplx-"]): diff --git a/snapshot.json b/snapshot.json index 0e52145b8..7878e47e3 100644 --- a/snapshot.json +++ b/snapshot.json @@ -286,6 +286,7 @@ "e06f3aadea4726fce31c2f6fba94d068b9d4b11acf556d0d6ad2b74ed18e7c07": "https://github.com/NickCarneiro/curlconverter", "e0a2b2e2520c62c5a30a53418e84cd225c63087ba493731cd4c12e7a8ae4220d": "https://github.com/ArchiveBox/ArchiveBox", "e0a4c18c4a074016beef46886b03e456102e2db1f157d169466db018dc95977c": "https://github.com/dbcli/pgcli", + "e1751431dc5896433bcf3c3449c7c76cb9e1aa82e5a763abd55576e2465fdcde": "https://github.com/SonarSource/sonar-scanner-msbuild", "e1ccce67acc60ee97732c4e863d1b7fcce94f6044338b129618d47baca52bfe4": "https://github.com/joemccann/dillinger", "e223321de9e3ad9214722d044c2212733ed06000c7f351c8fcd0465e466cf757": "https://github.com/redisson/redisson", "e244bae38cc2f0380bbb6b609f28168b9617f4c2297f58a2dfe47d649d2ff876": "https://github.com/ubisoft/massgate",