diff --git a/.ci/benchmark.txt b/.ci/benchmark.txt index 28b667f33..f56b27dcd 100644 --- a/.ci/benchmark.txt +++ b/.ci/benchmark.txt @@ -1,9 +1,9 @@ -META MD5 8cf8469b772217eaa1dd9fc8c0a912de -DATA MD5 5ff49df044c012cb13bab05ee637e708 -DATA: 16995334 interested lines. MARKUP: 63736 items +META MD5 346719990276f1c7ab597e7ea85f5b21 +DATA MD5 d9a50a89fa4ce3c5bf3cdc5d1958ee2a +DATA: 16999171 interested lines. MARKUP: 63795 items FileType FileNumber ValidLines Positives Negatives --------------- ------------ ------------ ----------- ----------- - 684 567150 136 695 + 685 567300 136 695 .04 2 149 4 .1 2 641 2 10 .admx 1 26 1 @@ -62,7 +62,7 @@ FileType FileNumber ValidLines Positives Negatives .gd 1 37 1 .gml 3 3075 16 .gni 3 5017 17 -.go 1242 706630 1484 6137 +.go 1251 709843 1494 6177 .golden 5 1168 1 50 .gradle 50 4295 8 142 .graphql 8 454 2 13 @@ -157,7 +157,7 @@ FileType FileNumber ValidLines Positives Negatives .pug 2 193 2 .purs 1 69 4 .pxd 1 150 2 4 -.py 876 292413 755 3881 +.py 878 292875 756 3897 .pyi 4 1361 10 .pyp 1 167 1 .python 1 213 @@ -221,21 +221,21 @@ FileType FileNumber ValidLines Positives Negatives .xcscheme 1 109 1 .xib 11 503 164 .xsl 1 311 1 -.yaml 169 31946 207 393 +.yaml 171 31958 207 395 .yml 560 56585 1897 1386 .zsh 6 872 11 .zsh-theme 1 97 1 -TOTAL: 11361 16995334 17130 53678 +TOTAL: 11375 16999171 17141 53736 credsweeper result_cnt : 0, lost_cnt : 0, true_cnt : 0, false_cnt : 0 Rules Positives Negatives Reported TP FP TN FN FPR FNR ACC PRC RCL F1 ------------------------------ ----------- ----------- ---------- ---- ---- ----- ----- -------- -------- -------- ----- -------- ---- -API 243 4001 0 0 4001 243 0.000000 1.000000 0.942743 0.000000 +API 243 4009 0 0 4009 243 0.000000 1.000000 0.942850 0.000000 AWS Client ID 213 33 0 0 33 213 0.000000 1.000000 0.134146 0.000000 AWS Multi 34 66 0 0 66 34 0.000000 1.000000 0.660000 0.000000 AWS S3 Bucket 92 0 0 0 0 92 1.000000 0.000000 0.000000 Akamai Credentials 6 2 0 0 2 6 0.000000 1.000000 0.250000 0.000000 Anthropic API Key 1 0 0 0 0 1 1.000000 0.000000 0.000000 -Auth 1165 3610 0 0 3610 1165 0.000000 1.000000 0.756021 0.000000 +Auth 1166 3616 0 0 3616 1166 0.000000 1.000000 0.756169 0.000000 Azure Access Token 24 0 0 0 0 24 1.000000 0.000000 0.000000 BASE64 Private Key 22 4 0 0 4 22 0.000000 1.000000 0.153846 0.000000 BASE64 encoded PEM Private Key 12 0 0 0 0 12 1.000000 0.000000 0.000000 @@ -246,11 +246,12 @@ CMD Password 33 137 0 0 CMD Secret 1 18 0 0 18 1 0.000000 1.000000 0.947368 0.000000 CMD Token 6 2 0 0 2 6 0.000000 1.000000 0.250000 0.000000 CURL User Password 7 2 0 0 2 7 0.000000 1.000000 0.222222 0.000000 -Credential 96 598 0 0 598 96 0.000000 1.000000 0.861671 0.000000 +Credential 96 602 0 0 602 96 0.000000 1.000000 0.862464 0.000000 Docker Swarm Token 2 0 0 0 0 2 1.000000 0.000000 0.000000 Dropbox App secret 74 145 0 0 145 74 0.000000 1.000000 0.662100 0.000000 Facebook Access Token 0 1 0 0 1 0 0.000000 1.000000 Firebase Domain 39 0 0 0 0 39 1.000000 0.000000 0.000000 +Github Classic Token 1 0 0 0 0 1 1.000000 0.000000 0.000000 Google API Key 13 0 0 0 0 13 1.000000 0.000000 0.000000 Google Multi 11 0 0 0 0 11 1.000000 0.000000 0.000000 Google OAuth Access Token 3 0 0 0 0 3 1.000000 0.000000 0.000000 @@ -258,29 +259,29 @@ Google OAuth Refresh Token 1 2 0 0 Grafana Access Policy Token 0 2 0 0 2 0 0.000000 1.000000 Grafana Provisioned API Key 7 16 0 0 16 7 0.000000 1.000000 0.695652 0.000000 Grafana Service Account Token 3 0 0 0 0 3 1.000000 0.000000 0.000000 -JSON Web Token 174 61 0 0 61 174 0.000000 1.000000 0.259574 0.000000 +JSON Web Token 175 61 0 0 61 175 0.000000 1.000000 0.258475 0.000000 JWK 80 3 0 0 3 80 0.000000 1.000000 0.036145 0.000000 Jira / Confluence PAT token 0 4 0 0 4 0 0.000000 1.000000 -Key 4225 20798 0 0 20798 4225 0.000000 1.000000 0.831155 0.000000 +Key 4225 20804 0 0 20804 4225 0.000000 1.000000 0.831196 0.000000 MailGun API Key 8 0 0 0 0 8 1.000000 0.000000 0.000000 NKEY Seed 60 0 0 0 0 60 1.000000 0.000000 0.000000 NTLM Token 4 0 0 0 0 4 1.000000 0.000000 0.000000 Nonce 131 109 0 0 109 131 0.000000 1.000000 0.454167 0.000000 OTP / 2FA Secret 64 3 0 0 3 64 0.000000 1.000000 0.044776 0.000000 Other 0 20 0 0 20 0 0.000000 1.000000 -PEM Private Key 1154 72 0 0 72 1154 0.000000 1.000000 0.058728 0.000000 -Password 2595 11364 0 0 11364 2595 0.000000 1.000000 0.814098 0.000000 +PEM Private Key 1157 72 0 0 72 1157 0.000000 1.000000 0.058584 0.000000 +Password 2595 11366 0 0 11366 2595 0.000000 1.000000 0.814125 0.000000 Perplexity API Key 2 0 0 0 0 2 1.000000 0.000000 0.000000 Postman Credentials 2 0 0 0 0 2 1.000000 0.000000 0.000000 SQL Password 44 14 0 0 14 44 0.000000 1.000000 0.241379 0.000000 Salesforce Credentials 6 0 0 0 0 6 1.000000 0.000000 0.000000 Salt 90 130 0 0 130 90 0.000000 1.000000 0.590909 0.000000 -Secret 1525 2476 0 0 2476 1525 0.000000 1.000000 0.618845 0.000000 +Secret 1525 2492 0 0 2492 1525 0.000000 1.000000 0.620363 0.000000 Slack Token 15 1 0 0 1 15 0.000000 1.000000 0.062500 0.000000 Stripe Credentials 2 0 0 0 0 2 1.000000 0.000000 0.000000 Tencent WeChat API App ID 47 0 0 0 0 47 1.000000 0.000000 0.000000 -Token 1139 5269 0 0 5269 1139 0.000000 1.000000 0.822253 0.000000 +Token 1144 5285 0 0 5285 1144 0.000000 1.000000 0.822056 0.000000 Twilio Credentials 30 39 0 0 39 30 0.000000 1.000000 0.565217 0.000000 URL Credentials 225 401 0 0 401 225 0.000000 1.000000 0.640575 0.000000 UUID 2517 3716 0 0 3716 2517 0.000000 1.000000 0.596182 0.000000 - 17130 53678 0 0 0 53678 17130 0.000000 1.000000 0.758078 0.000000 + 17141 53736 0 0 0 53736 17141 0.000000 1.000000 0.758158 0.000000 diff --git a/meta/10e03406.csv b/meta/10e03406.csv new file mode 100644 index 000000000..134377dea --- /dev/null +++ b/meta/10e03406.csv @@ -0,0 +1,60 @@ +Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,ValueStart,ValueEnd,CryptographyKey,PredefinedPattern,Category +11536686,0cd5033e,GitHub,10e03406,data/10e03406/test/pkg/0cd5033e.go,107,107,F,56,120,,,Token +11536687,4278973e,GitHub,10e03406,data/10e03406/test/pkg/4278973e.go,21,47,T,15,29,,,PEM Private Key +11536688,4278973e,GitHub,10e03406,data/10e03406/test/pkg/4278973e.go,57,83,T,20,29,,,PEM Private Key +11536689,7caa428b,GitHub,10e03406,data/10e03406/test/pkg/7caa428b.go,149,149,T,17,71,,,Token +11536690,7caa428b,GitHub,10e03406,data/10e03406/test/pkg/7caa428b.go,618,618,T,22,76,,,Token +11536691,7caa428b,GitHub,10e03406,data/10e03406/test/pkg/7caa428b.go,655,655,T,16,70,,,Token +11536692,7caa428b,GitHub,10e03406,data/10e03406/test/pkg/7caa428b.go,1179,1179,T,22,76,,,Token +11536693,6341416d,GitHub,10e03406,data/10e03406/test/pkg/cluster/6341416d.go,32,32,F,25,128,,HASH,Key:Token +11536694,d483ffb9,GitHub,10e03406,data/10e03406/test/pkg/cluster/d483ffb9.go,54,54,T,175,199,,,Auth +11536695,dfb6fb1d,GitHub,10e03406,data/10e03406/test/pkg/dfb6fb1d.go,708,708,T,18,58,,,Token:Github Classic Token +11536696,fd2654ce,GitHub,10e03406,data/10e03406/test/pkg/fd2654ce.go,46,46,T,70,299,,,JSON Web Token +11536697,51945883,GitHub,10e03406,data/10e03406/test/pkg/sys/template/51945883.go,93,93,F,40,104,,,API +11536698,51945883,GitHub,10e03406,data/10e03406/test/pkg/sys/template/51945883.go,110,110,F,38,102,,,Credential +11536699,51945883,GitHub,10e03406,data/10e03406/test/pkg/sys/template/51945883.go,147,147,F,40,104,,,API +11536700,51945883,GitHub,10e03406,data/10e03406/test/pkg/sys/template/51945883.go,164,164,F,38,102,,,Credential +11536701,51945883,GitHub,10e03406,data/10e03406/test/pkg/sys/template/51945883.go,204,204,F,40,104,,,API +11536702,51945883,GitHub,10e03406,data/10e03406/test/pkg/sys/template/51945883.go,221,221,F,38,102,,,Credential +11536703,51945883,GitHub,10e03406,data/10e03406/test/pkg/sys/template/51945883.go,252,252,F,40,104,,,API +11536704,51945883,GitHub,10e03406,data/10e03406/test/pkg/sys/template/51945883.go,269,269,F,38,102,,,Credential +11536705,8644694b,GitHub,10e03406,data/10e03406/test/secret/8644694b.py,41,41,F,31,35,,,Secret +11536706,8644694b,GitHub,10e03406,data/10e03406/test/secret/8644694b.py,43,43,F,23,27,,,Secret +11536707,8644694b,GitHub,10e03406,data/10e03406/test/secret/8644694b.py,52,52,F,31,35,,,Secret +11536708,8644694b,GitHub,10e03406,data/10e03406/test/secret/8644694b.py,53,53,F,31,35,,,Secret +11536709,fd8cb0ea,GitHub,10e03406,data/10e03406/test/secret/fd8cb0ea.py,25,51,T,9,29,,,PEM Private Key +11536710,fd8cb0ea,GitHub,10e03406,data/10e03406/test/secret/fd8cb0ea.py,76,76,F,31,35,,,Secret +11536711,fd8cb0ea,GitHub,10e03406,data/10e03406/test/secret/fd8cb0ea.py,78,78,F,23,27,,,Secret +11536712,fd8cb0ea,GitHub,10e03406,data/10e03406/test/secret/fd8cb0ea.py,86,86,F,31,35,,,Secret +11536713,fd8cb0ea,GitHub,10e03406,data/10e03406/test/secret/fd8cb0ea.py,87,87,F,31,35,,,Secret +11536714,3f7b7565,GitHub,10e03406,data/10e03406/test/secret/resource/3f7b7565.yaml,3,3,F,12,20,,,Password +11536715,92807aff,GitHub,10e03406,data/10e03406/test/secret/resource/92807aff.yaml,3,3,F,12,20,,,Password +11536716,1e4ad539,GitHub,10e03406,data/10e03406/test/util/pkg/1e4ad539.go,23,23,F,20,123,,,Key:Token +11536717,0cd5033e,GitHub,10e03406,data/10e03406/test/pkg/0cd5033e.go,199,199,F,56,120,,,Token +11536718,0cd5033e,GitHub,10e03406,data/10e03406/test/pkg/0cd5033e.go,239,239,F,56,120,,,Token +11536719,4278973e,GitHub,10e03406,data/10e03406/test/pkg/4278973e.go,113,113,F,28,423,,,Key +11536720,7caa428b,GitHub,10e03406,data/10e03406/test/pkg/7caa428b.go,697,697,F,46,51,,,Token +11536721,7caa428b,GitHub,10e03406,data/10e03406/test/pkg/7caa428b.go,700,700,F,46,64,,,Token +11536722,7caa428b,GitHub,10e03406,data/10e03406/test/pkg/7caa428b.go,951,951,F,38,45,,,Token +11536723,6341416d,GitHub,10e03406,data/10e03406/test/pkg/cluster/6341416d.go,33,33,F,25,83,,,Key:Token +11536724,6341416d,GitHub,10e03406,data/10e03406/test/pkg/cluster/6341416d.go,75,75,F,78,95,,,Auth:Token +11536725,6341416d,GitHub,10e03406,data/10e03406/test/pkg/cluster/6341416d.go,253,253,F,78,95,,,Auth:Token +11536726,6341416d,GitHub,10e03406,data/10e03406/test/pkg/cluster/6341416d.go,460,460,F,78,95,,,Auth:Token +11536727,6341416d,GitHub,10e03406,data/10e03406/test/pkg/cluster/6341416d.go,681,681,F,78,95,,,Auth:Token +11536728,6341416d,GitHub,10e03406,data/10e03406/test/pkg/cluster/6341416d.go,990,990,F,30,41,,,Auth:Token +11536729,6341416d,GitHub,10e03406,data/10e03406/test/pkg/cluster/6341416d.go,1129,1129,F,30,41,,,Auth:Token +11536730,d483ffb9,GitHub,10e03406,data/10e03406/test/pkg/cluster/d483ffb9.go,27,27,F,42,47,,,Secret +11536731,dfb6fb1d,GitHub,10e03406,data/10e03406/test/pkg/dfb6fb1d.go,641,641,F,57,67,,,API +11536732,dfb6fb1d,GitHub,10e03406,data/10e03406/test/pkg/dfb6fb1d.go,650,650,F,57,67,,,API +11536733,dfb6fb1d,GitHub,10e03406,data/10e03406/test/pkg/dfb6fb1d.go,666,666,F,57,67,,,API +11536734,dfb6fb1d,GitHub,10e03406,data/10e03406/test/pkg/dfb6fb1d.go,679,679,F,59,69,,,API +11536735,51945883,GitHub,10e03406,data/10e03406/test/pkg/sys/template/51945883.go,40,40,F,42,47,,,Secret +11536736,8644694b,GitHub,10e03406,data/10e03406/test/secret/8644694b.py,39,39,F,27,33,,,Secret +11536737,8644694b,GitHub,10e03406,data/10e03406/test/secret/8644694b.py,50,50,F,27,33,,,Secret +11536738,8644694b,GitHub,10e03406,data/10e03406/test/secret/8644694b.py,231,231,F,43,47,,,Key +11536739,fd8cb0ea,GitHub,10e03406,data/10e03406/test/secret/fd8cb0ea.py,74,74,F,27,33,,,Secret +11536740,fd8cb0ea,GitHub,10e03406,data/10e03406/test/secret/fd8cb0ea.py,84,84,F,27,33,,,Secret +11536741,fd8cb0ea,GitHub,10e03406,data/10e03406/test/secret/fd8cb0ea.py,248,248,F,43,47,,,Key +11536742,fd8cb0ea,GitHub,10e03406,data/10e03406/test/secret/fd8cb0ea.py,291,291,F,19,36,,,Secret +11536743,fd8cb0ea,GitHub,10e03406,data/10e03406/test/secret/fd8cb0ea.py,327,327,F,19,36,,,Secret +11536744,1e4ad539,GitHub,10e03406,data/10e03406/test/util/pkg/1e4ad539.go,24,24,F,22,33,,,Token diff --git a/obfuscate_creds.py b/obfuscate_creds.py index 362e4289c..868053b73 100644 --- a/obfuscate_creds.py +++ b/obfuscate_creds.py @@ -8,6 +8,8 @@ from argparse import Namespace, ArgumentParser from typing import List +import base62 + from constants import PRIVATE_KEY_CATEGORY, LABEL_TRUE, MULTI_PATTERN_RULES from meta_row import read_meta, MetaRow @@ -133,6 +135,16 @@ def obfuscate_glsa(value): return obfuscated_value +def obfuscate_crc32_base62(value): + token = generate_value(value[4:-6]) + data = token.encode('ascii', errors="strict") + crc32sum = binascii.crc32(data) + crc32data = crc32sum.to_bytes(length=4, byteorder="big") + crc32sign = base62.encodebytes(crc32data) + obfuscated_value = value[:4] + token + crc32sign + return obfuscated_value + + def get_obfuscated_value(value, meta_row: MetaRow): if "Info" == meta_row.PredefinedPattern: # not a credential - does not require obfuscation @@ -209,6 +221,8 @@ def get_obfuscated_value(value, meta_row: MetaRow): else: # impossible, but linter fix obfuscated_value = generate_value(value) + elif any(value.startswith(x) for x in ["npm_", "ghp_", "gho_", "ghu_", "ghs_", "ghr_"]): + obfuscated_value = obfuscate_crc32_base62(value) else: # the whole value is obfuscated obfuscated_value = generate_value(value) diff --git a/requirements.txt b/requirements.txt index 00543ba2e..8f425f1ea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,12 @@ -# Python 3.10.18 -# pip 25.1.1 +# Python 3.10.19 +# pip 25.3 -GitPython==3.1.44 -virtualenv==20.31.2 +GitPython==3.1.46 +virtualenv==20.35.4 -setuptools==80.9.0 +#setuptools==80.9.0 tabulate==0.9.0 +pybase62==1.0.0 #credentialdigger==4.9.5 diff --git a/snapshot.json b/snapshot.json index dbe411668..0e52145b8 100644 --- a/snapshot.json +++ b/snapshot.json @@ -307,6 +307,7 @@ "eeb29ba4b693110b0732cfd7497c08a7255a58d49d2b4ee274f4ca88eba917a4": "https://github.com/otya128/winevdm", "eef7995773c5a4d77f070a916e2b126827dd90df50d3e8a0f834e115befabf3f": "https://github.com/FredrikNoren/ungit", "efd320adaca71cfe78efab92894c2bbaf48d685bf2092b850fea7ad130a4110d": "https://github.com/vitalysim/Awesome-Hacking-Resources", + "f05e87317d8257dcd7c327c1a5450285a214fd9b4904be9e5493ce720a3101a3": "https://github.com/rancher/rancher", "f08dbd903add1390a2f84a8db0b5558be5c260d6a731a5496f5394aedf245e2f": "https://github.com/davestephens/ansible-nas", "f19f101610cbaaf67113aa4a1c223b702152dafbed9b241dec0f65a045fb50c3": "https://github.com/github/gh-ost", "f3693cffd03d7006ba8fc6159e7e9bc6dca5f13e5abcb6ad5c5d10f83b08ea04": "https://github.com/CTFd/CTFd", diff --git a/test_obfuscate_creds.py b/test_obfuscate_creds.py index 1a62ff7a5..9dbf666b0 100644 --- a/test_obfuscate_creds.py +++ b/test_obfuscate_creds.py @@ -4,7 +4,7 @@ from constants import PRIVATE_KEY_CATEGORY, LABEL_TRUE from meta_row import MetaRow -from obfuscate_creds import gen_random_value, obfuscate_jwt, obfuscate_glsa, process_pem_key +from obfuscate_creds import gen_random_value, obfuscate_jwt, obfuscate_glsa, process_pem_key, obfuscate_crc32_base62 class ObfuscatorTest(unittest.TestCase): @@ -162,6 +162,16 @@ def test_obfuscate_glsa(self): # tested value self.assertEqual("glsa_DaldL9OnCudSrj7jWui7wxVj9b4ltV2p_c97ad013", obfuscated) + def test_obfuscate_crc32_base62(self): + random.seed(20260102) + # the value from CredSweeper samples + value = "ghp_00000000000000000000000000000004WZ4EQ " + obfuscated = obfuscate_crc32_base62(value) + self.assertNotEqual(value, obfuscated) + self.assertEqual(len(value), len(obfuscated)) + # tested value + self.assertEqual("ghp_119583239986614208174882783824961NqIlW", obfuscated) + def test_obfuscate_pem(self): random.seed(20251211) original_lines = [