Skip to content

Commit 29f89e5

Browse files
authored
Password is up to 64 symbols (#257)
* Remove not scannable files from dataset
* ansi2html from pip, markup
* CI rollback
* [skip actions] [qreview] 2025-10-20T14:28:04+03:00
* passwords64
* fix
* review & markup
* update markup
* bcrypt?
* upd
1 parent 5b85997 commit 29f89e5

25 files changed

+137
-75
lines changed

.ci/benchmark.txt

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
META MD5 614e10c6f51880b7b99cfe46dc682176
2-
DATA MD5 6a51b7741411189da422b42b67d36ba4
3-
DATA: 17230760 interested lines. MARKUP: 62300 items
1+
META MD5 c7902b7e94ed36807aa76f49bc5c5b41
2+
DATA MD5 8eb069c8e5d44914fd6dccbcbd6428cc
3+
DATA: 17230760 interested lines. MARKUP: 62332 items
44
FileType FileNumber ValidLines Positives Negatives
55
--------------- ------------ ------------ ----------- -----------
66
689 567668 138 487
@@ -29,15 +29,15 @@ FileType FileNumber ValidLines Positives Negatives
2929
.cljc 5 2421 11
3030
.cls 1 657 1
3131
.cmd 4 401 2 3
32-
.cnf 8 858 20 29
32+
.cnf 8 858 21 28
3333
.coffee 1 585 2
3434
.conf 63 5019 66 111
3535
.config 20 492 7 41
36-
.cpp 23 7628 24 60
36+
.cpp 23 7628 24 61
3737
.creds 1 10 2 1
3838
.crlf 1 27 1
3939
.crt 2 4979 126
40-
.cs 269 82841 257 1056
40+
.cs 269 82841 257 1057
4141
.cshtml 5 180 12
4242
.csp 3 379 9
4343
.csproj 1 14 1
@@ -64,7 +64,7 @@ FileType FileNumber ValidLines Positives Negatives
6464
.gd 1 37 1
6565
.gml 3 3075 16
6666
.gni 3 5017 19
67-
.go 1280 718792 1512 4862
67+
.go 1280 718792 1514 4861
6868
.golden 5 1168 1 42
6969
.gradle 50 4295 8 189
7070
.graphql 8 454 2 13
@@ -77,7 +77,7 @@ FileType FileNumber ValidLines Positives Negatives
7777
.hbs 2 54 3
7878
.hpp 1 237 2
7979
.hs 14 4140 30 65
80-
.html 127 34238 192 146
80+
.html 127 34238 192 147
8181
.idl 3 1625 37 5
8282
.iml 6 699 30
8383
.in 7 2242 10 50
@@ -86,17 +86,17 @@ FileType FileNumber ValidLines Positives Negatives
8686
.ipynb 1 134 7
8787
.j 1 241 4
8888
.j2 32 6043 8 179
89-
.java 672 144069 489 1501
89+
.java 672 144069 492 1500
9090
.jenkinsfile 1 58 2 6
9191
.jinja2 1 64 2
9292
.js 666 537560 886 2712
93-
.json 927 13140680 1964 10176
93+
.json 927 13140680 1965 10175
9494
.jsp 13 3202 1 37
9595
.jsx 7 857 19
9696
.jwt 1 1 2
9797
.key 115 3067 105 11
9898
.ks 1 25 1
99-
.kt 121 20235 64 367
99+
.kt 121 20235 65 366
100100
.l 1 982 1
101101
.las 1 6656 36
102102
.lasso 1 230 7
@@ -115,10 +115,10 @@ FileType FileNumber ValidLines Positives Negatives
115115
.lua 10 1924 3 37
116116
.m 16 13358 22 160
117117
.manifest 3 102 9 6
118-
.markdown 38 5862 69 4
118+
.markdown 38 5862 69 5
119119
.markerb 3 12 3
120120
.marko 1 21 2
121-
.md 789 185743 1068 2893
121+
.md 789 185743 1073 2890
122122
.mdx 3 549 7
123123
.mjml 1 18 1
124124
.mjs 22 4424 101 369
@@ -130,7 +130,7 @@ FileType FileNumber ValidLines Positives Negatives
130130
.mqh 1 1023 2
131131
.msg 1 26644 1 1
132132
.mysql 1 36 2
133-
.ndjson 2 5006 78 228
133+
.ndjson 2 5006 81 227
134134
.nix 4 211 12
135135
.nolint 1 2 1
136136
.odd 1 1281 43
@@ -141,7 +141,7 @@ FileType FileNumber ValidLines Positives Negatives
141141
.patch 4 109405 4 27
142142
.pbxproj 1 941 1
143143
.pem 65 1467 64 3
144-
.php 401 82359 166 1480
144+
.php 401 82359 172 1474
145145
.pl 16 14727 7 33
146146
.pm 10 5224 1 17
147147
.po 3 2994 15
@@ -152,7 +152,7 @@ FileType FileNumber ValidLines Positives Negatives
152152
.ppk 1 45 28
153153
.private 1 15 1
154154
.proj 1 85 5
155-
.properties 55 1637 67 55
155+
.properties 55 1637 70 52
156156
.proto 5 5768 2 49
157157
.ps1 16 8509 15 75
158158
.ps1xml 1 5022 1
@@ -205,15 +205,15 @@ FileType FileNumber ValidLines Positives Negatives
205205
.test 2 24 22 5
206206
.testsettings 1 21 1 10
207207
.tf 27 1644 14 31
208-
.tfstate 6 431 49 13
208+
.tfstate 6 431 53 9
209209
.tfvars 1 31 5
210210
.tl 2 2161 162
211211
.tmpl 5 336 12
212212
.token 1 1 4
213213
.toml 86 2471 65 251
214214
.tpl 1 43 1
215215
.travis 1 34 2 4
216-
.ts 609 109982 240 1970
216+
.ts 609 109982 262 1970
217217
.tsx 54 7914 1 120
218218
.ttar 1 452 1
219219
.txt 324 89406 5261 4385
@@ -225,15 +225,15 @@ FileType FileNumber ValidLines Positives Negatives
225225
.xcscheme 1 109 4
226226
.xib 11 503 164
227227
.xsl 1 311 1
228-
.yaml 168 24422 195 377
229-
.yml 564 57042 1928 1223
228+
.yaml 168 24422 195 379
229+
.yml 564 57042 1934 1217
230230
.zsh 6 872 12
231231
.zsh-theme 1 97 1
232-
TOTAL: 11640 17230760 17454 50265
232+
TOTAL: 11640 17230760 17511 50243
233233
credsweeper result_cnt : 0, lost_cnt : 0, true_cnt : 0, false_cnt : 0
234234
Rules Positives Negatives Reported TP FP TN FN FPR FNR ACC PRC RCL F1
235235
------------------------------ ----------- ----------- ---------- ---- ---- ----- ----- -------- -------- -------- ----- -------- ----
236-
API 242 3370 0 0 3370 242 0.000000 1.000000 0.933001 0.000000
236+
API 242 3371 0 0 3371 242 0.000000 1.000000 0.933020 0.000000
237237
AWS Client ID 205 19 0 0 19 205 0.000000 1.000000 0.084821 0.000000
238238
AWS Multi 82 10 0 0 10 82 0.000000 1.000000 0.108696 0.000000
239239
AWS S3 Bucket 67 23 0 0 23 67 0.000000 1.000000 0.255556 0.000000
@@ -267,14 +267,14 @@ Grafana Provisioned API Key 22 1 0 0
267267
JSON Web Token 174 61 0 0 61 174 0.000000 1.000000 0.259574 0.000000
268268
JWK 79 0 0 0 0 79 1.000000 0.000000 0.000000
269269
Jira / Confluence PAT token 0 4 0 0 4 0 0.000000 1.000000
270-
Key 4283 16393 0 0 16393 4283 0.000000 1.000000 0.792852 0.000000
270+
Key 4284 16395 0 0 16395 4284 0.000000 1.000000 0.792833 0.000000
271271
MailGun API Key 8 0 0 0 0 8 1.000000 0.000000 0.000000
272272
NKEY Seed 58 0 0 0 0 58 1.000000 0.000000 0.000000
273273
Nonce 130 55 0 0 55 130 0.000000 1.000000 0.297297 0.000000
274274
OTP / 2FA Secret 58 3 0 0 3 58 0.000000 1.000000 0.049180 0.000000
275275
Other 9 7321 0 0 7321 9 0.000000 1.000000 0.998772 0.000000
276276
PEM Private Key 1150 76 0 0 76 1150 0.000000 1.000000 0.061990 0.000000
277-
Password 2519 9957 0 0 9957 2519 0.000000 1.000000 0.798092 0.000000
277+
Password 2575 9932 0 0 9932 2575 0.000000 1.000000 0.794115 0.000000
278278
Postman Credentials 2 0 0 0 0 2 1.000000 0.000000 0.000000
279279
SQL Password 44 14 0 0 14 44 0.000000 1.000000 0.241379 0.000000
280280
Salesforce Credentials 6 0 0 0 0 6 1.000000 0.000000 0.000000
@@ -287,4 +287,4 @@ Token 1138 4668 0 0
287287
Twilio Credentials 30 39 0 0 39 30 0.000000 1.000000 0.565217 0.000000
288288
URL Credentials 225 382 0 0 382 225 0.000000 1.000000 0.629325 0.000000
289289
UUID 2508 280 0 0 280 2508 0.000000 1.000000 0.100430 0.000000
290-
17454 50265 0 0 0 50265 17454 0.000000 1.000000 0.742258 0.000000
290+
17511 50243 0 0 0 50243 17511 0.000000 1.000000 0.741550 0.000000

benchmark/scanner/scanner.py

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@
44
import os
55
import subprocess
66
from abc import ABC, abstractmethod
7+
from functools import cache
78
from pathlib import Path
89
from typing import Tuple, Dict, List, Any
910

1011
import tabulate
12+
from colorama import Fore, Style
1113

1214
from benchmark.common import GitService, LineStatus, Result, ScannerType
1315
from benchmark.scanner.file_type_stat import FileTypeStat
@@ -242,6 +244,34 @@ def get_items_from_path(file_path: str) -> Tuple[str, str, str, str]:
242244
file_id = file_name.split('.')[0]
243245
return data_path, repo_name, file_name, file_id
244246

247+
@staticmethod
248+
@cache
249+
def read_cache(file_path: str) -> list[str]:
250+
with contextlib.suppress(Exception):
251+
with open(file_path, "r", encoding="utf8") as f:
252+
return f.read().replace("\r\n", '\n').replace('\r', '\n').split('\n')
253+
return []
254+
255+
@staticmethod
256+
def get_colored_line(file_path: str, line_start: int, line_end: int, value_start: int, value_end: int) -> str:
257+
"""get line with color highlighted value for quick review"""
258+
if lines := Scanner.read_cache(file_path):
259+
if line_start == line_end <= len(lines) and 0 <= value_start < value_end:
260+
# normal single line value
261+
_line = lines[line_start - 1]
262+
colored_line = (_line[:value_start] + Fore.LIGHTYELLOW_EX +
263+
_line[value_start:value_end] + Style.RESET_ALL + _line[value_end:])
264+
elif line_start < line_end <= len(lines):
265+
# multiline
266+
colored_line = '\n'.join(lines[line_start - 1:line_end])
267+
else:
268+
# wrong line numeration (.xml e.g.)
269+
colored_line = ''
270+
else:
271+
# no file
272+
colored_line = f"Cannot read '{Fore.LIGHTMAGENTA_EX}{file_path}{Style.RESET_ALL}' file"
273+
return colored_line
274+
245275
def check_line_from_meta(self,
246276
file_path: str,
247277
line_start: int,
@@ -275,7 +305,8 @@ def check_line_from_meta(self,
275305

276306
if not (rows := self.meta.get(MetaKey(data_path, line_start, line_end))):
277307
self.lost_cnt += 1
278-
print(f"NOT FOUND WITH KEY: {approximate}", flush=True)
308+
print(f"NOT FOUND WITH KEY: {approximate}"
309+
f"\n{Scanner.get_colored_line(file_path, line_start, line_end, value_start, value_end)}", flush=True)
279310
if self.fix:
280311
with open(f"{self.cred_data_dir}/meta/{repo_name}.csv", "a") as f:
281312
f.write(f"{str(approximate)}\n")
@@ -354,7 +385,8 @@ def check_line_from_meta(self,
354385

355386
# meta has no markup for given credential
356387
self.lost_cnt += 1
357-
print(f"{suggestion} {approximate}", flush=True)
388+
print(f"{suggestion} {approximate}"
389+
f"\n{Scanner.get_colored_line(file_path, line_start, line_end, value_start, value_end)}", flush=True)
358390
self.meta_next_id += 1
359391
if lost_meta and self.fix:
360392
with open(f"{self.cred_data_dir}/meta/{repo_name}.csv", "a") as f:

meta/0436af4a.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,3 +182,4 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,ValueStart,Valu
182182
1480722,92b8ee86,GitHub,0436af4a,data/0436af4a/test/src/92b8ee86.cs,115,115,T,41,50,,,Password
183183
1480723,92b8ee86,GitHub,0436af4a,data/0436af4a/test/src/92b8ee86.cs,150,150,T,41,50,,,Password
184184
1480724,92b8ee86,GitHub,0436af4a,data/0436af4a/test/src/92b8ee86.cs,312,312,T,41,50,,,Password
185+
11519294,dc9185d3,GitHub,0436af4a,data/0436af4a/test/src/dc9185d3.cs,13,13,F,41,80,,,Password

meta/057480bf.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,4 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,ValueStart,Valu
1818
11503189,cba27b0c,GitHub,057480bf,data/057480bf/test/tool/cba27b0c.pm,22,22,T,27,39,,,Salt
1919
11503190,da91ab59,GitHub,057480bf,data/057480bf/test/tool/da91ab59.pm,66,66,F,24,44,,,Key
2020
11519093,188e3140,GitHub,057480bf,data/057480bf/test/tool/188e3140.py,10,10,T,12,20,,,Password
21+
11519295,16cf9f2f,GitHub,057480bf,data/057480bf/_/16cf9f2f.cpp,590,590,F,19,27,,,Key

meta/0a0d22aa.csv

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,13 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,ValueStart,Valu
1717
63792,c50315a3,GitHub,0a0d22aa,data/0a0d22aa/test/secret/pkg/c50315a3.go,26,26,F,37,53,,,Secret
1818
63793,fea85211,GitHub,0a0d22aa,data/0a0d22aa/test/pkg/fea85211.go,41,41,F,22,38,,,Credential
1919
31802,5f40d16d,GitHub,0a0d22aa,data/0a0d22aa/test/pkg/5f40d16d.go,35,35,F,19,47,,,Password
20-
31804,5f40d16d,GitHub,0a0d22aa,data/0a0d22aa/test/pkg/5f40d16d.go,22,22,X,19,63,,,Password
21-
31797,5f40d16d,GitHub,0a0d22aa,data/0a0d22aa/test/pkg/5f40d16d.go,57,57,X,19,77,,,Password
20+
31804,5f40d16d,GitHub,0a0d22aa,data/0a0d22aa/test/pkg/5f40d16d.go,22,22,T,19,63,,,Password
21+
31797,5f40d16d,GitHub,0a0d22aa,data/0a0d22aa/test/pkg/5f40d16d.go,57,57,F,19,77,,bcrypt?,Password
2222
31798,5f40d16d,GitHub,0a0d22aa,data/0a0d22aa/test/pkg/5f40d16d.go,58,58,T,19,1249,,,Key
2323
31799,5f40d16d,GitHub,0a0d22aa,data/0a0d22aa/test/pkg/5f40d16d.go,51,51,X,17,95,,,Password
24-
31800,5f40d16d,GitHub,0a0d22aa,data/0a0d22aa/test/pkg/5f40d16d.go,45,45,X,17,73,,,Password
24+
31800,5f40d16d,GitHub,0a0d22aa,data/0a0d22aa/test/pkg/5f40d16d.go,45,45,F,17,73,,bcrypt?,Password
2525
1113091,c50315a3,GitHub,0a0d22aa,data/0a0d22aa/test/secret/pkg/c50315a3.go,15,15,F,17,33,,,Key
26-
1031800,5f40d16d,GitHub,0a0d22aa,data/0a0d22aa/test/pkg/5f40d16d.go,15,15,X,17,61,,,Password
26+
1031800,5f40d16d,GitHub,0a0d22aa,data/0a0d22aa/test/pkg/5f40d16d.go,15,15,T,17,61,,,Password
2727
1480725,fea85211,GitHub,0a0d22aa,data/0a0d22aa/test/pkg/fea85211.go,34,34,T,16,31,,,Password
2828
1509834,fea85211,GitHub,0a0d22aa,data/0a0d22aa/test/pkg/fea85211.go,27,27,T,24,1254,,,Key
29+
11519296,fea85211,GitHub,0a0d22aa,data/0a0d22aa/test/pkg/fea85211.go,25,25,F,24,82,,,Password

0 commit comments

Comments
 (0)