Skip to content

Commit 7ded3cb

Browse files
author
Suresh Kumar Moharajan
committed
fix: improve content normalization fallbacks
Signed-off-by: Suresh Kumar Moharajan <suresh.kumar.m@ibm.com>
1 parent 43c574e commit 7ded3cb

File tree

2 files changed

+24
-20
lines changed

2 files changed

+24
-20
lines changed

.secrets.baseline

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -8780,31 +8780,31 @@
87808780
"hashed_secret": "a10b98d7340036e9c8c301704f623eddd733cc1a",
87818781
"is_secret": false,
87828782
"is_verified": false,
8783-
"line_number": 2784,
8783+
"line_number": 2785,
87848784
"type": "Hex High Entropy String",
87858785
"verified_result": null
87868786
},
87878787
{
87888788
"hashed_secret": "3acfb2c2b433c0ea7ff107e33df91b18e52f960f",
87898789
"is_secret": false,
87908790
"is_verified": false,
8791-
"line_number": 5157,
8791+
"line_number": 5184,
87928792
"type": "Secret Keyword",
87938793
"verified_result": null
87948794
},
87958795
{
87968796
"hashed_secret": "fe1bae27cb7c1fb823f496f286e78f1d2ae87734",
87978797
"is_secret": false,
87988798
"is_verified": false,
8799-
"line_number": 5810,
8799+
"line_number": 5837,
88008800
"type": "Secret Keyword",
88018801
"verified_result": null
88028802
},
88038803
{
88048804
"hashed_secret": "2878cbdbbcfa6feafc04b8889f5ecc8c470ba32e",
88058805
"is_secret": false,
88068806
"is_verified": false,
8807-
"line_number": 5874,
8807+
"line_number": 5901,
88088808
"type": "Secret Keyword",
88098809
"verified_result": null
88108810
},
@@ -8820,63 +8820,63 @@
88208820
"hashed_secret": "a0f4ea7d91495df92bbac2e2149dfb850fe81396",
88218821
"is_secret": false,
88228822
"is_verified": false,
8823-
"line_number": 9109,
8823+
"line_number": 9136,
88248824
"type": "Secret Keyword",
88258825
"verified_result": null
88268826
},
88278827
{
88288828
"hashed_secret": "a75a7c7b31474f3f04f3a395228ded8d61ee1ae3",
88298829
"is_secret": false,
88308830
"is_verified": false,
8831-
"line_number": 9158,
8831+
"line_number": 9185,
88328832
"type": "Secret Keyword",
88338833
"verified_result": null
88348834
},
88358835
{
88368836
"hashed_secret": "02c593fd9af8254b859d426a76b6cd42847fbec1",
88378837
"is_secret": false,
88388838
"is_verified": false,
8839-
"line_number": 9256,
8839+
"line_number": 9224,
88408840
"type": "Secret Keyword",
88418841
"verified_result": null
88428842
},
88438843
{
88448844
"hashed_secret": "1ded3053d0363079a4e681a3b700435d6d880290",
88458845
"is_secret": false,
88468846
"is_verified": false,
8847-
"line_number": 9313,
8847+
"line_number": 9281,
88488848
"type": "Secret Keyword",
88498849
"verified_result": null
88508850
},
88518851
{
88528852
"hashed_secret": "c00dbbc9dadfbe1e232e93a729dd4752fade0abf",
88538853
"is_secret": false,
88548854
"is_verified": false,
8855-
"line_number": 14411,
8855+
"line_number": 14379,
88568856
"type": "Secret Keyword",
88578857
"verified_result": null
88588858
},
88598859
{
8860-
"hashed_secret": "a4b48a81cdab1e1a5dd37907d6c85ca1c61ddc7c",
8860+
"hashed_secret": "f2b14f68eb995facb3a1c35287b778d5bd785511",
88618861
"is_secret": false,
88628862
"is_verified": false,
8863-
"line_number": 16806,
8863+
"line_number": 17136,
88648864
"type": "Secret Keyword",
88658865
"verified_result": null
88668866
},
88678867
{
8868-
"hashed_secret": "f2b14f68eb995facb3a1c35287b778d5bd785511",
8868+
"hashed_secret": "a4b48a81cdab1e1a5dd37907d6c85ca1c61ddc7c",
88698869
"is_secret": false,
88708870
"is_verified": false,
8871-
"line_number": 17168,
8871+
"line_number": 17155,
88728872
"type": "Secret Keyword",
88738873
"verified_result": null
88748874
},
88758875
{
88768876
"hashed_secret": "dc8002865f92070749b264e76045b04fa3b8de71",
88778877
"is_secret": false,
88788878
"is_verified": false,
8879-
"line_number": 20845,
8879+
"line_number": 20813,
88808880
"type": "Secret Keyword",
88818881
"verified_result": null
88828882
}

mcpgateway/services/content_security.py

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -383,19 +383,23 @@ def _normalize_input(self, content: str) -> str:
383383
normalized = html.unescape(normalized)
384384

385385
# URL percent decoding (%3C -> <)
386+
url_decoded = normalized
386387
try:
387-
normalized = unquote(normalized)
388+
url_decoded = unquote(normalized)
388389
except Exception:
389-
# If URL decoding fails, continue with the current normalized value
390-
normalized = normalized
390+
# If URL decoding fails, continue with the pre-decoded value
391+
logger.debug("URL decoding failed during content normalization", exc_info=True)
392+
normalized = url_decoded
391393

392394
# Unicode normalization (NFKC - compatibility decomposition + canonical composition)
393395
# This catches various Unicode tricks like fullwidth characters
396+
unicode_normalized = normalized
394397
try:
395-
normalized = unicodedata.normalize("NFKC", normalized)
398+
unicode_normalized = unicodedata.normalize("NFKC", normalized)
396399
except Exception:
397-
# If normalization fails, continue with the current normalized value
398-
normalized = normalized
400+
# If normalization fails, continue with the pre-normalized value
401+
logger.debug("Unicode normalization failed during content normalization", exc_info=True)
402+
normalized = unicode_normalized
399403

400404
return normalized
401405

0 commit comments

Comments
 (0)