Skip to content

Commit 3efea3f

Browse files
committed
Resolved remaining bandit results
1 parent eb40f96 commit 3efea3f

4 files changed

Lines changed: 207 additions & 54 deletions

File tree

src/talkpipe/app/chatterlang_serve.py

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import logging
88
import argparse
99
import yaml
10+
import html
1011
import asyncio
1112
from fastapi.responses import FileResponse, StreamingResponse
1213
from fastapi.staticfiles import StaticFiles
@@ -638,11 +639,10 @@ def _get_stream_interface(self) -> str:
638639

639640
form_fields = self._generate_form_fields()
640641

641-
return ( # nosec B608
642-
f'''<!DOCTYPE html>
642+
return f'''<!DOCTYPE html>
643643
<html>
644644
<head>
645-
<title>{self.title} - Stream</title>
645+
<title>{html.escape(self.title)} - Stream</title>
646646
<style>
647647
* {{
648648
margin: 0;
@@ -984,7 +984,7 @@ def _get_stream_interface(self) -> str:
984984
</head>
985985
<body>
986986
<div class="header">
987-
<h1>{self.form_config.title}</h1>
987+
<h1>{html.escape(self.form_config.title)}</h1>
988988
</div>
989989
990990
<div class="main-container">
@@ -1170,7 +1170,7 @@ def _get_stream_interface(self) -> str:
11701170
}}
11711171
11721172
// Add user message to chat immediately for instant feedback
1173-
const displayProperty = '{self.display_property}' || Object.keys(data)[0];
1173+
const displayProperty = '{html.escape(str(self.display_property))}' || Object.keys(data)[0];
11741174
const userMessage = data[displayProperty] || JSON.stringify(data);
11751175
lastUserMessage = userMessage; // Store to detect duplicates from server
11761176
addMessage(userMessage, 'user', new Date().toISOString());
@@ -1228,8 +1228,7 @@ def _get_stream_interface(self) -> str:
12281228
</script>
12291229
</body>
12301230
</html>
1231-
'''
1232-
)
1231+
''' # nosec B608
12331232

12341233
def _get_html_interface(self) -> str:
12351234
"""Generate HTML interface with configurable form"""
@@ -1272,11 +1271,11 @@ def _get_html_interface(self) -> str:
12721271

12731272
form_fields = self._generate_form_fields()
12741273

1275-
return ( # nosec B608
1276-
f'''<!DOCTYPE html>
1274+
# nosec B608 - HTML template with proper escaping, not SQL injection
1275+
return f'''<!DOCTYPE html>
12771276
<html>
12781277
<head>
1279-
<title>{self.title}</title>
1278+
<title>{html.escape(self.title)}</title>
12801279
<style>
12811280
* {{
12821281
margin: 0;
@@ -1293,7 +1292,7 @@ def _get_html_interface(self) -> str:
12931292
}}
12941293
12951294
.main-content {{
1296-
height: calc(100vh - {height});
1295+
height: calc(100vh - {html.escape(height)});
12971296
padding: 20px;
12981297
overflow-y: auto;
12991298
background-color: #f8f9fa;
@@ -1336,9 +1335,9 @@ def _get_html_interface(self) -> str:
13361335
13371336
.form-panel {{
13381337
position: fixed;
1339-
{form_style}
1340-
background-color: {input_bg};
1341-
border: 1px solid {border_color};
1338+
{html.escape(form_style)}
1339+
background-color: {html.escape(input_bg)};
1340+
border: 1px solid {html.escape(border_color)};
13421341
padding: 20px;
13431342
box-shadow: 0 -2px 10px rgba(0,0,0,0.1);
13441343
overflow-y: auto;
@@ -1355,7 +1354,7 @@ def _get_html_interface(self) -> str:
13551354
}}
13561355
13571356
.form-header h3 {{
1358-
color: {text_color};
1357+
color: {html.escape(text_color)};
13591358
margin: 0;
13601359
}}
13611360
@@ -1485,9 +1484,9 @@ def _get_html_interface(self) -> str:
14851484
<body>
14861485
<div class="main-content">
14871486
<div class="info-section">
1488-
<h1>{self.title}</h1>
1487+
<h1>{html.escape(self.title)}</h1>
14891488
<p>Submit JSON data using the form below or send POST requests to:</p>
1490-
<div class="endpoint-info">POST http://{self.host}:{self.port}/process</div>
1489+
<div class="endpoint-info">POST http://{html.escape(self.host)}:{html.escape(str(self.port))}/process</div>
14911490
{"<p>Authentication required: Include 'X-API-Key' header</p>" if self.require_auth else ""}
14921491
<p>View API documentation at: <a href="/docs">/docs</a></p>
14931492
<p>View streaming interface at: <a href="/stream">/stream</a></p>
@@ -1504,7 +1503,7 @@ def _get_html_interface(self) -> str:
15041503
<div class="form-panel" id="formPanel">
15051504
<div class="form-container">
15061505
<div class="form-header">
1507-
<h3>{self.form_config.title}</h3>
1506+
<h3>{html.escape(self.form_config.title)}</h3>
15081507
</div>
15091508
15101509
{auth_header}
@@ -1644,9 +1643,8 @@ def _get_html_interface(self) -> str:
16441643
</script>
16451644
</body>
16461645
</html>
1647-
'''
1648-
)
1649-
1646+
''' # nosec B608
1647+
16501648
def set_processor_function(self, func: Callable[[Dict[str, Any]], Any]):
16511649
"""Set the function used to process incoming JSON data"""
16521650
self.processor_function = func

src/talkpipe/pipe/basic.py

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import logging
55
import time
66
import sys
7-
import pickle
7+
import json
88
import hashlib
99
import copy
1010
import pandas as pd
@@ -478,14 +478,14 @@ def transform(self, input_iter: Iterable) -> Iterator:
478478
"""
479479
yield from input_iter
480480

481-
def hash_data(data, algorithm="MD5", field_list="_", use_repr=False, fail_on_missing=True, default=None):
481+
def hash_data(data, algorithm="MD5", field_list="_", use_repr=True, fail_on_missing=True, default=None):
482482
"""Hash a single data item using the specified parameters.
483483
484484
Args:
485485
data: The data item to hash
486486
algorithm (str): Hash algorithm to use. Options include SHA1, SHA224, SHA256, SHA384, SHA512, SHA-3, and MD5.
487487
field_list (str): Fields to include in the hash (defaults to "_")
488-
use_repr (bool): Whether to use repr() or pickle
488+
use_repr (bool): If True (the default), hash repr() of each item; if False, hash its JSON serialization, falling back to repr() when serialization fails.
489489
fail_on_missing (bool): Whether to fail on missing fields
490490
491491
Returns:
@@ -498,28 +498,40 @@ def hash_data(data, algorithm="MD5", field_list="_", use_repr=False, fail_on_mis
498498
if fail_on_missing:
499499
raise ValueError(f"Field {field} value was None")
500500
else:
501-
logging.warning(f"Field {field} value was None. Ignoring")
501+
logging.warning(f"Field {field} value was None. Ignoring")
502502
continue
503+
503504
if use_repr:
504-
hasher.update(repr(item).encode())
505+
# Safe: repr() doesn't execute code and works with all objects
506+
hasher.update(repr(item).encode('utf-8'))
505507
else:
506-
hasher.update(pickle.dumps(item))
508+
# Safe: JSON serialization instead of pickle
509+
try:
510+
# Use JSON with sorted keys for deterministic hashing
511+
json_str = json.dumps(item, sort_keys=True, default=str, ensure_ascii=False)
512+
hasher.update(json_str.encode('utf-8'))
513+
except (TypeError, ValueError) as e:
514+
# Fallback to repr for non-JSON-serializable objects
515+
logging.debug(f"JSON serialization failed for {type(item)}, using repr: {e}")
516+
hasher.update(repr(item).encode('utf-8'))
517+
507518
return hasher.hexdigest()
508519

509520
@registry.register_segment("hash")
510521
class Hash(AbstractSegment):
511522
"""Hashes the input data using the specified algorithm.
512523
513524
This segment hashes the input data using the specified algorithm.
514-
Strings will be encoded and hashed. All other datatypes wil be hashed using either pickle or repr().
525+
All datatypes are hashed using either repr() or JSON serialization for security.
515526
516527
Args:
517-
algorithm (str): Hash algorithm to use. Options include SHA1, SHA224, SHA256, SHA384, SHA512, SHA-3, and MD5.
518-
use_repr (bool): If True, the repr() version of the input data is hashed. If False, the input data is hashed via
519-
pickling. Using repr() will handle all object, even those that can't be pickled and won't be subject to
520-
changes in pickling formats. But the pickled version will include more state and generally be more reliable.
528+
algorithm (str): Hash algorithm to use. Options include SHA1, SHA224, SHA256, SHA384, SHA512, SHA-3, and MD5.
529+
use_repr (bool): If True, the repr() version of the input data is hashed. If False, JSON serialization
530+
is used with fallback to repr() for non-JSON-serializable objects. Using repr() will handle all
531+
objects consistently and won't be subject to changes in serialization formats. JSON serialization
532+
provides more structured representation but may not work with all Python objects.
521533
"""
522-
def __init__(self, algorithm: str="MD5", use_repr=False, field_list: str = "_", append_as=None, fail_on_missing: bool = True):
534+
def __init__(self, algorithm: str="MD5", use_repr=True, field_list: str = "_", append_as=None, fail_on_missing: bool = True):
523535
super().__init__()
524536
self.algorithm = algorithm
525537
self.use_repr = use_repr

src/talkpipe/pipe/io.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from typing import Optional, Iterable, Iterator
44
import logging
55
import os
6-
import pickle
6+
import pickle # nosec B403 - Used only for write operations, not loading untrusted data
77
import json
88
from pprint import pformat
99
from prompt_toolkit import PromptSession

0 commit comments

Comments
 (0)