Skip to content

Commit b40328d

Browse files
authored
Feat/file masking (#794)
1 parent 807b9db commit b40328d

6 files changed

Lines changed: 758 additions & 4 deletions

File tree

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
[project]
22
name = "uipath-langchain"
3-
version = "0.10.1"
3+
version = "0.10.2"
44
description = "Python SDK that enables developers to build and deploy LangGraph agents to the UiPath Cloud Platform"
55
readme = { file = "README.md", content-type = "text/markdown" }
66
requires-python = ">=3.11"
77
dependencies = [
88
"uipath>=2.10.53, <2.11.0",
99
"uipath-core>=0.5.2, <0.6.0",
10-
"uipath-platform>=0.1.30, <0.2.0",
10+
"uipath-platform>=0.1.35, <0.2.0",
1111
"uipath-runtime>=0.10.0, <0.11.0",
1212
"langgraph>=1.1.8, <2.0.0",
1313
"langchain-core>=1.2.11, <2.0.0",

src/uipath_langchain/agent/tools/internal_tools/analyze_files_tool.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import logging
12
import mimetypes
23
import uuid
34
from typing import Any, cast
@@ -29,6 +30,7 @@
2930
build_file_content_blocks_for,
3031
)
3132
from uipath_langchain.agent.react.jsonschema_pydantic_converter import create_model
33+
from uipath_langchain.agent.tools.internal_tools.pii_masker import PiiMasker
3234
from uipath_langchain.agent.tools.structured_tool_with_argument_properties import (
3335
StructuredToolWithArgumentProperties,
3436
)
@@ -38,6 +40,8 @@
3840
extract_text_content,
3941
)
4042

43+
logger = logging.getLogger("uipath")
44+
4145
ANALYZE_FILES_SYSTEM_MESSAGE = (
4246
"Process the provided files to complete the given task. "
4347
"Analyze the files contents thoroughly to deliver an accurate response "
@@ -81,6 +85,27 @@ async def tool_fn(**kwargs: Any):
8185
if not files:
8286
return {"analysisResult": "No attachments provided to analyze."}
8387

88+
client: UiPath | None = None
89+
policy: dict[str, Any] | None = None
90+
try:
91+
client = UiPath()
92+
policy = await client.automation_ops.get_deployed_policy_async()
93+
except Exception:
94+
logger.exception("Failed to fetch deployed policy")
95+
96+
masker: PiiMasker | None = None
97+
if client is not None and PiiMasker.is_policy_enabled(policy):
98+
masker = PiiMasker(client, policy)
99+
try:
100+
analysis_task, files = await masker.apply(analysis_task, files)
101+
except Exception as exc:
102+
raise AgentRuntimeError(
103+
code=AgentRuntimeErrorCode.UNEXPECTED_ERROR,
104+
title="PII masking failed",
105+
detail=f"PII detection raised: {exc!r}",
106+
category=UiPathErrorCategory.SYSTEM,
107+
) from exc
108+
84109
try:
85110
human_message = HumanMessage(content=analysis_task)
86111
human_message_with_files = await add_files_to_message(human_message, files)
@@ -102,6 +127,18 @@ async def tool_fn(**kwargs: Any):
102127
del messages, human_message_with_files, files
103128

104129
analysis_result = extract_text_content(result)
130+
131+
if masker is not None:
132+
try:
133+
analysis_result = masker.rehydrate(analysis_result)
134+
except Exception as exc:
135+
raise AgentRuntimeError(
136+
code=AgentRuntimeErrorCode.UNEXPECTED_ERROR,
137+
title="PII rehydration failed",
138+
detail=f"Failed to rehydrate LLM response: {exc!r}",
139+
category=UiPathErrorCategory.SYSTEM,
140+
) from exc
141+
105142
return {"analysisResult": analysis_result}
106143

107144
job_attachment_wrapper = get_job_attachment_wrapper(output_type=output_model)
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
"""PII masking for the analyze-files tool.
2+
3+
Encapsulates the policy evaluation, PII detection request, and rehydration of
4+
masked LLM output behind a single :class:`PiiMasker` class.
5+
"""
6+
7+
import logging
8+
from typing import Any
9+
10+
from uipath.core.feature_flags import FeatureFlags
11+
from uipath.platform import UiPath
12+
from uipath.platform.semantic_proxy import (
13+
PiiDetectionRequest,
14+
PiiDetectionResponse,
15+
PiiDocument,
16+
PiiEntityThreshold,
17+
PiiFile,
18+
rehydrate_from_pii_response,
19+
)
20+
21+
from uipath_langchain.agent.multimodal import FileInfo
22+
23+
logger = logging.getLogger("uipath")
24+
25+
_FEATURE_FLAG = "FilePiiMaskingEnabled"
26+
27+
28+
class PiiMasker:
    """Runs PII detection against prompts/files and rehydrates masked LLM output.

    Two gates (both must allow) control whether masking runs:

    1. Feature flag -- ``FilePiiMaskingEnabled`` (defaults to ``False``;
       enable via ``FeatureFlags.configure_flags`` or the
       ``UIPATH_FEATURE_FilePiiMaskingEnabled`` env var).
    2. Platform policy -- ``data.container.pii-in-flight-agents`` from the
       AutomationOps deployed-policy response.

    Typical use is ``apply`` before the LLM call and ``rehydrate`` on the
    LLM's answer; the detection response is kept on the instance in between.
    """

    # Id given to the prompt document in the detection request; used again to
    # find the prompt's entry in the detection response.
    _PROMPT_DOCUMENT_ID = "user-prompt"

    def __init__(self, client: UiPath, policy: dict[str, Any] | None) -> None:
        """Store the platform client and the (possibly missing) deployed policy."""
        self._client = client
        self._policy = policy
        # Populated by ``apply``; ``rehydrate`` is a no-op while this is None.
        self._result: PiiDetectionResponse | None = None

    @staticmethod
    def _policy_data(policy: dict[str, Any] | None) -> dict[str, Any]:
        """Return the policy's ``data`` mapping, or ``{}`` if absent/malformed.

        Policy payloads may carry explicit nulls (``{"data": None}``); a plain
        ``policy.get("data", {})`` would hand back ``None`` in that case and
        the next ``.get`` would raise.
        """
        if not isinstance(policy, dict):
            return {}
        data = policy.get("data")
        return data if isinstance(data, dict) else {}

    @staticmethod
    def _file_type(name: str) -> str:
        """Return the lower-cased text after the last ``.`` in ``name``.

        Returns ``""`` when ``name`` contains no dot.
        """
        _, dot, extension = name.rpartition(".")
        return extension.lower() if dot else ""

    @staticmethod
    def is_policy_enabled(policy: dict[str, Any] | None) -> bool:
        """Return True when both the feature flag and platform policy allow masking.

        A missing, empty, or malformed policy is treated as "masking not
        enabled" rather than raising.
        """
        flag_enabled = FeatureFlags.is_flag_enabled(_FEATURE_FLAG, default=False)
        logger.info("PII masking feature flag %s=%s", _FEATURE_FLAG, flag_enabled)
        if not flag_enabled:
            return False
        container = PiiMasker._policy_data(policy).get("container")
        if not isinstance(container, dict):
            return False
        return bool(container.get("pii-in-flight-agents", False))

    async def apply(
        self, analysis_task: str, files: list[FileInfo]
    ) -> tuple[str, list[FileInfo]]:
        """Run PII detection and return the masked prompt and redacted files.

        The prompt is sent as a single document and every file is sent by
        name/URL/extension. The underlying detection response is retained so
        the LLM output can be rehydrated later via :meth:`rehydrate`.

        Returns:
            ``(masked_prompt, masked_files)``. The prompt falls back to
            ``analysis_task`` when the response has no entry for it; the file
            list is returned untouched when the response lists no files.
        """
        request = PiiDetectionRequest(
            documents=[
                PiiDocument(
                    id=self._PROMPT_DOCUMENT_ID,
                    role="user",
                    document=analysis_task,
                )
            ],
            files=[
                PiiFile(
                    file_name=f.name,
                    file_url=f.url,
                    file_type=self._file_type(f.name),
                )
                for f in files
            ],
            # An empty threshold list is sent as None, not as [].
            entity_thresholds=self._entity_thresholds_from_policy() or None,
        )
        self._result = await self._client.semantic_proxy.detect_pii_async(request)
        logger.info(
            "PII detection completed: %d document entities, %d file entities",
            sum(len(d.pii_entities) for d in self._result.response),
            sum(len(f.pii_entities) for f in self._result.files),
        )

        masked_prompt = analysis_task
        prompt_doc = next(
            (d for d in self._result.response if d.id == self._PROMPT_DOCUMENT_ID),
            None,
        )
        if prompt_doc is not None:
            if prompt_doc.masked_document != analysis_task:
                logger.info(
                    "User prompt masked (%d entities replaced)",
                    len(prompt_doc.pii_entities),
                )
            masked_prompt = prompt_doc.masked_document

        redacted_by_name = {f.file_name: f.file_url for f in self._result.files}
        if not redacted_by_name:
            return masked_prompt, files

        # NOTE(review): once any redacted file comes back, *every* input file
        # is renamed with the prefix; files missing from the response keep
        # their original URL. Confirm this is intended for partial responses.
        masked_files = [
            self._rename_for_masking(f, redacted_by_name.get(f.name, f.url))
            for f in files
        ]
        logger.info("Renamed %d file(s) with pii_masked_ prefix", len(masked_files))
        return masked_prompt, masked_files

    def rehydrate(self, text: str) -> str:
        """Replace masked placeholders in ``text`` with the original PII values.

        Returns ``text`` unchanged if :meth:`apply` hasn't been called.
        """
        if self._result is None:
            return text
        rehydrated = rehydrate_from_pii_response(text, self._result)
        if rehydrated != text:
            logger.info("Rehydrated LLM response with PII entities")
        return rehydrated

    def _entity_thresholds_from_policy(self) -> list[PiiEntityThreshold]:
        """Extract enabled entity thresholds from the policy's ``pii-entity-table``.

        Entries that are disabled, are not mappings, or lack a category or
        confidence threshold are skipped. A missing or malformed table yields
        an empty list.
        """
        table = self._policy_data(self._policy).get("pii-entity-table")
        if not isinstance(table, (list, tuple)):
            return []
        thresholds: list[PiiEntityThreshold] = []
        for entry in table:
            if not isinstance(entry, dict):
                continue
            if not entry.get("pii-entity-is-enabled", False):
                continue
            category = entry.get("pii-entity-category")
            confidence = entry.get("pii-entity-confidence-threshold")
            if category is None or confidence is None:
                continue
            thresholds.append(
                PiiEntityThreshold(
                    category=category,
                    confidence_threshold=confidence,
                )
            )
        return thresholds

    @staticmethod
    def _rename_for_masking(file: FileInfo, redacted_url: str) -> FileInfo:
        """Return a FileInfo pointing at ``redacted_url`` with a ``pii_masked_`` prefix.

        The prefix goes in front of the whole original name, so any extension
        is preserved as-is; the MIME type is carried over unchanged.
        """
        return FileInfo(
            url=redacted_url,
            name=f"pii_masked_{file.name}",
            mime_type=file.mime_type,
        )

0 commit comments

Comments
 (0)