Skip to content

Commit 7a3de1c

Browse files
committed
feat: add tracing for PII file masking
1 parent 74784ac commit 7a3de1c

5 files changed

Lines changed: 491 additions & 58 deletions

File tree

src/uipath_langchain/agent/multimodal/types.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,6 @@ class FileInfo:
2424
url: str
2525
name: str
2626
mime_type: str
27+
masked_attachment_url: str | None = None
28+
attachment_id: str | None = None
29+
masked_attachment_id: str | None = None

src/uipath_langchain/agent/tools/internal_tools/analyze_files_tool.py

Lines changed: 214 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
import logging
23
import mimetypes
34
import uuid
@@ -12,14 +13,21 @@
1213
HumanMessage,
1314
SystemMessage,
1415
)
15-
from langchain_core.runnables.config import var_child_runnable_config
16+
from langchain_core.runnables.config import RunnableConfig, var_child_runnable_config
1617
from langchain_core.tools import StructuredTool
18+
from opentelemetry import trace as otel_trace
1719
from uipath.agent.models.agent import (
1820
AgentInternalToolResourceConfig,
1921
)
22+
from uipath.core.tracing.span_utils import UiPathSpanUtils
2023
from uipath.eval.mocks import mockable
2124
from uipath.platform import UiPath
2225
from uipath.runtime.errors import UiPathErrorCategory
26+
from uipath.tracing import (
27+
AttachmentDirection,
28+
AttachmentProvider,
29+
SpanAttachment,
30+
)
2331

2432
from uipath_langchain.agent.exceptions import (
2533
AgentRuntimeError,
@@ -30,7 +38,10 @@
3038
build_file_content_blocks_for,
3139
)
3240
from uipath_langchain.agent.react.jsonschema_pydantic_converter import create_model
33-
from uipath_langchain.agent.tools.internal_tools.pii_masker import PiiMasker
41+
from uipath_langchain.agent.tools.internal_tools.pii_masker import (
42+
PiiMasker,
43+
_masked_name_for,
44+
)
3445
from uipath_langchain.agent.tools.structured_tool_with_argument_properties import (
3546
StructuredToolWithArgumentProperties,
3647
)
@@ -48,6 +59,170 @@
4859
"based on the extracted information."
4960
)
5061

62+
# LangChain config metadata key carrying the JSON-serialized SpanAttachment list
63+
# that should render on the llmCall span. The LLMOps callback in uipath-agents
64+
# reads this and stamps it on the llmCall span as the ``attachments`` attribute.
65+
LLM_CALL_ATTACHMENTS_METADATA_KEY = "uipath_llm_call_attachments"
66+
67+
68+
def _original_attachment_id(file: FileInfo) -> str:
    """Pick the trace-attachment id for the original (unmasked) file.

    ``file.attachment_id`` is returned as-is whenever it is set. When it is
    missing or empty, a deterministic uuid5 is derived from ``file.url``
    (URL namespace) so the same file always maps to the same id.

    Args:
        file: The file whose original attachment id is needed.

    Returns:
        The id as a string.
    """
    known_id = file.attachment_id
    if not known_id:
        # No id recorded for this file: synthesize a stable one from its URL.
        return str(uuid.uuid5(uuid.NAMESPACE_URL, file.url))
    return known_id
78+
79+
80+
def _masked_attachment_id(masked_url: str) -> str:
    """Synthesize a stable GUID string for a masked file from its URL.

    The LLMOps traces endpoint validates ``Attachment.Id`` as ``System.Guid``.
    Masked files aren't orchestrator-tracked, so a uuid5 in the URL namespace
    is computed from the redacted blob URL: it satisfies the schema and the
    same URL always yields the same id.

    Args:
        masked_url: URL of the redacted copy of the file.

    Returns:
        The uuid5 of ``masked_url`` rendered as a string.
    """
    derived = uuid.uuid5(uuid.NAMESPACE_URL, masked_url)
    return str(derived)
89+
90+
91+
def _set_span_attachments(
    span: otel_trace.Span, attachments: list[SpanAttachment]
) -> None:
    """Store a :class:`SpanAttachment` list on ``span`` as a JSON string.

    The list is serialized with ``model_dump(by_alias=True)`` per item and
    written to the ``attachments`` span attribute. Nothing is written when
    the list is empty, the span is ``None``, or the span is not recording.
    Any exception raised while serializing or writing is logged and not
    re-raised.

    Args:
        span: Target OTel span.
        attachments: Attachments to record on the span.
    """
    if not attachments:
        return
    if span is None or not span.is_recording():
        return
    try:
        serialized = json.dumps(
            [item.model_dump(by_alias=True) for item in attachments]
        )
        span.set_attribute("attachments", serialized)
    except Exception:
        logger.exception("Failed to emit trace attachments")
104+
105+
106+
def _llm_call_attachments_payload(files: list[FileInfo]) -> str | None:
    """Build the JSON attachments payload for the llmCall span.

    One entry is produced per file, describing the version handed to the
    model. When ``masked_attachment_url`` is set, the entry uses the masked id
    (``masked_attachment_id`` if present, otherwise a uuid5 of the masked URL)
    and the masked file name. Otherwise it uses the original attachment id
    and the original name. Direction is ``IN`` because the file is an input
    to the LLM.

    Building the payload is best-effort: it only feeds trace metadata, so a
    failure while constructing or serializing the attachments is logged and
    reported as "no payload" rather than propagating to the caller.

    Args:
        files: Files passed to the LLM call.

    Returns:
        A JSON-encoded list of attachments, or ``None`` when ``files`` is
        empty or the payload could not be built.
    """
    if not files:
        return None
    try:
        attachments: list[SpanAttachment] = []
        for file in files:
            if file.masked_attachment_url:
                # Prefer the id stored on the file; fall back to the
                # synthesized uuid5 of the masked URL.
                att_id = file.masked_attachment_id or _masked_attachment_id(
                    file.masked_attachment_url
                )
                name = _masked_name_for(file.name)
            else:
                att_id = _original_attachment_id(file)
                name = file.name
            attachments.append(
                SpanAttachment(
                    id=att_id,
                    file_name=name,
                    mime_type=file.mime_type,
                    provider=AttachmentProvider.ORCHESTRATOR,
                    direction=AttachmentDirection.IN,
                )
            )
        return json.dumps([att.model_dump(by_alias=True) for att in attachments])
    except Exception:
        # Tracing metadata must never break the LLM call itself.
        logger.exception("Failed to build llmCall trace attachments")
        return None
137+
138+
139+
def _config_with_llm_call_attachments(
    config: RunnableConfig | None, files: list[FileInfo]
) -> RunnableConfig | None:
    """Attach the llmCall attachments payload to a runnable config.

    The payload from ``_llm_call_attachments_payload(files)`` is stored under
    ``metadata[LLM_CALL_ATTACHMENTS_METADATA_KEY]`` on a shallow copy of
    ``config``; the incoming config and its metadata dict are not mutated.
    Per the module comment on the metadata key, the LLMOps callback in
    ``uipath-agents`` reads that entry and stamps it on the llmCall span as
    the ``attachments`` attribute.

    Args:
        config: Existing runnable config, or ``None``.
        files: Files sent with the LLM call.

    Returns:
        ``config`` unchanged when there is no payload, otherwise a new config
        whose metadata carries the payload.
    """
    attachments_json = _llm_call_attachments_payload(files)
    if attachments_json:
        merged = cast(RunnableConfig, dict(config) if config else {})
        existing_metadata = merged.get("metadata") or {}
        # Copy the metadata so the caller's dict is left untouched.
        merged["metadata"] = dict(existing_metadata) | {
            LLM_CALL_ATTACHMENTS_METADATA_KEY: attachments_json
        }
        return merged
    return config
159+
160+
161+
def _emit_pii_masking_attachments(
    span: otel_trace.Span, files: list[FileInfo]
) -> None:
    """Emit originals (IN) and masked copies (OUT) on the given PII Masking span.

    For every file an ``IN`` attachment is recorded with the id from
    ``_original_attachment_id``. When the file has a ``masked_attachment_url``
    an ``OUT`` attachment is recorded as well, keyed by
    ``masked_attachment_id`` when set or by a uuid5 derived from the masked
    URL as a fallback. The attachment list goes to the ``attachments``
    attribute, and the same files are mirrored as JSON on the ``input`` /
    ``input.value`` and ``output`` / ``output.value`` attributes.

    The whole function is best-effort. It does nothing when ``files`` is
    empty, the span is ``None``, or the span is not recording. Any failure
    while building or writing the trace data is logged and swallowed, so a
    tracing problem cannot surface to the caller as a masking failure.

    Args:
        span: The PII Masking span to annotate.
        files: Files returned by the masking step.
    """
    # Skip all work when nothing would be written anyway.
    if not files or span is None or not span.is_recording():
        return

    attachments: list[SpanAttachment] = []
    input_files: list[dict[str, Any]] = []
    output_files: list[dict[str, Any]] = []

    try:
        for file in files:
            original_id = _original_attachment_id(file)
            attachments.append(
                SpanAttachment(
                    id=original_id,
                    file_name=file.name,
                    mime_type=file.mime_type,
                    provider=AttachmentProvider.ORCHESTRATOR,
                    direction=AttachmentDirection.IN,
                )
            )
            input_files.append(
                {"id": original_id, "fileName": file.name, "mimeType": file.mime_type}
            )

            if not file.masked_attachment_url:
                continue

            # Prefer the real orchestrator UUID from the re-upload so the UI
            # can download the file; fall back to the synthesized uuid5.
            masked_id = file.masked_attachment_id or _masked_attachment_id(
                file.masked_attachment_url
            )
            masked_name = _masked_name_for(file.name)
            attachments.append(
                SpanAttachment(
                    id=masked_id,
                    file_name=masked_name,
                    mime_type=file.mime_type,
                    provider=AttachmentProvider.ORCHESTRATOR,
                    direction=AttachmentDirection.OUT,
                )
            )
            output_files.append(
                {"id": masked_id, "fileName": masked_name, "mimeType": file.mime_type}
            )
    except Exception:
        # Building trace metadata failed; masking itself already succeeded,
        # so log and leave the span without attachments.
        logger.exception("Failed to build PII Masking trace attachments")
        return

    _set_span_attachments(span, attachments)

    try:
        input_payload = json.dumps({"files": input_files})
        output_payload = json.dumps({"files": output_files})
        span.set_attribute("input", input_payload)
        span.set_attribute("input.value", input_payload)
        span.set_attribute("input.mime_type", "application/json")
        span.set_attribute("output", output_payload)
        span.set_attribute("output.value", output_payload)
        span.set_attribute("output.mime_type", "application/json")
    except Exception:
        logger.exception("Failed to set PII Masking input/output attributes")
225+
51226

52227
def create_analyze_file_tool(
53228
resource: AgentInternalToolResourceConfig, llm: BaseChatModel
@@ -95,16 +270,30 @@ async def tool_fn(**kwargs: Any):
95270

96271
masker: PiiMasker | None = None
97272
if client is not None and PiiMasker.is_policy_enabled(policy):
98-
masker = PiiMasker(client, policy)
99-
try:
100-
analysis_task, files = await masker.apply(analysis_task, files)
101-
except Exception as exc:
102-
raise AgentRuntimeError(
103-
code=AgentRuntimeErrorCode.UNEXPECTED_ERROR,
104-
title="PII masking failed",
105-
detail=f"PII detection raised: {exc!r}",
106-
category=UiPathErrorCategory.SYSTEM,
107-
) from exc
273+
# Reconcile OTel current span with the LangChain/LangGraph external
274+
# span provider so the new span is parented under the active tool
275+
# call span and shares its trace id.
276+
parent_ctx = UiPathSpanUtils.get_parent_context()
277+
tracer = otel_trace.get_tracer(__name__)
278+
with tracer.start_as_current_span(
279+
"PII Masking", context=parent_ctx
280+
) as pii_span:
281+
# Required for the LLMOps exporter's span filter to keep this span.
282+
pii_span.set_attribute("uipath.custom_instrumentation", True)
283+
pii_span.set_attribute("span_type", "piiMasking")
284+
pii_span.set_attribute("type", "piiMasking")
285+
masker = PiiMasker(client, policy)
286+
try:
287+
analysis_task, files = await masker.apply(analysis_task, files)
288+
_emit_pii_masking_attachments(pii_span, files)
289+
except Exception as exc:
290+
pii_span.record_exception(exc)
291+
raise AgentRuntimeError(
292+
code=AgentRuntimeErrorCode.UNEXPECTED_ERROR,
293+
title="PII masking failed",
294+
detail=f"PII detection raised: {exc!r}",
295+
category=UiPathErrorCategory.SYSTEM,
296+
) from exc
108297

109298
try:
110299
human_message = HumanMessage(content=analysis_task)
@@ -122,6 +311,7 @@ async def tool_fn(**kwargs: Any):
122311
cast(AnyMessage, human_message_with_files),
123312
]
124313
config = var_child_runnable_config.get(None)
314+
config = _config_with_llm_call_attachments(config, files)
125315
result = await non_streaming_llm.ainvoke(messages, config=config)
126316

127317
del messages, human_message_with_files, files
@@ -198,6 +388,7 @@ async def _resolve_job_attachment_arguments(
198388
url=blob_info.uri,
199389
name=blob_info.name,
200390
mime_type=mime_type,
391+
attachment_id=str(attachment_id),
201392
)
202393
file_infos.append(file_info)
203394

@@ -222,7 +413,17 @@ async def add_files_to_message(
222413

223414
file_content_blocks: list[DataContentBlock] = []
224415
for file in files:
225-
blocks = await build_file_content_blocks_for(file)
416+
# Prefer the redacted URL + pii_masked_ name for LLM content when PII masking ran.
417+
llm_file = (
418+
FileInfo(
419+
url=file.masked_attachment_url,
420+
name=_masked_name_for(file.name),
421+
mime_type=file.mime_type,
422+
)
423+
if file.masked_attachment_url
424+
else file
425+
)
426+
blocks = await build_file_content_blocks_for(llm_file)
226427
file_content_blocks.extend(blocks)
227428
return append_content_blocks_to_message(
228429
message, cast(list[ContentBlock], file_content_blocks)

0 commit comments

Comments
 (0)