19 changes: 14 additions & 5 deletions src/backend/base/langflow/api/v1/flows.py
@@ -13,6 +13,7 @@
from fastapi_pagination import Page, Params
from fastapi_pagination.ext.sqlmodel import apaginate
from lfx.services.cache.utils import CACHE_MISS
from pydantic import ValidationError
from sqlmodel import and_, col, select

from langflow.api.utils import (
@@ -437,11 +438,19 @@ async def upload_file(

     # Normalise code fields: if exported with code-as-lines format, rejoin to
     # strings before creating the Pydantic models so the DB always stores strings.
-    if "flows" in data:
-        data = {**data, "flows": [normalize_code_for_import(f) for f in data["flows"]]}
-        flow_list = FlowListCreate(**data)
-    else:
-        flow_list = FlowListCreate(flows=[FlowCreate(**normalize_code_for_import(data))])
+    if not isinstance(data, dict):
+        raise HTTPException(
+            status_code=422,
+            detail="Invalid JSON: expected an object with 'flows' or a single flow object",
+        )
+    try:
+        if "flows" in data:
+            data = {**data, "flows": [normalize_code_for_import(f) for f in data["flows"]]}
+            flow_list = FlowListCreate(**data)
+        else:
+            flow_list = FlowListCreate(flows=[FlowCreate(**normalize_code_for_import(data))])
+    except ValidationError as e:
+        raise HTTPException(status_code=422, detail=str(e)) from e
Comment on lines +446 to +453
⚠️ Potential issue | 🟠 Major

Handle non-object upload payloads to avoid fallback 500s

At line 442, `"flows" in data` assumes `data` is a mapping. If the uploaded JSON is a list or scalar, the else branch can raise TypeError (`**` unpacking on a non-mapping), which bypasses this ValidationError handler and can still return a 500.

Proposed fix
     # Normalise code fields: if exported with code-as-lines format, rejoin to
     # strings before creating the Pydantic models so the DB always stores strings.
     try:
+        if not isinstance(data, dict):
+            raise HTTPException(status_code=422, detail="Invalid payload: expected a JSON object")
         if "flows" in data:
+            if not isinstance(data["flows"], list):
+                raise HTTPException(status_code=422, detail="Invalid payload: 'flows' must be a list")
             data = {**data, "flows": [normalize_code_for_import(f) for f in data["flows"]]}
             flow_list = FlowListCreate(**data)
         else:
             flow_list = FlowListCreate(flows=[FlowCreate(**normalize_code_for_import(data))])
     except ValidationError as e:
         raise HTTPException(status_code=422, detail=str(e)) from e
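
To make the failure mode concrete, here is a minimal standalone sketch (using a hypothetical stand-in model, not the real FlowCreate) of why a JSON array slips past the membership check and fails with a TypeError instead of a ValidationError:

from pydantic import BaseModel


class FakeFlowCreate(BaseModel):
    """Stand-in for FlowCreate, for illustration only."""

    name: str


data = [{"name": "flow1"}]      # what orjson.loads() returns for a JSON array
print("flows" in data)          # False: membership tests list items, not dict keys
try:
    FakeFlowCreate(**data)      # ** unpacking requires a mapping
except TypeError as exc:        # not a ValidationError, so `except ValidationError` misses it
    print(type(exc).__name__, exc)

Because the failure is a TypeError rather than a ValidationError, the try/except added in this PR would not catch it on its own, which is why the explicit isinstance(data, dict) guard is needed.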
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/backend/base/langflow/api/v1/flows.py` around lines 441-448: the handler assumes `data` is a mapping, which causes a TypeError when the uploaded JSON is a list or scalar. Update the block around FlowListCreate / FlowCreate and normalize_code_for_import to first validate that `data` is a dict (e.g., `isinstance(data, dict)`) and, if it is not, raise an HTTPException(status_code=422) with a clear message; otherwise proceed with the existing `"flows" in data` branching and construction of FlowListCreate / FlowCreate, so that non-object payloads are handled before any `**` unpacking occurs and the existing ValidationError catch still applies.


    # TODO: Full-version import is planned as a follow-up feature.
    # When implemented, extract raw flow dicts here to read embedded "version"
@@ -198,8 +198,8 @@ class TraceSummaryRead(BaseModel):
     total_tokens: int
     flow_id: UUID
     session_id: str
-    input: dict[str, Any] | None = None
-    output: dict[str, Any] | None = None
+    input: dict[str, Any] | str | None = None
+    output: dict[str, Any] | str | None = None


 class TraceListResponse(BaseModel):
81 changes: 81 additions & 0 deletions src/backend/tests/unit/api/v1/test_flows.py
@@ -648,6 +648,87 @@ async def test_create_flow_rejects_traversal_in_subpath(client: AsyncClient, log
    assert response.status_code == status.HTTP_400_BAD_REQUEST


async def test_upload_flow_rejects_list_payload(client: AsyncClient, logged_in_headers):
    """Regression: uploading a JSON array (not an object) must return 422, not 500.

    orjson.loads() on a list payload returns a Python list. Before the isinstance
    guard, 'flows' in <list> silently evaluates to False, routing to the else branch
    where **normalize_code_for_import(list) raises TypeError — escaping as a 500.
    """
    import json

    file_content = json.dumps([{"name": "flow1", "data": {}}])

    response = await client.post(
        "api/v1/flows/upload/",
        files={"file": ("flows.json", file_content, "application/json")},
        headers=logged_in_headers,
    )
    assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY


async def test_upload_flow_rejects_scalar_payload(client: AsyncClient, logged_in_headers):
    """Regression: uploading a JSON scalar (string/number) must return 422, not 500."""
    import json

    file_content = json.dumps("just a string")

    response = await client.post(
        "api/v1/flows/upload/",
        files={"file": ("flows.json", file_content, "application/json")},
        headers=logged_in_headers,
    )
    assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY


async def test_upload_flow_rejects_endpoint_name_with_dots(client: AsyncClient, logged_in_headers):
    """Regression: endpoint_name containing dots must return 422, not 500.

    Previously a ValidationError from the Pydantic model escaped the handler
    and hit the global exception_handler, producing a 500 and a Scarf telemetry
    event. The import path now wraps FlowCreate construction in a try/except
    and re-raises as HTTPException(422).
    """
    import json

    flow_data = {
        "name": "neuro-vision",
        "data": {},
        "endpoint_name": "neuro-vision-planning.phase1.contract",
    }
    file_content = json.dumps({"folder_name": "proj", "flows": [flow_data]})

    response = await client.post(
        "api/v1/flows/upload/",
        files={"file": ("flows.json", file_content, "application/json")},
        headers=logged_in_headers,
    )
    assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY


async def test_upload_flow_accepts_valid_endpoint_name(client: AsyncClient, logged_in_headers):
    """Endpoint names with only letters, numbers, hyphens, and underscores are accepted."""
    import json

    flow_data = {
        "name": "neuro-vision",
        "data": {},
        "endpoint_name": "neuro-vision-planning_phase1",
    }
    file_content = json.dumps({"folder_name": "proj", "flows": [flow_data]})

    response = await client.post(
        "api/v1/flows/upload/",
        files={"file": ("flows.json", file_content, "application/json")},
        headers=logged_in_headers,
    )
    assert response.status_code == status.HTTP_201_CREATED
    body = response.json()
    assert isinstance(body, list)
    assert len(body) == 1
    assert body[0]["name"] == "neuro-vision"

Comment on lines +709 to +730
⚠️ Potential issue | 🟠 Major

Strengthen the success assertion to prevent false positives

At lines 692-693, asserting only `!= 500` and `!= 422` can still pass for other failures. Please assert the expected success code (and ideally the basic response shape) for a valid payload.

Proposed fix
     response = await client.post(
         "api/v1/flows/upload/",
         files={"file": ("flows.json", file_content, "application/json")},
         headers=logged_in_headers,
     )
-    assert response.status_code != status.HTTP_500_INTERNAL_SERVER_ERROR
-    assert response.status_code != status.HTTP_422_UNPROCESSABLE_ENTITY
+    assert response.status_code == status.HTTP_201_CREATED
+    result = response.json()
+    assert isinstance(result, list)
+    assert len(result) == 1
+    assert result[0]["endpoint_name"] == "neuro-vision-planning_phase1"

Based on learnings (applies to `**/test_*.py`): For API endpoints, verify both success and error response testing.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/backend/tests/unit/api/v1/test_flows.py` around lines 676-694: the test test_upload_flow_accepts_valid_endpoint_name currently only asserts that the response is not 500 or 422, which can hide other failures. Change it to assert the explicit expected success status (e.g., `response.status_code == status.HTTP_200_OK` or `HTTP_201_CREATED`, depending on the API) and validate the basic response shape by checking that the JSON payload contains the expected keys (parse `response.json()` and assert the presence/values of keys like "folder_name", a list of uploaded flows, or a success flag). Locate the client.post call and replace the negative assertions with a positive status assertion and one or two lightweight JSON checks to confirm the upload succeeded.


async def test_upload_flow_rejects_absolute_path(client: AsyncClient, logged_in_headers):
"""Test that uploading flows with absolute paths is rejected."""
import json
src/backend/tests/unit/services/database/models/traces/test_enum_serialization.py
@@ -18,6 +18,7 @@
    SpanStatus,
    SpanTable,
    SpanType,
    TraceSummaryRead,
    TraceTable,
)
from sqlalchemy import Enum as SQLEnum
@@ -114,3 +115,58 @@ def test_bind_processor_emits_enum_value(self, member):
        bind_processor = enum_type.bind_processor(dialect)
        bound = bind_processor(member) if bind_processor else member
        assert bound == member.value


_TRACE_SUMMARY_DEFAULTS: dict = {
    "id": "00000000-0000-0000-0000-000000000001",
    "name": "t",
    "status": SpanStatus.OK,
    "start_time": None,
    "total_latency_ms": 0,
    "total_tokens": 0,
    "flow_id": "00000000-0000-0000-0000-000000000002",
    "session_id": "s",
}


class TestTraceSummaryReadIoFields:
    """Regression: input/output accept the '[Unserializable Object]' sentinel string.

    When a trace's input or output contains a non-JSON-serializable object the
    serialization layer stores the sentinel string ``'[Unserializable Object]'``
    instead of a dict. ``TraceSummaryRead`` must accept that value without
    raising a ``ValidationError`` so the list endpoint never returns a 500.
    """

    def test_should_accept_dict_input(self):
        summary = TraceSummaryRead(**{**_TRACE_SUMMARY_DEFAULTS, "input": {"key": "value"}})
        assert summary.input == {"key": "value"}

    def test_should_accept_dict_output(self):
        summary = TraceSummaryRead(**{**_TRACE_SUMMARY_DEFAULTS, "output": {"result": 1}})
        assert summary.output == {"result": 1}

    def test_should_accept_none_input(self):
        summary = TraceSummaryRead(**{**_TRACE_SUMMARY_DEFAULTS, "input": None})
        assert summary.input is None

    def test_should_accept_none_output(self):
        summary = TraceSummaryRead(**{**_TRACE_SUMMARY_DEFAULTS, "output": None})
        assert summary.output is None

    def test_should_accept_unserializable_sentinel_as_input(self):
        summary = TraceSummaryRead(**{**_TRACE_SUMMARY_DEFAULTS, "input": "[Unserializable Object]"})
        assert summary.input == "[Unserializable Object]"

    def test_should_accept_unserializable_sentinel_as_output(self):
        summary = TraceSummaryRead(**{**_TRACE_SUMMARY_DEFAULTS, "output": "[Unserializable Object]"})
        assert summary.output == "[Unserializable Object]"

    def test_should_accept_arbitrary_string_as_input(self):
        summary = TraceSummaryRead(**{**_TRACE_SUMMARY_DEFAULTS, "input": "plain string"})
        assert summary.input == "plain string"
Comment on lines +165 to +167
⚠️ Potential issue | 🟡 Minor

Add the symmetric arbitrary-string case for output.

Line 165 adds arbitrary string coverage for input, but output (also widened to str | dict | None) doesn’t have the same regression guard.

Suggested test addition
 class TestTraceSummaryReadIoFields:
@@
     def test_should_accept_arbitrary_string_as_input(self):
         summary = TraceSummaryRead(**{**_TRACE_SUMMARY_DEFAULTS, "input": "plain string"})
         assert summary.input == "plain string"
 
+    def test_should_accept_arbitrary_string_as_output(self):
+        summary = TraceSummaryRead(**{**_TRACE_SUMMARY_DEFAULTS, "output": "plain string"})
+        assert summary.output == "plain string"
+
     def test_should_default_input_and_output_to_none(self):
         summary = TraceSummaryRead(**_TRACE_SUMMARY_DEFAULTS)
         assert summary.input is None
         assert summary.output is None

As per coding guidelines: “Verify tests cover both positive and negative scenarios where appropriate.”
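
As a minimal illustration of the widened annotation (a stripped-down, hypothetical stand-in model, not the real TraceSummaryRead), the `dict[str, Any] | str | None` union admits structured payloads, the sentinel string, and None:

from typing import Any

from pydantic import BaseModel


class IoSummary(BaseModel):
    # Hypothetical stand-in carrying only the widened input/output fields.
    input: dict[str, Any] | str | None = None
    output: dict[str, Any] | str | None = None


print(IoSummary(input={"key": "value"}).input)            # structured dict still validates
print(IoSummary(input="[Unserializable Object]").input)   # sentinel string is accepted
print(IoSummary(output="plain string").output)            # arbitrary strings on output, too
print(IoSummary().output)                                  # both fields default to None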

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/backend/tests/unit/services/database/models/traces/test_enum_serialization.py` around lines 165-167: add a symmetric test to cover arbitrary-string values for the output field. Create a new test (e.g., test_should_accept_arbitrary_string_as_output) that constructs `TraceSummaryRead(**{**_TRACE_SUMMARY_DEFAULTS, "output": "plain string"})` and asserts `summary.output == "plain string"`; this mirrors the existing test_should_accept_arbitrary_string_as_input and ensures the widened type for output (str | dict | None) accepts plain strings without regression.


    def test_should_default_input_and_output_to_none(self):
        summary = TraceSummaryRead(**_TRACE_SUMMARY_DEFAULTS)
        assert summary.input is None
        assert summary.output is None