Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,7 @@ def _import_chromadb() -> ModuleType:

def _import_embedding_fn():
try:
from chromadb.utils.embedding_functions import (
ONNXMiniLM_L6_V2,
)
from chromadb.utils.embedding_functions import ONNXMiniLM_L6_V2

return ONNXMiniLM_L6_V2
except ImportError as e:
Expand Down
225 changes: 225 additions & 0 deletions backend/syft_space/components/dataset_types/weaviate_remote/filters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
"""Filter schemas and logic for Remote Weaviate dataset type."""

import math
from datetime import datetime, timezone
from enum import Enum
from typing import Annotated, Any, Literal, Union

from pydantic import BaseModel, Field, model_validator


class FilterOperator(str, Enum):
    """Comparison operators for property filters.

    Each member's value is the short string accepted in a filter
    condition's ``op`` field. Which operators are valid for which value
    dtype is decided by ``DTYPE_ALLOWED_OPERATORS``; the Weaviate method
    each one maps to is listed in ``OPERATOR_MAP``.
    """

    # Equality / inequality
    EQUAL = "eq"
    NOT_EQUAL = "ne"
    # Ordering comparisons
    GREATER_THAN = "gt"
    GREATER_OR_EQUAL = "gte"
    LESS_THAN = "lt"
    LESS_OR_EQUAL = "lte"
    # String-only operator (see DTYPE_ALLOWED_OPERATORS)
    LIKE = "like"
    # Allowed for every dtype
    IS_NONE = "is_none"
    # List-valued operators: the condition's value is coerced to a list
    CONTAINS_ANY = "contains_any"
    CONTAINS_ALL = "contains_all"


class FilterValueDtype(str, Enum):
    """Value type categories for filter conditions (GA-style).

    Selects how a condition's raw value is coerced by
    ``_coerce_single_value`` and which operators it may be combined with.
    """

    STRING = "string"
    NUMERIC = "numeric"  # coerced to int or float
    DATETIME = "datetime"  # coerced to a timezone-aware datetime
    BOOLEAN = "boolean"


class LogicalOperator(str, Enum):
    """Logical operators for combining filter conditions.

    Used as the ``op`` of a ``FilterGroup``; ``build_filter_node`` turns
    each one into the corresponding combinator on the Weaviate filter class.
    """

    AND = "and"
    OR = "or"
    NOT = "not"


def _coerce_single_value(
    raw: Any, dtype: "FilterValueDtype"
) -> str | int | float | bool | datetime:
    """Coerce a single raw value to the target dtype.

    Args:
        raw: The value as supplied in the filter configuration; usually a
            string, but may already be a native Python value.
        dtype: The category the value must be converted to.

    Returns:
        ``str`` for STRING, ``int`` or ``float`` for NUMERIC, ``bool`` for
        BOOLEAN, and a timezone-aware ``datetime`` for DATETIME.

    Raises:
        ValueError: If ``raw`` cannot be interpreted as ``dtype`` or the
            dtype is unknown.
    """
    if dtype == FilterValueDtype.STRING:
        return str(raw)

    if dtype == FilterValueDtype.NUMERIC:
        # bool is a subclass of int, but True/False is not a numeric filter value.
        if isinstance(raw, bool):
            raise ValueError(f"Cannot coerce '{raw}' to numeric")
        if isinstance(raw, (int, float)):
            # Keep native numbers as they are: round-tripping through str()
            # breaks floats rendered in exponent form (e.g. 1e16 -> '1e+16').
            number: int | float = raw
        else:
            text = str(raw).strip()
            try:
                number = int(text)
            except ValueError:
                # Not an integer literal: accept decimals and exponent
                # notation such as "1.5" or "1e5".
                number = float(text)
        # "nan" / "inf" parse as floats but are not usable comparison operands.
        if isinstance(number, float) and not math.isfinite(number):
            raise ValueError(f"Cannot coerce '{raw}' to a finite number")
        return number

    if dtype == FilterValueDtype.BOOLEAN:
        if isinstance(raw, bool):
            return raw
        text = str(raw).lower().strip()
        if text in ("true", "1", "yes"):
            return True
        if text in ("false", "0", "no"):
            return False
        raise ValueError(f"Cannot coerce '{raw}' to boolean")

    if dtype == FilterValueDtype.DATETIME:
        if isinstance(raw, datetime):
            moment = raw
        else:
            text = str(raw).strip()
            # datetime.fromisoformat() only understands a trailing "Z"
            # from Python 3.11 on; spell the UTC offset out explicitly.
            if text.endswith("Z"):
                text = f"{text[:-1]}+00:00"
            moment = datetime.fromisoformat(text)
        # Ensure timezone-aware (Weaviate requires it); default to UTC.
        # Applied to already-parsed datetimes too, so naive instances do
        # not slip through.
        if moment.tzinfo is None:
            moment = moment.replace(tzinfo=timezone.utc)
        return moment

    raise ValueError(f"Unknown dtype: {dtype}")


# Operators accepted for each value dtype. FilterCondition.validate_and_coerce
# rejects any (dtype, operator) pair that is not listed here, and reports the
# list for the dtype in its error message.
DTYPE_ALLOWED_OPERATORS: dict[FilterValueDtype, list[FilterOperator]] = {
    # Strings: equality, pattern matching and list membership; no ordering.
    FilterValueDtype.STRING: [
        FilterOperator.EQUAL,
        FilterOperator.NOT_EQUAL,
        FilterOperator.LIKE,
        FilterOperator.CONTAINS_ANY,
        FilterOperator.CONTAINS_ALL,
        FilterOperator.IS_NONE,
    ],
    # Numbers: equality, ordering and list membership.
    FilterValueDtype.NUMERIC: [
        FilterOperator.EQUAL,
        FilterOperator.NOT_EQUAL,
        FilterOperator.GREATER_THAN,
        FilterOperator.GREATER_OR_EQUAL,
        FilterOperator.LESS_THAN,
        FilterOperator.LESS_OR_EQUAL,
        FilterOperator.CONTAINS_ANY,
        FilterOperator.CONTAINS_ALL,
        FilterOperator.IS_NONE,
    ],
    # Datetimes: equality and ordering only (no list membership).
    FilterValueDtype.DATETIME: [
        FilterOperator.EQUAL,
        FilterOperator.NOT_EQUAL,
        FilterOperator.GREATER_THAN,
        FilterOperator.GREATER_OR_EQUAL,
        FilterOperator.LESS_THAN,
        FilterOperator.LESS_OR_EQUAL,
        FilterOperator.IS_NONE,
    ],
    # Booleans: equality only.
    FilterValueDtype.BOOLEAN: [
        FilterOperator.EQUAL,
        FilterOperator.NOT_EQUAL,
        FilterOperator.IS_NONE,
    ],
}


class FilterCondition(BaseModel):
    """A single property filter condition.

    After field validation the model checks that ``op`` is permitted for
    ``value_dtype`` and replaces ``value`` with its coerced form:

    * ``contains_any`` / ``contains_all``: a non-empty list whose items are
      coerced to ``value_dtype`` (a comma-separated string is split first);
    * ``is_none``: a boolean flag, whatever the property's dtype is;
    * every other operator: a single value coerced to ``value_dtype``.
    """

    type: Literal["condition"] = "condition"
    property: str = Field(..., description="Weaviate property name to filter on")
    op: FilterOperator = Field(..., description="Comparison operator")
    value_dtype: FilterValueDtype = Field(
        default=FilterValueDtype.STRING,
        description="Value type category: string, numeric, datetime, or boolean.",
    )
    value: str | int | float | bool | datetime | list[str | int | float | datetime] = (
        Field(
            ...,
            description="Value to compare against. Use a list for contains_any/contains_all.",
        )
    )

    @model_validator(mode="after")
    def validate_and_coerce(self) -> "FilterCondition":
        """Validate operator is allowed for the dtype and coerce value.

        Returns:
            The same instance, with ``value`` replaced by its coerced form.

        Raises:
            ValueError: If the operator is not allowed for the dtype, the
                value has the wrong shape for the operator, or the value
                cannot be coerced.
        """
        allowed_ops = DTYPE_ALLOWED_OPERATORS.get(self.value_dtype, [])
        if self.op not in allowed_ops:
            raise ValueError(
                f"operator '{self.op.value}' not allowed for dtype "
                f"'{self.value_dtype.value}'. Allowed: {[o.value for o in allowed_ops]}"
            )

        takes_list = self.op in (
            FilterOperator.CONTAINS_ANY,
            FilterOperator.CONTAINS_ALL,
        )

        if takes_list:
            # Ensure value is a list; split comma-separated string if needed
            if isinstance(self.value, str):
                items = [v.strip() for v in self.value.split(",") if v.strip()]
            elif isinstance(self.value, list):
                items = self.value
            else:
                items = [self.value]
            if not items:
                raise ValueError(
                    f"operator '{self.op.value}' requires at least one value"
                )
        elif isinstance(self.value, list):
            # Without this check a list would be silently stringified for
            # the string dtype (e.g. "['a', 'b']").
            raise ValueError(
                f"operator '{self.op.value}' requires a single value, not a list"
            )

        # is_none compares against a boolean flag ("is the property null?"),
        # so its value is a boolean regardless of the property's dtype.
        target_dtype = (
            FilterValueDtype.BOOLEAN
            if self.op == FilterOperator.IS_NONE
            else self.value_dtype
        )

        try:
            if takes_list:
                self.value = [_coerce_single_value(v, target_dtype) for v in items]
            else:
                self.value = _coerce_single_value(self.value, target_dtype)
        except (ValueError, TypeError) as e:
            raise ValueError(
                f"Cannot coerce value '{self.value}' to {target_dtype.value}: {e}"
            ) from e

        return self


class FilterGroup(BaseModel):
    """A group of conditions combined with a logical operator.

    Supports 1 level of nesting: operands can be conditions or sub-groups,
    but sub-groups can only contain conditions. A group must have at least
    one operand, since an empty group cannot be turned into a filter.
    """

    type: Literal["group"] = "group"
    op: LogicalOperator = Field(..., description="Logical operator to combine operands")
    operands: list[
        Annotated[Union[FilterCondition, "FilterGroup"], Field(discriminator="type")]
    ] = Field(
        ...,
        # An empty group has nothing to combine (and nothing to negate).
        min_length=1,
        description="Conditions or sub-groups to combine",
    )

    @model_validator(mode="after")
    def validate_nesting_depth(self) -> "FilterGroup":
        """Ensure max 1 level of nesting.

        Returns:
            The same instance, unchanged.

        Raises:
            ValueError: If any sub-group itself contains a group.
        """
        for operand in self.operands:
            if isinstance(operand, FilterGroup) and any(
                isinstance(inner, FilterGroup) for inner in operand.operands
            ):
                raise ValueError("Max 1 level of filter nesting allowed")
        return self


# FilterGroup refers to itself through the "FilterGroup" forward reference in
# `operands`; rebuild the model now that the class object exists.
FilterGroup.model_rebuild()

# A configured filter is either a single condition or a group; the `type`
# field ("condition" / "group") selects which model is used.
WeaviateFilter = Annotated[FilterCondition | FilterGroup, Field(discriminator="type")]

# Name of the method that build_filter_node looks up (via getattr) on the
# object returned by `filter_cls.by_property(...)` for each operator.
OPERATOR_MAP: dict[FilterOperator, str] = {
    FilterOperator.EQUAL: "equal",
    FilterOperator.NOT_EQUAL: "not_equal",
    FilterOperator.GREATER_THAN: "greater_than",
    FilterOperator.GREATER_OR_EQUAL: "greater_or_equal",
    FilterOperator.LESS_THAN: "less_than",
    FilterOperator.LESS_OR_EQUAL: "less_or_equal",
    FilterOperator.LIKE: "like",
    FilterOperator.IS_NONE: "is_none",
    FilterOperator.CONTAINS_ANY: "contains_any",
    FilterOperator.CONTAINS_ALL: "contains_all",
}


def build_filter_node(node: FilterCondition | FilterGroup, filter_cls: Any) -> Any:
    """Recursively build a Weaviate Filter object from a filter node.

    Args:
        node: A FilterCondition or FilterGroup to convert.
        filter_cls: The Weaviate ``Filter`` class (passed in to avoid
            a hard dependency on the weaviate package at import time).

    Returns:
        A Weaviate _Filters object ready to pass to a query.

    Raises:
        ValueError: If a group has no operands.
    """
    if isinstance(node, FilterCondition):
        # Look up the comparison method (e.g. ``equal``) on the property
        # filter and apply it to the already-coerced value.
        prop_filter = filter_cls.by_property(node.property)
        compare = getattr(prop_filter, OPERATOR_MAP[node.op])
        return compare(node.value)

    # FilterGroup: build every operand first, then combine them.
    built = [build_filter_node(operand, filter_cls) for operand in node.operands]
    if not built:
        # Without this guard the NOT branch fails with an opaque IndexError.
        raise ValueError(f"Filter group '{node.op.value}' has no operands")

    if node.op == LogicalOperator.AND:
        return filter_cls.all_of(built)
    if node.op == LogicalOperator.OR:
        return filter_cls.any_of(built)

    # NOT: negate the single operand, or the conjunction of several.
    inner = filter_cls.all_of(built) if len(built) > 1 else built[0]
    return filter_cls.not_(inner)
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
SearchParameters,
SearchResult,
)
from syft_space.components.dataset_types.weaviate_remote.filters import (
WeaviateFilter,
build_filter_node,
)
from syft_space.components.shared.domain_types import (
HealthcheckResponse,
HealthcheckStatus,
Expand All @@ -21,7 +25,7 @@
try:
import weaviate
from weaviate.classes.init import Auth
from weaviate.classes.query import MetadataQuery
from weaviate.classes.query import Filter, MetadataQuery

enabled = True
except ImportError:
Expand Down Expand Up @@ -54,6 +58,10 @@ class RemoteWeaviateConfiguration(BaseModel):
default=None,
description="Properties to include in metadata (e.g., ['title', 'author']). If not specified, all properties are included.",
)
filters: WeaviateFilter | None = Field(
default=None,
description="Filter applied when searching. Single condition or group with and/or/not.",
)


class RemoteWeaviateDatasetType(BaseDatasetType):
Expand Down Expand Up @@ -112,15 +120,34 @@ async def validate_configuration(cls, configuration: dict[str, Any]) -> None:
configuration: Configuration dictionary to validate
"""
try:
RemoteWeaviateConfiguration.model_validate(configuration)
config = RemoteWeaviateConfiguration.model_validate(configuration)
except ValidationError as e:
raise ValueError(f"Invalid configuration: {e}") from e

# Validate filters can be built into Weaviate filter objects
if config.filters and enabled:
try:
instance = cls(configuration)
instance._build_weaviate_filters()
except Exception as e:
raise ValueError(f"Invalid filter configuration: {e}") from e

@property
def collection_name(self) -> str:
    """Get the name of the collection.

    Returns:
        The ``collection_name`` value held by this instance's configuration.
    """
    return self.config.collection_name

def _build_weaviate_filters(self) -> Any:
    """Translate the configured filter tree into a Weaviate filter.

    Returns:
        The object produced by ``build_filter_node`` for the configured
        filters, or ``None`` when the configuration has no filters.
    """
    configured = self.config.filters
    return build_filter_node(configured, Filter) if configured else None

async def search(
self, ctx: SearchContext, query: str, params: SearchParameters | None = None
) -> SearchResult:
Expand Down Expand Up @@ -161,10 +188,13 @@ async def search(
# Get the collection
collection = client.collections.get(self.collection_name)

weaviate_filters = self._build_weaviate_filters()

results = await collection.query.near_text(
query=query,
limit=params.limit,
certainty=similarity_threshold,
filters=weaviate_filters,
return_metadata=MetadataQuery(
distance=True, score=True, creation_time=True
),
Expand Down
4 changes: 1 addition & 3 deletions backend/syft_space/components/marketplaces/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,7 @@
RegisterMarketplaceRequest,
TransactionResponse,
)
from syft_space.components.marketplaces.utils import (
ensure_valid_accounting_credentials,
)
from syft_space.components.marketplaces.utils import ensure_valid_accounting_credentials
from syft_space.components.shared.syfthub_client import SyftHubClient, SyftHubError
from syft_space.components.tenants.entities import Tenant
from syft_space.config import app_settings
Expand Down
36 changes: 9 additions & 27 deletions backend/syft_space/components/shared/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@ def matches_any_pattern(value: str, patterns: list[str]) -> bool:


class ConfigSchemaGenerator(GenerateJsonSchema):
"""Generates clean configuration schemas without $defs and class metadata.
"""Generates configuration schemas for frontend dynamic form rendering.

Produces a simplified schema with just 'properties' and 'required',
with all $ref references inlined. This is useful for frontend forms
and API documentation where the full JSON Schema complexity isn't needed.
Produces a simplified schema with 'properties', 'required', and '$defs'.
The $defs section is preserved so $ref pointers remain valid — the frontend
resolver handles $ref lookup at render time.

Usage:
from syft_space.components.shared.utils import ConfigSchemaGenerator
Expand All @@ -55,32 +55,14 @@ def generate(self, schema: Any, mode: str = "validation") -> dict[str, Any]:
mode: Schema generation mode ('validation' or 'serialization')

Returns:
Simplified schema with just 'properties' and 'required'
Schema with 'properties', 'required', and '$defs' (if present)
"""
json_schema = super().generate(schema, mode)

# Inline $defs into properties (removes need for $ref)
defs = json_schema.pop("$defs", {})
for prop_schema in json_schema.get("properties", {}).values():
if "$ref" in prop_schema:
ref_name = prop_schema["$ref"].split("/")[-1]
if ref_name in defs:
# Get the referenced schema
ref_schema = defs[ref_name].copy()
# Preserve description and default from the property
desc = prop_schema.get("description")
default = prop_schema.get("default")
# Replace property with inlined schema
prop_schema.clear()
prop_schema.update(ref_schema)
# Restore property-level overrides
if desc:
prop_schema["description"] = desc
if default is not None:
prop_schema["default"] = default

# Return only properties and required fields
return {
result: dict[str, Any] = {
"properties": json_schema.get("properties", {}),
"required": json_schema.get("required", []),
}
if "$defs" in json_schema:
result["$defs"] = json_schema["$defs"]
return result
5 changes: 1 addition & 4 deletions frontend/src/composables/useDatasetBrowser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,7 @@ export function useDatasetBrowser() {
}

const loadSubdirectory = async (parentNode: FileNode) => {
if (
parentNode.type !== 'directory' ||
(parentNode.hasLoaded && !parentNode.permissionDenied)
) {
if (parentNode.type !== 'directory' || (parentNode.hasLoaded && !parentNode.permissionDenied)) {
return
}

Expand Down
Loading