Description
What happened?
When an error is raised between calls to `request_start` and `request_end`, or between `event_start` and `event_end`, the event handler has no way to register that the error occurred. This matters because the `_end` methods are used to clean up resources (for example, in OTel to mark the span as failed and emit it to the output). Without such a hook, handlers cannot correctly close their resources (if any) or record that a failure happened.
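To make the gap concrete, here is a minimal sketch assuming a simplified, synchronous handler interface; the method names follow this issue, but the signatures and the `OtelLikeHandler`/`traced_call` helpers are illustrative only, not db-ally's actual API:

```python
# Minimal sketch of the problem (illustrative only, not db-ally's actual API).
class OtelLikeHandler:
    def __init__(self, tracer):
        self.tracer = tracer
        self._span = None

    def event_start(self, event_name):
        # A span is opened here...
        self._span = self.tracer.start_span(event_name)

    def event_end(self, event_name, result):
        # ...and is only closed here. If the traced operation raises between
        # event_start and event_end, this method never runs, so the span is
        # neither marked as failed nor ended/exported.
        self._span.end()


def traced_call(handler, event_name, fn):
    handler.event_start(event_name)
    result = fn()  # if fn() raises, event_end below is skipped
    handler.event_end(event_name, result)
    return result
```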
The potential fix:
Introduce new event handler methods:
- `request_failed`
- `event_failed`

These would capture the failures, and the caller would invoke the appropriate one based on the execution flow. Moreover, `event_failed` does not have to imply `request_failed`: if retrieving data for a collection failed but a fallback worked, we can get either of the following event chains (a sketch of the proposed hooks follows the list):
- start -> retrieve from A (failed) -> retrieve from B (success) -> finish (success)
- start -> retrieve from A (failed) -> retrieve from B (failed) -> finish (failed)
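A rough sketch of what the proposed hooks could look like for an OTel-backed handler. The method names come from this issue; the signatures, the `span` argument, and the caller-side `run_event` helper are assumptions, not an existing db-ally API:

```python
# Sketch of the proposed failure hooks (signatures are assumptions).
from opentelemetry.trace import Status, StatusCode


class OtelEventHandlerWithFailures:
    def __init__(self, tracer):
        self.tracer = tracer

    async def event_start(self, event_name):
        return self.tracer.start_span(event_name)

    async def event_end(self, result, span) -> None:
        span.end()

    async def event_failed(self, error: Exception, span) -> None:
        # New hook: called instead of event_end when the traced operation
        # raised, so the span is still closed, but marked as failed first.
        span.record_exception(error)
        span.set_status(Status(StatusCode.ERROR, str(error)))
        span.end()

    async def request_failed(self, error: Exception, span) -> None:
        # Same idea at the request level; a failed event does not have to
        # escalate here if a fallback succeeded for the request as a whole.
        span.record_exception(error)
        span.set_status(Status(StatusCode.ERROR, str(error)))
        span.end()


async def run_event(handler, event_name, coro_fn):
    # Caller side: route to event_end on success, event_failed on error.
    span = await handler.event_start(event_name)
    try:
        result = await coro_fn()
    except Exception as exc:
        await handler.event_failed(exc, span)
        raise
    await handler.event_end(result, span)
    return result
```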
This would also be a good opportunity to introduce error-as-value returns (as in Go, Scala, Haskell, etc.), where we would return either a result or a failure (not necessarily a monad ;) ). That would explicitly mark in the type system which operations are expected to fail and force the user to handle those errors (so they won't forget to call `event_failed`, for example).
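For illustration, a generic error-as-value pattern in Python could look like the following; the `Ok`/`Err`/`Result` types and the `retrieve`/`ask_with_fallback` helpers are hypothetical, and `event_failed` is the hook proposed above, not an existing API:

```python
# Generic illustration of error-as-value returns (not db-ally code).
from dataclasses import dataclass
from typing import Generic, TypeVar, Union

T = TypeVar("T")


@dataclass
class Ok(Generic[T]):
    value: T


@dataclass
class Err:
    error: Exception


Result = Union[Ok[T], Err]


async def retrieve(source, query) -> Result[list]:
    # 'source.fetch' is a hypothetical data source API used for illustration.
    try:
        return Ok(await source.fetch(query))
    except Exception as exc:
        return Err(exc)


async def ask_with_fallback(handler, primary, fallback, query) -> Result[list]:
    # The failure is a value, so the caller cannot silently drop it: it has to
    # report it (event_failed) and/or handle it (fallback) before moving on.
    result = await retrieve(primary, query)
    if isinstance(result, Err):
        await handler.event_failed(result.error)  # proposed hook; signature assumed
        result = await retrieve(fallback, query)
    return result
```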
How can we reproduce it?
```python
import asyncio
from typing import Annotated, List

import dbally
import pandas as pd
from dbally import DataFrameBaseView
from dbally.audit import CLIEventHandler
from dbally.audit.event_handlers.otel_event_handler import OtelEventHandler
from dbally.nl_responder.nl_responder import NLResponder
from dbally.similarity import SimilarityFetcher, FaissStore
from dbally.embeddings.litellm import LiteLLMEmbeddingClient
from dbally.similarity.index import SimilarityIndex
from dbally.views import decorators
from dbally.llms import LiteLLM
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.resources import Resource
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.trace.export import (
    BatchSpanProcessor,
    ConsoleSpanExporter,
)


class DFFetcher(SimilarityFetcher):
    def __init__(self, df, column) -> None:
        self.df = df
        self.column = column

    async def fetch(self) -> List[str]:
        return self.df[self.column].unique().tolist()


async def main():
    exporter = OTLPSpanExporter("http://localhost:4317", insecure=True)
    provider = TracerProvider(resource=Resource({"service.name": "db-ally"}))
    processor = BatchSpanProcessor(exporter)
    provider.add_span_processor(processor)

    df = pd.DataFrame({
        "name": ["Alice", "Bob", "Charlie", "David", "Eve"],
        "city": ["New York", "Los Angeles", "Chicago", "Houston", "Phoenix"],
    })

    fetcher = DFFetcher(df, "city")
    print(await fetcher.fetch())

    country_store = FaissStore(
        index_dir="./similarity_indexes",
        index_name="country_similarity",
        embedding_client=LiteLLMEmbeddingClient(
            model="text-embedding-3-small",  # to use openai embedding model
        ),
    )

    city_similarity = SimilarityIndex(
        fetcher=fetcher,
        store=country_store,
    )
    await city_similarity.update()

    class MyView(DataFrameBaseView):
        @decorators.view_filter()
        def filter_by_city(self, city: Annotated[str, city_similarity]):
            raise ValueError("ups - db error")
            return self.df['city'] == city

    llm = LiteLLM(model_name="gpt-4o")
    collection = dbally.create_collection(
        "clients",
        llm=llm,
        event_handlers=[OtelEventHandler(provider)],
        nl_responder=NLResponder(llm)
    )
    collection.add(MyView, lambda: MyView(df))

    result = await collection.ask("What clients are from LA?", return_natural_response=True)
    print(result)


if __name__ == '__main__':
    asyncio.run(main())
```
Relevant log output
```
Traceback (most recent call last):
  File "/home/bartoszmikulski/Repos/db-ally/error.py", line 86, in <module>
    asyncio.run(main())
  File "/usr/lib/python3.10/asyncio/runners.py", line 44, in run
    return loop.run_until_complete(main)
  File "/usr/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete
    return future.result()
  File "/home/bartoszmikulski/Repos/db-ally/error.py", line 80, in main
    result = await collection.ask("What clients are from LA?", return_natural_response=True)
  File "/home/bartoszmikulski/Repos/db-ally/src/dbally/collection/collection.py", line 204, in ask
    view_result = await view.ask(
  File "/home/bartoszmikulski/Repos/db-ally/src/dbally/views/structured.py", line 72, in ask
    await self.apply_filters(iql)
  File "/home/bartoszmikulski/Repos/db-ally/src/dbally/views/pandas_base.py", line 37, in apply_filters
    self._filter_mask = await self.build_filter_node(filters.root)
  File "/home/bartoszmikulski/Repos/db-ally/src/dbally/views/pandas_base.py", line 54, in build_filter_node
    return await self.call_filter_method(node)
  File "/home/bartoszmikulski/Repos/db-ally/src/dbally/views/methods_base.py", line 101, in call_filter_method
    return method(*args)
  File "/home/bartoszmikulski/Repos/db-ally/error.py", line 67, in filter_by_city
    raise ValueError("ups - db error")
ValueError: ups - db error
```