Timescaledb: handle schemas (#584)

alxtkr77 · Alex Toker · web-flow · commit 8d277f48d5a9 · 2025-09-03T16:00:28.000+07:00
* Timescaledb: handle schemas

* Fix docstring and make table name mandatory

---------

Co-authored-by: Alex Toker <alext@mckinsey.com>
diff --git a/storey/timescaledb_target.py b/storey/timescaledb_target.py
@@ -36,8 +36,8 @@ class TimescaleDBTarget(_Batching, _Writer):
         If not provided, timestamps will be parsed according to ISO-8601 format. Common formats include:
         "%Y-%m-%d %H:%M:%S", "%d/%m/%y %H:%M:%S UTC%z", etc.
     :param table: Name of the TimescaleDB hypertable where events will be written. The table must exist and be
-        configured as a hypertable before writing data. If not specified, the table name should be provided through
-        other means (e.g., via batching configuration).
+        configured as a hypertable before writing data. If the name contains a '.', it is split at the first '.'
+        into <schema>.<table>: the part before it is the schema and the remainder is the table name.
     :param max_events: Maximum number of events to write in a single batch. If None (default), all events will be
         written on flow termination, or after flush_after_seconds (if flush_after_seconds is set). Larger batches
         improve write performance but increase memory usage.
@@ -76,8 +76,8 @@ def __init__(
         dsn: str,
         time_col: str,
         columns: list[str],
+        table: str,
         time_format: Optional[str] = None,
-        table: Optional[str] = None,
         **kwargs,
     ) -> None:
 
@@ -112,6 +112,9 @@ def __init__(
         self._dsn = dsn
         self._pool = None  # Connection pool will be created lazily during first use
         self._column_names = self._get_column_names()
+        self._schema = None
+        if "." in self._table:
+            self._schema, self._table = self._table.split(".", 1)
 
     def _init(self):
         """Initialize the target (called synchronously).
@@ -193,7 +196,9 @@ async def _emit(self, batch, batch_key, batch_time, batch_events, last_event_tim
         async with self._pool.acquire() as conn:
             # Use PostgreSQL's COPY protocol for optimal performance
             # This is significantly faster than individual INSERT statements
-            await conn.copy_records_to_table(self._table, records=records, columns=self._column_names)
+            await conn.copy_records_to_table(
+                self._table, schema_name=self._schema, records=records, columns=self._column_names
+            )
 
     async def _terminate(self):
         """Terminate and cleanup resources.