Skip to content

Commit 1025b95

Browse files
committed
Add baseline infrastructure for emulating materialized views
1 parent 8876fb1 commit 1025b95

File tree

14 files changed

+717
-1
lines changed

14 files changed

+717
-1
lines changed

CHANGES.md

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
deploy/start/resume and data import procedures using fluent API and CLI.
1111
- Cloud API: Added JWT authentication to client API and `ctk shell`.
1212
- Cloud API: Added `health` and `ping` subcommands to `ctk cluster`
13+
- Add baseline infrastructure for emulating materialized views.
1314

1415
**Breaking changes**
1516

cratedb_toolkit/materialized/__init__.py

Whitespace-only changes.

cratedb_toolkit/materialized/core.py

+65
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# Copyright (c) 2023-2025, Crate.io Inc.
2+
# Distributed under the terms of the AGPLv3 license, see LICENSE.
3+
import logging
4+
5+
import sqlalchemy as sa
6+
7+
from cratedb_toolkit.materialized.model import MaterializedViewSettings
8+
from cratedb_toolkit.materialized.store import MaterializedViewStore
9+
from cratedb_toolkit.model import TableAddress
10+
11+
logger = logging.getLogger(__name__)
12+
13+
14+
class MaterializedViewManager:
15+
"""
16+
The main application, implementing basic synthetic materialized views.
17+
"""
18+
19+
def __init__(self, settings: MaterializedViewSettings):
20+
# Runtime context settings.
21+
self.settings = settings
22+
23+
# Materialized store API.
24+
self.store = MaterializedViewStore(settings=self.settings)
25+
26+
def refresh(self, name: str):
27+
"""
28+
Resolve a materialized view and refresh it.
29+
"""
30+
logger.info(f"Refreshing materialized view: {name}")
31+
32+
table_address = TableAddress.from_string(name)
33+
mview = self.store.get_by_table(table_address)
34+
logger.info(f"Loaded materialized view definition: {mview}")
35+
36+
sql_ddl = f"DROP TABLE IF EXISTS {mview.staging_table_fullname}"
37+
logger.info(f"Dropping materialized view (staging): {sql_ddl}")
38+
self.store.execute(sa.text(sql_ddl))
39+
40+
sql_ddl = f"CREATE TABLE IF NOT EXISTS {mview.staging_table_fullname} AS (\n{mview.sql}\n)"
41+
logger.info(f"Creating materialized view (staging): {sql_ddl}")
42+
self.store.execute(sa.text(sql_ddl))
43+
sql_refresh = f"REFRESH TABLE {mview.staging_table_fullname}"
44+
self.store.execute(sa.text(sql_refresh))
45+
46+
sql_ddl = f"CREATE TABLE IF NOT EXISTS {mview.table_fullname} (dummy INT)"
47+
logger.info(f"Creating materialized view (live): {sql_ddl}")
48+
self.store.execute(sa.text(sql_ddl))
49+
50+
# TODO: Use `ALTER TABLE ... RENAME TO ...` after resolving issue.
51+
# SQLParseException[Target table name must not include a schema]
52+
# https://github.com/crate/crate/issues/14833
53+
# CrateDB does not support renaming to a different schema, thus the target
54+
# table identifier must not include a schema. This is an artificial limitation.
55+
# Technically, it can be done.
56+
# https://github.com/crate/crate/blob/5.3.3/server/src/main/java/io/crate/analyze/AlterTableAnalyzer.java#L97-L102
57+
sql_ddl = f"ALTER CLUSTER SWAP TABLE {mview.staging_table_fullname} TO {mview.table_fullname}"
58+
logger.info(f"Activating materialized view: {sql_ddl}")
59+
self.store.execute(sa.text(sql_ddl))
60+
sql_refresh = f"REFRESH TABLE {mview.table_fullname}"
61+
self.store.execute(sa.text(sql_refresh))
62+
63+
sql_ddl = f"DROP TABLE IF EXISTS {mview.staging_table_fullname}"
64+
logger.info(f"Dropping materialized view (staging): {sql_ddl}")
65+
self.store.execute(sa.text(sql_ddl))

cratedb_toolkit/materialized/model.py

+98
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
# Copyright (c) 2023, Crate.io Inc.
2+
# Distributed under the terms of the AGPLv3 license, see LICENSE.
3+
import dataclasses
4+
import os
5+
import typing as t
6+
7+
from sqlalchemy_cratedb.support import quote_relation_name
8+
9+
from cratedb_toolkit.model import DatabaseAddress, TableAddress
10+
11+
12+
@dataclasses.dataclass
13+
class MaterializedView:
14+
"""
15+
Manage the database representation of a "materialized view" entity.
16+
17+
This layout has to be synchronized with the corresponding table definition
18+
per SQL DDL statement within `schema.sql`.
19+
"""
20+
21+
table_schema: t.Optional[str] = dataclasses.field(
22+
default=None,
23+
metadata={"help": "The target table schema"},
24+
)
25+
table_name: t.Optional[str] = dataclasses.field(
26+
default=None,
27+
metadata={"help": "The target table name"},
28+
)
29+
sql: t.Optional[str] = dataclasses.field(
30+
default=None,
31+
metadata={"help": "The SQL statement defining the emulated materialized view"},
32+
)
33+
34+
id: t.Optional[str] = dataclasses.field( # noqa: A003
35+
default=None,
36+
metadata={"help": "The materialized view identifier"},
37+
)
38+
39+
@property
40+
def table_fullname(self) -> str:
41+
return quote_relation_name(f"{self.table_schema}.{self.table_name}")
42+
43+
@property
44+
def staging_table_fullname(self) -> str:
45+
return quote_relation_name(f"{self.table_schema}-staging.{self.table_name}")
46+
47+
@classmethod
48+
def from_record(cls, record) -> "MaterializedView":
49+
return cls(**record)
50+
51+
def to_storage_dict(self, identifier: t.Optional[str] = None) -> t.Dict[str, str]:
52+
"""
53+
Return representation suitable for storing into a database table using SQLAlchemy.
54+
55+
Args:
56+
identifier: If provided, this will override any existing id in the instance.
57+
"""
58+
59+
# Serialize to dictionary.
60+
data = dataclasses.asdict(self)
61+
62+
# Optionally add identifier.
63+
if identifier is not None:
64+
# Explicitly override any existing id.
65+
data["id"] = identifier
66+
67+
return data
68+
69+
70+
def default_table_address():
71+
"""
72+
The default address of the materialized view management table.
73+
"""
74+
schema = os.environ.get("CRATEDB_EXT_SCHEMA", "ext")
75+
return TableAddress(schema=schema, table="materialized_view")
76+
77+
78+
@dataclasses.dataclass
79+
class MaterializedViewSettings:
80+
"""
81+
Bundle all configuration and runtime settings.
82+
"""
83+
84+
# Database connection URI.
85+
database: DatabaseAddress = dataclasses.field(
86+
default_factory=lambda: DatabaseAddress.from_string("crate://localhost/")
87+
)
88+
89+
# The address of the materialized view table.
90+
materialized_table: TableAddress = dataclasses.field(default_factory=default_table_address)
91+
92+
# Only pretend to invoke statements.
93+
dry_run: t.Optional[bool] = False
94+
95+
def to_dict(self):
96+
data = dataclasses.asdict(self)
97+
data["materialized_table"] = self.materialized_table
98+
return data
+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Materialized Views Baseline Infrastructure
2+
3+
## About
4+
5+
This subsystem provides a foundation for emulating materialized views in CrateDB.
6+
It addresses the need to optimize query performance by caching the results of
7+
complex or resource-intensive SQL queries in regular tables that can be refreshed
8+
on a scheduled basis.
9+
10+
The subsystem emulates materialized views so that queries that take a long
11+
time to run can be cached, which is specifically useful when applied in
12+
scenarios with high traffic reads. This approach can significantly reduce
13+
the database load and improve response times for frequently accessed data.
14+
15+
## Features
16+
17+
- Create and manage materialized view definitions
18+
- Refresh materialized views on demand or on schedule
19+
- Track metadata about materialized views for management purposes
20+
- Support for different refresh strategies
21+
22+
## Prior Art
23+
24+
- https://github.com/nroi/elfenbein
25+
- https://github.com/nroi/pg_materialized_views_refresh_topologically
26+
- https://github.com/darkside/monocle
27+
- https://github.com/maggregor/maggregor
28+
- https://github.com/adamfoneil/ViewMaterializer
29+
- https://github.com/jhollinger/activerecord-viewmatic
30+
- https://github.com/q-m/metabase-matview
+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Copyright (c) 2021-2025, Crate.io Inc.
2+
# Distributed under the terms of the AGPLv3 license, see LICENSE.
3+
import logging
4+
from importlib.resources import read_text
5+
6+
from cratedb_toolkit.materialized.model import MaterializedViewSettings
7+
from cratedb_toolkit.util.database import run_sql
8+
9+
logger = logging.getLogger(__name__)
10+
11+
12+
def setup_schema(settings: MaterializedViewSettings):
13+
"""
14+
Set up the materialized view management table schema.
15+
16+
TODO: Refactor to `store` module.
17+
"""
18+
19+
logger.info(
20+
f"Installing materialized view management table at "
21+
f"database '{settings.database.safe}', table {settings.materialized_table}"
22+
)
23+
24+
# Read SQL DDL statement.
25+
sql = read_text("cratedb_toolkit.materialized", "schema.sql")
26+
27+
tplvars = settings.to_dict()
28+
sql = sql.format_map(tplvars)
29+
30+
if settings.dry_run:
31+
logger.info(f"Pretending to execute SQL statement:\n{sql}")
32+
return
33+
34+
# Materialize table schema.
35+
run_sql(settings.database.dburi, sql)
36+
run_sql(settings.database.dburi, f"REFRESH TABLE {settings.materialized_table.fullname}")
+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
-- Set up the materialized view management database table schema.
2+
CREATE TABLE IF NOT EXISTS {materialized_table.fullname} (
3+
4+
"id" TEXT NOT NULL PRIMARY KEY,
5+
6+
-- Target: The database table to be populated.
7+
"table_schema" TEXT, -- The source table schema.
8+
"table_name" TEXT, -- The source table name.
9+
10+
-- The SQL statement defining the emulated materialized view.
11+
"sql" TEXT
12+
13+
);

0 commit comments

Comments
 (0)