Skip to content

Add baseline infrastructure for emulating materialized views #56

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
deploy/start/resume and data import procedures using fluent API and CLI.
- Cloud API: Added JWT authentication to client API and `ctk shell`.
- Cloud API: Added `health` and `ping` subcommands to `ctk cluster`
- Add baseline infrastructure for emulating materialized views.

**Breaking changes**

Expand Down
Empty file.
65 changes: 65 additions & 0 deletions cratedb_toolkit/materialized/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Copyright (c) 2023-2025, Crate.io Inc.
# Distributed under the terms of the AGPLv3 license, see LICENSE.
import logging

import sqlalchemy as sa

from cratedb_toolkit.materialized.model import MaterializedViewSettings
from cratedb_toolkit.materialized.store import MaterializedViewStore
from cratedb_toolkit.model import TableAddress

logger = logging.getLogger(__name__)


class MaterializedViewManager:
    """
    Orchestrate emulated ("synthetic") materialized views.

    A view definition is looked up in the management store, its SQL statement
    is materialized into a staging table, and the staging table is then
    swapped with the live table.
    """

    def __init__(self, settings: MaterializedViewSettings):
        # Runtime configuration, shared with the store below.
        self.settings = settings

        # The store resolves view definitions and executes SQL statements.
        self.store = MaterializedViewStore(settings=self.settings)

    def refresh(self, name: str):
        """
        Look up the materialized view registered for `name` and rebuild it.

        Args:
            name: Table address string, parsed with `TableAddress.from_string`.
        """
        logger.info(f"Refreshing materialized view: {name}")

        address = TableAddress.from_string(name)
        mview = self.store.get_by_table(address)
        logger.info(f"Loaded materialized view definition: {mview}")

        # Step 1: Get rid of any leftover staging table.
        drop_staging = f"DROP TABLE IF EXISTS {mview.staging_table_fullname}"
        logger.info(f"Dropping materialized view (staging): {drop_staging}")
        self.store.execute(sa.text(drop_staging))

        # Step 2: Materialize the view's query into a fresh staging table,
        # and refresh it.
        create_staging = f"CREATE TABLE IF NOT EXISTS {mview.staging_table_fullname} AS (\n{mview.sql}\n)"
        logger.info(f"Creating materialized view (staging): {create_staging}")
        self.store.execute(sa.text(create_staging))
        refresh_staging = f"REFRESH TABLE {mview.staging_table_fullname}"
        self.store.execute(sa.text(refresh_staging))

        # Step 3: Make sure a live table exists, so there is something to swap
        # with. On first run, this creates a placeholder table.
        create_live = f"CREATE TABLE IF NOT EXISTS {mview.table_fullname} (dummy INT)"
        logger.info(f"Creating materialized view (live): {create_live}")
        self.store.execute(sa.text(create_live))

        # Step 4: Swap staging and live tables.
        #
        # TODO: Use `ALTER TABLE ... RENAME TO ...` after resolving issue.
        #       SQLParseException[Target table name must not include a schema]
        #       https://github.com/crate/crate/issues/14833
        #       CrateDB does not support renaming to a different schema, thus
        #       the target table identifier must not include a schema. This is
        #       an artificial limitation. Technically, it can be done.
        #       https://github.com/crate/crate/blob/5.3.3/server/src/main/java/io/crate/analyze/AlterTableAnalyzer.java#L97-L102
        swap_tables = f"ALTER CLUSTER SWAP TABLE {mview.staging_table_fullname} TO {mview.table_fullname}"
        logger.info(f"Activating materialized view: {swap_tables}")
        self.store.execute(sa.text(swap_tables))
        refresh_live = f"REFRESH TABLE {mview.table_fullname}"
        self.store.execute(sa.text(refresh_live))

        # Step 5: Drop the staging table again. After the swap, it presumably
        # holds the previous live data (or the placeholder).
        drop_previous = f"DROP TABLE IF EXISTS {mview.staging_table_fullname}"
        logger.info(f"Dropping materialized view (staging): {drop_previous}")
        self.store.execute(sa.text(drop_previous))
98 changes: 98 additions & 0 deletions cratedb_toolkit/materialized/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Copyright (c) 2023, Crate.io Inc.
# Distributed under the terms of the AGPLv3 license, see LICENSE.
import dataclasses
import os
import typing as t

from sqlalchemy_cratedb.support import quote_relation_name

from cratedb_toolkit.model import DatabaseAddress, TableAddress


@dataclasses.dataclass
class MaterializedView:
    """
    Database record describing a single emulated "materialized view".

    The fields mirror the columns of the management table. Keep them in sync
    with the SQL DDL statement in `schema.sql`.
    """

    table_schema: t.Optional[str] = dataclasses.field(
        default=None,
        metadata={"help": "The target table schema"},
    )
    table_name: t.Optional[str] = dataclasses.field(
        default=None,
        metadata={"help": "The target table name"},
    )
    sql: t.Optional[str] = dataclasses.field(
        default=None,
        metadata={"help": "The SQL statement defining the emulated materialized view"},
    )

    id: t.Optional[str] = dataclasses.field(  # noqa: A003
        default=None,
        metadata={"help": "The materialized view identifier"},
    )

    @property
    def table_fullname(self) -> str:
        """Quoted `schema.table` identifier of the live (target) table."""
        relation = f"{self.table_schema}.{self.table_name}"
        return quote_relation_name(relation)

    @property
    def staging_table_fullname(self) -> str:
        """Quoted identifier of the staging table, located in the `<schema>-staging` schema."""
        relation = f"{self.table_schema}-staging.{self.table_name}"
        return quote_relation_name(relation)

    @classmethod
    def from_record(cls, record) -> "MaterializedView":
        """Create an instance from a mapping whose keys match the field names."""
        return cls(**record)

    def to_storage_dict(self, identifier: t.Optional[str] = None) -> t.Dict[str, str]:
        """
        Serialize the instance into a dictionary for storing it with SQLAlchemy.

        Args:
            identifier: When given, it replaces the `id` value in the result.
                The instance itself is not modified.
        """
        record = dataclasses.asdict(self)
        if identifier is None:
            return record

        # An explicit identifier wins over whatever the instance carries.
        record["id"] = identifier
        return record


def default_table_address():
    """
    Build the default address of the materialized view management table.

    The schema is read from the `CRATEDB_EXT_SCHEMA` environment variable and
    falls back to `ext`. The table name is always `materialized_view`.
    """
    return TableAddress(
        schema=os.getenv("CRATEDB_EXT_SCHEMA", "ext"),
        table="materialized_view",
    )

Check warning on line 75 in cratedb_toolkit/materialized/model.py

View check run for this annotation

Codecov / codecov/patch

cratedb_toolkit/materialized/model.py#L74-L75

Added lines #L74 - L75 were not covered by tests


@dataclasses.dataclass
class MaterializedViewSettings:
    """
    Configuration and runtime options of the materialized view subsystem.
    """

    # Where to connect to: the database connection URI.
    database: DatabaseAddress = dataclasses.field(
        default_factory=lambda: DatabaseAddress.from_string("crate://localhost/")
    )

    # Which table holds the materialized view definitions.
    materialized_table: TableAddress = dataclasses.field(default_factory=default_table_address)

    # When enabled, statements are only pretended to be invoked.
    dry_run: t.Optional[bool] = False

    def to_dict(self):
        """
        Return the settings as a dictionary.

        `dataclasses.asdict` converts nested dataclasses recursively, so the
        `materialized_table` entry is put back as the original object
        afterwards, keeping its attributes accessible to consumers.
        """
        return {
            **dataclasses.asdict(self),
            "materialized_table": self.materialized_table,
        }
30 changes: 30 additions & 0 deletions cratedb_toolkit/materialized/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Materialized Views Baseline Infrastructure

## About

This subsystem provides a foundation for emulating materialized views in CrateDB.
It addresses the need to optimize query performance by caching the results of
complex or resource-intensive SQL queries in regular tables that can be refreshed
on a scheduled basis.

Because the results of long-running queries are served from a regular table
instead of being recomputed on every request, this is especially useful for
read-heavy workloads. The approach can significantly reduce the database load
and improve response times for frequently accessed data.

## Features

- Create and manage materialized view definitions
- Refresh materialized views on demand or on schedule
- Track metadata about materialized views for management purposes
- Support for different refresh strategies

## Prior Art

- https://github.com/nroi/elfenbein
- https://github.com/nroi/pg_materialized_views_refresh_topologically
- https://github.com/darkside/monocle
- https://github.com/maggregor/maggregor
- https://github.com/adamfoneil/ViewMaterializer
- https://github.com/jhollinger/activerecord-viewmatic
- https://github.com/q-m/metabase-matview
36 changes: 36 additions & 0 deletions cratedb_toolkit/materialized/schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Copyright (c) 2021-2025, Crate.io Inc.
# Distributed under the terms of the AGPLv3 license, see LICENSE.
import logging
from importlib.resources import read_text

from cratedb_toolkit.materialized.model import MaterializedViewSettings
from cratedb_toolkit.util.database import run_sql

logger = logging.getLogger(__name__)


def setup_schema(settings: MaterializedViewSettings):
    """
    Create the materialized view management table, unless it already exists.

    The SQL DDL template `schema.sql` is loaded from the package and rendered
    with the settings. In dry-run mode, the statement is only logged.

    TODO: Refactor to `store` module.
    """

    logger.info(
        f"Installing materialized view management table at "
        f"database '{settings.database.safe}', table {settings.materialized_table}"
    )

    # Load the SQL DDL template and fill in the settings.
    ddl_template = read_text("cratedb_toolkit.materialized", "schema.sql")
    ddl = ddl_template.format_map(settings.to_dict())

    # Dry-run: report what would happen, and stop here.
    if settings.dry_run:
        logger.info(f"Pretending to execute SQL statement:\n{ddl}")
        return

    # Create the management table, then refresh it.
    run_sql(settings.database.dburi, ddl)
    run_sql(settings.database.dburi, f"REFRESH TABLE {settings.materialized_table.fullname}")
run_sql(settings.database.dburi, f"REFRESH TABLE {settings.materialized_table.fullname}")
13 changes: 13 additions & 0 deletions cratedb_toolkit/materialized/schema.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
-- Set up the materialized view management database table schema.
-- NOTE: This file is a template rendered with Python's `str.format_map`,
-- which fills in the table name placeholder below. Do not use curly braces
-- within comments, as they would be interpreted as placeholders.
CREATE TABLE IF NOT EXISTS {materialized_table.fullname} (

-- The materialized view identifier.
"id" TEXT NOT NULL PRIMARY KEY,

-- Target: The database table to be populated.
"table_schema" TEXT, -- The target table schema.
"table_name" TEXT, -- The target table name.

-- The SQL statement defining the emulated materialized view.
"sql" TEXT

);
Loading