diff --git a/datasette/app.py b/datasette/app.py index f0349895de..3790b34025 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -354,6 +354,7 @@ def __init__( self.immutables = set(immutables or []) self.databases = collections.OrderedDict() self.actions = {} # .invoke_startup() will populate this + self._column_types = {} # .invoke_startup() will populate this try: self._refresh_schemas_lock = asyncio.Lock() except RuntimeError as rex: @@ -692,12 +693,23 @@ async def invoke_startup(self): action_abbrs[action.abbr] = action self.actions[action.name] = action + # Register column types (classes, not instances) + self._column_types = {} + for hook in pm.hook.register_column_types(datasette=self): + if hook: + for ct_cls in hook: + if ct_cls.name in self._column_types: + raise StartupError(f"Duplicate column type name: {ct_cls.name}") + self._column_types[ct_cls.name] = ct_cls + for hook in pm.hook.prepare_jinja2_environment( env=self._jinja_env, datasette=self ): await await_me_maybe(hook) # Ensure internal tables and metadata are populated before startup hooks await self._refresh_schemas() + # Load column_types from config into internal DB + await self._apply_column_types_config() for hook in pm.hook.startup(datasette=self): await await_me_maybe(hook) self._startup_invoked = True @@ -945,6 +957,102 @@ async def set_column_metadata( [database_name, resource_name, column_name, key, value], ) + # Column types API + + async def _apply_column_types_config(self): + """Load column_types from datasette.json config into the internal DB.""" + import logging + + for db_name, db_conf in (self.config or {}).get("databases", {}).items(): + for table_name, table_conf in db_conf.get("tables", {}).items(): + for col_name, ct in table_conf.get("column_types", {}).items(): + if isinstance(ct, str): + col_type, config = ct, None + else: + col_type = ct["type"] + config = ct.get("config") + if col_type not in self._column_types: + logging.warning( + "column_types config references 
unknown type %r " + "for %s.%s.%s", + col_type, + db_name, + table_name, + col_name, + ) + await self.set_column_type( + db_name, table_name, col_name, col_type, config + ) + + async def get_column_type(self, database: str, resource: str, column: str): + """ + Return a ColumnType instance (with config baked in) for a specific + column, or None if no column type is assigned. + """ + row = await self.get_internal_database().execute( + "SELECT column_type, config FROM column_types " + "WHERE database_name = ? AND resource_name = ? AND column_name = ?", + [database, resource, column], + ) + rows = row.rows + if not rows: + return None + ct_name, config = rows[0] + ct_cls = self._column_types.get(ct_name) + if ct_cls is None: + return None + return ct_cls(config=json.loads(config) if config else None) + + async def get_column_types(self, database: str, resource: str) -> dict: + """ + Return {column_name: ColumnType instance (with config)} + for all columns with assigned types on the given resource. + """ + rows = await self.get_internal_database().execute( + "SELECT column_name, column_type, config FROM column_types " + "WHERE database_name = ? AND resource_name = ?", + [database, resource], + ) + result = {} + for row in rows.rows: + col_name, ct_name, config = row + ct_cls = self._column_types.get(ct_name) + if ct_cls is not None: + result[col_name] = ct_cls(config=json.loads(config) if config else None) + return result + + async def set_column_type( + self, + database: str, + resource: str, + column: str, + column_type: str, + config: dict = None, + ) -> None: + """Assign a column type. 
Overwrites any existing assignment.""" + await self.get_internal_database().execute_write( + """INSERT OR REPLACE INTO column_types + (database_name, resource_name, column_name, column_type, config) + VALUES (?, ?, ?, ?, ?)""", + [ + database, + resource, + column, + column_type, + json.dumps(config) if config else None, + ], + ) + + async def remove_column_type( + self, database: str, resource: str, column: str + ) -> None: + """Remove a column type assignment.""" + await self.get_internal_database().execute_write( + "DELETE FROM column_types " + "WHERE database_name = ? AND resource_name = ? AND column_name = ?", + [database, resource, column], + ) + def get_internal_database(self): return self._internal_database diff --git a/datasette/column_types.py b/datasette/column_types.py new file mode 100644 index 0000000000..c41142948a --- /dev/null +++ b/datasette/column_types.py @@ -0,0 +1,44 @@ +class ColumnType: + """ + Base class for column types. + + Subclasses must define ``name`` and ``description`` as class attributes: + + - ``name``: Unique identifier string. Lowercase, no spaces. + Examples: "markdown", "file", "email", "url", "point", "image". + - ``description``: Human-readable label for admin UI dropdowns. + Examples: "Markdown text", "File reference", "Email address". + + Instantiate with an optional ``config`` dict to bind per-column + configuration:: + + ct = MyColumnType(config={"key": "value"}) + ct.config # {"key": "value"} + """ + + name: str + description: str + + def __init__(self, config=None): + self.config = config + + async def render_cell(self, value, column, table, database, datasette, request): + """ + Return an HTML string to render this cell value, or None to + fall through to the default render_cell plugin hook chain. + """ + return None + + async def validate(self, value, datasette): + """ + Validate a value before it is written. Return None if valid, + or a string error message if invalid. 
+ """ + return None + + async def transform_value(self, value, datasette): + """ + Transform a value before it appears in JSON API output. + Return the transformed value. Default: return unchanged. + """ + return value diff --git a/datasette/default_column_types.py b/datasette/default_column_types.py new file mode 100644 index 0000000000..b4ebfcc589 --- /dev/null +++ b/datasette/default_column_types.py @@ -0,0 +1,78 @@ +import json +import re + +import markupsafe + +from datasette import hookimpl +from datasette.column_types import ColumnType + + +class UrlColumnType(ColumnType): + name = "url" + description = "URL" + + async def render_cell(self, value, column, table, database, datasette, request): + if not value or not isinstance(value, str): + return None + escaped = markupsafe.escape(value.strip()) + return markupsafe.Markup(f'{escaped}') + + async def validate(self, value, datasette): + if value is None or value == "": + return None + if not isinstance(value, str): + return "URL must be a string" + if not re.match(r"^https?://\S+$", value.strip()): + return "Invalid URL" + return None + + +class EmailColumnType(ColumnType): + name = "email" + description = "Email address" + + async def render_cell(self, value, column, table, database, datasette, request): + if not value or not isinstance(value, str): + return None + escaped = markupsafe.escape(value.strip()) + return markupsafe.Markup(f'{escaped}') + + async def validate(self, value, datasette): + if value is None or value == "": + return None + if not isinstance(value, str): + return "Email must be a string" + if not re.match(r"^[^@\s]+@[^@\s]+\.[^@\s]+$", value.strip()): + return "Invalid email address" + return None + + +class JsonColumnType(ColumnType): + name = "json" + description = "JSON data" + + async def render_cell(self, value, column, table, database, datasette, request): + if value is None: + return None + try: + parsed = json.loads(value) if isinstance(value, str) else value + formatted = 
json.dumps(parsed, indent=2) + escaped = markupsafe.escape(formatted) + return markupsafe.Markup(f"
{escaped}")
+ except (json.JSONDecodeError, TypeError):
+ return None
+
+    async def validate(self, value, datasette):
+        """
+        Check that a string value parses as JSON.
+
+        Returns None when the value is acceptable, or the message
+        "Invalid JSON" when a non-empty string fails to parse. None, the
+        empty string and non-string values are accepted without parsing.
+        """
+        if value is None or value == "":
+            return None
+        if not isinstance(value, str):
+            return None
+        try:
+            json.loads(value)
+        except json.JSONDecodeError:
+            return "Invalid JSON"
+        return None
+
+
+@hookimpl
+def register_column_types(datasette):
+    """Register the built-in url, email and json column type classes."""
+    built_in_types = [UrlColumnType, EmailColumnType, JsonColumnType]
+    return built_in_types
diff --git a/datasette/hookspecs.py b/datasette/hookspecs.py
index 649019001f..f7bb6ab655 100644
--- a/datasette/hookspecs.py
+++ b/datasette/hookspecs.py
@@ -55,7 +55,17 @@ def publish_subcommand(publish):
@hookspec
-def render_cell(row, value, column, table, pks, database, datasette, request):
+def render_cell(
+ row,
+ value,
+ column,
+ table,
+ pks,
+ database,
+ datasette,
+ request,
+ column_type,
+):
"""Customize rendering of HTML table cell values"""
@@ -74,6 +84,11 @@ def register_actions(datasette):
"""Register actions: returns a list of datasette.permission.Action objects"""
+@hookspec
+def register_column_types(datasette):
+    """Return a list of ColumnType subclasses (classes, not instances)"""
+
+
@hookspec
def register_routes(datasette):
"""Register URL routes: return a list of (regex, view_function) pairs"""
diff --git a/datasette/plugins.py b/datasette/plugins.py
index 992137bd5d..b01b386cfa 100644
--- a/datasette/plugins.py
+++ b/datasette/plugins.py
@@ -25,6 +25,7 @@
"datasette.default_permissions",
"datasette.default_permissions.tokens",
"datasette.default_actions",
+ "datasette.default_column_types",
"datasette.default_magic_parameters",
"datasette.blob_renderer",
"datasette.default_menu_links",
diff --git a/datasette/utils/internal_db.py b/datasette/utils/internal_db.py
index e4ebdddeea..df1499283c 100644
--- a/datasette/utils/internal_db.py
+++ b/datasette/utils/internal_db.py
@@ -103,6 +103,15 @@ async def initialize_metadata_tables(db):
value text,
unique(database_name, resource_name, column_name, key)
);
+
+ CREATE TABLE IF NOT EXISTS column_types (
+ database_name TEXT NOT NULL,
+ resource_name TEXT NOT NULL,
+ column_name TEXT NOT NULL,
+ column_type TEXT NOT NULL,
+ config TEXT,
+ PRIMARY KEY (database_name, resource_name, column_name)
+ );
"""))
diff --git a/datasette/views/database.py b/datasette/views/database.py
index 93ad8eda01..916cdbc147 100644
--- a/datasette/views/database.py
+++ b/datasette/views/database.py
@@ -1205,6 +1205,7 @@ async def display_rows(datasette, database, request, rows, columns):
database=database,
datasette=datasette,
request=request,
+ column_type=None,
):
candidate = await await_me_maybe(candidate)
if candidate is not None:
diff --git a/datasette/views/row.py b/datasette/views/row.py
index 7cc46368c3..4eacfe49a6 100644
--- a/datasette/views/row.py
+++ b/datasette/views/row.py
@@ -179,26 +179,41 @@ async def template_data():
if "render_cell" in extras:
# Call render_cell plugin hook for each cell
+ ct_map = await self.ds.get_column_types(database, table)
rendered_rows = []
for row in rows:
rendered_row = {}
for value, column in zip(row, columns):
- # Call render_cell plugin hook
+ ct = ct_map.get(column)
plugin_display_value = None
- for candidate in pm.hook.render_cell(
- row=row,
- value=value,
- column=column,
- table=table,
- pks=resolved.pks,
- database=database,
- datasette=self.ds,
- request=request,
- ):
- candidate = await await_me_maybe(candidate)
+ # Try column type render_cell first
+ if ct:
+ candidate = await ct.render_cell(
+ value=value,
+ column=column,
+ table=table,
+ database=database,
+ datasette=self.ds,
+ request=request,
+ )
if candidate is not None:
plugin_display_value = candidate
- break
+ if plugin_display_value is None:
+ for candidate in pm.hook.render_cell(
+ row=row,
+ value=value,
+ column=column,
+ table=table,
+ pks=resolved.pks,
+ database=database,
+ datasette=self.ds,
+ request=request,
+ column_type=ct,
+ ):
+ candidate = await await_me_maybe(candidate)
+ if candidate is not None:
+ plugin_display_value = candidate
+ break
if plugin_display_value:
rendered_row[column] = str(plugin_display_value)
rendered_rows.append(rendered_row)
@@ -352,6 +367,15 @@ async def post(self, request):
update = data["update"]
+ # Validate column types
+ from datasette.views.table import _validate_column_types
+
+ ct_errors = await _validate_column_types(
+ self.ds, resolved.db.name, resolved.table, [update]
+ )
+ if ct_errors:
+ return _error(ct_errors, 400)
+
alter = data.get("alter")
if alter and not await self.ds.allowed(
action="alter-table",
diff --git a/datasette/views/table.py b/datasette/views/table.py
index 2ee8674323..035abb1b9f 100644
--- a/datasette/views/table.py
+++ b/datasette/views/table.py
@@ -134,6 +134,22 @@ async def _redirect_if_needed(datasette, request, resolved):
)
+async def _validate_column_types(datasette, database_name, table_name, rows):
+    """
+    Check row values against the column types assigned to a table.
+
+    Only columns that are present in a row and have an assigned column type
+    are validated. Returns a list of "column: message" strings, which is
+    empty when nothing failed or the table has no assigned column types.
+    """
+    assigned_types = await datasette.get_column_types(database_name, table_name)
+    problems = []
+    if not assigned_types:
+        return problems
+    for record in rows:
+        for column, column_type in assigned_types.items():
+            if column in record:
+                message = await column_type.validate(record[column], datasette)
+                if message:
+                    problems.append(f"{column}: {message}")
+    return problems
+
+
async def display_columns_and_rows(
datasette,
database_name,
@@ -163,6 +179,9 @@ async def display_columns_and_rows(
)
)
+ # Look up column types for this table
+ column_types_map = await datasette.get_column_types(database_name, table_name)
+
column_details = {
col.name: col for col in await db.table_column_details(table_name)
}
@@ -179,16 +198,21 @@ async def display_columns_and_rows(
else:
type_ = column_details[r[0]].type
notnull = column_details[r[0]].notnull
- columns.append(
- {
- "name": r[0],
- "sortable": r[0] in sortable_columns,
- "is_pk": r[0] in pks_for_display,
- "type": type_,
- "notnull": notnull,
- "description": column_descriptions.get(r[0]),
- }
- )
+ col_dict = {
+ "name": r[0],
+ "sortable": r[0] in sortable_columns,
+ "is_pk": r[0] in pks_for_display,
+ "type": type_,
+ "notnull": notnull,
+ "description": column_descriptions.get(r[0]),
+ "column_type": None,
+ "column_type_config": None,
+ }
+ ct = column_types_map.get(r[0])
+ if ct:
+ col_dict["column_type"] = ct.name
+ col_dict["column_type_config"] = ct.config
+ columns.append(col_dict)
column_to_foreign_key_table = {
fk["column"]: fk["other_table"]
@@ -227,23 +251,37 @@ async def display_columns_and_rows(
# already shown in the link column.
continue
- # First let the plugins have a go
+ # First try column type render_cell, then plugins
# pylint: disable=no-member
plugin_display_value = None
- for candidate in pm.hook.render_cell(
- row=row,
- value=value,
- column=column,
- table=table_name,
- pks=pks_for_display,
- database=database_name,
- datasette=datasette,
- request=request,
- ):
- candidate = await await_me_maybe(candidate)
+ ct = column_types_map.get(column)
+ if ct:
+ candidate = await ct.render_cell(
+ value=value,
+ column=column,
+ table=table_name,
+ database=database_name,
+ datasette=datasette,
+ request=request,
+ )
if candidate is not None:
plugin_display_value = candidate
- break
+ if plugin_display_value is None:
+ for candidate in pm.hook.render_cell(
+ row=row,
+ value=value,
+ column=column,
+ table=table_name,
+ pks=pks_for_display,
+ database=database_name,
+ datasette=datasette,
+ request=request,
+ column_type=ct,
+ ):
+ candidate = await await_me_maybe(candidate)
+ if candidate is not None:
+ plugin_display_value = candidate
+ break
if plugin_display_value:
display_value = plugin_display_value
elif isinstance(value, bytes):
@@ -484,6 +522,13 @@ async def post(self, request, upsert=False):
if errors:
return _error(errors, 400)
+ # Validate column types
+ ct_errors = await _validate_column_types(
+ self.ds, database_name, table_name, rows
+ )
+ if ct_errors:
+ return _error(ct_errors, 400)
+
num_rows = len(rows)
# No that we've passed pks to _validate_data it's safe to
@@ -1500,27 +1545,42 @@ async def extra_display_rows(run_display_columns_and_rows):
async def extra_render_cell():
"Rendered HTML for each cell using the render_cell plugin hook"
pks_for_display = pks if pks else (["rowid"] if not is_view else [])
- columns = [col[0] for col in results.description]
+ col_names = [col[0] for col in results.description]
+ ct_map = await datasette.get_column_types(database_name, table_name)
rendered_rows = []
for row in rows:
rendered_row = {}
- for value, column in zip(row, columns):
- # Call render_cell plugin hook
+ for value, column in zip(row, col_names):
+ ct = ct_map.get(column)
plugin_display_value = None
- for candidate in pm.hook.render_cell(
- row=row,
- value=value,
- column=column,
- table=table_name,
- pks=pks_for_display,
- database=database_name,
- datasette=datasette,
- request=request,
- ):
- candidate = await await_me_maybe(candidate)
+ # Try column type render_cell first
+ if ct:
+ candidate = await ct.render_cell(
+ value=value,
+ column=column,
+ table=table_name,
+ database=database_name,
+ datasette=datasette,
+ request=request,
+ )
if candidate is not None:
plugin_display_value = candidate
- break
+ if plugin_display_value is None:
+ for candidate in pm.hook.render_cell(
+ row=row,
+ value=value,
+ column=column,
+ table=table_name,
+ pks=pks_for_display,
+ database=database_name,
+ datasette=datasette,
+ request=request,
+ column_type=ct,
+ ):
+ candidate = await await_me_maybe(candidate)
+ if candidate is not None:
+ plugin_display_value = candidate
+ break
if plugin_display_value:
rendered_row[column] = str(plugin_display_value)
rendered_rows.append(rendered_row)
@@ -1533,6 +1593,17 @@ async def extra_query():
"params": params,
}
+    async def extra_column_types():
+        "Column type assignments for this table"
+        # datasette, database_name and table_name come from the enclosing scope
+        assigned = await datasette.get_column_types(database_name, table_name)
+        summary = {}
+        for column, column_type in assigned.items():
+            # Expose only the registered type name and its per-column config
+            summary[column] = {"type": column_type.name, "config": column_type.config}
+        return summary
+
async def extra_metadata():
"Metadata about the table and database"
tablemetadata = await datasette.get_resource_metadata(database_name, table_name)
@@ -1742,6 +1813,7 @@ async def extra_facets_timed_out(extra_facet_results):
extra_debug,
extra_request,
extra_query,
+ extra_column_types,
extra_metadata,
extra_extras,
extra_database,
@@ -1775,7 +1847,18 @@ async def extra_facets_timed_out(extra_facet_results):
}
)
raw_sqlite_rows = rows[:page_size]
- data["rows"] = [dict(r) for r in raw_sqlite_rows]
+ # Apply transform_value for columns with assigned types
+ ct_map = await datasette.get_column_types(database_name, table_name)
+ transformed_rows = []
+ for r in raw_sqlite_rows:
+ row_dict = dict(r)
+ for col_name, ct in ct_map.items():
+ if col_name in row_dict:
+ row_dict[col_name] = await ct.transform_value(
+ row_dict[col_name], datasette
+ )
+ transformed_rows.append(row_dict)
+ data["rows"] = transformed_rows
if context_for_html_hack:
data.update(extra_context_from_filters)
diff --git a/docs/internals.rst b/docs/internals.rst
index 7d607bfe38..6bd3d41d5e 100644
--- a/docs/internals.rst
+++ b/docs/internals.rst
@@ -903,6 +903,102 @@ Adds a new metadata entry for the specified column.
Any previous column-level metadata entry with the same ``key`` will be overwritten.
Internally upserts the value into the the ``metadata_columns`` table inside the :ref:`internal database " in rendered["metadata"]
+
+
+# --- Validation ---
+
+
+@pytest.mark.asyncio
+async def test_email_validation_on_insert(ds_ct):
+    """An invalid author_email on insert gives a 400 whose error names the column."""
+    await ds_ct.invoke_startup()
+    auth_headers = _headers(write_token(ds_ct))
+    payload = {"row": {"title": "Test", "author_email": "not-an-email"}}
+    resp = await ds_ct.client.post(
+        "/data/posts/-/insert", json=payload, headers=auth_headers
+    )
+    assert resp.status_code == 400
+    first_error = resp.json()["errors"][0]
+    assert "author_email" in first_error
+
+
+@pytest.mark.asyncio
+async def test_email_validation_passes_valid(ds_ct):
+    """A well-formed author_email value is accepted on insert (HTTP 201)."""
+    await ds_ct.invoke_startup()
+    auth_headers = _headers(write_token(ds_ct))
+    payload = {"row": {"title": "Test", "author_email": "valid@example.com"}}
+    resp = await ds_ct.client.post(
+        "/data/posts/-/insert", json=payload, headers=auth_headers
+    )
+    assert resp.status_code == 201
+
+
+@pytest.mark.asyncio
+async def test_url_validation_on_insert(ds_ct):
+    """An invalid website value on insert gives a 400 whose error names the column."""
+    await ds_ct.invoke_startup()
+    auth_headers = _headers(write_token(ds_ct))
+    payload = {"row": {"title": "Test", "website": "not-a-url"}}
+    resp = await ds_ct.client.post(
+        "/data/posts/-/insert", json=payload, headers=auth_headers
+    )
+    assert resp.status_code == 400
+    first_error = resp.json()["errors"][0]
+    assert "website" in first_error
+
+
+@pytest.mark.asyncio
+async def test_json_validation_on_insert(ds_ct):
+    """Unparseable JSON in metadata on insert gives a 400 naming the column."""
+    await ds_ct.invoke_startup()
+    auth_headers = _headers(write_token(ds_ct))
+    payload = {"row": {"title": "Test", "metadata": "not-json{"}}
+    resp = await ds_ct.client.post(
+        "/data/posts/-/insert", json=payload, headers=auth_headers
+    )
+    assert resp.status_code == 400
+    first_error = resp.json()["errors"][0]
+    assert "metadata" in first_error
+
+
+@pytest.mark.asyncio
+async def test_validation_on_update(ds_ct):
+    """The row update endpoint rejects an invalid author_email with a 400."""
+    await ds_ct.invoke_startup()
+    auth_headers = _headers(write_token(ds_ct))
+    payload = {"update": {"author_email": "invalid"}}
+    resp = await ds_ct.client.post(
+        "/data/posts/1/-/update", json=payload, headers=auth_headers
+    )
+    assert resp.status_code == 400
+    first_error = resp.json()["errors"][0]
+    assert "author_email" in first_error
+
+
+@pytest.mark.asyncio
+async def test_validation_allows_null(ds_ct):
+    """A null author_email passes validation and the insert returns 201."""
+    await ds_ct.invoke_startup()
+    auth_headers = _headers(write_token(ds_ct))
+    payload = {"row": {"title": "Test", "author_email": None}}
+    resp = await ds_ct.client.post(
+        "/data/posts/-/insert", json=payload, headers=auth_headers
+    )
+    assert resp.status_code == 201
+
+
+@pytest.mark.asyncio
+async def test_validation_allows_empty_string(ds_ct):
+    """An empty-string author_email passes validation and the insert returns 201."""
+    await ds_ct.invoke_startup()
+    auth_headers = _headers(write_token(ds_ct))
+    payload = {"row": {"title": "Test", "author_email": ""}}
+    resp = await ds_ct.client.post(
+        "/data/posts/-/insert", json=payload, headers=auth_headers
+    )
+    assert resp.status_code == 201
+
+
+# --- ColumnType base class ---
+
+
+@pytest.mark.asyncio
+async def test_column_type_base_defaults():
+    """A subclass that overrides no methods gets the base-class defaults."""
+
+    class TestType(ColumnType):
+        name = "test"
+        description = "Test type"
+
+    instance = TestType()
+    assert instance.config is None
+    rendered = await instance.render_cell("val", "col", "tbl", "db", None, None)
+    assert rendered is None
+    validation_error = await instance.validate("val", None)
+    assert validation_error is None
+    transformed = await instance.transform_value("val", None)
+    assert transformed == "val"
+
+
+# --- render_cell extra with column types ---
+
+
+@pytest.mark.asyncio
+async def test_render_cell_extra_with_column_types(ds_ct):
+    """The table JSON render_cell extra contains column-type rendered HTML."""
+    await ds_ct.invoke_startup()
+    resp = await ds_ct.client.get("/data/posts.json?_extra=render_cell")
+    assert resp.status_code == 200
+    first_row = resp.json()["render_cell"][0]
+    assert "mailto:" in first_row["author_email"]
+    assert "href" in first_row["website"]
+
+
+# --- Duplicate column type name ---
+
+
+@pytest.mark.asyncio
+async def test_duplicate_column_type_name_raises_error():
+    """Startup raises StartupError when a plugin registers a second "url" type."""
+
+    class DuplicateUrlType(ColumnType):
+        name = "url"
+        description = "Duplicate URL"
+
+        async def render_cell(self, value, column, table, database, datasette, request):
+            return None
+
+    class _Plugin:
+        @hookimpl
+        def register_column_types(self, datasette):
+            return [DuplicateUrlType]
+
+    plugin_name = "test_duplicate_ct"
+    clashing_plugin = _Plugin()
+    pm.register(clashing_plugin, name=plugin_name)
+    try:
+        ds = Datasette()
+        with pytest.raises(StartupError, match="Duplicate column type name: url"):
+            await ds.invoke_startup()
+    finally:
+        # Always unregister so the duplicate type cannot affect other tests
+        pm.unregister(clashing_plugin, name=plugin_name)
+
+
+# --- Row endpoint ---
+
+
+@pytest.mark.asyncio
+async def test_row_endpoint_render_cell_with_column_types(ds_ct):
+    """The row JSON render_cell extra contains column-type rendered HTML."""
+    await ds_ct.invoke_startup()
+    resp = await ds_ct.client.get("/data/posts/1.json?_extra=render_cell")
+    assert resp.status_code == 200
+    first_row = resp.json()["render_cell"][0]
+    assert "mailto:" in first_row["author_email"]
+    assert "href" in first_row["website"]
+
+
+# --- transform_value in JSON output ---
+
+
+@pytest.mark.asyncio
+async def test_transform_value_in_json_output(tmp_path_factory):
+    """A column type with transform_value should modify rows in JSON API."""
+
+    class UpperColumnType(ColumnType):
+        name = "upper"
+        description = "Uppercase"
+
+        async def transform_value(self, value, datasette):
+            if isinstance(value, str):
+                return value.upper()
+            return value
+
+    class _Plugin:
+        @hookimpl
+        def register_column_types(self, datasette):
+            return [UpperColumnType]
+
+    plugin = _Plugin()
+    pm.register(plugin, name="test_transform_ct")
+    db = None
+    ds = None
+    try:
+        db_directory = tmp_path_factory.mktemp("dbs")
+        db_path = str(db_directory / "data.db")
+        db = sqlite3.connect(db_path)
+        db.execute("vacuum")
+        db.execute("create table t (id integer primary key, name text)")
+        db.execute("insert into t values (1, 'hello')")
+        db.commit()
+        ds = Datasette(
+            [db_path],
+            config={
+                "databases": {
+                    "data": {"tables": {"t": {"column_types": {"name": "upper"}}}}
+                }
+            },
+        )
+        await ds.invoke_startup()
+        response = await ds.client.get("/data/t.json")
+        assert response.status_code == 200
+        data = response.json()
+        assert data["rows"][0]["name"] == "HELLO"
+    finally:
+        # Cleanup runs even when an assertion fails, so no SQLite handle is
+        # leaked and the plugin never stays registered for later tests.
+        try:
+            if db is not None:
+                db.close()
+            if ds is not None:
+                for database in ds.databases.values():
+                    if not database.is_memory:
+                        database.close()
+        finally:
+            pm.unregister(plugin, name="test_transform_ct")
+
+
+# --- Column type priority over plugins ---
+
+
+@pytest.mark.asyncio
+async def test_column_type_render_cell_has_priority_over_plugins(tmp_path_factory):
+    """Column type render_cell should take priority over render_cell plugin hook."""
+
+    class PriorityColumnType(ColumnType):
+        name = "priority_test"
+        description = "Priority test"
+
+        async def render_cell(self, value, column, table, database, datasette, request):
+            if value is not None:
+                return markupsafe.Markup(
+                    f"COLUMN_TYPE:{markupsafe.escape(value)}"
+                )
+            return None
+
+    class _ColumnTypePlugin:
+        @hookimpl
+        def register_column_types(self, datasette):
+            return [PriorityColumnType]
+
+    class _RenderCellPlugin:
+        @hookimpl
+        def render_cell(
+            self,
+            row,
+            value,
+            column,
+            table,
+            pks,
+            database,
+            datasette,
+            request,
+            column_type,
+        ):
+            if column == "name":
+                return markupsafe.Markup(f"PLUGIN:{markupsafe.escape(value)}")
+
+    ct_plugin = _ColumnTypePlugin()
+    rc_plugin = _RenderCellPlugin()
+    pm.register(ct_plugin, name="test_priority_ct")
+    pm.register(rc_plugin, name="test_priority_render")
+    db = None
+    ds = None
+    try:
+        db_directory = tmp_path_factory.mktemp("dbs")
+        db_path = str(db_directory / "data.db")
+        db = sqlite3.connect(db_path)
+        db.execute("vacuum")
+        db.execute("create table t (id integer primary key, name text)")
+        db.execute("insert into t values (1, 'hello')")
+        db.commit()
+        ds = Datasette(
+            [db_path],
+            config={
+                "databases": {
+                    "data": {
+                        "tables": {"t": {"column_types": {"name": "priority_test"}}}
+                    }
+                }
+            },
+        )
+        await ds.invoke_startup()
+        response = await ds.client.get("/data/t.json?_extra=render_cell")
+        assert response.status_code == 200
+        data = response.json()
+        rendered = data["render_cell"][0]
+        # Column type should win over the plugin
+        assert "COLUMN_TYPE:" in rendered["name"]
+        assert "PLUGIN:" not in rendered["name"]
+    finally:
+        # Cleanup runs even when an assertion fails, so no SQLite handle is
+        # leaked and neither plugin stays registered for later tests.
+        try:
+            if db is not None:
+                db.close()
+            if ds is not None:
+                for database in ds.databases.values():
+                    if not database.is_memory:
+                        database.close()
+        finally:
+            pm.unregister(ct_plugin, name="test_priority_ct")
+            pm.unregister(rc_plugin, name="test_priority_render")
+
+
+# --- Row detail page rendering ---
+
+
+@pytest.mark.asyncio
+async def test_row_detail_page_html_rendering(ds_ct):
+    """Row detail HTML page should use column type rendering."""
+    await ds_ct.invoke_startup()
+    resp = await ds_ct.client.get("/data/posts/1")
+    assert resp.status_code == 200
+    page = resp.text
+    # Email column is expected to appear as a mailto: link,
+    # and the url column as an href attribute
+    for expected_fragment in (
+        "mailto:test@example.com",
+        'href="https://example.com"',
+    ):
+        assert expected_fragment in page
+
+
+# --- HTML table page rendering ---
+
+
+@pytest.mark.asyncio
+async def test_html_table_page_rendering(ds_ct):
+    """HTML table page should use column type rendering."""
+    await ds_ct.invoke_startup()
+    resp = await ds_ct.client.get("/data/posts")
+    assert resp.status_code == 200
+    page = resp.text
+    for expected_fragment in (
+        "mailto:test@example.com",
+        'href="https://example.com"',
+    ):
+        assert expected_fragment in page
+
+
+# --- Validation on upsert ---
+
+
+@pytest.mark.asyncio
+async def test_validation_on_upsert(ds_ct):
+    """The upsert endpoint rejects an invalid author_email with a 400."""
+    await ds_ct.invoke_startup()
+    auth_headers = _headers(write_token(ds_ct))
+    payload = {
+        "rows": [{"id": 1, "title": "Updated", "author_email": "invalid"}],
+    }
+    resp = await ds_ct.client.post(
+        "/data/posts/-/upsert", json=payload, headers=auth_headers
+    )
+    assert resp.status_code == 400
+    first_error = resp.json()["errors"][0]
+    assert "author_email" in first_error
+
+
+@pytest.mark.asyncio
+async def test_validation_on_upsert_passes_valid(ds_ct):
+    """The upsert endpoint accepts a well-formed author_email (HTTP 200)."""
+    await ds_ct.invoke_startup()
+    auth_headers = _headers(write_token(ds_ct))
+    payload = {
+        "rows": [{"id": 1, "title": "Updated", "author_email": "valid@test.com"}],
+    }
+    resp = await ds_ct.client.post(
+        "/data/posts/-/upsert", json=payload, headers=auth_headers
+    )
+    assert resp.status_code == 200
+
+
+# --- Unknown type warning logged ---
+
+
+@pytest.mark.asyncio
+async def test_unknown_type_warning_logged(tmp_path_factory, caplog):
+    """Configuring an unregistered column type logs a warning during startup."""
+    db_directory = tmp_path_factory.mktemp("dbs")
+    db_path = str(db_directory / "data.db")
+    db = sqlite3.connect(db_path)
+    ds = None
+    try:
+        db.execute("vacuum")
+        db.execute("create table t (id integer primary key, col text)")
+        db.commit()
+        ds = Datasette(
+            [db_path],
+            config={
+                "databases": {
+                    "data": {"tables": {"t": {"column_types": {"col": "nonexistent_type"}}}}
+                }
+            },
+        )
+        with caplog.at_level(logging.WARNING):
+            await ds.invoke_startup()
+        assert "unknown type" in caplog.text.lower()
+        assert "nonexistent_type" in caplog.text
+    finally:
+        # Close connections even when an assertion fails so no handle leaks
+        db.close()
+        if ds is not None:
+            for database in ds.databases.values():
+                if not database.is_memory:
+                    database.close()
+
+
+# --- Config overwrites on restart ---
+
+
+@pytest.mark.asyncio
+async def test_config_overwrites_on_restart(tmp_path_factory):
+    """Config values should overwrite any existing column types in internal DB on startup."""
+    db_directory = tmp_path_factory.mktemp("dbs")
+    db_path = str(db_directory / "data.db")
+    db = sqlite3.connect(db_path)
+    ds = None
+    try:
+        db.execute("vacuum")
+        db.execute("create table t (id integer primary key, col text)")
+        db.commit()
+        ds = Datasette(
+            [db_path],
+            config={
+                "databases": {"data": {"tables": {"t": {"column_types": {"col": "email"}}}}}
+            },
+        )
+        await ds.invoke_startup()
+        ct = await ds.get_column_type("data", "t", "col")
+        assert ct.name == "email"
+
+        # Manually change the column type in the internal DB
+        await ds.set_column_type("data", "t", "col", "url")
+        ct = await ds.get_column_type("data", "t", "col")
+        assert ct.name == "url"
+
+        # Re-apply config (simulating what happens on restart)
+        await ds._apply_column_types_config()
+        ct = await ds.get_column_type("data", "t", "col")
+        assert ct.name == "email"  # Config wins
+    finally:
+        # Close connections even when an assertion fails so no handle leaks
+        db.close()
+        if ds is not None:
+            for database in ds.databases.values():
+                if not database.is_memory:
+                    database.close()
+
+
+# --- No column_types in config ---
+
+
+@pytest.mark.asyncio
+async def test_no_column_types_in_config(tmp_path_factory):
+    """Datasette should work fine without any column_types configuration."""
+    db_directory = tmp_path_factory.mktemp("dbs")
+    db_path = str(db_directory / "data.db")
+    db = sqlite3.connect(db_path)
+    ds = None
+    try:
+        db.execute("vacuum")
+        db.execute("create table t (id integer primary key, col text)")
+        db.execute("insert into t values (1, 'hello')")
+        db.commit()
+        ds = Datasette([db_path])
+        await ds.invoke_startup()
+
+        # No column types assigned
+        ct_map = await ds.get_column_types("data", "t")
+        assert ct_map == {}
+
+        # JSON endpoint should work without column_types extra
+        response = await ds.client.get("/data/t.json")
+        assert response.status_code == 200
+        assert response.json()["rows"][0]["col"] == "hello"
+
+        # column_types extra should return empty
+        response = await ds.client.get("/data/t.json?_extra=column_types")
+        assert response.status_code == 200
+        assert response.json()["column_types"] == {}
+    finally:
+        # Close connections even when an assertion fails so no handle leaks
+        db.close()
+        if ds is not None:
+            for database in ds.databases.values():
+                if not database.is_memory:
+                    database.close()
diff --git a/tests/test_plugins.py b/tests/test_plugins.py
index f2a47ab442..47d727f240 100644
--- a/tests/test_plugins.py
+++ b/tests/test_plugins.py
@@ -1948,3 +1948,14 @@ def test_metadata_plugin_config_treated_as_config(
assert "plugins" not in actual_metadata
assert actual_metadata == expected_metadata
assert ds.config == expected_config
+
+
+@pytest.mark.asyncio
+async def test_hook_register_column_types():
+    """After startup the registry holds the url, email and json types."""
+    ds = Datasette()
+    await ds.invoke_startup()
+    registered = ds._column_types
+    # Built-in column types should be registered
+    for built_in_name in ("url", "email", "json"):
+        assert built_in_name in registered
+    assert "nonexistent" not in registered