diff --git a/datasette/app.py b/datasette/app.py index f0349895de..3790b34025 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -354,6 +354,7 @@ def __init__( self.immutables = set(immutables or []) self.databases = collections.OrderedDict() self.actions = {} # .invoke_startup() will populate this + self._column_types = {} # .invoke_startup() will populate this try: self._refresh_schemas_lock = asyncio.Lock() except RuntimeError as rex: @@ -692,12 +693,23 @@ async def invoke_startup(self): action_abbrs[action.abbr] = action self.actions[action.name] = action + # Register column types (classes, not instances) + self._column_types = {} + for hook in pm.hook.register_column_types(datasette=self): + if hook: + for ct_cls in hook: + if ct_cls.name in self._column_types: + raise StartupError(f"Duplicate column type name: {ct_cls.name}") + self._column_types[ct_cls.name] = ct_cls + for hook in pm.hook.prepare_jinja2_environment( env=self._jinja_env, datasette=self ): await await_me_maybe(hook) # Ensure internal tables and metadata are populated before startup hooks await self._refresh_schemas() + # Load column_types from config into internal DB + await self._apply_column_types_config() for hook in pm.hook.startup(datasette=self): await await_me_maybe(hook) self._startup_invoked = True @@ -945,6 +957,102 @@ async def set_column_metadata( [database_name, resource_name, column_name, key, value], ) + # Column types API + + async def _apply_column_types_config(self): + """Load column_types from datasette.json config into the internal DB.""" + import logging + + for db_name, db_conf in (self.config or {}).get("databases", {}).items(): + for table_name, table_conf in db_conf.get("tables", {}).items(): + for col_name, ct in table_conf.get("column_types", {}).items(): + if isinstance(ct, str): + col_type, config = ct, None + else: + col_type = ct["type"] + config = ct.get("config") + if col_type not in self._column_types: + logging.warning( + "column_types config references 
unknown type %r " + "for %s.%s.%s", + col_type, + db_name, + table_name, + col_name, + ) + await self.set_column_type( + db_name, table_name, col_name, col_type, config + ) + + async def get_column_type(self, database: str, resource: str, column: str): + """ + Return a ColumnType instance (with config baked in) for a specific + column, or None if no column type is assigned. + """ + row = await self.get_internal_database().execute( + "SELECT column_type, config FROM column_types " + "WHERE database_name = ? AND resource_name = ? AND column_name = ?", + [database, resource, column], + ) + rows = row.rows + if not rows: + return None + ct_name, config = rows[0] + ct_cls = self._column_types.get(ct_name) + if ct_cls is None: + return None + return ct_cls(config=json.loads(config) if config else None) + + async def get_column_types(self, database: str, resource: str) -> dict: + """ + Return {column_name: ColumnType instance (with config)} + for all columns with assigned types on the given resource. + """ + rows = await self.get_internal_database().execute( + "SELECT column_name, column_type, config FROM column_types " + "WHERE database_name = ? AND resource_name = ?", + [database, resource], + ) + result = {} + for row in rows.rows: + col_name, ct_name, config = row + ct_cls = self._column_types.get(ct_name) + if ct_cls is not None: + result[col_name] = ct_cls(config=json.loads(config) if config else None) + return result + + async def set_column_type( + self, + database: str, + resource: str, + column: str, + column_type: str, + config: dict = None, + ) -> None: + """Assign a column type. 
Overwrites any existing assignment.""" + await self.get_internal_database().execute_write( + """INSERT OR REPLACE INTO column_types + (database_name, resource_name, column_name, column_type, config) + VALUES (?, ?, ?, ?, ?)""", + [ + database, + resource, + column, + column_type, + json.dumps(config) if config else None, + ], + ) + + async def remove_column_type( + self, database: str, resource: str, column: str + ) -> None: + """Remove a column type assignment.""" + await self.get_internal_database().execute_write( + "DELETE FROM column_types " + "WHERE database_name = ? AND resource_name = ? AND column_name = ?", + [database, resource, column], + ) + def get_internal_database(self): return self._internal_database diff --git a/datasette/column_types.py b/datasette/column_types.py new file mode 100644 index 0000000000..c41142948a --- /dev/null +++ b/datasette/column_types.py @@ -0,0 +1,44 @@ +class ColumnType: + """ + Base class for column types. + + Subclasses must define ``name`` and ``description`` as class attributes: + + - ``name``: Unique identifier string. Lowercase, no spaces. + Examples: "markdown", "file", "email", "url", "point", "image". + - ``description``: Human-readable label for admin UI dropdowns. + Examples: "Markdown text", "File reference", "Email address". + + Instantiate with an optional ``config`` dict to bind per-column + configuration:: + + ct = MyColumnType(config={"key": "value"}) + ct.config # {"key": "value"} + """ + + name: str + description: str + + def __init__(self, config=None): + self.config = config + + async def render_cell(self, value, column, table, database, datasette, request): + """ + Return an HTML string to render this cell value, or None to + fall through to the default render_cell plugin hook chain. + """ + return None + + async def validate(self, value, datasette): + """ + Validate a value before it is written. Return None if valid, + or a string error message if invalid. 
+ """ + return None + + async def transform_value(self, value, datasette): + """ + Transform a value before it appears in JSON API output. + Return the transformed value. Default: return unchanged. + """ + return value diff --git a/datasette/default_column_types.py b/datasette/default_column_types.py new file mode 100644 index 0000000000..b4ebfcc589 --- /dev/null +++ b/datasette/default_column_types.py @@ -0,0 +1,78 @@ +import json +import re + +import markupsafe + +from datasette import hookimpl +from datasette.column_types import ColumnType + + +class UrlColumnType(ColumnType): + name = "url" + description = "URL" + + async def render_cell(self, value, column, table, database, datasette, request): + if not value or not isinstance(value, str): + return None + escaped = markupsafe.escape(value.strip()) + return markupsafe.Markup(f'{escaped}') + + async def validate(self, value, datasette): + if value is None or value == "": + return None + if not isinstance(value, str): + return "URL must be a string" + if not re.match(r"^https?://\S+$", value.strip()): + return "Invalid URL" + return None + + +class EmailColumnType(ColumnType): + name = "email" + description = "Email address" + + async def render_cell(self, value, column, table, database, datasette, request): + if not value or not isinstance(value, str): + return None + escaped = markupsafe.escape(value.strip()) + return markupsafe.Markup(f'{escaped}') + + async def validate(self, value, datasette): + if value is None or value == "": + return None + if not isinstance(value, str): + return "Email must be a string" + if not re.match(r"^[^@\s]+@[^@\s]+\.[^@\s]+$", value.strip()): + return "Invalid email address" + return None + + +class JsonColumnType(ColumnType): + name = "json" + description = "JSON data" + + async def render_cell(self, value, column, table, database, datasette, request): + if value is None: + return None + try: + parsed = json.loads(value) if isinstance(value, str) else value + formatted = 
json.dumps(parsed, indent=2) + escaped = markupsafe.escape(formatted) + return markupsafe.Markup(f"
<pre>{escaped}</pre>
") + except (json.JSONDecodeError, TypeError): + return None + + async def validate(self, value, datasette): + if value is None or value == "": + return None + if isinstance(value, str): + try: + json.loads(value) + except json.JSONDecodeError: + return "Invalid JSON" + return None + + +@hookimpl +def register_column_types(datasette): + return [UrlColumnType, EmailColumnType, JsonColumnType] diff --git a/datasette/hookspecs.py b/datasette/hookspecs.py index 649019001f..f7bb6ab655 100644 --- a/datasette/hookspecs.py +++ b/datasette/hookspecs.py @@ -55,7 +55,17 @@ def publish_subcommand(publish): @hookspec -def render_cell(row, value, column, table, pks, database, datasette, request): +def render_cell( + row, + value, + column, + table, + pks, + database, + datasette, + request, + column_type, +): """Customize rendering of HTML table cell values""" @@ -74,6 +84,11 @@ def register_actions(datasette): """Register actions: returns a list of datasette.permission.Action objects""" +@hookspec +def register_column_types(datasette): + """Return a list of ColumnType instances""" + + @hookspec def register_routes(datasette): """Register URL routes: return a list of (regex, view_function) pairs""" diff --git a/datasette/plugins.py b/datasette/plugins.py index 992137bd5d..b01b386cfa 100644 --- a/datasette/plugins.py +++ b/datasette/plugins.py @@ -25,6 +25,7 @@ "datasette.default_permissions", "datasette.default_permissions.tokens", "datasette.default_actions", + "datasette.default_column_types", "datasette.default_magic_parameters", "datasette.blob_renderer", "datasette.default_menu_links", diff --git a/datasette/utils/internal_db.py b/datasette/utils/internal_db.py index e4ebdddeea..df1499283c 100644 --- a/datasette/utils/internal_db.py +++ b/datasette/utils/internal_db.py @@ -103,6 +103,15 @@ async def initialize_metadata_tables(db): value text, unique(database_name, resource_name, column_name, key) ); + + CREATE TABLE IF NOT EXISTS column_types ( + database_name TEXT NOT 
NULL, + resource_name TEXT NOT NULL, + column_name TEXT NOT NULL, + column_type TEXT NOT NULL, + config TEXT, + PRIMARY KEY (database_name, resource_name, column_name) + ); """)) diff --git a/datasette/views/database.py b/datasette/views/database.py index 93ad8eda01..916cdbc147 100644 --- a/datasette/views/database.py +++ b/datasette/views/database.py @@ -1205,6 +1205,7 @@ async def display_rows(datasette, database, request, rows, columns): database=database, datasette=datasette, request=request, + column_type=None, ): candidate = await await_me_maybe(candidate) if candidate is not None: diff --git a/datasette/views/row.py b/datasette/views/row.py index 7cc46368c3..4eacfe49a6 100644 --- a/datasette/views/row.py +++ b/datasette/views/row.py @@ -179,26 +179,41 @@ async def template_data(): if "render_cell" in extras: # Call render_cell plugin hook for each cell + ct_map = await self.ds.get_column_types(database, table) rendered_rows = [] for row in rows: rendered_row = {} for value, column in zip(row, columns): - # Call render_cell plugin hook + ct = ct_map.get(column) plugin_display_value = None - for candidate in pm.hook.render_cell( - row=row, - value=value, - column=column, - table=table, - pks=resolved.pks, - database=database, - datasette=self.ds, - request=request, - ): - candidate = await await_me_maybe(candidate) + # Try column type render_cell first + if ct: + candidate = await ct.render_cell( + value=value, + column=column, + table=table, + database=database, + datasette=self.ds, + request=request, + ) if candidate is not None: plugin_display_value = candidate - break + if plugin_display_value is None: + for candidate in pm.hook.render_cell( + row=row, + value=value, + column=column, + table=table, + pks=resolved.pks, + database=database, + datasette=self.ds, + request=request, + column_type=ct, + ): + candidate = await await_me_maybe(candidate) + if candidate is not None: + plugin_display_value = candidate + break if plugin_display_value: 
rendered_row[column] = str(plugin_display_value) rendered_rows.append(rendered_row) @@ -352,6 +367,15 @@ async def post(self, request): update = data["update"] + # Validate column types + from datasette.views.table import _validate_column_types + + ct_errors = await _validate_column_types( + self.ds, resolved.db.name, resolved.table, [update] + ) + if ct_errors: + return _error(ct_errors, 400) + alter = data.get("alter") if alter and not await self.ds.allowed( action="alter-table", diff --git a/datasette/views/table.py b/datasette/views/table.py index 2ee8674323..035abb1b9f 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -134,6 +134,22 @@ async def _redirect_if_needed(datasette, request, resolved): ) +async def _validate_column_types(datasette, database_name, table_name, rows): + """Validate row values against assigned column types. Returns list of error strings.""" + ct_map = await datasette.get_column_types(database_name, table_name) + if not ct_map: + return [] + errors = [] + for row in rows: + for col_name, ct in ct_map.items(): + if col_name not in row: + continue + error = await ct.validate(row[col_name], datasette) + if error: + errors.append(f"{col_name}: {error}") + return errors + + async def display_columns_and_rows( datasette, database_name, @@ -163,6 +179,9 @@ async def display_columns_and_rows( ) ) + # Look up column types for this table + column_types_map = await datasette.get_column_types(database_name, table_name) + column_details = { col.name: col for col in await db.table_column_details(table_name) } @@ -179,16 +198,21 @@ async def display_columns_and_rows( else: type_ = column_details[r[0]].type notnull = column_details[r[0]].notnull - columns.append( - { - "name": r[0], - "sortable": r[0] in sortable_columns, - "is_pk": r[0] in pks_for_display, - "type": type_, - "notnull": notnull, - "description": column_descriptions.get(r[0]), - } - ) + col_dict = { + "name": r[0], + "sortable": r[0] in sortable_columns, + "is_pk": 
r[0] in pks_for_display, + "type": type_, + "notnull": notnull, + "description": column_descriptions.get(r[0]), + "column_type": None, + "column_type_config": None, + } + ct = column_types_map.get(r[0]) + if ct: + col_dict["column_type"] = ct.name + col_dict["column_type_config"] = ct.config + columns.append(col_dict) column_to_foreign_key_table = { fk["column"]: fk["other_table"] @@ -227,23 +251,37 @@ async def display_columns_and_rows( # already shown in the link column. continue - # First let the plugins have a go + # First try column type render_cell, then plugins # pylint: disable=no-member plugin_display_value = None - for candidate in pm.hook.render_cell( - row=row, - value=value, - column=column, - table=table_name, - pks=pks_for_display, - database=database_name, - datasette=datasette, - request=request, - ): - candidate = await await_me_maybe(candidate) + ct = column_types_map.get(column) + if ct: + candidate = await ct.render_cell( + value=value, + column=column, + table=table_name, + database=database_name, + datasette=datasette, + request=request, + ) if candidate is not None: plugin_display_value = candidate - break + if plugin_display_value is None: + for candidate in pm.hook.render_cell( + row=row, + value=value, + column=column, + table=table_name, + pks=pks_for_display, + database=database_name, + datasette=datasette, + request=request, + column_type=ct, + ): + candidate = await await_me_maybe(candidate) + if candidate is not None: + plugin_display_value = candidate + break if plugin_display_value: display_value = plugin_display_value elif isinstance(value, bytes): @@ -484,6 +522,13 @@ async def post(self, request, upsert=False): if errors: return _error(errors, 400) + # Validate column types + ct_errors = await _validate_column_types( + self.ds, database_name, table_name, rows + ) + if ct_errors: + return _error(ct_errors, 400) + num_rows = len(rows) # No that we've passed pks to _validate_data it's safe to @@ -1500,27 +1545,42 @@ async def 
extra_display_rows(run_display_columns_and_rows): async def extra_render_cell(): "Rendered HTML for each cell using the render_cell plugin hook" pks_for_display = pks if pks else (["rowid"] if not is_view else []) - columns = [col[0] for col in results.description] + col_names = [col[0] for col in results.description] + ct_map = await datasette.get_column_types(database_name, table_name) rendered_rows = [] for row in rows: rendered_row = {} - for value, column in zip(row, columns): - # Call render_cell plugin hook + for value, column in zip(row, col_names): + ct = ct_map.get(column) plugin_display_value = None - for candidate in pm.hook.render_cell( - row=row, - value=value, - column=column, - table=table_name, - pks=pks_for_display, - database=database_name, - datasette=datasette, - request=request, - ): - candidate = await await_me_maybe(candidate) + # Try column type render_cell first + if ct: + candidate = await ct.render_cell( + value=value, + column=column, + table=table_name, + database=database_name, + datasette=datasette, + request=request, + ) if candidate is not None: plugin_display_value = candidate - break + if plugin_display_value is None: + for candidate in pm.hook.render_cell( + row=row, + value=value, + column=column, + table=table_name, + pks=pks_for_display, + database=database_name, + datasette=datasette, + request=request, + column_type=ct, + ): + candidate = await await_me_maybe(candidate) + if candidate is not None: + plugin_display_value = candidate + break if plugin_display_value: rendered_row[column] = str(plugin_display_value) rendered_rows.append(rendered_row) @@ -1533,6 +1593,17 @@ async def extra_query(): "params": params, } + async def extra_column_types(): + "Column type assignments for this table" + ct_map = await datasette.get_column_types(database_name, table_name) + return { + col_name: { + "type": ct.name, + "config": ct.config, + } + for col_name, ct in ct_map.items() + } + async def extra_metadata(): "Metadata about the table 
and database" tablemetadata = await datasette.get_resource_metadata(database_name, table_name) @@ -1742,6 +1813,7 @@ async def extra_facets_timed_out(extra_facet_results): extra_debug, extra_request, extra_query, + extra_column_types, extra_metadata, extra_extras, extra_database, @@ -1775,7 +1847,18 @@ async def extra_facets_timed_out(extra_facet_results): } ) raw_sqlite_rows = rows[:page_size] - data["rows"] = [dict(r) for r in raw_sqlite_rows] + # Apply transform_value for columns with assigned types + ct_map = await datasette.get_column_types(database_name, table_name) + transformed_rows = [] + for r in raw_sqlite_rows: + row_dict = dict(r) + for col_name, ct in ct_map.items(): + if col_name in row_dict: + row_dict[col_name] = await ct.transform_value( + row_dict[col_name], datasette + ) + transformed_rows.append(row_dict) + data["rows"] = transformed_rows if context_for_html_hack: data.update(extra_context_from_filters) diff --git a/docs/internals.rst b/docs/internals.rst index 7d607bfe38..6bd3d41d5e 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -903,6 +903,102 @@ Adds a new metadata entry for the specified column. Any previous column-level metadata entry with the same ``key`` will be overwritten. Internally upserts the value into the the ``metadata_columns`` table inside the :ref:`internal database `. +.. _datasette_column_types: + +Column types +------------ + +Column types are stored in the ``column_types`` table in the :ref:`internal database `. The following methods provide the API for reading and modifying column type assignments. + +.. _datasette_get_column_type: + +await .get_column_type(database, resource, column) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``database`` - string + The name of the database. +``resource`` - string + The name of the table or view. +``column`` - string + The name of the column. 
+ +Returns a :ref:`ColumnType ` subclass instance with ``.config`` populated for the specified column, or ``None`` if no column type is assigned. + +.. code-block:: python + + ct = await datasette.get_column_type( + "mydb", "mytable", "email_col" + ) + if ct: + print(ct.name) # "email" + print(ct.config) # None or {...} + +.. _datasette_get_column_types: + +await .get_column_types(database, resource) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``database`` - string + The name of the database. +``resource`` - string + The name of the table or view. + +Returns a dictionary mapping column names to :ref:`ColumnType ` subclass instances (with ``.config`` populated) for all columns that have assigned types on the given resource. + +.. code-block:: python + + ct_map = await datasette.get_column_types("mydb", "mytable") + for col_name, ct in ct_map.items(): + print(col_name, ct.name, ct.config) + +.. _datasette_set_column_type: + +await .set_column_type(database, resource, column, column_type, config=None) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``database`` - string + The name of the database. +``resource`` - string + The name of the table or view. +``column`` - string + The name of the column. +``column_type`` - string + The column type name to assign, e.g. ``"email"``. +``config`` - dict, optional + Optional configuration dict for the column type. + +Assigns a column type to a column. Overwrites any existing assignment for that column. + +.. code-block:: python + + await datasette.set_column_type( + "mydb", + "mytable", + "location", + "point", + config={"srid": 4326}, + ) + +.. _datasette_remove_column_type: + +await .remove_column_type(database, resource, column) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``database`` - string + The name of the database. +``resource`` - string + The name of the table or view. +``column`` - string + The name of the column. 
+ +Removes the column type assignment for the specified column. + +.. code-block:: python + + await datasette.remove_column_type( + "mydb", "mytable", "location" + ) + .. _datasette_add_database: .add_database(db, name=None, route=None) @@ -1941,6 +2037,14 @@ The internal database schema is as follows: value text, unique(database_name, resource_name, column_name, key) ); + CREATE TABLE column_types ( + database_name TEXT NOT NULL, + resource_name TEXT NOT NULL, + column_name TEXT NOT NULL, + column_type TEXT NOT NULL, + config TEXT, + PRIMARY KEY (database_name, resource_name, column_name) + ); .. [[[end]]] diff --git a/docs/plugin_hooks.rst b/docs/plugin_hooks.rst index b9701f7cf7..69710bb614 100644 --- a/docs/plugin_hooks.rst +++ b/docs/plugin_hooks.rst @@ -474,8 +474,8 @@ Examples: `datasette-publish-fly ` subclass instance or None + The :ref:`ColumnType ` subclass instance assigned to this column (with ``.config`` populated), or ``None`` if no column type is assigned. You can access ``column_type.name``, ``column_type.config``, etc. + +If a column has a :ref:`column type ` assigned and that column type's ``render_cell`` method returns a non-``None`` value, it will take priority over this plugin hook. + If your hook returns ``None``, it will be ignored. Use this to indicate that your hook is not able to custom render this particular value. If the hook returns a string, that string will be rendered in the table cell. @@ -989,6 +994,94 @@ This tells Datasette "here's how to find all documents in the system - look in t The permission system then uses this query along with rules from plugins to determine which documents each user can access, all efficiently in SQL rather than loading everything into Python. +.. _plugin_register_column_types: + +register_column_types(datasette) +-------------------------------- + +Return a list of :ref:`ColumnType ` **subclasses** (not instances) to register custom column types. 
Column types define how values in specific columns are rendered, validated, and transformed. + +.. code-block:: python + + from datasette import hookimpl + from datasette.column_types import ColumnType + import markupsafe + + + class ColorColumnType(ColumnType): + name = "color" + description = "CSS color value" + + async def render_cell( + self, + value, + column, + table, + database, + datasette, + request, + ): + if value: + return markupsafe.Markup( + '' + "{color}" + ).format(color=markupsafe.escape(value)) + return None + + async def validate(self, value, datasette): + if value and not value.startswith("#"): + return "Color must start with #" + return None + + async def transform_value(self, value, datasette): + # Normalize to uppercase + if isinstance(value, str): + return value.upper() + return value + + + @hookimpl + def register_column_types(datasette): + return [ColorColumnType] + +Each ``ColumnType`` subclass must define the following class attributes: + +``name`` - string + Unique identifier for the column type, e.g. ``"color"``. Must be unique across all plugins. + +``description`` - string + Human-readable label, e.g. ``"CSS color value"``. + +And the following methods, all optional: + +``render_cell(self, value, column, table, database, datasette, request)`` + Return an HTML string to render this cell value, or ``None`` to fall through to the default ``render_cell`` plugin hook chain. When a column type provides rendering, it takes priority over the ``render_cell`` plugin hook. + +``validate(self, value, datasette)`` + Validate a value before it is written via the insert, update, or upsert API endpoints. Return ``None`` if valid, or a string error message if invalid. Null values and empty strings skip validation. + +``transform_value(self, value, datasette)`` + Transform a value before it appears in JSON API output. Return the transformed value. The default implementation returns the value unchanged. 
+ +Per-column configuration is available via ``self.config`` in all methods. When a column type is looked up for a specific column (via :ref:`get_column_type ` or :ref:`get_column_types `), the returned instance has ``config`` set to the parsed JSON config dict for that column assignment, or ``None`` if no config was provided. + +Column types are assigned to columns via the ``column_types`` key in :ref:`table configuration `: + +.. code-block:: yaml + + databases: + mydb: + tables: + mytable: + column_types: + bg_color: color + highlight: + type: color + config: + format: rgb + +Datasette includes three built-in column types: ``url``, ``email``, and ``json``. + .. _plugin_asgi_wrapper: asgi_wrapper(datasette) diff --git a/docs/plugins.rst b/docs/plugins.rst index 60bdc11141..03cbedebdd 100644 --- a/docs/plugins.rst +++ b/docs/plugins.rst @@ -207,6 +207,15 @@ If you run ``datasette plugins --all`` it will include default plugins that ship "register_actions" ] }, + { + "name": "datasette.default_column_types", + "static": false, + "templates": false, + "version": null, + "hooks": [ + "register_column_types" + ] + }, { "name": "datasette.default_magic_parameters", "static": false, diff --git a/tests/test_column_types.py b/tests/test_column_types.py new file mode 100644 index 0000000000..2929c1f3bc --- /dev/null +++ b/tests/test_column_types.py @@ -0,0 +1,732 @@ +import logging + +from datasette.app import Datasette +from datasette.column_types import ColumnType +from datasette.hookspecs import hookimpl +from datasette.plugins import pm +from datasette.utils import sqlite3 +from datasette.utils import StartupError +import markupsafe +import pytest +import time + + +@pytest.fixture +def ds_ct(tmp_path_factory): + db_directory = tmp_path_factory.mktemp("dbs") + db_path = str(db_directory / "data.db") + db = sqlite3.connect(str(db_path)) + db.execute("vacuum") + db.execute( + "create table posts (id integer primary key, title text, body text, " + "author_email text, 
website text, metadata text)" + ) + db.execute( + "insert into posts values (1, 'Hello', '# World', 'test@example.com', " + "'https://example.com', '{\"key\": \"value\"}')" + ) + db.commit() + ds = Datasette( + [db_path], + config={ + "databases": { + "data": { + "tables": { + "posts": { + "column_types": { + "body": "markdown", + "author_email": "email", + "website": "url", + "metadata": "json", + } + } + } + } + } + }, + ) + ds.root_enabled = True + yield ds + db.close() + for database in ds.databases.values(): + if not database.is_memory: + database.close() + + +def write_token(ds, actor_id="root", permissions=None): + to_sign = {"a": actor_id, "token": "dstok", "t": int(time.time())} + if permissions: + to_sign["_r"] = {"a": permissions} + return "dstok_{}".format(ds.sign(to_sign, namespace="token")) + + +def _headers(token): + return { + "Authorization": "Bearer {}".format(token), + "Content-Type": "application/json", + } + + +# --- Internal DB and config loading --- + + +@pytest.mark.asyncio +async def test_column_types_table_created(ds_ct): + await ds_ct.invoke_startup() + internal = ds_ct.get_internal_database() + result = await internal.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name='column_types'" + ) + assert len(result.rows) == 1 + + +@pytest.mark.asyncio +async def test_config_loaded_into_internal_db(ds_ct): + await ds_ct.invoke_startup() + ct_map = await ds_ct.get_column_types("data", "posts") + # "markdown" is not a registered type, so it won't appear + assert "body" not in ct_map + assert ct_map["author_email"].name == "email" + assert ct_map["author_email"].config is None + assert ct_map["website"].name == "url" + assert ct_map["metadata"].name == "json" + + +@pytest.mark.asyncio +async def test_config_with_type_and_config(tmp_path_factory): + class PointColumnType(ColumnType): + name = "point" + description = "Geographic point" + + class _Plugin: + @hookimpl + def register_column_types(self, datasette): + return 
[PointColumnType]
+
+    plugin = _Plugin()
+    pm.register(plugin, name="test_point_ct")
+    try:
+        db_directory = tmp_path_factory.mktemp("dbs")
+        db_path = str(db_directory / "data.db")
+        db = sqlite3.connect(str(db_path))
+        db.execute("vacuum")
+        db.execute("create table geo (id integer primary key, location text)")
+        ds = Datasette(
+            [db_path],
+            config={
+                "databases": {
+                    "data": {
+                        "tables": {
+                            "geo": {
+                                "column_types": {
+                                    "location": {
+                                        "type": "point",
+                                        "config": {"srid": 4326},
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            },
+        )
+        await ds.invoke_startup()
+        ct = await ds.get_column_type("data", "geo", "location")
+        assert ct.name == "point"
+        assert ct.config == {"srid": 4326}
+        db.close()
+        for database in ds.databases.values():
+            if not database.is_memory:
+                database.close()
+    finally:
+        pm.unregister(plugin, name="test_point_ct")
+
+
+# --- Datasette API methods ---
+
+
+@pytest.mark.asyncio
+async def test_get_column_type(ds_ct):
+    await ds_ct.invoke_startup()
+    ct = await ds_ct.get_column_type("data", "posts", "author_email")
+    assert isinstance(ct, ColumnType)
+    assert ct.name == "email"
+    assert ct.config is None
+
+
+@pytest.mark.asyncio
+async def test_get_column_type_missing(ds_ct):
+    await ds_ct.invoke_startup()
+    ct = await ds_ct.get_column_type("data", "posts", "title")
+    assert ct is None
+
+
+@pytest.mark.asyncio
+async def test_set_and_remove_column_type(ds_ct):
+    await ds_ct.invoke_startup()
+    await ds_ct.set_column_type("data", "posts", "title", "email")
+    ct = await ds_ct.get_column_type("data", "posts", "title")
+    assert ct.name == "email"
+    assert ct.config is None
+
+    await ds_ct.remove_column_type("data", "posts", "title")
+    ct = await ds_ct.get_column_type("data", "posts", "title")
+    assert ct is None
+
+
+@pytest.mark.asyncio
+async def test_set_column_type_with_config(ds_ct):
+    await ds_ct.invoke_startup()
+    await ds_ct.set_column_type("data", "posts", "title", "url", {"max_length": 200})
+    ct = await ds_ct.get_column_type("data", "posts", "title")
+    assert ct.name == "url"
+    assert ct.config == {"max_length": 200}
+
+
+# --- Plugin registration ---
+
+
+@pytest.mark.asyncio
+async def test_builtin_column_types_registered(ds_ct):
+    """register_column_types returns classes; _column_types stores them by name."""
+    await ds_ct.invoke_startup()
+    assert "url" in ds_ct._column_types
+    assert "email" in ds_ct._column_types
+    assert "json" in ds_ct._column_types
+    assert "nonexistent" not in ds_ct._column_types
+
+
+@pytest.mark.asyncio
+async def test_column_type_class_attributes(ds_ct):
+    await ds_ct.invoke_startup()
+    url_cls = ds_ct._column_types["url"]
+    assert url_cls.name == "url"
+    assert url_cls.description == "URL"
+    email_cls = ds_ct._column_types["email"]
+    assert email_cls.name == "email"
+    assert email_cls.description == "Email address"
+
+
+# --- JSON API ---
+
+
+@pytest.mark.asyncio
+async def test_column_types_extra(ds_ct):
+    await ds_ct.invoke_startup()
+    response = await ds_ct.client.get("/data/posts.json?_extra=column_types")
+    assert response.status_code == 200
+    data = response.json()
+    assert "column_types" in data
+    assert data["column_types"]["author_email"] == {"type": "email", "config": None}
+    assert data["column_types"]["website"] == {"type": "url", "config": None}
+    assert data["column_types"]["metadata"] == {"type": "json", "config": None}
+    # "markdown" is not a registered type, so body should not appear
+    assert "body" not in data["column_types"]
+    # title has no column type, should not appear
+    assert "title" not in data["column_types"]
+
+
+@pytest.mark.asyncio
+async def test_display_columns_include_column_type(ds_ct):
+    await ds_ct.invoke_startup()
+    response = await ds_ct.client.get("/data/posts.json?_extra=display_columns")
+    assert response.status_code == 200
+    data = response.json()
+    cols = {c["name"]: c for c in data["display_columns"]}
+    assert cols["author_email"]["column_type"] == "email"
+    assert cols["author_email"]["column_type_config"] is None
+    assert cols["website"]["column_type"] == "url"
+    assert cols["title"]["column_type"] is None
+
+
+# --- Rendering ---
+
+
+@pytest.mark.asyncio
+async def test_url_render_cell(ds_ct):
+    await ds_ct.invoke_startup()
+    response = await ds_ct.client.get("/data/posts.json?_extra=render_cell")
+    assert response.status_code == 200
+    data = response.json()
+    rendered = data["render_cell"][0]
+    assert "href" in rendered["website"]
+    assert "https://example.com" in rendered["website"]
+
+
+@pytest.mark.asyncio
+async def test_email_render_cell(ds_ct):
+    await ds_ct.invoke_startup()
+    response = await ds_ct.client.get("/data/posts.json?_extra=render_cell")
+    assert response.status_code == 200
+    data = response.json()
+    rendered = data["render_cell"][0]
+    assert "mailto:" in rendered["author_email"]
+    assert "test@example.com" in rendered["author_email"]
+
+
+@pytest.mark.asyncio
+async def test_json_render_cell(ds_ct):
+    await ds_ct.invoke_startup()
+    response = await ds_ct.client.get("/data/posts.json?_extra=render_cell")
+    assert response.status_code == 200
+    data = response.json()
+    rendered = data["render_cell"][0]
+    assert "<pre>" in rendered["metadata"]
+
+
+# --- Validation ---
+
+
+@pytest.mark.asyncio
+async def test_email_validation_on_insert(ds_ct):
+    await ds_ct.invoke_startup()
+    token = write_token(ds_ct)
+    response = await ds_ct.client.post(
+        "/data/posts/-/insert",
+        json={"row": {"title": "Test", "author_email": "not-an-email"}},
+        headers=_headers(token),
+    )
+    assert response.status_code == 400
+    assert "author_email" in response.json()["errors"][0]
+
+
+@pytest.mark.asyncio
+async def test_email_validation_passes_valid(ds_ct):
+    await ds_ct.invoke_startup()
+    token = write_token(ds_ct)
+    response = await ds_ct.client.post(
+        "/data/posts/-/insert",
+        json={"row": {"title": "Test", "author_email": "valid@example.com"}},
+        headers=_headers(token),
+    )
+    assert response.status_code == 201
+
+
+@pytest.mark.asyncio
+async def test_url_validation_on_insert(ds_ct):
+    await ds_ct.invoke_startup()
+    token = write_token(ds_ct)
+    response = await ds_ct.client.post(
+        "/data/posts/-/insert",
+        json={"row": {"title": "Test", "website": "not-a-url"}},
+        headers=_headers(token),
+    )
+    assert response.status_code == 400
+    assert "website" in response.json()["errors"][0]
+
+
+@pytest.mark.asyncio
+async def test_json_validation_on_insert(ds_ct):
+    await ds_ct.invoke_startup()
+    token = write_token(ds_ct)
+    response = await ds_ct.client.post(
+        "/data/posts/-/insert",
+        json={"row": {"title": "Test", "metadata": "not-json{"}},
+        headers=_headers(token),
+    )
+    assert response.status_code == 400
+    assert "metadata" in response.json()["errors"][0]
+
+
+@pytest.mark.asyncio
+async def test_validation_on_update(ds_ct):
+    await ds_ct.invoke_startup()
+    token = write_token(ds_ct)
+    response = await ds_ct.client.post(
+        "/data/posts/1/-/update",
+        json={"update": {"author_email": "invalid"}},
+        headers=_headers(token),
+    )
+    assert response.status_code == 400
+    assert "author_email" in response.json()["errors"][0]
+
+
+@pytest.mark.asyncio
+async def test_validation_allows_null(ds_ct):
+    await ds_ct.invoke_startup()
+    token = write_token(ds_ct)
+    response = await ds_ct.client.post(
+        "/data/posts/-/insert",
+        json={"row": {"title": "Test", "author_email": None}},
+        headers=_headers(token),
+    )
+    assert response.status_code == 201
+
+
+@pytest.mark.asyncio
+async def test_validation_allows_empty_string(ds_ct):
+    await ds_ct.invoke_startup()
+    token = write_token(ds_ct)
+    response = await ds_ct.client.post(
+        "/data/posts/-/insert",
+        json={"row": {"title": "Test", "author_email": ""}},
+        headers=_headers(token),
+    )
+    assert response.status_code == 201
+
+
+# --- ColumnType base class ---
+
+
+@pytest.mark.asyncio
+async def test_column_type_base_defaults():
+    class TestType(ColumnType):
+        name = "test"
+        description = "Test type"
+
+    ct = TestType()
+    assert ct.config is None
+    assert await ct.render_cell("val", "col", "tbl", "db", None, None) is None
+    assert await ct.validate("val", None) is None
+    assert await ct.transform_value("val", None) == "val"
+
+
+# --- render_cell extra with column types ---
+
+
+@pytest.mark.asyncio
+async def test_render_cell_extra_with_column_types(ds_ct):
+    await ds_ct.invoke_startup()
+    response = await ds_ct.client.get("/data/posts.json?_extra=render_cell")
+    assert response.status_code == 200
+    data = response.json()
+    rendered = data["render_cell"][0]
+    assert "mailto:" in rendered["author_email"]
+    assert "href" in rendered["website"]
+
+
+# --- Duplicate column type name ---
+
+
+@pytest.mark.asyncio
+async def test_duplicate_column_type_name_raises_error():
+    class DuplicateUrlType(ColumnType):
+        name = "url"
+        description = "Duplicate URL"
+
+        async def render_cell(self, value, column, table, database, datasette, request):
+            return None
+
+    class _Plugin:
+        @hookimpl
+        def register_column_types(self, datasette):
+            return [DuplicateUrlType]
+
+    plugin = _Plugin()
+    pm.register(plugin, name="test_duplicate_ct")
+    try:
+        ds = Datasette()
+        with pytest.raises(StartupError, match="Duplicate column type name: url"):
+            await ds.invoke_startup()
+    finally:
+        pm.unregister(plugin, name="test_duplicate_ct")
+
+
+# --- Row endpoint ---
+
+
+@pytest.mark.asyncio
+async def test_row_endpoint_render_cell_with_column_types(ds_ct):
+    await ds_ct.invoke_startup()
+    response = await ds_ct.client.get("/data/posts/1.json?_extra=render_cell")
+    assert response.status_code == 200
+    data = response.json()
+    rendered = data["render_cell"][0]
+    assert "mailto:" in rendered["author_email"]
+    assert "href" in rendered["website"]
+
+
+# --- transform_value in JSON output ---
+
+
+@pytest.mark.asyncio
+async def test_transform_value_in_json_output(tmp_path_factory):
+    """A column type with transform_value should modify rows in JSON API."""
+
+    class UpperColumnType(ColumnType):
+        name = "upper"
+        description = "Uppercase"
+
+        async def transform_value(self, value, datasette):
+            if isinstance(value, str):
+                return value.upper()
+            return value
+
+    class _Plugin:
+        @hookimpl
+        def register_column_types(self, datasette):
+            return [UpperColumnType]
+
+    plugin = _Plugin()
+    pm.register(plugin, name="test_transform_ct")
+    try:
+        db_directory = tmp_path_factory.mktemp("dbs")
+        db_path = str(db_directory / "data.db")
+        db = sqlite3.connect(str(db_path))
+        db.execute("vacuum")
+        db.execute("create table t (id integer primary key, name text)")
+        db.execute("insert into t values (1, 'hello')")
+        db.commit()
+        ds = Datasette(
+            [db_path],
+            config={
+                "databases": {
+                    "data": {"tables": {"t": {"column_types": {"name": "upper"}}}}
+                }
+            },
+        )
+        await ds.invoke_startup()
+        response = await ds.client.get("/data/t.json")
+        assert response.status_code == 200
+        data = response.json()
+        assert data["rows"][0]["name"] == "HELLO"
+        db.close()
+        for database in ds.databases.values():
+            if not database.is_memory:
+                database.close()
+    finally:
+        pm.unregister(plugin, name="test_transform_ct")
+
+
+# --- Column type priority over plugins ---
+
+
+@pytest.mark.asyncio
+async def test_column_type_render_cell_has_priority_over_plugins(tmp_path_factory):
+    """Column type render_cell should take priority over render_cell plugin hook."""
+
+    class PriorityColumnType(ColumnType):
+        name = "priority_test"
+        description = "Priority test"
+
+        async def render_cell(self, value, column, table, database, datasette, request):
+            if value is not None:
+                return markupsafe.Markup(
+                    f"COLUMN_TYPE:{markupsafe.escape(value)}"
+                )
+            return None
+
+    class _ColumnTypePlugin:
+        @hookimpl
+        def register_column_types(self, datasette):
+            return [PriorityColumnType]
+
+    class _RenderCellPlugin:
+        @hookimpl
+        def render_cell(
+            self,
+            row,
+            value,
+            column,
+            table,
+            pks,
+            database,
+            datasette,
+            request,
+            column_type,
+        ):
+            if column == "name":
+                return markupsafe.Markup(f"PLUGIN:{markupsafe.escape(value)}")
+
+    ct_plugin = _ColumnTypePlugin()
+    rc_plugin = _RenderCellPlugin()
+    pm.register(ct_plugin, name="test_priority_ct")
+    pm.register(rc_plugin, name="test_priority_render")
+    try:
+        db_directory = tmp_path_factory.mktemp("dbs")
+        db_path = str(db_directory / "data.db")
+        db = sqlite3.connect(str(db_path))
+        db.execute("vacuum")
+        db.execute("create table t (id integer primary key, name text)")
+        db.execute("insert into t values (1, 'hello')")
+        db.commit()
+        ds = Datasette(
+            [db_path],
+            config={
+                "databases": {
+                    "data": {
+                        "tables": {"t": {"column_types": {"name": "priority_test"}}}
+                    }
+                }
+            },
+        )
+        await ds.invoke_startup()
+        response = await ds.client.get("/data/t.json?_extra=render_cell")
+        assert response.status_code == 200
+        data = response.json()
+        rendered = data["render_cell"][0]
+        # Column type should win over the plugin
+        assert "COLUMN_TYPE:" in rendered["name"]
+        assert "PLUGIN:" not in rendered["name"]
+        db.close()
+        for database in ds.databases.values():
+            if not database.is_memory:
+                database.close()
+    finally:
+        pm.unregister(ct_plugin, name="test_priority_ct")
+        pm.unregister(rc_plugin, name="test_priority_render")
+
+
+# --- Row detail page rendering ---
+
+
+@pytest.mark.asyncio
+async def test_row_detail_page_html_rendering(ds_ct):
+    """Row detail HTML page should use column type rendering."""
+    await ds_ct.invoke_startup()
+    response = await ds_ct.client.get("/data/posts/1")
+    assert response.status_code == 200
+    html = response.text
+    # The email column should be rendered with mailto: link
+    assert "mailto:test@example.com" in html
+    # The url column should be rendered with href
+    assert 'href="https://example.com"' in html
+
+
+# --- HTML table page rendering ---
+
+
+@pytest.mark.asyncio
+async def test_html_table_page_rendering(ds_ct):
+    """HTML table page should use column type rendering."""
+    await ds_ct.invoke_startup()
+    response = await ds_ct.client.get("/data/posts")
+    assert response.status_code == 200
+    html = response.text
+    assert "mailto:test@example.com" in html
+    assert 'href="https://example.com"' in html
+
+
+# --- Validation on upsert ---
+
+
+@pytest.mark.asyncio
+async def test_validation_on_upsert(ds_ct):
+    await ds_ct.invoke_startup()
+    token = write_token(ds_ct)
+    response = await ds_ct.client.post(
+        "/data/posts/-/upsert",
+        json={
+            "rows": [{"id": 1, "title": "Updated", "author_email": "invalid"}],
+        },
+        headers=_headers(token),
+    )
+    assert response.status_code == 400
+    assert "author_email" in response.json()["errors"][0]
+
+
+@pytest.mark.asyncio
+async def test_validation_on_upsert_passes_valid(ds_ct):
+    await ds_ct.invoke_startup()
+    token = write_token(ds_ct)
+    response = await ds_ct.client.post(
+        "/data/posts/-/upsert",
+        json={
+            "rows": [{"id": 1, "title": "Updated", "author_email": "valid@test.com"}],
+        },
+        headers=_headers(token),
+    )
+    assert response.status_code == 200
+
+
+# --- Unknown type warning logged ---
+
+
+@pytest.mark.asyncio
+async def test_unknown_type_warning_logged(tmp_path_factory, caplog):
+    db_directory = tmp_path_factory.mktemp("dbs")
+    db_path = str(db_directory / "data.db")
+    db = sqlite3.connect(str(db_path))
+    db.execute("vacuum")
+    db.execute("create table t (id integer primary key, col text)")
+    db.commit()
+    ds = Datasette(
+        [db_path],
+        config={
+            "databases": {
+                "data": {"tables": {"t": {"column_types": {"col": "nonexistent_type"}}}}
+            }
+        },
+    )
+    with caplog.at_level(logging.WARNING):
+        await ds.invoke_startup()
+    assert "unknown type" in caplog.text.lower()
+    assert "nonexistent_type" in caplog.text
+    db.close()
+    for database in ds.databases.values():
+        if not database.is_memory:
+            database.close()
+
+
+# --- Config overwrites on restart ---
+
+
+@pytest.mark.asyncio
+async def test_config_overwrites_on_restart(tmp_path_factory):
+    """Config values should overwrite any existing column types in internal DB on startup."""
+    db_directory = tmp_path_factory.mktemp("dbs")
+    db_path = str(db_directory / "data.db")
+    db = sqlite3.connect(str(db_path))
+    db.execute("vacuum")
+    db.execute("create table t (id integer primary key, col text)")
+    db.commit()
+    ds = Datasette(
+        [db_path],
+        config={
+            "databases": {"data": {"tables": {"t": {"column_types": {"col": "email"}}}}}
+        },
+    )
+    await ds.invoke_startup()
+    ct = await ds.get_column_type("data", "t", "col")
+    assert ct.name == "email"
+
+    # Manually change the column type in the internal DB
+    await ds.set_column_type("data", "t", "col", "url")
+    ct = await ds.get_column_type("data", "t", "col")
+    assert ct.name == "url"
+
+    # Re-apply config (simulating what happens on restart)
+    await ds._apply_column_types_config()
+    ct = await ds.get_column_type("data", "t", "col")
+    assert ct.name == "email"  # Config wins
+
+    db.close()
+    for database in ds.databases.values():
+        if not database.is_memory:
+            database.close()
+
+
+# --- No column_types in config ---
+
+
+@pytest.mark.asyncio
+async def test_no_column_types_in_config(tmp_path_factory):
+    """Datasette should work fine without any column_types configuration."""
+    db_directory = tmp_path_factory.mktemp("dbs")
+    db_path = str(db_directory / "data.db")
+    db = sqlite3.connect(str(db_path))
+    db.execute("vacuum")
+    db.execute("create table t (id integer primary key, col text)")
+    db.execute("insert into t values (1, 'hello')")
+    db.commit()
+    ds = Datasette([db_path])
+    await ds.invoke_startup()
+
+    # No column types assigned
+    ct_map = await ds.get_column_types("data", "t")
+    assert ct_map == {}
+
+    # JSON endpoint should work without column_types extra
+    response = await ds.client.get("/data/t.json")
+    assert response.status_code == 200
+    assert response.json()["rows"][0]["col"] == "hello"
+
+    # column_types extra should return empty
+    response = await ds.client.get("/data/t.json?_extra=column_types")
+    assert response.status_code == 200
+    assert response.json()["column_types"] == {}
+
+    db.close()
+    for database in ds.databases.values():
+        if not database.is_memory:
+            database.close()
diff --git a/tests/test_plugins.py b/tests/test_plugins.py
index f2a47ab442..47d727f240 100644
--- a/tests/test_plugins.py
+++ b/tests/test_plugins.py
@@ -1948,3 +1948,14 @@ def test_metadata_plugin_config_treated_as_config(
     assert "plugins" not in actual_metadata
     assert actual_metadata == expected_metadata
     assert ds.config == expected_config
+
+
+@pytest.mark.asyncio
+async def test_hook_register_column_types():
+    ds = Datasette()
+    await ds.invoke_startup()
+    # Built-in column types should be registered
+    assert "url" in ds._column_types
+    assert "email" in ds._column_types
+    assert "json" in ds._column_types
+    assert "nonexistent" not in ds._column_types