diff --git a/.gitignore b/.gitignore index bb11e147c..50dd40302 100644 --- a/.gitignore +++ b/.gitignore @@ -85,3 +85,6 @@ uv.lock # pixi environments .pixi/* !.pixi/config.toml + +# copilot files +copilot-instructions.md diff --git a/tests/test_user_forensics.py b/tests/test_user_forensics.py new file mode 100644 index 000000000..8a4048083 --- /dev/null +++ b/tests/test_user_forensics.py @@ -0,0 +1,371 @@ +import asyncio +import os +import time +import uuid +from urllib.parse import urljoin, urlparse + +import numpy +import pytest +from minio import Minio +from minio.error import S3Error + +from tiled.access_control.access_tags import AccessTagsCompiler +from tiled.access_control.scopes import ALL_SCOPES +from tiled.catalog import in_memory +from tiled.client import Context, from_context +from tiled.server.app import build_app, build_app_from_config +from tiled.utils import sanitize_uri +from tiled.validation_registration import ValidationRegistry + +from .utils import enter_username_password + +arr = numpy.ones((5, 5)) + +server_config = { + "access_control": { + "access_policy": "tiled.access_control.access_policies:TagBasedAccessPolicy", + "args": { + "provider": "toy", + "tags_db": { + "uri": "file:compiled_tags_mem?mode=memory&cache=shared" # in-memory and shareable + }, + "access_tags_parser": "tiled.access_control.access_tags:AccessTagsParser", + }, + }, + "authentication": { + "tiled_admins": [{"provider": "toy", "id": "admin"}], + "allow_anonymous_access": True, + "secret_keys": ["SECRET"], + "providers": [ + { + "provider": "toy", + "authenticator": "tiled.authenticators:DictionaryAuthenticator", + "args": { + "users_to_passwords": { + "alice": "alice", + "bob": "bob", + "admin": "admin", + }, + }, + }, + ], + }, +} + +access_tag_config = { + "roles": { + "facility_admin": { + "scopes": [ + "read:data", + "read:metadata", + "write:data", + "write:metadata", + "delete:node", + "delete:revision", + "create:node", + "register", + ] + }, + }, + "tags": { + 
"alice_tag": { + "users": [ + { + "name": "alice", + "role": "facility_admin", + }, + { + "name": "bob", + "role": "facility_admin", + }, + ], + }, + }, + "tag_owners": { + "alice_tag": { + "users": [ + { + "name": "alice", + }, + { + "name": "bob", + }, + ], + }, + }, +} + +validation_registry = ValidationRegistry() +validation_registry.register("SomeSpec", lambda *args, **kwargs: None) + + +def group_parser(groupname): + return { + "physicists": ["alice", "bob"], + }[groupname] + + +@pytest.fixture(scope="module") +def compile_access_tags_db(): + access_tags_compiler = AccessTagsCompiler( + ALL_SCOPES, + access_tag_config, + {"uri": "file:compiled_tags_mem?mode=memory&cache=shared"}, + group_parser, + ) + + access_tags_compiler.load_tag_config() + access_tags_compiler.compile() + yield access_tags_compiler + access_tags_compiler.connection.close() + + +@pytest.fixture(scope="module") +def access_control_test_context_factory(tmpdir_module, compile_access_tags_db): + config = { + "trees": [ + { + "tree": "tiled.catalog:in_memory", + "args": { + "named_memory": "catalog_foo", + "writable_storage": str(tmpdir_module / "foo"), + "top_level_access_blob": {"tags": ["alice_tag"]}, + }, + "path": "/foo", + }, + { + "tree": "tiled.catalog:in_memory", + "args": { + "named_memory": "catalog_qux", + "writable_storage": str(tmpdir_module / "qux"), + "top_level_access_blob": {"tags": ["public"]}, + }, + "path": "/qux", + }, + ], + } + + config.update(server_config) + contexts = [] + clients = {} + + def _create_and_login_context(username, password=None, api_key=None): + if not any([password, api_key]): + raise ValueError("Please provide either 'password' or 'api_key' for auth") + + if client := clients.get(username, None): + return client + app = build_app_from_config(config) + context = Context.from_app( + app, uri=f"http://local-tiled-app-{username}/api/v1", api_key=api_key + ) + contexts.append(context) + client = from_context(context, remember_me=False) + clients[username] 
= client + if api_key is None: + with enter_username_password(username, password): + client.context.login(remember_me=False) + return client + + yield _create_and_login_context + + for context in contexts: + context.close() + + +@pytest.fixture +def tmp_minio_bucket(): + """Create a temporary MinIO bucket and clean it up after tests.""" + if uri := os.getenv("TILED_TEST_BUCKET"): + clean_uri, username, password = sanitize_uri(uri) + minio_client = Minio( + endpoint=urlparse(clean_uri).netloc, # e.g. only "localhost:9000" + access_key=username or "minioadmin", + secret_key=password or "minioadmin", + secure=False, + ) + + bucket_name = f"test-{uuid.uuid4().hex}" + minio_client.make_bucket(bucket_name=bucket_name) + + try: + yield urljoin(uri, "/" + bucket_name) # full URI with credentials + finally: + # Cleanup: remove all objects and delete the bucket + try: + objects = minio_client.list_objects( + bucket_name=bucket_name, recursive=True + ) + for obj in objects: + minio_client.remove_object( + bucket_name=bucket_name, object_name=obj.object_name + ) + minio_client.remove_bucket(bucket_name=bucket_name) + except S3Error as e: + print(f"Warning: failed to delete test bucket {bucket_name}: {e}") + + else: + yield None + + +@pytest.fixture +def tree(tmpdir, tmp_minio_bucket): + writable_storage = [f"duckdb:///{tmpdir / 'data.duckdb'}"] + + if tmp_minio_bucket: + writable_storage.append( + { + "provider": "s3", + "uri": tmp_minio_bucket, + "config": { + "virtual_hosted_style_request": False, + "client_options": {"allow_http": True}, + }, + } + ) + + writable_storage.append(f"file://localhost{str(tmpdir / 'data')}") + + return in_memory(writable_storage=writable_storage) + + +async def coro_test(c, keys): + child_node = await c.context.http_client.app.state.root_tree[ + keys[0] + ].lookup_adapter([keys[1]]) + return child_node + + +def test_created_and_updated_info_with_users(access_control_test_context_factory): + """ + Test that created_by and updated_by fields are 
correctly set + on node creation and metadata update. + """ + + alice_client = access_control_test_context_factory("alice", "alice") + bob_client = access_control_test_context_factory("bob", "bob") + + top = "foo" + for data in ["data_M"]: + # Create a new node and check created_by and updated_by + alice_client[top].write_array( + arr, + key=data, + metadata={"description": "initial"}, + access_tags=["alice_tag"], + ) + coro_obj = coro_test(alice_client, [top, data]) + result = asyncio.run(coro_obj) + # When the array is first created, created_by and updated_by should be the same + assert result.node.created_by == "alice" + assert result.node.updated_by == "alice" + assert result.node.time_created == result.node.time_updated + + time.sleep(1) # ensure time_updated is different + bob_client[top][data].replace_metadata(metadata={"description": "updated"}) + coro_obj = coro_test(bob_client, [top, data]) + result = asyncio.run(coro_obj) + # After Bob updates the metadata, updated_by should be Bob, created_by should remain Alice + assert result.node.created_by != result.node.updated_by + assert result.node.time_created != result.node.time_updated + + +def test_created_and_updated_info_with_service(access_control_test_context_factory): + """ + Test that created_by and updated_by fields are correctly set + when a service (non-user) creates or updates a node. + """ + + admin_client = access_control_test_context_factory("admin", "admin") + + top = "qux" + # Create a new Service Principal. It is identified by its UUID. + # It has no "identities"; it cannot log in with a password. + sp_create = admin_client.context.admin.create_service_principal("admin") + sp_apikey_info = admin_client.context.admin.create_api_key(sp_create["uuid"]) + # Create a client for the Service Principal using its API key + sp_create_client = access_control_test_context_factory( + sp_create["uuid"], api_key=sp_apikey_info["secret"] + ) + + # Create a new Service Principal for updating the node. 
+ # It is identified by its UUID. It has no "identities"; it cannot log in with a password. + sp_update = admin_client.context.admin.create_service_principal("admin") + sp_update_apikey_info = admin_client.context.admin.create_api_key(sp_update["uuid"]) + # Create a client for the new Service Principal using its API key + sp_update_client = access_control_test_context_factory( + sp_update["uuid"], api_key=sp_update_apikey_info["secret"] + ) + + for data in ["data_N"]: + # Create a new node with the service principal and check created_by and updated_by + sp_create_client[top].write_array( + arr, key=data, metadata={"description": "initial"}, access_tags=["public"] + ) + + coro_obj = coro_test(sp_create_client, [top, data]) + result = asyncio.run(coro_obj) + + # When the array is first created by the service principal, + # created_by and updated_by should indicate the service principal + assert sp_create["uuid"] in result.node.created_by + assert sp_create["uuid"] in result.node.updated_by + assert result.node.time_created == result.node.time_updated + + time.sleep(1) # ensure time_updated is different + sp_update_client[top][data].replace_metadata( + metadata={"description": "updated"} + ) + coro_obj = coro_test(sp_update_client, [top, data]) + result = asyncio.run(coro_obj) + # After the service principal updates the metadata, + # updated_by should indicate the second service principal, + # created_by should indicate the first service principal + assert result.node.updated_by != result.node.created_by + assert sp_update["uuid"] in result.node.updated_by + assert result.node.time_created != result.node.time_updated + + +async def coro_test_anonymous(c, keys): + child_node = await c.context.http_client.app.state.root_tree.lookup_adapter( + [keys[0]] + ) + return child_node + + +def test_created_and_updated_info_with_anonymous(tree): + """ + Test that created_by and updated_by fields are correctly set + when an anonymous user creates or updates a node. 
+ """ + + with Context.from_app( + build_app(tree, validation_registry=validation_registry) + ) as context: + client = from_context(context) + # client.create_container(top) + + for data in ["data_O"]: + client.write_array(arr, key=data, metadata={"description": "initial"}) + + coro_obj = coro_test_anonymous(client, [data]) + result = asyncio.run(coro_obj) + + # When the array is first created by the anonymous user, + # created_by and updated_by should be None + assert result.node.created_by is None + assert result.node.updated_by is None + assert result.node.time_created == result.node.time_updated + + time.sleep(1) # ensure time_updated is different + client[data].replace_metadata(metadata={"description": "updated"}) + + coro_obj = coro_test_anonymous(client, [data]) + result = asyncio.run(coro_obj) + + # After the anonymous user updates the metadata, + # created_by and updated_by should still be None + # and time_created should be different from time_updated + assert result.node.created_by is None + assert result.node.updated_by is None + assert result.node.time_created != result.node.time_updated diff --git a/tiled/catalog/adapter.py b/tiled/catalog/adapter.py index 0762160bd..77ed33677 100644 --- a/tiled/catalog/adapter.py +++ b/tiled/catalog/adapter.py @@ -654,10 +654,12 @@ async def create_node( specs=None, data_sources=None, access_blob=None, + created_by="", ): access_blob = access_blob or {} key = key or self.context.key_maker() data_sources = data_sources or [] + time_created = datetime.now() node = orm.Node( key=key, @@ -666,6 +668,10 @@ async def create_node( structure_family=structure_family, specs=specs or [], access_blob=access_blob, + time_created=time_created, + time_updated=time_created, + created_by=created_by, + updated_by=created_by, ) async with self.context.session() as db: # TODO Consider using nested transitions to ensure that @@ -1027,7 +1033,13 @@ async def delete_revision(self, number): await db.commit() async def replace_metadata( - 
self, metadata=None, specs=None, access_blob=None, *, drop_revision=False + self, + metadata=None, + specs=None, + access_blob=None, + updated_by="", + *, + drop_revision=False, ): values = {} if metadata is not None: @@ -1038,6 +1050,8 @@ async def replace_metadata( values["specs"] = specs if access_blob is not None: values["access_blob"] = access_blob + values["time_updated"] = datetime.now() + values["updated_by"] = updated_by async with self.context.session() as db: if not drop_revision: current = ( @@ -1063,6 +1077,8 @@ async def replace_metadata( access_blob=current.access_blob, node_id=current.id, revision_number=next_revision_number, + time_updated=current.time_updated, + updated_by=current.updated_by, ) db.add(revision) await db.execute( diff --git a/tiled/catalog/core.py b/tiled/catalog/core.py index e708c493e..afedabc62 100644 --- a/tiled/catalog/core.py +++ b/tiled/catalog/core.py @@ -21,6 +21,7 @@ "0b033e7fbe30", "83889e049ddc", "6825c778aa3c", + "8fd6ac88f2ec", ] REQUIRED_REVISION = ALL_REVISIONS[0] diff --git a/tiled/catalog/migrations/versions/8fd6ac88f2ec_add_created_by_and_updated_by.py b/tiled/catalog/migrations/versions/8fd6ac88f2ec_add_created_by_and_updated_by.py new file mode 100644 index 000000000..4ee301377 --- /dev/null +++ b/tiled/catalog/migrations/versions/8fd6ac88f2ec_add_created_by_and_updated_by.py @@ -0,0 +1,37 @@ +"""Add created_by and updated_by + +Revision ID: 8fd6ac88f2ec +Revises: dfbb7478c6bd +Create Date: 2026-03-11 14:36:52.488573 + +""" +import sqlalchemy as sa +from alembic import op +from sqlalchemy import String + +# revision identifiers, used by Alembic. 
+revision = "8fd6ac88f2ec"
+down_revision = "dfbb7478c6bd"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    op.add_column(
+        "nodes",
+        sa.Column("created_by", String, nullable=True),
+    )
+    op.add_column(
+        "nodes",
+        sa.Column("updated_by", String, nullable=True),
+    )
+    op.add_column(
+        "revisions",
+        sa.Column("updated_by", String, nullable=True),
+    )
+
+
+def downgrade():
+    op.drop_column("nodes", "created_by")
+    op.drop_column("nodes", "updated_by")
+    op.drop_column("revisions", "updated_by")
diff --git a/tiled/catalog/orm.py b/tiled/catalog/orm.py
index 693c0f82f..3bd5b977d 100644
--- a/tiled/catalog/orm.py
+++ b/tiled/catalog/orm.py
@@ -9,6 +9,7 @@
     ForeignKey,
     Index,
     Integer,
+    String,
     Table,
     Unicode,
     event,
@@ -29,32 +30,7 @@
 JSONVariant = JSON().with_variant(JSONB(), "postgresql")
 
 
-class Timestamped:
-    """
-    Mixin for providing timestamps of creation and update time.
-
-    These are not used by application code, but they may be useful for
-    forensics.
-    """
-
-    time_created = Column(DateTime(timezone=False), server_default=func.now())
-    time_updated = Column(
-        DateTime(timezone=False), onupdate=func.now(), server_default=func.now()
-    )
-
-    def __repr__(self):
-        return (
-            f"{type(self).__name__}("
-            + ", ".join(
-                f"{key}={value!r}"
-                for key, value in self.__dict__.items()
-                if not key.startswith("_")
-            )
-            + ")"
-        )
-
-
-class Node(Timestamped, Base):
+class Node(Base):
     """
     This describes a single Node and sometimes inlines descriptions of all its children.
""" @@ -76,6 +52,12 @@ class Node(Timestamped, Base): metadata_ = Column("metadata", JSONVariant, nullable=False) specs = Column(JSONVariant, nullable=False) access_blob = Column("access_blob", JSONVariant, nullable=False) + time_created = Column(DateTime(timezone=False), server_default=func.now()) + time_updated = Column( + DateTime(timezone=False), onupdate=func.now(), server_default=func.now() + ) + created_by = Column("created_by", String) + updated_by = Column("updated_by", String) data_sources = relationship( "DataSource", @@ -433,7 +415,7 @@ def create_virtual_table_fits5(target, connection, **kw): connection.execute(text(statement)) -class DataSource(Timestamped, Base): +class DataSource(Base): """ The describes how to open one or more file/blobs to extract data for a Node. @@ -463,6 +445,10 @@ class DataSource(Timestamped, Base): # This relates to the mutability of the data. management = Column(Enum(Management), nullable=False) structure_family = Column(Enum(StructureFamily), nullable=False) + time_created = Column(DateTime(timezone=False), server_default=func.now()) + time_updated = Column( + DateTime(timezone=False), onupdate=func.now(), server_default=func.now() + ) # many-to-one relationship to Structure structure: Mapped["Structure"] = relationship( @@ -504,7 +490,7 @@ class Structure(Base): structure = Column(JSONVariant, nullable=False) -class Asset(Timestamped, Base): +class Asset(Base): """ This tracks individual files/blobs. 
""" @@ -519,6 +505,10 @@ class Asset(Timestamped, Base): hash_type = Column(Unicode(63), nullable=True) hash_content = Column(Unicode(1023), nullable=True) size = Column(Integer, nullable=True) + time_created = Column(DateTime(timezone=False), server_default=func.now()) + time_updated = Column( + DateTime(timezone=False), onupdate=func.now(), server_default=func.now() + ) # # many-to-many relationship to Asset, bypassing the `Association` class data_sources: Mapped[List["DataSource"]] = relationship( @@ -532,7 +522,7 @@ class Asset(Timestamped, Base): ) -class Revision(Timestamped, Base): +class Revision(Base): """ This tracks history of metadata, specs, and access_blob supporting 'undo' functionality. """ @@ -551,6 +541,10 @@ class Revision(Timestamped, Base): metadata_ = Column("metadata", JSONVariant, nullable=False) specs = Column(JSONVariant, nullable=False) access_blob = Column("access_blob", JSONVariant, nullable=False) + time_updated = Column( + DateTime(timezone=False), onupdate=func.now(), server_default=func.now() + ) + updated_by = Column("updated_by", String) __table_args__ = ( UniqueConstraint( diff --git a/tiled/server/router.py b/tiled/server/router.py index fee73b651..e3b9c319b 100644 --- a/tiled/server/router.py +++ b/tiled/server/router.py @@ -1601,6 +1601,14 @@ async def _create_node( access_blob_modified = access_blob != {} access_blob = {} + if principal is None: + created_by = None + else: + if len(principal.identities) > 0: + created_by = principal.identities[0].id + else: + created_by = f"service:{principal.uuid}" + node = await entry.create_node( metadata=body.metadata, structure_family=body.structure_family, @@ -1608,6 +1616,7 @@ async def _create_node( specs=body.specs, data_sources=body.data_sources, access_blob=access_blob, + created_by=created_by, ) links = links_for_node( structure_family, structure, get_base_url(request), path + f"/{node.key}" @@ -2090,7 +2099,7 @@ async def patch_metadata( policy, "modify_node" ): try: - 
(access_blob_modified, access_blob) = await policy.modify_node( + access_blob_modified, access_blob = await policy.modify_node( entry, principal, authn_access_tags, authn_scopes, access_blob ) except ValueError as e: @@ -2103,10 +2112,19 @@ async def patch_metadata( access_blob_modified = access_blob != entry.access_blob access_blob = entry.access_blob + if principal is None: + updated_by = None + else: + if len(principal.identities) > 0: + updated_by = principal.identities[0].id + else: + updated_by = f"service:{principal.uuid}" + await entry.replace_metadata( metadata=metadata, specs=specs, access_blob=access_blob, + updated_by=updated_by, drop_revision=drop_revision, ) @@ -2166,7 +2184,7 @@ async def put_metadata( policy, "modify_node" ): try: - (access_blob_modified, access_blob) = await policy.modify_node( + access_blob_modified, access_blob = await policy.modify_node( entry, principal, authn_access_tags, authn_scopes, access_blob ) except ValueError as e: @@ -2179,10 +2197,19 @@ async def put_metadata( access_blob_modified = access_blob != entry.access_blob access_blob = entry.access_blob + if principal is None: + updated_by = None + else: + if len(principal.identities) > 0: + updated_by = principal.identities[0].id + else: + updated_by = f"service:{principal.uuid}" + await entry.replace_metadata( metadata=metadata, specs=specs, access_blob=access_blob, + updated_by=updated_by, drop_revision=drop_revision, )