diff --git a/frontend/amundsen_application/api/exceptions.py b/frontend/amundsen_application/api/exceptions.py index 2006671f88..91de9b9058 100644 --- a/frontend/amundsen_application/api/exceptions.py +++ b/frontend/amundsen_application/api/exceptions.py @@ -7,3 +7,10 @@ class MailClientNotImplemented(Exception): An exception when Mail Client is not implemented """ pass + + +class AuthorizationMappingMissingException(Exception): + """ + An exception raised when mapping from given request to required action is missing + """ + pass diff --git a/frontend/amundsen_application/api/metadata/v0.py b/frontend/amundsen_application/api/metadata/v0.py index 6006876231..4a2ef61a05 100644 --- a/frontend/amundsen_application/api/metadata/v0.py +++ b/frontend/amundsen_application/api/metadata/v0.py @@ -24,6 +24,8 @@ from amundsen_application.api.utils.request_utils import get_query_param, request_metadata from amundsen_application.api.utils.search_utils import execute_search_document_request +from amundsen_application.api.utils.authz_utils import get_required_action_from_request, \ + is_subject_authorized_to_perform_action_on_object LOGGER = logging.getLogger(__name__) @@ -138,8 +140,20 @@ def get_table_metadata() -> Response: list_item_index = request.args.get('index', None) list_item_source = request.args.get('source', None) - results_dict = _get_table_metadata(table_key=table_key, index=list_item_index, source=list_item_source) - return make_response(jsonify(results_dict), results_dict.get('status_code', HTTPStatus.INTERNAL_SERVER_ERROR)) + required_action_permission = get_required_action_from_request(request) + is_authorized = is_subject_authorized_to_perform_action_on_object( + user=app.config['AUTH_USER_METHOD'](app), + object_type=ResourceType.Table, + object_id=table_key, + required_action=required_action_permission, + ) + if is_authorized: + results_dict = _get_table_metadata(table_key=table_key, index=list_item_index, source=list_item_source) + return 
make_response(jsonify(results_dict), results_dict.get('status_code', HTTPStatus.INTERNAL_SERVER_ERROR)) + else: + message = "User is not authorized to access the resource" + return make_response(jsonify({'tableData': {}, 'msg': message}), HTTPStatus.FORBIDDEN) + except Exception as e: message = 'Encountered exception: ' + str(e) logging.exception(message) diff --git a/frontend/amundsen_application/api/utils/authz_utils.py b/frontend/amundsen_application/api/utils/authz_utils.py new file mode 100644 index 0000000000..ebaf62fd71 --- /dev/null +++ b/frontend/amundsen_application/api/utils/authz_utils.py @@ -0,0 +1,56 @@ +from flask import Request, current_app as app + +from amundsen_common.entity.resource_type import ResourceType +from amundsen_common.models.user import User +from amundsen_application.authz.actions.base import BaseAction +from amundsen_application.authz.clients.base import BaseClient +from amundsen_application.authz.mappers.base import BaseMapper +from amundsen_application.api.exceptions import AuthorizationMappingMissingException +from typing import Optional + +AUTHZ_CLIENT_INSTANCE = None + +def get_authz_client() -> Optional[BaseClient]: + global AUTHZ_CLIENT_INSTANCE + if app.config["AUTHORIZATION_ENABLED"] and app.config["AUTHORIZATION_CLIENT_CLASS"] is None: + raise Exception("Authorization client is not configured") + if app.config["AUTHORIZATION_ENABLED"] and AUTHZ_CLIENT_INSTANCE is None: + AUTHZ_CLIENT_INSTANCE = app.config["AUTHORIZATION_CLIENT_CLASS"]() + + return AUTHZ_CLIENT_INSTANCE + + +def get_required_action_from_request(request: Request) -> BaseAction: + request_to_action_mapper: BaseMapper = app.config["AUTHORIZATION_REQUEST_TO_ACTION_MAPPER"] + if app.config["AUTHORIZATION_ENABLED"] and request_to_action_mapper is None: + raise Exception("Request to action mapping is not configured") + + return request_to_action_mapper.get_mapping(request=request) + + +def is_subject_authorized_to_perform_action_on_object( + *, + user: User, + 
object_type: ResourceType, + object_id: str, + required_action: BaseAction) -> bool: + is_authorized = False + if app.config["AUTHORIZATION_ENABLED"] == False: + is_authorized = True + return is_authorized + else: + authz_client = get_authz_client() + if authz_client is None: + raise Exception("Can not get authorization client. Make sure that AUTHORIZATION_CLIENT_CLASS is set") + try: + is_authorized = authz_client.is_authorized( + user=user, + object_type=object_type, + object_id=object_id, + action=required_action, + ) + + except AuthorizationMappingMissingException: + is_authorized = app.config["AUTHORIZATION_ALLOW_ACCESS_ON_MISSING_MAPPING"] + + return is_authorized diff --git a/frontend/amundsen_application/authz/__init__.py b/frontend/amundsen_application/authz/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/frontend/amundsen_application/authz/actions/__init__.py b/frontend/amundsen_application/authz/actions/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/frontend/amundsen_application/authz/actions/base.py b/frontend/amundsen_application/authz/actions/base.py new file mode 100644 index 0000000000..3215c366db --- /dev/null +++ b/frontend/amundsen_application/authz/actions/base.py @@ -0,0 +1,11 @@ +from enum import Enum +from typing import Type + +class BaseAction(Enum): + pass + +def to_action(*, action_enum_cls: Type[BaseAction], label: str) -> Enum: + return action_enum_cls[label.upper()] + +def to_label(*, action: BaseAction) -> str: + return action.name.lower() diff --git a/frontend/amundsen_application/authz/actions/rw_action.py b/frontend/amundsen_application/authz/actions/rw_action.py new file mode 100644 index 0000000000..e5ad22590f --- /dev/null +++ b/frontend/amundsen_application/authz/actions/rw_action.py @@ -0,0 +1,6 @@ +from amundsen_application.authz.actions.base import BaseAction +from enum import auto + +class RWAction(BaseAction): + READ = auto() + WRITE = auto() diff --git 
a/frontend/amundsen_application/authz/clients/__init__.py b/frontend/amundsen_application/authz/clients/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/frontend/amundsen_application/authz/clients/base.py b/frontend/amundsen_application/authz/clients/base.py new file mode 100644 index 0000000000..232d0d829e --- /dev/null +++ b/frontend/amundsen_application/authz/clients/base.py @@ -0,0 +1,20 @@ +from abc import ABCMeta, abstractmethod +from amundsen_common.models.user import User +from amundsen_common.entity.resource_type import ResourceType +from amundsen_application.authz.actions.base import (BaseAction) + +from enum import Enum, auto + + +class BaseClient(metaclass=ABCMeta): + """ + Base Client, which behaves like an interface for all + """ + + @abstractmethod + def is_authorized(self, *, user: User, object_type: ResourceType, object_id: str, action: BaseAction) -> bool: + pass + + """ + TODO - different methods - get_user_permissions, get_authorized_users, filter_search_request + """ diff --git a/frontend/amundsen_application/authz/clients/casbin_db_client.py b/frontend/amundsen_application/authz/clients/casbin_db_client.py new file mode 100644 index 0000000000..1113e92d0b --- /dev/null +++ b/frontend/amundsen_application/authz/clients/casbin_db_client.py @@ -0,0 +1,33 @@ +from amundsen_common.entity.resource_type import ResourceType, to_label as resource_type_to_label +from amundsen_application.authz.actions.base import BaseAction, to_label as action_to_label +from amundsen_application.authz.clients.base import BaseClient +from amundsen_common.models.user import User +import casbin_sqlalchemy_adapter +import casbin +import os +from sqlalchemy import create_engine + +class CasbinDbClient(BaseClient): + """ + WIP - Authorization Client that leverages Casbin as policy enforcer and persistent database as policy storage + """ + + def __init__(self) -> None: + db_url = os.getenv("CASBIN_MODEL_DATABASE_ENGINE_URL") + if db_url is None: + 
raise Exception("Casbin Database URL not specified. set url as 'CASBIN_MODEL_DATABASE_ENGINE_URL' env variable") + casbin_model_config_path = os.getenv("CASBIN_MODEL_CONFIG_PATH") + if casbin_model_config_path is None: + raise Exception("Casbin config file path not specified. Set path to the file as 'CASBIN_MODEL_CONFIG_PATH' env variable") + + self.engine = create_engine(db_url) + self.adapter = casbin_sqlalchemy_adapter.Adapter(self.engine) + self.enforcer = casbin.Enforcer(casbin_model_config_path, self.adapter) + + def is_authorized(self, *, user: User, object_type: ResourceType, object_id: str, action: BaseAction) -> bool: + return self.enforcer.enforce( + user.user_id, + resource_type_to_label(resource_type=object_type), + object_id, + action_to_label(action=action) + ) diff --git a/frontend/amundsen_application/authz/clients/casbin_example_csv_client.py b/frontend/amundsen_application/authz/clients/casbin_example_csv_client.py new file mode 100644 index 0000000000..072b980bb9 --- /dev/null +++ b/frontend/amundsen_application/authz/clients/casbin_example_csv_client.py @@ -0,0 +1,28 @@ +from amundsen_common.entity.resource_type import ResourceType, to_label as resource_type_to_label +from amundsen_application.authz.actions.base import BaseAction, to_label as action_to_label +from amundsen_application.authz.clients.base import BaseClient +from amundsen_common.models.user import User +import casbin +import os +import sys +import inspect + +class CasbinExampleCsvClient(BaseClient): + """ + Example implementation of Authorization Client using Casbin + """ + + def __init__(self) -> None: + script_dir = os.path.dirname(inspect.getfile(CasbinExampleCsvClient)) + base_path = os.path.join(script_dir, "casbin_example_csv_client") + policy_file = os.path.join(base_path, "policy.csv") + model_file = os.path.join(base_path, "model.conf") + self.enforcer = casbin.Enforcer(model_file, policy_file) + + def is_authorized(self, *, user: User, object_type: ResourceType, object_id: 
str, action: BaseAction) -> bool: + return self.enforcer.enforce( + user.user_id, + resource_type_to_label(resource_type=object_type), + object_id, + action_to_label(action=action) + ) diff --git a/frontend/amundsen_application/authz/clients/casbin_example_csv_client/model.conf b/frontend/amundsen_application/authz/clients/casbin_example_csv_client/model.conf new file mode 100644 index 0000000000..c50c284b52 --- /dev/null +++ b/frontend/amundsen_application/authz/clients/casbin_example_csv_client/model.conf @@ -0,0 +1,13 @@ +[request_definition] +r = sub,type, obj, act + +[policy_definition] +p = sub, type, obj, act + + +[policy_effect] +e = some(where (p.eft == allow)) + +[matchers] +# match subject (e.g. user), match action(e.g. read), regex match type (e.g table), regex match object (e.g. table id) +m = r.sub == p.sub && r.act == p.act && regexMatch(r.type, p.type) && regexMatch(r.obj, p.obj) diff --git a/frontend/amundsen_application/authz/clients/casbin_example_csv_client/policy.csv b/frontend/amundsen_application/authz/clients/casbin_example_csv_client/policy.csv new file mode 100644 index 0000000000..595e60bd35 --- /dev/null +++ b/frontend/amundsen_application/authz/clients/casbin_example_csv_client/policy.csv @@ -0,0 +1 @@ +p, test_user_id, table, hive://*, read diff --git a/frontend/amundsen_application/authz/mappers/__init__.py b/frontend/amundsen_application/authz/mappers/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/frontend/amundsen_application/authz/mappers/base.py b/frontend/amundsen_application/authz/mappers/base.py new file mode 100644 index 0000000000..e10100d91e --- /dev/null +++ b/frontend/amundsen_application/authz/mappers/base.py @@ -0,0 +1,20 @@ +from abc import ABCMeta, abstractmethod +from amundsen_application.authz.actions.base import BaseAction +from typing import Dict, Any +from flask import Request + +class BaseMapper(metaclass=ABCMeta): + """ + Base class for adding mappings between requests and actions + 
""" + @abstractmethod + def __init__(self) -> None: + self._mappings: Dict[Any, Any] = {} + + @abstractmethod + def add_mapping(self, required_action: BaseAction, **kwargs: Any) -> None: + pass + + @abstractmethod + def get_mapping(self, *, request: Request) -> BaseAction: + pass diff --git a/frontend/amundsen_application/authz/mappers/default_request_to_action_mapper.py b/frontend/amundsen_application/authz/mappers/default_request_to_action_mapper.py new file mode 100644 index 0000000000..97844ee388 --- /dev/null +++ b/frontend/amundsen_application/authz/mappers/default_request_to_action_mapper.py @@ -0,0 +1,42 @@ +from flask import Request +from typing import Dict, Any +from amundsen_application.authz.actions.base import BaseAction +from amundsen_application.authz.mappers.base import BaseMapper +from amundsen_application.api.exceptions import AuthorizationMappingMissingException + + +class DefaultRequestToActionMapper(BaseMapper): + """ + Reference implementation of mapper. + Given request context, checks blueprint and function used to process the request + and returns the corresponding action. 
+ """ + def __init__(self) -> None: + self._mappings: Dict[str, Dict[str, BaseAction]] = {} + + def add_mapping(self, required_action: BaseAction, **kwargs: Any) -> None: + if "blueprint_name" not in kwargs: + raise Exception("Expected `blueprint_name` in keyword arguments") + if "function_name" not in kwargs: + raise Exception("Expected `function_name` in keyword arguments") + + blueprint_name = kwargs["blueprint_name"] + function_name = kwargs["function_name"] + self._mappings[blueprint_name] = self._mappings.get(blueprint_name, {}) + self._mappings[blueprint_name][function_name] = required_action + + def get_mapping(self, *, request: Request) -> BaseAction: + if not request.endpoint: + raise Exception( + "Unexpected error: Request do not contain an endpoint" + ) + blueprint_name, _, function_name = request.endpoint.rpartition('.') + if blueprint_name not in self._mappings: + raise AuthorizationMappingMissingException( + f'Authorization mapping not specified for blueprint {blueprint_name}' + ) + if function_name not in self._mappings[blueprint_name]: + raise AuthorizationMappingMissingException( + f'Authorization mapping not specified for function {function_name} of blueprint {blueprint_name}' + ) + return self._mappings[blueprint_name][function_name] diff --git a/frontend/amundsen_application/authz_config.py b/frontend/amundsen_application/authz_config.py new file mode 100644 index 0000000000..6418041f69 --- /dev/null +++ b/frontend/amundsen_application/authz_config.py @@ -0,0 +1,32 @@ +# Copyright Contributors to the Amundsen project. 
+# SPDX-License-Identifier: Apache-2.0 + +from amundsen_application.authz.actions.rw_action import RWAction +from amundsen_application.authz.mappers.default_request_to_action_mapper import DefaultRequestToActionMapper +from amundsen_application.authz.clients.casbin_example_csv_client import CasbinExampleCsvClient + +AUTHORIZATION_ENABLED = True +AUTHORIZATION_CLIENT_CLASS = CasbinExampleCsvClient +AUTHORIZATION_REQUEST_TO_ACTION_MAPPER = DefaultRequestToActionMapper() +AUTHORIZATION_ACTION_ENUM = RWAction +AUTHORIZATION_ALLOW_ACCESS_ON_MISSING_MAPPING = True + + +# Subject accessing 'get_table_metadata' defined in blueprint 'metadata' +# has to have 'read' action allowed in order to access the table metadata +AUTHORIZATION_REQUEST_TO_ACTION_MAPPER.add_mapping( + blueprint_name="metadata", + function_name="get_table_metadata", + required_action=AUTHORIZATION_ACTION_ENUM.READ, +) + + +""" +# One can follow the same logic to add more mappings... + +AUTHORIZATION_REQUEST_TO_ACTION_MAPPER.add_mapping( + blueprint_name="metadata", + function_name="update_table_tags", + required_action=AUTHORIZATION_ACTION_ENUM.WRITE, +) +""" diff --git a/frontend/amundsen_application/config.py b/frontend/amundsen_application/config.py index 604b14ff32..c118315335 100644 --- a/frontend/amundsen_application/config.py +++ b/frontend/amundsen_application/config.py @@ -131,6 +131,7 @@ class Config: CREDENTIALS_MODE_ADMIN_PASSWORD = os.getenv('CREDENTIALS_MODE_ADMIN_PASSWORD', None) MODE_ORGANIZATION = None MODE_REPORT_URL_TEMPLATE = None + # Add Preview class name below to enable ACL, assuming it is supported by the Preview class # e.g: ACL_ENABLED_DASHBOARD_PREVIEW = {'ModePreview'} ACL_ENABLED_DASHBOARD_PREVIEW = set() # type: Set[Optional[str]] @@ -145,6 +146,13 @@ class Config: MTLS_CLIENT_KEY = os.getenv('MTLS_CLIENT_KEY') """Optional. The path to a PEM formatted key to use with the MTLS_CLIENT_CERT. 
MTLS_CLIENT_CERT must also be set.""" + from amundsen_application.authz_config import ( + AUTHORIZATION_ENABLED, + AUTHORIZATION_CLIENT_CLASS, + AUTHORIZATION_REQUEST_TO_ACTION_MAPPER, + AUTHORIZATION_ALLOW_ACCESS_ON_MISSING_MAPPING + ) + class LocalConfig(Config): DEBUG = False @@ -184,6 +192,7 @@ class LocalConfig(Config): class TestConfig(LocalConfig): POPULAR_RESOURCES_PERSONALIZATION = True AUTH_USER_METHOD = get_test_user + AUTHORIZATION_ENABLED = False NOTIFICATIONS_ENABLED = True ISSUE_TRACKER_URL = 'test_url' ISSUE_TRACKER_USER = 'test_user' diff --git a/frontend/tests/unit/api/metadata/test_v0.py b/frontend/tests/unit/api/metadata/test_v0.py index f0a1fecdfd..1daac90dd3 100644 --- a/frontend/tests/unit/api/metadata/test_v0.py +++ b/frontend/tests/unit/api/metadata/test_v0.py @@ -2,9 +2,10 @@ # SPDX-License-Identifier: Apache-2.0 import json +from amundsen_application.authz.actions.base import BaseAction import responses import unittest -from unittest.mock import patch +from unittest.mock import patch, Mock from http import HTTPStatus @@ -524,6 +525,7 @@ def setUp(self) -> None: "url": "test_dashboard_url" } + @responses.activate def test_popular_resources_success(self) -> None: """ @@ -1415,3 +1417,57 @@ def test_update_feature_tags(self) -> None: } ) self.assertEqual(response.status_code, HTTPStatus.OK) + + @responses.activate + @patch("amundsen_application.api.metadata.v0.get_required_action_from_request") + @patch("amundsen_application.api.metadata.v0.is_subject_authorized_to_perform_action_on_object") + def test_authorization_success_on_get_table_metadata(self, mock_authorization_function: Mock, mock_mapping_function: Mock) -> None: + """ + Test that authorization function was called and that expected payload was returned + :return: + """ + mock_authorization_function.return_value = True + mock_mapping_function.return_value = BaseAction + url = local_app.config['METADATASERVICE_BASE'] + TABLE_ENDPOINT + '/db://cluster.schema/table' + 
responses.add(responses.GET, url, json=self.mock_metadata, status=HTTPStatus.OK) + with patch.dict(local_app.config, {'AUTHORIZATION_ENABLED': True}): + with local_app.test_client() as test: + response = test.get( + '/api/metadata/v0/table', + query_string=dict( + key='db://cluster.schema/table', + index='0', + source='test_source' + ) + ) + mock_authorization_function.assert_called() + mock_mapping_function.assert_called() + data = json.loads(response.data) + self.assertEqual(response.status_code, HTTPStatus.OK) + self.assertCountEqual(data.get('tableData'), self.expected_parsed_metadata) + + @patch("amundsen_application.api.metadata.v0.get_required_action_from_request") + @patch("amundsen_application.api.metadata.v0.is_subject_authorized_to_perform_action_on_object") + def test_authorization_failure_on_get_table_metadata(self, mock_authorization_function: Mock, mock_mapping_function: Mock) -> None: + """ + Test that authorization function was called and that it prevented reading the table metadata + :return: + """ + mock_authorization_function.return_value = False + mock_mapping_function.return_value = BaseAction + with patch.dict(local_app.config, {'AUTHORIZATION_ENABLED': True}): + with local_app.test_client() as test: + response = test.get( + '/api/metadata/v0/table', + query_string=dict( + key='db://cluster.schema/table', + index='0', + source='test_source' + ) + ) + data = json.loads(response.data) + mock_authorization_function.assert_called() + mock_mapping_function.assert_called() + self.assertEqual(response.status_code, HTTPStatus.FORBIDDEN) + self.assertEqual(data.get("msg"), "User is not authorized to access the resource") + self.assertCountEqual(data.get('tableData'), {}) diff --git a/requirements-common.txt b/requirements-common.txt index 1fb39f4c65..313ff37a28 100644 --- a/requirements-common.txt +++ b/requirements-common.txt @@ -25,3 +25,5 @@ statsd==3.2.1 typing==3.6.4 werkzeug==2.0.3 wheel==0.36.2 +casbin==1.17.4 
+casbin-sqlalchemy-adapter==0.5.0