diff --git a/.github/workflows/pull_request_push_test.yml b/.github/workflows/pull_request_push_test.yml index d8ae9b22e..c833f677a 100644 --- a/.github/workflows/pull_request_push_test.yml +++ b/.github/workflows/pull_request_push_test.yml @@ -127,6 +127,8 @@ jobs: COSMOS1_KEY: ${{secrets.COSMOS1_KEY}} SQL1_USER: ${{secrets.SQL1_USER}} SQL1_PASSWORD: ${{secrets.SQL1_PASSWORD}} + AWS_ACCESS_KEY_ID: ${{secrets.AWS_ACCESS_KEY_ID}} + AWS_SECRET_ACCESS_KEY: ${{secrets.AWS_SECRET_ACCESS_KEY}} run: | # run only test with databricks. run in 6 parallel jobs pytest -n 6 --cov-report term-missing --cov=feathr_project/feathr feathr_project/test --cov-config=.github/workflows/.coveragerc_db --cov-fail-under=75 @@ -196,6 +198,8 @@ jobs: COSMOS1_KEY: ${{secrets.COSMOS1_KEY}} SQL1_USER: ${{secrets.SQL1_USER}} SQL1_PASSWORD: ${{secrets.SQL1_PASSWORD}} + AWS_ACCESS_KEY_ID: ${{secrets.AWS_ACCESS_KEY_ID}} + AWS_SECRET_ACCESS_KEY: ${{secrets.AWS_SECRET_ACCESS_KEY}} run: | # skip databricks related test as we just ran the test; also seperate databricks and synapse test to make sure there's no write conflict # run in 6 parallel jobs to make the time shorter diff --git a/docs/how-to-guides/feathr-configuration-and-env.md b/docs/how-to-guides/feathr-configuration-and-env.md index c5e42d925..0a14c0b18 100644 --- a/docs/how-to-guides/feathr-configuration-and-env.md +++ b/docs/how-to-guides/feathr-configuration-and-env.md @@ -27,8 +27,60 @@ This allows end users to store the configuration in a secure way, say in Kuberne Feathr will get the configurations in the following order: 1. If the key is set in the environment variable, Feathr will use the value of that environment variable -2. If it's not set in the environment, then a value is retrieved from the feathr_config.yaml file with the same config key. -3. If it's not available in the feathr_config.yaml file, Feathr will try to retrieve the value from a key vault service. Currently only Azure Key Vault is supported. +2. 
If it's not set in the environment, then a value is retrieved from the `feathr_config.yaml` file with the same config key. +3. If it's not available in the `feathr_config.yaml` file, Feathr will try to retrieve the value from a key vault service. Currently both Azure Key Vault and AWS Secrets Manager are supported. + +# Using Secret Management Service in Feathr + +Feathr supports using a secret management service for all the credentials and environment variables. Currently the supported secret management services are Azure Key Vault and AWS Secrets Manager. + +To use one of these secret management services, follow two steps: + +Step 1: Tell Feathr which secret management service to use, and what the corresponding namespace is. + +If using Azure Key Vault: +```yaml +secrets: + azure_key_vault: + name: feathrazuretest3-kv +``` + +If using AWS Secrets Manager, users should put the corresponding secret_id in the `secrets` section of `feathr_config.yaml`, like below, so that Feathr knows which secret_id to use to retrieve the required credentials. +```yaml +secrets: + aws_secrets_manager: + secret_id: feathrsecret_namespace +``` + +Step 2: Initialize a secret management client and pass it to Feathr. 
+ +For Azure Key Vault: +```python +from azure.keyvault.secrets import SecretClient +secret_client = SecretClient( + vault_url = f"https://{key_vault_name}.vault.azure.net", + credential=DefaultAzureCredential() + ) +feathr_client = FeathrClient(..., secret_manager_client = secret_client) +``` + +For AWS Secrets Manager, users need to create a SecretCache object and pass it to the Feathr client, like below: +```python +import botocore +import botocore.session +from aws_secretsmanager_caching import SecretCache, SecretCacheConfig + +client = botocore.session.get_session().create_client( + service_name='secretsmanager', + aws_access_key_id = 'YOUR_AWS_ACCESS_KEY_ID', + aws_secret_access_key= 'YOUR_AWS_SECRET_ACCESS_KEY', + region_name=region_name +) +cache_config = SecretCacheConfig() +cache = SecretCache( config = cache_config, client = client) +feathr_client = FeathrClient(..., secret_manager_client = cache) + +``` # A list of environment variables that Feathr uses when running Spark job diff --git a/feathr_project/feathr/client.py b/feathr_project/feathr/client.py index 1be4238ea..4797e4cc7 100644 --- a/feathr_project/feathr/client.py +++ b/feathr_project/feathr/client.py @@ -65,6 +65,7 @@ def __init__( local_workspace_dir: str = None, credential: Any = None, project_registry_tag: Dict[str, str] = None, + secret_manager_client = None ): """Initialize Feathr Client. Configuration values used by the Feathr are evaluated in the following precedence, with items higher on the list taking priority. @@ -77,12 +78,13 @@ def __init__( local_workspace_dir (optional): Set where is the local work space dir. If not set, Feathr will create a temporary folder to store local workspace related files. credential (optional): Azure credential to access cloud resources, most likely to be the returned result of DefaultAzureCredential(). If not set, Feathr will initialize DefaultAzureCredential() inside the __init__ function to get credentials. project_registry_tag (optional): Adding tags for project in Feathr registry. 
This might be useful if you want to tag your project as deprecated, or allow certain customizations on project level. Default is empty + secret_manager_client (optional): the secret manager client initialized outside of Feathr (an azure.keyvault.secrets.SecretClient for Azure Key Vault, or an aws_secretsmanager_caching.SecretCache for AWS Secrets Manager). End users need to initialize the secret manager outside of Feathr and pass it to Feathr so Feathr can use it to get required secrets. Default is None. """ self.logger = logging.getLogger(__name__) # Redis key separator self._KEY_SEPARATOR = ':' self._COMPOSITE_KEY_SEPARATOR = '#' - self.env_config = EnvConfigReader(config_path=config_path) + self.env_config = EnvConfigReader(config_path=config_path, secret_manager_client=secret_manager_client) if local_workspace_dir: self.local_workspace_dir = local_workspace_dir else: @@ -215,17 +217,6 @@ def __init__( logger.info(f"Feathr client {get_version()} initialized successfully.") - def _check_required_environment_variables_exist(self): - """Checks if the required environment variables(form feathr_config.yaml) is set. - - Some required information has to be set via environment variables so the client can work. - """ - props = self.secret_names - for required_field in (self.required_fields + props): - if required_field not in os.environ: - raise RuntimeError(f'{required_field} is not set in environment variable. 
All required environment ' - f'variables are: {self.required_fields}.') - def register_features(self, from_context: bool = True): """Registers features based on the current workspace @@ -487,7 +478,8 @@ def _construct_redis_client(self): host = self.redis_host port = self.redis_port ssl_enabled = self.redis_ssl_enabled - self.redis_client = redis.Redis( + + self.redis_client = redis.Redis( host=host, port=port, password=password, @@ -676,7 +668,7 @@ def monitor_features(self, settings: MonitoringSettings, execution_configuration # Should search in both 'derived_feature_list' and 'anchor_list' # Return related keys(key_column list) or None if cannot find the feature def _get_feature_key(self, feature_name: str): - features = [] + features: List[FeatureBase] = [] if 'derived_feature_list' in dir(self): features += self.derived_feature_list if 'anchor_list' in dir(self): diff --git a/feathr_project/feathr/secrets/abc.py b/feathr_project/feathr/secrets/abc.py new file mode 100644 index 000000000..fae681265 --- /dev/null +++ b/feathr_project/feathr/secrets/abc.py @@ -0,0 +1,30 @@ +from abc import ABC, abstractmethod + +from typing import Any, Dict, List, Optional, Tuple + + +class FeathrSecretsManagementClient(ABC): + """This is the abstract class for all the secrets management services, which are used to store the credentials that Feathr might use. + """ + + @abstractmethod + def __init__(self, secret_namespace: str, secret_client) -> None: + """Initialize the FeathrSecretsManagementClient class. + + Args: + secret_namespace (str): a namespace that Feathr needs to get secrets from. + For Azure Key Vault, it is something like the key vault name. + For AWS secrets manager, it is something like a secret name. + + secret_client: A client that will be used to retrieve Feathr secrets. 
+ """ + pass + + @abstractmethod + def get_feathr_secret(self, secret_name: str) -> str: + """Get Feathr Secrets from a certain secret management service, such as Azure Key Vault or AWS Secrets Manager. + + Returns: + str: returned secret from secret management service + """ + pass diff --git a/feathr_project/feathr/secrets/akv_client.py b/feathr_project/feathr/secrets/akv_client.py index cdec01e12..d8baf7bcc 100644 --- a/feathr_project/feathr/secrets/akv_client.py +++ b/feathr_project/feathr/secrets/akv_client.py @@ -1,31 +1,37 @@ from azure.keyvault.secrets import SecretClient -from azure.identity import DefaultAzureCredential from loguru import logger from azure.core.exceptions import ResourceNotFoundError +from feathr.secrets.abc import FeathrSecretsManagementClient -class AzureKeyVaultClient: - def __init__(self, akv_name: str): - self.akv_name = akv_name - self.secret_client = None - def get_feathr_akv_secret(self, secret_name: str): +class AzureKeyVaultClient(FeathrSecretsManagementClient): + def __init__(self, secret_namespace: str, secret_client: SecretClient = None): + """Initializes the AzureKeyVaultClient. Note that `secret_namespace` is not used, since the namespace information will be included in secret_client. + """ + self.secret_client = secret_client + if self.secret_client is not None and not isinstance(secret_client, SecretClient): + raise RuntimeError( + "You need to pass an azure.keyvault.secrets.SecretClient instance.") + + def get_feathr_secret(self, secret_name: str) -> str: """Get Feathr Secrets from Azure Key Vault. 
Note that this function will replace '_' in `secret_name` with '-' since Azure Key Vault doesn't support it Returns: - _type_: _description_ + str: returned secret from secret management service """ if self.secret_client is None: - self.secret_client = SecretClient( - vault_url = f"https://{self.akv_name}.vault.azure.net", - credential=DefaultAzureCredential() - ) + raise RuntimeError("You need to pass an azure.keyvault.secrets.SecretClient instance when initializing FeathrClient.") + try: # replace '_' with '-' since Azure Key Vault doesn't support it - variable_replaced = secret_name.replace('_','-') #.upper() - logger.info('Fetching the secret {} from Key Vault {}.', variable_replaced, self.akv_name) + variable_replaced = secret_name.replace('_', '-') # .upper() + logger.info('Fetching the secret {} from Key Vault {}.', + variable_replaced, self.secret_client.vault_url) secret = self.secret_client.get_secret(variable_replaced) - logger.info('Secret {} fetched from Key Vault {}.', variable_replaced, self.akv_name) + logger.info('Secret {} fetched from Key Vault {}.', + variable_replaced, self.secret_client.vault_url) return secret.value - except ResourceNotFoundError as e: - logger.error(f"Secret {secret_name} cannot be found in Key Vault {self.akv_name}.") - raise \ No newline at end of file + except ResourceNotFoundError: + logger.error( + f"Secret {secret_name} cannot be found in Key Vault {self.secret_client.vault_url}.") + raise diff --git a/feathr_project/feathr/secrets/aws_secretmanager.py b/feathr_project/feathr/secrets/aws_secretmanager.py new file mode 100644 index 000000000..feb6586f9 --- /dev/null +++ b/feathr_project/feathr/secrets/aws_secretmanager.py @@ -0,0 +1,33 @@ +from loguru import logger +import json +from feathr.secrets.abc import FeathrSecretsManagementClient +from aws_secretsmanager_caching.secret_cache import SecretCache + + +class AWSSecretManagerClient(FeathrSecretsManagementClient): + def __init__(self, secret_namespace: str = None, 
secret_client: SecretCache = None): + self.secret_id = secret_namespace + self.secret_client = secret_client + # make sure secret_client is a SecretCache type + if secret_client is not None and not isinstance(secret_client, SecretCache): + raise RuntimeError( + "You need to pass a aws_secretsmanager_caching.secret_cache.SecretCache instance. Please refer to https://docs.aws.amazon.com/secretsmanager/latest/userguide/retrieving-secrets_cache-python.html for more details.") + + def get_feathr_secret(self, secret_name: str): + """Get Feathr Secrets from AWS Secrets manager. It's also recommended that the client passes a cache objects to reduce cost. + See more details here: https://docs.aws.amazon.com/secretsmanager/latest/userguide/retrieving-secrets_cache-python.html + """ + if self.secret_client is None: + raise RuntimeError( + "You need to pass a aws_secretsmanager_caching.secret_cache.SecretCache instance when initializing FeathrClient.") + + try: + get_secret_value_response = self.secret_client.get_secret_string( + self.secret_id) + # result is in str format, so we need to load it as a dict + secret = json.loads(get_secret_value_response) + return secret[secret_name] + except KeyError as e: + logger.error( + f"Secret {secret_name} cannot be found in secretsmanager {self.secret_id}.") + raise e diff --git a/feathr_project/feathr/spark_provider/_abc.py b/feathr_project/feathr/spark_provider/_abc.py index c91fdf5c1..e82b42353 100644 --- a/feathr_project/feathr/spark_provider/_abc.py +++ b/feathr_project/feathr/spark_provider/_abc.py @@ -2,6 +2,7 @@ from typing import Dict, List, Optional, Tuple + class SparkJobLauncher(ABC): """This is the abstract class for all the spark launchers. 
All the Spark launcher should implement those interfaces """ @@ -52,8 +53,5 @@ def get_status(self) -> str: Returns: str: Status of the current job - - Returns: - str: _description_ """ pass diff --git a/feathr_project/feathr/utils/_envvariableutil.py b/feathr_project/feathr/utils/_envvariableutil.py new file mode 100644 index 000000000..9bd610d37 --- /dev/null +++ b/feathr_project/feathr/utils/_envvariableutil.py @@ -0,0 +1,111 @@ +import os +import yaml +from loguru import logger +from feathr.secrets.akv_client import AzureKeyVaultClient +from azure.core.exceptions import ResourceNotFoundError +from feathr.secrets.aws_secretmanager import AWSSecretManagerClient + +class _EnvVaraibleUtil(object): + def __init__(self, config_path: str, secret_manager_client = None): + """Initialize the environment variable utils client + + Args: + config_path (str): configuration path, if users want to use YAML to load all the configs + secret_manager_client: the secret manager client type. currently only Azure key vault and AWS secret manager is supported. + """ + self.config_path = config_path + # Set to none first to avoid invalid reference + self.secret_manager_client = None + if secret_manager_client and self.get_environment_variable_with_default('secrets', 'azure_key_vault', 'name'): + self.secret_manager_client = AzureKeyVaultClient( + secret_namespace=self.get_environment_variable_with_default('secrets', 'azure_key_vault', 'name'), + secret_client=secret_manager_client) + elif secret_manager_client and self.get_environment_variable_with_default('secrets', 'aws_secrets_manager', 'secret_id'): + self.secret_manager_client = AWSSecretManagerClient( + secret_namespace=self.get_environment_variable_with_default('secrets', 'aws_secrets_manager', 'secret_id'), + secret_client=secret_manager_client) + + def get_environment_variable_with_default(self, *args): + """Gets the environment variable for the variable key. 
+ Args: + *args: list of keys in feathr_config.yaml file + Return: + An environment variable for the variable key. It will retrieve the value of the environment variables in the following order: + If the key is set in the environment variable, Feathr will use the value of that environment variable + If it's not set in the environment, then a default is retrieved from the feathr_config.yaml file with the same config key. + If it's not available in the feathr_config.yaml file, Feathr will try to retrieve the value from the configured secret management service, if any + If not found, None is returned; a warning is logged when the secret management service lookup fails. + """ + + # if envs exist, just return the existing env variable without reading the file + env_keyword = "__".join(args) + upper_env_keyword = env_keyword.upper() + # make it work for lower case and upper case. + env_variable = os.environ.get( + env_keyword, os.environ.get(upper_env_keyword)) + + # If the key is set in the environment variable, Feathr will use the value of that environment variable + if env_variable: + return env_variable + + # If it's not set in the environment, then a default is retrieved from the feathr_config.yaml file with the same config key. 
+ if os.path.exists(os.path.abspath(self.config_path)): + with open(os.path.abspath(self.config_path), 'r') as stream: + try: + yaml_config = yaml.safe_load(stream) + # concat all layers and check in environment variable + yaml_layer = yaml_config + + # resolve one layer after another; an empty file or a null/scalar layer raises TypeError + for arg in args: + yaml_layer = yaml_layer[arg] + return yaml_layer + except (KeyError, TypeError): + logger.info( + "{} not found in the config file.", env_keyword) + except yaml.YAMLError as exc: + logger.warning(exc) + + # If it's not available in the feathr_config.yaml file, Feathr will try to retrieve the value from key vault + if self.secret_manager_client: + try: + return self.secret_manager_client.get_feathr_secret(env_keyword) + except ResourceNotFoundError: + # print out warning message if cannot find the env variable in all the resources + logger.warning('Environment variable {} not found in environment variable, default YAML config file, or key vault service.', env_keyword) + return None + except KeyError: + # print out warning message if cannot find the env variable in all the resources + logger.warning('Environment variable {} not found in environment variable, default YAML config file, or key vault service.', env_keyword) + return None + + def get_environment_variable(self, variable_key): + """Gets the environment variable for the variable key. + + Args: + variable_key: environment variable key that is used to retrieve the environment variable + Return: + An environment variable for the variable key. It will retrieve the value of the environment variables in the following order: + If the key is set in the environment variable, Feathr will use the value of that environment variable + If it's not available in the environment variable, Feathr will try to retrieve the value from the configured secret management service, if any + If not found, None is returned; a warning is logged when the secret management service lookup fails. 
+ """ + env_var_value = os.environ.get(variable_key) + + if env_var_value: + return env_var_value + + # If it's not available in the environment variable file, Feathr will try to retrieve the value from key vault + logger.info(variable_key + ' is not set in the environment variables.') + + if self.secret_manager_client: + try: + return self.secret_manager_client.get_feathr_secret(variable_key) + except ResourceNotFoundError: + # print out warning message if cannot find the env variable in all the resources + logger.warning('Environment variable {} not found in environment variable, default YAML config file, or key vault service.', variable_key) + return None + except KeyError: + # print out warning message if cannot find the env variable in all the resources + logger.warning('Environment variable {} not found in environment variable, default YAML config file, or key vault service.', variable_key) + return None diff --git a/feathr_project/setup.py b/feathr_project/setup.py index 89a0e805f..4ea1eb29d 100644 --- a/feathr_project/setup.py +++ b/feathr_project/setup.py @@ -94,7 +94,9 @@ # https://github.com/Azure/azure-sdk-for-python/pull/22891 # using a version lower than that to workaround this issue. "azure-core<=1.22.1", - # azure-core 1.22.1 is dependent on msrest==0.6.21, if an environment(AML) has a different version of azure-core (say 1.24.0), + "typing_extensions>=4.2.0", + "aws-secretsmanager-caching>=1.1.1.5", + # azure-core 1.22.1 is dependent on msrest==0.6.21, if an environment(AML) has a different version of azure-core (say 1.24.0), # it brings a different version of msrest(0.7.0) which is incompatible with azure-core==1.22.1. Hence we need to pin it. 
# See this for more details: https://github.com/Azure/azure-sdk-for-python/issues/24765 "msrest<=0.6.21", diff --git a/feathr_project/test/test_fixture.py b/feathr_project/test/test_fixture.py index ef088479b..81377a1e4 100644 --- a/feathr_project/test/test_fixture.py +++ b/feathr_project/test/test_fixture.py @@ -131,14 +131,14 @@ def conflicts_auto_correction_setup(config_path: str): return client -def composite_keys_test_setup(config_path: str): +def secret_test_setup(config_path: str, secret_manager_client): now = datetime.now() # set workspace folder by time; make sure we don't have write conflict if there are many CI tests running os.environ['SPARK_CONFIG__DATABRICKS__WORK_DIR'] = ''.join(['dbfs:/feathrazure_cijob','_', str(now.minute), '_', str(now.second), '_', str(now.microsecond)]) os.environ['SPARK_CONFIG__AZURE_SYNAPSE__WORKSPACE_DIR'] = ''.join(['abfss://feathrazuretest3fs@feathrazuretest3storage.dfs.core.windows.net/feathr_github_ci','_', str(now.minute), '_', str(now.second) ,'_', str(now.microsecond)]) - client = FeathrClient(config_path=config_path) + client = FeathrClient(config_path=config_path, secret_manager_client=secret_manager_client) batch_source = HdfsSource(name="nycTaxiBatchSource", path="wasbs://public@azurefeathrstorage.blob.core.windows.net/sample_data/green_tripdata_2020-04.csv", event_timestamp_column="lpep_dropoff_datetime", @@ -181,12 +181,8 @@ def composite_keys_test_setup(config_path: str): key_column_type=ValueType.INT32, description="location id in NYC", full_name="nyc_taxi.location_id") - pu_location_id = TypedKey(key_column="PULocationID", - key_column_type=ValueType.INT32, - description="location id in NYC", - full_name="nyc_taxi.location_id") agg_features = [Feature(name="f_location_avg_fare", - key=[location_id,pu_location_id], + key=location_id, feature_type=FLOAT, transform=WindowAggTransformation(agg_expr="cast_float(fare_amount)", agg_func="AVG", @@ -194,7 +190,7 @@ def composite_keys_test_setup(config_path: str): 
filter="fare_amount > 0" )), Feature(name="f_location_max_fare", - key=[location_id,pu_location_id], + key=location_id, feature_type=FLOAT, transform=WindowAggTransformation(agg_expr="cast_float(fare_amount)", agg_func="MAX", diff --git a/feathr_project/test/test_secrets_read.py b/feathr_project/test/test_secrets_read.py index 2e5916825..e61d3f667 100644 --- a/feathr_project/test/test_secrets_read.py +++ b/feathr_project/test/test_secrets_read.py @@ -1,68 +1,53 @@ import os -from datetime import datetime from pathlib import Path -from unittest import result - -from click.testing import CliRunner -from feathr import (BOOLEAN, FLOAT, INT32, FeatureQuery, ObservationSettings, - SparkExecutionConfiguration, TypedKey, ValueType) +from azure.identity import DefaultAzureCredential from feathr.client import FeathrClient -from feathr.utils.job_utils import get_result_df - -from test_fixture import basic_test_setup +from azure.keyvault.secrets import SecretClient +from test_fixture import secret_test_setup from feathr.constants import OUTPUT_FORMAT +import botocore +import botocore.session +from aws_secretsmanager_caching import SecretCache, SecretCacheConfig -# test parquet file read/write without an extension name -def test_feathr_get_secrets_from_key_vault(): + +def test_feathr_get_secrets_from_azure_key_vault(): """ - Test if the program can read the key vault secrets as expected + Test if the program can read azure key vault secrets as expected """ # TODO: need to test get_environment_variable() as well - os.environ['SECRETS__AZURE_KEY_VAULT__NAME'] = 'feathrazuretest3-kv' + test_workspace_dir = Path( + __file__).parent.resolve() / "test_user_workspace" + + secret_client = SecretClient( + # hard code the CI key vault endpoint + vault_url="https://feathrazuretest3-kv.vault.azure.net", + credential=DefaultAzureCredential( + exclude_cli_credential=False, exclude_interactive_browser_credential=False) + ) + client: FeathrClient = secret_test_setup(os.path.join( + 
test_workspace_dir, "feathr_config_secret_test_azure_key_vault.yaml"), secret_manager_client=secret_client) + + # `redis_host` should be read from secret management service since it's not available in the environment variable, and not in the config file, we expect we get it from azure key_vault + assert client.redis_host is not None - # the config below doesn't have `ONLINE_STORE__REDIS__HOST` for testing purpose - yaml_config = """ - project_config: - project_name: 'project_feathr_integration_test' - offline_store: - s3: - s3_enabled: true - s3_endpoint: 's3.amazonaws.com' - snowflake: - url: "dqllago-ol19457.snowflakecomputing.com" - user: "feathrintegration" - role: "ACCOUNTADMIN" - spark_config: - spark_cluster: 'databricks' - spark_result_output_parts: '1' - azure_synapse: - dev_url: 'https://feathrazuretest3synapse.dev.azuresynapse.net' - pool_name: 'spark3' - workspace_dir: 'abfss://feathrazuretest3fs@feathrazuretest3storage.dfs.core.windows.net/feathr_test_workspace' - executor_size: 'Small' - executor_num: 1 - databricks: - workspace_instance_url: 'https://adb-2474129336842816.16.azuredatabricks.net/' - workspace_token_value: '' - config_template: '{"run_name":"FEATHR_FILL_IN","new_cluster":{"spark_version":"9.1.x-scala2.12","num_workers":1,"spark_conf":{"FEATHR_FILL_IN":"FEATHR_FILL_IN"},"instance_pool_id":"0403-214809-inlet434-pool-l9dj3kwz"},"libraries":[{"jar":"FEATHR_FILL_IN"}],"spark_jar_task":{"main_class_name":"FEATHR_FILL_IN","parameters":["FEATHR_FILL_IN"]}}' - work_dir: 'dbfs:/feathr_getting_started' - feathr_runtime_location: '' - online_store: - redis: - port: 6380 - ssl_enabled: True - feature_registry: - purview: - type_system_initialization: false - purview_name: 'feathrazuretest3-purview1' - delimiter: '__' +# test parquet file read/write without an extension name +def test_feathr_get_secrets_from_aws_secret_manager(): + """ + Test if the program can read AWS secret manager as expected """ + test_workspace_dir = Path( + 
__file__).parent.resolve() / "test_user_workspace" - with open("/tmp/feathr_config.yaml", "w") as text_file: - text_file.write(yaml_config) + client = botocore.session.get_session().create_client( + service_name='secretsmanager', + region_name="us-east-1" + ) + cache_config = SecretCacheConfig() + secret_cache = SecretCache(config=cache_config, client=client) - client = FeathrClient(config_path="/tmp/feathr_config.yaml") - # `redis_host` should be there since it's not available in the environment variable, and not in the config file, we expect we get it from azure key_vault - assert client.redis_host is not None + client: FeathrClient = secret_test_setup(os.path.join( + test_workspace_dir, "feathr_config_secret_test_aws_secret_manager.yaml"), secret_manager_client=secret_cache) + # `redis_host` should be read from secret management service since it's not available in the environment variable, and not in the config file, we expect we get it from azure key_vault + assert client.redis_host is not None diff --git a/feathr_project/test/test_user_workspace/feathr_config_secret_test_aws_secret_manager.yaml b/feathr_project/test/test_user_workspace/feathr_config_secret_test_aws_secret_manager.yaml new file mode 100644 index 000000000..4251f7d36 --- /dev/null +++ b/feathr_project/test/test_user_workspace/feathr_config_secret_test_aws_secret_manager.yaml @@ -0,0 +1,30 @@ +project_config: + project_name: "project_feathr_integration_test" +offline_store: + s3: + s3_enabled: false + adls: + adls_enabled: false + wasb: + wasb_enabled: false + jdbc: + jdbc_enabled: false + snowflake: + snowflake_enabled: false +spark_config: + spark_cluster: "local" + spark_result_output_parts: "1" + local: + feathr_runtime_location: None + workspace: None + master: None +online_store: + redis: + port: 6380 + ssl_enabled: True +secrets: + aws_secrets_manager: + secret_id: feathrsecretsmanagerci +feature_registry: + # The API endpoint of the registry service + api_endpoint: 
"https://feathr-sql-registry.azurewebsites.net/api/v1" diff --git a/feathr_project/test/test_user_workspace/feathr_config_secret_test_azure_key_vault.yaml b/feathr_project/test/test_user_workspace/feathr_config_secret_test_azure_key_vault.yaml new file mode 100644 index 000000000..a787edd90 --- /dev/null +++ b/feathr_project/test/test_user_workspace/feathr_config_secret_test_azure_key_vault.yaml @@ -0,0 +1,30 @@ +project_config: + project_name: "project_feathr_integration_test" +offline_store: + s3: + s3_enabled: false + adls: + adls_enabled: false + wasb: + wasb_enabled: false + jdbc: + jdbc_enabled: false + snowflake: + snowflake_enabled: false +spark_config: + spark_cluster: "local" + spark_result_output_parts: "1" + local: + feathr_runtime_location: None + workspace: None + master: None +online_store: + redis: + port: 6380 + ssl_enabled: True +secrets: + azure_key_vault: + name: feathrazuretest3-kv +feature_registry: + # The API endpoint of the registry service + api_endpoint: "https://feathr-sql-registry.azurewebsites.net/api/v1"