diff --git a/docs/root/dev/generate_aws_node_schema.ipynb b/docs/root/dev/generate_aws_node_schema.ipynb new file mode 100644 index 0000000000..21d04df2d5 --- /dev/null +++ b/docs/root/dev/generate_aws_node_schema.ipynb @@ -0,0 +1,301 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is a developer script to help quickly get started on onboarding a new NodeSchema from AWS.\n", + "It scans through the \"[service name]/[latest spec date]/service-2.json\" file in the botocore repo.\n", + "\n", + "For now, it can only generate a `CartographyNodeProperties`, and not yet a `CartographyRelProperties`.\n", + "\n", + "0. git clone git@github.com:boto/botocore.git to some path and set `BOTOCORE_REPO_PATH` to that path.\n", + "\n", + "1. Open this notebook\n", + "\n", + "2. Get an idea of the name of the object you want to find.\n", + "\n", + " ```python\n", + " list_object_names('kms')\n", + " ...\n", + " KeyMetadata\n", + " ...\n", + " ```\n", + "\n", + "3. Generate the NodeSchema\n", + "\n", + " ```python\n", + " s = build_from_aws_service('kms', 'KeyMetadata')\n", + " print(s)\n", + " ```\n", + "\n", + "4. Make appropriate changes, such as an appropriate `id`, and choices for `extra_index` and `set_in_kwargs`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "This is a developer script to help quickly get started on onboarding a new NodeSchema from AWS.\n", + "It scans through the \"[service name]/[latest spec date]/service-2.json\" file in the botocore repo.\n", + "\n", + "For now, it can only generate a `CartographyNodeProperties`, and not yet a `CartographyRelProperties`.\n", + "\n", + "0. git clone git@github.com:boto/botocore.git to some path and set `BOTOCORE_REPO_PATH` to that path.\n", + "\n", + "1. Open this notebook\n", + "\n", + "2. 
Get an idea of the name of the object you want to find.\n", + "\n", + "```python\n", + "list_object_names('kms')\n", + "...\n", + "KeyMetadata\n", + "...\n", + "```\n", + "\n", + "3. Generate the NodeSchema\n", + "\n", + "```\n", + "s = build_from_aws_service('kms', 'KeyMetadata')\n", + "print(s)\n", + "```\n", + "4. Make appropriate changes, such as an appropriate `id`, and choices for `extra_index` and `set_in_kwargs`.\n", + "\"\"\"\n", + "\n", + "BOTOCORE_REPO_PATH = '/Users/myself/src/botocore'\n", + "\n", + "import json\n", + "from pprint import pprint\n", + "from typing import Dict, List\n", + "import re, os\n", + "ParsedMembers = Dict[str, str]\n", + "STRING_SHAPE = 'String'\n", + "\n", + "\n", + "def list_object_names(service_name: str) -> List[str]:\n", + " '''\n", + " List the objects returned by various List* Get* and Describe* calls.\n", + "\n", + " The current method is somewhat crude in that it looks directly at data['shapes'],\n", + " which includes shapes that we are not interested in.\n", + " For example, data['shapes'] has 'DescribeRouteTablesRequest', 'DescribeRouteTablesResult', \n", + " and 'RouteTable', even though you may not be interested in the request and response structures.\n", + " So, we filter out shapes whose names end with 'Request', 'Response', 'Result', or 'Exception', as well as shapes whose type is not 'structure'.\n", + "\n", + " A better way could be to reason through the data in a more intelligent way, digging continually\n", + " from the API method we're interested in.\n", + " \n", + " e.g.,\n", + " data['shapes']['DescribeRouteTables']['output']['shape'] -> 'DescribeRouteTablesResult',\n", + "\n", + " data['shapes']['DescribeRouteTablesResult']['type'] -> 'structure'\n", + " data['shapes']['DescribeRouteTablesResult']['members'].items()[0]['shape'] -> 'RouteTableList' # plural 'members'\n", + "\n", + " data['shapes']['RouteTableList']['type'] -> 'list'\n", + " data['shapes']['RouteTableList']['member'].items()[0]['shape'] -> 'RouteTable' # singular 'member'\n", + "\n", + " # finally, our 
target object\n", + " data['shapes']['RouteTable']['type'] -> 'structure'\n", + "\n", + " We may also do a kind of fuzzy searching. e.g., 'Key' -> 'KeyMetadata'.\n", + " '''\n", + " shapes = _get_object_shapes(service_name=service_name)\n", + " object_names: List[str] = []\n", + " for object_name, object in shapes.items():\n", + " if any([\n", + " object_name.endswith('Request'),\n", + " object_name.endswith('Response'),\n", + " object_name.endswith('Result'),\n", + " object_name.endswith('Exception'),\n", + " object['type'] != 'structure'\n", + " ]):\n", + " continue\n", + " object_names.append(object_name)\n", + " return object_names\n", + "\n", + "def build_from_aws_service(service_name: str, object_name: str) -> str:\n", + " '''\n", + " Build a NodeSchema file from a particular object in AWS.\n", + " Currently only supports string properties.\n", + " '''\n", + " shapes = _get_object_shapes(service_name=service_name)\n", + " object_shape: Dict[str, any] = shapes[object_name]\n", + " parsed_members = _parse_members(object_shape=object_shape)\n", + " out = _build_node_class(node_name=object_name, node_members=parsed_members)\n", + " return out\n", + "\n", + "def _get_object_shapes(service_name: str) -> Dict[str, any]:\n", + " '''\n", + " Gets all object shapes from the service descriptor file.\n", + " '''\n", + " service_descriptor = _load_service_descriptor(service_name=service_name)\n", + " shapes: Dict[str, any] = service_descriptor['shapes']\n", + " return shapes\n", + "\n", + "def _load_service_descriptor(service_name: str) -> Dict[str, any]:\n", + " all_specs = _list_folders('{botocore_path}/botocore/data/{service_name}'.format(\n", + " botocore_path=BOTOCORE_REPO_PATH,\n", + " service_name=service_name\n", + " ))\n", + " latest_spec = sorted(all_specs)[-1]\n", + " file_path = '{botocore_path}/botocore/data/{service_name}/{spec_date}/service-2.json'.format(\n", + " botocore_path=BOTOCORE_REPO_PATH,\n", + " service_name=service_name,\n", + " 
spec_date=latest_spec,\n", + " )\n", + " with open(file_path, 'r') as file:\n", + " data = json.load(file)\n", + " return data\n", + "\n", + "def _parse_members(object_shape: Dict[str, any]) -> ParsedMembers:\n", + " '''\n", + " Takes a shape and returns a Dict: snake_prop_name -> CamelPropertyRef\n", + " Currently only handles String type properties.\n", + " '''\n", + " parsed_members: ParsedMembers = dict()\n", + " for member_name, member_spec in object_shape['members'].items():\n", + " # if member_spec['shape'] != STRING_SHAPE:\n", + " # continue\n", + " prop_name = _camel_to_snake(member_name)\n", + " propertyRef = member_name\n", + " parsed_members[prop_name] = propertyRef\n", + " return parsed_members\n", + "\n", + "def _build_node_class(node_name: str, node_members: ParsedMembers) -> str:\n", + " class_str = '''class {node_name}NodeProperties(CartographyNodeProperties):\n", + " \"\"\"\n", + " Schema describing a {node_name}.\n", + " \"\"\"\n", + "{props}\n", + "''' \n", + " rendered_props = ''\n", + " # for prop_name, property_ref in node_members.items():\n", + " for prop_name in sorted(node_members.keys()):\n", + " property_ref = node_members[prop_name]\n", + " redered_prop = \" {prop_name}: PropertyRef = PropertyRef('{property_ref}')\".format(prop_name=prop_name, property_ref=property_ref)\n", + " rendered_props += redered_prop + \"\\n\"\n", + " out = class_str.format(node_name=node_name, props=rendered_props)\n", + " return out\n", + "\n", + "def _camel_to_snake(name: str) -> str:\n", + " '''\n", + " AI-generated function: \"RouteTableId\" -> \"route_table_id\"\n", + " '''\n", + " s1 = re.sub('(.)([A-Z][a-z]+)', r'\\1_\\2', name)\n", + " return re.sub('([a-z0-9])([A-Z])', r'\\1_\\2', s1).lower()\n", + "\n", + "def _list_folders(directory: str) -> List[str]:\n", + " '''\n", + " AI-generated: lists subfolders.\n", + " '''\n", + " return [name for name in os.listdir(directory) if os.path.isdir(os.path.join(directory, name))]\n" + ] + }, + { + "cell_type": 
"code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['AliasListEntry',\n", + " 'CustomKeyStoresListEntry',\n", + " 'GrantConstraints',\n", + " 'GrantListEntry',\n", + " 'KeyListEntry',\n", + " 'KeyMetadata',\n", + " 'MultiRegionConfiguration',\n", + " 'MultiRegionKey',\n", + " 'RecipientInfo',\n", + " 'RotationsListEntry',\n", + " 'Tag',\n", + " 'XksKeyConfigurationType',\n", + " 'XksProxyAuthenticationCredentialType',\n", + " 'XksProxyConfigurationType']" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list_object_names('kms')" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "class KeyMetadataNodeProperties(CartographyNodeProperties):\n", + " \"\"\"\n", + " Schema describing a KeyMetadata.\n", + " \"\"\"\n", + " arn: PropertyRef = PropertyRef('Arn')\n", + " aws_account_id: PropertyRef = PropertyRef('AWSAccountId')\n", + " cloud_hsm_cluster_id: PropertyRef = PropertyRef('CloudHsmClusterId')\n", + " creation_date: PropertyRef = PropertyRef('CreationDate')\n", + " custom_key_store_id: PropertyRef = PropertyRef('CustomKeyStoreId')\n", + " customer_master_key_spec: PropertyRef = PropertyRef('CustomerMasterKeySpec')\n", + " deletion_date: PropertyRef = PropertyRef('DeletionDate')\n", + " description: PropertyRef = PropertyRef('Description')\n", + " enabled: PropertyRef = PropertyRef('Enabled')\n", + " encryption_algorithms: PropertyRef = PropertyRef('EncryptionAlgorithms')\n", + " expiration_model: PropertyRef = PropertyRef('ExpirationModel')\n", + " key_agreement_algorithms: PropertyRef = PropertyRef('KeyAgreementAlgorithms')\n", + " key_id: PropertyRef = PropertyRef('KeyId')\n", + " key_manager: PropertyRef = PropertyRef('KeyManager')\n", + " key_spec: PropertyRef = PropertyRef('KeySpec')\n", + " key_state: PropertyRef = 
PropertyRef('KeyState')\n", + " key_usage: PropertyRef = PropertyRef('KeyUsage')\n", + " mac_algorithms: PropertyRef = PropertyRef('MacAlgorithms')\n", + " multi_region: PropertyRef = PropertyRef('MultiRegion')\n", + " multi_region_configuration: PropertyRef = PropertyRef('MultiRegionConfiguration')\n", + " origin: PropertyRef = PropertyRef('Origin')\n", + " pending_deletion_window_in_days: PropertyRef = PropertyRef('PendingDeletionWindowInDays')\n", + " signing_algorithms: PropertyRef = PropertyRef('SigningAlgorithms')\n", + " valid_to: PropertyRef = PropertyRef('ValidTo')\n", + " xks_key_configuration: PropertyRef = PropertyRef('XksKeyConfiguration')\n", + "\n", + "\n" + ] + } + ], + "source": [ + "s = build_from_aws_service('kms', 'KeyMetadata')\n", + "print(s)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/root/dev/writing-intel-modules.md b/docs/root/dev/writing-intel-modules.md index 7e0eef09fa..90178de5b4 100644 --- a/docs/root/dev/writing-intel-modules.md +++ b/docs/root/dev/writing-intel-modules.md @@ -11,6 +11,10 @@ running, testing, and linting your code there. To get started coding without reading this doc, just copy the structure of our [AWS EMR module](https://github.com/lyft/cartography/blob/master/cartography/intel/aws/emr.py) and use it as an example. For a longer written explanation of the "how" and "why", read on. +### AWS + +We have a [Jupyter notebook](./generate_aws_node_schema.ipynb) with some helper functions that can help you quickly get started with writing a new AWS intel module. See the docstring at the top for details. 
+ ## Configuration and credential management ### Supplying credentials and arguments to your module