Skip to content

feat: add generate_aws_node_schema.ipynb #1491

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
301 changes: 301 additions & 0 deletions docs/root/dev/generate_aws_node_schema.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,301 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This is a developer script to help quickly get started on onboarding a new NodeSchema from AWS.\n",
"It scans through the \"[service name]/[latest spec date]/service-2.json\" file in the botocore repo.\n",
"\n",
"For now, it can only generate a `CartographyNodeProperties`, and not yet a `CartographyRelProperties`.\n",
"\n",
"0. git clone git@github.com:boto/botocore.git to some path and set `BOTOCORE_REPO_PATH` to that path.\n",
"\n",
"1. Open this notebook.\n",
"\n",
"2. Get an idea of the name of the object you want to find.\n",
"\n",
" ```python\n",
" list_object_names('kms')\n",
" ...\n",
" KeyMetadata\n",
" ...\n",
" ```\n",
"\n",
"3. Generate the NodeSchema\n",
"\n",
" ```python\n",
" s = build_from_aws_service('kms', 'KeyMetadata')\n",
" print(s)\n",
" ```\n",
"\n",
"4. Make appropriate changes, such as an appropriate `id`, and choices for `extra_index` and `set_in_kwargs`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\"\"\"\n",
"This is a developer script to help quickly get started on onboarding a new NodeSchema from AWS.\n",
"It scans through the \"[service name]/[latest spec date]/service-2.json\" file in the botocore repo.\n",
"\n",
"For now, it can only generate a `CartographyNodeProperties`, and not yet a `CartographyRelProperties`.\n",
"\n",
"0. git clone git@github.com:boto/botocore.git to some path and set `BOTOCORE_REPO_PATH` to that path.\n",
"\n",
"1. Open this notebook.\n",
"\n",
"2. Get an idea of the name of the object you want to find.\n",
"\n",
"```python\n",
"list_object_names('kms')\n",
"...\n",
"KeyMetadata\n",
"...\n",
"```\n",
"\n",
"3. Generate the NodeSchema\n",
"\n",
"```python\n",
"s = build_from_aws_service('kms', 'KeyMetadata')\n",
"print(s)\n",
"```\n",
"\n",
"4. Make appropriate changes, such as an appropriate `id`, and choices for `extra_index` and `set_in_kwargs`.\n",
"\"\"\"\n",
"\n",
"BOTOCORE_REPO_PATH = '/Users/myself/src/botocore'\n",
"\n",
"import json\n",
"from pprint import pprint\n",
"from typing import Dict, List\n",
"import re, os\n",
"ParsedMembers = Dict[str, str]\n",
"STRING_SHAPE = 'String'\n",
"\n",
"\n",
"def list_object_names(service_name: str) -> List[str]:\n",
" '''\n",
" List the objects returned by various List*, Get*, and Describe* calls.\n",
"\n",
" The current method is somewhat crude in that it looks directly at data['shapes'],\n",
" which includes shapes that we are not interested in.\n",
" For example, data['shapes'] has 'DescribeRouteTablesRequest', 'DescribeRouteTablesResult', \n",
" and 'RouteTable', even though you may not be interested in the request and response structures.\n",
" So, we filter out shapes that end with 'Request', 'Response', 'Result', or 'Exception',\n",
" as well as shapes whose type is not 'structure'.\n",
"\n",
" A better way could be to reason through the data in a more intelligent way, digging continually\n",
" from the API method we're interested in.\n",
" \n",
" e.g.,\n",
" data['operations']['DescribeRouteTables']['output']['shape'] -> 'DescribeRouteTablesResult',\n",
"\n",
" data['shapes']['DescribeRouteTablesResult']['type'] -> 'structure'\n",
" data['shapes']['DescribeRouteTablesResult']['members'].items()[0]['shape'] -> 'RouteTableList' # plural 'members'\n",
"\n",
" data['shapes']['RouteTableList']['type'] -> 'list'\n",
" data['shapes']['RouteTableList']['member'].items()[0]['shape'] -> 'RouteTable' # singular 'member'\n",
"\n",
" # finally, our target object\n",
" data['shapes']['RouteTable']['type'] -> 'structure'\n",
"\n",
" We may also do a kind of fuzzy searching. e.g., 'Key' -> 'KeyMetadata'.\n",
" '''\n",
" shapes = _get_object_shapes(service_name=service_name)\n",
" object_names: List[str] = []\n",
" for object_name, object in shapes.items():\n",
" if any([\n",
" object_name.endswith('Request'),\n",
" object_name.endswith('Response'),\n",
" object_name.endswith('Result'),\n",
" object_name.endswith('Exception'),\n",
" object['type'] != 'structure'\n",
" ]):\n",
" continue\n",
" object_names.append(object_name)\n",
" return object_names\n",
"\n",
"def build_from_aws_service(service_name: str, object_name: str) -> str:\n",
" '''\n",
" Build a NodeSchema file from a particular object in AWS.\n",
" Currently only supports string properties.\n",
" '''\n",
" shapes = _get_object_shapes(service_name=service_name)\n",
" object_shape: Dict[str, any] = shapes[object_name]\n",
" parsed_members = _parse_members(object_shape=object_shape)\n",
" out = _build_node_class(node_name=object_name, node_members=parsed_members)\n",
" return out\n",
"\n",
"def _get_object_shapes(service_name: str) -> Dict[str, any]:\n",
" '''\n",
" Gets all object shapes from the service descriptor file.\n",
" '''\n",
" service_descriptor = _load_service_descriptor(service_name=service_name)\n",
" shapes: Dict[str, any] = service_descriptor['shapes']\n",
" return shapes\n",
"\n",
"def _load_service_descriptor(service_name: str) -> Dict[str, any]:\n",
" all_specs = _list_folders('{botocore_path}/botocore/data/{service_name}'.format(\n",
" botocore_path=BOTOCORE_REPO_PATH,\n",
" service_name=service_name\n",
" ))\n",
" latest_spec = sorted(all_specs)[-1]\n",
" file_path = '{botocore_path}/botocore/data/{service_name}/{spec_date}/service-2.json'.format(\n",
" botocore_path=BOTOCORE_REPO_PATH,\n",
" service_name=service_name,\n",
" spec_date=latest_spec,\n",
" )\n",
" with open(file_path, 'r') as file:\n",
" data = json.load(file)\n",
" return data\n",
"\n",
"def _parse_members(object_shape: Dict[str, any]) -> ParsedMembers:\n",
" '''\n",
" Takes a shape and returns a Dict: snake_prop_name -> CamelPropertyRef\n",
" Currently only handles String type properties.\n",
" '''\n",
" parsed_members: ParsedMembers = dict()\n",
" for member_name, member_spec in object_shape['members'].items():\n",
" # if member_spec['shape'] != STRING_SHAPE:\n",
" # continue\n",
" prop_name = _camel_to_snake(member_name)\n",
" propertyRef = member_name\n",
" parsed_members[prop_name] = propertyRef\n",
" return parsed_members\n",
"\n",
"def _build_node_class(node_name: str, node_members: ParsedMembers) -> str:\n",
" class_str = '''class {node_name}NodeProperties(CartographyNodeProperties):\n",
" \"\"\"\n",
" Schema describing a {node_name}.\n",
" \"\"\"\n",
"{props}\n",
"''' \n",
" rendered_props = ''\n",
" # for prop_name, property_ref in node_members.items():\n",
" for prop_name in sorted(node_members.keys()):\n",
" property_ref = node_members[prop_name]\n",
" redered_prop = \" {prop_name}: PropertyRef = PropertyRef('{property_ref}')\".format(prop_name=prop_name, property_ref=property_ref)\n",
" rendered_props += redered_prop + \"\\n\"\n",
" out = class_str.format(node_name=node_name, props=rendered_props)\n",
" return out\n",
"\n",
"def _camel_to_snake(name: str) -> str:\n",
" '''\n",
" AI-generated function: \"RouteTableId\" -> \"route_table_id\"\n",
" '''\n",
" s1 = re.sub('(.)([A-Z][a-z]+)', r'\\1_\\2', name)\n",
" return re.sub('([a-z0-9])([A-Z])', r'\\1_\\2', s1).lower()\n",
"\n",
"def _list_folders(directory: str) -> List[str]:\n",
" '''\n",
" AI-generated: lists subfolders.\n",
" '''\n",
" return [name for name in os.listdir(directory) if os.path.isdir(os.path.join(directory, name))]\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['AliasListEntry',\n",
" 'CustomKeyStoresListEntry',\n",
" 'GrantConstraints',\n",
" 'GrantListEntry',\n",
" 'KeyListEntry',\n",
" 'KeyMetadata',\n",
" 'MultiRegionConfiguration',\n",
" 'MultiRegionKey',\n",
" 'RecipientInfo',\n",
" 'RotationsListEntry',\n",
" 'Tag',\n",
" 'XksKeyConfigurationType',\n",
" 'XksProxyAuthenticationCredentialType',\n",
" 'XksProxyConfigurationType']"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list_object_names('kms')"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"class KeyMetadataNodeProperties(CartographyNodeProperties):\n",
" \"\"\"\n",
" Schema describing a KeyMetadata.\n",
" \"\"\"\n",
" arn: PropertyRef = PropertyRef('Arn')\n",
" aws_account_id: PropertyRef = PropertyRef('AWSAccountId')\n",
" cloud_hsm_cluster_id: PropertyRef = PropertyRef('CloudHsmClusterId')\n",
" creation_date: PropertyRef = PropertyRef('CreationDate')\n",
" custom_key_store_id: PropertyRef = PropertyRef('CustomKeyStoreId')\n",
" customer_master_key_spec: PropertyRef = PropertyRef('CustomerMasterKeySpec')\n",
" deletion_date: PropertyRef = PropertyRef('DeletionDate')\n",
" description: PropertyRef = PropertyRef('Description')\n",
" enabled: PropertyRef = PropertyRef('Enabled')\n",
" encryption_algorithms: PropertyRef = PropertyRef('EncryptionAlgorithms')\n",
" expiration_model: PropertyRef = PropertyRef('ExpirationModel')\n",
" key_agreement_algorithms: PropertyRef = PropertyRef('KeyAgreementAlgorithms')\n",
" key_id: PropertyRef = PropertyRef('KeyId')\n",
" key_manager: PropertyRef = PropertyRef('KeyManager')\n",
" key_spec: PropertyRef = PropertyRef('KeySpec')\n",
" key_state: PropertyRef = PropertyRef('KeyState')\n",
" key_usage: PropertyRef = PropertyRef('KeyUsage')\n",
" mac_algorithms: PropertyRef = PropertyRef('MacAlgorithms')\n",
" multi_region: PropertyRef = PropertyRef('MultiRegion')\n",
" multi_region_configuration: PropertyRef = PropertyRef('MultiRegionConfiguration')\n",
" origin: PropertyRef = PropertyRef('Origin')\n",
" pending_deletion_window_in_days: PropertyRef = PropertyRef('PendingDeletionWindowInDays')\n",
" signing_algorithms: PropertyRef = PropertyRef('SigningAlgorithms')\n",
" valid_to: PropertyRef = PropertyRef('ValidTo')\n",
" xks_key_configuration: PropertyRef = PropertyRef('XksKeyConfiguration')\n",
"\n",
"\n"
]
}
],
"source": [
"s = build_from_aws_service('kms', 'KeyMetadata')\n",
"print(s)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
4 changes: 4 additions & 0 deletions docs/root/dev/writing-intel-modules.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ running, testing, and linting your code there.

To get started coding without reading this doc, just copy the structure of our [AWS EMR module](https://github.com/lyft/cartography/blob/master/cartography/intel/aws/emr.py) and use it as an example. For a longer written explanation of the "how" and "why", read on.

### AWS

We have a [Jupyter notebook](./generate_aws_node_schema.ipynb) with some helper functions that can help you quickly get started with writing a new AWS intel module. See the docstring at the top for details.

## Configuration and credential management

### Supplying credentials and arguments to your module
Expand Down