diff --git a/plugins/csv/.CHECKSUM b/plugins/csv/.CHECKSUM index bf76a55827..2fa559cea4 100644 --- a/plugins/csv/.CHECKSUM +++ b/plugins/csv/.CHECKSUM @@ -1,7 +1,7 @@ { - "spec": "f4331c96960273eb13dbbeda5ad36888", - "manifest": "add5730ccf5085e6835f1eae2532277e", - "setup": "51ce0050813b5dda5baecaee2e49f3ca", + "spec": "615a433d537504bf31f4b9d82cba43a5", + "manifest": "2a525025896bb44fdbc44bf661def46a", + "setup": "0af8a27a9f67d731101e0a5be42afa3a", "schemas": [ { "identifier": "filter_bytes/schema.py", diff --git a/plugins/csv/bin/komand_csv b/plugins/csv/bin/komand_csv index 7658188404..cc4402a71c 100755 --- a/plugins/csv/bin/komand_csv +++ b/plugins/csv/bin/komand_csv @@ -6,7 +6,7 @@ from sys import argv Name = "CSV" Vendor = "rapid7" -Version = "2.0.5" +Version = "2.1.0" Description = "[Comma Separated Value](https://en.wikipedia.org/wiki/Comma-separated_values) (CSV) is a common format to express data.This plugin allows one to extract fields from CSV strings and files.Using the CSV plugin, users can automate conversions between JSON and CSV to help enable service interoperabilityas well as filter data within a CSV file" diff --git a/plugins/csv/help.md b/plugins/csv/help.md index ec24cf6b6c..0ec9ae8f97 100644 --- a/plugins/csv/help.md +++ b/plugins/csv/help.md @@ -240,6 +240,7 @@ Example output: # Version History +* 2.1.0 - Updated To_CSV function to flatten lists in columns * 2.0.5 - Updated SDK to the latest version (6.3.10) * 2.0.4 - Updated SDK to the latest version (6.3.3) * 2.0.3 - Updated SDK to the latest version (6.2.5) diff --git a/plugins/csv/komand_csv/actions/json_to_csv_bytes/action.py b/plugins/csv/komand_csv/actions/json_to_csv_bytes/action.py index 8505c3537d..c60a221dc4 100755 --- a/plugins/csv/komand_csv/actions/json_to_csv_bytes/action.py +++ b/plugins/csv/komand_csv/actions/json_to_csv_bytes/action.py @@ -20,6 +20,7 @@ def run(self, params={}): json_object = params.get(Input.JSON, {}) # END INPUT BINDING - DO NOT REMOVE - encoded_string = json_to_csv(json_object).encode() - encoded_bytes = base64.encodebytes(encoded_string) - return {Output.CSV_BYTES: encoded_bytes.decode()} + csv_string = json_to_csv(json_object) # str + csv_bytes = csv_string.encode("utf-8") # explicit UTF-8 + csv_b64 = base64.b64encode(csv_bytes).decode("ascii") # NO line wraps + return {Output.CSV_BYTES: csv_b64} diff --git a/plugins/csv/komand_csv/util/utils.py b/plugins/csv/komand_csv/util/utils.py index 71be783214..e3885aa6b8 100644 --- a/plugins/csv/komand_csv/util/utils.py +++ b/plugins/csv/komand_csv/util/utils.py @@ -2,6 +2,7 @@ import json import re from io import StringIO +from typing import Any, Dict, List, Union import insightconnect_plugin_runtime from insightconnect_plugin_runtime.exceptions import PluginException @@ -37,7 +38,7 @@ def fields_syntax_good(fields: str) -> bool: # Ex. 'f2' -> 2 # # @param field String of field -# @return integer representation of fiele +# @return integer representation of field ## def field_to_number(field): if field.startswith("f"): @@ -140,23 +141,217 @@ def csv_to_dict(string_csv: str, action: insightconnect_plugin_runtime.Action) - return [json.loads(json.dumps(row)) for row in csv_data] -def json_to_csv(input_json: dict) -> str: - output = StringIO() - csv_writer = csv.writer(output) - keys = [] +Scalar = Union[str, int, float, bool, None] +JSONVal = Union[Scalar, Dict[str, Any], List[Any]] +TAG_RE = re.compile(r"<[^>]+>") - # get all keys from json - for entry in input_json: - keys.extend(list(entry.keys())) - # remove duplicated keys - keys = list(dict.fromkeys(keys)) +def strip_html(text: str) -> str: + """Remove HTML tags and return plain text.""" + if text is None: + return "" + return TAG_RE.sub("", str(text)) - if keys: - csv_writer.writerow(keys) - for entry in input_json: - for index, _ in enumerate(keys): - entry[keys[index]] = entry.get(keys[index], "") - csv_writer.writerow(entry.values()) - return output.getvalue() +def join_scalars(values: List[Scalar], joiner: str) -> str: + """Join a list of scalar values into a string using the provided delimiter.""" + return joiner.join("" if value is None else str(value) for value in values) + + +def emit_scalar(items_dict: Dict[str, str], key: str, value: Scalar) -> None: + """Emit a single scalar cell value with HTML stripping always enabled.""" + as_string = "" if value is None else str(value) + items_dict[key] = strip_html(as_string) + + +def flatten_mapping( + mapping: Dict[str, Any], + *, + items_dict: Dict[str, str], + base_key: str, + key_separator: str, + list_joiner: str, + list_expand_limit: int, + list_overflow_suffix: str, +) -> None: + """Flatten a nested mapping into items_dict using dot-notation for keys.""" + if not mapping: + if base_key: + items_dict[base_key] = "" + return + + for sub_key, sub_value in mapping.items(): + new_key = f"{base_key}{key_separator}{sub_key}" if base_key else str(sub_key) + flatten_value( + sub_value, + key=new_key, + items_dict=items_dict, + key_separator=key_separator, + list_joiner=list_joiner, + list_expand_limit=list_expand_limit, + list_overflow_suffix=list_overflow_suffix, + ) + + +def flatten_sequence( + sequence: List[Any], + *, + items_dict: Dict[str, str], + key: str, + key_separator: str, + list_joiner: str, + list_expand_limit: int, + list_overflow_suffix: str, +) -> None: + """Flatten a list: join scalars, expand dicts up to limit, overflow as compact JSON.""" + if not sequence: + items_dict[key] = "" + return + + contains_no_dicts = all(not isinstance(element, dict) for element in sequence) + contains_only_dicts = all(isinstance(element, dict) for element in sequence) + + if contains_no_dicts: + items_dict[key] = strip_html(join_scalars(sequence, list_joiner)) + return + + if contains_only_dicts: + limit = max(0, int(list_expand_limit)) + elements_to_expand = sequence[:limit] + for element_index, element in enumerate(elements_to_expand): + flatten_mapping( + element, + items_dict=items_dict, + base_key=f"{key}[{element_index}]", + key_separator=key_separator, + list_joiner=list_joiner, + list_expand_limit=list_expand_limit, + list_overflow_suffix=list_overflow_suffix, + ) + if len(sequence) > limit: + items_dict[f"{key}{list_overflow_suffix}"] = json.dumps(sequence[limit:], separators=(",", ":")) + return + + # Mixed list: keep intact in *_rest as compact JSON + items_dict[f"{key}{list_overflow_suffix}"] = json.dumps(sequence, separators=(",", ":")) + + +def flatten_value( + value: Any, + *, + key: str, + items_dict: Dict[str, str], + key_separator: str, + list_joiner: str, + list_expand_limit: int, + list_overflow_suffix: str, +) -> None: + """Dispatch flatten logic based on value type.""" + if isinstance(value, dict): + flatten_mapping( + value, + items_dict=items_dict, + base_key=key, + key_separator=key_separator, + list_joiner=list_joiner, + list_expand_limit=list_expand_limit, + list_overflow_suffix=list_overflow_suffix, + ) + elif isinstance(value, list): + flatten_sequence( + value, + items_dict=items_dict, + key=key, + key_separator=key_separator, + list_joiner=list_joiner, + list_expand_limit=list_expand_limit, + list_overflow_suffix=list_overflow_suffix, + ) + else: + emit_scalar(items_dict, key, value) + + +def flatten_dict( + data: Dict[str, JSONVal], + *, + parent_key: str = "", + key_separator: str = ".", + list_joiner: str = "|", + list_expand_limit: int = 3, + list_overflow_suffix: str = "_rest", +) -> Dict[str, str]: + """ + Flatten a nested dict into a single-level dict for CSV: + - objects: dot keys + - list of scalars: joined with delimiter (HTML stripped) + - list of objects: expand key[i].subkey up to limit; remainder in key_rest + - mixed lists: key_rest as compact JSON + - all leaf values are HTML stripped + """ + items_dict: Dict[str, str] = {} + for sub_key, sub_value in (data or {}).items(): + full_key = f"{parent_key}{key_separator}{sub_key}" if parent_key else str(sub_key) + flatten_value( + sub_value, + key=full_key, + items_dict=items_dict, + key_separator=key_separator, + list_joiner=list_joiner, + list_expand_limit=list_expand_limit, + list_overflow_suffix=list_overflow_suffix, + ) + return items_dict + + +def json_to_csv( + input_json: Union[Dict[str, Any], List[Dict[str, Any]]], + *, + key_sep: str = ".", + list_joiner: str = "|", + list_expand_limit: int = 3, +) -> str: + """ + Convert JSON (object or list of objects) into a flattened CSV string. + HTML is always stripped from all leaf values. + """ + if isinstance(input_json, dict): + records = [input_json] + elif isinstance(input_json, list): + records = input_json + else: + raise PluginException( + cause="Invalid JSON input.", assistance="Provide a JSON object or a JSON array of objects." + ) + + output_buffer = StringIO() + csv_writer = csv.writer(output_buffer) + + flat_rows: List[Dict[str, str]] = [] + header_keys: List[str] = [] + + for record in records or []: + if not isinstance(record, dict): + flat_row = {"value": strip_html("" if record is None else str(record))} + else: + flat_row = flatten_dict( + record, + key_separator=key_sep, + list_joiner=list_joiner, + list_expand_limit=list_expand_limit, + ) + flat_rows.append(flat_row) + header_keys.extend(flat_row.keys()) + + seen = set() + ordered_keys: List[str] = [] + for key in header_keys: + if key not in seen: + seen.add(key) + ordered_keys.append(key) + + if ordered_keys: + csv_writer.writerow(ordered_keys) + for row in flat_rows: + csv_writer.writerow([row.get(key, "") for key in ordered_keys]) + + return output_buffer.getvalue() diff --git a/plugins/csv/plugin.spec.yaml b/plugins/csv/plugin.spec.yaml index f76eb9aee9..0169a47352 100644 --- a/plugins/csv/plugin.spec.yaml +++ b/plugins/csv/plugin.spec.yaml @@ -8,7 +8,7 @@ description: "[Comma Separated Value](https://en.wikipedia.org/wiki/Comma-separa \ from CSV strings and files.\n\nUsing the CSV plugin, users can automate conversions\ \ between JSON and CSV to help enable service interoperability\nas well as filter\ \ data within a CSV file" -version: 2.0.5 +version: 2.1.0 connection_version: 2 supported_versions: ['2025-02-25'] vendor: rapid7 @@ -34,6 +34,7 @@ key_features: - Convert between JSON and CSV - Filter strings version_history: +- 2.1.0 - Updated To_CSV function to flatten lists in columns - 2.0.5 - Updated SDK to the latest version (6.3.10) - 2.0.4 - Updated SDK to the latest version (6.3.3) - 2.0.3 - Updated SDK to the latest version (6.2.5) diff --git a/plugins/csv/setup.py b/plugins/csv/setup.py index d59e434253..3725e2863a 100755 --- a/plugins/csv/setup.py +++ b/plugins/csv/setup.py @@ -4,7 +4,7 @@ setup( name="csv-rapid7-plugin", - version="2.0.5", + version="2.1.0", description="[Comma Separated Value](https://en.wikipedia.org/wiki/Comma-separated_values) (CSV) is a common format to express data.This plugin allows one to extract fields from CSV strings and files.Using the CSV plugin, users can automate conversions between JSON and CSV to help enable service interoperabilityas well as filter data within a CSV file", author="rapid7", author_email="", diff --git a/plugins/csv/unit_test/test_json_to_csv_bytes.py b/plugins/csv/unit_test/test_json_to_csv_bytes.py index d736a8e225..45f4198b85 100644 --- a/plugins/csv/unit_test/test_json_to_csv_bytes.py +++ b/plugins/csv/unit_test/test_json_to_csv_bytes.py @@ -1,3 +1,5 @@ +# plugins/csv/unit_test/test_json_to_csv_bytes.py + import os import sys @@ -21,8 +23,9 @@ def test_json_to_csv_bytes(self): ], } ) + # Expect unwrapped base64 (single line, no trailing newline) expected = { - Output.CSV_BYTES: "Y29sdW1uMSxjb2x1bW4yLGNvbHVtbjMNCnZhbHVlMSx2YWx1ZTIsdmFsdWUzDQp2YWx1ZTQsdmFs\ndWU1LHZhbHVlNg0KdmFsdWU3LHZhbHVlOCx2YWx1ZTkNCg==\n" + Output.CSV_BYTES: "Y29sdW1uMSxjb2x1bW4yLGNvbHVtbjMNCnZhbHVlMSx2YWx1ZTIsdmFsdWUzDQp2YWx1ZTQsdmFsdWU1LHZhbHVlNg0KdmFsdWU3LHZhbHVlOCx2YWx1ZTkNCg==" } self.assertEqual(actual, expected) @@ -46,8 +49,9 @@ def test_json_to_csv_bytes_value_as_array(self): ], } ) + # Arrays of scalars are joined with "|" now expected = { - Output.CSV_BYTES: "Y29sdW1uMSxjb2x1bW4yLGNvbHVtbjMNCnZhbHVlMSx2YWx1ZTIsdmFsdWUzDQp2YWx1ZTQsIlsn\ndmFsdWUnLCAndmFsdWUnXSIsdmFsdWU2DQo=\n" + Output.CSV_BYTES: "Y29sdW1uMSxjb2x1bW4yLGNvbHVtbjMNCnZhbHVlMSx2YWx1ZTIsdmFsdWUzDQp2YWx1ZTQsdmFsdWV8dmFsdWUsdmFsdWU2DQo=" } self.assertEqual(actual, expected) @@ -61,8 +65,9 @@ def test_json_to_csv_bytes_value_as_object(self): ], } ) + # Objects are expanded into additional columns expected = { - Output.CSV_BYTES: "Y29sdW1uMSxjb2x1bW4yLGNvbHVtbjMNCnZhbHVlMSx2YWx1ZTIsdmFsdWUzDQp2YWx1ZTQsInsn\nY29sdW1uMl8xJzogJ3ZhbHVlJywgJ2NvbHVtbic6ICd2YWx1ZSd9Iix2YWx1ZTYNCg==\n" + Output.CSV_BYTES: "Y29sdW1uMSxjb2x1bW4yLGNvbHVtbjMsY29sdW1uMi5jb2x1bW4yXzEsY29sdW1uMi5jb2x1bW4NCnZhbHVlMSx2YWx1ZTIsdmFsdWUzLCwNCnZhbHVlNCwsdmFsdWU2LHZhbHVlLHZhbHVlDQo=" } self.assertEqual(actual, expected) @@ -78,7 +83,7 @@ def test_json_to_csv_bytes_empty_object(self): } ) expected = { - Output.CSV_BYTES: "Y29sdW1uMSxjb2x1bW4yLGNvbHVtbjMNCnZhbHVlMSx2YWx1ZTIsdmFsdWUzDQp2YWx1ZTQsdmFs\ndWU1LHZhbHVlNg0KLCwNCg==\n" + Output.CSV_BYTES: "Y29sdW1uMSxjb2x1bW4yLGNvbHVtbjMNCnZhbHVlMSx2YWx1ZTIsdmFsdWUzDQp2YWx1ZTQsdmFsdWU1LHZhbHVlNg0KLCwNCg==" } self.assertEqual(actual, expected) @@ -94,7 +99,7 @@ def test_json_to_csv_bytes_empty_fields(self): } ) expected = { - Output.CSV_BYTES: "Y29sdW1uMSxjb2x1bW4yLGNvbHVtbjMNCnZhbHVlMSwsdmFsdWUzDQosdmFsdWU1LHZhbHVlNg0K\ndmFsdWU3LHZhbHVlOCwNCg==\n" + Output.CSV_BYTES: "Y29sdW1uMSxjb2x1bW4yLGNvbHVtbjMNCnZhbHVlMSwsdmFsdWUzDQosdmFsdWU1LHZhbHVlNg0KdmFsdWU3LHZhbHVlOCwNCg==" } self.assertEqual(actual, expected) @@ -109,7 +114,7 @@ def test_json_to_csv_bytes_unicode(self): } ) expected = { - Output.CSV_BYTES: "Y29sdW1uMSxjb2x1bW4yLGNvbHVtbjMNCsSFYcSHY2XEmSx2YWx1ZTIsdmFsdWUzDQp2YWx1ZTQs\ncHl0aMO2w7bDtm4sdmFsdWU2DQo=\n" + Output.CSV_BYTES: "Y29sdW1uMSxjb2x1bW4yLGNvbHVtbjMNCsSFYcSHY2XEmSx2YWx1ZTIsdmFsdWUzDQp2YWx1ZTQscHl0aMO2w7bDtm4sdmFsdWU2DQo=" } self.assertEqual(actual, expected) @@ -125,6 +130,6 @@ def test_json_to_csv_bytes_unstructured_data(self): } ) expected = { - Output.CSV_BYTES: "Y29sdW1uMSxjb2x1bW4yLGNvbHVtbjMsY29sdW1uNA0KdmFsdWUxLHZhbHVlMix2YWx1ZTMsDQp2\nYWx1ZTQsdmFsdWU1LCwNCnZhbHVlNyx2YWx1ZTgsdmFsdWU5LHZhbHVlMTANCg==\n" + Output.CSV_BYTES: "Y29sdW1uMSxjb2x1bW4yLGNvbHVtbjMsY29sdW1uNA0KdmFsdWUxLHZhbHVlMix2YWx1ZTMsDQp2YWx1ZTQsdmFsdWU1LCwNCnZhbHVlNyx2YWx1ZTgsdmFsdWU5LHZhbHVlMTANCg==" } self.assertEqual(actual, expected) diff --git a/plugins/csv/unit_test/test_json_to_csv_string.py b/plugins/csv/unit_test/test_json_to_csv_string.py index 79bf8d4eed..cfbc0f8865 100644 --- a/plugins/csv/unit_test/test_json_to_csv_string.py +++ b/plugins/csv/unit_test/test_json_to_csv_string.py @@ -1,3 +1,5 @@ +# plugins/csv/unit_test/test_json_to_csv_string.py + import os import sys @@ -46,8 +48,9 @@ def test_json_to_csv_string_value_as_array(self): ], } ) + # Arrays of scalars are joined with "|" now expected = { - Output.CSV_STRING: "column1,column2,column3\r\nvalue1,value2,value3\r\nvalue4,\"['value', 'value']\",value6\r\n" + Output.CSV_STRING: "column1,column2,column3\r\nvalue1,value2,value3\r\nvalue4,value|value,value6\r\n" } self.assertEqual(actual, expected) @@ -61,8 +64,9 @@ def test_json_to_csv_string_value_as_object(self): ], } ) + # Objects are expanded into additional columns expected = { - Output.CSV_STRING: "column1,column2,column3\r\nvalue1,value2,value3\r\nvalue4,\"{'column2_1': 'value', 'column': 'value'}\",value6\r\n" + Output.CSV_STRING: "column1,column2,column3,column2.column2_1,column2.column\r\nvalue1,value2,value3,,\r\nvalue4,,value6,value,value\r\n" } self.assertEqual(actual, expected)