Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions plugins/csv/.CHECKSUM
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"spec": "f4331c96960273eb13dbbeda5ad36888",
"manifest": "add5730ccf5085e6835f1eae2532277e",
"setup": "51ce0050813b5dda5baecaee2e49f3ca",
"spec": "615a433d537504bf31f4b9d82cba43a5",
"manifest": "2a525025896bb44fdbc44bf661def46a",
"setup": "0af8a27a9f67d731101e0a5be42afa3a",
"schemas": [
{
"identifier": "filter_bytes/schema.py",
Expand Down
2 changes: 1 addition & 1 deletion plugins/csv/bin/komand_csv
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ from sys import argv

Name = "CSV"
Vendor = "rapid7"
Version = "2.0.5"
Version = "2.1.0"
Description = "[Comma Separated Value](https://en.wikipedia.org/wiki/Comma-separated_values) (CSV) is a common format to express data.This plugin allows one to extract fields from CSV strings and files.Using the CSV plugin, users can automate conversions between JSON and CSV to help enable service interoperabilityas well as filter data within a CSV file"


Expand Down
1 change: 1 addition & 0 deletions plugins/csv/help.md
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ Example output:

# Version History

* 2.1.0 - Updated To_CSV function to flatten lists in columns
* 2.0.5 - Updated SDK to the latest version (6.3.10)
* 2.0.4 - Updated SDK to the latest version (6.3.3)
* 2.0.3 - Updated SDK to the latest version (6.2.5)
Expand Down
7 changes: 4 additions & 3 deletions plugins/csv/komand_csv/actions/json_to_csv_bytes/action.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def run(self, params={}):
json_object = params.get(Input.JSON, {})
# END INPUT BINDING - DO NOT REMOVE

encoded_string = json_to_csv(json_object).encode()
encoded_bytes = base64.encodebytes(encoded_string)
return {Output.CSV_BYTES: encoded_bytes.decode()}
csv_string = json_to_csv(json_object) # str
csv_bytes = csv_string.encode("utf-8") # explicit UTF-8
csv_b64 = base64.b64encode(csv_bytes).decode("ascii") # NO line wraps
return {Output.CSV_BYTES: csv_b64}
229 changes: 212 additions & 17 deletions plugins/csv/komand_csv/util/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import json
import re
from io import StringIO
from typing import Any, Dict, List, Union

import insightconnect_plugin_runtime
from insightconnect_plugin_runtime.exceptions import PluginException
Expand Down Expand Up @@ -37,7 +38,7 @@ def fields_syntax_good(fields: str) -> bool:
# Ex. 'f2' -> 2
#
# @param field String of field
# @return integer representation of fiele
# @return integer representation of field
##
def field_to_number(field):
if field.startswith("f"):
Expand Down Expand Up @@ -140,23 +141,217 @@ def csv_to_dict(string_csv: str, action: insightconnect_plugin_runtime.Action) -
return [json.loads(json.dumps(row)) for row in csv_data]


def json_to_csv(input_json: dict) -> str:
output = StringIO()
csv_writer = csv.writer(output)
keys = []
# Type aliases for the JSON shapes this module flattens into CSV cells.
Scalar = Union[str, int, float, bool, None]  # leaf values that become a single CSV cell
JSONVal = Union[Scalar, Dict[str, Any], List[Any]]  # any JSON-shaped value
# Matches HTML/XML tags (anything between "<" and ">") so leaf values can be
# reduced to plain text before being written to a cell.
TAG_RE = re.compile(r"<[^>]+>")

# get all keys from json
for entry in input_json:
keys.extend(list(entry.keys()))

# remove duplicated keys
keys = list(dict.fromkeys(keys))
def strip_html(text: str) -> str:
    """Return *text* with any HTML/XML tags removed; None becomes an empty string."""
    if text is None:
        return ""
    # Drop anything shaped like a tag ("<...>"); re caches the compiled pattern.
    return re.sub(r"<[^>]+>", "", str(text))

if keys:
csv_writer.writerow(keys)
for entry in input_json:
for index, _ in enumerate(keys):
entry[keys[index]] = entry.get(keys[index], "")
csv_writer.writerow(entry.values())

return output.getvalue()
def join_scalars(values: List[Scalar], joiner: str) -> str:
    """Join a list of scalar values into a single string separated by *joiner*.

    None entries are rendered as empty strings; everything else is str()-ed.
    """
    rendered = [str(item) if item is not None else "" for item in values]
    return joiner.join(rendered)


def emit_scalar(items_dict: Dict[str, str], key: str, value: Scalar) -> None:
    """Store one scalar cell value under *key*, always stripping HTML tags.

    None is written as an empty string.
    """
    text = str(value) if value is not None else ""
    items_dict[key] = strip_html(text)


def flatten_mapping(
    mapping: Dict[str, Any],
    *,
    items_dict: Dict[str, str],
    base_key: str,
    key_separator: str,
    list_joiner: str,
    list_expand_limit: int,
    list_overflow_suffix: str,
) -> None:
    """Recursively flatten *mapping* into items_dict, joining nested keys with key_separator.

    An empty mapping that sits under a key of its own still produces an empty
    cell, so the column is not silently dropped.
    """
    if not mapping:
        if base_key:
            items_dict[base_key] = ""
        return

    for child_key, child_value in mapping.items():
        if base_key:
            qualified_key = f"{base_key}{key_separator}{child_key}"
        else:
            qualified_key = str(child_key)
        flatten_value(
            child_value,
            key=qualified_key,
            items_dict=items_dict,
            key_separator=key_separator,
            list_joiner=list_joiner,
            list_expand_limit=list_expand_limit,
            list_overflow_suffix=list_overflow_suffix,
        )


def flatten_sequence(
    sequence: List[Any],
    *,
    items_dict: Dict[str, str],
    key: str,
    key_separator: str,
    list_joiner: str,
    list_expand_limit: int,
    list_overflow_suffix: str,
) -> None:
    """Flatten a list into cells.

    Rules (unchanged from the original contract):
    - empty list       -> empty cell under *key*
    - scalars only     -> one cell, values joined with *list_joiner* (HTML stripped)
    - dicts only       -> expand key[i].subkey up to *list_expand_limit*; the
                          remainder is stored as compact JSON under key + suffix
    - mixed contents   -> the whole list as compact JSON under key + suffix
    """
    if not sequence:
        items_dict[key] = ""
        return

    dict_count = sum(1 for element in sequence if isinstance(element, dict))

    if dict_count == 0:
        # Pure scalar list: a single joined (and HTML-stripped) cell.
        items_dict[key] = strip_html(join_scalars(sequence, list_joiner))
        return

    if dict_count == len(sequence):
        # Homogeneous list of objects: expand the first *limit* entries into columns.
        limit = max(0, int(list_expand_limit))
        for position, mapping in enumerate(sequence[:limit]):
            flatten_mapping(
                mapping,
                items_dict=items_dict,
                base_key=f"{key}[{position}]",
                key_separator=key_separator,
                list_joiner=list_joiner,
                list_expand_limit=list_expand_limit,
                list_overflow_suffix=list_overflow_suffix,
            )
        overflow = sequence[limit:]
        if overflow:
            items_dict[f"{key}{list_overflow_suffix}"] = json.dumps(overflow, separators=(",", ":"))
        return

    # Mixed scalars and objects: keep the list intact as compact JSON.
    items_dict[f"{key}{list_overflow_suffix}"] = json.dumps(sequence, separators=(",", ":"))


def flatten_value(
    value: Any,
    *,
    key: str,
    items_dict: Dict[str, str],
    key_separator: str,
    list_joiner: str,
    list_expand_limit: int,
    list_overflow_suffix: str,
) -> None:
    """Route *value* to the right flattener based on its type (dict / list / scalar)."""
    # The recursive flatteners share the same configuration; bundle it once.
    shared = dict(
        items_dict=items_dict,
        key_separator=key_separator,
        list_joiner=list_joiner,
        list_expand_limit=list_expand_limit,
        list_overflow_suffix=list_overflow_suffix,
    )
    if isinstance(value, dict):
        flatten_mapping(value, base_key=key, **shared)
        return
    if isinstance(value, list):
        flatten_sequence(value, key=key, **shared)
        return
    emit_scalar(items_dict, key, value)


def flatten_dict(
    data: Dict[str, JSONVal],
    *,
    parent_key: str = "",
    key_separator: str = ".",
    list_joiner: str = "|",
    list_expand_limit: int = 3,
    list_overflow_suffix: str = "_rest",
) -> Dict[str, str]:
    """Flatten a nested dict into a single-level {column: cell} dict for CSV.

    - nested objects become dot-notation keys
    - lists of scalars are joined with *list_joiner* (HTML stripped)
    - lists of objects expand to key[i].subkey up to *list_expand_limit*,
      with the remainder stored under key + *list_overflow_suffix*
    - mixed lists are stored whole under key + *list_overflow_suffix* as JSON
    - every leaf value has HTML stripped
    """
    flattened: Dict[str, str] = {}
    for field_name, field_value in (data or {}).items():
        if parent_key:
            qualified_key = f"{parent_key}{key_separator}{field_name}"
        else:
            qualified_key = str(field_name)
        flatten_value(
            field_value,
            key=qualified_key,
            items_dict=flattened,
            key_separator=key_separator,
            list_joiner=list_joiner,
            list_expand_limit=list_expand_limit,
            list_overflow_suffix=list_overflow_suffix,
        )
    return flattened


def json_to_csv(
    input_json: Union[Dict[str, Any], List[Dict[str, Any]]],
    *,
    key_sep: str = ".",
    list_joiner: str = "|",
    list_expand_limit: int = 3,
) -> str:
    """Convert a JSON object (or list of objects) into a flattened CSV string.

    Each record is flattened with flatten_dict (dot keys for nested objects,
    scalar lists joined with *list_joiner*, object lists expanded up to
    *list_expand_limit*); HTML is stripped from every leaf value. The header
    row is the union of all flattened keys in first-seen order; cells missing
    from a record are emitted as "".

    :param input_json: A dict or a list of dicts to serialize.
    :param key_sep: Separator joining nested keys in column names.
    :param list_joiner: Delimiter joining lists of scalars into one cell.
    :param list_expand_limit: Max list-of-object elements expanded into columns.
    :return: CSV text (header row first) as produced by csv.writer.
    :raises PluginException: If input_json is neither a dict nor a list.
    """
    if isinstance(input_json, dict):
        records: List[Any] = [input_json]
    elif isinstance(input_json, list):
        records = input_json
    else:
        raise PluginException(
            cause="Invalid JSON input.", assistance="Provide a JSON object or a JSON array of objects."
        )

    flat_rows: List[Dict[str, str]] = []
    header_keys: List[str] = []
    for record in records:
        if isinstance(record, dict):
            flat_row = flatten_dict(
                record,
                key_separator=key_sep,
                list_joiner=list_joiner,
                list_expand_limit=list_expand_limit,
            )
        else:
            # Non-object entries (scalars / None) land in a single "value" column.
            flat_row = {"value": strip_html("" if record is None else str(record))}
        flat_rows.append(flat_row)
        header_keys.extend(flat_row.keys())

    # Ordered de-duplication: dict preserves insertion order, so the header
    # keeps first-seen column order without a hand-rolled seen-set loop.
    ordered_keys = list(dict.fromkeys(header_keys))

    output_buffer = StringIO()
    csv_writer = csv.writer(output_buffer)
    if ordered_keys:
        csv_writer.writerow(ordered_keys)
        for row in flat_rows:
            csv_writer.writerow([row.get(key, "") for key in ordered_keys])

    return output_buffer.getvalue()
3 changes: 2 additions & 1 deletion plugins/csv/plugin.spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ description: "[Comma Separated Value](https://en.wikipedia.org/wiki/Comma-separa
\ from CSV strings and files.\n\nUsing the CSV plugin, users can automate conversions\
\ between JSON and CSV to help enable service interoperability\nas well as filter\
\ data within a CSV file"
version: 2.0.5
version: 2.1.0
connection_version: 2
supported_versions: ['2025-02-25']
vendor: rapid7
Expand All @@ -34,6 +34,7 @@ key_features:
- Convert between JSON and CSV
- Filter strings
version_history:
- 2.1.0 - Updated To_CSV function to flatten lists in columns
- 2.0.5 - Updated SDK to the latest version (6.3.10)
- 2.0.4 - Updated SDK to the latest version (6.3.3)
- 2.0.3 - Updated SDK to the latest version (6.2.5)
Expand Down
2 changes: 1 addition & 1 deletion plugins/csv/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

setup(
name="csv-rapid7-plugin",
version="2.0.5",
version="2.1.0",
description="[Comma Separated Value](https://en.wikipedia.org/wiki/Comma-separated_values) (CSV) is a common format to express data.This plugin allows one to extract fields from CSV strings and files.Using the CSV plugin, users can automate conversions between JSON and CSV to help enable service interoperabilityas well as filter data within a CSV file",
author="rapid7",
author_email="",
Expand Down
19 changes: 12 additions & 7 deletions plugins/csv/unit_test/test_json_to_csv_bytes.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# plugins/csv/unit_test/test_json_to_csv_bytes.py

import os
import sys

Expand All @@ -21,8 +23,9 @@ def test_json_to_csv_bytes(self):
],
}
)
# Expect unwrapped base64 (single line, no trailing newline)
expected = {
Output.CSV_BYTES: "Y29sdW1uMSxjb2x1bW4yLGNvbHVtbjMNCnZhbHVlMSx2YWx1ZTIsdmFsdWUzDQp2YWx1ZTQsdmFs\ndWU1LHZhbHVlNg0KdmFsdWU3LHZhbHVlOCx2YWx1ZTkNCg==\n"
Output.CSV_BYTES: "Y29sdW1uMSxjb2x1bW4yLGNvbHVtbjMNCnZhbHVlMSx2YWx1ZTIsdmFsdWUzDQp2YWx1ZTQsdmFsdWU1LHZhbHVlNg0KdmFsdWU3LHZhbHVlOCx2YWx1ZTkNCg=="
}
self.assertEqual(actual, expected)

Expand All @@ -46,8 +49,9 @@ def test_json_to_csv_bytes_value_as_array(self):
],
}
)
# Arrays of scalars are joined with "|" now
expected = {
Output.CSV_BYTES: "Y29sdW1uMSxjb2x1bW4yLGNvbHVtbjMNCnZhbHVlMSx2YWx1ZTIsdmFsdWUzDQp2YWx1ZTQsIlsn\ndmFsdWUnLCAndmFsdWUnXSIsdmFsdWU2DQo=\n"
Output.CSV_BYTES: "Y29sdW1uMSxjb2x1bW4yLGNvbHVtbjMNCnZhbHVlMSx2YWx1ZTIsdmFsdWUzDQp2YWx1ZTQsdmFsdWV8dmFsdWUsdmFsdWU2DQo="
}
self.assertEqual(actual, expected)

Expand All @@ -61,8 +65,9 @@ def test_json_to_csv_bytes_value_as_object(self):
],
}
)
# Objects are expanded into additional columns
expected = {
Output.CSV_BYTES: "Y29sdW1uMSxjb2x1bW4yLGNvbHVtbjMNCnZhbHVlMSx2YWx1ZTIsdmFsdWUzDQp2YWx1ZTQsInsn\nY29sdW1uMl8xJzogJ3ZhbHVlJywgJ2NvbHVtbic6ICd2YWx1ZSd9Iix2YWx1ZTYNCg==\n"
Output.CSV_BYTES: "Y29sdW1uMSxjb2x1bW4yLGNvbHVtbjMsY29sdW1uMi5jb2x1bW4yXzEsY29sdW1uMi5jb2x1bW4NCnZhbHVlMSx2YWx1ZTIsdmFsdWUzLCwNCnZhbHVlNCwsdmFsdWU2LHZhbHVlLHZhbHVlDQo="
}
self.assertEqual(actual, expected)

Expand All @@ -78,7 +83,7 @@ def test_json_to_csv_bytes_empty_object(self):
}
)
expected = {
Output.CSV_BYTES: "Y29sdW1uMSxjb2x1bW4yLGNvbHVtbjMNCnZhbHVlMSx2YWx1ZTIsdmFsdWUzDQp2YWx1ZTQsdmFs\ndWU1LHZhbHVlNg0KLCwNCg==\n"
Output.CSV_BYTES: "Y29sdW1uMSxjb2x1bW4yLGNvbHVtbjMNCnZhbHVlMSx2YWx1ZTIsdmFsdWUzDQp2YWx1ZTQsdmFsdWU1LHZhbHVlNg0KLCwNCg=="
}
self.assertEqual(actual, expected)

Expand All @@ -94,7 +99,7 @@ def test_json_to_csv_bytes_empty_fields(self):
}
)
expected = {
Output.CSV_BYTES: "Y29sdW1uMSxjb2x1bW4yLGNvbHVtbjMNCnZhbHVlMSwsdmFsdWUzDQosdmFsdWU1LHZhbHVlNg0K\ndmFsdWU3LHZhbHVlOCwNCg==\n"
Output.CSV_BYTES: "Y29sdW1uMSxjb2x1bW4yLGNvbHVtbjMNCnZhbHVlMSwsdmFsdWUzDQosdmFsdWU1LHZhbHVlNg0KdmFsdWU3LHZhbHVlOCwNCg=="
}
self.assertEqual(actual, expected)

Expand All @@ -109,7 +114,7 @@ def test_json_to_csv_bytes_unicode(self):
}
)
expected = {
Output.CSV_BYTES: "Y29sdW1uMSxjb2x1bW4yLGNvbHVtbjMNCsSFYcSHY2XEmSx2YWx1ZTIsdmFsdWUzDQp2YWx1ZTQs\ncHl0aMO2w7bDtm4sdmFsdWU2DQo=\n"
Output.CSV_BYTES: "Y29sdW1uMSxjb2x1bW4yLGNvbHVtbjMNCsSFYcSHY2XEmSx2YWx1ZTIsdmFsdWUzDQp2YWx1ZTQscHl0aMO2w7bDtm4sdmFsdWU2DQo="
}
self.assertEqual(actual, expected)

Expand All @@ -125,6 +130,6 @@ def test_json_to_csv_bytes_unstructured_data(self):
}
)
expected = {
Output.CSV_BYTES: "Y29sdW1uMSxjb2x1bW4yLGNvbHVtbjMsY29sdW1uNA0KdmFsdWUxLHZhbHVlMix2YWx1ZTMsDQp2\nYWx1ZTQsdmFsdWU1LCwNCnZhbHVlNyx2YWx1ZTgsdmFsdWU5LHZhbHVlMTANCg==\n"
Output.CSV_BYTES: "Y29sdW1uMSxjb2x1bW4yLGNvbHVtbjMsY29sdW1uNA0KdmFsdWUxLHZhbHVlMix2YWx1ZTMsDQp2YWx1ZTQsdmFsdWU1LCwNCnZhbHVlNyx2YWx1ZTgsdmFsdWU5LHZhbHVlMTANCg=="
}
self.assertEqual(actual, expected)
Loading