Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/Build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -125,10 +125,11 @@ jobs:
matrix:
python-version: [ '3.10', '3.11', '3.12', '3.13', '3.14' ]
services:
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch:7.11.0
opensearch:
image: opensearchproject/opensearch:3.2.0
env:
discovery.type: single-node
DISABLE_SECURITY_PLUGIN: "true"
options: >-
--health-cmd "curl http://localhost:9200/_cluster/health"
--health-interval 10s
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/PR.yml
Original file line number Diff line number Diff line change
Expand Up @@ -103,17 +103,17 @@ jobs:
matrix:
python-version: [ '3.10', '3.11', '3.12', '3.13', '3.14' ]
services:
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch:7.11.0
opensearch:
image: opensearchproject/opensearch:3.2.0
env:
discovery.type: single-node
DISABLE_SECURITY_PLUGIN: "true"
options: >-
--health-cmd "curl http://localhost:9200/_cluster/health"
--health-interval 10s
--health-timeout 5s
--health-retries 10
ports:
# <port on host>:<port on container>
- 9200:9200
steps:
- uses: actions/checkout@v6
Expand Down
98 changes: 46 additions & 52 deletions cloud_governance/common/elasticsearch/elasticsearch_operations.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from datetime import datetime, timezone
import time
import pandas as pd
from elasticsearch.helpers import bulk

from cloud_governance.main.environment_variables import environment_variables

from elasticsearch_dsl import Search
from opensearchpy import OpenSearch
from opensearchpy.helpers import bulk as opensearch_bulk
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk as es_bulk
from typeguard import typechecked

from cloud_governance.common.elasticsearch.elasticsearch_exceptions import ElasticSearchDataNotUploaded
Expand Down Expand Up @@ -46,69 +47,63 @@ def __init__(self,
self.__environment_variables_dict.get('ES_TIMEOUT')) if self.__environment_variables_dict.get(
'ES_TIMEOUT') else timeout
self.__account = self.__environment_variables_dict.get('account')
self.__server_type = self.__environment_variables_dict.get('ES_SERVER_TYPE', 'opensearch')
try:
add_host = {'host': self.__es_host, 'port': self.__es_port,
'http_auth': f'{self.__es_user}:{self.__es_password}'}
if int(self.__es_port) == 443:
add_host['use_ssl'] = True
self.__es = Elasticsearch([add_host],
timeout=self.__timeout,
max_retries=2)
if self.__server_type == 'elasticsearch':
scheme = 'https' if int(self.__es_port) == 443 else 'http'
hosts = [{'host': self.__es_host, 'port': int(self.__es_port), 'scheme': scheme}]
basic_auth = (self.__es_user, self.__es_password) if self.__es_user else None
self.__es = Elasticsearch(hosts, basic_auth=basic_auth, verify_certs=False,
request_timeout=self.__timeout, max_retries=2)
self.__bulk_fn = es_bulk
else:
add_host = {'host': self.__es_host, 'port': self.__es_port}
if self.__es_user:
add_host['http_auth'] = (self.__es_user, self.__es_password)
if int(self.__es_port) == 443:
add_host['use_ssl'] = True
add_host['verify_certs'] = False
self.__es = OpenSearch([add_host], timeout=self.__timeout, max_retries=2)
self.__bulk_fn = opensearch_bulk
except Exception as err:
logger.error(f'Failed to connect to {self.__server_type} at {self.__es_host}:{self.__es_port}: {err}')
self.__es = None

# Skip product check for OpenSearch compatibility (elasticsearch-py 7.14+ rejects non-Elasticsearch servers)
try:
if self.__es and hasattr(self.__es.transport, '_verified_elasticsearch'):
self.__es.transport._verified_elasticsearch = True
except AttributeError as err:
logger.warning(f"Could not bypass Elasticsearch product check: {err}")

def __elasticsearch_get_index_hits(self, index: str, uuid: str = '', workload: str = '', fast_check: bool = False,
id: bool = False):
"""
This method search for data per index in last 2 minutes and return the number of docs or zero
This method search for data per index in last 15 minutes and return the number of docs or zero
:param index:
:param workload: need only if there is different timestamp parameter in Elasticsearch
:param workload: need only if there is different timestamp parameter in the server
:param id: True to return the doc ids
:param fast_check: return fast response
:return:
"""
"""
:return:
"""
ids = []
# https://github.com/elastic/elasticsearch-dsl-py/issues/49
self.__es.indices.refresh(index=index)
# timestamp name in Elasticsearch is different
search = Search(using=self.__es, index=index).filter('range', timestamp={
'gte': f'now-{self.ES_FETCH_MIN_TIME}m', 'lt': 'now'})
# reduce the search result
if fast_check:
search = search[0:self.MIN_SEARCH_RESULTS]
else:
search = search[0:self.MAX_SEARCH_RESULTS]
search_response = search.execute()
if search_response.hits:
size = self.MIN_SEARCH_RESULTS if fast_check else self.MAX_SEARCH_RESULTS
query = {"query": {"range": {"timestamp": {"gte": f"now-{self.ES_FETCH_MIN_TIME}m", "lt": "now"}}}}
search_response = self.__es.search(index=index, body=query, size=size)
hits = search_response.get('hits', {}).get('hits', [])
if hits:
if uuid:
count_hits = 0
for row in search_response:
if type(row['uuid']) == str:
# uperf return str
current_uuid = row['uuid']
else:
current_uuid = row['uuid'][0]
for row in hits:
source = row['_source']
current_uuid = source.get('uuid', '')
if isinstance(current_uuid, list):
current_uuid = current_uuid[0]
if current_uuid == uuid:
if fast_check:
return 1
ids.append(row.meta.id)
ids.append(row['_id'])
count_hits += 1
if id:
return ids
else:
return count_hits
else:
return len(search_response.hits)
return len(hits)
else:
return 0

Expand Down Expand Up @@ -141,13 +136,12 @@ def verify_elasticsearch_data_uploaded(self, index: str, uuid: str = '', workloa
raise ElasticSearchDataNotUploaded

@typechecked()
def upload_to_elasticsearch(self, index: str, data: dict, doc_type: str = '_doc', es_add_items: dict = None,
def upload_to_elasticsearch(self, index: str, data: dict, es_add_items: dict = None,
**kwargs):
Comment on lines +139 to 140

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Verify callsites that may pass list payloads into upload_to_elasticsearch
rg -nP --type=py -C2 '\bupload_to_elasticsearch\s*\('

Repository: redhat-performance/cloud-governance

Length of output: 16509


🏁 Script executed:

#!/bin/bash
set -euo pipefail

FILE="cloud_governance/common/elasticsearch/elasticsearch_operations.py"

echo "=== upload_to_elasticsearch decorator + signature ==="
nl -ba "$FILE" | sed -n '120,220p'

echo
echo "=== bulk/dispatch + result handling area ==="
nl -ba "$FILE" | sed -n '220,460p'

echo
echo "=== any direct list passed as data=... to upload_to_elasticsearch ==="
rg -nP --type=py '\bupload_to_elasticsearch\s*\(\s*[^)]*data\s*=\s*\[' -S . || true

echo
echo "=== any callsites passing variables that might be lists named data/records ==="
rg -nP --type=py '\bupload_to_elasticsearch\s*\(\s*[^)]*\bdata\s*=\s*(data|records|items|payload|records_data|record)\b' -S . || true

Repository: redhat-performance/cloud-governance

Length of output: 194


🏁 Script executed:

#!/bin/bash
set -euo pipefail

FILE="cloud_governance/common/elasticsearch/elasticsearch_operations.py"

echo "=== upload_to_elasticsearch decorator + signature (cat -n) ==="
cat -n "$FILE" | sed -n '120,220p'

echo
echo "=== bulk/dispatch + result handling area (cat -n) ==="
cat -n "$FILE" | sed -n '220,460p'

echo
echo "=== any direct list passed as data=[...] to upload_to_elasticsearch ==="
rg -nP --type=py '\bupload_to_elasticsearch\s*\(\s*[^)]*data\s*=\s*\[' -S . || true

echo
echo "=== any callsites passing variables that might be lists named data/records/items/payload ==="
rg -nP --type=py '\bupload_to_elasticsearch\s*\(\s*[^)]*\bdata\s*=\s*(data|records|items|payload|records_data|record)\b' -S . || true

Repository: redhat-performance/cloud-governance

Length of output: 14638


🏁 Script executed:

#!/bin/bash
set -euo pipefail

FILE="cloud_governance/common/elasticsearch/elasticsearch_operations.py"

echo "=== imports/head (typeguard) ==="
cat -n "$FILE" | sed -n '1,120p'

echo
echo "=== typechecked usage in repo (limit output) ==="
rg -n --type=py 'typeguard|typechecked\(' cloud_governance/common/elasticsearch/elasticsearch_operations.py cloud_governance | head -n 80

Repository: redhat-performance/cloud-governance

Length of output: 13338


🏁 Script executed:

#!/bin/bash
set -euo pipefail

# Detect Python version requirements (pyproject/tox/setup)
rg -n --no-heading -S "python_requires|requires-python|target-version|tox" pyproject.toml setup.cfg setup.py tox.ini .github/workflows -g'*.yml' -g'*.yaml' || true

# Quick heuristic: do we use PEP604 unions (|) elsewhere in repo?
rg -n --no-heading -S "\b\w+\s*\|\s*\w+" cloud_governance -g'*.py' | head -n 40 || true

Repository: redhat-performance/cloud-governance

Length of output: 690


🏁 Script executed:

#!/bin/bash
set -euo pipefail

# Inspect key callsites around upload_to_elasticsearch(data=...)
sed -n '1,260p' cloud_governance/main/es_uploader.py | nl -ba | sed -n '1,260p'

echo "----"
sed -n '1,140p' cloud_governance/common/elasticsearch/elastic_upload.py | nl -ba | sed -n '1,160p'

echo "----"
sed -n '1,220p' cloud_governance/common/mails/postfix.py | nl -ba | sed -n '90,160p'

Repository: redhat-performance/cloud-governance

Length of output: 122


🌐 Web query:

elasticsearch.helpers.bulk return value success errors tuple stats_only

💡 Result:

The elasticsearch.helpers.bulk function in the Python Elasticsearch client returns a tuple containing two elements: (number_of_successful_actions, error_information) [1][2][3]. The structure of the second element in the returned tuple depends on the value of the stats_only parameter: If stats_only=False (the default): The second element is a list of error responses encountered during the bulk operation [1][2][3]. If stats_only=True: The second element is an integer representing the total number of failed operations [1][2][3]. Important implementation notes: 1. Raise on Error: By default, the bulk helper raises a BulkIndexError if any errors are encountered [1][3]. The stats_only parameter only effectively controls the return value if you set raise_on_error=False [1][3]. 2. Memory Usage: Collecting detailed error information (when stats_only=False) can lead to high memory usage when processing large datasets [1][3]. If you need to handle high volumes of data while tracking errors, the streaming_bulk helper is recommended, as it allows you to process results without storing all errors in memory [1][3].

Citations:


🌐 Web query:

opensearchpy.helpers.bulk return value tuple success errors

💡 Result:

The opensearchpy.helpers.bulk function returns a tuple containing two elements: a count of successfully executed actions and a collection of errors [1][2]. The exact structure of the second element in the tuple depends on the stats_only parameter: 1. If stats_only is set to False (the default), the second element is a list of error dictionaries describing each failed operation [1][2]. 2. If stats_only is set to True, the second element is an integer representing the total count of failed operations [1][2]. Important Considerations: - By default, helpers.bulk raises a BulkIndexError if any operation in the bulk request fails [3]. - To retrieve errors without raising an exception, you must set raise_on_error=False in the function arguments [1][2]. - Because helpers.bulk buffers documents to reduce memory usage, if you need to process large datasets and handle errors, it is often more memory-efficient to use the streaming_bulk helper, which yields results for each individual operation rather than returning a single summary tuple at the end [1][2][4].

Citations:


🏁 Script executed:

#!/bin/bash
set -euo pipefail

# Where upload_data_in_bulk is called
rg -n --type=py '\bupload_data_in_bulk\s*\(' cloud_governance || true

# Show exact call to bulk helper in upload_data_in_bulk
rg -n --type=py '__bulk_fn|upload_data_in_bulk|helpers\.bulk|response\s*=\s*self\.__bulk_fn' cloud_governance/common/elasticsearch/elasticsearch_operations.py

Repository: redhat-performance/cloud-governance

Length of output: 1035


Fix upload_to_elasticsearch type/runtime mismatch and incorrect bulk failure accounting

  • upload_to_elasticsearch(..., data: dict) is wrapped with @typechecked(), yet the function later includes a JSON-array branch (else: for record in data ...). The body also performs dict-only operations (data.get(...), data.keys(), etc.) before that branch, so passing a list will fail (TypeCheckError and/or AttributeError). Align the type/signature with actual behavior: either remove the list branch or refactor to handle list-of-dicts before any dict operations.
  • upload_data_in_bulk checks if response: after response = helpers.bulk(...). helpers.bulk returns a tuple (success_count, errors) and (by default) raises BulkIndexError on errors, so this boolean check won’t properly detect partial failures and the code currently ignores the error details. Compute failures from the returned tuple (raise_on_error=False) and/or catch BulkIndexError to update failed_items.
🧰 Tools
🪛 Ruff (0.15.14)

[warning] 139-139: PEP 484 prohibits implicit Optional

Convert to T | None

(RUF013)

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@cloud_governance/common/elasticsearch/elasticsearch_operations.py` around
lines 139 - 140, The upload_to_elasticsearch function signature and body
mismatch: either accept lists or not; update the function to handle a
list-of-dicts up front (before any dict-specific calls) or change the type
annotation/remove `@typechecked` so the function only accepts dicts; specifically
alter upload_to_elasticsearch to detect if data is a list and branch before
calling data.get or data.keys (references: upload_to_elasticsearch). Also fix
bulk failure accounting in upload_data_in_bulk by calling helpers.bulk with
raise_on_error=False and unpacking the returned tuple (success_count, errors) or
by catching BulkIndexError from helpers.bulk (reference: upload_data_in_bulk and
usage of helpers.bulk), then compute failed_items from errors (or length of
errors) and surface/log the error details instead of relying on a boolean check
of response.

"""
This method is upload json data into elasticsearch
:param index: index name to be stored in elasticsearch
:param data: data must be in dictionary i.e. {'key': 'value'}
:param doc_type:
:param es_add_items:
:return:
"""
Expand Down Expand Up @@ -178,10 +172,10 @@ def upload_to_elasticsearch(self, index: str, data: dict, doc_type: str = '_doc'
kwargs['id'] = data.get('IndexId')
try:
if isinstance(data, dict): # JSON Object
self.__es.index(index=index, doc_type=doc_type, body=data, **kwargs)
self.__es.index(index=index, body=data, **kwargs)
else: # JSON Array
for record in data:
self.__es.index(index=index, doc_type=doc_type, body=record, **kwargs)
self.__es.index(index=index, body=record, **kwargs)
return True
except Exception as err:
raise err
Expand Down Expand Up @@ -217,12 +211,12 @@ def get_index_hits(self, days: int, index: str):
@param index:
@return:
"""
search = Search(using=self.__es, index=index).filter('range', timestamp={'gte': f'now-{days}d', 'lt': 'now'})
search = search[0:self.MAX_SEARCH_RESULTS]
search_response = search.execute()
query = {"query": {"range": {"timestamp": {"gte": f"now-{days}d", "lt": "now"}}}}
search_response = self.__es.search(index=index, body=query, size=self.MAX_SEARCH_RESULTS)
hits = search_response.get('hits', {}).get('hits', [])
df = pd.DataFrame()
for row in search_response:
df = pd.concat([df, pd.DataFrame([row.to_dict()])], ignore_index=True).fillna({})
for row in hits:
df = pd.concat([df, pd.DataFrame([row['_source']])], ignore_index=True).fillna({})
return df.to_dict('records')

@typechecked()
Expand Down Expand Up @@ -273,7 +267,7 @@ def fetch_data_by_es_query(self, es_index: str, query: dict = None, start_dateti
if start_datetime and end_datetime:
query = self.get_query_data_between_range(start_datetime=start_datetime, end_datetime=end_datetime)
if query:
response = self.__es.search(index=es_index, body=query, doc_type='_doc', size=search_size, scroll='1h',
response = self.__es.search(index=es_index, body=query, size=search_size, scroll='1h',
filter_path=filter_path)
if result_agg:
es_data.extend(response.get('aggregations').get(group_by).get('buckets'))
Expand Down Expand Up @@ -363,7 +357,7 @@ def upload_data_in_bulk(self, data_items: list, index: str, **kwargs):
if 'CleanUpDays' not in item:
item['ExpireDays'] = self.__environment_variables_dict.get('DAYS_TO_TAKE_ACTION')
item['policy'] = self.__environment_variables_dict.get('policy')
response = bulk(self.__es, bulk_items)
response = self.__bulk_fn(self.__es, bulk_items)
if response:
total_uploaded += len(bulk_items)
else:
Expand Down Expand Up @@ -399,7 +393,7 @@ def post_query(self, query: dict, es_index: str, result_agg: bool = False):
if result_agg:
return response.get('aggregations')
else:
return response.get('hits', {}).get('hits', {})
return response.get('hits', {}).get('hits', [])
except Exception as err:
logger.error(err)
raise err
1 change: 1 addition & 0 deletions cloud_governance/main/environment_variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ def __init__(self):
self._environment_variables_dict['es_index'] = EnvironmentVariables.get_env('es_index', es_index)
self._environment_variables_dict['es_doc_type'] = EnvironmentVariables.get_env('es_doc_type', '')
self._environment_variables_dict['ES_TIMEOUT'] = EnvironmentVariables.get_env('ES_TIMEOUT', 2000)
self._environment_variables_dict['ES_SERVER_TYPE'] = EnvironmentVariables.get_env('ES_SERVER_TYPE', 'opensearch')

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Normalize ES_SERVER_TYPE on read to avoid config drift failures.

At Line 213, normalize case/whitespace so non-exact env values don’t accidentally select the OpenSearch branch.

Proposed fix
-        self._environment_variables_dict['ES_SERVER_TYPE'] = EnvironmentVariables.get_env('ES_SERVER_TYPE', 'opensearch')
+        self._environment_variables_dict['ES_SERVER_TYPE'] = (
+            EnvironmentVariables.get_env('ES_SERVER_TYPE', 'opensearch').strip().lower()
+        )
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@cloud_governance/main/environment_variables.py` at line 213, Normalize the
ES_SERVER_TYPE value when reading it: update the call that sets
self._environment_variables_dict['ES_SERVER_TYPE'] (which currently uses
EnvironmentVariables.get_env('ES_SERVER_TYPE', 'opensearch')) to strip
surrounding whitespace and normalize case (e.g., .strip().lower()) so values
like " OpenSearch " or "OPENSEARCH" resolve consistently; ensure the normalized
value is stored back into self._environment_variables_dict['ES_SERVER_TYPE'] so
downstream logic that checks for 'opensearch' vs other types behaves
deterministically.


# GitHub credentials
self._environment_variables_dict['git_access_token'] = EnvironmentVariables.get_env('git_access_token', '')
Expand Down
6 changes: 2 additions & 4 deletions cloud_governance/main/es_uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ def __init__(self, **kwargs):
self.__es_host = kwargs.get('es_host')
self.__es_port = kwargs.get('es_port')
self.__es_index = kwargs.get('es_index')
self.__es_doc_type = kwargs.get('es_doc_type')
self.__es_add_items = kwargs.get('es_add_items')
self.__bucket_name = kwargs.get('bucket')
self.__s3_file_name = kwargs.get('s3_file_name')
Expand Down Expand Up @@ -95,14 +94,13 @@ def __get_user_cost(self, data):
num += 1
return user_cost_results

def upload_last_policy_to_elasticsearch(self, policy: str, index: str, doc_type: str, s3_json_file: str,
def upload_last_policy_to_elasticsearch(self, policy: str, index: str, s3_json_file: str,
es_add_items: dict = None):
Comment on lines +97 to 98

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

es_add_items is optional in signature but required in execution path.

With the current flow, None will crash on .update() / .items(). Guard it to keep the API contract consistent.

Proposed fix
-    def upload_last_policy_to_elasticsearch(self, policy: str, index: str, s3_json_file: str,
-                                            es_add_items: dict = None):
+    def upload_last_policy_to_elasticsearch(self, policy: str, index: str, s3_json_file: str,
+                                            es_add_items: dict | None = None):
...
-        for key, value in es_add_items.items():
+        for key, value in (es_add_items or {}).items():
             data[key] = value
...
     def upload_to_es(self, account):
@@
-        self.__es_add_items.update({'policy': self.__policy_name, 'region': self.__region_name})
+        self.__es_add_items = self.__es_add_items or {}
+        self.__es_add_items.update({'policy': self.__policy_name, 'region': self.__region_name})

Also applies to: 210-212

🧰 Tools
🪛 Ruff (0.15.14)

[warning] 98-98: PEP 484 prohibits implicit Optional

Convert to T | None

(RUF013)

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@cloud_governance/main/es_uploader.py` around lines 97 - 98, The parameter
es_add_items in upload_last_policy_to_elasticsearch is declared optional but the
implementation calls .update() / .items() and will crash if None; fix by
guarding against None at function start (e.g., if es_add_items is None:
es_add_items = {}) or by using a local empty dict before any .update()/.items()
usage so the code never calls those methods on None; apply the same defensive
fix to the other occurrence(s) that use es_add_items around lines 210-212.

"""
This method is upload json kubernetes cluster data into elasticsearch
:param policy:
:param s3_json_file:
:param index:
:param doc_type:
:param es_add_items:
:return:
"""
Expand Down Expand Up @@ -210,5 +208,5 @@ def upload_to_es(self, account):
"""
self.__es_add_items.update({'policy': self.__policy_name, 'region': self.__region_name})
self.upload_last_policy_to_elasticsearch(policy=self.__policy_name, index=self.__es_index,
doc_type=self.__es_doc_type, s3_json_file=self.__s3_file_name,
s3_json_file=self.__s3_file_name,
es_add_items=self.__es_add_items)
2 changes: 0 additions & 2 deletions cloud_governance/main/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,6 @@ def main():
es_host = environment_variables_dict.get('es_host', '')
es_port = environment_variables_dict.get('es_port', '')
es_index = environment_variables_dict.get('es_index', '')
es_doc_type = environment_variables_dict.get('es_doc_type', '')
bucket = environment_variables_dict.get('bucket', '')
main_operations = MainOperations()
response = main_operations.run()
Expand Down Expand Up @@ -309,7 +308,6 @@ def run_gcp_policy_runner():
input_data = {'es_host': es_host,
'es_port': int(es_port),
'es_index': es_index,
'es_doc_type': es_doc_type,
'es_add_items': {'account': account},
'bucket': bucket,
'logs_bucket_key': 'logs',
Expand Down
2 changes: 2 additions & 0 deletions jenkins/cloud_resource_orchestration/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ pipeline {
AWS_SECRET_ACCESS_KEY_DELETE_PERF_SCALE = credentials('cloud-governance-aws-secret-access-key-delete-perf-scale')
ES_HOST = credentials('cloud-governance-es-host')
ES_PORT = credentials('cloud-governance-es-port')
ES_USER = credentials('cloud-governance-es-user')
ES_PASSWORD = credentials('cloud-governance-es-password')
CLOUD_GOVERNANCE_SPECIAL_USER_MAILS = credentials('cloud-governance-special-user-mails')
LDAP_HOST_NAME = credentials('cloud-governance-ldap-host-name')
JIRA_URL = credentials('JIRA_URL')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
AWS_SECRET_ACCESS_KEY_DELETE_PERF_SCALE = os.environ['AWS_SECRET_ACCESS_KEY_DELETE_PERF_SCALE']
ES_HOST = os.environ['ES_HOST']
ES_PORT = os.environ['ES_PORT']
ES_USER = os.environ.get('ES_USER', '')
ES_PASSWORD = os.environ.get('ES_PASSWORD', '')
LDAP_HOST_NAME = os.environ['LDAP_HOST_NAME']
JIRA_URL = os.environ['JIRA_URL']
JIRA_USERNAME = os.environ['JIRA_USERNAME']
Expand All @@ -30,7 +32,8 @@
es_index = CLOUD_RESOURCE_ORCHESTRATION_INDEX

common_env_vars = {
'es_host': ES_HOST, 'es_port': ES_PORT, 'CRO_ES_INDEX': CRO_ES_INDEX, 'log_level': 'INFO',
'es_host': ES_HOST, 'es_port': ES_PORT, 'es_user': ES_USER, 'es_password': ES_PASSWORD,
'CRO_ES_INDEX': CRO_ES_INDEX, 'log_level': 'INFO',
Comment on lines +35 to +36

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | 🏗️ Heavy lift

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
set -euo pipefail

# Show the relevant file with line numbers (only parts likely needed)
FILE="jenkins/cloud_resource_orchestration/run_cloud_resource_orchestration.py"
if [ ! -f "$FILE" ]; then
  echo "Missing file: $FILE"
  exit 1
fi

echo "=== File excerpt around lines 1-140 ==="
nl -ba "$FILE" | sed -n '1,160p'

echo "=== Grep for os.system usage and env dict creation ==="
rg -n "os\.system|subprocess\.run|env\s*=" "$FILE" || true

echo "=== Extract specific lines mentioned in review comment ==="
nl -ba "$FILE" | sed -n '20,120p' | sed -n '1,120p'

# Run flake8 as per instructions (error detection)
echo "=== Running flake8 error detection ==="
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics

# Run flake8 style validation (exit-zero allowed)
echo "=== Running flake8 style validation ==="
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics

Repository: redhat-performance/cloud-governance

Length of output: 181


🏁 Script executed:

#!/bin/bash
set -euo pipefail

FILE="jenkins/cloud_resource_orchestration/run_cloud_resource_orchestration.py"
echo "=== File existence ==="
ls -l "$FILE" || { echo "Missing $FILE"; exit 1; }

echo "=== Command availability (flake8) ==="
command -v flake8 || echo "flake8 not found"

echo "=== os.system / subprocess.run occurrences (file) ==="
rg -n "os\.system|subprocess\.run" "$FILE" || true

echo "=== Inspect around lines 1-140 (no nl; use sed) ==="
sed -n '1,160p' "$FILE"

echo "=== Inspect around env dict lines (35-60) ==="
sed -n '25,70p' "$FILE"

echo "=== Inspect around later command strings (70-110) ==="
sed -n '65,120p' "$FILE"

echo "=== Inspect around later command strings (90-120) ==="
sed -n '85,130p' "$FILE"

echo "=== Extract specific mentioned snippet lines (35-36, 54-55) with awk ==="
awk 'NR>=30 && NR<=60 {printf "%5d:%s\n", NR, $0}' "$FILE"

awk 'NR>=45 && NR<=65 {printf "%5d:%s\n", NR, $0}' "$FILE"

echo "=== Run flake8 commands only if available ==="
if command -v flake8 >/dev/null 2>&1; then
  flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics || true
  flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics || true
else
  echo "Skipping flake8 (not installed in sandbox)."
fi

Repository: redhat-performance/cloud-governance

Length of output: 22289


Avoid interpolating ES credentials into os.system podman command strings.

common_env_vars/common_input_vars (lines 35-36 and 54-55) include es_user/es_password, which are stringified into -e KEY="VALUE" args (combine_vars) and injected into podman run command strings executed via os.system (lines 77 and 98). This preserves command-injection and secret-exposure risk; replace the shell-built command strings with subprocess.run([...], check=True) and pass credentials via an env= dict instead.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@jenkins/cloud_resource_orchestration/run_cloud_resource_orchestration.py`
around lines 35 - 36, The code currently stringifies secrets from
common_env_vars/common_input_vars (including es_user and es_password) via
combine_vars into a shell-interpolated podman command executed with os.system;
replace those os.system invocations that run podman (the places that call
os.system with the combined -e "KEY=VALUE" string) with subprocess.run using a
list/sequence argument (no shell=True) and supply sensitive values via an env=
dictionary (e.g., build a copy of os.environ, update it with es_user/es_password
and other non-secret envs from combine_vars) so nothing is injected into the
shell command string; keep any non-secret CLI args as list elements and use
check=True to surface failures.

'LDAP_HOST_NAME': LDAP_HOST_NAME,
'JIRA_QUEUE': JIRA_QUEUE, 'JIRA_TOKEN': JIRA_TOKEN, 'JIRA_USERNAME': JIRA_USERNAME, 'JIRA_URL': JIRA_URL,
'CRO_COST_OVER_USAGE': CRO_COST_OVER_USAGE, 'CRO_PORTAL': CRO_PORTAL, 'CRO_DEFAULT_ADMINS': CRO_DEFAULT_ADMINS,
Expand All @@ -48,7 +51,8 @@

os.system('echo Run CloudResourceOrchestration in pre active region')

common_input_vars = {'es_host': ES_HOST, 'es_port': ES_PORT, 'CRO_ES_INDEX': CRO_ES_INDEX, 'log_level': 'INFO',
common_input_vars = {'es_host': ES_HOST, 'es_port': ES_PORT, 'es_user': ES_USER, 'es_password': ES_PASSWORD,
'CRO_ES_INDEX': CRO_ES_INDEX, 'log_level': 'INFO',
'LDAP_HOST_NAME': LDAP_HOST_NAME,
'JIRA_QUEUE': JIRA_QUEUE, 'JIRA_TOKEN': JIRA_TOKEN, 'JIRA_USERNAME': JIRA_USERNAME,
'JIRA_URL': JIRA_URL,
Expand Down
2 changes: 2 additions & 0 deletions jenkins/clouds/aws/daily/cost_explorer/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ pipeline {
BUCKET_PERF_SCALE = credentials('cloud-governance-bucket-perf_scale')
ES_HOST = credentials('cloud-governance-es-host')
ES_PORT = credentials('cloud-governance-es-port')
ES_USER = credentials('cloud-governance-es-user')
ES_PASSWORD = credentials('cloud-governance-es-password')
GITHUB_TOKEN = credentials('cloud-governance-git-access-token')
CLOUD_GOVERNANCE_SPECIAL_USER_MAILS = credentials('cloud-governance-special-user-mails')
LDAP_HOST_NAME = credentials('cloud-governance-ldap-host-name')
Expand Down
Loading