redhat-performance · pragya811 · May 28, 2026 · May 26, 2026 · coderabbitai · May 26, 2026
diff --git a/.github/workflows/Build.yml b/.github/workflows/Build.yml
@@ -125,10 +125,11 @@ jobs:
       matrix:
         python-version: [ '3.10', '3.11', '3.12', '3.13', '3.14' ]
     services:
-      elasticsearch:
-        image: docker.elastic.co/elasticsearch/elasticsearch:7.11.0
+      opensearch:
+        image: opensearchproject/opensearch:3.2.0
         env:
           discovery.type: single-node
+          DISABLE_SECURITY_PLUGIN: "true"
         options: >-
           --health-cmd "curl http://localhost:9200/_cluster/health"
           --health-interval 10s

diff --git a/.github/workflows/PR.yml b/.github/workflows/PR.yml
@@ -103,17 +103,17 @@ jobs:
       matrix:
         python-version: [ '3.10', '3.11', '3.12', '3.13', '3.14' ]
     services:
-      elasticsearch:
-        image: docker.elastic.co/elasticsearch/elasticsearch:7.11.0
+      opensearch:
+        image: opensearchproject/opensearch:3.2.0
         env:
           discovery.type: single-node
+          DISABLE_SECURITY_PLUGIN: "true"
         options: >-
           --health-cmd "curl http://localhost:9200/_cluster/health"
           --health-interval 10s
           --health-timeout 5s
           --health-retries 10
         ports:
-          # <port on host>:<port on container>
           - 9200:9200
     steps:
       - uses: actions/checkout@v6

diff --git a/cloud_governance/common/elasticsearch/elasticsearch_operations.py b/cloud_governance/common/elasticsearch/elasticsearch_operations.py
@@ -1,12 +1,13 @@
 from datetime import datetime, timezone
 import time
 import pandas as pd
-from elasticsearch.helpers import bulk
 
 from cloud_governance.main.environment_variables import environment_variables
 
-from elasticsearch_dsl import Search
+from opensearchpy import OpenSearch
+from opensearchpy.helpers import bulk as opensearch_bulk
 from elasticsearch import Elasticsearch
+from elasticsearch.helpers import bulk as es_bulk
 from typeguard import typechecked
 
 from cloud_governance.common.elasticsearch.elasticsearch_exceptions import ElasticSearchDataNotUploaded
@@ -46,69 +47,63 @@ def __init__(self,
             self.__environment_variables_dict.get('ES_TIMEOUT')) if self.__environment_variables_dict.get(
             'ES_TIMEOUT') else timeout
         self.__account = self.__environment_variables_dict.get('account')
+        self.__server_type = self.__environment_variables_dict.get('ES_SERVER_TYPE', 'opensearch')
         try:
-            add_host = {'host': self.__es_host, 'port': self.__es_port,
-                        'http_auth': f'{self.__es_user}:{self.__es_password}'}
-            if int(self.__es_port) == 443:
-                add_host['use_ssl'] = True
-            self.__es = Elasticsearch([add_host],
-                                      timeout=self.__timeout,
-                                      max_retries=2)
+            if self.__server_type == 'elasticsearch':
+                scheme = 'https' if int(self.__es_port) == 443 else 'http'
+                hosts = [{'host': self.__es_host, 'port': int(self.__es_port), 'scheme': scheme}]
+                basic_auth = (self.__es_user, self.__es_password) if self.__es_user else None
+                self.__es = Elasticsearch(hosts, basic_auth=basic_auth, verify_certs=False,
+                                          request_timeout=self.__timeout, max_retries=2)
+                self.__bulk_fn = es_bulk
+            else:
+                add_host = {'host': self.__es_host, 'port': self.__es_port}
+                if self.__es_user:
+                    add_host['http_auth'] = (self.__es_user, self.__es_password)
+                if int(self.__es_port) == 443:
+                    add_host['use_ssl'] = True
+                    add_host['verify_certs'] = False
+                self.__es = OpenSearch([add_host], timeout=self.__timeout, max_retries=2)
+                self.__bulk_fn = opensearch_bulk
         except Exception as err:
+            logger.error(f'Failed to connect to {self.__server_type} at {self.__es_host}:{self.__es_port}: {err}')
             self.__es = None
 
-        # Skip product check for OpenSearch compatibility (elasticsearch-py 7.14+ rejects non-Elasticsearch servers)
-        try:
-            if self.__es and hasattr(self.__es.transport, '_verified_elasticsearch'):
-                self.__es.transport._verified_elasticsearch = True
-        except AttributeError as err:
-            logger.warning(f"Could not bypass Elasticsearch product check: {err}")
-
     def __elasticsearch_get_index_hits(self, index: str, uuid: str = '', workload: str = '', fast_check: bool = False,
                                        id: bool = False):
         """
-        This method search for data per index in last 2 minutes and return the number of docs or zero
+        This method searches for data per index in the last 15 minutes and returns the number of docs or zero
         :param index:
-        :param workload: need only if there is different timestamp parameter in Elasticsearch
+        :param workload: needed only if the server uses a different timestamp parameter
         :param id: True to return the doc ids
         :param fast_check: return fast response
         :return:
         """
-        """
-        :return:
-        """
         ids = []
-        # https://github.com/elastic/elasticsearch-dsl-py/issues/49
         self.__es.indices.refresh(index=index)
-        # timestamp name in Elasticsearch is different
-        search = Search(using=self.__es, index=index).filter('range', timestamp={
-            'gte': f'now-{self.ES_FETCH_MIN_TIME}m', 'lt': 'now'})
-        # reduce the search result
-        if fast_check:
-            search = search[0:self.MIN_SEARCH_RESULTS]
-        else:
-            search = search[0:self.MAX_SEARCH_RESULTS]
-        search_response = search.execute()
-        if search_response.hits:
+        size = self.MIN_SEARCH_RESULTS if fast_check else self.MAX_SEARCH_RESULTS
+        query = {"query": {"range": {"timestamp": {"gte": f"now-{self.ES_FETCH_MIN_TIME}m", "lt": "now"}}}}
+        search_response = self.__es.search(index=index, body=query, size=size)
+        hits = search_response.get('hits', {}).get('hits', [])
+        if hits:
             if uuid:
                 count_hits = 0
-                for row in search_response:
-                    if type(row['uuid']) == str:
-                        # uperf return str
-                        current_uuid = row['uuid']
-                    else:
-                        current_uuid = row['uuid'][0]
+                for row in hits:
+                    source = row['_source']
+                    current_uuid = source.get('uuid', '')
+                    if isinstance(current_uuid, list):
+                        current_uuid = current_uuid[0]
                     if current_uuid == uuid:
                         if fast_check:
                             return 1
-                        ids.append(row.meta.id)
+                        ids.append(row['_id'])
                         count_hits += 1
                 if id:
                     return ids
                 else:
                     return count_hits
             else:
-                return len(search_response.hits)
+                return len(hits)
         else:
             return 0
 
@@ -141,13 +136,12 @@ def verify_elasticsearch_data_uploaded(self, index: str, uuid: str = '', workloa
         raise ElasticSearchDataNotUploaded
 
     @typechecked()
-    def upload_to_elasticsearch(self, index: str, data: dict, doc_type: str = '_doc', es_add_items: dict = None,
+    def upload_to_elasticsearch(self, index: str, data: dict, es_add_items: dict = None,
                                 **kwargs):
         """
         This method is upload json data into elasticsearch
         :param index: index name to be stored in elasticsearch
         :param data: data must be in dictionary i.e. {'key': 'value'}
-        :param doc_type:
         :param es_add_items:
         :return:
         """
@@ -178,10 +172,10 @@ def upload_to_elasticsearch(self, index: str, data: dict, doc_type: str = '_doc'
             kwargs['id'] = data.get('IndexId')
         try:
             if isinstance(data, dict):  # JSON Object
-                self.__es.index(index=index, doc_type=doc_type, body=data, **kwargs)
+                self.__es.index(index=index, body=data, **kwargs)
             else:  # JSON Array
                 for record in data:
-                    self.__es.index(index=index, doc_type=doc_type, body=record, **kwargs)
+                    self.__es.index(index=index, body=record, **kwargs)
             return True
         except Exception as err:
             raise err
@@ -217,12 +211,12 @@ def get_index_hits(self, days: int, index: str):
         @param index:
         @return:
         """
-        search = Search(using=self.__es, index=index).filter('range', timestamp={'gte': f'now-{days}d', 'lt': 'now'})
-        search = search[0:self.MAX_SEARCH_RESULTS]
-        search_response = search.execute()
+        query = {"query": {"range": {"timestamp": {"gte": f"now-{days}d", "lt": "now"}}}}
+        search_response = self.__es.search(index=index, body=query, size=self.MAX_SEARCH_RESULTS)
+        hits = search_response.get('hits', {}).get('hits', [])
         df = pd.DataFrame()
-        for row in search_response:
-            df = pd.concat([df, pd.DataFrame([row.to_dict()])], ignore_index=True).fillna({})
+        for row in hits:
+            df = pd.concat([df, pd.DataFrame([row['_source']])], ignore_index=True).fillna({})
         return df.to_dict('records')
 
     @typechecked()
@@ -273,7 +267,7 @@ def fetch_data_by_es_query(self, es_index: str, query: dict = None, start_dateti
                 if start_datetime and end_datetime:
                     query = self.get_query_data_between_range(start_datetime=start_datetime, end_datetime=end_datetime)
             if query:
-                response = self.__es.search(index=es_index, body=query, doc_type='_doc', size=search_size, scroll='1h',
+                response = self.__es.search(index=es_index, body=query, size=search_size, scroll='1h',
                                             filter_path=filter_path)
                 if result_agg:
                     es_data.extend(response.get('aggregations').get(group_by).get('buckets'))
@@ -363,7 +357,7 @@ def upload_data_in_bulk(self, data_items: list, index: str, **kwargs):
                 if 'CleanUpDays' not in item:
                     item['ExpireDays'] = self.__environment_variables_dict.get('DAYS_TO_TAKE_ACTION')
                 item['policy'] = self.__environment_variables_dict.get('policy')
-            response = bulk(self.__es, bulk_items)
+            response = self.__bulk_fn(self.__es, bulk_items)
             if response:
                 total_uploaded += len(bulk_items)
             else:
@@ -399,7 +393,7 @@ def post_query(self, query: dict, es_index: str, result_agg: bool = False):
             if result_agg:
                 return response.get('aggregations')
             else:
-                return response.get('hits', {}).get('hits', {})
+                return response.get('hits', {}).get('hits', [])
         except Exception as err:
             logger.error(err)
             raise err
diff --git a/cloud_governance/main/environment_variables.py b/cloud_governance/main/environment_variables.py
@@ -210,6 +210,7 @@ def __init__(self):
         self._environment_variables_dict['es_index'] = EnvironmentVariables.get_env('es_index', es_index)
         self._environment_variables_dict['es_doc_type'] = EnvironmentVariables.get_env('es_doc_type', '')
         self._environment_variables_dict['ES_TIMEOUT'] = EnvironmentVariables.get_env('ES_TIMEOUT', 2000)
+        self._environment_variables_dict['ES_SERVER_TYPE'] = EnvironmentVariables.get_env('ES_SERVER_TYPE', 'opensearch')
 
         # GitHub credentials
         self._environment_variables_dict['git_access_token'] = EnvironmentVariables.get_env('git_access_token', '')

diff --git a/cloud_governance/main/es_uploader.py b/cloud_governance/main/es_uploader.py
@@ -17,7 +17,6 @@ def __init__(self, **kwargs):
         self.__es_host = kwargs.get('es_host')
         self.__es_port = kwargs.get('es_port')
         self.__es_index = kwargs.get('es_index')
-        self.__es_doc_type = kwargs.get('es_doc_type')
         self.__es_add_items = kwargs.get('es_add_items')
         self.__bucket_name = kwargs.get('bucket')
         self.__s3_file_name = kwargs.get('s3_file_name')
@@ -95,14 +94,13 @@ def __get_user_cost(self, data):
                 num += 1
         return user_cost_results
 
-    def upload_last_policy_to_elasticsearch(self, policy: str, index: str, doc_type: str, s3_json_file: str,
+    def upload_last_policy_to_elasticsearch(self, policy: str, index: str, s3_json_file: str,
                                             es_add_items: dict = None):
         """
         This method is upload json kubernetes cluster data into elasticsearch
         :param policy:
         :param s3_json_file:
         :param index:
-        :param doc_type:
         :param es_add_items:
         :return:
         """
@@ -210,5 +208,5 @@ def upload_to_es(self, account):
         """
         self.__es_add_items.update({'policy': self.__policy_name, 'region': self.__region_name})
         self.upload_last_policy_to_elasticsearch(policy=self.__policy_name, index=self.__es_index,
-                                                 doc_type=self.__es_doc_type, s3_json_file=self.__s3_file_name,
+                                                 s3_json_file=self.__s3_file_name,
                                                  es_add_items=self.__es_add_items)
diff --git a/cloud_governance/main/main.py b/cloud_governance/main/main.py
@@ -218,7 +218,6 @@ def main():
         es_host = environment_variables_dict.get('es_host', '')
         es_port = environment_variables_dict.get('es_port', '')
         es_index = environment_variables_dict.get('es_index', '')
-        es_doc_type = environment_variables_dict.get('es_doc_type', '')
         bucket = environment_variables_dict.get('bucket', '')
         main_operations = MainOperations()
         response = main_operations.run()
@@ -309,7 +308,6 @@ def run_gcp_policy_runner():
                     input_data = {'es_host': es_host,
                                   'es_port': int(es_port),
                                   'es_index': es_index,
-                                  'es_doc_type': es_doc_type,
                                   'es_add_items': {'account': account},
                                   'bucket': bucket,
                                   'logs_bucket_key': 'logs',

diff --git a/jenkins/cloud_resource_orchestration/Jenkinsfile b/jenkins/cloud_resource_orchestration/Jenkinsfile
@@ -16,6 +16,8 @@ pipeline {
         AWS_SECRET_ACCESS_KEY_DELETE_PERF_SCALE = credentials('cloud-governance-aws-secret-access-key-delete-perf-scale')
         ES_HOST = credentials('cloud-governance-es-host')
         ES_PORT = credentials('cloud-governance-es-port')
+        ES_USER = credentials('cloud-governance-es-user')
+        ES_PASSWORD = credentials('cloud-governance-es-password')
         CLOUD_GOVERNANCE_SPECIAL_USER_MAILS = credentials('cloud-governance-special-user-mails')
         LDAP_HOST_NAME = credentials('cloud-governance-ldap-host-name')
         JIRA_URL = credentials('JIRA_URL')

diff --git a/jenkins/cloud_resource_orchestration/run_cloud_resource_orchestration.py b/jenkins/cloud_resource_orchestration/run_cloud_resource_orchestration.py
@@ -8,6 +8,8 @@
 AWS_SECRET_ACCESS_KEY_DELETE_PERF_SCALE = os.environ['AWS_SECRET_ACCESS_KEY_DELETE_PERF_SCALE']
 ES_HOST = os.environ['ES_HOST']
 ES_PORT = os.environ['ES_PORT']
+ES_USER = os.environ.get('ES_USER', '')
+ES_PASSWORD = os.environ.get('ES_PASSWORD', '')
 LDAP_HOST_NAME = os.environ['LDAP_HOST_NAME']
 JIRA_URL = os.environ['JIRA_URL']
 JIRA_USERNAME = os.environ['JIRA_USERNAME']
@@ -30,7 +32,8 @@
 es_index = CLOUD_RESOURCE_ORCHESTRATION_INDEX
 
 common_env_vars = {
-    'es_host': ES_HOST, 'es_port': ES_PORT, 'CRO_ES_INDEX': CRO_ES_INDEX, 'log_level': 'INFO',
+    'es_host': ES_HOST, 'es_port': ES_PORT, 'es_user': ES_USER, 'es_password': ES_PASSWORD,
+    'CRO_ES_INDEX': CRO_ES_INDEX, 'log_level': 'INFO',
     'LDAP_HOST_NAME': LDAP_HOST_NAME,
     'JIRA_QUEUE': JIRA_QUEUE, 'JIRA_TOKEN': JIRA_TOKEN, 'JIRA_USERNAME': JIRA_USERNAME, 'JIRA_URL': JIRA_URL,
     'CRO_COST_OVER_USAGE': CRO_COST_OVER_USAGE, 'CRO_PORTAL': CRO_PORTAL, 'CRO_DEFAULT_ADMINS': CRO_DEFAULT_ADMINS,
@@ -48,7 +51,8 @@
 
 os.system('echo Run CloudResourceOrchestration in pre active region')
 
-common_input_vars = {'es_host': ES_HOST, 'es_port': ES_PORT, 'CRO_ES_INDEX': CRO_ES_INDEX, 'log_level': 'INFO',
+common_input_vars = {'es_host': ES_HOST, 'es_port': ES_PORT, 'es_user': ES_USER, 'es_password': ES_PASSWORD,
+                     'CRO_ES_INDEX': CRO_ES_INDEX, 'log_level': 'INFO',
                      'LDAP_HOST_NAME': LDAP_HOST_NAME,
                      'JIRA_QUEUE': JIRA_QUEUE, 'JIRA_TOKEN': JIRA_TOKEN, 'JIRA_USERNAME': JIRA_USERNAME,
                      'JIRA_URL': JIRA_URL,

diff --git a/jenkins/clouds/aws/daily/cost_explorer/Jenkinsfile b/jenkins/clouds/aws/daily/cost_explorer/Jenkinsfile
@@ -26,6 +26,8 @@ pipeline {
         BUCKET_PERF_SCALE = credentials('cloud-governance-bucket-perf_scale')
         ES_HOST = credentials('cloud-governance-es-host')
         ES_PORT = credentials('cloud-governance-es-port')
+        ES_USER = credentials('cloud-governance-es-user')
+        ES_PASSWORD = credentials('cloud-governance-es-password')
         GITHUB_TOKEN = credentials('cloud-governance-git-access-token')
         CLOUD_GOVERNANCE_SPECIAL_USER_MAILS = credentials('cloud-governance-special-user-mails')
         LDAP_HOST_NAME = credentials('cloud-governance-ldap-host-name')