11import logging
22import time
3+ import jsonpatch
34
4- from globus_sdk import (AccessTokenAuthorizer , ConfidentialAppAuthClient ,
5- SearchClient )
5+ from globus_sdk import AccessTokenAuthorizer , ConfidentialAppAuthClient , SearchClient
66from globus_sdk .scopes import SearchScopes
77
88
99class ConsumerSearchClient :
10- def __init__ (self , credentials , search_index ):
10+ def __init__ (self , credentials , search_index , error_producer ):
1111 confidential_client = ConfidentialAppAuthClient (
1212 client_id = credentials .get ("client_id" ),
1313 client_secret = credentials .get ("client_secret" ),
@@ -19,10 +19,11 @@ def __init__(self, credentials, search_index):
1919 authorizer = AccessTokenAuthorizer (search_tokens .get ("access_token" ))
2020 self .search_client = SearchClient (authorizer = authorizer )
2121 self .esgf_index = search_index
22+ self .error_producer = error_producer
2223
def convert_assets(self, assets):
    """Flatten a name-keyed asset mapping into a list of asset dicts.

    Args:
        assets: Mapping whose keys are asset names and whose values are
            dicts of asset properties.

    Returns:
        A list with one dict per mapping entry, built as
        ``{"name": <key>} | <properties>``. Because the right-hand operand
        of ``|`` wins, a ``"name"`` key already present in the properties
        overrides the mapping key.
    """
    return [
        {"name": asset_name} | properties
        for asset_name, properties in assets.items()
    ]
2829
@@ -45,12 +46,7 @@ def ingest(self, messages_data):
4546 gmeta = []
4647 for data in messages_data :
4748 item = data .get ("data" ).get ("payload" ).get ("item" )
48- assets = item .get ("assets" )
49- assets_list = []
50- for name , asset in assets .items ():
51- asset ["name" ] = name
52- assets_list .append (asset )
53- item ["assets" ] = assets_list
49+ item ["assets" ] = item .get ("assets" )
5450 gmeta .append (self .gmetaentry (item ))
5551
5652 gmetalist = {"ingest_type" : "GMetaList" , "ingest_data" : {"gmeta" : gmeta }}
@@ -70,5 +66,95 @@ def ingest(self, messages_data):
7066 time .sleep (1 )
7167 return True
7268
def post(self, message_data):
    """Build the index entry for a new item, rejecting duplicates.

    Args:
        message_data: Message dict; the item is read from
            ``message_data["data"]["payload"]["item"]``.

    Returns:
        The value of ``self.gmetaentry(item)`` when the index holds no data
        for the item's ID, otherwise ``None``. In the duplicate case the
        condition is logged and reported on the ``esgf-local.errors`` topic.

    Note:
        The item dict is modified in place: its ``"assets"`` value is
        replaced by the output of ``convert_assets``.
    """
    item = message_data.get("data").get("payload").get("item")
    item_id = item.get("id")

    # NOTE(review): this assumes get_subject returns a response with empty
    # data for an unknown subject instead of raising -- confirm with the SDK.
    existing = self.search_client.get_subject(self.esgf_index, item_id)
    if not existing.data:
        item["assets"] = self.convert_assets(item.get("assets"))
        return self.gmetaentry(item)

    duplicate_message = f"Item with ID {item_id} already exists in the index."
    logging.info(duplicate_message)
    self.error_producer.produce(
        topic="esgf-local.errors",
        key=item_id,
        value=duplicate_message,
    )
    return None
83+
def json_patch(self, message_data):
    """Apply a JSON patch to the content currently stored for an item.

    Args:
        message_data: Message dict; ``item_id`` and ``patch`` are read from
            ``message_data["data"]["payload"]``.

    Returns:
        The result of ``jsonpatch.apply_patch`` on the ``"content"`` value
        of the lookup response, or ``None`` when the lookup returns no data.
        In the latter case the condition is logged and reported on the
        ``esgf-local.errors`` topic.
    """
    payload = message_data.get("data").get("payload")
    subject = payload.get("item_id")

    current = self.search_client.get_subject(self.esgf_index, subject)
    if current.data:
        # NOTE(review): the patched content is returned as-is, not wrapped
        # by gmetaentry as post() does -- confirm this is the intended shape.
        return jsonpatch.apply_patch(
            current.data.get("content"), payload.get("patch")
        )

    missing_message = f"Item with ID {subject} does not exist in the index."
    logging.info(missing_message)
    self.error_producer.produce(
        topic="esgf-local.errors",
        key=subject,
        value=missing_message,
    )
    return None
100+
def delete(self, subject):
    """Delete a subject from the index if the index returns data for it.

    Args:
        subject: Identifier passed to ``get_subject`` / ``delete_subject``.

    Returns:
        ``True`` after ``delete_subject`` has been called, or ``None`` when
        the lookup returns no data. In the latter case the condition is
        logged and reported on the ``esgf-local.errors`` topic.
    """
    lookup = self.search_client.get_subject(self.esgf_index, subject)
    if not lookup.data:
        not_found = f"Item with ID {subject} does not exist in the index."
        logging.info(not_found)
        self.error_producer.produce(
            topic="esgf-local.errors",
            key=subject,
            value=not_found,
        )
        return None

    self.search_client.delete_subject(self.esgf_index, subject)
    return True
113+
def process_message(self, message_data):
    """Dispatch one message to the handler for its ``method``.

    Args:
        message_data: Message dict; ``method`` is read from
            ``message_data["data"]["payload"]``.

    Returns:
        Whatever the selected handler returns (``post``, ``put``,
        ``json_patch`` or ``merge_patch``), or ``None`` for an unrecognised
        method or when handling raised.

    Any exception raised while handling is logged and reported on the
    ``esgf-local.errors`` topic; it is never propagated to the caller.
    """
    try:
        payload = message_data.get("data").get("payload")
        method = payload.get("method")
        if method == "POST":
            return self.post(message_data)
        if method == "PUT":
            return self.put(message_data)
        if method in ("JSON_PATCH", "PATCH"):
            return self.json_patch(message_data)
        if method == "MERGE_PATCH":
            return self.merge_patch(message_data)
        return None
    except Exception as e:
        logging.error(f"Error processing message data: {e}")

        # Build the error key defensively: patch messages carry "item_id"
        # and no "item", and a malformed message may lack the payload
        # altogether. A failure here must not mask the original error.
        key = None
        try:
            failed_payload = message_data["data"]["payload"]
            failed_item = failed_payload.get("item")
            if failed_item:
                key = failed_item.get("id")
            if not key:
                key = failed_payload.get("item_id")
        except (AttributeError, KeyError, TypeError):
            key = None

        self.error_producer.produce(
            topic="esgf-local.errors",
            key=key,
            value=str(e),
        )
        return None
135+
def process_messages(self, messages_data, poll_interval=1):
    """Process a batch of messages and ingest the resulting entries.

    Each message goes through ``process_message``; truthy results are
    collected into a single ``GMetaList`` document and submitted with
    ``search_client.ingest``. The ingest task is then polled until it
    reports ``SUCCESS`` or ``FAILED``.

    Args:
        messages_data: Iterable of message dicts.
        poll_interval: Seconds to sleep between task status checks.
            Defaults to 1.

    Returns:
        ``True`` when the ingest task reaches ``SUCCESS``; ``False`` when no
        message produced an entry or the task reaches ``FAILED``.

    Note:
        Polling has no upper bound: the call blocks for as long as the task
        stays in any state other than ``SUCCESS`` or ``FAILED``.
    """
    gmeta = [
        entry
        for entry in map(self.process_message, messages_data)
        if entry
    ]
    if not gmeta:
        # Nothing to ingest; skip the service call entirely.
        return False

    gmetalist = {"ingest_type": "GMetaList", "ingest_data": {"gmeta": gmeta}}
    task_id = self.search_client.ingest(self.esgf_index, gmetalist).get("task_id")

    while True:
        task = self.search_client.get_task(task_id)
        state = task.get("state")
        if state == "SUCCESS":
            return True
        if state == "FAILED":
            logging.error(f"Ingestion task {task_id} failed")
            logging.error(task.text)
            return False
        time.sleep(poll_interval)
0 commit comments