@@ -91,6 +91,48 @@ def process_ecg_data(
9191 return processed_data
9292
9393
def _first_coding(component) -> dict:
    """Return the first entry of ``component["code"]["coding"]``, or ``{}``.

    An empty dict is returned whenever the component is not a dict, ``code``
    is missing or not a dict, or ``coding`` is missing, empty, or does not
    start with a dict, so callers can chain ``.get(...)`` without guarding.
    """
    if not isinstance(component, dict):
        return {}
    code_field = component.get("code")
    if not isinstance(code_field, dict):
        return {}
    codings = code_field.get("coding")
    if isinstance(codings, list) and codings and isinstance(codings[0], dict):
        return codings[0]
    return {}


def fetch_symptoms_single(observation_data: dict) -> dict:
    """
    Extracts symptoms information from a single observation data dictionary and
    returns a dict with a normalized Symptoms field.

    The first component whose first coding has the code
    "HKElectrocardiogram.SymptomsStatus" supplies the symptoms status (its
    "valueString"). Only when that status equals "present" are the components
    whose first coding code contains "HKCategoryTypeIdentifier" collected, each
    rendered as "<display>:<valueString>" and joined with ", ". In every other
    case, or when nothing could be collected, Symptoms is "No symptoms.".

    Components with a missing, None, or empty "code"/"coding" are ignored
    instead of raising.

    Args:
        observation_data: A dictionary containing observation data. It is read
            for "component", "ResourceId", and the key given by
            ColumnNames.USER_ID.value.

    Returns:
        dict: A dictionary with three keys: ColumnNames.USER_ID.value,
        "ResourceId", and "Symptoms".
    """
    # `or []` also covers a "component" key that is present but set to None.
    components = observation_data.get("component") or []
    user_id = observation_data.get(ColumnNames.USER_ID.value)
    resource_id = observation_data.get("ResourceId")

    # Resolve each component's first coding once; both passes below reuse it.
    coded_components = [(comp, _first_coding(comp)) for comp in components]

    # Only the first status component counts, even if its value is None.
    symptoms_status = next(
        (
            comp.get("valueString")
            for comp, coding in coded_components
            if coding.get("code") == "HKElectrocardiogram.SymptomsStatus"
        ),
        None,
    )

    symptoms_text = "No symptoms."
    if symptoms_status == "present":
        symptoms_list = []
        for comp, coding in coded_components:
            code = coding.get("code")
            # A coding without a string code cannot identify a symptom.
            if not isinstance(code, str) or "HKCategoryTypeIdentifier" not in code:
                continue
            display = coding.get("display")
            value = comp.get("valueString")
            # Keep falsy-but-present values such as ""; drop only None.
            if display and value is not None:
                symptoms_list.append(f"{display}:{value}")
        if symptoms_list:
            symptoms_text = ", ".join(symptoms_list)

    return {
        ColumnNames.USER_ID.value: user_id,
        "ResourceId": resource_id,
        "Symptoms": symptoms_text,
    }
134+
135+
94136def fetch_diagnosis_data ( # pylint: disable=too-many-locals, too-many-branches
95137 db : Client ,
96138 input_df : pd .DataFrame ,
@@ -139,7 +181,9 @@ def fetch_diagnosis_data( # pylint: disable=too-many-locals, too-many-branches
139181 for doc in fhir_docs :
140182 observation_data = doc .to_dict ()
141183 observation_data ["user_id" ] = user_id
184+ observation_data [ColumnNames .USER_ID .value ] = user_id
142185 observation_data ["ResourceId" ] = doc .id
186+ observation_data [ColumnNames .RESOURCE_ID .value ] = doc .id
143187 diagnosis_docs = list (
144188 doc .reference .collection (DIAGNOSIS_DATA_SUBCOLLECTION ).stream (
145189 timeout = timeout
@@ -163,6 +207,10 @@ def fetch_diagnosis_data( # pylint: disable=too-many-locals, too-many-branches
163207 if observation_data ["NumberOfReviewers" ] < 3
164208 else "Complete review"
165209 )
210+
211+ symptoms_info = fetch_symptoms_single (observation_data )
212+ if symptoms_info :
213+ observation_data .update (symptoms_info )
166214 resources .append (observation_data )
167215
168216 for i , diagnosis_doc in enumerate (diagnosis_docs ):
@@ -184,14 +232,15 @@ def fetch_diagnosis_data( # pylint: disable=too-many-locals, too-many-branches
184232 "NumberOfReviewers" ,
185233 "Reviewers" ,
186234 "ReviewStatus" ,
235+ "Symptoms" ,
187236 ] + list (new_columns )
188237
189238 data = []
190239
191240 for resource in resources :
192241 row_data = [
193242 resource .get (ColumnNames .USER_ID .value , None ),
194- resource .get ("id " , None ),
243+ resource .get ("ResourceId " , None ),
195244 (
196245 resource .get ("effectivePeriod" , {}).get ("start" , None )
197246 if resource .get ("effectivePeriod" )
@@ -205,6 +254,7 @@ def fetch_diagnosis_data( # pylint: disable=too-many-locals, too-many-branches
205254 resource .get ("NumberOfReviewers" , None ),
206255 resource .get ("Reviewers" , None ),
207256 resource .get ("ReviewStatus" , None ),
257+ resource .get ("Symptoms" , None ),
208258 ]
209259 for col in new_columns :
210260 row_data .append (resource .get (col , None ))
@@ -221,6 +271,7 @@ def fetch_diagnosis_data( # pylint: disable=too-many-locals, too-many-branches
221271 "Reviewers" ,
222272 "ReviewStatus" ,
223273 "EffectiveDateTimeHHMM" ,
274+ "Symptoms" ,
224275 ] + list (new_columns )
225276
226277 for col in additional_columns :
0 commit comments