From b8fd8fc966da59055feaaeaaa4245f878050f656 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Tue, 20 Feb 2024 19:06:42 +0000 Subject: [PATCH 001/230] create example scenario for costing script --- .../costing/example_costing_scenario.py | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 src/scripts/costing/example_costing_scenario.py diff --git a/src/scripts/costing/example_costing_scenario.py b/src/scripts/costing/example_costing_scenario.py new file mode 100644 index 0000000000..35f17b0062 --- /dev/null +++ b/src/scripts/costing/example_costing_scenario.py @@ -0,0 +1,48 @@ +''' + + +Run on the batch system using: +```tlo batch-submit src/scripts/costing/example_costing_scenario.py``` + +or locally using: + ```tlo batch-submit src/scripts/costing/example_costing_scenario.py``` + +''' + +from tlo import Date, logging +from tlo.methods.fullmodel import fullmodel +from tlo.scenario import BaseScenario + +class SampleCostingScenario(BaseScenario): + def __init__(self): + super().__init__() + self.seed = 0 + self.start_date = Date(2010, 1, 1) + self.end_date = Date(2020, 1, 1) + self.pop_size = 100 # <- recommended population size for the runs + self.number_of_draws = 1 # <- one scenario + self.runs_per_draw = 3 # <- repeated this many times + + def log_configuration(self): + return { + 'filename': 'example_costing_scenario', + 'directory': './outputs', # <- (specified only for local running) + 'custom_levels': { + '*': logging.WARNING, + 'tlo.methods.demography': logging.INFO, + 'tlo.methods.healthsystem': logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + } + } + + def modules(self): + return fullmodel(resourcefilepath=self.resources) + + def draw_parameters(self, draw_number, rng): + return get_parameters_for_status_quo() + + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) From 3c850f2c64ddc5e049bd375786bf08b12afe62dc Mon Sep 17 00:00:00 2001 From: sm2511 Date: Tue, 20 Feb 2024 19:31:15 +0000 Subject: [PATCH 002/230] update example scenario - error in running get_parameters_for_status_quo() --- src/scripts/costing/example_costing_scenario.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/scripts/costing/example_costing_scenario.py b/src/scripts/costing/example_costing_scenario.py index 35f17b0062..9c6ba41606 100644 --- a/src/scripts/costing/example_costing_scenario.py +++ b/src/scripts/costing/example_costing_scenario.py @@ -5,7 +5,7 @@ ```tlo batch-submit src/scripts/costing/example_costing_scenario.py``` or locally using: - ```tlo batch-submit src/scripts/costing/example_costing_scenario.py``` + ```tlo scenario-run src/scripts/costing/example_costing_scenario.py``` ''' @@ -39,7 +39,11 @@ def modules(self): return fullmodel(resourcefilepath=self.resources) def draw_parameters(self, draw_number, rng): - return get_parameters_for_status_quo() + return { + 'HealthSystem': { + 'cons_availability': ['default'][draw_number] + } + } if __name__ == '__main__': From 0d00046b6804be21b111a5addac305c489fbd0a8 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Tue, 20 Feb 2024 19:32:11 +0000 Subject: [PATCH 003/230] update example scenario - error in running 1 scenario --- src/scripts/costing/example_costing_scenario.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/scripts/costing/example_costing_scenario.py b/src/scripts/costing/example_costing_scenario.py index 9c6ba41606..85f96ba7a0 100644 --- a/src/scripts/costing/example_costing_scenario.py +++ 
b/src/scripts/costing/example_costing_scenario.py @@ -20,7 +20,7 @@ def __init__(self): self.start_date = Date(2010, 1, 1) self.end_date = Date(2020, 1, 1) self.pop_size = 100 # <- recommended population size for the runs - self.number_of_draws = 1 # <- one scenario + self.number_of_draws = 2 # <- one scenario self.runs_per_draw = 3 # <- repeated this many times def log_configuration(self): @@ -41,7 +41,7 @@ def modules(self): def draw_parameters(self, draw_number, rng): return { 'HealthSystem': { - 'cons_availability': ['default'][draw_number] + 'cons_availability': ['default', 'all'][draw_number] } } From 6b730ea8d7b796a040e5bba03898939ab36878d7 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Tue, 20 Feb 2024 19:44:48 +0000 Subject: [PATCH 004/230] reduce scenario size --- src/scripts/costing/example_costing_scenario.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/scripts/costing/example_costing_scenario.py b/src/scripts/costing/example_costing_scenario.py index 85f96ba7a0..df3bf1cc64 100644 --- a/src/scripts/costing/example_costing_scenario.py +++ b/src/scripts/costing/example_costing_scenario.py @@ -18,10 +18,10 @@ def __init__(self): super().__init__() self.seed = 0 self.start_date = Date(2010, 1, 1) - self.end_date = Date(2020, 1, 1) + self.end_date = Date(2013, 1, 1) self.pop_size = 100 # <- recommended population size for the runs self.number_of_draws = 2 # <- one scenario - self.runs_per_draw = 3 # <- repeated this many times + self.runs_per_draw = 2 # <- repeated this many times def log_configuration(self): return { From a102b34067ba9d8b41f58a4731a9bcc5320fc504 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Tue, 20 Feb 2024 21:03:20 +0000 Subject: [PATCH 005/230] add costing script --- src/scripts/costing/costing.py | 114 +++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 src/scripts/costing/costing.py diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py new file mode 100644 index 0000000000..4cb3237398 --- /dev/null +++ b/src/scripts/costing/costing.py @@ -0,0 +1,114 @@ +import argparse +from pathlib import Path +import calendar +import datetime +import os + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + +from tlo.analysis.utils import ( + extract_params, + extract_results, + get_scenario_info, + get_scenario_outputs, + load_pickled_dataframes, + make_age_grp_lookup, + make_age_grp_types, + summarize, +) + +# define a timestamp for script outputs +timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M") + +# print the start time of the script +print('Script Start', datetime.datetime.now().strftime('%H:%M')) + +# define a pathway to the data folder (note: currently outside the TLO model directory) +# remember to set working directory to TLOmodel/ +outputfilepath = Path('./outputs/sakshi.mohan@york.ac.uk') +resourcefilepath = Path("./resources") +path_for_new_resourcefiles = resourcefilepath / "healthsystem/consumables" + +# %% Gathering basic information + +# Find results_folder associated with a given batch_file and get most recent +results_folder = get_scenario_outputs('example_costing_scenario.py', outputfilepath)[0] # impact_of_cons_regression_scenarios + +# look at one log (so can decide what to extract) +log = load_pickled_dataframes(results_folder) + +# get basic information about the results +info = get_scenario_info(results_folder) + +# 1) Extract the parameters that have varied over the set of simulations +params = extract_params(results_folder) + + +# 
1. HR cost +''' +Notes: +1. Ignore squeeze factor, i.e. the actual capability used is costed +2. ? capabilities_coefficient +''' +# 1.1 Overall HR Cost +# Annual salary by officer type and facility level +workbook = pd.read_excel((resourcefilepath / "ResourceFile_Costing.xlsx"), + sheet_name = None) +hr_annual_salary = workbook["human_resources"] +hr_annual_salary['OfficerType_FacilityLevel'] = 'Officer_Type=' + hr_annual_salary['Officer_Category'].astype(str) + '|Facility_Level=' + hr_annual_salary['Facility_Level'].astype(str) + +# For total HR cost, multiply above with total capabilities X 'Frac_Time_Used_By_OfficerType' by facility level +frac_time_used_by_officer_type = pd.DataFrame(log['tlo.methods.healthsystem']['Capacity']['Frac_Time_Used_By_OfficerType'].to_list()) +aggregate_frac_time_used_by_officer_type = pd.DataFrame(frac_time_used_by_officer_type.sum(axis=0)) +aggregate_frac_time_used_by_officer_type.columns = ['Value'] +aggregate_frac_time_used_by_officer_type['OfficerType_FacilityLevel'] = aggregate_frac_time_used_by_officer_type.index + +salary_df = pd.merge(hr_annual_salary, aggregate_frac_time_used_by_officer_type, on = ['OfficerType_FacilityLevel']) +salary_df['Total_salary_by_cadre_and_level'] = salary_df['Salary_USD'] * salary_df['Value'] +scenario_cost = pd.DataFrame() +scenario_cost['HR'] = salary_df['Total_salary_by_cadre_and_level'].sum() # Need to fix this! + +log['tlo.methods.healthsystem']['Capacity']['Frac_Time_Used_By_Facility_ID'] # for district disaggregation +log['tlo.methods.healthsystem']['Capacity']['Frac_Time_Used_By_OfficerType'][0] + +# Aggregate Daily capabilities to total used by cadre and facility level +# Multiply these with the correct salary figure - need dictionary mapping (or read costing as a csv) + +# Bar plot of salary costs by cadre and facility level + +# log['tlo.methods.healthsystem.summary']['Capacity']['Frac_Time_Used_By_OfficerType'] +# 1.2 HR cost by Treatment_ID +# For HR cost by Treatment_ID, multiply total cost by Officer type by fraction of time used for treatment_ID +log['tlo.methods.healthsystem.summary']['HSI_Event']['TREATMENT_ID'][0] # what does this represent? why are there 3 rows (2 scenarios) +# But what we need is the HR use by Treatment_ID - Leave this for later? + +# log['tlo.scenario'] +log['tlo.methods.healthsystem.summary']['HSI_Event']['Number_By_Appt_Type_Code'] + + + + + + +df = pd.DataFrame(log['tlo.methods.healthsystem.summary']) +df.to_csv(outputfilepath / 'temp.csv') + +def read_parameters(self, data_folder): + """ + 1. Reads the costing resource file + 2. Declares the costing parameters + """ + # Read the resourcefile + # Short cut to parameters dict + p = self.parameters + + workbook = pd.read_excel((resourcefilepath / "ResourceFile_Costing.xlsx"), + sheet_name = None) + + p["human_resources"] = workbook["human_resources"] + +workbook = pd.read_excel((resourcefilepath / "ResourceFile_Costing.xlsx"), + sheet_name = None) +human_resources = workbook["human_resources"] From ffe4dfcc683fd2c3a44f471c08aeb48764d23867 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Wed, 21 Feb 2024 12:38:23 +0000 Subject: [PATCH 006/230] Update HR costing --- src/scripts/costing/costing.py | 52 ++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 4cb3237398..b4f3428c98 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -47,11 +47,6 @@ # 1. HR cost -''' -Notes: -1. 
Ignore squeeze factor, i.e. the actual capability used is costed -2. ? capabilities_coefficient -''' # 1.1 Overall HR Cost # Annual salary by officer type and facility level workbook = pd.read_excel((resourcefilepath / "ResourceFile_Costing.xlsx"), @@ -68,30 +63,54 @@ salary_df = pd.merge(hr_annual_salary, aggregate_frac_time_used_by_officer_type, on = ['OfficerType_FacilityLevel']) salary_df['Total_salary_by_cadre_and_level'] = salary_df['Salary_USD'] * salary_df['Value'] scenario_cost = pd.DataFrame() -scenario_cost['HR'] = salary_df['Total_salary_by_cadre_and_level'].sum() # Need to fix this! +scenario_cost['HR'] = salary_df['Total_salary_by_cadre_and_level'].sum() + +# Plot salary costs by cadre and facility level +# Group by cadre and level +total_salary_by_cadre = salary_df.groupby('Officer_Category')['Total_salary_by_cadre_and_level'].sum() +total_salary_by_level = salary_df.groupby('Facility_Level')['Total_salary_by_cadre_and_level'].sum() + +# If the folder doesn't exist, create it +costing_outputs_folder = Path('./outputs/costing') +if not os.path.exists(costing_outputs_folder): + os.makedirs(costing_outputs_folder) + +# Plot by cadre +total_salary_by_cadre.plot(kind='bar') +plt.xlabel('Officer_category') +plt.ylabel('Total Salary') +plt.title('Total Salary by Cadre') +plt.savefig(costing_outputs_folder / 'total_salary_by_cadre.png') + +# Plot by level +total_salary_by_level.plot(kind='bar') +plt.xlabel('Facility_Level') +plt.ylabel('Total Salary') +plt.title('Total Salary by Facility_Level') +plt.savefig(costing_outputs_folder / 'total_salary_by_level.png') + +# TODO Disaggregate by district using 'Frac_Time_Used_By_Facility_ID' +# TODO Disaggregate by Treatment_ID - will need this for cost-effectiveness estimates - current log does not provide this + + + +''' +# Scratch pad log['tlo.methods.healthsystem']['Capacity']['Frac_Time_Used_By_Facility_ID'] # for district disaggregation -log['tlo.methods.healthsystem']['Capacity']['Frac_Time_Used_By_OfficerType'][0] # Aggregate Daily capabilities to total used by cadre and facility level -# Multiply these with the correct salary figure - need dictionary mapping (or read costing as a csv) - -# Bar plot of salary costs by cadre and facility level # log['tlo.methods.healthsystem.summary']['Capacity']['Frac_Time_Used_By_OfficerType'] # 1.2 HR cost by Treatment_ID # For HR cost by Treatment_ID, multiply total cost by Officer type by fraction of time used for treatment_ID -log['tlo.methods.healthsystem.summary']['HSI_Event']['TREATMENT_ID'][0] # what does this represent? why are there 3 rows (2 scenarios) +log['tlo.methods.healthsystem.summary']['HSI_Event']['TREATMENT_ID'] # what does this represent? why are there 3 rows (2 scenarios) # But what we need is the HR use by Treatment_ID - Leave this for later? 
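# A rough illustrative sketch only (an assumption, not taken from these commits): total HR cost
# could be apportioned across TREATMENT_IDs in proportion to their HSI counts. This assumes each
# HSI uses a broadly similar amount of staff time, and that 'TREATMENT_ID' is logged as a dict of
# {treatment_id: count} per row.
treatment_counts = pd.DataFrame(
    log['tlo.methods.healthsystem.summary']['HSI_Event']['TREATMENT_ID'].to_list()
).fillna(0).sum(axis=0)
hr_cost_by_treatment_id = (treatment_counts / treatment_counts.sum()) * salary_df['Total_salary_by_cadre_and_level'].sum()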
# log['tlo.scenario'] log['tlo.methods.healthsystem.summary']['HSI_Event']['Number_By_Appt_Type_Code'] - - - - df = pd.DataFrame(log['tlo.methods.healthsystem.summary']) df.to_csv(outputfilepath / 'temp.csv') @@ -112,3 +131,6 @@ def read_parameters(self, data_folder): workbook = pd.read_excel((resourcefilepath / "ResourceFile_Costing.xlsx"), sheet_name = None) human_resources = workbook["human_resources"] + +''' + From 7d5ac0984f6b95094a9c4736e9dc9b5de31a896b Mon Sep 17 00:00:00 2001 From: sm2511 Date: Thu, 22 Feb 2024 18:23:18 +0000 Subject: [PATCH 007/230] add resource file --- resources/ResourceFile_Costing.xlsx | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 resources/ResourceFile_Costing.xlsx diff --git a/resources/ResourceFile_Costing.xlsx b/resources/ResourceFile_Costing.xlsx new file mode 100644 index 0000000000..3f180d3ddc --- /dev/null +++ b/resources/ResourceFile_Costing.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f81143dfdff97e32f7b994e6fd1c562c57ad013486cbc97554088b11458253c4 +size 25741 From c7ba4b9d032ea07950d24169bd8f2aa0a03bf989 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Wed, 28 Feb 2024 12:32:03 +0000 Subject: [PATCH 008/230] move costing resourcefile to costing folder --- resources/ResourceFile_Costing.xlsx | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 resources/ResourceFile_Costing.xlsx diff --git a/resources/ResourceFile_Costing.xlsx b/resources/ResourceFile_Costing.xlsx deleted file mode 100644 index 3f180d3ddc..0000000000 --- a/resources/ResourceFile_Costing.xlsx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f81143dfdff97e32f7b994e6fd1c562c57ad013486cbc97554088b11458253c4 -size 25741 From 6ac6b117dcef640ab483ab3e8608f7047780c0ce Mon Sep 17 00:00:00 2001 From: sm2511 Date: Wed, 28 Feb 2024 12:32:21 +0000 Subject: [PATCH 009/230] differentiate between financial and economic costs --- src/scripts/costing/costing.py | 45 +++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index b4f3428c98..09e152e738 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -47,23 +47,46 @@ # 1. 
HR cost -# 1.1 Overall HR Cost +# 1.1 HR Cost - Financial (Given the staff available) # Annual salary by officer type and facility level -workbook = pd.read_excel((resourcefilepath / "ResourceFile_Costing.xlsx"), +workbook_cost = pd.read_excel((resourcefilepath / "ResourceFile_Costing.xlsx"), sheet_name = None) -hr_annual_salary = workbook["human_resources"] +hr_annual_salary = workbook_cost["human_resources"] hr_annual_salary['OfficerType_FacilityLevel'] = 'Officer_Type=' + hr_annual_salary['Officer_Category'].astype(str) + '|Facility_Level=' + hr_annual_salary['Facility_Level'].astype(str) -# For total HR cost, multiply above with total capabilities X 'Frac_Time_Used_By_OfficerType' by facility level +# Load scenario staffing level +hr_scenario = log[ 'tlo.scenario'][ 'override_parameter']['new_value'][log[ 'tlo.scenario'][ 'override_parameter']['name'] == 'use_funded_or_actual_staffing'] + +if hr_scenario.empty: + current_staff_count = pd.read_csv( + resourcefilepath / "healthsystem/human_resources/actual/ResourceFile_Daily_Capabilities.csv") + +else: + current_staff_count = pd.read_csv( + resourcefilepath / 'healthsystem'/ 'human_resources' / f'{hr_scenario}' / 'ResourceFile_Daily_Capabilities.csv') + + current_staff_count_by_level_and_officer_type = current_staff_count.groupby(['Facility_Level', 'Officer_Category'])[ + 'Staff_Count'].sum().reset_index() + +salary_actualstaff_df = pd.merge(hr_annual_salary, current_staff_count_by_level_and_officer_type, on = ['Officer_Category', 'Facility_Level']) +salary_actualstaff_df['Total_salary_by_cadre_and_level'] = salary_actualstaff_df['Salary_USD'] * salary_actualstaff_df['Staff_Count'] + +# Create a dataframe to store financial costs +scenario_cost_actual = pd.DataFrame({'HR': salary_actualstaff_df['Total_salary_by_cadre_and_level'].sum()}, index=[0]) + +# 1.2 HR Cost - Economic (Staff needed for interventions delivered in the simulation) +# For total HR cost, multiply above with total capabilities X 'Frac_Time_Used_By_OfficerType' by facility leve +# Use log['tlo.methods.population']['scaling_factor'] frac_time_used_by_officer_type = pd.DataFrame(log['tlo.methods.healthsystem']['Capacity']['Frac_Time_Used_By_OfficerType'].to_list()) aggregate_frac_time_used_by_officer_type = pd.DataFrame(frac_time_used_by_officer_type.sum(axis=0)) aggregate_frac_time_used_by_officer_type.columns = ['Value'] aggregate_frac_time_used_by_officer_type['OfficerType_FacilityLevel'] = aggregate_frac_time_used_by_officer_type.index -salary_df = pd.merge(hr_annual_salary, aggregate_frac_time_used_by_officer_type, on = ['OfficerType_FacilityLevel']) -salary_df['Total_salary_by_cadre_and_level'] = salary_df['Salary_USD'] * salary_df['Value'] -scenario_cost = pd.DataFrame() -scenario_cost['HR'] = salary_df['Total_salary_by_cadre_and_level'].sum() +salary_staffneeded_df = pd.merge(hr_annual_salary, aggregate_frac_time_used_by_officer_type, on = ['OfficerType_FacilityLevel']) +salary_staffneeded_df['Total_salary_by_cadre_and_level'] = salary_df['Salary_USD'] * salary_df['Value'] + +# Create a dataframe to store economic costs +scenario_cost_economic = pd.DataFrame({'HR': salary_staffneeded_df['Total_salary_by_cadre_and_level'].sum()}, index=[0]) # Plot salary costs by cadre and facility level # Group by cadre and level @@ -91,8 +114,12 @@ # TODO Disaggregate by district using 'Frac_Time_Used_By_Facility_ID' # TODO Disaggregate by Treatment_ID - will need this for cost-effectiveness estimates - current log does not provide this +# TODO Add scaling factor - +# Consumables 
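# A minimal illustrative sketch (assumptions, not part of this commit) of the 'aggregate and
# multiply' step noted in the two comments below: 'Item_Available' is assumed to be a
# string-encoded dict of {item_code: quantity} per logged row, and 'unit_price_by_item' is a
# hypothetical {item_code: price in USD} mapping built from the costing resource file.
cons_dispensed = {}
for row in log['tlo.methods.healthsystem']['Consumables']['Item_Available']:
    for item_code, quantity in eval(row).items():
        cons_dispensed[item_code] = cons_dispensed.get(item_code, 0) + quantity
estimated_consumables_cost = sum(quantity * unit_price_by_item.get(item_code, 0)
                                 for item_code, quantity in cons_dispensed.items())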
+log['tlo.methods.healthsystem']['Consumables'] +# Aggregate Items_Available by Treatment_ID +# Multiply by the cost per item (need to check quantity) ''' # Scratch pad From 90b1fe1d087f0834083ac0c67afbf51151b9f105 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Wed, 28 Feb 2024 12:32:44 +0000 Subject: [PATCH 010/230] account for scaling factor --- src/scripts/costing/costing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 09e152e738..6ae8df24a9 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -83,7 +83,7 @@ aggregate_frac_time_used_by_officer_type['OfficerType_FacilityLevel'] = aggregate_frac_time_used_by_officer_type.index salary_staffneeded_df = pd.merge(hr_annual_salary, aggregate_frac_time_used_by_officer_type, on = ['OfficerType_FacilityLevel']) -salary_staffneeded_df['Total_salary_by_cadre_and_level'] = salary_df['Salary_USD'] * salary_df['Value'] +salary_staffneeded_df['Total_salary_by_cadre_and_level'] = salary_df['Salary_USD'] * salary_df['Value'] * log['tlo.methods.population']['scaling_factor']['scaling_factor'] # Create a dataframe to store economic costs scenario_cost_economic = pd.DataFrame({'HR': salary_staffneeded_df['Total_salary_by_cadre_and_level'].sum()}, index=[0]) From 0afc7b13366bff6bd3e2d45cef64e5d6eaf925ec Mon Sep 17 00:00:00 2001 From: sm2511 Date: Wed, 28 Feb 2024 12:54:42 +0000 Subject: [PATCH 011/230] compare with real budget data --- src/scripts/costing/costing.py | 40 +++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 6ae8df24a9..e26a0dd8a1 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -30,6 +30,9 @@ outputfilepath = Path('./outputs/sakshi.mohan@york.ac.uk') resourcefilepath = Path("./resources") path_for_new_resourcefiles = resourcefilepath / "healthsystem/consumables" +costing_outputs_folder = Path('./outputs/costing') +if not os.path.exists(costing_outputs_folder): + os.makedirs(costing_outputs_folder) # %% Gathering basic information @@ -72,7 +75,7 @@ salary_actualstaff_df['Total_salary_by_cadre_and_level'] = salary_actualstaff_df['Salary_USD'] * salary_actualstaff_df['Staff_Count'] # Create a dataframe to store financial costs -scenario_cost_actual = pd.DataFrame({'HR': salary_actualstaff_df['Total_salary_by_cadre_and_level'].sum()}, index=[0]) +scenario_cost_financial = pd.DataFrame({'HR': salary_actualstaff_df['Total_salary_by_cadre_and_level'].sum()}, index=[0]) # 1.2 HR Cost - Economic (Staff needed for interventions delivered in the simulation) # For total HR cost, multiply above with total capabilities X 'Frac_Time_Used_By_OfficerType' by facility leve @@ -88,16 +91,41 @@ # Create a dataframe to store economic costs scenario_cost_economic = pd.DataFrame({'HR': salary_staffneeded_df['Total_salary_by_cadre_and_level'].sum()}, index=[0]) +# Compare financial costs with actual budget data +#################################################### +salary_budget_2018 = 69478749 +consuambles_budget_2018 = 228934188 +real_budget = [salary_budget_2018, consuambles_budget_2018] +model_cost = [scenario_cost_financial['HR'][0], 0] +labels = ['HR_salary', 'Consumables'] + +plt.scatter(real_budget, model_cost) +# Plot a line representing a 45-degree angle +min_val = min(min(real_budget), min(model_cost)) +max_val = max(max(real_budget), max(model_cost)) +plt.plot([min_val, max_val], [min_val, 
max_val], 'r--', label='45-degree line') + +# Format x and y axis labels to display in millions +formatter = FuncFormatter(lambda x, _: '{:,.0f}M'.format(x / 1e6)) +plt.gca().xaxis.set_major_formatter(formatter) +plt.gca().yaxis.set_major_formatter(formatter) +# Add labels for each point +hr_label = 'HR_salary ' + f'{round(model_cost[0] / real_budget[0], 2)}' +plotlabels = [hr_label, 'Consumables'] +for i, txt in enumerate(plotlabels): + plt.text(real_budget[i], model_cost[i], txt, ha='right') + +plt.xlabel('Real Budget') +plt.ylabel('Model Cost') +plt.title('Real Budget vs Model Cost') +plt.savefig(costing_outputs_folder / 'Cost_validation.png') + + # Plot salary costs by cadre and facility level # Group by cadre and level total_salary_by_cadre = salary_df.groupby('Officer_Category')['Total_salary_by_cadre_and_level'].sum() total_salary_by_level = salary_df.groupby('Facility_Level')['Total_salary_by_cadre_and_level'].sum() -# If the folder doesn't exist, create it -costing_outputs_folder = Path('./outputs/costing') -if not os.path.exists(costing_outputs_folder): - os.makedirs(costing_outputs_folder) - # Plot by cadre total_salary_by_cadre.plot(kind='bar') plt.xlabel('Officer_category') From 9b8e0b403cf34ba7c8ce7d173d3a2fb112237e19 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Wed, 28 Feb 2024 15:45:44 +0000 Subject: [PATCH 012/230] Revert "account for scaling factor" This reverts commit 90b1fe1d087f0834083ac0c67afbf51151b9f105. --- src/scripts/costing/costing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index e26a0dd8a1..f3a7ffbfe2 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -86,7 +86,7 @@ aggregate_frac_time_used_by_officer_type['OfficerType_FacilityLevel'] = aggregate_frac_time_used_by_officer_type.index salary_staffneeded_df = pd.merge(hr_annual_salary, aggregate_frac_time_used_by_officer_type, on = ['OfficerType_FacilityLevel']) -salary_staffneeded_df['Total_salary_by_cadre_and_level'] = salary_df['Salary_USD'] * salary_df['Value'] * log['tlo.methods.population']['scaling_factor']['scaling_factor'] +salary_staffneeded_df['Total_salary_by_cadre_and_level'] = salary_df['Salary_USD'] * salary_df['Value'] # Create a dataframe to store economic costs scenario_cost_economic = pd.DataFrame({'HR': salary_staffneeded_df['Total_salary_by_cadre_and_level'].sum()}, index=[0]) From 40bf00c33969a6298067b1bc8ff35a3904c4a91b Mon Sep 17 00:00:00 2001 From: sm2511 Date: Wed, 28 Feb 2024 16:32:15 +0000 Subject: [PATCH 013/230] update economic cost calculation - divide the sum of `Frac_Time_Used_By_OfficerType` by the number of days - plot the aggregate fraction time used by cadre --- src/scripts/costing/costing.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index f3a7ffbfe2..d459457a11 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -81,12 +81,13 @@ # For total HR cost, multiply above with total capabilities X 'Frac_Time_Used_By_OfficerType' by facility leve # Use log['tlo.methods.population']['scaling_factor'] frac_time_used_by_officer_type = pd.DataFrame(log['tlo.methods.healthsystem']['Capacity']['Frac_Time_Used_By_OfficerType'].to_list()) -aggregate_frac_time_used_by_officer_type = pd.DataFrame(frac_time_used_by_officer_type.sum(axis=0)) +aggregate_frac_time_used_by_officer_type = 
pd.DataFrame(frac_time_used_by_officer_type.sum(axis=0))/len(frac_time_used_by_officer_type) aggregate_frac_time_used_by_officer_type.columns = ['Value'] aggregate_frac_time_used_by_officer_type['OfficerType_FacilityLevel'] = aggregate_frac_time_used_by_officer_type.index salary_staffneeded_df = pd.merge(hr_annual_salary, aggregate_frac_time_used_by_officer_type, on = ['OfficerType_FacilityLevel']) -salary_staffneeded_df['Total_salary_by_cadre_and_level'] = salary_df['Salary_USD'] * salary_df['Value'] +salary_staffneeded_df = pd.merge(salary_staffneeded_df, current_staff_count_by_level_and_officer_type, on = ['Officer_Category', 'Facility_Level']) +salary_staffneeded_df['Total_salary_by_cadre_and_level'] = salary_staffneeded_df['Salary_USD'] * salary_staffneeded_df['Value'] * salary_staffneeded_df['Staff_Count'] # Create a dataframe to store economic costs scenario_cost_economic = pd.DataFrame({'HR': salary_staffneeded_df['Total_salary_by_cadre_and_level'].sum()}, index=[0]) @@ -120,6 +121,12 @@ plt.title('Real Budget vs Model Cost') plt.savefig(costing_outputs_folder / 'Cost_validation.png') +# Plot fraction staff time used +fraction_stafftime_average = salary_staffneeded_df.groupby('Officer_Category')['Value'].sum() +fraction_stafftime_average. plot(kind = "bar") +plt.xlabel('Cadre') +plt.ylabel('Fraction time needed') +plt.savefig(costing_outputs_folder / 'hr_time_need_economic_cost.png') # Plot salary costs by cadre and facility level # Group by cadre and level From 2ea576c60d8448d3125f9b17dc3dd36842f53584 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Tue, 5 Mar 2024 11:45:09 +0000 Subject: [PATCH 014/230] scaling factor is not needed --- src/scripts/costing/costing.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index d459457a11..f76f735277 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -149,7 +149,7 @@ # TODO Disaggregate by district using 'Frac_Time_Used_By_Facility_ID' # TODO Disaggregate by Treatment_ID - will need this for cost-effectiveness estimates - current log does not provide this -# TODO Add scaling factor +# TODO Add economic cost to figure? # Consumables log['tlo.methods.healthsystem']['Consumables'] @@ -195,4 +195,3 @@ def read_parameters(self, data_folder): human_resources = workbook["human_resources"] ''' - From 539f7c62951374f94fbb1117754d9b58ab5e146a Mon Sep 17 00:00:00 2001 From: sm2511 Date: Thu, 7 Mar 2024 12:23:10 +0000 Subject: [PATCH 015/230] update dataframe names + add description for easier readability --- src/scripts/costing/costing.py | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index f76f735277..e92c0eb8bb 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -51,8 +51,8 @@ # 1. 
HR cost # 1.1 HR Cost - Financial (Given the staff available) -# Annual salary by officer type and facility level -workbook_cost = pd.read_excel((resourcefilepath / "ResourceFile_Costing.xlsx"), +# Load annual salary by officer type and facility level +workbook_cost = pd.read_excel((resourcefilepath / "costing/ResourceFile_Costing.xlsx"), sheet_name = None) hr_annual_salary = workbook_cost["human_resources"] hr_annual_salary['OfficerType_FacilityLevel'] = 'Officer_Type=' + hr_annual_salary['Officer_Category'].astype(str) + '|Facility_Level=' + hr_annual_salary['Facility_Level'].astype(str) @@ -68,29 +68,31 @@ current_staff_count = pd.read_csv( resourcefilepath / 'healthsystem'/ 'human_resources' / f'{hr_scenario}' / 'ResourceFile_Daily_Capabilities.csv') - current_staff_count_by_level_and_officer_type = current_staff_count.groupby(['Facility_Level', 'Officer_Category'])[ - 'Staff_Count'].sum().reset_index() +current_staff_count_by_level_and_officer_type = current_staff_count.groupby(['Facility_Level', 'Officer_Category'])[ + 'Staff_Count'].sum().reset_index() -salary_actualstaff_df = pd.merge(hr_annual_salary, current_staff_count_by_level_and_officer_type, on = ['Officer_Category', 'Facility_Level']) -salary_actualstaff_df['Total_salary_by_cadre_and_level'] = salary_actualstaff_df['Salary_USD'] * salary_actualstaff_df['Staff_Count'] +# Calculate salary cost for modelled health workforce (Staff count X Annual salary) +salary_for_modelled_staff = pd.merge(hr_annual_salary, current_staff_count_by_level_and_officer_type, on = ['Officer_Category', 'Facility_Level']) +salary_for_modelled_staff['Total_salary_by_cadre_and_level'] = salary_for_modelled_staff['Salary_USD'] * salary_for_modelled_staff['Staff_Count'] # Create a dataframe to store financial costs -scenario_cost_financial = pd.DataFrame({'HR': salary_actualstaff_df['Total_salary_by_cadre_and_level'].sum()}, index=[0]) +scenario_cost_financial = pd.DataFrame({'HR': salary_for_modelled_staff['Total_salary_by_cadre_and_level'].sum()}, index=[0]) # 1.2 HR Cost - Economic (Staff needed for interventions delivered in the simulation) -# For total HR cost, multiply above with total capabilities X 'Frac_Time_Used_By_OfficerType' by facility leve -# Use log['tlo.methods.population']['scaling_factor'] +# For HR required, multiply above with total capabilities X 'Frac_Time_Used_By_OfficerType' by facility level frac_time_used_by_officer_type = pd.DataFrame(log['tlo.methods.healthsystem']['Capacity']['Frac_Time_Used_By_OfficerType'].to_list()) aggregate_frac_time_used_by_officer_type = pd.DataFrame(frac_time_used_by_officer_type.sum(axis=0))/len(frac_time_used_by_officer_type) aggregate_frac_time_used_by_officer_type.columns = ['Value'] aggregate_frac_time_used_by_officer_type['OfficerType_FacilityLevel'] = aggregate_frac_time_used_by_officer_type.index -salary_staffneeded_df = pd.merge(hr_annual_salary, aggregate_frac_time_used_by_officer_type, on = ['OfficerType_FacilityLevel']) -salary_staffneeded_df = pd.merge(salary_staffneeded_df, current_staff_count_by_level_and_officer_type, on = ['Officer_Category', 'Facility_Level']) -salary_staffneeded_df['Total_salary_by_cadre_and_level'] = salary_staffneeded_df['Salary_USD'] * salary_staffneeded_df['Value'] * salary_staffneeded_df['Staff_Count'] +salary_for_required_staff = pd.merge(hr_annual_salary, aggregate_frac_time_used_by_officer_type, on = ['OfficerType_FacilityLevel']) +salary_for_required_staff = pd.merge(salary_for_required_staff, current_staff_count_by_level_and_officer_type, on = 
['Officer_Category', 'Facility_Level']) + +# Calculate salary cost for required health workforce (Staff count X Fraction of staff time needed X Annual salary) +salary_for_required_staff['Total_salary_by_cadre_and_level'] = salary_for_required_staff['Salary_USD'] * salary_for_required_staff['Value'] * salary_for_required_staff['Staff_Count'] # Create a dataframe to store economic costs -scenario_cost_economic = pd.DataFrame({'HR': salary_staffneeded_df['Total_salary_by_cadre_and_level'].sum()}, index=[0]) +scenario_cost_economic = pd.DataFrame({'HR': salary_for_required_staff['Total_salary_by_cadre_and_level'].sum()}, index=[0]) # Compare financial costs with actual budget data #################################################### @@ -147,10 +149,6 @@ plt.title('Total Salary by Facility_Level') plt.savefig(costing_outputs_folder / 'total_salary_by_level.png') -# TODO Disaggregate by district using 'Frac_Time_Used_By_Facility_ID' -# TODO Disaggregate by Treatment_ID - will need this for cost-effectiveness estimates - current log does not provide this -# TODO Add economic cost to figure? - # Consumables log['tlo.methods.healthsystem']['Consumables'] # Aggregate Items_Available by Treatment_ID From 2fcd216b739cc57bcc66552f452f7fd86d846359 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Thu, 7 Mar 2024 19:16:28 +0000 Subject: [PATCH 016/230] first stab at calculating consumables costs --- resources/costing/ResourceFile_Costing.xlsx | 3 + src/scripts/costing/costing.py | 76 +++++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 resources/costing/ResourceFile_Costing.xlsx diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx new file mode 100644 index 0000000000..f38f840b6c --- /dev/null +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90fd9e2cf265f22bf11a4ec28577bfa879b95cca2cac90e143facce4eeb94d9a +size 84599 diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index e92c0eb8bb..0299042f16 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -1,5 +1,8 @@ import argparse from pathlib import Path +from tlo import Date +from collections import Counter, defaultdict + import calendar import datetime import os @@ -34,6 +37,14 @@ if not os.path.exists(costing_outputs_folder): os.makedirs(costing_outputs_folder) +# Declare period for which the results will be generated (defined inclusively) +TARGET_PERIOD = (Date(2020, 1, 1), Date(2025, 12, 31)) + + +def drop_outside_period(_df): + """Return a dataframe which only includes for which the date is within the limits defined by TARGET_PERIOD""" + return _df.drop(index=_df.index[~_df['date'].between(*TARGET_PERIOD)]) + # %% Gathering basic information # Find results_folder associated with a given batch_file and get most recent @@ -94,6 +105,71 @@ # Create a dataframe to store economic costs scenario_cost_economic = pd.DataFrame({'HR': salary_for_required_staff['Total_salary_by_cadre_and_level'].sum()}, index=[0]) +# 1. 
Consumables cost +# 2.1 Consumables cost - Financial (What needs to be purchased given what is made available) +_df = log['tlo.methods.healthsystem']['Consumables'] + +counts_of_available = defaultdict(int) +counts_of_not_available = defaultdict(int) +for _, row in _df.iterrows(): + for item, num in eval(row['Item_Available']).items(): + counts_of_available[item] += num + for item, num in eval(row['Item_NotAvailable']).items(): + counts_of_not_available[item] += num +consumables_count_df = pd.concat( + {'Available': pd.Series(counts_of_available), 'Not_Available': pd.Series(counts_of_not_available)}, + axis=1 + ).fillna(0).astype(int).stack() + +# Load consumables cost data +unit_price_consumable = workbook_cost["consumables"][['Item_Code', 'Chosen_price_per_unit (USD)', 'Number of units needed per HSI']] +unit_price_consumable = unit_price_consumable.set_index('Item_Code').to_dict(orient='index') + +# Multiply number of items needed by cost of consumable +cost_of_consumables_dispensed = dict(zip(unit_price_consumable, (unit_price_consumable[key]['Chosen_price_per_unit (USD)'] * + unit_price_consumable[key]['Number of units needed per HSI'] * + counts_of_available[key] for key in unit_price_consumable))) +total_cost_of_consumables_dispensed = sum(value for value in cost_of_consumables_dispensed.values() if not np.isnan(value)) + + +# But all we have are the number of HSIs for which the consumable was needed +# Do we need to depend on the model to give the number of consumables dispensed? or just based this on number of treatment Ids successfully delivered? +# Ensure that expected units per case are expected units per HSI +# check costs - 0 costs, too high, nans; Get units per HSI from Emi's file? + + +def get_counts_of_items_requested(_df): + _df = drop_outside_period(_df) + counts_of_available = defaultdict(int) + counts_of_not_available = defaultdict(int) + for _, row in _df.iterrows(): + for item, num in eval(row['Item_Available']).items(): + counts_of_available[item] += num + for item, num in eval(row['Item_NotAvailable']).items(): + counts_of_not_available[item] += num + return pd.concat( + {'Available': pd.Series(counts_of_available), 'Not_Available': pd.Series(counts_of_not_available)}, + axis=1 + ).fillna(0).astype(int).stack() + + +cons_req = summarize( + extract_results( + results_folder, + module='tlo.methods.healthsystem', + key='Consumables', + custom_generate_series=get_counts_of_items_requested, + do_scaling=True + ), + only_mean=True, + collapse_columns=True) + + + + +# 2.2 Consumables cost - Economic (Level of consumables needed to meet the demand of all patients coming in contact with the health system) + + # Compare financial costs with actual budget data #################################################### salary_budget_2018 = 69478749 From 1d799ac796adcc5370491272e04ddc849ed552e8 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Mon, 11 Mar 2024 15:03:45 +0000 Subject: [PATCH 017/230] add financial cost of consumables --- resources/costing/ResourceFile_Costing.xlsx | 4 +- src/scripts/costing/costing.py | 95 ++++++++++++++------- 2 files changed, 64 insertions(+), 35 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index f38f840b6c..685d4b6c4f 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:90fd9e2cf265f22bf11a4ec28577bfa879b95cca2cac90e143facce4eeb94d9a -size 84599 +oid 
sha256:497784d6372b682a7139b7cd77099f1bde6b44a40e085e8b11600ad9c58cd0be +size 484032 diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 0299042f16..a5eaa64bcd 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -131,43 +131,72 @@ def drop_outside_period(_df): counts_of_available[key] for key in unit_price_consumable))) total_cost_of_consumables_dispensed = sum(value for value in cost_of_consumables_dispensed.values() if not np.isnan(value)) - -# But all we have are the number of HSIs for which the consumable was needed -# Do we need to depend on the model to give the number of consumables dispensed? or just based this on number of treatment Ids successfully delivered? -# Ensure that expected units per case are expected units per HSI -# check costs - 0 costs, too high, nans; Get units per HSI from Emi's file? - - -def get_counts_of_items_requested(_df): - _df = drop_outside_period(_df) - counts_of_available = defaultdict(int) - counts_of_not_available = defaultdict(int) - for _, row in _df.iterrows(): - for item, num in eval(row['Item_Available']).items(): - counts_of_available[item] += num - for item, num in eval(row['Item_NotAvailable']).items(): - counts_of_not_available[item] += num - return pd.concat( - {'Available': pd.Series(counts_of_available), 'Not_Available': pd.Series(counts_of_not_available)}, - axis=1 - ).fillna(0).astype(int).stack() - - -cons_req = summarize( - extract_results( - results_folder, - module='tlo.methods.healthsystem', - key='Consumables', - custom_generate_series=get_counts_of_items_requested, - do_scaling=True - ), - only_mean=True, - collapse_columns=True) +# Cost of consumables stocked +# Estimate the stock to dispensed ratio from OpenLMIS data +lmis_consumable_usage = pd.read_csv(path_for_new_resourcefiles / "ResourceFile_Consumables_availability_and_usage.csv") +# Collapse by item_code +lmis_consumable_usage_by_item = lmis_consumable_usage.groupby(['item_code'])[['closing_bal', 'amc', 'dispensed', 'received']].sum() +lmis_consumable_usage_by_item['stock_to_dispensed_ratio'] = lmis_consumable_usage_by_item['closing_bal']/lmis_consumable_usage_by_item['dispensed'] +#lmis_consumable_usage_by_item = lmis_consumable_usage_by_item[['item_code', 'stock_to_dispensed_ratio']] +# Trim top and bottom 5 percentile value for stock_to_dispensed_ratio +percentile_5 = lmis_consumable_usage_by_item['stock_to_dispensed_ratio'].quantile(0.05) +percentile_95 = lmis_consumable_usage_by_item['stock_to_dispensed_ratio'].quantile(0.95) +lmis_consumable_usage_by_item.loc[lmis_consumable_usage_by_item['stock_to_dispensed_ratio'] > percentile_95, 'stock_to_dispensed_ratio'] = percentile_95 +lmis_consumable_usage_by_item.loc[lmis_consumable_usage_by_item['stock_to_dispensed_ratio'] < percentile_5, 'stock_to_dispensed_ratio'] = percentile_5 +lmis_stock_to_dispensed_ratio_by_item = lmis_consumable_usage_by_item['stock_to_dispensed_ratio'] +lmis_stock_to_dispensed_ratio_by_item.to_dict() +average_stock_to_dispensed_ratio = lmis_stock_to_dispensed_ratio_by_item.mean() +# Multiply number of items needed by cost of consumable +cost_of_consumables_stocked = dict(zip(unit_price_consumable, (unit_price_consumable[key]['Chosen_price_per_unit (USD)'] * + unit_price_consumable[key]['Number of units needed per HSI'] * + counts_of_available[key] * + lmis_stock_to_dispensed_ratio_by_item.get(key, average_stock_to_dispensed_ratio) + for key in counts_of_available))) +total_cost_of_consumables_stocked = sum(value for value in 
cost_of_consumables_stocked.values() if not np.isnan(value)) +scenario_cost_financial['Consumables'] = total_cost_of_consumables_stocked -# 2.2 Consumables cost - Economic (Level of consumables needed to meet the demand of all patients coming in contact with the health system) +# Explore the ratio of dispensed drugs to drug stock +#################################################### +# Collapse monthly data +lmis_consumable_usage_by_district_and_level = lmis_consumable_usage.groupby(['district', 'fac_type_tlo','category', 'item_code'])[['closing_bal', 'amc', 'dispensed', 'received']].sum() +lmis_consumable_usage_by_district_and_level.reset_index() +lmis_consumable_usage_by_district_and_level['stock_to_dispensed_ratio'] = lmis_consumable_usage_by_district_and_level['closing_bal']/lmis_consumable_usage_by_district_and_level['dispensed'] + +# TODO: Only consider the months for which original OpenLMIS data was available for closing_stock and dispensed +# TODO Ensure that expected units per case are expected units per HSI +def plot_stock_to_dispensed(_df, plot_var, groupby_var, outlier_percentile): + # Exclude the top x percentile (outliers) from the plot + percentile_excluded = _df[plot_var].quantile(outlier_percentile) + _df_without_outliers = _df[_df[plot_var] <= percentile_excluded] + + # Plot the bar plot + plt.figure(figsize=(10, 6)) + sns.barplot(data=_df_without_outliers, x=groupby_var, y=plot_var, ci=None) + + # Add points representing the distribution of individual values + sns.stripplot(data=_df_without_outliers, x=groupby_var, y=plot_var, color='black', size=5, alpha=0.2) + + # Set labels and title + plt.xlabel(groupby_var) + plt.ylabel('Stock to Dispensed Ratio') + plt.title('Average Stock to Dispensed Ratio by ' + f'{groupby_var}') + plt.xticks(rotation=45) + + # Show plot + plt.tight_layout() + plt.savefig(costing_outputs_folder / 'stock_to_dispensed_ratio_by' f'{groupby_var}' ) + +plot_stock_to_dispensed(lmis_consumable_usage_by_district_and_level, 'stock_to_dispensed_ratio', + 'fac_type_tlo', 0.95) +plot_stock_to_dispensed(lmis_consumable_usage_by_district_and_level, 'stock_to_dispensed_ratio', + 'district', 0.95) +plot_stock_to_dispensed(lmis_consumable_usage_by_district_and_level, 'stock_to_dispensed_ratio', + 'category', 0.95) +plot_stock_to_dispensed(lmis_consumable_usage_by_district_and_level, 'stock_to_dispensed_ratio', + 'item_code', 0.95) # Compare financial costs with actual budget data From 6b1f7a61f058c8ffae1e3d3dc1fff03772cd41f8 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Mon, 11 Mar 2024 22:29:29 +0000 Subject: [PATCH 018/230] update scenario for azure run --- src/scripts/costing/example_costing_scenario.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/scripts/costing/example_costing_scenario.py b/src/scripts/costing/example_costing_scenario.py index df3bf1cc64..2b3498b031 100644 --- a/src/scripts/costing/example_costing_scenario.py +++ b/src/scripts/costing/example_costing_scenario.py @@ -19,8 +19,8 @@ def __init__(self): self.seed = 0 self.start_date = Date(2010, 1, 1) self.end_date = Date(2013, 1, 1) - self.pop_size = 100 # <- recommended population size for the runs - self.number_of_draws = 2 # <- one scenario + self.pop_size = 20_000 # <- recommended population size for the runs + self.number_of_draws = 1 # <- one scenario self.runs_per_draw = 2 # <- repeated this many times def log_configuration(self): @@ -41,7 +41,7 @@ def modules(self): def draw_parameters(self, draw_number, rng): return { 'HealthSystem': { - 
'cons_availability': ['default', 'all'][draw_number] + 'cons_availability': ['default'][draw_number] } } From 37afec8b1d8964a2c6b38ff64ab5dfbd6c15e3be Mon Sep 17 00:00:00 2001 From: sm2511 Date: Mon, 11 Mar 2024 22:31:53 +0000 Subject: [PATCH 019/230] update scenario for azure run --- src/scripts/costing/example_costing_scenario.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/scripts/costing/example_costing_scenario.py b/src/scripts/costing/example_costing_scenario.py index 2b3498b031..af3fb19f8e 100644 --- a/src/scripts/costing/example_costing_scenario.py +++ b/src/scripts/costing/example_costing_scenario.py @@ -21,7 +21,7 @@ def __init__(self): self.end_date = Date(2013, 1, 1) self.pop_size = 20_000 # <- recommended population size for the runs self.number_of_draws = 1 # <- one scenario - self.runs_per_draw = 2 # <- repeated this many times + self.runs_per_draw = 1 # <- repeated this many times def log_configuration(self): return { @@ -41,7 +41,7 @@ def modules(self): def draw_parameters(self, draw_number, rng): return { 'HealthSystem': { - 'cons_availability': ['default'][draw_number] + 'cons_availability': ['default', 'all'][draw_number] } } From ae31676ce383dd5b8587fd06d0a9c95e3024eb3c Mon Sep 17 00:00:00 2001 From: sm2511 Date: Mon, 11 Mar 2024 22:33:38 +0000 Subject: [PATCH 020/230] add consumable scenario --- src/scripts/costing/example_costing_scenario.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/scripts/costing/example_costing_scenario.py b/src/scripts/costing/example_costing_scenario.py index af3fb19f8e..586c7dec12 100644 --- a/src/scripts/costing/example_costing_scenario.py +++ b/src/scripts/costing/example_costing_scenario.py @@ -20,8 +20,8 @@ def __init__(self): self.start_date = Date(2010, 1, 1) self.end_date = Date(2013, 1, 1) self.pop_size = 20_000 # <- recommended population size for the runs - self.number_of_draws = 1 # <- one scenario - self.runs_per_draw = 1 # <- repeated this many times + self.number_of_draws = 2 # <- one scenario + self.runs_per_draw = 2 # <- repeated this many times def log_configuration(self): return { From 666b5e8f195862a4423465cb598d4c371fb7929f Mon Sep 17 00:00:00 2001 From: sm2511 Date: Wed, 13 Mar 2024 14:55:12 +0000 Subject: [PATCH 021/230] update HR financial cost - include only those cadres from total capabilities which were used in the simulation --- src/scripts/costing/costing.py | 89 ++++++++++++++++++++++++++++++---- 1 file changed, 79 insertions(+), 10 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index a5eaa64bcd..b37e797e07 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -20,6 +20,8 @@ make_age_grp_lookup, make_age_grp_types, summarize, + create_pickles_locally, + parse_log_file ) # define a timestamp for script outputs @@ -38,15 +40,12 @@ os.makedirs(costing_outputs_folder) # Declare period for which the results will be generated (defined inclusively) -TARGET_PERIOD = (Date(2020, 1, 1), Date(2025, 12, 31)) - - +TARGET_PERIOD = (Date(2000, 1, 1), Date(2050, 12, 31)) def drop_outside_period(_df): """Return a dataframe which only includes for which the date is within the limits defined by TARGET_PERIOD""" return _df.drop(index=_df.index[~_df['date'].between(*TARGET_PERIOD)]) # %% Gathering basic information - # Find results_folder associated with a given batch_file and get most recent results_folder = get_scenario_outputs('example_costing_scenario.py', outputfilepath)[0] # 
impact_of_cons_regression_scenarios @@ -59,31 +58,51 @@ def drop_outside_period(_df): # 1) Extract the parameters that have varied over the set of simulations params = extract_params(results_folder) +# Load costing resourcefile +workbook_cost = pd.read_excel((resourcefilepath / "costing/ResourceFile_Costing.xlsx"), + sheet_name = None) # 1. HR cost # 1.1 HR Cost - Financial (Given the staff available) # Load annual salary by officer type and facility level -workbook_cost = pd.read_excel((resourcefilepath / "costing/ResourceFile_Costing.xlsx"), - sheet_name = None) hr_annual_salary = workbook_cost["human_resources"] hr_annual_salary['OfficerType_FacilityLevel'] = 'Officer_Type=' + hr_annual_salary['Officer_Category'].astype(str) + '|Facility_Level=' + hr_annual_salary['Facility_Level'].astype(str) # Load scenario staffing level -hr_scenario = log[ 'tlo.scenario'][ 'override_parameter']['new_value'][log[ 'tlo.scenario'][ 'override_parameter']['name'] == 'use_funded_or_actual_staffing'] +hr_scenario = log[ 'tlo.scenario']['override_parameter']['new_value'][log[ 'tlo.scenario'][ 'override_parameter']['name'] == 'use_funded_or_actual_staffing'] if hr_scenario.empty: current_staff_count = pd.read_csv( resourcefilepath / "healthsystem/human_resources/actual/ResourceFile_Daily_Capabilities.csv") - else: current_staff_count = pd.read_csv( - resourcefilepath / 'healthsystem'/ 'human_resources' / f'{hr_scenario}' / 'ResourceFile_Daily_Capabilities.csv') + resourcefilepath / 'healthsystem'/ 'human_resources' / f'{hr_scenario[2]}' / 'ResourceFile_Daily_Capabilities.csv') current_staff_count_by_level_and_officer_type = current_staff_count.groupby(['Facility_Level', 'Officer_Category'])[ 'Staff_Count'].sum().reset_index() +# Check if any cadres were not utilised at particular levels of care in the simulation +_df = log['tlo.methods.healthsystem']['Capacity'] +# Initialize a dictionary to store the sums +cadres_used = {} +# Iterate over the rows and sum values for each key +for index, row in _df.iterrows(): + for key, value in row['Frac_Time_Used_By_OfficerType'].items(): + if key not in cadres_used: + cadres_used[key] = 0 + cadres_used[key] += value + +# Store list of cadre-level combinations used in the simulation in a list +cadres_used_df = pd.DataFrame(cadres_used.items(), columns=['Key', 'Sum']) +list_of_cadre_and_level_combinations_used = cadres_used_df[cadres_used_df['Sum'] != 0]['Key'] + +# Subset scenario staffing level to only include cadre-level combinations used in the simulation +current_staff_count_by_level_and_officer_type['OfficerType_FacilityLevel'] = 'Officer_Type=' + current_staff_count_by_level_and_officer_type['Officer_Category'].astype(str) + '|Facility_Level=' + current_staff_count_by_level_and_officer_type['Facility_Level'].astype(str) +used_staff_count_by_level_and_officer_type = current_staff_count_by_level_and_officer_type[current_staff_count_by_level_and_officer_type['OfficerType_FacilityLevel'].isin(list_of_cadre_and_level_combinations_used)] + # Calculate salary cost for modelled health workforce (Staff count X Annual salary) -salary_for_modelled_staff = pd.merge(hr_annual_salary, current_staff_count_by_level_and_officer_type, on = ['Officer_Category', 'Facility_Level']) +salary_for_modelled_staff = pd.merge(used_staff_count_by_level_and_officer_type[['OfficerType_FacilityLevel', 'Staff_Count']], + hr_annual_salary[['OfficerType_FacilityLevel', 'Salary_USD']], on = ['OfficerType_FacilityLevel'], how = "left") salary_for_modelled_staff['Total_salary_by_cadre_and_level'] = 
salary_for_modelled_staff['Salary_USD'] * salary_for_modelled_staff['Staff_Count'] # Create a dataframe to store financial costs @@ -198,6 +217,56 @@ def plot_stock_to_dispensed(_df, plot_var, groupby_var, outlier_percentile): plot_stock_to_dispensed(lmis_consumable_usage_by_district_and_level, 'stock_to_dispensed_ratio', 'item_code', 0.95) +# Open the .gz file in read mode ('rb' for binary mode) +data = dict() +with gzip.open('./outputs/tlo.methods.healthsystem.log.gz', 'rb') as f: + # Read the contents of the file + data = f.read() + +# Now you can process the data as needed +# For example, you can decode it if it's in a text format +decoded_data = data.decode('ascii') +print(decoded_data) + +folder = './outputs/' +output = dict() +with open('./outputs/tlo.methods.healthsystem.log.gz', "rb") as f: + output = pickle.load(f) + + +#----- + +parsed_dicts = [] + +# Split the input string into individual JSON objects +json_objects = decoded_data.split('\n') + +# Iterate over each JSON object and attempt to parse it +for json_str in json_objects: + if json_str.strip(): # Check if the JSON string is not empty + try: + parsed_dict = json.loads(json_str) + parsed_dicts.append(parsed_dict) + except json.JSONDecodeError as e: + print("Error decoding JSON:", e) + +print(parsed_dicts) + +# Initialize an empty dictionary +merged_dict = {} + +# Iterate over each dictionary in the list +for d in parsed_dicts[4:30]: + # Update the merged dictionary with the contents of each dictionary + merged_dict.update(d) + +print(merged_dict) + +#----- + +with open('./outputs/tlo.methods.healthsystem.log', 'r') as file: + # Read the contents of the file + log_content = file.read() # Compare financial costs with actual budget data #################################################### From c453fe181a9a2ed90873b06d66a5a44c714a4eb6 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Wed, 13 Mar 2024 15:05:51 +0000 Subject: [PATCH 022/230] update HR economic cost - stylistic changes --- src/scripts/costing/costing.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index b37e797e07..0265152184 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -115,8 +115,10 @@ def drop_outside_period(_df): aggregate_frac_time_used_by_officer_type.columns = ['Value'] aggregate_frac_time_used_by_officer_type['OfficerType_FacilityLevel'] = aggregate_frac_time_used_by_officer_type.index -salary_for_required_staff = pd.merge(hr_annual_salary, aggregate_frac_time_used_by_officer_type, on = ['OfficerType_FacilityLevel']) -salary_for_required_staff = pd.merge(salary_for_required_staff, current_staff_count_by_level_and_officer_type, on = ['Officer_Category', 'Facility_Level']) +salary_for_required_staff = pd.merge(aggregate_frac_time_used_by_officer_type[['OfficerType_FacilityLevel', 'Value']], + hr_annual_salary[['OfficerType_FacilityLevel', 'Salary_USD']], on = ['OfficerType_FacilityLevel']) +salary_for_required_staff = pd.merge(salary_for_required_staff, + current_staff_count_by_level_and_officer_type[['OfficerType_FacilityLevel', 'Staff_Count']], on = ['OfficerType_FacilityLevel']) # Calculate salary cost for required health workforce (Staff count X Fraction of staff time needed X Annual salary) salary_for_required_staff['Total_salary_by_cadre_and_level'] = salary_for_required_staff['Salary_USD'] * salary_for_required_staff['Value'] * salary_for_required_staff['Staff_Count'] From d0cb41fb8820d24674d5038d5065b9a868049216 Mon Sep 17 
00:00:00 2001 From: sm2511 Date: Wed, 13 Mar 2024 15:36:46 +0000 Subject: [PATCH 023/230] update consumable cost calculation - we assume that the log file provides the units of consumables needed rather than the number of HSIs which use the consumable --- src/scripts/costing/costing.py | 74 +++------------------------------- 1 file changed, 6 insertions(+), 68 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 0265152184..9994a44389 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -126,8 +126,8 @@ def drop_outside_period(_df): # Create a dataframe to store economic costs scenario_cost_economic = pd.DataFrame({'HR': salary_for_required_staff['Total_salary_by_cadre_and_level'].sum()}, index=[0]) -# 1. Consumables cost -# 2.1 Consumables cost - Financial (What needs to be purchased given what is made available) +# 2. Consumables cost +# 2.1 Consumables cost - Financial (What needs to be purchased given what is dispensed) _df = log['tlo.methods.healthsystem']['Consumables'] counts_of_available = defaultdict(int) @@ -135,20 +135,13 @@ def drop_outside_period(_df): for _, row in _df.iterrows(): for item, num in eval(row['Item_Available']).items(): counts_of_available[item] += num - for item, num in eval(row['Item_NotAvailable']).items(): - counts_of_not_available[item] += num -consumables_count_df = pd.concat( - {'Available': pd.Series(counts_of_available), 'Not_Available': pd.Series(counts_of_not_available)}, - axis=1 - ).fillna(0).astype(int).stack() # Load consumables cost data -unit_price_consumable = workbook_cost["consumables"][['Item_Code', 'Chosen_price_per_unit (USD)', 'Number of units needed per HSI']] +unit_price_consumable = workbook_cost["consumables"][['Item_Code', 'Chosen_price_per_unit (USD)']] unit_price_consumable = unit_price_consumable.set_index('Item_Code').to_dict(orient='index') # Multiply number of items needed by cost of consumable cost_of_consumables_dispensed = dict(zip(unit_price_consumable, (unit_price_consumable[key]['Chosen_price_per_unit (USD)'] * - unit_price_consumable[key]['Number of units needed per HSI'] * counts_of_available[key] for key in unit_price_consumable))) total_cost_of_consumables_dispensed = sum(value for value in cost_of_consumables_dispensed.values() if not np.isnan(value)) @@ -156,9 +149,8 @@ def drop_outside_period(_df): # Estimate the stock to dispensed ratio from OpenLMIS data lmis_consumable_usage = pd.read_csv(path_for_new_resourcefiles / "ResourceFile_Consumables_availability_and_usage.csv") # Collapse by item_code -lmis_consumable_usage_by_item = lmis_consumable_usage.groupby(['item_code'])[['closing_bal', 'amc', 'dispensed', 'received']].sum() +lmis_consumable_usage_by_item = lmis_consumable_usage.groupby(['item_code'])[['closing_bal', 'dispensed']].sum() lmis_consumable_usage_by_item['stock_to_dispensed_ratio'] = lmis_consumable_usage_by_item['closing_bal']/lmis_consumable_usage_by_item['dispensed'] -#lmis_consumable_usage_by_item = lmis_consumable_usage_by_item[['item_code', 'stock_to_dispensed_ratio']] # Trim top and bottom 5 percentile value for stock_to_dispensed_ratio percentile_5 = lmis_consumable_usage_by_item['stock_to_dispensed_ratio'].quantile(0.05) percentile_95 = lmis_consumable_usage_by_item['stock_to_dispensed_ratio'].quantile(0.95) @@ -168,10 +160,8 @@ def drop_outside_period(_df): lmis_stock_to_dispensed_ratio_by_item.to_dict() average_stock_to_dispensed_ratio = lmis_stock_to_dispensed_ratio_by_item.mean() - # Multiply number of items needed 
by cost of consumable cost_of_consumables_stocked = dict(zip(unit_price_consumable, (unit_price_consumable[key]['Chosen_price_per_unit (USD)'] * - unit_price_consumable[key]['Number of units needed per HSI'] * counts_of_available[key] * lmis_stock_to_dispensed_ratio_by_item.get(key, average_stock_to_dispensed_ratio) for key in counts_of_available))) @@ -182,12 +172,11 @@ def drop_outside_period(_df): # Explore the ratio of dispensed drugs to drug stock #################################################### # Collapse monthly data -lmis_consumable_usage_by_district_and_level = lmis_consumable_usage.groupby(['district', 'fac_type_tlo','category', 'item_code'])[['closing_bal', 'amc', 'dispensed', 'received']].sum() +lmis_consumable_usage_by_district_and_level = lmis_consumable_usage.groupby(['district', 'fac_type_tlo','category', 'item_code'])[['closing_bal', 'dispensed']].sum() lmis_consumable_usage_by_district_and_level.reset_index() lmis_consumable_usage_by_district_and_level['stock_to_dispensed_ratio'] = lmis_consumable_usage_by_district_and_level['closing_bal']/lmis_consumable_usage_by_district_and_level['dispensed'] # TODO: Only consider the months for which original OpenLMIS data was available for closing_stock and dispensed -# TODO Ensure that expected units per case are expected units per HSI def plot_stock_to_dispensed(_df, plot_var, groupby_var, outlier_percentile): # Exclude the top x percentile (outliers) from the plot percentile_excluded = _df[plot_var].quantile(outlier_percentile) @@ -219,63 +208,12 @@ def plot_stock_to_dispensed(_df, plot_var, groupby_var, outlier_percentile): plot_stock_to_dispensed(lmis_consumable_usage_by_district_and_level, 'stock_to_dispensed_ratio', 'item_code', 0.95) -# Open the .gz file in read mode ('rb' for binary mode) -data = dict() -with gzip.open('./outputs/tlo.methods.healthsystem.log.gz', 'rb') as f: - # Read the contents of the file - data = f.read() - -# Now you can process the data as needed -# For example, you can decode it if it's in a text format -decoded_data = data.decode('ascii') -print(decoded_data) - -folder = './outputs/' -output = dict() -with open('./outputs/tlo.methods.healthsystem.log.gz', "rb") as f: - output = pickle.load(f) - - -#----- - -parsed_dicts = [] - -# Split the input string into individual JSON objects -json_objects = decoded_data.split('\n') - -# Iterate over each JSON object and attempt to parse it -for json_str in json_objects: - if json_str.strip(): # Check if the JSON string is not empty - try: - parsed_dict = json.loads(json_str) - parsed_dicts.append(parsed_dict) - except json.JSONDecodeError as e: - print("Error decoding JSON:", e) - -print(parsed_dicts) - -# Initialize an empty dictionary -merged_dict = {} - -# Iterate over each dictionary in the list -for d in parsed_dicts[4:30]: - # Update the merged dictionary with the contents of each dictionary - merged_dict.update(d) - -print(merged_dict) - -#----- - -with open('./outputs/tlo.methods.healthsystem.log', 'r') as file: - # Read the contents of the file - log_content = file.read() - # Compare financial costs with actual budget data #################################################### salary_budget_2018 = 69478749 consuambles_budget_2018 = 228934188 real_budget = [salary_budget_2018, consuambles_budget_2018] -model_cost = [scenario_cost_financial['HR'][0], 0] +model_cost = [scenario_cost_financial['HR'][0], scenario_cost_financial['Consumables'][0]] labels = ['HR_salary', 'Consumables'] plt.scatter(real_budget, model_cost) From 
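# [Editor's note: illustrative sketch only, not part of the patch series.] The consumables
# costing above in miniature: cost of items dispensed = unit price x units dispensed, and
# the 'stocked' cost inflates each item by its stock-to-dispensed ratio, falling back to
# the average ratio where an item has no usable LMIS estimate. All numbers are invented.
unit_price = {101: 0.12, 202: 1.50}                  # hypothetical item codes -> USD per unit
units_dispensed = {101: 10_000, 202: 250}
stock_to_dispensed = {101: 1.3}                      # item 202 missing -> use the average
average_ratio = sum(stock_to_dispensed.values()) / len(stock_to_dispensed)

cost_dispensed = {k: unit_price[k] * units_dispensed[k] for k in units_dispensed}
cost_stocked = {k: cost_dispensed[k] * stock_to_dispensed.get(k, average_ratio)
                for k in units_dispensed}
print(sum(cost_dispensed.values()), sum(cost_stocked.values()))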
7ee9dfb99653d8d0144f63cff35331ceead2d9f2 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Wed, 13 Mar 2024 15:48:35 +0000 Subject: [PATCH 024/230] add latest version of costing resource file - with update consumables cost --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 685d4b6c4f..971235c3dd 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:497784d6372b682a7139b7cd77099f1bde6b44a40e085e8b11600ad9c58cd0be -size 484032 +oid sha256:bc96e048f59b8db7eedc76a105c32e0bad87b21bc4a279517b460feac6e2e93f +size 483750 From c2c6949d91b1cd0777c335f81d41246539d00366 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Wed, 13 Mar 2024 16:15:22 +0000 Subject: [PATCH 025/230] add latest version of costing resource file --- resources/costing/ResourceFile_Costing.xlsx | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 resources/costing/ResourceFile_Costing.xlsx diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx new file mode 100644 index 0000000000..033e2ad96a --- /dev/null +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6702c24e0f9833ccdb18cc5f218ddf84a402864d26b01540799a01887f42e1c7 +size 484822 From dbcf1fbc4f7d0b3073b6148f61423d991a74364f Mon Sep 17 00:00:00 2001 From: sm2511 Date: Sun, 24 Mar 2024 20:04:41 +0000 Subject: [PATCH 026/230] add latest version of costing resource file - with updated units for cost of consumables --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 033e2ad96a..aba0cb2e3b 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6702c24e0f9833ccdb18cc5f218ddf84a402864d26b01540799a01887f42e1c7 -size 484822 +oid sha256:3bc42251e33aac15438923141ca4568b284465a0a7a8ac6bfc46c4bf525c4e9a +size 1006885 From 1d21999fc9e459cc8f1d0ec98d5c513a4d23362a Mon Sep 17 00:00:00 2001 From: sm2511 Date: Sun, 24 Mar 2024 20:23:10 +0000 Subject: [PATCH 027/230] update Gentamycin (units) --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index aba0cb2e3b..b2f146c2fe 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3bc42251e33aac15438923141ca4568b284465a0a7a8ac6bfc46c4bf525c4e9a -size 1006885 +oid sha256:f053aa28de62547d6a1f36ace122fe62e3caff03e4fc5babbf157a9e14b66fc2 +size 1005374 From ba0f65805894afd235bba7465a223daf2ccdca22 Mon Sep 17 00:00:00 2001 From: tdm32 Date: Mon, 25 Mar 2024 16:26:47 +0000 Subject: [PATCH 028/230] update item quantities for HIV prep and infant prep --- src/tlo/methods/hiv.py | 44 ++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py index 6a1e7f0245..4601e2117e 100644 --- a/src/tlo/methods/hiv.py +++ 
b/src/tlo/methods/hiv.py @@ -958,31 +958,32 @@ def initialise_simulation(self, sim): self.item_codes_for_consumables_required['circ'] = \ hs.get_item_codes_from_package_name("Male circumcision ") - self.item_codes_for_consumables_required['prep'] = { - hs.get_item_code_from_item_name("Tenofovir (TDF)/Emtricitabine (FTC), tablet, 300/200 mg"): 1} + # 1 tablet daily + self.item_codes_for_consumables_required['prep'] = \ + hs.get_item_code_from_item_name("Tenofovir (TDF)/Emtricitabine (FTC), tablet, 300/200 mg") - # infant NVP given in 3-monthly dosages - self.item_codes_for_consumables_required['infant_prep'] = { - hs.get_item_code_from_item_name("Nevirapine, oral solution, 10 mg/ml"): 1} + # infant NVP 1.5mg daily for birth weight 2500g or above, 6 weeks + self.item_codes_for_consumables_required['infant_prep'] = \ + hs.get_item_code_from_item_name("Nevirapine, oral solution, 10 mg/ml") # First - line ART for adults(age > "ART_age_cutoff_older_child") - self.item_codes_for_consumables_required['First-line ART regimen: adult'] = { - hs.get_item_code_from_item_name("First-line ART regimen: adult"): 1} - self.item_codes_for_consumables_required['First-line ART regimen: adult: cotrimoxazole'] = { - hs.get_item_code_from_item_name("Cotrimoxizole, 960mg pppy"): 1} + self.item_codes_for_consumables_required['First-line ART regimen: adult'] = \ + hs.get_item_code_from_item_name("First-line ART regimen: adult") + self.item_codes_for_consumables_required['First-line ART regimen: adult: cotrimoxazole'] = \ + hs.get_item_code_from_item_name("Cotrimoxizole, 960mg pppy") # ART for older children aged ("ART_age_cutoff_younger_child" < age <= "ART_age_cutoff_older_child"): # cotrim is separate item - optional in get_cons call - self.item_codes_for_consumables_required['First line ART regimen: older child'] = { - hs.get_item_code_from_item_name("First line ART regimen: older child"): 1} - self.item_codes_for_consumables_required['First line ART regimen: older child: cotrimoxazole'] = { - hs.get_item_code_from_item_name("Sulfamethoxazole + trimethropin, tablet 400 mg + 80 mg"): 1} + self.item_codes_for_consumables_required['First line ART regimen: older child'] = \ + hs.get_item_code_from_item_name("First line ART regimen: older child") + self.item_codes_for_consumables_required['First line ART regimen: older child: cotrimoxazole'] = \ + hs.get_item_code_from_item_name("Sulfamethoxazole + trimethropin, tablet 400 mg + 80 mg") # ART for younger children aged (age < "ART_age_cutoff_younger_child"): - self.item_codes_for_consumables_required['First line ART regimen: young child'] = { - hs.get_item_code_from_item_name("First line ART regimen: young child"): 1} - self.item_codes_for_consumables_required['First line ART regimen: young child: cotrimoxazole'] = { - hs.get_item_code_from_item_name("Sulfamethoxazole + trimethropin, oral suspension, 240 mg, 100 ml"): 1} + self.item_codes_for_consumables_required['First line ART regimen: young child'] = \ + hs.get_item_code_from_item_name("First line ART regimen: young child") + self.item_codes_for_consumables_required['First line ART regimen: young child: cotrimoxazole'] = \ + hs.get_item_code_from_item_name("Sulfamethoxazole + trimethropin, oral suspension, 240 mg, 100 ml") # 7) Define the DxTests # HIV Rapid Diagnostic Test: @@ -2320,7 +2321,9 @@ def apply(self, person_id, squeeze_factor): return self.sim.modules["HealthSystem"].get_blank_appt_footprint() # Check that infant prophylaxis is available and if it is, initiate: - if 
self.get_consumables(item_codes=self.module.item_codes_for_consumables_required['infant_prep']): + if self.get_consumables( + item_codes={self.module.item_codes_for_consumables_required['infant_prep']: 63} + ): df.at[person_id, "hv_is_on_prep"] = True # Schedule follow-up visit for 3 months time @@ -2400,7 +2403,10 @@ def apply(self, person_id, squeeze_factor): return self.make_appt_footprint({"Over5OPD": 1, "VCTPositive": 1}) # Check that PrEP is available and if it is, initiate or continue PrEP: - if self.get_consumables(item_codes=self.module.item_codes_for_consumables_required['prep']): + quantity_required = self.module.parameters['dispensation_period_months'] * 30 + if self.get_consumables( + item_codes={self.module.item_codes_for_consumables_required['prep']: quantity_required} + ): df.at[person_id, "hv_is_on_prep"] = True # Schedule 'decision about whether to continue on PrEP' for 3 months time From ae1dc16a697ff730eab1dcc5c8ff797483e44f4c Mon Sep 17 00:00:00 2001 From: sm2511 Date: Tue, 26 Mar 2024 10:57:05 +0000 Subject: [PATCH 029/230] update 1g to 1mg for Albendazole (Item code 52) --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index b2f146c2fe..2fde5966be 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f053aa28de62547d6a1f36ace122fe62e3caff03e4fc5babbf157a9e14b66fc2 -size 1005374 +oid sha256:faa917052b0c99ae60174c884b4e104be59b0b7d9afebe6f8ed47902ace1b2a3 +size 1005652 From fcee46707fed9de4f232557a1aa70facf43cad95 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Tue, 26 Mar 2024 17:02:23 +0000 Subject: [PATCH 030/230] update units for gloves from 1 glove to "1 pair of gloves" --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 2fde5966be..cd9c0e63a6 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:faa917052b0c99ae60174c884b4e104be59b0b7d9afebe6f8ed47902ace1b2a3 -size 1005652 +oid sha256:c02f133045eaa139e37af44e9f88da6c954e3254ca7c8f8a7327ca4b80876dcf +size 1005696 From c352bee2c59b13b0f5fa1d18a6aa3b5ef1ad3040 Mon Sep 17 00:00:00 2001 From: tdm32 Date: Wed, 27 Mar 2024 11:52:41 +0000 Subject: [PATCH 031/230] update item quantities for TB drugs add 3HP as new consumable for IPT --- resources/costing/ResourceFile_Costing.xlsx | 4 +- resources/costing/~$ResourceFile_Costing.xlsx | 3 + .../analysis_scenarios_draws.json | 245 ++++++++++++++++++ src/tlo/methods/hiv.py | 37 ++- src/tlo/methods/tb.py | 42 ++- 5 files changed, 304 insertions(+), 27 deletions(-) create mode 100644 resources/costing/~$ResourceFile_Costing.xlsx create mode 100644 src/scripts/malaria/impact_analysis/analysis_scenarios_draws.json diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index b2f146c2fe..f6e8ff9344 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f053aa28de62547d6a1f36ace122fe62e3caff03e4fc5babbf157a9e14b66fc2 -size 1005374 +oid 
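# [Editor's note: illustrative sketch only, not part of the patch series.] The quantity
# logic introduced for PrEP above, reduced to its arithmetic: a dispensation period in
# months becomes a daily tablet count, passed to get_consumables as {item_code: quantity}
# rather than a bare item code. The item code and period below are placeholders.
prep_item_code = 1191                                # hypothetical item code
dispensation_period_months = 3
quantity_required = dispensation_period_months * 30  # one tablet per day
item_request = {prep_item_code: quantity_required}
print(item_request)                                  # {1191: 90}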
sha256:2e73c37d42dbb70c452d5abe3fbaa09be11430dcb0e730b05a736d8c79a65c4a +size 1006136 diff --git a/resources/costing/~$ResourceFile_Costing.xlsx b/resources/costing/~$ResourceFile_Costing.xlsx new file mode 100644 index 0000000000..d5d6d83ccc --- /dev/null +++ b/resources/costing/~$ResourceFile_Costing.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:019349b15c524cfef4b39db4dd792de3376f4bc3da9b6b298a1fee07c4eb219e +size 165 diff --git a/src/scripts/malaria/impact_analysis/analysis_scenarios_draws.json b/src/scripts/malaria/impact_analysis/analysis_scenarios_draws.json new file mode 100644 index 0000000000..f456b70bd1 --- /dev/null +++ b/src/scripts/malaria/impact_analysis/analysis_scenarios_draws.json @@ -0,0 +1,245 @@ +{ + "scenario_script_path": "src/scripts/malaria/impact_analysis/analysis_scenarios.py", + "scenario_seed": 0, + "runs_per_draw": 5, + "draws": [ + { + "draw_number": 0, + "parameters": { + "HealthSystem": { + "Service_Availability": [ + "*" + ], + "use_funded_or_actual_staffing": "funded", + "mode_appt_constraints": 1, + "policy_name": "Naive" + }, + "Hiv": { + "scenario": 0 + } + } + }, + { + "draw_number": 1, + "parameters": { + "HealthSystem": { + "Service_Availability": [ + "*" + ], + "use_funded_or_actual_staffing": "funded", + "mode_appt_constraints": 1, + "policy_name": "Naive" + }, + "Hiv": { + "scenario": 1 + } + } + }, + { + "draw_number": 2, + "parameters": { + "HealthSystem": { + "Service_Availability": [ + "*" + ], + "use_funded_or_actual_staffing": "funded", + "mode_appt_constraints": 1, + "policy_name": "Naive" + }, + "Hiv": { + "scenario": 2 + } + } + }, + { + "draw_number": 3, + "parameters": { + "HealthSystem": { + "Service_Availability": [ + "*" + ], + "use_funded_or_actual_staffing": "funded", + "mode_appt_constraints": 1, + "policy_name": "Naive" + }, + "Hiv": { + "scenario": 3 + } + } + }, + { + "draw_number": 4, + "parameters": { + "HealthSystem": { + "Service_Availability": [ + "*" + ], + "use_funded_or_actual_staffing": "funded", + "mode_appt_constraints": 1, + "policy_name": "Naive" + }, + "Hiv": { + "scenario": 5 + } + } + }, + { + "draw_number": 5, + "parameters": { + "HealthSystem": { + "Service_Availability": [ + "Alri_*", + "AntenatalCare_*", + "BladderCancer_*", + "BreastCancer_*", + "CardioMetabolicDisorders_*", + "Contraception_*", + "Copd_*", + "DeliveryCare_*", + "Depression_*", + "Diarrhoea_*", + "Epi_*", + "Epilepsy_*", + "FirstAttendance_*", + "Malaria_*", + "Measles_*", + "OesophagealCancer_*", + "OtherAdultCancer_*", + "PostnatalCare_*", + "ProstateCancer_*", + "Rti_*", + "Schisto_*", + "Tb_*", + "Undernutrition_*", + "Hiv_PalliativeCare" + ], + "use_funded_or_actual_staffing": "funded", + "mode_appt_constraints": 1, + "policy_name": "Naive" + }, + "Hiv": { + "scenario": 0 + } + } + }, + { + "draw_number": 6, + "parameters": { + "HealthSystem": { + "Service_Availability": [ + "Alri_*", + "AntenatalCare_*", + "BladderCancer_*", + "BreastCancer_*", + "CardioMetabolicDisorders_*", + "Contraception_*", + "Copd_*", + "DeliveryCare_*", + "Depression_*", + "Diarrhoea_*", + "Epi_*", + "Epilepsy_*", + "FirstAttendance_*", + "Hiv_*", + "Malaria_*", + "Measles_*", + "OesophagealCancer_*", + "OtherAdultCancer_*", + "PostnatalCare_*", + "ProstateCancer_*", + "Rti_*", + "Schisto_*", + "Undernutrition_*", + "Tb_PalliativeCare" + ], + "use_funded_or_actual_staffing": "funded", + "mode_appt_constraints": 1, + "policy_name": "Naive" + }, + "Hiv": { + "scenario": 0 + } + } + }, + { + "draw_number": 7, + "parameters": { + 
"HealthSystem": { + "Service_Availability": [ + "Alri_*", + "AntenatalCare_*", + "BladderCancer_*", + "BreastCancer_*", + "CardioMetabolicDisorders_*", + "Contraception_*", + "Copd_*", + "DeliveryCare_*", + "Depression_*", + "Diarrhoea_*", + "Epi_*", + "Epilepsy_*", + "FirstAttendance_*", + "Hiv_*", + "Measles_*", + "OesophagealCancer_*", + "OtherAdultCancer_*", + "PostnatalCare_*", + "ProstateCancer_*", + "Rti_*", + "Schisto_*", + "Tb_*", + "Undernutrition_*", + "Malaria_Treatment_Complicated" + ], + "use_funded_or_actual_staffing": "funded", + "mode_appt_constraints": 1, + "policy_name": "Naive" + }, + "Hiv": { + "scenario": 3 + } + } + }, + { + "draw_number": 8, + "parameters": { + "HealthSystem": { + "Service_Availability": [ + "Alri_*", + "AntenatalCare_*", + "BladderCancer_*", + "BreastCancer_*", + "CardioMetabolicDisorders_*", + "Contraception_*", + "Copd_*", + "DeliveryCare_*", + "Depression_*", + "Diarrhoea_*", + "Epi_*", + "Epilepsy_*", + "FirstAttendance_*", + "Measles_*", + "OesophagealCancer_*", + "OtherAdultCancer_*", + "PostnatalCare_*", + "ProstateCancer_*", + "Rti_*", + "Schisto_*", + "Undernutrition_*", + "Hiv_PalliativeCare", + "Tb_PalliativeCare", + "Malaria_Treatment_Complicated" + ], + "use_funded_or_actual_staffing": "funded", + "mode_appt_constraints": 1, + "policy_name": "Naive" + }, + "Hiv": { + "scenario": 3 + } + } + } + ], + "commit": "bc91bdca536aace9c51a36b72628d532ef172019", + "github": "https://github.com/UCL/TLOmodel/tree/bc91bdca536aace9c51a36b72628d532ef172019" +} \ No newline at end of file diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py index 4601e2117e..7377b93f51 100644 --- a/src/tlo/methods/hiv.py +++ b/src/tlo/methods/hiv.py @@ -958,32 +958,37 @@ def initialise_simulation(self, sim): self.item_codes_for_consumables_required['circ'] = \ hs.get_item_codes_from_package_name("Male circumcision ") - # 1 tablet daily + # adult prep: 1 tablet daily self.item_codes_for_consumables_required['prep'] = \ hs.get_item_code_from_item_name("Tenofovir (TDF)/Emtricitabine (FTC), tablet, 300/200 mg") - # infant NVP 1.5mg daily for birth weight 2500g or above, 6 weeks + # infant NVP 1.5mg daily for birth weight 2500g or above, for 6 weeks self.item_codes_for_consumables_required['infant_prep'] = \ hs.get_item_code_from_item_name("Nevirapine, oral solution, 10 mg/ml") # First - line ART for adults(age > "ART_age_cutoff_older_child") + # TDF/3TC/DTG 120/60/50mg, 1 tablet per day + # cotrim adult tablet, 1 tablet per day self.item_codes_for_consumables_required['First-line ART regimen: adult'] = \ hs.get_item_code_from_item_name("First-line ART regimen: adult") self.item_codes_for_consumables_required['First-line ART regimen: adult: cotrimoxazole'] = \ hs.get_item_code_from_item_name("Cotrimoxizole, 960mg pppy") # ART for older children aged ("ART_age_cutoff_younger_child" < age <= "ART_age_cutoff_older_child"): - # cotrim is separate item - optional in get_cons call + # ABC/3TC/DTG 120/60/50mg, 3 tablets per day + # cotrim paediatric tablet, 4 tablets per day self.item_codes_for_consumables_required['First line ART regimen: older child'] = \ hs.get_item_code_from_item_name("First line ART regimen: older child") self.item_codes_for_consumables_required['First line ART regimen: older child: cotrimoxazole'] = \ - hs.get_item_code_from_item_name("Sulfamethoxazole + trimethropin, tablet 400 mg + 80 mg") + hs.get_item_code_from_item_name("Cotrimoxazole 120mg_1000_CMST") # ART for younger children aged (age < "ART_age_cutoff_younger_child"): + # ABC/3TC/DTG 
120/60/10mg, 2 tablets per day + # cotrim paediatric tablet, 2 tablets per day self.item_codes_for_consumables_required['First line ART regimen: young child'] = \ hs.get_item_code_from_item_name("First line ART regimen: young child") self.item_codes_for_consumables_required['First line ART regimen: young child: cotrimoxazole'] = \ - hs.get_item_code_from_item_name("Sulfamethoxazole + trimethropin, oral suspension, 240 mg, 100 ml") + hs.get_item_code_from_item_name("Cotrimoxazole 120mg_1000_CMST") # 7) Define the DxTests # HIV Rapid Diagnostic Test: @@ -2630,29 +2635,33 @@ def get_drugs(self, age_of_person): whether individual drugs were available""" p = self.module.parameters + dispensation_days = 30 * self.module.parameters['dispensation_period_months'] if age_of_person < p["ART_age_cutoff_young_child"]: # Formulation for young children drugs_available = self.get_consumables( - item_codes=self.module.item_codes_for_consumables_required['First line ART regimen: young child'], - optional_item_codes=self.module.item_codes_for_consumables_required[ - 'First line ART regimen: young child: cotrimoxazole'], + item_codes={self.module.item_codes_for_consumables_required[ + 'First line ART regimen: young child']: dispensation_days * 2}, + optional_item_codes={self.module.item_codes_for_consumables_required[ + 'First line ART regimen: young child: cotrimoxazole']: dispensation_days * 2}, return_individual_results=True) elif age_of_person <= p["ART_age_cutoff_older_child"]: # Formulation for older children drugs_available = self.get_consumables( - item_codes=self.module.item_codes_for_consumables_required['First line ART regimen: older child'], - optional_item_codes=self.module.item_codes_for_consumables_required[ - 'First line ART regimen: older child: cotrimoxazole'], + item_codes={self.module.item_codes_for_consumables_required[ + 'First line ART regimen: older child']: dispensation_days * 3}, + optional_item_codes={self.module.item_codes_for_consumables_required[ + 'First line ART regimen: older child: cotrimoxazole']: dispensation_days * 4}, return_individual_results=True) else: # Formulation for adults drugs_available = self.get_consumables( - item_codes=self.module.item_codes_for_consumables_required['First-line ART regimen: adult'], - optional_item_codes=self.module.item_codes_for_consumables_required[ - 'First-line ART regimen: adult: cotrimoxazole'], + item_codes={self.module.item_codes_for_consumables_required[ + 'First-line ART regimen: adult']: dispensation_days}, + optional_item_codes={self.module.item_codes_for_consumables_required[ + 'First-line ART regimen: adult: cotrimoxazole']: dispensation_days}, return_individual_results=True) # add drug names to dict diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py index 79afd6fa5f..6995cf1eba 100644 --- a/src/tlo/methods/tb.py +++ b/src/tlo/methods/tb.py @@ -740,6 +740,7 @@ def get_consumables_for_dx_and_tx(self): ) # 4) -------- Define the treatment options -------- + # treatment supplied as full kits for duration of treatment # adult treatment - primary self.item_codes_for_consumables_required['tb_tx_adult'] = \ hs.get_item_code_from_item_name("Cat. I & III Patient Kit A") @@ -761,12 +762,16 @@ def get_consumables_for_dx_and_tx(self): hs.get_item_code_from_item_name("Cat. 
II Patient Kit A2") # mdr treatment - self.item_codes_for_consumables_required['tb_mdrtx'] = { - hs.get_item_code_from_item_name("Treatment: second-line drugs"): 1} + self.item_codes_for_consumables_required['tb_mdrtx'] = \ + hs.get_item_code_from_item_name("Treatment: second-line drugs") # ipt - self.item_codes_for_consumables_required['tb_ipt'] = { - hs.get_item_code_from_item_name("Isoniazid/Pyridoxine, tablet 300 mg"): 1} + self.item_codes_for_consumables_required['tb_isoniazid'] = \ + hs.get_item_code_from_item_name("Isoniazid/Pyridoxine, tablet 300 mg") + + # todo not yet on consumables database + # self.item_codes_for_consumables_required['tb_3HP'] = { + # hs.get_item_code_from_item_name("Isoniazid/Rifapentine"): 1} def initialise_population(self, population): @@ -2192,8 +2197,9 @@ def apply(self, person_id, squeeze_factor): return self.sim.modules["HealthSystem"].get_blank_appt_footprint() treatment_regimen = self.select_treatment(person_id) + # treatment supplied in kits, one kit per treatment course treatment_available = self.get_consumables( - item_codes=self.module.item_codes_for_consumables_required[treatment_regimen] + item_codes={self.module.item_codes_for_consumables_required[treatment_regimen]: 1} ) if treatment_available: @@ -2432,8 +2438,9 @@ class HSI_Tb_Start_or_Continue_Ipt(HSI_Event, IndividualScopeEventMixin): * HIV.HSI_Hiv_StartOrContinueTreatment for PLHIV, diagnosed and on ART * Tb.HSI_Tb_StartTreatment for up to 5 contacts of diagnosed active TB case - if person referred by ART initiation (HIV+), IPT given for 36 months - paediatric IPT is 6-9 months + Isoniazid preventive therapy for HIV-infected children : 6 months, 180 doses + 3HP (Isoniazid/Rifapentine) for adults: 12 weeks, 12 doses + 3HP for children ages >2 yrs hiv- """ def __init__(self, module, person_id): @@ -2473,10 +2480,23 @@ def apply(self, person_id, squeeze_factor): else: # Check/log use of consumables, and give IPT if available - # if not available, reschedule IPT start - if self.get_consumables( - item_codes=self.module.item_codes_for_consumables_required["tb_ipt"] - ): + + # if child and HIV+ or child under 2 yrs + if ((person["age_years"] <= 15) and person["hv_inf"]) or (person["age_years"] <= 2): + + # 6 months dispensation, once daily + drugs_available = self.get_consumables( + item_codes={self.module.item_codes_for_consumables_required["tb_ipt"]: 180}) + + # for all others + # todo check 3HP listed in database + else: + # 12 weeks dispensation, once weekly + drugs_available = self.get_consumables( + item_codes={self.module.item_codes_for_consumables_required["tb_3HP"]: 12}) + + # if available, schedule IPT decision + if drugs_available: # Update properties df.at[person_id, "tb_on_ipt"] = True df.at[person_id, "tb_date_ipt"] = self.sim.date From 7d67c5c3097efb787c8fb9dc609cc89928d0347b Mon Sep 17 00:00:00 2001 From: tdm32 Date: Wed, 27 Mar 2024 12:02:24 +0000 Subject: [PATCH 032/230] add conditions for each IPT recommendation --- resources/costing/~$ResourceFile_Costing.xlsx | 3 --- src/tlo/methods/tb.py | 4 ++-- 2 files changed, 2 insertions(+), 5 deletions(-) delete mode 100644 resources/costing/~$ResourceFile_Costing.xlsx diff --git a/resources/costing/~$ResourceFile_Costing.xlsx b/resources/costing/~$ResourceFile_Costing.xlsx deleted file mode 100644 index d5d6d83ccc..0000000000 --- a/resources/costing/~$ResourceFile_Costing.xlsx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:019349b15c524cfef4b39db4dd792de3376f4bc3da9b6b298a1fee07c4eb219e -size 
165 diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py index 6995cf1eba..59f360780f 100644 --- a/src/tlo/methods/tb.py +++ b/src/tlo/methods/tb.py @@ -770,8 +770,8 @@ def get_consumables_for_dx_and_tx(self): hs.get_item_code_from_item_name("Isoniazid/Pyridoxine, tablet 300 mg") # todo not yet on consumables database - # self.item_codes_for_consumables_required['tb_3HP'] = { - # hs.get_item_code_from_item_name("Isoniazid/Rifapentine"): 1} + self.item_codes_for_consumables_required['tb_3HP'] = { + hs.get_item_code_from_item_name("Isoniazid/Rifapentine"): 1} def initialise_population(self, population): From ae0d18f0cb3389853ca9bbcc077d56816b7e1b45 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Thu, 28 Mar 2024 14:57:15 +0000 Subject: [PATCH 033/230] add prices for ARVs - and create placeholder for Isoniazid/Rifapentine --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index cd9c0e63a6..4207f556c0 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c02f133045eaa139e37af44e9f88da6c954e3254ca7c8f8a7327ca4b80876dcf -size 1005696 +oid sha256:7fc22d8bd0ac4b1b1777ce816acfd44c4430932a4f4619abae144a3d7c9d4dc4 +size 1007307 From c666e502b75d3e35479647bbecb319a1d7ade0f4 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Thu, 28 Mar 2024 15:12:04 +0000 Subject: [PATCH 034/230] add Isoniazid/Rifapentine to `ResourceFile_Consumables_Items_and_Packages.csv` --- ...urceFile_Consumables_Items_and_Packages.csv | 4 ++-- ...rate_consumables_item_codes_and_packages.py | 18 ++++++++++++++++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/resources/healthsystem/consumables/ResourceFile_Consumables_Items_and_Packages.csv b/resources/healthsystem/consumables/ResourceFile_Consumables_Items_and_Packages.csv index 8af8f070b2..98c85fb1cb 100644 --- a/resources/healthsystem/consumables/ResourceFile_Consumables_Items_and_Packages.csv +++ b/resources/healthsystem/consumables/ResourceFile_Consumables_Items_and_Packages.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:85e2c3ba8037e74490751fbb8384709dff1907c785c856f0394f40b4fc024da3 -size 253400 +oid sha256:743c05348267ae887993b280cfb28752a8353a356566e5ab41276160ea7276ef +size 249149 diff --git a/src/scripts/data_file_processing/healthsystem/consumables/processing_data_from_one_health/generate_consumables_item_codes_and_packages.py b/src/scripts/data_file_processing/healthsystem/consumables/processing_data_from_one_health/generate_consumables_item_codes_and_packages.py index 3fcbccf9e2..f9f5149097 100644 --- a/src/scripts/data_file_processing/healthsystem/consumables/processing_data_from_one_health/generate_consumables_item_codes_and_packages.py +++ b/src/scripts/data_file_processing/healthsystem/consumables/processing_data_from_one_health/generate_consumables_item_codes_and_packages.py @@ -21,7 +21,8 @@ # Set local Dropbox source path_to_dropbox = Path( # <-- point to the TLO dropbox locally - '/Users/tbh03/Dropbox (SPH Imperial College)/Thanzi la Onse Theme 1 SHARE') + # '/Users/tbh03/Dropbox (SPH Imperial College)/Thanzi la Onse Theme 1 SHARE' + '/Users/sm2511/Dropbox/Thanzi La Onse') resourcefilepath = Path("./resources") path_for_new_resourcefiles = resourcefilepath / "healthsystem/consumables" @@ -245,7 +246,7 @@ def add_record(df: pd.DataFrame, 
record: Dict): """Add a row to the bottom of the dataframe, where the row is specified by a dict keyed by the target columns.""" - assert list(df.columns) == list(record.keys()) + assert set(df.columns) == set(record.keys()) return pd.concat([df, pd.DataFrame.from_records([record])], ignore_index=True) @@ -328,6 +329,19 @@ def add_record(df: pd.DataFrame, record: Dict): }, ) +cons = add_record( + cons, + { + 'Intervention_Cat': "Added by SM (Recommended by TM)", + 'Intervention_Pkg': "Misc", + 'Intervention_Pkg_Code': -99, + 'Items': "Isoniazid/Rifapentine", + 'Item_Code': 2675, + 'Expected_Units_Per_Case': 1.0, + 'Unit_Cost': 1.0 + }, +) + # -------------- # -------------- # -------------- From 2667397053f5dc2752e29c2218affa58bbb95aa1 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Wed, 3 Apr 2024 15:58:39 +0100 Subject: [PATCH 035/230] add Isoniazid/Rifapentine to all relevant RFs - 1. `ResourceFile_Consumables_Items_and_Packages.csv` - list of item codes and names - 2. `ResourceFile_consumabes_matched.csv` - crosswalk between consumables in the TLO model and the OpenLMIS dataset to extract availability (used proxy 'Isoniazid, 100mg' OR 'Isoniazid, 300mg') - 3. `ResourceFile_Consumables_availability_small.csv` - Final availability estimates - updated by running `consumables_availability_estimation.py` - 4 `ResourceFile_Costing.xlsx` - cost of tablet based on external web report --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- .../ResourceFile_Consumables_Items_and_Packages.csv | 4 ++-- .../ResourceFile_Consumables_availability_small.csv | 4 ++-- .../consumables/ResourceFile_consumables_matched.csv | 4 ++-- .../consumables_availability_estimation.py | 2 +- .../generate_consumables_item_codes_and_packages.py | 6 +++--- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 4207f556c0..840fdbca8f 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7fc22d8bd0ac4b1b1777ce816acfd44c4430932a4f4619abae144a3d7c9d4dc4 -size 1007307 +oid sha256:070cc08625170fab8e11198a350822b427c84add24444b6293bc830102c9b612 +size 1007683 diff --git a/resources/healthsystem/consumables/ResourceFile_Consumables_Items_and_Packages.csv b/resources/healthsystem/consumables/ResourceFile_Consumables_Items_and_Packages.csv index 98c85fb1cb..e5e17d740c 100644 --- a/resources/healthsystem/consumables/ResourceFile_Consumables_Items_and_Packages.csv +++ b/resources/healthsystem/consumables/ResourceFile_Consumables_Items_and_Packages.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:743c05348267ae887993b280cfb28752a8353a356566e5ab41276160ea7276ef -size 249149 +oid sha256:a188114a407bb28dc663a41a2854cb9d75c207dbb0809ff3ce8c1f88eca378aa +size 249189 diff --git a/resources/healthsystem/consumables/ResourceFile_Consumables_availability_small.csv b/resources/healthsystem/consumables/ResourceFile_Consumables_availability_small.csv index 54453cbc2f..7db58ae153 100644 --- a/resources/healthsystem/consumables/ResourceFile_Consumables_availability_small.csv +++ b/resources/healthsystem/consumables/ResourceFile_Consumables_availability_small.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:69a5143c0b7307c7bb48726aa73d6c2f61de2a69aeb445eec87494cf9d4a1041 -size 6087331 +oid sha256:2261b945069955c025e106116ae167cd8dc167a962d7387b9e808b9683f5fa69 +size 6122712 
diff --git a/resources/healthsystem/consumables/ResourceFile_consumables_matched.csv b/resources/healthsystem/consumables/ResourceFile_consumables_matched.csv index 7754d65118..73fd80d045 100644 --- a/resources/healthsystem/consumables/ResourceFile_consumables_matched.csv +++ b/resources/healthsystem/consumables/ResourceFile_consumables_matched.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fbfe91222d3a2a32ed44a4be711b30c5323276a71df802f6c9249eb4c21f8d43 -size 90158 +oid sha256:d8649f9521dfd5bdbf2bc525c7dd0922a32d657fd8f69394bd997d7a595b9576 +size 90430 diff --git a/src/scripts/data_file_processing/healthsystem/consumables/consumable_resource_analyses_with_lmis/consumables_availability_estimation.py b/src/scripts/data_file_processing/healthsystem/consumables/consumable_resource_analyses_with_lmis/consumables_availability_estimation.py index ccef5c219d..44868b562a 100644 --- a/src/scripts/data_file_processing/healthsystem/consumables/consumable_resource_analyses_with_lmis/consumables_availability_estimation.py +++ b/src/scripts/data_file_processing/healthsystem/consumables/consumable_resource_analyses_with_lmis/consumables_availability_estimation.py @@ -277,7 +277,7 @@ def custom_agg(x): return _collapsed_df # Hold out the dataframe with no naming inconsistencies -list_of_items_with_inconsistent_names_zipped = list(zip(inconsistent_item_names_mapping.keys(), inconsistent_item_names_mapping.values())) +list_of_items_with_inconsistent_names_zipped = set(zip(inconsistent_item_names_mapping.keys(), inconsistent_item_names_mapping.values())) list_of_items_with_inconsistent_names = [item for sublist in list_of_items_with_inconsistent_names_zipped for item in sublist] df_with_consistent_item_names = lmis_df_wide_flat[~lmis_df_wide_flat[('item',)].isin(list_of_items_with_inconsistent_names)] df_without_consistent_item_names = lmis_df_wide_flat[lmis_df_wide_flat[('item',)].isin(list_of_items_with_inconsistent_names)] diff --git a/src/scripts/data_file_processing/healthsystem/consumables/processing_data_from_one_health/generate_consumables_item_codes_and_packages.py b/src/scripts/data_file_processing/healthsystem/consumables/processing_data_from_one_health/generate_consumables_item_codes_and_packages.py index f9f5149097..3e5b742a3e 100644 --- a/src/scripts/data_file_processing/healthsystem/consumables/processing_data_from_one_health/generate_consumables_item_codes_and_packages.py +++ b/src/scripts/data_file_processing/healthsystem/consumables/processing_data_from_one_health/generate_consumables_item_codes_and_packages.py @@ -333,10 +333,10 @@ def add_record(df: pd.DataFrame, record: Dict): cons, { 'Intervention_Cat': "Added by SM (Recommended by TM)", - 'Intervention_Pkg': "Misc", - 'Intervention_Pkg_Code': -99, + 'Intervention_Pkg': "Isoniazid preventative therapy for HIV+ no TB", + 'Intervention_Pkg_Code': 82, 'Items': "Isoniazid/Rifapentine", - 'Item_Code': 2675, + 'Item_Code': 2678, 'Expected_Units_Per_Case': 1.0, 'Unit_Cost': 1.0 }, From d71fcb8a1b9ee6949df4edf1ede855da3cf2dcf9 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Thu, 4 Apr 2024 16:20:54 +0100 Subject: [PATCH 036/230] Update costing file - to represent the new version of consumables costs following PR #1298 --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 971235c3dd..840fdbca8f 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ 
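# [Editor's note: illustrative sketch only, not part of the patch series.] The add_record
# pattern used above to append Isoniazid/Rifapentine to the items-and-packages table: a
# one-row DataFrame built from a dict keyed by the existing columns, concatenated onto the
# table. The starting row and its columns are a trimmed, invented stand-in for the full
# resource file; the new item's code (2678) comes from the patch itself.
import pandas as pd

def add_record(df, record):
    assert set(df.columns) == set(record.keys())
    return pd.concat([df, pd.DataFrame.from_records([record])], ignore_index=True)

cons = pd.DataFrame({'Items': ['Existing item'], 'Item_Code': [1], 'Unit_Cost': [0.02]})
cons = add_record(cons, {'Items': 'Isoniazid/Rifapentine', 'Item_Code': 2678, 'Unit_Cost': 1.0})
print(cons)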
b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bc96e048f59b8db7eedc76a105c32e0bad87b21bc4a279517b460feac6e2e93f -size 483750 +oid sha256:070cc08625170fab8e11198a350822b427c84add24444b6293bc830102c9b612 +size 1007683 From ab7328a97df7f174bf80a07a995d3433465906cf Mon Sep 17 00:00:00 2001 From: joehcollins Date: Fri, 5 Apr 2024 13:26:57 +0100 Subject: [PATCH 037/230] Update to the functionality for requesting consumables across the MNH modules to allow for clearer requesting of units of consumables for costing (correct number of units not yet implemented for all consumables) --- .../methods/care_of_women_during_pregnancy.py | 295 ++++++++++-------- src/tlo/methods/labour.py | 286 +++++++++-------- src/tlo/methods/newborn_outcomes.py | 62 ++-- src/tlo/methods/postnatal_supervisor.py | 32 +- src/tlo/methods/pregnancy_helper_functions.py | 17 +- 5 files changed, 381 insertions(+), 311 deletions(-) diff --git a/src/tlo/methods/care_of_women_during_pregnancy.py b/src/tlo/methods/care_of_women_during_pregnancy.py index 42e026478c..b30b191066 100644 --- a/src/tlo/methods/care_of_women_during_pregnancy.py +++ b/src/tlo/methods/care_of_women_during_pregnancy.py @@ -1,6 +1,7 @@ from pathlib import Path import pandas as pd +import numpy as np from tlo import DateOffset, Module, Parameter, Property, Types, logging from tlo.events import IndividualScopeEventMixin, PopulationScopeEventMixin, RegularEvent @@ -202,157 +203,162 @@ def get_and_store_pregnancy_item_codes(self): This function defines the required consumables for each intervention delivered during this module and stores them in a module level dictionary called within HSIs """ - get_list_of_items = pregnancy_helper_functions.get_list_of_items + ic = self.sim.modules['HealthSystem'].get_item_code_from_item_name + # First we store the item codes for the consumables for which their quantity varies for individuals based on + # length of pregnancy # ---------------------------------- BLOOD TEST EQUIPMENT --------------------------------------------------- self.item_codes_preg_consumables['blood_test_equipment'] = \ - get_list_of_items(self, ['Blood collecting tube, 5 ml', - 'Cannula iv (winged with injection pot) 18_each_CMST', - 'Disposables gloves, powder free, 100 pieces per box']) - + {ic('Blood collecting tube, 5 ml'): 1, + ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, + ic('Disposables gloves, powder free, 100 pieces per box'): 1 + } # ---------------------------------- IV DRUG ADMIN EQUIPMENT ------------------------------------------------- self.item_codes_preg_consumables['iv_drug_equipment'] = \ - get_list_of_items(self, ['Cannula iv (winged with injection pot) 18_each_CMST', - 'Giving set iv administration + needle 15 drops/ml_each_CMST', - 'Disposables gloves, powder free, 100 pieces per box']) + {ic('Giving set iv administration + needle 15 drops/ml_each_CMST'): 1, + ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, + ic('Disposables gloves, powder free, 100 pieces per box'): 1 + } # -------------------------------------------- ECTOPIC PREGNANCY --------------------------------------------- self.item_codes_preg_consumables['ectopic_pregnancy_core'] = \ - get_list_of_items(self, ['Halothane (fluothane)_250ml_CMST']) + {ic('Halothane (fluothane)_250ml_CMST'): 1} # TODO: dose self.item_codes_preg_consumables['ectopic_pregnancy_optional'] = \ - get_list_of_items(self, ['Scalpel blade size 22 (individually wrapped)_100_CMST', - 'Sodium chloride, 
injectable solution, 0,9 %, 500 ml', - 'Paracetamol, tablet, 500 mg', - 'Pethidine, 50 mg/ml, 2 ml ampoule', - 'Suture pack', - 'Gauze, absorbent 90cm x 40m_each_CMST', - 'Cannula iv (winged with injection pot) 18_each_CMST', - 'Giving set iv administration + needle 15 drops/ml_each_CMST', - 'Disposables gloves, powder free, 100 pieces per box' - ]) + {ic('Scalpel blade size 22 (individually wrapped)_100_CMST'): 1, + ic('Sodium chloride, injectable solution, 0,9 %, 500 ml'): 2000, + ic('Paracetamol, tablet, 500 mg'): 8000, + ic('Pethidine, 50 mg/ml, 2 ml ampoule'): 1, # todo: dose + ic('Suture pack'): 1, + ic('Gauze, absorbent 90cm x 40m_each_CMST'): 1, + ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, + ic('Giving set iv administration + needle 15 drops/ml_each_CMST'): 1, + ic('Disposables gloves, powder free, 100 pieces per box'): 1, + } # ------------------------------------------- POST ABORTION CARE - GENERAL ----------------------------------- self.item_codes_preg_consumables['post_abortion_care_core'] = \ - get_list_of_items(self, ['Misoprostol, tablet, 200 mcg']) + {ic('Misoprostol, tablet, 200 mcg'): 1} # TODO: dose self.item_codes_preg_consumables['post_abortion_care_optional'] = \ - get_list_of_items(self, ['Complete blood count', - 'Blood collecting tube, 5 ml', - 'Paracetamol, tablet, 500 mg', - 'Pethidine, 50 mg/ml, 2 ml ampoule', - 'Cannula iv (winged with injection pot) 18_each_CMST', - 'Giving set iv administration + needle 15 drops/ml_each_CMST', - 'Disposables gloves, powder free, 100 pieces per box' - ]) + {ic('Complete blood count'): 1, + ic('Blood collecting tube, 5 ml'): 1, + ic('Paracetamol, tablet, 500 mg'): 8000, + ic('Gauze, absorbent 90cm x 40m_each_CMST'): 1, + ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, + ic('Giving set iv administration + needle 15 drops/ml_each_CMST'): 1, + ic('Disposables gloves, powder free, 100 pieces per box'): 1, + } # ------------------------------------------- POST ABORTION CARE - SEPSIS ------------------------------------- self.item_codes_preg_consumables['post_abortion_care_sepsis_core'] = \ - get_list_of_items(self, ['Benzylpenicillin 3g (5MU), PFR_each_CMST', - 'Gentamycin, injection, 40 mg/ml in 2 ml vial']) + {ic('Benzylpenicillin 3g (5MU), PFR_each_CMST'): 1, # TODO: dose + ic('Gentamycin, injection, 40 mg/ml in 2 ml vial'): 1, # TODO: dose + } self.item_codes_preg_consumables['post_abortion_care_sepsis_optional'] = \ - get_list_of_items(self, ['Sodium chloride, injectable solution, 0,9 %, 500 ml', - 'Cannula iv (winged with injection pot) 18_each_CMST', - 'Disposables gloves, powder free, 100 pieces per box', - 'Giving set iv administration + needle 15 drops/ml_each_CMST', - 'Oxygen, 1000 liters, primarily with oxygen cylinders']) - - # ------------------------------------------- POST ABORTION CARE - SHOCK ------------------------------------- + {ic('Sodium chloride, injectable solution, 0,9 %, 500 ml'): 2000, + ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, + ic('Giving set iv administration + needle 15 drops/ml_each_CMST'): 1, + ic('Disposables gloves, powder free, 100 pieces per box'): 1, + ic('Oxygen, 1000 liters, primarily with oxygen cylinders'): 1, # TODO: dose + } + + # ------------------------------------------- POST ABORTION CARE - SHOCK ------------------------------------ self.item_codes_preg_consumables['post_abortion_care_shock'] = \ - get_list_of_items(self, ['Sodium chloride, injectable solution, 0,9 %, 500 ml', - 'Oxygen, 1000 liters, primarily with oxygen 
cylinders']) + {ic('Sodium chloride, injectable solution, 0,9 %, 500 ml'): 2000, + ic('Oxygen, 1000 liters, primarily with oxygen cylinders'): 1, # TODO: dose + } self.item_codes_preg_consumables['post_abortion_care_shock_optional'] = \ - get_list_of_items(self, ['Cannula iv (winged with injection pot) 18_each_CMST', - 'Disposables gloves, powder free, 100 pieces per box', - 'Giving set iv administration + needle 15 drops/ml_each_CMST']) - + {ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, + ic('Giving set iv administration + needle 15 drops/ml_each_CMST'): 1, + ic('Disposables gloves, powder free, 100 pieces per box'): 1, + } # ---------------------------------- URINE DIPSTICK ---------------------------------------------------------- - self.item_codes_preg_consumables['urine_dipstick'] = get_list_of_items(self, ['Urine analysis']) + self.item_codes_preg_consumables['urine_dipstick'] = {ic('Urine analysis'): 1} # ---------------------------------- IRON AND FOLIC ACID ------------------------------------------------------ - self.item_codes_preg_consumables['iron_folic_acid'] = get_list_of_items( - self, ['Ferrous Salt + Folic Acid, tablet, 200 + 0.25 mg']) + # Dose changes at run time + self.item_codes_preg_consumables['iron_folic_acid'] = \ + {ic('Ferrous Salt + Folic Acid, tablet, 200 + 0.25 mg'): 1} # TODO: update con requested here # --------------------------------- BALANCED ENERGY AND PROTEIN ---------------------------------------------- - self.item_codes_preg_consumables['balanced_energy_protein'] = get_list_of_items( - self, ['Dietary supplements (country-specific)']) + # Dose changes at run time + self.item_codes_preg_consumables['balanced_energy_protein'] = \ + {ic('Dietary supplements (country-specific)'): 1} # --------------------------------- INSECTICIDE TREATED NETS ------------------------------------------------ - self.item_codes_preg_consumables['itn'] = get_list_of_items(self, ['Insecticide-treated net']) + self.item_codes_preg_consumables['itn'] = {ic('Insecticide-treated net'): 1} # --------------------------------- CALCIUM SUPPLEMENTS ----------------------------------------------------- - self.item_codes_preg_consumables['calcium'] = get_list_of_items(self, ['Calcium, tablet, 600 mg']) + self.item_codes_preg_consumables['calcium'] = {ic('Calcium, tablet, 600 mg'): 1} # -------------------------------- HAEMOGLOBIN TESTING ------------------------------------------------------- - self.item_codes_preg_consumables['hb_test'] = get_list_of_items(self, ['Haemoglobin test (HB)']) + self.item_codes_preg_consumables['hb_test'] = {ic('Haemoglobin test (HB)'): 1} # ------------------------------------------- ALBENDAZOLE ----------------------------------------------------- - self.item_codes_preg_consumables['albendazole'] = get_list_of_items(self, ['Albendazole 200mg_1000_CMST']) + self.item_codes_preg_consumables['albendazole'] = {ic('Albendazole 200mg_1000_CMST'): 400} # ------------------------------------------- HEP B TESTING --------------------------------------------------- - self.item_codes_preg_consumables['hep_b_test'] = get_list_of_items( - self, ['Hepatitis B test kit-Dertemine_100 tests_CMST']) + self.item_codes_preg_consumables['hep_b_test'] = {ic('Hepatitis B test kit-Dertemine_100 tests_CMST'): 1} # ------------------------------------------- SYPHILIS TESTING ------------------------------------------------ - self.item_codes_preg_consumables['syphilis_test'] = get_list_of_items( - self, ['Test, Rapid plasma reagin (RPR)']) + 
self.item_codes_preg_consumables['syphilis_test'] = {ic('Test, Rapid plasma reagin (RPR)'): 1} # ------------------------------------------- SYPHILIS TREATMENT ---------------------------------------------- - self.item_codes_preg_consumables['syphilis_treatment'] = get_list_of_items( - self, ['Benzathine benzylpenicillin, powder for injection, 2.4 million IU']) - - # ----------------------------------------------- IPTP -------------------------------------------------------- - self.item_codes_preg_consumables['iptp'] = get_list_of_items( - self, ['Sulfamethoxazole + trimethropin, tablet 400 mg + 80 mg']) + self.item_codes_preg_consumables['syphilis_treatment'] =\ + {ic('Benzathine benzylpenicillin, powder for injection, 2.4 million IU'): 1} # ----------------------------------------------- GDM TEST ---------------------------------------------------- - self.item_codes_preg_consumables['gdm_test'] = get_list_of_items(self, ['Blood glucose level test']) + self.item_codes_preg_consumables['gdm_test'] = {ic('Blood glucose level test'): 1} # ------------------------------------------ FULL BLOOD COUNT ------------------------------------------------- - self.item_codes_preg_consumables['full_blood_count'] = get_list_of_items(self, ['Complete blood count']) + self.item_codes_preg_consumables['full_blood_count'] = {ic('Complete blood count'): 1} # ---------------------------------------- BLOOD TRANSFUSION ------------------------------------------------- - self.item_codes_preg_consumables['blood_transfusion'] = get_list_of_items(self, ['Blood, one unit']) + self.item_codes_preg_consumables['blood_transfusion'] = {ic('Blood, one unit'): 2} # --------------------------------------- ORAL ANTIHYPERTENSIVES --------------------------------------------- - self.item_codes_preg_consumables['oral_antihypertensives'] = get_list_of_items( - self, ['Methyldopa 250mg_1000_CMST']) + # Dose changes at run time + self.item_codes_preg_consumables['oral_antihypertensives'] = {ic('Methyldopa 250mg_1000_CMST'): 1} # ------------------------------------- INTRAVENOUS ANTIHYPERTENSIVES --------------------------------------- - self.item_codes_preg_consumables['iv_antihypertensives'] = get_list_of_items( - self, ['Hydralazine, powder for injection, 20 mg ampoule']) + self.item_codes_preg_consumables['iv_antihypertensives'] = \ + {ic('Hydralazine, powder for injection, 20 mg ampoule'): 1} # ---------------------------------------- MAGNESIUM SULPHATE ------------------------------------------------ - self.item_codes_preg_consumables['magnesium_sulfate'] = get_list_of_items( - self, ['Magnesium sulfate, injection, 500 mg/ml in 10-ml ampoule']) + self.item_codes_preg_consumables['magnesium_sulfate'] = \ + {ic('Magnesium sulfate, injection, 500 mg/ml in 10-ml ampoule'): 2} # ---------------------------------------- MANAGEMENT OF ECLAMPSIA -------------------------------------------- - self.item_codes_preg_consumables['eclampsia_management_optional'] = get_list_of_items( - self, ['Misoprostol, tablet, 200 mcg', - 'Oxytocin, injection, 10 IU in 1 ml ampoule', - 'Sodium chloride, injectable solution, 0,9 %, 500 ml', - 'Cannula iv (winged with injection pot) 18_each_CMST', - 'Giving set iv administration + needle 15 drops/ml_each_CMST', - 'Disposables gloves, powder free, 100 pieces per box', - 'Oxygen, 1000 liters, primarily with oxygen cylinders', - 'Complete blood count', - 'Blood collecting tube, 5 ml', - 'Foley catheter', - 'Bag, urine, collecting, 2000 ml']) + self.item_codes_preg_consumables['eclampsia_management_optional'] 
= \ + {ic('Misoprostol, tablet, 200 mcg'): 1, # todo: dose + ic('Oxytocin, injection, 10 IU in 1 ml ampoule'): 1, # todo: dose + ic('Sodium chloride, injectable solution, 0,9 %, 500 ml'): 2000, + ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, + ic('Giving set iv administration + needle 15 drops/ml_each_CMST'): 1, + ic('Disposables gloves, powder free, 100 pieces per box'): 1, + ic('Oxygen, 1000 liters, primarily with oxygen cylinders'): 1, # todo: dose + ic('Complete blood count'): 1, + ic('Blood collecting tube, 5 ml'): 1, + ic('Foley catheter'): 1, + ic('Bag, urine, collecting, 2000 ml'): 1, + } # -------------------------------------- ANTIBIOTICS FOR PROM ------------------------------------------------ - self.item_codes_preg_consumables['abx_for_prom'] = get_list_of_items( - self, ['Benzathine benzylpenicillin, powder for injection, 2.4 million IU']) + self.item_codes_preg_consumables['abx_for_prom'] = \ + {ic('Benzathine benzylpenicillin, powder for injection, 2.4 million IU'): 1} # todo: dose # ----------------------------------- ORAL DIABETIC MANAGEMENT ----------------------------------------------- - self.item_codes_preg_consumables['oral_diabetic_treatment'] = get_list_of_items( - self, ['Glibenclamide 5mg_1000_CMST']) + # Dose changes at run time + self.item_codes_preg_consumables['oral_diabetic_treatment'] = \ + {ic('Glibenclamide 5mg_1000_CMST'): 1} # ---------------------------------------- INSULIN ---------------------------------------------------------- - self.item_codes_preg_consumables['insulin_treatment'] = get_list_of_items( - self, ['Insulin soluble 100 IU/ml, 10ml_each_CMST']) + # Dose changes at run time + self.item_codes_preg_consumables['insulin_treatment'] = \ + {ic('Insulin soluble 100 IU/ml, 10ml_each_CMST'): 1} def initialise_simulation(self, sim): @@ -730,7 +736,7 @@ def screening_interventions_delivered_at_every_contact(self, hsi_event): # check consumables avail = pregnancy_helper_functions.return_cons_avail( - self, hsi_event, self.item_codes_preg_consumables, core='urine_dipstick') + self, hsi_event, cons=self.item_codes_preg_consumables['urine_dipstick'], opt_cons=None) # If the intervention will be delivered the dx_manager runs, returning True if the consumables are # available and the test detects protein in the urine @@ -788,8 +794,10 @@ def iron_and_folic_acid_supplementation(self, hsi_event): # check consumable availability - dose is total days of pregnancy x 2 tablets days = self.get_approx_days_of_pregnancy(person_id) + updated_cons = {k: v*(days*2) for (k, v) in self.item_codes_preg_consumables['iron_folic_acid'].items()} + avail = pregnancy_helper_functions.return_cons_avail( - self, hsi_event, self.item_codes_preg_consumables, core='iron_folic_acid', number=days*3) + self, hsi_event, cons=updated_cons, opt_cons=None) if avail: logger.info(key='anc_interventions', data={'mother': person_id, 'intervention': 'iron_folic_acid'}) @@ -822,8 +830,11 @@ def balance_energy_and_protein_supplementation(self, hsi_event): # If the consumables are available... days = self.get_approx_days_of_pregnancy(person_id) + updated_cons = {k: v*days for (k, v) in + self.item_codes_preg_consumables['balanced_energy_protein'].items()} + avail = pregnancy_helper_functions.return_cons_avail( - self, hsi_event, self.item_codes_preg_consumables, core='balanced_energy_protein', number=days) + self, hsi_event, cons=updated_cons, opt_cons=None) # And she is deemed to be at risk (i.e. 
BMI < 18) she is started on supplements if avail and (df.at[person_id, 'li_bmi'] == 1): @@ -885,8 +896,11 @@ def calcium_supplementation(self, hsi_event): or (df.at[person_id, 'la_parity'] > 4)): days = self.get_approx_days_of_pregnancy(person_id) * 3 + updated_cons = {k: v * days for (k, v) in + self.item_codes_preg_consumables['calcium'].items()} + avail = pregnancy_helper_functions.return_cons_avail( - self, hsi_event, self.item_codes_preg_consumables, core='calcium', number=days) + self, hsi_event, cons=updated_cons, opt_cons=None) if avail: df.at[person_id, 'ac_receiving_calcium_supplements'] = True @@ -909,7 +923,9 @@ def point_of_care_hb_testing(self, hsi_event): # Run check against probability of testing being delivered avail = pregnancy_helper_functions.return_cons_avail( - self, hsi_event, self.item_codes_preg_consumables, core='hb_test', optional='blood_test_equipment') + self, hsi_event, + cons=self.item_codes_preg_consumables['hb_test'], + opt_cons=self.item_codes_preg_consumables['blood_test_equipment']) # We run the test through the dx_manager and if a woman has anaemia and its detected she will be admitted # for further care @@ -983,8 +999,9 @@ def syphilis_screening_and_treatment(self, hsi_event): logger.info(key='anc_interventions', data={'mother': person_id, 'intervention': 'syphilis_test'}) avail = pregnancy_helper_functions.return_cons_avail( - self, hsi_event, self.item_codes_preg_consumables, core='syphilis_test', - optional='blood_test_equipment') + self, hsi_event, + cons=self.item_codes_preg_consumables['syphilis_test'], + opt_cons=self.item_codes_preg_consumables['blood_test_equipment']) test = self.sim.modules['HealthSystem'].dx_manager.run_dx_test( dx_tests_to_run='blood_test_syphilis', hsi_event=hsi_event) @@ -993,8 +1010,9 @@ def syphilis_screening_and_treatment(self, hsi_event): if avail and test: avail = pregnancy_helper_functions.return_cons_avail( - self, hsi_event, self.item_codes_preg_consumables, core='syphilis_treatment', - optional='blood_test_equipment') + self, hsi_event, + cons=self.item_codes_preg_consumables['syphilis_treatment'], + opt_cons=self.item_codes_preg_consumables['blood_test_equipment']) if avail: # We assume that treatment is 100% effective at curing infection @@ -1059,7 +1077,9 @@ def gdm_screening(self, hsi_event): if self.rng.random_sample() < params['prob_intervention_delivered_gdm_test']: avail = pregnancy_helper_functions.return_cons_avail( - self, hsi_event, self.item_codes_preg_consumables, core='gdm_test', optional='blood_test_equipment') + self, hsi_event, + cons=self.item_codes_preg_consumables['gdm_test'], + opt_cons=self.item_codes_preg_consumables['blood_test_equipment']) # If the test accurately detects a woman has gestational diabetes the consumables are recorded and # she is referred for treatment @@ -1225,8 +1245,9 @@ def antenatal_blood_transfusion(self, individual_id, hsi_event): # Check for consumables avail = pregnancy_helper_functions.return_cons_avail( - self, hsi_event, self.item_codes_preg_consumables, core='blood_transfusion', number=2, - optional='iv_drug_equipment') + self, hsi_event, + cons=self.item_codes_preg_consumables['blood_transfusion'], + opt_cons=self.item_codes_preg_consumables['iv_drug_equipment']) sf_check = pregnancy_helper_functions.check_emonc_signal_function_will_run(self.sim.modules['Labour'], sf='blood_tran', @@ -1253,9 +1274,12 @@ def initiate_maintenance_anti_hypertensive_treatment(self, individual_id, hsi_ev df = self.sim.population.props # Calculate the approximate dose for the 
remainder of pregnancy and check availability + days = self.get_approx_days_of_pregnancy(individual_id) * 4 + updated_cons = {k: v * days for (k, v) in + self.item_codes_preg_consumables['oral_antihypertensives'].items()} + avail = pregnancy_helper_functions.return_cons_avail( - self, hsi_event, self.item_codes_preg_consumables, core='oral_antihypertensives', - number=(self.get_approx_days_of_pregnancy(individual_id) * 4)) + self, hsi_event, cons=updated_cons, opt_cons=None) # If the consumables are available then the woman is started on treatment if avail: @@ -1274,8 +1298,9 @@ def initiate_treatment_for_severe_hypertension(self, individual_id, hsi_event): # Define the consumables and check their availability avail = pregnancy_helper_functions.return_cons_avail( - self, hsi_event, self.item_codes_preg_consumables, core='iv_antihypertensives', - optional='iv_drug_equipment') + self, hsi_event, + cons=self.item_codes_preg_consumables['iv_antihypertensives'], + opt_cons=self.item_codes_preg_consumables['iv_drug_equipment']) # If they are available then the woman is started on treatment if avail: @@ -1305,8 +1330,9 @@ def treatment_for_severe_pre_eclampsia_or_eclampsia(self, individual_id, hsi_eve df = self.sim.population.props avail = pregnancy_helper_functions.return_cons_avail( - self, hsi_event, self.item_codes_preg_consumables, core='magnesium_sulfate', - optional='eclampsia_management_optional') + self, hsi_event, + cons=self.item_codes_preg_consumables['magnesium_sulfate'], + opt_cons=self.item_codes_preg_consumables['eclampsia_management_optional']) # check HCW will deliver intervention sf_check = pregnancy_helper_functions.check_emonc_signal_function_will_run(self.sim.modules['Labour'], @@ -1329,8 +1355,9 @@ def antibiotics_for_prom(self, individual_id, hsi_event): # check consumables and whether HCW are available to deliver the intervention avail = pregnancy_helper_functions.return_cons_avail( - self, hsi_event, self.item_codes_preg_consumables, core='abx_for_prom', - optional='iv_drug_equipment') + self, hsi_event, + cons=self.item_codes_preg_consumables['abx_for_prom'], + opt_cons=self.item_codes_preg_consumables['iv_drug_equipment']) sf_check = pregnancy_helper_functions.check_emonc_signal_function_will_run(self.sim.modules['Labour'], sf='iv_abx', @@ -2426,6 +2453,8 @@ def apply(self, person_id, squeeze_factor): if not mother.la_currently_in_labour and not mother.hs_is_inpatient and mother.ps_gest_diab != 'none' \ and (mother.ac_gest_diab_on_treatment != 'none') and (mother.ps_gestational_age_in_weeks > 21): + est_length_preg = self.module.get_approx_days_of_pregnancy(person_id) + def schedule_gdm_event_and_checkup(): # Schedule GestationalDiabetesGlycaemicControlEvent which determines if this new treatment will # effectively control blood glucose prior to next follow up @@ -2450,9 +2479,12 @@ def schedule_gdm_event_and_checkup(): # meds if mother.ac_gest_diab_on_treatment == 'diet_exercise': + days = est_length_preg * 10 + updated_cons = {k: v * days for (k, v) in + self.item_codes_preg_consumables['oral_diabetic_treatment'].items()} + avail = pregnancy_helper_functions.return_cons_avail( - self.module, self, self.module.item_codes_preg_consumables, core='oral_diabetic_treatment', - number=(self.module.get_approx_days_of_pregnancy(person_id) * 2)) + self.module, self, cons=updated_cons, opt_cons=None) # If the meds are available women are started on that treatment if avail: @@ -2468,9 +2500,15 @@ def schedule_gdm_event_and_checkup(): # blood sugar- they are started on insulin if 
mother.ac_gest_diab_on_treatment == 'orals': + # Dose is (avg.) 0.8 units per KG per day. Average weight is an appoximation + required_units_per_preg = 65 * (0.8 * est_length_preg) + required_vials = np.ceil(required_units_per_preg/1000) + + updated_cons = {k: v * required_vials for (k, v) in + self.item_codes_preg_consumables['insulin_treatment'].items()} + avail = pregnancy_helper_functions.return_cons_avail( - self.module, self, self.module.item_codes_preg_consumables, core='insulin_treatment', - number=5) + self.module, self, cons=updated_cons, opt_cons=None) if avail: df.at[person_id, 'ac_gest_diab_on_treatment'] = 'insulin' @@ -2514,8 +2552,9 @@ def apply(self, person_id, squeeze_factor): # Request baseline PAC consumables baseline_cons = pregnancy_helper_functions.return_cons_avail( - self.module, self, self.module.item_codes_preg_consumables, core='post_abortion_care_core', - optional='post_abortion_care_optional') + self.module, self, + cons=self.module.item_codes_preg_consumables['post_abortion_care_core'], + opt_cons=self.module.item_codes_preg_consumables['post_abortion_care_optional']) # Check HCW availability to deliver surgical removal of retained products sf_check = pregnancy_helper_functions.check_emonc_signal_function_will_run(self.sim.modules['Labour'], @@ -2528,29 +2567,32 @@ def apply(self, person_id, squeeze_factor): if abortion_complications.has_any([person_id], 'sepsis', first=True): cons_for_sepsis_pac = pregnancy_helper_functions.return_cons_avail( - self.module, self, self.module.item_codes_preg_consumables, core='post_abortion_care_sepsis_core', - optional='post_abortion_care_sepsis_optional') + self.module, self, + cons=self.module.item_codes_preg_consumables['post_abortion_care_sepsis_core'], + opt_cons=self.module.item_codes_preg_consumables['post_abortion_care_sepsis_optional']) if cons_for_sepsis_pac and (baseline_cons or sf_check): df.at[person_id, 'ac_received_post_abortion_care'] = True elif abortion_complications.has_any([person_id], 'haemorrhage', first=True): - cons_for_haemorrhage = pregnancy_helper_functions.return_cons_avail( - self.module, self, self.module.item_codes_preg_consumables, core='blood_transfusion', number=2, - optional='iv_drug_equipment') + self.module, self, + cons=self.module.item_codes_preg_consumables['blood_transfusion'], + opt_cons=self.module.item_codes_preg_consumables['iv_drug_equipment']) cons_for_shock = pregnancy_helper_functions.return_cons_avail( - self.module, self, self.module.item_codes_preg_consumables, core='post_abortion_care_shock', - optional='post_abortion_care_shock_optional') + self.module, self, + cons=self.module.item_codes_preg_consumables['post_abortion_care_shock'], + opt_cons=self.module.item_codes_preg_consumables['post_abortion_care_shock_optional']) if cons_for_haemorrhage and cons_for_shock and (baseline_cons or sf_check): df.at[person_id, 'ac_received_post_abortion_care'] = True elif abortion_complications.has_any([person_id], 'injury', first=True): cons_for_shock = pregnancy_helper_functions.return_cons_avail( - self.module, self, self.module.item_codes_preg_consumables, core='post_abortion_care_shock', - optional='post_abortion_care_shock_optional') + self.module, self, + cons=self.module.item_codes_preg_consumables['post_abortion_care_shock'], + opt_cons=self.module.item_codes_preg_consumables['post_abortion_care_shock_optional']) if cons_for_shock and (baseline_cons or sf_check): df.at[person_id, 'ac_received_post_abortion_care'] = True @@ -2595,8 +2637,9 @@ def apply(self, person_id, 
squeeze_factor): # We define the required consumables and check their availability avail = pregnancy_helper_functions.return_cons_avail( - self.module, self, self.module.item_codes_preg_consumables, core='ectopic_pregnancy_core', - optional='ectopic_pregnancy_optional') + self.module, self, + cons=self.module.item_codes_preg_consumables['ectopic_pregnancy_core'], + opt_cons=self.module.item_codes_preg_consumables['ectopic_pregnancy_optional']) # If they are available then treatment can go ahead if avail: diff --git a/src/tlo/methods/labour.py b/src/tlo/methods/labour.py index 0f6d7d134e..2635a3b98b 100644 --- a/src/tlo/methods/labour.py +++ b/src/tlo/methods/labour.py @@ -681,174 +681,182 @@ def get_and_store_labour_item_codes(self): This function defines the required consumables for each intervention delivered during this module and stores them in a module level dictionary called within HSIs """ - get_item_code_from_pkg = self.sim.modules['HealthSystem'].get_item_codes_from_package_name - - get_list_of_items = pregnancy_helper_functions.get_list_of_items - - # ---------------------------------- IV DRUG ADMIN EQUIPMENT ------------------------------------------------- - self.item_codes_lab_consumables['iv_drug_equipment'] = \ - get_list_of_items(self, ['Cannula iv (winged with injection pot) 18_each_CMST', - 'Giving set iv administration + needle 15 drops/ml_each_CMST', - 'Disposables gloves, powder free, 100 pieces per box']) + ic = self.sim.modules['HealthSystem'].get_item_code_from_item_name # ---------------------------------- BLOOD TEST EQUIPMENT --------------------------------------------------- self.item_codes_lab_consumables['blood_test_equipment'] = \ - get_list_of_items(self, ['Blood collecting tube, 5 ml', - 'Cannula iv (winged with injection pot) 18_each_CMST', - 'Disposables gloves, powder free, 100 pieces per box']) + {ic('Blood collecting tube, 5 ml'): 1, + ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, + ic('Disposables gloves, powder free, 100 pieces per box'): 1 + } + # ---------------------------------- IV DRUG ADMIN EQUIPMENT ------------------------------------------------- + self.item_codes_lab_consumables['iv_drug_equipment'] = \ + {ic('Giving set iv administration + needle 15 drops/ml_each_CMST'): 1, + ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, + ic('Disposables gloves, powder free, 100 pieces per box'): 1 + } # ------------------------------------------ FULL BLOOD COUNT ------------------------------------------------- - self.item_codes_lab_consumables['full_blood_count'] = get_list_of_items(self, ['Complete blood count']) + self.item_codes_lab_consumables['full_blood_count'] = {ic('Complete blood count'): 1} # -------------------------------------------- DELIVERY ------------------------------------------------------ # assuming CDK has blade, soap, cord tie self.item_codes_lab_consumables['delivery_core'] = \ - get_list_of_items(self, ['Clean delivery kit', - 'Chlorhexidine 1.5% solution_5_CMST']) + {ic('Clean delivery kit'): 1, + ic('Chlorhexidine 1.5% solution_5_CMST'): 1, # todo: dose + } self.item_codes_lab_consumables['delivery_optional'] = \ - get_list_of_items(self, ['Cannula iv (winged with injection pot) 18_each_CMST', - 'Disposables gloves, powder free, 100 pieces per box', - 'Gauze, absorbent 90cm x 40m_each_CMST', - 'Paracetamol, tablet, 500 mg']) + {ic('Gauze, absorbent 90cm x 40m_each_CMST'): 1, + ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, + ic('Disposables gloves, powder free, 100 pieces per 
box'): 1, + ic('Paracetamol, tablet, 500 mg'): 8000 + } # -------------------------------------------- CAESAREAN DELIVERY ------------------------------------------ self.item_codes_lab_consumables['caesarean_delivery_core'] = \ - get_list_of_items(self, ['Halothane (fluothane)_250ml_CMST', - 'Ceftriaxone 1g, PFR_each_CMST', - 'Metronidazole 200mg_1000_CMST']) + {ic('Halothane (fluothane)_250ml_CMST'): 1, # todo: dose + ic('Ceftriaxone 1g, PFR_each_CMST'): 1, # todo: dose + ic('Metronidazole 200mg_1000_CMST'): 1, # todo: dose + } self.item_codes_lab_consumables['caesarean_delivery_optional'] = \ - get_list_of_items(self, ['Scalpel blade size 22 (individually wrapped)_100_CMST', - 'Cannula iv (winged with injection pot) 18_each_CMST', - 'Paracetamol, tablet, 500 mg', - 'Declofenac injection_each_CMST', - 'Pethidine, 50 mg/ml, 2 ml ampoule', - 'Foley catheter', - 'Bag, urine, collecting, 2000 ml', - "ringer's lactate (Hartmann's solution), 1000 ml_12_IDA", - 'Sodium chloride, injectable solution, 0,9 %, 500 ml', - "Giving set iv administration + needle 15 drops/ml_each_CMST", - "Chlorhexidine 1.5% solution_5_CMST"]) + {ic('Scalpel blade size 22 (individually wrapped)_100_CMST'): 1, + ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, + ic('Sodium chloride, injectable solution, 0,9 %, 500 ml'): 2000, + ic('Giving set iv administration + needle 15 drops/ml_each_CMST'): 1, + ic('Disposables gloves, powder free, 100 pieces per box'): 1, + ic('Foley catheter'): 1, + ic('Bag, urine, collecting, 2000 ml'): 1, + ic('Paracetamol, tablet, 500 mg'): 8000, + ic('Declofenac injection_each_CMST'): 1, # todo: dose + ic("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 2000, + } # -------------------------------------------- OBSTETRIC SURGERY ---------------------------------------------- self.item_codes_lab_consumables['obstetric_surgery_core'] = \ - get_list_of_items(self, ['Halothane (fluothane)_250ml_CMST', - 'Ceftriaxone 1g, PFR_each_CMST', - 'Metronidazole 200mg_1000_CMST']) + {ic('Halothane (fluothane)_250ml_CMST'): 1, # todo: dose + ic('Ceftriaxone 1g, PFR_each_CMST'): 1, # todo: dose + ic('Metronidazole 200mg_1000_CMST'): 1, # todo: dose + } self.item_codes_lab_consumables['obstetric_surgery_optional'] = \ - get_list_of_items(self, ['Scalpel blade size 22 (individually wrapped)_100_CMST', - 'Cannula iv (winged with injection pot) 18_each_CMST', - 'Paracetamol, tablet, 500 mg', - 'Declofenac injection_each_CMST', - 'Pethidine, 50 mg/ml, 2 ml ampoule', - 'Foley catheter', - 'Bag, urine, collecting, 2000 ml', - "ringer's lactate (Hartmann's solution), 1000 ml_12_IDA", - 'Sodium chloride, injectable solution, 0,9 %, 500 ml', - "Giving set iv administration + needle 15 drops/ml_each_CMST"]) + {ic('Scalpel blade size 22 (individually wrapped)_100_CMST'): 1, + ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, + ic('Sodium chloride, injectable solution, 0,9 %, 500 ml'): 2000, + ic('Giving set iv administration + needle 15 drops/ml_each_CMST'): 1, + ic('Disposables gloves, powder free, 100 pieces per box'): 1, + ic('Foley catheter'): 1, + ic('Bag, urine, collecting, 2000 ml'): 1, + ic('Paracetamol, tablet, 500 mg'): 8000, + ic('Declofenac injection_each_CMST'): 1, # todo: dose + ic("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 2000, + } # -------------------------------------------- ABX FOR PROM ------------------------------------------------- self.item_codes_lab_consumables['abx_for_prom'] = \ - get_list_of_items(self, ['Benzathine benzylpenicillin, powder for 
injection, 2.4 million IU']) + {ic('Benzathine benzylpenicillin, powder for injection, 2.4 million IU'): 1} # todo: dose # -------------------------------------------- ANTENATAL STEROIDS --------------------------------------------- + self.item_codes_lab_consumables['antenatal_steroids'] = \ - get_list_of_items(self, ['Dexamethasone 5mg/ml, 5ml_each_CMST']) + {ic('Dexamethasone 5mg/ml, 5ml_each_CMST'): 1} # todo: dose # ------------------------------------- INTRAVENOUS ANTIHYPERTENSIVES --------------------------------------- self.item_codes_lab_consumables['iv_antihypertensives'] = \ - get_list_of_items(self, ['Hydralazine, powder for injection, 20 mg ampoule']) + {ic('Hydralazine, powder for injection, 20 mg ampoule'): 1} # todo: dose # --------------------------------------- ORAL ANTIHYPERTENSIVES --------------------------------------------- self.item_codes_lab_consumables['oral_antihypertensives'] = \ - get_list_of_items(self, ['Methyldopa 250mg_1000_CMST']) + {ic('Hydralazine, powder for injection, 20 mg ampoule'): 1} # todo: dose # ---------------------------------- SEVERE PRE-ECLAMPSIA/ECLAMPSIA ----------------------------------------- self.item_codes_lab_consumables['magnesium_sulfate'] = \ - get_list_of_items(self, ['Magnesium sulfate, injection, 500 mg/ml in 10-ml ampoule']) + {ic('Magnesium sulfate, injection, 500 mg/ml in 10-ml ampoule'): 2} self.item_codes_lab_consumables['eclampsia_management_optional'] = \ - get_list_of_items(self, ['Misoprostol, tablet, 200 mcg', - 'Oxytocin, injection, 10 IU in 1 ml ampoule', - 'Sodium chloride, injectable solution, 0,9 %, 500 ml', - 'Cannula iv (winged with injection pot) 18_each_CMST', - 'Giving set iv administration + needle 15 drops/ml_each_CMST', - 'Disposables gloves, powder free, 100 pieces per box', - 'Oxygen, 1000 liters, primarily with oxygen cylinders', - 'Complete blood count', - 'Foley catheter', - 'Bag, urine, collecting, 2000 ml']) - + {ic('Misoprostol, tablet, 200 mcg'): 1, # todo: dose + ic('Oxytocin, injection, 10 IU in 1 ml ampoule'): 1, # todo: dose + ic('Sodium chloride, injectable solution, 0,9 %, 500 ml'): 2000, + ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, + ic('Giving set iv administration + needle 15 drops/ml_each_CMST'): 1, + ic('Disposables gloves, powder free, 100 pieces per box'): 1, + ic('Oxygen, 1000 liters, primarily with oxygen cylinders'): 1, # todo: dose + ic('Complete blood count'): 1, + ic('Blood collecting tube, 5 ml'): 1, + ic('Foley catheter'): 1, + ic('Bag, urine, collecting, 2000 ml'): 1, + } # ------------------------------------- OBSTRUCTED LABOUR --------------------------------------------------- self.item_codes_lab_consumables['obstructed_labour'] = \ - get_list_of_items(self, ['Lidocaine HCl (in dextrose 7.5%), ampoule 2 ml', - 'Benzylpenicillin 3g (5MU), PFR_each_CMST', - 'Gentamycin, injection, 40 mg/ml in 2 ml vial', - 'Sodium chloride, injectable solution, 0,9 %, 500 ml', - 'Cannula iv (winged with injection pot) 18_each_CMST', - 'Giving set iv administration + needle 15 drops/ml_each_CMST', - 'Disposables gloves, powder free, 100 pieces per box', - 'Complete blood count', - 'Foley catheter', - 'Bag, urine, collecting, 2000 ml', - 'Paracetamol, tablet, 500 mg', - 'Pethidine, 50 mg/ml, 2 ml ampoule', - 'Gauze, absorbent 90cm x 40m_each_CMST', - 'Suture pack']) - + {ic('Lidocaine HCl (in dextrose 7.5%), ampoule 2 ml'): 1, # todo: dose + ic('Benzylpenicillin 3g (5MU), PFR_each_CMST'): 1, # todo: dose + ic('Gentamycin, injection, 40 mg/ml in 2 ml vial'): 1, # todo: dose + 
ic('Sodium chloride, injectable solution, 0,9 %, 500 ml'): 2000, + ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, + ic('Giving set iv administration + needle 15 drops/ml_each_CMST'): 1, + ic('Disposables gloves, powder free, 100 pieces per box'): 1, + ic('Complete blood count'): 1, + ic('Blood collecting tube, 5 ml'): 1, + ic('Foley catheter'): 1, + ic('Bag, urine, collecting, 2000 ml'): 1, + ic('Paracetamol, tablet, 500 mg'): 8000, + ic('Pethidine, 50 mg/ml, 2 ml ampoule'): 1, # todo: dose + ic('Gauze, absorbent 90cm x 40m_each_CMST'): 1, + ic('Suture pack'): 1, + } # ------------------------------------- OBSTETRIC VACUUM --------------------------------------------------- - self.item_codes_lab_consumables['vacuum'] = get_list_of_items(self, ['Vacuum, obstetric']) + self.item_codes_lab_consumables['vacuum'] = {ic('Vacuum, obstetric'): 1} # ------------------------------------- MATERNAL SEPSIS ----------------------------------------------------- self.item_codes_lab_consumables['maternal_sepsis_core'] = \ - get_list_of_items(self, ['Benzylpenicillin 3g (5MU), PFR_each_CMST', - 'Gentamycin, injection, 40 mg/ml in 2 ml vial']) - # 'Metronidazole, injection, 500 mg in 100 ml vial']) + {ic('Benzylpenicillin 3g (5MU), PFR_each_CMST'): 1, # todo: dose + ic('Gentamycin, injection, 40 mg/ml in 2 ml vial'): 1, # todo: dose + } self.item_codes_lab_consumables['maternal_sepsis_optional'] = \ - get_list_of_items(self, ['Cannula iv (winged with injection pot) 18_each_CMST', - 'Oxygen, 1000 liters, primarily with oxygen cylinders', - 'Paracetamol, tablet, 500 mg', - 'Giving set iv administration + needle 15 drops/ml_each_CMST', - 'Foley catheter', - 'Bag, urine, collecting, 2000 ml', - 'Disposables gloves, powder free, 100 pieces per box', - 'Complete blood count']) - + {ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, + ic('Oxygen, 1000 liters, primarily with oxygen cylinders'): 1, # todo: dose + ic('Paracetamol, tablet, 500 mg'): 8000, + ic('Giving set iv administration + needle 15 drops/ml_each_CMST'): 1, + ic('Foley catheter'): 1, + ic('Bag, urine, collecting, 2000 ml'): 1, + ic('Disposables gloves, powder free, 100 pieces per box'): 1, + ic('Complete blood count'): 1, + } # ------------------------------------- ACTIVE MANAGEMENT THIRD STAGE --------------------------------------- - self.item_codes_lab_consumables['amtsl'] = \ - get_list_of_items(self, ['Oxytocin, injection, 10 IU in 1 ml ampoule']) + self.item_codes_lab_consumables['amtsl'] = {ic('Oxytocin, injection, 10 IU in 1 ml ampoule'): 1} # todo: dose # ------------------------------------- POSTPARTUM HAEMORRHAGE --------------------------------------- self.item_codes_lab_consumables['pph_core'] = \ - get_list_of_items(self, ['Oxytocin, injection, 10 IU in 1 ml ampoule']) + {ic('Oxytocin, injection, 10 IU in 1 ml ampoule'): 1} # todo: dose self.item_codes_lab_consumables['pph_optional'] = \ - get_list_of_items(self, ['Misoprostol, tablet, 200 mcg', - 'Pethidine, 50 mg/ml, 2 ml ampoule', - 'Oxygen, 1000 liters, primarily with oxygen cylinders', - 'Cannula iv (winged with injection pot) 18_each_CMST', - 'Bag, urine, collecting, 2000 ml', - 'Foley catheter', - 'Giving set iv administration + needle 15 drops/ml_each_CMST', - 'Disposables gloves, powder free, 100 pieces per box', - 'Complete blood count']) + {ic('Misoprostol, tablet, 200 mcg'): 1, # todo: dose + ic('Pethidine, 50 mg/ml, 2 ml ampoule'): 1, # todo: dose + ic('Oxygen, 1000 liters, primarily with oxygen cylinders'): 1, # todo: dose + ic('Giving set iv 
administration + needle 15 drops/ml_each_CMST'): 1, + ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, + ic('Foley catheter'): 1, + ic('Bag, urine, collecting, 2000 ml'): 1, + ic('Disposables gloves, powder free, 100 pieces per box'): 1, + ic('Complete blood count'): 1, + } # ------------------------------------- BLOOD TRANSFUSION --------------------------------------- - self.item_codes_lab_consumables['blood_transfusion'] = get_list_of_items(self, ['Blood, one unit']) + self.item_codes_lab_consumables['blood_transfusion'] = {ic('Blood, one unit'): 2} # ------------------------------------------ FULL BLOOD COUNT ------------------------------------------------- - self.item_codes_lab_consumables['hb_test'] = get_list_of_items(self, ['Haemoglobin test (HB)']) + self.item_codes_lab_consumables['hb_test'] = {ic('Haemoglobin test (HB)'): 1} # ---------------------------------- IRON AND FOLIC ACID ------------------------------------------------------ + # Dose changes at run time self.item_codes_lab_consumables['iron_folic_acid'] = \ - get_item_code_from_pkg('Ferrous Salt + Folic Acid, tablet, 200 + 0.25 mg') + {ic('Ferrous Salt + Folic Acid, tablet, 200 + 0.25 mg'): 1} # -------------------------------------------- RESUSCITATION ------------------------------------------ - self.item_codes_lab_consumables['resuscitation'] = \ - get_list_of_items(self, ['Infant resuscitator, clear plastic + mask + bag_each_CMST']) + self.item_codes_lab_consumables['resuscitation'] =\ + {ic('Infant resuscitator, clear plastic + mask + bag_each_CMST'): 1} def initialise_simulation(self, sim): # Update self.current_parameters @@ -1658,7 +1666,9 @@ def prophylactic_labour_interventions(self, hsi_event): # If she has not already receive antibiotics, we check for consumables avail = pregnancy_helper_functions.return_cons_avail( - self, hsi_event, self.item_codes_lab_consumables, core='abx_for_prom', optional='iv_drug_equipment') + self, hsi_event, + cons=self.item_codes_lab_consumables['abx_for_prom'], + opt_cons=self.item_codes_lab_consumables['iv_drug_equipment']) # Then query if these consumables are available during this HSI And provide if available. # Antibiotics for from reduce risk of newborn sepsis within the first @@ -1672,8 +1682,9 @@ def prophylactic_labour_interventions(self, hsi_event): mni[person_id]['labour_state'] == 'late_preterm_labour': avail = pregnancy_helper_functions.return_cons_avail( - self, hsi_event, self.item_codes_lab_consumables, core='antenatal_steroids', - optional='iv_drug_equipment') + self, hsi_event, + cons=self.item_codes_lab_consumables['antenatal_steroids'], + opt_cons=self.item_codes_lab_consumables['iv_drug_equipment']) # If available they are given. Antenatal steroids reduce a preterm newborns chance of developing # respiratory distress syndrome and of death associated with prematurity @@ -1735,8 +1746,9 @@ def assessment_and_treatment_of_severe_pre_eclampsia_mgso4(self, hsi_event, labo # Define and check for the required consumables avail = pregnancy_helper_functions.return_cons_avail( - self, hsi_event, self.item_codes_lab_consumables, core='magnesium_sulfate', - optional='eclampsia_management_optional') + self, hsi_event, + cons=self.item_codes_lab_consumables['magnesium_sulfate'], + opt_cons=self.item_codes_lab_consumables['eclampsia_management_optional']) # If the consumables are available - the intervention is delivered. 
IV magnesium reduces the # probability that a woman with severe pre-eclampsia will experience eclampsia in labour @@ -1764,8 +1776,9 @@ def assessment_and_treatment_of_hypertension(self, hsi_event, labour_stage): # Then query if these consumables are available during this HSI avail = pregnancy_helper_functions.return_cons_avail( - self, hsi_event, self.item_codes_lab_consumables, core='iv_antihypertensives', - optional='iv_drug_equipment') + self, hsi_event, + cons=self.item_codes_lab_consumables['iv_antihypertensives'], + opt_cons=self.item_codes_lab_consumables['iv_drug_equipment']) # If they are available then the woman is started on treatment. Intravenous antihypertensive reduce a # womans risk of progression from mild to severe gestational hypertension ANd reduce risk of death for @@ -1811,8 +1824,9 @@ def assessment_and_treatment_of_eclampsia(self, hsi_event, labour_stage): # define and check required consumables avail = pregnancy_helper_functions.return_cons_avail( - self, hsi_event, self.item_codes_lab_consumables, core='magnesium_sulfate', - optional='eclampsia_management_optional') + self, hsi_event, + cons=self.item_codes_lab_consumables['magnesium_sulfate'], + opt_cons=self.item_codes_lab_consumables['eclampsia_management_optional']) if (labour_stage == 'ip') and (df.at[person_id, 'ac_admitted_for_immediate_delivery'] == 'none'): self.determine_delivery_mode_in_spe_or_ec(person_id, hsi_event, 'ec') @@ -1858,8 +1872,9 @@ def refer_for_cs(): # If the general package is available AND the facility has the correct tools to carry out the # delivery then it can occur avail = pregnancy_helper_functions.return_cons_avail( - self, hsi_event, self.item_codes_lab_consumables, core='vacuum', - optional='obstructed_labour') + self, hsi_event, + cons=self.item_codes_lab_consumables['vacuum'], + opt_cons=self.item_codes_lab_consumables['obstructed_labour']) # run HCW check sf_check = pregnancy_helper_functions.check_emonc_signal_function_will_run(self, sf='avd', @@ -1911,8 +1926,9 @@ def assessment_and_treatment_of_maternal_sepsis(self, hsi_event, labour_stage): # Define and check available consumables avail = pregnancy_helper_functions.return_cons_avail( - self, hsi_event, self.item_codes_lab_consumables, core='maternal_sepsis_core', - optional='maternal_sepsis_optional') + self, hsi_event, + cons=self.item_codes_lab_consumables['maternal_sepsis_core'], + opt_cons=self.item_codes_lab_consumables['maternal_sepsis_optional']) # If delivered this intervention reduces a womans risk of dying from sepsis if avail and sf_check: @@ -1990,7 +2006,9 @@ def active_management_of_the_third_stage_of_labour(self, hsi_event): # Define and check available consumables avail = pregnancy_helper_functions.return_cons_avail( - self, hsi_event, self.item_codes_lab_consumables, core='amtsl', optional='iv_drug_equipment') + self, hsi_event, + cons=self.item_codes_lab_consumables['amtsl'], + opt_cons=self.item_codes_lab_consumables['iv_drug_equipment']) # run HCW check sf_check = pregnancy_helper_functions.check_emonc_signal_function_will_run(self, sf='uterotonic', @@ -2021,7 +2039,9 @@ def assessment_and_treatment_of_pph_uterine_atony(self, hsi_event): # Define and check available consumables avail = pregnancy_helper_functions.return_cons_avail( - self, hsi_event, self.item_codes_lab_consumables, core='pph_core', optional='pph_optional') + self, hsi_event, + cons=self.item_codes_lab_consumables['pph_core'], + opt_cons=self.item_codes_lab_consumables['pph_optional']) # run HCW check sf_check = 
pregnancy_helper_functions.check_emonc_signal_function_will_run(self, sf='uterotonic', @@ -2104,8 +2124,9 @@ def surgical_management_of_pph(self, hsi_event): # We log the required consumables and condition the surgery happening on the availability of the # first consumable in this package, the anaesthetic required for the surgery avail = pregnancy_helper_functions.return_cons_avail( - self, hsi_event, self.item_codes_lab_consumables, core='obstetric_surgery_core', - optional='obstetric_surgery_optional') + self, hsi_event, + cons=self.item_codes_lab_consumables['obstetric_surgery_core'], + opt_cons=self.item_codes_lab_consumables['obstetric_surgery_optional']) # run HCW check sf_check = pregnancy_helper_functions.check_emonc_signal_function_will_run(self, sf='surg', @@ -2140,8 +2161,9 @@ def blood_transfusion(self, hsi_event): # Check consumables avail = pregnancy_helper_functions.return_cons_avail( - self, hsi_event, self.item_codes_lab_consumables, core='blood_transfusion', number=2, - optional='iv_drug_equipment') + self, hsi_event, + cons=self.item_codes_lab_consumables['blood_transfusion'], + opt_cons=self.item_codes_lab_consumables['iv_drug_equipment']) # check HCW sf_check = pregnancy_helper_functions.check_emonc_signal_function_will_run(self, sf='blood_tran', @@ -2865,8 +2887,9 @@ def apply(self, person_id, squeeze_factor): # LOG CONSUMABLES FOR DELIVERY... # We assume all deliveries require this basic package of consumables avail = pregnancy_helper_functions.return_cons_avail( - self.module, self, self.module.item_codes_lab_consumables, core='delivery_core', - optional='delivery_optional') + self.module, self, + cons=self.module.item_codes_lab_consumables['delivery_core'], + opt_cons=self.module.item_codes_lab_consumables['delivery_optional']) # If the clean delivery kit consumable is available, we assume women benefit from clean delivery if avail: @@ -2942,7 +2965,7 @@ def apply(self, person_id, squeeze_factor): # TODO: potential issue is that this consumable is being logged now for every birth as opposed to # for each birth where resuscitation of the newborn is required avail = pregnancy_helper_functions.return_cons_avail( - self.module, self, self.module.item_codes_lab_consumables, core='resuscitation') + self.module, self, cons=self.module.item_codes_lab_consumables['resuscitation'], opt_cons=None) # Run HCW check sf_check = pregnancy_helper_functions.check_emonc_signal_function_will_run(self.module, @@ -3161,8 +3184,9 @@ def apply(self, person_id, squeeze_factor): # We log the required consumables and condition the caesarean happening on the availability of the # first consumable in this package, the anaesthetic required for the surgery avail = pregnancy_helper_functions.return_cons_avail( - self.module, self, self.module.item_codes_lab_consumables, core='caesarean_delivery_core', - optional='caesarean_delivery_optional') + self.module, self, + cons=self.module.item_codes_lab_consumables['caesarean_delivery_core'], + opt_cons=self.module.item_codes_lab_consumables['caesarean_delivery_optional']) # We check that the HCW will deliver the intervention sf_check = pregnancy_helper_functions.check_emonc_signal_function_will_run(self.module, sf='surg', diff --git a/src/tlo/methods/newborn_outcomes.py b/src/tlo/methods/newborn_outcomes.py index 513b644746..f6fb49650e 100644 --- a/src/tlo/methods/newborn_outcomes.py +++ b/src/tlo/methods/newborn_outcomes.py @@ -377,43 +377,51 @@ def get_and_store_newborn_item_codes(self): This function defines the required consumables for each 
intervention delivered during this module and stores them in a module level dictionary called within HSIs """ - get_list_of_items = pregnancy_helper_functions.get_list_of_items - - # ---------------------------------- IV DRUG ADMIN EQUIPMENT ------------------------------------------------- - self.item_codes_nb_consumables['iv_drug_equipment'] = \ - get_list_of_items(self, ['Cannula iv (winged with injection pot) 18_each_CMST', - 'Giving set iv administration + needle 15 drops/ml_each_CMST', - 'Disposables gloves, powder free, 100 pieces per box']) + ic = self.sim.modules['HealthSystem'].get_item_code_from_item_name + # First we store the item codes for the consumables for which their quantity varies for individuals based on + # length of pregnancy # ---------------------------------- BLOOD TEST EQUIPMENT --------------------------------------------------- self.item_codes_nb_consumables['blood_test_equipment'] = \ - get_list_of_items(self, ['Disposables gloves, powder free, 100 pieces per box']) + {ic('Blood collecting tube, 5 ml'): 1, + ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, + ic('Disposables gloves, powder free, 100 pieces per box'): 1 + } + # ---------------------------------- IV DRUG ADMIN EQUIPMENT ------------------------------------------------- + self.item_codes_nb_consumables['iv_drug_equipment'] = \ + {ic('Giving set iv administration + needle 15 drops/ml_each_CMST'): 1, + ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, + ic('Disposables gloves, powder free, 100 pieces per box'): 1 + } # -------------------------------------------- VITAMIN K ------------------------------------------ self.item_codes_nb_consumables['vitamin_k'] = \ - get_list_of_items(self, ['vitamin K1 (phytomenadione) 1 mg/ml, 1 ml, inj._100_IDA']) + {ic('vitamin K1 (phytomenadione) 1 mg/ml, 1 ml, inj._100_IDA'): 1} # todo: dose # -------------------------------------------- EYE CARE ------------------------------------------ - self.item_codes_nb_consumables['eye_care'] = get_list_of_items( - self, ['Tetracycline eye ointment, 1 %, tube 5 mg']) + self.item_codes_nb_consumables['eye_care'] = \ + {ic('Tetracycline eye ointment, 1 %, tube 5 mg'): 1} # todo: dose # ------------------------------------- SEPSIS - FULL SUPPORTIVE CARE --------------------------------------- self.item_codes_nb_consumables['sepsis_supportive_care_core'] = \ - get_list_of_items(self, ['Benzylpenicillin 1g (1MU), PFR_Each_CMST', - 'Gentamicin 40mg/ml, 2ml_each_CMST', - 'Oxygen, 1000 liters, primarily with oxygen cylinders']) + {ic('Benzylpenicillin 1g (1MU), PFR_Each_CMST'): 1, # todo: dose + ic('Gentamicin 40mg/ml, 2ml_each_CMST'): 1, # todo: dose + ic('Oxygen, 1000 liters, primarily with oxygen cylinders'): 1 # todo: dose + } self.item_codes_nb_consumables['sepsis_supportive_care_optional'] = \ - get_list_of_items(self, ['Dextrose (glucose) 5%, 1000ml_each_CMST', - 'Tube, feeding CH 8_each_CMST', - 'Cannula iv (winged with injection pot) 18_each_CMST', - 'Giving set iv administration + needle 15 drops/ml_each_CMST', - 'Disposables gloves, powder free, 100 pieces per box']) + {ic('Dextrose (glucose) 5%, 1000ml_each_CMST'): 500, + ic('Tube, feeding CH 8_each_CMST'): 1, + ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, + ic('Giving set iv administration + needle 15 drops/ml_each_CMST'): 1, + ic('Disposables gloves, powder free, 100 pieces per box'): 1 + } # ---------------------------------------- SEPSIS - ANTIBIOTICS --------------------------------------------- - 
self.item_codes_nb_consumables['sepsis_abx'] =\ - get_list_of_items(self, ['Benzylpenicillin 1g (1MU), PFR_Each_CMST', - 'Gentamicin 40mg/ml, 2ml_each_CMST']) + self.item_codes_nb_consumables['sepsis_abx'] = \ + {ic('Benzylpenicillin 1g (1MU), PFR_Each_CMST'): 1, # todo: dose + ic('Gentamicin 40mg/ml, 2ml_each_CMST'): 1, # todo: dose + } def initialise_simulation(self, sim): # For the first period (2010-2015) we use the first value in each list as a parameter @@ -969,8 +977,9 @@ def assessment_and_treatment_newborn_sepsis(self, hsi_event, facility_type): # check consumables avail = pregnancy_helper_functions.return_cons_avail( - self, hsi_event, self.item_codes_nb_consumables, core='sepsis_supportive_care_core', - optional='sepsis_supportive_care_optional') + self, hsi_event, + cons=self.item_codes_nb_consumables['sepsis_supportive_care_core'], + opt_cons=self.item_codes_nb_consumables['sepsis_supportive_care_optional']) # Then, if the consumables are available, treatment for sepsis is delivered if avail and sf_check: @@ -980,8 +989,9 @@ def assessment_and_treatment_newborn_sepsis(self, hsi_event, facility_type): # The same pattern is then followed for health centre care else: avail = pregnancy_helper_functions.return_cons_avail( - self, hsi_event, self.item_codes_nb_consumables, core='sepsis_abx', - optional='iv_drug_equipment') + self, hsi_event, + cons=self.item_codes_nb_consumables['sepsis_abx'], + opt_cons=self.item_codes_nb_consumables['iv_drug_equipment']) if avail and sf_check: df.at[person_id, 'nb_inj_abx_neonatal_sepsis'] = True diff --git a/src/tlo/methods/postnatal_supervisor.py b/src/tlo/methods/postnatal_supervisor.py index 0d16a2d7ac..6303763b3d 100644 --- a/src/tlo/methods/postnatal_supervisor.py +++ b/src/tlo/methods/postnatal_supervisor.py @@ -1274,21 +1274,23 @@ def apply(self, person_id, squeeze_factor): return # Define the consumables - of_repair_cons = pregnancy_helper_functions.get_list_of_items( - self, ['Scalpel blade size 22 (individually wrapped)_100_CMST', - 'Halothane (fluothane)_250ml_CMST', - 'Ceftriaxone 1g, PFR_each_CMST', - 'Metronidazole 200mg_1000_CMST', - 'Cannula iv (winged with injection pot) 18_each_CMST', - 'Paracetamol, tablet, 500 mg', - 'Declofenac injection_each_CMST', - 'Pethidine, 50 mg/ml, 2 ml ampoule', - 'Foley catheter', - 'Bag, urine, collecting, 2000 ml', - "ringer's lactate (Hartmann's solution), 1000 ml_12_IDA", - 'Sodium chloride, injectable solution, 0,9 %, 500 ml', - "Giving set iv administration + needle 15 drops/ml_each_CMST", - "Chlorhexidine 1.5% solution_5_CMST"]) + ic = self.sim.modules['HealthSystem'].get_item_code_from_item_name + + of_repair_cons = \ + {ic('Scalpel blade size 22 (individually wrapped)_100_CMS'): 1, + ic('Halothane (fluothane)_250ml_CMST'): 1, # todo: dose + ic('Ceftriaxone 1g, PFR_each_CMST'): 1, # todo: dose + ic('Metronidazole 200mg_1000_CMST'): 1, # todo: dose + ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, + ic('Paracetamol, tablet, 500 mg'): 8000, + ic('Declofenac injection_each_CMST'): 1, # todo: dose + ic('Foley catheter'): 1, + ic('Bag, urine, collecting, 2000 ml'): 1, + ic("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 2000, + ic('Sodium chloride, injectable solution, 0,9 %, 500 ml'): 2000, + ic('Giving set iv administration + needle 15 drops/ml_each_CMST'): 1, + ic('Chlorhexidine 1.5% solution_5_CMST'): 1, # todo: dose + } self.get_consumables(item_codes=of_repair_cons) diff --git a/src/tlo/methods/pregnancy_helper_functions.py 
b/src/tlo/methods/pregnancy_helper_functions.py index 20a712f134..8f7faa0503 100644 --- a/src/tlo/methods/pregnancy_helper_functions.py +++ b/src/tlo/methods/pregnancy_helper_functions.py @@ -21,7 +21,7 @@ def get_list_of_items(self, item_list): return codes -def return_cons_avail(self, hsi_event, cons_dict, **info): +def return_cons_avail(self, hsi_event, cons, opt_cons): """ This function is called by majority of interventions across maternal and neonatal modules to return whether a consumable or package of consumables are available. If analysis is not being conducted (as indicated by a series of @@ -38,21 +38,12 @@ def return_cons_avail(self, hsi_event, cons_dict, **info): ps_params = self.sim.modules['PregnancySupervisor'].current_parameters la_params = self.sim.modules['Labour'].current_parameters - # If 'number' is passed as an optional argument then a predetermined number of consumables will be requested - if 'number' in info.keys(): - core_cons = {cons_dict[info['core']][0]: info['number']} - else: - core_cons = cons_dict[info['core']] - - # If 'optional' is passed then the optional set of consumables is selected from the consumables dict - if 'optional' in info.keys(): - opt_cons = cons_dict[info['optional']] - else: + if opt_cons is None: opt_cons = [] # Check if analysis is currently running, if not then availability is determined normally if not ps_params['ps_analysis_in_progress'] and not la_params['la_analysis_in_progress']: - available = hsi_event.get_consumables(item_codes=core_cons, + available = hsi_event.get_consumables(item_codes=cons, optional_item_codes=opt_cons) if not available and (hsi_event.target in mni) and (hsi_event != 'AntenatalCare_Outpatient'): @@ -61,7 +52,7 @@ def return_cons_avail(self, hsi_event, cons_dict, **info): return available else: - available = hsi_event.get_consumables(item_codes=core_cons, optional_item_codes=opt_cons) + available = hsi_event.get_consumables(item_codes=cons, optional_item_codes=opt_cons) # Depending on HSI calling this function a different parameter set is used to determine if analysis is being # conducted From 0126a6bd1cf634bfc7627aeb1f9043170c70f308 Mon Sep 17 00:00:00 2001 From: joehcollins Date: Fri, 5 Apr 2024 15:23:26 +0100 Subject: [PATCH 038/230] fixes to failing tests --- src/tlo/methods/care_of_women_during_pregnancy.py | 4 ++-- .../test_maternal_health_helper_and_analysis_functions.py | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/tlo/methods/care_of_women_during_pregnancy.py b/src/tlo/methods/care_of_women_during_pregnancy.py index b30b191066..124e8e6d2a 100644 --- a/src/tlo/methods/care_of_women_during_pregnancy.py +++ b/src/tlo/methods/care_of_women_during_pregnancy.py @@ -2481,7 +2481,7 @@ def schedule_gdm_event_and_checkup(): days = est_length_preg * 10 updated_cons = {k: v * days for (k, v) in - self.item_codes_preg_consumables['oral_diabetic_treatment'].items()} + self.module.item_codes_preg_consumables['oral_diabetic_treatment'].items()} avail = pregnancy_helper_functions.return_cons_avail( self.module, self, cons=updated_cons, opt_cons=None) @@ -2505,7 +2505,7 @@ def schedule_gdm_event_and_checkup(): required_vials = np.ceil(required_units_per_preg/1000) updated_cons = {k: v * required_vials for (k, v) in - self.item_codes_preg_consumables['insulin_treatment'].items()} + self.module.item_codes_preg_consumables['insulin_treatment'].items()} avail = pregnancy_helper_functions.return_cons_avail( self.module, self, cons=updated_cons, opt_cons=None) diff --git 
a/tests/test_maternal_health_helper_and_analysis_functions.py b/tests/test_maternal_health_helper_and_analysis_functions.py index 4c55c3a366..eb45d69775 100644 --- a/tests/test_maternal_health_helper_and_analysis_functions.py +++ b/tests/test_maternal_health_helper_and_analysis_functions.py @@ -252,15 +252,16 @@ def test_analysis_events_force_availability_of_consumables_when_scheduled_in_anc # Override the availability of the consumables within the health system- set to 0. If analysis was not running no # interventions requiring these consumable would run module = sim.modules['CareOfWomenDuringPregnancy'] + iron = module.item_codes_preg_consumables['iron_folic_acid'] protein = module.item_codes_preg_consumables['balanced_energy_protein'] calcium = module.item_codes_preg_consumables['calcium'] syph_test = module.item_codes_preg_consumables['syphilis_test'] syph_treat = module.item_codes_preg_consumables['syphilis_treatment'] - for cons in iron, protein, calcium, syph_test, syph_treat: - sim.modules['HealthSystem'].override_availability_of_consumables( - {cons[0]: 0.0}) + for cons in 'iron_folic_acid', 'balanced_energy_protein', 'calcium', 'syphilis_test', 'syphilis_treatment': + updated_cons = {k: v * 0 for (k, v) in module.item_codes_preg_consumables[cons].items()} + sim.modules['HealthSystem'].override_availability_of_consumables(updated_cons) # refresh the consumables sim.modules['HealthSystem'].consumables._refresh_availability_of_consumables(date=sim.date) From ebd94959ec2a00e46c42ce6300f83053fe5c00aa Mon Sep 17 00:00:00 2001 From: joehcollins Date: Fri, 5 Apr 2024 15:49:53 +0100 Subject: [PATCH 039/230] linting --- src/tlo/methods/care_of_women_during_pregnancy.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/tlo/methods/care_of_women_during_pregnancy.py b/src/tlo/methods/care_of_women_during_pregnancy.py index 124e8e6d2a..94ab6d7e49 100644 --- a/src/tlo/methods/care_of_women_during_pregnancy.py +++ b/src/tlo/methods/care_of_women_during_pregnancy.py @@ -1,7 +1,8 @@ from pathlib import Path -import pandas as pd import numpy as np +import pandas as pd + from tlo import DateOffset, Module, Parameter, Property, Types, logging from tlo.events import IndividualScopeEventMixin, PopulationScopeEventMixin, RegularEvent From ccdab822e68b11937eb75db721280380a120023a Mon Sep 17 00:00:00 2001 From: joehcollins Date: Fri, 5 Apr 2024 15:52:31 +0100 Subject: [PATCH 040/230] linting --- src/tlo/methods/care_of_women_during_pregnancy.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/tlo/methods/care_of_women_during_pregnancy.py b/src/tlo/methods/care_of_women_during_pregnancy.py index 94ab6d7e49..d4eaee9bd1 100644 --- a/src/tlo/methods/care_of_women_during_pregnancy.py +++ b/src/tlo/methods/care_of_women_during_pregnancy.py @@ -3,7 +3,6 @@ import numpy as np import pandas as pd - from tlo import DateOffset, Module, Parameter, Property, Types, logging from tlo.events import IndividualScopeEventMixin, PopulationScopeEventMixin, RegularEvent from tlo.methods import Metadata, pregnancy_helper_functions From 556d5b18b97610aa87188fe2c26cad2456b23bb4 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Fri, 5 Apr 2024 20:06:16 +0100 Subject: [PATCH 041/230] Update cost estimation method and net inflow estimation - to represent the new version of consumables costs following PR #1298 --- src/scripts/costing/costing.py | 85 ++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 39 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 
9994a44389..6c3c4392c3 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -137,76 +137,83 @@ def drop_outside_period(_df): counts_of_available[item] += num # Load consumables cost data -unit_price_consumable = workbook_cost["consumables"][['Item_Code', 'Chosen_price_per_unit (USD)']] +unit_price_consumable = workbook_cost["consumables"] +unit_price_consumable = unit_price_consumable.rename(columns=unit_price_consumable.iloc[0]) +unit_price_consumable = unit_price_consumable[['Item_Code', 'Final_price_per_chosen_unit (USD, 2023)']].reset_index(drop=True).iloc[1:] +unit_price_consumable = unit_price_consumable[unit_price_consumable['Item_Code'].notna()] unit_price_consumable = unit_price_consumable.set_index('Item_Code').to_dict(orient='index') # Multiply number of items needed by cost of consumable -cost_of_consumables_dispensed = dict(zip(unit_price_consumable, (unit_price_consumable[key]['Chosen_price_per_unit (USD)'] * +cost_of_consumables_dispensed = dict(zip(unit_price_consumable, (unit_price_consumable[key]['Final_price_per_chosen_unit (USD, 2023)'] * counts_of_available[key] for key in unit_price_consumable))) total_cost_of_consumables_dispensed = sum(value for value in cost_of_consumables_dispensed.values() if not np.isnan(value)) # Cost of consumables stocked # Estimate the stock to dispensed ratio from OpenLMIS data lmis_consumable_usage = pd.read_csv(path_for_new_resourcefiles / "ResourceFile_Consumables_availability_and_usage.csv") -# Collapse by item_code -lmis_consumable_usage_by_item = lmis_consumable_usage.groupby(['item_code'])[['closing_bal', 'dispensed']].sum() -lmis_consumable_usage_by_item['stock_to_dispensed_ratio'] = lmis_consumable_usage_by_item['closing_bal']/lmis_consumable_usage_by_item['dispensed'] -# Trim top and bottom 5 percentile value for stock_to_dispensed_ratio -percentile_5 = lmis_consumable_usage_by_item['stock_to_dispensed_ratio'].quantile(0.05) -percentile_95 = lmis_consumable_usage_by_item['stock_to_dispensed_ratio'].quantile(0.95) -lmis_consumable_usage_by_item.loc[lmis_consumable_usage_by_item['stock_to_dispensed_ratio'] > percentile_95, 'stock_to_dispensed_ratio'] = percentile_95 -lmis_consumable_usage_by_item.loc[lmis_consumable_usage_by_item['stock_to_dispensed_ratio'] < percentile_5, 'stock_to_dispensed_ratio'] = percentile_5 -lmis_stock_to_dispensed_ratio_by_item = lmis_consumable_usage_by_item['stock_to_dispensed_ratio'] -lmis_stock_to_dispensed_ratio_by_item.to_dict() -average_stock_to_dispensed_ratio = lmis_stock_to_dispensed_ratio_by_item.mean() +# Collapse individual facilities +lmis_consumable_usage_by_item_level_month = lmis_consumable_usage.groupby(['category', 'item_code', 'district', 'fac_type_tlo', 'month'])[['closing_bal', 'dispensed', 'received']].sum() +df = lmis_consumable_usage_by_item_level_month # Drop rows where monthly OpenLMIS data wasn't available +df = df.loc[df.index.get_level_values('month') != "Aggregate"] +opening_bal_january = df.loc[df.index.get_level_values('month') == 'January', 'closing_bal'] + \ + df.loc[df.index.get_level_values('month') == 'January', 'dispensed'] - \ + df.loc[df.index.get_level_values('month') == 'January', 'received'] +closing_bal_december = df.loc[df.index.get_level_values('month') == 'December', 'closing_bal'] +total_consumables_inflow_during_the_year = df.loc[df.index.get_level_values('month') != 'January', 'received'].groupby(level=[0,1,2,3]).sum() +\ + opening_bal_january.reset_index(level='month', drop=True) -\ + 
closing_bal_december.reset_index(level='month', drop=True) +total_consumables_outflow_during_the_year = df['dispensed'].groupby(level=[0,1,2,3]).sum() +inflow_to_outflow_ratio = total_consumables_inflow_during_the_year.div(total_consumables_outflow_during_the_year, fill_value=1) +inflow_to_outflow_ratio.to_dict() + +# Edit outlier ratios +inflow_to_outflow_ratio.loc[inflow_to_outflow_ratio < 1] = 1 # Ratio can't be less than 1 +inflow_to_outflow_ratio.loc[inflow_to_outflow_ratio > inflow_to_outflow_ratio.quantile(0.95)] = inflow_to_outflow_ratio.quantile(0.95) # Trim values greater than the 95th percentile +average_inflow_to_outflow_ratio_ratio = inflow_to_outflow_ratio.mean() +#inflow_to_outflow_ratio.loc[inflow_to_outflow_ratio.isna()] = average_inflow_to_outflow_ratio_ratio # replace missing with average # Multiply number of items needed by cost of consumable -cost_of_consumables_stocked = dict(zip(unit_price_consumable, (unit_price_consumable[key]['Chosen_price_per_unit (USD)'] * +inflow_to_outflow_ratio_by_consumable = inflow_to_outflow_ratio.groupby(level='item_code').mean() +# TODO Consider whether a more disaggregated version of the ratio dictionary should be applied +cost_of_consumables_stocked = dict(zip(unit_price_consumable, (unit_price_consumable[key]['Final_price_per_chosen_unit (USD, 2023)'] * counts_of_available[key] * - lmis_stock_to_dispensed_ratio_by_item.get(key, average_stock_to_dispensed_ratio) + inflow_to_outflow_ratio_by_consumable.get(key, average_inflow_to_outflow_ratio_ratio) for key in counts_of_available))) total_cost_of_consumables_stocked = sum(value for value in cost_of_consumables_stocked.values() if not np.isnan(value)) scenario_cost_financial['Consumables'] = total_cost_of_consumables_stocked -# Explore the ratio of dispensed drugs to drug stock -#################################################### -# Collapse monthly data -lmis_consumable_usage_by_district_and_level = lmis_consumable_usage.groupby(['district', 'fac_type_tlo','category', 'item_code'])[['closing_bal', 'dispensed']].sum() -lmis_consumable_usage_by_district_and_level.reset_index() -lmis_consumable_usage_by_district_and_level['stock_to_dispensed_ratio'] = lmis_consumable_usage_by_district_and_level['closing_bal']/lmis_consumable_usage_by_district_and_level['dispensed'] +# Explore the ratio of consumable inflows to outflows +###################################################### # TODO: Only consider the months for which original OpenLMIS data was available for closing_stock and dispensed -def plot_stock_to_dispensed(_df, plot_var, groupby_var, outlier_percentile): - # Exclude the top x percentile (outliers) from the plot - percentile_excluded = _df[plot_var].quantile(outlier_percentile) - _df_without_outliers = _df[_df[plot_var] <= percentile_excluded] +def plot_inflow_to_outflow_ratio(_dict, groupby_var): + # Convert Dict to dataframe + flattened_data = [(level1, level2, level3, level4, value) for (level1, level2, level3, level4), value in + inflow_to_outflow_ratio.items()] # Flatten dictionary into a list of tuples + _df = pd.DataFrame(flattened_data, columns=['category', 'item_code', 'district', 'fac_type_tlo', 'inflow_to_outflow_ratio']) # Convert flattened data to DataFrame # Plot the bar plot plt.figure(figsize=(10, 6)) - sns.barplot(data=_df_without_outliers, x=groupby_var, y=plot_var, ci=None) + sns.barplot(data=_df , x=groupby_var, y= 'inflow_to_outflow_ratio', errorbar=None) # Add points representing the distribution of individual values - sns.stripplot(data=_df_without_outliers, 
x=groupby_var, y=plot_var, color='black', size=5, alpha=0.2) + sns.stripplot(data=_df, x=groupby_var, y='inflow_to_outflow_ratio', color='black', size=5, alpha=0.2) # Set labels and title plt.xlabel(groupby_var) - plt.ylabel('Stock to Dispensed Ratio') - plt.title('Average Stock to Dispensed Ratio by ' + f'{groupby_var}') + plt.ylabel('Inflow to Outflow Ratio') + plt.title('Average Inflow to Outflow Ratio by ' + f'{groupby_var}') plt.xticks(rotation=45) # Show plot plt.tight_layout() - plt.savefig(costing_outputs_folder / 'stock_to_dispensed_ratio_by' f'{groupby_var}' ) - -plot_stock_to_dispensed(lmis_consumable_usage_by_district_and_level, 'stock_to_dispensed_ratio', - 'fac_type_tlo', 0.95) -plot_stock_to_dispensed(lmis_consumable_usage_by_district_and_level, 'stock_to_dispensed_ratio', - 'district', 0.95) -plot_stock_to_dispensed(lmis_consumable_usage_by_district_and_level, 'stock_to_dispensed_ratio', - 'category', 0.95) -plot_stock_to_dispensed(lmis_consumable_usage_by_district_and_level, 'stock_to_dispensed_ratio', - 'item_code', 0.95) + plt.savefig(costing_outputs_folder / 'inflow_to_outflow_ratio_by' f'{groupby_var}' ) + +plot_inflow_to_outflow_ratio(inflow_to_outflow_ratio, 'fac_type_tlo') +plot_inflow_to_outflow_ratio(inflow_to_outflow_ratio, 'district') +plot_inflow_to_outflow_ratio(inflow_to_outflow_ratio, 'item_code') +plot_inflow_to_outflow_ratio(inflow_to_outflow_ratio, 'category') # Compare financial costs with actual budget data #################################################### From ef2d344afe503027808906acdd38f40e230b477c Mon Sep 17 00:00:00 2001 From: tdm32 Date: Wed, 10 Apr 2024 09:17:48 +0100 Subject: [PATCH 042/230] change cotrimoxazole units to mg --- resources/costing/~$ResourceFile_Costing.xlsx | 3 +++ src/tlo/methods/hiv.py | 12 ++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) create mode 100644 resources/costing/~$ResourceFile_Costing.xlsx diff --git a/resources/costing/~$ResourceFile_Costing.xlsx b/resources/costing/~$ResourceFile_Costing.xlsx new file mode 100644 index 0000000000..d5d6d83ccc --- /dev/null +++ b/resources/costing/~$ResourceFile_Costing.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:019349b15c524cfef4b39db4dd792de3376f4bc3da9b6b298a1fee07c4eb219e +size 165 diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py index 7377b93f51..e734892d2c 100644 --- a/src/tlo/methods/hiv.py +++ b/src/tlo/methods/hiv.py @@ -968,7 +968,7 @@ def initialise_simulation(self, sim): # First - line ART for adults(age > "ART_age_cutoff_older_child") # TDF/3TC/DTG 120/60/50mg, 1 tablet per day - # cotrim adult tablet, 1 tablet per day + # cotrim adult tablet, 1 tablet per day, units specified in mg * dispensation days self.item_codes_for_consumables_required['First-line ART regimen: adult'] = \ hs.get_item_code_from_item_name("First-line ART regimen: adult") self.item_codes_for_consumables_required['First-line ART regimen: adult: cotrimoxazole'] = \ @@ -976,7 +976,7 @@ def initialise_simulation(self, sim): # ART for older children aged ("ART_age_cutoff_younger_child" < age <= "ART_age_cutoff_older_child"): # ABC/3TC/DTG 120/60/50mg, 3 tablets per day - # cotrim paediatric tablet, 4 tablets per day + # cotrim paediatric tablet, 4 tablets per day, units specified in mg * dispensation days self.item_codes_for_consumables_required['First line ART regimen: older child'] = \ hs.get_item_code_from_item_name("First line ART regimen: older child") self.item_codes_for_consumables_required['First line ART regimen: older child: 
cotrimoxazole'] = \ @@ -984,7 +984,7 @@ def initialise_simulation(self, sim): # ART for younger children aged (age < "ART_age_cutoff_younger_child"): # ABC/3TC/DTG 120/60/10mg, 2 tablets per day - # cotrim paediatric tablet, 2 tablets per day + # cotrim paediatric tablet, 2 tablets per day, units specified in mg * dispensation days self.item_codes_for_consumables_required['First line ART regimen: young child'] = \ hs.get_item_code_from_item_name("First line ART regimen: young child") self.item_codes_for_consumables_required['First line ART regimen: young child: cotrimoxazole'] = \ @@ -2643,7 +2643,7 @@ def get_drugs(self, age_of_person): item_codes={self.module.item_codes_for_consumables_required[ 'First line ART regimen: young child']: dispensation_days * 2}, optional_item_codes={self.module.item_codes_for_consumables_required[ - 'First line ART regimen: young child: cotrimoxazole']: dispensation_days * 2}, + 'First line ART regimen: young child: cotrimoxazole']: dispensation_days * 240}, return_individual_results=True) elif age_of_person <= p["ART_age_cutoff_older_child"]: @@ -2652,7 +2652,7 @@ def get_drugs(self, age_of_person): item_codes={self.module.item_codes_for_consumables_required[ 'First line ART regimen: older child']: dispensation_days * 3}, optional_item_codes={self.module.item_codes_for_consumables_required[ - 'First line ART regimen: older child: cotrimoxazole']: dispensation_days * 4}, + 'First line ART regimen: older child: cotrimoxazole']: dispensation_days * 480}, return_individual_results=True) else: @@ -2661,7 +2661,7 @@ def get_drugs(self, age_of_person): item_codes={self.module.item_codes_for_consumables_required[ 'First-line ART regimen: adult']: dispensation_days}, optional_item_codes={self.module.item_codes_for_consumables_required[ - 'First-line ART regimen: adult: cotrimoxazole']: dispensation_days}, + 'First-line ART regimen: adult: cotrimoxazole']: dispensation_days * 960}, return_individual_results=True) # add drug names to dict From 2f11f243558b3cd8450fbf7796f6ad79cedc550e Mon Sep 17 00:00:00 2001 From: tdm32 Date: Wed, 10 Apr 2024 09:23:59 +0100 Subject: [PATCH 043/230] merge in updated unit costs 3hp added and linked to consumables dataset --- src/tlo/methods/tb.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py index 59f360780f..9bf68d6a5e 100644 --- a/src/tlo/methods/tb.py +++ b/src/tlo/methods/tb.py @@ -769,7 +769,7 @@ def get_consumables_for_dx_and_tx(self): self.item_codes_for_consumables_required['tb_isoniazid'] = \ hs.get_item_code_from_item_name("Isoniazid/Pyridoxine, tablet 300 mg") - # todo not yet on consumables database + # 3hp self.item_codes_for_consumables_required['tb_3HP'] = { hs.get_item_code_from_item_name("Isoniazid/Rifapentine"): 1} @@ -2489,7 +2489,6 @@ def apply(self, person_id, squeeze_factor): item_codes={self.module.item_codes_for_consumables_required["tb_ipt"]: 180}) # for all others - # todo check 3HP listed in database else: # 12 weeks dispensation, once weekly drugs_available = self.get_consumables( From be0ba95b109060e37a8eb64234f0ee253046e761 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Wed, 10 Apr 2024 17:43:05 +0100 Subject: [PATCH 044/230] create framework for costing equipment - TODOs to be completed once log file is ready --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- src/scripts/costing/costing.py | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx 
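# Illustrative check of the cotrimoxazole quantities requested above, using a hypothetical
# 90-day dispensation; the mg-per-day multipliers (240, 480, 960) are the ones given in the
# dosing comments and get_consumables calls in the hunks above.
dispensation_days = 90  # hypothetical value, for this example only
cotrimoxazole_mg_per_day = {'young child': 240, 'older child': 480, 'adult': 960}
requested_quantity_mg = {group: dispensation_days * mg for group, mg in cotrimoxazole_mg_per_day.items()}
# requested_quantity_mg -> {'young child': 21600, 'older child': 43200, 'adult': 86400}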
b/resources/costing/ResourceFile_Costing.xlsx index 840fdbca8f..637a967851 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:070cc08625170fab8e11198a350822b427c84add24444b6293bc830102c9b612 -size 1007683 +oid sha256:24bbca255afe21a7727be0a6c7d76d77199953f179c00ccee638dbbec9b1dd89 +size 3719568 diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 6c3c4392c3..70190d0b91 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -183,6 +183,25 @@ def drop_outside_period(_df): scenario_cost_financial['Consumables'] = total_cost_of_consumables_stocked +# 3. Equipment cost +# Total cost of equipment required as per SEL (HSSP-III) only at facility IDs where it been used in the simulation +unit_cost_equipment = workbook_cost["equipment"] +unit_cost_equipment = unit_cost_equipment.rename(columns=unit_cost_equipment.iloc[7]).reset_index(drop=True).iloc[8:] +# Calculate necessary costs based on HSSP-III assumptions +unit_cost_equipment['service_fee_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.8 / 8 if row['unit_purchase_cost'] > 1000 else 0, axis=1) # 80% of the value of the item over 8 years +unit_cost_equipment['spare_parts_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.2 / 8 if row['unit_purchase_cost'] > 1000 else 0, axis=1) # 20% of the value of the item over 8 years +unit_cost_equipment['upfront_repair_cost_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.2 * 0.2 / 8 if row['unit_purchase_cost'] < 250000 else 0, axis=1) # 20% of the value of 20% of the items over 8 years +unit_cost_equipment['replacement_cost_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.1 / 8 if row['unit_purchase_cost'] < 250000 else 0, axis=1) # 10% of the items over 8 years + +# TODO From the log, extract the facility IDs which use any equipment item +# TODO Collapse facility IDs by level of care to get the total number of facilities at each level using an item +# TODO Multiply number of facilities by level with the quantity needed of each equipment and collapse to get total number of equipment (nationally) +# TODO Multiply quantity needed with cost per item (this is the repair, replacement, and maintenance cost) +# TODO Which equipment needs to be newly purchased (currently no assumption made for equipment with cost > $250,000) + + +# 4. Facility running costs +# Average running costs by facility level and district times the number of facilities in the simulation # Explore the ratio of consumable inflows to outflows ###################################################### From 48104b0e889bd7a122eadecec734f026b7a78c4b Mon Sep 17 00:00:00 2001 From: joehcollins Date: Thu, 11 Apr 2024 11:14:18 +0100 Subject: [PATCH 045/230] added dosage for amitriptyline antidepressant treatment --- src/tlo/methods/depression.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/tlo/methods/depression.py b/src/tlo/methods/depression.py index 8f5fd9661c..7feb49921c 100644 --- a/src/tlo/methods/depression.py +++ b/src/tlo/methods/depression.py @@ -830,9 +830,10 @@ def apply(self, person_id, squeeze_factor): "receiving an HSI. 
" # Check availability of antidepressant medication - item_code = self.module.parameters['anti_depressant_medication_item_code'] + # Dose is 25mg daily, patient provided with month supply - 25mg x 30.437 (days) = 761mg per month + item_code_with_dose = {self.module.parameters['anti_depressant_medication_item_code']: 761} - if self.get_consumables(item_codes=item_code): + if self.get_consumables(item_codes=item_code_with_dose): # If medication is available, flag as being on antidepressants df.at[person_id, 'de_on_antidepr'] = True @@ -873,7 +874,10 @@ def apply(self, person_id, squeeze_factor): return self.sim.modules['HealthSystem'].get_blank_appt_footprint() # Check availability of antidepressant medication - if self.get_consumables(self.module.parameters['anti_depressant_medication_item_code']): + # Dose is 25mg daily, patient provided with month supply - 25mg x 30.437 (days) = 761mg per month + item_code_with_dose = {self.module.parameters['anti_depressant_medication_item_code']: 761} + + if self.get_consumables(item_codes=item_code_with_dose): # Schedule their next HSI for a refill of medication, one month from now self.sim.modules['HealthSystem'].schedule_hsi_event( hsi_event=HSI_Depression_Refill_Antidepressant(person_id=person_id, module=self.module), From c11b707a00f297a33504725a175e51497ca385fd Mon Sep 17 00:00:00 2001 From: joehcollins Date: Thu, 11 Apr 2024 12:01:21 +0100 Subject: [PATCH 046/230] added dosage for epilepsy treatment --- src/tlo/methods/epilepsy.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/src/tlo/methods/epilepsy.py b/src/tlo/methods/epilepsy.py index f06c62d098..aa11d313ed 100644 --- a/src/tlo/methods/epilepsy.py +++ b/src/tlo/methods/epilepsy.py @@ -600,7 +600,14 @@ def apply(self, person_id, squeeze_factor): if best_available_medicine is not None: # Request the medicine from the health system - self.get_consumables(self.module.item_codes[best_available_medicine]) + + dose = {'phenobarbitone': 9131, # 100mg per day - 3 months + 'carbamazepine': 91_311, # 1000mg per day - 3 months + 'phenytoin': 27_393} # 300mg per day - 3 months + + self.get_consumables({self.module.item_codes[best_available_medicine]: + dose[best_available_medicine]}) + # Update this person's properties to show that they are currently on medication df.at[person_id, 'ep_antiep'] = True @@ -651,18 +658,30 @@ def apply(self, person_id, squeeze_factor): # Request the medicine best_available_medicine = self.module.get_best_available_medicine(self) if best_available_medicine is not None: + + # Schedule a reoccurrence of this follow-up in 3 months if ep_seiz_stat == '3', + # else, schedule this reoccurrence of it in 1 year (i.e., if ep_seiz_stat == '2' + if df.at[person_id, 'ep_seiz_stat'] == '3': + fu_mnths = 3 + else: + fu_mnths = 12 + # The medicine is available, so request it - self.get_consumables(self.module.item_codes[best_available_medicine]) + dose = {'phenobarbitone_3_mnths': 9131, 'phenobarbitone_12_mnths': 36_525, # 100mg per day - 3/12 months + 'carbamazepine_3_mnths': 91_311, 'carbamazepine_12_mnths': 365_250, # 1000mg per day - 3/12 months + 'phenytoin_3_mnths': 27_393, 'phenytoin_12_mnths': 109_575} # 300mg per day - 3/12 months + + self.get_consumables({self.module.item_codes[best_available_medicine]: + dose[f'{best_available_medicine}_{fu_mnths}_mnths']}) # Reset counter of "failed attempts" and put the appointment for the next occurrence to the usual self._counter_of_failed_attempts_due_to_unavailable_medicines = 0 
self.EXPECTED_APPT_FOOTPRINT = self._DEFAULT_APPT_FOOTPRINT - # Schedule a reoccurrence of this follow-up in 3 months if ep_seiz_stat == '3', - # else, schedule this reoccurrence of it in 1 year (i.e., if ep_seiz_stat == '2') + # Schedule follow-up hs.schedule_hsi_event( hsi_event=self, - topen=self.sim.date + DateOffset(months=3 if df.at[person_id, 'ep_seiz_stat'] == '3' else 12), + topen=self.sim.date + DateOffset(months=fu_mnths), tclose=None, priority=0 ) From 0ac40bb5c8ac82457d8fa31d2074498e76581530 Mon Sep 17 00:00:00 2001 From: joehcollins Date: Thu, 11 Apr 2024 12:37:37 +0100 Subject: [PATCH 047/230] fix error in postnatal_supervisor.py leading to failing test --- src/tlo/methods/postnatal_supervisor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tlo/methods/postnatal_supervisor.py b/src/tlo/methods/postnatal_supervisor.py index 6303763b3d..df372c0054 100644 --- a/src/tlo/methods/postnatal_supervisor.py +++ b/src/tlo/methods/postnatal_supervisor.py @@ -1277,7 +1277,7 @@ def apply(self, person_id, squeeze_factor): ic = self.sim.modules['HealthSystem'].get_item_code_from_item_name of_repair_cons = \ - {ic('Scalpel blade size 22 (individually wrapped)_100_CMS'): 1, + {ic('Scalpel blade size 22 (individually wrapped)_100_CMST'): 1, ic('Halothane (fluothane)_250ml_CMST'): 1, # todo: dose ic('Ceftriaxone 1g, PFR_each_CMST'): 1, # todo: dose ic('Metronidazole 200mg_1000_CMST'): 1, # todo: dose From 3535a583042c32af34e919fcd5e31f024e192d9b Mon Sep 17 00:00:00 2001 From: joehcollins Date: Thu, 11 Apr 2024 13:53:07 +0100 Subject: [PATCH 048/230] units added for care_of_women_during_pregnancy.py --- .../methods/care_of_women_during_pregnancy.py | 28 +++++++++---------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/src/tlo/methods/care_of_women_during_pregnancy.py b/src/tlo/methods/care_of_women_during_pregnancy.py index d4eaee9bd1..a39a1d7027 100644 --- a/src/tlo/methods/care_of_women_during_pregnancy.py +++ b/src/tlo/methods/care_of_women_during_pregnancy.py @@ -222,15 +222,15 @@ def get_and_store_pregnancy_item_codes(self): # -------------------------------------------- ECTOPIC PREGNANCY --------------------------------------------- self.item_codes_preg_consumables['ectopic_pregnancy_core'] = \ - {ic('Halothane (fluothane)_250ml_CMST'): 1} # TODO: dose + {ic('Halothane (fluothane)_250ml_CMST'): 100} self.item_codes_preg_consumables['ectopic_pregnancy_optional'] = \ {ic('Scalpel blade size 22 (individually wrapped)_100_CMST'): 1, ic('Sodium chloride, injectable solution, 0,9 %, 500 ml'): 2000, ic('Paracetamol, tablet, 500 mg'): 8000, - ic('Pethidine, 50 mg/ml, 2 ml ampoule'): 1, # todo: dose + ic('Pethidine, 50 mg/ml, 2 ml ampoule'): 6, ic('Suture pack'): 1, - ic('Gauze, absorbent 90cm x 40m_each_CMST'): 1, + ic('Gauze, absorbent 90cm x 40m_each_CMST'): 30, ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, ic('Giving set iv administration + needle 15 drops/ml_each_CMST'): 1, ic('Disposables gloves, powder free, 100 pieces per box'): 1, @@ -238,13 +238,13 @@ def get_and_store_pregnancy_item_codes(self): # ------------------------------------------- POST ABORTION CARE - GENERAL ----------------------------------- self.item_codes_preg_consumables['post_abortion_care_core'] = \ - {ic('Misoprostol, tablet, 200 mcg'): 1} # TODO: dose + {ic('Misoprostol, tablet, 200 mcg'): 600} self.item_codes_preg_consumables['post_abortion_care_optional'] = \ {ic('Complete blood count'): 1, ic('Blood collecting tube, 5 ml'): 1, ic('Paracetamol, tablet, 500 
mg'): 8000, - ic('Gauze, absorbent 90cm x 40m_each_CMST'): 1, + ic('Gauze, absorbent 90cm x 40m_each_CMST'): 30, ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, ic('Giving set iv administration + needle 15 drops/ml_each_CMST'): 1, ic('Disposables gloves, powder free, 100 pieces per box'): 1, @@ -252,8 +252,8 @@ def get_and_store_pregnancy_item_codes(self): # ------------------------------------------- POST ABORTION CARE - SEPSIS ------------------------------------- self.item_codes_preg_consumables['post_abortion_care_sepsis_core'] = \ - {ic('Benzylpenicillin 3g (5MU), PFR_each_CMST'): 1, # TODO: dose - ic('Gentamycin, injection, 40 mg/ml in 2 ml vial'): 1, # TODO: dose + {ic('Benzathine benzylpenicillin, powder for injection, 2.4 million IU'): 8, + ic('Gentamycin, injection, 40 mg/ml in 2 ml vial'): 6, } self.item_codes_preg_consumables['post_abortion_care_sepsis_optional'] = \ @@ -261,13 +261,13 @@ def get_and_store_pregnancy_item_codes(self): ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, ic('Giving set iv administration + needle 15 drops/ml_each_CMST'): 1, ic('Disposables gloves, powder free, 100 pieces per box'): 1, - ic('Oxygen, 1000 liters, primarily with oxygen cylinders'): 1, # TODO: dose + ic('Oxygen, 1000 liters, primarily with oxygen cylinders'): 23_040, } # ------------------------------------------- POST ABORTION CARE - SHOCK ------------------------------------ self.item_codes_preg_consumables['post_abortion_care_shock'] = \ {ic('Sodium chloride, injectable solution, 0,9 %, 500 ml'): 2000, - ic('Oxygen, 1000 liters, primarily with oxygen cylinders'): 1, # TODO: dose + ic('Oxygen, 1000 liters, primarily with oxygen cylinders'): 23_040, } self.item_codes_preg_consumables['post_abortion_care_shock_optional'] = \ @@ -308,7 +308,7 @@ def get_and_store_pregnancy_item_codes(self): # ------------------------------------------- SYPHILIS TREATMENT ---------------------------------------------- self.item_codes_preg_consumables['syphilis_treatment'] =\ - {ic('Benzathine benzylpenicillin, powder for injection, 2.4 million IU'): 1} + {ic('Benzathine benzylpenicillin, powder for injection, 2.4 million IU'): 8} # ----------------------------------------------- GDM TEST ---------------------------------------------------- self.item_codes_preg_consumables['gdm_test'] = {ic('Blood glucose level test'): 1} @@ -333,13 +333,11 @@ def get_and_store_pregnancy_item_codes(self): # ---------------------------------------- MANAGEMENT OF ECLAMPSIA -------------------------------------------- self.item_codes_preg_consumables['eclampsia_management_optional'] = \ - {ic('Misoprostol, tablet, 200 mcg'): 1, # todo: dose - ic('Oxytocin, injection, 10 IU in 1 ml ampoule'): 1, # todo: dose - ic('Sodium chloride, injectable solution, 0,9 %, 500 ml'): 2000, + {ic('Sodium chloride, injectable solution, 0,9 %, 500 ml'): 2000, ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, ic('Giving set iv administration + needle 15 drops/ml_each_CMST'): 1, ic('Disposables gloves, powder free, 100 pieces per box'): 1, - ic('Oxygen, 1000 liters, primarily with oxygen cylinders'): 1, # todo: dose + ic('Oxygen, 1000 liters, primarily with oxygen cylinders'): 23_040, ic('Complete blood count'): 1, ic('Blood collecting tube, 5 ml'): 1, ic('Foley catheter'): 1, @@ -348,7 +346,7 @@ def get_and_store_pregnancy_item_codes(self): # -------------------------------------- ANTIBIOTICS FOR PROM ------------------------------------------------ self.item_codes_preg_consumables['abx_for_prom'] = \ - 
{ic('Benzathine benzylpenicillin, powder for injection, 2.4 million IU'): 1} # todo: dose + {ic('Benzathine benzylpenicillin, powder for injection, 2.4 million IU'): 8} # ----------------------------------- ORAL DIABETIC MANAGEMENT ----------------------------------------------- # Dose changes at run time From ff4e29a74f5187c722be6e9dc2d1cf8456a18876 Mon Sep 17 00:00:00 2001 From: joehcollins Date: Thu, 11 Apr 2024 13:53:55 +0100 Subject: [PATCH 049/230] units added for care_of_women_during_pregnancy.py --- src/tlo/methods/care_of_women_during_pregnancy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tlo/methods/care_of_women_during_pregnancy.py b/src/tlo/methods/care_of_women_during_pregnancy.py index a39a1d7027..74b14e766a 100644 --- a/src/tlo/methods/care_of_women_during_pregnancy.py +++ b/src/tlo/methods/care_of_women_during_pregnancy.py @@ -308,7 +308,7 @@ def get_and_store_pregnancy_item_codes(self): # ------------------------------------------- SYPHILIS TREATMENT ---------------------------------------------- self.item_codes_preg_consumables['syphilis_treatment'] =\ - {ic('Benzathine benzylpenicillin, powder for injection, 2.4 million IU'): 8} + {ic('Benzathine benzylpenicillin, powder for injection, 2.4 million IU'): 1} # ----------------------------------------------- GDM TEST ---------------------------------------------------- self.item_codes_preg_consumables['gdm_test'] = {ic('Blood glucose level test'): 1} From 26a7025668df8a68546f83ba7a33f4a590bbfb5d Mon Sep 17 00:00:00 2001 From: joehcollins Date: Thu, 11 Apr 2024 14:19:19 +0100 Subject: [PATCH 050/230] units added for labour.py --- src/tlo/methods/labour.py | 60 +++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/src/tlo/methods/labour.py b/src/tlo/methods/labour.py index 2635a3b98b..80ea2a6ebe 100644 --- a/src/tlo/methods/labour.py +++ b/src/tlo/methods/labour.py @@ -703,11 +703,11 @@ def get_and_store_labour_item_codes(self): # assuming CDK has blade, soap, cord tie self.item_codes_lab_consumables['delivery_core'] = \ {ic('Clean delivery kit'): 1, - ic('Chlorhexidine 1.5% solution_5_CMST'): 1, # todo: dose + ic('Chlorhexidine 1.5% solution_5_CMST'): 20, } self.item_codes_lab_consumables['delivery_optional'] = \ - {ic('Gauze, absorbent 90cm x 40m_each_CMST'): 1, + {ic('Gauze, absorbent 90cm x 40m_each_CMST'): 30, ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, ic('Disposables gloves, powder free, 100 pieces per box'): 1, ic('Paracetamol, tablet, 500 mg'): 8000 @@ -715,9 +715,9 @@ def get_and_store_labour_item_codes(self): # -------------------------------------------- CAESAREAN DELIVERY ------------------------------------------ self.item_codes_lab_consumables['caesarean_delivery_core'] = \ - {ic('Halothane (fluothane)_250ml_CMST'): 1, # todo: dose - ic('Ceftriaxone 1g, PFR_each_CMST'): 1, # todo: dose - ic('Metronidazole 200mg_1000_CMST'): 1, # todo: dose + {ic('Halothane (fluothane)_250ml_CMST'): 100, + ic('Ceftriaxone 1g, PFR_each_CMST'): 2, + ic('Metronidazole 200mg_1000_CMST'): 1, # todo: replace } self.item_codes_lab_consumables['caesarean_delivery_optional'] = \ @@ -729,15 +729,15 @@ def get_and_store_labour_item_codes(self): ic('Foley catheter'): 1, ic('Bag, urine, collecting, 2000 ml'): 1, ic('Paracetamol, tablet, 500 mg'): 8000, - ic('Declofenac injection_each_CMST'): 1, # todo: dose + ic('Declofenac injection_1_CMST'): 2, ic("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 2000, } # 
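# Sketch of how quantity dictionaries like the ones above feed a cost estimate once unit
# prices are attached in the costing script (the prices below are made up purely for
# illustration, not taken from the costing resource file):
example_request = {'Clean delivery kit': 1, 'Chlorhexidine 1.5% solution_5_CMST': 20}
assumed_price_per_unit_usd = {'Clean delivery kit': 2.50, 'Chlorhexidine 1.5% solution_5_CMST': 0.25}
estimated_cost_usd = sum(quantity * assumed_price_per_unit_usd[item] for item, quantity in example_request.items())
# estimated_cost_usd -> 7.5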
-------------------------------------------- OBSTETRIC SURGERY ---------------------------------------------- self.item_codes_lab_consumables['obstetric_surgery_core'] = \ - {ic('Halothane (fluothane)_250ml_CMST'): 1, # todo: dose - ic('Ceftriaxone 1g, PFR_each_CMST'): 1, # todo: dose - ic('Metronidazole 200mg_1000_CMST'): 1, # todo: dose + {ic('Halothane (fluothane)_250ml_CMST'): 100, + ic('Ceftriaxone 1g, PFR_each_CMST'): 2, + ic('Metronidazole 200mg_1000_CMST'): 1, # todo: replace } self.item_codes_lab_consumables['obstetric_surgery_optional'] = \ @@ -749,39 +749,37 @@ def get_and_store_labour_item_codes(self): ic('Foley catheter'): 1, ic('Bag, urine, collecting, 2000 ml'): 1, ic('Paracetamol, tablet, 500 mg'): 8000, - ic('Declofenac injection_each_CMST'): 1, # todo: dose + ic('Declofenac injection_each_CMST'): 2, ic("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 2000, } # -------------------------------------------- ABX FOR PROM ------------------------------------------------- self.item_codes_lab_consumables['abx_for_prom'] = \ - {ic('Benzathine benzylpenicillin, powder for injection, 2.4 million IU'): 1} # todo: dose + {ic('Benzathine benzylpenicillin, powder for injection, 2.4 million IU'): 8} # -------------------------------------------- ANTENATAL STEROIDS --------------------------------------------- self.item_codes_lab_consumables['antenatal_steroids'] = \ - {ic('Dexamethasone 5mg/ml, 5ml_each_CMST'): 1} # todo: dose + {ic('Dexamethasone 5mg/ml, 5ml_each_CMST'): 12} # ------------------------------------- INTRAVENOUS ANTIHYPERTENSIVES --------------------------------------- self.item_codes_lab_consumables['iv_antihypertensives'] = \ - {ic('Hydralazine, powder for injection, 20 mg ampoule'): 1} # todo: dose + {ic('Hydralazine, powder for injection, 20 mg ampoule'): 1} # --------------------------------------- ORAL ANTIHYPERTENSIVES --------------------------------------------- self.item_codes_lab_consumables['oral_antihypertensives'] = \ - {ic('Hydralazine, powder for injection, 20 mg ampoule'): 1} # todo: dose + {ic('Hydralazine, powder for injection, 20 mg ampoule'): 1} # ---------------------------------- SEVERE PRE-ECLAMPSIA/ECLAMPSIA ----------------------------------------- self.item_codes_lab_consumables['magnesium_sulfate'] = \ {ic('Magnesium sulfate, injection, 500 mg/ml in 10-ml ampoule'): 2} self.item_codes_lab_consumables['eclampsia_management_optional'] = \ - {ic('Misoprostol, tablet, 200 mcg'): 1, # todo: dose - ic('Oxytocin, injection, 10 IU in 1 ml ampoule'): 1, # todo: dose - ic('Sodium chloride, injectable solution, 0,9 %, 500 ml'): 2000, + {ic('Sodium chloride, injectable solution, 0,9 %, 500 ml'): 2000, ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, ic('Giving set iv administration + needle 15 drops/ml_each_CMST'): 1, ic('Disposables gloves, powder free, 100 pieces per box'): 1, - ic('Oxygen, 1000 liters, primarily with oxygen cylinders'): 1, # todo: dose + ic('Oxygen, 1000 liters, primarily with oxygen cylinders'): 23_040, ic('Complete blood count'): 1, ic('Blood collecting tube, 5 ml'): 1, ic('Foley catheter'): 1, @@ -789,9 +787,9 @@ def get_and_store_labour_item_codes(self): } # ------------------------------------- OBSTRUCTED LABOUR --------------------------------------------------- self.item_codes_lab_consumables['obstructed_labour'] = \ - {ic('Lidocaine HCl (in dextrose 7.5%), ampoule 2 ml'): 1, # todo: dose - ic('Benzylpenicillin 3g (5MU), PFR_each_CMST'): 1, # todo: dose - ic('Gentamycin, injection, 40 mg/ml in 2 ml vial'): 
1, # todo: dose + {ic('Lidocaine HCl (in dextrose 7.5%), ampoule 2 ml'): 1, + ic('Benzathine benzylpenicillin, powder for injection, 2.4 million IU'): 8, + ic('Gentamycin, injection, 40 mg/ml in 2 ml vial'): 6, ic('Sodium chloride, injectable solution, 0,9 %, 500 ml'): 2000, ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, ic('Giving set iv administration + needle 15 drops/ml_each_CMST'): 1, @@ -801,8 +799,8 @@ def get_and_store_labour_item_codes(self): ic('Foley catheter'): 1, ic('Bag, urine, collecting, 2000 ml'): 1, ic('Paracetamol, tablet, 500 mg'): 8000, - ic('Pethidine, 50 mg/ml, 2 ml ampoule'): 1, # todo: dose - ic('Gauze, absorbent 90cm x 40m_each_CMST'): 1, + ic('Pethidine, 50 mg/ml, 2 ml ampoule'): 6, + ic('Gauze, absorbent 90cm x 40m_each_CMST'): 30, ic('Suture pack'): 1, } # ------------------------------------- OBSTETRIC VACUUM --------------------------------------------------- @@ -810,13 +808,13 @@ def get_and_store_labour_item_codes(self): # ------------------------------------- MATERNAL SEPSIS ----------------------------------------------------- self.item_codes_lab_consumables['maternal_sepsis_core'] = \ - {ic('Benzylpenicillin 3g (5MU), PFR_each_CMST'): 1, # todo: dose - ic('Gentamycin, injection, 40 mg/ml in 2 ml vial'): 1, # todo: dose + {ic('Benzylpenicillin 3g (5MU), PFR_each_CMST'): 8, + ic('Gentamycin, injection, 40 mg/ml in 2 ml vial'): 6, } self.item_codes_lab_consumables['maternal_sepsis_optional'] = \ {ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, - ic('Oxygen, 1000 liters, primarily with oxygen cylinders'): 1, # todo: dose + ic('Oxygen, 1000 liters, primarily with oxygen cylinders'): 23_040, ic('Paracetamol, tablet, 500 mg'): 8000, ic('Giving set iv administration + needle 15 drops/ml_each_CMST'): 1, ic('Foley catheter'): 1, @@ -825,16 +823,16 @@ def get_and_store_labour_item_codes(self): ic('Complete blood count'): 1, } # ------------------------------------- ACTIVE MANAGEMENT THIRD STAGE --------------------------------------- - self.item_codes_lab_consumables['amtsl'] = {ic('Oxytocin, injection, 10 IU in 1 ml ampoule'): 1} # todo: dose + self.item_codes_lab_consumables['amtsl'] = {ic('Oxytocin, injection, 10 IU in 1 ml ampoule'): 1} # ------------------------------------- POSTPARTUM HAEMORRHAGE --------------------------------------- self.item_codes_lab_consumables['pph_core'] = \ - {ic('Oxytocin, injection, 10 IU in 1 ml ampoule'): 1} # todo: dose + {ic('Oxytocin, injection, 10 IU in 1 ml ampoule'): 5} self.item_codes_lab_consumables['pph_optional'] = \ - {ic('Misoprostol, tablet, 200 mcg'): 1, # todo: dose - ic('Pethidine, 50 mg/ml, 2 ml ampoule'): 1, # todo: dose - ic('Oxygen, 1000 liters, primarily with oxygen cylinders'): 1, # todo: dose + {ic('Misoprostol, tablet, 200 mcg'): 600, + ic('Pethidine, 50 mg/ml, 2 ml ampoule'): 6, + ic('Oxygen, 1000 liters, primarily with oxygen cylinders'): 23_040, ic('Giving set iv administration + needle 15 drops/ml_each_CMST'): 1, ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, ic('Foley catheter'): 1, From 789bf215ed41a7cfc16f694e38d80c0d6eafa511 Mon Sep 17 00:00:00 2001 From: joehcollins Date: Thu, 11 Apr 2024 14:35:47 +0100 Subject: [PATCH 051/230] fix failing test. 
initial doses for newborn outcomes/postnatal supervisor --- src/tlo/methods/labour.py | 2 +- src/tlo/methods/newborn_outcomes.py | 6 +++--- src/tlo/methods/postnatal_supervisor.py | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/tlo/methods/labour.py b/src/tlo/methods/labour.py index 80ea2a6ebe..f636fb3dbc 100644 --- a/src/tlo/methods/labour.py +++ b/src/tlo/methods/labour.py @@ -729,7 +729,7 @@ def get_and_store_labour_item_codes(self): ic('Foley catheter'): 1, ic('Bag, urine, collecting, 2000 ml'): 1, ic('Paracetamol, tablet, 500 mg'): 8000, - ic('Declofenac injection_1_CMST'): 2, + ic('Declofenac injection_each_CMST'): 2, ic("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 2000, } diff --git a/src/tlo/methods/newborn_outcomes.py b/src/tlo/methods/newborn_outcomes.py index f6fb49650e..7c88fae1db 100644 --- a/src/tlo/methods/newborn_outcomes.py +++ b/src/tlo/methods/newborn_outcomes.py @@ -396,17 +396,17 @@ def get_and_store_newborn_item_codes(self): # -------------------------------------------- VITAMIN K ------------------------------------------ self.item_codes_nb_consumables['vitamin_k'] = \ - {ic('vitamin K1 (phytomenadione) 1 mg/ml, 1 ml, inj._100_IDA'): 1} # todo: dose + {ic('vitamin K1 (phytomenadione) 1 mg/ml, 1 ml, inj._100_IDA'): 1} # -------------------------------------------- EYE CARE ------------------------------------------ self.item_codes_nb_consumables['eye_care'] = \ - {ic('Tetracycline eye ointment, 1 %, tube 5 mg'): 1} # todo: dose + {ic('Tetracycline eye ointment, 1 %, tube 5 mg'): 5} # ------------------------------------- SEPSIS - FULL SUPPORTIVE CARE --------------------------------------- self.item_codes_nb_consumables['sepsis_supportive_care_core'] = \ {ic('Benzylpenicillin 1g (1MU), PFR_Each_CMST'): 1, # todo: dose ic('Gentamicin 40mg/ml, 2ml_each_CMST'): 1, # todo: dose - ic('Oxygen, 1000 liters, primarily with oxygen cylinders'): 1 # todo: dose + ic('Oxygen, 1000 liters, primarily with oxygen cylinders'): 5760 # todo: dose } self.item_codes_nb_consumables['sepsis_supportive_care_optional'] = \ diff --git a/src/tlo/methods/postnatal_supervisor.py b/src/tlo/methods/postnatal_supervisor.py index df372c0054..d91fc2073b 100644 --- a/src/tlo/methods/postnatal_supervisor.py +++ b/src/tlo/methods/postnatal_supervisor.py @@ -1278,8 +1278,8 @@ def apply(self, person_id, squeeze_factor): of_repair_cons = \ {ic('Scalpel blade size 22 (individually wrapped)_100_CMST'): 1, - ic('Halothane (fluothane)_250ml_CMST'): 1, # todo: dose - ic('Ceftriaxone 1g, PFR_each_CMST'): 1, # todo: dose + ic('Halothane (fluothane)_250ml_CMST'): 100, # todo: dose + ic('Ceftriaxone 1g, PFR_each_CMST'): 2, # todo: dose ic('Metronidazole 200mg_1000_CMST'): 1, # todo: dose ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, ic('Paracetamol, tablet, 500 mg'): 8000, @@ -1289,7 +1289,7 @@ def apply(self, person_id, squeeze_factor): ic("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 2000, ic('Sodium chloride, injectable solution, 0,9 %, 500 ml'): 2000, ic('Giving set iv administration + needle 15 drops/ml_each_CMST'): 1, - ic('Chlorhexidine 1.5% solution_5_CMST'): 1, # todo: dose + ic('Chlorhexidine 1.5% solution_5_CMST'): 50, # todo: dose } self.get_consumables(item_codes=of_repair_cons) From 1e7756442c133912b50d7519712c55b1eeb670f4 Mon Sep 17 00:00:00 2001 From: joehcollins Date: Wed, 17 Apr 2024 15:48:48 +0100 Subject: [PATCH 052/230] fix failing test. 
initial doses for newborn outcomes/postnatal supervisor --- src/tlo/methods/newborn_outcomes.py | 2 +- src/tlo/methods/postnatal_supervisor.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/tlo/methods/newborn_outcomes.py b/src/tlo/methods/newborn_outcomes.py index 7c88fae1db..fe501be12f 100644 --- a/src/tlo/methods/newborn_outcomes.py +++ b/src/tlo/methods/newborn_outcomes.py @@ -406,7 +406,7 @@ def get_and_store_newborn_item_codes(self): self.item_codes_nb_consumables['sepsis_supportive_care_core'] = \ {ic('Benzylpenicillin 1g (1MU), PFR_Each_CMST'): 1, # todo: dose ic('Gentamicin 40mg/ml, 2ml_each_CMST'): 1, # todo: dose - ic('Oxygen, 1000 liters, primarily with oxygen cylinders'): 5760 # todo: dose + ic('Oxygen, 1000 liters, primarily with oxygen cylinders'): 5760 # } self.item_codes_nb_consumables['sepsis_supportive_care_optional'] = \ diff --git a/src/tlo/methods/postnatal_supervisor.py b/src/tlo/methods/postnatal_supervisor.py index d91fc2073b..18631dfed0 100644 --- a/src/tlo/methods/postnatal_supervisor.py +++ b/src/tlo/methods/postnatal_supervisor.py @@ -1278,18 +1278,18 @@ def apply(self, person_id, squeeze_factor): of_repair_cons = \ {ic('Scalpel blade size 22 (individually wrapped)_100_CMST'): 1, - ic('Halothane (fluothane)_250ml_CMST'): 100, # todo: dose - ic('Ceftriaxone 1g, PFR_each_CMST'): 2, # todo: dose - ic('Metronidazole 200mg_1000_CMST'): 1, # todo: dose + ic('Halothane (fluothane)_250ml_CMST'): 100, + ic('Ceftriaxone 1g, PFR_each_CMST'): 2, + ic('Metronidazole 200mg_1000_CMST'): 6000, ic('Cannula iv (winged with injection pot) 18_each_CMST'): 1, ic('Paracetamol, tablet, 500 mg'): 8000, - ic('Declofenac injection_each_CMST'): 1, # todo: dose + ic('Declofenac injection_each_CMST'): 1, ic('Foley catheter'): 1, ic('Bag, urine, collecting, 2000 ml'): 1, ic("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 2000, ic('Sodium chloride, injectable solution, 0,9 %, 500 ml'): 2000, ic('Giving set iv administration + needle 15 drops/ml_each_CMST'): 1, - ic('Chlorhexidine 1.5% solution_5_CMST'): 50, # todo: dose + ic('Chlorhexidine 1.5% solution_5_CMST'): 50, } self.get_consumables(item_codes=of_repair_cons) From 06cec427fa2be4f835f9cb256ff6092013a79e95 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Thu, 18 Apr 2024 13:15:01 +0100 Subject: [PATCH 053/230] delete temporary ~ file created --- resources/costing/~$ResourceFile_Costing.xlsx | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 resources/costing/~$ResourceFile_Costing.xlsx diff --git a/resources/costing/~$ResourceFile_Costing.xlsx b/resources/costing/~$ResourceFile_Costing.xlsx deleted file mode 100644 index d5d6d83ccc..0000000000 --- a/resources/costing/~$ResourceFile_Costing.xlsx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:019349b15c524cfef4b39db4dd792de3376f4bc3da9b6b298a1fee07c4eb219e -size 165 From 9821cfc2bfa5d86597d7b74e1984c74b50b6017e Mon Sep 17 00:00:00 2001 From: sm2511 Date: Thu, 18 Apr 2024 14:14:33 +0100 Subject: [PATCH 054/230] update units of safety box to "1 disposed syringe (100 syringes per box)" --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 840fdbca8f..9f06132aaa 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:070cc08625170fab8e11198a350822b427c84add24444b6293bc830102c9b612 -size 1007683 +oid sha256:2afa3649672e10c9741b26dc70fa0f4496af4fccfafbf8b7b70f3b90b291a4fb +size 1007463 From 4e6a20f764ad3676e9d0299cb7e04d2395cbb2b9 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Tue, 23 Apr 2024 09:38:43 +0100 Subject: [PATCH 055/230] add table describing costing method to RF --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 637a967851..50dd554901 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:24bbca255afe21a7727be0a6c7d76d77199953f179c00ccee638dbbec9b1dd89 -size 3719568 +oid sha256:a22ccaf4e7c348ed36c6c165ac1b784b1e8511c006f7d7d87d5048fea2d2c4d6 +size 3667205 From a5c3d72bd4fb2fc911563f9a62e4968941e9fba6 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Fri, 26 Apr 2024 15:54:07 +0100 Subject: [PATCH 056/230] update cost data for HR --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 50dd554901..0e4d3617c6 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a22ccaf4e7c348ed36c6c165ac1b784b1e8511c006f7d7d87d5048fea2d2c4d6 -size 3667205 +oid sha256:2f9a0eaa0d60ccc2f4a3d221c46abf7f878538551ef1c1e7f1e55a379ab009e7 +size 3912424 From 3636fc27c769c092340915dca6b45c4bb9887522 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Fri, 26 Apr 2024 16:19:58 +0100 Subject: [PATCH 057/230] minor updates to make script run --- src/scripts/costing/costing.py | 46 +++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 70190d0b91..10a40233ae 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -8,6 +8,7 @@ import os import matplotlib.pyplot as plt +from matplotlib.ticker import FuncFormatter import numpy as np import pandas as pd @@ -32,7 +33,8 @@ # define a pathway to the data folder (note: currently outside the TLO model directory) # remember to set working directory to TLOmodel/ -outputfilepath = Path('./outputs/sakshi.mohan@york.ac.uk') +#outputfilepath = Path('./outputs/sakshi.mohan@york.ac.uk') +outputfilepath = Path('./outputs/tbh03@ic.ac.uk') resourcefilepath = Path("./resources") path_for_new_resourcefiles = resourcefilepath / "healthsystem/consumables" costing_outputs_folder = Path('./outputs/costing') @@ -47,7 +49,9 @@ def drop_outside_period(_df): # %% Gathering basic information # Find results_folder associated with a given batch_file and get most recent -results_folder = get_scenario_outputs('example_costing_scenario.py', outputfilepath)[0] # impact_of_cons_regression_scenarios +#results_folder = get_scenario_outputs('example_costing_scenario.py', outputfilepath)[0] # impact_of_cons_regression_scenarios +results_folder = get_scenario_outputs('long_run_all_diseases.py', outputfilepath)[0] # impact_of_cons_regression_scenarios +#results_folder = get_scenario_outputs('scenario_impact_of_consumables_availability.py', outputfilepath)[0] # impact_of_cons_regression_scenarios # look at one log (so can decide what 
to extract) log = load_pickled_dataframes(results_folder) @@ -63,6 +67,7 @@ def drop_outside_period(_df): sheet_name = None) # 1. HR cost +# TODO apply attrition rate to the cost calculation https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9237893/#SP1 # 1.1 HR Cost - Financial (Given the staff available) # Load annual salary by officer type and facility level hr_annual_salary = workbook_cost["human_resources"] @@ -101,12 +106,18 @@ def drop_outside_period(_df): used_staff_count_by_level_and_officer_type = current_staff_count_by_level_and_officer_type[current_staff_count_by_level_and_officer_type['OfficerType_FacilityLevel'].isin(list_of_cadre_and_level_combinations_used)] # Calculate salary cost for modelled health workforce (Staff count X Annual salary) +salary_for_all_staff = pd.merge(current_staff_count_by_level_and_officer_type[['OfficerType_FacilityLevel', 'Staff_Count']], + hr_annual_salary[['OfficerType_FacilityLevel', 'Salary_USD']], on = ['OfficerType_FacilityLevel'], how = "left") +salary_for_all_staff['Total_salary_by_cadre_and_level'] = salary_for_all_staff['Salary_USD'] * salary_for_all_staff['Staff_Count'] + +# Calculate salary cost for current total staff salary_for_modelled_staff = pd.merge(used_staff_count_by_level_and_officer_type[['OfficerType_FacilityLevel', 'Staff_Count']], hr_annual_salary[['OfficerType_FacilityLevel', 'Salary_USD']], on = ['OfficerType_FacilityLevel'], how = "left") salary_for_modelled_staff['Total_salary_by_cadre_and_level'] = salary_for_modelled_staff['Salary_USD'] * salary_for_modelled_staff['Staff_Count'] + # Create a dataframe to store financial costs -scenario_cost_financial = pd.DataFrame({'HR': salary_for_modelled_staff['Total_salary_by_cadre_and_level'].sum()}, index=[0]) +scenario_cost_financial = pd.DataFrame({'HR': salary_for_all_staff['Total_salary_by_cadre_and_level'].sum()}, index=[0]) # 1.2 HR Cost - Economic (Staff needed for interventions delivered in the simulation) # For HR required, multiply above with total capabilities X 'Frac_Time_Used_By_OfficerType' by facility level @@ -128,14 +139,17 @@ def drop_outside_period(_df): # 2. 
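# Worked toy example of the economic HR cost formula above (all numbers hypothetical):
# a cadre-level combination with 5 staff, an annual salary of USD 10,000 and 60% of its
# time used by the simulation contributes 5 * 0.6 * 10,000 = USD 30,000.
staff_count, annual_salary_usd, frac_time_used = 5, 10_000, 0.6
economic_cost_usd = staff_count * frac_time_used * annual_salary_usd  # 30000.0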
Consumables cost # 2.1 Consumables cost - Financial (What needs to be purchased given what is dispensed) -_df = log['tlo.methods.healthsystem']['Consumables'] +_df = log['tlo.methods.healthsystem.summary']['Consumables'] counts_of_available = defaultdict(int) counts_of_not_available = defaultdict(int) for _, row in _df.iterrows(): - for item, num in eval(row['Item_Available']).items(): + for item, num in row['Item_Available'].items(): # if using 'tlo.methods.healthsystem' eval(row['Item_Available']) counts_of_available[item] += num +counts_of_available = defaultdict(int, {int(key): value for key, value in counts_of_available.items()}) # Convert string keys to integer +# for consistency with other dictionaries + # Load consumables cost data unit_price_consumable = workbook_cost["consumables"] unit_price_consumable = unit_price_consumable.rename(columns=unit_price_consumable.iloc[0]) @@ -164,7 +178,6 @@ def drop_outside_period(_df): closing_bal_december.reset_index(level='month', drop=True) total_consumables_outflow_during_the_year = df['dispensed'].groupby(level=[0,1,2,3]).sum() inflow_to_outflow_ratio = total_consumables_inflow_during_the_year.div(total_consumables_outflow_during_the_year, fill_value=1) -inflow_to_outflow_ratio.to_dict() # Edit outlier ratios inflow_to_outflow_ratio.loc[inflow_to_outflow_ratio < 1] = 1 # Ratio can't be less than 1 @@ -174,6 +187,7 @@ def drop_outside_period(_df): # Multiply number of items needed by cost of consumable inflow_to_outflow_ratio_by_consumable = inflow_to_outflow_ratio.groupby(level='item_code').mean() +inflow_to_outflow_ratio_by_consumable = inflow_to_outflow_ratio_by_consumable.to_dict() # TODO Consider whether a more disaggregated version of the ratio dictionary should be applied cost_of_consumables_stocked = dict(zip(unit_price_consumable, (unit_price_consumable[key]['Final_price_per_chosen_unit (USD, 2023)'] * counts_of_available[key] * @@ -201,7 +215,7 @@ def drop_outside_period(_df): # 4. 
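# Toy check of the consumable costing rule above (numbers hypothetical): if the simulation
# dispensed 1,000 units of an item priced at USD 0.50 per unit and the OpenLMIS-derived
# inflow-to-outflow ratio for that item is 1.2, the quantity costed is scaled up to cover
# stock that flows in but is never dispensed.
dispensed_units, unit_price_usd, inflow_to_outflow = 1_000, 0.50, 1.2
cost_of_item_stocked_usd = dispensed_units * unit_price_usd * inflow_to_outflow  # 600.0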
Facility running costs -# Average running costs by facility level and district times the number of facilities in the simulation +# Average running costs by facility level and district times the number of facilities in the simulation # Explore the ratio of consumable inflows to outflows ###################################################### @@ -242,6 +256,7 @@ def plot_inflow_to_outflow_ratio(_dict, groupby_var): model_cost = [scenario_cost_financial['HR'][0], scenario_cost_financial['Consumables'][0]] labels = ['HR_salary', 'Consumables'] +plt.clf() plt.scatter(real_budget, model_cost) # Plot a line representing a 45-degree angle min_val = min(min(real_budget), min(model_cost)) @@ -254,7 +269,8 @@ def plot_inflow_to_outflow_ratio(_dict, groupby_var): plt.gca().yaxis.set_major_formatter(formatter) # Add labels for each point hr_label = 'HR_salary ' + f'{round(model_cost[0] / real_budget[0], 2)}' -plotlabels = [hr_label, 'Consumables'] +consumables_label = 'Consumables ' + f'{round(model_cost[1] / real_budget[1], 5)}' +plotlabels = [hr_label, consumables_label] for i, txt in enumerate(plotlabels): plt.text(real_budget[i], model_cost[i], txt, ha='right') @@ -272,10 +288,14 @@ def plot_inflow_to_outflow_ratio(_dict, groupby_var): # Plot salary costs by cadre and facility level # Group by cadre and level -total_salary_by_cadre = salary_df.groupby('Officer_Category')['Total_salary_by_cadre_and_level'].sum() -total_salary_by_level = salary_df.groupby('Facility_Level')['Total_salary_by_cadre_and_level'].sum() +salary_for_all_staff[['Officer_Type', 'Facility_Level']] = salary_for_all_staff['OfficerType_FacilityLevel'].str.split('|', expand=True) +salary_for_all_staff['Officer_Type'] = salary_for_all_staff['Officer_Type'].str.replace('Officer_Type=', '') +salary_for_all_staff['Facility_Level'] = salary_for_all_staff['Facility_Level'].str.replace('Facility_Level=', '') +total_salary_by_cadre = salary_for_all_staff.groupby('Officer_Type')['Total_salary_by_cadre_and_level'].sum() +total_salary_by_level = salary_for_all_staff.groupby('Facility_Level')['Total_salary_by_cadre_and_level'].sum() # Plot by cadre +plt.clf() total_salary_by_cadre.plot(kind='bar') plt.xlabel('Officer_category') plt.ylabel('Total Salary') @@ -283,17 +303,13 @@ def plot_inflow_to_outflow_ratio(_dict, groupby_var): plt.savefig(costing_outputs_folder / 'total_salary_by_cadre.png') # Plot by level +plt.clf() total_salary_by_level.plot(kind='bar') plt.xlabel('Facility_Level') plt.ylabel('Total Salary') plt.title('Total Salary by Facility_Level') plt.savefig(costing_outputs_folder / 'total_salary_by_level.png') -# Consumables -log['tlo.methods.healthsystem']['Consumables'] -# Aggregate Items_Available by Treatment_ID -# Multiply by the cost per item (need to check quantity) - ''' # Scratch pad From 533fe320427a8490658b7fe7622fc11369fd279f Mon Sep 17 00:00:00 2001 From: sm2511 Date: Fri, 3 May 2024 19:57:47 +0100 Subject: [PATCH 058/230] update HR costing method and RF --- resources/costing/ResourceFile_Costing.xlsx | 4 +- src/scripts/costing/costing.py | 71 ++++++++++----------- 2 files changed, 37 insertions(+), 38 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 0e4d3617c6..7597747a8a 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2f9a0eaa0d60ccc2f4a3d221c46abf7f878538551ef1c1e7f1e55a379ab009e7 -size 3912424 +oid 
sha256:5f189ab775ea3f6b4b00060a9f43c11595db71159c064397acca8d3237a0ee81 +size 3914776 diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 10a40233ae..ba8a94ea01 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -70,7 +70,8 @@ def drop_outside_period(_df): # TODO apply attrition rate to the cost calculation https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9237893/#SP1 # 1.1 HR Cost - Financial (Given the staff available) # Load annual salary by officer type and facility level -hr_annual_salary = workbook_cost["human_resources"] +hr_cost_parameters = workbook_cost["human_resources"] +hr_annual_salary = hr_cost_parameters[hr_cost_parameters['Parameter_name'] == 'salary_usd'] hr_annual_salary['OfficerType_FacilityLevel'] = 'Officer_Type=' + hr_annual_salary['Officer_Category'].astype(str) + '|Facility_Level=' + hr_annual_salary['Facility_Level'].astype(str) # Load scenario staffing level @@ -87,19 +88,30 @@ def drop_outside_period(_df): 'Staff_Count'].sum().reset_index() # Check if any cadres were not utilised at particular levels of care in the simulation -_df = log['tlo.methods.healthsystem']['Capacity'] -# Initialize a dictionary to store the sums -cadres_used = {} -# Iterate over the rows and sum values for each key -for index, row in _df.iterrows(): - for key, value in row['Frac_Time_Used_By_OfficerType'].items(): - if key not in cadres_used: - cadres_used[key] = 0 - cadres_used[key] += value - -# Store list of cadre-level combinations used in the simulation in a list -cadres_used_df = pd.DataFrame(cadres_used.items(), columns=['Key', 'Sum']) -list_of_cadre_and_level_combinations_used = cadres_used_df[cadres_used_df['Sum'] != 0]['Key'] +def expand_capacity_by_officer_type_and_facility_level(_df: pd.Series) -> pd.Series: + """Summarise the parsed logged-key results for one draw (as dataframe) into a pd.Series.""" + _df = _df.set_axis(_df['date'].dt.year).drop(columns=['date']) + _df.index.name = 'year' + return unflatten_flattened_multi_index_in_logging(_df).stack(level=[0, 1]) # expanded flattened axis + +extracted_results = extract_results( + Path(results_folder), + module='tlo.methods.healthsystem.summary', + key='Capacity_By_OfficerType_And_FacilityLevel', + custom_generate_series=expand_capacity_by_officer_type_and_facility_level, + do_scaling=False, +) + +# Mean of results across the runs +summarized_results = summarize(extracted_results, only_mean=True, collapse_columns=True) + +# Take mean across the entire simulation +mean_across_simulation = summarized_results.groupby(['OfficerType', 'FacilityLevel']).mean() + +# Unstack to make it look like a nice table +cadres_utilisation_rate = mean_across_simulation.reset_index(drop=False) +cadres_utilisation_rate['OfficerType_FacilityLevel'] = 'Officer_Type=' + cadres_utilisation_rate['OfficerType'].astype(str) + '|Facility_Level=' + cadres_utilisation_rate['FacilityLevel'].astype(str) +list_of_cadre_and_level_combinations_used = cadres_utilisation_rate[cadres_utilisation_rate['mean'] != 0]['OfficerType_FacilityLevel'] # Subset scenario staffing level to only include cadre-level combinations used in the simulation current_staff_count_by_level_and_officer_type['OfficerType_FacilityLevel'] = 'Officer_Type=' + current_staff_count_by_level_and_officer_type['Officer_Category'].astype(str) + '|Facility_Level=' + current_staff_count_by_level_and_officer_type['Facility_Level'].astype(str) @@ -107,35 +119,22 @@ def drop_outside_period(_df): # Calculate salary cost for modelled health 
workforce (Staff count X Annual salary) salary_for_all_staff = pd.merge(current_staff_count_by_level_and_officer_type[['OfficerType_FacilityLevel', 'Staff_Count']], - hr_annual_salary[['OfficerType_FacilityLevel', 'Salary_USD']], on = ['OfficerType_FacilityLevel'], how = "left") -salary_for_all_staff['Total_salary_by_cadre_and_level'] = salary_for_all_staff['Salary_USD'] * salary_for_all_staff['Staff_Count'] + hr_annual_salary[['OfficerType_FacilityLevel', 'Value']], on = ['OfficerType_FacilityLevel'], how = "left") +salary_for_all_staff['Total_salary_by_cadre_and_level'] = salary_for_all_staff['Value'] * salary_for_all_staff['Staff_Count'] # Calculate salary cost for current total staff salary_for_modelled_staff = pd.merge(used_staff_count_by_level_and_officer_type[['OfficerType_FacilityLevel', 'Staff_Count']], - hr_annual_salary[['OfficerType_FacilityLevel', 'Salary_USD']], on = ['OfficerType_FacilityLevel'], how = "left") -salary_for_modelled_staff['Total_salary_by_cadre_and_level'] = salary_for_modelled_staff['Salary_USD'] * salary_for_modelled_staff['Staff_Count'] + hr_annual_salary[['OfficerType_FacilityLevel', 'Value']], on = ['OfficerType_FacilityLevel'], how = "left") +salary_for_modelled_staff['Total_salary_by_cadre_and_level'] = salary_for_modelled_staff['Value'] * salary_for_modelled_staff['Staff_Count'] +# Other costs to maintain a fixed size of health workforce +# TODO consider annual_preservice_training_cost_percapita_usd, annual_preservice_training_cost_percapita_usd, annual_attrition_rate, absorption_rate_of_students_into_public_workforce, +# TODO consider proportion_of_workforce_recruited_from_abroad, recruitment_cost_per_person_recruited_usd, licensure_exam_passing_rate, graduation_rate # Create a dataframe to store financial costs -scenario_cost_financial = pd.DataFrame({'HR': salary_for_all_staff['Total_salary_by_cadre_and_level'].sum()}, index=[0]) - -# 1.2 HR Cost - Economic (Staff needed for interventions delivered in the simulation) -# For HR required, multiply above with total capabilities X 'Frac_Time_Used_By_OfficerType' by facility level -frac_time_used_by_officer_type = pd.DataFrame(log['tlo.methods.healthsystem']['Capacity']['Frac_Time_Used_By_OfficerType'].to_list()) -aggregate_frac_time_used_by_officer_type = pd.DataFrame(frac_time_used_by_officer_type.sum(axis=0))/len(frac_time_used_by_officer_type) -aggregate_frac_time_used_by_officer_type.columns = ['Value'] -aggregate_frac_time_used_by_officer_type['OfficerType_FacilityLevel'] = aggregate_frac_time_used_by_officer_type.index - -salary_for_required_staff = pd.merge(aggregate_frac_time_used_by_officer_type[['OfficerType_FacilityLevel', 'Value']], - hr_annual_salary[['OfficerType_FacilityLevel', 'Salary_USD']], on = ['OfficerType_FacilityLevel']) -salary_for_required_staff = pd.merge(salary_for_required_staff, - current_staff_count_by_level_and_officer_type[['OfficerType_FacilityLevel', 'Staff_Count']], on = ['OfficerType_FacilityLevel']) - -# Calculate salary cost for required health workforce (Staff count X Fraction of staff time needed X Annual salary) -salary_for_required_staff['Total_salary_by_cadre_and_level'] = salary_for_required_staff['Salary_USD'] * salary_for_required_staff['Value'] * salary_for_required_staff['Staff_Count'] +scenario_cost_financial = pd.DataFrame({'HR': salary_for_modelled_staff['Total_salary_by_cadre_and_level'].sum()}, index=[0]) -# Create a dataframe to store economic costs -scenario_cost_economic = pd.DataFrame({'HR': 
salary_for_required_staff['Total_salary_by_cadre_and_level'].sum()}, index=[0]) +# TODO Consider calculating economic cost of HR by multiplying salary times staff count with cadres_utilisation_rate # 2. Consumables cost # 2.1 Consumables cost - Financial (What needs to be purchased given what is dispensed) From 9f654a45b2be6527e4b1c5fb602cec92135de631 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Mon, 6 May 2024 19:30:07 +0100 Subject: [PATCH 059/230] add HR costs other than salary --- resources/costing/ResourceFile_Costing.xlsx | 4 +- src/scripts/costing/costing.py | 67 ++++++++++++++++++--- 2 files changed, 59 insertions(+), 12 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 7597747a8a..c96f5e8c7b 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5f189ab775ea3f6b4b00060a9f43c11595db71159c064397acca8d3237a0ee81 -size 3914776 +oid sha256:c25738f43f6594f79a39246f40f0c85e96960698fbfeb85c94d4f3bb1271c6c2 +size 3907145 diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index ba8a94ea01..d42709d20c 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -67,7 +67,6 @@ def drop_outside_period(_df): sheet_name = None) # 1. HR cost -# TODO apply attrition rate to the cost calculation https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9237893/#SP1 # 1.1 HR Cost - Financial (Given the staff available) # Load annual salary by officer type and facility level hr_cost_parameters = workbook_cost["human_resources"] @@ -117,22 +116,70 @@ def expand_capacity_by_officer_type_and_facility_level(_df: pd.Series) -> pd.Ser current_staff_count_by_level_and_officer_type['OfficerType_FacilityLevel'] = 'Officer_Type=' + current_staff_count_by_level_and_officer_type['Officer_Category'].astype(str) + '|Facility_Level=' + current_staff_count_by_level_and_officer_type['Facility_Level'].astype(str) used_staff_count_by_level_and_officer_type = current_staff_count_by_level_and_officer_type[current_staff_count_by_level_and_officer_type['OfficerType_FacilityLevel'].isin(list_of_cadre_and_level_combinations_used)] -# Calculate salary cost for modelled health workforce (Staff count X Annual salary) +# Calculate various components of HR cost +# 1. Salary cost for modelled health workforce (Staff count X Annual salary) salary_for_all_staff = pd.merge(current_staff_count_by_level_and_officer_type[['OfficerType_FacilityLevel', 'Staff_Count']], hr_annual_salary[['OfficerType_FacilityLevel', 'Value']], on = ['OfficerType_FacilityLevel'], how = "left") salary_for_all_staff['Total_salary_by_cadre_and_level'] = salary_for_all_staff['Value'] * salary_for_all_staff['Staff_Count'] +total_salary_for_all_staff = salary_for_all_staff['Total_salary_by_cadre_and_level'].sum() -# Calculate salary cost for current total staff -salary_for_modelled_staff = pd.merge(used_staff_count_by_level_and_officer_type[['OfficerType_FacilityLevel', 'Staff_Count']], +# 2. 
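# Worked example (all numbers hypothetical) of the attrition-based replacement costs added
# below: staff lost to attrition each year must be recruited, and enough students must be
# trained to replace them after allowing for graduation, licensure-exam passing and
# absorption into the public workforce.
staff_count = 1_000
annual_attrition_rate = 0.05
recruitment_cost_per_person_recruited_usd = 500
annual_recruitment_cost_usd = staff_count * annual_attrition_rate * recruitment_cost_per_person_recruited_usd  # 25000.0

graduation_rate, licensure_exam_passing_rate = 0.9, 0.8
absorption_rate_of_students_into_public_workforce, proportion_of_workforce_recruited_from_abroad = 0.70, 0.05
annual_preservice_training_cost_percapita_usd = 5_000
students_needed_per_year = (staff_count * annual_attrition_rate
                            / (absorption_rate_of_students_into_public_workforce + proportion_of_workforce_recruited_from_abroad)
                            / graduation_rate / licensure_exam_passing_rate)  # roughly 93 students
annual_preservice_training_cost_usd = students_needed_per_year * annual_preservice_training_cost_percapita_usd  # roughly 463,000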
Salary cost for current total staff +salary_for_staff_used_in_scenario = pd.merge(used_staff_count_by_level_and_officer_type[['OfficerType_FacilityLevel', 'Staff_Count']], hr_annual_salary[['OfficerType_FacilityLevel', 'Value']], on = ['OfficerType_FacilityLevel'], how = "left") -salary_for_modelled_staff['Total_salary_by_cadre_and_level'] = salary_for_modelled_staff['Value'] * salary_for_modelled_staff['Staff_Count'] - -# Other costs to maintain a fixed size of health workforce -# TODO consider annual_preservice_training_cost_percapita_usd, annual_preservice_training_cost_percapita_usd, annual_attrition_rate, absorption_rate_of_students_into_public_workforce, -# TODO consider proportion_of_workforce_recruited_from_abroad, recruitment_cost_per_person_recruited_usd, licensure_exam_passing_rate, graduation_rate +salary_for_staff_used_in_scenario['Total_salary_by_cadre_and_level'] = salary_for_staff_used_in_scenario['Value'] * salary_for_staff_used_in_scenario['Staff_Count'] +total_salary_for_staff_used_in_scenario = salary_for_staff_used_in_scenario['Total_salary_by_cadre_and_level'].sum() + +# 3. Recruitment cost to fill gap created by attrition +def merge_cost_and_model_data(cost_df, model_df, varnames): + merged_df = model_df.copy() + for varname in varnames: + new_cost_df = cost_df[cost_df['Parameter_name'] == varname][['Officer_Category', 'Facility_Level', 'Value']] + new_cost_df = new_cost_df.rename(columns={"Value": varname}) + if ((new_cost_df['Officer_Category'] == 'All').all()) and ((new_cost_df['Facility_Level'] == 'All').all()): + merged_df[varname] = new_cost_df[varname].mean() + elif ((new_cost_df['Officer_Category'] == 'All').all()) and ((new_cost_df['Facility_Level'] == 'All').all() == False): + merged_df = pd.merge(merged_df, new_cost_df[['Facility_Level',varname]], on=['Facility_Level'], how="left") + elif ((new_cost_df['Officer_Category'] == 'All').all() == False) and ((new_cost_df['Facility_Level'] == 'All').all()): + merged_df = pd.merge(merged_df, new_cost_df[['Officer_Category',varname]], on=['Officer_Category'], how="left") + else: + merged_df = pd.merge(merged_df, new_cost_df, on=['Officer_Category', 'Facility_Level'], how="left") + return merged_df + +recruitment_cost_df = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type, + varnames = ['annual_attrition_rate', 'recruitment_cost_per_person_recruited_usd']) +recruitment_cost_df['annual_recruitment_cost'] = recruitment_cost_df['annual_attrition_rate'] * recruitment_cost_df['Staff_Count'] * \ + recruitment_cost_df['recruitment_cost_per_person_recruited_usd'] +recruitment_cost_for_attrited_workers = recruitment_cost_df['annual_recruitment_cost'].sum() + +# 4. 
Pre-service training cost to fill gap created by attrition +preservice_training_cost_df = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type, + varnames = ['annual_attrition_rate', + 'licensure_exam_passing_rate', 'graduation_rate', + 'absorption_rate_of_students_into_public_workforce', 'proportion_of_workforce_recruited_from_abroad', + 'annual_preservice_training_cost_percapita_usd']) +preservice_training_cost_df['annual_preservice_training_cost'] = preservice_training_cost_df['annual_attrition_rate'] * preservice_training_cost_df['Staff_Count'] * \ + (1/(preservice_training_cost_df['absorption_rate_of_students_into_public_workforce'] + preservice_training_cost_df['proportion_of_workforce_recruited_from_abroad'])) * \ + (1/preservice_training_cost_df['graduation_rate']) * (1/preservice_training_cost_df['licensure_exam_passing_rate']) * \ + preservice_training_cost_df['annual_preservice_training_cost_percapita_usd'] +preservice_training_cost_for_attrited_workers = preservice_training_cost_df['annual_preservice_training_cost'].sum() + +# 5. In-service training cost to train all staff +inservice_training_cost_df = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type, + varnames = ['annual_inservice_training_cost_usd']) +inservice_training_cost_df['annual_inservice_training_cost'] = inservice_training_cost_df['Staff_Count'] * inservice_training_cost_df['annual_inservice_training_cost_usd'] +inservice_training_cost_for_staff_used_in_scenario = inservice_training_cost_df['annual_inservice_training_cost'].sum() +# TODO check why annual_inservice_training_cost for DCSA is NaN in the merged_df # Create a dataframe to store financial costs -scenario_cost_financial = pd.DataFrame({'HR': salary_for_modelled_staff['Total_salary_by_cadre_and_level'].sum()}, index=[0]) +#scenario_cost_financial = pd.DataFrame({'HR': salary_for_modelled_staff['Total_salary_by_cadre_and_level'].sum()}, index=[0]) +hr_cost_subcategories = ['total_salary_for_all_staff', 'total_salary_for_staff_used_in_scenario', + 'recruitment_cost_for_attrited_workers', 'preservice_training_cost_for_attrited_workers', + 'inservice_training_cost_for_staff_used_in_scenario'] +scenario_cost_financial = pd.DataFrame({ + 'Cost_Category': ['Human Resources for Health'] * len(hr_cost_subcategories), + 'Cost_Sub-category': hr_cost_subcategories, + 'Value_2023USD': [total_salary_for_all_staff, total_salary_for_staff_used_in_scenario, recruitment_cost_for_attrited_workers, preservice_training_cost_for_attrited_workers, inservice_training_cost_for_staff_used_in_scenario]#print('[' + ', '.join(hr_cost_subcategories) + ']') +}) # TODO Consider calculating economic cost of HR by multiplying salary times staff count with cadres_utilisation_rate From d9b222a65e456719630405ea9c8d4a713cdb8799 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Tue, 7 May 2024 10:21:52 +0100 Subject: [PATCH 060/230] update calculation and extraction of consumable costs - to consider all runs - to extract a dataframe of different cost categories --- src/scripts/costing/costing.py | 66 +++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 20 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index d42709d20c..473f03e498 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -117,19 +117,19 @@ def expand_capacity_by_officer_type_and_facility_level(_df: pd.Series) -> pd.Ser 
used_staff_count_by_level_and_officer_type = current_staff_count_by_level_and_officer_type[current_staff_count_by_level_and_officer_type['OfficerType_FacilityLevel'].isin(list_of_cadre_and_level_combinations_used)] # Calculate various components of HR cost -# 1. Salary cost for modelled health workforce (Staff count X Annual salary) +# 1.1 Salary cost for modelled health workforce (Staff count X Annual salary) salary_for_all_staff = pd.merge(current_staff_count_by_level_and_officer_type[['OfficerType_FacilityLevel', 'Staff_Count']], hr_annual_salary[['OfficerType_FacilityLevel', 'Value']], on = ['OfficerType_FacilityLevel'], how = "left") salary_for_all_staff['Total_salary_by_cadre_and_level'] = salary_for_all_staff['Value'] * salary_for_all_staff['Staff_Count'] total_salary_for_all_staff = salary_for_all_staff['Total_salary_by_cadre_and_level'].sum() -# 2. Salary cost for current total staff +# 1.2 Salary cost for current total staff salary_for_staff_used_in_scenario = pd.merge(used_staff_count_by_level_and_officer_type[['OfficerType_FacilityLevel', 'Staff_Count']], hr_annual_salary[['OfficerType_FacilityLevel', 'Value']], on = ['OfficerType_FacilityLevel'], how = "left") salary_for_staff_used_in_scenario['Total_salary_by_cadre_and_level'] = salary_for_staff_used_in_scenario['Value'] * salary_for_staff_used_in_scenario['Staff_Count'] total_salary_for_staff_used_in_scenario = salary_for_staff_used_in_scenario['Total_salary_by_cadre_and_level'].sum() -# 3. Recruitment cost to fill gap created by attrition +# 1.3 Recruitment cost to fill gap created by attrition def merge_cost_and_model_data(cost_df, model_df, varnames): merged_df = model_df.copy() for varname in varnames: @@ -151,7 +151,7 @@ def merge_cost_and_model_data(cost_df, model_df, varnames): recruitment_cost_df['recruitment_cost_per_person_recruited_usd'] recruitment_cost_for_attrited_workers = recruitment_cost_df['annual_recruitment_cost'].sum() -# 4. Pre-service training cost to fill gap created by attrition +# 1.4 Pre-service training cost to fill gap created by attrition preservice_training_cost_df = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type, varnames = ['annual_attrition_rate', 'licensure_exam_passing_rate', 'graduation_rate', @@ -163,7 +163,7 @@ def merge_cost_and_model_data(cost_df, model_df, varnames): preservice_training_cost_df['annual_preservice_training_cost_percapita_usd'] preservice_training_cost_for_attrited_workers = preservice_training_cost_df['annual_preservice_training_cost'].sum() -# 5. 
In-service training cost to train all staff +# 1.5 In-service training cost to train all staff inservice_training_cost_df = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type, varnames = ['annual_inservice_training_cost_usd']) inservice_training_cost_df['annual_inservice_training_cost'] = inservice_training_cost_df['Staff_Count'] * inservice_training_cost_df['annual_inservice_training_cost_usd'] @@ -171,31 +171,49 @@ def merge_cost_and_model_data(cost_df, model_df, varnames): # TODO check why annual_inservice_training_cost for DCSA is NaN in the merged_df # Create a dataframe to store financial costs -#scenario_cost_financial = pd.DataFrame({'HR': salary_for_modelled_staff['Total_salary_by_cadre_and_level'].sum()}, index=[0]) hr_cost_subcategories = ['total_salary_for_all_staff', 'total_salary_for_staff_used_in_scenario', 'recruitment_cost_for_attrited_workers', 'preservice_training_cost_for_attrited_workers', 'inservice_training_cost_for_staff_used_in_scenario'] scenario_cost_financial = pd.DataFrame({ 'Cost_Category': ['Human Resources for Health'] * len(hr_cost_subcategories), 'Cost_Sub-category': hr_cost_subcategories, - 'Value_2023USD': [total_salary_for_all_staff, total_salary_for_staff_used_in_scenario, recruitment_cost_for_attrited_workers, preservice_training_cost_for_attrited_workers, inservice_training_cost_for_staff_used_in_scenario]#print('[' + ', '.join(hr_cost_subcategories) + ']') + 'Value_2023USD': [total_salary_for_all_staff, total_salary_for_staff_used_in_scenario, recruitment_cost_for_attrited_workers, preservice_training_cost_for_attrited_workers, inservice_training_cost_for_staff_used_in_scenario] }) +# TODO 'Value_2023USD' - use hr_cost_subcategories rather than the hardcoded list # TODO Consider calculating economic cost of HR by multiplying salary times staff count with cadres_utilisation_rate # 2. Consumables cost -# 2.1 Consumables cost - Financial (What needs to be purchased given what is dispensed) -_df = log['tlo.methods.healthsystem.summary']['Consumables'] +def get_counts_of_items_requested(_df): + _df = drop_outside_period(_df) + counts_of_available = defaultdict(int) + counts_of_not_available = defaultdict(int) + for _, row in _df.iterrows(): + for item, num in row['Item_Available'].items(): + counts_of_available[item] += num + for item, num in row['Item_NotAvailable'].items(): + counts_of_not_available[item] += num + return pd.concat( + {'Available': pd.Series(counts_of_available), 'Not_Available': pd.Series(counts_of_not_available)}, + axis=1 + ).fillna(0).astype(int).stack() + +cons_req = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='Consumables', + custom_generate_series=get_counts_of_items_requested, + do_scaling=True) -counts_of_available = defaultdict(int) -counts_of_not_available = defaultdict(int) -for _, row in _df.iterrows(): - for item, num in row['Item_Available'].items(): # if using 'tlo.methods.healthsystem' eval(row['Item_Available']) - counts_of_available[item] += num +# Mean of results across the runs +summarized_cons_req = summarize(cons_req, only_mean=True, collapse_columns=True) -counts_of_available = defaultdict(int, {int(key): value for key, value in counts_of_available.items()}) # Convert string keys to integer -# for consistency with other dictionaries +# Consumables to be costed (only available, i.e. 
dispensed) +cons_dispensed = summarized_cons_req.xs("Available", level=1) +cons_dispensed = cons_dispensed.to_dict() +cons_dispensed = defaultdict(int, {int(key): value for key, value in cons_dispensed.items()}) # Convert string keys to integer +# 2.1 Cost of consumables dispensed # Load consumables cost data unit_price_consumable = workbook_cost["consumables"] unit_price_consumable = unit_price_consumable.rename(columns=unit_price_consumable.iloc[0]) @@ -205,10 +223,10 @@ def merge_cost_and_model_data(cost_df, model_df, varnames): # Multiply number of items needed by cost of consumable cost_of_consumables_dispensed = dict(zip(unit_price_consumable, (unit_price_consumable[key]['Final_price_per_chosen_unit (USD, 2023)'] * - counts_of_available[key] for key in unit_price_consumable))) + cons_dispensed[key] for key in unit_price_consumable))) total_cost_of_consumables_dispensed = sum(value for value in cost_of_consumables_dispensed.values() if not np.isnan(value)) -# Cost of consumables stocked +# 2.2 Cost of consumables stocked (quantity needed for what is dispensed) # Estimate the stock to dispensed ratio from OpenLMIS data lmis_consumable_usage = pd.read_csv(path_for_new_resourcefiles / "ResourceFile_Consumables_availability_and_usage.csv") # Collapse individual facilities @@ -236,12 +254,20 @@ def merge_cost_and_model_data(cost_df, model_df, varnames): inflow_to_outflow_ratio_by_consumable = inflow_to_outflow_ratio_by_consumable.to_dict() # TODO Consider whether a more disaggregated version of the ratio dictionary should be applied cost_of_consumables_stocked = dict(zip(unit_price_consumable, (unit_price_consumable[key]['Final_price_per_chosen_unit (USD, 2023)'] * - counts_of_available[key] * + cons_dispensed[key] * inflow_to_outflow_ratio_by_consumable.get(key, average_inflow_to_outflow_ratio_ratio) for key in counts_of_available))) total_cost_of_consumables_stocked = sum(value for value in cost_of_consumables_stocked.values() if not np.isnan(value)) -scenario_cost_financial['Consumables'] = total_cost_of_consumables_stocked +# Add consumable costs to the financial cost dataframe +consumable_cost_subcategories = ['total_cost_of_consumables_dispensed', 'total_cost_of_consumables_stocked'] +consumable_costs = pd.DataFrame({ + 'Cost_Category': ['Consumables'] * len(consumable_cost_subcategories), + 'Cost_Sub-category': consumable_cost_subcategories, + 'Value_2023USD': [total_cost_of_consumables_dispensed, total_cost_of_consumables_stocked] +}) +# Append new_data to scenario_cost_financial +scenario_cost_financial = pd.concat([scenario_cost_financial, consumable_costs], ignore_index=True) # 3. 
Equipment cost # Total cost of equipment required as per SEL (HSSP-III) only at facility IDs where it been used in the simulation From 35763163623a3881517d4e7ab8ecf50ffa42afb0 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Tue, 7 May 2024 10:45:54 +0100 Subject: [PATCH 061/230] update cost validation method - the data is now taken from the costing RF --- resources/costing/ResourceFile_Costing.xlsx | 4 +- src/scripts/costing/costing.py | 65 +++++++++++---------- 2 files changed, 35 insertions(+), 34 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index c96f5e8c7b..3aaf3d8e94 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c25738f43f6594f79a39246f40f0c85e96960698fbfeb85c94d4f3bb1271c6c2 -size 3907145 +oid sha256:0cc45c90bacff8ae310cb08457227a3fec23bd00c21e3df0a9a3486923582735 +size 3908125 diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 473f03e498..786c62da00 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -285,10 +285,42 @@ def get_counts_of_items_requested(_df): # TODO Multiply quantity needed with cost per item (this is the repair, replacement, and maintenance cost) # TODO Which equipment needs to be newly purchased (currently no assumption made for equipment with cost > $250,000) - # 4. Facility running costs # Average running costs by facility level and district times the number of facilities in the simulation +# Compare financial costs with actual budget data +#################################################### +# Import budget data +budget_data = workbook_cost["budget_validation"] +list_of_costs_for_comparison = ['total_salary_for_all_staff', 'total_cost_of_consumables_dispensed'] +real_budget = [budget_data[budget_data['Category'] == list_of_costs_for_comparison[0]]['Budget_in_2023USD'].values[0], + budget_data[budget_data['Category'] == list_of_costs_for_comparison[1]]['Budget_in_2023USD'].values[0]] +model_cost = [scenario_cost_financial[scenario_cost_financial['Cost_Sub-category'] == list_of_costs_for_comparison[0]]['Value_2023USD'].values[0], + scenario_cost_financial[scenario_cost_financial['Cost_Sub-category'] == list_of_costs_for_comparison[1]]['Value_2023USD'].values[0]] + +plt.clf() +plt.scatter(real_budget, model_cost) +# Plot a line representing a 45-degree angle +min_val = min(min(real_budget), min(model_cost)) +max_val = max(max(real_budget), max(model_cost)) +plt.plot([min_val, max_val], [min_val, max_val], 'r--', label='45-degree line') + +# Format x and y axis labels to display in millions +formatter = FuncFormatter(lambda x, _: '{:,.0f}M'.format(x / 1e6)) +plt.gca().xaxis.set_major_formatter(formatter) +plt.gca().yaxis.set_major_formatter(formatter) +# Add labels for each point +hr_label = 'HR_salary ' + f'{round(model_cost[0] / real_budget[0], 2)}' +consumables_label = 'Consumables ' + f'{round(model_cost[1] / real_budget[1], 2)}' +plotlabels = [hr_label, consumables_label] +for i, txt in enumerate(plotlabels): + plt.text(real_budget[i], model_cost[i], txt, ha='right') + +plt.xlabel('Real Budget') +plt.ylabel('Model Cost') +plt.title('Real Budget vs Model Cost') +plt.savefig(costing_outputs_folder / 'Cost_validation.png') + # Explore the ratio of consumable inflows to outflows ###################################################### # TODO: Only consider the months for which original OpenLMIS data was 
available for closing_stock and dispensed @@ -320,37 +352,6 @@ def plot_inflow_to_outflow_ratio(_dict, groupby_var): plot_inflow_to_outflow_ratio(inflow_to_outflow_ratio, 'item_code') plot_inflow_to_outflow_ratio(inflow_to_outflow_ratio, 'category') -# Compare financial costs with actual budget data -#################################################### -salary_budget_2018 = 69478749 -consuambles_budget_2018 = 228934188 -real_budget = [salary_budget_2018, consuambles_budget_2018] -model_cost = [scenario_cost_financial['HR'][0], scenario_cost_financial['Consumables'][0]] -labels = ['HR_salary', 'Consumables'] - -plt.clf() -plt.scatter(real_budget, model_cost) -# Plot a line representing a 45-degree angle -min_val = min(min(real_budget), min(model_cost)) -max_val = max(max(real_budget), max(model_cost)) -plt.plot([min_val, max_val], [min_val, max_val], 'r--', label='45-degree line') - -# Format x and y axis labels to display in millions -formatter = FuncFormatter(lambda x, _: '{:,.0f}M'.format(x / 1e6)) -plt.gca().xaxis.set_major_formatter(formatter) -plt.gca().yaxis.set_major_formatter(formatter) -# Add labels for each point -hr_label = 'HR_salary ' + f'{round(model_cost[0] / real_budget[0], 2)}' -consumables_label = 'Consumables ' + f'{round(model_cost[1] / real_budget[1], 5)}' -plotlabels = [hr_label, consumables_label] -for i, txt in enumerate(plotlabels): - plt.text(real_budget[i], model_cost[i], txt, ha='right') - -plt.xlabel('Real Budget') -plt.ylabel('Model Cost') -plt.title('Real Budget vs Model Cost') -plt.savefig(costing_outputs_folder / 'Cost_validation.png') - # Plot fraction staff time used fraction_stafftime_average = salary_staffneeded_df.groupby('Officer_Category')['Value'].sum() fraction_stafftime_average. plot(kind = "bar") From e9c80d02d42e6e244c9a24d343de09093a65e65c Mon Sep 17 00:00:00 2001 From: sm2511 Date: Tue, 7 May 2024 18:48:28 +0100 Subject: [PATCH 062/230] minor update for validation code --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- src/scripts/costing/costing.py | 17 ++++++++++++----- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 3aaf3d8e94..faecf58e80 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0cc45c90bacff8ae310cb08457227a3fec23bd00c21e3df0a9a3486923582735 -size 3908125 +oid sha256:532b32405a5cb63a5f0b04ef7c685d1c6d3c6f3b861917d9f2683e866ab9bf76 +size 3908122 diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 786c62da00..02287bbaca 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -42,7 +42,7 @@ os.makedirs(costing_outputs_folder) # Declare period for which the results will be generated (defined inclusively) -TARGET_PERIOD = (Date(2000, 1, 1), Date(2050, 12, 31)) +TARGET_PERIOD = (Date(2015, 1, 1), Date(2015, 12, 31)) def drop_outside_period(_df): """Return a dataframe which only includes for which the date is within the limits defined by TARGET_PERIOD""" return _df.drop(index=_df.index[~_df['date'].between(*TARGET_PERIOD)]) @@ -288,15 +288,21 @@ def get_counts_of_items_requested(_df): # 4. 
Facility running costs # Average running costs by facility level and district times the number of facilities in the simulation +# Extract all costs to a .csv +scenario_cost_financial.to_csv(costing_outputs_folder / 'scenario_cost.csv') + + # Compare financial costs with actual budget data #################################################### # Import budget data budget_data = workbook_cost["budget_validation"] -list_of_costs_for_comparison = ['total_salary_for_all_staff', 'total_cost_of_consumables_dispensed'] +list_of_costs_for_comparison = ['total_salary_for_all_staff', 'total_cost_of_consumables_dispensed', 'total_cost_of_consumables_stocked'] real_budget = [budget_data[budget_data['Category'] == list_of_costs_for_comparison[0]]['Budget_in_2023USD'].values[0], + budget_data[budget_data['Category'] == list_of_costs_for_comparison[1]]['Budget_in_2023USD'].values[0], budget_data[budget_data['Category'] == list_of_costs_for_comparison[1]]['Budget_in_2023USD'].values[0]] model_cost = [scenario_cost_financial[scenario_cost_financial['Cost_Sub-category'] == list_of_costs_for_comparison[0]]['Value_2023USD'].values[0], - scenario_cost_financial[scenario_cost_financial['Cost_Sub-category'] == list_of_costs_for_comparison[1]]['Value_2023USD'].values[0]] + scenario_cost_financial[scenario_cost_financial['Cost_Sub-category'] == list_of_costs_for_comparison[1]]['Value_2023USD'].values[0], + scenario_cost_financial[scenario_cost_financial['Cost_Sub-category'] == list_of_costs_for_comparison[2]]['Value_2023USD'].values[0]] plt.clf() plt.scatter(real_budget, model_cost) @@ -311,8 +317,9 @@ def get_counts_of_items_requested(_df): plt.gca().yaxis.set_major_formatter(formatter) # Add labels for each point hr_label = 'HR_salary ' + f'{round(model_cost[0] / real_budget[0], 2)}' -consumables_label = 'Consumables ' + f'{round(model_cost[1] / real_budget[1], 2)}' -plotlabels = [hr_label, consumables_label] +consumables_label1= 'Consumables dispensed ' + f'{round(model_cost[1] / real_budget[1], 2)}' +consumables_label2 = 'Consumables stocked ' + f'{round(model_cost[2] / real_budget[2], 2)}' +plotlabels = [hr_label, consumables_label1, consumables_label2] for i, txt in enumerate(plotlabels): plt.text(real_budget[i], model_cost[i], txt, ha='right') From 2fc79a0552881962bfa863f135d67be8de85793f Mon Sep 17 00:00:00 2001 From: sm2511 Date: Tue, 7 May 2024 18:53:01 +0100 Subject: [PATCH 063/230] temporarily add large consumable availability file before moving it to dropbox --- .../ResourceFile_Consumables_availability_and_usage.csv | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 resources/healthsystem/consumables/ResourceFile_Consumables_availability_and_usage.csv diff --git a/resources/healthsystem/consumables/ResourceFile_Consumables_availability_and_usage.csv b/resources/healthsystem/consumables/ResourceFile_Consumables_availability_and_usage.csv new file mode 100644 index 0000000000..4d7f1cb90e --- /dev/null +++ b/resources/healthsystem/consumables/ResourceFile_Consumables_availability_and_usage.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b91ddbc76e9833e4b1262d187988dd568be63a8d448785986577d40164c8f02b +size 115910333 From 3dc234dfe144f7c694cf6c36cac79d39d81e8286 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Sat, 11 May 2024 20:05:50 +0100 Subject: [PATCH 064/230] update consumable costing --- src/scripts/costing/costing.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 
02287bbaca..e4a7ecf7f5 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -223,7 +223,7 @@ def get_counts_of_items_requested(_df): # Multiply number of items needed by cost of consumable cost_of_consumables_dispensed = dict(zip(unit_price_consumable, (unit_price_consumable[key]['Final_price_per_chosen_unit (USD, 2023)'] * - cons_dispensed[key] for key in unit_price_consumable))) + cons_dispensed[key] for key in cons_dispensed))) total_cost_of_consumables_dispensed = sum(value for value in cost_of_consumables_dispensed.values() if not np.isnan(value)) # 2.2 Cost of consumables stocked (quantity needed for what is dispensed) @@ -256,7 +256,8 @@ def get_counts_of_items_requested(_df): cost_of_consumables_stocked = dict(zip(unit_price_consumable, (unit_price_consumable[key]['Final_price_per_chosen_unit (USD, 2023)'] * cons_dispensed[key] * inflow_to_outflow_ratio_by_consumable.get(key, average_inflow_to_outflow_ratio_ratio) - for key in counts_of_available))) + for key in cons_dispensed))) +# TODO Make sure that the above code runs total_cost_of_consumables_stocked = sum(value for value in cost_of_consumables_stocked.values() if not np.isnan(value)) # Add consumable costs to the financial cost dataframe From 7bc45ba8b2f1643ba459d4e31be340b418015682 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Sat, 11 May 2024 20:06:06 +0100 Subject: [PATCH 065/230] update costing method intro in the RF --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index faecf58e80..b4599ff9f1 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:532b32405a5cb63a5f0b04ef7c685d1c6d3c6f3b861917d9f2683e866ab9bf76 -size 3908122 +oid sha256:927b59c152da750555043ff6e1c4fdba16b87e7387e9a7541d964ac4a729f03e +size 3965338 From 5efb29d52169c042504ea932641606dbaa23f5bf Mon Sep 17 00:00:00 2001 From: Tim Hallett <39991060+tbhallett@users.noreply.github.com> Date: Tue, 14 May 2024 10:00:01 +0100 Subject: [PATCH 066/230] adapt use of do_scaling to behave as user expects. --- src/tlo/analysis/utils.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index 3aeff2bb11..0662379380 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -254,13 +254,11 @@ def extract_results(results_folder: Path, """ def get_multiplier(_draw, _run): - """Helper function to get the multiplier from the simulation, if do_scaling=True. + """Helper function to get the multiplier from the simulation. 
Note that if the scaling factor cannot be found a `KeyError` is thrown.""" - if not do_scaling: - return 1.0 - else: - return load_pickled_dataframes(results_folder, _draw, _run, 'tlo.methods.population' - )['tlo.methods.population']['scaling_factor']['scaling_factor'].values[0] + return load_pickled_dataframes( + results_folder, _draw, _run, 'tlo.methods.population' + )['tlo.methods.population']['scaling_factor']['scaling_factor'].values[0] if custom_generate_series is None: # If there is no `custom_generate_series` provided, it implies that function required selects the specified @@ -293,7 +291,10 @@ def generate_series(dataframe: pd.DataFrame) -> pd.Series: df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key] output_from_eval: pd.Series = generate_series(df) assert pd.Series == type(output_from_eval), 'Custom command does not generate a pd.Series' - res[draw_run] = output_from_eval * get_multiplier(draw, run) + if do_scaling: + res[draw_run] = output_from_eval * get_multiplier(draw, run) + else: + res[draw_run] = output_from_eval except KeyError: # Some logs could not be found - probably because this run failed. From 68b9f7a83c5b4ceea71f73c9164b917e2431dca7 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Tue, 14 May 2024 11:28:42 +0100 Subject: [PATCH 067/230] add new pre-service training unit costs --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index b4599ff9f1..06ee70a161 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:927b59c152da750555043ff6e1c4fdba16b87e7387e9a7541d964ac4a729f03e -size 3965338 +oid sha256:bf0d2818d402d3ccece08ffaf3e3ac98f4fb9f18db3fa5d53e3932f98bca2cd5 +size 4038180 From 0a6392426897a2b72b4614ff456ce3ae7d365690 Mon Sep 17 00:00:00 2001 From: joehcollins Date: Wed, 17 Apr 2024 15:59:30 +0100 Subject: [PATCH 068/230] fix failing test. 
initial doses for newborn outcomes/postnatal supervisor

---
 src/tlo/methods/newborn_outcomes.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/tlo/methods/newborn_outcomes.py b/src/tlo/methods/newborn_outcomes.py
index fe501be12f..06986067a8 100644
--- a/src/tlo/methods/newborn_outcomes.py
+++ b/src/tlo/methods/newborn_outcomes.py
@@ -403,9 +403,11 @@ def get_and_store_newborn_item_codes(self):
             {ic('Tetracycline eye ointment, 1 %, tube 5 mg'): 5}
 
         # ------------------------------------- SEPSIS - FULL SUPPORTIVE CARE ---------------------------------------
+        # Whilst abx for newborns are weight based, the maximum dose does not exceed the minimum unit for the costing
+        # model
         self.item_codes_nb_consumables['sepsis_supportive_care_core'] = \
-            {ic('Benzylpenicillin 1g (1MU), PFR_Each_CMST'): 1,  # todo: dose
-             ic('Gentamicin 40mg/ml, 2ml_each_CMST'): 1,  # todo: dose
+            {ic('Benzylpenicillin 1g (1MU), PFR_Each_CMST'): 1,
+             ic('Gentamicin 40mg/ml, 2ml_each_CMST'): 1,
              ic('Oxygen, 1000 liters, primarily with oxygen cylinders'): 5760  #
              }
 
@@ -419,8 +421,8 @@ def get_and_store_newborn_item_codes(self):
 
         # ---------------------------------------- SEPSIS - ANTIBIOTICS ---------------------------------------------
         self.item_codes_nb_consumables['sepsis_abx'] = \
-            {ic('Benzylpenicillin 1g (1MU), PFR_Each_CMST'): 1,  # todo: dose
-             ic('Gentamicin 40mg/ml, 2ml_each_CMST'): 1,  # todo: dose
+            {ic('Benzylpenicillin 1g (1MU), PFR_Each_CMST'): 1,
+             ic('Gentamicin 40mg/ml, 2ml_each_CMST'): 1,
              }
 
     def initialise_simulation(self, sim):

From 82ec04ef903945ac5dd02a0607985f7f605b187d Mon Sep 17 00:00:00 2001
From: joehcollins
Date: Tue, 14 May 2024 14:45:37 +0100
Subject: [PATCH 069/230] fixed error in labour. COPD consumable units

---
 src/tlo/methods/copd.py   | 24 +++++++++++++-----------
 src/tlo/methods/labour.py |  7 ++++---
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/src/tlo/methods/copd.py b/src/tlo/methods/copd.py
index 2fd27e13a2..e91ed00e39 100644
--- a/src/tlo/methods/copd.py
+++ b/src/tlo/methods/copd.py
@@ -184,14 +184,14 @@ def define_symptoms(self):
 
     def lookup_item_codes(self):
         """Look-up the item-codes for the consumables needed in the HSI Events for this module."""
-        # todo: Need to look-up these item-codes.
+        ic = self.sim.modules['HealthSystem'].get_item_code_from_item_name
+
         self.item_codes = {
-            'bronchodilater_inhaler': 293,
-            'steroid_inhaler': 294,
-            'oxygen': 127,
-            'aminophylline': 292,
-            'amoxycillin': 125,
-            'prednisolone': 291
+            'bronchodilater_inhaler': ic('Salbutamol Inhaler 100mcg/dose - 200 doses '),
+            'oxygen': ic('Oxygen, 1000 liters, primarily with oxygen cylinders'),
+            'aminophylline': ic('Aminophylline 100mg, tablets'),
+            'amoxycillin': ic('Amoxycillin 250mg_1000_CMST'),
+            'prednisolone': ic('Prednisolone 5mg_100_CMST'),
         }
 
     def do_logging(self):
@@ -209,7 +209,7 @@ def give_inhaler(self, person_id: int, hsi_event: HSI_Event):
         df = self.sim.population.props
         has_inhaler = df.at[person_id, 'ch_has_inhaler']
         if not has_inhaler:
-            if hsi_event.get_consumables(self.item_codes['bronchodilater_inhaler']):
+            if hsi_event.get_consumables({self.item_codes['bronchodilater_inhaler']:1}):
                 df.at[person_id, 'ch_has_inhaler'] = True
 
     def do_when_present_with_breathless(self, person_id: int, hsi_event: HSI_Event):
@@ -527,7 +527,8 @@ def apply(self, person_id, squeeze_factor):
          * Provide treatment: whatever is available at this facility at this time (no referral). 
""" df = self.sim.population.props - if not self.get_consumables(self.module.item_codes['oxygen']): + # Assume average 8L O2 for 2 days inpatient care + if not self.get_consumables({self.module.item_codes['oxygen']: 23_040}): # refer to the next higher facility if the current facility has no oxygen self.facility_levels_index += 1 if self.facility_levels_index >= len(self.all_facility_levels): @@ -537,10 +538,11 @@ def apply(self, person_id, squeeze_factor): else: # Give oxygen and AminoPhylline, if possible, ... and cancel death if the treatment is successful. + # Aminophylline dose = 100mg 8hrly, assuming 600mg in 48 hours prob_treatment_success = self.module.models.prob_livesaved_given_treatment( df=df.iloc[[person_id]], - oxygen=self.get_consumables(self.module.item_codes['oxygen']), - aminophylline=self.get_consumables(self.module.item_codes['aminophylline']) + oxygen=self.get_consumables({self.module.item_codes['oxygen']: 23_040}), + aminophylline=self.get_consumables({self.module.item_codes['aminophylline']: 600}) ) if prob_treatment_success: diff --git a/src/tlo/methods/labour.py b/src/tlo/methods/labour.py index f636fb3dbc..07b0b53dde 100644 --- a/src/tlo/methods/labour.py +++ b/src/tlo/methods/labour.py @@ -768,7 +768,7 @@ def get_and_store_labour_item_codes(self): # --------------------------------------- ORAL ANTIHYPERTENSIVES --------------------------------------------- self.item_codes_lab_consumables['oral_antihypertensives'] = \ - {ic('Hydralazine, powder for injection, 20 mg ampoule'): 1} + {ic('Methyldopa 250mg_1000_CMST'): 1} # ---------------------------------- SEVERE PRE-ECLAMPSIA/ECLAMPSIA ----------------------------------------- self.item_codes_lab_consumables['magnesium_sulfate'] = \ @@ -1790,8 +1790,9 @@ def assessment_and_treatment_of_hypertension(self, hsi_event, labour_stage): elif (labour_stage == 'pp') and (df.at[person_id, 'pn_htn_disorders'] == 'severe_gest_htn'): df.at[person_id, 'pn_htn_disorders'] = 'gest_htn' - avail = hsi_event.get_consumables( - item_codes=self.item_codes_lab_consumables['oral_antihypertensives']) + dose = (7 * 4) * 6 # approximating 4 tablets a day, for 6 weeks + cons = {_i: dose for _i in self.item_codes_lab_consumables['oral_antihypertensives']} + avail = hsi_event.get_consumables(item_codes=cons) if avail: df.at[person_id, 'la_gest_htn_on_treatment'] = True From ae8615d4eab628aa9814cf8b12f87cb1d66ad06a Mon Sep 17 00:00:00 2001 From: joehcollins Date: Tue, 14 May 2024 15:14:25 +0100 Subject: [PATCH 070/230] remove consumable packages from diarrhoea.py --- src/tlo/methods/diarrhoea.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/tlo/methods/diarrhoea.py b/src/tlo/methods/diarrhoea.py index 242ecb2cc1..7e9c989995 100644 --- a/src/tlo/methods/diarrhoea.py +++ b/src/tlo/methods/diarrhoea.py @@ -644,18 +644,22 @@ def report_daly_values(self): def look_up_consumables(self): """Look up and store the consumables item codes used in each of the HSI.""" - get_item_codes_from_package_name = self.sim.modules['HealthSystem'].get_item_codes_from_package_name - - self.consumables_used_in_hsi['ORS'] = get_item_codes_from_package_name( - package='ORS') - self.consumables_used_in_hsi['Treatment_Severe_Dehydration'] = get_item_codes_from_package_name( - package='Treatment of severe diarrhea') - self.consumables_used_in_hsi['Zinc_Under6mo'] = get_item_codes_from_package_name( - package='Zinc for Children 0-6 months') - self.consumables_used_in_hsi['Zinc_Over6mo'] = get_item_codes_from_package_name( 
-            package='Zinc for Children 6-59 months')
-        self.consumables_used_in_hsi['Antibiotics_for_Dysentery'] = get_item_codes_from_package_name(
-            package='Antibiotics for treatment of dysentery')
+        ic = self.sim.modules['HealthSystem'].get_item_code_from_item_name
+
+        self.consumables_used_in_hsi['ORS'] = {ic('ORS, sachet'): 1}
+
+        self.consumables_used_in_hsi['Treatment_Severe_Dehydration'] = \
+            {ic('ORS, sachet'): 1,
+             ic('Giving set iv administration + needle 15 drops/ml_each_CMST'): 1,
+             ic("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 1}
+
+        self.consumables_used_in_hsi['Zinc_Under6mo'] = {ic('Zinc, tablet, 20 mg'): 1}
+
+        self.consumables_used_in_hsi['Zinc_Over6mo'] = {ic('Zinc, tablet, 20 mg'): 1}
+
+        self.consumables_used_in_hsi['Antibiotics_for_Dysentery'] = \
+            {ic('Ciprofloxacin 250mg_100_CMST'): 1,
+             ic("Paracetamol syrup 120mg/5ml_0.0119047619047619_CMST"): 1}
 
     def do_when_presentation_with_diarrhoea(self, person_id, hsi_event):
         """This routine is called when Diarrhoea is a symptom for a child attending a Generic HSI Appointment. It

From a96785095b72f539d588a2e8f57b4aeea2816b2b Mon Sep 17 00:00:00 2001
From: joehcollins
Date: Tue, 14 May 2024 15:57:38 +0100
Subject: [PATCH 071/230] diarrhoea.py consumable units

---
 src/tlo/methods/diarrhoea.py | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/src/tlo/methods/diarrhoea.py b/src/tlo/methods/diarrhoea.py
index 7e9c989995..fe442138fc 100644
--- a/src/tlo/methods/diarrhoea.py
+++ b/src/tlo/methods/diarrhoea.py
@@ -646,20 +646,21 @@ def look_up_consumables(self):
         """Look up and store the consumables item codes used in each of the HSI."""
         ic = self.sim.modules['HealthSystem'].get_item_code_from_item_name
 
-        self.consumables_used_in_hsi['ORS'] = {ic('ORS, sachet'): 1}
+        self.consumables_used_in_hsi['ORS'] = {ic('ORS, sachet'): 2}
 
         self.consumables_used_in_hsi['Treatment_Severe_Dehydration'] = \
-            {ic('ORS, sachet'): 1,
+            {ic('ORS, sachet'): 2,
             ic('Giving set iv administration + needle 15 drops/ml_each_CMST'): 1,
-             ic("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 1}
+             ic("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 1000}
 
-        self.consumables_used_in_hsi['Zinc_Under6mo'] = {ic('Zinc, tablet, 20 mg'): 1}
-
-        self.consumables_used_in_hsi['Zinc_Over6mo'] = {ic('Zinc, tablet, 20 mg'): 1}
+        self.consumables_used_in_hsi['Zinc'] = ic('Zinc, tablet, 20 mg')
 
+        # For weight based treatment for children under five, we've averaged the median weight for each of years
+        # 0-5 as 12kg.
+        # So for cipro/para - 10mg/kg 12 hrly for 7 days = ((10*12)*2) * 7 (same dose recommended)
         self.consumables_used_in_hsi['Antibiotics_for_Dysentery'] = \
-            {ic('Ciprofloxacin 250mg_100_CMST'): 1,
-             ic("Paracetamol syrup 120mg/5ml_0.0119047619047619_CMST"): 1}
+            {ic('Ciprofloxacin 250mg_100_CMST'): 1680,
+             ic("Paracetamol syrup 120mg/5ml_0.0119047619047619_CMST"): 1680}
 
     def do_when_presentation_with_diarrhoea(self, person_id, hsi_event):
         """This routine is called when Diarrhoea is a symptom for a child attending a Generic HSI Appointment. It
@@ -732,9 +733,10 @@ def do_treatment(self, person_id, hsi_event):
         # ** Implement the procedure for treatment **
 
         # STEP ZERO: Get the Zinc consumable (happens irrespective of whether child will die or not)
+        # Dose is 10mg 24hrly for 10 days <6 months or 20mg for >6 months
+        dose = 100 if person.age_exact_years < 0.5 else 200
         gets_zinc = hsi_event.get_consumables(
-            item_codes=self.consumables_used_in_hsi[
-                'Zinc_Under6mo' if person.age_exact_years < 0.5 else 'Zinc_Over6mo']
+            item_codes={self.consumables_used_in_hsi['Zinc']: dose}
         )
 
         # STEP ONE: Aim to alleviate dehydration:

From a80e389022e358266618675483a49ccb93399454 Mon Sep 17 00:00:00 2001
From: joehcollins
Date: Wed, 15 May 2024 10:06:39 +0100
Subject: [PATCH 072/230] cmd.py consumable units

---
 src/tlo/methods/cardio_metabolic_disorders.py | 42 +++++++++++++++----
 1 file changed, 35 insertions(+), 7 deletions(-)

diff --git a/src/tlo/methods/cardio_metabolic_disorders.py b/src/tlo/methods/cardio_metabolic_disorders.py
index 4ef5c41c8e..5a0190afa7 100644
--- a/src/tlo/methods/cardio_metabolic_disorders.py
+++ b/src/tlo/methods/cardio_metabolic_disorders.py
@@ -1567,10 +1567,23 @@ def apply(self, person_id, squeeze_factor):
             return self.sim.modules['HealthSystem'].get_blank_appt_footprint()
         assert person[f'nc_{self.condition}_ever_diagnosed'], "The person is not diagnosed and so should not be " \
                                                               "receiving an HSI."
+
+        # Monthly doses of medications as follows. 
Diabetes - 1000mg metformin daily (1000*30.5), + # hypertension - 25mg hydrochlorothiazide daily (25*30.5), CKD 1 dialysis bag (estimate), + # lower back pain - 2400mg aspirin daily (2400*30.5), CIHD - 75mg aspirin daily (75*30.5) + dose = {'diabetes': 30_500, + 'hypertension': 610, + 'chronic_kidney_disease': 1, + 'chronic_lower_back_pain': 73_200, + 'chronic_ischemic_hd': 2288, + 'ever_stroke': 2288, + 'ever_heart_attack': 2288} + # Check availability of medication for condition if self.get_consumables( - item_codes=self.module.parameters[f'{self.condition}_hsi'].get('medication_item_code').astype(int) - ): + item_codes={self.module.parameters[f'{self.condition}_hsi'].get('medication_item_code').astype(int) + : dose[self.condition]}): # Schedule their next HSI for a refill of medication, one month from now self.sim.modules['HealthSystem'].schedule_hsi_event( hsi_event=self, @@ -1701,10 +1725,14 @@ def do_for_each_event_to_be_investigated(self, _ev): df.at[person_id, f'nc_{_ev}_date_diagnosis'] = self.sim.date df.at[person_id, f'nc_{_ev}_ever_diagnosed'] = True if self.module.parameters['prob_care_provided_given_seek_emergency_care'] > self.module.rng.random_sample(): + # If care is provided.... + # TODO: confirm dose for emergency treatment of MI/STROKE + dose = 20 if _ev == 'ever_stroke' else 40 + if self.get_consumables( - item_codes=self.module.parameters[f'{_ev}_hsi'].get( - 'emergency_medication_item_code').astype(int) + item_codes={self.module.parameters[f'{_ev}_hsi'].get( + 'emergency_medication_item_code').astype(int): dose} ): logger.debug(key='debug', data='Treatment will be provided.') df.at[person_id, f'nc_{_ev}_on_medication'] = True From 08cfa4240c1ec676d76056878f46f4e32586e9d8 Mon Sep 17 00:00:00 2001 From: joehcollins Date: Wed, 15 May 2024 11:21:17 +0100 Subject: [PATCH 073/230] rti.py consumable units --- src/tlo/methods/rti.py | 64 ++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index aac0129cf6..fe97c19456 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -3746,21 +3746,21 @@ def apply(self, person_id, squeeze_factor): # TODO: find a more complete list of required consumables for adults if is_child: self.module.item_codes_for_consumables_required['shock_treatment_child'] = { - get_item_code("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 1, - get_item_code("Dextrose (glucose) 5%, 1000ml_each_CMST"): 1, + get_item_code("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 500, + get_item_code("Dextrose (glucose) 5%, 1000ml_each_CMST"): 500, get_item_code('Cannula iv (winged with injection pot) 18_each_CMST'): 1, - get_item_code('Blood, one unit'): 1, - get_item_code("Oxygen, 1000 liters, primarily with oxygen cylinders"): 1 + get_item_code('Blood, one unit'): 2, + get_item_code("Oxygen, 1000 liters, primarily with oxygen cylinders"): 23_040 } is_cons_available = self.get_consumables( self.module.item_codes_for_consumables_required['shock_treatment_child'] ) else: self.module.item_codes_for_consumables_required['shock_treatment_adult'] = { - get_item_code("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 1, + get_item_code("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 2000, get_item_code('Cannula iv (winged with injection pot) 18_each_CMST'): 1, - get_item_code('Blood, one unit'): 1, - get_item_code("Oxygen, 1000 liters, primarily with oxygen cylinders"): 1 + get_item_code('Blood, one unit'): 2, + 
get_item_code("Oxygen, 1000 liters, primarily with oxygen cylinders"): 23_040 } is_cons_available = self.get_consumables( self.module.item_codes_for_consumables_required['shock_treatment_adult'] @@ -3858,7 +3858,7 @@ def apply(self, person_id, squeeze_factor): # If they have a fracture that needs a cast, ask for plaster of paris self.module.item_codes_for_consumables_required['fracture_treatment'] = { get_item_code('Plaster of Paris (POP) 10cm x 7.5cm slab_12_CMST'): fracturecastcounts, - get_item_code('Bandage, crepe 7.5cm x 1.4m long , when stretched'): slingcounts, + get_item_code('Bandage, crepe 7.5cm x 1.4m long , when stretched'): 200, } is_cons_available = self.get_consumables( self.module.item_codes_for_consumables_required['fracture_treatment'] @@ -3979,11 +3979,12 @@ def apply(self, person_id, squeeze_factor): assert df.loc[person_id, 'rt_med_int'], 'person sent here has not been treated' # If they have an open fracture, ask for consumables to treat fracture + # todo: confirm single dose of prophylactic ceftriaxone if open_fracture_counts > 0: self.module.item_codes_for_consumables_required['open_fracture_treatment'] = { - get_item_code('Ceftriaxone 1g, PFR_each_CMST'): 1, - get_item_code('Cetrimide 15% + chlorhexidine 1.5% solution.for dilution _5_CMST'): 1, - get_item_code("Gauze, absorbent 90cm x 40m_each_CMST"): 1, + get_item_code('Ceftriaxone 1g, PFR_each_CMST'): 2000, + get_item_code('Cetrimide 15% + chlorhexidine 1.5% solution.for dilution _5_CMST'): 500, + get_item_code("Gauze, absorbent 90cm x 40m_each_CMST"): 100, get_item_code('Suture pack'): 1, } # If wound is "grossly contaminated" administer Metronidazole @@ -3991,9 +3992,10 @@ def apply(self, person_id, squeeze_factor): p = self.module.parameters prob_open_fracture_contaminated = p['prob_open_fracture_contaminated'] rand_for_contamination = self.module.rng.random_sample(size=1) + # todo: confirm correct dose (below from BNF for surgical prophylaxsis) if rand_for_contamination < prob_open_fracture_contaminated: self.module.item_codes_for_consumables_required['open_fracture_treatment'].update( - {get_item_code('Metronidazole, injection, 500 mg in 100 ml vial'): 1} + {get_item_code('Metronidazole, injection, 500 mg in 100 ml vial'): 1500} ) # Check that there are enough consumables to treat this person's fractures is_cons_available = self.get_consumables( @@ -4092,7 +4094,7 @@ def apply(self, person_id, squeeze_factor): if lacerationcounts > 0: self.module.item_codes_for_consumables_required['laceration_treatment'] = { get_item_code('Suture pack'): lacerationcounts, - get_item_code('Cetrimide 15% + chlorhexidine 1.5% solution.for dilution _5_CMST'): lacerationcounts, + get_item_code('Cetrimide 15% + chlorhexidine 1.5% solution.for dilution _5_CMST'): 500, } # check the number of suture kits required and request them @@ -4200,7 +4202,7 @@ def apply(self, person_id, squeeze_factor): # check if they have multiple burns, which implies a higher burned total body surface area (TBSA) which # will alter the treatment plan self.module.item_codes_for_consumables_required['burn_treatment'].update( - {get_item_code("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 1} + {get_item_code("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 4000} ) is_cons_available = self.get_consumables( @@ -4386,20 +4388,20 @@ def apply(self, person_id, squeeze_factor): description='Summary of the pain medicine requested by each person') if df.loc[person_id, 'age_years'] < 16: 
self.module.item_codes_for_consumables_required['pain_management'] = { - get_item_code("Paracetamol 500mg_1000_CMST"): 1 + get_item_code("Paracetamol 500mg_1000_CMST"): 8000 } cond = self.get_consumables( self.module.item_codes_for_consumables_required['pain_management'] ) else: self.module.item_codes_for_consumables_required['pain_management'] = { - get_item_code("diclofenac sodium 25 mg, enteric coated_1000_IDA"): 1 + get_item_code("diclofenac sodium 25 mg, enteric coated_1000_IDA"): 300 } cond1 = self.get_consumables( self.module.item_codes_for_consumables_required['pain_management'] ) self.module.item_codes_for_consumables_required['pain_management'] = { - get_item_code("Paracetamol 500mg_1000_CMST"): 1 + get_item_code("Paracetamol 500mg_1000_CMST"): 8000 } cond2 = self.get_consumables( self.module.item_codes_for_consumables_required['pain_management'] @@ -4456,7 +4458,7 @@ def apply(self, person_id, squeeze_factor): data=dict_to_output, description='Summary of the pain medicine requested by each person') self.module.item_codes_for_consumables_required['pain_management'] = { - get_item_code("tramadol HCl 100 mg/2 ml, for injection_100_IDA"): 1 + get_item_code("tramadol HCl 100 mg/2 ml, for injection_100_IDA"): 300 } is_cons_available = self.get_consumables( self.module.item_codes_for_consumables_required['pain_management'] @@ -4488,7 +4490,7 @@ def apply(self, person_id, squeeze_factor): description='Summary of the pain medicine requested by each person') # give morphine self.module.item_codes_for_consumables_required['pain_management'] = { - get_item_code("morphine sulphate 10 mg/ml, 1 ml, injection (nt)_10_IDA"): 1 + get_item_code("morphine sulphate 10 mg/ml, 1 ml, injection (nt)_10_IDA"): 120 } is_cons_available = self.get_consumables( self.module.item_codes_for_consumables_required['pain_management'] @@ -4648,22 +4650,22 @@ def apply(self, person_id, squeeze_factor): # Request first draft of consumables used in major surgery self.module.item_codes_for_consumables_required['major_surgery'] = { # request a general anaesthetic - get_item_code("Halothane (fluothane)_250ml_CMST"): 1, + get_item_code("Halothane (fluothane)_250ml_CMST"): 100, # clean the site of the surgery - get_item_code("Chlorhexidine 1.5% solution_5_CMST"): 1, + get_item_code("Chlorhexidine 1.5% solution_5_CMST"): 500, # tools to begin surgery get_item_code("Scalpel blade size 22 (individually wrapped)_100_CMST"): 1, # administer an IV get_item_code('Cannula iv (winged with injection pot) 18_each_CMST'): 1, get_item_code("Giving set iv administration + needle 15 drops/ml_each_CMST"): 1, - get_item_code("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 1, + get_item_code("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 2000, # repair incision made get_item_code("Suture pack"): 1, - get_item_code("Gauze, absorbent 90cm x 40m_each_CMST"): 1, + get_item_code("Gauze, absorbent 90cm x 40m_each_CMST"): 100, # administer pain killer - get_item_code('Pethidine, 50 mg/ml, 2 ml ampoule'): 1, + get_item_code('Pethidine, 50 mg/ml, 2 ml ampoule'): 6, # administer antibiotic - get_item_code("Ampicillin injection 500mg, PFR_each_CMST"): 1, + get_item_code("Ampicillin injection 500mg, PFR_each_CMST"): 1000, # equipment used by surgeon, gloves and facemask get_item_code('Disposables gloves, powder free, 100 pieces per box'): 1, get_item_code('surgical face mask, disp., with metal nose piece_50_IDA'): 1, @@ -4984,22 +4986,22 @@ def apply(self, person_id, squeeze_factor): # Request first draft of consumables used in 
major surgery self.module.item_codes_for_consumables_required['minor_surgery'] = { # request a local anaesthetic - get_item_code("Halothane (fluothane)_250ml_CMST"): 1, + get_item_code("Halothane (fluothane)_250ml_CMST"): 100, # clean the site of the surgery - get_item_code("Chlorhexidine 1.5% solution_5_CMST"): 1, + get_item_code("Chlorhexidine 1.5% solution_5_CMST"): 500, # tools to begin surgery get_item_code("Scalpel blade size 22 (individually wrapped)_100_CMST"): 1, # administer an IV get_item_code('Cannula iv (winged with injection pot) 18_each_CMST'): 1, get_item_code("Giving set iv administration + needle 15 drops/ml_each_CMST"): 1, - get_item_code("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 1, + get_item_code("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 2000, # repair incision made get_item_code("Suture pack"): 1, - get_item_code("Gauze, absorbent 90cm x 40m_each_CMST"): 1, + get_item_code("Gauze, absorbent 90cm x 40m_each_CMST"): 100, # administer pain killer - get_item_code('Pethidine, 50 mg/ml, 2 ml ampoule'): 1, + get_item_code('Pethidine, 50 mg/ml, 2 ml ampoule'): 6, # administer antibiotic - get_item_code("Ampicillin injection 500mg, PFR_each_CMST"): 1, + get_item_code("Ampicillin injection 500mg, PFR_each_CMST"): 1000, # equipment used by surgeon, gloves and facemask get_item_code('Disposables gloves, powder free, 100 pieces per box'): 1, get_item_code('surgical face mask, disp., with metal nose piece_50_IDA'): 1, From 6bdf8c6e221aa8bbe43885f3abccf693a94fc157 Mon Sep 17 00:00:00 2001 From: joehcollins Date: Fri, 17 May 2024 10:00:25 +0100 Subject: [PATCH 074/230] alri.py consumable units plus fixed error in diarrhoea.py --- src/tlo/methods/alri.py | 132 ++++++++++------------------------- src/tlo/methods/diarrhoea.py | 6 +- 2 files changed, 38 insertions(+), 100 deletions(-) diff --git a/src/tlo/methods/alri.py b/src/tlo/methods/alri.py index 277726e0ff..9f5b5e015c 100644 --- a/src/tlo/methods/alri.py +++ b/src/tlo/methods/alri.py @@ -1014,139 +1014,85 @@ def look_up_consumables(self): get_item_code = self.sim.modules['HealthSystem'].get_item_code_from_item_name - def get_dosage_for_age_in_months(age_in_whole_months: float, doses_by_age_in_months: Dict[int, float]): - """Returns the dose corresponding to age, using the lookup provided in `doses`. The format of `doses` is: - {: }.""" - - for upper_age_bound_in_months, _dose in sorted(doses_by_age_in_months.items()): - if age_in_whole_months < upper_age_bound_in_months: - return _dose - return _dose - - # # # # # # Dosages by age # # # # # # + # # # # # # Dosages by weight # # # # # # + # Assuming average weight of 0-5 is 12kg (abstraction). 
Doses sourced for WHO Pocket book of hospital care for + # children: Second edition 2014 # Antibiotic therapy ------------------- - # Antibiotics for non-severe pneumonia - oral amoxicillin for 5 days + # Antibiotics for non-severe pneumonia - oral amoxicillin for 5 days (40mg/kg BD - ((12*40)*2)*5 =4800mg) self.consumables_used_in_hsi['Amoxicillin_tablet_or_suspension_5days'] = { - get_item_code(item='Amoxycillin 250mg_1000_CMST'): - lambda _age: get_dosage_for_age_in_months(int(_age * 12.0), - {2: 0, 12: 0.006, 36: 0.012, np.inf: 0.018} - ), - get_item_code(item='Amoxycillin 125mg/5ml suspension, PFR_0.025_CMST'): - lambda _age: get_dosage_for_age_in_months(int(_age * 12.0), - {2: 0, 12: 1, 36: 2, np.inf: 3} - ), - } + get_item_code(item='Amoxycillin 250mg_1000_CMST'): 4800, + get_item_code(item='Amoxycillin 125mg/5ml suspension, PFR_0.025_CMST'): 192} # 25mg/ml - 4800/25 - # Antibiotics for non-severe pneumonia - oral amoxicillin for 3 days + # Antibiotics for non-severe pneumonia - oral amoxicillin for 3 days (40mg/kg BD - ((12*40)*2)*3 =2880mg) self.consumables_used_in_hsi['Amoxicillin_tablet_or_suspension_3days'] = { - get_item_code(item='Amoxycillin 250mg_1000_CMST'): - lambda _age: get_dosage_for_age_in_months(int(_age * 12.0), - {2: 0, 12: 0.01, 36: 0.02, np.inf: 0.03} - ), - get_item_code(item='Amoxycillin 125mg/5ml suspension, PFR_0.025_CMST'): - lambda _age: get_dosage_for_age_in_months(int(_age * 12.0), - {2: 0, 12: 1, 36: 2, np.inf: 3} - ), - } + get_item_code(item='Amoxycillin 250mg_1000_CMST'): 2880, + get_item_code(item='Amoxycillin 125mg/5ml suspension, PFR_0.025_CMST'): 115} # 25mg/ml - 2880/25 - # Antibiotics for non-severe pneumonia - oral amoxicillin for 7 days for young infants only + # Antibiotics for non-severe pneumonia - oral amoxicillin for 7 days for young infants only (40mg/kg BD - + # ((12*40)*2)*7 =6720mg) self.consumables_used_in_hsi['Amoxicillin_tablet_or_suspension_7days'] = { - get_item_code(item='Amoxycillin 250mg_1000_CMST'): - lambda _age: get_dosage_for_age_in_months(int(_age * 12.0), - {1: 0.004, 2: 0.006, np.inf: 0.01} - ), - get_item_code(item='Amoxycillin 125mg/5ml suspension, PFR_0.025_CMST'): - lambda _age: get_dosage_for_age_in_months(int(_age * 12.0), - {1: 0.4, 2: 0.5, np.inf: 1} - ), - } + get_item_code(item='Amoxycillin 250mg_1000_CMST'): 6720, + get_item_code(item='Amoxycillin 125mg/5ml suspension, PFR_0.025_CMST'): 269} # 25mg/ml - 6720/25 # Antibiotic therapy for severe pneumonia - ampicillin package + # Amp. dose - 50mg/KG QDS 5 days = (50*12)*4)*5 = 12_000mg + # Gent. 
dose -7.5mg/kg per day 5 days = (7.5*12)*5 = 450mg self.consumables_used_in_hsi['1st_line_IV_antibiotics'] = { - get_item_code(item='Ampicillin injection 500mg, PFR_each_CMST'): - lambda _age: get_dosage_for_age_in_months(int(_age * 12.0), - {1: 3.73, 2: 5.6, 4: 8, 12: 16, 36: 24, np.inf: 40} - ), - get_item_code(item='Gentamicin Sulphate 40mg/ml, 2ml_each_CMST'): - lambda _age: get_dosage_for_age_in_months(int(_age * 12.0), - {1: 0.7, 2: 1.4, 4: 2.81, 12: 4.69, 36: 7.03, np.inf: 9.37} - ), + get_item_code(item='Ampicillin injection 500mg, PFR_each_CMST'): 24, # 500mg vial -12_000/500 + get_item_code(item='Gentamicin Sulphate 40mg/ml, 2ml_each_CMST'): 6, # 80mg/2ml = 450/8 get_item_code(item='Cannula iv (winged with injection pot) 16_each_CMST'): 1, get_item_code(item='Syringe, Autodisable SoloShot IX '): 1 } # # Antibiotic therapy for severe pneumonia - benzylpenicillin package when ampicillin is not available + # Benpen dose - 50_000IU/KG QDS 5 days = (50_000*12)*4)*5 = 12_000_000IU = 8g (approx) + # Gent. dose -7.5mg/kg per day 5 days = (7.5*12)*5 = 450mg self.consumables_used_in_hsi['Benzylpenicillin_gentamicin_therapy_for_severe_pneumonia'] = { - get_item_code(item='Benzylpenicillin 3g (5MU), PFR_each_CMST'): - lambda _age: get_dosage_for_age_in_months(int(_age * 12.0), - {1: 2, 2: 5, 4: 8, 12: 15, 36: 24, np.inf: 34} - ), - get_item_code(item='Gentamicin Sulphate 40mg/ml, 2ml_each_CMST'): - lambda _age: get_dosage_for_age_in_months(int(_age * 12.0), - {1: 0.7, 2: 1.4, 4: 2.81, 12: 4.69, 36: 7.03, np.inf: 9.37} - ), + get_item_code(item='Benzylpenicillin 3g (5MU), PFR_each_CMST'): 8, + get_item_code(item='Gentamicin Sulphate 40mg/ml, 2ml_each_CMST'): 6, # 80mg/2ml = 450/8 get_item_code(item='Cannula iv (winged with injection pot) 16_each_CMST'): 1, get_item_code(item='Syringe, Autodisable SoloShot IX '): 1 } # Second line of antibiotics for severe pneumonia, if Staph not suspected + # Ceft. dose = 80mg/kg per day 5 days = (80*12)*5 = 4800mg self.consumables_used_in_hsi['Ceftriaxone_therapy_for_severe_pneumonia'] = { - get_item_code(item='Ceftriaxone 1g, PFR_each_CMST'): - lambda _age: get_dosage_for_age_in_months(int(_age * 12.0), - {4: 1.5, 12: 3, 36: 5, np.inf: 7} - ), + get_item_code(item='Ceftriaxone 1g, PFR_each_CMST'): 1, # smallest unit is 1g get_item_code(item='Cannula iv (winged with injection pot) 16_each_CMST'): 1, get_item_code(item='Syringe, Autodisable SoloShot IX '): 1 } # Second line of antibiotics for severe pneumonia, if Staph is suspected + # Flucox. dose = 50mg/kg QDS 7 days = ((50*12)*4)*7 = 16_800mg + # Oral flucox dose. 
= same self.consumables_used_in_hsi['2nd_line_Antibiotic_therapy_for_severe_staph_pneumonia'] = { - get_item_code(item='Flucloxacillin 250mg, vial, PFR_each_CMST'): - lambda _age: get_dosage_for_age_in_months(int(_age * 12.0), - {2: 21, 4: 22.4, 12: 37.3, 36: 67.2, 60: 93.3, np.inf: 140} - ), - get_item_code(item='Gentamicin Sulphate 40mg/ml, 2ml_each_CMST'): - lambda _age: get_dosage_for_age_in_months(int(_age * 12.0), - {4: 2.81, 12: 4.69, 36: 7.03, 60: 9.37, np.inf: 13.6} - ), + get_item_code(item='Flucloxacillin 250mg, vial, PFR_each_CMST'): 16_800, + get_item_code(item='Gentamicin Sulphate 40mg/ml, 2ml_each_CMST'): 6, # 80mg/2ml = 450/8 get_item_code(item='Cannula iv (winged with injection pot) 16_each_CMST'): 1, get_item_code(item='Syringe, Autodisable SoloShot IX '): 1, - get_item_code(item='Flucloxacillin 250mg_100_CMST'): - lambda _age: get_dosage_for_age_in_months(int(_age * 12.0), - {4: 0.42, 36: 0.84, 60: 1.68, np.inf: 1.68} - ), - } + get_item_code(item='Flucloxacillin 250mg_100_CMST'): 16_800} # First dose of antibiotic before referral ------------------- - + # single dose of 7.5mg gent and 50mg/g amp. given # Referral process in iCCM for severe pneumonia, and at health centres for HIV exposed/infected self.consumables_used_in_hsi['First_dose_oral_amoxicillin_for_referral'] = { - get_item_code(item='Amoxycillin 250mg_1000_CMST'): - lambda _age: get_dosage_for_age_in_months(int(_age * 12.0), - {12: 0.001, 36: 0.002, np.inf: 0.003} - ), + get_item_code(item='Amoxycillin 250mg_1000_CMST'): 250, } # Referral process at health centres for severe cases self.consumables_used_in_hsi['First_dose_IM_antibiotics_for_referral'] = { - get_item_code(item='Ampicillin injection 500mg, PFR_each_CMST'): - lambda _age: get_dosage_for_age_in_months(int(_age * 12.0), - {4: 0.4, 12: 0.8, 36: 1.4, np.inf: 2} - ), - get_item_code(item='Gentamicin Sulphate 40mg/ml, 2ml_each_CMST'): - lambda _age: get_dosage_for_age_in_months(int(_age * 12.0), - {4: 0.56, 12: 0.94, 36: 1.41, np.inf: 1.87} - ), + get_item_code(item='Ampicillin injection 500mg, PFR_each_CMST'): 2, # 2 x 500mg vial + get_item_code(item='Gentamicin Sulphate 40mg/ml, 2ml_each_CMST'): 2, # assuming single dose at referral get_item_code(item='Cannula iv (winged with injection pot) 16_each_CMST'): 1, get_item_code(item='Syringe, Autodisable SoloShot IX '): 1 } # Oxygen, pulse oximetry and x-ray ------------------- - # Oxygen for hypoxaemia + # Oxygen for hypoxaemia - 5/l per min (Approx) for 3 days ((24*60)*5)*3 self.consumables_used_in_hsi['Oxygen_Therapy'] = { - get_item_code(item='Oxygen, 1000 liters, primarily with oxygen cylinders'): 1, + get_item_code(item='Oxygen, 1000 liters, primarily with oxygen cylinders'): 21_600, } # Pulse oximetry @@ -1162,10 +1108,7 @@ def get_dosage_for_age_in_months(age_in_whole_months: float, doses_by_age_in_mon # Optional consumables ------------------- # Paracetamol self.consumables_used_in_hsi['Paracetamol_tablet'] = { - get_item_code(item='Paracetamol, tablet, 100 mg'): - lambda _age: get_dosage_for_age_in_months(int(_age * 12.0), - {36: 12, np.inf: 18} - ), + get_item_code(item='Paracetamol, tablet, 100 mg'): 240, # 20mg/kg } # Maintenance of fluids via nasograstric tube @@ -1178,11 +1121,6 @@ def get_dosage_for_age_in_months(age_in_whole_months: float, doses_by_age_in_mon get_item_code(item='Salbutamol sulphate 1mg/ml, 5ml_each_CMST'): 2 } - # Bronchodilator - oral - self.consumables_used_in_hsi['Oral_Brochodilator'] = { - get_item_code(item='Salbutamol, syrup, 2 mg/5 ml'): 1, - 
get_item_code(item='Salbutamol, tablet, 4 mg'): 1 - } def end_episode(self, person_id): """End the episode infection for a person (i.e. reset all properties to show no current infection or diff --git a/src/tlo/methods/diarrhoea.py b/src/tlo/methods/diarrhoea.py index e3998c9ba0..1877e0727e 100644 --- a/src/tlo/methods/diarrhoea.py +++ b/src/tlo/methods/diarrhoea.py @@ -664,10 +664,10 @@ def look_up_consumables(self): # For weight based treatment for children under five, we've averaged the median weight for each for years # 0-5 as 12kg. - # So for cipro/para - 10mg/kg 12 hrly for 7 days = ((10*12)*2) * 7 (same dose reccomended) + # So for cipro/para - 10mg/kg 12 hrly for 7 days = ((10*12)*2) * 7 (same dose in mg reccomended) self.consumables_used_in_hsi['Antibiotics_for_Dysentery'] = \ {ic('Ciprofloxacin 250mg_100_CMST'): 1680, - ic("Paracetamol syrup 120mg/5ml_0.0119047619047619_CMST"): 1680} + ic("Paracetamol syrup 120mg/5ml_0.0119047619047619_CMST"): 70} # 24mg/ml so 1680/24 = 70ml per dose def do_treatment(self, person_id, hsi_event): """Method called by the HSI that enacts decisions about a treatment and its effect for diarrhoea caused by a @@ -970,7 +970,7 @@ def do_at_generic_first_appt( self.rng.rand() < self.parameters["prob_hospitalization_on_danger_signs"] ) hsi_event_class = ( - HSI_Diarrhoea_Treatment_Inpatient if is_inpatient else + HSI_Diarrhoea_Treatment_Inpatient if is_inpatient else HSI_Diarrhoea_Treatment_Outpatient ) event = hsi_event_class(person_id=patient_id, module=self) From e6b3005cc0639e0873b9933cd804a891b69fd798 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Mon, 20 May 2024 11:25:07 +0100 Subject: [PATCH 075/230] drop level = 5 staff from the calculation of health worker salaries --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- src/scripts/costing/costing.py | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 06ee70a161..6090f374b5 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bf0d2818d402d3ccece08ffaf3e3ac98f4fb9f18db3fa5d53e3932f98bca2cd5 -size 4038180 +oid sha256:7302da0a48df4b066448666fcc67c4d71a7e8b520a3c67bd53de37ec839853c2 +size 4039674 diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index e4a7ecf7f5..77ea522f05 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -22,7 +22,8 @@ make_age_grp_types, summarize, create_pickles_locally, - parse_log_file + parse_log_file, + unflatten_flattened_multi_index_in_logging ) # define a timestamp for script outputs @@ -118,6 +119,8 @@ def expand_capacity_by_officer_type_and_facility_level(_df: pd.Series) -> pd.Ser # Calculate various components of HR cost # 1.1 Salary cost for modelled health workforce (Staff count X Annual salary) +current_staff_count_by_level_and_officer_type = current_staff_count_by_level_and_officer_type.reset_index() +current_staff_count_by_level_and_officer_type = current_staff_count_by_level_and_officer_type.drop(current_staff_count_by_level_and_officer_type[current_staff_count_by_level_and_officer_type.Facility_Level == '5'].index) salary_for_all_staff = pd.merge(current_staff_count_by_level_and_officer_type[['OfficerType_FacilityLevel', 'Staff_Count']], hr_annual_salary[['OfficerType_FacilityLevel', 'Value']], on = ['OfficerType_FacilityLevel'], how = "left") 
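# ----------------------------------------------------------------------------
# [Editor's note] Illustrative sketch only, not part of the patch above. The
# salary-costing step modified in this commit reduces to: drop Facility_Level
# '5' staff, merge staff counts with annual salaries on the combined
# 'OfficerType_FacilityLevel' key, then multiply. A minimal, self-contained
# pandas example; the cadre label and dollar figures below are made-up
# assumptions, while the column names mirror the patch.
import pandas as pd

staff = pd.DataFrame({
    'OfficerType_FacilityLevel': ['Officer_Type=Nursing_and_Midwifery|Facility_Level=1a',
                                  'Officer_Type=Nursing_and_Midwifery|Facility_Level=5'],
    'Facility_Level': ['1a', '5'],
    'Staff_Count': [1200, 50],
})
salaries = pd.DataFrame({
    'OfficerType_FacilityLevel': ['Officer_Type=Nursing_and_Midwifery|Facility_Level=1a',
                                  'Officer_Type=Nursing_and_Midwifery|Facility_Level=5'],
    'Value': [4500.0, 9000.0],  # assumed annual salary in USD
})

staff = staff[staff.Facility_Level != '5']  # same effect as the .drop(...) call in the patch
salary_cost = staff.merge(salaries, on='OfficerType_FacilityLevel', how='left')
salary_cost['Total_salary_by_cadre_and_level'] = salary_cost['Value'] * salary_cost['Staff_Count']
print(salary_cost['Total_salary_by_cadre_and_level'].sum())  # 5400000.0
# ----------------------------------------------------------------------------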
salary_for_all_staff['Total_salary_by_cadre_and_level'] = salary_for_all_staff['Value'] * salary_for_all_staff['Staff_Count'] From 7625c77b49a41eda461b87345b01a0d71d34a599 Mon Sep 17 00:00:00 2001 From: Eva Janouskova Date: Mon, 20 May 2024 19:00:23 +0100 Subject: [PATCH 076/230] co: get item codes of consumables from item names and define number of units per case in the module (using new chosen units) --- src/tlo/methods/contraception.py | 89 +++++++++++++++++++++++++++----- 1 file changed, 77 insertions(+), 12 deletions(-) diff --git a/src/tlo/methods/contraception.py b/src/tlo/methods/contraception.py index 9ddbc6aac9..4f51c80293 100644 --- a/src/tlo/methods/contraception.py +++ b/src/tlo/methods/contraception.py @@ -645,24 +645,89 @@ def select_contraceptive_following_birth(self, mother_id, mother_age): self.schedule_batch_of_contraceptive_changes(ids=[mother_id], old=['not_using'], new=[new_contraceptive]) def get_item_code_for_each_contraceptive(self): - """Get the item_code for each contraceptive and for contraceptive initiation.""" + """Get the item_code and numbers of units per case for each contraceptive and for contraceptive initiation.""" # TODO: update with optional items (currently all considered essential) - get_items_from_pkg = self.sim.modules['HealthSystem'].get_item_codes_from_package_name + # ### Get item codes and number of units per case from package name + # get_items_from_pkg = self.sim.modules['HealthSystem'].get_item_codes_from_package_name + + # _cons_codes = dict() + # # items for each method that requires an HSI to switch to + # _cons_codes['pill'] = get_items_from_pkg('Pill') + # _cons_codes['male_condom'] = get_items_from_pkg('Male condom') + # _cons_codes['other_modern'] = get_items_from_pkg('Female Condom') + # # NB. The consumable female condom is used for the contraceptive state of "other_modern method" + # _cons_codes['IUD'] = get_items_from_pkg('IUD') + # _cons_codes['injections'] = get_items_from_pkg('Injectable') + # _cons_codes['implant'] = get_items_from_pkg('Implant') + # _cons_codes['female_sterilization'] = get_items_from_pkg('Female sterilization') + # assert set(_cons_codes.keys()) == set(self.states_that_may_require_HSI_to_switch_to) + # # items used when initiating a modern reliable method after not using or switching from non-reliable method + # _cons_codes['co_initiation'] = get_items_from_pkg('Contraception initiation') + + # ### Get item codes from item names and define number of units per case here + get_item_code = self.sim.modules['HealthSystem'].get_item_code_from_item_name _cons_codes = dict() - # items for each method that requires an HSI to switch to - _cons_codes['pill'] = get_items_from_pkg('Pill') - _cons_codes['male_condom'] = get_items_from_pkg('Male condom') - _cons_codes['other_modern'] = get_items_from_pkg('Female Condom') - # NB. The consumable female condom is used for the contraceptive state of "other_modern method" - _cons_codes['IUD'] = get_items_from_pkg('IUD') - _cons_codes['injections'] = get_items_from_pkg('Injectable') - _cons_codes['implant'] = get_items_from_pkg('Implant') - _cons_codes['female_sterilization'] = get_items_from_pkg('Female sterilization') + # # items for each method that requires an HSI to switch to + # TODO: @TimH Pills are alternatives, we weight their use by the probabilities (as approximation of use in 80:20 + # cases, for each case 0.8 and 0.2 proportion of units is used). Is that okay, or do we want to choose for each + # case only one type of pills? 
Total usage should be approximately same. + # TODO: @Sakshi what are the numbers of pills in monthly packets for both following items? + _cons_codes['pill'] =\ + {get_item_code("Levonorgestrel 0.0375 mg, cycle"): 28*3.75*0.2, # progesterone-only pills used in 20% cases + get_item_code("Levonorgestrel 0.15 mg + Ethinyl estradiol 30 mcg (Microgynon), cycle"): 21*3.75*0.8 + # combined pills used in other 80% cases + } + _cons_codes['male_condom'] =\ + {get_item_code("Condom, male"): 30} + _cons_codes['other_modern'] =\ + {get_item_code("Female Condom_Each_CMST"): 30} + _cons_codes['IUD'] =\ + {get_item_code("Glove disposable powdered latex medium_100_CMST"): 2, + get_item_code("IUD, Copper T-380A"): 1} + # TODO: originally used "Medroxyprogesterone acetate injection 150mg/mL, 1mL vial with 2ml syringe with 22g 0.7 + # X 25mm needle_each_CMST", which is not within the RF_Costing anymore, does it mean we need to add a syringe + # and a needle? + _cons_codes['injections'] = \ + {get_item_code("Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly"): 1, + get_item_code("Glove disposable powdered latex medium_100_CMST"): 1, + get_item_code("Water for injection, 10ml_Each_CMST"): 1, + get_item_code("Povidone iodine, solution, 10 %, 5 ml per injection"): 5, + get_item_code("Gauze, swabs 8-ply 10cm x 10cm_100_CMST"): 1} + # TODO: trocar may be reusable (we used 0.1 units, indicating 10 uses on average) - need to find out for what + # kind of trocar (reusable or not) we have the cost + # Jadelle & Implanon are alternatives, we weight their use by the probabilities (as approximation of use in + # 50:50 cases, for each case 0.5 and 0.5 proportion of units is used) + # TODO: @TimH Is that okay, or do we want to choose for each case only one type of implant? Total usage should + # be approximately same. + # TODO: The chosen unit for Jadelle is '1 implant', does it cover both rods? 
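# ----------------------------------------------------------------------------
# [Editor's note] Illustrative sketch only, not part of the patch. The pill
# quantities in this commit encode probability-weighted "expected units per
# case": each alternative product's units are scaled by the assumed share of
# cases in which it is used (80% combined pill vs 20% progesterone-only, per
# the comments above). The dict keys below are descriptive labels of my own;
# the numbers are the ones used in the patch. A later commit in this series
# (PATCH 078) replaces this weighting with a per-case random draw between the
# alternatives.
units_if_used = {'combined_pill_cycles': 21 * 3.75, 'progesterone_only_cycles': 28 * 3.75}
prob_used = {'combined_pill_cycles': 0.8, 'progesterone_only_cycles': 0.2}
expected_units_per_case = {k: units_if_used[k] * prob_used[k] for k in units_if_used}
print(expected_units_per_case)  # roughly 63.0 units (combined) and 21.0 units (progesterone-only) per case
# ----------------------------------------------------------------------------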
+ _cons_codes['implant'] =\ + {get_item_code("Glove disposable powdered latex medium_100_CMST"): 3, + get_item_code("Lidocaine HCl (in dextrose 7.5%), ampoule 2 ml"): 2, + get_item_code("Povidone iodine, solution, 10 %, 5 ml per injection"): 1*5, # unit: 1 ml + get_item_code("Syringe, needle + swab"): 2, + get_item_code("Trocar"): 1, + get_item_code("Needle suture intestinal round bodied ½ circle trocar_6_CMST"): 1, + get_item_code("Jadelle (implant), box of 2_CMST"): 1*0.5, # implant used in 50% cases + get_item_code("Implanon (Etonogestrel 68 mg)"): 1*0.5, # implant used in other 50% cases + get_item_code("Gauze, swabs 8-ply 10cm x 10cm_100_CMST"): 1} + _cons_codes['female_sterilization'] =\ + {get_item_code("Lidocaine HCl (in dextrose 7.5%), ampoule 2 ml"): 1, + get_item_code("Atropine sulphate 600 micrograms/ml, 1ml_each_CMST"): 0.5, # used only in 50% cases + get_item_code("Diazepam, injection, 5 mg/ml, in 2 ml ampoule"): 1, + get_item_code("Syringe, autodestruct, 5ml, disposable, hypoluer with 21g needle_each_CMST"): 3, + get_item_code("Gauze, swabs 8-ply 10cm x 10cm_100_CMST"): 2, + get_item_code("Needle, suture, assorted sizes, round body"): 3, + get_item_code("Suture, catgut, chromic, 0, 150 cm"): 3, + get_item_code("Tape, adhesive, 2.5 cm wide, zinc oxide, 5 m roll"): 125, # unit: 1 cm long (2.5 cm wide) + get_item_code("Glove surgeon's size 7 sterile_2_CMST"): 2, + get_item_code("Paracetamol, tablet, 500 mg"): 8*500, # unit: 1 mg + get_item_code("Povidone iodine, solution, 10 %, 5 ml per injection"): 2*5, # unit: 1 ml + get_item_code("Cotton wool, 500g_1_CMST"): 100} # unit: 1 g assert set(_cons_codes.keys()) == set(self.states_that_may_require_HSI_to_switch_to) # items used when initiating a modern reliable method after not using or switching from non-reliable method - _cons_codes['co_initiation'] = get_items_from_pkg('Contraception initiation') + _cons_codes['co_initiation'] = {get_item_code('Pregnancy slide test kit_100_CMST'): 1} return _cons_codes From dc0b8ac8fca9f86a850fc9d2a264da21a481f94a Mon Sep 17 00:00:00 2001 From: sm2511 Date: Tue, 21 May 2024 17:35:51 +0100 Subject: [PATCH 077/230] Add cystoscope, endoscope and prostate specific antigen test - add three consumables to `ResourceFile_Consumables_Items_and_Packages.csv` using the `generate_consumables_item_codes_and_packages.py` script - This is a replication of commit ff4d072 from PR #1341 --- ...rceFile_Consumables_Items_and_Packages.csv | 4 +-- ...ate_consumables_item_codes_and_packages.py | 35 +++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/resources/healthsystem/consumables/ResourceFile_Consumables_Items_and_Packages.csv b/resources/healthsystem/consumables/ResourceFile_Consumables_Items_and_Packages.csv index e5e17d740c..0ee403abb0 100644 --- a/resources/healthsystem/consumables/ResourceFile_Consumables_Items_and_Packages.csv +++ b/resources/healthsystem/consumables/ResourceFile_Consumables_Items_and_Packages.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a188114a407bb28dc663a41a2854cb9d75c207dbb0809ff3ce8c1f88eca378aa -size 249189 +oid sha256:4106c2e3ae068d40b115857885b673bec3e1114be5183c0a4ae0366560e2a5c9 +size 249391 diff --git a/src/scripts/data_file_processing/healthsystem/consumables/processing_data_from_one_health/generate_consumables_item_codes_and_packages.py b/src/scripts/data_file_processing/healthsystem/consumables/processing_data_from_one_health/generate_consumables_item_codes_and_packages.py index 3e5b742a3e..7ca04f763f 100644 --- 
a/src/scripts/data_file_processing/healthsystem/consumables/processing_data_from_one_health/generate_consumables_item_codes_and_packages.py +++ b/src/scripts/data_file_processing/healthsystem/consumables/processing_data_from_one_health/generate_consumables_item_codes_and_packages.py @@ -342,6 +342,41 @@ def add_record(df: pd.DataFrame, record: Dict): }, ) +cons = add_record( + cons, + { + 'Intervention_Cat': "Added by SM (Recommended by EJ)", + 'Intervention_Pkg': "Misc", + 'Intervention_Pkg_Code': -99, + 'Items': "Cystoscope", + 'Item_Code': 285, + 'Expected_Units_Per_Case': 1.0, + 'Unit_Cost': np.nan}, +) + +cons = add_record( + cons,{ + 'Intervention_Cat': "Added by SM (Recommended by EJ)", + 'Intervention_Pkg': "Misc", + 'Intervention_Pkg_Code': -99, + 'Items': "Endoscope", + 'Item_Code': 280, + 'Expected_Units_Per_Case': 1.0, + 'Unit_Cost': np.nan}, +) + +cons = add_record( + cons,{ + 'Intervention_Cat': "Added by SM (Recommended by EJ)", + 'Intervention_Pkg': "Misc", + 'Intervention_Pkg_Code': -99, + 'Items': "Prostate specific antigen test", + 'Item_Code': 281, + 'Expected_Units_Per_Case': 1.0, + 'Unit_Cost': np.nan}, +) + + # -------------- # -------------- # -------------- From 4600b19c592c500941c6d4e6fe93c9829aafd9bb Mon Sep 17 00:00:00 2001 From: sm2511 Date: Tue, 5 Mar 2024 15:38:36 +0000 Subject: [PATCH 078/230] co: alternative consumables chosen by probs --- src/tlo/methods/contraception.py | 79 +++++++++++++++++++++++--------- 1 file changed, 58 insertions(+), 21 deletions(-) diff --git a/src/tlo/methods/contraception.py b/src/tlo/methods/contraception.py index 4f51c80293..5e3f681b72 100644 --- a/src/tlo/methods/contraception.py +++ b/src/tlo/methods/contraception.py @@ -155,6 +155,7 @@ def __init__(self, name=None, resourcefilepath=None, use_healthsystem=True, run_ self.processed_params = dict() # (Will store the processed data for rates/probabilities of outcomes). 
self.cons_codes = dict() # (Will store the consumables codes for use in the HSI) + self.cons_alternatives_probs = dict() # (Will store the probabilities of use for consumable alternatives) self.rng2 = None # (Will be a second random number generator, used for things to do with scheduling HSI) self._women_ids_sterilized_below30 = set() # The ids of women who had female sterilization initiated when they @@ -257,9 +258,10 @@ def initialise_simulation(self, sim): # Schedule first occurrences of Contraception Poll to occur at the beginning of the simulation sim.schedule_event(ContraceptionPoll(self, run_update_contraceptive=self.run_update_contraceptive), sim.date) - # Retrieve the consumables codes for the consumables used + # Retrieve the consumables codes for the consumables used and probabilities for alternatives if self.use_healthsystem: - self.cons_codes = self.get_item_code_for_each_contraceptive() + self.cons_codes, self.cons_alternatives_probs =\ + self.get_item_codes_for_each_contraceptive_and_probs_for_cons_alternatives() # Create second random number generator self.rng2 = np.random.RandomState(self.rng.randint(2 ** 31 - 1)) @@ -644,11 +646,13 @@ def select_contraceptive_following_birth(self, mother_id, mother_age): # Do the change in contraceptive self.schedule_batch_of_contraceptive_changes(ids=[mother_id], old=['not_using'], new=[new_contraceptive]) - def get_item_code_for_each_contraceptive(self): - """Get the item_code and numbers of units per case for each contraceptive and for contraceptive initiation.""" + def get_item_codes_for_each_contraceptive_and_probs_for_cons_alternatives(self): + """Get the item_code and numbers of units per case for each contraceptive and for contraceptive initiation. + Save the probs for consumable alternatives.""" # TODO: update with optional items (currently all considered essential) # ### Get item codes and number of units per case from package name + # (note: this version does not deal with consumable alternatives) # get_items_from_pkg = self.sim.modules['HealthSystem'].get_item_codes_from_package_name # _cons_codes = dict() @@ -669,16 +673,19 @@ def get_item_code_for_each_contraceptive(self): get_item_code = self.sim.modules['HealthSystem'].get_item_code_from_item_name _cons_codes = dict() + _cons_alternatives_probs = dict() # # items for each method that requires an HSI to switch to - # TODO: @TimH Pills are alternatives, we weight their use by the probabilities (as approximation of use in 80:20 - # cases, for each case 0.8 and 0.2 proportion of units is used). Is that okay, or do we want to choose for each - # case only one type of pills? Total usage should be approximately same. - # TODO: @Sakshi what are the numbers of pills in monthly packets for both following items? + # TODO: to find from Emi - What are the numbers of pills in monthly packets for both following items? 
_cons_codes['pill'] =\ - {get_item_code("Levonorgestrel 0.0375 mg, cycle"): 28*3.75*0.2, # progesterone-only pills used in 20% cases - get_item_code("Levonorgestrel 0.15 mg + Ethinyl estradiol 30 mcg (Microgynon), cycle"): 21*3.75*0.8 - # combined pills used in other 80% cases + {get_item_code("Levonorgestrel 0.0375 mg, cycle"): 28*3.75, + # (alternative) progesterone-only pills used in other 20% cases (see _cons_alternatives_probs) + get_item_code("Levonorgestrel 0.15 mg + Ethinyl estradiol 30 mcg (Microgynon), cycle"): 21*3.75 + # combined pills used in 80% cases (see _cons_alternatives_probs) } + # Pills are alternatives, type chosen according to probabilities when administrated. + _cons_alternatives_probs['pill'] =\ + {get_item_code("Levonorgestrel 0.0375 mg, cycle"): 0.2, + get_item_code("Levonorgestrel 0.15 mg + Ethinyl estradiol 30 mcg (Microgynon), cycle"): 0.8} _cons_codes['male_condom'] =\ {get_item_code("Condom, male"): 30} _cons_codes['other_modern'] =\ @@ -697,11 +704,7 @@ def get_item_code_for_each_contraceptive(self): get_item_code("Gauze, swabs 8-ply 10cm x 10cm_100_CMST"): 1} # TODO: trocar may be reusable (we used 0.1 units, indicating 10 uses on average) - need to find out for what # kind of trocar (reusable or not) we have the cost - # Jadelle & Implanon are alternatives, we weight their use by the probabilities (as approximation of use in - # 50:50 cases, for each case 0.5 and 0.5 proportion of units is used) - # TODO: @TimH Is that okay, or do we want to choose for each case only one type of implant? Total usage should - # be approximately same. - # TODO: The chosen unit for Jadelle is '1 implant', does it cover both rods? + # TODO: @Sakshi The chosen unit for Jadelle is '1 implant', does it cover both rods? _cons_codes['implant'] =\ {get_item_code("Glove disposable powdered latex medium_100_CMST"): 3, get_item_code("Lidocaine HCl (in dextrose 7.5%), ampoule 2 ml"): 2, @@ -709,12 +712,18 @@ def get_item_code_for_each_contraceptive(self): get_item_code("Syringe, needle + swab"): 2, get_item_code("Trocar"): 1, get_item_code("Needle suture intestinal round bodied ½ circle trocar_6_CMST"): 1, - get_item_code("Jadelle (implant), box of 2_CMST"): 1*0.5, # implant used in 50% cases - get_item_code("Implanon (Etonogestrel 68 mg)"): 1*0.5, # implant used in other 50% cases + get_item_code("Jadelle (implant), box of 2_CMST"): 1, # implant used in 50% cases + get_item_code("Implanon (Etonogestrel 68 mg)"): 1, # alternative implant used in other 50% cases + # (see _cons_alternatives_probs) get_item_code("Gauze, swabs 8-ply 10cm x 10cm_100_CMST"): 1} + # Implants are alternatives, type chosen according to probabilities when administrated. 
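# ----------------------------------------------------------------------------
# [Editor's note] Illustrative sketch only, not part of the patch. The
# *_alternatives_probs mappings built here are consumed further down in this
# commit by a per-case random draw (rng.choice over the item codes, weighted by
# these probabilities). A small sanity check one might add before drawing is
# that every mapping sums to 1; the helper name and the item codes below are
# made up for illustration.
import math

def check_alternative_probs(cons_alternatives_probs: dict) -> None:
    for method, probs_by_item in cons_alternatives_probs.items():
        total = sum(probs_by_item.values())
        assert math.isclose(total, 1.0), f"probabilities for '{method}' sum to {total}, not 1"

check_alternative_probs({'pill': {101: 0.8, 102: 0.2}, 'implant': {201: 0.5, 202: 0.5}})
# ----------------------------------------------------------------------------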
+ _cons_alternatives_probs['implant'] = \ + {get_item_code("Jadelle (implant), box of 2_CMST"): 0.5, + get_item_code("Implanon (Etonogestrel 68 mg)"): 0.5} _cons_codes['female_sterilization'] =\ {get_item_code("Lidocaine HCl (in dextrose 7.5%), ampoule 2 ml"): 1, - get_item_code("Atropine sulphate 600 micrograms/ml, 1ml_each_CMST"): 0.5, # used only in 50% cases + get_item_code("Atropine sulphate 600 micrograms/ml, 1ml_each_CMST"): 1, # used only in 50% cases + # (see _cons_alternatives_probs) get_item_code("Diazepam, injection, 5 mg/ml, in 2 ml ampoule"): 1, get_item_code("Syringe, autodestruct, 5ml, disposable, hypoluer with 21g needle_each_CMST"): 3, get_item_code("Gauze, swabs 8-ply 10cm x 10cm_100_CMST"): 2, @@ -725,11 +734,23 @@ def get_item_code_for_each_contraceptive(self): get_item_code("Paracetamol, tablet, 500 mg"): 8*500, # unit: 1 mg get_item_code("Povidone iodine, solution, 10 %, 5 ml per injection"): 2*5, # unit: 1 ml get_item_code("Cotton wool, 500g_1_CMST"): 100} # unit: 1 g + # Atropine is used only in some cases + _cons_alternatives_probs['female_sterilization'] = \ + {-99: 0.5, # no alternative used + get_item_code("Atropine sulphate 600 micrograms/ml, 1ml_each_CMST"): 0.5, + } + + # Order the item codes for each method + for method in _cons_codes: + _cons_codes[method] = dict(sorted(_cons_codes[method].items())) + if method in _cons_alternatives_probs: + _cons_alternatives_probs[method] = dict(sorted(_cons_alternatives_probs[method].items())) + assert set(_cons_codes.keys()) == set(self.states_that_may_require_HSI_to_switch_to) # items used when initiating a modern reliable method after not using or switching from non-reliable method _cons_codes['co_initiation'] = {get_item_code('Pregnancy slide test kit_100_CMST'): 1} - return _cons_codes + return _cons_codes, _cons_alternatives_probs def schedule_batch_of_contraceptive_changes(self, ids, old, new): """Enact the change in contraception, either through editing properties instantaneously or by scheduling HSI. @@ -1211,7 +1232,23 @@ def apply(self, person_id, squeeze_factor): # Determine essential and optional items # TODO: we don't distinguish essential X optional for contraception methods yet, will need to update once we do - items_essential = self.module.cons_codes[self.new_contraceptive] + if self.new_contraceptive in self.module.cons_alternatives_probs: + # administrate one of the alternative consumables (item code -99 means, no alternative used) + alternatives = list(self.module.cons_alternatives_probs[self.new_contraceptive].keys()) + probs = list(self.module.cons_alternatives_probs[self.new_contraceptive].values()) + alternative_administrated = self.module.rng.choice(alternatives, p=probs) + items_essential = {key: self.module.cons_codes[self.new_contraceptive][key] + for key in [alternative_administrated] if key != -99} + + if self.module.cons_codes[self.new_contraceptive].keys() - alternatives: + # administrate all other consumables + items_essential.update({key: value + for key, value in self.module.cons_codes[self.new_contraceptive].items() + if key not in alternatives}) + + else: + items_essential = self.module.cons_codes[self.new_contraceptive] + items_optional = {} # Record use of consumables and default the person to "not_using" if the consumable is not available. 
# If initiating use of a modern contraceptive method except condoms (after not using any or using non-modern From 48401d82dc056bf3f4b5aa9016f6e7bbaa7ea0cc Mon Sep 17 00:00:00 2001 From: Eva Janouskova Date: Wed, 29 May 2024 14:46:26 +0100 Subject: [PATCH 079/230] co: 21 tablets per packet for both types of pills (informed by Emi) --- src/tlo/methods/contraception.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/tlo/methods/contraception.py b/src/tlo/methods/contraception.py index 5e3f681b72..a86f395496 100644 --- a/src/tlo/methods/contraception.py +++ b/src/tlo/methods/contraception.py @@ -675,9 +675,8 @@ def get_item_codes_for_each_contraceptive_and_probs_for_cons_alternatives(self): _cons_codes = dict() _cons_alternatives_probs = dict() # # items for each method that requires an HSI to switch to - # TODO: to find from Emi - What are the numbers of pills in monthly packets for both following items? _cons_codes['pill'] =\ - {get_item_code("Levonorgestrel 0.0375 mg, cycle"): 28*3.75, + {get_item_code("Levonorgestrel 0.0375 mg, cycle"): 21*3.75, # (alternative) progesterone-only pills used in other 20% cases (see _cons_alternatives_probs) get_item_code("Levonorgestrel 0.15 mg + Ethinyl estradiol 30 mcg (Microgynon), cycle"): 21*3.75 # combined pills used in 80% cases (see _cons_alternatives_probs) From 534b9d8206b2fb40606c62f5cff8a8652bef7b25 Mon Sep 17 00:00:00 2001 From: Eva Janouskova Date: Wed, 29 May 2024 14:58:58 +0100 Subject: [PATCH 080/230] co: update TODOs --- src/tlo/methods/contraception.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/tlo/methods/contraception.py b/src/tlo/methods/contraception.py index a86f395496..bc12d03fd6 100644 --- a/src/tlo/methods/contraception.py +++ b/src/tlo/methods/contraception.py @@ -701,9 +701,7 @@ def get_item_codes_for_each_contraceptive_and_probs_for_cons_alternatives(self): get_item_code("Water for injection, 10ml_Each_CMST"): 1, get_item_code("Povidone iodine, solution, 10 %, 5 ml per injection"): 5, get_item_code("Gauze, swabs 8-ply 10cm x 10cm_100_CMST"): 1} - # TODO: trocar may be reusable (we used 0.1 units, indicating 10 uses on average) - need to find out for what - # kind of trocar (reusable or not) we have the cost - # TODO: @Sakshi The chosen unit for Jadelle is '1 implant', does it cover both rods? + # TODO: @Sakshi - update the unit to 1 use _cons_codes['implant'] =\ {get_item_code("Glove disposable powdered latex medium_100_CMST"): 3, get_item_code("Lidocaine HCl (in dextrose 7.5%), ampoule 2 ml"): 2, From 275dc454e786f3e89300bf2341193d0e032366fd Mon Sep 17 00:00:00 2001 From: Eva Janouskova Date: Thu, 30 May 2024 00:03:02 +0100 Subject: [PATCH 081/230] cancer_cons: consumables updated and required units per case defined (as nmbs of chosen units) --- src/tlo/methods/cancer_consumables.py | 108 +++++++++++++------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/src/tlo/methods/cancer_consumables.py b/src/tlo/methods/cancer_consumables.py index 16a6f94f65..b72f86b7b9 100644 --- a/src/tlo/methods/cancer_consumables.py +++ b/src/tlo/methods/cancer_consumables.py @@ -6,91 +6,91 @@ from tlo import Module -def get_consumable_item_codes_cancers(cancer_module: Module) -> Dict[str, int]: +def get_consumable_item_codes_cancers(self, cancer_module: Module) -> Dict[str, int]: """ Returns dict the relevant item_codes for the consumables across the five cancer modules. This is intended to prevent repetition within module code. 
""" - def get_list_of_items(item_list): - item_lookup_fn = cancer_module.sim.modules['HealthSystem'].get_item_code_from_item_name - return list(map(item_lookup_fn, item_list)) + get_item_code = self.sim.modules['HealthSystem'].get_item_code_from_item_name cons_dict = dict() # Add items that are needed for all cancer modules - # todo: @Eva - add syringes, dressing - cons_dict['screening_biopsy_core'] = get_list_of_items(['Biopsy needle']) + cons_dict['screening_biopsy_core'] = \ + {get_item_code("Biopsy needle"): 1} cons_dict['screening_biopsy_optional'] = \ - get_list_of_items(['Specimen container', - 'Lidocaine, injection, 1 % in 20 ml vial', - 'Gauze, absorbent 90cm x 40m_each_CMST', - 'Disposables gloves, powder free, 100 pieces per box']) + {get_item_code("Specimen container"): 1, + get_item_code("Lidocaine HCl (in dextrose 7.5%), ampoule 2 ml"): 1, + get_item_code("Gauze, absorbent 90cm x 40m_each_CMST"): 30, + get_item_code("Disposables gloves, powder free, 100 pieces per box"): 1, + get_item_code("Syringe, needle + swab"): 1} cons_dict['treatment_surgery_core'] = \ - get_list_of_items(['Halothane (fluothane)_250ml_CMST', - 'Scalpel blade size 22 (individually wrapped)_100_CMST']) + {get_item_code("Halothane (fluothane)_250ml_CMST"): 100, + get_item_code("Scalpel blade size 22 (individually wrapped)_100_CMST"): 1} cons_dict['treatment_surgery_optional'] = \ - get_list_of_items(['Sodium chloride, injectable solution, 0,9 %, 500 ml', - 'Paracetamol, tablet, 500 mg', - 'Pethidine, 50 mg/ml, 2 ml ampoule', - 'Suture pack', - 'Gauze, absorbent 90cm x 40m_each_CMST', - 'Cannula iv (winged with injection pot) 18_each_CMST']) + {get_item_code("Sodium chloride, injectable solution, 0,9 %, 500 ml"): 2000, + get_item_code("Paracetamol, tablet, 500 mg"): 8000, + get_item_code("Pethidine, 50 mg/ml, 2 ml ampoule"): 6, + get_item_code("Suture pack"): 1, + get_item_code("Gauze, absorbent 90cm x 40m_each_CMST"): 30, + get_item_code("Cannula iv (winged with injection pot) 18_each_CMST"): 1} cons_dict['palliation'] = \ - get_list_of_items(['morphine sulphate 10 mg/ml, 1 ml, injection (nt)_10_IDA', - 'Diazepam, injection, 5 mg/ml, in 2 ml ampoule', - # N.B. This is not an exhaustive list of drugs required for palliation - ]) + {get_item_code("morphine sulphate 10 mg/ml, 1 ml, injection (nt)_10_IDA"): 1, + get_item_code("Diazepam, injection, 5 mg/ml, in 2 ml ampoule"): 3, + get_item_code("Syringe, needle + swab"): 4} + # N.B. This is not an exhaustive list of drugs required for palliation - cons_dict['iv_drug_cons'] = \ - get_list_of_items(['Cannula iv (winged with injection pot) 18_each_CMST', - 'Giving set iv administration + needle 15 drops/ml_each_CMST', - 'Disposables gloves, powder free, 100 pieces per box' - ]) - - # Add items that are specific to each cancer module - if 'BreastCancer' == cancer_module.name: - - # TODO: chemotharpy protocols??: TAC(Taxotere, Adriamycin, and Cyclophosphamide), AC (anthracycline and - # cyclophosphamide) +/-Taxane, TC (Taxotere and cyclophosphamide), CMF (cyclophosphamide, methotrexate, - # and fluorouracil), FEC-75 (5-Fluorouracil, Epirubicin, Cyclophosphamide). 
HER 2 +: Add Trastuzumab + cons_dict['treatment_chemotherapy_core'] = \ + {get_item_code("Cyclophosphamide, 1 g"): 16800} - # only chemotherapy i consumable list which is also in suggested protocol is cyclo - cons_dict['treatment_chemotherapy'] = get_list_of_items(['Cyclophosphamide, 1 g']) + cons_dict['iv_drug_cons'] = \ + {get_item_code("Cannula iv (winged with injection pot) 18_each_CMST"): 1, + get_item_code("Giving set iv administration + needle 15 drops/ml_each_CMST"): 1, + get_item_code("Disposables gloves, powder free, 100 pieces per box"): 1, + get_item_code("Gauze, swabs 8-ply 10cm x 10cm_100_FF010800_CMST"): 84} - elif 'ProstateCancer' == cancer_module.name: + # Add items that are specific to a particular cancer module + if 'ProstateCancer' == cancer_module.name: - # TODO: Prostate specific antigen test is listed in ResourceFile_Consumables_availability_and_usage but not - # ResourceFile_Consumables_Items_and_Package - # cons_dict['screening_psa_test_core'] = get_list_of_items(['Prostate specific antigen test']) + # TODO: @Sakshi the script to create RF_Consumables_Items_and_Pkgs needs to be re-run + cons_dict['screening_psa_test_core'] = \ + {get_item_code("Prostate specific antigen test"): 1} cons_dict['screening_psa_test_optional'] = \ - get_list_of_items(['Blood collecting tube, 5 ml', - 'Disposables gloves, powder free, 100 pieces per box']) + {get_item_code("Blood collecting tube, 5 ml"): 1, + get_item_code("Disposables gloves, powder free, 100 pieces per box"): 1, + get_item_code("Gauze, swabs 8-ply 10cm x 10cm_100_FF010800_CMST"): 1} elif 'BladderCancer' == cancer_module.name: # Note: bladder cancer is not in the malawi STG 2023 therefore no details on chemotherapy - # TODO: cytoscope is listed in ResourceFile_Consumables_availability_and_usage but not - # ResourceFile_Consumables_Items_and_Packages - # cons_dict['screening_cystoscopy_core'] = get_list_of_items(['Cytoscope']) + # TODO: @Sakshi the script to create RF_Consumables_Items_and_Pkgs needs to be re-run + cons_dict['screening_cystoscopy_core'] = \ + {get_item_code("Cystoscope"): 1} - cons_dict['screening_cystoscope_optional'] = get_list_of_items(['Specimen container']) + cons_dict['screening_cystoscope_optional'] = \ + {get_item_code("Specimen container"): 1, + get_item_code("Lidocaine HCl (in dextrose 7.5%), ampoule 2 ml"): 1, + get_item_code("Gauze, absorbent 90cm x 40m_each_CMST"): 30, + get_item_code("Disposables gloves, powder free, 100 pieces per box"): 1, + get_item_code("Syringe, needle + swab"): 1} elif 'OesophagealCancer' == cancer_module.name: - # TODO: endoscope is listed in ResourceFile_Consumables_availability_and_usage but not - # ResourceFile_Consumables_Items_and_Packages - # cons_dict['screening_endoscope_core'] = get_list_of_items(['Endoscope']) - - cons_dict['screening_endoscope_optional'] =\ - get_list_of_items(['Specimen container', - 'Gauze, absorbent 90cm x 40m_each_CMST']) + # TODO: @Sakshi the script to create RF_Consumables_Items_and_Pkgs needs to be re-run + cons_dict['screening_endoscope_core'] = \ + {get_item_code("Endoscope"): 1} - cons_dict['treatment_chemotherapy'] = get_list_of_items(['Cisplatin 50mg Injection']) + cons_dict['screening_endoscope_optional'] = \ + {get_item_code("Specimen container"): 1, + get_item_code("Gauze, absorbent 90cm x 40m_each_CMST"): 30, + get_item_code("Lidocaine HCl (in dextrose 7.5%), ampoule 2 ml"): 1, + get_item_code("Disposables gloves, powder free, 100 pieces per box"): 1, + get_item_code("Syringe, needle + swab"): 1} return cons_dict From 
5d2c8138c4034d8be1f85370a14d15aa5c221033 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Thu, 30 May 2024 16:21:04 +0100 Subject: [PATCH 082/230] change cost of contraception consumables - Item_Code 12 Jadelle - costs are expressed in terms of "1 implant (2 rods)" - previously cost multiplied by 2 - Item_Code 75 - costs are expressed in terms of "1 swab (10cm X 10cm)" - previous cost divided by 100 --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 6090f374b5..99e23d0e4e 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7302da0a48df4b066448666fcc67c4d71a7e8b520a3c67bd53de37ec839853c2 -size 4039674 +oid sha256:4c79b2e7abf3265e35d31bc1b8fa7d4a8bb0e05672b3a25ddc81749aa24a5092 +size 4119898 From 4b721a7cc20d0f4862f5f73d868962b71abe5e51 Mon Sep 17 00:00:00 2001 From: Eva Janouskova Date: Thu, 30 May 2024 16:47:54 +0100 Subject: [PATCH 083/230] [no ci] cancer_cons: rm outdated TODOs --- src/tlo/methods/cancer_consumables.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/tlo/methods/cancer_consumables.py b/src/tlo/methods/cancer_consumables.py index b72f86b7b9..6c5c9df65b 100644 --- a/src/tlo/methods/cancer_consumables.py +++ b/src/tlo/methods/cancer_consumables.py @@ -57,7 +57,6 @@ def get_consumable_item_codes_cancers(self, cancer_module: Module) -> Dict[str, # Add items that are specific to a particular cancer module if 'ProstateCancer' == cancer_module.name: - # TODO: @Sakshi the script to create RF_Consumables_Items_and_Pkgs needs to be re-run cons_dict['screening_psa_test_core'] = \ {get_item_code("Prostate specific antigen test"): 1} @@ -69,7 +68,6 @@ def get_consumable_item_codes_cancers(self, cancer_module: Module) -> Dict[str, elif 'BladderCancer' == cancer_module.name: # Note: bladder cancer is not in the malawi STG 2023 therefore no details on chemotherapy - # TODO: @Sakshi the script to create RF_Consumables_Items_and_Pkgs needs to be re-run cons_dict['screening_cystoscopy_core'] = \ {get_item_code("Cystoscope"): 1} @@ -82,7 +80,6 @@ def get_consumable_item_codes_cancers(self, cancer_module: Module) -> Dict[str, elif 'OesophagealCancer' == cancer_module.name: - # TODO: @Sakshi the script to create RF_Consumables_Items_and_Pkgs needs to be re-run cons_dict['screening_endoscope_core'] = \ {get_item_code("Endoscope"): 1} From e20b79475a8f3e1566dc6af15543c60ba93822a9 Mon Sep 17 00:00:00 2001 From: Eva Janouskova Date: Thu, 30 May 2024 17:01:22 +0100 Subject: [PATCH 084/230] [no ci] co: rm resolved TODOs --- src/tlo/methods/contraception.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/tlo/methods/contraception.py b/src/tlo/methods/contraception.py index bc12d03fd6..68b01071d2 100644 --- a/src/tlo/methods/contraception.py +++ b/src/tlo/methods/contraception.py @@ -692,16 +692,12 @@ def get_item_codes_for_each_contraceptive_and_probs_for_cons_alternatives(self): _cons_codes['IUD'] =\ {get_item_code("Glove disposable powdered latex medium_100_CMST"): 2, get_item_code("IUD, Copper T-380A"): 1} - # TODO: originally used "Medroxyprogesterone acetate injection 150mg/mL, 1mL vial with 2ml syringe with 22g 0.7 - # X 25mm needle_each_CMST", which is not within the RF_Costing anymore, does it mean we need to add a syringe - # and a needle? 
_cons_codes['injections'] = \ {get_item_code("Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly"): 1, get_item_code("Glove disposable powdered latex medium_100_CMST"): 1, get_item_code("Water for injection, 10ml_Each_CMST"): 1, get_item_code("Povidone iodine, solution, 10 %, 5 ml per injection"): 5, get_item_code("Gauze, swabs 8-ply 10cm x 10cm_100_CMST"): 1} - # TODO: @Sakshi - update the unit to 1 use _cons_codes['implant'] =\ {get_item_code("Glove disposable powdered latex medium_100_CMST"): 3, get_item_code("Lidocaine HCl (in dextrose 7.5%), ampoule 2 ml"): 2, From b7f61b80024ca631d5d8236bdeca27ae80959c03 Mon Sep 17 00:00:00 2001 From: Eva Janouskova Date: Thu, 30 May 2024 17:11:19 +0100 Subject: [PATCH 085/230] co: rm TODOs as opened an issue to resolve them (#1384) --- src/tlo/methods/contraception.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/tlo/methods/contraception.py b/src/tlo/methods/contraception.py index 1d6154f801..95e8a5e84c 100644 --- a/src/tlo/methods/contraception.py +++ b/src/tlo/methods/contraception.py @@ -649,7 +649,6 @@ def select_contraceptive_following_birth(self, mother_id, mother_age): def get_item_codes_for_each_contraceptive_and_probs_for_cons_alternatives(self): """Get the item_code and numbers of units per case for each contraceptive and for contraceptive initiation. Save the probs for consumable alternatives.""" - # TODO: update with optional items (currently all considered essential) # ### Get item codes and number of units per case from package name # (note: this version does not deal with consumable alternatives) @@ -1224,7 +1223,6 @@ def apply(self, person_id, squeeze_factor): self.sim.population.props.at[person_id, "co_date_of_last_fp_appt"] = self.sim.date # Determine essential and optional items - # TODO: we don't distinguish essential X optional for contraception methods yet, will need to update once we do if self.new_contraceptive in self.module.cons_alternatives_probs: # administrate one of the alternative consumables (item code -99 means, no alternative used) alternatives = list(self.module.cons_alternatives_probs[self.new_contraceptive].keys()) From 5c007cceb1a62a2623b799d54ac619ba6676464e Mon Sep 17 00:00:00 2001 From: sm2511 Date: Thu, 30 May 2024 17:17:32 +0100 Subject: [PATCH 086/230] update cost of trocar - the new unit is `1 use of reuseable trocar (assuming 100 uses per trocar)` --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 99e23d0e4e..a657754515 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4c79b2e7abf3265e35d31bc1b8fa7d4a8bb0e05672b3a25ddc81749aa24a5092 -size 4119898 +oid sha256:a290e50f73b2eefab213e7eab640d9422aa5d3fc7cc60e40dd9557946f39f20a +size 4120496 From 63e8e46d84b5671dc78494de6417e1f9aa3a738c Mon Sep 17 00:00:00 2001 From: sm2511 Date: Thu, 30 May 2024 17:43:32 +0100 Subject: [PATCH 087/230] add cancer consumables to ResourceFile_Costing.xlsx - Biopsy needle and Specimen container --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index a657754515..11dd6ade4d 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx 
@@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a290e50f73b2eefab213e7eab640d9422aa5d3fc7cc60e40dd9557946f39f20a -size 4120496 +oid sha256:4bc54df5aa6e59472dd1be864e74c905a56a0dcd33ec838d0c30446777777216 +size 4120951 From c8598f4509365d74a6fe1deb662364a6cfb66d0e Mon Sep 17 00:00:00 2001 From: sm2511 Date: Thu, 30 May 2024 17:52:19 +0100 Subject: [PATCH 088/230] include OHT 2016 costs for Biopsy needle and Specimen container --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 11dd6ade4d..845abbe672 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4bc54df5aa6e59472dd1be864e74c905a56a0dcd33ec838d0c30446777777216 -size 4120951 +oid sha256:dc21b40e8f2311f804a8164621890ce3b2c2f67ea090911f330b6d59d83285b5 +size 4121027 From 5e3d5c045e8451b56dfee833b31e4a041d3e06f9 Mon Sep 17 00:00:00 2001 From: Eva Janouskova Date: Thu, 30 May 2024 18:05:34 +0100 Subject: [PATCH 089/230] cancers: missing arguments added --- src/tlo/methods/bladder_cancer.py | 2 +- src/tlo/methods/breast_cancer.py | 2 +- src/tlo/methods/oesophagealcancer.py | 2 +- src/tlo/methods/other_adult_cancers.py | 2 +- src/tlo/methods/prostate_cancer.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/tlo/methods/bladder_cancer.py b/src/tlo/methods/bladder_cancer.py index 4d81432d04..ba4d2764f3 100644 --- a/src/tlo/methods/bladder_cancer.py +++ b/src/tlo/methods/bladder_cancer.py @@ -378,7 +378,7 @@ def initialise_simulation(self, sim): """ # We call the following function to store the required consumables for the simulation run within the appropriate # dictionary - self.item_codes_bladder_can = get_consumable_item_codes_cancers(self) + self.item_codes_bladder_can = get_consumable_item_codes_cancers(self, self.__module__) # ----- SCHEDULE LOGGING EVENTS ----- # Schedule logging event to happen immediately diff --git a/src/tlo/methods/breast_cancer.py b/src/tlo/methods/breast_cancer.py index 21347c1f98..b1568b077e 100644 --- a/src/tlo/methods/breast_cancer.py +++ b/src/tlo/methods/breast_cancer.py @@ -350,7 +350,7 @@ def initialise_simulation(self, sim): """ # We call the following function to store the required consumables for the simulation run within the appropriate # dictionary - self.item_codes_breast_can = get_consumable_item_codes_cancers(self) + self.item_codes_breast_can = get_consumable_item_codes_cancers(self, self.__module__) # ----- SCHEDULE LOGGING EVENTS ----- # Schedule logging event to happen immediately diff --git a/src/tlo/methods/oesophagealcancer.py b/src/tlo/methods/oesophagealcancer.py index b3a302bcd9..bb57f0ee28 100644 --- a/src/tlo/methods/oesophagealcancer.py +++ b/src/tlo/methods/oesophagealcancer.py @@ -365,7 +365,7 @@ def initialise_simulation(self, sim): """ # We call the following function to store the required consumables for the simulation run within the appropriate # dictionary - self.item_codes_oesophageal_can = get_consumable_item_codes_cancers(self) + self.item_codes_oesophageal_can = get_consumable_item_codes_cancers(self, self.__module__) # ----- SCHEDULE LOGGING EVENTS ----- # Schedule logging event to happen immediately diff --git a/src/tlo/methods/other_adult_cancers.py b/src/tlo/methods/other_adult_cancers.py index 1e8d5c1e22..0d9fe90094 100644 --- 
a/src/tlo/methods/other_adult_cancers.py +++ b/src/tlo/methods/other_adult_cancers.py @@ -373,7 +373,7 @@ def initialise_simulation(self, sim): """ # We call the following function to store the required consumables for the simulation run within the appropriate # dictionary - self.item_codes_other_can = get_consumable_item_codes_cancers(self) + self.item_codes_other_can = get_consumable_item_codes_cancers(self, self.__module__) # ----- SCHEDULE LOGGING EVENTS ----- # Schedule logging event to happen immediately diff --git a/src/tlo/methods/prostate_cancer.py b/src/tlo/methods/prostate_cancer.py index f9520052b1..fc7eef102f 100644 --- a/src/tlo/methods/prostate_cancer.py +++ b/src/tlo/methods/prostate_cancer.py @@ -378,7 +378,7 @@ def initialise_simulation(self, sim): """ # We call the following function to store the required consumables for the simulation run within the appropriate # dictionary - self.item_codes_prostate_can = get_consumable_item_codes_cancers(self) + self.item_codes_prostate_can = get_consumable_item_codes_cancers(self, self.__module__) # ----- SCHEDULE LOGGING EVENTS ----- # Schedule logging event to happen immediately From 108dd72c63a8ed429869e60e43b12a2eba789586 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Thu, 30 May 2024 18:46:22 +0100 Subject: [PATCH 090/230] add availability data cancer consumables - Biopsy needle and Specimen container (This is based on assumptions made in `"05 - Resources/Module-healthsystem/consumables raw files/ResourceFile_hhfa_consumables.xlsx` --- .../ResourceFile_Consumables_availability_small.csv | 4 ++-- .../consumables/ResourceFile_consumables_matched.csv | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/resources/healthsystem/consumables/ResourceFile_Consumables_availability_small.csv b/resources/healthsystem/consumables/ResourceFile_Consumables_availability_small.csv index 7db58ae153..25249531b2 100644 --- a/resources/healthsystem/consumables/ResourceFile_Consumables_availability_small.csv +++ b/resources/healthsystem/consumables/ResourceFile_Consumables_availability_small.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2261b945069955c025e106116ae167cd8dc167a962d7387b9e808b9683f5fa69 -size 6122712 +oid sha256:c358a643e4def0e574b75f89f83d77f9c3366f668422e005150f4d69ebe8d7a7 +size 6169152 diff --git a/resources/healthsystem/consumables/ResourceFile_consumables_matched.csv b/resources/healthsystem/consumables/ResourceFile_consumables_matched.csv index 73fd80d045..7ab675ecba 100644 --- a/resources/healthsystem/consumables/ResourceFile_consumables_matched.csv +++ b/resources/healthsystem/consumables/ResourceFile_consumables_matched.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d8649f9521dfd5bdbf2bc525c7dd0922a32d657fd8f69394bd997d7a595b9576 -size 90430 +oid sha256:b5b0f417681cbdd2489e2f9c6634b2825c32beb9637dc045b56e308c910a102c +size 90569 From da5a0bd7210dc1db4ee04d4d883ffbd46d2cbf03 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Thu, 18 Jul 2024 11:49:21 +0100 Subject: [PATCH 091/230] first draft of equipment cost calculation --- src/scripts/costing/costing.py | 77 ++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 77ea522f05..1b7c3e3bf0 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -54,6 +54,11 @@ def drop_outside_period(_df): results_folder = get_scenario_outputs('long_run_all_diseases.py', outputfilepath)[0] # 
impact_of_cons_regression_scenarios #results_folder = get_scenario_outputs('scenario_impact_of_consumables_availability.py', outputfilepath)[0] # impact_of_cons_regression_scenarios +# Load equipment log +equipment_results_folder = Path('./outputs/sakshi.mohan@york.ac.uk/021_long_run_all_diseases_run') +# check can read results from draw=0, run=0 +log_equipment = load_pickled_dataframes(equipment_results_folder, 0, 0) + # look at one log (so can decide what to extract) log = load_pickled_dataframes(results_folder) @@ -67,6 +72,14 @@ def drop_outside_period(_df): workbook_cost = pd.read_excel((resourcefilepath / "costing/ResourceFile_Costing.xlsx"), sheet_name = None) +# Extract districts and facility levels from the Master Facility List +mfl = pd.read_csv(resourcefilepath / "healthsystem" / "organisation" / "ResourceFile_Master_Facilities_List.csv") +districts = set(pd.read_csv(resourcefilepath / 'demography' / 'ResourceFile_Population_2010.csv')['District']) +fac_levels = set(mfl.Facility_Level) + +# Extract count of facilities from Actual Facilities List +#afl = pd.read_csv(resourcefilepath / "healthsystem" / "organisation" / "ResourceFile_Actual_Facilities_List.csv") + # 1. HR cost # 1.1 HR Cost - Financial (Given the staff available) # Load annual salary by officer type and facility level @@ -283,6 +296,70 @@ def get_counts_of_items_requested(_df): unit_cost_equipment['upfront_repair_cost_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.2 * 0.2 / 8 if row['unit_purchase_cost'] < 250000 else 0, axis=1) # 20% of the value of 20% of the items over 8 years unit_cost_equipment['replacement_cost_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.1 / 8 if row['unit_purchase_cost'] < 250000 else 0, axis=1) # 10% of the items over 8 years +unit_cost_equipment = unit_cost_equipment[['Item_code','Equipment_tlo', + 'service_fee_annual', 'spare_parts_annual', 'upfront_repair_cost_annual', 'replacement_cost_annual', + 'Health Post_prioritised', 'Community_prioritised', 'Health Center_prioritised', 'District_prioritised', 'Central_prioritised']] +unit_cost_equipment = unit_cost_equipment.rename(columns={col: 'Quantity_' + col.replace('_prioritised', '') for col in unit_cost_equipment.columns if col.endswith('_prioritised')}) +unit_cost_equipment = unit_cost_equipment.rename(columns={col: col.replace(' ', '_') for col in unit_cost_equipment.columns}) +unit_cost_equipment = unit_cost_equipment[unit_cost_equipment.Item_code.notna()] + +unit_cost_equipment = pd.wide_to_long(unit_cost_equipment, stubnames=['Quantity_'], + i=['Item_code', 'Equipment_tlo', 'service_fee_annual', 'spare_parts_annual', 'upfront_repair_cost_annual', 'replacement_cost_annual'], + j='Facility_Level', suffix='(\d+|\w+)').reset_index() +facility_level_mapping = {'Health_Post': '0', 'Health_Center': '1a', 'Community': '1b', 'District': '2', 'Central': '3'} +unit_cost_equipment['Facility_Level'] = unit_cost_equipment['Facility_Level'].replace(facility_level_mapping) +unit_cost_equipment = unit_cost_equipment.rename(columns = {'Quantity_': 'Quantity'}) +#unit_cost_equipment_small = unit_cost_equipment[['Item_code', 'Facility_Level', 'Quantity','service_fee_annual', 'spare_parts_annual', 'upfront_repair_cost_annual', 'replacement_cost_annual']] +#equipment_cost_dict = unit_cost_equipment_small.groupby('Facility_Level').apply(lambda x: x.to_dict(orient='records')).to_dict() + +# Get list of equipment used by district and level +equip = pd.DataFrame( + 
log_equipment['tlo.methods.healthsystem.summary']['EquipmentEverUsed_ByFacilityID'] +) + +equip['EquipmentEverUsed'] = equip['EquipmentEverUsed'].apply(ast.literal_eval) + +# Extract a list of equipment which was used at each facility level within each district +equipment_used = {district: {level: [] for level in fac_levels} for district in districts} # create a dictionary with a key for each district and facility level +for dist in districts: + for level in fac_levels: + equip_subset = equip[(equip['District'] == dist) & (equip['Facility_Level'] == level)] + equipment_used[dist][level] = set().union(*equip_subset['EquipmentEverUsed']) +equipment_used = pd.concat({ + k: pd.DataFrame.from_dict(v, 'index') for k, v in equipment_used.items()}, + axis=0) +list_of_equipment_used = set().union(*equip['EquipmentEverUsed']) + +equipment_df = pd.DataFrame() +equipment_df.index = equipment_used.index +for item in list_of_equipment_used: + equipment_df[str(item)] = 0 + for dist_fac_index in equipment_df.index: + equipment_df.loc[equipment_df.index == dist_fac_index, str(item)] = equipment_used[equipment_used.index == dist_fac_index].isin([item]).any(axis=1) +equipment_df.to_csv('./outputs/equipment_use.csv') +equipment_df = equipment_df.reset_index().rename(columns = {'level_0' : 'District', 'level_1': 'Facility_Level'}) +equipment_df = pd.melt(equipment_df, id_vars = ['District', 'Facility_Level']).rename(columns = {'variable': 'Item_code', 'value': 'whether_item_was_used'}) +equipment_df['Item_code'] = pd.to_numeric(equipment_df['Item_code']) + +# Merge the two datasets to calculate cost +equipment_cost = pd.merge(equipment_df, unit_cost_equipment[['Item_code', 'Equipment_tlo', 'Facility_Level', 'Quantity','service_fee_annual', 'spare_parts_annual', 'upfront_repair_cost_annual', 'replacement_cost_annual']], + on = ['Item_code', 'Facility_Level'], how = 'left', validate = "m:1") +categories_of_equipment_cost = ['replacement_cost', 'upfront_repair_cost', 'spare_parts', 'service_fee'] +for cost_category in categories_of_equipment_cost: + equipment_cost['total_' + cost_category] = equipment_cost[cost_category + '_annual'] * equipment_cost['whether_item_was_used'] * equipment_cost['Quantity'] +equipment_cost['annual_cost'] = equipment_cost[['total_' + item for item in categories_of_equipment_cost]].sum(axis = 1) +#equipment_cost.to_csv('./outputs/equipment_cost.csv') + +equipment_costs = pd.DataFrame({ + 'Cost_Category': ['Equipment'] * len(categories_of_equipment_cost), + 'Cost_Sub-category': categories_of_equipment_cost, + 'Value_2023USD': equipment_cost[['total_' + item for item in categories_of_equipment_cost]].sum().values.tolist() +}) +# Append new_data to scenario_cost_financial +scenario_cost_financial = pd.concat([scenario_cost_financial, equipment_costs], ignore_index=True) + +# TODO Use AFL to multiple the number of facilities at each level +# TODO PLot which equipment is used by district and facility or a heatmap of the number of facilities at which an equipment is used # TODO From the log, extract the facility IDs which use any equipment item # TODO Collapse facility IDs by level of care to get the total number of facilities at each level using an item # TODO Multiply number of facilities by level with the quantity needed of each equipment and collapse to get total number of equipment (nationally) From f7471fd2c957039612aeffdab13e222d5374cb55 Mon Sep 17 00:00:00 2001 From: sm2511 Date: Tue, 23 Jul 2024 15:05:50 +0100 Subject: [PATCH 092/230] correct equipment use extraction in order not to 
mask levels 3,4,5 which don't belong to any district --- src/scripts/costing/costing.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 1b7c3e3bf0..7c2695289d 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -318,9 +318,12 @@ def get_counts_of_items_requested(_df): ) equip['EquipmentEverUsed'] = equip['EquipmentEverUsed'].apply(ast.literal_eval) +equip.loc[equip.Facility_Level.isin(['3', '4', '5']),'District'] = 'Central' # Assign a district name for Central health facilities +districts.add('Central') # Extract a list of equipment which was used at each facility level within each district equipment_used = {district: {level: [] for level in fac_levels} for district in districts} # create a dictionary with a key for each district and facility level + for dist in districts: for level in fac_levels: equip_subset = equip[(equip['District'] == dist) & (equip['Facility_Level'] == level)] @@ -337,6 +340,7 @@ def get_counts_of_items_requested(_df): for dist_fac_index in equipment_df.index: equipment_df.loc[equipment_df.index == dist_fac_index, str(item)] = equipment_used[equipment_used.index == dist_fac_index].isin([item]).any(axis=1) equipment_df.to_csv('./outputs/equipment_use.csv') + equipment_df = equipment_df.reset_index().rename(columns = {'level_0' : 'District', 'level_1': 'Facility_Level'}) equipment_df = pd.melt(equipment_df, id_vars = ['District', 'Facility_Level']).rename(columns = {'variable': 'Item_code', 'value': 'whether_item_was_used'}) equipment_df['Item_code'] = pd.to_numeric(equipment_df['Item_code']) From c88ea29dfad7f6ed0bb02af48a55fd46e7bc2277 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Thu, 15 Aug 2024 19:44:21 +0100 Subject: [PATCH 093/230] clean HR costing and add figures --- src/scripts/costing/costing.py | 173 +++++++++++++++++++++++---------- 1 file changed, 123 insertions(+), 50 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 7c2695289d..e19641c005 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -41,6 +41,9 @@ costing_outputs_folder = Path('./outputs/costing') if not os.path.exists(costing_outputs_folder): os.makedirs(costing_outputs_folder) +figurespath = costing_outputs_folder / "figures" +if not os.path.exists(figurespath): + os.makedirs(figurespath) # Declare period for which the results will be generated (defined inclusively) TARGET_PERIOD = (Date(2015, 1, 1), Date(2015, 12, 31)) @@ -49,13 +52,13 @@ def drop_outside_period(_df): return _df.drop(index=_df.index[~_df['date'].between(*TARGET_PERIOD)]) # %% Gathering basic information -# Find results_folder associated with a given batch_file and get most recent +# Load result files +#------------------- #results_folder = get_scenario_outputs('example_costing_scenario.py', outputfilepath)[0] # impact_of_cons_regression_scenarios results_folder = get_scenario_outputs('long_run_all_diseases.py', outputfilepath)[0] # impact_of_cons_regression_scenarios #results_folder = get_scenario_outputs('scenario_impact_of_consumables_availability.py', outputfilepath)[0] # impact_of_cons_regression_scenarios - -# Load equipment log equipment_results_folder = Path('./outputs/sakshi.mohan@york.ac.uk/021_long_run_all_diseases_run') + # check can read results from draw=0, run=0 log_equipment = load_pickled_dataframes(equipment_results_folder, 0, 0) @@ -68,7 +71,9 @@ def drop_outside_period(_df): # 1) Extract the parameters that have varied 
over the set of simulations params = extract_params(results_folder) -# Load costing resourcefile +# Load cost input files +#------------------------ +# Load primary costing resourcefile workbook_cost = pd.read_excel((resourcefilepath / "costing/ResourceFile_Costing.xlsx"), sheet_name = None) @@ -80,25 +85,27 @@ def drop_outside_period(_df): # Extract count of facilities from Actual Facilities List #afl = pd.read_csv(resourcefilepath / "healthsystem" / "organisation" / "ResourceFile_Actual_Facilities_List.csv") +#%% Calculate financial costs # 1. HR cost -# 1.1 HR Cost - Financial (Given the staff available) # Load annual salary by officer type and facility level hr_cost_parameters = workbook_cost["human_resources"] +hr_cost_parameters['Facility_Level'] = hr_cost_parameters['Facility_Level'].astype(str) hr_annual_salary = hr_cost_parameters[hr_cost_parameters['Parameter_name'] == 'salary_usd'] -hr_annual_salary['OfficerType_FacilityLevel'] = 'Officer_Type=' + hr_annual_salary['Officer_Category'].astype(str) + '|Facility_Level=' + hr_annual_salary['Facility_Level'].astype(str) +hr_annual_salary['OfficerType_FacilityLevel'] = 'Officer_Type=' + hr_annual_salary['Officer_Category'].astype(str) + '|Facility_Level=' + hr_annual_salary['Facility_Level'].astype(str) # create column for merging with model log # Load scenario staffing level hr_scenario = log[ 'tlo.scenario']['override_parameter']['new_value'][log[ 'tlo.scenario'][ 'override_parameter']['name'] == 'use_funded_or_actual_staffing'] if hr_scenario.empty: - current_staff_count = pd.read_csv( - resourcefilepath / "healthsystem/human_resources/actual/ResourceFile_Daily_Capabilities.csv") + staff_count = pd.read_csv( + resourcefilepath / "healthsystem/human_resources/actual/ResourceFile_Daily_Capabilities.csv") # if missing default to reading actual capabilities else: - current_staff_count = pd.read_csv( + staff_count = pd.read_csv( resourcefilepath / 'healthsystem'/ 'human_resources' / f'{hr_scenario[2]}' / 'ResourceFile_Daily_Capabilities.csv') -current_staff_count_by_level_and_officer_type = current_staff_count.groupby(['Facility_Level', 'Officer_Category'])[ +staff_count_by_level_and_officer_type = staff_count.groupby(['Facility_Level', 'Officer_Category'])[ 'Staff_Count'].sum().reset_index() +staff_count_by_level_and_officer_type['Facility_Level'] = staff_count_by_level_and_officer_type['Facility_Level'].astype(str) # Check if any cadres were not utilised at particular levels of care in the simulation def expand_capacity_by_officer_type_and_facility_level(_df: pd.Series) -> pd.Series: @@ -107,45 +114,72 @@ def expand_capacity_by_officer_type_and_facility_level(_df: pd.Series) -> pd.Ser _df.index.name = 'year' return unflatten_flattened_multi_index_in_logging(_df).stack(level=[0, 1]) # expanded flattened axis -extracted_results = extract_results( +annual_capacity_used_by_cadre_and_level = summarize(extract_results( Path(results_folder), module='tlo.methods.healthsystem.summary', key='Capacity_By_OfficerType_And_FacilityLevel', custom_generate_series=expand_capacity_by_officer_type_and_facility_level, do_scaling=False, -) - -# Mean of results across the runs -summarized_results = summarize(extracted_results, only_mean=True, collapse_columns=True) +), only_mean=True, collapse_columns=True) # Take mean across the entire simulation -mean_across_simulation = summarized_results.groupby(['OfficerType', 'FacilityLevel']).mean() - +average_capacity_used_by_cadre_and_level = annual_capacity_used_by_cadre_and_level.groupby(['OfficerType', 
'FacilityLevel']).mean().reset_index(drop=False) # Unstack to make it look like a nice table -cadres_utilisation_rate = mean_across_simulation.reset_index(drop=False) -cadres_utilisation_rate['OfficerType_FacilityLevel'] = 'Officer_Type=' + cadres_utilisation_rate['OfficerType'].astype(str) + '|Facility_Level=' + cadres_utilisation_rate['FacilityLevel'].astype(str) -list_of_cadre_and_level_combinations_used = cadres_utilisation_rate[cadres_utilisation_rate['mean'] != 0]['OfficerType_FacilityLevel'] +average_capacity_used_by_cadre_and_level['OfficerType_FacilityLevel'] = 'Officer_Type=' + average_capacity_used_by_cadre_and_level['OfficerType'].astype(str) + '|Facility_Level=' + average_capacity_used_by_cadre_and_level['FacilityLevel'].astype(str) +list_of_cadre_and_level_combinations_used = average_capacity_used_by_cadre_and_level[average_capacity_used_by_cadre_and_level['mean'] != 0]['OfficerType_FacilityLevel'] +print(f"Out of {len(average_capacity_used_by_cadre_and_level)} cadre and level combinations available, {len(list_of_cadre_and_level_combinations_used)} are used in the simulation") # Subset scenario staffing level to only include cadre-level combinations used in the simulation -current_staff_count_by_level_and_officer_type['OfficerType_FacilityLevel'] = 'Officer_Type=' + current_staff_count_by_level_and_officer_type['Officer_Category'].astype(str) + '|Facility_Level=' + current_staff_count_by_level_and_officer_type['Facility_Level'].astype(str) -used_staff_count_by_level_and_officer_type = current_staff_count_by_level_and_officer_type[current_staff_count_by_level_and_officer_type['OfficerType_FacilityLevel'].isin(list_of_cadre_and_level_combinations_used)] +staff_count_by_level_and_officer_type['OfficerType_FacilityLevel'] = 'Officer_Type=' + staff_count_by_level_and_officer_type['Officer_Category'].astype(str) + '|Facility_Level=' + staff_count_by_level_and_officer_type['Facility_Level'].astype(str) +used_staff_count_by_level_and_officer_type = staff_count_by_level_and_officer_type[staff_count_by_level_and_officer_type['OfficerType_FacilityLevel'].isin(list_of_cadre_and_level_combinations_used)] # Calculate various components of HR cost -# 1.1 Salary cost for modelled health workforce (Staff count X Annual salary) -current_staff_count_by_level_and_officer_type = current_staff_count_by_level_and_officer_type.reset_index() -current_staff_count_by_level_and_officer_type = current_staff_count_by_level_and_officer_type.drop(current_staff_count_by_level_and_officer_type[current_staff_count_by_level_and_officer_type.Facility_Level == '5'].index) -salary_for_all_staff = pd.merge(current_staff_count_by_level_and_officer_type[['OfficerType_FacilityLevel', 'Staff_Count']], +# 1.1 Salary cost for current total staff +#--------------------------------------------------------------------------------------------------------------- +staff_count_by_level_and_officer_type = staff_count_by_level_and_officer_type.drop(staff_count_by_level_and_officer_type[staff_count_by_level_and_officer_type.Facility_Level == '5'].index) # drop headquarters because we're only concerned with staff engaged in service delivery +salary_for_all_staff = pd.merge(staff_count_by_level_and_officer_type[['OfficerType_FacilityLevel', 'Staff_Count']], hr_annual_salary[['OfficerType_FacilityLevel', 'Value']], on = ['OfficerType_FacilityLevel'], how = "left") -salary_for_all_staff['Total_salary_by_cadre_and_level'] = salary_for_all_staff['Value'] * salary_for_all_staff['Staff_Count'] -total_salary_for_all_staff = 
salary_for_all_staff['Total_salary_by_cadre_and_level'].sum() +salary_for_all_staff['Cost'] = salary_for_all_staff['Value'] * salary_for_all_staff['Staff_Count'] +total_salary_for_all_staff = salary_for_all_staff['Cost'].sum() -# 1.2 Salary cost for current total staff +# 1.2 Salary cost for health workforce cadres used in the simulation (Staff count X Annual salary) +#--------------------------------------------------------------------------------------------------------------- salary_for_staff_used_in_scenario = pd.merge(used_staff_count_by_level_and_officer_type[['OfficerType_FacilityLevel', 'Staff_Count']], hr_annual_salary[['OfficerType_FacilityLevel', 'Value']], on = ['OfficerType_FacilityLevel'], how = "left") -salary_for_staff_used_in_scenario['Total_salary_by_cadre_and_level'] = salary_for_staff_used_in_scenario['Value'] * salary_for_staff_used_in_scenario['Staff_Count'] -total_salary_for_staff_used_in_scenario = salary_for_staff_used_in_scenario['Total_salary_by_cadre_and_level'].sum() +salary_for_staff_used_in_scenario['Cost'] = salary_for_staff_used_in_scenario['Value'] * salary_for_staff_used_in_scenario['Staff_Count'] +total_salary_for_staff_used_in_scenario = salary_for_staff_used_in_scenario['Cost'].sum() + +# Bar chart of salaries by cadre which goes into the HR folder in outputs (stacked for levels of care and two series for modelled and all) +def get_level_and_cadre_from_concatenated_value(_df, varname): + _df['Cadre'] = _df[varname].str.extract(r'=(.*?)\|') + _df['Facility_Level'] = _df[varname].str.extract(r'^[^=]*=[^|]*\|[^=]*=([^|]*)') + return _df +def plot_cost_by_cadre_and_level(_df, figname_prefix, figname_suffix): + if ('Facility_Level' in _df.columns) & ('Cadre' in _df.columns): + pass + else: + _df = get_level_and_cadre_from_concatenated_value(_df, 'OfficerType_FacilityLevel') + + pivot_df = _df.pivot_table(index='Cadre', columns='Facility_Level', values='Cost', + aggfunc='sum', fill_value=0) + total_salary = round(_df['Cost'].sum(), 0) + total_salary = f"{total_salary:,.0f}" + ax = pivot_df.plot(kind='bar', stacked=True, title='Stacked Bar Graph by Cadre and Facility Level') + plt.ylabel(f'US Dollars') + plt.title(f"Annual {figname_prefix} cost by cadre and facility level") + plt.xticks(rotation=45) + plt.yticks(rotation=0) + plt.text(x=0.3, y=-0.5, s=f"Total {figname_prefix} cost = USD {total_salary}", transform=ax.transAxes, + horizontalalignment='center', fontsize=12, weight='bold', color='black') + plt.savefig(figurespath / f'{figname_prefix}_by_cadre_and_level_{figname_suffix}.png', dpi=100, + bbox_inches='tight') + plt.close() + +plot_cost_by_cadre_and_level(salary_for_all_staff,figname_prefix = "salary", figname_suffix= "all_staff") +plot_cost_by_cadre_and_level(salary_for_staff_used_in_scenario,figname_prefix = "salary", figname_suffix= "staff_used_in_scenario") # 1.3 Recruitment cost to fill gap created by attrition +#--------------------------------------------------------------------------------------------------------------- def merge_cost_and_model_data(cost_df, model_df, varnames): merged_df = model_df.copy() for varname in varnames: @@ -161,44 +195,83 @@ def merge_cost_and_model_data(cost_df, model_df, varnames): merged_df = pd.merge(merged_df, new_cost_df, on=['Officer_Category', 'Facility_Level'], how="left") return merged_df -recruitment_cost_df = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type, +recruitment_cost = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = 
staff_count_by_level_and_officer_type, varnames = ['annual_attrition_rate', 'recruitment_cost_per_person_recruited_usd']) -recruitment_cost_df['annual_recruitment_cost'] = recruitment_cost_df['annual_attrition_rate'] * recruitment_cost_df['Staff_Count'] * \ - recruitment_cost_df['recruitment_cost_per_person_recruited_usd'] -recruitment_cost_for_attrited_workers = recruitment_cost_df['annual_recruitment_cost'].sum() +recruitment_cost['Cost'] = recruitment_cost['annual_attrition_rate'] * recruitment_cost['Staff_Count'] * \ + recruitment_cost['recruitment_cost_per_person_recruited_usd'] +total_recruitment_cost_for_attrited_workers = recruitment_cost['Cost'].sum() + +plot_cost_by_cadre_and_level(recruitment_cost, figname_prefix = "recruitment", figname_suffix= "all_staff") # 1.4 Pre-service training cost to fill gap created by attrition -preservice_training_cost_df = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type, +#--------------------------------------------------------------------------------------------------------------- +preservice_training_cost = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = staff_count_by_level_and_officer_type, varnames = ['annual_attrition_rate', 'licensure_exam_passing_rate', 'graduation_rate', 'absorption_rate_of_students_into_public_workforce', 'proportion_of_workforce_recruited_from_abroad', 'annual_preservice_training_cost_percapita_usd']) -preservice_training_cost_df['annual_preservice_training_cost'] = preservice_training_cost_df['annual_attrition_rate'] * preservice_training_cost_df['Staff_Count'] * \ - (1/(preservice_training_cost_df['absorption_rate_of_students_into_public_workforce'] + preservice_training_cost_df['proportion_of_workforce_recruited_from_abroad'])) * \ - (1/preservice_training_cost_df['graduation_rate']) * (1/preservice_training_cost_df['licensure_exam_passing_rate']) * \ - preservice_training_cost_df['annual_preservice_training_cost_percapita_usd'] -preservice_training_cost_for_attrited_workers = preservice_training_cost_df['annual_preservice_training_cost'].sum() +preservice_training_cost['Cost'] = preservice_training_cost['annual_attrition_rate'] * preservice_training_cost['Staff_Count'] * \ + (1/(preservice_training_cost['absorption_rate_of_students_into_public_workforce'] + preservice_training_cost['proportion_of_workforce_recruited_from_abroad'])) * \ + (1/preservice_training_cost['graduation_rate']) * (1/preservice_training_cost['licensure_exam_passing_rate']) * \ + preservice_training_cost['annual_preservice_training_cost_percapita_usd'] +preservice_training_cost_for_attrited_workers = preservice_training_cost['Cost'].sum() + +plot_cost_by_cadre_and_level(preservice_training_cost, figname_prefix = "pre-service training", figname_suffix= "all_staff") # 1.5 In-service training cost to train all staff -inservice_training_cost_df = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type, +#--------------------------------------------------------------------------------------------------------------- +inservice_training_cost = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = staff_count_by_level_and_officer_type, varnames = ['annual_inservice_training_cost_usd']) -inservice_training_cost_df['annual_inservice_training_cost'] = inservice_training_cost_df['Staff_Count'] * inservice_training_cost_df['annual_inservice_training_cost_usd'] -inservice_training_cost_for_staff_used_in_scenario = 
inservice_training_cost_df['annual_inservice_training_cost'].sum() +inservice_training_cost['Cost'] = inservice_training_cost['Staff_Count'] * inservice_training_cost['annual_inservice_training_cost_usd'] +inservice_training_cost_for_all_staff = inservice_training_cost['Cost'].sum() + +plot_cost_by_cadre_and_level(inservice_training_cost, figname_prefix = "in-service training", figname_suffix= "all_staff") + # TODO check why annual_inservice_training_cost for DCSA is NaN in the merged_df # Create a dataframe to store financial costs -hr_cost_subcategories = ['total_salary_for_all_staff', 'total_salary_for_staff_used_in_scenario', - 'recruitment_cost_for_attrited_workers', 'preservice_training_cost_for_attrited_workers', - 'inservice_training_cost_for_staff_used_in_scenario'] -scenario_cost_financial = pd.DataFrame({ +hr_cost_subcategories = ['salary_for_all_staff', 'recruitment_cost', + 'preservice_training_cost', 'inservice_training_cost'] +scenario_cost = pd.DataFrame({ 'Cost_Category': ['Human Resources for Health'] * len(hr_cost_subcategories), 'Cost_Sub-category': hr_cost_subcategories, - 'Value_2023USD': [total_salary_for_all_staff, total_salary_for_staff_used_in_scenario, recruitment_cost_for_attrited_workers, preservice_training_cost_for_attrited_workers, inservice_training_cost_for_staff_used_in_scenario] + 'Cost': [salary_for_all_staff['Cost'].sum(), recruitment_cost['Cost'].sum(), + preservice_training_cost['Cost'].sum(), preservice_training_cost['Cost'].sum()] }) - # TODO 'Value_2023USD' - use hr_cost_subcategories rather than the hardcoded list # TODO Consider calculating economic cost of HR by multiplying salary times staff count with cadres_utilisation_rate +def plot_components_of_cost_category(_df, cost_category, figname_suffix): + pivot_df = _df[_df['Cost_Category'] == cost_category].pivot_table(index='Cost_Sub-category', values='Cost', + aggfunc='sum', fill_value=0) + ax = pivot_df.plot(kind='bar', stacked=False, title='Scenario Cost by Category') + plt.ylabel(f'US Dollars') + plt.title(f"Annual {cost_category} cost") + plt.xticks(rotation=45) + plt.yticks(rotation=0) + + # Add text labels on the bars + total_cost = pivot_df['Cost'].sum() + rects = ax.patches + for rect, cost in zip(rects, pivot_df['Cost']): + cost_millions = cost / 1e6 + percentage = (cost / total_cost) * 100 + label_text = f"{cost_millions:.1f}M ({percentage:.1f}%)" + # Place text at the top of the bar + x = rect.get_x() + rect.get_width() / 2 + y = rect.get_height() + ax.text(x, y, label_text, ha='center', va='bottom', fontsize=8, rotation=0) + + total_cost = f"{total_cost:,.0f}" + plt.text(x=0.3, y=-0.5, s=f"Total {cost_category} cost = USD {total_cost}", transform=ax.transAxes, + horizontalalignment='center', fontsize=12, weight='bold', color='black') + + plt.savefig(figurespath / f'{cost_category}_by_cadre_and_level_{figname_suffix}.png', dpi=100, + bbox_inches='tight') + plt.close() + +plot_components_of_cost_category(_df = scenario_cost, cost_category = 'Human Resources for Health', figname_suffix = "all_staff") + # 2. 
Consumables cost def get_counts_of_items_requested(_df): _df = drop_outside_period(_df) From 63c94c2a1b8040210a84360a954f91f24023e080 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 21 Aug 2024 17:28:31 +0100 Subject: [PATCH 094/230] add the cost of item codes 19 and 20 --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 845abbe672..3d90b8a2d5 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dc21b40e8f2311f804a8164621890ce3b2c2f67ea090911f330b6d59d83285b5 -size 4121027 +oid sha256:6196d471d4f48759e85b58af4af26e490752786d244785c6fc958be5e9e6fcf4 +size 4122795 From b6502e5215fae747ad4b2410ca082ba9eaac960f Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 21 Aug 2024 19:11:16 +0100 Subject: [PATCH 095/230] correct the costs being extracted from OneHealthTool - the chosen unit was not correctly applied. --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 3d90b8a2d5..720634fec5 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6196d471d4f48759e85b58af4af26e490752786d244785c6fc958be5e9e6fcf4 -size 4122795 +oid sha256:6426eeffc40c0b028194a3446457cfe1bcf17e58baaa9f57421dff434478d3c1 +size 4122347 From 9eb1b5526d8fc37b7660e441d1240f4baa0d0c6f Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Thu, 29 Aug 2024 19:20:29 +0100 Subject: [PATCH 096/230] update costing of consumables dispensed and add a plot --- src/scripts/costing/costing.py | 171 ++++++++++++++++++++++++++------- 1 file changed, 138 insertions(+), 33 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index e19641c005..2c332059c0 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -46,7 +46,7 @@ os.makedirs(figurespath) # Declare period for which the results will be generated (defined inclusively) -TARGET_PERIOD = (Date(2015, 1, 1), Date(2015, 12, 31)) +TARGET_PERIOD = (Date(2015, 1, 1), Date(2015, 12, 31)) # TODO allow for multi-year costing def drop_outside_period(_df): """Return a dataframe which only includes for which the date is within the limits defined by TARGET_PERIOD""" return _df.drop(index=_df.index[~_df['date'].between(*TARGET_PERIOD)]) @@ -58,6 +58,8 @@ def drop_outside_period(_df): results_folder = get_scenario_outputs('long_run_all_diseases.py', outputfilepath)[0] # impact_of_cons_regression_scenarios #results_folder = get_scenario_outputs('scenario_impact_of_consumables_availability.py', outputfilepath)[0] # impact_of_cons_regression_scenarios equipment_results_folder = Path('./outputs/sakshi.mohan@york.ac.uk/021_long_run_all_diseases_run') +consumables_results_folder = Path('./outputs/sakshi.mohan@york.ac.uk/impact_of_consumables_scenarios-2024-06-11T204007Z/') +# TODO When the costing module is ready the above results_folder should be the same for the calculation of all costs # check can read results from draw=0, run=0 log_equipment = load_pickled_dataframes(equipment_results_folder, 0, 0) @@ -272,37 +274,44 @@ def plot_components_of_cost_category(_df, cost_category, 
figname_suffix): plot_components_of_cost_category(_df = scenario_cost, cost_category = 'Human Resources for Health', figname_suffix = "all_staff") +# %% # 2. Consumables cost -def get_counts_of_items_requested(_df): - _df = drop_outside_period(_df) - counts_of_available = defaultdict(int) - counts_of_not_available = defaultdict(int) - for _, row in _df.iterrows(): - for item, num in row['Item_Available'].items(): - counts_of_available[item] += num - for item, num in row['Item_NotAvailable'].items(): - counts_of_not_available[item] += num - return pd.concat( - {'Available': pd.Series(counts_of_available), 'Not_Available': pd.Series(counts_of_not_available)}, - axis=1 - ).fillna(0).astype(int).stack() - -cons_req = extract_results( - results_folder, - module='tlo.methods.healthsystem.summary', - key='Consumables', - custom_generate_series=get_counts_of_items_requested, - do_scaling=True) - -# Mean of results across the runs -summarized_cons_req = summarize(cons_req, only_mean=True, collapse_columns=True) - -# Consumables to be costed (only available, i.e. dispensed) -cons_dispensed = summarized_cons_req.xs("Available", level=1) -cons_dispensed = cons_dispensed.to_dict() -cons_dispensed = defaultdict(int, {int(key): value for key, value in cons_dispensed.items()}) # Convert string keys to integer +def get_quantity_of_consumables_dispensed(results_folder): + def get_counts_of_items_requested(_df): + _df = drop_outside_period(_df) + counts_of_available = defaultdict(int) + counts_of_not_available = defaultdict(int) + for _, row in _df.iterrows(): + for item, num in row['Item_Available'].items(): + counts_of_available[item] += num + for item, num in row['Item_NotAvailable'].items(): + counts_of_not_available[item] += num + return pd.concat( + {'Available': pd.Series(counts_of_available), 'Not_Available': pd.Series(counts_of_not_available)}, + axis=1 + ).fillna(0).astype(int).stack() + + cons_req = summarize( + extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='Consumables', + custom_generate_series=get_counts_of_items_requested, + do_scaling=True) + ) + + cons_dispensed = cons_req.xs("Available", level=1) # only keep actual dispensed amount, i.e. 
when available + return cons_dispensed + +consumables_dispensed_under_perfect_availability = get_quantity_of_consumables_dispensed(consumables_results_folder)[9] +consumables_dispensed_under_perfect_availability = consumables_dispensed_under_perfect_availability['mean'].to_dict() # TODO incorporate uncertainty in estimates +consumables_dispensed_under_perfect_availability = defaultdict(int, {int(key): value for key, value in + consumables_dispensed_under_perfect_availability.items()}) # Convert string keys to integer +consumables_dispensed_under_default_availability = get_quantity_of_consumables_dispensed(consumables_results_folder)[0] +consumables_dispensed_under_default_availability = consumables_dispensed_under_default_availability['mean'].to_dict() +consumables_dispensed_under_default_availability = defaultdict(int, {int(key): value for key, value in + consumables_dispensed_under_default_availability.items()}) # Convert string keys to integer -# 2.1 Cost of consumables dispensed # Load consumables cost data unit_price_consumable = workbook_cost["consumables"] unit_price_consumable = unit_price_consumable.rename(columns=unit_price_consumable.iloc[0]) @@ -310,12 +319,108 @@ def get_counts_of_items_requested(_df): unit_price_consumable = unit_price_consumable[unit_price_consumable['Item_Code'].notna()] unit_price_consumable = unit_price_consumable.set_index('Item_Code').to_dict(orient='index') +# 2.1 Cost of consumables dispensed +#--------------------------------------------------------------------------------------------------------------- # Multiply number of items needed by cost of consumable -cost_of_consumables_dispensed = dict(zip(unit_price_consumable, (unit_price_consumable[key]['Final_price_per_chosen_unit (USD, 2023)'] * - cons_dispensed[key] for key in cons_dispensed))) -total_cost_of_consumables_dispensed = sum(value for value in cost_of_consumables_dispensed.values() if not np.isnan(value)) +cost_of_consumables_dispensed_under_perfect_availability = {key: unit_price_consumable[key]['Final_price_per_chosen_unit (USD, 2023)'] * consumables_dispensed_under_perfect_availability[key] for + key in unit_price_consumable if key in consumables_dispensed_under_perfect_availability} +total_cost_of_consumables_dispensed_under_perfect_availability = sum(value for value in cost_of_consumables_dispensed_under_perfect_availability.values() if not np.isnan(value)) + +cost_of_consumables_dispensed_under_default_availability = {key: unit_price_consumable[key]['Final_price_per_chosen_unit (USD, 2023)'] * consumables_dispensed_under_default_availability[key] for + key in unit_price_consumable if key in consumables_dispensed_under_default_availability} +total_cost_of_consumables_dispensed_under_default_availability = sum(value for value in cost_of_consumables_dispensed_under_default_availability.values() if not np.isnan(value)) + +# Extract cost to .csv +def convert_dict_to_dataframe(_dict): + data = {key: [value] for key, value in _dict.items()} + _df = pd.DataFrame(data) + return _df + +cost_perfect_df = convert_dict_to_dataframe(cost_of_consumables_dispensed_under_perfect_availability).T.rename(columns = {0:"cost_perfect_availability"}).round(2) +cost_default_df = convert_dict_to_dataframe(cost_of_consumables_dispensed_under_default_availability).T.rename(columns = {0:"cost_default_availability"}).round(2) +unit_cost_df = convert_dict_to_dataframe(unit_price_consumable).T.rename(columns = {0:"unit_cost"}) +dispensed_default_df = 
convert_dict_to_dataframe(consumables_dispensed_under_default_availability).T.rename(columns = {0:"dispensed_default_availability"}).round(2) +dispensed_perfect_df = convert_dict_to_dataframe(consumables_dispensed_under_perfect_availability).T.rename(columns = {0:"dispensed_perfect_availability"}).round(2) + +full_cons_cost_df = pd.merge(cost_perfect_df, cost_default_df, left_index=True, right_index=True) +full_cons_cost_df = pd.merge(full_cons_cost_df, unit_cost_df, left_index=True, right_index=True) +full_cons_cost_df = pd.merge(full_cons_cost_df, dispensed_default_df, left_index=True, right_index=True) +full_cons_cost_df = pd.merge(full_cons_cost_df, dispensed_perfect_df, left_index=True, right_index=True) +full_cons_cost_df = full_cons_cost_df.reset_index().rename(columns = {'index' : 'item_code'}) +full_cons_cost_df.to_csv(figurespath / 'consumables_cost_220824.csv') + +# Import data for plotting +tlo_lmis_mapping = pd.read_csv(path_for_new_resourcefiles / 'ResourceFile_consumables_matched.csv', low_memory=False, encoding="ISO-8859-1")[['item_code', 'module_name']] +tlo_lmis_mapping = tlo_lmis_mapping[~tlo_lmis_mapping['item_code'].duplicated(keep='first')] +full_cons_cost_df = pd.merge(full_cons_cost_df, tlo_lmis_mapping, on = 'item_code', how = 'left', validate = "1:1") + +def recategorize_modules_into_consumable_categories(_df): + _df['category'] = _df['module_name'].str.lower() + cond_RH = (_df['category'].str.contains('care_of_women_during_pregnancy')) | \ + (_df['category'].str.contains('labour')) + cond_newborn = (_df['category'].str.contains('newborn')) + cond_newborn[cond_newborn.isna()] = False + cond_childhood = (_df['category'] == 'acute lower respiratory infections') | \ + (_df['category'] == 'measles') | \ + (_df['category'] == 'diarrhoea') + cond_rti = _df['category'] == 'road traffic injuries' + cond_cancer = _df['category'].str.contains('cancer') + cond_cancer[cond_cancer.isna()] = False + cond_ncds = (_df['category'] == 'epilepsy') | \ + (_df['category'] == 'depression') + _df.loc[cond_RH, 'category'] = 'reproductive_health' + _df.loc[cond_cancer, 'category'] = 'cancer' + _df.loc[cond_newborn, 'category'] = 'neonatal_health' + _df.loc[cond_childhood, 'category'] = 'other_childhood_illnesses' + _df.loc[cond_rti, 'category'] = 'road_traffic_injuries' + _df.loc[cond_ncds, 'category'] = 'ncds' + cond_condom = _df['item_code'] == 2 + _df.loc[cond_condom, 'category'] = 'contraception' + + # Create a general consumables category + general_cons_list = [300, 33, 57, 58, 141, 5, 6, 10, 21, 23, 127, 24, 80, 93, 144, 149, 154, 40, 67, 73, 76, + 82, 101, 103, 88, 126, 135, 71, 98, 171, 133, 134, 244, 247, 49, 112, 1933, 1960] + cond_general = _df['item_code'].isin(general_cons_list) + _df.loc[cond_general, 'category'] = 'general' + + return _df + +full_cons_cost_df = recategorize_modules_into_consumable_categories(full_cons_cost_df) +# Fill gaps in categories +dict_for_missing_categories = {292: 'acute lower respiratory infections', 293: 'acute lower respiratory infections', + 307: 'reproductive_health', 2019: 'reproductive_health', + 2678: 'tb', 1171: 'other_childhood_illnesses', 1237: 'cancer', 1239: 'cancer'} +# Use map to create a new series from item_code to fill missing values in category +mapped_categories = full_cons_cost_df['item_code'].map(dict_for_missing_categories) +# Use fillna on the 'category' column to fill missing values using the mapped_categories +full_cons_cost_df['category'] = full_cons_cost_df['category'].fillna(mapped_categories) + +# Bar plot of cost by 
category +def plot_cost_by_consumable_category(_df, suffix): + pivot_df = _df.groupby('category')['cost_' + suffix].sum().reset_index() + pivot_df['cost_' + suffix] = pivot_df['cost_' + suffix]/1e6 + total_cost = round(_df['cost_' + suffix].sum(), 0) + total_cost = f"{total_cost:,.0f}" + ax = pivot_df.plot(kind='bar', stacked=False, title='Consumables cost by Category/Program') + # Setting x-ticks explicitly + #ax.set_xticks(range(len(pivot_df['category']))) + ax.set_xticklabels(pivot_df['category'], rotation=45) + plt.ylabel(f'US Dollars (millions)') + plt.title(f"Annual consumables cost by category (assuming {suffix})") + plt.xticks(rotation=90) + plt.yticks(rotation=0) + plt.text(x=0.5, y=-0.8, s=f"Total consumables cost =\n USD {total_cost}", transform=ax.transAxes, + horizontalalignment='center', fontsize=12, weight='bold', color='black') + plt.savefig(figurespath / f'consumables_cost_by_category_{suffix}.png', dpi=100, + bbox_inches='tight') + plt.close() + +plot_cost_by_consumable_category(full_cons_cost_df, 'perfect_availability') +plot_cost_by_consumable_category(full_cons_cost_df, 'default_availability') # 2.2 Cost of consumables stocked (quantity needed for what is dispensed) +#--------------------------------------------------------------------------------------------------------------- +# Stocked amount should be higher than dispensed because of i. excess capacity, ii. theft, iii. expiry # Estimate the stock to dispensed ratio from OpenLMIS data lmis_consumable_usage = pd.read_csv(path_for_new_resourcefiles / "ResourceFile_Consumables_availability_and_usage.csv") # Collapse individual facilities From 010b4b455485ab449fe9f5c5b23532232a37dacc Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Thu, 29 Aug 2024 19:40:51 +0100 Subject: [PATCH 097/230] add bar plot of the cost of the top 10 consumables --- src/scripts/costing/costing.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 2c332059c0..e165aa030a 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -350,7 +350,7 @@ def convert_dict_to_dataframe(_dict): full_cons_cost_df.to_csv(figurespath / 'consumables_cost_220824.csv') # Import data for plotting -tlo_lmis_mapping = pd.read_csv(path_for_new_resourcefiles / 'ResourceFile_consumables_matched.csv', low_memory=False, encoding="ISO-8859-1")[['item_code', 'module_name']] +tlo_lmis_mapping = pd.read_csv(path_for_new_resourcefiles / 'ResourceFile_consumables_matched.csv', low_memory=False, encoding="ISO-8859-1")[['item_code', 'module_name', 'consumable_name_tlo']] tlo_lmis_mapping = tlo_lmis_mapping[~tlo_lmis_mapping['item_code'].duplicated(keep='first')] full_cons_cost_df = pd.merge(full_cons_cost_df, tlo_lmis_mapping, on = 'item_code', how = 'left', validate = "1:1") @@ -396,27 +396,36 @@ def recategorize_modules_into_consumable_categories(_df): full_cons_cost_df['category'] = full_cons_cost_df['category'].fillna(mapped_categories) # Bar plot of cost by category -def plot_cost_by_consumable_category(_df, suffix): - pivot_df = _df.groupby('category')['cost_' + suffix].sum().reset_index() +def plot_consumable_cost(_df, suffix, groupby_var, top_x_values = float('nan')): + pivot_df = _df.groupby(groupby_var)['cost_' + suffix].sum().reset_index() pivot_df['cost_' + suffix] = pivot_df['cost_' + suffix]/1e6 + if math.isnan(top_x_values): + pass + else: + pivot_df = pivot_df.sort_values('cost_' + suffix, ascending = False)[1:top_x_values] 
total_cost = round(_df['cost_' + suffix].sum(), 0) total_cost = f"{total_cost:,.0f}" - ax = pivot_df.plot(kind='bar', stacked=False, title='Consumables cost by Category/Program') + ax = pivot_df['cost_' + suffix].plot(kind='bar', stacked=False, title=f'Consumables cost by {groupby_var}') # Setting x-ticks explicitly #ax.set_xticks(range(len(pivot_df['category']))) - ax.set_xticklabels(pivot_df['category'], rotation=45) + ax.set_xticklabels(pivot_df[groupby_var], rotation=45) plt.ylabel(f'US Dollars (millions)') - plt.title(f"Annual consumables cost by category (assuming {suffix})") + plt.title(f"Annual consumables cost by {groupby_var} (assuming {suffix})") plt.xticks(rotation=90) plt.yticks(rotation=0) plt.text(x=0.5, y=-0.8, s=f"Total consumables cost =\n USD {total_cost}", transform=ax.transAxes, horizontalalignment='center', fontsize=12, weight='bold', color='black') - plt.savefig(figurespath / f'consumables_cost_by_category_{suffix}.png', dpi=100, + plt.savefig(figurespath / f'consumables_cost_by_{groupby_var}_{suffix}.png', dpi=100, bbox_inches='tight') plt.close() -plot_cost_by_consumable_category(full_cons_cost_df, 'perfect_availability') -plot_cost_by_consumable_category(full_cons_cost_df, 'default_availability') +plot_consumable_cost(_df = full_cons_cost_df,suffix = 'perfect_availability', groupby_var = 'category') +plot_consumable_cost(_df = full_cons_cost_df, suffix = 'default_availability', groupby_var = 'category') + +# Plot the 10 consumables with the highest cost +plot_consumable_cost(_df = full_cons_cost_df,suffix = 'perfect_availability', groupby_var = 'consumable_name_tlo', top_x_values = 10) +plot_consumable_cost(_df = full_cons_cost_df,suffix = 'default_availability', groupby_var = 'consumable_name_tlo', top_x_values = 10) + # 2.2 Cost of consumables stocked (quantity needed for what is dispensed) #--------------------------------------------------------------------------------------------------------------- From 4a3019a6c6693cca7010fd8876e2e7160c808e31 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Sat, 31 Aug 2024 13:45:29 +0100 Subject: [PATCH 098/230] add count of facilities from HHFA to master facility list --- .../ResourceFile_Master_Facilities_List.csv | 4 +- .../get_number_of_facilities.py | 154 ++++++++++++++++++ 2 files changed, 156 insertions(+), 2 deletions(-) create mode 100644 src/scripts/data_file_processing/healthsystem/health_facilities/get_number_of_facilities.py diff --git a/resources/healthsystem/organisation/ResourceFile_Master_Facilities_List.csv b/resources/healthsystem/organisation/ResourceFile_Master_Facilities_List.csv index 9468bcf080..5ebedf3aab 100644 --- a/resources/healthsystem/organisation/ResourceFile_Master_Facilities_List.csv +++ b/resources/healthsystem/organisation/ResourceFile_Master_Facilities_List.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:73bfb5a34b54939cbaf842feceef1013c6309d3b1e0a5ef27ffc84577ac3519e -size 6602 +oid sha256:c6df4a42409b22d0b10d56ec077f6f4b5ccbed0f16f570fedbfd397e100063a9 +size 8471 diff --git a/src/scripts/data_file_processing/healthsystem/health_facilities/get_number_of_facilities.py b/src/scripts/data_file_processing/healthsystem/health_facilities/get_number_of_facilities.py new file mode 100644 index 0000000000..0522580dbe --- /dev/null +++ b/src/scripts/data_file_processing/healthsystem/health_facilities/get_number_of_facilities.py @@ -0,0 +1,154 @@ +""" +This script extracts the number of health facilities by level and district from the HHFA 2018-19 + +Inputs: +1. 
Raw HHFA data - Q1.dta (~Dropbox/Thanzi la Onse/07 - Data/HHFA_2018-19/0 raw/2_Final data/) +2. Cleaned variable names for HHFA data - variable_list.csv (~Dropbox/Thanzi la Onse/07 - Data/HHFA_2018-19/1 processing) + +Outputs: +1. updated master facilities list resource file - ResourceFile_Master_Facilities_List.csv +""" + +import calendar +import datetime +from pathlib import Path + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from tabulate import tabulate +import copy + +# Set local Dropbox source +path_to_dropbox = Path( # <-- point to the TLO dropbox locally + '/Users/sm2511/Dropbox/Thanzi la Onse' +) + +path_to_files_in_the_tlo_dropbox = path_to_dropbox / "07 - Data/HHFA_2018-19/" # <-- point to HHFA data folder in dropbox +resourcefilepath = Path("./resources") + +# define a timestamp for script outputs +timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M") + +# print the start time of the script +print('Script Start', datetime.datetime.now().strftime('%H:%M')) + +# %% +## 1. DATA IMPORT ## +raw_hhfa = pd.read_csv(path_to_files_in_the_tlo_dropbox / '0 raw/2_Final data/Q1.csv', + low_memory=False) # import 2018 data +varnames = pd.read_csv(path_to_files_in_the_tlo_dropbox / '1 processing/variable_list.csv', + encoding="ISO-8859-1") # import file with cleaned variable names + +# Rename HHFA columns using variable name mapping in loaded .csv +old_var_name = varnames['var'] +new_var_name = varnames['new_var_name'] + +hhfa = copy.deepcopy(raw_hhfa) +for i in range(len(new_var_name)): + if new_var_name[i] != np.nan: + hhfa.rename(columns={old_var_name[i]: new_var_name[i]}, + inplace=True) + else: + pass + +# Rename columns with missing data to "a" and then drop these columns since these will not be used in the analysis +hhfa.rename({np.nan: "a"}, axis="columns", inplace=True) +hhfa.drop(["a"], axis=1, inplace=True) + +# Preserve only relevant columns +facility_identification_columns = ['fac_code', 'fac_name', 'region', 'zone','district', 'fac_type', 'fac_location', 'fac_owner'] +hhfa = hhfa[facility_identification_columns] + +# %% +## 2. 
FEATURE CLEANING ## +# Clean district names # +hhfa.loc[hhfa['district'] == 'Blanytyre', 'district'] = 'Blantyre' +hhfa.loc[hhfa['district'] == 'Nkhatabay', 'district'] = 'Nkhata Bay' + +# Pvt for profit hospital incorrectly labelled District Hospital +cond = hhfa.fac_code == 5067 +hhfa.loc[cond, 'fac_type'] = 'Other Hospital' + +# Clean fac_owner +cond = hhfa.fac_owner == 'Private non profit' +hhfa.loc[cond, 'fac_owner'] = 'NGO' + +# convert fac_location to binary (Yes/No) +hhfa = hhfa.rename(columns={'fac_location': 'fac_urban'}) +cond1 = hhfa.fac_urban.str.lower() == "rural" +hhfa.loc[cond1, 'fac_urban'] = 0 +cond2 = hhfa.fac_urban.str.lower() == "urban" +hhfa.loc[cond2, 'fac_urban'] = 1 + +# Clean facility type +hhfa['fac_type'] = hhfa['fac_type'].str.replace(' ', '').str.lower() +hhfa['Facility_Level'] = "" + +def assign_facilty_level_based_on_hhfa_facility_names(_df): + cond_mch = (_df['fac_name'].str.replace(' ', '').str.lower().str.contains('mzuzucent')) + _df.loc[cond_mch, 'fac_name'] = 'Mzuzu Central Hospital' + cond_level0 = (_df['fac_name'].str.replace(' ', '').str.lower().str.contains('healthpost')) | \ + (_df['fac_type'].str.contains('healthpost')) + cond_level1a = (_df['fac_type'] == 'clinic') | (_df['fac_type'] == 'healthcentre') | \ + (_df['fac_type'].str.replace(' ', '').str.lower().str.contains('dispensary')) | \ + (_df['fac_type'].str.replace(' ', '').str.lower().str.contains('maternity')) + cond_level1b = (_df['fac_type'].str.contains('communityhospital')) | \ + (_df['fac_type'] == 'otherhospital') + cond_level2 = (_df['fac_type'] == 'districthospital') + cond_level3 = _df.fac_name.str.replace(' ', '').str.lower().str.contains("centralhospit") + cond_level4 = _df.fac_name.str.replace(' ', '').str.lower().str.contains("mentalhospit") + + _df.loc[cond_level0,'Facility_Level'] = '0' + _df.loc[cond_level1a,'Facility_Level'] = '1a' + _df.loc[cond_level1b,'Facility_Level'] = '1b' + _df.loc[cond_level2,'Facility_Level'] = '2' + _df.loc[cond_level3,'Facility_Level'] = '3' + _df.loc[cond_level4,'Facility_Level'] = '4' + +assign_facilty_level_based_on_hhfa_facility_names(hhfa) +hhfa = hhfa.drop_duplicates('fac_name') + +# Count facilities by category +# Count number of private facilities by district +cond_private = hhfa.fac_owner.str.contains("Private") +cond_level0 = hhfa.Facility_Level == '0' +private_facility_count = hhfa[cond_private & ~cond_level0].groupby('district')['fac_name'].count() + +# Count number of NGO facilities by district +cond_ngo = hhfa.fac_owner.str.contains("NGO") +ngo_facility_count = hhfa[cond_ngo & ~cond_level0].groupby('district')['fac_name'].count() + +# For the TLO model, we are only concerned with government and CHAM facilities +tlo_model_facilities = hhfa[~(cond_ngo|cond_private)] +facility_count_govt_and_cham = tlo_model_facilities.groupby(['district', 'Facility_Level'])['fac_name'].count().reset_index() +# Collapse data for Mzimba North and South into 'Mzimba' +cond_north = facility_count_govt_and_cham['district'] == 'Mzimba North' +cond_south = facility_count_govt_and_cham['district'] == 'Mzimba South' +facility_count_govt_and_cham.loc[(cond_north|cond_south), 'district'] = 'Mzimba' +facility_count_govt_and_cham = facility_count_govt_and_cham.groupby(['district', 'Facility_Level']).sum() + +tlo_model_facilities['govt'] = 0 +tlo_model_facilities.loc[tlo_model_facilities.fac_owner == "Government", 'govt'] = 1 +proportion_of_facilities_run_by_govt = tlo_model_facilities.groupby(['district', 'Facility_Level'])['govt'].mean() + 
+proportion_of_facilities_in_urban_location = tlo_model_facilities.groupby(['district', 'Facility_Level'])['fac_urban'].mean() + +facility_count_data = pd.merge(facility_count_govt_and_cham, proportion_of_facilities_run_by_govt, right_index=True, left_index=True, how = 'left', validate = "1:1") +facility_count_data = pd.merge(facility_count_data, proportion_of_facilities_in_urban_location, right_index=True, left_index=True, how = 'left', validate = "1:1") +facility_count_data = facility_count_data.reset_index().rename(columns = {'district' : 'District', + 'fac_name' : 'Facility_Count', + 'govt': 'Proportion_owned_by_government', + 'fac_urban': 'Proportion_located_in_urban_area'}) +facility_count_data = facility_count_data[~(facility_count_data.Facility_Level.isin(['3', '4', '5']))] + +#%% +# Add this data to the Master Health Facilities Resource File +mfl = pd.read_csv(resourcefilepath / "healthsystem" / "organisation" / "ResourceFile_Master_Facilities_List.csv")[['District', 'Facility_Level', 'Region', 'Facility_ID','Facility_Name']] +mfl = mfl.merge(facility_count_data, on = ['District', 'Facility_Level'], how = 'left') +mfl.loc[mfl.Facility_Level.isin(['3', '4', '5']), 'Facility_Count'] = 1 +mfl.loc[mfl.Facility_Level.isin(['3', '4', '5']), 'Proportion_owned_by_government'] = 1 +mfl.loc[mfl.Facility_Count.isna(), 'Facility_Count'] = 0 + +# Export Master Health Facilities Resource File with facility count data +mfl.to_csv(resourcefilepath / "healthsystem" / "organisation" / "ResourceFile_Master_Facilities_List.csv", index = False) From b49d35d836f0a7119837c98ff0cf0f38614328f3 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Sat, 31 Aug 2024 18:50:23 +0100 Subject: [PATCH 099/230] update calculation of the cost of excess stock --- src/scripts/costing/costing.py | 182 ++++++++++++++++++++++++--------- 1 file changed, 133 insertions(+), 49 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index e165aa030a..c2951f97d8 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -11,6 +11,7 @@ from matplotlib.ticker import FuncFormatter import numpy as np import pandas as pd +import ast from tlo.analysis.utils import ( extract_params, @@ -84,9 +85,6 @@ def drop_outside_period(_df): districts = set(pd.read_csv(resourcefilepath / 'demography' / 'ResourceFile_Population_2010.csv')['District']) fac_levels = set(mfl.Facility_Level) -# Extract count of facilities from Actual Facilities List -#afl = pd.read_csv(resourcefilepath / "healthsystem" / "organisation" / "ResourceFile_Actual_Facilities_List.csv") - #%% Calculate financial costs # 1. 
HR cost # Load annual salary by officer type and facility level @@ -336,8 +334,8 @@ def convert_dict_to_dataframe(_dict): _df = pd.DataFrame(data) return _df -cost_perfect_df = convert_dict_to_dataframe(cost_of_consumables_dispensed_under_perfect_availability).T.rename(columns = {0:"cost_perfect_availability"}).round(2) -cost_default_df = convert_dict_to_dataframe(cost_of_consumables_dispensed_under_default_availability).T.rename(columns = {0:"cost_default_availability"}).round(2) +cost_perfect_df = convert_dict_to_dataframe(cost_of_consumables_dispensed_under_perfect_availability).T.rename(columns = {0:"cost_dispensed_stock_perfect_availability"}).round(2) +cost_default_df = convert_dict_to_dataframe(cost_of_consumables_dispensed_under_default_availability).T.rename(columns = {0:"cost_dispensed_stock_default_availability"}).round(2) unit_cost_df = convert_dict_to_dataframe(unit_price_consumable).T.rename(columns = {0:"unit_cost"}) dispensed_default_df = convert_dict_to_dataframe(consumables_dispensed_under_default_availability).T.rename(columns = {0:"dispensed_default_availability"}).round(2) dispensed_perfect_df = convert_dict_to_dataframe(consumables_dispensed_under_perfect_availability).T.rename(columns = {0:"dispensed_perfect_availability"}).round(2) @@ -346,6 +344,55 @@ def convert_dict_to_dataframe(_dict): full_cons_cost_df = pd.merge(full_cons_cost_df, unit_cost_df, left_index=True, right_index=True) full_cons_cost_df = pd.merge(full_cons_cost_df, dispensed_default_df, left_index=True, right_index=True) full_cons_cost_df = pd.merge(full_cons_cost_df, dispensed_perfect_df, left_index=True, right_index=True) + +# 2.2 Cost of consumables stocked (quantity needed for what is dispensed) +#--------------------------------------------------------------------------------------------------------------- +# Stocked amount should be higher than dispensed because of i. excess capacity, ii. theft, iii. 
expiry +# While there are estimates in the literature of what % these might be, we agreed that it is better to rely upon +# an empirical estimate based on OpenLMIS data +# Estimate the stock to dispensed ratio from OpenLMIS data +lmis_consumable_usage = pd.read_csv(path_for_new_resourcefiles / "ResourceFile_Consumables_availability_and_usage.csv") +# Collapse individual facilities +lmis_consumable_usage_by_item_level_month = lmis_consumable_usage.groupby(['category', 'item_code', 'district', 'fac_type_tlo', 'month'])[['closing_bal', 'dispensed', 'received']].sum() +df = lmis_consumable_usage_by_item_level_month # Drop rows where monthly OpenLMIS data wasn't available +df = df.loc[df.index.get_level_values('month') != "Aggregate"] +opening_bal_january = df.loc[df.index.get_level_values('month') == 'January', 'closing_bal'] + \ + df.loc[df.index.get_level_values('month') == 'January', 'dispensed'] - \ + df.loc[df.index.get_level_values('month') == 'January', 'received'] +closing_bal_december = df.loc[df.index.get_level_values('month') == 'December', 'closing_bal'] +total_consumables_inflow_during_the_year = df.loc[df.index.get_level_values('month') != 'January', 'received'].groupby(level=[0,1,2,3]).sum() +\ + opening_bal_january.reset_index(level='month', drop=True) -\ + closing_bal_december.reset_index(level='month', drop=True) +total_consumables_outflow_during_the_year = df['dispensed'].groupby(level=[0,1,2,3]).sum() +inflow_to_outflow_ratio = total_consumables_inflow_during_the_year.div(total_consumables_outflow_during_the_year, fill_value=1) + +# Edit outlier ratios +inflow_to_outflow_ratio.loc[inflow_to_outflow_ratio < 1] = 1 # Ratio can't be less than 1 +inflow_to_outflow_ratio.loc[inflow_to_outflow_ratio > inflow_to_outflow_ratio.quantile(0.95)] = inflow_to_outflow_ratio.quantile(0.95) # Trim values greater than the 95th percentile +average_inflow_to_outflow_ratio_ratio = inflow_to_outflow_ratio.mean() +#inflow_to_outflow_ratio.loc[inflow_to_outflow_ratio.isna()] = average_inflow_to_outflow_ratio_ratio # replace missing with average + +# Multiply number of items needed by cost of consumable +inflow_to_outflow_ratio_by_consumable = inflow_to_outflow_ratio.groupby(level='item_code').mean() +excess_stock_ratio = inflow_to_outflow_ratio_by_consumable - 1 +excess_stock_ratio = excess_stock_ratio.to_dict() +# TODO Consider whether a more disaggregated version of the ratio dictionary should be applied +cost_of_excess_consumables_stocked_under_perfect_availability = dict(zip(unit_price_consumable, (unit_price_consumable[key]['Final_price_per_chosen_unit (USD, 2023)'] * + consumables_dispensed_under_perfect_availability[key] * + excess_stock_ratio.get(key, average_inflow_to_outflow_ratio_ratio - 1) + for key in consumables_dispensed_under_perfect_availability))) +cost_of_excess_consumables_stocked_under_default_availability = dict(zip(unit_price_consumable, (unit_price_consumable[key]['Final_price_per_chosen_unit (USD, 2023)'] * + consumables_dispensed_under_default_availability[key] * + excess_stock_ratio.get(key, average_inflow_to_outflow_ratio_ratio - 1) + for key in consumables_dispensed_under_default_availability))) +cost_excess_stock_perfect_df = convert_dict_to_dataframe(cost_of_excess_consumables_stocked_under_perfect_availability).T.rename(columns = {0:"cost_excess_stock_perfect_availability"}).round(2) +cost_excess_stock_default_df = convert_dict_to_dataframe(cost_of_excess_consumables_stocked_under_default_availability).T.rename(columns = 
{0:"cost_excess_stock_default_availability"}).round(2) +full_cons_cost_df = pd.merge(full_cons_cost_df, cost_excess_stock_perfect_df, left_index=True, right_index=True) +full_cons_cost_df = pd.merge(full_cons_cost_df, cost_excess_stock_default_df, left_index=True, right_index=True) + +total_cost_of_excess_consumables_stocked_under_perfect_availability = sum(value for value in cost_of_excess_consumables_stocked_under_perfect_availability.values() if not np.isnan(value)) +total_cost_of_excess_consumables_stocked_under_default_availability = sum(value for value in cost_of_excess_consumables_stocked_under_default_availability.values() if not np.isnan(value)) + full_cons_cost_df = full_cons_cost_df.reset_index().rename(columns = {'index' : 'item_code'}) full_cons_cost_df.to_csv(figurespath / 'consumables_cost_220824.csv') @@ -353,6 +400,8 @@ def convert_dict_to_dataframe(_dict): tlo_lmis_mapping = pd.read_csv(path_for_new_resourcefiles / 'ResourceFile_consumables_matched.csv', low_memory=False, encoding="ISO-8859-1")[['item_code', 'module_name', 'consumable_name_tlo']] tlo_lmis_mapping = tlo_lmis_mapping[~tlo_lmis_mapping['item_code'].duplicated(keep='first')] full_cons_cost_df = pd.merge(full_cons_cost_df, tlo_lmis_mapping, on = 'item_code', how = 'left', validate = "1:1") +full_cons_cost_df['total_cost_perfect_availability'] = full_cons_cost_df['cost_dispensed_stock_perfect_availability'] + full_cons_cost_df['cost_excess_stock_perfect_availability'] +full_cons_cost_df['total_cost_default_availability'] = full_cons_cost_df['cost_dispensed_stock_default_availability'] + full_cons_cost_df['cost_excess_stock_default_availability'] def recategorize_modules_into_consumable_categories(_df): _df['category'] = _df['module_name'].str.lower() @@ -395,7 +444,7 @@ def recategorize_modules_into_consumable_categories(_df): # Use fillna on the 'category' column to fill missing values using the mapped_categories full_cons_cost_df['category'] = full_cons_cost_df['category'].fillna(mapped_categories) -# Bar plot of cost by category +# Bar plot of cost of dispensed consumables def plot_consumable_cost(_df, suffix, groupby_var, top_x_values = float('nan')): pivot_df = _df.groupby(groupby_var)['cost_' + suffix].sum().reset_index() pivot_df['cost_' + suffix] = pivot_df['cost_' + suffix]/1e6 @@ -426,42 +475,24 @@ def plot_consumable_cost(_df, suffix, groupby_var, top_x_values = float('nan')) plot_consumable_cost(_df = full_cons_cost_df,suffix = 'perfect_availability', groupby_var = 'consumable_name_tlo', top_x_values = 10) plot_consumable_cost(_df = full_cons_cost_df,suffix = 'default_availability', groupby_var = 'consumable_name_tlo', top_x_values = 10) +def plot_cost_by_category(_df, suffix , figname_prefix = 'Consumables'): + pivot_df = full_cons_cost_df[['category', 'cost_dispensed_stock_' + suffix, 'cost_excess_stock_' + suffix]] + pivot_df = pivot_df.groupby('category')[['cost_dispensed_stock_' + suffix, 'cost_excess_stock_' + suffix]].sum() + total_cost = round(_df['total_cost_' + suffix].sum(), 0) + total_cost = f"{total_cost:,.0f}" + ax = pivot_df.plot(kind='bar', stacked=True, title='Stacked Bar Graph by Category') + plt.ylabel(f'US Dollars') + plt.title(f"Annual {figname_prefix} cost by category") + plt.xticks(rotation=90, size = 9) + plt.yticks(rotation=0) + plt.text(x=0.3, y=-0.5, s=f"Total {figname_prefix} cost = USD {total_cost}", transform=ax.transAxes, + horizontalalignment='center', fontsize=12, weight='bold', color='black') + plt.savefig(figurespath / f'{figname_prefix}_by_category_{suffix}.png', 
dpi=100, + bbox_inches='tight') + plt.close() -# 2.2 Cost of consumables stocked (quantity needed for what is dispensed) -#--------------------------------------------------------------------------------------------------------------- -# Stocked amount should be higher than dispensed because of i. excess capacity, ii. theft, iii. expiry -# Estimate the stock to dispensed ratio from OpenLMIS data -lmis_consumable_usage = pd.read_csv(path_for_new_resourcefiles / "ResourceFile_Consumables_availability_and_usage.csv") -# Collapse individual facilities -lmis_consumable_usage_by_item_level_month = lmis_consumable_usage.groupby(['category', 'item_code', 'district', 'fac_type_tlo', 'month'])[['closing_bal', 'dispensed', 'received']].sum() -df = lmis_consumable_usage_by_item_level_month # Drop rows where monthly OpenLMIS data wasn't available -df = df.loc[df.index.get_level_values('month') != "Aggregate"] -opening_bal_january = df.loc[df.index.get_level_values('month') == 'January', 'closing_bal'] + \ - df.loc[df.index.get_level_values('month') == 'January', 'dispensed'] - \ - df.loc[df.index.get_level_values('month') == 'January', 'received'] -closing_bal_december = df.loc[df.index.get_level_values('month') == 'December', 'closing_bal'] -total_consumables_inflow_during_the_year = df.loc[df.index.get_level_values('month') != 'January', 'received'].groupby(level=[0,1,2,3]).sum() +\ - opening_bal_january.reset_index(level='month', drop=True) -\ - closing_bal_december.reset_index(level='month', drop=True) -total_consumables_outflow_during_the_year = df['dispensed'].groupby(level=[0,1,2,3]).sum() -inflow_to_outflow_ratio = total_consumables_inflow_during_the_year.div(total_consumables_outflow_during_the_year, fill_value=1) - -# Edit outlier ratios -inflow_to_outflow_ratio.loc[inflow_to_outflow_ratio < 1] = 1 # Ratio can't be less than 1 -inflow_to_outflow_ratio.loc[inflow_to_outflow_ratio > inflow_to_outflow_ratio.quantile(0.95)] = inflow_to_outflow_ratio.quantile(0.95) # Trim values greater than the 95th percentile -average_inflow_to_outflow_ratio_ratio = inflow_to_outflow_ratio.mean() -#inflow_to_outflow_ratio.loc[inflow_to_outflow_ratio.isna()] = average_inflow_to_outflow_ratio_ratio # replace missing with average - -# Multiply number of items needed by cost of consumable -inflow_to_outflow_ratio_by_consumable = inflow_to_outflow_ratio.groupby(level='item_code').mean() -inflow_to_outflow_ratio_by_consumable = inflow_to_outflow_ratio_by_consumable.to_dict() -# TODO Consider whether a more disaggregated version of the ratio dictionary should be applied -cost_of_consumables_stocked = dict(zip(unit_price_consumable, (unit_price_consumable[key]['Final_price_per_chosen_unit (USD, 2023)'] * - cons_dispensed[key] * - inflow_to_outflow_ratio_by_consumable.get(key, average_inflow_to_outflow_ratio_ratio) - for key in cons_dispensed))) -# TODO Make sure that the above code runs -total_cost_of_consumables_stocked = sum(value for value in cost_of_consumables_stocked.values() if not np.isnan(value)) +plot_cost_by_category(full_cons_cost_df, suffix = 'perfect_availability' , figname_prefix = 'Consumables') +plot_cost_by_category(full_cons_cost_df, suffix = 'default_availability' , figname_prefix = 'Consumables') # Add consumable costs to the financial cost dataframe consumable_cost_subcategories = ['total_cost_of_consumables_dispensed', 'total_cost_of_consumables_stocked'] @@ -473,6 +504,7 @@ def plot_consumable_cost(_df, suffix, groupby_var, top_x_values = float('nan')) # Append new_data to scenario_cost_financial 
scenario_cost_financial = pd.concat([scenario_cost_financial, consumable_costs], ignore_index=True) +# %% # 3. Equipment cost # Total cost of equipment required as per SEL (HSSP-III) only at facility IDs where it been used in the simulation unit_cost_equipment = workbook_cost["equipment"] @@ -482,6 +514,7 @@ def plot_consumable_cost(_df, suffix, groupby_var, top_x_values = float('nan')) unit_cost_equipment['spare_parts_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.2 / 8 if row['unit_purchase_cost'] > 1000 else 0, axis=1) # 20% of the value of the item over 8 years unit_cost_equipment['upfront_repair_cost_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.2 * 0.2 / 8 if row['unit_purchase_cost'] < 250000 else 0, axis=1) # 20% of the value of 20% of the items over 8 years unit_cost_equipment['replacement_cost_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.1 / 8 if row['unit_purchase_cost'] < 250000 else 0, axis=1) # 10% of the items over 8 years +# TODO the above line assumes that the life span of each item of equipment is 80 years. This needs to be updated using realistic life span data unit_cost_equipment = unit_cost_equipment[['Item_code','Equipment_tlo', 'service_fee_annual', 'spare_parts_annual', 'upfront_repair_cost_annual', 'replacement_cost_annual', @@ -531,37 +564,88 @@ def plot_consumable_cost(_df, suffix, groupby_var, top_x_values = float('nan')) equipment_df = equipment_df.reset_index().rename(columns = {'level_0' : 'District', 'level_1': 'Facility_Level'}) equipment_df = pd.melt(equipment_df, id_vars = ['District', 'Facility_Level']).rename(columns = {'variable': 'Item_code', 'value': 'whether_item_was_used'}) equipment_df['Item_code'] = pd.to_numeric(equipment_df['Item_code']) +# Merge the count of facilities by district and level +equipment_df = equipment_df.merge(mfl[['District', 'Facility_Level','Facility_Count']], on = ['District', 'Facility_Level'], how = 'left') +equipment_df.loc[equipment_df.Facility_Count.isna(), 'Facility_Count'] = 0 # Merge the two datasets to calculate cost equipment_cost = pd.merge(equipment_df, unit_cost_equipment[['Item_code', 'Equipment_tlo', 'Facility_Level', 'Quantity','service_fee_annual', 'spare_parts_annual', 'upfront_repair_cost_annual', 'replacement_cost_annual']], on = ['Item_code', 'Facility_Level'], how = 'left', validate = "m:1") categories_of_equipment_cost = ['replacement_cost', 'upfront_repair_cost', 'spare_parts', 'service_fee'] for cost_category in categories_of_equipment_cost: - equipment_cost['total_' + cost_category] = equipment_cost[cost_category + '_annual'] * equipment_cost['whether_item_was_used'] * equipment_cost['Quantity'] + equipment_cost['total_' + cost_category] = equipment_cost[cost_category + '_annual'] * equipment_cost['whether_item_was_used'] * equipment_cost['Quantity'] * equipment_cost['Facility_Count'] equipment_cost['annual_cost'] = equipment_cost[['total_' + item for item in categories_of_equipment_cost]].sum(axis = 1) -#equipment_cost.to_csv('./outputs/equipment_cost.csv') equipment_costs = pd.DataFrame({ 'Cost_Category': ['Equipment'] * len(categories_of_equipment_cost), 'Cost_Sub-category': categories_of_equipment_cost, - 'Value_2023USD': equipment_cost[['total_' + item for item in categories_of_equipment_cost]].sum().values.tolist() + 'Cost': equipment_cost[['total_' + item for item in categories_of_equipment_cost]].sum().values.tolist() }) # Append new_data to scenario_cost_financial -scenario_cost_financial = 
pd.concat([scenario_cost_financial, equipment_costs], ignore_index=True) +scenario_cost = pd.concat([scenario_cost, equipment_costs], ignore_index=True) + +# Plot equipment cost +# Plot different categories of cost by level of care +def plot_components_of_cost_category(_df, cost_category, figname_suffix): + pivot_df = _df[_df['Cost_Category'] == cost_category].pivot_table(index='Cost_Sub-category', values='Cost', + aggfunc='sum', fill_value=0) + ax = pivot_df.plot(kind='bar', stacked=False, title='Scenario Cost by Category') + plt.ylabel(f'US Dollars') + plt.title(f"Annual {cost_category} cost") + plt.xticks(rotation=45) + plt.yticks(rotation=0) + + # Add text labels on the bars + total_cost = pivot_df['Cost'].sum() + rects = ax.patches + for rect, cost in zip(rects, pivot_df['Cost']): + cost_millions = cost / 1e6 + percentage = (cost / total_cost) * 100 + label_text = f"{cost_millions:.1f}M ({percentage:.1f}%)" + # Place text at the top of the bar + x = rect.get_x() + rect.get_width() / 2 + y = rect.get_height() + ax.text(x, y, label_text, ha='center', va='bottom', fontsize=8, rotation=0) + + total_cost = f"{total_cost:,.0f}" + plt.text(x=0.3, y=-0.5, s=f"Total {cost_category} cost = USD {total_cost}", transform=ax.transAxes, + horizontalalignment='center', fontsize=12, weight='bold', color='black') + + plt.savefig(figurespath / f'{cost_category}_{figname_suffix}.png', dpi=100, + bbox_inches='tight') + plt.close() + +plot_components_of_cost_category(_df = scenario_cost, cost_category = 'Equipment', figname_suffix = "") + +# Plot top 10 most expensive items +def plot_most_expensive_equipment(_df, top_x_values = 10, figname_prefix = "Equipment"): + top_x_items = _df.groupby('Item_code')['annual_cost'].sum().sort_values(ascending = False)[0:top_x_values-1].index + _df_subset = _df[_df.Item_code.isin(top_x_items)] + + pivot_df = _df_subset.pivot_table(index='Equipment_tlo', columns='Facility_Level', values='annual_cost', + aggfunc='sum', fill_value=0) + ax = pivot_df.plot(kind='bar', stacked=True, title='Stacked Bar Graph by Item and Facility Level') + plt.ylabel(f'US Dollars') + plt.title(f"Annual {figname_prefix} cost by item and facility level") + plt.xticks(rotation=90, size = 8) + plt.yticks(rotation=0) + plt.savefig(figurespath / f'{figname_prefix}_by_item_and_level.png', dpi=100, + bbox_inches='tight') + plt.close() + + +plot_most_expensive_equipment(equipment_cost) -# TODO Use AFL to multiple the number of facilities at each level # TODO PLot which equipment is used by district and facility or a heatmap of the number of facilities at which an equipment is used -# TODO From the log, extract the facility IDs which use any equipment item # TODO Collapse facility IDs by level of care to get the total number of facilities at each level using an item # TODO Multiply number of facilities by level with the quantity needed of each equipment and collapse to get total number of equipment (nationally) -# TODO Multiply quantity needed with cost per item (this is the repair, replacement, and maintenance cost) # TODO Which equipment needs to be newly purchased (currently no assumption made for equipment with cost > $250,000) # 4. 
Facility running costs # Average running costs by facility level and district times the number of facilities in the simulation # Extract all costs to a .csv -scenario_cost_financial.to_csv(costing_outputs_folder / 'scenario_cost.csv') +scenario_cost.to_csv(costing_outputs_folder / 'scenario_cost.csv') # Compare financial costs with actual budget data From e7a3299bee0fd75bfce12ad4ff2b3bcfb4d1c842 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 3 Sep 2024 18:12:10 +0100 Subject: [PATCH 100/230] minor changes to extract correct cost values into scenario_cost.csv --- src/scripts/costing/costing.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index c2951f97d8..43bc245071 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -12,6 +12,7 @@ import numpy as np import pandas as pd import ast +import math from tlo.analysis.utils import ( extract_params, @@ -327,8 +328,6 @@ def get_counts_of_items_requested(_df): cost_of_consumables_dispensed_under_default_availability = {key: unit_price_consumable[key]['Final_price_per_chosen_unit (USD, 2023)'] * consumables_dispensed_under_default_availability[key] for key in unit_price_consumable if key in consumables_dispensed_under_default_availability} total_cost_of_consumables_dispensed_under_default_availability = sum(value for value in cost_of_consumables_dispensed_under_default_availability.values() if not np.isnan(value)) - -# Extract cost to .csv def convert_dict_to_dataframe(_dict): data = {key: [value] for key, value in _dict.items()} _df = pd.DataFrame(data) @@ -468,12 +467,12 @@ def plot_consumable_cost(_df, suffix, groupby_var, top_x_values = float('nan')) bbox_inches='tight') plt.close() -plot_consumable_cost(_df = full_cons_cost_df,suffix = 'perfect_availability', groupby_var = 'category') -plot_consumable_cost(_df = full_cons_cost_df, suffix = 'default_availability', groupby_var = 'category') +plot_consumable_cost(_df = full_cons_cost_df,suffix = 'dispensed_stock_perfect_availability', groupby_var = 'category') +plot_consumable_cost(_df = full_cons_cost_df, suffix = 'dispensed_stock_default_availability', groupby_var = 'category') # Plot the 10 consumables with the highest cost -plot_consumable_cost(_df = full_cons_cost_df,suffix = 'perfect_availability', groupby_var = 'consumable_name_tlo', top_x_values = 10) -plot_consumable_cost(_df = full_cons_cost_df,suffix = 'default_availability', groupby_var = 'consumable_name_tlo', top_x_values = 10) +plot_consumable_cost(_df = full_cons_cost_df,suffix = 'dispensed_stock_perfect_availability', groupby_var = 'consumable_name_tlo', top_x_values = 10) +plot_consumable_cost(_df = full_cons_cost_df,suffix = 'dispensed_stock_default_availability', groupby_var = 'consumable_name_tlo', top_x_values = 10) def plot_cost_by_category(_df, suffix , figname_prefix = 'Consumables'): pivot_df = full_cons_cost_df[['category', 'cost_dispensed_stock_' + suffix, 'cost_excess_stock_' + suffix]] @@ -495,14 +494,14 @@ def plot_cost_by_category(_df, suffix , figname_prefix = 'Consumables'): plot_cost_by_category(full_cons_cost_df, suffix = 'default_availability' , figname_prefix = 'Consumables') # Add consumable costs to the financial cost dataframe -consumable_cost_subcategories = ['total_cost_of_consumables_dispensed', 'total_cost_of_consumables_stocked'] +consumable_cost_subcategories = ['total_cost_of_consumables_dispensed', 'total_cost_of_excess_consumables_stocked'] consumable_costs = 
pd.DataFrame({ 'Cost_Category': ['Consumables'] * len(consumable_cost_subcategories), 'Cost_Sub-category': consumable_cost_subcategories, - 'Value_2023USD': [total_cost_of_consumables_dispensed, total_cost_of_consumables_stocked] + 'Cost': [total_cost_of_consumables_dispensed_under_default_availability, total_cost_of_excess_consumables_stocked_under_default_availability] }) # Append new_data to scenario_cost_financial -scenario_cost_financial = pd.concat([scenario_cost_financial, consumable_costs], ignore_index=True) +scenario_cost = pd.concat([scenario_cost, consumable_costs], ignore_index=True) # %% # 3. Equipment cost From 197693a6132a9371353ec19752a590067ac79b56 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 7 Oct 2024 19:13:47 +0100 Subject: [PATCH 101/230] Update HR costs in account for multiple draws and runs --- src/scripts/costing/costing.py | 310 +++++++++++++++++++++------------ 1 file changed, 197 insertions(+), 113 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 43bc245071..846f6883b8 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -37,7 +37,7 @@ # define a pathway to the data folder (note: currently outside the TLO model directory) # remember to set working directory to TLOmodel/ #outputfilepath = Path('./outputs/sakshi.mohan@york.ac.uk') -outputfilepath = Path('./outputs/tbh03@ic.ac.uk') +outputfilepath = Path('./outputs/t.mangal@imperial.ac.uk') resourcefilepath = Path("./resources") path_for_new_resourcefiles = resourcefilepath / "healthsystem/consumables" costing_outputs_folder = Path('./outputs/costing') @@ -57,14 +57,16 @@ def drop_outside_period(_df): # Load result files #------------------- #results_folder = get_scenario_outputs('example_costing_scenario.py', outputfilepath)[0] # impact_of_cons_regression_scenarios -results_folder = get_scenario_outputs('long_run_all_diseases.py', outputfilepath)[0] # impact_of_cons_regression_scenarios +#results_folder = get_scenario_outputs('long_run_all_diseases.py', outputfilepath)[0] # impact_of_cons_regression_scenarios +results_folder = get_scenario_outputs('htm_with_and_without_hss-2024-09-04T143044Z.py', outputfilepath)[0] # Tara's FCDO scenarios #results_folder = get_scenario_outputs('scenario_impact_of_consumables_availability.py', outputfilepath)[0] # impact_of_cons_regression_scenarios -equipment_results_folder = Path('./outputs/sakshi.mohan@york.ac.uk/021_long_run_all_diseases_run') -consumables_results_folder = Path('./outputs/sakshi.mohan@york.ac.uk/impact_of_consumables_scenarios-2024-06-11T204007Z/') + +#equipment_results_folder = Path('./outputs/sakshi.mohan@york.ac.uk/021_long_run_all_diseases_run') +#consumables_results_folder = Path('./outputs/sakshi.mohan@york.ac.uk/impact_of_consumables_scenarios-2024-06-11T204007Z/') # TODO When the costing module is ready the above results_folder should be the same for the calculation of all costs # check can read results from draw=0, run=0 -log_equipment = load_pickled_dataframes(equipment_results_folder, 0, 0) +#log_equipment = load_pickled_dataframes(equipment_results_folder, 0, 0) # look at one log (so can decide what to extract) log = load_pickled_dataframes(results_folder) @@ -74,6 +76,8 @@ def drop_outside_period(_df): # 1) Extract the parameters that have varied over the set of simulations params = extract_params(results_folder) +final_year_of_simulation = max(log['tlo.simulation']['info']['date']).year +first_year_of_simulation = min(log['tlo.simulation']['info']['date']).year 
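# A minimal, self-contained sketch of the draw/run reshaping pattern that this
# commit applies to each cost table: build a long frame keyed by (draw, run, year),
# unstack the (draw, run) levels into columns, then take the mean across runs
# within each draw - roughly what the later calls to
# summarize(..., only_mean=True, collapse_columns=True) are used for here.
# The toy numbers below are hypothetical and only illustrate the reshaping.
import pandas as pd

_toy = pd.DataFrame({
    'draw': [0, 0, 0, 0, 1, 1, 1, 1],
    'run':  [0, 1, 0, 1, 0, 1, 0, 1],
    'year': [2010, 2010, 2011, 2011, 2010, 2010, 2011, 2011],
    'Cost': [10.0, 11.0, 12.0, 13.0, 20.0, 21.0, 22.0, 23.0],
})
# Wide format: one row per year, one column per (draw, run) combination
_wide = _toy.set_index(['year', 'draw', 'run'])['Cost'].unstack(level=['draw', 'run'])
# Collapse runs: mean over runs, leaving one column per draw
_mean_by_draw = _wide.T.groupby(level='draw').mean().T
print(_mean_by_draw)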
# Load cost input files #------------------------ @@ -93,18 +97,68 @@ def drop_outside_period(_df): hr_cost_parameters['Facility_Level'] = hr_cost_parameters['Facility_Level'].astype(str) hr_annual_salary = hr_cost_parameters[hr_cost_parameters['Parameter_name'] == 'salary_usd'] hr_annual_salary['OfficerType_FacilityLevel'] = 'Officer_Type=' + hr_annual_salary['Officer_Category'].astype(str) + '|Facility_Level=' + hr_annual_salary['Facility_Level'].astype(str) # create column for merging with model log - -# Load scenario staffing level -hr_scenario = log[ 'tlo.scenario']['override_parameter']['new_value'][log[ 'tlo.scenario'][ 'override_parameter']['name'] == 'use_funded_or_actual_staffing'] - -if hr_scenario.empty: - staff_count = pd.read_csv( - resourcefilepath / "healthsystem/human_resources/actual/ResourceFile_Daily_Capabilities.csv") # if missing default to reading actual capabilities -else: - staff_count = pd.read_csv( - resourcefilepath / 'healthsystem'/ 'human_resources' / f'{hr_scenario[2]}' / 'ResourceFile_Daily_Capabilities.csv') - -staff_count_by_level_and_officer_type = staff_count.groupby(['Facility_Level', 'Officer_Category'])[ +hr_annual_salary = hr_annual_salary.rename({'Value':'Annual_Salary'}, axis = 1) + +# Load scenario staffing level for each year and draw +use_funded_or_actual_staffing = params[params.module_param == 'HealthSystem:use_funded_or_actual_staffing'].reset_index() +HR_scaling_by_level_and_officer_type_mode = params[params.module_param == 'HealthSystem:HR_scaling_by_level_and_officer_type_mode'].reset_index() +year_HR_scaling_by_level_and_officer_type = params[params.module_param == 'HealthSystem:year_HR_scaling_by_level_and_officer_type'].reset_index() +yearly_HR_scaling_mode = params[params.module_param == 'HealthSystem:yearly_HR_scaling_mode'].reset_index() + +draws = params.index.unique().tolist() # list of draws +years = list(range(first_year_of_simulation, final_year_of_simulation + 1)) + +# TODO add the following parameters to estimate HR availability per year - HealthSystem:yearly_HR_scaling_mode, HealthSystem:HR_scaling_by_level_and_officer_type_mode, HealthSystem:year_HR_scaling_by_level_and_officer_type +hr_df_columns = pd.read_csv(resourcefilepath / "healthsystem/human_resources/actual/ResourceFile_Daily_Capabilities.csv").columns.drop(['Facility_ID', 'Officer_Category']) +facilities = pd.read_csv(resourcefilepath / "healthsystem/human_resources/actual/ResourceFile_Daily_Capabilities.csv")['Facility_ID'].unique().tolist() +officer_categories = pd.read_csv(resourcefilepath / "healthsystem/human_resources/actual/ResourceFile_Daily_Capabilities.csv")['Officer_Category'].unique().tolist() +staff_count = pd.DataFrame(columns = hr_df_columns, index=pd.MultiIndex.from_product([draws, years, facilities, officer_categories], names=['draw', 'year', 'Facility_ID ', 'Officer_Category'])) # Create the empty DataFrame staff_count with multi-level index ['draw', 'year'] + +for d in draws: + year_of_switch = ( + year_HR_scaling_by_level_and_officer_type.loc[ + year_HR_scaling_by_level_and_officer_type.draw == d, 'value' + ].iloc[0] if not year_HR_scaling_by_level_and_officer_type.loc[ + year_HR_scaling_by_level_and_officer_type.draw == d, 'value' + ].empty else final_year_of_simulation + ) + chosen_hr_scenario = use_funded_or_actual_staffing.loc[use_funded_or_actual_staffing.draw == d,'value'].iloc[0] if not use_funded_or_actual_staffing.loc[use_funded_or_actual_staffing.draw == d,'value'].empty else '' + condition_draw = 
staff_count.index.get_level_values('draw') == d # Condition for draw + condition_before_switch = staff_count.index.get_level_values('year') < year_of_switch # Condition for year + + for year in years: + condition_draw = staff_count.index.get_level_values('draw') == d # Condition for draw + condition_year = staff_count.index.get_level_values('year') == year # Condition for the specific year + + if year < year_of_switch: + if chosen_hr_scenario == '': + new_data = pd.read_csv( + resourcefilepath / "healthsystem/human_resources/actual/ResourceFile_Daily_Capabilities.csv" + ) + else: + new_data = pd.read_csv( + resourcefilepath / 'healthsystem' / 'human_resources' / f'{chosen_hr_scenario}' / 'ResourceFile_Daily_Capabilities.csv' + ) # Use the chosen HR scenario + else: + if chosen_hr_scenario == '': + new_data = pd.read_csv( + resourcefilepath / "healthsystem/human_resources/actual/ResourceFile_Daily_Capabilities.csv" + ) # If missing default to reading actual capabilities + else: + new_data = pd.read_csv( + resourcefilepath / "healthsystem/human_resources/actual/ResourceFile_Daily_Capabilities.csv" + ) # If missing default to reading actual capabilities + + # Set the 'draw' and 'year' in new_data + new_data['draw'] = d + new_data['year'] = year + new_data = new_data.set_index(['draw', 'year', 'Facility_ID', 'Officer_Category']) + + # Replace empty values in staff_count with values from new_data + staff_count.loc[condition_draw & condition_year] = staff_count.loc[ + condition_draw & condition_year].fillna(new_data) + +staff_count_by_level_and_officer_type = staff_count.groupby(['draw', 'year', 'Facility_Level', 'Officer_Category'])[ 'Staff_Count'].sum().reset_index() staff_count_by_level_and_officer_type['Facility_Level'] = staff_count_by_level_and_officer_type['Facility_Level'].astype(str) @@ -115,69 +169,48 @@ def expand_capacity_by_officer_type_and_facility_level(_df: pd.Series) -> pd.Ser _df.index.name = 'year' return unflatten_flattened_multi_index_in_logging(_df).stack(level=[0, 1]) # expanded flattened axis -annual_capacity_used_by_cadre_and_level = summarize(extract_results( +annual_capacity_used_by_cadre_and_level = extract_results( Path(results_folder), module='tlo.methods.healthsystem.summary', key='Capacity_By_OfficerType_And_FacilityLevel', custom_generate_series=expand_capacity_by_officer_type_and_facility_level, do_scaling=False, -), only_mean=True, collapse_columns=True) +) #, only_mean=True, collapse_columns=True -# Take mean across the entire simulation +# Prepare capacity used dataframe to be multiplied by staff count average_capacity_used_by_cadre_and_level = annual_capacity_used_by_cadre_and_level.groupby(['OfficerType', 'FacilityLevel']).mean().reset_index(drop=False) +average_capacity_used_by_cadre_and_level.reset_index(drop=True) # Flatten multi=index column +average_capacity_used_by_cadre_and_level = average_capacity_used_by_cadre_and_level.melt(id_vars=['OfficerType', 'FacilityLevel'], + var_name=['draw', 'run'], + value_name='capacity_used') # Unstack to make it look like a nice table average_capacity_used_by_cadre_and_level['OfficerType_FacilityLevel'] = 'Officer_Type=' + average_capacity_used_by_cadre_and_level['OfficerType'].astype(str) + '|Facility_Level=' + average_capacity_used_by_cadre_and_level['FacilityLevel'].astype(str) -list_of_cadre_and_level_combinations_used = average_capacity_used_by_cadre_and_level[average_capacity_used_by_cadre_and_level['mean'] != 0]['OfficerType_FacilityLevel'] -print(f"Out of {len(average_capacity_used_by_cadre_and_level)} cadre 
and level combinations available, {len(list_of_cadre_and_level_combinations_used)} are used in the simulation") +list_of_cadre_and_level_combinations_used = average_capacity_used_by_cadre_and_level[average_capacity_used_by_cadre_and_level['capacity_used'] != 0][['OfficerType_FacilityLevel', 'draw', 'run']] +print(f"Out of {len(average_capacity_used_by_cadre_and_level.OfficerType_FacilityLevel.unique())} cadre and level combinations available, {len(list_of_cadre_and_level_combinations_used.OfficerType_FacilityLevel.unique())} are used across the simulations") # Subset scenario staffing level to only include cadre-level combinations used in the simulation staff_count_by_level_and_officer_type['OfficerType_FacilityLevel'] = 'Officer_Type=' + staff_count_by_level_and_officer_type['Officer_Category'].astype(str) + '|Facility_Level=' + staff_count_by_level_and_officer_type['Facility_Level'].astype(str) -used_staff_count_by_level_and_officer_type = staff_count_by_level_and_officer_type[staff_count_by_level_and_officer_type['OfficerType_FacilityLevel'].isin(list_of_cadre_and_level_combinations_used)] +used_staff_count_by_level_and_officer_type = staff_count_by_level_and_officer_type.merge(list_of_cadre_and_level_combinations_used, on = ['draw', 'OfficerType_FacilityLevel'], how = 'right', validate = 'm:m') # Calculate various components of HR cost # 1.1 Salary cost for current total staff #--------------------------------------------------------------------------------------------------------------- staff_count_by_level_and_officer_type = staff_count_by_level_and_officer_type.drop(staff_count_by_level_and_officer_type[staff_count_by_level_and_officer_type.Facility_Level == '5'].index) # drop headquarters because we're only concerned with staff engaged in service delivery -salary_for_all_staff = pd.merge(staff_count_by_level_and_officer_type[['OfficerType_FacilityLevel', 'Staff_Count']], - hr_annual_salary[['OfficerType_FacilityLevel', 'Value']], on = ['OfficerType_FacilityLevel'], how = "left") -salary_for_all_staff['Cost'] = salary_for_all_staff['Value'] * salary_for_all_staff['Staff_Count'] -total_salary_for_all_staff = salary_for_all_staff['Cost'].sum() +salary_for_all_staff = pd.merge(staff_count_by_level_and_officer_type[['draw', 'year', 'OfficerType_FacilityLevel', 'Staff_Count']], + hr_annual_salary[['OfficerType_FacilityLevel', 'Annual_Salary']], on = ['OfficerType_FacilityLevel'], how = "left", validate = 'm:1') +salary_for_all_staff['Cost'] = salary_for_all_staff['Annual_Salary'] * salary_for_all_staff['Staff_Count'] +total_salary_for_all_staff = salary_for_all_staff.groupby(['draw', 'year'])['Cost'].sum() # 1.2 Salary cost for health workforce cadres used in the simulation (Staff count X Annual salary) #--------------------------------------------------------------------------------------------------------------- -salary_for_staff_used_in_scenario = pd.merge(used_staff_count_by_level_and_officer_type[['OfficerType_FacilityLevel', 'Staff_Count']], - hr_annual_salary[['OfficerType_FacilityLevel', 'Value']], on = ['OfficerType_FacilityLevel'], how = "left") -salary_for_staff_used_in_scenario['Cost'] = salary_for_staff_used_in_scenario['Value'] * salary_for_staff_used_in_scenario['Staff_Count'] -total_salary_for_staff_used_in_scenario = salary_for_staff_used_in_scenario['Cost'].sum() - -# Bar chart of salaries by cadre which goes into the HR folder in outputs (stacked for levels of care and two series for modelled and all) -def get_level_and_cadre_from_concatenated_value(_df, varname): - 
_df['Cadre'] = _df[varname].str.extract(r'=(.*?)\|') - _df['Facility_Level'] = _df[varname].str.extract(r'^[^=]*=[^|]*\|[^=]*=([^|]*)') - return _df -def plot_cost_by_cadre_and_level(_df, figname_prefix, figname_suffix): - if ('Facility_Level' in _df.columns) & ('Cadre' in _df.columns): - pass - else: - _df = get_level_and_cadre_from_concatenated_value(_df, 'OfficerType_FacilityLevel') - - pivot_df = _df.pivot_table(index='Cadre', columns='Facility_Level', values='Cost', - aggfunc='sum', fill_value=0) - total_salary = round(_df['Cost'].sum(), 0) - total_salary = f"{total_salary:,.0f}" - ax = pivot_df.plot(kind='bar', stacked=True, title='Stacked Bar Graph by Cadre and Facility Level') - plt.ylabel(f'US Dollars') - plt.title(f"Annual {figname_prefix} cost by cadre and facility level") - plt.xticks(rotation=45) - plt.yticks(rotation=0) - plt.text(x=0.3, y=-0.5, s=f"Total {figname_prefix} cost = USD {total_salary}", transform=ax.transAxes, - horizontalalignment='center', fontsize=12, weight='bold', color='black') - plt.savefig(figurespath / f'{figname_prefix}_by_cadre_and_level_{figname_suffix}.png', dpi=100, - bbox_inches='tight') - plt.close() - -plot_cost_by_cadre_and_level(salary_for_all_staff,figname_prefix = "salary", figname_suffix= "all_staff") -plot_cost_by_cadre_and_level(salary_for_staff_used_in_scenario,figname_prefix = "salary", figname_suffix= "staff_used_in_scenario") +used_staff_count_by_level_and_officer_type = used_staff_count_by_level_and_officer_type.drop(used_staff_count_by_level_and_officer_type[used_staff_count_by_level_and_officer_type.Facility_Level == '5'].index) +salary_for_staff_used_in_scenario = pd.merge(used_staff_count_by_level_and_officer_type[['draw', 'run', 'year', 'OfficerType_FacilityLevel', 'Staff_Count']], + hr_annual_salary[['OfficerType_FacilityLevel', 'Annual_Salary']], on = ['OfficerType_FacilityLevel'], how = "left") +salary_for_staff_used_in_scenario['Cost'] = salary_for_staff_used_in_scenario['Annual_Salary'] * salary_for_staff_used_in_scenario['Staff_Count'] +salary_for_staff_used_in_scenario = salary_for_staff_used_in_scenario[['draw', 'run', 'year', 'OfficerType_FacilityLevel', 'Cost']].set_index(['draw', 'run', 'year', 'OfficerType_FacilityLevel']).unstack(level=['draw', 'run']) +salary_for_staff_used_in_scenario = salary_for_staff_used_in_scenario.apply(lambda x: pd.to_numeric(x, errors='coerce')) +salary_for_staff_used_in_scenario = summarize(salary_for_staff_used_in_scenario, only_mean = True, collapse_columns=True) +total_salary_for_staff_used_in_scenario = salary_for_staff_used_in_scenario.groupby(['year']).sum() # 1.3 Recruitment cost to fill gap created by attrition #--------------------------------------------------------------------------------------------------------------- @@ -196,17 +229,18 @@ def merge_cost_and_model_data(cost_df, model_df, varnames): merged_df = pd.merge(merged_df, new_cost_df, on=['Officer_Category', 'Facility_Level'], how="left") return merged_df -recruitment_cost = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = staff_count_by_level_and_officer_type, +recruitment_cost = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type, varnames = ['annual_attrition_rate', 'recruitment_cost_per_person_recruited_usd']) recruitment_cost['Cost'] = recruitment_cost['annual_attrition_rate'] * recruitment_cost['Staff_Count'] * \ recruitment_cost['recruitment_cost_per_person_recruited_usd'] -total_recruitment_cost_for_attrited_workers = 
recruitment_cost['Cost'].sum() - -plot_cost_by_cadre_and_level(recruitment_cost, figname_prefix = "recruitment", figname_suffix= "all_staff") +recruitment_cost = recruitment_cost[['draw', 'run', 'year', 'OfficerType_FacilityLevel', 'Cost']].set_index(['draw', 'run', 'year', 'OfficerType_FacilityLevel']).unstack(level=['draw', 'run']) +recruitment_cost = recruitment_cost.apply(lambda x: pd.to_numeric(x, errors='coerce')) +recruitment_cost = summarize(recruitment_cost, only_mean = True, collapse_columns=True) +total_recruitment_cost_for_attrited_workers = recruitment_cost.groupby(['year']).sum() # 1.4 Pre-service training cost to fill gap created by attrition #--------------------------------------------------------------------------------------------------------------- -preservice_training_cost = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = staff_count_by_level_and_officer_type, +preservice_training_cost = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type, varnames = ['annual_attrition_rate', 'licensure_exam_passing_rate', 'graduation_rate', 'absorption_rate_of_students_into_public_workforce', 'proportion_of_workforce_recruited_from_abroad', @@ -215,64 +249,46 @@ def merge_cost_and_model_data(cost_df, model_df, varnames): (1/(preservice_training_cost['absorption_rate_of_students_into_public_workforce'] + preservice_training_cost['proportion_of_workforce_recruited_from_abroad'])) * \ (1/preservice_training_cost['graduation_rate']) * (1/preservice_training_cost['licensure_exam_passing_rate']) * \ preservice_training_cost['annual_preservice_training_cost_percapita_usd'] -preservice_training_cost_for_attrited_workers = preservice_training_cost['Cost'].sum() - -plot_cost_by_cadre_and_level(preservice_training_cost, figname_prefix = "pre-service training", figname_suffix= "all_staff") +preservice_training_cost = preservice_training_cost[['draw', 'run', 'year', 'OfficerType_FacilityLevel', 'Cost']].set_index(['draw', 'run', 'year', 'OfficerType_FacilityLevel']).unstack(level=['draw', 'run']) +preservice_training_cost = preservice_training_cost.apply(lambda x: pd.to_numeric(x, errors='coerce')) +preservice_training_cost = summarize(preservice_training_cost, only_mean = True, collapse_columns=True) +preservice_training_cost_for_attrited_workers = preservice_training_cost.groupby(['year']).sum() # 1.5 In-service training cost to train all staff #--------------------------------------------------------------------------------------------------------------- -inservice_training_cost = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = staff_count_by_level_and_officer_type, +inservice_training_cost = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type, varnames = ['annual_inservice_training_cost_usd']) inservice_training_cost['Cost'] = inservice_training_cost['Staff_Count'] * inservice_training_cost['annual_inservice_training_cost_usd'] -inservice_training_cost_for_all_staff = inservice_training_cost['Cost'].sum() - -plot_cost_by_cadre_and_level(inservice_training_cost, figname_prefix = "in-service training", figname_suffix= "all_staff") +inservice_training_cost = inservice_training_cost[['draw', 'run', 'year', 'OfficerType_FacilityLevel', 'Cost']].set_index(['draw', 'run', 'year', 'OfficerType_FacilityLevel']).unstack(level=['draw', 'run']) +inservice_training_cost = inservice_training_cost.apply(lambda x: pd.to_numeric(x, errors='coerce')) 
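# A small worked example of the pre-service training cost formula used a few lines
# above, with hypothetical parameter values (the real values come from the costing
# resource file): the number of recruits needed to offset attrition is scaled up by
# the number of trainees required to produce one recruit who is absorbed into the
# public workforce, graduates, and passes the licensure exam.
attrition_rate = 0.05          # assumed: 5% of the cadre leaves each year
staff_count = 1000             # assumed cadre size
absorption_rate = 0.6          # assumed share of graduates absorbed into the public workforce
recruited_from_abroad = 0.1    # assumed share of recruits hired from abroad
graduation_rate = 0.8          # assumed graduation rate
licensure_pass_rate = 0.9      # assumed licensure exam pass rate
cost_per_trainee_usd = 2000.0  # assumed pre-service training cost per trainee

recruits_needed = attrition_rate * staff_count                          # 50 recruits per year
trainees_per_recruit = (1 / (absorption_rate + recruited_from_abroad)
                        / graduation_rate / licensure_pass_rate)        # ~1.98 trainees per recruit
annual_preservice_training_cost = recruits_needed * trainees_per_recruit * cost_per_trainee_usd
print(f"{annual_preservice_training_cost:,.0f} USD per year")           # ~198,413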
+inservice_training_cost = summarize(inservice_training_cost, only_mean = True, collapse_columns=True) +inservice_training_cost_for_all_staff = inservice_training_cost.groupby(['year']).sum() # TODO check why annual_inservice_training_cost for DCSA is NaN in the merged_df # Create a dataframe to store financial costs -hr_cost_subcategories = ['salary_for_all_staff', 'recruitment_cost', - 'preservice_training_cost', 'inservice_training_cost'] -scenario_cost = pd.DataFrame({ - 'Cost_Category': ['Human Resources for Health'] * len(hr_cost_subcategories), - 'Cost_Sub-category': hr_cost_subcategories, - 'Cost': [salary_for_all_staff['Cost'].sum(), recruitment_cost['Cost'].sum(), - preservice_training_cost['Cost'].sum(), preservice_training_cost['Cost'].sum()] -}) -# TODO 'Value_2023USD' - use hr_cost_subcategories rather than the hardcoded list +# Function to melt and label the cost category +def melt_and_label(df, label): + melted_df = pd.melt(df.reset_index(), id_vars='year') + melted_df['Cost_Sub-category'] = label + return melted_df + +# Initialize scenario_cost with the salary data +scenario_cost = melt_and_label(total_salary_for_staff_used_in_scenario, 'salary_for_used_cadres') + +# Concatenate additional cost categories +additional_costs = [ + (total_recruitment_cost_for_attrited_workers, 'recruitment_cost_for_attrited_workers'), + (preservice_training_cost_for_attrited_workers, 'preservice_training_cost_for_attrited_workers'), + (inservice_training_cost_for_all_staff, 'inservice_training_cost_for_all_staff') +] +# Iterate through additional costs, melt and concatenate +for df, label in additional_costs: + melted = melt_and_label(df, label) + scenario_cost = pd.concat([scenario_cost, melted]) +scenario_cost['Cost_Category'] = 'Human Resources for Health' # TODO Consider calculating economic cost of HR by multiplying salary times staff count with cadres_utilisation_rate - -def plot_components_of_cost_category(_df, cost_category, figname_suffix): - pivot_df = _df[_df['Cost_Category'] == cost_category].pivot_table(index='Cost_Sub-category', values='Cost', - aggfunc='sum', fill_value=0) - ax = pivot_df.plot(kind='bar', stacked=False, title='Scenario Cost by Category') - plt.ylabel(f'US Dollars') - plt.title(f"Annual {cost_category} cost") - plt.xticks(rotation=45) - plt.yticks(rotation=0) - - # Add text labels on the bars - total_cost = pivot_df['Cost'].sum() - rects = ax.patches - for rect, cost in zip(rects, pivot_df['Cost']): - cost_millions = cost / 1e6 - percentage = (cost / total_cost) * 100 - label_text = f"{cost_millions:.1f}M ({percentage:.1f}%)" - # Place text at the top of the bar - x = rect.get_x() + rect.get_width() / 2 - y = rect.get_height() - ax.text(x, y, label_text, ha='center', va='bottom', fontsize=8, rotation=0) - - total_cost = f"{total_cost:,.0f}" - plt.text(x=0.3, y=-0.5, s=f"Total {cost_category} cost = USD {total_cost}", transform=ax.transAxes, - horizontalalignment='center', fontsize=12, weight='bold', color='black') - - plt.savefig(figurespath / f'{cost_category}_by_cadre_and_level_{figname_suffix}.png', dpi=100, - bbox_inches='tight') - plt.close() - -plot_components_of_cost_category(_df = scenario_cost, cost_category = 'Human Resources for Health', figname_suffix = "all_staff") - +scenario_cost.to_csv(figurespath / 'scenario_cost.csv') # %% # 2. 
Consumables cost def get_quantity_of_consumables_dispensed(results_folder): @@ -646,6 +662,74 @@ def plot_most_expensive_equipment(_df, top_x_values = 10, figname_prefix = "Equi # Extract all costs to a .csv scenario_cost.to_csv(costing_outputs_folder / 'scenario_cost.csv') +# Plot costs +#################################################### +# TODO all these HR plots need to be looked at +# 1. HR +# Stacked bar chart of salaries by cadre +def get_level_and_cadre_from_concatenated_value(_df, varname): + _df['Cadre'] = _df[varname].str.extract(r'=(.*?)\|') + _df['Facility_Level'] = _df[varname].str.extract(r'^[^=]*=[^|]*\|[^=]*=([^|]*)') + return _df +def plot_cost_by_cadre_and_level(_df, figname_prefix, figname_suffix, draw): + if ('Facility_Level' in _df.columns) & ('Cadre' in _df.columns): + pass + else: + _df = get_level_and_cadre_from_concatenated_value(_df, 'OfficerType_FacilityLevel') + + _df = _df[_df.draw == draw] + pivot_df = _df.pivot_table(index='Cadre', columns='Facility_Level', values='Cost', + aggfunc='sum', fill_value=0) + total_salary = round(_df['Cost'].sum(), 0) + total_salary = f"{total_salary:,.0f}" + ax = pivot_df.plot(kind='bar', stacked=True, title='Stacked Bar Graph by Cadre and Facility Level') + plt.ylabel(f'US Dollars') + plt.title(f"Annual {figname_prefix} cost by cadre and facility level") + plt.xticks(rotation=45) + plt.yticks(rotation=0) + plt.text(x=0.3, y=-0.5, s=f"Total {figname_prefix} cost = USD {total_salary}", transform=ax.transAxes, + horizontalalignment='center', fontsize=12, weight='bold', color='black') + plt.savefig(figurespath / f'{figname_prefix}_by_cadre_and_level_{figname_suffix}{draw}.png', dpi=100, + bbox_inches='tight') + plt.close() + +plot_cost_by_cadre_and_level(salary_for_all_staff,figname_prefix = "salary", figname_suffix= f"all_staff_draw", draw = 0) +plot_cost_by_cadre_and_level(salary_for_staff_used_in_scenario.reset_index(),figname_prefix = "salary", figname_suffix= "staff_used_in_scenario_draw", draw = 0) +plot_cost_by_cadre_and_level(recruitment_cost, figname_prefix = "recruitment", figname_suffix= "all_staff") +plot_cost_by_cadre_and_level(preservice_training_cost, figname_prefix = "pre-service training", figname_suffix= "all_staff") +plot_cost_by_cadre_and_level(inservice_training_cost, figname_prefix = "in-service training", figname_suffix= "all_staff") + +def plot_components_of_cost_category(_df, cost_category, figname_suffix): + pivot_df = _df[_df['Cost_Category'] == cost_category].pivot_table(index='Cost_Sub-category', values='Cost', + aggfunc='sum', fill_value=0) + ax = pivot_df.plot(kind='bar', stacked=False, title='Scenario Cost by Category') + plt.ylabel(f'US Dollars') + plt.title(f"Annual {cost_category} cost") + plt.xticks(rotation=45) + plt.yticks(rotation=0) + + # Add text labels on the bars + total_cost = pivot_df['Cost'].sum() + rects = ax.patches + for rect, cost in zip(rects, pivot_df['Cost']): + cost_millions = cost / 1e6 + percentage = (cost / total_cost) * 100 + label_text = f"{cost_millions:.1f}M ({percentage:.1f}%)" + # Place text at the top of the bar + x = rect.get_x() + rect.get_width() / 2 + y = rect.get_height() + ax.text(x, y, label_text, ha='center', va='bottom', fontsize=8, rotation=0) + + total_cost = f"{total_cost:,.0f}" + plt.text(x=0.3, y=-0.5, s=f"Total {cost_category} cost = USD {total_cost}", transform=ax.transAxes, + horizontalalignment='center', fontsize=12, weight='bold', color='black') + + plt.savefig(figurespath / f'{cost_category}_by_cadre_and_level_{figname_suffix}.png', dpi=100, + 
bbox_inches='tight') + plt.close() + +plot_components_of_cost_category(_df = scenario_cost, cost_category = 'Human Resources for Health', figname_suffix = "all_staff") + # Compare financial costs with actual budget data #################################################### From 598867a0173d58382c8db49d12d7f4baf1f1d4e1 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Thu, 10 Oct 2024 11:40:44 +0100 Subject: [PATCH 102/230] update consumables costing to allow for multiple runs and draws --- src/scripts/costing/costing.py | 123 ++++++++++++++++++++++++++++----- 1 file changed, 104 insertions(+), 19 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 846f6883b8..ddab8464cd 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -48,7 +48,7 @@ os.makedirs(figurespath) # Declare period for which the results will be generated (defined inclusively) -TARGET_PERIOD = (Date(2015, 1, 1), Date(2015, 12, 31)) # TODO allow for multi-year costing +TARGET_PERIOD = (Date(2010, 1, 1), Date(2031, 12, 31)) # TODO allow for multi-year costing def drop_outside_period(_df): """Return a dataframe which only includes for which the date is within the limits defined by TARGET_PERIOD""" return _df.drop(index=_df.index[~_df['date'].between(*TARGET_PERIOD)]) @@ -294,17 +294,23 @@ def melt_and_label(df, label): def get_quantity_of_consumables_dispensed(results_folder): def get_counts_of_items_requested(_df): _df = drop_outside_period(_df) - counts_of_available = defaultdict(int) - counts_of_not_available = defaultdict(int) + counts_of_available = defaultdict(lambda: defaultdict(int)) + counts_of_not_available = defaultdict(lambda: defaultdict(int)) + for _, row in _df.iterrows(): + date = row['date'] for item, num in row['Item_Available'].items(): - counts_of_available[item] += num + counts_of_available[date][item] += num for item, num in row['Item_NotAvailable'].items(): - counts_of_not_available[item] += num - return pd.concat( - {'Available': pd.Series(counts_of_available), 'Not_Available': pd.Series(counts_of_not_available)}, - axis=1 - ).fillna(0).astype(int).stack() + counts_of_not_available[date][item] += num + available_df = pd.DataFrame(counts_of_available).fillna(0).astype(int).stack().rename('Available') + not_available_df = pd.DataFrame(counts_of_not_available).fillna(0).astype(int).stack().rename('Not_Available') + + # Combine the two dataframes into one series with MultiIndex (date, item, availability_status) + combined_df = pd.concat([available_df, not_available_df], axis=1).fillna(0).astype(int) + + # Convert to a pd.Series, as expected by the custom_generate_series function + return combined_df.stack() cons_req = summarize( extract_results( @@ -315,9 +321,94 @@ def get_counts_of_items_requested(_df): do_scaling=True) ) - cons_dispensed = cons_req.xs("Available", level=1) # only keep actual dispensed amount, i.e. when available + cons_dispensed = cons_req.xs("Available", level=2) # only keep actual dispensed amount, i.e. 
when available return cons_dispensed +# TODO Extract year of dispensing drugs + +consumables_dispensed = get_quantity_of_consumables_dispensed(results_folder) +consumables_dispensed = consumables_dispensed.reset_index().rename(columns = {'level_0': 'Item_Code', 'level_1': 'year'}) +consumables_dispensed[( 'year', '')] = pd.to_datetime(consumables_dispensed[('year', '')]).dt.year # Extract only year from date +consumables_dispensed[('Item_Code', '')] = pd.to_numeric(consumables_dispensed[('Item_Code', '')]) +quantity_columns = consumables_dispensed.columns.to_list() +quantity_columns = [tup for tup in quantity_columns if ((tup != ('Item_Code', '')) & (tup != ('year', '')))] # exclude item_code and year columns +# Load consumables cost data +unit_price_consumable = workbook_cost["consumables"] +unit_price_consumable = unit_price_consumable.rename(columns=unit_price_consumable.iloc[0]) +unit_price_consumable = unit_price_consumable[['Item_Code', 'Final_price_per_chosen_unit (USD, 2023)']].reset_index(drop=True).iloc[1:] +unit_price_consumable = unit_price_consumable[unit_price_consumable['Item_Code'].notna()] + +# 2.1 Cost of consumables dispensed +#--------------------------------------------------------------------------------------------------------------- +# Multiply number of items needed by cost of consumable +cost_of_consumables_dispensed = consumables_dispensed.merge(unit_price_consumable, left_on = 'Item_Code', right_on = 'Item_Code', validate = 'm:1', how = 'left') +price_column = 'Final_price_per_chosen_unit (USD, 2023)' +cost_of_consumables_dispensed[quantity_columns] = cost_of_consumables_dispensed[quantity_columns].multiply( + cost_of_consumables_dispensed[price_column], axis=0) +total_cost_of_consumables_dispensed = cost_of_consumables_dispensed.groupby(('year', ''))[quantity_columns].sum() +total_cost_of_consumables_dispensed = total_cost_of_consumables_dispensed.reset_index() + +# 2.2 Cost of consumables stocked (quantity needed for what is dispensed) +#--------------------------------------------------------------------------------------------------------------- +# Stocked amount should be higher than dispensed because of i. excess capacity, ii. theft, iii. 
expiry +# While there are estimates in the literature of what % these might be, we agreed that it is better to rely upon +# an empirical estimate based on OpenLMIS data +# Estimate the stock to dispensed ratio from OpenLMIS data +lmis_consumable_usage = pd.read_csv(path_for_new_resourcefiles / "ResourceFile_Consumables_availability_and_usage.csv") +# Collapse individual facilities +lmis_consumable_usage_by_item_level_month = lmis_consumable_usage.groupby(['category', 'item_code', 'district', 'fac_type_tlo', 'month'])[['closing_bal', 'dispensed', 'received']].sum() +df = lmis_consumable_usage_by_item_level_month # Drop rows where monthly OpenLMIS data wasn't available +df = df.loc[df.index.get_level_values('month') != "Aggregate"] +opening_bal_january = df.loc[df.index.get_level_values('month') == 'January', 'closing_bal'] + \ + df.loc[df.index.get_level_values('month') == 'January', 'dispensed'] - \ + df.loc[df.index.get_level_values('month') == 'January', 'received'] +closing_bal_december = df.loc[df.index.get_level_values('month') == 'December', 'closing_bal'] +total_consumables_inflow_during_the_year = df.loc[df.index.get_level_values('month') != 'January', 'received'].groupby(level=[0,1,2,3]).sum() +\ + opening_bal_january.reset_index(level='month', drop=True) -\ + closing_bal_december.reset_index(level='month', drop=True) +total_consumables_outflow_during_the_year = df['dispensed'].groupby(level=[0,1,2,3]).sum() +inflow_to_outflow_ratio = total_consumables_inflow_during_the_year.div(total_consumables_outflow_during_the_year, fill_value=1) + +# Edit outlier ratios +inflow_to_outflow_ratio.loc[inflow_to_outflow_ratio < 1] = 1 # Ratio can't be less than 1 +inflow_to_outflow_ratio.loc[inflow_to_outflow_ratio > inflow_to_outflow_ratio.quantile(0.95)] = inflow_to_outflow_ratio.quantile(0.95) # Trim values greater than the 95th percentile +average_inflow_to_outflow_ratio_ratio = inflow_to_outflow_ratio.mean() + +# Multiply number of items needed by cost of consumable +inflow_to_outflow_ratio_by_consumable = inflow_to_outflow_ratio.groupby(level='item_code').mean() +excess_stock_ratio = inflow_to_outflow_ratio_by_consumable - 1 +excess_stock_ratio = excess_stock_ratio.reset_index().rename(columns = {0: 'excess_stock_proportion_of_dispensed'}) +# TODO Consider whether a more disaggregated version of the ratio dictionary should be applied +cost_of_excess_consumables_stocked = consumables_dispensed.merge(unit_price_consumable, left_on = 'Item_Code', right_on = 'Item_Code', validate = 'm:1', how = 'left') +cost_of_excess_consumables_stocked = cost_of_excess_consumables_stocked.merge(excess_stock_ratio, left_on = 'Item_Code', right_on = 'item_code', validate = 'm:1', how = 'left') +cost_of_excess_consumables_stocked.loc[cost_of_excess_consumables_stocked.excess_stock_proportion_of_dispensed.isna(), 'excess_stock_proportion_of_dispensed'] = average_inflow_to_outflow_ratio_ratio - 1# TODO disaggregate the average by program +cost_of_excess_consumables_stocked[quantity_columns] = cost_of_excess_consumables_stocked[quantity_columns].multiply(cost_of_excess_consumables_stocked[price_column], axis=0) +cost_of_excess_consumables_stocked[quantity_columns] = cost_of_excess_consumables_stocked[quantity_columns].multiply(cost_of_excess_consumables_stocked['excess_stock_proportion_of_dispensed'], axis=0) +total_cost_of_excess_consumables_stocked = cost_of_excess_consumables_stocked.groupby(('year', ''))[quantity_columns].sum() +total_cost_of_excess_consumables_stocked = 
total_cost_of_excess_consumables_stocked.reset_index() + +# Add to financial costs dataframe +# Function to melt and label the cost category +def melt_and_label_consumables_cost(_df, label): + multi_index = pd.MultiIndex.from_tuples(_df.columns) + _df.columns = multi_index + melted_df = pd.melt(_df, id_vars='year').rename(columns = {'variable_0': 'draw', 'variable_1': 'stat'}) + melted_df['Cost_Sub-category'] = label + return melted_df + +consumable_costs = [ + (total_cost_of_consumables_dispensed, 'cost_of_consumables_dispensed'), + (total_cost_of_excess_consumables_stocked, 'cost_of_consumables_stocked'), +] +# Iterate through additional costs, melt and concatenate +for df, label in consumable_costs: + new_df = melt_and_label_consumables_cost(df, label) + scenario_cost = pd.concat([scenario_cost, new_df], ignore_index=True) +scenario_cost.loc[scenario_cost.Cost_Category.isna(), 'Cost_Category'] = 'Medical consumables' +#scenario_cost['value'] = scenario_cost['value'].apply(pd.to_numeric, errors='coerce') +#scenario_cost.to_csv(figurespath / 'scenario_cost.csv') + +''' consumables_dispensed_under_perfect_availability = get_quantity_of_consumables_dispensed(consumables_results_folder)[9] consumables_dispensed_under_perfect_availability = consumables_dispensed_under_perfect_availability['mean'].to_dict() # TODO incorporate uncertainty in estimates consumables_dispensed_under_perfect_availability = defaultdict(int, {int(key): value for key, value in @@ -508,16 +599,8 @@ def plot_cost_by_category(_df, suffix , figname_prefix = 'Consumables'): plot_cost_by_category(full_cons_cost_df, suffix = 'perfect_availability' , figname_prefix = 'Consumables') plot_cost_by_category(full_cons_cost_df, suffix = 'default_availability' , figname_prefix = 'Consumables') +''' -# Add consumable costs to the financial cost dataframe -consumable_cost_subcategories = ['total_cost_of_consumables_dispensed', 'total_cost_of_excess_consumables_stocked'] -consumable_costs = pd.DataFrame({ - 'Cost_Category': ['Consumables'] * len(consumable_cost_subcategories), - 'Cost_Sub-category': consumable_cost_subcategories, - 'Cost': [total_cost_of_consumables_dispensed_under_default_availability, total_cost_of_excess_consumables_stocked_under_default_availability] -}) -# Append new_data to scenario_cost_financial -scenario_cost = pd.concat([scenario_cost, consumable_costs], ignore_index=True) # %% # 3. Equipment cost @@ -547,6 +630,7 @@ def plot_cost_by_category(_df, suffix , figname_prefix = 'Consumables'): #unit_cost_equipment_small = unit_cost_equipment[['Item_code', 'Facility_Level', 'Quantity','service_fee_annual', 'spare_parts_annual', 'upfront_repair_cost_annual', 'replacement_cost_annual']] #equipment_cost_dict = unit_cost_equipment_small.groupby('Facility_Level').apply(lambda x: x.to_dict(orient='records')).to_dict() +''' # Get list of equipment used by district and level equip = pd.DataFrame( log_equipment['tlo.methods.healthsystem.summary']['EquipmentEverUsed_ByFacilityID'] @@ -655,6 +739,7 @@ def plot_most_expensive_equipment(_df, top_x_values = 10, figname_prefix = "Equi # TODO Collapse facility IDs by level of care to get the total number of facilities at each level using an item # TODO Multiply number of facilities by level with the quantity needed of each equipment and collapse to get total number of equipment (nationally) # TODO Which equipment needs to be newly purchased (currently no assumption made for equipment with cost > $250,000) +''' # 4. 
Facility running costs # Average running costs by facility level and district times the number of facilities in the simulation From 8b1080741dc46348022a2e016a4d4dc6207148b5 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Thu, 10 Oct 2024 11:41:23 +0100 Subject: [PATCH 103/230] Update pre-service training costs in the costing RF + add RM R7 data --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 720634fec5..081ca8f6b2 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6426eeffc40c0b028194a3446457cfe1bcf17e58baaa9f57421dff434478d3c1 -size 4122347 +oid sha256:bceffce75f02bcd6ad80c7c4508cec24a5e834863fa489c76751fc2ec76f8b02 +size 4266421 From 517c4c541f653c8ef49241ff8711b21aa20e94f2 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Fri, 11 Oct 2024 14:48:06 +0100 Subject: [PATCH 104/230] Update pre-service and in-service training cost calculation - see resource file (human resources tab - 'Description' column) for further detail --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- src/scripts/costing/costing.py | 17 +++++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 081ca8f6b2..2ccb7846ad 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bceffce75f02bcd6ad80c7c4508cec24a5e834863fa489c76751fc2ec76f8b02 -size 4266421 +oid sha256:96b25d9edc8abbdce58608a86b42f4782d77ac548cbf4e2b4016becee3dd8f80 +size 4331748 diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index ddab8464cd..29e53b1987 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -244,11 +244,14 @@ def merge_cost_and_model_data(cost_df, model_df, varnames): varnames = ['annual_attrition_rate', 'licensure_exam_passing_rate', 'graduation_rate', 'absorption_rate_of_students_into_public_workforce', 'proportion_of_workforce_recruited_from_abroad', - 'annual_preservice_training_cost_percapita_usd']) -preservice_training_cost['Cost'] = preservice_training_cost['annual_attrition_rate'] * preservice_training_cost['Staff_Count'] * \ - (1/(preservice_training_cost['absorption_rate_of_students_into_public_workforce'] + preservice_training_cost['proportion_of_workforce_recruited_from_abroad'])) * \ - (1/preservice_training_cost['graduation_rate']) * (1/preservice_training_cost['licensure_exam_passing_rate']) * \ - preservice_training_cost['annual_preservice_training_cost_percapita_usd'] + 'preservice_training_cost_per_staff_recruited_usd']) +preservice_training_cost['Annual_cost_per_staff_recruited'] = preservice_training_cost['preservice_training_cost_per_staff_recruited_usd'] *\ + (1/(preservice_training_cost['absorption_rate_of_students_into_public_workforce'] + preservice_training_cost['proportion_of_workforce_recruited_from_abroad'])) *\ + (1/preservice_training_cost['graduation_rate']) * (1/preservice_training_cost['licensure_exam_passing_rate']) *\ + preservice_training_cost['annual_attrition_rate'] +# Cost per student trained * 1/Rate of absorption from the local and foreign graduates * 1/Graduation rate * attrition rate +# the inverse of attrition 
rate is the average expected tenure; and the preservice training cost needs to be divided by the average tenure +preservice_training_cost['Cost'] = preservice_training_cost['Annual_cost_per_staff_recruited'] * preservice_training_cost['Staff_Count'] # not multiplied with attrition rate again because this is already factored into 'Annual_cost_per_staff_recruited' preservice_training_cost = preservice_training_cost[['draw', 'run', 'year', 'OfficerType_FacilityLevel', 'Cost']].set_index(['draw', 'run', 'year', 'OfficerType_FacilityLevel']).unstack(level=['draw', 'run']) preservice_training_cost = preservice_training_cost.apply(lambda x: pd.to_numeric(x, errors='coerce')) preservice_training_cost = summarize(preservice_training_cost, only_mean = True, collapse_columns=True) @@ -264,8 +267,6 @@ def merge_cost_and_model_data(cost_df, model_df, varnames): inservice_training_cost = summarize(inservice_training_cost, only_mean = True, collapse_columns=True) inservice_training_cost_for_all_staff = inservice_training_cost.groupby(['year']).sum() -# TODO check why annual_inservice_training_cost for DCSA is NaN in the merged_df - # Create a dataframe to store financial costs # Function to melt and label the cost category def melt_and_label(df, label): @@ -288,7 +289,7 @@ def melt_and_label(df, label): scenario_cost = pd.concat([scenario_cost, melted]) scenario_cost['Cost_Category'] = 'Human Resources for Health' # TODO Consider calculating economic cost of HR by multiplying salary times staff count with cadres_utilisation_rate -scenario_cost.to_csv(figurespath / 'scenario_cost.csv') +#scenario_cost.to_csv(figurespath / 'scenario_cost.csv') # %% # 2. Consumables cost def get_quantity_of_consumables_dispensed(results_folder): From 91842c99ccacee0511850bc4e82e4ed3630f6b76 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 14 Oct 2024 18:32:06 +0100 Subject: [PATCH 105/230] Update equipment cost calculation to account for each draw and run --- src/scripts/costing/costing.py | 644 ++++++++++++++++++--------------- 1 file changed, 347 insertions(+), 297 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 29e53b1987..151c3934ce 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -78,6 +78,9 @@ def drop_outside_period(_df): params = extract_params(results_folder) final_year_of_simulation = max(log['tlo.simulation']['info']['date']).year first_year_of_simulation = min(log['tlo.simulation']['info']['date']).year +draws = params.index.unique().tolist() # list of draws +runs = range(0, info['runs_per_draw']) +years = list(range(first_year_of_simulation, final_year_of_simulation + 1)) # Load cost input files #------------------------ @@ -105,9 +108,6 @@ def drop_outside_period(_df): year_HR_scaling_by_level_and_officer_type = params[params.module_param == 'HealthSystem:year_HR_scaling_by_level_and_officer_type'].reset_index() yearly_HR_scaling_mode = params[params.module_param == 'HealthSystem:yearly_HR_scaling_mode'].reset_index() -draws = params.index.unique().tolist() # list of draws -years = list(range(first_year_of_simulation, final_year_of_simulation + 1)) - # TODO add the following parameters to estimate HR availability per year - HealthSystem:yearly_HR_scaling_mode, HealthSystem:HR_scaling_by_level_and_officer_type_mode, HealthSystem:year_HR_scaling_by_level_and_officer_type hr_df_columns = pd.read_csv(resourcefilepath / "healthsystem/human_resources/actual/ResourceFile_Daily_Capabilities.csv").columns.drop(['Facility_ID', 
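# A minimal numeric sketch of the pre-service training cost formula above, with every input value
# made up purely for illustration:
#   cost per graduate trained                      = USD 10,000
#   absorption rate + share recruited from abroad  = 0.7 + 0.1 = 0.8  -> scale by 1/0.8
#   graduation rate                                = 0.8              -> scale by 1/0.8
#   licensure exam passing rate                    = 0.8              -> scale by 1/0.8
#   annual attrition rate                          = 0.05             -> annualise over the expected tenure
# Annual_cost_per_staff_recruited = 10,000 * (1/0.8) * (1/0.8) * (1/0.8) * 0.05 = about USD 977,
# and with Staff_Count = 100 for a given cadre and level, Cost = about USD 97,656 per year.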
'Officer_Category']) facilities = pd.read_csv(resourcefilepath / "healthsystem/human_resources/actual/ResourceFile_Daily_Capabilities.csv")['Facility_ID'].unique().tolist() @@ -409,6 +409,350 @@ def melt_and_label_consumables_cost(_df, label): #scenario_cost['value'] = scenario_cost['value'].apply(pd.to_numeric, errors='coerce') #scenario_cost.to_csv(figurespath / 'scenario_cost.csv') + +# %% +# 3. Equipment cost +# Total cost of equipment required as per SEL (HSSP-III) only at facility IDs where it been used in the simulation +# Load unit costs of equipment +unit_cost_equipment = workbook_cost["equipment"] +unit_cost_equipment = unit_cost_equipment.rename(columns=unit_cost_equipment.iloc[7]).reset_index(drop=True).iloc[8:] +# Calculate necessary costs based on HSSP-III assumptions +unit_cost_equipment['replacement_cost_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.1 / 8 if row['unit_purchase_cost'] < 250000 else 0, axis=1) # 10% of the items over 8 years +unit_cost_equipment['service_fee_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.8 / 8 if row['unit_purchase_cost'] > 1000 else 0, axis=1) # 80% of the value of the item over 8 years +unit_cost_equipment['spare_parts_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.2 / 8 if row['unit_purchase_cost'] > 1000 else 0, axis=1) # 20% of the value of the item over 8 years +unit_cost_equipment['upfront_repair_cost_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.2 * 0.2 / 8 if row['unit_purchase_cost'] < 250000 else 0, axis=1) # 20% of the value of 20% of the items over 8 years +# TODO the above line assumes that the life span of each item of equipment is 80 years. This needs to be updated using realistic life span data + +unit_cost_equipment = unit_cost_equipment[['Item_code','Equipment_tlo', + 'service_fee_annual', 'spare_parts_annual', 'upfront_repair_cost_annual', 'replacement_cost_annual', + 'Health Post_prioritised', 'Community_prioritised', 'Health Center_prioritised', 'District_prioritised', 'Central_prioritised']] +unit_cost_equipment = unit_cost_equipment.rename(columns={col: 'Quantity_' + col.replace('_prioritised', '') for col in unit_cost_equipment.columns if col.endswith('_prioritised')}) +unit_cost_equipment = unit_cost_equipment.rename(columns={col: col.replace(' ', '_') for col in unit_cost_equipment.columns}) +unit_cost_equipment = unit_cost_equipment[unit_cost_equipment.Item_code.notna()] + +unit_cost_equipment = pd.wide_to_long(unit_cost_equipment, stubnames=['Quantity_'], + i=['Item_code', 'Equipment_tlo', 'service_fee_annual', 'spare_parts_annual', 'upfront_repair_cost_annual', 'replacement_cost_annual'], + j='Facility_Level', suffix='(\d+|\w+)').reset_index() +facility_level_mapping = {'Health_Post': '0', 'Health_Center': '1a', 'Community': '1b', 'District': '2', 'Central': '3'} +unit_cost_equipment['Facility_Level'] = unit_cost_equipment['Facility_Level'].replace(facility_level_mapping) +unit_cost_equipment = unit_cost_equipment.rename(columns = {'Quantity_': 'Quantity'}) +#unit_cost_equipment_small = unit_cost_equipment[['Item_code', 'Facility_Level', 'Quantity','service_fee_annual', 'spare_parts_annual', 'upfront_repair_cost_annual', 'replacement_cost_annual']] +#equipment_cost_dict = unit_cost_equipment_small.groupby('Facility_Level').apply(lambda x: x.to_dict(orient='records')).to_dict() + +# Get list of equipment used by district and level +def get_equipment_used_by_district_and_facility(_df: 
pd.Series) -> pd.Series: + """Summarise the parsed logged-key results for one draw (as dataframe) into a pd.Series.""" + _df = _df.pivot_table(index=['District', 'Facility_Level'], + values='EquipmentEverUsed', + aggfunc='first') + _df.index.name = 'year' + return _df['EquipmentEverUsed'] + +list_of_equipment_used_by_draw_and_run = extract_results( + Path(results_folder), + module='tlo.methods.healthsystem.summary', + key='EquipmentEverUsed_ByFacilityID', + custom_generate_series=get_equipment_used_by_district_and_facility, + do_scaling=False, +) +for col in list_of_equipment_used_by_draw_and_run.columns: + list_of_equipment_used_by_draw_and_run[col] = list_of_equipment_used_by_draw_and_run[col].apply(ast.literal_eval) + +# Initialize an empty DataFrame +equipment_cost_across_sim = pd.DataFrame() + +# Extract equipment cost for each draw and run +for d in draws: + for r in runs: + print(f"Now processing draw {d} and run {r}") + # Extract a list of equipment which was used at each facility level within each district + equipment_used = {district: {level: [] for level in fac_levels} for district in districts} # create a dictionary with a key for each district and facility level + list_of_equipment_used_by_current_draw_and_run = list_of_equipment_used_by_draw_and_run[(d, r)].reset_index() + for dist in districts: + for level in fac_levels: + equipment_used_subset = list_of_equipment_used_by_current_draw_and_run[(list_of_equipment_used_by_current_draw_and_run['District'] == dist) & (list_of_equipment_used_by_current_draw_and_run['Facility_Level'] == level)] + equipment_used_subset.columns = ['District', 'Facility_Level', 'EquipmentEverUsed'] + equipment_used[dist][level] = set().union(*equipment_used_subset['EquipmentEverUsed']) + equipment_used = pd.concat({ + k: pd.DataFrame.from_dict(v, 'index') for k, v in equipment_used.items()}, + axis=0) + full_list_of_equipment_used = set().union(*equip['EquipmentEverUsed']) + + equipment_df = pd.DataFrame() + equipment_df.index = equipment_used.index + for item in full_list_of_equipment_used: + equipment_df[str(item)] = 0 + for dist_fac_index in equipment_df.index: + equipment_df.loc[equipment_df.index == dist_fac_index, str(item)] = equipment_used[equipment_used.index == dist_fac_index].isin([item]).any(axis=1) + #equipment_df.to_csv('./outputs/equipment_use.csv') + + equipment_df = equipment_df.reset_index().rename(columns = {'level_0' : 'District', 'level_1': 'Facility_Level'}) + equipment_df = pd.melt(equipment_df, id_vars = ['District', 'Facility_Level']).rename(columns = {'variable': 'Item_code', 'value': 'whether_item_was_used'}) + equipment_df['Item_code'] = pd.to_numeric(equipment_df['Item_code']) + # Merge the count of facilities by district and level + equipment_df = equipment_df.merge(mfl[['District', 'Facility_Level','Facility_Count']], on = ['District', 'Facility_Level'], how = 'left') + equipment_df.loc[equipment_df.Facility_Count.isna(), 'Facility_Count'] = 0 + + # Merge the two datasets to calculate cost + equipment_cost = pd.merge(equipment_df, unit_cost_equipment[['Item_code', 'Equipment_tlo', 'Facility_Level', 'Quantity','service_fee_annual', 'spare_parts_annual', 'upfront_repair_cost_annual', 'replacement_cost_annual']], + on = ['Item_code', 'Facility_Level'], how = 'left', validate = "m:1") + categories_of_equipment_cost = ['replacement_cost', 'upfront_repair_cost', 'spare_parts', 'service_fee'] + for cost_category in categories_of_equipment_cost: + # Rename unit cost columns + unit_cost_column = cost_category + '_annual_unit' + 
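# A minimal numeric sketch of the HSSP-III annualisation rules applied above, using a made-up unit
# purchase cost of USD 2,000 (above the USD 1,000 service/spare-parts threshold and below the
# USD 250,000 replacement/repair threshold):
#   replacement_cost_annual    = 2,000 * 0.1 / 8       = USD 25.0
#   service_fee_annual         = 2,000 * 0.8 / 8       = USD 200.0
#   spare_parts_annual         = 2,000 * 0.2 / 8       = USD 50.0
#   upfront_repair_cost_annual = 2,000 * 0.2 * 0.2 / 8 = USD 10.0
# Each annual unit cost is then multiplied by whether the item was used, the quantity held per
# facility, and the number of facilities of that level in the district (the multiplication below).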
equipment_cost = equipment_cost.rename(columns = {cost_category + '_annual':unit_cost_column }) + equipment_cost[cost_category + '_annual_total'] = equipment_cost[cost_category + '_annual_unit'] * equipment_cost['whether_item_was_used'] * equipment_cost['Quantity'] * equipment_cost['Facility_Count'] + #equipment_cost['total_equipment_cost_annual'] = equipment_cost[[item + '_annual_total' for item in categories_of_equipment_cost]].sum(axis = 1) + equipment_cost['year'] = final_year_of_simulation - 1 + if equipment_cost_across_sim.empty: + equipment_cost_across_sim = equipment_cost.groupby('year')[[item + '_annual_total' for item in categories_of_equipment_cost]].sum() + equipment_cost_across_sim['draw'] = d + equipment_cost_across_sim['run'] = r + else: + equipment_cost_for_current_sim = equipment_cost.groupby('year')[[item + '_annual_total' for item in categories_of_equipment_cost]].sum() + equipment_cost_for_current_sim['draw'] = d + equipment_cost_for_current_sim['run'] = r + # Concatenate the results + equipment_cost_across_sim = pd.concat([equipment_cost_across_sim, equipment_cost_for_current_sim], axis=0) + +equipment_costs = pd.melt(equipment_cost_across_sim, + id_vars=['draw', 'run'], # Columns to keep + value_vars=[col for col in equipment_cost_across_sim.columns if col.endswith('_annual_total')], # Columns to unpivot + var_name='Cost_Sub-category', # New column name for the 'sub-category' of cost + value_name='value') # New column name for the values + +equipment_costs_summary = pd.concat( + { + 'mean': equipment_costs.groupby(by=['draw', 'Cost_Sub-category'], sort=False)['value'].mean(), + 'lower': equipment_costs.groupby(by=['draw', 'Cost_Sub-category'], sort=False)['value'].quantile(0.025), + 'upper': equipment_costs.groupby(by=['draw', 'Cost_Sub-category'], sort=False)['value'].quantile(0.975), + }, + axis=1 +) +equipment_costs_summary = pd.melt(equipment_costs_summary.reset_index(), + id_vars=['draw', 'Cost_Sub-category'], # Columns to keep + value_vars=['mean', 'lower', 'upper'], # Columns to unpivot + var_name='stat', # New column name for the 'sub-category' of cost + value_name='value') +equipment_costs_summary['Cost_Category'] = 'Equipment purchase and maintenance' +# Assume that the annual costs are constant each year of the simulation +equipment_costs_summary = pd.concat([equipment_costs_summary.assign(year=year) for year in years]) +equipment_costs_summary = equipment_costs_summary.reset_index(drop=True) +scenario_cost = pd.concat([scenario_cost, equipment_costs_summary], ignore_index=True) + +# 4. Facility running costs +# Average running costs by facility level and district times the number of facilities in the simulation + +# Extract all costs to a .csv +scenario_cost.to_csv(costing_outputs_folder / 'scenario_cost.csv') + +# Plot costs +#################################################### +# TODO all these HR plots need to be looked at +# 1. 
HR +# Stacked bar chart of salaries by cadre +def get_level_and_cadre_from_concatenated_value(_df, varname): + _df['Cadre'] = _df[varname].str.extract(r'=(.*?)\|') + _df['Facility_Level'] = _df[varname].str.extract(r'^[^=]*=[^|]*\|[^=]*=([^|]*)') + return _df +def plot_cost_by_cadre_and_level(_df, figname_prefix, figname_suffix, draw): + if ('Facility_Level' in _df.columns) & ('Cadre' in _df.columns): + pass + else: + _df = get_level_and_cadre_from_concatenated_value(_df, 'OfficerType_FacilityLevel') + + _df = _df[_df.draw == draw] + pivot_df = _df.pivot_table(index='Cadre', columns='Facility_Level', values='Cost', + aggfunc='sum', fill_value=0) + total_salary = round(_df['Cost'].sum(), 0) + total_salary = f"{total_salary:,.0f}" + ax = pivot_df.plot(kind='bar', stacked=True, title='Stacked Bar Graph by Cadre and Facility Level') + plt.ylabel(f'US Dollars') + plt.title(f"Annual {figname_prefix} cost by cadre and facility level") + plt.xticks(rotation=45) + plt.yticks(rotation=0) + plt.text(x=0.3, y=-0.5, s=f"Total {figname_prefix} cost = USD {total_salary}", transform=ax.transAxes, + horizontalalignment='center', fontsize=12, weight='bold', color='black') + plt.savefig(figurespath / f'{figname_prefix}_by_cadre_and_level_{figname_suffix}{draw}.png', dpi=100, + bbox_inches='tight') + plt.close() + +plot_cost_by_cadre_and_level(salary_for_all_staff,figname_prefix = "salary", figname_suffix= f"all_staff_draw", draw = 0) +plot_cost_by_cadre_and_level(salary_for_staff_used_in_scenario.reset_index(),figname_prefix = "salary", figname_suffix= "staff_used_in_scenario_draw", draw = 0) +plot_cost_by_cadre_and_level(recruitment_cost, figname_prefix = "recruitment", figname_suffix= "all_staff") +plot_cost_by_cadre_and_level(preservice_training_cost, figname_prefix = "pre-service training", figname_suffix= "all_staff") +plot_cost_by_cadre_and_level(inservice_training_cost, figname_prefix = "in-service training", figname_suffix= "all_staff") + +def plot_components_of_cost_category(_df, cost_category, figname_suffix): + pivot_df = _df[_df['Cost_Category'] == cost_category].pivot_table(index='Cost_Sub-category', values='Cost', + aggfunc='sum', fill_value=0) + ax = pivot_df.plot(kind='bar', stacked=False, title='Scenario Cost by Category') + plt.ylabel(f'US Dollars') + plt.title(f"Annual {cost_category} cost") + plt.xticks(rotation=45) + plt.yticks(rotation=0) + + # Add text labels on the bars + total_cost = pivot_df['Cost'].sum() + rects = ax.patches + for rect, cost in zip(rects, pivot_df['Cost']): + cost_millions = cost / 1e6 + percentage = (cost / total_cost) * 100 + label_text = f"{cost_millions:.1f}M ({percentage:.1f}%)" + # Place text at the top of the bar + x = rect.get_x() + rect.get_width() / 2 + y = rect.get_height() + ax.text(x, y, label_text, ha='center', va='bottom', fontsize=8, rotation=0) + + total_cost = f"{total_cost:,.0f}" + plt.text(x=0.3, y=-0.5, s=f"Total {cost_category} cost = USD {total_cost}", transform=ax.transAxes, + horizontalalignment='center', fontsize=12, weight='bold', color='black') + + plt.savefig(figurespath / f'{cost_category}_by_cadre_and_level_{figname_suffix}.png', dpi=100, + bbox_inches='tight') + plt.close() + +plot_components_of_cost_category(_df = scenario_cost, cost_category = 'Human Resources for Health', figname_suffix = "all_staff") + + +# Compare financial costs with actual budget data +#################################################### +# Import budget data +budget_data = workbook_cost["budget_validation"] +list_of_costs_for_comparison = 
['total_salary_for_all_staff', 'total_cost_of_consumables_dispensed', 'total_cost_of_consumables_stocked'] +real_budget = [budget_data[budget_data['Category'] == list_of_costs_for_comparison[0]]['Budget_in_2023USD'].values[0], + budget_data[budget_data['Category'] == list_of_costs_for_comparison[1]]['Budget_in_2023USD'].values[0], + budget_data[budget_data['Category'] == list_of_costs_for_comparison[1]]['Budget_in_2023USD'].values[0]] +model_cost = [scenario_cost_financial[scenario_cost_financial['Cost_Sub-category'] == list_of_costs_for_comparison[0]]['Value_2023USD'].values[0], + scenario_cost_financial[scenario_cost_financial['Cost_Sub-category'] == list_of_costs_for_comparison[1]]['Value_2023USD'].values[0], + scenario_cost_financial[scenario_cost_financial['Cost_Sub-category'] == list_of_costs_for_comparison[2]]['Value_2023USD'].values[0]] + +plt.clf() +plt.scatter(real_budget, model_cost) +# Plot a line representing a 45-degree angle +min_val = min(min(real_budget), min(model_cost)) +max_val = max(max(real_budget), max(model_cost)) +plt.plot([min_val, max_val], [min_val, max_val], 'r--', label='45-degree line') + +# Format x and y axis labels to display in millions +formatter = FuncFormatter(lambda x, _: '{:,.0f}M'.format(x / 1e6)) +plt.gca().xaxis.set_major_formatter(formatter) +plt.gca().yaxis.set_major_formatter(formatter) +# Add labels for each point +hr_label = 'HR_salary ' + f'{round(model_cost[0] / real_budget[0], 2)}' +consumables_label1= 'Consumables dispensed ' + f'{round(model_cost[1] / real_budget[1], 2)}' +consumables_label2 = 'Consumables stocked ' + f'{round(model_cost[2] / real_budget[2], 2)}' +plotlabels = [hr_label, consumables_label1, consumables_label2] +for i, txt in enumerate(plotlabels): + plt.text(real_budget[i], model_cost[i], txt, ha='right') + +plt.xlabel('Real Budget') +plt.ylabel('Model Cost') +plt.title('Real Budget vs Model Cost') +plt.savefig(costing_outputs_folder / 'Cost_validation.png') + +# Explore the ratio of consumable inflows to outflows +###################################################### +# TODO: Only consider the months for which original OpenLMIS data was available for closing_stock and dispensed +def plot_inflow_to_outflow_ratio(_dict, groupby_var): + # Convert Dict to dataframe + flattened_data = [(level1, level2, level3, level4, value) for (level1, level2, level3, level4), value in + inflow_to_outflow_ratio.items()] # Flatten dictionary into a list of tuples + _df = pd.DataFrame(flattened_data, columns=['category', 'item_code', 'district', 'fac_type_tlo', 'inflow_to_outflow_ratio']) # Convert flattened data to DataFrame + + # Plot the bar plot + plt.figure(figsize=(10, 6)) + sns.barplot(data=_df , x=groupby_var, y= 'inflow_to_outflow_ratio', errorbar=None) + + # Add points representing the distribution of individual values + sns.stripplot(data=_df, x=groupby_var, y='inflow_to_outflow_ratio', color='black', size=5, alpha=0.2) + + # Set labels and title + plt.xlabel(groupby_var) + plt.ylabel('Inflow to Outflow Ratio') + plt.title('Average Inflow to Outflow Ratio by ' + f'{groupby_var}') + plt.xticks(rotation=45) + + # Show plot + plt.tight_layout() + plt.savefig(costing_outputs_folder / 'inflow_to_outflow_ratio_by' f'{groupby_var}' ) + +plot_inflow_to_outflow_ratio(inflow_to_outflow_ratio, 'fac_type_tlo') +plot_inflow_to_outflow_ratio(inflow_to_outflow_ratio, 'district') +plot_inflow_to_outflow_ratio(inflow_to_outflow_ratio, 'item_code') +plot_inflow_to_outflow_ratio(inflow_to_outflow_ratio, 'category') + +# Plot fraction staff time 
used +fraction_stafftime_average = salary_staffneeded_df.groupby('Officer_Category')['Value'].sum() +fraction_stafftime_average. plot(kind = "bar") +plt.xlabel('Cadre') +plt.ylabel('Fraction time needed') +plt.savefig(costing_outputs_folder / 'hr_time_need_economic_cost.png') + +# Plot salary costs by cadre and facility level +# Group by cadre and level +salary_for_all_staff[['Officer_Type', 'Facility_Level']] = salary_for_all_staff['OfficerType_FacilityLevel'].str.split('|', expand=True) +salary_for_all_staff['Officer_Type'] = salary_for_all_staff['Officer_Type'].str.replace('Officer_Type=', '') +salary_for_all_staff['Facility_Level'] = salary_for_all_staff['Facility_Level'].str.replace('Facility_Level=', '') +total_salary_by_cadre = salary_for_all_staff.groupby('Officer_Type')['Total_salary_by_cadre_and_level'].sum() +total_salary_by_level = salary_for_all_staff.groupby('Facility_Level')['Total_salary_by_cadre_and_level'].sum() + +# Plot by cadre +plt.clf() +total_salary_by_cadre.plot(kind='bar') +plt.xlabel('Officer_category') +plt.ylabel('Total Salary') +plt.title('Total Salary by Cadre') +plt.savefig(costing_outputs_folder / 'total_salary_by_cadre.png') + +# Plot by level +plt.clf() +total_salary_by_level.plot(kind='bar') +plt.xlabel('Facility_Level') +plt.ylabel('Total Salary') +plt.title('Total Salary by Facility_Level') +plt.savefig(costing_outputs_folder / 'total_salary_by_level.png') + +''' +# Scratch pad + +log['tlo.methods.healthsystem']['Capacity']['Frac_Time_Used_By_Facility_ID'] # for district disaggregation + +# Aggregate Daily capabilities to total used by cadre and facility level + +# log['tlo.methods.healthsystem.summary']['Capacity']['Frac_Time_Used_By_OfficerType'] +# 1.2 HR cost by Treatment_ID +# For HR cost by Treatment_ID, multiply total cost by Officer type by fraction of time used for treatment_ID +log['tlo.methods.healthsystem.summary']['HSI_Event']['TREATMENT_ID'] # what does this represent? why are there 3 rows (2 scenarios) +# But what we need is the HR use by Treatment_ID - Leave this for later? + +# log['tlo.scenario'] +log['tlo.methods.healthsystem.summary']['HSI_Event']['Number_By_Appt_Type_Code'] + + +df = pd.DataFrame(log['tlo.methods.healthsystem.summary']) +df.to_csv(outputfilepath / 'temp.csv') + +def read_parameters(self, data_folder): + """ + 1. Reads the costing resource file + 2. Declares the costing parameters + """ + # Read the resourcefile + # Short cut to parameters dict + p = self.parameters + + workbook = pd.read_excel((resourcefilepath / "ResourceFile_Costing.xlsx"), + sheet_name = None) + + p["human_resources"] = workbook["human_resources"] + +workbook = pd.read_excel((resourcefilepath / "ResourceFile_Costing.xlsx"), + sheet_name = None) +human_resources = workbook["human_resources"] + +''' + ''' consumables_dispensed_under_perfect_availability = get_quantity_of_consumables_dispensed(consumables_results_folder)[9] consumables_dispensed_under_perfect_availability = consumables_dispensed_under_perfect_availability['mean'].to_dict() # TODO incorporate uncertainty in estimates @@ -602,88 +946,7 @@ def plot_cost_by_category(_df, suffix , figname_prefix = 'Consumables'): plot_cost_by_category(full_cons_cost_df, suffix = 'default_availability' , figname_prefix = 'Consumables') ''' - -# %% -# 3. 
Equipment cost -# Total cost of equipment required as per SEL (HSSP-III) only at facility IDs where it been used in the simulation -unit_cost_equipment = workbook_cost["equipment"] -unit_cost_equipment = unit_cost_equipment.rename(columns=unit_cost_equipment.iloc[7]).reset_index(drop=True).iloc[8:] -# Calculate necessary costs based on HSSP-III assumptions -unit_cost_equipment['service_fee_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.8 / 8 if row['unit_purchase_cost'] > 1000 else 0, axis=1) # 80% of the value of the item over 8 years -unit_cost_equipment['spare_parts_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.2 / 8 if row['unit_purchase_cost'] > 1000 else 0, axis=1) # 20% of the value of the item over 8 years -unit_cost_equipment['upfront_repair_cost_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.2 * 0.2 / 8 if row['unit_purchase_cost'] < 250000 else 0, axis=1) # 20% of the value of 20% of the items over 8 years -unit_cost_equipment['replacement_cost_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.1 / 8 if row['unit_purchase_cost'] < 250000 else 0, axis=1) # 10% of the items over 8 years -# TODO the above line assumes that the life span of each item of equipment is 80 years. This needs to be updated using realistic life span data - -unit_cost_equipment = unit_cost_equipment[['Item_code','Equipment_tlo', - 'service_fee_annual', 'spare_parts_annual', 'upfront_repair_cost_annual', 'replacement_cost_annual', - 'Health Post_prioritised', 'Community_prioritised', 'Health Center_prioritised', 'District_prioritised', 'Central_prioritised']] -unit_cost_equipment = unit_cost_equipment.rename(columns={col: 'Quantity_' + col.replace('_prioritised', '') for col in unit_cost_equipment.columns if col.endswith('_prioritised')}) -unit_cost_equipment = unit_cost_equipment.rename(columns={col: col.replace(' ', '_') for col in unit_cost_equipment.columns}) -unit_cost_equipment = unit_cost_equipment[unit_cost_equipment.Item_code.notna()] - -unit_cost_equipment = pd.wide_to_long(unit_cost_equipment, stubnames=['Quantity_'], - i=['Item_code', 'Equipment_tlo', 'service_fee_annual', 'spare_parts_annual', 'upfront_repair_cost_annual', 'replacement_cost_annual'], - j='Facility_Level', suffix='(\d+|\w+)').reset_index() -facility_level_mapping = {'Health_Post': '0', 'Health_Center': '1a', 'Community': '1b', 'District': '2', 'Central': '3'} -unit_cost_equipment['Facility_Level'] = unit_cost_equipment['Facility_Level'].replace(facility_level_mapping) -unit_cost_equipment = unit_cost_equipment.rename(columns = {'Quantity_': 'Quantity'}) -#unit_cost_equipment_small = unit_cost_equipment[['Item_code', 'Facility_Level', 'Quantity','service_fee_annual', 'spare_parts_annual', 'upfront_repair_cost_annual', 'replacement_cost_annual']] -#equipment_cost_dict = unit_cost_equipment_small.groupby('Facility_Level').apply(lambda x: x.to_dict(orient='records')).to_dict() - ''' -# Get list of equipment used by district and level -equip = pd.DataFrame( - log_equipment['tlo.methods.healthsystem.summary']['EquipmentEverUsed_ByFacilityID'] -) - -equip['EquipmentEverUsed'] = equip['EquipmentEverUsed'].apply(ast.literal_eval) -equip.loc[equip.Facility_Level.isin(['3', '4', '5']),'District'] = 'Central' # Assign a district name for Central health facilities -districts.add('Central') - -# Extract a list of equipment which was used at each facility level within each district -equipment_used = {district: {level: [] 
for level in fac_levels} for district in districts} # create a dictionary with a key for each district and facility level - -for dist in districts: - for level in fac_levels: - equip_subset = equip[(equip['District'] == dist) & (equip['Facility_Level'] == level)] - equipment_used[dist][level] = set().union(*equip_subset['EquipmentEverUsed']) -equipment_used = pd.concat({ - k: pd.DataFrame.from_dict(v, 'index') for k, v in equipment_used.items()}, - axis=0) -list_of_equipment_used = set().union(*equip['EquipmentEverUsed']) - -equipment_df = pd.DataFrame() -equipment_df.index = equipment_used.index -for item in list_of_equipment_used: - equipment_df[str(item)] = 0 - for dist_fac_index in equipment_df.index: - equipment_df.loc[equipment_df.index == dist_fac_index, str(item)] = equipment_used[equipment_used.index == dist_fac_index].isin([item]).any(axis=1) -equipment_df.to_csv('./outputs/equipment_use.csv') - -equipment_df = equipment_df.reset_index().rename(columns = {'level_0' : 'District', 'level_1': 'Facility_Level'}) -equipment_df = pd.melt(equipment_df, id_vars = ['District', 'Facility_Level']).rename(columns = {'variable': 'Item_code', 'value': 'whether_item_was_used'}) -equipment_df['Item_code'] = pd.to_numeric(equipment_df['Item_code']) -# Merge the count of facilities by district and level -equipment_df = equipment_df.merge(mfl[['District', 'Facility_Level','Facility_Count']], on = ['District', 'Facility_Level'], how = 'left') -equipment_df.loc[equipment_df.Facility_Count.isna(), 'Facility_Count'] = 0 - -# Merge the two datasets to calculate cost -equipment_cost = pd.merge(equipment_df, unit_cost_equipment[['Item_code', 'Equipment_tlo', 'Facility_Level', 'Quantity','service_fee_annual', 'spare_parts_annual', 'upfront_repair_cost_annual', 'replacement_cost_annual']], - on = ['Item_code', 'Facility_Level'], how = 'left', validate = "m:1") -categories_of_equipment_cost = ['replacement_cost', 'upfront_repair_cost', 'spare_parts', 'service_fee'] -for cost_category in categories_of_equipment_cost: - equipment_cost['total_' + cost_category] = equipment_cost[cost_category + '_annual'] * equipment_cost['whether_item_was_used'] * equipment_cost['Quantity'] * equipment_cost['Facility_Count'] -equipment_cost['annual_cost'] = equipment_cost[['total_' + item for item in categories_of_equipment_cost]].sum(axis = 1) - -equipment_costs = pd.DataFrame({ - 'Cost_Category': ['Equipment'] * len(categories_of_equipment_cost), - 'Cost_Sub-category': categories_of_equipment_cost, - 'Cost': equipment_cost[['total_' + item for item in categories_of_equipment_cost]].sum().values.tolist() -}) -# Append new_data to scenario_cost_financial -scenario_cost = pd.concat([scenario_cost, equipment_costs], ignore_index=True) - # Plot equipment cost # Plot different categories of cost by level of care def plot_components_of_cost_category(_df, cost_category, figname_suffix): @@ -741,216 +1004,3 @@ def plot_most_expensive_equipment(_df, top_x_values = 10, figname_prefix = "Equi # TODO Multiply number of facilities by level with the quantity needed of each equipment and collapse to get total number of equipment (nationally) # TODO Which equipment needs to be newly purchased (currently no assumption made for equipment with cost > $250,000) ''' - -# 4. 
Facility running costs -# Average running costs by facility level and district times the number of facilities in the simulation - -# Extract all costs to a .csv -scenario_cost.to_csv(costing_outputs_folder / 'scenario_cost.csv') - -# Plot costs -#################################################### -# TODO all these HR plots need to be looked at -# 1. HR -# Stacked bar chart of salaries by cadre -def get_level_and_cadre_from_concatenated_value(_df, varname): - _df['Cadre'] = _df[varname].str.extract(r'=(.*?)\|') - _df['Facility_Level'] = _df[varname].str.extract(r'^[^=]*=[^|]*\|[^=]*=([^|]*)') - return _df -def plot_cost_by_cadre_and_level(_df, figname_prefix, figname_suffix, draw): - if ('Facility_Level' in _df.columns) & ('Cadre' in _df.columns): - pass - else: - _df = get_level_and_cadre_from_concatenated_value(_df, 'OfficerType_FacilityLevel') - - _df = _df[_df.draw == draw] - pivot_df = _df.pivot_table(index='Cadre', columns='Facility_Level', values='Cost', - aggfunc='sum', fill_value=0) - total_salary = round(_df['Cost'].sum(), 0) - total_salary = f"{total_salary:,.0f}" - ax = pivot_df.plot(kind='bar', stacked=True, title='Stacked Bar Graph by Cadre and Facility Level') - plt.ylabel(f'US Dollars') - plt.title(f"Annual {figname_prefix} cost by cadre and facility level") - plt.xticks(rotation=45) - plt.yticks(rotation=0) - plt.text(x=0.3, y=-0.5, s=f"Total {figname_prefix} cost = USD {total_salary}", transform=ax.transAxes, - horizontalalignment='center', fontsize=12, weight='bold', color='black') - plt.savefig(figurespath / f'{figname_prefix}_by_cadre_and_level_{figname_suffix}{draw}.png', dpi=100, - bbox_inches='tight') - plt.close() - -plot_cost_by_cadre_and_level(salary_for_all_staff,figname_prefix = "salary", figname_suffix= f"all_staff_draw", draw = 0) -plot_cost_by_cadre_and_level(salary_for_staff_used_in_scenario.reset_index(),figname_prefix = "salary", figname_suffix= "staff_used_in_scenario_draw", draw = 0) -plot_cost_by_cadre_and_level(recruitment_cost, figname_prefix = "recruitment", figname_suffix= "all_staff") -plot_cost_by_cadre_and_level(preservice_training_cost, figname_prefix = "pre-service training", figname_suffix= "all_staff") -plot_cost_by_cadre_and_level(inservice_training_cost, figname_prefix = "in-service training", figname_suffix= "all_staff") - -def plot_components_of_cost_category(_df, cost_category, figname_suffix): - pivot_df = _df[_df['Cost_Category'] == cost_category].pivot_table(index='Cost_Sub-category', values='Cost', - aggfunc='sum', fill_value=0) - ax = pivot_df.plot(kind='bar', stacked=False, title='Scenario Cost by Category') - plt.ylabel(f'US Dollars') - plt.title(f"Annual {cost_category} cost") - plt.xticks(rotation=45) - plt.yticks(rotation=0) - - # Add text labels on the bars - total_cost = pivot_df['Cost'].sum() - rects = ax.patches - for rect, cost in zip(rects, pivot_df['Cost']): - cost_millions = cost / 1e6 - percentage = (cost / total_cost) * 100 - label_text = f"{cost_millions:.1f}M ({percentage:.1f}%)" - # Place text at the top of the bar - x = rect.get_x() + rect.get_width() / 2 - y = rect.get_height() - ax.text(x, y, label_text, ha='center', va='bottom', fontsize=8, rotation=0) - - total_cost = f"{total_cost:,.0f}" - plt.text(x=0.3, y=-0.5, s=f"Total {cost_category} cost = USD {total_cost}", transform=ax.transAxes, - horizontalalignment='center', fontsize=12, weight='bold', color='black') - - plt.savefig(figurespath / f'{cost_category}_by_cadre_and_level_{figname_suffix}.png', dpi=100, - bbox_inches='tight') - plt.close() - 
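# A small, self-contained sketch of how the two regular expressions in
# get_level_and_cadre_from_concatenated_value above split the logged 'OfficerType_FacilityLevel'
# key of the form 'Officer_Type=<cadre>|Facility_Level=<level>'; the cadre and level values below
# are made up for illustration, and pandas .str.extract applies the same patterns element-wise:
import re

example_key = 'Officer_Type=Nursing_and_Midwifery|Facility_Level=1a'
cadre = re.search(r'=(.*?)\|', example_key).group(1)                     # -> 'Nursing_and_Midwifery'
level = re.search(r'^[^=]*=[^|]*\|[^=]*=([^|]*)', example_key).group(1)  # -> '1a'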
-plot_components_of_cost_category(_df = scenario_cost, cost_category = 'Human Resources for Health', figname_suffix = "all_staff") - - -# Compare financial costs with actual budget data -#################################################### -# Import budget data -budget_data = workbook_cost["budget_validation"] -list_of_costs_for_comparison = ['total_salary_for_all_staff', 'total_cost_of_consumables_dispensed', 'total_cost_of_consumables_stocked'] -real_budget = [budget_data[budget_data['Category'] == list_of_costs_for_comparison[0]]['Budget_in_2023USD'].values[0], - budget_data[budget_data['Category'] == list_of_costs_for_comparison[1]]['Budget_in_2023USD'].values[0], - budget_data[budget_data['Category'] == list_of_costs_for_comparison[1]]['Budget_in_2023USD'].values[0]] -model_cost = [scenario_cost_financial[scenario_cost_financial['Cost_Sub-category'] == list_of_costs_for_comparison[0]]['Value_2023USD'].values[0], - scenario_cost_financial[scenario_cost_financial['Cost_Sub-category'] == list_of_costs_for_comparison[1]]['Value_2023USD'].values[0], - scenario_cost_financial[scenario_cost_financial['Cost_Sub-category'] == list_of_costs_for_comparison[2]]['Value_2023USD'].values[0]] - -plt.clf() -plt.scatter(real_budget, model_cost) -# Plot a line representing a 45-degree angle -min_val = min(min(real_budget), min(model_cost)) -max_val = max(max(real_budget), max(model_cost)) -plt.plot([min_val, max_val], [min_val, max_val], 'r--', label='45-degree line') - -# Format x and y axis labels to display in millions -formatter = FuncFormatter(lambda x, _: '{:,.0f}M'.format(x / 1e6)) -plt.gca().xaxis.set_major_formatter(formatter) -plt.gca().yaxis.set_major_formatter(formatter) -# Add labels for each point -hr_label = 'HR_salary ' + f'{round(model_cost[0] / real_budget[0], 2)}' -consumables_label1= 'Consumables dispensed ' + f'{round(model_cost[1] / real_budget[1], 2)}' -consumables_label2 = 'Consumables stocked ' + f'{round(model_cost[2] / real_budget[2], 2)}' -plotlabels = [hr_label, consumables_label1, consumables_label2] -for i, txt in enumerate(plotlabels): - plt.text(real_budget[i], model_cost[i], txt, ha='right') - -plt.xlabel('Real Budget') -plt.ylabel('Model Cost') -plt.title('Real Budget vs Model Cost') -plt.savefig(costing_outputs_folder / 'Cost_validation.png') - -# Explore the ratio of consumable inflows to outflows -###################################################### -# TODO: Only consider the months for which original OpenLMIS data was available for closing_stock and dispensed -def plot_inflow_to_outflow_ratio(_dict, groupby_var): - # Convert Dict to dataframe - flattened_data = [(level1, level2, level3, level4, value) for (level1, level2, level3, level4), value in - inflow_to_outflow_ratio.items()] # Flatten dictionary into a list of tuples - _df = pd.DataFrame(flattened_data, columns=['category', 'item_code', 'district', 'fac_type_tlo', 'inflow_to_outflow_ratio']) # Convert flattened data to DataFrame - - # Plot the bar plot - plt.figure(figsize=(10, 6)) - sns.barplot(data=_df , x=groupby_var, y= 'inflow_to_outflow_ratio', errorbar=None) - - # Add points representing the distribution of individual values - sns.stripplot(data=_df, x=groupby_var, y='inflow_to_outflow_ratio', color='black', size=5, alpha=0.2) - - # Set labels and title - plt.xlabel(groupby_var) - plt.ylabel('Inflow to Outflow Ratio') - plt.title('Average Inflow to Outflow Ratio by ' + f'{groupby_var}') - plt.xticks(rotation=45) - - # Show plot - plt.tight_layout() - plt.savefig(costing_outputs_folder / 
'inflow_to_outflow_ratio_by' f'{groupby_var}' ) - -plot_inflow_to_outflow_ratio(inflow_to_outflow_ratio, 'fac_type_tlo') -plot_inflow_to_outflow_ratio(inflow_to_outflow_ratio, 'district') -plot_inflow_to_outflow_ratio(inflow_to_outflow_ratio, 'item_code') -plot_inflow_to_outflow_ratio(inflow_to_outflow_ratio, 'category') - -# Plot fraction staff time used -fraction_stafftime_average = salary_staffneeded_df.groupby('Officer_Category')['Value'].sum() -fraction_stafftime_average. plot(kind = "bar") -plt.xlabel('Cadre') -plt.ylabel('Fraction time needed') -plt.savefig(costing_outputs_folder / 'hr_time_need_economic_cost.png') - -# Plot salary costs by cadre and facility level -# Group by cadre and level -salary_for_all_staff[['Officer_Type', 'Facility_Level']] = salary_for_all_staff['OfficerType_FacilityLevel'].str.split('|', expand=True) -salary_for_all_staff['Officer_Type'] = salary_for_all_staff['Officer_Type'].str.replace('Officer_Type=', '') -salary_for_all_staff['Facility_Level'] = salary_for_all_staff['Facility_Level'].str.replace('Facility_Level=', '') -total_salary_by_cadre = salary_for_all_staff.groupby('Officer_Type')['Total_salary_by_cadre_and_level'].sum() -total_salary_by_level = salary_for_all_staff.groupby('Facility_Level')['Total_salary_by_cadre_and_level'].sum() - -# Plot by cadre -plt.clf() -total_salary_by_cadre.plot(kind='bar') -plt.xlabel('Officer_category') -plt.ylabel('Total Salary') -plt.title('Total Salary by Cadre') -plt.savefig(costing_outputs_folder / 'total_salary_by_cadre.png') - -# Plot by level -plt.clf() -total_salary_by_level.plot(kind='bar') -plt.xlabel('Facility_Level') -plt.ylabel('Total Salary') -plt.title('Total Salary by Facility_Level') -plt.savefig(costing_outputs_folder / 'total_salary_by_level.png') - -''' -# Scratch pad - -log['tlo.methods.healthsystem']['Capacity']['Frac_Time_Used_By_Facility_ID'] # for district disaggregation - -# Aggregate Daily capabilities to total used by cadre and facility level - -# log['tlo.methods.healthsystem.summary']['Capacity']['Frac_Time_Used_By_OfficerType'] -# 1.2 HR cost by Treatment_ID -# For HR cost by Treatment_ID, multiply total cost by Officer type by fraction of time used for treatment_ID -log['tlo.methods.healthsystem.summary']['HSI_Event']['TREATMENT_ID'] # what does this represent? why are there 3 rows (2 scenarios) -# But what we need is the HR use by Treatment_ID - Leave this for later? - -# log['tlo.scenario'] -log['tlo.methods.healthsystem.summary']['HSI_Event']['Number_By_Appt_Type_Code'] - - -df = pd.DataFrame(log['tlo.methods.healthsystem.summary']) -df.to_csv(outputfilepath / 'temp.csv') - -def read_parameters(self, data_folder): - """ - 1. Reads the costing resource file - 2. 
Declares the costing parameters - """ - # Read the resourcefile - # Short cut to parameters dict - p = self.parameters - - workbook = pd.read_excel((resourcefilepath / "ResourceFile_Costing.xlsx"), - sheet_name = None) - - p["human_resources"] = workbook["human_resources"] - -workbook = pd.read_excel((resourcefilepath / "ResourceFile_Costing.xlsx"), - sheet_name = None) -human_resources = workbook["human_resources"] - -''' From c7a7c245eec4d04d80a69a797438077acb7850fc Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 14 Oct 2024 19:15:02 +0100 Subject: [PATCH 106/230] Add stacked bar plot - plot costs by category and sub-category + compare with resource mapping expenditure data --- src/scripts/costing/costing.py | 35 ++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 151c3934ce..0cdcd8e98b 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -548,6 +548,41 @@ def get_equipment_used_by_district_and_facility(_df: pd.Series) -> pd.Series: # Plot costs #################################################### +# Stacked bar plot +def do_stacked_bar_plot(_df, cost_category, year, actual_expenditure): + # Subset and Pivot the data to have 'Cost Sub-category' as columns + _df = _df[_df.stat == 'mean'] + # Convert 'value' to millions + _df['value'] = _df['value'] / 1e6 + if year == 'all': + subset_df = _df + else: + subset_df = _df[_df['year'] == year] + if cost_category == 'all': + subset_df = subset_df + pivot_df = subset_df.pivot_table(index='draw', columns='Cost_Category', values='value', aggfunc='sum') + else: + subset_df = subset_df[subset_df['Cost_Category'] == cost_category] + pivot_df = subset_df.pivot_table(index='draw', columns='Cost_Sub-category', values='value', aggfunc='sum') + + # Plot a stacked bar chart + pivot_df.plot(kind='bar', stacked=True) + # Add a horizontal red line to represent 2018 Expenditure as per resource mapping + plt.axhline(y=actual_expenditure/1e6, color='red', linestyle='--', label='Actual expenditure recorded in 2018') + + # Save plot + plt.xlabel('Scenario') + plt.ylabel('Cost (2023 USD), millions') + plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right') + plt.title(f'Costs by Scenario \n (Cost Category = {cost_category} ; Year = {year})') + plt.savefig(figurespath / f'stacked_bar_chart_{cost_category}_year_{year}.png', dpi=100, + bbox_inches='tight') + +do_stacked_bar_plot(_df = scenario_cost, cost_category = 'Medical consumables', year = 2018, actual_expenditure = 206_747_565) +do_stacked_bar_plot(_df = scenario_cost, cost_category = 'Human Resources for Health', year = 2018, actual_expenditure = 128_593_787) +do_stacked_bar_plot(_df = scenario_cost, cost_category = 'Equipment purchase and maintenance', year = 2018, actual_expenditure = 6_048_481) +do_stacked_bar_plot(_df = scenario_cost, cost_category = 'all', year = 2018, actual_expenditure = 624_054_027) + # TODO all these HR plots need to be looked at # 1. 
HR # Stacked bar chart of salaries by cadre From 4f83dc3a8061341720d5811a71c5f85a1c09b1de Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 15 Oct 2024 10:54:06 +0100 Subject: [PATCH 107/230] Add ROI and maximum ability to pay calculation and plots --- src/scripts/costing/costing.py | 171 ++++++++++++++++++++++++++++++++- 1 file changed, 170 insertions(+), 1 deletion(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 0cdcd8e98b..9557b8d57b 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -6,6 +6,7 @@ import calendar import datetime import os +import textwrap import matplotlib.pyplot as plt from matplotlib.ticker import FuncFormatter @@ -546,9 +547,81 @@ def get_equipment_used_by_district_and_facility(_df: pd.Series) -> pd.Series: # Extract all costs to a .csv scenario_cost.to_csv(costing_outputs_folder / 'scenario_cost.csv') +# Calculate total cost +total_scenario_cost = scenario_cost.groupby(['draw', 'stat'])['value'].sum().unstack() +total_scenario_cost = total_scenario_cost.unstack().reset_index() +total_scenario_cost_wide = total_scenario_cost.pivot_table(index=None, columns=['draw', 'stat'], values=0) + +# Calculate incremental cost +def find_difference_relative_to_comparison(_ser: pd.Series, + comparison: str, + scaled: bool = False, + drop_comparison: bool = True, + ): + """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) + within the runs (level 1), relative to where draw = `comparison`. + The comparison is `X - COMPARISON`.""" + return _ser \ + .unstack(level=0) \ + .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) \ + .drop(columns=([comparison] if drop_comparison else [])) \ + .stack() + +# TODO the following calculation should first capture the different by run and then be summarised +incremental_scenario_cost = (pd.DataFrame( + find_difference_relative_to_comparison( + total_scenario_cost_wide.loc[0], + comparison= 0) # sets the comparator to 0 which is the Actual scenario + ).T.iloc[0].unstack()).T + +# %% +# Monetary value of health impact +def get_num_dalys(_df): + """Return total number of DALYS (Stacked) by label (total within the TARGET_PERIOD). + Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using + results from runs that crashed mid-way through the simulation. + """ + years_needed = [i.year for i in TARGET_PERIOD] + assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." 
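# A minimal worked example of find_difference_relative_to_comparison defined above (toy numbers only):
# for a Series indexed by (draw, run) with values
#   (draw=0, run=0) = 10, (draw=0, run=1) = 12, (draw=1, run=0) = 15, (draw=1, run=1) = 18,
# calling it with comparison=0 unstacks the draws into columns, subtracts the draw-0 column run by
# run, drops the comparator and re-stacks, returning (run=0, draw=1) = 5 and (run=1, draw=1) = 6;
# with scaled=True the same entries become 0.5 and 0.5, i.e. the change relative to draw 0 per run.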
+ return pd.Series( + data=_df + .loc[_df.year.between(*years_needed)] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum().sum() + ) + +num_dalys = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys, + do_scaling=True + ) + +num_dalys_summarized = summarize(num_dalys).loc[0].unstack() +#num_dalys_summarized['scenario'] = scenarios.to_list() # add when scenarios have names +#num_dalys_summarized = num_dalys_summarized.set_index('scenario') + +# Get absolute DALYs averted +num_dalys_averted = summarize( + -1.0 * + pd.DataFrame( + find_difference_relative_to_comparison( + num_dalys.loc[0], + comparison= 0) # sets the comparator to 0 which is the Actual scenario + ).T + ).iloc[0].unstack() +#num_dalys_averted['scenario'] = scenarios.to_list()[1:12] +#num_dalys_averted = num_dalys_averted.set_index('scenario') + +chosen_cet = 77.4 # based on Ochalek et al (2018) - the paper provided the value $61 in 2016 USD terms, this value is in 2023 USD terms +monetary_value_of_incremental_health = num_dalys_averted * chosen_cet +max_ability_to_pay_for_implementation = monetary_value_of_incremental_health - incremental_scenario_cost # monetary value - change in costs + # Plot costs #################################################### -# Stacked bar plot +# 1. Stacked bar plot (Total cost + Cost categories) +#---------------------------------------------------- def do_stacked_bar_plot(_df, cost_category, year, actual_expenditure): # Subset and Pivot the data to have 'Cost Sub-category' as columns _df = _df[_df.stat == 'mean'] @@ -583,6 +656,102 @@ def do_stacked_bar_plot(_df, cost_category, year, actual_expenditure): do_stacked_bar_plot(_df = scenario_cost, cost_category = 'Equipment purchase and maintenance', year = 2018, actual_expenditure = 6_048_481) do_stacked_bar_plot(_df = scenario_cost, cost_category = 'all', year = 2018, actual_expenditure = 624_054_027) +# 2. Return on Investment Plot +#---------------------------------------------------- +# Plot ROI at various levels of cost +# Step 1: Create an array of costs ranging from 0 to the max value in the 'mean' column +costs = np.linspace(0, max_ability_to_pay_for_implementation['mean'].max(), 500) +# Step 2: Initialize the plot +plt.figure(figsize=(10, 6)) +# Step 3: Loop through each row and plot mean, lower, and upper values divided by costs +for index, row in max_ability_to_pay_for_implementation.iterrows(): + mean_values = row['mean'] / np.where(costs == 0, np.nan, costs) + lower_values = row['lower'] / np.where(costs == 0, np.nan, costs) + upper_values = row['upper'] / np.where(costs == 0, np.nan, costs) + + # Plot mean line + plt.plot(costs, mean_values, label=f'Draw {index}') + + # Plot the confidence interval as a shaded region + plt.fill_between(costs, lower_values, upper_values, alpha=0.2) + +# Step 4: Set plot labels and title +plt.xlabel('Implementation cost') +plt.ylabel('Return on Investment') +plt.title('Return on Investment of scenarios at different levels of implementation cost') +# Show legend +plt.legend() +# Save +plt.savefig(figurespath / f'ROI.png', dpi=100, + bbox_inches='tight') + +# 3. 
Plot Maximum ability-to-pay +#---------------------------------------------------- +def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrapped=False): + """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the + extent of the error bar.""" + + yerr = np.array([ + (_df['mean'] - _df['lower']).values, + (_df['upper'] - _df['mean']).values, + ]) + + xticks = {(i+1): k for i, k in enumerate(_df.index)} + + fig, ax = plt.subplots() + ax.bar( + xticks.keys(), + _df['mean'].values, + yerr=yerr, + alpha=1, + ecolor='black', + capsize=10, + label=xticks.values() + ) + ''' + if annotations: + for xpos, ypos, text in zip(xticks.keys(), _df['upper'].values, annotations): + ax.text(xpos, ypos * 1.05, text, horizontalalignment='center', fontsize=11) + + ax.set_xticks(list(xticks.keys())) + if not xticklabels_horizontal_and_wrapped: + wrapped_labs = ["\n".join(textwrap.wrap(_lab, 20)) for _lab in xticks.values()] + ax.set_xticklabels(wrapped_labs, rotation=45, ha='right', fontsize=10) + else: + wrapped_labs = ["\n".join(textwrap.wrap(_lab, 20)) for _lab in xticks.values()] + ax.set_xticklabels(wrapped_labs, fontsize=10) + ''' + + # Set font size for y-tick labels + ax.tick_params(axis='y', labelsize=12) + ax.tick_params(axis='x', labelsize=11) + + ax.grid(axis="y") + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + fig.tight_layout() + + return fig, ax + +# Plot DALYS accrued (with xtickabels horizontal and wrapped) +name_of_plot = f'Maximum ability to pay, {first_year_of_simulation} - {final_year_of_simulation}' +fig, ax = do_bar_plot_with_ci( + (max_ability_to_pay_for_implementation / 1e6).clip(lower=0.0), + annotations=[ + f"{round(row['mean']/1e6, 1)} \n ({round(row['lower']/1e6, 1)}-{round(row['upper']/1e6, 1)})" + for _, row in max_ability_to_pay_for_implementation.clip(lower=0.0).iterrows() + ], + xticklabels_horizontal_and_wrapped=False, +) +ax.set_title(name_of_plot) +#ax.set_ylim(0, 120) +#ax.set_yticks(np.arange(0, 120, 10)) +ax.set_ylabel('Maximum ability to pay \n(Millions)') +fig.tight_layout() +fig.savefig(figurespath / name_of_plot.replace(' ', '_').replace(',', '')) +fig.show() +plt.close(fig) + # TODO all these HR plots need to be looked at # 1. 
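# A minimal numeric sketch of the quantities used in the ROI and ability-to-pay calculations above,
# with all figures made up for illustration:
#   DALYs averted relative to the comparator draw = 500,000
#   chosen_cet (USD per DALY averted)             = 77.4
#   monetary_value_of_incremental_health          = 500,000 * 77.4 = USD 38.7 million
#   incremental_scenario_cost                     = USD 10 million
#   max_ability_to_pay_for_implementation         = 38.7m - 10m    = USD 28.7 million
#   ROI at an implementation cost of USD 5m       = 28.7m / 5m     = about 5.7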
HR # Stacked bar chart of salaries by cadre From f065b5f5c1ce76b68ccb61f79e0c522a7a2803aa Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 15 Oct 2024 14:00:25 +0100 Subject: [PATCH 108/230] rename cost_of_excess_consumables_stocked --- src/scripts/costing/costing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 9557b8d57b..c91523453c 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -400,7 +400,7 @@ def melt_and_label_consumables_cost(_df, label): consumable_costs = [ (total_cost_of_consumables_dispensed, 'cost_of_consumables_dispensed'), - (total_cost_of_excess_consumables_stocked, 'cost_of_consumables_stocked'), + (total_cost_of_excess_consumables_stocked, 'cost_of_excess_consumables_stocked'), ] # Iterate through additional costs, melt and concatenate for df, label in consumable_costs: @@ -656,7 +656,7 @@ def do_stacked_bar_plot(_df, cost_category, year, actual_expenditure): do_stacked_bar_plot(_df = scenario_cost, cost_category = 'Equipment purchase and maintenance', year = 2018, actual_expenditure = 6_048_481) do_stacked_bar_plot(_df = scenario_cost, cost_category = 'all', year = 2018, actual_expenditure = 624_054_027) -# 2. Return on Investment Plot +# 3. Return on Investment Plot #---------------------------------------------------- # Plot ROI at various levels of cost # Step 1: Create an array of costs ranging from 0 to the max value in the 'mean' column From 0f54a1e61fc54d55e85316dbd54050ca835402be Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 16 Oct 2024 12:39:24 +0100 Subject: [PATCH 109/230] update the cost of item_code 20 "Suture, catgut, chromic, 0, 150 cm" - the CMST price needed to be divided by 12 --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 2ccb7846ad..b40114daff 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:96b25d9edc8abbdce58608a86b42f4782d77ac548cbf4e2b4016becee3dd8f80 -size 4331748 +oid sha256:970461b4ef4ad08a1028069a74638bc9c49acbaa26de7163329cd42871898d45 +size 4331998 From b2198575ddaa3ded3b8cf2b50add8a1df9dd309b Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 16 Oct 2024 18:27:09 +0100 Subject: [PATCH 110/230] Update plots - add line plot over time - update ROI --- src/scripts/costing/costing.py | 111 ++++++++++++++++++++++++++------- 1 file changed, 88 insertions(+), 23 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index c91523453c..0448e1d6d1 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -477,7 +477,7 @@ def get_equipment_used_by_district_and_facility(_df: pd.Series) -> pd.Series: equipment_used = pd.concat({ k: pd.DataFrame.from_dict(v, 'index') for k, v in equipment_used.items()}, axis=0) - full_list_of_equipment_used = set().union(*equip['EquipmentEverUsed']) + full_list_of_equipment_used = set().union(*equipment_used_subset['EquipmentEverUsed']) equipment_df = pd.DataFrame() equipment_df.index = equipment_used.index @@ -624,7 +624,8 @@ def get_num_dalys(_df): #---------------------------------------------------- def do_stacked_bar_plot(_df, cost_category, year, actual_expenditure): # Subset and Pivot the data to have 'Cost Sub-category' 
as columns - _df = _df[_df.stat == 'mean'] + # Make a copy of the dataframe to avoid modifying the original + _df = _df[_df.stat == 'mean'].copy() # Convert 'value' to millions _df['value'] = _df['value'] / 1e6 if year == 'all': @@ -650,40 +651,104 @@ def do_stacked_bar_plot(_df, cost_category, year, actual_expenditure): plt.title(f'Costs by Scenario \n (Cost Category = {cost_category} ; Year = {year})') plt.savefig(figurespath / f'stacked_bar_chart_{cost_category}_year_{year}.png', dpi=100, bbox_inches='tight') + plt.close() do_stacked_bar_plot(_df = scenario_cost, cost_category = 'Medical consumables', year = 2018, actual_expenditure = 206_747_565) do_stacked_bar_plot(_df = scenario_cost, cost_category = 'Human Resources for Health', year = 2018, actual_expenditure = 128_593_787) do_stacked_bar_plot(_df = scenario_cost, cost_category = 'Equipment purchase and maintenance', year = 2018, actual_expenditure = 6_048_481) do_stacked_bar_plot(_df = scenario_cost, cost_category = 'all', year = 2018, actual_expenditure = 624_054_027) +# 2. Line plots of total costs +#---------------------------------------------------- +def do_line_plot(_df, cost_category, actual_expenditure, _draw): + # Convert 'value' to millions + _df = _df.copy() + + # Filter the dataframe based on the selected draw + subset_df = _df[_df.draw == _draw] + + if cost_category != 'all': + subset_df = subset_df[subset_df['Cost_Category'] == cost_category] + + # Reset the index for plotting purposes + subset_df = subset_df.reset_index() + + # Extract mean, lower, and upper values for the plot + mean_values = subset_df[subset_df.stat == 'mean'].groupby(['Cost_Category', 'year'])['value'].sum() / 1e6 + lower_values = subset_df[subset_df.stat == 'lower'].groupby(['Cost_Category', 'year'])['value'].sum() / 1e6 + upper_values = subset_df[subset_df.stat == 'upper'].groupby(['Cost_Category', 'year'])['value'].sum() / 1e6 + years = subset_df[subset_df.stat == 'mean']['year'] + + # Plot the line for 'mean' + plt.plot(mean_values.index.get_level_values(1), mean_values, marker='o', linestyle='-', color='b', label='Mean') + + # Add confidence interval using fill_between + plt.fill_between(mean_values.index.get_level_values(1), lower_values, upper_values, color='b', alpha=0.2, label='95% CI') + + # Add a horizontal red line to represent the actual expenditure + plt.axhline(y=actual_expenditure / 1e6, color='red', linestyle='--', label='Actual expenditure recorded in 2018') + + # Set plot labels and title + plt.xlabel('Year') + plt.ylabel('Cost (2023 USD), millions') + plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right') + plt.title(f'Costs by Scenario \n (Cost Category = {cost_category} ; Draw = {_draw})') + + # Save the plot + plt.savefig(figurespath / f'trend_{cost_category}_{first_year_of_simulation}-{final_year_of_simulation}.png', + dpi=100, + bbox_inches='tight') + plt.close() + +do_line_plot(_df = scenario_cost, cost_category = 'Medical consumables', _draw = 0, actual_expenditure = 206_747_565) +do_line_plot(_df = scenario_cost, cost_category = 'Human Resources for Health', _draw = 0, actual_expenditure = 128_593_787) +do_line_plot(_df = scenario_cost, cost_category = 'Equipment purchase and maintenance', _draw = 0, actual_expenditure = 6_048_481) +do_line_plot(_df = scenario_cost, cost_category = 'all', _draw = 0, actual_expenditure = 624_054_027) +# TODO Check which plots 2-4 do now show actual values + # 3. 
Return on Investment Plot #---------------------------------------------------- # Plot ROI at various levels of cost -# Step 1: Create an array of costs ranging from 0 to the max value in the 'mean' column -costs = np.linspace(0, max_ability_to_pay_for_implementation['mean'].max(), 500) -# Step 2: Initialize the plot -plt.figure(figsize=(10, 6)) -# Step 3: Loop through each row and plot mean, lower, and upper values divided by costs -for index, row in max_ability_to_pay_for_implementation.iterrows(): - mean_values = row['mean'] / np.where(costs == 0, np.nan, costs) - lower_values = row['lower'] / np.where(costs == 0, np.nan, costs) - upper_values = row['upper'] / np.where(costs == 0, np.nan, costs) +roi_outputs_folder = Path(figurespath / 'roi') +if not os.path.exists(roi_outputs_folder): + os.makedirs(roi_outputs_folder) - # Plot mean line - plt.plot(costs, mean_values, label=f'Draw {index}') +# Loop through each row and plot mean, lower, and upper values divided by costs +for index, row in monetary_value_of_incremental_health.iterrows(): + # Step 1: Create an array of implementation costs ranging from 0 to the max value of the max ability to pay + implementation_costs = np.linspace(0, max_ability_to_pay_for_implementation.loc[index]['mean'], 50) + + plt.figure(figsize=(10, 6)) + # Retrieve the corresponding row from incremental_scenario_cost for the same 'index' + scenario_cost_row = incremental_scenario_cost.loc[index] + # Divide rows by the sum of implementation costs and incremental input cost + mean_values = row['mean'] / (implementation_costs + scenario_cost_row['mean']) + lower_values = row['lower'] / (implementation_costs + scenario_cost_row['lower']) + upper_values = row['upper'] / (implementation_costs + scenario_cost_row['upper']) + # Plot mean line + plt.plot(implementation_costs/1e6, mean_values, label=f'Draw {index}') # Plot the confidence interval as a shaded region - plt.fill_between(costs, lower_values, upper_values, alpha=0.2) - -# Step 4: Set plot labels and title -plt.xlabel('Implementation cost') -plt.ylabel('Return on Investment') -plt.title('Return on Investment of scenarios at different levels of implementation cost') -# Show legend -plt.legend() -# Save -plt.savefig(figurespath / f'ROI.png', dpi=100, + plt.fill_between(implementation_costs/1e6, lower_values, upper_values, alpha=0.2) + + # Step 4: Set plot labels and title + plt.xlabel('Implementation cost, millions') + plt.ylabel('Return on Investment') + plt.title('Return on Investment of scenarios at different levels of implementation cost') + + plt.text(x=0.95, y=0.8, s=f"Monetary value of incremental health = USD {round(monetary_value_of_incremental_health.loc[index]['mean']/1e6,2)}m (USD {round(monetary_value_of_incremental_health.loc[index]['lower']/1e6,2)}m-{round(monetary_value_of_incremental_health.loc[index]['upper']/1e6,2)}m);\n " + f"Incremental input cost of scenario = USD {round(scenario_cost_row['mean']/1e6,2)}m (USD {round(scenario_cost_row['lower']/1e6,2)}m-{round(scenario_cost_row['upper']/1e6,2)}m)", + horizontalalignment='right', verticalalignment='top', transform=plt.gca().transAxes, fontsize=9, weight='bold', color='black') + + + # Show legend + plt.legend() + # Save + plt.savefig(figurespath / f'roi/ROI_draw{index}.png', dpi=100, bbox_inches='tight') + plt.close() + + # 3. 
Plot Maximum ability-to-pay #---------------------------------------------------- From 051a39fbfac81c5793b6d7bd813afbe462216b92 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Thu, 17 Oct 2024 15:02:37 +0100 Subject: [PATCH 111/230] fix line plot so that it does not change the scenario_cost --- src/scripts/costing/costing.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 0448e1d6d1..097a93d753 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -661,9 +661,6 @@ def do_stacked_bar_plot(_df, cost_category, year, actual_expenditure): # 2. Line plots of total costs #---------------------------------------------------- def do_line_plot(_df, cost_category, actual_expenditure, _draw): - # Convert 'value' to millions - _df = _df.copy() - # Filter the dataframe based on the selected draw subset_df = _df[_df.draw == _draw] @@ -704,7 +701,6 @@ def do_line_plot(_df, cost_category, actual_expenditure, _draw): do_line_plot(_df = scenario_cost, cost_category = 'Human Resources for Health', _draw = 0, actual_expenditure = 128_593_787) do_line_plot(_df = scenario_cost, cost_category = 'Equipment purchase and maintenance', _draw = 0, actual_expenditure = 6_048_481) do_line_plot(_df = scenario_cost, cost_category = 'all', _draw = 0, actual_expenditure = 624_054_027) -# TODO Check which plots 2-4 do now show actual values # 3. Return on Investment Plot #---------------------------------------------------- From d82ca890a1f2bafbe95f3bed3a337f4189e57aac Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Thu, 17 Oct 2024 18:55:29 +0100 Subject: [PATCH 112/230] create calibration data for consumables --- resources/costing/ResourceFile_Costing.xlsx | 4 +- src/scripts/costing/costing.py | 90 ++++++++++++++++++++- 2 files changed, 89 insertions(+), 5 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index b40114daff..d4b338966b 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:970461b4ef4ad08a1028069a74638bc9c49acbaa26de7163329cd42871898d45 -size 4331998 +oid sha256:1d739b4b45bde1089325dcaa388b9fa645a97e0e774a10acfc8f39ec764d9728 +size 4218627 diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 097a93d753..1ddc573c00 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -744,9 +744,7 @@ def do_line_plot(_df, cost_category, actual_expenditure, _draw): bbox_inches='tight') plt.close() - - -# 3. Plot Maximum ability-to-pay +# 4. Plot Maximum ability-to-pay #---------------------------------------------------- def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrapped=False): """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the @@ -813,6 +811,92 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe fig.show() plt.close(fig) +# 5. Calibration plots +# Steps: 1. Create a mapping of data labels in model_costing and relevant calibration data, 2. 
Create a dataframe with model_costs and calibration costs; +# Prepare data for calibration +calibration_data = workbook_cost["resource_mapping_r7_summary"] +# Make sure values are numeric +budget_columns = ['BUDGETS (USD) (Jul 2019 - Jun 2020)', 'BUDGETS (USD) (Jul 2020 - Jun 2021)', + 'BUDGETS (USD) (Jul 2021 - Jun 2022)'] +expenditure_columns = ['EXPENDITURE (USD) (Jul 2018 - Jun 2019)'] +calibration_data[budget_columns + expenditure_columns] = calibration_data[budget_columns + expenditure_columns].apply(lambda x: pd.to_numeric(x, errors='coerce')) +# For calibration to budget figures, we take the maximum value across the three years in the RM to provide an +# upper limit to calibrate to (expenditure providing the lower limit) +calibration_data['max_annual_budget_2020-22'] = calibration_data[budget_columns].max(axis=1, skipna = True) +calibration_data = calibration_data.rename(columns = {'EXPENDITURE (USD) (Jul 2018 - Jun 2019)': 'actual_expenditure_2019', + 'Calibration_category': 'calibration_category'}) +calibration_data = calibration_data[['calibration_category','actual_expenditure_2019', 'max_annual_budget_2020-22']] +calibration_data1 = calibration_data.copy() +calibration_data1['stat'] = 'lower' +calibration_data2 = calibration_data.copy() +calibration_data2['stat'] = 'mean' +calibration_data3 = calibration_data.copy() +calibration_data3['stat'] = 'upper' +calibration_data = pd.concat([calibration_data1, calibration_data2, calibration_data3], axis = 0) +calibration_data = calibration_data.set_index(['calibration_category', 'stat']) + +# Manually create a dataframe of model costs and relevant calibration values +def get_calibration_relevant_subset(_df): + cond_calibration_subset = (_df.year == 2018) & (_df.draw == 0) + return _df[cond_calibration_subset] +def get_calibration_relevant_subset_of_consumables_cost(_df, item): + #_df =_df.rename(columns = {('year', ''):'year'}) + for col in ['Item_Code', 'Final_price_per_chosen_unit (USD, 2023)', 'excess_stock_proportion_of_dispensed','item_code']: + try: + _df = _df.drop(columns = col) + except: + pass + _df.columns = pd.MultiIndex.from_tuples(_df.columns) + _df = _df.melt(id_vars = ['year', 'Item_Code'], var_name=['draw', 'stat'], value_name='value') + _df = _df[_df['Item_Code'].isin(item)] + _df = _df.groupby(['year', 'draw', 'stat'])['value'].sum() + return get_calibration_relevant_subset(_df.reset_index()) +def merged_calibration_relevant_consumables_costs(item, category): + merged_df = pd.merge(get_calibration_relevant_subset_of_consumables_cost(cost_of_consumables_dispensed, item), + get_calibration_relevant_subset_of_consumables_cost(cost_of_excess_consumables_stocked, item), + on=['year', 'draw', 'stat'], how='outer', suffixes=('_dispensed', '_excess_stock')) + # Fill any missing values in the value columns with 0 (for cases where only one dataframe has a value) + # and sum to get total consumable cost + merged_df['value'] = merged_df['value_dispensed'].fillna(0) + merged_df['value_excess_stock'].fillna(0) + merged_df['calibration_category'] = category + return merged_df.set_index(['calibration_category', 'stat'])['value'] + +def first_positive(series): + return next((x for x in series if pd.notna(x) and x > 0), np.nan) + +# Consumables +calibration_data = pd.concat([calibration_data, merged_calibration_relevant_consumables_costs([2671, 2672, 2673], 'Antiretrovirals')], axis = 1) +calibration_data = pd.concat([calibration_data, merged_calibration_relevant_consumables_costs([176, 177, 179, 178, 181, 2678], 'TB Treatment')], axis 
= 1) +calibration_data = pd.concat([calibration_data, merged_calibration_relevant_consumables_costs([162,164,170], 'Antimalarials')], axis = 1) +calibration_data = pd.concat([calibration_data, merged_calibration_relevant_consumables_costs([163], 'Malaria RDTs')], axis = 1) +calibration_data = pd.concat([calibration_data, merged_calibration_relevant_consumables_costs([190,191,196], 'HIV Screening/Diagnostic Tests')], axis = 1) +calibration_data = pd.concat([calibration_data, merged_calibration_relevant_consumables_costs([2,25], 'Condoms and Lubricants')], axis = 1) +calibration_data = pd.concat([calibration_data, merged_calibration_relevant_consumables_costs([184,187, 175], 'TB Tests (including RDTs)')], axis = 1) +# Apply across rows to find the first positive value +calibration_data['model_cost'] = calibration_data[['value']].apply(first_positive, axis=1) + + +# HR +calibration_data[calibration_data['calibration_category'] == 'Other Drugs, medical supplies, and commodities'] = merged_calibration_relevant_consumables_costs() +calibration_data[calibration_data['calibration_category'] == 'Health Worker Salaries'] = get_calibration_relevant_subset() +calibration_data[calibration_data['calibration_category'] == 'Health Worker Training - In-Service'] = get_calibration_relevant_subset() +calibration_data[calibration_data['calibration_category'] == 'Health Worker Training - Pre-Service'] = get_calibration_relevant_subset() +calibration_data[calibration_data['calibration_category'] == 'Other Human Resources for Health expenses'] = get_calibration_relevant_subset() +calibration_data[calibration_data['calibration_category'] == 'Facility utility bills - ICT', 'Infrastructure - New Builds'] = get_calibration_relevant_subset() +calibration_data[calibration_data['calibration_category'] == 'Infrastructure - Rehabilitation'] = get_calibration_relevant_subset() +calibration_data[calibration_data['calibration_category'] == 'Medical Equipment - Maintenance'] = get_calibration_relevant_subset() +calibration_data[calibration_data['calibration_category'] == 'Medical Equipment - Purchase'] = get_calibration_relevant_subset() +calibration_data[calibration_data['calibration_category'] == 'Vehicles - Purchase and Maintenance'] = get_calibration_relevant_subset() +calibration_data[calibration_data['calibration_category'] == 'Vehicles - Purchase and Maintenance'] = get_calibration_relevant_subset() + +# This will reshape your data such that: +# 3. Create calibration plot +# Consumables + + +# HR +# Equipment + # TODO all these HR plots need to be looked at # 1. 
HR # Stacked bar chart of salaries by cadre From 98366fdbed033961f9d12a3c5eb3de59b3a3c811 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Fri, 18 Oct 2024 14:54:55 +0100 Subject: [PATCH 113/230] update the unit cost of 'chlorhexidine', 'vacuum, obstetric' and ' Suture, catgut, chromic, 0, 150 cm' --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index d4b338966b..3d26cb5014 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1d739b4b45bde1089325dcaa388b9fa645a97e0e774a10acfc8f39ec764d9728 -size 4218627 +oid sha256:38e2109d20142772ad9b2e54e28d317ed3eea2a561ee4b3e1ca1cee25a796884 +size 4219295 From 8ea26eb17797270f0b407af9c6058b339691a7af Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Sun, 20 Oct 2024 18:23:05 +0100 Subject: [PATCH 114/230] add cost of item 188 --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 3d26cb5014..722c9e0d6b 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:38e2109d20142772ad9b2e54e28d317ed3eea2a561ee4b3e1ca1cee25a796884 -size 4219295 +oid sha256:1ea3e7600ea17e9d410dbdb81f2e3b957d610ac7d165aa54f377049f8b00c1f5 +size 4220137 From 80760ad37f8d5f539677ab2467643100da9d3ce0 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Sun, 20 Oct 2024 22:03:13 +0100 Subject: [PATCH 115/230] fix equipment costs + calibration_data for consumables, HR and equipment --- src/scripts/costing/costing.py | 62 ++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 1ddc573c00..8794bfc590 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -477,7 +477,8 @@ def get_equipment_used_by_district_and_facility(_df: pd.Series) -> pd.Series: equipment_used = pd.concat({ k: pd.DataFrame.from_dict(v, 'index') for k, v in equipment_used.items()}, axis=0) - full_list_of_equipment_used = set().union(*equipment_used_subset['EquipmentEverUsed']) + full_list_of_equipment_used = set(equipment_used.values.flatten()) + full_list_of_equipment_used = set(filter(pd.notnull, full_list_of_equipment_used)) equipment_df = pd.DataFrame() equipment_df.index = equipment_used.index @@ -864,38 +865,47 @@ def merged_calibration_relevant_consumables_costs(item, category): def first_positive(series): return next((x for x in series if pd.notna(x) and x > 0), np.nan) +def get_calibration_relevant_subset_of_other_costs(_df, _subcategory, _calibration_category): + new_data = get_calibration_relevant_subset(_df[_df['Cost_Sub-category'].isin([_subcategory])]).groupby('stat')['value'].sum() + new_data = new_data.reset_index() + new_data['calibration_category'] = _calibration_category + new_data = new_data.rename(columns = {'value':'model_cost'}) + return new_data.set_index(['calibration_category', 'stat'])['model_cost'] + # Consumables -calibration_data = pd.concat([calibration_data, merged_calibration_relevant_consumables_costs([2671, 2672, 2673], 'Antiretrovirals')], axis = 1) -calibration_data = pd.concat([calibration_data, 
merged_calibration_relevant_consumables_costs([176, 177, 179, 178, 181, 2678], 'TB Treatment')], axis = 1) -calibration_data = pd.concat([calibration_data, merged_calibration_relevant_consumables_costs([162,164,170], 'Antimalarials')], axis = 1) -calibration_data = pd.concat([calibration_data, merged_calibration_relevant_consumables_costs([163], 'Malaria RDTs')], axis = 1) -calibration_data = pd.concat([calibration_data, merged_calibration_relevant_consumables_costs([190,191,196], 'HIV Screening/Diagnostic Tests')], axis = 1) -calibration_data = pd.concat([calibration_data, merged_calibration_relevant_consumables_costs([2,25], 'Condoms and Lubricants')], axis = 1) -calibration_data = pd.concat([calibration_data, merged_calibration_relevant_consumables_costs([184,187, 175], 'TB Tests (including RDTs)')], axis = 1) -# Apply across rows to find the first positive value -calibration_data['model_cost'] = calibration_data[['value']].apply(first_positive, axis=1) +calibration_data['model_cost'] = np.nan +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs([2671, 2672, 2673], 'Antiretrovirals')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs([176, 177, 179, 178, 181, 2678], 'TB Treatment')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs([162,164,170], 'Antimalarials')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs([163], 'Malaria RDTs')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs([190,191,196], 'HIV Screening/Diagnostic Tests')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs([2,25], 'Condoms and Lubricants')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs([184,187, 175], 'TB Tests (including RDTs)')) +#calibration_data[calibration_data['calibration_category'] == 'Other Drugs, medical supplies, and commodities']['model_cost'] = ?? 
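PATCH 115 replaces the earlier column-by-column `pd.concat` with a single `model_cost` column that is filled category by category via chained `fillna`. A minimal sketch of why this works, assuming (as in the patch) that each helper returns a Series indexed by `(calibration_category, stat)` covering only its own category; the names and numbers below are illustrative and not taken from the model:

```python
import numpy as np
import pandas as pd

idx = pd.MultiIndex.from_product(
    [['Antiretrovirals', 'TB Treatment'], ['lower', 'mean', 'upper']],
    names=['calibration_category', 'stat'])
calibration = pd.DataFrame({'model_cost': np.nan}, index=idx)

# A helper's output only carries rows for its own category...
art_cost = pd.Series(
    [9.0, 10.0, 11.0],
    index=pd.MultiIndex.from_product(
        [['Antiretrovirals'], ['lower', 'mean', 'upper']],
        names=['calibration_category', 'stat']))

# ...so each successive fillna call fills a different block of rows (index-aligned)
# and leaves categories that are already populated untouched.
calibration['model_cost'] = calibration['model_cost'].fillna(art_cost)
print(calibration)
```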
# HR -calibration_data[calibration_data['calibration_category'] == 'Other Drugs, medical supplies, and commodities'] = merged_calibration_relevant_consumables_costs() -calibration_data[calibration_data['calibration_category'] == 'Health Worker Salaries'] = get_calibration_relevant_subset() -calibration_data[calibration_data['calibration_category'] == 'Health Worker Training - In-Service'] = get_calibration_relevant_subset() -calibration_data[calibration_data['calibration_category'] == 'Health Worker Training - Pre-Service'] = get_calibration_relevant_subset() -calibration_data[calibration_data['calibration_category'] == 'Other Human Resources for Health expenses'] = get_calibration_relevant_subset() -calibration_data[calibration_data['calibration_category'] == 'Facility utility bills - ICT', 'Infrastructure - New Builds'] = get_calibration_relevant_subset() -calibration_data[calibration_data['calibration_category'] == 'Infrastructure - Rehabilitation'] = get_calibration_relevant_subset() -calibration_data[calibration_data['calibration_category'] == 'Medical Equipment - Maintenance'] = get_calibration_relevant_subset() -calibration_data[calibration_data['calibration_category'] == 'Medical Equipment - Purchase'] = get_calibration_relevant_subset() -calibration_data[calibration_data['calibration_category'] == 'Vehicles - Purchase and Maintenance'] = get_calibration_relevant_subset() -calibration_data[calibration_data['calibration_category'] == 'Vehicles - Purchase and Maintenance'] = get_calibration_relevant_subset() - -# This will reshape your data such that: +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, 'salary_for_used_cadres', 'Health Worker Salaries')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, 'preservice_training_cost_for_attrited_workers', 'Health Worker Training - In-Service')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, 'inservice_training_cost_for_all_staff', 'Health Worker Training - Pre-Service')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, 'recruitment_cost_for_attrited_workers', 'Other Human Resources for Health expenses')) + +# Equipment +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, 'replacement_cost_annual_total', 'Medical Equipment - Purchase')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, ['upfront_repair_cost_annual_total', 'spare_parts_annual_total', + 'service_fee_annual_total'], 'Medical Equipment - Maintenance')) +#calibration_data[calibration_data['calibration_category'] == 'Vehicles - Purchase and Maintenance'] = get_calibration_relevant_subset() +#calibration_data[calibration_data['calibration_category'] == 'Vehicles - Purchase and Maintenance'] = get_calibration_relevant_subset() + +# Facility operation costs +#calibration_data[calibration_data['calibration_category'] == 'Facility utility bills - ICT', 'Infrastructure - New Builds'] = get_calibration_relevant_subset() + +# Infrastructure +#calibration_data[calibration_data['calibration_category'] == 'Infrastructure - Rehabilitation'] = get_calibration_relevant_subset() + # 3. 
Create calibration plot -# Consumables -# HR -# Equipment # TODO all these HR plots need to be looked at # 1. HR From cbfb2872cd27141d69166c697d5bda18f731db25 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Sun, 20 Oct 2024 22:05:37 +0100 Subject: [PATCH 116/230] calibration plot (work in progress) --- src/scripts/costing/costing.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 8794bfc590..7b35f10b58 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -904,7 +904,32 @@ def get_calibration_relevant_subset_of_other_costs(_df, _subcategory, _calibrati #calibration_data[calibration_data['calibration_category'] == 'Infrastructure - Rehabilitation'] = get_calibration_relevant_subset() # 3. Create calibration plot - +# Filter the DataFrame to get the 'mean', 'lower', and 'upper' values +df = calibration_data +df_mean = df.loc[df.index.get_level_values('stat') == 'mean'] +df_lower = df.loc[df.index.get_level_values('stat') == 'lower'] +df_upper = df.loc[df.index.get_level_values('stat') == 'upper'] + +# Create the scatter plot +plt.figure(figsize=(10, 6)) + +# Plot each point with error bars (for confidence interval) +plt.errorbar(df_mean['actual_expenditure_2019'], + df_mean['model_cost'], + yerr=[df_mean['model_cost'] - df_lower['model_cost'], df_upper['model_cost'] - df_mean['model_cost']], + fmt='o', + ecolor='gray', + capsize=5, + label='Calibration Category') + +# Add labels and title +plt.xlabel('Actual Expenditure 2019') +plt.ylabel('Model Cost (with confidence interval)') +plt.title('Scatter Plot: Actual Expenditure 2019 vs. Model Cost (with Confidence Intervals)') + +# Show the plot +plt.tight_layout() +plt.show() # TODO all these HR plots need to be looked at From 30bc4a72913c3f389a94ac7b789f119bb4a6816f Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 21 Oct 2024 13:25:30 +0100 Subject: [PATCH 117/230] update ARV costs based on TDF/3TC/DTG regimen (previously ABC/3TC/DTG) --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 722c9e0d6b..a6249c209c 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1ea3e7600ea17e9d410dbdb81f2e3b957d610ac7d165aa54f377049f8b00c1f5 -size 4220137 +oid sha256:f47fbc7497c2b4be4f28f238215b841d3be8f2422e3b6a55616689e6d8b91b34 +size 4220561 From 534449d81d2ab3fbf1c3f4e50000c9b2adcba320 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 21 Oct 2024 14:14:14 +0100 Subject: [PATCH 118/230] update calibration plots --- src/scripts/costing/costing.py | 93 ++++++++++++++++++++++------------ 1 file changed, 62 insertions(+), 31 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 7b35f10b58..43a19229d2 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -827,6 +827,7 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe calibration_data = calibration_data.rename(columns = {'EXPENDITURE (USD) (Jul 2018 - Jun 2019)': 'actual_expenditure_2019', 'Calibration_category': 'calibration_category'}) calibration_data = calibration_data[['calibration_category','actual_expenditure_2019', 'max_annual_budget_2020-22']] +calibration_data = 
calibration_data.groupby('calibration_category')[['actual_expenditure_2019', 'max_annual_budget_2020-22']].sum().reset_index() calibration_data1 = calibration_data.copy() calibration_data1['stat'] = 'lower' calibration_data2 = calibration_data.copy() @@ -874,7 +875,9 @@ def get_calibration_relevant_subset_of_other_costs(_df, _subcategory, _calibrati # Consumables calibration_data['model_cost'] = np.nan -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs([2671, 2672, 2673], 'Antiretrovirals')) +# Note that the main ARV regimen in 2018 was tenofovir/lamivudine/efavirenz as opposed to Tenofovir/Lamivudine/Dolutegravir as used in the RF_Costing. The price of this +# was $80 per year (80/(0.103*365)) times what's estimated by the model so let's update this +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs([2671, 2672, 2673], 'Antiretrovirals') * 80/(0.103*365)) calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs([176, 177, 179, 178, 181, 2678], 'TB Treatment')) calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs([162,164,170], 'Antimalarials')) @@ -885,10 +888,11 @@ def get_calibration_relevant_subset_of_other_costs(_df, _subcategory, _calibrati #calibration_data[calibration_data['calibration_category'] == 'Other Drugs, medical supplies, and commodities']['model_cost'] = ?? # HR -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, 'salary_for_used_cadres', 'Health Worker Salaries')) -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, 'preservice_training_cost_for_attrited_workers', 'Health Worker Training - In-Service')) -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, 'inservice_training_cost_for_all_staff', 'Health Worker Training - Pre-Service')) -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, 'recruitment_cost_for_attrited_workers', 'Other Human Resources for Health expenses')) +ratio_of_all_to_used_staff = total_salary_for_all_staff[(0,2018)]/total_salary_for_staff_used_in_scenario[( 0, 'lower')][2018] +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, 'salary_for_used_cadres', 'Health Worker Salaries') * ratio_of_all_to_used_staff) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, 'preservice_training_cost_for_attrited_workers', 'Health Worker Training - In-Service') * ratio_of_all_to_used_staff) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, 'inservice_training_cost_for_all_staff', 'Health Worker Training - Pre-Service') * ratio_of_all_to_used_staff) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, 'recruitment_cost_for_attrited_workers', 'Other Human Resources for Health expenses') * ratio_of_all_to_used_staff) # Equipment calibration_data['model_cost'] = 
calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, 'replacement_cost_annual_total', 'Medical Equipment - Purchase')) @@ -904,33 +908,60 @@ def get_calibration_relevant_subset_of_other_costs(_df, _subcategory, _calibrati #calibration_data[calibration_data['calibration_category'] == 'Infrastructure - Rehabilitation'] = get_calibration_relevant_subset() # 3. Create calibration plot -# Filter the DataFrame to get the 'mean', 'lower', and 'upper' values -df = calibration_data -df_mean = df.loc[df.index.get_level_values('stat') == 'mean'] -df_lower = df.loc[df.index.get_level_values('stat') == 'lower'] -df_upper = df.loc[df.index.get_level_values('stat') == 'upper'] - -# Create the scatter plot -plt.figure(figsize=(10, 6)) - -# Plot each point with error bars (for confidence interval) -plt.errorbar(df_mean['actual_expenditure_2019'], - df_mean['model_cost'], - yerr=[df_mean['model_cost'] - df_lower['model_cost'], df_upper['model_cost'] - df_mean['model_cost']], - fmt='o', - ecolor='gray', - capsize=5, - label='Calibration Category') - -# Add labels and title -plt.xlabel('Actual Expenditure 2019') -plt.ylabel('Model Cost (with confidence interval)') -plt.title('Scatter Plot: Actual Expenditure 2019 vs. Model Cost (with Confidence Intervals)') - -# Show the plot -plt.tight_layout() -plt.show() +list_of_consumables_costs_for_calibration_only_hiv = ['Antiretrovirals', 'HIV Screening/Diagnostic Tests'] +list_of_consumables_costs_for_calibration_without_hiv =['Antimalarials', 'Condoms and Lubricants','Malaria RDTs', 'TB Tests (including RDTs)', 'TB Treatment'] +list_of_hr_costs_for_calibration = [ 'Health Worker Training - In-Service', 'Health Worker Salaries', 'Health Worker Training - Pre-Service'] +list_of_equipment_costs_for_calibration = ['Medical Equipment - Purchase', 'Medical Equipment - Maintenance'] +# Add folder to store calibration plots + +calibration_outputs_folder = Path(figurespath / 'calibration') +if not os.path.exists(calibration_outputs_folder): + os.makedirs(calibration_outputs_folder) +def do_cost_calibration_plot(_df, _costs_included, _calibration_var): + _df = _df[(_df.model_cost.notna()) & (_df.index.get_level_values(0).isin(_costs_included))] + df_mean = _df.loc[_df.index.get_level_values('stat') == 'mean'].reset_index(level='stat', drop=True) + df_lower = _df.loc[_df.index.get_level_values('stat') == 'lower'].reset_index(level='stat', drop=True) + df_upper = _df.loc[_df.index.get_level_values('stat') == 'upper'].reset_index(level='stat', drop=True) + + # Create the scatter plot + plt.figure(figsize=(10, 6)) + + # Plot each point with error bars (for confidence interval) + plt.errorbar(df_mean[_calibration_var], + df_mean['model_cost'], + yerr=[df_mean['model_cost'] - df_lower['model_cost'], df_upper['model_cost'] - df_mean['model_cost']], + fmt='o', + ecolor='gray', + capsize=5, + label='Calibration Category') + + # Adding the 45-degree line (where y = x) + min_val = min(df_mean[_calibration_var].min(), df_mean['model_cost'].min()) + max_val = max(df_mean[_calibration_var].max(), df_mean['model_cost'].max()) + plt.plot([min_val, max_val], [min_val, max_val], 'r--', label='45-degree line') # Red dashed line + + # Add labels for each calibration_category + for i, label in enumerate(df_mean.index): + plt.annotate(label, (df_mean[_calibration_var].iloc[i], df_mean['model_cost'].iloc[i])) + + # Add labels and title + plt.xlabel('Actual Expenditure 2019') + plt.ylabel('Model Cost (with confidence interval)') + plt.title(f'Model Cost 
vs {_calibration_var}') + + # Show the plot + plt.tight_layout() + cost_subcategory = [name for name in globals() if globals()[name] is _costs_included][0] + cost_subcategory = cost_subcategory.replace('list_of_', '').replace('_for_calibration', '') + plt.savefig(calibration_outputs_folder / f'calibration_{_calibration_var}_{cost_subcategory}.png', dpi=100, + bbox_inches='tight') + plt.close() +for var in ['actual_expenditure_2019', 'max_annual_budget_2020-22']: + do_cost_calibration_plot(calibration_data, list_of_consumables_costs_for_calibration_only_hiv, var) + do_cost_calibration_plot(calibration_data, list_of_consumables_costs_for_calibration_without_hiv, var) + do_cost_calibration_plot(calibration_data, list_of_hr_costs_for_calibration, var) + do_cost_calibration_plot(calibration_data, list_of_equipment_costs_for_calibration, var) # TODO all these HR plots need to be looked at # 1. HR From 7d1707d2aab81f61e38868b24b7a388b603b2b9c Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 21 Oct 2024 16:26:34 +0100 Subject: [PATCH 119/230] correct in-service training costs --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index a6249c209c..4185f08322 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f47fbc7497c2b4be4f28f238215b841d3be8f2422e3b6a55616689e6d8b91b34 -size 4220561 +oid sha256:b3cf5595b8cb0b2eb7f2beed1421eaf8e5eba16c10d859bd97353abeb4b9d16f +size 4221094 From e25bf7d3548e4abe0eb29ad88c6973330c2b6f11 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 21 Oct 2024 16:27:25 +0100 Subject: [PATCH 120/230] replace calibration scatter plots with dot plots --- src/scripts/costing/costing.py | 187 ++++++++++++++++++++++++--------- 1 file changed, 137 insertions(+), 50 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 43a19229d2..b0a0772788 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -867,7 +867,7 @@ def first_positive(series): return next((x for x in series if pd.notna(x) and x > 0), np.nan) def get_calibration_relevant_subset_of_other_costs(_df, _subcategory, _calibration_category): - new_data = get_calibration_relevant_subset(_df[_df['Cost_Sub-category'].isin([_subcategory])]).groupby('stat')['value'].sum() + new_data = get_calibration_relevant_subset(_df[_df['Cost_Sub-category'].isin(_subcategory)]).groupby('stat')['value'].sum() new_data = new_data.reset_index() new_data['calibration_category'] = _calibration_category new_data = new_data.rename(columns = {'value':'model_cost'}) @@ -877,25 +877,34 @@ def get_calibration_relevant_subset_of_other_costs(_df, _subcategory, _calibrati calibration_data['model_cost'] = np.nan # Note that the main ARV regimen in 2018 was tenofovir/lamivudine/efavirenz as opposed to Tenofovir/Lamivudine/Dolutegravir as used in the RF_Costing. 
The price of this # was $80 per year (80/(0.103*365)) times what's estimated by the model so let's update this -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs([2671, 2672, 2673], 'Antiretrovirals') * 80/(0.103*365)) -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs([176, 177, 179, 178, 181, 2678], 'TB Treatment')) - -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs([162,164,170], 'Antimalarials')) -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs([163], 'Malaria RDTs')) -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs([190,191,196], 'HIV Screening/Diagnostic Tests')) -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs([2,25], 'Condoms and Lubricants')) -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs([184,187, 175], 'TB Tests (including RDTs)')) -#calibration_data[calibration_data['calibration_category'] == 'Other Drugs, medical supplies, and commodities']['model_cost'] = ?? +art = [2671, 2672, 2673] +tb_treatment = [176, 177, 179, 178, 181, 2678] +antimalarials = [162,164,170] +malaria_rdts = [163] +hiv_screening = [190,191,196] +condoms = [2,25] +tb_tests = [184,187, 175] +other_drugs = set(cost_of_consumables_dispensed['Item_Code'].unique()) - set(art) - set(tb_treatment) - set(antimalarials) - set(malaria_rdts) - set(hiv_screening)\ + - set(condoms) - set(tb_tests) + +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs(art, 'Antiretrovirals') * 80/(0.103*365)) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs(tb_treatment, 'TB Treatment')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs(antimalarials, 'Antimalarials')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs(malaria_rdts, 'Malaria RDTs')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs(hiv_screening, 'HIV Screening/Diagnostic Tests')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs(condoms, 'Condoms and Lubricants')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs(tb_tests, 'TB Tests (including RDTs)')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs(other_drugs, 'Other Drugs, medical supplies, and commodities')) # HR ratio_of_all_to_used_staff = total_salary_for_all_staff[(0,2018)]/total_salary_for_staff_used_in_scenario[( 0, 'lower')][2018] -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, 'salary_for_used_cadres', 'Health Worker Salaries') * ratio_of_all_to_used_staff) -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, 
'preservice_training_cost_for_attrited_workers', 'Health Worker Training - In-Service') * ratio_of_all_to_used_staff) -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, 'inservice_training_cost_for_all_staff', 'Health Worker Training - Pre-Service') * ratio_of_all_to_used_staff) -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, 'recruitment_cost_for_attrited_workers', 'Other Human Resources for Health expenses') * ratio_of_all_to_used_staff) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, ['salary_for_used_cadres'], 'Health Worker Salaries') * ratio_of_all_to_used_staff) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, ['preservice_training_cost_for_attrited_workers'], 'Health Worker Training - Pre-Service') * ratio_of_all_to_used_staff) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, ['inservice_training_cost_for_all_staff'], 'Health Worker Training - In-Service') * ratio_of_all_to_used_staff) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, ['recruitment_cost_for_attrited_workers'], 'Other Human Resources for Health expenses') * ratio_of_all_to_used_staff) # Equipment -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, 'replacement_cost_annual_total', 'Medical Equipment - Purchase')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, ['replacement_cost_annual_total'], 'Medical Equipment - Purchase')) calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, ['upfront_repair_cost_annual_total', 'spare_parts_annual_total', 'service_fee_annual_total'], 'Medical Equipment - Maintenance')) #calibration_data[calibration_data['calibration_category'] == 'Vehicles - Purchase and Maintenance'] = get_calibration_relevant_subset() @@ -909,59 +918,88 @@ def get_calibration_relevant_subset_of_other_costs(_df, _subcategory, _calibrati # 3. 
Create calibration plot list_of_consumables_costs_for_calibration_only_hiv = ['Antiretrovirals', 'HIV Screening/Diagnostic Tests'] -list_of_consumables_costs_for_calibration_without_hiv =['Antimalarials', 'Condoms and Lubricants','Malaria RDTs', 'TB Tests (including RDTs)', 'TB Treatment'] -list_of_hr_costs_for_calibration = [ 'Health Worker Training - In-Service', 'Health Worker Salaries', 'Health Worker Training - Pre-Service'] +list_of_consumables_costs_for_calibration_without_hiv =['Antimalarials', 'Condoms and Lubricants','Malaria RDTs', 'TB Tests (including RDTs)', 'TB Treatment', 'Other Drugs, medical supplies, and commodities'] +list_of_hr_costs_for_calibration = [ 'Health Worker Training - In-Service', 'Health Worker Salaries', 'Health Worker Training - Pre-Service', 'Other Human Resources for Health expenses'] list_of_equipment_costs_for_calibration = ['Medical Equipment - Purchase', 'Medical Equipment - Maintenance'] # Add folder to store calibration plots calibration_outputs_folder = Path(figurespath / 'calibration') if not os.path.exists(calibration_outputs_folder): os.makedirs(calibration_outputs_folder) -def do_cost_calibration_plot(_df, _costs_included, _calibration_var): - _df = _df[(_df.model_cost.notna()) & (_df.index.get_level_values(0).isin(_costs_included))] - df_mean = _df.loc[_df.index.get_level_values('stat') == 'mean'].reset_index(level='stat', drop=True) - df_lower = _df.loc[_df.index.get_level_values('stat') == 'lower'].reset_index(level='stat', drop=True) - df_upper = _df.loc[_df.index.get_level_values('stat') == 'upper'].reset_index(level='stat', drop=True) - # Create the scatter plot - plt.figure(figsize=(10, 6)) +def do_cost_calibration_plot(_df, _costs_included): + # Filter the dataframe + _df = _df[(_df.model_cost.notna()) & (_df.index.get_level_values(0).isin(_costs_included))] - # Plot each point with error bars (for confidence interval) - plt.errorbar(df_mean[_calibration_var], - df_mean['model_cost'], + # For df_mean + df_mean = _df.loc[_df.index.get_level_values('stat') == 'mean'].reset_index(level='stat', drop=True)/1e6 + total_mean = pd.DataFrame(df_mean.sum()).T # Calculate the total and convert it to a DataFrame + total_mean.index = ['Total'] # Name the index of the total row as 'Total' + df_mean = pd.concat([df_mean, total_mean], axis=0) # Concatenate the total row + + # For df_lower + df_lower = _df.loc[_df.index.get_level_values('stat') == 'lower'].reset_index(level='stat', drop=True)/1e6 + total_lower = pd.DataFrame(df_lower.sum()).T # Calculate the total and convert it to a DataFrame + total_lower.index = ['Total'] # Name the index of the total row as 'Total' + df_lower = pd.concat([df_lower, total_lower], axis=0) # Concatenate the total row + + # For df_upper + df_upper = _df.loc[_df.index.get_level_values('stat') == 'upper'].reset_index(level='stat', drop=True)/1e6 + total_upper = pd.DataFrame(df_upper.sum()).T # Calculate the total and convert it to a DataFrame + total_upper.index = ['Total'] # Name the index of the total row as 'Total' + df_upper = pd.concat([df_upper, total_upper], axis=0) # Concatenate the total row + + # Create the dot plot + plt.figure(figsize=(12, 8)) + + # Plot model_cost as dots with confidence interval error bars + plt.errorbar(df_mean.index, df_mean['model_cost'], yerr=[df_mean['model_cost'] - df_lower['model_cost'], df_upper['model_cost'] - df_mean['model_cost']], - fmt='o', - ecolor='gray', - capsize=5, - label='Calibration Category') + fmt='o', label='Model Cost', ecolor='gray', capsize=5, color='saddlebrown') 
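The `yerr` argument above follows matplotlib's convention for asymmetric error bars: a pair of sequences giving the offsets below and above each plotted value, which is why PATCH 120 passes `[mean - lower, upper - mean]` rather than the bounds themselves. A small self-contained sketch with made-up numbers:

```python
import matplotlib.pyplot as plt

mean = [10.0, 12.0]    # hypothetical point estimates (USD millions)
lower = [8.0, 11.0]    # hypothetical lower bounds
upper = [13.0, 14.0]   # hypothetical upper bounds

# Offsets from the plotted value: first row = distance down, second row = distance up
yerr = [[m - l for m, l in zip(mean, lower)],
        [u - m for u, m in zip(upper, mean)]]

plt.errorbar(['Category A', 'Category B'], mean, yerr=yerr, fmt='o', capsize=5)
plt.savefig('errorbar_sketch.png', bbox_inches='tight')
plt.close()
```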
- # Adding the 45-degree line (where y = x) - min_val = min(df_mean[_calibration_var].min(), df_mean['model_cost'].min()) - max_val = max(df_mean[_calibration_var].max(), df_mean['model_cost'].max()) - plt.plot([min_val, max_val], [min_val, max_val], 'r--', label='45-degree line') # Red dashed line + # Plot annual_expenditure_2019 and max_annual_budget_2020-22 as dots + plt.plot(df_mean.index, df_mean['actual_expenditure_2019'], 'bo', label='Actual Expenditure 2019', markersize=8) + plt.plot(df_mean.index, df_mean['max_annual_budget_2020-22'], 'go', label='Max Annual Budget 2020-22', markersize=8) - # Add labels for each calibration_category - for i, label in enumerate(df_mean.index): - plt.annotate(label, (df_mean[_calibration_var].iloc[i], df_mean['model_cost'].iloc[i])) + # Draw a blue line between annual_expenditure_2019 and max_annual_budget_2020-22 + plt.vlines(df_mean.index, df_mean['actual_expenditure_2019'], df_mean['max_annual_budget_2020-22'], color='blue', + label='Budget Range') - # Add labels and title - plt.xlabel('Actual Expenditure 2019') - plt.ylabel('Model Cost (with confidence interval)') - plt.title(f'Model Cost vs {_calibration_var}') + # Add labels to the model_cost dots (yellow color, slightly shifted right) + for i, (x, y) in enumerate(zip(df_mean.index, df_mean['model_cost'])): + plt.text(i + 0.05, y, f'{y:.2f}', ha='left', va='bottom', fontsize=9, + color='saddlebrown') # label model_cost values - # Show the plot - plt.tight_layout() + # Add labels and title cost_subcategory = [name for name in globals() if globals()[name] is _costs_included][0] cost_subcategory = cost_subcategory.replace('list_of_', '').replace('_for_calibration', '') - plt.savefig(calibration_outputs_folder / f'calibration_{_calibration_var}_{cost_subcategory}.png', dpi=100, + plt.xlabel('Cost Sub-Category') + plt.ylabel('Costs (USD), millions') + plt.title(f'Model Cost vs Annual Expenditure 2019 and Max(Annual Budget 2020-22)\n {cost_subcategory}') + + # Rotate x-axis labels for readability + plt.xticks(rotation=45, ha='right') + + # Adding a legend + plt.legend(loc='upper left', bbox_to_anchor=(1, 1), fontsize=10) + + # Tight layout and save the figure + plt.tight_layout() + plt.savefig(calibration_outputs_folder / f'calibration_dot_plot_{cost_subcategory}.png', dpi=100, bbox_inches='tight') plt.close() -for var in ['actual_expenditure_2019', 'max_annual_budget_2020-22']: - do_cost_calibration_plot(calibration_data, list_of_consumables_costs_for_calibration_only_hiv, var) - do_cost_calibration_plot(calibration_data, list_of_consumables_costs_for_calibration_without_hiv, var) - do_cost_calibration_plot(calibration_data, list_of_hr_costs_for_calibration, var) - do_cost_calibration_plot(calibration_data, list_of_equipment_costs_for_calibration, var) + +# Call the function for each variable and cost list +all_calibration_costs = list_of_consumables_costs_for_calibration_only_hiv + list_of_consumables_costs_for_calibration_without_hiv + list_of_hr_costs_for_calibration + list_of_equipment_costs_for_calibration +all_consumable_costs = list_of_consumables_costs_for_calibration_without_hiv + list_of_consumables_costs_for_calibration_only_hiv + +do_cost_calibration_plot(calibration_data,list_of_consumables_costs_for_calibration_without_hiv) +do_cost_calibration_plot(calibration_data,list_of_consumables_costs_for_calibration_only_hiv) +do_cost_calibration_plot(calibration_data,all_consumable_costs) +do_cost_calibration_plot(calibration_data, list_of_hr_costs_for_calibration) 
+do_cost_calibration_plot(calibration_data, list_of_equipment_costs_for_calibration) +do_cost_calibration_plot(calibration_data,all_calibration_costs) # TODO all these HR plots need to be looked at # 1. HR @@ -1418,4 +1456,53 @@ def plot_most_expensive_equipment(_df, top_x_values = 10, figname_prefix = "Equi # TODO Collapse facility IDs by level of care to get the total number of facilities at each level using an item # TODO Multiply number of facilities by level with the quantity needed of each equipment and collapse to get total number of equipment (nationally) # TODO Which equipment needs to be newly purchased (currently no assumption made for equipment with cost > $250,000) + +# Calibration scatter plots +def do_cost_calibration_plot(_df, _costs_included, _calibration_var): + _df = _df[(_df.model_cost.notna()) & (_df.index.get_level_values(0).isin(_costs_included))] + df_mean = _df.loc[_df.index.get_level_values('stat') == 'mean'].reset_index(level='stat', drop=True) + df_lower = _df.loc[_df.index.get_level_values('stat') == 'lower'].reset_index(level='stat', drop=True) + df_upper = _df.loc[_df.index.get_level_values('stat') == 'upper'].reset_index(level='stat', drop=True) + + # Create the scatter plot + plt.figure(figsize=(10, 6)) + + # Plot each point with error bars (for confidence interval) + plt.errorbar(df_mean[_calibration_var], + df_mean['model_cost'], + yerr=[df_mean['model_cost'] - df_lower['model_cost'], df_upper['model_cost'] - df_mean['model_cost']], + fmt='o', + ecolor='gray', + capsize=5, + label='Calibration Category') + + # Adding the 45-degree line (where y = x) + min_val = min(df_mean[_calibration_var].min(), df_mean['model_cost'].min()) + max_val = max(df_mean[_calibration_var].max(), df_mean['model_cost'].max()) + plt.plot([min_val, max_val], [min_val, max_val], 'r--', label='45-degree line') # Red dashed line + + # Add labels for each calibration_category + for i, label in enumerate(df_mean.index): + plt.annotate(label, (df_mean[_calibration_var].iloc[i], df_mean['model_cost'].iloc[i])) + + # Add labels and title + plt.xlabel('Actual Expenditure 2019') + plt.ylabel('Model Cost (with confidence interval)') + plt.title(f'Model Cost vs {_calibration_var}') + + # Show the plot + plt.tight_layout() + cost_subcategory = [name for name in globals() if globals()[name] is _costs_included][0] + cost_subcategory = cost_subcategory.replace('list_of_', '').replace('_for_calibration', '') + plt.savefig(calibration_outputs_folder / f'calibration_{_calibration_var}_{cost_subcategory}.png', dpi=100, + bbox_inches='tight') + plt.close() + +for var in ['actual_expenditure_2019', 'max_annual_budget_2020-22']: + do_cost_calibration_plot(calibration_data, list_of_consumables_costs_for_calibration_only_hiv, var) + do_cost_calibration_plot(calibration_data, list_of_consumables_costs_for_calibration_without_hiv, var) + do_cost_calibration_plot(calibration_data, list_of_hr_costs_for_calibration, var) + do_cost_calibration_plot(calibration_data, list_of_equipment_costs_for_calibration, var) + + ''' From 15a54575a5b64fa81dd6bc1ff37f643d33acf8e1 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 21 Oct 2024 17:09:52 +0100 Subject: [PATCH 121/230] add cost of vitamin b-12 --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 4185f08322..01a4941651 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ 
b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b3cf5595b8cb0b2eb7f2beed1421eaf8e5eba16c10d859bd97353abeb4b9d16f -size 4221094 +oid sha256:98128e2d463dcc5cfb72d44347f56d93d3e121a678729eb3780f4e1f333d2bf7 +size 4221311 From 628dc2e9044f1377b56b5d93bb87acaeb1be216b Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 21 Oct 2024 18:53:59 +0100 Subject: [PATCH 122/230] use life span to calculate equipment replacement costs --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- src/scripts/costing/costing.py | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 01a4941651..2d0b9507ad 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:98128e2d463dcc5cfb72d44347f56d93d3e121a678729eb3780f4e1f333d2bf7 -size 4221311 +oid sha256:ec6fb44a6031d7312cbb3e866ade71f15fe5e9cbbf0492d0f4f8e924f27c8d7c +size 4269589 diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index b0a0772788..c7a5c52ac3 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -94,6 +94,9 @@ def drop_outside_period(_df): districts = set(pd.read_csv(resourcefilepath / 'demography' / 'ResourceFile_Population_2010.csv')['District']) fac_levels = set(mfl.Facility_Level) +# Overall cost assumptions +discount_rate = 0.03 + #%% Calculate financial costs # 1. HR cost # Load annual salary by officer type and facility level @@ -418,11 +421,11 @@ def melt_and_label_consumables_cost(_df, label): unit_cost_equipment = workbook_cost["equipment"] unit_cost_equipment = unit_cost_equipment.rename(columns=unit_cost_equipment.iloc[7]).reset_index(drop=True).iloc[8:] # Calculate necessary costs based on HSSP-III assumptions -unit_cost_equipment['replacement_cost_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.1 / 8 if row['unit_purchase_cost'] < 250000 else 0, axis=1) # 10% of the items over 8 years +unit_cost_equipment['replacement_cost_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost']/(1+(1-(1+discount_rate)**(-row['Life span']+1))/discount_rate), axis=1) # 10% of the items over 8 years unit_cost_equipment['service_fee_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.8 / 8 if row['unit_purchase_cost'] > 1000 else 0, axis=1) # 80% of the value of the item over 8 years unit_cost_equipment['spare_parts_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.2 / 8 if row['unit_purchase_cost'] > 1000 else 0, axis=1) # 20% of the value of the item over 8 years unit_cost_equipment['upfront_repair_cost_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.2 * 0.2 / 8 if row['unit_purchase_cost'] < 250000 else 0, axis=1) # 20% of the value of 20% of the items over 8 years -# TODO the above line assumes that the life span of each item of equipment is 80 years. 
This needs to be updated using realistic life span data +# TODO consider discounting the other components unit_cost_equipment = unit_cost_equipment[['Item_code','Equipment_tlo', 'service_fee_annual', 'spare_parts_annual', 'upfront_repair_cost_annual', 'replacement_cost_annual', From 8534463f1d01814df12488e6e148b9f1c0050789 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 21 Oct 2024 19:14:46 +0100 Subject: [PATCH 123/230] update the unit_cost of IRS --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 2d0b9507ad..bb397baa62 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ec6fb44a6031d7312cbb3e866ade71f15fe5e9cbbf0492d0f4f8e924f27c8d7c -size 4269589 +oid sha256:007bd40f36a257b5d38942990c021356ba4c6b65c7664516253ea8dcf823b36a +size 4269736 From a128a3ab718359a87f8e24e470f569404c0b7d57 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 22 Oct 2024 13:15:20 +0100 Subject: [PATCH 124/230] add costs of IRS and bednets --- src/scripts/costing/costing.py | 99 +++++++++++++++++++++++++++++----- 1 file changed, 86 insertions(+), 13 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index c7a5c52ac3..67fd24831b 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -49,7 +49,7 @@ os.makedirs(figurespath) # Declare period for which the results will be generated (defined inclusively) -TARGET_PERIOD = (Date(2010, 1, 1), Date(2031, 12, 31)) # TODO allow for multi-year costing +TARGET_PERIOD = (Date(2010, 1, 1), Date(2030, 12, 31)) # TODO allow for multi-year costing def drop_outside_period(_df): """Return a dataframe which only includes for which the date is within the limits defined by TARGET_PERIOD""" return _df.drop(index=_df.index[~_df['date'].between(*TARGET_PERIOD)]) @@ -57,11 +57,13 @@ def drop_outside_period(_df): # %% Gathering basic information # Load result files #------------------- -#results_folder = get_scenario_outputs('example_costing_scenario.py', outputfilepath)[0] # impact_of_cons_regression_scenarios -#results_folder = get_scenario_outputs('long_run_all_diseases.py', outputfilepath)[0] # impact_of_cons_regression_scenarios -results_folder = get_scenario_outputs('htm_with_and_without_hss-2024-09-04T143044Z.py', outputfilepath)[0] # Tara's FCDO scenarios +#results_folder = get_scenario_outputs('example_costing_scenario.py', outputfilepath)[0] +#results_folder = get_scenario_outputs('long_run_all_diseases.py', outputfilepath)[0] #results_folder = get_scenario_outputs('scenario_impact_of_consumables_availability.py', outputfilepath)[0] # impact_of_cons_regression_scenarios +results_folder = get_scenario_outputs('htm_with_and_without_hss-2024-09-04T143044Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios +#results_folder = get_scenario_outputs('hss_elements-2024-09-04T142900Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios + #equipment_results_folder = Path('./outputs/sakshi.mohan@york.ac.uk/021_long_run_all_diseases_run') #consumables_results_folder = Path('./outputs/sakshi.mohan@york.ac.uk/impact_of_consumables_scenarios-2024-06-11T204007Z/') # TODO When the costing module is ready the above results_folder should be the same for the calculation of all costs @@ -70,7 +72,7 @@ def drop_outside_period(_df): #log_equipment = 
load_pickled_dataframes(equipment_results_folder, 0, 0) # look at one log (so can decide what to extract) -log = load_pickled_dataframes(results_folder) +log = load_pickled_dataframes(results_folder, 0, 0) # get basic information about the results info = get_scenario_info(results_folder) @@ -548,11 +550,78 @@ def get_equipment_used_by_district_and_facility(_df: pd.Series) -> pd.Series: # 4. Facility running costs # Average running costs by facility level and district times the number of facilities in the simulation +# Additional costs pertaining to simulation +# IRS costs +irs_coverage_rate = 0.8 +districts_with_irs_scaleup = ['Kasungu', 'Mchinji', 'Lilongwe', 'Lilongwe City', 'Dowa', 'Ntchisi', 'Salima', 'Mangochi', + 'Mwanza', 'Likoma', 'Nkhotakota'] +proportion_of_district_with_irs_coverage = len(districts_with_irs_scaleup)/mfl.District.nunique() +# Get total population (11/total number o districts in mfl) +#TARGET_PERIOD_MALARIA_SCALEUP = (Date(2024, 1, 1), Date(2030, 12, 31)) +def get_total_population(_df): + years_needed = [i.year for i in TARGET_PERIOD] + _df['year'] = pd.to_datetime(_df['date']).dt.year + assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." + return pd.Series( + data=_df + .loc[_df.year.between(*years_needed)] + .drop(columns=['male', 'female', 'date']).set_index('year').sum(axis = 1) + ) + +total_population_by_year = summarize(extract_results( + results_folder, + module='tlo.methods.demography', + key='population', + custom_generate_series=get_total_population, + do_scaling=True +)) + +#years_with_no_malaria_scaleup = set(TARGET_PERIOD).symmetric_difference(set(TARGET_PERIOD_MALARIA_SCALEUP)) +#years_with_no_malaria_scaleup = sorted(list(years_with_no_malaria_scaleup)) +#years_with_no_malaria_scaleup = [i.year for i in years_with_no_malaria_scaleup] + +irs_cost_per_person = unit_price_consumable[unit_price_consumable.Item_Code == 161]['Final_price_per_chosen_unit (USD, 2023)'] +irs_multiplication_factor = irs_cost_per_person * irs_coverage_rate * proportion_of_district_with_irs_coverage +total_irs_cost = irs_multiplication_factor.iloc[0] * total_population_by_year # for districts and scenarios included +# TODO scenarios_with_irs_scaleup +# TODO population_in_district from malria logger + +# Bednet costs +bednet_coverage_rate = 0.7 +# All districts covered +# We can assume 3-year lifespan of a bednet, each bednet covering 1.8 people. 
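As a quick, illustrative check of the per-capita bednet arithmetic implemented in the added lines that follow (a sketch only: the USD 2.70 price per net is a hypothetical placeholder, whereas the real unit price is read from the consumables sheet of the costing workbook; the 1.8 people per net, 3-year lifespan and 0.7 coverage rate are the assumptions stated above):

```python
# Hedged sketch of the bednet cost-per-capita calculation; the net price is hypothetical.
hypothetical_price_per_bednet = 2.70   # placeholder, not the workbook value
people_covered_per_net = 1.8           # assumption stated above
bednet_lifespan_years = 3              # assumption stated above
bednet_coverage_rate = 0.7             # as set above

annual_cost_per_person_covered = hypothetical_price_per_bednet / people_covered_per_net / bednet_lifespan_years
annual_cost_per_capita = annual_cost_per_person_covered * bednet_coverage_rate
print(annual_cost_per_person_covered, annual_cost_per_capita)  # ~0.50 and ~0.35 USD per person per year
```

The IRS factor above is assembled the same way, as unit price × coverage rate × the share of districts in which IRS is scaled up, before being multiplied by the projected population.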
+unit_cost_of_bednet = unit_price_consumable[unit_price_consumable.Item_Code == 160]['Final_price_per_chosen_unit (USD, 2023)'] +annual_bednet_cost_per_person = unit_cost_of_bednet / 1.8 / 3 +bednet_multiplication_factor = bednet_coverage_rate * annual_bednet_cost_per_person +total_bednet_cost = bednet_multiplication_factor.iloc[0] * total_population_by_year # for scenarios included + +years_with_no_malaria_scaleup = list(range(first_year_of_simulation, 2024)) +def set_cost_during_years_before_malaria_scaleup_to_zero(_df): + for col in _df.columns: + for y in years_with_no_malaria_scaleup: + _df.loc[_df.index.get_level_values(0) == y, col] = 0 + return _df + +total_bednet_cost = set_cost_during_years_before_malaria_scaleup_to_zero(total_bednet_cost) +total_irs_cost = set_cost_during_years_before_malaria_scaleup_to_zero(total_irs_cost) +# TODO Scale-up programmes are implemented from 01/01/2024 + +# Malaria scale-up costs - TOTAL +malaria_scaleup_costs = [ + (total_irs_cost.reset_index(), 'cost_of_IRS_scaleup'), + (total_bednet_cost.reset_index(), 'cost_of_bednet_scaleup'), +] +# Iterate through additional costs, melt and concatenate +for df, label in malaria_scaleup_costs: + new_df = melt_and_label_consumables_cost(df, label) + scenario_cost = pd.concat([scenario_cost, new_df], ignore_index=True) +scenario_cost.loc[scenario_cost['Cost_Category'].isna(),'Cost_Category'] = 'IRS and Bednet Scale-up Costs' + # Extract all costs to a .csv -scenario_cost.to_csv(costing_outputs_folder / 'scenario_cost.csv') +scenario_cost.to_csv(costing_outputs_folder / 'scenario_cost.csv', index = False) # Calculate total cost -total_scenario_cost = scenario_cost.groupby(['draw', 'stat'])['value'].sum().unstack() +total_scenario_cost = scenario_cost[(scenario_cost.year >= 2020) & (scenario_cost.year <= 2030)].groupby(['draw', 'stat'])['value'].sum().unstack() total_scenario_cost = total_scenario_cost.unstack().reset_index() total_scenario_cost_wide = total_scenario_cost.pivot_table(index=None, columns=['draw', 'stat'], values=0) @@ -580,12 +649,13 @@ def find_difference_relative_to_comparison(_ser: pd.Series, # %% # Monetary value of health impact +TARGET_PERIOD_INTERVENTION = (Date(2020, 1, 1), Date(2030, 12, 31)) def get_num_dalys(_df): """Return total number of DALYS (Stacked) by label (total within the TARGET_PERIOD). Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using results from runs that crashed mid-way through the simulation. """ - years_needed = [i.year for i in TARGET_PERIOD] + years_needed = [i.year for i in TARGET_PERIOD_INTERVENTION] assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." 
return pd.Series( data=_df @@ -635,7 +705,7 @@ def do_stacked_bar_plot(_df, cost_category, year, actual_expenditure): if year == 'all': subset_df = _df else: - subset_df = _df[_df['year'] == year] + subset_df = _df[_df['year'].isin(year)] if cost_category == 'all': subset_df = subset_df pivot_df = subset_df.pivot_table(index='draw', columns='Cost_Category', values='value', aggfunc='sum') @@ -646,7 +716,7 @@ def do_stacked_bar_plot(_df, cost_category, year, actual_expenditure): # Plot a stacked bar chart pivot_df.plot(kind='bar', stacked=True) # Add a horizontal red line to represent 2018 Expenditure as per resource mapping - plt.axhline(y=actual_expenditure/1e6, color='red', linestyle='--', label='Actual expenditure recorded in 2018') + #plt.axhline(y=actual_expenditure/1e6, color='red', linestyle='--', label='Actual expenditure recorded in 2018') # Save plot plt.xlabel('Scenario') @@ -660,7 +730,9 @@ def do_stacked_bar_plot(_df, cost_category, year, actual_expenditure): do_stacked_bar_plot(_df = scenario_cost, cost_category = 'Medical consumables', year = 2018, actual_expenditure = 206_747_565) do_stacked_bar_plot(_df = scenario_cost, cost_category = 'Human Resources for Health', year = 2018, actual_expenditure = 128_593_787) do_stacked_bar_plot(_df = scenario_cost, cost_category = 'Equipment purchase and maintenance', year = 2018, actual_expenditure = 6_048_481) -do_stacked_bar_plot(_df = scenario_cost, cost_category = 'all', year = 2018, actual_expenditure = 624_054_027) +do_stacked_bar_plot(_df = scenario_cost, cost_category = 'all', year = [2020], actual_expenditure = 624_054_027) +do_stacked_bar_plot(_df = scenario_cost, cost_category = 'all', year = [2024], actual_expenditure = 624_054_027) +do_stacked_bar_plot(_df = scenario_cost, cost_category = 'all', year = list(range(2020,2031)), actual_expenditure = np.nan) # 2. Line plots of total costs #---------------------------------------------------- @@ -796,8 +868,8 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe return fig, ax -# Plot DALYS accrued (with xtickabels horizontal and wrapped) -name_of_plot = f'Maximum ability to pay, {first_year_of_simulation} - {final_year_of_simulation}' +# Plot Max ability to pay +name_of_plot = f'Maximum ability to pay, 2020-2030' #f'Maximum ability to pay, {first_year_of_simulation} - {final_year_of_simulation}' fig, ax = do_bar_plot_with_ci( (max_ability_to_pay_for_implementation / 1e6).clip(lower=0.0), annotations=[ @@ -1003,6 +1075,7 @@ def do_cost_calibration_plot(_df, _costs_included): do_cost_calibration_plot(calibration_data, list_of_hr_costs_for_calibration) do_cost_calibration_plot(calibration_data, list_of_equipment_costs_for_calibration) do_cost_calibration_plot(calibration_data,all_calibration_costs) +calibration_data.to_csv(figurespath / 'calibration/calibration.csv') # TODO all these HR plots need to be looked at # 1. 
HR From dc52c3315eb068881d90343896159a100832fc83 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 22 Oct 2024 17:05:56 +0100 Subject: [PATCH 125/230] stylistic updates --- src/scripts/costing/costing.py | 51 ++++++++++++++-------------------- 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 67fd24831b..9db3a15abc 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -29,18 +29,16 @@ unflatten_flattened_multi_index_in_logging ) -# define a timestamp for script outputs +# Define a timestamp for script outputs timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M") -# print the start time of the script +# Print the start time of the script print('Script Start', datetime.datetime.now().strftime('%H:%M')) -# define a pathway to the data folder (note: currently outside the TLO model directory) -# remember to set working directory to TLOmodel/ -#outputfilepath = Path('./outputs/sakshi.mohan@york.ac.uk') +# Define a pathway to relevant folders outputfilepath = Path('./outputs/t.mangal@imperial.ac.uk') resourcefilepath = Path("./resources") -path_for_new_resourcefiles = resourcefilepath / "healthsystem/consumables" +path_for_consumable_resourcefiles = resourcefilepath / "healthsystem/consumables" costing_outputs_folder = Path('./outputs/costing') if not os.path.exists(costing_outputs_folder): os.makedirs(costing_outputs_folder) @@ -49,7 +47,7 @@ os.makedirs(figurespath) # Declare period for which the results will be generated (defined inclusively) -TARGET_PERIOD = (Date(2010, 1, 1), Date(2030, 12, 31)) # TODO allow for multi-year costing +TARGET_PERIOD = (Date(2010, 1, 1), Date(2030, 12, 31)) def drop_outside_period(_df): """Return a dataframe which only includes for which the date is within the limits defined by TARGET_PERIOD""" return _df.drop(index=_df.index[~_df['date'].between(*TARGET_PERIOD)]) @@ -61,29 +59,22 @@ def drop_outside_period(_df): #results_folder = get_scenario_outputs('long_run_all_diseases.py', outputfilepath)[0] #results_folder = get_scenario_outputs('scenario_impact_of_consumables_availability.py', outputfilepath)[0] # impact_of_cons_regression_scenarios -results_folder = get_scenario_outputs('htm_with_and_without_hss-2024-09-04T143044Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios -#results_folder = get_scenario_outputs('hss_elements-2024-09-04T142900Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios +#results_folder = get_scenario_outputs('htm_with_and_without_hss-2024-09-04T143044Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios version 1 +#results_folder = get_scenario_outputs('hss_elements-2024-09-04T142900Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios version 1 +results_folder = get_scenario_outputs('htm_with_and_without_hss-2024-10-12T111720Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios version 2 +#results_folder = get_scenario_outputs('hss_elements-2024-10-12T111649Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios version 2 -#equipment_results_folder = Path('./outputs/sakshi.mohan@york.ac.uk/021_long_run_all_diseases_run') -#consumables_results_folder = Path('./outputs/sakshi.mohan@york.ac.uk/impact_of_consumables_scenarios-2024-06-11T204007Z/') -# TODO When the costing module is ready the above results_folder should be the same for the calculation of all costs - -# check can read results from draw=0, run=0 -#log_equipment = load_pickled_dataframes(equipment_results_folder, 0, 0) - -# look at one log (so can decide what 
to extract) -log = load_pickled_dataframes(results_folder, 0, 0) - -# get basic information about the results -info = get_scenario_info(results_folder) - -# 1) Extract the parameters that have varied over the set of simulations +# Check can read results from draw=0, run=0 +log = load_pickled_dataframes(results_folder, 0, 0) # look at one log (so can decide what to extract) +info = get_scenario_info(results_folder) # get basic information about the results +# Extract the parameters that have varied over the set of simulations params = extract_params(results_folder) final_year_of_simulation = max(log['tlo.simulation']['info']['date']).year first_year_of_simulation = min(log['tlo.simulation']['info']['date']).year draws = params.index.unique().tolist() # list of draws runs = range(0, info['runs_per_draw']) years = list(range(first_year_of_simulation, final_year_of_simulation + 1)) +population_scaling_factor = log['tlo.methods.demography']['scaling_factor']['scaling_factor'].iloc[0] # Load cost input files #------------------------ @@ -93,7 +84,8 @@ def drop_outside_period(_df): # Extract districts and facility levels from the Master Facility List mfl = pd.read_csv(resourcefilepath / "healthsystem" / "organisation" / "ResourceFile_Master_Facilities_List.csv") -districts = set(pd.read_csv(resourcefilepath / 'demography' / 'ResourceFile_Population_2010.csv')['District']) +district_dict = pd.read_csv(resourcefilepath / 'demography' / 'ResourceFile_Population_2010.csv')[['District_Num', 'District']].drop_duplicates() +district_dict = dict(zip(district_dict['District_Num'], district_dict['District'])) fac_levels = set(mfl.Facility_Level) # Overall cost assumptions @@ -114,7 +106,6 @@ def drop_outside_period(_df): year_HR_scaling_by_level_and_officer_type = params[params.module_param == 'HealthSystem:year_HR_scaling_by_level_and_officer_type'].reset_index() yearly_HR_scaling_mode = params[params.module_param == 'HealthSystem:yearly_HR_scaling_mode'].reset_index() -# TODO add the following parameters to estimate HR availability per year - HealthSystem:yearly_HR_scaling_mode, HealthSystem:HR_scaling_by_level_and_officer_type_mode, HealthSystem:year_HR_scaling_by_level_and_officer_type hr_df_columns = pd.read_csv(resourcefilepath / "healthsystem/human_resources/actual/ResourceFile_Daily_Capabilities.csv").columns.drop(['Facility_ID', 'Officer_Category']) facilities = pd.read_csv(resourcefilepath / "healthsystem/human_resources/actual/ResourceFile_Daily_Capabilities.csv")['Facility_ID'].unique().tolist() officer_categories = pd.read_csv(resourcefilepath / "healthsystem/human_resources/actual/ResourceFile_Daily_Capabilities.csv")['Officer_Category'].unique().tolist() @@ -361,7 +352,7 @@ def get_counts_of_items_requested(_df): # While there are estimates in the literature of what % these might be, we agreed that it is better to rely upon # an empirical estimate based on OpenLMIS data # Estimate the stock to dispensed ratio from OpenLMIS data -lmis_consumable_usage = pd.read_csv(path_for_new_resourcefiles / "ResourceFile_Consumables_availability_and_usage.csv") +lmis_consumable_usage = pd.read_csv(path_for_consumable_resourcefiles / "ResourceFile_Consumables_availability_and_usage.csv") # Collapse individual facilities lmis_consumable_usage_by_item_level_month = lmis_consumable_usage.groupby(['category', 'item_code', 'district', 'fac_type_tlo', 'month'])[['closing_bal', 'dispensed', 'received']].sum() df = lmis_consumable_usage_by_item_level_month # Drop rows where monthly OpenLMIS data wasn't available @@ 
-472,9 +463,9 @@ def get_equipment_used_by_district_and_facility(_df: pd.Series) -> pd.Series: for r in runs: print(f"Now processing draw {d} and run {r}") # Extract a list of equipment which was used at each facility level within each district - equipment_used = {district: {level: [] for level in fac_levels} for district in districts} # create a dictionary with a key for each district and facility level + equipment_used = {district: {level: [] for level in fac_levels} for district in list(district_dict.values())} # create a dictionary with a key for each district and facility level list_of_equipment_used_by_current_draw_and_run = list_of_equipment_used_by_draw_and_run[(d, r)].reset_index() - for dist in districts: + for dist in list(district_dict.values()): for level in fac_levels: equipment_used_subset = list_of_equipment_used_by_current_draw_and_run[(list_of_equipment_used_by_current_draw_and_run['District'] == dist) & (list_of_equipment_used_by_current_draw_and_run['Facility_Level'] == level)] equipment_used_subset.columns = ['District', 'Facility_Level', 'EquipmentEverUsed'] @@ -1331,7 +1322,7 @@ def convert_dict_to_dataframe(_dict): # While there are estimates in the literature of what % these might be, we agreed that it is better to rely upon # an empirical estimate based on OpenLMIS data # Estimate the stock to dispensed ratio from OpenLMIS data -lmis_consumable_usage = pd.read_csv(path_for_new_resourcefiles / "ResourceFile_Consumables_availability_and_usage.csv") +lmis_consumable_usage = pd.read_csv(path_for_consumable_resourcefiles / "ResourceFile_Consumables_availability_and_usage.csv") # Collapse individual facilities lmis_consumable_usage_by_item_level_month = lmis_consumable_usage.groupby(['category', 'item_code', 'district', 'fac_type_tlo', 'month'])[['closing_bal', 'dispensed', 'received']].sum() df = lmis_consumable_usage_by_item_level_month # Drop rows where monthly OpenLMIS data wasn't available @@ -1377,7 +1368,7 @@ def convert_dict_to_dataframe(_dict): full_cons_cost_df.to_csv(figurespath / 'consumables_cost_220824.csv') # Import data for plotting -tlo_lmis_mapping = pd.read_csv(path_for_new_resourcefiles / 'ResourceFile_consumables_matched.csv', low_memory=False, encoding="ISO-8859-1")[['item_code', 'module_name', 'consumable_name_tlo']] +tlo_lmis_mapping = pd.read_csv(path_for_consumable_resourcefiles / 'ResourceFile_consumables_matched.csv', low_memory=False, encoding="ISO-8859-1")[['item_code', 'module_name', 'consumable_name_tlo']] tlo_lmis_mapping = tlo_lmis_mapping[~tlo_lmis_mapping['item_code'].duplicated(keep='first')] full_cons_cost_df = pd.merge(full_cons_cost_df, tlo_lmis_mapping, on = 'item_code', how = 'left', validate = "1:1") full_cons_cost_df['total_cost_perfect_availability'] = full_cons_cost_df['cost_dispensed_stock_perfect_availability'] + full_cons_cost_df['cost_excess_stock_perfect_availability'] From fbb7b88d5360984cc5ced96bb7b4c1726cfa6d84 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 22 Oct 2024 18:37:30 +0100 Subject: [PATCH 126/230] update staff count estimation - use healthsystem.summary logger --- src/scripts/costing/costing.py | 113 ++++++++++++++------------------- 1 file changed, 48 insertions(+), 65 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 9db3a15abc..2a80b51319 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -48,10 +48,19 @@ # Declare period for which the results will be generated (defined inclusively) TARGET_PERIOD = (Date(2010, 1, 1), 
Date(2030, 12, 31)) + +# Useful common functions def drop_outside_period(_df): """Return a dataframe which only includes for which the date is within the limits defined by TARGET_PERIOD""" return _df.drop(index=_df.index[~_df['date'].between(*TARGET_PERIOD)]) +def melt_model_output_draws_and_runs(_df, id_vars): + multi_index = pd.MultiIndex.from_tuples(_df.columns) + _df.columns = multi_index + melted_df = pd.melt(_df, id_vars=id_vars).rename(columns = {'variable_0': 'draw', 'variable_1': 'run'}) + return melted_df + + # %% Gathering basic information # Load result files #------------------- @@ -86,6 +95,7 @@ def drop_outside_period(_df): mfl = pd.read_csv(resourcefilepath / "healthsystem" / "organisation" / "ResourceFile_Master_Facilities_List.csv") district_dict = pd.read_csv(resourcefilepath / 'demography' / 'ResourceFile_Population_2010.csv')[['District_Num', 'District']].drop_duplicates() district_dict = dict(zip(district_dict['District_Num'], district_dict['District'])) +facility_id_levels_dict = dict(zip(mfl['Facility_ID'], mfl['Facility_Level'])) fac_levels = set(mfl.Facility_Level) # Overall cost assumptions @@ -100,67 +110,42 @@ def drop_outside_period(_df): hr_annual_salary['OfficerType_FacilityLevel'] = 'Officer_Type=' + hr_annual_salary['Officer_Category'].astype(str) + '|Facility_Level=' + hr_annual_salary['Facility_Level'].astype(str) # create column for merging with model log hr_annual_salary = hr_annual_salary.rename({'Value':'Annual_Salary'}, axis = 1) -# Load scenario staffing level for each year and draw -use_funded_or_actual_staffing = params[params.module_param == 'HealthSystem:use_funded_or_actual_staffing'].reset_index() -HR_scaling_by_level_and_officer_type_mode = params[params.module_param == 'HealthSystem:HR_scaling_by_level_and_officer_type_mode'].reset_index() -year_HR_scaling_by_level_and_officer_type = params[params.module_param == 'HealthSystem:year_HR_scaling_by_level_and_officer_type'].reset_index() -yearly_HR_scaling_mode = params[params.module_param == 'HealthSystem:yearly_HR_scaling_mode'].reset_index() +# Get staffing level for each year and draw +def get_staff_count_by_facid_and_officer_type(_df: pd.Series) -> pd.Series: + """Summarise the parsed logged-key results for one draw (as dataframe) into a pd.Series.""" + _df = _df.set_axis(_df['date'].dt.year).drop(columns=['date']) + _df.index.name = 'year' + + def change_to_standard_flattened_index_format(col): + parts = col.split("_", 3) # Split by "_" only up to 3 parts + if len(parts) > 2: + return parts[0] + "=" + parts[1] + "|" + parts[2] + "=" + parts[3] # Rejoin with "I" at the second occurrence + return col # If there's no second underscore, return the string as it is + _df.columns = [change_to_standard_flattened_index_format(col) for col in _df.columns] -hr_df_columns = pd.read_csv(resourcefilepath / "healthsystem/human_resources/actual/ResourceFile_Daily_Capabilities.csv").columns.drop(['Facility_ID', 'Officer_Category']) -facilities = pd.read_csv(resourcefilepath / "healthsystem/human_resources/actual/ResourceFile_Daily_Capabilities.csv")['Facility_ID'].unique().tolist() -officer_categories = pd.read_csv(resourcefilepath / "healthsystem/human_resources/actual/ResourceFile_Daily_Capabilities.csv")['Officer_Category'].unique().tolist() -staff_count = pd.DataFrame(columns = hr_df_columns, index=pd.MultiIndex.from_product([draws, years, facilities, officer_categories], names=['draw', 'year', 'Facility_ID ', 'Officer_Category'])) # Create the empty DataFrame staff_count with multi-level index 
['draw', 'year'] + return unflatten_flattened_multi_index_in_logging(_df).stack(level=[0, 1]) # expanded flattened axis -for d in draws: - year_of_switch = ( - year_HR_scaling_by_level_and_officer_type.loc[ - year_HR_scaling_by_level_and_officer_type.draw == d, 'value' - ].iloc[0] if not year_HR_scaling_by_level_and_officer_type.loc[ - year_HR_scaling_by_level_and_officer_type.draw == d, 'value' - ].empty else final_year_of_simulation - ) - chosen_hr_scenario = use_funded_or_actual_staffing.loc[use_funded_or_actual_staffing.draw == d,'value'].iloc[0] if not use_funded_or_actual_staffing.loc[use_funded_or_actual_staffing.draw == d,'value'].empty else '' - condition_draw = staff_count.index.get_level_values('draw') == d # Condition for draw - condition_before_switch = staff_count.index.get_level_values('year') < year_of_switch # Condition for year - - for year in years: - condition_draw = staff_count.index.get_level_values('draw') == d # Condition for draw - condition_year = staff_count.index.get_level_values('year') == year # Condition for the specific year - - if year < year_of_switch: - if chosen_hr_scenario == '': - new_data = pd.read_csv( - resourcefilepath / "healthsystem/human_resources/actual/ResourceFile_Daily_Capabilities.csv" - ) - else: - new_data = pd.read_csv( - resourcefilepath / 'healthsystem' / 'human_resources' / f'{chosen_hr_scenario}' / 'ResourceFile_Daily_Capabilities.csv' - ) # Use the chosen HR scenario - else: - if chosen_hr_scenario == '': - new_data = pd.read_csv( - resourcefilepath / "healthsystem/human_resources/actual/ResourceFile_Daily_Capabilities.csv" - ) # If missing default to reading actual capabilities - else: - new_data = pd.read_csv( - resourcefilepath / "healthsystem/human_resources/actual/ResourceFile_Daily_Capabilities.csv" - ) # If missing default to reading actual capabilities - - # Set the 'draw' and 'year' in new_data - new_data['draw'] = d - new_data['year'] = year - new_data = new_data.set_index(['draw', 'year', 'Facility_ID', 'Officer_Category']) - - # Replace empty values in staff_count with values from new_data - staff_count.loc[condition_draw & condition_year] = staff_count.loc[ - condition_draw & condition_year].fillna(new_data) - -staff_count_by_level_and_officer_type = staff_count.groupby(['draw', 'year', 'Facility_Level', 'Officer_Category'])[ - 'Staff_Count'].sum().reset_index() -staff_count_by_level_and_officer_type['Facility_Level'] = staff_count_by_level_and_officer_type['Facility_Level'].astype(str) +# Staff count by Facility ID +available_staff_count_by_facid_and_officertype = extract_results( + Path(results_folder), + module='tlo.methods.healthsystem.summary', + key='number_of_hcw_staff', + custom_generate_series=get_staff_count_by_facid_and_officer_type, + do_scaling=True, +) + +# Update above series to get staff count by Facility_Level +available_staff_count_by_facid_and_officertype = available_staff_count_by_facid_and_officertype.reset_index().rename(columns= {'FacilityID': 'Facility_ID', 'Officer': 'OfficerType'}) +available_staff_count_by_facid_and_officertype['Facility_ID'] = pd.to_numeric(available_staff_count_by_facid_and_officertype['Facility_ID']) +available_staff_count_by_facid_and_officertype['Facility_Level'] = available_staff_count_by_facid_and_officertype['Facility_ID'].map(facility_id_levels_dict) +idx = pd.IndexSlice +available_staff_count_by_level_and_officer_type = available_staff_count_by_facid_and_officertype.drop(columns = [idx['Facility_ID']]).groupby([idx['year'], idx['Facility_Level'], 
idx['OfficerType']]).sum() +available_staff_count_by_level_and_officer_type = melt_model_output_draws_and_runs(available_staff_count_by_level_and_officer_type.reset_index(), id_vars= ['year', 'Facility_Level', 'OfficerType']) +available_staff_count_by_level_and_officer_type['Facility_Level'] = available_staff_count_by_level_and_officer_type['Facility_Level'].astype(str) # make sure facility level is stored as string +available_staff_count_by_level_and_officer_type = available_staff_count_by_level_and_officer_type.drop(available_staff_count_by_level_and_officer_type[available_staff_count_by_level_and_officer_type['Facility_Level'] == '5'].index) # drop headquarters because we're only concerned with staff engaged in service delivery # Check if any cadres were not utilised at particular levels of care in the simulation -def expand_capacity_by_officer_type_and_facility_level(_df: pd.Series) -> pd.Series: +def get_capacity_used_by_officer_type_and_facility_level(_df: pd.Series) -> pd.Series: """Summarise the parsed logged-key results for one draw (as dataframe) into a pd.Series.""" _df = _df.set_axis(_df['date'].dt.year).drop(columns=['date']) _df.index.name = 'year' @@ -170,29 +155,27 @@ def expand_capacity_by_officer_type_and_facility_level(_df: pd.Series) -> pd.Ser Path(results_folder), module='tlo.methods.healthsystem.summary', key='Capacity_By_OfficerType_And_FacilityLevel', - custom_generate_series=expand_capacity_by_officer_type_and_facility_level, + custom_generate_series=get_capacity_used_by_officer_type_and_facility_level, do_scaling=False, ) #, only_mean=True, collapse_columns=True # Prepare capacity used dataframe to be multiplied by staff count average_capacity_used_by_cadre_and_level = annual_capacity_used_by_cadre_and_level.groupby(['OfficerType', 'FacilityLevel']).mean().reset_index(drop=False) +# TODO see if cadre-level combinations should be chosen by year average_capacity_used_by_cadre_and_level.reset_index(drop=True) # Flatten multi=index column average_capacity_used_by_cadre_and_level = average_capacity_used_by_cadre_and_level.melt(id_vars=['OfficerType', 'FacilityLevel'], var_name=['draw', 'run'], value_name='capacity_used') -# Unstack to make it look like a nice table -average_capacity_used_by_cadre_and_level['OfficerType_FacilityLevel'] = 'Officer_Type=' + average_capacity_used_by_cadre_and_level['OfficerType'].astype(str) + '|Facility_Level=' + average_capacity_used_by_cadre_and_level['FacilityLevel'].astype(str) -list_of_cadre_and_level_combinations_used = average_capacity_used_by_cadre_and_level[average_capacity_used_by_cadre_and_level['capacity_used'] != 0][['OfficerType_FacilityLevel', 'draw', 'run']] -print(f"Out of {len(average_capacity_used_by_cadre_and_level.OfficerType_FacilityLevel.unique())} cadre and level combinations available, {len(list_of_cadre_and_level_combinations_used.OfficerType_FacilityLevel.unique())} are used across the simulations") +list_of_cadre_and_level_combinations_used = average_capacity_used_by_cadre_and_level[average_capacity_used_by_cadre_and_level['capacity_used'] != 0][['OfficerType', 'FacilityLevel', 'draw', 'run']] +print(f"Out of {average_capacity_used_by_cadre_and_level.groupby(['OfficerType', 'FacilityLevel']).size().count()} cadre and level combinations available, {list_of_cadre_and_level_combinations_used.groupby(['OfficerType', 'FacilityLevel']).size().count()} are used across the simulations") # Subset scenario staffing level to only include cadre-level combinations used in the simulation 
-staff_count_by_level_and_officer_type['OfficerType_FacilityLevel'] = 'Officer_Type=' + staff_count_by_level_and_officer_type['Officer_Category'].astype(str) + '|Facility_Level=' + staff_count_by_level_and_officer_type['Facility_Level'].astype(str) -used_staff_count_by_level_and_officer_type = staff_count_by_level_and_officer_type.merge(list_of_cadre_and_level_combinations_used, on = ['draw', 'OfficerType_FacilityLevel'], how = 'right', validate = 'm:m') +used_staff_count_by_level_and_officer_type = available_staff_count_by_level_and_officer_type.merge(list_of_cadre_and_level_combinations_used, left_on = ['draw','run','OfficerType', 'Facility_Level'], + right_on = ['draw','run','OfficerType', 'FacilityLevel'], how = 'right', validate = 'm:m') # Calculate various components of HR cost # 1.1 Salary cost for current total staff #--------------------------------------------------------------------------------------------------------------- -staff_count_by_level_and_officer_type = staff_count_by_level_and_officer_type.drop(staff_count_by_level_and_officer_type[staff_count_by_level_and_officer_type.Facility_Level == '5'].index) # drop headquarters because we're only concerned with staff engaged in service delivery salary_for_all_staff = pd.merge(staff_count_by_level_and_officer_type[['draw', 'year', 'OfficerType_FacilityLevel', 'Staff_Count']], hr_annual_salary[['OfficerType_FacilityLevel', 'Annual_Salary']], on = ['OfficerType_FacilityLevel'], how = "left", validate = 'm:1') salary_for_all_staff['Cost'] = salary_for_all_staff['Annual_Salary'] * salary_for_all_staff['Staff_Count'] From b08ce484fd89515f47762bbe48daf28ca7fb3b7c Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 23 Oct 2024 20:08:08 +0100 Subject: [PATCH 127/230] update hr and consumable costs to be calculated by run. --- resources/costing/ResourceFile_Costing.xlsx | 4 +- src/scripts/costing/costing.py | 164 ++++++++++---------- 2 files changed, 87 insertions(+), 81 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index bb397baa62..cd1be2a2ad 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:007bd40f36a257b5d38942990c021356ba4c6b65c7664516253ea8dcf823b36a -size 4269736 +oid sha256:c69de82e8e03dab76f8ba05c16634a667396b0a4a78dca24763b56039c8adf57 +size 4269628 diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 2a80b51319..e8b961a01a 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -46,9 +46,6 @@ if not os.path.exists(figurespath): os.makedirs(figurespath) -# Declare period for which the results will be generated (defined inclusively) -TARGET_PERIOD = (Date(2010, 1, 1), Date(2030, 12, 31)) - # Useful common functions def drop_outside_period(_df): """Return a dataframe which only includes for which the date is within the limits defined by TARGET_PERIOD""" @@ -99,18 +96,43 @@ def melt_model_output_draws_and_runs(_df, id_vars): fac_levels = set(mfl.Facility_Level) # Overall cost assumptions +TARGET_PERIOD = (Date(2010, 1, 1), Date(2030, 12, 31)) # Declare period for which the results will be generated (defined inclusively) discount_rate = 0.03 -#%% Calculate financial costs -# 1. 
HR cost -# Load annual salary by officer type and facility level +# Read all cost parameters +#--------------------------------------- +# Read parameters for HR costs hr_cost_parameters = workbook_cost["human_resources"] -hr_cost_parameters['Facility_Level'] = hr_cost_parameters['Facility_Level'].astype(str) -hr_annual_salary = hr_cost_parameters[hr_cost_parameters['Parameter_name'] == 'salary_usd'] -hr_annual_salary['OfficerType_FacilityLevel'] = 'Officer_Type=' + hr_annual_salary['Officer_Category'].astype(str) + '|Facility_Level=' + hr_annual_salary['Facility_Level'].astype(str) # create column for merging with model log -hr_annual_salary = hr_annual_salary.rename({'Value':'Annual_Salary'}, axis = 1) +hr_cost_parameters['Facility_Level'] = hr_cost_parameters['Facility_Level'].astype(str) # Store Facility_Level as string + +# Read parameters for consumables costs +# Load consumables cost data +unit_price_consumable = workbook_cost["consumables"] +unit_price_consumable = unit_price_consumable.rename(columns=unit_price_consumable.iloc[0]) +unit_price_consumable = unit_price_consumable[['Item_Code', 'Final_price_per_chosen_unit (USD, 2023)']].reset_index(drop=True).iloc[1:] +unit_price_consumable = unit_price_consumable[unit_price_consumable['Item_Code'].notna()] + +# CALCULATE FINANCIAL COSTS +#%% +# 1. HR cost +#------------------------ +# Define a function to merge unit cost data with model outputs +def merge_cost_and_model_data(cost_df, model_df, varnames): + merged_df = model_df.copy() + for varname in varnames: + new_cost_df = cost_df[cost_df['Parameter_name'] == varname][['OfficerType', 'Facility_Level', 'Value']] + new_cost_df = new_cost_df.rename(columns={"Value": varname}) + if ((new_cost_df['OfficerType'] == 'All').all()) and ((new_cost_df['Facility_Level'] == 'All').all()): + merged_df[varname] = new_cost_df[varname].mean() + elif ((new_cost_df['OfficerType'] == 'All').all()) and ((new_cost_df['Facility_Level'] == 'All').all() == False): + merged_df = pd.merge(merged_df, new_cost_df[['Facility_Level',varname]], on=['Facility_Level'], how="left") + elif ((new_cost_df['OfficerType'] == 'All').all() == False) and ((new_cost_df['Facility_Level'] == 'All').all()): + merged_df = pd.merge(merged_df, new_cost_df[['OfficerType',varname]], on=['OfficerType'], how="left") + else: + merged_df = pd.merge(merged_df, new_cost_df, on=['OfficerType', 'Facility_Level'], how="left") + return merged_df -# Get staffing level for each year and draw +# Get available staff count for each year and draw def get_staff_count_by_facid_and_officer_type(_df: pd.Series) -> pd.Series: """Summarise the parsed logged-key results for one draw (as dataframe) into a pd.Series.""" _df = _df.set_axis(_df['date'].dt.year).drop(columns=['date']) @@ -143,8 +165,11 @@ def change_to_standard_flattened_index_format(col): available_staff_count_by_level_and_officer_type = melt_model_output_draws_and_runs(available_staff_count_by_level_and_officer_type.reset_index(), id_vars= ['year', 'Facility_Level', 'OfficerType']) available_staff_count_by_level_and_officer_type['Facility_Level'] = available_staff_count_by_level_and_officer_type['Facility_Level'].astype(str) # make sure facility level is stored as string available_staff_count_by_level_and_officer_type = available_staff_count_by_level_and_officer_type.drop(available_staff_count_by_level_and_officer_type[available_staff_count_by_level_and_officer_type['Facility_Level'] == '5'].index) # drop headquarters because we're only concerned with staff engaged in service delivery 
+available_staff_count_by_level_and_officer_type.rename(columns ={'value': 'staff_count'}, inplace=True) -# Check if any cadres were not utilised at particular levels of care in the simulation +# Get list of cadres which were utilised in each run to get the count of staff used in the simulation +# Note that we still cost the full staff count for any cadre-Facility_Level combination that was ever used in a run, and +# not the amount of time which was used def get_capacity_used_by_officer_type_and_facility_level(_df: pd.Series) -> pd.Series: """Summarise the parsed logged-key results for one draw (as dataframe) into a pd.Series.""" _df = _df.set_axis(_df['date'].dt.year).drop(columns=['date']) @@ -157,7 +182,7 @@ def get_capacity_used_by_officer_type_and_facility_level(_df: pd.Series) -> pd.S key='Capacity_By_OfficerType_And_FacilityLevel', custom_generate_series=get_capacity_used_by_officer_type_and_facility_level, do_scaling=False, -) #, only_mean=True, collapse_columns=True +) # Prepare capacity used dataframe to be multiplied by staff count average_capacity_used_by_cadre_and_level = annual_capacity_used_by_cadre_and_level.groupby(['OfficerType', 'FacilityLevel']).mean().reset_index(drop=False) @@ -168,55 +193,38 @@ def get_capacity_used_by_officer_type_and_facility_level(_df: pd.Series) -> pd.S value_name='capacity_used') list_of_cadre_and_level_combinations_used = average_capacity_used_by_cadre_and_level[average_capacity_used_by_cadre_and_level['capacity_used'] != 0][['OfficerType', 'FacilityLevel', 'draw', 'run']] print(f"Out of {average_capacity_used_by_cadre_and_level.groupby(['OfficerType', 'FacilityLevel']).size().count()} cadre and level combinations available, {list_of_cadre_and_level_combinations_used.groupby(['OfficerType', 'FacilityLevel']).size().count()} are used across the simulations") +list_of_cadre_and_level_combinations_used = list_of_cadre_and_level_combinations_used.rename(columns = {'FacilityLevel':'Facility_Level'}) # Subset scenario staffing level to only include cadre-level combinations used in the simulation -used_staff_count_by_level_and_officer_type = available_staff_count_by_level_and_officer_type.merge(list_of_cadre_and_level_combinations_used, left_on = ['draw','run','OfficerType', 'Facility_Level'], - right_on = ['draw','run','OfficerType', 'FacilityLevel'], how = 'right', validate = 'm:m') +used_staff_count_by_level_and_officer_type = available_staff_count_by_level_and_officer_type.merge(list_of_cadre_and_level_combinations_used, on = ['draw','run','OfficerType', 'Facility_Level'], how = 'right', validate = 'm:m') +used_staff_count_by_level_and_officer_type.rename(columns ={'value': 'staff_count'}, inplace=True) # Calculate various components of HR cost # 1.1 Salary cost for current total staff #--------------------------------------------------------------------------------------------------------------- -salary_for_all_staff = pd.merge(staff_count_by_level_and_officer_type[['draw', 'year', 'OfficerType_FacilityLevel', 'Staff_Count']], - hr_annual_salary[['OfficerType_FacilityLevel', 'Annual_Salary']], on = ['OfficerType_FacilityLevel'], how = "left", validate = 'm:1') -salary_for_all_staff['Cost'] = salary_for_all_staff['Annual_Salary'] * salary_for_all_staff['Staff_Count'] -total_salary_for_all_staff = salary_for_all_staff.groupby(['draw', 'year'])['Cost'].sum() +salary_for_all_staff = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = available_staff_count_by_level_and_officer_type, + varnames = ['salary_usd']) 
+salary_for_all_staff['Cost'] = salary_for_all_staff['salary_usd'] * salary_for_all_staff['staff_count'] +total_salary_for_all_staff = salary_for_all_staff.groupby(['draw', 'run', 'year'])['Cost'].sum() # 1.2 Salary cost for health workforce cadres used in the simulation (Staff count X Annual salary) #--------------------------------------------------------------------------------------------------------------- -used_staff_count_by_level_and_officer_type = used_staff_count_by_level_and_officer_type.drop(used_staff_count_by_level_and_officer_type[used_staff_count_by_level_and_officer_type.Facility_Level == '5'].index) -salary_for_staff_used_in_scenario = pd.merge(used_staff_count_by_level_and_officer_type[['draw', 'run', 'year', 'OfficerType_FacilityLevel', 'Staff_Count']], - hr_annual_salary[['OfficerType_FacilityLevel', 'Annual_Salary']], on = ['OfficerType_FacilityLevel'], how = "left") -salary_for_staff_used_in_scenario['Cost'] = salary_for_staff_used_in_scenario['Annual_Salary'] * salary_for_staff_used_in_scenario['Staff_Count'] -salary_for_staff_used_in_scenario = salary_for_staff_used_in_scenario[['draw', 'run', 'year', 'OfficerType_FacilityLevel', 'Cost']].set_index(['draw', 'run', 'year', 'OfficerType_FacilityLevel']).unstack(level=['draw', 'run']) -salary_for_staff_used_in_scenario = salary_for_staff_used_in_scenario.apply(lambda x: pd.to_numeric(x, errors='coerce')) -salary_for_staff_used_in_scenario = summarize(salary_for_staff_used_in_scenario, only_mean = True, collapse_columns=True) -total_salary_for_staff_used_in_scenario = salary_for_staff_used_in_scenario.groupby(['year']).sum() +salary_for_staff_used_in_scenario = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type, + varnames = ['salary_usd']) +salary_for_staff_used_in_scenario['Cost'] = salary_for_staff_used_in_scenario['salary_usd'] * salary_for_staff_used_in_scenario['staff_count'] +total_salary_for_staff_used_in_scenario = salary_for_staff_used_in_scenario.groupby(['draw', 'run', 'year'])['Cost'].sum() +# summarize(salary_for_staff_used_in_scenario, only_mean = True, collapse_columns=True) +#.set_index(['draw', 'run', 'year', 'OfficerType', 'Facility_Level']).unstack(level=['draw', 'run']) +#salary_for_staff_used_in_scenario = salary_for_staff_used_in_scenario.apply(lambda x: pd.to_numeric(x, errors='coerce')) # 1.3 Recruitment cost to fill gap created by attrition #--------------------------------------------------------------------------------------------------------------- -def merge_cost_and_model_data(cost_df, model_df, varnames): - merged_df = model_df.copy() - for varname in varnames: - new_cost_df = cost_df[cost_df['Parameter_name'] == varname][['Officer_Category', 'Facility_Level', 'Value']] - new_cost_df = new_cost_df.rename(columns={"Value": varname}) - if ((new_cost_df['Officer_Category'] == 'All').all()) and ((new_cost_df['Facility_Level'] == 'All').all()): - merged_df[varname] = new_cost_df[varname].mean() - elif ((new_cost_df['Officer_Category'] == 'All').all()) and ((new_cost_df['Facility_Level'] == 'All').all() == False): - merged_df = pd.merge(merged_df, new_cost_df[['Facility_Level',varname]], on=['Facility_Level'], how="left") - elif ((new_cost_df['Officer_Category'] == 'All').all() == False) and ((new_cost_df['Facility_Level'] == 'All').all()): - merged_df = pd.merge(merged_df, new_cost_df[['Officer_Category',varname]], on=['Officer_Category'], how="left") - else: - merged_df = pd.merge(merged_df, new_cost_df, on=['Officer_Category', 
'Facility_Level'], how="left") - return merged_df - recruitment_cost = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type, varnames = ['annual_attrition_rate', 'recruitment_cost_per_person_recruited_usd']) -recruitment_cost['Cost'] = recruitment_cost['annual_attrition_rate'] * recruitment_cost['Staff_Count'] * \ +recruitment_cost['Cost'] = recruitment_cost['annual_attrition_rate'] * recruitment_cost['staff_count'] * \ recruitment_cost['recruitment_cost_per_person_recruited_usd'] -recruitment_cost = recruitment_cost[['draw', 'run', 'year', 'OfficerType_FacilityLevel', 'Cost']].set_index(['draw', 'run', 'year', 'OfficerType_FacilityLevel']).unstack(level=['draw', 'run']) -recruitment_cost = recruitment_cost.apply(lambda x: pd.to_numeric(x, errors='coerce')) -recruitment_cost = summarize(recruitment_cost, only_mean = True, collapse_columns=True) -total_recruitment_cost_for_attrited_workers = recruitment_cost.groupby(['year']).sum() +recruitment_cost = recruitment_cost[['draw', 'run', 'year', 'Facility_Level', 'OfficerType', 'Cost']] +total_recruitment_cost_for_attrited_workers = recruitment_cost.groupby(['draw', 'run', 'year'])['Cost'].sum() # 1.4 Pre-service training cost to fill gap created by attrition #--------------------------------------------------------------------------------------------------------------- @@ -231,26 +239,23 @@ def merge_cost_and_model_data(cost_df, model_df, varnames): preservice_training_cost['annual_attrition_rate'] # Cost per student trained * 1/Rate of absorption from the local and foreign graduates * 1/Graduation rate * attrition rate # the inverse of attrition rate is the average expected tenure; and the preservice training cost needs to be divided by the average tenure -preservice_training_cost['Cost'] = preservice_training_cost['Annual_cost_per_staff_recruited'] * preservice_training_cost['Staff_Count'] # not multiplied with attrition rate again because this is already factored into 'Annual_cost_per_staff_recruited' -preservice_training_cost = preservice_training_cost[['draw', 'run', 'year', 'OfficerType_FacilityLevel', 'Cost']].set_index(['draw', 'run', 'year', 'OfficerType_FacilityLevel']).unstack(level=['draw', 'run']) -preservice_training_cost = preservice_training_cost.apply(lambda x: pd.to_numeric(x, errors='coerce')) -preservice_training_cost = summarize(preservice_training_cost, only_mean = True, collapse_columns=True) -preservice_training_cost_for_attrited_workers = preservice_training_cost.groupby(['year']).sum() +preservice_training_cost['Cost'] = preservice_training_cost['Annual_cost_per_staff_recruited'] * preservice_training_cost['staff_count'] # not multiplied with attrition rate again because this is already factored into 'Annual_cost_per_staff_recruited' +preservice_training_cost = preservice_training_cost[['draw', 'run', 'year', 'OfficerType', 'Facility_Level', 'Cost']] +preservice_training_cost_for_attrited_workers = preservice_training_cost.groupby(['draw', 'run', 'year'])['Cost'].sum() # 1.5 In-service training cost to train all staff #--------------------------------------------------------------------------------------------------------------- inservice_training_cost = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type, varnames = ['annual_inservice_training_cost_usd']) -inservice_training_cost['Cost'] = inservice_training_cost['Staff_Count'] * inservice_training_cost['annual_inservice_training_cost_usd'] 
-inservice_training_cost = inservice_training_cost[['draw', 'run', 'year', 'OfficerType_FacilityLevel', 'Cost']].set_index(['draw', 'run', 'year', 'OfficerType_FacilityLevel']).unstack(level=['draw', 'run']) -inservice_training_cost = inservice_training_cost.apply(lambda x: pd.to_numeric(x, errors='coerce')) -inservice_training_cost = summarize(inservice_training_cost, only_mean = True, collapse_columns=True) -inservice_training_cost_for_all_staff = inservice_training_cost.groupby(['year']).sum() +inservice_training_cost['Cost'] = inservice_training_cost['staff_count'] * inservice_training_cost['annual_inservice_training_cost_usd'] +inservice_training_cost = inservice_training_cost[['draw', 'run', 'year', 'OfficerType', 'Facility_Level', 'Cost']] +inservice_training_cost_for_all_staff = inservice_training_cost.groupby(['draw', 'run', 'year'])['Cost'].sum() # Create a dataframe to store financial costs # Function to melt and label the cost category -def melt_and_label(df, label): - melted_df = pd.melt(df.reset_index(), id_vars='year') +def melt_and_label(_df, label): + #melted_df = pd.melt(_df.reset_index(), id_vars='year') + melted_df = _df.reset_index() melted_df['Cost_Sub-category'] = label return melted_df @@ -269,9 +274,10 @@ def melt_and_label(df, label): scenario_cost = pd.concat([scenario_cost, melted]) scenario_cost['Cost_Category'] = 'Human Resources for Health' # TODO Consider calculating economic cost of HR by multiplying salary times staff count with cadres_utilisation_rate -#scenario_cost.to_csv(figurespath / 'scenario_cost.csv') + # %% # 2. Consumables cost +#------------------------ def get_quantity_of_consumables_dispensed(results_folder): def get_counts_of_items_requested(_df): _df = drop_outside_period(_df) @@ -293,14 +299,12 @@ def get_counts_of_items_requested(_df): # Convert to a pd.Series, as expected by the custom_generate_series function return combined_df.stack() - cons_req = summarize( - extract_results( + cons_req = extract_results( results_folder, module='tlo.methods.healthsystem.summary', key='Consumables', custom_generate_series=get_counts_of_items_requested, do_scaling=True) - ) cons_dispensed = cons_req.xs("Available", level=2) # only keep actual dispensed amount, i.e. 
when available return cons_dispensed @@ -308,25 +312,22 @@ def get_counts_of_items_requested(_df): consumables_dispensed = get_quantity_of_consumables_dispensed(results_folder) consumables_dispensed = consumables_dispensed.reset_index().rename(columns = {'level_0': 'Item_Code', 'level_1': 'year'}) -consumables_dispensed[( 'year', '')] = pd.to_datetime(consumables_dispensed[('year', '')]).dt.year # Extract only year from date -consumables_dispensed[('Item_Code', '')] = pd.to_numeric(consumables_dispensed[('Item_Code', '')]) +consumables_dispensed[idx['year']] = pd.to_datetime(consumables_dispensed[idx['year']]).dt.year # Extract only year from date +consumables_dispensed[idx['Item_Code']] = pd.to_numeric(consumables_dispensed[idx['Item_Code']]) +# Make a list of columns in the DataFrame pertaining to quantity dispensed quantity_columns = consumables_dispensed.columns.to_list() -quantity_columns = [tup for tup in quantity_columns if ((tup != ('Item_Code', '')) & (tup != ('year', '')))] # exclude item_code and year columns - -# Load consumables cost data -unit_price_consumable = workbook_cost["consumables"] -unit_price_consumable = unit_price_consumable.rename(columns=unit_price_consumable.iloc[0]) -unit_price_consumable = unit_price_consumable[['Item_Code', 'Final_price_per_chosen_unit (USD, 2023)']].reset_index(drop=True).iloc[1:] -unit_price_consumable = unit_price_consumable[unit_price_consumable['Item_Code'].notna()] +quantity_columns = [tup for tup in quantity_columns if tup not in [('Item_Code', ''), ('year', '')]] # 2.1 Cost of consumables dispensed #--------------------------------------------------------------------------------------------------------------- # Multiply number of items needed by cost of consumable -cost_of_consumables_dispensed = consumables_dispensed.merge(unit_price_consumable, left_on = 'Item_Code', right_on = 'Item_Code', validate = 'm:1', how = 'left') +#consumables_dispensed.columns = consumables_dispensed.columns.get_level_values(0).str() + "_" + consumables_dispensed.columns.get_level_values(1) # Flatten multi-level columns for pandas merge +unit_price_consumable.columns = pd.MultiIndex.from_arrays([unit_price_consumable.columns, [''] * len(unit_price_consumable.columns)]) +cost_of_consumables_dispensed = consumables_dispensed.merge(unit_price_consumable, on = idx['Item_Code'], validate = 'm:1', how = 'left') price_column = 'Final_price_per_chosen_unit (USD, 2023)' cost_of_consumables_dispensed[quantity_columns] = cost_of_consumables_dispensed[quantity_columns].multiply( cost_of_consumables_dispensed[price_column], axis=0) -total_cost_of_consumables_dispensed = cost_of_consumables_dispensed.groupby(('year', ''))[quantity_columns].sum() +total_cost_of_consumables_dispensed = cost_of_consumables_dispensed.groupby(idx['year'])[quantity_columns].sum() total_cost_of_consumables_dispensed = total_cost_of_consumables_dispensed.reset_index() # 2.2 Cost of consumables stocked (quantity needed for what is dispensed) @@ -336,14 +337,17 @@ def get_counts_of_items_requested(_df): # an empirical estimate based on OpenLMIS data # Estimate the stock to dispensed ratio from OpenLMIS data lmis_consumable_usage = pd.read_csv(path_for_consumable_resourcefiles / "ResourceFile_Consumables_availability_and_usage.csv") +# TODO Generate a smaller version of this file # Collapse individual facilities lmis_consumable_usage_by_item_level_month = lmis_consumable_usage.groupby(['category', 'item_code', 'district', 'fac_type_tlo', 'month'])[['closing_bal', 'dispensed', 'received']].sum() 
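To make the stock-flow bookkeeping in the lines that follow easier to trace, here is a toy illustration with invented figures (the real numbers come from the OpenLMIS resource file, and the outflow in the ratio is taken here to be the quantity dispensed, consistent with the excess-stock proportion defined further down):

```python
# Toy figures for a single item; invented purely to illustrate the identity used below.
closing_bal_jan, dispensed_jan, received_jan = 80, 30, 20
opening_bal_jan = closing_bal_jan + dispensed_jan - received_jan              # 90
received_feb_to_dec, closing_bal_dec = 200, 60
inflow_during_year = received_feb_to_dec + opening_bal_jan - closing_bal_dec  # 230
dispensed_during_year = 200                                                   # outflow
inflow_to_outflow_ratio = inflow_during_year / dispensed_during_year          # 1.15
excess_stock_proportion_of_dispensed = inflow_to_outflow_ratio - 1            # ~0.15
```

In other words, roughly 15% more stock flows in during the year than is dispensed, and that margin is what gets costed as excess stock.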
df = lmis_consumable_usage_by_item_level_month # Drop rows where monthly OpenLMIS data wasn't available df = df.loc[df.index.get_level_values('month') != "Aggregate"] +# Opening balance in January is the closing balance for the month minus what was received during the month plus what was dispensed opening_bal_january = df.loc[df.index.get_level_values('month') == 'January', 'closing_bal'] + \ df.loc[df.index.get_level_values('month') == 'January', 'dispensed'] - \ df.loc[df.index.get_level_values('month') == 'January', 'received'] closing_bal_december = df.loc[df.index.get_level_values('month') == 'December', 'closing_bal'] +# the consumable inflow during the year is the opening balance in January + what was received throughout the year - what was transferred to the next year (i.e. closing bal of December) total_consumables_inflow_during_the_year = df.loc[df.index.get_level_values('month') != 'January', 'received'].groupby(level=[0,1,2,3]).sum() +\ opening_bal_january.reset_index(level='month', drop=True) -\ closing_bal_december.reset_index(level='month', drop=True) @@ -353,7 +357,7 @@ def get_counts_of_items_requested(_df): # Edit outlier ratios inflow_to_outflow_ratio.loc[inflow_to_outflow_ratio < 1] = 1 # Ratio can't be less than 1 inflow_to_outflow_ratio.loc[inflow_to_outflow_ratio > inflow_to_outflow_ratio.quantile(0.95)] = inflow_to_outflow_ratio.quantile(0.95) # Trim values greater than the 95th percentile -average_inflow_to_outflow_ratio_ratio = inflow_to_outflow_ratio.mean() +average_inflow_to_outflow_ratio_ratio = inflow_to_outflow_ratio.mean() # Use average where item-specific ratio is not available # Multiply number of items needed by cost of consumable inflow_to_outflow_ratio_by_consumable = inflow_to_outflow_ratio.groupby(level='item_code').mean() @@ -361,11 +365,12 @@ def get_counts_of_items_requested(_df): excess_stock_ratio = excess_stock_ratio.reset_index().rename(columns = {0: 'excess_stock_proportion_of_dispensed'}) # TODO Consider whether a more disaggregated version of the ratio dictionary should be applied cost_of_excess_consumables_stocked = consumables_dispensed.merge(unit_price_consumable, left_on = 'Item_Code', right_on = 'Item_Code', validate = 'm:1', how = 'left') +excess_stock_ratio.columns = pd.MultiIndex.from_arrays([excess_stock_ratio.columns, [''] * len(excess_stock_ratio.columns)]) # TODO convert this into a funciton cost_of_excess_consumables_stocked = cost_of_excess_consumables_stocked.merge(excess_stock_ratio, left_on = 'Item_Code', right_on = 'item_code', validate = 'm:1', how = 'left') cost_of_excess_consumables_stocked.loc[cost_of_excess_consumables_stocked.excess_stock_proportion_of_dispensed.isna(), 'excess_stock_proportion_of_dispensed'] = average_inflow_to_outflow_ratio_ratio - 1# TODO disaggregate the average by program -cost_of_excess_consumables_stocked[quantity_columns] = cost_of_excess_consumables_stocked[quantity_columns].multiply(cost_of_excess_consumables_stocked[price_column], axis=0) -cost_of_excess_consumables_stocked[quantity_columns] = cost_of_excess_consumables_stocked[quantity_columns].multiply(cost_of_excess_consumables_stocked['excess_stock_proportion_of_dispensed'], axis=0) -total_cost_of_excess_consumables_stocked = cost_of_excess_consumables_stocked.groupby(('year', ''))[quantity_columns].sum() +cost_of_excess_consumables_stocked[quantity_columns] = cost_of_excess_consumables_stocked[quantity_columns].multiply(cost_of_excess_consumables_stocked[idx[price_column]], axis=0) +cost_of_excess_consumables_stocked[quantity_columns] 
= cost_of_excess_consumables_stocked[quantity_columns].multiply(cost_of_excess_consumables_stocked[idx['excess_stock_proportion_of_dispensed']], axis=0) +total_cost_of_excess_consumables_stocked = cost_of_excess_consumables_stocked.groupby(idx['year'])[quantity_columns].sum() total_cost_of_excess_consumables_stocked = total_cost_of_excess_consumables_stocked.reset_index() # Add to financial costs dataframe @@ -373,8 +378,9 @@ def get_counts_of_items_requested(_df): def melt_and_label_consumables_cost(_df, label): multi_index = pd.MultiIndex.from_tuples(_df.columns) _df.columns = multi_index - melted_df = pd.melt(_df, id_vars='year').rename(columns = {'variable_0': 'draw', 'variable_1': 'stat'}) + melted_df = pd.melt(_df, id_vars='year').rename(columns = {'variable_0': 'draw', 'variable_1': 'run'}) melted_df['Cost_Sub-category'] = label + melted_df = melted_df.rename(columns = {'value': 'Cost'}) return melted_df consumable_costs = [ From d77dc1744478f6a7db402c3fc993143ad3df1004 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Thu, 24 Oct 2024 18:02:31 +0100 Subject: [PATCH 128/230] update all costs to be in the same format and add two levels of disaggregation to the scenario costs - 'Facility_Level' & groups (such as consumable name) corresponding to the cost_category --- src/scripts/costing/costing.py | 225 +++++++++++++++++++-------------- 1 file changed, 131 insertions(+), 94 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index e8b961a01a..5b9face89d 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -75,12 +75,16 @@ def melt_model_output_draws_and_runs(_df, id_vars): info = get_scenario_info(results_folder) # get basic information about the results # Extract the parameters that have varied over the set of simulations params = extract_params(results_folder) -final_year_of_simulation = max(log['tlo.simulation']['info']['date']).year -first_year_of_simulation = min(log['tlo.simulation']['info']['date']).year -draws = params.index.unique().tolist() # list of draws +final_year_of_simulation = max(log['tlo.methods.healthsystem.summary']['hsi_event_counts']['date']).year +first_year_of_simulation = min(log['tlo.methods.healthsystem.summary']['hsi_event_counts']['date']).year +population_scaling_factor = log['tlo.methods.demography']['scaling_factor']['scaling_factor'].iloc[0] + + +# Load basic simulation parameters +#------------------------------------- +draws = range(0, info['number_of_draws']) runs = range(0, info['runs_per_draw']) years = list(range(first_year_of_simulation, final_year_of_simulation + 1)) -population_scaling_factor = log['tlo.methods.demography']['scaling_factor']['scaling_factor'].iloc[0] # Load cost input files #------------------------ @@ -112,6 +116,42 @@ def melt_model_output_draws_and_runs(_df, id_vars): unit_price_consumable = unit_price_consumable[['Item_Code', 'Final_price_per_chosen_unit (USD, 2023)']].reset_index(drop=True).iloc[1:] unit_price_consumable = unit_price_consumable[unit_price_consumable['Item_Code'].notna()] +# Load and prepare equipment cost parameters +# Unit costs of equipment +unit_cost_equipment = workbook_cost["equipment"] +unit_cost_equipment = unit_cost_equipment.rename(columns=unit_cost_equipment.iloc[7]).reset_index(drop=True).iloc[8:] +unit_cost_equipment = unit_cost_equipment[unit_cost_equipment['Item_code'].notna()] # drop empty row +# Calculate necessary costs based on HSSP-III assumptions +unit_cost_equipment['replacement_cost_annual'] = 
unit_cost_equipment.apply(lambda row: row['unit_purchase_cost']/(1+(1-(1+discount_rate)**(-row['Life span']+1))/discount_rate), axis=1) # Annuitised over the life span of the equipment assuming outlay at the beginning of the year +unit_cost_equipment['service_fee_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.8 / 8 if row['unit_purchase_cost'] > 1000 else 0, axis=1) # 80% of the value of the item over 8 years +unit_cost_equipment['spare_parts_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.2 / 8 if row['unit_purchase_cost'] > 1000 else 0, axis=1) # 20% of the value of the item over 8 years +unit_cost_equipment['major_corrective_maintenance_cost_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.2 * 0.2 / 8 if row['unit_purchase_cost'] < 250000 else 0, axis=1) # 20% of the value of 20% of the items over 8 years +# TODO consider discounting the other components +# Quantity needed for each equipment by facility +unit_cost_equipment = unit_cost_equipment[['Item_code','Equipment_tlo', + 'replacement_cost_annual', 'service_fee_annual', 'spare_parts_annual', 'major_corrective_maintenance_cost_annual', + 'Health Post_prioritised', 'Community_prioritised', 'Health Center_prioritised', 'District_prioritised', 'Central_prioritised']] +unit_cost_equipment = unit_cost_equipment.rename(columns={col: 'Quantity_' + col.replace('_prioritised', '') for col in unit_cost_equipment.columns if col.endswith('_prioritised')}) +unit_cost_equipment = unit_cost_equipment.rename(columns={col: col.replace(' ', '_') for col in unit_cost_equipment.columns}) + +unit_cost_equipment = pd.wide_to_long(unit_cost_equipment, stubnames=['Quantity_'], + i=['Item_code', 'Equipment_tlo', 'replacement_cost_annual', 'service_fee_annual', 'spare_parts_annual', 'major_corrective_maintenance_cost_annual'], + j='Facility_Level', suffix='(\d+|\w+)').reset_index() +facility_level_mapping = {'Health_Post': '0', 'Health_Center': '1a', 'Community': '1b', 'District': '2', 'Central': '3'} +unit_cost_equipment['Facility_Level'] = unit_cost_equipment['Facility_Level'].replace(facility_level_mapping) +unit_cost_equipment = unit_cost_equipment.rename(columns = {'Quantity_': 'Quantity'}) + +# Function to prepare cost dataframe ready to be merged across cross categories +def retain_relevant_column_subset(_df, _category_specific_group): + columns_to_retain = ['draw', 'run', 'year', 'cost_subcategory', 'Facility_Level', _category_specific_group, 'cost'] + _df = _df[columns_to_retain] + return _df +def prepare_cost_dataframe(_df, _category_specific_group, _cost_category): + _df = _df.rename(columns = {_category_specific_group: 'cost_subgroup'}) + _df['cost_category'] = _cost_category + return retain_relevant_column_subset(_df, 'cost_subgroup') + + # CALCULATE FINANCIAL COSTS #%% # 1. 
HR cost @@ -204,15 +244,13 @@ def get_capacity_used_by_officer_type_and_facility_level(_df: pd.Series) -> pd.S #--------------------------------------------------------------------------------------------------------------- salary_for_all_staff = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = available_staff_count_by_level_and_officer_type, varnames = ['salary_usd']) -salary_for_all_staff['Cost'] = salary_for_all_staff['salary_usd'] * salary_for_all_staff['staff_count'] -total_salary_for_all_staff = salary_for_all_staff.groupby(['draw', 'run', 'year'])['Cost'].sum() +salary_for_all_staff['cost'] = salary_for_all_staff['salary_usd'] * salary_for_all_staff['staff_count'] # 1.2 Salary cost for health workforce cadres used in the simulation (Staff count X Annual salary) #--------------------------------------------------------------------------------------------------------------- salary_for_staff_used_in_scenario = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type, varnames = ['salary_usd']) -salary_for_staff_used_in_scenario['Cost'] = salary_for_staff_used_in_scenario['salary_usd'] * salary_for_staff_used_in_scenario['staff_count'] -total_salary_for_staff_used_in_scenario = salary_for_staff_used_in_scenario.groupby(['draw', 'run', 'year'])['Cost'].sum() +salary_for_staff_used_in_scenario['cost'] = salary_for_staff_used_in_scenario['salary_usd'] * salary_for_staff_used_in_scenario['staff_count'] # summarize(salary_for_staff_used_in_scenario, only_mean = True, collapse_columns=True) #.set_index(['draw', 'run', 'year', 'OfficerType', 'Facility_Level']).unstack(level=['draw', 'run']) #salary_for_staff_used_in_scenario = salary_for_staff_used_in_scenario.apply(lambda x: pd.to_numeric(x, errors='coerce')) @@ -221,10 +259,9 @@ def get_capacity_used_by_officer_type_and_facility_level(_df: pd.Series) -> pd.S #--------------------------------------------------------------------------------------------------------------- recruitment_cost = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type, varnames = ['annual_attrition_rate', 'recruitment_cost_per_person_recruited_usd']) -recruitment_cost['Cost'] = recruitment_cost['annual_attrition_rate'] * recruitment_cost['staff_count'] * \ +recruitment_cost['cost'] = recruitment_cost['annual_attrition_rate'] * recruitment_cost['staff_count'] * \ recruitment_cost['recruitment_cost_per_person_recruited_usd'] -recruitment_cost = recruitment_cost[['draw', 'run', 'year', 'Facility_Level', 'OfficerType', 'Cost']] -total_recruitment_cost_for_attrited_workers = recruitment_cost.groupby(['draw', 'run', 'year'])['Cost'].sum() +recruitment_cost = recruitment_cost[['draw', 'run', 'year', 'Facility_Level', 'OfficerType', 'cost']] # 1.4 Pre-service training cost to fill gap created by attrition #--------------------------------------------------------------------------------------------------------------- @@ -239,41 +276,40 @@ def get_capacity_used_by_officer_type_and_facility_level(_df: pd.Series) -> pd.S preservice_training_cost['annual_attrition_rate'] # Cost per student trained * 1/Rate of absorption from the local and foreign graduates * 1/Graduation rate * attrition rate # the inverse of attrition rate is the average expected tenure; and the preservice training cost needs to be divided by the average tenure -preservice_training_cost['Cost'] = preservice_training_cost['Annual_cost_per_staff_recruited'] * 
preservice_training_cost['staff_count'] # not multiplied with attrition rate again because this is already factored into 'Annual_cost_per_staff_recruited' -preservice_training_cost = preservice_training_cost[['draw', 'run', 'year', 'OfficerType', 'Facility_Level', 'Cost']] -preservice_training_cost_for_attrited_workers = preservice_training_cost.groupby(['draw', 'run', 'year'])['Cost'].sum() +preservice_training_cost['cost'] = preservice_training_cost['Annual_cost_per_staff_recruited'] * preservice_training_cost['staff_count'] # not multiplied with attrition rate again because this is already factored into 'Annual_cost_per_staff_recruited' +preservice_training_cost = preservice_training_cost[['draw', 'run', 'year', 'OfficerType', 'Facility_Level', 'cost']] # 1.5 In-service training cost to train all staff #--------------------------------------------------------------------------------------------------------------- inservice_training_cost = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type, varnames = ['annual_inservice_training_cost_usd']) -inservice_training_cost['Cost'] = inservice_training_cost['staff_count'] * inservice_training_cost['annual_inservice_training_cost_usd'] -inservice_training_cost = inservice_training_cost[['draw', 'run', 'year', 'OfficerType', 'Facility_Level', 'Cost']] -inservice_training_cost_for_all_staff = inservice_training_cost.groupby(['draw', 'run', 'year'])['Cost'].sum() +inservice_training_cost['cost'] = inservice_training_cost['staff_count'] * inservice_training_cost['annual_inservice_training_cost_usd'] +inservice_training_cost = inservice_training_cost[['draw', 'run', 'year', 'OfficerType', 'Facility_Level', 'cost']] +# TODO Consider calculating economic cost of HR by multiplying salary times staff count with cadres_utilisation_rate -# Create a dataframe to store financial costs +# 1.6 Store all HR costs in one standard format dataframe +#--------------------------------------------------------------------------------------------------------------- # Function to melt and label the cost category -def melt_and_label(_df, label): - #melted_df = pd.melt(_df.reset_index(), id_vars='year') - melted_df = _df.reset_index() - melted_df['Cost_Sub-category'] = label - return melted_df +def label_rows_of_cost_dataframe(_df, label_var, label): + _df = _df.reset_index() + _df[label_var] = label + return _df -# Initialize scenario_cost with the salary data -scenario_cost = melt_and_label(total_salary_for_staff_used_in_scenario, 'salary_for_used_cadres') +# Initialize HR with the salary data +human_resource_costs = retain_relevant_column_subset(label_rows_of_cost_dataframe(salary_for_staff_used_in_scenario, 'cost_subcategory', 'salary_for_used_cadres'), 'OfficerType') # Concatenate additional cost categories additional_costs = [ - (total_recruitment_cost_for_attrited_workers, 'recruitment_cost_for_attrited_workers'), - (preservice_training_cost_for_attrited_workers, 'preservice_training_cost_for_attrited_workers'), - (inservice_training_cost_for_all_staff, 'inservice_training_cost_for_all_staff') + (recruitment_cost , 'recruitment_cost_for_attrited_workers'), + (preservice_training_cost, 'preservice_training_cost_for_attrited_workers'), + (inservice_training_cost, 'inservice_training_cost_for_all_staff') ] # Iterate through additional costs, melt and concatenate for df, label in additional_costs: - melted = melt_and_label(df, label) - scenario_cost = pd.concat([scenario_cost, melted]) 
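# Illustrative sketch of the standardised long cost format that the helper functions introduced in this
# patch reshape each cost category into (one row per draw/run/year/facility level/cost sub-group).
# The values below are assumed for illustration only and are not model output:
import pandas as pd
example_cost_row = pd.DataFrame({'draw': [0], 'run': [0], 'year': [2018],
                                 'cost_subcategory': ['salary_for_used_cadres'], 'Facility_Level': ['1a'],
                                 'cost_subgroup': ['Nursing_and_Midwifery'], 'cost': [1.5e6],
                                 'cost_category': ['human resources for health']})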
-scenario_cost['Cost_Category'] = 'Human Resources for Health' -# TODO Consider calculating economic cost of HR by multiplying salary times staff count with cadres_utilisation_rate + labelled_df = retain_relevant_column_subset(label_rows_of_cost_dataframe(df, 'cost_subcategory', label), 'OfficerType') + human_resource_costs = pd.concat([human_resource_costs, labelled_df]) + +human_resource_costs = prepare_cost_dataframe(human_resource_costs, _category_specific_group = 'OfficerType', _cost_category = 'human resources for health') # %% # 2. Consumables cost @@ -327,8 +363,6 @@ def get_counts_of_items_requested(_df): price_column = 'Final_price_per_chosen_unit (USD, 2023)' cost_of_consumables_dispensed[quantity_columns] = cost_of_consumables_dispensed[quantity_columns].multiply( cost_of_consumables_dispensed[price_column], axis=0) -total_cost_of_consumables_dispensed = cost_of_consumables_dispensed.groupby(idx['year'])[quantity_columns].sum() -total_cost_of_consumables_dispensed = total_cost_of_consumables_dispensed.reset_index() # 2.2 Cost of consumables stocked (quantity needed for what is dispensed) #--------------------------------------------------------------------------------------------------------------- @@ -370,62 +404,41 @@ def get_counts_of_items_requested(_df): cost_of_excess_consumables_stocked.loc[cost_of_excess_consumables_stocked.excess_stock_proportion_of_dispensed.isna(), 'excess_stock_proportion_of_dispensed'] = average_inflow_to_outflow_ratio_ratio - 1# TODO disaggregate the average by program cost_of_excess_consumables_stocked[quantity_columns] = cost_of_excess_consumables_stocked[quantity_columns].multiply(cost_of_excess_consumables_stocked[idx[price_column]], axis=0) cost_of_excess_consumables_stocked[quantity_columns] = cost_of_excess_consumables_stocked[quantity_columns].multiply(cost_of_excess_consumables_stocked[idx['excess_stock_proportion_of_dispensed']], axis=0) -total_cost_of_excess_consumables_stocked = cost_of_excess_consumables_stocked.groupby(idx['year'])[quantity_columns].sum() -total_cost_of_excess_consumables_stocked = total_cost_of_excess_consumables_stocked.reset_index() -# Add to financial costs dataframe +# 2.3 Store all HR costs in one standard format dataframe +#--------------------------------------------------------------------------------------------------------------- # Function to melt and label the cost category +consumables_dict = pd.read_csv(path_for_consumable_resourcefiles / 'ResourceFile_consumables_matched.csv', low_memory=False, + encoding="ISO-8859-1")[['item_code', 'consumable_name_tlo']] +consumables_dict = consumables_dict.rename(columns = {'item_code': 'Item_Code'}) +consumables_dict = dict(zip(consumables_dict['Item_Code'], consumables_dict['consumable_name_tlo'])) def melt_and_label_consumables_cost(_df, label): multi_index = pd.MultiIndex.from_tuples(_df.columns) _df.columns = multi_index - melted_df = pd.melt(_df, id_vars='year').rename(columns = {'variable_0': 'draw', 'variable_1': 'run'}) - melted_df['Cost_Sub-category'] = label - melted_df = melted_df.rename(columns = {'value': 'Cost'}) + # Select 'Item_Code', 'year', and all columns where both levels of the MultiIndex are numeric (these are the (draw,run) columns with cost values) + selected_columns = [col for col in _df.columns if + (col[0] in ['Item_Code', 'year']) or (isinstance(col[0], int) and isinstance(col[1], int))] + _df = _df[selected_columns] # Subset the dataframe with the selected columns + + # reshape dataframe and assign 'draw' and 'run' as the correct 
column headers + melted_df = pd.melt(_df, id_vars=['year', 'Item_Code']).rename(columns = {'variable_0': 'draw', 'variable_1': 'run'}) + # Replace item_code with consumable_name_tlo + melted_df['consumable'] = melted_df['Item_Code'].map(consumables_dict) + melted_df['cost_subcategory'] = label + melted_df['Facility_Level'] = 'all' #TODO this is temporary until 'tlo.methods.healthsystem.summary' only logs consumable at the aggregate level + melted_df = melted_df.rename(columns = {'value': 'cost'}) return melted_df -consumable_costs = [ - (total_cost_of_consumables_dispensed, 'cost_of_consumables_dispensed'), - (total_cost_of_excess_consumables_stocked, 'cost_of_excess_consumables_stocked'), -] -# Iterate through additional costs, melt and concatenate -for df, label in consumable_costs: - new_df = melt_and_label_consumables_cost(df, label) - scenario_cost = pd.concat([scenario_cost, new_df], ignore_index=True) -scenario_cost.loc[scenario_cost.Cost_Category.isna(), 'Cost_Category'] = 'Medical consumables' -#scenario_cost['value'] = scenario_cost['value'].apply(pd.to_numeric, errors='coerce') -#scenario_cost.to_csv(figurespath / 'scenario_cost.csv') - +cost_of_consumables_dispensed = retain_relevant_column_subset(melt_and_label_consumables_cost(cost_of_consumables_dispensed, 'cost_of_consumables_dispensed'), 'consumable') +cost_of_excess_consumables_stocked = retain_relevant_column_subset(melt_and_label_consumables_cost(cost_of_excess_consumables_stocked, 'cost_of_excess_consumables_stocked'), 'consumable') +consumable_costs = pd.concat([cost_of_consumables_dispensed, cost_of_excess_consumables_stocked]) +consumable_costs = prepare_cost_dataframe(consumable_costs, _category_specific_group = 'consumable', _cost_category = 'medical consumables') # %% # 3. 
Equipment cost -# Total cost of equipment required as per SEL (HSSP-III) only at facility IDs where it been used in the simulation -# Load unit costs of equipment -unit_cost_equipment = workbook_cost["equipment"] -unit_cost_equipment = unit_cost_equipment.rename(columns=unit_cost_equipment.iloc[7]).reset_index(drop=True).iloc[8:] -# Calculate necessary costs based on HSSP-III assumptions -unit_cost_equipment['replacement_cost_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost']/(1+(1-(1+discount_rate)**(-row['Life span']+1))/discount_rate), axis=1) # 10% of the items over 8 years -unit_cost_equipment['service_fee_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.8 / 8 if row['unit_purchase_cost'] > 1000 else 0, axis=1) # 80% of the value of the item over 8 years -unit_cost_equipment['spare_parts_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.2 / 8 if row['unit_purchase_cost'] > 1000 else 0, axis=1) # 20% of the value of the item over 8 years -unit_cost_equipment['upfront_repair_cost_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.2 * 0.2 / 8 if row['unit_purchase_cost'] < 250000 else 0, axis=1) # 20% of the value of 20% of the items over 8 years -# TODO consider discounting the other components - -unit_cost_equipment = unit_cost_equipment[['Item_code','Equipment_tlo', - 'service_fee_annual', 'spare_parts_annual', 'upfront_repair_cost_annual', 'replacement_cost_annual', - 'Health Post_prioritised', 'Community_prioritised', 'Health Center_prioritised', 'District_prioritised', 'Central_prioritised']] -unit_cost_equipment = unit_cost_equipment.rename(columns={col: 'Quantity_' + col.replace('_prioritised', '') for col in unit_cost_equipment.columns if col.endswith('_prioritised')}) -unit_cost_equipment = unit_cost_equipment.rename(columns={col: col.replace(' ', '_') for col in unit_cost_equipment.columns}) -unit_cost_equipment = unit_cost_equipment[unit_cost_equipment.Item_code.notna()] - -unit_cost_equipment = pd.wide_to_long(unit_cost_equipment, stubnames=['Quantity_'], - i=['Item_code', 'Equipment_tlo', 'service_fee_annual', 'spare_parts_annual', 'upfront_repair_cost_annual', 'replacement_cost_annual'], - j='Facility_Level', suffix='(\d+|\w+)').reset_index() -facility_level_mapping = {'Health_Post': '0', 'Health_Center': '1a', 'Community': '1b', 'District': '2', 'Central': '3'} -unit_cost_equipment['Facility_Level'] = unit_cost_equipment['Facility_Level'].replace(facility_level_mapping) -unit_cost_equipment = unit_cost_equipment.rename(columns = {'Quantity_': 'Quantity'}) -#unit_cost_equipment_small = unit_cost_equipment[['Item_code', 'Facility_Level', 'Quantity','service_fee_annual', 'spare_parts_annual', 'upfront_repair_cost_annual', 'replacement_cost_annual']] -#equipment_cost_dict = unit_cost_equipment_small.groupby('Facility_Level').apply(lambda x: x.to_dict(orient='records')).to_dict() - -# Get list of equipment used by district and level +#-------------------------------------------- +# Total cost of equipment required as per SEL (HSSP-III) only at facility IDs where it has been used in the simulation +# Get list of equipment used in the simulation by district and level def get_equipment_used_by_district_and_facility(_df: pd.Series) -> pd.Series: """Summarise the parsed logged-key results for one draw (as dataframe) into a pd.Series.""" _df = _df.pivot_table(index=['District', 'Facility_Level'], @@ -480,34 +493,57 @@ def get_equipment_used_by_district_and_facility(_df: 
pd.Series) -> pd.Series: equipment_df = equipment_df.merge(mfl[['District', 'Facility_Level','Facility_Count']], on = ['District', 'Facility_Level'], how = 'left') equipment_df.loc[equipment_df.Facility_Count.isna(), 'Facility_Count'] = 0 + # Because levels 1b and 2 are collapsed together, we assume that the same equipment is used by level 1b as that recorded for level 2 + def update_itemuse_for_level1b_using_level2_data(_df): + # Create a list of District and Item_code combinations for which use == True + list_of_equipment_used_at_level2 = _df[(_df.Facility_Level == '2') & (_df['whether_item_was_used'] == True)][['District', 'Item_code']] + # Now update the 'whether_item_was_used' for 'Facility_Level' == '1b' to match that of level '2' + _df.loc[ + (_df['Facility_Level'] == '1b') & + (_df[['District', 'Item_code']].apply(tuple, axis=1).isin( + list_of_equipment_used_at_level2.apply(tuple, axis=1))), + 'whether_item_was_used' + ] = True + + return _df + + equipment_df = update_itemuse_for_level1b_using_level2_data(equipment_df) + # Merge the two datasets to calculate cost - equipment_cost = pd.merge(equipment_df, unit_cost_equipment[['Item_code', 'Equipment_tlo', 'Facility_Level', 'Quantity','service_fee_annual', 'spare_parts_annual', 'upfront_repair_cost_annual', 'replacement_cost_annual']], + equipment_cost = pd.merge(equipment_df, unit_cost_equipment[['Item_code', 'Equipment_tlo', 'Facility_Level', 'Quantity', 'replacement_cost_annual', 'service_fee_annual', 'spare_parts_annual', 'major_corrective_maintenance_cost_annual']], on = ['Item_code', 'Facility_Level'], how = 'left', validate = "m:1") - categories_of_equipment_cost = ['replacement_cost', 'upfront_repair_cost', 'spare_parts', 'service_fee'] + categories_of_equipment_cost = ['replacement_cost', 'service_fee', 'spare_parts', 'major_corrective_maintenance_cost'] for cost_category in categories_of_equipment_cost: # Rename unit cost columns unit_cost_column = cost_category + '_annual_unit' equipment_cost = equipment_cost.rename(columns = {cost_category + '_annual':unit_cost_column }) equipment_cost[cost_category + '_annual_total'] = equipment_cost[cost_category + '_annual_unit'] * equipment_cost['whether_item_was_used'] * equipment_cost['Quantity'] * equipment_cost['Facility_Count'] - #equipment_cost['total_equipment_cost_annual'] = equipment_cost[[item + '_annual_total' for item in categories_of_equipment_cost]].sum(axis = 1) equipment_cost['year'] = final_year_of_simulation - 1 if equipment_cost_across_sim.empty: - equipment_cost_across_sim = equipment_cost.groupby('year')[[item + '_annual_total' for item in categories_of_equipment_cost]].sum() + equipment_cost_across_sim = equipment_cost.groupby(['year', 'Facility_Level', 'Equipment_tlo'])[[item + '_annual_total' for item in categories_of_equipment_cost]].sum() equipment_cost_across_sim['draw'] = d equipment_cost_across_sim['run'] = r else: - equipment_cost_for_current_sim = equipment_cost.groupby('year')[[item + '_annual_total' for item in categories_of_equipment_cost]].sum() + equipment_cost_for_current_sim = equipment_cost.groupby(['year', 'Facility_Level', 'Equipment_tlo'])[[item + '_annual_total' for item in categories_of_equipment_cost]].sum() equipment_cost_for_current_sim['draw'] = d equipment_cost_for_current_sim['run'] = r # Concatenate the results equipment_cost_across_sim = pd.concat([equipment_cost_across_sim, equipment_cost_for_current_sim], axis=0) -equipment_costs = pd.melt(equipment_cost_across_sim, - id_vars=['draw', 'run'], # Columns to keep +equipment_costs = 
pd.melt(equipment_cost_across_sim.reset_index(), + id_vars=['draw', 'run', 'Facility_Level', 'Equipment_tlo'], # Columns to keep value_vars=[col for col in equipment_cost_across_sim.columns if col.endswith('_annual_total')], # Columns to unpivot - var_name='Cost_Sub-category', # New column name for the 'sub-category' of cost - value_name='value') # New column name for the values + var_name='cost_subcategory', # New column name for the 'sub-category' of cost + value_name='cost') # New column name for the values +# Assume that the annual costs are constant each year of the simulation +equipment_costs = pd.concat([equipment_costs.assign(year=year) for year in years]) +# TODO If the logger is updated to include year, we may wish to calculate equipment costs by year - currently we assume the same annuitised equipment cost each year +equipment_costs = equipment_costs.reset_index(drop=True) +equipment_costs = equipment_costs.rename(columns = {'Equipment_tlo': 'Equipment'}) +equipment_costs = prepare_cost_dataframe(equipment_costs, _category_specific_group = 'Equipment', _cost_category = 'Medical Equipment') + +''' equipment_costs_summary = pd.concat( { 'mean': equipment_costs.groupby(by=['draw', 'Cost_Sub-category'], sort=False)['value'].mean(), @@ -521,15 +557,16 @@ def get_equipment_used_by_district_and_facility(_df: pd.Series) -> pd.Series: value_vars=['mean', 'lower', 'upper'], # Columns to unpivot var_name='stat', # New column name for the 'sub-category' of cost value_name='value') -equipment_costs_summary['Cost_Category'] = 'Equipment purchase and maintenance' -# Assume that the annual costs are constant each year of the simulation -equipment_costs_summary = pd.concat([equipment_costs_summary.assign(year=year) for year in years]) -equipment_costs_summary = equipment_costs_summary.reset_index(drop=True) -scenario_cost = pd.concat([scenario_cost, equipment_costs_summary], ignore_index=True) +''' # 4. 
Facility running costs # Average running costs by facility level and district times the number of facilities in the simulation +# %% +# Store all costs in single dataframe +#-------------------------------------------- +scenario_cost = pd.concat([human_resource_costs, consumable_costs, equipment_costs], ignore_index=True) + # Additional costs pertaining to simulation # IRS costs irs_coverage_rate = 0.8 From 1d63014a7a2c6cb45dc07120d10201564e3fed3a Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Thu, 24 Oct 2024 19:06:18 +0100 Subject: [PATCH 129/230] make cost estimation a function --- src/scripts/costing/costing.py | 1063 ++++++++++++++++---------------- 1 file changed, 543 insertions(+), 520 deletions(-) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 5b9face89d..8b4eb0631f 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -35,537 +35,563 @@ # Print the start time of the script print('Script Start', datetime.datetime.now().strftime('%H:%M')) -# Define a pathway to relevant folders -outputfilepath = Path('./outputs/t.mangal@imperial.ac.uk') -resourcefilepath = Path("./resources") -path_for_consumable_resourcefiles = resourcefilepath / "healthsystem/consumables" -costing_outputs_folder = Path('./outputs/costing') -if not os.path.exists(costing_outputs_folder): - os.makedirs(costing_outputs_folder) -figurespath = costing_outputs_folder / "figures" -if not os.path.exists(figurespath): - os.makedirs(figurespath) +def estimate_input_cost_of_scenarios(results_folder: Path, resourcefilepath: Path = None, draws = None, runs = None, + summarize: bool = False): + # Useful common functions + def drop_outside_period(_df): + """Return a dataframe which only includes for which the date is within the limits defined by TARGET_PERIOD""" + return _df.drop(index=_df.index[~_df['date'].between(*TARGET_PERIOD)]) + + def melt_model_output_draws_and_runs(_df, id_vars): + multi_index = pd.MultiIndex.from_tuples(_df.columns) + _df.columns = multi_index + melted_df = pd.melt(_df, id_vars=id_vars).rename(columns={'variable_0': 'draw', 'variable_1': 'run'}) + return melted_df + + # Define a relative pathway for relavant folders + path_for_consumable_resourcefiles = resourcefilepath / "healthsystem/consumables" + + # %% Gathering basic information + # Load basic simulation parameters + #------------------------------------- + log = load_pickled_dataframes(results_folder, 0, 0) # read from 1 draw and run + info = get_scenario_info(results_folder) # get basic information about the results + if draws is None: + draws = range(0, info['number_of_draws']) + if runs is None: + runs = range(0, info['runs_per_draw']) + final_year_of_simulation = max(log['tlo.methods.healthsystem.summary']['hsi_event_counts']['date']).year + first_year_of_simulation = min(log['tlo.methods.healthsystem.summary']['hsi_event_counts']['date']).year + years = list(range(first_year_of_simulation, final_year_of_simulation + 1)) + + # Load cost input files + #------------------------ + # Load primary costing resourcefile + workbook_cost = pd.read_excel((resourcefilepath / "costing/ResourceFile_Costing.xlsx"), + sheet_name = None) + + # Extract districts and facility levels from the Master Facility List + mfl = pd.read_csv(resourcefilepath / "healthsystem" / "organisation" / "ResourceFile_Master_Facilities_List.csv") + district_dict = pd.read_csv(resourcefilepath / 'demography' / 'ResourceFile_Population_2010.csv')[['District_Num', 'District']].drop_duplicates() + district_dict = 
dict(zip(district_dict['District_Num'], district_dict['District'])) + facility_id_levels_dict = dict(zip(mfl['Facility_ID'], mfl['Facility_Level'])) + fac_levels = set(mfl.Facility_Level) + + # Overall cost assumptions + TARGET_PERIOD = (Date(2010, 1, 1), Date(2030, 12, 31)) # Declare period for which the results will be generated (defined inclusively) + discount_rate = 0.03 + + # Read all cost parameters + #--------------------------------------- + # Read parameters for HR costs + hr_cost_parameters = workbook_cost["human_resources"] + hr_cost_parameters['Facility_Level'] = hr_cost_parameters['Facility_Level'].astype(str) # Store Facility_Level as string + + # Read parameters for consumables costs + # Load consumables cost data + unit_price_consumable = workbook_cost["consumables"] + unit_price_consumable = unit_price_consumable.rename(columns=unit_price_consumable.iloc[0]) + unit_price_consumable = unit_price_consumable[['Item_Code', 'Final_price_per_chosen_unit (USD, 2023)']].reset_index(drop=True).iloc[1:] + unit_price_consumable = unit_price_consumable[unit_price_consumable['Item_Code'].notna()] + + # Load and prepare equipment cost parameters + # Unit costs of equipment + unit_cost_equipment = workbook_cost["equipment"] + unit_cost_equipment = unit_cost_equipment.rename(columns=unit_cost_equipment.iloc[7]).reset_index(drop=True).iloc[8:] + unit_cost_equipment = unit_cost_equipment[unit_cost_equipment['Item_code'].notna()] # drop empty row + # Calculate necessary costs based on HSSP-III assumptions + unit_cost_equipment['replacement_cost_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost']/(1+(1-(1+discount_rate)**(-row['Life span']+1))/discount_rate), axis=1) # Annuitised over the life span of the equipment assuming outlay at the beginning of the year + unit_cost_equipment['service_fee_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.8 / 8 if row['unit_purchase_cost'] > 1000 else 0, axis=1) # 80% of the value of the item over 8 years + unit_cost_equipment['spare_parts_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.2 / 8 if row['unit_purchase_cost'] > 1000 else 0, axis=1) # 20% of the value of the item over 8 years + unit_cost_equipment['major_corrective_maintenance_cost_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.2 * 0.2 / 8 if row['unit_purchase_cost'] < 250000 else 0, axis=1) # 20% of the value of 20% of the items over 8 years + # TODO consider discounting the other components + # Quantity needed for each equipment by facility + unit_cost_equipment = unit_cost_equipment[['Item_code','Equipment_tlo', + 'replacement_cost_annual', 'service_fee_annual', 'spare_parts_annual', 'major_corrective_maintenance_cost_annual', + 'Health Post_prioritised', 'Community_prioritised', 'Health Center_prioritised', 'District_prioritised', 'Central_prioritised']] + unit_cost_equipment = unit_cost_equipment.rename(columns={col: 'Quantity_' + col.replace('_prioritised', '') for col in unit_cost_equipment.columns if col.endswith('_prioritised')}) + unit_cost_equipment = unit_cost_equipment.rename(columns={col: col.replace(' ', '_') for col in unit_cost_equipment.columns}) + + unit_cost_equipment = pd.wide_to_long(unit_cost_equipment, stubnames=['Quantity_'], + i=['Item_code', 'Equipment_tlo', 'replacement_cost_annual', 'service_fee_annual', 'spare_parts_annual', 'major_corrective_maintenance_cost_annual'], + j='Facility_Level', suffix='(\d+|\w+)').reset_index() + facility_level_mapping = 
{'Health_Post': '0', 'Health_Center': '1a', 'Community': '1b', 'District': '2', 'Central': '3'} + unit_cost_equipment['Facility_Level'] = unit_cost_equipment['Facility_Level'].replace(facility_level_mapping) + unit_cost_equipment = unit_cost_equipment.rename(columns = {'Quantity_': 'Quantity'}) + + # Function to prepare cost dataframe ready to be merged across cross categories + def retain_relevant_column_subset(_df, _category_specific_group): + columns_to_retain = ['draw', 'run', 'year', 'cost_subcategory', 'Facility_Level', _category_specific_group, 'cost'] + if 'cost_category' in _df.columns: + columns_to_retain.append('cost_category') + _df = _df[columns_to_retain] + return _df + def prepare_cost_dataframe(_df, _category_specific_group, _cost_category): + _df = _df.rename(columns = {_category_specific_group: 'cost_subgroup'}) + _df['cost_category'] = _cost_category + return retain_relevant_column_subset(_df, 'cost_subgroup') + + + # CALCULATE ECONOMIC COSTS + #%% + # 1. HR cost + #------------------------ + print("Now estimating HR costs...") + # Define a function to merge unit cost data with model outputs + def merge_cost_and_model_data(cost_df, model_df, varnames): + merged_df = model_df.copy() + for varname in varnames: + new_cost_df = cost_df[cost_df['Parameter_name'] == varname][['OfficerType', 'Facility_Level', 'Value']] + new_cost_df = new_cost_df.rename(columns={"Value": varname}) + if ((new_cost_df['OfficerType'] == 'All').all()) and ((new_cost_df['Facility_Level'] == 'All').all()): + merged_df[varname] = new_cost_df[varname].mean() + elif ((new_cost_df['OfficerType'] == 'All').all()) and ((new_cost_df['Facility_Level'] == 'All').all() == False): + merged_df = pd.merge(merged_df, new_cost_df[['Facility_Level',varname]], on=['Facility_Level'], how="left") + elif ((new_cost_df['OfficerType'] == 'All').all() == False) and ((new_cost_df['Facility_Level'] == 'All').all()): + merged_df = pd.merge(merged_df, new_cost_df[['OfficerType',varname]], on=['OfficerType'], how="left") + else: + merged_df = pd.merge(merged_df, new_cost_df, on=['OfficerType', 'Facility_Level'], how="left") + return merged_df + + # Get available staff count for each year and draw + def get_staff_count_by_facid_and_officer_type(_df: pd.Series) -> pd.Series: + """Summarise the parsed logged-key results for one draw (as dataframe) into a pd.Series.""" + _df = _df.set_axis(_df['date'].dt.year).drop(columns=['date']) + _df.index.name = 'year' + + def change_to_standard_flattened_index_format(col): + parts = col.split("_", 3) # Split by "_" only up to 3 parts + if len(parts) > 2: + return parts[0] + "=" + parts[1] + "|" + parts[2] + "=" + parts[3] # Rejoin with "I" at the second occurrence + return col # If there's no second underscore, return the string as it is + _df.columns = [change_to_standard_flattened_index_format(col) for col in _df.columns] + + return unflatten_flattened_multi_index_in_logging(_df).stack(level=[0, 1]) # expanded flattened axis + + # Staff count by Facility ID + available_staff_count_by_facid_and_officertype = extract_results( + Path(results_folder), + module='tlo.methods.healthsystem.summary', + key='number_of_hcw_staff', + custom_generate_series=get_staff_count_by_facid_and_officer_type, + do_scaling=True, + ) -# Useful common functions -def drop_outside_period(_df): - """Return a dataframe which only includes for which the date is within the limits defined by TARGET_PERIOD""" - return _df.drop(index=_df.index[~_df['date'].between(*TARGET_PERIOD)]) + # Update above series to get staff count 
by Facility_Level + available_staff_count_by_facid_and_officertype = available_staff_count_by_facid_and_officertype.reset_index().rename(columns= {'FacilityID': 'Facility_ID', 'Officer': 'OfficerType'}) + available_staff_count_by_facid_and_officertype['Facility_ID'] = pd.to_numeric(available_staff_count_by_facid_and_officertype['Facility_ID']) + available_staff_count_by_facid_and_officertype['Facility_Level'] = available_staff_count_by_facid_and_officertype['Facility_ID'].map(facility_id_levels_dict) + idx = pd.IndexSlice + available_staff_count_by_level_and_officer_type = available_staff_count_by_facid_and_officertype.drop(columns = [idx['Facility_ID']]).groupby([idx['year'], idx['Facility_Level'], idx['OfficerType']]).sum() + available_staff_count_by_level_and_officer_type = melt_model_output_draws_and_runs(available_staff_count_by_level_and_officer_type.reset_index(), id_vars= ['year', 'Facility_Level', 'OfficerType']) + available_staff_count_by_level_and_officer_type['Facility_Level'] = available_staff_count_by_level_and_officer_type['Facility_Level'].astype(str) # make sure facility level is stored as string + available_staff_count_by_level_and_officer_type = available_staff_count_by_level_and_officer_type.drop(available_staff_count_by_level_and_officer_type[available_staff_count_by_level_and_officer_type['Facility_Level'] == '5'].index) # drop headquarters because we're only concerned with staff engaged in service delivery + available_staff_count_by_level_and_officer_type.rename(columns ={'value': 'staff_count'}, inplace=True) + + # Get list of cadres which were utilised in each run to get the count of staff used in the simulation + # Note that we still cost the full staff count for any cadre-Facility_Level combination that was ever used in a run, and + # not the amount of time which was used + def get_capacity_used_by_officer_type_and_facility_level(_df: pd.Series) -> pd.Series: + """Summarise the parsed logged-key results for one draw (as dataframe) into a pd.Series.""" + _df = _df.set_axis(_df['date'].dt.year).drop(columns=['date']) + _df.index.name = 'year' + return unflatten_flattened_multi_index_in_logging(_df).stack(level=[0, 1]) # expanded flattened axis + + annual_capacity_used_by_cadre_and_level = extract_results( + Path(results_folder), + module='tlo.methods.healthsystem.summary', + key='Capacity_By_OfficerType_And_FacilityLevel', + custom_generate_series=get_capacity_used_by_officer_type_and_facility_level, + do_scaling=False, + ) -def melt_model_output_draws_and_runs(_df, id_vars): - multi_index = pd.MultiIndex.from_tuples(_df.columns) - _df.columns = multi_index - melted_df = pd.melt(_df, id_vars=id_vars).rename(columns = {'variable_0': 'draw', 'variable_1': 'run'}) - return melted_df + # Prepare capacity used dataframe to be multiplied by staff count + average_capacity_used_by_cadre_and_level = annual_capacity_used_by_cadre_and_level.groupby(['OfficerType', 'FacilityLevel']).mean().reset_index(drop=False) + # TODO see if cadre-level combinations should be chosen by year + average_capacity_used_by_cadre_and_level.reset_index(drop=True) # Flatten multi=index column + average_capacity_used_by_cadre_and_level = average_capacity_used_by_cadre_and_level.melt(id_vars=['OfficerType', 'FacilityLevel'], + var_name=['draw', 'run'], + value_name='capacity_used') + list_of_cadre_and_level_combinations_used = average_capacity_used_by_cadre_and_level[average_capacity_used_by_cadre_and_level['capacity_used'] != 0][['OfficerType', 'FacilityLevel', 'draw', 'run']] + print(f"Out of 
{average_capacity_used_by_cadre_and_level.groupby(['OfficerType', 'FacilityLevel']).size().count()} cadre and level combinations available, {list_of_cadre_and_level_combinations_used.groupby(['OfficerType', 'FacilityLevel']).size().count()} are used across the simulations") + list_of_cadre_and_level_combinations_used = list_of_cadre_and_level_combinations_used.rename(columns = {'FacilityLevel':'Facility_Level'}) + + # Subset scenario staffing level to only include cadre-level combinations used in the simulation + used_staff_count_by_level_and_officer_type = available_staff_count_by_level_and_officer_type.merge(list_of_cadre_and_level_combinations_used, on = ['draw','run','OfficerType', 'Facility_Level'], how = 'right', validate = 'm:m') + used_staff_count_by_level_and_officer_type.rename(columns ={'value': 'staff_count'}, inplace=True) + + # Calculate various components of HR cost + # 1.1 Salary cost for current total staff + #--------------------------------------------------------------------------------------------------------------- + salary_for_all_staff = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = available_staff_count_by_level_and_officer_type, + varnames = ['salary_usd']) + salary_for_all_staff['cost'] = salary_for_all_staff['salary_usd'] * salary_for_all_staff['staff_count'] + + # 1.2 Salary cost for health workforce cadres used in the simulation (Staff count X Annual salary) + #--------------------------------------------------------------------------------------------------------------- + salary_for_staff_used_in_scenario = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type, + varnames = ['salary_usd']) + salary_for_staff_used_in_scenario['cost'] = salary_for_staff_used_in_scenario['salary_usd'] * salary_for_staff_used_in_scenario['staff_count'] + # summarize(salary_for_staff_used_in_scenario, only_mean = True, collapse_columns=True) + #.set_index(['draw', 'run', 'year', 'OfficerType', 'Facility_Level']).unstack(level=['draw', 'run']) + #salary_for_staff_used_in_scenario = salary_for_staff_used_in_scenario.apply(lambda x: pd.to_numeric(x, errors='coerce')) + + # 1.3 Recruitment cost to fill gap created by attrition + #--------------------------------------------------------------------------------------------------------------- + recruitment_cost = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type, + varnames = ['annual_attrition_rate', 'recruitment_cost_per_person_recruited_usd']) + recruitment_cost['cost'] = recruitment_cost['annual_attrition_rate'] * recruitment_cost['staff_count'] * \ + recruitment_cost['recruitment_cost_per_person_recruited_usd'] + recruitment_cost = recruitment_cost[['draw', 'run', 'year', 'Facility_Level', 'OfficerType', 'cost']] + + # 1.4 Pre-service training cost to fill gap created by attrition + #--------------------------------------------------------------------------------------------------------------- + preservice_training_cost = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type, + varnames = ['annual_attrition_rate', + 'licensure_exam_passing_rate', 'graduation_rate', + 'absorption_rate_of_students_into_public_workforce', 'proportion_of_workforce_recruited_from_abroad', + 'preservice_training_cost_per_staff_recruited_usd']) + preservice_training_cost['Annual_cost_per_staff_recruited'] = 
preservice_training_cost['preservice_training_cost_per_staff_recruited_usd'] *\
+        (1/(preservice_training_cost['absorption_rate_of_students_into_public_workforce'] + preservice_training_cost['proportion_of_workforce_recruited_from_abroad'])) *\
+        (1/preservice_training_cost['graduation_rate']) * (1/preservice_training_cost['licensure_exam_passing_rate']) *\
+        preservice_training_cost['annual_attrition_rate']
+    # Cost per student trained * 1/Rate of absorption from the local and foreign graduates * 1/Graduation rate * attrition rate
+    # the inverse of attrition rate is the average expected tenure; and the preservice training cost needs to be divided by the average tenure
+    preservice_training_cost['cost'] = preservice_training_cost['Annual_cost_per_staff_recruited'] * preservice_training_cost['staff_count'] # not multiplied with attrition rate again because this is already factored into 'Annual_cost_per_staff_recruited'
+    preservice_training_cost = preservice_training_cost[['draw', 'run', 'year', 'OfficerType', 'Facility_Level', 'cost']]
+
+    # 1.5 In-service training cost to train all staff
+    #---------------------------------------------------------------------------------------------------------------
+    inservice_training_cost = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type,
+                                                        varnames = ['annual_inservice_training_cost_usd'])
+    inservice_training_cost['cost'] = inservice_training_cost['staff_count'] * inservice_training_cost['annual_inservice_training_cost_usd']
+    inservice_training_cost = inservice_training_cost[['draw', 'run', 'year', 'OfficerType', 'Facility_Level', 'cost']]
+    # TODO Consider calculating economic cost of HR by multiplying salary times staff count with cadres_utilisation_rate
+
+    # 1.6 Store all HR costs in one standard format dataframe
+    #---------------------------------------------------------------------------------------------------------------
+    # Function to melt and label the cost category
+    def label_rows_of_cost_dataframe(_df, label_var, label):
+        _df = _df.reset_index()
+        _df[label_var] = label
+        return _df
+
+    # Initialize HR with the salary data
+    human_resource_costs = retain_relevant_column_subset(label_rows_of_cost_dataframe(salary_for_staff_used_in_scenario, 'cost_subcategory', 'salary_for_used_cadres'), 'OfficerType')
+
+    # Concatenate additional cost categories
+    additional_costs = [
+        (recruitment_cost , 'recruitment_cost_for_attrited_workers'),
+        (preservice_training_cost, 'preservice_training_cost_for_attrited_workers'),
+        (inservice_training_cost, 'inservice_training_cost_for_all_staff')
+    ]
+    # Iterate through additional costs, melt and concatenate
+    for df, label in additional_costs:
+        labelled_df = retain_relevant_column_subset(label_rows_of_cost_dataframe(df, 'cost_subcategory', label), 'OfficerType')
+        human_resource_costs = pd.concat([human_resource_costs, labelled_df])
+
+    human_resource_costs = prepare_cost_dataframe(human_resource_costs, _category_specific_group = 'OfficerType', _cost_category = 'human resources for health')
+
+    # Only preserve the draws and runs requested
+    if draws is not None:
+        human_resource_costs = human_resource_costs[human_resource_costs.draw.isin(draws)]
+    if runs is not None:
+        human_resource_costs = human_resource_costs[human_resource_costs.run.isin(runs)]
+
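# Worked example (assumed parameter values, not taken from the costing resourcefile) of the pre-service
# training formula above: the cost per graduate is inflated for absorption, graduation and licensure
# losses, then annualised by multiplying by the attrition rate (1/attrition being the average tenure).
example_training_cost_per_recruit = 10_000     # USD per student trained (assumed)
example_absorption_plus_abroad = 0.6 + 0.1     # absorption rate + proportion recruited from abroad (assumed)
example_graduation_rate, example_pass_rate = 0.8, 0.9   # assumed
example_attrition_rate = 0.05                  # assumed
example_annual_cost_per_staff = (example_training_cost_per_recruit / example_absorption_plus_abroad
                                 / example_graduation_rate / example_pass_rate * example_attrition_rate)
# ~= USD 992 per staff member in post per year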
+    # %%
+    # 2. Consumables cost
+    #------------------------
+    print("Now estimating Consumables costs...")
+    def get_quantity_of_consumables_dispensed(results_folder):
+        def get_counts_of_items_requested(_df):
+            _df = drop_outside_period(_df)
+            counts_of_available = defaultdict(lambda: defaultdict(int))
+            counts_of_not_available = defaultdict(lambda: defaultdict(int))
+
+            for _, row in _df.iterrows():
+                date = row['date']
+                for item, num in row['Item_Available'].items():
+                    counts_of_available[date][item] += num
+                for item, num in row['Item_NotAvailable'].items():
+                    counts_of_not_available[date][item] += num
+            available_df = pd.DataFrame(counts_of_available).fillna(0).astype(int).stack().rename('Available')
+            not_available_df = pd.DataFrame(counts_of_not_available).fillna(0).astype(int).stack().rename('Not_Available')
+
+            # Combine the two dataframes into one series with MultiIndex (date, item, availability_status)
+            combined_df = pd.concat([available_df, not_available_df], axis=1).fillna(0).astype(int)
+
+            # Convert to a pd.Series, as expected by the custom_generate_series function
+            return combined_df.stack()
+
+        cons_req = extract_results(
+            results_folder,
+            module='tlo.methods.healthsystem.summary',
+            key='Consumables',
+            custom_generate_series=get_counts_of_items_requested,
+            do_scaling=True)
+
+        cons_dispensed = cons_req.xs("Available", level=2) # only keep actual dispensed amount, i.e. when available
+        return cons_dispensed
+    # TODO Extract year of dispensing drugs
+
+    consumables_dispensed = get_quantity_of_consumables_dispensed(results_folder)
+    consumables_dispensed = consumables_dispensed.reset_index().rename(columns = {'level_0': 'Item_Code', 'level_1': 'year'})
+    consumables_dispensed[idx['year']] = pd.to_datetime(consumables_dispensed[idx['year']]).dt.year # Extract only year from date
+    consumables_dispensed[idx['Item_Code']] = pd.to_numeric(consumables_dispensed[idx['Item_Code']])
+    # Make a list of columns in the DataFrame pertaining to quantity dispensed
+    quantity_columns = consumables_dispensed.columns.to_list()
+    quantity_columns = [tup for tup in quantity_columns if tup not in [('Item_Code', ''), ('year', '')]]
+
+    # 2.1 Cost of consumables dispensed
+    #---------------------------------------------------------------------------------------------------------------
+    # Multiply number of items needed by cost of consumable
+    #consumables_dispensed.columns = consumables_dispensed.columns.get_level_values(0).str() + "_" + consumables_dispensed.columns.get_level_values(1) # Flatten multi-level columns for pandas merge
+    unit_price_consumable.columns = pd.MultiIndex.from_arrays([unit_price_consumable.columns, [''] * len(unit_price_consumable.columns)])
+    cost_of_consumables_dispensed = consumables_dispensed.merge(unit_price_consumable, on = idx['Item_Code'], validate = 'm:1', how = 'left')
+    price_column = 'Final_price_per_chosen_unit (USD, 2023)'
+    cost_of_consumables_dispensed[quantity_columns] = cost_of_consumables_dispensed[quantity_columns].multiply(
+        cost_of_consumables_dispensed[price_column], axis=0)
+
+    # 2.2 Cost of consumables stocked (quantity needed for what is dispensed)
+    #---------------------------------------------------------------------------------------------------------------
+    # Stocked amount should be higher than dispensed because of i. excess capacity, ii. theft, iii. expiry
+    # While there are estimates in the literature of what % these might be, we agreed that it is better to rely upon
+    # an empirical estimate based on OpenLMIS data
+    # Estimate the stock to dispensed ratio from OpenLMIS data
+    lmis_consumable_usage = pd.read_csv(path_for_consumable_resourcefiles / "ResourceFile_Consumables_availability_and_usage.csv")
+    # TODO Generate a smaller version of this file
+    # Collapse individual facilities
+    lmis_consumable_usage_by_item_level_month = lmis_consumable_usage.groupby(['category', 'item_code', 'district', 'fac_type_tlo', 'month'])[['closing_bal', 'dispensed', 'received']].sum()
+    df = lmis_consumable_usage_by_item_level_month # Drop rows where monthly OpenLMIS data wasn't available
+    df = df.loc[df.index.get_level_values('month') != "Aggregate"]
+    # Opening balance in January is the closing balance for the month minus what was received during the month plus what was dispensed
+    opening_bal_january = df.loc[df.index.get_level_values('month') == 'January', 'closing_bal'] + \
+                          df.loc[df.index.get_level_values('month') == 'January', 'dispensed'] - \
+                          df.loc[df.index.get_level_values('month') == 'January', 'received']
+    closing_bal_december = df.loc[df.index.get_level_values('month') == 'December', 'closing_bal']
+    # the consumable inflow during the year is the opening balance in January + what was received throughout the year - what was transferred to the next year (i.e. closing bal of December)
+    total_consumables_inflow_during_the_year = df.loc[df.index.get_level_values('month') != 'January', 'received'].groupby(level=[0,1,2,3]).sum() +\
+                                               opening_bal_january.reset_index(level='month', drop=True) -\
+                                               closing_bal_december.reset_index(level='month', drop=True)
+    total_consumables_outflow_during_the_year = df['dispensed'].groupby(level=[0,1,2,3]).sum()
+    inflow_to_outflow_ratio = total_consumables_inflow_during_the_year.div(total_consumables_outflow_during_the_year, fill_value=1)
+
+    # Edit outlier ratios
+    inflow_to_outflow_ratio.loc[inflow_to_outflow_ratio < 1] = 1 # Ratio can't be less than 1
+    inflow_to_outflow_ratio.loc[inflow_to_outflow_ratio > inflow_to_outflow_ratio.quantile(0.95)] = inflow_to_outflow_ratio.quantile(0.95) # Trim values greater than the 95th percentile
+    average_inflow_to_outflow_ratio_ratio = inflow_to_outflow_ratio.mean() # Use average where item-specific ratio is not available
+
+    # Multiply number of items needed by cost of consumable
+    inflow_to_outflow_ratio_by_consumable = inflow_to_outflow_ratio.groupby(level='item_code').mean()
+    excess_stock_ratio = inflow_to_outflow_ratio_by_consumable - 1
+    excess_stock_ratio = excess_stock_ratio.reset_index().rename(columns = {0: 'excess_stock_proportion_of_dispensed'})
+    # TODO Consider whether a more disaggregated version of the ratio dictionary should be applied
+    cost_of_excess_consumables_stocked = consumables_dispensed.merge(unit_price_consumable, left_on = 'Item_Code', right_on = 'Item_Code', validate = 'm:1', how = 'left')
+    excess_stock_ratio.columns = pd.MultiIndex.from_arrays([excess_stock_ratio.columns, [''] * len(excess_stock_ratio.columns)]) # TODO convert this into a function
+    cost_of_excess_consumables_stocked = cost_of_excess_consumables_stocked.merge(excess_stock_ratio, left_on = 'Item_Code', right_on = 'item_code', validate = 'm:1', how = 'left')
+    cost_of_excess_consumables_stocked.loc[cost_of_excess_consumables_stocked.excess_stock_proportion_of_dispensed.isna(), 'excess_stock_proportion_of_dispensed'] = average_inflow_to_outflow_ratio_ratio - 1# TODO disaggregate the average by program
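# Worked example (assumed numbers, not from the OpenLMIS data) of the two multiplications that follow:
# if 120 units of an item flow into facilities over the year but only 100 are dispensed, the
# inflow-to-outflow ratio is 1.2 and excess_stock_proportion_of_dispensed = 0.2; 1,000 units dispensed
# in the model at a unit price of USD 0.50 then imply
example_excess_stock_cost = 1000 * 0.50 * 0.2   # = USD 100, on top of USD 500 for the quantity dispensed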
+    cost_of_excess_consumables_stocked[quantity_columns] = cost_of_excess_consumables_stocked[quantity_columns].multiply(cost_of_excess_consumables_stocked[idx[price_column]], axis=0)
+    cost_of_excess_consumables_stocked[quantity_columns] = cost_of_excess_consumables_stocked[quantity_columns].multiply(cost_of_excess_consumables_stocked[idx['excess_stock_proportion_of_dispensed']], axis=0)
+
+    # 2.3 Store all consumable costs in one standard format dataframe
+    #---------------------------------------------------------------------------------------------------------------
+    # Function to melt and label the cost category
+    consumables_dict = pd.read_csv(path_for_consumable_resourcefiles / 'ResourceFile_consumables_matched.csv', low_memory=False,
+                                   encoding="ISO-8859-1")[['item_code', 'consumable_name_tlo']]
+    consumables_dict = consumables_dict.rename(columns = {'item_code': 'Item_Code'})
+    consumables_dict = dict(zip(consumables_dict['Item_Code'], consumables_dict['consumable_name_tlo']))
+    def melt_and_label_consumables_cost(_df, label):
+        multi_index = pd.MultiIndex.from_tuples(_df.columns)
+        _df.columns = multi_index
+        # Select 'Item_Code', 'year', and all columns where both levels of the MultiIndex are numeric (these are the (draw,run) columns with cost values)
+        selected_columns = [col for col in _df.columns if
+                            (col[0] in ['Item_Code', 'year']) or (isinstance(col[0], int) and isinstance(col[1], int))]
+        _df = _df[selected_columns] # Subset the dataframe with the selected columns
+
+        # reshape dataframe and assign 'draw' and 'run' as the correct column headers
+        melted_df = pd.melt(_df, id_vars=['year', 'Item_Code']).rename(columns = {'variable_0': 'draw', 'variable_1': 'run'})
+        # Replace item_code with consumable_name_tlo
+        melted_df['consumable'] = melted_df['Item_Code'].map(consumables_dict)
+        melted_df['cost_subcategory'] = label
+        melted_df['Facility_Level'] = 'all' # TODO this is temporary while 'tlo.methods.healthsystem.summary' logs consumables only at the aggregate level
+        melted_df = melted_df.rename(columns = {'value': 'cost'})
+        return melted_df
+
+    cost_of_consumables_dispensed = retain_relevant_column_subset(melt_and_label_consumables_cost(cost_of_consumables_dispensed, 'cost_of_consumables_dispensed'), 'consumable')
+    cost_of_excess_consumables_stocked = retain_relevant_column_subset(melt_and_label_consumables_cost(cost_of_excess_consumables_stocked, 'cost_of_excess_consumables_stocked'), 'consumable')
+    consumable_costs = pd.concat([cost_of_consumables_dispensed, cost_of_excess_consumables_stocked])
+    consumable_costs = prepare_cost_dataframe(consumable_costs, _category_specific_group = 'consumable', _cost_category = 'medical consumables')
+
+    # Only preserve the draws and runs requested
+    if draws is not None:
+        consumable_costs = consumable_costs[consumable_costs.draw.isin(draws)]
+    if runs is not None:
+        consumable_costs = consumable_costs[consumable_costs.run.isin(runs)]
+
+
+    # %%
+    # 3.
Equipment cost + #-------------------------------------------- + print("Now estimating Medical equipment costs...") + # Total cost of equipment required as per SEL (HSSP-III) only at facility IDs where it has been used in the simulation + # Get list of equipment used in the simulation by district and level + def get_equipment_used_by_district_and_facility(_df: pd.Series) -> pd.Series: + """Summarise the parsed logged-key results for one draw (as dataframe) into a pd.Series.""" + _df = _df.pivot_table(index=['District', 'Facility_Level'], + values='EquipmentEverUsed', + aggfunc='first') + _df.index.name = 'year' + return _df['EquipmentEverUsed'] + + list_of_equipment_used_by_draw_and_run = extract_results( + Path(results_folder), + module='tlo.methods.healthsystem.summary', + key='EquipmentEverUsed_ByFacilityID', + custom_generate_series=get_equipment_used_by_district_and_facility, + do_scaling=False, + ) + for col in list_of_equipment_used_by_draw_and_run.columns: + list_of_equipment_used_by_draw_and_run[col] = list_of_equipment_used_by_draw_and_run[col].apply(ast.literal_eval) + + # Initialize an empty DataFrame + equipment_cost_across_sim = pd.DataFrame() + + # Extract equipment cost for each draw and run + for d in draws: + for r in runs: + print(f"Processing draw {d} and run {r} of equipment costs") + # Extract a list of equipment which was used at each facility level within each district + equipment_used = {district: {level: [] for level in fac_levels} for district in list(district_dict.values())} # create a dictionary with a key for each district and facility level + list_of_equipment_used_by_current_draw_and_run = list_of_equipment_used_by_draw_and_run[(d, r)].reset_index() + for dist in list(district_dict.values()): + for level in fac_levels: + equipment_used_subset = list_of_equipment_used_by_current_draw_and_run[(list_of_equipment_used_by_current_draw_and_run['District'] == dist) & (list_of_equipment_used_by_current_draw_and_run['Facility_Level'] == level)] + equipment_used_subset.columns = ['District', 'Facility_Level', 'EquipmentEverUsed'] + equipment_used[dist][level] = set().union(*equipment_used_subset['EquipmentEverUsed']) + equipment_used = pd.concat({ + k: pd.DataFrame.from_dict(v, 'index') for k, v in equipment_used.items()}, + axis=0) + full_list_of_equipment_used = set(equipment_used.values.flatten()) + full_list_of_equipment_used = set(filter(pd.notnull, full_list_of_equipment_used)) + + equipment_df = pd.DataFrame() + equipment_df.index = equipment_used.index + for item in full_list_of_equipment_used: + equipment_df[str(item)] = 0 + for dist_fac_index in equipment_df.index: + equipment_df.loc[equipment_df.index == dist_fac_index, str(item)] = equipment_used[equipment_used.index == dist_fac_index].isin([item]).any(axis=1) + #equipment_df.to_csv('./outputs/equipment_use.csv') + + equipment_df = equipment_df.reset_index().rename(columns = {'level_0' : 'District', 'level_1': 'Facility_Level'}) + equipment_df = pd.melt(equipment_df, id_vars = ['District', 'Facility_Level']).rename(columns = {'variable': 'Item_code', 'value': 'whether_item_was_used'}) + equipment_df['Item_code'] = pd.to_numeric(equipment_df['Item_code']) + # Merge the count of facilities by district and level + equipment_df = equipment_df.merge(mfl[['District', 'Facility_Level','Facility_Count']], on = ['District', 'Facility_Level'], how = 'left') + equipment_df.loc[equipment_df.Facility_Count.isna(), 'Facility_Count'] = 0 + + # Because levels 1b and 2 are collapsed together, we assume that the same 
equipment is used by level 1b as that recorded for level 2 + def update_itemuse_for_level1b_using_level2_data(_df): + # Create a list of District and Item_code combinations for which use == True + list_of_equipment_used_at_level2 = _df[(_df.Facility_Level == '2') & (_df['whether_item_was_used'] == True)][['District', 'Item_code']] + # Now update the 'whether_item_was_used' for 'Facility_Level' == '1b' to match that of level '2' + _df.loc[ + (_df['Facility_Level'] == '1b') & + (_df[['District', 'Item_code']].apply(tuple, axis=1).isin( + list_of_equipment_used_at_level2.apply(tuple, axis=1))), + 'whether_item_was_used' + ] = True + + return _df + + equipment_df = update_itemuse_for_level1b_using_level2_data(equipment_df) + + # Merge the two datasets to calculate cost + equipment_cost = pd.merge(equipment_df, unit_cost_equipment[['Item_code', 'Equipment_tlo', 'Facility_Level', 'Quantity', 'replacement_cost_annual', 'service_fee_annual', 'spare_parts_annual', 'major_corrective_maintenance_cost_annual']], + on = ['Item_code', 'Facility_Level'], how = 'left', validate = "m:1") + categories_of_equipment_cost = ['replacement_cost', 'service_fee', 'spare_parts', 'major_corrective_maintenance_cost'] + for cost_category in categories_of_equipment_cost: + # Rename unit cost columns + unit_cost_column = cost_category + '_annual_unit' + equipment_cost = equipment_cost.rename(columns = {cost_category + '_annual':unit_cost_column }) + equipment_cost[cost_category + '_annual_total'] = equipment_cost[cost_category + '_annual_unit'] * equipment_cost['whether_item_was_used'] * equipment_cost['Quantity'] * equipment_cost['Facility_Count'] + equipment_cost['year'] = final_year_of_simulation - 1 + if equipment_cost_across_sim.empty: + equipment_cost_across_sim = equipment_cost.groupby(['year', 'Facility_Level', 'Equipment_tlo'])[[item + '_annual_total' for item in categories_of_equipment_cost]].sum() + equipment_cost_across_sim['draw'] = d + equipment_cost_across_sim['run'] = r + else: + equipment_cost_for_current_sim = equipment_cost.groupby(['year', 'Facility_Level', 'Equipment_tlo'])[[item + '_annual_total' for item in categories_of_equipment_cost]].sum() + equipment_cost_for_current_sim['draw'] = d + equipment_cost_for_current_sim['run'] = r + # Concatenate the results + equipment_cost_across_sim = pd.concat([equipment_cost_across_sim, equipment_cost_for_current_sim], axis=0) + + equipment_costs = pd.melt(equipment_cost_across_sim.reset_index(), + id_vars=['draw', 'run', 'Facility_Level', 'Equipment_tlo'], # Columns to keep + value_vars=[col for col in equipment_cost_across_sim.columns if col.endswith('_annual_total')], # Columns to unpivot + var_name='cost_subcategory', # New column name for the 'sub-category' of cost + value_name='cost') # New column name for the values + + # Assume that the annual costs are constant each year of the simulation + equipment_costs = pd.concat([equipment_costs.assign(year=year) for year in years]) + # TODO If the logger is updated to include year, we may wish to calculate equipment costs by year - currently we assume the same annuitised equipment cost each year + equipment_costs = equipment_costs.reset_index(drop=True) + equipment_costs = equipment_costs.rename(columns = {'Equipment_tlo': 'Equipment'}) + equipment_costs = prepare_cost_dataframe(equipment_costs, _category_specific_group = 'Equipment', _cost_category = 'medical equipment') + + # 4. 
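# A standalone sketch of the equipment cost arithmetic above, with made-up inputs (the real
# values come from the equipment log, the Master Facility List and the unit cost workbook):
# for one district/level, annual cost per component =
#   (was the item ever used?) x quantity per facility x number of facilities x annuitised unit cost.
whether_item_was_used = 1        # item appeared in EquipmentEverUsed for this district/level
quantity_per_facility = 2        # prioritised quantity for this facility level (illustrative)
facility_count = 15              # facilities of this level in the district (illustrative)
annual_unit_costs = {
    'replacement_cost': 180.0,                 # annuitised purchase cost
    'service_fee': 96.0,                       # 80% of purchase price over 8 years
    'spare_parts': 24.0,                       # 20% of purchase price over 8 years
    'major_corrective_maintenance_cost': 4.8,  # 20% of the value of 20% of items over 8 years
}
annual_equipment_cost = {
    component: whether_item_was_used * quantity_per_facility * facility_count * unit_cost
    for component, unit_cost in annual_unit_costs.items()
}
print(annual_equipment_cost)               # e.g. replacement_cost: 1 * 2 * 15 * 180 = 5400.0
print(sum(annual_equipment_cost.values())) # total annual equipment cost for this item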
Facility running costs + # Average running costs by facility level and district times the number of facilities in the simulation + + # %% + # Store all costs in single dataframe + #-------------------------------------------- + scenario_cost = pd.concat([human_resource_costs, consumable_costs, equipment_costs], ignore_index=True) + scenario_cost['cost'] = pd.to_numeric(scenario_cost['cost'], errors='coerce') + + # Summarize costs + if summarize == True: + groupby_cols = [col for col in scenario_cost.columns if ((col != 'run') & (col != 'cost'))] + scenario_cost = pd.concat( + { + 'mean': scenario_cost.groupby(by=groupby_cols, sort=False)['cost'].mean(), + 'lower': scenario_cost.groupby(by=groupby_cols, sort=False)['cost'].quantile(0.025), + 'upper': scenario_cost.groupby(by=groupby_cols, sort=False)['cost'].quantile(0.975), + }, + axis=1 + ) + scenario_cost = pd.melt(scenario_cost.reset_index(), + id_vars=groupby_cols, # Columns to keep + value_vars=['mean', 'lower', 'upper'], # Columns to unpivot + var_name='stat', # New column name for the 'sub-category' of cost + value_name='cost') + return scenario_cost -# %% Gathering basic information # Load result files #------------------- -#results_folder = get_scenario_outputs('example_costing_scenario.py', outputfilepath)[0] -#results_folder = get_scenario_outputs('long_run_all_diseases.py', outputfilepath)[0] -#results_folder = get_scenario_outputs('scenario_impact_of_consumables_availability.py', outputfilepath)[0] # impact_of_cons_regression_scenarios - #results_folder = get_scenario_outputs('htm_with_and_without_hss-2024-09-04T143044Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios version 1 #results_folder = get_scenario_outputs('hss_elements-2024-09-04T142900Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios version 1 +resourcefilepath = Path("./resources") +outputfilepath = Path('./outputs/t.mangal@imperial.ac.uk') results_folder = get_scenario_outputs('htm_with_and_without_hss-2024-10-12T111720Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios version 2 #results_folder = get_scenario_outputs('hss_elements-2024-10-12T111649Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios version 2 # Check can read results from draw=0, run=0 -log = load_pickled_dataframes(results_folder, 0, 0) # look at one log (so can decide what to extract) -info = get_scenario_info(results_folder) # get basic information about the results -# Extract the parameters that have varied over the set of simulations -params = extract_params(results_folder) -final_year_of_simulation = max(log['tlo.methods.healthsystem.summary']['hsi_event_counts']['date']).year -first_year_of_simulation = min(log['tlo.methods.healthsystem.summary']['hsi_event_counts']['date']).year -population_scaling_factor = log['tlo.methods.demography']['scaling_factor']['scaling_factor'].iloc[0] - - -# Load basic simulation parameters -#------------------------------------- -draws = range(0, info['number_of_draws']) -runs = range(0, info['runs_per_draw']) -years = list(range(first_year_of_simulation, final_year_of_simulation + 1)) - -# Load cost input files -#------------------------ -# Load primary costing resourcefile -workbook_cost = pd.read_excel((resourcefilepath / "costing/ResourceFile_Costing.xlsx"), - sheet_name = None) +#log = load_pickled_dataframes(results_folder, 0, 0) # look at one log (so can decide what to extract) +#params = extract_params(results_folder) +#population_scaling_factor = log['tlo.methods.demography']['scaling_factor']['scaling_factor'].iloc[0] -# Extract districts and 
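# A minimal sketch of the summarisation step above, with made-up numbers: the 'run' dimension is
# collapsed by taking the mean and the 2.5th/97.5th percentiles of cost within every other
# grouping column, and the result is melted so the statistic becomes a 'stat' column.
import pandas as pd

costs = pd.DataFrame({
    'draw': [0, 0, 0, 0],
    'run': [0, 1, 2, 3],
    'year': [2018] * 4,
    'cost_category': ['medical consumables'] * 4,
    'cost': [95.0, 100.0, 110.0, 105.0],
})
groupby_cols = [c for c in costs.columns if c not in ('run', 'cost')]
summary = pd.concat(
    {
        'mean': costs.groupby(groupby_cols, sort=False)['cost'].mean(),
        'lower': costs.groupby(groupby_cols, sort=False)['cost'].quantile(0.025),
        'upper': costs.groupby(groupby_cols, sort=False)['cost'].quantile(0.975),
    },
    axis=1,
)
summary = summary.reset_index().melt(id_vars=groupby_cols, var_name='stat', value_name='cost')
print(summary)  # one row per (draw, year, cost_category, stat)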
facility levels from the Master Facility List -mfl = pd.read_csv(resourcefilepath / "healthsystem" / "organisation" / "ResourceFile_Master_Facilities_List.csv") -district_dict = pd.read_csv(resourcefilepath / 'demography' / 'ResourceFile_Population_2010.csv')[['District_Num', 'District']].drop_duplicates() -district_dict = dict(zip(district_dict['District_Num'], district_dict['District'])) -facility_id_levels_dict = dict(zip(mfl['Facility_ID'], mfl['Facility_Level'])) -fac_levels = set(mfl.Facility_Level) +# Estimate cost of scenario +input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, draws = [0], summarize = True) +input_costs[input_costs.year == 2018].groupby('cost_category')['cost'].sum() -# Overall cost assumptions -TARGET_PERIOD = (Date(2010, 1, 1), Date(2030, 12, 31)) # Declare period for which the results will be generated (defined inclusively) -discount_rate = 0.03 - -# Read all cost parameters -#--------------------------------------- -# Read parameters for HR costs -hr_cost_parameters = workbook_cost["human_resources"] -hr_cost_parameters['Facility_Level'] = hr_cost_parameters['Facility_Level'].astype(str) # Store Facility_Level as string - -# Read parameters for consumables costs -# Load consumables cost data -unit_price_consumable = workbook_cost["consumables"] -unit_price_consumable = unit_price_consumable.rename(columns=unit_price_consumable.iloc[0]) -unit_price_consumable = unit_price_consumable[['Item_Code', 'Final_price_per_chosen_unit (USD, 2023)']].reset_index(drop=True).iloc[1:] -unit_price_consumable = unit_price_consumable[unit_price_consumable['Item_Code'].notna()] - -# Load and prepare equipment cost parameters -# Unit costs of equipment -unit_cost_equipment = workbook_cost["equipment"] -unit_cost_equipment = unit_cost_equipment.rename(columns=unit_cost_equipment.iloc[7]).reset_index(drop=True).iloc[8:] -unit_cost_equipment = unit_cost_equipment[unit_cost_equipment['Item_code'].notna()] # drop empty row -# Calculate necessary costs based on HSSP-III assumptions -unit_cost_equipment['replacement_cost_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost']/(1+(1-(1+discount_rate)**(-row['Life span']+1))/discount_rate), axis=1) # Annuitised over the life span of the equipment assuming outlay at the beginning of the year -unit_cost_equipment['service_fee_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.8 / 8 if row['unit_purchase_cost'] > 1000 else 0, axis=1) # 80% of the value of the item over 8 years -unit_cost_equipment['spare_parts_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.2 / 8 if row['unit_purchase_cost'] > 1000 else 0, axis=1) # 20% of the value of the item over 8 years -unit_cost_equipment['major_corrective_maintenance_cost_annual'] = unit_cost_equipment.apply(lambda row: row['unit_purchase_cost'] * 0.2 * 0.2 / 8 if row['unit_purchase_cost'] < 250000 else 0, axis=1) # 20% of the value of 20% of the items over 8 years -# TODO consider discounting the other components -# Quantity needed for each equipment by facility -unit_cost_equipment = unit_cost_equipment[['Item_code','Equipment_tlo', - 'replacement_cost_annual', 'service_fee_annual', 'spare_parts_annual', 'major_corrective_maintenance_cost_annual', - 'Health Post_prioritised', 'Community_prioritised', 'Health Center_prioritised', 'District_prioritised', 'Central_prioritised']] -unit_cost_equipment = unit_cost_equipment.rename(columns={col: 'Quantity_' + col.replace('_prioritised', '') for col in 
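# The annuitisation used for equipment above spreads a one-off purchase price over the item's
# life span at the discount rate, with the outlay assumed at the start of year 1. A worked
# example with made-up numbers (purchase price and life span are illustrative only):
discount_rate = 0.03            # as in the script
unit_purchase_cost = 10_000.0   # made-up purchase price (USD)
life_span = 10                  # made-up useful life (years)

# annuity factor for payments at the start of each year: 1 + (1 - (1 + r) ** -(L - 1)) / r
annuity_factor = 1 + (1 - (1 + discount_rate) ** (-life_span + 1)) / discount_rate
replacement_cost_annual = unit_purchase_cost / annuity_factor
print(round(annuity_factor, 4), round(replacement_cost_annual, 2))
# the one-off ~$10,000 outlay becomes an annual charge of roughly $1,140 over the 10-year life span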
unit_cost_equipment.columns if col.endswith('_prioritised')}) -unit_cost_equipment = unit_cost_equipment.rename(columns={col: col.replace(' ', '_') for col in unit_cost_equipment.columns}) - -unit_cost_equipment = pd.wide_to_long(unit_cost_equipment, stubnames=['Quantity_'], - i=['Item_code', 'Equipment_tlo', 'replacement_cost_annual', 'service_fee_annual', 'spare_parts_annual', 'major_corrective_maintenance_cost_annual'], - j='Facility_Level', suffix='(\d+|\w+)').reset_index() -facility_level_mapping = {'Health_Post': '0', 'Health_Center': '1a', 'Community': '1b', 'District': '2', 'Central': '3'} -unit_cost_equipment['Facility_Level'] = unit_cost_equipment['Facility_Level'].replace(facility_level_mapping) -unit_cost_equipment = unit_cost_equipment.rename(columns = {'Quantity_': 'Quantity'}) - -# Function to prepare cost dataframe ready to be merged across cross categories -def retain_relevant_column_subset(_df, _category_specific_group): - columns_to_retain = ['draw', 'run', 'year', 'cost_subcategory', 'Facility_Level', _category_specific_group, 'cost'] - _df = _df[columns_to_retain] - return _df -def prepare_cost_dataframe(_df, _category_specific_group, _cost_category): - _df = _df.rename(columns = {_category_specific_group: 'cost_subgroup'}) - _df['cost_category'] = _cost_category - return retain_relevant_column_subset(_df, 'cost_subgroup') - - -# CALCULATE FINANCIAL COSTS -#%% -# 1. HR cost -#------------------------ -# Define a function to merge unit cost data with model outputs -def merge_cost_and_model_data(cost_df, model_df, varnames): - merged_df = model_df.copy() - for varname in varnames: - new_cost_df = cost_df[cost_df['Parameter_name'] == varname][['OfficerType', 'Facility_Level', 'Value']] - new_cost_df = new_cost_df.rename(columns={"Value": varname}) - if ((new_cost_df['OfficerType'] == 'All').all()) and ((new_cost_df['Facility_Level'] == 'All').all()): - merged_df[varname] = new_cost_df[varname].mean() - elif ((new_cost_df['OfficerType'] == 'All').all()) and ((new_cost_df['Facility_Level'] == 'All').all() == False): - merged_df = pd.merge(merged_df, new_cost_df[['Facility_Level',varname]], on=['Facility_Level'], how="left") - elif ((new_cost_df['OfficerType'] == 'All').all() == False) and ((new_cost_df['Facility_Level'] == 'All').all()): - merged_df = pd.merge(merged_df, new_cost_df[['OfficerType',varname]], on=['OfficerType'], how="left") - else: - merged_df = pd.merge(merged_df, new_cost_df, on=['OfficerType', 'Facility_Level'], how="left") - return merged_df - -# Get available staff count for each year and draw -def get_staff_count_by_facid_and_officer_type(_df: pd.Series) -> pd.Series: - """Summarise the parsed logged-key results for one draw (as dataframe) into a pd.Series.""" - _df = _df.set_axis(_df['date'].dt.year).drop(columns=['date']) - _df.index.name = 'year' - - def change_to_standard_flattened_index_format(col): - parts = col.split("_", 3) # Split by "_" only up to 3 parts - if len(parts) > 2: - return parts[0] + "=" + parts[1] + "|" + parts[2] + "=" + parts[3] # Rejoin with "I" at the second occurrence - return col # If there's no second underscore, return the string as it is - _df.columns = [change_to_standard_flattened_index_format(col) for col in _df.columns] - - return unflatten_flattened_multi_index_in_logging(_df).stack(level=[0, 1]) # expanded flattened axis - -# Staff count by Facility ID -available_staff_count_by_facid_and_officertype = extract_results( - Path(results_folder), - module='tlo.methods.healthsystem.summary', - key='number_of_hcw_staff', 
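# The helper above rewrites flattened logger column names of the form
# '<key1>_<value1>_<key2>_<value2>' into the 'key=value|key=value' pattern expected by
# unflatten_flattened_multi_index_in_logging (the rejoin uses '|', not 'I'). A quick standalone
# illustration; the column name below is made up for the example:
def change_to_standard_flattened_index_format(col):
    parts = col.split("_", 3)  # split into at most 4 pieces; later underscores stay in the value
    if len(parts) > 2:
        return parts[0] + "=" + parts[1] + "|" + parts[2] + "=" + parts[3]
    return col

print(change_to_standard_flattened_index_format("FacilityID_5_Officer_Nursing_and_Midwifery"))
# -> "FacilityID=5|Officer=Nursing_and_Midwifery"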
- custom_generate_series=get_staff_count_by_facid_and_officer_type, - do_scaling=True, -) - -# Update above series to get staff count by Facility_Level -available_staff_count_by_facid_and_officertype = available_staff_count_by_facid_and_officertype.reset_index().rename(columns= {'FacilityID': 'Facility_ID', 'Officer': 'OfficerType'}) -available_staff_count_by_facid_and_officertype['Facility_ID'] = pd.to_numeric(available_staff_count_by_facid_and_officertype['Facility_ID']) -available_staff_count_by_facid_and_officertype['Facility_Level'] = available_staff_count_by_facid_and_officertype['Facility_ID'].map(facility_id_levels_dict) -idx = pd.IndexSlice -available_staff_count_by_level_and_officer_type = available_staff_count_by_facid_and_officertype.drop(columns = [idx['Facility_ID']]).groupby([idx['year'], idx['Facility_Level'], idx['OfficerType']]).sum() -available_staff_count_by_level_and_officer_type = melt_model_output_draws_and_runs(available_staff_count_by_level_and_officer_type.reset_index(), id_vars= ['year', 'Facility_Level', 'OfficerType']) -available_staff_count_by_level_and_officer_type['Facility_Level'] = available_staff_count_by_level_and_officer_type['Facility_Level'].astype(str) # make sure facility level is stored as string -available_staff_count_by_level_and_officer_type = available_staff_count_by_level_and_officer_type.drop(available_staff_count_by_level_and_officer_type[available_staff_count_by_level_and_officer_type['Facility_Level'] == '5'].index) # drop headquarters because we're only concerned with staff engaged in service delivery -available_staff_count_by_level_and_officer_type.rename(columns ={'value': 'staff_count'}, inplace=True) - -# Get list of cadres which were utilised in each run to get the count of staff used in the simulation -# Note that we still cost the full staff count for any cadre-Facility_Level combination that was ever used in a run, and -# not the amount of time which was used -def get_capacity_used_by_officer_type_and_facility_level(_df: pd.Series) -> pd.Series: - """Summarise the parsed logged-key results for one draw (as dataframe) into a pd.Series.""" - _df = _df.set_axis(_df['date'].dt.year).drop(columns=['date']) - _df.index.name = 'year' - return unflatten_flattened_multi_index_in_logging(_df).stack(level=[0, 1]) # expanded flattened axis - -annual_capacity_used_by_cadre_and_level = extract_results( - Path(results_folder), - module='tlo.methods.healthsystem.summary', - key='Capacity_By_OfficerType_And_FacilityLevel', - custom_generate_series=get_capacity_used_by_officer_type_and_facility_level, - do_scaling=False, -) - -# Prepare capacity used dataframe to be multiplied by staff count -average_capacity_used_by_cadre_and_level = annual_capacity_used_by_cadre_and_level.groupby(['OfficerType', 'FacilityLevel']).mean().reset_index(drop=False) -# TODO see if cadre-level combinations should be chosen by year -average_capacity_used_by_cadre_and_level.reset_index(drop=True) # Flatten multi=index column -average_capacity_used_by_cadre_and_level = average_capacity_used_by_cadre_and_level.melt(id_vars=['OfficerType', 'FacilityLevel'], - var_name=['draw', 'run'], - value_name='capacity_used') -list_of_cadre_and_level_combinations_used = average_capacity_used_by_cadre_and_level[average_capacity_used_by_cadre_and_level['capacity_used'] != 0][['OfficerType', 'FacilityLevel', 'draw', 'run']] -print(f"Out of {average_capacity_used_by_cadre_and_level.groupby(['OfficerType', 'FacilityLevel']).size().count()} cadre and level combinations available, 
{list_of_cadre_and_level_combinations_used.groupby(['OfficerType', 'FacilityLevel']).size().count()} are used across the simulations") -list_of_cadre_and_level_combinations_used = list_of_cadre_and_level_combinations_used.rename(columns = {'FacilityLevel':'Facility_Level'}) - -# Subset scenario staffing level to only include cadre-level combinations used in the simulation -used_staff_count_by_level_and_officer_type = available_staff_count_by_level_and_officer_type.merge(list_of_cadre_and_level_combinations_used, on = ['draw','run','OfficerType', 'Facility_Level'], how = 'right', validate = 'm:m') -used_staff_count_by_level_and_officer_type.rename(columns ={'value': 'staff_count'}, inplace=True) - -# Calculate various components of HR cost -# 1.1 Salary cost for current total staff -#--------------------------------------------------------------------------------------------------------------- -salary_for_all_staff = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = available_staff_count_by_level_and_officer_type, - varnames = ['salary_usd']) -salary_for_all_staff['cost'] = salary_for_all_staff['salary_usd'] * salary_for_all_staff['staff_count'] - -# 1.2 Salary cost for health workforce cadres used in the simulation (Staff count X Annual salary) -#--------------------------------------------------------------------------------------------------------------- -salary_for_staff_used_in_scenario = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type, - varnames = ['salary_usd']) -salary_for_staff_used_in_scenario['cost'] = salary_for_staff_used_in_scenario['salary_usd'] * salary_for_staff_used_in_scenario['staff_count'] -# summarize(salary_for_staff_used_in_scenario, only_mean = True, collapse_columns=True) -#.set_index(['draw', 'run', 'year', 'OfficerType', 'Facility_Level']).unstack(level=['draw', 'run']) -#salary_for_staff_used_in_scenario = salary_for_staff_used_in_scenario.apply(lambda x: pd.to_numeric(x, errors='coerce')) - -# 1.3 Recruitment cost to fill gap created by attrition -#--------------------------------------------------------------------------------------------------------------- -recruitment_cost = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type, - varnames = ['annual_attrition_rate', 'recruitment_cost_per_person_recruited_usd']) -recruitment_cost['cost'] = recruitment_cost['annual_attrition_rate'] * recruitment_cost['staff_count'] * \ - recruitment_cost['recruitment_cost_per_person_recruited_usd'] -recruitment_cost = recruitment_cost[['draw', 'run', 'year', 'Facility_Level', 'OfficerType', 'cost']] - -# 1.4 Pre-service training cost to fill gap created by attrition -#--------------------------------------------------------------------------------------------------------------- -preservice_training_cost = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type, - varnames = ['annual_attrition_rate', - 'licensure_exam_passing_rate', 'graduation_rate', - 'absorption_rate_of_students_into_public_workforce', 'proportion_of_workforce_recruited_from_abroad', - 'preservice_training_cost_per_staff_recruited_usd']) -preservice_training_cost['Annual_cost_per_staff_recruited'] = preservice_training_cost['preservice_training_cost_per_staff_recruited_usd'] *\ - (1/(preservice_training_cost['absorption_rate_of_students_into_public_workforce'] + 
preservice_training_cost['proportion_of_workforce_recruited_from_abroad'])) *\ - (1/preservice_training_cost['graduation_rate']) * (1/preservice_training_cost['licensure_exam_passing_rate']) *\ - preservice_training_cost['annual_attrition_rate'] -# Cost per student trained * 1/Rate of absorption from the local and foreign graduates * 1/Graduation rate * attrition rate -# the inverse of attrition rate is the average expected tenure; and the preservice training cost needs to be divided by the average tenure -preservice_training_cost['cost'] = preservice_training_cost['Annual_cost_per_staff_recruited'] * preservice_training_cost['staff_count'] # not multiplied with attrition rate again because this is already factored into 'Annual_cost_per_staff_recruited' -preservice_training_cost = preservice_training_cost[['draw', 'run', 'year', 'OfficerType', 'Facility_Level', 'cost']] - -# 1.5 In-service training cost to train all staff -#--------------------------------------------------------------------------------------------------------------- -inservice_training_cost = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type, - varnames = ['annual_inservice_training_cost_usd']) -inservice_training_cost['cost'] = inservice_training_cost['staff_count'] * inservice_training_cost['annual_inservice_training_cost_usd'] -inservice_training_cost = inservice_training_cost[['draw', 'run', 'year', 'OfficerType', 'Facility_Level', 'cost']] -# TODO Consider calculating economic cost of HR by multiplying salary times staff count with cadres_utilisation_rate - -# 1.6 Store all HR costs in one standard format dataframe -#--------------------------------------------------------------------------------------------------------------- -# Function to melt and label the cost category -def label_rows_of_cost_dataframe(_df, label_var, label): - _df = _df.reset_index() - _df[label_var] = label - return _df - -# Initialize HR with the salary data -human_resource_costs = retain_relevant_column_subset(label_rows_of_cost_dataframe(salary_for_staff_used_in_scenario, 'cost_subcategory', 'salary_for_used_cadres'), 'OfficerType') - -# Concatenate additional cost categories -additional_costs = [ - (recruitment_cost , 'recruitment_cost_for_attrited_workers'), - (preservice_training_cost, 'preservice_training_cost_for_attrited_workers'), - (inservice_training_cost, 'inservice_training_cost_for_all_staff') -] -# Iterate through additional costs, melt and concatenate -for df, label in additional_costs: - labelled_df = retain_relevant_column_subset(label_rows_of_cost_dataframe(df, 'cost_subcategory', label), 'OfficerType') - human_resource_costs = pd.concat([human_resource_costs, labelled_df]) - -human_resource_costs = prepare_cost_dataframe(human_resource_costs, _category_specific_group = 'OfficerType', _cost_category = 'human resources for health') - -# %% -# 2. 
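# A worked example of the pre-service training cost logic described above, with made-up
# parameter values (the real ones sit in the 'human_resources' sheet of the costing workbook):
# the cost per graduate is grossed up for students who never reach the public workforce
# (graduation, licensure and absorption rates) and then scaled by the attrition rate, so that
# only the share of the workforce replaced each year is costed.
preservice_training_cost_per_staff_recruited_usd = 20_000.0
absorption_rate_of_students_into_public_workforce = 0.6
proportion_of_workforce_recruited_from_abroad = 0.1
graduation_rate = 0.9
licensure_exam_passing_rate = 0.8
annual_attrition_rate = 0.05
staff_count = 1_000

annual_cost_per_staff_recruited = (
    preservice_training_cost_per_staff_recruited_usd
    * (1 / (absorption_rate_of_students_into_public_workforce + proportion_of_workforce_recruited_from_abroad))
    * (1 / graduation_rate)
    * (1 / licensure_exam_passing_rate)
    * annual_attrition_rate
)
annual_preservice_training_cost = annual_cost_per_staff_recruited * staff_count
print(round(annual_cost_per_staff_recruited, 2), round(annual_preservice_training_cost, 2))
# 20000 / 0.7 / 0.9 / 0.8 * 0.05 ~= 1984.13 per staff member, ~= 1.98m USD for 1,000 staff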
Consumables cost -#------------------------ -def get_quantity_of_consumables_dispensed(results_folder): - def get_counts_of_items_requested(_df): - _df = drop_outside_period(_df) - counts_of_available = defaultdict(lambda: defaultdict(int)) - counts_of_not_available = defaultdict(lambda: defaultdict(int)) - - for _, row in _df.iterrows(): - date = row['date'] - for item, num in row['Item_Available'].items(): - counts_of_available[date][item] += num - for item, num in row['Item_NotAvailable'].items(): - counts_of_not_available[date][item] += num - available_df = pd.DataFrame(counts_of_available).fillna(0).astype(int).stack().rename('Available') - not_available_df = pd.DataFrame(counts_of_not_available).fillna(0).astype(int).stack().rename('Not_Available') - - # Combine the two dataframes into one series with MultiIndex (date, item, availability_status) - combined_df = pd.concat([available_df, not_available_df], axis=1).fillna(0).astype(int) - - # Convert to a pd.Series, as expected by the custom_generate_series function - return combined_df.stack() - - cons_req = extract_results( - results_folder, - module='tlo.methods.healthsystem.summary', - key='Consumables', - custom_generate_series=get_counts_of_items_requested, - do_scaling=True) - - cons_dispensed = cons_req.xs("Available", level=2) # only keep actual dispensed amount, i.e. when available - return cons_dispensed -# TODO Extract year of dispensing drugs - -consumables_dispensed = get_quantity_of_consumables_dispensed(results_folder) -consumables_dispensed = consumables_dispensed.reset_index().rename(columns = {'level_0': 'Item_Code', 'level_1': 'year'}) -consumables_dispensed[idx['year']] = pd.to_datetime(consumables_dispensed[idx['year']]).dt.year # Extract only year from date -consumables_dispensed[idx['Item_Code']] = pd.to_numeric(consumables_dispensed[idx['Item_Code']]) -# Make a list of columns in the DataFrame pertaining to quantity dispensed -quantity_columns = consumables_dispensed.columns.to_list() -quantity_columns = [tup for tup in quantity_columns if tup not in [('Item_Code', ''), ('year', '')]] - -# 2.1 Cost of consumables dispensed -#--------------------------------------------------------------------------------------------------------------- -# Multiply number of items needed by cost of consumable -#consumables_dispensed.columns = consumables_dispensed.columns.get_level_values(0).str() + "_" + consumables_dispensed.columns.get_level_values(1) # Flatten multi-level columns for pandas merge -unit_price_consumable.columns = pd.MultiIndex.from_arrays([unit_price_consumable.columns, [''] * len(unit_price_consumable.columns)]) -cost_of_consumables_dispensed = consumables_dispensed.merge(unit_price_consumable, on = idx['Item_Code'], validate = 'm:1', how = 'left') -price_column = 'Final_price_per_chosen_unit (USD, 2023)' -cost_of_consumables_dispensed[quantity_columns] = cost_of_consumables_dispensed[quantity_columns].multiply( - cost_of_consumables_dispensed[price_column], axis=0) - -# 2.2 Cost of consumables stocked (quantity needed for what is dispensed) -#--------------------------------------------------------------------------------------------------------------- -# Stocked amount should be higher than dispensed because of i. excess capacity, ii. theft, iii. 
expiry -# While there are estimates in the literature of what % these might be, we agreed that it is better to rely upon -# an empirical estimate based on OpenLMIS data -# Estimate the stock to dispensed ratio from OpenLMIS data -lmis_consumable_usage = pd.read_csv(path_for_consumable_resourcefiles / "ResourceFile_Consumables_availability_and_usage.csv") -# TODO Generate a smaller version of this file -# Collapse individual facilities -lmis_consumable_usage_by_item_level_month = lmis_consumable_usage.groupby(['category', 'item_code', 'district', 'fac_type_tlo', 'month'])[['closing_bal', 'dispensed', 'received']].sum() -df = lmis_consumable_usage_by_item_level_month # Drop rows where monthly OpenLMIS data wasn't available -df = df.loc[df.index.get_level_values('month') != "Aggregate"] -# Opening balance in January is the closing balance for the month minus what was received during the month plus what was dispensed -opening_bal_january = df.loc[df.index.get_level_values('month') == 'January', 'closing_bal'] + \ - df.loc[df.index.get_level_values('month') == 'January', 'dispensed'] - \ - df.loc[df.index.get_level_values('month') == 'January', 'received'] -closing_bal_december = df.loc[df.index.get_level_values('month') == 'December', 'closing_bal'] -# the consumable inflow during the year is the opening balance in January + what was received throughout the year - what was transferred to the next year (i.e. closing bal of December) -total_consumables_inflow_during_the_year = df.loc[df.index.get_level_values('month') != 'January', 'received'].groupby(level=[0,1,2,3]).sum() +\ - opening_bal_january.reset_index(level='month', drop=True) -\ - closing_bal_december.reset_index(level='month', drop=True) -total_consumables_outflow_during_the_year = df['dispensed'].groupby(level=[0,1,2,3]).sum() -inflow_to_outflow_ratio = total_consumables_inflow_during_the_year.div(total_consumables_outflow_during_the_year, fill_value=1) - -# Edit outlier ratios -inflow_to_outflow_ratio.loc[inflow_to_outflow_ratio < 1] = 1 # Ratio can't be less than 1 -inflow_to_outflow_ratio.loc[inflow_to_outflow_ratio > inflow_to_outflow_ratio.quantile(0.95)] = inflow_to_outflow_ratio.quantile(0.95) # Trim values greater than the 95th percentile -average_inflow_to_outflow_ratio_ratio = inflow_to_outflow_ratio.mean() # Use average where item-specific ratio is not available - -# Multiply number of items needed by cost of consumable -inflow_to_outflow_ratio_by_consumable = inflow_to_outflow_ratio.groupby(level='item_code').mean() -excess_stock_ratio = inflow_to_outflow_ratio_by_consumable - 1 -excess_stock_ratio = excess_stock_ratio.reset_index().rename(columns = {0: 'excess_stock_proportion_of_dispensed'}) -# TODO Consider whether a more disaggregated version of the ratio dictionary should be applied -cost_of_excess_consumables_stocked = consumables_dispensed.merge(unit_price_consumable, left_on = 'Item_Code', right_on = 'Item_Code', validate = 'm:1', how = 'left') -excess_stock_ratio.columns = pd.MultiIndex.from_arrays([excess_stock_ratio.columns, [''] * len(excess_stock_ratio.columns)]) # TODO convert this into a funciton -cost_of_excess_consumables_stocked = cost_of_excess_consumables_stocked.merge(excess_stock_ratio, left_on = 'Item_Code', right_on = 'item_code', validate = 'm:1', how = 'left') -cost_of_excess_consumables_stocked.loc[cost_of_excess_consumables_stocked.excess_stock_proportion_of_dispensed.isna(), 'excess_stock_proportion_of_dispensed'] = average_inflow_to_outflow_ratio_ratio - 1# TODO disaggregate the average by 
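# A standalone sketch of the stock-flow accounting above, for one item at one facility type
# (all figures made up): the January opening balance is backed out from the January closing
# balance, the annual inflow is that opening balance plus receipts from February to December
# minus the stock carried into the next year, and dividing by what was dispensed gives the
# inflow-to-outflow ratio; (ratio - 1) is the excess-stock proportion applied to dispensing costs.
closing_bal_january = 400
dispensed_january = 120
received_january = 100
received_feb_to_dec = 1_300
closing_bal_december = 350
dispensed_full_year = 1_250

opening_bal_january = closing_bal_january + dispensed_january - received_january   # 420
total_inflow = received_feb_to_dec + opening_bal_january - closing_bal_december    # 1370
inflow_to_outflow_ratio = total_inflow / dispensed_full_year                       # 1.096
excess_stock_proportion_of_dispensed = inflow_to_outflow_ratio - 1                 # ~0.10
print(round(inflow_to_outflow_ratio, 3), round(excess_stock_proportion_of_dispensed, 3))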
program -cost_of_excess_consumables_stocked[quantity_columns] = cost_of_excess_consumables_stocked[quantity_columns].multiply(cost_of_excess_consumables_stocked[idx[price_column]], axis=0) -cost_of_excess_consumables_stocked[quantity_columns] = cost_of_excess_consumables_stocked[quantity_columns].multiply(cost_of_excess_consumables_stocked[idx['excess_stock_proportion_of_dispensed']], axis=0) - -# 2.3 Store all HR costs in one standard format dataframe -#--------------------------------------------------------------------------------------------------------------- -# Function to melt and label the cost category -consumables_dict = pd.read_csv(path_for_consumable_resourcefiles / 'ResourceFile_consumables_matched.csv', low_memory=False, - encoding="ISO-8859-1")[['item_code', 'consumable_name_tlo']] -consumables_dict = consumables_dict.rename(columns = {'item_code': 'Item_Code'}) -consumables_dict = dict(zip(consumables_dict['Item_Code'], consumables_dict['consumable_name_tlo'])) -def melt_and_label_consumables_cost(_df, label): - multi_index = pd.MultiIndex.from_tuples(_df.columns) - _df.columns = multi_index - # Select 'Item_Code', 'year', and all columns where both levels of the MultiIndex are numeric (these are the (draw,run) columns with cost values) - selected_columns = [col for col in _df.columns if - (col[0] in ['Item_Code', 'year']) or (isinstance(col[0], int) and isinstance(col[1], int))] - _df = _df[selected_columns] # Subset the dataframe with the selected columns - - # reshape dataframe and assign 'draw' and 'run' as the correct column headers - melted_df = pd.melt(_df, id_vars=['year', 'Item_Code']).rename(columns = {'variable_0': 'draw', 'variable_1': 'run'}) - # Replace item_code with consumable_name_tlo - melted_df['consumable'] = melted_df['Item_Code'].map(consumables_dict) - melted_df['cost_subcategory'] = label - melted_df['Facility_Level'] = 'all' #TODO this is temporary until 'tlo.methods.healthsystem.summary' only logs consumable at the aggregate level - melted_df = melted_df.rename(columns = {'value': 'cost'}) - return melted_df - -cost_of_consumables_dispensed = retain_relevant_column_subset(melt_and_label_consumables_cost(cost_of_consumables_dispensed, 'cost_of_consumables_dispensed'), 'consumable') -cost_of_excess_consumables_stocked = retain_relevant_column_subset(melt_and_label_consumables_cost(cost_of_excess_consumables_stocked, 'cost_of_excess_consumables_stocked'), 'consumable') -consumable_costs = pd.concat([cost_of_consumables_dispensed, cost_of_excess_consumables_stocked]) -consumable_costs = prepare_cost_dataframe(consumable_costs, _category_specific_group = 'consumable', _cost_category = 'medical consumables') - -# %% -# 3. 
Equipment cost -#-------------------------------------------- -# Total cost of equipment required as per SEL (HSSP-III) only at facility IDs where it has been used in the simulation -# Get list of equipment used in the simulation by district and level -def get_equipment_used_by_district_and_facility(_df: pd.Series) -> pd.Series: - """Summarise the parsed logged-key results for one draw (as dataframe) into a pd.Series.""" - _df = _df.pivot_table(index=['District', 'Facility_Level'], - values='EquipmentEverUsed', - aggfunc='first') - _df.index.name = 'year' - return _df['EquipmentEverUsed'] - -list_of_equipment_used_by_draw_and_run = extract_results( - Path(results_folder), - module='tlo.methods.healthsystem.summary', - key='EquipmentEverUsed_ByFacilityID', - custom_generate_series=get_equipment_used_by_district_and_facility, - do_scaling=False, -) -for col in list_of_equipment_used_by_draw_and_run.columns: - list_of_equipment_used_by_draw_and_run[col] = list_of_equipment_used_by_draw_and_run[col].apply(ast.literal_eval) - -# Initialize an empty DataFrame -equipment_cost_across_sim = pd.DataFrame() - -# Extract equipment cost for each draw and run -for d in draws: - for r in runs: - print(f"Now processing draw {d} and run {r}") - # Extract a list of equipment which was used at each facility level within each district - equipment_used = {district: {level: [] for level in fac_levels} for district in list(district_dict.values())} # create a dictionary with a key for each district and facility level - list_of_equipment_used_by_current_draw_and_run = list_of_equipment_used_by_draw_and_run[(d, r)].reset_index() - for dist in list(district_dict.values()): - for level in fac_levels: - equipment_used_subset = list_of_equipment_used_by_current_draw_and_run[(list_of_equipment_used_by_current_draw_and_run['District'] == dist) & (list_of_equipment_used_by_current_draw_and_run['Facility_Level'] == level)] - equipment_used_subset.columns = ['District', 'Facility_Level', 'EquipmentEverUsed'] - equipment_used[dist][level] = set().union(*equipment_used_subset['EquipmentEverUsed']) - equipment_used = pd.concat({ - k: pd.DataFrame.from_dict(v, 'index') for k, v in equipment_used.items()}, - axis=0) - full_list_of_equipment_used = set(equipment_used.values.flatten()) - full_list_of_equipment_used = set(filter(pd.notnull, full_list_of_equipment_used)) - - equipment_df = pd.DataFrame() - equipment_df.index = equipment_used.index - for item in full_list_of_equipment_used: - equipment_df[str(item)] = 0 - for dist_fac_index in equipment_df.index: - equipment_df.loc[equipment_df.index == dist_fac_index, str(item)] = equipment_used[equipment_used.index == dist_fac_index].isin([item]).any(axis=1) - #equipment_df.to_csv('./outputs/equipment_use.csv') - - equipment_df = equipment_df.reset_index().rename(columns = {'level_0' : 'District', 'level_1': 'Facility_Level'}) - equipment_df = pd.melt(equipment_df, id_vars = ['District', 'Facility_Level']).rename(columns = {'variable': 'Item_code', 'value': 'whether_item_was_used'}) - equipment_df['Item_code'] = pd.to_numeric(equipment_df['Item_code']) - # Merge the count of facilities by district and level - equipment_df = equipment_df.merge(mfl[['District', 'Facility_Level','Facility_Count']], on = ['District', 'Facility_Level'], how = 'left') - equipment_df.loc[equipment_df.Facility_Count.isna(), 'Facility_Count'] = 0 - - # Because levels 1b and 2 are collapsed together, we assume that the same equipment is used by level 1b as that recorded for level 2 - def 
update_itemuse_for_level1b_using_level2_data(_df): - # Create a list of District and Item_code combinations for which use == True - list_of_equipment_used_at_level2 = _df[(_df.Facility_Level == '2') & (_df['whether_item_was_used'] == True)][['District', 'Item_code']] - # Now update the 'whether_item_was_used' for 'Facility_Level' == '1b' to match that of level '2' - _df.loc[ - (_df['Facility_Level'] == '1b') & - (_df[['District', 'Item_code']].apply(tuple, axis=1).isin( - list_of_equipment_used_at_level2.apply(tuple, axis=1))), - 'whether_item_was_used' - ] = True - - return _df - - equipment_df = update_itemuse_for_level1b_using_level2_data(equipment_df) - - # Merge the two datasets to calculate cost - equipment_cost = pd.merge(equipment_df, unit_cost_equipment[['Item_code', 'Equipment_tlo', 'Facility_Level', 'Quantity', 'replacement_cost_annual', 'service_fee_annual', 'spare_parts_annual', 'major_corrective_maintenance_cost_annual']], - on = ['Item_code', 'Facility_Level'], how = 'left', validate = "m:1") - categories_of_equipment_cost = ['replacement_cost', 'service_fee', 'spare_parts', 'major_corrective_maintenance_cost'] - for cost_category in categories_of_equipment_cost: - # Rename unit cost columns - unit_cost_column = cost_category + '_annual_unit' - equipment_cost = equipment_cost.rename(columns = {cost_category + '_annual':unit_cost_column }) - equipment_cost[cost_category + '_annual_total'] = equipment_cost[cost_category + '_annual_unit'] * equipment_cost['whether_item_was_used'] * equipment_cost['Quantity'] * equipment_cost['Facility_Count'] - equipment_cost['year'] = final_year_of_simulation - 1 - if equipment_cost_across_sim.empty: - equipment_cost_across_sim = equipment_cost.groupby(['year', 'Facility_Level', 'Equipment_tlo'])[[item + '_annual_total' for item in categories_of_equipment_cost]].sum() - equipment_cost_across_sim['draw'] = d - equipment_cost_across_sim['run'] = r - else: - equipment_cost_for_current_sim = equipment_cost.groupby(['year', 'Facility_Level', 'Equipment_tlo'])[[item + '_annual_total' for item in categories_of_equipment_cost]].sum() - equipment_cost_for_current_sim['draw'] = d - equipment_cost_for_current_sim['run'] = r - # Concatenate the results - equipment_cost_across_sim = pd.concat([equipment_cost_across_sim, equipment_cost_for_current_sim], axis=0) - -equipment_costs = pd.melt(equipment_cost_across_sim.reset_index(), - id_vars=['draw', 'run', 'Facility_Level', 'Equipment_tlo'], # Columns to keep - value_vars=[col for col in equipment_cost_across_sim.columns if col.endswith('_annual_total')], # Columns to unpivot - var_name='cost_subcategory', # New column name for the 'sub-category' of cost - value_name='cost') # New column name for the values - -# Assume that the annual costs are constant each year of the simulation -equipment_costs = pd.concat([equipment_costs.assign(year=year) for year in years]) -# TODO If the logger is updated to include year, we may wish to calculate equipment costs by year - currently we assume the same annuitised equipment cost each year -equipment_costs = equipment_costs.reset_index(drop=True) -equipment_costs = equipment_costs.rename(columns = {'Equipment_tlo': 'Equipment'}) -equipment_costs = prepare_cost_dataframe(equipment_costs, _category_specific_group = 'Equipment', _cost_category = 'Medical Equipment') - -''' -equipment_costs_summary = pd.concat( - { - 'mean': equipment_costs.groupby(by=['draw', 'Cost_Sub-category'], sort=False)['value'].mean(), - 'lower': equipment_costs.groupby(by=['draw', 
'Cost_Sub-category'], sort=False)['value'].quantile(0.025), - 'upper': equipment_costs.groupby(by=['draw', 'Cost_Sub-category'], sort=False)['value'].quantile(0.975), - }, - axis=1 -) -equipment_costs_summary = pd.melt(equipment_costs_summary.reset_index(), - id_vars=['draw', 'Cost_Sub-category'], # Columns to keep - value_vars=['mean', 'lower', 'upper'], # Columns to unpivot - var_name='stat', # New column name for the 'sub-category' of cost - value_name='value') -''' - -# 4. Facility running costs -# Average running costs by facility level and district times the number of facilities in the simulation - -# %% -# Store all costs in single dataframe -#-------------------------------------------- -scenario_cost = pd.concat([human_resource_costs, consumable_costs, equipment_costs], ignore_index=True) +# Create folders to store results +costing_outputs_folder = Path('./outputs/costing') +if not os.path.exists(costing_outputs_folder): + os.makedirs(costing_outputs_folder) +figurespath = costing_outputs_folder / "figures" +if not os.path.exists(figurespath): + os.makedirs(figurespath) # Additional costs pertaining to simulation # IRS costs @@ -634,9 +660,6 @@ def set_cost_during_years_before_malaria_scaleup_to_zero(_df): scenario_cost = pd.concat([scenario_cost, new_df], ignore_index=True) scenario_cost.loc[scenario_cost['Cost_Category'].isna(),'Cost_Category'] = 'IRS and Bednet Scale-up Costs' -# Extract all costs to a .csv -scenario_cost.to_csv(costing_outputs_folder / 'scenario_cost.csv', index = False) - # Calculate total cost total_scenario_cost = scenario_cost[(scenario_cost.year >= 2020) & (scenario_cost.year <= 2030)].groupby(['draw', 'stat'])['value'].sum().unstack() total_scenario_cost = total_scenario_cost.unstack().reset_index() From 9a75e13a23f74df602f1fbddc048624688221eb1 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 28 Oct 2024 18:31:22 +0000 Subject: [PATCH 130/230] update the calculation of HR costs - carry out a more detailed estimation od pre-service training costs - add mentorship and supervision costs --- resources/costing/ResourceFile_Costing.xlsx | 4 +- src/scripts/costing/costing.py | 360 +++++--------------- 2 files changed, 83 insertions(+), 281 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index cd1be2a2ad..c14e54fdad 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c69de82e8e03dab76f8ba05c16634a667396b0a4a78dca24763b56039c8adf57 -size 4269628 +oid sha256:e86b39ea2fb3fb9ea42accce43da0466c8fd35205fe42da1c4e0d6d11869ea3f +size 4274726 diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/costing.py index 8b4eb0631f..a84a0dbe59 100644 --- a/src/scripts/costing/costing.py +++ b/src/scripts/costing/costing.py @@ -36,7 +36,7 @@ print('Script Start', datetime.datetime.now().strftime('%H:%M')) def estimate_input_cost_of_scenarios(results_folder: Path, resourcefilepath: Path = None, draws = None, runs = None, - summarize: bool = False): + summarize: bool = False, cost_only_used_staff: bool = True): # Useful common functions def drop_outside_period(_df): """Return a dataframe which only includes for which the date is within the limits defined by TARGET_PERIOD""" @@ -220,55 +220,85 @@ def get_capacity_used_by_officer_type_and_facility_level(_df: pd.Series) -> pd.S used_staff_count_by_level_and_officer_type = 
available_staff_count_by_level_and_officer_type.merge(list_of_cadre_and_level_combinations_used, on = ['draw','run','OfficerType', 'Facility_Level'], how = 'right', validate = 'm:m') used_staff_count_by_level_and_officer_type.rename(columns ={'value': 'staff_count'}, inplace=True) - # Calculate various components of HR cost - # 1.1 Salary cost for current total staff - #--------------------------------------------------------------------------------------------------------------- - salary_for_all_staff = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = available_staff_count_by_level_and_officer_type, - varnames = ['salary_usd']) - salary_for_all_staff['cost'] = salary_for_all_staff['salary_usd'] * salary_for_all_staff['staff_count'] + if (cost_only_used_staff): + print("The input for 'cost_only_used_staff' implies that only cadre-level combinations which have been used in the run are costed") + staff_size_chosen_for_costing = used_staff_count_by_level_and_officer_type + else: + print("The input for 'cost_only_used_staff' implies that all staff are costed regardless of the cadre-level combinations which have been used in the run are costed") + staff_size_chosen_for_costing = available_staff_count_by_level_and_officer_type - # 1.2 Salary cost for health workforce cadres used in the simulation (Staff count X Annual salary) + # Calculate various components of HR cost + # 1.1 Salary cost for health workforce cadres used in the simulation (Staff count X Annual salary) #--------------------------------------------------------------------------------------------------------------- - salary_for_staff_used_in_scenario = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type, + salary_for_staff = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = staff_size_chosen_for_costing, varnames = ['salary_usd']) - salary_for_staff_used_in_scenario['cost'] = salary_for_staff_used_in_scenario['salary_usd'] * salary_for_staff_used_in_scenario['staff_count'] - # summarize(salary_for_staff_used_in_scenario, only_mean = True, collapse_columns=True) - #.set_index(['draw', 'run', 'year', 'OfficerType', 'Facility_Level']).unstack(level=['draw', 'run']) - #salary_for_staff_used_in_scenario = salary_for_staff_used_in_scenario.apply(lambda x: pd.to_numeric(x, errors='coerce')) - - # 1.3 Recruitment cost to fill gap created by attrition - #--------------------------------------------------------------------------------------------------------------- - recruitment_cost = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type, - varnames = ['annual_attrition_rate', 'recruitment_cost_per_person_recruited_usd']) - recruitment_cost['cost'] = recruitment_cost['annual_attrition_rate'] * recruitment_cost['staff_count'] * \ - recruitment_cost['recruitment_cost_per_person_recruited_usd'] - recruitment_cost = recruitment_cost[['draw', 'run', 'year', 'Facility_Level', 'OfficerType', 'cost']] + salary_for_staff['cost'] = salary_for_staff['salary_usd'] * salary_for_staff['staff_count'] - # 1.4 Pre-service training cost to fill gap created by attrition + # 1.2 Pre-service training & recruitment cost to fill gap created by attrition #--------------------------------------------------------------------------------------------------------------- - preservice_training_cost = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type, + 
preservice_training_cost = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = staff_size_chosen_for_costing, varnames = ['annual_attrition_rate', 'licensure_exam_passing_rate', 'graduation_rate', 'absorption_rate_of_students_into_public_workforce', 'proportion_of_workforce_recruited_from_abroad', - 'preservice_training_cost_per_staff_recruited_usd']) - preservice_training_cost['Annual_cost_per_staff_recruited'] = preservice_training_cost['preservice_training_cost_per_staff_recruited_usd'] *\ + 'average_annual_preservice_training_cost_for_cadre', 'preservice_training_duration', 'recruitment_cost_per_person_recruited_usd', + 'average_length_of_tenure_in_the_public_sector']) + + def calculate_npv_past_training_expenses_by_row(row, r = discount_rate): + # Initialize the NPV for the row + npv = 0 + annual_cost = row['average_annual_preservice_training_cost_for_cadre'] + full_years = int(row['preservice_training_duration']) # Extract integer part of the year + partial_year = row['preservice_training_duration'] - full_years # Fractional part of the year + + # Iterate over each year of the training duration to calculate compounded cost to the present + # Calculate NPV for each full year of training + for t in range(full_years): + npv += annual_cost * (1 + r) ** (t+1+1) # 1 added twice because range(4) is [0,1,2,3] + + # Account for the fractional year at the end if it exists + if partial_year > 0: + npv += annual_cost * partial_year * (1 + r) ** (1+r) + + # Add recruitment cost assuming this happens during the partial year or the year after graduation if partial year == 0 + npv += row['recruitment_cost_per_person_recruited_usd'] * (1+r) + + return npv + + # Calculate NPV for each row using iterrows and store in a new column + npv_values = [] + for index, row in preservice_training_cost.iterrows(): + npv = calculate_npv_past_training_expenses_by_row(row, r=discount_rate) + npv_values.append(npv) + + preservice_training_cost['npv_of_training_and_recruitment_cost'] = npv_values + preservice_training_cost['npv_of_training_and_recruitment_cost_per_recruit'] = preservice_training_cost['npv_of_training_and_recruitment_cost'] *\ (1/(preservice_training_cost['absorption_rate_of_students_into_public_workforce'] + preservice_training_cost['proportion_of_workforce_recruited_from_abroad'])) *\ - (1/preservice_training_cost['graduation_rate']) * (1/preservice_training_cost['licensure_exam_passing_rate']) *\ - preservice_training_cost['annual_attrition_rate'] + (1/preservice_training_cost['graduation_rate']) * (1/preservice_training_cost['licensure_exam_passing_rate']) + preservice_training_cost['annuitisation_rate'] = 1 + (1 - (1 + discount_rate) ** (-preservice_training_cost['average_length_of_tenure_in_the_public_sector'] + 1)) / discount_rate + preservice_training_cost['annuitised_training_and_recruitment_cost_per_recruit'] = preservice_training_cost['npv_of_training_and_recruitment_cost_per_recruit']/preservice_training_cost['annuitisation_rate'] + # Cost per student trained * 1/Rate of absorption from the local and foreign graduates * 1/Graduation rate * attrition rate # the inverse of attrition rate is the average expected tenure; and the preservice training cost needs to be divided by the average tenure - preservice_training_cost['cost'] = preservice_training_cost['Annual_cost_per_staff_recruited'] * preservice_training_cost['staff_count'] # not multiplied with attrition rate again because this is already factored into 'Annual_cost_per_staff_recruited' + preservice_training_cost['cost'] = 
preservice_training_cost['annuitised_training_and_recruitment_cost_per_recruit'] * preservice_training_cost['staff_count'] * preservice_training_cost['annual_attrition_rate'] # not multiplied with attrition rate again because this is already factored into 'Annual_cost_per_staff_recruited' preservice_training_cost = preservice_training_cost[['draw', 'run', 'year', 'OfficerType', 'Facility_Level', 'cost']] - # 1.5 In-service training cost to train all staff + # 1.3 In-service training cost to train all staff #--------------------------------------------------------------------------------------------------------------- - inservice_training_cost = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = used_staff_count_by_level_and_officer_type, + inservice_training_cost = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = staff_size_chosen_for_costing, varnames = ['annual_inservice_training_cost_usd']) inservice_training_cost['cost'] = inservice_training_cost['staff_count'] * inservice_training_cost['annual_inservice_training_cost_usd'] inservice_training_cost = inservice_training_cost[['draw', 'run', 'year', 'OfficerType', 'Facility_Level', 'cost']] # TODO Consider calculating economic cost of HR by multiplying salary times staff count with cadres_utilisation_rate - # 1.6 Store all HR costs in one standard format dataframe + # 1.4 Regular mentorship and supportive supervision costs + #--------------------------------------------------------------------------------------------------------------- + mentorship_and_supportive_cost = merge_cost_and_model_data(cost_df = hr_cost_parameters, model_df = staff_size_chosen_for_costing, + varnames = ['annual_mentorship_and_supervision_cost']) + mentorship_and_supportive_cost['cost'] = mentorship_and_supportive_cost['staff_count'] * mentorship_and_supportive_cost['annual_mentorship_and_supervision_cost'] + mentorship_and_supportive_cost = mentorship_and_supportive_cost[['draw', 'run', 'year', 'OfficerType', 'Facility_Level', 'cost']] + # TODO Consider calculating economic cost of HR by multiplying salary times staff count with cadres_utilisation_rate + + # 1.5 Store all HR costs in one standard format dataframe #--------------------------------------------------------------------------------------------------------------- # Function to melt and label the cost category def label_rows_of_cost_dataframe(_df, label_var, label): @@ -277,14 +307,23 @@ def label_rows_of_cost_dataframe(_df, label_var, label): return _df # Initialize HR with the salary data - human_resource_costs = retain_relevant_column_subset(label_rows_of_cost_dataframe(salary_for_staff_used_in_scenario, 'cost_subcategory', 'salary_for_used_cadres'), 'OfficerType') - - # Concatenate additional cost categories - additional_costs = [ - (recruitment_cost , 'recruitment_cost_for_attrited_workers'), - (preservice_training_cost, 'preservice_training_cost_for_attrited_workers'), - (inservice_training_cost, 'inservice_training_cost_for_all_staff') - ] + if (cost_only_used_staff): + human_resource_costs = retain_relevant_column_subset(label_rows_of_cost_dataframe(salary_for_staff, 'cost_subcategory', 'salary_for_cadres_used'), 'OfficerType') + # Concatenate additional cost categories + additional_costs = [ + (preservice_training_cost, 'preservice_training_and_recruitment_cost_for_attrited_workers'), + (inservice_training_cost, 'inservice_training_cost_for_cadres_used'), + (mentorship_and_supportive_cost, 'mentorship_and_supportive_cost_for_cadres_used') + ] + else: + 
human_resource_costs = retain_relevant_column_subset(label_rows_of_cost_dataframe(salary_for_staff, 'cost_subcategory', 'salary_for_all_staff'), 'OfficerType') + # Concatenate additional cost categories + additional_costs = [ + (preservice_training_cost, 'preservice_training_and_recruitment_cost_for_attrited_workers'), + (inservice_training_cost, 'inservice_training_cost_for_all_staff'), + (mentorship_and_supportive_cost, 'mentorship_and_supportive_cost_for_all_staff') + ] + # Iterate through additional costs, melt and concatenate for df, label in additional_costs: labelled_df = retain_relevant_column_subset(label_rows_of_cost_dataframe(df, 'cost_subcategory', label), 'OfficerType') @@ -582,8 +621,9 @@ def update_itemuse_for_level1b_using_level2_data(_df): #population_scaling_factor = log['tlo.methods.demography']['scaling_factor']['scaling_factor'].iloc[0] # Estimate cost of scenario -input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, draws = [0], summarize = True) -input_costs[input_costs.year == 2018].groupby('cost_category')['cost'].sum() +input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, draws = [0], runs = [0], cost_only_used_staff=True) # summarise = True +input_costs[(input_costs.year == 2018) & (input_costs.stat == 'mean')].groupby('cost_category')['cost'].sum() +input_costs[(input_costs.year == 2018) & (input_costs.stat == 'mean')].groupby('cost_subcategory')['cost'].sum() # Create folders to store results costing_outputs_folder = Path('./outputs/costing') @@ -927,196 +967,6 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe fig.show() plt.close(fig) -# 5. Calibration plots -# Steps: 1. Create a mapping of data labels in model_costing and relevant calibration data, 2. 
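# A worked sketch of the revised pre-service training and recruitment costing added in this
# patch (the NPV calculation a few lines above), using made-up numbers and a whole-year training
# duration to keep the sketch simple (the patch also handles fractional years): annual tuition is
# compounded forward to the year of recruitment, the recruitment cost is added, the total is
# grossed up for graduation/licensure/absorption rates, spread over the expected tenure with the
# same annuitisation factor as the patch, and finally applied to the staff replaced each year.
discount_rate = 0.03
average_annual_preservice_training_cost = 4_000.0   # per student, per training year (illustrative)
preservice_training_duration = 4                    # whole years of training (illustrative)
recruitment_cost_per_person_recruited = 500.0
graduation_rate, licensure_exam_passing_rate = 0.9, 0.8
absorption_rate, recruited_from_abroad = 0.6, 0.1
average_length_of_tenure = 20                       # years in the public sector (illustrative)
annual_attrition_rate = 0.05
staff_count = 1_000

# compound each year of tuition forward to the present, mirroring the exponent used in the patch
npv_training = sum(
    average_annual_preservice_training_cost * (1 + discount_rate) ** (t + 2)
    for t in range(preservice_training_duration)
)
npv_training += recruitment_cost_per_person_recruited * (1 + discount_rate)

# gross up for students who never enter the public workforce
npv_per_recruit = (npv_training
                   / (absorption_rate + recruited_from_abroad)
                   / graduation_rate
                   / licensure_exam_passing_rate)

# spread over the expected tenure, then apply to the staff leaving (and replaced) each year
annuitisation_rate = 1 + (1 - (1 + discount_rate) ** (-average_length_of_tenure + 1)) / discount_rate
annualised_cost_per_recruit = npv_per_recruit / annuitisation_rate
annual_cost = annualised_cost_per_recruit * staff_count * annual_attrition_rate
print(round(npv_per_recruit), round(annualised_cost_per_recruit), round(annual_cost))
# of the order of a few thousand USD per recruit per year in this made-up example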
Create a dataframe with model_costs and calibration costs; -# Prepare data for calibration -calibration_data = workbook_cost["resource_mapping_r7_summary"] -# Make sure values are numeric -budget_columns = ['BUDGETS (USD) (Jul 2019 - Jun 2020)', 'BUDGETS (USD) (Jul 2020 - Jun 2021)', - 'BUDGETS (USD) (Jul 2021 - Jun 2022)'] -expenditure_columns = ['EXPENDITURE (USD) (Jul 2018 - Jun 2019)'] -calibration_data[budget_columns + expenditure_columns] = calibration_data[budget_columns + expenditure_columns].apply(lambda x: pd.to_numeric(x, errors='coerce')) -# For calibration to budget figures, we take the maximum value across the three years in the RM to provide an -# upper limit to calibrate to (expenditure providing the lower limit) -calibration_data['max_annual_budget_2020-22'] = calibration_data[budget_columns].max(axis=1, skipna = True) -calibration_data = calibration_data.rename(columns = {'EXPENDITURE (USD) (Jul 2018 - Jun 2019)': 'actual_expenditure_2019', - 'Calibration_category': 'calibration_category'}) -calibration_data = calibration_data[['calibration_category','actual_expenditure_2019', 'max_annual_budget_2020-22']] -calibration_data = calibration_data.groupby('calibration_category')[['actual_expenditure_2019', 'max_annual_budget_2020-22']].sum().reset_index() -calibration_data1 = calibration_data.copy() -calibration_data1['stat'] = 'lower' -calibration_data2 = calibration_data.copy() -calibration_data2['stat'] = 'mean' -calibration_data3 = calibration_data.copy() -calibration_data3['stat'] = 'upper' -calibration_data = pd.concat([calibration_data1, calibration_data2, calibration_data3], axis = 0) -calibration_data = calibration_data.set_index(['calibration_category', 'stat']) - -# Manually create a dataframe of model costs and relevant calibration values -def get_calibration_relevant_subset(_df): - cond_calibration_subset = (_df.year == 2018) & (_df.draw == 0) - return _df[cond_calibration_subset] -def get_calibration_relevant_subset_of_consumables_cost(_df, item): - #_df =_df.rename(columns = {('year', ''):'year'}) - for col in ['Item_Code', 'Final_price_per_chosen_unit (USD, 2023)', 'excess_stock_proportion_of_dispensed','item_code']: - try: - _df = _df.drop(columns = col) - except: - pass - _df.columns = pd.MultiIndex.from_tuples(_df.columns) - _df = _df.melt(id_vars = ['year', 'Item_Code'], var_name=['draw', 'stat'], value_name='value') - _df = _df[_df['Item_Code'].isin(item)] - _df = _df.groupby(['year', 'draw', 'stat'])['value'].sum() - return get_calibration_relevant_subset(_df.reset_index()) -def merged_calibration_relevant_consumables_costs(item, category): - merged_df = pd.merge(get_calibration_relevant_subset_of_consumables_cost(cost_of_consumables_dispensed, item), - get_calibration_relevant_subset_of_consumables_cost(cost_of_excess_consumables_stocked, item), - on=['year', 'draw', 'stat'], how='outer', suffixes=('_dispensed', '_excess_stock')) - # Fill any missing values in the value columns with 0 (for cases where only one dataframe has a value) - # and sum to get total consumable cost - merged_df['value'] = merged_df['value_dispensed'].fillna(0) + merged_df['value_excess_stock'].fillna(0) - merged_df['calibration_category'] = category - return merged_df.set_index(['calibration_category', 'stat'])['value'] - -def first_positive(series): - return next((x for x in series if pd.notna(x) and x > 0), np.nan) - -def get_calibration_relevant_subset_of_other_costs(_df, _subcategory, _calibration_category): - new_data = 
get_calibration_relevant_subset(_df[_df['Cost_Sub-category'].isin(_subcategory)]).groupby('stat')['value'].sum() - new_data = new_data.reset_index() - new_data['calibration_category'] = _calibration_category - new_data = new_data.rename(columns = {'value':'model_cost'}) - return new_data.set_index(['calibration_category', 'stat'])['model_cost'] - -# Consumables -calibration_data['model_cost'] = np.nan -# Note that the main ARV regimen in 2018 was tenofovir/lamivudine/efavirenz as opposed to Tenofovir/Lamivudine/Dolutegravir as used in the RF_Costing. The price of this -# was $80 per year (80/(0.103*365)) times what's estimated by the model so let's update this -art = [2671, 2672, 2673] -tb_treatment = [176, 177, 179, 178, 181, 2678] -antimalarials = [162,164,170] -malaria_rdts = [163] -hiv_screening = [190,191,196] -condoms = [2,25] -tb_tests = [184,187, 175] -other_drugs = set(cost_of_consumables_dispensed['Item_Code'].unique()) - set(art) - set(tb_treatment) - set(antimalarials) - set(malaria_rdts) - set(hiv_screening)\ - - set(condoms) - set(tb_tests) - -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs(art, 'Antiretrovirals') * 80/(0.103*365)) -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs(tb_treatment, 'TB Treatment')) -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs(antimalarials, 'Antimalarials')) -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs(malaria_rdts, 'Malaria RDTs')) -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs(hiv_screening, 'HIV Screening/Diagnostic Tests')) -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs(condoms, 'Condoms and Lubricants')) -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs(tb_tests, 'TB Tests (including RDTs)')) -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(merged_calibration_relevant_consumables_costs(other_drugs, 'Other Drugs, medical supplies, and commodities')) - -# HR -ratio_of_all_to_used_staff = total_salary_for_all_staff[(0,2018)]/total_salary_for_staff_used_in_scenario[( 0, 'lower')][2018] -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, ['salary_for_used_cadres'], 'Health Worker Salaries') * ratio_of_all_to_used_staff) -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, ['preservice_training_cost_for_attrited_workers'], 'Health Worker Training - Pre-Service') * ratio_of_all_to_used_staff) -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, ['inservice_training_cost_for_all_staff'], 'Health Worker Training - In-Service') * ratio_of_all_to_used_staff) -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, ['recruitment_cost_for_attrited_workers'], 'Other Human Resources for Health expenses') * ratio_of_all_to_used_staff) - -# Equipment -calibration_data['model_cost'] = 
calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, ['replacement_cost_annual_total'], 'Medical Equipment - Purchase')) -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_other_costs(scenario_cost, ['upfront_repair_cost_annual_total', 'spare_parts_annual_total', - 'service_fee_annual_total'], 'Medical Equipment - Maintenance')) -#calibration_data[calibration_data['calibration_category'] == 'Vehicles - Purchase and Maintenance'] = get_calibration_relevant_subset() -#calibration_data[calibration_data['calibration_category'] == 'Vehicles - Purchase and Maintenance'] = get_calibration_relevant_subset() - -# Facility operation costs -#calibration_data[calibration_data['calibration_category'] == 'Facility utility bills - ICT', 'Infrastructure - New Builds'] = get_calibration_relevant_subset() - -# Infrastructure -#calibration_data[calibration_data['calibration_category'] == 'Infrastructure - Rehabilitation'] = get_calibration_relevant_subset() - -# 3. Create calibration plot -list_of_consumables_costs_for_calibration_only_hiv = ['Antiretrovirals', 'HIV Screening/Diagnostic Tests'] -list_of_consumables_costs_for_calibration_without_hiv =['Antimalarials', 'Condoms and Lubricants','Malaria RDTs', 'TB Tests (including RDTs)', 'TB Treatment', 'Other Drugs, medical supplies, and commodities'] -list_of_hr_costs_for_calibration = [ 'Health Worker Training - In-Service', 'Health Worker Salaries', 'Health Worker Training - Pre-Service', 'Other Human Resources for Health expenses'] -list_of_equipment_costs_for_calibration = ['Medical Equipment - Purchase', 'Medical Equipment - Maintenance'] -# Add folder to store calibration plots - -calibration_outputs_folder = Path(figurespath / 'calibration') -if not os.path.exists(calibration_outputs_folder): - os.makedirs(calibration_outputs_folder) - -def do_cost_calibration_plot(_df, _costs_included): - # Filter the dataframe - _df = _df[(_df.model_cost.notna()) & (_df.index.get_level_values(0).isin(_costs_included))] - - # For df_mean - df_mean = _df.loc[_df.index.get_level_values('stat') == 'mean'].reset_index(level='stat', drop=True)/1e6 - total_mean = pd.DataFrame(df_mean.sum()).T # Calculate the total and convert it to a DataFrame - total_mean.index = ['Total'] # Name the index of the total row as 'Total' - df_mean = pd.concat([df_mean, total_mean], axis=0) # Concatenate the total row - - # For df_lower - df_lower = _df.loc[_df.index.get_level_values('stat') == 'lower'].reset_index(level='stat', drop=True)/1e6 - total_lower = pd.DataFrame(df_lower.sum()).T # Calculate the total and convert it to a DataFrame - total_lower.index = ['Total'] # Name the index of the total row as 'Total' - df_lower = pd.concat([df_lower, total_lower], axis=0) # Concatenate the total row - - # For df_upper - df_upper = _df.loc[_df.index.get_level_values('stat') == 'upper'].reset_index(level='stat', drop=True)/1e6 - total_upper = pd.DataFrame(df_upper.sum()).T # Calculate the total and convert it to a DataFrame - total_upper.index = ['Total'] # Name the index of the total row as 'Total' - df_upper = pd.concat([df_upper, total_upper], axis=0) # Concatenate the total row - - # Create the dot plot - plt.figure(figsize=(12, 8)) - - # Plot model_cost as dots with confidence interval error bars - plt.errorbar(df_mean.index, df_mean['model_cost'], - yerr=[df_mean['model_cost'] - df_lower['model_cost'], df_upper['model_cost'] - df_mean['model_cost']], - fmt='o', label='Model Cost', 
ecolor='gray', capsize=5, color='saddlebrown') - - # Plot annual_expenditure_2019 and max_annual_budget_2020-22 as dots - plt.plot(df_mean.index, df_mean['actual_expenditure_2019'], 'bo', label='Actual Expenditure 2019', markersize=8) - plt.plot(df_mean.index, df_mean['max_annual_budget_2020-22'], 'go', label='Max Annual Budget 2020-22', markersize=8) - - # Draw a blue line between annual_expenditure_2019 and max_annual_budget_2020-22 - plt.vlines(df_mean.index, df_mean['actual_expenditure_2019'], df_mean['max_annual_budget_2020-22'], color='blue', - label='Budget Range') - - # Add labels to the model_cost dots (yellow color, slightly shifted right) - for i, (x, y) in enumerate(zip(df_mean.index, df_mean['model_cost'])): - plt.text(i + 0.05, y, f'{y:.2f}', ha='left', va='bottom', fontsize=9, - color='saddlebrown') # label model_cost values - - # Add labels and title - cost_subcategory = [name for name in globals() if globals()[name] is _costs_included][0] - cost_subcategory = cost_subcategory.replace('list_of_', '').replace('_for_calibration', '') - plt.xlabel('Cost Sub-Category') - plt.ylabel('Costs (USD), millions') - plt.title(f'Model Cost vs Annual Expenditure 2019 and Max(Annual Budget 2020-22)\n {cost_subcategory}') - - # Rotate x-axis labels for readability - plt.xticks(rotation=45, ha='right') - - # Adding a legend - plt.legend(loc='upper left', bbox_to_anchor=(1, 1), fontsize=10) - - # Tight layout and save the figure - plt.tight_layout() - plt.savefig(calibration_outputs_folder / f'calibration_dot_plot_{cost_subcategory}.png', dpi=100, - bbox_inches='tight') - plt.close() - - -# Call the function for each variable and cost list -all_calibration_costs = list_of_consumables_costs_for_calibration_only_hiv + list_of_consumables_costs_for_calibration_without_hiv + list_of_hr_costs_for_calibration + list_of_equipment_costs_for_calibration -all_consumable_costs = list_of_consumables_costs_for_calibration_without_hiv + list_of_consumables_costs_for_calibration_only_hiv - -do_cost_calibration_plot(calibration_data,list_of_consumables_costs_for_calibration_without_hiv) -do_cost_calibration_plot(calibration_data,list_of_consumables_costs_for_calibration_only_hiv) -do_cost_calibration_plot(calibration_data,all_consumable_costs) -do_cost_calibration_plot(calibration_data, list_of_hr_costs_for_calibration) -do_cost_calibration_plot(calibration_data, list_of_equipment_costs_for_calibration) -do_cost_calibration_plot(calibration_data,all_calibration_costs) -calibration_data.to_csv(figurespath / 'calibration/calibration.csv') - # TODO all these HR plots need to be looked at # 1. 
HR # Stacked bar chart of salaries by cadre @@ -1573,52 +1423,4 @@ def plot_most_expensive_equipment(_df, top_x_values = 10, figname_prefix = "Equi # TODO Multiply number of facilities by level with the quantity needed of each equipment and collapse to get total number of equipment (nationally) # TODO Which equipment needs to be newly purchased (currently no assumption made for equipment with cost > $250,000) -# Calibration scatter plots -def do_cost_calibration_plot(_df, _costs_included, _calibration_var): - _df = _df[(_df.model_cost.notna()) & (_df.index.get_level_values(0).isin(_costs_included))] - df_mean = _df.loc[_df.index.get_level_values('stat') == 'mean'].reset_index(level='stat', drop=True) - df_lower = _df.loc[_df.index.get_level_values('stat') == 'lower'].reset_index(level='stat', drop=True) - df_upper = _df.loc[_df.index.get_level_values('stat') == 'upper'].reset_index(level='stat', drop=True) - - # Create the scatter plot - plt.figure(figsize=(10, 6)) - - # Plot each point with error bars (for confidence interval) - plt.errorbar(df_mean[_calibration_var], - df_mean['model_cost'], - yerr=[df_mean['model_cost'] - df_lower['model_cost'], df_upper['model_cost'] - df_mean['model_cost']], - fmt='o', - ecolor='gray', - capsize=5, - label='Calibration Category') - - # Adding the 45-degree line (where y = x) - min_val = min(df_mean[_calibration_var].min(), df_mean['model_cost'].min()) - max_val = max(df_mean[_calibration_var].max(), df_mean['model_cost'].max()) - plt.plot([min_val, max_val], [min_val, max_val], 'r--', label='45-degree line') # Red dashed line - - # Add labels for each calibration_category - for i, label in enumerate(df_mean.index): - plt.annotate(label, (df_mean[_calibration_var].iloc[i], df_mean['model_cost'].iloc[i])) - - # Add labels and title - plt.xlabel('Actual Expenditure 2019') - plt.ylabel('Model Cost (with confidence interval)') - plt.title(f'Model Cost vs {_calibration_var}') - - # Show the plot - plt.tight_layout() - cost_subcategory = [name for name in globals() if globals()[name] is _costs_included][0] - cost_subcategory = cost_subcategory.replace('list_of_', '').replace('_for_calibration', '') - plt.savefig(calibration_outputs_folder / f'calibration_{_calibration_var}_{cost_subcategory}.png', dpi=100, - bbox_inches='tight') - plt.close() - -for var in ['actual_expenditure_2019', 'max_annual_budget_2020-22']: - do_cost_calibration_plot(calibration_data, list_of_consumables_costs_for_calibration_only_hiv, var) - do_cost_calibration_plot(calibration_data, list_of_consumables_costs_for_calibration_without_hiv, var) - do_cost_calibration_plot(calibration_data, list_of_hr_costs_for_calibration, var) - do_cost_calibration_plot(calibration_data, list_of_equipment_costs_for_calibration, var) - - ''' From 24b944dc84235d73a10793947259913b2d3bddec Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 28 Oct 2024 19:04:44 +0000 Subject: [PATCH 131/230] rename script --- src/scripts/costing/{costing.py => cost_estimation.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/scripts/costing/{costing.py => cost_estimation.py} (100%) diff --git a/src/scripts/costing/costing.py b/src/scripts/costing/cost_estimation.py similarity index 100% rename from src/scripts/costing/costing.py rename to src/scripts/costing/cost_estimation.py From 0af0c7d94295469778676be7aecc370bc234b816 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 30 Oct 2024 15:15:11 +0000 Subject: [PATCH 132/230] move cost validation script out from main cost_estimation script --- 
src/scripts/costing/costing_validation.py | 351 ++++++++++++++++++++++ 1 file changed, 351 insertions(+) create mode 100644 src/scripts/costing/costing_validation.py diff --git a/src/scripts/costing/costing_validation.py b/src/scripts/costing/costing_validation.py new file mode 100644 index 0000000000..5db667a4ad --- /dev/null +++ b/src/scripts/costing/costing_validation.py @@ -0,0 +1,351 @@ +import argparse +from pathlib import Path +from tlo import Date +from collections import Counter, defaultdict + +import calendar +import datetime +import os +import textwrap + +import matplotlib.pyplot as plt +from matplotlib.ticker import FuncFormatter +import numpy as np +import pandas as pd +import ast +import math + +from tlo.analysis.utils import ( + extract_params, + extract_results, + get_scenario_info, + get_scenario_outputs, + load_pickled_dataframes, + make_age_grp_lookup, + make_age_grp_types, + summarize, + create_pickles_locally, + parse_log_file, + unflatten_flattened_multi_index_in_logging +) +from scripts.costing.cost_estimation import estimate_input_cost_of_scenarios + +# Define a timestamp for script outputs +timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M") + +# Print the start time of the script +print('Script Start', datetime.datetime.now().strftime('%H:%M')) + +# Establish common paths +resourcefilepath = Path("./resources") + +# Steps: 1. Create a mapping of data labels in model_costing and relevant calibration data, 2. Create a dataframe with model_costs and calibration costs; +# Load costing resourcefile +workbook_cost = pd.read_excel((resourcefilepath / "costing/ResourceFile_Costing.xlsx"), + sheet_name=None) +# Prepare data for calibration +calibration_data = workbook_cost["resource_mapping_r7_summary"] +# Make sure values are numeric +budget_columns = ['BUDGETS (USD) (Jul 2019 - Jun 2020)', 'BUDGETS (USD) (Jul 2020 - Jun 2021)', + 'BUDGETS (USD) (Jul 2021 - Jun 2022)'] +expenditure_columns = ['EXPENDITURE (USD) (Jul 2018 - Jun 2019)'] +calibration_data[budget_columns + expenditure_columns] = calibration_data[budget_columns + expenditure_columns].apply(lambda x: pd.to_numeric(x, errors='coerce')) +# For calibration to budget figures, we take the maximum value across the three years in the RM to provide an +# the maximum of the budget between 2020 and 2022 provides the upper limit to calibrate to (expenditure providing the lower limit) +calibration_data['max_annual_budget_2020-22'] = calibration_data[budget_columns].max(axis=1, skipna = True) +calibration_data = calibration_data.rename(columns = {'EXPENDITURE (USD) (Jul 2018 - Jun 2019)': 'actual_expenditure_2019', + 'Calibration_category': 'calibration_category'}) +calibration_data = calibration_data[['calibration_category','actual_expenditure_2019', 'max_annual_budget_2020-22']] +calibration_data = calibration_data.groupby('calibration_category')[['actual_expenditure_2019', 'max_annual_budget_2020-22']].sum().reset_index() +# Repeat this dataframe three times to map to the lower, upper and mean stats in the cost data +calibration_data1 = calibration_data.copy() +calibration_data1['stat'] = 'lower' +calibration_data2 = calibration_data.copy() +calibration_data2['stat'] = 'mean' +calibration_data3 = calibration_data.copy() +calibration_data3['stat'] = 'upper' +calibration_data = pd.concat([calibration_data1, calibration_data2, calibration_data3], axis = 0) +calibration_data = calibration_data.set_index(['calibration_category', 'stat']) + +# %% +# Estimate cost for validation +#----------------------------- +# 
Load result files +resourcefilepath = Path("./resources") +outputfilepath = Path('./outputs/t.mangal@imperial.ac.uk') +results_folder = get_scenario_outputs('htm_with_and_without_hss-2024-10-12T111720Z.py', outputfilepath)[0] + +# Estimate costs for 2018 +input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, draws = [0], summarize = True, cost_only_used_staff=False) +input_costs = input_costs[input_costs.year == 2018] + +# Manually create a dataframe of model costs and relevant calibration values +def assign_item_codes_to_consumables(_df): + path_for_consumable_resourcefiles = resourcefilepath / "healthsystem/consumables" + # Retain only consumable costs + _df = _df[_df['cost_category'] == 'medical consumables'] + + # Create dictionary mapping item_codes to consumables names + consumables_dict = pd.read_csv(path_for_consumable_resourcefiles / 'ResourceFile_consumables_matched.csv', low_memory=False, + encoding="ISO-8859-1")[['item_code', 'consumable_name_tlo']] + consumables_dict = consumables_dict.rename(columns = {'item_code': 'Item_Code'}) + consumables_dict = dict(zip(consumables_dict['consumable_name_tlo'], consumables_dict['Item_Code'])) + + # Replace consumable_name_tlo with item_code + _df = _df.copy() + _df['cost_subgroup'] = _df['cost_subgroup'].map(consumables_dict) + + return _df + +def get_calibration_relevant_subset_of_costs(_df, _col, _col_value, _calibration_category): + if (len(_col_value) == 1): + _df = _df[_df[_col] == _col_value[0]] + else: + _df = _df[_df[_col].isin(_col_value)] + _df['calibration_category'] = _calibration_category + return _df.groupby(['calibration_category' ,'stat'])['cost'].sum() + +''' +def get_calibration_relevant_subset_of_consumables_cost(_df, item): + for col in ['Item_Code', 'Final_price_per_chosen_unit (USD, 2023)', 'excess_stock_proportion_of_dispensed','item_code']: + try: + _df = _df.drop(columns = col) + except: + pass + _df.columns = pd.MultiIndex.from_tuples(_df.columns) + _df = _df.melt(id_vars = ['year', 'Item_Code'], var_name=['draw', 'stat'], value_name='value') + _df = _df[_df['Item_Code'].isin(item)] + _df = _df.groupby(['year', 'draw', 'stat'])['value'].sum() + return _df.reset_index() +def merged_calibration_relevant_consumables_costs(item, category): + merged_df = pd.merge(get_calibration_relevant_subset_of_consumables_cost(cost_of_consumables_dispensed, item), + get_calibration_relevant_subset_of_consumables_cost(cost_of_excess_consumables_stocked, item), + on=['year', 'draw', 'stat'], how='outer', suffixes=('_dispensed', '_excess_stock')) + # Fill any missing values in the value columns with 0 (for cases where only one dataframe has a value) + # and sum to get total consumable cost + merged_df['value'] = merged_df['value_dispensed'].fillna(0) + merged_df['value_excess_stock'].fillna(0) + merged_df['calibration_category'] = category + return merged_df.set_index(['calibration_category', 'stat'])['value'] + +def first_positive(series): + return next((x for x in series if pd.notna(x) and x > 0), np.nan) + +def get_calibration_relevant_subset_of_other_costs(_df, _subcategory, _calibration_category): + new_data = get_calibration_relevant_subset(_df[_df['Cost_Sub-category'].isin(_subcategory)]).groupby('stat')['value'].sum() + new_data = new_data.reset_index() + new_data['calibration_category'] = _calibration_category + new_data = new_data.rename(columns = {'value':'model_cost'}) + return new_data.set_index(['calibration_category', 'stat'])['model_cost'] +''' + +# Consumables 
+#----------------------------------------------------------------------------------------------------------------------- +calibration_data['model_cost'] = np.nan +consumables_costs_by_item_code = assign_item_codes_to_consumables(input_costs) +art = [2671, 2672, 2673] +tb_treatment = [176, 177, 179, 178, 181, 2678] +antimalarials = [162,164,170] +malaria_rdts = [163] +hiv_screening = [190,191,196] +condoms = [2,25] +tb_tests = [184,187, 175] +other_drugs = set(consumables_costs_by_item_code['cost_subgroup'].unique()) - set(art) - set(tb_treatment) - set(antimalarials) - set(malaria_rdts) - set(hiv_screening)\ + - set(condoms) - set(tb_tests) - {3} +# TODO once the quantity dispensed of Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly is fixed we no lnger have to adjust for Item_code 3 + +# Note that the main ARV regimen in 2018 was tenofovir/lamivudine/efavirenz as opposed to Tenofovir/Lamivudine/Dolutegravir as used in the RF_Costing. The price of this +# was $80 per year (80/(0.103*365)) times what's estimated by the model so let's update this +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = art, _calibration_category = 'Antiretrovirals')* 80/(0.103*365)) +# Other consumables costs do not need to be adjusted +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = tb_treatment, _calibration_category = 'TB Treatment')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = antimalarials, _calibration_category = 'Antimalarials')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = malaria_rdts, _calibration_category = 'Malaria RDTs')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = [191, 196], _calibration_category = 'HIV Screening/Diagnostic Tests') + + get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = [190], _calibration_category = 'HIV Screening/Diagnostic Tests')/4) +# TODO update above when VL test quantity is adjusted in the module - currently 4 tests per year are assumed +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = condoms, _calibration_category = 'Condoms and Lubricants')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = tb_tests, _calibration_category = 'TB Tests (including RDTs)')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = other_drugs, _calibration_category = 'Other Drugs, medical supplies, and commodities') + + get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = 
[3], _calibration_category = 'Other Drugs, medical supplies, and commodities')/7) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = input_costs, _col = 'cost_subcategory', _col_value = ['supply_chain'], _calibration_category = 'Supply Chain')) + + +# HR +#----------------------------------------------------------------------------------------------------------------------- +hr_costs = input_costs[input_costs['cost_category'] == 'human resources for health'] +#ratio_of_all_to_used_staff = total_salary_for_all_staff[(0,2018)]/total_salary_for_staff_used_in_scenario[( 0, 'lower')][2018] +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = hr_costs, _col = 'cost_subcategory', _col_value = ['salary_for_all_staff'], _calibration_category = 'Health Worker Salaries')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = hr_costs, _col = 'cost_subcategory', _col_value = ['preservice_training_and_recruitment_cost_for_attrited_workers'], _calibration_category = 'Health Worker Training - Pre-Service')) # TODO remove recruitment costs? +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = hr_costs, _col = 'cost_subcategory', _col_value = ['inservice_training_cost_for_all_staff'], _calibration_category = 'Health Worker Training - In-Service')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = hr_costs, _col = 'cost_subcategory', _col_value = ['mentorship_and_supportive_cost_for_all_staff'], _calibration_category = 'Mentorships & Supportive Supervision')) + +# Equipment +equipment_costs = input_costs[input_costs['cost_category'] == 'medical equipment'] +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = equipment_costs, _col = 'cost_subcategory', _col_value = ['replacement_cost_annual_total'], _calibration_category = 'Medical Equipment - Purchase')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = equipment_costs, _col = 'cost_subcategory', + _col_value = ['service_fee_annual_total', 'spare_parts_annual_total','major_corrective_maintenance_cost_annual_total'], + _calibration_category = 'Medical Equipment - Maintenance')) +#calibration_data[calibration_data['calibration_category'] == 'Vehicles - Purchase and Maintenance'] = get_calibration_relevant_subset() +#calibration_data[calibration_data['calibration_category'] == 'Vehicles - Purchase and Maintenance'] = get_calibration_relevant_subset() + +# Facility operation costs +#----------------------------------------------------------------------------------------------------------------------- +#calibration_data[calibration_data['calibration_category'] == 'Facility utility bills - ICT', 'Infrastructure - New Builds'] = get_calibration_relevant_subset() + +# Infrastructure +#----------------------------------------------------------------------------------------------------------------------- +#calibration_data[calibration_data['calibration_category'] == 'Infrastructure - Rehabilitation'] = get_calibration_relevant_subset() + +# %% +# 3. 
Create calibration plot +list_of_consumables_costs_for_calibration_only_hiv = ['HIV Screening/Diagnostic Tests', 'Antiretrovirals'] +list_of_consumables_costs_for_calibration_without_hiv =['Malaria RDTs', 'Antimalarials', 'TB Tests (including RDTs)', 'TB Treatment', 'Condoms and Lubricants', 'Other Drugs, medical supplies, and commodities'] +list_of_hr_costs_for_calibration = ['Health Worker Salaries', 'Health Worker Training - In-Service', 'Health Worker Training - Pre-Service', 'Mentorships & Supportive Supervision'] +list_of_equipment_costs_for_calibration = ['Medical Equipment - Purchase', 'Medical Equipment - Maintenance'] + +# Create folders to store results +costing_outputs_folder = Path('./outputs/costing') +if not os.path.exists(costing_outputs_folder): + os.makedirs(costing_outputs_folder) +figurespath = costing_outputs_folder / "figures" +if not os.path.exists(figurespath): + os.makedirs(figurespath) +calibration_outputs_folder = Path(figurespath / 'calibration') +if not os.path.exists(calibration_outputs_folder): + os.makedirs(calibration_outputs_folder) + +def do_cost_calibration_plot(_df, _costs_included): + # Filter the dataframe + _df = _df[(_df.model_cost.notna()) & (_df.index.get_level_values(0).isin(_costs_included))] + + # Reorder the first level of the index based on _costs_included while keeping the second level intact + _df.index = pd.MultiIndex.from_arrays([ + pd.CategoricalIndex(_df.index.get_level_values(0), categories=_costs_included, ordered=True), + _df.index.get_level_values(1) + ]) + _df = _df.sort_index() # Apply the custom order by sorting the DataFrame + + # For df_mean + df_mean = _df.loc[_df.index.get_level_values('stat') == 'mean'].reset_index(level='stat', drop=True)/1e6 + total_mean = pd.DataFrame(df_mean.sum()).T # Calculate the total and convert it to a DataFrame + total_mean.index = ['Total'] # Name the index of the total row as 'Total' + df_mean = pd.concat([df_mean, total_mean], axis=0) # Concatenate the total row + + # For df_lower + df_lower = _df.loc[_df.index.get_level_values('stat') == 'lower'].reset_index(level='stat', drop=True)/1e6 + total_lower = pd.DataFrame(df_lower.sum()).T # Calculate the total and convert it to a DataFrame + total_lower.index = ['Total'] # Name the index of the total row as 'Total' + df_lower = pd.concat([df_lower, total_lower], axis=0) # Concatenate the total row + + # For df_upper + df_upper = _df.loc[_df.index.get_level_values('stat') == 'upper'].reset_index(level='stat', drop=True)/1e6 + total_upper = pd.DataFrame(df_upper.sum()).T # Calculate the total and convert it to a DataFrame + total_upper.index = ['Total'] # Name the index of the total row as 'Total' + df_upper = pd.concat([df_upper, total_upper], axis=0) # Concatenate the total row + + # Create the dot plot + plt.figure(figsize=(12, 8)) + + # Plot model_cost as dots with confidence interval error bars + yerr_lower = (df_mean['model_cost'] - df_lower['model_cost']).clip(lower = 0) + yerr_upper = (df_upper['model_cost'] - df_mean['model_cost']).clip(lower = 0) + plt.errorbar(df_mean.index, df_mean['model_cost'], + yerr=[yerr_lower, yerr_upper], + fmt='o', label='Model Cost', ecolor='gray', capsize=5, color='saddlebrown') + + # Plot annual_expenditure_2019 and max_annual_budget_2020-22 as dots + plt.plot(df_mean.index, df_mean['actual_expenditure_2019'], 'bo', label='Actual Expenditure 2019', markersize=8) + plt.plot(df_mean.index, df_mean['max_annual_budget_2020-22'], 'go', label='Max Annual Budget 2020-22', markersize=8) + + # Draw a blue line between 
annual_expenditure_2019 and max_annual_budget_2020-22 + plt.vlines(df_mean.index, df_mean['actual_expenditure_2019'], df_mean['max_annual_budget_2020-22'], color='blue', + label='Expenditure-Budget Range') + + # Add labels to the model_cost dots (yellow color, slightly shifted right) + for i, (x, y) in enumerate(zip(df_mean.index, df_mean['model_cost'])): + plt.text(i + 0.05, y, f'{y:.2f}', ha='left', va='bottom', fontsize=9, + color='saddlebrown') # label model_cost values + + # Add labels and title + cost_subcategory = [name for name in globals() if globals()[name] is _costs_included][0] + cost_subcategory = cost_subcategory.replace('list_of_', '').replace('_for_calibration', '') + plt.xlabel('Cost Sub-Category') + plt.ylabel('Costs (USD), millions') + plt.title(f'Model Cost vs Annual Expenditure 2019 and Max(Annual Budget 2020-22)\n {cost_subcategory}') + + # Rotate x-axis labels for readability + plt.xticks(rotation=45, ha='right') + + # Adding a legend + plt.legend(loc='upper left', bbox_to_anchor=(1, 1), fontsize=10) + + # Tight layout and save the figure + plt.tight_layout() + plt.savefig(calibration_outputs_folder / f'calibration_dot_plot_{cost_subcategory}.png', dpi=100, + bbox_inches='tight') + plt.close() + + +# Call the function for each variable and cost list +all_consumable_costs = list_of_consumables_costs_for_calibration_only_hiv + list_of_consumables_costs_for_calibration_without_hiv + ['Supply Chain'] +all_calibration_costs = all_consumable_costs + list_of_hr_costs_for_calibration + list_of_equipment_costs_for_calibration + +do_cost_calibration_plot(calibration_data,list_of_consumables_costs_for_calibration_without_hiv) +do_cost_calibration_plot(calibration_data,list_of_consumables_costs_for_calibration_only_hiv) +do_cost_calibration_plot(calibration_data,all_consumable_costs) +do_cost_calibration_plot(calibration_data, list_of_hr_costs_for_calibration) +do_cost_calibration_plot(calibration_data, list_of_equipment_costs_for_calibration) +do_cost_calibration_plot(calibration_data,all_calibration_costs) +calibration_data.to_csv(figurespath / 'calibration/calibration.csv') + +''' + +# Calibration scatter plots +def do_cost_calibration_plot(_df, _costs_included, _calibration_var): + _df = _df[(_df.model_cost.notna()) & (_df.index.get_level_values(0).isin(_costs_included))] + df_mean = _df.loc[_df.index.get_level_values('stat') == 'mean'].reset_index(level='stat', drop=True) + df_lower = _df.loc[_df.index.get_level_values('stat') == 'lower'].reset_index(level='stat', drop=True) + df_upper = _df.loc[_df.index.get_level_values('stat') == 'upper'].reset_index(level='stat', drop=True) + + # Create the scatter plot + plt.figure(figsize=(10, 6)) + + # Plot each point with error bars (for confidence interval) + plt.errorbar(df_mean[_calibration_var], + df_mean['model_cost'], + yerr=[df_mean['model_cost'] - df_lower['model_cost'], df_upper['model_cost'] - df_mean['model_cost']], + fmt='o', + ecolor='gray', + capsize=5, + label='Calibration Category') + + # Adding the 45-degree line (where y = x) + min_val = min(df_mean[_calibration_var].min(), df_mean['model_cost'].min()) + max_val = max(df_mean[_calibration_var].max(), df_mean['model_cost'].max()) + plt.plot([min_val, max_val], [min_val, max_val], 'r--', label='45-degree line') # Red dashed line + + # Add labels for each calibration_category + for i, label in enumerate(df_mean.index): + plt.annotate(label, (df_mean[_calibration_var].iloc[i], df_mean['model_cost'].iloc[i])) + + # Add labels and title + plt.xlabel('Actual Expenditure 
2019') + plt.ylabel('Model Cost (with confidence interval)') + plt.title(f'Model Cost vs {_calibration_var}') + + # Show the plot + plt.tight_layout() + cost_subcategory = [name for name in globals() if globals()[name] is _costs_included][0] + cost_subcategory = cost_subcategory.replace('list_of_', '').replace('_for_calibration', '') + plt.savefig(calibration_outputs_folder / f'calibration_{_calibration_var}_{cost_subcategory}.png', dpi=100, + bbox_inches='tight') + plt.close() + +for var in ['actual_expenditure_2019', 'max_annual_budget_2020-22']: + do_cost_calibration_plot(calibration_data, list_of_consumables_costs_for_calibration_only_hiv, var) + do_cost_calibration_plot(calibration_data, list_of_consumables_costs_for_calibration_without_hiv, var) + do_cost_calibration_plot(calibration_data, list_of_hr_costs_for_calibration, var) + do_cost_calibration_plot(calibration_data, list_of_equipment_costs_for_calibration, var) + + +''' From da230713c9b504a51dc1d858875bdf3ae5e45ddf Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 30 Oct 2024 15:15:22 +0000 Subject: [PATCH 133/230] add supply chain costs --- src/scripts/costing/cost_estimation.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index a84a0dbe59..774cc73e85 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -459,6 +459,31 @@ def melt_and_label_consumables_cost(_df, label): cost_of_consumables_dispensed = retain_relevant_column_subset(melt_and_label_consumables_cost(cost_of_consumables_dispensed, 'cost_of_consumables_dispensed'), 'consumable') cost_of_excess_consumables_stocked = retain_relevant_column_subset(melt_and_label_consumables_cost(cost_of_excess_consumables_stocked, 'cost_of_excess_consumables_stocked'), 'consumable') consumable_costs = pd.concat([cost_of_consumables_dispensed, cost_of_excess_consumables_stocked]) + + # 2.4 Supply chain costs + #--------------------------------------------------------------------------------------------------------------- + # Assume that the cost of procurement, warehousing and distribution is a fixed proportion of consumable purchase costs + # The fixed proportion is based on Resource Mapping Expenditure data from 2018 + resource_mapping_data = workbook_cost["resource_mapping_r7_summary"] + # Make sure values are numeric + expenditure_column = ['EXPENDITURE (USD) (Jul 2018 - Jun 2019)'] + resource_mapping_data[expenditure_column] = resource_mapping_data[expenditure_column].apply(lambda x: pd.to_numeric(x, errors='coerce')) + supply_chain_expenditure = resource_mapping_data[resource_mapping_data['Cost Type'] == 'Supply Chain'][expenditure_column].sum()[0] + consumables_purchase_expenditure = resource_mapping_data[resource_mapping_data['Cost Type'] == 'Drugs and Commodities'][expenditure_column].sum()[0] + supply_chain_cost_proportion = supply_chain_expenditure / consumables_purchase_expenditure + + # Estimate supply chain costs based on the total consumable purchase cost calculated above + supply_chain_costs = (consumable_costs.groupby(['draw', 'run', 'year'])[ + 'cost'].sum() * supply_chain_cost_proportion).reset_index() + # Assign relevant additional columns to match the format of the rest of consumables costs + supply_chain_costs['Facility_Level'] = 'all' + supply_chain_costs['consumable'] = 'NA' + supply_chain_costs['cost_subcategory'] = 'supply_chain' + assert set(supply_chain_costs.columns) == set(consumable_costs.columns) 
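# --- Editor's illustrative sketch (not part of the patch above): a minimal, self-contained example of the
# supply-chain mark-up logic added in this hunk. The expenditure figures and the tiny cost table below are
# assumptions for illustration only; in the script the real values come from the 'resource_mapping_r7_summary'
# sheet and from `consumable_costs`.
import pandas as pd

toy_consumable_costs = pd.DataFrame({
    'draw': [0, 0, 0, 0],
    'run':  [0, 0, 1, 1],
    'year': [2018, 2019, 2018, 2019],
    'cost': [100.0, 120.0, 90.0, 110.0],
})
toy_supply_chain_expenditure = 25.0            # hypothetical 'Supply Chain' expenditure, 2018
toy_consumables_purchase_expenditure = 100.0   # hypothetical 'Drugs and Commodities' expenditure, 2018
toy_proportion = toy_supply_chain_expenditure / toy_consumables_purchase_expenditure  # 0.25

# Apply the fixed proportion to the summed purchase cost of each draw/run/year, mirroring the groupby above
toy_supply_chain_costs = (
    toy_consumable_costs.groupby(['draw', 'run', 'year'])['cost'].sum() * toy_proportion
).reset_index()
print(toy_supply_chain_costs)  # each draw/run/year carries 25% of its consumable purchase cost as supply chain cost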
+ + # Append supply chain costs to the full consumable cost dataframe + consumable_costs = pd.concat([consumable_costs, supply_chain_costs]) + consumable_costs = prepare_cost_dataframe(consumable_costs, _category_specific_group = 'consumable', _cost_category = 'medical consumables') # Only preserve the draws and runs requested From 46d4f0446a5db389d8deda20e9131decf79158a9 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 30 Oct 2024 19:17:44 +0000 Subject: [PATCH 134/230] move scenario specific costing to a new script - update the calculation of malaria scale-up costs --- .../costing/cost_analysis_roi_of_hss.py | 237 ++++++++++++++++++ src/scripts/costing/cost_estimation.py | 168 +------------ 2 files changed, 238 insertions(+), 167 deletions(-) create mode 100644 src/scripts/costing/cost_analysis_roi_of_hss.py diff --git a/src/scripts/costing/cost_analysis_roi_of_hss.py b/src/scripts/costing/cost_analysis_roi_of_hss.py new file mode 100644 index 0000000000..bff0a667a0 --- /dev/null +++ b/src/scripts/costing/cost_analysis_roi_of_hss.py @@ -0,0 +1,237 @@ +import argparse +from pathlib import Path +from tlo import Date +from collections import Counter, defaultdict + +import calendar +import datetime +import os +import textwrap + +import matplotlib.pyplot as plt +from matplotlib.ticker import FuncFormatter +import numpy as np +import pandas as pd +import ast +import math + +from tlo.analysis.utils import ( + extract_params, + extract_results, + get_scenario_info, + get_scenario_outputs, + load_pickled_dataframes, + make_age_grp_lookup, + make_age_grp_types, + summarize, + create_pickles_locally, + parse_log_file, + unflatten_flattened_multi_index_in_logging +) + +# Define a timestamp for script outputs +timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M") + +# Print the start time of the script +print('Script Start', datetime.datetime.now().strftime('%H:%M')) + +# Load result files +#------------------- +#results_folder = get_scenario_outputs('htm_with_and_without_hss-2024-09-04T143044Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios version 1 +#results_folder = get_scenario_outputs('hss_elements-2024-09-04T142900Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios version 1 +resourcefilepath = Path("./resources") +outputfilepath = Path('./outputs/t.mangal@imperial.ac.uk') +results_folder = get_scenario_outputs('htm_with_and_without_hss-2024-10-12T111720Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios version 2 +#results_folder = get_scenario_outputs('hss_elements-2024-10-12T111649Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios version 2 + +# Check can read results from draw=0, run=0 +log = load_pickled_dataframes(results_folder, 0, 0) # look at one log (so can decide what to extract) +params = extract_params(results_folder) +population_scaling_factor = log['tlo.methods.demography']['scaling_factor']['scaling_factor'].iloc[0] +TARGET_PERIOD_INTERVENTION = (Date(2020, 1, 1), Date(2030, 12, 31)) +relevant_period_for_costing = [i.year for i in TARGET_PERIOD_INTERVENTION] + +# Load the list of districts and their IDs +district_dict = pd.read_csv(resourcefilepath / 'demography' / 'ResourceFile_Population_2010.csv')[ + ['District_Num', 'District']].drop_duplicates() +district_dict = dict(zip(district_dict['District_Num'], district_dict['District'])) + +# Estimate standard input costs of scenario +#----------------------------------------------------------------------------------------------------------------------- +input_costs = 
estimate_input_cost_of_scenarios(results_folder, resourcefilepath , cost_only_used_staff=True) # summarise = True + +# Create folders to store results +costing_outputs_folder = Path('./outputs/costing') +if not os.path.exists(costing_outputs_folder): + os.makedirs(costing_outputs_folder) +figurespath = costing_outputs_folder / "global_fund_roi_analysis" +if not os.path.exists(figurespath): + os.makedirs(figurespath) + +# Add additional costs pertaining to simulation +#----------------------------------------------------------------------------------------------------------------------- +# In this case malaria intervention scale-up costs were not included in the standard estimate_input_cost_of_scenarios function +list_of_draws_with_malaria_scaleup_parameters = params[(params.module_param == 'Malaria:scaleup_start_year')] +list_of_draws_with_malaria_scaleup_parameters.loc[:,'value'] = pd.to_numeric(list_of_draws_with_malaria_scaleup_parameters['value']) +list_of_draws_with_malaria_scaleup_implemented_in_costing_period = list_of_draws_with_malaria_scaleup_parameters[(list_of_draws_with_malaria_scaleup_parameters['value'] < max(relevant_period_for_costing))].index.to_list() + +# 1. IRS costs +irs_coverage_rate = 0.8 +districts_with_irs_scaleup = ['Kasungu', 'Mchinji', 'Lilongwe', 'Lilongwe City', 'Dowa', 'Ntchisi', 'Salima', 'Mangochi', + 'Mwanza', 'Likoma', 'Nkhotakota'] +# Convert above list of district names to numeric district identifiers +district_keys_with_irs_scaleup = [key for key, name in district_dict.items() if name in districts_with_irs_scaleup] +#proportion_of_district_with_irs_coverage = len(districts_with_irs_scaleup)/mfl.District.nunique() +TARGET_PERIOD_MALARIA_SCALEUP = (Date(2024, 1, 1), Date(2030, 12, 31)) + +# Get population by district +def get_total_population_by_district(_df): + years_needed = [i.year for i in TARGET_PERIOD_MALARIA_SCALEUP] + _df['year'] = pd.to_datetime(_df['date']).dt.year + assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." 
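# --- Editor's illustrative sketch (not part of the patch above): the two statements that follow reshape the
# wide 'pop_district' log (one column per district, plus 'date') into a Series indexed by (year, district),
# which is the shape `extract_results` expects from `custom_generate_series`. The toy frame and district
# labels below are assumptions for illustration only.
import pandas as pd

toy_pop = pd.DataFrame({
    'date': ['2024-01-01', '2025-01-01'],
    '1': [1000, 1100],   # hypothetical population of district 1
    '2': [2000, 2150],   # hypothetical population of district 2
})
toy_pop['year'] = pd.to_datetime(toy_pop['date']).dt.year
toy_long = pd.melt(toy_pop.drop(columns='date'), id_vars=['year']).rename(columns={'variable': 'district'})
toy_series = toy_long.set_index(['year', 'district'])['value']
print(toy_series)  # MultiIndex (year, district) -> population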
+ _df = pd.melt(_df.drop(columns = 'date'), id_vars = ['year']).rename(columns = {'variable': 'district'}) + return pd.Series( + data=_df + .loc[_df.year.between(*years_needed)] + .set_index(['year', 'district'])['value'] + ) + +district_population_by_year = extract_results( + results_folder, + module='tlo.methods.malaria', + key='pop_district', + custom_generate_series=get_total_population_by_district, + do_scaling=True +) + +def get_number_of_people_covered_by_malaria_scaleup(_df, list_of_districts_covered = None, draws_included = None): + _df = pd.DataFrame(_df) + # Reset the index to make 'district' a column + _df = _df.reset_index() + # Convert the 'district' column to numeric values + _df['district'] = pd.to_numeric(_df['district'], errors='coerce') + _df = _df.set_index(['year', 'district']) + if list_of_districts_covered is not None: + _df.loc[~_df.index.get_level_values('district').isin(list_of_districts_covered), :] = 0 + if draws_included is not None: + _df.loc[:, ~_df.columns.get_level_values('draw').isin(draws_included)] = 0 + return _df + +district_population_covered_by_irs_scaleup_by_year = get_number_of_people_covered_by_malaria_scaleup(district_population_by_year, + list_of_districts_covered=district_keys_with_irs_scaleup, + draws_included = list_of_draws_with_malaria_scaleup_implemented_in_costing_period) + +#years_with_no_malaria_scaleup = set(TARGET_PERIOD).symmetric_difference(set(TARGET_PERIOD_MALARIA_SCALEUP)) +#years_with_no_malaria_scaleup = sorted(list(years_with_no_malaria_scaleup)) +#years_with_no_malaria_scaleup = [i.year for i in years_with_no_malaria_scaleup] +irs_cost_per_person = unit_price_consumable[unit_price_consumable.Item_Code == 161]['Final_price_per_chosen_unit (USD, 2023)'] +irs_multiplication_factor = irs_cost_per_person * irs_coverage_rate +total_irs_cost = irs_multiplication_factor.iloc[0] * district_population_covered_by_irs_scaleup_by_year # for districts and scenarios included +total_irs_cost = total_irs_cost.groupby(level='year').sum() +# TODO melt irs_cost + +# 2. Bednet costs +bednet_coverage_rate = 0.7 +# We can assume 3-year lifespan of a bednet, each bednet covering 1.8 people. 
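# --- Editor's illustrative sketch (not part of the patch above): the annualisation applied in the next few
# lines, worked through with a hypothetical net price of USD 2.00 (the script reads the real price from
# `unit_price_consumable` for Item_Code 160).
hypothetical_unit_cost_of_bednet = 2.00   # USD per net, an assumption for illustration only
people_covered_per_net = 1.8
bednet_lifespan_years = 3
annual_cost_per_person_covered = hypothetical_unit_cost_of_bednet / people_covered_per_net / bednet_lifespan_years
print(round(annual_cost_per_person_covered, 2))  # ~0.37 USD per person per year, before applying the 70% coverage rate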
+unit_cost_of_bednet = unit_price_consumable[unit_price_consumable.Item_Code == 160]['Final_price_per_chosen_unit (USD, 2023)'] +annual_bednet_cost_per_person = unit_cost_of_bednet / 1.8 / 3 +bednet_multiplication_factor = bednet_coverage_rate * annual_bednet_cost_per_person + +district_population_covered_by_bednet_scaleup_by_year = get_number_of_people_covered_by_malaria_scaleup(district_population_by_year, + draws_included = list_of_draws_with_malaria_scaleup_implemented_in_costing_period) # All districts covered + +total_bednet_cost = bednet_multiplication_factor.iloc[0] * district_population_covered_by_bednet_scaleup_by_year # for scenarios included +total_bednet_cost = total_bednet_cost.groupby(level='year').sum() + +# Malaria scale-up costs - TOTAL +malaria_scaleup_costs = [ + (total_irs_cost.reset_index(), 'cost_of_IRS_scaleup'), + (total_bednet_cost.reset_index(), 'cost_of_bednet_scaleup'), +] +def melt_and_label_malaria_scaleup_cost(_df, label): + multi_index = pd.MultiIndex.from_tuples(_df.columns) + _df.columns = multi_index + + # reshape dataframe and assign 'draw' and 'run' as the correct column headers + melted_df = pd.melt(_df, id_vars=['year']).rename(columns={'variable_0': 'draw', 'variable_1': 'run'}) + # Replace item_code with consumable_name_tlo + melted_df['cost_subcategory'] = label + melted_df['cost_category'] = 'malaria scale-up' + melted_df['cost_subgroup'] = 'NA' + melted_df['Facility_Level'] = 'all' + melted_df = melted_df.rename(columns={'value': 'cost'}) + return melted_df + +# Iterate through additional costs, melt and concatenate +for df, label in malaria_scaleup_costs: + new_df = melt_and_label_malaria_scaleup_cost(df, label) + input_costs = pd.concat([input_costs, new_df], ignore_index=True) + + +# Calculate incremental cost +#----------------------------------------------------------------------------------------------------------------------- + +def find_difference_relative_to_comparison(_ser: pd.Series, + comparison: str, + scaled: bool = False, + drop_comparison: bool = True, + ): + """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) + within the runs (level 1), relative to where draw = `comparison`. + The comparison is `X - COMPARISON`.""" + return _ser \ + .unstack(level=0) \ + .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) \ + .drop(columns=([comparison] if drop_comparison else [])) \ + .stack() + +# TODO the following calculation should first capture the different by run and then be summarised +incremental_scenario_cost = (pd.DataFrame( + find_difference_relative_to_comparison( + total_scenario_cost_wide.loc[0], + comparison= 0) # sets the comparator to 0 which is the Actual scenario + ).T.iloc[0].unstack()).T + +# %% +# Monetary value of health impact +def get_num_dalys(_df): + """Return total number of DALYS (Stacked) by label (total within the TARGET_PERIOD). + Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using + results from runs that crashed mid-way through the simulation. + """ + years_needed = [i.year for i in TARGET_PERIOD_INTERVENTION] + assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." 
+ return pd.Series( + data=_df + .loc[_df.year.between(*years_needed)] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum().sum() + ) + +num_dalys = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys, + do_scaling=True + ) + +num_dalys_summarized = summarize(num_dalys).loc[0].unstack() +#num_dalys_summarized['scenario'] = scenarios.to_list() # add when scenarios have names +#num_dalys_summarized = num_dalys_summarized.set_index('scenario') + +# Get absolute DALYs averted +num_dalys_averted = summarize( + -1.0 * + pd.DataFrame( + find_difference_relative_to_comparison( + num_dalys.loc[0], + comparison= 0) # sets the comparator to 0 which is the Actual scenario + ).T + ).iloc[0].unstack() +#num_dalys_averted['scenario'] = scenarios.to_list()[1:12] +#num_dalys_averted = num_dalys_averted.set_index('scenario') + +chosen_cet = 77.4 # based on Ochalek et al (2018) - the paper provided the value $61 in 2016 USD terms, this value is in 2023 USD terms +monetary_value_of_incremental_health = num_dalys_averted * chosen_cet +max_ability_to_pay_for_implementation = monetary_value_of_incremental_health - incremental_scenario_cost # monetary value - change in costs diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 774cc73e85..6fcc897e85 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -78,7 +78,7 @@ def melt_model_output_draws_and_runs(_df, id_vars): fac_levels = set(mfl.Facility_Level) # Overall cost assumptions - TARGET_PERIOD = (Date(2010, 1, 1), Date(2030, 12, 31)) # Declare period for which the results will be generated (defined inclusively) + TARGET_PERIOD = (Date(first_year_of_simulation, 1, 1), Date(final_year_of_simulation, 12, 31)) # Declare period for which the results will be generated (defined inclusively) discount_rate = 0.03 # Read all cost parameters @@ -631,172 +631,6 @@ def update_itemuse_for_level1b_using_level2_data(_df): return scenario_cost -# Load result files -#------------------- -#results_folder = get_scenario_outputs('htm_with_and_without_hss-2024-09-04T143044Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios version 1 -#results_folder = get_scenario_outputs('hss_elements-2024-09-04T142900Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios version 1 -resourcefilepath = Path("./resources") -outputfilepath = Path('./outputs/t.mangal@imperial.ac.uk') -results_folder = get_scenario_outputs('htm_with_and_without_hss-2024-10-12T111720Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios version 2 -#results_folder = get_scenario_outputs('hss_elements-2024-10-12T111649Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios version 2 - -# Check can read results from draw=0, run=0 -#log = load_pickled_dataframes(results_folder, 0, 0) # look at one log (so can decide what to extract) -#params = extract_params(results_folder) -#population_scaling_factor = log['tlo.methods.demography']['scaling_factor']['scaling_factor'].iloc[0] - -# Estimate cost of scenario -input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, draws = [0], runs = [0], cost_only_used_staff=True) # summarise = True -input_costs[(input_costs.year == 2018) & (input_costs.stat == 'mean')].groupby('cost_category')['cost'].sum() -input_costs[(input_costs.year == 2018) & (input_costs.stat == 'mean')].groupby('cost_subcategory')['cost'].sum() - -# Create folders to store results -costing_outputs_folder = 
Path('./outputs/costing') -if not os.path.exists(costing_outputs_folder): - os.makedirs(costing_outputs_folder) -figurespath = costing_outputs_folder / "figures" -if not os.path.exists(figurespath): - os.makedirs(figurespath) - -# Additional costs pertaining to simulation -# IRS costs -irs_coverage_rate = 0.8 -districts_with_irs_scaleup = ['Kasungu', 'Mchinji', 'Lilongwe', 'Lilongwe City', 'Dowa', 'Ntchisi', 'Salima', 'Mangochi', - 'Mwanza', 'Likoma', 'Nkhotakota'] -proportion_of_district_with_irs_coverage = len(districts_with_irs_scaleup)/mfl.District.nunique() -# Get total population (11/total number o districts in mfl) -#TARGET_PERIOD_MALARIA_SCALEUP = (Date(2024, 1, 1), Date(2030, 12, 31)) -def get_total_population(_df): - years_needed = [i.year for i in TARGET_PERIOD] - _df['year'] = pd.to_datetime(_df['date']).dt.year - assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." - return pd.Series( - data=_df - .loc[_df.year.between(*years_needed)] - .drop(columns=['male', 'female', 'date']).set_index('year').sum(axis = 1) - ) - -total_population_by_year = summarize(extract_results( - results_folder, - module='tlo.methods.demography', - key='population', - custom_generate_series=get_total_population, - do_scaling=True -)) - -#years_with_no_malaria_scaleup = set(TARGET_PERIOD).symmetric_difference(set(TARGET_PERIOD_MALARIA_SCALEUP)) -#years_with_no_malaria_scaleup = sorted(list(years_with_no_malaria_scaleup)) -#years_with_no_malaria_scaleup = [i.year for i in years_with_no_malaria_scaleup] - -irs_cost_per_person = unit_price_consumable[unit_price_consumable.Item_Code == 161]['Final_price_per_chosen_unit (USD, 2023)'] -irs_multiplication_factor = irs_cost_per_person * irs_coverage_rate * proportion_of_district_with_irs_coverage -total_irs_cost = irs_multiplication_factor.iloc[0] * total_population_by_year # for districts and scenarios included -# TODO scenarios_with_irs_scaleup -# TODO population_in_district from malria logger - -# Bednet costs -bednet_coverage_rate = 0.7 -# All districts covered -# We can assume 3-year lifespan of a bednet, each bednet covering 1.8 people. 
-unit_cost_of_bednet = unit_price_consumable[unit_price_consumable.Item_Code == 160]['Final_price_per_chosen_unit (USD, 2023)'] -annual_bednet_cost_per_person = unit_cost_of_bednet / 1.8 / 3 -bednet_multiplication_factor = bednet_coverage_rate * annual_bednet_cost_per_person -total_bednet_cost = bednet_multiplication_factor.iloc[0] * total_population_by_year # for scenarios included - -years_with_no_malaria_scaleup = list(range(first_year_of_simulation, 2024)) -def set_cost_during_years_before_malaria_scaleup_to_zero(_df): - for col in _df.columns: - for y in years_with_no_malaria_scaleup: - _df.loc[_df.index.get_level_values(0) == y, col] = 0 - return _df - -total_bednet_cost = set_cost_during_years_before_malaria_scaleup_to_zero(total_bednet_cost) -total_irs_cost = set_cost_during_years_before_malaria_scaleup_to_zero(total_irs_cost) -# TODO Scale-up programmes are implemented from 01/01/2024 - -# Malaria scale-up costs - TOTAL -malaria_scaleup_costs = [ - (total_irs_cost.reset_index(), 'cost_of_IRS_scaleup'), - (total_bednet_cost.reset_index(), 'cost_of_bednet_scaleup'), -] -# Iterate through additional costs, melt and concatenate -for df, label in malaria_scaleup_costs: - new_df = melt_and_label_consumables_cost(df, label) - scenario_cost = pd.concat([scenario_cost, new_df], ignore_index=True) -scenario_cost.loc[scenario_cost['Cost_Category'].isna(),'Cost_Category'] = 'IRS and Bednet Scale-up Costs' - -# Calculate total cost -total_scenario_cost = scenario_cost[(scenario_cost.year >= 2020) & (scenario_cost.year <= 2030)].groupby(['draw', 'stat'])['value'].sum().unstack() -total_scenario_cost = total_scenario_cost.unstack().reset_index() -total_scenario_cost_wide = total_scenario_cost.pivot_table(index=None, columns=['draw', 'stat'], values=0) - -# Calculate incremental cost -def find_difference_relative_to_comparison(_ser: pd.Series, - comparison: str, - scaled: bool = False, - drop_comparison: bool = True, - ): - """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) - within the runs (level 1), relative to where draw = `comparison`. - The comparison is `X - COMPARISON`.""" - return _ser \ - .unstack(level=0) \ - .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) \ - .drop(columns=([comparison] if drop_comparison else [])) \ - .stack() - -# TODO the following calculation should first capture the different by run and then be summarised -incremental_scenario_cost = (pd.DataFrame( - find_difference_relative_to_comparison( - total_scenario_cost_wide.loc[0], - comparison= 0) # sets the comparator to 0 which is the Actual scenario - ).T.iloc[0].unstack()).T - -# %% -# Monetary value of health impact -TARGET_PERIOD_INTERVENTION = (Date(2020, 1, 1), Date(2030, 12, 31)) -def get_num_dalys(_df): - """Return total number of DALYS (Stacked) by label (total within the TARGET_PERIOD). - Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using - results from runs that crashed mid-way through the simulation. - """ - years_needed = [i.year for i in TARGET_PERIOD_INTERVENTION] - assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." 
- return pd.Series( - data=_df - .loc[_df.year.between(*years_needed)] - .drop(columns=['date', 'sex', 'age_range', 'year']) - .sum().sum() - ) - -num_dalys = extract_results( - results_folder, - module='tlo.methods.healthburden', - key='dalys_stacked', - custom_generate_series=get_num_dalys, - do_scaling=True - ) - -num_dalys_summarized = summarize(num_dalys).loc[0].unstack() -#num_dalys_summarized['scenario'] = scenarios.to_list() # add when scenarios have names -#num_dalys_summarized = num_dalys_summarized.set_index('scenario') - -# Get absolute DALYs averted -num_dalys_averted = summarize( - -1.0 * - pd.DataFrame( - find_difference_relative_to_comparison( - num_dalys.loc[0], - comparison= 0) # sets the comparator to 0 which is the Actual scenario - ).T - ).iloc[0].unstack() -#num_dalys_averted['scenario'] = scenarios.to_list()[1:12] -#num_dalys_averted = num_dalys_averted.set_index('scenario') - -chosen_cet = 77.4 # based on Ochalek et al (2018) - the paper provided the value $61 in 2016 USD terms, this value is in 2023 USD terms -monetary_value_of_incremental_health = num_dalys_averted * chosen_cet -max_ability_to_pay_for_implementation = monetary_value_of_incremental_health - incremental_scenario_cost # monetary value - change in costs - # Plot costs #################################################### # 1. Stacked bar plot (Total cost + Cost categories) From ebd129f2f0ed3a742bab9a6c7bf4b9714c432054 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Thu, 31 Oct 2024 18:08:18 +0000 Subject: [PATCH 135/230] update the unit cost of O2, blood and IRS to include supply chain costs - and accordingly update the calculation of the supply chain costs for the remaining consumables - separate the cost of O2, blood and iRS into a cost_category called 'other' --- resources/costing/ResourceFile_Costing.xlsx | 4 +- .../costing/cost_analysis_roi_of_hss.py | 53 +++++++++++++------ src/scripts/costing/cost_estimation.py | 32 +++++++++-- 3 files changed, 68 insertions(+), 21 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index c14e54fdad..5295049142 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e86b39ea2fb3fb9ea42accce43da0466c8fd35205fe42da1c4e0d6d11869ea3f -size 4274726 +oid sha256:a43a39618e76c27f1eeb4874318960e9164ea4c26dfe3385b1d0bdb5101134ec +size 4274907 diff --git a/src/scripts/costing/cost_analysis_roi_of_hss.py b/src/scripts/costing/cost_analysis_roi_of_hss.py index bff0a667a0..59edcc3f2b 100644 --- a/src/scripts/costing/cost_analysis_roi_of_hss.py +++ b/src/scripts/costing/cost_analysis_roi_of_hss.py @@ -35,6 +35,14 @@ # Print the start time of the script print('Script Start', datetime.datetime.now().strftime('%H:%M')) +# Create folders to store results +costing_outputs_folder = Path('./outputs/costing') +if not os.path.exists(costing_outputs_folder): + os.makedirs(costing_outputs_folder) +figurespath = costing_outputs_folder / "global_fund_roi_analysis" +if not os.path.exists(figurespath): + os.makedirs(figurespath) + # Load result files #------------------- #results_folder = get_scenario_outputs('htm_with_and_without_hss-2024-09-04T143044Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios version 1 @@ -59,17 +67,27 @@ # Estimate standard input costs of scenario #----------------------------------------------------------------------------------------------------------------------- input_costs = 
estimate_input_cost_of_scenarios(results_folder, resourcefilepath , cost_only_used_staff=True) # summarise = True - -# Create folders to store results -costing_outputs_folder = Path('./outputs/costing') -if not os.path.exists(costing_outputs_folder): - os.makedirs(costing_outputs_folder) -figurespath = costing_outputs_folder / "global_fund_roi_analysis" -if not os.path.exists(figurespath): - os.makedirs(figurespath) +input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath , draws = [0], runs = [0], cost_only_used_staff=True) # summarise = True # Add additional costs pertaining to simulation #----------------------------------------------------------------------------------------------------------------------- +# Extract supply chain cost as a proportion of consumable costs to apply to malaria scale-up commodities +# Load primary costing resourcefile +workbook_cost = pd.read_excel((resourcefilepath / "costing/ResourceFile_Costing.xlsx"), + sheet_name=None) +# Assume that the cost of procurement, warehousing and distribution is a fixed proportion of consumable purchase costs +# The fixed proportion is based on Resource Mapping Expenditure data from 2018 +resource_mapping_data = workbook_cost["resource_mapping_r7_summary"] +# Make sure values are numeric +expenditure_column = ['EXPENDITURE (USD) (Jul 2018 - Jun 2019)'] +resource_mapping_data[expenditure_column] = resource_mapping_data[expenditure_column].apply( + lambda x: pd.to_numeric(x, errors='coerce')) +supply_chain_expenditure = \ +resource_mapping_data[resource_mapping_data['Cost Type'] == 'Supply Chain'][expenditure_column].sum()[0] +consumables_purchase_expenditure = \ +resource_mapping_data[resource_mapping_data['Cost Type'] == 'Drugs and Commodities'][expenditure_column].sum()[0] +supply_chain_cost_proportion = supply_chain_expenditure / consumables_purchase_expenditure + # In this case malaria intervention scale-up costs were not included in the standard estimate_input_cost_of_scenarios function list_of_draws_with_malaria_scaleup_parameters = params[(params.module_param == 'Malaria:scaleup_start_year')] list_of_draws_with_malaria_scaleup_parameters.loc[:,'value'] = pd.to_numeric(list_of_draws_with_malaria_scaleup_parameters['value']) @@ -81,12 +99,12 @@ 'Mwanza', 'Likoma', 'Nkhotakota'] # Convert above list of district names to numeric district identifiers district_keys_with_irs_scaleup = [key for key, name in district_dict.items() if name in districts_with_irs_scaleup] -#proportion_of_district_with_irs_coverage = len(districts_with_irs_scaleup)/mfl.District.nunique() TARGET_PERIOD_MALARIA_SCALEUP = (Date(2024, 1, 1), Date(2030, 12, 31)) # Get population by district def get_total_population_by_district(_df): - years_needed = [i.year for i in TARGET_PERIOD_MALARIA_SCALEUP] + years_needed = [i.year for i in TARGET_PERIOD_MALARIA_SCALEUP] # we only consider the population for the malaria scale-up period + # because those are the years relevant for malaria scale-up costing _df['year'] = pd.to_datetime(_df['date']).dt.year assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." 
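# The melt on the next line reshapes the wide population log (one column per district) into long
# format with ['year', 'district', 'value'] columns, so that counts can later be restricted to the
# districts and draws in which the IRS scale-up is implemented.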
_df = pd.melt(_df.drop(columns = 'date'), id_vars = ['year']).rename(columns = {'variable': 'district'}) @@ -121,19 +139,18 @@ def get_number_of_people_covered_by_malaria_scaleup(_df, list_of_districts_cover list_of_districts_covered=district_keys_with_irs_scaleup, draws_included = list_of_draws_with_malaria_scaleup_implemented_in_costing_period) -#years_with_no_malaria_scaleup = set(TARGET_PERIOD).symmetric_difference(set(TARGET_PERIOD_MALARIA_SCALEUP)) -#years_with_no_malaria_scaleup = sorted(list(years_with_no_malaria_scaleup)) -#years_with_no_malaria_scaleup = [i.year for i in years_with_no_malaria_scaleup] irs_cost_per_person = unit_price_consumable[unit_price_consumable.Item_Code == 161]['Final_price_per_chosen_unit (USD, 2023)'] +# The above unit cost already includes implementation - project management (17%), personnel (6%), vehicles (10%), equipment (6%), monitoring and evaluation (3%), training (3%), +# other commodities (3%) and buildings (2%) from Alonso et al (2021) irs_multiplication_factor = irs_cost_per_person * irs_coverage_rate total_irs_cost = irs_multiplication_factor.iloc[0] * district_population_covered_by_irs_scaleup_by_year # for districts and scenarios included total_irs_cost = total_irs_cost.groupby(level='year').sum() -# TODO melt irs_cost # 2. Bednet costs bednet_coverage_rate = 0.7 # We can assume 3-year lifespan of a bednet, each bednet covering 1.8 people. -unit_cost_of_bednet = unit_price_consumable[unit_price_consumable.Item_Code == 160]['Final_price_per_chosen_unit (USD, 2023)'] +unit_cost_of_bednet = unit_price_consumable[unit_price_consumable.Item_Code == 160]['Final_price_per_chosen_unit (USD, 2023)'] * (1 + supply_chain_cost_proportion) +# We add supply chain costs (procurement + distribution + warehousing) because the unit_cost does not include this annual_bednet_cost_per_person = unit_cost_of_bednet / 1.8 / 3 bednet_multiplication_factor = bednet_coverage_rate * annual_bednet_cost_per_person @@ -235,3 +252,9 @@ def get_num_dalys(_df): chosen_cet = 77.4 # based on Ochalek et al (2018) - the paper provided the value $61 in 2016 USD terms, this value is in 2023 USD terms monetary_value_of_incremental_health = num_dalys_averted * chosen_cet max_ability_to_pay_for_implementation = monetary_value_of_incremental_health - incremental_scenario_cost # monetary value - change in costs + +''' +#years_with_no_malaria_scaleup = set(TARGET_PERIOD).symmetric_difference(set(TARGET_PERIOD_MALARIA_SCALEUP)) +#years_with_no_malaria_scaleup = sorted(list(years_with_no_malaria_scaleup)) +#years_with_no_malaria_scaleup = [i.year for i in years_with_no_malaria_scaleup] +''' diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 6fcc897e85..555feae17f 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -35,6 +35,7 @@ # Print the start time of the script print('Script Start', datetime.datetime.now().strftime('%H:%M')) +#%% def estimate_input_cost_of_scenarios(results_folder: Path, resourcefilepath: Path = None, draws = None, runs = None, summarize: bool = False, cost_only_used_staff: bool = True): # Useful common functions @@ -432,7 +433,7 @@ def get_counts_of_items_requested(_df): cost_of_excess_consumables_stocked[quantity_columns] = cost_of_excess_consumables_stocked[quantity_columns].multiply(cost_of_excess_consumables_stocked[idx[price_column]], axis=0) cost_of_excess_consumables_stocked[quantity_columns] = 
cost_of_excess_consumables_stocked[quantity_columns].multiply(cost_of_excess_consumables_stocked[idx['excess_stock_proportion_of_dispensed']], axis=0) - # 2.3 Store all HR costs in one standard format dataframe + # 2.3 Store all consumable costs in one standard format dataframe #--------------------------------------------------------------------------------------------------------------- # Function to melt and label the cost category consumables_dict = pd.read_csv(path_for_consumable_resourcefiles / 'ResourceFile_consumables_matched.csv', low_memory=False, @@ -456,8 +457,25 @@ def melt_and_label_consumables_cost(_df, label): melted_df = melted_df.rename(columns = {'value': 'cost'}) return melted_df - cost_of_consumables_dispensed = retain_relevant_column_subset(melt_and_label_consumables_cost(cost_of_consumables_dispensed, 'cost_of_consumables_dispensed'), 'consumable') - cost_of_excess_consumables_stocked = retain_relevant_column_subset(melt_and_label_consumables_cost(cost_of_excess_consumables_stocked, 'cost_of_excess_consumables_stocked'), 'consumable') + def disaggregate_separately_managed_medical_supplies_from_consumable_costs(_df, + _consumables_dict, # This is a dictionary mapping codes to names + list_of_unique_medical_products): + reversed_consumables_dict = {value: key for key, value in _consumables_dict.items()} # reverse dictionary to map names to codes + new_df = _df.copy() + new_df['item_code'] = new_df['consumable'].map(reversed_consumables_dict) + cost_of_consumables = new_df[~new_df['item_code'].isin(list_of_unique_medical_products)] + cost_of_separately_managed_medical_supplies = new_df[new_df['item_code'].isin(list_of_unique_medical_products)] + cost_of_separately_managed_medical_supplies['cost_subcategory'] = cost_of_separately_managed_medical_supplies['cost_subcategory'] + 'separately_managed_medical_supplies' + return cost_of_consumables.drop(columns = 'item_code'), cost_of_separately_managed_medical_supplies.drop(columns = 'item_code') + + separately_managed_medical_supplies = [127, 141, 161] # Oxygen, Blood, IRS + cost_of_consumables_dispensed, cost_of_separately_managed_medical_supplies_dispensed = disaggregate_separately_managed_medical_supplies_from_consumable_costs(_df = retain_relevant_column_subset(melt_and_label_consumables_cost(cost_of_consumables_dispensed, 'cost_of_consumables_dispensed'), 'consumable'), + _consumables_dict = consumables_dict, + list_of_unique_medical_products = separately_managed_medical_supplies) + cost_of_excess_consumables_stocked, cost_of_separately_managed_medical_supplies_excess_stock = disaggregate_separately_managed_medical_supplies_from_consumable_costs(_df = retain_relevant_column_subset(melt_and_label_consumables_cost(cost_of_excess_consumables_stocked, 'cost_of_excess_consumables_stocked'), 'consumable'), + _consumables_dict=consumables_dict, + list_of_unique_medical_products=separately_managed_medical_supplies) + consumable_costs = pd.concat([cost_of_consumables_dispensed, cost_of_excess_consumables_stocked]) # 2.4 Supply chain costs @@ -473,6 +491,8 @@ def melt_and_label_consumables_cost(_df, label): supply_chain_cost_proportion = supply_chain_expenditure / consumables_purchase_expenditure # Estimate supply chain costs based on the total consumable purchase cost calculated above + # Note that Oxygen, IRS, and Blood costs are already excluded because the unit_cost of these commodities already + # includes the procurement/production, storage and distribution costs supply_chain_costs = (consumable_costs.groupby(['draw', 'run', 
'year'])[ 'cost'].sum() * supply_chain_cost_proportion).reset_index() # Assign relevant additional columns to match the format of the rest of consumables costs @@ -483,14 +503,18 @@ def melt_and_label_consumables_cost(_df, label): # Append supply chain costs to the full consumable cost dataframe consumable_costs = pd.concat([consumable_costs, supply_chain_costs]) + other_costs = pd.concat([cost_of_separately_managed_medical_supplies_dispensed, cost_of_separately_managed_medical_supplies_excess_stock]) consumable_costs = prepare_cost_dataframe(consumable_costs, _category_specific_group = 'consumable', _cost_category = 'medical consumables') + other_costs = prepare_cost_dataframe(other_costs, _category_specific_group = 'consumable', _cost_category = 'other') # Only preserve the draws and runs requested if draws is not None: consumable_costs = consumable_costs[consumable_costs.draw.isin(draws)] + other_costs = other_costs[other_costs.draw.isin(draws)] if runs is not None: consumable_costs = consumable_costs[consumable_costs.run.isin(runs)] + other_costs = other_costs[other_costs.run.isin(runs)] # %% @@ -609,7 +633,7 @@ def update_itemuse_for_level1b_using_level2_data(_df): # %% # Store all costs in single dataframe #-------------------------------------------- - scenario_cost = pd.concat([human_resource_costs, consumable_costs, equipment_costs], ignore_index=True) + scenario_cost = pd.concat([human_resource_costs, consumable_costs, equipment_costs, other_costs], ignore_index=True) scenario_cost['cost'] = pd.to_numeric(scenario_cost['cost'], errors='coerce') # Summarize costs From e1cc5606c75cd5640dffe5ff4fe801782c15fb71 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Thu, 31 Oct 2024 18:39:33 +0000 Subject: [PATCH 136/230] update function to produce stacked bar chart summaries of cost --- .../costing/cost_analysis_roi_of_hss.py | 15 +++++++-- src/scripts/costing/cost_estimation.py | 32 +++++++------------ 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/src/scripts/costing/cost_analysis_roi_of_hss.py b/src/scripts/costing/cost_analysis_roi_of_hss.py index 59edcc3f2b..3bf9526b4e 100644 --- a/src/scripts/costing/cost_analysis_roi_of_hss.py +++ b/src/scripts/costing/cost_analysis_roi_of_hss.py @@ -29,6 +29,8 @@ unflatten_flattened_multi_index_in_logging ) +from scripts.costing.cost_estimation import (estimate_input_cost_of_scenarios, + do_stacked_bar_plot_of_cost_by_category) # Define a timestamp for script outputs timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M") @@ -67,7 +69,7 @@ # Estimate standard input costs of scenario #----------------------------------------------------------------------------------------------------------------------- input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath , cost_only_used_staff=True) # summarise = True -input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath , draws = [0], runs = [0], cost_only_used_staff=True) # summarise = True +#input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath , draws = [0], runs = [0], cost_only_used_staff=True, summarize = True) # Add additional costs pertaining to simulation #----------------------------------------------------------------------------------------------------------------------- @@ -173,7 +175,7 @@ def melt_and_label_malaria_scaleup_cost(_df, label): melted_df = pd.melt(_df, id_vars=['year']).rename(columns={'variable_0': 'draw', 'variable_1': 'run'}) # Replace item_code with consumable_name_tlo 
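# After the melt, each row is a single (year, draw, run) cost observation; the assignments below
# attach the cost-category labels and rename 'value' to 'cost' so that the frame matches the
# standard layout of input_costs before the two are concatenated.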
melted_df['cost_subcategory'] = label - melted_df['cost_category'] = 'malaria scale-up' + melted_df['cost_category'] = 'other' melted_df['cost_subgroup'] = 'NA' melted_df['Facility_Level'] = 'all' melted_df = melted_df.rename(columns={'value': 'cost'}) @@ -253,6 +255,15 @@ def get_num_dalys(_df): monetary_value_of_incremental_health = num_dalys_averted * chosen_cet max_ability_to_pay_for_implementation = monetary_value_of_incremental_health - incremental_scenario_cost # monetary value - change in costs +# Plot costs +#----------------------------------------------------------------------------------------------------------------------- +do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'medical consumables', _year = [2018], _outputfilepath = figurespath) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'human resources for health', _year = [2018], _outputfilepath = figurespath) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'medical equipment', _year = [2018], _outputfilepath = figurespath) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'other', _year = [2018], _outputfilepath = figurespath) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _year = [2018], _outputfilepath = figurespath) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'other', _year = list(range(2020, 2030)), _outputfilepath = figurespath) + ''' #years_with_no_malaria_scaleup = set(TARGET_PERIOD).symmetric_difference(set(TARGET_PERIOD_MALARIA_SCALEUP)) #years_with_no_malaria_scaleup = sorted(list(years_with_no_malaria_scaleup)) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 555feae17f..49ba8a89f6 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -465,7 +465,8 @@ def disaggregate_separately_managed_medical_supplies_from_consumable_costs(_df, new_df['item_code'] = new_df['consumable'].map(reversed_consumables_dict) cost_of_consumables = new_df[~new_df['item_code'].isin(list_of_unique_medical_products)] cost_of_separately_managed_medical_supplies = new_df[new_df['item_code'].isin(list_of_unique_medical_products)] - cost_of_separately_managed_medical_supplies['cost_subcategory'] = cost_of_separately_managed_medical_supplies['cost_subcategory'] + 'separately_managed_medical_supplies' + cost_of_separately_managed_medical_supplies['cost_subcategory'] = cost_of_separately_managed_medical_supplies['cost_subcategory'].replace( + {'consumables_dispensed': 'separately_managed_medical_supplies_dispensed', 'consumables_stocked': 'separately_managed_medical_supplies_stocked'}, regex=True) return cost_of_consumables.drop(columns = 'item_code'), cost_of_separately_managed_medical_supplies.drop(columns = 'item_code') separately_managed_medical_supplies = [127, 141, 161] # Oxygen, Blood, IRS @@ -659,44 +660,35 @@ def update_itemuse_for_level1b_using_level2_data(_df): #################################################### # 1. 
Stacked bar plot (Total cost + Cost categories) #---------------------------------------------------- -def do_stacked_bar_plot(_df, cost_category, year, actual_expenditure): +def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', _year = 'all', _outputfilepath: Path = None): # Subset and Pivot the data to have 'Cost Sub-category' as columns # Make a copy of the dataframe to avoid modifying the original _df = _df[_df.stat == 'mean'].copy() # Convert 'value' to millions - _df['value'] = _df['value'] / 1e6 - if year == 'all': + _df['cost'] = _df['cost'] / 1e6 + if _year == 'all': subset_df = _df else: - subset_df = _df[_df['year'].isin(year)] - if cost_category == 'all': + subset_df = _df[_df['year'].isin(_year)] + if _cost_category == 'all': subset_df = subset_df - pivot_df = subset_df.pivot_table(index='draw', columns='Cost_Category', values='value', aggfunc='sum') + pivot_df = subset_df.pivot_table(index='draw', columns='cost_category', values='cost', aggfunc='sum') else: - subset_df = subset_df[subset_df['Cost_Category'] == cost_category] - pivot_df = subset_df.pivot_table(index='draw', columns='Cost_Sub-category', values='value', aggfunc='sum') + subset_df = subset_df[subset_df['cost_category'] == _cost_category] + pivot_df = subset_df.pivot_table(index='draw', columns='cost_subcategory', values='cost', aggfunc='sum') # Plot a stacked bar chart pivot_df.plot(kind='bar', stacked=True) - # Add a horizontal red line to represent 2018 Expenditure as per resource mapping - #plt.axhline(y=actual_expenditure/1e6, color='red', linestyle='--', label='Actual expenditure recorded in 2018') # Save plot plt.xlabel('Scenario') plt.ylabel('Cost (2023 USD), millions') plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right') - plt.title(f'Costs by Scenario \n (Cost Category = {cost_category} ; Year = {year})') - plt.savefig(figurespath / f'stacked_bar_chart_{cost_category}_year_{year}.png', dpi=100, + plt.title(f'Costs by Scenario \n (Cost Category = {_cost_category} ; Year = {_year})') + plt.savefig(_outputfilepath / f'stacked_bar_chart_{_cost_category}_year_{_year}.png', dpi=100, bbox_inches='tight') plt.close() -do_stacked_bar_plot(_df = scenario_cost, cost_category = 'Medical consumables', year = 2018, actual_expenditure = 206_747_565) -do_stacked_bar_plot(_df = scenario_cost, cost_category = 'Human Resources for Health', year = 2018, actual_expenditure = 128_593_787) -do_stacked_bar_plot(_df = scenario_cost, cost_category = 'Equipment purchase and maintenance', year = 2018, actual_expenditure = 6_048_481) -do_stacked_bar_plot(_df = scenario_cost, cost_category = 'all', year = [2020], actual_expenditure = 624_054_027) -do_stacked_bar_plot(_df = scenario_cost, cost_category = 'all', year = [2024], actual_expenditure = 624_054_027) -do_stacked_bar_plot(_df = scenario_cost, cost_category = 'all', year = list(range(2020,2031)), actual_expenditure = np.nan) - # 2. 
Line plots of total costs #---------------------------------------------------- def do_line_plot(_df, cost_category, actual_expenditure, _draw): From 0b62982539db1e5dabf1e3930a77f5281e01506b Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Fri, 1 Nov 2024 16:34:00 +0000 Subject: [PATCH 137/230] clean figure name and title for stacked bar charts --- .../costing/cost_analysis_roi_of_hss.py | 3 +- src/scripts/costing/cost_estimation.py | 29 ++++++++++++++----- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/scripts/costing/cost_analysis_roi_of_hss.py b/src/scripts/costing/cost_analysis_roi_of_hss.py index 3bf9526b4e..168f8e5574 100644 --- a/src/scripts/costing/cost_analysis_roi_of_hss.py +++ b/src/scripts/costing/cost_analysis_roi_of_hss.py @@ -261,8 +261,7 @@ def get_num_dalys(_df): do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'human resources for health', _year = [2018], _outputfilepath = figurespath) do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'medical equipment', _year = [2018], _outputfilepath = figurespath) do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'other', _year = [2018], _outputfilepath = figurespath) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _year = [2018], _outputfilepath = figurespath) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'other', _year = list(range(2020, 2030)), _outputfilepath = figurespath) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _year = list(range(2020, 2030)), _outputfilepath = figurespath) ''' #years_with_no_malaria_scaleup = set(TARGET_PERIOD).symmetric_difference(set(TARGET_PERIOD_MALARIA_SCALEUP)) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 49ba8a89f6..919f4e18c6 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -680,24 +680,40 @@ def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', _year = # Plot a stacked bar chart pivot_df.plot(kind='bar', stacked=True) + # Period included for plot title and name + if _year == 'all': + period = (f"{min(_df['year'].unique())} - {max(_df['year'].unique())}") + elif (len(_year) == 1): + period = (f"{_year[0]}") + else: + period = (f"{min(_year)} - {max(_year)}") + # Save plot plt.xlabel('Scenario') plt.ylabel('Cost (2023 USD), millions') plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right') - plt.title(f'Costs by Scenario \n (Cost Category = {_cost_category} ; Year = {_year})') - plt.savefig(_outputfilepath / f'stacked_bar_chart_{_cost_category}_year_{_year}.png', dpi=100, + plt.title(f'Costs by Scenario \n (Cost Category = {_cost_category} ; Period = {period})') + plt.savefig(_outputfilepath / f'stacked_bar_chart_{_cost_category}_{period}.png', dpi=100, bbox_inches='tight') plt.close() # 2. 
Line plots of total costs #---------------------------------------------------- -def do_line_plot(_df, cost_category, actual_expenditure, _draw): +def do_line_plot(_df, cost_category = 'all', _draw = None, _year = 'all'): # Filter the dataframe based on the selected draw - subset_df = _df[_df.draw == _draw] + if _draw == None: + subset_df = _df + else: + subset_df = _df[_df.draw.isin(_draw)] if cost_category != 'all': subset_df = subset_df[subset_df['Cost_Category'] == cost_category] + if _year == 'all': + subset_df = subset_df + else: + subset_df = subset_df[subset_df['year'].isin(_year)] + # Reset the index for plotting purposes subset_df = subset_df.reset_index() @@ -713,9 +729,6 @@ def do_line_plot(_df, cost_category, actual_expenditure, _draw): # Add confidence interval using fill_between plt.fill_between(mean_values.index.get_level_values(1), lower_values, upper_values, color='b', alpha=0.2, label='95% CI') - # Add a horizontal red line to represent the actual expenditure - plt.axhline(y=actual_expenditure / 1e6, color='red', linestyle='--', label='Actual expenditure recorded in 2018') - # Set plot labels and title plt.xlabel('Year') plt.ylabel('Cost (2023 USD), millions') @@ -723,7 +736,7 @@ def do_line_plot(_df, cost_category, actual_expenditure, _draw): plt.title(f'Costs by Scenario \n (Cost Category = {cost_category} ; Draw = {_draw})') # Save the plot - plt.savefig(figurespath / f'trend_{cost_category}_{first_year_of_simulation}-{final_year_of_simulation}.png', + plt.savefig(figurespath / f'trend_{cost_category}_{min(_year)}-{max(_year)}.png', dpi=100, bbox_inches='tight') plt.close() From 26588b9ae2bb7e423c357c79078454dcf554d4a9 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Fri, 1 Nov 2024 16:52:01 +0000 Subject: [PATCH 138/230] clean line plots of trend in cost --- .../costing/cost_analysis_roi_of_hss.py | 9 +++- src/scripts/costing/cost_estimation.py | 50 +++++++++++-------- 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/src/scripts/costing/cost_analysis_roi_of_hss.py b/src/scripts/costing/cost_analysis_roi_of_hss.py index 168f8e5574..29760a6ed5 100644 --- a/src/scripts/costing/cost_analysis_roi_of_hss.py +++ b/src/scripts/costing/cost_analysis_roi_of_hss.py @@ -30,7 +30,8 @@ ) from scripts.costing.cost_estimation import (estimate_input_cost_of_scenarios, - do_stacked_bar_plot_of_cost_by_category) + do_stacked_bar_plot_of_cost_by_category, + do_line_plot_of_cost) # Define a timestamp for script outputs timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M") @@ -263,6 +264,12 @@ def get_num_dalys(_df): do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'other', _year = [2018], _outputfilepath = figurespath) do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _year = list(range(2020, 2030)), _outputfilepath = figurespath) +do_line_plot_of_cost(_df = input_costs, _cost_category = 'medical consumables', _year = 'all', _draws = None, _outputfilepath = figurespath) +do_line_plot_of_cost(_df = input_costs, _cost_category = 'human resources for health', _year = 'all', _draws = None, _outputfilepath = figurespath) +do_line_plot_of_cost(_df = input_costs, _cost_category = 'medical equipment', _year = 'all', _draws = None, _outputfilepath = figurespath) +do_line_plot_of_cost(_df = input_costs, _cost_category = 'other', _year = 'all', _draws = None, _outputfilepath = figurespath) +do_line_plot_of_cost(_df = input_costs, _cost_category = 'all', _year = 'all', _draws = None, _outputfilepath = figurespath) + ''' 
#years_with_no_malaria_scaleup = set(TARGET_PERIOD).symmetric_difference(set(TARGET_PERIOD_MALARIA_SCALEUP)) #years_with_no_malaria_scaleup = sorted(list(years_with_no_malaria_scaleup)) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 919f4e18c6..276b4c9d46 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -660,16 +660,22 @@ def update_itemuse_for_level1b_using_level2_data(_df): #################################################### # 1. Stacked bar plot (Total cost + Cost categories) #---------------------------------------------------- -def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', _year = 'all', _outputfilepath: Path = None): +def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', _year = 'all', _draws = None, _outputfilepath: Path = None): # Subset and Pivot the data to have 'Cost Sub-category' as columns # Make a copy of the dataframe to avoid modifying the original _df = _df[_df.stat == 'mean'].copy() # Convert 'value' to millions _df['cost'] = _df['cost'] / 1e6 - if _year == 'all': + if _draws == None: subset_df = _df else: - subset_df = _df[_df['year'].isin(_year)] + subset_df = _df[_df.draw.isin(_draws)] + + if _year == 'all': + subset_df = subset_df + else: + subset_df = subset_df[subset_df['year'].isin(_year)] + if _cost_category == 'all': subset_df = subset_df pivot_df = subset_df.pivot_table(index='draw', columns='cost_category', values='cost', aggfunc='sum') @@ -699,15 +705,15 @@ def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', _year = # 2. Line plots of total costs #---------------------------------------------------- -def do_line_plot(_df, cost_category = 'all', _draw = None, _year = 'all'): +def do_line_plot_of_cost(_df, _cost_category = 'all', _year = 'all', _draws = None, _outputfilepath: Path = None): # Filter the dataframe based on the selected draw - if _draw == None: + if _draws == None: subset_df = _df else: - subset_df = _df[_df.draw.isin(_draw)] + subset_df = _df[_df.draw.isin(_draws)] - if cost_category != 'all': - subset_df = subset_df[subset_df['Cost_Category'] == cost_category] + if _cost_category != 'all': + subset_df = subset_df[subset_df['cost_category'] == _cost_category] if _year == 'all': subset_df = subset_df @@ -718,34 +724,36 @@ def do_line_plot(_df, cost_category = 'all', _draw = None, _year = 'all'): subset_df = subset_df.reset_index() # Extract mean, lower, and upper values for the plot - mean_values = subset_df[subset_df.stat == 'mean'].groupby(['Cost_Category', 'year'])['value'].sum() / 1e6 - lower_values = subset_df[subset_df.stat == 'lower'].groupby(['Cost_Category', 'year'])['value'].sum() / 1e6 - upper_values = subset_df[subset_df.stat == 'upper'].groupby(['Cost_Category', 'year'])['value'].sum() / 1e6 - years = subset_df[subset_df.stat == 'mean']['year'] + mean_values = subset_df[subset_df.stat == 'mean'].groupby(['year'])['cost'].sum() / 1e6 + lower_values = subset_df[subset_df.stat == 'lower'].groupby(['year'])['cost'].sum() / 1e6 + upper_values = subset_df[subset_df.stat == 'upper'].groupby(['year'])['cost'].sum() / 1e6 # Plot the line for 'mean' - plt.plot(mean_values.index.get_level_values(1), mean_values, marker='o', linestyle='-', color='b', label='Mean') + plt.plot(mean_values.index.get_level_values(0), mean_values, marker='o', linestyle='-', color='b', label='Mean') # Add confidence interval using fill_between - plt.fill_between(mean_values.index.get_level_values(1), 
lower_values, upper_values, color='b', alpha=0.2, label='95% CI') + plt.fill_between(mean_values.index.get_level_values(0), lower_values, upper_values, color='b', alpha=0.2, label='95% CI') + + # Period included for plot title and name + if _year == 'all': + period = (f"{min(_df['year'].unique())} - {max(_df['year'].unique())}") + elif (len(_year) == 1): + period = (f"{_year[0]}") + else: + period = (f"{min(_year)} - {max(_year)}") # Set plot labels and title plt.xlabel('Year') plt.ylabel('Cost (2023 USD), millions') plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right') - plt.title(f'Costs by Scenario \n (Cost Category = {cost_category} ; Draw = {_draw})') + plt.title(f'Costs by Scenario \n (Cost Category = {_cost_category} ; Draw = {_draws}; Period = {period})') # Save the plot - plt.savefig(figurespath / f'trend_{cost_category}_{min(_year)}-{max(_year)}.png', + plt.savefig(_outputfilepath / f'trend_{_cost_category}_{period}.png', dpi=100, bbox_inches='tight') plt.close() -do_line_plot(_df = scenario_cost, cost_category = 'Medical consumables', _draw = 0, actual_expenditure = 206_747_565) -do_line_plot(_df = scenario_cost, cost_category = 'Human Resources for Health', _draw = 0, actual_expenditure = 128_593_787) -do_line_plot(_df = scenario_cost, cost_category = 'Equipment purchase and maintenance', _draw = 0, actual_expenditure = 6_048_481) -do_line_plot(_df = scenario_cost, cost_category = 'all', _draw = 0, actual_expenditure = 624_054_027) - # 3. Return on Investment Plot #---------------------------------------------------- # Plot ROI at various levels of cost From 55c134320de09a5422b59e578ea8acb0517dfe46 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Fri, 1 Nov 2024 18:56:06 +0000 Subject: [PATCH 139/230] update line plot to allow for disaggregation --- .../costing/cost_analysis_roi_of_hss.py | 25 ++- src/scripts/costing/cost_estimation.py | 159 +++++++++++++----- 2 files changed, 136 insertions(+), 48 deletions(-) diff --git a/src/scripts/costing/cost_analysis_roi_of_hss.py b/src/scripts/costing/cost_analysis_roi_of_hss.py index 29760a6ed5..81945ccacf 100644 --- a/src/scripts/costing/cost_analysis_roi_of_hss.py +++ b/src/scripts/costing/cost_analysis_roi_of_hss.py @@ -70,7 +70,8 @@ # Estimate standard input costs of scenario #----------------------------------------------------------------------------------------------------------------------- input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath , cost_only_used_staff=True) # summarise = True -#input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath , draws = [0], runs = [0], cost_only_used_staff=True, summarize = True) +#draws_included = [0,1,2,6,7] +#input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath , draws = draws_included, cost_only_used_staff=True, summarize = True) # Add additional costs pertaining to simulation #----------------------------------------------------------------------------------------------------------------------- @@ -78,6 +79,14 @@ # Load primary costing resourcefile workbook_cost = pd.read_excel((resourcefilepath / "costing/ResourceFile_Costing.xlsx"), sheet_name=None) +# Read parameters for consumables costs +# Load consumables cost data +unit_price_consumable = workbook_cost["consumables"] +unit_price_consumable = unit_price_consumable.rename(columns=unit_price_consumable.iloc[0]) +unit_price_consumable = unit_price_consumable[['Item_Code', 'Final_price_per_chosen_unit (USD, 2023)']].reset_index( + drop=True).iloc[1:] 
+unit_price_consumable = unit_price_consumable[unit_price_consumable['Item_Code'].notna()] + # Assume that the cost of procurement, warehousing and distribution is a fixed proportion of consumable purchase costs # The fixed proportion is based on Resource Mapping Expenditure data from 2018 resource_mapping_data = workbook_cost["resource_mapping_r7_summary"] @@ -187,6 +196,7 @@ def melt_and_label_malaria_scaleup_cost(_df, label): new_df = melt_and_label_malaria_scaleup_cost(df, label) input_costs = pd.concat([input_costs, new_df], ignore_index=True) +input_costs = input_costs[input_costs.draw.isin(draws_included)] # Calculate incremental cost #----------------------------------------------------------------------------------------------------------------------- @@ -258,17 +268,20 @@ def get_num_dalys(_df): # Plot costs #----------------------------------------------------------------------------------------------------------------------- -do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'medical consumables', _year = [2018], _outputfilepath = figurespath) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'human resources for health', _year = [2018], _outputfilepath = figurespath) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'medical equipment', _year = [2018], _outputfilepath = figurespath) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'medical consumables', _disaggregate_by_subgroup = True, _year = [2018], _outputfilepath = figurespath) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'human resources for health', _disaggregate_by_subgroup = True, _year = [2018], _outputfilepath = figurespath) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'medical equipment', _disaggregate_by_subgroup = True, _year = [2018], _outputfilepath = figurespath) do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'other', _year = [2018], _outputfilepath = figurespath) do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _year = list(range(2020, 2030)), _outputfilepath = figurespath) -do_line_plot_of_cost(_df = input_costs, _cost_category = 'medical consumables', _year = 'all', _draws = None, _outputfilepath = figurespath) +do_line_plot_of_cost(_df = input_costs, _cost_category = 'medical consumables', _year = 'all', _draws = [0], disaggregate_by= 'cost_subgroup',_outputfilepath = figurespath) +do_line_plot_of_cost(_df = input_costs, _cost_category = 'other', _year = 'all', _draws = [0], disaggregate_by= 'cost_subgroup',_outputfilepath = figurespath) +do_line_plot_of_cost(_df = input_costs, _cost_category = 'human resources for health', _year = 'all', _draws = [0], disaggregate_by= 'cost_subgroup',_outputfilepath = figurespath) + do_line_plot_of_cost(_df = input_costs, _cost_category = 'human resources for health', _year = 'all', _draws = None, _outputfilepath = figurespath) do_line_plot_of_cost(_df = input_costs, _cost_category = 'medical equipment', _year = 'all', _draws = None, _outputfilepath = figurespath) do_line_plot_of_cost(_df = input_costs, _cost_category = 'other', _year = 'all', _draws = None, _outputfilepath = figurespath) -do_line_plot_of_cost(_df = input_costs, _cost_category = 'all', _year = 'all', _draws = None, _outputfilepath = figurespath) +do_line_plot_of_cost(_df = input_costs, _cost_category = 'all', _year = 'all', disaggregate_by= 'cost_category', _draws = None, _outputfilepath = 
figurespath) ''' #years_with_no_malaria_scaleup = set(TARGET_PERIOD).symmetric_difference(set(TARGET_PERIOD_MALARIA_SCALEUP)) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 276b4c9d46..6fcff00b60 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -660,7 +660,7 @@ def update_itemuse_for_level1b_using_level2_data(_df): #################################################### # 1. Stacked bar plot (Total cost + Cost categories) #---------------------------------------------------- -def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', _year = 'all', _draws = None, _outputfilepath: Path = None): +def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', _disaggregate_by_subgroup: bool = False,_year = 'all', _draws = None, _outputfilepath: Path = None): # Subset and Pivot the data to have 'Cost Sub-category' as columns # Make a copy of the dataframe to avoid modifying the original _df = _df[_df.stat == 'mean'].copy() @@ -677,14 +677,44 @@ def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', _year = subset_df = subset_df[subset_df['year'].isin(_year)] if _cost_category == 'all': - subset_df = subset_df - pivot_df = subset_df.pivot_table(index='draw', columns='cost_category', values='cost', aggfunc='sum') + if (_disaggregate_by_subgroup == True): + raise ValueError(f"Invalid input for _disaggregate_by_subgroup: '{_disaggregate_by_subgroup}'. " + f"Value can be True only when plotting a specific _cost_category") + else: + pivot_df = subset_df.pivot_table(index='draw', columns='cost_category', values='cost', aggfunc='sum') + plt_name_suffix = '' else: subset_df = subset_df[subset_df['cost_category'] == _cost_category] - pivot_df = subset_df.pivot_table(index='draw', columns='cost_subcategory', values='cost', aggfunc='sum') + if (_disaggregate_by_subgroup == True): + # If sub-groups are more than 10 in number, then disaggregate the top 10 and group the rest into an 'other' category + if (len(subset_df['cost_subgroup']) > 10): + # Calculate total cost per subgroup + subgroup_totals = subset_df.groupby('cost_subgroup')['cost'].sum() + # Identify the top 10 subgroups by cost + top_10_subgroups = subgroup_totals.nlargest(10).index.tolist() + # Label the remaining subgroups as 'other' + subset_df['cost_subgroup'] = subset_df['cost_subgroup'].apply( + lambda x: x if x in top_10_subgroups else 'other' + ) + + pivot_df = subset_df.pivot_table(index=['draw', 'cost_subcategory'], columns='cost_subgroup', + values='cost', aggfunc='sum') + + else: + pivot_df = subset_df.pivot_table(index=['draw', 'cost_subcategory'], columns='cost_subgroup', + values='cost', aggfunc='sum') + + plt_name_suffix = '_by_subgroup' + else: + pivot_df = subset_df.pivot_table(index='draw', columns='cost_subcategory', values='cost', aggfunc='sum') + plt_name_suffix = '' - # Plot a stacked bar chart - pivot_df.plot(kind='bar', stacked=True) + # Sort pivot_df columns in ascending order by total cost + sorted_columns = pivot_df.sum(axis=0).sort_values().index + pivot_df = pivot_df[sorted_columns] # Rearrange columns by sorted order + + # Plot the stacked bar chart + ax = pivot_df.plot(kind='bar', stacked=True, figsize=(10, 6)) # Period included for plot title and name if _year == 'all': @@ -697,61 +727,106 @@ def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', _year = # Save plot plt.xlabel('Scenario') plt.ylabel('Cost (2023 USD), millions') - 
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right') + + # Arrange the legend in the same ascending order + handles, labels = plt.gca().get_legend_handles_labels() + plt.legend(handles[::-1], labels[::-1], bbox_to_anchor=(1.05, 1), loc='upper right') + plt.title(f'Costs by Scenario \n (Cost Category = {_cost_category} ; Period = {period})') - plt.savefig(_outputfilepath / f'stacked_bar_chart_{_cost_category}_{period}.png', dpi=100, + plt.savefig(_outputfilepath / f'stacked_bar_chart_{_cost_category}_{period}{plt_name_suffix}.png', dpi=100, bbox_inches='tight') plt.close() # 2. Line plots of total costs #---------------------------------------------------- -def do_line_plot_of_cost(_df, _cost_category = 'all', _year = 'all', _draws = None, _outputfilepath: Path = None): - # Filter the dataframe based on the selected draw - if _draws == None: - subset_df = _df - else: - subset_df = _df[_df.draw.isin(_draws)] +def do_line_plot_of_cost(_df, _cost_category='all', _year='all', _draws=None, disaggregate_by=None, + _outputfilepath: Path = None): + # Validate disaggregation options + valid_disaggregations = ['cost_category', 'cost_subcategory', 'cost_subgroup'] + if disaggregate_by not in valid_disaggregations and disaggregate_by is not None: + raise ValueError(f"Invalid disaggregation option: {disaggregate_by}. Choose from {valid_disaggregations}.") + + # If more than 1 draw, disaggregations can't be applied + if ((_draws is None) or (len(_draws) > 1)) & (disaggregate_by is not None): + raise ValueError(f"Invalid: disaggregate_by can be applied only when a single draw is plotted. For example, _draws = [0]") + + # Filter the dataframe by draws, if specified + subset_df = _df if _draws is None else _df[_df.draw.isin(_draws)] + + # Filter by year if specified + if _year != 'all': + subset_df = subset_df[subset_df['year'].isin(_year)] - if _cost_category != 'all': + # Handle scenarios based on `_cost_category` and `disaggregate_by` conditions + if _cost_category == 'all': + if disaggregate_by == 'cost_subgroup': + raise ValueError("Cannot disaggregate by 'cost_subgroup' when `_cost_category='all'` due to data size. 
If " + "you wish to plot by 'cost_subgroup', choose a specific _cost_category such as 'medical consumables'") + else: + # Filter subset_df by specific cost category if specified subset_df = subset_df[subset_df['cost_category'] == _cost_category] - if _year == 'all': - subset_df = subset_df + # Set grouping columns based on the disaggregation level + if disaggregate_by == 'cost_category': + groupby_columns = ['year', 'cost_category'] + elif disaggregate_by == 'cost_subcategory': + groupby_columns = ['year', 'cost_subcategory'] + elif disaggregate_by == 'cost_subgroup': + # If disaggregating by 'cost_subgroup' and there are more than 10 subgroups, limit to the top 10 + "Other" + if len(subset_df['cost_subgroup'].unique()) > 10: + # Calculate total cost per subgroup + subgroup_totals = subset_df[subset_df.stat == 'mean'].groupby('cost_subgroup')['cost'].sum() + # Identify the top 10 subgroups by cost + top_10_subgroups = subgroup_totals.nlargest(10).index.tolist() + # Reassign smaller subgroups to an "Other" category + subset_df['cost_subgroup'] = subset_df['cost_subgroup'].apply( + lambda x: x if x in top_10_subgroups else 'Other' + ) + groupby_columns = ['year', 'cost_subgroup'] else: - subset_df = subset_df[subset_df['year'].isin(_year)] - - # Reset the index for plotting purposes - subset_df = subset_df.reset_index() + groupby_columns = ['year'] # Extract mean, lower, and upper values for the plot - mean_values = subset_df[subset_df.stat == 'mean'].groupby(['year'])['cost'].sum() / 1e6 - lower_values = subset_df[subset_df.stat == 'lower'].groupby(['year'])['cost'].sum() / 1e6 - upper_values = subset_df[subset_df.stat == 'upper'].groupby(['year'])['cost'].sum() / 1e6 - - # Plot the line for 'mean' - plt.plot(mean_values.index.get_level_values(0), mean_values, marker='o', linestyle='-', color='b', label='Mean') - - # Add confidence interval using fill_between - plt.fill_between(mean_values.index.get_level_values(0), lower_values, upper_values, color='b', alpha=0.2, label='95% CI') + mean_values = subset_df[subset_df.stat == 'mean'].groupby(groupby_columns)['cost'].sum() / 1e6 + lower_values = subset_df[subset_df.stat == 'lower'].groupby(groupby_columns)['cost'].sum() / 1e6 + upper_values = subset_df[subset_df.stat == 'upper'].groupby(groupby_columns)['cost'].sum() / 1e6 + + # Plot each line for the disaggregated values + if disaggregate_by: + plt_name_suffix = f'_by{disaggregate_by}' + for disaggregate_value in mean_values.index.get_level_values(disaggregate_by).unique(): + # Get mean, lower, and upper values for each disaggregated group + value_mean = mean_values.xs(disaggregate_value, level=disaggregate_by) + value_lower = lower_values.xs(disaggregate_value, level=disaggregate_by) + value_upper = upper_values.xs(disaggregate_value, level=disaggregate_by) + + # Plot line for mean and shaded region for 95% CI + plt.plot(value_mean.index, value_mean, marker='o', linestyle='-', label=f'{disaggregate_value}') + plt.fill_between(value_mean.index, value_lower, value_upper, alpha=0.2) + else: + plt_name_suffix = '' + plt.plot(mean_values.index, mean_values, marker='o', linestyle='-', color='b', label='Mean') + plt.fill_between(mean_values.index, lower_values, upper_values, color='b', alpha=0.2, label='95% CI') - # Period included for plot title and name + # Define period for plot title if _year == 'all': - period = (f"{min(_df['year'].unique())} - {max(_df['year'].unique())}") - elif (len(_year) == 1): - period = (f"{_year[0]}") + period = 
f"{min(subset_df['year'].unique())}-{max(subset_df['year'].unique())}" + elif len(_year) == 1: + period = str(_year[0]) else: - period = (f"{min(_year)} - {max(_year)}") + period = f"{min(_year)}-{max(_year)}" - # Set plot labels and title + # Set labels, legend, and title plt.xlabel('Year') plt.ylabel('Cost (2023 USD), millions') - plt.legend(bbox_to_anchor=(1.05, 1), loc='upper right') - plt.title(f'Costs by Scenario \n (Cost Category = {_cost_category} ; Draw = {_draws}; Period = {period})') + plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left') + plot_title = f'Costs by Scenario \n (Category = {_cost_category}, Period = {period})' + plt.title(plot_title) - # Save the plot - plt.savefig(_outputfilepath / f'trend_{_cost_category}_{period}.png', - dpi=100, - bbox_inches='tight') + # Save plot + filename = f'trend_{_cost_category}_{period}{plt_name_suffix}.png' + print(f"Saved figure {_outputfilepath} / {filename}") + plt.savefig(_outputfilepath / f'{filename}', dpi=100, bbox_inches='tight') plt.close() # 3. Return on Investment Plot From a001d745ed3cd78e26ce7e54b50c98398295bbfc Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Fri, 1 Nov 2024 19:09:44 +0000 Subject: [PATCH 140/230] update the order of the legend --- src/scripts/costing/cost_estimation.py | 59 ++++++++++++++++++-------- 1 file changed, 41 insertions(+), 18 deletions(-) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 6fcff00b60..b87aaa43a9 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -746,10 +746,6 @@ def do_line_plot_of_cost(_df, _cost_category='all', _year='all', _draws=None, di if disaggregate_by not in valid_disaggregations and disaggregate_by is not None: raise ValueError(f"Invalid disaggregation option: {disaggregate_by}. Choose from {valid_disaggregations}.") - # If more than 1 draw, disaggregations can't be applied - if ((_draws is None) or (len(_draws) > 1)) & (disaggregate_by is not None): - raise ValueError(f"Invalid: disaggregate_by can be applied only when a single draw is plotted. For example, _draws = [0]") - # Filter the dataframe by draws, if specified subset_df = _df if _draws is None else _df[_df.draw.isin(_draws)] @@ -760,8 +756,7 @@ def do_line_plot_of_cost(_df, _cost_category='all', _year='all', _draws=None, di # Handle scenarios based on `_cost_category` and `disaggregate_by` conditions if _cost_category == 'all': if disaggregate_by == 'cost_subgroup': - raise ValueError("Cannot disaggregate by 'cost_subgroup' when `_cost_category='all'` due to data size. 
If " - "you wish to plot by 'cost_subgroup', choose a specific _cost_category such as 'medical consumables'") + raise ValueError("Cannot disaggregate by 'cost_subgroup' when `_cost_category='all'` due to data size.") else: # Filter subset_df by specific cost category if specified subset_df = subset_df[subset_df['cost_category'] == _cost_category] @@ -791,42 +786,70 @@ def do_line_plot_of_cost(_df, _cost_category='all', _year='all', _draws=None, di lower_values = subset_df[subset_df.stat == 'lower'].groupby(groupby_columns)['cost'].sum() / 1e6 upper_values = subset_df[subset_df.stat == 'upper'].groupby(groupby_columns)['cost'].sum() / 1e6 + # Prepare to store lines and labels for the legend + lines = [] + labels = [] + + # Define a list of colors to rotate through + colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'orange', 'purple', 'brown', 'gray'] # Add more colors as needed + color_cycle = iter(colors) # Create an iterator from the color list + # Plot each line for the disaggregated values if disaggregate_by: - plt_name_suffix = f'_by{disaggregate_by}' for disaggregate_value in mean_values.index.get_level_values(disaggregate_by).unique(): # Get mean, lower, and upper values for each disaggregated group value_mean = mean_values.xs(disaggregate_value, level=disaggregate_by) value_lower = lower_values.xs(disaggregate_value, level=disaggregate_by) value_upper = upper_values.xs(disaggregate_value, level=disaggregate_by) + # Get the next color from the cycle + color = next(color_cycle) + # Plot line for mean and shaded region for 95% CI - plt.plot(value_mean.index, value_mean, marker='o', linestyle='-', label=f'{disaggregate_value}') - plt.fill_between(value_mean.index, value_lower, value_upper, alpha=0.2) + line, = plt.plot(value_mean.index, value_mean, marker='o', linestyle='-', color=color, label=f'{disaggregate_value} - Mean') + plt.fill_between(value_mean.index, value_lower, value_upper, color=color, alpha=0.2) + + # Append to lines and labels for sorting later + lines.append(line) + labels.append(disaggregate_value) else: - plt_name_suffix = '' - plt.plot(mean_values.index, mean_values, marker='o', linestyle='-', color='b', label='Mean') + line, = plt.plot(mean_values.index, mean_values, marker='o', linestyle='-', color='b', label='Mean') plt.fill_between(mean_values.index, lower_values, upper_values, color='b', alpha=0.2, label='95% CI') + # Append to lines and labels for sorting later + lines.append(line) + labels.append('Mean') + + # Sort the legend based on total costs + total_costs = {label: mean_values.xs(label, level=disaggregate_by).sum() for label in labels} + sorted_labels = sorted(total_costs.keys(), key=lambda x: total_costs[x]) + + # Reorder lines based on sorted labels + handles = [lines[labels.index(label)] for label in sorted_labels] + # Define period for plot title if _year == 'all': - period = f"{min(subset_df['year'].unique())}-{max(subset_df['year'].unique())}" + period = f"{min(subset_df['year'].unique())} - {max(subset_df['year'].unique())}" elif len(_year) == 1: period = str(_year[0]) else: - period = f"{min(_year)}-{max(_year)}" + period = f"{min(_year)} - {max(_year)}" # Set labels, legend, and title plt.xlabel('Year') plt.ylabel('Cost (2023 USD), millions') - plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left') + plt.legend(handles[::-1], sorted_labels[::-1], bbox_to_anchor=(1.05, 1), loc='upper left') plot_title = f'Costs by Scenario \n (Category = {_cost_category}, Period = {period})' plt.title(plot_title) - # Save plot - filename = 
f'trend_{_cost_category}_{period}{plt_name_suffix}.png' - print(f"Saved figure {_outputfilepath} / {filename}") - plt.savefig(_outputfilepath / f'{filename}', dpi=100, bbox_inches='tight') + # Save plot with a proper filename + if disaggregate_by is None: + filename_suffix = "=" + else: + filename_suffix = f"_by_{disaggregate_by}" + + filename = f'trend_{_cost_category}_{period}{filename_suffix}.png' + plt.savefig(_outputfilepath / filename, dpi=100, bbox_inches='tight') plt.close() # 3. Return on Investment Plot From bf51d28ecc74598b21b7dcef8166c8fd9b0ca85d Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Fri, 1 Nov 2024 19:41:02 +0000 Subject: [PATCH 141/230] add condition so that disaggregation is only possible if one draw is plotted. --- src/scripts/costing/cost_estimation.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index b87aaa43a9..66e5d25ba3 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -739,6 +739,7 @@ def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', _disagg # 2. Line plots of total costs #---------------------------------------------------- +# TODO: Check why line plot get save without a file name def do_line_plot_of_cost(_df, _cost_category='all', _year='all', _draws=None, disaggregate_by=None, _outputfilepath: Path = None): # Validate disaggregation options @@ -746,6 +747,10 @@ def do_line_plot_of_cost(_df, _cost_category='all', _year='all', _draws=None, di if disaggregate_by not in valid_disaggregations and disaggregate_by is not None: raise ValueError(f"Invalid disaggregation option: {disaggregate_by}. Choose from {valid_disaggregations}.") + # + if ((_draws is None) or (len(_draws) > 1)) & (disaggregate_by is not None): + raise ValueError(f"The disaggregate_by option only works if only one draw is plotted, for exmaple _draws = [0]") + # Filter the dataframe by draws, if specified subset_df = _df if _draws is None else _df[_df.draw.isin(_draws)] @@ -814,7 +819,7 @@ def do_line_plot_of_cost(_df, _cost_category='all', _year='all', _draws=None, di labels.append(disaggregate_value) else: line, = plt.plot(mean_values.index, mean_values, marker='o', linestyle='-', color='b', label='Mean') - plt.fill_between(mean_values.index, lower_values, upper_values, color='b', alpha=0.2, label='95% CI') + plt.fill_between(mean_values.index, lower_values, upper_values, color='b', alpha=0.2) # Append to lines and labels for sorting later lines.append(line) @@ -839,12 +844,12 @@ def do_line_plot_of_cost(_df, _cost_category='all', _year='all', _draws=None, di plt.xlabel('Year') plt.ylabel('Cost (2023 USD), millions') plt.legend(handles[::-1], sorted_labels[::-1], bbox_to_anchor=(1.05, 1), loc='upper left') - plot_title = f'Costs by Scenario \n (Category = {_cost_category}, Period = {period})' + plot_title = f'Total input cost \n (Category = {_cost_category}, Period = {period})' plt.title(plot_title) # Save plot with a proper filename if disaggregate_by is None: - filename_suffix = "=" + filename_suffix = "" else: filename_suffix = f"_by_{disaggregate_by}" From a203c516425eb43bdc94b8b0b980fd0497d7b2d2 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Fri, 1 Nov 2024 20:12:40 +0000 Subject: [PATCH 142/230] update the calculation of max_ability_to_pay_for_implementation --- .../costing/cost_analysis_roi_of_hss.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git 
a/src/scripts/costing/cost_analysis_roi_of_hss.py b/src/scripts/costing/cost_analysis_roi_of_hss.py index 81945ccacf..f5633b46da 100644 --- a/src/scripts/costing/cost_analysis_roi_of_hss.py +++ b/src/scripts/costing/cost_analysis_roi_of_hss.py @@ -215,10 +215,12 @@ def find_difference_relative_to_comparison(_ser: pd.Series, .drop(columns=([comparison] if drop_comparison else [])) \ .stack() +total_input_cost = input_costs.groupby(['draw', 'run'])['cost'].sum() + # TODO the following calculation should first capture the different by run and then be summarised incremental_scenario_cost = (pd.DataFrame( find_difference_relative_to_comparison( - total_scenario_cost_wide.loc[0], + total_input_cost, comparison= 0) # sets the comparator to 0 which is the Actual scenario ).T.iloc[0].unstack()).T @@ -246,19 +248,20 @@ def get_num_dalys(_df): do_scaling=True ) -num_dalys_summarized = summarize(num_dalys).loc[0].unstack() +#num_dalys_summarized = summarize(num_dalys).loc[0].unstack() #num_dalys_summarized['scenario'] = scenarios.to_list() # add when scenarios have names #num_dalys_summarized = num_dalys_summarized.set_index('scenario') # Get absolute DALYs averted -num_dalys_averted = summarize( - -1.0 * +num_dalys_averted =(-1.0 * pd.DataFrame( find_difference_relative_to_comparison( num_dalys.loc[0], comparison= 0) # sets the comparator to 0 which is the Actual scenario - ).T - ).iloc[0].unstack() + ).T.iloc[0].unstack(level = 'run')) + +#num_dalys = num_dalys.loc[0].unstack() +num_dalys_averted = num_dalys_averted[num_dalys_averted.index.get_level_values(0).isin(draws_included)] #num_dalys_averted['scenario'] = scenarios.to_list()[1:12] #num_dalys_averted = num_dalys_averted.set_index('scenario') @@ -277,8 +280,7 @@ def get_num_dalys(_df): do_line_plot_of_cost(_df = input_costs, _cost_category = 'medical consumables', _year = 'all', _draws = [0], disaggregate_by= 'cost_subgroup',_outputfilepath = figurespath) do_line_plot_of_cost(_df = input_costs, _cost_category = 'other', _year = 'all', _draws = [0], disaggregate_by= 'cost_subgroup',_outputfilepath = figurespath) do_line_plot_of_cost(_df = input_costs, _cost_category = 'human resources for health', _year = 'all', _draws = [0], disaggregate_by= 'cost_subgroup',_outputfilepath = figurespath) - -do_line_plot_of_cost(_df = input_costs, _cost_category = 'human resources for health', _year = 'all', _draws = None, _outputfilepath = figurespath) +do_line_plot_of_cost(_df = input_costs, _cost_category = 'human resources for health', _year = 'all', _draws = [0], disaggregate_by= 'cost_subcategory', _outputfilepath = figurespath) do_line_plot_of_cost(_df = input_costs, _cost_category = 'medical equipment', _year = 'all', _draws = None, _outputfilepath = figurespath) do_line_plot_of_cost(_df = input_costs, _cost_category = 'other', _year = 'all', _draws = None, _outputfilepath = figurespath) do_line_plot_of_cost(_df = input_costs, _cost_category = 'all', _year = 'all', disaggregate_by= 'cost_category', _draws = None, _outputfilepath = figurespath) From 642d3da9d10fa8f35d8aaba31df7a67e36ca3c9c Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 4 Nov 2024 11:49:08 +0000 Subject: [PATCH 143/230] update the plots for ROI and maximum ability to pay - ROI is now (Benefits - Costs)/Costs - For ROI calculation benefits and costs are now compared run to run rather than stat to stat --- .../costing/cost_analysis_roi_of_hss.py | 186 +++++++++++++++++- src/scripts/costing/cost_estimation.py | 113 +---------- 2 files changed, 178 insertions(+), 121 deletions(-) diff 
--git a/src/scripts/costing/cost_analysis_roi_of_hss.py b/src/scripts/costing/cost_analysis_roi_of_hss.py index f5633b46da..893b91afd4 100644 --- a/src/scripts/costing/cost_analysis_roi_of_hss.py +++ b/src/scripts/costing/cost_analysis_roi_of_hss.py @@ -46,21 +46,46 @@ if not os.path.exists(figurespath): os.makedirs(figurespath) +def summarize_cost_data(_df): + _df = _df.stack() + collapsed_df = _df.groupby(level='draw').agg([ + 'mean', + ('lower', lambda x: x.quantile(0.025)), + ('upper', lambda x: x.quantile(0.975)) + ]) + + collapsed_df = collapsed_df.unstack() + collapsed_df.index = collapsed_df.index.set_names('stat', level=0) + collapsed_df = collapsed_df.unstack(level='stat') + return collapsed_df + # Load result files #------------------- #results_folder = get_scenario_outputs('htm_with_and_without_hss-2024-09-04T143044Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios version 1 #results_folder = get_scenario_outputs('hss_elements-2024-09-04T142900Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios version 1 resourcefilepath = Path("./resources") outputfilepath = Path('./outputs/t.mangal@imperial.ac.uk') -results_folder = get_scenario_outputs('htm_with_and_without_hss-2024-10-12T111720Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios version 2 +results_folder = get_scenario_outputs('htm_with_and_without_hss-2024-10-22T163743Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios version 2 #results_folder = get_scenario_outputs('hss_elements-2024-10-12T111649Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios version 2 # Check can read results from draw=0, run=0 log = load_pickled_dataframes(results_folder, 0, 0) # look at one log (so can decide what to extract) params = extract_params(results_folder) population_scaling_factor = log['tlo.methods.demography']['scaling_factor']['scaling_factor'].iloc[0] -TARGET_PERIOD_INTERVENTION = (Date(2020, 1, 1), Date(2030, 12, 31)) +TARGET_PERIOD_INTERVENTION = (Date(2025, 1, 1), Date(2035, 12, 31)) relevant_period_for_costing = [i.year for i in TARGET_PERIOD_INTERVENTION] +htm_scenarios = {0:"Baseline", 1: "HSS PACKAGE: Perfect", 2: "HSS PACKAGE: Realistic", 3: "HIV Programs Scale-up WITHOUT HSS PACKAGE", +4: "HIV Programs Scale-up WITH FULL HSS PACKAGE", 5: "HIV Programs Scale-up WITH REALISTIC HSS PACKAGE", 6: "TB Programs Scale-up WITHOUT HSS PACKAGE", +7: "TB Programs Scale-up WITH FULL HSS PACKAGE", 8: "TB Programs Scale-up WITH REALISTIC HSS PACKAGE", 9: "Malaria Programs Scale-up WITHOUT HSS PACKAGE", +10: "Malaria Programs Scale-up WITH FULL HSS PACKAGE", 11: "Malaria Programs Scale-up WITH REALISTIC HSS PACKAGE", 12: "HTM Programs Scale-up WITHOUT HSS PACKAGE", +13: "HTM Programs Scale-up WITH FULL HSS PACKAGE", 14: "HTM Programs Scale-up WITH REALISTIC HSS PACKAGE", 15: "HTM Programs Scale-up WITH SUPPLY CHAINS", 16: "HTM Programs Scale-up WITH HRH"} +htm_scenarios_for_gf_report = [0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 16] + +hss_scenarios = {0: "Baseline", 1: "HRH Moderate Scale-up (1%)", 2: "HRH Scale-up Following Historical Growth", 3: "HRH Accelerated Scale-up (6%)", + 4: "Increase Capacity at Primary Care Levels", 5: "Increase Capacity of CHW", 6: "Consumables Increased to 75th Percentile", + 7: "Consumables Available at HIV levels", 8: "Consumables Available at EPI levels", 9: "Perfect Consumables Availability", + 10: "HSS PACKAGE: Perfect", 11: "HSS PACKAGE: Realistic expansion, no change in HSB", 12: "HSS PACKAGE: Realistic expansion"} +hss_scenarios_for_gf_report = [0, 1, 3, 4, 6, 7, 8, 9, 10, 12] # Load the list of 
districts and their IDs district_dict = pd.read_csv(resourcefilepath / 'demography' / 'ResourceFile_Population_2010.csv')[ @@ -70,7 +95,6 @@ # Estimate standard input costs of scenario #----------------------------------------------------------------------------------------------------------------------- input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath , cost_only_used_staff=True) # summarise = True -#draws_included = [0,1,2,6,7] #input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath , draws = draws_included, cost_only_used_staff=True, summarize = True) # Add additional costs pertaining to simulation @@ -111,7 +135,7 @@ 'Mwanza', 'Likoma', 'Nkhotakota'] # Convert above list of district names to numeric district identifiers district_keys_with_irs_scaleup = [key for key, name in district_dict.items() if name in districts_with_irs_scaleup] -TARGET_PERIOD_MALARIA_SCALEUP = (Date(2024, 1, 1), Date(2030, 12, 31)) +TARGET_PERIOD_MALARIA_SCALEUP = (Date(2024, 1, 1), Date(2035, 12, 31)) # Get population by district def get_total_population_by_district(_df): @@ -196,7 +220,11 @@ def melt_and_label_malaria_scaleup_cost(_df, label): new_df = melt_and_label_malaria_scaleup_cost(df, label) input_costs = pd.concat([input_costs, new_df], ignore_index=True) -input_costs = input_costs[input_costs.draw.isin(draws_included)] +# TODO Reduce the cost of Oxygen and Depo-medroxy temporarily which we figure out the issue with this + +# Aggregate input costs for further analysis +input_costs_subset = input_costs[(input_costs['year'] >= relevant_period_for_costing[0]) & (input_costs['year'] <= relevant_period_for_costing[1])] +total_input_cost = input_costs_subset.groupby(['draw', 'run'])['cost'].sum() # Calculate incremental cost #----------------------------------------------------------------------------------------------------------------------- @@ -215,7 +243,6 @@ def find_difference_relative_to_comparison(_ser: pd.Series, .drop(columns=([comparison] if drop_comparison else [])) \ .stack() -total_input_cost = input_costs.groupby(['draw', 'run'])['cost'].sum() # TODO the following calculation should first capture the different by run and then be summarised incremental_scenario_cost = (pd.DataFrame( @@ -231,7 +258,7 @@ def get_num_dalys(_df): Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using results from runs that crashed mid-way through the simulation. """ - years_needed = [i.year for i in TARGET_PERIOD_INTERVENTION] + years_needed = relevant_period_for_costing # [i.year for i in TARGET_PERIOD_INTERVENTION] assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." 
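    # (The return below sums the remaining DALY columns over the costing period, across all
    #  sex and age groups, and hands the total back as a single-element Series.)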
return pd.Series( data=_df @@ -261,13 +288,14 @@ def get_num_dalys(_df): ).T.iloc[0].unstack(level = 'run')) #num_dalys = num_dalys.loc[0].unstack() -num_dalys_averted = num_dalys_averted[num_dalys_averted.index.get_level_values(0).isin(draws_included)] +#num_dalys_averted = num_dalys_averted[num_dalys_averted.index.get_level_values(0).isin(draws_included)] #num_dalys_averted['scenario'] = scenarios.to_list()[1:12] #num_dalys_averted = num_dalys_averted.set_index('scenario') chosen_cet = 77.4 # based on Ochalek et al (2018) - the paper provided the value $61 in 2016 USD terms, this value is in 2023 USD terms -monetary_value_of_incremental_health = num_dalys_averted * chosen_cet -max_ability_to_pay_for_implementation = monetary_value_of_incremental_health - incremental_scenario_cost # monetary value - change in costs +monetary_value_of_incremental_health = (num_dalys_averted * chosen_cet).clip(0.0) +max_ability_to_pay_for_implementation = (monetary_value_of_incremental_health - incremental_scenario_cost).clip(0.0) # monetary value - change in costs +#TODO check that the above calculation is correct # Plot costs #----------------------------------------------------------------------------------------------------------------------- @@ -285,6 +313,144 @@ def get_num_dalys(_df): do_line_plot_of_cost(_df = input_costs, _cost_category = 'other', _year = 'all', _draws = None, _outputfilepath = figurespath) do_line_plot_of_cost(_df = input_costs, _cost_category = 'all', _year = 'all', disaggregate_by= 'cost_category', _draws = None, _outputfilepath = figurespath) +# 3. Return on Investment Plot +#---------------------------------------------------- +# Plot ROI at various levels of cost +roi_outputs_folder = Path(figurespath / 'roi' / 'htm') +if not os.path.exists(roi_outputs_folder): + os.makedirs(roi_outputs_folder) + +# Iterate over each draw in monetary_value_of_incremental_health +for draw_index, row in monetary_value_of_incremental_health.iterrows(): + # Initialize an empty DataFrame to store values for each 'run' + all_run_values = pd.DataFrame() + + # Create an array of implementation costs ranging from 0 to the max value of max ability to pay for the current draw + implementation_costs = np.linspace(0, max_ability_to_pay_for_implementation.loc[draw_index].max(), 50) + + # Retrieve the corresponding row from incremental_scenario_cost for the same draw + scenario_cost_row = incremental_scenario_cost.loc[draw_index] + + # Calculate the values for each individual run + for run in scenario_cost_row.index: # Assuming 'run' columns are labeled by numbers + # Calculate the cost-effectiveness metric for the current run + run_values = (row[run] - (implementation_costs + scenario_cost_row[run])) / ( + implementation_costs + scenario_cost_row[run]) + + # Create a DataFrame with index as (draw_index, run) and columns as implementation costs + run_df = pd.DataFrame([run_values], index=pd.MultiIndex.from_tuples([(draw_index, run)], names=['draw', 'run']), + columns=implementation_costs) + + # Append the run DataFrame to all_run_values + all_run_values = pd.concat([all_run_values, run_df]) + + collapsed_data = all_run_values.groupby(level='draw').agg([ + 'mean', + ('lower', lambda x: x.quantile(0.025)), + ('upper', lambda x: x.quantile(0.975)) + ]) + + collapsed_data = collapsed_data.unstack() + collapsed_data.index = collapsed_data.index.set_names('implementation_cost', level=0) + collapsed_data.index = collapsed_data.index.set_names('stat', level=1) + collapsed_data = 
collapsed_data.reset_index().rename(columns = {0: 'roi'}) + #collapsed_data = collapsed_data.reorder_levels(['draw', 'stat', 'implementation_cost']) + + # Divide rows by the sum of implementation costs and incremental input cost + mean_values = collapsed_data[collapsed_data['stat'] == 'mean'][['implementation_cost', 'roi']] + lower_values = collapsed_data[collapsed_data['stat'] == 'lower'][['implementation_cost', 'roi']] + upper_values = collapsed_data[collapsed_data['stat'] == 'upper'][['implementation_cost', 'roi']] + + # Plot mean line + plt.plot(implementation_costs / 1e6, mean_values['roi'], label=f'{htm_scenarios[draw_index]}') + # Plot the confidence interval as a shaded region + plt.fill_between(implementation_costs / 1e6, lower_values['roi'], upper_values['roi'], alpha=0.2) + + plt.xlabel('Implementation cost, millions') + plt.ylabel('Return on Investment') + plt.title('Return on Investment of scenario at different levels of implementation cost') + + #plt.text(x=0.95, y=0.8, + # s=f"Monetary value of incremental health = USD {round(monetary_value_of_incremental_health.loc[draw_index]['mean'] / 1e6, 2)}m (USD {round(monetary_value_of_incremental_health.loc[draw_index]['lower'] / 1e6, 2)}m-{round(monetary_value_of_incremental_health.loc[draw_index]['upper'] / 1e6, 2)}m);\n " + # f"Incremental input cost of scenario = USD {round(scenario_cost_row['mean'] / 1e6, 2)}m (USD {round(scenario_cost_row['lower'] / 1e6, 2)}m-{round(scenario_cost_row['upper'] / 1e6, 2)}m)", + # horizontalalignment='right', verticalalignment='top', transform=plt.gca().transAxes, fontsize=9, + # weight='bold', color='black') + + # Show legend + plt.legend() + # Save + plt.savefig(figurespath / f'roi/htm/draw{draw_index}_{htm_scenarios[draw_index]}_ROI.png', dpi=100, + bbox_inches='tight') + plt.close() + +# 4. 
Plot Maximum ability-to-pay +#---------------------------------------------------- +max_ability_to_pay_for_implementation_summarized = summarize_cost_data(max_ability_to_pay_for_implementation) +def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrapped=False): + """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the + extent of the error bar.""" + + yerr = np.array([ + (_df['mean'] - _df['lower']).values, + (_df['upper'] - _df['mean']).values, + ]) + + xticks = {(i+1): k for i, k in enumerate(_df.index)} + + fig, ax = plt.subplots() + ax.bar( + xticks.keys(), + _df['mean'].values, + yerr=yerr, + alpha=1, + ecolor='black', + capsize=10, + label=xticks.values() + ) + ''' + if annotations: + for xpos, ypos, text in zip(xticks.keys(), _df['upper'].values, annotations): + ax.text(xpos, ypos * 1.05, text, horizontalalignment='center', fontsize=11) + + ax.set_xticks(list(xticks.keys())) + if not xticklabels_horizontal_and_wrapped: + wrapped_labs = ["\n".join(textwrap.wrap(_lab, 20)) for _lab in xticks.values()] + ax.set_xticklabels(wrapped_labs, rotation=45, ha='right', fontsize=10) + else: + wrapped_labs = ["\n".join(textwrap.wrap(_lab, 20)) for _lab in xticks.values()] + ax.set_xticklabels(wrapped_labs, fontsize=10) + ''' + + # Set font size for y-tick labels + ax.tick_params(axis='y', labelsize=12) + ax.tick_params(axis='x', labelsize=11) + + ax.grid(axis="y") + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + fig.tight_layout() + + return fig, ax + +# Plot Max ability to pay +name_of_plot = f'Maximum ability to pay, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' #f'Maximum ability to pay, {first_year_of_simulation} - {final_year_of_simulation}' +fig, ax = do_bar_plot_with_ci( + (max_ability_to_pay_for_implementation_summarized / 1e6), + annotations=[ + f"{round(row['mean']/1e6, 1)} \n ({round(row['lower']/1e6, 1)}-{round(row['upper']/1e6, 1)})" + for _, row in max_ability_to_pay_for_implementation_summarized.iterrows() + ], + xticklabels_horizontal_and_wrapped=False, +) +ax.set_title(name_of_plot) +#ax.set_ylim(0, 120) +#ax.set_yticks(np.arange(0, 120, 10)) +ax.set_ylabel('Maximum ability to pay \n(Millions)') +fig.tight_layout() +fig.savefig(figurespath / name_of_plot.replace(' ', '_').replace(',', '')) +fig.show() +plt.close(fig) + ''' #years_with_no_malaria_scaleup = set(TARGET_PERIOD).symmetric_difference(set(TARGET_PERIOD_MALARIA_SCALEUP)) #years_with_no_malaria_scaleup = sorted(list(years_with_no_malaria_scaleup)) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 66e5d25ba3..68dbfe4d43 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -857,115 +857,8 @@ def do_line_plot_of_cost(_df, _cost_category='all', _year='all', _draws=None, di plt.savefig(_outputfilepath / filename, dpi=100, bbox_inches='tight') plt.close() -# 3. 
Return on Investment Plot -#---------------------------------------------------- -# Plot ROI at various levels of cost -roi_outputs_folder = Path(figurespath / 'roi') -if not os.path.exists(roi_outputs_folder): - os.makedirs(roi_outputs_folder) - -# Loop through each row and plot mean, lower, and upper values divided by costs -for index, row in monetary_value_of_incremental_health.iterrows(): - # Step 1: Create an array of implementation costs ranging from 0 to the max value of the max ability to pay - implementation_costs = np.linspace(0, max_ability_to_pay_for_implementation.loc[index]['mean'], 50) - - plt.figure(figsize=(10, 6)) - - # Retrieve the corresponding row from incremental_scenario_cost for the same 'index' - scenario_cost_row = incremental_scenario_cost.loc[index] - # Divide rows by the sum of implementation costs and incremental input cost - mean_values = row['mean'] / (implementation_costs + scenario_cost_row['mean']) - lower_values = row['lower'] / (implementation_costs + scenario_cost_row['lower']) - upper_values = row['upper'] / (implementation_costs + scenario_cost_row['upper']) - # Plot mean line - plt.plot(implementation_costs/1e6, mean_values, label=f'Draw {index}') - # Plot the confidence interval as a shaded region - plt.fill_between(implementation_costs/1e6, lower_values, upper_values, alpha=0.2) - - # Step 4: Set plot labels and title - plt.xlabel('Implementation cost, millions') - plt.ylabel('Return on Investment') - plt.title('Return on Investment of scenarios at different levels of implementation cost') - - plt.text(x=0.95, y=0.8, s=f"Monetary value of incremental health = USD {round(monetary_value_of_incremental_health.loc[index]['mean']/1e6,2)}m (USD {round(monetary_value_of_incremental_health.loc[index]['lower']/1e6,2)}m-{round(monetary_value_of_incremental_health.loc[index]['upper']/1e6,2)}m);\n " - f"Incremental input cost of scenario = USD {round(scenario_cost_row['mean']/1e6,2)}m (USD {round(scenario_cost_row['lower']/1e6,2)}m-{round(scenario_cost_row['upper']/1e6,2)}m)", - horizontalalignment='right', verticalalignment='top', transform=plt.gca().transAxes, fontsize=9, weight='bold', color='black') - - - # Show legend - plt.legend() - # Save - plt.savefig(figurespath / f'roi/ROI_draw{index}.png', dpi=100, - bbox_inches='tight') - plt.close() - -# 4. 
Plot Maximum ability-to-pay -#---------------------------------------------------- -def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrapped=False): - """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the - extent of the error bar.""" - - yerr = np.array([ - (_df['mean'] - _df['lower']).values, - (_df['upper'] - _df['mean']).values, - ]) - - xticks = {(i+1): k for i, k in enumerate(_df.index)} - - fig, ax = plt.subplots() - ax.bar( - xticks.keys(), - _df['mean'].values, - yerr=yerr, - alpha=1, - ecolor='black', - capsize=10, - label=xticks.values() - ) - ''' - if annotations: - for xpos, ypos, text in zip(xticks.keys(), _df['upper'].values, annotations): - ax.text(xpos, ypos * 1.05, text, horizontalalignment='center', fontsize=11) - - ax.set_xticks(list(xticks.keys())) - if not xticklabels_horizontal_and_wrapped: - wrapped_labs = ["\n".join(textwrap.wrap(_lab, 20)) for _lab in xticks.values()] - ax.set_xticklabels(wrapped_labs, rotation=45, ha='right', fontsize=10) - else: - wrapped_labs = ["\n".join(textwrap.wrap(_lab, 20)) for _lab in xticks.values()] - ax.set_xticklabels(wrapped_labs, fontsize=10) - ''' - - # Set font size for y-tick labels - ax.tick_params(axis='y', labelsize=12) - ax.tick_params(axis='x', labelsize=11) - - ax.grid(axis="y") - ax.spines['top'].set_visible(False) - ax.spines['right'].set_visible(False) - fig.tight_layout() - - return fig, ax - -# Plot Max ability to pay -name_of_plot = f'Maximum ability to pay, 2020-2030' #f'Maximum ability to pay, {first_year_of_simulation} - {final_year_of_simulation}' -fig, ax = do_bar_plot_with_ci( - (max_ability_to_pay_for_implementation / 1e6).clip(lower=0.0), - annotations=[ - f"{round(row['mean']/1e6, 1)} \n ({round(row['lower']/1e6, 1)}-{round(row['upper']/1e6, 1)})" - for _, row in max_ability_to_pay_for_implementation.clip(lower=0.0).iterrows() - ], - xticklabels_horizontal_and_wrapped=False, -) -ax.set_title(name_of_plot) -#ax.set_ylim(0, 120) -#ax.set_yticks(np.arange(0, 120, 10)) -ax.set_ylabel('Maximum ability to pay \n(Millions)') -fig.tight_layout() -fig.savefig(figurespath / name_of_plot.replace(' ', '_').replace(',', '')) -fig.show() -plt.close(fig) - +''' +# Scratch pad # TODO all these HR plots need to be looked at # 1. 
HR # Stacked bar chart of salaries by cadre @@ -1131,8 +1024,6 @@ def plot_inflow_to_outflow_ratio(_dict, groupby_var): plt.title('Total Salary by Facility_Level') plt.savefig(costing_outputs_folder / 'total_salary_by_level.png') -''' -# Scratch pad log['tlo.methods.healthsystem']['Capacity']['Frac_Time_Used_By_Facility_ID'] # for district disaggregation From 07834057e5beb75cff12bf13fda6759b8742d351 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 4 Nov 2024 12:28:18 +0000 Subject: [PATCH 144/230] add function to summarize costs --- src/scripts/costing/cost_estimation.py | 57 +++++++++++++++++--------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 68dbfe4d43..654b1fe541 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -36,8 +36,8 @@ print('Script Start', datetime.datetime.now().strftime('%H:%M')) #%% -def estimate_input_cost_of_scenarios(results_folder: Path, resourcefilepath: Path = None, draws = None, runs = None, - summarize: bool = False, cost_only_used_staff: bool = True): +def estimate_input_cost_of_scenarios(results_folder: Path, resourcefilepath: Path = None, _draws = None, _runs = None, + summarize: bool = False, _years = None, cost_only_used_staff: bool = True): # Useful common functions def drop_outside_period(_df): """Return a dataframe which only includes for which the date is within the limits defined by TARGET_PERIOD""" @@ -57,10 +57,10 @@ def melt_model_output_draws_and_runs(_df, id_vars): #------------------------------------- log = load_pickled_dataframes(results_folder, 0, 0) # read from 1 draw and run info = get_scenario_info(results_folder) # get basic information about the results - if draws is None: - draws = range(0, info['number_of_draws']) - if runs is None: - runs = range(0, info['runs_per_draw']) + if _draws is None: + _draws = range(0, info['number_of_draws']) + if _runs is None: + _runs = range(0, info['runs_per_draw']) final_year_of_simulation = max(log['tlo.methods.healthsystem.summary']['hsi_event_counts']['date']).year first_year_of_simulation = min(log['tlo.methods.healthsystem.summary']['hsi_event_counts']['date']).year years = list(range(first_year_of_simulation, final_year_of_simulation + 1)) @@ -333,10 +333,10 @@ def label_rows_of_cost_dataframe(_df, label_var, label): human_resource_costs = prepare_cost_dataframe(human_resource_costs, _category_specific_group = 'OfficerType', _cost_category = 'human resources for health') # Only preserve the draws and runs requested - if draws is not None: - human_resource_costs = human_resource_costs[human_resource_costs.draw.isin(draws)] - if runs is not None: - human_resource_costs = human_resource_costs[human_resource_costs.run.isin(runs)] + if _draws is not None: + human_resource_costs = human_resource_costs[human_resource_costs.draw.isin(_draws)] + if _runs is not None: + human_resource_costs = human_resource_costs[human_resource_costs.run.isin(_runs)] # %% # 2. 
Consumables cost @@ -510,12 +510,12 @@ def disaggregate_separately_managed_medical_supplies_from_consumable_costs(_df, other_costs = prepare_cost_dataframe(other_costs, _category_specific_group = 'consumable', _cost_category = 'other') # Only preserve the draws and runs requested - if draws is not None: - consumable_costs = consumable_costs[consumable_costs.draw.isin(draws)] - other_costs = other_costs[other_costs.draw.isin(draws)] - if runs is not None: - consumable_costs = consumable_costs[consumable_costs.run.isin(runs)] - other_costs = other_costs[other_costs.run.isin(runs)] + if _draws is not None: + consumable_costs = consumable_costs[consumable_costs.draw.isin(_draws)] + other_costs = other_costs[other_costs.draw.isin(_draws)] + if _runs is not None: + consumable_costs = consumable_costs[consumable_costs.run.isin(_runs)] + other_costs = other_costs[other_costs.run.isin(_runs)] # %% @@ -546,8 +546,8 @@ def get_equipment_used_by_district_and_facility(_df: pd.Series) -> pd.Series: equipment_cost_across_sim = pd.DataFrame() # Extract equipment cost for each draw and run - for d in draws: - for r in runs: + for d in _draws: + for r in _runs: print(f"Processing draw {d} and run {r} of equipment costs") # Extract a list of equipment which was used at each facility level within each district equipment_used = {district: {level: [] for level in fac_levels} for district in list(district_dict.values())} # create a dictionary with a key for each district and facility level @@ -654,7 +654,26 @@ def update_itemuse_for_level1b_using_level2_data(_df): var_name='stat', # New column name for the 'sub-category' of cost value_name='cost') - return scenario_cost + if _years is None: + return scenario_cost + else: + return scenario_cost[scenario_cost.year.isin(_years)] + +# Define a function to summarize cost data from +# Note that the dataframe needs to have draw as index and run as columns. 
if the dataframe is long with draw and run as index, then +# first unstack the dataframe and subsequently apply the summarize function +def summarize_cost_data(_df): + _df = _df.stack() + collapsed_df = _df.groupby(level='draw').agg([ + 'mean', + ('lower', lambda x: x.quantile(0.025)), + ('upper', lambda x: x.quantile(0.975)) + ]) + + collapsed_df = collapsed_df.unstack() + collapsed_df.index = collapsed_df.index.set_names('stat', level=0) + collapsed_df = collapsed_df.unstack(level='stat') + return collapsed_df # Plot costs #################################################### From b5cf29646089e6f993d63018d239950adaf993ee Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 4 Nov 2024 13:24:32 +0000 Subject: [PATCH 145/230] Move ROI plot to main cost_estimation script - and split the two Global fund analyses into two separate scripts - plot only the draws included in final analysis --- .../costing/cost_analysis_hss_elements.py | 238 ++++++++++++++++++ ...cost_analysis_htm_with_and_without_hss.py} | 212 +++++----------- src/scripts/costing/cost_estimation.py | 69 +++++ 3 files changed, 372 insertions(+), 147 deletions(-) create mode 100644 src/scripts/costing/cost_analysis_hss_elements.py rename src/scripts/costing/{cost_analysis_roi_of_hss.py => cost_analysis_htm_with_and_without_hss.py} (72%) diff --git a/src/scripts/costing/cost_analysis_hss_elements.py b/src/scripts/costing/cost_analysis_hss_elements.py new file mode 100644 index 0000000000..126a216487 --- /dev/null +++ b/src/scripts/costing/cost_analysis_hss_elements.py @@ -0,0 +1,238 @@ +import argparse +from pathlib import Path +from tlo import Date +from collections import Counter, defaultdict + +import calendar +import datetime +import os +import textwrap + +import matplotlib.pyplot as plt +from matplotlib.ticker import FuncFormatter +import numpy as np +import pandas as pd +import ast +import math + +from tlo.analysis.utils import ( + extract_params, + extract_results, + get_scenario_info, + get_scenario_outputs, + load_pickled_dataframes, + make_age_grp_lookup, + make_age_grp_types, + summarize, + create_pickles_locally, + parse_log_file, + unflatten_flattened_multi_index_in_logging +) + +from scripts.costing.cost_estimation import (estimate_input_cost_of_scenarios, + summarize_cost_data, + do_stacked_bar_plot_of_cost_by_category, + do_line_plot_of_cost, + generate_roi_plots) + +# Define a timestamp for script outputs +timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M") + +# Print the start time of the script +print('Script Start', datetime.datetime.now().strftime('%H:%M')) + +# Create folders to store results +resourcefilepath = Path("./resources") +outputfilepath = Path('./outputs/t.mangal@imperial.ac.uk') +figurespath = Path('./outputs/global_fund_roi_analysis/hss_elements/') +if not os.path.exists(figurespath): + os.makedirs(figurespath) +roi_outputs_folder = Path(figurespath / 'roi') +if not os.path.exists(roi_outputs_folder): + os.makedirs(roi_outputs_folder) + +# Load result files +# ------------------------------------------------------------------------------------------------------------------ +results_folder = get_scenario_outputs('hss_elements-2024-10-22T163857Z.py', outputfilepath)[0] + +# Check can read results from draw=0, run=0 +log = load_pickled_dataframes(results_folder, 0, 0) # look at one log (so can decide what to extract) +params = extract_params(results_folder) + +# Declare default parameters for cost analysis +# 
------------------------------------------------------------------------------------------------------------------ +# Period relevant for costing +TARGET_PERIOD_INTERVENTION = (Date(2025, 1, 1), Date(2035, 12, 31)) # This is the period that is costed +relevant_period_for_costing = [i.year for i in TARGET_PERIOD_INTERVENTION] +list_of_relevant_years_for_costing = list(range(relevant_period_for_costing[0], relevant_period_for_costing[1] + 1)) + +# Scenarios +hss_scenarios = {0: "Baseline", 1: "HRH Moderate Scale-up (1%)", 2: "HRH Scale-up Following Historical Growth", 3: "HRH Accelerated Scale-up (6%)", + 4: "Increase Capacity at Primary Care Levels", 5: "Increase Capacity of CHW", 6: "Consumables Increased to 75th Percentile", + 7: "Consumables Available at HIV levels", 8: "Consumables Available at EPI levels", 9: "Perfect Consumables Availability", + 10: "HSS PACKAGE: Perfect", 11: "HSS PACKAGE: Realistic expansion, no change in HSB", 12: "HSS PACKAGE: Realistic expansion"} +hss_scenarios_for_gf_report = [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 12] + +# Cost-effectiveness threshold +chosen_cet = 77.4 # based on Ochalek et al (2018) - the paper provided the value $61 in 2016 USD terms, this value is in 2023 USD terms + +# Estimate standard input costs of scenario +# ----------------------------------------------------------------------------------------------------------------------- +input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, + _years=list_of_relevant_years_for_costing, cost_only_used_staff=True) +# _draws = htm_scenarios_for_gf_report --> this subset is created after calculating malaria scale up costs +# TODO Reduce the cost of Oxygen and Depo-medroxy temporarily which we figure out the issue with this + +# %% +# Return on Invesment analysis +# Calculate incremental cost +# ----------------------------------------------------------------------------------------------------------------------- +# Aggregate input costs for further analysis +input_costs_subset = input_costs[ + (input_costs['year'] >= relevant_period_for_costing[0]) & (input_costs['year'] <= relevant_period_for_costing[1])] +# TODO the above step may not longer be needed +total_input_cost = input_costs_subset.groupby(['draw', 'run'])['cost'].sum() +total_input_cost_summarized = summarize_cost_data(total_input_cost.unstack(level='run')) +def find_difference_relative_to_comparison(_ser: pd.Series, + comparison: str, + scaled: bool = False, + drop_comparison: bool = True, + ): + """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) + within the runs (level 1), relative to where draw = `comparison`. + The comparison is `X - COMPARISON`.""" + return _ser \ + .unstack(level=0) \ + .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) \ + .drop(columns=([comparison] if drop_comparison else [])) \ + .stack() + + +incremental_scenario_cost = (pd.DataFrame( + find_difference_relative_to_comparison( + total_input_cost, + comparison=0) # sets the comparator to 0 which is the Actual scenario +).T.iloc[0].unstack()).T + +incremental_scenario_cost = incremental_scenario_cost[ + incremental_scenario_cost.index.get_level_values(0).isin(hss_scenarios_for_gf_report)] + + +# Monetary value of health impact +# ----------------------------------------------------------------------------------------------------------------------- +def get_num_dalys(_df): + """Return total number of DALYS (Stacked) by label (total within the TARGET_PERIOD). 
+ Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using + results from runs that crashed mid-way through the simulation. + """ + years_needed = relevant_period_for_costing # [i.year for i in TARGET_PERIOD_INTERVENTION] + assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." + return pd.Series( + data=_df + .loc[_df.year.between(*years_needed)] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum().sum() + ) + + +num_dalys = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys, + do_scaling=True +) + +# Get absolute DALYs averted +num_dalys_averted = (-1.0 * + pd.DataFrame( + find_difference_relative_to_comparison( + num_dalys.loc[0], + comparison=0) # sets the comparator to 0 which is the Actual scenario + ).T.iloc[0].unstack(level='run')) + +# The monetary value of the health benefit is delta health times CET (negative values are set to 0) +monetary_value_of_incremental_health = (num_dalys_averted * chosen_cet).clip(lower=0.0) +monetary_value_of_incremental_health = monetary_value_of_incremental_health[ + monetary_value_of_incremental_health.index.get_level_values(0).isin(hss_scenarios_for_gf_report)] +# TODO check that the above calculation is correct + +# 3. Return on Investment Plot +# ---------------------------------------------------- +# Plot ROI at various levels of cost +generate_roi_plots(_monetary_value_of_incremental_health=monetary_value_of_incremental_health, + _incremental_input_cost=incremental_scenario_cost, + _scenario_dict = hss_scenarios, + _outputfilepath=roi_outputs_folder) + +# 4. Plot Maximum ability-to-pay +# ---------------------------------------------------- +max_ability_to_pay_for_implementation = (monetary_value_of_incremental_health - incremental_scenario_cost).clip( + lower=0.0) # monetary value - change in costs +max_ability_to_pay_for_implementation_summarized = summarize_cost_data(max_ability_to_pay_for_implementation) +max_ability_to_pay_for_implementation_summarized = max_ability_to_pay_for_implementation_summarized[ + max_ability_to_pay_for_implementation_summarized.index.get_level_values(0).isin(hss_scenarios_for_gf_report)] + + +def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrapped=False): + """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the + extent of the error bar.""" + + yerr = np.array([ + (_df['mean'] - _df['lower']).values, + (_df['upper'] - _df['mean']).values, + ]) + + xticks = {(i + 1): k for i, k in enumerate(_df.index)} + + fig, ax = plt.subplots() + ax.bar( + xticks.keys(), + _df['mean'].values, + yerr=yerr, + alpha=1, + ecolor='black', + capsize=10, + label=xticks.values() + ) + ''' + if annotations: + for xpos, ypos, text in zip(xticks.keys(), _df['upper'].values, annotations): + ax.text(xpos, ypos * 1.05, text, horizontalalignment='center', fontsize=11) + + ax.set_xticks(list(xticks.keys())) + if not xticklabels_horizontal_and_wrapped: + wrapped_labs = ["\n".join(textwrap.wrap(_lab, 20)) for _lab in xticks.values()] + ax.set_xticklabels(wrapped_labs, rotation=45, ha='right', fontsize=10) + else: + wrapped_labs = ["\n".join(textwrap.wrap(_lab, 20)) for _lab in xticks.values()] + ax.set_xticklabels(wrapped_labs, fontsize=10) + ''' + + # Set font size for y-tick labels + ax.tick_params(axis='y', labelsize=12) + ax.tick_params(axis='x', labelsize=11) + + ax.grid(axis="y") + 
ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + fig.tight_layout() + + return fig, ax + + +# Plot Max ability to pay +name_of_plot = f'Maximum ability to pay, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' # f'Maximum ability to pay, {first_year_of_simulation} - {final_year_of_simulation}' +fig, ax = do_bar_plot_with_ci( + (max_ability_to_pay_for_implementation_summarized / 1e6), + annotations=[ + f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}-{round(row['upper'] / 1e6, 1)})" + for _, row in max_ability_to_pay_for_implementation_summarized.iterrows() + ], + xticklabels_horizontal_and_wrapped=False, +) +ax.set_title(name_of_plot) +ax.set_ylabel('Maximum ability to pay \n(Millions)') +fig.tight_layout() +fig.savefig(figurespath / name_of_plot.replace(' ', '_').replace(',', '')) +plt.close(fig) diff --git a/src/scripts/costing/cost_analysis_roi_of_hss.py b/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py similarity index 72% rename from src/scripts/costing/cost_analysis_roi_of_hss.py rename to src/scripts/costing/cost_analysis_htm_with_and_without_hss.py index 893b91afd4..a4b9cacda5 100644 --- a/src/scripts/costing/cost_analysis_roi_of_hss.py +++ b/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py @@ -30,8 +30,10 @@ ) from scripts.costing.cost_estimation import (estimate_input_cost_of_scenarios, + summarize_cost_data, do_stacked_bar_plot_of_cost_by_category, - do_line_plot_of_cost) + do_line_plot_of_cost, + generate_roi_plots) # Define a timestamp for script outputs timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M") @@ -39,65 +41,56 @@ print('Script Start', datetime.datetime.now().strftime('%H:%M')) # Create folders to store results -costing_outputs_folder = Path('./outputs/costing') -if not os.path.exists(costing_outputs_folder): - os.makedirs(costing_outputs_folder) -figurespath = costing_outputs_folder / "global_fund_roi_analysis" +resourcefilepath = Path("./resources") +outputfilepath = Path('./outputs/t.mangal@imperial.ac.uk') +figurespath = Path('./outputs/global_fund_roi_analysis/htm_with_and_without_hss') if not os.path.exists(figurespath): os.makedirs(figurespath) - -def summarize_cost_data(_df): - _df = _df.stack() - collapsed_df = _df.groupby(level='draw').agg([ - 'mean', - ('lower', lambda x: x.quantile(0.025)), - ('upper', lambda x: x.quantile(0.975)) - ]) - - collapsed_df = collapsed_df.unstack() - collapsed_df.index = collapsed_df.index.set_names('stat', level=0) - collapsed_df = collapsed_df.unstack(level='stat') - return collapsed_df +roi_outputs_folder = Path(figurespath / 'roi') +if not os.path.exists(roi_outputs_folder): + os.makedirs(roi_outputs_folder) # Load result files -#------------------- -#results_folder = get_scenario_outputs('htm_with_and_without_hss-2024-09-04T143044Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios version 1 -#results_folder = get_scenario_outputs('hss_elements-2024-09-04T142900Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios version 1 -resourcefilepath = Path("./resources") -outputfilepath = Path('./outputs/t.mangal@imperial.ac.uk') -results_folder = get_scenario_outputs('htm_with_and_without_hss-2024-10-22T163743Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios version 2 -#results_folder = get_scenario_outputs('hss_elements-2024-10-12T111649Z.py', outputfilepath)[0] # Tara's FCDO/GF scenarios version 2 +#------------------------------------------------------------------------------------------------------------------ 
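# --- Illustrative aside -------------------------------------------------------------------------
# Both analysis scripts in this patch follow the same run-by-run chain: incremental costs and
# DALYs averted are taken relative to the baseline draw (draw 0), the health gain is monetised at
# the chosen cost-effectiveness threshold, and the maximum ability to pay is that monetised gain
# minus the incremental cost, floored at zero. A minimal self-contained sketch with made-up
# numbers (not model outputs); the real scripts use find_difference_relative_to_comparison and
# summarize_cost_data as defined above.
import pandas as pd

chosen_cet = 77.4  # USD per DALY averted (2023 USD), as in the scripts

# Toy totals with a (draw, run) MultiIndex; draw 0 is the baseline comparator
idx = pd.MultiIndex.from_product([[0, 1], [0, 1]], names=['draw', 'run'])
total_input_cost_toy = pd.Series([100e6, 102e6, 160e6, 158e6], index=idx)
num_dalys_toy = pd.Series([9.0e6, 9.1e6, 7.9e6, 8.0e6], index=idx)

def incremental_vs_baseline(_ser):
    # Difference of each draw vs draw 0, computed separately for every run (rows: draw, cols: run)
    return (_ser.unstack(level='draw')
            .apply(lambda x: x - x[0], axis=1)
            .drop(columns=0)
            .T)

incremental_scenario_cost_toy = incremental_vs_baseline(total_input_cost_toy)
num_dalys_averted_toy = -1.0 * incremental_vs_baseline(num_dalys_toy)
monetary_value_of_incremental_health_toy = (num_dalys_averted_toy * chosen_cet).clip(lower=0.0)
max_ability_to_pay_toy = (monetary_value_of_incremental_health_toy
                          - incremental_scenario_cost_toy).clip(lower=0.0)
# -------------------------------------------------------------------------------------------------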
+results_folder = get_scenario_outputs('htm_with_and_without_hss-2024-10-22T163743Z.py', outputfilepath)[0] # Check can read results from draw=0, run=0 log = load_pickled_dataframes(results_folder, 0, 0) # look at one log (so can decide what to extract) params = extract_params(results_folder) + +# Declare default parameters for cost analysis +#------------------------------------------------------------------------------------------------------------------ +# Population scaling factor for malaria scale-up projections population_scaling_factor = log['tlo.methods.demography']['scaling_factor']['scaling_factor'].iloc[0] -TARGET_PERIOD_INTERVENTION = (Date(2025, 1, 1), Date(2035, 12, 31)) +# Load the list of districts and their IDs +district_dict = pd.read_csv(resourcefilepath / 'demography' / 'ResourceFile_Population_2010.csv')[ + ['District_Num', 'District']].drop_duplicates() +district_dict = dict(zip(district_dict['District_Num'], district_dict['District'])) + +# Period relevant for costing +TARGET_PERIOD_INTERVENTION = (Date(2025, 1, 1), Date(2035, 12, 31)) # This is the period that is costed relevant_period_for_costing = [i.year for i in TARGET_PERIOD_INTERVENTION] +list_of_relevant_years_for_costing = list(range(relevant_period_for_costing[0], relevant_period_for_costing[1] + 1)) + +# Scenarios htm_scenarios = {0:"Baseline", 1: "HSS PACKAGE: Perfect", 2: "HSS PACKAGE: Realistic", 3: "HIV Programs Scale-up WITHOUT HSS PACKAGE", 4: "HIV Programs Scale-up WITH FULL HSS PACKAGE", 5: "HIV Programs Scale-up WITH REALISTIC HSS PACKAGE", 6: "TB Programs Scale-up WITHOUT HSS PACKAGE", 7: "TB Programs Scale-up WITH FULL HSS PACKAGE", 8: "TB Programs Scale-up WITH REALISTIC HSS PACKAGE", 9: "Malaria Programs Scale-up WITHOUT HSS PACKAGE", 10: "Malaria Programs Scale-up WITH FULL HSS PACKAGE", 11: "Malaria Programs Scale-up WITH REALISTIC HSS PACKAGE", 12: "HTM Programs Scale-up WITHOUT HSS PACKAGE", 13: "HTM Programs Scale-up WITH FULL HSS PACKAGE", 14: "HTM Programs Scale-up WITH REALISTIC HSS PACKAGE", 15: "HTM Programs Scale-up WITH SUPPLY CHAINS", 16: "HTM Programs Scale-up WITH HRH"} +# Subset of scenarios included in analysis htm_scenarios_for_gf_report = [0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 16] -hss_scenarios = {0: "Baseline", 1: "HRH Moderate Scale-up (1%)", 2: "HRH Scale-up Following Historical Growth", 3: "HRH Accelerated Scale-up (6%)", - 4: "Increase Capacity at Primary Care Levels", 5: "Increase Capacity of CHW", 6: "Consumables Increased to 75th Percentile", - 7: "Consumables Available at HIV levels", 8: "Consumables Available at EPI levels", 9: "Perfect Consumables Availability", - 10: "HSS PACKAGE: Perfect", 11: "HSS PACKAGE: Realistic expansion, no change in HSB", 12: "HSS PACKAGE: Realistic expansion"} -hss_scenarios_for_gf_report = [0, 1, 3, 4, 6, 7, 8, 9, 10, 12] - -# Load the list of districts and their IDs -district_dict = pd.read_csv(resourcefilepath / 'demography' / 'ResourceFile_Population_2010.csv')[ - ['District_Num', 'District']].drop_duplicates() -district_dict = dict(zip(district_dict['District_Num'], district_dict['District'])) +# Cost-effectiveness threshold +chosen_cet = 77.4 # based on Ochalek et al (2018) - the paper provided the value $61 in 2016 USD terms, this value is in 2023 USD terms # Estimate standard input costs of scenario #----------------------------------------------------------------------------------------------------------------------- -input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath , cost_only_used_staff=True) # 
summarise = True -#input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath , draws = draws_included, cost_only_used_staff=True, summarize = True) +input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, + _years = list_of_relevant_years_for_costing, cost_only_used_staff=True) +# _draws = htm_scenarios_for_gf_report --> this subset is created after calculating malaria scale up costs -# Add additional costs pertaining to simulation +# Add additional costs pertaining to simulation (Only for scenarios with Malaria scale-up) #----------------------------------------------------------------------------------------------------------------------- # Extract supply chain cost as a proportion of consumable costs to apply to malaria scale-up commodities # Load primary costing resourcefile @@ -222,12 +215,16 @@ def melt_and_label_malaria_scaleup_cost(_df, label): # TODO Reduce the cost of Oxygen and Depo-medroxy temporarily which we figure out the issue with this +# %% +# Return on Invesment analysis +# Calculate incremental cost +#----------------------------------------------------------------------------------------------------------------------- # Aggregate input costs for further analysis input_costs_subset = input_costs[(input_costs['year'] >= relevant_period_for_costing[0]) & (input_costs['year'] <= relevant_period_for_costing[1])] -total_input_cost = input_costs_subset.groupby(['draw', 'run'])['cost'].sum() +# TODO the above step may not longer be needed -# Calculate incremental cost -#----------------------------------------------------------------------------------------------------------------------- +total_input_cost = input_costs_subset.groupby(['draw', 'run'])['cost'].sum() +total_input_cost_summarized = summarize_cost_data(total_input_cost.unstack(level = 'run')) def find_difference_relative_to_comparison(_ser: pd.Series, comparison: str, @@ -243,16 +240,15 @@ def find_difference_relative_to_comparison(_ser: pd.Series, .drop(columns=([comparison] if drop_comparison else [])) \ .stack() - -# TODO the following calculation should first capture the different by run and then be summarised incremental_scenario_cost = (pd.DataFrame( find_difference_relative_to_comparison( total_input_cost, comparison= 0) # sets the comparator to 0 which is the Actual scenario ).T.iloc[0].unstack()).T +incremental_scenario_cost = incremental_scenario_cost[incremental_scenario_cost.index.get_level_values(0).isin(htm_scenarios_for_gf_report)] -# %% # Monetary value of health impact +#----------------------------------------------------------------------------------------------------------------------- def get_num_dalys(_df): """Return total number of DALYS (Stacked) by label (total within the TARGET_PERIOD). 
Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using @@ -275,10 +271,6 @@ def get_num_dalys(_df): do_scaling=True ) -#num_dalys_summarized = summarize(num_dalys).loc[0].unstack() -#num_dalys_summarized['scenario'] = scenarios.to_list() # add when scenarios have names -#num_dalys_summarized = num_dalys_summarized.set_index('scenario') - # Get absolute DALYs averted num_dalys_averted =(-1.0 * pd.DataFrame( @@ -287,105 +279,24 @@ def get_num_dalys(_df): comparison= 0) # sets the comparator to 0 which is the Actual scenario ).T.iloc[0].unstack(level = 'run')) -#num_dalys = num_dalys.loc[0].unstack() -#num_dalys_averted = num_dalys_averted[num_dalys_averted.index.get_level_values(0).isin(draws_included)] -#num_dalys_averted['scenario'] = scenarios.to_list()[1:12] -#num_dalys_averted = num_dalys_averted.set_index('scenario') - -chosen_cet = 77.4 # based on Ochalek et al (2018) - the paper provided the value $61 in 2016 USD terms, this value is in 2023 USD terms -monetary_value_of_incremental_health = (num_dalys_averted * chosen_cet).clip(0.0) -max_ability_to_pay_for_implementation = (monetary_value_of_incremental_health - incremental_scenario_cost).clip(0.0) # monetary value - change in costs +# The monetary value of the health benefit is delta health times CET (negative values are set to 0) +monetary_value_of_incremental_health = (num_dalys_averted * chosen_cet).clip(lower = 0.0) +monetary_value_of_incremental_health = monetary_value_of_incremental_health[monetary_value_of_incremental_health.index.get_level_values(0).isin(htm_scenarios_for_gf_report)] #TODO check that the above calculation is correct -# Plot costs -#----------------------------------------------------------------------------------------------------------------------- -do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'medical consumables', _disaggregate_by_subgroup = True, _year = [2018], _outputfilepath = figurespath) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'human resources for health', _disaggregate_by_subgroup = True, _year = [2018], _outputfilepath = figurespath) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'medical equipment', _disaggregate_by_subgroup = True, _year = [2018], _outputfilepath = figurespath) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'other', _year = [2018], _outputfilepath = figurespath) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _year = list(range(2020, 2030)), _outputfilepath = figurespath) - -do_line_plot_of_cost(_df = input_costs, _cost_category = 'medical consumables', _year = 'all', _draws = [0], disaggregate_by= 'cost_subgroup',_outputfilepath = figurespath) -do_line_plot_of_cost(_df = input_costs, _cost_category = 'other', _year = 'all', _draws = [0], disaggregate_by= 'cost_subgroup',_outputfilepath = figurespath) -do_line_plot_of_cost(_df = input_costs, _cost_category = 'human resources for health', _year = 'all', _draws = [0], disaggregate_by= 'cost_subgroup',_outputfilepath = figurespath) -do_line_plot_of_cost(_df = input_costs, _cost_category = 'human resources for health', _year = 'all', _draws = [0], disaggregate_by= 'cost_subcategory', _outputfilepath = figurespath) -do_line_plot_of_cost(_df = input_costs, _cost_category = 'medical equipment', _year = 'all', _draws = None, _outputfilepath = figurespath) -do_line_plot_of_cost(_df = input_costs, _cost_category = 'other', _year = 'all', _draws = None, 
_outputfilepath = figurespath) -do_line_plot_of_cost(_df = input_costs, _cost_category = 'all', _year = 'all', disaggregate_by= 'cost_category', _draws = None, _outputfilepath = figurespath) - # 3. Return on Investment Plot #---------------------------------------------------- # Plot ROI at various levels of cost -roi_outputs_folder = Path(figurespath / 'roi' / 'htm') -if not os.path.exists(roi_outputs_folder): - os.makedirs(roi_outputs_folder) - -# Iterate over each draw in monetary_value_of_incremental_health -for draw_index, row in monetary_value_of_incremental_health.iterrows(): - # Initialize an empty DataFrame to store values for each 'run' - all_run_values = pd.DataFrame() - - # Create an array of implementation costs ranging from 0 to the max value of max ability to pay for the current draw - implementation_costs = np.linspace(0, max_ability_to_pay_for_implementation.loc[draw_index].max(), 50) - - # Retrieve the corresponding row from incremental_scenario_cost for the same draw - scenario_cost_row = incremental_scenario_cost.loc[draw_index] - - # Calculate the values for each individual run - for run in scenario_cost_row.index: # Assuming 'run' columns are labeled by numbers - # Calculate the cost-effectiveness metric for the current run - run_values = (row[run] - (implementation_costs + scenario_cost_row[run])) / ( - implementation_costs + scenario_cost_row[run]) - - # Create a DataFrame with index as (draw_index, run) and columns as implementation costs - run_df = pd.DataFrame([run_values], index=pd.MultiIndex.from_tuples([(draw_index, run)], names=['draw', 'run']), - columns=implementation_costs) - - # Append the run DataFrame to all_run_values - all_run_values = pd.concat([all_run_values, run_df]) - - collapsed_data = all_run_values.groupby(level='draw').agg([ - 'mean', - ('lower', lambda x: x.quantile(0.025)), - ('upper', lambda x: x.quantile(0.975)) - ]) - - collapsed_data = collapsed_data.unstack() - collapsed_data.index = collapsed_data.index.set_names('implementation_cost', level=0) - collapsed_data.index = collapsed_data.index.set_names('stat', level=1) - collapsed_data = collapsed_data.reset_index().rename(columns = {0: 'roi'}) - #collapsed_data = collapsed_data.reorder_levels(['draw', 'stat', 'implementation_cost']) - - # Divide rows by the sum of implementation costs and incremental input cost - mean_values = collapsed_data[collapsed_data['stat'] == 'mean'][['implementation_cost', 'roi']] - lower_values = collapsed_data[collapsed_data['stat'] == 'lower'][['implementation_cost', 'roi']] - upper_values = collapsed_data[collapsed_data['stat'] == 'upper'][['implementation_cost', 'roi']] - - # Plot mean line - plt.plot(implementation_costs / 1e6, mean_values['roi'], label=f'{htm_scenarios[draw_index]}') - # Plot the confidence interval as a shaded region - plt.fill_between(implementation_costs / 1e6, lower_values['roi'], upper_values['roi'], alpha=0.2) - - plt.xlabel('Implementation cost, millions') - plt.ylabel('Return on Investment') - plt.title('Return on Investment of scenario at different levels of implementation cost') - - #plt.text(x=0.95, y=0.8, - # s=f"Monetary value of incremental health = USD {round(monetary_value_of_incremental_health.loc[draw_index]['mean'] / 1e6, 2)}m (USD {round(monetary_value_of_incremental_health.loc[draw_index]['lower'] / 1e6, 2)}m-{round(monetary_value_of_incremental_health.loc[draw_index]['upper'] / 1e6, 2)}m);\n " - # f"Incremental input cost of scenario = USD {round(scenario_cost_row['mean'] / 1e6, 2)}m (USD 
{round(scenario_cost_row['lower'] / 1e6, 2)}m-{round(scenario_cost_row['upper'] / 1e6, 2)}m)", - # horizontalalignment='right', verticalalignment='top', transform=plt.gca().transAxes, fontsize=9, - # weight='bold', color='black') - - # Show legend - plt.legend() - # Save - plt.savefig(figurespath / f'roi/htm/draw{draw_index}_{htm_scenarios[draw_index]}_ROI.png', dpi=100, - bbox_inches='tight') - plt.close() +generate_roi_plots(_monetary_value_of_incremental_health = monetary_value_of_incremental_health, + _incremental_input_cost = incremental_scenario_cost, + _outputfilepath = roi_outputs_folder) # 4. Plot Maximum ability-to-pay #---------------------------------------------------- +max_ability_to_pay_for_implementation = (monetary_value_of_incremental_health - incremental_scenario_cost).clip(lower = 0.0) # monetary value - change in costs max_ability_to_pay_for_implementation_summarized = summarize_cost_data(max_ability_to_pay_for_implementation) +max_ability_to_pay_for_implementation_summarized = max_ability_to_pay_for_implementation_summarized[max_ability_to_pay_for_implementation_summarized.index.get_level_values(0).isin(htm_scenarios_for_gf_report)] + def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrapped=False): """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the extent of the error bar.""" @@ -443,16 +354,23 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe xticklabels_horizontal_and_wrapped=False, ) ax.set_title(name_of_plot) -#ax.set_ylim(0, 120) -#ax.set_yticks(np.arange(0, 120, 10)) ax.set_ylabel('Maximum ability to pay \n(Millions)') fig.tight_layout() fig.savefig(figurespath / name_of_plot.replace(' ', '_').replace(',', '')) -fig.show() plt.close(fig) -''' -#years_with_no_malaria_scaleup = set(TARGET_PERIOD).symmetric_difference(set(TARGET_PERIOD_MALARIA_SCALEUP)) -#years_with_no_malaria_scaleup = sorted(list(years_with_no_malaria_scaleup)) -#years_with_no_malaria_scaleup = [i.year for i in years_with_no_malaria_scaleup] -''' +# Plot costs +#----------------------------------------------------------------------------------------------------------------------- +do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'medical consumables', _disaggregate_by_subgroup = True, _year = [2018], _outputfilepath = figurespath) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'human resources for health', _disaggregate_by_subgroup = True, _year = [2018], _outputfilepath = figurespath) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'medical equipment', _disaggregate_by_subgroup = True, _year = [2018], _outputfilepath = figurespath) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'other', _year = [2018], _outputfilepath = figurespath) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _year = list(range(2020, 2030)), _outputfilepath = figurespath) + +do_line_plot_of_cost(_df = input_costs, _cost_category = 'medical consumables', _year = 'all', _draws = [0], disaggregate_by= 'cost_subgroup',_outputfilepath = figurespath) +do_line_plot_of_cost(_df = input_costs, _cost_category = 'other', _year = 'all', _draws = [0], disaggregate_by= 'cost_subgroup',_outputfilepath = figurespath) +do_line_plot_of_cost(_df = input_costs, _cost_category = 'human resources for health', _year = 'all', _draws = [0], disaggregate_by= 'cost_subgroup',_outputfilepath = figurespath) 
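# --- Illustrative aside: the ROI curve convention used by generate_roi_plots --------------------
# As per the commit message, ROI is (benefits - costs) / costs, compared run to run rather than
# stat to stat: for a given draw, ROI(c) = (monetised health gain - (c + incremental scenario
# cost)) / (c + incremental scenario cost), evaluated over a grid of hypothetical additional
# implementation costs c and then summarised across runs by the mean and the 2.5th/97.5th
# percentiles. The sketch below uses made-up per-run values (names and numbers are illustrative
# only); the actual implementation is the generate_roi_plots helper added to cost_estimation.py
# further down in this patch.
import numpy as np

monetary_value_of_health = np.array([95e6, 88e6])    # per-run monetised health gain, one draw
incremental_cost = np.array([60e6, 55e6])            # per-run incremental input cost, same draw
implementation_costs = np.linspace(0, 30e6, 50)      # grid of additional implementation costs c

roi_by_run = np.array([
    (b - (implementation_costs + c)) / (implementation_costs + c)
    for b, c in zip(monetary_value_of_health, incremental_cost)
])
roi_mean = roi_by_run.mean(axis=0)
roi_lower, roi_upper = np.quantile(roi_by_run, [0.025, 0.975], axis=0)
# -------------------------------------------------------------------------------------------------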
+do_line_plot_of_cost(_df = input_costs, _cost_category = 'human resources for health', _year = 'all', _draws = [0], disaggregate_by= 'cost_subcategory', _outputfilepath = figurespath) +do_line_plot_of_cost(_df = input_costs, _cost_category = 'medical equipment', _year = 'all', _draws = None, _outputfilepath = figurespath) +do_line_plot_of_cost(_df = input_costs, _cost_category = 'other', _year = 'all', _draws = None, _outputfilepath = figurespath) +do_line_plot_of_cost(_df = input_costs, _cost_category = 'all', _year = 'all', disaggregate_by= 'cost_category', _draws = None, _outputfilepath = figurespath) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 654b1fe541..ec58d16194 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -876,6 +876,75 @@ def do_line_plot_of_cost(_df, _cost_category='all', _year='all', _draws=None, di plt.savefig(_outputfilepath / filename, dpi=100, bbox_inches='tight') plt.close() +# Plot ROI +def generate_roi_plots(_monetary_value_of_incremental_health, _incremental_input_cost, _scenario_dict, _outputfilepath): + # Calculate maximum ability to pay for implementation + max_ability_to_pay_for_implementation = (_monetary_value_of_incremental_health - _incremental_input_cost).clip( + lower=0.0) # monetary value - change in costs + + # Iterate over each draw in monetary_value_of_incremental_health + for draw_index, row in _monetary_value_of_incremental_health.iterrows(): + # Initialize an empty DataFrame to store values for each 'run' + all_run_values = pd.DataFrame() + + # Create an array of implementation costs ranging from 0 to the max value of max ability to pay for the current draw + implementation_costs = np.linspace(0, max_ability_to_pay_for_implementation.loc[draw_index].max(), 50) + + # Retrieve the corresponding row from incremental_scenario_cost for the same draw + scenario_cost_row = _incremental_input_cost.loc[draw_index] + + # Calculate the values for each individual run + for run in scenario_cost_row.index: # Assuming 'run' columns are labeled by numbers + # Calculate the cost-effectiveness metric for the current run + run_values = (row[run] - (implementation_costs + scenario_cost_row[run])) / ( + implementation_costs + scenario_cost_row[run]) + + # Create a DataFrame with index as (draw_index, run) and columns as implementation costs + run_df = pd.DataFrame([run_values], index=pd.MultiIndex.from_tuples([(draw_index, run)], names=['draw', 'run']), + columns=implementation_costs) + + # Append the run DataFrame to all_run_values + all_run_values = pd.concat([all_run_values, run_df]) + + collapsed_data = all_run_values.groupby(level='draw').agg([ + 'mean', + ('lower', lambda x: x.quantile(0.025)), + ('upper', lambda x: x.quantile(0.975)) + ]) + + collapsed_data = collapsed_data.unstack() + collapsed_data.index = collapsed_data.index.set_names('implementation_cost', level=0) + collapsed_data.index = collapsed_data.index.set_names('stat', level=1) + collapsed_data = collapsed_data.reset_index().rename(columns = {0: 'roi'}) + #collapsed_data = collapsed_data.reorder_levels(['draw', 'stat', 'implementation_cost']) + + # Divide rows by the sum of implementation costs and incremental input cost + mean_values = collapsed_data[collapsed_data['stat'] == 'mean'][['implementation_cost', 'roi']] + lower_values = collapsed_data[collapsed_data['stat'] == 'lower'][['implementation_cost', 'roi']] + upper_values = collapsed_data[collapsed_data['stat'] == 
'upper'][['implementation_cost', 'roi']] + + # Plot mean line + plt.plot(implementation_costs / 1e6, mean_values['roi'], label=f'{_scenario_dict[draw_index]}') + # Plot the confidence interval as a shaded region + plt.fill_between(implementation_costs / 1e6, lower_values['roi'], upper_values['roi'], alpha=0.2) + + plt.xlabel('Implementation cost, millions') + plt.ylabel('Return on Investment') + plt.title('Return on Investment of scenario at different levels of implementation cost') + + #plt.text(x=0.95, y=0.8, + # s=f"Monetary value of incremental health = USD {round(monetary_value_of_incremental_health.loc[draw_index]['mean'] / 1e6, 2)}m (USD {round(monetary_value_of_incremental_health.loc[draw_index]['lower'] / 1e6, 2)}m-{round(monetary_value_of_incremental_health.loc[draw_index]['upper'] / 1e6, 2)}m);\n " + # f"Incremental input cost of scenario = USD {round(scenario_cost_row['mean'] / 1e6, 2)}m (USD {round(scenario_cost_row['lower'] / 1e6, 2)}m-{round(scenario_cost_row['upper'] / 1e6, 2)}m)", + # horizontalalignment='right', verticalalignment='top', transform=plt.gca().transAxes, fontsize=9, + # weight='bold', color='black') + + # Show legend + plt.legend() + # Save + plt.savefig(_outputfilepath / f'draw{draw_index}_{_scenario_dict[draw_index]}_ROI.png', dpi=100, + bbox_inches='tight') + plt.close() + ''' # Scratch pad # TODO all these HR plots need to be looked at From 2cb3fa83b886c2004d7f7c2b857527203640a0a1 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 4 Nov 2024 18:02:01 +0000 Subject: [PATCH 146/230] add other cost plot to GF analysis --- .../costing/cost_analysis_hss_elements.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/scripts/costing/cost_analysis_hss_elements.py b/src/scripts/costing/cost_analysis_hss_elements.py index 126a216487..071356cf1b 100644 --- a/src/scripts/costing/cost_analysis_hss_elements.py +++ b/src/scripts/costing/cost_analysis_hss_elements.py @@ -236,3 +236,26 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe fig.tight_layout() fig.savefig(figurespath / name_of_plot.replace(' ', '_').replace(',', '')) plt.close(fig) + +# 4. 
Plot costs +# ---------------------------------------------------- +input_costs_for_plot = input_costs[input_costs.draw.isin(hss_scenarios_for_gf_report)] +# First summarize all input costs +input_costs_for_plot_summarized = input_costs_for_plot.groupby(['draw', 'year', 'cost_subcategory', 'Facility_Level', 'cost_subgroup', 'cost_category']).agg( + mean=('cost', 'mean'), + lower=('cost', lambda x: x.quantile(0.025)), + upper=('cost', lambda x: x.quantile(0.975)) +).reset_index() +input_costs_for_plot_summarized = input_costs_for_plot_summarized.melt( + id_vars=['draw', 'year', 'cost_subcategory', 'Facility_Level', 'cost_subgroup', 'cost_category'], + value_vars=['mean', 'lower', 'upper'], + var_name='stat', + value_name='cost' +) + +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _disaggregate_by_subgroup = False, _outputfilepath = figurespath) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = [2025], _disaggregate_by_subgroup = False, _outputfilepath = figurespath) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'human resources for health', _disaggregate_by_subgroup = False, _outputfilepath = figurespath) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical consumables', _disaggregate_by_subgroup = False, _outputfilepath = figurespath) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical equipment', _disaggregate_by_subgroup = False, _outputfilepath = figurespath) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'other', _disaggregate_by_subgroup = False, _outputfilepath = figurespath) From 5095e19ac6e92b91c1077f02026c98230319f74c Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 5 Nov 2024 12:55:21 +0000 Subject: [PATCH 147/230] wrap the x-tick labels in the stacked bar plots --- src/scripts/costing/cost_estimation.py | 18 +++++++++++++++--- src/scripts/costing/costing_validation.py | 22 ++++++++++++++++++---- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index ec58d16194..c69bf815f8 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -679,7 +679,10 @@ def summarize_cost_data(_df): #################################################### # 1. 
Stacked bar plot (Total cost + Cost categories) #---------------------------------------------------- -def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', _disaggregate_by_subgroup: bool = False,_year = 'all', _draws = None, _outputfilepath: Path = None): +def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', + _disaggregate_by_subgroup: bool = False, + _year = 'all', _draws = None, + _outputfilepath: Path = None): # Subset and Pivot the data to have 'Cost Sub-category' as columns # Make a copy of the dataframe to avoid modifying the original _df = _df[_df.stat == 'mean'].copy() @@ -735,6 +738,10 @@ def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', _disagg # Plot the stacked bar chart ax = pivot_df.plot(kind='bar', stacked=True, figsize=(10, 6)) + # Format the x-tick labels to wrap text + labels = [textwrap.fill(label.get_text(), 10) for label in ax.get_xticklabels()] + ax.set_xticklabels(labels, rotation=45, ha='right') + # Period included for plot title and name if _year == 'all': period = (f"{min(_df['year'].unique())} - {max(_df['year'].unique())}") @@ -759,7 +766,9 @@ def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', _disagg # 2. Line plots of total costs #---------------------------------------------------- # TODO: Check why line plot get save without a file name -def do_line_plot_of_cost(_df, _cost_category='all', _year='all', _draws=None, disaggregate_by=None, +def do_line_plot_of_cost(_df, _cost_category='all', + _year='all', _draws=None, + disaggregate_by=None, _outputfilepath: Path = None): # Validate disaggregation options valid_disaggregations = ['cost_category', 'cost_subcategory', 'cost_subgroup'] @@ -877,7 +886,10 @@ def do_line_plot_of_cost(_df, _cost_category='all', _year='all', _draws=None, di plt.close() # Plot ROI -def generate_roi_plots(_monetary_value_of_incremental_health, _incremental_input_cost, _scenario_dict, _outputfilepath): +def generate_roi_plots(_monetary_value_of_incremental_health: pd.DataFrame, + _incremental_input_cost: pd.DataFrame, + _scenario_dict: dict, + _outputfilepath: Path): # Calculate maximum ability to pay for implementation max_ability_to_pay_for_implementation = (_monetary_value_of_incremental_health - _incremental_input_cost).clip( lower=0.0) # monetary value - change in costs diff --git a/src/scripts/costing/costing_validation.py b/src/scripts/costing/costing_validation.py index 5db667a4ad..1e3af80b22 100644 --- a/src/scripts/costing/costing_validation.py +++ b/src/scripts/costing/costing_validation.py @@ -73,11 +73,11 @@ # Load result files resourcefilepath = Path("./resources") outputfilepath = Path('./outputs/t.mangal@imperial.ac.uk') -results_folder = get_scenario_outputs('htm_with_and_without_hss-2024-10-12T111720Z.py', outputfilepath)[0] +results_folder = get_scenario_outputs('htm_with_and_without_hss-2024-10-22T163743Z.py', outputfilepath)[0] # Estimate costs for 2018 -input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, draws = [0], summarize = True, cost_only_used_staff=False) -input_costs = input_costs[input_costs.year == 2018] +input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, _years = [2018], _draws = [0], summarize = True, cost_only_used_staff=False) +#input_costs = input_costs[input_costs.year == 2018] # Manually create a dataframe of model costs and relevant calibration values def assign_item_codes_to_consumables(_df): @@ -161,7 +161,7 @@ def 
get_calibration_relevant_subset_of_other_costs(_df, _subcategory, _calibrati calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = antimalarials, _calibration_category = 'Antimalarials')) calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = malaria_rdts, _calibration_category = 'Malaria RDTs')) calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = [191, 196], _calibration_category = 'HIV Screening/Diagnostic Tests') + - get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = [190], _calibration_category = 'HIV Screening/Diagnostic Tests')/4) + get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = [190], _calibration_category = 'HIV Screening/Diagnostic Tests')) # TODO update above when VL test quantity is adjusted in the module - currently 4 tests per year are assumed calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = condoms, _calibration_category = 'Condoms and Lubricants')) calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = tb_tests, _calibration_category = 'TB Tests (including RDTs)')) @@ -298,6 +298,20 @@ def do_cost_calibration_plot(_df, _costs_included): do_cost_calibration_plot(calibration_data,all_calibration_costs) calibration_data.to_csv(figurespath / 'calibration/calibration.csv') +# Stacked bar charts to represent all cost sub-groups +do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'medical consumables', + _disaggregate_by_subgroup = True, + _outputfilepath = calibration_outputs_folder) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'human resources for health', + _disaggregate_by_subgroup = True, + _outputfilepath = calibration_outputs_folder) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'medical equipment', + _disaggregate_by_subgroup = True, + _outputfilepath = calibration_outputs_folder) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'other', + _disaggregate_by_subgroup = True, + _outputfilepath = calibration_outputs_folder) + ''' # Calibration scatter plots From fea5ad2ff2acf470ae471bf52c7d7b75cc73f822 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 5 Nov 2024 19:08:55 +0000 Subject: [PATCH 148/230] correct supply chain costs --- .../cost_analysis_htm_with_and_without_hss.py | 32 ++++++++++++++----- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py b/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py index a4b9cacda5..a0e8140c7f 100644 --- a/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py +++ b/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py @@ -114,7 +114,8 @@ supply_chain_expenditure = \ resource_mapping_data[resource_mapping_data['Cost Type'] == 'Supply 
Chain'][expenditure_column].sum()[0] consumables_purchase_expenditure = \ -resource_mapping_data[resource_mapping_data['Cost Type'] == 'Drugs and Commodities'][expenditure_column].sum()[0] +resource_mapping_data[resource_mapping_data['Cost Type'] == 'Drugs and Commodities'][expenditure_column].sum()[0] + \ +resource_mapping_data[resource_mapping_data['Cost Type'] == 'HIV Drugs and Commodities'][expenditure_column].sum()[0] supply_chain_cost_proportion = supply_chain_expenditure / consumables_purchase_expenditure # In this case malaria intervention scale-up costs were not included in the standard estimate_input_cost_of_scenarios function @@ -289,6 +290,7 @@ def get_num_dalys(_df): # Plot ROI at various levels of cost generate_roi_plots(_monetary_value_of_incremental_health = monetary_value_of_incremental_health, _incremental_input_cost = incremental_scenario_cost, + _scenario_dict = htm_scenarios, _outputfilepath = roi_outputs_folder) # 4. Plot Maximum ability-to-pay @@ -359,13 +361,27 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe fig.savefig(figurespath / name_of_plot.replace(' ', '_').replace(',', '')) plt.close(fig) -# Plot costs -#----------------------------------------------------------------------------------------------------------------------- -do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'medical consumables', _disaggregate_by_subgroup = True, _year = [2018], _outputfilepath = figurespath) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'human resources for health', _disaggregate_by_subgroup = True, _year = [2018], _outputfilepath = figurespath) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'medical equipment', _disaggregate_by_subgroup = True, _year = [2018], _outputfilepath = figurespath) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'other', _year = [2018], _outputfilepath = figurespath) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _year = list(range(2020, 2030)), _outputfilepath = figurespath) +# 4. 
Plot costs +# ---------------------------------------------------- +input_costs_for_plot = input_costs[input_costs.draw.isin(htm_scenarios_for_gf_report)] +# First summarize all input costs +input_costs_for_plot_summarized = input_costs_for_plot.groupby(['draw', 'year', 'cost_subcategory', 'Facility_Level', 'cost_subgroup', 'cost_category']).agg( + mean=('cost', 'mean'), + lower=('cost', lambda x: x.quantile(0.025)), + upper=('cost', lambda x: x.quantile(0.975)) +).reset_index() +input_costs_for_plot_summarized = input_costs_for_plot_summarized.melt( + id_vars=['draw', 'year', 'cost_subcategory', 'Facility_Level', 'cost_subgroup', 'cost_category'], + value_vars=['mean', 'lower', 'upper'], + var_name='stat', + value_name='cost' +) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _disaggregate_by_subgroup = False, _outputfilepath = figurespath) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = [2025], _disaggregate_by_subgroup = False, _outputfilepath = figurespath) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'human resources for health', _disaggregate_by_subgroup = False, _outputfilepath = figurespath) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical consumables', _disaggregate_by_subgroup = False, _outputfilepath = figurespath) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical equipment', _disaggregate_by_subgroup = False, _outputfilepath = figurespath) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'other', _disaggregate_by_subgroup = False, _outputfilepath = figurespath) do_line_plot_of_cost(_df = input_costs, _cost_category = 'medical consumables', _year = 'all', _draws = [0], disaggregate_by= 'cost_subgroup',_outputfilepath = figurespath) do_line_plot_of_cost(_df = input_costs, _cost_category = 'other', _year = 'all', _draws = [0], disaggregate_by= 'cost_subgroup',_outputfilepath = figurespath) From 7900a8f96b1d29766f9998385cf6b9e59d8880d8 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 5 Nov 2024 19:09:40 +0000 Subject: [PATCH 149/230] correct supply chain costs --- src/scripts/costing/cost_estimation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index c69bf815f8..c05dc602e9 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -488,7 +488,10 @@ def disaggregate_separately_managed_medical_supplies_from_consumable_costs(_df, expenditure_column = ['EXPENDITURE (USD) (Jul 2018 - Jun 2019)'] resource_mapping_data[expenditure_column] = resource_mapping_data[expenditure_column].apply(lambda x: pd.to_numeric(x, errors='coerce')) supply_chain_expenditure = resource_mapping_data[resource_mapping_data['Cost Type'] == 'Supply Chain'][expenditure_column].sum()[0] - consumables_purchase_expenditure = resource_mapping_data[resource_mapping_data['Cost Type'] == 'Drugs and Commodities'][expenditure_column].sum()[0] + consumables_purchase_expenditure = resource_mapping_data[resource_mapping_data['Cost Type'] == 'Drugs and Commodities'][expenditure_column].sum()[ + 0] + \ + resource_mapping_data[resource_mapping_data['Cost Type'] == 'HIV Drugs and Commodities'][ + expenditure_column].sum()[0] 
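# --- Illustrative sketch (editorial note, not part of the patch): the supply-chain mark-up
# applied in this hunk. Supply-chain costs are modelled as a fixed proportion of consumable
# purchase expenditure, and the denominator now covers both the general and the HIV commodity
# lines of the resource mapping. The figures below are placeholders, not values from the
# resource mapping file.
supply_chain_expenditure_example = 25e6
drugs_and_commodities_expenditure_example = 90e6
hiv_drugs_and_commodities_expenditure_example = 35e6

consumables_purchase_expenditure_example = (drugs_and_commodities_expenditure_example
                                            + hiv_drugs_and_commodities_expenditure_example)
supply_chain_proportion_example = (supply_chain_expenditure_example
                                   / consumables_purchase_expenditure_example)  # 0.2 with these placeholders
modelled_consumable_purchase_cost_example = 1.5e6
estimated_supply_chain_cost_example = supply_chain_proportion_example * modelled_consumable_purchase_cost_example  # 0.3e6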
supply_chain_cost_proportion = supply_chain_expenditure / consumables_purchase_expenditure # Estimate supply chain costs based on the total consumable purchase cost calculated above From 68851664c4b9ebb361564e9ccfca636c6b493132 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 6 Nov 2024 09:03:42 +0000 Subject: [PATCH 150/230] manual fix for depo and oxygen costs --- src/scripts/costing/cost_analysis_hss_elements.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/scripts/costing/cost_analysis_hss_elements.py b/src/scripts/costing/cost_analysis_hss_elements.py index 071356cf1b..5d9990897d 100644 --- a/src/scripts/costing/cost_analysis_hss_elements.py +++ b/src/scripts/costing/cost_analysis_hss_elements.py @@ -81,7 +81,11 @@ input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, _years=list_of_relevant_years_for_costing, cost_only_used_staff=True) # _draws = htm_scenarios_for_gf_report --> this subset is created after calculating malaria scale up costs -# TODO Reduce the cost of Oxygen and Depo-medroxy temporarily which we figure out the issue with this +# TODO Remove the manual fix below once the logging for these is corrected +input_costs[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders'] = \ + input_costs[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders']/10 +input_costs[input_costs.cost_subgroup == 'Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly'] = \ + input_costs[input_costs.cost_subgroup == 'Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly']/7 # %% # Return on Invesment analysis From 849a14798bbe4c612fa46d8b73cb9c5c988621c2 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 6 Nov 2024 13:55:24 +0000 Subject: [PATCH 151/230] Allow for x tick labels to be taken from a dictionary in stacked bar plots --- src/scripts/costing/cost_estimation.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index c05dc602e9..44ee82cf74 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -36,8 +36,11 @@ print('Script Start', datetime.datetime.now().strftime('%H:%M')) #%% -def estimate_input_cost_of_scenarios(results_folder: Path, resourcefilepath: Path = None, _draws = None, _runs = None, - summarize: bool = False, _years = None, cost_only_used_staff: bool = True): +def estimate_input_cost_of_scenarios(results_folder: Path, + resourcefilepath: Path = None, + _draws = None, _runs = None, + summarize: bool = False, _years = None, + cost_only_used_staff: bool = True): # Useful common functions def drop_outside_period(_df): """Return a dataframe which only includes for which the date is within the limits defined by TARGET_PERIOD""" @@ -685,6 +688,7 @@ def summarize_cost_data(_df): def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', _disaggregate_by_subgroup: bool = False, _year = 'all', _draws = None, + _scenario_dict: dict = None, _outputfilepath: Path = None): # Subset and Pivot the data to have 'Cost Sub-category' as columns # Make a copy of the dataframe to avoid modifying the original @@ -741,9 +745,15 @@ def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', # Plot the stacked bar chart ax = pivot_df.plot(kind='bar', stacked=True, figsize=(10, 6)) - # Format the x-tick labels to wrap text - labels = [textwrap.fill(label.get_text(), 10) for label in 
ax.get_xticklabels()] - ax.set_xticklabels(labels, rotation=45, ha='right') + # Set custom x-tick labels if _scenario_dict is provided + if _scenario_dict: + labels = [_scenario_dict.get(label, label) for label in pivot_df.index] + else: + labels = pivot_df.index.astype(str) + + # Wrap x-tick labels for readability + wrapped_labels = [textwrap.fill(label, 10) for label in labels] + ax.set_xticklabels(wrapped_labels, rotation=45, ha='right') # Period included for plot title and name if _year == 'all': @@ -911,8 +921,8 @@ def generate_roi_plots(_monetary_value_of_incremental_health: pd.DataFrame, # Calculate the values for each individual run for run in scenario_cost_row.index: # Assuming 'run' columns are labeled by numbers # Calculate the cost-effectiveness metric for the current run - run_values = (row[run] - (implementation_costs + scenario_cost_row[run])) / ( - implementation_costs + scenario_cost_row[run]) + run_values = np.clip((row[run] - (implementation_costs + scenario_cost_row[run])) / ( + implementation_costs + scenario_cost_row[run]),0,None) # Create a DataFrame with index as (draw_index, run) and columns as implementation costs run_df = pd.DataFrame([run_values], index=pd.MultiIndex.from_tuples([(draw_index, run)], names=['draw', 'run']), From fe0cdbe7b06dc7af947222f90a4b0db45b2687f3 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 6 Nov 2024 14:40:05 +0000 Subject: [PATCH 152/230] update stacked bar plots to plot scenario names as x tick labels --- .../costing/cost_analysis_hss_elements.py | 74 +++++++++++-------- src/scripts/costing/cost_estimation.py | 2 +- 2 files changed, 46 insertions(+), 30 deletions(-) diff --git a/src/scripts/costing/cost_analysis_hss_elements.py b/src/scripts/costing/cost_analysis_hss_elements.py index 5d9990897d..d849f29f70 100644 --- a/src/scripts/costing/cost_analysis_hss_elements.py +++ b/src/scripts/costing/cost_analysis_hss_elements.py @@ -72,6 +72,20 @@ 7: "Consumables Available at HIV levels", 8: "Consumables Available at EPI levels", 9: "Perfect Consumables Availability", 10: "HSS PACKAGE: Perfect", 11: "HSS PACKAGE: Realistic expansion, no change in HSB", 12: "HSS PACKAGE: Realistic expansion"} hss_scenarios_for_gf_report = [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 12] +color_map = { + 'Baseline': '#a50026', + 'HRH Moderate Scale-up (1%)': '#d73027', + 'HRH Scale-up Following Historical Growth': '#f46d43', + 'HRH Accelerated Scale-up (6%)': '#fdae61', + 'Increase Capacity at Primary Care Levels': '#fee08b', + 'Increase Capacity of CHW': '#ffffbf', + 'Consumables Increased to 75th Percentile': '#d9ef8b', + 'Consumables Available at HIV levels': '#a6d96a', + 'Consumables Available at EPI levels': '#66bd63', + 'Perfect Consumables Availability': '#1a9850', + 'HSS PACKAGE: Perfect': '#5e4fa2', + 'HSS PACKAGE: Realistic expansion': '#3288bd' +} # Cost-effectiveness threshold chosen_cet = 77.4 # based on Ochalek et al (2018) - the paper provided the value $61 in 2016 USD terms, this value is in 2023 USD terms @@ -82,10 +96,10 @@ _years=list_of_relevant_years_for_costing, cost_only_used_staff=True) # _draws = htm_scenarios_for_gf_report --> this subset is created after calculating malaria scale up costs # TODO Remove the manual fix below once the logging for these is corrected -input_costs[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders'] = \ - input_costs[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders']/10 -input_costs[input_costs.cost_subgroup == 'Depot-Medroxyprogesterone Acetate 
150 mg - 3 monthly'] = \ - input_costs[input_costs.cost_subgroup == 'Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly']/7 +input_costs.loc[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost'] = \ + input_costs.loc[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost']/10 +input_costs.loc[input_costs.cost_subgroup == 'Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly', 'cost'] =\ + input_costs.loc[input_costs.cost_subgroup == 'Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly', 'cost']/7 # %% # Return on Invesment analysis @@ -138,7 +152,6 @@ def get_num_dalys(_df): .sum().sum() ) - num_dalys = extract_results( results_folder, module='tlo.methods.healthburden', @@ -182,40 +195,44 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the extent of the error bar.""" + # Calculate y-error bars yerr = np.array([ (_df['mean'] - _df['lower']).values, (_df['upper'] - _df['mean']).values, ]) - xticks = {(i + 1): k for i, k in enumerate(_df.index)} + # Map xticks based on the hss_scenarios dictionary + xticks = {index: hss_scenarios.get(index, f"Scenario {index}") for index in _df.index} + + # Retrieve colors from color_map based on the xticks labels + colors = [color_map.get(label, '#333333') for label in xticks.values()] # default to grey if not found fig, ax = plt.subplots() ax.bar( xticks.keys(), _df['mean'].values, yerr=yerr, + color=colors, # Set bar colors alpha=1, ecolor='black', capsize=10, label=xticks.values() ) - ''' + + # Add optional annotations above each bar if annotations: for xpos, ypos, text in zip(xticks.keys(), _df['upper'].values, annotations): - ax.text(xpos, ypos * 1.05, text, horizontalalignment='center', fontsize=11) + ax.text(xpos, ypos * 1.05, text, horizontalalignment='center', fontsize=8) + # Set x-tick labels with wrapped text if required ax.set_xticks(list(xticks.keys())) - if not xticklabels_horizontal_and_wrapped: - wrapped_labs = ["\n".join(textwrap.wrap(_lab, 20)) for _lab in xticks.values()] - ax.set_xticklabels(wrapped_labs, rotation=45, ha='right', fontsize=10) - else: - wrapped_labs = ["\n".join(textwrap.wrap(_lab, 20)) for _lab in xticks.values()] - ax.set_xticklabels(wrapped_labs, fontsize=10) - ''' - - # Set font size for y-tick labels - ax.tick_params(axis='y', labelsize=12) - ax.tick_params(axis='x', labelsize=11) + wrapped_labs = ["\n".join(textwrap.wrap(label, 25)) for label in xticks.values()] + ax.set_xticklabels(wrapped_labs, rotation=45 if not xticklabels_horizontal_and_wrapped else 0, ha='right', + fontsize=8) + + # Set font size for y-tick labels and grid + ax.tick_params(axis='y', labelsize=9) + ax.tick_params(axis='x', labelsize=9) ax.grid(axis="y") ax.spines['top'].set_visible(False) @@ -224,9 +241,8 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe return fig, ax - -# Plot Max ability to pay -name_of_plot = f'Maximum ability to pay, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' # f'Maximum ability to pay, {first_year_of_simulation} - {final_year_of_simulation}' +# Plot Maximum ability to pay +name_of_plot = f'Maximum ability to pay, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' fig, ax = do_bar_plot_with_ci( (max_ability_to_pay_for_implementation_summarized / 1e6), annotations=[ @@ -238,7 +254,7 @@ def do_bar_plot_with_ci(_df, annotations=None, 
xticklabels_horizontal_and_wrappe ax.set_title(name_of_plot) ax.set_ylabel('Maximum ability to pay \n(Millions)') fig.tight_layout() -fig.savefig(figurespath / name_of_plot.replace(' ', '_').replace(',', '')) +fig.savefig(roi_outputs_folder / name_of_plot.replace(' ', '_').replace(',', '')) plt.close(fig) # 4. Plot costs @@ -257,9 +273,9 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe value_name='cost' ) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _disaggregate_by_subgroup = False, _outputfilepath = figurespath) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = [2025], _disaggregate_by_subgroup = False, _outputfilepath = figurespath) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'human resources for health', _disaggregate_by_subgroup = False, _outputfilepath = figurespath) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical consumables', _disaggregate_by_subgroup = False, _outputfilepath = figurespath) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical equipment', _disaggregate_by_subgroup = False, _outputfilepath = figurespath) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'other', _disaggregate_by_subgroup = False, _outputfilepath = figurespath) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hss_scenarios) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = [2025], _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hss_scenarios) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'human resources for health', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hss_scenarios) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical consumables', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hss_scenarios) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical equipment', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hss_scenarios) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'other', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hss_scenarios) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 44ee82cf74..ef850842b0 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -752,7 +752,7 @@ def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', labels = pivot_df.index.astype(str) # Wrap x-tick labels for readability - wrapped_labels = [textwrap.fill(label, 10) for label in labels] + wrapped_labels = [textwrap.fill(label, 20) for label in labels] ax.set_xticklabels(wrapped_labels, rotation=45, ha='right') # Period included for plot title and name From 604c4042bce0b18d2bf18a49402419a965e9614e Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 6 Nov 2024 
14:44:43 +0000 Subject: [PATCH 153/230] create a function to get_monetary_value_of_incremental_health --- src/scripts/costing/cost_analysis_hss_elements.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/scripts/costing/cost_analysis_hss_elements.py b/src/scripts/costing/cost_analysis_hss_elements.py index d849f29f70..825cd0d54b 100644 --- a/src/scripts/costing/cost_analysis_hss_elements.py +++ b/src/scripts/costing/cost_analysis_hss_elements.py @@ -167,11 +167,16 @@ def get_num_dalys(_df): num_dalys.loc[0], comparison=0) # sets the comparator to 0 which is the Actual scenario ).T.iloc[0].unstack(level='run')) +num_dalys_averted = num_dalys_averted[num_dalys_averted.index.get_level_values(0).isin(hss_scenarios_for_gf_report)] + +# Assign discounting to num_dalys_averted + # The monetary value of the health benefit is delta health times CET (negative values are set to 0) -monetary_value_of_incremental_health = (num_dalys_averted * chosen_cet).clip(lower=0.0) -monetary_value_of_incremental_health = monetary_value_of_incremental_health[ - monetary_value_of_incremental_health.index.get_level_values(0).isin(hss_scenarios_for_gf_report)] +def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_of_life_year): + monetary_value_of_incremental_health = (_num_dalys_averted * _chosen_value_of_life_year).clip(lower=0.0) + return monetary_value_of_incremental_health + # TODO check that the above calculation is correct # 3. Return on Investment Plot From 740a669119ee079e23e50b41a9d0746dcceecf27 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 6 Nov 2024 15:59:27 +0000 Subject: [PATCH 154/230] create a function to discount costs --- .../costing/cost_analysis_hss_elements.py | 24 ++++++++++++------- src/scripts/costing/cost_estimation.py | 9 +++++++ 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/src/scripts/costing/cost_analysis_hss_elements.py b/src/scripts/costing/cost_analysis_hss_elements.py index 825cd0d54b..a165987558 100644 --- a/src/scripts/costing/cost_analysis_hss_elements.py +++ b/src/scripts/costing/cost_analysis_hss_elements.py @@ -31,6 +31,7 @@ from scripts.costing.cost_estimation import (estimate_input_cost_of_scenarios, summarize_cost_data, + apply_discounting_to_cost_data, do_stacked_bar_plot_of_cost_by_category, do_line_plot_of_cost, generate_roi_plots) @@ -90,6 +91,9 @@ # Cost-effectiveness threshold chosen_cet = 77.4 # based on Ochalek et al (2018) - the paper provided the value $61 in 2016 USD terms, this value is in 2023 USD terms +# Discount rate +discount_rate = 0.03 + # Estimate standard input costs of scenario # ----------------------------------------------------------------------------------------------------------------------- input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, @@ -108,6 +112,7 @@ # Aggregate input costs for further analysis input_costs_subset = input_costs[ (input_costs['year'] >= relevant_period_for_costing[0]) & (input_costs['year'] <= relevant_period_for_costing[1])] +input_costs_subset = apply_discounting_to_cost_data(input_costs_subset, _discount_rate = discount_rate) # TODO the above step may not longer be needed total_input_cost = input_costs_subset.groupby(['draw', 'run'])['cost'].sum() total_input_cost_summarized = summarize_cost_data(total_input_cost.unstack(level='run')) @@ -145,12 +150,16 @@ def get_num_dalys(_df): """ years_needed = relevant_period_for_costing # [i.year for i in TARGET_PERIOD_INTERVENTION] assert 
set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." - return pd.Series( - data=_df - .loc[_df.year.between(*years_needed)] - .drop(columns=['date', 'sex', 'age_range', 'year']) - .sum().sum() - ) + _df = _df.loc[_df.year.between(*years_needed)].drop(columns=['date', 'sex', 'age_range']).groupby('year').sum().sum(axis = 1) + + # Initial year and discount rate + initial_year = min(_df.index.unique()) + discount_rate = _discount_rate + + # Calculate the discounted values + discounted_values = _df / (1 + discount_rate) ** (_df.index - initial_year) + + return pd.Series(discounted_values.sum()) num_dalys = extract_results( results_folder, @@ -169,9 +178,6 @@ def get_num_dalys(_df): ).T.iloc[0].unstack(level='run')) num_dalys_averted = num_dalys_averted[num_dalys_averted.index.get_level_values(0).isin(hss_scenarios_for_gf_report)] -# Assign discounting to num_dalys_averted - - # The monetary value of the health benefit is delta health times CET (negative values are set to 0) def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_of_life_year): monetary_value_of_incremental_health = (_num_dalys_averted * _chosen_value_of_life_year).clip(lower=0.0) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index ef850842b0..f4494dc916 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -665,6 +665,15 @@ def update_itemuse_for_level1b_using_level2_data(_df): else: return scenario_cost[scenario_cost.year.isin(_years)] +# Define a function to discount and summarise costs by cost_category +def apply_discounting_to_cost_data(_df, _discount_rate = 0.03): + # Initial year and discount rate + initial_year = min(_df['year'].unique()) + + # Calculate the discounted values + _df['cost'] = _df['cost'] / ((1 + _discount_rate) ** (_df['year'] - initial_year)) + return _df + # Define a function to summarize cost data from # Note that the dataframe needs to have draw as index and run as columns. 
if the dataframe is long with draw and run as index, then # first unstack the dataframe and subsequently apply the summarize function From 7354adde7a569cc3093abb980fca006719a0876c Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 6 Nov 2024 18:15:25 +0000 Subject: [PATCH 155/230] update roi plots to account for negative increment costs or cost savings --- .../costing/cost_analysis_hss_elements.py | 131 ++++++++++-------- src/scripts/costing/cost_estimation.py | 88 ++++++++---- 2 files changed, 136 insertions(+), 83 deletions(-) diff --git a/src/scripts/costing/cost_analysis_hss_elements.py b/src/scripts/costing/cost_analysis_hss_elements.py index a165987558..8268cdfa1e 100644 --- a/src/scripts/costing/cost_analysis_hss_elements.py +++ b/src/scripts/costing/cost_analysis_hss_elements.py @@ -94,6 +94,60 @@ # Discount rate discount_rate = 0.03 +# Define a function to create bar plots +def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrapped=False): + """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the + extent of the error bar.""" + + # Calculate y-error bars + yerr = np.array([ + (_df['mean'] - _df['lower']).values, + (_df['upper'] - _df['mean']).values, + ]) + + # Map xticks based on the hss_scenarios dictionary + xticks = {index: hss_scenarios.get(index, f"Scenario {index}") for index in _df.index} + + # Retrieve colors from color_map based on the xticks labels + colors = [color_map.get(label, '#333333') for label in xticks.values()] # default to grey if not found + + fig, ax = plt.subplots() + ax.bar( + xticks.keys(), + _df['mean'].values, + yerr=yerr, + color=colors, # Set bar colors + alpha=1, + ecolor='black', + capsize=10, + label=xticks.values() + ) + + # Add optional annotations above each bar + if annotations: + for xpos, ypos, text in zip(xticks.keys(), _df['upper'].values, annotations): + ax.text(xpos, ypos * 1.05, text, horizontalalignment='center', fontsize=8) + + # Set x-tick labels with wrapped text if required + ax.set_xticks(list(xticks.keys())) + wrapped_labs = ["\n".join(textwrap.wrap(label, 25)) for label in xticks.values()] + ax.set_xticklabels(wrapped_labs, rotation=45 if not xticklabels_horizontal_and_wrapped else 0, ha='right', + fontsize=8) + + # Set y-axis limit to upper max + 500 + ax.set_ylim(_df['upper'].min() - 400, _df['upper'].max() + 400) + + # Set font size for y-tick labels and grid + ax.tick_params(axis='y', labelsize=9) + ax.tick_params(axis='x', labelsize=9) + + ax.grid(axis="y") + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + fig.tight_layout() + + return fig, ax + # Estimate standard input costs of scenario # ----------------------------------------------------------------------------------------------------------------------- input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, @@ -104,7 +158,7 @@ input_costs.loc[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost']/10 input_costs.loc[input_costs.cost_subgroup == 'Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly', 'cost'] =\ input_costs.loc[input_costs.cost_subgroup == 'Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly', 'cost']/7 - +input_costs = apply_discounting_to_cost_data(input_costs, _discount_rate = discount_rate) # %% # Return on Invesment analysis # Calculate incremental cost @@ -112,7 +166,6 @@ # Aggregate input costs for further analysis input_costs_subset = input_costs[ (input_costs['year'] >= 
relevant_period_for_costing[0]) & (input_costs['year'] <= relevant_period_for_costing[1])] -input_costs_subset = apply_discounting_to_cost_data(input_costs_subset, _discount_rate = discount_rate) # TODO the above step may not longer be needed total_input_cost = input_costs_subset.groupby(['draw', 'run'])['cost'].sum() total_input_cost_summarized = summarize_cost_data(total_input_cost.unstack(level='run')) @@ -188,76 +241,25 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o # 3. Return on Investment Plot # ---------------------------------------------------- # Plot ROI at various levels of cost -generate_roi_plots(_monetary_value_of_incremental_health=monetary_value_of_incremental_health, +generate_roi_plots(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = 77.4), _incremental_input_cost=incremental_scenario_cost, _scenario_dict = hss_scenarios, _outputfilepath=roi_outputs_folder) # 4. Plot Maximum ability-to-pay # ---------------------------------------------------- -max_ability_to_pay_for_implementation = (monetary_value_of_incremental_health - incremental_scenario_cost).clip( +max_ability_to_pay_for_implementation = (get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = 77.4) - incremental_scenario_cost).clip( lower=0.0) # monetary value - change in costs max_ability_to_pay_for_implementation_summarized = summarize_cost_data(max_ability_to_pay_for_implementation) max_ability_to_pay_for_implementation_summarized = max_ability_to_pay_for_implementation_summarized[ max_ability_to_pay_for_implementation_summarized.index.get_level_values(0).isin(hss_scenarios_for_gf_report)] - -def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrapped=False): - """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the - extent of the error bar.""" - - # Calculate y-error bars - yerr = np.array([ - (_df['mean'] - _df['lower']).values, - (_df['upper'] - _df['mean']).values, - ]) - - # Map xticks based on the hss_scenarios dictionary - xticks = {index: hss_scenarios.get(index, f"Scenario {index}") for index in _df.index} - - # Retrieve colors from color_map based on the xticks labels - colors = [color_map.get(label, '#333333') for label in xticks.values()] # default to grey if not found - - fig, ax = plt.subplots() - ax.bar( - xticks.keys(), - _df['mean'].values, - yerr=yerr, - color=colors, # Set bar colors - alpha=1, - ecolor='black', - capsize=10, - label=xticks.values() - ) - - # Add optional annotations above each bar - if annotations: - for xpos, ypos, text in zip(xticks.keys(), _df['upper'].values, annotations): - ax.text(xpos, ypos * 1.05, text, horizontalalignment='center', fontsize=8) - - # Set x-tick labels with wrapped text if required - ax.set_xticks(list(xticks.keys())) - wrapped_labs = ["\n".join(textwrap.wrap(label, 25)) for label in xticks.values()] - ax.set_xticklabels(wrapped_labs, rotation=45 if not xticklabels_horizontal_and_wrapped else 0, ha='right', - fontsize=8) - - # Set font size for y-tick labels and grid - ax.tick_params(axis='y', labelsize=9) - ax.tick_params(axis='x', labelsize=9) - - ax.grid(axis="y") - ax.spines['top'].set_visible(False) - ax.spines['right'].set_visible(False) - fig.tight_layout() - - return fig, ax - # Plot Maximum ability to pay name_of_plot = f'Maximum ability to pay, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' fig, ax = 
do_bar_plot_with_ci( (max_ability_to_pay_for_implementation_summarized / 1e6), annotations=[ - f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}-{round(row['upper'] / 1e6, 1)})" + f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}-\n {round(row['upper'] / 1e6, 1)})" for _, row in max_ability_to_pay_for_implementation_summarized.iterrows() ], xticklabels_horizontal_and_wrapped=False, @@ -268,6 +270,23 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe fig.savefig(roi_outputs_folder / name_of_plot.replace(' ', '_').replace(',', '')) plt.close(fig) +# Plot incremental costs +incremental_scenario_cost_summarized = summarize_cost_data(incremental_scenario_cost) +name_of_plot = f'Incremental scenario cost relative to baseline {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' +fig, ax = do_bar_plot_with_ci( + (incremental_scenario_cost_summarized / 1e6), + annotations=[ + f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}- \n {round(row['upper'] / 1e6, 1)})" + for _, row in incremental_scenario_cost_summarized.iterrows() + ], + xticklabels_horizontal_and_wrapped=False, +) +ax.set_title(name_of_plot) +ax.set_ylabel('Cost \n(USD Millions)') +fig.tight_layout() +fig.savefig(roi_outputs_folder / name_of_plot.replace(' ', '_').replace(',', '')) +plt.close(fig) + # 4. Plot costs # ---------------------------------------------------- input_costs_for_plot = input_costs[input_costs.draw.isin(hss_scenarios_for_gf_report)] diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index f4494dc916..bed13c06cc 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -36,11 +36,22 @@ print('Script Start', datetime.datetime.now().strftime('%H:%M')) #%% + +# Define a function to discount and summarise costs by cost_category +def apply_discounting_to_cost_data(_df, _discount_rate=0): + # Initial year and discount rate + initial_year = min(_df['year'].unique()) + + # Calculate the discounted values + _df['cost'] = _df['cost'] / ((1 + _discount_rate) ** (_df['year'] - initial_year)) + return _df + def estimate_input_cost_of_scenarios(results_folder: Path, resourcefilepath: Path = None, _draws = None, _runs = None, summarize: bool = False, _years = None, - cost_only_used_staff: bool = True): + cost_only_used_staff: bool = True, + _discount_rate = 0): # Useful common functions def drop_outside_period(_df): """Return a dataframe which only includes for which the date is within the limits defined by TARGET_PERIOD""" @@ -661,18 +672,9 @@ def update_itemuse_for_level1b_using_level2_data(_df): value_name='cost') if _years is None: - return scenario_cost + return apply_discounting_to_cost_data(scenario_cost,_discount_rate) else: - return scenario_cost[scenario_cost.year.isin(_years)] - -# Define a function to discount and summarise costs by cost_category -def apply_discounting_to_cost_data(_df, _discount_rate = 0.03): - # Initial year and discount rate - initial_year = min(_df['year'].unique()) - - # Calculate the discounted values - _df['cost'] = _df['cost'] / ((1 + _discount_rate) ** (_df['year'] - initial_year)) - return _df + return apply_discounting_to_cost_data(scenario_cost[scenario_cost.year.isin(_years)],_discount_rate) # Define a function to summarize cost data from # Note that the dataframe needs to have draw as index and run as columns. 
if the dataframe is long with draw and run as index, then @@ -918,6 +920,7 @@ def generate_roi_plots(_monetary_value_of_incremental_health: pd.DataFrame, # Iterate over each draw in monetary_value_of_incremental_health for draw_index, row in _monetary_value_of_incremental_health.iterrows(): + print("running draw ", draw_index) # Initialize an empty DataFrame to store values for each 'run' all_run_values = pd.DataFrame() @@ -925,26 +928,46 @@ def generate_roi_plots(_monetary_value_of_incremental_health: pd.DataFrame, implementation_costs = np.linspace(0, max_ability_to_pay_for_implementation.loc[draw_index].max(), 50) # Retrieve the corresponding row from incremental_scenario_cost for the same draw - scenario_cost_row = _incremental_input_cost.loc[draw_index] + incremental_scenario_cost_row = _incremental_input_cost.loc[draw_index] # Calculate the values for each individual run - for run in scenario_cost_row.index: # Assuming 'run' columns are labeled by numbers - # Calculate the cost-effectiveness metric for the current run - run_values = np.clip((row[run] - (implementation_costs + scenario_cost_row[run])) / ( - implementation_costs + scenario_cost_row[run]),0,None) + for run in incremental_scenario_cost_row.index: # Assuming 'run' columns are labeled by numbers + # Calculate the total costs for the current run + total_costs = implementation_costs + incremental_scenario_cost_row[run] + + # Initialize run_values as an empty series with the same index as total_costs + run_values = pd.Series(index=total_costs, dtype=float) + + # For negative total_costs, set corresponding run_values to infinity + run_values[total_costs < 0] = np.inf + + # For non-negative total_costs, calculate the metric and clip at 0 + non_negative_mask = total_costs >= 0 + run_values[non_negative_mask] = np.clip( + (row[run] - total_costs[non_negative_mask]) / total_costs[non_negative_mask], + 0, + None + ) # Create a DataFrame with index as (draw_index, run) and columns as implementation costs + run_values = run_values.values # remove index and convert to array run_df = pd.DataFrame([run_values], index=pd.MultiIndex.from_tuples([(draw_index, run)], names=['draw', 'run']), columns=implementation_costs) # Append the run DataFrame to all_run_values all_run_values = pd.concat([all_run_values, run_df]) - collapsed_data = all_run_values.groupby(level='draw').agg([ - 'mean', - ('lower', lambda x: x.quantile(0.025)), - ('upper', lambda x: x.quantile(0.975)) - ]) + # Replace inf with NaN temporarily to handle quantile calculation correctly + temp_data = all_run_values.replace([np.inf, -np.inf], np.nan) + + collapsed_data = temp_data.groupby(level='draw').agg([ + 'mean', + ('lower', lambda x: x.quantile(0.025)), + ('upper', lambda x: x.quantile(0.975)) + ]) + + # Revert the NaNs back to inf + collapsed_data = collapsed_data.replace([np.nan], np.inf) collapsed_data = collapsed_data.unstack() collapsed_data.index = collapsed_data.index.set_names('implementation_cost', level=0) @@ -957,20 +980,31 @@ def generate_roi_plots(_monetary_value_of_incremental_health: pd.DataFrame, lower_values = collapsed_data[collapsed_data['stat'] == 'lower'][['implementation_cost', 'roi']] upper_values = collapsed_data[collapsed_data['stat'] == 'upper'][['implementation_cost', 'roi']] + fig, ax = plt.subplots() # Create a figure and axis + # Plot mean line plt.plot(implementation_costs / 1e6, mean_values['roi'], label=f'{_scenario_dict[draw_index]}') # Plot the confidence interval as a shaded region plt.fill_between(implementation_costs / 1e6, 
lower_values['roi'], upper_values['roi'], alpha=0.2) + # Set y-axis limit to upper max + 500 + ax.set_ylim(0, mean_values[~np.isinf(mean_values.roi)]['roi'].max()*(1+0.05)) + plt.xlabel('Implementation cost, millions') plt.ylabel('Return on Investment') plt.title('Return on Investment of scenario at different levels of implementation cost') - #plt.text(x=0.95, y=0.8, - # s=f"Monetary value of incremental health = USD {round(monetary_value_of_incremental_health.loc[draw_index]['mean'] / 1e6, 2)}m (USD {round(monetary_value_of_incremental_health.loc[draw_index]['lower'] / 1e6, 2)}m-{round(monetary_value_of_incremental_health.loc[draw_index]['upper'] / 1e6, 2)}m);\n " - # f"Incremental input cost of scenario = USD {round(scenario_cost_row['mean'] / 1e6, 2)}m (USD {round(scenario_cost_row['lower'] / 1e6, 2)}m-{round(scenario_cost_row['upper'] / 1e6, 2)}m)", - # horizontalalignment='right', verticalalignment='top', transform=plt.gca().transAxes, fontsize=9, - # weight='bold', color='black') + monetary_value_of_incremental_health_summarized = summarize_cost_data(_monetary_value_of_incremental_health) + incremental_scenario_cost_row_summarized = incremental_scenario_cost_row.agg( + mean='mean', + lower=lambda x: x.quantile(0.025), + upper=lambda x: x.quantile(0.975)) + + plt.text(x=0.95, y=0.8, + s=f"Monetary value of incremental health = \n USD {round(monetary_value_of_incremental_health_summarized.loc[draw_index]['mean'] / 1e6, 2)}m (USD {round(monetary_value_of_incremental_health_summarized.loc[draw_index]['lower'] / 1e6, 2)}m-{round(monetary_value_of_incremental_health_summarized.loc[draw_index]['upper'] / 1e6, 2)}m);\n " + f"Incremental input cost of scenario = \n USD {round(incremental_scenario_cost_row_summarized['mean'] / 1e6, 2)}m (USD {round(incremental_scenario_cost_row_summarized['lower'] / 1e6, 2)}m-{round(incremental_scenario_cost_row_summarized['upper'] / 1e6, 2)}m)", + horizontalalignment='right', verticalalignment='top', transform=plt.gca().transAxes, fontsize=9, + weight='bold', color='black') # Show legend plt.legend() From 6499ab15633699363cbbc67fdb875fdef04d9a3d Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 6 Nov 2024 18:44:38 +0000 Subject: [PATCH 156/230] add VSL version of ROI --- .../costing/cost_analysis_hss_elements.py | 52 +++++++++++++++---- src/scripts/costing/cost_estimation.py | 6 ++- 2 files changed, 47 insertions(+), 11 deletions(-) diff --git a/src/scripts/costing/cost_analysis_hss_elements.py b/src/scripts/costing/cost_analysis_hss_elements.py index 8268cdfa1e..5330a70485 100644 --- a/src/scripts/costing/cost_analysis_hss_elements.py +++ b/src/scripts/costing/cost_analysis_hss_elements.py @@ -90,6 +90,7 @@ # Cost-effectiveness threshold chosen_cet = 77.4 # based on Ochalek et al (2018) - the paper provided the value $61 in 2016 USD terms, this value is in 2023 USD terms +chosen_value_of_statistical_life = 834 # Discount rate discount_rate = 0.03 @@ -135,7 +136,7 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe fontsize=8) # Set y-axis limit to upper max + 500 - ax.set_ylim(_df['upper'].min() - 400, _df['upper'].max() + 400) + ax.set_ylim(_df['lower'].min()*1.25, _df['upper'].max()*1.25) # Set font size for y-tick labels and grid ax.tick_params(axis='y', labelsize=9) @@ -151,14 +152,16 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe # Estimate standard input costs of scenario # 
----------------------------------------------------------------------------------------------------------------------- input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, - _years=list_of_relevant_years_for_costing, cost_only_used_staff=True) + _years=list_of_relevant_years_for_costing, cost_only_used_staff=True, + _discount_rate = discount_rate) # _draws = htm_scenarios_for_gf_report --> this subset is created after calculating malaria scale up costs # TODO Remove the manual fix below once the logging for these is corrected input_costs.loc[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost'] = \ input_costs.loc[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost']/10 input_costs.loc[input_costs.cost_subgroup == 'Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly', 'cost'] =\ input_costs.loc[input_costs.cost_subgroup == 'Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly', 'cost']/7 -input_costs = apply_discounting_to_cost_data(input_costs, _discount_rate = discount_rate) +#input_costs = apply_discounting_to_cost_data(input_costs, _discount_rate = discount_rate) + # %% # Return on Invesment analysis # Calculate incremental cost @@ -190,10 +193,10 @@ def find_difference_relative_to_comparison(_ser: pd.Series, comparison=0) # sets the comparator to 0 which is the Actual scenario ).T.iloc[0].unstack()).T +# Keep only scenarios of interest incremental_scenario_cost = incremental_scenario_cost[ incremental_scenario_cost.index.get_level_values(0).isin(hss_scenarios_for_gf_report)] - # Monetary value of health impact # ----------------------------------------------------------------------------------------------------------------------- def get_num_dalys(_df): @@ -241,21 +244,28 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o # 3. Return on Investment Plot # ---------------------------------------------------- # Plot ROI at various levels of cost -generate_roi_plots(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = 77.4), +generate_roi_plots(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_cet), _incremental_input_cost=incremental_scenario_cost, _scenario_dict = hss_scenarios, - _outputfilepath=roi_outputs_folder) + _outputfilepath=roi_outputs_folder, + _value_of_life_suffix = 'CET') -# 4. Plot Maximum ability-to-pay +generate_roi_plots(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _scenario_dict = hss_scenarios, + _outputfilepath=roi_outputs_folder, + _value_of_life_suffix = 'VSL') + +# 4. 
Plot Maximum ability-to-pay at CET # ---------------------------------------------------- -max_ability_to_pay_for_implementation = (get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = 77.4) - incremental_scenario_cost).clip( +max_ability_to_pay_for_implementation = (get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_cet) - incremental_scenario_cost).clip( lower=0.0) # monetary value - change in costs max_ability_to_pay_for_implementation_summarized = summarize_cost_data(max_ability_to_pay_for_implementation) max_ability_to_pay_for_implementation_summarized = max_ability_to_pay_for_implementation_summarized[ max_ability_to_pay_for_implementation_summarized.index.get_level_values(0).isin(hss_scenarios_for_gf_report)] # Plot Maximum ability to pay -name_of_plot = f'Maximum ability to pay, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' +name_of_plot = f'Maximum ability to pay at CET, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' fig, ax = do_bar_plot_with_ci( (max_ability_to_pay_for_implementation_summarized / 1e6), annotations=[ @@ -270,6 +280,30 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o fig.savefig(roi_outputs_folder / name_of_plot.replace(' ', '_').replace(',', '')) plt.close(fig) +# 4. Plot Maximum ability-to-pay at VSL +# ---------------------------------------------------- +max_ability_to_pay_for_implementation = (get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life) - incremental_scenario_cost).clip( + lower=0.0) # monetary value - change in costs +max_ability_to_pay_for_implementation_summarized = summarize_cost_data(max_ability_to_pay_for_implementation) +max_ability_to_pay_for_implementation_summarized = max_ability_to_pay_for_implementation_summarized[ + max_ability_to_pay_for_implementation_summarized.index.get_level_values(0).isin(hss_scenarios_for_gf_report)] + +# Plot Maximum ability to pay +name_of_plot = f'Maximum ability to pay at VSL, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' +fig, ax = do_bar_plot_with_ci( + (max_ability_to_pay_for_implementation_summarized / 1e6), + annotations=[ + f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}-\n {round(row['upper'] / 1e6, 1)})" + for _, row in max_ability_to_pay_for_implementation_summarized.iterrows() + ], + xticklabels_horizontal_and_wrapped=False, +) +ax.set_title(name_of_plot) +ax.set_ylabel('Maximum ability to pay (at VSL) \n(Millions)') +fig.tight_layout() +fig.savefig(roi_outputs_folder / name_of_plot.replace(' ', '_').replace(',', '')) +plt.close(fig) + # Plot incremental costs incremental_scenario_cost_summarized = summarize_cost_data(incremental_scenario_cost) name_of_plot = f'Incremental scenario cost relative to baseline {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index bed13c06cc..db8e493090 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -910,10 +910,12 @@ def do_line_plot_of_cost(_df, _cost_category='all', plt.close() # Plot ROI +# TODO update this function to include an input for the monetary value of DALY def generate_roi_plots(_monetary_value_of_incremental_health: pd.DataFrame, _incremental_input_cost: pd.DataFrame, _scenario_dict: dict, - _outputfilepath: Path): + 
_outputfilepath: Path, + _value_of_life_suffix = ''): # Calculate maximum ability to pay for implementation max_ability_to_pay_for_implementation = (_monetary_value_of_incremental_health - _incremental_input_cost).clip( lower=0.0) # monetary value - change in costs @@ -1009,7 +1011,7 @@ def generate_roi_plots(_monetary_value_of_incremental_health: pd.DataFrame, # Show legend plt.legend() # Save - plt.savefig(_outputfilepath / f'draw{draw_index}_{_scenario_dict[draw_index]}_ROI.png', dpi=100, + plt.savefig(_outputfilepath / f'draw{draw_index}_{_scenario_dict[draw_index]}_ROI_at_{_value_of_life_suffix}.png', dpi=100, bbox_inches='tight') plt.close() From 5c691f9c9a129d3b7ff4ca4581f99c6cb1d31d67 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 6 Nov 2024 18:46:06 +0000 Subject: [PATCH 157/230] remove gap between bar plots --- src/scripts/costing/cost_analysis_hss_elements.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/scripts/costing/cost_analysis_hss_elements.py b/src/scripts/costing/cost_analysis_hss_elements.py index 5330a70485..0c6f521591 100644 --- a/src/scripts/costing/cost_analysis_hss_elements.py +++ b/src/scripts/costing/cost_analysis_hss_elements.py @@ -112,26 +112,28 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe # Retrieve colors from color_map based on the xticks labels colors = [color_map.get(label, '#333333') for label in xticks.values()] # default to grey if not found + # Generate consecutive x positions for the bars, ensuring no gaps + x_positions = np.arange(len(xticks)) # Consecutive integers for each bar position + fig, ax = plt.subplots() ax.bar( - xticks.keys(), + x_positions, _df['mean'].values, yerr=yerr, color=colors, # Set bar colors alpha=1, ecolor='black', capsize=10, - label=xticks.values() ) # Add optional annotations above each bar if annotations: - for xpos, ypos, text in zip(xticks.keys(), _df['upper'].values, annotations): + for xpos, ypos, text in zip(x_positions, _df['upper'].values, annotations): ax.text(xpos, ypos * 1.05, text, horizontalalignment='center', fontsize=8) # Set x-tick labels with wrapped text if required - ax.set_xticks(list(xticks.keys())) wrapped_labs = ["\n".join(textwrap.wrap(label, 25)) for label in xticks.values()] + ax.set_xticks(x_positions) # Set x-ticks to consecutive positions ax.set_xticklabels(wrapped_labs, rotation=45 if not xticklabels_horizontal_and_wrapped else 0, ha='right', fontsize=8) From a7b1b20643e181aa555ca4bbcd8c1e195e9e3c6c Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Thu, 7 Nov 2024 13:53:09 +0000 Subject: [PATCH 158/230] get all scripts running as expected --- .../costing/cost_analysis_hss_elements.py | 1 - .../cost_analysis_htm_with_and_without_hss.py | 304 ++++++++++++------ src/scripts/costing/costing_validation.py | 19 +- 3 files changed, 210 insertions(+), 114 deletions(-) diff --git a/src/scripts/costing/cost_analysis_hss_elements.py b/src/scripts/costing/cost_analysis_hss_elements.py index 0c6f521591..cd3483b2fe 100644 --- a/src/scripts/costing/cost_analysis_hss_elements.py +++ b/src/scripts/costing/cost_analysis_hss_elements.py @@ -212,7 +212,6 @@ def get_num_dalys(_df): # Initial year and discount rate initial_year = min(_df.index.unique()) - discount_rate = _discount_rate # Calculate the discounted values discounted_values = _df / (1 + discount_rate) ** (_df.index - initial_year) diff --git a/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py b/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py index 
a0e8140c7f..a8d12a8d3b 100644 --- a/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py +++ b/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py @@ -31,9 +31,11 @@ from scripts.costing.cost_estimation import (estimate_input_cost_of_scenarios, summarize_cost_data, + apply_discounting_to_cost_data, do_stacked_bar_plot_of_cost_by_category, do_line_plot_of_cost, generate_roi_plots) + # Define a timestamp for script outputs timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M") @@ -68,7 +70,7 @@ district_dict = dict(zip(district_dict['District_Num'], district_dict['District'])) # Period relevant for costing -TARGET_PERIOD_INTERVENTION = (Date(2025, 1, 1), Date(2035, 12, 31)) # This is the period that is costed +TARGET_PERIOD_INTERVENTION = (Date(2025, 1, 1), Date(2035, 12, 31)) # This is the period that is costed relevant_period_for_costing = [i.year for i in TARGET_PERIOD_INTERVENTION] list_of_relevant_years_for_costing = list(range(relevant_period_for_costing[0], relevant_period_for_costing[1] + 1)) @@ -81,14 +83,96 @@ # Subset of scenarios included in analysis htm_scenarios_for_gf_report = [0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 16] +color_map = { + 'Baseline': '#9e0142', + 'HSS PACKAGE: Realistic': '#d8434e', + 'HIV Programs Scale-up WITHOUT HSS PACKAGE': '#f36b48', + 'HIV Programs Scale-up WITH REALISTIC HSS PACKAGE': '#fca45c', + 'TB Programs Scale-up WITHOUT HSS PACKAGE': '#fddc89', + 'TB Programs Scale-up WITH REALISTIC HSS PACKAGE': '#e7f7a0', + 'Malaria Programs Scale-up WITHOUT HSS PACKAGE': '#a5dc97', + 'Malaria Programs Scale-up WITH REALISTIC HSS PACKAGE': '#6dc0a6', + 'HTM Programs Scale-up WITHOUT HSS PACKAGE': '#438fba', + 'HTM Programs Scale-up WITH REALISTIC HSS PACKAGE': '#5e4fa2', + 'HTM Programs Scale-up WITH SUPPLY CHAINS': '#3c71aa', + 'HTM Programs Scale-up WITH HRH': '#2f6094', +} + # Cost-effectiveness threshold -chosen_cet = 77.4 # based on Ochalek et al (2018) - the paper provided the value $61 in 2016 USD terms, this value is in 2023 USD terms +chosen_cet = 77.4 # based on Ochalek et al (2018) - the paper provided the value $61 in 2016 USD terms, this value is in 2023 USD terms +chosen_value_of_statistical_life = 834 + +# Discount rate +discount_rate = 0.03 + +# Define a function to create bar plots +def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrapped=False): + """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the + extent of the error bar.""" + + # Calculate y-error bars + yerr = np.array([ + (_df['mean'] - _df['lower']).values, + (_df['upper'] - _df['mean']).values, + ]) + + # Map xticks based on the hss_scenarios dictionary + xticks = {index: htm_scenarios.get(index, f"Scenario {index}") for index in _df.index} + + # Retrieve colors from color_map based on the xticks labels + colors = [color_map.get(label, '#333333') for label in xticks.values()] # default to grey if not found + + # Generate consecutive x positions for the bars, ensuring no gaps + x_positions = np.arange(len(xticks)) # Consecutive integers for each bar position + + fig, ax = plt.subplots() + ax.bar( + x_positions, + _df['mean'].values, + yerr=yerr, + color=colors, # Set bar colors + alpha=1, + ecolor='black', + capsize=10, + ) + + # Add optional annotations above each bar + if annotations: + for xpos, ypos, text in zip(x_positions, _df['upper'].values, annotations): + ax.text(xpos, ypos * 1.05, text, horizontalalignment='center', fontsize=8) + + # Set x-tick labels with wrapped text 
if required + wrapped_labs = ["\n".join(textwrap.wrap(label,30)) for label in xticks.values()] + ax.set_xticks(x_positions) # Set x-ticks to consecutive positions + ax.set_xticklabels(wrapped_labs, rotation=45 if not xticklabels_horizontal_and_wrapped else 0, ha='right', + fontsize=7) + + # Set y-axis limit to upper max + 500 + ax.set_ylim(_df['lower'].min()*1.25, _df['upper'].max()*1.25) + + # Set font size for y-tick labels and grid + ax.tick_params(axis='y', labelsize=9) + ax.tick_params(axis='x', labelsize=9) + + ax.grid(axis="y") + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + fig.tight_layout() + + return fig, ax # Estimate standard input costs of scenario #----------------------------------------------------------------------------------------------------------------------- input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, - _years = list_of_relevant_years_for_costing, cost_only_used_staff=True) + _years=list_of_relevant_years_for_costing, cost_only_used_staff=True, + _discount_rate = discount_rate) # _draws = htm_scenarios_for_gf_report --> this subset is created after calculating malaria scale up costs +# TODO Remove the manual fix below once the logging for these is corrected +input_costs.loc[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost'] = \ + input_costs.loc[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost']/10 +input_costs.loc[input_costs.cost_subgroup == 'Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly', 'cost'] =\ + input_costs.loc[input_costs.cost_subgroup == 'Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly', 'cost']/7 +#input_costs = apply_discounting_to_cost_data(input_costs, _discount_rate = discount_rate) # Add additional costs pertaining to simulation (Only for scenarios with Malaria scale-up) #----------------------------------------------------------------------------------------------------------------------- @@ -219,14 +303,13 @@ def melt_and_label_malaria_scaleup_cost(_df, label): # %% # Return on Invesment analysis # Calculate incremental cost -#----------------------------------------------------------------------------------------------------------------------- +# ----------------------------------------------------------------------------------------------------------------------- # Aggregate input costs for further analysis -input_costs_subset = input_costs[(input_costs['year'] >= relevant_period_for_costing[0]) & (input_costs['year'] <= relevant_period_for_costing[1])] +input_costs_subset = input_costs[ + (input_costs['year'] >= relevant_period_for_costing[0]) & (input_costs['year'] <= relevant_period_for_costing[1])] # TODO the above step may not longer be needed - total_input_cost = input_costs_subset.groupby(['draw', 'run'])['cost'].sum() -total_input_cost_summarized = summarize_cost_data(total_input_cost.unstack(level = 'run')) - +total_input_cost_summarized = summarize_cost_data(total_input_cost.unstack(level='run')) def find_difference_relative_to_comparison(_ser: pd.Series, comparison: str, scaled: bool = False, @@ -241,124 +324,138 @@ def find_difference_relative_to_comparison(_ser: pd.Series, .drop(columns=([comparison] if drop_comparison else [])) \ .stack() + incremental_scenario_cost = (pd.DataFrame( - find_difference_relative_to_comparison( - total_input_cost, - comparison= 0) # sets the comparator to 0 which is the Actual scenario - ).T.iloc[0].unstack()).T -incremental_scenario_cost = 
incremental_scenario_cost[incremental_scenario_cost.index.get_level_values(0).isin(htm_scenarios_for_gf_report)] + find_difference_relative_to_comparison( + total_input_cost, + comparison=0) # sets the comparator to 0 which is the Actual scenario +).T.iloc[0].unstack()).T + +# Keep only scenarios of interest +incremental_scenario_cost = incremental_scenario_cost[ + incremental_scenario_cost.index.get_level_values(0).isin(htm_scenarios_for_gf_report)] # Monetary value of health impact -#----------------------------------------------------------------------------------------------------------------------- +# ----------------------------------------------------------------------------------------------------------------------- def get_num_dalys(_df): """Return total number of DALYS (Stacked) by label (total within the TARGET_PERIOD). Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using results from runs that crashed mid-way through the simulation. """ - years_needed = relevant_period_for_costing # [i.year for i in TARGET_PERIOD_INTERVENTION] + years_needed = relevant_period_for_costing # [i.year for i in TARGET_PERIOD_INTERVENTION] assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." - return pd.Series( - data=_df - .loc[_df.year.between(*years_needed)] - .drop(columns=['date', 'sex', 'age_range', 'year']) - .sum().sum() - ) + _df = _df.loc[_df.year.between(*years_needed)].drop(columns=['date', 'sex', 'age_range']).groupby('year').sum().sum(axis = 1) + + # Initial year and discount rate + initial_year = min(_df.index.unique()) + + # Calculate the discounted values + discounted_values = _df / (1 + discount_rate) ** (_df.index - initial_year) + + return pd.Series(discounted_values.sum()) num_dalys = extract_results( - results_folder, - module='tlo.methods.healthburden', - key='dalys_stacked', - custom_generate_series=get_num_dalys, - do_scaling=True - ) + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys, + do_scaling=True +) # Get absolute DALYs averted -num_dalys_averted =(-1.0 * - pd.DataFrame( - find_difference_relative_to_comparison( - num_dalys.loc[0], - comparison= 0) # sets the comparator to 0 which is the Actual scenario - ).T.iloc[0].unstack(level = 'run')) +num_dalys_averted = (-1.0 * + pd.DataFrame( + find_difference_relative_to_comparison( + num_dalys.loc[0], + comparison=0) # sets the comparator to 0 which is the Actual scenario + ).T.iloc[0].unstack(level='run')) +num_dalys_averted = num_dalys_averted[num_dalys_averted.index.get_level_values(0).isin(htm_scenarios_for_gf_report)] # The monetary value of the health benefit is delta health times CET (negative values are set to 0) -monetary_value_of_incremental_health = (num_dalys_averted * chosen_cet).clip(lower = 0.0) -monetary_value_of_incremental_health = monetary_value_of_incremental_health[monetary_value_of_incremental_health.index.get_level_values(0).isin(htm_scenarios_for_gf_report)] -#TODO check that the above calculation is correct +def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_of_life_year): + monetary_value_of_incremental_health = (_num_dalys_averted * _chosen_value_of_life_year).clip(lower=0.0) + return monetary_value_of_incremental_health + +# TODO check that the above calculation is correct # 3. 
Return on Investment Plot -#---------------------------------------------------- +# ---------------------------------------------------- # Plot ROI at various levels of cost -generate_roi_plots(_monetary_value_of_incremental_health = monetary_value_of_incremental_health, - _incremental_input_cost = incremental_scenario_cost, +generate_roi_plots(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_cet), + _incremental_input_cost=incremental_scenario_cost, _scenario_dict = htm_scenarios, - _outputfilepath = roi_outputs_folder) - -# 4. Plot Maximum ability-to-pay -#---------------------------------------------------- -max_ability_to_pay_for_implementation = (monetary_value_of_incremental_health - incremental_scenario_cost).clip(lower = 0.0) # monetary value - change in costs -max_ability_to_pay_for_implementation_summarized = summarize_cost_data(max_ability_to_pay_for_implementation) -max_ability_to_pay_for_implementation_summarized = max_ability_to_pay_for_implementation_summarized[max_ability_to_pay_for_implementation_summarized.index.get_level_values(0).isin(htm_scenarios_for_gf_report)] - -def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrapped=False): - """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the - extent of the error bar.""" + _outputfilepath=roi_outputs_folder, + _value_of_life_suffix = 'CET') - yerr = np.array([ - (_df['mean'] - _df['lower']).values, - (_df['upper'] - _df['mean']).values, - ]) - - xticks = {(i+1): k for i, k in enumerate(_df.index)} +generate_roi_plots(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _scenario_dict = htm_scenarios, + _outputfilepath=roi_outputs_folder, + _value_of_life_suffix = 'VSL') - fig, ax = plt.subplots() - ax.bar( - xticks.keys(), - _df['mean'].values, - yerr=yerr, - alpha=1, - ecolor='black', - capsize=10, - label=xticks.values() - ) - ''' - if annotations: - for xpos, ypos, text in zip(xticks.keys(), _df['upper'].values, annotations): - ax.text(xpos, ypos * 1.05, text, horizontalalignment='center', fontsize=11) - - ax.set_xticks(list(xticks.keys())) - if not xticklabels_horizontal_and_wrapped: - wrapped_labs = ["\n".join(textwrap.wrap(_lab, 20)) for _lab in xticks.values()] - ax.set_xticklabels(wrapped_labs, rotation=45, ha='right', fontsize=10) - else: - wrapped_labs = ["\n".join(textwrap.wrap(_lab, 20)) for _lab in xticks.values()] - ax.set_xticklabels(wrapped_labs, fontsize=10) - ''' - - # Set font size for y-tick labels - ax.tick_params(axis='y', labelsize=12) - ax.tick_params(axis='x', labelsize=11) +# 4. 
Plot Maximum ability-to-pay at CET +# ---------------------------------------------------- +max_ability_to_pay_for_implementation = (get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_cet) - incremental_scenario_cost).clip( + lower=0.0) # monetary value - change in costs +max_ability_to_pay_for_implementation_summarized = summarize_cost_data(max_ability_to_pay_for_implementation) +max_ability_to_pay_for_implementation_summarized = max_ability_to_pay_for_implementation_summarized[ + max_ability_to_pay_for_implementation_summarized.index.get_level_values(0).isin(htm_scenarios_for_gf_report)] - ax.grid(axis="y") - ax.spines['top'].set_visible(False) - ax.spines['right'].set_visible(False) - fig.tight_layout() +# Plot Maximum ability to pay +name_of_plot = f'Maximum ability to pay at CET, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' +fig, ax = do_bar_plot_with_ci( + (max_ability_to_pay_for_implementation_summarized / 1e6), + annotations=[ + f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}-\n {round(row['upper'] / 1e6, 1)})" + for _, row in max_ability_to_pay_for_implementation_summarized.iterrows() + ], + xticklabels_horizontal_and_wrapped=False, +) +ax.set_title(name_of_plot) +ax.set_ylabel('Maximum ability to pay \n(Millions)') +fig.tight_layout() +fig.savefig(roi_outputs_folder / name_of_plot.replace(' ', '_').replace(',', '')) +plt.close(fig) - return fig, ax +# 4. Plot Maximum ability-to-pay at VSL +# ---------------------------------------------------- +max_ability_to_pay_for_implementation = (get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life) - incremental_scenario_cost).clip( + lower=0.0) # monetary value - change in costs +max_ability_to_pay_for_implementation_summarized = summarize_cost_data(max_ability_to_pay_for_implementation) +max_ability_to_pay_for_implementation_summarized = max_ability_to_pay_for_implementation_summarized[ + max_ability_to_pay_for_implementation_summarized.index.get_level_values(0).isin(htm_scenarios_for_gf_report)] -# Plot Max ability to pay -name_of_plot = f'Maximum ability to pay, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' #f'Maximum ability to pay, {first_year_of_simulation} - {final_year_of_simulation}' +# Plot Maximum ability to pay +name_of_plot = f'Maximum ability to pay at VSL, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' fig, ax = do_bar_plot_with_ci( (max_ability_to_pay_for_implementation_summarized / 1e6), annotations=[ - f"{round(row['mean']/1e6, 1)} \n ({round(row['lower']/1e6, 1)}-{round(row['upper']/1e6, 1)})" + f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}-\n {round(row['upper'] / 1e6, 1)})" for _, row in max_ability_to_pay_for_implementation_summarized.iterrows() ], xticklabels_horizontal_and_wrapped=False, ) ax.set_title(name_of_plot) -ax.set_ylabel('Maximum ability to pay \n(Millions)') +ax.set_ylabel('Maximum ability to pay (at VSL) \n(Millions)') fig.tight_layout() -fig.savefig(figurespath / name_of_plot.replace(' ', '_').replace(',', '')) +fig.savefig(roi_outputs_folder / name_of_plot.replace(' ', '_').replace(',', '')) +plt.close(fig) + +# Plot incremental costs +incremental_scenario_cost_summarized = summarize_cost_data(incremental_scenario_cost) +name_of_plot = f'Incremental scenario cost relative to baseline {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' +fig, ax = do_bar_plot_with_ci( + 
(incremental_scenario_cost_summarized / 1e6), + annotations=[ + f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}- \n {round(row['upper'] / 1e6, 1)})" + for _, row in incremental_scenario_cost_summarized.iterrows() + ], + xticklabels_horizontal_and_wrapped=False, +) +ax.set_title(name_of_plot) +ax.set_ylabel('Cost \n(USD Millions)') +fig.tight_layout() +fig.savefig(roi_outputs_folder / name_of_plot.replace(' ', '_').replace(',', '')) plt.close(fig) # 4. Plot costs @@ -376,17 +473,10 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe var_name='stat', value_name='cost' ) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _disaggregate_by_subgroup = False, _outputfilepath = figurespath) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = [2025], _disaggregate_by_subgroup = False, _outputfilepath = figurespath) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'human resources for health', _disaggregate_by_subgroup = False, _outputfilepath = figurespath) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical consumables', _disaggregate_by_subgroup = False, _outputfilepath = figurespath) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical equipment', _disaggregate_by_subgroup = False, _outputfilepath = figurespath) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'other', _disaggregate_by_subgroup = False, _outputfilepath = figurespath) - -do_line_plot_of_cost(_df = input_costs, _cost_category = 'medical consumables', _year = 'all', _draws = [0], disaggregate_by= 'cost_subgroup',_outputfilepath = figurespath) -do_line_plot_of_cost(_df = input_costs, _cost_category = 'other', _year = 'all', _draws = [0], disaggregate_by= 'cost_subgroup',_outputfilepath = figurespath) -do_line_plot_of_cost(_df = input_costs, _cost_category = 'human resources for health', _year = 'all', _draws = [0], disaggregate_by= 'cost_subgroup',_outputfilepath = figurespath) -do_line_plot_of_cost(_df = input_costs, _cost_category = 'human resources for health', _year = 'all', _draws = [0], disaggregate_by= 'cost_subcategory', _outputfilepath = figurespath) -do_line_plot_of_cost(_df = input_costs, _cost_category = 'medical equipment', _year = 'all', _draws = None, _outputfilepath = figurespath) -do_line_plot_of_cost(_df = input_costs, _cost_category = 'other', _year = 'all', _draws = None, _outputfilepath = figurespath) -do_line_plot_of_cost(_df = input_costs, _cost_category = 'all', _year = 'all', disaggregate_by= 'cost_category', _draws = None, _outputfilepath = figurespath) + +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = list(range(2025, 2036)), _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = [2025], _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'human resources for health', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios) 
+do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical consumables', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical equipment', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'other', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios) diff --git a/src/scripts/costing/costing_validation.py b/src/scripts/costing/costing_validation.py index 1e3af80b22..3925bb88df 100644 --- a/src/scripts/costing/costing_validation.py +++ b/src/scripts/costing/costing_validation.py @@ -28,7 +28,8 @@ parse_log_file, unflatten_flattened_multi_index_in_logging ) -from scripts.costing.cost_estimation import estimate_input_cost_of_scenarios +from scripts.costing.cost_estimation import (estimate_input_cost_of_scenarios, + do_stacked_bar_plot_of_cost_by_category) # Define a timestamp for script outputs timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M") @@ -214,7 +215,7 @@ def get_calibration_relevant_subset_of_other_costs(_df, _subcategory, _calibrati if not os.path.exists(calibration_outputs_folder): os.makedirs(calibration_outputs_folder) -def do_cost_calibration_plot(_df, _costs_included): +def do_cost_calibration_plot(_df, _costs_included, _xtick_fontsize = 10): # Filter the dataframe _df = _df[(_df.model_cost.notna()) & (_df.index.get_level_values(0).isin(_costs_included))] @@ -273,8 +274,10 @@ def do_cost_calibration_plot(_df, _costs_included): plt.ylabel('Costs (USD), millions') plt.title(f'Model Cost vs Annual Expenditure 2019 and Max(Annual Budget 2020-22)\n {cost_subcategory}') - # Rotate x-axis labels for readability - plt.xticks(rotation=45, ha='right') + # Customize x-axis labels for readability + max_label_length = 15 # Define a maximum label length for wrapping + wrapped_labels = [textwrap.fill(str(label), max_label_length) for label in df_mean.index] + plt.xticks(ticks=range(len(wrapped_labels)), labels=wrapped_labels, rotation=45, ha='right', fontsize=_xtick_fontsize) # Adding a legend plt.legend(loc='upper left', bbox_to_anchor=(1, 1), fontsize=10) @@ -285,7 +288,6 @@ def do_cost_calibration_plot(_df, _costs_included): bbox_inches='tight') plt.close() - # Call the function for each variable and cost list all_consumable_costs = list_of_consumables_costs_for_calibration_only_hiv + list_of_consumables_costs_for_calibration_without_hiv + ['Supply Chain'] all_calibration_costs = all_consumable_costs + list_of_hr_costs_for_calibration + list_of_equipment_costs_for_calibration @@ -295,7 +297,7 @@ def do_cost_calibration_plot(_df, _costs_included): do_cost_calibration_plot(calibration_data,all_consumable_costs) do_cost_calibration_plot(calibration_data, list_of_hr_costs_for_calibration) do_cost_calibration_plot(calibration_data, list_of_equipment_costs_for_calibration) -do_cost_calibration_plot(calibration_data,all_calibration_costs) +do_cost_calibration_plot(calibration_data,all_calibration_costs, _xtick_fontsize = 8) calibration_data.to_csv(figurespath / 'calibration/calibration.csv') # Stacked bar charts to represent all cost sub-groups @@ -311,6 +313,11 @@ def do_cost_calibration_plot(_df, _costs_included): do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'other', 
_disaggregate_by_subgroup = True, _outputfilepath = calibration_outputs_folder) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'all', + _disaggregate_by_subgroup = False, + _outputfilepath = calibration_outputs_folder) + + ''' From 5eb761a4d91a97fcdfe3838db2d5e3a2103ce611 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Thu, 7 Nov 2024 16:21:42 +0000 Subject: [PATCH 159/230] update chosen_cet to one from Lomas et al, 2023 --- src/scripts/costing/cost_analysis_hss_elements.py | 3 ++- .../costing/cost_analysis_htm_with_and_without_hss.py | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/scripts/costing/cost_analysis_hss_elements.py b/src/scripts/costing/cost_analysis_hss_elements.py index cd3483b2fe..634471c994 100644 --- a/src/scripts/costing/cost_analysis_hss_elements.py +++ b/src/scripts/costing/cost_analysis_hss_elements.py @@ -89,7 +89,8 @@ } # Cost-effectiveness threshold -chosen_cet = 77.4 # based on Ochalek et al (2018) - the paper provided the value $61 in 2016 USD terms, this value is in 2023 USD terms +chosen_cet = 199.620811947318 # This is based on the estimate from Lomas et al (2023)- $160.595987085533 in 2019 USD coverted to 2023 USD +# based on Ochalek et al (2018) - the paper provided the value $61 in 2016 USD terms, this value is $77.4 in 2023 USD terms chosen_value_of_statistical_life = 834 # Discount rate diff --git a/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py b/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py index a8d12a8d3b..04040aeef3 100644 --- a/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py +++ b/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py @@ -99,7 +99,8 @@ } # Cost-effectiveness threshold -chosen_cet = 77.4 # based on Ochalek et al (2018) - the paper provided the value $61 in 2016 USD terms, this value is in 2023 USD terms +chosen_cet = 199.620811947318 # This is based on the estimate from Lomas et al (2023)- $160.595987085533 in 2019 USD coverted to 2023 USD +# based on Ochalek et al (2018) - the paper provided the value $61 in 2016 USD terms, this value is $77.4 in 2023 USD terms chosen_value_of_statistical_life = 834 # Discount rate @@ -299,6 +300,8 @@ def melt_and_label_malaria_scaleup_cost(_df, label): input_costs = pd.concat([input_costs, new_df], ignore_index=True) # TODO Reduce the cost of Oxygen and Depo-medroxy temporarily which we figure out the issue with this +# Extract input_costs for browsing +input_costs.groupby(['draw', 'run', 'cost_category', 'cost_subcategory', 'cost_subgroup','year'])['cost'].sum().to_csv(figurespath / 'cost_detailed.csv') # %% # Return on Invesment analysis From 37e605c3320ffa397e5254946e0f2092c84a0715 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Thu, 7 Nov 2024 17:12:34 +0000 Subject: [PATCH 160/230] add functionality to plotting more than one ROI curve on one plot --- .../cost_analysis_htm_with_and_without_hss.py | 32 ++++++- src/scripts/costing/cost_estimation.py | 96 ++++++++++++++++++- 2 files changed, 126 insertions(+), 2 deletions(-) diff --git a/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py b/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py index 04040aeef3..e2632e6fd8 100644 --- a/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py +++ b/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py @@ -34,7 +34,8 @@ apply_discounting_to_cost_data, do_stacked_bar_plot_of_cost_by_category, do_line_plot_of_cost, - generate_roi_plots) + generate_roi_plots, + 
generate_multiple_scenarios_roi_plot) # Define a timestamp for script outputs timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M") @@ -396,6 +397,35 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o _outputfilepath=roi_outputs_folder, _value_of_life_suffix = 'VSL') +# Combined ROI plot of relevant scenarios +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [2,12,14], + _scenario_dict = htm_scenarios, + _outputfilepath=roi_outputs_folder, + _value_of_life_suffix = 'VSL') + +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [3,5], + _scenario_dict = htm_scenarios, + _outputfilepath=roi_outputs_folder, + _value_of_life_suffix = 'HIV_VSL') + +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [6,8], + _scenario_dict = htm_scenarios, + _outputfilepath=roi_outputs_folder, + _value_of_life_suffix = 'TB_VSL') + +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [9,11], + _scenario_dict = htm_scenarios, + _outputfilepath=roi_outputs_folder, + _value_of_life_suffix = 'Malaria_VSL') + # 4. 
Plot Maximum ability-to-pay at CET # ---------------------------------------------------- max_ability_to_pay_for_implementation = (get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_cet) - incremental_scenario_cost).clip( diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index db8e493090..9b87728712 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -922,7 +922,7 @@ def generate_roi_plots(_monetary_value_of_incremental_health: pd.DataFrame, # Iterate over each draw in monetary_value_of_incremental_health for draw_index, row in _monetary_value_of_incremental_health.iterrows(): - print("running draw ", draw_index) + print("Plotting ROI for draw ", draw_index) # Initialize an empty DataFrame to store values for each 'run' all_run_values = pd.DataFrame() @@ -1015,6 +1015,100 @@ def generate_roi_plots(_monetary_value_of_incremental_health: pd.DataFrame, bbox_inches='tight') plt.close() +def generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health: pd.DataFrame, + _incremental_input_cost: pd.DataFrame, + _draws:None, + _scenario_dict: dict, + _outputfilepath: Path, + _value_of_life_suffix = ''): + # Calculate maximum ability to pay for implementation + _monetary_value_of_incremental_health = _monetary_value_of_incremental_health[_monetary_value_of_incremental_health.index.get_level_values('draw').isin(_draws)] + _incremental_input_cost = _incremental_input_cost[_incremental_input_cost.index.get_level_values('draw').isin(_draws)] + max_ability_to_pay_for_implementation = (_monetary_value_of_incremental_health - _incremental_input_cost).clip(lower=0.0) # monetary value - change in costs + + # Create a figure and axis to plot all draws together + fig, ax = plt.subplots(figsize=(10, 6)) + + # Iterate over each draw in monetary_value_of_incremental_health + for draw_index, row in _monetary_value_of_incremental_health.iterrows(): + print("Plotting ROI for draw ", draw_index) + # Initialize an empty DataFrame to store values for each 'run' + all_run_values = pd.DataFrame() + + # Create an array of implementation costs ranging from 0 to the max value of max ability to pay for the current draw + implementation_costs = np.linspace(0, max_ability_to_pay_for_implementation.loc[draw_index].max(), 50) + + # Retrieve the corresponding row from incremental_scenario_cost for the same draw + incremental_scenario_cost_row = _incremental_input_cost.loc[draw_index] + + # Calculate the values for each individual run + for run in incremental_scenario_cost_row.index: # Assuming 'run' columns are labeled by numbers + # Calculate the total costs for the current run + total_costs = implementation_costs + incremental_scenario_cost_row[run] + + # Initialize run_values as an empty series with the same index as total_costs + run_values = pd.Series(index=total_costs, dtype=float) + + # For negative total_costs, set corresponding run_values to infinity + run_values[total_costs < 0] = np.inf + + # For non-negative total_costs, calculate the metric and clip at 0 + non_negative_mask = total_costs >= 0 + run_values[non_negative_mask] = np.clip( + (row[run] - total_costs[non_negative_mask]) / total_costs[non_negative_mask], + 0, + None + ) + + # Create a DataFrame with index as (draw_index, run) and columns as implementation costs + run_values = run_values.values # remove index and convert to array + run_df = pd.DataFrame([run_values], index=pd.MultiIndex.from_tuples([(draw_index, 
run)], names=['draw', 'run']), + columns=implementation_costs) + + # Append the run DataFrame to all_run_values + all_run_values = pd.concat([all_run_values, run_df]) + + # Replace inf with NaN temporarily to handle quantile calculation correctly + temp_data = all_run_values.replace([np.inf, -np.inf], np.nan) + + collapsed_data = temp_data.groupby(level='draw').agg([ + 'mean', + ('lower', lambda x: x.quantile(0.025)), + ('upper', lambda x: x.quantile(0.975)) + ]) + + # Revert the NaNs back to inf + collapsed_data = collapsed_data.replace([np.nan], np.inf) + + collapsed_data = collapsed_data.unstack() + collapsed_data.index = collapsed_data.index.set_names('implementation_cost', level=0) + collapsed_data.index = collapsed_data.index.set_names('stat', level=1) + collapsed_data = collapsed_data.reset_index().rename(columns = {0: 'roi'}) + + # Divide rows by the sum of implementation costs and incremental input cost + mean_values = collapsed_data[collapsed_data['stat'] == 'mean'][['implementation_cost', 'roi']] + lower_values = collapsed_data[collapsed_data['stat'] == 'lower'][['implementation_cost', 'roi']] + upper_values = collapsed_data[collapsed_data['stat'] == 'upper'][['implementation_cost', 'roi']] + + # Plot mean line and confidence interval + ax.plot(implementation_costs / 1e6, mean_values['roi'], label=f'{_scenario_dict[draw_index]}') + ax.fill_between(implementation_costs / 1e6, lower_values['roi'], upper_values['roi'], alpha=0.2) + + # Set y-axis limit + ax.set_ylim(0, mean_values[~np.isinf(mean_values.roi)]['roi'].max() * 1.25) + ax.set_xlim(left = 0) + + plt.xlabel('Implementation cost, millions') + plt.ylabel('Return on Investment') + plt.title('Return on Investment of scenario at different levels of implementation cost') + + # Show legend + plt.legend() + # Save + plt.savefig(_outputfilepath / f'draws_{_draws}_ROI_at_{_value_of_life_suffix}.png', dpi=100, + bbox_inches='tight') + plt.close() + ''' # Scratch pad # TODO all these HR plots need to be looked at From 6d3d6f416163b6669d320ac999fd918670b82c99 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Thu, 7 Nov 2024 18:04:33 +0000 Subject: [PATCH 161/230] update xticklabels to letters --- .../cost_analysis_htm_with_and_without_hss.py | 86 ++++++++++++++++++- 1 file changed, 84 insertions(+), 2 deletions(-) diff --git a/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py b/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py index e2632e6fd8..9448980134 100644 --- a/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py +++ b/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py @@ -9,6 +9,7 @@ import textwrap import matplotlib.pyplot as plt +import seaborn as sns from matplotlib.ticker import FuncFormatter import numpy as np import pandas as pd @@ -81,6 +82,13 @@ 7: "TB Programs Scale-up WITH FULL HSS PACKAGE", 8: "TB Programs Scale-up WITH REALISTIC HSS PACKAGE", 9: "Malaria Programs Scale-up WITHOUT HSS PACKAGE", 10: "Malaria Programs Scale-up WITH FULL HSS PACKAGE", 11: "Malaria Programs Scale-up WITH REALISTIC HSS PACKAGE", 12: "HTM Programs Scale-up WITHOUT HSS PACKAGE", 13: "HTM Programs Scale-up WITH FULL HSS PACKAGE", 14: "HTM Programs Scale-up WITH REALISTIC HSS PACKAGE", 15: "HTM Programs Scale-up WITH SUPPLY CHAINS", 16: "HTM Programs Scale-up WITH HRH"} + +htm_scenarios_substitutedict = {0:"0", 1: "1", 2: "A", 3: "B", +4: "4", 5: "C", 6: "D", +7: "7", 8: "E", 9: "F", +10: "10", 11: "G", 12: "H", +13: "13", 14: "I", 15: "J", 16: "K"} + # Subset of scenarios included in analysis 
htm_scenarios_for_gf_report = [0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 16] @@ -163,6 +171,80 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe return fig, ax +def do_standard_bar_plot_with_ci(_df, set_colors=None, annotations=None, + xticklabels_horizontal_and_wrapped=False, + put_labels_in_legend=True, + offset=1e6): + """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the + extent of the error bar.""" + + substitute_labels = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + + yerr = np.array([ + (_df['mean'] - _df['lower']).values, + (_df['upper'] - _df['mean']).values, + ]) +# TODO should be above be 'median' + xticks = {(i + 0.5): k for i, k in enumerate(_df.index)} + + if set_colors: + colors = [color_map.get(series, 'grey') for series in _df.index] + else: + cmap = sns.color_palette('Spectral', as_cmap=True) + rescale = lambda y: (y - np.min(y)) / (np.max(y) - np.min(y)) # noqa: E731 + colors = list(map(cmap, rescale(np.array(list(xticks.keys()))))) if put_labels_in_legend else None + + fig, ax = plt.subplots(figsize=(10, 5)) + ax.bar( + xticks.keys(), + _df['mean'].values, + yerr=yerr, + ecolor='black', + color=colors, + capsize=10, + label=xticks.values() + ) + + if annotations: + for xpos, (ypos, text) in zip(xticks.keys(), zip(_df['upper'].values.flatten(), annotations)): + annotation_y = ypos + offset + + ax.text( + xpos, + annotation_y, + '\n'.join(text.split(' ', 1)), + horizontalalignment='center', + verticalalignment='bottom', # Aligns text at the bottom of the annotation position + fontsize='x-small', + rotation='horizontal' + ) + + ax.set_xticks(list(xticks.keys())) + + if put_labels_in_legend: + # Update xticks label with substitute labels + # Insert legend with updated labels that shows correspondence between substitute label and original label + xtick_values = [letter for letter, label in zip(substitute_labels, xticks.values())] + xtick_legend = [f'{letter}: {label}' for letter, label in zip(substitute_labels, xticks.values())] + h, legs = ax.get_legend_handles_labels() + ax.legend(h, xtick_legend, loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5)) + ax.set_xticklabels(list(xtick_values)) + else: + if not xticklabels_horizontal_and_wrapped: + # xticklabels will be vertical and not wrapped + ax.set_xticklabels(list(xticks.values()), rotation=90) + else: + wrapped_labs = ["\n".join(textwrap.wrap(_lab, 20)) for _lab in xticks.values()] + ax.set_xticklabels(wrapped_labs) + + ax.grid(axis="y") + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + fig.tight_layout(pad=2.0) + plt.subplots_adjust(left=0.15, right=0.85) # Adjust left and right margins + + return fig, ax + # Estimate standard input costs of scenario #----------------------------------------------------------------------------------------------------------------------- input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, @@ -477,7 +559,7 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o # Plot incremental costs incremental_scenario_cost_summarized = summarize_cost_data(incremental_scenario_cost) name_of_plot = f'Incremental scenario cost relative to baseline {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' -fig, ax = do_bar_plot_with_ci( +fig, ax = do_standard_bar_plot_with_ci( (incremental_scenario_cost_summarized / 1e6), annotations=[ f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}- \n {round(row['upper'] / 1e6, 
1)})" @@ -507,7 +589,7 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o value_name='cost' ) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = list(range(2025, 2036)), _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = list(range(2025, 2036)), _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios_substitutedict) do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = [2025], _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios) do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'human resources for health', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios) do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical consumables', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios) From 95ad850c30fedcf3bd9572146ee323f1c887ee49 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Sun, 10 Nov 2024 21:11:47 +0000 Subject: [PATCH 162/230] update xticklabels to letters --- .../costing/cost_analysis_hss_elements.py | 112 +++++++++++++++++- 1 file changed, 107 insertions(+), 5 deletions(-) diff --git a/src/scripts/costing/cost_analysis_hss_elements.py b/src/scripts/costing/cost_analysis_hss_elements.py index 634471c994..6e0980076b 100644 --- a/src/scripts/costing/cost_analysis_hss_elements.py +++ b/src/scripts/costing/cost_analysis_hss_elements.py @@ -72,6 +72,10 @@ 4: "Increase Capacity at Primary Care Levels", 5: "Increase Capacity of CHW", 6: "Consumables Increased to 75th Percentile", 7: "Consumables Available at HIV levels", 8: "Consumables Available at EPI levels", 9: "Perfect Consumables Availability", 10: "HSS PACKAGE: Perfect", 11: "HSS PACKAGE: Realistic expansion, no change in HSB", 12: "HSS PACKAGE: Realistic expansion"} +hs_scenarios_substitutedict = {0:"0", 1: "A", 2: "B", 3: "C", +4: "D", 5: "5", 6: "E", +7: "F", 8: "G", 9: "H", +10: "I", 11: "11", 12: "J"} hss_scenarios_for_gf_report = [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 12] color_map = { 'Baseline': '#a50026', @@ -84,8 +88,8 @@ 'Consumables Available at HIV levels': '#a6d96a', 'Consumables Available at EPI levels': '#66bd63', 'Perfect Consumables Availability': '#1a9850', - 'HSS PACKAGE: Perfect': '#5e4fa2', - 'HSS PACKAGE: Realistic expansion': '#3288bd' + 'HSS PACKAGE: Perfect': '#3288bd', + 'HSS PACKAGE: Realistic expansion': '#5e4fa2' } # Cost-effectiveness threshold @@ -152,6 +156,80 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe return fig, ax +def do_standard_bar_plot_with_ci(_df, set_colors=None, annotations=None, + xticklabels_horizontal_and_wrapped=False, + put_labels_in_legend=True, + offset=1e6): + """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the + extent of the error bar.""" + + substitute_labels = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + + yerr = np.array([ + (_df['mean'] - _df['lower']).values, + (_df['upper'] - _df['mean']).values, + ]) +# TODO should be above be 'median' + xticks = {(i + 0.5): k for i, k in enumerate(_df.index)} + + if set_colors: + colors = 
[color_map.get(series, 'grey') for series in _df.index] + else: + cmap = sns.color_palette('Spectral', as_cmap=True) + rescale = lambda y: (y - np.min(y)) / (np.max(y) - np.min(y)) # noqa: E731 + colors = list(map(cmap, rescale(np.array(list(xticks.keys()))))) if put_labels_in_legend else None + + fig, ax = plt.subplots(figsize=(10, 5)) + ax.bar( + xticks.keys(), + _df['mean'].values, + yerr=yerr, + ecolor='black', + color=colors, + capsize=10, + label=xticks.values() + ) + + if annotations: + for xpos, (ypos, text) in zip(xticks.keys(), zip(_df['upper'].values.flatten(), annotations)): + annotation_y = ypos + offset + + ax.text( + xpos, + annotation_y, + '\n'.join(text.split(' ', 1)), + horizontalalignment='center', + verticalalignment='bottom', # Aligns text at the bottom of the annotation position + fontsize='x-small', + rotation='horizontal' + ) + + ax.set_xticks(list(xticks.keys())) + + if put_labels_in_legend: + # Update xticks label with substitute labels + # Insert legend with updated labels that shows correspondence between substitute label and original label + xtick_values = [letter for letter, label in zip(substitute_labels, xticks.values())] + xtick_legend = [f'{letter}: {label}' for letter, label in zip(substitute_labels, xticks.values())] + h, legs = ax.get_legend_handles_labels() + ax.legend(h, xtick_legend, loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5)) + ax.set_xticklabels(list(xtick_values)) + else: + if not xticklabels_horizontal_and_wrapped: + # xticklabels will be vertical and not wrapped + ax.set_xticklabels(list(xticks.values()), rotation=90) + else: + wrapped_labs = ["\n".join(textwrap.wrap(_lab, 20)) for _lab in xticks.values()] + ax.set_xticklabels(wrapped_labs) + + ax.grid(axis="y") + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + fig.tight_layout(pad=2.0) + plt.subplots_adjust(left=0.15, right=0.85) # Adjust left and right margins + + return fig, ax + # Estimate standard input costs of scenario # ----------------------------------------------------------------------------------------------------------------------- input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, @@ -258,6 +336,30 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o _outputfilepath=roi_outputs_folder, _value_of_life_suffix = 'VSL') +# Combined ROI plot of relevant scenarios +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [1,2,3,4], + _scenario_dict = hss_scenarios, + _outputfilepath=roi_outputs_folder, + _value_of_life_suffix = 'HR_VSL') + +# Combined ROI plot of relevant scenarios +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [6,7,8,9], + _scenario_dict = hss_scenarios, + _outputfilepath=roi_outputs_folder, + _value_of_life_suffix = 'Consumables_VSL') + +# Combined ROI plot of relevant scenarios +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [10,12], 
+ _scenario_dict = hss_scenarios, + _outputfilepath=roi_outputs_folder, + _value_of_life_suffix = 'HSS_VSL') + # 4. Plot Maximum ability-to-pay at CET # ---------------------------------------------------- max_ability_to_pay_for_implementation = (get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_cet) - incremental_scenario_cost).clip( @@ -268,7 +370,7 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o # Plot Maximum ability to pay name_of_plot = f'Maximum ability to pay at CET, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' -fig, ax = do_bar_plot_with_ci( +fig, ax = do_standard_bar_plot_with_ci( (max_ability_to_pay_for_implementation_summarized / 1e6), annotations=[ f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}-\n {round(row['upper'] / 1e6, 1)})" @@ -309,7 +411,7 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o # Plot incremental costs incremental_scenario_cost_summarized = summarize_cost_data(incremental_scenario_cost) name_of_plot = f'Incremental scenario cost relative to baseline {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' -fig, ax = do_bar_plot_with_ci( +fig, ax = do_standard_bar_plot_with_ci( (incremental_scenario_cost_summarized / 1e6), annotations=[ f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}- \n {round(row['upper'] / 1e6, 1)})" @@ -339,7 +441,7 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o value_name='cost' ) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hss_scenarios) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict) do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = [2025], _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hss_scenarios) do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'human resources for health', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hss_scenarios) do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical consumables', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hss_scenarios) From 0403d0884e6a9f42c28e9ff1c1c9ed70e7433965 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Sun, 10 Nov 2024 22:07:32 +0000 Subject: [PATCH 163/230] add figures for GF report --- .../costing/cost_analysis_hss_elements.py | 165 ++++++++++++------ 1 file changed, 114 insertions(+), 51 deletions(-) diff --git a/src/scripts/costing/cost_analysis_hss_elements.py b/src/scripts/costing/cost_analysis_hss_elements.py index 6e0980076b..1c4decab43 100644 --- a/src/scripts/costing/cost_analysis_hss_elements.py +++ b/src/scripts/costing/cost_analysis_hss_elements.py @@ -34,7 +34,8 @@ apply_discounting_to_cost_data, do_stacked_bar_plot_of_cost_by_category, do_line_plot_of_cost, - generate_roi_plots) + generate_roi_plots, + generate_multiple_scenarios_roi_plot) # Define a timestamp for script outputs timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M") @@ -48,9 +49,12 @@ figurespath = 
Path('./outputs/global_fund_roi_analysis/hss_elements/') if not os.path.exists(figurespath): os.makedirs(figurespath) -roi_outputs_folder = Path(figurespath / 'roi') -if not os.path.exists(roi_outputs_folder): - os.makedirs(roi_outputs_folder) +roi_outputs_folder_gf = Path(figurespath / 'gf/roi') +if not os.path.exists(roi_outputs_folder_gf): + os.makedirs(roi_outputs_folder_gf) +roi_outputs_folder_fcdo = Path(figurespath / 'fcdo/roi') +if not os.path.exists(roi_outputs_folder_fcdo): + os.makedirs(roi_outputs_folder_fcdo) # Load result files # ------------------------------------------------------------------------------------------------------------------ @@ -72,11 +76,16 @@ 4: "Increase Capacity at Primary Care Levels", 5: "Increase Capacity of CHW", 6: "Consumables Increased to 75th Percentile", 7: "Consumables Available at HIV levels", 8: "Consumables Available at EPI levels", 9: "Perfect Consumables Availability", 10: "HSS PACKAGE: Perfect", 11: "HSS PACKAGE: Realistic expansion, no change in HSB", 12: "HSS PACKAGE: Realistic expansion"} -hs_scenarios_substitutedict = {0:"0", 1: "A", 2: "B", 3: "C", +hs_scenarios_substitutedict_fcdo = {0:"0", 1: "A", 2: "B", 3: "C", 4: "D", 5: "5", 6: "E", 7: "F", 8: "G", 9: "H", 10: "I", 11: "11", 12: "J"} -hss_scenarios_for_gf_report = [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 12] +hs_scenarios_substitutedict_gf = {0:"0", 1: "A", 2: "B", 3: "C", +4: "D", 5: "E", 6: "F", +7: "G", 8: "H", 9: "9", +10: "10", 11: "11", 12: "I"} +hss_scenarios_for_fcdo_report = [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 12] +hss_scenarios_for_gf_report = [0, 1, 2, 3, 4, 5, 6, 7, 8, 12] color_map = { 'Baseline': '#a50026', 'HRH Moderate Scale-up (1%)': '#d73027', @@ -274,10 +283,6 @@ def find_difference_relative_to_comparison(_ser: pd.Series, comparison=0) # sets the comparator to 0 which is the Actual scenario ).T.iloc[0].unstack()).T -# Keep only scenarios of interest -incremental_scenario_cost = incremental_scenario_cost[ - incremental_scenario_cost.index.get_level_values(0).isin(hss_scenarios_for_gf_report)] - # Monetary value of health impact # ----------------------------------------------------------------------------------------------------------------------- def get_num_dalys(_df): @@ -312,7 +317,10 @@ def get_num_dalys(_df): num_dalys.loc[0], comparison=0) # sets the comparator to 0 which is the Actual scenario ).T.iloc[0].unstack(level='run')) -num_dalys_averted = num_dalys_averted[num_dalys_averted.index.get_level_values(0).isin(hss_scenarios_for_gf_report)] +num_dalys_averted_fcdo_scenarios = num_dalys_averted[ + num_dalys_averted.index.get_level_values(0).isin(hss_scenarios_for_fcdo_report)] +num_dalys_averted_gf_scenarios = num_dalys_averted[ + num_dalys_averted.index.get_level_values(0).isin(hss_scenarios_for_gf_report)] # The monetary value of the health benefit is delta health times CET (negative values are set to 0) def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_of_life_year): @@ -323,41 +331,54 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o # 3. 
Return on Investment Plot # ---------------------------------------------------- -# Plot ROI at various levels of cost -generate_roi_plots(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_cet), +# FCDO +# Combined ROI plot of relevant scenarios +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), _incremental_input_cost=incremental_scenario_cost, + _draws = [1,2,3,4], _scenario_dict = hss_scenarios, - _outputfilepath=roi_outputs_folder, - _value_of_life_suffix = 'CET') + _outputfilepath=roi_outputs_folder_fcdo, + _value_of_life_suffix = 'HR_VSL') -generate_roi_plots(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), +# Combined ROI plot of relevant scenarios +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), _incremental_input_cost=incremental_scenario_cost, + _draws = [6,7,8,9], _scenario_dict = hss_scenarios, - _outputfilepath=roi_outputs_folder, - _value_of_life_suffix = 'VSL') + _outputfilepath=roi_outputs_folder_fcdo, + _value_of_life_suffix = 'Consumables_VSL') # Combined ROI plot of relevant scenarios generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), _incremental_input_cost=incremental_scenario_cost, - _draws = [1,2,3,4], + _draws = [10,12], + _scenario_dict = hss_scenarios, + _outputfilepath=roi_outputs_folder_fcdo, + _value_of_life_suffix = 'HSS_VSL') + +# Global Fund +# Combined ROI plot of relevant scenarios +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [1,2,3,4,5], _scenario_dict = hss_scenarios, - _outputfilepath=roi_outputs_folder, + _outputfilepath=roi_outputs_folder_gf, _value_of_life_suffix = 'HR_VSL') # Combined ROI plot of relevant scenarios generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), _incremental_input_cost=incremental_scenario_cost, - _draws = [6,7,8,9], + _draws = [6,7,8], _scenario_dict = hss_scenarios, - _outputfilepath=roi_outputs_folder, + _outputfilepath=roi_outputs_folder_gf, _value_of_life_suffix = 'Consumables_VSL') # Combined ROI plot of relevant scenarios generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), _incremental_input_cost=incremental_scenario_cost, - _draws = [10,12], + _draws = [12], _scenario_dict = hss_scenarios, - _outputfilepath=roi_outputs_folder, + _outputfilepath=roi_outputs_folder_gf, _value_of_life_suffix = 'HSS_VSL') # 4. 
Plot Maximum ability-to-pay at CET @@ -365,68 +386,110 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o max_ability_to_pay_for_implementation = (get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_cet) - incremental_scenario_cost).clip( lower=0.0) # monetary value - change in costs max_ability_to_pay_for_implementation_summarized = summarize_cost_data(max_ability_to_pay_for_implementation) -max_ability_to_pay_for_implementation_summarized = max_ability_to_pay_for_implementation_summarized[ +max_ability_to_pay_for_implementation_summarized_fcdo = max_ability_to_pay_for_implementation_summarized[ + max_ability_to_pay_for_implementation_summarized.index.get_level_values(0).isin(hss_scenarios_for_fcdo_report)] +max_ability_to_pay_for_implementation_summarized_gf = max_ability_to_pay_for_implementation_summarized[ max_ability_to_pay_for_implementation_summarized.index.get_level_values(0).isin(hss_scenarios_for_gf_report)] +# FCDO # Plot Maximum ability to pay name_of_plot = f'Maximum ability to pay at CET, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' fig, ax = do_standard_bar_plot_with_ci( - (max_ability_to_pay_for_implementation_summarized / 1e6), + (max_ability_to_pay_for_implementation_summarized_fcdo / 1e6), annotations=[ f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}-\n {round(row['upper'] / 1e6, 1)})" - for _, row in max_ability_to_pay_for_implementation_summarized.iterrows() + for _, row in max_ability_to_pay_for_implementation_summarized_fcdo.iterrows() ], xticklabels_horizontal_and_wrapped=False, ) ax.set_title(name_of_plot) ax.set_ylabel('Maximum ability to pay \n(Millions)') fig.tight_layout() -fig.savefig(roi_outputs_folder / name_of_plot.replace(' ', '_').replace(',', '')) +fig.savefig(roi_outputs_folder_fcdo / name_of_plot.replace(' ', '_').replace(',', '')) plt.close(fig) -# 4. 
Plot Maximum ability-to-pay at VSL -# ---------------------------------------------------- -max_ability_to_pay_for_implementation = (get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life) - incremental_scenario_cost).clip( - lower=0.0) # monetary value - change in costs -max_ability_to_pay_for_implementation_summarized = summarize_cost_data(max_ability_to_pay_for_implementation) -max_ability_to_pay_for_implementation_summarized = max_ability_to_pay_for_implementation_summarized[ - max_ability_to_pay_for_implementation_summarized.index.get_level_values(0).isin(hss_scenarios_for_gf_report)] - +# Global Fund # Plot Maximum ability to pay -name_of_plot = f'Maximum ability to pay at VSL, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' -fig, ax = do_bar_plot_with_ci( - (max_ability_to_pay_for_implementation_summarized / 1e6), +name_of_plot = f'Maximum ability to pay at CET, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' +fig, ax = do_standard_bar_plot_with_ci( + (max_ability_to_pay_for_implementation_summarized_gf / 1e6), annotations=[ f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}-\n {round(row['upper'] / 1e6, 1)})" - for _, row in max_ability_to_pay_for_implementation_summarized.iterrows() + for _, row in max_ability_to_pay_for_implementation_summarized_gf.iterrows() ], xticklabels_horizontal_and_wrapped=False, ) ax.set_title(name_of_plot) -ax.set_ylabel('Maximum ability to pay (at VSL) \n(Millions)') +ax.set_ylabel('Maximum ability to pay \n(Millions)') fig.tight_layout() -fig.savefig(roi_outputs_folder / name_of_plot.replace(' ', '_').replace(',', '')) +fig.savefig(roi_outputs_folder_gf / name_of_plot.replace(' ', '_').replace(',', '')) plt.close(fig) # Plot incremental costs incremental_scenario_cost_summarized = summarize_cost_data(incremental_scenario_cost) +# Keep only scenarios of interest +incremental_scenario_cost_summarized_fcdo = incremental_scenario_cost_summarized[ + incremental_scenario_cost_summarized.index.get_level_values(0).isin(hss_scenarios_for_fcdo_report)] +incremental_scenario_cost_summarized_gf = incremental_scenario_cost_summarized[ + incremental_scenario_cost_summarized.index.get_level_values(0).isin(hss_scenarios_for_gf_report)] + +# FCDO +name_of_plot = f'Incremental scenario cost relative to baseline {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' +fig, ax = do_standard_bar_plot_with_ci( + (incremental_scenario_cost_summarized_fcdo / 1e6), + annotations=[ + f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}- \n {round(row['upper'] / 1e6, 1)})" + for _, row in incremental_scenario_cost_summarized_fcdo.iterrows() + ], + xticklabels_horizontal_and_wrapped=False, +) +ax.set_title(name_of_plot) +ax.set_ylabel('Cost \n(USD Millions)') +fig.tight_layout() +fig.savefig(roi_outputs_folder_fcdo / name_of_plot.replace(' ', '_').replace(',', '')) +plt.close(fig) + +# Global Fund name_of_plot = f'Incremental scenario cost relative to baseline {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' fig, ax = do_standard_bar_plot_with_ci( - (incremental_scenario_cost_summarized / 1e6), + (incremental_scenario_cost_summarized_gf / 1e6), annotations=[ f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}- \n {round(row['upper'] / 1e6, 1)})" - for _, row in incremental_scenario_cost_summarized.iterrows() + for _, row in incremental_scenario_cost_summarized_gf.iterrows() ], 
xticklabels_horizontal_and_wrapped=False, ) ax.set_title(name_of_plot) ax.set_ylabel('Cost \n(USD Millions)') fig.tight_layout() -fig.savefig(roi_outputs_folder / name_of_plot.replace(' ', '_').replace(',', '')) +fig.savefig(roi_outputs_folder_gf / name_of_plot.replace(' ', '_').replace(',', '')) plt.close(fig) # 4. Plot costs # ---------------------------------------------------- +# FCDO +input_costs_for_plot = input_costs[input_costs.draw.isin(hss_scenarios_for_fcdo_report)] +# First summarize all input costs +input_costs_for_plot_summarized = input_costs_for_plot.groupby(['draw', 'year', 'cost_subcategory', 'Facility_Level', 'cost_subgroup', 'cost_category']).agg( + mean=('cost', 'mean'), + lower=('cost', lambda x: x.quantile(0.025)), + upper=('cost', lambda x: x.quantile(0.975)) +).reset_index() +input_costs_for_plot_summarized = input_costs_for_plot_summarized.melt( + id_vars=['draw', 'year', 'cost_subcategory', 'Facility_Level', 'cost_subgroup', 'cost_category'], + value_vars=['mean', 'lower', 'upper'], + var_name='stat', + value_name='cost' +) + +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _disaggregate_by_subgroup = False, _outputfilepath = Path(figurespath / 'fcdo'), _scenario_dict = hs_scenarios_substitutedict_fcdo) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = [2025], _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict_fcdo) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'human resources for health', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict_fcdo) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical consumables', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict_fcdo) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical equipment', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict_fcdo) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'other', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict_fcdo) + +# Global Fund input_costs_for_plot = input_costs[input_costs.draw.isin(hss_scenarios_for_gf_report)] # First summarize all input costs input_costs_for_plot_summarized = input_costs_for_plot.groupby(['draw', 'year', 'cost_subcategory', 'Facility_Level', 'cost_subgroup', 'cost_category']).agg( @@ -441,9 +504,9 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o value_name='cost' ) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = [2025], _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hss_scenarios) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'human resources for health', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = 
hss_scenarios) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical consumables', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hss_scenarios) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical equipment', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hss_scenarios) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'other', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hss_scenarios) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _disaggregate_by_subgroup = False, _outputfilepath = Path(figurespath / 'gf'), _scenario_dict = hs_scenarios_substitutedict_gf) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = [2025], _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict_gf) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'human resources for health', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict_gf) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical consumables', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict_gf) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical equipment', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict_gf) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'other', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict_gf) From 77ea5477800e3f6be298a29ccad6b6c3829a74ef Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Sun, 10 Nov 2024 22:07:50 +0000 Subject: [PATCH 164/230] Update ylim on ROI plots --- src/scripts/costing/cost_estimation.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 9b87728712..f56124eba8 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -1029,6 +1029,9 @@ def generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health: # Create a figure and axis to plot all draws together fig, ax = plt.subplots(figsize=(10, 6)) + # Generate a list to store max ROI value to set ylim + max_roi = [] + # Iterate over each draw in monetary_value_of_incremental_health for draw_index, row in _monetary_value_of_incremental_health.iterrows(): print("Plotting ROI for draw ", draw_index) @@ -1094,8 +1097,11 @@ def generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health: ax.plot(implementation_costs / 1e6, mean_values['roi'], label=f'{_scenario_dict[draw_index]}') ax.fill_between(implementation_costs / 1e6, lower_values['roi'], upper_values['roi'], alpha=0.2) + max_val = mean_values[~np.isinf(mean_values['roi'])]['roi'].max() + max_roi.append(max_val) + # Set y-axis limit - ax.set_ylim(0, mean_values[~np.isinf(mean_values.roi)]['roi'].max() * 1.25) + ax.set_ylim(0, max(max_roi) * 1.25) ax.set_xlim(left = 0) 
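+    # Note on the y-axis quantity: the 'roi' values plotted above are assumed to follow the
+    # usual definition roi = (monetary value of incremental health - total cost) / total cost,
+    # where total cost combines the scenario's incremental input cost with the implementation
+    # cost swept along the x-axis; e.g. a benefit of $300m against $100m of total cost gives roi = 2.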
plt.xlabel('Implementation cost, millions') From 06ac969efbec70efd4fe1bf8189b3237eb23ebe7 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 11 Nov 2024 13:31:02 +0000 Subject: [PATCH 165/230] add global fund ROI figure --- .../costing/cost_analysis_htm_with_and_without_hss.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py b/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py index 9448980134..01eb93e38d 100644 --- a/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py +++ b/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py @@ -83,7 +83,7 @@ 10: "Malaria Programs Scale-up WITH FULL HSS PACKAGE", 11: "Malaria Programs Scale-up WITH REALISTIC HSS PACKAGE", 12: "HTM Programs Scale-up WITHOUT HSS PACKAGE", 13: "HTM Programs Scale-up WITH FULL HSS PACKAGE", 14: "HTM Programs Scale-up WITH REALISTIC HSS PACKAGE", 15: "HTM Programs Scale-up WITH SUPPLY CHAINS", 16: "HTM Programs Scale-up WITH HRH"} -htm_scenarios_substitutedict = {0:"0", 1: "1", 2: "A", 3: "B", +htm_scenarios_substitutedict_fcdo = {0:"0", 1: "1", 2: "A", 3: "B", 4: "4", 5: "C", 6: "D", 7: "7", 8: "E", 9: "F", 10: "10", 11: "G", 12: "H", @@ -487,6 +487,13 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o _outputfilepath=roi_outputs_folder, _value_of_life_suffix = 'VSL') +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [2,12,14,15,16], + _scenario_dict = htm_scenarios, + _outputfilepath=roi_outputs_folder, + _value_of_life_suffix = 'all_HTM_VSL') + generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), _incremental_input_cost=incremental_scenario_cost, _draws = [3,5], @@ -518,7 +525,7 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o # Plot Maximum ability to pay name_of_plot = f'Maximum ability to pay at CET, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' -fig, ax = do_bar_plot_with_ci( +fig, ax = do_standard_bar_plot_with_ci( (max_ability_to_pay_for_implementation_summarized / 1e6), annotations=[ f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}-\n {round(row['upper'] / 1e6, 1)})" From 99774bdad03b3a80c7c18e8ac5ea0d98007a35c8 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 19 Nov 2024 10:14:57 +0000 Subject: [PATCH 166/230] update ROI figure title --- src/scripts/costing/cost_estimation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index f56124eba8..b8abb44e95 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -1106,7 +1106,7 @@ def generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health: plt.xlabel('Implementation cost, millions') plt.ylabel('Return on Investment') - plt.title('Return on Investment of scenario at different levels of implementation cost') + plt.title('Return on Investment at different levels of implementation cost') # Show legend plt.legend() From 66758867e0d301412fe511794cdb387ed80d7f4b Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 19 Nov 2024 
12:33:41 +0000 Subject: [PATCH 167/230] update consumables cost from 'available' to 'used' --- src/scripts/costing/cost_estimation.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index b8abb44e95..85e82fba5c 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -359,20 +359,20 @@ def label_rows_of_cost_dataframe(_df, label_var, label): def get_quantity_of_consumables_dispensed(results_folder): def get_counts_of_items_requested(_df): _df = drop_outside_period(_df) - counts_of_available = defaultdict(lambda: defaultdict(int)) + counts_of_used = defaultdict(lambda: defaultdict(int)) counts_of_not_available = defaultdict(lambda: defaultdict(int)) for _, row in _df.iterrows(): date = row['date'] - for item, num in row['Item_Available'].items(): - counts_of_available[date][item] += num + for item, num in row['Item_Used'].items(): + counts_of_used[date][item] += num for item, num in row['Item_NotAvailable'].items(): counts_of_not_available[date][item] += num - available_df = pd.DataFrame(counts_of_available).fillna(0).astype(int).stack().rename('Available') + used_df = pd.DataFrame(counts_of_used).fillna(0).astype(int).stack().rename('Used') not_available_df = pd.DataFrame(counts_of_not_available).fillna(0).astype(int).stack().rename('Not_Available') # Combine the two dataframes into one series with MultiIndex (date, item, availability_status) - combined_df = pd.concat([available_df, not_available_df], axis=1).fillna(0).astype(int) + combined_df = pd.concat([used_df, not_available_df], axis=1).fillna(0).astype(int) # Convert to a pd.Series, as expected by the custom_generate_series function return combined_df.stack() @@ -384,7 +384,7 @@ def get_counts_of_items_requested(_df): custom_generate_series=get_counts_of_items_requested, do_scaling=True) - cons_dispensed = cons_req.xs("Available", level=2) # only keep actual dispensed amount, i.e. when available + cons_dispensed = cons_req.xs("Used", level=2) # only keep actual dispensed amount, i.e. 
when available return cons_dispensed # TODO Extract year of dispensing drugs From 123a4f920ff1bb5eb40d32e40a23a834882928c9 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 19 Nov 2024 14:06:15 +0000 Subject: [PATCH 168/230] new analysis scripts for GF and FCDO analysis --- .../cost_analysis_hss_elements_gf.py | 505 +++++++++++++++ ...st_analysis_htm_with_and_without_hss_gf.py | 596 ++++++++++++++++++ 2 files changed, 1101 insertions(+) create mode 100644 src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_hss_elements_gf.py create mode 100644 src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py diff --git a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_hss_elements_gf.py b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_hss_elements_gf.py new file mode 100644 index 0000000000..165c90e948 --- /dev/null +++ b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_hss_elements_gf.py @@ -0,0 +1,505 @@ +import argparse +from pathlib import Path +from tlo import Date +from collections import Counter, defaultdict + +import calendar +import datetime +import os +import textwrap + +import matplotlib.pyplot as plt +from matplotlib.ticker import FuncFormatter +import numpy as np +import pandas as pd +import ast +import math + +from tlo.analysis.utils import ( + extract_params, + extract_results, + get_scenario_info, + get_scenario_outputs, + load_pickled_dataframes, + make_age_grp_lookup, + make_age_grp_types, + summarize, + create_pickles_locally, + parse_log_file, + unflatten_flattened_multi_index_in_logging +) + +from scripts.costing.cost_estimation import (estimate_input_cost_of_scenarios, + summarize_cost_data, + apply_discounting_to_cost_data, + do_stacked_bar_plot_of_cost_by_category, + do_line_plot_of_cost, + generate_roi_plots, + generate_multiple_scenarios_roi_plot) + +# Define a timestamp for script outputs +timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M") + +# Print the start time of the script +print('Script Start', datetime.datetime.now().strftime('%H:%M')) + +# Create folders to store results +resourcefilepath = Path("./resources") +outputfilepath = Path('./outputs/t.mangal@imperial.ac.uk') +figurespath = Path('./outputs/global_fund_roi_analysis/hss_elements/') +if not os.path.exists(figurespath): + os.makedirs(figurespath) +roi_outputs_folder_gf = Path(figurespath / 'gf/roi') +if not os.path.exists(roi_outputs_folder_gf): + os.makedirs(roi_outputs_folder_gf) +roi_outputs_folder_fcdo = Path(figurespath / 'fcdo/roi') +if not os.path.exists(roi_outputs_folder_fcdo): + os.makedirs(roi_outputs_folder_fcdo) + +# Load result files +# ------------------------------------------------------------------------------------------------------------------ +results_folder = get_scenario_outputs('hss_elements-2024-11-12T172311Z.py', outputfilepath)[0] + +# Check can read results from draw=0, run=0 +log = load_pickled_dataframes(results_folder, 0, 0) # look at one log (so can decide what to extract) +params = extract_params(results_folder) + +# Declare default parameters for cost analysis +# ------------------------------------------------------------------------------------------------------------------ +# Period relevant for costing +TARGET_PERIOD_INTERVENTION = (Date(2025, 1, 1), Date(2035, 12, 31)) # This is the period that is costed +relevant_period_for_costing = [i.year for i in 
TARGET_PERIOD_INTERVENTION] +list_of_relevant_years_for_costing = list(range(relevant_period_for_costing[0], relevant_period_for_costing[1] + 1)) + +# Scenarios +hss_scenarios = {0: "Baseline", 1: "HRH Moderate Scale-up (1%)", 2: "HRH Scale-up Following Historical Growth", 3: "HRH Accelerated Scale-up (6%)", + 4: "Increase Capacity at Primary Care Levels", 5: "Consumables Increased to 75th Percentile", + 6: "Consumables Available at HIV levels", 7: "Consumables Available at EPI levels", 8: "HSS PACKAGE: Realistic expansion"} +hs_scenarios_substitutedict = {0:"0", 1: "A", 2: "B", 3: "C", +4: "D", 5: "E", 6: "F", +7: "G", 8: "H"} +hss_scenarios_for_report = [0, 1, 2, 3, 4, 5, 6, 7, 8] +color_map = { + 'Baseline': '#a50026', + 'HRH Moderate Scale-up (1%)': '#d73027', + 'HRH Scale-up Following Historical Growth': '#f46d43', + 'HRH Accelerated Scale-up (6%)': '#fdae61', + 'Increase Capacity at Primary Care Levels': '#fee08b', + 'Increase Capacity of CHW': '#ffffbf', + 'Consumables Increased to 75th Percentile': '#d9ef8b', + 'Consumables Available at HIV levels': '#a6d96a', + 'Consumables Available at EPI levels': '#66bd63', + 'Perfect Consumables Availability': '#1a9850', + 'HSS PACKAGE: Perfect': '#3288bd', + 'HSS PACKAGE: Realistic expansion': '#5e4fa2' +} + +# Cost-effectiveness threshold +chosen_cet = 199.620811947318 # This is based on the estimate from Lomas et al (2023)- $160.595987085533 in 2019 USD coverted to 2023 USD +# based on Ochalek et al (2018) - the paper provided the value $61 in 2016 USD terms, this value is $77.4 in 2023 USD terms +chosen_value_of_statistical_life = 834 + +# Discount rate +discount_rate = 0.03 + +# Define a function to create bar plots +def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrapped=False): + """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the + extent of the error bar.""" + + # Calculate y-error bars + yerr = np.array([ + (_df['mean'] - _df['lower']).values, + (_df['upper'] - _df['mean']).values, + ]) + + # Map xticks based on the hss_scenarios dictionary + xticks = {index: hss_scenarios.get(index, f"Scenario {index}") for index in _df.index} + + # Retrieve colors from color_map based on the xticks labels + colors = [color_map.get(label, '#333333') for label in xticks.values()] # default to grey if not found + + # Generate consecutive x positions for the bars, ensuring no gaps + x_positions = np.arange(len(xticks)) # Consecutive integers for each bar position + + fig, ax = plt.subplots() + ax.bar( + x_positions, + _df['mean'].values, + yerr=yerr, + color=colors, # Set bar colors + alpha=1, + ecolor='black', + capsize=10, + ) + + # Add optional annotations above each bar + if annotations: + for xpos, ypos, text in zip(x_positions, _df['upper'].values, annotations): + ax.text(xpos, ypos * 1.05, text, horizontalalignment='center', fontsize=8) + + # Set x-tick labels with wrapped text if required + wrapped_labs = ["\n".join(textwrap.wrap(label, 25)) for label in xticks.values()] + ax.set_xticks(x_positions) # Set x-ticks to consecutive positions + ax.set_xticklabels(wrapped_labs, rotation=45 if not xticklabels_horizontal_and_wrapped else 0, ha='right', + fontsize=8) + + # Set y-axis limit to upper max + 500 + ax.set_ylim(_df['lower'].min()*1.25, _df['upper'].max()*1.25) + + # Set font size for y-tick labels and grid + ax.tick_params(axis='y', labelsize=9) + ax.tick_params(axis='x', labelsize=9) + + ax.grid(axis="y") + ax.spines['top'].set_visible(False) + 
ax.spines['right'].set_visible(False) + fig.tight_layout() + + return fig, ax + +def do_standard_bar_plot_with_ci(_df, set_colors=None, annotations=None, + xticklabels_horizontal_and_wrapped=False, + put_labels_in_legend=True, + offset=1e6): + """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the + extent of the error bar.""" + + substitute_labels = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + + yerr = np.array([ + (_df['mean'] - _df['lower']).values, + (_df['upper'] - _df['mean']).values, + ]) +# TODO should be above be 'median' + xticks = {(i + 0.5): k for i, k in enumerate(_df.index)} + + if set_colors: + colors = [color_map.get(series, 'grey') for series in _df.index] + else: + cmap = sns.color_palette('Spectral', as_cmap=True) + rescale = lambda y: (y - np.min(y)) / (np.max(y) - np.min(y)) # noqa: E731 + colors = list(map(cmap, rescale(np.array(list(xticks.keys()))))) if put_labels_in_legend else None + + fig, ax = plt.subplots(figsize=(10, 5)) + ax.bar( + xticks.keys(), + _df['mean'].values, + yerr=yerr, + ecolor='black', + color=colors, + capsize=10, + label=xticks.values() + ) + + if annotations: + for xpos, (ypos, text) in zip(xticks.keys(), zip(_df['upper'].values.flatten(), annotations)): + annotation_y = ypos + offset + + ax.text( + xpos, + annotation_y, + '\n'.join(text.split(' ', 1)), + horizontalalignment='center', + verticalalignment='bottom', # Aligns text at the bottom of the annotation position + fontsize='x-small', + rotation='horizontal' + ) + + ax.set_xticks(list(xticks.keys())) + + if put_labels_in_legend: + # Update xticks label with substitute labels + # Insert legend with updated labels that shows correspondence between substitute label and original label + xtick_values = [letter for letter, label in zip(substitute_labels, xticks.values())] + xtick_legend = [f'{letter}: {label}' for letter, label in zip(substitute_labels, xticks.values())] + h, legs = ax.get_legend_handles_labels() + ax.legend(h, xtick_legend, loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5)) + ax.set_xticklabels(list(xtick_values)) + else: + if not xticklabels_horizontal_and_wrapped: + # xticklabels will be vertical and not wrapped + ax.set_xticklabels(list(xticks.values()), rotation=90) + else: + wrapped_labs = ["\n".join(textwrap.wrap(_lab, 20)) for _lab in xticks.values()] + ax.set_xticklabels(wrapped_labs) + + ax.grid(axis="y") + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + fig.tight_layout(pad=2.0) + plt.subplots_adjust(left=0.15, right=0.85) # Adjust left and right margins + + return fig, ax + +# Estimate standard input costs of scenario +# ----------------------------------------------------------------------------------------------------------------------- +input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, + _years=list_of_relevant_years_for_costing, cost_only_used_staff=True, + _discount_rate = discount_rate) +# _draws = htm_scenarios_for_gf_report --> this subset is created after calculating malaria scale up costs +# TODO Remove the manual fix below once the logging for these is corrected +input_costs.loc[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost'] = \ + input_costs.loc[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost']/10 +input_costs.loc[input_costs.cost_subgroup == 'Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly', 'cost'] =\ + input_costs.loc[input_costs.cost_subgroup == 
'Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly', 'cost']/7
+#input_costs = apply_discounting_to_cost_data(input_costs, _discount_rate = discount_rate)
+
+# %%
+# Return on Investment analysis
+# Calculate incremental cost
+# -----------------------------------------------------------------------------------------------------------------------
+# Aggregate input costs for further analysis
+input_costs_subset = input_costs[
+    (input_costs['year'] >= relevant_period_for_costing[0]) & (input_costs['year'] <= relevant_period_for_costing[1])]
+# TODO the above step may no longer be needed
+total_input_cost = input_costs_subset.groupby(['draw', 'run'])['cost'].sum()
+total_input_cost_summarized = summarize_cost_data(total_input_cost.unstack(level='run'))
+def find_difference_relative_to_comparison(_ser: pd.Series,
+                                           comparison: str,
+                                           scaled: bool = False,
+                                           drop_comparison: bool = True,
+                                           ):
+    """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0)
+    within the runs (level 1), relative to where draw = `comparison`.
+    The comparison is `X - COMPARISON`."""
+    return _ser \
+        .unstack(level=0) \
+        .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) \
+        .drop(columns=([comparison] if drop_comparison else [])) \
+        .stack()
+
+
+incremental_scenario_cost = (pd.DataFrame(
+    find_difference_relative_to_comparison(
+        total_input_cost,
+        comparison=0) # sets the comparator to 0 which is the Actual scenario
+).T.iloc[0].unstack()).T
+
+# Monetary value of health impact
+# -----------------------------------------------------------------------------------------------------------------------
+def get_num_dalys(_df):
+    """Return total number of DALYS (Stacked) by label (total within the TARGET_PERIOD).
+    Throw an error if there is not a record for every year in the TARGET PERIOD (to guard against inadvertently
+    using results from runs that crashed mid-way through the simulation).
+    """
+    years_needed = relevant_period_for_costing # [i.year for i in TARGET_PERIOD_INTERVENTION]
+    assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded."
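+    # The steps below restrict DALYs to the costing period and discount them back to the initial
+    # year using the standard factor 1 / (1 + discount_rate) ** (year - initial_year).
+    # For example, with discount_rate = 0.03 and an initial year of 2025, DALYs accruing in 2030
+    # are divided by 1.03 ** 5 (about 1.159), i.e. given a weight of roughly 0.863.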
+ _df = _df.loc[_df.year.between(*years_needed)].drop(columns=['date', 'sex', 'age_range']).groupby('year').sum().sum(axis = 1) + + # Initial year and discount rate + initial_year = min(_df.index.unique()) + + # Calculate the discounted values + discounted_values = _df / (1 + discount_rate) ** (_df.index - initial_year) + + return pd.Series(discounted_values.sum()) + +num_dalys = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys, + do_scaling=True +) + +# Get absolute DALYs averted +num_dalys_averted = (-1.0 * + pd.DataFrame( + find_difference_relative_to_comparison( + num_dalys.loc[0], + comparison=0) # sets the comparator to 0 which is the Actual scenario + ).T.iloc[0].unstack(level='run')) +num_dalys_averted_fcdo_scenarios = num_dalys_averted[ + num_dalys_averted.index.get_level_values(0).isin(hss_scenarios_for_report)] +num_dalys_averted_gf_scenarios = num_dalys_averted[ + num_dalys_averted.index.get_level_values(0).isin(hss_scenarios_for_report)] + +# The monetary value of the health benefit is delta health times CET (negative values are set to 0) +def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_of_life_year): + monetary_value_of_incremental_health = (_num_dalys_averted * _chosen_value_of_life_year).clip(lower=0.0) + return monetary_value_of_incremental_health + +# TODO check that the above calculation is correct + +# 3. Return on Investment Plot +# ---------------------------------------------------- +# FCDO +# Combined ROI plot of relevant scenarios +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [1,2,3,4], + _scenario_dict = hss_scenarios, + _outputfilepath=roi_outputs_folder_fcdo, + _value_of_life_suffix = 'HR_VSL') + +# Combined ROI plot of relevant scenarios +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [5,6,7], + _scenario_dict = hss_scenarios, + _outputfilepath=roi_outputs_folder_fcdo, + _value_of_life_suffix = 'Consumables_VSL') + +# Combined ROI plot of relevant scenarios +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [8], + _scenario_dict = hss_scenarios, + _outputfilepath=roi_outputs_folder_fcdo, + _value_of_life_suffix = 'HSS_VSL') + +# Global Fund +# Combined ROI plot of relevant scenarios +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [1,2,3,4], + _scenario_dict = hss_scenarios, + _outputfilepath=roi_outputs_folder_gf, + _value_of_life_suffix = 'HR_VSL') + +# Combined ROI plot of relevant scenarios +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + 
_incremental_input_cost=incremental_scenario_cost, + _draws = [5,6,7], + _scenario_dict = hss_scenarios, + _outputfilepath=roi_outputs_folder_gf, + _value_of_life_suffix = 'Consumables_VSL') + +# Combined ROI plot of relevant scenarios +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [8], + _scenario_dict = hss_scenarios, + _outputfilepath=roi_outputs_folder_gf, + _value_of_life_suffix = 'HSS_VSL') + +# 4. Plot Maximum ability-to-pay at CET +# ---------------------------------------------------- +max_ability_to_pay_for_implementation = (get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_cet) - incremental_scenario_cost).clip( + lower=0.0) # monetary value - change in costs +max_ability_to_pay_for_implementation_summarized = summarize_cost_data(max_ability_to_pay_for_implementation) +max_ability_to_pay_for_implementation_summarized_fcdo = max_ability_to_pay_for_implementation_summarized[ + max_ability_to_pay_for_implementation_summarized.index.get_level_values(0).isin(hss_scenarios_for_report)] +max_ability_to_pay_for_implementation_summarized_gf = max_ability_to_pay_for_implementation_summarized[ + max_ability_to_pay_for_implementation_summarized.index.get_level_values(0).isin(hss_scenarios_for_report)] + +# FCDO +# Plot Maximum ability to pay +name_of_plot = f'Maximum ability to pay at CET, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' +fig, ax = do_standard_bar_plot_with_ci( + (max_ability_to_pay_for_implementation_summarized_fcdo / 1e6), + annotations=[ + f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}-\n {round(row['upper'] / 1e6, 1)})" + for _, row in max_ability_to_pay_for_implementation_summarized_fcdo.iterrows() + ], + xticklabels_horizontal_and_wrapped=False, +) +ax.set_title(name_of_plot) +ax.set_ylabel('Maximum ability to pay \n(Millions)') +fig.tight_layout() +fig.savefig(roi_outputs_folder_fcdo / name_of_plot.replace(' ', '_').replace(',', '')) +plt.close(fig) + +# Global Fund +# Plot Maximum ability to pay +name_of_plot = f'Maximum ability to pay at CET, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' +fig, ax = do_standard_bar_plot_with_ci( + (max_ability_to_pay_for_implementation_summarized_gf / 1e6), + annotations=[ + f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}-\n {round(row['upper'] / 1e6, 1)})" + for _, row in max_ability_to_pay_for_implementation_summarized_gf.iterrows() + ], + xticklabels_horizontal_and_wrapped=False, +) +ax.set_title(name_of_plot) +ax.set_ylabel('Maximum ability to pay \n(Millions)') +fig.tight_layout() +fig.savefig(roi_outputs_folder_gf / name_of_plot.replace(' ', '_').replace(',', '')) +plt.close(fig) + +# Plot incremental costs +incremental_scenario_cost_summarized = summarize_cost_data(incremental_scenario_cost) +# Keep only scenarios of interest +incremental_scenario_cost_summarized_fcdo = incremental_scenario_cost_summarized[ + incremental_scenario_cost_summarized.index.get_level_values(0).isin(hss_scenarios_for_report)] +incremental_scenario_cost_summarized_gf = incremental_scenario_cost_summarized[ + incremental_scenario_cost_summarized.index.get_level_values(0).isin(hss_scenarios_for_report)] + +# FCDO +name_of_plot = f'Incremental scenario cost relative to baseline 
{relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' +fig, ax = do_standard_bar_plot_with_ci( + (incremental_scenario_cost_summarized_fcdo / 1e6), + annotations=[ + f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}- \n {round(row['upper'] / 1e6, 1)})" + for _, row in incremental_scenario_cost_summarized_fcdo.iterrows() + ], + xticklabels_horizontal_and_wrapped=False, +) +ax.set_title(name_of_plot) +ax.set_ylabel('Cost \n(USD Millions)') +fig.tight_layout() +fig.savefig(roi_outputs_folder_fcdo / name_of_plot.replace(' ', '_').replace(',', '')) +plt.close(fig) + +# Global Fund +name_of_plot = f'Incremental scenario cost relative to baseline {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' +fig, ax = do_standard_bar_plot_with_ci( + (incremental_scenario_cost_summarized_gf / 1e6), + annotations=[ + f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}- \n {round(row['upper'] / 1e6, 1)})" + for _, row in incremental_scenario_cost_summarized_gf.iterrows() + ], + xticklabels_horizontal_and_wrapped=False, +) +ax.set_title(name_of_plot) +ax.set_ylabel('Cost \n(USD Millions)') +fig.tight_layout() +fig.savefig(roi_outputs_folder_gf / name_of_plot.replace(' ', '_').replace(',', '')) +plt.close(fig) + +# 4. Plot costs +# ---------------------------------------------------- +# FCDO +input_costs_for_plot = input_costs[input_costs.draw.isin(hss_scenarios_for_report)] +# First summarize all input costs +input_costs_for_plot_summarized = input_costs_for_plot.groupby(['draw', 'year', 'cost_subcategory', 'Facility_Level', 'cost_subgroup', 'cost_category']).agg( + mean=('cost', 'mean'), + lower=('cost', lambda x: x.quantile(0.025)), + upper=('cost', lambda x: x.quantile(0.975)) +).reset_index() +input_costs_for_plot_summarized = input_costs_for_plot_summarized.melt( + id_vars=['draw', 'year', 'cost_subcategory', 'Facility_Level', 'cost_subgroup', 'cost_category'], + value_vars=['mean', 'lower', 'upper'], + var_name='stat', + value_name='cost' +) + +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _disaggregate_by_subgroup = False, _outputfilepath = Path(figurespath / 'fcdo'), _scenario_dict = hs_scenarios_substitutedict) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = [2025], _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'human resources for health', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical consumables', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical equipment', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'other', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict) + +# Global Fund +input_costs_for_plot = input_costs[input_costs.draw.isin(hss_scenarios_for_report)] +# First summarize all input costs 
+input_costs_for_plot_summarized = input_costs_for_plot.groupby(['draw', 'year', 'cost_subcategory', 'Facility_Level', 'cost_subgroup', 'cost_category']).agg( + mean=('cost', 'mean'), + lower=('cost', lambda x: x.quantile(0.025)), + upper=('cost', lambda x: x.quantile(0.975)) +).reset_index() +input_costs_for_plot_summarized = input_costs_for_plot_summarized.melt( + id_vars=['draw', 'year', 'cost_subcategory', 'Facility_Level', 'cost_subgroup', 'cost_category'], + value_vars=['mean', 'lower', 'upper'], + var_name='stat', + value_name='cost' +) + +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _disaggregate_by_subgroup = False, _outputfilepath = Path(figurespath / 'gf'), _scenario_dict = hs_scenarios_substitutedict) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = [2025], _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'human resources for health', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical consumables', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical equipment', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'other', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict) diff --git a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py new file mode 100644 index 0000000000..e95224451c --- /dev/null +++ b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py @@ -0,0 +1,596 @@ +import argparse +from pathlib import Path +from tlo import Date +from collections import Counter, defaultdict + +import calendar +import datetime +import os +import textwrap + +import matplotlib.pyplot as plt +import seaborn as sns +from matplotlib.ticker import FuncFormatter +import numpy as np +import pandas as pd +import ast +import math + +from tlo.analysis.utils import ( + extract_params, + extract_results, + get_scenario_info, + get_scenario_outputs, + load_pickled_dataframes, + make_age_grp_lookup, + make_age_grp_types, + summarize, + create_pickles_locally, + parse_log_file, + unflatten_flattened_multi_index_in_logging +) + +from scripts.costing.cost_estimation import (estimate_input_cost_of_scenarios, + summarize_cost_data, + apply_discounting_to_cost_data, + do_stacked_bar_plot_of_cost_by_category, + do_line_plot_of_cost, + generate_roi_plots, + generate_multiple_scenarios_roi_plot) + +# Define a timestamp for script outputs +timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M") + +# Print the start time of the script +print('Script Start', datetime.datetime.now().strftime('%H:%M')) + +# Create folders to store results 
+resourcefilepath = Path("./resources") +outputfilepath = Path('./outputs/t.mangal@imperial.ac.uk') +figurespath = Path('./outputs/global_fund_roi_analysis/htm_with_and_without_hss') +if not os.path.exists(figurespath): + os.makedirs(figurespath) +roi_outputs_folder = Path(figurespath / 'roi') +if not os.path.exists(roi_outputs_folder): + os.makedirs(roi_outputs_folder) + +# Load result files +#------------------------------------------------------------------------------------------------------------------ +results_folder = get_scenario_outputs('htm_with_and_without_hss-2024-11-12T172503Z.py', outputfilepath)[0] + +# Check can read results from draw=0, run=0 +log = load_pickled_dataframes(results_folder, 0, 0) # look at one log (so can decide what to extract) +params = extract_params(results_folder) + +# Declare default parameters for cost analysis +#------------------------------------------------------------------------------------------------------------------ +# Population scaling factor for malaria scale-up projections +population_scaling_factor = log['tlo.methods.demography']['scaling_factor']['scaling_factor'].iloc[0] +# Load the list of districts and their IDs +district_dict = pd.read_csv(resourcefilepath / 'demography' / 'ResourceFile_Population_2010.csv')[ + ['District_Num', 'District']].drop_duplicates() +district_dict = dict(zip(district_dict['District_Num'], district_dict['District'])) + +# Period relevant for costing +TARGET_PERIOD_INTERVENTION = (Date(2025, 1, 1), Date(2035, 12, 31)) # This is the period that is costed +relevant_period_for_costing = [i.year for i in TARGET_PERIOD_INTERVENTION] +list_of_relevant_years_for_costing = list(range(relevant_period_for_costing[0], relevant_period_for_costing[1] + 1)) + +# Scenarios +htm_scenarios = {0:"Baseline", 1: "HSS PACKAGE: Realistic", 2: "HIV Programs Scale-up WITHOUT HSS PACKAGE", +3: "HIV Programs Scale-up WITH REALISTIC HSS PACKAGE", 4: "TB Programs Scale-up WITHOUT HSS PACKAGE", +5: "TB Programs Scale-up WITH REALISTIC HSS PACKAGE", 6: "Malaria Programs Scale-up WITHOUT HSS PACKAGE", +7: "Malaria Programs Scale-up WITH REALISTIC HSS PACKAGE", 8: "HTM Programs Scale-up WITHOUT HSS PACKAGE", +9: "HTM Programs Scale-up WITH REALISTIC HSS PACKAGE", 10: "HTM Programs Scale-up WITH SUPPLY CHAINS", 11: "HTM Programs Scale-up WITH HRH"} + +htm_scenarios_substitutedict_fcdo = {0:"0", 1: "A", 2: "B", 3: "C", +4: "D", 5: "E", 6: "F", +7: "G", 8: "H", 9: "I", +10: "J", 11: "K"} + +# Subset of scenarios included in analysis +htm_scenarios_for_gf_report = [0,1,2,3,4,5,6,7,8,9,10,11] + +color_map = { + 'Baseline': '#9e0142', + 'HSS PACKAGE: Realistic': '#d8434e', + 'HIV Programs Scale-up WITHOUT HSS PACKAGE': '#f36b48', + 'HIV Programs Scale-up WITH REALISTIC HSS PACKAGE': '#fca45c', + 'TB Programs Scale-up WITHOUT HSS PACKAGE': '#fddc89', + 'TB Programs Scale-up WITH REALISTIC HSS PACKAGE': '#e7f7a0', + 'Malaria Programs Scale-up WITHOUT HSS PACKAGE': '#a5dc97', + 'Malaria Programs Scale-up WITH REALISTIC HSS PACKAGE': '#6dc0a6', + 'HTM Programs Scale-up WITHOUT HSS PACKAGE': '#438fba', + 'HTM Programs Scale-up WITH REALISTIC HSS PACKAGE': '#5e4fa2', + 'HTM Programs Scale-up WITH SUPPLY CHAINS': '#3c71aa', + 'HTM Programs Scale-up WITH HRH': '#2f6094', +} + +# Cost-effectiveness threshold +chosen_cet = 199.620811947318 # This is based on the estimate from Lomas et al (2023)- $160.595987085533 in 2019 USD coverted to 2023 USD +# based on Ochalek et al (2018) - the paper provided the value $61 in 2016 USD terms, this value is $77.4 in 
2023 USD terms +chosen_value_of_statistical_life = 834 + +# Discount rate +discount_rate = 0.03 + +# Define a function to create bar plots +def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrapped=False): + """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the + extent of the error bar.""" + + # Calculate y-error bars + yerr = np.array([ + (_df['mean'] - _df['lower']).values, + (_df['upper'] - _df['mean']).values, + ]) + + # Map xticks based on the hss_scenarios dictionary + xticks = {index: htm_scenarios.get(index, f"Scenario {index}") for index in _df.index} + + # Retrieve colors from color_map based on the xticks labels + colors = [color_map.get(label, '#333333') for label in xticks.values()] # default to grey if not found + + # Generate consecutive x positions for the bars, ensuring no gaps + x_positions = np.arange(len(xticks)) # Consecutive integers for each bar position + + fig, ax = plt.subplots() + ax.bar( + x_positions, + _df['mean'].values, + yerr=yerr, + color=colors, # Set bar colors + alpha=1, + ecolor='black', + capsize=10, + ) + + # Add optional annotations above each bar + if annotations: + for xpos, ypos, text in zip(x_positions, _df['upper'].values, annotations): + ax.text(xpos, ypos * 1.05, text, horizontalalignment='center', fontsize=8) + + # Set x-tick labels with wrapped text if required + wrapped_labs = ["\n".join(textwrap.wrap(label,30)) for label in xticks.values()] + ax.set_xticks(x_positions) # Set x-ticks to consecutive positions + ax.set_xticklabels(wrapped_labs, rotation=45 if not xticklabels_horizontal_and_wrapped else 0, ha='right', + fontsize=7) + + # Set y-axis limit to upper max + 500 + ax.set_ylim(_df['lower'].min()*1.25, _df['upper'].max()*1.25) + + # Set font size for y-tick labels and grid + ax.tick_params(axis='y', labelsize=9) + ax.tick_params(axis='x', labelsize=9) + + ax.grid(axis="y") + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + fig.tight_layout() + + return fig, ax + +def do_standard_bar_plot_with_ci(_df, set_colors=None, annotations=None, + xticklabels_horizontal_and_wrapped=False, + put_labels_in_legend=True, + offset=1e6): + """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the + extent of the error bar.""" + + substitute_labels = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + + yerr = np.array([ + (_df['mean'] - _df['lower']).values, + (_df['upper'] - _df['mean']).values, + ]) +# TODO should be above be 'median' + xticks = {(i + 0.5): k for i, k in enumerate(_df.index)} + + if set_colors: + colors = [color_map.get(series, 'grey') for series in _df.index] + else: + cmap = sns.color_palette('Spectral', as_cmap=True) + rescale = lambda y: (y - np.min(y)) / (np.max(y) - np.min(y)) # noqa: E731 + colors = list(map(cmap, rescale(np.array(list(xticks.keys()))))) if put_labels_in_legend else None + + fig, ax = plt.subplots(figsize=(10, 5)) + ax.bar( + xticks.keys(), + _df['mean'].values, + yerr=yerr, + ecolor='black', + color=colors, + capsize=10, + label=xticks.values() + ) + + if annotations: + for xpos, (ypos, text) in zip(xticks.keys(), zip(_df['upper'].values.flatten(), annotations)): + annotation_y = ypos + offset + + ax.text( + xpos, + annotation_y, + '\n'.join(text.split(' ', 1)), + horizontalalignment='center', + verticalalignment='bottom', # Aligns text at the bottom of the annotation position + fontsize='x-small', + rotation='horizontal' + ) + + ax.set_xticks(list(xticks.keys())) + + if 
put_labels_in_legend: + # Update xticks label with substitute labels + # Insert legend with updated labels that shows correspondence between substitute label and original label + xtick_values = [letter for letter, label in zip(substitute_labels, xticks.values())] + xtick_legend = [f'{letter}: {label}' for letter, label in zip(substitute_labels, xticks.values())] + h, legs = ax.get_legend_handles_labels() + ax.legend(h, xtick_legend, loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5)) + ax.set_xticklabels(list(xtick_values)) + else: + if not xticklabels_horizontal_and_wrapped: + # xticklabels will be vertical and not wrapped + ax.set_xticklabels(list(xticks.values()), rotation=90) + else: + wrapped_labs = ["\n".join(textwrap.wrap(_lab, 20)) for _lab in xticks.values()] + ax.set_xticklabels(wrapped_labs) + + ax.grid(axis="y") + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + fig.tight_layout(pad=2.0) + plt.subplots_adjust(left=0.15, right=0.85) # Adjust left and right margins + + return fig, ax + +# Estimate standard input costs of scenario +#----------------------------------------------------------------------------------------------------------------------- +input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, + _years=list_of_relevant_years_for_costing, cost_only_used_staff=True, + _discount_rate = discount_rate) +# _draws = htm_scenarios_for_gf_report --> this subset is created after calculating malaria scale up costs +# TODO Remove the manual fix below once the logging for these is corrected +input_costs.loc[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost'] = \ + input_costs.loc[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost']/10 +input_costs.loc[input_costs.cost_subgroup == 'Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly', 'cost'] =\ + input_costs.loc[input_costs.cost_subgroup == 'Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly', 'cost']/7 +#input_costs = apply_discounting_to_cost_data(input_costs, _discount_rate = discount_rate) + +# Add additional costs pertaining to simulation (Only for scenarios with Malaria scale-up) +#----------------------------------------------------------------------------------------------------------------------- +# Extract supply chain cost as a proportion of consumable costs to apply to malaria scale-up commodities +# Load primary costing resourcefile +workbook_cost = pd.read_excel((resourcefilepath / "costing/ResourceFile_Costing.xlsx"), + sheet_name=None) +# Read parameters for consumables costs +# Load consumables cost data +unit_price_consumable = workbook_cost["consumables"] +unit_price_consumable = unit_price_consumable.rename(columns=unit_price_consumable.iloc[0]) +unit_price_consumable = unit_price_consumable[['Item_Code', 'Final_price_per_chosen_unit (USD, 2023)']].reset_index( + drop=True).iloc[1:] +unit_price_consumable = unit_price_consumable[unit_price_consumable['Item_Code'].notna()] + +# Assume that the cost of procurement, warehousing and distribution is a fixed proportion of consumable purchase costs +# The fixed proportion is based on Resource Mapping Expenditure data from 2018 +resource_mapping_data = workbook_cost["resource_mapping_r7_summary"] +# Make sure values are numeric +expenditure_column = ['EXPENDITURE (USD) (Jul 2018 - Jun 2019)'] +resource_mapping_data[expenditure_column] = resource_mapping_data[expenditure_column].apply( + lambda x: pd.to_numeric(x, 
errors='coerce')) +supply_chain_expenditure = \ +resource_mapping_data[resource_mapping_data['Cost Type'] == 'Supply Chain'][expenditure_column].sum()[0] +consumables_purchase_expenditure = \ +resource_mapping_data[resource_mapping_data['Cost Type'] == 'Drugs and Commodities'][expenditure_column].sum()[0] + \ +resource_mapping_data[resource_mapping_data['Cost Type'] == 'HIV Drugs and Commodities'][expenditure_column].sum()[0] +supply_chain_cost_proportion = supply_chain_expenditure / consumables_purchase_expenditure + +# In this case malaria intervention scale-up costs were not included in the standard estimate_input_cost_of_scenarios function +list_of_draws_with_malaria_scaleup_parameters = params[(params.module_param == 'Malaria:scaleup_start_year')] +list_of_draws_with_malaria_scaleup_parameters.loc[:,'value'] = pd.to_numeric(list_of_draws_with_malaria_scaleup_parameters['value']) +list_of_draws_with_malaria_scaleup_implemented_in_costing_period = list_of_draws_with_malaria_scaleup_parameters[(list_of_draws_with_malaria_scaleup_parameters['value'] < max(relevant_period_for_costing))].index.to_list() + +# 1. IRS costs +irs_coverage_rate = 0.8 +districts_with_irs_scaleup = ['Kasungu', 'Mchinji', 'Lilongwe', 'Lilongwe City', 'Dowa', 'Ntchisi', 'Salima', 'Mangochi', + 'Mwanza', 'Likoma', 'Nkhotakota'] +# Convert above list of district names to numeric district identifiers +district_keys_with_irs_scaleup = [key for key, name in district_dict.items() if name in districts_with_irs_scaleup] +TARGET_PERIOD_MALARIA_SCALEUP = (Date(2024, 1, 1), Date(2035, 12, 31)) + +# Get population by district +def get_total_population_by_district(_df): + years_needed = [i.year for i in TARGET_PERIOD_MALARIA_SCALEUP] # we only consider the population for the malaria scale-up period + # because those are the years relevant for malaria scale-up costing + _df['year'] = pd.to_datetime(_df['date']).dt.year + assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." 
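+    # Reshape the wide per-district columns into long format and return a Series of values indexed by (year, district)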
+ _df = pd.melt(_df.drop(columns = 'date'), id_vars = ['year']).rename(columns = {'variable': 'district'}) + return pd.Series( + data=_df + .loc[_df.year.between(*years_needed)] + .set_index(['year', 'district'])['value'] + ) + +district_population_by_year = extract_results( + results_folder, + module='tlo.methods.malaria', + key='pop_district', + custom_generate_series=get_total_population_by_district, + do_scaling=True +) + +def get_number_of_people_covered_by_malaria_scaleup(_df, list_of_districts_covered = None, draws_included = None): + _df = pd.DataFrame(_df) + # Reset the index to make 'district' a column + _df = _df.reset_index() + # Convert the 'district' column to numeric values + _df['district'] = pd.to_numeric(_df['district'], errors='coerce') + _df = _df.set_index(['year', 'district']) + if list_of_districts_covered is not None: + _df.loc[~_df.index.get_level_values('district').isin(list_of_districts_covered), :] = 0 + if draws_included is not None: + _df.loc[:, ~_df.columns.get_level_values('draw').isin(draws_included)] = 0 + return _df + +district_population_covered_by_irs_scaleup_by_year = get_number_of_people_covered_by_malaria_scaleup(district_population_by_year, + list_of_districts_covered=district_keys_with_irs_scaleup, + draws_included = list_of_draws_with_malaria_scaleup_implemented_in_costing_period) + +irs_cost_per_person = unit_price_consumable[unit_price_consumable.Item_Code == 161]['Final_price_per_chosen_unit (USD, 2023)'] +# The above unit cost already includes implementation - project management (17%), personnel (6%), vehicles (10%), equipment (6%), monitoring and evaluation (3%), training (3%), +# other commodities (3%) and buildings (2%) from Alonso et al (2021) +irs_multiplication_factor = irs_cost_per_person * irs_coverage_rate +total_irs_cost = irs_multiplication_factor.iloc[0] * district_population_covered_by_irs_scaleup_by_year # for districts and scenarios included +total_irs_cost = total_irs_cost.groupby(level='year').sum() + +# 2. Bednet costs +bednet_coverage_rate = 0.7 +# We can assume 3-year lifespan of a bednet, each bednet covering 1.8 people. 
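+# (Illustrative arithmetic only, not a model output: if a net were to cost, say, USD 2.00 including the supply chain
+# mark-up, the annualised cost per person covered would be 2.00 / 1.8 / 3, i.e. roughly USD 0.37, before applying
+# the coverage rate and the covered population below.)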
+unit_cost_of_bednet = unit_price_consumable[unit_price_consumable.Item_Code == 160]['Final_price_per_chosen_unit (USD, 2023)'] * (1 + supply_chain_cost_proportion)
+# We add supply chain costs (procurement + distribution + warehousing) because the unit_cost does not include this
+annual_bednet_cost_per_person = unit_cost_of_bednet / 1.8 / 3
+bednet_multiplication_factor = bednet_coverage_rate * annual_bednet_cost_per_person
+
+district_population_covered_by_bednet_scaleup_by_year = get_number_of_people_covered_by_malaria_scaleup(district_population_by_year,
+                                                                                        draws_included = list_of_draws_with_malaria_scaleup_implemented_in_costing_period) # All districts covered
+
+total_bednet_cost = bednet_multiplication_factor.iloc[0] * district_population_covered_by_bednet_scaleup_by_year # for scenarios included
+total_bednet_cost = total_bednet_cost.groupby(level='year').sum()
+
+# Malaria scale-up costs - TOTAL
+malaria_scaleup_costs = [
+    (total_irs_cost.reset_index(), 'cost_of_IRS_scaleup'),
+    (total_bednet_cost.reset_index(), 'cost_of_bednet_scaleup'),
+]
+def melt_and_label_malaria_scaleup_cost(_df, label):
+    multi_index = pd.MultiIndex.from_tuples(_df.columns)
+    _df.columns = multi_index
+
+    # reshape dataframe and assign 'draw' and 'run' as the correct column headers
+    melted_df = pd.melt(_df, id_vars=['year']).rename(columns={'variable_0': 'draw', 'variable_1': 'run'})
+    # Label the additional malaria scale-up costs with the appropriate cost categories
+    melted_df['cost_subcategory'] = label
+    melted_df['cost_category'] = 'other'
+    melted_df['cost_subgroup'] = 'NA'
+    melted_df['Facility_Level'] = 'all'
+    melted_df = melted_df.rename(columns={'value': 'cost'})
+    return melted_df
+
+# Iterate through additional costs, melt and concatenate
+for df, label in malaria_scaleup_costs:
+    new_df = melt_and_label_malaria_scaleup_cost(df, label)
+    input_costs = pd.concat([input_costs, new_df], ignore_index=True)
+
+# TODO Reduce the cost of Oxygen and Depo-medroxy temporarily while we figure out the issue with this
+# Extract input_costs for browsing
+input_costs.groupby(['draw', 'run', 'cost_category', 'cost_subcategory', 'cost_subgroup','year'])['cost'].sum().to_csv(figurespath / 'cost_detailed.csv')
+
+# %%
+# Return on Investment analysis
+# Calculate incremental cost
+# -----------------------------------------------------------------------------------------------------------------------
+# Aggregate input costs for further analysis
+input_costs_subset = input_costs[
+    (input_costs['year'] >= relevant_period_for_costing[0]) & (input_costs['year'] <= relevant_period_for_costing[1])]
+# TODO the above step may no longer be needed
+total_input_cost = input_costs_subset.groupby(['draw', 'run'])['cost'].sum()
+total_input_cost_summarized = summarize_cost_data(total_input_cost.unstack(level='run'))
+def find_difference_relative_to_comparison(_ser: pd.Series,
+                                           comparison: str,
+                                           scaled: bool = False,
+                                           drop_comparison: bool = True,
+                                           ):
+    """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0)
+    within the runs (level 1), relative to where draw = `comparison`.
+    The comparison is `X - COMPARISON`."""
+    return _ser \
+        .unstack(level=0) \
+        .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) \
+        .drop(columns=([comparison] if drop_comparison else [])) \
+        .stack()
+
+
+incremental_scenario_cost = (pd.DataFrame(
+    find_difference_relative_to_comparison(
+        total_input_cost,
+        comparison=0) # sets the comparator to 0 which is the Baseline scenario
+).T.iloc[0].unstack()).T
+
+# Keep only scenarios of interest
+incremental_scenario_cost = incremental_scenario_cost[
+    incremental_scenario_cost.index.get_level_values(0).isin(htm_scenarios_for_gf_report)]
+
+# Monetary value of health impact
+# -----------------------------------------------------------------------------------------------------------------------
+def get_num_dalys(_df):
+    """Return total number of DALYS (Stacked) by label (total within the TARGET_PERIOD).
+    Throw an error if there is not a record for every year in the TARGET PERIOD (to guard against inadvertently using
+    results from runs that crashed mid-way through the simulation).
+    """
+    years_needed = relevant_period_for_costing # [i.year for i in TARGET_PERIOD_INTERVENTION]
+    assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded."
+    _df = _df.loc[_df.year.between(*years_needed)].drop(columns=['date', 'sex', 'age_range']).groupby('year').sum().sum(axis = 1)
+
+    # Initial year and discount rate
+    initial_year = min(_df.index.unique())
+
+    # Calculate the discounted values
+    discounted_values = _df / (1 + discount_rate) ** (_df.index - initial_year)
+
+    return pd.Series(discounted_values.sum())
+
+num_dalys = extract_results(
+    results_folder,
+    module='tlo.methods.healthburden',
+    key='dalys_stacked',
+    custom_generate_series=get_num_dalys,
+    do_scaling=True
+)
+
+# Get absolute DALYs averted
+num_dalys_averted = (-1.0 *
+                     pd.DataFrame(
+                         find_difference_relative_to_comparison(
+                             num_dalys.loc[0],
+                             comparison=0) # sets the comparator to 0 which is the Baseline scenario
+                     ).T.iloc[0].unstack(level='run'))
+num_dalys_averted = num_dalys_averted[num_dalys_averted.index.get_level_values(0).isin(htm_scenarios_for_gf_report)]
+
+# The monetary value of the health benefit is delta health times the chosen value of a life year (negative values are set to 0)
+def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_of_life_year):
+    monetary_value_of_incremental_health = (_num_dalys_averted * _chosen_value_of_life_year).clip(lower=0.0)
+    return monetary_value_of_incremental_health
+
+# TODO check that the above calculation is correct
+
+# 3. 
Return on Investment Plot +# ---------------------------------------------------- +# Plot ROI at various levels of cost +generate_roi_plots(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_cet), + _incremental_input_cost=incremental_scenario_cost, + _scenario_dict = htm_scenarios, + _outputfilepath=roi_outputs_folder, + _value_of_life_suffix = 'CET') + +generate_roi_plots(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _scenario_dict = htm_scenarios, + _outputfilepath=roi_outputs_folder, + _value_of_life_suffix = 'VSL') + +# Combined ROI plot of relevant scenarios +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [1,8,9,10,11], + _scenario_dict = htm_scenarios, + _outputfilepath=roi_outputs_folder, + _value_of_life_suffix = 'all_HTM_VSL') + +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [2,3], + _scenario_dict = htm_scenarios, + _outputfilepath=roi_outputs_folder, + _value_of_life_suffix = 'HIV_VSL') + +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [4,5], + _scenario_dict = htm_scenarios, + _outputfilepath=roi_outputs_folder, + _value_of_life_suffix = 'TB_VSL') + +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [6,7], + _scenario_dict = htm_scenarios, + _outputfilepath=roi_outputs_folder, + _value_of_life_suffix = 'Malaria_VSL') + +# 4. 
Plot Maximum ability-to-pay at CET +# ---------------------------------------------------- +max_ability_to_pay_for_implementation = (get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_cet) - incremental_scenario_cost).clip( + lower=0.0) # monetary value - change in costs +max_ability_to_pay_for_implementation_summarized = summarize_cost_data(max_ability_to_pay_for_implementation) +max_ability_to_pay_for_implementation_summarized = max_ability_to_pay_for_implementation_summarized[ + max_ability_to_pay_for_implementation_summarized.index.get_level_values(0).isin(htm_scenarios_for_gf_report)] + +# Plot Maximum ability to pay +name_of_plot = f'Maximum ability to pay at CET, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' +fig, ax = do_standard_bar_plot_with_ci( + (max_ability_to_pay_for_implementation_summarized / 1e6), + annotations=[ + f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}-\n {round(row['upper'] / 1e6, 1)})" + for _, row in max_ability_to_pay_for_implementation_summarized.iterrows() + ], + xticklabels_horizontal_and_wrapped=False, +) +ax.set_title(name_of_plot) +ax.set_ylabel('Maximum ability to pay \n(Millions)') +fig.tight_layout() +fig.savefig(roi_outputs_folder / name_of_plot.replace(' ', '_').replace(',', '')) +plt.close(fig) + +# 4. Plot Maximum ability-to-pay at VSL +# ---------------------------------------------------- +max_ability_to_pay_for_implementation = (get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life) - incremental_scenario_cost).clip( + lower=0.0) # monetary value - change in costs +max_ability_to_pay_for_implementation_summarized = summarize_cost_data(max_ability_to_pay_for_implementation) +max_ability_to_pay_for_implementation_summarized = max_ability_to_pay_for_implementation_summarized[ + max_ability_to_pay_for_implementation_summarized.index.get_level_values(0).isin(htm_scenarios_for_gf_report)] + +# Plot Maximum ability to pay +name_of_plot = f'Maximum ability to pay at VSL, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' +fig, ax = do_bar_plot_with_ci( + (max_ability_to_pay_for_implementation_summarized / 1e6), + annotations=[ + f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}-\n {round(row['upper'] / 1e6, 1)})" + for _, row in max_ability_to_pay_for_implementation_summarized.iterrows() + ], + xticklabels_horizontal_and_wrapped=False, +) +ax.set_title(name_of_plot) +ax.set_ylabel('Maximum ability to pay (at VSL) \n(Millions)') +fig.tight_layout() +fig.savefig(roi_outputs_folder / name_of_plot.replace(' ', '_').replace(',', '')) +plt.close(fig) + +# Plot incremental costs +incremental_scenario_cost_summarized = summarize_cost_data(incremental_scenario_cost) +name_of_plot = f'Incremental scenario cost relative to baseline {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' +fig, ax = do_standard_bar_plot_with_ci( + (incremental_scenario_cost_summarized / 1e6), + annotations=[ + f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}- \n {round(row['upper'] / 1e6, 1)})" + for _, row in incremental_scenario_cost_summarized.iterrows() + ], + xticklabels_horizontal_and_wrapped=False, +) +ax.set_title(name_of_plot) +ax.set_ylabel('Cost \n(USD Millions)') +fig.tight_layout() +fig.savefig(roi_outputs_folder / name_of_plot.replace(' ', '_').replace(',', '')) +plt.close(fig) + +# 4. 
Plot costs +# ---------------------------------------------------- +input_costs_for_plot = input_costs[input_costs.draw.isin(htm_scenarios_for_gf_report)] +# First summarize all input costs +input_costs_for_plot_summarized = input_costs_for_plot.groupby(['draw', 'year', 'cost_subcategory', 'Facility_Level', 'cost_subgroup', 'cost_category']).agg( + mean=('cost', 'mean'), + lower=('cost', lambda x: x.quantile(0.025)), + upper=('cost', lambda x: x.quantile(0.975)) +).reset_index() +input_costs_for_plot_summarized = input_costs_for_plot_summarized.melt( + id_vars=['draw', 'year', 'cost_subcategory', 'Facility_Level', 'cost_subgroup', 'cost_category'], + value_vars=['mean', 'lower', 'upper'], + var_name='stat', + value_name='cost' +) + +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = list(range(2025, 2036)), _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios_substitutedict_fcdo) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = [2025], _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'human resources for health', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical consumables', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical equipment', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'other', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios) From 2694cf8275c71ecbf38afb8d9f9a4fc6a1b40278 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 19 Nov 2024 14:10:09 +0000 Subject: [PATCH 169/230] remove unnecessary packages --- .../gf_analyses/cost_analysis_hss_elements_gf.py | 16 ++-------------- .../cost_analysis_htm_with_and_without_hss_gf.py | 14 +------------- 2 files changed, 3 insertions(+), 27 deletions(-) diff --git a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_hss_elements_gf.py b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_hss_elements_gf.py index 165c90e948..082a7bfb80 100644 --- a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_hss_elements_gf.py +++ b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_hss_elements_gf.py @@ -1,19 +1,14 @@ -import argparse from pathlib import Path from tlo import Date -from collections import Counter, defaultdict -import calendar import datetime import os import textwrap import matplotlib.pyplot as plt -from matplotlib.ticker import FuncFormatter +import seaborn as sns import numpy as np import pandas as pd -import ast -import math from tlo.analysis.utils import ( extract_params, @@ -21,20 +16,13 @@ get_scenario_info, get_scenario_outputs, load_pickled_dataframes, - make_age_grp_lookup, - make_age_grp_types, - summarize, - create_pickles_locally, - parse_log_file, - 
unflatten_flattened_multi_index_in_logging + summarize ) from scripts.costing.cost_estimation import (estimate_input_cost_of_scenarios, summarize_cost_data, - apply_discounting_to_cost_data, do_stacked_bar_plot_of_cost_by_category, do_line_plot_of_cost, - generate_roi_plots, generate_multiple_scenarios_roi_plot) # Define a timestamp for script outputs diff --git a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py index e95224451c..7115d2f837 100644 --- a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py +++ b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py @@ -1,20 +1,14 @@ -import argparse from pathlib import Path from tlo import Date -from collections import Counter, defaultdict -import calendar import datetime import os import textwrap import matplotlib.pyplot as plt import seaborn as sns -from matplotlib.ticker import FuncFormatter import numpy as np import pandas as pd -import ast -import math from tlo.analysis.utils import ( extract_params, @@ -22,17 +16,11 @@ get_scenario_info, get_scenario_outputs, load_pickled_dataframes, - make_age_grp_lookup, - make_age_grp_types, - summarize, - create_pickles_locally, - parse_log_file, - unflatten_flattened_multi_index_in_logging + summarize ) from scripts.costing.cost_estimation import (estimate_input_cost_of_scenarios, summarize_cost_data, - apply_discounting_to_cost_data, do_stacked_bar_plot_of_cost_by_category, do_line_plot_of_cost, generate_roi_plots, From 8747a65bd287697efad50c4957d57adfe6290b69 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 19 Nov 2024 14:12:31 +0000 Subject: [PATCH 170/230] add description --- .../gf_analyses/cost_analysis_hss_elements_gf.py | 7 +++++++ .../cost_analysis_htm_with_and_without_hss_gf.py | 14 ++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_hss_elements_gf.py b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_hss_elements_gf.py index 082a7bfb80..20d3ca6ab5 100644 --- a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_hss_elements_gf.py +++ b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_hss_elements_gf.py @@ -1,3 +1,10 @@ +"""Produce plots to show the impact each the healthcare system (overall health impact) when running under different +scenarios (scenario_impact_of_healthsystem.py) + +with reduced consumables logging +/Users/tmangal/PycharmProjects/TLOmodel/outputs/t.mangal@imperial.ac.uk/hss_elements-2024-11-12T172311Z +""" + from pathlib import Path from tlo import Date diff --git a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py index 7115d2f837..c593e8d331 100644 --- a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py +++ b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py @@ -1,3 +1,17 @@ +"""Produce plots to show the impact each the healthcare system (overall 
health impact) when running under different +scenarios (scenario_vertical_programs_with_and_without_hss.py) + +job ID: +results for FCDO and GF presentations Sept 2024: +htm_with_and_without_hss-2024-09-04T143044Z + +results for updates 30Sept2024 (IRS in high-risk distr and reduced gen pop RDT): +htm_with_and_without_hss-2024-09-17T083150Z + +with reduced consumables logging +htm_with_and_without_hss-2024-11-12T172503Z +""" + from pathlib import Path from tlo import Date From 4838ee2d290b6a8fea843f2fd1d6165995b5303e Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 19 Nov 2024 14:27:27 +0000 Subject: [PATCH 171/230] update discounting code to overcome pandas warning --- src/scripts/costing/cost_estimation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 85e82fba5c..64468c2964 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -43,7 +43,7 @@ def apply_discounting_to_cost_data(_df, _discount_rate=0): initial_year = min(_df['year'].unique()) # Calculate the discounted values - _df['cost'] = _df['cost'] / ((1 + _discount_rate) ** (_df['year'] - initial_year)) + _df.loc[:, 'cost'] = _df['cost'] / ((1 + _discount_rate) ** (_df['year'] - initial_year)) return _df def estimate_input_cost_of_scenarios(results_folder: Path, From 6ecb4225846009e52f22a69966a0b41c73265fb2 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 19 Nov 2024 15:17:23 +0000 Subject: [PATCH 172/230] update the cost of IRS and bednets --- resources/costing/ResourceFile_Costing.xlsx | 4 +-- ...st_analysis_htm_with_and_without_hss_gf.py | 27 +++++++++++-------- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 5295049142..ebbd097d37 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a43a39618e76c27f1eeb4874318960e9164ea4c26dfe3385b1d0bdb5101134ec -size 4274907 +oid sha256:92117887e3aefcd990637d7470de1f93d8099d993efd2248090d58c3f58eb8ed +size 4274449 diff --git a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py index c593e8d331..817c6a37f9 100644 --- a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py +++ b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py @@ -63,6 +63,7 @@ # Check can read results from draw=0, run=0 log = load_pickled_dataframes(results_folder, 0, 0) # look at one log (so can decide what to extract) params = extract_params(results_folder) +info = get_scenario_info(results_folder) # Declare default parameters for cost analysis #------------------------------------------------------------------------------------------------------------------ @@ -74,8 +75,8 @@ district_dict = dict(zip(district_dict['District_Num'], district_dict['District'])) # Period relevant for costing -TARGET_PERIOD_INTERVENTION = (Date(2025, 1, 1), Date(2035, 12, 31)) # This is the period that is costed -relevant_period_for_costing = [i.year for i in TARGET_PERIOD_INTERVENTION] +TARGET_PERIOD= (Date(2025, 1, 1), Date(2035, 12, 31)) # This is 
the period that is costed +relevant_period_for_costing = [i.year for i in TARGET_PERIOD] list_of_relevant_years_for_costing = list(range(relevant_period_for_costing[0], relevant_period_for_costing[1] + 1)) # Scenarios @@ -85,13 +86,13 @@ 7: "Malaria Programs Scale-up WITH REALISTIC HSS PACKAGE", 8: "HTM Programs Scale-up WITHOUT HSS PACKAGE", 9: "HTM Programs Scale-up WITH REALISTIC HSS PACKAGE", 10: "HTM Programs Scale-up WITH SUPPLY CHAINS", 11: "HTM Programs Scale-up WITH HRH"} -htm_scenarios_substitutedict_fcdo = {0:"0", 1: "A", 2: "B", 3: "C", +htm_scenarios_substitutedict = {0:"0", 1: "A", 2: "B", 3: "C", 4: "D", 5: "E", 6: "F", 7: "G", 8: "H", 9: "I", 10: "J", 11: "K"} # Subset of scenarios included in analysis -htm_scenarios_for_gf_report = [0,1,2,3,4,5,6,7,8,9,10,11] +htm_scenarios_for_report = list(range(0,12)) color_map = { 'Baseline': '#9e0142', @@ -249,14 +250,13 @@ def do_standard_bar_plot_with_ci(_df, set_colors=None, annotations=None, # Estimate standard input costs of scenario #----------------------------------------------------------------------------------------------------------------------- input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, - _years=list_of_relevant_years_for_costing, cost_only_used_staff=True, + _years= list_of_relevant_years_for_costing, cost_only_used_staff= True, _discount_rate = discount_rate) -# _draws = htm_scenarios_for_gf_report --> this subset is created after calculating malaria scale up costs + # TODO Remove the manual fix below once the logging for these is corrected +# Post-run fixes to costs due to challenges with calibration input_costs.loc[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost'] = \ input_costs.loc[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost']/10 -input_costs.loc[input_costs.cost_subgroup == 'Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly', 'cost'] =\ - input_costs.loc[input_costs.cost_subgroup == 'Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly', 'cost']/7 #input_costs = apply_discounting_to_cost_data(input_costs, _discount_rate = discount_rate) # Add additional costs pertaining to simulation (Only for scenarios with Malaria scale-up) @@ -298,7 +298,9 @@ def do_standard_bar_plot_with_ci(_df, set_colors=None, annotations=None, 'Mwanza', 'Likoma', 'Nkhotakota'] # Convert above list of district names to numeric district identifiers district_keys_with_irs_scaleup = [key for key, name in district_dict.items() if name in districts_with_irs_scaleup] -TARGET_PERIOD_MALARIA_SCALEUP = (Date(2024, 1, 1), Date(2035, 12, 31)) +year_of_malaria_scaleup_start = list_of_draws_with_malaria_scaleup_parameters.loc[:,'value'].reset_index()['value'][0] +final_year_for_costing = max(list_of_relevant_years_for_costing) +TARGET_PERIOD_MALARIA_SCALEUP = (Date(year_of_malaria_scaleup_start, 1, 1), Date(final_year_for_costing, 12, 31)) # Get population by district def get_total_population_by_district(_df): @@ -348,7 +350,11 @@ def get_number_of_people_covered_by_malaria_scaleup(_df, list_of_districts_cover # 2. Bednet costs bednet_coverage_rate = 0.7 # We can assume 3-year lifespan of a bednet, each bednet covering 1.8 people. 
-unit_cost_of_bednet = unit_price_consumable[unit_price_consumable.Item_Code == 160]['Final_price_per_chosen_unit (USD, 2023)'] * (1 + supply_chain_cost_proportion) +inflation_2011_to_2023 = 1.35 +unit_cost_of_bednet = unit_price_consumable[unit_price_consumable.Item_Code == 160]['Final_price_per_chosen_unit (USD, 2023)'] + (8.27 - 3.36) * inflation_2011_to_2023 +# Stelmach et al Tanzania https://pmc.ncbi.nlm.nih.gov/articles/PMC6169190/#_ad93_ (Price in 2011 USD) - This cost includes non-consumable costs - personnel, equipment, fuel, logistics and planning, shipping. The cost is measured per net distributed +# Note that the cost per net of $3.36 has been replaced with a cost of Malawi Kwacha 667 (2023) as per the Central Medical Stores Trust sales catalogue + # We add supply chain costs (procurement + distribution + warehousing) because the unit_cost does not include this annual_bednet_cost_per_person = unit_cost_of_bednet / 1.8 / 3 bednet_multiplication_factor = bednet_coverage_rate * annual_bednet_cost_per_person @@ -383,7 +389,6 @@ def melt_and_label_malaria_scaleup_cost(_df, label): new_df = melt_and_label_malaria_scaleup_cost(df, label) input_costs = pd.concat([input_costs, new_df], ignore_index=True) -# TODO Reduce the cost of Oxygen and Depo-medroxy temporarily which we figure out the issue with this # Extract input_costs for browsing input_costs.groupby(['draw', 'run', 'cost_category', 'cost_subcategory', 'cost_subgroup','year'])['cost'].sum().to_csv(figurespath / 'cost_detailed.csv') From 856bd34918ada02b3ec6eee1c7d43cee1ba1b45f Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 19 Nov 2024 16:07:24 +0000 Subject: [PATCH 173/230] update dict for figures --- ...st_analysis_htm_with_and_without_hss_gf.py | 66 +++++-------------- 1 file changed, 16 insertions(+), 50 deletions(-) diff --git a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py index 817c6a37f9..bd84b00123 100644 --- a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py +++ b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py @@ -396,10 +396,9 @@ def melt_and_label_malaria_scaleup_cost(_df, label): # Return on Invesment analysis # Calculate incremental cost # ----------------------------------------------------------------------------------------------------------------------- -# Aggregate input costs for further analysis +# Aggregate input costs for further analysis (this step is needed because the malaria specific scale-up costs start from the year or malaria scale-up implementation) input_costs_subset = input_costs[ (input_costs['year'] >= relevant_period_for_costing[0]) & (input_costs['year'] <= relevant_period_for_costing[1])] -# TODO the above step may not longer be needed total_input_cost = input_costs_subset.groupby(['draw', 'run'])['cost'].sum() total_input_cost_summarized = summarize_cost_data(total_input_cost.unstack(level='run')) def find_difference_relative_to_comparison(_ser: pd.Series, @@ -425,7 +424,7 @@ def find_difference_relative_to_comparison(_ser: pd.Series, # Keep only scenarios of interest incremental_scenario_cost = incremental_scenario_cost[ - incremental_scenario_cost.index.get_level_values(0).isin(htm_scenarios_for_gf_report)] + 
incremental_scenario_cost.index.get_level_values(0).isin(htm_scenarios_for_report)]
 
 # Monetary value of health impact
 # -----------------------------------------------------------------------------------------------------------------------
@@ -434,7 +433,7 @@ def get_num_dalys(_df):
     Throw an error if there is not a record for every year in the TARGET PERIOD (to guard against inadvertently using
     results from runs that crashed mid-way through the simulation).
     """
-    years_needed = relevant_period_for_costing # [i.year for i in TARGET_PERIOD_INTERVENTION]
+    years_needed = relevant_period_for_costing
     assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded."
     _df = _df.loc[_df.year.between(*years_needed)].drop(columns=['date', 'sex', 'age_range']).groupby('year').sum().sum(axis = 1)
@@ -461,7 +460,7 @@ def get_num_dalys(_df):
                              num_dalys.loc[0],
                              comparison=0) # sets the comparator to 0 which is the Baseline scenario
                      ).T.iloc[0].unstack(level='run'))
-num_dalys_averted = num_dalys_averted[num_dalys_averted.index.get_level_values(0).isin(htm_scenarios_for_gf_report)]
+num_dalys_averted = num_dalys_averted[num_dalys_averted.index.get_level_values(0).isin(htm_scenarios_for_report)]
 
 # The monetary value of the health benefit is delta health times the chosen value of a life year (negative values are set to 0)
 def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_of_life_year):
@@ -472,20 +471,8 @@ def 
get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o _outputfilepath=roi_outputs_folder, _value_of_life_suffix = 'TB_VSL') +# Malaria scenarios generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), _incremental_input_cost=incremental_scenario_cost, _draws = [6,7], @@ -520,7 +510,7 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o lower=0.0) # monetary value - change in costs max_ability_to_pay_for_implementation_summarized = summarize_cost_data(max_ability_to_pay_for_implementation) max_ability_to_pay_for_implementation_summarized = max_ability_to_pay_for_implementation_summarized[ - max_ability_to_pay_for_implementation_summarized.index.get_level_values(0).isin(htm_scenarios_for_gf_report)] + max_ability_to_pay_for_implementation_summarized.index.get_level_values(0).isin(htm_scenarios_for_report)] # Plot Maximum ability to pay name_of_plot = f'Maximum ability to pay at CET, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' @@ -538,30 +528,6 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o fig.savefig(roi_outputs_folder / name_of_plot.replace(' ', '_').replace(',', '')) plt.close(fig) -# 4. Plot Maximum ability-to-pay at VSL -# ---------------------------------------------------- -max_ability_to_pay_for_implementation = (get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life) - incremental_scenario_cost).clip( - lower=0.0) # monetary value - change in costs -max_ability_to_pay_for_implementation_summarized = summarize_cost_data(max_ability_to_pay_for_implementation) -max_ability_to_pay_for_implementation_summarized = max_ability_to_pay_for_implementation_summarized[ - max_ability_to_pay_for_implementation_summarized.index.get_level_values(0).isin(htm_scenarios_for_gf_report)] - -# Plot Maximum ability to pay -name_of_plot = f'Maximum ability to pay at VSL, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' -fig, ax = do_bar_plot_with_ci( - (max_ability_to_pay_for_implementation_summarized / 1e6), - annotations=[ - f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}-\n {round(row['upper'] / 1e6, 1)})" - for _, row in max_ability_to_pay_for_implementation_summarized.iterrows() - ], - xticklabels_horizontal_and_wrapped=False, -) -ax.set_title(name_of_plot) -ax.set_ylabel('Maximum ability to pay (at VSL) \n(Millions)') -fig.tight_layout() -fig.savefig(roi_outputs_folder / name_of_plot.replace(' ', '_').replace(',', '')) -plt.close(fig) - # Plot incremental costs incremental_scenario_cost_summarized = summarize_cost_data(incremental_scenario_cost) name_of_plot = f'Incremental scenario cost relative to baseline {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' @@ -581,7 +547,7 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o # 4. 
Plot costs # ---------------------------------------------------- -input_costs_for_plot = input_costs[input_costs.draw.isin(htm_scenarios_for_gf_report)] +input_costs_for_plot = input_costs_subset[input_costs_subset.draw.isin(htm_scenarios_for_report)] # First summarize all input costs input_costs_for_plot_summarized = input_costs_for_plot.groupby(['draw', 'year', 'cost_subcategory', 'Facility_Level', 'cost_subgroup', 'cost_category']).agg( mean=('cost', 'mean'), @@ -595,9 +561,9 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o value_name='cost' ) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = list(range(2025, 2036)), _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios_substitutedict_fcdo) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = [2025], _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'human resources for health', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical consumables', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical equipment', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'other', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = list(range(2025, 2036)), _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios_substitutedict) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = [2025], _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios_substitutedict) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'human resources for health', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios_substitutedict) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical consumables', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios_substitutedict) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical equipment', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios_substitutedict) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'other', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios_substitutedict) From f8fb376b1adb571f2543f7e0ec8823160d59971b Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 19 Nov 2024 18:25:39 +0000 Subject: [PATCH 174/230] update y_lim options for roi plot --- 
src/scripts/costing/cost_estimation.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 64468c2964..3fda58b808 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -1020,7 +1020,8 @@ def generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health: _draws:None, _scenario_dict: dict, _outputfilepath: Path, - _value_of_life_suffix = ''): + _value_of_life_suffix = '', + _y_axis_lim = None): # Calculate maximum ability to pay for implementation _monetary_value_of_incremental_health = _monetary_value_of_incremental_health[_monetary_value_of_incremental_health.index.get_level_values('draw').isin(_draws)] _incremental_input_cost = _incremental_input_cost[_incremental_input_cost.index.get_level_values('draw').isin(_draws)] @@ -1101,7 +1102,10 @@ def generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health: max_roi.append(max_val) # Set y-axis limit - ax.set_ylim(0, max(max_roi) * 1.25) + if _y_axis_lim == None: + ax.set_ylim(0, max(max_roi) * 1.25) + else: + ax.set_ylim(0, _y_axis_lim) ax.set_xlim(left = 0) plt.xlabel('Implementation cost, millions') From 2d4b887a775d18bd5b8d606aabd8a935cf0ffc4a Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 19 Nov 2024 19:22:38 +0000 Subject: [PATCH 175/230] add vertical lines at 0, 1b, 3b in combined ROI plot --- ...st_analysis_htm_with_and_without_hss_gf.py | 96 ++++++++++++++++++- src/scripts/costing/cost_estimation.py | 31 +++++- 2 files changed, 124 insertions(+), 3 deletions(-) diff --git a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py index bd84b00123..0de0c754f5 100644 --- a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py +++ b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py @@ -472,7 +472,7 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o # 3. 
Return on Investment Plot # ---------------------------------------------------- # Combined ROI plot of relevant scenarios -# HTM scenarios +# HTM scenarios X 5 generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), _incremental_input_cost=incremental_scenario_cost, _draws = [1,8,9,10,11], @@ -480,6 +480,32 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o _outputfilepath=roi_outputs_folder, _value_of_life_suffix = 'all_HTM_VSL') +# HTM scenarios X 3 +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [1,8,9], + _scenario_dict = htm_scenarios, + _outputfilepath=roi_outputs_folder, + _value_of_life_suffix = 'HTM_full_HSS_VSL') + +# Only HSS +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [8,1], + _scenario_dict = htm_scenarios, + _outputfilepath=roi_outputs_folder, + _value_of_life_suffix = 'HTM_full_HSS_VSL', + _plot_vertical_lines=True) + +# HTM scenarios with HSS +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [8,9], + _scenario_dict = htm_scenarios, + _outputfilepath=roi_outputs_folder, + _value_of_life_suffix = 'HTM_full_HSS_VSL', + _plot_vertical_lines=True) + # HIV scenarios generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), _incremental_input_cost=incremental_scenario_cost, @@ -494,7 +520,8 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o _draws = [4,5], _scenario_dict = htm_scenarios, _outputfilepath=roi_outputs_folder, - _value_of_life_suffix = 'TB_VSL') + _value_of_life_suffix = 'TB_VSL', + _y_axis_lim = 50 ) # Malaria scenarios generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), @@ -538,6 +565,7 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o for _, row in incremental_scenario_cost_summarized.iterrows() ], xticklabels_horizontal_and_wrapped=False, + put_labels_in_legend=True ) ax.set_title(name_of_plot) ax.set_ylabel('Cost \n(USD Millions)') @@ -567,3 +595,67 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical consumables', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios_substitutedict) do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical equipment', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios_substitutedict) 
do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'other', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios_substitutedict) + +''' +# Population size across scenarios +def get_total_population_by_age_range(_df): + years_needed = [i.year for i in TARGET_PERIOD] # we only consider the population for the malaria scale-up period + _df['year'] = pd.to_datetime(_df['date']).dt.year + assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." + _df = pd.melt(_df.drop(columns = 'date'), id_vars = ['year']).rename(columns = {'variable': 'age_range'}) + return pd.Series( + data=_df + .loc[_df.year.between(*years_needed)] + .groupby('age_range')['value'].sum() + ) + +male_population_by_age_range = summarize(extract_results( + results_folder, + module='tlo.methods.demography', + key='age_range_m', + custom_generate_series=get_total_population_by_age_range, + do_scaling=True +)) + +female_population_by_age_range = summarize(extract_results( + results_folder, + module='tlo.methods.demography', + key='age_range_f', + custom_generate_series=get_total_population_by_age_range, + do_scaling=True +)) + +# Plot male population (age 0-4) +name_of_plot = f'Male population, 0-4 years, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' +fig, ax = do_standard_bar_plot_with_ci( + (male_population_by_age_range[male_population_by_age_range.index.get_level_values('age_range') == '0-4'].sum()/1e6).unstack(), + xticklabels_horizontal_and_wrapped=False, +) +ax.set_title(name_of_plot) +ax.set_ylabel('Population \n(Millions)') +fig.tight_layout() +fig.savefig(roi_outputs_folder / name_of_plot.replace(' ', '_').replace(',', '')) +plt.close(fig) + + +# Plot female population (age 0-4) +name_of_plot = f'Female population, 0-4 years, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' +fig, ax = do_standard_bar_plot_with_ci( + (female_population_by_age_range[female_population_by_age_range.index.get_level_values('age_range') == '0-4'].sum()/1e6).unstack(), + xticklabels_horizontal_and_wrapped=False, +) +ax.set_title(name_of_plot) +ax.set_ylabel('Population \n(Millions)') +fig.tight_layout() +fig.savefig(roi_outputs_folder / name_of_plot.replace(' ', '_').replace(',', '')) +plt.close(fig) + +''' +name_of_plot = f'Incremental scenario cost relative to baseline {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' +fig, ax = do_bar_plot_with_ci(incremental_scenario_cost_summarized / 1e6) +ax.set_title(name_of_plot) +ax.set_ylabel('(Millions)') +fig.tight_layout() +fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) +fig.show() +plt.close(fig) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 3fda58b808..c92b5c8eb8 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -1021,7 +1021,8 @@ def generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health: _scenario_dict: dict, _outputfilepath: Path, _value_of_life_suffix = '', - _y_axis_lim = None): + _y_axis_lim = None, + _plot_vertical_lines = False): # Calculate maximum ability to pay for implementation _monetary_value_of_incremental_health = _monetary_value_of_incremental_health[_monetary_value_of_incremental_health.index.get_level_values('draw').isin(_draws)] _incremental_input_cost = _incremental_input_cost[_incremental_input_cost.index.get_level_values('draw').isin(_draws)] @@ 
-1032,6 +1033,9 @@ def generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health: # Generate a list to store max ROI value to set ylim max_roi = [] + roi_at_1billion = [] + roi_at_3billion = [] + roi_at_0 = [] # Iterate over each draw in monetary_value_of_incremental_health for draw_index, row in _monetary_value_of_incremental_health.iterrows(): @@ -1041,6 +1045,9 @@ def generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health: # Create an array of implementation costs ranging from 0 to the max value of max ability to pay for the current draw implementation_costs = np.linspace(0, max_ability_to_pay_for_implementation.loc[draw_index].max(), 50) + # Add fixed values for ROI ratio calculation + additional_costs = np.array([1_000_000_000, 3_000_000_000]) + implementation_costs = np.sort(np.unique(np.concatenate([implementation_costs, additional_costs]))) # Retrieve the corresponding row from incremental_scenario_cost for the same draw incremental_scenario_cost_row = _incremental_input_cost.loc[draw_index] @@ -1101,6 +1108,28 @@ def generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health: max_val = mean_values[~np.isinf(mean_values['roi'])]['roi'].max() max_roi.append(max_val) + roi_at_0_value = collapsed_data[(collapsed_data.implementation_cost == 0) & (collapsed_data.stat == 'mean')]['roi'].iloc[0] + roi_at_1billion_value = collapsed_data[(collapsed_data.implementation_cost == 1e+09) & (collapsed_data.stat == 'mean')]['roi'].iloc[0] + roi_at_3billion_value = collapsed_data[(collapsed_data.implementation_cost == 3e+09) & (collapsed_data.stat == 'mean')]['roi'].iloc[0] + + roi_at_0.append(roi_at_0_value) + roi_at_1billion.append(roi_at_1billion_value) + roi_at_3billion.append(roi_at_3billion_value) + + ratio_at_0 = max(roi_at_0)/min(roi_at_0) + ratio_at_1billion = max(roi_at_1billion)/min(roi_at_1billion) + ratio_at_3billion = max(roi_at_3billion) / min(roi_at_3billion) + roi_ratio = [ratio_at_0, ratio_at_1billion, ratio_at_3billion] + + if _plot_vertical_lines == True: + # Present ratio of returns at different implementation costs + i = 0 + for cost in [0, 1_000_000_000, 3_000_000_000]: + ax.axvline(x=cost / 1e6, color='black', linestyle='--', linewidth=1) + ax.text(cost / 1e6 + 400, ax.get_ylim()[1] * 0.9, f'At {cost / 1e6:.0f}M, ratio of ROI curves is {round(roi_ratio[i],1)}', color='black', fontsize=10, rotation=90, + verticalalignment='top') + i = i + 1 + # Set y-axis limit if _y_axis_lim == None: ax.set_ylim(0, max(max_roi) * 1.25) From 2b2b4d958c4a3c419f901aff44b8abe6b7467e15 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 19 Nov 2024 19:25:41 +0000 Subject: [PATCH 176/230] make vertical lines in ROI plots flexible --- ...st_analysis_htm_with_and_without_hss_gf.py | 4 +- src/scripts/costing/cost_estimation.py | 49 +++++++++---------- 2 files changed, 24 insertions(+), 29 deletions(-) diff --git a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py index 0de0c754f5..fbf0fe505d 100644 --- a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py +++ b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py @@ -495,7 +495,7 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o _scenario_dict = 
htm_scenarios, _outputfilepath=roi_outputs_folder, _value_of_life_suffix = 'HTM_full_HSS_VSL', - _plot_vertical_lines=True) + _plot_vertical_lines_at = [0, 1e9, 3e9]) # HTM scenarios with HSS generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), @@ -504,7 +504,7 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o _scenario_dict = htm_scenarios, _outputfilepath=roi_outputs_folder, _value_of_life_suffix = 'HTM_full_HSS_VSL', - _plot_vertical_lines=True) + _plot_vertical_lines_at = [0, 1e9, 3e9]) # HIV scenarios generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index c92b5c8eb8..6041577eb3 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -1022,7 +1022,7 @@ def generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health: _outputfilepath: Path, _value_of_life_suffix = '', _y_axis_lim = None, - _plot_vertical_lines = False): + _plot_vertical_lines_at: list = None): # Calculate maximum ability to pay for implementation _monetary_value_of_incremental_health = _monetary_value_of_incremental_health[_monetary_value_of_incremental_health.index.get_level_values('draw').isin(_draws)] _incremental_input_cost = _incremental_input_cost[_incremental_input_cost.index.get_level_values('draw').isin(_draws)] @@ -1031,11 +1031,8 @@ def generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health: # Create a figure and axis to plot all draws together fig, ax = plt.subplots(figsize=(10, 6)) - # Generate a list to store max ROI value to set ylim - max_roi = [] - roi_at_1billion = [] - roi_at_3billion = [] - roi_at_0 = [] + # Store ROI values for specific costs + roi_at_costs = {cost: [] for cost in (_plot_vertical_lines_at or [])} # Iterate over each draw in monetary_value_of_incremental_health for draw_index, row in _monetary_value_of_incremental_health.iterrows(): @@ -1108,27 +1105,25 @@ def generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health: max_val = mean_values[~np.isinf(mean_values['roi'])]['roi'].max() max_roi.append(max_val) - roi_at_0_value = collapsed_data[(collapsed_data.implementation_cost == 0) & (collapsed_data.stat == 'mean')]['roi'].iloc[0] - roi_at_1billion_value = collapsed_data[(collapsed_data.implementation_cost == 1e+09) & (collapsed_data.stat == 'mean')]['roi'].iloc[0] - roi_at_3billion_value = collapsed_data[(collapsed_data.implementation_cost == 3e+09) & (collapsed_data.stat == 'mean')]['roi'].iloc[0] - - roi_at_0.append(roi_at_0_value) - roi_at_1billion.append(roi_at_1billion_value) - roi_at_3billion.append(roi_at_3billion_value) - - ratio_at_0 = max(roi_at_0)/min(roi_at_0) - ratio_at_1billion = max(roi_at_1billion)/min(roi_at_1billion) - ratio_at_3billion = max(roi_at_3billion) / min(roi_at_3billion) - roi_ratio = [ratio_at_0, ratio_at_1billion, ratio_at_3billion] - - if _plot_vertical_lines == True: - # Present ratio of returns at different implementation costs - i = 0 - for cost in [0, 1_000_000_000, 3_000_000_000]: - ax.axvline(x=cost / 1e6, color='black', linestyle='--', linewidth=1) - ax.text(cost / 1e6 + 400, ax.get_ylim()[1] * 0.9, f'At {cost / 1e6:.0f}M, ratio of ROI 
curves is {round(roi_ratio[i],1)}', color='black', fontsize=10, rotation=90, - verticalalignment='top') - i = i + 1 + # Capture ROI at specific costs + if _plot_vertical_lines_at: + for cost in _plot_vertical_lines_at: + roi_value = collapsed_data[ + (collapsed_data.implementation_cost == cost) & + (collapsed_data.stat == 'mean') + ]['roi'] + if not roi_value.empty: + roi_at_costs[cost].append(roi_value.iloc[0]) + + # Calculate and annotate ROI ratios + if _plot_vertical_lines_at: + for cost in _plot_vertical_lines_at: + if cost in roi_at_costs: + ratio = max(roi_at_costs[cost]) / min(roi_at_costs[cost]) + ax.axvline(x=cost / 1e6, color='black', linestyle='--', linewidth=1) + ax.text(cost / 1e6 + 400, ax.get_ylim()[1] * 0.9, + f'At {cost / 1e6:.0f}M, ROI ratio = {round(ratio, 2)}', + color='black', fontsize=10, rotation=90, verticalalignment='top') # Set y-axis limit if _y_axis_lim == None: From 2f1e036cdb3f5dabccddb5d330f7669ab695bd9f Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 20 Nov 2024 18:55:08 +0000 Subject: [PATCH 177/230] add a way to estimate projected health spending and include it in ROI plots in the X-tick labels --- .../cost_analysis_hss_elements_gf.py | 78 +++++------ ...st_analysis_htm_with_and_without_hss_gf.py | 43 ++++--- src/scripts/costing/cost_estimation.py | 121 +++++++++++++++++- 3 files changed, 175 insertions(+), 67 deletions(-) diff --git a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_hss_elements_gf.py b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_hss_elements_gf.py index 20d3ca6ab5..8178d77af1 100644 --- a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_hss_elements_gf.py +++ b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_hss_elements_gf.py @@ -30,7 +30,8 @@ summarize_cost_data, do_stacked_bar_plot_of_cost_by_category, do_line_plot_of_cost, - generate_multiple_scenarios_roi_plot) + generate_multiple_scenarios_roi_plot, + estimate_projected_health_spending) # Define a timestamp for script outputs timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M") @@ -80,13 +81,10 @@ 'HRH Scale-up Following Historical Growth': '#f46d43', 'HRH Accelerated Scale-up (6%)': '#fdae61', 'Increase Capacity at Primary Care Levels': '#fee08b', - 'Increase Capacity of CHW': '#ffffbf', 'Consumables Increased to 75th Percentile': '#d9ef8b', 'Consumables Available at HIV levels': '#a6d96a', 'Consumables Available at EPI levels': '#66bd63', - 'Perfect Consumables Availability': '#1a9850', - 'HSS PACKAGE: Perfect': '#3288bd', - 'HSS PACKAGE: Realistic expansion': '#5e4fa2' + 'HSS PACKAGE: Realistic expansion': '#3288bd' } # Cost-effectiveness threshold @@ -206,11 +204,13 @@ def do_standard_bar_plot_with_ci(_df, set_colors=None, annotations=None, if put_labels_in_legend: # Update xticks label with substitute labels # Insert legend with updated labels that shows correspondence between substitute label and original label + # Use htm_scenarios for the legend + xtick_legend = [f'{letter}: {hss_scenarios.get(label, label)}' for letter, label in zip(substitute_labels, xticks.values())] xtick_values = [letter for letter, label in zip(substitute_labels, xticks.values())] - xtick_legend = [f'{letter}: {label}' for letter, label in zip(substitute_labels, xticks.values())] + h, legs = ax.get_legend_handles_labels() ax.legend(h, xtick_legend, loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5)) - 
ax.set_xticklabels(list(xtick_values)) + ax.set_xticklabels(xtick_values) else: if not xticklabels_horizontal_and_wrapped: # xticklabels will be vertical and not wrapped @@ -222,6 +222,7 @@ def do_standard_bar_plot_with_ci(_df, set_colors=None, annotations=None, ax.grid(axis="y") ax.spines['top'].set_visible(False) ax.spines['right'].set_visible(False) + #fig.tight_layout() fig.tight_layout(pad=2.0) plt.subplots_adjust(left=0.15, right=0.85) # Adjust left and right margins @@ -236,8 +237,8 @@ def do_standard_bar_plot_with_ci(_df, set_colors=None, annotations=None, # TODO Remove the manual fix below once the logging for these is corrected input_costs.loc[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost'] = \ input_costs.loc[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost']/10 -input_costs.loc[input_costs.cost_subgroup == 'Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly', 'cost'] =\ - input_costs.loc[input_costs.cost_subgroup == 'Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly', 'cost']/7 +#input_costs.loc[input_costs.cost_subgroup == 'Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly', 'cost'] =\ +# input_costs.loc[input_costs.cost_subgroup == 'Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly', 'cost']/7 #input_costs = apply_discounting_to_cost_data(input_costs, _discount_rate = discount_rate) # %% @@ -319,6 +320,13 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o # 3. Return on Investment Plot # ---------------------------------------------------- +projected_health_spending = estimate_projected_health_spending(resourcefilepath, + results_folder, + _years = list_of_relevant_years_for_costing, + _discount_rate = discount_rate, + _summarize = True) +projected_health_spending_baseline = projected_health_spending[projected_health_spending.index.get_level_values(0) == 0]['mean'][0] + # FCDO # Combined ROI plot of relevant scenarios generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), @@ -326,7 +334,9 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o _draws = [1,2,3,4], _scenario_dict = hss_scenarios, _outputfilepath=roi_outputs_folder_fcdo, - _value_of_life_suffix = 'HR_VSL') + _value_of_life_suffix = 'HR_VSL', + _year_suffix= f' ({str(relevant_period_for_costing[0])} - {str(relevant_period_for_costing[1])})', + _projected_health_spending = projected_health_spending_baseline) # Combined ROI plot of relevant scenarios generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), @@ -334,7 +344,10 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o _draws = [5,6,7], _scenario_dict = hss_scenarios, _outputfilepath=roi_outputs_folder_fcdo, - _value_of_life_suffix = 'Consumables_VSL') + _value_of_life_suffix = 'Consumables_VSL', + _y_axis_lim= 100, + _year_suffix= f' ({str(relevant_period_for_costing[0])} - {str(relevant_period_for_costing[1])})', + _projected_health_spending = projected_health_spending_baseline) # Combined ROI plot of relevant scenarios generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = 
chosen_value_of_statistical_life), @@ -342,32 +355,9 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o _draws = [8], _scenario_dict = hss_scenarios, _outputfilepath=roi_outputs_folder_fcdo, - _value_of_life_suffix = 'HSS_VSL') - -# Global Fund -# Combined ROI plot of relevant scenarios -generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), - _incremental_input_cost=incremental_scenario_cost, - _draws = [1,2,3,4], - _scenario_dict = hss_scenarios, - _outputfilepath=roi_outputs_folder_gf, - _value_of_life_suffix = 'HR_VSL') - -# Combined ROI plot of relevant scenarios -generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), - _incremental_input_cost=incremental_scenario_cost, - _draws = [5,6,7], - _scenario_dict = hss_scenarios, - _outputfilepath=roi_outputs_folder_gf, - _value_of_life_suffix = 'Consumables_VSL') - -# Combined ROI plot of relevant scenarios -generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), - _incremental_input_cost=incremental_scenario_cost, - _draws = [8], - _scenario_dict = hss_scenarios, - _outputfilepath=roi_outputs_folder_gf, - _value_of_life_suffix = 'HSS_VSL') + _value_of_life_suffix = 'HSS_VSL', + _year_suffix= f' ({str(relevant_period_for_costing[0])} - {str(relevant_period_for_costing[1])})', + _projected_health_spending = projected_health_spending_baseline) # 4. Plot Maximum ability-to-pay at CET # ---------------------------------------------------- @@ -456,7 +446,7 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o # 4. 
Plot costs # ---------------------------------------------------- # FCDO -input_costs_for_plot = input_costs[input_costs.draw.isin(hss_scenarios_for_report)] +input_costs_for_plot = input_costs_subset[input_costs_subset.draw.isin(hss_scenarios_for_report)] # First summarize all input costs input_costs_for_plot_summarized = input_costs_for_plot.groupby(['draw', 'year', 'cost_subcategory', 'Facility_Level', 'cost_subgroup', 'cost_category']).agg( mean=('cost', 'mean'), @@ -471,14 +461,14 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o ) do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _disaggregate_by_subgroup = False, _outputfilepath = Path(figurespath / 'fcdo'), _scenario_dict = hs_scenarios_substitutedict) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = [2025], _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'human resources for health', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical consumables', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical equipment', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'other', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = [2025], _disaggregate_by_subgroup = False, _outputfilepath = Path(figurespath / 'fcdo'), _scenario_dict = hs_scenarios_substitutedict) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'human resources for health', _disaggregate_by_subgroup = False, _outputfilepath = Path(figurespath / 'fcdo'), _scenario_dict = hs_scenarios_substitutedict) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical consumables', _disaggregate_by_subgroup = False, _outputfilepath = Path(figurespath / 'fcdo'), _scenario_dict = hs_scenarios_substitutedict) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical equipment', _disaggregate_by_subgroup = False, _outputfilepath = Path(figurespath / 'fcdo'), _scenario_dict = hs_scenarios_substitutedict) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'other', _disaggregate_by_subgroup = False, _outputfilepath = Path(figurespath / 'fcdo'), _scenario_dict = hs_scenarios_substitutedict) # Global Fund -input_costs_for_plot = input_costs[input_costs.draw.isin(hss_scenarios_for_report)] +input_costs_for_plot = input_costs_subset[input_costs_subset.draw.isin(hss_scenarios_for_report)] # First summarize all input costs input_costs_for_plot_summarized = input_costs_for_plot.groupby(['draw', 'year', 'cost_subcategory', 'Facility_Level', 'cost_subgroup', 
'cost_category']).agg( mean=('cost', 'mean'), diff --git a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py index fbf0fe505d..bec0f40f7d 100644 --- a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py +++ b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py @@ -38,7 +38,8 @@ do_stacked_bar_plot_of_cost_by_category, do_line_plot_of_cost, generate_roi_plots, - generate_multiple_scenarios_roi_plot) + generate_multiple_scenarios_roi_plot, + estimate_projected_health_spending) # Define a timestamp for script outputs timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M") @@ -75,7 +76,7 @@ district_dict = dict(zip(district_dict['District_Num'], district_dict['District'])) # Period relevant for costing -TARGET_PERIOD= (Date(2025, 1, 1), Date(2035, 12, 31)) # This is the period that is costed +TARGET_PERIOD= (Date(2027, 1, 1), Date(2029, 12, 31)) # This is the period that is costed relevant_period_for_costing = [i.year for i in TARGET_PERIOD] list_of_relevant_years_for_costing = list(range(relevant_period_for_costing[0], relevant_period_for_costing[1] + 1)) @@ -226,11 +227,13 @@ def do_standard_bar_plot_with_ci(_df, set_colors=None, annotations=None, if put_labels_in_legend: # Update xticks label with substitute labels # Insert legend with updated labels that shows correspondence between substitute label and original label + # Use htm_scenarios for the legend + xtick_legend = [f'{letter}: {htm_scenarios.get(label, label)}' for letter, label in zip(substitute_labels, xticks.values())] xtick_values = [letter for letter, label in zip(substitute_labels, xticks.values())] - xtick_legend = [f'{letter}: {label}' for letter, label in zip(substitute_labels, xticks.values())] + h, legs = ax.get_legend_handles_labels() ax.legend(h, xtick_legend, loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5)) - ax.set_xticklabels(list(xtick_values)) + ax.set_xticklabels(xtick_values) else: if not xticklabels_horizontal_and_wrapped: # xticklabels will be vertical and not wrapped @@ -242,6 +245,7 @@ def do_standard_bar_plot_with_ci(_df, set_colors=None, annotations=None, ax.grid(axis="y") ax.spines['top'].set_visible(False) ax.spines['right'].set_visible(False) + #fig.tight_layout() fig.tight_layout(pad=2.0) plt.subplots_adjust(left=0.15, right=0.85) # Adjust left and right margins @@ -471,6 +475,13 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o # 3. 
Return on Investment Plot # ---------------------------------------------------- +projected_health_spending = estimate_projected_health_spending(resourcefilepath, + results_folder, + _years = list_of_relevant_years_for_costing, + _discount_rate = discount_rate, + _summarize = True) +projected_health_spending_baseline = projected_health_spending[projected_health_spending.index.get_level_values(0) == 0]['mean'][0] + # Combined ROI plot of relevant scenarios # HTM scenarios X 5 generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), @@ -495,7 +506,9 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o _scenario_dict = htm_scenarios, _outputfilepath=roi_outputs_folder, _value_of_life_suffix = 'HTM_full_HSS_VSL', - _plot_vertical_lines_at = [0, 1e9, 3e9]) + _plot_vertical_lines_at = [0, 1e9, 3e9], + _year_suffix= f' ({str(relevant_period_for_costing[0])} - {str(relevant_period_for_costing[1])})', + _projected_health_spending = projected_health_spending_baseline) # HTM scenarios with HSS generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), @@ -504,7 +517,9 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o _scenario_dict = htm_scenarios, _outputfilepath=roi_outputs_folder, _value_of_life_suffix = 'HTM_full_HSS_VSL', - _plot_vertical_lines_at = [0, 1e9, 3e9]) + _plot_vertical_lines_at = [0, 1e9, 3e9], + _year_suffix= f' ({str(relevant_period_for_costing[0])}- {str(relevant_period_for_costing[1])})', + _projected_health_spending = projected_health_spending_baseline) # HIV scenarios generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), @@ -548,6 +563,8 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o for _, row in max_ability_to_pay_for_implementation_summarized.iterrows() ], xticklabels_horizontal_and_wrapped=False, + put_labels_in_legend=True, + offset=0.5, ) ax.set_title(name_of_plot) ax.set_ylabel('Maximum ability to pay \n(Millions)') @@ -565,7 +582,8 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o for _, row in incremental_scenario_cost_summarized.iterrows() ], xticklabels_horizontal_and_wrapped=False, - put_labels_in_legend=True + put_labels_in_legend=True, + offset=0.5, ) ax.set_title(name_of_plot) ax.set_ylabel('Cost \n(USD Millions)') @@ -589,7 +607,7 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o value_name='cost' ) -do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = list(range(2025, 2036)), _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios_substitutedict) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios_substitutedict) do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = [2025], _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = 
htm_scenarios_substitutedict) do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'human resources for health', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios_substitutedict) do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical consumables', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios_substitutedict) @@ -651,11 +669,4 @@ def get_total_population_by_age_range(_df): plt.close(fig) ''' -name_of_plot = f'Incremental scenario cost relative to baseline {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' -fig, ax = do_bar_plot_with_ci(incremental_scenario_cost_summarized / 1e6) -ax.set_title(name_of_plot) -ax.set_ylabel('(Millions)') -fig.tight_layout() -fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) -fig.show() -plt.close(fig) + diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 6041577eb3..18f1227400 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -692,6 +692,92 @@ def summarize_cost_data(_df): collapsed_df = collapsed_df.unstack(level='stat') return collapsed_df +# Estimate projected health spending +#################################################### +def estimate_projected_health_spending(resourcefilepath: Path = None, + results_folder: Path = None, + _draws = None, _runs = None, + _years = None, + _discount_rate = 0, + _summarize = False): + # %% Gathering basic information + # Load basic simulation parameters + #------------------------------------- + log = load_pickled_dataframes(results_folder, 0, 0) # read from 1 draw and run + info = get_scenario_info(results_folder) # get basic information about the results + if _draws is None: + _draws = range(0, info['number_of_draws']) + if _runs is None: + _runs = range(0, info['runs_per_draw']) + final_year_of_simulation = max(log['tlo.methods.healthsystem.summary']['hsi_event_counts']['date']).year + first_year_of_simulation = min(log['tlo.methods.healthsystem.summary']['hsi_event_counts']['date']).year + if _years == None: + _years = list(range(first_year_of_simulation, final_year_of_simulation + 1)) + + # Load health spending per capita projections + #---------------------------------------- + # Load health spending projections + workbook_cost = pd.read_excel((resourcefilepath / "costing/ResourceFile_Costing.xlsx"), + sheet_name = None) + health_spending_per_capita = workbook_cost["health_spending_projections"] + # Assign the fourth row as column names + health_spending_per_capita.columns = health_spending_per_capita.iloc[1] + health_spending_per_capita = health_spending_per_capita.iloc[2:].reset_index(drop = True) + health_spending_per_capita = health_spending_per_capita[health_spending_per_capita.year.isin(list(range(2015,2041)))] + total_health_spending_per_capita_mean = health_spending_per_capita[['year', 'total_mean']].set_index('year') + + # Load population projections + # ---------------------------------------- + def get_total_population(_df): + years_needed = [min(_years), max(_years)] # we only consider the population for the malaria scale-up period + # because those are the years relevant for malaria scale-up costing + _df['year'] = pd.to_datetime(_df['date']).dt.year + _df = _df[['year', 'total']] + assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." 
+ return pd.Series(_df.loc[_df.year.between(*years_needed)].set_index('year')['total']) + + total_population_by_year = extract_results( + results_folder, + module='tlo.methods.demography', + key='population', + custom_generate_series=get_total_population, + do_scaling=True + ) + population_columns = total_population_by_year.columns + + # Estimate total health spending + projected_health_spending = pd.merge(total_health_spending_per_capita_mean, + total_population_by_year, + left_index=True, right_index=True,how='inner') + projected_health_spending = projected_health_spending.apply(pd.to_numeric, errors='coerce') + projected_health_spending[population_columns] = projected_health_spending[population_columns].multiply( + projected_health_spending['total_mean'], axis=0) + projected_health_spending = projected_health_spending[population_columns] + + # Apply discount rate + # Initial year and discount rate + initial_year = min(projected_health_spending.index.get_level_values('year').unique()) + # Discount factor calculation + discount_factors = (1 + _discount_rate) ** (projected_health_spending.index.get_level_values('year') - initial_year) + # Apply the discount to the specified columns + projected_health_spending.loc[:, population_columns] = ( + projected_health_spending[population_columns].div(discount_factors, axis=0)) + # add across years + projected_health_spending = projected_health_spending.sum(axis = 0) + projected_health_spending.index = pd.MultiIndex.from_tuples(projected_health_spending.index, names=["draw", "run"]) + + if _summarize == True: + # Calculate the mean and 95% confidence intervals for each group + projected_health_spending = projected_health_spending.groupby(level="draw").agg( + mean=np.mean, + lower=lambda x: np.percentile(x, 2.5), + upper=lambda x: np.percentile(x, 97.5) + ) + # Flatten the resulting DataFrame into a single-level MultiIndex Series + projected_health_spending = projected_health_spending.stack().rename_axis(["draw", "stat"]).rename("value") + + return projected_health_spending.unstack() + # Plot costs #################################################### # 1. 
Stacked bar plot (Total cost + Cost categories) @@ -763,7 +849,7 @@ def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', labels = pivot_df.index.astype(str) # Wrap x-tick labels for readability - wrapped_labels = [textwrap.fill(label, 20) for label in labels] + wrapped_labels = [textwrap.fill(str(label), 20) for label in labels] ax.set_xticklabels(wrapped_labels, rotation=45, ha='right') # Period included for plot title and name @@ -1022,7 +1108,9 @@ def generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health: _outputfilepath: Path, _value_of_life_suffix = '', _y_axis_lim = None, - _plot_vertical_lines_at: list = None): + _plot_vertical_lines_at: list = None, + _year_suffix = '', + _projected_health_spending = None): # Calculate maximum ability to pay for implementation _monetary_value_of_incremental_health = _monetary_value_of_incremental_health[_monetary_value_of_incremental_health.index.get_level_values('draw').isin(_draws)] _incremental_input_cost = _incremental_input_cost[_incremental_input_cost.index.get_level_values('draw').isin(_draws)] @@ -1032,6 +1120,7 @@ def generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health: fig, ax = plt.subplots(figsize=(10, 6)) # Store ROI values for specific costs + max_roi = [] roi_at_costs = {cost: [] for cost in (_plot_vertical_lines_at or [])} # Iterate over each draw in monetary_value_of_incremental_health @@ -1115,16 +1204,34 @@ def generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health: if not roi_value.empty: roi_at_costs[cost].append(roi_value.iloc[0]) - # Calculate and annotate ROI ratios + # Calculate and annotate ROI ratios if _plot_vertical_lines_at: for cost in _plot_vertical_lines_at: if cost in roi_at_costs: ratio = max(roi_at_costs[cost]) / min(roi_at_costs[cost]) ax.axvline(x=cost / 1e6, color='black', linestyle='--', linewidth=1) - ax.text(cost / 1e6 + 400, ax.get_ylim()[1] * 0.9, - f'At {cost / 1e6:.0f}M, ROI ratio = {round(ratio, 2)}', + ax.text(cost / 1e6 + 200, ax.get_ylim()[1] * 0.9, + f'At {cost / 1e6:.0f}M, ratio of ROI curves = {round(ratio, 2)}', color='black', fontsize=10, rotation=90, verticalalignment='top') + # Define fixed x-tick positions with a gap of 2000 + step_size = (ax.get_xlim()[1] - 0)/5 + xticks = np.arange(0, ax.get_xlim()[1] + 1, int(round(step_size, -3))) # From 0 to max x-limit with 5 steps + # Get labels + xtick_labels = [f'{tick:.0f}M' for tick in xticks] # Default labels for all ticks + + # Replace specific x-ticks with % of health spending values + if _projected_health_spending: + for i, tick in enumerate(xticks): + if tick == 2000: # Replace for 2000 + xtick_labels[i] = f'{tick:.0f}M\n({tick / (_projected_health_spending/1e6) :.2%} of \n projected total \n health spend)' + elif tick == 4000: # Replace for 4000 + xtick_labels[i] = f'{tick:.0f}M\n({tick / (_projected_health_spending/1e6) :.2%} of \n projected total \n health spend)' + + # Update the x-ticks and labels + ax.set_xticks(xticks) + ax.set_xticklabels(xtick_labels, fontsize=7) + # Set y-axis limit if _y_axis_lim == None: ax.set_ylim(0, max(max_roi) * 1.25) @@ -1134,12 +1241,12 @@ def generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health: plt.xlabel('Implementation cost, millions') plt.ylabel('Return on Investment') - plt.title('Return on Investment at different levels of implementation cost') + plt.title(f'Return on Investment at different levels of implementation cost{_year_suffix}') # Show legend plt.legend() # Save - 
plt.savefig(_outputfilepath / f'draws_{_draws}_ROI_at_{_value_of_life_suffix}.png', dpi=100, + plt.savefig(_outputfilepath / f'draws_{_draws}_ROI_at_{_value_of_life_suffix}_{_year_suffix}.png', dpi=100, bbox_inches='tight') plt.close() From 4593d3006f62cc93098a5c9ea816c17db501213d Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 20 Nov 2024 18:55:22 +0000 Subject: [PATCH 178/230] add health spending projections assumptions --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index ebbd097d37..6ba29eeb6f 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:92117887e3aefcd990637d7470de1f93d8099d993efd2248090d58c3f58eb8ed -size 4274449 +oid sha256:cd9947ea1d634d9fdff8aa1cde0717361c83c64371f6db95aa1b1fcfb930f8c6 +size 4285545 From dc46b637e023e4ad3a29a366d87336b45798b37e Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 20 Nov 2024 18:58:06 +0000 Subject: [PATCH 179/230] add projected health spending to all xticklabels in ROI plots --- src/scripts/costing/cost_estimation.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 18f1227400..7339de4d37 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -1222,11 +1222,10 @@ def generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health: # Replace specific x-ticks with % of health spending values if _projected_health_spending: + xtick_labels[1] = f'{xticks[1]:.0f}M\n({xticks[1] / (_projected_health_spending / 1e6) :.2%} of \n projected total \n health spend)' for i, tick in enumerate(xticks): - if tick == 2000: # Replace for 2000 - xtick_labels[i] = f'{tick:.0f}M\n({tick / (_projected_health_spending/1e6) :.2%} of \n projected total \n health spend)' - elif tick == 4000: # Replace for 4000 - xtick_labels[i] = f'{tick:.0f}M\n({tick / (_projected_health_spending/1e6) :.2%} of \n projected total \n health spend)' + if (i != 0) & (i != 1): # Replace for 4000 + xtick_labels[i] = f'{tick:.0f}M\n({tick / (_projected_health_spending/1e6) :.2%})' # Update the x-ticks and labels ax.set_xticks(xticks) From 7853cce3f93c04462ce8e4871f6396eea2f9c3cf Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 20 Nov 2024 19:08:49 +0000 Subject: [PATCH 180/230] add proportion of health spend as data labels on bar plots --- .../cost_analysis_hss_elements_gf.py | 46 +++++-------------- ...st_analysis_htm_with_and_without_hss_gf.py | 9 +++- src/scripts/costing/cost_estimation.py | 2 +- 3 files changed, 19 insertions(+), 38 deletions(-) diff --git a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_hss_elements_gf.py b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_hss_elements_gf.py index 8178d77af1..3b43e7e3c3 100644 --- a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_hss_elements_gf.py +++ b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_hss_elements_gf.py @@ -219,6 +219,11 @@ def do_standard_bar_plot_with_ci(_df, set_colors=None, annotations=None, wrapped_labs = ["\n".join(textwrap.wrap(_lab, 20)) for _lab in xticks.values()] ax.set_xticklabels(wrapped_labs) + # Extend 
ylim to accommodate data labels + ymin, ymax = ax.get_ylim() + extension = 0.1 * (ymax - ymin) # 10% of range + ax.set_ylim(ymin - extension, ymax + extension) # Set new y-axis limits with the extended range + ax.grid(axis="y") ax.spines['top'].set_visible(False) ax.spines['right'].set_visible(False) @@ -375,10 +380,12 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o fig, ax = do_standard_bar_plot_with_ci( (max_ability_to_pay_for_implementation_summarized_fcdo / 1e6), annotations=[ - f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}-\n {round(row['upper'] / 1e6, 1)})" + f"{row['mean'] / projected_health_spending_baseline :.2%} ({row['lower'] / projected_health_spending_baseline :.2%}-\n {row['upper'] / projected_health_spending_baseline:.2%})" for _, row in max_ability_to_pay_for_implementation_summarized_fcdo.iterrows() ], xticklabels_horizontal_and_wrapped=False, + put_labels_in_legend=True, + offset=0.5, ) ax.set_title(name_of_plot) ax.set_ylabel('Maximum ability to pay \n(Millions)') @@ -386,23 +393,6 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o fig.savefig(roi_outputs_folder_fcdo / name_of_plot.replace(' ', '_').replace(',', '')) plt.close(fig) -# Global Fund -# Plot Maximum ability to pay -name_of_plot = f'Maximum ability to pay at CET, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' -fig, ax = do_standard_bar_plot_with_ci( - (max_ability_to_pay_for_implementation_summarized_gf / 1e6), - annotations=[ - f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}-\n {round(row['upper'] / 1e6, 1)})" - for _, row in max_ability_to_pay_for_implementation_summarized_gf.iterrows() - ], - xticklabels_horizontal_and_wrapped=False, -) -ax.set_title(name_of_plot) -ax.set_ylabel('Maximum ability to pay \n(Millions)') -fig.tight_layout() -fig.savefig(roi_outputs_folder_gf / name_of_plot.replace(' ', '_').replace(',', '')) -plt.close(fig) - # Plot incremental costs incremental_scenario_cost_summarized = summarize_cost_data(incremental_scenario_cost) # Keep only scenarios of interest @@ -416,10 +406,12 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o fig, ax = do_standard_bar_plot_with_ci( (incremental_scenario_cost_summarized_fcdo / 1e6), annotations=[ - f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}- \n {round(row['upper'] / 1e6, 1)})" + f"{row['mean'] / projected_health_spending_baseline :.2%} ({row['lower'] / projected_health_spending_baseline :.2%}-\n {row['upper'] / projected_health_spending_baseline:.2%})" for _, row in incremental_scenario_cost_summarized_fcdo.iterrows() ], xticklabels_horizontal_and_wrapped=False, + put_labels_in_legend=True, + offset=0.5, ) ax.set_title(name_of_plot) ax.set_ylabel('Cost \n(USD Millions)') @@ -427,22 +419,6 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o fig.savefig(roi_outputs_folder_fcdo / name_of_plot.replace(' ', '_').replace(',', '')) plt.close(fig) -# Global Fund -name_of_plot = f'Incremental scenario cost relative to baseline {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' -fig, ax = do_standard_bar_plot_with_ci( - (incremental_scenario_cost_summarized_gf / 1e6), - annotations=[ - f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}- \n {round(row['upper'] / 1e6, 1)})" - for _, row in incremental_scenario_cost_summarized_gf.iterrows() - ], - xticklabels_horizontal_and_wrapped=False, -) 
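# --- Illustrative sketch (editorial aside; not part of the patch) ---------------------------
# The annotation strings above now express each summarized cost as a share of projected baseline
# health spending rather than as an absolute amount. The formatting, shown on hypothetical
# numbers standing in for projected_health_spending_baseline and one summarized row:
projected_health_spending_baseline = 3.2e9                    # hypothetical USD over the period
row = {'mean': 160e6, 'lower': 144e6, 'upper': 176e6}         # hypothetical incremental cost, USD
annotation = (f"{row['mean'] / projected_health_spending_baseline:.2%} "
              f"({row['lower'] / projected_health_spending_baseline:.2%}- "
              f"{row['upper'] / projected_health_spending_baseline:.2%})")
print(annotation)   # -> 5.00% (4.50%- 5.50%)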
-ax.set_title(name_of_plot) -ax.set_ylabel('Cost \n(USD Millions)') -fig.tight_layout() -fig.savefig(roi_outputs_folder_gf / name_of_plot.replace(' ', '_').replace(',', '')) -plt.close(fig) - # 4. Plot costs # ---------------------------------------------------- # FCDO diff --git a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py index bec0f40f7d..150a646b94 100644 --- a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py +++ b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py @@ -242,6 +242,11 @@ def do_standard_bar_plot_with_ci(_df, set_colors=None, annotations=None, wrapped_labs = ["\n".join(textwrap.wrap(_lab, 20)) for _lab in xticks.values()] ax.set_xticklabels(wrapped_labs) + # Extend ylim to accommodate data labels + ymin, ymax = ax.get_ylim() + extension = 0.1 * (ymax - ymin) # 10% of range + ax.set_ylim(ymin - extension, ymax + extension) # Set new y-axis limits with the extended range + ax.grid(axis="y") ax.spines['top'].set_visible(False) ax.spines['right'].set_visible(False) @@ -559,7 +564,7 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o fig, ax = do_standard_bar_plot_with_ci( (max_ability_to_pay_for_implementation_summarized / 1e6), annotations=[ - f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}-\n {round(row['upper'] / 1e6, 1)})" + f"{row['mean'] / projected_health_spending_baseline :.2%} ({row['lower'] / projected_health_spending_baseline :.2%}-\n {row['upper'] / projected_health_spending_baseline:.2%})" for _, row in max_ability_to_pay_for_implementation_summarized.iterrows() ], xticklabels_horizontal_and_wrapped=False, @@ -578,7 +583,7 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o fig, ax = do_standard_bar_plot_with_ci( (incremental_scenario_cost_summarized / 1e6), annotations=[ - f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}- \n {round(row['upper'] / 1e6, 1)})" + f"{row['mean'] / projected_health_spending_baseline :.2%} ({row['lower'] / projected_health_spending_baseline :.2%}-\n {row['upper'] / projected_health_spending_baseline:.2%})" for _, row in incremental_scenario_cost_summarized.iterrows() ], xticklabels_horizontal_and_wrapped=False, diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 7339de4d37..3fcf67c726 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -1229,7 +1229,7 @@ def generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health: # Update the x-ticks and labels ax.set_xticks(xticks) - ax.set_xticklabels(xtick_labels, fontsize=7) + ax.set_xticklabels(xtick_labels, fontsize=10) # Set y-axis limit if _y_axis_lim == None: From 472f9d7732f8dec7ad9cc14c0353ac1e32824985 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Thu, 21 Nov 2024 12:21:37 +0000 Subject: [PATCH 181/230] change output folder for cost_validation --- src/scripts/costing/costing_validation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/scripts/costing/costing_validation.py b/src/scripts/costing/costing_validation.py index 3925bb88df..8c2ffc4b2b 100644 --- a/src/scripts/costing/costing_validation.py +++ 
b/src/scripts/costing/costing_validation.py @@ -74,7 +74,7 @@ # Load result files resourcefilepath = Path("./resources") outputfilepath = Path('./outputs/t.mangal@imperial.ac.uk') -results_folder = get_scenario_outputs('htm_with_and_without_hss-2024-10-22T163743Z.py', outputfilepath)[0] +results_folder = get_scenario_outputs('hss_elements-2024-11-12T172311Z.py', outputfilepath)[0] # Estimate costs for 2018 input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, _years = [2018], _draws = [0], summarize = True, cost_only_used_staff=False) @@ -151,7 +151,7 @@ def get_calibration_relevant_subset_of_other_costs(_df, _subcategory, _calibrati condoms = [2,25] tb_tests = [184,187, 175] other_drugs = set(consumables_costs_by_item_code['cost_subgroup'].unique()) - set(art) - set(tb_treatment) - set(antimalarials) - set(malaria_rdts) - set(hiv_screening)\ - - set(condoms) - set(tb_tests) - {3} + - set(condoms) - set(tb_tests)# - {3} # TODO once the quantity dispensed of Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly is fixed we no lnger have to adjust for Item_code 3 # Note that the main ARV regimen in 2018 was tenofovir/lamivudine/efavirenz as opposed to Tenofovir/Lamivudine/Dolutegravir as used in the RF_Costing. The price of this @@ -208,7 +208,7 @@ def get_calibration_relevant_subset_of_other_costs(_df, _subcategory, _calibrati costing_outputs_folder = Path('./outputs/costing') if not os.path.exists(costing_outputs_folder): os.makedirs(costing_outputs_folder) -figurespath = costing_outputs_folder / "figures" +figurespath = costing_outputs_folder / "figures_post_cons_fix" if not os.path.exists(figurespath): os.makedirs(figurespath) calibration_outputs_folder = Path(figurespath / 'calibration') From 862a09298a39ca86d6e15ad12a47031b63480715 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Thu, 21 Nov 2024 17:26:35 +0000 Subject: [PATCH 182/230] discount_rate in the estimate_cost function used incorrectly --- src/scripts/costing/cost_estimation.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 38d37461d8..ee11c5b30a 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -94,7 +94,7 @@ def melt_model_output_draws_and_runs(_df, id_vars): # Overall cost assumptions TARGET_PERIOD = (Date(first_year_of_simulation, 1, 1), Date(final_year_of_simulation, 12, 31)) # Declare period for which the results will be generated (defined inclusively) - discount_rate = 0.03 + discount_rate = _discount_rate # Read all cost parameters #--------------------------------------- @@ -717,18 +717,19 @@ def estimate_projected_health_spending(resourcefilepath: Path = None, #---------------------------------------- # Load health spending projections workbook_cost = pd.read_excel((resourcefilepath / "costing/ResourceFile_Costing.xlsx"), - sheet_name = None) + sheet_name=None) health_spending_per_capita = workbook_cost["health_spending_projections"] # Assign the fourth row as column names health_spending_per_capita.columns = health_spending_per_capita.iloc[1] - health_spending_per_capita = health_spending_per_capita.iloc[2:].reset_index(drop = True) - health_spending_per_capita = health_spending_per_capita[health_spending_per_capita.year.isin(list(range(2015,2041)))] + health_spending_per_capita = health_spending_per_capita.iloc[2:].reset_index(drop=True) + health_spending_per_capita = health_spending_per_capita[ + 
health_spending_per_capita.year.isin(list(range(2015, 2041)))] total_health_spending_per_capita_mean = health_spending_per_capita[['year', 'total_mean']].set_index('year') # Load population projections # ---------------------------------------- def get_total_population(_df): - years_needed = [min(_years), max(_years)] # we only consider the population for the malaria scale-up period + years_needed = [min(_years), max(_years)] # we only consider the population for the malaria scale-up period # because those are the years relevant for malaria scale-up costing _df['year'] = pd.to_datetime(_df['date']).dt.year _df = _df[['year', 'total']] From 43f50db028e8c59eb130a955509f1519be954fc4 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Thu, 21 Nov 2024 17:26:47 +0000 Subject: [PATCH 183/230] minor fixes to plots --- .../gf_analyses/cost_analysis_hss_elements_gf.py | 12 +++++++----- .../cost_analysis_htm_with_and_without_hss_gf.py | 15 ++++++++++----- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_hss_elements_gf.py b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_hss_elements_gf.py index 3b43e7e3c3..7d3274343d 100644 --- a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_hss_elements_gf.py +++ b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_hss_elements_gf.py @@ -350,7 +350,7 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o _scenario_dict = hss_scenarios, _outputfilepath=roi_outputs_folder_fcdo, _value_of_life_suffix = 'Consumables_VSL', - _y_axis_lim= 100, + _y_axis_lim= 50, _year_suffix= f' ({str(relevant_period_for_costing[0])} - {str(relevant_period_for_costing[1])})', _projected_health_spending = projected_health_spending_baseline) @@ -380,15 +380,16 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o fig, ax = do_standard_bar_plot_with_ci( (max_ability_to_pay_for_implementation_summarized_fcdo / 1e6), annotations=[ - f"{row['mean'] / projected_health_spending_baseline :.2%} ({row['lower'] / projected_health_spending_baseline :.2%}-\n {row['upper'] / projected_health_spending_baseline:.2%})" + f"{row['mean'] / projected_health_spending_baseline :.2%} ({row['lower'] / projected_health_spending_baseline :.2%}- {row['upper'] / projected_health_spending_baseline:.2%})" for _, row in max_ability_to_pay_for_implementation_summarized_fcdo.iterrows() ], xticklabels_horizontal_and_wrapped=False, put_labels_in_legend=True, - offset=0.5, + offset=50, ) ax.set_title(name_of_plot) ax.set_ylabel('Maximum ability to pay \n(Millions)') +ax.set_ylim(bottom=0) fig.tight_layout() fig.savefig(roi_outputs_folder_fcdo / name_of_plot.replace(' ', '_').replace(',', '')) plt.close(fig) @@ -406,15 +407,16 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o fig, ax = do_standard_bar_plot_with_ci( (incremental_scenario_cost_summarized_fcdo / 1e6), annotations=[ - f"{row['mean'] / projected_health_spending_baseline :.2%} ({row['lower'] / projected_health_spending_baseline :.2%}-\n {row['upper'] / projected_health_spending_baseline:.2%})" + f"{row['mean'] / projected_health_spending_baseline :.2%} ({row['lower'] / projected_health_spending_baseline :.2%}- {row['upper'] / projected_health_spending_baseline:.2%})" for _, row in incremental_scenario_cost_summarized_fcdo.iterrows() ], 
xticklabels_horizontal_and_wrapped=False, put_labels_in_legend=True, - offset=0.5, + offset=50, ) ax.set_title(name_of_plot) ax.set_ylabel('Cost \n(USD Millions)') +ax.set_ylim(bottom=0) fig.tight_layout() fig.savefig(roi_outputs_folder_fcdo / name_of_plot.replace(' ', '_').replace(',', '')) plt.close(fig) diff --git a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py index 150a646b94..42d495adc1 100644 --- a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py +++ b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py @@ -532,6 +532,7 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o _draws = [2,3], _scenario_dict = htm_scenarios, _outputfilepath=roi_outputs_folder, + _year_suffix=f' ({str(relevant_period_for_costing[0])}- {str(relevant_period_for_costing[1])})', _value_of_life_suffix = 'HIV_VSL') # TB scenarios @@ -540,8 +541,9 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o _draws = [4,5], _scenario_dict = htm_scenarios, _outputfilepath=roi_outputs_folder, + _year_suffix=f' ({str(relevant_period_for_costing[0])}- {str(relevant_period_for_costing[1])})', _value_of_life_suffix = 'TB_VSL', - _y_axis_lim = 50 ) + _y_axis_lim = 30) # Malaria scenarios generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), @@ -549,6 +551,7 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o _draws = [6,7], _scenario_dict = htm_scenarios, _outputfilepath=roi_outputs_folder, + _year_suffix=f' ({str(relevant_period_for_costing[0])}- {str(relevant_period_for_costing[1])})', _value_of_life_suffix = 'Malaria_VSL') # 4. 
Plot Maximum ability-to-pay at CET @@ -564,15 +567,16 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o fig, ax = do_standard_bar_plot_with_ci( (max_ability_to_pay_for_implementation_summarized / 1e6), annotations=[ - f"{row['mean'] / projected_health_spending_baseline :.2%} ({row['lower'] / projected_health_spending_baseline :.2%}-\n {row['upper'] / projected_health_spending_baseline:.2%})" + f"{row['mean'] / projected_health_spending_baseline :.2%} ({row['lower'] / projected_health_spending_baseline :.2%}- \n {row['upper'] / projected_health_spending_baseline:.2%})" for _, row in max_ability_to_pay_for_implementation_summarized.iterrows() ], xticklabels_horizontal_and_wrapped=False, put_labels_in_legend=True, - offset=0.5, + offset=50, ) ax.set_title(name_of_plot) ax.set_ylabel('Maximum ability to pay \n(Millions)') +ax.set_ylim(bottom=0) fig.tight_layout() fig.savefig(roi_outputs_folder / name_of_plot.replace(' ', '_').replace(',', '')) plt.close(fig) @@ -583,15 +587,16 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o fig, ax = do_standard_bar_plot_with_ci( (incremental_scenario_cost_summarized / 1e6), annotations=[ - f"{row['mean'] / projected_health_spending_baseline :.2%} ({row['lower'] / projected_health_spending_baseline :.2%}-\n {row['upper'] / projected_health_spending_baseline:.2%})" + f"{row['mean'] / projected_health_spending_baseline :.2%} ({row['lower'] / projected_health_spending_baseline :.2%}- {row['upper'] / projected_health_spending_baseline:.2%})" for _, row in incremental_scenario_cost_summarized.iterrows() ], xticklabels_horizontal_and_wrapped=False, put_labels_in_legend=True, - offset=0.5, + offset=50, ) ax.set_title(name_of_plot) ax.set_ylabel('Cost \n(USD Millions)') +ax.set_ylim(bottom=0) fig.tight_layout() fig.savefig(roi_outputs_folder / name_of_plot.replace(' ', '_').replace(',', '')) plt.close(fig) From 7d209a1b8d87d18b28150517ff99d0bcf6a3b163 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Thu, 21 Nov 2024 19:53:02 +0000 Subject: [PATCH 184/230] update discounting function --- src/scripts/costing/cost_estimation.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index ee11c5b30a..2afe85ee59 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -38,9 +38,12 @@ #%% # Define a function to discount and summarise costs by cost_category -def apply_discounting_to_cost_data(_df, _discount_rate=0): - # Initial year and discount rate - initial_year = min(_df['year'].unique()) +def apply_discounting_to_cost_data(_df, _discount_rate=0, _year = None): + if _year == None: + # Initial year and discount rate + initial_year = min(_df['year'].unique()) + else: + initial_year = _year # Calculate the discounted values _df.loc[:, 'cost'] = _df['cost'] / ((1 + _discount_rate) ** (_df['year'] - initial_year)) @@ -94,7 +97,7 @@ def melt_model_output_draws_and_runs(_df, id_vars): # Overall cost assumptions TARGET_PERIOD = (Date(first_year_of_simulation, 1, 1), Date(final_year_of_simulation, 12, 31)) # Declare period for which the results will be generated (defined inclusively) - discount_rate = _discount_rate + discount_rate = 0.03 # this is the discount rate for annuitization # Read all cost parameters #--------------------------------------- @@ -1210,7 +1213,7 @@ def generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health: if cost in 
roi_at_costs: ratio = max(roi_at_costs[cost]) / min(roi_at_costs[cost]) ax.axvline(x=cost / 1e6, color='black', linestyle='--', linewidth=1) - ax.text(cost / 1e6 + 200, ax.get_ylim()[1] * 0.9, + ax.text(cost / 1e6 + ax.get_xlim()[1] * 0.011, ax.get_ylim()[1] * 0.75, f'At {cost / 1e6:.0f}M, ratio of ROI curves = {round(ratio, 2)}', color='black', fontsize=10, rotation=90, verticalalignment='top') From 6f2c5ea4336fba4d106db6153d979fb126f46be0 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Thu, 21 Nov 2024 19:53:42 +0000 Subject: [PATCH 185/230] discount malaria costs --- .../cost_analysis_htm_with_and_without_hss_gf.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py index 42d495adc1..f09248feec 100644 --- a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py +++ b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py @@ -39,7 +39,8 @@ do_line_plot_of_cost, generate_roi_plots, generate_multiple_scenarios_roi_plot, - estimate_projected_health_spending) + estimate_projected_health_spending, + apply_discounting_to_cost_data) # Define a timestamp for script outputs timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M") @@ -350,8 +351,7 @@ def get_number_of_people_covered_by_malaria_scaleup(_df, list_of_districts_cover draws_included = list_of_draws_with_malaria_scaleup_implemented_in_costing_period) irs_cost_per_person = unit_price_consumable[unit_price_consumable.Item_Code == 161]['Final_price_per_chosen_unit (USD, 2023)'] -# The above unit cost already includes implementation - project management (17%), personnel (6%), vehicles (10%), equipment (6%), monitoring and evaluation (3%), training (3%), -# other commodities (3%) and buildings (2%) from Alonso et al (2021) +# This cost includes non-consumable costs - personnel, equipment, fuel, logistics and planning, shipping, PPE. The cost is measured per person protected. 
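Patches 184-185 above change `apply_discounting_to_cost_data` to accept an explicit base year (`_year`) and then apply it to the malaria scale-up costs, so those costs are discounted back to the start of the costing period rather than to the earliest year present in each dataframe. A minimal sketch of that behaviour on a made-up dataframe (illustrative only; the real function lives in src/scripts/costing/cost_estimation.py):

from typing import Optional

import pandas as pd

# Illustrative re-statement of the discounting formula used in the patched
# apply_discounting_to_cost_data: cost / (1 + r) ** (year - base_year).
def discount_costs(df: pd.DataFrame, discount_rate: float = 0.03,
                   base_year: Optional[int] = None) -> pd.DataFrame:
    base = min(df['year'].unique()) if base_year is None else base_year
    out = df.copy()
    out['cost'] = out['cost'] / ((1 + discount_rate) ** (out['year'] - base))
    return out

toy = pd.DataFrame({'year': [2025, 2026, 2027], 'cost': [100.0, 100.0, 100.0]})
# Discounted to 2025: the 2025 value is unchanged, later years shrink.
print(discount_costs(toy, discount_rate=0.03, base_year=2025))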
Based on Stelmach et al (2018) irs_multiplication_factor = irs_cost_per_person * irs_coverage_rate total_irs_cost = irs_multiplication_factor.iloc[0] * district_population_covered_by_irs_scaleup_by_year # for districts and scenarios included total_irs_cost = total_irs_cost.groupby(level='year').sum() @@ -396,6 +396,7 @@ def melt_and_label_malaria_scaleup_cost(_df, label): # Iterate through additional costs, melt and concatenate for df, label in malaria_scaleup_costs: new_df = melt_and_label_malaria_scaleup_cost(df, label) + new_df = apply_discounting_to_cost_data(new_df, _discount_rate= discount_rate, _year = relevant_period_for_costing[0]) input_costs = pd.concat([input_costs, new_df], ignore_index=True) # Extract input_costs for browsing @@ -408,6 +409,10 @@ def melt_and_label_malaria_scaleup_cost(_df, label): # Aggregate input costs for further analysis (this step is needed because the malaria specific scale-up costs start from the year or malaria scale-up implementation) input_costs_subset = input_costs[ (input_costs['year'] >= relevant_period_for_costing[0]) & (input_costs['year'] <= relevant_period_for_costing[1])] + +# Extract input_costs for TGF +input_costs_subset.groupby(['draw', 'run', 'cost_category', 'year'])['cost'].sum().to_csv(figurespath / 'cost_for_gf.csv') + total_input_cost = input_costs_subset.groupby(['draw', 'run'])['cost'].sum() total_input_cost_summarized = summarize_cost_data(total_input_cost.unstack(level='run')) def find_difference_relative_to_comparison(_ser: pd.Series, @@ -488,6 +493,7 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o projected_health_spending_baseline = projected_health_spending[projected_health_spending.index.get_level_values(0) == 0]['mean'][0] # Combined ROI plot of relevant scenarios +''' # HTM scenarios X 5 generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), _incremental_input_cost=incremental_scenario_cost, @@ -504,6 +510,8 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o _outputfilepath=roi_outputs_folder, _value_of_life_suffix = 'HTM_full_HSS_VSL') +''' + # Only HSS generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), _incremental_input_cost=incremental_scenario_cost, From 540b7a4ef0d7f7e7a57b1fba17dc2d554776c031 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 25 Nov 2024 17:26:29 +0000 Subject: [PATCH 186/230] update stacked bar plot legend position and ylim and update time period --- .../cost_analysis_htm_with_and_without_hss_gf.py | 2 +- src/scripts/costing/cost_estimation.py | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py index f09248feec..2167ba4d5e 100644 --- a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py +++ b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py @@ -77,7 +77,7 @@ district_dict = dict(zip(district_dict['District_Num'], district_dict['District'])) # Period 
relevant for costing -TARGET_PERIOD= (Date(2027, 1, 1), Date(2029, 12, 31)) # This is the period that is costed +TARGET_PERIOD= (Date(2025, 1, 1), Date(2035, 12, 31)) # This is the period that is costed relevant_period_for_costing = [i.year for i in TARGET_PERIOD] list_of_relevant_years_for_costing = list(range(relevant_period_for_costing[0], relevant_period_for_costing[1] + 1)) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 2afe85ee59..7a242a0965 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -869,7 +869,15 @@ def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', # Arrange the legend in the same ascending order handles, labels = plt.gca().get_legend_handles_labels() - plt.legend(handles[::-1], labels[::-1], bbox_to_anchor=(1.05, 1), loc='upper right') + plt.legend(handles[::-1], labels[::-1], bbox_to_anchor=(1.05, 0.7), loc='center left', fontsize='small') + + # Extend the y-axis by 25% + max_y = ax.get_ylim()[1] + ax.set_ylim(0, max_y*1.25) + + # Save the plot with tight layout + plt.tight_layout(pad=2.0) # Ensure there is enough space for the legend + plt.subplots_adjust(right=0.8) # Adjust to ensure legend doesn't overlap plt.title(f'Costs by Scenario \n (Cost Category = {_cost_category} ; Period = {period})') plt.savefig(_outputfilepath / f'stacked_bar_chart_{_cost_category}_{period}{plt_name_suffix}.png', dpi=100, From 6490726fa33d84f2a3e966920a6ba8aeddedc0b9 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 25 Nov 2024 20:02:23 +0000 Subject: [PATCH 187/230] add draft facility operating costs --- resources/costing/ResourceFile_Costing.xlsx | 4 +-- src/scripts/costing/cost_estimation.py | 38 ++++++++++++++++++++- 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 6ba29eeb6f..5607bce7de 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cd9947ea1d634d9fdff8aa1cde0717361c83c64371f6db95aa1b1fcfb930f8c6 -size 4285545 +oid sha256:52506610a230279ddff6e3fe3440f840f144e2ecfa6cfdd7da30f6a835b91ba7 +size 4287554 diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 7a242a0965..691a6e0eaa 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -14,6 +14,7 @@ import pandas as pd import ast import math +import itertools from tlo.analysis.utils import ( extract_params, @@ -137,6 +138,9 @@ def melt_model_output_draws_and_runs(_df, id_vars): unit_cost_equipment['Facility_Level'] = unit_cost_equipment['Facility_Level'].replace(facility_level_mapping) unit_cost_equipment = unit_cost_equipment.rename(columns = {'Quantity_': 'Quantity'}) + # Load and prepare facility operation cost parameters + unit_cost_fac_operations = workbook_cost["facility_operations"] + # Function to prepare cost dataframe ready to be merged across cross categories def retain_relevant_column_subset(_df, _category_specific_group): columns_to_retain = ['draw', 'run', 'year', 'cost_subcategory', 'Facility_Level', _category_specific_group, 'cost'] @@ -649,11 +653,43 @@ def update_itemuse_for_level1b_using_level2_data(_df): # 4. 
Facility running costs # Average running costs by facility level and district times the number of facilities in the simulation + # Convert unit_costs to long format + unit_cost_fac_operations = pd.melt( + unit_cost_fac_operations, + id_vars=["Facility_Level"], # Columns to keep as identifiers + var_name="operating_cost_type", # Name for the new 'cost_category' column + value_name="unit_cost" # Name for the new 'cost' column + ) + unit_cost_fac_operations['Facility_Level'] = unit_cost_fac_operations['Facility_Level'].astype(str) + fac_count_by_district_and_level = mfl[['Facility_Level', 'Facility_Count', 'District']].groupby(['Facility_Level', 'District']).sum().reset_index() + + facility_operation_cost = pd.merge(unit_cost_fac_operations, fac_count_by_district_and_level, on = 'Facility_Level', how = 'left', validate = 'm:m') + facility_operation_cost['Facility_Count'] = facility_operation_cost['Facility_Count'].fillna(0).astype(int) + facility_operation_cost['cost'] = facility_operation_cost['unit_cost'] * facility_operation_cost['Facility_Count'] + + # Duplicate the same set of facility operation costs for all draws and runs + # Create the Cartesian product of `_draws` and `_runs` + combinations = list(itertools.product(_draws, _runs)) + comb_df = pd.DataFrame(combinations, columns=["draw", "run"]) + facility_operation_cost = facility_operation_cost.merge(comb_df, how="cross") + facility_operation_cost['cost_category'] = 'Facility operating cost' + operating_cost_mapping = {'Electricity': 'utilities_and_maintenance', 'Water': 'utilities_and_maintenance', 'Cleaning':'utilities_and_maintenance', + 'Security':'utilities_and_maintenance', 'Building maintenance': 'building_maintenance', + 'Facility management': 'utilities_and_maintenance', 'Vehicle maintenance': 'vehicle_maintenance', + 'Ambulance fuel': 'fuel_for_ambulance', 'Food for inpatient cases': 'food_for_inpatient_care'} + facility_operation_cost['cost_subcategory'] = facility_operation_cost['operating_cost_type'] + facility_operation_cost['cost_subcategory'] = facility_operation_cost['cost_subcategory'].map(operating_cost_mapping) + # Assume that the annual costs are constant each year of the simulation + facility_operation_cost = pd.concat([facility_operation_cost.assign(year=year) for year in years]) + + # Assume that the annual costs are constant each year of the simulation + facility_operation_cost = prepare_cost_dataframe(facility_operation_cost, _category_specific_group = 'operating_cost_type', _cost_category = 'facility operating cost') + # %% # Store all costs in single dataframe #-------------------------------------------- - scenario_cost = pd.concat([human_resource_costs, consumable_costs, equipment_costs, other_costs], ignore_index=True) + scenario_cost = pd.concat([human_resource_costs, consumable_costs, equipment_costs, other_costs, facility_operation_cost], ignore_index=True) scenario_cost['cost'] = pd.to_numeric(scenario_cost['cost'], errors='coerce') # Summarize costs From ceb0ccbbee659817a7b6880d21e9bca940ebb14e Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 26 Nov 2024 15:02:52 +0000 Subject: [PATCH 188/230] allow ROI plots with set colours for each draw --- ...st_analysis_htm_with_and_without_hss_gf.py | 4 +++- src/scripts/costing/cost_estimation.py | 22 ++++++++++++++++--- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py 
b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py index 2167ba4d5e..1840d47cae 100644 --- a/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py +++ b/src/scripts/comparison_of_horizontal_and_vertical_programs/gf_analyses/cost_analysis_htm_with_and_without_hss_gf.py @@ -524,6 +524,7 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o _projected_health_spending = projected_health_spending_baseline) # HTM scenarios with HSS +draw_colors = {8: '#438FBA', 9:'#5E4FA2'} generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), _incremental_input_cost=incremental_scenario_cost, _draws = [8,9], @@ -532,7 +533,8 @@ def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_o _value_of_life_suffix = 'HTM_full_HSS_VSL', _plot_vertical_lines_at = [0, 1e9, 3e9], _year_suffix= f' ({str(relevant_period_for_costing[0])}- {str(relevant_period_for_costing[1])})', - _projected_health_spending = projected_health_spending_baseline) + _projected_health_spending = projected_health_spending_baseline, + _draw_colors = draw_colors) # HIV scenarios generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 691a6e0eaa..53965901af 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -1157,7 +1157,12 @@ def generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health: _y_axis_lim = None, _plot_vertical_lines_at: list = None, _year_suffix = '', - _projected_health_spending = None): + _projected_health_spending = None, + _draw_colors = None): + # Default color mapping if not provided + if _draw_colors is None: + _draw_colors = {draw: color for draw, color in zip(_draws, plt.cm.tab10.colors[:len(_draws)])} + # Calculate maximum ability to pay for implementation _monetary_value_of_incremental_health = _monetary_value_of_incremental_health[_monetary_value_of_incremental_health.index.get_level_values('draw').isin(_draws)] _incremental_input_cost = _incremental_input_cost[_incremental_input_cost.index.get_level_values('draw').isin(_draws)] @@ -1235,8 +1240,19 @@ def generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health: upper_values = collapsed_data[collapsed_data['stat'] == 'upper'][['implementation_cost', 'roi']] # Plot mean line and confidence interval - ax.plot(implementation_costs / 1e6, mean_values['roi'], label=f'{_scenario_dict[draw_index]}') - ax.fill_between(implementation_costs / 1e6, lower_values['roi'], upper_values['roi'], alpha=0.2) + ax.plot( + implementation_costs / 1e6, + mean_values['roi'], + label=f'{_scenario_dict[draw_index]}', + color=_draw_colors.get(draw_index, 'black'), + ) + ax.fill_between( + implementation_costs / 1e6, + lower_values['roi'], + upper_values['roi'], + alpha=0.2, + color=_draw_colors.get(draw_index, 'black'), + ) max_val = mean_values[~np.isinf(mean_values['roi'])]['roi'].max() max_roi.append(max_val) From 693715326339c202c794038bcc0bbb9f06dca719 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 26 Nov 2024 16:41:43 +0000 Subject: [PATCH 189/230] 
update RF to apply historical scaling to HR --- .../scaling_capabilities/ResourceFile_dynamic_HR_scaling.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_dynamic_HR_scaling.xlsx b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_dynamic_HR_scaling.xlsx index a633e6fc92..36b9fd0dc2 100644 --- a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_dynamic_HR_scaling.xlsx +++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_dynamic_HR_scaling.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d2d74390498e497ee0bf68773327868f6b199c1c9569337b173fa330c0f2f926 -size 24593 +oid sha256:b8388ef18f073c9470c01f8408bff572017484763cfc4c87bb0212c38ee0b6d7 +size 25488 From 417bc58947ddda8bafa084c3752aa978bc5e6af9 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 26 Nov 2024 16:42:17 +0000 Subject: [PATCH 190/230] update costing scenarios - real world and perfect health system --- .../costing/example_costing_scenario.py | 94 ++++++++++++++++--- 1 file changed, 82 insertions(+), 12 deletions(-) diff --git a/src/scripts/costing/example_costing_scenario.py b/src/scripts/costing/example_costing_scenario.py index 586c7dec12..30ca966f96 100644 --- a/src/scripts/costing/example_costing_scenario.py +++ b/src/scripts/costing/example_costing_scenario.py @@ -1,6 +1,4 @@ ''' - - Run on the batch system using: ```tlo batch-submit src/scripts/costing/example_costing_scenario.py``` @@ -9,42 +7,114 @@ ''' +from pathlib import Path +from typing import Dict + from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios from tlo.methods.fullmodel import fullmodel +from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher from tlo.scenario import BaseScenario -class SampleCostingScenario(BaseScenario): +class CostingScenarios(BaseScenario): def __init__(self): super().__init__() self.seed = 0 self.start_date = Date(2010, 1, 1) - self.end_date = Date(2013, 1, 1) - self.pop_size = 20_000 # <- recommended population size for the runs - self.number_of_draws = 2 # <- one scenario + self.end_date = Date(2030, 1, 1) + self.pop_size = 1_000 # <- recommended population size for the runs + self._scenarios = self._get_scenarios() + self.number_of_draws = len(self._scenarios) self.runs_per_draw = 2 # <- repeated this many times def log_configuration(self): return { - 'filename': 'example_costing_scenario', + 'filename': 'cost_scenarios', 'directory': './outputs', # <- (specified only for local running) 'custom_levels': { '*': logging.WARNING, 'tlo.methods.demography': logging.INFO, - 'tlo.methods.healthsystem': logging.INFO, + 'tlo.methods.healthburden': logging.INFO, 'tlo.methods.healthsystem.summary': logging.INFO, } } def modules(self): - return fullmodel(resourcefilepath=self.resources) + return (fullmodel(resourcefilepath=self.resources) + + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)]) def draw_parameters(self, draw_number, rng): + if draw_number < len(self._scenarios): + return list(self._scenarios.values())[draw_number] + + def _get_scenarios(self) -> Dict[str, Dict]: + """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario.""" + + self.YEAR_OF_SYSTEM_CHANGE = 2020 + self.mode_appt_constraints_postSwitch = [1,2] + self.cons_availability = ['default', 'all'] + 
self.healthsystem_function = [[False, False], [False, True]] + self.healthcare_seeking = [[False, False], [False, True]] + return { - 'HealthSystem': { - 'cons_availability': ['default', 'all'][draw_number] - } + "Real world": self._common_baseline(), + + "Perfect health system": + mix_scenarios( + self._common_baseline(), + { + 'HealthSystem': { + # Human Resources + 'mode_appt_constraints_postSwitch': self.mode_appt_constraints_postSwitch[1], # <-- Mode 2 post-change to show effects of HRH + "scale_to_effective_capabilities": True, # <-- Transition into Mode2 with the effective capabilities in HRH 'revealed' in Mode 1 + "year_mode_switch": self.YEAR_OF_SYSTEM_CHANGE, + + # Consumables + 'cons_availability_postSwitch': self.cons_availability[1], + 'year_cons_availability_switch': self.YEAR_OF_SYSTEM_CHANGE, + }, + 'ImprovedHealthSystemAndCareSeekingScenarioSwitcher': { + 'max_healthcare_seeking': self.healthcare_seeking[1], + 'max_healthsystem_function': self.healthsystem_function[1], + 'year_of_switch': self.YEAR_OF_SYSTEM_CHANGE, + } + } + ), } + def _common_baseline(self) -> Dict: + """Return the Dict with values for the parameter changes that define the baseline scenario. """ + return mix_scenarios( + get_parameters_for_status_quo(), # <-- Parameters that have been the calibration targets + # Set up the HealthSystem to transition from Mode 1 -> Mode 2, with rescaling when there are HSS changes + { + 'HealthSystem': { + # Human resources + 'mode_appt_constraints': 1, # <-- Mode 1 prior to change to preserve calibration + 'mode_appt_constraints_postSwitch': self.mode_appt_constraints_postSwitch[0], # <-- Mode 2 post-change to show effects of HRH + "scale_to_effective_capabilities": True, # <-- Transition into Mode2 with the effective capabilities in HRH 'revealed' in Mode 1 + # This happens in the year before mode change, as the model calibration is done by that year + "year_mode_switch": self.YEAR_OF_SYSTEM_CHANGE, + 'yearly_HR_scaling_mode': 'historical_scaling', # for 5 years of 2020-2024; source data year 2019 + + # Consumables + 'cons_availability': 'default', + 'cons_availability_postSwitch': self.cons_availability[0], + 'year_cons_availability_switch': self.YEAR_OF_SYSTEM_CHANGE, + + # Normalize the behaviour of Mode 2 + "policy_name": 'Naive', + "tclose_overwrite": 1, + "tclose_days_offset_overwrite": 7, + }, + 'ImprovedHealthSystemAndCareSeekingScenarioSwitcher': { + 'max_healthcare_seeking': self.healthcare_seeking[0], + 'max_healthsystem_function': self.healthsystem_function[0], + 'year_of_switch': self.YEAR_OF_SYSTEM_CHANGE, + } + }, + ) + if __name__ == '__main__': from tlo.cli import scenario_run From 74b42fb9ae8042f51be8c9aafe19a8352cc23855 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 26 Nov 2024 17:19:01 +0000 Subject: [PATCH 191/230] convert facility operations cots to annual costs --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 5607bce7de..9bd07060e1 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:52506610a230279ddff6e3fe3440f840f144e2ecfa6cfdd7da30f6a835b91ba7 -size 4287554 +oid sha256:aa046187fd0edf597646fe4795c428f0053cdeb0b56537780cc945ac283fea1f +size 4287514 From 0a42b722004519576473332c437273c560fa68f7 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 26 Nov 
2024 17:56:11 +0000 Subject: [PATCH 192/230] pre-define color pallettes for cost categories --- src/scripts/costing/cost_estimation.py | 44 ++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 53965901af..9cd7461147 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -842,6 +842,17 @@ def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', subset_df = subset_df[subset_df['year'].isin(_year)] if _cost_category == 'all': + # Predefined color mapping for cost categories + color_mapping = { + 'human resources for health': '#1f77b4', # Muted blue + 'medical consumables': '#ff7f0e', # Muted orange + 'medical equipment': '#2ca02c', # Muted green + 'other': '#d62728', # Muted red + 'facility operating cost': '#9467bd', # Muted purple + } + # Default color for unexpected categories + default_color = 'gray' + if (_disaggregate_by_subgroup == True): raise ValueError(f"Invalid input for _disaggregate_by_subgroup: '{_disaggregate_by_subgroup}'. " f"Value can be True only when plotting a specific _cost_category") @@ -878,8 +889,11 @@ def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', sorted_columns = pivot_df.sum(axis=0).sort_values().index pivot_df = pivot_df[sorted_columns] # Rearrange columns by sorted order + # Define custom colors for the bars + column_colors = [color_mapping.get(col, default_color) for col in sorted_columns] + # Plot the stacked bar chart - ax = pivot_df.plot(kind='bar', stacked=True, figsize=(10, 6)) + ax = pivot_df.plot(kind='bar', stacked=True, figsize=(10, 6), color=column_colors) # Set custom x-tick labels if _scenario_dict is provided if _scenario_dict: @@ -980,9 +994,21 @@ def do_line_plot_of_cost(_df, _cost_category='all', lines = [] labels = [] - # Define a list of colors to rotate through - colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'orange', 'purple', 'brown', 'gray'] # Add more colors as needed - color_cycle = iter(colors) # Create an iterator from the color list + # Define a list of colors + if disaggregate_by == 'cost_category': + color_mapping = { + 'human resources for health': '#1f77b4', # Muted blue + 'medical consumables': '#ff7f0e', # Muted orange + 'medical equipment': '#2ca02c', # Muted green + 'other': '#d62728', # Muted red + 'facility operating cost': '#9467bd', # Muted purple + } + # Default color for unexpected categories + default_color = 'gray' + else: + # Define a list of colors to rotate through + colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'orange', 'purple', 'brown', 'gray'] # Add more colors as needed + color_cycle = iter(colors) # Create an iterator from the color list # Plot each line for the disaggregated values if disaggregate_by: @@ -992,8 +1018,11 @@ def do_line_plot_of_cost(_df, _cost_category='all', value_lower = lower_values.xs(disaggregate_value, level=disaggregate_by) value_upper = upper_values.xs(disaggregate_value, level=disaggregate_by) - # Get the next color from the cycle - color = next(color_cycle) + if disaggregate_by == 'cost_category': + color = color_mapping.get(disaggregate_value, default_color) + else: + # Get the next color from the cycle + color = next(color_cycle) # Plot line for mean and shaded region for 95% CI line, = plt.plot(value_mean.index, value_mean, marker='o', linestyle='-', color=color, label=f'{disaggregate_value} - Mean') @@ -1038,7 +1067,8 @@ def do_line_plot_of_cost(_df, _cost_category='all', else: 
filename_suffix = f"_by_{disaggregate_by}" - filename = f'trend_{_cost_category}_{period}{filename_suffix}.png' + draw_suffix = 'all' if _draws is None else str(_draws) + filename = f'trend_{_cost_category}_{period}{filename_suffix}_draw-{draw_suffix}.png' plt.savefig(_outputfilepath / filename, dpi=100, bbox_inches='tight') plt.close() From f8a70f2980c7ccc0cabbb529b4f8dac08c23cb88 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 26 Nov 2024 20:13:58 +0000 Subject: [PATCH 193/230] edit figure name --- src/scripts/costing/cost_estimation.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 9cd7461147..295af26c1f 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -825,7 +825,8 @@ def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', _disaggregate_by_subgroup: bool = False, _year = 'all', _draws = None, _scenario_dict: dict = None, - _outputfilepath: Path = None): + _outputfilepath: Path = None, + _add_figname_suffix = ''): # Subset and Pivot the data to have 'Cost Sub-category' as columns # Make a copy of the dataframe to avoid modifying the original _df = _df[_df.stat == 'mean'].copy() @@ -930,7 +931,7 @@ def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', plt.subplots_adjust(right=0.8) # Adjust to ensure legend doesn't overlap plt.title(f'Costs by Scenario \n (Cost Category = {_cost_category} ; Period = {period})') - plt.savefig(_outputfilepath / f'stacked_bar_chart_{_cost_category}_{period}{plt_name_suffix}.png', dpi=100, + plt.savefig(_outputfilepath / f'stacked_bar_chart_{_cost_category}_{period}{plt_name_suffix}{_add_figname_suffix}.png', dpi=100, bbox_inches='tight') plt.close() From 60fd3d01a4f071b2a9be146a9dcbf5404b8a08bd Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 26 Nov 2024 20:14:48 +0000 Subject: [PATCH 194/230] add draft script to generate outputs for costing overview paper --- .../costing/costing_overview_analysis.py | 173 ++++++++++++++++++ 1 file changed, 173 insertions(+) create mode 100644 src/scripts/costing/costing_overview_analysis.py diff --git a/src/scripts/costing/costing_overview_analysis.py b/src/scripts/costing/costing_overview_analysis.py new file mode 100644 index 0000000000..a64b7e9fc6 --- /dev/null +++ b/src/scripts/costing/costing_overview_analysis.py @@ -0,0 +1,173 @@ + +from pathlib import Path +from tlo import Date + +import datetime +import os +import textwrap + +import matplotlib.pyplot as plt +import seaborn as sns +import numpy as np +import pandas as pd + +from tlo.analysis.utils import ( + extract_params, + extract_results, + get_scenario_info, + get_scenario_outputs, + load_pickled_dataframes, + summarize +) + +from scripts.costing.cost_estimation import (estimate_input_cost_of_scenarios, + summarize_cost_data, + do_stacked_bar_plot_of_cost_by_category, + do_line_plot_of_cost, + generate_multiple_scenarios_roi_plot, + estimate_projected_health_spending) + +# Define a timestamp for script outputs +timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M") + +# Print the start time of the script +print('Script Start', datetime.datetime.now().strftime('%H:%M')) + +# Create folders to store results +resourcefilepath = Path("./resources") +outputfilepath = Path('./outputs/t.mangal@imperial.ac.uk') +figurespath = Path('./outputs/costing/overview/') +if not os.path.exists(figurespath): + os.makedirs(figurespath) + +# Load result files +# 
------------------------------------------------------------------------------------------------------------------ +results_folder = get_scenario_outputs('hss_elements-2024-11-12T172311Z.py', outputfilepath)[0] +#results_folder = Path('./outputs/cost_scenarios-2024-11-26T164353Z') + +# Check can read results from draw=0, run=0 +log = load_pickled_dataframes(results_folder, 0, 0) # look at one log (so can decide what to extract) +params = extract_params(results_folder) + +# Declare default parameters for cost analysis +# ------------------------------------------------------------------------------------------------------------------ +# Period relevant for costing +TARGET_PERIOD_INTERVENTION = (Date(2020, 1, 1), Date(2030, 12, 31)) # This is the period that is costed +relevant_period_for_costing = [i.year for i in TARGET_PERIOD_INTERVENTION] +list_of_relevant_years_for_costing = list(range(relevant_period_for_costing[0], relevant_period_for_costing[1] + 1)) + +# Scenarios +cost_scenarios = {0: "Real world", 1: "Perfect health system"} + +# Costing parameters +discount_rate = 0.03 + +# Estimate standard input costs of scenario +# ----------------------------------------------------------------------------------------------------------------------- +input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, _draws = [0], + _years=list_of_relevant_years_for_costing, cost_only_used_staff=True, + _discount_rate = discount_rate, summarize = True) +# _draws = htm_scenarios_for_gf_report --> this subset is created after calculating malaria scale up costs +# TODO Remove the manual fix below once the logging for these is corrected +input_costs.loc[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost'] = \ + input_costs.loc[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost']/10 + +input_costs_undiscounted = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, _draws = [0,1], + _years=list_of_relevant_years_for_costing, cost_only_used_staff=True, + _discount_rate = 0, summarize = True) +# _draws = htm_scenarios_for_gf_report --> this subset is created after calculating malaria scale up costs +# TODO Remove the manual fix below once the logging for these is corrected +input_costs_undiscounted.loc[input_costs_undiscounted.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost'] = \ + input_costs_undiscounted.loc[input_costs_undiscounted.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost']/10 + +# Get figures for overview paper +# ----------------------------------------------------------------------------------------------------------------------- +# Figure 1: Estimated costs by cost category +do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'all', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = cost_scenarios) + +# Figure 2: Estimated costs by year +do_line_plot_of_cost(_df = input_costs_undiscounted, _cost_category='all', + _year='all', _draws= [0], + disaggregate_by= 'cost_category', + _outputfilepath = figurespath) +do_line_plot_of_cost(_df = input_costs_undiscounted, _cost_category='all', + _year='all', _draws= [1], + disaggregate_by= 'cost_category', + _outputfilepath = figurespath) + +# Figure 3: Comparison of model-based cost estimates with actual expenditure recorded for 2018/19 and budget planned for 2020/21-2022/23 + +# Figure 4: Total cost by scenario assuming 0% 
discount rate +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_undiscounted, + _cost_category = 'all', + _disaggregate_by_subgroup = False, + _outputfilepath = figurespath, + _scenario_dict = cost_scenarios, + _add_figname_suffix = '_UNDISCOUNTED') + +# Figure 5: Total cost by scenario applying changing discount rates + +# Get tables for overview paper +# ----------------------------------------------------------------------------------------------------------------------- +# Group data and aggregate cost for each draw and stat +def generate_detail_cost_table(_groupby_var, _groupby_var_name, _longtable = False): + edited_input_costs = input_costs.copy() + edited_input_costs[_groupby_var] = edited_input_costs[_groupby_var].replace('_', ' ', regex=True) + edited_input_costs[_groupby_var] = edited_input_costs[_groupby_var].replace('%', '\%', regex=True) + edited_input_costs[_groupby_var] = edited_input_costs[_groupby_var].replace('&', '\&', regex=True) + + grouped_costs = edited_input_costs.groupby(['cost_category', _groupby_var, 'draw', 'stat'])['cost'].sum() + # Format the 'cost' values before creating the LaTeX table + grouped_costs = grouped_costs.apply(lambda x: f"{float(x):,.2f}") + # Remove underscores from all column values + + # Create a pivot table to restructure the data for LaTeX output + pivot_data = {} + for draw in [0, 1]: + draw_data = grouped_costs.xs(draw, level='draw').unstack(fill_value=0) # Unstack to get 'stat' as columns + # Concatenate 'mean' with 'lower-upper' in the required format + pivot_data[draw] = draw_data['mean'].astype(str) + ' [' + \ + draw_data['lower'].astype(str) + '-' + \ + draw_data['upper'].astype(str) + ']' + + # Combine draw data into a single DataFrame + table_data = pd.concat([pivot_data[0], pivot_data[1]], axis=1, keys=['draw=0', 'draw=1']).reset_index() + + # Rename columns for clarity + table_data.columns = ['Cost Category', _groupby_var_name, 'Real World', 'Perfect Health System'] + + # Replace '\n' with '\\' for LaTeX line breaks + #table_data['Real World'] = table_data['Real World'].apply(lambda x: x.replace("\n", "\\\\")) + #table_data['Perfect Health System'] = table_data['Perfect Health System'].apply(lambda x: x.replace("\n", "\\\\")) + + # Convert to LaTeX format with horizontal lines after every row + latex_table = table_data.to_latex( + longtable=_longtable, # Use the longtable environment for large tables + column_format='|R{4cm}|R{5cm}|R{3.5cm}|R{3.5cm}|', + caption=f"Summarized Costs by Category and {_groupby_var_name}", + label=f"tab:cost_by_{_groupby_var}", + position="h", + index=False, + escape=False, # Prevent escaping special characters like \n + header=True + ) + + # Add \hline after the header and after every row for horizontal lines + latex_table = latex_table.replace("\\\\", "\\\\ \\hline") # Add \hline after each row + #latex_table = latex_table.replace("_", " ") # Add \hline after each row + + # Specify the file path to save + latex_file_path = figurespath / f'cost_by_{_groupby_var}.tex' + + # Write to a file + with open(latex_file_path, 'w') as latex_file: + latex_file.write(latex_table) + + # Print latex for reference + print(latex_table) + +# Table : Cost by cost subcategory +generate_detail_cost_table(_groupby_var = 'cost_subcategory', _groupby_var_name = 'Cost Subcategory') +# Table : Cost by cost subgroup +generate_detail_cost_table(_groupby_var = 'cost_subgroup', _groupby_var_name = 'Category Subgroup', _longtable = True) + From 63f748bf81a41f3783e6222ac613c267bff7e7da Mon Sep 17 00:00:00 2001 From: 
sakshimohan Date: Tue, 26 Nov 2024 20:37:24 +0000 Subject: [PATCH 195/230] merge in changes from master - consumables and HR logging --- src/tlo/methods/consumables.py | 90 +++++++++------ src/tlo/methods/healthsystem.py | 191 ++++++++++++++++++++++++++------ src/tlo/methods/hsi_event.py | 12 +- tests/test_consumables.py | 69 +++++++++--- tests/test_healthsystem.py | 127 ++++++++++++++++++++- 5 files changed, 404 insertions(+), 85 deletions(-) diff --git a/src/tlo/methods/consumables.py b/src/tlo/methods/consumables.py index 9a96ae93cd..e51a95fe74 100644 --- a/src/tlo/methods/consumables.py +++ b/src/tlo/methods/consumables.py @@ -54,13 +54,13 @@ def __init__(self, self._prob_item_codes_available = None # Data on the probability of each item_code being available self._is_available = None # Dict of sets giving the set of item_codes available, by facility_id self._is_unknown_item_available = None # Whether an unknown item is available, by facility_id - self._not_recognised_item_codes = set() # The item codes requested but which are not recognised. + self._not_recognised_item_codes = defaultdict(set) # The item codes requested but which are not recognised. # Save designations self._item_code_designations = item_code_designations # Save all item_codes that are defined and pd.Series with probs of availability from ResourceFile - self.item_codes, self._processed_consumables_data = \ + self.item_codes, self._processed_consumables_data = \ self._process_consumables_data(availability_data=availability_data) # Set the availability based on the argument provided (this can be updated later after the class is initialised) @@ -199,7 +199,8 @@ def _determine_default_return_value(cons_availability, default_return_value): def _request_consumables(self, facility_info: 'FacilityInfo', # noqa: F821 - item_codes: dict, + essential_item_codes: dict, + optional_item_codes: Optional[dict] = None, to_log: bool = True, treatment_id: Optional[str] = None ) -> dict: @@ -208,40 +209,52 @@ def _request_consumables(self, :param facility_info: The facility_info from which the request for consumables originates :param item_codes: dict of the form {: } for the items requested + :param optional_item_codes: dict of the form {: } for the optional items requested :param to_log: whether the request is logged. :param treatment_id: the TREATMENT_ID of the HSI (which is entered to the log, if provided). :return: dict of the form {: } indicating the availability of each item requested. """ + # If optional_item_codes is None, treat it as an empty dictionary + optional_item_codes = optional_item_codes or {} + _all_item_codes = {**essential_item_codes, **optional_item_codes} # Issue warning if any item_code is not recognised. 
- if not self.item_codes.issuperset(item_codes.keys()): - self._not_recognised_item_codes.add((treatment_id, tuple(set(item_codes.keys()) - self.item_codes))) + not_recognised_item_codes = _all_item_codes.keys() - self.item_codes + if len(not_recognised_item_codes) > 0: + self._not_recognised_item_codes[treatment_id] |= not_recognised_item_codes # Look-up whether each of these items is available in this facility currently: - available = self._lookup_availability_of_consumables(item_codes=item_codes, facility_info=facility_info) + available = self._lookup_availability_of_consumables(item_codes=_all_item_codes, facility_info=facility_info) # Log the request and the outcome: if to_log: - items_available = {k: v for k, v in item_codes.items() if available[k]} - items_not_available = {k: v for k, v in item_codes.items() if not available[k]} - logger.info(key='Consumables', - data={ - 'TREATMENT_ID': (treatment_id if treatment_id is not None else ""), - 'Item_Available': str(items_available), - 'Item_NotAvailable': str(items_not_available), - }, - # NB. Casting the data to strings because logger complains with dict of varying sizes/keys - description="Record of each consumable item that is requested." - ) - - self._summary_counter.record_availability(items_available=items_available, - items_not_available=items_not_available) + items_available = {k: v for k, v in _all_item_codes.items() if available[k]} + items_not_available = {k: v for k, v in _all_item_codes.items() if not available[k]} + + # Log items used if all essential items are available + items_used = items_available if all(available.get(k, False) for k in essential_item_codes) else {} + + logger.info( + key='Consumables', + data={ + 'TREATMENT_ID': treatment_id or "", + 'Item_Available': str(items_available), + 'Item_NotAvailable': str(items_not_available), + 'Item_Used': str(items_used), + }, + description="Record of requested and used consumable items." + ) + self._summary_counter.record_availability( + items_available=items_available, + items_not_available=items_not_available, + items_used=items_used, + ) # Return the result of the check on availability return available def _lookup_availability_of_consumables(self, - facility_info: 'FacilityInfo', # noqa: F821 + facility_info: 'FacilityInfo', # noqa: F821 item_codes: dict ) -> dict: """Lookup whether a particular item_code is in the set of available items for that facility (in @@ -265,15 +278,24 @@ def _lookup_availability_of_consumables(self, return avail def on_simulation_end(self): - """Do tasks at the end of the simulation: Raise warnings and enter to log about item_codes not recognised.""" - if self._not_recognised_item_codes: - warnings.warn(UserWarning(f"Item_Codes were not recognised./n" - f"{self._not_recognised_item_codes}")) - for _treatment_id, _item_codes in self._not_recognised_item_codes: - logger.info( - key="item_codes_not_recognised", - data={_treatment_id if _treatment_id is not None else "": list(_item_codes)} + """Do tasks at the end of the simulation. + + Raise warnings and enter to log about item_codes not recognised. 
+ """ + if len(self._not_recognised_item_codes) > 0: + not_recognised_item_codes = { + treatment_id if treatment_id is not None else "": sorted(codes) + for treatment_id, codes in self._not_recognised_item_codes.items() + } + warnings.warn( + UserWarning( + f"Item_Codes were not recognised.\n{not_recognised_item_codes}" ) + ) + logger.info( + key="item_codes_not_recognised", + data=not_recognised_item_codes, + ) def on_end_of_year(self): self._summary_counter.write_to_log_and_reset_counters() @@ -353,10 +375,11 @@ def _reset_internal_stores(self) -> None: self._items = { 'Available': defaultdict(int), - 'NotAvailable': defaultdict(int) + 'NotAvailable': defaultdict(int), + 'Used': defaultdict(int), } - def record_availability(self, items_available: dict, items_not_available: dict) -> None: + def record_availability(self, items_available: dict, items_not_available: dict, items_used: dict) -> None: """Add information about the availability of requested items to the running summaries.""" # Record items that were available @@ -367,6 +390,10 @@ def record_availability(self, items_available: dict, items_not_available: dict) for _item, _num in items_not_available.items(): self._items['NotAvailable'][_item] += _num + # Record items that were used + for _item, _num in items_used.items(): + self._items['Used'][_item] += _num + def write_to_log_and_reset_counters(self): """Log summary statistics and reset the data structures.""" @@ -377,6 +404,7 @@ def write_to_log_and_reset_counters(self): data={ "Item_Available": self._items['Available'], "Item_NotAvailable": self._items['NotAvailable'], + "Item_Used": self._items['Used'], }, ) diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index fe57444feb..1dbd47fb63 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -165,7 +165,7 @@ class HealthSystem(Module): 'use_funded_or_actual_staffing': Parameter( Types.STRING, "If `actual`, then use the numbers and distribution of staff estimated to be available" " currently; If `funded`, then use the numbers and distribution of staff that are " - "potentially available. If 'funded_plus`, then use a dataset in which the allocation of " + "potentially available. If `funded_plus`, then use a dataset in which the allocation of " "staff to facilities is tweaked so as to allow each appointment type to run at each " "facility_level in each district for which it is defined. N.B. This parameter is " "over-ridden if an argument is provided to the module initialiser.", @@ -236,6 +236,9 @@ class HealthSystem(Module): 'year_cons_availability_switch': Parameter( Types.INT, "Year in which consumable availability switch is enforced. The change happens" "on 1st January of that year.)"), + 'year_use_funded_or_actual_staffing_switch': Parameter( + Types.INT, "Year in which switch for `use_funded_or_actual_staffing` is enforced. (The change happens" + "on 1st January of that year.)"), 'priority_rank': Parameter( Types.DICT, "Data on the priority ranking of each of the Treatment_IDs to be adopted by " " the queueing system under different policies, where the lower the number the higher" @@ -323,7 +326,10 @@ class HealthSystem(Module): Types.INT, 'Mode considered after a mode switch in year_mode_switch.'), 'cons_availability_postSwitch': Parameter( Types.STRING, 'Consumables availability after switch in `year_cons_availability_switch`. 
Acceptable values' - 'are the same as those for Parameter `cons_availability`.') + 'are the same as those for Parameter `cons_availability`.'), + 'use_funded_or_actual_staffing_postSwitch': Parameter( + Types.STRING, 'Staffing availability after switch in `year_use_funded_or_actual_staffing_switch`. ' + 'Acceptable values are the same as those for Parameter `use_funded_or_actual_staffing`.'), } PROPERTIES = { @@ -447,10 +453,9 @@ def __init__( assert isinstance(capabilities_coefficient, float) self.capabilities_coefficient = capabilities_coefficient - # Find which set of assumptions to use - those for the actual staff available or the funded staff available - if use_funded_or_actual_staffing is not None: - assert use_funded_or_actual_staffing in ['actual', 'funded', 'funded_plus'] + # Save argument for assumptions to use for 'use_funded_or_actual_staffing` self.arg_use_funded_or_actual_staffing = use_funded_or_actual_staffing + self._use_funded_or_actual_staffing = None # <-- this is the private internal store of the value that is used. # Define (empty) list of registered disease modules (filled in at `initialise_simulation`) self.recognised_modules_names = [] @@ -637,9 +642,15 @@ def pre_initialise_population(self): # Determine service_availability self.service_availability = self.get_service_availability() - self.process_human_resources_files( - use_funded_or_actual_staffing=self.get_use_funded_or_actual_staffing() - ) + # Process health system organisation files (Facilities, Appointment Types, Time Taken etc.) + self.process_healthsystem_organisation_files() + + # Set value for `use_funded_or_actual_staffing` and process Human Resources Files + # (Initially set value should be equal to what is specified by the parameter, but overwritten with what was + # provided in argument if an argument was specified -- provided for backward compatibility/debugging.) + self.use_funded_or_actual_staffing = self.parameters['use_funded_or_actual_staffing'] \ + if self.arg_use_funded_or_actual_staffing is None \ + else self.arg_use_funded_or_actual_staffing # Initialise the BedDays class self.bed_days = BedDays(hs_module=self, @@ -738,6 +749,16 @@ def initialise_simulation(self, sim): Date(self.parameters["year_equip_availability_switch"], 1, 1) ) + # Schedule an equipment availability switch + sim.schedule_event( + HealthSystemChangeParameters( + self, + parameters={ + 'use_funded_or_actual_staffing': self.parameters['use_funded_or_actual_staffing_postSwitch'] + } + ), + Date(self.parameters["year_use_funded_or_actual_staffing_switch"], 1, 1) + ) # Schedule a one-off rescaling of _daily_capabilities broken down by officer type and level. # This occurs on 1st January of the year specified in the parameters. @@ -754,6 +775,9 @@ def initialise_simulation(self, sim): # whilst the actual scaling will only take effect from 2011 onwards. 
sim.schedule_event(DynamicRescalingHRCapabilities(self), Date(sim.date)) + # Schedule the logger to occur at the start of every year + sim.schedule_event(HealthSystemLogger(self), Date(sim.date.year, 1, 1)) + def on_birth(self, mother_id, child_id): self.bed_days.on_birth(self.sim.population.props, mother_id, child_id) @@ -809,8 +833,15 @@ def setup_priority_policy(self): if 'Tb' in self.sim.modules: self.list_fasttrack.append(('tb_diagnosed', 'FT_if_tbdiagnosed')) - def process_human_resources_files(self, use_funded_or_actual_staffing: str): - """Create the data-structures needed from the information read into the parameters.""" + def process_healthsystem_organisation_files(self): + """Create the data-structures needed from the information read into the parameters: + * self._facility_levels + * self._appointment_types + * self._appt_times + * self._appt_type_by_facLevel + * self._facility_by_facility_id + * self._facilities_for_each_district + """ # * Define Facility Levels self._facility_levels = set(self.parameters['Master_Facilities_List']['Facility_Level']) - {'5'} @@ -903,23 +934,26 @@ def process_human_resources_files(self, use_funded_or_actual_staffing: str): self._facility_by_facility_id = facilities_by_facility_id self._facilities_for_each_district = facilities_per_level_and_district + def setup_daily_capabilities(self, use_funded_or_actual_staffing): + """Set up `self._daily_capabilities` and `self._officers_with_availability`. + This is called when the value for `use_funded_or_actual_staffing` is set - at the beginning of the simulation + and when the assumption when the underlying assumption for `use_funded_or_actual_staffing` is updated""" # * Store 'DailyCapabilities' in correct format and using the specified underlying assumptions - self._daily_capabilities = self.format_daily_capabilities(use_funded_or_actual_staffing) + self._daily_capabilities, self._daily_capabilities_per_staff = self.format_daily_capabilities(use_funded_or_actual_staffing) # Also, store the set of officers with non-zero daily availability # (This is used for checking that scheduled HSI events do not make appointment requiring officers that are # never available.) self._officers_with_availability = set(self._daily_capabilities.index[self._daily_capabilities > 0]) - def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> pd.Series: + def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> tuple[pd.Series,pd.Series]: """ - This will updates the dataframe for the self.parameters['Daily_Capabilities'] so as to include - every permutation of officer_type_code and facility_id, with zeros against permutations where no capacity + This will updates the dataframe for the self.parameters['Daily_Capabilities'] so as to: + 1. include every permutation of officer_type_code and facility_id, with zeros against permutations where no capacity is available. - - It also give the dataframe an index that is useful for merging on (based on Facility_ID and Officer Type) - + 2. Give the dataframe an index that is useful for merging on (based on Facility_ID and Officer Type) (This is so that its easier to track where demands are being placed where there is no capacity) + 3. Compute daily capabilities per staff. This will be used to compute staff count in a way that is independent of assumed efficiency. """ # Get the capabilities data imported (according to the specified underlying assumptions). 
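The new `setup_daily_capabilities` / `format_daily_capabilities` pair keeps a second series, `Mins_Per_Day_Per_Staff`, alongside the total daily minutes, so that staff counts can later be recovered even after capabilities are rescaled. A small sketch of that bookkeeping with made-up numbers (column names follow the diff; the data are invented):

import pandas as pd

# Toy capabilities table in the shape used by the patch: total minutes
# available per day and the staff count behind them.
capabilities = pd.DataFrame({
    'Facility_ID': [0, 0],
    'Officer_Type_Code': ['Nursing_and_Midwifery', 'Clinical'],
    'Total_Mins_Per_Day': [4800.0, 2400.0],
    'Staff_Count': [10, 5],
})

# Minutes per day contributed by a single staff member, as in the patch.
capabilities['Mins_Per_Day_Per_Staff'] = (
    capabilities['Total_Mins_Per_Day'] / capabilities['Staff_Count']
)

# If capabilities are later rescaled (e.g. to 'effective' capability), applying
# the same factor to both series leaves the implied staff count unchanged.
rescaling_factor = 1.2
total = capabilities['Total_Mins_Per_Day'] * rescaling_factor
per_staff = capabilities['Mins_Per_Day_Per_Staff'] * rescaling_factor
print(total / per_staff)  # recovers the original staff counts (10.0, 5.0)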
@@ -928,6 +962,10 @@ def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> pd.Se ) capabilities = capabilities.rename(columns={'Officer_Category': 'Officer_Type_Code'}) # neaten + # Create new column where capabilities per staff are computed + capabilities['Mins_Per_Day_Per_Staff'] = capabilities['Total_Mins_Per_Day']/capabilities['Staff_Count'] + + # Create dataframe containing background information about facility and officer types facility_ids = self.parameters['Master_Facilities_List']['Facility_ID'].values officer_type_codes = set(self.parameters['Officer_Types_Table']['Officer_Category'].values) @@ -947,6 +985,9 @@ def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> pd.Se mfl = self.parameters['Master_Facilities_List'] capabilities_ex = capabilities_ex.merge(mfl, on='Facility_ID', how='left') + # Create a copy of this to store staff counts + capabilities_per_staff_ex = capabilities_ex.copy() + # Merge in information about officers # officer_types = self.parameters['Officer_Types_Table'][['Officer_Type_Code', 'Officer_Type']] # capabilities_ex = capabilities_ex.merge(officer_types, on='Officer_Type_Code', how='left') @@ -960,6 +1001,13 @@ def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> pd.Se ) capabilities_ex = capabilities_ex.fillna(0) + capabilities_per_staff_ex = capabilities_per_staff_ex.merge( + capabilities[['Facility_ID', 'Officer_Type_Code', 'Mins_Per_Day_Per_Staff']], + on=['Facility_ID', 'Officer_Type_Code'], + how='left', + ) + capabilities_per_staff_ex = capabilities_per_staff_ex.fillna(0) + # Give the standard index: capabilities_ex = capabilities_ex.set_index( 'FacilityID_' @@ -968,15 +1016,24 @@ def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> pd.Se + capabilities_ex['Officer_Type_Code'] ) + # Give the standard index: + capabilities_per_staff_ex = capabilities_per_staff_ex.set_index( + 'FacilityID_' + + capabilities_ex['Facility_ID'].astype(str) + + '_Officer_' + + capabilities_ex['Officer_Type_Code'] + ) + # Rename 'Total_Minutes_Per_Day' capabilities_ex = capabilities_ex.rename(columns={'Total_Mins_Per_Day': 'Total_Minutes_Per_Day'}) # Checks assert abs(capabilities_ex['Total_Minutes_Per_Day'].sum() - capabilities['Total_Mins_Per_Day'].sum()) < 1e-7 assert len(capabilities_ex) == len(facility_ids) * len(officer_type_codes) + assert len(capabilities_per_staff_ex) == len(facility_ids) * len(officer_type_codes) # return the pd.Series of `Total_Minutes_Per_Day' indexed for each type of officer at each facility - return capabilities_ex['Total_Minutes_Per_Day'] + return capabilities_ex['Total_Minutes_Per_Day'], capabilities_per_staff_ex['Mins_Per_Day_Per_Staff'] def _rescale_capabilities_to_capture_effective_capability(self): # Notice that capabilities will only be expanded through this process @@ -999,6 +1056,11 @@ def _rescale_capabilities_to_capture_effective_capability(self): if rescaling_factor > 1 and rescaling_factor != float("inf"): self._daily_capabilities[officer] *= rescaling_factor + # We assume that increased daily capabilities is a result of each staff performing more + # daily patient facing time per day than contracted (or equivalently performing appts more + # efficiently). 
+ self._daily_capabilities_per_staff[officer] *= rescaling_factor + def update_consumables_availability_to_represent_merging_of_levels_1b_and_2(self, df_original): """To represent that facility levels '1b' and '2' are merged together under the label '2', we replace the availability of consumables at level 2 with new values.""" @@ -1142,13 +1204,17 @@ def get_mode_appt_constraints(self) -> int: if self.arg_mode_appt_constraints is None \ else self.arg_mode_appt_constraints - def get_use_funded_or_actual_staffing(self) -> str: - """Returns `use_funded_or_actual_staffing`. (Should be equal to what is specified by the parameter, but - overwrite with what was provided in argument if an argument was specified -- provided for backward - compatibility/debugging.)""" - return self.parameters['use_funded_or_actual_staffing'] \ - if self.arg_use_funded_or_actual_staffing is None \ - else self.arg_use_funded_or_actual_staffing + @property + def use_funded_or_actual_staffing(self) -> str: + """Returns value for `use_funded_or_actual_staffing`.""" + return self._use_funded_or_actual_staffing + + @use_funded_or_actual_staffing.setter + def use_funded_or_actual_staffing(self, use_funded_or_actual_staffing) -> str: + """Set value for `use_funded_or_actual_staffing` and update the daily_capabilities accordingly. """ + assert use_funded_or_actual_staffing in ['actual', 'funded', 'funded_plus'] + self._use_funded_or_actual_staffing = use_funded_or_actual_staffing + self.setup_daily_capabilities(self._use_funded_or_actual_staffing) def get_priority_policy_initial(self) -> str: """Returns `priority_policy`. (Should be equal to what is specified by the parameter, but @@ -1172,8 +1238,13 @@ def load_priority_policy(self, policy): ].iloc[0] # Convert policy dataframe into dictionary to speed-up look-up process. 
- self.priority_rank_dict = \ - Policy_df.set_index("Treatment", drop=True).to_dict(orient="index") + self.priority_rank_dict = ( + Policy_df.set_index("Treatment", drop=True) + # Standardize dtypes to ensure any integers represented as floats are + # converted to integer dtypes + .convert_dtypes() + .to_dict(orient="index") + ) del self.priority_rank_dict["lowest_priority_considered"] def schedule_hsi_event( @@ -1747,7 +1818,7 @@ def write_to_never_ran_hsi_log( 'Number_By_Appt_Type_Code': dict(event_details.appt_footprint), 'Person_ID': person_id, 'priority': priority, - 'Facility_Level': event_details.facility_level if event_details.facility_level is not None else -99, + 'Facility_Level': event_details.facility_level if event_details.facility_level is not None else "-99", 'Facility_ID': facility_id if facility_id is not None else -99, }, description="record of each HSI event that never ran" @@ -2425,8 +2496,7 @@ def process_events_mode_2(self, hold_over: List[HSIEventQueueItem]) -> None: ) # Update today's footprint based on actual call and squeeze factor - self.module.running_total_footprint -= original_call - self.module.running_total_footprint += updated_call + self.module.running_total_footprint.update(updated_call) # Write to the log self.module.record_hsi_event( @@ -2613,6 +2683,11 @@ def _reset_internal_stores(self) -> None: self._appts_by_level = {_level: defaultdict(int) for _level in ('0', '1a', '1b', '2', '3', '4')} # <--Same as `self._appts` but also split by facility_level + # Log HSI_Events that have a non-blank appointment footprint + self._no_blank_appt_treatment_ids = defaultdict(int) # As above, but for `HSI_Event`s with non-blank footprint + self._no_blank_appt_appts = defaultdict(int) # As above, but for `HSI_Event`s that with non-blank footprint + self._no_blank_appt_by_level = {_level: defaultdict(int) for _level in ('0', '1a', '1b', '2', '3', '4')} + # Log HSI_Events that never ran to monitor shortcoming of Health System self._never_ran_treatment_ids = defaultdict(int) # As above, but for `HSI_Event`s that never ran self._never_ran_appts = defaultdict(int) # As above, but for `HSI_Event`s that have never ran @@ -2646,6 +2721,13 @@ def record_hsi_event(self, self._appts[appt_type] += number self._appts_by_level[level][appt_type] += number + # Count the non-blank appointment footprints + if len(appt_footprint): + self._no_blank_appt_treatment_ids[treatment_id] += 1 + for appt_type, number in appt_footprint: + self._no_blank_appt_appts[appt_type] += number + self._no_blank_appt_by_level[level][appt_type] += number + def record_never_ran_hsi_event(self, treatment_id: str, hsi_event_name: str, @@ -2690,6 +2772,15 @@ def write_to_log_and_reset_counters(self): } }, ) + logger_summary.info( + key="HSI_Event_non_blank_appt_footprint", + description="Same as for key 'HSI_Event' but limited to HSI_Event that have non-blank footprints", + data={ + "TREATMENT_ID": self._no_blank_appt_treatment_ids, + "Number_By_Appt_Type_Code": self._no_blank_appt_appts, + "Number_By_Appt_Type_Code_And_Level": self._no_blank_appt_by_level, + }, + ) # Log summary of HSI_Events that never ran logger_summary.info( @@ -2763,6 +2854,7 @@ class HealthSystemChangeParameters(Event, PopulationScopeEventMixin): * `cons_availability` * `beds_availability` * `equip_availability` + * `use_funded_or_actual_staffing` Note that no checking is done here on the suitability of values of each parameter.""" def __init__(self, module: HealthSystem, parameters: Dict): @@ -2784,11 +2876,17 @@ def apply(self, 
population): self.module.consumables.availability = self._parameters['cons_availability'] if 'beds_availability' in self._parameters: - self.module.bed_days.availability = self._parameters['beds_availability'] + self.module.bed_days.switch_beddays_availability( + new_availability=self._parameters["beds_availability"], + effective_on_and_from=self.sim.date, + model_to_data_popsize_ratio=self.sim.modules["Demography"].initial_model_to_data_popsize_ratio + ) if 'equip_availability' in self._parameters: self.module.equipment.availability = self._parameters['equip_availability'] + if 'use_funded_or_actual_staffing' in self._parameters: + self.module.use_funded_or_actual_staffing = self._parameters['use_funded_or_actual_staffing'] class DynamicRescalingHRCapabilities(RegularEvent, PopulationScopeEventMixin): """ This event exists to scale the daily capabilities assumed at fixed time intervals""" @@ -2901,3 +2999,34 @@ def apply(self, population): f"Now using mode: " f"{self.module.mode_appt_constraints}" ) + + +class HealthSystemLogger(RegularEvent, PopulationScopeEventMixin): + """ This event runs at the start of each year and does any logging jobs for the HealthSystem module.""" + + def __init__(self, module): + super().__init__(module, frequency=DateOffset(years=1)) + + def apply(self, population): + """Things to do at the start of the year""" + self.log_number_of_staff() + + def log_number_of_staff(self): + """Write to the summary log with the counts of staff (by cadre/facility/level) taking into account: + * Any scaling of capabilities that has taken place, year-by-year, or cadre-by-cadre + * Any re-scaling that has taken place at the transition into Mode 2. + """ + + hs = self.module # HealthSystem module + + # Compute staff counts from available capabilities (hs.capabilities_today) and daily capabilities per staff, + # both of which would have been rescaled to current efficiency levels if scale_to_effective_capabilities=True + # This returns the number of staff counts normalised by the self.capabilities_coefficient parameter + current_staff_count = dict((hs.capabilities_today/hs._daily_capabilities_per_staff).sort_index()) + + logger_summary.info( + key="number_of_hcw_staff", + description="The number of hcw_staff this year", + data=current_staff_count, + ) + diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 5daa6e66f9..c252a40974 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -127,13 +127,6 @@ def bed_days_allocated_to_this_event(self): return self._received_info_about_bed_days - @property - def target_is_alive(self) -> bool: - """Return True if the target of this HSI event is alive, - otherwise False. 
- """ - return self.sim.population.props.at[self.target, "is_alive"] - @property def sim(self) -> Simulation: return self.module.sim @@ -232,7 +225,8 @@ def get_consumables( # Checking the availability and logging: rtn = self.healthcare_system.consumables._request_consumables( - item_codes={**_item_codes, **_optional_item_codes}, + essential_item_codes=_item_codes, + optional_item_codes=_optional_item_codes, to_log=_to_log, facility_info=self.facility_info, treatment_id=self.TREATMENT_ID, @@ -365,7 +359,7 @@ def _check_if_appt_footprint_can_run(self) -> bool: ): return True else: - logger.warning( + logger.debug( key="message", data=( f"The expected footprint of {self.TREATMENT_ID} is not possible with the configuration of " diff --git a/tests/test_consumables.py b/tests/test_consumables.py index 6eee6dac38..c45f1532ed 100644 --- a/tests/test_consumables.py +++ b/tests/test_consumables.py @@ -61,12 +61,12 @@ def test_using_recognised_item_codes(seed): # Make requests for consumables (which would normally come from an instance of `HSI_Event`). rtn = cons._request_consumables( - item_codes={0: 1, 1: 1}, + essential_item_codes={0: 1, 1: 1}, facility_info=facility_info_0 ) assert {0: False, 1: True} == rtn - assert not cons._not_recognised_item_codes # No item_codes recorded as not recognised. + assert len(cons._not_recognised_item_codes) == 0 # No item_codes recorded as not recognised. def test_unrecognised_item_code_is_recorded(seed): @@ -88,12 +88,12 @@ def test_unrecognised_item_code_is_recorded(seed): # Make requests for consumables (which would normally come from an instance of `HSI_Event`). rtn = cons._request_consumables( - item_codes={99: 1}, + essential_item_codes={99: 1}, facility_info=facility_info_0 ) assert isinstance(rtn[99], bool) - assert cons._not_recognised_item_codes # Some item_codes recorded as not recognised. + assert len(cons._not_recognised_item_codes) > 0 # Some item_codes recorded as not recognised. 
# Check warning is issued at end of simulation with pytest.warns(UserWarning) as recorded_warnings: @@ -128,7 +128,8 @@ def test_consumables_availability_options(seed): cons.on_start_of_day(date=date) assert _expected_result == cons._request_consumables( - item_codes={_item_code: 1 for _item_code in all_items_request}, to_log=False, facility_info=facility_info_0 + essential_item_codes={_item_code: 1 for _item_code in all_items_request}, + to_log=False, facility_info=facility_info_0 ) @@ -153,7 +154,8 @@ def request_item(cons, item_code: Union[list, int]): item_code = [item_code] return all(cons._request_consumables( - item_codes={_i: 1 for _i in item_code}, to_log=False, facility_info=facility_info_0 + essential_item_codes={_i: 1 for _i in item_code}, + to_log=False, facility_info=facility_info_0 ).values()) rng = get_rng(seed) @@ -250,7 +252,7 @@ def test_consumables_available_at_right_frequency(seed): for _ in range(n_trials): cons.on_start_of_day(date=date) rtn = cons._request_consumables( - item_codes=requested_items, + essential_item_codes=requested_items, facility_info=facility_info_0, ) for _i in requested_items: @@ -273,6 +275,47 @@ def is_obs_frequency_consistent_with_expected_probability(n_obs, n_trials, p): p=average_availability_of_known_items) +@pytest.mark.parametrize("p_known_items, expected_items_used", [ + # Test 1 + ({0: 0.0, 1: 1.0, 2: 1.0, 3: 1.0}, {}), + # Test 2 + ({0: 1.0, 1: 1.0, 2: 0.0, 3: 1.0}, {0: 5, 1: 10, 3: 2}) +]) +def test_items_used_includes_only_available_items(seed, p_known_items, expected_items_used): + """ + Test that 'items_used' includes only items that are available. + Items should only be logged if the essential items are ALL available + If essential items are available, then optional items can be logged as items_used if available + Test 1: should not have any items_used as essential item 0 is not available + Test 2: should have essential items logged as items_used, but optional item 2 is not available + """ + + data = create_dummy_data_for_cons_availability( + intrinsic_availability=p_known_items, + months=[1], + facility_ids=[0] + ) + rng = get_rng(seed) + date = datetime.datetime(2010, 1, 1) + + cons = Consumables(availability_data=data, rng=rng) + + # Define essential and optional item codes + essential_item_codes = {0: 5, 1: 10} # these must match parameters above + optional_item_codes = {2: 7, 3: 2} + + cons.on_start_of_day(date=date) + cons._request_consumables( + essential_item_codes=essential_item_codes, + optional_item_codes=optional_item_codes, + facility_info=facility_info_0, + ) + + # Access items used from the Consumables summary counter + items_used = getattr(cons._summary_counter, '_items', {}).get('Used') + assert items_used == expected_items_used, f"Expected items_used to be {expected_items_used}, but got {items_used}" + + def get_sim_with_dummy_module_registered(tmpdir=None, run=True, data=None): """Return an initialised simulation object with a Dummy Module registered. 
If the `data` argument is provided, the parameter in HealthSystem that holds the data on consumables availability is over-written.""" @@ -321,7 +364,7 @@ def initialise_simulation(self, sim): return sim -def get_dummy_hsi_event_instance(module, facility_id=None): +def get_dummy_hsi_event_instance(module, facility_id=None, to_log=False): """Make an HSI Event that runs for person_id=0 in a particular facility_id and requests consumables, and for which its parent is the identified module.""" @@ -340,7 +383,7 @@ def apply(self, person_id, squeeze_factor): """Requests all recognised consumables.""" self.get_consumables( item_codes=list(self.sim.modules['HealthSystem'].consumables.item_codes), - to_log=True, + to_log=to_log, return_individual_results=False ) @@ -446,7 +489,7 @@ def schedule_hsi_that_will_request_consumables(sim): # Schedule the HSI event for person_id=0 sim.modules['HealthSystem'].schedule_hsi_event( - hsi_event=get_dummy_hsi_event_instance(module=sim.modules['DummyModule'], facility_id=0), + hsi_event=get_dummy_hsi_event_instance(module=sim.modules['DummyModule'], facility_id=0, to_log=True), topen=sim.start_date, tclose=None, priority=0 @@ -500,12 +543,12 @@ def test_every_declared_consumable_for_every_possible_hsi_using_actual_data(recw facility_id=_facility_id ) for _item_code in item_codes: - hsi_event.get_consumables(item_codes=_item_code) + hsi_event.get_consumables(item_codes=_item_code, to_log=False) sim.modules['HealthSystem'].on_simulation_end() - # Check that no warnings raised or item_codes recorded as being not recogised. - assert not sim.modules['HealthSystem'].consumables._not_recognised_item_codes + # Check that no warnings raised or item_codes recorded as being not recognised. + assert len(sim.modules['HealthSystem'].consumables._not_recognised_item_codes) == 0 assert not any_warnings_about_item_code(recwarn) diff --git a/tests/test_healthsystem.py b/tests/test_healthsystem.py index 801150fdcc..6eeabc4995 100644 --- a/tests/test_healthsystem.py +++ b/tests/test_healthsystem.py @@ -952,7 +952,7 @@ def apply(self, person_id, squeeze_factor): } == set(detailed_hsi_event.columns) assert {'date', 'Frac_Time_Used_Overall', 'Frac_Time_Used_By_Facility_ID', 'Frac_Time_Used_By_OfficerType', } == set(detailed_capacity.columns) - assert {'date', 'TREATMENT_ID', 'Item_Available', 'Item_NotAvailable' + assert {'date', 'TREATMENT_ID', 'Item_Available', 'Item_NotAvailable', 'Item_Used' } == set(detailed_consumables.columns) bed_types = sim.modules['HealthSystem'].bed_days.bed_types @@ -1019,6 +1019,9 @@ def dict_all_close(dict_1, dict_2): assert summary_consumables['Item_NotAvailable'].apply(pd.Series).sum().to_dict() == \ detailed_consumables['Item_NotAvailable'].apply( lambda x: {f'{k}': v for k, v in eval(x).items()}).apply(pd.Series).sum().to_dict() + assert summary_consumables['Item_Used'].apply(pd.Series).sum().to_dict() == \ + detailed_consumables['Item_Used'].apply( + lambda x: {f'{k}': v for k, v in eval(x).items()}).apply(pd.Series).sum().to_dict() # - Bed-Days (bed-type by bed-type and year by year) for _bed_type in bed_types: @@ -1347,6 +1350,7 @@ def test_HealthSystemChangeParameters(seed, tmpdir): 'cons_availability': 'all', 'beds_availability': 'default', 'equip_availability': 'default', + 'use_funded_or_actual_staffing': 'funded_plus', } new_parameters = { 'mode_appt_constraints': 2, @@ -1355,6 +1359,7 @@ def test_HealthSystemChangeParameters(seed, tmpdir): 'cons_availability': 'none', 'beds_availability': 'none', 'equip_availability': 'all', + 
'use_funded_or_actual_staffing': 'actual', } class CheckHealthSystemParameters(RegularEvent, PopulationScopeEventMixin): @@ -1371,6 +1376,7 @@ def apply(self, population): _params['cons_availability'] = hs.consumables.availability _params['beds_availability'] = hs.bed_days.availability _params['equip_availability'] = hs.equipment.availability + _params['use_funded_or_actual_staffing'] = hs.use_funded_or_actual_staffing logger = logging.getLogger('tlo.methods.healthsystem') logger.info(key='CheckHealthSystemParameters', data=_params) @@ -2514,3 +2520,122 @@ def run_sim(dynamic_HR_scaling_factor: Dict[int, float]) -> tuple: ratio_in_sim = caps / initial_caps assert np.allclose(ratio_in_sim, expected_overall_scaling) + + +def test_scaling_up_HRH_using_yearly_scaling_and_scaling_by_level_together(seed): + """We want the behaviour of HRH 'yearly scaling' and 'scaling_by_level' to operate together, so that, for instance, + the total capabilities is greater when scaling up by level _and_ by yearly-scaling than by using either + independently.""" + + def get_capabilities(yearly_scaling: bool, scaling_by_level: bool, rescaling: bool) -> float: + """Return total capabilities of HRH when optionally using 'yearly scaling' and/or 'scaling_by_level'""" + sim = Simulation(start_date=start_date, seed=seed) + sim.register( + demography.Demography(resourcefilepath=resourcefilepath), + healthsystem.HealthSystem(resourcefilepath=resourcefilepath), + simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), + ) + params = sim.modules['HealthSystem'].parameters + + # In Mode 1, from the beginning. + params["mode_appt_constraints"] = 1 + + if yearly_scaling: + params['yearly_HR_scaling_mode'] = 'GDP_growth_fHE_case5' + # This is above-GDP growth after 2018 (baseline year for HRH) + + if scaling_by_level: + params['year_HR_scaling_by_level_and_officer_type'] = 2018 # <-- same time as yearly-scaling + params['HR_scaling_by_level_and_officer_type_mode'] = 'x2_fac0&1' + + if rescaling: + # Switch to Mode 2, with the rescaling, at the same time as the other changes occur + params["mode_appt_constraints_postSwitch"] = 2 + params["scale_to_effective_capabilities"] = True + params["year_mode_switch"] = 2018 + + popsize = 100 + sim.make_initial_population(n=popsize) + sim.simulate(end_date=sim.date + pd.DateOffset(years=10, days=1)) # run simulation until at least past 2018 + + return sim.modules['HealthSystem'].capabilities_today.sum() + + # - When running without any rescaling + caps_only_scaling_by_level = get_capabilities(yearly_scaling=False, scaling_by_level=True, rescaling=False) + caps_only_scaling_by_year = get_capabilities(yearly_scaling=True, scaling_by_level=False, rescaling=False) + caps_scaling_by_both = get_capabilities(yearly_scaling=True, scaling_by_level=True, rescaling=False) + assert caps_scaling_by_both > caps_only_scaling_by_level + assert caps_scaling_by_both > caps_only_scaling_by_year + + # - When there is also rescaling as we go from Mode 2 into Mode 1 + caps_only_scaling_by_level_with_rescaling = get_capabilities(yearly_scaling=False, scaling_by_level=True, rescaling=True) + caps_only_scaling_by_year_with_rescaling = get_capabilities(yearly_scaling=True, scaling_by_level=False, rescaling=True) + caps_scaling_by_both_with_rescaling = get_capabilities(yearly_scaling=True, scaling_by_level=True, rescaling=True) + assert caps_scaling_by_both_with_rescaling > caps_only_scaling_by_level_with_rescaling + assert caps_scaling_by_both_with_rescaling > caps_only_scaling_by_year_with_rescaling 
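The test docstring states that 'yearly scaling' and 'scaling_by_level' should operate together; if the two factors compose multiplicatively on capabilities, applying both always exceeds applying either alone whenever each factor is greater than one. A toy calculation of that property (the factors are invented for illustration, not taken from the resource files):

```python
baseline_minutes = 1_000_000.0        # total HRH minutes in the baseline year (illustrative)
yearly_scaling_factor = 1.04 ** 5     # e.g. several years of above-GDP growth (illustrative)
scaling_by_level_factor = 2.0         # e.g. doubling capability at selected levels (illustrative)

only_yearly = baseline_minutes * yearly_scaling_factor
only_by_level = baseline_minutes * scaling_by_level_factor
both = baseline_minutes * yearly_scaling_factor * scaling_by_level_factor

assert both > only_yearly and both > only_by_level
```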
+ + +def test_logging_of_only_hsi_events_with_non_blank_footprints(tmpdir): + """Run the simulation with an HSI_Event that may have a blank_footprint and examine the healthsystem.summary logger. + * If the footprint is blank, the HSI event should be recorded in the usual loggers but not the 'no_blank' logger + * If the footprint is non-blank, the HSI event should be recorded in the usual and the 'no_blank' loggers. + """ + + def run_simulation_and_return_healthsystem_summary_log(tmpdir: Path, blank_footprint: bool) -> dict: + """Return the `healthsystem.summary` logger for a simulation. In that simulation, there is HSI_Event run on the + first day of the simulation and its `EXPECTED_APPT_FOOTPRINT` may or may not be blank. The simulation is run for one + year in order that the summary logger is active (it runs annually).""" + + class HSI_Dummy(HSI_Event, IndividualScopeEventMixin): + def __init__(self, module, person_id, _is_footprint_blank): + super().__init__(module, person_id=person_id) + self.TREATMENT_ID = 'Dummy' + self.ACCEPTED_FACILITY_LEVEL = '0' + self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({}) if blank_footprint \ + else self.make_appt_footprint({'ConWithDCSA': 1}) + + def apply(self, person_id, squeeze_factor): + pass + + class DummyModule(Module): + METADATA = {Metadata.DISEASE_MODULE} + + def read_parameters(self, data_folder): + pass + + def initialise_population(self, population): + pass + + def initialise_simulation(self, sim): + hsi_event = HSI_Dummy(module=self, person_id=0, _is_footprint_blank=blank_footprint) + sim.modules['HealthSystem'].schedule_hsi_event(hsi_event=hsi_event, topen=sim.date, priority=0) + + start_date = Date(2010, 1, 1) + sim = Simulation(start_date=start_date, seed=0, log_config={'filename': 'tmp', 'directory': tmpdir}) + sim.register( + demography.Demography(resourcefilepath=resourcefilepath), + healthsystem.HealthSystem(resourcefilepath=resourcefilepath, mode_appt_constraints=0), + DummyModule(), + # Disable sorting + checks to avoid error due to missing dependencies + sort_modules=False, + check_all_dependencies=False + ) + sim.make_initial_population(n=100) + sim.simulate(end_date=sim.start_date + pd.DateOffset(years=1)) + + return parse_log_file(sim.log_filepath)['tlo.methods.healthsystem.summary'] + # When the footprint is blank: + log = run_simulation_and_return_healthsystem_summary_log(tmpdir, blank_footprint=True) + assert log['HSI_Event']['TREATMENT_ID'].iloc[0] == {'Dummy': 1} # recorded in usual logger + assert log['HSI_Event_non_blank_appt_footprint']['TREATMENT_ID'].iloc[0] == {} # not recorded in 'non-blank' logger + + # When the footprint is non-blank: + log = run_simulation_and_return_healthsystem_summary_log(tmpdir, blank_footprint=False) + assert not log['HSI_Event'].empty + assert 'TREATMENT_ID' in log['HSI_Event'].columns + assert 'TREATMENT_ID' in log['HSI_Event_non_blank_appt_footprint'].columns + assert( log['HSI_Event']['TREATMENT_ID'].iloc[0] + == log['HSI_Event_non_blank_appt_footprint']['TREATMENT_ID'].iloc[0] + == {'Dummy': 1} + # recorded in both the usual and the 'non-blank' logger + ) From ee4e8bd1aa1404f5fa69d5721e58d77771c707ef Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 26 Nov 2024 20:51:28 +0000 Subject: [PATCH 196/230] merge in changes from master - scenario --- src/tlo/scenario.py | 124 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 110 insertions(+), 14 deletions(-) diff --git a/src/tlo/scenario.py b/src/tlo/scenario.py index d92f0b4aef..f64325f9ec 100644 --- 
a/src/tlo/scenario.py +++ b/src/tlo/scenario.py @@ -60,17 +60,20 @@ def draw_parameters(self, draw_number, rng): """ import abc +import argparse import datetime import json import pickle +from collections.abc import Iterable from itertools import product from pathlib import Path, PurePosixPath -from typing import Optional +from typing import List, Optional import numpy as np from tlo import Date, Simulation, logging from tlo.analysis.utils import parse_log_file +from tlo.util import str_to_pandas_date logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -125,6 +128,57 @@ def __init__( self.resources = resources_path self.rng = None self.scenario_path = None + self.arguments = [] + + def parse_arguments(self, extra_arguments: List[str]) -> None: + """Base class command line arguments handling for scenarios. This should not be overridden by subclasses. + Subclasses can add argument handling to their classes by implementing the `add_arguments` method.""" + + if extra_arguments is None: + return + + assert isinstance(extra_arguments, Iterable), "Arguments must be a list of strings" + + self.arguments = extra_arguments + + parser = argparse.ArgumentParser() + parser.add_argument( + "--resume-simulation", + type=str, + help="Directory containing suspended state files to resume simulation from", + ) + parser.add_argument( + "--suspend-date", + type=str_to_pandas_date, + help="Date to suspend the simulation at", + ) + + # add arguments from the subclass + self.add_arguments(parser) + + arguments = parser.parse_args(self.arguments) + + # set the arguments as attributes of the scenario + for key, value in vars(arguments).items(): + if value is not None: + if hasattr(self, key): + logger.info(key="message", data=f"Overriding attribute: {key}: {getattr(self, key)} -> {value}") + setattr(self, key, value) + + def add_arguments(self, parser: argparse.ArgumentParser) -> None: + """Add scenario-specific arguments that can be passed to scenario from the command line. + + This method is called to add scenario-specific arguments to the command line parser. The method should add + arguments to the parser using the `add_argument` method. Arguments that have a value of None are not set or + overridden. + + :param parser: An instance of `argparse.ArgumentParser` to which arguments should be added. 
+ + Example:: + + parser.add_argument('--pop-size', type=int, default=20_000, help='Population size') + """ + pass @abc.abstractmethod def log_configuration(self, **kwargs): @@ -282,11 +336,13 @@ def get_run_config(self, scenario_path): return { "scenario_script_path": str(PurePosixPath(scenario_path)), "scenario_seed": self.scenario.seed, + "arguments": self.scenario.arguments, "runs_per_draw": self.runs_per_draw, "draws": self.draws, } - def save_config(self, config, output_path): + @staticmethod + def save_config(config, output_path): with open(output_path, "w") as f: f.write(json.dumps(config, indent=2)) @@ -297,6 +353,8 @@ def __init__(self, run_configuration_path): with open(run_configuration_path, "r") as f: self.run_config = json.load(f) self.scenario = ScenarioLoader(self.run_config["scenario_script_path"]).get_scenario() + if self.run_config["arguments"] is not None: + self.scenario.parse_arguments(self.run_config["arguments"]) logger.info(key="message", data=f"Loaded scenario using {run_configuration_path}") logger.info(key="message", data=f"Found {self.number_of_draws} draws; {self.runs_per_draw} runs/draw") @@ -335,20 +393,58 @@ def run_sample_by_number(self, output_directory, draw_number, sample_number): sample = self.get_sample(draw, sample_number) log_config = self.scenario.get_log_config(output_directory) - logger.info(key="message", data=f"Running draw {sample['draw_number']}, sample {sample['sample_number']}") - - sim = Simulation( - start_date=self.scenario.start_date, - seed=sample["simulation_seed"], - log_config=log_config + logger.info( + key="message", + data=f"Running draw {sample['draw_number']}, sample {sample['sample_number']}", ) - sim.register(*self.scenario.modules()) - - if sample["parameters"] is not None: - self.override_parameters(sim, sample["parameters"]) - sim.make_initial_population(n=self.scenario.pop_size) - sim.simulate(end_date=self.scenario.end_date) + # if user has specified a restore simulation, we load it from a pickle file + if ( + hasattr(self.scenario, "resume_simulation") + and self.scenario.resume_simulation is not None + ): + suspended_simulation_path = ( + Path(self.scenario.resume_simulation) + / str(draw_number) + / str(sample_number) + / "suspended_simulation.pickle" + ) + logger.info( + key="message", + data=f"Loading pickled suspended simulation from {suspended_simulation_path}", + ) + sim = Simulation.load_from_pickle(pickle_path=suspended_simulation_path, log_config=log_config) + else: + sim = Simulation( + start_date=self.scenario.start_date, + seed=sample["simulation_seed"], + log_config=log_config, + ) + sim.register(*self.scenario.modules()) + + if sample["parameters"] is not None: + self.override_parameters(sim, sample["parameters"]) + + sim.make_initial_population(n=self.scenario.pop_size) + sim.initialise(end_date=self.scenario.end_date) + + # if user has specified a suspend date, we run the simulation to that date and + # save it to a pickle file + if ( + hasattr(self.scenario, "suspend_date") + and self.scenario.suspend_date is not None + ): + sim.run_simulation_to(to_date=self.scenario.suspend_date) + suspended_simulation_path = Path(log_config["directory"]) / "suspended_simulation.pickle" + sim.save_to_pickle(pickle_path=suspended_simulation_path) + sim.close_output_file() + logger.info( + key="message", + data=f"Simulation suspended at {self.scenario.suspend_date} and saved to {suspended_simulation_path}", + ) + else: + sim.run_simulation_to(to_date=self.scenario.end_date) + sim.finalise() if sim.log_filepath is not 
None: outputs = parse_log_file(sim.log_filepath) From c5967c85033e3d923d72f1982794042c900fced5 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 26 Nov 2024 20:54:08 +0000 Subject: [PATCH 197/230] merge in changes from master - util --- src/tlo/util.py | 83 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 81 insertions(+), 2 deletions(-) diff --git a/src/tlo/util.py b/src/tlo/util.py index cafd04f738..168b1d41a1 100644 --- a/src/tlo/util.py +++ b/src/tlo/util.py @@ -1,11 +1,12 @@ """This file contains helpful utility functions.""" import hashlib from collections import defaultdict -from typing import Dict, List, Optional, Set, Union +from pathlib import Path +from typing import Any, Dict, List, Optional, Set, Union import numpy as np import pandas as pd -from pandas import DateOffset +from pandas import DataFrame, DateOffset from tlo import Population, Property, Types @@ -407,6 +408,11 @@ def random_date(start, end, rng): return start + DateOffset(days=rng.randint(0, (end - start).days)) +def str_to_pandas_date(date_string): + """Convert a string with the format YYYY-MM-DD to a pandas Timestamp (aka TLO Date) object.""" + return pd.to_datetime(date_string, format="%Y-%m-%d") + + def hash_dataframe(dataframe: pd.DataFrame): def coerce_lists_to_tuples(df: pd.DataFrame) -> pd.DataFrame: """Coerce columns in a pd.DataFrame that are lists to tuples. This step is needed before hashing a pd.DataFrame @@ -431,3 +437,76 @@ def get_person_id_to_inherit_from(child_id, mother_id, population_dataframe, rng return abs(mother_id) elif mother_id >= 0: return mother_id + + +def convert_excel_files_to_csv(folder: Path, files: Optional[list[str]] = None, *, delete_excel_files: bool = False) -> None: + """ convert Excel files to csv files. + + :param folder: Folder containing Excel files. + :param files: List of Excel file names to convert to csv files. When `None`, all Excel files in the folder and + subsequent folders within this folder will be converted to csv files with Excel file name becoming + folder name and sheet names becoming csv file names. + :param delete_excel_files: When true, the Excel file we are generating csv files from will get deleted. + """ + # get path to Excel files + if files is None: + excel_file_paths = sorted(folder.rglob("*.xlsx")) + else: + excel_file_paths = [folder / file for file in files] + # exit function if no Excel file is given or found within the path + if excel_file_paths is None: + return + + for excel_file_path in excel_file_paths: + sheet_dataframes: dict[Any, DataFrame] = pd.read_excel(excel_file_path, sheet_name=None) + excel_file_directory: Path = excel_file_path.with_suffix("") + # Create a container directory for per sheet CSVs + if excel_file_directory.exists(): + print(f"Directory {excel_file_directory} already exists") + else: + excel_file_directory.mkdir() + # Write a CSV for each worksheet + for sheet_name, dataframe in sheet_dataframes.items(): + dataframe.to_csv(f'{excel_file_directory / sheet_name}.csv', index=False) + + if delete_excel_files: + # Remove no longer needed Excel file + Path(folder/excel_file_path).unlink() + + +def read_csv_files(folder: Path, files: Optional[list[str]] = None) -> DataFrame | dict[str, DataFrame]: + """ + A function to read CSV files in a similar way pandas reads Excel files (:py:func:`pandas.read_excel`). + + NB: Converting Excel files to csv files caused all columns that had no relevant data to simulation (i.e. 
+ parameter descriptions or data references) to be named `Unnamed1, Unnamed2, ....., UnnamedN` in the csv files. + We are therefore using :py:func:`pandas.filter` to track all unnamed columns and silently drop them using + :py:func:`pandas.drop`. + + :param folder: Path to folder containing CSV files to read. + :param files: preferred csv file name(s). This is the same as sheet names in Excel file. Note that if None(no files + selected) then all files in the containing folder will be loaded + + """ + all_data: dict[str, DataFrame] = {} # dataframes dictionary + + def clean_dataframe(dataframes_dict: dict[str, DataFrame]) -> None: + """ silently drop all columns that have no relevant data to simulation (all columns with a name starting with + Unnamed + :param dataframes_dict: Dictionary of dataframes to clean + """ + for _key, dataframe in dataframes_dict.items(): + all_data[_key] = dataframe.drop(dataframe.filter(like='Unnamed'), axis=1) # filter and drop Unnamed columns + + if files is None: + for f_name in folder.rglob("*.csv"): + all_data[f_name.stem] = pd.read_csv(f_name) + + else: + for f_name in files: + all_data[f_name] = pd.read_csv((folder / f_name).with_suffix(".csv")) + # clean and return the dataframe dictionary + clean_dataframe(all_data) + # If only one file loaded return dataframe directly rather than dict + return next(iter(all_data.values())) if len(all_data) == 1 else all_data + From 8e620b99824062f3f38b4c266ca756b86204d884 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Fri, 29 Nov 2024 14:12:20 +0000 Subject: [PATCH 198/230] update the categorisation of blood and oxygen to 'medical consumables' from 'other' --- src/scripts/costing/cost_estimation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 295af26c1f..34fe56c98a 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -530,7 +530,7 @@ def disaggregate_separately_managed_medical_supplies_from_consumable_costs(_df, other_costs = pd.concat([cost_of_separately_managed_medical_supplies_dispensed, cost_of_separately_managed_medical_supplies_excess_stock]) consumable_costs = prepare_cost_dataframe(consumable_costs, _category_specific_group = 'consumable', _cost_category = 'medical consumables') - other_costs = prepare_cost_dataframe(other_costs, _category_specific_group = 'consumable', _cost_category = 'other') + other_costs = prepare_cost_dataframe(other_costs, _category_specific_group = 'consumable', _cost_category = 'medical consumables') # Only preserve the draws and runs requested if _draws is not None: From 12e605e023a3c91ad643335ca0234e28d33f11cc Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Fri, 29 Nov 2024 16:18:17 +0000 Subject: [PATCH 199/230] add treemap + update supply chain cost subgroup from NA to supply chain (all consumables) --- src/scripts/costing/cost_estimation.py | 78 +++++++++++++++++++++++--- 1 file changed, 71 insertions(+), 7 deletions(-) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 34fe56c98a..238a161dc8 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -10,6 +10,7 @@ import matplotlib.pyplot as plt from matplotlib.ticker import FuncFormatter +import squarify import numpy as np import pandas as pd import ast @@ -521,7 +522,7 @@ def disaggregate_separately_managed_medical_supplies_from_consumable_costs(_df, 'cost'].sum() * 
supply_chain_cost_proportion).reset_index() # Assign relevant additional columns to match the format of the rest of consumables costs supply_chain_costs['Facility_Level'] = 'all' - supply_chain_costs['consumable'] = 'NA' + supply_chain_costs['consumable'] = 'supply chain (all consumables)' supply_chain_costs['cost_subcategory'] = 'supply_chain' assert set(supply_chain_costs.columns) == set(consumable_costs.columns) @@ -864,14 +865,14 @@ def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', subset_df = subset_df[subset_df['cost_category'] == _cost_category] if (_disaggregate_by_subgroup == True): # If sub-groups are more than 10 in number, then disaggregate the top 10 and group the rest into an 'other' category - if (len(subset_df['cost_subgroup']) > 10): + if (len(subset_df['cost_subgroup'].unique()) > 10): # Calculate total cost per subgroup subgroup_totals = subset_df.groupby('cost_subgroup')['cost'].sum() # Identify the top 10 subgroups by cost top_10_subgroups = subgroup_totals.nlargest(10).index.tolist() # Label the remaining subgroups as 'other' subset_df['cost_subgroup'] = subset_df['cost_subgroup'].apply( - lambda x: x if x in top_10_subgroups else 'other' + lambda x: x if x in top_10_subgroups else 'All other consumables' ) pivot_df = subset_df.pivot_table(index=['draw', 'cost_subcategory'], columns='cost_subgroup', @@ -891,10 +892,13 @@ def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', pivot_df = pivot_df[sorted_columns] # Rearrange columns by sorted order # Define custom colors for the bars - column_colors = [color_mapping.get(col, default_color) for col in sorted_columns] - - # Plot the stacked bar chart - ax = pivot_df.plot(kind='bar', stacked=True, figsize=(10, 6), color=column_colors) + if _cost_category == 'all': + column_colors = [color_mapping.get(col, default_color) for col in sorted_columns] + # Plot the stacked bar chart with set colours + ax = pivot_df.plot(kind='bar', stacked=True, figsize=(10, 6), color=column_colors) + else: + # Plot the stacked bar chart without set colours + ax = pivot_df.plot(kind='bar', stacked=True, figsize=(10, 6)) # Set custom x-tick labels if _scenario_dict is provided if _scenario_dict: @@ -1073,6 +1077,66 @@ def do_line_plot_of_cost(_df, _cost_category='all', plt.savefig(_outputfilepath / filename, dpi=100, bbox_inches='tight') plt.close() +# Treemap by category subgroup +#----------------------------------------------------------------------------------------------- +def create_summary_treemap_by_cost_subgroup(_df, _cost_category = None, _draw = None, _year = 'all', + _outputfilepath = figurespath): + # Function to wrap text to fit within treemap rectangles + def wrap_text(text, width=15): + return "\n".join(textwrap.wrap(text, width)) + + valid_cost_categories = ['human resources for health', 'medical consumables', + 'medical equipment', 'facility operating cost'] + if _cost_category == None: + raise ValueError(f"Specify one of the following as _cost_category - {valid_cost_categories})") + elif _cost_category not in valid_cost_categories: + raise ValueError(f"Invalid input for _cost_category: '{_cost_category}'. 
" + f"Specify one of the following - {valid_cost_categories})") + else: + _df = _df[_df['cost_category'] == _cost_category] + + if _draw != None: + _df = _df[_df.draw == _draw] + + # Create summary dataframe for treemap + _df = _df.groupby('cost_subgroup')['cost'].sum().reset_index() + _df = _df.sort_values(by="cost", ascending=False) + top_10 = _df.iloc[:10] + + if (len(_df['cost_subgroup'].unique()) > 10): + # Step 2: Group all other consumables into "Other" + other_cost = _df.iloc[10:]["cost"].sum() + top_10 = top_10.append({"cost_subgroup": "Other", "cost": other_cost}, ignore_index=True) + + # Prepare data for the treemap + total_cost = top_10["cost"].sum() + top_10["proportion"] = top_10["cost"]/total_cost + sizes = top_10["cost"] + + # Exclude labels for small proportions + labels = [ + f"{wrap_text(name)}\n${round(cost, 1)}m\n({round(prop * 100, 1)}%)" + if prop >= 0.01 else "" + for name, cost, prop in zip(top_10["cost_subgroup"], top_10["cost"] / 1e6, top_10["proportion"]) + ] + # Period included for plot title and name + if _year == 'all': + period = (f"{min(_df['year'].unique())} - {max(_df['year'].unique())}") + elif (len(_year) == 1): + period = (f"{_year[0]}") + else: + period = (f"{min(_year)} - {max(_year)}") + + # Step 4: Plot the treemap + plt.figure(figsize=(12, 8)) + squarify.plot(sizes=sizes, label=labels, alpha=0.8, color=plt.cm.Paired.colors) + plt.axis("off") + plt.title(f'{_cost_category} ; Period = {period}') + plt.savefig(_outputfilepath / f'stacked_bar_chart_{_cost_category}_[{_draw}]_{period}.png', + dpi=100, + bbox_inches='tight') + plt.close() + # Plot ROI # TODO update this function to include an input for the monetary value of DALY def generate_roi_plots(_monetary_value_of_incremental_health: pd.DataFrame, From 5644b975ffc97ea6827bbbc03d9cd5dddea6e84a Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Fri, 29 Nov 2024 16:25:35 +0000 Subject: [PATCH 200/230] update treemap saved figure name --- src/scripts/costing/cost_estimation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 238a161dc8..0def453e39 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -1132,7 +1132,7 @@ def wrap_text(text, width=15): squarify.plot(sizes=sizes, label=labels, alpha=0.8, color=plt.cm.Paired.colors) plt.axis("off") plt.title(f'{_cost_category} ; Period = {period}') - plt.savefig(_outputfilepath / f'stacked_bar_chart_{_cost_category}_[{_draw}]_{period}.png', + plt.savefig(_outputfilepath / f'treemap_{_cost_category}_[{_draw}]_{period}.png', dpi=100, bbox_inches='tight') plt.close() From a98d6ec31cdeadd096208798c4c90b9a3a198139 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 2 Dec 2024 13:25:50 +0000 Subject: [PATCH 201/230] update treemap to fix the colourmap for consumables and adjust fontsize these changes make the treepmap more readable and comparable across draws --- src/scripts/costing/cost_estimation.py | 24 ++++++- .../costing/costing_overview_analysis.py | 70 ++++++++++++++++--- 2 files changed, 82 insertions(+), 12 deletions(-) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 0def453e39..36d5c252e0 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -16,6 +16,7 @@ import ast import math import itertools +from itertools import cycle from tlo.analysis.utils import ( extract_params, @@ -1080,6 +1081,7 @@ def 
do_line_plot_of_cost(_df, _cost_category='all', # Treemap by category subgroup #----------------------------------------------------------------------------------------------- def create_summary_treemap_by_cost_subgroup(_df, _cost_category = None, _draw = None, _year = 'all', + _color_map = None, _label_fontsize = 10, _outputfilepath = figurespath): # Function to wrap text to fit within treemap rectangles def wrap_text(text, width=15): @@ -1098,6 +1100,10 @@ def wrap_text(text, width=15): if _draw != None: _df = _df[_df.draw == _draw] + # Remove non-specific subgroup for consumables + if _cost_category == 'medical consumables': + _df = _df[~(_df.cost_subgroup == 'supply chain (all consumables)')] + # Create summary dataframe for treemap _df = _df.groupby('cost_subgroup')['cost'].sum().reset_index() _df = _df.sort_values(by="cost", ascending=False) @@ -1113,6 +1119,20 @@ def wrap_text(text, width=15): top_10["proportion"] = top_10["cost"]/total_cost sizes = top_10["cost"] + # Handle color map + if _color_map is None: + # Generate automatic colors if no color map is provided + auto_colors = plt.cm.Paired.colors + color_cycle = cycle(auto_colors) # Cycle through the automatic colors + color_map = {subgroup: next(color_cycle) for subgroup in top_10["cost_subgroup"]} + else: + # Use the provided color map, fallback to a default color for missing subgroups + fallback_color = '#cccccc' + color_map = {subgroup: _color_map.get(subgroup, fallback_color) for subgroup in top_10["cost_subgroup"]} + + # Get colors for each subgroup + colors = [color_map[subgroup] for subgroup in top_10["cost_subgroup"]] + # Exclude labels for small proportions labels = [ f"{wrap_text(name)}\n${round(cost, 1)}m\n({round(prop * 100, 1)}%)" @@ -1127,9 +1147,9 @@ def wrap_text(text, width=15): else: period = (f"{min(_year)} - {max(_year)}") - # Step 4: Plot the treemap + # Plot the treemap plt.figure(figsize=(12, 8)) - squarify.plot(sizes=sizes, label=labels, alpha=0.8, color=plt.cm.Paired.colors) + squarify.plot(sizes=sizes, label=labels, alpha=0.8, color=colors, text_kwargs={'fontsize': _label_fontsize}) plt.axis("off") plt.title(f'{_cost_category} ; Period = {period}') plt.savefig(_outputfilepath / f'treemap_{_cost_category}_[{_draw}]_{period}.png', diff --git a/src/scripts/costing/costing_overview_analysis.py b/src/scripts/costing/costing_overview_analysis.py index a64b7e9fc6..b391090599 100644 --- a/src/scripts/costing/costing_overview_analysis.py +++ b/src/scripts/costing/costing_overview_analysis.py @@ -1,3 +1,10 @@ +"""Produce outputs for cost overview paper. +The draft version of the paper uses outputs from scenario_impact_of_healthsystem.py, used to model HSS scenarios for +FCDO and Global Fund. 
+ +with reduced consumables logging +/Users/tmangal/PycharmProjects/TLOmodel/outputs/t.mangal@imperial.ac.uk/hss_elements-2024-11-12T172311Z +""" from pathlib import Path from tlo import Date @@ -8,8 +15,10 @@ import matplotlib.pyplot as plt import seaborn as sns +import squarify import numpy as np import pandas as pd +from itertools import cycle from tlo.analysis.utils import ( extract_params, @@ -24,7 +33,7 @@ summarize_cost_data, do_stacked_bar_plot_of_cost_by_category, do_line_plot_of_cost, - generate_multiple_scenarios_roi_plot, + create_summary_treemap_by_cost_subgroup, estimate_projected_health_spending) # Define a timestamp for script outputs @@ -43,7 +52,7 @@ # Load result files # ------------------------------------------------------------------------------------------------------------------ results_folder = get_scenario_outputs('hss_elements-2024-11-12T172311Z.py', outputfilepath)[0] -#results_folder = Path('./outputs/cost_scenarios-2024-11-26T164353Z') +#results_folder = Path('./outputs/cost_scenarios-2024-11-26T205921Z') # Check can read results from draw=0, run=0 log = load_pickled_dataframes(results_folder, 0, 0) # look at one log (so can decide what to extract) @@ -52,19 +61,21 @@ # Declare default parameters for cost analysis # ------------------------------------------------------------------------------------------------------------------ # Period relevant for costing -TARGET_PERIOD_INTERVENTION = (Date(2020, 1, 1), Date(2030, 12, 31)) # This is the period that is costed -relevant_period_for_costing = [i.year for i in TARGET_PERIOD_INTERVENTION] +TARGET_PERIOD = (Date(2010, 1, 1), Date(2035, 12, 31)) # This is the period that is costed +relevant_period_for_costing = [i.year for i in TARGET_PERIOD] list_of_relevant_years_for_costing = list(range(relevant_period_for_costing[0], relevant_period_for_costing[1] + 1)) +list_of_years_for_plot = list(range(2019, 2036)) # Scenarios -cost_scenarios = {0: "Real world", 1: "Perfect health system"} +cost_scenarios = {0: "Actual", 3: "Expanded HRH", 5: "Improved consumable availability", + 8: "Expanded HRH + Improved consumable availability"} # Costing parameters discount_rate = 0.03 # Estimate standard input costs of scenario # ----------------------------------------------------------------------------------------------------------------------- -input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, _draws = [0], +input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, _draws = [0, 3, 5, 8], _years=list_of_relevant_years_for_costing, cost_only_used_staff=True, _discount_rate = discount_rate, summarize = True) # _draws = htm_scenarios_for_gf_report --> this subset is created after calculating malaria scale up costs @@ -72,7 +83,7 @@ input_costs.loc[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost'] = \ input_costs.loc[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost']/10 -input_costs_undiscounted = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, _draws = [0,1], +input_costs_undiscounted = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, _draws = [0, 3, 5, 8], _years=list_of_relevant_years_for_costing, cost_only_used_staff=True, _discount_rate = 0, summarize = True) # _draws = htm_scenarios_for_gf_report --> this subset is created after calculating malaria scale up costs @@ -83,15 +94,25 @@ # Get figures for overview paper # 
----------------------------------------------------------------------------------------------------------------------- # Figure 1: Estimated costs by cost category -do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'all', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = cost_scenarios) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'all', _disaggregate_by_subgroup = False, + _year = list_of_years_for_plot, + _outputfilepath = figurespath, _scenario_dict = cost_scenarios) # Figure 2: Estimated costs by year do_line_plot_of_cost(_df = input_costs_undiscounted, _cost_category='all', - _year='all', _draws= [0], + _year=list_of_years_for_plot, _draws= [0], disaggregate_by= 'cost_category', _outputfilepath = figurespath) do_line_plot_of_cost(_df = input_costs_undiscounted, _cost_category='all', - _year='all', _draws= [1], + _year=list_of_years_for_plot, _draws= [3], + disaggregate_by= 'cost_category', + _outputfilepath = figurespath) +do_line_plot_of_cost(_df = input_costs_undiscounted, _cost_category='all', + _year=list_of_years_for_plot, _draws= [5], + disaggregate_by= 'cost_category', + _outputfilepath = figurespath) +do_line_plot_of_cost(_df = input_costs_undiscounted, _cost_category='all', + _year=list_of_years_for_plot, _draws= [8], disaggregate_by= 'cost_category', _outputfilepath = figurespath) @@ -100,6 +121,7 @@ # Figure 4: Total cost by scenario assuming 0% discount rate do_stacked_bar_plot_of_cost_by_category(_df = input_costs_undiscounted, _cost_category = 'all', + _year=list_of_years_for_plot, _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = cost_scenarios, @@ -107,6 +129,34 @@ # Figure 5: Total cost by scenario applying changing discount rates +cost_categories = ['human resources for health', 'medical consumables', + 'medical equipment', 'facility operating cost'] +draws = input_costs.draw.unique().tolist() +colourmap_for_consumables = {'First-line ART regimen: adult':'#1f77b4', + 'Test, HIV EIA Elisa': '#ff7f0e', + 'VL Test': '#2ca02c', + 'Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly': '#d62728', + 'Oxygen, 1000 liters, primarily with oxygen cylinders': '#9467bd', + 'Phenobarbital, 100 mg': '#8c564b', + 'Rotavirus vaccine': '#e377c2', + 'Carbamazepine 200mg_1000_CMST': '#7f7f7f', + 'Infant resuscitator, clear plastic + mask + bag_each_CMST': '#bcbd22', + 'Dietary supplements (country-specific)': '#17becf', + 'Tenofovir (TDF)/Emtricitabine (FTC), tablet, 300/200 mg': '#2b8cbe', + 'Blood, one unit': '#ffdd44', + 'Pneumococcal vaccine':'#756bb1'} + +for _cat in cost_categories: + for _d in draws: + if _cat == 'medical consumables': + create_summary_treemap_by_cost_subgroup(_df = input_costs, _year = list_of_years_for_plot, + _cost_category = _cat, _draw = _d, _color_map=colourmap_for_consumables, + _label_fontsize= 8) + else: + create_summary_treemap_by_cost_subgroup(_df=input_costs, _year=list_of_years_for_plot, + _cost_category=_cat, _draw=_d, _label_fontsize= 8.5) + + # Get tables for overview paper # ----------------------------------------------------------------------------------------------------------------------- # Group data and aggregate cost for each draw and stat From 08dd2da29670b0a639b675640536b802e2a31dfd Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 2 Dec 2024 14:56:50 +0000 Subject: [PATCH 202/230] fix outputfile path input --- src/scripts/costing/cost_estimation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 36d5c252e0..81ece6a7c0 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -1082,7 +1082,7 @@ def do_line_plot_of_cost(_df, _cost_category='all', #----------------------------------------------------------------------------------------------- def create_summary_treemap_by_cost_subgroup(_df, _cost_category = None, _draw = None, _year = 'all', _color_map = None, _label_fontsize = 10, - _outputfilepath = figurespath): + _outputfilepath: Path = None): # Function to wrap text to fit within treemap rectangles def wrap_text(text, width=15): return "\n".join(textwrap.wrap(text, width)) From 90c21388ea593de80e291aab9062962dc7ad127f Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 2 Dec 2024 15:51:57 +0000 Subject: [PATCH 203/230] correct facility operation cost estimation - remove district disaggregation --- src/scripts/costing/cost_estimation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 81ece6a7c0..b0fc4f8b39 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -663,9 +663,9 @@ def update_itemuse_for_level1b_using_level2_data(_df): value_name="unit_cost" # Name for the new 'cost' column ) unit_cost_fac_operations['Facility_Level'] = unit_cost_fac_operations['Facility_Level'].astype(str) - fac_count_by_district_and_level = mfl[['Facility_Level', 'Facility_Count', 'District']].groupby(['Facility_Level', 'District']).sum().reset_index() + fac_count_by_level = mfl[['Facility_Level', 'Facility_Count']].groupby(['Facility_Level']).sum().reset_index() - facility_operation_cost = pd.merge(unit_cost_fac_operations, fac_count_by_district_and_level, on = 'Facility_Level', how = 'left', validate = 'm:m') + facility_operation_cost = pd.merge(unit_cost_fac_operations, fac_count_by_level, on = 'Facility_Level', how = 'left', validate = 'm:m') facility_operation_cost['Facility_Count'] = facility_operation_cost['Facility_Count'].fillna(0).astype(int) facility_operation_cost['cost'] = facility_operation_cost['unit_cost'] * facility_operation_cost['Facility_Count'] From 377d4ca83d543139b7594f3479bc87cbfbca945e Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 2 Dec 2024 15:57:49 +0000 Subject: [PATCH 204/230] validate facility operation costs - disaggregate Rm data on vehicles purchase and maintenance --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- src/scripts/costing/costing_validation.py | 9 +++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 9bd07060e1..49e5341d6e 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aa046187fd0edf597646fe4795c428f0053cdeb0b56537780cc945ac283fea1f -size 4287514 +oid sha256:ea4a0cb4912ce0e0dba3bbb25aaa6b4a16a17cc72dd35e4b20348c15562c6744 +size 4287822 diff --git a/src/scripts/costing/costing_validation.py b/src/scripts/costing/costing_validation.py index 8c2ffc4b2b..367d91f847 100644 --- a/src/scripts/costing/costing_validation.py +++ b/src/scripts/costing/costing_validation.py @@ -191,7 +191,10 @@ def get_calibration_relevant_subset_of_other_costs(_df, _subcategory, _calibrati # Facility operation costs 
#----------------------------------------------------------------------------------------------------------------------- -#calibration_data[calibration_data['calibration_category'] == 'Facility utility bills - ICT', 'Infrastructure - New Builds'] = get_calibration_relevant_subset() +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = input_costs, _col = 'cost_subgroup', _col_value = ['Electricity', 'Water', 'Cleaning', 'Security', 'Food for inpatient cases', 'Facility management'], _calibration_category = 'Facility utility bills')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = input_costs, _col = 'cost_subgroup', _col_value = ['Building maintenance'], _calibration_category = 'Infrastructure - Rehabilitation')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = input_costs, _col = 'cost_subgroup', _col_value = ['Vehicle maintenance'], _calibration_category = 'Vehicles - Maintenance')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = input_costs, _col = 'cost_subgroup', _col_value = ['Ambulance fuel'], _calibration_category = 'Vehicles - Fuel')) # Infrastructure #----------------------------------------------------------------------------------------------------------------------- @@ -203,6 +206,7 @@ def get_calibration_relevant_subset_of_other_costs(_df, _subcategory, _calibrati list_of_consumables_costs_for_calibration_without_hiv =['Malaria RDTs', 'Antimalarials', 'TB Tests (including RDTs)', 'TB Treatment', 'Condoms and Lubricants', 'Other Drugs, medical supplies, and commodities'] list_of_hr_costs_for_calibration = ['Health Worker Salaries', 'Health Worker Training - In-Service', 'Health Worker Training - Pre-Service', 'Mentorships & Supportive Supervision'] list_of_equipment_costs_for_calibration = ['Medical Equipment - Purchase', 'Medical Equipment - Maintenance'] +list_of_operating_costs_for_calibration = ['Facility utility bills', 'Infrastructure - Rehabilitation', 'Vehicles - Maintenance','Vehicles - Fuel' ] # Create folders to store results costing_outputs_folder = Path('./outputs/costing') @@ -290,13 +294,14 @@ def do_cost_calibration_plot(_df, _costs_included, _xtick_fontsize = 10): # Call the function for each variable and cost list all_consumable_costs = list_of_consumables_costs_for_calibration_only_hiv + list_of_consumables_costs_for_calibration_without_hiv + ['Supply Chain'] -all_calibration_costs = all_consumable_costs + list_of_hr_costs_for_calibration + list_of_equipment_costs_for_calibration +all_calibration_costs = all_consumable_costs + list_of_hr_costs_for_calibration + list_of_equipment_costs_for_calibration + list_of_operating_costs_for_calibration do_cost_calibration_plot(calibration_data,list_of_consumables_costs_for_calibration_without_hiv) do_cost_calibration_plot(calibration_data,list_of_consumables_costs_for_calibration_only_hiv) do_cost_calibration_plot(calibration_data,all_consumable_costs) do_cost_calibration_plot(calibration_data, list_of_hr_costs_for_calibration) do_cost_calibration_plot(calibration_data, list_of_equipment_costs_for_calibration) +do_cost_calibration_plot(calibration_data, list_of_operating_costs_for_calibration) do_cost_calibration_plot(calibration_data,all_calibration_costs, _xtick_fontsize = 8) calibration_data.to_csv(figurespath / 'calibration/calibration.csv') 
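The chained `fillna` calls above all follow one pattern: sum the model costs for the subgroups mapped to a calibration category and slot that total into the matching row, leaving the other rows untouched for later fills. A toy sketch of that pattern (`get_calibration_relevant_subset_of_costs` itself is not reproduced, and all values are invented):

```python
import pandas as pd

calibration_data = pd.DataFrame({
    "calibration_category": ["Facility utility bills",
                             "Vehicles - Fuel and Maintenance",
                             "Bednets"],
    "actual_expenditure": [120.0, 45.0, 30.0],
    "model_cost": [pd.NA, pd.NA, pd.NA],
})

model_costs = pd.DataFrame({
    "cost_subgroup": ["Electricity", "Water", "Vehicle maintenance", "Ambulance fuel"],
    "cost": [70.0, 40.0, 25.0, 15.0],
})


def model_cost_for(category: str, subgroups: list) -> pd.Series:
    """Series aligned with calibration_data holding the summed model cost for one category."""
    total = model_costs.loc[model_costs["cost_subgroup"].isin(subgroups), "cost"].sum()
    return pd.Series(
        [total if cat == category else pd.NA
         for cat in calibration_data["calibration_category"]],
        index=calibration_data.index,
    )


# Fill each category in turn, exactly as the script chains its fillna calls.
calibration_data["model_cost"] = calibration_data["model_cost"].fillna(
    model_cost_for("Facility utility bills", ["Electricity", "Water"]))
calibration_data["model_cost"] = calibration_data["model_cost"].fillna(
    model_cost_for("Vehicles - Fuel and Maintenance", ["Vehicle maintenance", "Ambulance fuel"]))

# Categories with no mapped subgroups (here 'Bednets') remain <NA> for later fills.
print(calibration_data)
```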
From 27c5a5cd23d9cddc1ecb613c08ca905d3995807d Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 2 Dec 2024 16:10:23 +0000 Subject: [PATCH 205/230] updates for facility operating costs validation --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- src/scripts/costing/costing_validation.py | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 49e5341d6e..a86165c4d2 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ea4a0cb4912ce0e0dba3bbb25aaa6b4a16a17cc72dd35e4b20348c15562c6744 -size 4287822 +oid sha256:d87b5254116208e339125f306ecea59f901e03871c0cec5e33bd656e0c430931 +size 4287823 diff --git a/src/scripts/costing/costing_validation.py b/src/scripts/costing/costing_validation.py index 367d91f847..b9275e9906 100644 --- a/src/scripts/costing/costing_validation.py +++ b/src/scripts/costing/costing_validation.py @@ -193,8 +193,7 @@ def get_calibration_relevant_subset_of_other_costs(_df, _subcategory, _calibrati #----------------------------------------------------------------------------------------------------------------------- calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = input_costs, _col = 'cost_subgroup', _col_value = ['Electricity', 'Water', 'Cleaning', 'Security', 'Food for inpatient cases', 'Facility management'], _calibration_category = 'Facility utility bills')) calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = input_costs, _col = 'cost_subgroup', _col_value = ['Building maintenance'], _calibration_category = 'Infrastructure - Rehabilitation')) -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = input_costs, _col = 'cost_subgroup', _col_value = ['Vehicle maintenance'], _calibration_category = 'Vehicles - Maintenance')) -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = input_costs, _col = 'cost_subgroup', _col_value = ['Ambulance fuel'], _calibration_category = 'Vehicles - Fuel')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = input_costs, _col = 'cost_subgroup', _col_value = ['Vehicle maintenance', 'Ambulance fuel'], _calibration_category = 'Vehicles - Fuel and Maintenance')) # Infrastructure #----------------------------------------------------------------------------------------------------------------------- @@ -206,7 +205,7 @@ def get_calibration_relevant_subset_of_other_costs(_df, _subcategory, _calibrati list_of_consumables_costs_for_calibration_without_hiv =['Malaria RDTs', 'Antimalarials', 'TB Tests (including RDTs)', 'TB Treatment', 'Condoms and Lubricants', 'Other Drugs, medical supplies, and commodities'] list_of_hr_costs_for_calibration = ['Health Worker Salaries', 'Health Worker Training - In-Service', 'Health Worker Training - Pre-Service', 'Mentorships & Supportive Supervision'] list_of_equipment_costs_for_calibration = ['Medical Equipment - Purchase', 'Medical Equipment - Maintenance'] -list_of_operating_costs_for_calibration = ['Facility utility bills', 'Infrastructure - Rehabilitation', 'Vehicles - Maintenance','Vehicles - Fuel' ] +list_of_operating_costs_for_calibration = 
['Facility utility bills', 'Infrastructure - Rehabilitation', 'Vehicles - Maintenance','Vehicles - Fuel and Maintenance'] # Create folders to store results costing_outputs_folder = Path('./outputs/costing') From 67bc7203001dad6d400e5aa64c48d0cd007962d2 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 4 Dec 2024 18:31:53 +0000 Subject: [PATCH 206/230] disaggregate Rm consumables costs + remove manual fixes for VL tests and depo --- resources/costing/ResourceFile_Costing.xlsx | 4 +-- src/scripts/costing/costing_validation.py | 32 ++++++++++++++------- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index a86165c4d2..aeeec761a3 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d87b5254116208e339125f306ecea59f901e03871c0cec5e33bd656e0c430931 -size 4287823 +oid sha256:b366ea03e22e5e7504be07f1613f6601c8efd12c57a327ed07e1471551bf9f25 +size 4302372 diff --git a/src/scripts/costing/costing_validation.py b/src/scripts/costing/costing_validation.py index b9275e9906..2e3f6cb9e5 100644 --- a/src/scripts/costing/costing_validation.py +++ b/src/scripts/costing/costing_validation.py @@ -143,6 +143,13 @@ def get_calibration_relevant_subset_of_other_costs(_df, _subcategory, _calibrati #----------------------------------------------------------------------------------------------------------------------- calibration_data['model_cost'] = np.nan consumables_costs_by_item_code = assign_item_codes_to_consumables(input_costs) + +irs = [161] +bednets = [160] +undernutrition = [213, 1220, 1221, 1223, 1227] +cervical_cancer = [261, 1239] +other_family_planning = [1, 3,7,12,13] +vaccines = [150, 151, 153, 155, 157, 158, 1197] art = [2671, 2672, 2673] tb_treatment = [176, 177, 179, 178, 181, 2678] antimalarials = [162,164,170] @@ -150,9 +157,16 @@ def get_calibration_relevant_subset_of_other_costs(_df, _subcategory, _calibrati hiv_screening = [190,191,196] condoms = [2,25] tb_tests = [184,187, 175] -other_drugs = set(consumables_costs_by_item_code['cost_subgroup'].unique()) - set(art) - set(tb_treatment) - set(antimalarials) - set(malaria_rdts) - set(hiv_screening)\ - - set(condoms) - set(tb_tests)# - {3} -# TODO once the quantity dispensed of Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly is fixed we no lnger have to adjust for Item_code 3 +other_drugs = set(consumables_costs_by_item_code['cost_subgroup'].unique()) - set(irs) - set(bednets) - set(undernutrition) - set(cervical_cancer) - set(other_family_planning) - set(vaccines) \ + - set(art) - set(tb_treatment) - set(antimalarials) - set(malaria_rdts) - set(hiv_screening)\ + - set(condoms) - set(tb_tests) + +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = irs, _calibration_category = 'Indoor Residual Spray')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = bednets, _calibration_category = 'Bednets')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = undernutrition, _calibration_category = 'Undernutrition 
commodities')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = cervical_cancer, _calibration_category = 'Cervical Cancer')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = other_family_planning, _calibration_category = 'Other family planning commodities')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = vaccines, _calibration_category = 'Vaccines')) # Note that the main ARV regimen in 2018 was tenofovir/lamivudine/efavirenz as opposed to Tenofovir/Lamivudine/Dolutegravir as used in the RF_Costing. The price of this # was $80 per year (80/(0.103*365)) times what's estimated by the model so let's update this @@ -161,16 +175,12 @@ def get_calibration_relevant_subset_of_other_costs(_df, _subcategory, _calibrati calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = tb_treatment, _calibration_category = 'TB Treatment')) calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = antimalarials, _calibration_category = 'Antimalarials')) calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = malaria_rdts, _calibration_category = 'Malaria RDTs')) -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = [191, 196], _calibration_category = 'HIV Screening/Diagnostic Tests') + - get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = [190], _calibration_category = 'HIV Screening/Diagnostic Tests')) -# TODO update above when VL test quantity is adjusted in the module - currently 4 tests per year are assumed +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = hiv_screening, _calibration_category = 'HIV Screening/Diagnostic Tests')) calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = condoms, _calibration_category = 'Condoms and Lubricants')) calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = tb_tests, _calibration_category = 'TB Tests (including RDTs)')) -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = other_drugs, _calibration_category = 'Other Drugs, medical supplies, and commodities') + - get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 
'cost_subgroup', _col_value = [3], _calibration_category = 'Other Drugs, medical supplies, and commodities')/7) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = other_drugs, _calibration_category = 'Other Drugs, medical supplies, and commodities')) calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = input_costs, _col = 'cost_subcategory', _col_value = ['supply_chain'], _calibration_category = 'Supply Chain')) - # HR #----------------------------------------------------------------------------------------------------------------------- hr_costs = input_costs[input_costs['cost_category'] == 'human resources for health'] @@ -202,7 +212,9 @@ def get_calibration_relevant_subset_of_other_costs(_df, _subcategory, _calibrati # %% # 3. Create calibration plot list_of_consumables_costs_for_calibration_only_hiv = ['HIV Screening/Diagnostic Tests', 'Antiretrovirals'] -list_of_consumables_costs_for_calibration_without_hiv =['Malaria RDTs', 'Antimalarials', 'TB Tests (including RDTs)', 'TB Treatment', 'Condoms and Lubricants', 'Other Drugs, medical supplies, and commodities'] +list_of_consumables_costs_for_calibration_without_hiv =['Indoor Residual Spray', 'Bednets', 'Malaria RDTs', 'Antimalarials', 'TB Tests (including RDTs)', 'TB Treatment', 'Vaccines', + 'Condoms and Lubricants', 'Other family planning commodities', + 'Undernutrition commodities', 'Cervical Cancer', 'Other Drugs, medical supplies, and commodities'] list_of_hr_costs_for_calibration = ['Health Worker Salaries', 'Health Worker Training - In-Service', 'Health Worker Training - Pre-Service', 'Mentorships & Supportive Supervision'] list_of_equipment_costs_for_calibration = ['Medical Equipment - Purchase', 'Medical Equipment - Maintenance'] list_of_operating_costs_for_calibration = ['Facility utility bills', 'Infrastructure - Rehabilitation', 'Vehicles - Maintenance','Vehicles - Fuel and Maintenance'] From da548b7a9a5d9f6c81ddaee3ff8c94bd712535ef Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 4 Dec 2024 20:09:29 +0000 Subject: [PATCH 207/230] minor fixes to calibration data --- resources/costing/ResourceFile_Costing.xlsx | 4 +-- src/scripts/costing/costing_validation.py | 30 ++++++++++++--------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index aeeec761a3..1e2447f62f 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b366ea03e22e5e7504be07f1613f6601c8efd12c57a327ed07e1471551bf9f25 -size 4302372 +oid sha256:ddf76cb8ea1571bcd4be40ba315deb59802edd6ca31d5ab118402918e4d7faf9 +size 4302456 diff --git a/src/scripts/costing/costing_validation.py b/src/scripts/costing/costing_validation.py index 2e3f6cb9e5..d655788a6b 100644 --- a/src/scripts/costing/costing_validation.py +++ b/src/scripts/costing/costing_validation.py @@ -86,11 +86,20 @@ def assign_item_codes_to_consumables(_df): # Retain only consumable costs _df = _df[_df['cost_category'] == 'medical consumables'] - # Create dictionary mapping item_codes to consumables names + ''' consumables_dict = pd.read_csv(path_for_consumable_resourcefiles / 'ResourceFile_consumables_matched.csv', low_memory=False, encoding="ISO-8859-1")[['item_code', 
'consumable_name_tlo']] consumables_dict = consumables_dict.rename(columns = {'item_code': 'Item_Code'}) consumables_dict = dict(zip(consumables_dict['consumable_name_tlo'], consumables_dict['Item_Code'])) + ''' + + # Create dictionary mapping item_codes to consumables names + consumables_df = workbook_cost["consumables"] + consumables_df = consumables_df.rename(columns=consumables_df.iloc[0]) + consumables_df = consumables_df[['Item_Code', 'Consumable_name_tlo']].reset_index( + drop=True).iloc[1:] + consumables_df = consumables_df[consumables_df['Item_Code'].notna()] + consumables_dict = dict(zip(consumables_df['Consumable_name_tlo'], consumables_df['Item_Code'])) # Replace consumable_name_tlo with item_code _df = _df.copy() @@ -157,21 +166,21 @@ def get_calibration_relevant_subset_of_other_costs(_df, _subcategory, _calibrati hiv_screening = [190,191,196] condoms = [2,25] tb_tests = [184,187, 175] +circumcision = [197] other_drugs = set(consumables_costs_by_item_code['cost_subgroup'].unique()) - set(irs) - set(bednets) - set(undernutrition) - set(cervical_cancer) - set(other_family_planning) - set(vaccines) \ - set(art) - set(tb_treatment) - set(antimalarials) - set(malaria_rdts) - set(hiv_screening)\ - set(condoms) - set(tb_tests) +# Note that the main ARV regimen in 2018 was tenofovir/lamivudine/efavirenz as opposed to Tenofovir/Lamivudine/Dolutegravir as used in the RF_Costing. The price of this +# was $80 per year (80/(0.103*365)) times what's estimated by the model so let's update this +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = art, _calibration_category = 'Antiretrovirals')* 80/(0.103*365)) +# Other consumables costs do not need to be adjusted calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = irs, _calibration_category = 'Indoor Residual Spray')) calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = bednets, _calibration_category = 'Bednets')) calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = undernutrition, _calibration_category = 'Undernutrition commodities')) calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = cervical_cancer, _calibration_category = 'Cervical Cancer')) calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = other_family_planning, _calibration_category = 'Other family planning commodities')) calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = vaccines, _calibration_category = 'Vaccines')) - -# Note that the main ARV regimen in 2018 was tenofovir/lamivudine/efavirenz as opposed to Tenofovir/Lamivudine/Dolutegravir as used in the RF_Costing. 
The price of this -# was $80 per year (80/(0.103*365)) times what's estimated by the model so let's update this -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = art, _calibration_category = 'Antiretrovirals')* 80/(0.103*365)) -# Other consumables costs do not need to be adjusted calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = tb_treatment, _calibration_category = 'TB Treatment')) calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = antimalarials, _calibration_category = 'Antimalarials')) calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = malaria_rdts, _calibration_category = 'Malaria RDTs')) @@ -179,6 +188,7 @@ def get_calibration_relevant_subset_of_other_costs(_df, _subcategory, _calibrati calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = condoms, _calibration_category = 'Condoms and Lubricants')) calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = tb_tests, _calibration_category = 'TB Tests (including RDTs)')) calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = other_drugs, _calibration_category = 'Other Drugs, medical supplies, and commodities')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = circumcision, _calibration_category = 'Voluntary Male Medical Circumcision')) calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = input_costs, _col = 'cost_subcategory', _col_value = ['supply_chain'], _calibration_category = 'Supply Chain')) # HR @@ -205,13 +215,9 @@ def get_calibration_relevant_subset_of_other_costs(_df, _subcategory, _calibrati calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = input_costs, _col = 'cost_subgroup', _col_value = ['Building maintenance'], _calibration_category = 'Infrastructure - Rehabilitation')) calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = input_costs, _col = 'cost_subgroup', _col_value = ['Vehicle maintenance', 'Ambulance fuel'], _calibration_category = 'Vehicles - Fuel and Maintenance')) -# Infrastructure -#----------------------------------------------------------------------------------------------------------------------- -#calibration_data[calibration_data['calibration_category'] == 'Infrastructure - Rehabilitation'] = get_calibration_relevant_subset() - # %% # 3. 
Create calibration plot -list_of_consumables_costs_for_calibration_only_hiv = ['HIV Screening/Diagnostic Tests', 'Antiretrovirals'] +list_of_consumables_costs_for_calibration_only_hiv = ['Voluntary Male Medical Circumcision', 'HIV Screening/Diagnostic Tests', 'Antiretrovirals'] list_of_consumables_costs_for_calibration_without_hiv =['Indoor Residual Spray', 'Bednets', 'Malaria RDTs', 'Antimalarials', 'TB Tests (including RDTs)', 'TB Treatment', 'Vaccines', 'Condoms and Lubricants', 'Other family planning commodities', 'Undernutrition commodities', 'Cervical Cancer', 'Other Drugs, medical supplies, and commodities'] @@ -313,7 +319,7 @@ def do_cost_calibration_plot(_df, _costs_included, _xtick_fontsize = 10): do_cost_calibration_plot(calibration_data, list_of_hr_costs_for_calibration) do_cost_calibration_plot(calibration_data, list_of_equipment_costs_for_calibration) do_cost_calibration_plot(calibration_data, list_of_operating_costs_for_calibration) -do_cost_calibration_plot(calibration_data,all_calibration_costs, _xtick_fontsize = 8) +do_cost_calibration_plot(calibration_data,all_calibration_costs, _xtick_fontsize = 7) calibration_data.to_csv(figurespath / 'calibration/calibration.csv') # Stacked bar charts to represent all cost sub-groups From a2cc2532351255cbdc12cb461953dc9de8eeafde Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 4 Dec 2024 20:13:32 +0000 Subject: [PATCH 208/230] change the cost of oxygen back from OHT 2016 cost --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 1e2447f62f..e244bbb345 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ddf76cb8ea1571bcd4be40ba315deb59802edd6ca31d5ab118402918e4d7faf9 -size 4302456 +oid sha256:88154d352def8fcbb00ec65de19a74ca15bd55344194a13c4624787bf394e19a +size 4301985 From 8f80a70051308fcfdfafa8bfa999bd7d70e3ee3f Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 9 Dec 2024 12:16:28 +0000 Subject: [PATCH 209/230] update the cost of disposable male circumcision kit to UNICEF catalogue prices --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index e244bbb345..b8f40ea2d5 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:88154d352def8fcbb00ec65de19a74ca15bd55344194a13c4624787bf394e19a -size 4301985 +oid sha256:9ba4f67c9a52b24feddf81c18c35bc117dba6dd89939f2de6bf752032e30235c +size 4302143 From 83e5857907cc2f7ee797180fb7dfb1531b9feff2 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 9 Dec 2024 16:19:12 +0000 Subject: [PATCH 210/230] update tables to include all 4 scenarios --- .../costing/costing_overview_analysis.py | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/scripts/costing/costing_overview_analysis.py b/src/scripts/costing/costing_overview_analysis.py index b391090599..f5ccf1a0ac 100644 --- a/src/scripts/costing/costing_overview_analysis.py +++ b/src/scripts/costing/costing_overview_analysis.py @@ -79,17 +79,11 @@ _years=list_of_relevant_years_for_costing, cost_only_used_staff=True, _discount_rate = discount_rate, 
summarize = True) # _draws = htm_scenarios_for_gf_report --> this subset is created after calculating malaria scale up costs -# TODO Remove the manual fix below once the logging for these is corrected -input_costs.loc[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost'] = \ - input_costs.loc[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost']/10 input_costs_undiscounted = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, _draws = [0, 3, 5, 8], _years=list_of_relevant_years_for_costing, cost_only_used_staff=True, _discount_rate = 0, summarize = True) # _draws = htm_scenarios_for_gf_report --> this subset is created after calculating malaria scale up costs -# TODO Remove the manual fix below once the logging for these is corrected -input_costs_undiscounted.loc[input_costs_undiscounted.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost'] = \ - input_costs_undiscounted.loc[input_costs_undiscounted.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost']/10 # Get figures for overview paper # ----------------------------------------------------------------------------------------------------------------------- @@ -144,17 +138,21 @@ 'Dietary supplements (country-specific)': '#17becf', 'Tenofovir (TDF)/Emtricitabine (FTC), tablet, 300/200 mg': '#2b8cbe', 'Blood, one unit': '#ffdd44', - 'Pneumococcal vaccine':'#756bb1'} + 'Pneumococcal vaccine': '#fdae61', + 'Pentavalent vaccine (DPT, Hep B, Hib)': '#d73027', + 'Ceftriaxone 1g, PFR_1_CMST': '#66c2a5', + 'male circumcision kit, consumables (10 procedures)_1_IDA': '#756bb1'} for _cat in cost_categories: for _d in draws: if _cat == 'medical consumables': create_summary_treemap_by_cost_subgroup(_df = input_costs, _year = list_of_years_for_plot, _cost_category = _cat, _draw = _d, _color_map=colourmap_for_consumables, - _label_fontsize= 8) + _label_fontsize= 8, _outputfilepath=figurespath) else: create_summary_treemap_by_cost_subgroup(_df=input_costs, _year=list_of_years_for_plot, - _cost_category=_cat, _draw=_d, _label_fontsize= 8.5) + _cost_category=_cat, _draw=_d, _label_fontsize= 8.5, + _outputfilepath=figurespath) # Get tables for overview paper @@ -173,7 +171,7 @@ def generate_detail_cost_table(_groupby_var, _groupby_var_name, _longtable = Fal # Create a pivot table to restructure the data for LaTeX output pivot_data = {} - for draw in [0, 1]: + for draw in [0, 3, 5, 8]: draw_data = grouped_costs.xs(draw, level='draw').unstack(fill_value=0) # Unstack to get 'stat' as columns # Concatenate 'mean' with 'lower-upper' in the required format pivot_data[draw] = draw_data['mean'].astype(str) + ' [' + \ @@ -181,10 +179,10 @@ def generate_detail_cost_table(_groupby_var, _groupby_var_name, _longtable = Fal draw_data['upper'].astype(str) + ']' # Combine draw data into a single DataFrame - table_data = pd.concat([pivot_data[0], pivot_data[1]], axis=1, keys=['draw=0', 'draw=1']).reset_index() + table_data = pd.concat([pivot_data[0], pivot_data[3], pivot_data[5], pivot_data[8]], axis=1, keys=['draw=0', 'draw=3', 'draw=5', 'draw=8']).reset_index() # Rename columns for clarity - table_data.columns = ['Cost Category', _groupby_var_name, 'Real World', 'Perfect Health System'] + table_data.columns = ['Cost Category', _groupby_var_name, 'Actual', 'Expanded HRH', 'Improved consumable availability', 'Expanded HRH +\n Improved consumable availability'] # Replace '\n' with '\\' for LaTeX line breaks #table_data['Real World'] 
= table_data['Real World'].apply(lambda x: x.replace("\n", "\\\\")) @@ -193,7 +191,7 @@ def generate_detail_cost_table(_groupby_var, _groupby_var_name, _longtable = Fal # Convert to LaTeX format with horizontal lines after every row latex_table = table_data.to_latex( longtable=_longtable, # Use the longtable environment for large tables - column_format='|R{4cm}|R{5cm}|R{3.5cm}|R{3.5cm}|', + column_format='|R{4cm}|R{5cm}|R{3.5cm}|R{3.5cm}|R{3.5cm}|R{3.5cm}|', caption=f"Summarized Costs by Category and {_groupby_var_name}", label=f"tab:cost_by_{_groupby_var}", position="h", From 8f9e443a734763148b7c723a5e9ecf4a10b8edc8 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 9 Dec 2024 16:34:20 +0000 Subject: [PATCH 211/230] add plot for inflow to outflow ratio --- .../costing/costing_overview_analysis.py | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/src/scripts/costing/costing_overview_analysis.py b/src/scripts/costing/costing_overview_analysis.py index f5ccf1a0ac..666a1dd21a 100644 --- a/src/scripts/costing/costing_overview_analysis.py +++ b/src/scripts/costing/costing_overview_analysis.py @@ -219,3 +219,55 @@ def generate_detail_cost_table(_groupby_var, _groupby_var_name, _longtable = Fal # Table : Cost by cost subgroup generate_detail_cost_table(_groupby_var = 'cost_subgroup', _groupby_var_name = 'Category Subgroup', _longtable = True) +# Generate consumable inflow to outflow ratio figure +# ----------------------------------------------------------------------------------------------------------------------- +# Estimate the stock to dispensed ratio from OpenLMIS data +lmis_consumable_usage = pd.read_csv(path_for_consumable_resourcefiles / "ResourceFile_Consumables_availability_and_usage.csv") +# Collapse individual facilities +lmis_consumable_usage_by_item_level_month = lmis_consumable_usage.groupby(['category', 'item_code', 'district', 'fac_type_tlo', 'month'])[['closing_bal', 'dispensed', 'received']].sum() +df = lmis_consumable_usage_by_item_level_month # Drop rows where monthly OpenLMIS data wasn't available +df = df.loc[df.index.get_level_values('month') != "Aggregate"] +opening_bal_january = df.loc[df.index.get_level_values('month') == 'January', 'closing_bal'] + \ + df.loc[df.index.get_level_values('month') == 'January', 'dispensed'] - \ + df.loc[df.index.get_level_values('month') == 'January', 'received'] +closing_bal_december = df.loc[df.index.get_level_values('month') == 'December', 'closing_bal'] +total_consumables_inflow_during_the_year = df.loc[df.index.get_level_values('month') != 'January', 'received'].groupby(level=[0,1,2,3]).sum() +\ + opening_bal_january.reset_index(level='month', drop=True) -\ + closing_bal_december.reset_index(level='month', drop=True) +total_consumables_outflow_during_the_year = df['dispensed'].groupby(level=[0,1,2,3]).sum() +inflow_to_outflow_ratio = total_consumables_inflow_during_the_year.div(total_consumables_outflow_during_the_year, fill_value=1) + +# Edit outlier ratios +inflow_to_outflow_ratio.loc[inflow_to_outflow_ratio < 1] = 1 # Ratio can't be less than 1 +inflow_to_outflow_ratio.loc[inflow_to_outflow_ratio > inflow_to_outflow_ratio.quantile(0.95)] = inflow_to_outflow_ratio.quantile(0.95) # Trim values greater than the 95th percentile +#average_inflow_to_outflow_ratio_ratio = inflow_to_outflow_ratio.mean() +inflow_to_outflow_ratio = inflow_to_outflow_ratio.reset_index().rename(columns = {0:'inflow_to_outflow_ratio'}) + +def plot_inflow_to_outflow_ratio(_df, groupby_var, _outputfilepath): + # Plot the bar plot + 
plt.figure(figsize=(10, 6)) + sns.barplot(data=_df , x=groupby_var, y= 'inflow_to_outflow_ratio', errorbar=None) + + # Add points representing the distribution of individual values + sns.stripplot(data=_df, x=groupby_var, y='inflow_to_outflow_ratio', color='black', size=5, alpha=0.2) + + # Set labels and title + plt.xlabel(groupby_var) + plt.ylabel('Inflow to Outflow Ratio') + plt.title('Average Inflow to Outflow Ratio by ' + f'{groupby_var}') + plt.xticks(rotation=45) + + # Show plot + plt.tight_layout() + plt.savefig(_outputfilepath / 'inflow_to_outflow_ratio_by' f'{groupby_var}' ) + +plot_inflow_to_outflow_ratio(inflow_to_outflow_ratio, 'fac_type_tlo', _outputfilepath = figurespath) +plot_inflow_to_outflow_ratio(inflow_to_outflow_ratio, 'district', _outputfilepath = figurespath) +plot_inflow_to_outflow_ratio(inflow_to_outflow_ratio, 'item_code', _outputfilepath = figurespath) +plot_inflow_to_outflow_ratio(inflow_to_outflow_ratio, 'category', _outputfilepath = figurespath) + +print(f"Inflow to Outflow ratio by consumable varies from " + f"{round(min(inflow_to_outflow_ratio.groupby('item_code')['inflow_to_outflow_ratio'].mean()),2)} " + f"to {round(max(inflow_to_outflow_ratio.groupby('item_code')['inflow_to_outflow_ratio'].mean()),2)}") + + From d6a7b0e4fe36428bcdc11fe17a16bbd509e695d7 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 10 Dec 2024 10:39:27 +0000 Subject: [PATCH 212/230] extract values for manuscript text --- .../costing/costing_overview_analysis.py | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/src/scripts/costing/costing_overview_analysis.py b/src/scripts/costing/costing_overview_analysis.py index 666a1dd21a..1c4d289b3b 100644 --- a/src/scripts/costing/costing_overview_analysis.py +++ b/src/scripts/costing/costing_overview_analysis.py @@ -78,13 +78,37 @@ input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, _draws = [0, 3, 5, 8], _years=list_of_relevant_years_for_costing, cost_only_used_staff=True, _discount_rate = discount_rate, summarize = True) +input_costs = input_costs[(input_costs.year > 2018) & (input_costs.year < 2036)] # _draws = htm_scenarios_for_gf_report --> this subset is created after calculating malaria scale up costs input_costs_undiscounted = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, _draws = [0, 3, 5, 8], _years=list_of_relevant_years_for_costing, cost_only_used_staff=True, _discount_rate = 0, summarize = True) +input_costs_undiscounted = input_costs_undiscounted[(input_costs_undiscounted.year > 2018) & (input_costs_undiscounted.year < 2036)] + # _draws = htm_scenarios_for_gf_report --> this subset is created after calculating malaria scale up costs +# Get overall estimates for main text +# ----------------------------------------------------------------------------------------------------------------------- +cost_by_draw = input_costs.groupby(['draw', 'stat'])['cost'].sum() +print(f"The total estimated cost of healthcare delivery in Malawi between 2019 and 2035 was estimated to be " + f"\${cost_by_draw[0,'mean']/1e9:,.2f} billion[\${cost_by_draw[0,'lower']/1e9:,.2f}b - \${cost_by_draw[0,'upper']/1e9:,.2f}b], under the actual scenario, and increased to " + f"\${cost_by_draw[5,'mean']/1e9:,.2f} billion[\${cost_by_draw[5,'lower']/1e9:,.2f}b - \${cost_by_draw[5,'upper']/1e9:,.2f}b] under the improved consumable availability scenario, " + f"followed by \${cost_by_draw[3,'mean']/1e9:,.2f} billion[\${cost_by_draw[3,'lower']/1e9:,.2f}b - 
\${cost_by_draw[3,'upper']/1e9:,.2f}b] under the expanded HRH scenario and finally " + f"\${cost_by_draw[8,'mean']/1e9:,.2f} billion[\${cost_by_draw[8,'lower']/1e9:,.2f}b - \${cost_by_draw[8,'upper']/1e9:,.2f}b] under the expanded HRH + improved consumable availability scenario.") + +print(f"The total cost of healthcare delivery in Malawi (from a health system perspective) between 2019 and 2035 was estimated at " + f"\${cost_by_draw[0,'mean']/1e9:,.2f} billion[\${cost_by_draw[0,'lower']/1e9:,.2f}b - \${cost_by_draw[0,'upper']/1e9:,.2f}b] under current constraints. " + f"Alternative scenarios reflecting improvements in supply chain efficiency and workforce capacity increased costs by " + f"{(cost_by_draw[5,'mean']/cost_by_draw[0,'mean'] - 1):.2%} to " + f"{(cost_by_draw[8,'mean']/cost_by_draw[0,'mean'] - 1):.2%}. " + f"Importantly, our 2019 cost estimates closely aligned with reported actual expenditures, supporting the reliability of our approach.") + +consumable_cost_by_draw = input_costs[(input_costs.cost_category == 'medical consumables') & (input_costs.stat == 'mean')].groupby(['draw'])['cost'].sum() +print(f"Notably, we find that the improved consumable availability scenario results in a {(consumable_cost_by_draw[3]/consumable_cost_by_draw[0] - 1):.2%} " + f"increase in cost of medical consumables. However, when combined with expanded HRH, the increase in consumables dispensed is " + f"{(consumable_cost_by_draw[8]/consumable_cost_by_draw[0] - 1):.2%} more than the actual scenario because the health system is able to deliver more appointments.") + # Get figures for overview paper # ----------------------------------------------------------------------------------------------------------------------- # Figure 1: Estimated costs by cost category @@ -166,7 +190,7 @@ def generate_detail_cost_table(_groupby_var, _groupby_var_name, _longtable = Fal grouped_costs = edited_input_costs.groupby(['cost_category', _groupby_var, 'draw', 'stat'])['cost'].sum() # Format the 'cost' values before creating the LaTeX table - grouped_costs = grouped_costs.apply(lambda x: f"{float(x):,.2f}") + grouped_costs = grouped_costs.apply(lambda x: f"{float(x):,.0f}") # Remove underscores from all column values # Create a pivot table to restructure the data for LaTeX output @@ -270,4 +294,7 @@ def plot_inflow_to_outflow_ratio(_df, groupby_var, _outputfilepath): f"{round(min(inflow_to_outflow_ratio.groupby('item_code')['inflow_to_outflow_ratio'].mean()),2)} " f"to {round(max(inflow_to_outflow_ratio.groupby('item_code')['inflow_to_outflow_ratio'].mean()),2)}") +inflow_to_outflow_ratio_by_item = inflow_to_outflow_ratio.groupby('item_code')['inflow_to_outflow_ratio'].mean().reset_index().rename(columns = {0: 'inflow_to_outflow_ratio'}) +inflow_to_outflow_ratio_by_item[inflow_to_outflow_ratio_by_item.inflow_to_outflow_ratio == min(inflow_to_outflow_ratio_by_item.inflow_to_outflow_ratio)]['item_code'] +inflow_to_outflow_ratio_by_item[inflow_to_outflow_ratio_by_item.inflow_to_outflow_ratio == max(inflow_to_outflow_ratio_by_item.inflow_to_outflow_ratio)]['item_code'] From 8f3887e491477388a254bbd2cdc91f059456fc36 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Fri, 13 Dec 2024 19:23:00 +0000 Subject: [PATCH 213/230] update facility operating unit costs based on further cleaning of the data - see commit https://github.com/HEPUMW/TLM-Study-Data/commit/aa541845c2812f2fdcf05a98c99be1dc3d1061d0 in data repository --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index b8f40ea2d5..03058f17d1 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9ba4f67c9a52b24feddf81c18c35bc117dba6dd89939f2de6bf752032e30235c -size 4302143 +oid sha256:64b8309a46f06da416a35d8323fe3839df14be18f7d00d4dfeeb9c297248461a +size 4301938 From cfde29c6cc8cb14368e22a59a8fd4dbe7fd17e61 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Sun, 15 Dec 2024 19:17:23 +0000 Subject: [PATCH 214/230] update stacked bar plot to add labels and error bars around the total value --- src/scripts/costing/cost_estimation.py | 152 +++++++++++++++++-------- 1 file changed, 102 insertions(+), 50 deletions(-) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index b0fc4f8b39..17e9a9ea56 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -831,81 +831,133 @@ def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', _add_figname_suffix = ''): # Subset and Pivot the data to have 'Cost Sub-category' as columns # Make a copy of the dataframe to avoid modifying the original - _df = _df[_df.stat == 'mean'].copy() - # Convert 'value' to millions - _df['cost'] = _df['cost'] / 1e6 - if _draws == None: - subset_df = _df - else: - subset_df = _df[_df.draw.isin(_draws)] - - if _year == 'all': - subset_df = subset_df - else: - subset_df = subset_df[subset_df['year'].isin(_year)] + _df_mean = _df[_df.stat == 'mean'].copy() + _df_lower = _df[_df.stat == 'lower'].copy() + _df_upper = _df[_df.stat == 'upper'].copy() + + # Subset the dataframes to keep the relevant categories for the plot + dfs = {"_df_mean": _df_mean, "_df_lower": _df_lower, "_df_upper": _df_upper} # create a dict of dataframes + for name, df in dfs.items(): + dfs[name] = df.copy() # Choose the dataframe to modify + # Convert 'cost' to millions + dfs[name]['cost'] = dfs[name]['cost'] / 1e6 + # Subset data + if _draws is not None: + dfs[name] = dfs[name][dfs[name].draw.isin(_draws)] + if _year != 'all': + dfs[name] = dfs[name][dfs[name]['year'].isin(_year)] + if _cost_category != 'all': + dfs[name] = dfs[name][dfs[name]['cost_category'] == _cost_category] + + # Extract the updated DataFrames back from the dictionary + _df_mean, _df_lower, _df_upper = dfs["_df_mean"], dfs["_df_lower"], dfs["_df_upper"] if _cost_category == 'all': - # Predefined color mapping for cost categories - color_mapping = { - 'human resources for health': '#1f77b4', # Muted blue - 'medical consumables': '#ff7f0e', # Muted orange - 'medical equipment': '#2ca02c', # Muted green - 'other': '#d62728', # Muted red - 'facility operating cost': '#9467bd', # Muted purple - } - # Default color for unexpected categories - default_color = 'gray' if (_disaggregate_by_subgroup == True): raise ValueError(f"Invalid input for _disaggregate_by_subgroup: '{_disaggregate_by_subgroup}'. 
" f"Value can be True only when plotting a specific _cost_category") else: - pivot_df = subset_df.pivot_table(index='draw', columns='cost_category', values='cost', aggfunc='sum') - plt_name_suffix = '' + pivot_mean = _df_mean.pivot_table(index='draw', columns='cost_category', values='cost', aggfunc='sum') + pivot_lower = _df_lower.pivot_table(index='draw', columns='cost_category', values='cost', aggfunc='sum') + pivot_upper = _df_upper.pivot_table(index='draw', columns='cost_category', values='cost', aggfunc='sum') else: - subset_df = subset_df[subset_df['cost_category'] == _cost_category] if (_disaggregate_by_subgroup == True): - # If sub-groups are more than 10 in number, then disaggregate the top 10 and group the rest into an 'other' category - if (len(subset_df['cost_subgroup'].unique()) > 10): - # Calculate total cost per subgroup - subgroup_totals = subset_df.groupby('cost_subgroup')['cost'].sum() - # Identify the top 10 subgroups by cost - top_10_subgroups = subgroup_totals.nlargest(10).index.tolist() - # Label the remaining subgroups as 'other' - subset_df['cost_subgroup'] = subset_df['cost_subgroup'].apply( - lambda x: x if x in top_10_subgroups else 'All other consumables' - ) - - pivot_df = subset_df.pivot_table(index=['draw', 'cost_subcategory'], columns='cost_subgroup', - values='cost', aggfunc='sum') - - else: - pivot_df = subset_df.pivot_table(index=['draw', 'cost_subcategory'], columns='cost_subgroup', - values='cost', aggfunc='sum') + for name, df in dfs.items(): + dfs[name] = df.copy() # Choose the dataframe to modify + # If sub-groups are more than 10 in number, then disaggregate the top 10 and group the rest into an 'other' category + if (len(dfs[name]['cost_subgroup'].unique()) > 10): + # Calculate total cost per subgroup + subgroup_totals = dfs[name].groupby('cost_subgroup')['cost'].sum() + # Identify the top 10 subgroups by cost + top_10_subgroups = subgroup_totals.nlargest(10).index.tolist() + # Label the remaining subgroups as 'other' + dfs[name]['cost_subgroup'] = dfs[name]['cost_subgroup'].apply( + lambda x: x if x in top_10_subgroups else 'All other items' + ) + + # Extract the updated DataFrames back from the dictionary + _df_mean, _df_lower, _df_upper = dfs["_df_mean"], dfs["_df_lower"], dfs["_df_upper"] + + pivot_mean = _df_mean.pivot_table(index='draw', columns='cost_subgroup', + values='cost', aggfunc='sum') + pivot_lower = _df_lower.pivot_table(index='draw', columns='cost_subgroup', + values='cost', aggfunc='sum') + pivot_upper = _df_upper.pivot_table(index='draw', columns='cost_subgroup', + values='cost', aggfunc='sum') plt_name_suffix = '_by_subgroup' else: - pivot_df = subset_df.pivot_table(index='draw', columns='cost_subcategory', values='cost', aggfunc='sum') + pivot_mean = _df_mean.pivot_table(index='draw', columns='cost_subcategory', values='cost', aggfunc='sum') + pivot_lower = _df_lower.pivot_table(index='draw', columns='cost_subcategory', values='cost', aggfunc='sum') + pivot_upper = _df_upper.pivot_table(index='draw', columns='cost_subcategory', values='cost', aggfunc='sum') plt_name_suffix = '' # Sort pivot_df columns in ascending order by total cost - sorted_columns = pivot_df.sum(axis=0).sort_values().index - pivot_df = pivot_df[sorted_columns] # Rearrange columns by sorted order + sorted_columns = pivot_mean.sum(axis=0).sort_values().index + pivot_mean = pivot_mean[sorted_columns] + pivot_lower = pivot_lower[sorted_columns] + pivot_upper = pivot_upper[sorted_columns] + + # Error bars + lower_bounds = pivot_mean.sum(axis=1) - 
pivot_lower.sum(axis=1) + upper_bounds = pivot_upper.sum(axis=1) - pivot_mean.sum(axis=1) + + if _cost_category == 'all': + # Predefined color mapping for cost categories + color_mapping = { + 'human resources for health': '#1f77b4', # Muted blue + 'medical consumables': '#ff7f0e', # Muted orange + 'medical equipment': '#2ca02c', # Muted green + 'other': '#d62728', # Muted red + 'facility operating cost': '#9467bd', # Muted purple + } + # Default color for unexpected categories + default_color = 'gray' + plt_name_suffix = '' # Define custom colors for the bars if _cost_category == 'all': column_colors = [color_mapping.get(col, default_color) for col in sorted_columns] # Plot the stacked bar chart with set colours - ax = pivot_df.plot(kind='bar', stacked=True, figsize=(10, 6), color=column_colors) + ax = pivot_mean.plot(kind='bar', stacked=True, figsize=(10, 6), color=column_colors) + + # Add data labels + for c in ax.containers: + # Add label only if the value of the segment is > 1.20th of the ylim + max_y = ax.get_ylim()[1] + labels = [round(v.get_height(),1) if v.get_height() > max_y/20 else '' for v in c] + # remove the labels parameter if it's not needed for customized labels + ax.bar_label(c, labels=labels, label_type='center') + + # Add error bars + x_pos = np.arange(len(pivot_mean.index)) + total_means = pivot_mean.sum(axis=1) + error_bars = [lower_bounds, upper_bounds] + ax.errorbar(x_pos, total_means, yerr=error_bars, fmt='o', color='black', capsize=5) + else: # Plot the stacked bar chart without set colours - ax = pivot_df.plot(kind='bar', stacked=True, figsize=(10, 6)) + ax = pivot_mean.plot(kind='bar', stacked=True, figsize=(10, 6)) + + # Add data labels + for c in ax.containers: + # Add label only if the value of the segment is > 1.20th of the ylim + max_y = ax.get_ylim()[1] + labels = [round(v.get_height(),1) if v.get_height() > max_y/20 else '' for v in c] + # remove the labels parameter if it's not needed for customized labels + ax.bar_label(c, labels=labels, label_type='center') + + # Add error bars + x_pos = np.arange(len(pivot_mean.index)) + total_means = pivot_mean.sum(axis=1) + error_bars = [lower_bounds, upper_bounds] + ax.errorbar(x_pos, total_means, yerr=error_bars, fmt='o', color='black', capsize=5) # Set custom x-tick labels if _scenario_dict is provided if _scenario_dict: labels = [_scenario_dict.get(label, label) for label in pivot_df.index] else: - labels = pivot_df.index.astype(str) + labels = pivot_mean.index.astype(str) # Wrap x-tick labels for readability wrapped_labels = [textwrap.fill(str(label), 20) for label in labels] @@ -913,7 +965,7 @@ def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', # Period included for plot title and name if _year == 'all': - period = (f"{min(_df['year'].unique())} - {max(_df['year'].unique())}") + period = (f"{min(_df_mean['year'].unique())} - {max(_df_mean['year'].unique())}") elif (len(_year) == 1): period = (f"{_year[0]}") else: From 3f1bc7b1e3cc422b2a6710078491dc52b179c069 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 14 Jan 2025 15:22:58 +0000 Subject: [PATCH 215/230] update the cost of family planning commodities based on UNFPA prices --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 03058f17d1..ed339c24c5 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ 
version https://git-lfs.github.com/spec/v1 -oid sha256:64b8309a46f06da416a35d8323fe3839df14be18f7d00d4dfeeb9c297248461a -size 4301938 +oid sha256:bdfa71708384a84d1426e59a8897022ed4b0a7fb342916407127ebd675eed4db +size 4302273 From 917b793128fe676c61d8c6606bb4c40a0db5cac9 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 14 Jan 2025 15:42:41 +0000 Subject: [PATCH 216/230] update the cost of vitamin A and K based on UNFPA prices --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index ed339c24c5..da8c23c01b 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bdfa71708384a84d1426e59a8897022ed4b0a7fb342916407127ebd675eed4db -size 4302273 +oid sha256:516592795437a99c21e6090c61656fe0ee68e23b93ffb602f4b460cecd25b806 +size 4303188 From 434a1be934539e231a4b450ad364f46699240000 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 14 Jan 2025 16:38:36 +0000 Subject: [PATCH 217/230] minor edits to consumable names to ensure that the names match with RF_Items_and_Packages --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index da8c23c01b..7260b234ad 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:516592795437a99c21e6090c61656fe0ee68e23b93ffb602f4b460cecd25b806 -size 4303188 +oid sha256:b330681fcae030273f089bf2d2765a76cc7eb36d2f3eb752871a536aad377265 +size 4303166 From 027bfbf51a22bacbe587b2ecce82fc2129ccdcb0 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 14 Jan 2025 16:42:12 +0000 Subject: [PATCH 218/230] minor edits to consumable names to ensure that the names match with RF_Items_and_Packages - sutures --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 7260b234ad..886e1a40fd 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b330681fcae030273f089bf2d2765a76cc7eb36d2f3eb752871a536aad377265 -size 4303166 +oid sha256:2bf49b624a0bfe37c237a9bc563615c01ccdeb61e028168f36ae689db4b1113c +size 4302681 From 378917e56f02b9a9f24d4f210205b1a8d6f37077 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Thu, 16 Jan 2025 17:53:37 +0000 Subject: [PATCH 219/230] improve look of calibration figures --- src/scripts/costing/costing_validation.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/scripts/costing/costing_validation.py b/src/scripts/costing/costing_validation.py index d655788a6b..51d5d33132 100644 --- a/src/scripts/costing/costing_validation.py +++ b/src/scripts/costing/costing_validation.py @@ -295,6 +295,14 @@ def do_cost_calibration_plot(_df, _costs_included, _xtick_fontsize = 10): plt.ylabel('Costs (USD), millions') plt.title(f'Model Cost vs Annual Expenditure 2019 and Max(Annual Budget 2020-22)\n {cost_subcategory}') + # Set a white background and black border + plt.grid(False) + ax = plt.gca() # Get current axes + ax.set_facecolor('white') 
# Set the background color to white + for spine in ax.spines.values(): # Iterate over all borders (spines) + spine.set_edgecolor('black') # Set the border color to black + spine.set_linewidth(1.5) # Adjust the border width if desired + # Customize x-axis labels for readability max_label_length = 15 # Define a maximum label length for wrapping wrapped_labels = [textwrap.fill(str(label), max_label_length) for label in df_mean.index] From 9335ef908b1efa752260c1c4263b10512c04511f Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Thu, 16 Jan 2025 17:57:00 +0000 Subject: [PATCH 220/230] update facility operation unit costs - see commits 8d74279 to a814900 in the TLM-Study-Data repository --- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 886e1a40fd..2bb765ea04 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2bf49b624a0bfe37c237a9bc563615c01ccdeb61e028168f36ae689db4b1113c -size 4302681 +oid sha256:b2fb1955c2baf1ddbcd56486b294337dcc3109e379ee38f72345c24bc5101521 +size 4303056 From e907f56619f4a7d1651a546d966ff10dee79655e Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Fri, 17 Jan 2025 17:17:40 +0000 Subject: [PATCH 221/230] Make adjustments to calibration RM data - 1. Drug costs recategorised as non-consumable costs removed from supply chain estimates; Supply chain costs from non-EHP costs removed from calibration 2. For 'Travel and Transport' under 'Program Management and Admin'', exclude 7.3% (for CHAM exp) and 12.9% of govt exp from calibration - treat this as higher level admin cost based on Ahmat et al (2022). Also exclude travel and transport for other financial sources. 3. For Infrastructure Rehab, exclude 12.9% as higher level admin cost from calibration. 
--- resources/costing/ResourceFile_Costing.xlsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 2bb765ea04..03532cf3ad 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b2fb1955c2baf1ddbcd56486b294337dcc3109e379ee38f72345c24bc5101521 -size 4303056 +oid sha256:dc8a77f1ed79a67878fa9a03a3588febd38fafb08769ec8eb1c23880c3559807 +size 4304056 From 392d4c28103b5f7d18c4d9eb3d4b8d8bc83d2687 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Fri, 17 Jan 2025 17:39:53 +0000 Subject: [PATCH 222/230] Adjust font size in stacked bar chart --- src/scripts/costing/cost_estimation.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 17e9a9ea56..39f183ff9f 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -927,7 +927,7 @@ def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', max_y = ax.get_ylim()[1] labels = [round(v.get_height(),1) if v.get_height() > max_y/20 else '' for v in c] # remove the labels parameter if it's not needed for customized labels - ax.bar_label(c, labels=labels, label_type='center') + ax.bar_label(c, labels=labels, label_type='center', fontsize='small') # Add error bars x_pos = np.arange(len(pivot_mean.index)) @@ -945,7 +945,7 @@ def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', max_y = ax.get_ylim()[1] labels = [round(v.get_height(),1) if v.get_height() > max_y/20 else '' for v in c] # remove the labels parameter if it's not needed for customized labels - ax.bar_label(c, labels=labels, label_type='center') + ax.bar_label(c, labels=labels, label_type='center', fontsize='small') # Add error bars x_pos = np.arange(len(pivot_mean.index)) @@ -955,13 +955,13 @@ def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', # Set custom x-tick labels if _scenario_dict is provided if _scenario_dict: - labels = [_scenario_dict.get(label, label) for label in pivot_df.index] + labels = [_scenario_dict.get(label, label) for label in pivot_mean.index] else: labels = pivot_mean.index.astype(str) # Wrap x-tick labels for readability wrapped_labels = [textwrap.fill(str(label), 20) for label in labels] - ax.set_xticklabels(wrapped_labels, rotation=45, ha='right') + ax.set_xticklabels(wrapped_labels, rotation=45, ha='right', fontsize='small') # Period included for plot title and name if _year == 'all': From e3b5dc3bdbcb37e1ad1e3aaa6a705d84811da09d Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Fri, 17 Jan 2025 17:40:09 +0000 Subject: [PATCH 223/230] Change time from 17 years to 8 years --- .../costing/costing_overview_analysis.py | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/scripts/costing/costing_overview_analysis.py b/src/scripts/costing/costing_overview_analysis.py index 1c4d289b3b..731da4fc32 100644 --- a/src/scripts/costing/costing_overview_analysis.py +++ b/src/scripts/costing/costing_overview_analysis.py @@ -61,10 +61,11 @@ # Declare default parameters for cost analysis # ------------------------------------------------------------------------------------------------------------------ # Period relevant for costing -TARGET_PERIOD = (Date(2010, 1, 1), Date(2035, 12, 31)) # This is the period that is 
costed +TARGET_PERIOD = (Date(2010, 1, 1), Date(2030, 12, 31)) # This is the period that is costed relevant_period_for_costing = [i.year for i in TARGET_PERIOD] list_of_relevant_years_for_costing = list(range(relevant_period_for_costing[0], relevant_period_for_costing[1] + 1)) -list_of_years_for_plot = list(range(2019, 2036)) +list_of_years_for_plot = list(range(2023, 2031)) +number_of_years_costed = relevant_period_for_costing[1] - 2023 + 1 # Scenarios cost_scenarios = {0: "Actual", 3: "Expanded HRH", 5: "Improved consumable availability", @@ -78,31 +79,38 @@ input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, _draws = [0, 3, 5, 8], _years=list_of_relevant_years_for_costing, cost_only_used_staff=True, _discount_rate = discount_rate, summarize = True) -input_costs = input_costs[(input_costs.year > 2018) & (input_costs.year < 2036)] +input_costs = input_costs[(input_costs.year > 2022) & (input_costs.year < 2031)] # _draws = htm_scenarios_for_gf_report --> this subset is created after calculating malaria scale up costs input_costs_undiscounted = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, _draws = [0, 3, 5, 8], _years=list_of_relevant_years_for_costing, cost_only_used_staff=True, _discount_rate = 0, summarize = True) -input_costs_undiscounted = input_costs_undiscounted[(input_costs_undiscounted.year > 2018) & (input_costs_undiscounted.year < 2036)] +input_costs_undiscounted = input_costs_undiscounted[(input_costs_undiscounted.year > 2022) & (input_costs_undiscounted.year < 2031)] # _draws = htm_scenarios_for_gf_report --> this subset is created after calculating malaria scale up costs # Get overall estimates for main text # ----------------------------------------------------------------------------------------------------------------------- cost_by_draw = input_costs.groupby(['draw', 'stat'])['cost'].sum() -print(f"The total estimated cost of healthcare delivery in Malawi between 2019 and 2035 was estimated to be " +print(f"The total estimated cost of healthcare delivery in Malawi between 2023 and 2030 was estimated to be " f"\${cost_by_draw[0,'mean']/1e9:,.2f} billion[\${cost_by_draw[0,'lower']/1e9:,.2f}b - \${cost_by_draw[0,'upper']/1e9:,.2f}b], under the actual scenario, and increased to " f"\${cost_by_draw[5,'mean']/1e9:,.2f} billion[\${cost_by_draw[5,'lower']/1e9:,.2f}b - \${cost_by_draw[5,'upper']/1e9:,.2f}b] under the improved consumable availability scenario, " f"followed by \${cost_by_draw[3,'mean']/1e9:,.2f} billion[\${cost_by_draw[3,'lower']/1e9:,.2f}b - \${cost_by_draw[3,'upper']/1e9:,.2f}b] under the expanded HRH scenario and finally " f"\${cost_by_draw[8,'mean']/1e9:,.2f} billion[\${cost_by_draw[8,'lower']/1e9:,.2f}b - \${cost_by_draw[8,'upper']/1e9:,.2f}b] under the expanded HRH + improved consumable availability scenario.") -print(f"The total cost of healthcare delivery in Malawi (from a health system perspective) between 2019 and 2035 was estimated at " +undiscounted_cost_by_draw = input_costs_undiscounted.groupby(['draw', 'stat'])['cost'].sum() +print(f"The average annual estimated cost of healthcare delivery in Malawi between 2023 and 2030 was estimated to be " + f"\${undiscounted_cost_by_draw[0,'mean']/1e6/number_of_years_costed:,.2f} million[\${undiscounted_cost_by_draw[0,'lower']/1e6/number_of_years_costed:,.2f}b - \${undiscounted_cost_by_draw[0,'upper']/1e6/number_of_years_costed:,.2f}b], under the actual scenario, and increased to " + f"\${undiscounted_cost_by_draw[5,'mean']/1e6/number_of_years_costed:,.2f} 
million[\${undiscounted_cost_by_draw[5,'lower']/1e6/number_of_years_costed:,.2f}b - \${undiscounted_cost_by_draw[5,'upper']/1e6/number_of_years_costed:,.2f}b] under the improved consumable availability scenario, " + f"followed by \${undiscounted_cost_by_draw[3,'mean']/1e6/number_of_years_costed:,.2f} million[\${undiscounted_cost_by_draw[3,'lower']/1e6/number_of_years_costed:,.2f}b - \${undiscounted_cost_by_draw[3,'upper']/1e6/number_of_years_costed:,.2f}b] under the expanded HRH scenario and finally " + f"\${undiscounted_cost_by_draw[8,'mean']/1e6/number_of_years_costed:,.2f} million[\${undiscounted_cost_by_draw[8,'lower']/1e6/number_of_years_costed:,.2f}b - \${undiscounted_cost_by_draw[8,'upper']/1e6/number_of_years_costed:,.2f}b] under the expanded HRH + improved consumable availability scenario.") + +print(f"The total cost of healthcare delivery in Malawi (from a health system perspective) between 2023 and 2030 was estimated at " f"\${cost_by_draw[0,'mean']/1e9:,.2f} billion[\${cost_by_draw[0,'lower']/1e9:,.2f}b - \${cost_by_draw[0,'upper']/1e9:,.2f}b] under current constraints. " f"Alternative scenarios reflecting improvements in supply chain efficiency and workforce capacity increased costs by " f"{(cost_by_draw[5,'mean']/cost_by_draw[0,'mean'] - 1):.2%} to " f"{(cost_by_draw[8,'mean']/cost_by_draw[0,'mean'] - 1):.2%}. " - f"Importantly, our 2019 cost estimates closely aligned with reported actual expenditures, supporting the reliability of our approach.") + f"Importantly, our 2018 cost estimates closely aligned with reported actual expenditures, supporting the reliability of our approach.") consumable_cost_by_draw = input_costs[(input_costs.cost_category == 'medical consumables') & (input_costs.stat == 'mean')].groupby(['draw'])['cost'].sum() print(f"Notably, we find that the improved consumable availability scenario results in a {(consumable_cost_by_draw[3]/consumable_cost_by_draw[0] - 1):.2%} " From 86141563b38cc7c0c61e9c9756457e95348d1dab Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Fri, 17 Jan 2025 17:51:49 +0000 Subject: [PATCH 224/230] update colour map for consumable treemap --- src/scripts/costing/costing_overview_analysis.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/scripts/costing/costing_overview_analysis.py b/src/scripts/costing/costing_overview_analysis.py index 731da4fc32..3aefe89e53 100644 --- a/src/scripts/costing/costing_overview_analysis.py +++ b/src/scripts/costing/costing_overview_analysis.py @@ -169,11 +169,11 @@ 'Infant resuscitator, clear plastic + mask + bag_each_CMST': '#bcbd22', 'Dietary supplements (country-specific)': '#17becf', 'Tenofovir (TDF)/Emtricitabine (FTC), tablet, 300/200 mg': '#2b8cbe', - 'Blood, one unit': '#ffdd44', 'Pneumococcal vaccine': '#fdae61', 'Pentavalent vaccine (DPT, Hep B, Hib)': '#d73027', - 'Ceftriaxone 1g, PFR_1_CMST': '#66c2a5', - 'male circumcision kit, consumables (10 procedures)_1_IDA': '#756bb1'} + 'male circumcision kit, consumables (10 procedures)_1_IDA': '#756bb1', + 'Jadelle (implant), box of 2_CMST': '#ffdd44', + 'Urine analysis': '#66c2a5'} for _cat in cost_categories: for _d in draws: From 9b9431582d96d5565394a4c22cdb213e68954847 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 20 Jan 2025 11:57:10 +0000 Subject: [PATCH 225/230] add result for main text on HIV testing consumables cost across scenarios --- src/scripts/costing/costing_overview_analysis.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/scripts/costing/costing_overview_analysis.py 
b/src/scripts/costing/costing_overview_analysis.py index 3aefe89e53..cbcbbe713e 100644 --- a/src/scripts/costing/costing_overview_analysis.py +++ b/src/scripts/costing/costing_overview_analysis.py @@ -117,6 +117,11 @@ f"increase in cost of medical consumables. However, when combined with expanded HRH, the increase in consumables dispensed is " f"{(consumable_cost_by_draw[8]/consumable_cost_by_draw[0] - 1):.2%} more than the actual scenario because the health system is able to deliver more appointments.") +cost_of_hiv_testing = input_costs[(input_costs.cost_subgroup == 'Test, HIV EIA Elisa') & (input_costs.stat == 'mean')].groupby(['draw'])['cost'].sum() +print(f"For instance, the cost of HIV testing consumables increases by {(cost_of_hiv_testing[3]/cost_of_hiv_testing[0] - 1):.2%} under the expanded HRH scenario and by " + f"{(cost_of_hiv_testing[8]/cost_of_hiv_testing[0] - 1):.2%} under the combined expanded HRH and improved consumable availability scenario, " + f"while showing almost no change under the scenario with improved consumable availability alone") + # Get figures for overview paper # ----------------------------------------------------------------------------------------------------------------------- # Figure 1: Estimated costs by cost category From 99b29c53c2b14e88a6f65d08332528eaa15f38df Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Mon, 20 Jan 2025 18:01:12 +0000 Subject: [PATCH 226/230] extract cleaned calibration table for appendix --- resources/costing/ResourceFile_Costing.xlsx | 4 +- src/scripts/costing/costing_validation.py | 107 ++++++++++++++++++++ 2 files changed, 109 insertions(+), 2 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 03532cf3ad..9c96549b97 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dc8a77f1ed79a67878fa9a03a3588febd38fafb08769ec8eb1c23880c3559807 -size 4304056 +oid sha256:04d0f948e8bfc74312506d525d32c086fc27d9133f7ef9eaff6dfaceab3367e4 +size 4304109 diff --git a/src/scripts/costing/costing_validation.py b/src/scripts/costing/costing_validation.py index 51d5d33132..a6532db27c 100644 --- a/src/scripts/costing/costing_validation.py +++ b/src/scripts/costing/costing_validation.py @@ -330,6 +330,113 @@ def do_cost_calibration_plot(_df, _costs_included, _xtick_fontsize = 10): do_cost_calibration_plot(calibration_data,all_calibration_costs, _xtick_fontsize = 7) calibration_data.to_csv(figurespath / 'calibration/calibration.csv') +# Extract calibration data table for manuscript appendix +calibration_data_extract = calibration_data[calibration_data.index.get_level_values(1) == 'mean'] +calibration_data_extract = calibration_data_extract.droplevel(level=1).reset_index() +# Create a higher level cost category in the calibration data +calibration_categories_dict = {'Other Drugs, medical supplies, and commodities': 'medical consumables', +'Program Management & Administration': 'Not represented in TLO model', +'Non-EHP consumables': 'Not represented in TLO model', +'Voluntary Male Medical Circumcision': 'medical consumables', +'Indoor Residual Spray': 'medical consumables', +'Bednets': 'medical consumables', +'Antimalarials': 'medical consumables', +'Undernutrition commodities': 'medical consumables', +'Cervical Cancer': 'medical consumables', +'Condoms and Lubricants': 'medical consumables', +'Other family planning commodities': 'medical consumables', +'TB 
Tests (including RDTs)': 'medical consumables', +'TB Treatment': 'medical consumables', +'Vaccines': 'medical consumables', +'Malaria RDTs': 'medical consumables', +'HIV Screening/Diagnostic Tests': 'medical consumables', +'Antiretrovirals': 'medical consumables', +'Health Worker Salaries': 'human resources for health', +'Health Worker Training - In-Service': 'human resources for health', +'Health Worker Training - Pre-Service': 'human resources for health', +'Mentorships & Supportive Supervision': 'human resources for health', +'Facility utility bills': 'facility operating cost', +'Infrastructure - New Builds': 'Not represented in TLO model', +'Infrastructure - Rehabilitation': 'facility operating cost', +'Infrastructure - Upgrades': 'Not represented in TLO model', +'Medical Equipment - Maintenance': 'medical equipment', +'Medical Equipment - Purchase': 'medical equipment', +'Vehicles - Fuel and Maintenance': 'facility operating cost', +'Vehicles - Purchase': 'Not represented in TLO model', +'Vehicles - Fuel and Maintenance (Beyond Government and CHAM)': 'Not represented in TLO model', +'Supply Chain': 'medical consumables', +'Supply Chain - non-EHP consumables': 'Not represented in TLO model', +'Unclassified': 'Not represented in TLO model'} +calibration_data_extract['cost_category'] = calibration_data_extract['calibration_category'].map(calibration_categories_dict) + +# Add a column show deviation from actual expenditure +calibration_data_extract['Deviation of estimated cost from actual expenditure (%)'] = ( + (calibration_data_extract['model_cost'] - calibration_data_extract['actual_expenditure_2019']) + /calibration_data_extract['actual_expenditure_2019']) + +# Format the deviation as a percentage with 2 decimal points +calibration_data_extract['Deviation of estimated cost from actual expenditure (%)'] = ( + calibration_data_extract['Deviation of estimated cost from actual expenditure (%)'] + .map(lambda x: f"{x * 100:.2f}%") +) +calibration_data_extract.loc[calibration_data_extract['Deviation of estimated cost from actual expenditure (%)'] == 'nan%', 'Deviation of estimated cost from actual expenditure (%)'] = 'NA' +# Replace if calibration is fine +calibration_condition_met = ((calibration_data_extract['model_cost'] > calibration_data_extract[['actual_expenditure_2019', 'max_annual_budget_2020-22']].min(axis=1)) & + (calibration_data_extract['model_cost'] < calibration_data_extract[['actual_expenditure_2019', 'max_annual_budget_2020-22']].max(axis=1))) + +calibration_data_extract.loc[calibration_condition_met, + 'Deviation of estimated cost from actual expenditure (%)' +] = 'Within target range' + +calibration_data_extract.loc[calibration_data_extract['model_cost'].isna(), 'model_cost'] = 'NA' + +calibration_data_extract = calibration_data_extract.sort_values(by=['cost_category', 'calibration_category']) +calibration_data_extract = calibration_data_extract[['cost_category', 'calibration_category', 'actual_expenditure_2019', 'max_annual_budget_2020-22', 'model_cost', 'Deviation of estimated cost from actual expenditure (%)']] +calibration_data_extract = calibration_data_extract.rename(columns = {'cost_category': 'Cost Category', + 'calibration_category': 'Relevant RM group', + 'actual_expenditure_2019': 'Recorded Expenditure (FY 2018/19)', + 'max_annual_budget_2020-22': 'Maximum Recorded Annual Budget (FY 2019/20 - 2021/22)', + 'model_cost': 'Estimated cost (TLO Model, 2018)' + }) +def convert_df_to_latex(_df, _longtable = False, numeric_columns = []): + _df['Relevant RM group'] = 
_df['Relevant RM group'].str.replace('&', r'\&', regex=False) + # Format numbers to the XX,XX,XXX.XX format for all numeric columns + _df[numeric_columns] = _df[numeric_columns].applymap(lambda x: f"{x:,.2f}" if isinstance(x, (int, float)) else x) + + # Convert to LaTeX format with horizontal lines after every row + latex_table = _df.to_latex( + longtable=_longtable, # Use the longtable environment for large tables + column_format='|R{3.5cm}|R{3.5cm}|R{2.1cm}|R{2.1cm}|R{2.1cm}|R{2.1cm}|', + caption=f"Comparison of Model Estimates with Resource Mapping data", + label=f"tab:calibration_breakdown", + position="h", + index=False, + escape=False, # Prevent escaping special characters like \n + header=True + ) + + # Add \hline after the header and after every row for horizontal lines + latex_table = latex_table.replace("\\\\", "\\\\ \\hline") # Add \hline after each row + latex_table = latex_table.replace("%", "\%") # Add \hline after each row + latex_table = latex_table.replace("Program Management & Administration", "Program Management \& Administration") # Add \hline after each row + latex_table = latex_table.replace("Mentorships & Supportive Supervision", "Mentorships \& Supportive Supervision") # Add \hline after each row + + # latex_table = latex_table.replace("_", " ") # Add \hline after each row + + # Specify the file path to save + latex_file_path = calibration_outputs_folder / f'calibration_breakdown.tex' + + # Write to a file + with open(latex_file_path, 'w') as latex_file: + latex_file.write(latex_table) + + # Print latex for reference + print(latex_table) + +convert_df_to_latex(calibration_data_extract, _longtable = True, numeric_columns = ['Recorded Expenditure (FY 2018/19)', + 'Maximum Recorded Annual Budget (FY 2019/20 - 2021/22)', + 'Estimated cost (TLO Model, 2018)']) + # Stacked bar charts to represent all cost sub-groups do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'medical consumables', _disaggregate_by_subgroup = True, From a57d97f43b462dcc358f699cc2c7dba324eaeb73 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Tue, 21 Jan 2025 08:51:13 +0000 Subject: [PATCH 227/230] edits to manuscript text extract --- .../costing/costing_overview_analysis.py | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/src/scripts/costing/costing_overview_analysis.py b/src/scripts/costing/costing_overview_analysis.py index cbcbbe713e..5cb7d80158 100644 --- a/src/scripts/costing/costing_overview_analysis.py +++ b/src/scripts/costing/costing_overview_analysis.py @@ -92,25 +92,26 @@ # Get overall estimates for main text # ----------------------------------------------------------------------------------------------------------------------- cost_by_draw = input_costs.groupby(['draw', 'stat'])['cost'].sum() -print(f"The total estimated cost of healthcare delivery in Malawi between 2023 and 2030 was estimated to be " - f"\${cost_by_draw[0,'mean']/1e9:,.2f} billion[\${cost_by_draw[0,'lower']/1e9:,.2f}b - \${cost_by_draw[0,'upper']/1e9:,.2f}b], under the actual scenario, and increased to " - f"\${cost_by_draw[5,'mean']/1e9:,.2f} billion[\${cost_by_draw[5,'lower']/1e9:,.2f}b - \${cost_by_draw[5,'upper']/1e9:,.2f}b] under the improved consumable availability scenario, " - f"followed by \${cost_by_draw[3,'mean']/1e9:,.2f} billion[\${cost_by_draw[3,'lower']/1e9:,.2f}b - \${cost_by_draw[3,'upper']/1e9:,.2f}b] under the expanded HRH scenario and finally " - f"\${cost_by_draw[8,'mean']/1e9:,.2f} billion[\${cost_by_draw[8,'lower']/1e9:,.2f}b - 
\${cost_by_draw[8,'upper']/1e9:,.2f}b] under the expanded HRH + improved consumable availability scenario.") +print(f"The total cost of healthcare delivery in Malawi between 2023 and 2030 was estimated to be " + f"\${cost_by_draw[0,'mean']/1e9:,.2f} billion [95\% confidence interval (CI), \${cost_by_draw[0,'lower']/1e9:,.2f}b - \${cost_by_draw[0,'upper']/1e9:,.2f}b], under the actual scenario, and increased to " + f"\${cost_by_draw[5,'mean']/1e9:,.2f} billion [\${cost_by_draw[5,'lower']/1e9:,.2f}b - \${cost_by_draw[5,'upper']/1e9:,.2f}b] under the improved consumable availability scenario, " + f"followed by \${cost_by_draw[3,'mean']/1e9:,.2f} billion [\${cost_by_draw[3,'lower']/1e9:,.2f}b - \${cost_by_draw[3,'upper']/1e9:,.2f}b] under the expanded HRH scenario and finally " + f"\${cost_by_draw[8,'mean']/1e9:,.2f} billion [\${cost_by_draw[8,'lower']/1e9:,.2f}b - \${cost_by_draw[8,'upper']/1e9:,.2f}b] under the expanded HRH + improved consumable availability scenario.") undiscounted_cost_by_draw = input_costs_undiscounted.groupby(['draw', 'stat'])['cost'].sum() -print(f"The average annual estimated cost of healthcare delivery in Malawi between 2023 and 2030 was estimated to be " - f"\${undiscounted_cost_by_draw[0,'mean']/1e6/number_of_years_costed:,.2f} million[\${undiscounted_cost_by_draw[0,'lower']/1e6/number_of_years_costed:,.2f}b - \${undiscounted_cost_by_draw[0,'upper']/1e6/number_of_years_costed:,.2f}b], under the actual scenario, and increased to " - f"\${undiscounted_cost_by_draw[5,'mean']/1e6/number_of_years_costed:,.2f} million[\${undiscounted_cost_by_draw[5,'lower']/1e6/number_of_years_costed:,.2f}b - \${undiscounted_cost_by_draw[5,'upper']/1e6/number_of_years_costed:,.2f}b] under the improved consumable availability scenario, " - f"followed by \${undiscounted_cost_by_draw[3,'mean']/1e6/number_of_years_costed:,.2f} million[\${undiscounted_cost_by_draw[3,'lower']/1e6/number_of_years_costed:,.2f}b - \${undiscounted_cost_by_draw[3,'upper']/1e6/number_of_years_costed:,.2f}b] under the expanded HRH scenario and finally " - f"\${undiscounted_cost_by_draw[8,'mean']/1e6/number_of_years_costed:,.2f} million[\${undiscounted_cost_by_draw[8,'lower']/1e6/number_of_years_costed:,.2f}b - \${undiscounted_cost_by_draw[8,'upper']/1e6/number_of_years_costed:,.2f}b] under the expanded HRH + improved consumable availability scenario.") - -print(f"The total cost of healthcare delivery in Malawi (from a health system perspective) between 2023 and 2030 was estimated at " - f"\${cost_by_draw[0,'mean']/1e9:,.2f} billion[\${cost_by_draw[0,'lower']/1e9:,.2f}b - \${cost_by_draw[0,'upper']/1e9:,.2f}b] under current constraints. 
" - f"Alternative scenarios reflecting improvements in supply chain efficiency and workforce capacity increased costs by " +print(f"The average annual cost of healthcare delivery in Malawi between 2023 and 2030 was estimated to be " + f"\${undiscounted_cost_by_draw[0,'mean']/1e6/number_of_years_costed:,.2f} million [\${undiscounted_cost_by_draw[0,'lower']/1e6/number_of_years_costed:,.2f}m - \${undiscounted_cost_by_draw[0,'upper']/1e6/number_of_years_costed:,.2f}m], under the actual scenario, and increased to " + f"\${undiscounted_cost_by_draw[5,'mean']/1e6/number_of_years_costed:,.2f} million [\${undiscounted_cost_by_draw[5,'lower']/1e6/number_of_years_costed:,.2f}m - \${undiscounted_cost_by_draw[5,'upper']/1e6/number_of_years_costed:,.2f}m] under the improved consumable availability scenario, " + f"followed by \${undiscounted_cost_by_draw[3,'mean']/1e6/number_of_years_costed:,.2f} million [\${undiscounted_cost_by_draw[3,'lower']/1e6/number_of_years_costed:,.2f}m - \${undiscounted_cost_by_draw[3,'upper']/1e6/number_of_years_costed:,.2f}m] under the expanded HRH scenario and finally " + f"\${undiscounted_cost_by_draw[8,'mean']/1e6/number_of_years_costed:,.2f} million [\${undiscounted_cost_by_draw[8,'lower']/1e6/number_of_years_costed:,.2f}m - \${undiscounted_cost_by_draw[8,'upper']/1e6/number_of_years_costed:,.2f}m] under the expanded HRH + improved consumable availability scenario.") + +print(f"Based on this method, the total cost of healthcare delivery in Malawi between 2023 and 2030 was estimated at " + f"\${cost_by_draw[0,'mean']/1e9:,.2f} billion[\${cost_by_draw[0,'lower']/1e9:,.2f}b - \${cost_by_draw[0,'upper']/1e9:,.2f}b]" + f"(average annual cost of \${undiscounted_cost_by_draw[0,'mean']/1e6/number_of_years_costed:,.2f} million[\${undiscounted_cost_by_draw[0,'lower']/1e6/number_of_years_costed:,.2f}m - \${undiscounted_cost_by_draw[0,'upper']/1e6/number_of_years_costed:,.2f}m] " + f"under current health system capacity. " + f"Alternative scenarios reflecting improvements in consuambles supply chain performance and workforce size increased costs by " f"{(cost_by_draw[5,'mean']/cost_by_draw[0,'mean'] - 1):.2%} to " - f"{(cost_by_draw[8,'mean']/cost_by_draw[0,'mean'] - 1):.2%}. " - f"Importantly, our 2018 cost estimates closely aligned with reported actual expenditures, supporting the reliability of our approach.") + f"{(cost_by_draw[8,'mean']/cost_by_draw[0,'mean'] - 1):.2%}. 
") consumable_cost_by_draw = input_costs[(input_costs.cost_category == 'medical consumables') & (input_costs.stat == 'mean')].groupby(['draw'])['cost'].sum() print(f"Notably, we find that the improved consumable availability scenario results in a {(consumable_cost_by_draw[3]/consumable_cost_by_draw[0] - 1):.2%} " From 6659f84549efd98bfaa707193c6c379a125aed68 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 22 Jan 2025 19:16:07 +0000 Subject: [PATCH 228/230] stacked bar - adjustments + only show CI > 0 --- src/scripts/costing/cost_estimation.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 39f183ff9f..4168e5ed12 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -900,6 +900,7 @@ def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', # Error bars lower_bounds = pivot_mean.sum(axis=1) - pivot_lower.sum(axis=1) + lower_bounds[lower_bounds<0] = 0 upper_bounds = pivot_upper.sum(axis=1) - pivot_mean.sum(axis=1) if _cost_category == 'all': @@ -987,6 +988,12 @@ def do_stacked_bar_plot_of_cost_by_category(_df, _cost_category = 'all', plt.tight_layout(pad=2.0) # Ensure there is enough space for the legend plt.subplots_adjust(right=0.8) # Adjust to ensure legend doesn't overlap + # Add gridlines and border + plt.grid(visible=True, which='major', linestyle='--', linewidth=0.5, color='gray') + #plt.rcParams['figure.facecolor'] = 'white' + plt.rcParams['figure.edgecolor'] = 'gray' + plt.rcParams['figure.frameon'] = True + plt.title(f'Costs by Scenario \n (Cost Category = {_cost_category} ; Period = {period})') plt.savefig(_outputfilepath / f'stacked_bar_chart_{_cost_category}_{period}{plt_name_suffix}{_add_figname_suffix}.png', dpi=100, bbox_inches='tight') @@ -1113,6 +1120,8 @@ def do_line_plot_of_cost(_df, _cost_category='all', period = f"{min(_year)} - {max(_year)}" # Set labels, legend, and title + # Add gridlines and border + plt.grid(visible=True, which='major', linestyle='--', linewidth=0.5, color='gray') plt.xlabel('Year') plt.ylabel('Cost (2023 USD), millions') plt.legend(handles[::-1], sorted_labels[::-1], bbox_to_anchor=(1.05, 1), loc='upper left') From 21a6b61539b0916046f918b74788c0e405492447 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Wed, 22 Jan 2025 19:16:32 +0000 Subject: [PATCH 229/230] add temporary figure for lomas et al discount rates --- .../costing/costing_overview_analysis.py | 35 ++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/src/scripts/costing/costing_overview_analysis.py b/src/scripts/costing/costing_overview_analysis.py index 5cb7d80158..a4f1bc24db 100644 --- a/src/scripts/costing/costing_overview_analysis.py +++ b/src/scripts/costing/costing_overview_analysis.py @@ -73,6 +73,7 @@ # Costing parameters discount_rate = 0.03 +discount_rate_lomas = -0.0079 # Estimate standard input costs of scenario # ----------------------------------------------------------------------------------------------------------------------- @@ -87,6 +88,11 @@ _discount_rate = 0, summarize = True) input_costs_undiscounted = input_costs_undiscounted[(input_costs_undiscounted.year > 2022) & (input_costs_undiscounted.year < 2031)] +input_costs_variable_discounting = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, _draws = [0, 3, 5, 8], + _years=list_of_relevant_years_for_costing, cost_only_used_staff=True, + _discount_rate = discount_rate_lomas, summarize = True) 
+input_costs_variable_discounting = input_costs_variable_discounting[(input_costs_variable_discounting.year > 2022) & (input_costs_variable_discounting.year < 2031)]
+
 # _draws = htm_scenarios_for_gf_report --> this subset is created after calculating malaria scale up costs
 
 # Get overall estimates for main text
@@ -130,6 +136,24 @@
                                         _year = list_of_years_for_plot,
                                         _outputfilepath = figurespath, _scenario_dict = cost_scenarios)
+revised_consumable_subcategories = {'cost_of_separately_managed_medical_supplies_dispensed':'cost_of_consumables_dispensed', 'cost_of_excess_separately_managed_medical_supplies_stocked': 'cost_of_excess_consumables_stocked', 'supply_chain':'supply_chain'}
+input_costs_new = input_costs.copy()
+input_costs_new['cost_subcategory'] = input_costs_new['cost_subcategory'].map(revised_consumable_subcategories).fillna(input_costs_new['cost_subcategory'])
+
+do_stacked_bar_plot_of_cost_by_category(_df = input_costs_new, _cost_category = 'medical consumables', _disaggregate_by_subgroup = False,
+                                        _year = list_of_years_for_plot,
+                                        _outputfilepath = figurespath, _scenario_dict = cost_scenarios)
+do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'human resources for health', _disaggregate_by_subgroup = False,
+                                        _year = list_of_years_for_plot,
+                                        _outputfilepath = figurespath, _scenario_dict = cost_scenarios)
+do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'medical equipment', _disaggregate_by_subgroup = False,
+                                        _year = list_of_years_for_plot,
+                                        _outputfilepath = figurespath, _scenario_dict = cost_scenarios)
+do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'facility operating cost', _disaggregate_by_subgroup = False,
+                                        _year = list_of_years_for_plot,
+                                        _outputfilepath = figurespath, _scenario_dict = cost_scenarios)
+
+
 # Figure 2: Estimated costs by year
 do_line_plot_of_cost(_df = input_costs_undiscounted, _cost_category='all',
                      _year=list_of_years_for_plot, _draws= [0],
@@ -160,6 +184,15 @@
                                         _add_figname_suffix = '_UNDISCOUNTED')
 
 # Figure 5: Total cost by scenario applying changing discount rates
+# Total cost by scenario applying the Lomas et al. discount rate (discount_rate_lomas)
+do_stacked_bar_plot_of_cost_by_category(_df = input_costs_variable_discounting,
+                                        _cost_category = 'all',
+                                        _year=list_of_years_for_plot,
+                                        _disaggregate_by_subgroup = False,
+                                        _outputfilepath = figurespath,
+                                        _scenario_dict = cost_scenarios,
+                                        _add_figname_suffix = '_VARIABLE_DISCOUNTING')
+
 cost_categories = ['human resources for health', 'medical consumables',
                    'medical equipment', 'facility operating cost']
@@ -229,7 +262,7 @@ def generate_detail_cost_table(_groupby_var, _groupby_var_name, _longtable = Fal
     # Convert to LaTeX format with horizontal lines after every row
     latex_table = table_data.to_latex(
         longtable=_longtable,  # Use the longtable environment for large tables
-        column_format='|R{4cm}|R{5cm}|R{3.5cm}|R{3.5cm}|R{3.5cm}|R{3.5cm}|',
+        column_format='|R{3cm}|R{3cm}|R{2.2cm}|R{2.2cm}|R{2.2cm}|R{2.2cm}|',
         caption=f"Summarized Costs by Category and {_groupby_var_name}",
         label=f"tab:cost_by_{_groupby_var}",
         position="h",

From b92e56fb3d884f93c79e4d62a5fbc2a10598e2c7 Mon Sep 17 00:00:00 2001
From: sakshimohan
Date: Mon, 27 Jan 2025 15:34:45 +0000
Subject: [PATCH 230/230] clean consumables lists for calibration + update
 calculation of deviation of model estimate from RM data - take min of
 absolute deviation from expenditure and budget + Add cervical cancer
 consumables to 'Other drugs and commodities'

---
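Editorial note on the deviation rule named in the subject above: the diff that follows computes, for each calibration category, the smaller of the absolute deviations of the model estimate from recorded expenditure and from the maximum recorded budget, and replaces it with 'Within target range' when the estimate lies between the two. A minimal sketch with toy figures and shortened column names (the real columns are actual_expenditure_2019, max_annual_budget_2020-22 and model_cost):

```
import pandas as pd

toy = pd.DataFrame({
    'expenditure': [100.0, 100.0],
    'budget':      [140.0, 140.0],
    'model_cost':  [120.0, 160.0],
})

dev_exp = (toy['model_cost'] - toy['expenditure']).abs() / toy['expenditure']
dev_bud = (toy['model_cost'] - toy['budget']).abs() / toy['budget']
toy['deviation'] = pd.concat([dev_exp, dev_bud], axis=1).min(axis=1)  # min of the two absolute deviations

within_range = (toy['model_cost'] > toy[['expenditure', 'budget']].min(axis=1)) & \
               (toy['model_cost'] < toy[['expenditure', 'budget']].max(axis=1))
toy['deviation'] = toy['deviation'].map(lambda x: f"{x * 100:.2f}%").mask(within_range, 'Within target range')
print(toy)
# Row 0 (cost 120 lies between 100 and 140)                -> 'Within target range'
# Row 1 (cost 160): min(|160-100|/100, |160-140|/140)      -> '14.29%'
```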
resources/costing/ResourceFile_Costing.xlsx | 4 +- src/scripts/costing/costing_validation.py | 123 +++++++++++++++----- 2 files changed, 94 insertions(+), 33 deletions(-) diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 9c96549b97..cc427e145c 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:04d0f948e8bfc74312506d525d32c086fc27d9133f7ef9eaff6dfaceab3367e4 -size 4304109 +oid sha256:224bf212ab783a5cd673d27a588345cb78062187cad0566c06dc6b2fe04dffcd +size 4307162 diff --git a/src/scripts/costing/costing_validation.py b/src/scripts/costing/costing_validation.py index a6532db27c..4b4432a9b7 100644 --- a/src/scripts/costing/costing_validation.py +++ b/src/scripts/costing/costing_validation.py @@ -28,6 +28,7 @@ parse_log_file, unflatten_flattened_multi_index_in_logging ) +from tlo.methods.healthsystem import get_item_code_from_item_name from scripts.costing.cost_estimation import (estimate_input_cost_of_scenarios, do_stacked_bar_plot_of_cost_by_category) @@ -74,7 +75,8 @@ # Load result files resourcefilepath = Path("./resources") outputfilepath = Path('./outputs/t.mangal@imperial.ac.uk') -results_folder = get_scenario_outputs('hss_elements-2024-11-12T172311Z.py', outputfilepath)[0] +#results_folder = get_scenario_outputs('hss_elements-2024-11-12T172311Z.py', outputfilepath)[0] # November 2024 runs +results_folder = get_scenario_outputs('htm_and_hss_runs-2025-01-16T135243Z.py', outputfilepath)[0] # January 2025 runs # Estimate costs for 2018 input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, _years = [2018], _draws = [0], summarize = True, cost_only_used_staff=False) @@ -152,33 +154,85 @@ def get_calibration_relevant_subset_of_other_costs(_df, _subcategory, _calibrati #----------------------------------------------------------------------------------------------------------------------- calibration_data['model_cost'] = np.nan consumables_costs_by_item_code = assign_item_codes_to_consumables(input_costs) - -irs = [161] -bednets = [160] -undernutrition = [213, 1220, 1221, 1223, 1227] -cervical_cancer = [261, 1239] -other_family_planning = [1, 3,7,12,13] -vaccines = [150, 151, 153, 155, 157, 158, 1197] -art = [2671, 2672, 2673] -tb_treatment = [176, 177, 179, 178, 181, 2678] -antimalarials = [162,164,170] -malaria_rdts = [163] -hiv_screening = [190,191,196] -condoms = [2,25] -tb_tests = [184,187, 175] -circumcision = [197] -other_drugs = set(consumables_costs_by_item_code['cost_subgroup'].unique()) - set(irs) - set(bednets) - set(undernutrition) - set(cervical_cancer) - set(other_family_planning) - set(vaccines) \ +consumable_list = pd.read_csv(resourcefilepath / 'healthsystem' / 'consumables' / 'ResourceFile_Consumables_Items_and_Packages.csv') +def get_item_code(item): + return get_item_code_from_item_name(consumable_list, item) + +# Malaria consumables +irs = [get_item_code('Indoor residual spraying drugs/supplies to service a client')] +bednets = [get_item_code('Insecticide-treated net')] +antimalarials = [get_item_code('Lumefantrine 120mg/Artemether 20mg, 30x18_540_CMST'), + get_item_code('Injectable artesunate'), + get_item_code('Fansidar (sulphadoxine / pyrimethamine tab)')] +malaria_rdts = [get_item_code('Malaria test kit (RDT)')] + +# HIV consumables +hiv_screening = [get_item_code('Test, HIV EIA Elisa'), get_item_code('VL Test'), get_item_code('CD4 test')] + +art = 
[get_item_code("First-line ART regimen: adult"), get_item_code("Cotrimoxizole, 960mg pppy"), # adult + get_item_code("First line ART regimen: older child"), get_item_code("Cotrimoxazole 120mg_1000_CMST"), # Older children + get_item_code("First line ART regimen: young child"), # younger children (also get cotrimoxazole 120mg + get_item_code('Sulfamethoxazole + trimethropin, tablet 400 mg + 80 mg'), + get_item_code("Tenofovir (TDF)/Emtricitabine (FTC), tablet, 300/200 mg"), # Adult prep + get_item_code("Nevirapine, oral solution, 10 mg/ml")] # infant prep + +circumcision = [get_item_code('male circumcision kit, consumables (10 procedures)_1_IDA')] + +# Tuberculosis consumables +tb_tests = [get_item_code("ZN Stain"), get_item_code("Sputum container"), get_item_code("Microscope slides, lime-soda-glass, pack of 50"), + get_item_code("Xpert"), get_item_code("Lead rubber x-ray protective aprons up to 150kVp 0.50mm_each_CMST"), + get_item_code("X-ray"), get_item_code("MGIT960 Culture and DST"), + get_item_code("Solid culture and DST")] +# consider removing X-ray +tb_treatment = [get_item_code("Cat. I & III Patient Kit A"), # adult primary + get_item_code("Cat. I & III Patient Kit B"), # child primary + get_item_code("Cat. II Patient Kit A1"), # adult secondary + get_item_code("Cat. II Patient Kit A2"), # child secondary + get_item_code("Treatment: second-line drugs"), # MDR + get_item_code("Isoniazid/Pyridoxine, tablet 300 mg"), # IPT + get_item_code("Isoniazid/Rifapentine")] # 3 HP +# Family planning consumables +other_family_planning = [get_item_code("Levonorgestrel 0.15 mg + Ethinyl estradiol 30 mcg (Microgynon), cycle"), # pill + get_item_code("IUD, Copper T-380A"), # IUD + get_item_code("Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly"), # injection + get_item_code("Jadelle (implant), box of 2_CMST"), # implant + get_item_code('Implanon (Etonogestrel 68 mg)'), # implant - not currently in use in the model + get_item_code("Atropine sulphate 600 micrograms/ml, 1ml_each_CMST")] # female sterilization +condoms = [get_item_code("Condom, male"), + get_item_code("Female Condom_Each_CMST")] +# Undernutrition +undernutrition = [get_item_code('Supplementary spread, sachet 92g/CAR-150'), + get_item_code('Complementary feeding--education only drugs/supplies to service a client'), + get_item_code('SAM theraputic foods'), + get_item_code('SAM medicines'), + get_item_code('Therapeutic spread, sachet 92g/CAR-150'), + get_item_code('F-100 therapeutic diet, sach., 114g/CAR-90')] +# Cervical cancer +cervical_cancer = [get_item_code('Specimen container'), + get_item_code('Biopsy needle'), + get_item_code('Cyclophosphamide, 1 g')] +# Vaccines +vaccines = [get_item_code("Syringe, autodisposable, BCG, 0.1 ml, with needle"), + get_item_code("Polio vaccine"), + get_item_code("Pentavalent vaccine (DPT, Hep B, Hib)"), + get_item_code("Rotavirus vaccine"), + get_item_code("Measles vaccine"), + get_item_code("Pneumococcal vaccine"), + get_item_code("HPV vaccine"), + get_item_code("Tetanus toxoid, injection")] # not sure if this should be included + +other_drugs = set(consumables_costs_by_item_code['cost_subgroup'].unique()) - set(irs) - set(bednets) - set(undernutrition) - set(other_family_planning) - set(vaccines) \ - set(art) - set(tb_treatment) - set(antimalarials) - set(malaria_rdts) - set(hiv_screening)\ - - set(condoms) - set(tb_tests) + - set(condoms) - set(tb_tests) # - set(cervical_cancer) # Note that the main ARV regimen in 2018 was tenofovir/lamivudine/efavirenz as opposed to 
Tenofovir/Lamivudine/Dolutegravir as used in the RF_Costing. The price of this -# was $80 per year (80/(0.103*365)) times what's estimated by the model so let's update this -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = art, _calibration_category = 'Antiretrovirals')* 80/(0.103*365)) +# was $82 per year (80/(0.103*365)) times what's estimated by the model so let's update this +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = art, _calibration_category = 'Antiretrovirals')* 82/(0.103*365)) # Other consumables costs do not need to be adjusted calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = irs, _calibration_category = 'Indoor Residual Spray')) calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = bednets, _calibration_category = 'Bednets')) calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = undernutrition, _calibration_category = 'Undernutrition commodities')) -calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = cervical_cancer, _calibration_category = 'Cervical Cancer')) +#calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = cervical_cancer, _calibration_category = 'Cervical Cancer')) calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = other_family_planning, _calibration_category = 'Other family planning commodities')) calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = vaccines, _calibration_category = 'Vaccines')) calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = tb_treatment, _calibration_category = 'TB Treatment')) @@ -220,7 +274,7 @@ def get_calibration_relevant_subset_of_other_costs(_df, _subcategory, _calibrati list_of_consumables_costs_for_calibration_only_hiv = ['Voluntary Male Medical Circumcision', 'HIV Screening/Diagnostic Tests', 'Antiretrovirals'] list_of_consumables_costs_for_calibration_without_hiv =['Indoor Residual Spray', 'Bednets', 'Malaria RDTs', 'Antimalarials', 'TB Tests (including RDTs)', 'TB Treatment', 'Vaccines', 'Condoms and Lubricants', 'Other family planning commodities', - 'Undernutrition commodities', 'Cervical Cancer', 'Other Drugs, medical supplies, and commodities'] + 'Undernutrition commodities', 'Other Drugs, medical supplies, and commodities'] list_of_hr_costs_for_calibration = ['Health Worker Salaries', 'Health 
Worker Training - In-Service', 'Health Worker Training - Pre-Service', 'Mentorships & Supportive Supervision'] list_of_equipment_costs_for_calibration = ['Medical Equipment - Purchase', 'Medical Equipment - Maintenance'] list_of_operating_costs_for_calibration = ['Facility utility bills', 'Infrastructure - Rehabilitation', 'Vehicles - Maintenance','Vehicles - Fuel and Maintenance'] @@ -229,7 +283,7 @@ def get_calibration_relevant_subset_of_other_costs(_df, _subcategory, _calibrati costing_outputs_folder = Path('./outputs/costing') if not os.path.exists(costing_outputs_folder): os.makedirs(costing_outputs_folder) -figurespath = costing_outputs_folder / "figures_post_cons_fix" +figurespath = costing_outputs_folder / "figures_post_jan2025fix" if not os.path.exists(figurespath): os.makedirs(figurespath) calibration_outputs_folder = Path(figurespath / 'calibration') @@ -328,7 +382,6 @@ def do_cost_calibration_plot(_df, _costs_included, _xtick_fontsize = 10): do_cost_calibration_plot(calibration_data, list_of_equipment_costs_for_calibration) do_cost_calibration_plot(calibration_data, list_of_operating_costs_for_calibration) do_cost_calibration_plot(calibration_data,all_calibration_costs, _xtick_fontsize = 7) -calibration_data.to_csv(figurespath / 'calibration/calibration.csv') # Extract calibration data table for manuscript appendix calibration_data_extract = calibration_data[calibration_data.index.get_level_values(1) == 'mean'] @@ -369,35 +422,43 @@ def do_cost_calibration_plot(_df, _costs_included, _xtick_fontsize = 10): 'Unclassified': 'Not represented in TLO model'} calibration_data_extract['cost_category'] = calibration_data_extract['calibration_category'].map(calibration_categories_dict) -# Add a column show deviation from actual expenditure -calibration_data_extract['Deviation of estimated cost from actual expenditure (%)'] = ( +calibration_data_extract['deviation_from_expenditure'] = abs( (calibration_data_extract['model_cost'] - calibration_data_extract['actual_expenditure_2019']) /calibration_data_extract['actual_expenditure_2019']) +calibration_data_extract['deviation_from_budget'] = abs( + (calibration_data_extract['model_cost'] - calibration_data_extract['max_annual_budget_2020-22']) + /calibration_data_extract['max_annual_budget_2020-22']) +calibration_data_extract['Absolute deviation of estimated cost from data (%)'] = ( + calibration_data_extract[['deviation_from_expenditure', 'deviation_from_budget']] + .min(axis=1, skipna=True) # Use axis=1 to compute the minimum row-wise. 
+) # Format the deviation as a percentage with 2 decimal points -calibration_data_extract['Deviation of estimated cost from actual expenditure (%)'] = ( - calibration_data_extract['Deviation of estimated cost from actual expenditure (%)'] +calibration_data_extract['Absolute deviation of estimated cost from data (%)'] = ( + calibration_data_extract['Absolute deviation of estimated cost from data (%)'] .map(lambda x: f"{x * 100:.2f}%") ) -calibration_data_extract.loc[calibration_data_extract['Deviation of estimated cost from actual expenditure (%)'] == 'nan%', 'Deviation of estimated cost from actual expenditure (%)'] = 'NA' +calibration_data_extract.loc[calibration_data_extract['Absolute deviation of estimated cost from data (%)'] == 'nan%', 'Absolute deviation of estimated cost from data (%)'] = 'NA' # Replace if calibration is fine calibration_condition_met = ((calibration_data_extract['model_cost'] > calibration_data_extract[['actual_expenditure_2019', 'max_annual_budget_2020-22']].min(axis=1)) & (calibration_data_extract['model_cost'] < calibration_data_extract[['actual_expenditure_2019', 'max_annual_budget_2020-22']].max(axis=1))) calibration_data_extract.loc[calibration_condition_met, - 'Deviation of estimated cost from actual expenditure (%)' + 'Absolute deviation of estimated cost from data (%)' ] = 'Within target range' calibration_data_extract.loc[calibration_data_extract['model_cost'].isna(), 'model_cost'] = 'NA' calibration_data_extract = calibration_data_extract.sort_values(by=['cost_category', 'calibration_category']) -calibration_data_extract = calibration_data_extract[['cost_category', 'calibration_category', 'actual_expenditure_2019', 'max_annual_budget_2020-22', 'model_cost', 'Deviation of estimated cost from actual expenditure (%)']] +calibration_data_extract = calibration_data_extract[['cost_category', 'calibration_category', 'actual_expenditure_2019', 'max_annual_budget_2020-22', 'model_cost', 'Absolute deviation of estimated cost from data (%)']] calibration_data_extract = calibration_data_extract.rename(columns = {'cost_category': 'Cost Category', 'calibration_category': 'Relevant RM group', 'actual_expenditure_2019': 'Recorded Expenditure (FY 2018/19)', 'max_annual_budget_2020-22': 'Maximum Recorded Annual Budget (FY 2019/20 - 2021/22)', 'model_cost': 'Estimated cost (TLO Model, 2018)' }) + +calibration_data_extract.to_csv(figurespath / 'calibration/calibration.csv') def convert_df_to_latex(_df, _longtable = False, numeric_columns = []): _df['Relevant RM group'] = _df['Relevant RM group'].str.replace('&', r'\&', regex=False) # Format numbers to the XX,XX,XXX.XX format for all numeric columns