@@ -55,9 +55,7 @@ def create(
5555 :return: a function to initialise the object
5656 :rtype: Callable[[str, str], Databricks]
5757 """
58- return lambda fyear , _ : DatabricksNational (
59- spark , data_path , fyear , sample_rate , seed
60- )
58+ return lambda fyear , _ : DatabricksNational (spark , data_path , fyear , sample_rate , seed )
6159
6260 def get_ip (self ) -> pd .DataFrame :
6361 """Get the inpatients dataframe.
@@ -98,14 +96,10 @@ def get_op(self) -> pd.DataFrame:
9896 # TODO: temporary fix, see #353
9997 .withColumn ("sushrg_trimmed" , F .lit ("HRG" ))
10098 .withColumn ("imd_quintile" , F .lit (0 ))
101- .groupBy (
102- op .drop ("index" , "fyear" , "attendances" , "tele_attendances" ).columns
103- )
99+ .groupBy (op .drop ("index" , "fyear" , "attendances" , "tele_attendances" ).columns )
104100 .agg (
105101 (F .sum ("attendances" ) * self ._sample_rate ).alias ("attendances" ),
106- (F .sum ("tele_attendances" ) * self ._sample_rate ).alias (
107- "tele_attendances"
108- ),
102+ (F .sum ("tele_attendances" ) * self ._sample_rate ).alias ("tele_attendances" ),
109103 )
110104 # TODO: how do we make this stable? at the moment we can't use full model results with
111105 # national
@@ -209,4 +203,15 @@ def get_hsa_gams(self):
209203 """Get the health status adjustment gams."""
210204 # this is not supported in our data bricks environment currently
211205 raise NotImplementedError
212- raise NotImplementedError
206+
207+ def get_inequalities (self ) -> pd .DataFrame :
208+ """Get the inequalities dataframe, restricted to this instance's year.
209+
210+ Returns:
211+ The rows of the inequalities parquet data (read from a location built from `self._data_path`) whose `fyear` column equals `self._year`, collected into a pandas DataFrame.
212+ """
213+ return (
214+ self ._spark .read .parquet (f"{ self ._data_path } /inequalities" )
215+ .filter (F .col ("fyear" ) == self ._year )
216+ .toPandas ()
217+ )
0 commit comments