Merge branch 'cbrelease-4.8.28' of https://github.com/KB-iGOT/cb-core-data into cbrelease-4.8.28

abhishekpnt · abhishekpnt · commit 9e2b75e943d2 · 2025-10-09T16:11:46.000+05:30
diff --git a/dfutil/enrolment/acbp/acbpDFUtil.py b/dfutil/enrolment/acbp/acbpDFUtil.py
@@ -58,7 +58,8 @@ def preComputeACBPData(spark):
 
 def explodeAcbpData(spark, acbp_df):
     selectColumns = ["userID", "fullName", "userPrimaryEmail", "userMobile", "designation", "group", "userOrgID",
-                     "ministry_name", "dept_name", "userOrgName", "userStatus", "isapar", "acbpID",
+                     "ministry_name", "dept_name", "userOrgName", "cadreName", "civilServiceType", "civilServiceName",
+                     "cadreBatch", "organised_service", "userStatus", "isapar", "acbpID",
                      "assignmentType", "completionDueDate", "allocatedOn", "acbpCourseIDList", "acbpStatus",
                      "acbpCreatedBy", "cbPlanName"]
     user_df = spark.read.parquet(ParquetFileConstants.USER_ORG_COMPUTED_FILE)
diff --git a/jobs/default_config.py b/jobs/default_config.py
@@ -181,6 +181,8 @@
     'stateLearningWeekStart': '2024-10-19 00:00:00',
     'stateLearningWeekEnd': '2025-03-14 23:59:59',
     'sizeBucketString': '1-100-XS,101-500-S,501-1000-M,1001-10000-L,10001-25000-X,above 25000-XXL',
+    'overridesForSlw': {},
+    'rollupRequiredOrgs': [],
 
     # Zip Reports Configuration
     'prefixDirectoryPath': 'standalone-reports',
diff --git a/jobs/stage-2/acbpReport.py b/jobs/stage-2/acbpReport.py
@@ -122,7 +122,8 @@ def process_data(self, spark, config):
                 .select(
                 # Select only needed columns early to reduce data shuffling
                 "fullName", "userPrimaryEmail", "userMobile", "userOrgName", "group",
-                "designation", "ministry_name", "dept_name", "courseName", "isapar",
+                "designation", "ministry_name", "dept_name", "cadreName", "civilServiceType", "civilServiceName",
+                "cadreBatch", "organised_service", "courseName", "isapar",
                 "userOrgID", "dbCompletionStatus", "courseCompletedTimestamp",
                 "allocatedOn", "completionDueDate"
             ) \
@@ -170,6 +171,11 @@ def process_data(self, spark, config):
                 col("Ministry"),
                 col("Department"),
                 col("Organization"),
+                col("cadreName").alias("Cadre"),
+                col("civilServiceType").alias("Civil Service Type"),
+                col("civilServiceName").alias("Civil Services"),
+                col("cadreBatch").alias("Cadre Batch"),
+                col("organised_service").alias("Is From Organised Service of Govt"),
                 col("courseName").alias("Name of CBP Allocated Course"),
                 col("isapar"),
                 col("allocatedOn").alias("Allocated On"),
@@ -196,7 +202,8 @@ def process_data(self, spark, config):
             ) \
                 .groupBy(
                 "userID", "fullName", "userPrimaryEmail", "userMobile",
-                "designation", "group", "userOrgID", "ministry_name",
+                "designation", "cadreName", "civilServiceType", "civilServiceName",
+                "cadreBatch", "organised_service", "group", "userOrgID", "ministry_name",
                 "dept_name", "userOrgName"
             ) \
                 .agg(
@@ -218,6 +225,11 @@ def process_data(self, spark, config):
                 col("userOrgName").alias("MDO_Name"),
                 col("group").alias("Group"),
                 col("designation").alias("Designation"),
+                col("cadreName").alias("Cadre"),
+                col("civilServiceType").alias("Civil Service Type"),
+                col("civilServiceName").alias("Civil Services"),
+                col("cadreBatch").alias("Cadre Batch"),
+                col("organised_service").alias("Is From Organised Service of Govt"),
                 when(
                     (col("ministry_name").isNull()) | (col("ministry_name") == ""),
                     col("userOrgName")
diff --git a/jobs/stage-2/dataWarehouse.py b/jobs/stage-2/dataWarehouse.py
@@ -148,7 +148,6 @@ def write_postgres_table(self, df, url: str, table: str, username: str, password
             .option("user", username) \
             .option("password", password) \
             .option("driver", "org.postgresql.Driver") \
-            .option("truncate", "true") \
             .mode(mode) \
             .save()
 
diff --git a/jobs/stage-2/nationalLearningWeek.py b/jobs/stage-2/nationalLearningWeek.py
diff --git a/jobs/stage-2/odcsRecommendation.py b/jobs/stage-2/odcsRecommendation.py
@@ -36,14 +36,14 @@ def name(self):
 
     def process_data(self, spark, config):
         try:
-            all_enrolments_df = spark.read.parquet(f"{config.warehouseReportDir}/{config.dwEnrollmentsTable}")
-            content_df = spark.read.parquet(f"{config.warehouseReportDir}/{config.dwCourseTable}") \
+            all_enrolments_df = spark.read.parquet(ParquetFileConstants.ENROLMENT_WAREHOUSE_COMPUTED_PARQUET_FILE).withColumnRenamed("userID", "user_id")
+            content_df = spark.read.parquet(ParquetFileConstants.CONTENT_WAREHOUSE_COMPUTED_PARQUET_FILE) \
                 .filter(col("content_sub_type").isin("Course", "Program", "Moderated Course", "Moderated Program"))
             enrolments_df = all_enrolments_df.join(
                 content_df.select("content_id"), ["content_id"], "inner"
             )
 
-            user_df = spark.read.parquet(f"{config.warehouseReportDir}/{config.dwUserTable}")
+            user_df = spark.read.parquet(ParquetFileConstants.USER_WAREHOUSE_COMPUTED_PARQUET_FILE)
             rating_draft_df = spark.read.parquet(ParquetFileConstants.RATING_PARQUET_FILE)
 
             completion_df = enrolments_df.groupBy("content_id").agg(count("user_id").alias("total_enrolments"),
@@ -145,4 +145,4 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
+    main()
diff --git a/jobs/stage-2/userEnrolment.py b/jobs/stage-2/userEnrolment.py
@@ -179,6 +179,11 @@ def process_data(self, spark,config):
                     col("Ministry"),
                     col("Department"),
                     col("Organization"),
+                    col("cadreName").alias("Cadre"),
+                    col("civilServiceType").alias("Civil Service Type"),
+                    col("civilServiceName").alias("Civil Services"),
+                    col("cadreBatch").alias("Cadre Batch"),
+                    col("organised_service").alias("Is From Organised Service of Govt"),
                     col("courseOrgName").alias("Content_Provider"),
                     col("courseName").alias("Content_Name"),
                     col("category").alias("Content_Type"),
@@ -280,6 +285,11 @@ def process_data(self, spark,config):
                     col("Ministry"),
                     col("Department"),
                     col("Organization"),
+                    col("cadreName").alias("Cadre"),
+                    col("civilServiceType").alias("Civil Service Type"),
+                    col("civilServiceName").alias("Civil Services"),
+                    col("cadreBatch").alias("Cadre Batch"),
+                    col("organised_service").alias("Is From Organised Service of Govt"),
                     col("courseOrgName").alias("Content_Provider"),
                     col("courseName").alias("Content_Name"),
                     col("category").alias("Content_Type"),
diff --git a/jobs/stage-2/userReport.py b/jobs/stage-2/userReport.py
@@ -142,6 +142,11 @@ def processUserReport(config):
             col("Organization"),
             from_unixtime(col("userCreatedTimestamp") / 1000, ParquetFileConstants.DATE_FORMAT).alias(
                 "User_Registration_Date"),
+            col("cadreName").alias("Cadre"),
+            col("civilServiceType").alias("Civil Service Type"),
+            col("civilServiceName").alias("Civil Services"),
+            col("cadreBatch").alias("Cadre Batch"),
+            col("organised_service").alias("Is From Organised Service of Govt"),
             col("role").alias("Roles"),
             col("personalDetails.gender").alias("Gender"),
             col("personalDetails.category").alias("Category"),