Skip to content

Commit 9e2b75e

Browse files
committed
Merge branch 'cbrelease-4.8.28' of https://github.com/KB-iGOT/cb-core-data into cbrelease-4.8.28
2 parents f11f031 + 9b7dd42 commit 9e2b75e

File tree

8 files changed

+507
-8
lines changed

8 files changed

+507
-8
lines changed

dfutil/enrolment/acbp/acbpDFUtil.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,8 @@ def preComputeACBPData(spark):
5858

5959
def explodeAcbpData(spark, acbp_df):
6060
selectColumns = ["userID", "fullName", "userPrimaryEmail", "userMobile", "designation", "group", "userOrgID",
61-
"ministry_name", "dept_name", "userOrgName", "userStatus", "isapar", "acbpID",
61+
"ministry_name", "dept_name", "userOrgName", "cadreName", "civilServiceType", "civilServiceName",
62+
"cadreBatch", "organised_service", "userStatus", "isapar", "acbpID",
6263
"assignmentType", "completionDueDate", "allocatedOn", "acbpCourseIDList", "acbpStatus",
6364
"acbpCreatedBy", "cbPlanName"]
6465
user_df = spark.read.parquet(ParquetFileConstants.USER_ORG_COMPUTED_FILE)

jobs/default_config.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -181,6 +181,8 @@
181181
'stateLearningWeekStart': '2024-10-19 00:00:00',
182182
'stateLearningWeekEnd': '2025-03-14 23:59:59',
183183
'sizeBucketString': '1-100-XS,101-500-S,501-1000-M,1001-10000-L,10001-25000-X,above 25000-XXL',
184+
'overridesForSlw': {},
185+
'rollupRequiredOrgs': [],
184186

185187
# Zip Reports Configuration
186188
'prefixDirectoryPath': 'standalone-reports',

jobs/stage-2/acbpReport.py

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -122,7 +122,8 @@ def process_data(self, spark, config):
122122
.select(
123123
# Select only needed columns early to reduce data shuffling
124124
"fullName", "userPrimaryEmail", "userMobile", "userOrgName", "group",
125-
"designation", "ministry_name", "dept_name", "courseName", "isapar",
125+
"designation", "ministry_name", "dept_name", "cadreName", "civilServiceType", "civilServiceName",
126+
"cadreBatch", "organised_service", "courseName", "isapar",
126127
"userOrgID", "dbCompletionStatus", "courseCompletedTimestamp",
127128
"allocatedOn", "completionDueDate"
128129
) \
@@ -170,6 +171,11 @@ def process_data(self, spark, config):
170171
col("Ministry"),
171172
col("Department"),
172173
col("Organization"),
174+
col("cadreName").alias("Cadre"),
175+
col("civilServiceType").alias("Civil Service Type"),
176+
col("civilServiceName").alias("Civil Services"),
177+
col("cadreBatch").alias("Cadre Batch"),
178+
col("organised_service").alias("Is From Organised Service of Govt"),
173179
col("courseName").alias("Name of CBP Allocated Course"),
174180
col("isapar"),
175181
col("allocatedOn").alias("Allocated On"),
@@ -196,7 +202,8 @@ def process_data(self, spark, config):
196202
) \
197203
.groupBy(
198204
"userID", "fullName", "userPrimaryEmail", "userMobile",
199-
"designation", "group", "userOrgID", "ministry_name",
205+
"designation", "cadreName", "civilServiceType", "civilServiceName",
206+
"cadreBatch", "organised_service", "group", "userOrgID", "ministry_name",
200207
"dept_name", "userOrgName"
201208
) \
202209
.agg(
@@ -218,6 +225,11 @@ def process_data(self, spark, config):
218225
col("userOrgName").alias("MDO_Name"),
219226
col("group").alias("Group"),
220227
col("designation").alias("Designation"),
228+
col("cadreName").alias("Cadre"),
229+
col("civilServiceType").alias("Civil Service Type"),
230+
col("civilServiceName").alias("Civil Services"),
231+
col("cadreBatch").alias("Cadre Batch"),
232+
col("organised_service").alias("Is From Organised Service of Govt"),
221233
when(
222234
(col("ministry_name").isNull()) | (col("ministry_name") == ""),
223235
col("userOrgName")

jobs/stage-2/dataWarehouse.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -148,7 +148,6 @@ def write_postgres_table(self, df, url: str, table: str, username: str, password
148148
.option("user", username) \
149149
.option("password", password) \
150150
.option("driver", "org.postgresql.Driver") \
151-
.option("truncate", "true") \
152151
.mode(mode) \
153152
.save()
154153

jobs/stage-2/nationalLearningWeek.py

Lines changed: 470 additions & 0 deletions
Large diffs are not rendered by default.

jobs/stage-2/odcsRecommendation.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -36,14 +36,14 @@ def name(self):
3636

3737
def process_data(self, spark, config):
3838
try:
39-
all_enrolments_df = spark.read.parquet(f"{config.warehouseReportDir}/{config.dwEnrollmentsTable}")
40-
content_df = spark.read.parquet(f"{config.warehouseReportDir}/{config.dwCourseTable}") \
39+
all_enrolments_df = spark.read.parquet(ParquetFileConstants.ENROLMENT_WAREHOUSE_COMPUTED_PARQUET_FILE).withColumnRenamed("userID", "user_id")
40+
content_df = spark.read.parquet(ParquetFileConstants.CONTENT_WAREHOUSE_COMPUTED_PARQUET_FILE) \
4141
.filter(col("content_sub_type").isin("Course", "Program", "Moderated Course", "Moderated Program"))
4242
enrolments_df = all_enrolments_df.join(
4343
content_df.select("content_id"), ["content_id"], "inner"
4444
)
4545

46-
user_df = spark.read.parquet(f"{config.warehouseReportDir}/{config.dwUserTable}")
46+
user_df = spark.read.parquet(ParquetFileConstants.USER_WAREHOUSE_COMPUTED_PARQUET_FILE)
4747
rating_draft_df = spark.read.parquet(ParquetFileConstants.RATING_PARQUET_FILE)
4848

4949
completion_df = enrolments_df.groupBy("content_id").agg(count("user_id").alias("total_enrolments"),
@@ -145,4 +145,4 @@ def main():
145145

146146

147147
if __name__ == "__main__":
148-
main()
148+
main()

jobs/stage-2/userEnrolment.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -179,6 +179,11 @@ def process_data(self, spark,config):
179179
col("Ministry"),
180180
col("Department"),
181181
col("Organization"),
182+
col("cadreName").alias("Cadre"),
183+
col("civilServiceType").alias("Civil Service Type"),
184+
col("civilServiceName").alias("Civil Services"),
185+
col("cadreBatch").alias("Cadre Batch"),
186+
col("organised_service").alias("Is From Organised Service of Govt"),
182187
col("courseOrgName").alias("Content_Provider"),
183188
col("courseName").alias("Content_Name"),
184189
col("category").alias("Content_Type"),
@@ -280,6 +285,11 @@ def process_data(self, spark,config):
280285
col("Ministry"),
281286
col("Department"),
282287
col("Organization"),
288+
col("cadreName").alias("Cadre"),
289+
col("civilServiceType").alias("Civil Service Type"),
290+
col("civilServiceName").alias("Civil Services"),
291+
col("cadreBatch").alias("Cadre Batch"),
292+
col("organised_service").alias("Is From Organised Service of Govt"),
283293
col("courseOrgName").alias("Content_Provider"),
284294
col("courseName").alias("Content_Name"),
285295
col("category").alias("Content_Type"),

jobs/stage-2/userReport.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -142,6 +142,11 @@ def processUserReport(config):
142142
col("Organization"),
143143
from_unixtime(col("userCreatedTimestamp") / 1000, ParquetFileConstants.DATE_FORMAT).alias(
144144
"User_Registration_Date"),
145+
col("cadreName").alias("Cadre"),
146+
col("civilServiceType").alias("Civil Service Type"),
147+
col("civilServiceName").alias("Civil Services"),
148+
col("cadreBatch").alias("Cadre Batch"),
149+
col("organised_service").alias("Is From Organised Service of Govt"),
145150
col("role").alias("Roles"),
146151
col("personalDetails.gender").alias("Gender"),
147152
col("personalDetails.category").alias("Category"),

0 commit comments

Comments
 (0)