-
-
Notifications
You must be signed in to change notification settings - Fork 71
feat: data batch processing of number assessment events #2292
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
9e1ff42
0c23220
07d1858
b5dece0
f73eeca
e3abcff
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,127 @@ | ||
| package ai.elimu.tasks.analytics; | ||
|
|
||
| import ai.elimu.dao.StudentDao; | ||
| import ai.elimu.dao.NumberAssessmentEventDao; | ||
| import ai.elimu.dao.NumberDao; | ||
| import ai.elimu.entity.analytics.NumberAssessmentEvent; | ||
| import ai.elimu.entity.analytics.students.Student; | ||
| import ai.elimu.model.v2.enums.Language; | ||
| import ai.elimu.rest.v2.analytics.NumberAssessmentEventsRestController; | ||
| import ai.elimu.util.ConfigHelper; | ||
| import ai.elimu.util.DiscordHelper; | ||
| import ai.elimu.util.DiscordHelper.Channel; | ||
| import ai.elimu.util.DomainHelper; | ||
| import ai.elimu.util.csv.CsvAnalyticsExtractionHelper; | ||
| import java.io.File; | ||
| import java.util.List; | ||
| import lombok.RequiredArgsConstructor; | ||
| import lombok.extern.slf4j.Slf4j; | ||
| import org.springframework.scheduling.annotation.Scheduled; | ||
| import org.springframework.stereotype.Service; | ||
|
|
||
| /** | ||
| * Extracts assessment events from CSV files previously received by the {@link NumberAssessmentEventsRestController}, and imports them into the database. | ||
| * <p/> | ||
| * <p> | ||
| * Expected folder structure: | ||
| * <pre> | ||
| * ├── lang-ENG | ||
| * │ ├── analytics | ||
| * │ │ ├── android-id-e387e38700000001 | ||
| * │ │ │ └── number-assessment-events | ||
| * │ │ │ ├── e387e38700000001_3001018_number-assessment-events_2024-10-09.csv | ||
| * │ │ │ ├── e387e38700000001_3001018_number-assessment-events_2024-10-10.csv | ||
| * │ │ │ ├── e387e38700000001_3001018_number-assessment-events_2024-10-11.csv | ||
| * │ │ │ ├── e387e38700000001_3001018_number-assessment-events_2024-10-14.csv | ||
| * │ │ │ ├── e387e38700000001_3001018_number-assessment-events_2024-10-18.csv | ||
| * │ │ │ └── e387e38700000001_3001018_number-assessment-events_2024-10-20.csv | ||
| * │ │ ├── android-id-e387e38700000002 | ||
| * │ │ │ └── number-assessment-events | ||
| * │ │ │ ├── e387e38700000002_3001018_number-assessment-events_2024-10-09.csv | ||
| * │ │ │ ├── e387e38700000002_3001018_number-assessment-events_2024-10-10.csv | ||
| * │ │ │ ├── e387e38700000002_3001018_number-assessment-events_2024-10-11.csv | ||
| * </pre> | ||
| */ | ||
| @Service | ||
| @RequiredArgsConstructor | ||
| @Slf4j | ||
| public class NumberAssessmentEventImportScheduler { | ||
|
|
||
| private final NumberAssessmentEventDao numberAssessmentEventDao; | ||
| private final NumberDao numberDao; | ||
| private final StudentDao studentDao; | ||
|
|
||
| @Scheduled(cron = "00 25 * * * *") // 25 minutes past every hour | ||
| public synchronized void execute() { | ||
| log.info("execute"); | ||
|
|
||
| try { | ||
| // Lookup CSV files stored on the filesystem | ||
| File elimuAiDir = new File(System.getProperty("user.home"), ".elimu-ai"); | ||
| File languageDir = new File(elimuAiDir, "lang-" + Language.valueOf(ConfigHelper.getProperty("content.language"))); | ||
| File analyticsDir = new File(languageDir, "analytics"); | ||
| log.info("analyticsDir: " + analyticsDir); | ||
| analyticsDir.mkdirs(); | ||
| for (File analyticsDirFile : analyticsDir.listFiles()) { | ||
| if (analyticsDirFile.getName().startsWith("android-id-")) { | ||
| File androidIdDir = new File(analyticsDir, analyticsDirFile.getName()); | ||
| for (File androidIdDirFile : androidIdDir.listFiles()) { | ||
| Long studentId = null; | ||
| Integer eventImportCount = 0; | ||
| if (androidIdDirFile.getName().equals("number-assessment-events")) { | ||
| File numberAssessmentEventsDir = new File(androidIdDir, androidIdDirFile.getName()); | ||
| for (File csvFile : numberAssessmentEventsDir.listFiles()) { | ||
| log.info("csvFile: " + csvFile); | ||
|
|
||
| // Convert from CSV to Java | ||
| List<NumberAssessmentEvent> events = CsvAnalyticsExtractionHelper.extractNumberAssessmentEvents(csvFile); | ||
| log.info("events.size(): " + events.size()); | ||
|
|
||
| // Store in database | ||
| for (NumberAssessmentEvent event : events) { | ||
| // Check if the event has already been stored in the database | ||
| NumberAssessmentEvent existingNumberAssessmentEvent = numberAssessmentEventDao.read(event.getTimestamp(), event.getAndroidId(), event.getPackageName()); | ||
| if (existingNumberAssessmentEvent != null) { | ||
| log.warn("The event has already been stored in the database. Skipping data import."); | ||
| continue; | ||
| } | ||
|
|
||
| // Generate Student ID | ||
| Student existingStudent = studentDao.read(event.getAndroidId()); | ||
| if (existingStudent == null) { | ||
| Student student = new Student(); | ||
| student.setAndroidId(event.getAndroidId()); | ||
| studentDao.create(student); | ||
| log.info("Stored Student in database with ID " + student.getId()); | ||
| studentId = student.getId(); | ||
| } else { | ||
| studentId = existingStudent.getId(); | ||
| } | ||
|
|
||
| // If content ID has been provided, look for match in the database | ||
| if (event.getNumberId() != null) { | ||
| event.setNumber(numberDao.read(event.getNumberId())); | ||
| } | ||
|
|
||
| // Store the event in the database | ||
| numberAssessmentEventDao.create(event); | ||
| log.info("Stored event in database with ID " + event.getId()); | ||
| eventImportCount++; | ||
| } | ||
| } | ||
| } | ||
| if ((studentId != null) && (eventImportCount > 0)) { | ||
| String contentUrl = DomainHelper.getBaseUrl() + "/analytics/students/" + studentId; | ||
| DiscordHelper.postToChannel(Channel.ANALYTICS, "Imported " + eventImportCount + " number assessment events: " + contentUrl); | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } catch (Exception e) { | ||
| log.error("Error during data import:", e); | ||
| DiscordHelper.postToChannel(Channel.ANALYTICS, "Error during import of number assessment events: `" + e.getClass() + ": " + e.getMessage() + "`"); | ||
| } | ||
|
|
||
| log.info("execute complete"); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -51,7 +51,7 @@ public class StoryBookLearningEventImportScheduler { | |||||||||
| private final StoryBookDao storyBookDao; | ||||||||||
| private final StudentDao studentDao; | ||||||||||
|
|
||||||||||
| @Scheduled(cron = "00 45 * * * *") // 35 minutes past every hour | ||||||||||
| @Scheduled(cron = "00 45 * * * *") // 45 minutes past every hour | ||||||||||
| public synchronized void execute() { | ||||||||||
|
Comment on lines
+54
to
55
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

💡 Verification agent

❓ Verification inconclusive

**Specify a timezone to avoid server-dependent trigger drift**

```diff
- @Scheduled(cron = "00 45 * * * *") // 45 minutes past every hour
+ @Scheduled(cron = "00 45 * * * *", zone = "UTC") // 45 minutes past every UTC hour
```

Scripts executed:

```bash
#!/bin/bash
# 1. Verify that @Scheduled is imported from Spring
grep -R "import org.springframework.scheduling.annotation.Scheduled" -n src
# 2. Check for Spring Boot parent or Spring Framework version in pom.xml
grep -R "spring-boot-starter-parent" -n pom.xml -A2 || echo "No Spring Boot parent found"
```

```bash
#!/bin/bash
# 1. Confirm that @Scheduled is imported in the scheduler class
grep -n "import org.springframework.scheduling.annotation.Scheduled" src/main/java/ai/elimu/tasks/analytics/StoryBookLearningEventImportScheduler.java
# 2. Check for Spring Boot parent version in pom.xml
grep -n "<spring-boot-starter-parent" -n pom.xml -A1 || echo "No spring-boot-starter-parent found"
```

**Add explicit timezone to `@Scheduled` to avoid trigger drift**

• File: `src/main/java/ai/elimu/tasks/analytics/StoryBookLearningEventImportScheduler.java` (lines 54–55)

```diff
- @Scheduled(cron = "00 45 * * * *") // 45 minutes past every hour
+ @Scheduled(cron = "00 45 * * * *", zone = "UTC") // 45 minutes past every UTC hour
```

📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||
| log.info("execute"); | ||||||||||
|
|
||||||||||
|
|
||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,6 +2,7 @@ | |
|
|
||
| import ai.elimu.entity.analytics.LetterSoundAssessmentEvent; | ||
| import ai.elimu.entity.analytics.LetterSoundLearningEvent; | ||
| import ai.elimu.entity.analytics.NumberAssessmentEvent; | ||
| import ai.elimu.entity.analytics.NumberLearningEvent; | ||
| import ai.elimu.entity.analytics.StoryBookLearningEvent; | ||
| import ai.elimu.entity.analytics.VideoLearningEvent; | ||
|
|
@@ -198,7 +199,79 @@ public static List<LetterSoundLearningEvent> extractLetterSoundLearningEvents(Fi | |
| } | ||
|
|
||
|
|
||
| // TODO: number assessment events | ||
| public static List<NumberAssessmentEvent> extractNumberAssessmentEvents(File csvFile) { | ||
| log.info("extractNumberAssessmentEvents"); | ||
|
|
||
| Integer versionCode = AnalyticsHelper.extractVersionCodeFromCsvFilename(csvFile.getName()); | ||
| log.info("versionCode: " + versionCode); | ||
|
|
||
| List<NumberAssessmentEvent> numberAssessmentEvents = new ArrayList<>(); | ||
|
|
||
| // Iterate each row in the CSV file | ||
| Path csvFilePath = Paths.get(csvFile.toURI()); | ||
| log.info("csvFilePath: " + csvFilePath); | ||
| try { | ||
| Reader reader = Files.newBufferedReader(csvFilePath); | ||
| CSVFormat csvFormat = CSVFormat.DEFAULT.withFirstRecordAsHeader(); | ||
| log.info("header: " + Arrays.toString(csvFormat.getHeader())); | ||
| CSVParser csvParser = new CSVParser(reader, csvFormat); | ||
| for (CSVRecord csvRecord : csvParser) { | ||
| log.info("csvRecord: " + csvRecord); | ||
|
|
||
| // Convert from CSV to Java | ||
|
|
||
| NumberAssessmentEvent numberAssessmentEvent = new NumberAssessmentEvent(); | ||
|
|
||
| long timestampInMillis = Long.valueOf(csvRecord.get("timestamp").substring(0, 10)) * 1_000; | ||
| Calendar timestamp = Calendar.getInstance(TimeZone.getTimeZone("UTC")); | ||
| timestamp.setTimeInMillis(timestampInMillis); | ||
| numberAssessmentEvent.setTimestamp(timestamp); | ||
|
|
||
| String androidId = AnalyticsHelper.extractAndroidIdFromCsvFilename(csvFile.getName()); | ||
| numberAssessmentEvent.setAndroidId(androidId); | ||
|
|
||
| String packageName = csvRecord.get("package_name"); | ||
| numberAssessmentEvent.setPackageName(packageName); | ||
|
|
||
| Float masteryScore = Float.valueOf(csvRecord.get("mastery_score")); | ||
| numberAssessmentEvent.setMasteryScore(masteryScore); | ||
|
|
||
| Long timeSpentMs = Long.valueOf(csvRecord.get("time_spent_ms")); | ||
| numberAssessmentEvent.setTimeSpentMs(timeSpentMs); | ||
|
|
||
| String additionalData = csvRecord.get("additional_data"); | ||
| if (StringUtils.isNotBlank(additionalData)) { | ||
| numberAssessmentEvent.setAdditionalData(additionalData); | ||
| } | ||
|
|
||
| int researchExperimentOrdinal = Integer.valueOf(csvRecord.get("research_experiment")); | ||
| ResearchExperiment researchExperiment = ResearchExperiment.values()[researchExperimentOrdinal]; | ||
| numberAssessmentEvent.setResearchExperiment(researchExperiment); | ||
|
|
||
| int experimentGroupOrdinal = Integer.valueOf(csvRecord.get("experiment_group")); | ||
| ExperimentGroup experimentGroup = ExperimentGroup.values()[experimentGroupOrdinal]; | ||
| numberAssessmentEvent.setExperimentGroup(experimentGroup); | ||
|
|
||
| Integer numberValue = Integer.valueOf(csvRecord.get("number_value")); | ||
| numberAssessmentEvent.setNumberValue(numberValue); | ||
|
|
||
| // String numberSymbol = csvRecord.get("number_symbol"); | ||
| // numberAssessmentEvent.setNumberSymbol(numberSymbol); | ||
|
|
||
| if (StringUtils.isNotBlank(csvRecord.get("number_id"))) { | ||
| Long numberId = Long.valueOf(csvRecord.get("number_id")); | ||
| numberAssessmentEvent.setNumberId(numberId); | ||
| } | ||
|
|
||
| numberAssessmentEvents.add(numberAssessmentEvent); | ||
| } | ||
| csvParser.close(); | ||
| } catch (IOException ex) { | ||
| log.error(ex.getMessage()); | ||
| } | ||
|
|
||
| return numberAssessmentEvents; | ||
| } | ||
|
Comment on lines
+202
to
+274
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

**Fix critical inconsistencies with established assessment event extraction patterns.**

The implementation has several issues that make it inconsistent with other assessment event extractors in this class:

- The timestamp column name is not selected based on the app version code.
- `additional_data` is read without a version check.
- `research_experiment` and `experiment_group` are read without a version check or a blank check.
- The `number_symbol` extraction is commented out.

Apply this diff to fix the inconsistencies and align with the established pattern:

```diff
- long timestampInMillis = Long.valueOf(csvRecord.get("timestamp").substring(0, 10)) * 1_000;
+ String timestampColumnName = null;
+ if (versionCode < 3004000) {
+ timestampColumnName = "time";
+ } else {
+ // https://github.com/elimu-ai/analytics/releases/tag/3.4.0
+ timestampColumnName = "timestamp";
+ }
+ long timestampInMillis = Long.valueOf(csvRecord.get(timestampColumnName).substring(0, 10)) * 1_000;
```

```diff
- String additionalData = csvRecord.get("additional_data");
- if (StringUtils.isNotBlank(additionalData)) {
- numberAssessmentEvent.setAdditionalData(additionalData);
- }
+ if (versionCode >= 3006000) {
+ // https://github.com/elimu-ai/analytics/releases/tag/3.6.0
+ String additionalData = csvRecord.get("additional_data");
+ if (StringUtils.isNotBlank(additionalData)) {
+ numberAssessmentEvent.setAdditionalData(additionalData);
+ }
+ }
```

```diff
- int researchExperimentOrdinal = Integer.valueOf(csvRecord.get("research_experiment"));
- ResearchExperiment researchExperiment = ResearchExperiment.values()[researchExperimentOrdinal];
- numberAssessmentEvent.setResearchExperiment(researchExperiment);
-
- int experimentGroupOrdinal = Integer.valueOf(csvRecord.get("experiment_group"));
- ExperimentGroup experimentGroup = ExperimentGroup.values()[experimentGroupOrdinal];
- numberAssessmentEvent.setExperimentGroup(experimentGroup);
+ if (versionCode >= 3005009) {
+ // https://github.com/elimu-ai/analytics/releases/tag/3.5.9
+
+ if (StringUtils.isNotBlank(csvRecord.get("research_experiment"))) {
+ int researchExperimentOrdinal = Integer.valueOf(csvRecord.get("research_experiment"));
+ ResearchExperiment researchExperiment = ResearchExperiment.values()[researchExperimentOrdinal];
+ numberAssessmentEvent.setResearchExperiment(researchExperiment);
+ }
+
+ if (StringUtils.isNotBlank(csvRecord.get("experiment_group"))) {
+ int experimentGroupOrdinal = Integer.valueOf(csvRecord.get("experiment_group"));
+ ExperimentGroup experimentGroup = ExperimentGroup.values()[experimentGroupOrdinal];
+ numberAssessmentEvent.setExperimentGroup(experimentGroup);
+ }
+ }
```

```diff
- // String numberSymbol = csvRecord.get("number_symbol");
- // numberAssessmentEvent.setNumberSymbol(numberSymbol);
+ String numberSymbol = csvRecord.get("number_symbol");
+ numberAssessmentEvent.setNumberSymbol(numberSymbol);
```

🤖 Prompt for AI Agents |
||
|
|
||
| public static List<NumberLearningEvent> extractNumberLearningEvents(File csvFile) { | ||
| log.info("extractNumberLearningEvents"); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,96 @@ | ||
| package ai.elimu.web.analytics.students; | ||
|
|
||
| import ai.elimu.dao.NumberAssessmentEventDao; | ||
| import ai.elimu.dao.StudentDao; | ||
| import ai.elimu.entity.analytics.NumberAssessmentEvent; | ||
| import ai.elimu.entity.analytics.students.Student; | ||
| import ai.elimu.util.DiscordHelper; | ||
| import ai.elimu.util.DiscordHelper.Channel; | ||
| import jakarta.servlet.http.HttpServletResponse; | ||
| import java.io.IOException; | ||
| import java.io.OutputStream; | ||
| import java.io.StringWriter; | ||
| import java.util.List; | ||
| import lombok.RequiredArgsConstructor; | ||
| import lombok.extern.slf4j.Slf4j; | ||
| import org.apache.commons.csv.CSVFormat; | ||
| import org.apache.commons.csv.CSVPrinter; | ||
| import org.springframework.http.HttpStatus; | ||
| import org.springframework.stereotype.Controller; | ||
| import org.springframework.web.bind.annotation.GetMapping; | ||
| import org.springframework.web.bind.annotation.PathVariable; | ||
| import org.springframework.web.bind.annotation.RequestMapping; | ||
|
|
||
| @Controller | ||
| @RequestMapping("/analytics/students/{studentId}/number-assessment-events.csv") | ||
| @RequiredArgsConstructor | ||
| @Slf4j | ||
| public class NumberAssessmentEventsCsvExportController { | ||
|
|
||
| private final StudentDao studentDao; | ||
|
|
||
| private final NumberAssessmentEventDao numberAssessmentEventDao; | ||
|
|
||
| @GetMapping | ||
| public void handleRequest( | ||
| @PathVariable Long studentId, | ||
| HttpServletResponse response, | ||
| OutputStream outputStream | ||
| ) throws IOException { | ||
| log.info("handleRequest"); | ||
|
|
||
| try { | ||
| Student student = studentDao.read(studentId); | ||
| log.info("student.getAndroidId(): " + student.getAndroidId()); | ||
|
|
||
| List<NumberAssessmentEvent> numberAssessmentEvents = numberAssessmentEventDao.readAll(student.getAndroidId()); | ||
| log.info("numberAssessmentEvents.size(): " + numberAssessmentEvents.size()); | ||
|
|
||
| CSVFormat csvFormat = CSVFormat.DEFAULT.builder() | ||
| .setHeader( | ||
| "id", | ||
| "timestamp", | ||
| "package_name", | ||
| "mastery_score", | ||
| "time_spent_ms", | ||
| "additional_data", | ||
| "research_experiment", | ||
| "experiment_group", | ||
| "number_value", | ||
| "number_id" | ||
| ).build(); | ||
| StringWriter stringWriter = new StringWriter(); | ||
| CSVPrinter csvPrinter = new CSVPrinter(stringWriter, csvFormat); | ||
| for (NumberAssessmentEvent event : numberAssessmentEvents) { | ||
| log.info("event.getId(): " + event.getId()); | ||
| csvPrinter.printRecord( | ||
| event.getId(), | ||
| event.getTimestamp().getTimeInMillis() / 1_000, | ||
| event.getPackageName(), | ||
| event.getMasteryScore(), | ||
| event.getTimeSpentMs(), | ||
| event.getAdditionalData(), | ||
| (event.getResearchExperiment() != null) ? event.getResearchExperiment().ordinal() : null, | ||
| (event.getExperimentGroup() != null) ? event.getExperimentGroup().ordinal() : null, | ||
| event.getNumberValue(), | ||
| event.getNumberId() | ||
| ); | ||
| } | ||
| csvPrinter.flush(); | ||
| csvPrinter.close(); | ||
|
|
||
| String csvFileContent = stringWriter.toString(); | ||
| response.setContentType("text/csv"); | ||
| byte[] bytes = csvFileContent.getBytes(); | ||
| response.setContentLength(bytes.length); | ||
|
|
||
| outputStream.write(bytes); | ||
| outputStream.flush(); | ||
| outputStream.close(); | ||
| } catch (Exception ex) { | ||
| log.error(ex.getMessage()); | ||
| response.setStatus(HttpStatus.INTERNAL_SERVER_ERROR.value()); | ||
| DiscordHelper.postToChannel(Channel.ANALYTICS, "Error during CSV export of number assessment events: `" + ex.getClass() + ": " + ex.getMessage() + "`"); | ||
| } | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
💡 Verification agent
🧩 Analysis chain
Method `extractNumberAssessmentEvents` is not implemented.

The method `CsvAnalyticsExtractionHelper.extractNumberAssessmentEvents` called here doesn't exist in the helper class, which will cause a compilation error.

Let me verify if this method exists elsewhere in the codebase:
🏁 Script executed:
Length of output: 955
Implement the missing `extractNumberAssessmentEvents` method

The call at `src/main/java/ai/elimu/tasks/analytics/NumberAssessmentEventImportScheduler.java:77` will fail to compile because `CsvAnalyticsExtractionHelper.extractNumberAssessmentEvents` does not exist.

Action items:

- In `src/main/java/ai/elimu/util/csv/CsvAnalyticsExtractionHelper.java`: add the method, using the existing extractors (`extractWordLearningEvents`, `extractLetterSoundLearningEvents`) as a template for CSV parsing logic and error handling.

🤖 Prompt for AI Agents