Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pom-dependency-tree.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ai.elimu:webapp:war:2.6.101-SNAPSHOT
ai.elimu:webapp:war:2.6.102-SNAPSHOT
+- ai.elimu:model:jar:model-2.0.114:compile
| \- com.google.code.gson:gson:jar:2.13.1:compile
| \- com.google.errorprone:error_prone_annotations:jar:2.38.0:compile
Expand Down
38 changes: 10 additions & 28 deletions src/main/java/ai/elimu/tasks/LetterSoundUsageCountScheduler.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,19 @@

import ai.elimu.dao.LetterSoundDao;
import ai.elimu.dao.WordDao;
import ai.elimu.entity.content.Letter;
import ai.elimu.entity.content.LetterSound;
import ai.elimu.entity.content.Sound;
import ai.elimu.entity.content.Word;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;

/**
* Iterates all words and calculates the frequency of each letter-sound.
*/
@Service
@RequiredArgsConstructor
@Slf4j
Expand All @@ -25,41 +24,24 @@ public class LetterSoundUsageCountScheduler {

private final LetterSoundDao letterSoundDao;

@Scheduled(cron = "00 15 06 * * *") // At 06:15 every day
@Scheduled(cron = "00 10 06 * * *") // At 06:10 every day
public synchronized void execute() {
log.info("execute");

log.info("Calculating usage count for LetterSounds");
// <ID, frequency>
Map<Long, Integer> frequencyMap = new HashMap<>();

// <id, usageCount>
Map<Long, Integer> letterSoundFrequencyMap = new HashMap<>();

List<Word> words = wordDao.readAll();
log.info("words.size(): " + words.size());
for (Word word : words) {
log.debug("word.getText(): " + word.getText());
// Calculate the frequency of each letter-sound
for (Word word : wordDao.readAll()) {
for (LetterSound letterSound : word.getLetterSounds()) {
letterSoundFrequencyMap.put(letterSound.getId(),
letterSoundFrequencyMap.getOrDefault(letterSound.getId(), 0) + word.getUsageCount());
frequencyMap.put(letterSound.getId(), frequencyMap.getOrDefault(letterSound.getId(), 0) + 1);
}
}

// Update the values previously stored in the database
for (LetterSound letterSound : letterSoundDao.readAll()) {
log.debug("letterSound.getId(): " + letterSound.getId());
log.debug("letterSound Letters: \"" + letterSound.getLetters().stream().map(Letter::getText).collect(Collectors.joining()) + "\"");
log.debug("letterSound Sounds: /" + letterSound.getSounds().stream().map(Sound::getValueIpa).collect(Collectors.joining()) + "/");
log.debug("letterSound.getUsageCount() (before update): " + letterSound.getUsageCount());

int newUsageCount = 0;
if (letterSoundFrequencyMap.containsKey(letterSound.getId())) {
newUsageCount = letterSoundFrequencyMap.get(letterSound.getId());
}
log.info("newUsageCount: " + newUsageCount);

letterSound.setUsageCount(newUsageCount);
letterSound.setUsageCount(frequencyMap.getOrDefault(letterSound.getId(), 0));
letterSoundDao.update(letterSound);
log.info("letterSound.getUsageCount() (after update): " + letterSound.getUsageCount());
}

log.info("execute complete");
Expand Down
63 changes: 17 additions & 46 deletions src/main/java/ai/elimu/tasks/LetterUsageCountScheduler.java
Original file line number Diff line number Diff line change
@@ -1,28 +1,19 @@
package ai.elimu.tasks;

import ai.elimu.dao.LetterDao;
import ai.elimu.dao.StoryBookChapterDao;
import ai.elimu.dao.StoryBookDao;
import ai.elimu.dao.StoryBookParagraphDao;
import ai.elimu.dao.LetterSoundDao;
import ai.elimu.entity.content.Letter;
import ai.elimu.entity.content.StoryBook;
import ai.elimu.entity.content.StoryBookChapter;
import ai.elimu.entity.content.StoryBookParagraph;
import ai.elimu.model.v2.enums.Language;
import ai.elimu.util.ConfigHelper;
import ai.elimu.util.LetterFrequencyHelper;
import java.util.ArrayList;
import ai.elimu.entity.content.LetterSound;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;

/**
* Iterates all StoryBooks and calculates the frequency of each letter. Lower-case and
* upper-case variants are considered as two different letters, e.g. 'a' and 'A'.
* Iterates all letter-sounds and calculates the frequency of each letter.
Comment thread
jo-elimu marked this conversation as resolved.
*/
@Service
@RequiredArgsConstructor
Expand All @@ -31,46 +22,26 @@ public class LetterUsageCountScheduler {

private final LetterDao letterDao;

private final StoryBookDao storyBookDao;
private final StoryBookChapterDao storyBookChapterDao;
private final StoryBookParagraphDao storyBookParagraphDao;
private final LetterSoundDao letterSoundDao;

@Scheduled(cron = "00 15 06 * * *") // At 06:15 every day
@Scheduled(cron = "00 20 06 * * *") // At 06:20 every day
public synchronized void execute() {
log.info("execute");

log.info("Calculating usage count for Letters");

Map<String, Integer> letterFrequencyMap = new HashMap<>();

Language language = Language.valueOf(ConfigHelper.getProperty("content.language"));

List<StoryBook> storyBooks = storyBookDao.readAllOrdered();
log.info("storyBooks.size(): " + storyBooks.size());
for (StoryBook storyBook : storyBooks) {
log.debug("storyBook.getTitle(): " + storyBook.getTitle());

List<String> paragraphs = new ArrayList<>();
List<StoryBookChapter> storyBookChapters = storyBookChapterDao.readAll(storyBook);
for (StoryBookChapter storyBookChapter : storyBookChapters) {
List<StoryBookParagraph> storyBookParagraphs = storyBookParagraphDao.readAll(storyBookChapter);
for (StoryBookParagraph storyBookParagraph : storyBookParagraphs) {
paragraphs.add(storyBookParagraph.getOriginalText());
}
// <ID, frequency>
Map<Long, Integer> frequencyMap = new HashMap<>();
Comment thread
coderabbitai[bot] marked this conversation as resolved.

// Calculate the frequency of each letter
for (LetterSound letterSound : letterSoundDao.readAll()) {
for (Letter letter : letterSound.getLetters()) {
frequencyMap.put(letter.getId(), frequencyMap.getOrDefault(letter.getId(), 0) + 1);
}

Map<String, Integer> letterFrequencyMapForBook = LetterFrequencyHelper.getLetterFrequency(paragraphs, language);
letterFrequencyMapForBook.keySet().forEach(letterText -> letterFrequencyMap.put(letterText, letterFrequencyMap.getOrDefault(letterText, 0) + letterFrequencyMapForBook.get(letterText)));
}

log.info("letterFrequencyMap: " + letterFrequencyMap);

for (String letterText : letterFrequencyMap.keySet()) {
Letter existingLetter = letterDao.readByText(letterText);
if (existingLetter != null) {
existingLetter.setUsageCount(letterFrequencyMap.get(letterText));
letterDao.update(existingLetter);
}
// Update the values previously stored in the database
for (Letter letter : letterDao.readAll()) {
letter.setUsageCount(frequencyMap.getOrDefault(letter.getId(), 0));
letterDao.update(letter);
}

log.info("execute complete");
Expand Down
17 changes: 6 additions & 11 deletions src/main/java/ai/elimu/tasks/SoundUsageCountScheduler.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,7 @@
import org.springframework.stereotype.Service;

/**
* Iterates all Words and calculates the frequency of Sounds, based on the Word's frequency in StoryBooks.
* <p/>
* For this to work, the frequency of each {@link Word} must have been calculated and stored previously (see {@link WordUsageCountScheduler} and {@link LetterSoundUsageCountScheduler}).
* Iterates all letter-sounds and calculates the frequency of each sound.
*/
@Slf4j
@Service
Expand All @@ -32,11 +30,8 @@ public class SoundUsageCountScheduler {
public synchronized void execute() {
log.info("execute");

log.info("Calculating usage count of Sounds");

// Long = Sound ID
// Integer = Usage count
Map<Long, Integer> soundFrequencyMap = new HashMap<>();
// <ID, frequency>
Map<Long, Integer> frequencyMap = new HashMap<>();

// Summarize the usage count of each Word's Sounds based on the LetterSound's
// usage count (see LetterSoundUsageCountScheduler).
Expand All @@ -45,14 +40,14 @@ public synchronized void execute() {
for (Word word : words) {
for (LetterSound letterSound : word.getLetterSounds()) {
for (Sound sound : letterSound.getSounds()) {
soundFrequencyMap.put(sound.getId(), soundFrequencyMap.getOrDefault(sound.getId(), 0) + letterSound.getUsageCount());
frequencyMap.put(sound.getId(), frequencyMap.getOrDefault(sound.getId(), 0) + letterSound.getUsageCount());
}
}
}
// Update each Sound's usage count in the database
for (Long soundId : soundFrequencyMap.keySet()) {
for (Long soundId : frequencyMap.keySet()) {
Sound sound = soundDao.read(soundId);
sound.setUsageCount(soundFrequencyMap.get(soundId));
sound.setUsageCount(frequencyMap.get(soundId));
soundDao.update(sound);
}

Expand Down
61 changes: 13 additions & 48 deletions src/main/java/ai/elimu/tasks/WordUsageCountScheduler.java
Original file line number Diff line number Diff line change
@@ -1,27 +1,20 @@
package ai.elimu.tasks;

import ai.elimu.dao.StoryBookChapterDao;
import ai.elimu.dao.StoryBookDao;
import ai.elimu.dao.StoryBookParagraphDao;
import ai.elimu.dao.WordDao;
import ai.elimu.entity.content.StoryBook;
import ai.elimu.entity.content.StoryBookChapter;
import ai.elimu.entity.content.StoryBookParagraph;
import ai.elimu.entity.content.Word;
import ai.elimu.model.v2.enums.Language;
import ai.elimu.util.ConfigHelper;
import ai.elimu.util.WordFrequencyHelper;
import java.util.ArrayList;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;

import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;

/**
* Iterates all StoryBooks and calculates the frequency of each word. Does not separate words with differing upper-case and lower-case letters.
* Iterates all storybook paragraphs and calculates the frequency of each word.
*/
@Service
@RequiredArgsConstructor
Expand All @@ -30,54 +23,26 @@ public class WordUsageCountScheduler {

private final WordDao wordDao;

private final StoryBookDao storyBookDao;

private final StoryBookChapterDao storyBookChapterDao;

private final StoryBookParagraphDao storyBookParagraphDao;

@Scheduled(cron = "00 00 06 * * *") // At 06:00 every day
public synchronized void execute() {
log.info("execute");

log.info("Calculating usage count for Words");

Map<String, Integer> wordFrequencyMap = new HashMap<>();

Language language = Language.valueOf(ConfigHelper.getProperty("content.language"));
// <ID, frequency>
Map<Long, Integer> frequencyMap = new HashMap<>();

List<StoryBook> storyBooks = storyBookDao.readAllOrdered();
log.info("storyBooks.size(): " + storyBooks.size());
for (StoryBook storyBook : storyBooks) {
log.debug("storyBook.getTitle(): " + storyBook.getTitle());

List<String> paragraphs = new ArrayList<>();
List<StoryBookChapter> storyBookChapters = storyBookChapterDao.readAll(storyBook);
for (StoryBookChapter storyBookChapter : storyBookChapters) {
List<StoryBookParagraph> storyBookParagraphs = storyBookParagraphDao.readAll(storyBookChapter);
for (StoryBookParagraph storyBookParagraph : storyBookParagraphs) {
paragraphs.add(storyBookParagraph.getOriginalText());
}
// Calculate the frequency of each word
for (StoryBookParagraph storyBookParagraph : storyBookParagraphDao.readAll()) {
for (Word word : storyBookParagraph.getWords()) {
frequencyMap.put(word.getId(), frequencyMap.getOrDefault(word.getId(), 0) + 1);
}

Map<String, Integer> wordFrequencyMapForBook = WordFrequencyHelper.getWordFrequency(paragraphs, language);
wordFrequencyMapForBook.keySet().forEach(word -> wordFrequencyMap.put(word, wordFrequencyMap.getOrDefault(word, 0) + wordFrequencyMapForBook.get(word)));
}

for (String word : wordFrequencyMap.keySet()) {
log.info("word: \"" + word + "\"");
Word existingWord = wordDao.readByText(word);
if (existingWord != null) {
existingWord.setUsageCount(wordFrequencyMap.get(word));

// Temporary fix for "jakarta.validation.ConstraintViolationException"
if (existingWord.getLetterSounds().isEmpty()) {
log.warn("Letter-sound correspondences not yet added. Skipping usage count update for word...");
continue;
}

wordDao.update(existingWord);
}
// Update the values previously stored in the database
for (Word word : wordDao.readAll()) {
word.setUsageCount(frequencyMap.getOrDefault(word.getId(), 0));
wordDao.update(word);
}

log.info("execute complete");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public String handleRequest(Model model) {
}
model.addAttribute("maxUsageCount", maxUsageCount);

// Extract letter frequency distribution from storybook paragraphs
// Extract word frequency distribution from storybook paragraphs
List<String> paragraphs = new ArrayList<>();
for (StoryBookParagraph storyBookParagraph : storyBookParagraphDao.readAll()) {
if (StringUtils.isNotBlank(storyBookParagraph.getOriginalText())) {
Expand Down
Loading