Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 20 additions & 26 deletions src/main/java/ai/elimu/tasks/LetterSoundUsageCountScheduler.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,20 @@

import ai.elimu.dao.LetterSoundDao;
import ai.elimu.dao.WordDao;
import ai.elimu.entity.content.Letter;
import ai.elimu.entity.content.LetterSound;
import ai.elimu.entity.content.Sound;
import ai.elimu.entity.content.Word;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;

/**
* Iterates all words and calculates the frequency of each letter-sound.
*/
@Service
@RequiredArgsConstructor
@Slf4j
Expand All @@ -25,41 +25,35 @@ public class LetterSoundUsageCountScheduler {

private final LetterSoundDao letterSoundDao;

@Scheduled(cron = "00 15 06 * * *") // At 06:15 every day
@Scheduled(cron = "00 10 06 * * *") // At 06:10 every day
public synchronized void execute() {
log.info("execute");

log.info("Calculating usage count for LetterSounds");
// <ID, frequency>
Map<Long, Integer> frequencyMap = new HashMap<>();

// <id, usageCount>
Map<Long, Integer> letterSoundFrequencyMap = new HashMap<>();
List<LetterSound> letterSounds = letterSoundDao.readAll();
log.info("letterSounds.size(): " + letterSounds.size());

List<Word> words = wordDao.readAll();
log.info("words.size(): " + words.size());
for (Word word : words) {
log.debug("word.getText(): " + word.getText());
for (LetterSound letterSound : word.getLetterSounds()) {
letterSoundFrequencyMap.put(letterSound.getId(),
letterSoundFrequencyMap.getOrDefault(letterSound.getId(), 0) + word.getUsageCount());

// Calculate the frequency of each letter-sound: the number of times it occurs
// in the letter-sound lists of all words.
// Start every known letter-sound at zero so that unused ones are still updated.
for (LetterSound letterSound : letterSounds) {
  frequencyMap.put(letterSound.getId(), 0);
}
// Single pass over the words. The map lookup compares IDs by value (equals/hashCode),
// whereas `==` on two ID objects would compare references if getId() returns a boxed
// Long (presumably it does, as the map is keyed by Long; confirm against the entity).
for (Word word : words) {
  for (LetterSound letterSoundInWord : word.getLetterSounds()) {
    // computeIfPresent ignores IDs that were not loaded into letterSounds above,
    // matching the previous behavior of only counting known letter-sounds.
    frequencyMap.computeIfPresent(letterSoundInWord.getId(), (id, count) -> count + 1);
  }
}

// Update the values previously stored in the database
for (LetterSound letterSound : letterSoundDao.readAll()) {
log.debug("letterSound.getId(): " + letterSound.getId());
log.debug("letterSound Letters: \"" + letterSound.getLetters().stream().map(Letter::getText).collect(Collectors.joining()) + "\"");
log.debug("letterSound Sounds: /" + letterSound.getSounds().stream().map(Sound::getValueIpa).collect(Collectors.joining()) + "/");
log.debug("letterSound.getUsageCount() (before update): " + letterSound.getUsageCount());

int newUsageCount = 0;
if (letterSoundFrequencyMap.containsKey(letterSound.getId())) {
newUsageCount = letterSoundFrequencyMap.get(letterSound.getId());
}
log.info("newUsageCount: " + newUsageCount);

letterSound.setUsageCount(newUsageCount);
for (LetterSound letterSound : letterSounds) {
letterSound.setUsageCount(frequencyMap.get(letterSound.getId()));
letterSoundDao.update(letterSound);
log.info("letterSound.getUsageCount() (after update): " + letterSound.getUsageCount());
}

log.info("execute complete");
Expand Down
18 changes: 8 additions & 10 deletions src/main/java/ai/elimu/tasks/LetterUsageCountScheduler.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@
import org.springframework.stereotype.Service;

/**
* Iterates all StoryBooks and calculates the frequency of each letter. Lower-case and
* upper-case variants are considered as two different letters, e.g. 'a' and 'A'.
* Iterates all storybook paragraphs and calculates the frequency of each letter.
Comment thread
jo-elimu marked this conversation as resolved.
*/
@Service
@RequiredArgsConstructor
Expand All @@ -35,13 +34,12 @@ public class LetterUsageCountScheduler {
private final StoryBookChapterDao storyBookChapterDao;
private final StoryBookParagraphDao storyBookParagraphDao;

@Scheduled(cron = "00 15 06 * * *") // At 06:15 every day
@Scheduled(cron = "00 20 06 * * *") // At 06:20 every day
public synchronized void execute() {
log.info("execute");

log.info("Calculating usage count for Letters");

Map<String, Integer> letterFrequencyMap = new HashMap<>();
// <letter text, frequency>
// Keyed by the letter's text (String), not its ID: the entries are merged from
// LetterFrequencyHelper's Map<String, Integer> and looked up via letterDao.readByText(...).
Map<String, Integer> frequencyMap = new HashMap<>();
Comment thread
coderabbitai[bot] marked this conversation as resolved.

Language language = Language.valueOf(ConfigHelper.getProperty("content.language"));

Expand All @@ -60,15 +58,15 @@ public synchronized void execute() {
}

Map<String, Integer> letterFrequencyMapForBook = LetterFrequencyHelper.getLetterFrequency(paragraphs, language);
letterFrequencyMapForBook.keySet().forEach(letterText -> letterFrequencyMap.put(letterText, letterFrequencyMap.getOrDefault(letterText, 0) + letterFrequencyMapForBook.get(letterText)));
letterFrequencyMapForBook.keySet().forEach(letterText -> frequencyMap.put(letterText, frequencyMap.getOrDefault(letterText, 0) + letterFrequencyMapForBook.get(letterText)));
}

log.info("letterFrequencyMap: " + letterFrequencyMap);
log.info("letterFrequencyMap: " + frequencyMap);

for (String letterText : letterFrequencyMap.keySet()) {
for (String letterText : frequencyMap.keySet()) {
Letter existingLetter = letterDao.readByText(letterText);
if (existingLetter != null) {
existingLetter.setUsageCount(letterFrequencyMap.get(letterText));
existingLetter.setUsageCount(frequencyMap.get(letterText));
letterDao.update(existingLetter);
}
}
Expand Down
17 changes: 6 additions & 11 deletions src/main/java/ai/elimu/tasks/SoundUsageCountScheduler.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,7 @@
import org.springframework.stereotype.Service;

/**
* Iterates all Words and calculates the frequency of Sounds, based on the Word's frequency in StoryBooks.
* <p/>
* For this to work, the frequency of each {@link Word} must have been calculated and stored previously (see {@link WordUsageCountScheduler} and {@link LetterSoundUsageCountScheduler}).
* Iterates the letter-sounds of all words and calculates the frequency of each sound, based on each letter-sound's usage count.
*/
@Slf4j
@Service
Expand All @@ -32,11 +30,8 @@ public class SoundUsageCountScheduler {
public synchronized void execute() {
log.info("execute");

log.info("Calculating usage count of Sounds");

// Long = Sound ID
// Integer = Usage count
Map<Long, Integer> soundFrequencyMap = new HashMap<>();
// <ID, frequency>
Map<Long, Integer> frequencyMap = new HashMap<>();

// Summarize the usage count of each Word's Sounds based on the LetterSound's
// usage count (see LetterSoundUsageCountScheduler).
Expand All @@ -45,14 +40,14 @@ public synchronized void execute() {
for (Word word : words) {
for (LetterSound letterSound : word.getLetterSounds()) {
for (Sound sound : letterSound.getSounds()) {
soundFrequencyMap.put(sound.getId(), soundFrequencyMap.getOrDefault(sound.getId(), 0) + letterSound.getUsageCount());
frequencyMap.put(sound.getId(), frequencyMap.getOrDefault(sound.getId(), 0) + letterSound.getUsageCount());
}
}
}
// Update each Sound's usage count in the database
for (Long soundId : soundFrequencyMap.keySet()) {
for (Long soundId : frequencyMap.keySet()) {
Sound sound = soundDao.read(soundId);
sound.setUsageCount(soundFrequencyMap.get(soundId));
sound.setUsageCount(frequencyMap.get(soundId));
soundDao.update(sound);
}

Expand Down
65 changes: 21 additions & 44 deletions src/main/java/ai/elimu/tasks/WordUsageCountScheduler.java
Original file line number Diff line number Diff line change
@@ -1,27 +1,21 @@
package ai.elimu.tasks;

import ai.elimu.dao.StoryBookChapterDao;
import ai.elimu.dao.StoryBookDao;
import ai.elimu.dao.StoryBookParagraphDao;
import ai.elimu.dao.WordDao;
import ai.elimu.entity.content.StoryBook;
import ai.elimu.entity.content.StoryBookChapter;
import ai.elimu.entity.content.StoryBookParagraph;
import ai.elimu.entity.content.Word;
import ai.elimu.model.v2.enums.Language;
import ai.elimu.util.ConfigHelper;
import ai.elimu.util.WordFrequencyHelper;
import java.util.ArrayList;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;

import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;

/**
* Iterates all StoryBooks and calculates the frequency of each word. Does not separate words with differing upper-case and lower-case letters.
* Iterates all storybook paragraphs and calculates the frequency of each word.
*/
@Service
@RequiredArgsConstructor
Expand All @@ -30,54 +24,37 @@ public class WordUsageCountScheduler {

private final WordDao wordDao;

private final StoryBookDao storyBookDao;

private final StoryBookChapterDao storyBookChapterDao;

private final StoryBookParagraphDao storyBookParagraphDao;

@Scheduled(cron = "00 00 06 * * *") // At 06:00 every day
public synchronized void execute() {
log.info("execute");

log.info("Calculating usage count for Words");

Map<String, Integer> wordFrequencyMap = new HashMap<>();
// <ID, frequency>
Map<Long, Integer> frequencyMap = new HashMap<>();

Language language = Language.valueOf(ConfigHelper.getProperty("content.language"));
List<Word> words = wordDao.readAll();
log.info("words.size(): " + words.size());

List<StoryBook> storyBooks = storyBookDao.readAllOrdered();
log.info("storyBooks.size(): " + storyBooks.size());
for (StoryBook storyBook : storyBooks) {
log.debug("storyBook.getTitle(): " + storyBook.getTitle());
List<StoryBookParagraph> storyBookParagraphs = storyBookParagraphDao.readAll();
log.info("storyBookParagraphs.size(): " + storyBookParagraphs.size());

List<String> paragraphs = new ArrayList<>();
List<StoryBookChapter> storyBookChapters = storyBookChapterDao.readAll(storyBook);
for (StoryBookChapter storyBookChapter : storyBookChapters) {
List<StoryBookParagraph> storyBookParagraphs = storyBookParagraphDao.readAll(storyBookChapter);
for (StoryBookParagraph storyBookParagraph : storyBookParagraphs) {
paragraphs.add(storyBookParagraph.getOriginalText());
// Calculate the frequency of each word
for (Word word : words) {
frequencyMap.put(word.getId(), 0);
for (StoryBookParagraph storyBookParagraph : storyBookParagraphs) {
for (Word wordInParagraph : storyBookParagraph.getWords()) {
// Compare IDs by value. `==` on two boxed Long objects compares references and only
// happens to work for small cached values; Objects.equals is also correct (via boxing)
// if getId() returns a primitive long. Fully qualified to avoid needing a new import.
if (java.util.Objects.equals(wordInParagraph.getId(), word.getId())) {
  frequencyMap.put(word.getId(), frequencyMap.get(word.getId()) + 1);
}
}
}

Map<String, Integer> wordFrequencyMapForBook = WordFrequencyHelper.getWordFrequency(paragraphs, language);
wordFrequencyMapForBook.keySet().forEach(word -> wordFrequencyMap.put(word, wordFrequencyMap.getOrDefault(word, 0) + wordFrequencyMapForBook.get(word)));
}

for (String word : wordFrequencyMap.keySet()) {
log.info("word: \"" + word + "\"");
Word existingWord = wordDao.readByText(word);
if (existingWord != null) {
existingWord.setUsageCount(wordFrequencyMap.get(word));

// Temporary fix for "jakarta.validation.ConstraintViolationException"
if (existingWord.getLetterSounds().isEmpty()) {
log.warn("Letter-sound correspondences not yet added. Skipping usage count update for word...");
continue;
}

wordDao.update(existingWord);
}
// Update the values previously stored in the database
for (Word word : words) {
word.setUsageCount(frequencyMap.get(word.getId()));
wordDao.update(word);
}

log.info("execute complete");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public String handleRequest(Model model) {
}
model.addAttribute("maxUsageCount", maxUsageCount);

// Extract letter frequency distribution from storybook paragraphs
// Extract word frequency distribution from storybook paragraphs
List<String> paragraphs = new ArrayList<>();
for (StoryBookParagraph storyBookParagraph : storyBookParagraphDao.readAll()) {
if (StringUtils.isNotBlank(storyBookParagraph.getOriginalText())) {
Expand Down
Loading