Skip to content

Text formatting and spell checker #165

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ RUN cp /etc/cron.daily/logrotate /etc/cron.hourly

RUN apt-get update
RUN apt-get install -y curl build-essential autoconf automake ffmpeg
# Hunspell shared library plus the en_US dictionary files (/usr/share/hunspell/en_US.{aff,dic})
# read by lib/text/content_validator.rb. -y is required because image builds are non-interactive.
RUN apt-get install -y libhunspell-dev hunspell-en-us

# setup yarn
RUN /pd_build/nodejs.sh
Expand Down
3 changes: 3 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,6 @@ gem "tailwindcss-rails", "~> 2.7"
gem "escompress", "~> 1.0"

gem "inline_svg", "~> 1.10"

# NOTE(review): "add" is not referenced by any code visible in this change — confirm it is needed.
gem "add", "~> 0.3.2"
# Hunspell bindings for the spell checker; not auto-required by Bundler —
# lib/text/content_validator.rb loads it explicitly with require 'ffi/hunspell'.
gem "ffi-hunspell", require: false
5 changes: 5 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ GEM
i18n (>= 1.6, < 2)
minitest (>= 5.1)
tzinfo (~> 2.0)
add (0.3.2)
addressable (2.8.0)
public_suffix (>= 2.0.2, < 5.0)
afm (0.2.2)
Expand Down Expand Up @@ -191,6 +192,8 @@ GEM
faraday_middleware (1.2.0)
faraday (~> 1.0)
ffi (1.16.3)
ffi-hunspell (0.6.1)
ffi (~> 1.0)
fog-aws (3.14.0)
fog-core (~> 2.1)
fog-json (~> 1.1)
Expand Down Expand Up @@ -578,6 +581,7 @@ DEPENDENCIES
activeadmin (~> 3.2.3)
activeadmin-searchable_select (>= 1.8.0)
activerecord-import
add (~> 0.3.2)
addressable
annotate
aws-sdk-s3 (~> 1.130)
Expand All @@ -597,6 +601,7 @@ DEPENDENCIES
dotenv
engtagger
escompress (~> 1.0)
ffi-hunspell
fog-aws
image_processing
inline_svg (~> 1.10)
Expand Down
141 changes: 141 additions & 0 deletions lib/text/content_validator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
require 'erb'
require 'ffi/hunspell'

# Scans the text of every Translation row that belongs to one resource content
# for simple formatting problems (missing space after punctuation, runs of
# whitespace, whitespace before a full stop) and for spelling mistakes, then
# renders the findings as a standalone HTML report.
#
# Usage:
#   html = ContentValidator.new(resource_content_id).generate_report
class ContentValidator
  # Default Hunspell affix/dictionary files (en_US).
  DEFAULT_AFFIX_PATH = '/usr/share/hunspell/en_US.aff'
  DEFAULT_DICTIONARY_PATH = '/usr/share/hunspell/en_US.dic'

  # Splits a text into the pieces the validator cares about. Anything the
  # pattern does not match (quotes, brackets, trailing punctuation, ...) is
  # copied to the output unchanged (after HTML escaping).
  TOKEN = /
      (?<space>\s+)                                    # any run of whitespace
    | (?<number>\d+(?:[.,]\d+)+)                       # 1,000 or 3.14: not a missing space
    | (?<word>[[:alnum:]]+(?:['\u2019][[:alnum:]]+)*)  # a word, apostrophes allowed inside
    | (?<punct>(?<=\w)[.,!?](?=\w))                    # punctuation glued between two word characters
  /x.freeze

  # @return [Object] the resource content id passed to the constructor
  # @return [Enumerable, nil] rows examined by the last #generate_report call
  # @return [Array<Hash>] one hash per problematic row with the keys
  #   :ayah_id, :issues (Array<String>) and :highlighted_text (HTML String)
  attr_reader :translation_id, :ayahs, :issues

  # @param translation_id [Object] value matched against Translation#resource_content_id
  # @param affix_path [String] path of the Hunspell .aff file
  # @param dictionary_path [String] path of the Hunspell .dic file
  def initialize(translation_id, affix_path: DEFAULT_AFFIX_PATH, dictionary_path: DEFAULT_DICTIONARY_PATH)
    @translation_id = translation_id
    @affix_path = affix_path
    @dictionary_path = dictionary_path
    @issues = []
  end

  # Validates every row and returns the report.
  #
  # Findings are rebuilt from scratch on every call, so calling this twice
  # does not duplicate entries in #issues.
  #
  # @return [String] a complete HTML document
  # @raise [StandardError] whatever FFI::Hunspell::Dictionary.new raises when
  #   the dictionary files cannot be opened
  def generate_report
    @issues = []
    @ayahs = fetch_translations

    # Loading a dictionary is expensive: open it once for the whole run.
    with_dictionary do |dictionary|
      ayahs.each { |ayah| process_text(ayah, dictionary) }
    end

    generate_html
  end

  private

  # Rows to validate, ordered by verse.
  #
  # @param id [Object] resource content id; defaults to the one given to the constructor
  def fetch_translations(id = translation_id)
    Translation.where(resource_content_id: id).order(:verse_id)
  end

  # Opens the Hunspell dictionary, yields it, and always releases it again.
  def with_dictionary
    dictionary = FFI::Hunspell::Dictionary.new(@affix_path, @dictionary_path)
    yield dictionary
  ensure
    dictionary&.close
  end

  # Checks one row and records an entry in #issues when something is wrong.
  #
  # The row's text is only read, never modified; the highlighted copy is built
  # in a separate string and every fragment taken from the text is HTML-escaped.
  #
  # @param ayah [#text, #[]] row to inspect
  # @param dictionary [#check?, nil] open dictionary; one is opened (and closed)
  #   for this single call when omitted
  def process_text(ayah, dictionary = nil)
    return with_dictionary { |opened| process_text(ayah, opened) } if dictionary.nil?

    source = ayah.text.to_s
    found = []
    markup = +''
    cursor = 0

    source.scan(TOKEN) do
      match = Regexp.last_match
      # Copy whatever lies between the previous token and this one.
      markup << escape(source[cursor...match.begin(0)])
      cursor = match.end(0)
      markup << render_token(match, source, dictionary, found)
    end
    markup << escape(source[cursor..-1])

    return if found.empty?

    # NOTE(review): this is the id of the translation row itself, although the report column is titled "Ayah ID" — confirm.
    issues << { ayah_id: ayah[:id], issues: found, highlighted_text: markup }
  end

  # Turns one TOKEN match into report markup, appending any problem to +found+.
  def render_token(match, source, dictionary, found)
    if match[:space]
      render_whitespace(match[:space], source[match.end(0)] == '.', found)
    elsif match[:word]
      render_word(match[:word], dictionary, found)
    elsif match[:punct]
      # The lookbehind guarantees there is a character on each side.
      context = source[(match.begin(0) - 1)..match.end(0)]
      found << "Missing space after punctuation near: '#{context}'"
      "#{match[:punct]} #{marker('missing space')} "
    else
      escape(match[0]) # grouped or decimal number: nothing to report
    end
  end

  # Markup for a whitespace run: flags runs of two or more characters and
  # whitespace that directly precedes a full stop.
  def render_whitespace(run, before_full_stop, found)
    if run.length > 1
      found << 'Extra spaces found.'
      leading = " #{marker('extra space')}"
      trailing = ' '
    else
      leading = ''
      trailing = run
    end

    if before_full_stop
      found << 'Space before full stop found.'
      trailing = marker('space before full stop')
    end

    leading + trailing
  end

  # Markup for a word: highlighted when the dictionary rejects it.
  def render_word(word, dictionary, found)
    safe = escape(word)
    # Typographic apostrophes are normalised so that contractions are found in the dictionary.
    return safe if dictionary.check?(word.tr("\u2019", "'"))

    found << "Spelling mistake: '#{word}'"
    highlight(safe)
  end

  # Highlighted "[label]" marker inserted where a formatting problem was found.
  def marker(label)
    highlight("[#{label}]")
  end

  # Wraps already-escaped HTML in the report's highlight span.
  def highlight(html)
    "<span class='highlight'>#{html}</span>"
  end

  # HTML-escapes arbitrary text.
  def escape(value)
    ERB::Util.html_escape(value.to_s)
  end

  # Generate HTML report
  def generate_html
    rows = issues.map { |issue| generate_table_row(issue) }.join("\n")

    <<~HTML
      <!DOCTYPE html>
      <html>
      <head>
        <meta charset="utf-8">
        <style>
          body {
            font-family: Arial, sans-serif;
          }
          table {
            width: 100%;
            border-collapse: collapse;
            margin: 20px 0;
          }
          th, td {
            border: 1px solid #ddd;
            padding: 8px;
          }
          th {
            background-color: #f2f2f2;
            cursor: pointer;
          }
          .highlight {
            background-color: yellow;
            font-weight: bold;
          }
        </style>
        <script>
          // Simple search and sort functionality
          function searchTable() {
            const input = document.getElementById('searchInput');
            const filter = input.value.toLowerCase();
            const rows = document.querySelectorAll('#ayahTable tbody tr');
            rows.forEach(row => {
              row.style.display = row.textContent.toLowerCase().includes(filter) ? '' : 'none';
            });
          }

          function sortTable(columnIndex) {
            const table = document.getElementById('ayahTable');
            const rows = Array.from(table.rows).slice(1);
            const sortedRows = rows.sort((a, b) => {
              const aText = a.cells[columnIndex].textContent.trim();
              const bText = b.cells[columnIndex].textContent.trim();
              return aText.localeCompare(bText, undefined, { numeric: true });
            });
            const tbody = table.tBodies[0];
            sortedRows.forEach(row => tbody.appendChild(row));
          }
        </script>
      </head>
      <body>
        <h1>Content Validation Report</h1>
        <input type="text" id="searchInput" onkeyup="searchTable()" placeholder="Search for ayahs...">
        <table id="ayahTable">
          <thead>
            <tr>
              <th onclick="sortTable(0)">Ayah ID</th>
              <th onclick="sortTable(1)">Issues</th>
              <th>Highlighted Text</th>
            </tr>
          </thead>
          <tbody>
            #{rows}
          </tbody>
        </table>
      </body>
      </html>
    HTML
  end

  # Generate HTML table row for each issue
  #
  # @param issue [Hash] an entry of #issues
  # @return [String] a <tr> element
  def generate_table_row(issue)
    messages = issue[:issues].map { |message| escape(message) }.join('<br>')

    # :highlighted_text is inserted as-is: it was escaped while it was built
    # and contains the highlight spans on purpose.
    <<~HTML
      <tr>
        <td>#{escape(issue[:ayah_id])}</td>
        <td>#{messages}</td>
        <td>#{issue[:highlighted_text]}</td>
      </tr>
    HTML
  end
end