diff --git a/Dockerfile b/Dockerfile
index 2697bf9f..69c2d706 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -135,6 +135,7 @@ RUN cp /etc/cron.daily/logrotate /etc/cron.hourly
 
 RUN apt-get update
 RUN apt-get install -y curl build-essential autoconf automake ffmpeg
+RUN apt-get install -y libhunspell-dev hunspell-en-us
 
 # setup yarn
 RUN /pd_build/nodejs.sh
diff --git a/Gemfile b/Gemfile
index 7a9f314f..8640c5a2 100644
--- a/Gemfile
+++ b/Gemfile
@@ -158,3 +158,6 @@ gem "tailwindcss-rails", "~> 2.7"
 gem "escompress", "~> 1.0"
 
 gem "inline_svg", "~> 1.10"
+
+gem "add", "~> 0.3.2"
+gem "ffi-hunspell", require: false
diff --git a/Gemfile.lock b/Gemfile.lock
index 5501847d..9baba1cf 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -83,6 +83,7 @@ GEM
       i18n (>= 1.6, < 2)
       minitest (>= 5.1)
       tzinfo (~> 2.0)
+    add (0.3.2)
     addressable (2.8.0)
       public_suffix (>= 2.0.2, < 5.0)
     afm (0.2.2)
@@ -191,6 +192,8 @@ GEM
     faraday_middleware (1.2.0)
       faraday (~> 1.0)
     ffi (1.16.3)
+    ffi-hunspell (0.6.1)
+      ffi (~> 1.0)
     fog-aws (3.14.0)
       fog-core (~> 2.1)
       fog-json (~> 1.1)
@@ -578,6 +581,7 @@ DEPENDENCIES
   activeadmin (~> 3.2.3)
   activeadmin-searchable_select (>= 1.8.0)
   activerecord-import
+  add (~> 0.3.2)
   addressable
   annotate
   aws-sdk-s3 (~> 1.130)
@@ -597,6 +601,7 @@ DEPENDENCIES
   dotenv
   engtagger
   escompress (~> 1.0)
+  ffi-hunspell
   fog-aws
   image_processing
   inline_svg (~> 1.10)
diff --git a/lib/text/content_validator.rb b/lib/text/content_validator.rb
new file mode 100644
index 00000000..4e116a1d
--- /dev/null
+++ b/lib/text/content_validator.rb
@@ -0,0 +1,145 @@
+# frozen_string_literal: true
+
+require 'cgi'
+require 'ffi/hunspell'
+
+# Checks every ayah of one translation for spacing, punctuation and spelling
+# problems and renders the findings as a standalone HTML report.
+class ContentValidator
+  # Hunspell affix and word-list files for US English.
+  AFFIX_PATH = '/usr/share/hunspell/en_US.aff'
+  DICTIONARY_PATH = '/usr/share/hunspell/en_US.dic'
+
+  # Punctuation stripped from both ends of a token before it is spell-checked.
+  EDGE_PUNCTUATION = /\A[[:punct:]]+|[[:punct:]]+\z/
+
+  # +ayahs+ is never assigned in this class; the reader is kept so existing
+  # callers do not break.
+  attr_reader :translation_id, :ayahs, :issues
+
+  # @param translation_id [Object] matched against Translation#resource_content_id
+  def initialize(translation_id)
+    @translation_id = translation_id
+    @issues = []
+  end
+
+  # Validates every ayah of the translation and renders the findings.
+  #
+  # @return [String] the HTML report
+  def generate_report
+    issues.clear # a second call must not list every ayah twice
+    fetch_translations.each { |ayah| process_text(ayah) }
+    generate_html
+  ensure
+    close_dictionary
+  end
+
+  private
+
+  # Translations of the requested resource in verse order.
+  def fetch_translations
+    Translation.where(resource_content_id: translation_id).order('verse_id ASC')
+  end
+
+  # Spell-checker, opened once and reused for every ayah.
+  def dictionary
+    @dictionary ||= FFI::Hunspell::Dictionary.new(AFFIX_PATH, DICTIONARY_PATH)
+  end
+
+  # Releases the native Hunspell handle if one was opened.
+  def close_dictionary
+    @dictionary&.close
+    @dictionary = nil
+  end
+
+  # Records the formatting and spelling issues of one ayah in +issues+.
+  # Works on copies, so the record's text attribute is never modified.
+  def process_text(ayah)
+    original = ayah.text.to_s
+    ayah_issues = []
+    annotated = annotate_formatting(original, ayah_issues)
+
+    # Spell-check the original words, not the text that already carries markers.
+    misspelt_words(original).each do |word|
+      ayah_issues << "Spelling mistake: '#{word}'"
+      annotated = annotated.gsub(whole_word(word)) { |match| "#{match} [spelling]" }
+    end
+
+    return if ayah_issues.empty?
+
+    # NOTE(review): ayah[:id] is the Translation row id; confirm whether the
+    # "Ayah ID" column should show verse_id instead.
+    issues << { ayah_id: ayah[:id], issues: ayah_issues, highlighted_text: annotated }
+  end
+
+  # Returns a copy of +text+ with a bracketed marker at each formatting problem
+  # and appends one message per occurrence to +found+.
+  def annotate_formatting(text, found)
+    spaced = text.gsub(/(\w)([.,!?])(\w)/) do |match|
+      found << "Missing space after punctuation near: '#{match}'"
+      parts = Regexp.last_match
+      "#{parts[1]}#{parts[2]} [missing space] #{parts[3]}"
+    end
+    squeezed = spaced.gsub(/\s{2,}/) do
+      found << 'Extra spaces found.'
+      ' [extra space] '
+    end
+    squeezed.gsub(/\s+\./) do
+      found << 'Space before full stop found.'
+      '[space before full stop].'
+    end
+  end
+
+  # Distinct words of +text+ that the dictionary does not recognise.
+  def misspelt_words(text)
+    words = text.split.map { |token| token.gsub(EDGE_PUNCTUATION, '') }
+    words.reject(&:empty?).uniq.reject { |word| dictionary.check?(word) }
+  end
+
+  # Pattern that matches +word+ only where it is not part of a longer word.
+  def whole_word(word)
+    /(?<![[:alnum:]])#{Regexp.escape(word)}(?![[:alnum:]])/
+  end
+
+  # Renders the complete report page, one table row per ayah with issues.
+  def generate_html
+    <<~HTML
+      <!DOCTYPE html>
+      <html lang="en">
+        <head>
+          <meta charset="UTF-8">
+          <title>Content Validation Report</title>
+        </head>
+        <body>
+          <h1>Content Validation Report</h1>
+          <table>
+            <thead>
+              <tr>
+                <th>Ayah ID</th>
+                <th>Issues</th>
+                <th>Highlighted Text</th>
+              </tr>
+            </thead>
+            <tbody>
+              #{issues.map { |issue| generate_table_row(issue) }.join("\n")}
+            </tbody>
+          </table>
+        </body>
+      </html>
+    HTML
+  end
+
+  # Renders one table row; every value is HTML-escaped because it originates
+  # from stored translation text.
+  def generate_table_row(issue)
+    messages = issue[:issues].map { |message| CGI.escapeHTML(message) }.join('<br>')
+
+    <<~HTML
+      <tr>
+        <td>#{CGI.escapeHTML(issue[:ayah_id].to_s)}</td>
+        <td>#{messages}</td>
+        <td>#{CGI.escapeHTML(issue[:highlighted_text])}</td>
+      </tr>
+    HTML
+  end
+end