From 49a6592245bc89db9647fade51c348b67efb2a23 Mon Sep 17 00:00:00 2001
From: Nat
Date: Wed, 23 Apr 2025 14:56:33 +0800
Subject: [PATCH 1/5] FEATURE: Translate categories

---
Review notes (this section is not part of the commit message):
- translator.rb: "Provider.TranslatorProvider" was a method call on the
  Provider module; it is now the constant path Provider::TranslatorProvider,
  the same form category_translator.rb uses.
- translate_categories_spec.rb: the before block now stubs
  Provider::TranslatorProvider.get (the method the code under test calls)
  instead of Provider.get.
- Blob ids on the "index" lines were not regenerated after these edits.

 app/jobs/regular/translate_categories.rb      |  45 +++++++
 .../automatic_category_translation.rb         |  18 +++
 .../category_translator.rb                    |  22 +++
 .../discourse_translator/translator.rb        |  18 +++
 config/settings.yml                           |   3 +
 spec/jobs/translate_categories_spec.rb        | 127 ++++++++++++++++++
 spec/services/category_translator_spec.rb     |  65 +++++++++
 7 files changed, 298 insertions(+)
 create mode 100644 app/jobs/regular/translate_categories.rb
 create mode 100644 app/jobs/scheduled/automatic_category_translation.rb
 create mode 100644 app/services/discourse_translator/category_translator.rb
 create mode 100644 app/services/discourse_translator/translator.rb
 create mode 100644 spec/jobs/translate_categories_spec.rb
 create mode 100644 spec/services/category_translator_spec.rb

diff --git a/app/jobs/regular/translate_categories.rb b/app/jobs/regular/translate_categories.rb
new file mode 100644
index 0000000..9f36a44
--- /dev/null
+++ b/app/jobs/regular/translate_categories.rb
@@ -0,0 +1,45 @@
+# frozen_string_literal: true
+
+module Jobs
+  class TranslateCategories < ::Jobs::Base
+    cluster_concurrency 1
+    BATCH_SIZE = 50
+
+    def execute(args)
+      return unless SiteSetting.translator_enabled
+      return unless SiteSetting.experimental_category_translation
+
+      locales = SiteSetting.automatic_translation_target_languages.split("|")
+      return if locales.blank?
+
+      cat_id = args[:from_category_id] || Category.order(:id).first&.id
+      last_id = cat_id
+
+      # we're just gonna take all categories and keep it simple
+      # instead of checking in the db which ones are absent
+      categories = Category.where("id >= ?", cat_id).order(:id).limit(BATCH_SIZE)
+      return if categories.empty?
+
+      categories.each do |category|
+        CategoryLocalization.transaction do
+          locales.each do |locale|
+            next if CategoryLocalization.exists?(category_id: category.id, locale: locale)
+            begin
+              DiscourseTranslator::CategoryTranslator.translate(category, locale)
+            rescue => e
+              Rails.logger.error(
+                "Discourse Translator: Failed to translate category #{category.id} to #{locale}: #{e.message}",
+              )
+            end
+          end
+        end
+        last_id = category.id
+      end
+
+      # from batch if needed
+      if categories.size == BATCH_SIZE
+        Jobs.enqueue_in(10.seconds, :translate_categories, from_category_id: last_id + 1)
+      end
+    end
+  end
+end
diff --git a/app/jobs/scheduled/automatic_category_translation.rb b/app/jobs/scheduled/automatic_category_translation.rb
new file mode 100644
index 0000000..ef28e47
--- /dev/null
+++ b/app/jobs/scheduled/automatic_category_translation.rb
@@ -0,0 +1,18 @@
+# frozen_string_literal: true
+
+module Jobs
+  class AutomaticCategoryTranslation < ::Jobs::Scheduled
+    every 12.hours
+    cluster_concurrency 1
+
+    def execute(args)
+      return unless SiteSetting.translator_enabled
+      return unless SiteSetting.experimental_category_translation
+
+      locales = SiteSetting.automatic_translation_target_languages.split("|")
+      return if locales.blank?
+
+      Jobs.enqueue(:translate_categories)
+    end
+  end
+end
diff --git a/app/services/discourse_translator/category_translator.rb b/app/services/discourse_translator/category_translator.rb
new file mode 100644
index 0000000..89fb0cf
--- /dev/null
+++ b/app/services/discourse_translator/category_translator.rb
@@ -0,0 +1,22 @@
+# frozen_string_literal: true
+
+module DiscourseTranslator
+  class CategoryTranslator
+    # unlike post and topics, categories do not have a detected locale
+    # and will translate two fields, name and description
+
+    def self.translate(category, target_locale = I18n.locale)
+      return if category.blank? || target_locale.blank?
+
+      # locale can come in various forms
+      # standardize it to a _ symbol
+      target_locale_sym = target_locale.to_s.sub("-", "_").to_sym
+
+      translator = DiscourseTranslator::Provider::TranslatorProvider.get
+      translated_name = translator.translate_text!(category.name, target_locale_sym)
+      translated_description = translator.translate_text!(category.description, target_locale_sym)
+
+      category.update!(name: translated_name, description: translated_description)
+    end
+  end
+end
diff --git a/app/services/discourse_translator/translator.rb b/app/services/discourse_translator/translator.rb
new file mode 100644
index 0000000..1d23526
--- /dev/null
+++ b/app/services/discourse_translator/translator.rb
@@ -0,0 +1,18 @@
+# frozen_string_literal: true
+
+module DiscourseTranslator
+  # The canonical class for all your translation needs
+  class Translator
+    # this invokes the specific methods
+    def translate(translatable, target_locale = I18n.locale)
+      target_locale_sym = target_locale.to_s.sub("-", "_").to_sym
+
+      case translatable.class.name
+      when "Post", "Topic"
+        DiscourseTranslator::Provider::TranslatorProvider.get.translate(translatable, target_locale_sym)
+      when "Category"
+        CategoryTranslator.translate(translatable, target_locale)
+      end
+    end
+  end
+end
diff --git a/config/settings.yml b/config/settings.yml
index bb54394..e293e9e 100644
--- a/config/settings.yml
+++ b/config/settings.yml
@@ -129,6 +129,9 @@ discourse_translator:
   experimental_inline_translation:
     default: false
     client: true
+  experimental_category_translation:
+    default: false
+    hidden: true
   discourse_translator_verbose_logs:
     default: false
     client: false
diff --git a/spec/jobs/translate_categories_spec.rb b/spec/jobs/translate_categories_spec.rb
new file mode 100644
index 0000000..77434ae
--- /dev/null
+++ b/spec/jobs/translate_categories_spec.rb
@@ -0,0 +1,127 @@
+# frozen_string_literal: true
+
+require "rails_helper"
+
+describe Jobs::TranslateCategories do
+  let(:translator) { mock }
+
+  def localize_all_categories(*locales)
+    Category.all.each do |category|
+      locales.each { |locale| Fabricate(:category_localization, category:, locale:, name: "x") }
+    end
+  end
+
+  before do
+    SiteSetting.translator_enabled = true
+    SiteSetting.experimental_category_translation = true
+    SiteSetting.automatic_translation_backfill_rate = 100
+    SiteSetting.automatic_translation_target_languages = "pt|zh_CN"
+
+    DiscourseTranslator::Provider::TranslatorProvider.stubs(:get).returns(translator)
+    Jobs.run_immediately!
+  end
+
+  it "does nothing when translator is disabled" do
+    SiteSetting.translator_enabled = false
+
+    translator.expects(:translate_text!).never
+
+    subject.execute({})
+  end
+
+  it "does nothing when experimental_category_translation is disabled" do
+    SiteSetting.experimental_category_translation = false
+
+    translator.expects(:translate_text!).never
+
+    subject.execute({})
+  end
+
+  it "does nothing when no target languages are configured" do
+    SiteSetting.automatic_translation_target_languages = ""
+
+    translator.expects(:translate_text!).never
+
+    subject.execute({})
+  end
+
+  it "does nothing when no categories exist" do
+    Category.destroy_all
+
+    translator.expects(:translate_text!).never
+
+    subject.execute({})
+  end
+
+  it "translates categories to the configured locales" do
+    number_of_categories = Category.count
+    DiscourseTranslator::CategoryTranslator
+      .expects(:translate)
+      .with(is_a(Category), "pt")
+      .times(number_of_categories)
+    DiscourseTranslator::CategoryTranslator
+      .expects(:translate)
+      .with(is_a(Category), "zh_CN")
+      .times(number_of_categories)
+
+    subject.execute({})
+  end
+
+  it "skips categories that already have localizations" do
+    localize_all_categories("pt", "zh_CN")
+
+    category1 =
+      Fabricate(:category, name: "First Category", description: "First category description")
+    Fabricate(:category_localization, category: category1, locale: "pt", name: "Primeira Categoria")
+
+    # It should only translate to Chinese, not Portuguese
+    DiscourseTranslator::CategoryTranslator.expects(:translate).with(category1, "pt").never
+    DiscourseTranslator::CategoryTranslator.expects(:translate).with(category1, "zh_CN").once
+
+    subject.execute({})
+  end
+
+  it "continues from a specified category ID" do
+    category1 = Fabricate(:category, name: "First", description: "First description")
+    category2 = Fabricate(:category, name: "Second", description: "Second description")
+
+    DiscourseTranslator::CategoryTranslator
+      .expects(:translate)
+      .with(category1, any_parameters)
+      .never
+    DiscourseTranslator::CategoryTranslator
+      .expects(:translate)
+      .with(category2, any_parameters)
+      .twice
+
+    subject.execute(from_category_id: category2.id)
+  end
+
+  it "handles translation errors gracefully" do
+    localize_all_categories("pt", "zh_CN")
+
+    category1 = Fabricate(:category, name: "First", description: "First description")
+    DiscourseTranslator::CategoryTranslator
+      .expects(:translate)
+      .with(category1, "pt")
+      .raises(StandardError.new("API error"))
+    DiscourseTranslator::CategoryTranslator.expects(:translate).with(category1, "zh_CN").once
+
+    expect { subject.execute({}) }.not_to raise_error
+  end
+
+  it "enqueues the next batch when there are more categories" do
+    Jobs::TranslateCategories.const_set(:BATCH_SIZE, 1)
+
+    Jobs
+      .expects(:enqueue_in)
+      .with(10.seconds, :translate_categories, from_category_id: any_parameters)
+      .times(Category.count)
+
+    subject.execute({})
+
+    # Reset the constant
+    Jobs::TranslateCategories.send(:remove_const, :BATCH_SIZE)
+    Jobs::TranslateCategories.const_set(:BATCH_SIZE, 50)
+  end
+end
diff --git a/spec/services/category_translator_spec.rb b/spec/services/category_translator_spec.rb
new file mode 100644
index 0000000..8e2d1b5
--- /dev/null
+++ b/spec/services/category_translator_spec.rb
@@ -0,0 +1,65 @@
+# frozen_string_literal: true
+
+describe DiscourseTranslator::CategoryTranslator do
+  fab!(:category) do
+    Fabricate(:category, name: "Test Category", description: "This is a test category")
+  end
+
+  describe ".translate" do
+    let(:target_locale) { :fr }
+    let(:translator) { mock }
+
+    before { DiscourseTranslator::Provider::TranslatorProvider.stubs(:get).returns(translator) }
+
+    it "translates the category name and description" do
+      translator
+        .expects(:translate_text!)
+        .with(category.name, target_locale)
+        .returns("Catégorie de Test")
+      translator
+        .expects(:translate_text!)
+        .with(category.description, target_locale)
+        .returns("C'est une catégorie de test")
+
+      DiscourseTranslator::CategoryTranslator.translate(category, target_locale)
+
+      expect(category.name).to eq("Catégorie de Test")
+      expect(category.description).to eq("C'est une catégorie de test")
+    end
+
+    it "handles locale format standardization" do
+      translator.expects(:translate_text!).with(category.name, :fr_CA).returns("Catégorie de Test")
+      translator
+        .expects(:translate_text!)
+        .with(category.description, :fr_CA)
+        .returns("C'est une catégorie de test")
+
+      DiscourseTranslator::CategoryTranslator.translate(category, "fr-CA")
+
+      expect(category.name).to eq("Catégorie de Test")
+      expect(category.description).to eq("C'est une catégorie de test")
+    end
+
+    it "returns nil if category is blank" do
+      expect(DiscourseTranslator::CategoryTranslator.translate(nil)).to be_nil
+    end
+
+    it "returns nil if target locale is blank" do
+      expect(DiscourseTranslator::CategoryTranslator.translate(category, nil)).to be_nil
+    end
+
+    it "uses I18n.locale as default when no target locale is provided" do
+      I18n.locale = :es
+      translator.expects(:translate_text!).with(category.name, :es).returns("Categoría de Prueba")
+      translator
+        .expects(:translate_text!)
+        .with(category.description, :es)
+        .returns("Esta es una categoría de prueba")
+
+      DiscourseTranslator::CategoryTranslator.translate(category)
+
+      expect(category.name).to eq("Categoría de Prueba")
+      expect(category.description).to eq("Esta es una categoría de prueba")
+    end
+  end
+end

From be2e80972b201998851ed994e4c6b774c6be06d5 Mon Sep 17 00:00:00 2001
From: Nat
Date: Wed, 23 Apr 2025 17:53:35 +0800
Subject: [PATCH 2/5] nil

---
 app/jobs/regular/translate_categories.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/jobs/regular/translate_categories.rb b/app/jobs/regular/translate_categories.rb
index 9f36a44..ca008ae 100644
--- a/app/jobs/regular/translate_categories.rb
+++ b/app/jobs/regular/translate_categories.rb
@@ -13,7 +13,7 @@ def execute(args)
       return if locales.blank?
 
       cat_id = args[:from_category_id] || Category.order(:id).first&.id
-      last_id = cat_id
+      last_id = nil
 
       # we're just gonna take all categories and keep it simple
       # instead of checking in the db which ones are absent

From a4f31c7244e9a85e19c1c8ba1ba10f9f846518f0 Mon Sep 17 00:00:00 2001
From: Nat
Date: Thu, 24 Apr 2025 10:10:32 +0800
Subject: [PATCH 3/5] Lint

---
Review notes (this section is not part of the commit message):
- Both the removed and the added call now use the constant path
  Provider::TranslatorProvider so this patch still applies on top of the
  corrected PATCH 1/5. Blob ids on the "index" line were not regenerated.

 app/services/discourse_translator/translator.rb | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/app/services/discourse_translator/translator.rb b/app/services/discourse_translator/translator.rb
index 1d23526..0171693 100644
--- a/app/services/discourse_translator/translator.rb
+++ b/app/services/discourse_translator/translator.rb
@@ -9,7 +9,10 @@ def translate(translatable, target_locale = I18n.locale)
 
       case translatable.class.name
       when "Post", "Topic"
-        DiscourseTranslator::Provider::TranslatorProvider.get.translate(translatable, target_locale_sym)
+        DiscourseTranslator::Provider::TranslatorProvider.get.translate(
+          translatable,
+          target_locale_sym,
+        )
       when "Category"
         CategoryTranslator.translate(translatable, target_locale)
       end

From 157cf926b6a04ced0d25c8ce68c1fbc84e5930fb Mon Sep 17 00:00:00 2001
From: Nat
Date: Thu, 24 Apr 2025 10:15:10 +0800
Subject: [PATCH 4/5] Rubo

---
 spec/jobs/translate_categories_spec.rb | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/spec/jobs/translate_categories_spec.rb b/spec/jobs/translate_categories_spec.rb
index 77434ae..78fbdcb 100644
--- a/spec/jobs/translate_categories_spec.rb
+++ b/spec/jobs/translate_categories_spec.rb
@@ -3,6 +3,8 @@
 require "rails_helper"
 
 describe Jobs::TranslateCategories do
+  subject(:job) { described_class.new }
+
   let(:translator) { mock }
 
   def localize_all_categories(*locales)
@@ -26,7 +28,7 @@ def localize_all_categories(*locales)
 
     translator.expects(:translate_text!).never
 
-    subject.execute({})
+    job.execute({})
   end
 
   it "does nothing when experimental_category_translation is disabled" do
@@ -34,7 +36,7 @@ def localize_all_categories(*locales)
 
     translator.expects(:translate_text!).never
 
-    subject.execute({})
+    job.execute({})
   end
 
   it "does nothing when no target languages are configured" do
@@ -42,7 +44,7 @@ def localize_all_categories(*locales)
 
     translator.expects(:translate_text!).never
 
-    subject.execute({})
+    job.execute({})
   end
 
   it "does nothing when no categories exist" do
@@ -50,7 +52,7 @@ def localize_all_categories(*locales)
 
     translator.expects(:translate_text!).never
 
-    subject.execute({})
+    job.execute({})
   end
 
   it "translates categories to the configured locales" do
@@ -64,7 +66,7 @@ def localize_all_categories(*locales)
       .with(is_a(Category), "zh_CN")
       .times(number_of_categories)
 
-    subject.execute({})
+    job.execute({})
   end
 
   it "skips categories that already have localizations" do
@@ -78,7 +80,7 @@ def localize_all_categories(*locales)
     DiscourseTranslator::CategoryTranslator.expects(:translate).with(category1, "pt").never
     DiscourseTranslator::CategoryTranslator.expects(:translate).with(category1, "zh_CN").once
 
-    subject.execute({})
+    job.execute({})
   end
 
   it "continues from a specified category ID" do
@@ -94,7 +96,7 @@ def localize_all_categories(*locales)
       .with(category2, any_parameters)
       .twice
 
-    subject.execute(from_category_id: category2.id)
+    job.execute(from_category_id: category2.id)
   end
 
   it "handles translation errors gracefully" do
@@ -107,7 +109,7 @@ def localize_all_categories(*locales)
       .raises(StandardError.new("API error"))
     DiscourseTranslator::CategoryTranslator.expects(:translate).with(category1, "zh_CN").once
 
-    expect { subject.execute({}) }.not_to raise_error
+    expect { job.execute({}) }.not_to raise_error
   end
 
   it "enqueues the next batch when there are more categories" do
@@ -118,7 +120,7 @@ def localize_all_categories(*locales)
       .with(10.seconds, :translate_categories, from_category_id: any_parameters)
       .times(Category.count)
 
-    subject.execute({})
+    job.execute({})
 
     # Reset the constant
     Jobs::TranslateCategories.send(:remove_const, :BATCH_SIZE)

From fa3746c6b9dd94b647b12b6528d5849ee4a82f5c Mon Sep 17 00:00:00 2001
From: Nat
Date: Thu, 24 Apr 2025 10:34:23 +0800
Subject: [PATCH 5/5] rubocopII

---
Review notes (this section is not part of the commit message):
- Dropped a leftover debugging "puts category.id" from the spec; the hunk
  header and diffstat were adjusted for the removed line. Blob ids on the
  "index" line were not regenerated.

 spec/jobs/translate_categories_spec.rb | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/spec/jobs/translate_categories_spec.rb b/spec/jobs/translate_categories_spec.rb
index 78fbdcb..0392ad7 100644
--- a/spec/jobs/translate_categories_spec.rb
+++ b/spec/jobs/translate_categories_spec.rb
@@ -113,16 +113,22 @@ def localize_all_categories(*locales)
   end
 
   it "enqueues the next batch when there are more categories" do
+    Jobs.run_later!
+    freeze_time
     Jobs::TranslateCategories.const_set(:BATCH_SIZE, 1)
 
-    Jobs
-      .expects(:enqueue_in)
-      .with(10.seconds, :translate_categories, from_category_id: any_parameters)
-      .times(Category.count)
-
     job.execute({})
 
-    # Reset the constant
+    Category.all.each do |category|
+      expect_job_enqueued(
+        job: :translate_categories,
+        args: {
+          from_category_id: category.id + 1,
+        },
+        at: 10.seconds.from_now,
+      )
+    end
+
     Jobs::TranslateCategories.send(:remove_const, :BATCH_SIZE)
     Jobs::TranslateCategories.const_set(:BATCH_SIZE, 50)
   end