Skip to content

FEATURE: Translate categories with selected provider #282

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 24, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions app/jobs/regular/translate_categories.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# frozen_string_literal: true

module Jobs
  # Translates categories into every configured target locale, one batch per run.
  #
  # Categories are walked in ascending id order, BATCH_SIZE at a time. When a
  # run fills a whole batch, a follow-up job is enqueued that resumes right
  # after the last category id handled by this run.
  class TranslateCategories < ::Jobs::Base
    cluster_concurrency 1
    BATCH_SIZE = 50

    # @param args [Hash] job arguments; the optional :from_category_id is the
    #   lowest category id to process (defaults to the first category by id)
    def execute(args)
      return unless SiteSetting.translator_enabled
      return unless SiteSetting.experimental_category_translation

      locales = SiteSetting.automatic_translation_target_languages.split("|")
      return if locales.blank?

      cat_id = args[:from_category_id] || Category.order(:id).first&.id
      return unless cat_id

      # we're just gonna take all categories and keep it simple
      # instead of checking in the db which ones are absent.
      # Load the batch once so the emptiness check, the iteration and the
      # batch-size check all see the same rows without re-querying.
      categories = Category.where("id >= ?", cat_id).order(:id).limit(BATCH_SIZE).to_a
      return if categories.empty?

      categories.each do |category|
        # Deliberately not wrapped in a database transaction: each translation
        # calls out to the translation provider, and holding a transaction open
        # across network I/O ties up a connection. Rescuing inside a
        # transaction would also leave it aborted for the remaining locales if
        # a statement failed. The exists? check keeps re-runs idempotent.
        locales.each do |locale|
          next if CategoryLocalization.exists?(category_id: category.id, locale: locale)

          begin
            DiscourseTranslator::CategoryTranslator.translate(category, locale)
          rescue => e
            # Best effort: one failing locale must not stop the rest of the batch.
            Rails.logger.error(
              "Discourse Translator: Failed to translate category #{category.id} to #{locale}: #{e.message}",
            )
          end
        end
      end

      # A full batch means there may be more categories; enqueue the next batch.
      if categories.size == BATCH_SIZE
        Jobs.enqueue_in(
          10.seconds,
          :translate_categories,
          from_category_id: categories.last.id + 1,
        )
      end
    end
  end
end
18 changes: 18 additions & 0 deletions app/jobs/scheduled/automatic_category_translation.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# frozen_string_literal: true

module Jobs
  # Scheduled trigger for category translation.
  #
  # Every 12 hours, enqueues the :translate_categories job, provided the
  # translator and the experimental category translation setting are both
  # enabled and at least one target language is configured.
  class AutomaticCategoryTranslation < ::Jobs::Scheduled
    every 12.hours
    cluster_concurrency 1

    # @param args [Hash] unused
    def execute(args)
      feature_on =
        SiteSetting.translator_enabled && SiteSetting.experimental_category_translation
      return unless feature_on

      target_locales = SiteSetting.automatic_translation_target_languages.split("|")
      Jobs.enqueue(:translate_categories) unless target_locales.blank?
    end
  end
end
22 changes: 22 additions & 0 deletions app/services/discourse_translator/category_translator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# frozen_string_literal: true

module DiscourseTranslator
  # Translates a category's name and description into one target locale.
  #
  # unlike post and topics, categories do not have a detected locale
  # and will translate two fields, name and description
  class CategoryTranslator
    # Translates +category+ into +target_locale+ and stores the result as a
    # CategoryLocalization for that (category, locale) pair.
    #
    # The category row itself is never modified. Writing the translation back
    # onto the category would make every target locale overwrite the previous
    # one (only the last configured locale would survive) and would destroy
    # the original text. It would also never create the localization row that
    # Jobs::TranslateCategories checks for before translating.
    #
    # @param category [Category] the category to translate
    # @param target_locale [String, Symbol] e.g. "pt", "zh_CN" or "fr-CA";
    #   defaults to the current I18n locale
    # @return [CategoryLocalization, nil] the saved localization, or nil when
    #   either argument is blank
    # @raise whatever the provider's translate_text! or save! raises
    def self.translate(category, target_locale = I18n.locale)
      return if category.blank? || target_locale.blank?

      # locale can come in various forms
      # standardize it to a _ symbol
      target_locale_sym = target_locale.to_s.sub("-", "_").to_sym

      translator = DiscourseTranslator::Provider::TranslatorProvider.get
      translated_name = translator.translate_text!(category.name, target_locale_sym)

      # Don't send the provider an empty payload for a missing description.
      # NOTE(review): assumes CategoryLocalization accepts a nil description — confirm.
      description = category.description
      translated_description =
        description.blank? ? nil : translator.translate_text!(description, target_locale_sym)

      # One row per (category, locale); re-translating updates the existing row.
      localization =
        CategoryLocalization.find_or_initialize_by(
          category_id: category.id,
          locale: target_locale_sym.to_s,
        )
      localization.name = translated_name
      localization.description = translated_description
      localization.save!
      localization
    end
  end
end
18 changes: 18 additions & 0 deletions app/services/discourse_translator/translator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# frozen_string_literal: true

module DiscourseTranslator
  # The canonical class for all your translation needs
  class Translator
    # Dispatches to the translation routine that matches the translatable's
    # class name.
    #
    # @param translatable [Object] an object whose class name is "Post",
    #   "Topic" or "Category"
    # @param target_locale [String, Symbol] defaults to the current I18n locale
    # @return the result of the underlying translate call, or nil for any
    #   other class
    def translate(translatable, target_locale = I18n.locale)
      # Normalise "fr-CA" style locales to the :fr_CA symbol form.
      target_locale_sym = target_locale.to_s.sub("-", "_").to_sym

      case translatable.class.name
      when "Post", "Topic"
        # TranslatorProvider is a constant nested in Provider, so it has to be
        # reached with `::`. `Provider.TranslatorProvider` would be a method
        # call on Provider and raise NoMethodError.
        DiscourseTranslator::Provider::TranslatorProvider.get.translate(
          translatable,
          target_locale_sym,
        )
      when "Category"
        # CategoryTranslator normalises the locale itself.
        CategoryTranslator.translate(translatable, target_locale)
      end
    end
  end
end
3 changes: 3 additions & 0 deletions config/settings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ discourse_translator:
experimental_inline_translation:
default: false
client: true
experimental_category_translation:
default: false
hidden: true
discourse_translator_verbose_logs:
default: false
client: false
Expand Down
127 changes: 127 additions & 0 deletions spec/jobs/translate_categories_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# frozen_string_literal: true

require "rails_helper"

# Specs for the batch job that translates categories into the configured locales.
describe Jobs::TranslateCategories do
  let(:translator) { mock }

  # Gives every existing category a localization in each of the given locales,
  # so the job has nothing left to do for them.
  def localize_all_categories(*locales)
    Category.all.each do |category|
      locales.each { |locale| Fabricate(:category_localization, category:, locale:, name: "x") }
    end
  end

  before do
    SiteSetting.translator_enabled = true
    SiteSetting.experimental_category_translation = true
    SiteSetting.automatic_translation_backfill_rate = 100
    SiteSetting.automatic_translation_target_languages = "pt|zh_CN"

    DiscourseTranslator::Provider.stubs(:get).returns(translator)
    Jobs.run_immediately!
  end

  it "does nothing when translator is disabled" do
    SiteSetting.translator_enabled = false

    translator.expects(:translate_text!).never

    subject.execute({})
  end

  it "does nothing when experimental_category_translation is disabled" do
    SiteSetting.experimental_category_translation = false

    translator.expects(:translate_text!).never

    subject.execute({})
  end

  it "does nothing when no target languages are configured" do
    SiteSetting.automatic_translation_target_languages = ""

    translator.expects(:translate_text!).never

    subject.execute({})
  end

  it "does nothing when no categories exist" do
    Category.destroy_all

    translator.expects(:translate_text!).never

    subject.execute({})
  end

  it "translates categories to the configured locales" do
    number_of_categories = Category.count
    DiscourseTranslator::CategoryTranslator
      .expects(:translate)
      .with(is_a(Category), "pt")
      .times(number_of_categories)
    DiscourseTranslator::CategoryTranslator
      .expects(:translate)
      .with(is_a(Category), "zh_CN")
      .times(number_of_categories)

    subject.execute({})
  end

  it "skips categories that already have localizations" do
    localize_all_categories("pt", "zh_CN")

    category1 =
      Fabricate(:category, name: "First Category", description: "First category description")
    Fabricate(:category_localization, category: category1, locale: "pt", name: "Primeira Categoria")

    # It should only translate to Chinese, not Portuguese
    DiscourseTranslator::CategoryTranslator.expects(:translate).with(category1, "pt").never
    DiscourseTranslator::CategoryTranslator.expects(:translate).with(category1, "zh_CN").once

    subject.execute({})
  end

  it "continues from a specified category ID" do
    category1 = Fabricate(:category, name: "First", description: "First description")
    category2 = Fabricate(:category, name: "Second", description: "Second description")

    DiscourseTranslator::CategoryTranslator
      .expects(:translate)
      .with(category1, any_parameters)
      .never
    DiscourseTranslator::CategoryTranslator
      .expects(:translate)
      .with(category2, any_parameters)
      .twice

    subject.execute(from_category_id: category2.id)
  end

  it "handles translation errors gracefully" do
    localize_all_categories("pt", "zh_CN")

    category1 = Fabricate(:category, name: "First", description: "First description")
    DiscourseTranslator::CategoryTranslator
      .expects(:translate)
      .with(category1, "pt")
      .raises(StandardError.new("API error"))
    DiscourseTranslator::CategoryTranslator.expects(:translate).with(category1, "zh_CN").once

    expect { subject.execute({}) }.not_to raise_error
  end

  it "enqueues the next batch when there are more categories" do
    # Shrink the batch so a single category already counts as a full batch.
    # Remove the constant first to avoid an "already initialized constant"
    # warning, and restore it in `ensure` so a failure in this example cannot
    # leak the tiny batch size into other examples.
    original_batch_size = Jobs::TranslateCategories::BATCH_SIZE
    Jobs::TranslateCategories.send(:remove_const, :BATCH_SIZE)
    Jobs::TranslateCategories.const_set(:BATCH_SIZE, 1)

    begin
      Jobs
        .expects(:enqueue_in)
        .with(10.seconds, :translate_categories, from_category_id: any_parameters)
        .times(Category.count)

      subject.execute({})
    ensure
      Jobs::TranslateCategories.send(:remove_const, :BATCH_SIZE)
      Jobs::TranslateCategories.const_set(:BATCH_SIZE, original_batch_size)
    end
  end
end
65 changes: 65 additions & 0 deletions spec/services/category_translator_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# frozen_string_literal: true

# Specs for translating a single category's name and description.
#
# NOTE(review): the name/description assertions below pin the translated text
# being written onto the category record itself. If translations are stored
# per locale instead (e.g. in CategoryLocalization), these assertions need to
# move to that record.
describe DiscourseTranslator::CategoryTranslator do
  fab!(:category) do
    Fabricate(:category, name: "Test Category", description: "This is a test category")
  end

  describe ".translate" do
    let(:target_locale) { :fr }
    let(:translator) { mock }

    before { DiscourseTranslator::Provider::TranslatorProvider.stubs(:get).returns(translator) }

    it "translates the category name and description" do
      translator
        .expects(:translate_text!)
        .with(category.name, target_locale)
        .returns("Catégorie de Test")
      translator
        .expects(:translate_text!)
        .with(category.description, target_locale)
        .returns("C'est une catégorie de test")

      DiscourseTranslator::CategoryTranslator.translate(category, target_locale)

      expect(category.name).to eq("Catégorie de Test")
      expect(category.description).to eq("C'est une catégorie de test")
    end

    it "handles locale format standardization" do
      translator.expects(:translate_text!).with(category.name, :fr_CA).returns("Catégorie de Test")
      translator
        .expects(:translate_text!)
        .with(category.description, :fr_CA)
        .returns("C'est une catégorie de test")

      DiscourseTranslator::CategoryTranslator.translate(category, "fr-CA")

      expect(category.name).to eq("Catégorie de Test")
      expect(category.description).to eq("C'est une catégorie de test")
    end

    it "returns nil if category is blank" do
      expect(DiscourseTranslator::CategoryTranslator.translate(nil)).to be_nil
    end

    it "returns nil if target locale is blank" do
      expect(DiscourseTranslator::CategoryTranslator.translate(category, nil)).to be_nil
    end

    it "uses I18n.locale as default when no target locale is provided" do
      translator.expects(:translate_text!).with(category.name, :es).returns("Categoría de Prueba")
      translator
        .expects(:translate_text!)
        .with(category.description, :es)
        .returns("Esta es una categoría de prueba")

      # Scope the locale change to this call so it cannot leak into other
      # examples; the default argument reads I18n.locale inside the block.
      I18n.with_locale(:es) { DiscourseTranslator::CategoryTranslator.translate(category) }

      expect(category.name).to eq("Categoría de Prueba")
      expect(category.description).to eq("Esta es una categoría de prueba")
    end
  end
end
Loading