Skip to content

Commit 5b008e5

Browse files
committed
FEATURE: Detect locale and translate posts from core table
1 parent 5f5ab6b commit 5b008e5

10 files changed

+393
-21
lines changed
+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# frozen_string_literal: true
2+
3+
module Jobs
  # Background job that fills in the `locale` column for a batch of posts.
  #
  # Every run picks the newest posts (highest id first) that have no locale
  # yet and hands each of them to DiscourseTranslator::PostLocaleDetector.
  class DetectPostsLocale < ::Jobs::Base
    # NOTE(review): presumably restricts the job to one concurrent run across
    # the cluster - confirm against ::Jobs::Base.
    cluster_concurrency 1

    # Maximum number of posts examined per run.
    BATCH_SIZE = 50

    # Runs locale detection for one batch of posts.
    #
    # Does nothing unless both the `translator_enabled` and
    # `experimental_content_translation` site settings are on. A failure on a
    # single post is logged and does not abort the rest of the batch.
    #
    # @param args [Hash] job arguments (not used)
    # @return [void]
    def execute(args)
      return unless SiteSetting.translator_enabled
      return unless SiteSetting.experimental_content_translation

      # Load the batch exactly once: calling `empty?` on an unloaded relation
      # issues a separate EXISTS query before `each` loads the same rows.
      posts = posts_without_locale.to_a
      return if posts.empty?

      # Only count posts whose detection finished without raising, so the
      # summary does not over-report when the provider fails.
      detected = posts.count { |post| detect_locale(post) }

      DiscourseTranslator::VerboseLogger.log("Detected #{detected} post locales")
    end

    private

    # Newest non-deleted posts with a positive user_id, non-empty raw and no
    # locale, capped at BATCH_SIZE.
    #
    # @return [ActiveRecord::Relation<Post>]
    def posts_without_locale
      Post
        .where(locale: nil)
        .where(deleted_at: nil)
        .where("posts.user_id > 0")
        .where.not(raw: [nil, ""])
        .order(id: :desc)
        .limit(BATCH_SIZE)
    end

    # Detects the locale of a single post, logging (not raising) on failure.
    #
    # @param post [Post]
    # @return [Boolean] true when detection completed without an error
    def detect_locale(post)
      DiscourseTranslator::PostLocaleDetector.detect_locale(post)
      true
    rescue => e
      Rails.logger.error(
        "Discourse Translator: Failed to detect post #{post.id}'s locale: #{e.message}",
      )
      false
    end
  end
end

app/jobs/regular/translate_posts.rb

+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# frozen_string_literal: true
2+
3+
module Jobs
  # Background job that translates a batch of posts into every locale listed
  # in the `automatic_translation_target_languages` site setting.
  class TranslatePosts < ::Jobs::Base
    # NOTE(review): presumably restricts the job to one concurrent run across
    # the cluster - confirm against ::Jobs::Base.
    cluster_concurrency 1

    # Maximum number of posts examined per run.
    BATCH_SIZE = 50

    # HAVING condition selecting posts that still miss at least one target
    # localization.
    #
    # The post's own locale is excluded from both sides of the comparison:
    # #execute never translates a post into its own locale, so requiring a
    # localization for it would keep the post "incomplete" forever and the
    # newest BATCH_SIZE posts would be reselected on every run, starving all
    # older posts. `IS DISTINCT FROM` keeps the comparison NULL-safe for posts
    # whose locale has not been detected yet.
    MISSING_LOCALIZATION_HAVING = <<~SQL
      COUNT(DISTINCT CASE
        WHEN post_localizations.locale IN (:locales)
          AND post_localizations.locale IS DISTINCT FROM posts.locale
        THEN post_localizations.locale
      END) < :target_count - (CASE WHEN posts.locale IN (:locales) THEN 1 ELSE 0 END)
    SQL

    # Translates one batch of posts into the configured target locales.
    #
    # Does nothing unless both the `translator_enabled` and
    # `experimental_content_translation` site settings are on and at least one
    # target locale is configured. A failure for one post/locale pair is
    # logged and does not abort the remaining translations.
    #
    # @param args [Hash] job arguments (not used)
    # @return [void]
    def execute(args)
      return unless SiteSetting.translator_enabled
      return unless SiteSetting.experimental_content_translation

      locales = SiteSetting.automatic_translation_target_languages.split("|")
      return if locales.blank?

      # Load the batch once. On an unloaded *grouped* relation `empty?` issues
      # an extra query and `size` returns a Hash of per-group counts, so the
      # summary below must not depend on load order.
      posts = posts_missing_localizations(locales).to_a
      return if posts.empty?

      posts.each do |post|
        locales.each do |locale|
          next if post.locale == locale
          next if post.has_localization?(locale)

          translate(post, locale)
        end
      end

      DiscourseTranslator::VerboseLogger.log(
        "Translated #{posts.size} posts to #{locales.join(", ")}",
      )
    end

    private

    # Newest non-deleted posts with a positive user_id and non-empty raw that
    # lack a localization for at least one target locale other than their own.
    #
    # @param locales [Array<String>] target locales
    # @return [ActiveRecord::Relation<Post>]
    def posts_missing_localizations(locales)
      Post
        .left_joins(:post_localizations)
        .where(deleted_at: nil)
        .where("posts.user_id > 0")
        .where.not(raw: [nil, ""])
        .group("posts.id")
        .having(MISSING_LOCALIZATION_HAVING, locales: locales, target_count: locales.size)
        .order(id: :desc)
        .limit(BATCH_SIZE)
    end

    # Translates a single post into one locale, logging (not raising) errors.
    #
    # @param post [Post]
    # @param locale [String]
    # @return [void]
    def translate(post, locale)
      DiscourseTranslator::PostTranslator.translate(post, locale)
    rescue => e
      Rails.logger.error(
        "Discourse Translator: Failed to translate post #{post.id} to #{locale}: #{e.message}",
      )
    end
  end
end
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# frozen_string_literal: true
2+
3+
module DiscourseTranslator
  # Detects the locale of a post through the configured translator provider
  # and stores it on the post.
  class PostLocaleDetector
    class << self
      # Detects and persists the locale of the given post.
      #
      # @param post [Post, nil]
      # @return [Object, nil] whatever the provider's `detect!` returned, or
      #   nil when the post is blank
      # @raise propagates errors from the provider and from `Post#update!`
      def detect_locale(post)
        return if post.blank?

        provider = DiscourseTranslator::Provider::TranslatorProvider.get

        # `tap` hands the detected value back to the caller once it is saved.
        provider.detect!(post).tap { |locale| post.update!(locale: locale) }
      end
    end
  end
end
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# frozen_string_literal: true
2+
3+
module DiscourseTranslator
  # Translates a post through the configured translator provider and stores
  # the result as a PostLocalization record.
  class PostTranslator
    class << self
      # Translates the post into the target locale and saves the localization.
      #
      # An existing localization for the same post and locale is overwritten;
      # otherwise a new one is created.
      #
      # @param post [Post, nil]
      # @param target_locale [String, Symbol] defaults to I18n.locale
      # @return [PostLocalization, nil] the saved localization, or nil when
      #   the post or the target locale is blank
      # @raise propagates errors from the provider and from `save!`
      def translate(post, target_locale = I18n.locale)
        return if post.blank? || target_locale.blank?

        # Only the first "-" is swapped for "_" (e.g. "pt-BR" -> :pt_BR).
        locale_sym = target_locale.to_s.sub("-", "_").to_sym

        provider = DiscourseTranslator::Provider::TranslatorProvider.get
        translated_raw = provider.translate_post!(post, locale_sym)

        record = PostLocalization.find_or_initialize_by(post_id: post.id, locale: locale_sym.to_s)
        record.assign_attributes(
          raw: translated_raw,
          cooked: PrettyText.cook(translated_raw),
          post_version: post.version,
          localizer_user_id: Discourse.system_user.id,
        )
        record.save!
        record
      end
    end
  end
end

app/services/discourse_translator/provider/base_provider.rb

+9-6
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,7 @@ def self.translate(translatable, target_locale_sym = I18n.locale)
5555
[detected_lang, translated]
5656
end
5757

58-
# Subclasses must implement this method to translate the text of a
59-
# post or topic and return only the translated text.
60-
# Subclasses should use text_for_translation
61-
# @param translatable [Post|Topic]
62-
# @param target_locale_sym [Symbol]
63-
# @return [String]
58+
# TODO: Deprecate this in favour of translate_<model>
6459
def self.translate_translatable!(translatable, target_locale_sym = I18n.locale)
6560
raise "Not Implemented"
6661
end
@@ -69,6 +64,14 @@ def self.translate_text!(text, target_locale_sym = I18n.locale)
6964
raise "Not Implemented"
7065
end
7166

67+
def self.translate_post!(post, target_locale_sym = I18n.locale)
68+
translate_translatable!(post, target_locale_sym)
69+
end
70+
71+
def self.translate_topic!(topic, target_locale_sym = I18n.locale)
72+
translate_translatable!(topic, target_locale_sym)
73+
end
74+
7275
# Returns the stored detected locale of a post or topic.
7376
# If the locale does not exist yet, it will be detected first via the API then stored.
7477
# @param translatable [Post|Topic]

app/services/discourse_translator/provider/discourse_ai.rb

+22-15
Original file line numberDiff line numberDiff line change
@@ -16,25 +16,32 @@ def self.detect!(topic_or_post)
1616
end
1717

1818
def self.translate_translatable!(translatable, target_locale_sym = I18n.locale)
19+
if (translatable.class.name == "Post")
20+
translate_post!(translatable, target_locale_sym)
21+
elsif (translatable.class.name == "Topic")
22+
translate_topic!(translatable, target_locale_sym)
23+
end
24+
end
25+
26+
def self.translate_post!(post, target_locale_sym = I18n.locale)
1927
validate_required_settings!
2028

21-
language = get_language_name(target_locale_sym)
29+
text = text_for_translation(post, raw: true)
30+
chunks = DiscourseTranslator::ContentSplitter.split(text)
2231
translated =
23-
case translatable.class.name
24-
when "Post"
25-
text = text_for_translation(translatable, raw: true)
26-
chunks = DiscourseTranslator::ContentSplitter.split(text)
27-
chunks
28-
.map { |chunk| ::DiscourseAi::PostTranslator.new(chunk, target_locale_sym).translate }
29-
.join("")
30-
when "Topic"
31-
::DiscourseAi::TopicTranslator.new(
32-
text_for_translation(translatable),
33-
language,
34-
).translate
35-
end
32+
chunks
33+
.map { |chunk| ::DiscourseAi::PostTranslator.new(chunk, target_locale_sym).translate }
34+
.join("")
35+
DiscourseTranslator::TranslatedContentNormalizer.normalize(post, translated)
36+
end
3637

37-
DiscourseTranslator::TranslatedContentNormalizer.normalize(translatable, translated)
38+
def self.translate_topic!(topic, target_locale_sym = I18n.locale)
39+
validate_required_settings!
40+
41+
language = get_language_name(target_locale_sym)
42+
translated =
43+
::DiscourseAi::TopicTranslator.new(text_for_translation(topic), language).translate
44+
DiscourseTranslator::TranslatedContentNormalizer.normalize(topic, translated)
3845
end
3946

4047
def self.translate_text!(text, target_locale_sym = I18n.locale)

spec/jobs/detect_posts_locale_spec.rb

+61
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# frozen_string_literal: true
2+
3+
# Covers the guards, post selection, error handling and summary logging of
# the locale detection job.
describe Jobs::DetectPostsLocale do
  subject(:job) { described_class.new }

  fab!(:post) { Fabricate(:post, locale: nil) }

  # The collaborator the job delegates each post to.
  let(:detector) { DiscourseTranslator::PostLocaleDetector }

  before do
    SiteSetting.translator_enabled = true
    SiteSetting.experimental_content_translation = true
  end

  it "does nothing when translator is disabled" do
    SiteSetting.translator_enabled = false

    detector.expects(:detect_locale).never
    job.execute({})
  end

  it "does nothing when content translation is disabled" do
    SiteSetting.experimental_content_translation = false

    detector.expects(:detect_locale).never
    job.execute({})
  end

  it "does nothing when there are no posts to detect" do
    # Give every post a locale so nothing is left for the job to pick up.
    Post.update_all(locale: "en")

    detector.expects(:detect_locale).never
    job.execute({})
  end

  it "detects locale for posts with nil locale" do
    detector.expects(:detect_locale).with(post).once

    job.execute({})
  end

  it "skips bot posts" do
    post.update!(user: Discourse.system_user)

    detector.expects(:detect_locale).with(post).never
    job.execute({})
  end

  it "handles detection errors gracefully" do
    failure = StandardError.new("jiboomz")
    detector.expects(:detect_locale).with(post).once.raises(failure)

    expect { job.execute({}) }.not_to raise_error
  end

  it "logs a summary after running" do
    detector.stubs(:detect_locale)
    DiscourseTranslator::VerboseLogger.expects(:log).with(includes("Detected 1 post locales"))

    job.execute({})
  end
end

spec/jobs/translate_posts_spec.rb

+85
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# frozen_string_literal: true
2+
3+
# Covers the guards, post selection, error handling and summary logging of
# the post translation job.
describe Jobs::TranslatePosts do
  subject(:job) { described_class.new }

  fab!(:post)

  let(:locales) { %w[en ja] }

  # The collaborator the job delegates each post/locale pair to.
  let(:translator) { DiscourseTranslator::PostTranslator }

  before do
    SiteSetting.translator_enabled = true
    SiteSetting.experimental_content_translation = true
    SiteSetting.automatic_translation_backfill_rate = 1
    SiteSetting.automatic_translation_target_languages = locales.join("|")
  end

  it "does nothing when translator is disabled" do
    SiteSetting.translator_enabled = false

    translator.expects(:translate).never
    job.execute({})
  end

  it "does nothing when content translation is disabled" do
    SiteSetting.experimental_content_translation = false

    translator.expects(:translate).never
    job.execute({})
  end

  it "does nothing when no target languages are configured" do
    SiteSetting.automatic_translation_target_languages = ""

    translator.expects(:translate).never
    job.execute({})
  end

  it "does nothing when there are no posts to translate" do
    Post.destroy_all

    translator.expects(:translate).never
    job.execute({})
  end

  it "translates posts to the configured locales" do
    locales.each { |locale| translator.expects(:translate).with(post, locale).at_least_once }

    job.execute({})
  end

  it "skips posts that already have localizations" do
    # Every post gets a localization for every configured target locale.
    Post.all.each do |existing_post|
      locales.each do |locale|
        Fabricate(:post_localization, post: existing_post, locale: locale)
      end
    end

    translator.expects(:translate).never
    job.execute({})
  end

  it "skips bot posts" do
    post.update!(user: Discourse.system_user)

    locales.each { |locale| translator.expects(:translate).with(post, locale).never }
    job.execute({})
  end

  it "handles translation errors gracefully" do
    failure = StandardError.new("API error")
    translator.expects(:translate).with(post, "en").raises(failure)
    # The failure for "en" must not prevent the "ja" translation.
    translator.expects(:translate).with(post, "ja").once

    expect { job.execute({}) }.not_to raise_error
  end

  it "logs a summary after translation" do
    translator.stubs(:translate)
    DiscourseTranslator::VerboseLogger.expects(:log).with(includes("Translated 1 posts to en, ja"))

    job.execute({})
  end
end
+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# frozen_string_literal: true
2+
3+
# Covers the blank-post guard, provider delegation and persistence performed
# by PostLocaleDetector.
describe DiscourseTranslator::PostLocaleDetector do
  describe ".detect_locale" do
    fab!(:post) { Fabricate(:post, raw: "Hello world", locale: nil) }

    # Stand-in for the provider returned by TranslatorProvider.get.
    let(:translator) { mock }

    before do
      DiscourseTranslator::Provider::TranslatorProvider.stubs(:get).returns(translator)
    end

    it "returns nil if post is blank" do
      result = described_class.detect_locale(nil)

      expect(result).to eq(nil)
    end

    it "calls detect! on the provider with the post" do
      translator.expects(:detect!).with(post).returns("ja")

      result = described_class.detect_locale(post)

      expect(result).to eq("ja")
    end

    it "updates the post locale with the detected locale" do
      translator.stubs(:detect!).with(post).returns("ja")

      expect { described_class.detect_locale(post) }.to change { post.reload.locale }.from(
        nil,
      ).to("ja")
    end
  end
end

0 commit comments

Comments
 (0)