@@ -26,6 +26,7 @@ struct Line {
   const DictEntry* entry;
   size_t end_pos;
   double weight;
+  size_t text_hash;  // for dedup
 
   static const Line kEmpty;
 
@@ -68,7 +69,7 @@ struct Line {
   }
 };
 
-const Line Line::kEmpty{nullptr, nullptr, 0, 0.0};
+const Line Line::kEmpty{nullptr, nullptr, 0, 0.0, 0};
 
 inline static Grammar* create_grammar(Config* config) {
   if (auto* grammar = Grammar::Require("grammar")) {
@@ -251,4 +252,96 @@ an<Sentence> Poet::MakeSentence(const WordGraph& graph,
       graph, total_length, preceding_text);
 }
 
+// Make up to `max_sentences` sentences using beam search with dynamic
+// programming over the word graph.
+//
+// Unlike MakeSentence, there is no strategy parameter: candidates are
+// always evaluated with the grammar.
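+//
+// A minimal usage sketch (hypothetical caller; in practice `graph` and
+// `total_length` come from segmentation elsewhere in rime):
+//
+//   deque<an<Sentence>> sentences =
+//       poet->MakeSentences(graph, total_length, /*preceding_text=*/"", 3);
+//   for (const an<Sentence>& sentence : sentences) {
+//     // rank or display the candidate sentences
+//   }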
+deque<an<Sentence>> Poet::MakeSentences(const WordGraph& graph,
+                                        size_t total_length,
+                                        const string& preceding_text,
+                                        size_t max_sentences) {
+  size_t beam_width =
+      max_sentences * 3;  // allow more possibilities during the search
+  using State = std::list<Line>;
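+  // Maps an end position to the beam of partial lines reaching it; each
+  // beam stays sorted by weight, descending, and is capped at beam_width.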
+  map<size_t, State> states;
+  states[0].push_back(Line::kEmpty);
+  for (const auto& sv : graph) {
+    size_t start_pos = sv.first;
+    if (states.find(start_pos) == states.end())
+      continue;  // no partial line reaches this start position
+
+    const auto& source_state = states[start_pos];
+    for (const auto& ev : sv.second) {
+      size_t end_pos = ev.first;
+      if (start_pos == 0 && end_pos == total_length)
+        continue;  // exclude single entries that span the whole input
+      const DictEntryList& entries = ev.second;
+      bool is_rear = end_pos == total_length;  // entry would end the sentence
+      auto& target_state = states[end_pos];
+
+      for (const auto& source_line : source_state) {
+        for (const auto& entry : entries) {
+          const string& context =
+              source_line.empty() ? preceding_text : source_line.context();
+          double weight = source_line.weight +
+                          Grammar::Evaluate(context, entry->text, entry->weight,
+                                            is_rear, grammar_.get());
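+          // Extend a 31-based polynomial hash over the line's accumulated
+          // text; it identifies lines that spell the same string below.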
+          size_t new_hash = source_line.text_hash;
+          for (char c : entry->text) {
+            // Cast avoids sign extension on non-ASCII (e.g. UTF-8) bytes.
+            new_hash = new_hash * 31 + static_cast<unsigned char>(c);
+          }
+          Line new_line{&source_line, entry.get(), end_pos, weight, new_hash};
+
+          // Dedupe: among lines spelling the same text, keep only the
+          // best-weighted one.
+          auto dup = std::find_if(
+              target_state.begin(), target_state.end(),
+              [&](const Line& l) { return l.text_hash == new_line.text_hash; });
+          if (dup != target_state.end()) {
+            if (new_line.weight > dup->weight) {
+              target_state.erase(dup);
+            } else {
+              continue;
+            }
+          }
+
+          // Insert while keeping the beam sorted by weight, descending;
+          // drop the worst line once the beam exceeds beam_width.
+          auto it = std::find_if(
+              target_state.begin(), target_state.end(),
+              [&](const Line& l) { return l.weight < new_line.weight; });
+          target_state.insert(it, new_line);
+          if (target_state.size() > beam_width)
+            target_state.pop_back();
+        }
+      }
+    }
+  }
+
+  auto found = states.find(total_length);
+  if (found == states.end() || found->second.empty())
+    return {};
+
+  deque<an<Sentence>> results;
+  size_t i = 0;
+  double last_weight = found->second.front().weight;
+  for (const auto& candidate : found->second) {
+    i++;
+    if (i > max_sentences)
+      break;
+    double cur_weight = candidate.weight;
+    // Heuristic cutoff: also stop early once a candidate's weight falls
+    // more than 5%, in relative terms, below the previous one, i.e. it is
+    // considerably rarer than the sentences already collected.
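+    // E.g. with last_weight == -20.0, any candidate weighted below -21.0
+    // ends the scan (hypothetical log-weight values, for illustration only).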
+    if (fabs(cur_weight - last_weight) / fabs(last_weight) > 0.05) {
+      break;
+    }
+    last_weight = cur_weight;
+    auto sentence = New<Sentence>(language_);
+    for (const auto* c : candidate.components()) {
+      if (!c->entry)
+        continue;  // skip the sentinel head (Line::kEmpty)
+      sentence->Extend(*c->entry, c->end_pos, c->weight);
+    }
+    results.emplace_back(std::move(sentence));
+  }
+  return results;
+}
+
 }  // namespace rime