Skip to content

Conversation

@llvmbot
Copy link
Member

@llvmbot llvmbot commented Jan 19, 2026

Backport 3ca7a72

Requested by: @da-viper

@llvmbot
Copy link
Member Author

llvmbot commented Jan 19, 2026

@da-viper What do you think about merging this PR to the release branch?

@llvmbot
Copy link
Member Author

llvmbot commented Jan 19, 2026

@llvm/pr-subscribers-lldb

Author: None (llvmbot)

Changes

Backport 3ca7a72

Requested by: @da-viper


Patch is 21.63 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/176796.diff

9 Files Affected:

  • (modified) lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py (+6-1)
  • (modified) lldb/test/API/tools/lldb-dap/completions/TestDAP_completions.py (+37-25)
  • (modified) lldb/test/API/tools/lldb-dap/completions/main.cpp (+1)
  • (modified) lldb/tools/lldb-dap/Handler/CompletionsHandler.cpp (+99-46)
  • (modified) lldb/tools/lldb-dap/LLDBUtils.cpp (+35)
  • (modified) lldb/tools/lldb-dap/LLDBUtils.h (+18)
  • (modified) lldb/tools/lldb-dap/Protocol/ProtocolRequests.h (+3-3)
  • (modified) lldb/unittests/DAP/LLDBUtilsTest.cpp (+28)
  • (modified) lldb/unittests/DAP/ProtocolTypesTest.cpp (+3-3)
diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
index 6a98bb50c9036..a79d766118b9d 100644
--- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
+++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
@@ -1393,7 +1393,12 @@ def request_compileUnits(self, moduleId):
         return response
 
     def request_completions(self, text, frameId=None):
-        args_dict = {"text": text, "column": len(text) + 1}
+        def code_units(input: str) -> int:
+            utf16_bytes = input.encode("utf-16-le")
+            # one UTF16 codeunit = 2 bytes.
+            return len(utf16_bytes) // 2
+
+        args_dict = {"text": text, "column": code_units(text) + 1}
         if frameId:
             args_dict["frameId"] = frameId
         command_dict = {
diff --git a/lldb/test/API/tools/lldb-dap/completions/TestDAP_completions.py b/lldb/test/API/tools/lldb-dap/completions/TestDAP_completions.py
index 0ebecf6872a7d..1792ff9953efe 100644
--- a/lldb/test/API/tools/lldb-dap/completions/TestDAP_completions.py
+++ b/lldb/test/API/tools/lldb-dap/completions/TestDAP_completions.py
@@ -5,31 +5,32 @@
 import lldbdap_testcase
 import dap_server
 from lldbsuite.test import lldbutil
-from lldbsuite.test.decorators import *
-from lldbsuite.test.lldbtest import *
+from lldbsuite.test.decorators import skipIf
+from lldbsuite.test.lldbtest import line_number
 
 session_completion = {
     "text": "session",
-    "label": "session -- Commands controlling LLDB session.",
+    "label": "session",
+    "detail": "Commands controlling LLDB session.",
 }
 settings_completion = {
     "text": "settings",
-    "label": "settings -- Commands for managing LLDB settings.",
+    "label": "settings",
+    "detail": "Commands for managing LLDB settings.",
 }
 memory_completion = {
     "text": "memory",
-    "label": "memory -- Commands for operating on memory in the current target process.",
+    "label": "memory",
+    "detail": "Commands for operating on memory in the current target process.",
 }
 command_var_completion = {
     "text": "var",
-    "label": "var -- Show variables for the current stack frame. Defaults to all arguments and local variables in scope. Names of argument, local, file static and file global variables can be specified.",
+    "label": "var",
+    "detail": "Show variables for the current stack frame. Defaults to all arguments and local variables in scope. Names of argument, local, file static and file global variables can be specified.",
 }
-variable_var_completion = {
-    "text": "var",
-    "label": "var -- vector<baz> &",
-}
-variable_var1_completion = {"text": "var1", "label": "var1 -- int &"}
-variable_var2_completion = {"text": "var2", "label": "var2 -- int &"}
+variable_var_completion = {"text": "var", "label": "var", "detail": "vector<baz> &"}
+variable_var1_completion = {"text": "var1", "label": "var1", "detail": "int &"}
+variable_var2_completion = {"text": "var2", "label": "var2", "detail": "int &"}
 
 
 # Older version of libcxx produce slightly different typename strings for
@@ -79,11 +80,13 @@ def test_command_completions(self):
             [
                 {
                     "text": "read",
-                    "label": "read -- Read from the memory of the current target process.",
+                    "label": "read",
+                    "detail": "Read from the memory of the current target process.",
                 },
                 {
                     "text": "region",
-                    "label": "region -- Get information on the memory region containing an address in the current target process.",
+                    "label": "region",
+                    "detail": "Get information on the memory region containing an address in the current target process.",
                 },
             ],
         )
@@ -110,7 +113,8 @@ def test_command_completions(self):
             [
                 {
                     "text": "set",
-                    "label": "set -- Set the value of the specified debugger setting.",
+                    "label": "set",
+                    "detail": "Set the value of the specified debugger setting.",
                 }
             ],
         )
@@ -167,7 +171,7 @@ def test_variable_completions(self):
         self.verify_completions(
             self.dap_server.get_completions("str"),
             [{"text": "struct", "label": "struct"}],
-            [{"text": "str1", "label": "str1 -- std::string &"}],
+            [{"text": "str1", "label": "str1", "detail": "std::string &"}],
         )
 
         self.continue_to_next_stop()
@@ -189,42 +193,46 @@ def test_variable_completions(self):
             self.dap_server.get_completions("str"),
             [
                 {"text": "struct", "label": "struct"},
-                {"text": "str1", "label": "str1 -- std::string &"},
+                {"text": "str1", "label": "str1", "detail": "std::string &"},
             ],
         )
 
+        self.assertIsNotNone(self.dap_server.get_completions("ƒ"))
+        # Test utf8 after ascii.
+        self.dap_server.get_completions("mƒ")
+
         # Completion also works for more complex expressions
         self.verify_completions(
             self.dap_server.get_completions("foo1.v"),
-            [{"text": "var1", "label": "foo1.var1 -- int"}],
+            [{"text": "var1", "label": "foo1.var1", "detail": "int"}],
         )
 
         self.verify_completions(
             self.dap_server.get_completions("foo1.my_bar_object.v"),
-            [{"text": "var1", "label": "foo1.my_bar_object.var1 -- int"}],
+            [{"text": "var1", "label": "foo1.my_bar_object.var1", "detail": "int"}],
         )
 
         self.verify_completions(
             self.dap_server.get_completions("foo1.var1 + foo1.v"),
-            [{"text": "var1", "label": "foo1.var1 -- int"}],
+            [{"text": "var1", "label": "foo1.var1", "detail": "int"}],
         )
 
         self.verify_completions(
             self.dap_server.get_completions("foo1.var1 + v"),
-            [{"text": "var1", "label": "var1 -- int &"}],
+            [{"text": "var1", "label": "var1", "detail": "int &"}],
         )
 
         # should correctly handle spaces between objects and member operators
         self.verify_completions(
             self.dap_server.get_completions("foo1 .v"),
-            [{"text": "var1", "label": ".var1 -- int"}],
-            [{"text": "var2", "label": ".var2 -- int"}],
+            [{"text": "var1", "label": ".var1", "detail": "int"}],
+            [{"text": "var2", "label": ".var2", "detail": "int"}],
         )
 
         self.verify_completions(
             self.dap_server.get_completions("foo1 . v"),
-            [{"text": "var1", "label": "var1 -- int"}],
-            [{"text": "var2", "label": "var2 -- int"}],
+            [{"text": "var1", "label": "var1", "detail": "int"}],
+            [{"text": "var2", "label": "var2", "detail": "int"}],
         )
 
         # Even in variable mode, we can still use the escape prefix
@@ -273,6 +281,10 @@ def test_auto_completions(self):
             ],
         )
 
+        # TODO: Note we are not checking the result because the `expression --` command adds an extra character
+        # for non ascii variables.
+        self.assertIsNotNone(self.dap_server.get_completions("ƒ"))
+
         self.continue_to_exit()
         console_str = self.get_console()
         # we check in console to avoid waiting for output event.
diff --git a/lldb/test/API/tools/lldb-dap/completions/main.cpp b/lldb/test/API/tools/lldb-dap/completions/main.cpp
index 4314067cfe951..a12ce18201d97 100644
--- a/lldb/test/API/tools/lldb-dap/completions/main.cpp
+++ b/lldb/test/API/tools/lldb-dap/completions/main.cpp
@@ -29,6 +29,7 @@ int main(int argc, char const *argv[]) {
   fun(vec);
   bar bar1 = {2};
   bar *bar2 = &bar1;
+  int ƒake_f = 200;
   foo foo1 = {3, &bar1, bar1, NULL};
   return 0; // breakpoint 2
 }
diff --git a/lldb/tools/lldb-dap/Handler/CompletionsHandler.cpp b/lldb/tools/lldb-dap/Handler/CompletionsHandler.cpp
index de9a15dcb73f4..9a3973190f7e7 100644
--- a/lldb/tools/lldb-dap/Handler/CompletionsHandler.cpp
+++ b/lldb/tools/lldb-dap/Handler/CompletionsHandler.cpp
@@ -7,17 +7,70 @@
 //===----------------------------------------------------------------------===//
 
 #include "DAP.h"
-#include "JSONUtils.h"
+#include "LLDBUtils.h"
 #include "Protocol/ProtocolRequests.h"
 #include "Protocol/ProtocolTypes.h"
 #include "RequestHandler.h"
 #include "lldb/API/SBStringList.h"
+#include "llvm/Support/ConvertUTF.h"
 
 using namespace llvm;
 using namespace lldb_dap;
+using namespace lldb;
 using namespace lldb_dap::protocol;
 
 namespace lldb_dap {
+/// Gets the position in the UTF8 string where the specified line started.
+static size_t GetLineStartPos(StringRef text, uint32_t line) {
+  if (line == 0) // Invalid line.
+    return StringRef::npos;
+
+  if (line == 1)
+    return 0;
+
+  uint32_t cur_line = 1;
+  size_t pos = 0;
+
+  while (cur_line < line) {
+    const size_t new_line_pos = text.find('\n', pos);
+
+    if (new_line_pos == StringRef::npos)
+      return new_line_pos;
+
+    pos = new_line_pos + 1;
+    // text may end with a new line
+    if (pos >= text.size())
+      return StringRef::npos;
+
+    cur_line++;
+  }
+
+  assert(pos < text.size());
+  return pos;
+}
+
+static std::optional<size_t> GetCursorPos(StringRef text, uint32_t line,
+                                          uint32_t utf16_codeunits) {
+  if (text.empty())
+    return std::nullopt;
+
+  const size_t line_start_pos = GetLineStartPos(text, line);
+  if (line_start_pos == StringRef::npos)
+    return std::nullopt;
+
+  const StringRef completion_line =
+      text.substr(line_start_pos, text.find('\n', line_start_pos));
+  if (completion_line.empty())
+    return std::nullopt;
+
+  const std::optional<size_t> cursor_pos_opt =
+      UTF16CodeunitToBytes(completion_line, utf16_codeunits);
+  if (!cursor_pos_opt)
+    return std::nullopt;
+
+  const size_t cursor_pos = line_start_pos + *cursor_pos_opt;
+  return cursor_pos;
+}
 
 /// Returns a list of possible completions for a given caret position and text.
 ///
@@ -25,85 +78,85 @@ namespace lldb_dap {
 /// `supportsCompletionsRequest` is true.
 Expected<CompletionsResponseBody>
 CompletionsRequestHandler::Run(const CompletionsArguments &args) const {
+  std::string text = args.text;
+  const uint32_t line = args.line;
+  // column starts at 1.
+  const uint32_t utf16_codeunits = args.column - 1;
+
+  const auto cursor_pos_opt = GetCursorPos(text, line, utf16_codeunits);
+  if (!cursor_pos_opt)
+    return CompletionsResponseBody{};
+
+  size_t cursor_pos = *cursor_pos_opt;
+
   // If we have a frame, try to set the context for variable completions.
   lldb::SBFrame frame = dap.GetLLDBFrame(args.frameId);
   if (frame.IsValid()) {
-    frame.GetThread().GetProcess().SetSelectedThread(frame.GetThread());
-    frame.GetThread().SetSelectedFrame(frame.GetFrameID());
-  }
-
-  std::string text = args.text;
-  auto original_column = args.column;
-  auto original_line = args.line;
-  auto offset = original_column - 1;
-  if (original_line > 1) {
-    SmallVector<StringRef, 2> lines;
-    StringRef(text).split(lines, '\n');
-    for (int i = 0; i < original_line - 1; i++) {
-      offset += lines[i].size();
-    }
+    lldb::SBThread frame_thread = frame.GetThread();
+    frame_thread.GetProcess().SetSelectedThread(frame_thread);
+    frame_thread.SetSelectedFrame(frame.GetFrameID());
   }
 
-  std::vector<CompletionItem> targets;
-
-  bool had_escape_prefix =
-      StringRef(text).starts_with(dap.configuration.commandEscapePrefix);
-  ReplMode completion_mode = dap.DetectReplMode(frame, text, true);
-
-  // Handle the offset change introduced by stripping out the
+  const StringRef escape_prefix = dap.configuration.commandEscapePrefix;
+  const bool had_escape_prefix = StringRef(text).starts_with(escape_prefix);
+  const ReplMode repl_mode = dap.DetectReplMode(frame, text, true);
+  // Handle the cursor_pos change introduced by stripping out the
   // `command_escape_prefix`.
   if (had_escape_prefix) {
-    if (offset <
-        static_cast<int64_t>(dap.configuration.commandEscapePrefix.size())) {
-      return CompletionsResponseBody{std::move(targets)};
-    }
-    offset -= dap.configuration.commandEscapePrefix.size();
+    if (cursor_pos < escape_prefix.size())
+      return CompletionsResponseBody{};
+
+    cursor_pos -= escape_prefix.size();
   }
 
   // While the user is typing then we likely have an incomplete input and cannot
   // reliably determine the precise intent (command vs variable), try completing
   // the text as both a command and variable expression, if applicable.
   const std::string expr_prefix = "expression -- ";
-  std::array<std::tuple<ReplMode, std::string, uint64_t>, 2> exprs = {
-      {std::make_tuple(ReplMode::Command, text, offset),
+  const std::array<std::tuple<ReplMode, std::string, uint64_t>, 2> exprs = {
+      {std::make_tuple(ReplMode::Command, text, cursor_pos),
        std::make_tuple(ReplMode::Variable, expr_prefix + text,
-                       offset + expr_prefix.size())}};
+                       cursor_pos + expr_prefix.size())}};
+
+  CompletionsResponseBody response;
+  std::vector<CompletionItem> &targets = response.targets;
+  lldb::SBCommandInterpreter interpreter = dap.debugger.GetCommandInterpreter();
   for (const auto &[mode, line, cursor] : exprs) {
-    if (completion_mode != ReplMode::Auto && completion_mode != mode)
+    if (repl_mode != ReplMode::Auto && repl_mode != mode)
       continue;
 
     lldb::SBStringList matches;
     lldb::SBStringList descriptions;
-    if (!dap.debugger.GetCommandInterpreter().HandleCompletionWithDescriptions(
-            line.c_str(), cursor, 0, 100, matches, descriptions))
+    if (!interpreter.HandleCompletionWithDescriptions(
+            line.c_str(), cursor, 0, 50, matches, descriptions))
       continue;
 
     // The first element is the common substring after the cursor position for
     // all the matches. The rest of the elements are the matches so ignore the
     // first result.
-    for (size_t i = 1; i < matches.GetSize(); i++) {
-      std::string match = matches.GetStringAtIndex(i);
-      std::string description = descriptions.GetStringAtIndex(i);
+    for (uint32_t i = 1; i < matches.GetSize(); i++) {
+      const StringRef match = matches.GetStringAtIndex(i);
+      const StringRef description = descriptions.GetStringAtIndex(i);
 
-      CompletionItem item;
       StringRef match_ref = match;
-      for (StringRef commit_point : {".", "->"}) {
-        if (match_ref.contains(commit_point)) {
-          match_ref = match_ref.rsplit(commit_point).second;
+      for (const StringRef commit_point : {".", "->"}) {
+        if (const size_t pos = match_ref.rfind(commit_point);
+            pos != StringRef::npos) {
+          match_ref = match_ref.substr(pos + commit_point.size());
         }
       }
-      item.text = match_ref;
 
-      if (description.empty())
-        item.label = match;
-      else
-        item.label = match + " -- " + description;
+      CompletionItem item;
+      item.text = match_ref;
+      item.label = match;
+      if (!description.empty())
+        item.detail = description;
 
       targets.emplace_back(std::move(item));
     }
   }
 
-  return CompletionsResponseBody{std::move(targets)};
+  return response;
 }
 
 } // namespace lldb_dap
diff --git a/lldb/tools/lldb-dap/LLDBUtils.cpp b/lldb/tools/lldb-dap/LLDBUtils.cpp
index 8a651249aee02..e7407e9da9efb 100644
--- a/lldb/tools/lldb-dap/LLDBUtils.cpp
+++ b/lldb/tools/lldb-dap/LLDBUtils.cpp
@@ -19,6 +19,7 @@
 #include "lldb/lldb-defines.h"
 #include "lldb/lldb-enumerations.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/JSON.h"
 #include "llvm/Support/raw_ostream.h"
@@ -277,4 +278,38 @@ lldb::SBLineEntry GetLineEntryForAddress(lldb::SBTarget &target,
   return sc.GetLineEntry();
 }
 
+std::optional<size_t> UTF16CodeunitToBytes(llvm::StringRef line,
+                                           uint32_t utf16_codeunits) {
+  size_t bytes_count = 0;
+  size_t utf16_seen_cu = 0;
+  size_t idx = 0;
+  const size_t line_size = line.size();
+
+  while (idx < line_size && utf16_seen_cu < utf16_codeunits) {
+    const char first_char = line[idx];
+    const auto num_bytes = llvm::getNumBytesForUTF8(first_char);
+
+    if (num_bytes == 4) {
+      utf16_seen_cu += 2;
+    } else if (num_bytes < 4) {
+      utf16_seen_cu += 1;
+    } else {
+      // getNumBytesForUTF8 may return bytes greater than 4 this is not valid
+      // UTF8
+      return std::nullopt;
+    }
+
+    idx += num_bytes;
+    if (utf16_seen_cu <= utf16_codeunits) {
+      bytes_count = idx;
+    } else {
+      // We are in the middle of a codepoint or the utf16_codeunits ends in the
+      // middle of a codepoint.
+      return std::nullopt;
+    }
+  }
+
+  return bytes_count;
+}
+
 } // namespace lldb_dap
diff --git a/lldb/tools/lldb-dap/LLDBUtils.h b/lldb/tools/lldb-dap/LLDBUtils.h
index 9545654504e8d..19174ba59654d 100644
--- a/lldb/tools/lldb-dap/LLDBUtils.h
+++ b/lldb/tools/lldb-dap/LLDBUtils.h
@@ -248,6 +248,24 @@ llvm::Error ToError(const lldb::SBError &error, bool show_user = true);
 /// Provides the string value if this data structure is a string type.
 std::string GetStringValue(const lldb::SBStructuredData &data);
 
+/// Converts UTF16 column codeunits to bytes.
+/// we are recieving utf8 from the specification.
+/// UTF16 codunit size => 2 bytes.
+/// UTF8 codunit size => 1 byte.
+/// Example
+/// | info     | info  | utf16_cu | size in bytes |
+/// | fake f   | ƒ     | 1        | 2             |
+/// | fake c   | ç     | 1        | 3             |
+/// | poop char| 💩    | 2        | 4             |
+///
+/// so with inputs string of
+/// (`ƒ💩`, 3) we have 3 utf16_u and ( 2 + 4 ) bytes.
+/// (`ƒ💩`, 2) we have 3 utf16_u and ( 2 + 4 ) bytes but the position is in
+///  between the 💩 char so we return null since the codepoint is not complete.
+///
+/// see https://utf8everywhere.org/#characters for more info.
+std::optional<size_t> UTF16CodeunitToBytes(llvm::StringRef line,
+                                           uint32_t utf16_codeunits);
 } // namespace lldb_dap
 
 #endif
diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h
index 9d5bdf1efe94c..f7b7bf2e4dda4 100644
--- a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h
+++ b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h
@@ -313,7 +313,7 @@ bool fromJSON(const llvm::json::Value &, LaunchRequestArguments &,
 /// field is required.
 using LaunchResponse = VoidResponse;
 
-#define LLDB_DAP_INVALID_PORT -1
+#define LLDB_DAP_INVALID_PORT (-1)
 /// An invalid 'frameId' default value.
 #define LLDB_DAP_INVALID_FRAME_ID UINT64_MAX
 
@@ -407,11 +407,11 @@ struct CompletionsArguments {
   /// The position within `text` for which to determine the completion
   /// proposals. It is measured in UTF-16 code units and the client capability
   /// `columnsStartAt1` determines whether it is 0- or 1-based.
-  int64_t column = 0;
+  uint32_t column = LLDB_INVALID_COLUMN_NUMBER;
 
   /// A line for which to determine the completion proposals. If missing the
   /// first line of the text is assumed.
-  int64_t line = 0;
+  uint32_t line = 1;
 };
 bool fromJSON(const llvm::json::Value &, CompletionsArguments &,
               llvm::json::Path);
diff --git a/lldb/unittests/DAP/LLDBUtilsTest.cpp b/lldb/unittests/DAP/LLDBUtilsTest.cpp
index 4f619af2b136e..ade452baf12ba 100644
--- a/lldb/unittests/DAP/LLDBUtilsTest.cpp
+++ b/lldb/unittests/DAP/LLDBUtilsTest.cpp
@@ -9,6 +9,7 @@
 #include "LLDBUtils.h"
 #include "lldb/API/SBError.h"
 #include "lldb/API/SBStructuredData.h"
+#include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/Error.h"
 #include "gtest/gtest.h"
 
@@ -63,3 +64,30 @@ TEST(LLDBUtilsTest, ToError) {
   std::string error_message = toString(std::move(llvm_error));
   EXPECT_EQ(error_message, "Test error message");
 }
+
+TEST(LLDBUtilsTest, UTF16Codeunits) {
+  using Expect = std::optional<size_t>;
+
+  EXPECT_EQ(UTF16CodeunitToBytes("a", 0), Expect{0});
+  EXPECT_EQ(UTF16CodeunitToBytes("some word", 4), Expect{4});
+  EX...
[truncated]

@c-rhodes c-rhodes moved this from Needs Triage to Needs Review in LLVM Release Status Jan 20, 2026
@github-project-automation github-project-automation bot moved this from Needs Review to Needs Merge in LLVM Release Status Jan 20, 2026
@da-viper da-viper requested a review from ashgti January 20, 2026 19:03
lldb-dap currently crashes when the first character is non ascii. This
is because we assume that the request column is ascii based instead of
UTF16 code units,
and end up in the middle of a character code point. causing an assertion
since we cannot not send invalid UTF-8 values.

This also handles the case in multilines and the column is outside the
range of the text.

Move completion description to the `CompletionItem.detail` property.

(cherry picked from commit 3ca7a72)
@c-rhodes c-rhodes merged commit dc1046e into llvm:release/22.x Jan 21, 2026
1 check was pending
@github-project-automation github-project-automation bot moved this from Needs Merge to Done in LLVM Release Status Jan 21, 2026
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

Development

Successfully merging this pull request may close these issues.

4 participants