Merge branch 'master' into data/refactor-projection-stub-column

slfan1989 · web-flow · commit 689eb9240c83 · 2026-02-07T11:15:40.000+08:00
diff --git a/python/ray/_private/debug_api.py b/python/ray/_private/debug_api.py
diff --git a/python/ray/_raylet.pyx b/python/ray/_raylet.pyx
@@ -4616,17 +4616,6 @@ cdef class CoreWorker:
 
         return ref_counts
 
-    def get_reference_counter_debug_json(self):
-        """Returns a JSON string of the internal state of the ReferenceCounter.
-
-        NOTE: This is NOT a stable API. It should only be used for debugging and
-        NEVER in tests or production code.
-        """
-        cdef:
-            c_string debug_json
-        debug_json = CCoreWorkerProcess.GetCoreWorker().GetReferenceCounterDebugJson()
-        return debug_json.decode('utf-8')
-
     def set_get_async_callback(self, ObjectRef object_ref, user_callback: Callable):
         # NOTE: we need to manually increment the Python reference count to avoid the
         # callback object being garbage collected before it's called by the core worker.
diff --git a/python/ray/includes/libcoreworker.pxd b/python/ray/includes/libcoreworker.pxd
@@ -328,7 +328,6 @@ cdef extern from "ray/core_worker/core_worker.h" nogil:
         void YieldCurrentFiber(CFiberEvent &coroutine_done)
 
         unordered_map[CObjectID, pair[size_t, size_t]] GetAllReferenceCounts()
-        c_string GetReferenceCounterDebugJson() const
         c_vector[CTaskID] GetPendingChildrenTasks(const CTaskID &task_id) const
 
         void GetAsync(const CObjectID &object_id,
diff --git a/python/ray/llm/_internal/serve/engines/vllm/vllm_engine.py b/python/ray/llm/_internal/serve/engines/vllm/vllm_engine.py
@@ -322,13 +322,17 @@ async def start(self) -> None:
                 args=args,
             )
 
-        self._oai_models = state.openai_serving_models
-        self._oai_serving_chat = state.openai_serving_chat
-        self._oai_serving_completion = state.openai_serving_completion
-        self._oai_serving_embedding = state.openai_serving_embedding
-        self._oai_serving_transcription = state.openai_serving_transcription
-        self._oai_serving_scores = state.openai_serving_scores
-        self._oai_serving_tokenization = state.openai_serving_tokenization
+        self._oai_models = getattr(state, "openai_serving_models", None)
+        self._oai_serving_chat = getattr(state, "openai_serving_chat", None)
+        self._oai_serving_completion = getattr(state, "openai_serving_completion", None)
+        self._oai_serving_embedding = getattr(state, "openai_serving_embedding", None)
+        self._oai_serving_transcription = getattr(
+            state, "openai_serving_transcription", None
+        )
+        self._oai_serving_scores = getattr(state, "openai_serving_scores", None)
+        self._oai_serving_tokenization = getattr(
+            state, "openai_serving_tokenization", None
+        )
 
         self._validate_openai_serving_models()
         self._validate_engine_client()
@@ -346,38 +350,53 @@ def _validate_openai_serving_models(self):
             self._oai_models, "load_lora_adapter"
         ), "oai_models must have a load_lora_adapter attribute"
 
-    def _validate_openai_serving_chat(self):
-        assert hasattr(
-            self._oai_serving_chat, "create_chat_completion"
-        ), "oai_serving_chat must have a create_chat_completion attribute"
+    @staticmethod
+    def _make_error(message: str) -> ErrorResponse:
+        return ErrorResponse(
+            error=ErrorInfo(message=message, type="invalid_request_error", code=400)
+        )
 
-    def _validate_openai_serving_completion(self):
-        assert hasattr(
-            self._oai_serving_completion, "create_completion"
-        ), "oai_serving_completion must have a create_completion attribute"
+    def _validate_openai_serving_chat(self) -> Optional[ErrorResponse]:
+        if self._oai_serving_chat is None:
+            return self._make_error(
+                "This model does not support the 'generate' task. "
+                "The chat completion endpoint is not available for this model."
+            )
 
-    def _validate_openai_serving_embedding(self):
-        assert hasattr(
-            self._oai_serving_embedding, "create_embedding"
-        ), "oai_serving_embedding must have a create_embedding attribute"
+    def _validate_openai_serving_completion(self) -> Optional[ErrorResponse]:
+        if self._oai_serving_completion is None:
+            return self._make_error(
+                "This model does not support the 'generate' task. "
+                "The completion endpoint is not available for this model."
+            )
 
-    def _validate_openai_serving_transcription(self):
-        assert hasattr(
-            self._oai_serving_transcription, "create_transcription"
-        ), "oai_serving_transcription must have a create_transcription attribute"
+    def _validate_openai_serving_embedding(self) -> Optional[ErrorResponse]:
+        if self._oai_serving_embedding is None:
+            return self._make_error(
+                "This model does not support the 'embed' task. "
+                "The embedding endpoint is not available for this model."
+            )
 
-    def _validate_openai_serving_scores(self):
-        assert hasattr(
-            self._oai_serving_scores, "create_score"
-        ), "oai_serving_scores must have a create_score attribute"
+    def _validate_openai_serving_transcription(self) -> Optional[ErrorResponse]:
+        if self._oai_serving_transcription is None:
+            return self._make_error(
+                "This model does not support the 'transcription' task. "
+                "The transcription endpoint is not available for this model."
+            )
 
-    def _validate_openai_serving_tokenization(self):
-        assert hasattr(
-            self._oai_serving_tokenization, "create_tokenize"
-        ), "oai_serving_tokenization must have a create_tokenize attribute"
-        assert hasattr(
-            self._oai_serving_tokenization, "create_detokenize"
-        ), "oai_serving_tokenization must have a create_detokenize attribute"
+    def _validate_openai_serving_scores(self) -> Optional[ErrorResponse]:
+        if self._oai_serving_scores is None:
+            return self._make_error(
+                "This model does not support the 'score' task. "
+                "The score endpoint is not available for this model."
+            )
+
+    def _validate_openai_serving_tokenization(self) -> Optional[ErrorResponse]:
+        if self._oai_serving_tokenization is None:
+            return self._make_error(
+                "This model does not support the 'tokenization' task. "
+                "The tokenization endpoint is not available for this model."
+            )
 
     def _validate_engine_client(self):
         assert hasattr(
@@ -486,7 +505,9 @@ async def chat(
         request: ChatCompletionRequest,
         raw_request_info: Optional[RawRequestInfo] = None,
     ) -> AsyncGenerator[Union[str, ChatCompletionResponse, ErrorResponse], None]:
-        self._validate_openai_serving_chat()
+        if error := self._validate_openai_serving_chat():
+            yield error
+            return
 
         raw_request: Optional[Request] = RawRequestInfo.to_starlette_request_optional(
             raw_request_info
@@ -514,7 +535,9 @@ async def completions(
         request: CompletionRequest,
         raw_request_info: Optional[RawRequestInfo] = None,
     ) -> AsyncGenerator[Union[str, CompletionResponse, ErrorResponse], None]:
-        self._validate_openai_serving_completion()
+        if error := self._validate_openai_serving_completion():
+            yield error
+            return
 
         raw_request: Optional[Request] = RawRequestInfo.to_starlette_request_optional(
             raw_request_info
@@ -544,7 +567,9 @@ async def embeddings(
         request: EmbeddingRequest,
         raw_request_info: Optional[RawRequestInfo] = None,
     ) -> AsyncGenerator[Union[EmbeddingResponse, ErrorResponse], None]:
-        self._validate_openai_serving_embedding()
+        if error := self._validate_openai_serving_embedding():
+            yield error
+            return
 
         raw_request: Optional[Request] = RawRequestInfo.to_starlette_request_optional(
             raw_request_info
@@ -566,7 +591,9 @@ async def transcriptions(
         request: TranscriptionRequest,
         raw_request_info: Optional[RawRequestInfo] = None,
     ) -> AsyncGenerator[Union[str, TranscriptionResponse, ErrorResponse], None]:
-        self._validate_openai_serving_transcription()
+        if error := self._validate_openai_serving_transcription():
+            yield error
+            return
 
         # Extract audio data from the request file
         audio_data = await request.file.read()
@@ -600,7 +627,9 @@ async def score(
         request: ScoreRequest,
         raw_request_info: Optional[RawRequestInfo] = None,
     ) -> AsyncGenerator[Union[ScoreResponse, ErrorResponse], None]:
-        self._validate_openai_serving_scores()
+        if error := self._validate_openai_serving_scores():
+            yield error
+            return
 
         raw_request: Optional[Request] = RawRequestInfo.to_starlette_request_optional(
             raw_request_info
@@ -620,7 +649,9 @@ async def tokenize(
         request: TokenizeRequest,
         raw_request_info: Optional[RawRequestInfo] = None,
     ) -> AsyncGenerator[Union[TokenizeResponse, ErrorResponse], None]:
-        self._validate_openai_serving_tokenization()
+        if error := self._validate_openai_serving_tokenization():
+            yield error
+            return
 
         raw_request: Optional[Request] = RawRequestInfo.to_starlette_request_optional(
             raw_request_info
@@ -640,7 +671,9 @@ async def detokenize(
         request: DetokenizeRequest,
         raw_request_info: Optional[RawRequestInfo] = None,
     ) -> AsyncGenerator[Union[DetokenizeResponse, ErrorResponse], None]:
-        self._validate_openai_serving_tokenization()
+        if error := self._validate_openai_serving_tokenization():
+            yield error
+            return
 
         raw_request: Optional[Request] = RawRequestInfo.to_starlette_request_optional(
             raw_request_info
diff --git a/src/ray/core_worker/BUILD.bazel b/src/ray/core_worker/BUILD.bazel
@@ -169,7 +169,6 @@ ray_cc_library(
         "//src/ray/util:network_util",
         "@com_google_absl//absl/base:core_headers",
         "@com_google_absl//absl/synchronization",
-        "@nlohmann_json",
     ],
 )
 
diff --git a/src/ray/core_worker/core_worker.cc b/src/ray/core_worker/core_worker.cc
@@ -856,10 +856,6 @@ CoreWorker::GetAllReferenceCounts() const {
   return counts;
 }
 
-std::string CoreWorker::GetReferenceCounterDebugJson() const {
-  return reference_counter_->ToJsonString();
-}
-
 std::vector<TaskID> CoreWorker::GetPendingChildrenTasks(const TaskID &task_id) const {
   return task_manager_->GetPendingChildrenTasks(task_id);
 }
diff --git a/src/ray/core_worker/core_worker.h b/src/ray/core_worker/core_worker.h
@@ -398,12 +398,6 @@ class CoreWorker : public std::enable_shared_from_this<CoreWorker> {
   /// (local, submitted_task) reference counts. For debugging purposes.
   std::unordered_map<ObjectID, std::pair<size_t, size_t>> GetAllReferenceCounts() const;
 
-  /// Returns a JSON string representation of the internal state of the
-  /// ReferenceCounter.
-  /// NOTE: This is very expensive and must only be used for debugging.
-  /// Please do NOT use this for production observability or testing.
-  std::string GetReferenceCounterDebugJson() const;
-
   /// Return all pending children task ids for a given parent task id.
   /// The parent task id should exist in the current worker.
   /// For debugging and testing only.
diff --git a/src/ray/core_worker/reference_counter.cc b/src/ray/core_worker/reference_counter.cc
@@ -15,7 +15,6 @@
 #include "ray/core_worker/reference_counter.h"
 
 #include <memory>
-#include <nlohmann/json.hpp>
 #include <string>
 #include <unordered_map>
 #include <unordered_set>
@@ -25,52 +24,6 @@
 #include "ray/util/logging.h"
 #include "ray/util/network_util.h"
 
-using json = nlohmann::json;
-
-namespace {
-
-json AddressToJson(const ray::rpc::Address &address) {
-  return {
-      {"node_id", ray::NodeID::FromBinary(address.node_id()).Hex()},
-      {"ip_address", address.ip_address()},
-      {"port", address.port()},
-      {"worker_id", ray::WorkerID::FromBinary(address.worker_id()).Hex()},
-  };
-}
-
-template <class Container>
-json IdContainerToJsonArray(const Container &c) {
-  json output = json::array();
-  for (const auto &id : c) {
-    output.push_back(id.Hex());
-  }
-  return output;
-}
-
-constexpr const char *LineageReconstructionEligibilityToString(
-    ray::core::LineageReconstructionEligibility lre) noexcept {
-  switch (lre) {
-  case ray::core::LineageReconstructionEligibility::ELIGIBLE:
-    return "ELIGIBLE";
-  case ray::core::LineageReconstructionEligibility::INELIGIBLE_PUT:
-    return "INELIGIBLE_PUT";
-  case ray::core::LineageReconstructionEligibility::INELIGIBLE_NO_RETRIES:
-    return "INELIGIBLE_NO_RETRIES";
-  case ray::core::LineageReconstructionEligibility::INELIGIBLE_LOCAL_MODE:
-    return "INELIGIBLE_LOCAL_MODE";
-  case ray::core::LineageReconstructionEligibility::INELIGIBLE_LINEAGE_EVICTED:
-    return "INELIGIBLE_LINEAGE_EVICTED";
-  case ray::core::LineageReconstructionEligibility::INELIGIBLE_LINEAGE_DISABLED:
-    return "INELIGIBLE_LINEAGE_DISABLED";
-  case ray::core::LineageReconstructionEligibility::INELIGIBLE_REF_NOT_FOUND:
-    return "INELIGIBLE_REF_NOT_FOUND";
-  default:
-    return "UNKNOWN";
-  };
-};
-
-};  // namespace
-
 #define PRINT_REF_COUNT(it) \
   RAY_LOG(DEBUG) << "REF " << it->first << ": " << it->second.DebugString();
 
@@ -1813,76 +1766,6 @@ std::string ReferenceCounter::DebugString() const {
   return ss.str();
 }
 
-json ReferenceCounter::NestedReferenceCount::ToJson() const {
-  return {
-      {"contained_in_owned", IdContainerToJsonArray(contained_in_owned)},
-      {"contained_in_borrowed_ids", IdContainerToJsonArray(contained_in_borrowed_ids)},
-      {"contains", IdContainerToJsonArray(contains)}};
-}
-
-json ReferenceCounter::BorrowInfo::ToJson() const {
-  json stored_in_objects_json = json::array();
-  for (const auto &[object_id, addr] : stored_in_objects) {
-    stored_in_objects_json.push_back(
-        {{"object_id", object_id.Hex()}, {"address", AddressToJson(addr)}});
-  }
-  json borrowers_json = json::array();
-  for (const auto &address : borrowers) {
-    borrowers_json.push_back(AddressToJson(address));
-  }
-  return {{"stored_in_objects", stored_in_objects_json}, {"borrowers", borrowers_json}};
-}
-
-std::string ReferenceCounter::ToJsonString() const {
-  absl::MutexLock lock(&mutex_);
-  json ref_table_json = json::array();
-  for (const auto &[obj_id, reference] : object_id_refs_) {
-    ref_table_json.push_back({
-        {"object_id", obj_id.Hex()},
-        {"reference", reference.ToJson()},
-    });
-  }
-  json output = {{"rpc_address", AddressToJson(rpc_address_)},
-                 {"reference_table", ref_table_json},
-                 {"freed_objects", IdContainerToJsonArray(freed_objects_)},
-                 {"reconstructable_owned_objects",
-                  IdContainerToJsonArray(reconstructable_owned_objects_)},
-                 {"objects_to_recover", IdContainerToJsonArray(objects_to_recover_)}};
-  return output.dump();
-}
-
-json ReferenceCounter::Reference::ToJson() const {
-  return {
-      {"call_site", call_site_},
-      {"object_size", object_size_},
-      {"locations", IdContainerToJsonArray(locations)},
-      {"owner_address", owner_address_ ? AddressToJson(*owner_address_) : json(nullptr)},
-      {"pinned_at_node_id",
-       pinned_at_node_id_ ? json(pinned_at_node_id_->Hex()) : json(nullptr)},
-      {"tensor_transport", tensor_transport_ ? json(*tensor_transport_) : json(nullptr)},
-      {"owned_by_us", owned_by_us_},
-      {"lineage_eligibility",
-       LineageReconstructionEligibilityToString(lineage_eligibility_)},
-      {"lineage_ref_count", lineage_ref_count},
-      {"local_ref_count", local_ref_count},
-      {"submitted_task_ref_count", submitted_task_ref_count},
-      {"nested_reference_count",
-       nested_reference_count ? nested_reference_count->ToJson() : json(nullptr)},
-      {"borrow_info", borrow_info ? borrow_info->ToJson() : json(nullptr)},
-      {"num_object_out_of_scope_or_freed_callbacks",
-       on_object_out_of_scope_or_freed_callbacks.size()},
-      {"num_object_ref_deleted_callbacks", object_ref_deleted_callbacks.size()},
-      {"publish_ref_removed", publish_ref_removed},
-      {"spilled_url", spilled_url},
-      {"spilled_node_id", spilled_node_id.Hex()},
-      {"spilled", spilled},
-      {"foreign_owner_already_monitoring", foreign_owner_already_monitoring},
-      {"has_nested_refs_report", has_nested_refs_to_report},
-      {"pending_creation", pending_creation_},
-      {"did_spill", did_spill},
-  };
-}
-
 std::string ReferenceCounter::Reference::DebugString() const {
   std::stringstream ss;
   ss << "Reference{borrowers: " << borrow().borrowers.size()
diff --git a/src/ray/core_worker/reference_counter.h b/src/ray/core_worker/reference_counter.h
diff --git a/src/ray/core_worker/reference_counter_interface.h b/src/ray/core_worker/reference_counter_interface.h

Original file line number	Diff line number	Diff line change
`@@ -169,7 +169,6 @@ ray_cc_library(`
`169`	`169`	`"//src/ray/util:network_util",`
`170`	`170`	`"@com_google_absl//absl/base:core_headers",`
`171`	`171`	`"@com_google_absl//absl/synchronization",`
`172`		`- "@nlohmann_json",`
`173`	`172`	`],`
`174`	`173`	`)`
`175`	`174`
Original file line number	Diff line number	Diff line change
`@@ -856,10 +856,6 @@ CoreWorker::GetAllReferenceCounts() const {`
`856`	`856`	`return counts;`
`857`	`857`	`}`
`858`	`858`
`859`		`-std::string CoreWorker::GetReferenceCounterDebugJson() const {`
`860`		`- return reference_counter_->ToJsonString();`
`861`		`-}`
`862`		`-`
`863`	`859`	`std::vector<TaskID> CoreWorker::GetPendingChildrenTasks(const TaskID &task_id) const {`
`864`	`860`	`return task_manager_->GetPendingChildrenTasks(task_id);`
`865`	`861`	`}`