Skip to content

Commit 689eb92

Browse files
authored
Merge branch 'master' into data/refactor-projection-stub-column
2 parents 925d325 + f3d444a commit 689eb92

File tree

10 files changed

+74
-206
lines changed

10 files changed

+74
-206
lines changed

python/ray/_private/debug_api.py

Lines changed: 0 additions & 12 deletions
This file was deleted.

python/ray/_raylet.pyx

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4616,17 +4616,6 @@ cdef class CoreWorker:
46164616

46174617
return ref_counts
46184618

4619-
def get_reference_counter_debug_json(self):
4620-
"""Returns a JSON string of the internal state of the ReferenceCounter.
4621-
4622-
NOTE: This is NOT a stable API. It should only be used for debugging and
4623-
NEVER in tests or production code.
4624-
"""
4625-
cdef:
4626-
c_string debug_json
4627-
debug_json = CCoreWorkerProcess.GetCoreWorker().GetReferenceCounterDebugJson()
4628-
return debug_json.decode('utf-8')
4629-
46304619
def set_get_async_callback(self, ObjectRef object_ref, user_callback: Callable):
46314620
# NOTE: we need to manually increment the Python reference count to avoid the
46324621
# callback object being garbage collected before it's called by the core worker.

python/ray/includes/libcoreworker.pxd

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,6 @@ cdef extern from "ray/core_worker/core_worker.h" nogil:
328328
void YieldCurrentFiber(CFiberEvent &coroutine_done)
329329

330330
unordered_map[CObjectID, pair[size_t, size_t]] GetAllReferenceCounts()
331-
c_string GetReferenceCounterDebugJson() const
332331
c_vector[CTaskID] GetPendingChildrenTasks(const CTaskID &task_id) const
333332

334333
void GetAsync(const CObjectID &object_id,

python/ray/llm/_internal/serve/engines/vllm/vllm_engine.py

Lines changed: 74 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -322,13 +322,17 @@ async def start(self) -> None:
322322
args=args,
323323
)
324324

325-
self._oai_models = state.openai_serving_models
326-
self._oai_serving_chat = state.openai_serving_chat
327-
self._oai_serving_completion = state.openai_serving_completion
328-
self._oai_serving_embedding = state.openai_serving_embedding
329-
self._oai_serving_transcription = state.openai_serving_transcription
330-
self._oai_serving_scores = state.openai_serving_scores
331-
self._oai_serving_tokenization = state.openai_serving_tokenization
325+
self._oai_models = getattr(state, "openai_serving_models", None)
326+
self._oai_serving_chat = getattr(state, "openai_serving_chat", None)
327+
self._oai_serving_completion = getattr(state, "openai_serving_completion", None)
328+
self._oai_serving_embedding = getattr(state, "openai_serving_embedding", None)
329+
self._oai_serving_transcription = getattr(
330+
state, "openai_serving_transcription", None
331+
)
332+
self._oai_serving_scores = getattr(state, "openai_serving_scores", None)
333+
self._oai_serving_tokenization = getattr(
334+
state, "openai_serving_tokenization", None
335+
)
332336

333337
self._validate_openai_serving_models()
334338
self._validate_engine_client()
@@ -346,38 +350,53 @@ def _validate_openai_serving_models(self):
346350
self._oai_models, "load_lora_adapter"
347351
), "oai_models must have a load_lora_adapter attribute"
348352

349-
def _validate_openai_serving_chat(self):
350-
assert hasattr(
351-
self._oai_serving_chat, "create_chat_completion"
352-
), "oai_serving_chat must have a create_chat_completion attribute"
353+
@staticmethod
354+
def _make_error(message: str) -> ErrorResponse:
355+
return ErrorResponse(
356+
error=ErrorInfo(message=message, type="invalid_request_error", code=400)
357+
)
353358

354-
def _validate_openai_serving_completion(self):
355-
assert hasattr(
356-
self._oai_serving_completion, "create_completion"
357-
), "oai_serving_completion must have a create_completion attribute"
359+
def _validate_openai_serving_chat(self) -> Optional[ErrorResponse]:
360+
if self._oai_serving_chat is None:
361+
return self._make_error(
362+
"This model does not support the 'generate' task. "
363+
"The chat completion endpoint is not available for this model."
364+
)
358365

359-
def _validate_openai_serving_embedding(self):
360-
assert hasattr(
361-
self._oai_serving_embedding, "create_embedding"
362-
), "oai_serving_embedding must have a create_embedding attribute"
366+
def _validate_openai_serving_completion(self) -> Optional[ErrorResponse]:
367+
if self._oai_serving_completion is None:
368+
return self._make_error(
369+
"This model does not support the 'generate' task. "
370+
"The completion endpoint is not available for this model."
371+
)
363372

364-
def _validate_openai_serving_transcription(self):
365-
assert hasattr(
366-
self._oai_serving_transcription, "create_transcription"
367-
), "oai_serving_transcription must have a create_transcription attribute"
373+
def _validate_openai_serving_embedding(self) -> Optional[ErrorResponse]:
374+
if self._oai_serving_embedding is None:
375+
return self._make_error(
376+
"This model does not support the 'embed' task. "
377+
"The embedding endpoint is not available for this model."
378+
)
368379

369-
def _validate_openai_serving_scores(self):
370-
assert hasattr(
371-
self._oai_serving_scores, "create_score"
372-
), "oai_serving_scores must have a create_score attribute"
380+
def _validate_openai_serving_transcription(self) -> Optional[ErrorResponse]:
381+
if self._oai_serving_transcription is None:
382+
return self._make_error(
383+
"This model does not support the 'transcription' task. "
384+
"The transcription endpoint is not available for this model."
385+
)
373386

374-
def _validate_openai_serving_tokenization(self):
375-
assert hasattr(
376-
self._oai_serving_tokenization, "create_tokenize"
377-
), "oai_serving_tokenization must have a create_tokenize attribute"
378-
assert hasattr(
379-
self._oai_serving_tokenization, "create_detokenize"
380-
), "oai_serving_tokenization must have a create_detokenize attribute"
387+
def _validate_openai_serving_scores(self) -> Optional[ErrorResponse]:
388+
if self._oai_serving_scores is None:
389+
return self._make_error(
390+
"This model does not support the 'score' task. "
391+
"The score endpoint is not available for this model."
392+
)
393+
394+
def _validate_openai_serving_tokenization(self) -> Optional[ErrorResponse]:
395+
if self._oai_serving_tokenization is None:
396+
return self._make_error(
397+
"This model does not support the 'tokenization' task. "
398+
"The tokenization endpoint is not available for this model."
399+
)
381400

382401
def _validate_engine_client(self):
383402
assert hasattr(
@@ -486,7 +505,9 @@ async def chat(
486505
request: ChatCompletionRequest,
487506
raw_request_info: Optional[RawRequestInfo] = None,
488507
) -> AsyncGenerator[Union[str, ChatCompletionResponse, ErrorResponse], None]:
489-
self._validate_openai_serving_chat()
508+
if error := self._validate_openai_serving_chat():
509+
yield error
510+
return
490511

491512
raw_request: Optional[Request] = RawRequestInfo.to_starlette_request_optional(
492513
raw_request_info
@@ -514,7 +535,9 @@ async def completions(
514535
request: CompletionRequest,
515536
raw_request_info: Optional[RawRequestInfo] = None,
516537
) -> AsyncGenerator[Union[str, CompletionResponse, ErrorResponse], None]:
517-
self._validate_openai_serving_completion()
538+
if error := self._validate_openai_serving_completion():
539+
yield error
540+
return
518541

519542
raw_request: Optional[Request] = RawRequestInfo.to_starlette_request_optional(
520543
raw_request_info
@@ -544,7 +567,9 @@ async def embeddings(
544567
request: EmbeddingRequest,
545568
raw_request_info: Optional[RawRequestInfo] = None,
546569
) -> AsyncGenerator[Union[EmbeddingResponse, ErrorResponse], None]:
547-
self._validate_openai_serving_embedding()
570+
if error := self._validate_openai_serving_embedding():
571+
yield error
572+
return
548573

549574
raw_request: Optional[Request] = RawRequestInfo.to_starlette_request_optional(
550575
raw_request_info
@@ -566,7 +591,9 @@ async def transcriptions(
566591
request: TranscriptionRequest,
567592
raw_request_info: Optional[RawRequestInfo] = None,
568593
) -> AsyncGenerator[Union[str, TranscriptionResponse, ErrorResponse], None]:
569-
self._validate_openai_serving_transcription()
594+
if error := self._validate_openai_serving_transcription():
595+
yield error
596+
return
570597

571598
# Extract audio data from the request file
572599
audio_data = await request.file.read()
@@ -600,7 +627,9 @@ async def score(
600627
request: ScoreRequest,
601628
raw_request_info: Optional[RawRequestInfo] = None,
602629
) -> AsyncGenerator[Union[ScoreResponse, ErrorResponse], None]:
603-
self._validate_openai_serving_scores()
630+
if error := self._validate_openai_serving_scores():
631+
yield error
632+
return
604633

605634
raw_request: Optional[Request] = RawRequestInfo.to_starlette_request_optional(
606635
raw_request_info
@@ -620,7 +649,9 @@ async def tokenize(
620649
request: TokenizeRequest,
621650
raw_request_info: Optional[RawRequestInfo] = None,
622651
) -> AsyncGenerator[Union[TokenizeResponse, ErrorResponse], None]:
623-
self._validate_openai_serving_tokenization()
652+
if error := self._validate_openai_serving_tokenization():
653+
yield error
654+
return
624655

625656
raw_request: Optional[Request] = RawRequestInfo.to_starlette_request_optional(
626657
raw_request_info
@@ -640,7 +671,9 @@ async def detokenize(
640671
request: DetokenizeRequest,
641672
raw_request_info: Optional[RawRequestInfo] = None,
642673
) -> AsyncGenerator[Union[DetokenizeResponse, ErrorResponse], None]:
643-
self._validate_openai_serving_tokenization()
674+
if error := self._validate_openai_serving_tokenization():
675+
yield error
676+
return
644677

645678
raw_request: Optional[Request] = RawRequestInfo.to_starlette_request_optional(
646679
raw_request_info

src/ray/core_worker/BUILD.bazel

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,6 @@ ray_cc_library(
169169
"//src/ray/util:network_util",
170170
"@com_google_absl//absl/base:core_headers",
171171
"@com_google_absl//absl/synchronization",
172-
"@nlohmann_json",
173172
],
174173
)
175174

src/ray/core_worker/core_worker.cc

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -856,10 +856,6 @@ CoreWorker::GetAllReferenceCounts() const {
856856
return counts;
857857
}
858858

859-
std::string CoreWorker::GetReferenceCounterDebugJson() const {
860-
return reference_counter_->ToJsonString();
861-
}
862-
863859
std::vector<TaskID> CoreWorker::GetPendingChildrenTasks(const TaskID &task_id) const {
864860
return task_manager_->GetPendingChildrenTasks(task_id);
865861
}

src/ray/core_worker/core_worker.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -398,12 +398,6 @@ class CoreWorker : public std::enable_shared_from_this<CoreWorker> {
398398
/// (local, submitted_task) reference counts. For debugging purposes.
399399
std::unordered_map<ObjectID, std::pair<size_t, size_t>> GetAllReferenceCounts() const;
400400

401-
/// Returns a JSON string representation of the internal state of the
402-
/// ReferenceCounter.
403-
/// NOTE: This is very expensive and must only be used for debugging.
404-
/// Please do NOT use this for production observability or testing.
405-
std::string GetReferenceCounterDebugJson() const;
406-
407401
/// Return all pending children task ids for a given parent task id.
408402
/// The parent task id should exist in the current worker.
409403
/// For debugging and testing only.

src/ray/core_worker/reference_counter.cc

Lines changed: 0 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
#include "ray/core_worker/reference_counter.h"
1616

1717
#include <memory>
18-
#include <nlohmann/json.hpp>
1918
#include <string>
2019
#include <unordered_map>
2120
#include <unordered_set>
@@ -25,52 +24,6 @@
2524
#include "ray/util/logging.h"
2625
#include "ray/util/network_util.h"
2726

28-
using json = nlohmann::json;
29-
30-
namespace {
31-
32-
json AddressToJson(const ray::rpc::Address &address) {
33-
return {
34-
{"node_id", ray::NodeID::FromBinary(address.node_id()).Hex()},
35-
{"ip_address", address.ip_address()},
36-
{"port", address.port()},
37-
{"worker_id", ray::WorkerID::FromBinary(address.worker_id()).Hex()},
38-
};
39-
}
40-
41-
template <class Container>
42-
json IdContainerToJsonArray(const Container &c) {
43-
json output = json::array();
44-
for (const auto &id : c) {
45-
output.push_back(id.Hex());
46-
}
47-
return output;
48-
}
49-
50-
constexpr const char *LineageReconstructionEligibilityToString(
51-
ray::core::LineageReconstructionEligibility lre) noexcept {
52-
switch (lre) {
53-
case ray::core::LineageReconstructionEligibility::ELIGIBLE:
54-
return "ELIGIBLE";
55-
case ray::core::LineageReconstructionEligibility::INELIGIBLE_PUT:
56-
return "INELIGIBLE_PUT";
57-
case ray::core::LineageReconstructionEligibility::INELIGIBLE_NO_RETRIES:
58-
return "INELIGIBLE_NO_RETRIES";
59-
case ray::core::LineageReconstructionEligibility::INELIGIBLE_LOCAL_MODE:
60-
return "INELIGIBLE_LOCAL_MODE";
61-
case ray::core::LineageReconstructionEligibility::INELIGIBLE_LINEAGE_EVICTED:
62-
return "INELIGIBLE_LINEAGE_EVICTED";
63-
case ray::core::LineageReconstructionEligibility::INELIGIBLE_LINEAGE_DISABLED:
64-
return "INELIGIBLE_LINEAGE_DISABLED";
65-
case ray::core::LineageReconstructionEligibility::INELIGIBLE_REF_NOT_FOUND:
66-
return "INELIGIBLE_REF_NOT_FOUND";
67-
default:
68-
return "UNKNOWN";
69-
};
70-
};
71-
72-
}; // namespace
73-
7427
#define PRINT_REF_COUNT(it) \
7528
RAY_LOG(DEBUG) << "REF " << it->first << ": " << it->second.DebugString();
7629

@@ -1813,76 +1766,6 @@ std::string ReferenceCounter::DebugString() const {
18131766
return ss.str();
18141767
}
18151768

1816-
json ReferenceCounter::NestedReferenceCount::ToJson() const {
1817-
return {
1818-
{"contained_in_owned", IdContainerToJsonArray(contained_in_owned)},
1819-
{"contained_in_borrowed_ids", IdContainerToJsonArray(contained_in_borrowed_ids)},
1820-
{"contains", IdContainerToJsonArray(contains)}};
1821-
}
1822-
1823-
json ReferenceCounter::BorrowInfo::ToJson() const {
1824-
json stored_in_objects_json = json::array();
1825-
for (const auto &[object_id, addr] : stored_in_objects) {
1826-
stored_in_objects_json.push_back(
1827-
{{"object_id", object_id.Hex()}, {"address", AddressToJson(addr)}});
1828-
}
1829-
json borrowers_json = json::array();
1830-
for (const auto &address : borrowers) {
1831-
borrowers_json.push_back(AddressToJson(address));
1832-
}
1833-
return {{"stored_in_objects", stored_in_objects_json}, {"borrowers", borrowers_json}};
1834-
}
1835-
1836-
std::string ReferenceCounter::ToJsonString() const {
1837-
absl::MutexLock lock(&mutex_);
1838-
json ref_table_json = json::array();
1839-
for (const auto &[obj_id, reference] : object_id_refs_) {
1840-
ref_table_json.push_back({
1841-
{"object_id", obj_id.Hex()},
1842-
{"reference", reference.ToJson()},
1843-
});
1844-
}
1845-
json output = {{"rpc_address", AddressToJson(rpc_address_)},
1846-
{"reference_table", ref_table_json},
1847-
{"freed_objects", IdContainerToJsonArray(freed_objects_)},
1848-
{"reconstructable_owned_objects",
1849-
IdContainerToJsonArray(reconstructable_owned_objects_)},
1850-
{"objects_to_recover", IdContainerToJsonArray(objects_to_recover_)}};
1851-
return output.dump();
1852-
}
1853-
1854-
json ReferenceCounter::Reference::ToJson() const {
1855-
return {
1856-
{"call_site", call_site_},
1857-
{"object_size", object_size_},
1858-
{"locations", IdContainerToJsonArray(locations)},
1859-
{"owner_address", owner_address_ ? AddressToJson(*owner_address_) : json(nullptr)},
1860-
{"pinned_at_node_id",
1861-
pinned_at_node_id_ ? json(pinned_at_node_id_->Hex()) : json(nullptr)},
1862-
{"tensor_transport", tensor_transport_ ? json(*tensor_transport_) : json(nullptr)},
1863-
{"owned_by_us", owned_by_us_},
1864-
{"lineage_eligibility",
1865-
LineageReconstructionEligibilityToString(lineage_eligibility_)},
1866-
{"lineage_ref_count", lineage_ref_count},
1867-
{"local_ref_count", local_ref_count},
1868-
{"submitted_task_ref_count", submitted_task_ref_count},
1869-
{"nested_reference_count",
1870-
nested_reference_count ? nested_reference_count->ToJson() : json(nullptr)},
1871-
{"borrow_info", borrow_info ? borrow_info->ToJson() : json(nullptr)},
1872-
{"num_object_out_of_scope_or_freed_callbacks",
1873-
on_object_out_of_scope_or_freed_callbacks.size()},
1874-
{"num_object_ref_deleted_callbacks", object_ref_deleted_callbacks.size()},
1875-
{"publish_ref_removed", publish_ref_removed},
1876-
{"spilled_url", spilled_url},
1877-
{"spilled_node_id", spilled_node_id.Hex()},
1878-
{"spilled", spilled},
1879-
{"foreign_owner_already_monitoring", foreign_owner_already_monitoring},
1880-
{"has_nested_refs_report", has_nested_refs_to_report},
1881-
{"pending_creation", pending_creation_},
1882-
{"did_spill", did_spill},
1883-
};
1884-
}
1885-
18861769
std::string ReferenceCounter::Reference::DebugString() const {
18871770
std::stringstream ss;
18881771
ss << "Reference{borrowers: " << borrow().borrowers.size()

0 commit comments

Comments
 (0)