diff --git a/.bazelrc b/.bazelrc index c71651133b52..12e251b4e99d 100644 --- a/.bazelrc +++ b/.bazelrc @@ -10,7 +10,7 @@ build:linux --workspace_status_command="bash ./bazel/workspace_status.sh" build --action_env=RAY_BUILD_ENV ############################################################################### -# On Windows, provide: BAZEL_SH, and BAZEL_LLVM (if using clang-cl) +# On Windows, provide: BAZEL_SH, and BAZEL_LLVM # On all platforms, provide: PYTHON3_BIN_PATH=python ############################################################################### build:windows --action_env=PATH @@ -19,12 +19,12 @@ build --compilation_mode=opt # Using C++ 17 on all platforms. build:linux --host_cxxopt="-std=c++17" build:macos --host_cxxopt="-std=c++17" -build:clang-cl --host_cxxopt="-std=c++17" +build:clang-cl --host_cxxopt="-std:c++17" build:msvc-cl --host_cxxopt="/std:c++17" build:windows --host_cxxopt="/std:c++17" build:linux --cxxopt="-std=c++17" build:macos --cxxopt="-std=c++17" -build:clang-cl --cxxopt="-std=c++17" +build:clang-cl --cxxopt="-std:c++17" build:msvc-cl --cxxopt="/std:c++17" build:windows --cxxopt="/std:c++17" # This workaround is needed to prevent Bazel from compiling the same file twice (once PIC and once not). @@ -51,10 +51,10 @@ build:windows --enable_runfiles # TODO(mehrdadn): Revert the "-\\.(asm|S)$" exclusion when this Bazel bug # for compiling assembly files is fixed on Windows: # https://github.com/bazelbuild/bazel/issues/8924 -# Warnings should be errors -build:linux --per_file_copt="-\\.(asm|S)$@-Werror" -build:macos --per_file_copt="-\\.(asm|S)$@-Werror" -build:clang-cl --per_file_copt="-\\.(asm|S)$@-Werror" +# Warnings should be errors, except for deprecated protobuf CreateMessage use +build:linux --per_file_copt="-\\.(asm|S)$@-Werror,-Wno-deprecated-declarations" +build:macos --per_file_copt="-\\.(asm|S)$@-Werror,-Wno-deprecated-declarations" +build:clang-cl --per_file_copt="-\\.(asm|S)$@-Werror,-Wno-deprecated-declarations" build:msvc-cl --per_file_copt="-\\.(asm|S)$@-WX" # Ignore warnings for protobuf generated files and external projects. build --per_file_copt="\\.pb\\.cc$@-w" @@ -238,3 +238,5 @@ try-import %workspace%/.user.bazelrc build:macos --sandbox_block_path=/usr/local/ # This option controls whether javac checks for missing direct dependencies. build --experimental_strict_java_deps=off +build:clang-cl --per_file_copt="-external/com_github_redis_hiredis/ssl.c$@-Wno-parenthesis,-Wno-int-conversion" +build --local_cpu_resources=HOST_CPUS*0.75 diff --git a/.buildkite/core.rayci.yml b/.buildkite/core.rayci.yml index 71ed4084c0e2..d7f70478ce16 100644 --- a/.buildkite/core.rayci.yml +++ b/.buildkite/core.rayci.yml @@ -204,7 +204,7 @@ steps: tags: - python - skip-on-premerge - instance_type: medium + instance_type: large commands: - bazel run //ci/ray_ci:test_in_docker -- //python/ray/tests/... core --install-mask all-ray-libraries diff --git a/WORKSPACE b/WORKSPACE index 20fd81787491..b0b2515c1e46 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -64,6 +64,7 @@ python_register_toolchains( name = "python3_9", python_version = "3.9", register_toolchains = False, + ignore_root_user_error = True, ) load("@python3_9//:defs.bzl", python39 = "interpreter") diff --git a/bazel/ray_deps_setup.bzl b/bazel/ray_deps_setup.bzl index 13a6a96c162e..cfdddf2ae914 100644 --- a/bazel/ray_deps_setup.bzl +++ b/bazel/ray_deps_setup.bzl @@ -86,20 +86,16 @@ def auto_http_archive( def ray_deps_setup(): # Explicitly bring in protobuf dependency to work around # https://github.com/ray-project/ray/issues/14117 - # This is copied from grpc's bazel/grpc_deps.bzl - # - # Pinned grpc version: v23.4 http_archive( name = "com_google_protobuf", - sha256 = "76a33e2136f23971ce46c72fd697cd94dc9f73d56ab23b753c3e16854c90ddfd", - strip_prefix = "protobuf-2c5fa078d8e86e5f4bd34e6f4c9ea9e8d7d4d44a", + sha256 = "b2340aa47faf7ef10a0328190319d3f3bee1b24f426d4ce8f4253b6f27ce16db", + strip_prefix = "protobuf-28.2", urls = [ - "https://github.com/protocolbuffers/protobuf/archive/2c5fa078d8e86e5f4bd34e6f4c9ea9e8d7d4d44a.tar.gz", + "https://github.com/protocolbuffers/protobuf/archive/refs/tags/v28.2.tar.gz", ], patches = [ - "@com_github_grpc_grpc//third_party:protobuf.patch", + "@io_ray//thirdparty/patches:protobuf-windows-const-nan.patch", ], - patch_args = ["-p1"], ) # NOTE(lingxuan.zlx): 3rd party dependencies could be accessed, so it suggests @@ -250,10 +246,10 @@ def ray_deps_setup(): # TODO(owner): Upgrade abseil to latest version after protobuf updated, which requires to upgrade `rules_cc` first. auto_http_archive( name = "com_google_absl", - sha256 = "987ce98f02eefbaf930d6e38ab16aa05737234d7afbab2d5c4ea7adbe50c28ed", - strip_prefix = "abseil-cpp-20230802.1", + sha256 = "f50e5ac311a81382da7fa75b97310e4b9006474f9560ac46f54a9967f07d4ae3", + strip_prefix = "abseil-cpp-20240722.0", urls = [ - "https://github.com/abseil/abseil-cpp/archive/refs/tags/20230802.1.tar.gz", + "https://github.com/abseil/abseil-cpp/archive/refs/tags/20240722.0.tar.gz", ], patches = [ # TODO (israbbani): #55430 Separate the compiler flags and remove this patch @@ -278,10 +274,11 @@ def ray_deps_setup(): auto_http_archive( name = "com_github_grpc_grpc", # NOTE: If you update this, also update @boringssl's hash. - url = "https://github.com/grpc/grpc/archive/refs/tags/v1.57.1.tar.gz", - sha256 = "0762f809b9de845e6a7c809cabccad6aa4143479fd43b396611fe5a086c0aeeb", + url = "https://github.com/grpc/grpc/archive/refs/tags/v1.67.1.tar.gz", + sha256 = "d74f8e99a433982a12d7899f6773e285c9824e1d9a173ea1d1fb26c9bd089299", patches = [ "@io_ray//thirdparty/patches:grpc-cython-copts.patch", + "@io_ray//thirdparty/patches:grpc-avoid-goaway-messages.patch", "@io_ray//thirdparty/patches:grpc-zlib-fdopen.patch", "@io_ray//thirdparty/patches:grpc-configurable-thread-count.patch", ], @@ -323,13 +320,13 @@ def ray_deps_setup(): http_archive( # This rule is used by @com_github_grpc_grpc, and using a GitHub mirror # provides a deterministic archive hash for caching. Explanation here: - # https://github.com/grpc/grpc/blob/1ff1feaa83e071d87c07827b0a317ffac673794f/bazel/grpc_deps.bzl#L189 # Ensure this rule matches the rule used by grpc's bazel/grpc_deps.bzl + # https://github.com/grpc/grpc/blob/v1.67.1/bazel/grpc_deps.bzl#L33 name = "boringssl", - sha256 = "0675a4f86ce5e959703425d6f9063eaadf6b61b7f3399e77a154c0e85bad46b1", - strip_prefix = "boringssl-342e805bc1f5dfdd650e3f031686d6c939b095d9", + sha256 = "c70d519e4ee709b7a74410a5e3a937428b8198d793a3d771be3dd2086ae167c8", + strip_prefix = "boringssl-b8b3e6e11166719a8ebfa43c0cde9ad7d57a84f6", urls = [ - "https://github.com/google/boringssl/archive/342e805bc1f5dfdd650e3f031686d6c939b095d9.tar.gz", + "https://github.com/google/boringssl/archive/b8b3e6e11166719a8ebfa43c0cde9ad7d57a84f6.tar.gz", ], ) @@ -345,6 +342,7 @@ def ray_deps_setup(): urls = ["https://github.com/protocolbuffers/protobuf/archive/v3.19.4.tar.gz"], sha256 = "3bd7828aa5af4b13b99c191e8b1e884ebfa9ad371b0ce264605d347f135d2568", ) + auto_http_archive( name = "rules_proto_grpc", url = "https://github.com/rules-proto-grpc/rules_proto_grpc/archive/a74fef39c5fe636580083545f76d1eab74f6450d.tar.gz", diff --git a/src/ray/core_worker/tests/task_event_buffer_export_event_test.cc b/src/ray/core_worker/tests/task_event_buffer_export_event_test.cc index 066ad5e4c965..48370c714550 100644 --- a/src/ray/core_worker/tests/task_event_buffer_export_event_test.cc +++ b/src/ray/core_worker/tests/task_event_buffer_export_event_test.cc @@ -153,7 +153,7 @@ TEST_F(TaskEventTestWriteExport, TestWriteTaskExportEvents) { auto task_ids = GenTaskIDs(num_events); google::protobuf::util::JsonPrintOptions options; options.preserve_proto_field_names = true; - options.always_print_primitive_fields = true; + options.always_print_fields_with_no_presence = true; std::vector source_types = { rpc::ExportEvent_SourceType::ExportEvent_SourceType_EXPORT_TASK}; diff --git a/src/ray/gcs/gcs_server/tests/gcs_actor_manager_test.cc b/src/ray/gcs/gcs_server/tests/gcs_actor_manager_test.cc index 1c4a60bff97c..2d9cb3ad434d 100644 --- a/src/ray/gcs/gcs_server/tests/gcs_actor_manager_test.cc +++ b/src/ray/gcs/gcs_server/tests/gcs_actor_manager_test.cc @@ -1224,8 +1224,7 @@ TEST_F(GcsActorManagerTest, TestGetAllActorInfoFilters) { rpc::GetAllActorInfoRequest request; request.mutable_filters()->set_actor_id(actor->GetActorID().Binary()); - auto &reply = - *google::protobuf::Arena::CreateMessage(&arena); + auto &reply = *google::protobuf::Arena::Create(&arena); gcs_actor_manager_->HandleGetAllActorInfo(request, &reply, callback); ASSERT_EQ(reply.actor_table_data().size(), 1); ASSERT_EQ(reply.total(), 1 + num_other_actors); @@ -1237,8 +1236,7 @@ TEST_F(GcsActorManagerTest, TestGetAllActorInfoFilters) { rpc::GetAllActorInfoRequest request; request.mutable_filters()->set_job_id(job_id.Binary()); - auto &reply = - *google::protobuf::Arena::CreateMessage(&arena); + auto &reply = *google::protobuf::Arena::Create(&arena); gcs_actor_manager_->HandleGetAllActorInfo(request, &reply, callback); ASSERT_EQ(reply.actor_table_data().size(), 1); ASSERT_EQ(reply.num_filtered(), num_other_actors); @@ -1249,8 +1247,7 @@ TEST_F(GcsActorManagerTest, TestGetAllActorInfoFilters) { rpc::GetAllActorInfoRequest request; request.mutable_filters()->set_state(rpc::ActorTableData::ALIVE); - auto &reply = - *google::protobuf::Arena::CreateMessage(&arena); + auto &reply = *google::protobuf::Arena::Create(&arena); gcs_actor_manager_->HandleGetAllActorInfo(request, &reply, callback); ASSERT_EQ(reply.actor_table_data().size(), 1); ASSERT_EQ(reply.num_filtered(), num_other_actors); @@ -1262,8 +1259,7 @@ TEST_F(GcsActorManagerTest, TestGetAllActorInfoFilters) { request.mutable_filters()->set_state(rpc::ActorTableData::ALIVE); request.mutable_filters()->set_job_id(job_id.Binary()); - auto &reply = - *google::protobuf::Arena::CreateMessage(&arena); + auto &reply = *google::protobuf::Arena::Create(&arena); gcs_actor_manager_->HandleGetAllActorInfo(request, &reply, callback); ASSERT_EQ(reply.actor_table_data().size(), 1); ASSERT_EQ(reply.num_filtered(), num_other_actors); @@ -1273,8 +1269,7 @@ TEST_F(GcsActorManagerTest, TestGetAllActorInfoFilters) { request.mutable_filters()->set_state(rpc::ActorTableData::DEAD); request.mutable_filters()->set_job_id(job_id.Binary()); - auto &reply = - *google::protobuf::Arena::CreateMessage(&arena); + auto &reply = *google::protobuf::Arena::Create(&arena); gcs_actor_manager_->HandleGetAllActorInfo(request, &reply, callback); ASSERT_EQ(reply.num_filtered(), num_other_actors + 1); ASSERT_EQ(reply.actor_table_data().size(), 0); @@ -1297,14 +1292,13 @@ TEST_F(GcsActorManagerTest, TestGetAllActorInfoLimit) { { rpc::GetAllActorInfoRequest request; auto &reply = - *google::protobuf::Arena::CreateMessage(&arena); + *google::protobuf::Arena::Create(&arena); auto callback = [](Status, std::function, std::function) {}; gcs_actor_manager_->HandleGetAllActorInfo(request, &reply, callback); ASSERT_EQ(reply.actor_table_data().size(), 3); request.set_limit(2); - auto &reply_2 = - *google::protobuf::Arena::CreateMessage(&arena); + auto &reply_2 = *google::protobuf::Arena::Create(&arena); gcs_actor_manager_->HandleGetAllActorInfo(request, &reply_2, callback); ASSERT_EQ(reply_2.actor_table_data().size(), 2); ASSERT_EQ(reply_2.total(), 3); diff --git a/src/ray/rpc/server_call.h b/src/ray/rpc/server_call.h index 698767d7e25b..cdbbb5941801 100644 --- a/src/ray/rpc/server_call.h +++ b/src/ray/rpc/server_call.h @@ -190,7 +190,7 @@ class ServerCallImpl : public ServerCall { cluster_id_(cluster_id), start_time_(0), record_metrics_(record_metrics) { - reply_ = google::protobuf::Arena::CreateMessage(&arena_); + reply_ = google::protobuf::Arena::Create(&arena_); // TODO(Yi Cheng) call_name_ sometimes get corrunpted due to memory issues. RAY_CHECK(!call_name_.empty()) << "Call name is empty"; if (record_metrics_) { diff --git a/src/ray/util/event.cc b/src/ray/util/event.cc index bcb4422c6502..10a87d0a7b22 100644 --- a/src/ray/util/event.cc +++ b/src/ray/util/event.cc @@ -138,7 +138,7 @@ std::string LogEventReporter::ExportEventToString(const rpc::ExportEvent &export google::protobuf::util::JsonPrintOptions options; options.preserve_proto_field_names = true; // Required so enum with value 0 is not omitted - options.always_print_primitive_fields = true; + options.always_print_fields_with_no_presence = true; if (export_event.has_task_event_data()) { RAY_CHECK(google::protobuf::util::MessageToJsonString( export_event.task_event_data(), &event_data_as_string, options) diff --git a/thirdparty/patches/grpc-avoid-goaway-messages.patch b/thirdparty/patches/grpc-avoid-goaway-messages.patch new file mode 100644 index 000000000000..0cd558aa0a0e --- /dev/null +++ b/thirdparty/patches/grpc-avoid-goaway-messages.patch @@ -0,0 +1,19 @@ +diff --git a/src/core/ext/transport/chttp2/transport/chttp2_transport.cc b/src/core/ext/transport/chttp2/transport/chttp2_transport.cc +index d3c5d994c1..1edc7ae29d 100644 +--- src/core/ext/transport/chttp2/transport/chttp2_transport.cc ++++ src/core/ext/transport/chttp2/transport/chttp2_transport.cc +@@ -1176,13 +1176,6 @@ void grpc_chttp2_add_incoming_goaway(grpc_chttp2_transport* t, + GRPC_TRACE_LOG(http, INFO) + << "transport " << t << " got goaway with last stream id " + << last_stream_id; +- // We want to log this irrespective of whether http tracing is enabled if we +- // received a GOAWAY with a non NO_ERROR code. +- if (goaway_error != GRPC_HTTP2_NO_ERROR) { +- LOG(INFO) << t->peer_string.as_string_view() << ": Got goaway [" +- << goaway_error +- << "] err=" << grpc_core::StatusToString(t->goaway_error); +- } + if (t->is_client) { + cancel_unstarted_streams(t, t->goaway_error, false); + // Cancel all unseen streams + diff --git a/thirdparty/patches/grpc-configurable-thread-count.patch b/thirdparty/patches/grpc-configurable-thread-count.patch index 26387f51a5be..f1ae73ec4a1d 100644 --- a/thirdparty/patches/grpc-configurable-thread-count.patch +++ b/thirdparty/patches/grpc-configurable-thread-count.patch @@ -1,7 +1,6 @@ -diff --git src/core/lib/gpr/linux/cpu.cc b/src/core/lib/gpr/linux/cpu.cc -index 670ca6551c..043021dc4a 100644 ---- src/core/lib/gpr/linux/cpu.cc -+++ src/core/lib/gpr/linux/cpu.cc +diff --git src/core/util/linux/cpu.cc src/core/util/linux/cpu.cc +--- src/core/util/linux/cpu.cc ++++ src/core/util/linux/cpu.cc @@ -24,6 +24,7 @@ #ifdef GPR_CPU_LINUX @@ -10,7 +9,7 @@ index 670ca6551c..043021dc4a 100644 #include #include #include -@@ -49,7 +50,17 @@ static void init_num_cpus() { +@@ -50,7 +51,17 @@ static void init_num_cpus() { #endif // This must be signed. sysconf returns -1 when the number cannot be // determined @@ -27,5 +26,5 @@ index 670ca6551c..043021dc4a 100644 + ncpus = static_cast(sysconf(_SC_NPROCESSORS_CONF)); + } if (ncpus < 1) { - gpr_log(GPR_ERROR, "Cannot determine number of CPUs: assuming 1"); + LOG(ERROR) << "Cannot determine number of CPUs: assuming 1"; ncpus = 1; diff --git a/thirdparty/patches/grpc-zlib-fdopen.patch b/thirdparty/patches/grpc-zlib-fdopen.patch index 83dfba2b95ff..e69de29bb2d1 100644 --- a/thirdparty/patches/grpc-zlib-fdopen.patch +++ b/thirdparty/patches/grpc-zlib-fdopen.patch @@ -1,13 +0,0 @@ -diff -u bazel/grpc_deps.bzl ---- bazel/grpc_deps.bzl -+++ bazel/grpc_deps.bzl -@@ -238,6 +238,9 @@ - "https://storage.googleapis.com/grpc-bazel-mirror/github.com/madler/zlib/archive/04f42ceca40f73e2978b50e93806c2a18c1281fc.tar.gz", - "https://github.com/madler/zlib/archive/04f42ceca40f73e2978b50e93806c2a18c1281fc.tar.gz", - ], -+ patches = [ -+ "@io_ray//thirdparty/patches:zlib-fdopen.patch", -+ ] - ) - - if "com_google_protobuf" not in native.existing_rules(): diff --git a/thirdparty/patches/protobuf-windows-const-nan.patch b/thirdparty/patches/protobuf-windows-const-nan.patch new file mode 100644 index 000000000000..99b548f49f83 --- /dev/null +++ b/thirdparty/patches/protobuf-windows-const-nan.patch @@ -0,0 +1,46 @@ +diff -u upb/message/internal/message.c /tmp/message.c +--- upb/message/internal/message.c ++++ upb/message/internal/message.c +@@ -19,6 +19,26 @@ + + const float kUpb_FltInfinity = INFINITY; + const double kUpb_Infinity = INFINITY; +-const double kUpb_NaN = NAN; ++ ++// The latest win32 SDKs have an invalid definition of NAN. ++// https://developercommunity.visualstudio.com/t/NAN-is-no-longer-compile-time-constant-i/10688907 ++// ++// Unfortunately, the `0.0 / 0.0` workaround doesn't work in Clang under C23, so ++// try __builtin_nan first, if that exists. ++#ifdef _WIN32 ++#ifdef __has_builtin ++#if __has_builtin(__builtin_nan) ++#define UPB_NAN __builtin_nan("0") ++#endif ++#endif ++#ifndef UPB_NAN ++#define UPB_NAN 0.0 / 0.0 ++#endif ++#else ++// For !_WIN32, assume math.h works. ++#define UPB_NAN NAN ++#endif ++const double kUpb_NaN = UPB_NAN; ++ + + bool UPB_PRIVATE(_upb_Message_Realloc)(struct upb_Message* msg, size_t need, + +diff --git .bazelrc .bazelrc +--- .bazelrc ++++ .bazelrc +@@ -1,4 +1,9 @@ +-build --cxxopt=-std=c++17 --host_cxxopt=-std=c++17 ++build --enable_platform_specific_config ++build:linux --cxxopt=-std=c++17 --host_cxxopt=-std=c++17 ++build:macos --cxxopt=-std=c++17 --host_cxxopt=-std=c++17 ++build:windows --compiler=clang-cl ++build:windows --cxxopt=-Wno-invalid-offsetof ++build:windows --cxxopt=-std:c++17 --host_cxxopt=-std:c++17 + + build:dbg --compilation_mode=dbg +