Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions custom_ops/gpu_ops/beam_search_softmax.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,16 @@
#include <hipcub/hipcub.hpp>
namespace cub = hipcub;
#endif
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#if !defined(_WIN32)
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#endif
#include <algorithm>
#include "helper.h"
#include "stdint.h"
Expand Down
3 changes: 3 additions & 0 deletions custom_ops/gpu_ops/custom_ftok.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

#pragma once

// POSIX-only: IPC headers
#if !defined(_WIN32)
#include <errno.h>
#include <stdio.h>
#include <sys/stat.h>
Expand All @@ -35,3 +37,4 @@ inline key_t custom_ftok(const char* path, int id) {
return static_cast<key_t>(((st.st_dev & 0x0f) << 28) |
((st.st_ino & 0xff) << 20) | (id & 0xfffff));
}
#endif
6 changes: 4 additions & 2 deletions custom_ops/gpu_ops/dequant_int8.cu
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,16 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#if !defined(_WIN32)
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#endif
#include <algorithm>
#include "helper.h"

Expand Down
3 changes: 3 additions & 0 deletions custom_ops/gpu_ops/env.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@

#pragma once

#include <cstdlib>
#include <string>

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 建议 `env.h` 新增了 `<cstdlib>` 和 `<string>` 两个头文件,这不是注释或 `#ifdef` 守卫,而是修复了原有的隐式头文件依赖(`std::getenv` 需要 `<cstdlib>`,`std::string` 需要 `<string>`)。

建议在 PR 描述的 Modifications 中补充说明此项改动,例如:

Added missing <cstdlib> and <string> includes to env.h to fix implicit header dependencies.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated PR description — the env.h portability fix (<cstdlib> + <string>) is now documented in the Modifications section.

inline uint32_t get_decoder_block_shape_q() {
static const char* decoder_block_shape_q_env =
std::getenv("FLAGS_dec_block_shape_q");
Expand Down
6 changes: 4 additions & 2 deletions custom_ops/gpu_ops/fused_get_rotary_embedding.cu
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,16 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#if !defined(_WIN32)
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#endif
#include <algorithm>
#include "paddle/extension.h"

Expand Down
6 changes: 4 additions & 2 deletions custom_ops/gpu_ops/fused_hadamard_quant_fp8.cu
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,16 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#if !defined(_WIN32)
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#endif
#include <algorithm>
#include "helper.h"

Expand Down
8 changes: 8 additions & 0 deletions custom_ops/gpu_ops/get_data_ptr_ipc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "helper.h"

namespace {
#if !defined(_WIN32)
int sharedMemoryOpen2(const char *name, size_t sz, sharedMemoryInfo *info) {
info->size = sz;
info->shmFd = shm_open(name, O_RDWR, 0777);
Expand All @@ -31,10 +32,16 @@ int sharedMemoryOpen2(const char *name, size_t sz, sharedMemoryInfo *info) {

return 0;
}
#endif
} // namespace

std::vector<paddle::Tensor> GetDataPtrIpc(const paddle::Tensor &tmp_input,
const std::string &shm_name) {
#if defined(_WIN32)
PD_THROW(
"GetDataPtrIpc is not supported on Windows "
"(POSIX shared memory required).");
#else
auto out_data_ptr_tensor =
paddle::full({1}, 0, paddle::DataType::INT64, paddle::CPUPlace());
auto out_data_ptr_tensor_ptr = out_data_ptr_tensor.data<int64_t>();
Expand All @@ -53,6 +60,7 @@ std::vector<paddle::Tensor> GetDataPtrIpc(const paddle::Tensor &tmp_input,

out_data_ptr_tensor_ptr[0] = reinterpret_cast<int64_t>(ptr);
return {out_data_ptr_tensor};
#endif
}

PD_BUILD_STATIC_OP(get_data_ptr_ipc)
Expand Down
9 changes: 8 additions & 1 deletion custom_ops/gpu_ops/get_output.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,12 @@

#include <stdio.h>
#include <string.h>
#include <sys/types.h>
// POSIX-only: sys/ipc.h, sys/msg.h
#if !defined(_WIN32)
#include <sys/ipc.h>
#include <sys/msg.h>
#include <sys/types.h>
#endif
#include "custom_ftok.h"
#include "paddle/extension.h"

Expand All @@ -36,6 +39,9 @@ void GetOutput(const paddle::Tensor& x,
int64_t rank_id,
bool wait_flag,
int msg_queue_id) {
#if defined(_WIN32)
PD_THROW("GetOutput is not supported on Windows (POSIX IPC required).");
#else
if (rank_id > 0) {
return;
}
Expand Down Expand Up @@ -81,6 +87,7 @@ void GetOutput(const paddle::Tensor& x,
#endif

return;
#endif
}

void GetOutputStatic(const paddle::Tensor& x, int64_t rank_id, bool wait_flag) {
Expand Down
13 changes: 12 additions & 1 deletion custom_ops/gpu_ops/get_output_ep.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@

#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#if !defined(_WIN32)
#include <sys/ipc.h>
#include <sys/msg.h>
#include <sys/types.h>
#endif
#include "custom_ftok.h"
#include "msg_utils.h"
#include "paddle/extension.h"
Expand All @@ -29,6 +31,10 @@
void GetOutputKVSignal(const paddle::Tensor& x,
int64_t rank_id,
bool wait_flag) {
#if defined(_WIN32)
PD_THROW(
"GetOutputKVSignal is not supported on Windows (POSIX IPC required).");
#else
int msg_queue_id = 1024;
if (const char* msg_que_str_tmp = std::getenv("INFERENCE_MSG_QUEUE_ID")) {
std::string msg_que_str(msg_que_str_tmp);
Expand Down Expand Up @@ -57,12 +63,16 @@ void GetOutputKVSignal(const paddle::Tensor& x,
out_data[i] = msg_rcv.mtext[i];
}
return;
#endif
}

void GetOutputEp(const paddle::Tensor& x,
int64_t rank_id,
bool wait_flag,
int msg_queue_id) {
#if defined(_WIN32)
PD_THROW("GetOutputEp is not supported on Windows (POSIX IPC required).");
#else
static struct msgdata msg_rcv;
if (const char* inference_msg_queue_id_env_p =
std::getenv("INFERENCE_MSG_QUEUE_ID")) {
Expand Down Expand Up @@ -108,6 +118,7 @@ void GetOutputEp(const paddle::Tensor& x,
#endif

return;
#endif
}

void GetOutputEPStatic(const paddle::Tensor& x,
Expand Down
8 changes: 7 additions & 1 deletion custom_ops/gpu_ops/get_output_msg_with_topk.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@

#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#if !defined(_WIN32)
#include <sys/ipc.h>
#include <sys/msg.h>
#include <sys/types.h>
#endif
#include "custom_ftok.h"
#include "paddle/extension.h"

Expand All @@ -40,6 +42,9 @@ void GetOutputTopK(const paddle::Tensor& x,
int k,
int64_t rank_id,
bool wait_flag) {
#if defined(_WIN32)
PD_THROW("GetOutputTopK is not supported on Windows (POSIX IPC required).");
#else
static struct msgdata msg_rcv;
int msg_queue_id = 1;

Expand Down Expand Up @@ -101,6 +106,7 @@ void GetOutputTopK(const paddle::Tensor& x,
ranks_data[i] = (int64_t)msg_rcv.mtext_ranks[i];
}
return;
#endif
}

PD_BUILD_STATIC_OP(get_output_topk)
Expand Down
7 changes: 5 additions & 2 deletions custom_ops/gpu_ops/helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,18 @@
#ifndef PADDLE_WITH_COREX
#include "glog/logging.h"
#endif
#include <fcntl.h>
#include <nvml.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
// POSIX-only headers
#if !defined(_WIN32)
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#endif
#include <cassert>
#include <cstdlib>
#include <cstring>
Expand Down
7 changes: 5 additions & 2 deletions custom_ops/gpu_ops/msg_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,19 @@

#pragma once

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
// POSIX-only: shared memory headers
#if !defined(_WIN32)
#include <fcntl.h>
#include <sys/ipc.h>
#include <sys/mman.h>
#include <sys/msg.h>
#include <unistd.h>
#endif
#include "paddle/extension.h"

#define MAX_BSZ 512
Expand Down
6 changes: 6 additions & 0 deletions custom_ops/gpu_ops/remote_cache_kv_ipc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ bool RemoteCacheKvIpc::kv_complete_signal_shmem_opened = false;
RemoteCacheKvIpc::save_cache_kv_complete_signal_layerwise_meta_data
RemoteCacheKvIpc::open_shm_and_get_complete_signal_meta_data(
const int rank_id, const int device_id, const bool keep_pd_step_flag) {
#if defined(_WIN32)
PD_THROW(
"open_shm_and_get_complete_signal_meta_data is not supported on "
"Windows (POSIX shared memory required).");
#else
if (RemoteCacheKvIpc::kv_complete_signal_shmem_opened) {
if (keep_pd_step_flag) {
return RemoteCacheKvIpc::kv_complete_signal_meta_data;
Expand Down Expand Up @@ -103,6 +108,7 @@ RemoteCacheKvIpc::open_shm_and_get_complete_signal_meta_data(
RemoteCacheKvIpc::kv_complete_signal_identity_ptr = identity_ptr;
RemoteCacheKvIpc::kv_complete_signal_shmem_opened = true;
return meta_data;
#endif
}

void CUDART_CB
Expand Down
21 changes: 18 additions & 3 deletions custom_ops/gpu_ops/remote_cache_kv_ipc.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,19 @@

#pragma once

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
// POSIX-only: mmap, shm_open
#if !defined(_WIN32)
#include <fcntl.h>
#include <sys/ipc.h>
#include <sys/mman.h>
#include <sys/msg.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#endif

#include "custom_ftok.h"
#include "driver_types.h"
Expand Down Expand Up @@ -58,6 +61,11 @@ struct RemoteCacheKvIpc {
const int rank,
const int num_layers,
const int real_bsz) {
#if defined(_WIN32)
PD_THROW(
"RemoteCacheKvIpc::init is not supported on Windows "
"(POSIX IPC required).");
#else
layer_id_ = 0;
num_layers_ = num_layers;
msg_sed.mtype = 1;
Expand Down Expand Up @@ -85,9 +93,15 @@ struct RemoteCacheKvIpc {
msgid = msgget(key, IPC_CREAT | 0666);
inited = true;
}
#endif
}

void CUDART_CB send_signal() {
#if defined(_WIN32)
PD_THROW(
"RemoteCacheKvIpc::send_signal is not supported on Windows "
"(POSIX IPC required).");
#else
if (inited) {
msg_sed.mtext[1] = layer_id_;
if ((msgsnd(msgid, &msg_sed, (MAX_BSZ * 3 + 2) * 4, 0)) == -1) {
Expand All @@ -96,6 +110,7 @@ struct RemoteCacheKvIpc {
layer_id_ = (layer_id_ + 1);
assert(layer_id_ <= num_layers_);
}
#endif
}
};

Expand Down
8 changes: 7 additions & 1 deletion custom_ops/gpu_ops/save_output_msg_with_topk.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@

#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#if !defined(_WIN32)
#include <sys/ipc.h>
#include <sys/msg.h>
#include <sys/types.h>
#endif
#include "custom_ftok.h"
#include "paddle/extension.h"

Expand All @@ -42,6 +44,9 @@ void SaveOutMmsgTopK(const paddle::Tensor& x,
const paddle::Tensor& not_need_stop,
const paddle::Tensor& preempted_idx,
int64_t rank_id) {
#if defined(_WIN32)
PD_THROW("SaveOutMmsgTopK is not supported on Windows (POSIX IPC required).");
#else
if (rank_id > 0) {
return;
}
Expand Down Expand Up @@ -145,6 +150,7 @@ void SaveOutMmsgTopK(const paddle::Tensor& x,
printf("full msg buffer\n");
}
return;
#endif
}

PD_BUILD_STATIC_OP(save_output_topk)
Expand Down
Loading