diff --git a/bin/Index/Importer.cpp b/bin/Index/Importer.cpp
index e084074b3..25fdbe5a0 100644
--- a/bin/Index/Importer.cpp
+++ b/bin/Index/Importer.cpp
@@ -577,7 +577,12 @@ static const std::string_view kMLLVM("-mllvm");
 static bool IsOptNeedingFixing(std::string_view arg) {
   return arg == kOptNoStdInc || arg == kOptNoStdIncxx ||
          arg == kOptNoBuiltinInc || arg == kOptNoStdSystemInc ||
-         arg.starts_with("-fsanitize=");
+         arg.starts_with("-fsanitize=") ||
+         // NOTE(review): clang -cc1 passes the relocation model as a separate
+         // argument (`-mrelocation-model <model>`). Confirm that the value
+         // following this flag is handled as well, or whether this check
+         // belongs in IsOpt1NeedingFixing instead.
+         arg.starts_with("-mrelocation-model");
 }
 
 static bool IsOpt1NeedingFixing(std::string_view arg) {
diff --git a/bin/Index/Main.cpp b/bin/Index/Main.cpp
index 8236d7fbf..8c1befafb 100644
--- a/bin/Index/Main.cpp
+++ b/bin/Index/Main.cpp
@@ -71,6 +71,11 @@ DEFINE_string(workspace, "mx-workspace",
 DEFINE_bool(generate_sourceir, false,
             "Generate SourceIR from the top-level declarations");
 
+// Subprocess backend selection. At least one of these must be passed; `main`
+// rejects a command line that sets neither.
+DEFINE_bool(fork_mode, false, "Use --fork_mode if running inside docker");
+DEFINE_bool(reproc_mode, false, "Use --reproc_mode to use reproc library");
+
 namespace {
 
 std::unique_ptr
@@ -165,6 +170,7 @@ int main(int argc, char *argv[], char *envp[]) {
       << " [--env PATH_TO_COPIED_ENV_VARS]\n"
       << " [--show_progress]\n"
       << " [--generate_sourceir]\n"
+      << " (--fork_mode | --reproc_mode)\n"
       << " --db DATABASE\n"
       << " --workspace INDEXER_WORKSPACE_DIR\n"
       << " --target COMPILE_COMMANDS\n";
@@ -187,7 +193,13 @@ int main(int argc, char *argv[], char *envp[]) {
   if (FLAGS_target.empty()) {
     std::cerr
         << "Must specify a path to a target file to import with --target. "
-           "Use - or /dev/stdin to read from stdin.";
+           "Use - or /dev/stdin to read from stdin.\n";
+    return EXIT_FAILURE;
+  }
+
+  if (!FLAGS_reproc_mode && !FLAGS_fork_mode) {
+    std::cerr
+        << "Must specify --reproc_mode or --fork_mode\n";
     return EXIT_FAILURE;
   }
 
diff --git a/bin/Index/Subprocess.cpp b/bin/Index/Subprocess.cpp
index 18f864002..9e7dfed83 100644
--- a/bin/Index/Subprocess.cpp
+++ b/bin/Index/Subprocess.cpp
@@ -2,25 +2,373 @@
 //
 // This source code is licensed in accordance with the terms specified in
 // the LICENSE file found in the root directory of this source tree.
-
 #include "Subprocess.h"
 
 #include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
 #include
 #include
 #include
+#include
+#include
+#include
 
-namespace indexer {
+DECLARE_bool(fork_mode);
+DECLARE_bool(reproc_mode);
 
+namespace indexer {
 // Execute the command specified in `args` with the ability to feed the
 // command input and capture output. Passing `nullptr` to any of `input`,
 // `output`, or `error` is acceptable.
-std::variant<std::error_code, int> Subprocess::Execute(
-    const std::vector<std::string> &cmd,
-    const std::unordered_map<std::string, std::string> *env,
-    std::string *input, std::string *output, std::string *error) {
+// Tunables for the fork+exec implementation.
+// Inactivity timeout passed to each poll() call, in milliseconds (5 minutes).
+constexpr int TIMEOUT_MS = 300000;
+// Pause between SIGTERM and SIGKILL. usleep() takes microseconds, so this is
+// 100 ms (hence the `_US` suffix).
+constexpr useconds_t GRACEFUL_TERMINATION_US = 100000;
+// Size of the scratch buffer used for each read() from the child.
+constexpr size_t IO_BUFFER_SIZE = 4096;
+
+// Run `call`; if it returns -1, invoke `cleanup()` and return the failure as a
+// `std::error_code` from the enclosing function. `errno` is captured before
+// `cleanup()` runs because the close() calls made there may overwrite it.
+#define CHECK_SYSTEM_CALL(call, cleanup) \
+  do { \
+    if ((call) == -1) { \
+      const int saved_errno = errno; \
+      cleanup(); \
+      return std::make_error_code(static_cast<std::errc>(saved_errno)); \
+    } \
+  } while (0)
+
+// Close `fd` if it is open and reset it to -1, so that a later cleanup pass
+// never closes the same descriptor number twice.
+static void close_fd(int &fd) {
+  if (fd != -1) {
+    close(fd);
+    fd = -1;
+  }
+}
+
+// State for one parent-side pipe end. A handler with a non-null `buffer` reads
+// (child stdout/stderr); one with a null `buffer` writes `input_data` (child
+// stdin).
+struct PipeHandler {
+  int fd = -1;                       // Pipe file descriptor
+  bool done = false;                 // Stream completion status
+  std::string *buffer = nullptr;     // Output buffer (for stdout/stderr)
+  const char *input_data = nullptr;  // Input data (for stdin)
+  size_t input_pos = 0;              // Current position (for stdin)
+  size_t input_remaining = 0;        // Remaining bytes (for stdin)
+
+  // Fill `pfd` so poll() waits for readability (output pipes) or writability
+  // (the stdin pipe).
+  void setup_poll(pollfd &pfd) const {
+    pfd = {fd, static_cast<short>(buffer ? POLLIN : POLLOUT), 0};
+  }
+
+  // Close the descriptor and mark this stream as finished.
+  void finish() {
+    close_fd(fd);
+    done = true;
+  }
+
+  // Service this pipe using the poll() result in `poll_fds[poll_index]`.
+  // `poll_fds` is never resized here: the caller walks it with a running
+  // index, so erasing an entry would shift the slots of the pipes that follow.
+  void handle_io(std::vector<pollfd> &poll_fds, size_t poll_index,
+                 std::array<char, IO_BUFFER_SIZE> &io_buffer) {
+    if (done) {
+      return;
+    }
+    const short revents = poll_fds[poll_index].revents;
+    if (!(revents & (POLLIN | POLLOUT | POLLHUP | POLLERR | POLLNVAL))) {
+      return;
+    }
+
+    if (buffer) {
+      // Reading stdout/stderr. POLLHUP can be reported together with unread
+      // data, so keep reading until read() itself reports end-of-file.
+      if (!(revents & (POLLIN | POLLHUP))) {
+        finish();  // POLLERR/POLLNVAL only: nothing left to read
+        return;
+      }
+      const ssize_t bytes_read = read(fd, io_buffer.data(), io_buffer.size());
+      if (bytes_read > 0) {
+        buffer->append(io_buffer.data(), static_cast<size_t>(bytes_read));
+      } else if (bytes_read == 0 || (errno != EAGAIN && errno != EINTR)) {
+        finish();
+      }
+      return;
+    }
+
+    // Writing stdin. Any hang-up or error means the child stopped reading.
+    if (revents & (POLLHUP | POLLERR | POLLNVAL)) {
+      finish();
+      return;
+    }
+    const ssize_t written = write(fd, input_data + input_pos, input_remaining);
+    if (written > 0) {
+      input_pos += static_cast<size_t>(written);
+      input_remaining -= static_cast<size_t>(written);
+      if (input_remaining == 0) {
+        finish();  // Closing the pipe delivers end-of-file to the child
+      }
+    } else if (written == 0 || (errno != EAGAIN && errno != EINTR)) {
+      finish();
+    }
+  }
+};
+
+// Create the pipes that were requested: stdin only when there is input to
+// send, stdout/stderr only when the caller wants them captured. On failure,
+// `cleanup` is invoked and the error is returned.
+std::error_code setup_pipes(int stdin_pipe[2], int stdout_pipe[2],
+                            int stderr_pipe[2], std::string *input,
+                            std::string *output, std::string *error,
+                            std::function<void()> cleanup) {
+  if (input && !input->empty()) {
+    CHECK_SYSTEM_CALL(pipe(stdin_pipe), cleanup);
+  }
+  if (output) {
+    CHECK_SYSTEM_CALL(pipe(stdout_pipe), cleanup);
+  }
+  if (error) {
+    CHECK_SYSTEM_CALL(pipe(stderr_pipe), cleanup);
+  }
+  return {};
+}
+
+// Runs in the forked child: wire the pipes (or /dev/null) onto the standard
+// streams, apply `env` on top of the inherited environment, and exec `cmd`.
+// Never returns; every failure ends in `_exit(1)`.
+void handle_child(const std::vector<std::string> &cmd,
+                  const std::unordered_map<std::string, std::string> *env,
+                  int stdin_pipe[2], int stdout_pipe[2], int stderr_pipe[2],
+                  std::string *input, std::string *output, std::string *error) {
+  try {
+    // Setup stdin
+    if (input && !input->empty()) {
+      close(stdin_pipe[1]);
+      if (dup2(stdin_pipe[0], STDIN_FILENO) == -1) _exit(1);
+      close(stdin_pipe[0]);
+    }
+
+    // Setup stdout
+    if (output) {
+      close(stdout_pipe[0]);
+      if (dup2(stdout_pipe[1], STDOUT_FILENO) == -1) _exit(1);
+      close(stdout_pipe[1]);
+    } else {
+      int dev_null = open("/dev/null", O_WRONLY);
+      if (dev_null == -1 || dup2(dev_null, STDOUT_FILENO) == -1) _exit(1);
+      close(dev_null);
+    }
+
+    // Setup stderr
+    if (error) {
+      close(stderr_pipe[0]);
+      if (dup2(stderr_pipe[1], STDERR_FILENO) == -1) _exit(1);
+      close(stderr_pipe[1]);
+    } else {
+      int dev_null = open("/dev/null", O_WRONLY);
+      if (dev_null == -1 || dup2(dev_null, STDERR_FILENO) == -1) _exit(1);
+      close(dev_null);
+    }
+
+    // Setup environment variables
+    if (env) {
+      for (const auto &[name, value] : *env) {
+        if (setenv(name.c_str(), value.c_str(), 1) != 0) {
+          _exit(1);
+        }
+      }
+    }
+
+    // Prepare arguments for execvp
+    std::vector<char *> args;
+    args.reserve(cmd.size() + 1);
+    for (const auto &arg : cmd) {
+      args.push_back(const_cast<char *>(arg.c_str()));
+    }
+    args.push_back(nullptr);
+
+    execvp(cmd[0].c_str(), args.data());
+    _exit(1);  // execvp failed
+  } catch (...) {
+    _exit(1);  // Never let an exception unwind into the parent's code path
+  }
+}
+
+// Runs in the parent: feed `input` to the child and collect its stdout/stderr
+// until every pipe reaches end-of-file. If poll() sees no activity for
+// TIMEOUT_MS the child is sent SIGTERM, then SIGKILL, and `timed_out` is
+// returned. The child is NOT reaped here; the caller must wait for it.
+// Descriptors closed along the way are reset to -1 in the pipe arrays so that
+// `cleanup` can safely be run afterwards.
+std::error_code handle_parent_io(pid_t child_pid, int stdin_pipe[2],
+                                 int stdout_pipe[2], int stderr_pipe[2],
+                                 std::string *input, std::string *output,
+                                 std::string *error,
+                                 std::function<void()> cleanup) {
+  // Close the child's ends of the pipes
+  close_fd(stdin_pipe[0]);
+  close_fd(stdout_pipe[1]);
+  close_fd(stderr_pipe[1]);
+
+  // Set pipes to non-blocking
+  if (input && !input->empty()) {
+    CHECK_SYSTEM_CALL(fcntl(stdin_pipe[1], F_SETFL, O_NONBLOCK), cleanup);
+  }
+  if (output) {
+    CHECK_SYSTEM_CALL(fcntl(stdout_pipe[0], F_SETFL, O_NONBLOCK), cleanup);
+  }
+  if (error) {
+    CHECK_SYSTEM_CALL(fcntl(stderr_pipe[0], F_SETFL, O_NONBLOCK), cleanup);
+  }
+
+  // One handler per stream; streams that were not requested start out done.
+  std::string stdout_buf, stderr_buf;
+  std::vector<PipeHandler> pipes = {
+      {stdin_pipe[1], !input || input->empty(), nullptr,
+       input ? input->data() : nullptr, 0, input ? input->size() : 0},
+      {stdout_pipe[0], !output, output ? &stdout_buf : nullptr},
+      {stderr_pipe[0], !error, error ? &stderr_buf : nullptr}};
+  // Parent-side slot of each handler, in the same order as `pipes`.
+  int *const parent_fds[] = {&stdin_pipe[1], &stdout_pipe[0], &stderr_pipe[0]};
+
+  std::array<char, IO_BUFFER_SIZE> buffer;
+  std::vector<pollfd> poll_fds;
+
+  // Main I/O loop
+  while (std::any_of(pipes.begin(), pipes.end(),
+                     [](const auto &p) { return !p.done; })) {
+    poll_fds.clear();
+    for (const auto &pipe : pipes) {
+      if (!pipe.done) {
+        pollfd pfd;
+        pipe.setup_poll(pfd);
+        poll_fds.push_back(pfd);
+      }
+    }
+
+    const int poll_result = poll(poll_fds.data(), poll_fds.size(), TIMEOUT_MS);
+    if (poll_result == 0) {
+      kill(child_pid, SIGTERM);
+      usleep(GRACEFUL_TERMINATION_US);
+      kill(child_pid, SIGKILL);
+      cleanup();
+      return std::make_error_code(std::errc::timed_out);
+    }
+    if (poll_result < 0) {
+      if (errno == EINTR) {
+        continue;  // Interrupted by a signal; poll again
+      }
+      const int saved_errno = errno;
+      cleanup();
+      return std::make_error_code(static_cast<std::errc>(saved_errno));
+    }
+
+    size_t poll_index = 0;
+    for (size_t i = 0; i < pipes.size(); ++i) {
+      if (pipes[i].done) {
+        continue;
+      }
+      pipes[i].handle_io(poll_fds, poll_index++, buffer);
+      if (pipes[i].done) {
+        *parent_fds[i] = -1;  // Already closed by the handler
+      }
+    }
+  }
+
+  // Move output buffers
+  if (output) *output = std::move(stdout_buf);
+  if (error) *error = std::move(stderr_buf);
+
+  return {};
+}
+
+// Reap `child_pid` and translate its wait status: the exit code for a normal
+// exit, 128 + signal number when killed by a signal, or the waitpid() error.
+std::variant<std::error_code, int> wait_for_child(pid_t child_pid) {
+  int status = 0;
+  pid_t waited;
+  do {
+    waited = waitpid(child_pid, &status, 0);
+  } while (waited == -1 && errno == EINTR);
+  if (waited == -1) {
+    return std::make_error_code(static_cast<std::errc>(errno));
+  }
+
+  if (WIFEXITED(status)) {
+    return WEXITSTATUS(status);
+  } else if (WIFSIGNALED(status)) {
+    return 128 + WTERMSIG(status);
+  }
+  return 1;  // Unknown error
+}
+
+// Detect if running inside a Docker container: true when any line of
+// /proc/self/cgroup mentions "docker" or when /.dockerenv can be opened.
+bool Subprocess::IsRunningInDocker() {
+  std::ifstream cgroup("/proc/self/cgroup");
+  if (cgroup.is_open()) {
+    std::string line;
+    while (std::getline(cgroup, line)) {
+      if (line.find("docker") != std::string::npos) {
+        return true;
+      }
+    }
+  }
+
+  // Check for .dockerenv file
+  std::ifstream dockerenv("/.dockerenv");
+  return dockerenv.good();
+}
+
+// Execute `cmd` with fork+exec. Returns the child's exit status, or an error
+// code if setup, I/O, or waiting failed. On any I/O failure the child is
+// killed and reaped so that no zombie process is left behind.
+std::variant<std::error_code, int>
+Subprocess::ExecuteFork(const std::vector<std::string> &cmd,
+                        const std::unordered_map<std::string, std::string> *env,
+                        std::string *input, std::string *output,
+                        std::string *error) {
+  if (cmd.empty()) {
+    return std::make_error_code(std::errc::invalid_argument);
+  }
+
+  // Initialize pipes
+  int stdin_pipe[2] = {-1, -1};
+  int stdout_pipe[2] = {-1, -1};
+  int stderr_pipe[2] = {-1, -1};
+
+  auto cleanup_pipes = [&]() {
+    for (int *fd : {&stdin_pipe[0], &stdin_pipe[1], &stdout_pipe[0],
+                    &stdout_pipe[1], &stderr_pipe[0], &stderr_pipe[1]}) {
+      close_fd(*fd);
+    }
+  };
+
+  // Setup pipes
+  auto ec = setup_pipes(stdin_pipe, stdout_pipe, stderr_pipe, input, output,
+                        error, cleanup_pipes);
+  if (ec) {
+    return ec;
+  }
+
+  // Fork the process
+  pid_t child_pid;
+  CHECK_SYSTEM_CALL(child_pid = fork(), cleanup_pipes);
+
+  if (child_pid == 0) {
+    handle_child(cmd, env, stdin_pipe, stdout_pipe, stderr_pipe, input, output,
+                 error);
+  }
+
+  // Parent process: handle I/O
+  ec = handle_parent_io(child_pid, stdin_pipe, stdout_pipe, stderr_pipe, input,
+                        output, error, cleanup_pipes);
+  if (ec) {
+    // The child may still be running or may be an unreaped zombie.
+    kill(child_pid, SIGKILL);
+    (void) wait_for_child(child_pid);
+    return ec;
+  }
+
+  // Wait for child
+  return wait_for_child(child_pid);
+}
+
+// Execute the command specified in `cmd`, optionally feeding it `input` and
+// capturing `output`/`error`; passing `nullptr` for any of them is acceptable.
+// `--fork_mode` selects the fork+exec implementation; otherwise the reproc
+// implementation below is used. `--reproc_mode` wins if both are given.
+std::variant<std::error_code, int>
+Subprocess::Execute(const std::vector<std::string> &cmd,
+                    const std::unordered_map<std::string, std::string> *env,
+                    std::string *input, std::string *output,
+                    std::string *error) {
+  if (!FLAGS_reproc_mode) {
+    if (FLAGS_fork_mode) {
+      VLOG(1) << "Executing subprocess with fork+exec";
+      return ExecuteFork(cmd, env, input, output, error);
+    }
+
+    // Neither implementation was selected. reproc is the default, but it is
+    // refused inside Docker. The probe reads files, so run it only once.
+    static const bool inside_docker = IsRunningInDocker();
+    if (inside_docker) {
+      LOG(ERROR) << "Docker environment found. Use --fork_mode";
+      _exit(1);
+    }
+  }
+
+  // reproc implementation
+  VLOG(1) << "Executing subprocess with reproc";
   reproc::process process;
   reproc::arguments args(cmd);
   reproc::options options;
@@ -52,9 +400,9 @@ std::variant<std::error_code, int> Subprocess::Execute(
   if (input && !input->empty()) {
     size_t next_i = 0u;
     auto data = reinterpret_cast<const uint8_t *>(input->data());
-    for (auto max_i = input->size(); next_i < max_i; ) {
-      auto [num_written_bytes, write_ec] = process.write(
-          &(data[next_i]), max_i - next_i);
+    for (auto max_i = input->size(); next_i < max_i;) {
+      auto [num_written_bytes, write_ec] =
+          process.write(&(data[next_i]), max_i - next_i);
       if (write_ec) {
         return write_ec;
       } else {
diff --git a/bin/Index/Subprocess.h b/bin/Index/Subprocess.h
index 4cb69d69e..2f04f904c 100644
--- a/bin/Index/Subprocess.h
+++ b/bin/Index/Subprocess.h
@@ -25,6 +25,14 @@ class Subprocess {
       std::string *input=nullptr,
       std::string *output=nullptr,
       std::string *error=nullptr);
+ private:
+  // fork+exec implementation of `Execute`, selected with --fork_mode.
+  static std::variant<std::error_code, int> ExecuteFork(
+      const std::vector<std::string> &cmd,
+      const std::unordered_map<std::string, std::string> *env,
+      std::string *input, std::string *output, std::string *error);
+  // Heuristic Docker detection (cgroup contents, /.dockerenv).
+  static bool IsRunningInDocker();
 };
 
 } // namespace indexer
diff --git a/docs/INDEXING.md b/docs/INDEXING.md
index edbfe57e8..d0bc0717b 100644
--- a/docs/INDEXING.md
+++ b/docs/INDEXING.md
@@ -50,9 +50,14 @@ compile commands.
     --target /path/to/compile_commands.json \  # Compile commands
     --workspace /path/to/workspace \           # Workspace directory
     --env /path/to/environment-vars \          # Saved environment variables
-    --show_progress                            # Show progress indicators
+    --show_progress \                          # Show progress indicators
+    --reproc_mode                              # Or --fork_mode; one of the two is required
 ```
 
+**Note:** `mx-index` requires one of `--reproc_mode` or `--fork_mode`. `--reproc_mode` runs
+subprocesses through the `reproc` library, while `--fork_mode` uses the traditional `fork+exec`
+method. Switch to `--fork_mode` if `--reproc_mode` fails, such as in certain Docker environments.
+
 **Note:** You generally should run `mx-index` on the same machine / environment
 as the build. `mx-index` will invoke the original build compiler in the original
 build directories as an oracle to gather relevant information. If you aren't