diff --git a/CMakeLists.txt b/CMakeLists.txt index 8d6002db..77fd5cef 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,7 +20,7 @@ option(HAVE_PROMETHEUS_HISTOGRAM_BUCKETS "Enable Prometheus Histogram Buckets." option(ENABLE_PROFILING "Compile with -pg to enable gprof." OFF) set(DEFAULT_BUCKET_NAME "default" CACHE STRING "Name of the default bucket.") -set(BOOST_VERSION "1.80.0" CACHE STRING "Boost version") +set(BOOST_VERSION "1.83.0" CACHE STRING "Boost version") message(STATUS "HAVE_TESTS: ${HAVE_TESTS}") message(STATUS "HAVE_PYTHON_BINDINGS: ${HAVE_PYTHON_BINDINGS}") @@ -147,6 +147,18 @@ FetchContent_Declare( ) FetchContent_MakeAvailable(magic_enum) +# # Capnproto +FetchContent_Declare( + CapnProto + GIT_REPOSITORY https://github.com/capnproto/capnproto + GIT_TAG v1.0.1 +) +set(CapnProto_CXXFLAGS "-Wno-deprecated") +set(CapnProto_BUILD_TESTING OFF CACHE INTERNAL "") +set(CapnProto_CAPNP_LITE ON CACHE INTERNAL "") +FetchContent_MakeAvailable(CapnProto) + + # S3 if(DEFINED ENV{AWS_SDK_INSTALL_DIR}) set(AWSSDK_ROOT $ENV{AWS_SDK_INSTALL_DIR}) diff --git a/DEPENDENCIES b/DEPENDENCIES index a17c2d04..b122f182 100644 --- a/DEPENDENCIES +++ b/DEPENDENCIES @@ -11,7 +11,7 @@ export AWS_SDK_VERSION="1.10.20" # GRPC Version export GRPC_VERSION="v1.51.1" # Boost Version -export BOOST_VERSION="1.80.0" +export BOOST_VERSION="1.83.0" # GEDS Docker Version export GEDS_DOCKER_VERSION="1.0" diff --git a/doc/GEDS Daemon Mode.md b/doc/GEDS Daemon Mode.md new file mode 100644 index 00000000..b0e54d72 --- /dev/null +++ b/doc/GEDS Daemon Mode.md @@ -0,0 +1,146 @@ +# GEDS Daemon Mode Redesign. + +GEDS shall be redesigned to support multiple local instances. A central daemon is responsible +for managing files and sharing files between other daemons. +GEDS clients write and read to a mounted CSI hostpath which shares the data between pods. +NVMeoF and GPU Direct RDMA should be supported in a second step. 
+ +Goal: +- Enable data-sharing of GEDS objects between multiple Python processes. +- Enable data-sharing of GEDS objects across Kubernetes pods using a CSI hostpath. +- Data should survive crashes. +- Facilitate encryption. +- Enable reading/writing data to/from GPU and/or NVMe directly +- Native integration with Kubernetes + + +## Overview + +The diagram shows a high-level overview. +``` + ┌────────────────────────────────────────────────────────────────────────────┐ + │ Kubernetes Node │ + │ ┌────────────────────────────────────────┐ │ + │ │ Kubernetes Pod │ │ + │ IPC │ ┌─────────────┐ ┌─────────────────┐ │ │ + │ ┌───────────┼───► GEDS Client │ │ Apache Spark │ │ │ + │ │ │ └─────────────┘ └─────────────────┘ │ │ + │ │ │ │ │ + │ ┌─────────────────┼────┐ └────────────────────────────────────────┘ │ + │ │ Kubernetes Pod │ │ │ + │ │ ┌──────────────▼──┐ │ ┌────────────────────────────────────────┐ │ + │ │ │ GEDS Daemon ◄─┼──┐ │ Kubernetes Pod │ │ + │ │ └─────────────────┘ │ │ │ ┌─────────────┐ ┌──────────────────┐ │ │ + │ │ │ ├───┼───► GEDS Client │ │ Python + Pytorch │ │ │ + │ │ ┌─────────────────┐ │ │ │ └─────────────┘ └──────────────────┘ │ │ + │ │ │ PVC / Local SSD │ │ │ │ │ │ + │ │ └─────────────────┘ │ │ │ ┌─────────────┐ ┌──────────────────┐ │ │ + │ │ │ └───┼───► GEDS Client │ │ Python + Pytorch │ │ │ + │ └────────────────▲─────┘ │ └─────────────┘ └──────────────────┘ │ │ + │ │ │ │ │ + │ │ └────────────────────────────────────────┘ │ + │ │ TCP/IP │ + │ │ │ + └───────────────────┼────────────────────────────────────────────────────────┘ + │ + ┌───────────────────┼────────────────────────────────────────────────────────┐ + │ Kubernetes Node │ │ + │ │ │ + │ ┌────────────────▼─────┐ ┌────────────────────────────────────┐ │ + │ │ Kubernetes Pod │ │ Kubernetes Pod │ │ + │ │ ┌─────────────────┐ │ │ ┌───────────────────────────────┐ │ │ + │ │ │ GEDS Daemon │ │ │ │ GEDS Client │ │ │ + │ │ └─────────────────┘ │ │ └─────▲────────────────▲────▲───┘ │ │ + │ │ │ │ │ I/O + 
MMAP │ │ │ │ + │ │ ┌─────────────────┐ │Mounts│ ┌─────▼──────────┐ │ │ │ │ + │ │ │ PVC / Local SSD ◄─┼──────┼──┤ Hostpath CSI │ │ │ │ │ + │ │ └─────────────────┘ │ │ └────────────────┘ │ │ │ │ + │ │ │ │ │ │ │ │ + │ └──────────────────────┘ └─────────────────────────┼────┼─────┘ │ + │ │ │ │ + │ GPU Direct │ │NVMeOF │ + │ ┌─────────────────────────▼┐ │ │ + │ │ GPU │ │ │ + │ └───────────────────▲──────┘ │ │ + │ │ │ │ + └────────────────────────────────────────────────────┼──────────┼────────────┘ + │ │ + GPU Direct RDMA │ │ + ┌────────────────────────────────────────────────────▼──────────▼────────────┐ + │ NVMe JBOD │ + │ │ + │ │ + │ │ + │ │ + │ │ + └────────────────────────────────────────────────────────────────────────────┘ +``` + +## Operations + +### Creating a file + +1. GEDS creates a file with `BUCKET/PATH` and `UUID`. +2. GEDS Client creates a temporary file in the hostpath volume. +3. GEDS Client writes to the file. +4. GEDS Client seals the file. This transfers the ownership of the file to the + GEDS daemon. The FD is kept open and the daemon does a rename. +5. GEDS Daemon tells the metadata server that a new file has been + created. + + Conflict resolution: + 1. If no file with `BUCKET/PATH` exists, the entry is created. + 2. If `BUCKET/PATH` already exists, then the existing entry is overridden. + +6. GEDS Client reopens the file `Read-Only` and opens a file-descriptor. +7. Other GEDS Clients are now able to open this file. + +### Opening a file + +1. GEDS Client tells the daemon that it wants to open `FILE`. +2. GEDS Daemon: + 1. Checks with Metadata Server if the `BUCKET/PATH` exists. Receives `UUID` and locations. + 2. Increases the usage count of the filehandle to prevent deletion. + 3. Tells the `PATH` and size of `FILE` to the `Client`. + +### Reading from a file + +A file can be in one of the following locations: + +- **Local**, if the file was created by a client on the same node, or if it is cached. 
+- **DFS**, if the file is stored on a distributed file system. +- **S3**, if the file is stored in a bucket on an S3-like service. +- **Remote**, if the file is stored on a different GEDS Daemon. + +The GEDS Client will forward all reads to the GEDS Daemon for **Remote** files. All other file-types can be read directly by the GEDS Client. + + +### Closing a file + +1. GEDS Client tells the daemon that it closed the `BUCKET/PATH`. +2. GEDS Daemon: + 1. Decreases the file-counter. + 2. If the file-counter is zero and the file is marked as deleted, delete the file. + +### Deleting a file + +1. GEDS Client tells the daemon that it wants to delete `BUCKET/PATH`. +2. GEDS Client closes its local FD. +3. GEDS Daemon: + 1. Decreases reference counter for open `UUID`. + 2. Tells the metadata server that the path `BUCKET/PATH` has been deleted. + 3. Marks file as deleted. Delete the file if the reference counter is zero. + +### Garbage collection + +1. A daemon process automatically checks local file-usage. +2. If the defined quota is reached, no files can be created. +3. Local copies can be deleted if: + 1. The file is marked as deleted. + 2. The file is not open and other copies exist (UUID matches). + 3. A previous version of the file is not referenced any more. + +## Technical Implementations + +- The TCP/IP transport is a GEDS Client as well. This way the transport requires no extra + file/open/close mechanisms. 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 77d348b7..275b2548 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -14,6 +14,7 @@ add_subdirectory(s3) add_subdirectory(libgeds) # Binaries +add_subdirectory(daemon) add_subdirectory(benchmarks) add_subdirectory(geds) add_subdirectory(geds_cmd) diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 927694d2..535c4201 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -7,6 +7,8 @@ set(SOURCES DirectoryMarker.cpp DirectoryMarker.h + Filesystem.cpp + Filesystem.h GEDSFileStatus.h ) @@ -18,6 +20,12 @@ target_compile_definitions(geds_common "HAVE_RDMA=$" _POSIX_C_SOURCE=200809L ) + +target_link_libraries( + geds_common PUBLIC + absl::status +) + target_include_directories(geds_common PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} diff --git a/src/libgeds/CMakeLists.txt b/src/libgeds/CMakeLists.txt index 05a167a7..8338dc8a 100644 --- a/src/libgeds/CMakeLists.txt +++ b/src/libgeds/CMakeLists.txt @@ -4,55 +4,14 @@ # SET(SOURCES - Filesystem.cpp - Filesystem.h - FileTransferProtocol.cpp - FileTransferProtocol.h - FileTransferService.cpp - FileTransferService.h - - TcpTransport.h - TcpTransport.cpp - GEDS.cpp GEDS.h - GEDSAbstractFileHandle.cpp - GEDSAbstractFileHandle.h - GEDSCachedFileHandle.cpp - GEDSCachedFileHandle.h - GEDSConfig.h - GEDSConfig.cpp GEDSFile.h GEDSFile.cpp GEDSFileHandle.h GEDSFileHandle.cpp - GEDSProtocol.h - GEDSProtocol.cpp - GEDSRelocatableFileHandle.cpp - GEDSRelocatableFileHandle.h - GEDSService.cpp - GEDSService.h - HttpServer.cpp - HttpServer.h - HttpSession.cpp - HttpSession.h - MetadataService.cpp - MetadataService.h - GEDSInternal.cpp - GEDSInternal.h - GEDSLocalFileHandle.h - GEDSMMapFileHandle.h GEDSRemoteFileHandle.cpp GEDSRemoteFileHandle.h - GEDSS3FileHandle.cpp - GEDSS3FileHandle.h - - LocalFile.cpp - LocalFile.h - MMAPFile.cpp - MMAPFile.h - Server.cpp - Server.h ) # Create an object lib to build both a dynamic and a static library. 
@@ -107,6 +66,6 @@ target_link_libraries(libgeds_dynamic PUBLIC geds_objlib) install(TARGETS libgeds libgeds_dynamic COMPONENT geds) -if(HAVE_TESTS) - add_subdirectory(tests) -endif() +# if(HAVE_TESTS) +# add_subdirectory(tests) +# endif() diff --git a/src/libgeds/FileTransferProtocol.cpp b/src/libgeds/FileTransferProtocol.cpp deleted file mode 100644 index bfc68a8b..00000000 --- a/src/libgeds/FileTransferProtocol.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/** - * Copyright 2022- IBM Inc. All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "FileTransferProtocol.h" - -#include "geds.pb.h" - -static_assert(static_cast(geds::rpc::FileTransferProtocol::Socket) == - static_cast(geds::FileTransferProtocol::Socket)); - -static_assert(static_cast(geds::rpc::FileTransferProtocol::RDMA) == - static_cast(geds::FileTransferProtocol::RDMA)); - -namespace geds { - -const std::vector &supportedProtocols() { - static const auto result = std::vector { - geds::rpc::FileTransferProtocol::Socket, -#if HAVE_RDMA - FileTransferProtocol::RDMA, -#endif - }; - return result; -} -} // namespace geds diff --git a/src/libgeds/FileTransferProtocol.h b/src/libgeds/FileTransferProtocol.h deleted file mode 100644 index 9eec54fa..00000000 --- a/src/libgeds/FileTransferProtocol.h +++ /dev/null @@ -1,44 +0,0 @@ -/** - * Copyright 2022- IBM Inc. All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#ifndef FILE_TRANSFER_PROTOCOL_H -#define FILE_TRANSFER_PROTOCOL_H - -#include -#include -#include -#include -#include - -#if HAVE_RDMA -#include -#endif - -#include "geds.pb.h" - -namespace geds { -/** - * @brief FileTransferProtocol - * - * The types of this file need to match the types in geds.proto. 
- */ -enum class FileTransferProtocol : uint8_t { Socket = 0, RDMA = 1 }; - -struct ObjTransferEndpoint { - std::string hostname; - struct sockaddr laddr; - struct sockaddr raddr; - union { - int socket; -#if HAVE_RDMA - struct rdma_cm_id *cm_id; -#endif - }; - geds::FileTransferProtocol type; -}; - -} // namespace geds - -#endif diff --git a/src/libgeds/FileTransferService.cpp b/src/libgeds/FileTransferService.cpp deleted file mode 100644 index 1cfb7741..00000000 --- a/src/libgeds/FileTransferService.cpp +++ /dev/null @@ -1,168 +0,0 @@ -/** - * Copyright 2022- IBM Inc. All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "FileTransferService.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "FileTransferProtocol.h" -#include "GEDS.h" -#include "GEDSInternal.h" -#include "Logging.h" -#include "Object.h" -#include "Status.h" -#include "TcpTransport.h" -#include "geds.grpc.pb.h" -#include "geds.pb.h" -#include "status.pb.h" - -namespace geds { -using std::string; - -#define CHECK_CONNECTED \ - if (_connectionState != ConnectionState::Connected) { \ - LOG_ERROR("Unable to connect"); \ - return absl::FailedPreconditionError("Not connected."); \ - } - -FileTransferService::FileTransferService(std::string nodeAddress, std::shared_ptr geds, - std::shared_ptr tcpTrans) - : _connectionState(ConnectionState::Disconnected), _channel(nullptr), _geds(geds), - _tcp(tcpTrans), nodeAddress(std::move(nodeAddress)) {} - -FileTransferService::~FileTransferService() { - if (_connectionState == ConnectionState::Connected) { - disconnect().IgnoreError(); - } -} - -absl::Status FileTransferService::connect() { - if (_connectionState != ConnectionState::Disconnected) { - return absl::FailedPreconditionError("Cannot reinitialize service."); - } - auto lock = getWriteLock(); - try { - assert(_channel.get() == nullptr); - _channel 
= grpc::CreateChannel(nodeAddress, grpc::InsecureChannelCredentials()); - auto success = _channel->WaitForConnected(grpcDefaultDeadline()); - if (!success) { - return absl::UnavailableError("Could not connect to " + nodeAddress + "."); - } - _stub = geds::rpc::GEDSService::NewStub(_channel); - } catch (const std::exception &e) { - return absl::UnavailableError("Could not open channel with " + nodeAddress + - ". Reason: " + e.what()); - } - LOG_DEBUG("About to check for available file transfer endpoints"); - - auto endpoints = availTransportEndpoints(); - for (auto &addr : *endpoints) { - if (std::get<1>(addr) == FileTransferProtocol::Socket) { - struct sockaddr saddr = std::get<0>(addr); - auto peer = _tcp->getPeer(&saddr); - - if (peer) { - _tcpPeer = peer; - break; - } - } - } - _connectionState = ConnectionState::Connected; - return absl::OkStatus(); -} - -absl::Status FileTransferService::disconnect() { - if (_connectionState != ConnectionState::Connected) { - return absl::UnknownError("The service is in the wrong state!"); - } - auto lock = getWriteLock(); - _tcpPeer.reset(); - _channel = nullptr; - return absl::OkStatus(); -} - -absl::StatusOr>> -FileTransferService::availTransportEndpoints() { - // Function is called during connect, so no check. 
- geds::rpc::EmptyParams request; - geds::rpc::AvailTransportEndpoints response; - grpc::ClientContext context; - - auto status = _stub->GetAvailEndpoints(&context, request, &response); - if (!status.ok()) { - LOG_ERROR("Unable to execute grpc call, status: ", status.error_code(), " ", - status.error_details()); - return absl::UnknownError("Unable to execute command"); - } - - const auto rpc_results = response.endpoint(); - auto results = std::vector>(); - results.reserve(rpc_results.size()); - - for (auto &i : rpc_results) { - sockaddr saddr{}; - auto inaddr = (sockaddr_in *)&saddr; - inaddr->sin_addr.s_addr = inet_addr(i.address().c_str()); - inaddr->sin_port = i.port(); - inaddr->sin_family = AF_INET; - - // For now only TCP connections are possible - if (i.type() == rpc::Socket) { - results.emplace_back(saddr, FileTransferProtocol::Socket); - } - } - return results; -} - -absl::StatusOr FileTransferService::readBytes(const std::string &bucket, - const std::string &key, uint8_t *buffer, - size_t position, size_t length) { - CHECK_CONNECTED - - std::future> fut; - // Create a scope for the std::shared_ptr so that the peer is automatically cleaned up. - { - auto lock = getReadLock(); - if (_tcpPeer.expired()) { - return absl::UnavailableError("TCP readBytes: no peer: " + nodeAddress); - } - - LOG_DEBUG("TCP readBytes: ", nodeAddress, ", REQ: ", length); - auto peer = _tcpPeer.lock(); - lock.unlock(); - auto prom = peer->sendRpcRequest((uint64_t)buffer, bucket + "/" + key, position, length); - fut = prom->get_future(); - } - auto status = fut.get(); - if (status.ok()) { - LOG_DEBUG("TCP readBytes: ", nodeAddress, ", DONE: ", length); - return *status; - } - // Close the FileTransferService on error. 
- if (status.status().code() == absl::StatusCode::kAborted) { - auto lock = getWriteLock(); - _tcpPeer.reset(); - } - return status.status(); -} - -} // namespace geds diff --git a/src/libgeds/FileTransferService.h b/src/libgeds/FileTransferService.h deleted file mode 100644 index ea15db9f..00000000 --- a/src/libgeds/FileTransferService.h +++ /dev/null @@ -1,98 +0,0 @@ -/** - * Copyright 2022- IBM Inc. All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#ifndef FILE_TRANSFER_SERVICE_H -#define FILE_TRANSFER_SERVICE_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "FileTransferProtocol.h" -#include "GEDSInternal.h" -#include "Object.h" -#include "RWConcurrentObjectAdaptor.h" -#include "TcpTransport.h" -#include "geds.grpc.pb.h" - -class GEDS; - -namespace geds { - -struct RemoteFileInfo { - const ObjectID id; - const size_t length; -}; - -class FileTransferService : public utility::RWConcurrentObjectAdaptor { - ConnectionState _connectionState; - std::shared_ptr _channel; - std::unique_ptr _stub; - std::shared_ptr _geds; - std::shared_ptr _tcp; - std::weak_ptr _tcpPeer; - - absl::StatusOr>> - availTransportEndpoints(); - -public: - const std::string nodeAddress; - - FileTransferService(std::string nodeAddress, std::shared_ptr geds, - std::shared_ptr tcpTrans); - ~FileTransferService(); - - absl::Status connect(); - absl::Status disconnect(); - - absl::StatusOr> get(const ObjectID &id, size_t position, size_t length); - absl::StatusOr> get(const std::string &bucket, const std::string &key, - size_t position, size_t length); - - absl::StatusOr readBytes(const std::string &bucket, const std::string &key, - uint8_t *buffer, size_t position, size_t length); - - template ::value>> - absl::StatusOr read(const std::string &bucket, const std::string &key, T *buffer, - size_t position, size_t length) { - size_t lengthBytes = length * 
sizeof(T); - size_t positionBytes = position * sizeof(T); - - auto readStatus = // NOLINTNEXTLINE - readBytes(bucket, key, reinterpret_cast(buffer), positionBytes, lengthBytes); - if (!readStatus.ok()) { - return readStatus.status(); - } - return readStatus.value() / sizeof(T); - } - - template ::value>> - absl::StatusOr read(const std::string &bucket, const std::string &key, - std::vector &buffer, size_t offset, size_t position, - size_t length) { - size_t requiredLength = offset + length; - if (buffer.size() < requiredLength) { - buffer.resize(requiredLength); - } - return read(bucket, key, &buffer[offset], position, length); - } -}; - -} // namespace geds - -#endif diff --git a/src/libgeds/Filesystem.cpp b/src/libgeds/Filesystem.cpp deleted file mode 100644 index 0c39fd72..00000000 --- a/src/libgeds/Filesystem.cpp +++ /dev/null @@ -1,111 +0,0 @@ -/** - * Copyright 2022- IBM Inc. All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "Filesystem.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "Logging.h" - -namespace geds { -namespace filesystem { - -absl::StatusOr createFile(const std::string &path) { - mode_t mode = S_IRUSR | S_IWUSR; - int fd = open(path.c_str(), O_CREAT | O_EXCL, mode); - if (fd < 0 && (errno != EEXIST)) { - int error = errno; - return absl::UnknownError("Unable to create file " + path + ": " + std::strerror(error)); - } - return fd; -} - -absl::Status touchFile(const std::string &path) { - auto fileStatus = createFile(path); - if (!fileStatus.ok()) { - return fileStatus.status(); - } - (void)close(fileStatus.value()); - return absl::OkStatus(); -} - -absl::Status removeFile(const std::string &path) { - int err = unlink(path.c_str()); - if (err != 0 && (errno != ENOENT)) { - int error = errno; - auto message = "Unable to delete file " + path + ": " + std::strerror(error); - LOG_ERROR(message); - return absl::UnknownError(message); - } - 
LOG_DEBUG("Removed ", path); - return absl::OkStatus(); -} - -absl::Status mkdir(const std::string &path) { - std::error_code errorCode; - auto fsPath = std::filesystem::path(path); - if (!std::filesystem::is_directory(fsPath)) { - bool success = std::filesystem::create_directory(fsPath, errorCode); - if (!success && errorCode.value() != 0) { - return absl::UnknownError("Unable to create directory " + path + - "' Reason: " + errorCode.message()); - } - } - return absl::OkStatus(); -} - -std::string mktempdir(const std::string &name) { - auto path = name; - if (!path.ends_with("XXXXXX")) { - path += "XXXXXX"; - } - char *r = mkdtemp(path.data()); - if (r == NULL) { - int err = errno; - auto errorMessage = "mkdtemp returned an error while trying to create a tempdir with pattern " + - path + ": " + std::strerror(err); - LOG_ERROR(errorMessage); - throw std::runtime_error(errorMessage); - } - return path; -} - -std::string tempFile() { - auto prefix = "GEDS_tempfile"; - return tempFile(prefix); -} - -std::string tempFile(const std::string &prefix) { - auto folder = std::filesystem::temp_directory_path(); - return tempFile(folder, prefix); -} - -std::string tempFile(const std::string &folder, const std::string &prefix) { - auto path = folder + "/" + prefix + "XXXXXX"; - - std::string tmp = path + "XXXXXX"; - int fd = mkstemp(tmp.data()); - if (fd < 0) { - int error = errno; - auto errorMessage = - "mkstemp returned an invalid file descriptor for " + prefix + ": " + std::strerror(error); - LOG_ERROR(errorMessage); - throw std::runtime_error(errorMessage); - } - (void)close(fd); - return tmp; -} - -} // namespace filesystem -} // namespace geds diff --git a/src/libgeds/Filesystem.h b/src/libgeds/Filesystem.h deleted file mode 100644 index 4c0266e8..00000000 --- a/src/libgeds/Filesystem.h +++ /dev/null @@ -1,26 +0,0 @@ -/** - * Copyright 2022- IBM Inc. 
All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#ifndef GEDS_FILESYSTEM_H -#define GEDS_FILESYSTEM_H - -#include -#include -#include - -namespace geds::filesystem { - -absl::StatusOr createFile(const std::string &path); -absl::Status touchFile(const std::string &path); -absl::Status removeFile(const std::string &path); -absl::Status mkdir(const std::string &path); -std::string mktempdir(const std::string &name); -std::string tempFile(const std::string &folder, const std::string &prefix); -std::string tempFile(const std::string &prefix); -std::string tempFile(); - -} // namespace geds::filesystem - -#endif diff --git a/src/libgeds/GEDS.cpp b/src/libgeds/GEDS.cpp index e44aa24d..40cbe207 100644 --- a/src/libgeds/GEDS.cpp +++ b/src/libgeds/GEDS.cpp @@ -4,1044 +4,145 @@ */ #include "GEDS.h" +#include "absl/status/status.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "DirectoryMarker.h" -#include "FileTransferService.h" -#include "Filesystem.h" -#include "GEDSCachedFileHandle.h" -#include "GEDSConfig.h" -#include "GEDSFile.h" -#include "GEDSFileHandle.h" -#include "GEDSFileStatus.h" -#include "GEDSInternal.h" -#include "GEDSLocalFileHandle.h" -#include "GEDSMMapFileHandle.h" -#include "GEDSRelocatableFileHandle.h" -#include "GEDSRemoteFileHandle.h" -#include "GEDSS3FileHandle.h" -#include "Logging.h" -#include "Object.h" -#include "Path.h" -#include "Platform.h" -#include "Ports.h" -#include "Statistics.h" -#include "TcpTransport.h" -#include "Version.h" - -using namespace geds; - -static std::string computeHostUri(const std::string &hostname, uint16_t port) { - return "geds://" + hostname + ":" + std::to_string(port); -} - -static std::string createUUID() { - auto uuid = 
boost::uuids::random_generator()(); - std::stringstream ss; - ss << uuid; - return ss.str(); -} - -GEDS::GEDS(GEDSConfig &&argConfig) - : std::enable_shared_from_this(), _config(argConfig), - _server(_config.listenAddress, _config.port), - _metadataService(_config.metadataServiceAddress), _pathPrefix(_config.localStoragePath), - _hostname(_config.hostname.value_or("")), _httpServer(_config.portHttpServer), - _ioThreadPool(_config.io_thread_pool_size), _storageCounters(_config.available_local_storage), - _memoryCounters(_config.available_local_memory), uuid(createUUID()) { - std::error_code ec; - auto success = std::filesystem::create_directories(_pathPrefix, ec); - if (!success && ec.value() != 0) { - auto message = "Unable to create prefix " + _pathPrefix + ". Reason " + ec.message(); - LOG_ERROR(message); - throw std::runtime_error(message); - } -} - -static std::string computeLocalStoragePath(std::string path) { - if (path.ends_with("XXXXXX")) { - return geds::filesystem::mktempdir(path); - } - return path; -} - -std::shared_ptr GEDS::factory(GEDSConfig config) { - config.localStoragePath = computeLocalStoragePath(config.localStoragePath); - // Call private CTOR. - return std::make_shared(std::move(config)); -} +std::shared_ptr GEDS::factory() { return std::shared_ptr(new GEDS()); } GEDS::~GEDS() { - LOG_DEBUG("GEDS Destructor. 
state: ", magic_enum::enum_name(_state.load())); - if (_state == ServiceState::Running) { - LOG_INFO("Stopping GEDS Service"); - (void)stop(); - } -} - -#define GEDS_CHECK_SERVICE_RUNNING \ - if (_state != ServiceState::Running) { \ - return absl::FailedPreconditionError("The service is " + to_string(_state) + "."); \ - } - -absl::Status GEDS::start() { - std::cout << "Starting GEDS (" << utility::GEDSVersion() << ")\n" - << "- prefix: " << _pathPrefix << "\n" - << "- metadata service: " << _metadataService.serverAddress << std::endl; - if (_state != ServiceState::Stopped) { - return absl::FailedPreconditionError("The service is " + to_string(_state) + "."); - } - _state = ServiceState::Unknown; - // Connect to metadata service. - auto result = _metadataService.connect(); - if (!result.ok()) { - return result; - } - // Start GEDS Service. - result = _server.start(shared_from_this()); - if (!result.ok()) { - return result; - } - - if (!_hostname.empty()) { - _hostURI = computeHostUri(_hostname, _server.port()); - LOG_INFO("Hostname was declared, using ", _hostURI, " to announce myself."); - } else { - auto hostIP = _metadataService.getConnectionInformation(); - if (!hostIP.ok()) { - return absl::UnknownError("Unable to obtain connection IP! Reason: " + - std::string{hostIP.status().message()}); - } - _hostname = hostIP.value(); - _hostURI = computeHostUri(hostIP.value(), _server.port()); - LOG_INFO("Using ", _hostURI, " to announce myself."); - } - - _tcpTransport = TcpTransport::factory(shared_from_this()); - _tcpTransport->start(); - - result = _httpServer.start(); - if (!result.ok()) { - LOG_ERROR("Unable to start webserver."); - } - - // Update state. 
- _state = ServiceState::Running; - - startStorageMonitoringThread(); - startPubSubStreamThread(); - - auto st = syncObjectStoreConfigs(); - if (!syncObjectStoreConfigs().ok()) { - LOG_ERROR("Unable to synchronize object store configs on boot."); - } - - return absl::OkStatus(); -} - -absl::Status GEDS::stop() { - GEDS_CHECK_SERVICE_RUNNING - LOG_INFO("Stopping"); - LOG_INFO("Printing statistics"); - - geds::Statistics::print(); - // Relocate to S3 if available. - if (_config.force_relocation_when_stopping) { - relocate(true); - } - - auto result = _metadataService.disconnect(); - if (!result.ok()) { - LOG_ERROR("cannot disconnect metadata service: ", result.message()); - } - result = _server.stop(); - if (!result.ok()) { - LOG_ERROR("cannot stop server: ", result.message()); - } - - _httpServer.stop(); - - // XXX TODO: Properly cleanup files - _fileHandles.clear(); - _fileTransfers.clear(); - _tcpTransport->stop(); - - _state = ServiceState::Stopped; - - _storageMonitoringThread.join(); - - if (_pubSubStreamThread.joinable()) { - _pubSubStreamThread.join(); - } - - return result; + // TODO: NYI } -absl::Status GEDS::isValidBucketName(const std::string &bucket) { - // Loosely follow the S3 bucket naming rules. 
- static std::regex regex("[a-z\\d][a-z\\d\\.\\-]+[a-z\\d]", std::regex_constants::ECMAScript); - if (std::regex_match(bucket, regex)) { - if (bucket.size() >= 4 && bucket.substr(0, 4).compare("xn--") == 0) { - return absl::FailedPreconditionError("Invalid bucket name."); - } - return absl::OkStatus(); - } - return absl::FailedPreconditionError("Invalid bucket name."); -} - -absl::Status GEDS::isValidKeyName(const std::string &key) { - if (key.size() == 0) { - return absl::FailedPreconditionError("Zero-length keys are not allowed."); - } - if (key.back() == '/') { - return absl::FailedPreconditionError("Keys are not allowed to end with \"/\"."); - } - if (key.size() >= 2 && key.substr(0, 1).compare("./") == 0) { - return absl::FailedPreconditionError("Keys are not allowed to start with \"./\"."); - } - if (key.size() >= 3 && key.substr(0, 3).compare("../") == 0) { - return absl::FailedPreconditionError("Keys are not allowed to start with \"../\"."); - } - if (key.find("/../") != std::string::npos) { - return absl::FailedPreconditionError("Keys are not allowed to contain \"/../\""); - } - return absl::OkStatus(); -} - -absl::Status GEDS::isValid(const std::string &bucket, const std::string &key) { - auto result = isValidBucketName(bucket); - if (!result.ok()) { - return result; - } - result = isValidKeyName(key); - if (!result.ok()) { - return result; - } - return absl::OkStatus(); -} +absl::Status GEDS::start() { return absl::UnimplementedError("NYI"); } -absl::StatusOr> -GEDS::parseObjectName(const std::string &objectName) { - auto separator = objectName.find('/'); - if (separator == std::string::npos) { - auto message = "cannot create file: " + objectName + " invalid format!"; - LOG_ERROR(message); - return absl::InvalidArgumentError(message); - } - - auto bucket = objectName.substr(0, separator); - auto key = objectName.substr(separator + 1); - return {{bucket, key}}; -} +absl::Status GEDS::stop() { return absl::UnimplementedError("NYI"); } absl::StatusOr 
GEDS::create(const std::string &objectName, bool overwrite) { - auto s = parseObjectName(objectName); - if (!s.ok()) { - return s.status(); - } - auto [bucket, key] = *s; - return create(bucket, key, overwrite); + return absl::UnimplementedError("NYI"); } - absl::StatusOr GEDS::create(const std::string &bucket, const std::string &key, bool overwrite) { - LOG_DEBUG("create ", bucket, "/", key); - auto result = createAsFileHandle(bucket, key, overwrite); - if (result.ok()) { - *_statisticsFilesCreated += 1; - return (*result)->open(); - } - return result.status(); + return absl::UnimplementedError("NYI"); } -absl::StatusOr> -GEDS::createAsFileHandle(const std::string &bucket, const std::string &key, bool overwrite) { - GEDS_CHECK_SERVICE_RUNNING - - LOG_DEBUG(bucket, "/", key); - const auto check = GEDS::isValid(bucket, key); - if (!check.ok()) { - return check; - } - auto bucketStatus = createBucket(bucket); - if (!bucketStatus.ok()) { - return bucketStatus; - } - - const auto path = getPath(bucket, key); - auto local_handle = GEDSLocalFileHandle::factory(shared_from_this(), bucket, key, std::nullopt); - if (!local_handle.ok()) { - return local_handle.status(); - } - auto handle = GEDSRelocatableFileHandle::factory(shared_from_this(), *local_handle); - - if (overwrite) { - _fileHandles.insertOrReplace(path, handle); - return handle; - } - auto newHandle = _fileHandles.insertOrExists(path, handle); - if (newHandle.get() != handle.get()) { - return absl::AlreadyExistsError("The file " + path.name + "already exists!"); - } - return newHandle; +absl::StatusOr> GEDS::createAsFileHandle(const std::string &bucket, + const std::string &key, + bool overwrite = false) { + return absl::UnimplementedError("NYI"); } absl::Status GEDS::mkdirs(const std::string &bucket, const std::string &path, char delimiter) { - LOG_DEBUG(bucket, "/", path); - - if (path.empty() || (path.size() == 1 && path[0] == delimiter)) { - return absl::OkStatus(); - } - if (path.back() != delimiter) { - 
return mkdirs(bucket, path + delimiter); - } - auto folderPath = path + Default_DirectoryMarker; - LOG_DEBUG("Creating ", bucket, "/", folderPath); - auto mkdir = create(bucket, folderPath); - if (!mkdir.ok() && mkdir.status().code() != absl::StatusCode::kAlreadyExists) { - LOG_ERROR("Unable to create folder ", folderPath); - return mkdir.status(); - } - auto status = mkdir->seal(); - if (!status.ok() && status.code() != absl::StatusCode::kAlreadyExists) { - LOG_ERROR("Unable to seal directory marker: ", path, " reason: ", status.message()); - return status; - } - if (path.size() >= 2) { - auto stripPos = path.substr(0, path.size() - 1).rfind(delimiter); - if (stripPos == std::string::npos) { - return absl::OkStatus(); - } - return mkdirs(bucket, path.substr(0, stripPos)); - } - return absl::OkStatus(); + return absl::UnimplementedError("NYI"); } absl::Status GEDS::createBucket(const std::string &bucket) { - LOG_DEBUG(bucket); - - auto status = GEDS::isValidBucketName(bucket); - if (!status.ok()) { - return status; - } - if (lookupBucket(bucket).ok()) { - return absl::OkStatus(); - } - status = _metadataService.createBucket(bucket); - // Allow multiple creations of the same bucket. 
- if (status.ok() || status.code() == absl::StatusCode::kAlreadyExists) { - return absl::OkStatus(); - } - return status; + return absl::UnimplementedError("NYI"); } absl::Status GEDS::lookupBucket(const std::string &bucket) { - LOG_DEBUG(bucket); - - if (_knownBuckets.exists(bucket)) { - return absl::OkStatus(); - } - auto status = _metadataService.lookupBucket(bucket); - if (!status.ok()) { - return status; - } - _knownBuckets.insert(bucket); - return absl::OkStatus(); + return absl::UnimplementedError("NYI"); } absl::StatusOr GEDS::open(const std::string &objectName) { - auto s = parseObjectName(objectName); - if (!s.ok()) { - return s.status(); - } - auto [bucket, key] = *s; - return open(bucket, key); + return absl::UnimplementedError("NYI"); } - absl::StatusOr GEDS::open(const std::string &bucket, const std::string &key, bool retry) { - LOG_DEBUG("open ", bucket, "/", key); - auto fh = openAsFileHandle(bucket, key); - if (!fh.ok()) { - return fh.status(); - } - auto lock = (*fh)->lockFile(); - if ((*fh)->isValid()) { - *_statisticsFilesOpened += 1; - return (*fh)->open(); - } - // Remove invalid filehandle. 
- const auto path = getPath(bucket, key); - _fileHandles.removeIf(path, [&](const std::shared_ptr &check) { - return (*fh).get() == check.get(); - }); - if (retry) { - return open(bucket, key, false); - } - return absl::UnavailableError("The file " + path.name + " is invalid."); + return absl::UnimplementedError("NYI"); } - -absl::StatusOr GEDS::localOpen(const std::string &objectName) { - auto s = parseObjectName(objectName); - if (!s.ok()) { - return s.status(); - } - auto [bucket, key] = *s; - return localOpen(bucket, key); -} - -absl::StatusOr GEDS::localOpen(const std::string &bucket, const std::string &key) { - GEDS_CHECK_SERVICE_RUNNING - - LOG_DEBUG(bucket, "/", key); - const auto path = getPath(bucket, key); - auto fileHandle = _fileHandles.get(path); - if (fileHandle.has_value() && (*fileHandle)->rawFd().ok()) { - // The filehandle has an FD, and is thus local. - // TODO: FIXME. - auto lock = (*fileHandle)->lockFile(); - return (*fileHandle)->open(); - } - return absl::NotFoundError(path.name + " is not available on this machine"); -} - -absl::StatusOr> -GEDS::reopen(std::shared_ptr existing) { - GEDS_CHECK_SERVICE_RUNNING; - - LOG_DEBUG(existing->identifier); - - // Avoid race condition when reopening. - auto lock = existing->lockFile(); - - auto path = getPath(existing->bucket, existing->key); - _fileHandles.removeIf(path, [&existing](const std::shared_ptr check) { - return existing.get() == check.get(); - }); - return openAsFileHandle(existing->bucket, existing->key); -} - absl::StatusOr> GEDS::openAsFileHandle(const std::string &bucket, const std::string &key) { - GEDS_CHECK_SERVICE_RUNNING - - LOG_DEBUG(bucket, "/", key); - - const auto path = getPath(bucket, key); - auto check = GEDS::isValid(bucket, key); - if (!check.ok()) { - return check; - } - - // Check if file is already open on the machine. 
- { - auto fileHandle = _fileHandles.get(path); - if (fileHandle.has_value()) { - return (*fileHandle); - } - } - - auto fileHandle = reopenFileHandle(bucket, key, false); - if (!fileHandle.ok()) { - return fileHandle.status(); - } - - // Wrap filehandle. - auto wrapped = GEDSRelocatableFileHandle::factory(shared_from_this(), *fileHandle); - return _fileHandles.insertOrExists(path, wrapped); + return absl::UnimplementedError("NYI"); } - absl::StatusOr> GEDS::reopenFileHandle(const std::string &bucket, const std::string &key, bool invalidate) { - auto status_file = _metadataService.lookup(bucket, key, invalidate); - if (!status_file.ok()) { - return status_file.status(); - } - - const auto &object = status_file.value(); - const auto &location = object.info.location; - - const std::string_view s3Prefix{"s3://"}; - const std::string_view gedsPrefix{"geds://"}; - - absl::StatusOr> fileHandle; - if (location.compare(0, gedsPrefix.size(), gedsPrefix) == 0) { - fileHandle = GEDSRemoteFileHandle::factory(shared_from_this(), object); - } else if (location.compare(0, s3Prefix.size(), s3Prefix) == 0) { - if (_config.cache_objects_from_s3) { - fileHandle = GEDSCachedFileHandle::factory(shared_from_this(), object); - } else { - fileHandle = GEDSS3FileHandle::factory(shared_from_this(), object); - } - } else { - return absl::UnknownError("The remote location format " + location + " is not known."); - } - - if (!fileHandle.ok()) { - if (!invalidate) { - return reopenFileHandle(bucket, key, true); - } - LOG_ERROR("Unable to open ", bucket, "/", key, " reason: ", fileHandle.status().message()); - return fileHandle.status(); - } - return fileHandle; + return absl::UnimplementedError("NYI"); } -absl::StatusOr> GEDS::getS3Endpoint(const std::string &bucket) { - return _objectStores.get(bucket); +absl::StatusOr> +GEDS::reopen(std::shared_ptr existing) { + return absl::UnimplementedError("NYI"); } -absl::StatusOr> -GEDS::getFileTransferService(const std::string &hostname) { - 
LOG_DEBUG(hostname); - - { - auto fileTransferService = _fileTransfers.get(hostname); - if (fileTransferService.has_value()) { - return *fileTransferService; - } - } - auto fileTransferService = - std::make_shared(hostname, shared_from_this(), _tcpTransport); - auto status = fileTransferService->connect(); - if (!status.ok()) { - LOG_ERROR("Unable to connect to ", hostname, " for FileTransferService.: ", status.code()); - return absl::UnavailableError("Unable to connect to " + hostname + ": " + - std::string{status.message()}); - } - // Insert file transfer service. - return _fileTransfers.insertOrExists(hostname, fileTransferService); +absl::StatusOr GEDS::localOpen(const std::string &objectName) { + return absl::UnimplementedError("NYI"); +} +absl::StatusOr GEDS::localOpen(const std::string &bucket, const std::string &key) { + return absl::UnimplementedError("NYI"); } absl::Status GEDS::seal(GEDSFileHandle &fileHandle, bool update, size_t size, std::optional uri) { - GEDS_CHECK_SERVICE_RUNNING - - LOG_DEBUG(fileHandle.identifier); - - auto obj = - geds::Object{geds::ObjectID{fileHandle.bucket, fileHandle.key}, - geds::ObjectInfo{uri.value_or(_hostURI), size, size, fileHandle.metadata()}}; - - if (update) { - return _metadataService.updateObject(obj); - } - return _metadataService.createObject(obj); -} - -std::string GEDS::getLocalPath(const std::string &bucket, const std::string &key) const { - auto postfix = bucket + "/" + key; - auto exists = _fileNames.get(postfix); - if (exists.has_value()) { - return _pathPrefix + "/" + bucket + "/" + std::to_string(*exists); - } - auto value = _fileNameCounter++; - auto n2 = _fileNames.insertOrExists(key, value); - return _pathPrefix + "/" + bucket + "/" + std::to_string(n2); -} - -std::string GEDS::getLocalPath(const GEDSFile &file) const { - return getLocalPath(file.bucket(), file.key()); + return absl::UnimplementedError("NYI"); } absl::StatusOr> GEDS::list(const std::string &bucket, const std::string &prefix) { - return 
list(bucket, prefix, 0); + return absl::UnimplementedError("NYI"); } -absl::StatusOr> GEDS::list(const std::string &bucket, - const std::string &prefix, char delimiter) { - LOG_DEBUG(bucket, "/", prefix); - - bool prefixExists = false; - absl::StatusOr, std::vector>> list; - if (_config.pubSubEnabled) { - list = _metadataService.listPrefixFromCache(bucket, prefix, delimiter); - } else { - list = _metadataService.listPrefix(bucket, prefix, delimiter); - } - if (!list.ok()) { - return list.status(); - } - _knownBuckets.insert(bucket); - const std::string folderString = delimiter + Default_DirectoryMarker; - std::set result; - for (const auto &value : list->first) { - prefixExists = true; - const auto &key = value.id.key; - if (delimiter != 0 && key.ends_with(folderString)) { - // Don't list current directory. - continue; - } else { - result.emplace(GEDSFileStatus{.key = key, .size = value.info.size, .isDirectory = false}); - } - } - for (const auto &prefix : list->second) { - result.emplace(GEDSFileStatus{.key = prefix, .size = 0, .isDirectory = true}); - } - if (result.empty() && delimiter && !prefixExists) { - return absl::NotFoundError("Prefix not found: " + prefix); - } - return std::vector{result.begin(), result.end()}; +absl::StatusOr> list(const std::string &bucket, + const std::string &prefix, char delimiter) { + return absl::UnimplementedError("NYI"); } absl::StatusOr> GEDS::listAsFolder(const std::string &bucket, const std::string &prefix) { - return list(bucket, prefix, Default_GEDSFolderDelimiter); -} - -absl::StatusOr GEDS::status(const std::string &bucket, const std::string &key) { - return status(bucket, key, Default_GEDSFolderDelimiter); + return absl::UnimplementedError("NYI"); } +absl::StatusOr GEDS::status(const std::string &bucket, const std::string &key); absl::StatusOr GEDS::status(const std::string &bucket, const std::string &key, char delimiter) { - LOG_DEBUG(bucket, "/", key); - - // Base case: Empty key, or key matching `/`. 
- if (key.size() == 0 || (key.size() == 1 && key[0] == delimiter)) { - auto isRegistered = lookupBucket(bucket); - if (isRegistered.ok() || _objectStores.get(bucket).ok()) { - return GEDSFileStatus{.key = key, .size = 0, .isDirectory = true}; - } - return absl::NotFoundError("Bucket not found!"); - } - - auto listDir = [&](const std::string &k) -> absl::StatusOr { - auto list = _metadataService.listPrefix(bucket, k, delimiter); - if (list.ok() && list->second.size() > 0) { - return GEDSFileStatus{.key = k, .size = 0, .isDirectory = true}; - } - auto s3 = _objectStores.get(bucket); - if (s3.ok()) { - auto s3Status = (*s3)->folderStatus(bucket, k, delimiter); - if (s3Status.ok() || s3Status.status().code() != absl::StatusCode::kNotFound) { - return s3Status; - } - } - return absl::NotFoundError("Key " + key + " not found!"); - }; - - // Location is a directory. - auto isDir = key.back() == delimiter; - if (isDir) { - return listDir(key); - } - - // Location is most likely a file. - auto obj = _metadataService.lookup(bucket, key); - if (obj.ok()) { - return GEDSFileStatus{.key = key, .size = obj->info.size, .isDirectory = false}; - } - auto s3 = _objectStores.get(bucket); - if (s3.ok()) { - auto s3Status = (*s3)->fileStatus(bucket, key); - if (s3Status.ok()) { - return s3Status; - } - if (s3Status.status().code() != absl::StatusCode::kNotFound) { - return s3Status.status(); - } - } - if (delimiter != 0) { - // Try listing as a directory. 
- return listDir(key + delimiter); - } - return absl::NotFoundError("Key " + key + " not found!"); + return absl::UnimplementedError("NYI"); } -absl::Status GEDS::renamePrefix(const std::string &bucket, const std::string &srcKey, - const std::string &destKey) { - return renamePrefix(bucket, srcKey, bucket, destKey); +absl::Status GEDS::renamePrefix(const std::string &bucket, const std::string &srcPrefix, + const std::string &destPrefix) { + return absl::UnimplementedError("NYI"); } - -absl::Status GEDS::renamePrefix(const std::string &srcBucket, const std::string &srcKey, - const std::string &destBucket, const std::string &destKey) { - LOG_DEBUG("rename", srcBucket, "/", srcKey, " to ", destBucket, "/", destKey); - auto prefixList = list(srcBucket, srcKey); - if (!prefixList.ok()) { - return prefixList.status(); - } - for (const auto &element : *prefixList) { - if (element.isDirectory) { - continue; - } - const auto &key = element.key; - auto newKey = destKey + key.substr(srcKey.size()); - auto status = rename(srcBucket, element.key, destBucket, newKey); - if (!status.ok()) { - return status; - } - } - return absl::OkStatus(); +absl::Status GEDS::renamePrefix(const std::string &srcBucket, const std::string &srcPrefix, + const std::string &destPrefix, const std::string &destKey) { + return absl::UnimplementedError("NYI"); } absl::Status GEDS::rename(const std::string &bucket, const std::string &srcKey, const std::string &destKey) { - return rename(bucket, srcKey, bucket, destKey); + return absl::UnimplementedError("NYI"); } absl::Status GEDS::rename(const std::string &srcBucket, const std::string &srcKey, const std::string &destBucket, const std::string &destKey) { - // ToDo: Actually move the files. 
- auto status = copy(srcBucket, srcKey, destBucket, destKey); - if (!status.ok()) { - return status; - } - return deleteObject(srcBucket, srcKey); -} - -absl::Status GEDS::copyPrefix(const std::string &bucket, const std::string &srcKey, - const std::string &destKey) { - return copyPrefix(bucket, srcKey, bucket, destKey); -} - -absl::Status GEDS::copyPrefix(const std::string &srcBucket, const std::string &srcKey, - const std::string &destBucket, const std::string &destKey) { - auto prefixList = list(srcBucket, srcKey); - if (!prefixList.ok()) { - return prefixList.status(); - } - for (const auto &element : *prefixList) { - if (element.isDirectory) { - continue; - } - const auto &key = element.key; - auto newKey = destKey + key.substr(srcKey.size()); - auto status = copy(srcBucket, element.key, destBucket, newKey); - if (!status.ok()) { - return status; - } - } - return absl::OkStatus(); -} - -absl::Status GEDS::copy(const std::string &bucket, const std::string &srcKey, - const std::string &destKey) { - return copy(bucket, srcKey, bucket, destKey); + return absl::UnimplementedError("NYI"); } -absl::Status GEDS::copy(const std::string &srcBucket, const std::string &srcKey, - const std::string &destBucket, const std::string &destKey) { - auto srcFile = open(srcBucket, srcKey); - if (!srcFile.ok()) { - return srcFile.status(); - } - auto destFile = create(destBucket, destKey); - if (!destFile.ok()) { - return destFile.status(); - } - auto status = srcFile->copyTo(*destFile); - if (status.ok()) { - return destFile->seal(); - } - return status; +absl::Status GEDS::copyPrefix(const std::string &bucket, const std::string &srcPrefix, + const std::string &destPrefix) { + return absl::UnimplementedError("NYI"); } - -absl::Status GEDS::deleteObject(const std::string &bucket, const std::string &key) { - LOG_DEBUG("DeleteObject ", bucket, "/", key); - // Delete on metadata service. 
- { - auto status = _metadataService.deleteObject(bucket, key); - if (!status.ok() && status.code() != absl::StatusCode::kNotFound) { - // Omit local deletion if we cannot communicate with Metadata service. - return status; - } - } - // Delete on s3. - { - auto storeStatus = _objectStores.get(bucket); - if (storeStatus.ok() && !key.starts_with(GEDSCachedFileHandle::CacheBlockMarker)) { - auto deleteStatus = storeStatus.value()->deleteObject(bucket, key); - if (!deleteStatus.ok()) { - LOG_ERROR("Unable to delete ", bucket, "/", key, " on S3:", deleteStatus.message()); - } - } - } - - // Delete the file locally. - auto path = getPath(bucket, key); - auto removed = _fileHandles.remove(path); - if (!removed) { - LOG_ERROR("The file ", path.name, " did not exist locally!"); - } - return absl::OkStatus(); +absl::Status copyPrefix(const std::string &srcBucket, const std::string &srcPrefix, + const std::string &destPrefix, const std::string &destKey) { + return absl::UnimplementedError("NYI"); } -absl::Status GEDS::deleteObjectPrefix(const std::string &bucket, const std::string &prefix) { - LOG_DEBUG("deleteObjectPrefix ", bucket, "/", prefix); - - // Delete on GEDS. - { - auto status = _metadataService.deleteObjectPrefix(bucket, prefix); - if (!status.ok() && status.code() != absl::StatusCode::kNotFound) { - // Omit local deletion if we cannot communicate with Metadata service. - return status; - } - } - // Delete on S3. - { - auto storeStatus = _objectStores.get(bucket); - if (storeStatus.ok()) { - auto deleteStatus = storeStatus.value()->deletePrefix(bucket, prefix); - if (!deleteStatus.ok()) { - LOG_ERROR("Unable to delete prefix ", bucket, "/", prefix, - " on S3: ", deleteStatus.message()); - } - } - } - // Mark the file as deleted and remove it. 
- _fileHandles.removeRange(utility::PathPrefixProbe{prefix}); - return absl::OkStatus(); -} - -absl::Status GEDS::registerObjectStoreConfig(const std::string &bucket, - const std::string &endpointUrl, - const std::string &accessKey, - const std::string &secretKey) { - auto status = _metadataService.registerObjectStoreConfig( - ObjectStoreConfig(bucket, endpointUrl, accessKey, secretKey)); - if (!status.ok() && status.code() != absl::StatusCode::kAlreadyExists) { - return status; - } - status = createBucket(bucket); - if (!status.ok()) { - return status; - } - // Trigger update. - return syncObjectStoreConfigs(); -} - -absl::Status GEDS::syncObjectStoreConfigs() { - auto configs = _metadataService.listObjectStoreConfigs(); - if (!configs.ok()) { - LOG_ERROR("Unable to list object store: ", configs.status().message()); - return configs.status(); - } - for (const auto &c : configs.value()) { - LOG_INFO("Registering object store for ", c->bucket, " and endpoint ", c->endpointURL); - auto status = - _objectStores.registerStore(c->bucket, c->endpointURL, c->accessKey, c->secretKey); - if (!status.ok()) { - LOG_ERROR("Unable to setup object store for ", c->bucket, ": ", status.message()); - } - } - return absl::OkStatus(); -} - -void GEDS::relocate(bool force) { - LOG_INFO("Relocating..."); - std::vector> relocatable; - - _fileHandles.forall([&relocatable, force](std::shared_ptr &item) { - if (item->openCount() == 0 || force) { - relocatable.push_back(item); - } - }); - relocate(relocatable, force); -} - -void GEDS::relocate(std::vector> &relocatable, bool force) { - struct RelocateHelper { - std::mutex mutex; - std::condition_variable cv; - std::atomic nTasks; - }; - auto h = std::make_shared(); - { - std::lock_guard lock(h->mutex); - h->nTasks = relocatable.size(); - } - - LOG_INFO("Relocating ", relocatable.size(), " objects."); - - auto self = shared_from_this(); - for (auto fh : relocatable) { - boost::asio::post(_ioThreadPool, [self, fh, h, force]() { - try { - 
self->relocate(fh, force); - } catch (...) { - LOG_ERROR("Encountered an exception during relocation ", fh->identifier); - } - { - std::lock_guard lock(h->mutex); - h->nTasks -= 1; - } - h->cv.notify_all(); - }); - - const auto tp_size = _config.io_thread_pool_size; - std::unique_lock lock(h->mutex); - h->cv.wait(lock, [h, tp_size]() { return h->nTasks <= (tp_size + 1); }); - } - std::unique_lock lock(h->mutex); - h->cv.wait(lock, [h]() { return h->nTasks == 0; }); - LOG_INFO("Relocated ", relocatable.size(), " objects."); -} - -void GEDS::relocate(std::shared_ptr handle, bool force) { - LOG_DEBUG(handle->identifier); - - auto lock = handle->lockFile(); - if (handle->openCount() > 0 && !force) { - // File is open: Unable to relocate. - return; - } - - static auto stats = geds::Statistics::createCounter("GEDS: Storage Relocated"); - auto fsize = handle->localStorageSize(); - *stats += handle->localStorageSize(); - - // Remove cached files. - const auto path = getPath(handle->bucket, handle->key); - if (handle->key.starts_with(GEDSCachedFileHandle::CacheBlockMarker)) { - auto status = - _fileHandles.removeIf(path, [handle](const std::shared_ptr &existing) { - return handle.get() == existing.get(); - }); - if (status) { - *stats += fsize; - } - return; - } - - // Relocate all other files. - auto status = handle->relocate(); - if (status.ok()) { - *stats += fsize; - } +/** + * @brief Copy an object. 
+ */ +absl::Status copy(const std::string &bucket, const std::string &srcKey, + const std::string &destKey) { + return absl::UnimplementedError("NYI"); } - -void GEDS::startStorageMonitoringThread() { - _storageMonitoringThread = std::thread([&]() { - auto statsLocalStorageUsed = geds::Statistics::createGauge("GEDS: Local Storage used"); - auto statsLocalStorageFree = geds::Statistics::createGauge("GEDS: Local Storage free"); - auto statsLocalStorageAllocated = - geds::Statistics::createGauge("GEDS: Local Storage allocated"); - auto statsLocalMemoryUsed = geds::Statistics::createGauge("GEDS: Local Memory used"); - auto statsLocalMemoryFree = geds::Statistics::createGauge("GEDS: Local Memory free"); - auto statsLocalMemoryAllocated = geds::Statistics::createGauge("GEDS: Local Memory allocated"); - - while (_state.load() == ServiceState::Running) { - std::vector> relocatable; - size_t memoryUsed = 0; - size_t storageUsed = 0; - { // Extract all file handles to avoid deadlocks. - std::vector> allFileHandles; - _fileHandles.forall([&allFileHandles](std::shared_ptr &fh) { - allFileHandles.push_back(fh); - }); - for (const auto &fh : allFileHandles) { - auto storageSize = fh->localStorageSize(); - auto memSize = fh->localMemorySize(); - storageUsed += storageSize; - memoryUsed += memSize; - if (fh->isRelocatable() && fh->openCount() == 0) { - relocatable.push_back(fh); - } - } - } - - _storageCounters.updateUsed(storageUsed); - _memoryCounters.updateUsed(memoryUsed); - - { - auto lock = _storageCounters.getReadLock(); - *statsLocalStorageUsed = _storageCounters.used; - *statsLocalStorageAllocated = _storageCounters.allocated; - *statsLocalStorageFree = _storageCounters.free; - } - - { - auto lock = _memoryCounters.getReadLock(); - *statsLocalMemoryAllocated = _memoryCounters.allocated; - *statsLocalMemoryUsed = _memoryCounters.used; - *statsLocalMemoryFree = _memoryCounters.free; - } - - auto targetStorage = - (size_t)(_config.storage_spilling_fraction * 
(double)_config.available_local_storage); - if (storageUsed > targetStorage) { - std::sort(std::begin(relocatable), std::end(relocatable), - [](std::shared_ptr a, std::shared_ptr b) { - return a->lastReleased() < b->lastReleased(); - }); - - std::vector> tasks; - size_t relocateBytes = 0; - for (auto &f : relocatable) { - if (relocateBytes > (storageUsed - targetStorage)) { - break; - } - relocateBytes += f->localStorageSize(); - tasks.push_back(f); - } - if (tasks.size()) { - relocate(tasks); - } else { - LOG_WARNING("Unable to relocate files: No task found!"); - } - } - relocatable.clear(); - sleep(1); - } - }); +absl::Status copy(const std::string &srcBucket, const std::string &srcKey, + const std::string &destBucket, const std::string &destKey) { + return absl::UnimplementedError("NYI"); } -void GEDS::startPubSubStreamThread() { - if (!_config.pubSubEnabled) { - LOG_DEBUG("PubSub streaming thread not enabled."); - return; - } - if (_state == ServiceState::Running) { - _pubSubStreamThread = std::thread([&]() { auto status = _metadataService.subscribeStream(); }); - } else { - LOG_ERROR("Unable to start pub/sub streaming thread."); - } - LOG_DEBUG("PubSub streaming thread enabled."); +absl::Status deleteObject(const std::string &bucket, const std::string &key) { + return absl::UnimplementedError("NYI"); } -absl::Status GEDS::subscribe(const geds::SubscriptionEvent &event) { - GEDS_CHECK_SERVICE_RUNNING - if (!_config.pubSubEnabled) { - return absl::FailedPreconditionError("publish/subscribe is not enabled."); - } - return _metadataService.subscribe(event); +absl::Status deleteObjectPrefix(const std::string &bucket, const std::string &prefix) { + return absl::UnimplementedError("NYI"); } -absl::Status GEDS::unsubscribe(const geds::SubscriptionEvent &event) { - GEDS_CHECK_SERVICE_RUNNING - if (!_config.pubSubEnabled) { - return absl::FailedPreconditionError("publish/subscribe is not enabled."); - } - return _metadataService.unsubscribe(event); +absl::Status 
registerObjectStoreConfig(const std::string &bucket, const std::string &endpointUrl, + const std::string &accessKey, const std::string &secretKey) { + return absl::UnimplementedError("NYI"); } diff --git a/src/libgeds/GEDS.h b/src/libgeds/GEDS.h index 33768196..851454d5 100644 --- a/src/libgeds/GEDS.h +++ b/src/libgeds/GEDS.h @@ -23,109 +23,26 @@ #include #include -#include "ConcurrentMap.h" -#include "ConcurrentSet.h" -#include "FileTransferService.h" -#include "GEDSConfig.h" #include "GEDSFileHandle.h" #include "GEDSFileStatus.h" -#include "GEDSInternal.h" -#include "GEDSLocalFileHandle.h" -#include "HttpServer.h" -#include "MetadataService.h" -#include "Object.h" -#include "ObjectStoreConfig.h" -#include "Path.h" #include "RWConcurrentObjectAdaptor.h" -#include "S3Endpoint.h" -#include "S3ObjectStores.h" -#include "Server.h" -#include "Statistics.h" -#include "StorageCounter.h" -#include "TcpTransport.h" const char Default_GEDSFolderDelimiter = '/'; class GEDSFile; class GEDS : public std::enable_shared_from_this, utility::RWConcurrentObjectAdaptor { - GEDSConfig _config; - -public: - const GEDSConfig &config() const { return _config; } protected: - /** - * @brief GEDS Server instance that allows file transfers. - */ - geds::Server _server; - - /** - * @brief GEDS Service state. - */ - std::atomic _state{geds::ServiceState::Stopped}; - - /** - * @brief Metadata service. - */ - geds::MetadataService _metadataService; - - /** - * @brief GEDS local directory path. Folder/path-prefix which stores all local GEDS data. - * - */ - const std::string _pathPrefix; - mutable std::atomic _fileNameCounter; - mutable utility::ConcurrentMap _fileNames; - - /** - * @brief URI for Local Host. - */ - std::string _hostURI; - std::string _hostname; - - /** - * @brief Filehandles known to the local GEDS instance. 
- */ - utility::ConcurrentMap, std::less<>> _fileHandles; - inline utility::Path getPath(const std::string &bucket, const std::string &key) { - return {bucket + "/" + key}; - } - utility::ConcurrentMap> _fileTransfers; - - utility::ConcurrentSet _knownBuckets; - - geds::s3::ObjectStores _objectStores; - - std::shared_ptr _statisticsFilesOpened = - geds::Statistics::createCounter("GEDS: files opened"); - std::shared_ptr _statisticsFilesCreated = - geds::Statistics::createCounter("GEDS: files created"); - - geds::HttpServer _httpServer; - - boost::asio::thread_pool _ioThreadPool; - std::thread _storageMonitoringThread; - void startStorageMonitoringThread(); - - geds::StorageCounter _storageCounters; - geds::StorageCounter _memoryCounters; - - std::thread _pubSubStreamThread; - void startPubSubStreamThread(); + GEDS() = default; public: const std::string uuid; - /** - * @brief GEDS CTOR. Note: This CTOR needs to be wrapped in a SHARED_POINTER! - */ - GEDS(GEDSConfig &&argConfig); - /** * @brief Constructor wrapper which forces a shared_ptr. */ - [[nodiscard]] static std::shared_ptr factory(GEDSConfig config); + [[nodiscard]] static std::shared_ptr factory(); virtual ~GEDS(); @@ -139,35 +56,6 @@ class GEDS : public std::enable_shared_from_this, utility::RWConcurrentObj */ absl::Status stop(); - /** - * @brief Check if the bucket name is allowed. - * In order to ensure compatibility with S3 we make sure that a bucket name - * - Consists of only lower case ASCII characers, numbers, dots and hypens. - * - The bucket name must begin and end with a letter or number. - * - The bucket must be at least 3 characters long. - */ - static absl::Status isValidBucketName(const std::string &bucket); - - /** - * @brief Validate the name of the key. - * - Keys shall not start with `/`. - * - A length of zero is not allowed. - * - Keys are not allowed to end with `/`. - * - `/../` as part of a key is not allowed. 
- */ - static absl::Status isValidKeyName(const std::string &key); - - /** - * @brief Validate the name of bucket and key. - */ - static absl::Status isValid(const std::string &bucket, const std::string &key); - - /** - * @brief Parse the object name and split it into bucket and key. - */ - static absl::StatusOr> - parseObjectName(const std::string &objectName); - /** * @brief Create object located at bucket/key. * The object is registered with the metadata service once the file is sealed. @@ -293,40 +181,12 @@ class GEDS : public std::enable_shared_from_this, utility::RWConcurrentObj */ absl::Status deleteObjectPrefix(const std::string &bucket, const std::string &prefix); - /** - * @brief Compute the path to the files stored in `_pathPrefix` folder. - */ - std::string getLocalPath(const std::string &bucket, const std::string &key) const; - std::string getLocalPath(const GEDSFile &file) const; - - /** - * @brief Tcp inter-node object transport service. - * - */ - std::shared_ptr _tcpTransport; - /** * @brief Register an object store configuration with GEDS. */ absl::Status registerObjectStoreConfig(const std::string &bucket, const std::string &endpointUrl, const std::string &accessKey, const std::string &secretKey); - - /** - * @brief Sync object store configs. 
- */ - absl::Status syncObjectStoreConfigs(); - - absl::StatusOr> getS3Endpoint(const std::string &s3Bucket); - absl::StatusOr> - getFileTransferService(const std::string &hostname); - - void relocate(bool force = false); - void relocate(std::vector> &relocatable, bool force = false); - void relocate(std::shared_ptr handle, bool force = false); - - absl::Status subscribe(const geds::SubscriptionEvent &event); - absl::Status unsubscribe(const geds::SubscriptionEvent &event); }; #endif // GEDS_GEDS_H diff --git a/src/libgeds/GEDSAbstractFileHandle.cpp b/src/libgeds/GEDSAbstractFileHandle.cpp deleted file mode 100644 index 12758fa3..00000000 --- a/src/libgeds/GEDSAbstractFileHandle.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/** - * Copyright 2022- IBM Inc. All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "GEDSAbstractFileHandle.h" - -#include "GEDS.h" - -namespace geds::service { -std::string getLocalPath(std::shared_ptr geds, const std::string &bucket, - const std::string &key) { - return geds->getLocalPath(bucket, key); -} - -absl::Status seal(std::shared_ptr geds, GEDSFileHandle &fileHandle, bool update, - size_t size) { - return geds->seal(fileHandle, update, size); -} - -absl::StatusOr> getS3Endpoint(std::shared_ptr geds, - const std::string &bucket) { - return geds->getS3Endpoint(bucket); -} - -} // namespace geds::service diff --git a/src/libgeds/GEDSAbstractFileHandle.h b/src/libgeds/GEDSAbstractFileHandle.h deleted file mode 100644 index a3eea31d..00000000 --- a/src/libgeds/GEDSAbstractFileHandle.h +++ /dev/null @@ -1,222 +0,0 @@ -/** - * Copyright 2022- IBM Inc. 
All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "Filesystem.h" -#include "GEDSFile.h" -#include "GEDSFileHandle.h" -#include "GEDSS3FileHandle.h" -#include "Logging.h" -#include "MMAPFile.h" -#include "Statistics.h" - -namespace geds::service { -std::string getLocalPath(std::shared_ptr geds, const std::string &bucket, - const std::string &key); -absl::Status seal(std::shared_ptr geds, GEDSFileHandle &fileHandle, bool update, size_t size); -absl::StatusOr> getS3Endpoint(std::shared_ptr geds, - const std::string &bucket); - -} // namespace geds::service - -template class GEDSAbstractFileHandle : public GEDSFileHandle { - bool _isSealed{false}; - - T _file; - - std::shared_ptr _readStatistics; - std::shared_ptr _writeStatistics; - -private: - // Constructors are private to enable `shared_from_this`. - GEDSAbstractFileHandle(std::shared_ptr gedsService, std::string bucketArg, - std::string keyArg, std::optional metadataArg, - std::string pathArg) - : GEDSFileHandle(gedsService, std::move(bucketArg), std::move(keyArg), - std::move(metadataArg)), - _file(T(std::move(pathArg))), _readStatistics(geds::Statistics::createCounter( - "GEDS" + _file.statisticsLabel() + "Handle: bytes read")), - _writeStatistics(geds::Statistics::createCounter("GEDS" + _file.statisticsLabel() + - "Handle: bytes written")) { - static auto counter = - geds::Statistics::createCounter("GEDS" + _file.statisticsLabel() + "Handle: count"); - *counter += 1; - } - -public: - [[nodiscard]] static absl::StatusOr> - factory(std::shared_ptr gedsService, std::string bucketArg, std::string keyArg, - std::optional metadataArg, - std::optional pathArg = std::nullopt) { - try { - auto path = pathArg.has_value() ? 
pathArg.value() - : geds::service::getLocalPath(gedsService, bucketArg, keyArg); - auto pathDir = std::filesystem::path(path).remove_filename(); - auto dirStatus = geds::filesystem::mkdir(pathDir); - if (!dirStatus.ok()) { - return dirStatus; - } - return std::shared_ptr( - new GEDSAbstractFileHandle(std::move(gedsService), std::move(bucketArg), - std::move(keyArg), std::move(metadataArg), path)); - } catch (const std::runtime_error &e) { - return absl::UnknownError(e.what()); - } - } - - GEDSAbstractFileHandle() = delete; - GEDSAbstractFileHandle(GEDSAbstractFileHandle &) = delete; - GEDSAbstractFileHandle(GEDSAbstractFileHandle &&) = delete; - GEDSAbstractFileHandle &operator=(GEDSAbstractFileHandle &) = delete; - GEDSAbstractFileHandle &operator=(GEDSAbstractFileHandle &&) = delete; - ~GEDSAbstractFileHandle() override = default; - - bool isRelocatable() const override { return true; } - absl::StatusOr size() const override { return _file.size(); } - size_t localStorageSize() const override { return _file.localStorageSize(); } - size_t localMemorySize() const override { return _file.localMemorySize(); } - - bool isWriteable() const override { return true; } - - absl::Status setMetadata(std::optional metadata, bool seal) override { - auto lock = lockFile(); - _metadata = std::move(metadata); - if (seal) { - return this->seal(); - } - return absl::OkStatus(); - } - - absl::StatusOr readBytes(uint8_t *bytes, size_t position, size_t length) override { - auto lock = lockShared(); - auto result = _file.readBytes(bytes, position, length); - if (result.ok()) { - *_readStatistics += *result; - } - return result; - } - - absl::Status writeBytes(const uint8_t *bytes, size_t position, size_t length) override { - auto lock = lockShared(); - auto result = _file.writeBytes(bytes, position, length); - if (result.ok()) { - *_writeStatistics += length; - } - return result; - } - - absl::Status write(std::istream &stream, size_t position, - std::optional lengthOptional) override { 
- auto lock = lockShared(); - auto result = _file.write(stream, position, lengthOptional); - if (result.ok()) { - *_writeStatistics += *result; - } - return absl::OkStatus(); - } - - absl::Status truncate(size_t targetSize) override { - auto lock = lockExclusive(); - return _file.truncate(targetSize); - } - - absl::Status seal() override { - auto lock = lockFile(); - auto ioLock = lockExclusive(); - size_t currentSize = _file.size(); - absl::Status status = absl::OkStatus(); - // FIXME: Create a GEDS Service mock to skip this abonimation below here. - if (_gedsService != nullptr) { // Allow faking the GEDS Service for unittests. - status = geds::service::seal(_gedsService, *this, _isSealed, currentSize); - } - if (status.ok()) { - _isSealed = true; - } - return status; - } - - void notifyUnused() override { - auto lock = lockFile(); - auto iolock = lockExclusive(); - if (_openCount > 0) { - return; - } - _file.notifyUnused(); - }; - - absl::StatusOr rawFd() const override { - auto lock = lockShared(); - return _file.rawFd(); - } - - absl::StatusOr rawPtr() override { - auto lock = lockShared(); - return _file.rawPtr(); - } - - absl::StatusOr> relocate() override { - auto lock = lockFile(); - auto iolock = lockExclusive(); - if (!isValid()) { - return absl::UnavailableError("The file " + identifier + " is no longer valid!"); - } - LOG_INFO("Relocating ", identifier, " (size: ", _file.size(), ") "); - if (_openCount > 0) { - auto message = "Unable to relocate " + identifier + " reason: The file is still in use."; - LOG_ERROR(message); - return absl::UnavailableError(message); - } - auto s3Endpoint = geds::service::getS3Endpoint(_gedsService, bucket); - if (!s3Endpoint.ok()) { - auto message = - "Unable to relocate " + identifier + " reason: No tier configured for " + bucket; - LOG_ERROR(message); - return absl::UnavailableError(message); - } - - absl::Status s3Put; - auto rawPtr = _file.rawPtr(); - if (rawPtr.ok()) { - s3Put = (*s3Endpoint)->putObject(bucket, key, 
*rawPtr, _file.size()); - } else { - auto stream = - std::make_shared(_file.path(), std::ios_base::binary | std::ios_base::in); - s3Put = (*s3Endpoint)->putObject(bucket, key, stream, std::make_optional(_file.size())); - } - if (!s3Put.ok()) { - auto message = - "Unable to relocate " + identifier + " to s3: Reason " + std::string{s3Put.message()}; - LOG_ERROR(message); - return absl::UnknownError(message); - } - auto fh = GEDSS3FileHandle::factory(_gedsService, bucket, key, metadata()); - if (!fh.ok()) { - LOG_ERROR("Unable to reopen the relocated file ", identifier, - " on s3:", fh.status().message()); - return fh.status(); - } - auto status = (*fh)->seal(); - if (!status.ok()) { - LOG_ERROR("Unable to seal relocated file: ", status.message()); - (void)(*s3Endpoint)->deleteObject(bucket, key); - return status; - } - // Mark as invalid. - _isValid = false; - return fh; - } -}; diff --git a/src/libgeds/GEDSCachedFileHandle.cpp b/src/libgeds/GEDSCachedFileHandle.cpp deleted file mode 100644 index b7be5820..00000000 --- a/src/libgeds/GEDSCachedFileHandle.cpp +++ /dev/null @@ -1,195 +0,0 @@ -/** - * Copyright 2022- IBM Inc. 
All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "GEDSCachedFileHandle.h" -#include "GEDSFile.h" -#include "GEDSFileHandle.h" -#include "Logging.h" -#include "Statistics.h" - -const std::string GEDSCachedFileHandle::CacheBlockMarker = {"_$cachedblock$/"}; - -GEDSCachedFileHandle::GEDSCachedFileHandle(std::shared_ptr gedsService, std::string bucketArg, - std::string keyArg, - std::optional metadataArg, - std::shared_ptr remoteFileHandle) - : GEDSFileHandle(gedsService, std::move(bucketArg), std::move(keyArg), std::move(metadataArg)), - _remoteFileHandle(remoteFileHandle), _blockSize(gedsService->config().cacheBlockSize) { - static auto counter = geds::Statistics::createCounter("GEDSCachedFileHandle: count"); - *counter += 1; - - auto fileOpenStatus = _remoteFileHandle->open(); - if (!fileOpenStatus.ok()) { - auto message = "Unable to open GEDSFile from filehandle: " + - std::string{fileOpenStatus.status().message()}; - LOG_ERROR(message); - throw std::runtime_error(message); - } - _remoteFile = std::make_shared(std::move(*fileOpenStatus)); - - _remoteSize = _remoteFile->size(); - _blocks = std::vector>(_remoteSize / _blockSize + 1, nullptr); - _blockMutex = std::vector(_remoteSize / _blockSize + 1); -} - -absl::StatusOr GEDSCachedFileHandle::size() const { return _remoteSize; } - -size_t GEDSCachedFileHandle::localStorageSize() const { - size_t result = 0; - for (size_t idx = 0; idx < _blocks.size(); idx++) { - auto lock = std::lock_guard(_blockMutex[idx]); - if (_blocks[idx].get() == nullptr) { - continue; - } - auto fh = _blocks[idx]->fileHandle(); - result += fh->localStorageSize(); - } - return result; -} - -size_t GEDSCachedFileHandle::localMemorySize() const { - auto lock = lockShared(); - size_t result = 0; - for (size_t idx = 0; idx < _blocks.size(); idx++) { - auto lock = std::lock_guard(_blockMutex[idx]); - if (_blocks[idx].get() == nullptr) { - continue; - 
} - auto fh = _blocks[idx]->fileHandle(); - result += fh->localMemorySize(); - } - return result; -} -absl::StatusOr GEDSCachedFileHandle::readBytes(uint8_t *bytes, size_t position, - size_t length) { - if (position >= _remoteSize || length == 0) { - return 0; - } - auto lock = lockShared(); - length = std::min(length, _remoteSize - position); - - auto computeBlock = [&](size_t pos) { return pos / _blockSize; }; - auto openBlock = [&](size_t idx) -> absl::StatusOr { - if (_blocks[idx].get() != nullptr) { - return *_blocks[idx]; - } - auto cacheKey = CacheBlockMarker + key + "_" + std::to_string(idx); - auto lock = std::lock_guard(_blockMutex[idx]); - auto exists = _gedsService->open(bucket, cacheKey); - if (exists.ok()) { - _blocks[idx] = std::make_shared(std::move(*exists)); - return *_blocks[idx]; - } - - auto newFile = _gedsService->createAsFileHandle(bucket, cacheKey); - if (!newFile.ok()) { - return newFile.status(); - } - auto copyStatus = _remoteFileHandle->downloadRange(*newFile, _blockSize * idx, _blockSize, 0); - if (!copyStatus.ok()) { - return copyStatus.status(); - } - *_cacheSize += *copyStatus; - - auto f = (*newFile)->open(); - if (!f.ok()) { - return f.status(); - } - *_numCachedBlocks += 1; - _blocks[idx] = std::make_shared(std::move(*(*newFile)->open())); - auto sealStatus = (*newFile)->seal(); - if (!sealStatus.ok()) { - LOG_ERROR("Unable to seal block ", identifier, " with ", idx, ": ", sealStatus.message()); - } - return *_blocks[idx]; - }; - - auto purgeBlock = [&](size_t idx, GEDSFile &file) { - LOG_INFO("PurgeBlock ", file.identifier()); - auto lock = std::lock_guard(_blockMutex[idx]); - if (_blocks[idx].get() == nullptr) { - return; - } - const GEDSFileHandle *e = _blocks[idx]->fileHandle().get(); - const GEDSFileHandle *t = file.fileHandle().get(); - if (e == t) { - *_numPurgedBlocks += 1; - _blocks[idx] = nullptr; - LOG_INFO("About to purge block", file.identifier()); - (void)_gedsService->deleteObject(file.bucket(), file.key()); - 
LOG_INFO("Purged block ", file.identifier()); - } - }; - - auto startBlock = computeBlock(position); - auto endBlock = computeBlock(position + length); - - const size_t MAX_RETRIES = 1; - size_t count = 0; - for (size_t idx = startBlock; idx <= endBlock; idx++) { - size_t retryCount = 0; - while (true) { - auto fileBlock = openBlock(idx); - if (!fileBlock.ok()) { - return fileBlock.status(); - } - auto file = *fileBlock; - absl::StatusOr copyCount; - size_t expectedCount = std::min(length - count, _blockSize); - copyCount = file.read(bytes + count, - (position + count) % _blockSize, // - expectedCount); - if (copyCount.ok()) { - *_readStatistics += *copyCount; - count += *copyCount; - break; - } - if (retryCount >= MAX_RETRIES) { - return copyCount.status(); - } - LOG_INFO("Unable to download block from ", identifier, - ". Reason: ", copyCount.status().message(), ". Retrying"); - // Purge block and retry. - purgeBlock(idx, file); - retryCount++; - } - } - return count; -} - -absl::Status GEDSCachedFileHandle::seal() { - auto lock = lockFile(); - auto iolock = lockExclusive(); - return _remoteFileHandle->seal(); -} - -absl::StatusOr> GEDSCachedFileHandle::relocate() { - // Cached file handles are purged by default. - auto lock = lockFile(); - auto ioLock = lockExclusive(); - for (size_t idx = 0; idx < _blocks.size(); idx++) { - auto lock = std::lock_guard(_blockMutex[idx]); - if (_blocks[idx].get() == nullptr) { - continue; - } - auto file = _blocks[idx]; - auto bucket = file->bucket(); - auto key = file->key(); - auto fh = file->fileHandle(); - if (fh->localStorageSize()) { - (void)_gedsService->deleteObject(bucket, key); - _blocks[idx] = nullptr; - } - } - return shared_from_this(); -} diff --git a/src/libgeds/GEDSCachedFileHandle.h b/src/libgeds/GEDSCachedFileHandle.h deleted file mode 100644 index 69c7fe85..00000000 --- a/src/libgeds/GEDSCachedFileHandle.h +++ /dev/null @@ -1,89 +0,0 @@ -/** - * Copyright 2022- IBM Inc. 
All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#ifndef GEDS_CACHED_FILE_HANDLE_H -#define GEDS_CACHED_FILE_HANDLE_H - -#include -#include -#include -#include - -#include "GEDSFile.h" -#include "MMAPFile.h" -#include "Object.h" - -#include "GEDS.h" - -class GEDSCachedFileHandle : public GEDSFileHandle { - std::shared_ptr _remoteFileHandle; - std::shared_ptr _remoteFile; - - size_t _remoteSize; - size_t _blockSize; - - std::vector> _blocks; - mutable std::vector _blockMutex; - - std::shared_ptr _readStatistics = - geds::Statistics::createCounter("GEDSCachedFileHandle: bytes read"); - std::shared_ptr _cacheSize = - geds::Statistics::createCounter("GEDSCachedFileHandle: local cache size"); - std::shared_ptr _numCachedBlocks = - geds::Statistics::createCounter("GEDSCachedFileHandle: number of locally cached blocks"); - std::shared_ptr _numPurgedBlocks = - geds::Statistics::createCounter("GEDSCachedFileHandle: number of purged blocks"); - // private: -public: - GEDSCachedFileHandle(std::shared_ptr gedsService, std::string bucketArg, std::string keyArg, - std::optional metadataArg, - std::shared_ptr remoteFile); - -public: - static const std::string CacheBlockMarker; - - template - [[nodiscard]] static absl::StatusOr> - factory(std::shared_ptr gedsService, const std::string &bucket, const std::string &key, - std::optional metadataArg) { - auto remoteFH = TRemote::factory(gedsService, bucket, key, metadataArg); - if (!remoteFH.ok()) { - return remoteFH.status(); - } - return std::shared_ptr( - new GEDSCachedFileHandle(gedsService, bucket, key, std::move(metadataArg), *remoteFH)); - } - - template - [[nodiscard]] static absl::StatusOr> - factory(std::shared_ptr gedsService, const geds::Object &object) { - auto remoteFH = TRemote::factory(gedsService, object); - if (!remoteFH.ok()) { - return remoteFH.status(); - } - return std::shared_ptr(new GEDSCachedFileHandle( - gedsService, object.id.bucket, object.id.key, object.info.metadata, *remoteFH)); - } - - 
GEDSCachedFileHandle() = delete; - GEDSCachedFileHandle(const GEDSCachedFileHandle &) = delete; - GEDSCachedFileHandle(GEDSCachedFileHandle &&) = delete; - ~GEDSCachedFileHandle() override = default; - GEDSCachedFileHandle &operator=(const GEDSCachedFileHandle &) = delete; - GEDSCachedFileHandle &operator=(GEDSCachedFileHandle &&) = delete; - - bool isRelocatable() const override { return true; } - absl::StatusOr size() const override; - size_t localStorageSize() const override; - size_t localMemorySize() const override; - - absl::StatusOr readBytes(uint8_t *bytes, size_t position, size_t length) override; - - absl::Status seal() override; - - absl::StatusOr> relocate() override; -}; - -#endif diff --git a/src/libgeds/GEDSConfig.cpp b/src/libgeds/GEDSConfig.cpp deleted file mode 100644 index 1b321546..00000000 --- a/src/libgeds/GEDSConfig.cpp +++ /dev/null @@ -1,138 +0,0 @@ -/** - * Copyright 2023- IBM Inc. All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "GEDSConfig.h" - -#include "Logging.h" - -absl::Status GEDSConfig::set(const std::string &key, const std::string &value) { - LOG_DEBUG("Trying to set '", key, "' to '", value, "'"); - if (key == "listen_address") { - listenAddress = value; - } else if (key == "hostname") { - hostname = value == "" ? 
std::nullopt : std::make_optional(value); - } else if (key == "local_storage_path") { - localStoragePath = value; - } else if (key == "pub_sub_enabled" && value == "true") { - pubSubEnabled = true; - } else { - LOG_ERROR("Configuration " + key + " not supported (type: string)."); - return absl::NotFoundError("Key " + key + " not found."); - } - LOG_INFO("Set '", key, "' to '", value, "'"); - return absl::OkStatus(); -} - -absl::Status GEDSConfig::set(const std::string &key, size_t value) { - LOG_DEBUG("Trying to set '", key, "' to '", value, "'"); - if (value == 0) { - return absl::InvalidArgumentError("Value " + std::to_string(value) + " is out of range for " + - key); - } - if (key == "port") { - if (value > INT16_MAX) { - return absl::InvalidArgumentError("Value " + std::to_string(value) + " is out of range for " + - key); - } - port = value; - } else if (key == "http_server_port") { - if (value > INT16_MAX) { - return absl::InvalidArgumentError("Value " + std::to_string(value) + " is out of range for " + - key); - } - portHttpServer = value; - } else if (key == "cache_block_size") { - cacheBlockSize = value; - } else if (key == "io_thread_pool_size") { - io_thread_pool_size = value; - } else if (key == "available_local_storage") { - available_local_storage = value; - } else if (key == "available_local_memory") { - available_local_memory = value; - } else if (key == "pub_sub_enabled") { - pubSubEnabled = value != 0; - } else if (key == "cache_objects_from_s3") { - cache_objects_from_s3 = value != 0; - } else if (key == "force_relocation_when_stopping") { - force_relocation_when_stopping = value != 0; - } else { - LOG_ERROR("Configuration " + key + " not supported (type: signed/unsigned integer)."); - return absl::NotFoundError("Key " + key + " not found."); - } - LOG_INFO("Set '", key, "' to '", value, "'"); - return absl::OkStatus(); -} - -absl::Status GEDSConfig::set(const std::string &key, int64_t value) { - if (value < 0) { - return 
absl::InvalidArgumentError("Value " + std::to_string(value) + " is out of range for " + - key); - } - return set(key, (size_t)value); -} - -absl::Status GEDSConfig::set(const std::string &key, double value) { - if (key == "storage_spilling_fraction") { - storage_spilling_fraction = value; - return absl::OkStatus(); - } - LOG_ERROR("Configuration " + key + " not supported (type: double)."); - return absl::NotFoundError("Key " + key + " not found."); -} - -absl::StatusOr GEDSConfig::getString(const std::string &key) const { - LOG_INFO("Get ", key, " as string"); - if (key == "listen_address") { - return listenAddress; - } - if (key == "hostname") { - return hostname.value_or(""); - } - if (key == "local_storage_path") { - return localStoragePath; - } - LOG_ERROR("Configuration " + key + " not supported (type: string)."); - return absl::NotFoundError("Key " + key + " not found."); -} - -absl::StatusOr GEDSConfig::getUnsignedInt(const std::string &key) const { - LOG_INFO("Get ", key, " as integer"); - if (key == "port") { - return port; - } - if (key == "http_server_port") { - return portHttpServer; - } - if (key == "cache_block_size") { - return cacheBlockSize; - } - if (key == "io_thread_pool_size") { - return io_thread_pool_size; - } - if (key == "available_local_storage") { - return available_local_storage; - } - if (key == "available_local_memory") { - return available_local_memory; - } - LOG_ERROR("Configuration " + key + " not supported (type: signed/unsigned integer)."); - return absl::NotFoundError("Key " + key + " not found."); -} - -absl::StatusOr GEDSConfig::getSignedInt(const std::string &key) const { - auto value = getUnsignedInt(key); - if (value.ok()) { - return (int64_t)*value; - } - return value.status(); -} - -absl::StatusOr GEDSConfig::getDouble(const std::string &key) const { - if (key == "storage_spilling_fraction") { - return storage_spilling_fraction; - } - LOG_ERROR("Configuration " + key + " not supported (type: double)."); - return 
absl::NotFoundError("Key " + key + " (double) not found."); -} diff --git a/src/libgeds/GEDSConfig.h b/src/libgeds/GEDSConfig.h deleted file mode 100644 index 84ac7bc7..00000000 --- a/src/libgeds/GEDSConfig.h +++ /dev/null @@ -1,114 +0,0 @@ -/** - * Copyright 2023- IBM Inc. All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include -#include -#include -#include - -#include -#include -#include - -#include "Ports.h" - -struct GEDSConfig { - /** - * @brief The hostname of the metadata service/ - * - * Format: `{HOSTNAME/IP}:{PORT}` - */ - std::string metadataServiceAddress; - - /** - * @brief GEDS listening address. - */ - std::string listenAddress = "0.0.0.0"; - - /** - * @brief Hostname/IP used by GEDS to announce itself. - * - * If null, the hostname is determined by querying the metadata service. - */ - std::optional hostname = std::nullopt; - - /** - * @brief GEDS listening port. - */ - uint16_t port = defaultGEDSPort; - - /** - * @brief GEDS web server port. - * - * E.g. prometheus endpoint. - */ - uint16_t portHttpServer = defaultPrometheusPort; - - /** - * @brief Storage path for files create by GEDS. - * - * The Postfix XXXXXX indicates that the path can be randomized. - */ - std::string localStoragePath = "/tmp/GEDS_XXXXXX"; - - /** - * @brief Block size used for caching. - * - * Hadoop S3A uses 32MB - thus we set the same value. - */ - size_t cacheBlockSize = 32 * 1024 * 1024; - - /** - * @brief Size of I/O thread pool. - */ - size_t io_thread_pool_size = std::max(std::thread::hardware_concurrency() / 2, (uint32_t)8); - - /** - * @brief Available local storage. - */ - size_t available_local_storage = 100 * 1024 * 1024 * (size_t)1024; - - size_t available_local_memory = 16 * 1024 * 1024 * (size_t)1024; - - /** - * @brief Publish/Subscribe is enabled. - */ - bool pubSubEnabled = false; - - /** - * @brief Cache objects located in S3. - */ - bool cache_objects_from_s3 = false; - - /** - * @brief Force relocation when stopping. 
- */ - bool force_relocation_when_stopping = false; - - /** - * @brief Fraction of the storage where GEDS should start spilling. - */ - double storage_spilling_fraction = 0.7; - - GEDSConfig(std::string metadataServiceAddressArg) - : metadataServiceAddress(std::move(metadataServiceAddressArg)) { - if (available_local_storage <= 4 * 1024 * 1024 * (size_t)1024) { - io_thread_pool_size = std::min(io_thread_pool_size, 6); - storage_spilling_fraction = 0.9; - } - } - - absl::Status set(const std::string &key, const std::string &value); - absl::Status set(const std::string &key, size_t value); - absl::Status set(const std::string &key, int64_t value); - absl::Status set(const std::string &key, double value); - - absl::StatusOr getString(const std::string &key) const; - absl::StatusOr getUnsignedInt(const std::string &key) const; - absl::StatusOr getSignedInt(const std::string &key) const; - absl::StatusOr getDouble(const std::string &key) const; -}; diff --git a/src/libgeds/GEDSInternal.cpp b/src/libgeds/GEDSInternal.cpp deleted file mode 100644 index 36d09755..00000000 --- a/src/libgeds/GEDSInternal.cpp +++ /dev/null @@ -1,16 +0,0 @@ -/** - * Copyright 2022- IBM Inc. All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "GEDSInternal.h" - -#include - -namespace geds { - -std::string to_string(ConnectionState state) { return std::string{magic_enum::enum_name(state)}; } - -std::string to_string(ServiceState state) { return std::string{magic_enum::enum_name(state)}; } - -} // namespace geds diff --git a/src/libgeds/GEDSInternal.h b/src/libgeds/GEDSInternal.h deleted file mode 100644 index 5de74608..00000000 --- a/src/libgeds/GEDSInternal.h +++ /dev/null @@ -1,27 +0,0 @@ -/** - * Copyright 2022- IBM Inc. 
All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#ifndef GEDS_GEDSINTERNAL_H -#define GEDS_GEDSINTERNAL_H - -#include -#include - -namespace geds { -enum class ConnectionState : int { Disconnected = 0, Connected, Unknown }; -enum class ServiceState : int { Stopped = 0, Running, Unknown }; -enum class FileMode : int { ReadWrite = 0, ReadOnly = 1 }; - -std::string to_string(geds::ConnectionState state); - -std::string to_string(geds::ServiceState state); - -inline auto grpcDefaultDeadline() { - return std::chrono::system_clock::now() + std::chrono::seconds(10); // NOLINT -} - -} // namespace geds - -#endif // GEDS_GEDSINTERNAL_H diff --git a/src/libgeds/GEDSLocalFileHandle.h b/src/libgeds/GEDSLocalFileHandle.h deleted file mode 100644 index 67902c17..00000000 --- a/src/libgeds/GEDSLocalFileHandle.h +++ /dev/null @@ -1,13 +0,0 @@ -/** - * Copyright 2022- IBM Inc. All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include "GEDSAbstractFileHandle.h" -#include "LocalFile.h" -#include "MMAPFile.h" - -using GEDSLocalFileHandle = GEDSAbstractFileHandle; -// using GEDSLocalFileHandle = GEDSAbstractFileHandle; diff --git a/src/libgeds/GEDSMMapFileHandle.h b/src/libgeds/GEDSMMapFileHandle.h deleted file mode 100644 index ff5a6fff..00000000 --- a/src/libgeds/GEDSMMapFileHandle.h +++ /dev/null @@ -1,11 +0,0 @@ -/** - * Copyright 2022- IBM Inc. All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include "GEDSAbstractFileHandle.h" -#include "MMAPFile.h" - -using GEDSMMapFileHandle = GEDSAbstractFileHandle; diff --git a/src/libgeds/GEDSProtocol.cpp b/src/libgeds/GEDSProtocol.cpp deleted file mode 100644 index c8663681..00000000 --- a/src/libgeds/GEDSProtocol.cpp +++ /dev/null @@ -1,12 +0,0 @@ -/** - * Copyright 2022- IBM Inc. 
All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "GEDSProtocol.h" - -#include - -std::string geds::to_string(geds::Protocol protocol) { - return std::string{magic_enum::enum_name(protocol)}; -} diff --git a/src/libgeds/GEDSProtocol.h b/src/libgeds/GEDSProtocol.h deleted file mode 100644 index 8e1c71eb..00000000 --- a/src/libgeds/GEDSProtocol.h +++ /dev/null @@ -1,17 +0,0 @@ -/** - * Copyright 2022- IBM Inc. All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#ifndef GEDS_GEDSPROTOCOL_H -#define GEDS_GEDSPROTOCOL_H - -#include - -namespace geds { -enum class Protocol { socket, rdma, s3a, cos }; -std::string to_string(geds::Protocol protocol); - -} // namespace geds - -#endif // GEDS_GEDSPROTOCOL_H diff --git a/src/libgeds/GEDSRelocatableFileHandle.cpp b/src/libgeds/GEDSRelocatableFileHandle.cpp deleted file mode 100644 index 4f3ea564..00000000 --- a/src/libgeds/GEDSRelocatableFileHandle.cpp +++ /dev/null @@ -1,126 +0,0 @@ -/** - * Copyright 2023- IBM Inc. All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "GEDSRelocatableFileHandle.h" - -#include "GEDS.h" -#include "GEDSFile.h" -#include "Logging.h" - -std::shared_ptr -GEDSRelocatableFileHandle::factory(std::shared_ptr gedsService, - std::shared_ptr wrapped) { - return std::shared_ptr(new GEDSRelocatableFileHandle(gedsService, wrapped)); -} - -bool GEDSRelocatableFileHandle::isRelocatable() const { return true; } - -absl::StatusOr GEDSRelocatableFileHandle::size() const { - auto lock = lockShared(); - return _fileHandle->size(); -} - -size_t GEDSRelocatableFileHandle::localStorageSize() const { - auto lock = lockShared(); - return _fileHandle->localStorageSize(); -} - -size_t GEDSRelocatableFileHandle::localMemorySize() const { - auto lock = lockShared(); - return _fileHandle->localMemorySize(); -} - -bool GEDSRelocatableFileHandle::isWriteable() const { - auto lock = lockShared(); - // ToDo: Download relocated file again to make it writeable. 
- return _fileHandle->isWriteable(); -} - -std::optional GEDSRelocatableFileHandle::metadata() const { - auto lock = lockFile(); - return _fileHandle->metadata(); -}; - -absl::Status GEDSRelocatableFileHandle::setMetadata(std::optional metadata, - bool seal) { - auto lock = lockFile(); - return _fileHandle->setMetadata(metadata, seal); -} - -absl::StatusOr GEDSRelocatableFileHandle::readBytes(uint8_t *bytes, size_t position, - size_t length) { - GEDSFileHandle *oldFh; - { - auto lock = lockShared(); - oldFh = _fileHandle.get(); - auto success = _fileHandle->readBytes(bytes, position, length); - if (success.ok()) { - return *success; - } - } - // Reopen in case of read failures. - { - auto lock = lockFile(); - auto ioLock = lockExclusive(); - if (_fileHandle.get() != oldFh) { - // The file has already been reopened. - return _fileHandle->readBytes(bytes, position, length); - } - LOG_INFO("Reopening file ", identifier); - // Force lookup in MDS. - auto newFh = _gedsService->reopenFileHandle(bucket, key, true); - if (!newFh.ok()) { - LOG_INFO("Unable to reopen file: ", identifier, " reason: ", newFh.status().message()); - return newFh.status(); - } - // LOG_INFO("Reopened file", identifier); - _fileHandle = *newFh; - return _fileHandle->readBytes(bytes, position, length); - } -} - -absl::Status GEDSRelocatableFileHandle::writeBytes(const uint8_t *bytes, size_t position, - size_t length) { - auto lock = lockShared(); - return _fileHandle->writeBytes(bytes, position, length); -} - -absl::Status GEDSRelocatableFileHandle::write(std::istream &stream, size_t position, - std::optional lengthOptional) { - auto lock = lockShared(); - return _fileHandle->write(stream, position, lengthOptional); -} - -absl::Status GEDSRelocatableFileHandle::truncate(size_t targetSize) { - auto lock = lockExclusive(); - return _fileHandle->truncate(targetSize); -} - -absl::Status GEDSRelocatableFileHandle::seal() { - auto lock = lockExclusive(); - return _fileHandle->seal(); -} - -void 
GEDSRelocatableFileHandle::notifyUnused() { - auto lock = lockFile(); - auto lockIo = lockExclusive(); - return _fileHandle->notifyUnused(); -}; - -absl::StatusOr GEDSRelocatableFileHandle::rawFd() const { - auto lock = lockShared(); - return _fileHandle->rawFd(); -} - -absl::StatusOr> GEDSRelocatableFileHandle::relocate() { - auto lock = lockFile(); - auto lockIo = lockExclusive(); - - auto newFh = _fileHandle->relocate(); - if (newFh.ok()) { - _fileHandle = *newFh; - } - return shared_from_this(); -} diff --git a/src/libgeds/GEDSRelocatableFileHandle.h b/src/libgeds/GEDSRelocatableFileHandle.h deleted file mode 100644 index bd3d12df..00000000 --- a/src/libgeds/GEDSRelocatableFileHandle.h +++ /dev/null @@ -1,73 +0,0 @@ -/** - * Copyright 2023- IBM Inc. All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include -#include -#include -#include -#include - -#include "GEDSFileHandle.h" - -/** - * @brief A file handle that supports relocation of objects. - */ -class GEDSRelocatableFileHandle : public GEDSFileHandle { - - std::atomic _retrying{false}; - - std::shared_ptr _fileHandle; - mutable std::shared_mutex _fileHandleMutex; - -private: - GEDSRelocatableFileHandle(std::shared_ptr gedsService, - std::shared_ptr fileHandle) - : GEDSFileHandle(gedsService, fileHandle->bucket, fileHandle->key, std::nullopt), - _fileHandle(fileHandle) {} - -public: - GEDSRelocatableFileHandle() = delete; - GEDSRelocatableFileHandle(GEDSRelocatableFileHandle &) = delete; - GEDSRelocatableFileHandle(GEDSRelocatableFileHandle &&) = delete; - GEDSRelocatableFileHandle &operator=(GEDSRelocatableFileHandle &) = delete; - GEDSRelocatableFileHandle &operator=(GEDSRelocatableFileHandle &&) = delete; - ~GEDSRelocatableFileHandle() override = default; - - [[nodiscard]] static std::shared_ptr - factory(std::shared_ptr gedsService, std::shared_ptr wrapped); - - bool isRelocatable() const override; - - absl::StatusOr size() const override; - - size_t localStorageSize() 
const override; - - size_t localMemorySize() const override; - - bool isWriteable() const override; - - std::optional metadata() const override; - - absl::Status setMetadata(std::optional metadata, bool seal) override; - - absl::StatusOr readBytes(uint8_t *bytes, size_t position, size_t length) override; - - absl::Status writeBytes(const uint8_t *bytes, size_t position, size_t length) override; - - absl::Status write(std::istream &stream, size_t position, - std::optional lengthOptional) override; - - absl::Status truncate(size_t targetSize) override; - - absl::Status seal() override; - - void notifyUnused() override; - - absl::StatusOr rawFd() const override; - - absl::StatusOr> relocate() override; -}; diff --git a/src/libgeds/GEDSS3FileHandle.cpp b/src/libgeds/GEDSS3FileHandle.cpp deleted file mode 100644 index 0aecf14b..00000000 --- a/src/libgeds/GEDSS3FileHandle.cpp +++ /dev/null @@ -1,131 +0,0 @@ -/** - * Copyright 2022- IBM Inc. All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "GEDS.h" -#include "GEDSFileHandle.h" -#include "GEDSS3FileHandle.h" -#include "Logging.h" - -GEDSS3FileHandle::GEDSS3FileHandle(std::shared_ptr gedsService, - std::shared_ptr s3Endpoint, - const std::string &bucketArg, const std::string &keyArg, - std::optional metadataArg, - const std::string &s3BucketArg, const std::string &s3KeyArg, - std::optional fileSize) - : GEDSFileHandle(gedsService, bucketArg, keyArg, std::move(metadataArg)), s3Bucket(s3BucketArg), - s3Key(s3KeyArg), location("s3://" + s3Bucket + "/" + s3Key), _s3Endpoint(s3Endpoint) { - static auto counter = geds::Statistics::createCounter("GEDSS3FileHandle: count"); - *counter += 1; - if (fileSize.has_value()) { - _size = *fileSize; - } else { - auto sizeStatus = _s3Endpoint->fileStatus(s3Bucket, s3Key); - if (!sizeStatus.ok()) { - throw sizeStatus.status(); - } - _size = sizeStatus->size; - } -} - -absl::StatusOr> 
-GEDSS3FileHandle::factory(std::shared_ptr gedsService, const geds::Object &object) { - const auto &bucket = object.id.bucket; - const auto &key = object.id.key; - const auto &location = object.info.location; - const std::string_view prefix{"s3://"}; - - if (location.compare(0, prefix.size(), prefix) != 0) { - return absl::UnknownError("Object location has invalid prefix for S3 FileHandle: " + location + - " expected '" + std::string{prefix} + "' as prefix."); - } - auto splitpos = location.find('/', prefix.size()); - if (splitpos == std::string::npos) { - return absl::UnknownError(location + " invalid! Expected format: " + std::string{prefix} + - "bucket/path"); - } - auto s3Bucket = location.substr(prefix.size(), splitpos - prefix.size()); - auto s3Key = location.substr(splitpos + 1); - - auto s3Endpoint = gedsService->getS3Endpoint(s3Bucket); - if (!s3Endpoint.ok()) { - return s3Endpoint.status(); - } - auto exists = (*s3Endpoint)->fileStatus(s3Bucket, s3Key); - if (!exists.ok()) { - return exists.status(); - } - try { - return std::shared_ptr(new GEDSS3FileHandle( - gedsService, s3Endpoint.value(), bucket, key, object.info.metadata, s3Bucket, s3Key)); - } catch (absl::Status &err) { - return err; - } -} - -absl::StatusOr> -GEDSS3FileHandle::factory(std::shared_ptr gedsService, const std::string &bucket, - const std::string &key, std::optional metadataArg) { - auto s3Endpoint = gedsService->getS3Endpoint(bucket); - if (!s3Endpoint.ok()) { - return s3Endpoint.status(); - } - auto exists = (*s3Endpoint)->fileStatus(bucket, key); - if (!exists.ok()) { - return exists.status(); - } - try { - return std::shared_ptr(new GEDSS3FileHandle( - gedsService, s3Endpoint.value(), bucket, key, std::move(metadataArg), bucket, key)); - } catch (absl::Status &err) { - return err; - } -} - -absl::StatusOr GEDSS3FileHandle::size() const { return _size; } - -absl::StatusOr GEDSS3FileHandle::readBytes(uint8_t *bytes, size_t position, size_t length) { - if (!_isValid) { - return 
absl::NotFoundError("The file is no longer valid!"); - } - auto lock = lockShared(); - auto status = _s3Endpoint->readBytes(s3Bucket, s3Key, bytes, position, length); - if (!status.ok() || status.status().code() == absl::StatusCode::kNotFound) { - auto flock = lockFile(); - _isValid = false; - } - *_readStatistics += status.value_or(0); - return status; -} - -absl::StatusOr GEDSS3FileHandle::downloadRange(std::shared_ptr destination, - size_t srcPosition, size_t length, - size_t destPosition) { - std::stringstream outputStream; - auto count = _s3Endpoint->read(bucket, key, outputStream, srcPosition, length); - if (!count.ok()) { - return count.status(); - } - *_readStatistics += *count; - auto writeStatus = destination->write(outputStream, destPosition, *count); - if (!writeStatus.ok()) { - return writeStatus; - } - return *count; -} - -absl::Status GEDSS3FileHandle::seal() { - auto lock = lockFile(); - auto ioLock = lockExclusive(); - return _gedsService->seal(*this, false, _size, location); -} diff --git a/src/libgeds/GEDSS3FileHandle.h b/src/libgeds/GEDSS3FileHandle.h deleted file mode 100644 index 97e6f527..00000000 --- a/src/libgeds/GEDSS3FileHandle.h +++ /dev/null @@ -1,55 +0,0 @@ -/** - * Copyright 2022- IBM Inc. 
All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#ifndef GEDS_S3_FILE_HANDLE_H -#define GEDS_S3_FILE_HANDLE_H - -#include "GEDSFileHandle.h" -#include "Object.h" -#include "S3Endpoint.h" -#include "Statistics.h" -#include - -class GEDSS3FileHandle : public GEDSFileHandle { -public: - const std::string s3Bucket; - const std::string s3Key; - const std::string location; - -protected: - const std::shared_ptr _s3Endpoint; - size_t _size; - std::shared_ptr _readStatistics = - geds::Statistics::createCounter("GEDSS3FileHandle: bytes read"); - -private: - GEDSS3FileHandle(std::shared_ptr gedsService, - std::shared_ptr objectStore, const std::string &bucket, - const std::string &key, std::optional metadataArg, - const std::string &s3Bucket, const std::string &s3Key, - std::optional fileSize = std::nullopt); - -public: - ~GEDSS3FileHandle() override = default; - - [[nodiscard]] static absl::StatusOr> - factory(std::shared_ptr gedsService, const geds::Object &object); - - [[nodiscard]] static absl::StatusOr> - factory(std::shared_ptr gedsService, const std::string &bucket, const std::string &key, - std::optional metadataArg); - - absl::StatusOr size() const override; - - absl::StatusOr readBytes(uint8_t *bytes, size_t position, size_t length) override; - - absl::StatusOr downloadRange(std::shared_ptr destination, - size_t srcPosition, size_t length, - size_t destPosition) override; - - absl::Status seal() override; -}; - -#endif diff --git a/src/libgeds/GEDSService.cpp b/src/libgeds/GEDSService.cpp deleted file mode 100644 index 00d37f9d..00000000 --- a/src/libgeds/GEDSService.cpp +++ /dev/null @@ -1,8 +0,0 @@ -/** - * Copyright 2022- IBM Inc. 
All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "GEDSService.h" - -GEDSService::GEDSService(geds::Protocol protocol) : _protocol(protocol) {} diff --git a/src/libgeds/GEDSService.h b/src/libgeds/GEDSService.h deleted file mode 100644 index 437e8311..00000000 --- a/src/libgeds/GEDSService.h +++ /dev/null @@ -1,22 +0,0 @@ -/** - * Copyright 2022- IBM Inc. All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#ifndef GEDS_GEDSSERVICE_H -#define GEDS_GEDSSERVICE_H - -#include "GEDSProtocol.h" - -class GEDSService { -protected: - const geds::Protocol _protocol; - -public: - GEDSService(geds::Protocol protocol); - virtual ~GEDSService() = default; - - virtual std::string remoteObjectUrl(const std::string &bucket, const std::string &key) = 0; -}; - -#endif // GEDS_GEDSSERVICE_H diff --git a/src/libgeds/HttpServer.cpp b/src/libgeds/HttpServer.cpp deleted file mode 100644 index 1e4e66da..00000000 --- a/src/libgeds/HttpServer.cpp +++ /dev/null @@ -1,66 +0,0 @@ -/** - * Copyright 2022- IBM Inc. All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "HttpServer.h" - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "HttpSession.h" -#include "Logging.h" - -namespace geds { - -HttpServer::HttpServer(uint16_t port) : _port(port) {} - -absl::Status HttpServer::start() { - if (_acceptor != nullptr) { - return absl::UnknownError("The server is already running!"); - } - try { - auto host = boost::asio::ip::make_address("0.0.0.0"); - _acceptor = std::unique_ptr( - new boost::asio::ip::tcp::acceptor(_ioContext, {host, _port})); - _thread = std::thread([&] { - accept(); - _ioContext.run(); - }); - } catch (boost::exception &e) { - // Workaround until GEDS properly supports multiple processes. 
- auto diag = boost::diagnostic_information(e, false); - return absl::InternalError("Unable to start webserver: " + diag); - } - return absl::OkStatus(); -} - -void HttpServer::stop() { - _ioContext.stop(); - _acceptor->close(); - _thread.join(); -} - -void HttpServer::accept() { - _acceptor->async_accept(boost::asio::make_strand(_ioContext), - [&](boost::beast::error_code ec, boost::asio::ip::tcp::socket socket) { - if (ec) { - LOG_ERROR("Unable to accept ", ec.message(), " ABORT."); - return; - } - std::make_shared(std::move(socket))->start(); - accept(); - }); -} - -} // namespace geds diff --git a/src/libgeds/HttpServer.h b/src/libgeds/HttpServer.h deleted file mode 100644 index a6a160a6..00000000 --- a/src/libgeds/HttpServer.h +++ /dev/null @@ -1,33 +0,0 @@ -/** - * Copyright 2022- IBM Inc. All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ -#pragma once - -#include "HttpSession.h" - -#include -#include - -#include -#include - -namespace geds { - -class HttpServer { - uint16_t _port; - - boost::asio::io_context _ioContext{1}; - std::unique_ptr _acceptor = nullptr; - std::thread _thread; - -public: - HttpServer(uint16_t port); - absl::Status start(); - void stop(); - -private: - void accept(); -}; - -} // namespace geds diff --git a/src/libgeds/HttpSession.cpp b/src/libgeds/HttpSession.cpp deleted file mode 100644 index 03cbf6da..00000000 --- a/src/libgeds/HttpSession.cpp +++ /dev/null @@ -1,128 +0,0 @@ -/** - * Copyright 2022- IBM Inc. 
All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "HttpSession.h" - -#include "Logging.h" -#include "Statistics.h" -#include - -namespace geds { - -HttpSession::HttpSession(boost::asio::ip::tcp::socket &&socket) : _stream(std::move(socket)) {} - -HttpSession::~HttpSession() { close(); } - -void HttpSession::start() { - LOG_DEBUG("Start connection"); - auto self = shared_from_this(); - boost::asio::dispatch(_stream.get_executor(), - boost::beast::bind_front_handler(&HttpSession::awaitRequest, self)); -} - -void HttpSession::awaitRequest() { - auto self = shared_from_this(); - _request = {}; - _stream.expires_after(std::chrono::seconds(10)); - - boost::beast::http::async_read( - _stream, _buffer, _request, - [self](boost::beast::error_code ec, std::size_t /* bytes_transferred */) { - if (ec == boost::beast::http::error::end_of_stream) { - return; - } - if (ec) { - LOG_ERROR("Failed reading stream", ec.message()); - return; - } - self->handleRequest(); - }); -} - -void HttpSession::prepareHtmlReply() { - _response.result(boost::beast::http::status::ok); - _response.set(boost::beast::http::field::server, BOOST_BEAST_VERSION_STRING); - _response.set(boost::beast::http::field::content_type, "text/html"); - _response.keep_alive(_request.keep_alive()); - - boost::beast::ostream(_response.body()) << "" - << "" - << "GEDS Service" - << "" - << "" - << "
"
-                                          << "GEDS Service"
-                                          << "
" - << "" << std::endl; - handleWrite(); -} - -void HttpSession::handleRequest() { - if (_request.method() != boost::beast::http::verb::get) { - return prepareError(boost::beast::http::status::bad_request, "Invalid method."); - } - if (_request.target().empty() || _request.target()[0] != '/') { - return prepareError(boost::beast::http::status::bad_request, "Invalid path."); - } - - if (_request.target() == "/") { - return prepareHtmlReply(); - } - if (_request.target() == "/metrics") { - return prepareMetricsReply(); - } - - return prepareError(boost::beast::http::status::not_found, "Invalid path"); -} - -void HttpSession::prepareMetricsReply() { - _response.result(boost::beast::http::status::ok); - _response.set(boost::beast::http::field::server, BOOST_BEAST_VERSION_STRING); - _response.set(boost::beast::http::field::content_type, "plain/text"); - _response.keep_alive(_request.keep_alive()); - - std::stringstream stream; - Statistics::get().prometheusMetrics(stream); - boost::beast::ostream(_response.body()) << stream.str(); - handleWrite(); -} - -void HttpSession::prepareError(boost::beast::http::status status, std::string message) { - _response.result(status); - _response.set(boost::beast::http::field::server, BOOST_BEAST_VERSION_STRING); - _response.set(boost::beast::http::field::content_type, "text/html"); - _response.keep_alive(_request.keep_alive()); - boost::beast::ostream(_response.body()) << message; - - return handleWrite(); -} - -void HttpSession::handleWrite() { - auto self = shared_from_this(); - _response.content_length(_response.body().size()); - - boost::beast::http::async_write( - _stream, _response, - [self](boost::beast::error_code ec, std::size_t /* unused bytesTransferred */) { - if (ec) { - LOG_ERROR("Error ", ec.message()); - return; - } - if (self->_request.keep_alive()) { - self->awaitRequest(); - } - self->_buffer.clear(); - }); -} - -void HttpSession::close() { - LOG_DEBUG("Closing connection"); - - boost::beast::error_code ec; - 
_stream.socket().shutdown(boost::asio::ip::tcp::socket::shutdown_send, ec); - _stream.socket().close(); -} - -} // namespace geds diff --git a/src/libgeds/HttpSession.h b/src/libgeds/HttpSession.h deleted file mode 100644 index b6d2d3cc..00000000 --- a/src/libgeds/HttpSession.h +++ /dev/null @@ -1,42 +0,0 @@ -/** - * Copyright 2022- IBM Inc. All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace geds { - -class HttpSession : public std::enable_shared_from_this { - boost::beast::tcp_stream _stream; - boost::beast::flat_buffer _buffer{4096}; - boost::beast::http::request _request; - boost::beast::http::response _response; - -public: - HttpSession(boost::asio::ip::tcp::socket &&socket); - ~HttpSession(); - void start(); - - void awaitRequest(); - void handleRequest(); - void prepareHtmlReply(); - void prepareMetricsReply(); - void prepareError(boost::beast::http::status status, std::string message); - void handleWrite(); - - void close(); -}; -} // namespace geds diff --git a/src/libgeds/LocalFile.cpp b/src/libgeds/LocalFile.cpp deleted file mode 100644 index 58f55352..00000000 --- a/src/libgeds/LocalFile.cpp +++ /dev/null @@ -1,233 +0,0 @@ -/** - * Copyright 2022- IBM Inc. 
All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "LocalFile.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "Filesystem.h" -#include "Logging.h" - -#define CHECK_FILE_OPEN \ - if (_fd < 0) { \ - return absl::UnavailableError("The file at " + _path + " is not open!"); \ - } - -namespace geds::filesystem { -LocalFile::LocalFile(std::string pathArg, bool overwrite) : _path(std::move(pathArg)) { - auto mode = O_RDWR | O_CREAT; - if (overwrite) { - mode |= O_TRUNC; - } - - // NOLINTNEXTLINE - _fd = ::open(_path.c_str(), mode, S_IRUSR | S_IWUSR); - if (_fd < 0) { - int error = errno; - auto message = "Unable to open " + _path + ". Reason: " + strerror(error); - LOG_ERROR(message); - throw std::runtime_error{message}; - } - - struct stat statBuf {}; - if (fstat(_fd, &statBuf) != 0) { - int error = errno; - auto message = "Fstat on " + _path + " reported: " + strerror(error); - LOG_ERROR(message); - throw std::runtime_error{message}; - } - _size = statBuf.st_size; -} - -LocalFile::~LocalFile() { - if (_fd >= 0) { - (void)::close(_fd); - _fd = -1; - - auto removeStatus = removeFile(_path); - if (!removeStatus.ok()) { - LOG_ERROR("Unable to delete ", _path, " reason: ", removeStatus.message()); - } - } -} - -void LocalFile::notifyUnused() { - // NOOP. 
-} - -absl::Status LocalFile::fsync() { - CHECK_FILE_OPEN - - int e = 0; - do { - e = ::fsync(_fd); - } while (e != 0 && errno == EINTR); - if (e != 0) { - int err = errno; - return absl::UnknownError("Unable to fsync " + _path + ": " + strerror(err)); - } - return absl::OkStatus(); -} - -absl::StatusOr LocalFile::fileSize() const { - CHECK_FILE_OPEN - - return _size; -} - -absl::StatusOr LocalFile::rawFd() const { - CHECK_FILE_OPEN - return _fd; -} - -absl::StatusOr LocalFile::rawPtr() const { - return absl::UnavailableError("RawPtr is not supported for LocalFile."); -} - -absl::StatusOr LocalFile::readBytes(uint8_t *bytes, size_t position, size_t length) { - if (position >= INT64_MAX) { - return absl::FailedPreconditionError("Stream positions > " + std::to_string(INT64_MAX) + - " are not supported!"); - } - if (length == 0) { - return 0; - } - - if (position > _size) { - return 0; - } - length = std::min(length, _size - position); - if (length == 0) { - return 0; - } - - size_t offset = 0; - while (offset < length) { - size_t count = length - offset; - - // Truncate length to conform to POSIX API: - // See https://pubs.opengroup.org/onlinepubs/009695399/functions/read.html - if (count > SSIZE_MAX) { - count = SSIZE_MAX; - } - - // Loop and check for EINTR. - ssize_t numBytes = 0; - do { - numBytes = ::pread64(_fd, &bytes[offset], count, position + offset); - } while (numBytes == -1 && errno == EINTR); - - // Error is unrecoverable. - if (numBytes < 0) { - int err = errno; - auto errorMessage = "Error reading " + _path + ": " + strerror(err); - LOG_ERROR(errorMessage); - return absl::UnknownError(errorMessage); - } - offset += numBytes; - - // Encountered an EOF. 
- if (numBytes == 0) { - break; - } - } - return offset; -} - -absl::Status LocalFile::truncate(size_t targetSize) { - CHECK_FILE_OPEN - - _size = targetSize; - int e = ftruncate64(_fd, targetSize); - if (e < 0) { - int err = errno; - std::string errorMessage = "Unable to ftruncate file " + _path + ": " + strerror(err); - LOG_ERROR(errorMessage); - return absl::UnknownError(errorMessage); - } - return absl::OkStatus(); -} - -absl::Status LocalFile::writeBytes(const uint8_t *bytes, size_t position, size_t length) { - if (position > INT64_MAX) { - return absl::FailedPreconditionError("Stream positions > " + std::to_string(position) + - " are not yet supported."); - } - if (length == 0) { - return absl::OkStatus(); - } - - size_t offset = 0; - while (offset < length) { - size_t count = length - offset; - - // Truncate length to conform to POSIX API: - // See https://pubs.opengroup.org/onlinepubs/009695399/functions/write.html - if (count > SSIZE_MAX) { - count = SSIZE_MAX; - } - - // Loop and check for EINTR. - ssize_t numBytes = 0; - do { - numBytes = ::pwrite64(_fd, &bytes[offset], count, position + offset); - } while (numBytes == -1 && errno == EINTR); - // Error is unrecoverable. 
- if (numBytes < 0) { - int err = errno; - std::string errorMessage = "Error writing " + _path + ": " + strerror(err); - LOG_ERROR(errorMessage); - return absl::UnknownError(errorMessage); - } - if (numBytes == 0) { - std::string errorMessage = "Write on " + _path + " returned an EOF."; - LOG_ERROR(errorMessage); - return absl::UnknownError(errorMessage); - } - offset += numBytes; - } - - // See: https://stackoverflow.com/a/16190791/592024 - size_t oldSize; - size_t newSize = position + offset; - do { - oldSize = _size; - } while (oldSize < newSize && !_size.compare_exchange_weak(oldSize, newSize)); - return absl::OkStatus(); -} - -absl::StatusOr LocalFile::write(std::istream &stream, size_t position, - std::optional lengthOpt) { - auto buffer = std::vector(4096, 0); - auto length = lengthOpt.value_or(INT64_MAX); - - size_t n = 0; - std::streamsize count; - do { - auto maxRead = std::min(std::min(buffer.size(), length - n), (size_t)LONG_MAX); - count = stream.readsome(buffer.data(), (long)maxRead); - if (count < 0) { - return absl::UnknownError("Unable to read from stream"); - } - auto status = writeBytes(reinterpret_cast(buffer.data()), position + n, count); - if (!status.ok()) { - return status; - } - n += count; - } while (count != 0); - return n; -} -} // namespace geds::filesystem diff --git a/src/libgeds/LocalFile.h b/src/libgeds/LocalFile.h deleted file mode 100644 index 494ba51c..00000000 --- a/src/libgeds/LocalFile.h +++ /dev/null @@ -1,73 +0,0 @@ -/** - * Copyright 2022- IBM Inc. All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "RWConcurrentObjectAdaptor.h" - -namespace geds::filesystem { - -class LocalFile { - const std::string _path; - - int _fd{-1}; - - std::atomic _size{0}; - - /** - * @brief Seek commands require locking of the file. 
- */ - mutable std::recursive_mutex __mutex; - -protected: - absl::StatusOr fileSize() const; - -public: - LocalFile() = delete; - LocalFile(LocalFile &) = delete; - LocalFile &operator=(LocalFile &) = delete; - - LocalFile(std::string path, bool overwrite = true); - ~LocalFile(); - - [[nodiscard]] const std::string &path() const { return _path; } - - void notifyUnused(); - - absl::Status fsync(); - - [[nodiscard]] size_t size() const { return _size; } - [[nodiscard]] size_t localStorageSize() const { return _size; } - [[nodiscard]] size_t localMemorySize() const { return 0; } - - absl::StatusOr rawFd() const; - - absl::StatusOr rawPtr() const; - - absl::StatusOr readBytes(uint8_t *bytes, size_t position, size_t length); - - absl::Status truncate(size_t targetSize); - - absl::Status writeBytes(const uint8_t *bytes, size_t position, size_t length); - absl::StatusOr write(std::istream &stream, size_t position, - std::optional length = std::nullopt); - - static const std::string statisticsLabel() { return "LocalFile"; } -}; - -} // namespace geds::filesystem diff --git a/src/libgeds/MMAPFile.cpp b/src/libgeds/MMAPFile.cpp deleted file mode 100644 index 95dcaacf..00000000 --- a/src/libgeds/MMAPFile.cpp +++ /dev/null @@ -1,253 +0,0 @@ -/** - * Copyright 2022- IBM Inc. All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "MMAPFile.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "Filesystem.h" -#include "Logging.h" - -namespace geds::filesystem { - -static const size_t MMAP_pageSize = getpagesize(); - -MMAPFile::MMAPFile(std::string pathArg, bool overwrite) : _path(std::move(pathArg)) { - auto mode = O_RDWR | O_CREAT; - if (overwrite) { - mode |= O_TRUNC; - } - - // NOLINTNEXTLINE - _fd = ::open(_path.c_str(), mode, S_IRUSR | S_IWUSR); - if (_fd < 0) { - int error = errno; - auto message = "Unable to open " + _path + ". 
Reason: " + strerror(error); - LOG_ERROR(message); - throw std::runtime_error{message}; - } -} - -MMAPFile::~MMAPFile() { - auto lock = getWriteLock(); - release(); - if (_fd >= 0) { - (void)::close(_fd); - _fd = -1; - - auto removeStatus = removeFile(_path); - if (!removeStatus.ok()) { - LOG_ERROR("Unable to delete ", _path, " reason: ", removeStatus.message()); - } - } -} - -absl::Status MMAPFile::increaseMmap(size_t requestSize) { - if (_fd < 0) { - return absl::UnknownError("No valid file descriptor for " + _path); - } - - if (_mmapSize < requestSize) { - size_t nPages = requestSize / MMAP_pageSize + (requestSize % MMAP_pageSize > 0 ? 1 : 0); - size_t newSize = nPages * MMAP_pageSize; - void *m = nullptr; - - int e = -1; - do { - e = posix_fallocate64(_fd, _mmapSize, newSize - _mmapSize); - } while (e != 0 && errno == EINTR); - if (e != 0) { - e = errno; - return absl::UnknownError("Unable to extend file: " + _path + ". Reason: " + strerror(e)); - } - if (_mmapPtr == nullptr) { - // fallocate, int mode, __off_t offset, __off_t len) - m = mmap(nullptr, newSize, PROT_READ | PROT_WRITE, MAP_SHARED, _fd, 0); - if (m == MAP_FAILED) { // NOLINT - return absl::UnknownError("Failed to map file " + _path + " with requested size " + - std::to_string(requestSize) + "."); - } - } else { - // TODO: We use MREMAP_MAYMOVE. - m = mremap(_mmapPtr, _mmapSize, newSize, MREMAP_MAYMOVE); - if (m == MAP_FAILED) { // NOLINT - int e = errno; - return absl::UnknownError("mremap for file " + _path + " failed. 
Reason: " + strerror(e)); - } - } - _mmapPtr = static_cast(m); - _mmapSize = newSize; - } - return absl::OkStatus(); -} - -absl::StatusOr MMAPFile::readBytes(uint8_t *bytes, size_t position, size_t length) { - if (length > SSIZE_MAX) { - return absl::FailedPreconditionError("Lengths > " + std::to_string(SSIZE_MAX) + - " are not supported!"); - } - if (position >= SSIZE_MAX) { - return absl::FailedPreconditionError("Stream positions > " + std::to_string(SSIZE_MAX) + - " are not supported!"); - } - if (length == 0) { - return 0; - } - - // Reopen the file if it has been unmapped. - auto status = reopen(); - if (!status.ok()) { - return status; - } - - auto lock = getReadLock(); - if (position >= _size) { - return 0; - } - length = std::min(length, _size - position); - if (length == 0) { - return 0; - } - auto n = std::min(_size, position + length) - position; - (void)std::memcpy(bytes, _mmapPtr + position, n); // - return n; -} - -absl::StatusOr MMAPFile::rawPtr() { - auto status = reopen(); - if (!status.ok()) { - return status; - } - return _mmapPtr; -} - -absl::StatusOr MMAPFile::rawFd() const { - if (_fd < 0) { - return absl::UnavailableError("The fd for the file " + _path + " is not available"); - } - return _fd; -} - -absl::Status MMAPFile::truncate(size_t targetSize) { - auto lock = getWriteLock(); - if (targetSize > _size) { - auto status = increaseMmap(targetSize); - if (!status.ok()) { - return status; - } - } - _size = targetSize; - return absl::OkStatus(); -} - -absl::Status MMAPFile::writeBytes(const uint8_t *bytes, size_t position, size_t length) { - if (length == 0) { - // Length is 0, return. - return absl::OkStatus(); - } - auto lock = getWriteLock(); - auto newSize = position + length; - - // Check if the new file size is bigger, if yes, increase the file size. - if (newSize > _size) { - auto status = increaseMmap(newSize); - if (!status.ok()) { - return status; - } - } - // mmap is invalid. 
- if (_mmapPtr == nullptr) { - return absl::InternalError("The file is not mmapped!"); - } - // NOLINTNEXTLINE - (void)std::memcpy(_mmapPtr + position, bytes, length); - if (newSize > _size) { - _size = newSize; - } - return absl::OkStatus(); -} - -absl::StatusOr MMAPFile::write(std::istream &stream, size_t position, - std::optional lengthArg) { - size_t length = 0; - if (!lengthArg.has_value()) { - auto pos = stream.tellg(); - std::for_each(std::istream_iterator(stream), std::istream_iterator(), - [&length](char) { length += 1; }); - stream.seekg(pos); - } else { - length = lengthArg.value(); - } - if (length == 0) { - LOG_DEBUG("Stream is empty"); - return absl::OkStatus(); - } - auto lock = getWriteLock(); - auto newSize = position + length; - - auto status = increaseMmap(newSize); - if (!status.ok()) { - return status; - } - - // mmap is invalid. - if (_mmapPtr == nullptr) { - return absl::InternalError("The file is not mmapped!"); - } - std::copy(std::istream_iterator(stream), std::istream_iterator(), - reinterpret_cast(_mmapPtr + position)); - if (newSize > _size) { - _size = newSize; - } - LOG_DEBUG("Wrote ", std::to_string(length), " to ", _path); - return length; -} - -void MMAPFile::release() { - if (_mmapPtr != 0) { - int err = munmap(_mmapPtr, _mmapSize); - if (err != 0) { - err = errno; - LOG_ERROR("Unable to munmap ", _path, " reason: ", strerror(err)); - } - _mmapPtr = nullptr; - _mmapSize = 0; - } -} - -absl::Status MMAPFile::reopen() { - if (_mmapPtr == nullptr) { - auto lock = getWriteLock(); - if (_mmapPtr != nullptr) { - return absl::OkStatus(); - } - size_t nPages = _size / MMAP_pageSize + (_size % MMAP_pageSize > 0 ? 
1 : 0); - size_t mmapSize = nPages * MMAP_pageSize; - // fallocate, int mode, __off_t offset, __off_t len) - auto m = mmap(nullptr, mmapSize, PROT_READ | PROT_WRITE, MAP_SHARED, _fd, 0); - if (m == MAP_FAILED) { // NOLINT - return absl::UnknownError("Failed to map file " + _path + " with requested size " + - std::to_string(_size) + "."); - } - _mmapPtr = static_cast(m); - _mmapSize = mmapSize; - } - return absl::OkStatus(); -} - -void MMAPFile::notifyUnused() { - auto lock = getWriteLock(); - release(); -} - -} // namespace geds::filesystem diff --git a/src/libgeds/MMAPFile.h b/src/libgeds/MMAPFile.h deleted file mode 100644 index fa7a8f6f..00000000 --- a/src/libgeds/MMAPFile.h +++ /dev/null @@ -1,72 +0,0 @@ -/** - * Copyright 2022- IBM Inc. All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "RWConcurrentObjectAdaptor.h" - -namespace geds::filesystem { - -class MMAPFile : public utility::RWConcurrentObjectAdaptor { - const std::string _path; - - int _fd{-1}; - size_t _size{0}; - - size_t _mmapSize{0}; - uint8_t *_mmapPtr{nullptr}; - - std::atomic _ioProcesses; - - absl::Status increaseMmap(size_t requestSize); - - absl::Status reopen(); - void release(); - -public: - MMAPFile() = delete; - MMAPFile(MMAPFile &) = delete; - MMAPFile &operator=(MMAPFile &) = delete; - - MMAPFile(std::string path, bool overwrite = true); - ~MMAPFile(); - - void notifyUnused(); - - [[nodiscard]] const std::string &path() const { return _path; } - - [[nodiscard]] size_t size() const { return _size; } - [[nodiscard]] size_t localStorageSize() const { return _size; } - [[nodiscard]] size_t localMemorySize() const { return _mmapSize; } - - absl::StatusOr rawPtr(); - - absl::StatusOr rawFd() const; - - absl::StatusOr readBytes(uint8_t *bytes, size_t position, size_t length); - - absl::Status truncate(size_t targetSize); - - absl::Status 
writeBytes(const uint8_t *bytes, size_t position, size_t length); - absl::StatusOr write(std::istream &stream, size_t position, - std::optional length = std::nullopt); - - static const std::string statisticsLabel() { return "MMAPFile"; } -}; - -} // namespace geds::filesystem diff --git a/src/libgeds/MetadataService.cpp b/src/libgeds/MetadataService.cpp deleted file mode 100644 index 0d487f13..00000000 --- a/src/libgeds/MetadataService.cpp +++ /dev/null @@ -1,511 +0,0 @@ -/** - * Copyright 2022- IBM Inc. All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "MetadataService.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "GEDS.h" -#include "Logging.h" -#include "ObjectStoreConfig.h" -#include "Status.h" -#include "geds.grpc.pb.h" -#include "geds.pb.h" -#include "status.pb.h" - -namespace geds { - -static std::string printGRPCError(const grpc::Status &status) { - if (status.error_message().size()) { - return {status.error_message() + " code: " + std::to_string(status.error_code())}; - } - return {"Code: " + std::to_string(status.error_code())}; -} - -#define METADATASERVICE_CHECK_CONNECTED \ - if (_connectionState != ConnectionState::Connected) { \ - return absl::FailedPreconditionError("Not connected."); \ - } - -MetadataService::MetadataService(std::string serverAddress) - : _connectionState(ConnectionState::Disconnected), _channel(nullptr), - serverAddress(std::move(serverAddress)) { - boost::uuids::uuid uuid_generated = boost::uuids::random_generator()(); - uuid = boost::lexical_cast(uuid_generated); -} - -MetadataService::~MetadataService() { - if (_connectionState == ConnectionState::Connected) { - disconnect().IgnoreError(); - } -} - -absl::Status MetadataService::connect() { - if (_connectionState != ConnectionState::Disconnected) { - return absl::UnknownError("Cannot reinitialize service."); - } - try { - assert(_channel.get() == nullptr); - - auto arguments = grpc::ChannelArguments(); 
- arguments.SetMaxReceiveMessageSize(64 * 1024 * 1024); - - _channel = - grpc::CreateCustomChannel(serverAddress, grpc::InsecureChannelCredentials(), arguments); - auto success = _channel->WaitForConnected(grpcDefaultDeadline()); - if (!success) { - LOG_ERROR("Unable to connect to ", serverAddress); - return absl::UnavailableError("Could not connect to " + serverAddress + "."); - } - _stub = geds::rpc::MetadataService::NewStub(_channel); - } catch (std::exception &e) { - auto msg = "Could not open channel with " + serverAddress + ". Reason" + std::string(e.what()); - LOG_ERROR(msg); - return absl::UnavailableError(msg); - } - _connectionState = ConnectionState::Connected; - // ToDO: Register client and implement stop(). - LOG_DEBUG("Connected to metadata service."); - return absl::OkStatus(); -} - -absl::Status MetadataService::disconnect() { - if (_connectionState != ConnectionState::Connected) { - return absl::UnknownError("The service is in the wrong state."); - } - _channel = nullptr; - _connectionState = ConnectionState::Disconnected; - return absl::OkStatus(); -} - -absl::Status MetadataService::registerObjectStoreConfig(const ObjectStoreConfig &mapping) { - METADATASERVICE_CHECK_CONNECTED; - - geds::rpc::ObjectStoreConfig request; - geds::rpc::StatusResponse response; - grpc::ClientContext context; - - request.set_bucket(mapping.bucket); - request.set_endpointurl(mapping.endpointURL); - request.set_accesskey(mapping.accessKey); - request.set_secretkey(mapping.secretKey); - - auto status = _stub->RegisterObjectStore(&context, request, &response); - if (!status.ok()) { - return absl::UnavailableError("Unable to execute RegisterObjectStore: " + - status.error_message()); - } - return convertStatus(response); -} - -absl::StatusOr>> -MetadataService::listObjectStoreConfigs() { - METADATASERVICE_CHECK_CONNECTED; - - geds::rpc::EmptyParams request; - geds::rpc::AvailableObjectStoreConfigs response; - grpc::ClientContext context; - auto status = 
_stub->ListObjectStores(&context, request, &response); - if (!status.ok()) { - return absl::UnavailableError("Unable to execute ListObjectStores: " + status.error_message()); - } - auto mappings = response.mappings(); - std::vector> result; - for (auto &m : mappings) { - result.emplace_back(std::make_shared(m.bucket(), m.endpointurl(), - m.accesskey(), m.secretkey())); - } - return result; -} - -absl::StatusOr MetadataService::getConnectionInformation() { - METADATASERVICE_CHECK_CONNECTED; - geds::rpc::EmptyParams request; - geds::rpc::ConnectionInformation response; - grpc::ClientContext context; - auto status = _stub->GetConnectionInformation(&context, request, &response); - if (!status.ok()) { - return absl::UnavailableError("Unable to execute GetConnectionInformation: " + - status.error_message()); - } - if (response.has_error()) { - return convertStatus(response.error()); - } - return response.remoteaddress(); -} - -absl::Status MetadataService::createBucket(const std::string_view &bucket) { - METADATASERVICE_CHECK_CONNECTED; - geds::rpc::Bucket request; - request.set_bucket(std::string{bucket}); - - geds::rpc::StatusResponse response; - grpc::ClientContext context; - - auto status = _stub->CreateBucket(&context, request, &response); - if (!status.ok()) { - return absl::UnavailableError("Unable to execute CreateBucket command: " + - status.error_message()); - } - return convertStatus(response); -} - -absl::Status MetadataService::deleteBucket(const std::string_view &bucket) { - METADATASERVICE_CHECK_CONNECTED; - geds::rpc::Bucket request; - request.set_bucket(std::string{bucket}); - - geds::rpc::StatusResponse response; - grpc::ClientContext context; - - auto status = _stub->DeleteBucket(&context, request, &response); - if (!status.ok()) { - return absl::UnavailableError("Unable to execute DeleteBucket command: " + - status.error_message()); - } - return convertStatus(response); -} - -absl::StatusOr> MetadataService::listBuckets() { - 
METADATASERVICE_CHECK_CONNECTED; - geds::rpc::EmptyParams request; - - geds::rpc::BucketListResponse response; - grpc::ClientContext context; - - auto status = _stub->ListBuckets(&context, request, &response); - if (!status.ok()) { - return absl::UnavailableError("Unable to execute ListBuckets command: " + - status.error_message()); - } - if (response.has_error()) { - return convertStatus(response.error()); - } - const auto &bucketList = response.results(); - - std::vector result; - result.reserve(bucketList.size()); - for (const auto &r : bucketList) { - result.emplace_back(r); - } - return result; -} - -absl::Status MetadataService::lookupBucket(const std::string_view &bucket) { - METADATASERVICE_CHECK_CONNECTED; - - geds::rpc::Bucket request; - request.set_bucket(std::string{bucket}); - - geds::rpc::StatusResponse response; - grpc::ClientContext context; - - auto status = _stub->LookupBucket(&context, request, &response); - if (!status.ok()) { - return absl::UnavailableError("Unable to execute LookupBucket command: " + - status.error_message()); - } - auto s = convertStatus(response); - if (!s.ok()) { - (void)_mdsCache.deleteBucket(std::string{bucket}); - } - return s; -} - -absl::Status MetadataService::createObject(const geds::Object &obj) { - METADATASERVICE_CHECK_CONNECTED; - - geds::rpc::Object request; - auto id = request.mutable_id(); - id->set_bucket(obj.id.bucket); - id->set_key(obj.id.key); - auto info = request.mutable_info(); - info->set_location(obj.info.location); - info->set_size(obj.info.size); - info->set_sealedoffset(obj.info.sealedOffset); - if (obj.info.metadata.has_value()) { - info->set_metadata(obj.info.metadata.value()); - } - - geds::rpc::StatusResponse response; - grpc::ClientContext context; - - auto status = _stub->Create(&context, request, &response); - if (!status.ok()) { - return absl::UnavailableError("Unable to execute Create command: " + status.error_message()); - } - return convertStatus(response); -} - -absl::Status 
MetadataService::updateObject(const geds::Object &obj) { - METADATASERVICE_CHECK_CONNECTED; - - geds::rpc::Object request; - auto id = request.mutable_id(); - id->set_bucket(obj.id.bucket); - id->set_key(obj.id.key); - auto info = request.mutable_info(); - info->set_location(obj.info.location); - info->set_size(obj.info.size); - info->set_sealedoffset(obj.info.sealedOffset); - if (obj.info.metadata.has_value()) { - info->set_metadata(obj.info.metadata.value()); - } - geds::rpc::StatusResponse response; - grpc::ClientContext context; - - auto status = _stub->Update(&context, request, &response); - if (!status.ok()) { - return absl::UnavailableError("Unable to execute Update command: " + printGRPCError(status)); - } - return convertStatus(response); -} - -absl::Status MetadataService::deleteObject(const geds::ObjectID &id) { - METADATASERVICE_CHECK_CONNECTED; - return deleteObject(id.bucket, id.key); -} - -absl::Status MetadataService::deleteObject(const std::string &bucket, const std::string &key) { - METADATASERVICE_CHECK_CONNECTED; - - (void)_mdsCache.deleteObject(bucket, key); - - geds::rpc::ObjectID request; - request.set_bucket(bucket); - request.set_key(key); - - geds::rpc::StatusResponse response; - grpc::ClientContext context; - - auto status = _stub->Delete(&context, request, &response); - if (!status.ok()) { - return absl::UnavailableError("Unable to execute Delete command: " + printGRPCError(status)); - } - return convertStatus(response); -} - -absl::Status MetadataService::deleteObjectPrefix(const geds::ObjectID &id) { - return deleteObjectPrefix(id.bucket, id.key); -} -absl::Status MetadataService::deleteObjectPrefix(const std::string &bucket, - const std::string &key) { - METADATASERVICE_CHECK_CONNECTED; - - (void)_mdsCache.deleteObjectPrefix(bucket, key); - - geds::rpc::ObjectID request; - request.set_bucket(bucket); - request.set_key(key); - - geds::rpc::StatusResponse response; - grpc::ClientContext context; - - auto status = 
_stub->DeletePrefix(&context, request, &response); - if (!status.ok()) { - return absl::UnavailableError("Unable to execute Delete command: " + printGRPCError(status)); - } - return convertStatus(response); -} - -absl::StatusOr MetadataService::lookup(const geds::ObjectID &id, bool force) { - return lookup(id.bucket, id.key, force); -} -absl::StatusOr MetadataService::lookup(const std::string &bucket, - const std::string &key, bool invalidate) { - METADATASERVICE_CHECK_CONNECTED; - - if (!invalidate) { - LOG_DEBUG("Lookup cache", bucket, "/", key); - auto c = _mdsCache.lookup(bucket, key); - if (c.ok()) { - return c; - } - } - - geds::rpc::ObjectID request; - request.set_bucket(bucket); - request.set_key(key); - - geds::rpc::ObjectResponse response; - grpc::ClientContext context; - - LOG_DEBUG("Lookup remote", bucket, "/", key); - - auto status = _stub->Lookup(&context, request, &response); - if (!status.ok()) { - return absl::UnavailableError("Unable to execute Lookup command: " + printGRPCError(status)); - } - if (response.has_error()) { - return convertStatus(response.error()); - } - const auto &r = response.result(); - auto obj_id = geds::ObjectID{r.id().bucket(), r.id().key()}; - auto obj_info = geds::ObjectInfo{ - r.info().location(), r.info().size(), r.info().sealedoffset(), - (r.info().has_metadata() ? 
std::make_optional(r.info().metadata()) : std::nullopt)}; - - auto result = geds::Object{obj_id, obj_info}; - (void)_mdsCache.createObject(result, true); - return result; -} - -absl::StatusOr> MetadataService::listPrefix(const geds::ObjectID &id) { - return listPrefix(id.bucket, id.key); -} - -absl::StatusOr> -MetadataService::listPrefix(const std::string &bucket, const std::string &keyPrefix) { - auto result = listPrefix(bucket, keyPrefix, 0); - if (result.ok()) { - return result->first; - } - return result.status(); -} - -absl::StatusOr, std::vector>> -MetadataService::listPrefix(const std::string &bucket, const std::string &keyPrefix, - char delimiter) { - METADATASERVICE_CHECK_CONNECTED; - - geds::rpc::ObjectListRequest request; - auto prefix = request.mutable_prefix(); - prefix->set_bucket(bucket); - prefix->set_key(keyPrefix); - if (delimiter > 0) { - request.set_delimiter(delimiter); - } - - geds::rpc::ObjectListResponse response; - grpc::ClientContext context; - - auto status = _stub->List(&context, request, &response); - if (!status.ok()) { - return absl::UnavailableError("Unable to execute List command: " + printGRPCError(status)); - } - if (response.has_error()) { - return convertStatus(response.error()); - } - - const auto &rpc_results = response.results(); - auto objects = std::vector{}; - objects.reserve(rpc_results.size()); - for (auto i : rpc_results) { - auto obj_id = geds::ObjectID{i.id().bucket(), i.id().key()}; - auto obj_info = geds::ObjectInfo{ - i.info().location(), i.info().size(), i.info().sealedoffset(), - i.info().has_metadata() ? 
std::make_optional(i.info().metadata()) : std::nullopt}; - auto obj = geds::Object{obj_id, obj_info}; - (void)_mdsCache.createObject(obj, true); - objects.emplace_back(std::move(obj)); - } - return std::make_pair(objects, std::vector{response.commonprefixes().begin(), - response.commonprefixes().end()}); -} - -absl::StatusOr, std::vector>> -MetadataService::listPrefixFromCache(const std::string &bucket, const std::string &keyPrefix, - char delimiter) { - - auto status_or_objects = _mdsCache.listObjects(bucket, keyPrefix, delimiter); - if (!status_or_objects.ok()) { - status_or_objects = listPrefix(bucket, keyPrefix, delimiter); - } - return status_or_objects; -} - -absl::StatusOr, std::vector>> -MetadataService::listFolder(const std::string &bucket, const std::string &keyPrefix) { - return listPrefix(bucket, keyPrefix, Default_GEDSFolderDelimiter); -} - -absl::Status MetadataService::subscribeStream() { - METADATASERVICE_CHECK_CONNECTED; - - geds::rpc::SubscriptionStreamEvent subscription_stream_event; - geds::rpc::SubscriptionStreamResponse subscription_response; - grpc::ClientContext context; - subscription_stream_event.set_subscriberid(uuid); - - std::unique_ptr> reader( - _stub->SubscribeStream(&context, subscription_stream_event)); - - while (reader->Read(&subscription_response)) { - const auto &objectPublication = subscription_response.object(); - auto obj_id = geds::ObjectID{objectPublication.id().bucket(), objectPublication.id().key()}; - auto obj_info = - geds::ObjectInfo{objectPublication.info().location(), objectPublication.info().size(), - objectPublication.info().sealedoffset(), - objectPublication.info().has_metadata() - ? 
std::make_optional(objectPublication.info().metadata()) - : std::nullopt}; - auto obj = geds::Object{obj_id, obj_info}; - - if (subscription_response.publicationtype() == geds::rpc::CREATE_OBJECT) { - (void)_mdsCache.createObject(obj, true); - } else if (subscription_response.publicationtype() == geds::rpc::UPDATE_OBJECT) { - (void)_mdsCache.updateObject(obj); - } else if (subscription_response.publicationtype() == geds::rpc::DELETE_OBJECT) { - (void)_mdsCache.deleteObject(obj.id.bucket, obj.id.key); - } - - LOG_DEBUG("Received subscription and added to cache (bucket, key): ", obj.id.bucket, " , ", - obj.id.key); - } - auto status = reader->Finish(); - if (!status.ok()) { - return absl::InternalError(status.error_message()); - } - std::this_thread::sleep_for(std::chrono::seconds(1)); - return subscribeStream(); -} - -absl::Status MetadataService::subscribe(const geds::SubscriptionEvent &event) { - METADATASERVICE_CHECK_CONNECTED - - geds::rpc::SubscriptionEvent subscription_event; - geds::rpc::StatusResponse response; - grpc::ClientContext context; - - subscription_event.set_subscriberid(uuid); - subscription_event.set_bucketid(std::string{event.bucket}); - subscription_event.set_key(std::string{event.key}); - subscription_event.set_subscriptiontype(event.subscriptionType); - - auto status = _stub->Subscribe(&context, subscription_event, &response); - if (!status.ok()) { - return absl::UnavailableError("Unable to execute CreateBucket command: " + - status.error_message()); - } - return convertStatus(response); -} - -absl::Status MetadataService::unsubscribe(const geds::SubscriptionEvent &event) { - METADATASERVICE_CHECK_CONNECTED; - - geds::rpc::SubscriptionEvent subscription_event; - geds::rpc::StatusResponse response; - grpc::ClientContext context; - - subscription_event.set_subscriberid(uuid); - subscription_event.set_bucketid(std::string{event.bucket}); - subscription_event.set_key(std::string{event.key}); - 
subscription_event.set_subscriptiontype(event.subscriptionType); - - auto status = _stub->Unsubscribe(&context, subscription_event, &response); - if (!status.ok()) { - return absl::UnavailableError("Unable to unsubscribe: " + status.error_message()); - } - return convertStatus(response); -} - -} // namespace geds \ No newline at end of file diff --git a/src/libgeds/MetadataService.h b/src/libgeds/MetadataService.h deleted file mode 100644 index ac2fcf79..00000000 --- a/src/libgeds/MetadataService.h +++ /dev/null @@ -1,121 +0,0 @@ -/** - * Copyright 2022- IBM Inc. All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#ifndef GEDS_METADATASERVICE_H -#define GEDS_METADATASERVICE_H - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "GEDSInternal.h" -#include "MDSKVS.h" -#include "Object.h" -#include "ObjectStoreConfig.h" -#include "PubSub.h" - -#include "geds.grpc.pb.h" - -namespace geds { - -class MetadataService { - ConnectionState _connectionState; - MDSKVS _mdsCache; - std::shared_ptr _channel; - std::unique_ptr _stub; - std::string uuid; - -public: - const std::string serverAddress; - - MetadataService() = delete; - MetadataService(std::string serverAddress); - - ~MetadataService(); - - absl::Status connect(); - - absl::Status disconnect(); - - absl::StatusOr getConnectionInformation(); - - absl::Status registerObjectStoreConfig(const ObjectStoreConfig &mapping); - - absl::StatusOr>> listObjectStoreConfigs(); - - absl::Status createBucket(const std::string_view &bucket); - - absl::Status deleteBucket(const std::string_view &bucket); - - absl::StatusOr> listBuckets(); - - absl::Status lookupBucket(const std::string_view &bucket); - - absl::Status createObject(const geds::Object &obj); - - absl::Status updateObject(const geds::Object &obj); - - absl::Status deleteObject(const geds::ObjectID &id); - absl::Status deleteObject(const std::string &bucket, const std::string &key); - - absl::Status 
deleteObjectPrefix(const geds::ObjectID &id); - absl::Status deleteObjectPrefix(const std::string &bucket, const std::string &key); - - absl::StatusOr lookup(const geds::ObjectID &id, bool invalidate = false); - absl::StatusOr lookup(const std::string &bucket, const std::string &key, - bool invalidate = false); - - /** - * @brief List objects in `bucket` starting with `key` as prefix. - */ - absl::StatusOr> listPrefix(const geds::ObjectID &id); - absl::StatusOr> listPrefix(const std::string &bucket, - const std::string &keyPrefix); - - /** - * @brief List objects in `bucket` starting with `key` as prefix. Objects that contain `delimiter` - * in the postfix of the key are filtered. Delimiter `\0` is treated as no filter. - */ - absl::StatusOr, std::vector>> - listPrefix(const std::string &bucket, const std::string &keyPrefix, char delimiter); - - /** - * @brief List objects from cache in `bucket` starting with `key` as prefix. Objects that contain - * `delimiter` in the postfix of the key are filtered. Delimiter `\0` is treated as no filter. - */ - absl::StatusOr, std::vector>> - listPrefixFromCache(const std::string &bucket, const std::string &keyPrefix, char delimiter); - - /** - * @brief Prefix search with `/` as delimiter. - */ - absl::StatusOr, std::vector>> - listFolder(const std::string &bucket, const std::string &keyPrefix); - - /** - * @brief Create subscription stream for the subscriber. - */ - absl::Status subscribeStream(); - - /** - * @brief Create subscription for bucket, objects and prefixes. - */ - absl::Status subscribe(const geds::SubscriptionEvent &event); - - /** - * @brief Unsubscribe for bucket, objects and prefixes. 
- */ - absl::Status unsubscribe(const geds::SubscriptionEvent &event); -}; - -} // namespace geds - -#endif // GEDS_METADATASERVICE_H diff --git a/src/libgeds/Server.cpp b/src/libgeds/Server.cpp deleted file mode 100644 index 663fd29b..00000000 --- a/src/libgeds/Server.cpp +++ /dev/null @@ -1,209 +0,0 @@ -/** - * Copyright 2022- IBM Inc. All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "Server.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "GEDS.h" -#include "GEDSInternal.h" -#include "Logging.h" -#include "Object.h" -#include "Platform.h" -#include "Ports.h" -#include "Status.h" -#include "geds.grpc.pb.h" -#include "geds.pb.h" - -#include "FileTransferProtocol.h" - -namespace geds { - -class ServerImpl final : public geds::rpc::GEDSService::Service { - std::shared_ptr _geds; - Server &_server; - - ~ServerImpl() = default; - - ::grpc::Status GetAvailEndpoints(::grpc::ServerContext * /* unused context */, - const ::geds::rpc::EmptyParams * /* unused request */, - ::geds::rpc::AvailTransportEndpoints *response) override { - LOG_DEBUG("About to report locally available file transfer endpoints"); - - for (auto &endpoint : _server.TcpListenEp) { - auto ep = response->add_endpoint(); - auto laddr = (sockaddr_in *)&endpoint.laddr; - if (endpoint.type == FileTransferProtocol::RDMA) { - ep->set_type(rpc::RDMA); - } else { - ep->set_type(rpc::Socket); - } - ep->set_address(endpoint.hostname); - ep->set_port(laddr->sin_port); - LOG_DEBUG("Report local endpoint: ", inet_ntoa(laddr->sin_addr), "::", laddr->sin_port); - } - return grpc::Status::OK; - } - -public: - ServerImpl(std::shared_ptr geds, Server &server) : _geds(geds), _server(server) {} - -protected: -}; - -Server::Server(std::string hostname, std::optional port) - : _state(ServiceState::Stopped), _hostname(std::move(hostname)), - 
_port(port.value_or(defaultGEDSPort)) { - if (_port == 0) { - _port = defaultGEDSPort; - } -} - -Server::~Server() { (void)stop(); } - -#define CHECK_SERVICE_RUNNING \ - if (_state != ServiceState::Running) { \ - return absl::FailedPreconditionError("The service is " + to_string(_state) + "."); \ - } - -void Server::TcpListenThread() { - struct ObjTransferEndpoint listener; - auto *local = (sockaddr_in *)&listener.laddr; - socklen_t addrlen = sizeof(sockaddr_in); - int sock, rv; - - listener.type = FileTransferProtocol::Socket; - /* - * Start TCP listener(s). - * - * For now just do one wildcard listen over all available IP interfaces - * Let the kernel choose a free port for listening. - */ - sock = ::socket(AF_INET, SOCK_STREAM, 0); - if (sock <= 0) { - LOG_ERROR("socket call"); - return; - } - listener.socket = sock; - - memset(&listener.laddr, 0, sizeof listener.laddr); - local->sin_family = AF_INET; - local->sin_addr.s_addr = INADDR_ANY; - - rv = ::bind(sock, &listener.laddr, sizeof listener.laddr); - if (rv) { - LOG_ERROR("bind call: "); - perror("bind"); - close(sock); - return; - } - rv = getsockname(sock, &listener.laddr, &addrlen); - if (rv) { - LOG_ERROR("getsockname call"); - close(sock); - return; - } - /* - * Remember hostname as local addr for now, since we do wildcard listen - */ - auto hostname = utility::platform::getHostName(); - struct addrinfo *ainfo, ahints{}; - ahints.ai_family = AF_UNSPEC; - ahints.ai_socktype = SOCK_STREAM; - ahints.ai_protocol = IPPROTO_TCP; - if (getaddrinfo(hostname.c_str(), nullptr, &ahints, &ainfo) == 0) { - local->sin_addr.s_addr = ((sockaddr_in *)ainfo[0].ai_addr)->sin_addr.s_addr; - listener.hostname = inet_ntoa(local->sin_addr); - freeaddrinfo(ainfo); - } else { - perror("getaddrinfo"); - close(sock); - return; - } - rv = ::listen(sock, 20); - if (rv) { - LOG_ERROR("listen call"); - close(sock); - return; - } - LOG_DEBUG("TCP listener: ", listener.hostname, "::", local->sin_port); - 
TcpListenEp.emplace(TcpListenEp.end(), listener); - - while (!TcpListenEp.empty()) { - int newsock = ::accept(sock, nullptr, nullptr); - if (newsock < 0) { - perror("accept: "); - continue; - } - if (_geds->_tcpTransport->addEndpointPassive(newsock) == false) { - ::close(newsock); - LOG_ERROR("Server: Adding new TCP client failed "); - } - } - close(sock); -} - -uint16_t Server::port() { return _port; } - -absl::Status Server::start(std::shared_ptr geds) { - if (_state == geds::ServiceState::Running) { - return absl::FailedPreconditionError("The server is already running!"); - } - - _geds = geds; - - _listenThread = std::make_unique([this] { this->TcpListenThread(); }); - - _grpcService = std::unique_ptr(new ServerImpl(geds, *this)); - - grpc::EnableDefaultHealthCheckService(true); - grpc::reflection::InitProtoReflectionServerBuilderPlugin(); - - auto address = _hostname; - - int selectedPort = 0; - do { - grpc::ServerBuilder builder; - LOG_DEBUG("Trying port ", _port); - builder.AddListeningPort(address + ":" + std::to_string(_port), - grpc::InsecureServerCredentials(), &selectedPort); - builder.RegisterService(_grpcService.get()); - builder.AddChannelArgument(GRPC_ARG_ALLOW_REUSEPORT, 0); - _port++; - _grpcServer = builder.BuildAndStart(); - } while (selectedPort == 0); - _port = selectedPort; - LOG_INFO("GRPC Server started using ", _hostname, " and port ", _port); - - _state = ServiceState::Running; - // TODO: Check if _grpcServer->Wait() is required. - _state = ServiceState::Running; - return absl::OkStatus(); -} - -absl::Status Server::stop() { - CHECK_SERVICE_RUNNING - _grpcServer->Shutdown(); - _state = ServiceState::Stopped; - return absl::OkStatus(); -} - -} // namespace geds diff --git a/src/libgeds/Server.h b/src/libgeds/Server.h deleted file mode 100644 index bb6f5160..00000000 --- a/src/libgeds/Server.h +++ /dev/null @@ -1,65 +0,0 @@ -/** - * Copyright 2022- IBM Inc. 
All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#ifndef GEDS_SERVER_H -#define GEDS_SERVER_H - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "FileTransferProtocol.h" -#include "GEDSInternal.h" - -class GEDS; - -namespace geds { - -/** - * @brief Implements the GRPC services exposed by the GEDS instance. - * - */ -class Server { - std::shared_ptr _geds; - - geds::ServiceState _state; - std::string _hostname; - uint16_t _port; - - std::unique_ptr _grpcService; - std::unique_ptr _grpcServer; - - std::unique_ptr _listenThread; - void TcpListenThread(); - -public: - Server(std::string hostname, std::optional port = std::nullopt); - Server(const Server &) = delete; - Server &operator=(const Server &) = delete; - ~Server(); - - uint16_t port(); - - /** - * @brief List of TCP connections for object transfers between server and client - */ - std::list TcpListenEp; - - absl::Status start(std::shared_ptr geds); - absl::Status stop(); -}; - -} // namespace geds - -#endif diff --git a/src/libgeds/TcpTransport.cpp b/src/libgeds/TcpTransport.cpp deleted file mode 100644 index bf76ce43..00000000 --- a/src/libgeds/TcpTransport.cpp +++ /dev/null @@ -1,1031 +0,0 @@ -/** - * Copyright 2022- IBM Inc. 
All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "FileTransferProtocol.h" -#include "GEDS.h" -#include "GEDSFile.h" -#include "GEDSInternal.h" -#include "Logging.h" -#include "Object.h" -#include "TcpTransport.h" - -constexpr size_t MIN_SENDFILE_SIZE = 4096; -constexpr size_t BUFFER_ALIGNMENT = 32; - -namespace geds { - -TcpTransport::TcpTransport(std::shared_ptr geds) : _geds(geds) {} - -TcpTransport::~TcpTransport() { isServing = false; } - -void TcpTransport::start() { - if (isServing) { - LOG_ERROR("TCP service already started"); - return; - } - - u_int64_t registers[4]; // NOLINT - __asm__ __volatile__("cpuid " - : "=a"(registers[0]), "=b"(registers[1]), "=c"(registers[2]), - "=d"(registers[3]) - : "a"(1), "c"(0)); - bool hyperthreading = registers[3] & (1 << 28); // NOLINT - num_proc = std::thread::hardware_concurrency(); - if (hyperthreading) - num_proc /= 2; - - num_proc = std::min(num_proc, MAX_IO_THREADS); - isServing = true; - - /* - * Create eventfd to be integrated into epoll interest list for RX and TX - * threads. 
Writing to it will wakeup epoll_wait() if no other fd available - * or active - */ - eventFd = eventfd(0, 0); - - for (unsigned int id = 0; id < MAX_IO_THREADS; id++) { - txThreads.push_back(std::make_unique([this, id] { this->tcpTxThread(id); })); - rxThreads.push_back(std::make_unique([this, id] { this->tcpRxThread(id); })); - _buffers.push(new (std::align_val_t(BUFFER_ALIGNMENT)) uint8_t[MIN_SENDFILE_SIZE]); - } - ioStatsThread = std::make_unique([this] { this->updateIoStats(); }); - - LOG_DEBUG("TCP service started"); -} - -void TcpTransport::stop() { - LOG_DEBUG("Stopping TCP Service"); - isServing = false; - - if (eventFd > 0) { - u_int64_t buf = 1; - LOG_DEBUG("TCP Transport: write CTL fd"); - write(eventFd, &buf, 8); - } - - std::vector> tcpPeerV; - tcpPeers.forall([&tcpPeerV](std::shared_ptr &tp) { tcpPeerV.push_back(tp); }); - for (auto &ep : tcpPeerV) - ep->cleanup(); - - for (auto &t : txThreads) - t->join(); - - for (auto &t : rxThreads) - t->join(); - - // Introduces a performance regression in the I/O Benchmark. 
- // ioStatsThread->join(); - // ioStatsThread = nullptr; - - tcpPeers.clear(); - txThreads.clear(); - rxThreads.clear(); - - uint8_t *buffer; - while (_buffers.pop(buffer)) { - delete[] buffer; - } - if (eventFd > 0) - close(eventFd); - - eventFd = -1; - LOG_DEBUG("TCP Transport stopped"); -} - -TcpPeer::~TcpPeer() { - cleanup(); - assert(!endpoints.size()); -} - -void TcpPeer::cleanup() { - auto lock = getWriteLock(); - for (auto &endpoint : endpoints) { - auto tep = endpoint.second; - shutdown(tep->sock, SHUT_RDWR); - LOG_DEBUG("Endpoint shutdown: socket: ", tep->sock, " sent: ", tep->tx_bytes, - " received: ", tep->rx_bytes); - } - endpoints.clear(); - - if (recvQueue.size()) { - LOG_ERROR("Recv Queue not empty for TcpPeer: Removing tasks from recv queue"); - auto &stats = (*sendQueue_stats); - - recvQueue.remove([&stats](const uint64_t, std::shared_ptr &work) { - work->p->set_value(absl::UnavailableError("TcpPeer closed")); - stats--; - return true; - }); - } - - if (!sendQueue.empty()) { - LOG_ERROR("Send Queue not empty for TcpPeer: Removing tasks from send queue"); - std::optional> task = sendQueue.pop(); - while (task.has_value()) { - (*sendQueue_stats)--; - task = sendQueue.pop(); - } - } -} - -bool TcpPeer::SocketTxReady(int sock) { - bool rv = false; - auto lock = getReadLock(); - auto it = endpoints.find(sock); - if (it != endpoints.end()) { - auto tep = it->second; - tep->send_ctx.stateMux.lock(); - rv = processEndpointSend(tep); - tep->send_ctx.stateMux.unlock(); - } - return rv; -} - -/** - * @brief Write to socket until all sent or failure encountered. - * - * @param sock - socket to write to - * @return true - * @return false - */ -bool TcpPeer::processEndpointSend(std::shared_ptr tep) { - struct TcpSendState *ctx = &tep->send_ctx; - if (ctx->state == PROC_FAILED) { - LOG_ERROR("TcpPeer: Context is in failed state! 
Aborting!"); - return false; - } - - int sock = tep->sock; - - ssize_t sent = 0; - - do { - ssize_t data_to_send = 0; - sent = 0; - uint16_t hdr_to_send = 0; - struct iovec iov[2]; // NOLINT - int ix = 0; - - if (ctx->state == PROC_IDLE) { - auto workOpt = sendQueue.pop(); - if (!workOpt.has_value()) { - // Stop processing - return true; - } - (*sendQueue_stats)--; - - ctx->state = PROC_HDR; - - auto work = *workOpt; - /* - * Start processing new send work - */ - ctx->hdr.reqid = work->reqId; - ctx->hdr.hdrlen = sizeof ctx->hdr; - ctx->objName.clear(); - if (work->objName.length()) { - ctx->objName = work->objName; - ctx->hdr.hdrlen += ctx->objName.length(); - } - ctx->hdr.datalen = work->len; - ctx->hdr.offset = work->off; - ctx->hdr.type = work->type; - ctx->hdr.error = work->error; - ctx->va = work->va; - ctx->in_fd = work->in_fd; - ctx->progress = 0; - } - if (ctx->state == PROC_HDR) { - if (ctx->progress < sizeof ctx->hdr) { - hdr_to_send = sizeof ctx->hdr - ctx->progress; - auto hdrp = reinterpret_cast(&ctx->hdr); // NOLINT - iov[0].iov_base = &hdrp[ctx->progress]; // NOLINT - iov[0].iov_len = hdr_to_send; - ix = 1; - } - if (ctx->hdr.hdrlen > sizeof ctx->hdr) { - int name_off = (ix == 1) ? 0 : ctx->progress - sizeof ctx->hdr; - - assert(ix == 1 || ctx->progress >= sizeof ctx->hdr); - - iov[ix].iov_base = ctx->objName.data() + name_off; // NOLINT - iov[ix].iov_len = ctx->hdr.hdrlen - (sizeof ctx->hdr + name_off); - hdr_to_send += iov[ix].iov_len; - ix++; - } - sent = ::writev(sock, &iov[0], ix); - if (sent >= 0) { - tep->tx_bytes += sent; - if (sent < hdr_to_send) { - ctx->progress += sent; - continue; - } else { - assert(sent == hdr_to_send); - ctx->progress = 0; - } - } else { - if (errno != EWOULDBLOCK) - LOG_ERROR("Send failed, errno: ", errno); - break; - } - } - /* - * Add more RPC types with payload - * - * If error is signalled back, no data are included. 
- */ - if (ctx->hdr.type != GET_REPLY || ctx->hdr.error) { - ctx->state = PROC_IDLE; - continue; - } - data_to_send = ctx->hdr.datalen - ctx->progress; - if (!data_to_send) { - ctx->state = PROC_IDLE; - continue; - } - ctx->state = PROC_DATA; - - if (ctx->in_fd > 0) { - /* - * use sendfile() for sending data - */ - auto off = (off_t)(ctx->va + ctx->progress); - off_t *offp = &off; - /* - * sendfile() does not maintain the read offset of - * ctx->in_fd, if offp != NULL, so we do not change - * the file's offset here. - */ - sent = ::sendfile(sock, ctx->in_fd, offp, data_to_send); - } else { - auto vap = reinterpret_cast(ctx->va); // NOLINT - iov[0].iov_base = &vap[ctx->progress]; // NOLINT - iov[0].iov_len = data_to_send; - sent = ::writev(sock, &iov[0], 1); - /* - * XXX This is a hack: Free buffer provided for RPC reply. - * The caller should take care actually. - */ - if (sent == data_to_send) - _tcpTransport.releaseBuffer((uint8_t *)ctx->va); - } - if (sent == data_to_send) { - tep->tx_bytes += ctx->hdr.datalen; - ctx->state = PROC_IDLE; - continue; - } else if (sent < 0) - break; - ctx->progress += sent; - } while (sent > 0); - - if (sent >= 0 || errno == EWOULDBLOCK) - return true; - - ctx->state = PROC_FAILED; - return false; -} - -void TcpTransport::tcpTxThread(unsigned int id) { - struct epoll_event events[EPOLL_MAXEVENTS]; // NOLINT - int poll_fd = ::epoll_create1(0); - if (poll_fd < 0) { - perror("epoll_create: "); - return; - } - LOG_DEBUG("TCP TX thread ", id, " starting"); - epoll_wfd[id] = poll_fd; - if (eventFd > 0) { - struct epoll_event ev{}; - ev.events = EPOLLIN | EPOLLHUP | EPOLLRDHUP | EPOLLERR; - ev.data.fd = eventFd; - if (epoll_ctl(poll_fd, EPOLL_CTL_ADD, eventFd, &ev)) { - LOG_ERROR("WARNING: Cannot register ctl socket for TX epoll"); - perror("epoll_ctl: "); - } - } - do { - int cnt = ::epoll_wait(poll_fd, events, EPOLL_MAXEVENTS, -1); - - for (int i = 0; i < cnt; i++) { - struct epoll_event *ev = &events[i]; - - if (ev->data.fd == eventFd) 
{ - LOG_DEBUG("TCP TX: epoll CTL"); - continue; - } - epoll_epid_t ep_id = {}; - ep_id.data = ev->data.u64; - int sock = ep_id.id.sock; - unsigned int epId = ep_id.id.peer_id; - std::shared_ptr tcpPeer = nullptr; - - if (sock < 0) { - LOG_ERROR("Invalid write socket: ", sock, " PeerId: ", ev->data.u64, ", evcnt: ", cnt); - continue; - } - auto it = tcpPeers.get(epId); - if (it.has_value()) { - LOG_DEBUG("TX: Found peer for: ", sock); - tcpPeer = *it; - } else { - LOG_ERROR("TX: No peer for: ", sock); - deactivateEndpoint(poll_fd, sock, ALL_CLOSED); - continue; - } - if (ev->events & (EPOLLRDHUP | EPOLLHUP | EPOLLERR)) { - deactivateEndpoint(poll_fd, sock, TX_CLOSED); - if (tcpPeer->SocketStateChange(sock, TX_CLOSED)) { - tcpPeers.remove(tcpPeer->Id); - } - continue; - } - if (!(ev->events & EPOLLOUT)) { - LOG_DEBUG("No OUT: ", sock); - continue; - } - if (tcpPeer->SocketTxReady(sock)) - continue; - - shutdown(sock, SHUT_RDWR); - deactivateEndpoint(poll_fd, sock, TX_CLOSED); - if (tcpPeer->SocketStateChange(sock, TX_CLOSED)) { - tcpPeers.remove(tcpPeer->Id); - } - } - } while (isServing); - if (eventFd > 0) - epoll_ctl(poll_fd, EPOLL_CTL_DEL, eventFd, NULL); - - close(poll_fd); - LOG_DEBUG("TCP TX thread ", id, " exiting"); -} - -bool TcpPeer::SocketStateChange(int sock, uint32_t change) { - auto lock = getWriteLock(); - auto it = endpoints.find(sock); - if (it == endpoints.end()) { - LOG_ERROR("Unassigned socket: ", sock); - close(sock); - // Return false to keep unassociated peer alive - return false; - } - auto tep = it->second; - bool dead = false; - - /* - * Close socket for read or write - */ - if (change && !(tep->state & change)) { - tep->state |= change; - shutdown(sock, SHUT_RDWR); - if ((tep->state & ALL_CLOSED) == ALL_CLOSED) { - dead = true; - close(sock); - endpoints.erase(it); - LOG_DEBUG("TCP Peer: ", this->hostname, ", erased socket: ", - sock, ", change: ", change, ", num sockets now: ", endpoints.size()); - } - } - if (endpoints.size() != 0) - 
dead = false; - return dead; -} - -void TcpPeer::TcpProcessRpcGet(uint64_t reqId, const std::string objName, size_t len, size_t off) { - auto separator = objName.find('/'); - if (separator == std::string::npos) { - LOG_ERROR("cannot open file: ", objName, " invalid format!"); - sendRpcReply(reqId, -1, 0, 0, EINVAL); - return; - } - - auto bucket = objName.substr(0, separator); - auto key = objName.substr(separator + 1); - auto file = _geds->localOpen(bucket, key); - if (!file.ok()) { - LOG_DEBUG("cannot open file: ", objName, " reason: ", file.status().message()); - sendRpcReply(reqId, -1, 0, 0, EINVAL); - return; - } - auto filesize = file->size(); - if (off > filesize) { - LOG_ERROR("offset > filesize: ", off, " > ", filesize); - sendRpcReply(reqId, -1, 0, 0, EINVAL); - return; - } - filesize -= off; - len = (len == 0) ? filesize : std::min(filesize, len); - if (len == 0) { - sendRpcReply(reqId, -1, 0, 0, 0); - return; - } - /* - * Use sendfile() for large chunks of data - */ - auto rawFd = file->rawFd(); - if (len >= MIN_SENDFILE_SIZE && rawFd.ok()) { - int in_fd = *rawFd; - sendRpcReply(reqId, in_fd, off, len, 0); - return; - } - auto buffer = _tcpTransport.getBuffer(); - auto status = file->read(buffer, off, len); - if (len != *status) { - LOG_ERROR("file->read returned with an unexpected length!"); - } - if (status.ok()) { - sendRpcReply(reqId, -1, (uint64_t)buffer, *status, 0); - } else { - _tcpTransport.releaseBuffer(buffer); - LOG_ERROR("cannot read file: ", objName); - sendRpcReply(reqId, -1, 0, 0, EINVAL); - } -} - -/** - * @brief Read socket until empty or read failure - * - * @param tep - * @return true - * @return false - */ -bool TcpPeer::processEndpointRecv(int sock) { - std::shared_ptr tep; - { - auto lock = getReadLock(); - auto it = endpoints.find(sock); - if (it == endpoints.end()) { - LOG_ERROR("No peer for this endpoint: ", sock); - return false; - } - tep = it->second; - } - - TcpRcvState *ctx = &tep->recv_ctx; - int op = -1; - ssize_t rv = 
0; - - do { - bool start_data = false; - - if (ctx->state == PROC_IDLE) { - memset(&ctx->hdr, 0, sizeof ctx->hdr); - ctx->progress = 0; - ctx->objName.clear(); - ctx->state = PROC_HDR; - } - if (ctx->state == PROC_HDR) { - /* - * Start or resume hdr reception - */ - size_t to_recv = sizeof ctx->hdr - ctx->progress; - - while (ctx->progress < sizeof ctx->hdr) { - auto hdrp = reinterpret_cast(&ctx->hdr); // NOLINT - rv = ::recv(sock, &hdrp[ctx->progress], to_recv, 0); // NOLINT - if (rv <= 0) { - rv = errno == EWOULDBLOCK ? -EAGAIN : (errno ? -errno : -EIO); - break; - } - to_recv -= rv; - ctx->progress += rv; - } - if (rv < 0) - break; - /* - * Get the additional object name, if present - */ - to_recv = ctx->hdr.hdrlen - ctx->progress; - - if (ctx->hdr.type == GET_REQ) { - if (to_recv == 0 || to_recv > (ssize_t)RPC_TCP_MAX_HDR) { - LOG_ERROR("RPC GET_REQ header size invalid: ", ctx->hdr.hdrlen); - rv = -EINVAL; - break; - } - char buffer[to_recv]; - - while (to_recv) { - rv = ::recv(sock, &buffer[0], to_recv, 0); - if (rv <= 0) { - rv = errno == EWOULDBLOCK ? -EAGAIN : (errno ? -errno : -EIO); - break; - } - ctx->progress += rv; - std::string name(buffer, rv); - ctx->objName += name; - to_recv -= rv; - } - } else if (to_recv) { - int type = ctx->hdr.type, error = ctx->hdr.error; - - LOG_ERROR("RPC unexpected header content:: ", "reqid: ", ctx->hdr.reqid, - ", datalen: ", ctx->hdr.datalen, ", offset: ", ctx->hdr.offset, - ", hdrlen: ", ctx->hdr.hdrlen, ", type: ", type, ", error: ", error, - ", receive progress: ", ctx->progress); - rv = -EINVAL; - break; - } - if (rv < 0) - break; - ctx->state = PROC_DATA; - tep->rx_bytes += ctx->progress; - ctx->progress = 0; - rv = 0; - start_data = true; - } - - op = ctx->hdr.type; - unsigned long datalen = ctx->hdr.datalen; - - switch (op) { - - case GET_REQ: - // Any error would be reported by RPC reply. 
- TcpProcessRpcGet(ctx->hdr.reqid, ctx->objName, datalen, ctx->hdr.offset); - - ctx->state = PROC_IDLE; - break; - - case GET_REPLY: - if (start_data) { - /* - * Fetch corresponding work from receive queue - */ - auto it = recvQueue.getAndRemove(ctx->hdr.reqid); - if (!it.has_value()) { - LOG_ERROR("Socket: ", sock, ", Peer: ", Id, - ": No corresponding receive for: ", ctx->hdr.reqid, - ", recv's pending: ", recvQueue.size()); - rv = -EINVAL; - break; - } - (*recvQueue_stats)--; - auto work = *it; - ctx->va = work->va; - ctx->p = work->p; - - if (ctx->hdr.error) { - /* - * The peer shall not send any data here. - */ - if (datalen) { - LOG_ERROR("Protocol failure, no data in error reply expected, but indicated: ", datalen, - " Ep: ", tep->sock); - ctx->hdr.datalen = 0; - } - auto message = "Error from GET_REPLY: " + std::to_string(ctx->hdr.error) + - "length: " + std::to_string(datalen) + " Ep: " + std::to_string(tep->sock); - LOG_DEBUG(message); - ctx->p->set_value(absl::UnknownError(message)); - ctx->p = nullptr; - ctx->state = PROC_IDLE; - ctx->progress = 0; - break; - } - } - while (ctx->progress < datalen) { - auto to_recv = datalen - ctx->progress; - auto vap = reinterpret_cast(ctx->va); // NOLINT - rv = ::recv(sock, &vap[ctx->progress], to_recv, 0); // NOLINT - if (rv < 0) { - if (errno == EWOULDBLOCK) { - return true; - } - int err = errno; - int eio = EIO; - std::string message = "Error during recv: "; - if (err) { - message += "got errno " + std::to_string(err) + " " + strerror(err); - } else { - message += "got EIO " + std::to_string(eio); - } - ctx->p->set_value(absl::AbortedError(message)); - ctx->p = nullptr; - ctx->state = PROC_FAILED; - return false; - } - ctx->progress += rv; - } - if (ctx->progress == datalen) { - /* - * completed RPC exchange, inform caller - */ - tep->rx_bytes += ctx->progress; - ctx->p->set_value(datalen); - ctx->p = nullptr; - ctx->state = PROC_IDLE; - } - break; - default: - LOG_ERROR("Unsupported RPC operation: ", op); - 
return false; - } - } while (rv > 0); - - if (rv >= 0 || rv == -EAGAIN) - return true; - - if (ctx->p.get()) { - auto message = - "Protocol error: Aborted with " + std::to_string(rv) + ": " + std::string{strerror(rv)}; - LOG_ERROR(message); - ctx->p->set_value(absl::AbortedError(message)); - ctx->p = nullptr; - ctx->state = PROC_FAILED; - } - if (rv == -ENOENT) - LOG_ERROR("Socket close on read"); - else { - int err = errno; - LOG_ERROR("unexpected error: ", rv, " errno: ", err); - } - return false; -} - -// TODO: Make this thread interruptiple by signal to make it killable if Transport goes away. -void TcpTransport::updateIoStats() { - do { - tcpPeers.forall([](std::shared_ptr &tp) { tp->updateIoStats(); }); - sleep(1); - } while (isServing); -} - -void TcpTransport::tcpRxThread(unsigned int id) { - struct epoll_event events[EPOLL_MAXEVENTS]; // NOLINT - int poll_fd = ::epoll_create1(0); - if (poll_fd < 0) { - perror("epoll_create: "); - return; - } - LOG_DEBUG("TCP RX thread ", id, " starting"); - epoll_rfd[id] = poll_fd; - - if (eventFd > 0) { - struct epoll_event ev{}; - ev.events = EPOLLIN | EPOLLHUP | EPOLLRDHUP | EPOLLERR; - ev.data.fd = eventFd; - if (epoll_ctl(poll_fd, EPOLL_CTL_ADD, eventFd, &ev)) { - LOG_ERROR("WARNING: Cannot register ctl socket for RX epoll"); - perror("epoll_ctl: "); - } - } - - do { - int cnt = ::epoll_wait(poll_fd, events, EPOLL_MAXEVENTS, -1); - - for (int i = 0; i < cnt; i++) { - struct epoll_event *ev = &events[i]; - - if (ev->data.fd == eventFd) { - LOG_DEBUG("TCP RX: epoll CTL"); - continue; - } - epoll_epid_t ep_id = {}; - ep_id.data = ev->data.u64; - int sock = ep_id.id.sock; - unsigned int epId = ep_id.id.peer_id; - std::shared_ptr tcpPeer = nullptr; - - if (sock < 0) { - LOG_ERROR("Invalid read socket: ", sock, " PeerId: ", ev->data.u64); - continue; - } - auto it = tcpPeers.get(epId); - if (it.has_value()) { - tcpPeer = *it; - } else { - LOG_ERROR("RX: No peer for: ", sock); - deactivateEndpoint(poll_fd, sock, ALL_CLOSED); 
- continue; - } - if (ev->events & (EPOLLHUP | EPOLLRDHUP | EPOLLERR)) { - LOG_DEBUG("Peer closes: ", sock); - deactivateEndpoint(poll_fd, sock, RX_CLOSED); - if (tcpPeer->SocketStateChange(sock, RX_CLOSED)) { - tcpPeers.remove(tcpPeer->Id); - } - continue; - } - if (!(ev->events & EPOLLIN)) { - LOG_DEBUG("No IN: ", sock); - continue; - } - - if (!tcpPeer->processEndpointRecv(sock)) { - shutdown(sock, SHUT_RDWR); - deactivateEndpoint(poll_fd, sock, RX_CLOSED); - if (tcpPeer->SocketStateChange(sock, RX_CLOSED)) { - tcpPeers.remove(tcpPeer->Id); - } - } - } - } while (isServing); - if (eventFd > 0) - epoll_ctl(poll_fd, EPOLL_CTL_DEL, eventFd, NULL); - - close(poll_fd); - LOG_DEBUG("TCP RX thread ", id, " exiting"); -} - -std::shared_ptr TcpTransport::factory(std::shared_ptr geds) { - return std::shared_ptr(new TcpTransport(geds)); -} - -void TcpTransport::deactivateEndpoint(int poll_fd, int sock, uint32_t state) { - LOG_DEBUG("TCP deactivate EP: ", sock, ", state: ", state); - if (state & TX_CLOSED) - epoll_ctl(poll_fd, EPOLL_CTL_DEL, sock, nullptr); - if (state & RX_CLOSED) - epoll_ctl(poll_fd, EPOLL_CTL_DEL, sock, nullptr); -} - -bool TcpTransport::activateEndpoint(std::shared_ptr tep, - std::shared_ptr peer) { - struct epoll_event ev = {}; - epoll_epid_t ep_id = {}; - int sock = tep->sock; - unsigned int thread_id = peer->endpoints.size() % num_proc; - - ep_id.id.sock = tep->sock; - ep_id.id.peer_id = peer->getId(); - - int no = 1; - if (setsockopt(sock, SOL_TCP, TCP_NODELAY, &no, sizeof(no))) { - perror("setsockopt nodelay"); - return false; - } - - ev.events = EPOLLIN | EPOLLHUP | EPOLLRDHUP | EPOLLERR; - ev.data.u64 = ep_id.data; - if (epoll_ctl(epoll_rfd[thread_id], EPOLL_CTL_ADD, sock, &ev) != 0) { - perror("epoll_ctl read: "); - return false; - } - ev.events = EPOLLOUT | EPOLLHUP | EPOLLERR | EPOLLET; - ev.data.u64 = ep_id.data; - if (epoll_ctl(epoll_wfd[thread_id], EPOLL_CTL_ADD, sock, &ev) != 0) { - epoll_ctl(epoll_rfd[thread_id], EPOLL_CTL_DEL, sock, 
NULL); - perror("epoll_ctl send: "); - return false; - } - LOG_DEBUG("TCP activated EP, socket: ", tep->sock, ", host: ", peer->hostname); - return true; -} - -bool TcpTransport::addEndpointPassive(int sock) { - std::shared_ptr tep = std::make_shared(); - struct sockaddr peer_sockaddr = {}; - auto *in_peer = (sockaddr_in *)&peer_sockaddr; - - socklen_t addrlen = sizeof peer_sockaddr; - - if (::fcntl(sock, F_SETFL, fcntl(sock, F_GETFL, 0) | O_NONBLOCK)) { - perror("fcntl: "); - return false; - } - - struct linger lg = {.l_onoff = 0, .l_linger = 0}; - if (::setsockopt(sock, SOL_SOCKET, SO_LINGER, &lg, sizeof lg)) { - perror("SO_LINGER: "); - return false; - } - - if (::getpeername(sock, &peer_sockaddr, &addrlen) != 0) { - perror("getpeername: "); - return false; - } - tep->sock = sock; - - std::string hostname = inet_ntoa(in_peer->sin_addr); - std::shared_ptr tcpPeer; - unsigned int epId = SStringHash(hostname); - auto it = tcpPeers.get(epId); - if (!it.has_value()) { - tcpPeer = std::make_shared(hostname, _geds, *this); - tcpPeers.insertOrReplace(epId, tcpPeer); - } else { - tcpPeer = *it; - } - tcpPeer->addEndpoint(tep); - activateEndpoint(tep, tcpPeer); - LOG_DEBUG("Server connected to ", hostname, "::", in_peer->sin_port); - - return true; -} - -std::shared_ptr TcpTransport::getPeer(sockaddr *peer) { - auto inaddr = (sockaddr_in *)peer; - std::string hostname = inet_ntoa(inaddr->sin_addr); - size_t addrlen = sizeof *peer; - int sock = -1, rv = 0; - unsigned int epId = SStringHash(hostname); - auto lock = getWriteLock(); - /* - * Check if we are already connected to that address. No new peer in - * this case. 
- */ - std::shared_ptr tcpPeer; - auto it = tcpPeers.get(epId); - if (it.has_value()) { - tcpPeer = *it; - LOG_DEBUG("Already connected: ", hostname, "::", inaddr->sin_port); - return tcpPeer; - } - if (peer->sa_family != AF_INET) { - LOG_ERROR("Address family not supported: ", peer->sa_family); - return nullptr; - } - for (unsigned int num_ep = 0; num_ep < num_proc; num_ep++) { - sock = ::socket(AF_INET, SOCK_STREAM, 0); - if (sock < 0) { - return nullptr; - } - rv = ::connect(sock, peer, addrlen); - if (rv) { - LOG_ERROR("Cannot connect: ", hostname, "::", inaddr->sin_port); - ::close(sock); - return nullptr; - } - /* - * Mark socket non-blocking to allow efficient handling of - * multiple sockets in rx and tx threads. - */ - rv = ::fcntl(sock, F_SETFL, fcntl(sock, F_GETFL, 0) | O_NONBLOCK); - if (rv) { - LOG_ERROR("Cannot set socket non-blocking ", hostname, "::", inaddr->sin_port); - close(sock); - return nullptr; - } - struct linger lg = {.l_onoff = 0, .l_linger = 0}; - if (::setsockopt(sock, SOL_SOCKET, SO_LINGER, &lg, sizeof lg)) { - LOG_ERROR("Cannot set NO_LINGER ", hostname, "::", inaddr->sin_port); - close(sock); - return nullptr; - } - std::shared_ptr tep = std::make_shared(); - - LOG_DEBUG("Connected, num Ep: ", num_ep, " hostname: ", hostname, "::", inaddr->sin_port); - tep->sock = sock; - if (num_ep == 0) { - tcpPeer = std::make_shared(hostname, _geds, *this); - tcpPeers.insertOrReplace(epId, tcpPeer); - } - tcpPeer->addEndpoint(tep); - activateEndpoint(tep, tcpPeer); - } - LOG_DEBUG("Client connected to ", hostname, "::", inaddr->sin_port); - return tcpPeer; -} - -void TcpPeer::updateIoStats() { - auto lock = getReadLock(); - for (auto &endpoint : endpoints) { - auto tep = endpoint.second; - /* - * Todo: Implement something more clever here - */ - tep->tx_bytes /= 2; - tep->rx_bytes /= 2; - } -} - -std::shared_ptr TcpPeer::getLeastUsedTx(size_t to_send) { - std::shared_ptr send_tep = nullptr, tep = nullptr; - size_t min_sent = UINT_LEAST32_MAX; - - 
auto lock = getReadLock(); - for (auto &endpoint : endpoints) { - tep = endpoint.second; - if (tep->state != ALL_OPEN) { - tep = nullptr; - continue; - } - if (tep->send_ctx.state == PROC_IDLE) { - send_tep = tep; - break; - } - if (tep->tx_bytes + to_send < min_sent) { - min_sent = tep->tx_bytes; - send_tep = tep; - } - } - if (send_tep) - return send_tep; - - return tep; // May be nullptr -} - -int TcpPeer::sendRpcReply(uint64_t reqId, int in_fd, uint64_t start, size_t len, int status) { - bool send_ok = false; - - auto sendWork = std::make_shared(); - sendWork->reqId = reqId; - sendWork->va = start; - sendWork->in_fd = in_fd; - sendWork->len = len; - sendWork->type = GET_REPLY; - sendWork->error = status; - sendQueue.emplace(sendWork); - (*sendQueue_stats)++; - - auto tep = getLeastUsedTx(len); - if (tep) { - tep->send_ctx.stateMux.lock(); - send_ok = processEndpointSend(tep); - tep->send_ctx.stateMux.unlock(); - } else { - LOG_ERROR("No active endpoint found"); - } - if (send_ok) - return 0; - if (errno) - return -errno; - return -EIO; -} - -std::shared_ptr>> -TcpPeer::sendRpcRequest(uint64_t dest, std::string name, size_t off, size_t len) { - uint64_t reqId = ++rpcReqId; - bool send_ok = false; - - auto recvWork = std::make_shared(); - recvWork->reqId = reqId; - recvWork->va = dest; - recvWork->len = len; - recvWork->p = std::make_shared>>(); - recvQueue.insertOrReplace(reqId, recvWork); - (*recvQueue_stats)++; - - auto sendWork = std::shared_ptr(new SocketSendWork{}); - sendWork->reqId = reqId; - sendWork->objName = name; - sendWork->va = 0; - sendWork->in_fd = -1; - sendWork->len = len; - sendWork->off = off; - sendWork->error = 0; - sendWork->type = GET_REQ; - sendQueue.emplace(sendWork); - (*sendQueue_stats)++; - - auto tep = getLeastUsedTx(len); - if (tep) { - tep->send_ctx.stateMux.lock(); - send_ok = processEndpointSend(tep); - tep->send_ctx.stateMux.unlock(); - } else { - LOG_ERROR("No active endpoint found"); - } - if (!send_ok) { - LOG_ERROR("RPC Req 
Send failed"); - if (recvQueue.remove(reqId)) { - (*recvQueue_stats)--; - recvWork->p->set_value(absl::AbortedError("Unable to proceed: ")); - } - } - return recvWork->p; -} - -uint8_t *TcpTransport::getBuffer() { - uint8_t *result; - auto success = _buffers.pop(result); - if (!success) { - return new (std::align_val_t(BUFFER_ALIGNMENT)) uint8_t[MIN_SENDFILE_SIZE]; - } - return result; -} - -void TcpTransport::releaseBuffer(uint8_t *buffer) { _buffers.push(buffer); } -} // namespace geds diff --git a/src/libgeds/TcpTransport.h b/src/libgeds/TcpTransport.h deleted file mode 100644 index 93d30047..00000000 --- a/src/libgeds/TcpTransport.h +++ /dev/null @@ -1,253 +0,0 @@ -/** - * Copyright 2022- IBM Inc. All rights reserved - * SPDX-License-Identifier: Apache-2.0 - */ - -#ifndef _TCP_TRANSPORT_H -#define _TCP_TRANSPORT_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "ConcurrentMap.h" -#include "ConcurrentQueue.h" -#include "FileTransferProtocol.h" -#include "RWConcurrentObjectAdaptor.h" -#include "Statistics.h" -#include "StatisticsGauge.h" - -class GEDS; - -namespace geds { - -enum SockProcessingState { PROC_IDLE = 0, PROC_HDR, PROC_DATA, PROC_FAILED }; - -enum TcpRpcOp { GET_REQ = 1, GET_REPLY, INFO_REQ, INFO_REPLY }; - -/** - * @brief Work item for threads sending and receiving on TCP socket - * - */ -struct SocketSendWork { - std::string objName; - uint64_t reqId; - uint64_t va; - int in_fd; - size_t off; - size_t len; - TcpRpcOp type; - size_t progress; - int error; -}; - -struct SocketRecvWork { - uint64_t reqId; - uint64_t va; - size_t len; - std::shared_ptr>> p; -}; - -/** - * @brief Hdr of all TCP RPC's - * - */ -struct TcpCtlHdr { - uint64_t reqid; - uint64_t datalen; - uint64_t offset; - uint16_t hdrlen; - uint8_t type; - uint8_t error; // just errors as defined in errno.h - uint32_t pad; -}; - -// 4096 bytes 
maximum object name length -#define RPC_TCP_MAX_HDR (4096 + sizeof(TcpCtlHdr)) - -struct TcpSendState { - std::atomic state = PROC_IDLE; - mutable std::shared_mutex stateMux; - bool direct_tx = false; - struct TcpCtlHdr hdr; - uint64_t va; - int in_fd; // If non-negative: fd of requested object, to be used in sendfile() - std::string objName; - size_t progress; -}; - -struct TcpRcvState { - std::atomic state = PROC_IDLE; - struct TcpCtlHdr hdr; - uint64_t va; - std::string objName; - size_t progress; - - std::shared_ptr>> p; -}; - -class TcpPeer; - -enum epState { - ALL_OPEN = 0x0, - TX_CLOSED = 0x01, - RX_CLOSED = 0x02, - ALL_CLOSED = TX_CLOSED | RX_CLOSED -}; - -struct TcpEndpoint { - int sock; - uint32_t state; - struct TcpRcvState recv_ctx; - struct TcpSendState send_ctx; - size_t tx_bytes = 0; - size_t rx_bytes = 0; -}; - -/** - * @brief simple hash function to hash peer name to list index - * - * @param name - * @return unsigned int - */ -static unsigned int SStringHash(std::string name) { - unsigned int hash = 0; - for (char i : name) - hash = i + (hash << 6) + (hash << 16) - hash; - return hash; -} - -struct ep_id { - uint32_t peer_id; - int32_t sock; -}; - -using epoll_epid_t = union EpollEpId { - u_int64_t data; - struct ep_id id; -}; - -class TcpTransport; -class TcpPeer : public std::enable_shared_from_this, utility::RWConcurrentObjectAdaptor { - -private: - friend class TcpTransport; - - unsigned int Id; - std::shared_ptr _geds; - TcpTransport &_tcpTransport; - std::string hostname; - std::atomic_uint64_t rpcReqId = 0; - - utility::ConcurrentQueue> sendQueue; - std::shared_ptr sendQueue_stats = - Statistics::createGauge("GEDS: TcpTransport sendQueue length"); - utility::ConcurrentMap> recvQueue; - std::shared_ptr recvQueue_stats = - Statistics::createGauge("GEDS: TcpTransport recvQueue length"); - - std::map> endpoints; - - bool processEndpointSend(std::shared_ptr tep); - bool processEndpointRecv(int sock); - bool SocketStateChange(int sock, uint32_t 
change); - bool SocketTxReady(int sock); - void updateIoStats(); - void cleanup(); - - std::shared_ptr getLeastUsedTx(size_t tu_send); - - int sendRpcReply(uint64_t reqId, uint64_t start, size_t len, int status); - void TcpProcessRpcGet(uint64_t ReqId, const std::string ObjName, size_t len, size_t off); - -public: - unsigned int getId() { return Id; } - std::shared_ptr>> - sendRpcRequest(uint64_t dest, std::string name, size_t src_off, size_t len); - int sendRpcReply(uint64_t reqId, int in_fd, uint64_t start, size_t len, int status); - void addEndpoint(std::shared_ptr tep) { - auto lock = getWriteLock(); - endpoints.emplace(tep->sock, tep); - }; - TcpPeer(std::string name, std::shared_ptr geds, TcpTransport &tcpTransport) - : Id(SStringHash(name)), _geds(std::move(geds)), _tcpTransport(tcpTransport), - hostname(std::move(name)){}; - TcpPeer(const TcpPeer &other) = delete; - TcpPeer(TcpPeer &&other) = delete; - TcpPeer &operator=(const TcpPeer &other) = delete; - TcpPeer &operator=(TcpPeer &&other) = delete; - ~TcpPeer(); -}; -constexpr unsigned int MAX_PEERS = 8096; -constexpr unsigned int MAX_IO_THREADS = 8; -constexpr unsigned int EPOLL_MAXEVENTS = MAX_PEERS / MAX_IO_THREADS; - -class TcpTransport : public std::enable_shared_from_this { - -private: - std::shared_ptr _geds; - boost::lockfree::stack> _buffers{MAX_IO_THREADS}; - - void tcpTxThread(unsigned int id); - void tcpRxThread(unsigned int id); - std::vector> txThreads; - std::vector> rxThreads; - - void updateIoStats(); - std::unique_ptr ioStatsThread; - - volatile bool isServing = false; - unsigned int num_proc = 0; - - int epoll_rfd[MAX_IO_THREADS] = {}; // for epoll() receive - int epoll_wfd[MAX_IO_THREADS] = {}; // for epoll() send - - /* fd to signal threads in epoll to interrupt wait */ - int eventFd = -1; - - void deactivateEndpoint(int poll_fd, int sock, uint32_t state); - bool activateEndpoint(std::shared_ptr, std::shared_ptr); - utility::ConcurrentMap> tcpPeers; - - mutable std::shared_mutex 
connMutex; - auto getReadLock() const { return std::shared_lock(connMutex); } - auto getWriteLock() const { return std::unique_lock(connMutex); } - - TcpTransport(std::shared_ptr geds); - -public: - [[nodiscard]] static std::shared_ptr factory(std::shared_ptr geds); - - uint8_t *getBuffer(); - void releaseBuffer(uint8_t *buffer); - - virtual ~TcpTransport(); - TcpTransport(const TcpTransport &other) = delete; - TcpTransport(TcpTransport &&other) = delete; - TcpTransport &operator=(const TcpTransport &other) = delete; - TcpTransport &operator=(TcpTransport &&other) = delete; - - void start(); - void stop(); - - std::shared_ptr getPeer(sockaddr *); - bool addEndpointPassive(int sock); -}; -} // namespace geds -#endif