Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,5 @@

build/
.vscode/
.cache/
.cache/
.clangd
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ option(COLUMNAR_BUILD_BENCHMARKS "Build benchmarks" ON)
include(cmake/deps.cmake)

add_library(columnar_lib
src/add.cpp
src/core/column.cpp
)

target_include_directories(columnar_lib PUBLIC include)
target_include_directories(columnar_lib PUBLIC ${PROJECT_SOURCE_DIR}/include)

target_compile_options(columnar_lib PRIVATE -Wall -Wextra -Wpedantic)

Expand Down
12 changes: 5 additions & 7 deletions apps/converter/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,13 @@

#include <iostream>

#include "columnar/add.h"

ABSL_FLAG(int, x, 1, "x");
ABSL_FLAG(int, y, 2, "y");
// ABSL_FLAG(int, x, 1, "x");
// ABSL_FLAG(int, y, 2, "y");

// Converter entry point: hands argc/argv to absl::ParseCommandLine, writes to
// std::cout and returns 0.
// NOTE(review): this listing appears to interleave the removed and the added lines
// of a diff (the flag reads and the Add() print sit next to their commented-out /
// "Hello" replacements) — confirm which lines are live before relying on it.
int main(int argc, char** argv) {
absl::ParseCommandLine(argc, argv);
int x = absl::GetFlag(FLAGS_x);
int y = absl::GetFlag(FLAGS_y);
std::cout << "Add(" << x << ", " << y << ") = " << columnar::Add(x, y) << "\n";
// int x = absl::GetFlag(FLAGS_x);
// int y = absl::GetFlag(FLAGS_y);
std::cout << "Hello" << "\n";
return 0;
}
2 changes: 1 addition & 1 deletion benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
add_executable(columnar_bench
bench_bruh.cpp
bench_csv.cpp
)

target_link_libraries(columnar_bench
Expand Down
12 changes: 0 additions & 12 deletions benchmarks/bench_bruh.cpp

This file was deleted.

69 changes: 69 additions & 0 deletions benchmarks/bench_csv.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#include <benchmark/benchmark.h>
#include <csv/csv.h>
#include <core/schema.h>

#include <sstream>

using namespace columnar; // NOLINT

// Builds `rows` lines of synthetic CSV text. Line k (counting from 0) reads
// "<k>,value<k>,0.123" and is terminated by '\n'.
// Returns an empty string when `rows` is 0.
std::string GenerateCSV(std::size_t rows) {
    std::ostringstream buffer;
    std::size_t row = 0;
    while (row != rows) {
        buffer << row;
        buffer << ",value" << row;
        buffer << ",0.123\n";
        ++row;
    }
    return buffer.str();
}

// Benchmarks csv::CSVRowReader over an in-memory CSV document.
// state.range(0) is the number of generated rows. Each timed iteration wraps the
// same text in a fresh stream and calls ReadRow() until it returns a falsy result.
// Items processed are reported as iterations * rows.
void BenchCSVRowReader(benchmark::State& state) {
    const auto row_count = state.range(0);
    const std::string csv_text = GenerateCSV(row_count);

    for (auto _ : state) {
        std::istringstream input(csv_text);
        csv::CSVRowReader reader(input);
        while (reader.ReadRow()) {
            // Rows are discarded; only the reads themselves are being timed.
        }
    }

    state.SetItemsProcessed(state.iterations() * row_count);
}
BENCHMARK(BenchCSVRowReader)->Range(100, 10000);

// Benchmarks csv::CSVBatchReader over an in-memory CSV document using a three-field
// schema (id: Int64, name: String, value: Double).
// state.range(0) is the number of generated rows. Each timed iteration builds a new
// reader on a fresh stream and calls ReadNext() until it returns a falsy result.
// Items processed are reported as iterations * rows.
void BenchCSVBatchReader(benchmark::State& state) {
    const auto row_count = state.range(0);
    const std::string csv_text = GenerateCSV(row_count);

    core::Schema schema({core::Field("id", core::DataType::Int64),
                         core::Field("name", core::DataType::String),
                         core::Field("value", core::DataType::Double)});

    for (auto _ : state) {
        std::istringstream input(csv_text);
        csv::CSVBatchReader reader(input, schema, {});
        while (reader.ReadNext()) {
            // Batches are discarded; only the reads themselves are being timed.
        }
    }

    state.SetItemsProcessed(state.iterations() * row_count);
}
BENCHMARK(BenchCSVBatchReader)->Range(100, 10000);

// Benchmarks csv::CSVBatchWriter writing one pre-built batch to an in-memory stream.
// The batch has two columns (id: Int64, name: String) and state.range(0) rows; it is
// filled once, outside the timed loop. Each timed iteration creates a new stream and
// writer, then runs Write + Flush; the resulting text is passed to
// benchmark::DoNotOptimize. Items processed are reported as iterations * rows.
void BenchCSVBatchWriter(benchmark::State& state) {
    const auto row_count = state.range(0);

    core::Schema schema(
        {core::Field("id", core::DataType::Int64), core::Field("name", core::DataType::String)});
    core::Batch batch(schema);

    auto& id_column = batch.ColumnAt(0);
    auto& name_column = batch.ColumnAt(1);
    for (int row = 0; row < row_count; ++row) {
        id_column.AppendFromString(std::to_string(row));
        name_column.AppendFromString("hello mir");
    }

    for (auto _ : state) {
        std::ostringstream sink;
        csv::CSVBatchWriter writer(sink, {});
        writer.Write(batch);
        writer.Flush();
        benchmark::DoNotOptimize(sink.str());
    }

    state.SetItemsProcessed(state.iterations() * row_count);
}
BENCHMARK(BenchCSVBatchWriter)->Range(100, 10000);

// Google Benchmark macro that supplies this executable's main(); the benchmarks
// registered above with BENCHMARK(...) are the ones it runs.
BENCHMARK_MAIN();
5 changes: 0 additions & 5 deletions include/columnar/add.h

This file was deleted.

75 changes: 75 additions & 0 deletions include/core/batch.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#pragma once

#include <core/schema.h>
#include <core/column.h>
#include <util/macro.h>

namespace columnar::core {
class Batch {
public:
Batch() = default;

Batch(Schema schema) : schema_(std::move(schema)) {
columns_.reserve(schema_.FieldsCount());
for(auto& field : schema_.GetFields()) {
columns_.emplace_back(MakeColumn(field.type, field.nullable));
}
}

Batch(Schema schema, std::size_t reserve_rows) : Batch(std::move(schema)) {
Reserve(reserve_rows);
}

const Schema& GetSchema() const {
return schema_;
}

std::size_t ColumnsCount() const {
return columns_.size();
}

std::size_t RowsCount() const {
if (columns_.empty()) {
return 0;
}
return columns_[0]->Size();
}

void Reserve(std::size_t n) {
for (auto& c : columns_) {
c->Reserve(n);
}
}

const Column& ColumnAt(std::size_t i) const {
return *columns_.at(i);
}

Column& ColumnAt(std::size_t i) {
return *columns_.at(i);
}

const std::vector<std::unique_ptr<Column>>& GetColumns() const {
return columns_;
}

std::vector<std::unique_ptr<Column>>& GetColumns() {
return columns_;
}

void Validate() const {
if (columns_.empty()) {
return;
}
for (auto& c : columns_) {
if (c->Size() != columns_[0]->Size()) {
THROW_RUNTIME_ERROR("Different columns sizes");
}
}
}

private:
Schema schema_;
std::vector<std::unique_ptr<Column>> columns_;
};
} // namespace columnar::core
16 changes: 16 additions & 0 deletions include/core/batch_reader.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#pragma once

#include <core/batch.h>

#include <optional>

namespace columnar::core {
// Abstract interface for a source that yields Batch objects one at a time.
class BatchReader {
public:
// Virtual so implementations can be destroyed through a BatchReader pointer.
virtual ~BatchReader() = default;

// Returns the next batch wrapped in std::optional.
// NOTE(review): an empty optional presumably signals that no more batches are
// available — the exact contract is defined by implementations; confirm there.
virtual std::optional<Batch> ReadNext() = 0;

// Returns a reference to a schema owned by the reader.
// NOTE(review): presumably the schema of the batches ReadNext() yields — confirm.
virtual const core::Schema& GetSchema() const = 0;
};
} // namespace columnar::core
14 changes: 14 additions & 0 deletions include/core/batch_writer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#pragma once

#include <core/batch.h>

namespace columnar::core {
// Abstract interface for a sink that accepts Batch objects.
class BatchWriter {
public:
// Virtual so implementations can be destroyed through a BatchWriter pointer.
virtual ~BatchWriter() = default;

// Writes `batch` to the sink; the batch is taken by const reference and not modified.
virtual void Write(const Batch& batch) = 0;

// NOTE(review): presumably pushes any buffered output to the underlying sink —
// the exact semantics are defined by implementations; confirm there.
virtual void Flush() = 0;
};
} // namespace columnar::core
13 changes: 13 additions & 0 deletions include/core/column.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#pragma once

#include <core/columns/abstract_column.h>
#include <core/columns/numeric_column.h>
#include <core/columns/string_column.h>
#include <core/datatype.h>

namespace columnar::core {
// NumericColumn of int64_t values, tagged DataType::Int64.
using Int64Column = NumericColumn<int64_t, DataType::Int64>;
// NumericColumn of double values, tagged DataType::Double.
using DoubleColumn = NumericColumn<double, DataType::Double>;

// Factory returning an owning pointer to a new Column; only declared in this header.
// NOTE(review): presumably picks the concrete column class matching `id` and forwards
// `nullable` to it — confirm against the definition.
std::unique_ptr<Column> MakeColumn(DataType id, bool nullable);
} // namespace columnar::core
37 changes: 37 additions & 0 deletions include/core/columns/abstract_column.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#pragma once

#include <core/datatype.h>

#include <memory>
#include <string>
#include <string_view>

namespace columnar::core {

// Abstract, type-erased interface shared by all column implementations.
// Behaviour notes below that mention NumericColumn describe that one implementation;
// other implementations may differ.
class Column {
public:
// Virtual so columns can be destroyed through a Column pointer.
virtual ~Column() = default;

// Returns the DataType tag of this column.
virtual DataType GetDataType() const = 0;

// Returns the number of entries currently stored.
virtual std::size_t Size() const = 0;

// Capacity request for `n` entries; NumericColumn forwards it to std::vector::reserve.
virtual void Reserve(std::size_t n) = 0;

// Appends one entry given in textual form; NumericColumn converts `s` with
// util::ParseFromString<T>.
virtual void AppendFromString(std::string_view s) = 0;

// Appends a null entry; NumericColumn reports an error through THROW_RUNTIME_ERROR
// when the column is not nullable.
virtual void AppendNull() = 0;

// Appends a default entry; NumericColumn appends a value-initialized element and,
// on nullable columns, flags it as null.
virtual void AppendDefault() = 0;

// Whether this column can hold null entries.
virtual bool IsNullable() const = 0;

// Whether entry `i` is null; NumericColumn always returns false on non-nullable
// columns and does not bounds-check `i`.
virtual bool IsNull(std::size_t i) const = 0;

// Returns a new, separately owned column; NumericColumn creates an empty column of
// the same concrete type and nullability.
virtual std::unique_ptr<Column> CloneEmpty() const = 0;

// Textual form of entry `i`; NumericColumn returns an empty string for null entries.
virtual std::string GetAsString(std::size_t i) const = 0;

// Removes all entries; NumericColumn clears both its values and its null flags.
virtual void Clear() = 0;
};
} // namespace columnar::core
92 changes: 92 additions & 0 deletions include/core/columns/numeric_column.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#pragma once

#include <core/columns/abstract_column.h>
#include <core/datatype.h>
#include <util/macro.h>
#include <util/parse.h>

#include <cstddef>
#include <memory>
#include <string>
#include <string_view>
#include <type_traits>
#include <vector>

namespace columnar::core {
// Column implementation that stores its values in a std::vector<T> and, for nullable
// columns, keeps a parallel std::vector<bool> of null flags (one flag per value).
//
// T    - element type stored in the column.
// type - DataType tag reported by GetDataType().
template <typename T, DataType type>
class NumericColumn final : public Column {
public:
    // Creates an empty column. `nullable` decides whether null flags are tracked and
    // whether AppendNull() is permitted.
    NumericColumn(bool nullable = false) : nullable_(nullable) {
    }

    // Returns the DataType given as template argument.
    DataType GetDataType() const override {
        return type;
    }

    // Number of stored entries (null entries included).
    std::size_t Size() const override {
        return data_.size();
    }

    // Reserves capacity for `n` values and, on nullable columns, for `n` null flags.
    void Reserve(std::size_t n) override {
        data_.reserve(n);
        if (nullable_) {
            is_null_.reserve(n);
        }
    }

    // Whether the column was created as nullable.
    bool IsNullable() const override {
        return nullable_;
    }

    // Whether entry `i` is null. Always false on non-nullable columns.
    // `i` is not bounds-checked.
    bool IsNull(std::size_t i) const override {
        return nullable_ && is_null_[i];
    }

    // Parses `s` with util::ParseFromString<T> and appends the result as a non-null
    // entry. The parse runs before anything is appended, so if it fails by throwing
    // the column is left unchanged.
    void AppendFromString(std::string_view s) override {
        data_.emplace_back(util::ParseFromString<T>(s));
        if (nullable_) {
            is_null_.push_back(false);
        }
    }

    // Appends a null entry (a value-initialized placeholder plus a set null flag).
    // Reports an error through THROW_RUNTIME_ERROR when the column is not nullable.
    void AppendNull() override {
        if (!nullable_) {
            THROW_RUNTIME_ERROR("Cannot set not nullable value to null");
        }
        data_.emplace_back();
        is_null_.push_back(true);
    }

    // Appends a value-initialized entry; on nullable columns the entry is flagged null.
    void AppendDefault() override {
        data_.emplace_back();
        if (nullable_) {
            is_null_.push_back(true);
        }
    }

    // Returns a new empty column with the same T, type tag and nullability.
    std::unique_ptr<Column> CloneEmpty() const override {
        return std::make_unique<NumericColumn<T, type>>(nullable_);
    }

    // Removes all values and null flags.
    void Clear() override {
        data_.clear();
        is_null_.clear();
    }

    // Returns the stored value at `i` (the placeholder for null entries); `i` is not
    // bounds-checked.
    //
    // The result is `const T&` for every T except bool. std::vector<bool>::operator[]
    // const returns a temporary rather than a reference to stored storage, so
    // returning `const bool&` would dangle; for bool the value is returned by copy.
    std::conditional_t<std::is_same_v<T, bool>, bool, const T&> Get(std::size_t i) const {
        return data_[i];
    }

    // Textual form of entry `i`: "" for null entries, "true"/"false" for bool,
    // std::to_string otherwise.
    // NOTE(review): std::to_string formats floating-point values with a fixed six
    // decimals (e.g. 1e-7 becomes "0.000000"), so double values do not round-trip
    // through this text form — confirm whether that is acceptable for callers.
    std::string GetAsString(std::size_t i) const override {
        if (IsNull(i)) {
            return "";
        }
        if constexpr (std::is_same_v<T, bool>) {
            return data_[i] ? "true" : "false";
        } else {
            return std::to_string(data_[i]);
        }
    }

private:
    std::vector<T> data_;
    bool nullable_ = false;
    // One flag per entry in data_; only populated when nullable_ is true.
    std::vector<bool> is_null_;
};
} // namespace columnar::core
Loading