Skip to content

Commit 17c79cb

Browse files
authored
Merge pull request #4 from Irval1337/added-csv-interface
Add csv interface
2 parents df806f7 + c892708 commit 17c79cb

33 files changed

+1374
-42
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,5 @@
4242

4343
build/
4444
.vscode/
45-
.cache/
45+
.cache/
46+
.clangd

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@ option(COLUMNAR_BUILD_BENCHMARKS "Build benchmarks" ON)
1212
include(cmake/deps.cmake)
1313

1414
add_library(columnar_lib
15-
src/add.cpp
15+
src/core/column.cpp
1616
)
1717

18-
target_include_directories(columnar_lib PUBLIC include)
18+
target_include_directories(columnar_lib PUBLIC ${PROJECT_SOURCE_DIR}/include)
1919

2020
target_compile_options(columnar_lib PRIVATE -Wall -Wextra -Wpedantic)
2121

apps/converter/main.cpp

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,13 @@
33

44
#include <iostream>
55

6-
#include "columnar/add.h"
7-
8-
ABSL_FLAG(int, x, 1, "x");
9-
ABSL_FLAG(int, y, 2, "y");
6+
// ABSL_FLAG(int, x, 1, "x");
7+
// ABSL_FLAG(int, y, 2, "y");
108

119
int main(int argc, char** argv) {
1210
absl::ParseCommandLine(argc, argv);
13-
int x = absl::GetFlag(FLAGS_x);
14-
int y = absl::GetFlag(FLAGS_y);
15-
std::cout << "Add(" << x << ", " << y << ") = " << columnar::Add(x, y) << "\n";
11+
// int x = absl::GetFlag(FLAGS_x);
12+
// int y = absl::GetFlag(FLAGS_y);
13+
std::cout << "Hello" << "\n";
1614
return 0;
1715
}

benchmarks/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
add_executable(columnar_bench
2-
bench_bruh.cpp
2+
bench_csv.cpp
33
)
44

55
target_link_libraries(columnar_bench

benchmarks/bench_bruh.cpp

Lines changed: 0 additions & 12 deletions
This file was deleted.

benchmarks/bench_csv.cpp

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#include <benchmark/benchmark.h>
2+
#include <csv/csv.h>
3+
#include <core/schema.h>
4+
5+
#include <sstream>
6+
7+
using namespace columnar; // NOLINT
8+
9+
// Builds an in-memory CSV document with `rows` lines.
// Line i (0-based) has the form "<i>,value<i>,0.123" and is terminated by '\n'.
// Returns an empty string when `rows` is 0.
std::string GenerateCSV(std::size_t rows) {
    std::ostringstream buffer;
    std::size_t row = 0;
    while (row != rows) {
        buffer << row << ",value" << row << ",0.123\n";
        ++row;
    }
    return buffer.str();
}
16+
17+
void BenchCSVRowReader(benchmark::State& state) {
18+
std::string data = GenerateCSV(state.range(0));
19+
for (auto s : state) {
20+
std::istringstream in(data);
21+
csv::CSVRowReader reader(in);
22+
while (reader.ReadRow()) {
23+
}
24+
}
25+
26+
state.SetItemsProcessed(state.iterations() * state.range(0));
27+
}
28+
BENCHMARK(BenchCSVRowReader)->Range(100, 10000);
29+
30+
void BenchCSVBatchReader(benchmark::State& state) {
31+
core::Schema schema({core::Field("id", core::DataType::Int64),
32+
core::Field("name", core::DataType::String),
33+
core::Field("value", core::DataType::Double)});
34+
35+
std::string data = GenerateCSV(state.range(0));
36+
for (auto s : state) {
37+
std::istringstream in(data);
38+
csv::CSVBatchReader reader(in, schema, {});
39+
while (reader.ReadNext()) {
40+
}
41+
}
42+
43+
state.SetItemsProcessed(state.iterations() * state.range(0));
44+
}
45+
BENCHMARK(BenchCSVBatchReader)->Range(100, 10000);
46+
47+
void BenchCSVBatchWriter(benchmark::State& state) {
48+
core::Schema schema(
49+
{core::Field("id", core::DataType::Int64), core::Field("name", core::DataType::String)});
50+
51+
core::Batch batch(schema);
52+
for (int i = 0; i < state.range(0); ++i) {
53+
batch.ColumnAt(0).AppendFromString(std::to_string(i));
54+
batch.ColumnAt(1).AppendFromString("hello mir");
55+
}
56+
57+
for (auto s : state) {
58+
std::ostringstream out;
59+
csv::CSVBatchWriter writer(out, {});
60+
writer.Write(batch);
61+
writer.Flush();
62+
benchmark::DoNotOptimize(out.str());
63+
}
64+
65+
state.SetItemsProcessed(state.iterations() * state.range(0));
66+
}
67+
BENCHMARK(BenchCSVBatchWriter)->Range(100, 10000);
68+
69+
BENCHMARK_MAIN();

include/columnar/add.h

Lines changed: 0 additions & 5 deletions
This file was deleted.

include/core/batch.h

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
#pragma once
2+
3+
#include <core/schema.h>
4+
#include <core/column.h>
5+
#include <util/macro.h>
6+
7+
namespace columnar::core {
8+
class Batch {
9+
public:
10+
Batch() = default;
11+
12+
Batch(Schema schema) : schema_(std::move(schema)) {
13+
columns_.reserve(schema_.FieldsCount());
14+
for (auto& field : schema_.GetFields()) {
15+
columns_.emplace_back(MakeColumn(field.type, field.nullable));
16+
}
17+
}
18+
19+
Batch(Schema schema, std::size_t reserve_rows) : Batch(std::move(schema)) {
20+
Reserve(reserve_rows);
21+
}
22+
23+
const Schema& GetSchema() const {
24+
return schema_;
25+
}
26+
27+
std::size_t ColumnsCount() const {
28+
return columns_.size();
29+
}
30+
31+
std::size_t RowsCount() const {
32+
if (columns_.empty()) {
33+
return 0;
34+
}
35+
return columns_[0]->Size();
36+
}
37+
38+
void Reserve(std::size_t n) {
39+
for (auto& c : columns_) {
40+
c->Reserve(n);
41+
}
42+
}
43+
44+
const Column& ColumnAt(std::size_t i) const {
45+
return *columns_.at(i);
46+
}
47+
48+
Column& ColumnAt(std::size_t i) {
49+
return *columns_.at(i);
50+
}
51+
52+
const std::vector<std::unique_ptr<Column>>& GetColumns() const {
53+
return columns_;
54+
}
55+
56+
std::vector<std::unique_ptr<Column>>& GetColumns() {
57+
return columns_;
58+
}
59+
60+
void Validate() const {
61+
if (columns_.empty()) {
62+
return;
63+
}
64+
for (auto& c : columns_) {
65+
if (c->Size() != columns_[0]->Size()) {
66+
THROW_RUNTIME_ERROR("Different columns sizes");
67+
}
68+
}
69+
}
70+
71+
private:
72+
Schema schema_;
73+
std::vector<std::unique_ptr<Column>> columns_;
74+
};
75+
} // namespace columnar::core

include/core/batch_reader.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#pragma once
2+
3+
#include <core/batch.h>
4+
5+
#include <optional>
6+
7+
namespace columnar::core {
8+
class BatchReader {
9+
public:
10+
virtual ~BatchReader() = default;
11+
12+
virtual std::optional<Batch> ReadNext() = 0;
13+
14+
virtual const core::Schema& GetSchema() const = 0;
15+
};
16+
} // namespace columnar::core

include/core/batch_writer.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#pragma once
2+
3+
#include <core/batch.h>
4+
5+
namespace columnar::core {
6+
class BatchWriter {
7+
public:
8+
virtual ~BatchWriter() = default;
9+
10+
virtual void Write(const Batch& batch) = 0;
11+
12+
virtual void Flush() = 0;
13+
};
14+
} // namespace columnar::core

0 commit comments

Comments
 (0)