Skip to content

Commit 105b44d

Browse files
Fix memory issues in CSVFieldList (#237)
1 parent 5a05e6a commit 105b44d

File tree

7 files changed

+63
-37
lines changed

7 files changed

+63
-37
lines changed

.github/workflows/cmake-multi-platform.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ name: CMake on multiple platforms
44

55
on:
66
push:
7-
branches: [ "master", "remove-werror" ]
7+
branches: [ "master", "memory-fix-csvfieldlist" ]
88
pull_request:
99
branches: [ "master" ]
1010

include/csv.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
CSV for C++, version 2.2.3
2+
CSV for C++, version 2.3.0
33
https://github.com/vincentlaucsb/csv-parser
44
55
MIT License

include/internal/csv_row.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@ namespace csv {
1515
}
1616

1717
CSV_INLINE void CSVFieldList::allocate() {
18-
RawCSVField * buffer = new RawCSVField[_single_buffer_capacity];
19-
buffers.push_back(buffer);
18+
buffers.push_back(std::unique_ptr<RawCSVField[]>(new RawCSVField[_single_buffer_capacity]));
19+
2020
_current_buffer_size = 0;
21-
_back = &(buffers.back()[0]);
21+
_back = buffers.back().get();
2222
}
2323
}
2424

include/internal/csv_row.hpp

+14-8
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
#pragma once
66
#include <cmath>
7+
#include <deque>
78
#include <iterator>
89
#include <memory> // For CSVField
910
#include <limits> // For CSVField
@@ -73,16 +74,15 @@ namespace csv {
7374
// CSVFieldList objects may be moved
7475
CSVFieldList(CSVFieldList&& other) :
7576
_single_buffer_capacity(other._single_buffer_capacity) {
76-
buffers = std::move(other.buffers);
77+
78+
for (auto&& buffer : other.buffers) {
79+
this->buffers.emplace_back(std::move(buffer));
80+
}
81+
7782
_current_buffer_size = other._current_buffer_size;
7883
_back = other._back;
7984
}
8085

81-
~CSVFieldList() {
82-
for (auto& buffer : buffers)
83-
delete[] buffer;
84-
}
85-
8686
template <class... Args>
8787
void emplace_back(Args&&... args) {
8888
if (this->_current_buffer_size == this->_single_buffer_capacity) {
@@ -102,7 +102,14 @@ namespace csv {
102102
private:
103103
const size_t _single_buffer_capacity;
104104

105-
std::vector<RawCSVField*> buffers = {};
105+
/**
106+
* Prefer std::deque over std::vector because it does not
107+
* reallocate upon expansion, allowing pointers to its members
108+
* to remain valid & avoiding potential race conditions when
109+
* CSVFieldList is accessed simultaneously by a reading thread and
110+
* a writing thread
111+
*/
112+
std::deque<std::unique_ptr<RawCSVField[]>> buffers = {};
106113

107114
/** Number of items in the current buffer */
108115
size_t _current_buffer_size = 0;
@@ -114,7 +121,6 @@ namespace csv {
114121
void allocate();
115122
};
116123

117-
118124
/** A class for storing raw CSV data and associated metadata */
119125
struct RawCSVData {
120126
std::shared_ptr<void> _data = nullptr;

programs/csv_bench.cpp

+8
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,15 @@ int main(int argc, char** argv) {
2121
std::chrono::duration<double> diff = end - start;
2222

2323
std::cout << "Parsing took (including disk IO): " << diff.count() << std::endl;
24+
std::cout << "Dimensions: " << info.n_rows << " rows x " << info.n_cols << " columns " << std::endl;
25+
std::cout << "Columns: ";
26+
for (auto& col : info.col_names) {
27+
std::cout << " " << col;
28+
}
29+
std::cout << std::endl;
2430

2531
// Benchmark 2: Parsing Only
32+
/*
2633
std::ifstream csv(filename);
2734
std::stringstream buffer;
2835
buffer << csv.rdbuf();
@@ -35,6 +42,7 @@ int main(int argc, char** argv) {
3542
diff = end - start;
3643
3744
std::cout << "Parsing took: " << diff.count() << std::endl;
45+
*/
3846

3947
return 0;
4048
}

single_include/csv.hpp

+18-12
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#pragma once
22
/*
3-
CSV for C++, version 2.2.3
3+
CSV for C++, version 2.3.0
44
https://github.com/vincentlaucsb/csv-parser
55
66
MIT License
@@ -5051,6 +5051,7 @@ namespace csv {
50515051
*/
50525052

50535053
#include <cmath>
5054+
#include <deque>
50545055
#include <iterator>
50555056
#include <memory> // For CSVField
50565057
#include <limits> // For CSVField
@@ -5468,16 +5469,15 @@ namespace csv {
54685469
// CSVFieldList objects may be moved
54695470
CSVFieldList(CSVFieldList&& other) :
54705471
_single_buffer_capacity(other._single_buffer_capacity) {
5471-
buffers = std::move(other.buffers);
5472+
5473+
for (auto&& buffer : other.buffers) {
5474+
this->buffers.emplace_back(std::move(buffer));
5475+
}
5476+
54725477
_current_buffer_size = other._current_buffer_size;
54735478
_back = other._back;
54745479
}
54755480

5476-
~CSVFieldList() {
5477-
for (auto& buffer : buffers)
5478-
delete[] buffer;
5479-
}
5480-
54815481
template <class... Args>
54825482
void emplace_back(Args&&... args) {
54835483
if (this->_current_buffer_size == this->_single_buffer_capacity) {
@@ -5497,7 +5497,14 @@ namespace csv {
54975497
private:
54985498
const size_t _single_buffer_capacity;
54995499

5500-
std::vector<RawCSVField*> buffers = {};
5500+
/**
5501+
* Prefer std::deque over std::vector because it does not
5502+
* reallocate upon expansion, allowing pointers to its members
5503+
* to remain valid & avoiding potential race conditions when
5504+
* CSVFieldList is accessed simultaneously by a reading thread and
5505+
* a writing thread
5506+
*/
5507+
std::deque<std::unique_ptr<RawCSVField[]>> buffers = {};
55015508

55025509
/** Number of items in the current buffer */
55035510
size_t _current_buffer_size = 0;
@@ -5509,7 +5516,6 @@ namespace csv {
55095516
void allocate();
55105517
};
55115518

5512-
55135519
/** A class for storing raw CSV data and associated metadata */
55145520
struct RawCSVData {
55155521
std::shared_ptr<void> _data = nullptr;
@@ -7708,10 +7714,10 @@ namespace csv {
77087714
}
77097715

77107716
CSV_INLINE void CSVFieldList::allocate() {
7711-
RawCSVField * buffer = new RawCSVField[_single_buffer_capacity];
7712-
buffers.push_back(buffer);
7717+
buffers.push_back(std::unique_ptr<RawCSVField[]>(new RawCSVField[_single_buffer_capacity]));
7718+
77137719
_current_buffer_size = 0;
7714-
_back = &(buffers.back()[0]);
7720+
_back = buffers.back().get();
77157721
}
77167722
}
77177723

single_include_test/csv.hpp

+18-12
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#pragma once
22
/*
3-
CSV for C++, version 2.2.3
3+
CSV for C++, version 2.3.0
44
https://github.com/vincentlaucsb/csv-parser
55
66
MIT License
@@ -5051,6 +5051,7 @@ namespace csv {
50515051
*/
50525052

50535053
#include <cmath>
5054+
#include <deque>
50545055
#include <iterator>
50555056
#include <memory> // For CSVField
50565057
#include <limits> // For CSVField
@@ -5468,16 +5469,15 @@ namespace csv {
54685469
// CSVFieldList objects may be moved
54695470
CSVFieldList(CSVFieldList&& other) :
54705471
_single_buffer_capacity(other._single_buffer_capacity) {
5471-
buffers = std::move(other.buffers);
5472+
5473+
for (auto&& buffer : other.buffers) {
5474+
this->buffers.emplace_back(std::move(buffer));
5475+
}
5476+
54725477
_current_buffer_size = other._current_buffer_size;
54735478
_back = other._back;
54745479
}
54755480

5476-
~CSVFieldList() {
5477-
for (auto& buffer : buffers)
5478-
delete[] buffer;
5479-
}
5480-
54815481
template <class... Args>
54825482
void emplace_back(Args&&... args) {
54835483
if (this->_current_buffer_size == this->_single_buffer_capacity) {
@@ -5497,7 +5497,14 @@ namespace csv {
54975497
private:
54985498
const size_t _single_buffer_capacity;
54995499

5500-
std::vector<RawCSVField*> buffers = {};
5500+
/**
5501+
* Prefer std::deque over std::vector because it does not
5502+
* reallocate upon expansion, allowing pointers to its members
5503+
* to remain valid & avoiding potential race conditions when
5504+
* CSVFieldList is accessed simultaneously by a reading thread and
5505+
* a writing thread
5506+
*/
5507+
std::deque<std::unique_ptr<RawCSVField[]>> buffers = {};
55015508

55025509
/** Number of items in the current buffer */
55035510
size_t _current_buffer_size = 0;
@@ -5509,7 +5516,6 @@ namespace csv {
55095516
void allocate();
55105517
};
55115518

5512-
55135519
/** A class for storing raw CSV data and associated metadata */
55145520
struct RawCSVData {
55155521
std::shared_ptr<void> _data = nullptr;
@@ -7708,10 +7714,10 @@ namespace csv {
77087714
}
77097715

77107716
CSV_INLINE void CSVFieldList::allocate() {
7711-
RawCSVField * buffer = new RawCSVField[_single_buffer_capacity];
7712-
buffers.push_back(buffer);
7717+
buffers.push_back(std::unique_ptr<RawCSVField[]>(new RawCSVField[_single_buffer_capacity]));
7718+
77137719
_current_buffer_size = 0;
7714-
_back = &(buffers.back()[0]);
7720+
_back = buffers.back().get();
77157721
}
77167722
}
77177723

0 commit comments

Comments
 (0)