Skip to content

Commit 670e16f

Browse files
authored
Merge pull request #2399 from opensim-org/speedUpDelimFileAdapter
Speed up DelimFileAdapter
2 parents 70de9e7 + 7a1ff8a commit 670e16f

7 files changed

+79
-50
lines changed

OpenSim/Auxiliary/auxiliaryTestFunctions.h

-21
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,6 @@
2727
#include <OpenSim/Common/Function.h>
2828
#include <OpenSim/Common/LinearFunction.h>
2929
#include <OpenSim/Common/PropertyObjArray.h>
30-
#include "OpenSim/Common/STOFileAdapter.h"
3130
#include "getRSS.h"
3231

3332
#include <fstream>
@@ -333,26 +332,6 @@ inline bool revertToVersionNumber1(const std::string& filenameOld,
333332
return changedVersion;
334333
}
335334

336-
// Copy the STO file filenameOld to filenameNew, adding the number of rows
337-
// (nRows) and number of columns (nColumns) to the header. Note that nColumns
338-
// includes time, so it is the number of columns in the table plus 1 (for time).
// NOTE(review): neither stream is checked for having opened successfully.
339-
inline void addNumRowsNumColumns(const std::string& filenameOld,
340-
const std::string& filenameNew) {
// The table is read in full only to obtain its row and column counts.
341-
auto table = OpenSim::STOFileAdapter_<double>::read(filenameOld);
// Matches a line consisting of "endheader" with optional surrounding spaces
// (spaces only, not tabs); regex_match requires the whole line to match.
342-
std::regex endheader{ R"( *endheader *)" };
343-
std::ifstream fileOld{ filenameOld };
344-
std::ofstream fileNew{ filenameNew };
345-
std::string line{};
346-
while (std::getline(fileOld, line)) {
// The header terminator is replaced by the two counts followed by a plain
// "endheader" line, so the counts land inside the header.
347-
if (std::regex_match(line, endheader))
348-
fileNew << "nRows=" << table.getNumRows() << "\n"
349-
<< "nColumns=" << table.getNumColumns() + 1 << "\n"
350-
<< "endheader\n";
// Every other line is copied through, terminated with "\n".
351-
else
352-
fileNew << line << "\n";
353-
}
354-
}
355-
356335
// Estimate the memory usage of a *creator* that heap allocates an object
357336
// of type C and returns a pointer to it. Creator can also perform any
358337
// initialization before returning the pointer.

OpenSim/Common/C3DFileAdapter.cpp

+8-7
Original file line number | Diff line number | Diff line change
@@ -136,23 +136,24 @@ C3DFileAdapter::extendRead(const std::string& fileName) const {
136136
}
137137

138138
// Create the data
139-
auto& marker_table = *new
140-
TimeSeriesTableVec3(marker_times, marker_matrix, marker_labels);
139+
auto marker_table =
140+
std::make_shared<TimeSeriesTableVec3>(marker_times,
141+
marker_matrix,
142+
marker_labels);
141143

142-
marker_table.
144+
marker_table->
143145
updTableMetaData().
144146
setValueForKey("DataRate",
145147
std::to_string(acquisition->GetPointFrequency()));
146148

147-
marker_table.
149+
marker_table->
148150
updTableMetaData().
149151
setValueForKey("Units",
150152
acquisition->GetPointUnit());
151153

152-
marker_table.updTableMetaData().setValueForKey("events", event_table);
154+
marker_table->updTableMetaData().setValueForKey("events", event_table);
153155

154-
tables.emplace(_markers,
155-
std::shared_ptr<TimeSeriesTableVec3>(&marker_table));
156+
tables.emplace(_markers, marker_table);
156157
}
157158

158159
// This is probably the right way to get the raw forces data from force

OpenSim/Common/DelimFileAdapter.h

+44-22
Original file line number | Diff line number | Diff line change
@@ -319,14 +319,13 @@ DelimFileAdapter<T>::extendRead(const std::string& fileName) const {
319319
FileIsEmpty,
320320
fileName);
321321

322-
auto table = std::make_shared<TimeSeriesTable_<T>>();
323-
324322
size_t line_num{};
325323
// All the lines up to "endheader" make up the header.
326324
std::regex endheader{R"([ \t]*)" + _endHeaderString + R"([ \t]*)"};
327325
std::regex keyvalue{R"((.*)=(.*))"};
328326
std::string header{};
329327
std::string line{};
328+
ValueArrayDictionary keyValuePairs;
330329
while(std::getline(in_stream, line)) {
331330
++line_num;
332331

@@ -361,7 +360,7 @@ DelimFileAdapter<T>::extendRead(const std::string& fileName) const {
361360
// Discard OpenSim version number. Version number is added
362361
// during writing.
363362
} else {
364-
table->updTableMetaData().setValueForKey(key, value);
363+
keyValuePairs.setValueForKey(key, value);
365364
}
366365
continue;
367366
}
@@ -372,7 +371,7 @@ DelimFileAdapter<T>::extendRead(const std::string& fileName) const {
372371
else
373372
header += "\n" + line;
374373
}
375-
table->updTableMetaData().setValueForKey("header", header);
374+
keyValuePairs.setValueForKey("header", header);
376375

377376
// Callable to get the next line in form of vector of tokens.
378377
auto nextLine = [&] {
@@ -400,39 +399,62 @@ DelimFileAdapter<T>::extendRead(const std::string& fileName) const {
400399
_timeColumnLabel,
401400
column_labels[0]);
402401
column_labels.erase(column_labels.begin());
403-
// Set the column labels as metadata.
404-
ValueArray<std::string> value_array{};
405-
for(const auto& cl : column_labels)
406-
value_array.upd().push_back(SimTK::Value<std::string>{cl});
407-
typename TimeSeriesTable_<T>::DependentsMetaData dep_metadata{};
408-
dep_metadata.setValueArrayForKey("labels", value_array);
409-
table->setDependentsMetaData(dep_metadata);
410402

411403
// Read the rows one at a time and fill up the time column container and
412-
// the data container.
404+
// the data container. Start with a modest initial capacity as a
405+
// tradeoff between small files and larger files. 100 worked well for
406+
// a 50 MB file with ~80000 lines.
407+
std::vector<double> timeVec;
408+
int initCapacity = 100;
409+
int ncol = static_cast<int>(column_labels.size());
410+
timeVec.reserve(initCapacity);
411+
SimTK::Matrix_<T> matrix(initCapacity, ncol);
412+
413+
// Initialize current row and capacity
414+
int curCapacity = initCapacity;
415+
int curRow = 0;
416+
417+
// Start looping through each line
413418
auto row = nextLine();
414-
while(!row.empty()) {
419+
while (!row.empty()) {
415420
++line_num;
421+
422+
// Double capacity if we reach the end of the containers.
423+
// This is necessary until Simbody issue #401 is addressed.
424+
if (curRow+1 > curCapacity) {
425+
curCapacity *= 2;
426+
timeVec.reserve(curCapacity);
427+
matrix.resizeKeep(curCapacity, ncol);
428+
}
416429

417430
// Time is column 0.
418-
double time = std::stod(row.front());
431+
timeVec.push_back(std::stod(row.front()));
419432
row.erase(row.begin());
420433

421434
auto row_vector = readElems(row);
422435

423436
OPENSIM_THROW_IF(row_vector.size() != column_labels.size(),
424-
RowLengthMismatch,
425-
fileName,
426-
line_num,
427-
column_labels.size(),
428-
static_cast<size_t>(row_vector.size()));
429-
430-
// Column 1 is time.
431-
table->appendRow(time, std::move(row_vector));
437+
RowLengthMismatch,
438+
fileName,
439+
line_num,
440+
column_labels.size(),
441+
static_cast<size_t>(row_vector.size()));
442+
443+
matrix.updRow(curRow) = std::move(row_vector);
432444

433445
row = nextLine();
446+
++curRow;
434447
}
435448

449+
// Resize the matrix down to the correct number of rows.
450+
// This is necessary until Simbody issue #401 is addressed.
451+
matrix.resizeKeep(curRow, ncol);
452+
453+
// Create the table and update other metadata from above
454+
auto table =
455+
std::make_shared<TimeSeriesTable_<T>>(timeVec, matrix, column_labels);
456+
table->updTableMetaData() = keyValuePairs;
457+
436458
OutputTables output_tables{};
437459
output_tables.emplace(tableString(), table);
438460

OpenSim/Common/MarkerData.cpp

+24
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@
3333
#include "SimmMacros.h"
3434
#include "Storage.h"
3535
#include "OpenSim/Auxiliary/auxiliaryTestFunctions.h"
36+
#include "OpenSim/Common/STOFileAdapter.h"
3637

3738
//=============================================================================
3839
// STATICS
@@ -41,6 +42,29 @@ using namespace std;
4142
using namespace OpenSim;
4243
using SimTK::Vec3;
4344

45+
//=============================================================================
46+
// HELPER FUNCTIONS
47+
//=============================================================================
48+
// Copy the STO file filenameOld to filenameNew, adding the number of rows
49+
// (nRows) and number of columns (nColumns) to the header. Note that nColumns
50+
// includes time, so it is the number of columns in the table plus 1 (for time).
// NOTE(review): neither stream is checked for having opened successfully.
51+
inline void addNumRowsNumColumns(const std::string& filenameOld,
52+
const std::string& filenameNew) {
// The table is read in full only to obtain its row and column counts.
53+
auto table = OpenSim::STOFileAdapter_<double>::read(filenameOld);
// Matches a line consisting of "endheader" with optional surrounding spaces
// (spaces only, not tabs); regex_match requires the whole line to match.
54+
std::regex endheader{ R"( *endheader *)" };
55+
std::ifstream fileOld{ filenameOld };
56+
std::ofstream fileNew{ filenameNew };
57+
std::string line{};
58+
while (std::getline(fileOld, line)) {
// The header terminator is replaced by the two counts followed by a plain
// "endheader" line, so the counts land inside the header.
59+
if (std::regex_match(line, endheader))
60+
fileNew << "nRows=" << table.getNumRows() << "\n"
61+
<< "nColumns=" << table.getNumColumns() + 1 << "\n"
62+
<< "endheader\n";
// Every other line is copied through, terminated with "\n".
63+
else
64+
fileNew << line << "\n";
65+
}
66+
}
67+
4468
//=============================================================================
4569
// CONSTRUCTOR(S) AND DESTRUCTOR
4670
//=============================================================================

OpenSim/Common/Test/testC3DFileAdapter.cpp

+1
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222

2323
#include "OpenSim/Common/C3DFileAdapter.h"
2424
#include "OpenSim/Common/TRCFileAdapter.h"
25+
#include "OpenSim/Common/STOFileAdapter.h"
2526
#include <OpenSim/Auxiliary/auxiliaryTestFunctions.h>
2627

2728
#include <vector>

OpenSim/Common/Test/testMarkerData.cpp

+1
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
#include <OpenSim/Common/Storage.h>
2525
#include <OpenSim/Common/MarkerData.h>
2626
#include <OpenSim/Auxiliary/auxiliaryTestFunctions.h>
27+
#include <OpenSim/Common/STOFileAdapter.h>
2728

2829
#include <unordered_set>
2930

OpenSim/Common/Test/testStorage.cpp

+1
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
#include <fstream>
2424
#include <OpenSim/Common/Storage.h>
2525
#include <OpenSim/Auxiliary/auxiliaryTestFunctions.h>
26+
#include <OpenSim/Common/STOFileAdapter.h>
2627

2728
using namespace OpenSim;
2829
using namespace std;

0 commit comments

Comments
 (0)