Skip to content

Commit e555e31

Browse files
committed
[DF] Retrieve RDatasetSpec for RDF built from JSON
1 parent 02c5b75 commit e555e31

File tree

3 files changed

+94
-74
lines changed

3 files changed

+94
-74
lines changed

tree/dataframe/inc/ROOT/RDF/Utils.hxx

+13-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,9 @@
3131
class TTree;
3232
class TTreeReader;
3333

34-
34+
namespace ROOT::RDF::Experimental {
35+
class RDatasetSpec;
36+
}
3537
namespace ROOT {
3638
namespace RDF {
3739
using ColumnNames_t = std::vector<std::string>;
@@ -325,6 +327,16 @@ auto MakeAliasedSharedPtr(T *rawPtr)
325327
return std::shared_ptr<T>(fgRawPtrCtrlBlock, rawPtr);
326328
}
327329

330+
331+
/**
332+
* \brief Retrieve the RDatasetSpec described by the provided JSON file.
333+
* \param[in] jsonFile Path to the dataset specification JSON file.
334+
*
335+
* This function gives access to the RDatasetSpec that has to be built
336+
* whenever the FromSpec factory function is used.
337+
*/
338+
ROOT::RDF::Experimental::RDatasetSpec RetrieveSpecFromJson(const std::string &jsonFile);
339+
328340
} // end NS RDF
329341
} // end NS Internal
330342
} // end NS ROOT

tree/dataframe/src/RDFUtils.cxx

+80
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
#include "TROOT.h" // IsImplicitMTEnabled, GetThreadPoolSize
2929
#include "TTree.h"
3030

31+
#include <fstream>
32+
#include <nlohmann/json.hpp> // nlohmann::json::parse
3133
#include <stdexcept>
3234
#include <string>
3335
#include <cstring>
@@ -460,6 +462,84 @@ auto RStringCache::Insert(const std::string &string) -> decltype(fStrings)::cons
460462

461463
return fStrings.insert(string).first;
462464
}
465+
466+
/// Build an RDatasetSpec from a JSON dataset specification file.
///
/// \param[in] jsonFile Path to the dataset specification JSON file.
/// \return An RDatasetSpec holding one sample per entry of the "samples" object and, when the
///         corresponding top-level keys are present, the global friends and the global entry range.
/// \throws std::runtime_error if the file cannot be opened, if "samples" is missing or empty, if a
///         sample or a friend lacks its "trees" or "files" list, or if a friend lists more than one
///         tree and the number of trees differs from the number of files.
/// \throws std::logic_error if a metadata value is not a string, an integer or a floating point number.
///
/// Exceptions raised by nlohmann::json while parsing the file or converting values propagate unchanged.
ROOT::RDF::Experimental::RDatasetSpec RetrieveSpecFromJson(const std::string &jsonFile)
{
   // Fail early with a message that names the file: parsing an unopened stream would only
   // report a generic JSON syntax error.
   std::ifstream specStream(jsonFile);
   if (!specStream.is_open())
      throw std::runtime_error("Could not open the dataset specification file " + jsonFile + ".");

   const nlohmann::ordered_json fullData = nlohmann::ordered_json::parse(specStream);
   if (!fullData.contains("samples") || fullData["samples"].empty()) {
      throw std::runtime_error(
         R"(The input specification does not contain any samples. Please provide the samples in the specification like:
{
"samples": {
"sampleA": {
"trees": ["tree1", "tree2"],
"files": ["file1.root", "file2.root"],
"metadata": {"lumi": 1.0, }
},
"sampleB": {
"trees": ["tree3", "tree4"],
"files": ["file3.root", "file4.root"],
"metadata": {"lumi": 0.5, }
},
...
},
})");
   }

   ROOT::RDF::Experimental::RDatasetSpec spec;
   for (const auto &keyValue : fullData["samples"].items()) {
      const std::string &sampleName = keyValue.key();
      const auto &sample = keyValue.value();
      // TODO: if requested in https://github.com/root-project/root/issues/11624
      // allow union-like types for trees and files, see: https://github.com/nlohmann/json/discussions/3815
      if (!sample.contains("trees")) {
         throw std::runtime_error("A list of tree names must be provided for sample " + sampleName + ".");
      }
      const auto trees = sample["trees"].get<std::vector<std::string>>();
      if (!sample.contains("files")) {
         throw std::runtime_error("A list of files must be provided for sample " + sampleName + ".");
      }
      const auto files = sample["files"].get<std::vector<std::string>>();
      if (!sample.contains("metadata")) {
         spec.AddSample(ROOT::RDF::Experimental::RSample{sampleName, trees, files});
      } else {
         ROOT::RDF::Experimental::RMetaData m;
         for (const auto &metadata : sample["metadata"].items()) {
            const auto &val = metadata.value();
            if (val.is_string())
               m.Add(metadata.key(), val.get<std::string>());
            else if (val.is_number_integer())
               m.Add(metadata.key(), val.get<int>());
            else if (val.is_number_float())
               m.Add(metadata.key(), val.get<double>());
            else
               throw std::logic_error("The metadata keys can only be of type [string|int|double].");
         }
         spec.AddSample(ROOT::RDF::Experimental::RSample{sampleName, trees, files, m});
      }
   }

   if (fullData.contains("friends")) {
      for (const auto &friends : fullData["friends"].items()) {
         const std::string &alias = friends.key();
         const auto &friendDescr = friends.value();
         // operator[] on a const json object must not be used with a missing key,
         // so validate the friend description the same way samples are validated.
         if (!friendDescr.contains("trees")) {
            throw std::runtime_error("A list of tree names must be provided for friend " + alias + ".");
         }
         if (!friendDescr.contains("files")) {
            throw std::runtime_error("A list of files must be provided for friend " + alias + ".");
         }
         const auto trees = friendDescr["trees"].get<std::vector<std::string>>();
         const auto files = friendDescr["files"].get<std::vector<std::string>>();
         if (files.size() != trees.size() && trees.size() > 1)
            throw std::runtime_error("Mismatch between trees and files in a friend.");
         spec.WithGlobalFriends(trees, files, alias);
      }
   }

   if (fullData.contains("range")) {
      // Read the entries as 64-bit integers: entry numbers can exceed the range of int.
      const auto range = fullData["range"].get<std::vector<long long>>();

      // A range with any other number of elements is ignored, as before.
      if (range.size() == 1)
         spec.WithGlobalRange({range[0]});
      else if (range.size() == 2)
         spec.WithGlobalRange({range[0], range[1]});
   }
   return spec;
}
542+
463543
} // end NS RDF
464544
} // end NS Internal
465545
} // end NS ROOT

tree/dataframe/src/RDataFrame.cxx

+1-73
Original file line numberDiff line numberDiff line change
@@ -1953,79 +1953,7 @@ namespace Experimental {
19531953
///~~~
19541954
ROOT::RDataFrame FromSpec(const std::string &jsonFile)
19551955
{
1956-
const nlohmann::ordered_json fullData = nlohmann::ordered_json::parse(std::ifstream(jsonFile));
1957-
if (!fullData.contains("samples") || fullData["samples"].empty()) {
1958-
throw std::runtime_error(
1959-
R"(The input specification does not contain any samples. Please provide the samples in the specification like:
1960-
{
1961-
"samples": {
1962-
"sampleA": {
1963-
"trees": ["tree1", "tree2"],
1964-
"files": ["file1.root", "file2.root"],
1965-
"metadata": {"lumi": 1.0, }
1966-
},
1967-
"sampleB": {
1968-
"trees": ["tree3", "tree4"],
1969-
"files": ["file3.root", "file4.root"],
1970-
"metadata": {"lumi": 0.5, }
1971-
},
1972-
...
1973-
},
1974-
})");
1975-
}
1976-
1977-
RDatasetSpec spec;
1978-
for (const auto &keyValue : fullData["samples"].items()) {
1979-
const std::string &sampleName = keyValue.key();
1980-
const auto &sample = keyValue.value();
1981-
// TODO: if requested in https://github.com/root-project/root/issues/11624
1982-
// allow union-like types for trees and files, see: https://github.com/nlohmann/json/discussions/3815
1983-
if (!sample.contains("trees")) {
1984-
throw std::runtime_error("A list of tree names must be provided for sample " + sampleName + ".");
1985-
}
1986-
std::vector<std::string> trees = sample["trees"];
1987-
if (!sample.contains("files")) {
1988-
throw std::runtime_error("A list of files must be provided for sample " + sampleName + ".");
1989-
}
1990-
std::vector<std::string> files = sample["files"];
1991-
if (!sample.contains("metadata")) {
1992-
spec.AddSample(RSample{sampleName, trees, files});
1993-
} else {
1994-
RMetaData m;
1995-
for (const auto &metadata : sample["metadata"].items()) {
1996-
const auto &val = metadata.value();
1997-
if (val.is_string())
1998-
m.Add(metadata.key(), val.get<std::string>());
1999-
else if (val.is_number_integer())
2000-
m.Add(metadata.key(), val.get<int>());
2001-
else if (val.is_number_float())
2002-
m.Add(metadata.key(), val.get<double>());
2003-
else
2004-
throw std::logic_error("The metadata keys can only be of type [string|int|double].");
2005-
}
2006-
spec.AddSample(RSample{sampleName, trees, files, m});
2007-
}
2008-
}
2009-
if (fullData.contains("friends")) {
2010-
for (const auto &friends : fullData["friends"].items()) {
2011-
std::string alias = friends.key();
2012-
std::vector<std::string> trees = friends.value()["trees"];
2013-
std::vector<std::string> files = friends.value()["files"];
2014-
if (files.size() != trees.size() && trees.size() > 1)
2015-
throw std::runtime_error("Mismatch between trees and files in a friend.");
2016-
spec.WithGlobalFriends(trees, files, alias);
2017-
}
2018-
}
2019-
2020-
if (fullData.contains("range")) {
2021-
std::vector<int> range = fullData["range"];
2022-
2023-
if (range.size() == 1)
2024-
spec.WithGlobalRange({range[0]});
2025-
else if (range.size() == 2)
2026-
spec.WithGlobalRange({range[0], range[1]});
2027-
}
2028-
return ROOT::RDataFrame(spec);
1956+
return ROOT::RDataFrame(ROOT::Internal::RDF::RetrieveSpecFromJson(jsonFile));
20291957
}
20301958

20311959
} // namespace Experimental

0 commit comments

Comments
 (0)