Skip to content

Commit 1fd9fc1

Browse files
committed
Merge branch 'cherry-pick-29b4cf3a' into 'release/08-01'
Merge branch 'bugfix/filemetadata-output' into 'main'. See merge request belle2/software/basf2!2954
2 parents b4df8f6 + fb6aa60 commit 1fd9fc1

File tree

5 files changed

+118
-102
lines changed

5 files changed

+118
-102
lines changed

framework/modules/rootio/include/RootOutputModule.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -213,9 +213,9 @@ namespace Belle2 {
213213

214214
/** Pointer to the event meta data */
215215
StoreObjPtr<EventMetaData> m_eventMetaData;
216-
/** Pointer to the file meta data */
216+
/** Pointer to the input file meta data */
217217
StoreObjPtr<FileMetaData> m_fileMetaData{"", DataStore::c_Persistent};
218-
/** File meta data finally stored in the output file */
219-
FileMetaData m_outputFileMetaData;
218+
/** File meta data stored in the output file */
219+
FileMetaData* m_outputFileMetaData;
220220
};
221221
} // end namespace Belle2

framework/modules/rootio/src/RootOutputModule.cc

Lines changed: 35 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -126,6 +126,8 @@ subsequently processed completely independent.
126126
127127
.. versionadded:: release-03-00-00
128128
)DOC", m_outputSplitSize);
129+
130+
m_outputFileMetaData = new FileMetaData;
129131
}
130132

131133

@@ -137,9 +139,7 @@ void RootOutputModule::initialize()
137139
//Let's set this to 100PB, that should last a bit longer.
138140
TTree::SetMaxTreeSize(1000 * 1000 * 100000000000LL);
139141

140-
//create a file level metadata object in the data store
141-
m_fileMetaData.registerInDataStore();
142-
//and make sure we have event meta data
142+
//make sure we have event meta data
143143
m_eventMetaData.isRequired();
144144

145145
//check outputSplitSize
@@ -283,6 +283,14 @@ void RootOutputModule::openFile()
283283
}
284284
}
285285

286+
// set the address of the FileMetaData branch for the output to a separate one from the input
287+
TBranch* fileMetaDataBranch = m_tree[DataStore::c_Persistent]->GetBranch("FileMetaData");
288+
if (fileMetaDataBranch) {
289+
fileMetaDataBranch->SetAddress(&m_outputFileMetaData);
290+
} else {
291+
m_tree[DataStore::c_Persistent]->Branch("FileMetaData", &m_outputFileMetaData, m_basketsize, m_splitLevel);
292+
}
293+
286294
dir->cd();
287295
if (m_outputSplitSize) {
288296
B2INFO(getName() << ": Opened " << (m_fileIndex > 0 ? "new " : "") << "file for writing" << LogVar("filename", out));
@@ -359,14 +367,14 @@ void RootOutputModule::event()
359367
void RootOutputModule::fillFileMetaData()
360368
{
361369
bool isMC = (m_fileMetaData) ? m_fileMetaData->isMC() : true;
362-
m_fileMetaData.create(true);
363-
if (!isMC) m_fileMetaData->declareRealData();
370+
new(m_outputFileMetaData) FileMetaData;
371+
if (!isMC) m_outputFileMetaData->declareRealData();
364372

365373
if (m_tree[DataStore::c_Event]) {
366374
//create an index for the event tree
367375
TTree* tree = m_tree[DataStore::c_Event];
368376
unsigned long numEntries = tree->GetEntries();
369-
m_fileMetaData->setNFullEvents(m_nFullEvents);
377+
m_outputFileMetaData->setNFullEvents(m_nFullEvents);
370378
if (m_buildIndex && numEntries > 0) {
371379
if (numEntries > 10000000) {
372380
//10M events correspond to about 240MB for the TTreeIndex object. for more than ~45M entries this causes crashes, broken files :(
@@ -377,31 +385,31 @@ void RootOutputModule::fillFileMetaData()
377385
}
378386
}
379387

380-
m_fileMetaData->setNEvents(numEntries);
388+
m_outputFileMetaData->setNEvents(numEntries);
381389
if (m_experimentLow > m_experimentHigh) {
382390
//starting condition so apparently no events at all
383-
m_fileMetaData->setLow(-1, -1, 0);
384-
m_fileMetaData->setHigh(-1, -1, 0);
391+
m_outputFileMetaData->setLow(-1, -1, 0);
392+
m_outputFileMetaData->setHigh(-1, -1, 0);
385393
} else {
386-
m_fileMetaData->setLow(m_experimentLow, m_runLow, m_eventLow);
387-
m_fileMetaData->setHigh(m_experimentHigh, m_runHigh, m_eventHigh);
394+
m_outputFileMetaData->setLow(m_experimentLow, m_runLow, m_eventLow);
395+
m_outputFileMetaData->setHigh(m_experimentHigh, m_runHigh, m_eventHigh);
388396
}
389397
}
390398

391399
//fill more file level metadata
392-
m_fileMetaData->setParents(m_parentLfns);
393-
RootIOUtilities::setCreationData(*m_fileMetaData);
394-
m_fileMetaData->setRandomSeed(RandomNumbers::getSeed());
395-
m_fileMetaData->setSteering(Environment::Instance().getSteering());
400+
m_outputFileMetaData->setParents(m_parentLfns);
401+
RootIOUtilities::setCreationData(*m_outputFileMetaData);
402+
m_outputFileMetaData->setRandomSeed(RandomNumbers::getSeed());
403+
m_outputFileMetaData->setSteering(Environment::Instance().getSteering());
396404
auto mcEvents = Environment::Instance().getNumberOfMCEvents();
397405
if(m_outputSplitSize and mcEvents > 0) {
398406
if(m_fileIndex == 0) B2WARNING("Number of MC Events cannot be saved when splitting output files by size, setting to 0");
399407
mcEvents = 0;
400408
}
401-
m_fileMetaData->setMcEvents(mcEvents);
402-
m_fileMetaData->setDatabaseGlobalTag(Database::Instance().getGlobalTags());
409+
m_outputFileMetaData->setMcEvents(mcEvents);
410+
m_outputFileMetaData->setDatabaseGlobalTag(Database::Instance().getGlobalTags());
403411
for (const auto& item : m_additionalDataDescription) {
404-
m_fileMetaData->setDataDescription(item.first, item.second);
412+
m_outputFileMetaData->setDataDescription(item.first, item.second);
405413
}
406414
// Set the LFN to the filename: if it's a URL use it directly, otherwise make sure it's absolute
407415
std::string lfn = m_file->GetName();
@@ -412,14 +420,13 @@ void RootOutputModule::fillFileMetaData()
412420
std::string format = EnvironmentVariables::get("BELLE2_LFN_FORMATSTRING", "");
413421
if (!format.empty()) {
414422
auto format_filename = boost::python::import("B2Tools.format").attr("format_filename");
415-
lfn = boost::python::extract<std::string>(format_filename(format, m_outputFileName, m_fileMetaData->getJsonStr()));
423+
lfn = boost::python::extract<std::string>(format_filename(format, m_outputFileName, m_outputFileMetaData->getJsonStr()));
416424
}
417-
m_fileMetaData->setLfn(lfn);
425+
m_outputFileMetaData->setLfn(lfn);
418426
//register the file in the catalog
419427
if (m_updateFileCatalog) {
420-
FileCatalog::Instance().registerFile(m_file->GetName(), *m_fileMetaData);
428+
FileCatalog::Instance().registerFile(m_file->GetName(), *m_outputFileMetaData);
421429
}
422-
m_outputFileMetaData = *m_fileMetaData;
423430
}
424431

425432

@@ -431,19 +438,12 @@ void RootOutputModule::terminate()
431438
void RootOutputModule::closeFile()
432439
{
433440
if(!m_file) return;
434-
//get pointer to file level metadata
435-
std::unique_ptr<FileMetaData> old;
436-
if (m_fileMetaData) old = std::make_unique<FileMetaData>(*m_fileMetaData);
437441

438442
fillFileMetaData();
439443

440444
//fill Persistent data
441445
fillTree(DataStore::c_Persistent);
442446

443-
// restore old file meta data if it existed
444-
if (old) *m_fileMetaData = *old;
445-
old.reset();
446-
447447
//write the trees
448448
TDirectory* dir = gDirectory;
449449
m_file->cd();
@@ -465,7 +465,7 @@ void RootOutputModule::closeFile()
465465
m_file = nullptr;
466466

467467
// and now add it to the metadata service as it's fully written
468-
MetadataService::Instance().addRootOutputFile(filename, &m_outputFileMetaData);
468+
MetadataService::Instance().addRootOutputFile(filename, m_outputFileMetaData);
469469

470470
// reset some variables
471471
for (auto & entry : m_entries) {
@@ -495,7 +495,11 @@ void RootOutputModule::fillTree(DataStore::EDurability durability)
495495
entry->object->SetBit(kInvalidObject);
496496
}
497497
//FIXME: Do we need this? in theory no but it crashes in parallel processing otherwise ¯\_(ツ)_/¯
498-
tree.SetBranchAddress(entry->name.c_str(), &entry->object);
498+
if (entry->name == "FileMetaData") {
499+
tree.SetBranchAddress(entry->name.c_str(), &m_outputFileMetaData);
500+
} else {
501+
tree.SetBranchAddress(entry->name.c_str(), &entry->object);
502+
}
499503
}
500504
tree.Fill();
501505
for (auto* entry: m_entries[durability]) {

framework/tests/filemetadata.out

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1-
[INFO] Steering file: framework/tests/filemetadata.py
1+
[INFO] Steering file: filemetadata.py
22
[INFO] The random number seed is set to "something important"
33
[INFO] Starting event processing, random seed is set to 'something important'
4+
[INFO] Starting event processing, random seed is set to 'something important'
5+
[INFO] Added file b2filemetadata.root
46
=== FileMetaData ===
57
LFN: /logical/file/name
68
nEvents: 10

framework/tests/filemetadata.py

Lines changed: 72 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -10,77 +10,82 @@
1010

1111
from ROOT import Belle2
1212
import os
13-
import tempfile
1413
import subprocess
1514
import json
1615
import basf2
1716
from basf2.version import get_version
17+
from b2test_utils import safe_process, clean_working_directory
1818

1919
basf2.set_random_seed("something important")
2020

21-
testFile = tempfile.NamedTemporaryFile(prefix='b2filemetadata_')
22-
23-
# Generate a small test file
24-
main = basf2.Path()
25-
main.add_module('EventInfoSetter', evtNumList=[9, 1], runList=[1, 15], expList=[7, 7])
26-
main.add_module('RootOutput', outputFileName=testFile.name, updateFileCatalog=False)
27-
basf2.process(main)
28-
29-
# Check the file meta data (via DataStore)
30-
metadata = Belle2.PyStoreObj('FileMetaData', 1)
31-
32-
# print (metadata.getLfn()) #?
33-
assert 10 == metadata.getNEvents()
34-
assert 10 == metadata.getNFullEvents()
35-
36-
assert 7 == metadata.getExperimentLow()
37-
assert 1 == metadata.getRunLow()
38-
assert 1 == metadata.getEventLow()
39-
assert 7 == metadata.getExperimentHigh()
40-
assert 15 == metadata.getRunHigh()
41-
assert 1 == metadata.getEventHigh() # highest evt. no in highest run
42-
assert metadata.containsEvent(7, 1, 1)
43-
assert metadata.containsEvent(7, 1, 9)
44-
assert metadata.containsEvent(7, 15, 1)
45-
assert metadata.containsEvent(7, 1, 10) # does not exist, but would be plausible
46-
assert metadata.containsEvent(7, 3, 6) # same
47-
assert not metadata.containsEvent(7, 0, 10) # does not exist, but would be plausible
48-
assert not metadata.containsEvent(7, 16, 6) # same
49-
assert not metadata.containsEvent(0, 15, 6)
50-
51-
assert 0 == metadata.getNParents()
52-
53-
# print (metadata.getDate())
54-
# assert socket.gethostname() == metadata.getSite()
55-
# print (metadata.getUser()) #different env variables, not checked
56-
assert "something important" == metadata.getRandomSeed()
57-
58-
assert get_version() == metadata.getRelease()
59-
assert metadata.getSteering().startswith('#!/usr/bin/env python3')
60-
assert metadata.getSteering().strip().endswith('dummystring')
61-
assert 10 == metadata.getMcEvents()
62-
63-
64-
os.system('touch Belle2FileCatalog.xml')
65-
assert 0 == os.system('b2file-metadata-add --lfn /logical/file/name ' + testFile.name)
66-
67-
assert 0 == os.system('b2file-metadata-show ' + testFile.name)
68-
69-
# Check JSON output (contains steering file, so we cannotuse .out)
70-
metadata_output = subprocess.check_output(['b2file-metadata-show', '--json', testFile.name])
71-
m = json.loads(metadata_output.decode('utf-8'))
72-
assert 7 == m['experimentLow']
73-
assert 1 == m['runLow']
74-
assert 1 == m['eventLow']
75-
assert 7 == m['experimentHigh']
76-
assert 15 == m['runHigh']
77-
assert 1 == m['eventHigh']
78-
assert 'something important' == m['randomSeed']
79-
assert 10 == m['nEvents']
80-
assert isinstance(m['nEvents'], int)
81-
assert 10 == m['nFullEvents']
82-
assert isinstance(m['nFullEvents'], int)
83-
assert '/logical/file/name' == m['LFN']
84-
85-
# steering file is in metadata, so we check for existence of this string:
86-
# dummystring
21+
testFile = 'b2filemetadata.root'
22+
23+
with clean_working_directory():
24+
25+
# Generate a small test file
26+
main = basf2.Path()
27+
main.add_module('EventInfoSetter', evtNumList=[9, 1], runList=[1, 15], expList=[7, 7])
28+
main.add_module('RootOutput', outputFileName=testFile, updateFileCatalog=False)
29+
safe_process(main)
30+
31+
# Check the file meta data (via DataStore)
32+
read = basf2.Path()
33+
read.add_module('RootInput', inputFileName=testFile)
34+
basf2.process(read)
35+
36+
metadata = Belle2.PyStoreObj('FileMetaData', 1)
37+
38+
# print (metadata.getLfn()) #?
39+
assert 10 == metadata.getNEvents()
40+
assert 10 == metadata.getNFullEvents()
41+
42+
assert 7 == metadata.getExperimentLow()
43+
assert 1 == metadata.getRunLow()
44+
assert 1 == metadata.getEventLow()
45+
assert 7 == metadata.getExperimentHigh()
46+
assert 15 == metadata.getRunHigh()
47+
assert 1 == metadata.getEventHigh() # highest evt. no in highest run
48+
assert metadata.containsEvent(7, 1, 1)
49+
assert metadata.containsEvent(7, 1, 9)
50+
assert metadata.containsEvent(7, 15, 1)
51+
assert metadata.containsEvent(7, 1, 10) # does not exist, but would be plausible
52+
assert metadata.containsEvent(7, 3, 6) # same
53+
assert not metadata.containsEvent(7, 0, 10) # does not exist, but would be plausible
54+
assert not metadata.containsEvent(7, 16, 6) # same
55+
assert not metadata.containsEvent(0, 15, 6)
56+
57+
assert 0 == metadata.getNParents()
58+
59+
# print (metadata.getDate())
60+
# assert socket.gethostname() == metadata.getSite()
61+
# print (metadata.getUser()) #different env variables, not checked
62+
assert "something important" == metadata.getRandomSeed()
63+
64+
assert get_version() == metadata.getRelease()
65+
assert metadata.getSteering().startswith('#!/usr/bin/env python3')
66+
assert metadata.getSteering().strip().endswith('dummystring')
67+
assert 10 == metadata.getMcEvents()
68+
69+
os.system('touch Belle2FileCatalog.xml')
70+
assert 0 == os.system('b2file-metadata-add --lfn /logical/file/name ' + testFile)
71+
72+
assert 0 == os.system('b2file-metadata-show ' + testFile)
73+
74+
# Check JSON output (contains steering file, so we cannot use .out)
75+
metadata_output = subprocess.check_output(['b2file-metadata-show', '--json', testFile])
76+
m = json.loads(metadata_output.decode('utf-8'))
77+
assert 7 == m['experimentLow']
78+
assert 1 == m['runLow']
79+
assert 1 == m['eventLow']
80+
assert 7 == m['experimentHigh']
81+
assert 15 == m['runHigh']
82+
assert 1 == m['eventHigh']
83+
assert 'something important' == m['randomSeed']
84+
assert 10 == m['nEvents']
85+
assert isinstance(m['nEvents'], int)
86+
assert 10 == m['nFullEvents']
87+
assert isinstance(m['nFullEvents'], int)
88+
assert '/logical/file/name' == m['LFN']
89+
90+
# steering file is in metadata, so we check for existence of this string:
91+
# dummystring

framework/tests/root_output_split.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,11 @@ def check_jobfile(jobfile, expected_files):
6868
assert safe_process(path) == 0, "RootOutput failed"
6969
check_jobfile("job-info1.json", {f"test_split.f0000{i}.root" for i in range(2)})
7070

71+
# check metadata
72+
meta = [get_metadata(e) for e in ["test_split.f00000.root", "test_split.f00001.root"]]
73+
assert meta[0]["parents"] == [], "There should be no parents"
74+
assert meta[1]["parents"] == [], "There should be no parents"
75+
7176
# check files and set a well known lfn
7277
for i in range(2):
7378
subprocess.check_call(["b2file-metadata-add", "-l", f"parent{i}", f"test_split.f0000{i}.root"])

0 commit comments

Comments
 (0)