Skip to content

Commit 27390cd

Browse files
authored
Merge branch 'main' into refactor-output-dir-config
2 parents ab53378 + a8570f0 commit 27390cd

5 files changed

Lines changed: 1324 additions & 0 deletions

File tree

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Standalone build script for the async_gpu_launch example executable.
cmake_minimum_required(VERSION 3.20)

project(async_gpu_launch LANGUAGES CXX)

# Build as strict ISO C++17 (no compiler-specific extensions).
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Both packages are mandatory; configuration fails if either is missing.
find_package(eic-opticks REQUIRED)
find_package(Geant4 REQUIRED)

add_executable(async_gpu_launch async_gpu_launch.cpp async_gpu_launch.h)

# PRIVATE: this is an executable, so nothing links against it and its
# dependencies do not need to propagate. Using the keyword signature also
# prevents accidental mixing with the legacy keyword-less form later on.
target_link_libraries(async_gpu_launch
    PRIVATE
        eic-opticks::gphox
        eic-opticks::G4CX
        eic-opticks::SysRap
        eic-opticks::U4
        ${Geant4_LIBRARIES}
)

install(TARGETS async_gpu_launch)
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# Async GPU Launch Example
2+
3+
Demonstrates asynchronous CPU+GPU optical photon simulation using
4+
double-buffered genstep collection with Geant4's `G4TaskGroup`.
5+
6+
## Architecture
7+
8+
```
9+
CPU Event Loop GPU Worker (G4TaskGroup)
10+
----------------- ------------------------
11+
Event N:
12+
EM shower -> Cerenkov/Scint
13+
Collect gensteps into buffer A
14+
Buffer A hits threshold
15+
Swap A <-> B
16+
Submit buffer A to GPU -------> Process buffer A:
17+
Event N+1: Load gensteps into SEvt
18+
Collect gensteps into buffer B gx->simulate()
19+
... Save hits
20+
Done
21+
End of run:
22+
Flush buffer B ----------------> Process buffer B
23+
Wait for completion Done
24+
```
25+
26+
The CPU never blocks waiting for the GPU (except at end-of-run flush).
27+
A single `G4Mutex` ensures only one GPU kernel runs at a time.
28+
29+
## Modes
30+
31+
| Flag | Behavior |
32+
|----------|----------|
33+
| `--async` | (default) Double-buffered async GPU processing |
34+
| `--sync` | Original end-of-run batch GPU simulation |
35+
36+
## Environment Variables
37+
38+
| Variable | Default | Description |
39+
|----------|---------|-------------|
40+
| `GPU_PHOTON_FLUSH_THRESHOLD` | 10000000 | Photons per GPU batch |
41+
| `OPTICKS_MAX_BOUNCE` | 1000 | Maximum optical photon bounces |
42+
| `OPTICKS_PROPAGATE_EPSILON` | - | Ray offset after boundary crossing |
43+
| `OPTICKS_PROPAGATE_EPSILON0` | - | Ray offset after bulk interaction |
44+
45+
## Build (standalone)
46+
47+
```bash
48+
mkdir build && cd build
49+
cmake .. -DCMAKE_PREFIX_PATH=/path/to/eic-opticks/install
50+
make
51+
```
52+
53+
## Build (in-tree)
54+
55+
The example is also built as part of the main eic-opticks build.
56+
57+
## Run
58+
59+
```bash
60+
# Async mode (default)
61+
./run.sh
62+
63+
# Sync mode
64+
./run.sh --sync
65+
66+
# Custom threshold (smaller batches = more CPU/GPU overlap)
67+
GPU_PHOTON_FLUSH_THRESHOLD=1000000 ./run.sh
68+
```
69+
70+
## Output
71+
72+
- `gpu_hits_batch_*.npy` — GPU hits per batch (async mode)
73+
- `gpu_hits.npy` — GPU hits (sync mode)
74+
- `g4_hits.npy` — Geant4 reference hits
75+
76+
All hit files use the sphoton layout: `(N, 4, 4)` float32 array with
77+
fields `pos/time`, `mom`, `pol/wavelength`, `flags`.
78+
79+
## Supported Physics
80+
81+
Both Cerenkov and scintillation gensteps are collected. Multi-component
82+
scintillation (up to 3 time constants) is supported.
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
// async_gpu_launch.cpp — Async CPU+GPU optical photon simulation example
2+
//
3+
// Demonstrates double-buffered async GPU processing where the CPU event
4+
// loop continues while the GPU processes accumulated gensteps in batches.
5+
//
6+
// Usage:
7+
// async_gpu_launch [-g apex.gdml] [-m run.mac] [-c config] [-s seed] [-i] [--async | --sync]
8+
//
9+
// Default is --async. Use --sync for the original end-of-run GPU mode.
10+
11+
#include <string>
12+
13+
#include <argparse/argparse.hpp>
14+
15+
#include "FTFP_BERT.hh"
16+
#include "G4OpticalPhysics.hh"
17+
#include "G4VModularPhysicsList.hh"
18+
19+
#include "G4UIExecutive.hh"
20+
#include "G4UImanager.hh"
21+
#include "G4VisExecutive.hh"
22+
23+
#include "async_gpu_launch.h"
24+
#include "config.h"
25+
#include "sysrap/OPTICKS_LOG.hh"
26+
27+
#include "G4RunManager.hh"
28+
#include "G4RunManagerFactory.hh"
29+
#include "G4VUserActionInitialization.hh"
30+
31+
using namespace std;
32+
33+
struct ActionInitialization : public G4VUserActionInitialization
34+
{
35+
G4App* fG4App;
36+
37+
ActionInitialization(G4App* app) :
38+
G4VUserActionInitialization(),
39+
fG4App(app)
40+
{
41+
}
42+
43+
void BuildForMaster() const override
44+
{
45+
SetUserAction(fG4App->run_act_);
46+
}
47+
48+
void Build() const override
49+
{
50+
SetUserAction(fG4App->prim_gen_);
51+
SetUserAction(fG4App->run_act_);
52+
SetUserAction(fG4App->event_act_);
53+
SetUserAction(fG4App->tracking_);
54+
SetUserAction(fG4App->stepping_);
55+
}
56+
};
57+
58+
int main(int argc, char** argv)
59+
{
60+
OPTICKS_LOG(argc, argv);
61+
62+
argparse::ArgumentParser program("async_gpu_launch", "0.0.0");
63+
64+
string gdml_file, macro_name;
65+
bool interactive;
66+
67+
program.add_argument("-g", "--gdml")
68+
.help("path to GDML file")
69+
.default_value(string("apex.gdml"))
70+
.nargs(1)
71+
.store_into(gdml_file);
72+
73+
program.add_argument("-m", "--macro")
74+
.help("path to G4 macro")
75+
.default_value(string("run.mac"))
76+
.nargs(1)
77+
.store_into(macro_name);
78+
79+
program.add_argument("-c", "--config")
80+
.help("config file name (without .json extension)")
81+
.default_value(string(""))
82+
.nargs(1);
83+
84+
program.add_argument("-i", "--interactive").help("open interactive viewer").flag().store_into(interactive);
85+
86+
program.add_argument("-s", "--seed").help("fixed random seed").scan<'i', long>();
87+
88+
program.add_argument("--async").help("use async double-buffered GPU processing (default)").flag();
89+
90+
program.add_argument("--sync").help("use synchronous end-of-run GPU processing").flag();
91+
92+
try
93+
{
94+
program.parse_args(argc, argv);
95+
}
96+
catch (const exception& err)
97+
{
98+
cerr << err.what() << endl;
99+
cerr << program;
100+
return EXIT_FAILURE;
101+
}
102+
103+
// Seed
104+
long seed;
105+
if (program.is_used("--seed"))
106+
seed = program.get<long>("--seed");
107+
else
108+
seed = static_cast<long>(time(nullptr));
109+
CLHEP::HepRandom::setTheSeed(seed);
110+
G4cout << "Random seed: " << seed << G4endl;
111+
112+
// Mode: async by default, sync if --sync is given
113+
bool enable_async = !program.get<bool>("--sync");
114+
G4cout << "Mode: " << (enable_async ? "ASYNC" : "SYNC") << G4endl;
115+
116+
// Physics
117+
G4VModularPhysicsList* physics = new FTFP_BERT;
118+
physics->RegisterPhysics(new G4OpticalPhysics);
119+
120+
auto* run_mgr = G4RunManagerFactory::CreateRunManager();
121+
run_mgr->SetUserInitialization(physics);
122+
123+
// Application
124+
G4App* g4app = new G4App(gdml_file, enable_async);
125+
126+
ActionInitialization* actionInit = new ActionInitialization(g4app);
127+
run_mgr->SetUserInitialization(actionInit);
128+
run_mgr->SetUserInitialization(g4app->det_cons_);
129+
130+
// UI
131+
G4UIExecutive* uix = nullptr;
132+
G4VisManager* vis = nullptr;
133+
134+
if (interactive)
135+
{
136+
uix = new G4UIExecutive(argc, argv);
137+
vis = new G4VisExecutive;
138+
vis->Initialize();
139+
}
140+
141+
G4UImanager* ui = G4UImanager::GetUIpointer();
142+
ui->ApplyCommand("/control/execute " + macro_name);
143+
144+
if (interactive)
145+
uix->SessionStart();
146+
147+
delete uix;
148+
return EXIT_SUCCESS;
149+
}

0 commit comments

Comments
 (0)