Skip to content

Commit a568f9f

Browse files
committed
Add a (claude generated) logger to decrowd outputs / save output and a few other fixes
Used Claude to help generate a logger that writes each material model's output to its own file per MPI rank, writes MFEM logging info from any non-zero rank to that rank's own file, and also duplicates all of the terminal output to its own log file for future reference. Beyond that, this commit captures a few bug fixes for issues I noticed when running some tougher material models and hitting some interesting edge cases...
1 parent 97f59cd commit a568f9f

16 files changed

Lines changed: 1517 additions & 36 deletions

src/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ set(EXACONSTIT_HEADERS
2828
utilities/assembly_ops.hpp
2929
utilities/rotations.hpp
3030
utilities/strain_measures.hpp
31+
utilities/unified_logger.hpp
3132
./TOML_Reader/toml.hpp
3233
)
3334

@@ -59,6 +60,7 @@ set(EXACONSTIT_SOURCES
5960
solvers/mechanics_solver.cpp
6061
utilities/dynamic_umat_loader.cpp
6162
utilities/mechanics_kernels.cpp
63+
utilities/unified_logger.cpp
6264
./umat_tests/userumat.cxx
6365
)
6466

src/fem_operators/mechanics_operator.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include "models/mechanics_multi_model.hpp"
44
#include "utilities/mechanics_kernels.hpp"
55
#include "utilities/mechanics_log.hpp"
6+
#include "utilities/unified_logger.hpp"
67

78
#include "mfem/general/forall.hpp"
89
#include "RAJA/RAJA.hpp"
@@ -175,7 +176,7 @@ void NonlinearMechOperator::Setup(const mfem::Vector &k) const
175176
}
176177
catch(const std::exception &exc) {
177178
// catch anything thrown within try block that derives from std::exception
178-
MFEM_WARNING(exc.what());
179+
MFEM_WARNING_0(exc.what());
179180
succeed = false;
180181
}
181182
catch(...) {

src/mechanics_driver.cpp

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,10 @@
7878
#include "mfem_expt/partial_qfunc.hpp"
7979
#include "options/option_parser_v2.hpp"
8080
#include "postprocessing/postprocessing_driver.hpp"
81+
#include "postprocessing/postprocessing_file_manager.hpp"
8182
#include "sim_state/simulation_state.hpp"
8283
#include "utilities/mechanics_log.hpp"
84+
#include "utilities/unified_logger.hpp"
8385

8486
#include "mfem.hpp"
8587
#include "mfem/general/forall.hpp"
@@ -129,10 +131,6 @@ int main(int argc, char *argv[])
129131
* - Enable detailed timing data for strong/weak scaling studies
130132
*/
131133
double start = MPI_Wtime();
132-
// Print MFEM version information for reproducibility and debugging
133-
if (myid == 0) {
134-
printf("MFEM Version: %d \n", mfem::GetVersion());
135-
}
136134
/**
137135
* **PHASE 2: COMMAND LINE PROCESSING AND CONFIGURATION**
138136
*/
@@ -157,6 +155,11 @@ int main(int argc, char *argv[])
157155
return 1;
158156
}
159157

158+
// Print MFEM version information for reproducibility and debugging
159+
if (myid == 0) {
160+
printf("MFEM Version: %d \n", mfem::GetVersion());
161+
}
162+
160163
/*
161164
* Configuration File Parsing:
162165
* - Load complete simulation configuration from TOML file
@@ -165,6 +168,10 @@ int main(int argc, char *argv[])
165168
*/
166169
ExaOptions toml_opt;
167170
toml_opt.parse_options(toml_file, myid);
171+
172+
exaconstit::UnifiedLogger& logger = exaconstit::UnifiedLogger::getInstance();
173+
logger.initialize(toml_opt);
174+
168175
toml_opt.print_options();
169176

170177
/**
@@ -312,7 +319,6 @@ int main(int argc, char *argv[])
312319
* - Update time-dependent material properties and boundary conditions
313320
* - Prepare solver state for current time increment
314321
*/
315-
const double sim_time = sim_state->getTime();
316322

317323
/*
318324
* Boundary Condition Change Detection:
@@ -347,7 +353,7 @@ int main(int argc, char *argv[])
347353
*/
348354
sim_state->finishCycle();
349355
oper.UpdateModel();
350-
post_process.Update(ti, sim_time);
356+
post_process.Update(ti, sim_state->getTrueCyleTime());
351357
} // end loop over time steps
352358

353359
/**
@@ -371,7 +377,7 @@ int main(int argc, char *argv[])
371377
if (myid == 0) {
372378
printf("The process took %lf seconds to run\n", (avg_sim_time / world_size));
373379
}
374-
380+
logger.shutdown();
375381
} // End of main simulation scope for proper resource cleanup
376382

377383
/*

src/models/mechanics_ecmech.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include "models/mechanics_model.hpp"
33
#include "utilities/mechanics_log.hpp"
44
#include "utilities/mechanics_kernels.hpp"
5+
#include "utilities/unified_logger.hpp"
56

67
#include "mfem.hpp"
78
#include "mfem/general/forall.hpp"
@@ -424,6 +425,11 @@ void ExaCMechModel::ModelSetup(const int nqpts, const int nelems, const int /*sp
424425
const int nnodes, const mfem::Vector &jacobian,
425426
const mfem::Vector &loc_grad, const mfem::Vector &vel)
426427
{
428+
429+
auto& logger = exaconstit::UnifiedLogger::getInstance();
430+
std::string material_log = logger.getMaterialLogFilename("exacmech", m_region);
431+
exaconstit::UnifiedLogger::ScopedCapture capture(material_log);
432+
427433
const int nstatev = numStateVars;
428434

429435
const double *jacobian_array = jacobian.Read();

src/models/mechanics_multi_model.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "mfem_expt/partial_qfunc.hpp"
66
#include "utilities/mechanics_log.hpp"
77
#include "utilities/dynamic_umat_loader.hpp"
8+
#include "utilities/unified_logger.hpp"
89

910
#include <stdexcept>
1011
#include <filesystem>
@@ -235,11 +236,11 @@ bool MultiExaModel::SetupChildModel(int region_idx, const int nqpts, const int n
235236
return true;
236237
}
237238
catch (const std::exception& e) {
238-
MFEM_WARNING("Region " + std::to_string(actual_region_id) + " failed: " + e.what());
239+
MFEM_WARNING_0("[Cycle " << std::to_string(m_sim_state->getSimulationCycle() + 1) << " ]Region " + std::to_string(actual_region_id) + " failed: " + e.what());
239240
return false;
240241
}
241242
catch (...) {
242-
MFEM_WARNING("Region " + std::to_string(actual_region_id) + " failed with unknown error");
243+
MFEM_WARNING_0("Region " + std::to_string(actual_region_id) + " failed with unknown error");
243244
return false;
244245
}
245246
}

src/models/mechanics_umat.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include "boundary_conditions/BCManager.hpp"
33
#include "utilities/assembly_ops.hpp"
44
#include "utilities/strain_measures.hpp"
5+
#include "utilities/unified_logger.hpp"
56

67
#include "RAJA/RAJA.hpp"
78
#include "mfem/fem/qfunction.hpp"
@@ -350,6 +351,9 @@ void AbaqusUmatModel::ModelSetup(const int nqpts, const int nelems, const int sp
350351
const int /*nnodes*/, const mfem::Vector &jacobian,
351352
const mfem::Vector & /*loc_grad*/, const mfem::Vector &/*vel*/)
352353
{
354+
auto& logger = exaconstit::UnifiedLogger::getInstance();
355+
std::string material_log = logger.getMaterialLogFilename("umat", m_region);
356+
exaconstit::UnifiedLogger::ScopedCapture capture(material_log);
353357
// Load UMAT library if using on-demand loading
354358
if (use_dynamic_loading_ && load_strategy_ == DynamicUmatLoader::LoadStrategy::LOAD_ON_SETUP) {
355359
if (!LoadUmatLibrary()) {

src/postprocessing/projection_class.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "projection_class.hpp"
22

33
#include "utilities/rotations.hpp"
4+
#include "utilities/unified_logger.hpp"
45

56
#include "ECMech_const.h"
67
#include "SNLS_linalg.h"
@@ -187,11 +188,11 @@ StateVariableProjection::Execute(std::shared_ptr<SimulationState> sim_state,
187188
m_component_length = (m_component_length == -1) ? vdim : m_component_length;
188189

189190
if ((m_component_length + m_component_index) > vdim) {
190-
MFEM_ABORT("StateVariableProjection provided a length and index that pushes us past the state variable length");
191+
MFEM_ABORT_0("StateVariableProjection provided a length and index that pushes us past the state variable length");
191192
};
192193

193194
if (m_component_length > state_gf->VectorDim()) {
194-
MFEM_ABORT("StateVariableProjection provided length is greater than the gridfunction vector length");
195+
MFEM_ABORT_0("StateVariableProjection provided length is greater than the gridfunction vector length");
195196
};
196197

197198
const auto l2g = qpts2mesh.Read();
@@ -265,11 +266,11 @@ ElasticStrainProjection::Execute(std::shared_ptr<SimulationState> sim_state,
265266
m_component_length = (m_component_length == -1) ? vdim : m_component_length;
266267

267268
if ((m_component_length + m_component_index) > vdim) {
268-
MFEM_ABORT("ElasticStrainProjection provided a length and index that pushes us past the state variable length");
269+
MFEM_ABORT_0("ElasticStrainProjection provided a length and index that pushes us past the state variable length");
269270
};
270271

271272
if (m_component_length > elastic_strain_gf->VectorDim()) {
272-
MFEM_ABORT("ElasticStrainProjection provided length is greater than the gridfunction vector length");
273+
MFEM_ABORT_0("ElasticStrainProjection provided length is greater than the gridfunction vector length");
273274
};
274275

275276
const int estrain_ind = sim_state->GetQuadratureFunctionStatePair("elastic_strain", region).first;

src/postprocessing/projection_class.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -710,7 +710,9 @@ class ElasticStrainProjection final : public StateVariableProjection {
710710
int component_index,
711711
int component_length,
712712
const std::string& display_name)
713-
: StateVariableProjection(state_var_name, component_index, component_length, display_name, ptmc::EXACMECH_ONLY) {}
713+
: StateVariableProjection(state_var_name, component_index, component_length, display_name, ptmc::EXACMECH_ONLY) {
714+
m_component_length = 6;
715+
}
714716
/**
715717
* @brief Execute elastic strain projection with coordinate transformation
716718
*

src/sim_state/simulation_state.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,12 @@ TimeManagement::updateDeltaTime(const int nr_steps, const bool success) {
288288
updateTime();
289289
num_failures++;
290290
num_sub_steps = 1;
291+
if (internal_tracker == TimeStep::FINAL) {
292+
const double tf_dt = std::abs(time - time_final);
293+
if (tf_dt > std::abs(1e-3 * dt)) {
294+
internal_tracker = TimeStep::RETRIAL;
295+
}
296+
}
291297
// If we've failed too many times just give up at this point
292298
if (num_failures > max_failures) {
293299
return TimeStep::FAILED;
@@ -298,6 +304,8 @@ TimeManagement::updateDeltaTime(const int nr_steps, const bool success) {
298304
}
299305
}
300306

307+
old_time = time;
308+
301309
if (internal_tracker == TimeStep::FINAL) {
302310
internal_tracker = TimeStep::FINISHED;
303311
return TimeStep::FINISHED;

src/sim_state/simulation_state.hpp

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ class TimeManagement {
5555
private:
5656
/** @brief Current simulation time */
5757
double time = 0.0;
58+
59+
/** @brief Old simulation time */
60+
double old_time = 0.0;
5861

5962
/** @brief Final simulation time (target end time) */
6063
double time_final = 0.0;
@@ -140,6 +143,13 @@ class TimeManagement {
140143
* @return Current time value
141144
*/
142145
double getTime() const { return time; }
146+
147+
/**
148+
* @brief Get actual simulation time if auto-time stepping used
149+
*
150+
* @return Simulation time stored in old_time (a time value, not a time step size)
151+
*/
152+
double getTrueCyleTime() const { return old_time; }
143153

144154
/**
145155
* @brief Get current time step size
@@ -248,6 +258,22 @@ class TimeManagement {
248258
dt = dt_restart;
249259
}
250260

261+
/**
262+
* @brief Print retrial diagnostic information
263+
*
264+
* @details Outputs diagnostic information about the current cycle's time step, including:
265+
* - Original time step size before we reduced things down
266+
* - Current time
267+
* - Current cycle
268+
* - Current time step size
269+
*
270+
* Used for debugging convergence issues and understanding when/why
271+
* retrying a time step is required.
272+
*/
273+
void printRetrialStats() const {
274+
std::cout << "[Cycle: "<< (simulation_cycle + 1) << " , time: " << time << "] Previous attempts to converge failed step: dt old was " << dt_orig << " new dt is " << dt << std::endl;
275+
}
276+
251277
/**
252278
* @brief Print sub-stepping diagnostic information
253279
*
@@ -260,7 +286,7 @@ class TimeManagement {
260286
* sub-stepping is being triggered.
261287
*/
262288
void printSubStepStats() const {
263-
std::cout << "Previous attempts to converge failed but now starting sub-stepping of our desired time step: desired dt old was " << dt_orig << " sub-stepping dt is " << dt << " and number of sub-steps required is " << required_num_sub_steps << std::endl;
289+
std::cout << "[Cycle: "<< (simulation_cycle + 1) << " , time: " << time << "] Previous attempts to converge failed but now starting sub-stepping of our desired time step: desired dt old was " << dt_orig << " sub-stepping dt is " << dt << " and number of sub-steps required is " << required_num_sub_steps << std::endl;
264290
}
265291

266292
/**
@@ -935,6 +961,8 @@ class SimulationState
935961
return GetRegionRootRank(region_id) == my_id;
936962
}
937963

964+
size_t GetMPIID() const { return my_id; }
965+
938966
// =========================================================================
939967
// SOLUTION FIELD ACCESS
940968
// =========================================================================
@@ -978,6 +1006,13 @@ class SimulationState
9781006
*/
9791007
double getTime() const { return m_time_manager.getTime(); }
9801008

1009+
/**
1010+
* @brief Get actual simulation time for a given cycle as auto-time step might have changed things
1011+
*
1012+
* @return Time value reported by TimeManagement::getTrueCyleTime()
1013+
*/
1014+
double getTrueCyleTime() const { return m_time_manager.getTrueCyleTime(); }
1015+
9811016
/**
9821017
* @brief Get current time step size
9831018
*
@@ -1027,6 +1062,15 @@ class SimulationState
10271062
*/
10281063
void printTimeStats() const { m_time_manager.printTimeStats(); }
10291064

1065+
/**
1066+
* @brief Print retrial time step statistics
1067+
*
1068+
* @details Outputs current time and time step information for monitoring
1069+
* adaptive time step behavior. Delegates to TimeManagement.
1070+
*/
1071+
void printRetrialTimeStats() const { m_time_manager.printRetrialStats(); }
1072+
1073+
10301074
private:
10311075
/** @brief Create MPI communicators for each region containing only ranks with that region
10321076
* @details This prevents deadlocks in collective operations when some ranks have no

0 commit comments

Comments
 (0)