Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update nancheck to ensure exit using openmp #239

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions Examples/BinaryBH/BinaryBHLevel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,9 @@ void BinaryBHLevel::specificAdvance()

// Check for nan's
if (m_p.nan_check)
BoxLoops::loop(NanCheck("NaNCheck in specific Advance: "), m_state_new,
m_state_new, EXCLUDE_GHOST_CELLS, disable_simd());
BoxLoops::loop(NanCheck(m_dx, "NaNCheck in specific Advance: "),
m_state_new, m_state_new, EXCLUDE_GHOST_CELLS,
disable_simd());
}

// This initial data uses an approximation for the metric which
Expand Down
7 changes: 7 additions & 0 deletions Examples/ScalarField/InitialScalarData.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,13 @@ class InitialScalarData
Coordinates<data_t> coords(current_cell, m_dx, m_params.center);
data_t rr = coords.get_radius();
data_t rr2 = rr * rr;
IntVect iv = current_cell.get_int_vect();

if (iv[0] == 4 && iv[1] == 4 && iv[2] == 4)
{
data_t K = NAN;
current_cell.store_vars(K, c_K);
}

// calculate the field value
data_t phi = m_params.amplitude *
Expand Down
24 changes: 18 additions & 6 deletions Examples/ScalarField/ScalarFieldLevel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,6 @@ void ScalarFieldLevel::specificAdvance()
make_compute_pack(TraceARemoval(),
PositiveChiAndAlpha(m_p.min_chi, m_p.min_lapse)),
m_state_new, m_state_new, INCLUDE_GHOST_CELLS);

// Check for nan's
if (m_p.nan_check)
BoxLoops::loop(NanCheck(), m_state_new, m_state_new,
EXCLUDE_GHOST_CELLS, disable_simd());
}

// Initial data for field and metric variables
Expand All @@ -56,11 +51,18 @@ void ScalarFieldLevel::initialData()
BoxLoops::loop(
make_compute_pack(SetValue(0.), KerrBH(m_p.kerr_params, m_dx),
InitialScalarData(m_p.initial_params, m_dx)),
m_state_new, m_state_new, INCLUDE_GHOST_CELLS);
m_state_new, m_state_new, INCLUDE_GHOST_CELLS, disable_simd());

fillAllGhosts();
BoxLoops::loop(GammaCalculator(m_dx), m_state_new, m_state_new,
EXCLUDE_GHOST_CELLS);

// Check for nan's
/*if (m_p.nan_check)
BoxLoops::loop(NanCheck(), m_state_new, m_state_new,
EXCLUDE_GHOST_CELLS, disable_simd());*/

MayDay::Error("Nan check didn't work.");
}

#ifdef CH_USE_HDF5
Expand Down Expand Up @@ -130,3 +132,13 @@ void ScalarFieldLevel::computeTaggingCriterion(
FixedGridsTaggingCriterion(m_dx, m_level, 2.0 * m_p.L, m_p.center),
current_state, tagging_criterion);
}

void ScalarFieldLevel::specificPostTimeStep()
{
CH_TIME("ScalarFieldLevel::specificPostTimeStep");

// Check for nan's
if (m_p.nan_check)
BoxLoops::loop(NanCheck(), m_state_new, m_state_new,
EXCLUDE_GHOST_CELLS, disable_simd());
}
2 changes: 2 additions & 0 deletions Examples/ScalarField/ScalarFieldLevel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ class ScalarFieldLevel : public GRAMRLevel
virtual void computeTaggingCriterion(
FArrayBox &tagging_criterion, const FArrayBox &current_state,
const FArrayBox &current_state_diagnostics) override;

virtual void specificPostTimeStep();
};

#endif /* SCALARFIELDLEVEL_HPP_ */
4 changes: 2 additions & 2 deletions Examples/ScalarField/params.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
verbosity = 0

# location / naming of output files
# output_path = "" # Main path for all files. Must exist!
output_path = "/nfs/st01/hpc-gr-epss/eaf49/PR-dump" # Main path for all files. Must exist!
chk_prefix = ScalarField_
plot_prefix = ScalarFieldp_
# restart_file = ScalarField_000000.3d.hdf5
Expand Down Expand Up @@ -131,7 +131,7 @@ extrapolating_vars = phi Pi
# dt will be dx*dt_multiplier on each grid level
dt_multiplier = 0.25
stop_time = 100.0
# max_steps = 4
max_steps = 4

# Spatial derivative order (only affects CCZ4 RHS)
max_spatial_derivative_order = 4 # can be 4 or 6
Expand Down
50 changes: 44 additions & 6 deletions Source/BoxUtils/NanCheck.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,42 +16,80 @@ class NanCheck
protected:
const std::string m_error_info = "NanCheck";
const double m_max_abs = 1e20;
const double m_dx = 0.0;

public:
NanCheck() {}

/// This constructor takes a string which will be displayed when nans happen
NanCheck(const std::string a_error_info) : m_error_info(a_error_info) {}

NanCheck(const std::string a_error_info, const double a_max_abs)
: m_error_info(a_error_info), m_max_abs(a_max_abs)
/// This allows us to output the physical coords if the dx value is passed
NanCheck(const double a_dx) : m_dx(a_dx) {}

/// This constructor takes a string which will be displayed when nans happen
/// as well as the grid spacing
NanCheck(const double a_dx, const std::string a_error_info)
: m_error_info(a_error_info), m_dx(a_dx)
{
}

// This constructor takes all arguments, note ordering to reduce potential
// to confuse dx and max_abs
NanCheck(const double a_dx, const std::string a_error_info,
const double a_max_abs)
: m_error_info(a_error_info), m_max_abs(a_max_abs), m_dx(a_dx)
{
}

void compute(Cell<double> current_cell) const
{
bool stop = false;
// stop is shared between all threads
bool stop;
int thrd = 0;
// guard assignment to prevent a race
#pragma omp atomic write
stop = false;

int num_vars = current_cell.get_num_in_vars();
for (int ivar = 0; ivar < num_vars; ++ivar)
{
double val;
current_cell.load_vars(val, ivar);
if (std::isnan(val) || abs(val) > m_max_abs)
// we want to exit if any of the threads find a nan
#pragma omp atomic write
stop = true;
thrd = omp_get_thread_num();
}

if (stop)
{
#pragma omp single
{
// This needs to be the master thread, otherwise some schedulers have trouble
// exiting
#pragma omp master
pout() << m_error_info
<< "::Values have become nan. The current state is: \n";
for (int ivar = 0; ivar < num_vars; ++ivar)
{
pout() << UserVariables::variable_names[ivar] << ": "
<< current_cell.load_vars(ivar) << std::endl;
}
pout() << "Integer coordinates: " << current_cell.get_int_vect()
<< std::endl;
pout() << "Nan was caught on thread: " << thrd << "\n";
pout() << "Error message printed on thread: "
<< omp_get_thread_num() << "\n";
IntVect iv = current_cell.get_int_vect();
pout() << "Integer coordinates: " << iv << std::endl;
if (m_dx != 0.0)
{
pout() << "with m_dx: " << m_dx << std::endl;
RealVect position(iv + 0.5 * RealVect::Unit);
position *= m_dx;
pout() << "Physical coords relative to bottom left corner "
"of domain: "
<< position << std::endl;
}
}
MayDay::Error("Values have become nan.");
}
Expand Down
Loading