-
Notifications
You must be signed in to change notification settings - Fork 28
/
state.h
479 lines (432 loc) · 16.6 KB
/
state.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
/*
* Copyright (c) 2011-2019, Triad National Security, LLC.
* All rights Reserved.
*
* CLAMR -- LA-CC-11-094
*
* Copyright 2011-2019. Triad National Security, LLC. This software was produced
* under U.S. Government contract 89233218CNA000001 for Los Alamos National
* Laboratory (LANL), which is operated by Triad National Security, LLC
* for the U.S. Department of Energy. The U.S. Government has rights to use,
* reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR
* TRIAD NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
* ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified
* to produce derivative works, such modified software should be clearly marked,
* so as not to confuse it with the version available from LANL.
*
* Additionally, redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the Triad National Security, LLC, Los Alamos
* National Laboratory, LANL, the U.S. Government, nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE TRIAD NATIONAL SECURITY, LLC AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
* NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL TRIAD NATIONAL
* SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* CLAMR -- LA-CC-11-094
* This research code is being developed as part of the
* 2011 X Division Summer Workshop for the express purpose
* of a collaborative code for development of ideas in
* the implementation of AMR codes for Exascale platforms
*
* AMR implementation of the Wave code previously developed
* as a demonstration code for regular grids on Exascale platforms
* as part of the Supercomputing Challenge and Los Alamos
* National Laboratory
*
* Authors: Bob Robey XCP-2 [email protected]
* Neal Davis [email protected], [email protected]
* David Nicholaeff [email protected], [email protected]
* Dennis Trujillo [email protected], [email protected]
*
*/
#ifndef STATE_H_
#define STATE_H_
#include <list>
#include "MallocPlus/MallocPlus.h"
#include "mesh/mesh.h"
#include "crux/crux.h"
#ifdef HAVE_OPENCL
#include "ezcl/ezcl.h"
#endif
#include "l7/l7.h"
#include <atomic>
// Integer status codes. Only the values are defined here; the code that
// produces and checks them is not visible in this header.
// NOTE(review): STATUS_NAN / STATUS_MASS_LOSS presumably flag a NaN in the
// solution and a failed mass-conservation check -- confirm against the callers.
#define STATUS_OK 0
#define STATUS_NAN 1
#define STATUS_MASS_LOSS 2
// Precision selection.
// If the build did not define any of the four precision macros, fall back to
// FULL_PRECISION.
#if !defined(FULL_PRECISION) && !defined(MIXED_PRECISION) && !defined(MINIMUM_PRECISION) && !defined(HALF_PRECISION)
#define FULL_PRECISION
#endif
// NO_CL_DOUBLE overrides whatever was chosen above: every other precision
// macro is undefined and MINIMUM_PRECISION is forced.
// NOTE(review): presumably set when the OpenCL device lacks double support -- confirm in the build system.
#ifdef NO_CL_DOUBLE
#undef FULL_PRECISION
#undef MIXED_PRECISION
#define MINIMUM_PRECISION
#undef HALF_PRECISION
#endif
// Precision-dependent type selection. Exactly one branch of this chain is
// taken, tested in the order HALF, MINIMUM, MIXED, FULL. Each branch defines:
//   state_t          -- element type of the physics state arrays
//   real_t           -- type used for intermediate calculations
//   real2_t          -- pair of two real_t-sized members (s0, s1)
//   CONSERVATION_EPS -- numeric tolerance; its use is not visible in this header
//   cl_*_t           -- matching OpenCL host types (only with HAVE_OPENCL)
//   MPI_*_T, L7_*_T  -- matching MPI / L7 datatype tags (only with HAVE_MPI)
#if defined(HALF_PRECISION)
// NOTE(review): the using-declaration and using-directive below are visible to
// every file that includes this header.
#include "half.hpp"
using half_float::half;
using namespace half_float::literal;
typedef half state_t; // this is for physics state variables ncell in size
typedef float real_t; // this is used for intermediate calculations
typedef struct
{
float s0;
float s1;
} real2_t;
#define CONSERVATION_EPS 15.0
#ifdef HAVE_OPENCL
typedef cl_half cl_state_t; // for gpu physics state variables
typedef cl_half4 cl_state4_t; // for gpu physics state variables
typedef cl_float cl_real_t; // for intermediate gpu physics state variables
typedef cl_float2 cl_real2_t; // for intermediate gpu physics state variables
typedef cl_float4 cl_real4_t; // for intermediate gpu physics state variables
#endif
#ifdef HAVE_MPI
// NOTE(review): half state values are tagged as SHORT for transport, presumably
// because both are 16 bits wide -- confirm no arithmetic reductions use this tag.
#define MPI_STATE_T MPI_SHORT // for MPI communication for physics state variables
#define MPI_REAL_T MPI_FLOAT // for MPI communication of intermediate (real_t) values
#define L7_STATE_T L7_SHORT
#define L7_REAL_T L7_FLOAT
#endif
#elif defined(MINIMUM_PRECISION)
// Single precision for both stored state and intermediate values.
typedef float state_t; // this is for physics state variables ncell in size
typedef float real_t; // this is used for intermediate calculations
typedef struct
{
float s0;
float s1;
} real2_t;
#define CONSERVATION_EPS 15.0
#ifdef HAVE_OPENCL
typedef cl_float cl_state_t; // for gpu physics state variables
typedef cl_float4 cl_state4_t; // for gpu physics state variables
typedef cl_float cl_real_t; // for intermediate gpu physics state variables
typedef cl_float2 cl_real2_t; // for intermediate gpu physics state variables
typedef cl_float4 cl_real4_t; // for intermediate gpu physics state variables
#endif
#ifdef HAVE_MPI
#define MPI_STATE_T MPI_FLOAT // for MPI communication for physics state variables
#define MPI_REAL_T MPI_FLOAT // for MPI communication of intermediate (real_t) values
#define L7_STATE_T L7_FLOAT
#define L7_REAL_T L7_FLOAT
#endif
#elif defined(MIXED_PRECISION) // intermediate values calculated high precision and stored as floats
typedef float state_t;
typedef double real_t;
typedef struct
{
double s0;
double s1;
} real2_t;
#define CONSERVATION_EPS .02
#ifdef HAVE_OPENCL
typedef cl_float cl_state_t;
typedef cl_float4 cl_state4_t;
typedef cl_double cl_real_t; // for intermediate gpu physics state variables
typedef cl_double2 cl_real2_t; // for intermediate gpu physics state variables
typedef cl_double4 cl_real4_t; // for intermediate gpu physics state variables
#endif
#ifdef HAVE_MPI
#define MPI_STATE_T MPI_FLOAT
#define MPI_REAL_T MPI_DOUBLE
#define L7_STATE_T L7_FLOAT
#define L7_REAL_T L7_DOUBLE
#endif
#elif defined(FULL_PRECISION)
// Double precision for both stored state and intermediate values.
typedef double state_t;
typedef double real_t;
typedef struct
{
double s0;
double s1;
} real2_t;
#define CONSERVATION_EPS .02
#ifdef HAVE_OPENCL
typedef cl_double cl_state_t;
typedef cl_double4 cl_state4_t;
typedef cl_double cl_real_t; // for intermediate gpu physics state variables
typedef cl_double2 cl_real2_t; // for intermediate gpu physics state variables
typedef cl_double4 cl_real4_t; // for intermediate gpu physics state variables
#endif
#ifdef HAVE_MPI
#define MPI_STATE_T MPI_DOUBLE
#define MPI_REAL_T MPI_DOUBLE
#define L7_STATE_T L7_DOUBLE
#define L7_REAL_T L7_DOUBLE
#endif
#endif
// do_calc is declared with C language linkage (unmangled symbol name).
// Its definition is not visible in this header.
extern "C" {
    void do_calc(void);
}
// Cut selector. Values are consecutive from 0 in declaration order.
// NOTE(review): the enumerator names suggest which axis/diagonal a cut is
// taken along; the consumer is not visible in this header -- confirm.
enum CUT_TYPE {
    CUT_NONE  = 0,
    CUT_XAXIS = 1,
    CUT_YAXIS = 2,
    CUT_45DEG = 3,
    CUT_ALL   = 4
};
// Summation method selector.
// NOTE(review): presumably SUM_KAHAN requests Kahan (compensated) summation
// and SUM_REGULAR a plain running sum -- confirm where the value is consumed.
enum SUM_TYPE {
    SUM_REGULAR = 0,
    SUM_KAHAN   = 1
};
// Sign rule selector; State::symmetry_check takes one of these as its
// sign_rule argument. How each rule is applied is defined with that routine.
enum SIGN_RULE {
    DIAG_RULE = 0,
    X_RULE    = 1,
    Y_RULE    = 2
};
// Timer categories. Each enumerator is an index into the cpu_timers and
// gpu_timers arrays of State; STATE_TIMER_SIZE is the trailing sentinel that
// gives the number of categories and must stay last.
typedef enum state_timers {
    STATE_TIMER_APPLY_BCS = 0,
    STATE_TIMER_SET_TIMESTEP,
    STATE_TIMER_FINITE_DIFFERENCE,
    STATE_TIMER_FINITE_DIFFERENCE_PART1,
    STATE_TIMER_FINITE_DIFFERENCE_PART2,
    STATE_TIMER_FINITE_DIFFERENCE_PART3,
    STATE_TIMER_FINITE_DIFFERENCE_PART4,
    STATE_TIMER_FINITE_DIFFERENCE_PART5,
    STATE_TIMER_FINITE_DIFFERENCE_PART6,
    STATE_TIMER_REFINE_POTENTIAL,
    STATE_TIMER_CALC_MPOT,
    STATE_TIMER_REZONE_ALL,
    STATE_TIMER_MASS_SUM,
    STATE_TIMER_READ,
    STATE_TIMER_WRITE,
    STATE_TIMER_SIZE    // count of categories, not a timer itself
} state_timer_category;
// NOTE(review): a using-directive in a header applies to every file that
// includes it. It is kept because declarations in this header name vector
// without a std:: qualifier.
using namespace std;
// State -- owns the physics state arrays H, U and V that live on the cells of
// a Mesh, together with their optional OpenCL device buffers and per-category
// timers. Only declarations appear here; the member functions are defined in
// another translation unit.
class State {
public:
// Memory managers for the host-side and GPU-side state arrays.
// NOTE(review): exact MallocPlus semantics are defined in MallocPlus.h.
MallocPlus state_memory;
MallocPlus gpu_state_memory;
// Mesh this state is attached to; no default initializer is given here.
Mesh *mesh;
// Host state arrays, NULL until allocated.
state_t *H = NULL;
state_t *U = NULL;
state_t *V = NULL;
#ifdef PRECISION_CHECK
state_t *PCHECK = NULL;
#endif
#ifdef HAVE_OPENCL
// OpenCL handles. None has a default initializer in this header; creation and
// release happen in code that is not visible here.
cl_mem dev_H;
cl_mem dev_U;
cl_mem dev_V;
cl_mem dev_HxFlux;
cl_mem dev_Hxfluxplus;
cl_mem dev_Hxfluxminus;
cl_mem dev_UxFlux;
cl_mem dev_Uxfluxplus;
cl_mem dev_Uxfluxminus;
cl_mem dev_VxFlux;
cl_mem dev_Vxfluxplus;
cl_mem dev_Vxfluxminus;
cl_mem dev_HyFlux;
cl_mem dev_Hyfluxplus;
cl_mem dev_Hyfluxminus;
cl_mem dev_UyFlux;
cl_mem dev_Uyfluxplus;
cl_mem dev_Uyfluxminus;
cl_mem dev_VyFlux;
cl_mem dev_Vyfluxplus;
cl_mem dev_Vyfluxminus;
cl_mem dev_Wx_H;
cl_mem dev_Wplusx_H;
cl_mem dev_Wminusx_H;
cl_mem dev_Wx_U;
cl_mem dev_Wplusx_U;
cl_mem dev_Wminusx_U;
cl_mem dev_Wy_H;
cl_mem dev_Wplusy_H;
cl_mem dev_Wminusy_H;
cl_mem dev_Wy_V;
cl_mem dev_Wplusy_V;
cl_mem dev_Wminusy_V;
cl_mem dev_H_reg_lev;
cl_mem dev_U_reg_lev;
cl_mem dev_V_reg_lev;
cl_mem dev_H_state_new;
cl_mem dev_U_state_new;
cl_mem dev_V_state_new;
cl_mem dev_reg_start;
cl_mem dev_lev_jregmin;
cl_mem dev_lev_iregmin;
cl_mem dev_lev_jregsize;
cl_mem dev_lev_iregsize;
cl_mem dev_mass_sum;
cl_mem dev_deltaT;
cl_event apply_BCs_event;
cl_mem dev_mpot;
//cl_mem dev_ioffset;
cl_mem dev_result;
#endif
// Accumulated timers, one slot per state_timer_category value.
double cpu_timers[STATE_TIMER_SIZE];
long long gpu_timers[STATE_TIMER_SIZE];
// constructor -- allocates state arrays to size ncells
State(Mesh *mesh_in);
void init(int do_gpu_calc);
void terminate(void);
/* Memory routines for linked list of state arrays */
void allocate(size_t ncells);
void allocate_from_backup_file(FILE *fp);
void allocate_for_rollback(State *state_to_copy);
void resize(size_t ncells);
void memory_reset_ptrs(void);
#ifdef HAVE_OPENCL
void gpu_memory_reset_ptrs(void);
void allocate_device_memory(size_t ncells);
#endif
// NOTE(review): declared outside the HAVE_OPENCL guard although the name refers
// to device memory -- confirm a definition exists in builds without OpenCL.
void resize_old_device_memory(size_t ncells);
/* Accessor routines */
/* Returns the cpu_timers entry for category, unscaled; category is not range-checked */
double get_cpu_timer(state_timer_category category) {return(cpu_timers[category]); };
/* Returns the gpu_timers entry for category multiplied by 1.0e-9; for a value
 * stored in nanoseconds that is seconds (not milliseconds) */
double get_gpu_timer(state_timer_category category) {return((double)(gpu_timers[category])*1.0e-9); };
/* Boundary routines -- not currently used */
void add_boundary_cells(void);
void apply_boundary_conditions(void);
void remove_boundary_cells(void);
/*******************************************************************
* set_timestep
* Input
* H, U, V -- from state object
* celltype, level, lev_delta
* Output
* mindeltaT returned
*******************************************************************/
double set_timestep(double g, double sigma);
#ifdef HAVE_OPENCL
double gpu_set_timestep(double sigma);
#endif
/*******************************************************************
* calc finite difference
* will add ghost region to H, U, V and fill at start of routine
* Input
* H, U, V -- from state object
* nlft, nrht, nbot, ntop, level, celltype -- from mesh object
* Output
* H, U, V
*******************************************************************/
void calc_finite_difference(double deltaT);
void calc_finite_difference_cell_in_place(double deltaT);
void calc_finite_difference_via_faces(double deltaT);
void calc_finite_difference_face_in_place(double deltaT);
void calc_finite_difference_regular_cells(double deltaT);
void calc_finite_difference_regular_cells_by_faces(double deltaT);
#ifdef HAVE_OPENCL
//void gpu_faces_realloc(size_t mem_requestx, size_t mem_requesty);
void gpu_faces_setup(size_t mem_requestx, size_t mem_requesty);
void gpu_faces_setup_phantom(size_t mem_request);
void gpu_faces_delete(void);
void gpu_faces_delete_phantom(void);
void gpu_calc_finite_difference(double deltaT);
void gpu_calc_finite_difference_via_faces(double deltaT);
void gpu_calc_finite_difference_in_place(double deltaT);
void gpu_calc_finite_difference_via_face_in_place(double deltaT);
void gpu_reggrid_setup(size_t mem_request);
void gpu_reggrid_delete(void);
void gpu_calc_finite_difference_regular_cells(double deltaT);
void gpu_calc_finite_difference_regular_cells_by_faces(double deltaT);
#endif
/*******************************************************************
* calc refine potential -- state has responsibility to calc initial
* refinement potential array that is then passed to mesh for
* smoothing and enforcing refinement rules
* Input
* H, U, V -- from state object
* Output
* mpot
* ioffset
* count
*******************************************************************/
size_t calc_refine_potential(vector<char_t> &mpot, int &icount, int &jcount);
#ifdef HAVE_OPENCL
size_t gpu_calc_refine_potential(int &icount, int &jcount);
#endif
/*******************************************************************
* rezone all -- most of call is done in mesh
* Input
* Mesh and state variables
* Output
* New mesh and state variables on refined mesh
*******************************************************************/
// Note: mpot is taken by value here, so each call copies the vector.
void rezone_all(int icount, int jcount, vector<char_t> mpot);
#ifdef HAVE_OPENCL
void gpu_rezone_all(int icount, int jcount, bool localStencil);
#endif
/*******************************************************************
* load balance -- most of call is done in mesh, but pointers are
* reset to newly allocated state arrays
* Input
* Mesh and state variables
* Output
* New mesh and state variables on refined mesh
*******************************************************************/
#ifdef HAVE_MPI
void do_load_balance_local(size_t &numcells);
#ifdef HAVE_OPENCL
void gpu_do_load_balance_local(size_t &numcells);
#endif
#endif
/*******************************************************************
* mass sum -- Conservation of mass check
* Input
* H from state object
* Precision type for sum
* Output
* total mass is returned
*******************************************************************/
double mass_sum(int enhanced_precision_sum);
#ifdef HAVE_OPENCL
double gpu_mass_sum(int enhanced_precision_sum);
#endif
void fill_circle(double circ_radius, double fill_value, double background);
// Note: iorder and sym_index below are taken by value (copied per call).
void state_reorder(vector<int> iorder);
void symmetry_check(const char *string, vector<int> sym_index, double eps,
SIGN_RULE sign_rule, int &flag);
void output_timing_info(int do_cpu_calc, int do_gpu_calc, double total_elapsed_time);
/* state comparison routines */
#ifdef HAVE_OPENCL
void compare_state_gpu_global_to_cpu_global(const char* string, int cycle, uint ncells);
#endif
void compare_state_cpu_local_to_cpu_global(State *state_global, const char* string, int cycle, uint ncells, uint ncells_global, int *nsizes, int *ndispl);
#ifdef HAVE_OPENCL
void compare_state_all_to_gpu_local(State *state_global, uint ncells, uint ncells_global, int mype, int ncycle, int *nsizes, int *ndispl);
#endif
/* timing report helpers */
void output_timer_block(mesh_device_types device_type, double elapsed_time,
double mesh_time, double compute_time, double total_elapsed_time, double speedup_ratio);
void timer_output(state_timer_category category, mesh_device_types device_type, int timer_level);
void print(void);
void print_data_dump(int ncycle);
/* checkpoint support through Crux */
size_t get_checkpoint_size(void);
void store_checkpoint(Crux *crux);
void restore_checkpoint(Crux *crux);
// Second print overload, added for output at every iteration: Brian Atkinson (5-29-14)
void print(int iteration, double simTime, double initial_mass, double iteration_mass, double mass_diff_percentage);
void print_local(int ncycle);
void print_failure_log(int iteration, double simTime, double initial_mass, double iteration_mass, double mass_diff_percentage, bool got_nan);
void print_rollback_log(int iteration, double simTime, double initial_mass, double iteration_mass, double mass_diff_percentage, int backup_attempt, int num_of_attempts, int error_status);
private:
// Copy constructor is declared private so code outside the class cannot copy a State.
State(const State&); // To block copy constructor so copies are not made inadvertently
void print_object_info(void);
};
// Mesh_CLAMR -- publicly derives from Mesh and declares a constructor plus a
// family of interpolate routines that each take a MallocPlus reference.
// NOTE(review): the parameters are unnamed in these declarations, so their
// meaning has to be read from the definitions, which are not in this header.
// NOTE(review): "course" in two method names is presumably a misspelling of
// "coarse"; the names are part of the interface and are left unchanged.
class Mesh_CLAMR : public Mesh{
public:
Mesh_CLAMR(int, int, int, int, double, double, int, int, int);
void interpolate(int, int, int, int, double, MallocPlus&);
void interpolate_fine_x(int, int, int, int, double, MallocPlus&);
void interpolate_fine_y(int, int, int, int, double, MallocPlus&);
void interpolate_course_x(int, int, int, int, double, MallocPlus&);
void interpolate_course_y(int, int, int, int, double, MallocPlus&);
};
#endif // ifndef STATE_H_