Skip to content

Commit

Permalink
Add direct .so backdoor, and XDSL_SKIP_CLEAN to *not* delete tempora…
Browse files Browse the repository at this point in the history
…ry files if desired.
  • Loading branch information
PapyChacal committed Aug 7, 2023
1 parent 69134cb commit c107d49
Show file tree
Hide file tree
Showing 9 changed files with 137 additions and 0 deletions.
Binary file added fast/profile.nsys-rep
Binary file not shown.
Binary file added fast/report1.nsys-rep
Binary file not shown.
Binary file added fast/report2.nsys-rep
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#define _POSIX_C_SOURCE 200809L
/*
 * Wall-clock section timers built on gettimeofday().
 *
 * START_TIMER(S)  declares the locals start_S and end_S in the current scope
 *                 and records the start time. Because it introduces
 *                 declarations it must be used at most once per scope for a
 *                 given S and cannot be wrapped in a block of its own.
 * STOP_TIMER(S,T) records the end time and adds the elapsed time in seconds
 *                 (whole seconds plus microseconds / 1000000) to the field S
 *                 of the struct that T points to. T is parenthesized so that
 *                 any pointer expression (e.g. &prof) expands correctly.
 *
 * Both macros carry their own trailing ';' so they can be invoked without one.
 */
#define START_TIMER(S) struct timeval start_ ## S , end_ ## S ; gettimeofday(&start_ ## S , NULL);
#define STOP_TIMER(S,T) gettimeofday(&end_ ## S, NULL); (T)->S += (double)(end_ ## S .tv_sec-start_ ## S.tv_sec)+(double)(end_ ## S .tv_usec-start_ ## S .tv_usec)/1000000;

#include "stdlib.h"
#include "math.h"
#include "sys/time.h"
#include "openacc.h"

/*
 * Descriptor for one array argument handed to the generated kernel.
 * Only `data` and `size` are read by the code in this file; the notes on the
 * remaining fields are guesses from their names and need confirming against
 * the code that fills this struct in.
 */
struct dataobj
{
  void *restrict data;    /* raw buffer; cast to a typed (VLA) pointer by the kernel */
  unsigned long *size;    /* per-dimension extents; size[0..2] are used as array bounds */
  unsigned long *npsize;  /* not referenced here (presumably non-padded sizes - confirm) */
  unsigned long *dsize;   /* not referenced here (presumably domain sizes - confirm) */
  int *hsize;             /* not referenced here (presumably halo sizes - confirm) */
  int *hofs;              /* not referenced here (presumably halo offsets - confirm) */
  int *oofs;              /* not referenced here (presumably owned-region offsets - confirm) */
  void *dmap;             /* not referenced here; opaque pointer */
};

/* Per-section timing totals; one double field per timed section. */
struct profiler
{
  double section0;  /* elapsed seconds accumulated by STOP_TIMER(section0, ...) */
};

/* Prototype with C linkage, so the symbol name "DevitoOperator" is not C++-mangled. */
extern "C" int DevitoOperator(
  const float a,
  struct dataobj *restrict u_vec,
  const float dt,
  const float h_x,
  const float h_y,
  const int time_M,
  const int time_m,
  const int x_M,
  const int x_m,
  const int y_M,
  const int y_m,
  const int deviceid,
  const int devicerm,
  struct profiler * timers);


/*
 * Time-stepping stencil kernel over the float array described by u_vec,
 * offloaded with OpenACC.
 *
 * For every time in [time_m, time_M] the buffer with index time%2 is read and
 * the buffer with index (time + 1)%2 is written, for all x in [x_m, x_M] and
 * y in [y_m, y_M] (bounds inclusive). Each written point is
 *     dt*(a*(d2x + d2y) + u/dt)
 * where d2x and d2y are the centred second differences of the read buffer
 * along x and y, scaled by 1/h_x^2 and 1/h_y^2.
 * NOTE(review): this looks like an explicit 2-D diffusion update - confirm.
 *
 * a          coefficient applied to the sum of second differences
 * u_vec      array descriptor; data is viewed as float[size[0]][size[1]][size[2]]
 * dt         time step (1/dt is also used)
 * h_x, h_y   grid spacings along x and y
 * time_M/m   last / first time index, inclusive
 * x_M/m      last / first x index, inclusive
 * y_M/m      last / first y index, inclusive
 * deviceid   device number passed to acc_set_device_num, skipped when -1
 * devicerm   condition of the final "exit data delete" directive
 * timers     section0 accumulates the wall-clock seconds spent in the kernel loops
 *
 * Always returns 0.
 */
int DevitoOperator(const float a, struct dataobj *restrict u_vec, const float dt, const float h_x, const float h_y, const int time_M, const int time_m, const int x_M, const int x_m, const int y_M, const int y_m, const int deviceid, const int devicerm, struct profiler * timers)
{
  /* OpenACC setup: initialise the NVIDIA runtime, then pick a device unless deviceid is -1. */
  acc_init(acc_device_nvidia);
  if (deviceid != -1)
  {
    acc_set_device_num(deviceid, acc_device_nvidia);
  }

  /* Typed view of the raw buffer as u[time buffer][x][y]; the inner extents come from the descriptor. */
  float (*restrict u)[u_vec->size[1]][u_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[u_vec->size[1]][u_vec->size[2]]) u_vec->data;

  #pragma acc enter data copyin(u[0:u_vec->size[0]][0:u_vec->size[1]][0:u_vec->size[2]])

  /* Loop-invariant reciprocals. */
  float inv_dt = 1.0F/dt;
  float inv_hx2 = 1.0F/(h_x*h_x);
  float inv_hy2 = 1.0F/(h_y*h_y);

  for (int step = time_m; step <= time_M; ++step)
  {
    /* Two time buffers alternate roles: cur is read, nxt is written. */
    int cur = step%2;
    int nxt = (step + 1)%2;

    /* Begin section0 */
    START_TIMER(section0)
    #pragma acc parallel loop collapse(2) present(u)
    for (int ix = x_m; ix <= x_M; ++ix)
    {
      for (int iy = y_m; iy <= y_M; ++iy)
      {
        /* Indices are shifted by +2 (presumably past a halo/padding region - confirm). */
        float centre2 = -2.0F*u[cur][ix + 2][iy + 2];
        u[nxt][ix + 2][iy + 2] = dt*(a*(inv_hx2*centre2 + inv_hx2*u[cur][ix + 1][iy + 2] + inv_hx2*u[cur][ix + 3][iy + 2] + inv_hy2*centre2 + inv_hy2*u[cur][ix + 2][iy + 1] + inv_hy2*u[cur][ix + 2][iy + 3]) + inv_dt*u[cur][ix + 2][iy + 2]);
      }
    }
    STOP_TIMER(section0,timers)
    /* End section0 */
  }

  /* Copy the result back to the host; the delete directive only applies when devicerm is non-zero. */
  #pragma acc exit data copyout(u[0:u_vec->size[0]][0:u_vec->size[1]][0:u_vec->size[2]])
  #pragma acc exit data delete(u[0:u_vec->size[0]][0:u_vec->size[1]][0:u_vec->size[2]]) if(devicerm)

  return 0;
}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#define _POSIX_C_SOURCE 200809L
/*
 * Wall-clock section timers built on gettimeofday().
 *
 * START_TIMER(S)  declares the locals start_S and end_S in the current scope
 *                 and records the start time. Because it introduces
 *                 declarations it must be used at most once per scope for a
 *                 given S and cannot be wrapped in a block of its own.
 * STOP_TIMER(S,T) records the end time and adds the elapsed time in seconds
 *                 (whole seconds plus microseconds / 1000000) to the field S
 *                 of the struct that T points to. T is parenthesized so that
 *                 any pointer expression (e.g. &prof) expands correctly.
 *
 * Both macros carry their own trailing ';' so they can be invoked without one.
 */
#define START_TIMER(S) struct timeval start_ ## S , end_ ## S ; gettimeofday(&start_ ## S , NULL);
#define STOP_TIMER(S,T) gettimeofday(&end_ ## S, NULL); (T)->S += (double)(end_ ## S .tv_sec-start_ ## S.tv_sec)+(double)(end_ ## S .tv_usec-start_ ## S .tv_usec)/1000000;

#include "stdlib.h"
#include "math.h"
#include "sys/time.h"
#include "openacc.h"

/*
 * Descriptor for one array argument handed to the generated kernel.
 * Only `data` and `size` are read by the code in this file; the notes on the
 * remaining fields are guesses from their names and need confirming against
 * the code that fills this struct in.
 */
struct dataobj
{
  void *restrict data;    /* raw buffer; cast to a typed pointer by the kernel */
  unsigned long *size;    /* per-dimension extents; size[0..2] are used as array bounds */
  unsigned long *npsize;  /* not referenced here (presumably non-padded sizes - confirm) */
  unsigned long *dsize;   /* not referenced here (presumably domain sizes - confirm) */
  int *hsize;             /* not referenced here (presumably halo sizes - confirm) */
  int *hofs;              /* not referenced here (presumably halo offsets - confirm) */
  int *oofs;              /* not referenced here (presumably owned-region offsets - confirm) */
  void *dmap;             /* not referenced here; opaque pointer */
};

/* Per-section timing totals; one double field per timed section. */
struct profiler
{
  double section0;  /* elapsed seconds accumulated by STOP_TIMER(section0, ...) */
};

/* Prototype with C linkage, so the symbol name "norm2" is not C++-mangled. */
extern "C" int norm2(
  struct dataobj *restrict n_vec,
  struct dataobj *restrict u_vec,
  const int time_M,
  const int time_m,
  const int x_M,
  const int x_m,
  const int y_M,
  const int y_m,
  const int deviceid,
  const int devicerm,
  struct profiler * timers);


/*
 * Sum-of-squares reduction over the float array described by u_vec, offloaded
 * with OpenACC.
 *
 * For every time in [time_m, time_M] the buffer with index time%2 is read and
 * the squares of all points with x in [x_m, x_M] and y in [y_m, y_M] (bounds
 * inclusive) are added to one double accumulator. The total is stored in the
 * first element of n_vec's data; no square root is taken here.
 *
 * n_vec      output descriptor; data is viewed as double* and element 0 is written
 * u_vec      input descriptor; data is viewed as float[size[0]][size[1]][size[2]]
 * time_M/m   last / first time index, inclusive
 * x_M/m      last / first x index, inclusive
 * y_M/m      last / first y index, inclusive
 * deviceid   device number passed to acc_set_device_num, skipped when -1
 * devicerm   condition of the final "exit data delete" directive
 * timers     section0 accumulates the wall-clock seconds spent in the kernel loops
 *
 * Always returns 0.
 */
int norm2(struct dataobj *restrict n_vec, struct dataobj *restrict u_vec, const int time_M, const int time_m, const int x_M, const int x_m, const int y_M, const int y_m, const int deviceid, const int devicerm, struct profiler * timers)
{
  /* OpenACC setup: initialise the NVIDIA runtime, then pick a device unless deviceid is -1. */
  acc_init(acc_device_nvidia);
  if (deviceid != -1)
  {
    acc_set_device_num(deviceid, acc_device_nvidia);
  }

  /* Typed views of the raw buffers: a double output slot and u[time buffer][x][y]. */
  double (*restrict result) __attribute__ ((aligned (64))) = (double (*)) n_vec->data;
  float (*restrict u)[u_vec->size[1]][u_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[u_vec->size[1]][u_vec->size[2]]) u_vec->data;

  #pragma acc enter data copyin(u[0:u_vec->size[0]][0:u_vec->size[1]][0:u_vec->size[2]])

  double sum = 0.0;

  for (int step = time_m; step <= time_M; ++step)
  {
    /* Time buffer read during this step. */
    int cur = step%2;

    /* Begin section0 */
    START_TIMER(section0)
    #pragma acc parallel loop collapse(2) reduction(+:sum) present(u)
    for (int ix = x_m; ix <= x_M; ++ix)
    {
      for (int iy = y_m; iy <= y_M; ++iy)
      {
        /* Indices are shifted by +2 (presumably past a halo/padding region - confirm). */
        sum += u[cur][ix + 2][iy + 2]*u[cur][ix + 2][iy + 2];
      }
    }
    STOP_TIMER(section0,timers)
    /* End section0 */
  }

  result[0] = sum;

  /* u is only read, so nothing is copied back; the delete applies when devicerm is non-zero. */
  #pragma acc exit data delete(u[0:u_vec->size[0]][0:u_vec->size[1]][0:u_vec->size[2]]) if(devicerm)

  return 0;
}
Binary file not shown.

0 comments on commit c107d49

Please sign in to comment.