Skip to content

Commit

Permalink
Merge pull request #3 from jdinan/pr/1.4.2
Browse files: browse the repository at this point in the history
Update to v1.4.2
  • Loading branch information
jdinan authored Oct 6, 2018
2 parents 41e36a8 + ae356a8 commit 9534c62
Show file tree
Hide file tree
Showing 37 changed files with 2,287 additions and 1,360 deletions.
5 changes: 3 additions & 2 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

dnl Init Autoconf/Automake/Libtool

AC_INIT([Sandia OpenSHMEM Test Suite], [1.4.1], [https://github.com/Sandia-OpenSHMEM/SOS])
AC_INIT([Sandia OpenSHMEM Test Suite], [1.4.2], [https://github.com/Sandia-OpenSHMEM/SOS])
AC_PREREQ([2.60])
AC_CONFIG_AUX_DIR([config])
AC_CONFIG_MACRO_DIR([config])
Expand Down Expand Up @@ -76,7 +76,7 @@ AM_CONDITIONAL([HAVE_OPENMP], [test "$enable_threads" != "no" -a "$enable_openmp

AC_ARG_ENABLE([lengthy-tests],
[AC_HELP_STRING([--enable-lengthy-tests],
[Enable long running tests in the test suite (default: disabled)])])
[Execute long running tests as part of "make check" (default: disabled)])])
AM_CONDITIONAL([ENABLE_LENGTHY_TESTS], [test "$enable_lengthy_tests" = "yes"])

AC_ARG_ENABLE([fortran],
Expand Down Expand Up @@ -185,6 +185,7 @@ AM_CONDITIONAL([HAVE_CXX], [test "$enable_cxx" != "no" ])
AM_CONDITIONAL([ENABLE_PROFILING], [test "$enable_profiling" = "yes" ])

dnl make tests work in standalone mode
AM_CONDITIONAL([USE_PMI_MPI], [false])
AM_CONDITIONAL([USE_PMI_SIMPLE], [false])
AM_CONDITIONAL([USE_PORTALS4], [false])
AM_CONDITIONAL([HAVE_LONG_FORTRAN_HEADER], [false])
Expand Down
2 changes: 1 addition & 1 deletion test/apps/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -54,5 +54,5 @@ LDADD += $(top_builddir)/pmi-simple/libpmi_simple.la
endif

mandelbrot_LDFLAGS = $(AM_LDFLAGS) $(PTHREAD_LIBS)
mandelbrot_CFLAGS = -I$(top_srcdir)/test/unit $(PTHREAD_CFLAGS)
mandelbrot_CFLAGS = -I$(top_srcdir)/test/include $(PTHREAD_CFLAGS)
mandelbrot_LDADD = $(LDADD) $(PTHREAD_CFLAGS)
18 changes: 10 additions & 8 deletions test/apps/gups.c
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@
#define ZERO64B 0LL

uint64_t TotalMemOpt = 8192;
int NumUpdatesOpt = 0; /* FIXME: This option is ignored */
uint64_t NumUpdatesOpt = 0;
double SHMEMGUPs;
double SHMEMRandomAccess_ErrorsFraction;
double SHMEMRandomAccess_time;
Expand Down Expand Up @@ -324,9 +324,7 @@ SHMEMRandomAccess(void)
double TotalMem;
static int sAbort, rAbort;

uint64_t NumUpdates_Default; /* Number of updates to table (suggested: 4x number of table entries) */
uint64_t NumUpdates; /* actual number of updates to table - may be smaller than
* NumUpdates_Default due to execution time bounds */
uint64_t NumUpdates; /* total number of updates to table */
uint64_t ProcNumUpdates; /* number of updates per processor */

static long pSync_bcast[SHMEM_BCAST_SYNC_SIZE];
Expand Down Expand Up @@ -421,9 +419,13 @@ SHMEMRandomAccess(void)
HPCC_PELock[i] = 0;

/* Default number of global updates to table: 4x number of table entries */
NumUpdates_Default = 4 * TableSize;
ProcNumUpdates = 4 * LocalTableSize;
NumUpdates = NumUpdates_Default;
if (NumUpdatesOpt == 0) {
ProcNumUpdates = 4 * LocalTableSize;
NumUpdates = 4 * TableSize;
} else {
ProcNumUpdates = NumUpdatesOpt;
NumUpdates = NumUpdatesOpt * NumProcs;
}

if (MyProc == 0) {
fprintf( outFile, "Running on %d processors\n", NumProcs);
Expand All @@ -432,7 +434,7 @@ SHMEMRandomAccess(void)
fprintf( outFile, "PE Main table size = (2^%" PRIu64 ")/%d = %" PRIu64 " words/PE MAX\n",
logTableSize, NumProcs, LocalTableSize);

fprintf( outFile, "Default number of updates (RECOMMENDED) = %" PRIu64 "\n", NumUpdates_Default);
fprintf( outFile, "Total number of updates = %" PRIu64 "\n", NumUpdates);
}

/* Initialize main table */
Expand Down
4 changes: 2 additions & 2 deletions test/apps/mandelbrot.c
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,8 @@ static void *thread_worker(void *arg) {
// Malloc local (non-symmetric) buffers
pixels[0] = malloc(sizeof(int)*job_points);
pixels[1] = malloc(sizeof(int)*job_points);
pe_mask = malloc(sizeof(int)*npes);
pe_ct_max = malloc(sizeof(int)*npes);
pe_mask = calloc(npes, sizeof(int));
pe_ct_max = calloc(npes, sizeof(int));

if (NULL == pixels[0] || NULL == pixels[1] || NULL == pe_mask || NULL == pe_ct_max) {
printf("%d, %d: Error, thread malloc failed\n", me, tid);
Expand Down
3 changes: 2 additions & 1 deletion test/include/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@
# distribution.

noinst_HEADERS = \
uthash.h
uthash.h \
pthread_barrier.h
File renamed without changes.
31 changes: 27 additions & 4 deletions test/performance/shmem_perf_suite/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@

check_PROGRAMS = \
shmem_latency_put_perf \
shmem_latency_put_perf_nb \
shmem_latency_get_perf \
shmem_latency_get_perf_nb \
shmem_bw_put_perf \
shmem_bw_put_perf_nb \
shmem_bibw_put_perf \
Expand All @@ -18,8 +20,6 @@ check_PROGRAMS = \
shmem_bw_get_perf_nb \
shmem_bibw_get_perf \
shmem_bibw_get_perf_nb \
shmem_latency_nb_put_perf \
shmem_latency_nb_get_perf \
shmem_bw_atomics_perf \
shmem_bibw_atomics_perf

Expand All @@ -28,7 +28,11 @@ check_PROGRAMS += \
shmem_bw_put_ctx_perf \
shmem_bw_put_ctx_perf_nb \
shmem_bibw_put_ctx_perf \
shmem_bibw_put_ctx_perf_nb
shmem_bibw_put_ctx_perf_nb \
shmem_latency_put_ctx_perf \
shmem_latency_put_ctx_perf_nb \
shmem_latency_get_ctx_perf \
shmem_latency_get_ctx_perf_nb
endif

noinst_HEADERS = \
Expand All @@ -41,7 +45,8 @@ noinst_HEADERS = \
uni_dir_ctx.h \
bi_dir.h \
bi_dir_ctx.h \
target_put.h
target_put.h \
latency_ctx.h

if ENABLE_LENGTHY_TESTS
TESTS = $(check_PROGRAMS)
Expand All @@ -65,6 +70,12 @@ if USE_PMI_SIMPLE
LDADD += $(top_builddir)/pmi-simple/libpmi_simple.la
endif

shmem_latency_put_perf_nb_SOURCES = shmem_latency_put_perf.c
shmem_latency_put_perf_nb_CFLAGS = -DUSE_NONBLOCKING_API

shmem_latency_get_perf_nb_SOURCES = shmem_latency_get_perf.c
shmem_latency_get_perf_nb_CFLAGS = -DUSE_NONBLOCKING_API

shmem_bw_put_perf_nb_SOURCES = shmem_bw_put_perf.c
shmem_bw_put_perf_nb_CFLAGS = -DUSE_NONBLOCKING_API

Expand All @@ -86,3 +97,15 @@ shmem_bibw_put_ctx_perf_CFLAGS = $(AM_OPENMP_CFLAGS)

shmem_bibw_put_ctx_perf_nb_SOURCES = shmem_bibw_put_ctx_perf.c
shmem_bibw_put_ctx_perf_nb_CFLAGS = $(AM_OPENMP_CFLAGS) -DUSE_NONBLOCKING_API

shmem_latency_put_ctx_perf_CFLAGS = $(AM_OPENMP_CFLAGS)

shmem_latency_put_ctx_perf_nb_SOURCES = shmem_latency_put_ctx_perf.c
shmem_latency_put_ctx_perf_nb_CFLAGS = $(AM_OPENMP_CFLAGS) -DUSE_NONBLOCKING_API

shmem_latency_get_ctx_perf_CFLAGS = $(AM_OPENMP_CFLAGS)

shmem_latency_get_ctx_perf_nb_SOURCES = shmem_latency_get_ctx_perf.c
shmem_latency_get_ctx_perf_nb_CFLAGS = $(AM_OPENMP_CFLAGS) -DUSE_NONBLOCKING_API

AM_CPPFLAGS += -DENABLE_THREADS
24 changes: 12 additions & 12 deletions test/performance/shmem_perf_suite/bi_dir.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@
* SOFTWARE.
*/

void static inline bi_bw_put(int len, perf_metrics_t *metric_info)
static inline void bi_bw_put(int len, perf_metrics_t * const metric_info)
{
double start = 0.0, end = 0.0;
int dest = partner_node(*metric_info);
int dest = partner_node(metric_info);
unsigned long int i = 0, j = 0;
static int check_once = 0;
static int fin = -1;
Expand All @@ -40,7 +40,7 @@ void static inline bi_bw_put(int len, perf_metrics_t *metric_info)
dest);
}
/* hostname validation for all sender and receiver processes */
int status = check_hostname_validation(*metric_info);
int status = check_hostname_validation(metric_info);
if (status != 0) return;
check_once++;
}
Expand All @@ -59,7 +59,7 @@ void static inline bi_bw_put(int len, perf_metrics_t *metric_info)
}

shmem_barrier_all();
if (streaming_node(*metric_info)) {
if (streaming_node(metric_info)) {
start = perf_shmemx_wtime();
}

Expand All @@ -74,22 +74,22 @@ void static inline bi_bw_put(int len, perf_metrics_t *metric_info)
shmem_quiet();
}

if (streaming_node(*metric_info)) {
if (streaming_node(metric_info)) {
shmem_int_p(&fin, 1, dest);
shmem_int_wait_until(&fin, SHMEM_CMP_EQ, 0);
end = perf_shmemx_wtime();
calc_and_print_results(end, start, len, *metric_info);
calc_and_print_results(end, start, len, metric_info);
} else {
shmem_int_wait_until(&fin, SHMEM_CMP_EQ, 1);
shmem_int_p(&fin, 0, dest);
}

}

void static inline bi_bw_get(int len, perf_metrics_t *metric_info)
static inline void bi_bw_get(int len, perf_metrics_t * const metric_info)
{
double start = 0.0, end = 0.0;
int dest = partner_node(*metric_info);
int dest = partner_node(metric_info);
unsigned long int i = 0, j = 0;
static int check_once = 0;
static int fin = -1;
Expand All @@ -101,7 +101,7 @@ void static inline bi_bw_get(int len, perf_metrics_t *metric_info)
dest);
}
/* hostname validation for all sender and receiver processes */
int status = check_hostname_validation(*metric_info);
int status = check_hostname_validation(metric_info);
if (status != 0) return;
check_once++;
}
Expand All @@ -125,7 +125,7 @@ void static inline bi_bw_get(int len, perf_metrics_t *metric_info)
}

shmem_barrier_all();
if (streaming_node(*metric_info)) {
if (streaming_node(metric_info)) {
start = perf_shmemx_wtime();
}

Expand All @@ -145,11 +145,11 @@ void static inline bi_bw_get(int len, perf_metrics_t *metric_info)
#endif
}

if (streaming_node(*metric_info)) {
if (streaming_node(metric_info)) {
shmem_int_p(&fin, 1, dest);
shmem_int_wait_until(&fin, SHMEM_CMP_EQ, 0);
end = perf_shmemx_wtime();
calc_and_print_results(end, start, len, *metric_info);
calc_and_print_results(end, start, len, metric_info);
} else {
shmem_int_wait_until(&fin, SHMEM_CMP_EQ, 1);
shmem_int_p(&fin, 0, dest);
Expand Down
58 changes: 27 additions & 31 deletions test/performance/shmem_perf_suite/bi_dir_ctx.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,44 +26,42 @@
*/


void static inline bi_bw_ctx (int len, perf_metrics_t *metric_info)
static inline void bi_bw_ctx (int len, perf_metrics_t *metric_info)
{
double start = 0.0, end = 0.0;
int dest = partner_node(*metric_info);
int j = 0;
char *src = aligned_buffer_alloc(metric_info->nthreads * len);
char *dst = aligned_buffer_alloc(metric_info->nthreads * len);
assert(src && dst);
int dest = partner_node(metric_info);
unsigned long int i, j;
static int check_once = 0;

if (!check_once) {
/* check to see whether sender and receiver are the same process */
if (dest == metric_info->my_node) {
fprintf(stderr, "Warning: Sender and receiver are the same process (%d)\n",
dest);
fprintf(stderr, "Warning: Sender and receiver are the same "
"process (%d)\n", dest);
}
/* hostname validation for all sender and receiver processes */
int status = check_hostname_validation(*metric_info);
int status = check_hostname_validation(metric_info);
if (status != 0) return;
check_once++;
}

shmem_barrier_all();

#pragma omp parallel default(none) firstprivate(len, dest) private(j) \
shared(metric_info, src, dst, start, end) num_threads(metric_info->nthreads)
#pragma omp parallel default(none) firstprivate(len, dest) private(i, j) \
shared(metric_info, start, end) num_threads(metric_info->nthreads)
{
int i;
const int thread_id = omp_get_thread_num();
shmem_ctx_t ctx;
shmem_ctx_create(SHMEM_CTX_PRIVATE, &ctx);

for (i = 0; i < metric_info->warmup; i++) {
for(j = 0; j < metric_info->window_size; j++) {
#ifdef USE_NONBLOCKING_API
shmem_ctx_putmem_nbi(ctx, dst + thread_id * len, src + thread_id * len, len, dest);
shmem_ctx_putmem_nbi(ctx, metric_info->dest + thread_id * len,
metric_info->src + thread_id * len, len, dest);
#else
shmem_ctx_putmem(ctx, dst + thread_id * len, src + thread_id * len, len, dest);
shmem_ctx_putmem(ctx, metric_info->dest + thread_id * len,
metric_info->src + thread_id * len, len, dest);
#endif
}
shmem_ctx_quiet(ctx);
Expand All @@ -72,11 +70,10 @@ void static inline bi_bw_ctx (int len, perf_metrics_t *metric_info)
}

shmem_barrier_all();
if (streaming_node(*metric_info)) {
#pragma omp parallel default(none) firstprivate(len, dest) private(j) \
shared(metric_info, src, dst, start, end) num_threads(metric_info->nthreads)
if (streaming_node(metric_info)) {
#pragma omp parallel default(none) firstprivate(len, dest) private(i, j) \
shared(metric_info, start, end) num_threads(metric_info->nthreads)
{
int i;
const int thread_id = omp_get_thread_num();
shmem_ctx_t ctx;
shmem_ctx_create(SHMEM_CTX_PRIVATE, &ctx);
Expand All @@ -89,30 +86,33 @@ void static inline bi_bw_ctx (int len, perf_metrics_t *metric_info)
for (i = 0; i < metric_info->trials; i++) {
for(j = 0; j < metric_info->window_size; j++) {
#ifdef USE_NONBLOCKING_API
shmem_ctx_putmem_nbi(ctx, dst + thread_id * len, src + thread_id * len, len, dest);
shmem_ctx_putmem_nbi(ctx, metric_info->dest + thread_id * len,
metric_info->src + thread_id * len, len, dest);
#else
shmem_ctx_putmem(ctx, dst + thread_id * len, src + thread_id * len, len, dest);
shmem_ctx_putmem(ctx, metric_info->dest + thread_id * len,
metric_info->src + thread_id * len, len, dest);
#endif
}
shmem_ctx_quiet(ctx);
}
shmem_ctx_destroy(ctx);
}
} else {
#pragma omp parallel default(none) firstprivate(len, dest) private(j) \
shared(metric_info, src, dst, start, end) num_threads(metric_info->nthreads)
#pragma omp parallel default(none) firstprivate(len, dest) private(i, j) \
shared(metric_info, start, end) num_threads(metric_info->nthreads)
{
int i;
const int thread_id = omp_get_thread_num();
shmem_ctx_t ctx;
shmem_ctx_create(SHMEM_CTX_PRIVATE, &ctx);

for (i = 0; i < metric_info->trials; i++) {
for(j = 0; j < metric_info->window_size; j++) {
#ifdef USE_NONBLOCKING_API
shmem_ctx_putmem_nbi(ctx, dst + thread_id * len, src + thread_id * len, len, dest);
shmem_ctx_putmem_nbi(ctx, metric_info->dest + thread_id * len,
metric_info->src + thread_id * len, len, dest);
#else
shmem_ctx_putmem(ctx, dst + thread_id * len, src + thread_id * len, len, dest);
shmem_ctx_putmem(ctx, metric_info->dest + thread_id * len,
metric_info->src + thread_id * len, len, dest);
#endif
}
shmem_ctx_quiet(ctx);
Expand All @@ -122,14 +122,10 @@ void static inline bi_bw_ctx (int len, perf_metrics_t *metric_info)
}

shmem_barrier_all();
if (streaming_node(*metric_info)) {
if (streaming_node(metric_info)) {
end = perf_shmemx_wtime();
calc_and_print_results(end, start, len, *metric_info);
calc_and_print_results(end, start, len, metric_info);
}

shmem_barrier_all();

aligned_buffer_free(src);
aligned_buffer_free(dst);

}
Loading

0 comments on commit 9534c62

Please sign in to comment.