diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..483325c
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,46 @@
+dist: trusty
+language: c
+compiler:
+  - gcc
+env:
+  global:
+    - TRAVIS_PAR_MAKE="-j 4"
+    - FI_LOG_LEVEL=warn
+    - SHMEM_INFO=1
+os:
+  - linux
+
+addons:
+  apt:
+    packages:
+      - gfortran
+
+# Fetch and run the SOS build script; the tests are later built with the oshcc wrappers from its install/bin
+before_install:
+  - mkdir $HOME/travis
+  - mkdir $HOME/travis/build
+  - mkdir $HOME/travis/install
+  - export SOS_SRC=$PWD
+  - cd $HOME/travis
+  - wget https://raw.githubusercontent.com/Sandia-OpenSHMEM/SOS/master/scripts/simple-build-ofi.sh
+  - chmod +x simple-build-ofi.sh
+  - export SOS_VERSION="master"
+  - ./simple-build-ofi.sh $HOME/travis/build $HOME/travis/install
+
+install:
+  - cd $SOS_SRC
+  - ./autogen.sh
+
+script:
+  - export PATH=$HOME/travis/install/bin:$PATH
+  - cd $SOS_SRC
+  - mkdir build
+  - cd build
+  - oshcc -showme
+  - oshc++ -showme
+  - oshfort -showme
+  # FIXME: Fortran and threads tests temporarily disabled because of
+  # compilation issues in Travis environment
+  - ../configure CC=oshcc CXX=oshc++ FC=oshfort CFLAGS="-std=gnu99" FCFLAGS="-fcray-pointer" --disable-fortran --disable-threads
+  - make $TRAVIS_PAR_MAKE
+  - make check
diff --git a/test/Makefile.am b/test/Makefile.am
new file mode 100644
index 0000000..ad7acf7
--- /dev/null
+++ b/test/Makefile.am
@@ -0,0 +1,14 @@
+# -*- Makefile -*-
+#
+# Copyright 2011 Sandia Corporation. Under the terms of Contract
+# DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+# retains certain rights in this software.
+#
+# Copyright (c) 2017 Intel Corporation. All rights reserved.
+# This software is available to you under the BSD license.
+#
+# This file is part of the Sandia OpenSHMEM software package. For license
+# information, see the LICENSE file in the top level directory of the
+# distribution.
+
+SUBDIRS = unit performance apps
diff --git a/test/apps/Makefile.am b/test/apps/Makefile.am
new file mode 100644
index 0000000..00a5f53
--- /dev/null
+++ b/test/apps/Makefile.am
@@ -0,0 +1,58 @@
+# -*- Makefile -*-
+#
+# Copyright 2011 Sandia Corporation. Under the terms of Contract
+# DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+# retains certain rights in this software.
+#
+# Copyright (c) 2016 Los Alamos National Security, LLC. All rights reserved.
+#
+# Copyright (c) 2017 Intel Corporation. All rights reserved.
+# This software is available to you under the BSD license.
+#
+# This file is part of the Sandia OpenSHMEM software package. For license
+# information, see the LICENSE file in the top level directory of the
+# distribution.
+
+NPROCS ?= 2
+LOG_COMPILER = $(TEST_RUNNER)
+
+AM_LDFLAGS = $(LIBTOOL_WRAPPER_LDFLAGS)
+
+check_PROGRAMS = \
+    binary-search \
+    gups
+
+SHORT_TESTS = \
+    binary-search \
+    gups
+
+if HAVE_PTHREADS
+check_PROGRAMS += \
+    mandelbrot
+
+SHORT_TESTS += \
+    mandelbrot
+endif
+
+if ENABLE_LENGTHY_TESTS
+TESTS = $(check_PROGRAMS)
+else
+TESTS = $(SHORT_TESTS)
+endif
+
+if EXTERNAL_TESTS
+bin_PROGRAMS = $(check_PROGRAMS)
+AM_CPPFLAGS =
+LDADD =
+else
+AM_CPPFLAGS = -I$(top_builddir)/mpp
+LDADD = $(top_builddir)/src/libsma.la
+endif
+
+if USE_PMI_SIMPLE
+LDADD += $(top_builddir)/pmi-simple/libpmi_simple.la
+endif
+# A per-target _LDFLAGS replaces AM_LDFLAGS, so carry it over; libraries belong in _LDADD.
+mandelbrot_LDFLAGS = $(AM_LDFLAGS)
+mandelbrot_CFLAGS = -I$(top_srcdir)/test/unit $(PTHREAD_CFLAGS)
+mandelbrot_LDADD = $(LDADD) $(PTHREAD_LIBS)
diff --git a/test/apps/binary-search.c b/test/apps/binary-search.c
new file mode 100644
index 0000000..825febb
--- /dev/null
+++ b/test/apps/binary-search.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <shmem.h>
+
+#define N_PER_PE 10
+
+/* N_PER_PE*N_PES sorted, distributed shared array */
+int keys[N_PER_PE];
+/* Return the global index of key in the distributed keys array, or -1 if it is absent. */
+static int binary_search(int key) {
+    int low, mid, high;
+
+    low = 0;
+    high = shmem_n_pes()*N_PER_PE;
+
+    while(low < high) {
+        int val;
+
+        mid = low + (high-low)/2;
+        val = shmem_int_g(&keys[mid%N_PER_PE], mid/N_PER_PE);
+
+        if(val == key) {
+            return mid;
+        } else if(val < key) {
+            low = mid + 1; /* mid is ruled out; skipping it guarantees the range shrinks */
+        } else {
+            high = mid;
+        }
+    }
+
+    return -1;
+}
+
+int main(int argc, char **argv) {
+    int i, errors = 0;
+
+    shmem_init();
+
+    for (i = 0; i < N_PER_PE; i++)
+        keys[i] = N_PER_PE * shmem_my_pe() + i;
+
+    shmem_barrier_all();
+
+    for (i = 0; i < N_PER_PE * shmem_n_pes(); i++) {
+        int j = binary_search(i);
+        if (j != i) {
+            printf("%2d: Error searching for %d. Found at index %d, expected %d\n",
+                   shmem_my_pe(), i, j, i);
+            errors++;
+        }
+    }
+
+    shmem_finalize();
+
+    return errors;
+}
diff --git a/test/apps/gups.c b/test/apps/gups.c
new file mode 100644
index 0000000..46fb90e
--- /dev/null
+++ b/test/apps/gups.c
@@ -0,0 +1,563 @@
+/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- */
+
+/*
+ * This code has been contributed by the DARPA HPCS program. Contact
+ * David Koester or Bob Lucas
+ * if you have questions.
+ *
+ *
+ * GUPS (Giga UPdates per Second) is a measurement that profiles the memory
+ * architecture of a system and is a measure of performance similar to MFLOPS.
+ * The HPCS HPCchallenge RandomAccess benchmark is intended to exercise the
+ * GUPS capability of a system, much like the LINPACK benchmark is intended to
+ * exercise the MFLOPS capability of a computer. In each case, we would
+ * expect these benchmarks to achieve close to the "peak" capability of the
+ * memory system. The extent of the similarities between RandomAccess and
+ * LINPACK are limited to both benchmarks attempting to calculate a peak system
+ * capability.
+ *
+ * GUPS is calculated by identifying the number of memory locations that can be
+ * randomly updated in one second, divided by 1 billion (1e9). The term "randomly"
+ * means that there is little relationship between one address to be updated and
+ * the next, except that they occur in the space of one half the total system
+ * memory. An update is a read-modify-write operation on a table of 64-bit words.
+ * An address is generated, the value at that address read from memory, modified
+ * by an integer operation (add, and, or, xor) with a literal value, and that
+ * new value is written back to memory.
+ *
+ * We are interested in knowing the GUPS performance of both entire systems and
+ * system subcomponents --- e.g., the GUPS rating of a distributed memory
+ * multiprocessor the GUPS rating of an SMP node, and the GUPS rating of a
+ * single processor.
While there is typically a scaling of FLOPS with processor + * count, a similar phenomenon may not always occur for GUPS. + * + * Select the memory size to be the power of two such that 2^n <= 1/2 of the + * total memory. Each CPU operates on its own address stream, and the single + * table may be distributed among nodes. The distribution of memory to nodes + * is left to the implementer. A uniform data distribution may help balance + * the workload, while non-uniform data distributions may simplify the + * calculations that identify processor location by eliminating the requirement + * for integer divides. A small (less than 1%) percentage of missed updates + * are permitted. + * + * When implementing a benchmark that measures GUPS on a distributed memory + * multiprocessor system, it may be required to define constraints as to how + * far in the random address stream each node is permitted to "look ahead". + * Likewise, it may be required to define a constraint as to the number of + * update messages that can be stored before processing to permit multi-level + * parallelism for those systems that support such a paradigm. The limits on + * "look ahead" and "stored updates" are being implemented to assure that the + * benchmark meets the intent to profile memory architecture and not induce + * significant artificial data locality. For the purpose of measuring GUPS, + * we will stipulate that each thread is permitted to look ahead no more than + * 1024 random address stream samples with the same number of update messages + * stored before processing. + * + * The supplied MPI-1 code generates the input stream {A} on all processors + * and the global table has been distributed as uniformly as possible to + * balance the workload and minimize any Amdahl fraction. This code does not + * exploit "look-ahead". Addresses are sent to the appropriate processor + * where the table entry resides as soon as each address is calculated. 
+ * Updates are performed as addresses are received. Each message is limited + * to a single 64 bit long integer containing element ai from {A}. + * Local offsets for T[ ] are extracted by the destination processor. + * + * If the number of processors is equal to a power of two, then the global + * table can be distributed equally over the processors. In addition, the + * processor number can be determined from that portion of the input stream + * that identifies the address into the global table by masking off log2(p) + * bits in the address. + * + * If the number of processors is not equal to a power of two, then the global + * table cannot be equally distributed between processors. In the MPI-1 + * implementation provided, there has been an attempt to minimize the differences + * in workloads and the largest difference in elements of T[ ] is one. The + * number of values in the input stream generated by each processor will be + * related to the number of global table entries on each processor. + * + * The MPI-1 version of RandomAccess treats the potential instance where the + * number of processors is a power of two as a special case, because of the + * significant simplifications possible because processor location and local + * offset can be determined by applying masks to the input stream values. + * The non power of two case uses an integer division to determine the processor + * location. The integer division will be more costly in terms of machine + * cycles to perform than the bit masking operations. + * + * For additional information on the GUPS metric, the HPCchallenge RandomAccess + * Benchmark, and the rules to run RandomAccess or modify it to optimize + * performance -- see http://icl.cs.utk.edu/hpcc/ + * + */ + +/* Jan 2005 + * + * This code has been modified to allow local bucket sorting of updates. + * The total maximum number of updates in the local buckets of a process + * is currently defined in "RandomAccess.h" as MAX_TOTAL_PENDING_UPDATES.
+ * When the total maximum number of updates is reached, the process selects + * the bucket (or destination process) with the largest number of + * updates and sends out all the updates in that bucket. See buckets.c + * for details about the buckets' implementation. + * + * This code also supports posting multiple MPI receive descriptors (based + * on a contribution by David Addison). + * + * In addition, this implementation provides an option for limiting + * the execution time of the benchmark to a specified time bound + * (see time_bound.c). The time bound is currently defined in + * time_bound.h, but it should be a benchmark parameter. By default + * the benchmark will execute the recommended number of updates, + * that is, four times the global table size. + */ + +/* + * OpenSHMEM version: + * + * Copyright (c) 2011 - 2015 + * University of Houston System and UT-Battelle, LLC. + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * o Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * o Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * o Neither the name of the University of Houston System, + * UT-Battelle, LLC. nor the names of its contributors may be used to + * endorse or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights reserved. + * + * This file is part of the Sandia OpenSHMEM software package. For license + * information, see the LICENSE file in the top level directory of the + * distribution. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if 0 +#include "config.h" +#endif + +/* Random number generator */ +#define POLY 0x0000000000000007UL +#define PERIOD 1317624576693539401L + +/* Define 64-bit constants */ +#define ZERO64B 0LL + +uint64_t TotalMemOpt = 8192; +int NumUpdatesOpt = 0; /* FIXME: This option is ignored */ +double SHMEMGUPs; +double SHMEMRandomAccess_ErrorsFraction; +double SHMEMRandomAccess_time; +double SHMEMRandomAccess_CheckTime; +int Failure; + +/* Allocate main table (in global memory) */ +uint64_t *HPCC_Table; +long *HPCC_PELock; + +static uint64_t GlobalStartMyProc; + +int SHMEMRandomAccess(void); + +static double RTSEC(void) +{ + struct timeval tp; + gettimeofday (&tp, NULL); + return tp.tv_sec + tp.tv_usec/(double)1.0e6; +} + +static void print_usage(void) +{ + fprintf(stderr, "\nOptions:\n"); + fprintf(stderr, " %-20s %s\n", "-h", "display this help message"); + fprintf(stderr, " %-20s %s\n", "-m", "memory in bytes per PE"); + fprintf(stderr, " %-20s %s\n", "-n", "number of updates per PE"); + + return; +} + +static int64_t starts(uint64_t n) +{ + /* int64_t i, j; */ + int i, j; + 
uint64_t m2[64]; + uint64_t temp, ran; + + /* + * this loop doesn't make sense + * so commenting out. + */ +#if 0 + while (n < 0) + n += PERIOD; +#endif + while (n > PERIOD) + n -= PERIOD; + if (n == 0) + return 0x1; + + temp = 0x1; + for (i=0; i<64; i++) { + m2[i] = temp; + temp = (temp << 1) ^ ((int64_t) temp < 0 ? POLY : 0); + temp = (temp << 1) ^ ((int64_t) temp < 0 ? POLY : 0); + } + + for (i=62; i>=0; i--) + if ((n >> i) & 1) break; + + ran = 0x2; + + while (i > 0) { + temp = 0; + for (j=0; j<64; j++) + if ((ran >> j) & 1) temp ^= m2[j]; + ran = temp; + i -= 1; + if ((n >> i) & 1) + ran = (ran << 1) ^ ((int64_t) ran < 0 ? POLY : 0); + } + + return ran; +} + +static void +UpdateTable(uint64_t *Table, + uint64_t TableSize, + uint64_t MinLocalTableSize, + uint64_t Top, + int Remainder, + uint64_t niterate, + int use_lock) +{ + uint64_t iterate; + int index; + uint64_t ran, global_offset; + int remote_pe; + int global_start_at_pe; +#ifdef USE_GET_PUT + uint64_t remote_val; +#endif + + shmem_barrier_all(); + + /* setup: should not really be part of this timed routine */ + ran = starts(4*GlobalStartMyProc); + + for (iterate = 0; iterate < niterate; iterate++) { + ran = (ran << 1) ^ ((int64_t) ran < ZERO64B ? 
POLY : ZERO64B); + global_offset = ran & (TableSize-1); + if (global_offset < Top) { + remote_pe = global_offset / (MinLocalTableSize + 1); + global_start_at_pe = (MinLocalTableSize + 1) * remote_pe; + } else { + remote_pe = (global_offset - Remainder) / MinLocalTableSize; + global_start_at_pe = MinLocalTableSize * remote_pe + Remainder; + } + index = global_offset - global_start_at_pe; + + if (use_lock) shmem_set_lock(&HPCC_PELock[remote_pe]); +#ifdef USE_GET_PUT + remote_val = (uint64_t) shmem_long_g((long *)&Table[index], remote_pe); + remote_val ^= ran; + shmem_long_p((long *)&Table[index], remote_val, remote_pe); +#else + shmem_uint64_atomic_xor(&Table[index], ran, remote_pe); +#endif + if (use_lock) shmem_clear_lock(&HPCC_PELock[remote_pe]); + } + + shmem_barrier_all(); + +} + +int +SHMEMRandomAccess(void) +{ + int64_t i; + static int64_t NumErrors, GlbNumErrors; + + int NumProcs, MyProc; + int Remainder; /* Number of processors with (LocalTableSize + 1) entries */ + uint64_t Top; /* Number of table entries in top of Table */ + uint64_t LocalTableSize; /* Local table width */ + uint64_t MinLocalTableSize; /* Integer ratio TableSize/NumProcs */ + uint64_t logTableSize, TableSize; + + double RealTime; /* Real time to update table */ + + double TotalMem; + static int sAbort, rAbort; + + uint64_t NumUpdates_Default; /* Number of updates to table (suggested: 4x number of table entries) */ + uint64_t NumUpdates; /* actual number of updates to table - may be smaller than + * NumUpdates_Default due to execution time bounds */ + uint64_t ProcNumUpdates; /* number of updates per processor */ + + static long pSync_bcast[SHMEM_BCAST_SYNC_SIZE]; + static long long int llpWrk[SHMEM_REDUCE_MIN_WRKDATA_SIZE]; + + static long pSync_reduce[SHMEM_REDUCE_SYNC_SIZE]; + static int ipWrk[SHMEM_REDUCE_MIN_WRKDATA_SIZE]; + + FILE *outFile = NULL; + double *GUPs; + double *temp_GUPs; + + + for (i = 0; i < SHMEM_BCAST_SYNC_SIZE; i += 1){ + pSync_bcast[i] = SHMEM_SYNC_VALUE; + } + + for 
(i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i += 1){ + pSync_reduce[i] = SHMEM_SYNC_VALUE; + } + + SHMEMGUPs = -1; + GUPs = &SHMEMGUPs; + + NumProcs = shmem_n_pes(); + MyProc = shmem_my_pe(); + + if (0 == MyProc) { + outFile = stdout; + setbuf(outFile, NULL); + } + + /* + * TODO: replace this + */ + + TotalMem = TotalMemOpt; /* max single node memory */ + TotalMem *= NumProcs; /* max memory in NumProcs nodes */ + + TotalMem /= sizeof(uint64_t); + + /* calculate TableSize --- the size of update array (must be a power of 2) */ + for (TotalMem *= 0.5, logTableSize = 0, TableSize = 1; + TotalMem >= 1.0; + TotalMem *= 0.5, logTableSize++, TableSize <<= 1) + ; /* EMPTY */ + + /* + * Calculate local table size, etc. + */ + + MinLocalTableSize = TableSize / NumProcs; + + /* Number of processors with (LocalTableSize + 1) entries */ + + Remainder = TableSize - (MinLocalTableSize * NumProcs); + + /* Number of table entries in top of Table */ + Top = (MinLocalTableSize + 1) * Remainder; + /* Local table size */ + if (MyProc < Remainder) { + LocalTableSize = MinLocalTableSize + 1; + GlobalStartMyProc = ( (MinLocalTableSize + 1) * MyProc); + } else { + LocalTableSize = MinLocalTableSize; + GlobalStartMyProc = ( (MinLocalTableSize * MyProc) + Remainder ); + } + + + sAbort = 0; + /* Ensure the allocation size is symmetric */ + HPCC_Table = shmem_malloc((Remainder > 0 ? (MinLocalTableSize + 1) : LocalTableSize) + * sizeof(uint64_t)); + if (! HPCC_Table) sAbort = 1; + + HPCC_PELock = (long *) shmem_malloc(sizeof(long) * NumProcs); + if (! 
HPCC_PELock) sAbort = 1; + + shmem_barrier_all(); + shmem_int_sum_to_all(&rAbort, &sAbort, 1, 0, 0, NumProcs, ipWrk, pSync_reduce); + shmem_barrier_all(); + + if (rAbort > 0) { + if (MyProc == 0) fprintf(outFile, "Failed to allocate memory\n"); + /* check all allocations in case there are new added and their order changes */ + if (HPCC_Table) shmem_free( HPCC_Table ); + if (HPCC_PELock) shmem_free( HPCC_PELock ); + goto failed_table; + } + + for (i = 0; i < NumProcs; i++) + HPCC_PELock[i] = 0; + + /* Default number of global updates to table: 4x number of table entries */ + NumUpdates_Default = 4 * TableSize; + ProcNumUpdates = 4 * LocalTableSize; + NumUpdates = NumUpdates_Default; + + if (MyProc == 0) { + fprintf( outFile, "Running on %d processors\n", NumProcs); + fprintf( outFile, "Total Main table size = 2^%" PRIu64 " = %" PRIu64 " words\n", + logTableSize, TableSize ); + fprintf( outFile, "PE Main table size = (2^%" PRIu64 ")/%d = %" PRIu64 " words/PE MAX\n", + logTableSize, NumProcs, LocalTableSize); + + fprintf( outFile, "Default number of updates (RECOMMENDED) = %" PRIu64 "\n", NumUpdates_Default); + } + + /* Initialize main table */ + for (i=0; i 0.01*TableSize) Failure = 1; + } + /* End verification phase */ + + shmem_free( HPCC_Table ); + shmem_free( HPCC_PELock ); + failed_table: + + if (0 == MyProc) if (outFile != stderr) fclose( outFile ); + + shmem_barrier_all(); + + return 0; +} + +int main(int argc, char **argv) +{ + int op; + + while ((op = getopt(argc, argv, "hm:n:")) != -1) { + switch (op) { + /* + * memory per PE (used for determining table size) + */ + case 'm': + TotalMemOpt = atoll(optarg); + if (TotalMemOpt <= 0) { + print_usage(); + return -1; + } + break; + + /* + * num updates/PE + */ + case 'n': + NumUpdatesOpt = atoi(optarg); + if (NumUpdatesOpt <= 0) { + print_usage(); + return -1; + } + break; + + case '?': + case 'h': + print_usage(); + return -1; + } + } + + shmem_init(); + SHMEMRandomAccess(); + shmem_finalize(); + + return 0; +} 
diff --git a/test/apps/mandelbrot.c b/test/apps/mandelbrot.c
new file mode 100644
index 0000000..0c2c112
--- /dev/null
+++ b/test/apps/mandelbrot.c
@@ -0,0 +1,515 @@
+/*
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Mandelbrot set calculation using OpenSHMEM
+ *
+ * James Dinan and Mario Flajslik, "Contexts: A Mechanism for High Throughput
+ * Communication in OpenSHMEM." In Proc. 8th Intl. Conf. on Partitioned Global
+ * Address Space Programming Models (PGAS '14). DOI: 10.1145/2676870.2676872.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <pthread.h>
+#include <sched.h>
+#include <assert.h>
+
+#ifdef ENABLE_PAPI
+#include <papi.h>
+#endif
+
+#include <shmem.h>
+/* For systems without the PThread barrier API (e.g. MacOS) */
+#include "pthread_barrier.h"
+
+#ifdef ENABLE_PINNING
+// Number of CPUs on the machine
+#define NUM_CPUS 12
+// Pin threads to every CPU_PIN_OFFSET core.
Setting this to 2 allows for
+// portals progress_thread to have a dedicated core
+#define CPU_PIN_OFFSET 2
+#endif
+
+// MAX_ITERATIONS must be less than 65536 to satisfy pgm file format
+#define MAX_ITERATIONS 1000
+// This PE prints stats and gathers image data at the end
+#define IMAGE_PE 0
+
+// Default values for width and height
+int width = 2048;
+int height = 2048;
+
+// An interesting transition point is job_points*sizeof(int) being
+// smaller/bigger than max_volatile size for Portals implementation
+int job_points = 128;
+
+int me, npes;
+
+// Pointer to shmalloc-ed array for image data
+int *imageData;
+// Counter used for work load balancing
+long nextPoint = 0;
+// Stats
+long sumTime = 0;
+long sumWorkRate = 0;
+
+#ifdef ENABLE_PAPI
+long sumL1_ICM = 0;
+long sumL2_ICM = 0;
+#endif
+
+pthread_barrier_t fencebar;
+
+// Parameters set on the command-line
+int use_contexts = 1;
+int use_pipelining = 1;
+int use_blocking = 0;
+
+static long getTime(void)
+{
+    struct timeval tv;
+
+    gettimeofday(&tv, NULL);
+    return tv.tv_sec*1e6 + tv.tv_usec; // wall-clock time in microseconds
+}
+
+static void fileDump(void) { // write imageData to mandelbrot.pgm; exits the process if the file cannot be opened
+    int i, j;
+    FILE *fp;
+    fp = fopen("mandelbrot.pgm", "w");
+
+    if (NULL == fp) {
+        perror ("File open failed!");
+        exit(1);
+    }
+
+    fprintf(fp,"P2\n"); // header: magic "P2", then dimensions, then the maximum pixel value
+    fprintf(fp,"%d %d\n", width, height);
+    fprintf(fp,"%d\n", MAX_ITERATIONS);
+
+    for (j = 0; j < height; j++) { // one text row per image row
+        for (i = 0; i < width; i++) {
+            fprintf(fp,"%d ", imageData[i + j * width]);
+        }
+        fprintf(fp,"\n");
+    }
+
+    fclose(fp);
+}
+
+static int computeSingle(int cx, int cy) { // pixel value for column cx, row cy of the width x height image
+
+    int i;
+    double x, y, x0, y0, x2, y2;
+
+    // column cx maps to x0 starting at -2.5 (x range = 4.0)
+    // row cy maps to y0 starting at -2.0 (y range = 4.0)
+
+    x0 = -2.5 + cx * (4.0 / width);
+    y0 = -2.0 + cy * (4.0 / height);
+
+    x = 0;
+    y = 0;
+    x2 = x*x;
+    y2 = y*y;
+
+    for (i = 0; (i < MAX_ITERATIONS) && (x2 + y2 < 4); i++) { // iterate until escape (x2 + y2 >= 4) or the cap
+        y = 2*x*y + y0;
+        x = x2-y2 + x0;
+        x2 = x*x;
+        y2 = y*y;
+    }
+    return MAX_ITERATIONS - i; // 0 when the iteration cap was reached without escaping
+}
+
+struct th_arg{
+    int tid;
+    shmem_ctx_t
ctx[2];
+    int cpu;
+};
+
+// Worker thread: claim chunks of points from each PE's counter, compute them, put the pixels back
+static void *thread_worker(void *arg) {
+    int tid = ((struct th_arg*)arg)->tid;
+    shmem_ctx_t *ctx = ((struct th_arg*)arg)->ctx;
+    int i, j;
+    long timer;
+    long work_start, work_end;
+    int *pixels[2];
+    int rr_pe = me; // next PE in round-robin scheme
+    int pe_pending = npes; // number of PEs with work left
+    int *pe_mask; // flags indicating PEs with work left
+    int *pe_ct_max; // max work counter value for each PE
+    int index = 0; // index for comm/comp overlap
+    long total_work = 0; // total amount of work in this thread
+
+#ifdef ENABLE_PAPI
+    // we are tracking the instruction L1/L2 misses to show why a threaded implementation
+    // performs better than a pure PE implementation
+    long long counters[4];
+    int PAPI_events[] = {PAPI_L1_ICM, PAPI_L2_ICM,
+                         PAPI_L1_ICA, PAPI_L2_ICA};
+#endif
+#ifdef ENABLE_PINNING
+    int my_cpu = ((struct th_arg*)arg)->cpu;
+    pthread_t thread = pthread_self();
+    cpu_set_t cpuset;
+    // Pin each thread
+    CPU_ZERO(&cpuset);
+    CPU_SET(my_cpu, &cpuset);
+    pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
+#endif
+
+    // Malloc local (non-symmetric) buffers
+    pixels[0] = malloc(sizeof(int)*job_points);
+    pixels[1] = malloc(sizeof(int)*job_points);
+    pe_mask = malloc(sizeof(int)*npes);
+    pe_ct_max = malloc(sizeof(int)*npes);
+
+    if (NULL == pixels[0] || NULL == pixels[1] || NULL == pe_mask || NULL == pe_ct_max) {
+        printf("%d, %d: Error, thread malloc failed\n", me, tid);
+
+        if (pixels[0]) free(pixels[0]);
+        if (pixels[1]) free(pixels[1]);
+        if (pe_mask) free(pe_mask);
+        if (pe_ct_max) free(pe_ct_max);
+
+        return NULL;
+    }
+
+    // Initialize the PE work available flags
+    for (i = 0; i < npes; i++) pe_mask[i] = 1;
+
+    // Pre-calculate max value for all remote counters
+    for (i = 0; i < npes; i++) {
+        if (i < npes-1) {
+            pe_ct_max[i] = (width*height / npes)*(i+1);
+        }
+        else {
+            pe_ct_max[i] = width*height;
+        }
+    }
+
+    // Synchronize all threads on all PEs before starting work
+    pthread_barrier_wait(&fencebar);
+    if (0 == tid) shmem_barrier_all();
+    pthread_barrier_wait(&fencebar);
+
+#ifdef ENABLE_PAPI
+    // Start PAPI cache miss counters
+    PAPI_start_counters(PAPI_events, 4);
+#endif
+
+    // Start work
+    timer = getTime();
+    while(pe_pending) {
+        // Next round-robin PE
+        do {
+            rr_pe = (rr_pe + 1) % npes;
+        } while(!pe_mask[rr_pe]);
+
+        work_start = shmem_ctx_long_atomic_fetch_add(ctx[index], &nextPoint, job_points, rr_pe);
+        work_end = work_start + job_points;
+
+        // Check if all work at this PE has been done
+        if (work_start >= pe_ct_max[rr_pe]) { // No more work at this PE
+            pe_pending--;
+            pe_mask[rr_pe] = 0;
+            continue;
+        }
+
+        if (work_end >= pe_ct_max[rr_pe]) { // This is the last bit of work on this PE
+            work_end = pe_ct_max[rr_pe];
+            pe_pending--;
+            pe_mask[rr_pe] = 0;
+        }
+
+        if (!use_blocking)
+            shmem_ctx_quiet(ctx[index]);
+
+        // Do actual compute work
+        for (i = work_start, j = 0; i < work_end; i++, j++) {
+            pixels[index][j] = computeSingle(i%width, i/width);
+        }
+
+        // Return the computed image data to the PE responsible for it
+        if (use_blocking)
+            shmem_ctx_putmem(ctx[index], &imageData[work_start], pixels[index],
+                             (work_end-work_start)*sizeof(int), rr_pe);
+        else
+            shmem_ctx_putmem_nbi(ctx[index], &imageData[work_start], pixels[index],
+                                 (work_end-work_start)*sizeof(int), rr_pe);
+
+        total_work += work_end - work_start;
+
+        if (use_pipelining)
+            index ^= 1;
+    }
+    // shmem_quiet() only completes the default context, so quiet the contexts the puts were issued on
+    shmem_ctx_quiet(ctx[0]);
+    shmem_ctx_quiet(ctx[1]);
+    timer = getTime() - timer;
+
+    // send stats data to IMAGE_PE
+    shmem_long_atomic_add(&sumTime, timer, IMAGE_PE);
+    shmem_long_atomic_add(&sumWorkRate, (long)(total_work / ((double)timer / 1e6) + 0.5), IMAGE_PE);
+
+#ifdef ENABLE_PAPI
+    // Read PAPI cache miss counters
+    PAPI_read_counters(counters, 4);
+    shmem_long_atomic_add(&sumL1_ICM, counters[0]*1e6/counters[2], IMAGE_PE);
+    shmem_long_atomic_add(&sumL2_ICM, counters[1]*1e6/counters[3], IMAGE_PE);
+#endif
+
+    free(pixels[0]);
+    free(pixels[1]);
+    free(pe_mask);
+    free(pe_ct_max);
+
+    return NULL;
+}
+
+// Print the command-line help text to stdout
+static void printUsage(void) {
+    printf("USAGE: mandelbrot [options]\n");
+    printf(" -t number of worker threads (def: 1)\n");
+    printf(" -w width of the mandelbrot domain (def: 2048)\n");
+    printf(" -h height of the mandelbrot domain (def: 2048)\n");
+    printf(" -j load balancing granularity (def: 128)\n");
+    printf(" -o output image mandelbrot.pgm (def: off)\n");
+    printf(" -c use OpenSHMEM contexts (def: on)\n");
+    printf(" -p enable pipelining (implies -c) (def: on)\n");
+    printf(" -b use blocking communication (def: off)\n");
+    printf(" -? prints this message\n");
+}
+
+// Parse options, start SHMEM, run num_threads workers on this PE, then optionally dump the
+// image and print timing statistics on IMAGE_PE.
+int main(int argc, char** argv) {
+    int tl, i;
+    int c;
+    int num_threads = 1;
+    int out_file = 0;
+    pthread_t *threads;
+    struct th_arg *t_arg;
+
+#ifdef ENABLE_PINNING
+    int p4_cpu;
+    pthread_t thread = pthread_self();
+    cpu_set_t cpuset;
+    // Must pin the main thread here before calling PtlInit() inside shmem_init().
+    // This is the way we control the pinning of Portals4 progress_thread.
+    // see portals4 configuration flag: --enable-progress-thread-polling
+    //p4_cpu = CPU_PIN_OFFSET*(getpid()%(NUM_CPUS/CPU_PIN_OFFSET));
+    p4_cpu = 3;
+    CPU_ZERO(&cpuset);
+    CPU_SET(p4_cpu, &cpuset);
+    pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
+#endif
+
+    while ((c = getopt (argc, argv, "?copbt:w:h:j:")) != -1) {
+        switch(c) {
+            case 't':
+                num_threads = atoi(optarg);
+                break;
+            case 'o':
+                out_file = 1;
+                break;
+            case 'c':
+                use_contexts = 1;
+                break;
+            case 'p':
+                use_contexts = 1;
+                use_pipelining = 1;
+                break;
+            case 'b':
+                use_blocking = 1;
+                break;
+            case 'w':
+                width = atoi(optarg);
+                break;
+            case 'h':
+                height = atoi(optarg);
+                break;
+            case 'j':
+                job_points = atoi(optarg);
+                break;
+            case '?':
+                printUsage();
+                return 0;
+            default:
+                printUsage();
+                return 1;
+        }
+    }
+
+#ifdef ENABLE_PAPI
+    // Init PAPI
+    PAPI_library_init(PAPI_VER_CURRENT);
+    // This is a little sketchy since pthread_self is not guaranteed to return an integer, but it works.
+    if (num_threads > 1 && PAPI_thread_init(pthread_self) != PAPI_OK) {
+        printf("ERROR: PAPI thread init failed\n");
+        return 1;
+    }
+#endif
+
+    // Allocate local memory (non-symmetric)
+    t_arg = malloc(sizeof(struct th_arg)*num_threads);
+    threads = malloc(sizeof(pthread_t)*num_threads);
+
+    if (NULL == t_arg || NULL == threads) {
+        printf("ERROR: malloc failed\n");
+        return 1;
+    }
+
+    // Start SHMEM (also calls PtlInit())
+    if (num_threads > 1) {
+        shmem_init_thread(SHMEM_THREAD_MULTIPLE, &tl);
+        // If OpenSHMEM doesn't support multithreading, exit gracefully
+        if (SHMEM_THREAD_MULTIPLE != tl) {
+            printf("Warning: Exiting because threading is disabled, tested nothing\n");
+            shmem_global_exit(0);
+            return 0;
+        }
+    }
+    else {
+        shmem_init_thread(SHMEM_THREAD_SINGLE, &tl);
+    }
+
+    // Allocate symmetric memory for image data
+    // For shmalloc of more than 512MB, set SMA_SYMMETRIC_SIZE to increase symmetric heap size.
+    imageData = shmem_malloc(sizeof(int)*width*height);
+    if (NULL == imageData) {
+        printf("ERROR: shmem_malloc failed\n");
+        return 1;
+    }
+
+    me = shmem_my_pe();
+    npes = shmem_n_pes();
+
+    // Divide work balancing counter between all PEs
+    nextPoint = (width*height / npes) * me;
+
+    // Initialize barrier for thread synchronization inside PE
+    pthread_barrier_init(&fencebar, NULL, num_threads);
+
+    shmem_barrier_all();
+
+    // Ready to go...
+    if (me == IMAGE_PE) printf("Starting benchmark on %d PEs, %d threads/PE\n", npes, num_threads);
+
+    // Initialize worker threads
+    for (i = 0; i < num_threads; i++) {
+        int err;
+        t_arg[i].tid = i;
+        if (1 == use_contexts) {
+            shmem_ctx_create(0, &t_arg[i].ctx[0]);
+
+            if (use_pipelining)
+                shmem_ctx_create(0, &t_arg[i].ctx[1]);
+            else
+                t_arg[i].ctx[1] = t_arg[i].ctx[0]; // without pipelining both slots share one context
+        }
+        else {
+            t_arg[i].ctx[0] = SHMEM_CTX_DEFAULT;
+            t_arg[i].ctx[1] = SHMEM_CTX_DEFAULT;
+        }
+#ifdef ENABLE_PINNING
+        t_arg[i].cpu = (p4_cpu+i+1)%NUM_CPUS;
+#else
+        t_arg[i].cpu = -1;
+#endif
+
+        err = pthread_create(&threads[i], NULL, thread_worker, (void*) &t_arg[i]);
+        assert(0 == err);
+    }
+
+    // Wait for local threads to finish work
+    for (i = 0; i < num_threads; i++) {
+        int err;
+        err = pthread_join(threads[i], NULL);
+        assert(0 == err);
+    }
+
+    // Wait for all PEs to finish work
+    shmem_barrier_all();
+
+    // Collect all image data on IMAGE_PE and dump it to a file
+    if (1 == out_file) {
+        if (me != IMAGE_PE) {
+            if (me < npes-1) {
+                shmem_putmem(&imageData[(width*height / npes) * me],
+                             &imageData[(width*height / npes) * me],
+                             (width*height / npes)*sizeof(int), IMAGE_PE);
+            }
+            else { // the last PE also sends the points left over by the integer division
+                shmem_putmem(&imageData[(width*height / npes) * me],
+                             &imageData[(width*height / npes) * me],
+                             (width*height - (width*height / npes) * me)*sizeof(int), IMAGE_PE);
+            }
+        }
+        shmem_barrier_all();
+        if (me == IMAGE_PE) {
+            fileDump();
+        }
+    }
+
+    // Print stats
+    if (me == IMAGE_PE) {
+#ifdef ENABLE_PAPI
+        printf("Average thread L1 instruction misses(%%): %f\n", (double)sumL1_ICM/(npes*num_threads)/1e4);
+        printf("Average thread L2 instruction misses(%%): %f\n", (double)sumL2_ICM/(npes*num_threads)/1e4);
+#endif
+        printf("Total cumulative runtime (sec) : %f\n", (double)sumTime/1e6);
+        printf("Average thread work rate (points/sec) : %f\n", (double)sumWorkRate/(npes*num_threads));
+        printf("Average thread work runtime (sec) : %f\n", ((double)sumTime/1e6)/(npes*num_threads));
+        printf("Total work rate (points/sec) : %e\n",
+               width*height/(((double)sumTime/1e6)/(npes*num_threads)));
+    }
+
+    // Cleanup
+    if (use_contexts) {
+        for (i = 0; i < num_threads; i++)
+            shmem_ctx_destroy(t_arg[i].ctx[0]);
+
+        if (use_pipelining) { // ctx[1] is a separate context only when pipelining created one
+            for (i = 0; i < num_threads; i++)
+                shmem_ctx_destroy(t_arg[i].ctx[1]);
+        }
+    }
+
+    pthread_barrier_destroy(&fencebar);
+    shmem_free(imageData);
+    free(t_arg);
+    free(threads);
+    shmem_finalize();
+    return 0;
+}
diff --git a/test/performance/Makefile.am b/test/performance/Makefile.am
new file mode 100644
index 0000000..575cfd3
--- /dev/null
+++ b/test/performance/Makefile.am
@@ -0,0 +1,10 @@
+# -*- Makefile -*-
+#
+# Copyright (c) 2017 Intel Corporation. All rights reserved.
+# This software is available to you under the BSD license.
+#
+# This file is part of the Sandia OpenSHMEM software package. For license
+# information, see the LICENSE file in the top level directory of the
+# distribution.
+
+SUBDIRS = shmem_perf_suite tests
diff --git a/test/performance/shmem_perf_suite/Makefile.am b/test/performance/shmem_perf_suite/Makefile.am
new file mode 100644
index 0000000..38e0866
--- /dev/null
+++ b/test/performance/shmem_perf_suite/Makefile.am
@@ -0,0 +1,86 @@
+# -*- Makefile -*-
+#
+# Copyright (c) 2018 Intel Corporation. All rights reserved.
+# This software is available to you under the BSD license.
+#
+# This file is part of the Sandia OpenSHMEM software package. For license
+# information, see the LICENSE file in the top level directory of the
+# distribution.
+ +check_PROGRAMS = \ + shmem_latency_put_perf \ + shmem_latency_get_perf \ + shmem_bw_put_perf \ + shmem_bw_put_perf_nb \ + shmem_bibw_put_perf \ + shmem_bibw_put_perf_nb \ + shmem_bw_get_perf \ + shmem_bw_get_perf_nb \ + shmem_bibw_get_perf \ + shmem_bibw_get_perf_nb \ + shmem_latency_nb_put_perf \ + shmem_latency_nb_get_perf \ + shmem_bw_atomics_perf \ + shmem_bibw_atomics_perf + +if HAVE_OPENMP +check_PROGRAMS += \ + shmem_bw_put_ctx_perf \ + shmem_bw_put_ctx_perf_nb \ + shmem_bibw_put_ctx_perf \ + shmem_bibw_put_ctx_perf_nb +endif + +noinst_HEADERS = \ + common.h \ + latency_common.h \ + round_t_latency.h \ + int_element_latency.h \ + bw_common.h \ + uni_dir.h \ + bi_dir.h \ + target_put.h + +if ENABLE_LENGTHY_TESTS +TESTS = $(check_PROGRAMS) +endif + +NPROCS ?= 2 +LOG_COMPILER = $(TEST_RUNNER) + +AM_LDFLAGS = $(LIBTOOL_WRAPPER_LDFLAGS) + +if EXTERNAL_TESTS +bin_PROGRAMS = $(check_PROGRAMS) +AM_CPPFLAGS = +LDADD = +else +AM_CPPFLAGS = -I$(top_builddir)/mpp +LDADD = $(top_builddir)/src/libsma.la +endif + +if USE_PMI_SIMPLE +LDADD += $(top_builddir)/pmi-simple/libpmi_simple.la +endif + +shmem_bw_put_perf_nb_SOURCES = shmem_bw_put_perf.c +shmem_bw_put_perf_nb_CFLAGS = -DUSE_NONBLOCKING_API + +shmem_bw_get_perf_nb_SOURCES = shmem_bw_get_perf.c +shmem_bw_get_perf_nb_CFLAGS = -DUSE_NONBLOCKING_API + +shmem_bibw_put_perf_nb_SOURCES = shmem_bibw_put_perf.c +shmem_bibw_put_perf_nb_CFLAGS = -DUSE_NONBLOCKING_API + +shmem_bibw_get_perf_nb_SOURCES = shmem_bibw_get_perf.c +shmem_bibw_get_perf_nb_CFLAGS = -DUSE_NONBLOCKING_API + +shmem_bw_put_ctx_perf_CFLAGS = $(AM_OPENMP_CFLAGS) + +shmem_bw_put_ctx_perf_nb_SOURCES = shmem_bw_put_ctx_perf.c +shmem_bw_put_ctx_perf_nb_CFLAGS = $(AM_OPENMP_CFLAGS) -DUSE_NONBLOCKING_API + +shmem_bibw_put_ctx_perf_CFLAGS = $(AM_OPENMP_CFLAGS) + +shmem_bibw_put_ctx_perf_nb_SOURCES = shmem_bibw_put_ctx_perf.c +shmem_bibw_put_ctx_perf_nb_CFLAGS = $(AM_OPENMP_CFLAGS) -DUSE_NONBLOCKING_API diff --git a/test/performance/shmem_perf_suite/README 
b/test/performance/shmem_perf_suite/README new file mode 100644 index 0000000..b760725 --- /dev/null +++ b/test/performance/shmem_perf_suite/README @@ -0,0 +1,82 @@ +=============================================================================== + + User Manual: Latency and Bandwidth Performance Test Suite + +=============================================================================== +includes: + shmem_bw_put_perf + shmem_bw_get_perf + shmem_bw_atomics_perf + shmem_bw_put_ctx_perf + shmem_bibw_put_perf + shmem_bibw_get_perf + shmem_bibw_atomics_perf + shmem_bibw_put_ctx_perf + shmem_latency_put_perf + shmem_latency_get_perf + shmem_latency_nb_put_perf + shmem_latency_nb_get_perf + +Notes for Users: + + Bandwidth tests: only an even number of PEs can run the tests; a 2-node model with + multiple processors per node is assumed: half of the PEs come from one node and + the other half from the partner node + + calculation parameters: runs for x number of iterations, over varying input + specified data size, for 64 back to back operations (window size = 64) + + bw tests: uni-direction bw tests + (half of PE set does put/get to other half) + bibw tests: bi-direction bw test + (PE pairs put to each other simultaneously but to different buffers) + + Latency tests: two-nodes only + 1) runs a single long element (shmem_long_p/g) round-trip communication + scheme for x number of iterations + 2) for x number of iterations, over varying input specified data size + range, latency calculation + + Input Parameters: + -e : end length (power of two) DEFAULT: 8MB + -s : start length (power of two) DEFAULT: 1B + -n : number of trials (must be greater than 20 (warmup size)) DEFAULT: 100 + -v : validate input stream used for performance data (off by default) + BW only: + -k : output in KB + -b : output in B + -T : number of threads, DEFAULT: 1 + -C : thread level (SINGLE, FUNNELED, SERIALIZED, MULTIPLE), DEFAULT: SINGLE + +Notes for performance tests developers: + overall model: + development assumed
that the initialization of data, calculating and printing of + output, as well as input from user is constant and reusable between put/get + performance tests. Thus header files define reusable items amongst .c + performance files. The scope of reuse (i.e. between latency or bw files) is + specified by header name, common.h being used amongst all the files. The + unique aspect of the performance files, gathering the actual timing data + from a communication pattern, is implemented in each individual .c performance + file (thus they implement the header extern bw/latency function call that + plugs into the rest of the framework). + + NOTE: data is automatically initialized for latency/bw functions that are implemented + in the .c files; they are initialized with the following assumptions: + the uni-direction test only needs a single symmetric buffer of + max length to use for the uni_dir_bw implementation, bi-direction assumes + two unique symmetric buffers, latency one element assumes a single long symmetric + element is needed, streaming latency assumes a single symmetric buffer + -all buffers are initialized with my_pe, and are validated by checking that they + contain the partner's PE number, except for the latency one element function, whose data + must be initialized per latency implementation (in .c file) + + common.h: functions used in both latency/bw tests + latency_common.h: the init path is assumed to be reusable and static amongst tests + only the latency calculation is assumed to be unique between tests and thus + is implemented in each shmem_latency....c file + latency_common.h: the init path is assumed to be reusable and static amongst tests + only the latency timing is assumed to be unique between tests and thus + is implemented in each shmem_latency....c file + bw_common.h: the init path is assumed to be reusable and static amongst tests + only the bw timing is assumed to be unique between tests and thus + is implemented in each shmem_bw....c file diff --git
a/test/performance/shmem_perf_suite/bi_dir.h b/test/performance/shmem_perf_suite/bi_dir.h new file mode 100644 index 0000000..fda8119 --- /dev/null +++ b/test/performance/shmem_perf_suite/bi_dir.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2018 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +void static inline bi_bw_put(int len, perf_metrics_t *metric_info) +{ + double start = 0.0, end = 0.0; + int dest = partner_node(*metric_info); + int i = 0, j = 0; + + shmem_barrier_all(); + + for (i = 0; i < metric_info->warmup; i++) { + for(j = 0; j < metric_info->window_size; j++) { +#ifdef USE_NONBLOCKING_API + shmem_putmem_nbi(metric_info->dest, metric_info->src, len, dest); +#else + shmem_putmem(metric_info->dest, metric_info->src, len, dest); +#endif + } + shmem_quiet(); + } + + shmem_barrier_all(); + if (streaming_node(*metric_info)) { + start = perf_shmemx_wtime(); + } + + for (i = 0; i < metric_info->trials; i++) { + for(j = 0; j < metric_info->window_size; j++) { +#ifdef USE_NONBLOCKING_API + shmem_putmem_nbi(metric_info->dest, metric_info->src, len, dest); +#else + shmem_putmem(metric_info->dest, metric_info->src, len, dest); +#endif + } + shmem_quiet(); + } + + shmem_barrier_all(); + if (streaming_node(*metric_info)) { + end = perf_shmemx_wtime(); + calc_and_print_results((end - start), len, *metric_info); + } +} + +void static inline bi_bw_get(int len, perf_metrics_t *metric_info) +{ + double start = 0.0, end = 0.0; + int dest = partner_node(*metric_info); + int i = 0, j = 0; + + shmem_barrier_all(); + + for (i = 0; i < metric_info->warmup; i++) { + for(j = 0; j < metric_info->window_size; j++) { + /* Choosing to skip quiet for both blocking and non-blocking getmem + * as this sequence of operation (writing to the same location) is + * currently undefined by the OpenSHMEM Spec. 
*/ +#ifdef USE_NONBLOCKING_API + shmem_getmem_nbi(metric_info->dest, metric_info->src, len, dest); +#else + shmem_getmem(metric_info->dest, metric_info->src, len, dest); +#endif + } +#ifdef USE_NONBLOCKING_API + shmem_quiet(); +#endif + } + + shmem_barrier_all(); + if (streaming_node(*metric_info)) { + start = perf_shmemx_wtime(); + } + + for (i = 0; i < metric_info->trials; i++) { + for(j = 0; j < metric_info->window_size; j++) { + /* Choosing to skip quiet for both blocking and non-blocking getmem + * as this sequence of operation (writing to the same location) is + * currently undefined by the OpenSHMEM Spec. */ +#ifdef USE_NONBLOCKING_API + shmem_getmem_nbi(metric_info->dest, metric_info->src, len, dest); +#else + shmem_getmem(metric_info->dest, metric_info->src, len, dest); +#endif + } +#ifdef USE_NONBLOCKING_API + shmem_quiet(); +#endif + } + + shmem_barrier_all(); + if (streaming_node(*metric_info)) { + end = perf_shmemx_wtime(); + calc_and_print_results((end - start), len, *metric_info); + } +} + diff --git a/test/performance/shmem_perf_suite/bi_dir_ctx.h b/test/performance/shmem_perf_suite/bi_dir_ctx.h new file mode 100644 index 0000000..7043b38 --- /dev/null +++ b/test/performance/shmem_perf_suite/bi_dir_ctx.h @@ -0,0 +1,122 @@ +/* +* Copyright (c) 2018 Intel Corporation. All rights reserved. +* This software is available to you under the BSD license below: +* +* Redistribution and use in source and binary forms, with or +* without modification, are permitted provided that the following +* conditions are met: +* +* - Redistributions of source code must retain the above +* copyright notice, this list of conditions and the following +* disclaimer. +* +* - Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials +* provided with the distribution. 
+* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. +*/ + + +void static inline bi_bw_ctx (int len, perf_metrics_t *metric_info) +{ + double start = 0.0, end = 0.0; + int dest = partner_node(*metric_info); + int j = 0; + char *src = aligned_buffer_alloc(metric_info->nthreads * len); + char *dst = aligned_buffer_alloc(metric_info->nthreads * len); + assert(src && dst); + + shmem_barrier_all(); + +#pragma omp parallel default(none) firstprivate(len, dest) private(j) \ + shared(metric_info, src, dst, start, end) num_threads(metric_info->nthreads) + { + int i; + const int thread_id = omp_get_thread_num(); + shmem_ctx_t ctx; + shmem_ctx_create(SHMEM_CTX_PRIVATE, &ctx); + + for (i = 0; i < metric_info->warmup; i++) { + for(j = 0; j < metric_info->window_size; j++) { +#ifdef USE_NONBLOCKING_API + shmem_ctx_putmem_nbi(ctx, dst + thread_id * len, src + thread_id * len, len, dest); +#else + shmem_ctx_putmem(ctx, dst + thread_id * len, src + thread_id * len, len, dest); +#endif + } + shmem_ctx_quiet(ctx); + } + shmem_ctx_destroy(ctx); + } + + shmem_barrier_all(); + if (streaming_node(*metric_info)) { +#pragma omp parallel default(none) firstprivate(len, dest) private(j) \ + shared(metric_info, src, dst, start, end) num_threads(metric_info->nthreads) + { + int i; + const int thread_id = omp_get_thread_num(); + shmem_ctx_t ctx; + shmem_ctx_create(SHMEM_CTX_PRIVATE, &ctx); + +#pragma omp barrier +#pragma omp master + { + start = perf_shmemx_wtime(); + } + for (i = 0; i < metric_info->trials; i++) { + for(j = 0; j < metric_info->window_size; j++) { 
+#ifdef USE_NONBLOCKING_API + shmem_ctx_putmem_nbi(ctx, dst + thread_id * len, src + thread_id * len, len, dest); +#else + shmem_ctx_putmem(ctx, dst + thread_id * len, src + thread_id * len, len, dest); +#endif + } + shmem_ctx_quiet(ctx); + } + shmem_ctx_destroy(ctx); + } + } else { +#pragma omp parallel default(none) firstprivate(len, dest) private(j) \ + shared(metric_info, src, dst, start, end) num_threads(metric_info->nthreads) + { + int i; + const int thread_id = omp_get_thread_num(); + shmem_ctx_t ctx; + shmem_ctx_create(SHMEM_CTX_PRIVATE, &ctx); + + for (i = 0; i < metric_info->trials; i++) { + for(j = 0; j < metric_info->window_size; j++) { +#ifdef USE_NONBLOCKING_API + shmem_ctx_putmem_nbi(ctx, dst + thread_id * len, src + thread_id * len, len, dest); +#else + shmem_ctx_putmem(ctx, dst + thread_id * len, src + thread_id * len, len, dest); +#endif + } + shmem_ctx_quiet(ctx); + } + shmem_ctx_destroy(ctx); + } + } + + shmem_barrier_all(); + if (streaming_node(*metric_info)) { + end = perf_shmemx_wtime(); + calc_and_print_results((end - start), len, *metric_info); + } + + shmem_barrier_all(); + + aligned_buffer_free(src); + aligned_buffer_free(dst); + +} diff --git a/test/performance/shmem_perf_suite/bw_common.h b/test/performance/shmem_perf_suite/bw_common.h new file mode 100644 index 0000000..12beef8 --- /dev/null +++ b/test/performance/shmem_perf_suite/bw_common.h @@ -0,0 +1,783 @@ +/* + * Copyright (c) 2018 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#ifdef ENABLE_OPENMP +#include +#endif + +#define MAX_MSG_SIZE (1<<23) +#define START_LEN 1 + +#define INC 2 +#define TRIALS 500 +#define WINDOW_SIZE 64 +#define WARMUP 50 + +#define TRIALS_LARGE 100 +#define WINDOW_SIZE_LARGE 64 +#define WARMUP_LARGE 10 +#define LARGE_MESSAGE_SIZE 8192 + +#define TARGET_SZ_MIN 8 +#define TARGET_SZ_MAX 4096 + +/*atomics common */ +#define ATOMICS_N_DTs 3 +/*note: ignoring cswap/swap for now in verification */ +#define ATOMICS_N_OPs 4 +/*PE 0 is printing its latency, thus have it not be the INCAST PE*/ +#define INCAST_PE 1 + +typedef enum { + UNI_DIR, + BI_DIR, +} bw_type; + +typedef enum { + STYLE_PUT, + STYLE_GET, + STYLE_RMA, + STYLE_ATOMIC +} bw_style; + +typedef enum { + FIRST_HALF, + SECOND_HALF, + FULL_SET +} red_PE_set; + +typedef enum { + COMM_PAIRWISE, + COMM_INCAST +} comm_style; + +typedef enum { + B, + KB, + MB +} bw_units; + +typedef struct perf_metrics { + unsigned long int start_len, max_len; + unsigned long int size_inc, trials; + unsigned long int window_size, warmup; + int validate; + int target_data; + int my_node, num_pes, sztarget, szinitiator, midpt; + bw_units unit; + char *src, *dest; + const char *bw_type; + bw_type type; + comm_style cstyle; + bw_style bwstyle; + int 
thread_safety; + int nthreads; +} perf_metrics_t; + +long red_psync[SHMEM_REDUCE_SYNC_SIZE]; + +/*default settings if no input is provided */ +void static data_set_defaults(perf_metrics_t * data) { + data->start_len = START_LEN; + data->max_len = MAX_MSG_SIZE; + data->size_inc = INC; + data->trials = TRIALS; + data->window_size = WINDOW_SIZE; /*back-to-back msg stream*/ + data->warmup = WARMUP; /*number of initial iterations to skip*/ + data->unit = MB; + data->validate = false; + data->target_data = false; + data->my_node = -1; + data->num_pes = -1; + data->midpt = -1; + data->sztarget = -1; + data->szinitiator = -1; + data->src = NULL; + data->dest = NULL; + data->cstyle = COMM_PAIRWISE; + data->bwstyle = STYLE_RMA; + data->thread_safety = SHMEM_THREAD_SINGLE; + data->nthreads = 1; +} + +static int error_checking_init_target_usage(perf_metrics_t *metric_info) { + int error = false; + assert(metric_info->midpt > 0); + + if(metric_info->sztarget != -1 && metric_info->szinitiator != -1) + error = true; /* can't use them together */ + + if(metric_info->sztarget != -1) { + if(metric_info->sztarget < 1 || metric_info->sztarget > metric_info->midpt + || !metric_info->target_data) + error = true; + } else { + metric_info->sztarget = metric_info->midpt; + } + + if(metric_info->szinitiator != -1) { + if(metric_info->szinitiator < 1 || metric_info->szinitiator > metric_info->midpt + || !metric_info->target_data) + error = true; + } else { + metric_info->szinitiator = metric_info->midpt; + } + + if(error) { + fprintf(stderr, "invalid usage of command line arg -r/-l, use --help for info\n"); + return -1; + } + return 0; +} + +/* must use shmem_init beforehand */ +static int data_runtime_update(perf_metrics_t *data) { + data->my_node = shmem_my_pe(); + data->num_pes = shmem_n_pes(); + assert(data->num_pes); + data->midpt = data->num_pes/2; + return error_checking_init_target_usage(data); +} + +static const char * dt_names [] = { "int", "long", "longlong" }; + +void static 
bi_dir_data_init(perf_metrics_t * data) { + data->bw_type = "Bi-directional Bandwidth"; + data->type = BI_DIR; +} + +void static uni_dir_data_init(perf_metrics_t * data) { + data->bw_type = "Uni-directional Bandwidth"; + data->type = UNI_DIR; +} + + +int static inline partner_node(perf_metrics_t my_info) +{ + if(my_info.num_pes == 1) + return 0; + + if(my_info.cstyle == COMM_PAIRWISE) { + int pairs = my_info.midpt; + + return (my_info.my_node < pairs ? (my_info.my_node + pairs) : + (my_info.my_node - pairs)); + } else { + assert(my_info.cstyle == COMM_INCAST); + return INCAST_PE; + } +} + +int static inline streaming_node(perf_metrics_t my_info) +{ + if(my_info.cstyle == COMM_PAIRWISE) { + return (my_info.my_node < my_info.szinitiator); + } else { + assert(my_info.cstyle == COMM_INCAST); + return true; + } +} + +int static inline target_node(perf_metrics_t my_info) +{ + return (my_info.my_node >= my_info.midpt && + (my_info.my_node < (my_info.midpt + my_info.sztarget))); +} + +/* put/get bw use opposite streaming/validate nodes */ +red_PE_set static inline validation_set(perf_metrics_t my_info, int *nPEs) +{ + if(my_info.cstyle == COMM_PAIRWISE) { + if(streaming_node(my_info)) { + *nPEs = my_info.szinitiator; + return FIRST_HALF; + } else if(target_node(my_info)) { + *nPEs = my_info.sztarget; + return SECOND_HALF; + } else { + fprintf(stderr, "Warning: you are getting data from a node that " + "wasn't a part of the perf set \n "); + return 0; + } + } else { + assert(my_info.cstyle == COMM_INCAST); + *nPEs = my_info.num_pes; + return FULL_SET; + } +} + +/**************************************************************/ +/* Input Checking */ +/**************************************************************/ + +static int command_line_arg_check(int argc, char *argv[], + perf_metrics_t *metric_info) { + int ch, error = false; + extern char *optarg; + + /* check command line args */ + while ((ch = getopt(argc, argv, "e:s:n:w:p:r:l:kbvtC:T:")) != EOF) { + switch (ch) { + 
case 's': + metric_info->start_len = strtoul(optarg, (char **)NULL, 0); + if ( metric_info->start_len < 1 ) metric_info->start_len = 1; + if(!is_pow_of_2(metric_info->start_len)) { + fprintf(stderr, "Error: start_length must be a power of two\n"); + error = true; + } + break; + case 'e': + metric_info->max_len = strtoul(optarg, (char **)NULL, 0); + if(!is_pow_of_2(metric_info->max_len)) { + fprintf(stderr, "Error: end_length must be a power of two\n"); + error = true; + } + if(metric_info->max_len < metric_info->start_len) { + fprintf(stderr, "Error: end_length (%ld) must be >= " + "start_length (%ld)\n", metric_info->max_len, + metric_info->start_len); + error = true; + } + break; + case 'n': + metric_info->trials = strtoul(optarg, (char **)NULL, 0); + if(metric_info->trials < (metric_info->warmup*2)) { + fprintf(stderr, "Error: trials (%ld) must be >= 2*warmup " + "(%ld)\n", metric_info->trials, metric_info->warmup*2); + error = true; + } + break; + case 'p': + metric_info->warmup = strtoul(optarg, (char **)NULL, 0); + if(metric_info->warmup > (metric_info->trials/2)) { + fprintf(stderr, "Error: warmup (%ld) must be <= trials/2 " + "(%ld)\n", metric_info->warmup, metric_info->trials/2); + error = true; + } + break; + case 'k': + metric_info->unit = KB; + break; + case 'b': + metric_info->unit = B; + break; + case 'v': + metric_info->validate = true; + if(metric_info->target_data) error = true; + break; + case 'w': + metric_info->window_size = strtoul(optarg, (char **)NULL, 0); + if(metric_info->target_data) error = true; + break; + case 't': + metric_info->target_data = true; + metric_info->window_size = 1; + if(metric_info->validate) error = true; + break; + case 'r': + metric_info->sztarget = strtoul(optarg, (char **)NULL, 0); + break; + case 'l': + metric_info->szinitiator = strtoul(optarg, (char **)NULL, 0); + break; + case 'C': + if (strcmp(optarg, "SINGLE") == 0) { + metric_info->thread_safety = SHMEM_THREAD_SINGLE; + } else if (strcmp(optarg, "FUNNELED") 
== 0) { + metric_info->thread_safety = SHMEM_THREAD_FUNNELED; + } else if (strcmp(optarg, "SERIALIZED") == 0) { + metric_info->thread_safety = SHMEM_THREAD_SERIALIZED; + } else if (strcmp(optarg, "MULTIPLE") == 0) { + metric_info->thread_safety = SHMEM_THREAD_MULTIPLE; + } else { + fprintf(stderr, "Invalid threading level: \"%s\"\n", optarg); + error = true; + } + break; + case 'T': + metric_info->nthreads = atoi(optarg); + break; + default: + error = true; + break; + } + } + + /* filling in 8/4KB chunks into array alloc'd to max_len */ + if(metric_info->target_data) { + metric_info->start_len = TARGET_SZ_MIN; + if((metric_info->max_len < + ((metric_info->trials + metric_info->warmup) * TARGET_SZ_MIN)) || + (metric_info->max_len < + ((metric_info->trials + metric_info->warmup) * TARGET_SZ_MAX))) { + error = true; + } + } + + if (error) { + if (metric_info->my_node == 0) { + fprintf(stderr, "Usage: \n[-s start_length] [-e end_length] " + ": lengths should be a power of two \n" + "[-n trials (must be greater than 2*warmup (default: x => 100))] \n" + "[-p warm-up (see trials for value restriction)] \n" + "[-w window size - iterations between completion, cannot use with -t] \n" + "[-k (kilobytes/second)] [-b (bytes/second)] \n" + "[-v (validate data stream)] \n" + "[-t output data for target side (default is initiator," + " only use with put_bw),\n cannot be used in conjunction " + "with validate, special sizes used, \ntrials" + " + warmup * sizes (8/4KB) <= max length \n" + "[-r number of nodes at target, use only with -t] \n" + "[-l number of nodes at initiator, use only with -t, " + "l/r cannot be used together] \n" + "[-C thread-safety-config: SINGLE, FUNNELED, SERIALIZED, or MULTIPLE] \n" + "[-T num-threads] \n"); + } + return -1; + } + return 0; +} + +static inline int only_even_PEs_check(int my_node, int num_pes) { + if (num_pes % 2 != 0) { + if (my_node == 0) { + fprintf(stderr, "Only even number of nodes can be used\n"); + } + return 77; + } else + return 0; 
+} + +/**************************************************************/ +/* Result Printing and Calc */ +/**************************************************************/ + +static const char *thread_safety_str(perf_metrics_t *metric_info) { + if (metric_info->thread_safety == SHMEM_THREAD_SINGLE) { + return "SINGLE"; + } else if (metric_info->thread_safety == SHMEM_THREAD_FUNNELED) { + return "FUNNELED"; + } else if (metric_info->thread_safety == SHMEM_THREAD_SERIALIZED) { + return "SERIALIZED"; + } else if (metric_info->thread_safety == SHMEM_THREAD_MULTIPLE) { + return "MULTIPLE"; + } else { + fprintf(stderr, "Unexpected thread safety value: %d. Setting it to SINGLE\n", metric_info->thread_safety); + metric_info->thread_safety = SHMEM_THREAD_SINGLE; + return "SINGLE"; + } +} + +static void inline thread_safety_validation_check(perf_metrics_t *metric_info) { + if (metric_info->nthreads == 1) + return; + else { + if (metric_info->thread_safety != SHMEM_THREAD_MULTIPLE) { + if(metric_info->my_node == 0) { + fprintf(stderr, "Warning: argument \"-T %d\" is ignored because of the thread level specified." 
+ " Switching to single thread with thread safety %s\n", metric_info->nthreads, + thread_safety_str(metric_info)); + } + metric_info->nthreads = 1; + } + return; + } +} + +void static print_atomic_results_header(perf_metrics_t metric_info) { + printf("\nResults for %d PEs %lu trials with window size %lu ", + metric_info.num_pes, metric_info.trials, metric_info.window_size); + + if (metric_info.cstyle == COMM_INCAST) { + printf("using incast communication style\n"); + } else { + assert(metric_info.cstyle == COMM_PAIRWISE); + printf("using pairwise communication style\n"); + } + + printf("\nOperation %s " + "Message Rate%17sLatency\n", metric_info.bw_type, " "); + + if (metric_info.unit == MB) { + printf("%19s in megabytes per second"," "); + } else if (metric_info.unit == KB) { + printf("%19s in kilobytes per second", " "); + } else { + printf("%19s in bytes per second", " "); + } + + printf(" in Million ops/second%8sin microseconds\n", " "); + + /* hack */ + printf("shmem_add\n"); +} + +void static print_results_header(perf_metrics_t metric_info) { + printf("\nResults for %d PEs %lu trials with window size %lu " + "max message size %lu with multiple of %lu increments, " + "\ntargeting %d remote PEs initiated from %d PEs", metric_info.num_pes, + metric_info.trials, metric_info.window_size, metric_info.max_len, + metric_info.size_inc, metric_info.sztarget, metric_info.szinitiator); + printf(", thread safety %s (%d threads)\n", + thread_safety_str(&metric_info), metric_info.nthreads); + printf("\nLength %s " + "Message Rate\n", metric_info.bw_type); + + printf("in bytes "); + if (metric_info.unit == MB) { + printf("in megabytes per second"); + } else if (metric_info.unit == KB) { + printf("in kilobytes per second"); + } else { + printf("in bytes per second"); + } + + printf(" in messages/seconds\n"); +} + +void static print_data_results(double bw, double mr, perf_metrics_t data, + int len, double total_t) { + static int atomic_type_index = 0; + + if(data.target_data) 
{ + if(data.my_node < data.midpt) { + printf("initiator:\n"); + } else { + printf("target:\n"); + } + } + + if (data.bwstyle == STYLE_ATOMIC) { + printf("%-10s ", dt_names[atomic_type_index]); + atomic_type_index = (atomic_type_index + 1) % ATOMICS_N_DTs; + } else + printf("%9d ", len); + + if(data.unit == KB) { + bw = bw * 1.0e3; + } else if(data.unit == B) { + bw = bw * 1.0e6; + } + + if (data.bwstyle == STYLE_ATOMIC) { + printf("%5s%10.2f %10.2f%14s%10.2f\n", " ", bw, + mr/1.0e6, " ", total_t/(data.trials * data.window_size)); + } else + printf("%10.2f %10.2f\n", bw, mr); +} + + +/* reduction to collect performance results from PE set + then start_pe will print results --- assumes num_pes is even */ +void static inline PE_set_used_adjustments(int *nPEs, int *stride, int *start_pe, + perf_metrics_t my_info) +{ + red_PE_set PE_set = validation_set(my_info, nPEs); + + if(PE_set == FIRST_HALF || PE_set == FULL_SET) { + *start_pe = 0; + } + else { + assert(PE_set == SECOND_HALF); + *start_pe = my_info.midpt; + } + + *stride = 0; /* back to back PEs */ +} + + +void static inline calc_and_print_results(double total_t, int len, + perf_metrics_t metric_info) +{ + int stride = 0, start_pe = 0, nPEs = 0; + static double pe_bw_sum, bw = 0.0; /*must be symmetric for reduction*/ + double pe_bw_avg = 0.0, pe_mr_avg = 0.0; + int nred_elements = 1; + static double pwrk[SHMEM_REDUCE_MIN_WRKDATA_SIZE]; + + PE_set_used_adjustments(&nPEs, &stride, &start_pe, metric_info); + + if (total_t > 0 ) { + +#ifdef ENABLE_OPENMP + bw = (len / 1.0e6 * metric_info.window_size * metric_info.trials * + (double)metric_info.nthreads) / (total_t / 1.0e6); +#else + bw = (len / 1.0e6 * metric_info.window_size * metric_info.trials) / + (total_t / 1.0e6); +#endif + } + + /* 2x as many messages/bytes at once for bi-directional */ + if(metric_info.type == BI_DIR) + bw *= 2.0; + + /* base case: will be overwritten by collective if num_pes > 2 */ + pe_bw_sum = bw; + + if(nPEs >= 2) + 
shmem_double_sum_to_all(&pe_bw_sum, &bw, nred_elements, start_pe, + stride, nPEs, pwrk, + red_psync); + + /* aggregate bw since bw op pairs are communicating simultaneously */ + if(metric_info.my_node == start_pe) { + pe_bw_avg = pe_bw_sum; + pe_mr_avg = pe_bw_avg / (len / 1.0e6); + print_data_results(pe_bw_avg, pe_mr_avg, metric_info, len, total_t); + } +} + +void static inline large_message_metric_chg(perf_metrics_t *metric_info, int len) { + if(len > LARGE_MESSAGE_SIZE) { + metric_info->window_size = WINDOW_SIZE_LARGE; + metric_info->trials = TRIALS_LARGE; + metric_info->warmup = WARMUP_LARGE; + } +} + +static void validate_atomics(perf_metrics_t m_info) { + int snode = streaming_node(m_info); + int * my_buf = (int *)m_info.dest; + bw_type tbw = m_info.type; + unsigned int expected_val = 0; + unsigned int ppe_exp_val = ((m_info.trials + m_info.warmup) * m_info.window_size + * ATOMICS_N_DTs * ATOMICS_N_OPs) + m_info.my_node; + + if(m_info.cstyle == COMM_INCAST) { + if(tbw == BI_DIR) + printf("WARNING: This use-case is not currently well defined\n"); + + if(m_info.my_node == 0) { + expected_val = ppe_exp_val * m_info.num_pes; + } else + expected_val = m_info.my_node; + } else { + assert(m_info.cstyle == COMM_PAIRWISE); + expected_val = ppe_exp_val; + } + + if((!snode && tbw == UNI_DIR) || tbw == BI_DIR) { + if((unsigned int)my_buf[0] != expected_val) /* compare as unsigned, matching expected_val */ + printf("validation error for PE %d: %d != %u \n", m_info.my_node, my_buf[0], + expected_val); + } +} + +/**************************************************************/ +/* Bi-Directional BW */ +/**************************************************************/ + +/*have two symmetric char array metric_info->src/dest of max_len to + * use for calculation initialized with my_node number + * NOTE: post function validation assumptions, data isn't flushed pre/post */ +extern void bi_dir_bw(int len, perf_metrics_t *metric_info); + +void static inline bi_dir_bw_test_and_output(perf_metrics_t metric_info) { + int len = 0, partner_pe = 
partner_node(metric_info); + + if(metric_info.my_node == 0) { + if (metric_info.bwstyle == STYLE_ATOMIC) + print_atomic_results_header(metric_info); + else + print_results_header(metric_info); + } + + for (len = metric_info.start_len; len <= metric_info.max_len; + len *= metric_info.size_inc) { + + large_message_metric_chg(&metric_info, len); + + bi_dir_bw(len, &metric_info); + } + + shmem_barrier_all(); + + if(metric_info.validate) { + if(metric_info.bwstyle != STYLE_ATOMIC) { + validate_recv(metric_info.dest, metric_info.max_len, partner_pe); + } else { + validate_atomics(metric_info); + } + } +} + +/**************************************************************/ +/* UNI-Directional BW */ +/**************************************************************/ + +/*have one symmetric char array metric_info->buf of max_len to use for + * calculation initalized with my_node number + * NOTE: post function validation assumptions, data isn't flushed pre/post */ +extern void uni_dir_bw(int len, perf_metrics_t *metric_info); + +void static inline uni_dir_bw_test_and_output(perf_metrics_t metric_info) { + int len = 0, partner_pe = partner_node(metric_info); + + if(metric_info.my_node == 0) { + if (metric_info.bwstyle == STYLE_ATOMIC) + print_atomic_results_header(metric_info); + else + print_results_header(metric_info); + } + + for (len = metric_info.start_len; len <= metric_info.max_len; + len *= metric_info.size_inc) { + + large_message_metric_chg(&metric_info, len); + + uni_dir_bw(len, &metric_info); + } + + shmem_barrier_all(); + + if(metric_info.validate) { + if((streaming_node(metric_info) && metric_info.bwstyle == STYLE_GET) || + (target_node(metric_info) && metric_info.bwstyle == STYLE_PUT)) { + validate_recv(metric_info.dest, metric_info.max_len, partner_pe); + } else if(metric_info.bwstyle == STYLE_ATOMIC) { + validate_atomics(metric_info); + } + } +} + +/**************************************************************/ +/* INIT and teardown of resources */ 
+/**************************************************************/ + +/*create and init (with my_PE_num) two symmetric arrays on the heap */ +static inline int bw_init_data_stream(perf_metrics_t *metric_info, + int argc, char *argv[]) { + + int i = 0; + data_set_defaults(metric_info); + int ret = command_line_arg_check(argc, argv, metric_info); + if (ret != 0) { + return -1; + } + +#ifndef VERSION_1_0 + int tl; + shmem_init_thread(metric_info->thread_safety, &tl); + if(tl != metric_info->thread_safety) { + fprintf(stderr,"Could not initialize with requested thread " + "level %d: got %d\n", metric_info->thread_safety, tl); + return -2; + } +#else + start_pes(0); +#endif + + if (data_runtime_update(metric_info) == -1) + return -2; + thread_safety_validation_check(metric_info); + metric_info->sztarget = metric_info->midpt; + metric_info->szinitiator = metric_info->midpt; + + for(i = 0; i < SHMEM_REDUCE_MIN_WRKDATA_SIZE; i++) + red_psync[i] = SHMEM_SYNC_VALUE; + + if (only_even_PEs_check(metric_info->my_node, metric_info->num_pes) != 0) { + return -2; + } + + metric_info->src = aligned_buffer_alloc(metric_info->max_len); + init_array(metric_info->src, metric_info->max_len, metric_info->my_node); + + metric_info->dest = aligned_buffer_alloc(metric_info->max_len); + init_array(metric_info->dest, metric_info->max_len, metric_info->my_node); + + return 0; +} + + +static inline int bi_dir_init(perf_metrics_t *metric_info, int argc, + char *argv[]) { + int ret = bw_init_data_stream(metric_info, argc, argv); + if (ret == 0) { + bi_dir_data_init(metric_info); + return 0; + } else + return ret; +} + +static inline int uni_dir_init(perf_metrics_t *metric_info, int argc, + char *argv[], bw_style bwstyl) { + int ret = bw_init_data_stream(metric_info, argc, argv); + if (ret == 0) { + /* uni-dir validate needs to know if its a put or get */ + metric_info->bwstyle = bwstyl; + uni_dir_data_init(metric_info); + return 0; + } else + return ret; +} + +void static inline 
bw_data_free(perf_metrics_t *metric_info) { + shmem_barrier_all(); + + aligned_buffer_free(metric_info->src); + aligned_buffer_free(metric_info->dest); +} + +static void inline bw_finalize(void) { +#ifndef VERSION_1_0 + shmem_finalize(); +#endif +} + +void static inline bi_dir_bw_main(int argc, char *argv[]) { + + perf_metrics_t metric_info; + + int ret = bi_dir_init(&metric_info, argc, argv); + + if (ret == 0) { + bi_dir_bw_test_and_output(metric_info); + bw_data_free(&metric_info); + } + + if (ret != -1) + bw_finalize(); +} /*main() */ + +void static inline uni_dir_bw_main(int argc, char *argv[], bw_style bwstyl) { + + perf_metrics_t metric_info; + + int ret = uni_dir_init(&metric_info, argc, argv, bwstyl); + + if (ret == 0) { + uni_dir_bw_test_and_output(metric_info); + bw_data_free(&metric_info); + } + + if (ret != -1) + bw_finalize(); +} /*main() */ diff --git a/test/performance/shmem_perf_suite/common.h b/test/performance/shmem_perf_suite/common.h new file mode 100644 index 0000000..4c444ba --- /dev/null +++ b/test/performance/shmem_perf_suite/common.h @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2018 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ONE 1 + +/* return microseconds */ +double perf_shmemx_wtime(void); + +double perf_shmemx_wtime(void) +{ + double wtime = 0.0; + +#ifdef CLOCK_MONOTONIC + struct timespec tv; + clock_gettime(CLOCK_MONOTONIC, &tv); + wtime = tv.tv_sec * 1e6; + wtime += (double)tv.tv_nsec / 1000.0; +#else + struct timeval tv; + gettimeofday(&tv, NULL); + wtime = tv.tv_sec * 1e6; + wtime += (double)tv.tv_usec; +#endif + return wtime; +} + +#ifdef CRAY_SHMEM +#define shmem_putmem_nbi(dest, source, nelems, pe) shmem_putmem_nb(dest, source, nelems, pe, NULL) +#define shmem_getmem_nbi(dest, source, nelems, pe) shmem_getmem_nb(dest, source, nelems, pe, NULL) +#endif + +static char * aligned_buffer_alloc(int len) +{ + unsigned long alignment = 0; + char *ptr1 = NULL, *ptr_aligned = NULL; + size_t ptr_size = sizeof(uintptr_t); + uintptr_t save_ptr1 = 0; + + alignment = getpagesize(); + +#ifndef VERSION_1_0 + ptr1 = shmem_malloc(ptr_size + alignment + len); +#else + ptr1 = shmalloc(ptr_size + alignment + len); +#endif + assert(ptr1 != NULL); + + save_ptr1 = (uintptr_t)ptr1; + + /* reserve at least ptr_size before alignment chunk */ + ptr1 = (char *) (ptr1 + ptr_size); + + /* only offset ptr by alignment to ensure len is preserved */ + /* clear bottom bits to ensure alignment */ + ptr_aligned = (char *) ( ((uintptr_t) ((char *) (ptr1 + alignment))) + & ~(alignment-1)); + + /* embed org ptr address in reserved ptr_size space */ + memcpy((ptr_aligned - ptr_size), &save_ptr1, ptr_size); + + return ptr_aligned; +} + +static void aligned_buffer_free(char * ptr_aligned) +{ + + char * ptr_org; + uintptr_t temp_p; + 
size_t ptr_size = sizeof(uintptr_t); + + /* grab ptr */ + memcpy(&temp_p, (ptr_aligned - ptr_size), ptr_size); + ptr_org = (char *) temp_p; + +#ifndef VERSION_1_0 + shmem_free(ptr_org); +#else + shfree(ptr_org); +#endif +} + +int static inline is_divisible_by_4(int num) +{ + assert(num >= 0); + assert(sizeof(int) == 4); + return (!(num & 0x00000003)); +} + +/*to be a power of 2 must only have 1 set bit (0 is accepted as well)*/ +int static inline is_pow_of_2(unsigned int num) +{ + /*strip trailing zero bits; bit 0 is tested before each shift so it is never skipped*/ + while(num && !(num & 1)) num >>= 1; + + /*it will be 1 if its the only set bit*/ + return ((num == 1 || num == 0)? true : false); +} + +void static init_array(char * const buf, int len, int my_pe_num) +{ + int i = 0; + int array_size = len / sizeof(int); + int * ibuf = (int *)buf; + + assert(is_divisible_by_4(len)); + + for(i = 0; i < array_size; i++) + ibuf[i] = my_pe_num; + +} + +void static inline validate_recv(char * buf, int len, int partner_pe) +{ + int i = 0; + int array_size = len / sizeof(int); + int * ibuf = (int *)buf; + + assert(is_divisible_by_4(len)); + + for(i = 0; i < array_size; i++) { + if(ibuf[i] != partner_pe) + printf("validation error at index %d: %d != %d \n", i, ibuf[i], + partner_pe); + } +} diff --git a/test/performance/shmem_perf_suite/int_element_latency.h b/test/performance/shmem_perf_suite/int_element_latency.h new file mode 100644 index 0000000..df9e8d6 --- /dev/null +++ b/test/performance/shmem_perf_suite/int_element_latency.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2018 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +void static inline +int_p_latency(perf_metrics_t data) +{ + double start = 0.0; + double end = 0.0; + int i = 0; + + if (data.my_node == PUT_IO_NODE) { + printf("\nStream shmem_int_p results:\n"); + print_results_header(); + } + + /*puts to zero to match gets validation scheme*/ + if (data.my_node == PUT_IO_NODE) { + + for (i = 0; i < data.trials + data.warmup; i++) { + if(i == data.warmup) + start = perf_shmemx_wtime(); + + shmem_int_p((int*) data.dest, data.my_node, 0); + shmem_quiet(); + + } + end = perf_shmemx_wtime(); + + calc_and_print_results(start, end, sizeof(int), data); + } + + shmem_barrier_all(); + + if((data.my_node == 0) && data.validate) + validate_recv(data.dest, sizeof(int), partner_node(data.my_node)); + +} /* latency/bw for one-way trip */ + +void static inline +int_g_latency(perf_metrics_t data) +{ + double start = 0.0; + double end = 0.0; + int i = 0; + int rtnd = -1; + + if (data.my_node == GET_IO_NODE) { + printf("\nStream shmem_int_g results:\n"); + print_results_header(); + } + + if (data.my_node == GET_IO_NODE) { + + for (i = 0; i < data.trials + data.warmup; i++) { + if(i == data.warmup) + start = perf_shmemx_wtime(); + + rtnd = shmem_int_g((int*) data.src, 1); + } + end = perf_shmemx_wtime(); + + 
calc_and_print_results(start, end, sizeof(int), data); + } + + shmem_barrier_all(); + + if((data.my_node == 0) && data.validate) + validate_recv((char*) &rtnd, sizeof(int), partner_node(data.my_node)); +} diff --git a/test/performance/shmem_perf_suite/latency_common.h b/test/performance/shmem_perf_suite/latency_common.h new file mode 100644 index 0000000..6f2ae80 --- /dev/null +++ b/test/performance/shmem_perf_suite/latency_common.h @@ -0,0 +1,234 @@ +/* + * Copyright (c) 2018 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include + +#define PUT_IO_NODE 1 +#define GET_IO_NODE !PUT_IO_NODE +#define INIT_VALUE 1 + +#define MAX_MSG_SIZE (1<<23) +#define START_LEN 1 + +#define INC 2 +#define TRIALS 100 +#define WARMUP 10 + +typedef struct perf_metrics { + unsigned int start_len, max_len; + unsigned int inc, trials; + unsigned int warmup; + int validate; + int my_node, npes; + long * target; + char * src, *dest; +} perf_metrics_t; + +void static data_init(perf_metrics_t * data) { + data->start_len = START_LEN; + data->max_len = MAX_MSG_SIZE; + data->inc = INC; + data->trials = TRIALS; + data->warmup = WARMUP; /*number of initial iterations to skip*/ + data->validate = false; + data->my_node = shmem_my_pe(); + data->npes = shmem_n_pes(); + data->target = NULL; + data->src = NULL; + data->dest = NULL; +} + +void static inline print_results_header(void) { + printf("\nLength Latency \n"); + printf("in bytes in micro seconds \n"); +} + +/*not storing results, only outputing it*/ +void static inline calc_and_print_results(double start, double end, int len, + perf_metrics_t data) { + double latency = 0.0; + latency = (end - start) / data.trials; + + printf("%9d %8.2f \n", len, latency); +} + +int static inline partner_node(int my_node) +{ + return ((my_node % 2 == 0) ? 
(my_node + 1) : (my_node - 1)); +} + +void static inline command_line_arg_check(int argc, char *argv[], + perf_metrics_t *metric_info) { + int ch, error = false; + extern char *optarg; + + /* check command line args */ + while ((ch = getopt(argc, argv, "e:s:n:v")) != EOF) { + switch (ch) { + case 's': + metric_info->start_len = strtol(optarg, (char **)NULL, 0); + if ( metric_info->start_len < 1 ) metric_info->start_len = 1; + if(!is_pow_of_2(metric_info->start_len)) error = true; + break; + case 'e': + metric_info->max_len = strtol(optarg, (char **)NULL, 0); + if(!is_pow_of_2(metric_info->max_len)) error = true; + if(metric_info->max_len < metric_info->start_len) error = true; + break; + case 'n': + metric_info->trials = strtol(optarg, (char **)NULL, 0); + if(metric_info->trials <= (metric_info->warmup*2)) error = true; + break; + case 'v': + metric_info->validate = true; + break; + default: + error = true; + break; + } + } + + if (error) { + if (metric_info->my_node == 0) { + fprintf(stderr, "Usage: [-s start_length] [-e end_length] "\ + ": lengths must be a power of two \n " \ + "[-n trials (must be greater than 20)] "\ + "[-v (validate results)]\n"); + } +#ifndef VERSION_1_0 + shmem_finalize(); +#endif + exit (-1); + } +} + +void static inline only_two_PEs_check(int my_node, int num_pes) { + if (num_pes != 2) { + if (my_node == 0) { + fprintf(stderr, "2-nodes only test\n"); + } +#ifndef VERSION_1_0 + shmem_finalize(); +#endif + exit(77); + } +} + +/**************************************************************/ +/* Latency data gathering */ +/**************************************************************/ + +/*have single symmetric long element "target" from perf_metrics_t + * that needs to be initialized in function*/ +extern void long_element_round_trip_latency(perf_metrics_t data); + +extern void int_element_latency(perf_metrics_t data); + +/*have symmetric buffers src/dest from perf_metrics_t + * that has been initialized to my_node number */ +extern void 
streaming_latency(int len, perf_metrics_t *data); + +void static inline multi_size_latency(perf_metrics_t data, char *argv[]) { + int len; + int partner_pe = partner_node(data.my_node); + + for (len = data.start_len; len <= data.max_len; len *= data.inc) { + + shmem_barrier_all(); + + streaming_latency(len, &data); + + shmem_barrier_all(); + } + + shmem_barrier_all(); + + if((data.my_node == 0) && data.validate) + validate_recv(data.dest, data.max_len, partner_pe); +} + + + +/**************************************************************/ +/* INIT and teardown of resources */ +/**************************************************************/ + +void static inline latency_init_resources(int argc, char *argv[], + perf_metrics_t *data) { +#ifndef VERSION_1_0 + shmem_init(); +#else + start_pes(0); +#endif + + data_init(data); + + only_two_PEs_check(data->my_node, data->npes); + + command_line_arg_check(argc, argv, data); + + data->src = aligned_buffer_alloc(data->max_len); + init_array(data->src, data->max_len, data->my_node); + + data->dest = aligned_buffer_alloc(data->max_len); + init_array(data->dest, data->max_len, data->my_node); + +#ifndef VERSION_1_0 + data->target = shmem_malloc(sizeof(long)); +#else + data->target = shmalloc(sizeof(long)); +#endif +} + +void static inline latency_free_resources(perf_metrics_t *data) { + shmem_barrier_all(); + +#ifndef VERSION_1_0 + shmem_free(data->target); +#else + shfree(data->target); +#endif + aligned_buffer_free(data->src); + aligned_buffer_free(data->dest); +#ifndef VERSION_1_0 + shmem_finalize(); +#endif +} + +void static inline latency_main(int argc, char *argv[]) { + perf_metrics_t data; + + latency_init_resources(argc, argv, &data); + + long_element_round_trip_latency(data); + + int_element_latency(data); + + multi_size_latency(data, argv); + + latency_free_resources(&data); +} diff --git a/test/performance/shmem_perf_suite/round_t_latency.h b/test/performance/shmem_perf_suite/round_t_latency.h new file mode 100644 
index 0000000..7552223 --- /dev/null +++ b/test/performance/shmem_perf_suite/round_t_latency.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2018 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +void static inline +long_element_round_trip_latency_get(perf_metrics_t data) +{ + double start = 0.0; + double end = 0.0; + int dest = 1, i = 0; + int partner_pe = partner_node(data.my_node); + *data.target = data.my_node; + + if (data.my_node == GET_IO_NODE) { + printf("\nshmem_long_g results:\n"); + print_results_header(); + } + + shmem_barrier_all(); + + if (data.my_node == GET_IO_NODE) { + for (i = 0; i < data.trials + data.warmup; i++) { + if(i == data.warmup) + start = perf_shmemx_wtime(); + + *data.target = shmem_long_g(data.target, dest); + } + end = perf_shmemx_wtime(); + + calc_and_print_results(start, end, sizeof(long), data); + + if(data.validate) { + if(*data.target != partner_pe) + printf("validation error shmem_long_g target = %ld != %d\n", + *data.target, partner_pe); + } + } +} /*gauge small get pathway round trip latency*/ + +void static inline +long_element_round_trip_latency_put(perf_metrics_t data) +{ + double start = 0.0; + double end = 0.0; + long tmp; + int dest = (data.my_node + 1) % data.npes, i = 0; + tmp = *data.target = INIT_VALUE; + + if (data.my_node == PUT_IO_NODE) { + printf("\nPing-Pong shmem_long_p results:\n"); + print_results_header(); + } + + shmem_barrier_all(); + + if (data.my_node == PUT_IO_NODE) { + for (i = 0; i < data.trials + data.warmup; i++) { + if(i == data.warmup) + start = perf_shmemx_wtime(); + + shmem_long_p(data.target, ++tmp, dest); + + shmem_long_wait_until(data.target, SHMEM_CMP_EQ, tmp); + } + end = perf_shmemx_wtime(); + + data.trials = data.trials*2; /*output half to get single round trip time*/ + calc_and_print_results(start, end, sizeof(long), data); + + } else { + for (i = 0; i < data.trials + data.warmup; i++) { + shmem_long_wait_until(data.target, SHMEM_CMP_EQ, ++tmp); + + shmem_long_p(data.target, tmp, dest); + } + } + +} /*gauge small put pathway round trip latency*/ diff --git a/test/performance/shmem_perf_suite/shmem_bibw_atomics_perf.c 
b/test/performance/shmem_perf_suite/shmem_bibw_atomics_perf.c new file mode 100644 index 0000000..b0ad0bd --- /dev/null +++ b/test/performance/shmem_perf_suite/shmem_bibw_atomics_perf.c @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2018 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/* +** +** This is a bandwidth centric test for put: back-to-back message rate +** +** Features of Test: bi-directional bandwidth +** +** -by default megabytes/second results +** +**NOTE: this test assumes correctness of reduction algorithm +*/ + +#include + +#define bi_bw(len, metric_info, snode, NAME, TYPE, op) \ + do { \ + double start = 0.0, end = 0.0; \ + int i = 0, j = 0, num_itr = metric_info->trials + metric_info->warmup; \ + int dest = partner_node(*metric_info); \ + shmem_barrier_all(); \ + \ + switch(op) { \ + case OP_ADD: \ + for(i = 0; i < num_itr; i++) { \ + if(snode && i == metric_info->warmup) \ + start = perf_shmemx_wtime(); \ + \ + for(j = 0; j < metric_info->window_size; j++) \ + shmem_##NAME##_atomic_add( \ + (TYPE *)(metric_info->dest), ONE, dest); \ + \ + shmem_quiet(); \ + } \ + if(snode) \ + end = perf_shmemx_wtime(); \ + break; \ + case OP_INC: \ + for(i = 0; i < num_itr; i++) { \ + if(snode && i == metric_info->warmup) \ + start = perf_shmemx_wtime(); \ + \ + for(j = 0; j < metric_info->window_size; j++) \ + shmem_##NAME##_atomic_inc( \ + (TYPE *)(metric_info->dest), dest); \ + \ + shmem_quiet(); \ + } \ + if(snode) \ + end = perf_shmemx_wtime(); \ + break; \ + case OP_FADD: \ + for(i = 0; i < num_itr; i++) { \ + if(snode && i == metric_info->warmup) \ + start = perf_shmemx_wtime(); \ + \ + for(j = 0; j < metric_info->window_size; j++) \ + shmem_##NAME##_atomic_fetch_add( \ + (TYPE *)(metric_info->dest), ONE, dest); \ + } \ + if(snode) \ + end = perf_shmemx_wtime(); \ + break; \ + case OP_FINC: \ + for(i = 0; i < num_itr; i++) { \ + if(snode && i == metric_info->warmup) \ + start = perf_shmemx_wtime(); \ + \ + for(j = 0; j < metric_info->window_size; j++) \ + shmem_##NAME##_atomic_fetch_inc( \ + (TYPE *)(metric_info->dest), dest); \ + } \ + if(snode) \ + end = perf_shmemx_wtime(); \ + break; \ + case OP_SWAP: \ + for(i = 0; i < num_itr; i++) { \ + if(snode && i == metric_info->warmup) \ + start = perf_shmemx_wtime(); \ + \ + for(j 
= 0; j < metric_info->window_size; j++) \ + shmem_##NAME##_atomic_swap( \ + (TYPE *)(metric_info->src), ONE, dest); \ + } \ + if(snode) \ + end = perf_shmemx_wtime(); \ + break; \ + case OP_CSWAP: \ + for(i = 0; i < num_itr; i++) { \ + if(snode && i == metric_info->warmup) \ + start = perf_shmemx_wtime(); \ + \ + for(j = 0; j < metric_info->window_size; j++) \ + shmem_##NAME##_atomic_compare_swap( \ + (TYPE *)(metric_info->src), dest, ONE, dest); \ + } \ + if(snode) \ + end = perf_shmemx_wtime(); \ + break; \ + default: \ + fprintf(stderr, "Error %d not a valid op case \ + for atomics\n", op); \ + break; \ + } \ + if(snode) \ + calc_and_print_results((end - start), len, *metric_info); \ + } while(0) + + +#define NUM_INC 100 + +typedef enum { + OP_ADD, + OP_INC, + OP_FADD, + OP_FINC, + OP_SWAP, + OP_CSWAP, + SIZE_OF_OP +} atomic_op_type; + +static const char * op_names [] = { "add", "inc", "fadd", "finc", "swap", "cswap" }; + +static inline void bw_set_metric_info_len(perf_metrics_t *metric_info) +{ + unsigned int atomic_sizes[ATOMICS_N_DTs] = {sizeof(int), sizeof(long), + sizeof(long long)}; + int snode = streaming_node(*metric_info); + atomic_op_type op_type = OP_ADD; + metric_info->type = BI_DIR; + metric_info->bwstyle = STYLE_ATOMIC; + + for(op_type = OP_ADD; op_type < SIZE_OF_OP; op_type++) { + if(metric_info->my_node == 0 && op_type != OP_ADD) + printf("\nshmem_%s\n", op_names[op_type]); + + metric_info->start_len = atomic_sizes[0]; + metric_info->max_len = atomic_sizes[0]; + metric_info->size_inc = NUM_INC; + + shmem_barrier_all(); + + bi_bw(atomic_sizes[0], metric_info, snode, int, int, op_type); + + metric_info->start_len = atomic_sizes[1]; + metric_info->max_len = atomic_sizes[1]; + + shmem_barrier_all(); + + bi_bw(atomic_sizes[1], metric_info, snode, long, long, op_type); + + metric_info->start_len = atomic_sizes[2]; + metric_info->max_len = atomic_sizes[2]; + + shmem_barrier_all(); + + bi_bw(atomic_sizes[2], metric_info, snode, longlong, long long, 
op_type); + } +} + +void bi_dir_bw(int len, perf_metrics_t *metric_info) +{ + bw_set_metric_info_len(metric_info); +} + +int main(int argc, char *argv[]) +{ + bi_dir_bw_main(argc, argv); + + return 0; +} diff --git a/test/performance/shmem_perf_suite/shmem_bibw_get_perf.c b/test/performance/shmem_perf_suite/shmem_bibw_get_perf.c new file mode 100644 index 0000000..df82042 --- /dev/null +++ b/test/performance/shmem_perf_suite/shmem_bibw_get_perf.c @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2018 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/* +** +** This is a bandwidth centric test for get: back-to-back message rate +** +** Features of Test: bi-direction bandwidth +** +** -by default megabytes/second results +** +**NOTE: this test assumes correctness of reduction algorithm +*/ + +#include +#include + +int main(int argc, char *argv[]) +{ + bi_dir_bw_main(argc,argv); + + return 0; +} /* end of main() */ + +void +bi_dir_bw(int len, perf_metrics_t *metric_info) +{ + bi_bw_get(len, metric_info); +} diff --git a/test/performance/shmem_perf_suite/shmem_bibw_put_ctx_perf.c b/test/performance/shmem_perf_suite/shmem_bibw_put_ctx_perf.c new file mode 100644 index 0000000..4ddbed2 --- /dev/null +++ b/test/performance/shmem_perf_suite/shmem_bibw_put_ctx_perf.c @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2018 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/* +** +** This is a bandwidth centric test for put: back-to-back message rate +** +** Features of Test: bi-directional bandwidth +** +** -by default megabytes/second results +** +**NOTE: this test assumes correctness of reduction algorithm +*/ +/* Defined ahead of the includes below; presumably switches on the OpenMP / + * context code paths in the shared benchmark headers -- confirm. */ +#define ENABLE_OPENMP + +#include +#include +/* Program entry point: hands argc/argv to bi_dir_bw_main() and always returns 0. */ +int main(int argc, char *argv[]) +{ + bi_dir_bw_main(argc, argv); + + return 0; +} +/* Measurement routine for one message length: forwards len and metric_info + * unchanged to bi_bw_ctx(). Presumably invoked by bi_dir_bw_main(), which is + * defined outside this file -- confirm. */ +void +bi_dir_bw(int len, perf_metrics_t *metric_info) +{ + bi_bw_ctx(len, metric_info); +} diff --git a/test/performance/shmem_perf_suite/shmem_bibw_put_perf.c b/test/performance/shmem_perf_suite/shmem_bibw_put_perf.c new file mode 100644 index 0000000..7f5589a --- /dev/null +++ b/test/performance/shmem_perf_suite/shmem_bibw_put_perf.c @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2018 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/* +** +** This is a bandwidth centric test for put: back-to-back message rate +** +** Features of Test: bi-directional bandwidth +** +** -by default megabytes/second results +** +**NOTE: this test assumes correctness of reduction algorithm +*/ + +#include +#include +/* Program entry point: hands argc/argv to bi_dir_bw_main() and always returns 0. */ +int main(int argc, char *argv[]) +{ + bi_dir_bw_main(argc, argv); + + return 0; +} +/* Measurement routine for one message length: forwards len and metric_info + * unchanged to bi_bw_put(). Presumably invoked by bi_dir_bw_main(), which is + * defined outside this file -- confirm. */ +void +bi_dir_bw(int len, perf_metrics_t *metric_info) +{ + bi_bw_put(len, metric_info); +} diff --git a/test/performance/shmem_perf_suite/shmem_bw_atomics_perf.c b/test/performance/shmem_perf_suite/shmem_bw_atomics_perf.c new file mode 100644 index 0000000..ce3c618 --- /dev/null +++ b/test/performance/shmem_perf_suite/shmem_bw_atomics_perf.c @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2018 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/* +** +** This is a bandwidth centric test for atomic operations: back-to-back message rate +** +** Features of Test: uni-directional bandwidth +** +** -by default megabytes/second results +** +**NOTE: this test assumes correctness of reduction algorithm +*/ +#include +/* Communication style stored in metric_info->cstyle for every measurement in this file. */ +#define ATOMIC_COMM_STYLE COMM_INCAST +/* uni_bw(len, metric_info, snode, NAME, TYPE, op): atomic bandwidth kernel. + * All PEs enter shmem_barrier_all(); only a PE whose snode is non-zero then + * runs (trials + warmup) iterations of window_size shmem_<NAME>_atomic_<op> + * calls aimed at PE partner_node(*metric_info). The clock starts when the + * iteration index equals warmup and stops after the last iteration; the + * elapsed time goes to calc_and_print_results(). + * - OP_ADD / OP_INC call shmem_quiet() after every window; the fetching, + * swap and compare-swap cases do not. + * - OP_SWAP / OP_CSWAP operate on metric_info->src, the others on + * metric_info->dest. + * - An unknown op only prints an error to stderr; results are still + * reported with start == end == 0.0. + * metric_info must be a pointer expression. It is expanded unparenthesised + * and several times, so pass a plain identifier. */ +#define uni_bw(len, metric_info, snode, NAME, TYPE, op) \ + do { \ + double start = 0.0, end = 0.0; \ + int i = 0, j = 0, num_itr = metric_info->trials + metric_info->warmup; \ + int dest = partner_node(*metric_info); \ + shmem_barrier_all(); \ + \ + if(snode) { \ + switch(op) { \ + case OP_ADD: \ + for(i = 0; i < num_itr; i++) { \ + if(i == metric_info->warmup) \ + start = perf_shmemx_wtime(); \ + \ + for(j = 0; j < metric_info->window_size; j++) \ + shmem_##NAME##_atomic_add( \ + (TYPE *)(metric_info->dest), ONE, dest); \ + \ + shmem_quiet(); \ + \ + } \ + end = perf_shmemx_wtime(); \ + break; \ + case OP_INC: \ + for(i = 0; i < num_itr; i++) { \ + if(i == metric_info->warmup) \ + start = perf_shmemx_wtime(); \ + \ + for(j = 0; j < metric_info->window_size; j++) \ + shmem_##NAME##_atomic_inc( \ + (TYPE *)(metric_info->dest), dest); \ + \ + shmem_quiet(); \ + \ + } \ + end = perf_shmemx_wtime(); \ + break; \ + case OP_FADD: \ + for(i = 0; i < num_itr; i++) { \ + if(i == metric_info->warmup) \ + start = perf_shmemx_wtime(); \ + \ + for(j = 0; j < metric_info->window_size; j++) \ + shmem_##NAME##_atomic_fetch_add( \ + (TYPE *)(metric_info->dest), ONE, dest); \ + } \ + end = perf_shmemx_wtime(); \ + break; \ + case OP_FINC: \ + for(i = 0; i < num_itr; i++) { \ + if(i == metric_info->warmup) \ + start = perf_shmemx_wtime(); \ + \ + for(j = 0; j < metric_info->window_size; j++) \ + shmem_##NAME##_atomic_fetch_inc( \ + (TYPE *)(metric_info->dest), dest); \ + } \ + end = perf_shmemx_wtime(); \ + break; \ + case OP_SWAP: \ + for(i = 0; i < num_itr; i++) { \ + if(i == metric_info->warmup) \ + start = perf_shmemx_wtime(); \ + \ + for(j = 0; j < metric_info->window_size; 
j++) \ + shmem_##NAME##_atomic_swap( \ + (TYPE *)(metric_info->src), ONE, dest); \ + } \ + end = perf_shmemx_wtime(); \ + break; \ + case OP_CSWAP: \ + for(i = 0; i < num_itr; i++) { \ + if(i == metric_info->warmup) \ + start = perf_shmemx_wtime(); \ + \ + for(j = 0; j < metric_info->window_size; j++) \ + shmem_##NAME##_atomic_compare_swap( \ + (TYPE *)(metric_info->src), dest, ONE, dest); \ + } \ + end = perf_shmemx_wtime(); \ + break; \ + default: \ + fprintf(stderr, "Error %d not a valid op case \ + for atomics\n", op); \ + break; \ + } \ + calc_and_print_results((end - start), len, *metric_info); \ + } \ + } while(0) +/* Stored into metric_info->size_inc by bw_set_metric_info_len(). */ +#define NUM_INC 100 + +/* Atomic operations measured by this test. SIZE_OF_OP is the loop bound in + * bw_set_metric_info_len(); enumerator order must match op_names[]. */ +typedef enum { + OP_ADD, + OP_INC, + OP_FADD, + OP_FINC, + OP_SWAP, + OP_CSWAP, + SIZE_OF_OP +} atomic_op_type; +/* Name printed for each atomic_op_type value (indexed by the enum). */ +static const char * op_names [] = { "add", "inc", "fadd", "finc", "swap", "cswap" }; +/* Runs uni_bw for every atomic_op_type, on int, long and long long in turn. + * Sets cstyle to ATOMIC_COMM_STYLE and type to UNI_DIR, and before each + * measurement sets start_len and max_len to the size of the type under test + * (size_inc is set to NUM_INC). PE 0 prints a shmem_<op> heading before every + * op except the first, OP_ADD. */ +static inline void bw_set_metric_info_len(perf_metrics_t *metric_info) +{ + unsigned int atomic_sizes[ATOMICS_N_DTs] = {sizeof(int), sizeof(long), + sizeof(long long)}; + metric_info->cstyle = ATOMIC_COMM_STYLE; + metric_info->type = UNI_DIR; + int snode = streaming_node(*metric_info); + atomic_op_type op_type = OP_ADD; + + for(op_type = OP_ADD; op_type < SIZE_OF_OP; op_type++) { + if(metric_info->my_node == 0 && op_type != OP_ADD) + printf("\nshmem_%s\n", op_names[op_type]); + + metric_info->start_len = atomic_sizes[0]; + metric_info->max_len = atomic_sizes[0]; + metric_info->size_inc = NUM_INC; + + shmem_barrier_all(); + + uni_bw(atomic_sizes[0], metric_info, snode, int, int, op_type); + + metric_info->start_len = atomic_sizes[1]; + metric_info->max_len = atomic_sizes[1]; + + shmem_barrier_all(); + + uni_bw(atomic_sizes[1], metric_info, snode, long, long, op_type); + + metric_info->start_len = atomic_sizes[2]; + metric_info->max_len = atomic_sizes[2]; + + shmem_barrier_all(); + + uni_bw(atomic_sizes[2], metric_info, snode, longlong, long long, op_type); + } +} +/* len is not used here: the sizes measured are chosen inside + * bw_set_metric_info_len(). */ +void uni_dir_bw(int len, perf_metrics_t *metric_info) +{ 
+ bw_set_metric_info_len(metric_info); +} +/* Program entry point: calls uni_dir_bw_main() with STYLE_ATOMIC and always returns 0. */ +int main(int argc, char *argv[]) +{ + uni_dir_bw_main(argc, argv, STYLE_ATOMIC); + + return 0; +} diff --git a/test/performance/shmem_perf_suite/shmem_bw_get_perf.c b/test/performance/shmem_perf_suite/shmem_bw_get_perf.c new file mode 100644 index 0000000..9c4d832 --- /dev/null +++ b/test/performance/shmem_perf_suite/shmem_bw_get_perf.c @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2018 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/* +** +** This is a bandwidth centric test for get: back-to-back message rate +** +** Features of Test: uni-directional bandwidth +** +** -by default megabytes/second results +** +**NOTE: this test assumes correctness of reduction algorithm +*/ + +#include +#include +/* Program entry point: calls uni_dir_bw_main() with STYLE_GET and always returns 0. */ +int main(int argc, char *argv[]) +{ + uni_dir_bw_main(argc,argv, STYLE_GET); + + return 0; +} /* end of main() */ +/* Measurement routine for one message length: forwards len and metric_info + * unchanged to uni_bw_get(). Presumably invoked by uni_dir_bw_main(), which + * is defined outside this file -- confirm. */ +void +uni_dir_bw(int len, perf_metrics_t *metric_info) +{ + uni_bw_get(len, metric_info); +} + diff --git a/test/performance/shmem_perf_suite/shmem_bw_put_ctx_perf.c b/test/performance/shmem_perf_suite/shmem_bw_put_ctx_perf.c new file mode 100644 index 0000000..c4faaee --- /dev/null +++ b/test/performance/shmem_perf_suite/shmem_bw_put_ctx_perf.c @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2018 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+** +** This is a bandwidth centric test for put: back-to-back message rate +** +** Features of Test: uni-directional bandwidth using contexts driven by +** multiple threads. +** +** -by default megabytes/second results +** +**NOTE: this test assumes correctness of reduction algorithm +*/ +/* Defined ahead of the includes below; presumably switches on the OpenMP / + * context code paths in the shared benchmark headers -- confirm. */ +#define ENABLE_OPENMP + +#include +#include +/* Program entry point: calls uni_dir_bw_main() with STYLE_PUT and always returns 0. */ +int main(int argc, char *argv[]) +{ + uni_dir_bw_main(argc, argv, STYLE_PUT); + + return 0; +} +/* Measurement routine for one message length: calls uni_bw_ctx() with len, + * metric_info and the logical negation of streaming_node(*metric_info). + * NOTE(review): uni_bw_ctx() issues puts only when its third argument is + * non-zero, so the negation makes the PEs for which streaming_node() returns + * zero do the streaming -- confirm this inversion is intended. */ +void +uni_dir_bw(int len, perf_metrics_t *metric_info) +{ + uni_bw_ctx(len, metric_info, !streaming_node(*metric_info)); +} diff --git a/test/performance/shmem_perf_suite/shmem_bw_put_perf.c b/test/performance/shmem_perf_suite/shmem_bw_put_perf.c new file mode 100644 index 0000000..3cbab8f --- /dev/null +++ b/test/performance/shmem_perf_suite/shmem_bw_put_perf.c @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2018 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/* +** +** This is a bandwidth centric test for put: back-to-back message rate +** +** Features of Test: uni-directional bandwidth +** +** -by default megabytes/second results +** +**NOTE: this test assumes correctness of reduction algorithm +*/ +#include +#include +/* Program entry point: calls uni_dir_bw_main() with STYLE_PUT and always returns 0. */ +int main(int argc, char *argv[]) +{ + uni_dir_bw_main(argc, argv, STYLE_PUT); + + return 0; +} +/* Measurement routine for one message length: forwards len and metric_info + * unchanged to uni_bw_put(). Presumably invoked by uni_dir_bw_main(), which + * is defined outside this file -- confirm. */ +void +uni_dir_bw(int len, perf_metrics_t *metric_info) +{ + uni_bw_put(len, metric_info); +} + diff --git a/test/performance/shmem_perf_suite/shmem_latency_get_perf.c b/test/performance/shmem_perf_suite/shmem_latency_get_perf.c new file mode 100644 index 0000000..1d1a595 --- /dev/null +++ b/test/performance/shmem_perf_suite/shmem_latency_get_perf.c @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2018 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/* +** +** Notice: micro benchmark ~ two nodes only +** +** Features of Test: +** 1) small get latency test +** 2) getmem latency test to calculate latency of various sizes +** +*/ + +#include +#include +#include +/* Program entry point: hands argc/argv to latency_main() and always returns 0. */ +int main(int argc, char *argv[]) +{ + + latency_main(argc, argv); + + return 0; +} /* end of main() */ + +/* Round-trip latency for this test: delegates to the get variant, + * long_element_round_trip_latency_get(). */ +void +long_element_round_trip_latency(perf_metrics_t data) +{ + long_element_round_trip_latency_get(data); +} +/* Single-int latency for this test: delegates to int_g_latency(). */ +void +int_element_latency(perf_metrics_t data) +{ + int_g_latency(data); +} +/* Streaming get latency for messages of len bytes. On the first call made by + * PE GET_IO_NODE a banner and the results header are printed. PE 0 then + * issues trials + warmup blocking shmem_getmem() calls from PE 1, starting + * the clock when the iteration index equals warmup, and passes start, end and + * len to calc_and_print_results(). Other PEs do no communication here. */ +void +streaming_latency(int len, perf_metrics_t *data) +{ + double start = 0.0; + double end = 0.0; + int i = 0; + static int print_once = 0; + if(!print_once && data->my_node == GET_IO_NODE) { + printf("\nStreaming results for %d trials each of length %d through %d in"\ + " powers of %d\n", data->trials, data->start_len, + data->max_len, data->inc); + print_results_header(); + print_once++; + } + + if (data->my_node == 0) { + + for (i = 0; i < data->trials + data->warmup; i++) { + if(i == data->warmup) + start = perf_shmemx_wtime(); + + shmem_getmem(data->dest, data->src, len, 1); + } + end = perf_shmemx_wtime(); + + calc_and_print_results(start, end, len, *data); + } +} /* latency/bw for one-way trip */ diff --git a/test/performance/shmem_perf_suite/shmem_latency_nb_get_perf.c b/test/performance/shmem_perf_suite/shmem_latency_nb_get_perf.c new file mode 100644 index 0000000..e88cbf2 --- /dev/null +++ b/test/performance/shmem_perf_suite/shmem_latency_nb_get_perf.c @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2018 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* +** +** Notice: micro benchmark ~ two nodes only +** +** Features of Test: +** 1) small get latency test +** 2) getmem latency test to calculate latency of various sizes +** +*/ + +#include +/* Program entry point: hands argc/argv to latency_main() and always returns 0. */ +int main(int argc, char *argv[]) +{ + latency_main(argc, argv); + + return 0; +} /* end of main() */ + +/* NO-OP for non-blocking */ +void +long_element_round_trip_latency(perf_metrics_t data) {} +/* Also an empty body in this non-blocking test. */ +void +int_element_latency(perf_metrics_t data) {} +/* Streaming non-blocking get latency for messages of len bytes. PE 0 issues + * trials + warmup shmem_getmem_nbi() calls from PE 1, each followed by + * shmem_quiet(), starting the clock when the iteration index equals warmup, + * and passes start, end and len to calc_and_print_results(). No banner is + * printed by this function; other PEs do no communication here. */ +void +streaming_latency(int len, perf_metrics_t *data) +{ + double start = 0.0; + double end = 0.0; + int i = 0; + + if (data->my_node == 0) { + + for (i = 0; i < data->trials + data->warmup; i++) { + if(i == data->warmup) + start = perf_shmemx_wtime(); + + shmem_getmem_nbi(data->dest, data->src, len, 1); + shmem_quiet(); + } + end = perf_shmemx_wtime(); + + calc_and_print_results(start, end, len, *data); + } +} /* latency/bw for one-way trip */ diff --git a/test/performance/shmem_perf_suite/shmem_latency_nb_put_perf.c b/test/performance/shmem_perf_suite/shmem_latency_nb_put_perf.c new file mode 100644 index 0000000..080534b --- /dev/null +++ b/test/performance/shmem_perf_suite/shmem_latency_nb_put_perf.c @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2018 Intel Corporation. All rights reserved. 
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/* +** +** Notice: micro benchmark ~ two nodes only +** +** Features of Test: +** 1) small put pingpong latency test +** 2) one sided latency test to calculate latency of various sizes +** to the network stack +** +*/ + +#include +/* Program entry point: hands argc/argv to latency_main() and always returns 0. */ +int main(int argc, char *argv[]) +{ + latency_main(argc, argv); + + return 0; +} /* end of main() */ + + +/* NO-OP for non-blocking */ +void +long_element_round_trip_latency(perf_metrics_t data) {} +/* Also an empty body in this non-blocking test. */ +void +int_element_latency(perf_metrics_t data) {} +/* Streaming non-blocking put latency for messages of len bytes. PE 1 issues + * trials + warmup shmem_putmem_nbi() calls to PE 0, each followed by + * shmem_quiet(), starting the clock when the iteration index equals warmup, + * and passes start, end and len to calc_and_print_results(). No banner is + * printed by this function; other PEs do no communication here. */ +void +streaming_latency(int len, perf_metrics_t *data) +{ + double start = 0.0; + double end = 0.0; + int i = 0; + + /*puts to zero to match gets validation scheme*/ + if (data->my_node == 1) { + + for (i = 0; i < data->trials + data->warmup; i++) { + if(i == data->warmup) + start = perf_shmemx_wtime(); + + shmem_putmem_nbi(data->dest, data->src, len, 0); + shmem_quiet(); + + } + end = perf_shmemx_wtime(); + + calc_and_print_results(start, end, len, *data); + } +} /* latency/bw for one-way trip */ diff --git a/test/performance/shmem_perf_suite/shmem_latency_put_perf.c b/test/performance/shmem_perf_suite/shmem_latency_put_perf.c new file mode 100644 index 0000000..2bf9f23 --- /dev/null +++ b/test/performance/shmem_perf_suite/shmem_latency_put_perf.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2018 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* +** +** Notice: micro benchmark ~ two nodes only +** +** Features of Test: +** 1) small put pingpong latency test +** 2) one sided latency test to calculate latency of various sizes +** to the network stack +** +*/ + +#include +#include +#include +/* Program entry point: hands argc/argv to latency_main() and always returns 0. */ +int main(int argc, char *argv[]) +{ + latency_main(argc, argv); + + return 0; +} /* end of main() */ + +/* Round-trip latency for this test: delegates to the put variant, + * long_element_round_trip_latency_put(). */ +void +long_element_round_trip_latency(perf_metrics_t data) +{ + long_element_round_trip_latency_put(data); +} +/* Single-int latency for this test: delegates to int_p_latency(). */ +void +int_element_latency(perf_metrics_t data) +{ + int_p_latency(data); +} +/* Streaming put latency for messages of len bytes. On the first call made by + * PE PUT_IO_NODE a banner and the results header are printed. PE 1 then + * issues trials + warmup shmem_putmem() calls to PE 0, each followed by + * shmem_quiet(), starting the clock when the iteration index equals warmup, + * and passes start, end and len to calc_and_print_results(). Other PEs do no + * communication here. */ +void +streaming_latency(int len, perf_metrics_t *data) +{ + double start = 0.0; + double end = 0.0; + int i = 0; + static int print_once = 0; + if(!print_once && data->my_node == PUT_IO_NODE) { + printf("\nStreaming results for %d trials each of length %d through %d in"\ + " powers of %d\n", data->trials, data->start_len, + data->max_len, data->inc); + print_results_header(); + print_once++; + } + + /*puts to zero to match gets validation scheme*/ + if (data->my_node == 1) { + + for (i = 0; i < data->trials + data->warmup; i++) { + if(i == data->warmup) + start = perf_shmemx_wtime(); + + shmem_putmem(data->dest, data->src, len, 0); + shmem_quiet(); + + } + end = perf_shmemx_wtime(); + + calc_and_print_results(start, end, len, *data); + } +} /* latency/bw for one-way trip */ diff --git a/test/performance/shmem_perf_suite/target_put.h b/test/performance/shmem_perf_suite/target_put.h new file mode 100644 index 0000000..984b687 --- 
/dev/null +++ b/test/performance/shmem_perf_suite/target_put.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2018 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +/* Size of the side this PE is on: szinitiator when my_node is below midpt, + * otherwise sztarget. */ +int static inline get_size_of_side(perf_metrics_t my_info) { + if(my_info.my_node < my_info.midpt) + return my_info.szinitiator; + else + return my_info.sztarget; +} +/* Number of partner PEs for this PE. Returns 1 when this side has exactly + * midpt active PEs; otherwise midpt / active_PEs, plus one more when + * (my_node % midpt) is less than (midpt - active_PEs) % active_PEs. */ +int static inline get_num_partners(perf_metrics_t my_info) { + int unused_PEs = 0, num_partners = 0, num_xtra_partners = 0; + int active_PEs = get_size_of_side(my_info); + + if(active_PEs == my_info.midpt) + return 1; + + unused_PEs = my_info.midpt - active_PEs; + num_partners = my_info.midpt / active_PEs; + num_xtra_partners = unused_PEs % active_PEs; + + if((my_info.my_node % my_info.midpt) < num_xtra_partners) + num_partners++; + + return num_partners; +} + +/* target only needs to know num of partners; an initiator also needs the + * list built here. Asserts cstyle == COMM_PAIRWISE and that the caller is not + * a target node. Returns NULL when num_partners < 1, otherwise a malloc'd + * array of num_partners PE numbers, entry i being + * ((my_node + i * szinitiator) % sztarget) + midpt. The caller frees it. */ +int static inline *get_initiators_partners(perf_metrics_t my_info, int num_partners) { + int node_to_shadow = my_info.my_node; + int i = 0; + int *partner_nodes = NULL; + + assert(my_info.cstyle == COMM_PAIRWISE && !target_node(my_info)); + if(num_partners < 1) + return partner_nodes; + + partner_nodes = (int *) malloc(sizeof(int) * num_partners); + assert(partner_nodes); + + for(i = 0; i < num_partners; i++) { + partner_nodes[i] = ((node_to_shadow % my_info.sztarget) + my_info.midpt); + node_to_shadow += my_info.szinitiator; + } + + return partner_nodes; +} +/* One bandwidth measurement of len-byte puts with completion signalled to the + * targets. Two rounds with the same shape run back to back, a warmup round of + * metric_info.warmup puts per partner and a timed round of metric_info.trials + * puts: each streaming PE puts to every partner in turn, calls shmem_quiet(), + * then atomically increments that partner's completion_signal, while each + * target waits until its completion_signal equals num_partners. The local + * completion_signal is cleared between the rounds. The timed interval runs + * from after the second barrier to after the third, and is reported by + * streaming and target PEs alike. */ +void static inline target_data_uni_bw(int len, perf_metrics_t metric_info) +{ + double start = 0.0, end = 0.0; + int i = 0, j = 0; + int snode = (metric_info.num_pes != 1)? streaming_node(metric_info) : true; + int num_partners = get_num_partners(metric_info); + static int completion_signal = 0; + int *my_PE_partners = (snode ? 
+ get_initiators_partners(metric_info, num_partners): NULL); + + shmem_barrier_all(); + if (target_node(metric_info)) { + shmem_int_wait_until(&completion_signal, SHMEM_CMP_EQ, num_partners); + } else if (snode) { + for (i = 0; i < num_partners; i++) { + for(j = 0; j < metric_info.warmup; j++) { +#ifdef USE_NONBLOCKING_API + shmem_putmem_nbi(metric_info.dest, metric_info.src, len, my_PE_partners[i]); +#else + shmem_putmem(metric_info.dest, metric_info.src, len, my_PE_partners[i]); +#endif + } + shmem_quiet(); + shmem_int_atomic_inc(&completion_signal, my_PE_partners[i]); + } + } + + completion_signal = 0; + shmem_barrier_all(); + start = perf_shmemx_wtime(); + + if (target_node(metric_info)) { + shmem_int_wait_until(&completion_signal, SHMEM_CMP_EQ, num_partners); + } else if (snode) { + for (i = 0; i < num_partners; i++) { + for(j = 0; j < metric_info.trials; j++) { +#ifdef USE_NONBLOCKING_API + shmem_putmem_nbi(metric_info.dest, metric_info.src, len, my_PE_partners[i]); +#else + shmem_putmem(metric_info.dest, metric_info.src, len, my_PE_partners[i]); +#endif + } + shmem_quiet(); + shmem_int_atomic_inc(&completion_signal, my_PE_partners[i]); + } + } + + shmem_barrier_all(); + if (snode || target_node(metric_info)) { + end = perf_shmemx_wtime(); + calc_and_print_results((end - start), len, metric_info); + } + free(my_PE_partners); +} +/* Runs target_data_uni_bw() for len and then for TARGET_SZ_MAX, updating + * start_len to TARGET_SZ_MAX in between, and finally lowers max_len to + * TARGET_SZ_MIN (see the comment in the body). */ +void static inline target_bw_itr(int len, perf_metrics_t *metric_info) +{ + target_data_uni_bw(len, *metric_info); + + metric_info->start_len = TARGET_SZ_MAX; + len = TARGET_SZ_MAX; + + target_data_uni_bw(len, *metric_info); + + /* stopping upper layer from iterating, we are done */ + metric_info->max_len = TARGET_SZ_MIN; +} diff --git a/test/performance/shmem_perf_suite/uni_dir.h b/test/performance/shmem_perf_suite/uni_dir.h new file mode 100644 index 0000000..627b2b7 --- /dev/null +++ b/test/performance/shmem_perf_suite/uni_dir.h @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2018 Intel Corporation. All rights reserved. 
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +/* Uni-directional put bandwidth for len-byte messages. When + * metric_info->target_data is set the work is delegated to target_bw_itr(). + * Otherwise a streaming PE (snode, forced to true when num_pes == 1) runs + * warmup and then trials windows of window_size puts to partner_node(), using + * shmem_putmem_nbi() if USE_NONBLOCKING_API is defined and shmem_putmem() + * if not, with shmem_quiet() after each window. Barriers separate warmup, + * timed loop and reporting; the clock is read after the second and third + * barriers. */ +void static inline uni_bw_put(int len, perf_metrics_t *metric_info) +{ + double start = 0.0, end = 0.0; + int i = 0, j = 0; + int dest = partner_node(*metric_info); + int snode = (metric_info->num_pes != 1)? 
streaming_node(*metric_info) : true; + + if(metric_info->target_data) { + target_bw_itr(len, metric_info); + return; + } + + shmem_barrier_all(); + + if (snode) { + for (i = 0; i < metric_info->warmup; i++) { + for(j = 0; j < metric_info->window_size; j++) { +#ifdef USE_NONBLOCKING_API + shmem_putmem_nbi(metric_info->dest, metric_info->src, len, dest); +#else + shmem_putmem(metric_info->dest, metric_info->src, len, dest); +#endif + } + shmem_quiet(); + } + } + + shmem_barrier_all(); + if (snode) { + start = perf_shmemx_wtime(); + for (i = 0; i < metric_info->trials; i++) { + for(j = 0; j < metric_info->window_size; j++) { +#ifdef USE_NONBLOCKING_API + shmem_putmem_nbi(metric_info->dest, metric_info->src, len, dest); +#else + shmem_putmem(metric_info->dest, metric_info->src, len, dest); +#endif + } + shmem_quiet(); + } + } + + shmem_barrier_all(); + if (snode) { + end = perf_shmemx_wtime(); + calc_and_print_results((end - start), len, *metric_info); + } +} +/* Get counterpart of uni_bw_put(): same phases and timing, issuing + * shmem_getmem_nbi() or shmem_getmem(). shmem_quiet() is called after each + * window only when USE_NONBLOCKING_API is defined. */ +void static inline uni_bw_get(int len, perf_metrics_t *metric_info) +{ + double start = 0.0, end = 0.0; + int i = 0, j = 0; + int dest = partner_node(*metric_info); + int snode = (metric_info->num_pes != 1)? streaming_node(*metric_info) : true; + + if(metric_info->target_data) { + target_bw_itr(len, metric_info); + return; + } + + shmem_barrier_all(); + + if (snode) { + for (i = 0; i < metric_info->warmup; i++) { + for(j = 0; j < metric_info->window_size; j++) { + /* Choosing to skip quiet for both blocking and non-blocking getmem + * as this sequence of operation (writing to the same location) is + * currently undefined by the OpenSHMEM Spec. 
*/ +#ifdef USE_NONBLOCKING_API + shmem_getmem_nbi(metric_info->dest, metric_info->src, len, dest); +#else + shmem_getmem(metric_info->dest, metric_info->src, len, dest); +#endif + } +#ifdef USE_NONBLOCKING_API + shmem_quiet(); +#endif + } + } + + shmem_barrier_all(); + if (snode) { + start = perf_shmemx_wtime(); + for (i = 0; i < metric_info->trials; i++) { + for(j = 0; j < metric_info->window_size; j++) { +#ifdef USE_NONBLOCKING_API + shmem_getmem_nbi(metric_info->dest, metric_info->src, len, dest); +#else + shmem_getmem(metric_info->dest, metric_info->src, len, dest); +#endif + } +#ifdef USE_NONBLOCKING_API + shmem_quiet(); +#endif + } + } + + shmem_barrier_all(); + if (snode) { + end = perf_shmemx_wtime(); + calc_and_print_results((end - start), len, *metric_info); + } +} + diff --git a/test/performance/shmem_perf_suite/uni_dir_ctx.h b/test/performance/shmem_perf_suite/uni_dir_ctx.h new file mode 100644 index 0000000..199815c --- /dev/null +++ b/test/performance/shmem_perf_suite/uni_dir_ctx.h @@ -0,0 +1,105 @@ +/* +* Copyright (c) 2018 Intel Corporation. All rights reserved. +* This software is available to you under the BSD license below: +* +* Redistribution and use in source and binary forms, with or +* without modification, are permitted provided that the following +* conditions are met: +* +* - Redistributions of source code must retain the above +* copyright notice, this list of conditions and the following +* disclaimer. +* +* - Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials +* provided with the distribution. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*/
+
+/*
+ * Multi-threaded put bandwidth, one private SHMEM context per thread.
+ *
+ * len            - bytes per put; also the size of each thread's slice of
+ *                  the src and dst buffers
+ * metric_info    - run parameters; this function reads nthreads, warmup,
+ *                  trials and window_size
+ * streaming_node - non-zero on a PE that should issue the puts
+ *
+ * src and dst are each nthreads * len bytes obtained from
+ * aligned_buffer_alloc(); thread t works on the slice at offset t * len.
+ * A streaming PE runs two OpenMP parallel regions, warmup then timed, and
+ * every thread creates and destroys its own SHMEM_CTX_PRIVATE context in
+ * each region.  In the timed region the master thread records `start`
+ * right after an omp barrier; `end` is taken after the shmem_barrier_all()
+ * that follows the region.  Both buffers are released before returning.
+ *
+ * NOTE(review): the result of shmem_ctx_create() is not checked, and the
+ * two allocations are only checked through assert() -- confirm that this
+ * is acceptable for the benchmark.
+ */
+void static inline uni_bw_ctx(int len, perf_metrics_t *metric_info,
+                              int streaming_node)
+{
+    double start = 0.0, end = 0.0;
+    int j = 0;
+    int dest = partner_node(*metric_info);
+    char *src = aligned_buffer_alloc(metric_info->nthreads * len);
+    char *dst = aligned_buffer_alloc(metric_info->nthreads * len);
+    assert(src && dst);
+
+    shmem_barrier_all();
+
+    if (streaming_node) {
+#pragma omp parallel default(none) firstprivate(len, dest) private(j) \
+    shared(metric_info, src, dst, start, end) num_threads(metric_info->nthreads)
+        {
+            int i;
+            const int thread_id = omp_get_thread_num();
+            shmem_ctx_t ctx;
+            shmem_ctx_create(SHMEM_CTX_PRIVATE, &ctx);
+
+            for (i = 0; i < metric_info->warmup; i++) {
+                for (j = 0; j < metric_info->window_size; j++) {
+#ifdef USE_NONBLOCKING_API
+                    shmem_ctx_putmem_nbi(ctx, dst + thread_id * len, src + thread_id * len, len, dest);
+#else
+                    shmem_ctx_putmem(ctx, dst + thread_id * len, src + thread_id * len, len, dest);
+#endif
+                }
+                shmem_ctx_quiet(ctx);
+            }
+            shmem_ctx_destroy(ctx);
+        }
+    }
+
+    shmem_barrier_all();
+    if (streaming_node) {
+#pragma omp parallel default(none) firstprivate(len, dest) private(j) \
+    shared(metric_info, src, dst, start, end) num_threads(metric_info->nthreads)
+        {
+            int i;
+            const int thread_id = omp_get_thread_num();
+            shmem_ctx_t ctx;
+            shmem_ctx_create(SHMEM_CTX_PRIVATE, &ctx);
+
+#pragma omp barrier
+#pragma omp master
+            {
+                start = perf_shmemx_wtime();
+            }
+
+            for (i = 0; i < metric_info->trials; i++) {
+                for (j = 0; j < metric_info->window_size; j++) {
+#ifdef USE_NONBLOCKING_API
+                    shmem_ctx_putmem_nbi(ctx, dst + thread_id * len, src + thread_id * len, len, dest);
+#else
+                    shmem_ctx_putmem(ctx, dst + thread_id * len, src + thread_id * len, len, dest);
+#endif
+                }
+                shmem_ctx_quiet(ctx);
+            }
+            shmem_ctx_destroy(ctx);
+        }
+    }
+
+    shmem_barrier_all();
+    if (streaming_node) {
+        end = perf_shmemx_wtime();
+        calc_and_print_results((end - start), len, *metric_info);
+    }
+
+    shmem_barrier_all();
+
+    aligned_buffer_free(src);
+    aligned_buffer_free(dst);
+
+}
diff --git a/test/performance/tests/Makefile.am b/test/performance/tests/Makefile.am
new file mode 100644
index 0000000..7d9dc6c
--- /dev/null
+++ b/test/performance/tests/Makefile.am
@@ -0,0 +1,38 @@
+# -*- Makefile -*-
+#
+# Copyright 2011 Sandia Corporation. Under the terms of Contract
+# DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+# retains certain rights in this software.
+#
+# Copyright (c) 2017 Intel Corporation. All rights reserved.
+# This software is available to you under the BSD license.
+#
+# This file is part of the Sandia OpenSHMEM software package. For license
+# information, see the LICENSE file in the top level directory of the
+# distribution.
+
+check_PROGRAMS = \
+	shmemlatency \
+	msgrate
+
+if ENABLE_LENGTHY_TESTS
+TESTS = $(check_PROGRAMS)
+endif
+
+NPROCS ?= 2
+LOG_COMPILER = $(TEST_RUNNER)
+
+AM_LDFLAGS = $(LIBTOOL_WRAPPER_LDFLAGS)
+
+if EXTERNAL_TESTS
+bin_PROGRAMS = $(check_PROGRAMS)
+AM_CPPFLAGS =
+LDADD =
+else
+AM_CPPFLAGS = -I$(top_builddir)/mpp
+LDADD = $(top_builddir)/src/libsma.la
+endif
+
+if USE_PMI_SIMPLE
+LDADD += $(top_builddir)/pmi-simple/libpmi_simple.la
+endif
diff --git a/test/performance/tests/msgrate.c b/test/performance/tests/msgrate.c
new file mode 100644
index 0000000..5400237
--- /dev/null
+++ b/test/performance/tests/msgrate.c
@@ -0,0 +1,395 @@
+/* -*- C -*-
+ *
+ * Copyright 2006 Sandia Corporation. Under the terms of Contract
+ * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+ * retains certain rights in this software.
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +/* + * Adapted to Cray SHMEM by Brian Barrett + */ + +#include +#include +#include +#include +#include +#include +#include + +/* configuration parameters - setable by command line arguments */ +int npeers = 6; +int niters = 4096; +int nmsgs = 128; +int nbytes = 8; +int cache_size = (8 * 1024 * 1024 / sizeof(int)); +int ppn = -1; +int machine_output = 0; + +/* globals */ +int *send_peers; +int *recv_peers; +int *cache_buf; +char *send_buf; +char *recv_buf; +long bcast_pSync[SHMEM_BCAST_SYNC_SIZE]; +long barrier_pSync[SHMEM_BARRIER_SYNC_SIZE]; +long reduce_pSync[SHMEM_REDUCE_SYNC_SIZE]; +double reduce_pWrk[SHMEM_REDUCE_MIN_WRKDATA_SIZE]; +int start_err = 0; +double tmp = 0; +double total = 0; + +int rank = -1; +int world_size = -1; + +static void +abort_app(const char *msg) +{ + perror(msg); + abort(); +} + + +static void +cache_invalidate(void) +{ + int i; + + cache_buf[0] = 1; + for (i = 1 ; i < cache_size ; ++i) { + cache_buf[i] = cache_buf[i - 1]; + } +} + + +static inline double +timer(void) +{ +#ifdef HAVE_SHMEMX_WTIME + return shmemx_wtime(); +#else + struct timeval tv; + gettimeofday(&tv, NULL); + return (double) tv.tv_sec + (double) tv.tv_usec / 1000000.0; +#endif /* HAVE_SHMEMX_WTIME */ +} + + +static void 
+display_result(const char *test, const double result) +{ + if (0 == rank) { + if (machine_output) { + printf("%.2f ", result); + } else { + printf("%16s: %.2f\n", test, result); + } + } +} + + +static void +test_one_way(void) +{ + int i, k; + int pe_size = world_size; + + tmp = 0; + total = 0; + + shmem_barrier_all(); + + if (world_size % 2 == 1) { + pe_size = world_size - 1; + } + + if (!(world_size % 2 == 1 && rank == (world_size - 1))) { + if (rank < world_size / 2) { + for (i = 0 ; i < niters ; ++i) { + cache_invalidate(); + + shmem_barrier(0, 0, pe_size, barrier_pSync); + + tmp = timer(); + for (k = 0 ; k < nmsgs ; ++k) { + shmem_putmem(recv_buf + (nbytes * k), + send_buf + (nbytes * k), + nbytes, rank + (world_size / 2)); + } + shmem_quiet(); + total += (timer() - tmp); + } + } else { + for (i = 0 ; i < niters ; ++i) { + cache_invalidate(); + + shmem_barrier(0, 0, pe_size, barrier_pSync); + + tmp = timer(); + shmem_short_wait_until((short*) (recv_buf + (nbytes * (nmsgs - 1))), SHMEM_CMP_NE, 0); + total += (timer() - tmp); + memset(recv_buf, 0, npeers * nmsgs * nbytes); + } + } + + shmem_double_sum_to_all(&tmp, &total, 1, 0, 0, pe_size, reduce_pWrk, reduce_pSync); + display_result("single direction", (niters * nmsgs) / (tmp / world_size)); + } + + shmem_barrier_all(); +} + + +static void +test_same_direction(void) +{ + /* Not implemented yet */ +} + + +static void +test_prepost(void) +{ + int i, j, k; + + tmp = 0; + total = 0; + + shmem_barrier_all(); + + for (i = 0 ; i < niters - 1 ; ++i) { + cache_invalidate(); + + shmem_barrier_all(); + + tmp = timer(); + for (j = 0 ; j < npeers ; ++j) { + for (k = 0 ; k < nmsgs ; ++k) { + shmem_putmem(recv_buf + (nbytes * (k + j * nmsgs)), + send_buf + (nbytes * (k + j * nmsgs)), + nbytes, send_peers[npeers - j - 1]); + } + } + shmem_quiet(); + shmem_short_wait_until((short*) (recv_buf + (nbytes * ((nmsgs - 1) + (npeers - 1) * nmsgs))), SHMEM_CMP_NE, 0); + total += (timer() - tmp); + memset(recv_buf, 0, npeers * nmsgs * 
nbytes); + } + + shmem_double_sum_to_all(&tmp, &total, 1, 0, 0, world_size, reduce_pWrk, reduce_pSync); + display_result("pre-post", (niters * npeers * nmsgs * 2) / (tmp / world_size)); +} + + +static void +test_allstart(void) +{ + /* BWB: Not implemented */ +} + + +static void +usage(void) +{ + fprintf(stderr, "Usage: msgrate -n [OPTION]...\n\n"); + fprintf(stderr, " -h Display this help message and exit\n"); + fprintf(stderr, " -p Number of peers used in communication\n"); + fprintf(stderr, " -i Number of iterations per test\n"); + fprintf(stderr, " -m Number of messages per peer per iteration\n"); + fprintf(stderr, " -s Number of bytes per message\n"); + fprintf(stderr, " -c Cache size in bytes\n"); + fprintf(stderr, " -n Number of procs per node\n"); + fprintf(stderr, " -o Format output to be machine readable\n"); + fprintf(stderr, "\nReport bugs to \n"); +} + + +int +main(int argc, char *argv[]) +{ + int i; + + shmem_init(); + + rank = shmem_my_pe(); + world_size = shmem_n_pes(); + + /* root handles arguments and bcasts answers */ + if (0 == rank) { + int ch; + while (start_err != 1 && + (ch = getopt(argc, argv, "p:i:m:s:c:n:oh")) != -1) { + switch (ch) { + case 'p': + npeers = atoi(optarg); + break; + case 'i': + niters = atoi(optarg); + break; + case 'm': + nmsgs = atoi(optarg); + break; + case 's': + nbytes = atoi(optarg); + break; + case 'c': + cache_size = atoi(optarg) / sizeof(int); + break; + case 'n': + ppn = atoi(optarg); + break; + case 'o': + machine_output = 1; + break; + case 'h': + case '?': + default: + start_err = 1; + usage(); + } + } + + /* sanity check */ + if (start_err != 1) { +#if 0 + if (world_size < 3) { + fprintf(stderr, "Error: At least three processes are required\n"); + start_err = 1; + } else +#endif + if (world_size <= npeers) { + fprintf(stderr, "Error: job size (%d) <= number of peers (%d)\n", + world_size, npeers); + start_err = 77; + } else if (ppn < 1) { + fprintf(stderr, "Error: must specify process per node (-n #)\n"); + 
start_err = 77; + } else if (world_size / ppn <= npeers) { + fprintf(stderr, "Error: node count <= number of peers\n"); + start_err = 77; + } + } + } + + for (i = 0; i < SHMEM_BCAST_SYNC_SIZE; i++) + bcast_pSync[i] = SHMEM_SYNC_VALUE; + for (i = 0; i < SHMEM_BARRIER_SYNC_SIZE; i++) + barrier_pSync[i] = SHMEM_SYNC_VALUE; + for (i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i++) + reduce_pSync[i] = SHMEM_SYNC_VALUE; + for (i = 0; i < SHMEM_REDUCE_MIN_WRKDATA_SIZE; i++) + reduce_pWrk[i] = SHMEM_SYNC_VALUE; + + shmem_barrier_all(); + + /* broadcast results */ + printf("%d: psync: 0x%lu\n", rank, (unsigned long) bcast_pSync); + shmem_broadcast32(&start_err, &start_err, 1, 0, 0, 0, world_size, bcast_pSync); + if (0 != start_err) { + exit(start_err); + } + shmem_barrier_all(); + shmem_broadcast32(&npeers, &npeers, 1, 0, 0, 0, world_size, bcast_pSync); + shmem_barrier_all(); + shmem_broadcast32(&niters, &niters, 1, 0, 0, 0, world_size, bcast_pSync); + shmem_barrier_all(); + shmem_broadcast32(&nmsgs, &nmsgs, 1, 0, 0, 0, world_size, bcast_pSync); + shmem_barrier_all(); + shmem_broadcast32(&nbytes, &nbytes, 1, 0, 0, 0, world_size, bcast_pSync); + shmem_barrier_all(); + shmem_broadcast32(&cache_size, &cache_size, 1, 0, 0, 0, world_size, bcast_pSync); + shmem_barrier_all(); + shmem_broadcast32(&ppn, &ppn, 1, 0, 0, 0, world_size, bcast_pSync); + shmem_barrier_all(); + if (0 == rank) { + if (!machine_output) { + printf("job size: %d\n", world_size); + printf("npeers: %d\n", npeers); + printf("niters: %d\n", niters); + printf("nmsgs: %d\n", nmsgs); + printf("nbytes: %d\n", nbytes); + printf("cache size: %d\n", cache_size * (int)sizeof(int)); + printf("ppn: %d\n", ppn); + } else { + printf("%d %d %d %d %d %d %d ", + world_size, npeers, niters, nmsgs, nbytes, + cache_size * (int)sizeof(int), ppn); + } + } + + /* allocate buffers */ + send_peers = malloc(sizeof(int) * npeers); + if (NULL == send_peers) abort_app("malloc"); + recv_peers = malloc(sizeof(int) * npeers); + if (NULL == recv_peers) 
abort_app("malloc"); + cache_buf = malloc(sizeof(int) * cache_size); + if (NULL == cache_buf) abort_app("malloc"); + send_buf = malloc(npeers * nmsgs * nbytes); + if (NULL == send_buf) abort_app("malloc"); + memset(send_buf, 1, npeers * nmsgs * nbytes); + + recv_buf = shmem_malloc(npeers * nmsgs * nbytes); + if (NULL == recv_buf) abort_app("malloc"); + memset(recv_buf, 0, npeers * nmsgs * nbytes); + + /* calculate peers */ + for (i = 0 ; i < npeers ; ++i) { + if (i < npeers / 2) { + send_peers[i] = (rank + world_size + ((i - npeers / 2) * ppn)) % world_size; + } else { + send_peers[i] = (rank + world_size + ((i - npeers / 2 + 1) * ppn)) % world_size; + } + } + if (npeers % 2 == 0) { + /* even */ + for (i = 0 ; i < npeers ; ++i) { + if (i < npeers / 2) { + recv_peers[i] = (rank + world_size + ((i - npeers / 2) *ppn)) % world_size; + } else { + recv_peers[i] = (rank + world_size + ((i - npeers / 2 + 1) * ppn)) % world_size; + } + } + } else { + /* odd */ + for (i = 0 ; i < npeers ; ++i) { + if (i < npeers / 2 + 1) { + recv_peers[i] = (rank + world_size + ((i - npeers / 2 - 1) * ppn)) % world_size; + } else { + recv_peers[i] = (rank + world_size + ((i - npeers / 2) * ppn)) % world_size; + } + } + } + + /* BWB: FIX ME: trash the free lists / malloc here */ + + /* sync, although tests will do this on their own (in theory) */ + shmem_barrier_all(); + + /* run tests */ + test_one_way(); + test_same_direction(); + test_prepost(); + test_allstart(); + + if (rank == 0 && machine_output) printf("\n"); + + /* done */ + shmem_finalize(); + return 0; +} diff --git a/test/performance/tests/shmemlatency.c b/test/performance/tests/shmemlatency.c new file mode 100644 index 0000000..7eb4fca --- /dev/null +++ b/test/performance/tests/shmemlatency.c @@ -0,0 +1,243 @@ +/* + * Copyright 2011 Sandia Corporation. Under the terms of Contract + * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government + * retains certain rights in this software. 
+ * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* +** This is a pingpong test used to calculate +** latency and bandwidth for various message +** sizes. 
+** +** SHMEM version +*/ + +#include +#include +#include +#include +#include +#include + +#define SIZE (10000000) + +#define TRUE (1) +#define FALSE (0) + +void doit(int len, double *latency, double *bandwidth); + +#ifndef HAVE_SHMEMX_WTIME +static double shmemx_wtime(void) { + struct timeval tv; + gettimeofday(&tv, NULL); + return (double) tv.tv_sec + (double) tv.tv_usec / 1000000.0; +} +#endif /* HAVE_SHMEMX_WTIME */ + +double aligned_buf[SIZE/sizeof(double)]; + +char *buf; + +int my_node; + +int +main(int argc, char *argv[]) +{ + + extern char *optarg; + int ch, error; + int len, start_len, end_len, increment, inc, trials, i; + int mega; + double latency, bandwidth; + double tot_latency, tot_bandwidth; + double max_latency, max_bandwidth; + double min_latency, min_bandwidth; + + shmem_init(); + + my_node = shmem_my_pe(); + + if ( shmem_n_pes() < 2) { + if (my_node == 0) { + fprintf(stderr, "Need to run on at least two nodes\n"); + } + exit(77); + } + + /* Set the defaults */ + error= FALSE; + start_len= 1; + end_len= 1024; + increment = 16; + trials= 1000; + mega= TRUE; + + /* check command line args */ + while ((ch= getopt(argc, argv, "i:e:s:n:m")) != EOF) { + switch (ch) { + case 'i': + increment= strtol(optarg, (char **)NULL, 0); + break; + case 'e': + end_len= strtol(optarg, (char **)NULL, 0); + break; + case 's': + start_len= strtol(optarg, (char **)NULL, 0); + if ( start_len < 1 ) start_len = 1; + break; + case 'n': + trials= strtol(optarg, (char **)NULL, 0); + break; + case 'm': + mega= FALSE; + break; + default: + error= TRUE; + break; + } + } + + if (error) { + if (my_node == 0) { + fprintf(stderr, "Usage: %s [-s start_length] [-e end_length] [-i inc] [-n trials] [-m (millions)]\n", argv[0]); + } + exit (-1); + } + + + if (my_node == 0) { + printf("\n"); + printf("Results for %d trials each of length %d through %d in increments of %d\n\n", + trials, start_len, end_len, increment); + printf("Length Latency Bandwidth\n"); + printf("in bytes in micro 
seconds "); + if (mega) { + printf("in mega bytes per second\n"); + } else { + printf("in million bytes per second\n"); + } + printf(" minimum average maximum minimum average maximum\n"); + } + + + for (len= start_len; len <= end_len; len += inc) { + buf= (char *)aligned_buf; + latency= tot_latency= 0.0; + max_latency= 0.0; + min_latency= 1000000000.0; + bandwidth= tot_bandwidth= 0.0; + max_bandwidth= 0.0; + min_bandwidth= 1000000000.0; + + for (i= 0; i < trials; i++) { + + buf[len-1] = (char)my_node; + + shmem_barrier_all(); + + doit(len, &latency, &bandwidth); + tot_latency= tot_latency + latency; + if (latency < min_latency) { + min_latency= latency; + } + if (latency > max_latency) { + max_latency= latency; + } + tot_bandwidth= tot_bandwidth + bandwidth; + if (bandwidth < min_bandwidth) { + min_bandwidth= bandwidth; + } + if (bandwidth > max_bandwidth) { + max_bandwidth= bandwidth; + } + } + + if (my_node == 0) { + printf("%9d %8.2f %8.2f %8.2f ", + len, min_latency, tot_latency / trials, max_latency); + if (mega) { + printf("%8.2f %8.2f %8.2f\n", + min_bandwidth / (1024 * 1024), + (tot_bandwidth / trials) / (1024 * 1024), + max_bandwidth / (1024 * 1024)); + } else { + printf("%8.2f %8.2f %8.2f\n", + min_bandwidth / 1000000.0, + (tot_bandwidth / trials) / 1000000.0, + max_bandwidth / 1000000.0); + } + } + + if ( len == 1 ) { + inc = increment - 1; + } else { + inc = increment; + } + } + + shmem_finalize(); + return 0; +} /* end of main() */ + + +void +doit(int len, double *latency, double *bandwidth) +{ + + double start, end; + + if (my_node == 0) { + + start = shmemx_wtime(); + + shmem_putmem( buf, buf, len, 1 ); + + shmem_long_wait_until( (long *)&buf[len-1], SHMEM_CMP_NE, (long)0 ); + + end = shmemx_wtime(); + + *latency= (end - start) * 1000000.0 / 2.0; + + if ( (end - start) != 0 ) { + *bandwidth= len / (end - start) * 2.0; + } else { + *bandwidth = 0.0; + } + + } else { + + shmem_long_wait_until( (long *)&buf[len-1], SHMEM_CMP_NE, (long)1 ); + + 
buf[len-1] = (char)1; + + shmem_putmem( buf, buf, len, 0 ); + + *latency = 1.0; + *bandwidth = 10.0; + } +} /* end of doit() */ diff --git a/test/unit/Makefile.am b/test/unit/Makefile.am new file mode 100644 index 0000000..ff52cb9 --- /dev/null +++ b/test/unit/Makefile.am @@ -0,0 +1,245 @@ +# -*- Makefile -*- +# +# Copyright 2011 Sandia Corporation. Under the terms of Contract +# DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government +# retains certain rights in this software. +# +# Copyright (c) 2017 Intel Corporation. All rights reserved. +# This software is available to you under the BSD license. +# +# This file is part of the Sandia OpenSHMEM software package. For license +# information, see the LICENSE file in the top level directory of the +# distribution. + +check_PROGRAMS = \ + hello \ + pi \ + micro_unit_shmem \ + circular_shift \ + accessible_ping \ + max_reduction \ + big_reduction \ + to_all \ + strided_put \ + barrier \ + bcast \ + put1 \ + get1 \ + swap1 \ + ping \ + pingpong \ + shmalloc \ + shmem_calloc \ + shrealloc \ + shmemalign \ + get_g \ + iput32 \ + iput64 \ + iput128 \ + iput_short \ + iput_double \ + iput_float \ + iput_long \ + iput_longdouble \ + iput_longlong \ + sping \ + pingpong-short \ + ipgm \ + iput-iget \ + swapm \ + cswap \ + waituntil \ + atomic_inc \ + set_lock \ + test_lock \ + test_lock_cswap \ + fcollect64 \ + bigput \ + bigget \ + ns \ + bcast_flood \ + lfinc \ + shmem_info \ + global_exit \ + asym_alloc \ + set_fetch \ + alltoall \ + alltoalls \ + c11_test_shmem_g \ + c11_test_shmem_get \ + c11_test_shmem_p \ + c11_test_shmem_put \ + c11_test_shmem_atomic_fetch \ + c11_test_shmem_atomic_set \ + c11_test_shmem_atomic_add \ + c11_test_shmem_atomic_inc \ + c11_test_shmem_atomic_and \ + c11_test_shmem_atomic_or \ + c11_test_shmem_atomic_xor \ + c11_test_shmem_atomic_swap \ + c11_test_shmem_atomic_cswap \ + c11_test_shmem_wait_until \ + c11_test_shmem_test \ + get_nbi \ + put_nbi \ + rma_coverage \ + collect \ + 
repeated_barriers \ + repeated_syncs \ + broadcast_active_set \ + reduce_active_set \ + collect_active_set \ + atomic_bitwise \ + nop_collectives \ + self_collectives \ + zero_comm \ + sync-size \ + shmem_ctx_pipelined_reduce \ + many-ctx \ + shmem_test + +if ENABLE_PROFILING +check_PROGRAMS += \ + pcontrol \ + rma_coverage_pshmem +endif + +if USE_PORTALS4 +check_PROGRAMS += \ + shmem_ct +endif + +if HAVE_CXX +check_PROGRAMS += \ + cxx_test_shmem_complex \ + cxx_test_shmem_g \ + cxx_test_shmem_get \ + cxx_test_shmem_p \ + cxx_test_shmem_put \ + cxx_test_shmem_atomic_fetch \ + cxx_test_shmem_atomic_set \ + cxx_test_shmem_atomic_add \ + cxx_test_shmem_atomic_inc \ + cxx_test_shmem_atomic_and \ + cxx_test_shmem_atomic_or \ + cxx_test_shmem_atomic_xor \ + cxx_test_shmem_atomic_swap \ + cxx_test_shmem_atomic_cswap \ + cxx_test_shmem_wait_until \ + cxx_test_shmem_test +endif + +if HAVE_FORTRAN +check_PROGRAMS += \ + hello_f \ + shmem_info_f + +if !HAVE_LONG_FORTRAN_HEADER +check_PROGRAMS += \ + complex_reductions_f \ + set_fetch_f +endif +endif + +if HAVE_PTHREADS +check_PROGRAMS += \ + mt_a2a \ + mt_contention \ + mt_membar \ + threading \ + web \ + thread_wait + +if SHMEMX_TESTS +check_PROGRAMS += \ + gettid_register +endif +endif + +if HAVE_OPENMP +check_PROGRAMS += \ + shmem_ctx +endif + +TESTS = $(check_PROGRAMS) + +NPROCS ?= 2 +LOG_COMPILER = $(TEST_RUNNER) + +AM_LDFLAGS = $(LIBTOOL_WRAPPER_LDFLAGS) + +if EXTERNAL_TESTS +bin_PROGRAMS = $(check_PROGRAMS) +AM_CPPFLAGS = +AM_FCFLAGS = +LDADD = +else +AM_CPPFLAGS = -I$(top_builddir)/mpp +AM_FCFLAGS = -I$(top_builddir)/mpp +LDADD = $(top_builddir)/src/libsma.la +endif + +if USE_PMI_SIMPLE +LDADD += $(top_builddir)/pmi-simple/libpmi_simple.la +endif + +# C Tests with special flags +rma_coverage_pshmem_SOURCES = rma_coverage.c +rma_coverage_pshmem_CFLAGS = -DTEST_PSHMEM + +mt_a2a_SOURCES = mt_a2a.c pthread_barrier.h +mt_a2a_LDFLAGS = $(PTHREAD_LIBS) +mt_a2a_CFLAGS = -I$(top_srcdir)/test/unit $(PTHREAD_CFLAGS) 
+mt_a2a_LDADD = $(LDADD) $(PTHREAD_CFLAGS) + +mt_contention_LDFLAGS = $(PTHREAD_LIBS) +mt_contention_CFLAGS = -I$(top_srcdir)/test/unit $(PTHREAD_CFLAGS) +mt_contention_LDADD = $(LDADD) $(PTHREAD_CFLAGS) + +if SHMEMX_TESTS +gettid_register_LDFLAGS = $(AM_LDFLAGS) $(PTHREAD_LIBS) +gettid_register_CFLAGS = $(PTHREAD_CFLAGS) +gettid_register_LDADD = $(LDADD) $(PTHREAD_CFLAGS) +endif + +mt_membar_LDFLAGS = $(AM_LDFLAGS) $(PTHREAD_LIBS) +mt_membar_CFLAGS = -I$(top_srcdir)/test/unit $(PTHREAD_CFLAGS) +mt_membar_LDADD = $(LDADD) $(PTHREAD_CFLAGS) + +threading_LDFLAGS = $(PTHREAD_LIBS) +threading_CFLAGS = -I$(top_srcdir)/test/unit $(PTHREAD_CFLAGS) +threading_LDADD = $(LDADD) $(PTHREAD_CFLAGS) + +web_LDFLAGS = $(PTHREAD_LIBS) +web_CFLAGS = -I$(top_srcdir)/test/unit $(PTHREAD_CFLAGS) +web_LDADD = $(LDADD) $(PTHREAD_CFLAGS) + +thread_wait_LDFLAGS = $(PTHREAD_LIBS) +thread_wait_CFLAGS = -I$(top_srcdir)/test/unit $(PTHREAD_CFLAGS) +thread_wait_LDADD = $(LDADD) $(PTHREAD_CFLAGS) + +shmem_ctx_CFLAGS = $(AM_OPENMP_CFLAGS) + +# Fortran Tests (only .c tests use automatic _SOURCES) +hello_f_SOURCES = hello_f.f90 +complex_reductions_f_SOURCES = complex_reductions_f.f90 +shmem_info_f_SOURCES = shmem_info_f.f90 +set_fetch_f_SOURCES = set_fetch_f.f90 + +# C++ Tests +cxx_test_shmem_complex_SOURCES = cxx_test_shmem_complex.cpp +cxx_test_shmem_g_SOURCES = cxx_test_shmem_g.cpp +cxx_test_shmem_get_SOURCES = cxx_test_shmem_get.cpp +cxx_test_shmem_p_SOURCES = cxx_test_shmem_p.cpp +cxx_test_shmem_put_SOURCES = cxx_test_shmem_put.cpp +cxx_test_shmem_atomic_fetch_SOURCES = cxx_test_shmem_atomic_fetch.cpp +cxx_test_shmem_atomic_set_SOURCES = cxx_test_shmem_atomic_set.cpp +cxx_test_shmem_atomic_add_SOURCES = cxx_test_shmem_atomic_add.cpp +cxx_test_shmem_atomic_inc_SOURCES = cxx_test_shmem_atomic_inc.cpp +cxx_test_shmem_atomic_and_SOURCES = cxx_test_shmem_atomic_and.cpp +cxx_test_shmem_atomic_or_SOURCES = cxx_test_shmem_atomic_or.cpp +cxx_test_shmem_atomic_xor_SOURCES = cxx_test_shmem_atomic_xor.cpp 
+cxx_test_shmem_atomic_swap_SOURCES = cxx_test_shmem_atomic_swap.cpp +cxx_test_shmem_atomic_cswap_SOURCES = cxx_test_shmem_atomic_cswap.cpp +cxx_test_shmem_wait_until_SOURCES = cxx_test_shmem_wait_until.cpp +cxx_test_shmem_test_SOURCES = cxx_test_shmem_test.cpp diff --git a/test/unit/accessible_ping.c b/test/unit/accessible_ping.c new file mode 100644 index 0000000..687c3dc --- /dev/null +++ b/test/unit/accessible_ping.c @@ -0,0 +1,66 @@ +/* + * Copyright 2011 Sandia Corporation. Under the terms of Contract + * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government + * retains certain rights in this software. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+/*
+ * test if PE is accessible
+ *
+ * PE 0 calls shmem_pe_accessible() for every other PE and ends the whole
+ * job through shmem_global_exit(1) as soon as one of them is reported as
+ * not accessible.  The per-PE report lines are printed only when the
+ * MAKELEVEL environment variable is not set.
+ */
+
+#include
+#include
+#include
+
+int
+main(int argc, char* argv[])
+{
+    int me, npes;
+    setbuf(stdout, NULL); /* unbuffered stdout */
+    shmem_init();
+    me = shmem_my_pe();
+    npes = shmem_n_pes();
+    if (me == 0) {
+        int i;
+        int verbose = (NULL == getenv("MAKELEVEL")) ? 1 : 0;
+        for (i = 1; i < npes; i += 1) {
+            if (verbose) {
+                printf("From %d: PE %d is ", me, i);
+                printf("%s", shmem_pe_accessible(i) ? "" : "NOT ");
+                printf("accessible\n");
+            }
+            if (! shmem_pe_accessible(i))
+                shmem_global_exit(1);
+        }
+    }
+
+    shmem_finalize();
+
+    return 0;
+}
diff --git a/test/unit/alltoall.c b/test/unit/alltoall.c
new file mode 100644
index 0000000..9ffc7d9
--- /dev/null
+++ b/test/unit/alltoall.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include
+#include
+#include
+
+long pSync[SHMEM_ALLTOALL_SYNC_SIZE];
+/*
+ * Non-zero when `pe` belongs to the active set that starts at pe_start,
+ * advances by 2^pe_stride and has pe_size members.
+ */
+static int is_active(int pe, int pe_start, int pe_stride, int pe_size) {
+    int stride = 1 << pe_stride;
+
+    return pe >= pe_start && pe < pe_start + pe_size * stride && (pe - pe_start) % stride == 0;
+}
+
+/* Translate a group PE index to a global PE rank.  Returns -1 when
+ * group_pe is not below pe_size. */
+static int pe_group_to_world(int group_pe, int pe_start, int pe_stride, int pe_size) {
+    int stride = 1 << pe_stride;
+
+    return group_pe >= pe_size ? -1 : pe_start + group_pe * stride;
+}
+/*
+ * Run one shmem_alltoall32() of a single element per PE over the given
+ * active set and verify the result.  Every PE fills `in` with its own rank
+ * and `out` with -1 (npes entries each); only active PEs make the
+ * collective call.  Afterwards an active PE expects out[i] to hold the
+ * global rank of group member i (-1 for i >= pe_size) and an inactive PE
+ * expects `out` to be untouched.  Any mismatch is printed and ends the job
+ * through shmem_global_exit(1).
+ */
+static void alltoall_test(int32_t *out, int32_t *in, int pe_start, int pe_stride,
+                          int pe_size)
+{
+    int me, npes, i;
+    int failed = 0;
+
+    me = shmem_my_pe();
+    npes = shmem_n_pes();
+
+    if (me == pe_start)
+        printf("A2A: pe_start = %d, pe_stride = %d, pe_size = %d\n", pe_start,
+               pe_stride, pe_size);
+
+    for (i = 0; i < npes; i++) {
+        in[i] = me;
+        out[i] = -1;
+    }
+
+    shmem_barrier_all();
+
+    if (is_active(me, pe_start, pe_stride, pe_size))
+        shmem_alltoall32(out, in, 1, pe_start, pe_stride, pe_size, pSync);
+
+    for (i = 0; i < npes; i++) {
+        int expected;
+
+        if (is_active(me, pe_start, pe_stride, pe_size))
+            expected = pe_group_to_world(i, pe_start, pe_stride, pe_size);
+        else
+            expected = -1;
+
+        if (out[i] != expected) {
+            printf("[%d] out[%d] = %d, expected %d\n", me, i, out[i], expected);
+            failed = 1;
+        }
+    }
+
+    if (failed)
+        shmem_global_exit(1);
+}
+
+/*
+ * Initialise pSync, allocate `in` and `out` with 4 bytes per PE (one
+ * int32_t each) and run alltoall_test() over several active sets.
+ * NOTE(review): the shmem_malloc() results are used without a NULL check.
+ */
+int main(int argc, char **argv) {
+    int npes, i;
+    int32_t *in, *out;
+
+    shmem_init();
+
+    npes = shmem_n_pes();
+
+    for (i = 0; i < SHMEM_ALLTOALL_SYNC_SIZE; i++)
+        pSync[i] = SHMEM_SYNC_VALUE;
+
+    in = shmem_malloc(4 * npes);
+    out = shmem_malloc(4 * npes);
+
+    /* All PEs */
+    alltoall_test(out, in, 0, 0, npes);
+    /* Only PE 0, stride is invalid (should be ignored) */
+    alltoall_test(out, in, 0, 13, 1);
+    /* Only even PEs */
+    alltoall_test(out, in, 0, 1, npes / 2 + npes % 2);
+
+    if (npes > 1) {
+        /* Remove PE n-1 */
+        alltoall_test(out, in, 0, 0, npes-1);
+        /* Remove PE 0 */
+        alltoall_test(out, in, 1, 0, npes-1);
+        /* Only odd PEs */
+        alltoall_test(out, in, 1, 1, npes / 2);
+    }
+
+    shmem_finalize();
+    return 0;
+}
diff --git a/test/unit/alltoalls.c b/test/unit/alltoalls.c
new file mode 100644
index 0000000..aa9862e
--- /dev/null
+++ b/test/unit/alltoalls.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include
+#include
+#include
+
+#define NELEM 16 /* int32_t elements reserved per PE in `in` and `out` */
+
+long pSync[SHMEM_ALLTOALLS_SYNC_SIZE];
+/*
+ * Non-zero when `pe` belongs to the active set that starts at pe_start,
+ * advances by 2^pe_stride and has pe_size members.
+ */
+static int is_active(int pe, int pe_start, int pe_stride, int pe_size) {
+    int stride = 1 << pe_stride;
+
+    return pe >= pe_start && pe < pe_start + pe_size * stride && (pe - pe_start) % stride == 0;
+}
+
+/* Translate a group PE index to a global PE rank.  Returns -1 when
+ * group_pe is not below pe_size. */
+static int pe_group_to_world(int group_pe, int pe_start, int pe_stride, int pe_size) {
+    int stride = 1 << pe_stride;
+
+    return group_pe >= pe_size ? -1 : pe_start + group_pe * stride;
+}
+/*
+ * Run one shmem_alltoalls32() with destination stride `dst`, source stride
+ * `sst` and `nelem` elements per PE over the given active set, then verify
+ * `out`.  Every PE fills NELEM * npes entries of `in` with its own rank and
+ * of `out` with -1; only active PEs make the collective call.  The check
+ * walks idx = i*dst*nelem + j*dst + k for i < npes, j < nelem, k < dst: on
+ * an active PE the entry with k == 0 must hold the global rank of group
+ * member i (-1 for i >= pe_size) and every other visited entry must still
+ * be -1; on an inactive PE all visited entries must be -1.  Any mismatch is
+ * printed and ends the job through shmem_global_exit(1).
+ */
+static void alltoalls_test(int32_t *out, int32_t *in, int dst, int sst, int nelem,
+                           int pe_start, int pe_stride, int pe_size)
+{
+    int me, npes, i, j, k;
+    int failed = 0;
+
+    me = shmem_my_pe();
+    npes = shmem_n_pes();
+
+    if (me == pe_start)
+        printf("A2AS: dst = %2d, sst = %2d, nelem = %2d, "
+               "pe_start = %2d, pe_stride = %2d, pe_size = %2d\n",
+               dst, sst, nelem, pe_start, pe_stride, pe_size);
+
+    for (i = 0; i < NELEM*npes; i++) {
+        in[i] = me;
+        out[i] = -1;
+    }
+
+    shmem_barrier_all();
+
+    if (is_active(me, pe_start, pe_stride, pe_size))
+        shmem_alltoalls32(out, in, dst, sst, nelem,
+                          pe_start, pe_stride, pe_size, pSync);
+
+    for (i = 0; i < npes; i++) {
+        int expected;
+
+        for (j = 0; j < nelem; j++) {
+            for (k = 0; k < dst; k++) {
+                int idx = i*dst*nelem + j*dst + k;
+                if (is_active(me, pe_start, pe_stride, pe_size))
+                    expected = (k % dst == 0) ? pe_group_to_world(i, pe_start, pe_stride, pe_size) : -1;
+                else
+                    expected = -1;
+
+                if (out[idx] != expected) {
+                    printf("[%d] out[%d] = %d, expected %d\n", me, idx, out[idx], expected);
+                    failed = 1;
+                }
+            }
+        }
+    }
+
+    if (failed)
+        shmem_global_exit(1);
+}
+
+
+int main(int argc, char **argv) {
+    int npes, i;
+    int32_t *in, *out;
+
+    shmem_init();
+
+    npes = shmem_n_pes();
+
+    for (i = 0; i < SHMEM_ALLTOALLS_SYNC_SIZE; i++)
+        pSync[i] = SHMEM_SYNC_VALUE;
+
+    in = shmem_malloc(4 * NELEM * npes);
+    out = shmem_malloc(4 * NELEM * npes);
+
+    /* All PEs */
+    alltoalls_test(out, in, 1, 1, 1, 0, 0, npes); /* Same as alltoall */
+    alltoalls_test(out, in, 1, 1, 0, 0, 0, npes); /* No op */
+    alltoalls_test(out, in, 2, 2, NELEM/2, 0, 0, npes); /* Alternate elements */
+    alltoalls_test(out, in, 2, 1, NELEM/2, 0, 0, npes); /* dst != sst */
+    alltoalls_test(out, in, 1, 2, NELEM/2 - 1, 0, 0, npes); /* dst != sst */
+    /* Only PE 0, stride is invalid (should be ignored) */
+    alltoalls_test(out, in, 1, 1, 1, 0, 13, 1); /* Same as alltoall */
+    alltoalls_test(out, in, 1, 1, 0, 0, 13, 1);
/* No op */ + alltoalls_test(out, in, 2, 2, NELEM/2, 0, 13, 1); /* Alternate elements */ + alltoalls_test(out, in, 2, 1, NELEM/2, 0, 13, 1); /* dst != sst */ + alltoalls_test(out, in, 1, 2, NELEM/2 - 1, 0, 13, 1); /* dst != sst */ + /* Only even PEs */ + alltoalls_test(out, in, 1, 1, 1, 0, 1, npes / 2 + npes % 2); /* Same as alltoall */ + alltoalls_test(out, in, 1, 1, 0, 0, 1, npes / 2 + npes % 2); /* No op */ + alltoalls_test(out, in, 2, 2, NELEM/2, 0, 1, npes / 2 + npes % 2); /* Alternate elements */ + alltoalls_test(out, in, 2, 1, NELEM/2, 0, 1, npes / 2 + npes % 2); /* dst != sst */ + alltoalls_test(out, in, 1, 2, NELEM/2 - 1, 0, 1, npes / 2 + npes % 2); /* dst != sst */ + + if (npes > 1) { + /* Remove PE n-1 */ + alltoalls_test(out, in, 1, 1, 1, 0, 0, npes-1); /* Same as alltoall */ + alltoalls_test(out, in, 1, 1, 0, 0, 0, npes-1); /* No op */ + alltoalls_test(out, in, 2, 2, NELEM/2, 0, 0, npes-1); /* Alternate elements */ + alltoalls_test(out, in, 2, 1, NELEM/2, 0, 0, npes-1); /* dst != sst */ + alltoalls_test(out, in, 1, 2, NELEM/2 - 1, 0, 0, npes-1); /* dst != sst */ + /* Remove PE 0 */ + alltoalls_test(out, in, 1, 1, 1, 1, 0, npes-1); /* Same as alltoall */ + alltoalls_test(out, in, 1, 1, 0, 1, 0, npes-1); /* No op */ + alltoalls_test(out, in, 2, 2, NELEM/2, 1, 0, npes-1); /* Alternate elements */ + alltoalls_test(out, in, 2, 1, NELEM/2, 1, 0, npes-1); /* dst != sst */ + alltoalls_test(out, in, 1, 2, NELEM/2 - 1, 1, 0, npes-1); /* dst != sst */ + /* Only odd PEs */ + alltoalls_test(out, in, 1, 1, 1, 1, 1, npes / 2); /* Same as alltoall */ + alltoalls_test(out, in, 1, 1, 0, 1, 1, npes / 2); /* No op */ + alltoalls_test(out, in, 2, 2, NELEM/2, 1, 1, npes / 2); /* Alternate elements */ + alltoalls_test(out, in, 2, 1, NELEM/2, 1, 1, npes / 2); /* dst != sst */ + alltoalls_test(out, in, 1, 2, NELEM/2 - 1, 1, 1, npes / 2); /* dst != sst */ + } + + shmem_finalize(); + return 0; +} diff --git a/test/unit/asym_alloc.c b/test/unit/asym_alloc.c new file mode 100644 
index 0000000..398b700 --- /dev/null +++ b/test/unit/asym_alloc.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Synopsis: Test that a single asymmetric allocation works correctly. + * + * This semantic is provided in OpenSHMEM 1.1 and some versions of Cray SHMEM. + * It was removed from OpenSHMEM in 1.2, but we maintain it for backward + * compatibility. 
+ */ + +#include +#include +#include + +long bufsize, maxbufsize; + +int main(int argc, char **argv) { + int *buf, *buf_in; + int me, npes, i, target; + + shmem_init(); + me = shmem_my_pe(); + npes = shmem_n_pes(); + + /* Each PE allocates space for "me + 1" integers */ + bufsize = me + 1; + buf = shmem_malloc(sizeof(int) * bufsize); + + if (NULL == buf) + shmem_global_exit(1); + + for (i = 0; i < bufsize; i++) + buf[i] = -1; + + shmem_barrier_all(); + + /* Write to neighbor's buffer */ + target = (me + 1) % npes; + buf_in = malloc(sizeof(int) * (target + 1)); + if (!buf_in) { + fprintf(stderr, "ERR - null buf_in pointer\n"); + shmem_global_exit(1); + } + + for (i = 0; i < target + 1; i++) + buf_in[i] = target; + + shmem_int_put(buf, buf_in, target + 1, target); + + shmem_barrier_all(); + + /* Validate data was written correctly */ + for (i = 0; i < me + 1; i++) { + if (buf[i] != me) { + printf("Error [%3d]: buf[%d] == %d, expected %d\n", me, i, buf[i], me); + shmem_global_exit(2); + } + } + + free(buf_in); + shmem_free(buf); + shmem_finalize(); + return 0; +} diff --git a/test/unit/atomic_bitwise.c b/test/unit/atomic_bitwise.c new file mode 100644 index 0000000..b9dd9af --- /dev/null +++ b/test/unit/atomic_bitwise.c @@ -0,0 +1,237 @@ +/* + * Copyright (c) 2017 Rice University. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * Test various bitwise atomics. These tests assume that an unsigned int is at + * least 4 bytes. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define XOR_MASK 0xff +#define XOR_MASK2 0xffff +#define OR_MASK 0xff +#define OR_MASK2 0xff0000 +#define AND_MASK 0xffff00 +#define AND_MASK2 0xff0000 + +unsigned int shared_uint = 0; +unsigned long shared_ulong = 0; +unsigned long long shared_ulonglong = 0; +int32_t shared_int32 = 0; +int64_t shared_int64 = 0; +uint32_t shared_uint32 = 0; +uint64_t shared_uint64 = 0; + +int +main(int argc, char* argv[]) +{ + shmem_init(); + int my_rank = shmem_my_pe(); + int num_ranks = shmem_n_pes(); + if (num_ranks == 1) { + fprintf(stderr, "ERR - Requires > 1 PEs\n"); + shmem_finalize(); + return 0; + } + if (num_ranks % 2 != 0) { + fprintf(stderr, "ERR - Requires even number of PEs\n"); + shmem_finalize(); + return 0; + } + if (sizeof(unsigned int) < 4) { + fprintf(stderr, "ERR - Expected ints to be at least 4 bytes\n"); + shmem_finalize(); + return 0; + } + + int neighbor; + if ((my_rank % 2) == 0) { + neighbor = my_rank + 1; + } else { + neighbor = my_rank - 1; + } + + /* + * Test non-fetching XOR. This should result in each shared variable having + * its value set to XOR_MASK. All shared values are initialized to zero. 
+ */ + shmem_uint_atomic_xor(&shared_uint, (unsigned int)XOR_MASK, neighbor); + shmem_ulong_atomic_xor(&shared_ulong, (unsigned long)XOR_MASK, neighbor); + shmem_ulonglong_atomic_xor(&shared_ulonglong, (unsigned long long)XOR_MASK, neighbor); + shmem_int32_atomic_xor(&shared_int32, (int32_t)XOR_MASK, neighbor); + shmem_int64_atomic_xor(&shared_int64, (int64_t)XOR_MASK, neighbor); + shmem_uint32_atomic_xor(&shared_uint32, (uint32_t)XOR_MASK, neighbor); + shmem_uint64_atomic_xor(&shared_uint64, (uint64_t)XOR_MASK, neighbor); + + shmem_barrier_all(); + + assert(shared_uint == XOR_MASK); + assert(shared_ulong == XOR_MASK); + assert(shared_ulonglong == XOR_MASK); + assert(shared_int32 == XOR_MASK); + assert(shared_int64 == XOR_MASK); + assert(shared_uint32 == XOR_MASK); + assert(shared_uint64 == XOR_MASK); + + shmem_barrier_all(); + + /* + * Test fetching XOR. Prior to this block, all shared variables contain the + * value XOR_MASK. Here, we XOR them with XOR_MASK2. + */ + unsigned int fetched_uint = shmem_uint_atomic_fetch_xor(&shared_uint, (unsigned int)XOR_MASK2, neighbor); + unsigned long fetched_ulong = shmem_ulong_atomic_fetch_xor(&shared_ulong, (unsigned long)XOR_MASK2, neighbor); + unsigned long long fetched_ulonglong = shmem_ulonglong_atomic_fetch_xor(&shared_ulonglong, (unsigned long long)XOR_MASK2, neighbor); + int32_t fetched_int32 = shmem_int32_atomic_fetch_xor(&shared_int32, (int32_t)XOR_MASK2, neighbor); + int64_t fetched_int64 = shmem_int64_atomic_fetch_xor(&shared_int64, (int64_t)XOR_MASK2, neighbor); + uint32_t fetched_uint32 = shmem_uint32_atomic_fetch_xor(&shared_uint32, (uint32_t)XOR_MASK2, neighbor); + uint64_t fetched_uint64 = shmem_uint64_atomic_fetch_xor(&shared_uint64, (uint64_t)XOR_MASK2, neighbor); + + shmem_barrier_all(); + + assert(fetched_uint == XOR_MASK); assert(shared_uint == ((unsigned int)XOR_MASK ^ (unsigned int)XOR_MASK2)); + assert(fetched_ulong == XOR_MASK); assert(shared_ulong == ((unsigned long)XOR_MASK ^ (unsigned 
long)XOR_MASK2)); + assert(fetched_ulonglong == XOR_MASK); assert(shared_ulonglong == ((unsigned long long)XOR_MASK ^ (unsigned long long)XOR_MASK2)); + assert(fetched_int32 == XOR_MASK); assert(shared_int32 == ((int32_t)XOR_MASK ^ (int32_t)XOR_MASK2)); + assert(fetched_int64 == XOR_MASK); assert(shared_int64 == ((int64_t)XOR_MASK ^ (int64_t)XOR_MASK2)); + assert(fetched_uint32 == XOR_MASK); assert(shared_uint32 == ((uint32_t)XOR_MASK ^ (uint32_t)XOR_MASK2)); + assert(fetched_uint64 == XOR_MASK); assert(shared_uint64 == ((uint64_t)XOR_MASK ^ (uint64_t)XOR_MASK2)); + + shmem_barrier_all(); + + /* + * Test non-fetching OR. Prior to this block, all shared variables have the + * value XOR_MASK ^ XOR_MASK2 (i.e. 0xff00). Here, we do a bitwise OR with + * OR_MASK (i.e. 0xff). + */ + shmem_uint_atomic_or(&shared_uint, (unsigned int)OR_MASK, neighbor); + shmem_ulong_atomic_or(&shared_ulong, (unsigned long)OR_MASK, neighbor); + shmem_ulonglong_atomic_or(&shared_ulonglong, (unsigned long long)OR_MASK, neighbor); + shmem_int32_atomic_or(&shared_int32, (int32_t)OR_MASK, neighbor); + shmem_int64_atomic_or(&shared_int64, (int64_t)OR_MASK, neighbor); + shmem_uint32_atomic_or(&shared_uint32, (uint32_t)OR_MASK, neighbor); + shmem_uint64_atomic_or(&shared_uint64, (uint64_t)OR_MASK, neighbor); + + shmem_barrier_all(); + + assert(shared_uint == 0xffff); + assert(shared_ulong == 0xffff); + assert(shared_ulonglong == 0xffff); + assert(shared_int32 == 0xffff); + assert(shared_int64 == 0xffff); + assert(shared_uint32 == 0xffff); + assert(shared_uint64 == 0xffff); + + shmem_barrier_all(); + + /* + * Test fetching OR. Prior to this block, all shared variables have the + * value (XOR_MASK ^ XOR_MASK2) | OR_MASK (i.e. 0xffff). Here we OR with + * OR_MASK2 (i.e. 0xff0000). 
+ */ + fetched_uint = shmem_uint_atomic_fetch_or(&shared_uint, (unsigned int)OR_MASK2, neighbor); + fetched_ulong = shmem_ulong_atomic_fetch_or(&shared_ulong, (unsigned long)OR_MASK2, neighbor); + fetched_ulonglong = shmem_ulonglong_atomic_fetch_or(&shared_ulonglong, (unsigned long long)OR_MASK2, neighbor); + fetched_int32 = shmem_int32_atomic_fetch_or(&shared_int32, (int32_t)OR_MASK2, neighbor); + fetched_int64 = shmem_int64_atomic_fetch_or(&shared_int64, (int64_t)OR_MASK2, neighbor); + fetched_uint32 = shmem_uint32_atomic_fetch_or(&shared_uint32, (uint32_t)OR_MASK2, neighbor); + fetched_uint64 = shmem_uint64_atomic_fetch_or(&shared_uint64, (uint64_t)OR_MASK2, neighbor); + + shmem_barrier_all(); + + assert(fetched_uint == 0xffff); assert(shared_uint == (unsigned int)0xffffff); + assert(fetched_ulong == 0xffff); assert(shared_ulong == (unsigned long)0xffffff); + assert(fetched_ulonglong == 0xffff); assert(shared_ulonglong == (unsigned long long)0xffffff); + assert(fetched_int32 == 0xffff); assert(shared_int32 == (int32_t)0xffffff); + assert(fetched_int64 == 0xffff); assert(shared_int64 == (int64_t)0xffffff); + assert(fetched_uint32 == 0xffff); assert(shared_uint32 == (uint32_t)0xffffff); + assert(fetched_uint64 == 0xffff); assert(shared_uint64 == (uint64_t)0xffffff); + + shmem_barrier_all(); + + /* + * Test non-fetching AND. All shared variables store the value 0xffffff. + * Here, we AND with 0xffff00. 
+ */ + shmem_uint_atomic_and(&shared_uint, (unsigned int)AND_MASK, neighbor); + shmem_ulong_atomic_and(&shared_ulong, (unsigned long)AND_MASK, neighbor); + shmem_ulonglong_atomic_and(&shared_ulonglong, (unsigned long long)AND_MASK, neighbor); + shmem_int32_atomic_and(&shared_int32, (int32_t)AND_MASK, neighbor); + shmem_int64_atomic_and(&shared_int64, (int64_t)AND_MASK, neighbor); + shmem_uint32_atomic_and(&shared_uint32, (uint32_t)AND_MASK, neighbor); + shmem_uint64_atomic_and(&shared_uint64, (uint64_t)AND_MASK, neighbor); + + shmem_barrier_all(); + + assert(shared_uint == AND_MASK); + assert(shared_ulong == AND_MASK); + assert(shared_ulonglong == AND_MASK); + assert(shared_int32 == AND_MASK); + assert(shared_int64 == AND_MASK); + assert(shared_uint32 == AND_MASK); + assert(shared_uint64 == AND_MASK); + + shmem_barrier_all(); + + /* + * Test fetching AND. All shared variables store the value 0xffff00. Here, + * we AND with 0xff0000. + */ + fetched_uint = shmem_uint_atomic_fetch_and(&shared_uint, (unsigned int)AND_MASK2, neighbor); + fetched_ulong = shmem_ulong_atomic_fetch_and(&shared_ulong, (unsigned long)AND_MASK2, neighbor); + fetched_ulonglong = shmem_ulonglong_atomic_fetch_and(&shared_ulonglong, (unsigned long long)AND_MASK2, neighbor); + fetched_int32 = shmem_int32_atomic_fetch_and(&shared_int32, (int32_t)AND_MASK2, neighbor); + fetched_int64 = shmem_int64_atomic_fetch_and(&shared_int64, (int64_t)AND_MASK2, neighbor); + fetched_uint32 = shmem_uint32_atomic_fetch_and(&shared_uint32, (uint32_t)AND_MASK2, neighbor); + fetched_uint64 = shmem_uint64_atomic_fetch_and(&shared_uint64, (uint64_t)AND_MASK2, neighbor); + + shmem_barrier_all(); + + assert(fetched_uint == AND_MASK); assert(shared_uint == (unsigned int)AND_MASK2); + assert(fetched_ulong == AND_MASK); assert(shared_ulong == (unsigned long)AND_MASK2); + assert(fetched_ulonglong == AND_MASK); assert(shared_ulonglong == (unsigned long long)AND_MASK2); + assert(fetched_int32 == AND_MASK); assert(shared_int32 == 
(int32_t)AND_MASK2); + assert(fetched_int64 == AND_MASK); assert(shared_int64 == (int64_t)AND_MASK2); + assert(fetched_uint32 == AND_MASK); assert(shared_uint32 == (uint32_t)AND_MASK2); + assert(fetched_uint64 == AND_MASK); assert(shared_uint64 == (uint64_t)AND_MASK2); + + shmem_finalize(); + + if (my_rank == 0) { + printf("Passed!\n"); + } + + return 0; +} diff --git a/test/unit/atomic_inc.c b/test/unit/atomic_inc.c new file mode 100644 index 0000000..5fb3926 --- /dev/null +++ b/test/unit/atomic_inc.c @@ -0,0 +1,132 @@ +/* + * Copyright 2011 Sandia Corporation. Under the terms of Contract + * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government + * retains certain rights in this software. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/* + * test shmem_int_atomic_inc() atomic_inc {-v|q} {loop-cnt(default=10)(default=10)} + * where: -q == quiet, -v == verbose/debug + * Loop for loop-cnt + * all PEs call shmem_int_atomic_inc(), PE-0 totals + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#define Rfprintf if (shmem_my_pe() == 0) fprintf +#define Rprintf if (shmem_my_pe() == 0) printf +#define RDfprintf if (Verbose && shmem_my_pe() == 0) fprintf +#define RDprintf if (Verbose && shmem_my_pe() == 0) printf +#define Vprintf if (Verbose) printf +#define Vfprintf if (Verbose) fprintf + +int Verbose; +int lock_cnt; +long lock; + +int +main(int argc, char* argv[]) +{ + int c, cloop, loops; + int my_rank, num_ranks; + int Announce = (NULL == getenv("MAKELEVEL")) ? 1 : 0; + + shmem_init(); + my_rank = shmem_my_pe(); + num_ranks = shmem_n_pes(); + if (num_ranks == 1) { + fprintf(stderr, "ERR - Requires > 1 PEs\n"); + shmem_finalize(); + return 0; + } + + while((c=getopt(argc,argv,"vq")) != -1) { + switch(c) { + case 'v': + Verbose++; + break; + case 'q': + Announce = 0; + break; + default: + Rfprintf(stderr,"ERR - unknown -%c ?\n",c); + shmem_finalize(); + return 1; + } + } + + if (optind == argc) + loops = 10; + else { + loops = atoi(argv[optind++]); + if (loops <= 0 || loops > 1000000) { + Rfprintf(stderr, + "ERR - loops arg out of bounds '%d'?\n", loops); + shmem_finalize(); + return 1; + } + } + + for(cloop=1; cloop <= loops; cloop++) + { + lock_cnt = 0; + shmem_barrier_all(); /* sync all ranks */ + + for(c=0; c < num_ranks; c++) + shmem_int_atomic_inc( &lock_cnt, c ); + + Vprintf("[%d] locked: lock_cnt(%d)\n", my_rank, lock_cnt); + + shmem_int_wait_until( &lock_cnt, SHMEM_CMP_GE, num_ranks ); + + shmem_barrier_all(); /* sync all ranks */ + + if (lock_cnt != num_ranks) + printf ("[%d] loop %d: bad lock_cnt %d, expected %d?\n", + my_rank, cloop, lock_cnt, num_ranks); + + if ( (cloop % 10) == 0 ) { + if (my_rank == 0 && Announce) + printf("%d ranks completed %d 
loops\n", num_ranks, cloop); + } + } + + Vprintf ("[%d] of %d, Exit: lock_cnt %d\n", + my_rank, num_ranks, lock_cnt); + + shmem_finalize(); + + return 0; +} diff --git a/test/unit/barrier.c b/test/unit/barrier.c new file mode 100644 index 0000000..aba9e0f --- /dev/null +++ b/test/unit/barrier.c @@ -0,0 +1,110 @@ +/* + * Copyright 2011 Sandia Corporation. Under the terms of Contract + * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government + * retains certain rights in this software. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/* + * shmem_barrier() test barrier {-V} {loop-cnt} + */ +#include +#include +#include +#include + +#include + +#define Rfprintf if (shmem_my_pe() == 0) fprintf +#define Rprintf if (shmem_my_pe() == 0) printf +#define RDfprintf if (Verbose && shmem_my_pe() == 0) fprintf +#define RDprintf if (Verbose && shmem_my_pe() == 0) printf + +int Verbose; + +int +main(int argc, char* argv[]) +{ + int c, j,loops; + int rank, num_ranks; + char *prog_name; + + shmem_init(); + rank = shmem_my_pe(); + num_ranks = shmem_n_pes(); + if (num_ranks == 1) { + Rfprintf(stderr, + "ERR - Requires > 1 PEs\n"); + shmem_finalize(); + return 0; + } + prog_name = strrchr(argv[0],'/'); + if ( prog_name ) + prog_name++; + else + prog_name = argv[0]; + + while((c=getopt(argc,argv,"v")) != -1) { + switch(c) { + case 'V': + Verbose++; + break; + default: + Rfprintf(stderr,"ERR - unknown -%c ?\n",c); + shmem_finalize(); + return 1; + } + } + + if (optind == argc) + loops = 30; + else { + loops = atoi(argv[optind++]); + if (loops <= 0 || loops > 1000000) { + Rfprintf(stderr, + "ERR - loops arg out of bounds '%d'?\n", loops); + shmem_finalize(); + return 1; + } + } + + for(j=0; j < loops; j++) { + //if ( j==0 || (j % 10) == 0 ) + RDfprintf(stderr,"[%d] pre-barrier(%d)\n", rank,j); + + shmem_barrier_all(); /* sync sender and receiver */ + + //if ( j==0 || (j % 10) == 0 ) + RDfprintf(stderr,"[%d] post barrier(%d)\n", rank,j); + } + + RDprintf ("%d(%d) Exit\n", rank, num_ranks); + + shmem_finalize(); + + return 0; +} diff --git a/test/unit/bcast.c b/test/unit/bcast.c new file mode 100644 index 0000000..a27f686 --- /dev/null +++ b/test/unit/bcast.c @@ -0,0 +1,143 @@ +/* + * Copyright 2011 Sandia Corporation. Under the terms of Contract + * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government + * retains certain rights in this software. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. 
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * broadcast [0...num_pes] + * + * usage: bcast {-v|h} + * + * Loop - shmem_broadcast_all() with increasing data amount. 
+ */ + +#include +#include +#include +#include +#include + +long pSync[SHMEM_BCAST_SYNC_SIZE]; + +#define START_BCAST_SIZE 16 +#define BCAST_INCR 1024 + +int +main(int argc, char* argv[]) +{ + int i, Verbose=0; + int mpe, num_pes, loops=10, cloop; + char *pgm; + long *dst, *src; + int nBytes = START_BCAST_SIZE; + int nLongs=0; + + shmem_init(); + mpe = shmem_my_pe(); + num_pes = shmem_n_pes(); + + if (num_pes == 1) { + printf("%s: Requires number of PEs > 1\n", argv[0]); + shmem_finalize(); + return 0; + } + + if (sizeof(long) != 8) { + printf("Test assumes 64-bit long (%zd)\n", sizeof(long)); + shmem_global_exit(1); + return 0; + } + + if ((pgm=strrchr(argv[0],'/'))) { + pgm++; + } else { + pgm = argv[0]; + } + + if (argc > 1) { + if (strncmp(argv[1],"-v",3) == 0) { + Verbose=1; + } else if (strncmp(argv[1],"-h",3) == 0) { + fprintf(stderr,"usage: %s {-v(verbose)|h(help)}\n",pgm); + shmem_finalize(); + exit(1); + } + } + + for (i = 0; i < SHMEM_BCAST_SYNC_SIZE; i += 1) { + pSync[i] = SHMEM_SYNC_VALUE; + } + + if ( mpe == 0 && Verbose ) { + fprintf(stderr,"%d loops\n",loops); + } + + for(cloop=1; cloop <= loops; cloop++) { + + nLongs = nBytes / sizeof(long); + dst = (long *)shmem_malloc(nBytes*2); + if ( !dst ) { + fprintf(stderr,"[%d] shmem_malloc(%d) failed %s\n", + mpe,nBytes,strerror(errno)); + return 0; + } + memset( (void*)dst, 0, nBytes ); + src = &dst[nLongs]; + for (i = 1; i < nLongs; i++) { + src[i] = i+1; + } + + shmem_barrier_all(); + + shmem_broadcast64(dst, src, nLongs, 1, 0, 0, num_pes, pSync); + + for(i=0; i < nLongs; i++) { + /* the root node shouldn't have the result into dst (cf specification).*/ + if (1 != mpe && dst[i] != src[i]) { + fprintf(stderr,"[%d] dst[%d] %ld != expected %ld\n", + mpe, i, dst[i],src[i]); + shmem_global_exit(1); + } else if (1 == mpe && dst[i] != 0) { + fprintf(stderr,"[%d] dst[%d] %ld != expected 0\n", + mpe, i, dst[i]); + shmem_global_exit(1); + } + } + shmem_barrier_all(); + + shmem_free (dst); + if (Verbose && mpe 
==0) + fprintf(stderr,"loop %2d Bcast %d, Done.\n",cloop,nBytes); + nBytes += BCAST_INCR; + } + + shmem_finalize(); + + return 0; +} diff --git a/test/unit/bcast_flood.c b/test/unit/bcast_flood.c new file mode 100644 index 0000000..6331a10 --- /dev/null +++ b/test/unit/bcast_flood.c @@ -0,0 +1,236 @@ +/* + * Copyright 2011 Sandia Corporation. Under the terms of Contract + * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government + * retains certain rights in this software. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/* Broadcast flood - bcast 100KB to all, default to using 2 pSync vars */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +static int atoi_scaled(char *s); +static void usage(char *pgm); + +#ifndef HAVE_SHMEMX_WTIME +static double shmemx_wtime(void) { + struct timeval tv; + gettimeofday(&tv, NULL); + return (double) tv.tv_sec + (double) tv.tv_usec / 1000000.0; +} +#endif /* HAVE_SHMEMX_WTIME */ + +int Verbose=0; +int Serialize; + +long *pSync; + +#define DFLT_LOOPS 600 // downsized for 'make check' +//#define DFLT_LOOPS 10000 +#define N_ELEMENTS 25600 /*100 KB as ints */ + +int +main(int argc, char **argv) +{ + int i,ps,ps_cnt=2; + int *target; + int *source; + int me, npes, elements=N_ELEMENTS, loops=DFLT_LOOPS; + char *pgm; + double start_time, time_taken; + + shmem_init(); + me = shmem_my_pe(); + npes = shmem_n_pes(); + + if ((pgm=strrchr(argv[0],'/'))) { + pgm++; + } else { + pgm = argv[0]; + } + + while ((i = getopt (argc, argv, "hve:l:p:s")) != EOF) { + switch (i) + { + case 'v': + Verbose++; + break; + case 'e': + if ((elements = atoi_scaled(optarg)) <= 0) { + fprintf(stderr,"ERR: Bad elements count %d\n",elements); + shmem_finalize(); + return 1; + } + break; + case 'l': + if ((loops = atoi_scaled(optarg)) <= 0) { + fprintf(stderr,"ERR: Bad loop count %d\n",loops); + shmem_finalize(); + return 1; + } + break; + case 'p': + if ((ps_cnt = atoi_scaled(optarg)) <= 0) { + fprintf(stderr,"ERR: Bad pSync[] elements %d\n",loops); + shmem_finalize(); + return 1; + } + break; + case 's': + Serialize++; + break; + case 'h': + if (me == 0) + usage(pgm); + return 0; + default: + if (me == 0) { + fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i); + usage(pgm); + } + shmem_finalize(); + return 1; + } + } + + ps_cnt *= SHMEM_BCAST_SYNC_SIZE; + pSync = shmem_malloc( ps_cnt * sizeof(long) ); + if (!pSync) { + fprintf(stderr, "ERR - null pSync pointer\n"); + shmem_global_exit(1); + } + + for (i = 0; i < ps_cnt; i++) { + pSync[i] 
= SHMEM_SYNC_VALUE; + } + + source = (int *) shmem_malloc( elements * sizeof(*source) ); + if (!source) { + fprintf(stderr, "ERR - null source pointer\n"); + shmem_global_exit(1); + } + + target = (int *) shmem_malloc( elements * sizeof(*target) ); + if (!target) { + fprintf(stderr, "ERR - null target pointer\n"); + shmem_global_exit(1); + } + for (i = 0; i < elements; i += 1) { + source[i] = i + 1; + target[i] = -90; + } + + if (me==0 && Verbose) { + fprintf(stderr,"ps_cnt %d loops %d nElems %d\n", + ps_cnt,loops,elements); + } + + shmem_barrier_all(); + + for(time_taken = 0.0, ps = i = 0; i < loops; i++) { + + start_time = shmemx_wtime(); + + shmem_broadcast32(target, source, elements, 0, 0, 0, npes, &pSync[ps]); + + if (Serialize) shmem_barrier_all(); + + time_taken += (shmemx_wtime() - start_time); + + if (ps_cnt > 1 ) { + ps += SHMEM_BCAST_SYNC_SIZE; + if ( ps >= ps_cnt ) ps = 0; + } + } + + if(me == 0 && Verbose) { + printf("%d loops of Broadcast32(%ld bytes) over %d PEs: %7.3f secs\n", + loops, (elements*sizeof(*source)), npes, time_taken); + elements = (elements * loops * sizeof(*source)) / (1024*1024); + printf(" %7.5f secs per broadcast() @ %7.4f MB/sec\n", + (time_taken/(double)loops), ((double)elements / time_taken) ); + } + + if (Verbose > 1) fprintf(stderr,"[%d] pre B1\n",me); + + shmem_barrier_all(); + + if (Verbose > 1) fprintf(stderr,"[%d] post B1\n",me); + + shmem_free(pSync); + shmem_free(target); + shmem_free(source); + + shmem_finalize(); + + return 0; +} + + +static int +atoi_scaled(char *s) +{ + long val; + char *e; + + val = strtol(s,&e,0); + if (e == NULL || *e =='\0') + return (int)val; + + if (*e == 'k' || *e == 'K') + val *= 1024; + else if (*e == 'm' || *e == 'M') + val *= 1024*1024; + else if (*e == 'g' || *e == 'G') + val *= 1024*1024*1024; + + return (int)val; +} + + +static void +usage(char *pgm) +{ + fprintf(stderr, + "usage: %s -{lhv}\n" + " where:\n" + " -l loops (%d) loop count.\n" + " -e ints # of integers to broadcast\n" + " 
-p cnt # of pSync[] elements\n" + " -v be verbose, multiple 'v' more verbose\n" + " -h this text.\n", + pgm,DFLT_LOOPS); +} + diff --git a/test/unit/big_reduction.c b/test/unit/big_reduction.c new file mode 100644 index 0000000..51fd5cc --- /dev/null +++ b/test/unit/big_reduction.c @@ -0,0 +1,109 @@ +/* + * Copyright 2011 Sandia Corporation. Under the terms of Contract + * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government + * retains certain rights in this software. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * reduce across PEs with shmem_max_to_all() + * + * usage: big_reduction {-v|h} +*/ + +#include +#include +#include +#include + +long pSync[SHMEM_REDUCE_SYNC_SIZE]; + +#define N 128 + +long src[N]; +long dst[N]; + +#define MAX(a, b) ((a) > (b)) ? 
(a) : (b) +#define WRK_SIZE MAX(N/2+1, SHMEM_REDUCE_MIN_WRKDATA_SIZE) + +long pWrk[WRK_SIZE]; + +int +main(int argc, char* argv[]) +{ + int i, Verbose=0; + char *pgm; + + if ((pgm=strrchr(argv[0],'/'))) { + pgm++; + } else { + pgm = argv[0]; + } + + shmem_init(); /* must precede every other SHMEM call, including the shmem_finalize() on the -h path */ + + if (argc > 1) { + if (strncmp(argv[1],"-v",3) == 0) { + Verbose=1; + } else if (strncmp(argv[1],"-h",3) == 0) { + fprintf(stderr,"usage: %s {-v(verbose)|h(help)}\n",pgm); + shmem_finalize(); + exit(1); + } + } + + for (i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i += 1) { + pSync[i] = SHMEM_SYNC_VALUE; + } + + for (i = 0; i < N; i += 1) { + src[i] = shmem_my_pe() + i; + } + shmem_barrier_all(); /* pSync and src are set on every PE before the reduction starts */ + + shmem_long_max_to_all(dst, src, N, 0, 0, shmem_n_pes(), pWrk, pSync); + + if (Verbose) { + printf("%d/%d\tdst =", shmem_my_pe(), shmem_n_pes() ); + for (i = 0; i < N; i+= 1) { + printf(" %ld", dst[i]); + } + printf("\n"); + } + + for (i = 0; i < N; i+= 1) { + if (dst[i] != shmem_n_pes() - 1 + i) { /* max over PEs of (pe + i) comes from the highest-numbered PE */ + printf("[%3d] Error: dst[%d] == %ld, expected %ld\n", + shmem_my_pe(), i, dst[i], shmem_n_pes() - 1 + (long) i); + shmem_global_exit(1); + } + } + + shmem_finalize(); + + return 0; +} diff --git a/test/unit/bigget.c b/test/unit/bigget.c new file mode 100644 index 0000000..db8a236 --- /dev/null +++ b/test/unit/bigget.c @@ -0,0 +1,249 @@ +/* + * Copyright 2011 Sandia Corporation. Under the terms of Contract + * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government + * retains certain rights in this software. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * (big get) each PE gets N elements (1 MB) from (my_pe()+1 mod num_pes()). + */ + +#include +#include +#include +#include +#include + +#include +#include + +#define NUM_ELEMENTS 4194304 // 32 MB by longs +//#define DFLT_LOOPS 10000 +#define DFLT_LOOPS 1000 + +int Verbose; +int Sync; +int Track; +int elements = NUM_ELEMENTS; +double *total_time; + +static int +atoi_scaled(char *s) +{ + long val; + char *e; + + val = strtol(s,&e,0); + if (e == NULL || *e =='\0') + return (int)val; + + if (*e == 'k' || *e == 'K') + val *= 1024; + else if (*e == 'm' || *e == 'M') + val *= 1024*1024; + else if (*e == 'g' || *e == 'G') + val *= 1024*1024*1024; + + return (int)val; +} + +#ifndef HAVE_SHMEMX_WTIME +static double shmemx_wtime(void) { + struct timeval tv; + gettimeofday(&tv, NULL); + return (double) tv.tv_sec + (double) tv.tv_usec / 1000000.0; +} +#endif /* HAVE_SHMEMX_WTIME */ + +static void +usage(char *pgm) +{ + fprintf(stderr, + "usage: %s -{hvclst}\n" + " where: (big gets)\n" + " -v be verbose, multiple 'v' more verbose\n" + " -e element-cnt (%d) # of int sized elements to get\n" + " -l loops (%d) loop count.\n" + " -s synchronize: barrier after each shmem_get()\n" + " -t track: output '.' 
for every 200 shmem_get()s\n", + pgm,NUM_ELEMENTS,DFLT_LOOPS); +} + + +int +main(int argc, char **argv) +{ + int loops=DFLT_LOOPS; + char *pgm; + int *Target; + int *Source; + int i, me, npes; + int target_pe; + long bytes; + double time_taken=0.0, start_time; + + shmem_init(); + me = shmem_my_pe(); + npes = shmem_n_pes(); + + if ((pgm=strrchr(argv[0],'/'))) + pgm++; + else + pgm = argv[0]; + + while ((i = getopt (argc, argv, "hve:l:st")) != EOF) { + switch (i) + { + case 'v': + Verbose++; + break; + case 'e': + if ((elements = atoi_scaled(optarg)) <= 0) { + fprintf(stderr,"ERR: Bad elements count %d\n",elements); + shmem_finalize(); + return 1; + } + break; + case 'l': + if ((loops = atoi_scaled(optarg)) <= 0) { + fprintf(stderr,"ERR: Bad loop count %d\n",loops); + shmem_finalize(); + return 1; + } + break; + case 's': + Sync++; + break; + case 't': + Track++; + break; + case 'h': + if (me == 0) usage(pgm); + shmem_finalize(); /* leave SHMEM cleanly, as the other exit paths do */ + return 0; + default: + if (me == 0) { + fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i); + usage(pgm); + } + shmem_finalize(); + return 1; + } + } + + target_pe = (me+1) % npes; + + total_time = (double *) shmem_malloc( npes * sizeof(double) ); + if (!total_time) { + fprintf(stderr,"ERR: bad total_time shmem_malloc(%ld)\n", + (long)(npes * sizeof(double))); /* report the size actually requested; cast matches %ld */ + shmem_global_exit(1); + } + + Source = (int *) shmem_malloc( elements * sizeof(*Source) ); + if (!Source) { + fprintf(stderr,"ERR: bad Source shmem_malloc(%ld)\n", + (long)(elements * sizeof(*Source))); + shmem_free(total_time); + shmem_global_exit(1); + } + + Target = (int *) shmem_malloc( elements * sizeof(*Target) ); + if (!Target) { + fprintf(stderr,"ERR: bad Target shmem_malloc(%ld)\n", + (long)(elements * sizeof(*Target))); + shmem_free(Source); + shmem_free(total_time); + shmem_global_exit(1); + } + + for (i = 0; i < elements; i++) { + Target[i] = -90; + Source[i] = i + 1; + } + + bytes = loops * sizeof(int) * elements; + + if (Verbose && me==0) + fprintf(stderr, "%s: INFO - %d loops, get %d (int) 
elements from PE+1\n", + pgm, loops, elements); + + shmem_barrier_all(); + + for(i=0; i < loops; i++) { + + start_time = shmemx_wtime(); + + shmem_int_get( Target, Source, elements, target_pe ); + + time_taken += shmemx_wtime() - start_time; + + if (me==0) { + if ( Track && i > 0 && ((i % 200) == 0)) + fprintf(stderr,".%d",i); + } + if (Sync) + shmem_barrier_all(); + } + + // collect time per node elapsed time. + shmem_double_put( &total_time[me], &time_taken, 1, 0 ); + + shmem_barrier_all(); + + for (i = 0; i < elements; i++) { + if (Target[i] != i + 1) { + printf("%d: Error Target[%d] = %d, expected %d\n", + me, i, Target[i], i + 1); + shmem_global_exit(1); + } + } + + if ( Track && me == 0 ) + fprintf(stderr,"\n"); + + if (Verbose && me == 0) { + double rate,secs; + + // average time + for(i=0,secs=0.0; i < npes; i++) + secs += total_time[i]; + secs /= (double)npes; + rate = ((double)bytes/(1024.0*1024.0)) / secs; + printf("%s: ave %5.3f MB/sec (bytes %ld secs %5.3f)\n", + pgm, rate, bytes, secs); + } + + shmem_free(total_time); + shmem_free(Target); + shmem_free(Source); + + shmem_finalize(); + + return 0; +} diff --git a/test/unit/bigput.c b/test/unit/bigput.c new file mode 100644 index 0000000..ce37249 --- /dev/null +++ b/test/unit/bigput.c @@ -0,0 +1,268 @@ +/* + * Copyright 2011 Sandia Corporation. Under the terms of Contract + * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government + * retains certain rights in this software. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * (big puts) each PE puts N elements (1MB) to ((my_pe()+1) mod num_pes()). + */ + +#include +#include +#include +#include +#include + +#include +#include + +#define NUM_ELEMENTS 4194304 // 32 MB by longs +//#define DFLT_LOOPS 10000 // reset when Portals4 can achieve this. +#define DFLT_LOOPS 100 + +int Verbose; +int Sync; +int Track; +int elements = NUM_ELEMENTS; +double sum_time, time_taken; + +double pWrk[SHMEM_REDUCE_MIN_WRKDATA_SIZE]; +long pSync[SHMEM_REDUCE_SYNC_SIZE]; + +static int +atoi_scaled(char *s) +{ + long val; + char *e; + + val = strtol(s,&e,0); + if (e == NULL || *e =='\0') + return (int)val; + + if (*e == 'k' || *e == 'K') + val *= 1024; + else if (*e == 'm' || *e == 'M') + val *= 1024*1024; + else if (*e == 'g' || *e == 'G') + val *= 1024*1024*1024; + + return (int)val; +} + +static void +usage(char *pgm) +{ + fprintf(stderr, + "usage: %s -{hvclst}\n" + " where: (big puts)\n" + " -v be verbose, multiple 'v' more verbose\n" + " -e element-cnt (%d) # of int sized elements to put\n" + " -l loops (%d) loop count.\n" + " -s synchronize: barrier after each shmem_put()\n" + " -t track: output '.' 
for every 200 shmem_put()s\n", + pgm,NUM_ELEMENTS,DFLT_LOOPS); +} + +#if !defined(HAVE_SHMEMX_WTIME) +static inline double shmemx_wtime(void) +{ + struct timeval tv; + gettimeofday(&tv, 0); + return (double)((tv.tv_usec / 1000000.0) + tv.tv_sec); +} +#endif + +int +main(int argc, char **argv) +{ + int loops=DFLT_LOOPS; + char *pgm; + int *Target; + int *Source; + int i, me, npes; + int target_PE; + long bytes; + double start_time, *total_time; + + shmem_init(); + me = shmem_my_pe(); + npes = shmem_n_pes(); + + if ((pgm=strrchr(argv[0],'/'))) + pgm++; + else + pgm = argv[0]; + + while ((i = getopt (argc, argv, "hve:l:st")) != EOF) { + switch (i) + { + case 'v': + Verbose++; + break; + case 'e': + if ((elements = atoi_scaled(optarg)) <= 0) { + fprintf(stderr,"ERR: Bad elements count %d\n",elements); + shmem_finalize(); + return 1; + } + break; + case 'l': + if ((loops = atoi_scaled(optarg)) <= 0) { + fprintf(stderr,"ERR: Bad loop count %d\n",loops); + shmem_finalize(); + return 1; + } + break; + case 's': + Sync++; + break; + case 't': + Track++; + break; + case 'h': + if (me == 0) usage(pgm); + shmem_finalize(); /* leave SHMEM cleanly, as the other exit paths do */ + return 0; + default: + if (me == 0) { + fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i); + usage(pgm); + } + shmem_finalize(); + return 1; + } + } + + for(i=0; i < SHMEM_REDUCE_SYNC_SIZE; i++) + pSync[i] = SHMEM_SYNC_VALUE; + + target_PE = (me+1) % npes; + + total_time = (double *) shmem_malloc( npes * sizeof(double) ); + if (!total_time) { + fprintf(stderr,"ERR: bad total_time shmem_malloc(%ld)\n", + (long)(npes * sizeof(double))); /* report the size actually requested; cast matches %ld */ + shmem_global_exit(1); + } + for(i=0; i < npes; i++) + total_time[i] = -1.0; + + Source = (int *) shmem_malloc( elements * sizeof(*Source) ); + if (!Source) { + fprintf(stderr,"ERR: bad Source shmem_malloc(%ld)\n", + (long)(elements * sizeof(*Source))); + shmem_free(total_time); + shmem_global_exit(1); + } + + Target = (int *) shmem_malloc( elements * sizeof(*Target) ); + if (!Target) { + fprintf(stderr,"ERR: bad Target shmem_malloc(%ld)\n", + 
(elements * sizeof(*Target))); + shmem_free(Source); + shmem_free(total_time); + shmem_global_exit(1); + } + + for (i = 0; i < elements; i++) { + Target[i] = -90; + Source[i] = i + 1; + } + + bytes = loops * sizeof(int) * elements; + + if (Verbose && me==0) { + fprintf(stderr, + "%s: INFO - %d loops, put %d (int) elements to PE+1 Max put ??\n", + pgm, loops, elements); + } + shmem_barrier_all(); + + for(i=0; i < loops; i++) { + + start_time = shmemx_wtime(); + + shmem_int_put(Target, Source, elements, target_PE); + + time_taken += (shmemx_wtime() - start_time); + + if (me==0) { + if ( Track && i > 0 && ((i % 200) == 0)) + fprintf(stderr,".%d",i); + } + if (Sync) + shmem_barrier_all(); + } + + // collect time per node. + shmem_double_put( &total_time[me], &time_taken, 1, 0 ); + shmem_double_sum_to_all(&sum_time, &time_taken, 1, 0, 0, npes, pWrk, pSync); + + shmem_barrier_all(); + + for (i = 0; i < elements; i++) { + if (Target[i] != i + 1) { + printf("%d: Error Target[%d] = %d, expected %d\n", + me, i, Target[i], i + 1); + shmem_global_exit(1); + } + } + + if ( Track && me == 0 ) fprintf(stderr,"\n"); + + if(Verbose && me == 0) { + double rate, comp_time; + + if (Verbose > 1) + fprintf(stdout,"Individule PE times: (seconds)\n"); + for(i=0,comp_time=0.0; i < npes; i++) { + comp_time += total_time[i]; + if (Verbose > 1) + fprintf(stdout," PE[%d] %8.6f\n",i,total_time[i]); + } + + sum_time /= (double)npes; + comp_time /= (double)npes; + if (sum_time != comp_time) + printf("%s: computed_time %7.5f != sum_to_all_time %7.5f)\n", + pgm, comp_time, sum_time ); + + rate = ((double)bytes/(1024.0*1024.0)) / comp_time; + printf("%s: shmem_int_put() %7.4f MB/sec (bytes %ld secs %7.4f)\n", + pgm, rate, bytes, sum_time); + } + + shmem_free(total_time); + shmem_free(Target); + shmem_free(Source); + + shmem_finalize(); + + return 0; +} diff --git a/test/unit/broadcast_active_set.c b/test/unit/broadcast_active_set.c new file mode 100644 index 0000000..3365367 --- /dev/null +++ 
b/test/unit/broadcast_active_set.c @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include +#include + +#define NELEM 10 + +long bcast_psync[SHMEM_BCAST_SYNC_SIZE]; + +/* Note: Need to alternate psync arrays because the active set changes */ +long barrier_psync0[SHMEM_BARRIER_SYNC_SIZE]; +long barrier_psync1[SHMEM_BARRIER_SYNC_SIZE]; + +int64_t src[NELEM]; +int64_t dst[NELEM]; + +int main(void) +{ + int i, me, npes; + int errors = 0; + + shmem_init(); + + me = shmem_my_pe(); + npes = shmem_n_pes(); + + for (i = 0; i < NELEM; i++) { + src[i] = me; + dst[i] = -1; + } + + for (i = 0; i < SHMEM_BCAST_SYNC_SIZE; i++) + bcast_psync[i] = SHMEM_SYNC_VALUE; + + for (i = 0; i < SHMEM_BARRIER_SYNC_SIZE; i++) { + barrier_psync0[i] = SHMEM_SYNC_VALUE; + barrier_psync1[i] = SHMEM_SYNC_VALUE; + } + + if (me == 0) + printf("Shrinking active set test\n"); + + shmem_barrier_all(); + + /* A total of npes tests are performed, where the active set in each test + * includes PEs i..npes-1 */ + for (i = 0; i <= me; i++) { + int j; + + if (me == i) + printf(" + active set size %d\n", npes-i); + + shmem_broadcast64(dst, src, NELEM, 0, i, 0, npes-i, bcast_psync); + + /* Validate broadcasted data */ + for (j = 0; j < NELEM; j++) { + int64_t expected = (me == i) ? i-1 : i; + if (dst[j] != expected) { + printf("%d: Expected dst[%d] = %"PRId64", got dst[%d] = %"PRId64", iteration %d\n", + me, j, expected, j, dst[j], i); + errors++; + } + } + + shmem_barrier(i, 0, npes-i, (i % 2) ? barrier_psync0 : barrier_psync1); + } + + shmem_barrier_all(); + + for (i = 0; i < NELEM; i++) + dst[i] = -1; + + if (me == 0) + printf("Changing root test\n"); + + shmem_barrier_all(); + + /* A total of npes tests are performed, where the root changes each time */ + for (i = 0; i < npes; i++) { + int j; + + if (me == i) + printf(" + root %d\n", i); + + shmem_broadcast64(dst, src, NELEM, i, 0, 0, npes, bcast_psync); + + /* Validate broadcasted data */ + for (j = 0; j < NELEM; j++) { + int64_t expected = (me == i) ? 
i-1 : i; + if (dst[j] != expected) { + printf("%d: Expected dst[%d] = %"PRId64", got dst[%d] = %"PRId64", iteration %d\n", + me, j, expected, j, dst[j], i); + errors++; + } + } + + shmem_barrier(0, 0, npes, barrier_psync0); + } + shmem_finalize(); + + return errors != 0; +} diff --git a/test/unit/c11_test_shmem_atomic_add.c b/test/unit/c11_test_shmem_atomic_add.c new file mode 100644 index 0000000..90344a4 --- /dev/null +++ b/test/unit/c11_test_shmem_atomic_add.c @@ -0,0 +1,199 @@ +/* + * This test program is derived from a unit test created by Nick Park. + * The original unit test is a work of the U.S. Government and is not subject + * to copyright protection in the United States. Foreign copyrights may + * apply. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L + +enum op { ADD = 0, ATOMIC_ADD, CTX_ATOMIC_ADD, FADD, ATOMIC_FETCH_ADD, + CTX_ATOMIC_FETCH_ADD }; + +#ifdef ENABLE_DEPRECATED_TESTS +#define DEPRECATED_ADD shmem_add +#define DEPRECATED_FADD shmem_fadd +#else +#define DEPRECATED_ADD shmem_atomic_add +#define DEPRECATED_FADD shmem_atomic_fetch_add +#endif + +#define TEST_SHMEM_ADD(OP, TYPE) \ + do { \ + static TYPE remote; \ + TYPE old; \ + const int mype = shmem_my_pe(); \ + const int npes = shmem_n_pes(); \ + remote = (TYPE)0; \ + shmem_barrier_all(); \ + for (int i = 0; i < npes; i++) \ + switch (OP) { \ + case ADD: \ + DEPRECATED_ADD(&remote, (TYPE)(mype + 1), i); \ + break; \ + case ATOMIC_ADD: \ + shmem_atomic_add(&remote, (TYPE)(mype + 1), i); \ + break; \ + case CTX_ATOMIC_ADD: \ + shmem_atomic_add(SHMEM_CTX_DEFAULT, &remote, (TYPE)(mype + 1), i); \ + break; \ + case FADD: \ + old = DEPRECATED_FADD(&remote, (TYPE)(mype + 1), i); \ + if (old > (TYPE)(npes * (npes + 1) / 2)) { \ + printf("PE %i error inconsistent value of old (%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + break; \ + case ATOMIC_FETCH_ADD: \ + old = shmem_atomic_fetch_add(&remote, (TYPE)(mype + 1), i); \ + if (old > (TYPE)(npes * (npes + 1) / 2)) { \ + printf("PE %i error inconsistent value of old (%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + break; \ + case CTX_ATOMIC_FETCH_ADD: \ + old = shmem_atomic_fetch_add(SHMEM_CTX_DEFAULT, &remote, (TYPE)(mype + 1), i); \ + if (old > (TYPE)(npes * (npes + 1) / 2)) { \ + printf("PE %i error inconsistent value of old (%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + break; \ + default: \ + printf("Invalid operation (%d)\n", OP); \ + shmem_global_exit(1); \ + } \ + shmem_barrier_all(); \ + if (remote != (TYPE)(npes * (npes + 1) / 2)) { \ + printf("PE %i observed error with TEST_SHMEM_ADD(%s, %s)\n", \ + mype, #OP, #TYPE); 
\ + rc = EXIT_FAILURE; \ + } \ + } while (false) + +#else +#define TEST_SHMEM_ADD(OP, TYPE) + +#endif + +int main(int argc, char* argv[]) { + shmem_init(); + + int rc = EXIT_SUCCESS; + +#ifdef ENABLE_DEPRECATED_TESTS + TEST_SHMEM_ADD(ADD, int); + TEST_SHMEM_ADD(ADD, long); + TEST_SHMEM_ADD(ADD, long long); + TEST_SHMEM_ADD(ADD, unsigned int); + TEST_SHMEM_ADD(ADD, unsigned long); + TEST_SHMEM_ADD(ADD, unsigned long long); + TEST_SHMEM_ADD(ADD, int32_t); + TEST_SHMEM_ADD(ADD, int64_t); + TEST_SHMEM_ADD(ADD, uint32_t); + TEST_SHMEM_ADD(ADD, uint64_t); + TEST_SHMEM_ADD(ADD, size_t); + TEST_SHMEM_ADD(ADD, ptrdiff_t); +#endif /* ENABLE_DEPRECATED_TESTS */ + + TEST_SHMEM_ADD(ATOMIC_ADD, int); + TEST_SHMEM_ADD(ATOMIC_ADD, long); + TEST_SHMEM_ADD(ATOMIC_ADD, long long); + TEST_SHMEM_ADD(ATOMIC_ADD, unsigned int); + TEST_SHMEM_ADD(ATOMIC_ADD, unsigned long); + TEST_SHMEM_ADD(ATOMIC_ADD, unsigned long long); + TEST_SHMEM_ADD(ATOMIC_ADD, int32_t); + TEST_SHMEM_ADD(ATOMIC_ADD, int64_t); + TEST_SHMEM_ADD(ATOMIC_ADD, uint32_t); + TEST_SHMEM_ADD(ATOMIC_ADD, uint64_t); + TEST_SHMEM_ADD(ATOMIC_ADD, size_t); + TEST_SHMEM_ADD(ATOMIC_ADD, ptrdiff_t); + + TEST_SHMEM_ADD(CTX_ATOMIC_ADD, int); + TEST_SHMEM_ADD(CTX_ATOMIC_ADD, long); + TEST_SHMEM_ADD(CTX_ATOMIC_ADD, long long); + TEST_SHMEM_ADD(CTX_ATOMIC_ADD, unsigned int); + TEST_SHMEM_ADD(CTX_ATOMIC_ADD, unsigned long); + TEST_SHMEM_ADD(CTX_ATOMIC_ADD, unsigned long long); + TEST_SHMEM_ADD(CTX_ATOMIC_ADD, int32_t); + TEST_SHMEM_ADD(CTX_ATOMIC_ADD, int64_t); + TEST_SHMEM_ADD(CTX_ATOMIC_ADD, uint32_t); + TEST_SHMEM_ADD(CTX_ATOMIC_ADD, uint64_t); + TEST_SHMEM_ADD(CTX_ATOMIC_ADD, size_t); + TEST_SHMEM_ADD(CTX_ATOMIC_ADD, ptrdiff_t); + + TEST_SHMEM_ADD(FADD, int); + TEST_SHMEM_ADD(FADD, long); + TEST_SHMEM_ADD(FADD, long long); + TEST_SHMEM_ADD(FADD, unsigned int); + TEST_SHMEM_ADD(FADD, unsigned long); + TEST_SHMEM_ADD(FADD, unsigned long long); + TEST_SHMEM_ADD(FADD, int32_t); + TEST_SHMEM_ADD(FADD, int64_t); + TEST_SHMEM_ADD(FADD, 
uint32_t); + TEST_SHMEM_ADD(FADD, uint64_t); + TEST_SHMEM_ADD(FADD, size_t); + TEST_SHMEM_ADD(FADD, ptrdiff_t); + + TEST_SHMEM_ADD(ATOMIC_FETCH_ADD, int); + TEST_SHMEM_ADD(ATOMIC_FETCH_ADD, long); + TEST_SHMEM_ADD(ATOMIC_FETCH_ADD, long long); + TEST_SHMEM_ADD(ATOMIC_FETCH_ADD, unsigned int); + TEST_SHMEM_ADD(ATOMIC_FETCH_ADD, unsigned long); + TEST_SHMEM_ADD(ATOMIC_FETCH_ADD, unsigned long long); + TEST_SHMEM_ADD(ATOMIC_FETCH_ADD, int32_t); + TEST_SHMEM_ADD(ATOMIC_FETCH_ADD, int64_t); + TEST_SHMEM_ADD(ATOMIC_FETCH_ADD, uint32_t); + TEST_SHMEM_ADD(ATOMIC_FETCH_ADD, uint64_t); + TEST_SHMEM_ADD(ATOMIC_FETCH_ADD, size_t); + TEST_SHMEM_ADD(ATOMIC_FETCH_ADD, ptrdiff_t); + + TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD, int); + TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD, long); + TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD, long long); + TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD, unsigned int); + TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD, unsigned long); + TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD, unsigned long long); + TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD, int32_t); + TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD, int64_t); + TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD, uint32_t); + TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD, uint64_t); + TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD, size_t); + TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD, ptrdiff_t); + + shmem_finalize(); + return rc; +} diff --git a/test/unit/c11_test_shmem_atomic_and.c b/test/unit/c11_test_shmem_atomic_and.c new file mode 100644 index 0000000..b8e9d3f --- /dev/null +++ b/test/unit/c11_test_shmem_atomic_and.c @@ -0,0 +1,134 @@ +/* + * This test program is derived from a unit test created by Nick Park. + * The original unit test is a work of the U.S. Government and is not subject + * to copyright protection in the United States. Foreign copyrights may + * apply. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. 
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L + +enum op { AND = 0, CTX_AND, FETCH_AND, CTX_FETCH_AND }; + +/* Initially, remote = 111...b. Each PE performs an atomic AND where the + * PEth bit of the input value is set to 0 and all other bits are set to 1. + * The result has the NPES least significant bits cleared, 111...000...b. 
+ */ + +#define TEST_SHMEM_AND(OP, TYPE) \ + do { \ + static TYPE remote = ~(TYPE)0; \ + const int mype = shmem_my_pe(); \ + const int npes = shmem_n_pes(); \ + TYPE old = (TYPE)0; \ + if (npes-1 > sizeof(TYPE)) break; /* Avoid overflow */ \ + for (int i = 0; i < npes; i++) \ + switch (OP) { \ + case AND: \ + shmem_atomic_and(&remote, ~(TYPE)(1LLU << mype), i); \ + break; \ + case CTX_AND: \ + shmem_atomic_and(SHMEM_CTX_DEFAULT, &remote, ~(TYPE)(1LLU << mype), i); \ + break; \ + case FETCH_AND: \ + old = shmem_atomic_fetch_and(&remote, ~(TYPE)(1LLU << mype), i); \ + if ((old & (TYPE)(1LLU << mype)) == 0) { \ + printf("PE %i error inconsistent value of old (%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + break; \ + case CTX_FETCH_AND: \ + old = shmem_atomic_fetch_and(SHMEM_CTX_DEFAULT, &remote, ~(TYPE)(1LLU << mype), i); \ + if ((old & (TYPE)(1LLU << mype)) == 0) { \ + printf("PE %i error inconsistent value of old (%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + break; \ + default: \ + printf("Invalid operation (%d)\n", OP); \ + shmem_global_exit(1); \ + } \ + shmem_barrier_all(); \ + if (remote != ~(TYPE)((1LLU << npes) - 1LLU)) { \ + printf("PE %i observed error with TEST_SHMEM_AND(%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + } while (false) + +#else +#define TEST_SHMEM_AND(OP, TYPE) + +#endif + +int main(int argc, char* argv[]) { + shmem_init(); + + int rc = EXIT_SUCCESS; + TEST_SHMEM_AND(AND, unsigned int); + TEST_SHMEM_AND(AND, unsigned long); + TEST_SHMEM_AND(AND, unsigned long long); + TEST_SHMEM_AND(AND, int32_t); + TEST_SHMEM_AND(AND, int64_t); + TEST_SHMEM_AND(AND, uint32_t); + TEST_SHMEM_AND(AND, uint64_t); + + TEST_SHMEM_AND(CTX_AND, unsigned int); + TEST_SHMEM_AND(CTX_AND, unsigned long); + TEST_SHMEM_AND(CTX_AND, unsigned long long); + TEST_SHMEM_AND(CTX_AND, int32_t); + TEST_SHMEM_AND(CTX_AND, int64_t); + TEST_SHMEM_AND(CTX_AND, uint32_t); + TEST_SHMEM_AND(CTX_AND, uint64_t); + + 
TEST_SHMEM_AND(FETCH_AND, unsigned int); + TEST_SHMEM_AND(FETCH_AND, unsigned long); + TEST_SHMEM_AND(FETCH_AND, unsigned long long); + TEST_SHMEM_AND(FETCH_AND, int32_t); + TEST_SHMEM_AND(FETCH_AND, int64_t); + TEST_SHMEM_AND(FETCH_AND, uint32_t); + TEST_SHMEM_AND(FETCH_AND, uint64_t); + + TEST_SHMEM_AND(CTX_FETCH_AND, unsigned int); + TEST_SHMEM_AND(CTX_FETCH_AND, unsigned long); + TEST_SHMEM_AND(CTX_FETCH_AND, unsigned long long); + TEST_SHMEM_AND(CTX_FETCH_AND, int32_t); + TEST_SHMEM_AND(CTX_FETCH_AND, int64_t); + TEST_SHMEM_AND(CTX_FETCH_AND, uint32_t); + TEST_SHMEM_AND(CTX_FETCH_AND, uint64_t); + + shmem_finalize(); + return rc; +} diff --git a/test/unit/c11_test_shmem_atomic_cswap.c b/test/unit/c11_test_shmem_atomic_cswap.c new file mode 100644 index 0000000..ff8f811 --- /dev/null +++ b/test/unit/c11_test_shmem_atomic_cswap.c @@ -0,0 +1,141 @@ +/* + * This test program is derived from a unit test created by Nick Park. + * The original unit test is a work of the U.S. Government and is not subject + * to copyright protection in the United States. Foreign copyrights may + * apply. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L + +enum op { CSWAP = 0, ATOMIC_COMPARE_SWAP, CTX_ATOMIC_COMPARE_SWAP }; + +#ifdef ENABLE_DEPRECATED_TESTS +#define DEPRECATED_CSWAP shmem_cswap +#else +#define DEPRECATED_CSWAP shmem_atomic_compare_swap +#endif + +#define TEST_SHMEM_CSWAP(OP, TYPE) \ + do { \ + static TYPE remote; \ + TYPE old; \ + const int mype = shmem_my_pe(); \ + const int npes = shmem_n_pes(); \ + remote = npes; \ + shmem_barrier_all(); \ + switch (OP) { \ + case CSWAP: \ + old = DEPRECATED_CSWAP(&remote, (TYPE)npes, (TYPE)mype, \ + (mype + 1) % npes); \ + break; \ + case ATOMIC_COMPARE_SWAP: \ + old = shmem_atomic_compare_swap(&remote, (TYPE)npes, \ + (TYPE)mype, (mype + 1) % npes); \ + break; \ + case CTX_ATOMIC_COMPARE_SWAP: \ + old = shmem_atomic_compare_swap(SHMEM_CTX_DEFAULT, &remote, \ + (TYPE)npes, (TYPE)mype, \ + (mype + 1) % npes); \ + break; \ + default: \ + printf("invalid operation (%d)\n", OP); \ + shmem_global_exit(1); \ + } \ + shmem_barrier_all(); \ + if (remote != (TYPE)((mype + npes - 1) % npes)) { \ + printf("PE %i observed error with TEST_SHMEM_CSWAP(%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + if (old != npes) { \ + printf("PE %i error inconsistent value of old (%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + } while (false) + +#else +#define TEST_SHMEM_CSWAP(OP, TYPE) + +#endif + +int main(int argc, char* argv[]) { + shmem_init(); + + int rc = EXIT_SUCCESS; + +#ifdef ENABLE_DEPRECATED_TESTS + TEST_SHMEM_CSWAP(CSWAP, int); + TEST_SHMEM_CSWAP(CSWAP, long); + TEST_SHMEM_CSWAP(CSWAP, long long); + TEST_SHMEM_CSWAP(CSWAP, unsigned int); + 
TEST_SHMEM_CSWAP(CSWAP, unsigned long); + TEST_SHMEM_CSWAP(CSWAP, unsigned long long); + TEST_SHMEM_CSWAP(CSWAP, int32_t); + TEST_SHMEM_CSWAP(CSWAP, int64_t); + TEST_SHMEM_CSWAP(CSWAP, uint32_t); + TEST_SHMEM_CSWAP(CSWAP, uint64_t); + TEST_SHMEM_CSWAP(CSWAP, size_t); + TEST_SHMEM_CSWAP(CSWAP, ptrdiff_t); +#endif /* ENABLE_DEPRECATED_TESTS */ + + TEST_SHMEM_CSWAP(ATOMIC_COMPARE_SWAP, int); + TEST_SHMEM_CSWAP(ATOMIC_COMPARE_SWAP, long); + TEST_SHMEM_CSWAP(ATOMIC_COMPARE_SWAP, long long); + TEST_SHMEM_CSWAP(ATOMIC_COMPARE_SWAP, unsigned int); + TEST_SHMEM_CSWAP(ATOMIC_COMPARE_SWAP, unsigned long); + TEST_SHMEM_CSWAP(ATOMIC_COMPARE_SWAP, unsigned long long); + TEST_SHMEM_CSWAP(ATOMIC_COMPARE_SWAP, int32_t); + TEST_SHMEM_CSWAP(ATOMIC_COMPARE_SWAP, int64_t); + TEST_SHMEM_CSWAP(ATOMIC_COMPARE_SWAP, uint32_t); + TEST_SHMEM_CSWAP(ATOMIC_COMPARE_SWAP, uint64_t); + TEST_SHMEM_CSWAP(ATOMIC_COMPARE_SWAP, size_t); + TEST_SHMEM_CSWAP(ATOMIC_COMPARE_SWAP, ptrdiff_t); + + TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP, int); + TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP, long); + TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP, long long); + TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP, unsigned int); + TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP, unsigned long); + TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP, unsigned long long); + TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP, int32_t); + TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP, int64_t); + TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP, uint32_t); + TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP, uint64_t); + TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP, size_t); + TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP, ptrdiff_t); + + shmem_finalize(); + return rc; +} diff --git a/test/unit/c11_test_shmem_atomic_fetch.c b/test/unit/c11_test_shmem_atomic_fetch.c new file mode 100644 index 0000000..9f8e7e8 --- /dev/null +++ b/test/unit/c11_test_shmem_atomic_fetch.c @@ -0,0 +1,137 @@ +/* + * This test program is derived from a unit test created by Nick Park. 
+ * The original unit test is a work of the U.S. Government and is not subject + * to copyright protection in the United States. Foreign copyrights may + * apply. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L /* C11 or newer only; otherwise TEST_SHMEM_FETCH is the empty macro from the #else branch */ + +enum op { FETCH = 0, ATOMIC_FETCH, CTX_ATOMIC_FETCH }; /* selects which fetch call TEST_SHMEM_FETCH issues */ + +#ifdef ENABLE_DEPRECATED_TESTS +#define DEPRECATED_FETCH shmem_fetch +#else +#define DEPRECATED_FETCH shmem_atomic_fetch /* keeps the FETCH case compilable when deprecated tests are disabled */ +#endif + +#define TEST_SHMEM_FETCH(OP, TYPE) /* For one TYPE: each PE stores its own rank in remote, then atomically fetches remote from PE (mype + 1) % npes and expects that rank back. Sets rc to EXIT_FAILURE on mismatch. */ \ + do { \ + static TYPE remote; \ + TYPE val; \ + const int mype = shmem_my_pe(); \ + const int npes = shmem_n_pes(); \ + remote = (TYPE)mype; \ + shmem_barrier_all(); /* every PE has stored its rank before anyone fetches */ \ + switch (OP) { \ + case FETCH: \ + val = DEPRECATED_FETCH(&remote, (mype + 1) % npes); \ + break; \ + case ATOMIC_FETCH: \ + val = shmem_atomic_fetch(&remote, (mype + 1) % npes); \ + break; \ + case CTX_ATOMIC_FETCH: \ + val = shmem_atomic_fetch(SHMEM_CTX_DEFAULT, &remote, (mype + 1) % npes); \ + break; \ + default: \ + printf("Invalid operation (%d)\n", OP); \ + shmem_global_exit(1); \ + } \ + if (val != (TYPE)((mype + 1) % npes)) { /* expect the rank of the PE that was read */ \ + printf("PE %i received incorrect value with " \ + "TEST_SHMEM_FETCH(%s, %s)\n", mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + } while (false) + +#else +#define TEST_SHMEM_FETCH(OP, TYPE) + +#endif + +int main(int argc, char* argv[]) { + shmem_init(); + + int rc = EXIT_SUCCESS; /* overwritten with EXIT_FAILURE by any failing TEST_SHMEM_FETCH expansion */ + +#ifdef ENABLE_DEPRECATED_TESTS + TEST_SHMEM_FETCH(FETCH, float); + TEST_SHMEM_FETCH(FETCH, double); + TEST_SHMEM_FETCH(FETCH, int); + TEST_SHMEM_FETCH(FETCH, long); + TEST_SHMEM_FETCH(FETCH, long long); + TEST_SHMEM_FETCH(FETCH, unsigned int); + TEST_SHMEM_FETCH(FETCH, unsigned long); + TEST_SHMEM_FETCH(FETCH, unsigned long long); + TEST_SHMEM_FETCH(FETCH, int32_t); + TEST_SHMEM_FETCH(FETCH, int64_t); + TEST_SHMEM_FETCH(FETCH, uint32_t); + TEST_SHMEM_FETCH(FETCH, uint64_t); + TEST_SHMEM_FETCH(FETCH, size_t); + TEST_SHMEM_FETCH(FETCH, ptrdiff_t); +#endif /* ENABLE_DEPRECATED_TESTS */ + + TEST_SHMEM_FETCH(ATOMIC_FETCH, float); + TEST_SHMEM_FETCH(ATOMIC_FETCH, double); + TEST_SHMEM_FETCH(ATOMIC_FETCH, int); + TEST_SHMEM_FETCH(ATOMIC_FETCH,
long); + TEST_SHMEM_FETCH(ATOMIC_FETCH, long long); + TEST_SHMEM_FETCH(ATOMIC_FETCH, unsigned int); + TEST_SHMEM_FETCH(ATOMIC_FETCH, unsigned long); + TEST_SHMEM_FETCH(ATOMIC_FETCH, unsigned long long); + TEST_SHMEM_FETCH(ATOMIC_FETCH, int32_t); + TEST_SHMEM_FETCH(ATOMIC_FETCH, int64_t); + TEST_SHMEM_FETCH(ATOMIC_FETCH, uint32_t); + TEST_SHMEM_FETCH(ATOMIC_FETCH, uint64_t); + TEST_SHMEM_FETCH(ATOMIC_FETCH, size_t); + TEST_SHMEM_FETCH(ATOMIC_FETCH, ptrdiff_t); + + TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, float); + TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, double); + TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, int); + TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, long); + TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, long long); + TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, unsigned int); + TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, unsigned long); + TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, unsigned long long); + TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, int32_t); + TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, int64_t); + TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, uint32_t); + TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, uint64_t); + TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, size_t); + TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, ptrdiff_t); + + shmem_finalize(); + return rc; +} diff --git a/test/unit/c11_test_shmem_atomic_inc.c b/test/unit/c11_test_shmem_atomic_inc.c new file mode 100644 index 0000000..d3bf945 --- /dev/null +++ b/test/unit/c11_test_shmem_atomic_inc.c @@ -0,0 +1,199 @@ +/* + * This test program is derived from a unit test created by Nick Park. + * The original unit test is a work of the U.S. Government and is not subject + * to copyright protection in the United States. Foreign copyrights may + * apply. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. 
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L /* C11 or newer only; otherwise TEST_SHMEM_INC is the empty macro from the #else branch */ + +enum op { INC = 0, ATOMIC_INC, CTX_ATOMIC_INC, FINC, ATOMIC_FETCH_INC, + CTX_ATOMIC_FETCH_INC }; /* selects which increment call TEST_SHMEM_INC issues */ + +#ifdef ENABLE_DEPRECATED_TESTS +#define DEPRECATED_INC shmem_inc +#define DEPRECATED_FINC shmem_finc +#else +#define DEPRECATED_INC shmem_atomic_inc +#define DEPRECATED_FINC shmem_atomic_fetch_inc /* these fallbacks keep the INC and FINC cases compilable when deprecated tests are disabled */ +#endif + +#define TEST_SHMEM_INC(OP, TYPE) /* For one TYPE: every PE atomically increments remote once on every PE, so each copy receives npes increments and must end up equal to npes. The fetching variants also check the returned old value. Sets rc to EXIT_FAILURE on mismatch. */ \ + do { \ + static TYPE remote = (TYPE)0; \ + TYPE old; \ + const int mype = shmem_my_pe(); \ + const int npes = shmem_n_pes(); \ + remote = (TYPE)0; \ + shmem_barrier_all(); /* every PE has zeroed remote before any increment is issued */ \ + for (int i = 0; i < npes; i++) /* target every PE, including this one */ \ + switch (OP) { \ + case INC: \ + DEPRECATED_INC(&remote, i); \ + break; \ + case ATOMIC_INC: \ + shmem_atomic_inc(&remote, i); \ + break; \ + case CTX_ATOMIC_INC: \ + shmem_atomic_inc(SHMEM_CTX_DEFAULT, &remote, i); \ + break; \ + case FINC: \ + old = DEPRECATED_FINC(&remote, i); \ + if (old > npes) { /* NOTE(review): loose bound; with npes increments per target the old value should stay below npes -- confirm whether >= was intended */ \ + printf("PE %i error inconsistent value of old (%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + break; \ + case ATOMIC_FETCH_INC: \ + old = shmem_atomic_fetch_inc(&remote, i); \ + if (old > npes) { \ + printf("PE %i error inconsistent value of old (%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + break; \ + case CTX_ATOMIC_FETCH_INC: \ + old = shmem_atomic_fetch_inc(SHMEM_CTX_DEFAULT, &remote, i); \ + if (old > npes) { \ + printf("PE %i error inconsistent value of old (%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + break; \ + default: \ + printf("Invalid operation (%d)\n", OP); \ + shmem_global_exit(1); \ + } \ + shmem_barrier_all(); /* wait for all PEs before inspecting the local copy */ \ + if (remote != (TYPE)npes) { /* one increment from each PE */ \ + printf("PE %i observed error with TEST_SHMEM_INC(%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + } while (false) + +#else +#define TEST_SHMEM_INC(OP, TYPE) + +#endif + +int main(int argc, char* argv[]) { + shmem_init(); + + int rc = EXIT_SUCCESS; /* overwritten with EXIT_FAILURE by any failing TEST_SHMEM_INC expansion */ + +#ifdef
ENABLE_DEPRECATED_TESTS + TEST_SHMEM_INC(INC, int); + TEST_SHMEM_INC(INC, long); + TEST_SHMEM_INC(INC, long long); + TEST_SHMEM_INC(INC, unsigned int); + TEST_SHMEM_INC(INC, unsigned long); + TEST_SHMEM_INC(INC, unsigned long long); + TEST_SHMEM_INC(INC, int32_t); + TEST_SHMEM_INC(INC, int64_t); + TEST_SHMEM_INC(INC, uint32_t); + TEST_SHMEM_INC(INC, uint64_t); + TEST_SHMEM_INC(INC, size_t); + TEST_SHMEM_INC(INC, ptrdiff_t); +#endif /* ENABLE_DEPRECATED_TESTS */ + + TEST_SHMEM_INC(ATOMIC_INC, int); + TEST_SHMEM_INC(ATOMIC_INC, long); + TEST_SHMEM_INC(ATOMIC_INC, long long); + TEST_SHMEM_INC(ATOMIC_INC, unsigned int); + TEST_SHMEM_INC(ATOMIC_INC, unsigned long); + TEST_SHMEM_INC(ATOMIC_INC, unsigned long long); + TEST_SHMEM_INC(ATOMIC_INC, int32_t); + TEST_SHMEM_INC(ATOMIC_INC, int64_t); + TEST_SHMEM_INC(ATOMIC_INC, uint32_t); + TEST_SHMEM_INC(ATOMIC_INC, uint64_t); + TEST_SHMEM_INC(ATOMIC_INC, size_t); + TEST_SHMEM_INC(ATOMIC_INC, ptrdiff_t); + + TEST_SHMEM_INC(CTX_ATOMIC_INC, int); + TEST_SHMEM_INC(CTX_ATOMIC_INC, long); + TEST_SHMEM_INC(CTX_ATOMIC_INC, long long); + TEST_SHMEM_INC(CTX_ATOMIC_INC, unsigned int); + TEST_SHMEM_INC(CTX_ATOMIC_INC, unsigned long); + TEST_SHMEM_INC(CTX_ATOMIC_INC, unsigned long long); + TEST_SHMEM_INC(CTX_ATOMIC_INC, int32_t); + TEST_SHMEM_INC(CTX_ATOMIC_INC, int64_t); + TEST_SHMEM_INC(CTX_ATOMIC_INC, uint32_t); + TEST_SHMEM_INC(CTX_ATOMIC_INC, uint64_t); + TEST_SHMEM_INC(CTX_ATOMIC_INC, size_t); + TEST_SHMEM_INC(CTX_ATOMIC_INC, ptrdiff_t); + + TEST_SHMEM_INC(FINC, int); + TEST_SHMEM_INC(FINC, long); + TEST_SHMEM_INC(FINC, long long); + TEST_SHMEM_INC(FINC, unsigned int); + TEST_SHMEM_INC(FINC, unsigned long); + TEST_SHMEM_INC(FINC, unsigned long long); + TEST_SHMEM_INC(FINC, int32_t); + TEST_SHMEM_INC(FINC, int64_t); + TEST_SHMEM_INC(FINC, uint32_t); + TEST_SHMEM_INC(FINC, uint64_t); + TEST_SHMEM_INC(FINC, size_t); + TEST_SHMEM_INC(FINC, ptrdiff_t); + + TEST_SHMEM_INC(ATOMIC_FETCH_INC, int); + TEST_SHMEM_INC(ATOMIC_FETCH_INC, long); + 
TEST_SHMEM_INC(ATOMIC_FETCH_INC, long long); + TEST_SHMEM_INC(ATOMIC_FETCH_INC, unsigned int); + TEST_SHMEM_INC(ATOMIC_FETCH_INC, unsigned long); + TEST_SHMEM_INC(ATOMIC_FETCH_INC, unsigned long long); + TEST_SHMEM_INC(ATOMIC_FETCH_INC, int32_t); + TEST_SHMEM_INC(ATOMIC_FETCH_INC, int64_t); + TEST_SHMEM_INC(ATOMIC_FETCH_INC, uint32_t); + TEST_SHMEM_INC(ATOMIC_FETCH_INC, uint64_t); + TEST_SHMEM_INC(ATOMIC_FETCH_INC, size_t); + TEST_SHMEM_INC(ATOMIC_FETCH_INC, ptrdiff_t); + + TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC, int); + TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC, long); + TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC, long long); + TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC, unsigned int); + TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC, unsigned long); + TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC, unsigned long long); + TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC, int32_t); + TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC, int64_t); + TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC, uint32_t); + TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC, uint64_t); + TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC, size_t); + TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC, ptrdiff_t); + + shmem_finalize(); + return rc; +} diff --git a/test/unit/c11_test_shmem_atomic_or.c b/test/unit/c11_test_shmem_atomic_or.c new file mode 100644 index 0000000..2215289 --- /dev/null +++ b/test/unit/c11_test_shmem_atomic_or.c @@ -0,0 +1,134 @@ +/* + * This test program is derived from a unit test created by Nick Park. + * The original unit test is a work of the U.S. Government and is not subject + * to copyright protection in the United States. Foreign copyrights may + * apply. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. 
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L + +enum op { OR = 0, CTX_OR, FETCH_OR, CTX_FETCH_OR }; + +/* Initially, remote = 000...b. Each PE performs an atomic OR where the + * PEth bit of the input value is set to 1 and all other bits are set to 0. + * The result has the NPES least significant bits set, 000...111...b. 
+ */ + +#define TEST_SHMEM_OR(OP, TYPE) /* For one TYPE: every PE atomically ORs the single bit numbered by its own rank into remote on every PE; the fetching variants also check that this bit was not already set in the returned old value. Sets rc to EXIT_FAILURE on mismatch. */ \ + do { \ + static TYPE remote = (TYPE)0; \ + const int mype = shmem_my_pe(); \ + const int npes = shmem_n_pes(); \ + TYPE old = (TYPE)0; \ + if (npes-1 > sizeof(TYPE)) break; /* Avoid overflow */ /* NOTE(review): this skips the whole test and compares the PE count with the size of TYPE in bytes, not its width in bits -- confirm that is intended */ \ + for (int i = 0; i < npes; i++) /* target every PE, including this one */ \ + switch (OP) { \ + case OR: \ + shmem_atomic_or(&remote, (TYPE)(1LLU << mype), i); \ + break; \ + case CTX_OR: \ + shmem_atomic_or(SHMEM_CTX_DEFAULT, &remote, (TYPE)(1LLU << mype), i); \ + break; \ + case FETCH_OR: \ + old = shmem_atomic_fetch_or(&remote, (TYPE)(1LLU << mype), i);\ + if ((old & (TYPE)(1LLU << mype)) != 0) { /* own bit must not be set before this PE sets it */ \ + printf("PE %i error inconsistent value of old (%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + break; \ + case CTX_FETCH_OR: \ + old = shmem_atomic_fetch_or(SHMEM_CTX_DEFAULT, &remote, (TYPE)(1LLU << mype), i); \ + if ((old & (TYPE)(1LLU << mype)) != 0) { \ + printf("PE %i error inconsistent value of old (%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + break; \ + default: \ + printf("Invalid operation (%d)\n", OP); \ + shmem_global_exit(1); \ + } \ + shmem_barrier_all(); /* wait for all PEs before inspecting the local copy */ \ + if (remote != (TYPE)((1LLU << npes) - 1LLU)) { /* expect exactly the npes low bits set */ \ + printf("PE %i observed error with TEST_SHMEM_OR(%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + } while (false) + +#else +#define TEST_SHMEM_OR(OP, TYPE) + +#endif + +int main(int argc, char* argv[]) { + shmem_init(); + + int rc = EXIT_SUCCESS; /* overwritten with EXIT_FAILURE by any failing TEST_SHMEM_OR expansion */ + TEST_SHMEM_OR(OR, unsigned int); + TEST_SHMEM_OR(OR, unsigned long); + TEST_SHMEM_OR(OR, unsigned long long); + TEST_SHMEM_OR(OR, int32_t); + TEST_SHMEM_OR(OR, int64_t); + TEST_SHMEM_OR(OR, uint32_t); + TEST_SHMEM_OR(OR, uint64_t); + + TEST_SHMEM_OR(CTX_OR, unsigned int); + TEST_SHMEM_OR(CTX_OR, unsigned long); + TEST_SHMEM_OR(CTX_OR, unsigned long long); + TEST_SHMEM_OR(CTX_OR, int32_t); + TEST_SHMEM_OR(CTX_OR, int64_t); + TEST_SHMEM_OR(CTX_OR, uint32_t); + TEST_SHMEM_OR(CTX_OR, uint64_t); + + TEST_SHMEM_OR(FETCH_OR, unsigned int); +
TEST_SHMEM_OR(FETCH_OR, unsigned long); + TEST_SHMEM_OR(FETCH_OR, unsigned long long); + TEST_SHMEM_OR(FETCH_OR, int32_t); + TEST_SHMEM_OR(FETCH_OR, int64_t); + TEST_SHMEM_OR(FETCH_OR, uint32_t); + TEST_SHMEM_OR(FETCH_OR, uint64_t); + + TEST_SHMEM_OR(CTX_FETCH_OR, unsigned int); + TEST_SHMEM_OR(CTX_FETCH_OR, unsigned long); + TEST_SHMEM_OR(CTX_FETCH_OR, unsigned long long); + TEST_SHMEM_OR(CTX_FETCH_OR, int32_t); + TEST_SHMEM_OR(CTX_FETCH_OR, int64_t); + TEST_SHMEM_OR(CTX_FETCH_OR, uint32_t); + TEST_SHMEM_OR(CTX_FETCH_OR, uint64_t); + + shmem_finalize(); + return rc; +} diff --git a/test/unit/c11_test_shmem_atomic_set.c b/test/unit/c11_test_shmem_atomic_set.c new file mode 100644 index 0000000..453f35b --- /dev/null +++ b/test/unit/c11_test_shmem_atomic_set.c @@ -0,0 +1,135 @@ +/* + * This test program is derived from a unit test created by Nick Park. + * The original unit test is a work of the U.S. Government and is not subject + * to copyright protection in the United States. Foreign copyrights may + * apply. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L /* C11 or newer only; otherwise TEST_SHMEM_SET is the empty macro from the #else branch */ + +enum op { SET = 0, ATOMIC_SET, CTX_ATOMIC_SET }; /* selects which set call TEST_SHMEM_SET issues */ + +#ifdef ENABLE_DEPRECATED_TESTS +#define DEPRECATED_SET shmem_set +#else +#define DEPRECATED_SET shmem_atomic_set /* keeps the SET case compilable when deprecated tests are disabled */ +#endif + +#define TEST_SHMEM_SET(OP, TYPE) /* For one TYPE: each PE atomically writes its rank into remote on PE (mype + 1) % npes; after the barrier the local copy must hold the rank of the previous PE in the ring. Sets rc to EXIT_FAILURE on mismatch. */ \ + do { \ + static TYPE remote; \ + const int mype = shmem_my_pe(); \ + const int npes = shmem_n_pes(); \ + switch (OP) { \ + case SET: \ + DEPRECATED_SET(&remote, (TYPE)mype, (mype + 1) % npes); \ + break; \ + case ATOMIC_SET: \ + shmem_atomic_set(&remote, (TYPE)mype, (mype + 1) % npes); \ + break; \ + case CTX_ATOMIC_SET: \ + shmem_atomic_set(SHMEM_CTX_DEFAULT, &remote, (TYPE)mype, (mype + 1) % npes); \ + break; \ + default: \ + printf("Invalid operation (%d)\n", OP); \ + shmem_global_exit(1); \ + } \ + shmem_barrier_all(); /* wait for all PEs before inspecting the local copy */ \ + if (remote != (TYPE)((mype + npes - 1) % npes)) { /* expect the rank of the previous PE in the ring */ \ + printf("PE %i received incorrect value with " \ + "TEST_SHMEM_SET(%s, %s)\n", mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + } while (false) + +#else +#define TEST_SHMEM_SET(OP, TYPE) + +#endif + +int main(int argc, char* argv[]) { + shmem_init(); + + int rc = EXIT_SUCCESS; /* overwritten with EXIT_FAILURE by any failing TEST_SHMEM_SET expansion */ + +#ifdef ENABLE_DEPRECATED_TESTS + TEST_SHMEM_SET(SET, float); + TEST_SHMEM_SET(SET, double); + TEST_SHMEM_SET(SET, int); + TEST_SHMEM_SET(SET, long); + TEST_SHMEM_SET(SET, long long); + TEST_SHMEM_SET(SET, unsigned int); + TEST_SHMEM_SET(SET, unsigned long); + TEST_SHMEM_SET(SET, unsigned long long); + TEST_SHMEM_SET(SET, int32_t); + TEST_SHMEM_SET(SET, int64_t); + TEST_SHMEM_SET(SET, uint32_t); + TEST_SHMEM_SET(SET, uint64_t); + TEST_SHMEM_SET(SET, size_t); + TEST_SHMEM_SET(SET, ptrdiff_t); +#endif /*
ENABLE_DEPRECATED_TESTS */ + + TEST_SHMEM_SET(ATOMIC_SET, float); + TEST_SHMEM_SET(ATOMIC_SET, double); + TEST_SHMEM_SET(ATOMIC_SET, int); + TEST_SHMEM_SET(ATOMIC_SET, long); + TEST_SHMEM_SET(ATOMIC_SET, long long); + TEST_SHMEM_SET(ATOMIC_SET, unsigned int); + TEST_SHMEM_SET(ATOMIC_SET, unsigned long); + TEST_SHMEM_SET(ATOMIC_SET, unsigned long long); + TEST_SHMEM_SET(ATOMIC_SET, int32_t); + TEST_SHMEM_SET(ATOMIC_SET, int64_t); + TEST_SHMEM_SET(ATOMIC_SET, uint32_t); + TEST_SHMEM_SET(ATOMIC_SET, uint64_t); + TEST_SHMEM_SET(ATOMIC_SET, size_t); + TEST_SHMEM_SET(ATOMIC_SET, ptrdiff_t); + + TEST_SHMEM_SET(CTX_ATOMIC_SET, float); + TEST_SHMEM_SET(CTX_ATOMIC_SET, double); + TEST_SHMEM_SET(CTX_ATOMIC_SET, int); + TEST_SHMEM_SET(CTX_ATOMIC_SET, long); + TEST_SHMEM_SET(CTX_ATOMIC_SET, long long); + TEST_SHMEM_SET(CTX_ATOMIC_SET, unsigned int); + TEST_SHMEM_SET(CTX_ATOMIC_SET, unsigned long); + TEST_SHMEM_SET(CTX_ATOMIC_SET, unsigned long long); + TEST_SHMEM_SET(CTX_ATOMIC_SET, int32_t); + TEST_SHMEM_SET(CTX_ATOMIC_SET, int64_t); + TEST_SHMEM_SET(CTX_ATOMIC_SET, uint32_t); + TEST_SHMEM_SET(CTX_ATOMIC_SET, uint64_t); + TEST_SHMEM_SET(CTX_ATOMIC_SET, size_t); + TEST_SHMEM_SET(CTX_ATOMIC_SET, ptrdiff_t); + + shmem_finalize(); + return rc; +} diff --git a/test/unit/c11_test_shmem_atomic_swap.c b/test/unit/c11_test_shmem_atomic_swap.c new file mode 100644 index 0000000..646441b --- /dev/null +++ b/test/unit/c11_test_shmem_atomic_swap.c @@ -0,0 +1,144 @@ +/* + * This test program is derived from a unit test created by Nick Park. + * The original unit test is a work of the U.S. Government and is not subject + * to copyright protection in the United States. Foreign copyrights may + * apply. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. 
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L /* C11 or newer only; otherwise TEST_SHMEM_SWAP is the empty macro from the #else branch */ + +enum op { SWAP = 0, ATOMIC_SWAP, CTX_ATOMIC_SWAP }; /* selects which swap call TEST_SHMEM_SWAP issues */ + +#ifdef ENABLE_DEPRECATED_TESTS +#define DEPRECATED_SWAP shmem_swap +#else +#define DEPRECATED_SWAP shmem_atomic_swap /* keeps the SWAP case compilable when deprecated tests are disabled */ +#endif + +#define TEST_SHMEM_SWAP(OP, TYPE) /* Ring check for one TYPE: each PE atomically swaps its rank into remote on PE (mype + 1) % npes, then verifies that its own remote holds the rank of the previous PE and that the swap returned the initial value npes. Sets rc to EXIT_FAILURE on mismatch. */ \ + do { \ + static TYPE remote; \ + TYPE old; \ + const int mype = shmem_my_pe(); \ + const int npes = shmem_n_pes(); \ + remote = npes; /* initial value: npes is never a valid PE rank */ \ + shmem_barrier_all(); /* every PE has initialised remote before any swap is issued */ \ + switch (OP) { \ + case SWAP: \ + old = DEPRECATED_SWAP(&remote, (TYPE)mype, (mype + 1) % npes); \ + break; \ + case ATOMIC_SWAP: \ + old = shmem_atomic_swap(&remote, (TYPE)mype, (mype + 1) % npes); \ + break; \ + case CTX_ATOMIC_SWAP: \ + old = shmem_atomic_swap(SHMEM_CTX_DEFAULT, &remote, \ + (TYPE)mype, (mype + 1) % npes); \ + break; \ + default: \ + printf("invalid operation (%d)\n", OP); \ + shmem_global_exit(1); \ + } \ + shmem_barrier_all(); /* wait for all PEs before inspecting the local copy */ \ + if (remote != (TYPE)((mype + npes - 1) % npes)) { /* expect the rank of the previous PE in the ring */ \ + printf("PE %i received incorrect value with " \ + "TEST_SHMEM_SWAP(%s, %s)\n", mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + if (old != npes) { /* the swap must have returned the initial value */ \ + printf("PE %i error inconsistent value of old (%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + } while (false) + +#else +#define TEST_SHMEM_SWAP(OP, TYPE) + +#endif + +int main(int argc, char* argv[]) { + shmem_init(); + + int rc = EXIT_SUCCESS; /* overwritten with EXIT_FAILURE by any failing TEST_SHMEM_SWAP expansion */ + +#ifdef ENABLE_DEPRECATED_TESTS + TEST_SHMEM_SWAP(SWAP, float); + TEST_SHMEM_SWAP(SWAP, double); + TEST_SHMEM_SWAP(SWAP, int); + TEST_SHMEM_SWAP(SWAP, long); + TEST_SHMEM_SWAP(SWAP, long long); + TEST_SHMEM_SWAP(SWAP, unsigned int); + TEST_SHMEM_SWAP(SWAP, unsigned long); + TEST_SHMEM_SWAP(SWAP, unsigned long long); + TEST_SHMEM_SWAP(SWAP, int32_t); + TEST_SHMEM_SWAP(SWAP, int64_t); + TEST_SHMEM_SWAP(SWAP, uint32_t); + TEST_SHMEM_SWAP(SWAP, uint64_t); + TEST_SHMEM_SWAP(SWAP, size_t); + TEST_SHMEM_SWAP(SWAP, ptrdiff_t); +#endif /* ENABLE_DEPRECATED_TESTS
*/ + + TEST_SHMEM_SWAP(ATOMIC_SWAP, float); + TEST_SHMEM_SWAP(ATOMIC_SWAP, double); + TEST_SHMEM_SWAP(ATOMIC_SWAP, int); + TEST_SHMEM_SWAP(ATOMIC_SWAP, long); + TEST_SHMEM_SWAP(ATOMIC_SWAP, long long); + TEST_SHMEM_SWAP(ATOMIC_SWAP, unsigned int); + TEST_SHMEM_SWAP(ATOMIC_SWAP, unsigned long); + TEST_SHMEM_SWAP(ATOMIC_SWAP, unsigned long long); + TEST_SHMEM_SWAP(ATOMIC_SWAP, int32_t); + TEST_SHMEM_SWAP(ATOMIC_SWAP, int64_t); + TEST_SHMEM_SWAP(ATOMIC_SWAP, uint32_t); + TEST_SHMEM_SWAP(ATOMIC_SWAP, uint64_t); + TEST_SHMEM_SWAP(ATOMIC_SWAP, size_t); + TEST_SHMEM_SWAP(ATOMIC_SWAP, ptrdiff_t); + + TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, float); + TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, double); + TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, int); + TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, long); + TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, long long); + TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, unsigned int); + TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, unsigned long); + TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, unsigned long long); + TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, int32_t); + TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, int64_t); + TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, uint32_t); + TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, uint64_t); + TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, size_t); + TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, ptrdiff_t); + + shmem_finalize(); + return rc; +} diff --git a/test/unit/c11_test_shmem_atomic_xor.c b/test/unit/c11_test_shmem_atomic_xor.c new file mode 100644 index 0000000..67e1562 --- /dev/null +++ b/test/unit/c11_test_shmem_atomic_xor.c @@ -0,0 +1,134 @@ +/* + * This test program is derived from a unit test created by Nick Park. + * The original unit test is a work of the U.S. Government and is not subject + * to copyright protection in the United States. Foreign copyrights may + * apply. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. 
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L + +enum op { XOR = 0, CTX_XOR, FETCH_XOR, CTX_FETCH_XOR }; + +/* Initially, remote = 111...b. Each PE performs an atomic XOR where the + * PEth bit of the input value is set to 1 and all other bits are set to 0. + * The result has the NPES least significant bits cleared, 111...000...b. 
+ */ + +#define TEST_SHMEM_XOR(OP, TYPE) /* For one TYPE: remote starts with all bits set and every PE atomically XORs the single bit numbered by its own rank into remote on every PE; the fetching variants also check that this bit was still set in the returned old value. Sets rc to EXIT_FAILURE on mismatch. */ \ + do { \ + static TYPE remote = ~(TYPE)0; \ + const int mype = shmem_my_pe(); \ + const int npes = shmem_n_pes(); \ + TYPE old; \ + if (npes-1 > sizeof(TYPE)) break; /* Avoid overflow */ /* NOTE(review): this skips the whole test and compares the PE count with the size of TYPE in bytes, not its width in bits -- confirm that is intended */ \ + for (int i = 0; i < npes; i++) /* target every PE, including this one */ \ + switch (OP) { \ + case XOR: \ + shmem_atomic_xor(&remote, (TYPE)(1LLU << mype), i); \ + break; \ + case CTX_XOR: \ + shmem_atomic_xor(SHMEM_CTX_DEFAULT, &remote, (TYPE)(1LLU << mype), i); \ + break; \ + case FETCH_XOR: \ + old = shmem_atomic_fetch_xor(&remote, (TYPE)(1LLU << mype), i); \ + if (((old ^ (TYPE)(1LLU << mype)) & (TYPE)(1LLU << mype)) != 0) { /* fails when the own bit was already clear in old */ \ + printf("PE %i error inconsistent value of old (%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + break; \ + case CTX_FETCH_XOR: \ + old = shmem_atomic_fetch_xor(SHMEM_CTX_DEFAULT, &remote, (TYPE)(1LLU << mype), i); \ + if (((old ^ (TYPE)(1LLU << mype)) & (TYPE)(1LLU << mype)) != 0) { \ + printf("PE %i error inconsistent value of old (%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + break; \ + default: \ + printf("Invalid operation (%d)\n", OP); \ + shmem_global_exit(1); \ + } \ + shmem_barrier_all(); /* wait for all PEs before inspecting the local copy */ \ + if (remote != ~(TYPE)((1LLU << npes) - 1LLU)) { /* expect exactly the npes low bits cleared */ \ + printf("PE %i observed error with TEST_SHMEM_XOR(%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + } while (false) + +#else +#define TEST_SHMEM_XOR(OP, TYPE) + +#endif + +int main(int argc, char* argv[]) { + shmem_init(); + + int rc = EXIT_SUCCESS; /* overwritten with EXIT_FAILURE by any failing TEST_SHMEM_XOR expansion */ + TEST_SHMEM_XOR(XOR, unsigned int); + TEST_SHMEM_XOR(XOR, unsigned long); + TEST_SHMEM_XOR(XOR, unsigned long long); + TEST_SHMEM_XOR(XOR, int32_t); + TEST_SHMEM_XOR(XOR, int64_t); + TEST_SHMEM_XOR(XOR, uint32_t); + TEST_SHMEM_XOR(XOR, uint64_t); + + TEST_SHMEM_XOR(CTX_XOR, unsigned int); + TEST_SHMEM_XOR(CTX_XOR, unsigned long); + TEST_SHMEM_XOR(CTX_XOR, unsigned long long); + TEST_SHMEM_XOR(CTX_XOR, int32_t); + TEST_SHMEM_XOR(CTX_XOR, int64_t); + TEST_SHMEM_XOR(CTX_XOR, uint32_t); +
TEST_SHMEM_XOR(CTX_XOR, uint64_t); + + TEST_SHMEM_XOR(FETCH_XOR, unsigned int); + TEST_SHMEM_XOR(FETCH_XOR, unsigned long); + TEST_SHMEM_XOR(FETCH_XOR, unsigned long long); + TEST_SHMEM_XOR(FETCH_XOR, int32_t); + TEST_SHMEM_XOR(FETCH_XOR, int64_t); + TEST_SHMEM_XOR(FETCH_XOR, uint32_t); + TEST_SHMEM_XOR(FETCH_XOR, uint64_t); + + TEST_SHMEM_XOR(CTX_FETCH_XOR, unsigned int); + TEST_SHMEM_XOR(CTX_FETCH_XOR, unsigned long); + TEST_SHMEM_XOR(CTX_FETCH_XOR, unsigned long long); + TEST_SHMEM_XOR(CTX_FETCH_XOR, int32_t); + TEST_SHMEM_XOR(CTX_FETCH_XOR, int64_t); + TEST_SHMEM_XOR(CTX_FETCH_XOR, uint32_t); + TEST_SHMEM_XOR(CTX_FETCH_XOR, uint64_t); + + shmem_finalize(); + return rc; +} diff --git a/test/unit/c11_test_shmem_g.c b/test/unit/c11_test_shmem_g.c new file mode 100644 index 0000000..4c61533 --- /dev/null +++ b/test/unit/c11_test_shmem_g.c @@ -0,0 +1,122 @@ +/* + * This test program is derived from a unit test created by Nick Park. + * The original unit test is a work of the U.S. Government and is not subject + * to copyright protection in the United States. Foreign copyrights may + * apply. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L + +#define TEST_SHMEM_G(USE_CTX, TYPE) /* Each PE stores its rank in `remote`, fetches `remote` from PE (mype + 1) % npes with shmem_g (passing SHMEM_CTX_DEFAULT when USE_CTX is nonzero) and checks the value equals that PE's rank; a mismatch sets rc = EXIT_FAILURE. */ \ + do { \ + static TYPE remote; \ + TYPE val; \ + const int mype = shmem_my_pe(); \ + const int npes = shmem_n_pes(); \ + remote = (TYPE)mype; \ + shmem_barrier_all(); /* every PE has stored its rank before any PE reads */ \ + if (USE_CTX) \ + val = shmem_g(SHMEM_CTX_DEFAULT, &remote, (mype + 1) % npes); \ + else \ + val = shmem_g(&remote, (mype + 1) % npes); \ + if (val != (TYPE)((mype + 1) % npes)) { \ + printf("PE %i received incorrect value with " \ + "TEST_SHMEM_G(%d, %s)\n", mype, \ + (int)(USE_CTX), #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + } while (false) + +#else +#define TEST_SHMEM_G(USE_CTX, TYPE) + +#endif + +int main(int argc, char* argv[]) { + shmem_init(); + + int rc = EXIT_SUCCESS; /* set to EXIT_FAILURE by any failing TEST_SHMEM_G expansion */ + TEST_SHMEM_G(0, float); + TEST_SHMEM_G(0, double); + TEST_SHMEM_G(0, long double); + TEST_SHMEM_G(0, char); + TEST_SHMEM_G(0, signed char); + TEST_SHMEM_G(0, short); + TEST_SHMEM_G(0, int); + TEST_SHMEM_G(0, long); + TEST_SHMEM_G(0, long long); + TEST_SHMEM_G(0, unsigned char); + TEST_SHMEM_G(0, unsigned short); + TEST_SHMEM_G(0, unsigned int); + TEST_SHMEM_G(0, unsigned long); + TEST_SHMEM_G(0, unsigned long long); + TEST_SHMEM_G(0, int8_t); + TEST_SHMEM_G(0, int16_t); + TEST_SHMEM_G(0, int32_t); + TEST_SHMEM_G(0, int64_t); + TEST_SHMEM_G(0, uint8_t); + TEST_SHMEM_G(0, uint16_t); + TEST_SHMEM_G(0, uint32_t); + TEST_SHMEM_G(0, uint64_t); + TEST_SHMEM_G(0, size_t); + TEST_SHMEM_G(0, ptrdiff_t); + + TEST_SHMEM_G(1, float); + TEST_SHMEM_G(1, double); + TEST_SHMEM_G(1, long double); + TEST_SHMEM_G(1, char); + TEST_SHMEM_G(1, signed char); + TEST_SHMEM_G(1, short); + TEST_SHMEM_G(1, int); +
TEST_SHMEM_G(1, long); + TEST_SHMEM_G(1, long long); + TEST_SHMEM_G(1, unsigned char); + TEST_SHMEM_G(1, unsigned short); + TEST_SHMEM_G(1, unsigned int); + TEST_SHMEM_G(1, unsigned long); + TEST_SHMEM_G(1, unsigned long long); + TEST_SHMEM_G(1, int8_t); + TEST_SHMEM_G(1, int16_t); + TEST_SHMEM_G(1, int32_t); + TEST_SHMEM_G(1, int64_t); + TEST_SHMEM_G(1, uint8_t); + TEST_SHMEM_G(1, uint16_t); + TEST_SHMEM_G(1, uint32_t); + TEST_SHMEM_G(1, uint64_t); + TEST_SHMEM_G(1, size_t); + TEST_SHMEM_G(1, ptrdiff_t); + + shmem_finalize(); + return rc; +} diff --git a/test/unit/c11_test_shmem_get.c b/test/unit/c11_test_shmem_get.c new file mode 100644 index 0000000..278a924 --- /dev/null +++ b/test/unit/c11_test_shmem_get.c @@ -0,0 +1,246 @@ +/* + * This test program is derived from a unit test created by Nick Park. + * The original unit test is a work of the U.S. Government and is not subject + * to copyright protection in the United States. Foreign copyrights may + * apply. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L + +enum op { GET = 0, IGET, GET_NBI }; + +#define TEST_SHMEM_GET(OP, USE_CTX, TYPE) /* Each PE fills `remote` with its rank, reads the 10 elements from PE (mype + 1) % npes using the call selected by OP (passing SHMEM_CTX_DEFAULT when USE_CTX is nonzero) and checks that every element equals that PE's rank; a mismatch sets rc = EXIT_FAILURE. */ \ + do { \ + static TYPE remote[10]; \ + const int mype = shmem_my_pe(); \ + const int npes = shmem_n_pes(); \ + TYPE local[10]; \ + for (int i = 0; i < 10; i++) \ + remote[i] = (TYPE)mype; \ + shmem_barrier_all(); /* every PE has filled `remote` before any PE reads */ \ + switch (OP) { \ + case GET: \ + if (USE_CTX) \ + shmem_get(SHMEM_CTX_DEFAULT, local, remote, 10, (mype + 1) % npes); \ + else \ + shmem_get(local, remote, 10, (mype + 1) % npes); \ + break; \ + case IGET: \ + if (USE_CTX) \ + shmem_iget(SHMEM_CTX_DEFAULT, local, remote, 1, 1, 10, (mype + 1) % npes); \ + else \ + shmem_iget(local, remote, 1, 1, 10, (mype + 1) % npes); \ + break; \ + case GET_NBI: \ + if (USE_CTX) \ + shmem_get_nbi(SHMEM_CTX_DEFAULT, local, remote, 10, (mype + 1) % npes); \ + else \ + shmem_get_nbi(local, remote, 10, (mype + 1) % npes); \ + shmem_quiet(); /* runs after either branch above, before `local` is read */ \ + break; \ + default: \ + printf("Invalid operation (%d)\n", OP); \ + shmem_global_exit(1); \ + } \ + for (int i = 0; i < 10; i++) \ + if (local[i] != (TYPE)((mype + 1) % npes)) { \ + printf("PE %i received incorrect value with " \ + "TEST_SHMEM_GET(%s, %d, %s)\n", mype, #OP, \ + (int)(USE_CTX), #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + } while (false) + +#else +#define TEST_SHMEM_GET(OP, USE_CTX, TYPE) + +#endif + +int main(int argc, char* argv[]) { + shmem_init(); + + int rc = EXIT_SUCCESS; /* set to EXIT_FAILURE by any failing TEST_SHMEM_GET expansion */ + TEST_SHMEM_GET(GET, 0, float); + TEST_SHMEM_GET(GET, 0, double); + TEST_SHMEM_GET(GET, 0, long double); + TEST_SHMEM_GET(GET, 0, char); + TEST_SHMEM_GET(GET, 0, signed char); + TEST_SHMEM_GET(GET, 0, short); +
TEST_SHMEM_GET(GET, 0, int); + TEST_SHMEM_GET(GET, 0, long); + TEST_SHMEM_GET(GET, 0, long long); + TEST_SHMEM_GET(GET, 0, unsigned char); + TEST_SHMEM_GET(GET, 0, unsigned short); + TEST_SHMEM_GET(GET, 0, unsigned int); + TEST_SHMEM_GET(GET, 0, unsigned long); + TEST_SHMEM_GET(GET, 0, unsigned long long); + TEST_SHMEM_GET(GET, 0, int8_t); + TEST_SHMEM_GET(GET, 0, int16_t); + TEST_SHMEM_GET(GET, 0, int32_t); + TEST_SHMEM_GET(GET, 0, int64_t); + TEST_SHMEM_GET(GET, 0, uint8_t); + TEST_SHMEM_GET(GET, 0, uint16_t); + TEST_SHMEM_GET(GET, 0, uint32_t); + TEST_SHMEM_GET(GET, 0, uint64_t); + TEST_SHMEM_GET(GET, 0, size_t); + TEST_SHMEM_GET(GET, 0, ptrdiff_t); + + TEST_SHMEM_GET(GET, 1, float); + TEST_SHMEM_GET(GET, 1, double); + TEST_SHMEM_GET(GET, 1, long double); + TEST_SHMEM_GET(GET, 1, char); + TEST_SHMEM_GET(GET, 1, signed char); + TEST_SHMEM_GET(GET, 1, short); + TEST_SHMEM_GET(GET, 1, int); + TEST_SHMEM_GET(GET, 1, long); + TEST_SHMEM_GET(GET, 1, long long); + TEST_SHMEM_GET(GET, 1, unsigned char); + TEST_SHMEM_GET(GET, 1, unsigned short); + TEST_SHMEM_GET(GET, 1, unsigned int); + TEST_SHMEM_GET(GET, 1, unsigned long); + TEST_SHMEM_GET(GET, 1, unsigned long long); + TEST_SHMEM_GET(GET, 1, int8_t); + TEST_SHMEM_GET(GET, 1, int16_t); + TEST_SHMEM_GET(GET, 1, int32_t); + TEST_SHMEM_GET(GET, 1, int64_t); + TEST_SHMEM_GET(GET, 1, uint8_t); + TEST_SHMEM_GET(GET, 1, uint16_t); + TEST_SHMEM_GET(GET, 1, uint32_t); + TEST_SHMEM_GET(GET, 1, uint64_t); + TEST_SHMEM_GET(GET, 1, size_t); + TEST_SHMEM_GET(GET, 1, ptrdiff_t); + + TEST_SHMEM_GET(IGET, 0, float); + TEST_SHMEM_GET(IGET, 0, double); + TEST_SHMEM_GET(IGET, 0, long double); + TEST_SHMEM_GET(IGET, 0, char); + TEST_SHMEM_GET(IGET, 0, signed char); + TEST_SHMEM_GET(IGET, 0, short); + TEST_SHMEM_GET(IGET, 0, int); + TEST_SHMEM_GET(IGET, 0, long); + TEST_SHMEM_GET(IGET, 0, long long); + TEST_SHMEM_GET(IGET, 0, unsigned char); + TEST_SHMEM_GET(IGET, 0, unsigned short); + TEST_SHMEM_GET(IGET, 0, unsigned int); +
TEST_SHMEM_GET(IGET, 0, unsigned long); + TEST_SHMEM_GET(IGET, 0, unsigned long long); + TEST_SHMEM_GET(IGET, 0, int8_t); + TEST_SHMEM_GET(IGET, 0, int16_t); + TEST_SHMEM_GET(IGET, 0, int32_t); + TEST_SHMEM_GET(IGET, 0, int64_t); + TEST_SHMEM_GET(IGET, 0, uint8_t); + TEST_SHMEM_GET(IGET, 0, uint16_t); + TEST_SHMEM_GET(IGET, 0, uint32_t); + TEST_SHMEM_GET(IGET, 0, uint64_t); + TEST_SHMEM_GET(IGET, 0, size_t); + TEST_SHMEM_GET(IGET, 0, ptrdiff_t); + + TEST_SHMEM_GET(IGET, 1, float); + TEST_SHMEM_GET(IGET, 1, double); + TEST_SHMEM_GET(IGET, 1, long double); + TEST_SHMEM_GET(IGET, 1, char); + TEST_SHMEM_GET(IGET, 1, signed char); + TEST_SHMEM_GET(IGET, 1, short); + TEST_SHMEM_GET(IGET, 1, int); + TEST_SHMEM_GET(IGET, 1, long); + TEST_SHMEM_GET(IGET, 1, long long); + TEST_SHMEM_GET(IGET, 1, unsigned char); + TEST_SHMEM_GET(IGET, 1, unsigned short); + TEST_SHMEM_GET(IGET, 1, unsigned int); + TEST_SHMEM_GET(IGET, 1, unsigned long); + TEST_SHMEM_GET(IGET, 1, unsigned long long); + TEST_SHMEM_GET(IGET, 1, int8_t); + TEST_SHMEM_GET(IGET, 1, int16_t); + TEST_SHMEM_GET(IGET, 1, int32_t); + TEST_SHMEM_GET(IGET, 1, int64_t); + TEST_SHMEM_GET(IGET, 1, uint8_t); + TEST_SHMEM_GET(IGET, 1, uint16_t); + TEST_SHMEM_GET(IGET, 1, uint32_t); + TEST_SHMEM_GET(IGET, 1, uint64_t); + TEST_SHMEM_GET(IGET, 1, size_t); + TEST_SHMEM_GET(IGET, 1, ptrdiff_t); + + TEST_SHMEM_GET(GET_NBI, 0, float); + TEST_SHMEM_GET(GET_NBI, 0, double); + TEST_SHMEM_GET(GET_NBI, 0, long double); + TEST_SHMEM_GET(GET_NBI, 0, char); + TEST_SHMEM_GET(GET_NBI, 0, signed char); + TEST_SHMEM_GET(GET_NBI, 0, short); + TEST_SHMEM_GET(GET_NBI, 0, int); + TEST_SHMEM_GET(GET_NBI, 0, long); + TEST_SHMEM_GET(GET_NBI, 0, long long); + TEST_SHMEM_GET(GET_NBI, 0, unsigned char); + TEST_SHMEM_GET(GET_NBI, 0, unsigned short); + TEST_SHMEM_GET(GET_NBI, 0, unsigned int); + TEST_SHMEM_GET(GET_NBI, 0, unsigned long); + TEST_SHMEM_GET(GET_NBI, 0, unsigned long long); + TEST_SHMEM_GET(GET_NBI, 0, int8_t); + TEST_SHMEM_GET(GET_NBI, 0,
int16_t); + TEST_SHMEM_GET(GET_NBI, 0, int32_t); + TEST_SHMEM_GET(GET_NBI, 0, int64_t); + TEST_SHMEM_GET(GET_NBI, 0, uint8_t); + TEST_SHMEM_GET(GET_NBI, 0, uint16_t); + TEST_SHMEM_GET(GET_NBI, 0, uint32_t); + TEST_SHMEM_GET(GET_NBI, 0, uint64_t); + TEST_SHMEM_GET(GET_NBI, 0, size_t); + TEST_SHMEM_GET(GET_NBI, 0, ptrdiff_t); + + TEST_SHMEM_GET(GET_NBI, 1, float); + TEST_SHMEM_GET(GET_NBI, 1, double); + TEST_SHMEM_GET(GET_NBI, 1, long double); + TEST_SHMEM_GET(GET_NBI, 1, char); + TEST_SHMEM_GET(GET_NBI, 1, signed char); + TEST_SHMEM_GET(GET_NBI, 1, short); + TEST_SHMEM_GET(GET_NBI, 1, int); + TEST_SHMEM_GET(GET_NBI, 1, long); + TEST_SHMEM_GET(GET_NBI, 1, long long); + TEST_SHMEM_GET(GET_NBI, 1, unsigned char); + TEST_SHMEM_GET(GET_NBI, 1, unsigned short); + TEST_SHMEM_GET(GET_NBI, 1, unsigned int); + TEST_SHMEM_GET(GET_NBI, 1, unsigned long); + TEST_SHMEM_GET(GET_NBI, 1, unsigned long long); + TEST_SHMEM_GET(GET_NBI, 1, int8_t); + TEST_SHMEM_GET(GET_NBI, 1, int16_t); + TEST_SHMEM_GET(GET_NBI, 1, int32_t); + TEST_SHMEM_GET(GET_NBI, 1, int64_t); + TEST_SHMEM_GET(GET_NBI, 1, uint8_t); + TEST_SHMEM_GET(GET_NBI, 1, uint16_t); + TEST_SHMEM_GET(GET_NBI, 1, uint32_t); + TEST_SHMEM_GET(GET_NBI, 1, uint64_t); + TEST_SHMEM_GET(GET_NBI, 1, size_t); + TEST_SHMEM_GET(GET_NBI, 1, ptrdiff_t); + + shmem_finalize(); + return rc; +} diff --git a/test/unit/c11_test_shmem_p.c b/test/unit/c11_test_shmem_p.c new file mode 100644 index 0000000..2a40cc6 --- /dev/null +++ b/test/unit/c11_test_shmem_p.c @@ -0,0 +1,120 @@ +/* + * This test program is derived from a unit test created by Nick Park. + * The original unit test is a work of the U.S. Government and is not subject + * to copyright protection in the United States. Foreign copyrights may + * apply. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L + +#define TEST_SHMEM_P(USE_CTX, TYPE) /* Each PE writes its rank into `remote` on PE (mype + 1) % npes with shmem_p (passing SHMEM_CTX_DEFAULT when USE_CTX is nonzero), then after a barrier checks that its own `remote` holds the rank of PE (mype + npes - 1) % npes; a mismatch sets rc = EXIT_FAILURE. */ \ + do { \ + static TYPE remote; \ + const int mype = shmem_my_pe(); \ + const int npes = shmem_n_pes(); \ + if (USE_CTX) \ + shmem_p(SHMEM_CTX_DEFAULT, &remote, (TYPE)mype, (mype + 1) % npes); \ + else \ + shmem_p(&remote, (TYPE)mype, (mype + 1) % npes);\ + shmem_barrier_all(); /* synchronize all PEs before reading the local copy */ \ + if (remote != (TYPE)((mype + npes - 1) % npes)) { \ + printf("PE %i received incorrect value with " \ + "TEST_SHMEM_P(%d, %s)\n", mype, \ + (int)(USE_CTX), #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + } while (false) + +#else +#define TEST_SHMEM_P(USE_CTX, TYPE) + +#endif + +int main(int argc, char* argv[]) { + shmem_init(); + + int rc = EXIT_SUCCESS; /* set to EXIT_FAILURE by any failing TEST_SHMEM_P expansion */ + TEST_SHMEM_P(0, float); + TEST_SHMEM_P(0, double); + TEST_SHMEM_P(0, long double); + TEST_SHMEM_P(0, char); + TEST_SHMEM_P(0, signed char); + TEST_SHMEM_P(0, short); + TEST_SHMEM_P(0, int); + TEST_SHMEM_P(0, long); + TEST_SHMEM_P(0, long long); + TEST_SHMEM_P(0, unsigned char); + TEST_SHMEM_P(0, unsigned short); + TEST_SHMEM_P(0, unsigned int); + TEST_SHMEM_P(0, unsigned long); + TEST_SHMEM_P(0, unsigned long long); + TEST_SHMEM_P(0, int8_t); + TEST_SHMEM_P(0, int16_t); + TEST_SHMEM_P(0, int32_t); + TEST_SHMEM_P(0, int64_t); + TEST_SHMEM_P(0, uint8_t); + TEST_SHMEM_P(0, uint16_t); + TEST_SHMEM_P(0, uint32_t); + TEST_SHMEM_P(0, uint64_t); + TEST_SHMEM_P(0, size_t); + TEST_SHMEM_P(0, ptrdiff_t); + + TEST_SHMEM_P(1, float); + TEST_SHMEM_P(1, double); + TEST_SHMEM_P(1, long double); + TEST_SHMEM_P(1, char); + TEST_SHMEM_P(1, signed char); + TEST_SHMEM_P(1, short); + TEST_SHMEM_P(1, int); + TEST_SHMEM_P(1, long); + TEST_SHMEM_P(1, long long); + TEST_SHMEM_P(1, unsigned char); + TEST_SHMEM_P(1, unsigned short); + TEST_SHMEM_P(1, unsigned int); + TEST_SHMEM_P(1, unsigned long); + TEST_SHMEM_P(1, unsigned long long); + TEST_SHMEM_P(1, int8_t); + TEST_SHMEM_P(1, int16_t); + TEST_SHMEM_P(1,
int32_t); + TEST_SHMEM_P(1, int64_t); + TEST_SHMEM_P(1, uint8_t); + TEST_SHMEM_P(1, uint16_t); + TEST_SHMEM_P(1, uint32_t); + TEST_SHMEM_P(1, uint64_t); + TEST_SHMEM_P(1, size_t); + TEST_SHMEM_P(1, ptrdiff_t); + + shmem_finalize(); + return rc; +} diff --git a/test/unit/c11_test_shmem_put.c b/test/unit/c11_test_shmem_put.c new file mode 100644 index 0000000..adf1942 --- /dev/null +++ b/test/unit/c11_test_shmem_put.c @@ -0,0 +1,246 @@ +/* + * This test program is derived from a unit test created by Nick Park. + * The original unit test is a work of the U.S. Government and is not subject + * to copyright protection in the United States. Foreign copyrights may + * apply. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +#include +#include +#include +#include +#include + +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L + +enum op { PUT = 0, IPUT, PUT_NBI }; + +#define TEST_SHMEM_PUT(OP, USE_CTX, TYPE) /* Each PE fills `local` with its rank and writes the 10 elements into `remote` on PE (mype + 1) % npes using the call selected by OP (passing SHMEM_CTX_DEFAULT when USE_CTX is nonzero); after a barrier every element of its own `remote` must equal the rank of PE (mype + npes - 1) % npes, otherwise rc = EXIT_FAILURE. */ \ + do { \ + static TYPE remote[10]; \ + const int mype = shmem_my_pe(); \ + const int npes = shmem_n_pes(); \ + TYPE local[10]; \ + for (int i = 0; i < 10; i++) \ + local[i] = (TYPE)mype; \ + switch (OP) { \ + case PUT: \ + if (USE_CTX) \ + shmem_put(SHMEM_CTX_DEFAULT, remote, local, 10, (mype + 1) % npes); \ + else \ + shmem_put(remote, local, 10, (mype + 1) % npes); \ + break; \ + case IPUT: \ + if (USE_CTX) \ + shmem_iput(SHMEM_CTX_DEFAULT, remote, local, 1, 1, 10, (mype + 1) % npes); \ + else \ + shmem_iput(remote, local, 1, 1, 10, (mype + 1) % npes); \ + break; \ + case PUT_NBI: \ + if (USE_CTX) \ + shmem_put_nbi(SHMEM_CTX_DEFAULT, remote, local, 10, (mype + 1) % npes); \ + else \ + shmem_put_nbi(remote, local, 10, (mype + 1) % npes); \ + shmem_quiet(); /* runs after either branch above */ \ + break; \ + default: \ + printf("Invalid operation (%d)\n", OP); \ + shmem_global_exit(1); \ + } \ + shmem_barrier_all(); /* synchronize all PEs before reading the local copy */ \ + for (int i = 0; i < 10; i++) \ + if (remote[i] != (TYPE)((mype + npes - 1) % npes)) { \ + printf("PE %i received incorrect value with " \ + "TEST_SHMEM_PUT(%s, %d, %s)\n", mype, \ + #OP, (int)(USE_CTX), #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + } while (false) + +#else +#define TEST_SHMEM_PUT(OP, USE_CTX, TYPE) + +#endif + +int main(int argc, char* argv[]) { + shmem_init(); + + int rc = EXIT_SUCCESS; /* set to EXIT_FAILURE by any failing TEST_SHMEM_PUT expansion */ + TEST_SHMEM_PUT(PUT, 0, float); + TEST_SHMEM_PUT(PUT, 0, double); + TEST_SHMEM_PUT(PUT, 0, long double); + TEST_SHMEM_PUT(PUT, 0, char); + TEST_SHMEM_PUT(PUT, 0, signed char); + TEST_SHMEM_PUT(PUT, 0, short); + TEST_SHMEM_PUT(PUT, 0, int); + TEST_SHMEM_PUT(PUT, 0, long); + TEST_SHMEM_PUT(PUT, 0, long long); + TEST_SHMEM_PUT(PUT, 0, unsigned char); + TEST_SHMEM_PUT(PUT, 0, unsigned short); + TEST_SHMEM_PUT(PUT, 0, unsigned int); + TEST_SHMEM_PUT(PUT, 0, unsigned long); +
TEST_SHMEM_PUT(PUT, 0, unsigned long long); + TEST_SHMEM_PUT(PUT, 0, int8_t); + TEST_SHMEM_PUT(PUT, 0, int16_t); + TEST_SHMEM_PUT(PUT, 0, int32_t); + TEST_SHMEM_PUT(PUT, 0, int64_t); + TEST_SHMEM_PUT(PUT, 0, uint8_t); + TEST_SHMEM_PUT(PUT, 0, uint16_t); + TEST_SHMEM_PUT(PUT, 0, uint32_t); + TEST_SHMEM_PUT(PUT, 0, uint64_t); + TEST_SHMEM_PUT(PUT, 0, size_t); + TEST_SHMEM_PUT(PUT, 0, ptrdiff_t); + + TEST_SHMEM_PUT(PUT, 1, float); + TEST_SHMEM_PUT(PUT, 1, double); + TEST_SHMEM_PUT(PUT, 1, long double); + TEST_SHMEM_PUT(PUT, 1, char); + TEST_SHMEM_PUT(PUT, 1, signed char); + TEST_SHMEM_PUT(PUT, 1, short); + TEST_SHMEM_PUT(PUT, 1, int); + TEST_SHMEM_PUT(PUT, 1, long); + TEST_SHMEM_PUT(PUT, 1, long long); + TEST_SHMEM_PUT(PUT, 1, unsigned char); + TEST_SHMEM_PUT(PUT, 1, unsigned short); + TEST_SHMEM_PUT(PUT, 1, unsigned int); + TEST_SHMEM_PUT(PUT, 1, unsigned long); + TEST_SHMEM_PUT(PUT, 1, unsigned long long); + TEST_SHMEM_PUT(PUT, 1, int8_t); + TEST_SHMEM_PUT(PUT, 1, int16_t); + TEST_SHMEM_PUT(PUT, 1, int32_t); + TEST_SHMEM_PUT(PUT, 1, int64_t); + TEST_SHMEM_PUT(PUT, 1, uint8_t); + TEST_SHMEM_PUT(PUT, 1, uint16_t); + TEST_SHMEM_PUT(PUT, 1, uint32_t); + TEST_SHMEM_PUT(PUT, 1, uint64_t); + TEST_SHMEM_PUT(PUT, 1, size_t); + TEST_SHMEM_PUT(PUT, 1, ptrdiff_t); + + TEST_SHMEM_PUT(IPUT, 0, float); + TEST_SHMEM_PUT(IPUT, 0, double); + TEST_SHMEM_PUT(IPUT, 0, long double); + TEST_SHMEM_PUT(IPUT, 0, char); + TEST_SHMEM_PUT(IPUT, 0, signed char); + TEST_SHMEM_PUT(IPUT, 0, short); + TEST_SHMEM_PUT(IPUT, 0, int); + TEST_SHMEM_PUT(IPUT, 0, long); + TEST_SHMEM_PUT(IPUT, 0, long long); + TEST_SHMEM_PUT(IPUT, 0, unsigned char); + TEST_SHMEM_PUT(IPUT, 0, unsigned short); + TEST_SHMEM_PUT(IPUT, 0, unsigned int); + TEST_SHMEM_PUT(IPUT, 0, unsigned long); + TEST_SHMEM_PUT(IPUT, 0, unsigned long long); + TEST_SHMEM_PUT(IPUT, 0, int8_t); + TEST_SHMEM_PUT(IPUT, 0, int16_t); + TEST_SHMEM_PUT(IPUT, 0, int32_t); + TEST_SHMEM_PUT(IPUT, 0, int64_t); + TEST_SHMEM_PUT(IPUT, 0, uint8_t); +
TEST_SHMEM_PUT(IPUT, 0, uint16_t); + TEST_SHMEM_PUT(IPUT, 0, uint32_t); + TEST_SHMEM_PUT(IPUT, 0, uint64_t); + TEST_SHMEM_PUT(IPUT, 0, size_t); + TEST_SHMEM_PUT(IPUT, 0, ptrdiff_t); + + TEST_SHMEM_PUT(IPUT, 1, float); + TEST_SHMEM_PUT(IPUT, 1, double); + TEST_SHMEM_PUT(IPUT, 1, long double); + TEST_SHMEM_PUT(IPUT, 1, char); + TEST_SHMEM_PUT(IPUT, 1, signed char); + TEST_SHMEM_PUT(IPUT, 1, short); + TEST_SHMEM_PUT(IPUT, 1, int); + TEST_SHMEM_PUT(IPUT, 1, long); + TEST_SHMEM_PUT(IPUT, 1, long long); + TEST_SHMEM_PUT(IPUT, 1, unsigned char); + TEST_SHMEM_PUT(IPUT, 1, unsigned short); + TEST_SHMEM_PUT(IPUT, 1, unsigned int); + TEST_SHMEM_PUT(IPUT, 1, unsigned long); + TEST_SHMEM_PUT(IPUT, 1, unsigned long long); + TEST_SHMEM_PUT(IPUT, 1, int8_t); + TEST_SHMEM_PUT(IPUT, 1, int16_t); + TEST_SHMEM_PUT(IPUT, 1, int32_t); + TEST_SHMEM_PUT(IPUT, 1, int64_t); + TEST_SHMEM_PUT(IPUT, 1, uint8_t); + TEST_SHMEM_PUT(IPUT, 1, uint16_t); + TEST_SHMEM_PUT(IPUT, 1, uint32_t); + TEST_SHMEM_PUT(IPUT, 1, uint64_t); + TEST_SHMEM_PUT(IPUT, 1, size_t); + TEST_SHMEM_PUT(IPUT, 1, ptrdiff_t); + + TEST_SHMEM_PUT(PUT_NBI, 0, float); + TEST_SHMEM_PUT(PUT_NBI, 0, double); + TEST_SHMEM_PUT(PUT_NBI, 0, long double); + TEST_SHMEM_PUT(PUT_NBI, 0, char); + TEST_SHMEM_PUT(PUT_NBI, 0, signed char); + TEST_SHMEM_PUT(PUT_NBI, 0, short); + TEST_SHMEM_PUT(PUT_NBI, 0, int); + TEST_SHMEM_PUT(PUT_NBI, 0, long); + TEST_SHMEM_PUT(PUT_NBI, 0, long long); + TEST_SHMEM_PUT(PUT_NBI, 0, unsigned char); + TEST_SHMEM_PUT(PUT_NBI, 0, unsigned short); + TEST_SHMEM_PUT(PUT_NBI, 0, unsigned int); + TEST_SHMEM_PUT(PUT_NBI, 0, unsigned long); + TEST_SHMEM_PUT(PUT_NBI, 0, unsigned long long); + TEST_SHMEM_PUT(PUT_NBI, 0, int8_t); + TEST_SHMEM_PUT(PUT_NBI, 0, int16_t); + TEST_SHMEM_PUT(PUT_NBI, 0, int32_t); + TEST_SHMEM_PUT(PUT_NBI, 0, int64_t); + TEST_SHMEM_PUT(PUT_NBI, 0, uint8_t); + TEST_SHMEM_PUT(PUT_NBI, 0, uint16_t); + TEST_SHMEM_PUT(PUT_NBI, 0, uint32_t); + TEST_SHMEM_PUT(PUT_NBI, 0, uint64_t); + TEST_SHMEM_PUT(PUT_NBI,
0, size_t); + TEST_SHMEM_PUT(PUT_NBI, 0, ptrdiff_t); + + TEST_SHMEM_PUT(PUT_NBI, 1, float); + TEST_SHMEM_PUT(PUT_NBI, 1, double); + TEST_SHMEM_PUT(PUT_NBI, 1, long double); + TEST_SHMEM_PUT(PUT_NBI, 1, char); + TEST_SHMEM_PUT(PUT_NBI, 1, signed char); + TEST_SHMEM_PUT(PUT_NBI, 1, short); + TEST_SHMEM_PUT(PUT_NBI, 1, int); + TEST_SHMEM_PUT(PUT_NBI, 1, long); + TEST_SHMEM_PUT(PUT_NBI, 1, long long); + TEST_SHMEM_PUT(PUT_NBI, 1, unsigned char); + TEST_SHMEM_PUT(PUT_NBI, 1, unsigned short); + TEST_SHMEM_PUT(PUT_NBI, 1, unsigned int); + TEST_SHMEM_PUT(PUT_NBI, 1, unsigned long); + TEST_SHMEM_PUT(PUT_NBI, 1, unsigned long long); + TEST_SHMEM_PUT(PUT_NBI, 1, int8_t); + TEST_SHMEM_PUT(PUT_NBI, 1, int16_t); + TEST_SHMEM_PUT(PUT_NBI, 1, int32_t); + TEST_SHMEM_PUT(PUT_NBI, 1, int64_t); + TEST_SHMEM_PUT(PUT_NBI, 1, uint8_t); + TEST_SHMEM_PUT(PUT_NBI, 1, uint16_t); + TEST_SHMEM_PUT(PUT_NBI, 1, uint32_t); + TEST_SHMEM_PUT(PUT_NBI, 1, uint64_t); + TEST_SHMEM_PUT(PUT_NBI, 1, size_t); + TEST_SHMEM_PUT(PUT_NBI, 1, ptrdiff_t); + + shmem_finalize(); + return rc; +} diff --git a/test/unit/c11_test_shmem_test.c b/test/unit/c11_test_shmem_test.c new file mode 100644 index 0000000..2a6dfcf --- /dev/null +++ b/test/unit/c11_test_shmem_test.c @@ -0,0 +1,81 @@ +/* + * This test program is derived from a unit test created by Nick Park. + * The original unit test is a work of the U.S. Government and is not subject + * to copyright protection in the United States. Foreign copyrights may + * apply. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer.
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L + +#define TEST_SHMEM_TEST(TYPE) /* Each PE writes mype+1 into `remote` on PE (mype + 1) % npes, polls shmem_test until its own `remote` is no longer 0, then checks it equals the rank of PE (mype + npes - 1) % npes plus one; a mismatch sets rc = EXIT_FAILURE. */ \ + do { \ + static TYPE remote = 0; \ + const int mype = shmem_my_pe(); \ + const int npes = shmem_n_pes(); \ + shmem_p(&remote, (TYPE)mype+1, (mype + 1) % npes); /* rank + 1, presumably so the written value differs from the initial 0 being polled for */ \ + while (!shmem_test(&remote, SHMEM_CMP_NE, 0)) ; \ + if (remote != (TYPE)((mype + npes - 1) % npes)+1) { \ + printf("PE %i received incorrect value with " \ + "TEST_SHMEM_TEST(%s)\n", mype, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + } while (false) + +#else +#define TEST_SHMEM_TEST(TYPE) + +#endif + +int main(int argc, char* argv[]) { + shmem_init(); + + int rc = EXIT_SUCCESS; /* set to EXIT_FAILURE by any failing TEST_SHMEM_TEST expansion */ + TEST_SHMEM_TEST(short); + TEST_SHMEM_TEST(int); + TEST_SHMEM_TEST(long); + TEST_SHMEM_TEST(long long); + TEST_SHMEM_TEST(unsigned short); + TEST_SHMEM_TEST(unsigned int); + TEST_SHMEM_TEST(unsigned long); + TEST_SHMEM_TEST(unsigned long long); + TEST_SHMEM_TEST(int32_t); + TEST_SHMEM_TEST(int64_t); + TEST_SHMEM_TEST(uint32_t); + TEST_SHMEM_TEST(uint64_t); + TEST_SHMEM_TEST(size_t); + TEST_SHMEM_TEST(ptrdiff_t); + + shmem_finalize(); + return rc; +} diff --git a/test/unit/c11_test_shmem_wait_until.c b/test/unit/c11_test_shmem_wait_until.c new file mode 100644 index
0000000..c152765 --- /dev/null +++ b/test/unit/c11_test_shmem_wait_until.c @@ -0,0 +1,81 @@ +/* + * This test program is derived from a unit test created by Nick Park. + * The original unit test is a work of the U.S. Government and is not subject + * to copyright protection in the United States. Foreign copyrights may + * apply. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L + +#define TEST_SHMEM_WAIT_UNTIL(TYPE) /* Each PE writes mype+1 into `remote` on PE (mype + 1) % npes, calls shmem_wait_until for its own `remote` to differ from 0, then checks it equals the rank of PE (mype + npes - 1) % npes plus one; a mismatch sets rc = EXIT_FAILURE. */ \ + do { \ + static TYPE remote = 0; \ + const int mype = shmem_my_pe(); \ + const int npes = shmem_n_pes(); \ + shmem_p(&remote, (TYPE)mype+1, (mype + 1) % npes); /* rank + 1, presumably so the written value differs from the initial 0 being waited on */ \ + shmem_wait_until(&remote, SHMEM_CMP_NE, 0); \ + if (remote != (TYPE)((mype + npes - 1) % npes)+1) { \ + printf("PE %i received incorrect value with " \ + "TEST_SHMEM_WAIT_UNTIL(%s)\n", mype, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + } while (false) + +#else +#define TEST_SHMEM_WAIT_UNTIL(TYPE) + +#endif + +int main(int argc, char* argv[]) { + shmem_init(); + + int rc = EXIT_SUCCESS; /* set to EXIT_FAILURE by any failing TEST_SHMEM_WAIT_UNTIL expansion */ + TEST_SHMEM_WAIT_UNTIL(short); + TEST_SHMEM_WAIT_UNTIL(int); + TEST_SHMEM_WAIT_UNTIL(long); + TEST_SHMEM_WAIT_UNTIL(long long); + TEST_SHMEM_WAIT_UNTIL(unsigned short); + TEST_SHMEM_WAIT_UNTIL(unsigned int); + TEST_SHMEM_WAIT_UNTIL(unsigned long); + TEST_SHMEM_WAIT_UNTIL(unsigned long long); + TEST_SHMEM_WAIT_UNTIL(int32_t); + TEST_SHMEM_WAIT_UNTIL(int64_t); + TEST_SHMEM_WAIT_UNTIL(uint32_t); + TEST_SHMEM_WAIT_UNTIL(uint64_t); + TEST_SHMEM_WAIT_UNTIL(size_t); + TEST_SHMEM_WAIT_UNTIL(ptrdiff_t); + + shmem_finalize(); + return rc; +} diff --git a/test/unit/circular_shift.c b/test/unit/circular_shift.c new file mode 100644 index 0000000..21534c8 --- /dev/null +++ b/test/unit/circular_shift.c @@ -0,0 +1,59 @@ +/* + * Copyright 2011 Sandia Corporation. Under the terms of Contract + * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government + * retains certain rights in this software. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* circular shift bbb into aaa: each PE stores its rank in bbb and reads bbb from PE (me + 1) % npes into aaa */ + +#include + +int aaa, bbb; + +int +main(int argc, char* argv[]) +{ + int me, neighbor; + int ret = 0; /* exit status; becomes 1 when the fetched value is wrong */ + + shmem_init(); + bbb = me = shmem_my_pe(); + neighbor = (me + 1) % shmem_n_pes(); + + shmem_barrier_all(); /* every PE has set bbb before any PE reads it */ + + shmem_int_get( &aaa, &bbb, 1, neighbor ); + + shmem_barrier_all(); + + if (aaa != neighbor ) ret = 1; /* the neighbor's bbb holds the neighbor's rank */ + + shmem_finalize(); + + return ret; +} diff --git a/test/unit/collect.c b/test/unit/collect.c new file mode 100644 index 0000000..9857073 --- /dev/null +++ b/test/unit/collect.c @@ -0,0 +1,125 @@ +/* + * Copyright 2011 Sandia Corporation. Under the terms of Contract + * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government + * retains certain rights in this software. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+/* Unit test: shmem_collect32 on all PEs, on the even PEs, and with per-PE element counts. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <shmem.h>
+
+#define MAX_NPES 32   /* largest PE count the statically sized buffers are checked for */
+
+int32_t src[MAX_NPES];
+int32_t dst[MAX_NPES*MAX_NPES];
+
+long pSync[SHMEM_COLLECT_SYNC_SIZE];
+
+int main(int argc, char **argv) {
+    int me, npes;
+    int i, j, errors = 0;   /* errors counts mismatches; any mismatch yields exit status 1 */
+
+    shmem_init();
+
+    me = shmem_my_pe();
+    npes = shmem_n_pes();
+
+    if (npes > MAX_NPES) {
+        if (me == 0)
+            printf("Warning: npes > %d, exiting without performing test\n", MAX_NPES);
+
+        shmem_finalize();
+        return 0;
+    }
+
+    for (i = 0; i < SHMEM_COLLECT_SYNC_SIZE; i++)
+        pSync[i] = SHMEM_SYNC_VALUE;
+
+    for (i = 0; i < MAX_NPES; i++)
+        src[i] = -1;
+
+    for (i = 0; i < MAX_NPES*MAX_NPES; i++)
+        dst[i] = -1;
+
+    shmem_barrier_all();   /* buffers and pSync are initialized everywhere before the first collect */
+
+    /* TEST: All PEs contribute their PE id */
+    src[0] = me;
+
+    shmem_collect32(dst, src, 1, 0, 0, npes, pSync);
+
+    for (i = 0; i < npes; i++) {
+        if (dst[i] != i) {
+            printf("%d: Test 1 error, dst[%d] == %"PRId32", expected %d\n",
+                   me, i, dst[i], i);
+            ++errors;
+        }
+    }
+
+    shmem_barrier_all();
+
+    /* TEST: Even PEs contribute their PE id */
+    src[0] = me;
+
+    if (me % 2 == 0) {
+        shmem_collect32(dst, src, 1, 0, 1, npes/2 + npes%2, pSync);
+
+        for (i = 0; i < npes/2 + npes%2; i++) {   /* one slot per active (even) PE, odd npes included */
+            if (dst[i] != i*2) {
+                printf("%d: Test 2 error, dst[%d] == %"PRId32", expected %d\n",
+                       me, i, dst[i], i*2);
+                ++errors;
+            }
+        }
+    }
+
+    shmem_barrier_all();
+
+    /* TEST: All PEs contribute a number of elements equal to PE id */
+    for (i = 0; i < me; i++)
+        src[i] = me+1;
+
+    shmem_collect32(dst, src, me, 0, 0, npes, pSync);
+
+    int idx = 0;   /* running position in dst; PE i's block holds i copies of i+1 */
+    for (i = 0; i < npes; i++) {
+        for (j = 0; j < i; j++) {
+            if (dst[idx] != i+1) {
+                printf("%d: Test 3 error, dst[%d] == %"PRId32", expected %d\n",
+                       me, idx, dst[idx], i+1);
+                ++errors;
+            }
+            ++idx;
+        }
+    }
+
+    shmem_finalize();
+
+    return errors != 0;
+}
diff --git a/test/unit/collect_active_set.c b/test/unit/collect_active_set.c
new file mode 100644
index 0000000..3286123
--- /dev/null
+++ b/test/unit/collect_active_set.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2017 Intel
Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+/* Unit test: shmem_collect64 over a shrinking active set (PEs i..npes-1 for i = 0..npes-1). */
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <shmem.h>
+
+#define MAX_NPES 32   /* largest PE count the statically sized buffers are checked for */
+
+long collect_psync[SHMEM_COLLECT_SYNC_SIZE];
+
+/* Note: Need to alternate psync arrays because the active set changes */
+long barrier_psync0[SHMEM_BARRIER_SYNC_SIZE];
+long barrier_psync1[SHMEM_BARRIER_SYNC_SIZE];
+
+int64_t src[MAX_NPES];
+int64_t dst[MAX_NPES*MAX_NPES];
+
+int main(void)
+{
+    int i, me, npes;
+    int errors = 0;   /* mismatch counter; any mismatch yields exit status 1 */
+
+    shmem_init();
+
+    me = shmem_my_pe();
+    npes = shmem_n_pes();
+
+    if (npes > MAX_NPES) {
+        if (me == 0)
+            printf("Test requires at most %d PEs\n", MAX_NPES);
+        shmem_finalize();
+        return 0;
+    }
+
+    for (i = 0; i < MAX_NPES; i++)
+        src[i] = me;
+
+    for (i = 0; i < MAX_NPES*MAX_NPES; i++)
+        dst[i] = -1;
+
+    for (i = 0; i < SHMEM_COLLECT_SYNC_SIZE; i++)
+        collect_psync[i] = SHMEM_SYNC_VALUE;
+
+    for (i = 0; i < SHMEM_BARRIER_SYNC_SIZE; i++) {
+        barrier_psync0[i] = SHMEM_SYNC_VALUE;
+        barrier_psync1[i] = SHMEM_SYNC_VALUE;
+    }
+
+    if (me == 0)
+        printf("Shrinking active set test\n");
+
+    shmem_barrier_all();
+
+    /* A total of npes tests are performed, where the active set in each test
+     * includes PEs i..npes-1 and each PE contributes PE ID elements */
+    for (i = 0; i <= me; i++) {   /* PE me drops out once the active set starts above it */
+        int j, k;
+        int idx = 0;              /* running position in dst */
+
+        if (me == i)
+            printf(" + active set size %d\n", npes-i);
+
+        shmem_collect64(dst, src, me, i, 0, npes-i, collect_psync);
+
+        /* Validate destination buffer data */
+        for (j = 0; j < npes - i; j++) {          /* j-th member of the active set is PE i+j ... */
+            for (k = 0; k < i+j; k++, idx++) {    /* ... and contributes i+j copies of its id */
+                if (dst[idx] != i+j) {
+                    printf("%d: Expected dst[%d] = %d, got dst[%d] = %"PRId64", iteration %d\n",
+                           me, idx, i+j, idx, dst[idx], i);
+                    errors++;
+                }
+            }
+        }
+
+        /* Validate unused destination buffer */
+        for ( ; idx < MAX_NPES*MAX_NPES; idx++) {
+            if (dst[idx] != -1) {
+                printf("%d: Expected dst[%d] = %d, got dst[%d] = %"PRId64", iteration %d\n",
+                       me, idx, -1, idx, dst[idx], i);
+                errors++;
+            }
+        }
+
+        /* Reset for next iteration */
+        for (j = 0; j < MAX_NPES*MAX_NPES; j++)
+            dst[j] = -1;
+
+        shmem_barrier(i, 0, npes-i, (i % 2) ? barrier_psync0 : barrier_psync1);
+    }
+
+    shmem_finalize();
+
+    return errors != 0;
+}
diff --git a/test/unit/complex_reductions_f.f90 b/test/unit/complex_reductions_f.f90
new file mode 100644
index 0000000..77e11e6
--- /dev/null
+++ b/test/unit/complex_reductions_f.f90
@@ -0,0 +1,248 @@
+!
+! Copyright (c) 2017 Intel Corporation. All rights reserved.
+! This software is available to you under the BSD license below:
+!
+!     Redistribution and use in source and binary forms, with or
+!     without modification, are permitted provided that the following
+!     conditions are met:
+!
+!      - Redistributions of source code must retain the above
+!        copyright notice, this list of conditions and the following
+!        disclaimer.
+!
+!      - Redistributions in binary form must reproduce the above
+!        copyright notice, this list of conditions and the following
+!        disclaimer in the documentation and/or other materials
+!        provided with the distribution.
+!
+! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+! EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+! MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+! NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+! BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+! ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+! CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+! SOFTWARE.
+!
+
+  program complex_reductions_f
+    implicit none
+    include "shmem.fh"
+    ! Tests 1-8: complex sum/product to_all reductions, single and double precision, on all PEs and on even PEs.
+    integer psync(shmem_reduce_sync_size), i, j, nr
+    data psync /shmem_reduce_sync_size*shmem_sync_value/
+    parameter (nr=10)                                   ! number of elements reduced per call
+    complex(kind=4) z_src(nr), z_target(nr)
+    complex(kind=4) pwrk(max(nr/2+1,shmem_reduce_min_wrkdata_size))
+    complex(kind=8) zd_src(nr), zd_target(nr)
+    complex(kind=8) pwrkd(max(nr/2+1,shmem_reduce_min_wrkdata_size))
+    common /com/ z_src, z_target, pwrk                  ! presumably in a common block so the buffers are symmetric - TODO confirm
+    common /com/ zd_src, zd_target, pwrkd               ! second statement extends the same /com/ block
+    integer shmem_my_pe, shmem_n_pes, npes, me
+    complex(kind=4) exp_result(nr)
+    complex(kind=8) exp_result_d(nr)
+
+
+    call shmem_init()
+
+    npes = shmem_n_pes()
+    me = shmem_my_pe()
+
+    ! Set up the source buffer and calculate the expected sum reduction result:
+    do i=1,nr
+      z_src(i) = cmplx(me,me+1)
+      exp_result(i) = z_src(i)
+      do j=0,npes-1
+        if (j .ne. me) then
+          exp_result(i) = exp_result(i) + cmplx(j,j+1)
+        end if
+      end do
+    end do
+
+    ! Test single precision complex sum_to_all reductions:
+    call shmem_comp4_sum_to_all(z_target, z_src, nr, 0, 0, npes, pwrk, psync)
+
+    ! Check the result:
+    call check_result_complex(z_target, exp_result, nr, 1)
+
+    call shmem_barrier_all()
+
+    ! Test single precision complex sum reduction on a PE subset with a stride of 2
+    if ( mod(me,2) .eq. 0) then
+      if ( mod(shmem_n_pes(),2) .eq. 0) then            ! subset tests only run for an even PE count
+
+        call shmem_comp4_sum_to_all(z_target, z_src, nr, 0, 1, npes/2, pwrk, psync)
+
+        do i=1,nr
+          exp_result(i) = z_src(i)
+          do j=0,npes-1,2
+            if (j .ne. me) then
+              exp_result(i) = exp_result(i) + cmplx(j,j+1)
+            end if
+          end do
+        end do
+
+        call check_result_complex(z_target, exp_result, nr, 2)
+
+      endif
+    endif
+
+    call shmem_barrier_all()
+
+    ! Initialize the double precision buffers and expected result
+    do i=1,nr
+      zd_src(i) = dcmplx(-7.123123123123123123123, 2.32132132132132132132)
+      exp_result_d(i) = zd_src(i)*npes                  ! every PE contributes the same value
+    end do
+
+    ! Test double precision complex sum_to_all reductions:
+    call shmem_comp8_sum_to_all(zd_target, zd_src, nr, 0, 0, npes, pwrkd, psync)
+
+    call check_result_complex_dbl(zd_target, exp_result_d, nr, 3)
+
+    call shmem_barrier_all()
+
+    ! Test double precision sum reductions on a PE subset with a stride of 2
+    if ( mod(me,2) .eq. 0) then
+      if ( mod(shmem_n_pes(),2) .eq. 0) then
+
+        call shmem_comp8_sum_to_all(zd_target, zd_src, nr, 0, 1, npes/2, pwrkd, psync)
+
+        do i=1,nr
+          exp_result_d(i) = zd_src(i)*(npes/2)
+        end do
+
+        call check_result_complex_dbl(zd_target, exp_result_d, nr, 4)
+
+      endif
+    endif
+
+    call shmem_barrier_all()
+
+    ! Re-initialize the source and expected result buffers for single precision
+    do i=1,nr
+      z_src(i) = cmplx(me,me+1)
+      exp_result(i) = z_src(i)
+      do j=0,npes-1
+        if (j .ne. me) then
+          exp_result(i) = exp_result(i) * cmplx(j,j+1)
+        end if
+      end do
+    end do
+
+    ! Test single precision complex product_to_all reductions:
+    call shmem_comp4_prod_to_all(z_target, z_src, nr, 0, 0, npes, pwrk, psync)
+
+    ! Check the result:
+    call check_result_complex(z_target, exp_result, nr, 5)
+
+    call shmem_barrier_all()
+
+    ! Test single precision product reduction on a PE subset with a stride of 2
+    if ( mod(me,2) .eq. 0) then
+      if ( mod(shmem_n_pes(),2) .eq. 0) then
+
+        call shmem_comp4_prod_to_all(z_target, z_src, nr, 0, 1, npes/2, pwrk, psync)
+
+        do i=1,nr
+          exp_result(i) = z_src(i)
+          do j=0,npes-1,2
+            if (j .ne. me) then
+              exp_result(i) = exp_result(i) * cmplx(j,j+1)
+            end if
+          end do
+        end do
+
+        call check_result_complex(z_target, exp_result, nr, 6)
+
+      endif
+    endif
+
+    call shmem_barrier_all()
+
+    ! Re-initialize the double precision buffers and expected result
+    do i=1,nr
+      zd_src(i) = dcmplx(me, me+1)
+      exp_result_d(i) = zd_src(i)
+      do j=0,npes-1
+        if (j .ne. me) then
+          exp_result_d(i) = exp_result_d(i) * dcmplx(j,j+1)
+        end if
+      end do
+    end do
+
+    ! Test double precision complex product_to_all reductions:
+    call shmem_comp8_prod_to_all(zd_target, zd_src, nr, 0, 0, npes, pwrkd, psync)
+
+    call check_result_complex_dbl(zd_target, exp_result_d, nr, 7)
+
+    call shmem_barrier_all()
+
+    ! Test double precision product reduction on a PE subset with a stride of 2
+    if ( mod(me,2) .eq. 0) then
+      if ( mod(shmem_n_pes(),2) .eq. 0) then
+
+        call shmem_comp8_prod_to_all(zd_target, zd_src, nr, 0, 1, npes/2, pwrkd, psync)
+
+        do i=1,nr
+          exp_result_d(i) = zd_src(i)
+          do j=0,npes-1,2
+            if (j .ne. me) then
+              exp_result_d(i) = exp_result_d(i) * dcmplx(j,j+1)
+            end if
+          end do
+        end do
+
+        call check_result_complex_dbl(zd_target, exp_result_d, nr, 8)
+
+      endif
+    endif
+
+    call shmem_finalize()
+
+  contains
+
+    ! Checks that real and imaginary components are within 1/1000th of
+    ! a percent of the expected value; on a mismatch prints and calls shmem_global_exit(id):
+    subroutine check_result_complex(z_target, correct, N, id)
+      implicit none
+      integer N, id, me
+      complex(kind=4) z_target(N), correct(N)
+
+      me = shmem_my_pe()
+      do i=1,N                                          ! i is the host program's variable; NOTE(review): ratio test divides by the expected component - confirm it is never zero
+        if ( abs(1 - real(z_target(i)) / real(correct(i))) .gt. 1e-5 ) then
+          print *, "fail : incorrect real component ", real(z_target(i)), &
+            " expected ", real(correct(i)), " on process ", me, "test #", id
+          call shmem_global_exit(id)
+        endif
+        if ( abs(1 - aimag(z_target(i)) / aimag(correct(i))) .gt. 1e-5 ) then
+          print *, "fail : incorrect imaginary component ", aimag(z_target(i)), &
+            " expected ", aimag(correct(i)), " on process ", me, "test #", id
+          call shmem_global_exit(id)
+        endif
+      end do
+
+    end subroutine check_result_complex
+    ! Double precision (kind=8) counterpart of check_result_complex, same 1e-5 relative tolerance:
+    subroutine check_result_complex_dbl(zd_target, correct, N, id)
+      implicit none
+      integer N, id, me
+      complex(kind=8) zd_target(N), correct(N)
+
+      me = shmem_my_pe()
+      do i=1,N                                          ! i is the host program's variable
+        if ( abs(1 - real(zd_target(i)) / real(correct(i))) .gt. 1e-5 ) then
+          print *, "fail : incorrect real component ", real(zd_target(i)), &
+            " expected ", real(correct(i)), " on process ", me, "test #", id
+          call shmem_global_exit(id)
+        endif
+        if ( abs(1 - aimag(zd_target(i)) / aimag(correct(i))) .gt. 1e-5 ) then
+          print *, "fail : incorrect imaginary component ", aimag(zd_target(i)), &
+            " expected ", aimag(correct(i)), " on process ", me, "test #", id
+          call shmem_global_exit(id)
+        endif
+      end do
+    end subroutine check_result_complex_dbl
+
+  end program complex_reductions_f
diff --git a/test/unit/cswap.c b/test/unit/cswap.c
new file mode 100644
index 0000000..cd48d1c
--- /dev/null
+++ b/test/unit/cswap.c
@@ -0,0 +1,326 @@
+/*
+ * Copyright 2011 Sandia Corporation. Under the terms of Contract
+ * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+ * retains certain rights in this software.
+ *
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * exercise:
+ *   shmem_*_cswap()
+ *   shmem_*_fadd()
+ *   shmem_*_finc()
+ */
+#include <shmem.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define Vprintf if (Verbose) printf   /* expands to an if-statement; needs a local named Verbose */
+
+static int *src_int;          /* shmem_malloc()ed targets of the cswap and finc calls */
+static long *src_long;
+static long long *src_llong;
+
+static int dst_int, itmp;     /* dst_*: values returned by cswap; *tmp: scratch and fadd targets */
+static long dst_long, ltmp;
+static long long dst_llong, lltmp;
+
+static int loops = 5;         /* how many times the whole exercise is repeated */
+
+int
+main(int argc, char* argv[])
+{
+    int me, num_pes, l, pe;
+    int Verbose = 0;
+
+    shmem_init();
+    me = shmem_my_pe();
+    num_pes = shmem_n_pes();
+
+    if (num_pes == 1) {
+        printf("%s: Requires number of PEs > 1\n", argv[0]);
+        shmem_finalize();
+        return 0;
+    }
+
+    for (l = 0 ; l < loops ; ++l) {
+
+        if ((src_int = shmem_malloc(sizeof(int))) == NULL) {
+            printf("PE-%d int shmem_malloc() failed?\n", me);
+            shmem_global_exit(1);
+        }
+        *src_int = 4;
+        dst_int = itmp = 0;
+
+        if ((src_long = shmem_malloc(sizeof(long))) == NULL) {
+            printf("PE-%d long shmem_malloc() failed?\n", me);
+            shmem_global_exit(1);
+        }
+        *src_long = 8;
+        dst_long = ltmp = 0;
+
+        if ((src_llong = shmem_malloc(sizeof(long long))) == NULL) {
+            printf("PE-%d long long shmem_malloc() failed?\n", me);
+            shmem_global_exit(1);
+        }
+        *src_llong = 16;
+        dst_llong = lltmp = 0;
+
+        //printf("PE-%d malloc()s done.\n",me);
+        shmem_barrier_all();
+
+        if ( me == 0 ) {   /* PE 0 drives every compare-swap against PE 1 */
+            /* integer swap */
+            itmp = shmem_int_g(src_int,1);
+            Vprintf("PE-0 Initial Conditions(int) local %d rem(%d)\n",
+                    dst_int,itmp);
+
+            dst_int = shmem_int_atomic_compare_swap(src_int,*src_int,0,1);
+            if (dst_int != 4) {
+                printf("PE-%d dst_int %d != 4?\n",me,dst_int);
+                shmem_global_exit(1);
+            }
+            /* verify remote data */
+            itmp = shmem_int_g(src_int,1);
+            if (itmp != 0) {
+                printf("PE-%d rem %d != 0?\n",me,itmp);
+                shmem_global_exit(1);
+            }
+            Vprintf("PE-0 1st int_cswap done: local %d rem(%d)\n",dst_int,itmp);
+
+            dst_int = shmem_int_atomic_compare_swap(src_int,0,dst_int,1);
+            if (dst_int != 0) {
+                printf("PE-%d dst_int %d != 0?\n",me,dst_int);
+                shmem_global_exit(1);
+            }
+            /* verify remote data */
+            itmp = shmem_int_g(src_int,1);
+            if (itmp != 4) {
+                printf("PE-%d rem %d != 4?\n",me,itmp);
+                shmem_global_exit(1);
+            }
+            Vprintf("PE-0 2nd int_swap done: local %d rem(%d)\n",dst_int,itmp);
+
+            /* cswap() should not swap as cond(0) != remote(4) */
+            dst_int = shmem_int_atomic_compare_swap(src_int,0,0,1);
+            if (dst_int != 4) {
+                printf("PE-%d int no-swap returned dst_int %d != 4?\n",
+                        me,dst_int);
+                shmem_global_exit(1);
+            }
+            /* verify previous cswap() did not swap */
+            itmp = shmem_int_g(src_int,1);
+            if (itmp != 4) {
+                printf("PE-%d failed cond int_cswap() swapped? rem(%d) != 4?\n",
+                        me,itmp);
+                shmem_global_exit(1);
+            }
+
+            /* long swap */
+            ltmp = shmem_long_g(src_long,1);
+            Vprintf("PE-0 Initial Conditions(long) local %ld rem(%ld)\n",
+                    dst_long,ltmp);
+
+            dst_long = shmem_long_atomic_compare_swap(src_long,*src_long,0,1);
+            if (dst_long != 8) {
+                printf("PE-%d dst_long %ld != 8?\n",me,dst_long);
+                shmem_global_exit(1);
+            }
+            /* verify remote data */
+            ltmp = shmem_long_g(src_long,1);
+            if (ltmp != 0) {
+                printf("PE-%d long rem(%ld) != 0?\n",me,ltmp);
+                shmem_global_exit(1);
+            }
+            Vprintf("PE-0 1st long_cswap done: local %ld rem(%ld)\n",
+                    dst_long,ltmp);
+
+            dst_long = shmem_long_atomic_compare_swap(src_long,0,dst_long,1);
+            if (dst_long != 0) {
+                printf("PE-%d dst_long %ld != 0?\n",me,dst_long);
+                shmem_global_exit(1);
+            }
+            /* verify remote data */
+            ltmp = shmem_long_g(src_long,1);
+            if (ltmp != 8) {
+                printf("PE-%d long rem(%ld) != 8?\n",me,ltmp);
+                shmem_global_exit(1);
+            }
+            Vprintf("PE-0 2nd long_swap done: local %ld rem(%ld)\n",
+                    dst_long,ltmp);
+
+            /* cswap() should not swap as cond(0) != remote(8) */
+            dst_long = shmem_long_atomic_compare_swap(src_long,0,0,1);
+            if (dst_long != 8) {
+                printf("PE-%d long no-swap returned dst_long %ld != 8?\n",
+                        me,dst_long);
+                shmem_global_exit(1);
+            }
+            /* verify previous cswap() did not swap */
+            ltmp = shmem_long_g(src_long,1);
+            if (ltmp != 8) {
+                printf("PE-%d failed cond long_cswap() swapped? rem(%ld) != 8?\n",
+                        me,ltmp);
+                shmem_global_exit(1);
+            }
+
+            /* long long swap */
+            lltmp = shmem_longlong_g(src_llong,1);
+            Vprintf("PE-0 Initial Conditions(long long) local %lld rem(%lld)\n",
+                    dst_llong,lltmp);
+
+            dst_llong = shmem_longlong_atomic_compare_swap(src_llong,*src_llong,0,1);
+            if (dst_llong != 16) {
+                printf("PE-%d dst_llong %lld != 16?\n",me,dst_llong);
+                shmem_global_exit(1);
+            }
+            /* verify remote data */
+            lltmp = shmem_longlong_g(src_llong,1);
+            if (lltmp != 0) {
+                printf("PE-%d longlong rem(%lld) != 0?\n",me,lltmp);
+                shmem_global_exit(1);
+            }
+            Vprintf("PE-0 1st longlong_cswap done: local %lld rem(%lld)\n",
+                    dst_llong, lltmp);
+
+            dst_llong = shmem_longlong_atomic_compare_swap(src_llong,0,dst_llong,1);
+            if (dst_llong != 0) {
+                printf("PE-%d dst_llong %lld != 0?\n",me,dst_llong);
+                shmem_global_exit(1);
+            }
+            /* verify remote data */
+            lltmp = shmem_longlong_g(src_llong,1);
+            if (lltmp != 16) {
+                printf("PE-%d long long rem(%lld) != 16?\n",me,lltmp);
+                shmem_global_exit(1);
+            }
+            Vprintf("PE-0 2nd longlong_swap done: local %lld rem(%lld)\n",
+                    dst_llong,lltmp);
+
+            /* cswap() should not swap as cond(0) != remote(16) */
+            dst_llong = shmem_longlong_atomic_compare_swap(src_llong,0,0,1);
+            if (dst_llong != 16) {
+                printf("PE-%d longlong no-swap returned dst_llong %lld != 16?\n",
+                        me,dst_llong);
+                shmem_global_exit(1);
+            }
+            /* verify previous cswap() did not swap */
+            lltmp = shmem_longlong_g(src_llong,1);
+            if (lltmp != 16) {
+                printf("PE-0 failed cond longlong_cswap() swapped? rem(%lld) != 16?\n",
+                        lltmp);
+                shmem_global_exit(1);
+            }
+        }
+        else {   /* all other PEs only confirm their buffers are reachable from PE 0 */
+            if (!shmem_addr_accessible(src_int,0)) {
+                printf("PE-%d local src_int %p not accessible from PE-%d?\n",
+                        me, (void*)src_int, 0);
+                shmem_global_exit(1);
+            }
+            if (!shmem_addr_accessible(src_long,0)) {
+                printf("PE-%d local src_long %p not accessible from PE-%d?\n",
+                        me, (void*)src_long, 0);
+                shmem_global_exit(1);
+            }
+            if (!shmem_addr_accessible(src_llong,0)) {
+                printf("PE-%d local src_llong %p not accessible from PE-%d?\n",
+                        me, (void*)src_llong, 0);
+                shmem_global_exit(1);
+            }
+        }
+        shmem_barrier_all();
+
+        /* shmem_*fadd() exercise -- NOTE(review): fadd/finc mismatches below are only printed; main still returns 0 */
+
+        if (me == 0) {   /* PE 0 holds the accumulators, so it zeroes them */
+            itmp = 0;
+            ltmp = 0;
+            lltmp = 0;
+            *src_int = 0;
+            *src_long = 0;
+            *src_llong = 0;
+        }
+        shmem_barrier_all();
+
+        (void)shmem_int_atomic_fetch_add( &itmp, me+1, 0 );   /* every PE adds me+1 to PE 0's copy */
+        (void)shmem_long_atomic_fetch_add( &ltmp, me+1, 0 );
+        (void)shmem_longlong_atomic_fetch_add( &lltmp, me+1, 0 );
+
+        shmem_barrier_all();
+
+        if (me == 0) {
+            int tot;
+
+            for(pe=0,tot=0; pe < num_pes; pe++)
+                tot += pe+1;   /* expected sum of all contributions: 1 + 2 + ... + num_pes */
+
+            if ( itmp != tot )
+                printf("fadd() total %d != expected %d?\n",itmp,tot);
+
+            if ( ltmp != (long)tot )
+                printf("fadd() total %ld != expected %d?\n",ltmp,tot);
+
+            if ( lltmp != (long long)tot )
+                printf("fadd() total %lld != expected %d?\n",lltmp,tot);
+        }
+        shmem_barrier_all();
+
+        (void)shmem_int_atomic_fetch_inc(src_int,0);   /* every PE increments PE 0's counters once */
+        (void)shmem_long_atomic_fetch_inc(src_long,0);
+        (void)shmem_longlong_atomic_fetch_inc(src_llong,0);
+
+        shmem_barrier_all();
+
+        if (me == 0) {
+            int tot = num_pes;   /* one increment per PE */
+
+            if ( *src_int != tot )
+                printf("finc() total %d != expected %d?\n",*src_int,tot);
+
+            if ( *src_long != (long)tot )
+                printf("finc() total %ld != expected %d?\n",*src_long,tot);
+
+            if ( *src_llong != (long long)tot )
+                printf("finc() total %lld != expected %d?\n",*src_llong,tot);
+        }
+        shmem_barrier_all();
+
+        shmem_free(src_int);
+        shmem_free(src_long);
+        shmem_free(src_llong);
+    }
+
+    if (Verbose)
+        fprintf(stderr,"[%d] exit\n",shmem_my_pe());
+
+    shmem_finalize();
+
+    return 0;
+}
diff --git a/test/unit/cxx_test_shmem_atomic_add.cpp b/test/unit/cxx_test_shmem_atomic_add.cpp
new file mode 100644
index 0000000..eeebb01
--- /dev/null
+++ b/test/unit/cxx_test_shmem_atomic_add.cpp
@@ -0,0 +1,193 @@
+/*
+ *  This test program is derived from a unit test created by Nick Park.
+ *  The original unit test is a work of the U.S. Government and is not subject
+ *  to copyright protection in the United States.  Foreign copyrights may
+ *  apply.
+ *
+ *  Copyright (c) 2017 Intel Corporation. All rights reserved.
+ *  This software is available to you under the BSD license below:
+ *
+ *      Redistribution and use in source and binary forms, with or
+ *      without modification, are permitted provided that the following
+ *      conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <shmem.h>
+
+enum op { ADD = 0, ATOMIC_ADD, CTX_ATOMIC_ADD, FADD, ATOMIC_FETCH_ADD,
+          CTX_ATOMIC_FETCH_ADD };
+/* Without ENABLE_DEPRECATED_TESTS the ADD/FADD cases fall back to the shmem_atomic_* routines. */
+#ifdef ENABLE_DEPRECATED_TESTS
+#define DEPRECATED_ADD shmem_add
+#define DEPRECATED_FADD shmem_fadd
+#else
+#define DEPRECATED_ADD shmem_atomic_add
+#define DEPRECATED_FADD shmem_atomic_fetch_add
+#endif
+/* Every PE adds (mype + 1) to `remote` on every PE; each copy must end at npes*(npes+1)/2. Sets `rc` of the caller on failure. */
+#define TEST_SHMEM_ADD(OP, TYPE) \
+  do { \
+    static TYPE remote; \
+    TYPE old; \
+    const int mype = shmem_my_pe(); \
+    const int npes = shmem_n_pes(); \
+    remote = (TYPE)0; \
+    shmem_barrier_all(); \
+    for (int i = 0; i < npes; i++) \
+      switch (OP) { \
+        case ADD: \
+          DEPRECATED_ADD(&remote, (TYPE)(mype + 1), i); \
+          break; \
+        case ATOMIC_ADD: \
+          shmem_atomic_add(&remote, (TYPE)(mype + 1), i); \
+          break; \
+        case CTX_ATOMIC_ADD: \
+          shmem_atomic_add(SHMEM_CTX_DEFAULT, &remote, (TYPE)(mype + 1), i); \
+          break; \
+        case FADD: \
+          old = DEPRECATED_FADD(&remote, (TYPE)(mype + 1), i); \
+          if (old > (TYPE)(npes * (npes + 1) / 2)) { \
+            printf("PE %i error inconsistent value of old (%s, %s)\n", \
+                   mype, #OP, #TYPE); \
+            rc = EXIT_FAILURE; \
+          } \
+          break; \
+        case ATOMIC_FETCH_ADD: \
+          old = shmem_atomic_fetch_add(&remote, (TYPE)(mype + 1), i); \
+          if (old > (TYPE)(npes * (npes + 1) / 2)) { \
+            printf("PE %i error inconsistent value of old (%s, %s)\n", \
+                   mype, #OP, #TYPE); \
+            rc = EXIT_FAILURE; \
+          } \
+          break; \
+        case CTX_ATOMIC_FETCH_ADD: \
+          old = shmem_atomic_fetch_add(SHMEM_CTX_DEFAULT, &remote, (TYPE)(mype + 1), i); \
+          if (old > (TYPE)(npes * (npes + 1) / 2)) { \
+            printf("PE %i error inconsistent value of old (%s, %s)\n", \
+                   mype, #OP, #TYPE); \
+            rc = EXIT_FAILURE; \
+          } \
+          break; \
+        default: \
+          printf("Invalid operation (%d)\n", OP); \
+          shmem_global_exit(1); \
+      } \
+    shmem_barrier_all(); \
+    if (remote != (TYPE)(npes * (npes + 1) / 2)) { \
+      printf("PE %i observed error with TEST_SHMEM_ADD(%s, %s)\n", \
+             mype, #OP, #TYPE); \
+      rc = EXIT_FAILURE; \
+    } \
+  } while (false)
+
+/* Runs TEST_SHMEM_ADD for every operation and integer type; returns EXIT_FAILURE if any check failed. */
+int main(int argc, char* argv[]) {
+  shmem_init();
+
+  int rc = EXIT_SUCCESS;   /* written by TEST_SHMEM_ADD on failure */
+
+#ifdef ENABLE_DEPRECATED_TESTS
+  TEST_SHMEM_ADD(ADD, int);
+  TEST_SHMEM_ADD(ADD, long);
+  TEST_SHMEM_ADD(ADD, long long);
+  TEST_SHMEM_ADD(ADD, unsigned int);
+  TEST_SHMEM_ADD(ADD, unsigned long);
+  TEST_SHMEM_ADD(ADD, unsigned long long);
+  TEST_SHMEM_ADD(ADD, int32_t);
+  TEST_SHMEM_ADD(ADD, int64_t);
+  TEST_SHMEM_ADD(ADD, uint32_t);
+  TEST_SHMEM_ADD(ADD, uint64_t);
+  TEST_SHMEM_ADD(ADD, size_t);
+  TEST_SHMEM_ADD(ADD, ptrdiff_t);
+#endif /* ENABLE_DEPRECATED_TESTS */
+
+  TEST_SHMEM_ADD(ATOMIC_ADD, int);
+  TEST_SHMEM_ADD(ATOMIC_ADD, long);
+  TEST_SHMEM_ADD(ATOMIC_ADD, long long);
+  TEST_SHMEM_ADD(ATOMIC_ADD, unsigned int);
+  TEST_SHMEM_ADD(ATOMIC_ADD, unsigned long);
+  TEST_SHMEM_ADD(ATOMIC_ADD, unsigned long long);
+  TEST_SHMEM_ADD(ATOMIC_ADD, int32_t);
+  TEST_SHMEM_ADD(ATOMIC_ADD, int64_t);
+  TEST_SHMEM_ADD(ATOMIC_ADD, uint32_t);
+  TEST_SHMEM_ADD(ATOMIC_ADD, uint64_t);
+  TEST_SHMEM_ADD(ATOMIC_ADD, size_t);
+  TEST_SHMEM_ADD(ATOMIC_ADD, ptrdiff_t);
+
+  TEST_SHMEM_ADD(CTX_ATOMIC_ADD, int);
+  TEST_SHMEM_ADD(CTX_ATOMIC_ADD, long);
+  TEST_SHMEM_ADD(CTX_ATOMIC_ADD, long long);
+  TEST_SHMEM_ADD(CTX_ATOMIC_ADD, unsigned int);
+  TEST_SHMEM_ADD(CTX_ATOMIC_ADD, unsigned long);
+  TEST_SHMEM_ADD(CTX_ATOMIC_ADD, unsigned long long);
+  TEST_SHMEM_ADD(CTX_ATOMIC_ADD, int32_t);
+  TEST_SHMEM_ADD(CTX_ATOMIC_ADD, int64_t);
+  TEST_SHMEM_ADD(CTX_ATOMIC_ADD, uint32_t);
+  TEST_SHMEM_ADD(CTX_ATOMIC_ADD, uint64_t);
+  TEST_SHMEM_ADD(CTX_ATOMIC_ADD, size_t);
+  TEST_SHMEM_ADD(CTX_ATOMIC_ADD, ptrdiff_t);
+
+  TEST_SHMEM_ADD(FADD, int);
+  TEST_SHMEM_ADD(FADD, long);
+  TEST_SHMEM_ADD(FADD, long long);
+  TEST_SHMEM_ADD(FADD, unsigned int);
+  TEST_SHMEM_ADD(FADD, unsigned long);
+  TEST_SHMEM_ADD(FADD, unsigned long long);
+  TEST_SHMEM_ADD(FADD, int32_t);
+  TEST_SHMEM_ADD(FADD, int64_t);
+  TEST_SHMEM_ADD(FADD, uint32_t);
+  TEST_SHMEM_ADD(FADD, uint64_t);
+  TEST_SHMEM_ADD(FADD, size_t);
+  TEST_SHMEM_ADD(FADD, ptrdiff_t);
+
+  TEST_SHMEM_ADD(ATOMIC_FETCH_ADD, int);
+  TEST_SHMEM_ADD(ATOMIC_FETCH_ADD, long);
+  TEST_SHMEM_ADD(ATOMIC_FETCH_ADD, long long);
+  TEST_SHMEM_ADD(ATOMIC_FETCH_ADD, unsigned int);
+  TEST_SHMEM_ADD(ATOMIC_FETCH_ADD, unsigned long);
+  TEST_SHMEM_ADD(ATOMIC_FETCH_ADD, unsigned long long);
+  TEST_SHMEM_ADD(ATOMIC_FETCH_ADD, int32_t);
+  TEST_SHMEM_ADD(ATOMIC_FETCH_ADD, int64_t);
+  TEST_SHMEM_ADD(ATOMIC_FETCH_ADD, uint32_t);
+  TEST_SHMEM_ADD(ATOMIC_FETCH_ADD, uint64_t);
+  TEST_SHMEM_ADD(ATOMIC_FETCH_ADD, size_t);
+  TEST_SHMEM_ADD(ATOMIC_FETCH_ADD, ptrdiff_t);
+
+  TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD, int);
+  TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD, long);
+  TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD, long long);
+  TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD, unsigned int);
+  TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD, unsigned long);
+  TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD, unsigned long long);
+  TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD, int32_t);
+  TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD, int64_t);
+  TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD, uint32_t);
+  TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD, uint64_t);
+  TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD, size_t);
+  TEST_SHMEM_ADD(CTX_ATOMIC_FETCH_ADD, ptrdiff_t);
+
+  shmem_finalize();
+  return rc;
+}
diff --git a/test/unit/cxx_test_shmem_atomic_and.cpp b/test/unit/cxx_test_shmem_atomic_and.cpp
new file mode 100644
index 0000000..a399c29
--- /dev/null
+++ b/test/unit/cxx_test_shmem_atomic_and.cpp
@@ -0,0 +1,129 @@
+/*
+ *  This test program is derived from a unit test created by Nick Park.
+ *  The original unit test is a work of the U.S. Government and is not subject
+ *  to copyright protection in the United States.  Foreign copyrights may
+ *  apply.
+ *
+ *  Copyright (c) 2017 Intel Corporation. All rights reserved.
+ *  This software is available to you under the BSD license below:
+ *
+ *      Redistribution and use in source and binary forms, with or
+ *      without modification, are permitted provided that the following
+ *      conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <shmem.h>
+
+enum op { AND = 0, CTX_AND, FETCH_AND, CTX_FETCH_AND };   /* selects the routine variant under test */
+
+/* Initially, remote = 111...b. Each PE performs an atomic AND where the
+ * PEth bit of the input value is set to 0 and all other bits are set to 1.
+ * The result has the NPES least significant bits cleared, 111...000...b.
+ */
+/* Uses mype, npes and rc from the enclosing scope. Skipped when TYPE has no spare bit per PE. */
+#define TEST_SHMEM_AND(OP, TYPE) \
+  do { \
+    static TYPE remote = ~(TYPE)0; \
+    TYPE old = (TYPE)0; \
+    if ((size_t)npes >= 8 * sizeof(TYPE)) break; /* Avoid shift overflow: one bit per PE */ \
+    for (int i = 0; i < npes; i++) \
+      switch (OP) { \
+        case AND: \
+          shmem_atomic_and(&remote, ~(TYPE)(1LLU << mype), i); \
+          break; \
+        case CTX_AND: \
+          shmem_atomic_and(SHMEM_CTX_DEFAULT, &remote, ~(TYPE)(1LLU << mype), i); \
+          break; \
+        case FETCH_AND: \
+          old = shmem_atomic_fetch_and(&remote, ~(TYPE)(1LLU << mype), i); \
+          if ((old & (TYPE)(1LLU << mype)) == 0) { /* only this PE clears bit mype */ \
+            printf("PE %i error inconsistent value of old (%s, %s)\n", \
+                   mype, #OP, #TYPE); \
+            rc = EXIT_FAILURE; \
+          } \
+          break; \
+        case CTX_FETCH_AND: \
+          old = shmem_atomic_fetch_and(SHMEM_CTX_DEFAULT, &remote, ~(TYPE)(1LLU << mype), i); \
+          if ((old & (TYPE)(1LLU << mype)) == 0) { \
+            printf("PE %i error inconsistent value of old (%s, %s)\n", \
+                   mype, #OP, #TYPE); \
+            rc = EXIT_FAILURE; \
+          } \
+          break; \
+        default: \
+          printf("Invalid operation (%d)\n", OP); \
+          shmem_global_exit(1); \
+      } \
+    shmem_barrier_all(); \
+    if (remote != ~(TYPE)((1LLU << npes) - 1LLU)) { \
+      printf("PE %i observed error with TEST_SHMEM_AND(%s, %s)\n", \
+             mype, #OP, #TYPE); \
+      rc = EXIT_FAILURE; \
+    } \
+  } while (false)
+
+/* Runs TEST_SHMEM_AND for every operation and integer type; returns EXIT_FAILURE if any check failed. */
+int main(int argc, char* argv[]) {
+  shmem_init();
+
+  const int mype = shmem_my_pe();   /* read by TEST_SHMEM_AND */
+  const int npes = shmem_n_pes();   /* read by TEST_SHMEM_AND */
+
+  int rc = EXIT_SUCCESS;            /* written by TEST_SHMEM_AND on failure */
+  TEST_SHMEM_AND(AND, unsigned int);
+  TEST_SHMEM_AND(AND, unsigned long);
+  TEST_SHMEM_AND(AND, unsigned long long);
+  TEST_SHMEM_AND(AND, int32_t);
+  TEST_SHMEM_AND(AND, int64_t);
+  TEST_SHMEM_AND(AND, uint32_t);
+  TEST_SHMEM_AND(AND, uint64_t);
+
+  TEST_SHMEM_AND(CTX_AND, unsigned int);
+  TEST_SHMEM_AND(CTX_AND, unsigned long);
+  TEST_SHMEM_AND(CTX_AND, unsigned long long);
+  TEST_SHMEM_AND(CTX_AND, int32_t);
+  TEST_SHMEM_AND(CTX_AND, int64_t);
+  TEST_SHMEM_AND(CTX_AND, uint32_t);
+  TEST_SHMEM_AND(CTX_AND, uint64_t);
+
+  TEST_SHMEM_AND(FETCH_AND, unsigned int);
+  TEST_SHMEM_AND(FETCH_AND, unsigned long);
+  TEST_SHMEM_AND(FETCH_AND, unsigned long long);
+  TEST_SHMEM_AND(FETCH_AND, int32_t);
+  TEST_SHMEM_AND(FETCH_AND, int64_t);
+  TEST_SHMEM_AND(FETCH_AND, uint32_t);
+  TEST_SHMEM_AND(FETCH_AND, uint64_t);
+
+  TEST_SHMEM_AND(CTX_FETCH_AND, unsigned int);
+  TEST_SHMEM_AND(CTX_FETCH_AND, unsigned long);
+  TEST_SHMEM_AND(CTX_FETCH_AND, unsigned long long);
+  TEST_SHMEM_AND(CTX_FETCH_AND, int32_t);
+  TEST_SHMEM_AND(CTX_FETCH_AND, int64_t);
+  TEST_SHMEM_AND(CTX_FETCH_AND, uint32_t);
+  TEST_SHMEM_AND(CTX_FETCH_AND, uint64_t);
+
+  shmem_finalize();
+  return rc;
+}
diff --git a/test/unit/cxx_test_shmem_atomic_cswap.cpp b/test/unit/cxx_test_shmem_atomic_cswap.cpp
new file mode 100644
index 0000000..1a4ef0d
--- /dev/null
+++ b/test/unit/cxx_test_shmem_atomic_cswap.cpp
@@ -0,0 +1,135 @@
+/*
+ *  This test program is derived from a unit test created by Nick Park.
+ *  The original unit test is a work of the U.S. Government and is not subject
+ *  to copyright protection in the United States.  Foreign copyrights may
+ *  apply.
+ *
+ *  Copyright (c) 2017 Intel Corporation. All rights reserved.
+ *  This software is available to you under the BSD license below:
+ *
+ *      Redistribution and use in source and binary forms, with or
+ *      without modification, are permitted provided that the following
+ *      conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include + +enum op { CSWAP = 0, ATOMIC_COMPARE_SWAP, CTX_ATOMIC_COMPARE_SWAP }; + +#ifdef ENABLE_DEPRECATED_TESTS +#define DEPRECATED_CSWAP shmem_cswap +#else +#define DEPRECATED_CSWAP shmem_atomic_compare_swap +#endif + +#define TEST_SHMEM_CSWAP(OP, TYPE) \ + do { \ + static TYPE remote; \ + TYPE old; \ + const int mype = shmem_my_pe(); \ + const int npes = shmem_n_pes(); \ + remote = npes; \ + shmem_barrier_all(); \ + switch (OP) { \ + case CSWAP: \ + old = DEPRECATED_CSWAP(&remote, (TYPE)npes, (TYPE)mype, \ + (mype + 1) % npes); \ + break; \ + case ATOMIC_COMPARE_SWAP: \ + old = shmem_atomic_compare_swap(&remote, (TYPE)npes, \ + (TYPE)mype, (mype + 1) % npes); \ + break; \ + case CTX_ATOMIC_COMPARE_SWAP: \ + old = shmem_atomic_compare_swap(SHMEM_CTX_DEFAULT, &remote, \ + (TYPE)npes, (TYPE)mype, \ + (mype + 1) % npes); \ + break; \ + default: \ + printf("invalid operation (%d)\n", OP); \ + shmem_global_exit(1); \ + } \ + shmem_barrier_all(); \ + if (remote != (TYPE)((mype + npes - 1) % npes)) { \ + printf("PE %i observed error with TEST_SHMEM_CSWAP(%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + if (old != npes) { \ + printf("PE %i error inconsistent value of old (%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + } while (false) + + +int main(int argc, char* argv[]) { + shmem_init(); + + int rc = EXIT_SUCCESS; + +#ifdef ENABLE_DEPRECATED_TESTS + TEST_SHMEM_CSWAP(CSWAP, int); + TEST_SHMEM_CSWAP(CSWAP, long); + TEST_SHMEM_CSWAP(CSWAP, long long); + TEST_SHMEM_CSWAP(CSWAP, unsigned int); + TEST_SHMEM_CSWAP(CSWAP, unsigned long); + TEST_SHMEM_CSWAP(CSWAP, unsigned long long); + TEST_SHMEM_CSWAP(CSWAP, int32_t); 
+ TEST_SHMEM_CSWAP(CSWAP, int64_t); + TEST_SHMEM_CSWAP(CSWAP, uint32_t); + TEST_SHMEM_CSWAP(CSWAP, uint64_t); + TEST_SHMEM_CSWAP(CSWAP, size_t); + TEST_SHMEM_CSWAP(CSWAP, ptrdiff_t); +#endif /* ENABLE_DEPRECATED_TESTS */ + + TEST_SHMEM_CSWAP(ATOMIC_COMPARE_SWAP, int); + TEST_SHMEM_CSWAP(ATOMIC_COMPARE_SWAP, long); + TEST_SHMEM_CSWAP(ATOMIC_COMPARE_SWAP, long long); + TEST_SHMEM_CSWAP(ATOMIC_COMPARE_SWAP, unsigned int); + TEST_SHMEM_CSWAP(ATOMIC_COMPARE_SWAP, unsigned long); + TEST_SHMEM_CSWAP(ATOMIC_COMPARE_SWAP, unsigned long long); + TEST_SHMEM_CSWAP(ATOMIC_COMPARE_SWAP, int32_t); + TEST_SHMEM_CSWAP(ATOMIC_COMPARE_SWAP, int64_t); + TEST_SHMEM_CSWAP(ATOMIC_COMPARE_SWAP, uint32_t); + TEST_SHMEM_CSWAP(ATOMIC_COMPARE_SWAP, uint64_t); + TEST_SHMEM_CSWAP(ATOMIC_COMPARE_SWAP, size_t); + TEST_SHMEM_CSWAP(ATOMIC_COMPARE_SWAP, ptrdiff_t); + + TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP, int); + TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP, long); + TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP, long long); + TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP, unsigned int); + TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP, unsigned long); + TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP, unsigned long long); + TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP, int32_t); + TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP, int64_t); + TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP, uint32_t); + TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP, uint64_t); + TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP, size_t); + TEST_SHMEM_CSWAP(CTX_ATOMIC_COMPARE_SWAP, ptrdiff_t); + + shmem_finalize(); + return rc; +} diff --git a/test/unit/cxx_test_shmem_atomic_fetch.cpp b/test/unit/cxx_test_shmem_atomic_fetch.cpp new file mode 100644 index 0000000..6577c07 --- /dev/null +++ b/test/unit/cxx_test_shmem_atomic_fetch.cpp @@ -0,0 +1,131 @@ +/* + * This test program is derived from a unit test created by Nick Park. + * The original unit test is a work of the U.S. Government and is not subject + * to copyright protection in the United States. 
Foreign copyrights may + * apply. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +enum op { FETCH = 0, ATOMIC_FETCH, CTX_ATOMIC_FETCH }; + +#ifdef ENABLE_DEPRECATED_TESTS +#define DEPRECATED_FETCH shmem_fetch +#else +#define DEPRECATED_FETCH shmem_atomic_fetch +#endif + +#define TEST_SHMEM_FETCH(OP, TYPE) \ + do { \ + static TYPE remote; \ + TYPE val; \ + const int mype = shmem_my_pe(); \ + const int npes = shmem_n_pes(); \ + remote = (TYPE)mype; \ + shmem_barrier_all(); \ + switch (OP) { \ + case FETCH: \ + val = DEPRECATED_FETCH(&remote, (mype + 1) % npes); \ + break; \ + case ATOMIC_FETCH: \ + val = shmem_atomic_fetch(&remote, (mype + 1) % npes); \ + break; \ + case CTX_ATOMIC_FETCH: \ + val = shmem_atomic_fetch(SHMEM_CTX_DEFAULT, &remote, (mype + 1) % npes); \ + break; \ + default: \ + printf("Invalid operation (%d)\n", OP); \ + shmem_global_exit(1); \ + } \ + if (val != (TYPE)((mype + 1) % npes)) { \ + printf("PE %i received incorrect value with " \ + "TEST_SHMEM_FETCH(%s, %s)\n", mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + } while (false) + + +int main(int argc, char* argv[]) { + shmem_init(); + + int rc = EXIT_SUCCESS; + +#ifdef ENABLE_DEPRECATED_TESTS + TEST_SHMEM_FETCH(FETCH, float); + TEST_SHMEM_FETCH(FETCH, double); + TEST_SHMEM_FETCH(FETCH, int); + TEST_SHMEM_FETCH(FETCH, long); + TEST_SHMEM_FETCH(FETCH, long long); + TEST_SHMEM_FETCH(FETCH, unsigned int); + TEST_SHMEM_FETCH(FETCH, unsigned long); + TEST_SHMEM_FETCH(FETCH, unsigned long long); + TEST_SHMEM_FETCH(FETCH, int32_t); + TEST_SHMEM_FETCH(FETCH, int64_t); + TEST_SHMEM_FETCH(FETCH, uint32_t); + TEST_SHMEM_FETCH(FETCH, uint64_t); + TEST_SHMEM_FETCH(FETCH, size_t); + TEST_SHMEM_FETCH(FETCH, ptrdiff_t); +#endif /* ENABLE_DEPRECATED_TESTS */ + + TEST_SHMEM_FETCH(ATOMIC_FETCH, float); + TEST_SHMEM_FETCH(ATOMIC_FETCH, double); + TEST_SHMEM_FETCH(ATOMIC_FETCH, int); + TEST_SHMEM_FETCH(ATOMIC_FETCH, long); + TEST_SHMEM_FETCH(ATOMIC_FETCH, long long); + TEST_SHMEM_FETCH(ATOMIC_FETCH, unsigned int); + 
TEST_SHMEM_FETCH(ATOMIC_FETCH, unsigned long); + TEST_SHMEM_FETCH(ATOMIC_FETCH, unsigned long long); + TEST_SHMEM_FETCH(ATOMIC_FETCH, int32_t); + TEST_SHMEM_FETCH(ATOMIC_FETCH, int64_t); + TEST_SHMEM_FETCH(ATOMIC_FETCH, uint32_t); + TEST_SHMEM_FETCH(ATOMIC_FETCH, uint64_t); + TEST_SHMEM_FETCH(ATOMIC_FETCH, size_t); + TEST_SHMEM_FETCH(ATOMIC_FETCH, ptrdiff_t); + + TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, float); + TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, double); + TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, int); + TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, long); + TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, long long); + TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, unsigned int); + TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, unsigned long); + TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, unsigned long long); + TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, int32_t); + TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, int64_t); + TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, uint32_t); + TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, uint64_t); + TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, size_t); + TEST_SHMEM_FETCH(CTX_ATOMIC_FETCH, ptrdiff_t); + + shmem_finalize(); + return rc; +} diff --git a/test/unit/cxx_test_shmem_atomic_inc.cpp b/test/unit/cxx_test_shmem_atomic_inc.cpp new file mode 100644 index 0000000..3090454 --- /dev/null +++ b/test/unit/cxx_test_shmem_atomic_inc.cpp @@ -0,0 +1,193 @@ +/* + * This test program is derived from a unit test created by Nick Park. + * The original unit test is a work of the U.S. Government and is not subject + * to copyright protection in the United States. Foreign copyrights may + * apply. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include + +enum op { INC = 0, ATOMIC_INC, CTX_ATOMIC_INC, FINC, ATOMIC_FETCH_INC, + CTX_ATOMIC_FETCH_INC }; + +#ifdef ENABLE_DEPRECATED_TESTS +#define DEPRECATED_INC shmem_inc +#define DEPRECATED_FINC shmem_finc +#else +#define DEPRECATED_INC shmem_atomic_inc +#define DEPRECATED_FINC shmem_atomic_fetch_inc +#endif + +#define TEST_SHMEM_INC(OP, TYPE) \ + do { \ + static TYPE remote = (TYPE)0; \ + TYPE old; \ + const int mype = shmem_my_pe(); \ + const int npes = shmem_n_pes(); \ + remote = (TYPE)0; \ + shmem_barrier_all(); \ + for (int i = 0; i < npes; i++) \ + switch (OP) { \ + case INC: \ + DEPRECATED_INC(&remote, i); \ + break; \ + case ATOMIC_INC: \ + shmem_atomic_inc(&remote, i); \ + break; \ + case CTX_ATOMIC_INC: \ + shmem_atomic_inc(SHMEM_CTX_DEFAULT, &remote, i); \ + break; \ + case FINC: \ + old = DEPRECATED_FINC(&remote, i); \ + if (old > npes) { \ + printf("PE %i error inconsistent value of old (%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + break; \ + case ATOMIC_FETCH_INC: \ + old = shmem_atomic_fetch_inc(&remote, i); \ + if (old > npes) { \ + printf("PE %i error inconsistent value of old (%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + break; \ + case 
CTX_ATOMIC_FETCH_INC: \ + old = shmem_atomic_fetch_inc(SHMEM_CTX_DEFAULT, &remote, i); \ + if (old > npes) { \ + printf("PE %i error inconsistent value of old (%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + break; \ + default: \ + printf("Invalid operation (%d)\n", OP); \ + shmem_global_exit(1); \ + } \ + shmem_barrier_all(); \ + if (remote != (TYPE)npes) { \ + printf("PE %i observed error with TEST_SHMEM_INC(%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + } while (false) + + +int main(int argc, char* argv[]) { + shmem_init(); + + int rc = EXIT_SUCCESS; + +#ifdef ENABLE_DEPRECATED_TESTS + TEST_SHMEM_INC(INC, int); + TEST_SHMEM_INC(INC, long); + TEST_SHMEM_INC(INC, long long); + TEST_SHMEM_INC(INC, unsigned int); + TEST_SHMEM_INC(INC, unsigned long); + TEST_SHMEM_INC(INC, unsigned long long); + TEST_SHMEM_INC(INC, int32_t); + TEST_SHMEM_INC(INC, int64_t); + TEST_SHMEM_INC(INC, uint32_t); + TEST_SHMEM_INC(INC, uint64_t); + TEST_SHMEM_INC(INC, size_t); + TEST_SHMEM_INC(INC, ptrdiff_t); +#endif /* ENABLE_DEPRECATED_TESTS */ + + TEST_SHMEM_INC(ATOMIC_INC, int); + TEST_SHMEM_INC(ATOMIC_INC, long); + TEST_SHMEM_INC(ATOMIC_INC, long long); + TEST_SHMEM_INC(ATOMIC_INC, unsigned int); + TEST_SHMEM_INC(ATOMIC_INC, unsigned long); + TEST_SHMEM_INC(ATOMIC_INC, unsigned long long); + TEST_SHMEM_INC(ATOMIC_INC, int32_t); + TEST_SHMEM_INC(ATOMIC_INC, int64_t); + TEST_SHMEM_INC(ATOMIC_INC, uint32_t); + TEST_SHMEM_INC(ATOMIC_INC, uint64_t); + TEST_SHMEM_INC(ATOMIC_INC, size_t); + TEST_SHMEM_INC(ATOMIC_INC, ptrdiff_t); + + TEST_SHMEM_INC(CTX_ATOMIC_INC, int); + TEST_SHMEM_INC(CTX_ATOMIC_INC, long); + TEST_SHMEM_INC(CTX_ATOMIC_INC, long long); + TEST_SHMEM_INC(CTX_ATOMIC_INC, unsigned int); + TEST_SHMEM_INC(CTX_ATOMIC_INC, unsigned long); + TEST_SHMEM_INC(CTX_ATOMIC_INC, unsigned long long); + TEST_SHMEM_INC(CTX_ATOMIC_INC, int32_t); + TEST_SHMEM_INC(CTX_ATOMIC_INC, int64_t); + TEST_SHMEM_INC(CTX_ATOMIC_INC, uint32_t); + 
TEST_SHMEM_INC(CTX_ATOMIC_INC, uint64_t); + TEST_SHMEM_INC(CTX_ATOMIC_INC, size_t); + TEST_SHMEM_INC(CTX_ATOMIC_INC, ptrdiff_t); + + TEST_SHMEM_INC(FINC, int); + TEST_SHMEM_INC(FINC, long); + TEST_SHMEM_INC(FINC, long long); + TEST_SHMEM_INC(FINC, unsigned int); + TEST_SHMEM_INC(FINC, unsigned long); + TEST_SHMEM_INC(FINC, unsigned long long); + TEST_SHMEM_INC(FINC, int32_t); + TEST_SHMEM_INC(FINC, int64_t); + TEST_SHMEM_INC(FINC, uint32_t); + TEST_SHMEM_INC(FINC, uint64_t); + TEST_SHMEM_INC(FINC, size_t); + TEST_SHMEM_INC(FINC, ptrdiff_t); + + TEST_SHMEM_INC(ATOMIC_FETCH_INC, int); + TEST_SHMEM_INC(ATOMIC_FETCH_INC, long); + TEST_SHMEM_INC(ATOMIC_FETCH_INC, long long); + TEST_SHMEM_INC(ATOMIC_FETCH_INC, unsigned int); + TEST_SHMEM_INC(ATOMIC_FETCH_INC, unsigned long); + TEST_SHMEM_INC(ATOMIC_FETCH_INC, unsigned long long); + TEST_SHMEM_INC(ATOMIC_FETCH_INC, int32_t); + TEST_SHMEM_INC(ATOMIC_FETCH_INC, int64_t); + TEST_SHMEM_INC(ATOMIC_FETCH_INC, uint32_t); + TEST_SHMEM_INC(ATOMIC_FETCH_INC, uint64_t); + TEST_SHMEM_INC(ATOMIC_FETCH_INC, size_t); + TEST_SHMEM_INC(ATOMIC_FETCH_INC, ptrdiff_t); + + TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC, int); + TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC, long); + TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC, long long); + TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC, unsigned int); + TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC, unsigned long); + TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC, unsigned long long); + TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC, int32_t); + TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC, int64_t); + TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC, uint32_t); + TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC, uint64_t); + TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC, size_t); + TEST_SHMEM_INC(CTX_ATOMIC_FETCH_INC, ptrdiff_t); + + shmem_finalize(); + return rc; +} diff --git a/test/unit/cxx_test_shmem_atomic_or.cpp b/test/unit/cxx_test_shmem_atomic_or.cpp new file mode 100644 index 0000000..5623a9a --- /dev/null +++ b/test/unit/cxx_test_shmem_atomic_or.cpp @@ -0,0 +1,129 @@ +/* + * This test program is 
derived from a unit test created by Nick Park. + * The original unit test is a work of the U.S. Government and is not subject + * to copyright protection in the United States. Foreign copyrights may + * apply. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include + +enum op { OR = 0, CTX_OR, FETCH_OR, CTX_FETCH_OR }; + +/* Initially, remote = 000...b. Each PE performs an atomic OR where the + * PEth bit of the input value is set to 1 and all other bits are set to 0. + * The result has the NPES least significant bits set, 000...111...b. 
+ */ + +#define TEST_SHMEM_OR(OP, TYPE) \ + do { \ + static TYPE remote = (TYPE)0; \ + TYPE old = (TYPE)0; \ + if (npes-1 > sizeof(TYPE)) break; /* Avoid overflow */ \ + for (int i = 0; i < npes; i++) \ + switch (OP) { \ + case OR: \ + shmem_atomic_or(&remote, (TYPE)(1LLU << mype), i); \ + break; \ + case CTX_OR: \ + shmem_atomic_or(SHMEM_CTX_DEFAULT, &remote, (TYPE)(1LLU << mype), i); \ + break; \ + case FETCH_OR: \ + old = shmem_atomic_fetch_or(&remote, (TYPE)(1LLU << mype), i);\ + if ((old & (TYPE)(1LLU << mype)) != 0) { \ + printf("PE %i error inconsistent value of old (%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + break; \ + case CTX_FETCH_OR: \ + old = shmem_atomic_fetch_or(SHMEM_CTX_DEFAULT, &remote, (TYPE)(1LLU << mype), i); \ + if ((old & (TYPE)(1LLU << mype)) != 0) { \ + printf("PE %i error inconsistent value of old (%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + break; \ + default: \ + printf("Invalid operation (%d)\n", OP); \ + shmem_global_exit(1); \ + } \ + shmem_barrier_all(); \ + if (remote != (TYPE)((1LLU << npes) - 1LLU)) { \ + printf("PE %i observed error with TEST_SHMEM_OR(%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + } while (false) + + +int main(int argc, char* argv[]) { + shmem_init(); + + const int mype = shmem_my_pe(); + const int npes = shmem_n_pes(); + + int rc = EXIT_SUCCESS; + TEST_SHMEM_OR(OR, unsigned int); + TEST_SHMEM_OR(OR, unsigned long); + TEST_SHMEM_OR(OR, unsigned long long); + TEST_SHMEM_OR(OR, int32_t); + TEST_SHMEM_OR(OR, int64_t); + TEST_SHMEM_OR(OR, uint32_t); + TEST_SHMEM_OR(OR, uint64_t); + + TEST_SHMEM_OR(CTX_OR, unsigned int); + TEST_SHMEM_OR(CTX_OR, unsigned long); + TEST_SHMEM_OR(CTX_OR, unsigned long long); + TEST_SHMEM_OR(CTX_OR, int32_t); + TEST_SHMEM_OR(CTX_OR, int64_t); + TEST_SHMEM_OR(CTX_OR, uint32_t); + TEST_SHMEM_OR(CTX_OR, uint64_t); + + TEST_SHMEM_OR(FETCH_OR, unsigned int); + TEST_SHMEM_OR(FETCH_OR, unsigned long); + TEST_SHMEM_OR(FETCH_OR, 
unsigned long long); + TEST_SHMEM_OR(FETCH_OR, int32_t); + TEST_SHMEM_OR(FETCH_OR, int64_t); + TEST_SHMEM_OR(FETCH_OR, uint32_t); + TEST_SHMEM_OR(FETCH_OR, uint64_t); + + TEST_SHMEM_OR(CTX_FETCH_OR, unsigned int); + TEST_SHMEM_OR(CTX_FETCH_OR, unsigned long); + TEST_SHMEM_OR(CTX_FETCH_OR, unsigned long long); + TEST_SHMEM_OR(CTX_FETCH_OR, int32_t); + TEST_SHMEM_OR(CTX_FETCH_OR, int64_t); + TEST_SHMEM_OR(CTX_FETCH_OR, uint32_t); + TEST_SHMEM_OR(CTX_FETCH_OR, uint64_t); + + shmem_finalize(); + return rc; +} diff --git a/test/unit/cxx_test_shmem_atomic_set.cpp b/test/unit/cxx_test_shmem_atomic_set.cpp new file mode 100644 index 0000000..9cb1507 --- /dev/null +++ b/test/unit/cxx_test_shmem_atomic_set.cpp @@ -0,0 +1,129 @@ +/* + * This test program is derived from a unit test created by Nick Park. + * The original unit test is a work of the U.S. Government and is not subject + * to copyright protection in the United States. Foreign copyrights may + * apply. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include + +enum op { SET = 0, ATOMIC_SET, CTX_ATOMIC_SET }; + +#ifdef ENABLE_DEPRECATED_TESTS +#define DEPRECATED_SET shmem_set +#else +#define DEPRECATED_SET shmem_atomic_set +#endif + +#define TEST_SHMEM_SET(OP, TYPE) \ + do { \ + static TYPE remote; \ + const int mype = shmem_my_pe(); \ + const int npes = shmem_n_pes(); \ + switch (OP) { \ + case SET: \ + DEPRECATED_SET(&remote, (TYPE)mype, (mype + 1) % npes); \ + break; \ + case ATOMIC_SET: \ + shmem_atomic_set(&remote, (TYPE)mype, (mype + 1) % npes); \ + break; \ + case CTX_ATOMIC_SET: \ + shmem_atomic_set(SHMEM_CTX_DEFAULT, &remote, (TYPE)mype, (mype + 1) % npes); \ + break; \ + default: \ + printf("Invalid operation (%d)\n", OP); \ + shmem_global_exit(1); \ + } \ + shmem_barrier_all(); \ + if (remote != (TYPE)((mype + npes - 1) % npes)) { \ + printf("PE %i received incorrect value with " \ + "TEST_SHMEM_SET(%s, %s)\n", mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + } while (false) + + +int main(int argc, char* argv[]) { + shmem_init(); + + int rc = EXIT_SUCCESS; + +#ifdef ENABLE_DEPRECATED_TESTS + TEST_SHMEM_SET(SET, float); + TEST_SHMEM_SET(SET, double); + TEST_SHMEM_SET(SET, int); + TEST_SHMEM_SET(SET, long); + TEST_SHMEM_SET(SET, long long); + TEST_SHMEM_SET(SET, unsigned int); + TEST_SHMEM_SET(SET, unsigned long); + TEST_SHMEM_SET(SET, unsigned long long); + TEST_SHMEM_SET(SET, int32_t); + TEST_SHMEM_SET(SET, int64_t); + TEST_SHMEM_SET(SET, uint32_t); + TEST_SHMEM_SET(SET, uint64_t); + TEST_SHMEM_SET(SET, size_t); + TEST_SHMEM_SET(SET, ptrdiff_t); +#endif /* ENABLE_DEPRECATED_TESTS */ + + TEST_SHMEM_SET(ATOMIC_SET, float); + TEST_SHMEM_SET(ATOMIC_SET, double); + 
TEST_SHMEM_SET(ATOMIC_SET, int); + TEST_SHMEM_SET(ATOMIC_SET, long); + TEST_SHMEM_SET(ATOMIC_SET, long long); + TEST_SHMEM_SET(ATOMIC_SET, unsigned int); + TEST_SHMEM_SET(ATOMIC_SET, unsigned long); + TEST_SHMEM_SET(ATOMIC_SET, unsigned long long); + TEST_SHMEM_SET(ATOMIC_SET, int32_t); + TEST_SHMEM_SET(ATOMIC_SET, int64_t); + TEST_SHMEM_SET(ATOMIC_SET, uint32_t); + TEST_SHMEM_SET(ATOMIC_SET, uint64_t); + TEST_SHMEM_SET(ATOMIC_SET, size_t); + TEST_SHMEM_SET(ATOMIC_SET, ptrdiff_t); + + TEST_SHMEM_SET(CTX_ATOMIC_SET, float); + TEST_SHMEM_SET(CTX_ATOMIC_SET, double); + TEST_SHMEM_SET(CTX_ATOMIC_SET, int); + TEST_SHMEM_SET(CTX_ATOMIC_SET, long); + TEST_SHMEM_SET(CTX_ATOMIC_SET, long long); + TEST_SHMEM_SET(CTX_ATOMIC_SET, unsigned int); + TEST_SHMEM_SET(CTX_ATOMIC_SET, unsigned long); + TEST_SHMEM_SET(CTX_ATOMIC_SET, unsigned long long); + TEST_SHMEM_SET(CTX_ATOMIC_SET, int32_t); + TEST_SHMEM_SET(CTX_ATOMIC_SET, int64_t); + TEST_SHMEM_SET(CTX_ATOMIC_SET, uint32_t); + TEST_SHMEM_SET(CTX_ATOMIC_SET, uint64_t); + TEST_SHMEM_SET(CTX_ATOMIC_SET, size_t); + TEST_SHMEM_SET(CTX_ATOMIC_SET, ptrdiff_t); + + shmem_finalize(); + return rc; +} diff --git a/test/unit/cxx_test_shmem_atomic_swap.cpp b/test/unit/cxx_test_shmem_atomic_swap.cpp new file mode 100644 index 0000000..8efddf2 --- /dev/null +++ b/test/unit/cxx_test_shmem_atomic_swap.cpp @@ -0,0 +1,138 @@ +/* + * This test program is derived from a unit test created by Nick Park. + * The original unit test is a work of the U.S. Government and is not subject + * to copyright protection in the United States. Foreign copyrights may + * apply. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. 
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +enum op { SWAP = 0, ATOMIC_SWAP, CTX_ATOMIC_SWAP }; + +#ifdef ENABLE_DEPRECATED_TESTS +#define DEPRECATED_SWAP shmem_swap +#else +#define DEPRECATED_SWAP shmem_atomic_swap +#endif + +#define TEST_SHMEM_SWAP(OP, TYPE) \ + do { \ + static TYPE remote; \ + TYPE old; \ + const int mype = shmem_my_pe(); \ + const int npes = shmem_n_pes(); \ + remote = npes; \ + shmem_barrier_all(); \ + switch (OP) { \ + case SWAP: \ + old = DEPRECATED_SWAP(&remote, (TYPE)mype, (mype + 1) % npes); \ + break; \ + case ATOMIC_SWAP: \ + old = shmem_atomic_swap(&remote, (TYPE)mype, (mype + 1) % npes); \ + break; \ + case CTX_ATOMIC_SWAP: \ + old = shmem_atomic_swap(SHMEM_CTX_DEFAULT, &remote, \ + (TYPE)mype, (mype + 1) % npes); \ + break; \ + default: \ + printf("invalid operation (%d)\n", OP); \ + shmem_global_exit(1); \ + } \ + shmem_barrier_all(); \ + if (remote != (TYPE)((mype + npes - 1) % npes)) { \ + printf("PE %i received incorrect value with " \ + "TEST_SHMEM_SWAP(%s, %s)\n", mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + if (old != npes) { \ + printf("PE %i error inconsistent value of old (%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + } while (false) + + +int main(int argc, char* argv[]) { + shmem_init(); + + int rc = EXIT_SUCCESS; + +#ifdef ENABLE_DEPRECATED_TESTS + TEST_SHMEM_SWAP(SWAP, float); + TEST_SHMEM_SWAP(SWAP, double); + TEST_SHMEM_SWAP(SWAP, int); + TEST_SHMEM_SWAP(SWAP, long); + TEST_SHMEM_SWAP(SWAP, long long); + TEST_SHMEM_SWAP(SWAP, unsigned int); + TEST_SHMEM_SWAP(SWAP, unsigned long); + TEST_SHMEM_SWAP(SWAP, unsigned long long); + TEST_SHMEM_SWAP(SWAP, int32_t); + TEST_SHMEM_SWAP(SWAP, int64_t); + TEST_SHMEM_SWAP(SWAP, uint32_t); + TEST_SHMEM_SWAP(SWAP, uint64_t); + TEST_SHMEM_SWAP(SWAP, size_t); + TEST_SHMEM_SWAP(SWAP, ptrdiff_t); +#endif /* ENABLE_DEPRECATED_TESTS */ + + TEST_SHMEM_SWAP(ATOMIC_SWAP, float); + TEST_SHMEM_SWAP(ATOMIC_SWAP, double); + TEST_SHMEM_SWAP(ATOMIC_SWAP, 
int); + TEST_SHMEM_SWAP(ATOMIC_SWAP, long); + TEST_SHMEM_SWAP(ATOMIC_SWAP, long long); + TEST_SHMEM_SWAP(ATOMIC_SWAP, unsigned int); + TEST_SHMEM_SWAP(ATOMIC_SWAP, unsigned long); + TEST_SHMEM_SWAP(ATOMIC_SWAP, unsigned long long); + TEST_SHMEM_SWAP(ATOMIC_SWAP, int32_t); + TEST_SHMEM_SWAP(ATOMIC_SWAP, int64_t); + TEST_SHMEM_SWAP(ATOMIC_SWAP, uint32_t); + TEST_SHMEM_SWAP(ATOMIC_SWAP, uint64_t); + TEST_SHMEM_SWAP(ATOMIC_SWAP, size_t); + TEST_SHMEM_SWAP(ATOMIC_SWAP, ptrdiff_t); + + TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, float); + TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, double); + TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, int); + TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, long); + TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, long long); + TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, unsigned int); + TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, unsigned long); + TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, unsigned long long); + TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, int32_t); + TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, int64_t); + TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, uint32_t); + TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, uint64_t); + TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, size_t); + TEST_SHMEM_SWAP(CTX_ATOMIC_SWAP, ptrdiff_t); + + shmem_finalize(); + return rc; +} diff --git a/test/unit/cxx_test_shmem_atomic_xor.cpp b/test/unit/cxx_test_shmem_atomic_xor.cpp new file mode 100644 index 0000000..c52e3c3 --- /dev/null +++ b/test/unit/cxx_test_shmem_atomic_xor.cpp @@ -0,0 +1,129 @@ +/* + * This test program is derived from a unit test created by Nick Park. + * The original unit test is a work of the U.S. Government and is not subject + * to copyright protection in the United States. Foreign copyrights may + * apply. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. 
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include + +enum op { XOR = 0, CTX_XOR, FETCH_XOR, CTX_FETCH_XOR }; + +/* Initially, remote = 111...b. Each PE performs an atomic XOR where the + * PEth bit of the input value is set to 1 and all other bits are set to 0. + * The result has the NPES least significant bits cleared, 111...000...b. 
+ */ + +#define TEST_SHMEM_XOR(OP, TYPE) \ + do { \ + static TYPE remote = ~(TYPE)0; \ + TYPE old; \ + if (npes-1 > sizeof(TYPE)) break; /* Avoid overflow */ \ + for (int i = 0; i < npes; i++) \ + switch (OP) { \ + case XOR: \ + shmem_atomic_xor(&remote, (TYPE)(1LLU << mype), i); \ + break; \ + case CTX_XOR: \ + shmem_atomic_xor(SHMEM_CTX_DEFAULT, &remote, (TYPE)(1LLU << mype), i); \ + break; \ + case FETCH_XOR: \ + old = shmem_atomic_fetch_xor(&remote, (TYPE)(1LLU << mype), i); \ + if (((old ^ (TYPE)(1LLU << mype)) & (TYPE)(1LLU << mype)) != 0) { \ + printf("PE %i error inconsistent value of old (%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + break; \ + case CTX_FETCH_XOR: \ + old = shmem_atomic_fetch_xor(SHMEM_CTX_DEFAULT, &remote, (TYPE)(1LLU << mype), i); \ + if (((old ^ (TYPE)(1LLU << mype)) & (TYPE)(1LLU << mype)) != 0) { \ + printf("PE %i error inconsistent value of old (%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + break; \ + default: \ + printf("Invalid operation (%d)\n", OP); \ + shmem_global_exit(1); \ + } \ + shmem_barrier_all(); \ + if (remote != ~(TYPE)((1LLU << npes) - 1LLU)) { \ + printf("PE %i observed error with TEST_SHMEM_XOR(%s, %s)\n", \ + mype, #OP, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + } while (false) + + +int main(int argc, char* argv[]) { + shmem_init(); + + const int mype = shmem_my_pe(); + const int npes = shmem_n_pes(); + + int rc = EXIT_SUCCESS; + TEST_SHMEM_XOR(XOR, unsigned int); + TEST_SHMEM_XOR(XOR, unsigned long); + TEST_SHMEM_XOR(XOR, unsigned long long); + TEST_SHMEM_XOR(XOR, int32_t); + TEST_SHMEM_XOR(XOR, int64_t); + TEST_SHMEM_XOR(XOR, uint32_t); + TEST_SHMEM_XOR(XOR, uint64_t); + + TEST_SHMEM_XOR(CTX_XOR, unsigned int); + TEST_SHMEM_XOR(CTX_XOR, unsigned long); + TEST_SHMEM_XOR(CTX_XOR, unsigned long long); + TEST_SHMEM_XOR(CTX_XOR, int32_t); + TEST_SHMEM_XOR(CTX_XOR, int64_t); + TEST_SHMEM_XOR(CTX_XOR, uint32_t); + TEST_SHMEM_XOR(CTX_XOR, uint64_t); + + TEST_SHMEM_XOR(FETCH_XOR, 
unsigned int); + TEST_SHMEM_XOR(FETCH_XOR, unsigned long); + TEST_SHMEM_XOR(FETCH_XOR, unsigned long long); + TEST_SHMEM_XOR(FETCH_XOR, int32_t); + TEST_SHMEM_XOR(FETCH_XOR, int64_t); + TEST_SHMEM_XOR(FETCH_XOR, uint32_t); + TEST_SHMEM_XOR(FETCH_XOR, uint64_t); + + TEST_SHMEM_XOR(CTX_FETCH_XOR, unsigned int); + TEST_SHMEM_XOR(CTX_FETCH_XOR, unsigned long); + TEST_SHMEM_XOR(CTX_FETCH_XOR, unsigned long long); + TEST_SHMEM_XOR(CTX_FETCH_XOR, int32_t); + TEST_SHMEM_XOR(CTX_FETCH_XOR, int64_t); + TEST_SHMEM_XOR(CTX_FETCH_XOR, uint32_t); + TEST_SHMEM_XOR(CTX_FETCH_XOR, uint64_t); + + shmem_finalize(); + return rc; +} diff --git a/test/unit/cxx_test_shmem_complex.cpp b/test/unit/cxx_test_shmem_complex.cpp new file mode 100644 index 0000000..0cf2f7a --- /dev/null +++ b/test/unit/cxx_test_shmem_complex.cpp @@ -0,0 +1,96 @@ +/* + * This test program is derived from a unit test created by Nick Park. + * The original unit test is a work of the U.S. Government and is not subject + * to copyright protection in the United States. Foreign copyrights may + * apply. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include + /* MAX(a, b): larger of a and b. The whole expansion is parenthesized so it stays one operand inside a larger expression; each argument may be evaluated twice. */ +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) + /* DECLARE_FOR(TYPE): defines, with C linkage, the 10-element TYPE_dest and TYPE_src complex arrays, the TYPE_workData pointer, and TYPE_alloc_data(), which shmem_malloc's the larger of sizeof(TYPE_src) and SHMEM_REDUCE_MIN_WRKDATA_SIZE elements, in bytes. */ +#define DECLARE_FOR(TYPE) \ + extern "C" { \ + TYPE _Complex TYPE##_dest[10]; \ + TYPE _Complex TYPE##_src[10]; \ + TYPE _Complex* TYPE##_workData; \ + TYPE _Complex* TYPE##_alloc_data() { \ + size_t size = sizeof(TYPE##_src[0]); \ + size_t minData = SHMEM_REDUCE_MIN_WRKDATA_SIZE*size; \ + size_t workDataSize = MAX(sizeof(TYPE##_src),minData); \ + return (TYPE _Complex*)shmem_malloc(workDataSize); \ + } \ + } + +long syncArr[SHMEM_REDUCE_SYNC_SIZE]; + /* TEST_COMPLEX(TYPE, LETTER, OP): zeroes TYPE_src, runs shmem_complex<LETTER>_<OP>_to_all over 10 elements on all PEs, then PE 0 fetches TYPE_dest from every other PE into TYPE_src and increments rc for each PE whose bytes differ from PE 0's own TYPE_dest. NOTE(review): the work buffer from TYPE_alloc_data() is never released in this macro. */ +#define TEST_COMPLEX(TYPE,LETTER,OP) \ + { \ + TYPE##_workData = TYPE##_alloc_data(); \ + \ + memset(TYPE##_src,0,sizeof(TYPE##_src)); \ + \ + shmem_complex##LETTER##_##OP##_to_all(TYPE##_dest,TYPE##_src,10, \ + 0,0, shmem_n_pes(), TYPE##_workData, syncArr); \ + \ + shmem_barrier_all(); \ + \ + if(shmem_my_pe() == 0) { \ + int i; \ + for(i = 1; i < shmem_n_pes(); ++i) { \ + shmem_getmem(TYPE##_src,TYPE##_dest,sizeof(TYPE##_dest),i); \ + if(0 != memcmp(TYPE##_src,TYPE##_dest,sizeof(TYPE##_src))) { \ + ++rc; \ + } \ + } \ + } \ + \ + shmem_barrier_all(); \ + } + +DECLARE_FOR(float); +DECLARE_FOR(double); + +int main(int argc, char* argv[]) { + shmem_init(); + + int rc = 0; + + TEST_COMPLEX(float,f,sum); + TEST_COMPLEX(float,f,prod); + TEST_COMPLEX(double,d,sum); + TEST_COMPLEX(double,d,prod); + + shmem_finalize(); + return rc; +} + +diff --git a/test/unit/cxx_test_shmem_g.cpp b/test/unit/cxx_test_shmem_g.cpp new file mode 100644 index 0000000..a7912a7 --- /dev/null +++ b/test/unit/cxx_test_shmem_g.cpp @@ -0,0 +1,115 @@ +/* + * This test program is derived from a unit test created by Nick Park. + * The original unit test is a work of the U.S.
Government and is not subject + * to copyright protection in the United States. Foreign copyrights may + * apply. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + /* TEST_SHMEM_G(USE_CTX, TYPE): each PE stores its own rank in `remote`, waits at a barrier, then reads `remote` from the next PE (wrapping around) with shmem_g, through SHMEM_CTX_DEFAULT when USE_CTX is non-zero. The value read must equal that neighbour's rank; otherwise rc from the expansion site is set to EXIT_FAILURE. */ +#define TEST_SHMEM_G(USE_CTX, TYPE) \ + do { \ + static TYPE remote; \ + TYPE val; \ + const int mype = shmem_my_pe(); \ + const int npes = shmem_n_pes(); \ + remote = (TYPE)mype; \ + shmem_barrier_all(); \ + if (USE_CTX) \ + val = shmem_g(SHMEM_CTX_DEFAULT, &remote, (mype + 1) % npes); \ + else \ + val = shmem_g(&remote, (mype + 1) % npes); \ + if (val != (TYPE)((mype + 1) % npes)) { \ + printf("PE %i received incorrect value with " \ + "TEST_SHMEM_G(%d, %s)\n", mype, \ + (int)(USE_CTX), #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + } while (false) + +int main(int argc, char* argv[]) { + shmem_init(); + + int rc = EXIT_SUCCESS; + TEST_SHMEM_G(0, float); + TEST_SHMEM_G(0, double); + TEST_SHMEM_G(0, long double); + TEST_SHMEM_G(0, char); + TEST_SHMEM_G(0, signed char); + TEST_SHMEM_G(0, short); + TEST_SHMEM_G(0, int); + TEST_SHMEM_G(0, long); + TEST_SHMEM_G(0, long long); + TEST_SHMEM_G(0, unsigned char); + TEST_SHMEM_G(0, unsigned short); + TEST_SHMEM_G(0, unsigned int); + TEST_SHMEM_G(0, unsigned long); + TEST_SHMEM_G(0, unsigned long long); + TEST_SHMEM_G(0, int8_t); + TEST_SHMEM_G(0, int16_t); + TEST_SHMEM_G(0, int32_t); + TEST_SHMEM_G(0, int64_t); + TEST_SHMEM_G(0, uint8_t); + TEST_SHMEM_G(0, uint16_t); + TEST_SHMEM_G(0, uint32_t); + TEST_SHMEM_G(0, uint64_t); + TEST_SHMEM_G(0, size_t); + TEST_SHMEM_G(0, ptrdiff_t); + + TEST_SHMEM_G(1, float); + TEST_SHMEM_G(1, double); + TEST_SHMEM_G(1, long double); + TEST_SHMEM_G(1, char); + TEST_SHMEM_G(1, signed char); + TEST_SHMEM_G(1, short); + TEST_SHMEM_G(1, int); + TEST_SHMEM_G(1, long); + TEST_SHMEM_G(1, long long); + TEST_SHMEM_G(1, unsigned char); + TEST_SHMEM_G(1, unsigned short); + TEST_SHMEM_G(1, unsigned int); + TEST_SHMEM_G(1, unsigned long); + TEST_SHMEM_G(1, unsigned long long); + TEST_SHMEM_G(1, int8_t); + TEST_SHMEM_G(1, int16_t); + TEST_SHMEM_G(1, int32_t); + TEST_SHMEM_G(1, int64_t); + TEST_SHMEM_G(1, uint8_t); + TEST_SHMEM_G(1, uint16_t); +
TEST_SHMEM_G(1, uint32_t); + TEST_SHMEM_G(1, uint64_t); + TEST_SHMEM_G(1, size_t); + TEST_SHMEM_G(1, ptrdiff_t); + + shmem_finalize(); + return rc; +} diff --git a/test/unit/cxx_test_shmem_get.cpp b/test/unit/cxx_test_shmem_get.cpp new file mode 100644 index 0000000..437c279 --- /dev/null +++ b/test/unit/cxx_test_shmem_get.cpp @@ -0,0 +1,239 @@ +/* + * This test program is derived from a unit test created by Nick Park. + * The original unit test is a work of the U.S. Government and is not subject + * to copyright protection in the United States. Foreign copyrights may + * apply. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +enum op { GET = 0, IGET, GET_NBI }; + /* TEST_SHMEM_GET(OP, USE_CTX, TYPE): each PE fills its 10-element `remote` array with its own rank, waits at a barrier, then copies `remote` from the next PE (wrapping around) into `local` using the variant selected by OP: GET, IGET with both strides 1, or GET_NBI followed by shmem_quiet(). SHMEM_CTX_DEFAULT is passed when USE_CTX is non-zero. Every element of `local` must equal the neighbour's rank; otherwise rc from the expansion site is set to EXIT_FAILURE. An unknown OP calls shmem_global_exit(1). */ +#define TEST_SHMEM_GET(OP, USE_CTX, TYPE) \ + do { \ + static TYPE remote[10]; \ + const int mype = shmem_my_pe(); \ + const int npes = shmem_n_pes(); \ + TYPE local[10]; \ + for (int i = 0; i < 10; i++) \ + remote[i] = (TYPE)mype; \ + shmem_barrier_all(); \ + switch (OP) { \ + case GET: \ + if (USE_CTX) \ + shmem_get(SHMEM_CTX_DEFAULT, local, remote, 10, (mype + 1) % npes); \ + else \ + shmem_get(local, remote, 10, (mype + 1) % npes); \ + break; \ + case IGET: \ + if (USE_CTX) \ + shmem_iget(SHMEM_CTX_DEFAULT, local, remote, 1, 1, 10, (mype + 1) % npes); \ + else \ + shmem_iget(local, remote, 1, 1, 10, (mype + 1) % npes); \ + break; \ + case GET_NBI: \ + if (USE_CTX) \ + shmem_get_nbi(SHMEM_CTX_DEFAULT, local, remote, 10, (mype + 1) % npes); \ + else \ + shmem_get_nbi(local, remote, 10, (mype + 1) % npes); \ + shmem_quiet(); \ + break; \ + default: \ + printf("Invalid operation (%d)\n", OP); \ + shmem_global_exit(1); \ + } \ + for (int i = 0; i < 10; i++) \ + if (local[i] != (TYPE)((mype + 1) % npes)) { \ + printf("PE %i received incorrect value with " \ + "TEST_SHMEM_GET(%s, %d, %s)\n", mype, #OP, \ + (int)(USE_CTX), #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + } while (false) + +int main(int argc, char* argv[]) { + shmem_init(); + + int rc = EXIT_SUCCESS; + TEST_SHMEM_GET(GET, 0, float); + TEST_SHMEM_GET(GET, 0, double); + TEST_SHMEM_GET(GET, 0, long double); + TEST_SHMEM_GET(GET, 0, char); + TEST_SHMEM_GET(GET, 0, signed char); + TEST_SHMEM_GET(GET, 0, short); + TEST_SHMEM_GET(GET, 0, int); + TEST_SHMEM_GET(GET, 0, long); + TEST_SHMEM_GET(GET, 0, long long); + TEST_SHMEM_GET(GET, 0, unsigned char); + TEST_SHMEM_GET(GET, 0, unsigned short); + TEST_SHMEM_GET(GET, 0, unsigned int); + TEST_SHMEM_GET(GET, 0, unsigned long); + TEST_SHMEM_GET(GET, 0, unsigned long long); + TEST_SHMEM_GET(GET, 0, int8_t); + TEST_SHMEM_GET(GET, 0, int16_t); + TEST_SHMEM_GET(GET, 0, int32_t); +
TEST_SHMEM_GET(GET, 0, int64_t); + TEST_SHMEM_GET(GET, 0, uint8_t); + TEST_SHMEM_GET(GET, 0, uint16_t); + TEST_SHMEM_GET(GET, 0, uint32_t); + TEST_SHMEM_GET(GET, 0, uint64_t); + TEST_SHMEM_GET(GET, 0, size_t); + TEST_SHMEM_GET(GET, 0, ptrdiff_t); + + TEST_SHMEM_GET(GET, 1, float); + TEST_SHMEM_GET(GET, 1, double); + TEST_SHMEM_GET(GET, 1, long double); + TEST_SHMEM_GET(GET, 1, char); + TEST_SHMEM_GET(GET, 1, signed char); + TEST_SHMEM_GET(GET, 1, short); + TEST_SHMEM_GET(GET, 1, int); + TEST_SHMEM_GET(GET, 1, long); + TEST_SHMEM_GET(GET, 1, long long); + TEST_SHMEM_GET(GET, 1, unsigned char); + TEST_SHMEM_GET(GET, 1, unsigned short); + TEST_SHMEM_GET(GET, 1, unsigned int); + TEST_SHMEM_GET(GET, 1, unsigned long); + TEST_SHMEM_GET(GET, 1, unsigned long long); + TEST_SHMEM_GET(GET, 1, int8_t); + TEST_SHMEM_GET(GET, 1, int16_t); + TEST_SHMEM_GET(GET, 1, int32_t); + TEST_SHMEM_GET(GET, 1, int64_t); + TEST_SHMEM_GET(GET, 1, uint8_t); + TEST_SHMEM_GET(GET, 1, uint16_t); + TEST_SHMEM_GET(GET, 1, uint32_t); + TEST_SHMEM_GET(GET, 1, uint64_t); + TEST_SHMEM_GET(GET, 1, size_t); + TEST_SHMEM_GET(GET, 1, ptrdiff_t); + + TEST_SHMEM_GET(IGET, 0, float); + TEST_SHMEM_GET(IGET, 0, double); + TEST_SHMEM_GET(IGET, 0, long double); + TEST_SHMEM_GET(IGET, 0, char); + TEST_SHMEM_GET(IGET, 0, signed char); + TEST_SHMEM_GET(IGET, 0, short); + TEST_SHMEM_GET(IGET, 0, int); + TEST_SHMEM_GET(IGET, 0, long); + TEST_SHMEM_GET(IGET, 0, long long); + TEST_SHMEM_GET(IGET, 0, unsigned char); + TEST_SHMEM_GET(IGET, 0, unsigned short); + TEST_SHMEM_GET(IGET, 0, unsigned int); + TEST_SHMEM_GET(IGET, 0, unsigned long); + TEST_SHMEM_GET(IGET, 0, unsigned long long); + TEST_SHMEM_GET(IGET, 0, int8_t); + TEST_SHMEM_GET(IGET, 0, int16_t); + TEST_SHMEM_GET(IGET, 0, int32_t); + TEST_SHMEM_GET(IGET, 0, int64_t); + TEST_SHMEM_GET(IGET, 0, uint8_t); + TEST_SHMEM_GET(IGET, 0, uint16_t); + TEST_SHMEM_GET(IGET, 0, uint32_t); + TEST_SHMEM_GET(IGET, 0, uint64_t); + TEST_SHMEM_GET(IGET, 0, size_t); + 
TEST_SHMEM_GET(IGET, 0, ptrdiff_t); + + TEST_SHMEM_GET(IGET, 1, float); + TEST_SHMEM_GET(IGET, 1, double); + TEST_SHMEM_GET(IGET, 1, long double); + TEST_SHMEM_GET(IGET, 1, char); + TEST_SHMEM_GET(IGET, 1, signed char); + TEST_SHMEM_GET(IGET, 1, short); + TEST_SHMEM_GET(IGET, 1, int); + TEST_SHMEM_GET(IGET, 1, long); + TEST_SHMEM_GET(IGET, 1, long long); + TEST_SHMEM_GET(IGET, 1, unsigned char); + TEST_SHMEM_GET(IGET, 1, unsigned short); + TEST_SHMEM_GET(IGET, 1, unsigned int); + TEST_SHMEM_GET(IGET, 1, unsigned long); + TEST_SHMEM_GET(IGET, 1, unsigned long long); + TEST_SHMEM_GET(IGET, 1, int8_t); + TEST_SHMEM_GET(IGET, 1, int16_t); + TEST_SHMEM_GET(IGET, 1, int32_t); + TEST_SHMEM_GET(IGET, 1, int64_t); + TEST_SHMEM_GET(IGET, 1, uint8_t); + TEST_SHMEM_GET(IGET, 1, uint16_t); + TEST_SHMEM_GET(IGET, 1, uint32_t); + TEST_SHMEM_GET(IGET, 1, uint64_t); + TEST_SHMEM_GET(IGET, 1, size_t); + TEST_SHMEM_GET(IGET, 1, ptrdiff_t); + + TEST_SHMEM_GET(GET_NBI, 0, float); + TEST_SHMEM_GET(GET_NBI, 0, double); + TEST_SHMEM_GET(GET_NBI, 0, long double); + TEST_SHMEM_GET(GET_NBI, 0, char); + TEST_SHMEM_GET(GET_NBI, 0, signed char); + TEST_SHMEM_GET(GET_NBI, 0, short); + TEST_SHMEM_GET(GET_NBI, 0, int); + TEST_SHMEM_GET(GET_NBI, 0, long); + TEST_SHMEM_GET(GET_NBI, 0, long long); + TEST_SHMEM_GET(GET_NBI, 0, unsigned char); + TEST_SHMEM_GET(GET_NBI, 0, unsigned short); + TEST_SHMEM_GET(GET_NBI, 0, unsigned int); + TEST_SHMEM_GET(GET_NBI, 0, unsigned long); + TEST_SHMEM_GET(GET_NBI, 0, unsigned long long); + TEST_SHMEM_GET(GET_NBI, 0, int8_t); + TEST_SHMEM_GET(GET_NBI, 0, int16_t); + TEST_SHMEM_GET(GET_NBI, 0, int32_t); + TEST_SHMEM_GET(GET_NBI, 0, int64_t); + TEST_SHMEM_GET(GET_NBI, 0, uint8_t); + TEST_SHMEM_GET(GET_NBI, 0, uint16_t); + TEST_SHMEM_GET(GET_NBI, 0, uint32_t); + TEST_SHMEM_GET(GET_NBI, 0, uint64_t); + TEST_SHMEM_GET(GET_NBI, 0, size_t); + TEST_SHMEM_GET(GET_NBI, 0, ptrdiff_t); + + TEST_SHMEM_GET(GET_NBI, 1, float); + TEST_SHMEM_GET(GET_NBI, 1, double); + 
TEST_SHMEM_GET(GET_NBI, 1, long double); + TEST_SHMEM_GET(GET_NBI, 1, char); + TEST_SHMEM_GET(GET_NBI, 1, signed char); + TEST_SHMEM_GET(GET_NBI, 1, short); + TEST_SHMEM_GET(GET_NBI, 1, int); + TEST_SHMEM_GET(GET_NBI, 1, long); + TEST_SHMEM_GET(GET_NBI, 1, long long); + TEST_SHMEM_GET(GET_NBI, 1, unsigned char); + TEST_SHMEM_GET(GET_NBI, 1, unsigned short); + TEST_SHMEM_GET(GET_NBI, 1, unsigned int); + TEST_SHMEM_GET(GET_NBI, 1, unsigned long); + TEST_SHMEM_GET(GET_NBI, 1, unsigned long long); + TEST_SHMEM_GET(GET_NBI, 1, int8_t); + TEST_SHMEM_GET(GET_NBI, 1, int16_t); + TEST_SHMEM_GET(GET_NBI, 1, int32_t); + TEST_SHMEM_GET(GET_NBI, 1, int64_t); + TEST_SHMEM_GET(GET_NBI, 1, uint8_t); + TEST_SHMEM_GET(GET_NBI, 1, uint16_t); + TEST_SHMEM_GET(GET_NBI, 1, uint32_t); + TEST_SHMEM_GET(GET_NBI, 1, uint64_t); + TEST_SHMEM_GET(GET_NBI, 1, size_t); + TEST_SHMEM_GET(GET_NBI, 1, ptrdiff_t); + + shmem_finalize(); + return rc; +} diff --git a/test/unit/cxx_test_shmem_p.cpp b/test/unit/cxx_test_shmem_p.cpp new file mode 100644 index 0000000..9dfbd7e --- /dev/null +++ b/test/unit/cxx_test_shmem_p.cpp @@ -0,0 +1,113 @@ +/* + * This test program is derived from a unit test created by Nick Park. + * The original unit test is a work of the U.S. Government and is not subject + * to copyright protection in the United States. Foreign copyrights may + * apply. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include + /* TEST_SHMEM_P(USE_CTX, TYPE): each PE writes its own rank into `remote` on the next PE (wrapping around) with shmem_p, through SHMEM_CTX_DEFAULT when USE_CTX is non-zero. After shmem_barrier_all() each PE checks that its own `remote` holds the rank of the previous PE; on a mismatch rc from the expansion site is set to EXIT_FAILURE. */ +#define TEST_SHMEM_P(USE_CTX, TYPE) \ + do { \ + static TYPE remote; \ + const int mype = shmem_my_pe(); \ + const int npes = shmem_n_pes(); \ + if (USE_CTX) \ + shmem_p(SHMEM_CTX_DEFAULT, &remote, (TYPE)mype, (mype + 1) % npes); \ + else \ + shmem_p(&remote, (TYPE)mype, (mype + 1) % npes);\ + shmem_barrier_all(); \ + if (remote != (TYPE)((mype + npes - 1) % npes)) { /* expected writer is the previous PE, wrapping around */ \ + printf("PE %i received incorrect value with " \ + "TEST_SHMEM_P(%d, %s)\n", mype, \ + (int)(USE_CTX), #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + } while (false) + +int main(int argc, char* argv[]) { + shmem_init(); + + int rc = EXIT_SUCCESS; + TEST_SHMEM_P(0, float); + TEST_SHMEM_P(0, double); + TEST_SHMEM_P(0, long double); + TEST_SHMEM_P(0, char); + TEST_SHMEM_P(0, signed char); + TEST_SHMEM_P(0, short); + TEST_SHMEM_P(0, int); + TEST_SHMEM_P(0, long); + TEST_SHMEM_P(0, long long); + TEST_SHMEM_P(0, unsigned char); + TEST_SHMEM_P(0, unsigned short); + TEST_SHMEM_P(0, unsigned int); + TEST_SHMEM_P(0, unsigned long); + TEST_SHMEM_P(0, unsigned long long); + TEST_SHMEM_P(0, int8_t); + TEST_SHMEM_P(0, int16_t); + TEST_SHMEM_P(0, int32_t); + TEST_SHMEM_P(0, int64_t); + TEST_SHMEM_P(0, uint8_t); + TEST_SHMEM_P(0, uint16_t); +
TEST_SHMEM_P(0, uint32_t); + TEST_SHMEM_P(0, uint64_t); + TEST_SHMEM_P(0, size_t); + TEST_SHMEM_P(0, ptrdiff_t); + + TEST_SHMEM_P(1, float); + TEST_SHMEM_P(1, double); + TEST_SHMEM_P(1, long double); + TEST_SHMEM_P(1, char); + TEST_SHMEM_P(1, signed char); + TEST_SHMEM_P(1, short); + TEST_SHMEM_P(1, int); + TEST_SHMEM_P(1, long); + TEST_SHMEM_P(1, long long); + TEST_SHMEM_P(1, unsigned char); + TEST_SHMEM_P(1, unsigned short); + TEST_SHMEM_P(1, unsigned int); + TEST_SHMEM_P(1, unsigned long); + TEST_SHMEM_P(1, unsigned long long); + TEST_SHMEM_P(1, int8_t); + TEST_SHMEM_P(1, int16_t); + TEST_SHMEM_P(1, int32_t); + TEST_SHMEM_P(1, int64_t); + TEST_SHMEM_P(1, uint8_t); + TEST_SHMEM_P(1, uint16_t); + TEST_SHMEM_P(1, uint32_t); + TEST_SHMEM_P(1, uint64_t); + TEST_SHMEM_P(1, size_t); + TEST_SHMEM_P(1, ptrdiff_t); + + shmem_finalize(); + return rc; +} diff --git a/test/unit/cxx_test_shmem_put.cpp b/test/unit/cxx_test_shmem_put.cpp new file mode 100644 index 0000000..196df8d --- /dev/null +++ b/test/unit/cxx_test_shmem_put.cpp @@ -0,0 +1,239 @@ +/* + * This test program is derived from a unit test created by Nick Park. + * The original unit test is a work of the U.S. Government and is not subject + * to copyright protection in the United States. Foreign copyrights may + * apply. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include + +enum op { PUT = 0, IPUT, PUT_NBI }; + /* TEST_SHMEM_PUT(OP, USE_CTX, TYPE): each PE fills a 10-element `local` array with its own rank and writes it into `remote` on the next PE (wrapping around) using the variant selected by OP: PUT, IPUT with both strides 1, or PUT_NBI followed by shmem_quiet(). SHMEM_CTX_DEFAULT is passed when USE_CTX is non-zero. After shmem_barrier_all() every element of the PE's own `remote` must equal the previous PE's rank; otherwise rc from the expansion site is set to EXIT_FAILURE. An unknown OP calls shmem_global_exit(1). */ +#define TEST_SHMEM_PUT(OP, USE_CTX, TYPE) \ + do { \ + static TYPE remote[10]; \ + const int mype = shmem_my_pe(); \ + const int npes = shmem_n_pes(); \ + TYPE local[10]; \ + for (int i = 0; i < 10; i++) \ + local[i] = (TYPE)mype; \ + switch (OP) { \ + case PUT: \ + if (USE_CTX) \ + shmem_put(SHMEM_CTX_DEFAULT, remote, local, 10, (mype + 1) % npes); \ + else \ + shmem_put(remote, local, 10, (mype + 1) % npes); \ + break; \ + case IPUT: \ + if (USE_CTX) \ + shmem_iput(SHMEM_CTX_DEFAULT, remote, local, 1, 1, 10, (mype + 1) % npes); \ + else \ + shmem_iput(remote, local, 1, 1, 10, (mype + 1) % npes); \ + break; \ + case PUT_NBI: \ + if (USE_CTX) \ + shmem_put_nbi(SHMEM_CTX_DEFAULT, remote, local, 10, (mype + 1) % npes); \ + else \ + shmem_put_nbi(remote, local, 10, (mype + 1) % npes); \ + shmem_quiet(); \ + break; \ + default: \ + printf("Invalid operation (%d)\n", OP); \ + shmem_global_exit(1); \ + } \ + shmem_barrier_all(); \ + for (int i = 0; i < 10; i++) \ + if (remote[i] != (TYPE)((mype + npes - 1) % npes)) { /* expected writer is the previous PE, wrapping around */ \ + printf("PE %i received incorrect value with " \ + "TEST_SHMEM_PUT(%s, %d, %s)\n", mype, \ + #OP, (int)(USE_CTX), #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + } while (false) + +int main(int argc, char* argv[]) { + shmem_init(); + + int rc = EXIT_SUCCESS; + TEST_SHMEM_PUT(PUT, 0, float); + TEST_SHMEM_PUT(PUT, 0, double); + TEST_SHMEM_PUT(PUT, 0, long double); +
TEST_SHMEM_PUT(PUT, 0, char); + TEST_SHMEM_PUT(PUT, 0, signed char); + TEST_SHMEM_PUT(PUT, 0, short); + TEST_SHMEM_PUT(PUT, 0, int); + TEST_SHMEM_PUT(PUT, 0, long); + TEST_SHMEM_PUT(PUT, 0, long long); + TEST_SHMEM_PUT(PUT, 0, unsigned char); + TEST_SHMEM_PUT(PUT, 0, unsigned short); + TEST_SHMEM_PUT(PUT, 0, unsigned int); + TEST_SHMEM_PUT(PUT, 0, unsigned long); + TEST_SHMEM_PUT(PUT, 0, unsigned long long); + TEST_SHMEM_PUT(PUT, 0, int8_t); + TEST_SHMEM_PUT(PUT, 0, int16_t); + TEST_SHMEM_PUT(PUT, 0, int32_t); + TEST_SHMEM_PUT(PUT, 0, int64_t); + TEST_SHMEM_PUT(PUT, 0, uint8_t); + TEST_SHMEM_PUT(PUT, 0, uint16_t); + TEST_SHMEM_PUT(PUT, 0, uint32_t); + TEST_SHMEM_PUT(PUT, 0, uint64_t); + TEST_SHMEM_PUT(PUT, 0, size_t); + TEST_SHMEM_PUT(PUT, 0, ptrdiff_t); + + TEST_SHMEM_PUT(PUT, 1, float); + TEST_SHMEM_PUT(PUT, 1, double); + TEST_SHMEM_PUT(PUT, 1, long double); + TEST_SHMEM_PUT(PUT, 1, char); + TEST_SHMEM_PUT(PUT, 1, signed char); + TEST_SHMEM_PUT(PUT, 1, short); + TEST_SHMEM_PUT(PUT, 1, int); + TEST_SHMEM_PUT(PUT, 1, long); + TEST_SHMEM_PUT(PUT, 1, long long); + TEST_SHMEM_PUT(PUT, 1, unsigned char); + TEST_SHMEM_PUT(PUT, 1, unsigned short); + TEST_SHMEM_PUT(PUT, 1, unsigned int); + TEST_SHMEM_PUT(PUT, 1, unsigned long); + TEST_SHMEM_PUT(PUT, 1, unsigned long long); + TEST_SHMEM_PUT(PUT, 1, int8_t); + TEST_SHMEM_PUT(PUT, 1, int16_t); + TEST_SHMEM_PUT(PUT, 1, int32_t); + TEST_SHMEM_PUT(PUT, 1, int64_t); + TEST_SHMEM_PUT(PUT, 1, uint8_t); + TEST_SHMEM_PUT(PUT, 1, uint16_t); + TEST_SHMEM_PUT(PUT, 1, uint32_t); + TEST_SHMEM_PUT(PUT, 1, uint64_t); + TEST_SHMEM_PUT(PUT, 1, size_t); + TEST_SHMEM_PUT(PUT, 1, ptrdiff_t); + + TEST_SHMEM_PUT(IPUT, 0, float); + TEST_SHMEM_PUT(IPUT, 0, double); + TEST_SHMEM_PUT(IPUT, 0, long double); + TEST_SHMEM_PUT(IPUT, 0, char); + TEST_SHMEM_PUT(IPUT, 0, signed char); + TEST_SHMEM_PUT(IPUT, 0, short); + TEST_SHMEM_PUT(IPUT, 0, int); + TEST_SHMEM_PUT(IPUT, 0, long); + TEST_SHMEM_PUT(IPUT, 0, long long); + TEST_SHMEM_PUT(IPUT, 0, unsigned 
char); + TEST_SHMEM_PUT(IPUT, 0, unsigned short); + TEST_SHMEM_PUT(IPUT, 0, unsigned int); + TEST_SHMEM_PUT(IPUT, 0, unsigned long); + TEST_SHMEM_PUT(IPUT, 0, unsigned long long); + TEST_SHMEM_PUT(IPUT, 0, int8_t); + TEST_SHMEM_PUT(IPUT, 0, int16_t); + TEST_SHMEM_PUT(IPUT, 0, int32_t); + TEST_SHMEM_PUT(IPUT, 0, int64_t); + TEST_SHMEM_PUT(IPUT, 0, uint8_t); + TEST_SHMEM_PUT(IPUT, 0, uint16_t); + TEST_SHMEM_PUT(IPUT, 0, uint32_t); + TEST_SHMEM_PUT(IPUT, 0, uint64_t); + TEST_SHMEM_PUT(IPUT, 0, size_t); + TEST_SHMEM_PUT(IPUT, 0, ptrdiff_t); + + TEST_SHMEM_PUT(IPUT, 1, float); + TEST_SHMEM_PUT(IPUT, 1, double); + TEST_SHMEM_PUT(IPUT, 1, long double); + TEST_SHMEM_PUT(IPUT, 1, char); + TEST_SHMEM_PUT(IPUT, 1, signed char); + TEST_SHMEM_PUT(IPUT, 1, short); + TEST_SHMEM_PUT(IPUT, 1, int); + TEST_SHMEM_PUT(IPUT, 1, long); + TEST_SHMEM_PUT(IPUT, 1, long long); + TEST_SHMEM_PUT(IPUT, 1, unsigned char); + TEST_SHMEM_PUT(IPUT, 1, unsigned short); + TEST_SHMEM_PUT(IPUT, 1, unsigned int); + TEST_SHMEM_PUT(IPUT, 1, unsigned long); + TEST_SHMEM_PUT(IPUT, 1, unsigned long long); + TEST_SHMEM_PUT(IPUT, 1, int8_t); + TEST_SHMEM_PUT(IPUT, 1, int16_t); + TEST_SHMEM_PUT(IPUT, 1, int32_t); + TEST_SHMEM_PUT(IPUT, 1, int64_t); + TEST_SHMEM_PUT(IPUT, 1, uint8_t); + TEST_SHMEM_PUT(IPUT, 1, uint16_t); + TEST_SHMEM_PUT(IPUT, 1, uint32_t); + TEST_SHMEM_PUT(IPUT, 1, uint64_t); + TEST_SHMEM_PUT(IPUT, 1, size_t); + TEST_SHMEM_PUT(IPUT, 1, ptrdiff_t); + + TEST_SHMEM_PUT(PUT_NBI, 0, float); + TEST_SHMEM_PUT(PUT_NBI, 0, double); + TEST_SHMEM_PUT(PUT_NBI, 0, long double); + TEST_SHMEM_PUT(PUT_NBI, 0, char); + TEST_SHMEM_PUT(PUT_NBI, 0, signed char); + TEST_SHMEM_PUT(PUT_NBI, 0, short); + TEST_SHMEM_PUT(PUT_NBI, 0, int); + TEST_SHMEM_PUT(PUT_NBI, 0, long); + TEST_SHMEM_PUT(PUT_NBI, 0, long long); + TEST_SHMEM_PUT(PUT_NBI, 0, unsigned char); + TEST_SHMEM_PUT(PUT_NBI, 0, unsigned short); + TEST_SHMEM_PUT(PUT_NBI, 0, unsigned int); + TEST_SHMEM_PUT(PUT_NBI, 0, unsigned long); + TEST_SHMEM_PUT(PUT_NBI, 0, 
unsigned long long); + TEST_SHMEM_PUT(PUT_NBI, 0, int8_t); + TEST_SHMEM_PUT(PUT_NBI, 0, int16_t); + TEST_SHMEM_PUT(PUT_NBI, 0, int32_t); + TEST_SHMEM_PUT(PUT_NBI, 0, int64_t); + TEST_SHMEM_PUT(PUT_NBI, 0, uint8_t); + TEST_SHMEM_PUT(PUT_NBI, 0, uint16_t); + TEST_SHMEM_PUT(PUT_NBI, 0, uint32_t); + TEST_SHMEM_PUT(PUT_NBI, 0, uint64_t); + TEST_SHMEM_PUT(PUT_NBI, 0, size_t); + TEST_SHMEM_PUT(PUT_NBI, 0, ptrdiff_t); + + TEST_SHMEM_PUT(PUT_NBI, 1, float); + TEST_SHMEM_PUT(PUT_NBI, 1, double); + TEST_SHMEM_PUT(PUT_NBI, 1, long double); + TEST_SHMEM_PUT(PUT_NBI, 1, char); + TEST_SHMEM_PUT(PUT_NBI, 1, signed char); + TEST_SHMEM_PUT(PUT_NBI, 1, short); + TEST_SHMEM_PUT(PUT_NBI, 1, int); + TEST_SHMEM_PUT(PUT_NBI, 1, long); + TEST_SHMEM_PUT(PUT_NBI, 1, long long); + TEST_SHMEM_PUT(PUT_NBI, 1, unsigned char); + TEST_SHMEM_PUT(PUT_NBI, 1, unsigned short); + TEST_SHMEM_PUT(PUT_NBI, 1, unsigned int); + TEST_SHMEM_PUT(PUT_NBI, 1, unsigned long); + TEST_SHMEM_PUT(PUT_NBI, 1, unsigned long long); + TEST_SHMEM_PUT(PUT_NBI, 1, int8_t); + TEST_SHMEM_PUT(PUT_NBI, 1, int16_t); + TEST_SHMEM_PUT(PUT_NBI, 1, int32_t); + TEST_SHMEM_PUT(PUT_NBI, 1, int64_t); + TEST_SHMEM_PUT(PUT_NBI, 1, uint8_t); + TEST_SHMEM_PUT(PUT_NBI, 1, uint16_t); + TEST_SHMEM_PUT(PUT_NBI, 1, uint32_t); + TEST_SHMEM_PUT(PUT_NBI, 1, uint64_t); + TEST_SHMEM_PUT(PUT_NBI, 1, size_t); + TEST_SHMEM_PUT(PUT_NBI, 1, ptrdiff_t); + + shmem_finalize(); + return rc; +} diff --git a/test/unit/cxx_test_shmem_test.cpp b/test/unit/cxx_test_shmem_test.cpp new file mode 100644 index 0000000..8c90199 --- /dev/null +++ b/test/unit/cxx_test_shmem_test.cpp @@ -0,0 +1,74 @@ +/* + * This test program is derived from a unit test created by Nick Park. + * The original unit test is a work of the U.S. Government and is not subject + * to copyright protection in the United States. Foreign copyrights may + * apply. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. 
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + /* TEST_SHMEM_TEST(TYPE): each PE writes its rank plus one into `remote` on the next PE (wrapping around) with shmem_p, then spins on shmem_test until its own `remote` is no longer 0. The value seen must be the previous PE's rank plus one; otherwise rc from the expansion site is set to EXIT_FAILURE. The offset of one keeps the value written by PE 0 distinct from the initial 0. */ +#define TEST_SHMEM_TEST(TYPE) \ + do { \ + static TYPE remote = 0; \ + const int mype = shmem_my_pe(); \ + const int npes = shmem_n_pes(); \ + shmem_p(&remote, (TYPE)mype+1, (mype + 1) % npes); \ + while (!shmem_test(&remote, SHMEM_CMP_NE, 0)) ; /* busy-poll until the neighbour's write is visible */ \ + if (remote != (TYPE)((mype + npes - 1) % npes)+1) { \ + printf("PE %i received incorrect value with " \ + "TEST_SHMEM_TEST(%s)\n", mype, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + } while (false) + +int main(int argc, char* argv[]) { + shmem_init(); + + int rc = EXIT_SUCCESS; + TEST_SHMEM_TEST(short); + TEST_SHMEM_TEST(int); + TEST_SHMEM_TEST(long); + TEST_SHMEM_TEST(long long); + TEST_SHMEM_TEST(unsigned short); + TEST_SHMEM_TEST(unsigned int); + TEST_SHMEM_TEST(unsigned long); + TEST_SHMEM_TEST(unsigned long long); + TEST_SHMEM_TEST(int32_t); + TEST_SHMEM_TEST(int64_t); + TEST_SHMEM_TEST(uint32_t); + TEST_SHMEM_TEST(uint64_t); + TEST_SHMEM_TEST(size_t); + TEST_SHMEM_TEST(ptrdiff_t); + + shmem_finalize(); + return rc; +} diff --git a/test/unit/cxx_test_shmem_wait_until.cpp b/test/unit/cxx_test_shmem_wait_until.cpp new file mode 100644 index 0000000..92315a6 --- /dev/null +++ b/test/unit/cxx_test_shmem_wait_until.cpp @@ -0,0 +1,74 @@ +/* + * This test program is derived from a unit test created by Nick Park. + * The original unit test is a work of the U.S. Government and is not subject + * to copyright protection in the United States. Foreign copyrights may + * apply. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer.
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include + /* TEST_SHMEM_WAIT_UNTIL(TYPE): each PE writes its rank plus one into `remote` on the next PE (wrapping around) with shmem_p, then calls shmem_wait_until with SHMEM_CMP_NE against 0 on its own `remote`. The value seen must be the previous PE's rank plus one; otherwise rc from the expansion site is set to EXIT_FAILURE. The offset of one keeps the value written by PE 0 distinct from the initial 0. */ +#define TEST_SHMEM_WAIT_UNTIL(TYPE) \ + do { \ + static TYPE remote = 0; \ + const int mype = shmem_my_pe(); \ + const int npes = shmem_n_pes(); \ + shmem_p(&remote, (TYPE)mype+1, (mype + 1) % npes); \ + shmem_wait_until(&remote, SHMEM_CMP_NE, 0); \ + if (remote != (TYPE)((mype + npes - 1) % npes)+1) { \ + printf("PE %i received incorrect value with " \ + "TEST_SHMEM_WAIT_UNTIL(%s)\n", mype, #TYPE); \ + rc = EXIT_FAILURE; \ + } \ + } while (false) + +int main(int argc, char* argv[]) { + shmem_init(); + + int rc = EXIT_SUCCESS; + TEST_SHMEM_WAIT_UNTIL(short); + TEST_SHMEM_WAIT_UNTIL(int); + TEST_SHMEM_WAIT_UNTIL(long); + TEST_SHMEM_WAIT_UNTIL(long long); + TEST_SHMEM_WAIT_UNTIL(unsigned short); + TEST_SHMEM_WAIT_UNTIL(unsigned int); + TEST_SHMEM_WAIT_UNTIL(unsigned long); + TEST_SHMEM_WAIT_UNTIL(unsigned long long); + TEST_SHMEM_WAIT_UNTIL(int32_t); + TEST_SHMEM_WAIT_UNTIL(int64_t); + TEST_SHMEM_WAIT_UNTIL(uint32_t); + TEST_SHMEM_WAIT_UNTIL(uint64_t); + TEST_SHMEM_WAIT_UNTIL(size_t); + TEST_SHMEM_WAIT_UNTIL(ptrdiff_t); + + shmem_finalize(); + return rc; +} diff --git a/test/unit/fcollect64.c b/test/unit/fcollect64.c new file mode 100644 index 0000000..a82aaf7 --- /dev/null +++
b/test/unit/fcollect64.c @@ -0,0 +1,225 @@ +/* + * Copyright 2011 Sandia Corporation. Under the terms of Contract + * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government + * retains certain rights in this software. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+/*
+ * shmem_fcollect64 test: {-v -l loops(20)} {nLongs(16)} {loopIncr(32)}
+ *
+ * For yod/mpirun -c/np X
+ */
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#define AMEG (1024UL*1024UL)
+#define AGIG (1024UL*1024UL*1024UL)
+
+#ifndef TRUE
+#define TRUE (1)
+#define FALSE (0)
+#endif
+
+#define Rfprintf if (shmem_my_pe() == 0) fprintf
+#define Rprintf if (shmem_my_pe() == 0) printf
+#define RVfprintf if (Verbose && shmem_my_pe() == 0) fprintf
+#define RVprintf if (Verbose && shmem_my_pe() == 0) printf
+#define Vprintf if (Verbose) printf
+#define Vfprintf if (Verbose) fprintf
+
+#define RDprintf if (Verbose && shmem_my_pe() == 0) printf
+#define RDfprintf if (Verbose && shmem_my_pe() == 0) fprintf
+
+/* option flags */
+int Verbose;
+
+#define DFLT_NWORDS 16
+#define DFLT_INCR 32
+#define DFLT_LOOPS 20
+
+#define VAL 0xCafeBabe
+
+long *dst;
+long *src;
+long pSync[SHMEM_COLLECT_SYNC_SIZE];
+/* Parse an integer (strtol, base auto-detected) with an optional k/m/g suffix that multiplies it by 1024, 1024^2 or 1024^3; the result is truncated to int. */
+static int
+atoi_scaled(char *s)
+{
+    long val;
+    char *e;
+
+    val = strtol(s,&e,0);
+    if (e == NULL || *e =='\0')
+        return val;
+
+    if (*e == 'k' || *e == 'K')
+        val *= 1024;
+    else if (*e == 'm' || *e == 'M')
+        val *= 1024*1024;
+    else if (*e == 'g' || *e == 'G')
+        val *= 1024*1024*1024;
+
+    return (int) val;
+}
+/* Per loop: every PE fills src with nWords values starting at mpe*nWords, fcollects all PEs' src into dst, and checks dst == 0 .. nWords*num_pes-1; nWords grows by nIncr each loop. */
+int
+main(int argc, char* argv[])
+{
+    int c, j, cloop, loops = DFLT_LOOPS;
+    int mpe, num_pes;
+    int nWords=1;
+    int nIncr=1;
+    int failures=0;
+    char *pgm;
+
+    shmem_init();
+    mpe = shmem_my_pe();
+    num_pes = shmem_n_pes();
+
+    if (num_pes == 1) {
+        Rfprintf(stderr,
+                 "ERR - Requires > 1 PEs\n");
+        shmem_finalize();
+        return 0;
+    }
+    pgm = strrchr(argv[0],'/');
+    if ( pgm )
+        pgm++;
+    else
+        pgm = argv[0];
+
+    while((c=getopt(argc,argv,"hqVvl:")) != -1) {
+        switch(c) {
+          case 'V':
+          case 'v':
+            Verbose++;
+            break;
+          case 'l':
+            loops = atoi(optarg);
+            break;
+          case 'h':
+            Rfprintf(stderr,
+                     "usage: %s {-l loopcnt(%d)} {numLongs(%d)} {loopIncr(%d)}\n",
+                     pgm,DFLT_LOOPS,DFLT_NWORDS,DFLT_INCR);
+            shmem_finalize();
+            return 1;
+          default:
+            shmem_finalize();
+            return 1;
+        }
+    }
+
+    if (optind == argc)
+        nWords = DFLT_NWORDS;
+    else {
+        nWords = atoi_scaled(argv[optind++]);
+        if (nWords <= 0) {
+            Rfprintf(stderr, "ERR - Bad nBytes arg?\n");
+            shmem_finalize();
+            return 1;
+        }
+    }
+
+    if (optind == argc)
+        nIncr = DFLT_INCR;
+    else {
+        nIncr = atoi(argv[optind++]); /* second positional arg is the per-loop increment; -l sets loops */
+        if (nIncr <= 0 ) {
+            Rfprintf(stderr, "ERR - incLongs arg out of bounds '%d'?\n", nIncr);
+            shmem_finalize();
+            return 1;
+        }
+    }
+
+    if ( nWords % 8 ) { // integral multiple of longs
+        Rprintf("%s: nWords(%d) not a multiple of %zu?\n",
+                pgm,nWords,sizeof(long));
+        shmem_finalize();
+        return 1;
+    }
+
+    for (c = 0; c < SHMEM_COLLECT_SYNC_SIZE;c++)
+        pSync[c] = SHMEM_SYNC_VALUE;
+
+    if (Verbose && mpe == 0)
+        fprintf(stderr,"loops(%d) nWords(%d) incr-per-loop(%d)\n",
+                loops,nWords,nIncr);
+
+    for(cloop=1; cloop <= loops; cloop++) {
+
+        c = (sizeof(long)*nWords) * (num_pes + 1); // src + dst allocation.
+        //nWords /= sizeof(long); // convert input of bytes --> longs.
+
+        src = (long*)shmem_malloc(c);
+        if ( !src ) {
+            Rprintf("[%d] %s: shmem_malloc(%d) failed?\n", mpe, pgm,c);
+            shmem_global_exit(1);
+        }
+        dst = &src[nWords];
+
+        for(j=0; j < nWords; j++)
+            src[j] = (long) (j + mpe*nWords);
+
+        shmem_barrier_all();
+
+        shmem_fcollect64(dst,src,nWords,0,0,num_pes,pSync);
+
+        // Expect dst to be consecutive integers 0 ... (nWords*num_pes)-1
+        for(j=0; j < (nWords*num_pes); j++) {
+            if ( dst[j] != (long) j ) {
+                fprintf(stderr,
+                        "[%d] dst[%d] %ld != expected %d\n",mpe,j,dst[j],j);
+                shmem_global_exit(1);
+            }
+        }
+        shmem_barrier_all();
+        shmem_free(src); // release this pass's buffer; the next pass allocates a larger one
+
+        if (Verbose && mpe == 0 && loops > 1) {
+            fprintf(stderr,".");
+        }
+        nWords += nIncr;
+    }
+
+    if (Verbose && mpe == 0) {
+        fprintf(stderr,"\n");fflush(stderr);
+    }
+    shmem_barrier_all();
+    if (Verbose)
+        printf("%d(%d) Exit(%d)\n", mpe, num_pes, failures);
+
+    shmem_finalize();
+
+    return failures;
+}
diff --git a/test/unit/get1.c b/test/unit/get1.c
new file mode 100644
index 0000000..34d2857
--- /dev/null
+++ b/test/unit/get1.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2011 Sandia Corporation. Under the terms of Contract
+ * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+ * retains certain rights in this software.
+ *
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include
+
+#include
+#include
+
+static long source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+static long target[10];
+/* PE 0 fetches source[] from PE 1 into its local target[] with shmem_long_get and compares the result with its own source[]. */
+int
+main(int argc, char* argv[])
+{
+    int i;
+
+    shmem_init();
+
+    if (shmem_n_pes() == 1) {
+        printf("%s: Requires number of PEs > 1\n", argv[0]);
+        shmem_finalize();
+        return 0;
+    }
+
+    if (shmem_my_pe() == 0) {
+        memset(target, 0, sizeof(target));
+        /* get 10 elements from source on PE 1 into the local target */
+        shmem_long_get(target, source, 10, 1);
+    }
+
+    shmem_barrier_all(); /* sync sender and receiver */
+
+    if (shmem_my_pe() == 0) {
+        if (0 != memcmp(source, target, sizeof(long) * 10)) {
+            fprintf(stderr,"[%d] Src & Target mismatch?\n",shmem_my_pe());
+            for (i = 0 ; i < 10 ; ++i) {
+                printf("%ld,%ld ", source[i], target[i]);
+            }
+            printf("\n");
+            shmem_global_exit(1);
+        }
+    }
+
+    shmem_finalize();
+
+    return 0;
+}
diff --git a/test/unit/get_g.c b/test/unit/get_g.c
new file mode 100644
index 0000000..986d48b
--- /dev/null
+++ b/test/unit/get_g.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright 2011 Sandia Corporation. Under the terms of Contract
+ * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+ * retains certain rights in this software.
+ *
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include
+
+#include
+#include
+#include
+
+static short *src_short;
+static int *src_int;
+static float *src_float;
+static double *src_double;
+static long *src_long;
+
+static short dst_short;
+static int dst_int;
+static float dst_float;
+static double dst_double;
+static long dst_long;
+
+static int loops = 100;
+/* Per loop: every PE shmem_malloc()s one element of each type, stores a known value, then reads that element from every PE with shmem_<type>_g and checks the value. */
+int
+main(int argc, char* argv[])
+{
+    int me, num_pes, pe, l;
+    int Verbose = 0;
+    char *pgm;
+
+    if ((pgm=strrchr(argv[0],'/'))) {
+        pgm++;
+    } else {
+        pgm = argv[0];
+    }
+
+    if (argc > 1) {
+        if (strncmp(argv[1],"-v",3) == 0) {
+            Verbose=1;
+        } else if (strncmp(argv[1],"-h",3) == 0) {
+            fprintf(stderr,"usage: %s {-v(verbose)|h(help)}\n",pgm);
+            /* shmem_init() has not been called yet, so there is nothing to finalize */
+            exit(1);
+        }
+    }
+
+    shmem_init();
+    me = shmem_my_pe();
+    num_pes = shmem_n_pes();
+
+    // be a bit sane with total number of gets issued
+    loops = loops / num_pes;
+    if (loops < 5) loops = 5;
+
+    for (l = 0 ; l < loops ; ++l) {
+        if ((src_short = shmem_malloc(sizeof(short))) == NULL) {
+            printf("PE-%d short shmem_malloc() failed?\n", me);
+            shmem_global_exit(1);
+        }
+        *src_short = 2;
+
+        if ((src_int = shmem_malloc(sizeof(int))) == NULL) {
+            printf("PE-%d int shmem_malloc() failed?\n", me);
+            shmem_global_exit(1);
+        }
+        *src_int = 4;
+
+        if ((src_float = shmem_malloc(sizeof(float))) == NULL) {
+            printf("PE-%d float shmem_malloc() failed?\n", me);
+            shmem_global_exit(1);
+        }
+        *src_float = 4.0;
+
+        if ((src_double = shmem_malloc(sizeof(double))) == NULL) {
+            printf("PE-%d double shmem_malloc() failed?\n", me);
+            shmem_global_exit(1);
+        }
+        *src_double = 8.0;
+
+        if ((src_long = shmem_malloc(sizeof(long))) == NULL) {
+            printf("PE-%d long shmem_malloc() failed?\n", me);
+            shmem_global_exit(1);
+        }
+        *src_long = 8;
+
+        shmem_barrier_all();
+
+        for (pe=0 ; pe < num_pes; ++pe) {
+            if (!shmem_addr_accessible(src_short,pe)) {
+                printf("PE-%d local addr %p not accessible from PE-%d?\n",
+                       me, (void*)src_short, pe);
+                shmem_global_exit(1);
+            }
+            dst_short = 0;
+            dst_short = shmem_short_g(src_short,pe);
+            if (dst_short != 2) {
+                printf("PE-%d dst_short %hd != 2?\n",me,dst_short);
+                shmem_global_exit(1);
+            }
+            dst_int = 0;
+            dst_int = shmem_int_g(src_int,pe);
+            if (dst_int != 4) {
+                printf("PE-%d dst_int %d != 4?\n",me,dst_int);
+                shmem_global_exit(1);
+            }
+            dst_float = 0.0;
+            dst_float = shmem_float_g(src_float,pe);
+            if (dst_float != 4.0) {
+                printf("PE-%d dst_float %f != 4.0?\n",me,dst_float);
+                shmem_global_exit(1);
+            }
+            dst_double = 0.0;
+            dst_double = shmem_double_g(src_double,pe);
+            if (dst_double != 8.0) {
+                printf("PE-%d dst_double %f != 8.0?\n",me,dst_double);
+                shmem_global_exit(1);
+            }
+            dst_long = 0;
+            dst_long = shmem_long_g(src_long,pe);
+            if (dst_long != 8) {
+                printf("PE-%d dst_long %ld != 8?\n",me,dst_long);
+                shmem_global_exit(1);
+            }
+        }
+
+        shmem_barrier_all();
+
+        shmem_free(src_short);
+        shmem_free(src_int);
+        shmem_free(src_float);
+        shmem_free(src_double);
+        shmem_free(src_long);
+    }
+
+    if (Verbose)
+        fprintf(stderr,"[%d] exit\n",shmem_my_pe());
+
+    shmem_finalize();
+
+    return 0;
+}
diff --git a/test/unit/get_nbi.c b/test/unit/get_nbi.c
new file mode 100644
index 0000000..ee85023
--- /dev/null
+++ b/test/unit/get_nbi.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2017 Intel Corporation.
All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Non-Blocking Get Test + * Tom St. John + * January, 2016 + * + * PE 0 uses a non-blocking get to copy an array from + * every remote PE. 
+ */
+
+#include
+
+#include
+#include
+
+static long source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+static long target[10];
+
+int
+main(int argc, char* argv[])
+{
+    int i, j, num_pes;
+    int failed = 0;
+
+    shmem_init();
+
+    if (shmem_my_pe() == 0) {
+        num_pes=shmem_n_pes();
+
+        for(j = 0; j < num_pes; j++) { /* j == 0 fetches from PE 0 itself */
+            memset(target, 0, sizeof(long) * 10);
+            shmem_long_get_nbi(target, source, 10, j);
+            shmem_quiet(); /* the get is non-blocking: complete it before target is read */
+
+            for (i = 0; i < 10; i++) {
+                if (source[i] != target[i]) {
+                    fprintf(stderr,"[%d] get_nbi from PE %d: target[%d] = %ld, expected %ld\n",
+                            shmem_my_pe(), j, i, target[i], source[i]);
+                    failed = 1;
+                }
+            }
+
+            if (failed) /* report every mismatching element for this PE before aborting the job */
+                shmem_global_exit(1);
+        }
+    }
+
+    shmem_finalize();
+
+    return 0;
+}
diff --git a/test/unit/gettid_register.c b/test/unit/gettid_register.c
new file mode 100644
index 0000000..2350f83
--- /dev/null
+++ b/test/unit/gettid_register.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2018 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Gettid Register Test: Register a custom gettid function pointer */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define T 8
+
+int dest;
+
+int me, npes;
+int errors = 0;
+
+pthread_key_t key;
+/* Callback passed to shmemx_register_gettid(): returns the uint64_t the calling thread stored under `key`. NOTE(review): dereferences NULL on a thread that never called pthread_setspecific(key, ...) -- confirm the library only invokes it from the worker threads. */
+uint64_t my_gettid(void) {
+    uint64_t tid_val = 0;
+
+    tid_val = * (uint64_t*) pthread_getspecific(key);
+
+    return tid_val;
+}
+
+/* Worker: stores arg (a uint64_t*) under `key`, creates a private context, and atomically adds *arg to `dest` on every PE through that context. */
+static void * thread_main(void *arg) {
+    int i;
+
+    int ret = pthread_setspecific(key, arg);
+    assert(0 == ret);
+
+    shmem_ctx_t ctx;
+    ret = shmem_ctx_create(SHMEM_CTX_PRIVATE, &ctx);
+    if (ret != 0) {
+        printf("Error creating context (%d)\n", ret);
+        shmem_global_exit(2);
+    }
+
+    for (i = 1; i <= npes; i++)
+        shmem_ctx_int_atomic_add(ctx, &dest, *(uint64_t *)arg, (me + i) % npes);
+
+    shmem_ctx_quiet(ctx); /* the adds were issued on ctx, so quiet ctx rather than the default context */
+
+    shmem_ctx_destroy(ctx);
+
+    return NULL;
+}
+
+/* Starts T workers per PE with ids 0..T-1; once all PEs are done, dest must equal (0+1+...+(T-1)) * npes. */
+int main(int argc, char **argv) {
+    int tl, i, ret;
+    pthread_t threads[T];
+    uint64_t t_arg[T];
+
+    shmemx_register_gettid( &my_gettid );
+
+    ret = shmem_init_thread(SHMEM_THREAD_MULTIPLE, &tl);
+
+    if (tl != SHMEM_THREAD_MULTIPLE || ret != 0) {
+        printf("Init failed (requested thread level %d, got %d, ret %d)\n",
+               SHMEM_THREAD_MULTIPLE, tl, ret);
+
+        if (ret == 0) {
+            shmem_global_exit(1);
+        } else {
+            return ret;
+        }
+    }
+
+    if (sizeof(pthread_t) > sizeof(uint64_t)) {
+        printf("Cannot run this test, size of pthread_t is larger than 64 bits\n");
+        shmem_finalize();
+        return 0;
+    }
+
+    me = shmem_my_pe();
+    npes = shmem_n_pes();
+
+
+    if (me == 0) printf("Starting multithreaded test on %d PEs, %d threads/PE\n", npes, T);
+
+    ret = pthread_key_create(&key, NULL);
+    assert(0 == ret);
+
+    for (i = 0; i < T; i++) {
+        int err;
+        t_arg[i] = i;
+        err = pthread_create(&threads[i], NULL, thread_main, (void*) &t_arg[i]);
+        assert(0 == err);
+    }
+
+    for (i = 0; i < T; i++) {
+        int err;
+        err = pthread_join(threads[i], NULL);
+        assert(0 == err);
+    }
+
+    shmem_sync_all();
+
+    if (dest != ((T-1)*T/2)*npes) {
+        printf("%d: dest = %d, expected %d\n", me, dest, ((T-1)*T/2)*npes);
+        errors++;
+    }
+
+    shmem_finalize();
+    return (errors == 0) ? 0 : 1;
+}
diff --git a/test/unit/global_exit.c b/test/unit/global_exit.c
new file mode 100644
index 0000000..245e139
--- /dev/null
+++ b/test/unit/global_exit.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include
+#include
+#include
+#include
+/* PE 0 calls shmem_global_exit(0) while every other PE is parked in a barrier that PE 0 never joins. */
+int
+main(int argc, char* argv[])
+{
+    shmem_init();
+
+    if (shmem_my_pe() == 0) {
+        shmem_global_exit(0);
+        abort(); /* reached only if shmem_global_exit() returns, which the test treats as a failure */
+    }
+
+    /* All other PEs wait in this barrier */
+    shmem_barrier_all();
+
+    shmem_finalize();
+
+    return 0;
+}
diff --git a/test/unit/hello.c b/test/unit/hello.c
new file mode 100644
index 0000000..cfe65d2
--- /dev/null
+++ b/test/unit/hello.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2011 Sandia Corporation. Under the terms of Contract
+ * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+ * retains certain rights in this software.
+ *
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include
+#include
+#include
+
+int
+main(int argc, char* argv[], char *envp[])
+{
+    int me, myshmem_n_pes;
+    /*
+    ** Starts/Initializes SHMEM/OpenSHMEM
+    */
+    shmem_init();
+    /*
+    ** Fetch the number of processes
+    ** Some implementations use num_pes();
+    */
+    myshmem_n_pes = shmem_n_pes();
+    /*
+    ** Assign my process ID to me
+    */
+    me = shmem_my_pe();
+
+    if (NULL == getenv("MAKELEVEL")) { /* stay silent when MAKELEVEL is set in the environment */
+        printf("Hello World from %d of %d\n",me,myshmem_n_pes);
+    }
+
+    shmem_finalize();
+
+    return 0;
+}
diff --git a/test/unit/hello_f.f90 b/test/unit/hello_f.f90
new file mode 100644
index 0000000..80166fe
--- /dev/null
+++ b/test/unit/hello_f.f90
@@ -0,0 +1,43 @@
+!
+! Copyright 2011 Sandia Corporation. Under the terms of Contract
+! DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+! retains certain rights in this software.
+!
+! Copyright (c) 2017 Intel Corporation. All rights reserved.
+! This software is available to you under the BSD license below:
+!
+! Redistribution and use in source and binary forms, with or
+! without modification, are permitted provided that the following
+! conditions are met:
+!
+! - Redistributions of source code must retain the above
+!   copyright notice, this list of conditions and the following
+!   disclaimer.
+!
+! - Redistributions in binary form must reproduce the above
+!   copyright notice, this list of conditions and the following
+!   disclaimer in the documentation and/or other materials
+!   provided with the distribution.
+!
+! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+! EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+! MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+! NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+! BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+! ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+! CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+! SOFTWARE.
+!
+
+program hello
+  include 'shmem.fh'
+
+  integer npes, me
+
+  call start_pes(0)
+  npes = num_pes()  ! value printed as the total below
+  me = my_pe()      ! value printed as this PE's id below
+
+  print *,'I am ', me, ' of ', npes
+
+end program hello
diff --git a/test/unit/ipgm.c b/test/unit/ipgm.c
new file mode 100644
index 0000000..5e98ff1
--- /dev/null
+++ b/test/unit/ipgm.c
@@ -0,0 +1,373 @@
+/*
+ * Copyright 2011 Sandia Corporation. Under the terms of Contract
+ * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+ * retains certain rights in this software.
+ *
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * ipgm / shmem iput/iget multiple
+ * usage: ipgm [-rmhvdD] [nWords] [loops] [incWords-per-loop]
+ * where:
+ *   -r == use shmem_realloc() per loop
+ *   -m == use shmem_malloc() + shmem_free() per loop
+ *   -h == help text.
+ *   -d/v == enable debug (each occurrence raises the level; -D sets it to 2).
+ *
+ * start-loop
+ *   shmem_malloc() or shmem_realloc() src and target arrays.
+ *   PE0 puts nWords to PE1 ... PE(num_pes-1)
+ *   PE* validates received data
+ *   PE* adds shmem_my_pe() to each received data element.
+ *   PE0 pulls/iget()s data from PE* into loop/PE specific results range.
+ *   PE0 validates received for each PE.
+ *   if shmem_malloc() then shmem_free()
+ *   nWords += incWords
+ * end-loop
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#define DFLT_NWORDS 16
+#define DFLT_LOOPS 16
+#define DFLT_INCR 16
+
+#define DataType long
+#define IPUT shmem_long_iput
+#define IGET shmem_long_iget
+
+static DataType *source;
+static DataType *target;
+static DataType *results;
+
+static char *pgm;
+static int Debug; /* ==1 std dbg, 2==more, 3==max */
+
+#define Dprintf if (Debug && me==0) printf
+#define Zprintf if (me==0) printf
+
+void usage (void);
+int getSize (char *);
+int target_data_good(DataType *, int, int, int);
+/* Print the usage text on PE 0, then end the whole job with shmem_global_exit(1). */
+void
+usage (void)
+{
+    if (shmem_my_pe() == 0) {
+        fprintf (stderr,
+            "Usage: %s {-rmhv} [nWords(%d)] [loops(%d)] [incWords(%d)]\n",
+            pgm,DFLT_NWORDS,DFLT_LOOPS,DFLT_INCR);
+        fprintf (stderr,
+            " where:\n"
+            " -r == use shmem_realloc() instead of shmem_malloc()\n"
+            " -m == use shmem_malloc() instead of shmem_realloc()\n"
+            " -v == Verbose output\n"
+            " -h == help.\n");
+    }
+    shmem_global_exit(1);
+}
+/* Parse "<int>[k|K|m|M]": returns the integer, shifted left by 10 for k/K or by 20 for m/M; returns -1 when no integer could be read. */
+int
+getSize (char *str)
+{
+    int size;
+    char mod[32];
+
+    switch (sscanf (str, "%d%1[mMkK]", &size, mod))
+    {
+      case 1:
+        return (size);
+
+      case 2:
+        switch (*mod)
+        {
+          case 'm':
+          case 'M':
+            return (size << 20);
+
+          case 'k':
+          case 'K':
+            return (size << 10);
+
+          default:
+            return (size);
+        }
+
+      default:
+        return (-1);
+    }
+}
+/* Check data[j] == (1 + 2*j) + id for j in [0, elements). On the first mismatch report it with lineno, dump the whole buffer and return 1; otherwise return 0. */
+int
+target_data_good( DataType *data, int elements, int id, int lineno )
+{
+    int j, k, rc=0;
+
+    for(j=0,k=1; j < elements; j++,k+=2) {
+        if ( data[j] != ((DataType)k + id) ) {
+            printf("PE[%d] ERR @ line#%d data[%d] wanted %d != %ld\n",
+                   shmem_my_pe(), lineno, j, k+id, data[j] );
+
+            for(rc=0,k=1; rc < elements; rc++,k++) { /* rc doubles as the dump index; it is reset to 1 right after */
+                printf("%ld ",data[rc]);
+            }
+            rc = 1;
+            break;
+        }
+        else
+            if (Debug > 2) printf("%ld ",data[j]);
+    }
+    if (Debug > 2 || rc)
+        printf("\n");
+    fflush(stdout);
+    return rc;
+}
+/* Test driver: parses options and [nWords] [loops] [incWords], then runs the put / verify / get-back loop described in the header comment of this file. */
+int
+main(int argc, char **argv)
+{
+    int me, nProcs, workers, rc=0, l, j, c, Malloc=1;
+    int nWords, nWords_start, prev_sz=0, ridx, loops, incWords;
+
+    pgm = strrchr(argv[0],'/');
+    if ( pgm )
+        pgm++;
+    else
+        pgm = argv[0];
+
+    shmem_init();
+    me = shmem_my_pe();
+    nProcs = shmem_n_pes();
+    workers = nProcs - 1;
+
+    if (nProcs <= 1) {
+        fprintf(stderr, "ERR - Requires > 1 PEs\n");
+        shmem_finalize();
+        return 0;
+    }
+
+    while ((c = getopt (argc, argv, "hmrvdD")) != -1)
+        switch (c)
+        {
+          case 'm':
+            Malloc=1;
+            break;
+          case 'r':
+            Malloc=0;
+            break;
+          case 'v':
+          case 'd':
+            Debug++;
+            break;
+          case 'D':
+            Debug=2;
+            break;
+          case 'h':
+          default:
+            usage();
+            break;
+        }
+
+    if (optind == argc)
+        nWords = DFLT_NWORDS;
+    else if ((nWords = getSize (argv[optind++])) <= 0)
+        usage ();
+
+    if (nWords % 2 != 0) {
+        if (me == 0)
+            fprintf(stderr," nWords(%d) not even?\n",nWords);
+        shmem_finalize();
+        exit(1);
+    }
+    nWords_start = nWords; /* remembered for the final re-validation pass over results */
+
+    if (optind == argc)
+        loops = DFLT_LOOPS;
+    else if ((loops = getSize (argv[optind++])) < 0)
+        usage ();
+
+    if (optind == argc)
+        incWords = DFLT_INCR;
+    else if ((incWords = getSize (argv[optind++])) < 0)
+        usage ();
+
+    if (loops > 1 ) {
+        if (incWords > 0 && ((incWords % 2) != 0)) {
+            if (me == 0)
+                fprintf(stderr," incWords(%d) not even?\n",incWords);
+            shmem_finalize();
+            exit(1);
+        }
+    }
+
+    if (Debug && me == 0)
+        printf("%s: workers(%d) nWords(%d) loops(%d) nWords-incr-per-loop(%d)\n",
+               pgm, workers, nWords, loops, incWords);
+
+    for(l=0,ridx=0; l < loops; l++) {
+        // reserve space for worker PEs to deposit data
+        rc = ((workers * nWords) * sizeof(DataType)) + prev_sz; /* grow by this loop's share; earlier loops' results are kept */
+        if (Debug > 2)
+            printf("alloc: results[%ld]\n",(rc/sizeof(DataType)));
+        results = (DataType *)shmem_realloc(results,rc);
+        if (!results)
+        {
+            perror ("Failed results memory allocation");
+            shmem_global_exit(1);
+        }
+        prev_sz = rc;
+
+        if (me==0 && Debug > 2) {
+            int idx = ridx;
+            printf("alloc: results[%ld] ridx %d psz %ld\n",
+                   (rc/sizeof(DataType)),ridx,(prev_sz/sizeof(DataType)));
+            for(j=1; j < nProcs; j++) {
+                printf(" PE[%d] results[%d...%d]\n",
+                       j,idx,(idx+(nWords-1)));
+                idx += nWords;
+            }
+        }
+
+        rc = (2 * nWords) * sizeof(DataType); /* source holds 2*nWords because the iput below reads it with stride 2 */
+        if (Debug > 2)
+            printf("source %ld words\n",rc/sizeof(DataType));
+        if (Malloc)
+            source = (DataType *)shmem_malloc(rc);
+        else
+            source = (DataType *)shmem_realloc(source,rc);
+
+        if (! source) {
+            perror ("Failed source memory allocation");
+            shmem_global_exit(1);
+        }
+        if (Debug > 3)
+            printf("shmem_malloc() source %p (%d bytes)\n",(void*)source,rc);
+
+        /* init source data */
+        for(j=0; j < nWords*2; j++)
+            source[j] = j+1;
+
+        rc = nWords * sizeof(DataType);
+        if (Debug > 2)
+            printf("target %ld words\n",rc/sizeof(DataType));
+        if (Malloc)
+            target = (DataType *)shmem_malloc(rc);
+        else
+            target = (DataType *)shmem_realloc(target,rc);
+
+        if ( ! target ) {
+            perror ("Failed target memory allocation");
+            shmem_global_exit(1);
+        }
+        memset(target, 0, rc);
+        if (Debug > 3)
+            printf("shmem_malloc() target %p (%d bytes)\n",(void*)target,rc);
+
+        shmem_barrier_all();
+
+        if (me == 0) {
+            /* put nWords of DataType into target on PE's [1 to (nProcs-1)] */
+            for(j=1; j < nProcs; j++)
+                IPUT(target, source, 1, 2, nWords, j);
+        }
+
+        shmem_barrier_all();
+
+        if (me != 0) {
+            // Verify iput target data
+            rc = target_data_good(target, nWords, 0, __LINE__);
+            if (rc)
+                shmem_global_exit(1);
+            // add my PE to target data so PE0 will know I have seen the data.
+            for(j=0; j < nWords; j++)
+                target[j] += me;
+        }
+
+        shmem_barrier_all();
+
+        if (me == 0) {
+            // Pull data from PE1...(nProcs-1)
+            // iget() target[0...nWords] -->
+            // results[0 ... nWords] == loop-0 PE1 results
+            // results[loop*PE1*nWords ... PE*nWords] == loop-0 PE1 results
+
+            for(j=1; j < nProcs; j++) {
+                if (Debug > 1)
+                    printf("PE[0] iget(%d words PE[%d]) results[%d...%d]\n",
+                           nWords,j,ridx,(ridx+(nWords-1)));
+                IGET(&results[ridx], target, 1, 1, nWords, j);
+                rc = target_data_good( &results[ridx], nWords, j, __LINE__);
+                if (rc)
+                    shmem_global_exit(1);
+                ridx += nWords;
+            }
+        }
+
+        shmem_barrier_all();
+
+        if (Malloc) {
+            shmem_free(source);
+            shmem_free(target);
+        }
+
+        nWords += incWords;
+
+        if (Debug && me == 0 && loops > 1)
+            printf("End loop %d: nWords %d\n", (l+1), nWords);
+
+        shmem_barrier_all();
+    }
+
+    if (me == 0) {
+        nWords = nWords_start; /* replay the per-loop sizes to walk results in the order it was filled */
+        for(l=0,ridx=0; l < loops; l++) {
+            for(j=1; j < nProcs; j++) {
+                if (Debug > 1)
+                    printf(" Validate: loop %d PE-%d ridx %d nWords %d\n",
+                           l,j,ridx,nWords);
+                rc = target_data_good( &results[ridx], nWords, j, __LINE__);
+                if (rc)
+                    shmem_global_exit(1);
+                ridx += nWords;
+            }
+            nWords += incWords;
+        }
+    }
+
+    if (Debug)
+        printf("PE-%d exit(0)\n",me);
+
+    shmem_finalize();
+
+    return 0;
+}
diff --git a/test/unit/iput-iget.c b/test/unit/iput-iget.c
new file mode 100644
index 0000000..047b3d5
--- /dev/null
+++ b/test/unit/iput-iget.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright 2011 Sandia Corporation. Under the terms of Contract
+ * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+ * retains certain rights in this software.
+ *
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Adapted from the SGI shmem_iput man page example, extended to any PE count:
+ * PE 0 iputs source[0,2,4,6,8] to every other PE, then igets each copy back.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include <shmem.h>
+
+#define WRDS 5 /* elements moved by each iput/iget call */
+
+short source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+static short target[10];
+static short *results; /* symmetric heap buffer; PE 0 gathers WRDS shorts per remote PE */
+
+int
+main(int argc, char **argv)
+{
+    int me, nProcs, rc=0, j, k;
+
+    shmem_init();
+    me = shmem_my_pe();
+    nProcs = shmem_n_pes();
+
+    if (me == 0) {
+        /* put WRDS words (source stride 2, target stride 1) on PE's [1 to (nProcs-1)] */
+        for(j=1; j < nProcs; j++)
+            shmem_short_iput(target, source, 1, 2, WRDS, j);
+    }
+
+    results = (short*)shmem_malloc(nProcs * WRDS * sizeof(short));
+    if (!results) { shmem_global_exit(1); } /* explicit check: assert() is compiled out under NDEBUG */
+    memset((void*)results, 0, (nProcs * WRDS * sizeof(short)));
+
+    shmem_barrier_all(); /* sync sender and receiver */
+
+    if (me != 0) {
+        // Verify iput data is expected
+        if (target[0] != 1 ||
+            target[1] != 3 ||
+            target[2] != 5 ||
+            target[3] != 7 ||
+            target[4] != 9)
+        {
+            printf("%s ERR: target on PE %d is %hd %hd %hd %hd %hd\n"
+                   " Expected 1,3,5,7,9?\n", argv[0],
+                   me, target[0], target[1], target[2],
+                   target[3], target[4] );
+            rc = 1;
+        }
+    }
+
+    shmem_barrier_all();
+
+    if (rc != 0)
+        shmem_global_exit(rc); /* a receiver saw bad data: abort the whole job */
+
+    if (me == 0) {
+        // Pull data from PE1...(nProcs-1)
+        // iget() PE j's target[0...4] --> results[(j-1)*WRDS ... (j-1)*WRDS+WRDS-1]
+
+        for(j=1,k=0; j < nProcs; j++,k+=WRDS)
+            shmem_short_iget(&results[k], target, 1, 1, WRDS, j);
+
+        for(j=1,k=0; j < nProcs; j++,k+=WRDS) {
+            if (results[k] != 1 ||
+                results[k+1] != 3 ||
+                results[k+2] != 5 ||
+                results[k+3] != 7 ||
+                results[k+4] != 9)
+            {
+                printf("%s [%d] ERR: results %hd %hd %hd %hd %hd\n"
+                       " Expected 1,3,5,7,9?\n", argv[0],
+                       me, results[k], results[k+1], results[k+2],
+                       results[k+3], results[k+4] );
+                rc = 1;
+            }
+        }
+    }
+#if _DEBUG
+    else {
+        printf("%s [%d] target is %hd %hd %hd %hd %hd\n", argv[0],
+               me, target[0], target[1], target[2],
+               target[3], target[4] );
+    }
+#endif
+
+    shmem_barrier_all(); /* sync before exiting */
+    shmem_free(results);
+
+    shmem_finalize();
+
+    return rc;
+}
diff --git a/test/unit/iput128.c b/test/unit/iput128.c
new file mode 100644
index 0000000..8a32c57
--- /dev/null
+++ b/test/unit/iput128.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2011 Sandia Corporation. Under the terms of Contract
+ * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+ * retains certain rights in this software.
+ *
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Adapted from the SGI shmem_iput man page example: PE 0 puts source[0,2,4,6,8] into target[0..4] on every other PE.
+ */
+
+#include <stdio.h>
+#include <shmem.h>
+/* IPUT is the strided-put routine under test; DataType is the element type it moves. */
+
+/* NOTE(review): shmem_iput128 moves 128-bit elements; assumes sizeof(long double) == 16 - confirm for the target ABI. */
+#define IPUT shmem_iput128
+#define DataType long double
+
+static DataType source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+static DataType target[10];
+
+int main(int argc, char **argv)
+{
+    int me, nProcs, rc=0;
+
+    shmem_init();
+    me = shmem_my_pe();
+    nProcs = shmem_n_pes();
+
+    if (me == 0) {
+        int j;
+        /* put 5 words (source stride 2, target stride 1) on PE's [1 to (nProcs-1)] */
+        for(j=1; j < nProcs; j++)
+            IPUT (target, source, 1, 2, 5, j);
+    }
+
+    shmem_barrier_all(); /* sync sender and receiver */
+
+    if (me != 0) {
+        if (target[0] != 1 ||
+            target[1] != 3 ||
+            target[2] != 5 ||
+            target[3] != 7 ||
+            target[4] != 9)
+        {
+            printf("ERR: target on PE %d is %Lf %Lf %Lf %Lf %Lf\n"
+                   " Expected 1,3,5,7,9?\n",
+                   me, target[0], target[1], target[2],
+                   target[3], target[4] );
+            rc = 1; /* becomes this PE's exit status */
+        }
+    }
+
+    shmem_finalize();
+
+    return rc;
+}
diff --git a/test/unit/iput32.c b/test/unit/iput32.c
new file mode 100644
index 0000000..3c60748
--- /dev/null
+++ b/test/unit/iput32.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2011 Sandia Corporation. Under the terms of Contract
+ * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+ * retains certain rights in this software.
+ *
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * adaptation of example from SGI man page for shmem_iput. 
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <shmem.h>
+
+/* PE 0 puts source[0,2,4,6,8] into target[0..4] on every other PE; receivers verify 1,3,5,7,9. */
+
+/* IPUT is the strided-put routine under test; DataType is a 32-bit element type to match it. */
+#define IPUT shmem_iput32
+#define DataType uint32_t
+
+static DataType source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+static DataType target[10];
+
+int main(int argc, char **argv)
+{
+    int me, nProcs, rc=0;
+
+    shmem_init();
+    me = shmem_my_pe();
+    nProcs = shmem_n_pes();
+
+    if (me == 0) {
+        int j;
+        /* put 5 words (source stride 2, target stride 1) on PE's [1 to (nProcs-1)] */
+        for(j=1; j < nProcs; j++)
+            IPUT (target, source, 1, 2, 5, j);
+    }
+
+    shmem_barrier_all(); /* sync sender and receiver */
+
+    if (me != 0) {
+        if (target[0] != 1 ||
+            target[1] != 3 ||
+            target[2] != 5 ||
+            target[3] != 7 ||
+            target[4] != 9)
+        {
+            printf("ERR: target on PE %d is %u %u %u %u %u\n"
+                   " Expected 1,3,5,7,9?\n",
+                   me, (unsigned) target[0], (unsigned) target[1], (unsigned) target[2],
+                   (unsigned) target[3], (unsigned) target[4] ); /* casts make the args match %u */
+            rc = 1; /* becomes this PE's exit status */
+        }
+    }
+
+    shmem_finalize();
+
+    return rc;
+}
diff --git a/test/unit/iput64.c b/test/unit/iput64.c
new file mode 100644
index 0000000..7d53ad2
--- /dev/null
+++ b/test/unit/iput64.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2011 Sandia Corporation. Under the terms of Contract
+ * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+ * retains certain rights in this software.
+ *
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Adapted from the SGI shmem_iput man page example: PE 0 puts source[0,2,4,6,8] into target[0..4] on every other PE.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <shmem.h>
+
+/* Receivers verify that target[0..4] holds 1,3,5,7,9 after the barrier. */
+
+/* IPUT is the strided-put routine under test; DataType is a 64-bit element type to match it. */
+#define IPUT shmem_iput64
+#define DataType uint64_t
+
+static DataType source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+static DataType target[10];
+
+int main(int argc, char **argv)
+{
+    int me, nProcs, rc=0;
+
+    shmem_init();
+    me = shmem_my_pe();
+    nProcs = shmem_n_pes();
+
+    if (me == 0) {
+        int j;
+        /* put 5 words (source stride 2, target stride 1) on PE's [1 to (nProcs-1)] */
+        for(j=1; j < nProcs; j++)
+            IPUT (target, source, 1, 2, 5, j);
+    }
+
+    shmem_barrier_all(); /* sync sender and receiver */
+
+    if (me != 0) {
+        if (target[0] != 1 ||
+            target[1] != 3 ||
+            target[2] != 5 ||
+            target[3] != 7 ||
+            target[4] != 9)
+        {
+            printf("ERR: target on PE %d is %llu %llu %llu %llu %llu\n"
+                   " Expected 1,3,5,7,9?\n",
+                   me, /* unsigned long long keeps all 64 bits even where long is 32 bits */
+                   (unsigned long long) target[0],
+                   (unsigned long long) target[1],
+                   (unsigned long long) target[2],
+                   (unsigned long long) target[3],
+                   (unsigned long long) target[4] );
+            rc = 1; /* becomes this PE's exit status */
+        }
+    }
+
+    shmem_finalize();
+
+    return rc;
+}
diff --git a/test/unit/iput_double.c b/test/unit/iput_double.c
new file mode 100644
index 0000000..1b521c8
--- /dev/null
+++ b/test/unit/iput_double.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2011 Sandia Corporation. Under the terms of Contract
+ * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+ * retains certain rights in this software.
+ *
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * adaptation of example from SGI man page for shmem_iput. 
+ */
+
+#include <stdio.h>
+#include <shmem.h>
+/* PE 0 puts source[0,2,4,6,8] into target[0..4] on every other PE; receivers verify 1,3,5,7,9. */
+
+#define IPUT shmem_double_iput
+/* IPUT is the strided-put routine under test; DataType is the element type it moves. */
+#define DataType double
+
+static DataType source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+static DataType target[10];
+
+int main(int argc, char **argv)
+{
+    int me, nProcs, rc=0;
+
+    shmem_init();
+    me = shmem_my_pe();
+    nProcs = shmem_n_pes();
+
+    if (me == 0) {
+        int j;
+        /* put 5 words (source stride 2, target stride 1) on PE's [1 to (nProcs-1)] */
+        for(j=1; j < nProcs; j++)
+            IPUT (target, source, 1, 2, 5, j);
+    }
+
+    shmem_barrier_all(); /* sync sender and receiver */
+
+    if (me != 0) {
+        if (target[0] != 1 ||
+            target[1] != 3 ||
+            target[2] != 5 ||
+            target[3] != 7 ||
+            target[4] != 9)
+        {
+            printf("ERR: target on PE %d is %f %f %f %f %f\n"
+                   " Expected 1,3,5,7,9?\n",
+                   me, target[0], target[1], target[2],
+                   target[3], target[4] );
+            rc = 1; /* becomes this PE's exit status */
+        }
+    }
+
+    shmem_finalize();
+
+    return rc;
+}
diff --git a/test/unit/iput_float.c b/test/unit/iput_float.c
new file mode 100644
index 0000000..4de7253
--- /dev/null
+++ b/test/unit/iput_float.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2011 Sandia Corporation. Under the terms of Contract
+ * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+ * retains certain rights in this software.
+ *
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Adapted from the SGI shmem_iput man page example: PE 0 puts source[0,2,4,6,8] into target[0..4] on every other PE.
+ */
+
+#include <stdio.h>
+#include <shmem.h>
+/* Receivers verify that target[0..4] holds 1,3,5,7,9 after the barrier. */
+
+#define IPUT shmem_float_iput
+/* IPUT is the strided-put routine under test; DataType is the element type it moves. */
+#define DataType float
+
+static DataType source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+static DataType target[10];
+
+int main(int argc, char **argv)
+{
+    int me, nProcs, rc=0;
+
+    shmem_init();
+    me = shmem_my_pe();
+    nProcs = shmem_n_pes();
+
+    if (me == 0) {
+        int j;
+        /* put 5 words (source stride 2, target stride 1) on PE's [1 to (nProcs-1)] */
+        for(j=1; j < nProcs; j++)
+            IPUT (target, source, 1, 2, 5, j);
+    }
+
+    shmem_barrier_all(); /* sync sender and receiver */
+
+    if (me != 0) {
+        if (target[0] != 1 ||
+            target[1] != 3 ||
+            target[2] != 5 ||
+            target[3] != 7 ||
+            target[4] != 9)
+        {
+            printf("ERR: target on PE %d is %f %f %f %f %f\n"
+                   " Expected 1,3,5,7,9?\n",
+                   me, target[0], target[1], target[2],
+                   target[3], target[4] );
+            rc = 1; /* becomes this PE's exit status */
+        }
+    }
+
+    shmem_finalize();
+
+    return rc;
+}
diff --git a/test/unit/iput_long.c b/test/unit/iput_long.c
new file mode 100644
index 0000000..207af75
--- /dev/null
+++ b/test/unit/iput_long.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2011 Sandia Corporation. Under the terms of Contract
+ * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+ * retains certain rights in this software.
+ *
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * adaptation of example from SGI man page for shmem_iput. 
+ */
+
+#include <stdio.h>
+#include <shmem.h>
+/* PE 0 puts source[0,2,4,6,8] into target[0..4] on every other PE; receivers verify 1,3,5,7,9. */
+
+#define IPUT shmem_long_iput /* strided-put routine under test */
+#define DataType long
+
+static DataType source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+static DataType target[10];
+
+int main(int argc, char **argv)
+{
+    int me, nProcs, rc=0;
+
+    shmem_init();
+    me = shmem_my_pe();
+    nProcs = shmem_n_pes();
+
+    if (me == 0) {
+        int j;
+        /* put 5 words (source stride 2, target stride 1) on PE's [1 to (nProcs-1)] */
+        for(j=1; j < nProcs; j++)
+            IPUT (target, source, 1, 2, 5, j);
+    }
+
+    shmem_barrier_all(); /* sync sender and receiver */
+
+    if (me != 0) {
+        if (target[0] != 1 ||
+            target[1] != 3 ||
+            target[2] != 5 ||
+            target[3] != 7 ||
+            target[4] != 9)
+        {
+            printf("ERR: target on PE %d is %ld %ld %ld %ld %ld\n"
+                   " Expected 1,3,5,7,9?\n",
+                   me, target[0], target[1], target[2],
+                   target[3], target[4] );
+            rc = 1; /* becomes this PE's exit status */
+        }
+    }
+
+    shmem_finalize();
+
+    return rc;
+}
diff --git a/test/unit/iput_longdouble.c b/test/unit/iput_longdouble.c
new file mode 100644
index 0000000..753c016
--- /dev/null
+++ b/test/unit/iput_longdouble.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2011 Sandia Corporation. Under the terms of Contract
+ * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+ * retains certain rights in this software.
+ *
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Adapted from the SGI shmem_iput man page example: PE 0 puts source[0,2,4,6,8] into target[0..4] on every other PE.
+ */
+
+#include <stdio.h>
+#include <shmem.h>
+/* Receivers verify that target[0..4] holds 1,3,5,7,9 after the barrier. */
+
+/* IPUT is the strided-put routine under test; DataType is the element type it moves. */
+#define IPUT shmem_longdouble_iput
+#define DataType long double
+
+static DataType source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+static DataType target[10];
+
+int main(int argc, char **argv)
+{
+    int me, nProcs, rc=0;
+
+    shmem_init();
+    me = shmem_my_pe();
+    nProcs = shmem_n_pes();
+
+    if (me == 0) {
+        int j;
+        /* put 5 words (source stride 2, target stride 1) on PE's [1 to (nProcs-1)] */
+        for(j=1; j < nProcs; j++)
+            IPUT (target, source, 1, 2, 5, j);
+    }
+
+    shmem_barrier_all(); /* sync sender and receiver */
+
+    if (me != 0) {
+        if (target[0] != 1 ||
+            target[1] != 3 ||
+            target[2] != 5 ||
+            target[3] != 7 ||
+            target[4] != 9)
+        {
+            printf("ERR: target on PE %d is %Lf %Lf %Lf %Lf %Lf\n"
+                   " Expected 1,3,5,7,9?\n",
+                   me, target[0], target[1], target[2],
+                   target[3], target[4] );
+            rc = 1; /* becomes this PE's exit status */
+        }
+    }
+
+    shmem_finalize();
+
+    return rc;
+}
diff --git a/test/unit/iput_longlong.c b/test/unit/iput_longlong.c
new file mode 100644
index 0000000..f4feb92
--- /dev/null
+++ b/test/unit/iput_longlong.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2011 Sandia Corporation. Under the terms of Contract
+ * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+ * retains certain rights in this software.
+ *
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * adaptation of example from SGI man page for shmem_iput. 
+ */
+
+#include <stdio.h>
+#include <shmem.h>
+/* PE 0 puts source[0,2,4,6,8] into target[0..4] on every other PE; receivers verify 1,3,5,7,9. */
+
+/* IPUT is the strided-put routine under test; DataType is the element type it moves. */
+#define IPUT shmem_longlong_iput
+#define DataType long long
+
+static DataType source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+static DataType target[10];
+
+int main(int argc, char **argv)
+{
+    int me, nProcs, rc=0;
+
+    shmem_init();
+    me = shmem_my_pe();
+    nProcs = shmem_n_pes();
+
+    if (me == 0) {
+        int j;
+        /* put 5 words (source stride 2, target stride 1) on PE's [1 to (nProcs-1)] */
+        for(j=1; j < nProcs; j++)
+            IPUT (target, source, 1, 2, 5, j);
+    }
+
+    shmem_barrier_all(); /* sync sender and receiver */
+
+    if (me != 0) {
+        if (target[0] != 1 ||
+            target[1] != 3 ||
+            target[2] != 5 ||
+            target[3] != 7 ||
+            target[4] != 9)
+        {
+            printf("ERR: target on PE %d is %lld %lld %lld %lld %lld\n"
+                   " Expected 1,3,5,7,9?\n",
+                   me, target[0], target[1], target[2],
+                   target[3], target[4] );
+            rc = 1; /* becomes this PE's exit status */
+        }
+    }
+
+    shmem_finalize();
+
+    return rc;
+}
diff --git a/test/unit/iput_short.c b/test/unit/iput_short.c
new file mode 100644
index 0000000..a0f3ce1
--- /dev/null
+++ b/test/unit/iput_short.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2011 Sandia Corporation. Under the terms of Contract
+ * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+ * retains certain rights in this software.
+ *
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Adapted from the SGI shmem_iput man page example: PE 0 puts source[0,2,4,6,8] into target[0..4] on every other PE.
+ */
+
+#include <stdio.h>
+#include <shmem.h>
+/* Receivers verify that target[0..4] holds 1,3,5,7,9 after the barrier. */
+
+#define IPUT shmem_short_iput /* strided-put routine under test */
+#define DataType short
+
+static DataType source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+static DataType target[10];
+
+int main(int argc, char **argv)
+{
+    int me, nProcs, rc=0;
+
+    shmem_init();
+    me = shmem_my_pe();
+    nProcs = shmem_n_pes();
+
+    if (me == 0) {
+        int j;
+        /* put 5 words (source stride 2, target stride 1) on PE's [1 to (nProcs-1)] */
+        for(j=1; j < nProcs; j++)
+            IPUT (target, source, 1, 2, 5, j);
+    }
+
+    shmem_barrier_all(); /* sync sender and receiver */
+
+    if (me != 0) {
+        if (target[0] != 1 ||
+            target[1] != 3 ||
+            target[2] != 5 ||
+            target[3] != 7 ||
+            target[4] != 9)
+        {
+            printf("ERR: target on PE %d is %hd %hd %hd %hd %hd\n"
+                   " Expected 1,3,5,7,9?\n",
+                   me, target[0], target[1], target[2],
+                   target[3], target[4] );
+            rc = 1; /* becomes this PE's exit status */
+        }
+    }
+
+    shmem_finalize();
+
+    return rc;
+}
diff --git a/test/unit/lfinc.c b/test/unit/lfinc.c
new file mode 100644
index 0000000..585ccd8
--- /dev/null
+++ b/test/unit/lfinc.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright 2011 Sandia Corporation. Under the terms of Contract
+ * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+ * retains certain rights in this software.
+ *
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+/* long_finc neighbor - each PE times `loops` shmem_long_atomic_fetch_inc() calls on data[1] of PE (my_pe+1)%npes. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/time.h>
+
+#include <shmem.h>
+#include <shmemx.h>
+
+#define LOOPS 25000 /* default iteration count; argv[1] overrides it */
+
+#ifndef HAVE_SHMEMX_WTIME
+static double shmemx_wtime(void) { /* fallback timer: gettimeofday() expressed in seconds */
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    return (double) tv.tv_sec + (double) tv.tv_usec / 1000000.0;
+}
+#endif /* HAVE_SHMEMX_WTIME */
+
+int Verbose; /* never set in this file, so it stays 0 and the PASSED line is not printed */
+double elapsed; /* seconds spent inside the fetch-inc calls */
+
+int main( int argc, char *argv[])
+{
+    int rc=0, my_pe, npes, neighbor;
+    int loops=LOOPS;
+    int j;
+    size_t data_sz=sizeof(long) * 3; /* data[1] is the counter; data[0] and data[2] are guard words */
+    double start_time;
+    long *data, lval=0;
+
+    if (argc > 1)
+        loops = atoi(argv[1]);
+
+    shmem_init();
+
+    my_pe = shmem_my_pe();
+    npes = shmem_n_pes();
+
+    if (loops <= 0) {
+        if (my_pe == 0)
+            printf("Error: loops must be greater than 0\n");
+
+        shmem_finalize();
+        return 1;
+    }
+
+    data = shmem_malloc(data_sz);
+    if (!data) {
+        fprintf(stderr,"[%d] shmem_malloc(%zu) failure? %d\n",
+                my_pe,data_sz,errno);
+        shmem_global_exit(1);
+    }
+    memset((void*)data,0,data_sz);
+
+    shmem_barrier_all();
+
+    neighbor = (my_pe + 1) % npes;
+    /* only the fetch-inc call itself is timed; elapsed accumulates the per-call deltas */
+    for(j=0,elapsed=0.0; j < loops; j++) {
+        start_time = shmemx_wtime();
+        lval = shmem_long_atomic_fetch_inc( &data[1], neighbor );
+        elapsed += shmemx_wtime() - start_time;
+        if (lval != (long) j) { /* the value fetched on iteration j must be j */
+            fprintf(stderr,"[%d] Test: FAIL previous val %ld != %d Exit.\n",
+                    my_pe, lval, j);
+            shmem_global_exit(1);
+        }
+    }
+    shmem_barrier_all();
+
+    rc = 0;
+    if (data[1] != (long)loops) { /* after the barrier the local counter must equal loops */
+        fprintf(stderr,"[%d] finc neighbot: FAIL data[1](%p) %ld != %d Exit.\n",
+                my_pe, (void*)&data[1], data[1], loops);
+        rc--;
+    }
+
+    /* check that the adjacent memory locations were not disturbed */
+    assert(data[0] == 0);
+    assert(data[2] == 0);
+
+    if (my_pe == 0 ) {
+        if (rc == 0 && Verbose)
+            fprintf(stderr,"[%d] finc neighbor: PASSED.\n",my_pe);
+        fprintf(stderr,"[%d] %d loops of shmem_long_atomic_fetch_inc() in %6.4f secs\n"
+                " %2.6f usecs per shmem_long_atomic_fetch_inc()\n",
+                my_pe,loops,elapsed,((elapsed*1000000.0)/(double)loops)); /* seconds -> microseconds */
+    }
+    shmem_free(data);
+
+    shmem_finalize();
+
+    return rc;
+}
+
diff --git a/test/unit/many-ctx.c b/test/unit/many-ctx.c
new file mode 100644
index 0000000..377e49e
--- /dev/null
+++ b/test/unit/many-ctx.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <shmem.h>
+
+#define NUM_CTX 32 /* number of contexts created, and of increments each PE issues */
+
+long data = 0; /* counter incremented by PE (me-1), once per context */
+
+int main(int argc, char **argv) {
+    int me, npes, i;
+    int errors = 0;
+    shmem_ctx_t ctx[NUM_CTX];
+
+    shmem_init();
+
+    me = shmem_my_pe();
+    npes = shmem_n_pes();
+
+    for (i = 0; i < NUM_CTX; i++) {
+        int err = shmem_ctx_create(0, &ctx[i]); /* 0: no context options requested */
+
+        if (err) {
+            printf("%d: Error creating context %d (%d)\n", me, i, err);
+            shmem_global_exit(1);
+        }
+    }
+
+    for (i = 0; i < NUM_CTX; i++) /* one atomic increment per context on the next PE's data */
+        shmem_ctx_long_atomic_inc(ctx[i], &data, (me+1) % npes);
+
+    for (i = 0; i < NUM_CTX; i++) /* complete the outstanding operation on every context */
+        shmem_ctx_quiet(ctx[i]);
+
+    shmem_sync_all(); /* all PEs have finished their increments before data is checked */
+
+    if (data != NUM_CTX) {
+        printf("%d: error expected %d, got %ld\n", me, NUM_CTX, data);
+        ++errors;
+    }
+
+    shmem_finalize();
+    return errors;
+}
diff --git a/test/unit/max_reduction.c b/test/unit/max_reduction.c
new file mode 100644
index 0000000..b500807
--- /dev/null
+++ b/test/unit/max_reduction.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2011 Sandia Corporation. Under the terms of Contract
+ * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+ * retains certain rights in this software.
+ *
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+* reduce [0...num_pes]
+*/
+
+#include
+#include
+#include
+#include
+
+long pSync[SHMEM_REDUCE_SYNC_SIZE];
+
+#define N 3
+
+long src[N];
+long dst[N];
+
+/* Fully parenthesized so the expansion stays intact inside larger expressions. */
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#define WRK_SIZE MAX(N/2+1, SHMEM_REDUCE_MIN_WRKDATA_SIZE)
+
+long pWrk[WRK_SIZE];
+
+/*
+ * Max-reduction test: every PE contributes src[i] = my_pe + i and the
+ * reduction over all PEs must leave dst[i] == n_pes - 1 + i everywhere.
+ *
+ * argv[1] may be "-v" (print dst on every PE) or "-h" (print usage and
+ * exit with status 1). A wrong result aborts the job via
+ * shmem_global_exit(1); otherwise main returns 0.
+ */
+int
+main(int argc, char* argv[])
+{
+    int i, Verbose=0;
+    char *pgm;
+
+    /* Program name without its directory part, for the usage text. */
+    if ((pgm=strrchr(argv[0],'/'))) {
+        pgm++;
+    } else {
+        pgm = argv[0];
+    }
+
+    if (argc > 1) {
+        if (strncmp(argv[1],"-v",3) == 0) {
+            Verbose=1;
+        } else if (strncmp(argv[1],"-h",3) == 0) {
+            fprintf(stderr,"usage: %s {v(verbose)|h(help)}\n",pgm);
+            /* shmem_init() has not been called yet, so there is nothing
+             * to finalize on this path. */
+            exit(1);
+        }
+    }
+
+    for (i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i += 1) {
+        pSync[i] = SHMEM_SYNC_VALUE;
+    }
+
+    shmem_init();
+
+    for (i = 0; i < N; i += 1) {
+        src[i] = shmem_my_pe() + i;
+    }
+    /* All PEs have initialized pSync and src before the reduction starts. */
+    shmem_barrier_all();
+
+    shmem_long_max_to_all(dst, src, N, 0, 0, shmem_n_pes(), pWrk, pSync);
+
+    if (Verbose) {
+        printf("%d/%d\tdst =", shmem_my_pe(), shmem_n_pes() );
+        for (i = 0; i < N; i+= 1) {
+            printf(" %ld", dst[i]);
+        }
+        printf("\n");
+    }
+
+    /* The largest contribution to element i comes from the last PE. */
+    for (i = 0; i < N; i+= 1) {
+        if (dst[i] != shmem_n_pes() - 1 + i) {
+            printf("[%3d] Error: dst[%d] == %ld, expected %ld\n",
+                   shmem_my_pe(), i, dst[i], shmem_n_pes() - 1 + (long) i);
+            shmem_global_exit(1);
+        }
+    }
+
+    shmem_finalize();
+
+    return 0;
+}
diff --git a/test/unit/micro_unit_shmem.c b/test/unit/micro_unit_shmem.c
new file mode 100644
index 0000000..b6f796b
--- /dev/null
+++ b/test/unit/micro_unit_shmem.c
@@ -0,0 +1,462 @@
+/*
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+/*
+ * Back-to-back operation testing: each operation gets its own buffers, and
+ * the two PEs alternate between issuing operations and waiting.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* Number of per-test slots available in target[], source[] and sync_pes[]. */
+typedef enum {
+    NUM_WRITE = 8,
+    NUM_READ = 5,
+    NUM_SYNC = 3
+} max_ops;
+
+int target[NUM_WRITE];
+int source[NUM_READ];
+int sync_pes[NUM_SYNC];
+
+int verbose;
+int debug;
+
+
+/* Block until *wait_var equals iterations; pe only labels the debug output. */
+static inline void wait_until(int *wait_var, int iterations, int pe)
+{
+
+    if (debug)
+        printf("PE %d waiting...%d\n", pe, *wait_var);
+
+    shmem_int_wait_until(wait_var, SHMEM_CMP_EQ, iterations);
+
+    if (debug)
+        printf("PE %d wait_until passed\n", pe);
+
+}
+
+/* Print the verbose/debug banner for operation op; performs no checking. */
+static inline void pre_op_check(const char *op,
+        int check_var, int iterations, int pe)
+{
+    if (verbose)
+        printf("SHMEM %s, performing %d iterations\n",
+                op, iterations);
+
+    if (debug)
+        printf("BEFORE operation PE %d target = %d\n",
+                pe, check_var);
+}
+
+/* Abort the job unless check_var equals the expected value iterations. */
+static inline void post_op_check(const char *op,
+        int check_var, int iterations, int pe)
+{
+
+    if (check_var != iterations) {
+        fprintf(stderr, "%s ERR: PE %d source = %d != %d\n",
+                op, pe, check_var, iterations);
+        shmem_global_exit(EXIT_FAILURE);
+    }
+}
+
+/* Abort the job when a test would index past target[], source[] or
+ * sync_pes[]; callers pass 0 for a buffer the test does not use. */
+static inline void check_slots(int me, int T, int S, int P)
+{
+    if (T >= NUM_WRITE || S >= NUM_READ || P >= NUM_SYNC) {
+        if (me == 0)
+            fprintf(stderr, "too many individual tests requested\n");
+        shmem_global_exit(EXIT_FAILURE);
+    }
+}
+
+/* PE 0 puts 1..iterations into target[T] on PE 1 with a fence after every
+ * put but the last; the other PE waits until target[T] == iterations. */
+static inline void putfence(int me, int iterations, int T)
+{
+    int i;
+
+    if (me == 1)
+        pre_op_check(__func__, target[T], iterations, 1);
+
+    if (me == 0) {
+        for (i = 1; i < iterations; i++) {
+            shmem_int_p(&target[T], i, 1);
+            shmem_fence();
+        }
+
+        shmem_int_p(&target[T], i, 1);
+
+    } else
+        wait_until(&target[T], iterations, 1);
+
+    if (verbose)
+        if (me == 0)
+            printf("SHMEM %s finished\n", __func__);
+
+}
+
+
+/* PE 1 writes iterations into source[S] on PE 0, reads it back iterations
+ * times into its own target[T], signals PE 0 through sync_pes[P] and checks
+ * the value read; the other PE only waits for the signal. */
+static inline void gettest(int me, int iterations, int T, int S, int P)
+{
+
+    int i;
+
+    if (me == 1) {
+        pre_op_check(__func__, target[T], iterations, 1);
+
+        shmem_int_p(&source[S], iterations, 0);
+        /* NOTE(review): shmem_fence() orders puts/AMOs; confirm whether a
+         * shmem_quiet() is needed to order this put before the gets. */
+        shmem_fence();
+
+        for (i = 0; i < iterations; i++)
+            target[T] = shmem_int_g(&source[S], 0);
+
+        shmem_int_p(&sync_pes[P], iterations, 0);
+
+        post_op_check("get", target[T], iterations, 1);
+
+    } else
+        wait_until(&sync_pes[P], iterations, 0);
+
+    if (verbose) {
+        if (me == 0)
+            printf("SHMEM %s finished\n", __func__);
+    }
+}
+
+/* PE 0 atomically increments target[T] on PE 1 iterations+1 times; the other
+ * PE waits until its target[T] reaches iterations+1. */
+static inline void atomic_inc(int me, int iterations, int T)
+{
+
+    int i;
+
+    if (me == 1)
+        pre_op_check(__func__, target[T], iterations, 1);
+
+    target[T] = 0;
+    shmem_barrier_all();
+
+    if (me == 0) {
+        for (i = 0; i < iterations; i++) {
+            shmem_int_atomic_inc(&target[T], 1);
+            shmem_fence();
+        }
+        shmem_int_atomic_inc(&target[T], 1);
+
+        if (debug)
+            printf("PE 0 done with operation\n");
+
+    } else
+        wait_until(&target[T], (iterations+1), 1);
+
+    if (verbose) {
+        if (me == 1)
+            printf("SHMEM %s finished\n", __func__);
+    }
+}
+
+/* PE 1 atomically adds 1 to target[T] on PE 0 iterations+1 times; the other
+ * PE waits until its target[T] reaches iterations+1. */
+static inline void atomic_add(int me, int iterations, int T)
+{
+
+    int i;
+
+    if (me == 0)
+        pre_op_check(__func__, target[T], iterations, 0);
+
+    target[T] = 0;
+    shmem_barrier_all();
+
+    if (me == 1) {
+        for (i = 0; i < iterations; i++) {
+            shmem_int_atomic_add(&target[T], 1, 0);
+            shmem_fence();
+        }
+        shmem_int_atomic_add(&target[T], 1, 0);
+
+        if (debug)
+            printf("PE 1 done with operation\n");
+
+    } else
+        wait_until(&target[T], (iterations+1), 0);
+
+    if (verbose) {
+        if (me == 1)
+            printf("SHMEM %s finished\n", __func__);
+    }
+}
+
+
+/* PE 0 swaps its source[S] with target[T] on PE 1 iterations times and then
+ * signals through sync_pes[P]. The two values trade places on every swap, so
+ * the parity of iterations decides which value each side must end up with. */
+static inline void swaptest(int me, int iterations, int T, int S, int P)
+{
+
+    int i;
+    const int tswap = 5, sswap = 2;
+    target[T] = tswap;
+    source[S] = sswap;
+
+    shmem_barrier_all(); /* Ensure target/source initialization completed */
+
+    if (me == 0)
+        pre_op_check(__func__, source[S], iterations, 0);
+
+    if (me == 0) {
+        for (i = 0; i < iterations; i++)
+            source[S] = shmem_int_atomic_swap(&target[T], source[S], 1);
+
+        shmem_int_p(&sync_pes[P], i, 1);
+
+        if (debug)
+            printf("AFTER flag PE 0 value of source is %d"
+                    " = 5?\n", source[S]);
+
+        if (((iterations % 2 == 1) && (source[S] != tswap)) ||
+                ((iterations % 2 == 0) &&
+                 (source[S] != sswap))) {
+            fprintf(stderr, "swap ERR: PE 0 source = %d\n",
+                    source[S]);
+            shmem_global_exit(EXIT_FAILURE);
+        }
+
+    } else {
+        wait_until(&sync_pes[P], iterations, 1);
+
+        /* This branch runs on PE 1 (main only proceeds with two PEs). */
+        if (((iterations % 2 == 1) && (target[T] != sswap)) ||
+                ((iterations % 2 == 0) &&
+                 (target[T] != tswap))) {
+            fprintf(stderr, "swap ERR: PE 1 target = %d \n",
+                    target[T]);
+            shmem_global_exit(EXIT_FAILURE);
+        }
+
+    }
+
+    if (verbose) {
+        if (me == 0)
+            printf("SHMEM %s finished\n", __func__);
+    }
+}
+
+/* PE 1 compare-and-swaps target[T] on PE 0 from i to i+1 for i in
+ * [0, iterations), so the last fetched value is iterations-1 and PE 0 must
+ * end with target[T] == iterations. */
+static inline void cswaptest(int me, int iterations, int T, int S, int P)
+{
+
+    int i;
+    source[S] = -100;
+
+    target[T] = 0;
+    shmem_barrier_all();
+
+    if (me == 1) {
+        pre_op_check(__func__, source[S], iterations, 1);
+
+        for (i = 0; i < iterations; i++)
+            source[S] = shmem_int_atomic_compare_swap(&(target[T]), i, (i+1), 0);
+
+        shmem_int_p(&sync_pes[P], i, 0);
+
+        post_op_check("compare_swap", source[S], (iterations-1), 1);
+
+    } else {
+        wait_until(&sync_pes[P], iterations, 0);
+
+        /* This branch runs on PE 0 (main only proceeds with two PEs). */
+        if (target[T] != iterations) {
+            fprintf(stderr, "compare_swap ERR: PE 0 target = %d != %d\n",
+                    target[T], iterations);
+            shmem_global_exit(EXIT_FAILURE);
+        }
+    }
+
+    if (verbose) {
+        if (me == 1)
+            printf("SHMEM %s finished\n", __func__);
+    }
+}
+
+/* PE 0 fetch-adds 1 to target[T] on PE 1 iterations+1 times; the last
+ * fetched value must be iterations, and the other PE waits for
+ * iterations+1. */
+static inline void fetchatomic_add(int me, int iterations, int T, int S)
+{
+
+    int i;
+
+    if (me == 1)
+        pre_op_check(__func__, target[T], iterations, 1);
+
+    target[T] = 0;
+    shmem_barrier_all();
+
+    if (me == 0) {
+        if (debug) {
+            printf("BEFORE flag PE 0 value of source is"
+                    " %d = 0?\n", source[S]);
+        }
+
+        for (i = 0; i < iterations; i++) {
+            source[S] = shmem_int_atomic_fetch_add(&target[T], 1, 1);
+            shmem_fence();
+        }
+        source[S] = shmem_int_atomic_fetch_add(&target[T], 1, 1);
+
+        post_op_check("fetch_add", source[S], iterations, 0);
+
+    } else
+        wait_until(&target[T], (iterations+1), 1);
+
+    if (verbose) {
+        if (me == 0)
+            printf("SHMEM %s finished\n", __func__);
+    }
+}
+
+/* PE 1 fetch-increments target[T] on PE 0 iterations times; the last fetched
+ * value must be iterations-1, and the other PE waits for iterations. */
+static inline void fetchatomic_inc(int me, int iterations, int T, int S)
+{
+
+    int i;
+
+    if (me == 0)
+        pre_op_check(__func__, target[T], iterations, 0);
+
+    target[T] = 0;
+    shmem_barrier_all();
+
+    if (me == 1) {
+        if (debug) {
+            printf("BEFORE flag PE 1 value of source is %d\n",
+                    source[S]);
+        }
+
+        for (i = 0; i < iterations; i++) {
+            source[S] = shmem_int_atomic_fetch_inc(&target[T], 0);
+            shmem_fence();
+        }
+
+        post_op_check("fetch_inc", source[S], (iterations-1), 1);
+    } else
+        wait_until(&target[T], iterations, 0);
+
+    if (verbose) {
+        if (me == 1)
+            printf("SHMEM %s finished\n", __func__);
+    }
+
+}
+
+/*
+ * Two-PE micro test driver.
+ *
+ * Options are handled in command-line order: -i <n> sets the iteration
+ * count, -v and -d enable verbose/debug output, and each test flag
+ * (-p -g -a -A -s -c -f -F) runs that test immediately with the settings
+ * seen so far. When no test flag is given, every test runs once.
+ *
+ * Returns 0 on success or when the job does not have exactly two PEs, and
+ * 1 for -h, an unknown option or a non-positive iteration count. A failed
+ * check aborts the job through shmem_global_exit().
+ */
+int main(int argc, char **argv)
+{
+    int me, nproc;
+    int c, selected = 0;
+    int T = 0, S = 0, P = 0;
+    const int DEFAULT_ITR = 7;
+    int iterations = DEFAULT_ITR;
+
+    shmem_init();
+
+    me = shmem_my_pe();
+    nproc = shmem_n_pes();
+
+    memset(target, -1, NUM_WRITE * sizeof(int));
+    memset(source, -1, NUM_READ * sizeof(int));
+    memset(sync_pes, -1, NUM_SYNC * sizeof(int));
+
+    shmem_barrier_all();
+
+    if (nproc != 2) {
+        if (me == 0) {
+            fprintf(stderr, "This is a micro test and is only "
+                    "intended to run on exactly two processes you"
+                    " are using %d\n", nproc);
+        }
+        shmem_finalize();
+        return 0;
+    }
+
+    while ((c = getopt(argc, argv, "i:vdpgaAscfFh")) != -1) {
+        switch (c) {
+            case 'i':
+                iterations = atoi(optarg);
+                /* Validate explicitly: an assert vanishes under NDEBUG. */
+                if (iterations <= 0) {
+                    if (me == 0)
+                        fprintf(stderr, "iteration count must be a "
+                                "positive integer, got '%s'\n", optarg);
+                    shmem_finalize();
+                    return 1;
+                }
+                break;
+            case 'v':
+                verbose = 1;
+                break;
+            case 'd':
+                debug = 1;
+                break;
+            case 'p':
+                check_slots(me, T, 0, 0);
+                putfence(me, iterations, T++);
+                selected = 1;
+                break;
+            case 'g':
+                check_slots(me, T, S, P);
+                gettest(me, iterations, T++, S++, P++);
+                selected = 1;
+                break;
+            case 'a':
+                check_slots(me, T, 0, 0);
+                atomic_add(me, iterations, T++);
+                selected = 1;
+                break;
+            case 'A':
+                check_slots(me, T, 0, 0);
+                atomic_inc(me, iterations, T++);
+                selected = 1;
+                break;
+            case 's':
+                check_slots(me, T, S, P);
+                swaptest(me, iterations, T++, S++, P++);
+                selected = 1;
+                break;
+            case 'c':
+                check_slots(me, T, S, P);
+                cswaptest(me, iterations, T++, S++, P++);
+                selected = 1;
+                break;
+            case 'f':
+                check_slots(me, T, S, 0);
+                fetchatomic_add(me, iterations, T++, S++);
+                selected = 1;
+                break;
+            case 'F':
+                check_slots(me, T, S, 0);
+                fetchatomic_inc(me, iterations, T++, S++);
+                selected = 1;
+                break;
+            case 'h':
+            default:
+                if (me == 0) {
+                    fprintf(stderr, "input options:\n 1) single"
+                            " argument option will run all tests by default"
+                            " and additionally request: -v (verbose) | "
+                            "-i <iterations>\n");
+                    fprintf(stderr, " 2) two argument options "
+                            "choose any combination of the following "
+                            "to run individual tests: -i <iterations>, -v"
+                            ", -d, -p, -g, -a, -A, -s, -c, -f, -F, -h\n");
+                }
+                shmem_finalize();
+                return 1;
+        }
+    }
+
+    /* No individual test was requested: run the full suite once. Tracking
+     * this explicitly avoids guessing from argc, which misfires for
+     * combined flags such as -vp and for -d or -i5 on their own. */
+    if (!selected) {
+        putfence(me, iterations, T++);
+        gettest(me, iterations, T++, S++, P++);
+        atomic_add(me, iterations, T++);
+        atomic_inc(me, iterations, T++);
+        swaptest(me, iterations, T++, S++, P++);
+        cswaptest(me, iterations, T++, S++, P++);
+        fetchatomic_add(me, iterations, T++, S++);
+        fetchatomic_inc(me, iterations, T++, S++);
+    }
+
+    if (verbose) {
+        if (me == 1)
+            printf("PE 1: PASS: %8d iterations\n", iterations);
+        else
+            printf("PE 0 Successful exit\n");
+    }
+
+    shmem_finalize();
+
+    return 0;
+}
diff --git a/test/unit/mt_a2a.c b/test/unit/mt_a2a.c
new file mode 100644
index 0000000..c1c5a88
--- /dev/null
+++ b/test/unit/mt_a2a.c
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Multithreaded All-to-All Test
+ * James Dinan
+ * January, 2014
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+/* For systems without the PThread barrier API (e.g. MacOS) */
+#include "pthread_barrier.h"
+
+#define T 8
+
+int dest[T] = { 0 };
+int flag[T] = { 0 };
+
+int me, npes;
+int errors = 0;
+pthread_barrier_t fencebar;
+pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+
+
+/* Per-thread body. arg points at this thread's id, which selects the
+ * dest[]/flag[] slot the thread works on. Runs an atomics phase, then a put
+ * phase, then a get phase; every mismatch bumps the shared `errors` counter
+ * under `mutex`. Always returns NULL. */
+static void * thread_main(void *arg) {
+    int tid = * (int *) arg;
+    int i, val, expected;
+
+    /* TEST CONCURRENT ATOMICS: add this PE's rank to dest[tid] on every PE
+     * (including this one). */
+    val = me;
+    for (i = 1; i <= npes; i++)
+        shmem_int_atomic_add(&dest[tid], val, (me + i) % npes);
+
+    /* Ensure that fence does not overlap with communication calls */
+    pthread_barrier_wait(&fencebar);
+    if (tid == 0) shmem_fence();
+    pthread_barrier_wait(&fencebar);
+
+    /* Each PE increments flag[tid] on every PE, so the local flag reaches
+     * npes once all PEs have done so. */
+    for (i = 1; i <= npes; i++)
+        shmem_int_atomic_inc(&flag[tid], (me + i) % npes);
+
+    shmem_int_wait_until(&flag[tid], SHMEM_CMP_EQ, npes);
+
+    /* Sum of all ranks 0..npes-1. */
+    expected = (npes-1) * npes / 2;
+    if (dest[tid] != expected || flag[tid] != npes) {
+        printf("Atomic test error: [PE = %d | TID = %d] -- "
+               "dest = %d (expected %d), flag = %d (expected %d)\n",
+               me, tid, dest[tid], expected, flag[tid], npes);
+        pthread_mutex_lock(&mutex);
+        ++errors;
+        pthread_mutex_unlock(&mutex);
+    }
+
+    /* Only thread 0 calls the SHMEM barrier; the pthread barriers on both
+     * sides hold the other threads in place around it. */
+    pthread_barrier_wait(&fencebar);
+    if (0 == tid) shmem_barrier_all();
+    pthread_barrier_wait(&fencebar);
+
+    /* TEST CONCURRENT PUTS: overwrite dest[tid] on the next PE with -1. */
+    val = -1;
+    shmem_int_put(&dest[tid], &val, 1, (me + 1) % npes);
+
+    /* Ensure that all puts are issued before the shmem barrier is called. */
+    pthread_barrier_wait(&fencebar);
+    if (0 == tid) shmem_barrier_all();
+    pthread_barrier_wait(&fencebar);
+
+    /* TEST CONCURRENT GETS: dest[tid] must now read -1 on every PE. */
+    for (i = 1; i <= npes; i++) {
+        shmem_int_get(&val, &dest[tid], 1, (me + i) % npes);
+
+        expected = -1;
+        if (val != expected) {
+            printf("Put/get test error: [PE = %d | TID = %d] -- From PE %d, got %d expected %d\n",
+                   me, tid, (me + i) % npes, val, expected);
+            pthread_mutex_lock(&mutex);
+            ++errors;
+            pthread_mutex_unlock(&mutex);
+        }
+    }
+
+    pthread_barrier_wait(&fencebar);
+    if (0 == tid) shmem_barrier_all();
+
+    return NULL;
+}
+
+
+/* Initializes SHMEM requesting SHMEM_THREAD_MULTIPLE, runs T threads of
+ * thread_main and joins them. Returns 0 when this PE recorded no errors and
+ * 1 otherwise; if initialization itself fails, returns its error code. */
+int main(int argc, char **argv) {
+    int tl, i, ret;
+    pthread_t threads[T];
+    int t_arg[T];
+
+    ret = shmem_init_thread(SHMEM_THREAD_MULTIPLE, &tl);
+
+    if (tl != SHMEM_THREAD_MULTIPLE || ret != 0) {
+        printf("Init failed (requested thread level %d, got %d, ret %d)\n",
+               SHMEM_THREAD_MULTIPLE, tl, ret);
+
+        /* The library is up but the thread level is too low: abort the job. */
+        if (ret == 0) {
+            shmem_global_exit(1);
+        } else {
+            return ret;
+        }
+    }
+
+    me = shmem_my_pe();
+    npes = shmem_n_pes();
+
+    pthread_barrier_init(&fencebar, NULL, T);
+
+    if (me == 0) printf("Starting multithreaded test on %d PEs, %d threads/PE\n", npes, T);
+
+    for (i = 0; i < T; i++) {
+        int err;
+        /* Each thread gets its own slot so the id stays valid for its lifetime. */
+        t_arg[i] = i;
+        err = pthread_create(&threads[i], NULL, thread_main, (void*) &t_arg[i]);
+        assert(0 == err);
+    }
+
+    for (i = 0; i < T; i++) {
+        int err;
+        err = pthread_join(threads[i], NULL);
+        assert(0 == err);
+    }
+
+    pthread_barrier_destroy(&fencebar);
+
+    if (me == 0) {
+        if (errors) printf("Encountered %d errors\n", errors);
+        else printf("Success\n");
+    }
+
+    shmem_finalize();
+    return (errors == 0) ? 0 : 1;
+}
diff --git a/test/unit/mt_contention.c b/test/unit/mt_contention.c
new file mode 100644
index 0000000..a989125
--- /dev/null
+++ b/test/unit/mt_contention.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2018 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Multithreaded Contention Test: Overlapping AMO/quiet on a shared (default)
+ * context */
+
+#include
+#include
+#include
+#include
+#include
+
+#define T 8
+
+int dest;
+
+int me, npes;
+int errors = 0;
+
+/* Per-thread body. arg points at this thread's id, which is also the value
+ * the thread adds to `dest` on every PE. Always returns NULL. */
+static void * thread_main(void *arg) {
+    int tid = * (int *) arg;
+    int i;
+
+    /* Threads increment the counter on each PE and then perform a quiet.
+     * All threads use the default context; thus, this checks that quiet
+     * with overlapping AMOs behaves correctly.
+     */
+
+    for (i = 1; i <= npes; i++)
+        shmem_int_atomic_add(&dest, tid, (me + i) % npes);
+
+    shmem_quiet();
+
+    return NULL;
+}
+
+
+/* Initializes SHMEM requesting SHMEM_THREAD_MULTIPLE, runs T threads of
+ * thread_main, then checks the accumulated `dest`. Returns 0 when the value
+ * matches and 1 otherwise; if initialization itself fails, returns its
+ * error code. */
+int main(int argc, char **argv) {
+    int tl, i, ret;
+    pthread_t threads[T];
+    int t_arg[T];
+
+    ret = shmem_init_thread(SHMEM_THREAD_MULTIPLE, &tl);
+
+    if (tl != SHMEM_THREAD_MULTIPLE || ret != 0) {
+        printf("Init failed (requested thread level %d, got %d, ret %d)\n",
+               SHMEM_THREAD_MULTIPLE, tl, ret);
+
+        /* The library is up but the thread level is too low: abort the job. */
+        if (ret == 0) {
+            shmem_global_exit(1);
+        } else {
+            return ret;
+        }
+    }
+
+    me = shmem_my_pe();
+    npes = shmem_n_pes();
+
+    if (me == 0) printf("Starting multithreaded test on %d PEs, %d threads/PE\n", npes, T);
+
+    for (i = 0; i < T; i++) {
+        int err;
+        /* Each thread gets its own slot so the id stays valid for its lifetime. */
+        t_arg[i] = i;
+        err = pthread_create(&threads[i], NULL, thread_main, (void*) &t_arg[i]);
+        assert(0 == err);
+    }
+
+    for (i = 0; i < T; i++) {
+        int err;
+        err = pthread_join(threads[i], NULL);
+        assert(0 == err);
+    }
+
+    shmem_sync_all();
+
+    /* Every PE's T threads each added their id (0..T-1) to this PE's dest. */
+    if (dest != ((T-1)*T/2)*npes) {
+        printf("%d: dest = %d, expected %d\n", me, dest, ((T-1)*T/2)*npes);
+        errors++;
+    }
+
+    shmem_finalize();
+    return (errors == 0) ? 0 : 1;
+}
diff --git a/test/unit/mt_membar.c b/test/unit/mt_membar.c
new file mode 100644
index 0000000..d5efb67
--- /dev/null
+++ b/test/unit/mt_membar.c
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2018 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Multi-threaded tests for validation of memory barrier implemented in
+ * different synchronization routines.
+*/
+
+#include
+#include
+#include
+#include
+
+/* For systems without the PThread barrier API (e.g. MacOS) */
+#include "pthread_barrier.h"
+
+#define T 2
+#define ITER 100
+
+#ifndef MAX
+#define MAX(A,B) (((A)>(B)) ? (A) : (B))
+#endif
+
+int shared_dest_1 = 0, shared_dest_2 = 0, result = 0;
+int me, npes, errors = 0, sum_error = 0;
+long lock = 0;
+
+pthread_barrier_t fencebar;
+
+long pSync[SHMEM_REDUCE_SYNC_SIZE];
+int pWrk[MAX(1, SHMEM_REDUCE_MIN_WRKDATA_SIZE)];
+
+/* Per-thread body; arg points at this thread's id (0 or 1). Runs four
+ * phases (WAIT, WAIT & FENCE, FENCE, LOCK) in which the two threads hand
+ * control back and forth through shared_dest_1/shared_dest_2 and accumulate
+ * into `result`. After each phase thread 0 compares `result` with the
+ * expected total, bumps `errors` on a mismatch and resets the shared state.
+ * Only thread 0 calls shmem_barrier_all(), bracketed by pthread barriers.
+ * Always returns NULL. */
+static void * thread_main(void *arg) {
+    int tid = *(int *) arg;
+    int one = 1, zero = 0;
+    int i;
+
+    /* TEST WAIT: threads take turns; each turn adds 1 to result atomically. */
+    for (i = 0; i < ITER; i++) {
+        shmem_int_wait_until(&shared_dest_1, SHMEM_CMP_EQ, tid);
+        shmem_int_atomic_add(&result, one, me);
+        shared_dest_1 = (tid + 1) % T;
+    }
+
+    pthread_barrier_wait(&fencebar);
+    if (0 == tid) shmem_barrier_all();
+    pthread_barrier_wait(&fencebar);
+
+    if (tid == 0) {
+        errors += ((result == (T * ITER)) ? 0 : 1);
+        if (result != T * ITER) {
+            printf("ERROR in WAIT test from %d : result = %d, expected = %d\n",
+                   me, result, T * ITER);
+        }
+        result = 0;
+        shared_dest_1 = 0;
+    }
+
+    pthread_barrier_wait(&fencebar);
+    if (0 == tid) shmem_barrier_all();
+    pthread_barrier_wait(&fencebar);
+
+    /* TEST WAIT & FENCE WITH NON-ATOMIC READ-WRITE */
+    for (i = 0; i < ITER; i++) {
+        shmem_int_wait_until(&shared_dest_1, SHMEM_CMP_EQ, tid);
+        result++;
+        shmem_fence();
+        shared_dest_1 = (tid + 1) % T;
+    }
+
+    pthread_barrier_wait(&fencebar);
+    if (0 == tid) shmem_barrier_all();
+    pthread_barrier_wait(&fencebar);
+
+    if (tid == 0) {
+        errors += ((result == (T * ITER)) ? 0 : 1);
+        if (result != T * ITER) {
+            /* Labelled separately from the plain WAIT phase above. */
+            printf("ERROR in WAIT & FENCE test from %d : result = %d, expected = %d\n",
+                   me, result, T * ITER);
+        }
+        result = 0;
+        shared_dest_1 = 0;
+    }
+
+    pthread_barrier_wait(&fencebar);
+    if (0 == tid) shmem_barrier_all();
+    pthread_barrier_wait(&fencebar);
+
+    /* TEST FENCE: thread 0 raises both flags; thread 1 adds their sum to
+     * result and clears them again. */
+    for (i = 0; i < ITER; i++) {
+        if (tid == 0) {
+            shared_dest_1 += 1;
+            shared_dest_2 += 1;
+            shmem_fence();
+            shmem_int_wait_until(&shared_dest_1, SHMEM_CMP_EQ, zero);
+        }
+
+        if (tid == 1) {
+            shmem_int_wait_until(&shared_dest_2, SHMEM_CMP_EQ, one);
+            shmem_int_atomic_add(&result, shared_dest_1 + shared_dest_2, me);
+            shared_dest_2 = 0;
+            shared_dest_1 = 0;
+            shmem_fence();
+        }
+    }
+
+    pthread_barrier_wait(&fencebar);
+    if (0 == tid) shmem_barrier_all();
+    pthread_barrier_wait(&fencebar);
+
+    if (tid == 0) {
+        errors += ((result == (T * ITER)) ? 0 : 1);
+        if (result != T * ITER) {
+            printf("ERROR in FENCE test from %d : result = %d, expected = %d\n",
+                   me, result, T * ITER);
+        }
+        result = 0;
+        shared_dest_1 = 0;
+        shared_dest_2 = 0;
+    }
+
+    pthread_barrier_wait(&fencebar);
+    if (0 == tid) shmem_barrier_all();
+    pthread_barrier_wait(&fencebar);
+
+    /* TEST LOCK: thread 0 sets shared_dest_1 under the lock and then raises
+     * shared_dest_2; thread 1 adds shared_dest_1 to result once per round. */
+    for (i = 0; i < ITER; i++) {
+        if (tid == 0) {
+            shmem_set_lock(&lock);
+            shared_dest_1 = 1;
+            shmem_clear_lock(&lock);
+            shared_dest_2 = 1;
+            shmem_int_wait_until(&shared_dest_2, SHMEM_CMP_EQ, zero);
+        }
+
+        if (tid == 1) {
+            shmem_int_wait_until(&shared_dest_2, SHMEM_CMP_EQ, one);
+            result += shared_dest_1;
+            shared_dest_1 = 0;
+            shared_dest_2 = 0;
+        }
+    }
+
+    pthread_barrier_wait(&fencebar);
+    if (0 == tid) shmem_barrier_all();
+    pthread_barrier_wait(&fencebar);
+
+    if (tid == 0) {
+        errors += ((result == ITER) ? 0 : 1);
+        if (result != ITER) {
+            /* Report the value actually checked against: ITER. */
+            printf("ERROR in LOCK test from %d : result = %d, expected = %d\n",
+                   me, result, ITER);
+        }
+        result = 0;
+    }
+
+    pthread_barrier_wait(&fencebar);
+    if (0 == tid) shmem_barrier_all();
+
+    return NULL;
+}
+
+/* Initializes SHMEM requesting SHMEM_THREAD_MULTIPLE, runs T threads of
+ * thread_main, then sums `errors` across all PEs. Returns 0 when the global
+ * sum is zero and 1 otherwise; if initialization itself fails, returns its
+ * error code. */
+int main(int argc, char **argv) {
+    int tl, i, ret;
+    pthread_t threads[T];
+    int t_arg[T];
+
+    ret = shmem_init_thread(SHMEM_THREAD_MULTIPLE, &tl);
+
+    if (tl != SHMEM_THREAD_MULTIPLE || ret != 0) {
+        printf("Init failed (requested thread level %d, got %d, ret %d)\n",
+               SHMEM_THREAD_MULTIPLE, tl, ret);
+        /* The library is up but the thread level is too low: abort the job. */
+        if (ret == 0) {
+            shmem_global_exit(1);
+        } else {
+            return ret;
+        }
+    }
+
+    me = shmem_my_pe();
+    npes = shmem_n_pes();
+
+    for (i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i++) {
+        pSync[i] = SHMEM_SYNC_VALUE;
+    }
+
+    pthread_barrier_init(&fencebar, NULL, T);
+
+    if (me == 0) {
+        printf("Starting multi-threaded test on %d PEs, %d threads/PE\n", npes, T);
+    }
+
+    for (i = 0; i < T; i++) {
+        int err;
+        /* Each thread gets its own slot so the id stays valid for its lifetime. */
+        t_arg[i] = i;
+        err = pthread_create(&threads[i], NULL, thread_main, (void *) &t_arg[i]);
+        assert(0 == err);
+    }
+
+    for (i = 0; i < T; i++) {
+        int err;
+        err = pthread_join(threads[i], NULL);
+        assert(0 == err);
+    }
+
+    pthread_barrier_destroy(&fencebar);
+
+    /* pSync is initialized on every PE before the reduction uses it. */
+    shmem_barrier_all();
+    shmem_int_sum_to_all(&sum_error, &errors, 1, 0, 0, npes, pWrk, pSync);
+
+    shmem_finalize();
+    return (sum_error == 0) ? 0 : 1;
+}
diff --git a/test/unit/nop_collectives.c b/test/unit/nop_collectives.c
new file mode 100644
index 0000000..f591e48
--- /dev/null
+++ b/test/unit/nop_collectives.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include
+#include
+#include
+
+long bcast_psync[SHMEM_BCAST_SYNC_SIZE];
+long collect_psync[SHMEM_COLLECT_SYNC_SIZE];
+long reduce_psync[SHMEM_REDUCE_SYNC_SIZE];
+long alltoall_psync[SHMEM_ALLTOALL_SYNC_SIZE];
+long alltoalls_psync[SHMEM_ALLTOALLS_SYNC_SIZE];
+
+int pwrk[SHMEM_REDUCE_MIN_WRKDATA_SIZE];
+
+/* Set the first count entries of a sync array to SHMEM_SYNC_VALUE. */
+static void fill_sync(long *psync, int count)
+{
+    int k;
+
+    for (k = 0; k < count; k++)
+        psync[k] = SHMEM_SYNC_VALUE;
+}
+
+/* Calls every collective with NULL buffers and zero elements, with a
+ * barrier between consecutive calls, and returns 0 once all have returned. */
+int main(void) {
+    int k;
+    int rank, n_pes;
+    int is_root;
+
+    fill_sync(bcast_psync, SHMEM_BCAST_SYNC_SIZE);
+    fill_sync(collect_psync, SHMEM_COLLECT_SYNC_SIZE);
+    fill_sync(reduce_psync, SHMEM_REDUCE_SYNC_SIZE);
+    fill_sync(alltoall_psync, SHMEM_ALLTOALL_SYNC_SIZE);
+    fill_sync(alltoalls_psync, SHMEM_ALLTOALLS_SYNC_SIZE);
+
+    for (k = 0; k < SHMEM_REDUCE_MIN_WRKDATA_SIZE; k++)
+        pwrk[k] = 0;
+
+    shmem_init();
+
+    rank = shmem_my_pe();
+    n_pes = shmem_n_pes();
+    is_root = (rank == 0);   /* only PE 0 prints progress */
+
+    if (is_root) printf("Testing zero length collectives\n");
+
+    if (is_root) printf(" + broadcast\n");
+    shmem_broadcast32(NULL, NULL, 0, 0, 0, 0, n_pes, bcast_psync);
+    shmem_barrier_all();
+    shmem_broadcast64(NULL, NULL, 0, 0, 0, 0, n_pes, bcast_psync);
+    shmem_barrier_all();
+
+    if (is_root) printf(" + collect\n");
+    shmem_fcollect32(NULL, NULL, 0, 0, 0, n_pes, collect_psync);
+    shmem_barrier_all();
+    shmem_fcollect64(NULL, NULL, 0, 0, 0, n_pes, collect_psync);
+    shmem_barrier_all();
+
+    shmem_collect32(NULL, NULL, 0, 0, 0, n_pes, collect_psync);
+    shmem_barrier_all();
+    shmem_collect64(NULL, NULL, 0, 0, 0, n_pes, collect_psync);
+    shmem_barrier_all();
+
+    if (is_root) printf(" + reduction\n");
+    shmem_int_and_to_all(NULL, NULL, 0, 0, 0, n_pes, pwrk, reduce_psync);
+    shmem_barrier_all();
+    shmem_int_or_to_all(NULL, NULL, 0, 0, 0, n_pes, pwrk, reduce_psync);
+    shmem_barrier_all();
+    shmem_int_xor_to_all(NULL, NULL, 0, 0, 0, n_pes, pwrk, reduce_psync);
+    shmem_barrier_all();
+    shmem_int_min_to_all(NULL, NULL, 0, 0, 0, n_pes, pwrk, reduce_psync);
+    shmem_barrier_all();
+    shmem_int_max_to_all(NULL, NULL, 0, 0, 0, n_pes, pwrk, reduce_psync);
+    shmem_barrier_all();
+    shmem_int_sum_to_all(NULL, NULL, 0, 0, 0, n_pes, pwrk, reduce_psync);
+    shmem_barrier_all();
+    shmem_int_prod_to_all(NULL, NULL, 0, 0, 0, n_pes, pwrk, reduce_psync);
+    shmem_barrier_all();
+
+    if (is_root) printf(" + all-to-all\n");
+    shmem_alltoall32(NULL, NULL, 0, 0, 0, n_pes, alltoall_psync);
+    shmem_barrier_all();
+    shmem_alltoall64(NULL, NULL, 0, 0, 0, n_pes, alltoall_psync);
+    shmem_barrier_all();
+
+    shmem_alltoalls32(NULL, NULL, 1, 1, 0, 0, 0, n_pes, alltoalls_psync);
+    shmem_barrier_all();
+    shmem_alltoalls64(NULL, NULL, 1, 1, 0, 0, 0, n_pes, alltoalls_psync);
+
+    if (is_root) printf("Done\n");
+
+    shmem_finalize();
+
+    return 0;
+}
diff --git a/test/unit/ns.c b/test/unit/ns.c
new file mode 100644
index 0000000..331d5fe
--- /dev/null
+++ b/test/unit/ns.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright 2011 Sandia Corporation. Under the terms of Contract
+ * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+ * retains certain rights in this software.
+ *
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+/*
+ * Neighbor swap:
+ * swap between odd numbered PEs and their right neighbor (modulo num_pes)
+ */
+
+#include
+#include
+#include
+#include
+#include <limits.h>
+
+#include
+
+static int atoi_scaled(char *s);
+static void usage(char *pgm);
+
+int Verbose;
+
+#define DFLT_LAPS 50
+
+
+/*
+ * Runs `laps` rounds (default DFLT_LAPS, set with -l) in which each PE
+ * allocates one symmetric long holding its own rank and every odd-numbered
+ * PE atomically swaps its rank into the right neighbor's copy.
+ *
+ * Options: -l <count[k|m|g]> loop count, -v verbose (repeatable), -h usage.
+ * Returns 0 on success or after -h, and 1 for a bad loop count or an
+ * unknown option. An allocation failure aborts the job.
+ */
+int
+main(int argc, char *argv[])
+{
+    char *pgm;
+    int l, laps = DFLT_LAPS;
+    long *target;
+    int me, npes;
+    long swapped_val, new_val;
+
+    shmem_init();
+    me = shmem_my_pe();
+    npes = shmem_n_pes();
+
+    /* Program name without its directory part, for messages. */
+    if ((pgm=strrchr(argv[0],'/')))
+        pgm++;
+    else
+        pgm = argv[0];
+
+    while ((l = getopt (argc, argv, "l:vh")) != EOF) {
+        switch (l)
+        {
+          case 'l':
+            if ((laps = atoi_scaled(optarg)) <= 0) {
+                /* Report once, like the unknown-option case below. */
+                if (me == 0)
+                    fprintf(stderr,"ERR: Bad loop count %d\n",laps);
+                shmem_finalize();
+                return 1;
+            }
+            break;
+          case 'v':
+            Verbose++;
+            break;
+          case 'h':
+            if (me == 0)
+                usage(pgm);
+            shmem_finalize();
+            return 0;
+          default:
+            if (me == 0) {
+                /* getopt returns '?' here; optopt holds the offending
+                 * option character. */
+                fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,optopt);
+                usage(pgm);
+            }
+            shmem_finalize();
+            return 1;
+        }
+    }
+
+    for (l=0; l < laps; l++) {
+        target = (long *) shmem_malloc(sizeof (*target));
+        if (!target) {
+            fprintf(stderr,"[%d] shmem_malloc() failed?\n",me);
+            shmem_global_exit(1);
+        }
+
+        *target = me;
+        new_val = me;
+
+        /* Every PE has initialized its target before any swap is issued. */
+        shmem_barrier_all();
+
+        if (me & 1) {
+            swapped_val = shmem_long_atomic_swap(target, new_val, (me + 1) % npes);
+            if (Verbose > 1)
+                printf("[%d] target %ld, swapped %ld\n",
+                       me, *target, swapped_val);
+        }
+        /* All swaps are done before the symmetric buffer is released. */
+        shmem_barrier_all();
+        shmem_free (target);
+        if (Verbose == 1 && me == 0) fprintf(stderr,".");
+    }
+    if (Verbose && me == 0) fprintf(stderr,"\n");
+
+    shmem_finalize();
+
+    return 0;
+}
+
+
+/*
+ * Parse an integer (any base strtol accepts with base 0) followed by an
+ * optional k/K, m/M or g/G suffix that scales it by 2^10, 2^20 or 2^30.
+ * Any other trailing text is ignored and the number is returned unscaled.
+ *
+ * Returns the scaled value, or -1 when it does not fit in an int. Text
+ * without leading digits yields 0. Callers treat every result <= 0 as
+ * invalid.
+ */
+static int
+atoi_scaled(char *s)
+{
+    long val;
+    long scale = 1;
+    char *e;
+
+    val = strtol(s,&e,0);
+
+    if (*e == 'k' || *e == 'K')
+        scale = 1024L;
+    else if (*e == 'm' || *e == 'M')
+        scale = 1024L*1024L;
+    else if (*e == 'g' || *e == 'G')
+        scale = 1024L*1024L*1024L;
+
+    /* Range-check before multiplying so neither the product nor the
+     * narrowing to int can overflow. */
+    if (val > INT_MAX / scale || val < INT_MIN / scale)
+        return -1;
+
+    return (int)(val * scale);
+}
+
+/* Print the option summary for program name pgm to stderr. */
+static void
+usage(char *pgm)
+{
+    fprintf(stderr,
+        "usage: %s -{lhv}\n"
+        " where:\n"
+        " -l loops (%d) loop count.\n"
+        " -v be verbose, multiple 'v' more verbose\n"
+        " -h this text.\n",
+        pgm,DFLT_LAPS);
+}
+
diff --git a/test/unit/pcontrol.c b/test/unit/pcontrol.c
new file mode 100644
index 0000000..27a9d92
--- /dev/null
+++ b/test/unit/pcontrol.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2011 Sandia Corporation. Under the terms of Contract
+ * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+ * retains certain rights in this software.
+ *
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */ + +#include +#include +#include +#include + +int +main(int argc, char* argv[], char *envp[]) +{ + shmem_init(); + + shmemx_pcontrol(1, "Region 1"); + shmem_barrier_all(); + + shmemx_pcontrol(1, "Region 2"); + shmem_barrier_all(); + + shmem_finalize(); + + return 0; +} diff --git a/test/unit/pi.c b/test/unit/pi.c new file mode 100644 index 0000000..73a6e5f --- /dev/null +++ b/test/unit/pi.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include + +#define NUM_POINTS 10000 + + +long long inside = 0, total = 0; + +int +main(int argc, char* argv[], char *envp[]) +{ + int me, myshmem_n_pes; + /* + ** Starts/Initializes SHMEM/OpenSHMEM + */ + shmem_init(); + /* + ** Fetch the number or processes + ** Some implementations use num_pes(); + */ + myshmem_n_pes = shmem_n_pes(); + /* + ** Assign my process ID to me + */ + me = shmem_my_pe(); + + srand(1+me); + + for(total = 0; total < NUM_POINTS; ++total) { + double x,y; + x = rand()/(double)RAND_MAX; + y = rand()/(double)RAND_MAX; + + if(x*x + y*y < 1) { + ++inside; + } + } + + shmem_barrier_all(); + + int errors = 0; + + if(me == 0) { + for(int i = 1; i < myshmem_n_pes; ++i) { + long long remoteInside,remoteTotal; + shmem_longlong_get(&remoteInside,&inside,1,i); + shmem_longlong_get(&remoteTotal,&total,1,i); + total += remoteTotal; + inside += remoteInside; + } + + double approx_pi = 4.0*inside/(double)total; + + if(fabs(M_PI-approx_pi) > 0.1) { + ++errors; + } + + if (NULL == getenv("MAKELEVEL")) { + printf("Pi from %llu points on %d PEs: %lf\n",total,myshmem_n_pes,approx_pi); + } + } + + shmem_finalize(); + + return errors; +} + diff --git a/test/unit/ping.c b/test/unit/ping.c new file mode 100644 index 0000000..15c2c0e --- /dev/null +++ b/test/unit/ping.c @@ -0,0 +1,203 @@ +/* + * Copyright 2011 Sandia Corporation. Under the terms of Contract + * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government + * retains certain rights in this software. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * ping test usage: ping {-V} {longs-per-put(128)} {loop-cnt(10)} + * + * For yod -c X ./ping + * Proc0 puts nWords to Proc1 ... Proc(X-1) + */ +#include +#include +#include +#include +#include +#include + +#define Rfprintf if (shmem_my_pe() == 0) fprintf +#define Rprintf if (shmem_my_pe() == 0) printf + +#define RDprintf if (Verbose && shmem_my_pe() == 0) printf +#define RDfprintf if (Verbose && shmem_my_pe() == 0) fprintf + +/* option flags */ +#define OUTPUT_MOD 1 // output debug every X loops +int output_mod = OUTPUT_MOD; +int Verbose; +int Slow; + +#define DFLT_NWORDS 128 +#define TARGET_SZ 8192 +#define VAL 0xCafeBabe + +long Target[TARGET_SZ]; +long src[TARGET_SZ]; + +int +main(int argc, char* argv[]) +{ + int c, j, loops, k; + int proc, num_procs; + int nWords=1; + int failures=0; + char *prog_name; + + shmem_init(); + proc = shmem_my_pe(); + num_procs = shmem_n_pes(); + + if (num_procs == 1) { + Rfprintf(stderr, + "ERR - Requires > 1 PEs\n"); + shmem_finalize(); + return 0; + } + + prog_name = strrchr(argv[0],'/'); + if ( prog_name ) + prog_name++; + else + prog_name = argv[0]; + + while((c=getopt(argc,argv,"hVM:s")) != -1) { + switch(c) { + case 's': + Slow++; + break; + case 'V': + Verbose++; + break; + case 'M': + output_mod = 
atoi(optarg); + if (output_mod <= 0) { + Rfprintf(stderr, "ERR - output modulo arg out of " + "bounds '%d'?]\n", output_mod); + shmem_finalize(); + return 1; + } + Rfprintf(stderr,"%s: output modulo %d\n", + prog_name,output_mod); + break; + case 'h': + Rfprintf(stderr, + "usage: %s {nWords-2-put} {Loop-count}\n", + prog_name); + shmem_finalize(); + return 1; + default: + shmem_finalize(); + return 1; + } + } + + if (optind == argc) + nWords = DFLT_NWORDS; + else { + nWords = atoi(argv[optind++]); + if (nWords <= 0 || nWords > TARGET_SZ) { + Rfprintf(stderr, + "ERR - nWords arg out of bounds '%d' [1..%d]?\n", + nWords, TARGET_SZ); + shmem_finalize(); + return 1; + } + } + + if (optind == argc) + loops = 10; + else { + loops = atoi(argv[optind++]); + if (loops <= 0 || loops > 1000000) { + Rfprintf(stderr, + "ERR - loops arg out of bounds '%d'?\n", loops); + shmem_finalize(); + return 1; + } + } + + //Rprintf("%s: %d loops of %d longs per put\n",prog_name,loops,nWords); + + for(j=0; j < nWords; j++) + src[j] = VAL; + + for(j=0; j < loops; j++) { + + shmem_barrier_all(); + + if ( Verbose && (j==0 || (j % output_mod) == 0) ) + fprintf(stderr,"[%d] +(%d)\n", shmem_my_pe(),j); + + if ( proc == 0 ) { + int p; + for(p=1; p < num_procs; p++) + shmem_long_put(Target, src, nWords, p); + } + else { + if (Slow) { + /* wait for each put to complete */ + for(k=0; k < nWords; k++) + shmem_long_wait_until(&Target[k], SHMEM_CMP_NE, proc); + } else { + /* wait for last word to be written */ + shmem_long_wait_until(&Target[nWords-1], SHMEM_CMP_NE, proc); + } + } + + if ( Verbose && (j==0 || (j % output_mod) == 0) ) + fprintf(stderr,"[%d] -(%d)\n", shmem_my_pe(),j); + + shmem_barrier_all(); + + if ( proc != 0 ) { + for(k=0; k < nWords; k++) { + if (Target[k] != VAL) { + fprintf(stderr, "[%d] Target[%d] %#lx " + "!= %#x\?\n", + proc,k,Target[k],VAL); + failures++; + } + //assert(Target[proc] == VAL); + Target[k] = 0; + } + } + else + memset(Target, 0, sizeof(Target)); + } + + 
shmem_barrier_all(); + + if (failures || Verbose) + Rprintf ("%d(%d) Exit(%d)\n", proc, num_procs, failures); + + shmem_finalize(); + + return failures; +} diff --git a/test/unit/pingpong-short.c b/test/unit/pingpong-short.c new file mode 100644 index 0000000..7394eb3 --- /dev/null +++ b/test/unit/pingpong-short.c @@ -0,0 +1,325 @@ +/* + * Copyright 2011 Sandia Corporation. Under the terms of Contract + * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government + * retains certain rights in this software. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * pingpong test pingpong-short {-v} {numShorts-2-put(128)} {loop-cnt(10)} + * + * For yod -c X + * Proc0 puts nShorts to Proc1 ... Proc(X-1) + * Proc1 ... Proc(X-1) verify put data to be correct. + * Proc1 ... Proc(X-1) put nShorts(proc_ID) back to proc0 in + * work[proc_ID*nLongs] ... 
work[proc_ID*(nLongs-1)] + * Proc 0 verifes data + */ +#include + +#include +#include +#include +#include +#include +#include + +static int atoi_scaled(char *s); + +#define Rfprintf if (shmem_my_pe() == 0) fprintf +#define Rprintf if (shmem_my_pe() == 0) printf + +#define RDprintf if (Verbose && shmem_my_pe() == 0) printf +#define RDfprintf if (Verbose && shmem_my_pe() == 0) fprintf + +/* option flags */ +#define OUTPUT_MOD 1 // output debug every X loops on -V +int output_mod = OUTPUT_MOD; +int Verbose; +int Slow; +long pSync0[SHMEM_BARRIER_SYNC_SIZE], + pSync1[SHMEM_BARRIER_SYNC_SIZE], + pSync2[SHMEM_BARRIER_SYNC_SIZE], + pSync3[SHMEM_BARRIER_SYNC_SIZE], + pSync4[SHMEM_BARRIER_SYNC_SIZE]; + +#define DFLT_NWORDS 128 +#define DFLT_LOOPS 100 + +#define VAL 0xCafe + +#define DataType short + +DataType *Target; +DataType *src; +DataType *work; + +int +main(int argc, char* argv[]) +{ + int c, j, loops, k, l; + int my_pe, nProcs, nWorkers; + int nWords=1; + int failures=0; + char *prog_name; + DataType *wp; + long work_sz; + + for(j=0; j < SHMEM_BARRIER_SYNC_SIZE; j++) { + pSync0[j] = pSync1[j] = pSync2[j] = pSync3[j] = + pSync4[j] = SHMEM_SYNC_VALUE; + } + + shmem_init(); + my_pe = shmem_my_pe(); + nProcs = shmem_n_pes(); + nWorkers = nProcs - 1; + + if (nProcs == 1) { + Rfprintf(stderr, + "ERR - Requires > 1 PEs\n"); + shmem_finalize(); + return 0; + } + + for(j=0; j < nProcs; j++) + if ( shmem_pe_accessible(j) != 1 ) { + fprintf(stderr, + "ERR - pe %d not accessible from pe %d\n", + j, my_pe); + } + + prog_name = strrchr(argv[0],'/'); + if ( prog_name ) + prog_name++; + else + prog_name = argv[0]; + + while((c=getopt(argc,argv,"hvM:s")) != -1) { + switch(c) { + case 's': + Slow++; + break; + case 'v': + Verbose++; + break; + case 'M': + output_mod = atoi(optarg); + if (output_mod <= 0) { + Rfprintf(stderr, "ERR - output modulo arg out of " + "bounds '%d'?\n", output_mod); + shmem_finalize(); + return 1; + } + Rfprintf(stderr,"%s: output modulo %d\n", + 
prog_name,output_mod); + break; + case 'h': + Rfprintf(stderr, + "usage: %s {nWords-2-put(%d)K/M} {Loop-count(%d)K/M}\n", + prog_name, DFLT_NWORDS, DFLT_LOOPS); + shmem_finalize(); + return 1; + default: + shmem_finalize(); + return 1; + } + } + + if (optind == argc) + nWords = DFLT_NWORDS; + else { + nWords = atoi_scaled(argv[optind++]); + if (nWords <= 0) { + Rfprintf(stderr, "ERR - Bad nWords '%d'?\n", nWords); + shmem_finalize(); + return 1; + } + } + + if (optind == argc) + loops = DFLT_LOOPS; + else { + loops = atoi_scaled(argv[optind++]); + if (loops <= 0 || loops > 1000000) { + Rfprintf(stderr, + "ERR - loops arg out of bounds '%d'?\n", loops); + shmem_finalize(); + return 1; + } + } + + work_sz = (nProcs*nWords) * sizeof(DataType); + work = shmem_malloc( work_sz ); + if ( !work ) { + fprintf(stderr,"[%d] ERR - work = shmem_malloc(%ld) ?\n",my_pe,work_sz); + shmem_global_exit(1); + } + + Target = shmem_malloc( 2 * nWords * sizeof(DataType) ); // Target & src + if ( !Target ) { + fprintf(stderr,"[%d] ERR - Target = shmem_malloc(%ld) ?\n", + my_pe, (nWords * sizeof(DataType))); + shmem_global_exit(1); + } + src = &Target[nWords]; + + if (Verbose) Rprintf("%s: %d loops of %d shorts per put/get cycle\n", + prog_name,loops,nWords); + + for(j=0; j < nWords; j++) + src[j] = (DataType)VAL; + + for(j=0; j < loops; j++) { + +#if _DEBUG + if ( Verbose && (j==0 || (j % output_mod) == 0) ) + fprintf(stderr,"[%d] +(%d)\n", shmem_my_pe(),j); +#endif + shmem_barrier(0, 0, nProcs, pSync0); + if ( my_pe == 0 ) { + int p; + for(p=1; p < nProcs; p++) + shmem_short_put(Target, src, nWords, p); + } + else { + if (Slow) { + /* wait for each put to complete */ + for(k=0; k < nWords; k++) + shmem_short_wait_until(&Target[k], SHMEM_CMP_NE, my_pe); + } else { + /* wait for last word to be written */ + shmem_short_wait_until(&Target[nWords-1], SHMEM_CMP_NE, my_pe); + } + } +#if _DEBUG + if ( Verbose && (j==0 || (j % output_mod) == 0) ) + fprintf(stderr,"[%d] -(%d)\n", my_pe,j); 
+#endif
+        shmem_barrier(0, 0, nProcs, pSync1);
+
+        RDprintf("Workers[1 ... %d] verify Target data put by my_pe 0\n",
+                 nWorkers);
+
+        /* workers verify put data is expected */
+        if ( my_pe != 0 ) {
+            for(k=0; k < nWords; k++) {
+                if (Target[k] != (DataType)VAL) {
+                    fprintf(stderr, "[%d] Target[%d] %#hx "
+                                    "!= %#hx?\n",
+                            my_pe,k,Target[k],(DataType)VAL);
+                    failures++;
+                }
+                assert(Target[k] == (DataType)VAL);
+                Target[k] = my_pe;
+            }
+        }
+        else /* clear results buffer, workers will put here */
+            memset(work, 0, work_sz);
+
+        shmem_barrier(0, 0, nProcs, pSync2);
+
+        RDprintf("Workers[1 ... %d] put Target data to PE0 work "
+                 "vector\n",nWorkers);
+
+        if ( my_pe != 0 ) {
+            /* push nWords of val my_pe back to PE zero */
+            shmem_short_put(&work[my_pe*nWords], Target, nWords, 0);
+        }
+        else {
+            /* wait for procs 1 ... nProcs to complete put()s */
+            for(l=1; l < nProcs; l++) {
+                wp = &work[ l*nWords ]; // procs nWords chunk
+#if 1
+                /* wait for last DataType to be written from each PE */
+                shmem_short_wait_until(&wp[nWords-1], SHMEM_CMP_NE, 0);
+#else
+                for(k=0; k < nWords; k++)
+                    shmem_short_wait_until(&wp[k], SHMEM_CMP_NE, 0);
+#endif
+            }
+        }
+
+        shmem_barrier(0, 0, nProcs, pSync3);
+
+        if ( my_pe == 0 ) {
+            RDprintf("Loop(%d) PE0 verifing work data.\n",j);
+            for(l=1; l < nProcs; l++) {
+                wp = &work[ l*nWords ]; // procs nWords chunk
+                for(k=0; k < nWords; k++) {
+                    if (wp[k] != l) {
+                        fprintf(stderr,
+                                "[0] PE(%d)_work[%d] %hd "
+                                "!= %hd?\n",
+                                l,k,wp[k],(DataType)l); /* report the word from PE l's chunk that actually mismatched */
+                        failures++;
+                    }
+                }
+                if (failures)
+                    break;
+            }
+        }
+        shmem_barrier(0, 0, nProcs, pSync4);
+
+        if (loops > 1) {
+            RDfprintf(stderr,".");
+            RDprintf("Loop(%d) Pass.\n",j);
+        }
+    }
+    RDfprintf(stderr,"\n");fflush(stderr);
+
+    shmem_free( work );
+    shmem_free( Target );
+
+    shmem_barrier_all();
+
+    RDprintf("%d(%d) Exit(%d)\n", my_pe, nProcs, failures);
+
+    shmem_finalize();
+
+    return failures;
+}
+
+static int
+atoi_scaled(char *s)
+{
+    long val;
+    char *e;
+
+    val = strtol(s,&e,0);
+    if (e == NULL
|| *e =='\0') + return (int)val; + + if (*e == 'k' || *e == 'K') + val *= 1024; + else if (*e == 'm' || *e == 'M') + val *= 1024*1024; + else if (*e == 'g' || *e == 'G') + val *= 1024*1024*1024; + + return (int)val; +} diff --git a/test/unit/pingpong.c b/test/unit/pingpong.c new file mode 100644 index 0000000..f476137 --- /dev/null +++ b/test/unit/pingpong.c @@ -0,0 +1,328 @@ +/* + * Copyright 2011 Sandia Corporation. Under the terms of Contract + * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government + * retains certain rights in this software. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * pingpong test pingpong {-v} {nLongs-2-put(128)} {loop-cnt(10)} + * + * For yod -c X + * Proc0 puts nLongs to Proc1 ... Proc(X-1) + * Proc1 ... Proc(X-1) verify put data to be correct. + * Proc1 ... 
Proc(X-1) put nLongs(proc_ID) back to proc0 in + * work[proc_ID*nLongs] ... work[proc_ID*(nLongs-1)] + * Proc 0 verifes data + */ +#include + +#include +#include +#include +#include +#include + +static int atoi_scaled(char *s); + +#define Rfprintf if (shmem_my_pe() == 0) fprintf +#define Rprintf if (shmem_my_pe() == 0) printf + +#define RDprintf if (Verbose && shmem_my_pe() == 0) printf +#define RDfprintf if (Verbose && shmem_my_pe() == 0) fprintf + +/* option flags */ +#define OUTPUT_MOD 1 // output debug every X loops on -V +int output_mod = OUTPUT_MOD; +int Verbose; +int Slow; + +long pSync0[SHMEM_BARRIER_SYNC_SIZE], + pSync1[SHMEM_BARRIER_SYNC_SIZE], + pSync2[SHMEM_BARRIER_SYNC_SIZE], + pSync3[SHMEM_BARRIER_SYNC_SIZE], + pSync4[SHMEM_BARRIER_SYNC_SIZE]; + +#define DFLT_NWORDS 128 +#define DFLT_LOOPS 10 + +#define VAL 0xCafeBabe + +long *Target; +long *src; +long *work; + +int +main(int argc, char* argv[]) +{ + int c, j, loops, k, l; + int my_pe, nProcs, nWorkers; + int nWords=1; + int failures=0; + char *prog_name; + long *wp,work_sz; + + for(j=0; j < SHMEM_BARRIER_SYNC_SIZE; j++) { + pSync0[j] = pSync1[j] = pSync2[j] = pSync3[j] = + pSync4[j] = SHMEM_SYNC_VALUE; + } + + shmem_init(); + my_pe = shmem_my_pe(); + nProcs = shmem_n_pes(); + nWorkers = nProcs - 1; + + if (nProcs == 1) { + Rfprintf(stderr, + "ERR - Requires > 1 PEs\n"); + shmem_finalize(); + return 0; + } + + for(j=0; j < nProcs; j++) + if ( shmem_pe_accessible(j) != 1 ) { + fprintf(stderr, + "ERR - pe %d not accessible from pe %d\n", + j, my_pe); + } + + prog_name = strrchr(argv[0],'/'); + if ( prog_name ) + prog_name++; + else + prog_name = argv[0]; + + while((c=getopt(argc,argv,"hvM:s")) != -1) { + switch(c) { + case 's': + Slow++; + break; + case 'v': + Verbose++; + break; + case 'M': + output_mod = atoi(optarg); + if (output_mod <= 0) { + Rfprintf(stderr, "ERR - output modulo arg out of " + "bounds '%d'?\n", output_mod); + shmem_finalize(); + return 1; + } + Rfprintf(stderr,"%s: output modulo 
%d\n", + prog_name,output_mod); + break; + case 'h': + Rfprintf(stderr, + "usage: %s {nWords-2-put(%d)K/M} {Loop-count(%d)K/M}\n", + prog_name, DFLT_NWORDS, DFLT_LOOPS); + shmem_finalize(); + return 1; + default: + shmem_finalize(); + return 1; + } + } + + if (optind == argc) + nWords = DFLT_NWORDS; + else { + nWords = atoi_scaled(argv[optind++]); + if (nWords <= 0) { + Rfprintf(stderr, "ERR - Bad nWords arg '%d'?\n", nWords); + shmem_finalize(); + return 1; + } + } + + if (optind == argc) + loops = DFLT_LOOPS; + else { + loops = atoi_scaled(argv[optind++]); + if (loops <= 0 || loops > 1000000) { + Rfprintf(stderr, + "ERR - loops arg out of bounds '%d'?\n", loops); + shmem_finalize(); + return 1; + } + } + + work_sz = (nProcs*nWords) * sizeof(long); + work = shmem_malloc( work_sz ); + if ( !work ) { + fprintf(stderr,"[%d] ERR - work = shmem_malloc(%ld) ?\n",my_pe,work_sz); + shmem_global_exit(1); + } + + Target = shmem_malloc( 2 * nWords * sizeof(long) ); + if ( !Target ) { + fprintf(stderr,"[%d] ERR - Target = shmem_malloc(%ld) ?\n", + my_pe, (nWords * sizeof(long))); + shmem_global_exit(1); + } + src = &Target[nWords]; + +#if _DEBUG + Rprintf("%s: %d loops of %d longs per put\n",prog_name,loops,nWords); +#endif + + for(j=0; j < nWords; j++) + src[j] = VAL; + + for(j=0; j < loops; j++) { + +#if _DEBUG + if ( Verbose && (j==0 || (j % output_mod) == 0) ) + fprintf(stderr,"[%d] +(%d)\n", my_pe,j); +#endif + shmem_barrier(0, 0, nProcs, pSync0); + if ( my_pe == 0 ) { + int p; + for(p=1; p < nProcs; p++) + shmem_long_put(Target, src, nWords, p); + } + else { + if (Slow) { + /* wait for each put to complete */ + for(k=0; k < nWords; k++) + shmem_long_wait_until(&Target[k], SHMEM_CMP_NE, my_pe); + } else { + /* wait for last word to be written */ + shmem_long_wait_until(&Target[nWords-1], SHMEM_CMP_NE, my_pe); + } + } +#if _DEBUG + if ( Verbose && (j==0 || (j % output_mod) == 0) ) + fprintf(stderr,"[%d] -(%d)\n", shmem_my_pe(),j); +#endif + shmem_barrier(0, 0, nProcs, 
pSync1);
+
+        RDprintf("Workers[1 ... %d] verify Target data put by proc0\n",
+                 nWorkers);
+
+        /* workers verify put data is expected */
+        if ( my_pe != 0 ) {
+            for(k=0; k < nWords; k++) {
+                if (Target[k] != VAL) {
+                    fprintf(stderr, "[%d] Target[%d] %#lx "
+                                    "!= %#x?\n",
+                            my_pe,k,Target[k],VAL);
+                    failures++;
+                }
+                assert(Target[k] == VAL);
+                Target[k] = my_pe;
+            }
+        }
+        else /* clear results buffer, workers will put here */
+            memset(work, 0, work_sz);
+
+        shmem_barrier(0, 0, nProcs, pSync2);
+
+        RDprintf("Workers[1 ... %d] put Target data to PE0 work "
+                 "vector\n",nWorkers);
+
+        if ( my_pe != 0 ) {
+            /* push nWords of val my_pe back to PE zero */
+            shmem_long_put(&work[my_pe * nWords], Target, nWords, 0);
+        }
+        else {
+            /* wait for procs 1 ... nProcs to complete put()s */
+            for(l=1; l < nProcs; l++) {
+                wp = &work[ l*nWords ]; // procs nWords chunk
+#if 1
+                /* wait for last long to be written from each PE */
+                shmem_long_wait_until(&wp[nWords-1], SHMEM_CMP_NE, 0);
+#else
+                for(k=0; k < nWords; k++)
+                    shmem_long_wait_until(&wp[k], SHMEM_CMP_NE, 0);
+#endif
+            }
+        }
+
+        shmem_barrier(0, 0, nProcs, pSync3);
+
+        if ( my_pe == 0 ) {
+            RDprintf("Loop(%d) PE0 verifing work data.\n",j);
+            for(l=1; l < nProcs; l++) {
+                wp = &work[ l*nWords ]; // procs nWords chunk
+                /* check every word PE l wrote, not only the first one */
+                for(k=0; k < nWords; k++) {
+                    if (wp[k] != l) {
+                        fprintf(stderr,
+                                "[0] PE(%d)_work[%d] %ld "
+                                "!= %d?\n",
+                                l,k,wp[k],l);
+                        failures++;
+                    }
+                    assert(wp[k] == l);
+                }
+                if (failures)
+                    break;
+            }
+        }
+        shmem_barrier(0, 0, nProcs, pSync4);
+#if _DEBUG
+        if (loops > 1) {
+            Rfprintf(stderr,".");
+            RDprintf("Loop(%d) Pass.\n",j);
+        }
+#endif
+    }
+
+    shmem_free( work );
+    shmem_free( Target );
+
+#if _DEBUG
+    Rfprintf(stderr,"\n");fflush(stderr);
+    shmem_barrier_all();
+    RDprintf("%d(%d) Exit(%d)\n", my_pe, nProcs, failures);
+#endif
+
+    shmem_finalize();
+
+    return failures;
+}
+
+
+static int
+atoi_scaled(char *s)
+{
+    long val;
+    char *e;
+
+    val = strtol(s,&e,0);
+    if (e == NULL || *e =='\0')
+
return (int)val;
+
+    if (*e == 'k' || *e == 'K')
+        val *= 1024;
+    else if (*e == 'm' || *e == 'M')
+        val *= 1024*1024;
+    else if (*e == 'g' || *e == 'G')
+        val *= 1024*1024*1024;
+
+    return (int)val;
+}
diff --git a/test/unit/pthread_barrier.h b/test/unit/pthread_barrier.h
new file mode 100644
index 0000000..6e95f48
--- /dev/null
+++ b/test/unit/pthread_barrier.h
@@ -0,0 +1,85 @@
+/* Apple currently doesn't provide a pthread_barrier. This file implements the
+ * functions needed for threaded SHMEM tests using the other parts of the
+ * PThread API. */
+#ifdef __APPLE__
+
+#ifndef PTHREAD_BARRIER_H_
+#define PTHREAD_BARRIER_H_
+
+#include <pthread.h>
+#include <errno.h>
+
+#define PTHREAD_BARRIER_SERIAL_THREAD -1
+
+typedef int pthread_barrierattr_t;
+
+typedef struct {
+    int cur_count;            /* arrivals still missing in the current cycle */
+    int count;                /* number of threads per cycle */
+    unsigned int generation;  /* advanced each time the barrier releases */
+    pthread_cond_t cond;
+    pthread_mutex_t mutex;
+} pthread_barrier_t;
+
+
+/* Initialize the barrier's mutex and condition variable and arm it for
+ * `count` threads. `attr` is accepted for signature compatibility and unused. */
+static int pthread_barrier_init(pthread_barrier_t *barrier, const pthread_barrierattr_t *attr, unsigned int count) {
+    int err;
+
+    err = pthread_mutex_init(&barrier->mutex, 0);
+    if (err) return err;
+    err = pthread_cond_init(&barrier->cond, 0);
+    if (err) {
+        pthread_mutex_destroy(&barrier->mutex);
+        return err;
+    }
+
+    barrier->cur_count = barrier->count = count;
+    barrier->generation = 0;
+
+    return 0;
+}
+
+
+/* Destroy the barrier's condition variable and mutex; always returns 0. */
+static int pthread_barrier_destroy(pthread_barrier_t *barrier) {
+    pthread_cond_destroy(&barrier->cond);
+    pthread_mutex_destroy(&barrier->mutex);
+    return 0;
+}
+
+
+/* Block until `count` threads have called this function. The last arriver
+ * gets PTHREAD_BARRIER_SERIAL_THREAD, the others get 0; EINVAL is returned
+ * when the remaining count is not positive. */
+static int pthread_barrier_wait(pthread_barrier_t *barrier) {
+    int ret = 0;
+
+    pthread_mutex_lock(&barrier->mutex);
+
+    if (barrier->cur_count <= 0) {
+        ret = EINVAL; /* fall through so the mutex is released below */
+    } else {
+        barrier->cur_count--;
+
+        if (barrier->cur_count == 0) {
+            ret = PTHREAD_BARRIER_SERIAL_THREAD;
+            barrier->cur_count = barrier->count;
+            barrier->generation++;
+            pthread_cond_broadcast(&barrier->cond);
+        } else {
+            /* pthread_cond_wait may wake spuriously; only leave once the
+             * last arriver has advanced the generation */
+            unsigned int gen = barrier->generation;
+            while (gen == barrier->generation)
+                pthread_cond_wait(&barrier->cond, &barrier->mutex);
+        }
+    }
+
+    pthread_mutex_unlock(&barrier->mutex);
+    return ret;
+}
+
+#endif /* PTHREAD_BARRIER_H_ */
+#endif /* __APPLE__ */
diff --git a/test/unit/put1.c
b/test/unit/put1.c new file mode 100644 index 0000000..ad895b5 --- /dev/null +++ b/test/unit/put1.c @@ -0,0 +1,72 @@ +/* + * Copyright 2011 Sandia Corporation. Under the terms of Contract + * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government + * retains certain rights in this software. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include + +int +main(int argc, char* argv[]) +{ + long source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + static long target[10]; + + shmem_init(); + + if (shmem_n_pes() == 1) { + printf("%s: Requires number of PEs > 1\n", argv[0]); + shmem_finalize(); + return 0; + } + + if (shmem_my_pe() == 0) { + /* put 10 elements into target on PE 1 */ + shmem_long_put(target, source, 10, 1); + } + + shmem_barrier_all(); /* sync sender and receiver */ + + if (shmem_my_pe() == 1) { + if (0 != memcmp(source, target, sizeof(long) * 10)) { + int i; + fprintf(stderr,"[%d] Src & Target mismatch?\n",shmem_my_pe()); + for (i = 0 ; i < 10 ; ++i) { + printf("%ld,%ld ", source[i], target[i]); + } + printf("\n"); + shmem_global_exit(1); + } + } + + shmem_finalize(); + + return 0; +} diff --git a/test/unit/put_nbi.c b/test/unit/put_nbi.c new file mode 100644 index 0000000..69459dd --- /dev/null +++ b/test/unit/put_nbi.c @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2016 Intel COrporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modfiication, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Non-Blocking Put Test + * Tom St. John + * January, 2016 + * + * PE 0 uses non-blocking put to write a message followed by a + * notification flag to every remote PE, + */ + +#include +#include +#include + +int +main(int argc, char* argv[]) +{ + long source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + long *target; + int *flag; + int i, num_pes; + int failed = 0; + + shmem_init(); + + target = (long*) shmem_malloc(sizeof(long) * 10); + flag = (int*) shmem_malloc(sizeof(int)); + if (!flag) { + fprintf(stderr, "ERR - null flag pointer\n"); + shmem_global_exit(1); + } + *flag = 0; + + num_pes=shmem_n_pes(); + + if (target) { + memset(target, 0, sizeof(long)*10); + } else { + fprintf(stderr, "ERR - null target pointer\n"); + shmem_global_exit(1); + } + + shmem_barrier_all(); + + if (shmem_my_pe() == 0) { + for(i = 0; i < num_pes; i++) { + shmem_long_put_nbi(target, source, 10, i); + shmem_fence(); + shmem_int_atomic_inc(flag, i); + } + } + + shmem_int_wait_until(flag, SHMEM_CMP_EQ, 1); + + for (i = 0; i < 10; i++) { + if (target[i] != source[i]) { + fprintf(stderr,"[%d] target[%d] = %ld, expected %ld\n", + shmem_my_pe(), i, target[i], source[i]); + failed = 1; + } + } + + shmem_free(target); + shmem_free(flag); + + shmem_finalize(); + + return failed; +} diff --git a/test/unit/reduce_active_set.c b/test/unit/reduce_active_set.c new file mode 100644 index 0000000..b307342 --- /dev/null +++ b/test/unit/reduce_active_set.c @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2017 Intel Corporation. All rights reserved. 
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include + +#define NELEM 10 + +long max_psync[SHMEM_REDUCE_SYNC_SIZE]; +long min_psync[SHMEM_REDUCE_SYNC_SIZE]; + +long min_pwrk[NELEM/2 + SHMEM_REDUCE_MIN_WRKDATA_SIZE]; +long max_pwrk[NELEM/2 + SHMEM_REDUCE_MIN_WRKDATA_SIZE]; + +long src[NELEM]; +long dst_max[NELEM]; +long dst_min[NELEM]; + +int main(void) +{ + int i, me, npes; + int errors = 0; + + shmem_init(); + + me = shmem_my_pe(); + npes = shmem_n_pes(); + + for (i = 0; i < NELEM; i++) { + src[i] = me; /* each PE contributes its own rank */ + dst_max[i] = -1; + dst_min[i] = -1; + } + + for (i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i++) { /* both pSync arrays are set to SHMEM_SYNC_VALUE before first use */ + max_psync[i] = SHMEM_SYNC_VALUE; + min_psync[i] = SHMEM_SYNC_VALUE; + } + + if (me == 0) + printf("Shrinking active set test\n"); + + shmem_barrier_all(); + + /* A total of npes tests are performed, where the active set in each test + * includes PEs i..npes-1 */ + for (i = 0; i <= me; i++) { + int j; + + if (me == i) + printf(" + PE_start=%d, logPE_stride=0, PE_size=%d\n", i, npes-i); + + shmem_long_max_to_all(dst_max, src, NELEM, i, 0, npes-i, max_pwrk, max_psync); + + /* Validate reduced data */ + for (j = 0; j < NELEM; j++) { + long expected = npes-1; /* highest rank is always in the active set */ + if (dst_max[j] != expected) { + printf("%d: Max expected dst_max[%d] = %ld, got dst_max[%d] = %ld, iteration %d\n", + me, j, expected, j, dst_max[j], i); + errors++; + } + } + + shmem_long_min_to_all(dst_min, src, NELEM, i, 0, npes-i, min_pwrk, min_psync); + + /* Validate reduced data */ + for (j = 0; j < NELEM; j++) { + long expected = i; /* lowest rank in the active set is PE_start */ + if (dst_min[j] != expected) { + printf("%d: Min expected dst_min[%d] = %ld, got dst_min[%d] = %ld, iteration %d\n", + me, j, expected, j, dst_min[j], i); + errors++; + } + } + + } + + shmem_finalize(); + + return errors != 0; +} diff --git a/test/unit/repeated_barriers.c b/test/unit/repeated_barriers.c new file mode 100644 index 0000000..b15b265 --- /dev/null +++ b/test/unit/repeated_barriers.c @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#define NREPS 50 + +long barrier_psync0[SHMEM_BARRIER_SYNC_SIZE]; +long barrier_psync1[SHMEM_BARRIER_SYNC_SIZE]; + +int main(void) +{ + int i, me, npes; + + shmem_init(); + + me = shmem_my_pe(); + npes = shmem_n_pes(); + + for (i = 0; i < SHMEM_BARRIER_SYNC_SIZE; i++) { + barrier_psync0[i] = SHMEM_SYNC_VALUE; + barrier_psync1[i] = SHMEM_SYNC_VALUE; + } + + shmem_barrier_all(); + + /* A total of npes tests are performed, where the active set in each test + * includes PEs i..npes-1; this PE takes part in tests 0..me */ + for (i = 0; i <= me; i++) { + int j; + + if (me == i) + printf(" + iteration %d\n", i); + + /* Test that barrier can be called repeatedly with the *same* pSync */ + for (j = 0; j < NREPS; j++) + shmem_barrier(i, 0, npes-i, (i % 2) ?
barrier_psync0 : barrier_psync1); /* odd and even tests alternate between the two pSync arrays */ + } + + shmem_finalize(); + + return 0; +} diff --git a/test/unit/repeated_syncs.c b/test/unit/repeated_syncs.c new file mode 100644 index 0000000..9093701 --- /dev/null +++ b/test/unit/repeated_syncs.c @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +#include +#include + +#define NREPS 50 + +long sync_psync0[SHMEM_BARRIER_SYNC_SIZE]; +long sync_psync1[SHMEM_BARRIER_SYNC_SIZE]; + +int main(void) +{ + int i, me, npes; + + shmem_init(); + + me = shmem_my_pe(); + npes = shmem_n_pes(); + + for (i = 0; i < SHMEM_BARRIER_SYNC_SIZE; i++) { + sync_psync0[i] = SHMEM_SYNC_VALUE; + sync_psync1[i] = SHMEM_SYNC_VALUE; + } + + shmem_sync_all(); + + /* A total of npes tests are performed, where the active set in each test + * includes PEs i..npes-1; this PE takes part in tests 0..me */ + for (i = 0; i <= me; i++) { + int j; + + if (me == i) + printf(" + iteration %d\n", i); + + /* Test that sync can be called repeatedly with the *same* pSync */ + for (j = 0; j < NREPS; j++) + shmem_sync(i, 0, npes-i, (i % 2) ? sync_psync0 : sync_psync1); /* odd and even tests alternate between the two pSync arrays */ + } + + shmem_finalize(); + + return 0; +} diff --git a/test/unit/rma_coverage.c b/test/unit/rma_coverage.c new file mode 100644 index 0000000..f60b2fe --- /dev/null +++ b/test/unit/rma_coverage.c @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#ifdef TEST_PSHMEM +#include +#define SHFN(fn) pshmem_##fn +#else +#define SHFN(fn) shmem_##fn +#endif + +#define CHUNK_SIZE 10 +#define ARR_SIZE (4*(CHUNK_SIZE)) + +#define EVAL_MACRO_FOR_RMA(DECL,END) \ + DECL(float, float) END \ + DECL(double, double) END \ + DECL(longdouble, long double) END \ + DECL(char, char) END \ + DECL(short, short) END \ + DECL(int, int) END \ + DECL(long, long) END \ + DECL(longlong, long long) END + +#define DECLARE_TEST(TYPENAME,TYPE) \ + TYPE TYPENAME##_shared[ARR_SIZE]; \ + \ + static int TYPENAME##_rmaTest(int target_pe, int verbose) { \ + TYPE* shared = TYPENAME##_shared; \ + TYPE myvals[ARR_SIZE]; \ + TYPE result[ARR_SIZE]; \ + size_t i; \ + \ + for(i = 0; i < ARR_SIZE; ++i) { \ + myvals[i] = (TYPE)rand(); \ + } \ + \ + SHFN(TYPENAME##_put)(shared, myvals, CHUNK_SIZE, target_pe); /* chunk 0: contiguous put */ \ + for(i = 0; i < CHUNK_SIZE; ++i) { /* chunk 1: element-wise p */ \ + SHFN(TYPENAME##_p)(&shared[CHUNK_SIZE+i], \ + myvals[CHUNK_SIZE+i], target_pe); \ + } \ + SHFN(TYPENAME##_iput)(shared+2*CHUNK_SIZE,myvals+2*CHUNK_SIZE, /* chunk 2: two strided iputs */ \ + 1, 2, CHUNK_SIZE/2, target_pe); \ + SHFN(TYPENAME##_iput)(shared+2*CHUNK_SIZE+CHUNK_SIZE/2, \ + myvals+2*CHUNK_SIZE+1, 1, 2, CHUNK_SIZE/2, target_pe); \ + SHFN(TYPENAME##_put_nbi)(shared+3*CHUNK_SIZE, /* chunk 3: nonblocking put */ \ + myvals+3*CHUNK_SIZE, CHUNK_SIZE, target_pe); \ + \ + SHFN(quiet)(); \ + SHFN(barrier_all)(); \ + \ + SHFN(TYPENAME##_get)(result,shared,CHUNK_SIZE,target_pe); \ + for(i = 0; i < CHUNK_SIZE; ++i) { \ + result[CHUNK_SIZE+i] = SHFN(TYPENAME##_g)( \ + &shared[CHUNK_SIZE+i], target_pe); \ + } \ + SHFN(TYPENAME##_iget)(result+2*CHUNK_SIZE, shared+2*CHUNK_SIZE, \ + 2, 1, CHUNK_SIZE/2, target_pe); \ +
SHFN(TYPENAME##_iget)(result+2*CHUNK_SIZE+1, \ + shared+2*CHUNK_SIZE+CHUNK_SIZE/2, 2, 1, CHUNK_SIZE/2, \ + target_pe); \ + SHFN(TYPENAME##_get_nbi)(result+3*CHUNK_SIZE, \ + shared+3*CHUNK_SIZE, CHUNK_SIZE, target_pe); \ + \ + SHFN(quiet)(); \ + SHFN(barrier_all)(); \ + int ret = 0; /* number of elements read back that differ from what was written */ \ + for(i = 0; i < ARR_SIZE; ++i) { \ + if(result[i] != myvals[i]) { \ + ++ret; \ + if(verbose) { \ + fprintf(stderr,"result[%zu] != myvals[%zu]\n", i, i); /* i is size_t, so %zu */ \ + } \ + } \ + } \ + if(verbose) { \ + fprintf(stderr,"%s (type '%s') %s: %d\n",#TYPENAME, \ + #TYPE,ret ? "Failed" : "Succeeded",ret); \ + } \ + return ret; \ + } + +EVAL_MACRO_FOR_RMA(DECLARE_TEST,) + +int main(int argc, char* argv[]) { + int verbose = 0; + if(argc > 1) { + verbose = !strcmp("-v",argv[1]); + } + + int errors = 0; + + int me, myshmem_n_pes; + SHFN(init)(); + myshmem_n_pes = SHFN(n_pes)(); + me = SHFN(my_pe)(); + + srand(1+me); + + int nextpe = (me+1)%myshmem_n_pes; /* each PE targets its right-hand neighbour, wrapping around */ + +#define RUN_TEST(TYPENAME,TYPE) do { \ + errors += (TYPENAME##_rmaTest(nextpe,verbose)); \ + } while(0) + + EVAL_MACRO_FOR_RMA(RUN_TEST,;) + + SHFN(finalize)(); + + return errors; +} diff --git a/test/unit/self_collectives.c b/test/unit/self_collectives.c new file mode 100644 index 0000000..5fb09f7 --- /dev/null +++ b/test/unit/self_collectives.c @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +long bcast_psync[SHMEM_BCAST_SYNC_SIZE]; +long collect_psync[SHMEM_COLLECT_SYNC_SIZE]; +long reduce_psync[SHMEM_REDUCE_SYNC_SIZE]; +long alltoall_psync[SHMEM_ALLTOALL_SYNC_SIZE]; +long alltoalls_psync[SHMEM_ALLTOALLS_SYNC_SIZE]; + +int pwrk[SHMEM_REDUCE_MIN_WRKDATA_SIZE]; + +#define CHECK(func, in, out) \ + do { \ + if (out != in) { \ + printf("[%02d] Error: %s expected=%ld out=%ld\n", me, func, \ + (long) in, (long) out); \ + ++errors; \ + } \ + } while (0) + +int in, out; +int32_t in_32, out_32; +int64_t in_64, out_64; + +int main(void) { + int i, errors = 0; + int me; + + for (i = 0; i < SHMEM_BCAST_SYNC_SIZE; i++) + bcast_psync[i] = SHMEM_SYNC_VALUE; + + for (i = 0; i < SHMEM_COLLECT_SYNC_SIZE; i++) + collect_psync[i] = SHMEM_SYNC_VALUE; + + for (i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i++) + reduce_psync[i] = SHMEM_SYNC_VALUE; + + for (i = 0; i < SHMEM_ALLTOALL_SYNC_SIZE; i++) + alltoall_psync[i] = SHMEM_SYNC_VALUE; + + for (i = 0; i < SHMEM_ALLTOALLS_SYNC_SIZE; i++) + alltoalls_psync[i] = SHMEM_SYNC_VALUE; + + for (i = 0; i < SHMEM_REDUCE_MIN_WRKDATA_SIZE; i++) + pwrk[i] = 0; + + shmem_init(); + + me = shmem_my_pe(); + + if (me == 0) printf("Testing single PE active set collectives\n"); + + /* Broadcast */ + /* Note: Broadcast does not modify the output buffer at the root */ + if (me == 0) printf(" + broadcast\n"); + + in_32 = me; out_32 = -1; + shmem_broadcast32(&out_32, &in_32, 1, 0, me, 0, 1, bcast_psync); /* dest first, then source: out_* receives, in_* supplies */ + CHECK("shmem_broadcast32",
-1, out_32); + shmem_barrier_all(); + + in_64 = me; out_64 = -1; + shmem_broadcast64(&out_64, &in_64, 1, 0, me, 0, 1, bcast_psync); + CHECK("shmem_broadcast64", -1, out_64); + shmem_barrier_all(); + + /* Collect */ + if (me == 0) printf(" + collect\n"); + + in_32 = me; out_32 = -1; + shmem_fcollect32(&out_32, &in_32, 1, me, 0, 1, collect_psync); + CHECK("shmem_fcollect32", in_32, out_32); + shmem_barrier_all(); + + in_64 = me; out_64 = -1; + shmem_fcollect64(&out_64, &in_64, 1, me, 0, 1, collect_psync); + CHECK("shmem_fcollect64", in_64, out_64); + shmem_barrier_all(); + + in_32 = me; out_32 = -1; + shmem_collect32(&out_32, &in_32, 1, me, 0, 1, collect_psync); + CHECK("shmem_collect32", in_32, out_32); + shmem_barrier_all(); + + in_64 = me; out_64 = -1; + shmem_collect64(&out_64, &in_64, 1, me, 0, 1, collect_psync); + CHECK("shmem_collect64", in_64, out_64); + shmem_barrier_all(); + + /* Reduction */ + if (me == 0) printf(" + reduction\n"); + + in = me; out = -1; + shmem_int_and_to_all(&out, &in, 1, me, 0, 1, pwrk, reduce_psync); + CHECK("shmem_int_and_to_all", in, out); + shmem_barrier_all(); + + in = me; out = -1; + shmem_int_or_to_all(&out, &in, 1, me, 0, 1, pwrk, reduce_psync); + CHECK("shmem_int_or_to_all", in, out); + shmem_barrier_all(); + + in = me; out = -1; + shmem_int_xor_to_all(&out, &in, 1, me, 0, 1, pwrk, reduce_psync); + CHECK("shmem_int_xor_to_all", in, out); + shmem_barrier_all(); + + in = me; out = -1; + shmem_int_min_to_all(&out, &in, 1, me, 0, 1, pwrk, reduce_psync); + CHECK("shmem_int_min_to_all", in, out); + shmem_barrier_all(); + + in = me; out = -1; + shmem_int_max_to_all(&out, &in, 1, me, 0, 1, pwrk, reduce_psync); + CHECK("shmem_int_max_to_all", in, out); + shmem_barrier_all(); + + in = me; out = -1; + shmem_int_sum_to_all(&out, &in, 1, me, 0, 1, pwrk, reduce_psync); + CHECK("shmem_int_sum_to_all", in, out); + shmem_barrier_all(); + + in = me; out = -1; + shmem_int_prod_to_all(&out, &in, 1, me, 0, 1, pwrk, reduce_psync); +
CHECK("shmem_int_prod_to_all", in, out); + shmem_barrier_all(); + + /* All-to-all */ + if (me == 0) printf(" + all-to-all\n"); + + in_32 = me; out_32 = -1; + shmem_alltoall32(&out_32, &in_32, 1, me, 0, 1, alltoall_psync); + CHECK("shmem_alltoall32", in_32, out_32); + shmem_barrier_all(); + + in_64 = me; out_64 = -1; + shmem_alltoall64(&out_64, &in_64, 1, me, 0, 1, alltoall_psync); + CHECK("shmem_alltoall64", in_64, out_64); + shmem_barrier_all(); + + in_32 = me; out_32 = -1; + shmem_alltoalls32(&out_32, &in_32, 1, 1, 1, me, 0, 1, alltoalls_psync); + CHECK("shmem_alltoalls32", in_32, out_32); + shmem_barrier_all(); + + in_64 = me; out_64 = -1; + shmem_alltoalls64(&out_64, &in_64, 1, 1, 1, me, 0, 1, alltoalls_psync); + CHECK("shmem_alltoalls64", in_64, out_64); + shmem_barrier_all(); + + if (me == 0) printf("Done\n"); + + shmem_finalize(); + + return errors; +} diff --git a/test/unit/set_fetch.c b/test/unit/set_fetch.c new file mode 100644 index 0000000..97123bb --- /dev/null +++ b/test/unit/set_fetch.c @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +/* Synopsis: Use atomic fetch to spin on var until it changes, then release + * neighbor process. + */ + +#include +#include + +int var_int = -1; +long var_long = -1; +long long var_longlong = -1; +float var_float = -1; +double var_double = -1; + +#define SPIN_TEST(typename, fmt) \ +do { \ + printf("%d: Entering %s test\n", me, #typename); \ + if (me == 0) /* PE 0 starts the chain by setting its own variable */ \ + shmem_##typename##_atomic_set(&var_##typename, nproc-1, me); \ + \ + while (0 > shmem_##typename##_atomic_fetch(&var_##typename, me)) ; /* spin until the local value is non-negative */ \ + \ + shmem_##typename##_atomic_set(&var_##typename, me, (me+1) % nproc); /* release the next PE by storing this rank there */ \ + \ + if (var_##typename - ((me + (nproc-1)) % nproc) > 0.01) { \ + printf("[%d] Type '%s' expected %d, got " fmt "\n", me, \ + #typename, (me + (nproc-1)) % nproc, var_##typename); /* report the same expected value the check uses */ \ + shmem_global_exit(1); \ + } \ + printf("%d: Finished %s test\n", me, #typename);\ +} while (0) + +int main(int argc, char **argv) { + int me, nproc; + + shmem_init(); + + me = shmem_my_pe(); + nproc = shmem_n_pes(); + + SPIN_TEST(int, "%d"); + SPIN_TEST(long, "%ld"); + SPIN_TEST(longlong, "%lld"); + SPIN_TEST(float, "%f"); + SPIN_TEST(double, "%lf"); + + shmem_finalize(); + + return 0; +} diff --git a/test/unit/set_fetch_f.f90 b/test/unit/set_fetch_f.f90 new file mode 100644 index 0000000..5ad8117 --- /dev/null +++ b/test/unit/set_fetch_f.f90 @@ -0,0 +1,101 @@ +! +! Copyright 2011 Sandia Corporation. Under the terms of Contract +! DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government +! retains certain rights in this software. +! +! Copyright (c) 2017 Intel Corporation. All rights reserved. +! This software is available to you under the BSD license below: +! +! Redistribution and use in source and binary forms, with or +!
without modification, are permitted provided that the following +! conditions are met: +! +! - Redistributions of source code must retain the above +! copyright notice, this list of conditions and the following +! disclaimer. +! +! - Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials +! provided with the distribution. +! +! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +! EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +! MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +! NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +! BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +! ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +! CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +! SOFTWARE. +! + +program set_fetch + implicit none + include 'shmem.fh' + + integer*4 :: var_i4, val_i4 + integer*8 :: var_i8, val_i8 + real*4 :: var_r4, val_r4 + real*8 :: var_r8, val_r8 + + integer npes, me + + common /symmetricvars/ var_i4, var_r4, var_i8, var_r8 + + integer shmem_my_pe, shmem_n_pes + integer*4 shmem_int4_fetch + integer*8 shmem_int8_fetch + real*4 shmem_real4_fetch + real*8 shmem_real8_fetch + + call shmem_init() + + me = shmem_my_pe() + npes = shmem_n_pes() + + var_i4 = 0 + var_i8 = 0 + var_r4 = 0.0 + var_r8 = 0.0 + + val_i4 = me + 1 + val_i8 = me + 1 + val_r4 = me + 1.0 + val_r8 = me + 1.0 + + call shmem_barrier_all() + + call shmem_int4_set(var_i4, val_i4, MOD(me+1, npes)) + call shmem_int8_set(var_i8, val_i8, MOD(me+1, npes)) + call shmem_real4_set(var_r4, val_r4, MOD(me+1, npes)) + call shmem_real8_set(var_r8, val_r8, MOD(me+1, npes)) + + call shmem_barrier_all() + + val_i4 = shmem_int4_fetch(var_i4, MOD(me+1, npes)) + val_i8 = shmem_int8_fetch(var_i8, MOD(me+1, npes)) + val_r4 = shmem_real4_fetch(var_r4, MOD(me+1, npes)) + val_r8 = 
shmem_real8_fetch(var_r8, MOD(me+1, npes)) + + if (val_i4 .ne. me + 1) then + write (*,*) "PE ", me, " int4 test failed: ", val_i4 + call shmem_global_exit(1) + endif + + if (val_i8 .ne. me + 1) then + write (*,*) "PE ", me, " int8 test failed: ", val_i8 + call shmem_global_exit(2) + endif + + if (abs(val_r4 - (me + 1)) .gt. epsilon(val_r4)) then + write (*,*) "PE ", me, " real4 test failed: ", val_r4 + call shmem_global_exit(3) + endif + + if (abs(val_r8 - (me + 1)) .gt. epsilon(val_r8)) then + write (*,*) "PE ", me, " real8 test failed: ", val_r8 + call shmem_global_exit(4) + endif + + call shmem_finalize() +end program set_fetch diff --git a/test/unit/set_lock.c b/test/unit/set_lock.c new file mode 100644 index 0000000..1a9e731 --- /dev/null +++ b/test/unit/set_lock.c @@ -0,0 +1,136 @@ +/* + * Copyright 2011 Sandia Corporation. Under the terms of Contract + * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government + * retains certain rights in this software. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * test shmem_set_lock() set_lock {-v|q} {loop-cnt} + * where: -q == quiet, -v == verbose/debug + * Loop for loop-cnt + * all pes call shmem_set_lock(), in fifo order a pe will return from + * shmem_set_lock() holding the lock and increment the global lock count. + * + */ + +#include +#include +#include +#include +#include + +#define Rfprintf if (shmem_my_pe() == 0) fprintf +#define Rprintf if (shmem_my_pe() == 0) printf +#define RDfprintf if (Verbose && shmem_my_pe() == 0) fprintf +#define RDprintf if (Verbose && shmem_my_pe() == 0) printf +#define Vprintf if (Verbose) printf +#define Vfprintf if (Verbose) fprintf + +int Verbose; +int lock_cnt; /* incremented once by every PE per loop iteration */ +long lock; /* lock word passed to shmem_set_lock()/shmem_clear_lock() */ + +int +main(int argc, char* argv[]) +{ + int pe, c, cloop, loops; + int my_rank, num_ranks; + int Announce = (NULL == getenv("MAKELEVEL")) ?
1 : 0; /* progress output defaults to on unless MAKELEVEL is set in the environment */ + + shmem_init(); + my_rank = shmem_my_pe(); + num_ranks = shmem_n_pes(); + if (num_ranks == 1) { + fprintf(stderr, "ERR - Requires > 1 PEs\n"); + shmem_finalize(); + return 0; + } + + while((c=getopt(argc,argv,"vq")) != -1) { + switch(c) { + case 'v': + Verbose++; + break; + case 'q': + Announce = 0; + break; + default: + Rfprintf(stderr,"ERR - unknown -%c ?\n",c); + shmem_finalize(); + return 1; + } + } + + if (optind == argc) + loops = 40; /* default iteration count when no loop-cnt argument is given */ + else { + loops = atoi(argv[optind++]); + if (loops <= 0 || loops > 1000000) { + Rfprintf(stderr, + "ERR - loops arg out of bounds '%d'?\n", loops); + shmem_finalize(); + return 1; + } + } + + for(cloop=1; cloop <= loops; cloop++) { + + lock_cnt = 0; + lock = 0; + + shmem_barrier_all(); /* sync all ranks */ + + shmem_set_lock(&lock); + + for(pe=0; pe < num_ranks; pe++) { /* while holding the lock, bump lock_cnt on every PE, this one included */ + shmem_int_atomic_add( &lock_cnt, 1, pe ); + } + if (Verbose) + printf("[%d] locked: lock_cnt(%d)\n", my_rank, lock_cnt); + + shmem_clear_lock( &lock ); + + shmem_int_wait_until( &lock_cnt, SHMEM_CMP_GE, num_ranks ); /* wait until all PEs have added to the local counter */ + + shmem_barrier_all(); /* sync all ranks */ + + if ( (cloop % 10) == 0 ) { + if (my_rank == 0 && Announce) + printf("%d ranks completed %d loops\n", num_ranks, cloop); + } + + if (lock_cnt != num_ranks) /* NOTE(review): a mismatch is only reported; the exit status stays 0 -- confirm this is intended */ + printf ("[%d] loop %d: bad lock_cnt %d, expected %d?\n", + my_rank, cloop, lock_cnt, num_ranks); + } + Vprintf ("[%d] of %d, Exit: lock_cnt %d\n", my_rank, num_ranks, lock_cnt); + + shmem_finalize(); + + return 0; +} diff --git a/test/unit/shmalloc.c b/test/unit/shmalloc.c new file mode 100644 index 0000000..4e059c5 --- /dev/null +++ b/test/unit/shmalloc.c @@ -0,0 +1,264 @@ +/* :vim:sw=4:ts=4: */ +/* + * Copyright 2011 Sandia Corporation. Under the terms of Contract + * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government + * retains certain rights in this software. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/* + * usage: shmalloc [-p] [nWords] [loops] [incWords-per-loop] + * where: -p == power-of-two allocation bump per loop + * [nWords(32)] # of longs to shmem_malloc() + * [loops(50)] # of loops + * [incWords(1025)] nWords += incWords per loop + * Loop: + * PE* shmem_malloc(nWords) + * set *DataType = 1 + * PE* shmem_malloc(nWords) + * set *DataType = 2 + * PE* shmem_malloc(nWords) + * set *DataType = 3 + * + * for(1...3) allocated ranges + * verify + * shmem_free() + * end-loop + */ + +#include +#include +#include +#include +#include + +#include + +#define DFLT_NWORDS 32 +#define DFLT_INCR 1025 +#define DFLT_LOOPS 50 + +#define DataType long + +static DataType *source; +static DataType *target; +static DataType *result; + +static int source_sz; +static int target_sz; +static int result_sz; + +static char *pgm; + +void usage (void); +int getSize (char *); + +void +usage (void) +{ + if (shmem_my_pe() == 0 ) { + fprintf (stderr, + "Usage: %s [-p] [nWords(%d)] [loops(%d)] [incWords(%d)]\n", + pgm, DFLT_NWORDS, DFLT_LOOPS, DFLT_INCR); + fprintf (stderr, + " -p == (2**0 ...
2**22) shmem_malloc(), other args ignored\n" + " -v == Verbose output\n" + " [nWords] # of longs to shmem_malloc()\n" + " [loops] # of loops\n" + " [incWords] nWords += incWords per loop\n"); + } + shmem_finalize(); + exit (1); +} + +int +getSize (char *str) +{ + int size; + char mod[32]; + + switch (sscanf (str, "%d%1[mMkK]", &size, mod)) + { + case 1: /* plain integer, no suffix */ + return (size); + + case 2: /* integer followed by one of m/M/k/K */ + switch (*mod) + { + case 'm': + case 'M': + return (size << 20); + + case 'k': + case 'K': + return (size << 10); + + default: + return (size); + } + + default: /* no leading integer could be parsed */ + return (-1); + } +} + +int +main(int argc, char **argv) +{ + int me, nProcs, c, l; + int nWords, loops, incWords; + int Verbose = 0, power2 = 0, modulo = 5; + DataType *dp; + + pgm = strrchr(argv[0],'/'); /* basename of argv[0] for messages */ + if ( pgm ) + pgm++; + else + pgm = argv[0]; + + shmem_init(); + me = shmem_my_pe(); + nProcs = shmem_n_pes(); + + while ((c = getopt (argc, argv, "hpv")) != -1) + switch (c) + { + case 'p': + power2++; + break; + case 'v': + Verbose++; + break; + case 'h': + default: + usage(); + break; + } + + if (optind == argc) + nWords = DFLT_NWORDS; + else if ((nWords = getSize (argv[optind++])) <= 0) + usage (); + + if (optind == argc) + loops = DFLT_LOOPS; + else if ((loops = getSize (argv[optind++])) < 0) + usage (); + + if (optind == argc) + incWords = DFLT_INCR; + else if ((incWords = getSize (argv[optind++])) < 0) + usage (); + + if (power2) { /* -p overrides the positional arguments */ + nWords = 1; + modulo = 1; + loops = 21; /* nWords doubles after each loop: 2**0 .. 2**20 */ + } + + if (Verbose && me == 0) { + if (power2) { + printf("%s: nWords(1) << 1 per loop.\n", pgm); + } + else + printf("%s: nWords(%d) loops(%d) nWords-incr-per-loop(%d)\n", + pgm, nWords, loops, incWords); + } + + for(l=0; l < loops; l++) { + + result_sz = (nProcs-1) * (nWords * sizeof(DataType)); /* NOTE(review): this is 0 when run on a single PE -- confirm a zero-size shmem_malloc() is acceptable here */ + result = (DataType *)shmem_malloc(result_sz); + if (!
result) + { + perror ("Failed result memory allocation"); + shmem_finalize(); + exit (1); + } + for(dp=result; dp < &result[(result_sz/sizeof(DataType))];) + *dp++ = 1; + + + target_sz = nWords * sizeof(DataType); + if (!(target = (DataType *)shmem_malloc(target_sz))) + { + perror ("Failed target memory allocation"); + shmem_finalize(); + exit (1); + } + for(dp=target; dp < &target[(target_sz / sizeof(DataType))];) + *dp++ = 2; + + source_sz = 2 * nWords * sizeof(DataType); + if (!(source = (DataType *)shmem_malloc(source_sz))) + { + perror ("Failed source memory allocation"); + shmem_finalize(); + exit (1); + } + for(dp=source; dp < &source[(source_sz / sizeof(DataType))];) + *dp++ = 3; + + shmem_barrier_all(); /* sync sender and receiver */ + + /* NOTE(review): the three checks below only print on a mismatch; the exit status stays 0 -- confirm this is intended */ for(dp=source; dp < &source[(source_sz / sizeof(DataType))]; dp++) + if (*dp != 3 ) { + printf("source not consistent @ 3?\n"); + break; + } + shmem_free(source); + + for(dp=target; dp < &target[(target_sz / sizeof(DataType))]; dp++) + if (*dp != 2 ) { + printf("target not consistent @ 2?\n"); + break; + } + shmem_free(target); + + for(dp=result; dp < &result[(result_sz / sizeof(DataType))]; dp++) + if (*dp != 1 ) { + printf("result not consistent @ 1?\n"); + break; + } + shmem_free(result); + + if (loops > 1) { + if (Verbose && me == 0) { + if (l == 0 || (l % modulo == 0)) + printf("End loop %3d nWords(%d)\n",(l+1),nWords); + } + if (power2) + nWords <<= 1; + else + nWords += incWords; // watch for double inc. + } + } + + shmem_finalize(); + + return 0; +} diff --git a/test/unit/shmem_calloc.c b/test/unit/shmem_calloc.c new file mode 100644 index 0000000..5cfca33 --- /dev/null +++ b/test/unit/shmem_calloc.c @@ -0,0 +1,264 @@ +/* :vim:sw=4:ts=4: */ +/* + * Copyright 2011 Sandia Corporation. Under the terms of Contract + * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government + * retains certain rights in this software. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/* + * usage: shmem_calloc [-p] [nWords] [loops] [incWords-per-loop] + * where: -p == power-of-two allocation bump per loop + * [nWords] # of longs to shmem_calloc()\n" + * [loops(1)] # of loops\n" + * [incWords(2)] nWords += incWords per loop\n"); + * Loop: + * PE* shmem_calloc(nWords) + * set *DataType = 1 + * PE* shmem_calloc(nWords) + * set *DataType = 2 + * PE* shmem_calloc(nWords) + * set *DataType = 3 + * + * for(1...3) allocated ranges + * verify + * shmem_free() + * end-loop + */ + +#include +#include +#include +#include +#include + +#include + +#define DFLT_NWORDS 32 +#define DFLT_INCR 1025 +#define DFLT_LOOPS 50 + +#define DataType long + +static DataType *source; +static DataType *target; +static DataType *result; + +static int source_sz; +static int target_sz; +static int result_sz; + +static char *pgm; + +void usage (void); +int getSize (char *); + +void +usage (void) +{ + if (shmem_my_pe() == 0 ) { + fprintf (stderr, + "Usage: %s [-p] [nWords(%d)] [loops(%d)] [incWords(%d)]\n", + pgm, DFLT_NWORDS, DFLT_LOOPS, DFLT_INCR); + fprintf (stderr, + " -p == (2**0 ... 
/*
 * Parse a decimal count with an optional one-letter scale suffix.
 *
 *   "123"        -> 123
 *   "4k" / "4K"  -> 4 * 2**10
 *   "2m" / "2M"  -> 2 * 2**20
 *
 * Returns the parsed (possibly scaled) value, or -1 when str does not start
 * with a decimal integer.  A suffixed value that is negative or that would
 * not fit in an int after scaling also yields -1 (the original shifted the
 * signed value directly, which is undefined behaviour on overflow or for a
 * negative operand); callers already treat negative results as a usage
 * error.
 */
int
getSize (char *str)
{
    /* Largest int value, obtained without <limits.h>; assumes unsigned int
     * has exactly one more value bit than int. */
    const unsigned int int_max = ~0u >> 1;
    int size;
    int shift;
    char mod[32];

    switch (sscanf (str, "%d%1[mMkK]", &size, mod))
    {
      case 1:       /* plain number, no suffix */
        return (size);

      case 2:       /* the scanset guarantees mod[0] is one of m, M, k, K */
        shift = (*mod == 'm' || *mod == 'M') ? 20 : 10;
        break;

      default:      /* no leading integer */
        return (-1);
    }

    if (size < 0 || (unsigned int)size > (int_max >> shift))
        return (-1);

    return (int)((unsigned int)size << shift);
}
result) + { + perror ("Failed result memory allocation"); + shmem_finalize(); + exit (1); + } + for(dp=result; dp < &result[(result_sz/sizeof(DataType))];) + *dp++ += 1; + + + target_sz = nWords * sizeof(DataType); + if (!(target = (DataType *)shmem_calloc(nWords, sizeof(DataType)))) + { + perror ("Failed target memory allocation"); + shmem_finalize(); + exit (1); + } + for(dp=target; dp < &target[(target_sz / sizeof(DataType))];) + *dp++ += 2; + + source_sz = 2 * nWords * sizeof(DataType); + if (!(source = (DataType *)shmem_calloc(2*nWords, sizeof(DataType)))) + { + perror ("Failed source memory allocation"); + shmem_finalize(); + exit (1); + } + for(dp=source; dp < &source[(source_sz / sizeof(DataType))];) + *dp++ += 3; + + shmem_barrier_all(); /* sync sender and receiver */ + + for(dp=source; dp < &source[(source_sz / sizeof(DataType))]; dp++) + if (*dp != 3 ) { + printf("source not consistent @ 3?\n"); + break; + } + shmem_free(source); + + for(dp=target; dp < &target[(target_sz / sizeof(DataType))]; dp++) + if (*dp != 2 ) { + printf("target not consistent @ 2?\n"); + break; + } + shmem_free(target); + + for(dp=result; dp < &result[(result_sz / sizeof(DataType))]; dp++) + if (*dp != 1 ) { + printf("result not consistent @ 1?\n"); + break; + } + shmem_free(result); + + if (loops > 1) { + if (Verbose && me == 0) { + if (l == 0 || (l % modulo == 0)) + printf("End loop %3d nWords(%d)\n",(l+1),nWords); + } + if (power2) + nWords <<= 1; + else + nWords += incWords; // watch for double inc. + } + } + + shmem_finalize(); + + return 0; +} diff --git a/test/unit/shmem_ct.c b/test/unit/shmem_ct.c new file mode 100644 index 0000000..53379d9 --- /dev/null +++ b/test/unit/shmem_ct.c @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2017 Intel Corporation. All rights reserved. 
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include +#include + +int data; + +int main(int argc, char **argv) { + int me, npes, i; + int mine, ct_val; + shmemx_ct_t ct; + + shmem_init(); + me = mine = shmem_my_pe(); + npes = shmem_n_pes(); + + shmemx_ct_create(&ct); + ct_val = shmemx_ct_get(ct); + + if (ct_val != 0) { + printf("[%3d] Error: Initial value of ct was nonzero (%d)\n", me, ct_val); + shmem_global_exit(1); + } + + /* TEST 1: Everyone puts to rank 0 */ + shmem_barrier_all(); + shmemx_putmem_ct(ct, &data, &mine, sizeof(int), 0); + + if (me == 0) { + shmemx_ct_wait(ct, npes); + } + shmem_barrier_all(); + + /* Reset the counter */ + shmemx_ct_set(ct, 0); + ct_val = shmemx_ct_get(ct); + if (ct_val != 0) { + printf("[%3d] Error: Reset value of ct was nonzero (%d)\n", me, ct_val); + shmem_global_exit(1); + } + + /* TEST 1.5: Everyone gets from rank 0 */ + data = -1; + shmem_barrier_all(); + shmemx_getmem_ct(ct, &mine, &data, sizeof(int), 0); + + if (me == 0) { + shmemx_ct_wait(ct, npes); + } + shmem_barrier_all(); + + if (mine != -1) { + printf("[%3d] Error: Getmem_ct value read was incorrect (%d)\n", me, mine); + shmem_global_exit(1); + } + + /* Reset the counter */ + shmemx_ct_set(ct, 0); + ct_val = shmemx_ct_get(ct); + if (ct_val != 0) { + printf("[%3d] Error: Reset value of ct was nonzero (%d)\n", me, ct_val); + shmem_global_exit(1); + } + + /* TEST 2: Everyone puts to every other rank, but not themselves */ + shmem_barrier_all(); + + for (i = 1; i < npes; i++) + shmemx_putmem_ct(ct, &data, &mine, sizeof(int), (me + i) % npes); + + shmemx_ct_wait(ct, npes-1); + + shmem_barrier_all(); + shmemx_ct_free(&ct); + + shmem_finalize(); + + return 0; +} diff --git a/test/unit/shmem_ctx.c b/test/unit/shmem_ctx.c new file mode 100644 index 0000000..2c133d1 --- /dev/null +++ b/test/unit/shmem_ctx.c @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2017 Intel Corporation. All rights reserved. 
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * This test is derived from an example provided in the OpenSHMEM 1.4 + * specification. Additional copyrights may apply. 
/*
 * Multi-threaded task-counter test using per-thread contexts.
 *
 * Every PE exposes a symmetric counter (task_cntr).  Inside an OpenMP
 * parallel region each thread creates its own private context and claims
 * tasks by atomically fetch-incrementing task_cntr, first on the local PE
 * and then on each following PE in turn, until it has visited all npes
 * counters.  The per-thread tasks_done counts are combined by the OpenMP
 * reduction and then summed across PEs; the total must equal
 * ntasks * npes.
 *
 * Returns 0 on success, 1 when the total is wrong, or the non-zero value
 * returned by shmem_init_thread() if initialization failed.
 */
int main(void) {
    int tl, i, ret;
    long ntasks = 1024;  /* Total tasks per PE */

    /* psync must hold SHMEM_SYNC_VALUE before it is used by the reduction. */
    for (i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i++)
        psync[i] = SHMEM_SYNC_VALUE;

    ret = shmem_init_thread(SHMEM_THREAD_MULTIPLE, &tl);

    /* The test needs full multi-threading support; anything less is fatal. */
    if (tl != SHMEM_THREAD_MULTIPLE || ret != 0) {
        printf("Init failed (requested thread level %d, got %d, ret %d)\n",
               SHMEM_THREAD_MULTIPLE, tl, ret);

        /* ret == 0 means init itself succeeded (only the thread level is
         * wrong), so the job is aborted through the library; otherwise the
         * init return value is passed straight back. */
        if (ret == 0) {
            shmem_global_exit(1);
        } else {
            return ret;
        }
    }

    int me = shmem_my_pe();
    int npes = shmem_n_pes();

    /* Each thread accumulates into its own tasks_done; OpenMP sums them. */
#pragma omp parallel reduction (+:tasks_done)
    {
        shmem_ctx_t ctx;
        int task_pe = me, pes_done = 0;
        /* Deliberately shadows the outer ret: this one is per-thread. */
        int ret = shmem_ctx_create(SHMEM_CTX_PRIVATE, &ctx);

        if (ret != 0) {
            printf("%d: Error creating context (%d)\n", me, ret);
            shmem_global_exit(2);
        }

        /* Process tasks on all PEs, starting with the local PE.  After
         * all tasks on a PE are completed, help the next PE. */
        while (pes_done < npes) {
            long task = shmem_ctx_long_atomic_fetch_inc(ctx, &task_cntr, task_pe);
            /* A fetched value >= ntasks means that PE's tasks are all claimed. */
            while (task < ntasks) {
                /* Perform task (task_pe, task) */
                tasks_done++;
                task = shmem_ctx_long_atomic_fetch_inc(ctx, &task_cntr, task_pe);
            }
            pes_done++;
            task_pe = (task_pe + 1) % npes;
        }

        shmem_ctx_destroy(ctx);
    }

    /* Sum the per-PE counts over all npes PEs (start 0, log-stride 0). */
    shmem_long_sum_to_all(&total_done, &tasks_done, 1, 0, 0, npes, pwrk, psync);

    /* result is 1 on mismatch and doubles as the process exit status. */
    int result = (total_done != ntasks * npes);
    if (me == 0 && result)
        printf("Error: total_done is %ld, expected %ld\n", total_done, ntasks * npes);

    shmem_finalize();
    return result;
}
+ * + * This test is derived from an example provided in the OpenSHMEM 1.4 + * specification. Additional copyrights may apply. + * + */ + +#include +#include +#include + +#define LEN 8192 /* Full buffer length */ +#define PLEN 512 /* Length of each pipeline stage */ + +int in_buf[LEN], out_buf[LEN]; + +int main(void) { + int i, j, *pbuf[2]; + shmem_ctx_t ctx[2]; + + shmem_init(); + int me = shmem_my_pe(); + int npes = shmem_n_pes(); + + pbuf[0] = shmem_malloc(PLEN * npes * sizeof(int)); + pbuf[1] = shmem_malloc(PLEN * npes * sizeof(int)); + + int ret_0 = shmem_ctx_create(0, &ctx[0]); + int ret_1 = shmem_ctx_create(0, &ctx[1]); + if (ret_0 || ret_1) shmem_global_exit(1); + + for (i = 0; i < LEN; i++) { + in_buf[i] = me; out_buf[i] = 0; + } + + /* Index of ctx and pbuf (p_idx) for current pipeline stage (p) */ + int p_idx = 0, p = 0; + for (i = 1; i <= npes; i++) + shmem_ctx_int_put_nbi(ctx[p_idx], &pbuf[p_idx][PLEN*me], + &in_buf[PLEN*p], PLEN, (me+i) % npes); + + /* Issue communication for pipeline stage p, then accumulate results for + * stage p-1 */ + for (p = 1; p < LEN/PLEN; p++) { + p_idx ^= 1; + for (i = 1; i <= npes; i++) + shmem_ctx_int_put_nbi(ctx[p_idx], &pbuf[p_idx][PLEN*me], + &in_buf[PLEN*p], PLEN, (me+i) % npes); + + shmem_ctx_quiet(ctx[p_idx^1]); + shmem_sync_all(); + for (i = 0; i < npes; i++) + for (j = 0; j < PLEN; j++) + out_buf[PLEN*(p-1)+j] += pbuf[p_idx^1][PLEN*i+j]; + } + + shmem_ctx_quiet(ctx[p_idx]); + shmem_sync_all(); + for (i = 0; i < npes; i++) + for (j = 0; j < PLEN; j++) + out_buf[PLEN*(p-1)+j] += pbuf[p_idx][PLEN*i+j]; + + int errors = 0; + int expected = (npes - 1) * npes / 2; + for (i = 0; i < LEN; i++) { + if (out_buf[i] != expected) { + printf("%d: out_buf[%d] = %d, expected %d\n", me, i, out_buf[i], expected); + errors++; + } + } + + shmem_finalize(); + return errors; +} diff --git a/test/unit/shmem_info.c b/test/unit/shmem_info.c new file mode 100644 index 0000000..4686ed9 --- /dev/null +++ b/test/unit/shmem_info.c @@ -0,0 
+1,57 @@ +/* + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include + +int +main(int argc, char* argv[]) +{ + int major_ver, minor_ver; + char name[SHMEM_MAX_NAME_LEN]; + + shmem_init(); + + shmem_info_get_version(&major_ver, &minor_ver); + shmem_info_get_name(name); + + assert(strlen(name) <= SHMEM_MAX_NAME_LEN); + assert(major_ver == SHMEM_MAJOR_VERSION); + assert(minor_ver == SHMEM_MINOR_VERSION); + assert(major_ver >= 1); + assert(minor_ver >= 0); + assert(strcmp(name, SHMEM_VENDOR_STRING) == 0); + + printf("%d: OpenSHMEM %d.%d -- \"%s\"\n", shmem_my_pe(), major_ver, + minor_ver, name); + + shmem_finalize(); + return 0; +} diff --git a/test/unit/shmem_info_f.f90 b/test/unit/shmem_info_f.f90 new file mode 100644 index 0000000..8c3e4d9 --- /dev/null +++ b/test/unit/shmem_info_f.f90 @@ -0,0 +1,60 @@ +! +! 
! Fortran counterpart of the C shmem_info test: queries the library name and
! version through the Fortran bindings and compares them with the constants
! from shmem.fh.  Any mismatch is printed (prefixed with the PE number) and
! the job is aborted with exit status 1.
program shmem_info
    include 'shmem.fh'

    integer me, major_ver, minor_ver
    ! Buffer sized by the library constant for the vendor name string.
    character (len=SHMEM_MAX_NAME_LEN) lib_name

    call shmem_init()
    me = shmem_my_pe()

    call shmem_info_get_version(major_ver, minor_ver)
    call shmem_info_get_name(lib_name)

    ! Note: The spec does not require these strings to be identical.
    !       They are implemented this way in SOS, and we take advantage
    !       of that property to check the C/Fortran bindings linkage.
    if (lib_name .ne. SHMEM_VENDOR_STRING) then
      print *, me, "Vendor strings did not match!"
      print *, me, "shmem_info_get_name: ", lib_name
      print *, me, "SHMEM_VENDOR_STRING: ", SHMEM_VENDOR_STRING
      call shmem_global_exit(1)
    endif

    ! Both version components must match the compile-time constants.
    if ((major_ver .ne. SHMEM_MAJOR_VERSION) .OR. &
        (minor_ver .ne. SHMEM_MINOR_VERSION)) then
      print *, me, "Version numbers did not match!"
      print *, me, "shmem_info_get_version: ", major_ver, minor_ver
      print *, me, "Library constants     : ", SHMEM_MAJOR_VERSION, &
          SHMEM_MINOR_VERSION
      call shmem_global_exit(1)
    end if

    call shmem_finalize()
end program shmem_info
*/ +static int wait_any(long *ivar, int count, int cmp, long value) +{ + int idx = 0; + while (!shmem_long_test(&ivar[idx], cmp, value)) + idx = (idx + 1) % count; + return idx; +} + +int main(void) +{ + shmem_init(); + const int mype = shmem_my_pe(); + const int npes = shmem_n_pes(); + + long *wait_vars = shmem_calloc(npes, sizeof(long)); + + /* Put mype+1 to every PE */ + for (int i = 0; i < npes; i++) + shmem_long_p(&wait_vars[mype], mype+1, i); + + int nrecv = 0, errors = 0; + + /* Wait for all messages to arrive */ + while (nrecv < npes) { + int who = wait_any(wait_vars, npes, SHMEM_CMP_NE, 0); + if (wait_vars[who] != who+1) { + printf("%d: wait_vars[%d] = %ld, expected %d\n", + mype, who, wait_vars[who], who+1); + errors++; + } + wait_vars[who] = 0; + nrecv++; + } + + shmem_free(wait_vars); + shmem_finalize(); + return errors; +} diff --git a/test/unit/shmemalign.c b/test/unit/shmemalign.c new file mode 100644 index 0000000..30285be --- /dev/null +++ b/test/unit/shmemalign.c @@ -0,0 +1,186 @@ +/* :vim:sw=4:ts=4: */ +/* + * Copyright 2011 Sandia Corporation. Under the terms of Contract + * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government + * retains certain rights in this software. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * usage: shmemalign [-v] + * where: -v == Verbose display + */ + +#include +#include +#include +#include +#include + +#include + +#define DFLT_NWORDS 257 +#define DFLT_INCR 31 +#define DFLT_LOOPS 1 + +#define DataType long + +static DataType *target; +static int target_sz; + +static char *pgm; +static int Verbose = 0; + +void usage (void); +int getSize (char *); + +void +usage (void) +{ + if (shmem_my_pe() == 0 ) { + fprintf (stderr, + "Usage: %s [-p] [nWords(%d)] [loops(%d)] [incWords(%d)]\n", + pgm, DFLT_NWORDS, DFLT_LOOPS, DFLT_INCR); + fprintf (stderr, + " -v == Verbose output\n" + " [nWords] # of longs to shmem_malloc()\n" + " [loops] # of loops\n" + " [incWords] nWords += incWords per loop\n"); + } + exit (1); +} + +int +getSize (char *str) +{ + int size; + char mod[32]; + + switch (sscanf (str, "%d%1[mMkK]", &size, mod)) + { + case 1: + return (size); + + case 2: + switch (*mod) + { + case 'm': + case 'M': + return (size << 20); + + case 'k': + case 'K': + return (size << 10); + + default: + return (size); + } + + default: + return (-1); + } +} + +int +main(int argc, char **argv) +{ + int me, c, l, j; + int nWords, loops, incWords; + + pgm = strrchr(argv[0],'/'); + if ( pgm ) + pgm++; + else + pgm = argv[0]; + + shmem_init(); + me = shmem_my_pe(); + + while ((c = getopt (argc, argv, "hpv")) != -1) + switch (c) + { + case 'v': + Verbose++; + break; + case 'h': + default: + usage(); + break; + } + + if (optind == argc) + nWords = DFLT_NWORDS; + else if 
((nWords = getSize (argv[optind++])) <= 0) + usage (); + + if (optind == argc) + loops = DFLT_LOOPS; + else if ((loops = getSize (argv[optind++])) < 0) + usage (); + + if (optind == argc) + incWords = DFLT_INCR; + else if ((incWords = getSize (argv[optind++])) < 0) + usage (); + + if (Verbose && me == 0) + fprintf (stderr, "nWords(%d) loops(%d) incWords(%d)]\n", + nWords, loops, incWords); + + for(l=0; l < loops; l++) + { + /* align 2**2 ... 2**23; 24 exceeds symetric heap max */ + for(j=0,c=2; j < 23; j++,c<<=1) + { + target_sz = nWords * sizeof(DataType); + if (!(target = (DataType *)shmem_align(c,target_sz))) { + perror ("Failed target memory allocation"); + exit (1); + } + + if ( (unsigned long)target & (c-1) ) { + fprintf(stdout,"PE%d Unaligned? ",me); + fflush(stdout); + fprintf(stdout,"align[%#09x]target %p\n", + c, (void*)target); + shmem_global_exit(1); + } + else if (Verbose > 1 && me == 0) + fprintf(stdout,"align[%#09x]target %p\n", + c, (void*)target); + shmem_barrier_all(); + shmem_free(target); + } + nWords += incWords; + if (Verbose && me == 0) + fprintf(stdout,"Fini loop %d\n",(l+1)); + } + + shmem_finalize(); + + return 0; +} diff --git a/test/unit/shrealloc.c b/test/unit/shrealloc.c new file mode 100644 index 0000000..9a273dc --- /dev/null +++ b/test/unit/shrealloc.c @@ -0,0 +1,284 @@ +/* :vim:sw=4:ts=4:expandtab: */ +/* + * Copyright 2011 Sandia Corporation. Under the terms of Contract + * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government + * retains certain rights in this software. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * usage: shrealloc [-p] [nWords] [loops] [incWords-per-loop] + * where: -p == power-of-two allocation size bump per loop + * [nWords] # of longs to shmem_realloc()\n" + * [loops(1)] # of loops\n" + * [incWords(2)] nWords += incWords per loop\n"); + * Loop: + * PE* shmem_realloc(nWords) + * set *DataType = 1 + * PE* shmem_realloc(nWords) + * set *DataType = 2 + * PE* shmem_realloc(nWords) + * set *DataType = 3 + * + * for(1...3) allocated ranges + * verify + * end-loop + * shmem_free(3 allocations) + */ + +#include +#include +#include +#include +#include + +#include + +#define DFLT_NWORDS 32 +#define DFLT_INCR 1025 +#define DFLT_LOOPS 50 +#define DFLT_PLOOPS 21 // reduced to stay under shmem_malloc() stack limit. 
+ +#define DataType int + +static DataType *source; +static DataType *target; +static DataType *result; + +static int source_sz; +static int target_sz; +static int result_sz; + +static int prev_source_idx = 0; +static int prev_target_idx = 0; +static int prev_result_idx = 0; + +static char *pgm; + +void usage (void); +int getSize (char *); + +void +usage (void) +{ + if (shmem_my_pe() == 0 ) { + fprintf (stderr, + "Usage: %s [-p] [nWords(%d)] [loops(%d)] [incWords(%d)]\n", + pgm, DFLT_NWORDS, DFLT_LOOPS, DFLT_INCR); + fprintf (stderr, + " -p == (2**0 ... 2**22) shmem_realloc(), other args ignored\n" + " -v == verbose output\n" + " [nWords] # of longs to shmem_realloc()\n" + " [loops] # of loops\n" + " [incWords] nWords += incWords per loop\n"); + } + exit (1); +} + +int +getSize (char *str) +{ + int size; + char mod[32]; + + switch (sscanf (str, "%d%1[mMkK]", &size, mod)) + { + case 1: + return (size); + + case 2: + switch (*mod) + { + case 'm': + case 'M': + return (size << 20); + + case 'k': + case 'K': + return (size << 10); + + default: + return (size); + } + + default: + return (-1); + } +} + +int +main(int argc, char **argv) +{ + int me, nProcs, c, l; + int nWords, loops, incWords; + int Verbose = 0, power2 = 0, modulo = 5; + DataType *dp; + + pgm = strrchr(argv[0],'/'); + if ( pgm ) + pgm++; + else + pgm = argv[0]; + + shmem_init(); + me = shmem_my_pe(); + nProcs = shmem_n_pes(); + + if (nProcs <= 1) { + fprintf(stderr, "ERR - Requires > 1 PEs\n"); + shmem_finalize(); + return 0; + } + + while ((c = getopt (argc, argv, "hpv")) != -1) + switch (c) + { + case 'p': + power2++; + break; + case 'v': + Verbose++; + break; + case 'h': + default: + usage(); + break; + } + + if (optind == argc) + nWords = DFLT_NWORDS; + else if ((nWords = getSize (argv[optind++])) <= 0) + usage (); + + if (optind == argc) + loops = DFLT_LOOPS; + else if ((loops = getSize (argv[optind++])) < 0) + usage (); + + if (optind == argc) + incWords = DFLT_INCR; + else if ((incWords = getSize 
(argv[optind++])) < 0) + usage (); + + if (power2) { + nWords = 1; + modulo = 1; + loops = DFLT_PLOOPS; + } + + if (Verbose && me == 0) { + if (power2) { + printf("%s: nWords(1) << 1 per loop(%d).\n", pgm, DFLT_PLOOPS); + } + else + printf("%s: nWords(%d) loops(%d) nWords-incr-per-loop(%d)\n", + pgm, nWords, loops, incWords); + } + + for(l=0; l < loops; l++) + { + result_sz = (nProcs-1) * (nWords * sizeof(DataType)); + result = (DataType *)shmem_realloc(result,result_sz); + if (! result) + { + perror ("Failed result memory allocation"); + exit (1); + } + if (prev_result_idx == 0) + for(dp=result; dp < &result[(result_sz/sizeof(DataType))];) + *dp++ = 1; + else + for(dp=&result[prev_result_idx]; + dp < &result[(result_sz/sizeof(DataType))];) *dp++ = 1; + prev_result_idx = result_sz / sizeof(DataType); + + target_sz = nWords * sizeof(DataType); + if (!(target = (DataType *)shmem_realloc(target,target_sz))) + { + perror ("Failed target memory allocation"); + exit (1); + } + if (prev_target_idx == 0) + for(dp=target; dp < &target[(target_sz / sizeof(DataType))];) + *dp++ = 2; + else + for(dp=&target[prev_target_idx]; + dp < &target[(target_sz/sizeof(DataType))];) *dp++ = 2; + prev_target_idx = target_sz / sizeof(DataType); + + source_sz = 2 * nWords * sizeof(DataType); + if (!(source = (DataType *)shmem_realloc(source,source_sz))) + { + perror ("Failed source memory allocation"); + exit (1); + } + if (prev_source_idx == 0) + for(dp=source; dp < &source[(source_sz / sizeof(DataType))];) + *dp++ = 3; + else + for(dp=&source[prev_source_idx]; + dp < &source[(source_sz/sizeof(DataType))];) *dp++ = 3; + prev_source_idx = source_sz / sizeof(DataType); + + for(dp=source; dp < &source[(source_sz / sizeof(DataType))]; dp++) + if (*dp != 3 ) { + printf("source not consistent @ 3?\n"); + break; + } + + for(dp=target; dp < &target[(target_sz / sizeof(DataType))]; dp++) + if (*dp != 2 ) { + printf("target not consistent @ 2?\n"); + break; + } + + for(dp=result; dp < 
&result[(result_sz / sizeof(DataType))]; dp++) + if (*dp != 1 ) { + printf("result not consistent @ 1?\n"); + break; + } + + if (loops > 1) { + if (Verbose && me == 0) { + if (l == 0 || (l % modulo == 0)) + printf("End loop %3d nWords(%d)\n",(l+1),nWords); + } + if (power2) + nWords <<= 1; + else + nWords += incWords; // watch for double inc. + } + } + + shmem_free(source); + shmem_free(target); + shmem_free(result); + + shmem_finalize(); + + return 0; +} diff --git a/test/unit/spam.c b/test/unit/spam.c new file mode 100644 index 0000000..7288972 --- /dev/null +++ b/test/unit/spam.c @@ -0,0 +1,582 @@ +/* + * Copyright 2011 Sandia Corporation. Under the terms of Contract + * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government + * retains certain rights in this software. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/* spam - shmem performance amalgamation + * + * one2many - timed loops of [PE-0 put 2048 bytes to 1..(npes-1) PEs + * many2one - timed loops of [PE-0 get 2048 bytes to 1..(npes-1) PEs + * all2all_get - timed loops of get 2048 bytes all-2-all. + * all2all_put - timed loops of put 2048 bytes all-2-all. + * neighbor_put - timed loops of put 2048 bytes to next neighbor. + * neighbor_get - timed loops of get 2048 bytes to next neighbor. + * bcast - timed loops of broadcast 2048 bytes to all. + * collect - timed loops of collect 2048 bytes from all PEs to 0. + * fcollect - timed loops of fcollect 2048 bytes from all PEs to 0. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +void one2many_put(int *dst, int *src, int Elems, int me, int npe, int laps); +void many2one_get(int *dst, int *src, int Elems, int me, int npe, int laps); +void all2all_get(int *dst, int *src, int Elems, int me, int npes, int laps); +void all2all_put(int *dst, int *src, int Elems, int me, int npes, int laps); +void neighbor_put(int *dst, int *src, int Elems, int me, int npes, int laps); +void neighbor_get(int *dst, int *src, int Elems, int me, int npes, int laps); +void bcast(int *dst, int *src, int Elems, int me, int npes, int laps); +void collect(int *dst, int *src, int Elems, int me, int npes, int laps); +void fcollect(int *dst, int *src, int Elems, int me, int npes, int laps); + +static int atoi_scaled(char *s); +static void usage(char *pgm); + +#ifndef HAVE_SHMEMX_WTIME +static double shmemx_wtime(void); +#endif + +int Verbose=1; +int All2=0; +int Bcast=0; +int Collect=0; +int Many=0; +int Neighbor=0; + +#define DFLT_LOOPS 20000 + +#define N_ELEMENTS 512 /* # ints */ + +int +main(int argc, char **argv) +{ + int i; + int *target; + int *source; + int me, npes, elements=N_ELEMENTS, loops=DFLT_LOOPS; + char *pgm; + + shmem_init(); + me = shmem_my_pe(); + npes = shmem_n_pes(); + + if ((pgm=strrchr(argv[0],'/'))) + pgm++; + else + pgm = argv[0]; + + 
/* lower-case switch enable only a specific test; otherwise run all tests */ + while ((i = getopt (argc, argv, "hvqe:l:abcmn")) != EOF) { + switch (i) + { + case 'a': + All2++; + break; + case 'b': + Bcast++; + break; + case 'c': + Collect++; + break; + case 'm': + Many++; + break; + case 'n': + Neighbor++; + break; + case 'q': + Verbose=0; + break; + case 'v': + Verbose++; + break; + case 'e': + if ((elements = atoi_scaled(optarg)) <= 0) { + fprintf(stderr,"ERR: Bad elements count %d\n",elements); + shmem_finalize(); + return 1; + } + break; + case 'l': + if ((loops = atoi_scaled(optarg)) <= 0) { + fprintf(stderr,"ERR: Bad loop count %d\n",loops); + shmem_finalize(); + return 1; + } + break; + case 'h': + if (me == 0) + usage(pgm); + shmem_finalize(); + return 0; + default: + if (me == 0) { + fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i); + usage(pgm); + } + shmem_finalize(); + return 1; + } + } + + if (All2==0 && Bcast==0 && Collect==0 && Many==0 && Neighbor==0) + All2 = Bcast = Collect = Many = Neighbor = 1; + + source = (int *) shmem_malloc( elements * sizeof(*source) ); + target = (int *) shmem_malloc( elements * sizeof(*target) ); + + for (i = 0; i < elements; i += 1) { + source[i] = i + 1; + target[i] = -90; + } + + shmem_barrier_all(); + + if (Neighbor) { + neighbor_put( target, source, elements, me, npes, loops ); + neighbor_get( target, source, elements, me, npes, loops ); + } + + if (All2) { + all2all_put( target, source, elements, me, npes, loops ); + all2all_get( target, source, elements, me, npes, loops ); + } + + if (Many) { + one2many_put( target, source, elements, me, npes, loops ); + many2one_get( target, source, elements, me, npes, loops ); + } + + if (Bcast) bcast( target, source, elements, me, npes, loops ); + + if (Collect) { + collect( NULL, source, elements, me, npes, loops ); + fcollect( NULL, source, elements, me, npes, loops ); + } + + shmem_barrier_all(); + + shmem_free(target); + shmem_free(source); + + shmem_finalize(); + + 
return 0; +} + +void +one2many_put(int *target, int *src, int elements, int me, int npes, int loops) +{ + int i, pe; + double start_time, elapsed_time; + long total_bytes = (long)loops * elements * sizeof(*src) * (npes - 1); /* widen first: loops*elements can overflow int */ + + if (me == 0) { + fprintf(stdout,"%s: %d loops of put(%ld bytes) to %d PEs: ", + __FUNCTION__, loops, (elements*sizeof(*src)), npes-1); + fflush(stdout); + } + shmem_barrier_all(); + + if (me == 0) { + start_time = shmemx_wtime(); + for(i = 0; i < loops; i++) { + for(pe = 1; pe < npes; pe++) + shmem_int_put(target, src, elements, pe); + } + elapsed_time = shmemx_wtime() - start_time; + + if (Verbose) { + printf("%7.3f secs\n", elapsed_time); + printf(" %7.5f usecs / put(), %ld Kbytes @ %7.4f MB/sec\n\n", + (elapsed_time/((double)loops*(npes-1)))*1000000.0, + (total_bytes/1024), + ((double)total_bytes/(1024.0*1024.0)) / elapsed_time ); + } + } + shmem_barrier_all(); +} + +void +many2one_get(int *target, int *src, int elements, int me, int npes, int loops) +{ + int i, pe; + double start_time, elapsed_time; + long total_bytes = (long)loops * elements * sizeof(*src) * (npes - 1); /* widen first: loops*elements can overflow int */ + + if (me == 0) { + fprintf(stdout,"%s: %d loops of get(%ld bytes) from %d PEs: ", + __FUNCTION__, loops, (elements*sizeof(*src)), npes-1); + fflush(stdout); + } + + shmem_barrier_all(); + + if (me == 0) { + start_time = shmemx_wtime(); + for(i = 0; i < loops; i++) { + for(pe = 1; pe < npes; pe++) + shmem_int_get(target, src, elements, pe); + } + elapsed_time = shmemx_wtime() - start_time; + + if (Verbose) { + printf("%7.3f secs\n", elapsed_time); + printf(" %7.5f usecs / get(), %ld Kbytes @ %7.4f MB/sec\n\n", + (elapsed_time/((double)loops*(npes-1)))*1000000.0, + (total_bytes/1024), + ((double)total_bytes/(1024.0*1024.0)) / elapsed_time ); + } + } + shmem_barrier_all(); +} + +void +all2all_get(int *target, int *src, int elements, int me, int npes, int loops) +{ + int i, pe; + double start_time, elapsed_time; + long total_bytes = (long)loops * elements * sizeof(*src) * npes; /* widen first: loops*elements can overflow int */ + + if
(me==0 && Verbose) { + fprintf(stdout, "%s: %d loops of get(%ld bytes) from all %d PEs: ", + __FUNCTION__, loops, (elements*sizeof(*src)), npes); + fflush(stdout); + } + shmem_barrier_all(); + + start_time = shmemx_wtime(); + for(i = 0; i < loops; i++) { + for(pe = 0; pe < npes; pe++) + shmem_int_get(target, src, elements, pe); + } + elapsed_time = shmemx_wtime() - start_time; + + if (me==0 && Verbose) { + printf("%7.3f secs\n", elapsed_time); + printf(" %7.5f usecs / get(), %ld Kbytes @ %7.4f MB/sec\n\n", + (elapsed_time/((double)loops*npes))*1000000.0, + (total_bytes/1024), + ((double)total_bytes/(1024.0*1024.0)) / elapsed_time ); + } + + shmem_barrier_all(); +} + + +void +all2all_put(int *target, int *src, int elements, int me, int npes, int loops) +{ + int i, pe; + double start_time, elapsed_time; + long total_bytes = (long)loops * elements * sizeof(*src) * npes; /* widen first: loops*elements can overflow int */ + + if (me==0 && Verbose) { + fprintf(stdout, "%s: %d loops of put(%ld bytes) to all %d PEs: ", + __FUNCTION__, loops, (elements*sizeof(*src)), npes); + fflush(stdout); + } + + shmem_barrier_all(); + + start_time = shmemx_wtime(); + for(i = 0; i < loops; i++) { + for(pe = 0; pe < npes; pe++) + shmem_int_put(target, src, elements, pe); + } + elapsed_time = shmemx_wtime() - start_time; + + if (me==0 && Verbose) { + printf("%7.3f secs\n", elapsed_time); + printf(" %7.5f usecs / put(), %ld Kbytes @ %7.4f MB/sec\n\n", + (elapsed_time/((double)loops*npes))*1000000.0, + (total_bytes/1024), + ((double)total_bytes/(1024.0*1024.0)) / elapsed_time ); + } + + shmem_barrier_all(); +} + +void +neighbor_put(int *target, int *src, int elements, int me, int npes, int loops) +{ + int i, neighbor_pe; + double start_time, elapsed_time; + long total_bytes = (long)loops * elements * sizeof(*src); /* widen first: loops*elements can overflow int */ + + if (me==0 && Verbose) { + fprintf(stdout, "%s: %d loops of put(%ld bytes) to neighbor, %d PEs: ", + __FUNCTION__, loops, (elements*sizeof(*src)), npes); + fflush(stdout); + } + + shmem_barrier_all(); + + neighbor_pe = (me + 1) % npes; + +
start_time = shmemx_wtime(); + for(i = 0; i < loops; i++) + shmem_int_put(target, src, elements, neighbor_pe); + elapsed_time = shmemx_wtime() - start_time; + + if (me==0 && Verbose) { + printf("%7.3f secs\n", elapsed_time); + printf(" %7.5f usecs / put(), %ld Kbytes @ %7.4f MB/sec\n\n", + (elapsed_time/(double)loops)*1000000.0, /* this PE issued exactly 'loops' puts */ + (total_bytes/1024), + ((double)total_bytes/(1024.0*1024.0)) / elapsed_time ); + } + shmem_barrier_all(); +} + +void +neighbor_get(int *target, int *src, int elements, int me, int npes, int loops) +{ + int i, neighbor_pe; + double start_time, elapsed_time; + long total_bytes = (long)loops * elements * sizeof(*src); /* widen first: loops*elements can overflow int */ + + if (me==0 && Verbose) { + fprintf(stdout, "%s: %d loops of get(%ld bytes) from neighbor, %d PEs: ", + __FUNCTION__, loops, (elements*sizeof(*src)), npes); + fflush(stdout); + } + + shmem_barrier_all(); + + neighbor_pe = (me + 1) % npes; + + start_time = shmemx_wtime(); + for(i = 0; i < loops; i++) + shmem_int_get(target, src, elements, neighbor_pe); + elapsed_time = shmemx_wtime() - start_time; + + if (me==0 && Verbose) { + printf("%7.3f secs\n", elapsed_time); + printf(" %7.5f usecs / get(), %ld Kbytes @ %7.4f MB/sec\n\n", + (elapsed_time/(double)loops)*1000000.0, /* this PE issued exactly 'loops' gets */ + (total_bytes/1024), + ((double)total_bytes/(1024.0*1024.0)) / elapsed_time ); + } + shmem_barrier_all(); +} + + +void +bcast(int *target, int *src, int elements, int me, int npes, int loops) +{ + int i; + double start_time, elapsed_time; + long *ps, *pSync, *pSync1; + long total_bytes = (long)loops * elements * sizeof(*src); /* widen first: loops*elements can overflow int */ + + pSync = (long*)shmem_malloc( 2 * sizeof(long) * SHMEM_BCAST_SYNC_SIZE ); + pSync1 = &pSync[SHMEM_BCAST_SYNC_SIZE]; + for (i = 0; i < SHMEM_BCAST_SYNC_SIZE; i++) { + pSync[i] = pSync1[i] = SHMEM_SYNC_VALUE; + } + + if (me==0 && Verbose) { + fprintf(stdout, "%s: %d loops of broadcast32(%ld bytes) over %d PEs: ", + __FUNCTION__,loops,(elements*sizeof(*src)),npes); + fflush(stdout); + } + + shmem_barrier_all(); + + start_time =
shmemx_wtime(); + for(i = 0; i < loops; i++) { + ps = (i & 1) ? pSync1 : pSync; + shmem_broadcast32( target, src, elements, 0, 0, 0, npes, ps ); + } + elapsed_time = shmemx_wtime() - start_time; + + if (me==0 && Verbose) { + printf("%7.3f secs\n", elapsed_time); + printf(" %7.5f usecs / broadcast32(), %ld Kbytes @ %7.4f MB/sec\n\n", + (elapsed_time/(double)loops)*1000000.0, /* 'loops' broadcast calls were timed */ + (total_bytes/1024), + ((double)total_bytes/(1024.0*1024.0)) / elapsed_time ); + } + shmem_barrier_all(); + shmem_free( pSync ); +} + + +void +collect(int *target, int *src, int elements, int me, int npes, int loops) +{ + int i; + double start_time, elapsed_time; + long total_bytes = (long)loops * elements * sizeof(*src); /* widen first: loops*elements can overflow int */ + long *ps, *pSync, *pSync1; + + pSync = (long*) shmem_malloc( 2 * sizeof(long) * SHMEM_COLLECT_SYNC_SIZE ); + pSync1 = &pSync[SHMEM_COLLECT_SYNC_SIZE]; + for (i = 0; i < SHMEM_COLLECT_SYNC_SIZE; i++) { + pSync[i] = pSync1[i] = SHMEM_SYNC_VALUE; + } + target = (int *) shmem_malloc( elements * sizeof(*target) * npes ); + + if (me==0 && Verbose) { + fprintf(stdout,"%s: %d loops of collect32(%ld bytes) over %d PEs: ", + __FUNCTION__,loops,(elements*sizeof(*src)),npes); + fflush(stdout); + } + + shmem_barrier_all(); + + start_time = shmemx_wtime(); + for(i = 0; i < loops; i++) { + ps = (i & 1) ?
pSync1 : pSync; + shmem_collect32( target, src, elements, 0, 0, npes, ps ); + } + elapsed_time = shmemx_wtime() - start_time; + + if (me==0 && Verbose) { + printf("%7.3f secs\n", elapsed_time); + printf(" %7.5f usecs / collect32(), %ld Kbytes @ %7.4f MB/sec\n\n", + (elapsed_time/(double)loops)*1000000.0, /* 'loops' collect calls were timed */ + (total_bytes/1024), + ((double)total_bytes/(1024.0*1024.0)) / elapsed_time ); + } + shmem_barrier_all(); + shmem_free(target); + shmem_free( pSync ); + shmem_barrier_all(); +} + + + +void +fcollect(int *target, int *src, int elements, int me, int npes, int loops) +{ + int i; + double start_time, elapsed_time; + long total_bytes = (long)loops * elements * sizeof(*src); /* widen first: loops*elements can overflow int */ + long *ps, *pSync, *pSync1; + + pSync = (long*) shmem_malloc( 2 * sizeof(long) * SHMEM_COLLECT_SYNC_SIZE ); + pSync1 = &pSync[SHMEM_COLLECT_SYNC_SIZE]; + for (i = 0; i < SHMEM_COLLECT_SYNC_SIZE; i++) { + pSync[i] = pSync1[i] = SHMEM_SYNC_VALUE; + } + target = (int *) shmem_malloc( elements * sizeof(*target) * npes ); + + if (me==0 && Verbose) { + fprintf(stdout,"%s: %d loops of fcollect32(%ld bytes) over %d PEs: ", + __FUNCTION__,loops,(elements*sizeof(*src)),npes); + fflush(stdout); + } + + shmem_barrier_all(); + + start_time = shmemx_wtime(); + for(i = 0; i < loops; i++) { + ps = (i & 1) ? pSync1 : pSync; /* alternate the two disjoint sync arrays, as collect() does; &pSync[i&1] made them overlap */ + shmem_fcollect32( target, src, elements, 0, 0, npes, ps ); + } + elapsed_time = shmemx_wtime() - start_time; + + if (me==0 && Verbose) { + printf("%7.3f secs\n", elapsed_time); + printf(" %7.5f usecs / fcollect32(), %ld Kbytes @ %7.4f MB/sec\n\n", + (elapsed_time/(double)loops)*1000000.0, /* 'loops' fcollect calls were timed */ + (total_bytes/1024), + ((double)total_bytes/(1024.0*1024.0)) / elapsed_time ); + } + shmem_barrier_all(); + shmem_free(target); + shmem_free( pSync ); + shmem_barrier_all(); +} + +static int +atoi_scaled(char *s) +{ + long val; + char *e; + + val = strtol(s,&e,0); + if (e == NULL || *e =='\0') + return (int)val; + + if (*e == 'k' || *e == 'K') + val *= 1024; + else if (*e == 'm' || *e == 'M') + val *=
1024*1024; + else if (*e == 'g' || *e == 'G') + val *= 1024*1024*1024; + + return (int)val; +} + + +static void +usage(char *pgm) +{ + fprintf(stderr, + "usage: %s -{lhv}\n" + " where:\n" + " -l loops (%d) loop count.\n" + " -e ints (%d) # of integers to Tx\n" + " -a enable all-2-all tests\n" + " -b enable broadcast tests\n" + " -c enable collective tests\n" + " -m enable Many-2-one, one-2-many tests\n" + " -n enable put/get to neighbor PEtests\n" + " -v be verbose, multiple 'v' more verbose\n" + " -q be quite\n" + " -h this text.\n", + pgm,DFLT_LOOPS, N_ELEMENTS); +} + + +#ifndef HAVE_SHMEMX_WTIME + +static double +shmemx_wtime(void) +{ + double wtime; + struct timeval tv; + + gettimeofday(&tv, NULL); + wtime = tv.tv_sec; + wtime += (double)tv.tv_usec / 1000000.0; + return wtime; +} + +#endif /* HAVE_SHMEMX_WTIME */ diff --git a/test/unit/sping.c b/test/unit/sping.c new file mode 100644 index 0000000..3004901 --- /dev/null +++ b/test/unit/sping.c @@ -0,0 +1,258 @@ +/* + * Copyright 2011 Sandia Corporation. Under the terms of Contract + * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government + * retains certain rights in this software. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * SHMEM PingPong test + * :vim:sw=4:ts=4: + */ +#include +#include +#include +#include +#include + +int getSize (char *); +double gettime(void); +double dt (double *, double *); +void usage (char *); +void help (char *); +void printStats (int, int, int, int, double); + +#define dprint if (Verbose) printf +#define DFLT_MIN_WORDS 128 +#define DFLT_REPS 1000 /* no trailing ';' so the macro is usable inside expressions */ + +int Verbose=0; + +#include + +int getSize (char *str) +{ + int size; + char mod[32]; + + switch (sscanf (str, "%d%1[mMkK]", &size, mod)) + { + case 1: + return (size); + + case 2: + switch (*mod) + { + case 'm': + case 'M': + return (size << 20); + + case 'k': + case 'K': + return (size << 10); + + default: + return (size); + } + + default: + return (-1); + } +} + +double gettime() +{ + struct timeval tv; + gettimeofday(&tv, 0); + return (tv.tv_sec * 1000000.0 + tv.tv_usec); /* microseconds; multiply in double so a 32-bit tv_sec cannot overflow */ +} + +double dt (double *tv1, double *tv2) +{ + return (*tv1 - *tv2); +} + +void usage (char *name) +{ + fprintf (stderr, "Usage: %s [flags] nwords [maxWords] [incWords]\n", name); + fprintf (stderr, " %s -h\n", name); + exit (1); +} + +void help (char *name) +{ + if (shmem_my_pe() == 0) { + printf ("Usage: %s [flags] nwords [maxWords] [incWords]\n\n", name); + printf (" Flags may be any of\n"); + printf (" -n number repititions\n"); + printf (" -e everyone print timing info\n"); + printf (" -h print this info\n\n"); + printf (" Numbers may be postfixed with 'k' or 'm'\n\n"); + } + shmem_barrier_all(); + exit (0); +} + +void printStats (int
proc, int peer, int doprint, int now, double t) +{ + if (doprint || (proc & 1)) + printf("%3d pinged %3d: %8d words %9.2f uSec %8.2f MB/s\n", + proc, peer, now, t, sizeof(long)*now/(t)); +} + +int main (int argc, char *argv[]) +{ + double t,tv[2]; + int reps = DFLT_REPS; + int doprint = 1/*0*/; + char *progName; + int minWords; + int maxWords; + int incWords, nwords, nproc, proc, peer, c, r, i; + long *rbuf; /* remote buffer - sink */ + long *tbuf; /* transmit buffer - src */ + + shmem_init(); + proc = shmem_my_pe(); + nproc = shmem_n_pes(); + if (nproc == 1) { + fprintf(stderr, "ERR - Requires > 1 Processing Elements\n"); + shmem_finalize(); + return 0; + } + + for (progName = argv[0] + strlen(argv[0]); + progName > argv[0] && *(progName - 1) != '/'; + progName--) + ; + + while ((c = getopt (argc, argv, "n:evh")) != -1) + switch (c) + { + case 'n': + if ((reps = getSize (optarg)) <= 0) + usage (progName); + break; + case 'e': + doprint++; + break; + case 'v': + Verbose++; + break; + case 'h': + help (progName); + default: + usage (progName); + } + + if (optind == argc) + minWords = DFLT_MIN_WORDS; + else if ((minWords = getSize (argv[optind++])) <= 0) + usage (progName); + + if (optind == argc) + maxWords = minWords; + else if ((maxWords = getSize (argv[optind++])) < minWords) + usage (progName); + + if (optind == argc) + incWords = 0; + else if ((incWords = getSize (argv[optind++])) < 0) + usage (progName); + + if (!(rbuf = (long *)shmem_malloc(maxWords * sizeof(long)))) + { + perror ("Failed memory allocation"); + exit (1); + } + memset (rbuf, 0, maxWords * sizeof (long)); + + if (!(tbuf = (long *)shmem_malloc(maxWords * sizeof(long)))) + { + perror ("Failed memory allocation"); + exit (1); + } + + for (i = 0; i < maxWords; i++) + tbuf[i] = 1000 + (i & 255); + + if (doprint) + printf ("%d(%d): Shmem PING reps %d minWords %d maxWords %d " + "incWords %d\n", + proc, nproc, reps, minWords, maxWords, incWords); + + dprint("[%d] rbuf: %ld\n", proc, (unsigned long) 
rbuf); + + shmem_barrier_all(); + + peer = proc ^ 1; + if (peer >= nproc) + doprint = 0; + + for (nwords = minWords; + nwords <= maxWords; + nwords = incWords ? nwords + incWords : nwords ? 2 * nwords : 1) + { + r = reps; + shmem_barrier_all(); + tv[0] = gettime(); + if (peer < nproc) + { + if (proc & 1) + { + r--; + shmem_long_wait_until(&rbuf[nwords-1], SHMEM_CMP_NE, 0); + rbuf[nwords-1] = 0; + } + + while (r-- > 0) + { + shmem_long_put(rbuf, tbuf, nwords, peer); + shmem_long_wait_until(&rbuf[nwords-1], SHMEM_CMP_NE, 0); + rbuf[nwords-1] = 0; + } + + if (proc & 1) + { + shmem_long_put(rbuf, tbuf, nwords, peer); + } + } + tv[1] = gettime(); + t = dt (&tv[1], &tv[0]) / (2 * reps); + + shmem_barrier_all(); + + printStats (proc, peer, doprint, nwords, t); + } + + shmem_free(rbuf); + shmem_free(tbuf); + + shmem_finalize(); + + return 0; +} diff --git a/test/unit/strided_put.c b/test/unit/strided_put.c new file mode 100644 index 0000000..ac49ebb --- /dev/null +++ b/test/unit/strided_put.c @@ -0,0 +1,82 @@ +/* + * Copyright 2011 Sandia Corporation. Under the terms of Contract + * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government + * retains certain rights in this software. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* +* adaptation of example from SGI man page for shmem_iput. +* +* Code slightly cleaned up (removed cache call, fixed % format) +* +*/ +#include +#include + +int +main(int argc, char* argv[]) +{ + short source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + static short target[10]; + int me, num_pes; + int ret = 0; + + shmem_init(); + me = shmem_my_pe(); + num_pes = shmem_n_pes(); + + if (num_pes != 2) { + printf("%s: Requires 2 PEs\n", argv[0]); + shmem_finalize(); + return 0; + } + + if (me == 0) { + /* put 5 shorts, every 2nd element of source (stride 2), into target[0..4] (stride 1) on PE 1 */ + shmem_short_iput(target, source, 1, 2, 5, 1); + } + + shmem_barrier_all(); /* sync sender and receiver */ + + if (me == 1) { + if (! (target[0] == source[0] && + target[1] == source[2] && + target[2] == source[4] && + target[3] == source[6] && + target[4] == source[8])) { + printf("target on PE %d is %hd %hd %hd %hd %hd\n", me, + target[0], target[1], target[2], + target[3], target[4] ); + ret = 1; + } + } + + shmem_finalize(); + + return ret; +} diff --git a/test/unit/swap1.c b/test/unit/swap1.c new file mode 100644 index 0000000..8b2e4f6 --- /dev/null +++ b/test/unit/swap1.c @@ -0,0 +1,80 @@ +/* + * Copyright 2011 Sandia Corporation. Under the terms of Contract + * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government + * retains certain rights in this software. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +int +main(int argc, char* argv[]) +{ + long source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + static long target[10] = { 11, 12, 13, 14, 15, 16, 17, 18, 19, 20 }; + int i, bad = 0; + + shmem_init(); + + if (shmem_n_pes() == 1) { + printf("%s: Requires number of PEs > 1\n", argv[0]); + shmem_finalize(); + return 0; + } + + if (shmem_my_pe() == 0) { + for (i = 0 ; i < 10 ; ++i) { + source[i] = shmem_long_atomic_swap(target + i, source[i], 1); + } + } + + shmem_barrier_all(); /* sync sender and receiver */ + + if (shmem_my_pe() == 0) { + for (i = 0 ; i < 10 ; ++i) { + if (source[i] != i + 11) bad = 1; + } + } else if (shmem_my_pe() == 1) { + for (i = 0 ; i < 10 ; ++i) { + if (target[i] != i + 1) bad = 1; + } + } + + if (bad) { + fprintf(stderr, "[%02d] error: ", shmem_my_pe()); + for (i = 0 ; i < 10 ; ++i) { + fprintf(stderr, "%ld ", (shmem_my_pe() == 0) ? 
source[i] : target[i]); + } + fprintf(stderr, "\n"); + } + + shmem_finalize(); + + return bad; +} diff --git a/test/unit/swapm.c b/test/unit/swapm.c new file mode 100644 index 0000000..17a3e24 --- /dev/null +++ b/test/unit/swapm.c @@ -0,0 +1,325 @@ +/* + * Copyright 2011 Sandia Corporation. Under the terms of Contract + * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government + * retains certain rights in this software. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+/*
+ * swapm - exercise shmem_*_swap()
+ */
+
+/* NOTE(review): the header names on the #include lines below are missing
+ * from this copy of the patch; restore them before building. */
+#include
+
+#include
+#include
+#include
+
+#define Vprintf if (Verbose) printf
+
+static int *src_int;
+static float *src_float;
+static double *src_double;
+static long *src_long;
+static long long *src_llong;
+
+static int *dst_int, itmp;
+static float *dst_float, ftmp;
+static double *dst_double, dtmp;
+static long *dst_long, ltmp;
+static long long *dst_llong, lltmp;
+
+static int loops = 10;
+
+/*
+ * Each loop iteration allocates five symmetric arrays of 2*num_procs
+ * elements; the first half ("src") is the swap target and the second half
+ * ("dst") holds the values returned to PE 0.  PE 0 atomically swaps a zero
+ * into element j of PE j's src array, checks the returned old value, reads
+ * the remote element back with shmem_*_g() and swaps the original value back
+ * in.  The other PEs wait for the zero to arrive (int, long, long long) and
+ * take part in the barriers.  Any mismatch ends the job through
+ * shmem_global_exit(1).  Returns 0 when all loops pass or when fewer than
+ * two PEs are available.
+ */
+int
+main(int argc, char* argv[])
+{
+    int me, num_procs, l, j;
+    int Verbose = 0;
+
+    shmem_init();
+    me = shmem_my_pe();
+    num_procs = shmem_n_pes();
+    if ( num_procs < 2 ) {
+        if (me ==0)
+            printf("PE[0] requires 2 or more PEs?\n");
+        shmem_finalize();
+        return 0;
+    }
+
+    for (l = 0 ; l < loops ; ++l) {
+
+        if ((src_int = shmem_malloc(2*num_procs*sizeof(int))) == NULL) {
+            printf("PE-%d int shmem_malloc() failed?\n", me);
+            shmem_global_exit(1);
+        }
+        dst_int = &src_int[num_procs];
+        for(j=0; j < num_procs; j++) {
+            src_int[j] = 4;
+            dst_int[j] = 0;
+        }
+
+        if ((src_float = shmem_malloc(2*num_procs*sizeof(float))) == NULL) {
+            printf("PE-%d float shmem_malloc() failed?\n", me);
+            shmem_global_exit(1);
+        }
+        dst_float = &src_float[num_procs];
+        for(j=0; j < num_procs; j++) {
+            src_float[j] = 4.0;
+            dst_float[j] = 0.0;
+        }
+
+        if ((src_double = shmem_malloc(2*num_procs*sizeof(double))) == NULL) {
+            printf("PE-%d double shmem_malloc() failed?\n", me);
+            shmem_global_exit(1);
+        }
+        dst_double = &src_double[num_procs];
+        for(j=0; j < num_procs; j++) {
+            src_double[j] = 8.0;
+            dst_double[j] = 0.0;
+        }
+
+        if ((src_long = shmem_malloc(2*num_procs*sizeof(long))) == NULL) {
+            printf("PE-%d long shmem_malloc() failed?\n", me);
+            shmem_global_exit(1);
+        }
+        dst_long = &src_long[num_procs];
+        for(j=0; j < num_procs; j++) {
+            src_long[j] = 8;
+            dst_long[j] = 0;
+        }
+
+        if ((src_llong = shmem_malloc(2*num_procs*sizeof(long long))) == NULL) {
+            /* Name the type that actually failed (was reported as "long"). */
+            printf("PE-%d long long shmem_malloc() failed?\n", me);
+            shmem_global_exit(1);
+        }
+        dst_llong = &src_llong[num_procs];
+        for(j=0; j < num_procs; j++) {
+            src_llong[j] = 16;
+            dst_llong[j] = 0;
+        }
+
+        shmem_barrier_all();
+
+        if ( me != 0 ) {
+            /* is 'src_*' accessible from PE0? should be. */
+            if (!shmem_addr_accessible(src_int,0)) {
+                printf("PE-%d local src_int %p not accessible from PE-%d?\n",
+                       me, (void*)src_int, 0);
+                shmem_global_exit(1);
+            }
+            if (!shmem_addr_accessible(src_float,0)) {
+                printf("PE-%d local src_float %p not accessible from PE-%d?\n",
+                       me, (void*)src_float, 0);
+                shmem_global_exit(1);
+            }
+            if (!shmem_addr_accessible(src_double,0)) {
+                printf("PE-%d local src_double %p not accessible from PE-%d?\n",
+                       me, (void*)src_double, 0);
+                shmem_global_exit(1);
+            }
+            if (!shmem_addr_accessible(src_long,0)) {
+                printf("PE-%d local src_long %p not accessible from PE-%d?\n",
+                       me, (void*)src_long, 0);
+                shmem_global_exit(1);
+            }
+            if (!shmem_addr_accessible(src_llong,0)) {
+                printf("PE-%d local src_llong %p not accessible from PE-%d?\n",
+                       me, (void*)src_llong, 0);
+                shmem_global_exit(1);
+            }
+        }
+        shmem_barrier_all();
+
+        if ( me == 0 ) {
+            shmem_quiet();
+            /* int: swap in 0, expect the initial value 4 back */
+            for(j=1; j < num_procs; j++) {
+                dst_int[j] = shmem_int_atomic_swap(src_int+j,0,j);
+                if (dst_int[j] != 4) {
+                    printf("PE-%d dst_int[%d] %d != 4?\n",me,j,dst_int[j]);
+                    shmem_global_exit(1);
+                }
+            }
+            /* matches the first barrier in the else branch below */
+            shmem_barrier_all();
+
+            /* verify remote data */
+            for(j=1; j < num_procs; j++) {
+                itmp = shmem_int_g(src_int+j,j);
+                if (itmp != 0) {
+                    printf("PE-0 int PE[%d] rem(%d) != 0?\n",j,itmp);
+                    shmem_global_exit(1);
+                }
+
+                /* swap back */
+                dst_int[j] = shmem_int_atomic_swap(src_int+j,dst_int[j],j);
+                if (dst_int[j] != 0) {
+                    printf("PE-0 dst_int[%d] %d != 0?\n",j,dst_int[j]);
+                    shmem_global_exit(1);
+                }
+
+                itmp = shmem_int_g(src_int+j,j);
+                if (itmp != 4) {
+                    printf("PE-0 PE[%d] rem %d != 4?\n",j,itmp);
+                    shmem_global_exit(1);
+                }
+            }
+
+            /* float: swap, verify and swap back without an intermediate barrier */
+            for(j=1; j < num_procs; j++) {
+                dst_float[j] = shmem_float_atomic_swap(src_float+j,0.0,j);
+                if (dst_float[j] != 4.0) {
+                    printf("PE-0 dst_float[%d] %f != 4.0?\n",j,dst_float[j]);
+                    shmem_global_exit(1);
+                }
+
+                /* verify remote data */
+                ftmp = shmem_float_g(src_float+j,j);
+                if (ftmp != 0.0) {
+                    printf("PE-0 float rem(%f) != 0.0?\n",ftmp);
+                    shmem_global_exit(1);
+                }
+                /* swap back */
+                dst_float[j] = shmem_float_atomic_swap(src_float+j,dst_float[j],j);
+                if (dst_float[j] != 0.0) {
+                    printf("PE-0 dst_float[%d] %f != 0.0?\n",j,dst_float[j]);
+                    shmem_global_exit(1);
+                }
+                ftmp = shmem_float_g(src_float+j,j);
+                if (ftmp != 4.0) {
+                    printf("PE-%d float rem(%f) != 4.0?\n",me,ftmp);
+                    shmem_global_exit(1);
+                }
+            }
+
+            /* double: same sequence as float, initial value 8.0 */
+            for(j=1; j < num_procs; j++) {
+                dst_double[j] = shmem_double_atomic_swap(src_double+j,0.0,j);
+                if (dst_double[j] != 8.0) {
+                    printf("PE-0 dst_double[%d] %f != 8.0?\n",j,dst_double[j]);
+                    shmem_global_exit(1);
+                }
+                /* verify remote data */
+                dtmp = shmem_double_g(src_double+j,j);
+                if (dtmp != 0.0) {
+                    /* Report the right type (was labelled "float"). */
+                    printf("PE-0 double rem(%f) != 0.0?\n",dtmp);
+                    shmem_global_exit(1);
+                }
+                /* swap back */
+                dst_double[j] = shmem_double_atomic_swap(src_double+j,dst_double[j],j);
+                if (dst_double[j] != 0.0) {
+                    printf("PE-0 dst_double[%d] %f != 0.0?\n",j,dst_double[j]);
+                    shmem_global_exit(1);
+                }
+                dtmp = shmem_double_g(src_double+j,j);
+                if (dtmp != 8.0) {
+                    printf("PE-0 double rem(%f) != 8.0?\n",dtmp);
+                    shmem_global_exit(1);
+                }
+            }
+
+            /* long: swap in 0, expect the initial value 8 back */
+            for(j=1; j < num_procs; j++) {
+                dst_long[j] = shmem_long_atomic_swap(src_long+j,0,j);
+                if (dst_long[j] != 8) {
+                    printf("PE-0 dst_long[%d] %ld != 8?\n",j,dst_long[j]);
+                    shmem_global_exit(1);
+                }
+            }
+            /* matches the second barrier in the else branch below */
+            shmem_barrier_all();
+
+            /* verify remote data */
+            for(j=1; j < num_procs; j++) {
+                ltmp = shmem_long_g(src_long+j,j);
+                if (ltmp != 0) {
+                    printf("PE-0 PE[%d]long rem(%ld) != 0?\n",j,ltmp);
+                    shmem_global_exit(1);
+                }
+                /* swap back */
+                dst_long[j] = shmem_long_atomic_swap(src_long+j,dst_long[j],j);
+                if (dst_long[j] != 0) {
+                    printf("PE-%d dst_long[%d] %ld != 0?\n",me,j,dst_long[j]);
+                    shmem_global_exit(1);
+                }
+                ltmp = shmem_long_g(src_long+j,j);
+                if (ltmp != 8) {
+                    printf("PE-%d long rem(%ld) != 8?\n",me,ltmp);
+                    shmem_global_exit(1);
+                }
+            }
+
+            /* long long: swap in 0, expect the initial value 16 back */
+            for(j=1; j < num_procs; j++) {
+                dst_llong[j] = shmem_longlong_atomic_swap(src_llong+j,0,j);
+                if (dst_llong[j] != 16) {
+                    printf("PE-%d dst_llong[%d] %lld != 16?\n",me,j,dst_llong[j]);
+                    shmem_global_exit(1);
+                }
+            }
+            /* matches the third barrier in the else branch below */
+            shmem_barrier_all();
+
+            /* verify remote data */
+            for(j=1; j < num_procs; j++) {
+                lltmp = shmem_longlong_g(src_llong+j,j);
+                if (lltmp != 0) {
+                    printf("PE-%d long long rem(%lld) != 0?\n",me,lltmp);
+                    shmem_global_exit(1);
+                }
+                /* swap back */
+                dst_llong[j] = shmem_longlong_atomic_swap(src_llong+j,dst_llong[j],j);
+                if (dst_llong[j] != 0) {
+                    printf("PE-%d dst_llong[%d] %lld != 0?\n", me,j,dst_llong[j]);
+                    shmem_global_exit(1);
+                }
+                lltmp = shmem_longlong_g(src_llong+j,j);
+                if (lltmp != 16) {
+                    printf("PE-%d longlong rem(%lld) != 16?\n",me,lltmp);
+                    shmem_global_exit(1);
+                }
+            }
+        }
+        else {
+            /* Wait for PE 0's zero to land locally, then join the barrier
+             * PE 0 enters after each of its int/long/long long swap loops. */
+            shmem_int_wait_until(&src_int[me],SHMEM_CMP_EQ,0);
+            shmem_barrier_all();
+
+            shmem_long_wait_until(&src_long[me],SHMEM_CMP_EQ,0);
+            shmem_barrier_all();
+
+            shmem_longlong_wait_until(&src_llong[me],SHMEM_CMP_EQ,0);
+            shmem_barrier_all();
+        }
+
+        shmem_barrier_all();
+
+        shmem_free(src_int);
+        shmem_free(src_float);
+        shmem_free(src_double);
+        shmem_free(src_long);
+        shmem_free(src_llong);
+    }
+
+    if (Verbose)
+        fprintf(stderr,"[%d] exit\n",shmem_my_pe());
+
+    shmem_finalize();
+
+    return 0;
+}
diff --git a/test/unit/sync-size.c b/test/unit/sync-size.c
new file mode 100644
index 0000000..8af6ea3
--- /dev/null
+++ b/test/unit/sync-size.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Test various collectives using the same pSync array of SHMEM_SYNC_SIZE */
+
+/* NOTE(review): the header names on the #include lines below are missing
+ * from this copy of the patch; restore them before building. */
+#include
+#include
+#include
+#include
+
+#define N 3
+/* Fully parenthesised so the macro stays one operand inside a larger
+ * expression. */
+#define MAX(A,B) (((A) > (B)) ? (A) : (B))
+
+long pSync[SHMEM_SYNC_SIZE];
+long pWrk[MAX(N/2 + 1, SHMEM_REDUCE_MIN_WRKDATA_SIZE)];
+
+long src[N];
+long dst[N];
+
+/*
+ * Runs a barrier, a broadcast from PE 0, an fcollect and a max reduction
+ * back to back over all PEs, passing the same pSync array to each call and
+ * separating the calls with shmem_barrier_all().  Every PE checks the data
+ * it received; the number of mismatches seen locally is the return value
+ * (0 means success).
+ */
+int main(int argc, char* argv[]) {
+    int i, j, me, npes, long_is_32;
+    int errors = 0;
+
+    for (i = 0; i < SHMEM_SYNC_SIZE; i++) {
+        pSync[i] = SHMEM_SYNC_VALUE;
+    }
+
+    shmem_init();
+
+    me = shmem_my_pe();
+    npes = shmem_n_pes();
+
+    /* The 32/64-bit collectives are chosen by the size of long. */
+    if (sizeof(long) == 4) {
+        long_is_32 = 1;
+    } else if (sizeof(long) == 8) {
+        long_is_32 = 0;
+    } else {
+        printf("Error: sizeof(long) == %zu, must be either 4 or 8\n", sizeof(long));
+        shmem_global_exit(1);
+    }
+
+    for (i = 0; i < N; i += 1) {
+        src[i] = me;
+        dst[i] = -1;
+    }
+
+    /* Barrier */
+
+    shmem_barrier(0, 0, npes, pSync);
+    shmem_barrier_all();
+
+    /* Broadcast */
+
+    if (long_is_32)
+        shmem_broadcast32(dst, src, N, 0, 0, 0, npes, pSync);
+    else
+        shmem_broadcast64(dst, src, N, 0, 0, 0, npes, pSync);
+
+    /* Only PEs other than the root (PE 0) check dst; they expect PE 0's
+     * src values, which are all 0. */
+    for (i = 0; i < N && me > 0; i++) {
+        if (dst[i] != 0) {
+            printf("[%d] Broadcast: got %ld, expected %d\n", me, dst[i], 0);
+            ++errors;
+        }
+    }
+
+    shmem_barrier_all();
+
+    /* Collect */
+
+    long *dst_all = shmem_malloc(npes * N * sizeof(long));
+
+    if (dst_all == NULL) {
+        printf("[%d] shmem_malloc failed\n", me);
+        shmem_global_exit(1);
+    }
+
+    if (long_is_32)
+        shmem_fcollect32(dst_all, src, N, 0, 0, npes, pSync);
+    else
+        shmem_fcollect64(dst_all, src, N, 0, 0, npes, pSync);
+
+    /* Block i of the result holds PE i's N source elements, all equal to i. */
+    for (i = 0; i < npes; i++) {
+        for (j = 0; j < N; j++) {
+            if (dst_all[i*N+j] != i) {
+                /* Report the element that failed and the value it should hold. */
+                printf("[%d] Fcollect: got %ld, expected %d\n", me, dst_all[i*N+j], i);
+                ++errors;
+            }
+        }
+    }
+
+    shmem_free(dst_all);
+    shmem_barrier_all();
+
+    /* Reduction */
+
+    shmem_long_max_to_all(dst, src, N, 0, 0, npes, pWrk, pSync);
+
+    /* src[i] == me on every PE, so the maximum is the highest PE number. */
+    for (i = 0; i < N; i++) {
+        if (dst[i] != npes-1) {
+            /* Report the local PE and the expected maximum. */
+            printf("[%d] Reduction: got %ld, expected %d\n", me, dst[i], npes-1);
+            ++errors;
+        }
+    }
+
+    shmem_finalize();
+
+    return errors;
+}
diff --git a/test/unit/test_lock.c b/test/unit/test_lock.c
new file mode 100644
index 0000000..ab31ccf
--- /dev/null
+++ b/test/unit/test_lock.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright 2011
Sandia Corporation. Under the terms of Contract + * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government + * retains certain rights in this software. + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * This software is available to you under the BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * shmem_test_lock() test_lock {-v|q|n} {loop-cnt} + * where: + * -v == verbose/debug + * -q == quiet + * -n x x == Spin count threshold to announce excessive spin counts. + * + * For n loops: + * Each pe attempts to lock the global lock, if lock is taken, increment the + * lock count on all pes and then waits until the lock count reaches num_pes() + * which is the exit condition. + * On a failed lock attempt, increment local lock_tries counter and repeat. 
+ */
+/* NOTE(review): the header names on the #include lines below are missing
+ * from this copy of the patch; restore them before building. */
+#include
+#include
+#include
+#include
+#include
+
+#define Rfprintf if (shmem_my_pe() == 0) fprintf
+#define Rprintf if (shmem_my_pe() == 0) printf
+#define Vfprintf if (Verbose) fprintf
+#define Vprintf if (Verbose) printf
+
+int Verbose;
+int Stats;
+int Noise = 500;
+
+int lock_cnt;
+long lock;
+
+typedef struct _lock_stat {
+    int attempts;
+} lock_stat_t;
+
+lock_stat_t *lock_stats;
+
+/*
+ * Parses -n/-q/-v and an optional loop count (default 40), then for each
+ * loop has every PE spin on shmem_test_lock() until it has held the lock
+ * once; the holder bumps lock_cnt on all PEs, releases the lock and waits
+ * for lock_cnt to reach the number of PEs.  When statistics are enabled the
+ * per-PE count of failed attempts is gathered on PE 0 and printed.
+ * Returns 0 on success (or when run on a single PE), 1 on a bad argument.
+ */
+int
+main(int argc, char* argv[])
+{
+    int pe, c, rc, cloop, loops;
+    int tries, lock_stats_sz;
+    int my_rank, num_ranks;
+    int Announce = (NULL == getenv("MAKELEVEL")) ? 1 : 0;
+
+    shmem_init();
+    my_rank = shmem_my_pe();
+    num_ranks = shmem_n_pes();
+    if (num_ranks == 1) {
+        fprintf(stderr, "ERR - Requires > 1 PEs\n");
+        shmem_finalize();
+        return 0;
+    }
+
+    while((c=getopt(argc,argv,"n:qv")) != -1) {
+        switch(c) {
+          case 'n':
+            Noise = atoi(optarg); // threshold to announce spinloops
+            /* Noise is a factor of the divisor in the spin report below;
+             * zero would be a modulo by zero, so reject non-positive values. */
+            if (Noise <= 0) {
+                Rfprintf(stderr,
+                    "ERR - spin threshold out of bounds '%d'?\n", Noise);
+                shmem_finalize();
+                return 1;
+            }
+            break;
+          case 'v':
+            Verbose++;
+            break;
+          case 'q':
+            Announce = 0;
+            break;
+          default:
+            Rfprintf(stderr,"ERR - unknown -%c ?\n",c);
+            shmem_finalize();
+            return 1;
+        }
+    }
+
+    if (optind == argc)
+        loops = 40;
+    else {
+        loops = atoi(argv[optind++]);
+        if (loops <= 0 || loops > 1000000) {
+            Rfprintf(stderr,
+                "ERR - loops arg out of bounds '%d'?\n", loops);
+            shmem_finalize();
+            return 1;
+        }
+    }
+    if (Announce)
+        Stats++;
+    else
+        Stats = 0;
+
+    if (Stats) {
+        lock_stats_sz = sizeof(lock_stat_t) * num_ranks; // lock stats
+        lock_stats = shmem_malloc(lock_stats_sz);
+        if ( !lock_stats ) {
+            fprintf(stderr,"[%d] ERR: shmem_malloc(%d)\n",my_rank,lock_stats_sz);
+            shmem_global_exit(1);
+        }
+        memset( lock_stats, 0, lock_stats_sz );
+    }
+
+    for(cloop=1; cloop <= loops; cloop++) {
+        lock = 0;
+        lock_cnt = 0;
+        tries = 0;
+
+        shmem_barrier_all(); /* sync all ranks */
+
+        while( *(&lock_cnt) < num_ranks ) {
+            rc = shmem_test_lock(&lock);
+            if ( rc == 0 ) {
+                /* incr lock_cnt on all ranks */
+                for(pe=0; pe < num_ranks; pe++) {
+                    shmem_int_atomic_add( &lock_cnt, 1, pe );
+                }
+                Vprintf("[%d] locked: lock_cnt(%d)\n", my_rank, lock_cnt);
+                shmem_clear_lock( &lock );
+                /* every rank adds one, so this is the loop's exit condition */
+                shmem_int_wait_until( &lock_cnt, SHMEM_CMP_GE, num_ranks );
+            }
+            else {
+                tries++;
+                if ( Announce && ((tries % (num_ranks*Noise)) == 0) )
+                    printf("[%d] unsuccessful lock attempts %d lock_cnt %d\n",
+                            my_rank, tries, lock_cnt);
+            }
+        }
+        shmem_barrier_all(); /* sync all ranks */
+
+        if ( (cloop % 10) == 0 ) {
+            if (my_rank == 0 && Announce)
+                printf("%d ranks completed %d loops\n", num_ranks, cloop);
+        }
+
+        if (Stats)
+            lock_stats[my_rank].attempts += tries;
+    }
+
+    /* Each non-zero rank copies its own slot into the same slot on PE 0. */
+    if ( Stats && my_rank != 0 ) {
+        shmem_int_put( &lock_stats[my_rank].attempts,
+                       &lock_stats[my_rank].attempts, 1, 0 );
+        shmem_fence();
+    }
+    shmem_barrier_all(); /* sync all ranks */
+
+    if (Stats && my_rank == 0) {
+        for(c=0; c < num_ranks; c++) {
+            printf("[%d] lock attempts %d\n", c,
+                    lock_stats[c].attempts);
+        }
+    }
+    shmem_barrier_all(); /* sync all ranks */
+
+    if (Stats)
+        shmem_free(lock_stats);
+
+    Vprintf ("[%d] of %d, Exit\n", my_rank, num_ranks);
+
+    shmem_finalize();
+
+    return 0;
+}
diff --git a/test/unit/test_lock_cswap.c b/test/unit/test_lock_cswap.c
new file mode 100644
index 0000000..9c2bb3a
--- /dev/null
+++ b/test/unit/test_lock_cswap.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright 2011 Sandia Corporation. Under the terms of Contract
+ * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+ * retains certain rights in this software.
+ *
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * shmem_test_lock_cswap() test_lock {-v|n} + * where: + * -v Enable debugging messages + * -n x Period with which to announce spin count + * + * For n loops: + * Each PE repeatedly attempts to take a simple spinlock on rank 0 using + * cswap, upon success the lock is released and the PE enters a barrier. + * On a failed lock attempt, increment local lock_tries counter and repeat. 
+ */
+
+/* NOTE(review): the header names on the #include lines below are missing
+ * from this copy of the patch; restore them before building. */
+#include
+#include
+#include
+#include
+#include
+
+#define Rfprintf if (shmem_my_pe() == 0) fprintf
+#define Rprintf if (shmem_my_pe() == 0) printf
+#define Vfprintf if (Verbose) fprintf
+#define Vprintf if (Verbose) printf
+
+int Verbose = 0;
+int Announce = 0;
+int Noise = 500;
+int Loops = 40;
+
+long lock;
+
+/*
+ * Parses -n/-v, then for each of Loops iterations has every PE take and
+ * release a spinlock that lives in `lock` on PE 0: the lock is taken with a
+ * compare-and-swap of 0 -> my_rank+1 and released with the reverse swap.
+ * A failed release ends the job through shmem_global_exit(1).
+ * Returns 0 on success (or when run on a single PE), 1 on a bad argument.
+ */
+int
+main(int argc, char* argv[])
+{
+    int c, cloop;
+    int my_rank, num_ranks;
+
+    shmem_init();
+    my_rank = shmem_my_pe();
+    num_ranks = shmem_n_pes();
+
+    if (num_ranks == 1) {
+        fprintf(stderr, "ERR - Requires > 1 PEs\n");
+        shmem_finalize();
+        return 0;
+    }
+
+    while ((c = getopt(argc,argv,"n:v")) != -1) {
+        switch (c) {
+            case 'n':
+                Noise = atoi(optarg);
+                /* Noise is a factor of the divisor in the spin report
+                 * below; zero would be a modulo by zero, so reject
+                 * non-positive values. */
+                if (Noise <= 0) {
+                    Rfprintf(stderr,
+                        "ERR - announce period out of bounds '%d'?\n", Noise);
+                    shmem_finalize();
+                    return 1;
+                }
+                break;
+            case 'v':
+                Verbose++;
+                Announce = 1;
+                break;
+            default:
+                Rfprintf(stderr,"ERR - unknown -%c ?\n",c);
+                shmem_finalize();
+                return 1;
+        }
+    }
+
+    for (cloop=1; cloop <= Loops; cloop++) {
+        int got_lock = 0;
+        int lock_cnt = 0;
+        int tries = 0;
+
+        lock = 0;
+
+        shmem_barrier_all(); /* sync all ranks */
+
+        while (!got_lock) {
+            /* acquire: 0 -> my_rank+1 on PE 0; returns the previous value */
+            long lockval = shmem_long_atomic_compare_swap(&lock, 0, my_rank+1, 0);
+
+            if (lockval == 0) {
+                long unlockval;
+                got_lock = 1;
+
+                Vprintf("[%d] locked: lock_cnt(%d) lock(%lx)\n", my_rank, lock_cnt, lock);
+
+                /* release: my_rank+1 -> 0 on PE 0 */
+                unlockval = shmem_long_atomic_compare_swap(&lock, my_rank+1, 0, 0); /* RACE: PE 1 hangs here */
+                if (unlockval != my_rank+1) {
+                    printf("[%d] unlock failed, expected %lx got %lx\n", my_rank, (long) my_rank+1, unlockval);
+                    shmem_global_exit(1);
+                }
+
+                Vprintf("[%d] finished unlock\n", my_rank);
+            }
+            else {
+                tries++;
+                if ( Announce && ((tries % (num_ranks*Noise)) == 0) ) {
+                    printf("[%d] unsuccessful lock attempts %d lock_cnt %d lock %lx\n",
+                           my_rank, tries, lock_cnt, lock);
+                }
+            }
+        }
+        shmem_barrier_all(); /* sync all ranks */
+
+        if ((cloop % 10) == 0) {
+            if (Announce) {
+                Rprintf("%d ranks completed %d loops\n", num_ranks, cloop);
+            }
+        }
+    }
+
+    shmem_barrier_all(); /* sync all ranks */
+
+    Vprintf ("[%d] of %d, Exit\n", my_rank, num_ranks);
+    shmem_finalize();
+    return 0;
+}
diff --git a/test/unit/thread_wait.c b/test/unit/thread_wait.c
new file mode 100644
index 0000000..2d98431
--- /dev/null
+++ b/test/unit/thread_wait.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Thread wait test: Test whether a store performed by one thread will wake up a
+ * second thread from a call to shmem_wait.
*/
+
+/* NOTE(review): the header names on the #include lines below are missing
+ * from this copy of the patch; restore them before building. */
+#include
+#include
+#include
+#include
+
+static long shr_var = 0;
+
+/* Source thread: after a one second delay, stores 1 to shr_var with a plain
+ * assignment and then calls shmem_quiet().  Always returns NULL. */
+static void* src_thread_fn(void *arg) {
+    /* Sleep first so the dst thread has most likely entered its wait
+     * before the store below */
+    sleep(1);
+
+    /* This should wake up the waiting dst thread */
+    shr_var = 1;
+
+    /* Quiet should provide a store fence */
+    shmem_quiet();
+    return NULL;
+}
+
+/* Destination thread: blocks in shmem_long_wait_until() until shr_var is
+ * no longer 0.  Always returns NULL. */
+static void* dst_thread_fn(void *arg) {
+    shmem_long_wait_until(&shr_var, SHMEM_CMP_NE, 0);
+    return NULL;
+}
+
+/*
+ * Initialises OpenSHMEM with SHMEM_THREAD_MULTIPLE, starts the waiting
+ * thread and then the storing thread, and joins both.  Returns 0 on
+ * success; if initialisation fails it returns the init error code or exits
+ * through shmem_global_exit(1).
+ */
+int main(int argc, char* argv[]) {
+    int tl, ret;
+    pthread_t src_thread, dst_thread;
+
+    ret = shmem_init_thread(SHMEM_THREAD_MULTIPLE, &tl);
+
+    if (tl != SHMEM_THREAD_MULTIPLE || ret != 0) {
+        printf("Init failed (requested thread level %d, got %d, ret %d)\n",
+               SHMEM_THREAD_MULTIPLE, tl, ret);
+
+        if (ret == 0) {
+            shmem_global_exit(1);
+        } else {
+            return ret;
+        }
+    }
+
+    /* A thread that was never created must not be joined, and without the
+     * src thread the dst thread would wait forever, so bail out on error. */
+    ret = pthread_create(&dst_thread, NULL, &dst_thread_fn, NULL);
+    if (ret != 0) {
+        printf("pthread_create (dst thread) failed, ret %d\n", ret);
+        shmem_global_exit(1);
+    }
+
+    ret = pthread_create(&src_thread, NULL, &src_thread_fn, NULL);
+    if (ret != 0) {
+        printf("pthread_create (src thread) failed, ret %d\n", ret);
+        shmem_global_exit(1);
+    }
+
+    pthread_join(dst_thread, NULL);
+    pthread_join(src_thread, NULL);
+
+    shmem_finalize();
+
+    return 0;
+}
diff --git a/test/unit/threading.c b/test/unit/threading.c
new file mode 100644
index 0000000..06b5475
--- /dev/null
+++ b/test/unit/threading.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* NOTE(review): the header names on the four #include lines below are
+ * missing from this copy of the patch; restore them before building. */
+#include
+#include
+#include
+#include
+
+/* For systems without the PThread barrier API (e.g. MacOS) */
+#include "pthread_barrier.h"
+
+#define N_THREADS 8
+#define N_ELEMS 10
+
+static long source[N_THREADS*N_ELEMS];
+static long target[N_THREADS*N_ELEMS];
+pthread_barrier_t fencebar;
+
+/*
+ * Thread body.  tparam carries the thread index; the thread works on the
+ * N_ELEMS-long slice of source/target that starts at index*N_ELEMS.
+ * It puts its source slice into target on the next PE, gets the previous
+ * PE's target slice into its source slice, then gets the next PE's source
+ * slice into its target slice.  After every step all threads meet at
+ * fencebar, thread 0 alone runs shmem_barrier_all(), and all threads meet
+ * at fencebar again.  Always returns 0.
+ */
+static void* roundrobin(void* tparam) {
+    ptrdiff_t tid = (ptrdiff_t)tparam;
+    int offset = tid*N_ELEMS;
+    /* fprintf(stderr,"Starting thread %lu with offset %d\n",tid,offset); */
+
+    int nextpe = (shmem_my_pe()+1)%shmem_n_pes();
+    int prevpe = (shmem_my_pe()-1 + shmem_n_pes())%shmem_n_pes();
+    shmem_long_put(target+offset, source+offset, N_ELEMS, nextpe);
+
+    /* fprintf(stderr,"Thread %lu done first put\n",tid); */
+    pthread_barrier_wait(&fencebar);
+    if(tid == 0) shmem_barrier_all();
+    pthread_barrier_wait(&fencebar);
+
+    shmem_long_get(source+offset, target+offset, N_ELEMS, prevpe);
+
+    /* fprintf(stderr,"Thread %lu done first get\n",tid); */
+    pthread_barrier_wait(&fencebar);
+    if(tid == 0) shmem_barrier_all();
+    pthread_barrier_wait(&fencebar);
+
+    shmem_long_get(target+offset, source+offset, N_ELEMS, nextpe);
+
+    /* fprintf(stderr,"Thread %lu done second get\n",tid); */
+    pthread_barrier_wait(&fencebar);
+    if(tid == 0) shmem_barrier_all();
+    pthread_barrier_wait(&fencebar);
+    /* fprintf(stderr,"Done thread %lu\n",tid); */
+
+    return 0;
+}
+
+/*
+ * Fills source with 1..N_THREADS*N_ELEMS, initialises OpenSHMEM with
+ * SHMEM_THREAD_MULTIPLE, runs N_THREADS roundrobin() threads and then
+ * requires source and target to be byte-identical.  Returns 0 on success or
+ * when run on a single PE; a mismatch or a thread setup failure ends the
+ * job through shmem_global_exit(1).
+ */
+int
+main(int argc, char* argv[])
+{
+    int i;
+    for(i = 0; i < N_THREADS*N_ELEMS; ++i) {
+        source[i] = i+1;
+    }
+
+    int tl;
+    int ret = shmem_init_thread(SHMEM_THREAD_MULTIPLE, &tl);
+
+    if (tl != SHMEM_THREAD_MULTIPLE || ret != 0) {
+        printf("Init failed (requested thread level %d, got %d, ret %d)\n",
+               SHMEM_THREAD_MULTIPLE, tl, ret);
+
+        if (ret == 0) {
+            shmem_global_exit(1);
+        } else {
+            return ret;
+        }
+    }
+
+    if (shmem_n_pes() == 1) {
+        printf("%s: Requires number of PEs > 1\n", argv[0]);
+        shmem_finalize();
+        return 0;
+    }
+
+    pthread_t threads[N_THREADS];
+
+    ret = pthread_barrier_init(&fencebar,NULL,N_THREADS);
+    if (ret != 0) {
+        fprintf(stderr,"[%d] pthread_barrier_init failed, ret %d\n",shmem_my_pe(),ret);
+        shmem_global_exit(1);
+    }
+
+    fprintf(stderr,"Starting threads\n");
+    for(i = 0; i < N_THREADS; ++i) {
+        /* fprintf(stderr,"Starting thread %d\n",i); */
+        ptrdiff_t tid = i;
+        ret = pthread_create(&threads[i],NULL,&roundrobin,(void*)tid);
+        if (ret != 0) {
+            /* fencebar expects N_THREADS arrivals; with one thread missing
+             * the others would block in it forever, so abort the job. */
+            fprintf(stderr,"[%d] pthread_create %d failed, ret %d\n",shmem_my_pe(),i,ret);
+            shmem_global_exit(1);
+        }
+    }
+
+    for(i = 0; i < N_THREADS; ++i) {
+        pthread_join(threads[i],NULL);
+    }
+    pthread_barrier_destroy(&fencebar);
+
+    if (0 != memcmp(source, target, sizeof(long) * N_THREADS*N_ELEMS)) {
+        fprintf(stderr,"[%d] Src & Target mismatch?\n",shmem_my_pe());
+        /* dump the first ten source/target pairs */
+        for (i = 0 ; i < 10 ; ++i) {
+            printf("%ld,%ld ", source[i], target[i]);
+        }
+        printf("\n");
+        shmem_global_exit(1);
+    }
+
+    shmem_finalize();
+
+    return 0;
+}
+
diff --git a/test/unit/to_all.c b/test/unit/to_all.c
new file mode 100644
index 0000000..6bb49ae
--- /dev/null
+++ b/test/unit/to_all.c
@@ -0,0 +1,839 @@
+/*
+ * Copyright 2011 Sandia Corporation. Under the terms of Contract
+ * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+ * retains certain rights in this software.
+ *
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+* to_all - exercise SHMEM and,max,min,or,prod,sum,xor_to_all() reduction calls.
+* Each reduction is invoked for all data types:
+* short, int, long, float, double, long double, long long.
+* Point being numerous SHMEM atomics and synchronizations in flight.
+* From OpenSHMEM_specification_v1.0-final doc:
+* The pWrk and pSync arrays on all PEs in the active set must not be
+* in use from a prior call to a collective OpenSHMEM routine.
+*
+* frank @ SystemFabric Works identified an interesting overflow issue in the
+* prod_to_all test. In the presence of slightly larger PE counts (>=14),
+* overflow is encountered in short, int and float, double and long double.
+* The short and int both wrap correctly and are both uniformly wrong...uniformly
+* being the salient point. float, double and long double all suffer from
+* floating point rounding errors, hence the FP test results are ignored
+* (assumed to pass) when FP rounding is encountered. FP*_prod_to_all() calls are
+* still made so as not to upset the pSync ordering.
+*
+* usage: to_all {-amopsxSv|h}
+* where:
+* -a do not run and_to_all
+* -m do not run min_to_all, max_to_all() always run.
+* -o do not run or_to_all +* -p do not run prod_to_all +* -s do not run sum_to_all +* -x do not run xor_to_all +* -S Serialize *_to_all() calls with barriers. +* -v verbose(additional -v, more verbose) +* -h this text. +*/ +#include +#include +#include +#include +#include + +#include + +#define Rprintf if (shmem_my_pe() == 0) printf +#define Rfprintf if (shmem_my_pe() == 0) fprintf +#define Vprintf if (Verbose > 1) printf + +int sum_to_all(int me, int npes); +int and_to_all(int me, int npes); +int min_to_all(int me, int npes); +int max_to_all(int me, int npes); +int prod_to_all(int me, int npes); +int or_to_all(int me, int npes); +int xor_to_all(int me, int npes); + +int Verbose; +int Serialize; +int Min, And, Sum, Prod, Or, Xor; +int Passed; + +long pSync[SHMEM_REDUCE_SYNC_SIZE]; +long pSync1[SHMEM_REDUCE_SYNC_SIZE]; + +#define N 128 + +#define MAX(a, b) ((a) > (b)) ? (a) : (b) +#define WRK_SIZE MAX(N/2+1, SHMEM_REDUCE_MIN_WRKDATA_SIZE) + +short src0[N], dst0[N], pWrk0[WRK_SIZE]; +int src1[N], dst1[N], pWrk1[WRK_SIZE]; +long src2[N], dst2[N], pWrk2[WRK_SIZE]; +float src3[N], dst3[N], pWrk3[WRK_SIZE]; +double src4[N], dst4[N], pWrk4[WRK_SIZE]; +long double src5[N], dst5[N], pWrk5[WRK_SIZE]; +long long src6[N], dst6[N], pWrk6[WRK_SIZE]; + +short expected_result0; +int expected_result1; +long expected_result2; +float expected_result3; +double expected_result4; +long double expected_result5; +long long expected_result6; + +int ok[7]; + +int +max_to_all(int me, int npes) +{ + int i, j, pass=0; + + memset(ok,0,sizeof(ok)); + + for (i = 0; i < N; i++) { + src0[i] = src1[i] = src2[i] = src3[i] = src4[i] = src5[i] = src6[i] + = me + i; + } + shmem_barrier_all(); + + shmem_short_max_to_all( dst0, src0, N, 0, 0, npes, pWrk0, pSync); + shmem_int_max_to_all( dst1, src1, N, 0, 0, npes, pWrk1, pSync1); + shmem_long_max_to_all( dst2, src2, N, 0, 0, npes, pWrk2, pSync); + shmem_float_max_to_all( dst3, src3, N, 0, 0, npes, pWrk3, pSync1); + shmem_double_max_to_all( dst4, src4, N, 
0, 0, npes, pWrk4, pSync); + shmem_longdouble_max_to_all(dst5, src5, N, 0, 0, npes, pWrk5, pSync1); + shmem_longlong_max_to_all( dst6, src6, N, 0, 0, npes, pWrk6, pSync); + + if (me == 0) { + for (i = 0,j=-1; i < N; i++,j++) { + if(dst0[i] != npes+j) ok[0] = 1; + if(dst1[i] != npes+j) ok[1] = 1; + if(dst2[i] != npes+j) ok[2] = 1; + if(dst3[i] != npes+j) ok[3] = 1; + if(dst4[i] != npes+j) ok[4] = 1; + if(dst5[i] != npes+j) ok[5] = 1; + if(dst6[i] != npes+j) ok[6] = 1; + } + + if(ok[0]==1){ + printf("Reduction operation shmem_short_max_to_all: Failed\n"); + } + else{ + Vprintf("Reduction operation shmem_short_max_to_all: Passed\n"); + pass++; + } + if(ok[1]==1){ + printf("Reduction operation shmem_int_max_to_all: Failed\n"); + } + else{ + Vprintf("Reduction operation shmem_int_max_to_all: Passed\n"); + pass++; + } + if(ok[2]==1){ + printf("Reduction operation shmem_long_max_to_all: Failed\n"); + } + else{ + Vprintf("Reduction operation shmem_long_max_to_all: Passed\n"); + pass++; + } + if(ok[3]==1){ + printf("Reduction operation shmem_float_max_to_all: Failed\n"); + } + else{ + Vprintf("Reduction operation shmem_float_max_to_all: Passed\n"); + pass++; + } + if(ok[4]==1){ + printf("Reduction operation shmem_double_max_to_all: Failed\n"); + } + else{ + Vprintf("Reduction operation shmem_double_max_to_all: Passed\n"); + pass++; + } + if(ok[5]==1){ + printf("Reduction operation shmem_longdouble_max_to_all: Failed\n"); + } + else{ + Vprintf("Reduction operation shmem_longdouble_max_to_all: Passed\n"); + pass++; + } + if(ok[6]==1){ + printf("Reduction operation shmem_longlong_max_to_all: Failed\n"); + } + else{ + Vprintf("Reduction operation shmem_longlong_max_to_all: Passed\n"); + pass++; + } + Vprintf("\n"); + } + if (Serialize) shmem_barrier_all(); + + return (pass == 7 ? 
1 : 0);
+}
+
+/*
+ * min_to_all: exercise shmem_<type>_min_to_all for the seven supported types
+ * (short, int, long, float, double, long double, long long).
+ *
+ * Every PE fills src[i] with (me + i), so the smallest contribution at index
+ * i is the one from PE 0, i.e. i itself; that is what PE 0 checks for.
+ * pSync and pSync1 are alternated between consecutive reductions.
+ *
+ * Only PE 0 inspects the results and counts passes, so the return value is
+ * 1 on PE 0 when all seven reductions matched and 0 in every other case.
+ */
+int
+min_to_all(int me, int npes)
+{
+    int i, pass = 0;
+
+    memset(ok, 0, sizeof(ok));
+
+    for (i = 0; i < N; i++) {
+        src0[i] = src1[i] = src2[i] = src3[i] = src4[i] = src5[i] = src6[i]
+            = me + i;
+        dst0[i] = -9;
+        dst1[i] = -9;
+        dst2[i] = -9;
+        dst3[i] = -9;
+        dst4[i] = -9;
+        dst5[i] = -9;
+        dst6[i] = -9;
+    }
+
+    shmem_barrier_all();
+
+    shmem_short_min_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync);
+    shmem_int_min_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1);
+    shmem_long_min_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync);
+    shmem_float_min_to_all(dst3, src3, N, 0, 0, npes, pWrk3, pSync1);
+    shmem_double_min_to_all(dst4, src4, N, 0, 0, npes, pWrk4, pSync);
+    shmem_longdouble_min_to_all(dst5, src5, N, 0, 0, npes, pWrk5, pSync1);
+    shmem_longlong_min_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync);
+
+    if (me == 0) {
+        /* ok[k] is set to 1 when type k produced a wrong element. */
+        for (i = 0; i < N; i++) {
+            if (dst0[i] != i) ok[0] = 1;
+            if (dst1[i] != i) ok[1] = 1;
+            if (dst2[i] != i) ok[2] = 1;
+            if (dst3[i] != i) ok[3] = 1;
+            if (dst4[i] != i) ok[4] = 1;
+            if (dst5[i] != i) ok[5] = 1;
+            if (dst6[i] != i) ok[6] = 1;
+        }
+
+        if (ok[0] == 1) {
+            printf("Reduction operation shmem_short_min_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_short_min_to_all: Passed\n");
+            pass++;
+        }
+        if (ok[1] == 1) {
+            printf("Reduction operation shmem_int_min_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_int_min_to_all: Passed\n");
+            pass++;
+        }
+        if (ok[2] == 1) {
+            printf("Reduction operation shmem_long_min_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_long_min_to_all: Passed\n");
+            pass++;
+        }
+        if (ok[3] == 1) {
+            printf("Reduction operation shmem_float_min_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_float_min_to_all: Passed\n");
+            pass++;
+        }
+        if (ok[4] == 1) {
+            printf("Reduction operation shmem_double_min_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_double_min_to_all: Passed\n");
+            pass++;
+        }
+        if (ok[5] == 1) {
+            printf("Reduction operation shmem_longdouble_min_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_longdouble_min_to_all: Passed\n");
+            pass++;
+        }
+        if (ok[6] == 1) {
+            printf("Reduction operation shmem_longlong_min_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_longlong_min_to_all: Passed\n");
+            pass++;
+        }
+        Vprintf("\n");
+    }
+    if (Serialize) shmem_barrier_all();
+
+    return (pass == 7 ? 1 : 0);
+}
+
+
+/*
+ * sum_to_all: exercise shmem_<type>_sum_to_all for the seven supported types.
+ *
+ * Every PE contributes its own rank in each element, so each result element
+ * is expected to equal 0 + 1 + ... + (npes-1) = npes * (npes-1) / 2,
+ * converted to the element type being checked.
+ *
+ * Returns 1 on PE 0 when all seven reductions matched, 0 otherwise
+ * (including on every PE other than 0, which performs no checks).
+ */
+int
+sum_to_all(int me, int npes)
+{
+    int i, pass = 0;
+
+    memset(ok, 0, sizeof(ok));
+
+    for (i = 0; i < N; i++) {
+        src0[i] = src1[i] = src2[i] = src3[i] = src4[i] = src5[i] = src6[i] = me;
+        dst0[i] = -9;
+        dst1[i] = -9;
+        dst2[i] = -9;
+        dst3[i] = -9;
+        dst4[i] = -9;
+        dst5[i] = -9;
+        dst6[i] = -9;
+    }
+
+    shmem_barrier_all();
+
+    shmem_short_sum_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync);
+    shmem_int_sum_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1);
+    shmem_long_sum_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync);
+    shmem_float_sum_to_all(dst3, src3, N, 0, 0, npes, pWrk3, pSync1);
+    shmem_double_sum_to_all(dst4, src4, N, 0, 0, npes, pWrk4, pSync);
+    shmem_longdouble_sum_to_all(dst5, src5, N, 0, 0, npes, pWrk5, pSync1);
+    shmem_longlong_sum_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync);
+
+    if (me == 0) {
+        /* Sum of the ranks 0..npes-1, computed once in int arithmetic. */
+        const int rank_sum = npes * (npes-1)/2;
+
+        for (i = 0; i < N; i++) {
+            if (dst0[i] != (short) rank_sum) ok[0] = 1;
+            if (dst1[i] != (int) rank_sum) ok[1] = 1;
+            if (dst2[i] != (long) rank_sum) ok[2] = 1;
+            if (dst3[i] != (float) rank_sum) ok[3] = 1;
+            if (dst4[i] != (double) rank_sum) ok[4] = 1;
+            if (dst5[i] != (long double) rank_sum) ok[5] = 1;
+            if (dst6[i] != (long long) rank_sum) ok[6] = 1;
+        }
+
+        if (ok[0] == 1) {
+            printf("Reduction operation shmem_short_sum_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_short_sum_to_all: Passed\n");
+            pass++;
+        }
+        if (ok[1] == 1) {
+            printf("Reduction operation shmem_int_sum_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_int_sum_to_all: Passed\n");
+            pass++;
+        }
+        if (ok[2] == 1) {
+            printf("Reduction operation shmem_long_sum_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_long_sum_to_all: Passed\n");
+            pass++;
+        }
+        if (ok[3] == 1) {
+            printf("Reduction operation shmem_float_sum_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_float_sum_to_all: Passed\n");
+            pass++;
+        }
+        if (ok[4] == 1) {
+            printf("Reduction operation shmem_double_sum_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_double_sum_to_all: Passed\n");
+            pass++;
+        }
+        if (ok[5] == 1) {
+            printf("Reduction operation shmem_longdouble_sum_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_longdouble_sum_to_all: Passed\n");
+            pass++;
+        }
+        if (ok[6] == 1) {
+            printf("Reduction operation shmem_longlong_sum_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_longlong_sum_to_all: Passed\n");
+            pass++;
+        }
+        Vprintf("\n"); fflush(stdout);
+    }
+    if (Serialize) shmem_barrier_all();
+
+    return (pass == 7 ?
1 : 0);
+}
+
+
+/*
+ * and_to_all: exercise the bitwise AND reduction for short, int, long and
+ * long long.
+ *
+ * Every PE contributes its own rank; PE 0 contributes 0, so every result
+ * element is expected to be 0.  The four verdicts are kept in ok[0..3].
+ *
+ * Returns 1 on PE 0 when all four reductions matched, 0 otherwise
+ * (including on every PE other than 0, which performs no checks).
+ */
+int
+and_to_all(int me, int num_pes)
+{
+    int i, pass = 0;
+
+    memset(ok, 0, sizeof(ok));
+
+    for (i = 0; i < N; i++) {
+        src0[i] = src1[i] = src2[i] = src6[i] = me;
+        dst0[i] = dst1[i] = dst2[i] = dst6[i] = -9;
+    }
+
+    shmem_barrier_all();
+
+    shmem_short_and_to_all(dst0, src0, N, 0, 0, num_pes, pWrk0, pSync);
+    shmem_int_and_to_all(dst1, src1, N, 0, 0, num_pes, pWrk1, pSync1);
+    shmem_long_and_to_all(dst2, src2, N, 0, 0, num_pes, pWrk2, pSync);
+    shmem_longlong_and_to_all(dst6, src6, N, 0, 0, num_pes, pWrk6, pSync1);
+
+    if (me == 0) {
+        for (i = 0; i < N; i++) {
+            if (dst0[i] != 0) ok[0] = 1;
+            if (dst1[i] != 0) ok[1] = 1;
+            if (dst2[i] != 0) ok[2] = 1;
+            if (dst6[i] != 0) ok[3] = 1;
+        }
+
+        if (ok[0] == 1) {
+            printf("Reduction operation shmem_short_and_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_short_and_to_all: Passed\n");
+            pass++;
+        }
+        if (ok[1] == 1) {
+            printf("Reduction operation shmem_int_and_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_int_and_to_all: Passed\n");
+            pass++;
+        }
+        if (ok[2] == 1) {
+            printf("Reduction operation shmem_long_and_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_long_and_to_all: Passed\n");
+            pass++;
+        }
+        if (ok[3] == 1) {
+            printf("Reduction operation shmem_longlong_and_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_longlong_and_to_all: Passed\n");
+            pass++;
+        }
+        Vprintf("\n"); fflush(stdout);
+    }
+    if (Serialize) shmem_barrier_all();
+
+    return (pass == 4 ?
1 : 0);
+}
+
+
+/*
+ * prod_to_all: exercise shmem_<type>_prod_to_all for the seven supported
+ * types.
+ *
+ * Every PE contributes (me + 1), so the expected product is
+ * 1 * 2 * ... * npes.  That product is accumulated locally in each
+ * expected_result<k> variable so it picks up the same type-specific
+ * behaviour as the reduction under test.
+ *
+ * Floating-point results are only compared while they are still exact:
+ *   - the float check is skipped once the float product differs from the
+ *     double product;
+ *   - the double and long double checks are both skipped once the double
+ *     product differs from the long double product.
+ * A skipped check is still counted as a pass.
+ *
+ * Returns 1 on PE 0 when all seven reductions passed (or were skipped),
+ * 0 otherwise, including on every PE other than 0.
+ */
+int
+prod_to_all(int me, int npes)
+{
+    int i, pass = 0;
+    int float_rounding_err = 0;
+    int double_rounding_err = 0;
+    int ldouble_rounding_err = 0;
+
+    memset(ok, 0, sizeof(ok));
+
+    for (i = 0; i < N; i++) {
+        src0[i] = src1[i] = src2[i] = src3[i] = src4[i] = src5[i] = src6[i]
+            = me + 1;
+        dst0[i] = -9;
+        dst1[i] = -9;
+        dst2[i] = -9;
+        dst3[i] = -9;
+        dst4[i] = -9;
+        dst5[i] = -9;
+        dst6[i] = -9;
+    }
+
+    expected_result0 = expected_result1 = expected_result2 =
+        expected_result6 = 1;
+    expected_result3 = expected_result4 = expected_result5 = 1.0;
+
+    /* Build npes! in every type and note where precision is first lost. */
+    for (i = 1; i <= npes; i++) {
+        expected_result0 *= i;
+        expected_result1 *= i;
+        expected_result2 *= i;
+        expected_result3 *= (float)i;
+        expected_result4 *= (double)i;
+        if ((double)expected_result3 != expected_result4) {
+            if (!float_rounding_err && Verbose > 2 && me == 0)
+                printf("float_err @ npes %d\n",i);
+            float_rounding_err = 1;
+        }
+        expected_result5 *= (long double)i;
+        if ((long double)expected_result4 != expected_result5) {
+            if (!double_rounding_err && Verbose > 2 && me == 0)
+                printf("double_err @ npes %d\n",i);
+            ldouble_rounding_err = double_rounding_err = 1;
+        }
+        expected_result6 *= i;
+    }
+
+    shmem_barrier_all();
+
+    shmem_short_prod_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync);
+    shmem_int_prod_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1);
+    shmem_long_prod_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync);
+    shmem_float_prod_to_all(dst3, src3, N, 0, 0, npes, pWrk3, pSync1);
+    shmem_double_prod_to_all(dst4, src4, N, 0, 0, npes, pWrk4, pSync);
+    shmem_longdouble_prod_to_all(dst5, src5, N, 0, 0, npes, pWrk5, pSync1);
+    shmem_longlong_prod_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync);
+
+    if (me == 0) {
+        for (i = 0; i < N; i++) {
+            if (dst0[i] != expected_result0) ok[0] = 1;
+            if (dst1[i] != expected_result1) ok[1] = 1;
+            if (dst2[i] != expected_result2) ok[2] = 1;
+
+            /* Floating-point types: compare only while the locally
+             * computed product was not flagged as rounded. */
+            if (!float_rounding_err && dst3[i] != expected_result3) {
+                ok[3] = 1;
+                printf("dst3[%d]: %f, expected val: %f\n",i, dst3[i], expected_result3);
+            }
+            if (!double_rounding_err && dst4[i] != expected_result4) {
+                ok[4] = 1;
+                printf("dst4[%d]: %f, expected val: %f\n",i, dst4[i], expected_result4);
+            }
+            if (!ldouble_rounding_err && dst5[i] != expected_result5) {
+                ok[5] = 1;
+                printf("dst5[%d]: %Lf, expected val: %Lf T4 %f\n",i, dst5[i], expected_result5,dst4[i]);
+            }
+            if (dst6[i] != expected_result6) ok[6] = 1;
+        }
+
+        if (ok[0] == 1) {
+            printf("Reduction operation shmem_short_prod_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_short_prod_to_all: Passed\n");
+            pass++;
+        }
+
+        if (ok[1] == 1) {
+            printf("Reduction operation shmem_int_prod_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_int_prod_to_all: Passed\n");
+            pass++;
+        }
+
+        if (ok[2] == 1) {
+            printf("Reduction operation shmem_long_prod_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_long_prod_to_all: Passed\n");
+            pass++;
+        }
+
+        if (ok[3] == 1) {
+            printf("Reduction operation shmem_float_prod_to_all: Failed\n");
+        } else {
+            if (float_rounding_err) {
+                Vprintf("Reduction operation shmem_float_prod_to_all: skipped due to float rounding error\n");
+            } else {
+                Vprintf("Reduction operation shmem_float_prod_to_all: Passed\n");
+            }
+            pass++;
+        }
+
+        if (ok[4] == 1) {
+            printf("Reduction operation shmem_double_prod_to_all: Failed\n");
+        } else {
+            if (double_rounding_err) {
+                Vprintf("Reduction operation shmem_double_prod_to_all: skipped due to double rounding error\n");
+            } else {
+                Vprintf("Reduction operation shmem_double_prod_to_all: Passed\n");
+            }
+            pass++;
+        }
+
+        if (ok[5] == 1) {
+            printf("Reduction operation shmem_longdouble_prod_to_all: Failed\n");
+        } else {
+            /* Report with the same flag that gated the long double check. */
+            if (ldouble_rounding_err) {
+                Vprintf("Reduction operation shmem_longdouble_prod_to_all: skipped due to long double rounding error\n");
+            } else {
+                Vprintf("Reduction operation shmem_longdouble_prod_to_all: Passed\n");
+            }
+            pass++;
+        }
+
+        if (ok[6] == 1) {
+            printf("Reduction operation shmem_longlong_prod_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_longlong_prod_to_all: Passed\n");
+            pass++;
+        }
+        Vprintf("\n");
+    }
+    if (Serialize) shmem_barrier_all();
+
+    return (pass == 7 ? 1 : 0);
+}
+
+
+/*
+ * or_to_all: exercise the bitwise OR reduction for short, int, long and
+ * long long.
+ *
+ * Every PE contributes (me + 1) % 4.  With a single PE the only
+ * contribution is 1; with two or more PEs the values 1 and 2 are both
+ * present, so the OR is 3.  The long long verdict is kept in ok[6].
+ *
+ * Returns 1 on PE 0 when all four reductions matched, 0 otherwise
+ * (including on every PE other than 0, which performs no checks).
+ */
+int
+or_to_all(int me, int npes)
+{
+    int i, pass = 0;
+    /* Loop-invariant, so computed once rather than per element. */
+    const int expected = (npes == 1) ? 1 : 3;
+
+    memset(ok, 0, sizeof(ok));
+
+    for (i = 0; i < N; i++) {
+        src0[i] = src1[i] = src2[i] = src6[i] = (me + 1)%4;
+        dst0[i] = -9;
+        dst1[i] = -9;
+        dst2[i] = -9;
+        dst6[i] = -9;
+    }
+
+    shmem_barrier_all();
+
+    shmem_short_or_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync);
+    shmem_int_or_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1);
+    shmem_long_or_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync);
+    shmem_longlong_or_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync1);
+
+    if (me == 0) {
+        for (i = 0; i < N; i++) {
+            if (dst0[i] != expected) ok[0] = 1;
+            if (dst1[i] != expected) ok[1] = 1;
+            if (dst2[i] != expected) ok[2] = 1;
+            if (dst6[i] != expected) ok[6] = 1;
+        }
+
+        if (ok[0] == 1) {
+            printf("Reduction operation shmem_short_or_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_short_or_to_all: Passed\n");
+            pass++;
+        }
+
+        if (ok[1] == 1) {
+            printf("Reduction operation shmem_int_or_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_int_or_to_all: Passed\n");
+            pass++;
+        }
+
+        if (ok[2] == 1) {
+            printf("Reduction operation shmem_long_or_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_long_or_to_all: Passed\n");
+            pass++;
+        }
+
+        if (ok[6] == 1) {
+            printf("Reduction operation shmem_longlong_or_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_longlong_or_to_all: Passed\n");
+            pass++;
+        }
+        Vprintf("\n");
+    }
+    if (Serialize) shmem_barrier_all();
+
+    return (pass == 4 ?
1 : 0);
+}
+
+
+/*
+ * xor_to_all: exercise the bitwise XOR reduction for short, int, long and
+ * long long.
+ *
+ * Every PE contributes (me % 2), i.e. 1 on odd ranks and 0 on even ranks.
+ * The XOR of those values is the parity of the number of odd ranks, which
+ * is (npes / 2) % 2.  The long long verdict is kept in ok[6].
+ *
+ * Returns 1 on PE 0 when all four reductions matched, 0 otherwise
+ * (including on every PE other than 0, which performs no checks).
+ */
+int
+xor_to_all(int me, int npes)
+{
+    int i, pass = 0;
+    int expected_result = ((int)(npes/2) % 2);
+
+    memset(ok, 0, sizeof(ok));
+
+    for (i = 0; i < N; i++) {
+        src0[i] = src1[i] = src2[i] = src6[i] = me%2;
+        dst0[i] = -9;
+        dst1[i] = -9;
+        dst2[i] = -9;
+        dst6[i] = -9;
+    }
+
+    shmem_barrier_all();
+
+    shmem_short_xor_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync);
+    shmem_int_xor_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1);
+    shmem_long_xor_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync);
+    shmem_longlong_xor_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync1);
+
+    if (me == 0) {
+        for (i = 0; i < N; i++) {
+            if (dst0[i] != expected_result) ok[0] = 1;
+            if (dst1[i] != expected_result) ok[1] = 1;
+            if (dst2[i] != expected_result) ok[2] = 1;
+            if (dst6[i] != expected_result) ok[6] = 1;
+        }
+
+        if (ok[0] == 1) {
+            printf("Reduction operation shmem_short_xor_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_short_xor_to_all: Passed\n");
+            pass++;
+        }
+
+        if (ok[1] == 1) {
+            printf("Reduction operation shmem_int_xor_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_int_xor_to_all: Passed\n");
+            pass++;
+        }
+
+        if (ok[2] == 1) {
+            printf("Reduction operation shmem_long_xor_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_long_xor_to_all: Passed\n");
+            pass++;
+        }
+
+        if (ok[6] == 1) {
+            printf("Reduction operation shmem_longlong_xor_to_all: Failed\n");
+        } else {
+            Vprintf("Reduction operation shmem_longlong_xor_to_all: Passed\n");
+            pass++;
+        }
+
+        Vprintf("\n");
+    }
+    if (Serialize) shmem_barrier_all();
+
+    return (pass == 4 ?
1 : 0);
+}
+
+
+/*
+ * Test driver: parse the command line, initialise the two pSync arrays and
+ * run the reduction tests.
+ *
+ * Options (as accepted by the getopt string below):
+ *   -a -m -o -p -s -x   skip the and / min / or / prod / sum / xor test
+ *   -S                  increment Serialize (the tests then add a barrier
+ *                       after reporting)
+ *   -v                  increment Verbose
+ *   -h                  print usage and exit with status 1
+ * max_to_all has no switch and always runs.
+ *
+ * Exit status: on PE 0, 0 when every executed test passed and 1 otherwise;
+ * all other PEs return 0 because only PE 0 sets a non-zero status.
+ */
+int
+main(int argc, char* argv[])
+{
+    int c, i, mype, num_pes, tests, passed;
+    char *pgm;
+
+    shmem_init();
+    mype = shmem_my_pe();
+    num_pes = shmem_n_pes();
+
+    /* Program name without any leading directory components. */
+    if ((pgm=strrchr(argv[0],'/'))) {
+        pgm++;
+    } else {
+        pgm = argv[0];
+    }
+
+    while ((c=getopt(argc,argv,"ampsSoxhv")) != -1) {
+        switch (c) {
+        case 'a':
+            And++;      // do not run and_to_all
+            break;
+        case 'm':
+            Min++;      // do not run min_to_all
+            break;
+        case 'o':
+            Or++;       // do not run or_to_all
+            break;
+        case 'p':
+            Prod++;     // do not run prod_to_all
+            break;
+        case 's':
+            Sum++;      // do not run sum_to_all
+            break;
+        case 'x':
+            Xor++;      // do not run xor_to_all
+            break;
+        case 'S':
+            Serialize++;
+            break;
+        case 'v':
+            Verbose++;
+            break;
+        case 'h':
+        default:
+            /* List every switch the getopt string accepts. */
+            Rfprintf(stderr,"usage: %s [-a|-m|-o|-p|-s|-x (skip and|min|or|prod|sum|xor test)] [-S (serialize)] [-v (verbose)] [-h (help)]\n",pgm);
+            shmem_finalize();
+            return 1;
+        }
+    }
+
+    for (i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i++) {
+        pSync[i] = SHMEM_SYNC_VALUE;
+        pSync1[i] = SHMEM_SYNC_VALUE;
+    }
+
+    tests = passed = 0;
+
+    /* Make sure every PE has initialised its pSync arrays before use. */
+    shmem_barrier_all();
+
+    passed += max_to_all(mype, num_pes);
+    tests++;
+
+    if (!Min) {
+        passed += min_to_all(mype, num_pes);
+        tests++;
+    }
+
+    if (!Sum) {
+        passed += sum_to_all(mype, num_pes);
+        tests++;
+    }
+
+    if (!And) {
+        passed += and_to_all(mype, num_pes);
+        tests++;
+    }
+
+    if (!Prod) {
+        passed += prod_to_all(mype, num_pes);
+        tests++;
+    }
+
+    if (!Or) {
+        passed += or_to_all(mype, num_pes);
+        tests++;
+    }
+
+    if (!Xor) {
+        passed += xor_to_all(mype, num_pes);
+        tests++;
+    }
+
+    c = 0;
+    if (mype == 0) {
+        if ((Verbose || tests != passed))
+            fprintf(stderr,"to_all[%d] %d of %d tests passed\n",
+                    mype,passed,tests);
+        c = (tests == passed ? 0 : 1);
+    }
+
+    shmem_finalize();
+
+    return c;
+}
diff --git a/test/unit/waituntil.c b/test/unit/waituntil.c
new file mode 100644
index 0000000..0188d68
--- /dev/null
+++ b/test/unit/waituntil.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright 2011 Sandia Corporation. Under the terms of Contract
+ * DE-AC04-94AL85000 with Sandia Corporation, the U.S.
Government
+ * retains certain rights in this software.
+ *
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * exercise the shmem_long_wait_until() function.
+ */
+
+/* NOTE(review): the header names on the #include lines were lost in
+ * extraction; they are reconstructed from the identifiers used below
+ * (printf, strcmp/memset/memcmp, shmem_*) - confirm against upstream. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+
+#include <shmem.h>
+
+#define DataType long
+
+#define SHM_PUT shmem_long_put
+#define SHM_PUTP shmem_long_p
+#define SHM_GETP shmem_long_g
+
+#define SHM_WAITU shmem_long_wait_until
+#define PF "%ld"
+
+/* Print only when the local variable Verbose is non-zero.  Wrapped in
+ * do/while so the macro is a single statement and cannot capture a
+ * following "else". */
+#define Vprintf(...) do { if (Verbose) printf(__VA_ARGS__); } while (0)
+
+/*
+ * Ping-pong between PE 0 and the other PEs, synchronised only through
+ * SHM_WAITU (shmem_long_wait_until):
+ *
+ *   PE 0:    zeroes target on every other PE, then puts source[] there,
+ *            waits until pong becomes greater than 666, and finally puts
+ *            0xDD into t2[9] on every other PE.
+ *   PE != 0: waits for target[9] to leave its initial 0xFF, then to become
+ *            10, (PE 1 only) sets pong on PE 0 to 999, and waits for
+ *            t2[9] to leave its initial 0xFF.
+ *
+ * Afterwards every PE other than 0 verifies that target matches source.
+ * Passing "-v" as the first argument enables verbose output.
+ * Returns 0 on success (and when run on a single PE); a data mismatch or a
+ * failed shmem_malloc ends the job through shmem_global_exit(1).
+ */
+int
+main(int argc, char* argv[])
+{
+    DataType source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    static DataType target[10];
+    static DataType pong=666;
+    DataType *t2=NULL;
+    int me, num_pes, pe, Verbose=0;
+
+    if (argc > 1 && (strcmp(argv[1],"-v") == 0)) {
+        Verbose++;
+    }
+
+    shmem_init();
+    me = shmem_my_pe();
+    num_pes = shmem_n_pes();
+
+    if (num_pes == 1) {
+        printf("%s: Requires number of PEs > 1\n", argv[0]);
+        shmem_finalize();
+        return 0;
+    }
+
+    t2 = shmem_malloc(10*sizeof(DataType));
+    if (!t2) {
+        if (me==0) printf("shmem_malloc() failed?\n");
+        shmem_global_exit(1);
+    }
+    /* Sentinel values the waiting PEs watch for a change from. */
+    t2[9] = target[9] = 0xFF;
+
+    shmem_barrier_all();
+
+    if (me == 0) {
+        memset(target, 0, sizeof(target));
+        for(pe=1; pe < num_pes; pe++) /* zero target on every other PE */
+            SHM_PUT(target, target, 10, pe);
+
+        for(pe=1; pe < num_pes; pe++) /* put 10 elements into target on every other PE */
+            SHM_PUT(target, source, 10, pe);
+
+        SHM_WAITU( &pong, SHMEM_CMP_GT, 666 );
+        Vprintf("PE[%d] pong now "PF"\n",me,pong);
+
+        for(pe=1; pe < num_pes; pe++) /* put 1 element into t2 on every other PE */
+            SHM_PUTP(&t2[9], 0xDD, pe);
+    }
+    else {
+        /* wait for 10th element write of 'target' */
+        SHM_WAITU( &target[9], SHMEM_CMP_NE, 0xFF );
+        Vprintf("PE[%d] target[9] was 255 now "PF", success.\n",me,target[9]);
+
+        SHM_WAITU( &target[9], SHMEM_CMP_EQ, 10 );
+        Vprintf("PE[%d] expected target[9] == 10 now "PF"\n",me,target[9]);
+
+        if (me == 1) {
+            if (Verbose) {
+                DataType tmp = SHM_GETP( &pong, 0);
+                printf("PE[%d] @ PE[0] pong == "PF", setting to 999\n",me,tmp);
+            }
+            SHM_PUTP( &pong, 999, 0);
+        }
+
+        SHM_WAITU( &t2[9], SHMEM_CMP_NE, 0xFF );
+    }
+
+    //shmem_barrier_all();  /* sync sender and receiver */
+
+    if (me != 0) {
+        if (memcmp(source, target, sizeof(DataType) * 10) != 0) {
+            int i;
+            fprintf(stderr,"[%d] Src & Target mismatch?\n",me);
+            for (i = 0 ; i < 10 ; ++i) {
+                printf(PF","PF" ", source[i], target[i]);
+            }
+            printf("\n");
+            shmem_global_exit(1);
+        }
+    }
+    shmem_free(t2);
+
+    if (Verbose)
+        fprintf(stderr,"[%d] exit\n",shmem_my_pe());
+
+    shmem_finalize();
+    return 0;
+}
diff --git a/test/unit/web.c b/test/unit/web.c
new file mode 100644
index 0000000..e5e8642
--- /dev/null
+++ b/test/unit/web.c
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ * This software is available to you under the BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* NOTE(review): the header names on the #include lines were lost in
+ * extraction; they are reconstructed from the identifiers used in this
+ * file (fprintf, malloc/rand_r, memset, pthread_*, shmem_*) - confirm
+ * against upstream. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+#include <shmem.h>
+#include <shmemx.h>
+
+/* Abort the whole job with exit code 10 when cond is false. */
+#define CHECK_ASSERT(cond) do {                                         \
+        if (!(cond)) {                                                  \
+            fprintf(stderr, "[%d] Assertion failed: %s\n", shmem_my_pe(), #cond); \
+            shmem_global_exit(10);                                      \
+        }                                                               \
+    } while(0)
+
+#define CHECK_ERROR(num,...)
do { \
+        if((num)) { \
+            fprintf(stderr,__VA_ARGS__); \
+            return (num); \
+        } \
+    } while(0)
+
+#define NUM_UNSAFE_CTX 10
+#define NUM_CONTEXTS 20
+
+/* Per-thread relay state; one instance per channel lives in the symmetric
+ * array "channels" so that neighbouring PEs can write into it. */
+typedef struct {
+    int idx;            /* index of this channel in channels[] */
+    unsigned seed;      /* rand_r() state used to pick a context */
+    long received;      /* set to 1 by the previous PE after it wrote rec_val */
+    long can_send;      /* set to 1 by the next PE when it is ready to receive */
+    long rec_val;       /* value written by the previous PE */
+    long send_val;      /* value forwarded to the next PE */
+    int stride;         /* distance (in PEs) to the next/previous PE */
+} channel;
+
+shmem_ctx_t contexts[NUM_CONTEXTS];
+int ctx_is_safe[NUM_CONTEXTS];          /* 0: guard the context with ctx_locks[] */
+pthread_mutex_t ctx_locks[NUM_CONTEXTS];
+channel* channels;
+int me,n_pes,n_threads;
+int max_steps;
+
+/*
+ * Number of relay steps every channel performs.
+ *
+ * This is the smallest positive s with (s * stride) % numPes == 0 for every
+ * stride in 1..numPes-1.  Stride 1 is always in that range when
+ * numPes >= 2, so the answer is numPes itself; with fewer than two PEs the
+ * range is empty and the answer is 1.  Returning the closed form avoids a
+ * search whose (s * stride) products could overflow int for large numPes.
+ */
+static int get_max_steps(int numPes) {
+    return (numPes > 1) ? numPes : 1;
+}
+
+/*
+ * Thread body: relay a value around the ring of PEs for max_steps steps.
+ *
+ * chan points at this thread's channel.  In every step the thread
+ *   1. tells the previous PE (me - stride) that it may send, by putting 1
+ *      into that PE's can_send;
+ *   2. waits until its own can_send has been set by the next PE, then
+ *      clears it;
+ *   3. puts send_val into rec_val on the next PE (me + stride), fences the
+ *      context, and puts 1 into received there;
+ *   4. waits until its own received is set, clears it, and adopts the
+ *      received value as the next send_val.
+ * Each put is issued on a randomly chosen context; contexts flagged as not
+ * safe in ctx_is_safe[] are used only while holding the matching mutex.
+ *
+ * Always returns NULL.
+ */
+static void* runchannel(void* chan) {
+    channel *c = chan;
+    int nextpe = (me + c->stride)%n_pes;
+    int prevpe = (me + n_pes - c->stride)%n_pes;
+    int i;
+
+    /* printf("[%d:%d] prevpe = %d, nextpe = %d\n",me,c->idx,prevpe, nextpe); */
+
+    for(i = 0; i < max_steps; ++i) {
+        unsigned ctxind = ((unsigned)rand_r(&c->seed))%NUM_CONTEXTS;
+        /* printf("[%d:%d] %d\n",me,c->idx,ctxind); */
+        int safe = ctx_is_safe[ctxind];
+
+        /* Step 1: signal readiness to the upstream PE. */
+        if(!safe) {
+            pthread_mutex_lock(&ctx_locks[ctxind]);
+        }
+        shmem_ctx_long_p(contexts[ctxind],&c->can_send,1L,prevpe);
+        if(!safe) {
+            pthread_mutex_unlock(&ctx_locks[ctxind]);
+        }
+
+        /* Step 2: wait for the downstream PE to be ready. */
+        shmem_wait_until(&c->can_send,SHMEM_CMP_NE,0L);
+        c->can_send = 0;
+
+        ctxind = ((unsigned)rand_r(&c->seed))%NUM_CONTEXTS;
+        safe = ctx_is_safe[ctxind];
+
+        /* Step 3: deliver the value, then the flag; the fence orders the
+         * flag after the value on this context. */
+        if(!safe) {
+            pthread_mutex_lock(&ctx_locks[ctxind]);
+        }
+        shmem_ctx_long_p(contexts[ctxind],&c->rec_val,c->send_val,nextpe);
+        shmem_ctx_fence(contexts[ctxind]);
+        shmem_ctx_long_p(contexts[ctxind],&c->received,1L,nextpe);
+        if(!safe) {
+            pthread_mutex_unlock(&ctx_locks[ctxind]);
+        }
+
+        /* Step 4: wait for our own incoming value. */
+        shmem_wait_until(&c->received,SHMEM_CMP_NE,0L);
+        c->received = 0;
+        c->send_val = c->rec_val;
+    }
+
+    /* printf("[%d:%d] done\n",me,c->idx); */
+
+    return 0;
+}
+
+/*
+ * Start 4 * n_pes relay threads per PE that share NUM_CONTEXTS contexts
+ * (the first NUM_UNSAFE_CTX of them created with SHMEM_CTX_SERIALIZED and
+ * protected by a mutex), then check that after max_steps steps every
+ * channel holds the value it started with.
+ *
+ * Returns 0 on success or when run with fewer than two PEs, and 1 when at
+ * least one channel ended with the wrong value.
+ */
+int main(int argc, char* argv[]) {
+    int i;
+    for(i = 0; i < NUM_CONTEXTS; ++i) {
+        int err;
+        err = pthread_mutex_init(&ctx_locks[i],NULL);
+        if (err) {
+            /* pthread functions return the error code instead of setting
+             * errno, so report it with strerror() rather than perror(). */
+            fprintf(stderr, "Mutex initialization failed: %s\n", strerror(err));
+            return 1;
+        }
+    }
+
+    int err, tl, ret;
+    int errors = 0;
+
+    ret = shmem_init_thread(SHMEM_THREAD_MULTIPLE, &tl);
+
+    if (tl != SHMEM_THREAD_MULTIPLE || ret != 0) {
+        printf("Init failed (requested thread level %d, got %d, ret %d)\n",
+               SHMEM_THREAD_MULTIPLE, tl, ret);
+
+        if (ret == 0) {
+            shmem_global_exit(1);
+        } else {
+            return ret;
+        }
+    }
+
+    n_pes = shmem_n_pes();
+    me = shmem_my_pe();
+
+    if (n_pes < 2) {
+        if (me == 0)
+            printf("web: Requires 2 or more PEs\n");
+        shmem_finalize();
+        return 0;
+    }
+
+    srand(1+me);
+
+    n_threads = 4 * n_pes;
+    channels = shmem_malloc(n_threads*sizeof(channel));
+    CHECK_ASSERT(channels != NULL);
+    memset(channels,0,n_threads*sizeof(channel));
+
+    /* Local copy of the initial values, compared against at the end. */
+    long* vals = malloc(sizeof(long)*n_threads);
+    CHECK_ASSERT(vals != NULL);
+    for(i = 0; i < n_threads; ++i) {
+        channels[i].idx = i;
+        channels[i].received = 0;
+        channels[i].can_send = 0;
+        channels[i].rec_val = 0;
+        channels[i].send_val = vals[i] = rand();
+
+        /* Strides cycle through 1..n_pes-1. */
+        channels[i].stride = (i)%(n_pes-1) + 1;
+        /* Unsigned arithmetic: rand() may return INT_MAX. */
+        channels[i].seed = 1u + (unsigned)rand() + (unsigned)i;
+    }
+
+    max_steps = get_max_steps(n_pes);
+
+    for(i = 0; i < NUM_CONTEXTS; ++i) {
+        ctx_is_safe[i] = (i >= NUM_UNSAFE_CTX);
+        err = shmem_ctx_create(ctx_is_safe[i] ? 0 : SHMEM_CTX_SERIALIZED,&contexts[i]);
+        CHECK_ERROR(err,"Failed to create ctx %d\n",i);
+    }
+
+    shmem_barrier_all();
+
+    pthread_t* threads = calloc(n_threads, sizeof(pthread_t));
+    CHECK_ASSERT(threads != NULL);
+    for(i = 0; i < n_threads; ++i) {
+        err = pthread_create(&threads[i],NULL,runchannel,&channels[i]);
+        CHECK_ASSERT(0 == err);
+    }
+
+    for (i = 0; i < n_threads; i++) {
+        err = pthread_join(threads[i], NULL);
+        CHECK_ASSERT(0 == err);
+    }
+    free(threads);
+
+    for(i = 0; i < NUM_CONTEXTS; ++i) {
+        shmem_ctx_quiet(contexts[i]);
+        shmem_ctx_destroy(contexts[i]);
+        err = pthread_mutex_destroy(&ctx_locks[i]);
+        CHECK_ASSERT(!err);
+    }
+
+    for(i = 0; i < n_threads; ++i) {
+        if(vals[i] != channels[i].rec_val) {
+            ++errors;
+            fprintf(stderr,
+                    "PE %d, Thread %d (stride %d): got %ld, expected %ld\n",
+                    me,i,channels[i].stride,channels[i].rec_val,vals[i]);
+        }
+    }
+    shmem_free(channels);
+    free(vals);
+
+    shmem_finalize();
+
+    /* Collapse the count to 0/1: an exit status keeps only the low 8 bits,
+     * so a count that is a multiple of 256 would otherwise look like
+     * success. */
+    return errors ? 1 : 0;
+}
+
diff --git a/test/unit/zero_comm.c b/test/unit/zero_comm.c
new file mode 100644
index 0000000..0374f39
--- /dev/null
+++ b/test/unit/zero_comm.c
@@ -0,0 +1,18 @@
+#include <stdio.h>
+#include <shmem.h>
+
+int main(void) {
+    shmem_init();
+
+    shmem_putmem(NULL, NULL, 0, 0);
+    shmem_getmem(NULL, NULL, 0, 0);
+
+    shmem_putmem_nbi(NULL, NULL, 0, 0);
+    shmem_getmem_nbi(NULL, NULL, 0, 0);
+
+    shmem_int_iget(NULL, NULL, 1, 1, 0, 0);
+    shmem_int_iput(NULL, NULL, 1, 1, 0, 0);
+
+    shmem_finalize();
+    return 0;
+}