-
Notifications
You must be signed in to change notification settings - Fork 7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Remove asserts that create a segfault in debug mode #121
base: master
Are you sure you want to change the base?
Changes from all commits
815194f
4ddaaaa
8c167c1
a03d446
c75faf3
247b036
742ed10
0df5514
976c8d3
9e1a43e
7c17876
1c4b57e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -88,3 +88,14 @@ alltoallv-*.log | |
|
||
# VScode specific stuff | ||
.vscode/ | ||
|
||
|
||
examples/alltoallv_dt_c | ||
src/alltoall/examples/alltoall | ||
|
||
# JL stuff including results files | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is either generic or should not be in the repo |
||
alltoallv_*.err | ||
alltoallv_*.out | ||
src/alltoall/examples/results/* | ||
src/alltoall/examples/results_alltoallv/* | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
#!/bin/bash | ||
|
||
cd /global/home/users/cyrusl/placement/expt0066/alltoall_profiling | ||
cd /global/home/users/cyrusl/placement/expt0070/alltoall_profiling | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should that file ultimately be in the repo? |
||
|
||
module purge | ||
spack unload --all | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,7 +28,7 @@ THIS_SCRIPT_DIR=$(dirname "$THIS_SCRIPT") | |
|
||
# environment and modules and some paths etc. for the job | ||
# /global/home/users/cyrusl/placement/expt0060/OSU/osu-micro-benchmarks-5.6.3/install/libexec/osu-micro-benchmarks/mpi/collective | ||
export PROJECT_ROOT=/global/home/users/cyrusl/placement/expt0066 | ||
export PROJECT_ROOT=/global/home/users/cyrusl/placement/expt0070 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should that file ultimately be in the repo? |
||
# TODO - set modulefiles!!? | ||
module purge | ||
HNAME=$(hostname) | ||
|
@@ -121,13 +121,13 @@ EOF | |
|
||
# set variables for the mpirun executable - repeat this section if more than one | ||
# full path? (which below help ldd find executable) | ||
export EXECUTABLE1=/global/home/users/cyrusl/placement/expt0066/alltoall_profiling/examples/alltoall | ||
export EXECUTABLE1=/global/home/users/cyrusl/placement/expt0070/alltoall_profiling/examples/alltoall | ||
export EXECUTABLE1_PARAMS="" | ||
|
||
# following example at /global/home/users/cyrusl/placement/expt0060/geoffs-profiler/build-570ff3aff83fa208f3d1e2fcbdb31d9ec7e93b6c/README.md | ||
# TODO put in the results dir | ||
|
||
ALLTOALL_LIB_ROOT=/global/home/users/cyrusl/placement/expt0066/alltoall_profiling/src/alltoall | ||
ALLTOALL_LIB_ROOT=/global/home/users/cyrusl/placement/expt0070/alltoall_profiling/src/alltoall | ||
declare -a COUNTSFLAGS | ||
COUNTSFLAGS[0]="$ALLTOALL_LIB_ROOT/liballtoall_counts.so" | ||
COUNTSFLAGS[1]="$ALLTOALL_LIB_ROOT/liballtoall_counts_unequal.so" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,7 @@ | |
module purge | ||
module load gcc/8.3.1 hpcx/2.7.0 | ||
|
||
PROJECT_ROOT=/global/home/users/cyrusl/placement/expt0066/alltoall_profiling | ||
PROJECT_ROOT=/global/home/users/cyrusl/placement/expt0070/alltoall_profiling | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is that file ultimately really need to be in the repo? |
||
cd $PROJECT_ROOT | ||
make clean | ||
make | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,7 +7,7 @@ | |
#ifndef _COLLECTIVE_PROFILER_ALLTOALL_CONFIG_H | ||
#define _COLLECTIVE_PROFILER_ALLTOALL_CONFIG_H | ||
|
||
#define DEBUG (0) | ||
#define DEBUG (1) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we activate the debug mode by default? |
||
|
||
// A few environment variables to control a few things at runtime | ||
#define NUM_CALL_START_PROFILING_ENVVAR "A2A_NUM_CALL_START_PROFILING" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -91,8 +91,8 @@ static bool same_call_counters(avSRCountNode_t *call_data, int *send_counts, int | |
int rank, count_num; | ||
int *_counts; | ||
|
||
DEBUG_ALLTOALL_PROFILING("Comparing data with existing data...\n"); | ||
DEBUG_ALLTOALL_PROFILING("-> Comparing send counts...\n"); | ||
DEBUG_ALLTOALL_PROFILING("Comparing data with existing data...\n", NULL); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Another option (which i usually do) is create a |
||
DEBUG_ALLTOALL_PROFILING("-> Comparing send counts...\n", NULL); | ||
// First compare the send counts | ||
// #if ASSUME_COUNTS_EQUAL_ALL_RANKS !=1 | ||
for (rank = 0; rank < size; rank++) | ||
|
@@ -102,7 +102,7 @@ static bool same_call_counters(avSRCountNode_t *call_data, int *send_counts, int | |
count_num = 0; // conversion from alltoallv: no need to loop since only one value for the rank | ||
if (_counts[count_num] != send_counts[num]) | ||
{ | ||
DEBUG_ALLTOALL_PROFILING("Data differs\n"); | ||
DEBUG_ALLTOALL_PROFILING("Data differs\n", NULL); | ||
return false; | ||
} | ||
} | ||
|
@@ -117,10 +117,10 @@ static bool same_call_counters(avSRCountNode_t *call_data, int *send_counts, int | |
// return false; | ||
// } | ||
// #endif | ||
DEBUG_ALLTOALL_PROFILING("-> Send counts are the same\n"); | ||
DEBUG_ALLTOALL_PROFILING("-> Send counts are the same\n", NULL); | ||
|
||
// Then the receive counts | ||
DEBUG_ALLTOALL_PROFILING("-> Comparing recv counts...\n"); | ||
DEBUG_ALLTOALL_PROFILING("-> Comparing recv counts...\n", NULL); | ||
num = 0; | ||
// #if ASSUME_COUNTS_EQUAL_ALL_RANKS !=1 | ||
for (rank = 0; rank < size; rank++) | ||
|
@@ -129,7 +129,7 @@ static bool same_call_counters(avSRCountNode_t *call_data, int *send_counts, int | |
count_num = 0; // conversion from alltoallv: no need to loop since only one value for the rank | ||
if (_counts[count_num] != recv_counts[num]) | ||
{ | ||
DEBUG_ALLTOALL_PROFILING("Data differs\n"); | ||
DEBUG_ALLTOALL_PROFILING("Data differs\n", NULL); | ||
return false; | ||
} | ||
} | ||
|
@@ -144,7 +144,7 @@ static bool same_call_counters(avSRCountNode_t *call_data, int *send_counts, int | |
// } | ||
// #endif | ||
|
||
DEBUG_ALLTOALL_PROFILING("Data is the same\n"); | ||
DEBUG_ALLTOALL_PROFILING("Data is the same\n", NULL); | ||
return true; | ||
} | ||
|
||
|
@@ -182,7 +182,7 @@ static int extract_patterns_from_counts(int *send_counts, int *recv_counts, int | |
int send_patterns[size + 1]; | ||
int recv_patterns[size + 1]; | ||
|
||
DEBUG_ALLTOALL_PROFILING("Extracting patterns\n"); | ||
DEBUG_ALLTOALL_PROFILING("Extracting patterns\n", NULL); | ||
|
||
for (i = 0; i < size; i++) | ||
{ | ||
|
@@ -225,7 +225,7 @@ static int extract_patterns_from_counts(int *send_counts, int *recv_counts, int | |
} | ||
|
||
// From here we know who many ranks send to how many ranks and how many ranks receive from how many rank | ||
DEBUG_ALLTOALL_PROFILING("Handling send patterns\n"); | ||
DEBUG_ALLTOALL_PROFILING("Handling send patterns\n", NULL); | ||
for (i = 0; i < size; i++) | ||
{ | ||
if (send_patterns[i] != 0) | ||
|
@@ -238,7 +238,7 @@ static int extract_patterns_from_counts(int *send_counts, int *recv_counts, int | |
#endif // COMMSIZE_BASED_PATTERNS | ||
} | ||
} | ||
DEBUG_ALLTOALL_PROFILING("Handling receive patterns\n"); | ||
DEBUG_ALLTOALL_PROFILING("Handling receive patterns\n", NULL); | ||
for (i = 0; i < size; i++) | ||
{ | ||
if (recv_patterns[i] != 0) | ||
|
@@ -484,24 +484,23 @@ static int insert_sendrecv_data(int *sbuf, int *rbuf, int size, int sendtype_siz | |
struct avSRCountNode *newNode = NULL; | ||
struct avSRCountNode *temp; | ||
|
||
DEBUG_ALLTOALL_PROFILING("Insert data for a new alltoall call...\n"); | ||
DEBUG_ALLTOALL_PROFILING("Insert data for a new alltoall call...\n", NULL); | ||
|
||
assert(sbuf); | ||
assert(rbuf); | ||
assert(logger); | ||
#if DEBUG | ||
assert(logger->f); | ||
#endif | ||
|
||
temp = head; | ||
while (temp != NULL) | ||
{ | ||
if (temp->size != size || temp->recvtype_size != recvtype_size || temp->sendtype_size != sendtype_size || !same_call_counters(temp, sbuf, rbuf, size)) | ||
{ | ||
// New data | ||
#if DEBUG | ||
fprintf(logger->f, "new data: %d\n", size); | ||
#endif | ||
|
||
// Temporary solution?? - logger->f is not yet initialised (which is done in _commit_data) | ||
// #if DEBUG | ||
// fprintf(logger->f, "new data: %d\n", size); | ||
// #endif | ||
if (temp->next != NULL) | ||
temp = temp->next; | ||
else | ||
|
@@ -510,7 +509,7 @@ static int insert_sendrecv_data(int *sbuf, int *rbuf, int size, int sendtype_siz | |
else | ||
{ | ||
// Data exist, adding call info to it | ||
DEBUG_ALLTOALL_PROFILING("Data already exists, updating metadata...\n"); | ||
DEBUG_ALLTOALL_PROFILING("Data already exists, updating metadata...\n", NULL); | ||
assert(temp->list_calls); | ||
if (temp->count >= temp->max_calls) | ||
{ | ||
|
@@ -520,17 +519,19 @@ static int insert_sendrecv_data(int *sbuf, int *rbuf, int size, int sendtype_siz | |
} | ||
temp->list_calls[temp->count] = avCalls; // Note: count starts at 1, not 0 | ||
temp->count++; | ||
#if DEBUG | ||
fprintf(logger->f, "old data: %d --> %d --- %d\n", size, temp->size, temp->count); | ||
#endif | ||
DEBUG_ALLTOALL_PROFILING("Metadata successfully updated\n"); | ||
// Temporary solution?? - logger->f is not yet initialised (which is done in _commit_data) | ||
// #if DEBUG | ||
// fprintf(logger->f, "old data: %d --> %d --- %d\n", size, temp->size, temp->count); | ||
// #endif | ||
DEBUG_ALLTOALL_PROFILING("Metadata successfully updated\n", NULL); | ||
return 0; | ||
} | ||
} | ||
|
||
#if DEBUG | ||
fprintf(logger->f, "no data: %d \n", size); | ||
#endif | ||
// Temporary solution?? - logger->f is not yet initialised (which is done in _commit_data) | ||
// #if DEBUG | ||
// fprintf(logger->f, "no data: %d \n", size); | ||
// #endif | ||
newNode = (struct avSRCountNode *)malloc(sizeof(avSRCountNode_t)); // TODO Anaylse data structure written from here onwards | ||
assert(newNode); | ||
|
||
|
@@ -552,7 +553,7 @@ static int insert_sendrecv_data(int *sbuf, int *rbuf, int size, int sendtype_siz | |
num = 0; | ||
int _rank; | ||
|
||
DEBUG_ALLTOALL_PROFILING("handling send counts...\n"); | ||
DEBUG_ALLTOALL_PROFILING("handling send counts...\n", NULL); | ||
for (_rank = 0; _rank < size; _rank++) | ||
{ | ||
//#if ASSUME_COUNTS_EQUAL_ALL_RANKS != 1 | ||
|
@@ -567,7 +568,7 @@ static int insert_sendrecv_data(int *sbuf, int *rbuf, int size, int sendtype_siz | |
num++; // so num always = _rank - but why? | ||
} | ||
|
||
DEBUG_ALLTOALL_PROFILING("handling recv counts...\n"); | ||
DEBUG_ALLTOALL_PROFILING("handling recv counts...\n", NULL); | ||
num = 0; | ||
for (_rank = 0; _rank < size; _rank++) | ||
{ | ||
|
@@ -587,9 +588,10 @@ static int insert_sendrecv_data(int *sbuf, int *rbuf, int size, int sendtype_siz | |
newNode->recvtype_size = recvtype_size; | ||
newNode->list_calls[0] = avCalls; | ||
newNode->next = NULL; | ||
#if DEBUG | ||
fprintf(logger->f, "new entry: %d --> %d --- %d\n", size, newNode->size, newNode->count); | ||
#endif | ||
// Temporary solution?? - logger->f is not yet initialised (which is done in _commit_data) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I really do not like that type of comments: it is either a todo and then we need to open an issue or at least clearly say so (i myself do not always open issue for all todos) or dead code in which case it should be deleted. |
||
// #if DEBUG | ||
// fprintf(logger->f, "new entry: %d --> %d --- %d\n", size, newNode->size, newNode->count); | ||
// #endif | ||
|
||
DEBUG_ALLTOALL_PROFILING("Data for the new alltoall call has %d unique series for send counts and %d for recv counts\n", newNode->recv_data_size, newNode->send_data_size); | ||
|
||
|
@@ -659,7 +661,7 @@ static void save_call_patterns(int uniqueID) | |
char *filename = NULL; | ||
int size; | ||
|
||
DEBUG_ALLTOALL_PROFILING("Saving call patterns...\n"); | ||
DEBUG_ALLTOALL_PROFILING("Saving call patterns...\n", NULL); | ||
|
||
if (getenv(OUTPUT_DIR_ENVVAR)) | ||
{ | ||
|
@@ -692,7 +694,7 @@ static void save_patterns(int world_rank) | |
char *rpatterns_filename = NULL; | ||
int size; | ||
|
||
DEBUG_ALLTOALL_PROFILING("Saving patterns...\n"); | ||
DEBUG_ALLTOALL_PROFILING("Saving patterns...\n", NULL); | ||
|
||
if (getenv(OUTPUT_DIR_ENVVAR)) | ||
{ | ||
|
@@ -1207,7 +1209,6 @@ int _mpi_alltoall(const void *sendbuf, const int sendcount, MPI_Datatype sendtyp | |
{ | ||
int comm_size; | ||
int i, j; | ||
int localrank; | ||
int ret; | ||
bool need_profile = true; | ||
int my_comm_rank; | ||
|
@@ -1328,9 +1329,11 @@ int _mpi_alltoall(const void *sendbuf, const int sendcount, MPI_Datatype sendtyp | |
|
||
if (my_comm_rank == 0) | ||
{ | ||
#if DEBUG | ||
fprintf(logger->f, "Root: global %d - %d local %d - %d\n", world_size, myrank, size, localrank); | ||
#endif | ||
// Temporary solution?? - logger->f is not yet initialised (which is done in _commit_data) | ||
// #if DEBUG | ||
// assert(logger->f); | ||
// fprintf(logger->f, "Root: global %d - %d local %d - %d\n", world_size, world_rank, comm_size, my_comm_rank); | ||
// #endif | ||
|
||
#if ((ENABLE_RAW_DATA || ENABLE_PER_RANK_STATS || ENABLE_VALIDATION) && ENABLE_COMPACT_FORMAT) | ||
int s_dt_size, r_dt_size; | ||
|
@@ -1446,4 +1449,4 @@ void calledLast() | |
{ | ||
_commit_data(); | ||
_finalize_profiling(); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,7 +7,7 @@ | |
#ifndef _COLLECTIVE_PROFILER_ALLTOALLV_CONFIG_H | ||
#define _COLLECTIVE_PROFILER_ALLTOALLV_CONFIG_H | ||
|
||
#define DEBUG (0) | ||
#define DEBUG (1) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is debug active by default? |
||
|
||
// A few environment variables to control a few things at runtime | ||
#define NUM_CALL_START_PROFILING_ENVVAR "A2A_NUM_CALL_START_PROFILING" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You should not have a example directory in
src/alltoall
.