Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove asserts that create a segfault in debug mode #121

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
Open
11 changes: 11 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,14 @@ alltoallv-*.log

# VScode specific stuff
.vscode/


examples/alltoallv_dt_c
src/alltoall/examples/alltoall
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should not have a example directory in src/alltoall.


# JL stuff including results files
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is either generic or should not be in the repo

alltoallv_*.err
alltoallv_*.out
src/alltoall/examples/results/*
src/alltoall/examples/results_alltoallv/*

2 changes: 1 addition & 1 deletion build-scripts/build-hpcac.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash

cd /global/home/users/cyrusl/placement/expt0066/alltoall_profiling
cd /global/home/users/cyrusl/placement/expt0070/alltoall_profiling
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should that file ultimately be in the repo?


module purge
spack unload --all
Expand Down
8 changes: 4 additions & 4 deletions examples/alltoall.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ bool is_rank_in_rankset(int rank, rank_set_t* rank_set){
void create_communicators(int world_size, rank_set_t* rank_sets, int rank_sets_count){
DEBUG_ALLTOALL_PROFILING("params for create_communicators: worldsize = %i, ranks_sets_count = %i\n", world_size, rank_sets_count);
for (int k; k<8; k++) DEBUG_ALLTOALL_PROFILING("%i ", rank_sets[0].ranks[k]);
DEBUG_ALLTOALL_PROFILING("\n)");
DEBUG_ALLTOALL_PROFILING("\n)", NULL);

//MPI_Comm** communicators = (MPI_Comm**) malloc(sizeof(MPI_Comm*) * world_size);
MPI_Group world_group;
Expand All @@ -84,10 +84,10 @@ void create_communicators(int world_size, rank_set_t* rank_sets, int rank_sets_c
DEBUG_ALLTOALL_PROFILING("World group size = %i\n", group_size);

for (int rank_set_idx=0; rank_set_idx< rank_sets_count; rank_set_idx++){
DEBUG_ALLTOALL_PROFILING("IN LOOP\n");
DEBUG_ALLTOALL_PROFILING("IN LOOP\n", NULL);
rank_set_t* rank_set = &rank_sets[rank_set_idx];
for (int k; k<8; k++) DEBUG_ALLTOALL_PROFILING("* %i ", rank_set->ranks[k]);
DEBUG_ALLTOALL_PROFILING("\n");
DEBUG_ALLTOALL_PROFILING("\n", NULL);
// signature: MPI_Group_incl( MPI_Group group , int n , const int ranks[] , MPI_Group* newgroup);
DEBUG_ALLTOALL_PROFILING("calling MPI_Group_incl rank_set_idx=%i ...\n", rank_set_idx);
// signature: int MPI_Group_incl(MPI_Group group, int n, const int ranks[], MPI_Group *newgroup)
Expand Down Expand Up @@ -148,7 +148,7 @@ void* create_sendbuf(alltoall_test_node_params_t* node_params){
DEBUG_ALLTOALL_PROFILING("i=%i ", i);
b[i] = i / node_params->sendcount;
}
DEBUG_ALLTOALL_PROFILING("\n");
DEBUG_ALLTOALL_PROFILING("\n", NULL);
#if DEBUG == 1
for (int j=0; j<64; j++) DEBUG_ALLTOALL_PROFILING("~~ %i ", b[j]);
#endif
Expand Down
6 changes: 3 additions & 3 deletions examples/batch-run-alltoall-hpcx-2-7-0-counts-variants.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ THIS_SCRIPT_DIR=$(dirname "$THIS_SCRIPT")

# environment and modules and some paths etc. for the job
# /global/home/users/cyrusl/placement/expt0060/OSU/osu-micro-benchmarks-5.6.3/install/libexec/osu-micro-benchmarks/mpi/collective
export PROJECT_ROOT=/global/home/users/cyrusl/placement/expt0066
export PROJECT_ROOT=/global/home/users/cyrusl/placement/expt0070
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should that file ultimately be in the repo?

# TODO - set modulefiles!!?
module purge
HNAME=$(hostname)
Expand Down Expand Up @@ -121,13 +121,13 @@ EOF

# set variables for the mpirun executable - repeat this section if more than one
# full path? (which below help ldd find executable)
export EXECUTABLE1=/global/home/users/cyrusl/placement/expt0066/alltoall_profiling/examples/alltoall
export EXECUTABLE1=/global/home/users/cyrusl/placement/expt0070/alltoall_profiling/examples/alltoall
export EXECUTABLE1_PARAMS=""

# following example at /global/home/users/cyrusl/placement/expt0060/geoffs-profiler/build-570ff3aff83fa208f3d1e2fcbdb31d9ec7e93b6c/README.md
# TODO put in the results dir

ALLTOALL_LIB_ROOT=/global/home/users/cyrusl/placement/expt0066/alltoall_profiling/src/alltoall
ALLTOALL_LIB_ROOT=/global/home/users/cyrusl/placement/expt0070/alltoall_profiling/src/alltoall
declare -a COUNTSFLAGS
COUNTSFLAGS[0]="$ALLTOALL_LIB_ROOT/liballtoall_counts.so"
COUNTSFLAGS[1]="$ALLTOALL_LIB_ROOT/liballtoall_counts_unequal.so"
Expand Down
2 changes: 1 addition & 1 deletion src/alltoall/build-scripts/build-at-hpcac.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
module purge
module load gcc/8.3.1 hpcx/2.7.0

PROJECT_ROOT=/global/home/users/cyrusl/placement/expt0066/alltoall_profiling
PROJECT_ROOT=/global/home/users/cyrusl/placement/expt0070/alltoall_profiling
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is that file ultimately really need to be in the repo?

cd $PROJECT_ROOT
make clean
make
Expand Down
2 changes: 1 addition & 1 deletion src/alltoall/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#ifndef _COLLECTIVE_PROFILER_ALLTOALL_CONFIG_H
#define _COLLECTIVE_PROFILER_ALLTOALL_CONFIG_H

#define DEBUG (0)
#define DEBUG (1)
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we activate the debug mode by default?


// A few environment variables to control a few things at runtime
#define NUM_CALL_START_PROFILING_ENVVAR "A2A_NUM_CALL_START_PROFILING"
Expand Down
77 changes: 40 additions & 37 deletions src/alltoall/mpi_alltoall.c
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@ static bool same_call_counters(avSRCountNode_t *call_data, int *send_counts, int
int rank, count_num;
int *_counts;

DEBUG_ALLTOALL_PROFILING("Comparing data with existing data...\n");
DEBUG_ALLTOALL_PROFILING("-> Comparing send counts...\n");
DEBUG_ALLTOALL_PROFILING("Comparing data with existing data...\n", NULL);
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another option (which i usually do) is create a DEBUG_ALLTOALL_PROFILING_NOARGS macro. I personally find it to keep the code cleaner but it is clearly a personal preference.

DEBUG_ALLTOALL_PROFILING("-> Comparing send counts...\n", NULL);
// First compare the send counts
// #if ASSUME_COUNTS_EQUAL_ALL_RANKS !=1
for (rank = 0; rank < size; rank++)
Expand All @@ -102,7 +102,7 @@ static bool same_call_counters(avSRCountNode_t *call_data, int *send_counts, int
count_num = 0; // conversion from alltoallv: no need to loop since only one value for the rank
if (_counts[count_num] != send_counts[num])
{
DEBUG_ALLTOALL_PROFILING("Data differs\n");
DEBUG_ALLTOALL_PROFILING("Data differs\n", NULL);
return false;
}
}
Expand All @@ -117,10 +117,10 @@ static bool same_call_counters(avSRCountNode_t *call_data, int *send_counts, int
// return false;
// }
// #endif
DEBUG_ALLTOALL_PROFILING("-> Send counts are the same\n");
DEBUG_ALLTOALL_PROFILING("-> Send counts are the same\n", NULL);

// Then the receive counts
DEBUG_ALLTOALL_PROFILING("-> Comparing recv counts...\n");
DEBUG_ALLTOALL_PROFILING("-> Comparing recv counts...\n", NULL);
num = 0;
// #if ASSUME_COUNTS_EQUAL_ALL_RANKS !=1
for (rank = 0; rank < size; rank++)
Expand All @@ -129,7 +129,7 @@ static bool same_call_counters(avSRCountNode_t *call_data, int *send_counts, int
count_num = 0; // conversion from alltoallv: no need to loop since only one value for the rank
if (_counts[count_num] != recv_counts[num])
{
DEBUG_ALLTOALL_PROFILING("Data differs\n");
DEBUG_ALLTOALL_PROFILING("Data differs\n", NULL);
return false;
}
}
Expand All @@ -144,7 +144,7 @@ static bool same_call_counters(avSRCountNode_t *call_data, int *send_counts, int
// }
// #endif

DEBUG_ALLTOALL_PROFILING("Data is the same\n");
DEBUG_ALLTOALL_PROFILING("Data is the same\n", NULL);
return true;
}

Expand Down Expand Up @@ -182,7 +182,7 @@ static int extract_patterns_from_counts(int *send_counts, int *recv_counts, int
int send_patterns[size + 1];
int recv_patterns[size + 1];

DEBUG_ALLTOALL_PROFILING("Extracting patterns\n");
DEBUG_ALLTOALL_PROFILING("Extracting patterns\n", NULL);

for (i = 0; i < size; i++)
{
Expand Down Expand Up @@ -225,7 +225,7 @@ static int extract_patterns_from_counts(int *send_counts, int *recv_counts, int
}

// From here we know who many ranks send to how many ranks and how many ranks receive from how many rank
DEBUG_ALLTOALL_PROFILING("Handling send patterns\n");
DEBUG_ALLTOALL_PROFILING("Handling send patterns\n", NULL);
for (i = 0; i < size; i++)
{
if (send_patterns[i] != 0)
Expand All @@ -238,7 +238,7 @@ static int extract_patterns_from_counts(int *send_counts, int *recv_counts, int
#endif // COMMSIZE_BASED_PATTERNS
}
}
DEBUG_ALLTOALL_PROFILING("Handling receive patterns\n");
DEBUG_ALLTOALL_PROFILING("Handling receive patterns\n", NULL);
for (i = 0; i < size; i++)
{
if (recv_patterns[i] != 0)
Expand Down Expand Up @@ -484,24 +484,23 @@ static int insert_sendrecv_data(int *sbuf, int *rbuf, int size, int sendtype_siz
struct avSRCountNode *newNode = NULL;
struct avSRCountNode *temp;

DEBUG_ALLTOALL_PROFILING("Insert data for a new alltoall call...\n");
DEBUG_ALLTOALL_PROFILING("Insert data for a new alltoall call...\n", NULL);

assert(sbuf);
assert(rbuf);
assert(logger);
#if DEBUG
assert(logger->f);
#endif

temp = head;
while (temp != NULL)
{
if (temp->size != size || temp->recvtype_size != recvtype_size || temp->sendtype_size != sendtype_size || !same_call_counters(temp, sbuf, rbuf, size))
{
// New data
#if DEBUG
fprintf(logger->f, "new data: %d\n", size);
#endif

// Temporary solution?? - logger->f is not yet initialised (which is done in _commit_data)
// #if DEBUG
// fprintf(logger->f, "new data: %d\n", size);
// #endif
if (temp->next != NULL)
temp = temp->next;
else
Expand All @@ -510,7 +509,7 @@ static int insert_sendrecv_data(int *sbuf, int *rbuf, int size, int sendtype_siz
else
{
// Data exist, adding call info to it
DEBUG_ALLTOALL_PROFILING("Data already exists, updating metadata...\n");
DEBUG_ALLTOALL_PROFILING("Data already exists, updating metadata...\n", NULL);
assert(temp->list_calls);
if (temp->count >= temp->max_calls)
{
Expand All @@ -520,17 +519,19 @@ static int insert_sendrecv_data(int *sbuf, int *rbuf, int size, int sendtype_siz
}
temp->list_calls[temp->count] = avCalls; // Note: count starts at 1, not 0
temp->count++;
#if DEBUG
fprintf(logger->f, "old data: %d --> %d --- %d\n", size, temp->size, temp->count);
#endif
DEBUG_ALLTOALL_PROFILING("Metadata successfully updated\n");
// Temporary solution?? - logger->f is not yet initialised (which is done in _commit_data)
// #if DEBUG
// fprintf(logger->f, "old data: %d --> %d --- %d\n", size, temp->size, temp->count);
// #endif
DEBUG_ALLTOALL_PROFILING("Metadata successfully updated\n", NULL);
return 0;
}
}

#if DEBUG
fprintf(logger->f, "no data: %d \n", size);
#endif
// Temporary solution?? - logger->f is not yet initialised (which is done in _commit_data)
// #if DEBUG
// fprintf(logger->f, "no data: %d \n", size);
// #endif
newNode = (struct avSRCountNode *)malloc(sizeof(avSRCountNode_t)); // TODO Anaylse data structure written from here onwards
assert(newNode);

Expand All @@ -552,7 +553,7 @@ static int insert_sendrecv_data(int *sbuf, int *rbuf, int size, int sendtype_siz
num = 0;
int _rank;

DEBUG_ALLTOALL_PROFILING("handling send counts...\n");
DEBUG_ALLTOALL_PROFILING("handling send counts...\n", NULL);
for (_rank = 0; _rank < size; _rank++)
{
//#if ASSUME_COUNTS_EQUAL_ALL_RANKS != 1
Expand All @@ -567,7 +568,7 @@ static int insert_sendrecv_data(int *sbuf, int *rbuf, int size, int sendtype_siz
num++; // so num always = _rank - but why?
}

DEBUG_ALLTOALL_PROFILING("handling recv counts...\n");
DEBUG_ALLTOALL_PROFILING("handling recv counts...\n", NULL);
num = 0;
for (_rank = 0; _rank < size; _rank++)
{
Expand All @@ -587,9 +588,10 @@ static int insert_sendrecv_data(int *sbuf, int *rbuf, int size, int sendtype_siz
newNode->recvtype_size = recvtype_size;
newNode->list_calls[0] = avCalls;
newNode->next = NULL;
#if DEBUG
fprintf(logger->f, "new entry: %d --> %d --- %d\n", size, newNode->size, newNode->count);
#endif
// Temporary solution?? - logger->f is not yet initialised (which is done in _commit_data)
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I really do not like that type of comments: it is either a todo and then we need to open an issue or at least clearly say so (i myself do not always open issue for all todos) or dead code in which case it should be deleted.

// #if DEBUG
// fprintf(logger->f, "new entry: %d --> %d --- %d\n", size, newNode->size, newNode->count);
// #endif

DEBUG_ALLTOALL_PROFILING("Data for the new alltoall call has %d unique series for send counts and %d for recv counts\n", newNode->recv_data_size, newNode->send_data_size);

Expand Down Expand Up @@ -659,7 +661,7 @@ static void save_call_patterns(int uniqueID)
char *filename = NULL;
int size;

DEBUG_ALLTOALL_PROFILING("Saving call patterns...\n");
DEBUG_ALLTOALL_PROFILING("Saving call patterns...\n", NULL);

if (getenv(OUTPUT_DIR_ENVVAR))
{
Expand Down Expand Up @@ -692,7 +694,7 @@ static void save_patterns(int world_rank)
char *rpatterns_filename = NULL;
int size;

DEBUG_ALLTOALL_PROFILING("Saving patterns...\n");
DEBUG_ALLTOALL_PROFILING("Saving patterns...\n", NULL);

if (getenv(OUTPUT_DIR_ENVVAR))
{
Expand Down Expand Up @@ -1207,7 +1209,6 @@ int _mpi_alltoall(const void *sendbuf, const int sendcount, MPI_Datatype sendtyp
{
int comm_size;
int i, j;
int localrank;
int ret;
bool need_profile = true;
int my_comm_rank;
Expand Down Expand Up @@ -1328,9 +1329,11 @@ int _mpi_alltoall(const void *sendbuf, const int sendcount, MPI_Datatype sendtyp

if (my_comm_rank == 0)
{
#if DEBUG
fprintf(logger->f, "Root: global %d - %d local %d - %d\n", world_size, myrank, size, localrank);
#endif
// Temporary solution?? - logger->f is not yet initialised (which is done in _commit_data)
// #if DEBUG
// assert(logger->f);
// fprintf(logger->f, "Root: global %d - %d local %d - %d\n", world_size, world_rank, comm_size, my_comm_rank);
// #endif

#if ((ENABLE_RAW_DATA || ENABLE_PER_RANK_STATS || ENABLE_VALIDATION) && ENABLE_COMPACT_FORMAT)
int s_dt_size, r_dt_size;
Expand Down Expand Up @@ -1446,4 +1449,4 @@ void calledLast()
{
_commit_data();
_finalize_profiling();
}
}
2 changes: 1 addition & 1 deletion src/alltoallv/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#ifndef _COLLECTIVE_PROFILER_ALLTOALLV_CONFIG_H
#define _COLLECTIVE_PROFILER_ALLTOALLV_CONFIG_H

#define DEBUG (0)
#define DEBUG (1)
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is debug active by default?


// A few environment variables to control a few things at runtime
#define NUM_CALL_START_PROFILING_ENVVAR "A2A_NUM_CALL_START_PROFILING"
Expand Down
Loading