diff --git a/README.md b/README.md index 0bb4b60..8bfdbee 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,5 @@ # Description -Memory analysis tool for finding gather / scatter (gs) accesses from DynamoRio & NVBit traces. -gs_patterns discovers gather/scatters from analyzing access patterns in memory traces (doesn't just look for gs instructions). gs_patterns writes the "subtraces" to a binary trace and spatter yaml format. -The source lines of the top aggressors are reported. - -For CPU applications use the provided pin client in pin_tracing folder (or DynamoRio). Pin tends to be more reliable for larger applications. +Memory analysis tool for finding nontrivial gather / scatter (g/s) accesses from DynamoRio & NVBit traces. gs_patterns writes the subtraces to binary traces and a spatter yaml formatted file. The source lines of the top aggressors are reported. Use the provided pin clients in the pin_tracing folder or use DynamoRio. Pin tends to be more reliable for larger applications. For CUDA kernels use the provided nvbit client in the nvbit_tracing folder. @@ -31,16 +27,16 @@ gs_pattern gs_pattern -nv ``` -Trace file should be gzipped. For Pin or DynamoRio, Binary file should be compiled with symbols turned on (-g). +Trace file should be gzipped (not tar + gz). For Pin or DynamoRio, binary file should be compiled with symbols turned on (-g). For NVBit tracing the kernel must be compiled with line numbers (--generate-line-info). Please see nvbit_tracing/README.md for detailed information on how to extract traces for CUDA kernels which are compatible with gs_patterns. # How gs_patterns works -g/s accesses are found by looking at repeated instruction addresses (loops) that are memory instructions (scalar and vector). -The first pass finds the top g/s's. The second pass focuses on the top g/s accesses and records the normalized address distances to a binary file and spatter yaml. +g/s accesses are found by looking at repeated instruction addresses (loops) that are memory instructions (scalar and vector). The first pass finds the top g/s's and filters out instructions with trivial access patterns. The second pass focuses on the top g/s accesses and records the normalized address array indices to a binary file and spatter yaml file. # License BSD-3 License. See [the LICENSE file](https://github.com/lanl/gs_patterns/blob/main/LICENSE). -# Author +# Authors Kevin Sheridan, +Christopher Scott, diff --git a/gs_patterns.h b/gs_patterns.h index c81c75c..f74b67c 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -6,31 +6,37 @@ #include #include -#define MAX(X, Y) (((X) < (Y)) ? Y : X) -#define MIN(X, Y) (((X) > (Y)) ? Y : X) -#define ABS(X) (((X) < 0) ? (-1) * (X) : X) +//symbol lookup options +#if !defined(SYMBOLS_ONLY) +#define SYMBOLS_ONLY 1 //Filter out instructions that have no symbol +#endif //triggers -#define SAMPLE 0 #define PERSAMPLE 10000000 -//#define PERSAMPLE 1000 //info -#define CLSIZE (64) -#define NBUFS (1LL<<10) -#define IWINDOW (1024) -#define NGS (8096) +#define CLSIZE (64) //cacheline bytes +#define NBUFS (1LL<<10) //trace reading buffer size +#define IWINDOW (1024) //number of iaddrs per window +#define NGS (8096) //max number for gathers and scatters +#define OBOUNDS (512) //histogram positive max +#define OBOUNDS_ALLOC (2*OBOUNDS + 3) //patterns #define USTRIDES 1024 //Threshold for number of accesses #define NSTRIDES 15 //Threshold for number of unique distances #define OUTTHRESH (0.5) //Threshold for percentage of distances at boundaries of histogram -#define NTOP (10) +#define NTOP (10) //Final gather / scatters to keep #define INITIAL_PSIZE (1<<15) -#define MAX_PSIZE (1<<30) +#define MAX_PSIZE (1<<30) //Max number of indices recorded per gather/scatter #define MAX_LINE_LENGTH 1024 +#if !defined(VBITS) +# define VBITS (512L) +# define VBYTES (VBITS/8) +#endif + namespace gs_patterns { typedef uintptr_t addr_t; @@ -160,12 +166,18 @@ namespace gs_patterns std::string type_as_string() { return !_mType ? "GATHER" : "SCATTER"; } std::string getName() { return !_mType ? "Gather" : "Scatter"; } std::string getShortName() { return !_mType ? "G" : "S"; } + std::string getShortNameLower() { return !_mType ? "g" : "s"; } auto get_srcline() { return srcline[_mType]; } int ntop = 0; + int64_t iaddrs_nosym = 0; + int64_t indices_nosym = 0; + int64_t iaddrs_sym = 0; + int64_t indices_sym = 0; double cnt = 0.0; int offset[NTOP] = {0}; + int size[NTOP] = {0}; addr_t tot[NTOP] = {0}; addr_t top[NTOP] = {0}; @@ -201,8 +213,8 @@ namespace gs_patterns private: addr_t (*iaddrs)[NGS] = new addr_t[2][NGS]; - int64_t (*icnt)[NGS] = new int64_t[2][NGS]; - int64_t (*occ)[NGS] = new int64_t[2][NGS]; + int64_t (*icnt)[NGS] = new int64_t[2][NGS]; //vector instances + int64_t (*occ)[NGS] = new int64_t[2][NGS]; //load/store instances mem_access_type _mType; }; diff --git a/gs_patterns_core.cpp b/gs_patterns_core.cpp index db36838..b9b0354 100644 --- a/gs_patterns_core.cpp +++ b/gs_patterns_core.cpp @@ -47,9 +47,11 @@ namespace gs_patterns_core //Create stride histogram and create spatter int sidx; + int firstgs = 1; int unique_strides; - int64_t n_stride[1027]; -// double outbounds; + int64_t hbin = 0; + int64_t n_stride[OBOUNDS_ALLOC]; + double outbounds; if (file_prefix.empty()) throw GSFileError ("Empty file prefix provided."); @@ -60,73 +62,99 @@ namespace gs_patterns_core printf("***************************************************************************************\n"); unique_strides = 0; - for (j = 0; j < 1027; j++) + for (j = 0; j < OBOUNDS_ALLOC; j++) n_stride[j] = 0; for (j = 1; j < target_metrics.offset[i]; j++) { - sidx = target_metrics.patterns[i][j] - target_metrics.patterns[i][j - 1] + 513; + sidx = target_metrics.patterns[i][j] - target_metrics.patterns[i][j - 1] + OBOUNDS + 1; sidx = (sidx < 1) ? 0 : sidx; - sidx = (sidx > 1025) ? 1026 : sidx; + sidx = (sidx > OBOUNDS_ALLOC - 1) ? OBOUNDS_ALLOC - 1 : sidx; n_stride[sidx]++; } - for (j = 0; j < 1027; j++) { + for (j = 0; j < OBOUNDS_ALLOC; j++) { if (n_stride[j] > 0) { unique_strides++; } } - //outbounds = (double) (n_stride[0] + n_stride[1026]) / (double) target_metrics.offset[i]; + outbounds = (double) (n_stride[0] + n_stride[OBOUNDS_ALLOC-1]) / (double) target_metrics.offset[i]; - //if ( ( (unique_strides > NSTRIDES) || (outbounds > OUTTHRESH) ) && (gather_offset[i] > USTRIDES ) ){ - if (true) { + if (((unique_strides > NSTRIDES) || (outbounds > OUTTHRESH) && (target_metrics.offset[i] > USTRIDES ) )) { + //if (true) { + if (firstgs) { + firstgs = 0; + printf("***************************************************************************************\n"); + printf("%sS\n", target_metrics.type_as_string().c_str()); + } + printf("***************************************************************************************\n"); //create a binary file - FILE *fp_bin; - std::string bin_name = file_prefix + ".sbin"; - printf("%s\n", bin_name.c_str()); - fp_bin = fopen(bin_name.c_str(), "w"); - if (NULL == fp_bin) { - throw GSFileError("Could not open " + std::string(bin_name) + "!"); - } + FILE * fp_bin; + + char bin_name[1024]; + sprintf(bin_name, "%s.%s.%03d.%02dB.sbin", file_prefix.c_str(), target_metrics.getShortNameLower().c_str(), \ + i, target_metrics.size[i]); + printf("%s\n", bin_name); + //std::string bin_name = \ + // file_prefix + "." + target_metrics.getShortNameLower().c_str() + "." + std::to_string(i) + "." + \ + // std::to_string(target_metrics.size[i]) + "B.sbin"; + + fp_bin = fopen(bin_name, "w"); + if (NULL == fp_bin) + throw GSFileError("Could not open " + std::string(bin_name) + "!"); printf("%sIADDR -- %p\n", target_metrics.getShortName().c_str(), (void*) target_metrics.top[i]); printf("SRCLINE -- %s\n", target_metrics.get_srcline()[target_metrics.top_idx[i]]); - printf("%s %c -- %6.3f%c (512-bit chunks)\n", target_metrics.type_as_string().c_str(), - '%', 100.0 * (double) target_metrics.tot[i] / target_metrics.cnt, '%'); - printf("NDISTS -- %ld\n", (long int)target_metrics.offset[i]); - + printf("GATHER %c -- %6.3f%c (%4ld-bit chunks)\n", + '%', 100.0 * (double) target_metrics.tot[i] / target_metrics.cnt, '%', VBITS); + printf("DTYPE -- %d bytes\n", target_metrics.size[i]); + printf("NINDICES -- %d\n", target_metrics.offset[i]); + printf("INDICES:\n"); + int64_t nlcnt = 0; for (j = 0; j < target_metrics.offset[i]; j++) { - - if (j < 39) { - printf("%10ld ", target_metrics.patterns[i][j]); - fflush(stdout); - if (0 == (++nlcnt % 13)) - printf("\n"); - - } else if (j >= (target_metrics.offset[i] - 39)) { - printf("%10ld ", target_metrics.patterns[i][j]); - fflush(stdout); - if (0 == (++nlcnt % 13)) - printf("\n"); - - } else if (39 == j) - printf("...\n"); + + if (j <= 49) { + printf("%10ld ", target_metrics.patterns[i][j]); + fflush(stdout); + if (( ++nlcnt % 10) == 0) + printf("\n"); + + } else if (j >= (target_metrics.offset[i] - 50)) { + printf("%10ld ", target_metrics.patterns[i][j]); + fflush(stdout); + if (( ++nlcnt % 10) == 0) + printf("\n"); + + } else if (j == 50) + printf("...\n"); } printf("\n"); printf("DIST HISTOGRAM --\n"); - for (j = 0; j < 1027; j++) { - if (n_stride[j] > 0) { - if (0 == j) - printf("%6s: %ld\n", "< -512", n_stride[j]); - else if (1026 == j) - printf("%6s: %ld\n", "> 512", n_stride[j]); - else - printf("%6d: %ld\n", j - 513, n_stride[j]); - } - } + hbin = 0; + for(j=0; j 1)) // ? > 1 stride (non-contiguous) <-------------------- + // ? > 1 stride (non-contiguous) <-------------------- + if ((gs == -1) && (abs(iw.get_maddr() - iw.get_maddr_prev()) > 1)) gs = w; } iw.get_maddr_prev() = iw.get_maddr(); } + //Once a gather/scatter, always a gather/scatter + if (gs == -1) { + + InstrInfo & target_iinfo = (w == 0) ? gather_iinfo : scatter_iinfo; + for(k=0; k(*this); } @@ -255,4 +258,4 @@ void MemPatternsForPin::update_source_lines() } // namespace gspin_patterns -} // namespace gs_patterns \ No newline at end of file +} // namespace gs_patterns diff --git a/gspin_patterns.h b/gspin_patterns.h index f943a41..78d64de 100644 --- a/gspin_patterns.h +++ b/gspin_patterns.h @@ -14,20 +14,14 @@ #include "gs_patterns_core.h" #include "utils.h" -//Terminal colors -#define KNRM "\x1B[0m" -#define KRED "\x1B[31m" -#define KYEL "\x1B[33m" -#define KBLU "\x1B[34m" -#define KMAG "\x1B[35m" -#define KCYN "\x1B[36m" +#if !defined(SYMBOLS_ONLY) +#define SYMBOLS_ONLY 1 //Filter out instructions that have no symbol +#endif -//address status -#define ADDREND (0xFFFFFFFFFFFFFFFFUL) -#define ADDRUSYNC (0xFFFFFFFFFFFFFFFEUL) - -#define VBITS (512) -#define VBYTES (VBITS/8) //DONT CHANGE +#if !defined(VBITS) +# define VBITS (512L) +# define VBYTES (VBITS/8) +#endif namespace gs_patterns { @@ -143,4 +137,4 @@ namespace gspin_patterns } // namespace gspin_patterns -} // namespace gs_patterns \ No newline at end of file +} // namespace gs_patterns diff --git a/utils.cpp b/utils.cpp index 9d02c4f..adacb47 100644 --- a/utils.cpp +++ b/utils.cpp @@ -73,11 +73,6 @@ const char* str_replace(const char* orig, const char* rep, const char* with) { if (!result) return NULL; - // first time through the loop, all the variable are set correctly - // from here on, - // tmp points to the end of the result string - // ins points to the next occurrence of rep in orig - // orig points to the remainder of orig after "end of rep" while (count--) { ins = (char*)strstr(orig, rep); len_front = ins - orig; @@ -128,4 +123,4 @@ void close_trace_file (gzFile & fp) } // gs_patterns_core -} // gs_patterns \ No newline at end of file +} // gs_patterns