diff --git a/core/mod/HDF5reader.mod b/core/mod/HDF5reader.mod new file mode 100644 index 00000000..ade925a8 --- /dev/null +++ b/core/mod/HDF5reader.mod @@ -0,0 +1,1453 @@ +COMMENT +/** + * @file HDF5reader.mod + * @brief + * @author king + * @date 2009-10-23 + * @remark Copyright © BBP/EPFL 2005-2011; All rights reserved. Do not distribute without further notice. + */ +ENDCOMMENT + +COMMENT +This is intended to serve two purposes. One as a general purpose reader for HDF5 files with a basic set of +accessor functions. In addition, it will have special handling for our synapse data files such that they can be handled +more efficiently in a massively parallel manner. I feel inclined to spin this functionality out into a c++ class where +I can access more advanced coding structures, especially STL. +ENDCOMMENT + +NEURON { + ARTIFICIAL_CELL HDF5Reader + POINTER ptr +} + +PARAMETER { +} + +ASSIGNED { + ptr +} + +INITIAL { + : 20.07.2018 - For multicycle Model Generation using GluSynapse, I stopped closing the file during stdinit. 
Should revist this in the future + :closeFile() +} + +NET_RECEIVE(w) { +} + +VERBATIM +#ifndef CORENEURON_BUILD + +#if defined(NRN_VERSION_GTEQ) +#if NRN_VERSION_GTEQ(9,0,0) +#define NRN_VERSION_GTEQ_9_0_0 +#endif +#endif +#ifndef DISABLE_HDF5 + +#undef ptr +#define H5_USE_16_API 1 +#undef dt +#include "hdf5.h" +#define dt nrn_threads->_dt + +#ifndef DISABLE_MPI +#include "mpi.h" +#endif + +#include +#ifndef NRN_VERSION_GTEQ_8_2_0 +/// NEURON utility functions we want to use +extern double* hoc_pgetarg(int iarg); +extern double* getarg(int iarg); +extern char* gargstr(int iarg); +extern int hoc_is_str_arg(int iarg); +extern int ifarg(int iarg); +extern double chkarg(int iarg, double low, double high); +extern double* vector_vec(void* vv); +extern int vector_capacity(void* vv); +extern void* vector_arg(int); +#endif + +extern int nrnmpi_numprocs; +extern int nrnmpi_myid; +/** + * During synapse loading initialization, the h5 files with synapse data are catalogged + * such that each cpu looks at a subset of what's available and gets ready to report to + * other cpus where to find postsynaptic cells + */ +struct SynapseCatalog +{ + /// The base filename for synapse data. 
The fileID is applied as a suffix (%s.%d) + char* rootName; + + /// If possible, we should use the merge script generated by circuit building to find the gid->file id mapping + FILE *directFile; + + /// When working with multiple files, track the id of the active file open + int fileID; + + /// The number of files this cpu has catalogged + int nFiles; + + /// The IDs for the files this cpu has catalogged + int *fileIDs; + + /// For each file, the number of gids catalogged + int *availablegidCount; + + /// For each file, an array of the gids catalogged + int **availablegids; + + /// The index of the gid being handled in the catalog + int gidIndex; +}; + +typedef struct SynapseCatalog SynapseCatalog; + +/** + * After loading, the cpus will exchange requests for info about which files contain synapse + * data for their local gids. This is used to store the received info provided those gids receive + */ +struct ConfirmedCells +{ + /// The number of files and gids that are confirmed as loadable for this cpu + int count; + + /// The gids that can be loaded by this cpu + int *gids; + + /// The files to be opened for this cpu's gids + int *fileIDs; +}; + +typedef struct ConfirmedCells ConfirmedCells; + +#define NONE 0 +#define FLOAT_MATRIX 1 +#define LONG_VECTOR 2 + +/** + * Hold persistent HDF5 data such as file handle and info about latest dataset loaded + */ +struct Info { + + /// data members for general HDF5 usage + hid_t file_; + float * datamatrix_; + long long *datavector_; + char name_group[256]; + hsize_t rowsize_; + hsize_t columnsize_; + hid_t acc_tpl1; + int mode; + + /// Sometimes we want to silence certain warnings + int verboseLevel; + + /// Used to catalog contents of some h5 files tracked by this cpu + SynapseCatalog synapseCatalog; + + /// Receive info on which files contain data for gids local to this cpu + ConfirmedCells confirmedCells; +}; + +typedef struct Info Info; + +///Utility macro to make the NEURON pointer accessible for reading and writing 
(seems like we have more levels of indirection than necessary - JGK) +#define INFOCAST Info** ip = (Info**)(&(_p_ptr)) + +#define dp double* + +/** + * Utility function to ensure that all members of an Info struct are initialized. + */ +void initInfo( Info *info ) +{ + // These fields are used when data is being accessed from a dataset + info->file_ = -1; + info->datamatrix_ = NULL; + info->datavector_ = NULL; + info->mode = NONE; + info->name_group[0] = '\0'; + info->rowsize_ = 0; + info->columnsize_ = 0; + info->acc_tpl1 = -1; + info->verboseLevel = 0; + // These fields are used exclusively for catalogging which h5 files contain which postsynaptic gids + info->synapseCatalog.rootName = NULL; + info->synapseCatalog.directFile = NULL; + info->synapseCatalog.fileID = -1; + info->synapseCatalog.fileIDs = NULL; + info->synapseCatalog.nFiles = 0; + info->synapseCatalog.availablegidCount = NULL; + info->synapseCatalog.availablegids = NULL; + info->synapseCatalog.gidIndex = 0; + info->confirmedCells.count = 0; + info->confirmedCells.gids = NULL; + info->confirmedCells.fileIDs = NULL; +} + + + +/** + * Use to sort gids for look up during call of readDirectMapping func + */ +int gidcompare( const void *a, const void *b ) { + return ( *(double*)a - *(double*)b ); +} + + + +/** + * Use to confirm a gid is present on local cpu to help readDirectMapping func. use binary search on sorted gidList + */ +int gidexists( double searchgid, int ngids, double *gidList ) { + int first = 0; + int last = ngids-1; + int middle = (first+last)/2; + + while( first <= last ) { + if( gidList[middle] < searchgid ) { + first = middle + 1; + } else if( gidList[middle] == searchgid ) { + return 1; + } else { + last = middle - 1; + } + + middle = (first+last)/2; + } + + return 0; +} + + +/** + * Use Circuit Builder output script to know which files hold this cpu's gid synapse data. Should be + * short term solution until better loading method is available. 
+ */ +void readDirectMapping( Info *info, int ngids, double *gidList ) { + + // gidList might already be sorted, but I don't want to make that assumption. I also shouldn't alter it, so make a copy + double *tmpList = (double*) malloc( ngids * sizeof(double) ); + memcpy( tmpList, gidList, ngids*sizeof(double) ); + qsort( tmpList, ngids, sizeof(double), gidcompare ); + + // prepare confirmed info + info->confirmedCells.count = ngids; + info->confirmedCells.gids = (int*) malloc(ngids*sizeof(int)); + info->confirmedCells.fileIDs = (int*) malloc(ngids*sizeof(int)); + + // read file data + FILE *fin = info->synapseCatalog.directFile; + info->synapseCatalog.directFile = NULL; + + // read line by line. care about lines with given substrings + // e.g. $CMD -i $H5.7950 -o $H5 -s /a216759 -d /a216759 -f $F + + char line[1024]; + char *res = fgets( line, 1024, fin ); + int gid = -1; + int gidIndex = 0; + while( res != NULL ) { + if( strncmp( line, "$CMD", 4 ) == 0 ) { + char *gidloc = strstr( line, "/a" ); + if( gidloc != NULL ) { + gid = atoi(&gidloc[2]); + } + if( gidexists( (double) gid, ngids, tmpList ) ) { + int fileID = atoi(&line[12]); + info->confirmedCells.gids[gidIndex] = gid; + info->confirmedCells.fileIDs[gidIndex] = fileID; + gidIndex++; + } + } + + res = fgets( line, 1024, fin ); + } + + fclose( fin ); + free(tmpList); +} + + + +/** + * Callback function for H5Giterate - if the dataset opened corresponds to a gid, it is catalogged so the + * local cpu can inform other cpus the whereabouts of that gid + * + * @param loc_id hdf5 handle to the open file + * @param name name of the dataset to be accessed during this iteration step + * @param opdata not used since we have global Info object + */ +herr_t loadShareData( hid_t loc_id, const char *name, void *opdata ) +{ + INFOCAST; + Info* info = *ip; + assert( info->file_ >= 0 ); + + //fprintf( stderr, "open dataset %s\n", name ); + + //make sure we are using a dataset that corresponds to a gid + int gid = atoi( name+1 
); + char rebuild[32]; + snprintf( rebuild, 32, "a%d", gid ); + if( strcmp( rebuild, name ) != 0 ) { + //non-synapse dataset, but not an error (could just be the version info) + //fprintf( stderr, "ignore non-gid dataset\n" ); + return 0; + } + + // we have confirmed that this dataset corresponds to a gid. The active file should make it part of its data + int fileIndex = info->synapseCatalog.nFiles-1; + info->synapseCatalog.availablegids[ fileIndex ][ info->synapseCatalog.gidIndex++ ] = gid; + + return 1; +} + + + +/** + * Open an HDF5 file for reading. In the event of synapse data, the datasets of the file may be iterated in order to + * build a catalog of available gids and their file locations. + * + * @param info Structure that manages hdf5 info + * @param filename File to open + * @param fileID Integer to identify this file (attached as suffix to filename) + * @param nNodesPerFile 0: open file, but don't load data; 1: open file for catalogging; N: read portion of file for catalogging + * @param startRank used to help calculate data range to load when file subportion is loaded + * @param myRank used to help calculate data range to load when file subportion is loaded + */ +int openFile( Info* info, const char *filename, int fileID, int nRanksPerFile, int startRank, int myRank ) +{ + if( info->file_ != -1 ) { + H5Fclose(info->file_); + } + + char nameoffile[512]; + //fprintf( stderr, "arg check: %s %d %d %d\n", filename, nRanksPerFile, startRank, myRank ); + if( fileID != -1 ) { + snprintf( nameoffile, 512, "%s.%d", filename, fileID ); + } else { + strncpy( nameoffile, filename, 512 ); + } + + info->name_group[0]='\0'; + + hid_t file_driver = (info->acc_tpl1 != -1)? 
info->acc_tpl1 : H5P_DEFAULT; + + // Opens the file with the alternate handler + info->file_ = H5Fopen( nameoffile, H5F_ACC_RDONLY, file_driver); + int result = (info->file_ < 0); + int failed = result; + +#ifndef DISABLE_MPI + if( info->acc_tpl1 != -1 ) { + MPI_Allreduce( &result, &failed, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD ); + + if( failed ) { + int canreport = (result<0)?nrnmpi_myid:nrnmpi_numprocs, willreport = 0; + MPI_Allreduce( &canreport, &willreport, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD ); + + if( willreport == nrnmpi_myid ) { + fprintf(stderr, "%d ERROR: %d ranks failed collective open of synapse file: %s\n", nrnmpi_myid, failed, nameoffile ); + } + info->file_ = -1; + H5Eprint(stderr); + return -1; + } + } else // to the serial-version if +#endif + if( failed ) { + info->file_ = -1; + fprintf(stderr, "ERROR: Failed to open synapse file: %s\n", nameoffile ); + H5Eprint(stderr); + return -1; + } + + if( nRanksPerFile == 0 ) { + // don't need to load data yet, so we return + return 0; + } + + // will catalog synapse data + info->synapseCatalog.fileID = fileID; + + int nDatasetsToImport=0, startIndex=0; + hsize_t nObjects; + H5Gget_num_objs( info->file_, &nObjects ); + + if( nRanksPerFile == 1 ) { + nDatasetsToImport = (int) nObjects; + } + else { + // need to determine which indices to read + nDatasetsToImport = (int) nObjects / nRanksPerFile; + if( nObjects%nRanksPerFile != 0 ) + nDatasetsToImport++; + + startIndex = (myRank-startRank)*nDatasetsToImport; + if( startIndex + nDatasetsToImport > (int) nObjects ) { + nDatasetsToImport = (int) nObjects - startIndex; + if( nDatasetsToImport <= 0 ) { + //fprintf( stderr, "No need to import any data on rank %d since all %d are claimed\n", myRank, (int) nObjects ); + return 0; + } + } + } + + int nFiles = ++info->synapseCatalog.nFiles; + info->synapseCatalog.fileIDs = (int*) realloc ( info->synapseCatalog.fileIDs, sizeof(int)*nFiles ); + info->synapseCatalog.fileIDs[nFiles-1] = fileID; + 
info->synapseCatalog.availablegidCount = (int*) realloc ( info->synapseCatalog.availablegidCount, sizeof(int)*nFiles ); + info->synapseCatalog.availablegids = (int**) realloc ( info->synapseCatalog.availablegids, sizeof(int*)*nFiles ); + + info->synapseCatalog.availablegidCount[nFiles-1] = nDatasetsToImport; + info->synapseCatalog.availablegids[nFiles-1] = (int*) calloc ( nDatasetsToImport, sizeof(int) ); + info->synapseCatalog.gidIndex=0; + + //fprintf( stderr, "load datasets %d through %d (max %d)\n", startIndex, startIndex+nDatasetsToImport, (int) nObjects ); + + int i, verify=startIndex; + for( i=startIndex; ifile_, "/", &verify, loadShareData, NULL ); + if( result != 1 ) + continue; + } + + return 0; +} + + + +/** + * Load a dataset so that the dimensions are available, but don't retrieve any data + * + * @param info Structure that manages hdf5 info, its datamatrix_ variable is populated with hdf5 data on success + * @param name The name of the dataset to access + * @return 0 on success, < 0 on error + */ +int loadDimensions( Info *info, char* name ) +{ + int isCurrentlyLoaded = strncmp( info->name_group, name, 256 ) == 0; + if( isCurrentlyLoaded ) + return 0; + + hsize_t dims[2] = {0}, offset[2] = {0}; + hid_t dataset_id, dataspace; + + if( H5Lexists(info->file_, name, H5P_DEFAULT) == 0) + { + fprintf(stderr, "Error accessing to dataset %s in synapse file\n", name); + return -1; + } + dataset_id = H5Dopen(info->file_, name); + + strncpy(info->name_group, name, 256); + + dataspace = H5Dget_space(dataset_id); + + int dimensions = H5Sget_simple_extent_ndims(dataspace); + H5Sget_simple_extent_dims(dataspace,dims,NULL); + info->rowsize_ = (unsigned long)dims[0]; + if( dimensions > 1 ) + info->columnsize_ = dims[1]; + else + info->columnsize_ = 1; + + H5Sclose(dataspace); + H5Dclose(dataset_id); + + return 0; +} + + + +/** + * Given the name of a dataset, load it from the current hdf5 file into the matrix pointer + * + * @param info Structure that manages hdf5 
info, its datamatrix_ variable is populated with hdf5 data on success + * @param name The name of the dataset to access and load in the hdf5 file + */ +int loadDataMatrix( Info *info, char* name ) +{ + int isCurrentlyLoaded = strncmp( info->name_group, name, 256 ) == 0; + if( isCurrentlyLoaded ) + return 0; + + hsize_t dims[2] = {0}, offset[2] = {0}; + hid_t dataset_id, dataspace; + + if( H5Lexists(info->file_, name, H5P_DEFAULT) == 0) { + return -1; + } + dataset_id = H5Dopen(info->file_, name); + + strncpy(info->name_group, name, 256); + + dataspace = H5Dget_space(dataset_id); + + int dimensions = H5Sget_simple_extent_ndims(dataspace); + //printf("Dimensions:%d\n",dimensions); + H5Sget_simple_extent_dims(dataspace,dims,NULL); + //printf("Accessing to %s , nrow:%lu,ncolumns:%lu\n",info->name_group,(unsigned long)dims[0],(unsigned long)dims[1]); + info->rowsize_ = (unsigned long)dims[0]; + if( dimensions > 1 ) + info->columnsize_ = dims[1]; + else + info->columnsize_ = 1; + //printf("\n Size of data is row= [%d], Col = [%lu]\n", dims[0], (unsigned long)dims[1]); + if(info->datamatrix_ != NULL) + { + //printf("Freeeing memory \n "); + free(info->datamatrix_); + } + info->datamatrix_ = (float *) malloc(sizeof(float) *(info->rowsize_*info->columnsize_)); + //info->datamatrix_ = (float *) hoc_Emalloc(sizeof(float) *(info->rowsize_*info->columnsize_)); hoc_malchk(); + // Last line fails, corrupt memory of argument 1 and probably more + H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, offset, NULL, dims, NULL); + hid_t dataspacetogetdata=H5Screate_simple(dimensions,dims,NULL); + H5Dread(dataset_id,H5T_NATIVE_FLOAT,dataspacetogetdata,H5S_ALL,H5P_DEFAULT,info->datamatrix_); + H5Sclose(dataspace); + H5Sclose(dataspacetogetdata); + H5Dclose(dataset_id); + //printf("Working , accessed %s , on argstr1 %s\n",info->name_group,gargstr(1)); + return 0; +} + + + +/** + * Given the name of a dataset with id values, load it from the current hdf5 file into the matrix pointer + * + * 
@param info Structure that manages hdf5 info, its datavector_ variable is populated with hdf5 data on success + * @param name The name of the dataset to access and load in the hdf5 file + */ +int loadDataVector( Info *info, char* name ) +{ + hsize_t dims[2] = {0}, offset[2] = {0}; + hid_t dataset_id, dataspace; + + if( H5Lexists(info->file_, name, H5P_DEFAULT) == 0) + { + fprintf(stderr, "Error accessing to dataset %s in synapse file\n", name); + return -1; + } + dataset_id = H5Dopen(info->file_, name); + dataspace = H5Dget_space(dataset_id); + + strncpy(info->name_group, name, 256); + + int dimensions = H5Sget_simple_extent_ndims(dataspace); + H5Sget_simple_extent_dims(dataspace,dims,NULL); + info->rowsize_ = (unsigned long)dims[0]; + if( dimensions > 1 ) + info->columnsize_ = dims[1]; + else + info->columnsize_ = 1; + + if(info->datavector_ != NULL) { + free(info->datavector_); + } + info->datavector_ = (long long *) malloc(sizeof(long long)*(info->rowsize_*info->columnsize_)); + + H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, offset, NULL, dims, NULL); + hid_t dataspacetogetdata=H5Screate_simple(dimensions,dims,NULL); + H5Dread(dataset_id,H5T_NATIVE_ULONG,dataspacetogetdata,H5S_ALL,H5P_DEFAULT,info->datavector_); + H5Sclose(dataspace); + H5Sclose(dataspacetogetdata); + H5Dclose(dataset_id); + return 0; +} + + + +/** + * Load an individual value from a dataset + * + * @param info Shared data + * @param name dataset to open and read from + * @param row data item to retrieve + * @param dest int address where data is to be stored + */ +int loadDataInt( Info* info, char* name, hid_t row, int *dest ) +{ + #if defined(NRN_VERSION_GTEQ_9_0_0) + hsize_t dims[1] = {1}, offset[1] = {static_cast(row)}, offset_out[1] = {0}, count[1] = {1}; + #else + hsize_t dims[1] = {1}, offset[1] = {row}, offset_out[1] = {0}, count[1] = {1}; + #endif + hid_t dataset_id, dataspace, memspace, space; //, filetype; + herr_t status; + int ndims = 0; + long long temp; + + if( 
H5Lexists(info->file_, name, H5P_DEFAULT) == 0) { + fprintf(stderr, "Error accessing to dataset %s in h5 file\n", name); + return -1; + } + + dataset_id = H5Dopen(info->file_, name); + dataspace = H5Dget_space(dataset_id); + + //filetype = H5Dget_type (dataset_id); + space = H5Dget_space (dataset_id); + ndims = H5Sget_simple_extent_dims (space, dims, NULL); + + status = H5Sselect_hyperslab( space, H5S_SELECT_SET, offset, NULL, count, NULL ); + + // memory space to receive data and select hyperslab in that + memspace = H5Screate_simple( 1, dims, NULL ); + status = H5Sselect_hyperslab( memspace, H5S_SELECT_SET, offset_out, NULL, count, NULL ); + + status = H5Dread (dataset_id, H5T_NATIVE_ULONG, memspace, space, H5P_DEFAULT, &temp ); + + *dest = temp; + + status = H5Sclose (space); + status = H5Sclose(dataspace); + status = H5Dclose(dataset_id); + + return 0; +} + + + +/** + * Given the name of a dataset that should contain variable length string data, load those strings + */ +int loadDataString( Info* info, char* name, hid_t row, char **hoc_dest ) +{ + #if defined(NRN_VERSION_GTEQ_9_0_0) + hsize_t dims[1] = {1}, offset[1] = {static_cast(row)}, offset_out[1] = {0}, count[1] = {1}; + #else + hsize_t dims[1] = {1}, offset[1] = {row}, offset_out[1] = {0}, count[1] = {1}; + #endif + hid_t dataset_id, dataspace, memspace, space, filetype, memtype; + herr_t status; + char** rdata; + int ndims = 0; + + if( H5Lexists(info->file_, name, H5P_DEFAULT) == 0) + { + fprintf(stderr, "Error accessing to dataset %s in h5 file\n", name); + return -1; + } + dataset_id = H5Dopen(info->file_, name); + dataspace = H5Dget_space(dataset_id); + + filetype = H5Dget_type (dataset_id); + space = H5Dget_space (dataset_id); + ndims = H5Sget_simple_extent_dims (space, dims, NULL); + rdata = (char **) malloc (sizeof (char *)); + + memtype = H5Tcopy (H5T_C_S1); + status = H5Tset_size (memtype, H5T_VARIABLE); + H5Tset_cset(memtype, H5T_CSET_UTF8); + + status = H5Sselect_hyperslab( space, 
H5S_SELECT_SET, offset, NULL, count, NULL ); + + // memory space to receive data and select hyperslab in that + memspace = H5Screate_simple( 1, dims, NULL ); + status = H5Sselect_hyperslab( memspace, H5S_SELECT_SET, offset_out, NULL, count, NULL ); + + status = H5Dread (dataset_id, memtype, memspace, space, H5P_DEFAULT, rdata); + + hoc_assign_str( hoc_dest, rdata[0] ); + + //fprintf( stderr, "free h5 mem\n" ); // TODO: determine why this causes a crash. For now we leak memory + //status = H5Dvlen_reclaim (memtype, space, H5P_DEFAULT, rdata ); + //fprintf( stderr, "free rdata\n" ); + free(rdata); + + status = H5Sclose (space); + status = H5Sclose (dataspace); + status = H5Tclose (filetype); + status = H5Tclose (memtype); + status = H5Dclose (dataset_id); + + return 0; +} + +#endif // DISABLE_HDF5 +#endif +ENDVERBATIM + + + +CONSTRUCTOR { : double - loc of point process ??? ,string filename +VERBATIM { +#ifndef CORENEURON_BUILD +#ifdef DISABLE_HDF5 + // Neuron might init the mechanism. With args it's the user. 
+ if(ifarg(1)) { + fprintf(stderr, "HDF5 support is not available\n"); + exit(-1); + } +#else + char nameoffile[512]; + int nFiles = 1; + + if( ifarg(2) ) { + nFiles = *getarg(2); + } + + if(ifarg(1) && hoc_is_str_arg(1)) { + INFOCAST; + Info* info = 0; + + strncpy(nameoffile, gargstr(1),512); + info = (Info*) hoc_Emalloc(sizeof(Info)); hoc_malchk(); + initInfo( info ); + + if( ifarg(3) ) { + info->verboseLevel = *getarg(3); + } + + *ip = info; + + if( nFiles == 1 ) { + hid_t plist_id = 0; + + // if a second arg was explicitly given as 1, I assume that we are doing a parallel file access + // saw deadlock while running on viz cluster and hence disabling parallel read + if( ifarg(2) ) { + if( nrnmpi_myid == 0 ) { fprintf( stderr, "using parallel hdf5 is disabled\n" ); } + //info->acc_tpl1 = H5Pcreate(H5P_FILE_ACCESS); + //H5Pset_fapl_mpio(info->acc_tpl1, MPI_COMM_WORLD, MPI_INFO_NULL); + } + + // normal case - open a file and be ready to load data as needed + openFile( info, nameoffile, -1, 0, 0, 0 ); + } + else { + // Each cpu is reponsible for a portion of the data + info->synapseCatalog.rootName = strdup( nameoffile ); + + int mpi_size=1, mpi_rank=0; +#ifndef DISABLE_MPI + MPI_Comm_size( MPI_COMM_WORLD, &mpi_size ); + MPI_Comm_rank( MPI_COMM_WORLD, &mpi_rank ); +#endif + //fprintf( stderr, "%d vs %d\n", mpi_size, nFiles ); + // attempt to use the merge script to catalog files. Only if it is not found should we then go to the + // individual files and iterate the datasets + int plength = strlen(nameoffile); + char *nrnPath = (char*) malloc( plength + 128 ); + strncpy( nrnPath, nameoffile, plength+1 ); + char *term = strrchr( nrnPath, '/' ); *term = 0; + + // rem: recent circuit building has changed the name of the merge script. Hopefully it will not change again. But anyways + // we might replace this whole mod file with a completely better method in the future. 
+ strcat( nrnPath, "/mergeAllH5.sh" ); + FILE *fin = fopen( nrnPath, "r" ); + if( !fin ) { + // try the other name + *term = 0; + strcat( nrnPath, "/merge_nrn_positions.sh" ); + fin = fopen( nrnPath, "r" ); + } + free(nrnPath); + if( fin ) { + info->synapseCatalog.directFile = fin; + return; + } + + // need to determine if I open multiple files per cpu or multiple cpus share a file + if( mpi_size > nFiles ) { // multiple cpus per file + int nRanksPerFile = (int) mpi_size / nFiles; + if( mpi_size % nFiles != 0 ) + nRanksPerFile++; + + //fprintf( stderr, "nRanksPerFile %d = %d/%d\n", nRanksPerFile, mpi_size, nFiles ); + + if( nRanksPerFile * nFiles > mpi_size ) { // no files left for this rank + info->file_ = -1; + return; + } + + int fileIndex = (int) mpi_rank / nRanksPerFile; //this should be truncated + int startRank = fileIndex * nRanksPerFile; + + //sprintf( nameoffile, "%s.%d", gargstr(1), fileIndex ); + //fprintf( stderr, "I should open file %s\n", nameoffile ); + + openFile( info, nameoffile, fileIndex, nRanksPerFile, startRank, mpi_rank ); + + } else { + // one or more files per cpu - any file opened should load all the data. 
+ int nFilesPerRank = nFiles / mpi_size; + //fprintf( stderr, "nFilesPerRank %d = %d/%d\n", nFilesPerRank, nFiles, mpi_size ); + + if( nFiles % mpi_size != 0 ) { + nFilesPerRank++; + } + int startFile = mpi_rank * nFilesPerRank; + if( startFile+nFilesPerRank > nFiles ) { + nFilesPerRank = nFiles-startFile; + if( nFilesPerRank <= 0 ) { + info->file_ = -1; + return; + } + } + + int fileIndex=0; + for( fileIndex=0; fileIndexfile_>=0) + { + if( info->acc_tpl1 != -1 ) { + if( nrnmpi_myid == 0 ) { fprintf( stderr, "terminating parallel h5 access\n" ); } + H5Pclose(info->acc_tpl1); + } + //printf("Trying to close\n"); + H5Fclose(info->file_); + //printf("Close\n"); + info->file_ = -1; + } + if(info->datamatrix_ != NULL) + { + free(info->datamatrix_); + info->datamatrix_ = NULL; + } + if( info->datavector_ != NULL ) + { + free( info->datavector_ ); + info->datavector_ = NULL; + } +#endif +#endif // CORENEURON_BUILD +} +ENDVERBATIM +} + + + +FUNCTION redirect() { +VERBATIM { +#ifndef CORENEURON_BUILD +#ifndef DISABLE_HDF5 +#ifndef DISABLE_MPI + FILE *fout; + char fname[128]; + + int mpi_size, mpi_rank; + //fprintf( stderr, "rank %d\n", getpid() ); + //sleep(20); + + + // get MPI info + MPI_Comm_size (MPI_COMM_WORLD, &mpi_size); + MPI_Comm_rank (MPI_COMM_WORLD, &mpi_rank); + + if( mpi_rank != 0 ) { + sprintf( fname, "NodeFiles/%d.%dnode.out", mpi_rank, mpi_size ); + fout = freopen( fname, "w", stdout ); + if( !fout ) { + fprintf( stderr, "failed to redirect. 
Terminating\n" ); + exit(0); + } + + sprintf( fname, "NodeFiles/%d.%dnode.err", mpi_rank, mpi_size ); + fout = freopen( fname, "w", stderr ); + setbuf( fout, NULL ); + } +#endif // DISABLE_MPI +#endif // DISABLE_HDF5 +#endif // CORENEURON_BUILD +} +ENDVERBATIM +} + +FUNCTION checkVersion() { +VERBATIM { +#ifndef CORENEURON_BUILD + int versionNumber = 0; +#ifndef DISABLE_HDF5 + INFOCAST; + Info* info = *ip; + int mpi_size=1, mpi_rank=0; + +#ifndef DISABLE_MPI + // get MPI info + MPI_Comm_size (MPI_COMM_WORLD, &mpi_size); + MPI_Comm_rank (MPI_COMM_WORLD, &mpi_rank); +#endif + + if( mpi_rank == 0 ) + { + if( H5Lexists(info->file_, "info", H5P_DEFAULT) == 0) { + versionNumber = 0; + } else { + hid_t dataset_id = H5Dopen( info->file_, "info" ); + hid_t attr_id = H5Aopen_name( dataset_id, "version" ); + H5Aread( attr_id, H5T_NATIVE_INT, &versionNumber ); + H5Aclose(attr_id); + H5Dclose(dataset_id); + } + } + +#ifndef DISABLE_MPI + MPI_Bcast( &versionNumber, 1, MPI_INT, 0, MPI_COMM_WORLD ); +#endif + +#endif // DISABLE_HDF5 + return versionNumber; +#endif // CORENEURON_BUILD +} +ENDVERBATIM +} + + +FUNCTION loadData() { +VERBATIM { +#ifndef CORENEURON_BUILD +#ifndef DISABLE_HDF5 + INFOCAST; + Info* info = *ip; + + if(info->file_>=0 && ifarg(1) && hoc_is_str_arg(1)) + { + if( ifarg(2) ) { + if( *getarg(2) == 1 ) { //load vector + info->mode = LONG_VECTOR; + return loadDataVector( info, gargstr(1) ); + } + } + + info->mode = FLOAT_MATRIX; + return loadDataMatrix( info, gargstr(1) ); + } + else if( ifarg(1) ) + { + int gid = *getarg(1); + int gidIndex=0; + + for( gidIndex=0; gidIndexconfirmedCells.count; gidIndex++ ) { + if( info->confirmedCells.gids[gidIndex] == gid ) { + openFile( info, info->synapseCatalog.rootName, info->confirmedCells.fileIDs[gidIndex], 0, 0, 0 ); + + char cellname[256]; + sprintf( cellname, "a%d", gid ); + + info->mode = FLOAT_MATRIX; + return loadDataMatrix( info, cellname ); + } + } + + //if we reach here, did not find data + if( info->verboseLevel > 
0 ) + fprintf( stderr, "Warning: failed to find data for gid %d\n", gid ); + } + + return -1; +#endif // DISABLE_HDF5 +#endif // CORENEURON_BUILD +} +ENDVERBATIM +} + + + +FUNCTION getNoOfColumns(){ : string cellname +VERBATIM { +#ifndef CORENEURON_BUILD +#ifndef DISABLE_HDF5 + INFOCAST; + Info* info = *ip; + //printf("(Inside number of Col)Number of Col %s\n",gargstr(1)); + if(info->file_>=0 && ifarg(1) && hoc_is_str_arg(1)) + { + char name[256]; + strncpy(name,gargstr(1),256); + if(strncmp(info->name_group,name,256) == 0) + { + //printf("Returning :%d\n",(int)info->rowsize_); + int res = (int) info->columnsize_; + //printf("Res :%d\n",res); + return res; + } + //printf("NumberofCol Error on the name of last loaded data: trying to access:%s loaded:%s\n",name,info->name_group); + return 0; + } + else + { + return 0; + } +#endif +#endif // CORENEURON_BUILD +} +ENDVERBATIM +} + + + +FUNCTION numberofrows() { : string cellname +VERBATIM { +#ifndef CORENEURON_BUILD +#ifndef DISABLE_HDF5 + INFOCAST; + Info* info = *ip; + //printf("(Inside number of rows)Number of rows %s\n",gargstr(1)); + if(info->file_>=0 && ifarg(1) && hoc_is_str_arg(1)) + { + char name[256]; + strncpy(name,gargstr(1),256); + if(strncmp(info->name_group,name,256) == 0) + { + //printf("Returning :%d\n",(int)info->rowsize_); + int res = (int) info->rowsize_; + //printf("Res :%d\n",res); + return res; + } + //printf("Numberofrows Error on the name of last loaded data: trying to access:%s loaded:%s\n",name,info->name_group); + return 0; + } + else + { + fprintf(stderr, "general error: bad file handle, missing arg?"); + return 0; + } +#endif +#endif // CORENEURON_BUILD +} +ENDVERBATIM +} + + + +FUNCTION getData() { +VERBATIM { +#ifndef CORENEURON_BUILD +#ifndef DISABLE_HDF5 + INFOCAST; + Info* info = *ip; + if(info->file_>=0&& ifarg(1) && hoc_is_str_arg(1) && ifarg(2) && ifarg(3)) + { + char name[256]; + strncpy(name,gargstr(1),256); + if(strncmp(info->name_group,name,256) == 0) + { + hsize_t row,column; + 
row = (hsize_t) *getarg(2); + column = (hsize_t) *getarg(3); + if(row<0 || row >=info->rowsize_ || column < 0 || column>=info->columnsize_) + { + fprintf(stderr, "ERROR: trying to access to a row and column erroneus on %s, size: %lld,%lld accessing to %lld,%lld\n", + name, info->rowsize_, info->columnsize_, row, column); + return 0; + } + + if( info->mode == FLOAT_MATRIX ) { + return info->datamatrix_[row*info->columnsize_ + column]; + } else if( info->mode == LONG_VECTOR ) { + return (double) info->datavector_[row]; + } else { + fprintf( stderr, "unexpected mode: %d\n", info->mode ); + } + } + fprintf(stderr, "(Getting data)Error on the name of last loaded data: access:%s loaded:%s\n",name,info->name_group); + return 0; + } + else + { + fprintf( stderr, "ERROR:Error on number of rows of %s\n", gargstr(1) ); + return 0; + } +#endif +#endif // CORENEURON_BUILD +} +ENDVERBATIM +} + + + +COMMENT +Retrieve a integer value from an hdf5 dataset +Note that this function doesn't apply as many checks as other functions. +e.g. The row is assumed to be within the dataset; this code is expected to be refactored ( -JGK, Jul 6 2017) +@param dataset name +@param row +@return read integer +ENDCOMMENT +FUNCTION getDataInt() { +VERBATIM +#ifndef CORENEURON_BUILD +#ifndef DISABLE_HDF5 + INFOCAST; + Info* info = *ip; + int value = -1; + + _lgetDataInt = 0; + if( info->file_ >= 0 && ifarg(1) && hoc_is_str_arg(1) && ifarg(2) ) + { + // Use HDF5 interface to get the requested string item from the dataset + if( loadDataInt( info, gargstr(1), *getarg(2), &value ) == 0 ) { + _lgetDataInt = value; + } + } +#endif +#endif // CORENEURON_BUILD +ENDVERBATIM +} + + + +COMMENT +Load a dataset so that the dimensions can be accessed +Note that this function doesn't apply as many checks as other functions. 
+@param dataset name +ENDCOMMENT +PROCEDURE getDimensions() { +VERBATIM +#ifndef CORENEURON_BUILD +#ifndef DISABLE_HDF5 + INFOCAST; + Info* info = *ip; + + if( info->file_ >= 0 && ifarg(1) && hoc_is_str_arg(1) ) { + loadDimensions( info, gargstr(1) ); + } +#endif +#endif // CORENEURON_BUILD +ENDVERBATIM +} + + + +COMMENT +Retrieve a single string from an hdf5 dataset and store in the provided hoc strdef +Note that this function doesn't apply as many checks as other functions. +e.g. The row is assumed to be within the dataset; this code is expected to be refactored ( -JGK, Jul 6 2017) +@param dataset name +@param row +@param destination strdef from hoc +ENDCOMMENT +PROCEDURE getDataString() { +VERBATIM +#ifndef CORENEURON_BUILD +#ifndef DISABLE_HDF5 + INFOCAST; + Info* info = *ip; + + if( info->file_ >= 0 && ifarg(1) && hoc_is_str_arg(1) && ifarg(2) && ifarg(3) ) + { + // Use HDF5 interface to get the requested string item from the dataset + loadDataString( info, gargstr(1), *getarg(2), hoc_pgargstr(3) ); + } +#endif +#endif // CORENEURON_BUILD +ENDVERBATIM +} + + + +FUNCTION getColumnDataRange() { +VERBATIM { +#ifndef CORENEURON_BUILD +#ifndef DISABLE_HDF5 + INFOCAST; + Info* info = *ip; + IvocVect* pdVec = NULL; + double* pd = NULL; + int i = 0; + int nStart, nEnd, count; + if(info->file_>=0&& ifarg(1) && hoc_is_str_arg(1) && ifarg(2) ) + { + char name[256]; + strncpy(name,gargstr(1),256); + if(strncmp(info->name_group,name,256) == 0) + { + hsize_t column; + column = (hsize_t) *getarg(2); + if(column<0 || column >=info->columnsize_ ) + { + fprintf(stderr, "ERROR: trying to access to a column erroneus on %s, size: %lld,%lld accessing to column %lld\n ",name,info->rowsize_,info->columnsize_,column); + return 0; + } + pdVec = vector_arg(3); + nStart = (int)*getarg(4); + nEnd = (int)*getarg(5); + vector_resize(pdVec, nEnd-nStart+1); + pd = vector_vec(pdVec); + count =0; + for( i=nStart; i<=nEnd; i++){ + pd[count] = info->datamatrix_[i*info->columnsize_ + column]; + 
count = count +1; + //printf("\n Filling [%f]", pd[i]); + } + //float res = info->datamatrix_[row*info->columnsize_ + column]; + return 1; + } + fprintf(stderr, "(Getting data)Error on the name of last loaded data: access:%s loaded:%s\n",name,info->name_group); + return 0; + } + else + { + //printf("ERROR:Error on number of rows of \n"); + return 0; + } +#endif +#endif // CORENEURON_BUILD +} +ENDVERBATIM +} + + + +COMMENT +/** + * Load all the data from a single column of active dataset into a NEURON Vector object + * + * @param dataset name - used to confirm that the active dataset matches what is requested + * @param column index + * @param Vector object to fill - resized as needed to hold the available data + */ +ENDCOMMENT +FUNCTION getColumnData() { +VERBATIM { +#ifndef CORENEURON_BUILD +#ifndef DISABLE_HDF5 + INFOCAST; + Info* info = *ip; + IvocVect* pdVec = NULL; + double* pd = NULL; + int i = 0; + if(info->file_>=0&& ifarg(1) && hoc_is_str_arg(1) && ifarg(2) ) + { + char name[256]; + strncpy(name,gargstr(1),256); + if(strncmp(info->name_group,name,256) == 0) + { + hsize_t column; + column = (hsize_t) *getarg(2); + if(column<0 || column >=info->columnsize_ ) + { + fprintf(stderr, "ERROR: trying to access to a column erroneus on %s, size: %lld,%lld accessing to column %lld\n ",name,info->rowsize_,info->columnsize_,column); + return 0; + } + pdVec = vector_arg(3); + vector_resize(pdVec, (int) info->rowsize_); + pd = vector_vec(pdVec); + for( i=0; irowsize_; i++){ + pd[i] = info->datamatrix_[i*info->columnsize_ + column]; + //printf("\n Filling [%f]", pd[i]); + } + //float res = info->datamatrix_[row*info->columnsize_ + column]; + return 1; + } + fprintf(stderr, "(Getting data)Error on the name of last loaded data: access:%s loaded:%s\n",name,info->name_group); + return 0; + } + else + { + //printf("ERROR:Error on number of rows of \n"); + return 0; + } +#endif +#endif // CORENEURON_BUILD +} +ENDVERBATIM +} + + + +COMMENT +/** + * Retrieve the value for an 
attribute of the active dataset. Expected to contain only one value of double type
 *
 * @param dataset name
 * @param attribute name
 * @return the attribute's value as a double; 0 on any error
 */
ENDCOMMENT
FUNCTION getAttributeValue() {
VERBATIM
#ifndef CORENEURON_BUILD
#ifndef DISABLE_HDF5
    INFOCAST;
    Info* info = *ip;
    if( info->file_ >= 0 && ifarg(1) && hoc_is_str_arg(1) && ifarg(2) && hoc_is_str_arg(2) )
    {
        if( H5Lexists(info->file_, gargstr(1), H5P_DEFAULT) == 0)
        {
            fprintf( stderr, "Error: no dataset with name %s available.\n", gargstr(1) );
            return 0;
        }
        // H5_USE_16_API is defined above, so the two-argument H5Dopen is used.
        hid_t dataset_id = H5Dopen( info->file_, gargstr(1) );
        if( dataset_id < 0 ) {
            fprintf( stderr, "Error: failed to open dataset %s\n", gargstr(1) );
            return 0;
        }

        double soughtValue = 0;
        hid_t attr_id = H5Aopen_name( dataset_id, gargstr(2) );
        if( attr_id < 0 ) {
            fprintf( stderr, "Error: failed to open attribute %s\n", gargstr(2) );
            H5Dclose(dataset_id);  // was leaked on this error path
            return 0;
        }
        if( H5Aread( attr_id, H5T_NATIVE_DOUBLE, &soughtValue ) < 0 ) {
            fprintf( stderr, "Error: failed to read attribute %s\n", gargstr(2) );
            soughtValue = 0;
        }
        H5Aclose(attr_id);   // was never closed before (handle leak)
        H5Dclose(dataset_id);

        return soughtValue;
    }

    return 0;
#endif
#endif // CORENEURON_BUILD
ENDVERBATIM
}



COMMENT
Release the HDF5 file handle and any cached matrix data; safe to call repeatedly.
ENDCOMMENT
FUNCTION closeFile() {
VERBATIM {
#ifndef CORENEURON_BUILD
#ifndef DISABLE_HDF5
    INFOCAST;
    Info* info = *ip;
    if(info->file_ >=0)
    {
        H5Fclose(info->file_);
        // Mark closed so repeated calls are no-ops.
        info->file_ = -1;
    }
    if(info->datamatrix_ != NULL)
    {
        free(info->datamatrix_);
        info->datamatrix_ = NULL;
    }
#endif
#endif // CORENEURON_BUILD
}
ENDVERBATIM
}



COMMENT
/**
 * This function will have each cpu learn from the other cpus which nrn.h5 file contains the data it needs. It can then load
 * the data as needed using the older functions. Hopefully this will be better since it avoids loading data that is not needed and then
 * transmitting it across the network. My concern is that there will still be some contention on the various nrn.h5 files with multiple cpus
 * accessing it at once, but we will see how that unfolds.
+ * + * @param gidvec hoc vector containing list of gids on local cpu + */ +ENDCOMMENT +FUNCTION exchangeSynapseLocations() { +VERBATIM +#ifndef CORENEURON_BUILD +#ifndef DISABLE_HDF5 +#ifndef DISABLE_MPI + INFOCAST; + Info* info = *ip; + + IvocVect* vv = vector_arg(1); + int gidCount = vector_capacity(vv); + double *gidList = vector_vec(vv); + + if( info->synapseCatalog.directFile != NULL ) { + // the file generated by circuit building with gid to file id exists, so use that to determine mapping instead + readDirectMapping( info, gidCount, gidList ); + return 0; + } + + int mpi_size, mpi_rank; + + MPI_Comm_size (MPI_COMM_WORLD, &mpi_size); + MPI_Comm_rank (MPI_COMM_WORLD, &mpi_rank); + + // used to store the number of gids found per cpu + int *foundCountsAcrossCPUs = (int*) malloc( sizeof(int)*mpi_size ); + int *foundDispls = (int*) malloc( sizeof(int)*mpi_size ); + + int bufferSize; + + // have every cpu allocate a buffer capable of holding the data for the cpu with the most cells + MPI_Allreduce( &gidCount, &bufferSize, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD ); + double* gidsRequested = (double*) malloc ( sizeof(double)*bufferSize ); + int* gidsFound = (int*) malloc( sizeof(int)*bufferSize ); + int* fileIDsFound = (int*) malloc( sizeof(int)*bufferSize ); + + double *tempRequest; //used to hold the gidsRequested buffer when this cpu is broadcasting its gidList + + // each cpu in turn will bcast its local gids. It should then get back the file indices where the data can be found. 
+ int activeRank, requestCount; + for( activeRank=0; activeRanksynapseCatalog.nFiles; fileIndex++ ) { + for( gidIndex=0; gidIndex < info->synapseCatalog.availablegidCount[fileIndex]; gidIndex++ ) { + for( requestIndex=0; requestIndex < requestCount; requestIndex++ ) { + if( info->synapseCatalog.availablegids[fileIndex][gidIndex] == gidsRequested[requestIndex] ) { + gidsFound[nFiles] = gidsRequested[requestIndex]; + fileIDsFound[nFiles++] = info->synapseCatalog.fileIDs[fileIndex]; + } + } + } + } + + MPI_Gather( &nFiles, 1, MPI_INT, foundCountsAcrossCPUs, 1, MPI_INT, activeRank, MPI_COMM_WORLD ); + + if( activeRank == mpi_rank ) { + info->confirmedCells.count = 0; + + int nodeIndex; + for( nodeIndex=0; nodeIndexconfirmedCells.count; + info->confirmedCells.count += foundCountsAcrossCPUs[nodeIndex]; + } + info->confirmedCells.gids = (int*) malloc ( sizeof(int)*info->confirmedCells.count ); + info->confirmedCells.fileIDs = (int*) malloc ( sizeof(int)*info->confirmedCells.count ); + } + + MPI_Gatherv( gidsFound, nFiles, MPI_INT, info->confirmedCells.gids, foundCountsAcrossCPUs, foundDispls, MPI_INT, activeRank, MPI_COMM_WORLD ); + MPI_Gatherv( fileIDsFound, nFiles, MPI_INT, info->confirmedCells.fileIDs, foundCountsAcrossCPUs, foundDispls, MPI_INT, activeRank, MPI_COMM_WORLD ); + + // put back the original gid request buffer so as to not destroy this cpu's gidList + if( activeRank == mpi_rank ) { + gidsRequested = tempRequest; + } + + } + + free(gidsRequested); + free(gidsFound); + free(fileIDsFound); + free(foundCountsAcrossCPUs); + free(foundDispls); +#endif // DISABLE_MPI +#endif // DISABLE_HDF5 +#endif // CORENEURON_BUILD +ENDVERBATIM +} + diff --git a/core/mod/coreneuron_modlist.txt b/core/mod/coreneuron_modlist.txt index da9526b8..5aadbd42 100644 --- a/core/mod/coreneuron_modlist.txt +++ b/core/mod/coreneuron_modlist.txt @@ -1,5 +1,6 @@ ALU.mod CoreNEURONArtificialCell.mod +HDF5reader.mod netstim_inhpoisson.mod SonataReportHelper.mod SonataReports.mod