From 783b3fab4997d62559d53b581d9cd03e89456540 Mon Sep 17 00:00:00 2001 From: GillesDuvert Date: Tue, 29 Aug 2023 19:35:35 +0200 Subject: [PATCH] modified some flags of parallelize() to force (in the 'smart tpool' mode) use of the max available number of threads, or other variant. --- src/datatypes.cpp | 12 ++++++------ src/math_fun_jmg.cpp | 12 ++++++------ src/minmax_include.cpp | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/datatypes.cpp b/src/datatypes.cpp index 4bb3aed1c..db24bf31d 100644 --- a/src/datatypes.cpp +++ b/src/datatypes.cpp @@ -1401,7 +1401,7 @@ BaseGDL* Data_::Transpose(DUInt* perm) { TRACE_ROUTINE(__FUNCTION__,__FILE__ long chunksize = nElem; long nchunk = 1; bool do_parallel = false; - GDL_NTHREADS=parallelize( nElem, TP_MEMORY_ACCESS); + GDL_NTHREADS=parallelize( nElem, TP_CPU_INTENSIVE); if (GDL_NTHREADS > 1) { //no use start parallel threading for small numbers. chunksize = nElem / GDL_NTHREADS; nchunk = nElem / chunksize; @@ -1491,7 +1491,7 @@ void Data_::Reverse(DLong dim) { TRACE_ROUTINE(__FUNCTION__,__FILE__,__LINE_ if (this->dim[dim]%2) halfDim++; SizeT outerStride = this->dim.Stride(dim + 1); SizeT span=outerStride - revStride; - if ((GDL_NTHREADS=parallelize(nEl, TP_MEMORY_ACCESS))==1) { //most frequent + if ((GDL_NTHREADS=parallelize(nEl, TP_CPU_INTENSIVE))==1) { //most frequent for (SizeT o = 0; o < nEl; o += outerStride) { for (SizeT i = o; i < o+revStride; ++i) { for (SizeT s = i, opp=span+i; s < halfDim+i ; s += revStride, opp-=revStride) { @@ -1529,7 +1529,7 @@ BaseGDL* Data_::DupReverse(DLong dim) { TRACE_ROUTINE(__FUNCTION__,__FILE__, if (this->dim[dim]%2) halfDim++; SizeT outerStride = this->dim.Stride(dim + 1); SizeT span=outerStride - revStride; - if ((GDL_NTHREADS=parallelize(nEl, TP_MEMORY_ACCESS))==1) { //most frequent + if ((GDL_NTHREADS=parallelize(nEl, TP_CPU_INTENSIVE))==1) { //most frequent for (SizeT o = 0; o < nEl; o += outerStride) { for (SizeT i = o; i < o+revStride; ++i) { for (SizeT s 
= i, opp=span+i; s < halfDim+i ; s += revStride, opp-=revStride) { @@ -1569,7 +1569,7 @@ BaseGDL* Data_::DupReverse(DLong dim) { if (this->dim[dim] % 2) halfDim++; SizeT outerStride = this->dim.Stride(dim + 1); SizeT span = outerStride - revStride; - if ((GDL_NTHREADS=parallelize(nEl, TP_MEMORY_ACCESS)) == 1) { //most frequent + if ((GDL_NTHREADS=parallelize(nEl, TP_CPU_INTENSIVE)) == 1) { //most frequent for (SizeT o = 0; o < nEl; o += outerStride) { for (SizeT i = o; i < o + revStride; ++i) { for (SizeT s = i, opp = span + i; s < halfDim + i; s += revStride, opp -= revStride) { @@ -1611,7 +1611,7 @@ BaseGDL* Data_::DupReverse(DLong dim) { if (this->dim[dim] % 2) halfDim++; SizeT outerStride = this->dim.Stride(dim + 1); SizeT span = outerStride - revStride; - if ((GDL_NTHREADS=parallelize(nEl, TP_MEMORY_ACCESS)) == 1) { //most frequent + if ((GDL_NTHREADS=parallelize(nEl, TP_CPU_INTENSIVE)) == 1) { //most frequent for (SizeT o = 0; o < nEl; o += outerStride) { for (SizeT i = o; i < o + revStride; ++i) { for (SizeT s = i, opp = span + i; s < halfDim + i; s += revStride, opp -= revStride) { @@ -3823,7 +3823,7 @@ void Data_::CatInsert (const Data_* srcArr, const SizeT atDim, SizeT& at) SizeT gap = this->dim.Stride (atDim + 1); // dest array //GD: speed up by using indexing that permit parallel and collapse. 
- if ((GDL_NTHREADS=parallelize( len*nCp, TP_MEMORY_ACCESS))==1) { //most frequent + if ((GDL_NTHREADS=parallelize( len*nCp, TP_CPU_INTENSIVE))==1) { //most frequent for (OMPInt c = 0; c < nCp; ++c) { for (SizeT destIx = 0; destIx < len; destIx++) (*this)[destIx + destStart + c * gap] = (*srcArr)[ destIx + c * len]; } diff --git a/src/math_fun_jmg.cpp b/src/math_fun_jmg.cpp index 18f83b5c8..bbef6d005 100644 --- a/src/math_fun_jmg.cpp +++ b/src/math_fun_jmg.cpp @@ -927,7 +927,7 @@ namespace lib { } /* Double loop on the output image */ - if ((GDL_NTHREADS=parallelize( nEl))==1) { + if ((GDL_NTHREADS=parallelize( nEl, TP_CPU_INTENSIVE))==1) { for (OMPInt j = 0; j < nRows; ++j) { for (OMPInt i = 0; i < nCols; ++i) { // Compute the original source for this pixel, note order of j and i in P and Q definition of IDL doc. @@ -1027,7 +1027,7 @@ namespace lib { } /* Double loop on the output image */ - if ((GDL_NTHREADS=parallelize( nEl))==1) { + if ((GDL_NTHREADS=parallelize( nEl, TP_CPU_INTENSIVE))==1) { for (OMPInt j = 0; j < nRows; ++j) { for (OMPInt i = 0; i < nCols; ++i) { // Compute the original source for this pixel, note order of j and i in P and Q definition of IDL doc. @@ -1225,7 +1225,7 @@ namespace lib { } /* Double loop on the output image */ - if ((GDL_NTHREADS=parallelize( nEl))==1) { + if ((GDL_NTHREADS=parallelize( nEl, TP_CPU_INTENSIVE))==1) { for (OMPInt j = 0; j < nRows; ++j) { for (OMPInt i = 0; i < nCols; ++i) { // Compute the original source for this pixel, note order of j and i in P and Q definition of IDL doc. @@ -1373,7 +1373,7 @@ namespace lib { } /* Double loop on the output image */ - if ((GDL_NTHREADS=parallelize( nEl))==1) { + if ((GDL_NTHREADS=parallelize( nEl, TP_CPU_INTENSIVE))==1) { for (OMPInt j = 0; j < nRows; ++j) { for (OMPInt i = 0; i < nCols; ++i) { // Compute the original source for this pixel, note order of j and i. 
@@ -1485,7 +1485,7 @@ namespace lib { } /* Double loop on the output image */ - if ((GDL_NTHREADS=parallelize( nEl))==1) { + if ((GDL_NTHREADS=parallelize( nEl, TP_CPU_INTENSIVE))==1) { for (OMPInt j = 0; j < nRows; ++j) { for (OMPInt i = 0; i < nCols; ++i) { // Compute the original source for this pixel, note order of j and i. @@ -1691,7 +1691,7 @@ namespace lib { } /* Double loop on the output image */ - if ((GDL_NTHREADS=parallelize( nEl))==1) { + if ((GDL_NTHREADS=parallelize( nEl, TP_CPU_INTENSIVE))==1) { for (OMPInt j = 0; j < nRows; ++j) { for (OMPInt i = 0; i < nCols; ++i) { // Compute the original source for this pixel, note order of j and i. diff --git a/src/minmax_include.cpp b/src/minmax_include.cpp index 78bf4c279..b7807f779 100644 --- a/src/minmax_include.cpp +++ b/src/minmax_include.cpp @@ -44,7 +44,7 @@ SizeT nElem = (stop - start) / step; - GDL_NTHREADS=parallelize( nElem); + GDL_NTHREADS=parallelize( nElem, TP_CPU_INTENSIVE); //trap existence of ABSFUNC and create something that stands cppchekck useage (needed by contiunous integration scripts!) #ifndef ABSFUNC #define FUNCABS