diff --git a/solvers/elliptic/elliptic.h b/solvers/elliptic/elliptic.h index d51aa1359..19eab0a7b 100644 --- a/solvers/elliptic/elliptic.h +++ b/solvers/elliptic/elliptic.h @@ -36,6 +36,7 @@ SOFTWARE. #include "mesh3D.h" #include "parAlmond.hpp" #include "ellipticPrecon.h" +#include "timer.h" // block size for reduction (hard coded) #define blockSize 256 @@ -46,6 +47,7 @@ typedef struct { int elementType; // number of edges (3=tri, 4=quad, 6=tet, 12=hex) mesh_t *mesh; + timer *profiler; precon_t *precon; diff --git a/solvers/elliptic/setups/setupHex3D.rc b/solvers/elliptic/setups/setupHex3D.rc index 7e1a3037c..78f25e889 100644 --- a/solvers/elliptic/setups/setupHex3D.rc +++ b/solvers/elliptic/setups/setupHex3D.rc @@ -1,6 +1,10 @@ [FORMAT] 1.0 +[APPLICATION PROFILER] +1 + + [BENCHMARK] SOLVE #NONE diff --git a/solvers/elliptic/setups/setupQuad2D.rc b/solvers/elliptic/setups/setupQuad2D.rc index b95bfa55c..1e7e766f4 100644 --- a/solvers/elliptic/setups/setupQuad2D.rc +++ b/solvers/elliptic/setups/setupQuad2D.rc @@ -1,6 +1,9 @@ [FORMAT] 1.0 +[APPLICATION PROFILER] +1 + [DATA FILE] data/ellipticHomogeneous2D.h diff --git a/solvers/elliptic/setups/setupQuad3D.rc b/solvers/elliptic/setups/setupQuad3D.rc index 2760b34d8..3c75d7bf8 100644 --- a/solvers/elliptic/setups/setupQuad3D.rc +++ b/solvers/elliptic/setups/setupQuad3D.rc @@ -1,6 +1,9 @@ [FORMAT] 1.0 +[APPLICATION PROFILER] +1 + [DATA FILE] data/ellipticHomogeneous3D.h diff --git a/solvers/elliptic/setups/setupTet3D.rc b/solvers/elliptic/setups/setupTet3D.rc index acccc49d2..511e87e45 100644 --- a/solvers/elliptic/setups/setupTet3D.rc +++ b/solvers/elliptic/setups/setupTet3D.rc @@ -1,6 +1,9 @@ [FORMAT] 1.0 +[APPLICATION PROFILER] +1 + [DATA FILE] data/ellipticHomogeneous3D.h diff --git a/solvers/elliptic/src/PCG.c b/solvers/elliptic/src/PCG.c index 11593b059..8adb31149 100644 --- a/solvers/elliptic/src/PCG.c +++ b/solvers/elliptic/src/PCG.c @@ -25,7 +25,8 @@ SOFTWARE. */ #include "elliptic.h" -#define CASCADE 1 +#define CASCADE 0 +#define TIMER 1 int pcg(elliptic_t* elliptic, dfloat lambda, @@ -34,7 +35,8 @@ int pcg(elliptic_t* elliptic, dfloat lambda, mesh_t *mesh = elliptic->mesh; setupAide options = elliptic->options; - + timer *profiler = elliptic->profiler; + int DEBUG_ENABLE_REDUCTIONS = 1; options.getArgs("DEBUG ENABLE REDUCTIONS", DEBUG_ENABLE_REDUCTIONS); @@ -55,28 +57,56 @@ int pcg(elliptic_t* elliptic, dfloat lambda, occa::memory &o_Ap = elliptic->o_Ap; occa::memory &o_Ax = elliptic->o_Ax; - +#if (TIMER) + profiler->tic("Inner Product"); +#endif /*compute norm b, set the tolerance */ #if CASCADE normB = ellipticCascadingWeightedInnerProduct(elliptic, elliptic->o_invDegree, o_r, o_r); #else - normB = ellipticWeightedNorm2(elliptic, elliptic->o_invDegree, o_r); + normB = ellipticWeightedInnerProduct(elliptic, elliptic->o_invDegree, o_r, o_r); #endif +#if (TIMER) + profiler->toc("Inner Product"); +#endif + + TOL = mymax(tol*tol*normB,tol*tol); - + +#if (TIMER) +profiler->tic("Ax Operator"); +#endif // compute A*x ellipticOperator(elliptic, lambda, o_x, elliptic->o_Ax, dfloatString); +#if (TIMER) +profiler->toc("Ax Operator"); +#endif + +#if (TIMER) +profiler->tic("Scale Add"); +#endif // subtract r = b - A*x ellipticScaledAdd(elliptic, -1.f, o_Ax, 1.f, o_r); +#if (TIMER) +profiler->toc("Scale Add"); +#endif + +#if (TIMER) +profiler->tic("Inner Product"); +#endif + #if CASCADE rdotr0 = ellipticCascadingWeightedInnerProduct(elliptic, elliptic->o_invDegree, o_r, o_r); #else - rdotr0 = ellipticWeightedNorm2(elliptic, elliptic->o_invDegree, o_r); + rdotr0 = ellipticWeightedInnerProduct(elliptic, elliptic->o_invDegree, o_r, o_r); #endif +#if (TIMER) +profiler->toc("Inner Product"); +#endif //sanity check if (rdotr0<1E-20) { if (options.compareArgs("VERBOSE", "TRUE")&&(mesh->rank==0)){ @@ -87,12 +117,22 @@ int pcg(elliptic_t* elliptic, dfloat lambda, if (options.compareArgs("VERBOSE", "TRUE")&&(mesh->rank==0)) printf("CG: initial res norm %12.12f WE NEED TO GET TO %12.12f \n", sqrt(rdotr0), sqrt(TOL)); +#if (TIMER) +profiler->tic("Preconditioner"); +#endif // Precon^{-1} (b-A*x) ellipticPreconditioner(elliptic, lambda, o_r, o_z); +#if (TIMER) +profiler->toc("Preconditioner"); +#endif + // p = z o_p.copyFrom(o_z); // PCG +#if (TIMER) +profiler->tic("Inner Product"); +#endif // dot(r,z) #if CASCADE rdotz0 = ellipticCascadingWeightedInnerProduct(elliptic, elliptic->o_invDegree, o_r, o_z); @@ -100,12 +140,26 @@ int pcg(elliptic_t* elliptic, dfloat lambda, rdotz0 = ellipticWeightedInnerProduct(elliptic, elliptic->o_invDegree, o_r, o_z); #endif +#if (TIMER) +profiler->toc("Inner Product"); +#endif + while((Niter tic("Ax Operator"); +#endif // [ // A*p ellipticOperator(elliptic, lambda, o_p, o_Ap, dfloatString); - + + #if (TIMER) + profiler->toc("Ax Operator"); +#endif + +#if (TIMER) +profiler->tic("Inner Product"); +#endif // dot(p,A*p) if(DEBUG_ENABLE_REDUCTIONS==1){ #if CASCADE @@ -117,8 +171,10 @@ int pcg(elliptic_t* elliptic, dfloat lambda, else pAp = 1; // ] - - // alpha = dot(r,z)/dot(p,A*p) + #if (TIMER) + profiler->toc("Inner Product"); + #endif +// alpha = dot(r,z)/dot(p,A*p) alpha = rdotz0/pAp; // TO DO: @@ -126,8 +182,14 @@ int pcg(elliptic_t* elliptic, dfloat lambda, // r <= r - alpha*A*p // dot(r,r) // + #if (TIMER) + profiler->tic("Combined Update"); + #endif rdotr1 = ellipticUpdatePCG(elliptic, o_p, o_Ap, alpha, o_x, o_r); - + #if (TIMER) + profiler->toc("Combined Update"); + #endif + if (options.compareArgs("VERBOSE", "TRUE")&&(mesh->rank==0)) printf("CG: it %d r norm %12.12f alpha = %f \n",Niter, sqrt(rdotr1), alpha); @@ -138,8 +200,17 @@ int pcg(elliptic_t* elliptic, dfloat lambda, // [ // z = Precon^{-1} r - ellipticPreconditioner(elliptic, lambda, o_r, o_z); - + #if (TIMER) + profiler->tic("Preconditioner"); + #endif + ellipticPreconditioner(elliptic, lambda, o_r, o_z); + #if (TIMER) + profiler->toc("Preconditioner"); + #endif + +#if (TIMER) +profiler->tic("Inner Product"); +#endif // dot(r,z) if(DEBUG_ENABLE_REDUCTIONS==1){ #if CASCADE @@ -150,7 +221,10 @@ int pcg(elliptic_t* elliptic, dfloat lambda, } else rdotz1 = 1; - + + #if (TIMER) +profiler->toc("Inner Product"); +#endif // ] // flexible pcg beta = (z.(-alpha*Ap))/zdotz0 @@ -158,10 +232,16 @@ int pcg(elliptic_t* elliptic, dfloat lambda, options.compareArgs("KRYLOV SOLVER", "PCG,FLEXIBLE")) { if(DEBUG_ENABLE_REDUCTIONS==1){ +#if (TIMER) +profiler->tic("Inner Product"); +#endif #if CASCADE zdotAp = ellipticCascadingWeightedInnerProduct(elliptic, elliptic->o_invDegree, o_z, o_Ap); #else zdotAp = ellipticWeightedInnerProduct(elliptic, elliptic->o_invDegree, o_z, o_Ap); +#endif + #if (TIMER) +profiler->toc("Inner Product"); #endif } else @@ -171,10 +251,14 @@ int pcg(elliptic_t* elliptic, dfloat lambda, } else { beta = rdotz1/rdotz0; } - +#if (TIMER) +profiler->tic("Scale Add"); +#endif // p = z + beta*p ellipticScaledAdd(elliptic, 1.f, o_z, beta, o_p); - +#if (TIMER) +profiler->toc("Scale Add"); +#endif // switch rdotz0 <= rdotz1 rdotz0 = rdotz1; @@ -193,6 +277,7 @@ dfloat ellipticUpdatePCG(elliptic_t *elliptic, mesh_t *mesh = elliptic->mesh; setupAide options = elliptic->options; + timer *profiler = elliptic->profiler; int DEBUG_ENABLE_REDUCTIONS = 1; options.getArgs("DEBUG ENABLE REDUCTIONS", DEBUG_ENABLE_REDUCTIONS); @@ -200,20 +285,35 @@ dfloat ellipticUpdatePCG(elliptic_t *elliptic, dfloat rdotr1 = 0; if(!options.compareArgs("DISCRETIZATION", "CONTINUOUS")){ - + #if (TIMER) + profiler->tic("Scale Add"); +#endif // x <= x + alpha*p ellipticScaledAdd(elliptic, alpha, o_p, 1.f, o_x); - + #if (TIMER) + profiler->toc("Scale Add"); +#endif // [ // r <= r - alpha*A*p + #if (TIMER) +profiler->tic("Scale Add"); +#endif ellipticScaledAdd(elliptic, -alpha, o_Ap, 1.f, o_r); - +#if (TIMER) +profiler->toc("Scale Add"); +#endif // dot(r,r) if(DEBUG_ENABLE_REDUCTIONS==1){ +#if (TIMER) +profiler->tic("Inner Product"); +#endif #if CASCADE rdotr1 = ellipticCascadingWeightedInnerProduct(elliptic, elliptic->o_invDegree, o_r, o_r); #else - rdotr1 = ellipticWeightedNorm2(elliptic, elliptic->o_invDegree, o_r); + rdotr1 = ellipticWeightedInnerProduct(elliptic, elliptic->o_invDegree, o_r, o_r); +#endif +#if (TIMER) +profiler->toc("Inner Product"); #endif } else @@ -223,6 +323,10 @@ dfloat ellipticUpdatePCG(elliptic_t *elliptic, // x <= x + alpha*p // r <= r - alpha*A*p // dot(r,r) +#if (TIMER) +profiler->tic("Update"); +#endif + elliptic->updatePCGKernel(mesh->Nelements*mesh->Np, elliptic->NblocksUpdatePCG, elliptic->o_invDegree, o_p, o_Ap, alpha, o_x, o_r, elliptic->o_tmpNormr); @@ -236,8 +340,10 @@ dfloat ellipticUpdatePCG(elliptic_t *elliptic, dfloat globalrdotr1 = 0; MPI_Allreduce(&rdotr1, &globalrdotr1, 1, MPI_DFLOAT, MPI_SUM, mesh->comm); - - +#if (TIMER) +profiler->toc("Update"); +#endif + rdotr1 = globalrdotr1; } diff --git a/solvers/elliptic/src/ellipticSetup.c b/solvers/elliptic/src/ellipticSetup.c index c1688c2fd..182c4aee2 100644 --- a/solvers/elliptic/src/ellipticSetup.c +++ b/solvers/elliptic/src/ellipticSetup.c @@ -104,6 +104,10 @@ elliptic_t *ellipticSetup(mesh_t *mesh, dfloat lambda, occa::properties &kernelI // ellipticSolveSetup(elliptic, lambda, kernelInfo); + // Set Timer + elliptic->profiler = new timer; + elliptic->profiler->setTimer(elliptic->options); + elliptic->profiler->initTimer(mesh->device); dlong Nall = mesh->Np*(mesh->Nelements+mesh->totalHaloPairs); elliptic->r = (dfloat*) calloc(Nall, sizeof(dfloat)); diff --git a/solvers/ins/setups/setupHex3D.rc b/solvers/ins/setups/setupHex3D.rc index be6ba7efa..78bf500c3 100644 --- a/solvers/ins/setups/setupHex3D.rc +++ b/solvers/ins/setups/setupHex3D.rc @@ -1,6 +1,9 @@ [FORMAT] 1.0 +[APPLICATION PROFILER] +1 + [DATA FILE] data/insBeltrami3D.h diff --git a/solvers/ins/setups/setupQuad2D.rc b/solvers/ins/setups/setupQuad2D.rc index 76164f031..c95623946 100644 --- a/solvers/ins/setups/setupQuad2D.rc +++ b/solvers/ins/setups/setupQuad2D.rc @@ -1,6 +1,9 @@ [FORMAT] 1.0 +[APPLICATION PROFILER] +1 + [DATA FILE] data/insVortex2D.h diff --git a/solvers/ins/setups/setupQuad3D.rc b/solvers/ins/setups/setupQuad3D.rc index 49b4391d6..e199bf29e 100644 --- a/solvers/ins/setups/setupQuad3D.rc +++ b/solvers/ins/setups/setupQuad3D.rc @@ -1,6 +1,9 @@ [FORMAT] 1.0 +[APPLICATION PROFILER] +1 + [DATA FILE] data/insBrownMinionQuad3D.h diff --git a/solvers/ins/setups/setupTet3D.rc b/solvers/ins/setups/setupTet3D.rc index 39b29545c..e74830002 100644 --- a/solvers/ins/setups/setupTet3D.rc +++ b/solvers/ins/setups/setupTet3D.rc @@ -1,6 +1,9 @@ [FORMAT] 1.0 +[APPLICATION PROFILER] +1 + [DATA FILE] #data/insUniform3D.h data/insBeltrami3D.h diff --git a/solvers/ins/src/insRunEXTBDF.c b/solvers/ins/src/insRunEXTBDF.c index 6e7cb2912..f00313724 100644 --- a/solvers/ins/src/insRunEXTBDF.c +++ b/solvers/ins/src/insRunEXTBDF.c @@ -31,11 +31,6 @@ void extbdfCoefficents(ins_t *ins, int order); void insRunEXTBDF(ins_t *ins){ mesh_t *mesh = ins->mesh; - // Initialize INS profiler - ins->profiler = new timer; - ins->profiler->setTimer(ins->options); - ins->profiler->initTimer(mesh->device); - timer *profiler = ins->profiler; profiler->tic("INS"); @@ -289,7 +284,7 @@ void insRunEXTBDF(ins_t *ins){ if(ins->outputStep) insReport(ins, finalTime,ins->NtimeSteps); - if(mesh->rank==0) profiler->printTimer(); + //if(mesh->rank==0) profiler->printTimer(); profiler->printTimer(mesh->rank, mesh->size, mesh->comm); } diff --git a/solvers/ins/src/insSetup.c b/solvers/ins/src/insSetup.c index d86727ff8..fdea7999d 100644 --- a/solvers/ins/src/insSetup.c +++ b/solvers/ins/src/insSetup.c @@ -630,9 +630,11 @@ if(options.compareArgs("INITIAL CONDITION", "BROWN-MINION") && } + // Set Timer + ins->profiler = new timer; + ins->profiler->setTimer(ins->options); + ins->profiler->initTimer(mesh->device); - - //make option objects for elliptc solvers ins->vOptions = options; ins->vOptions.setArgs("KRYLOV SOLVER", options.getArgs("VELOCITY KRYLOV SOLVER")); @@ -679,6 +681,7 @@ if(options.compareArgs("INITIAL CONDITION", "BROWN-MINION") && ins->uSolver = (elliptic_t*) calloc(1, sizeof(elliptic_t)); ins->uSolver->mesh = mesh; + ins->uSolver->profiler = ins->profiler; ins->uSolver->options = ins->vOptions; ins->uSolver->dim = ins->dim; ins->uSolver->elementType = ins->elementType; @@ -688,6 +691,7 @@ if(options.compareArgs("INITIAL CONDITION", "BROWN-MINION") && ins->vSolver = (elliptic_t*) calloc(1, sizeof(elliptic_t)); ins->vSolver->mesh = mesh; + ins->vSolver->profiler = ins->profiler; ins->vSolver->options = ins->vOptions; ins->vSolver->dim = ins->dim; ins->vSolver->elementType = ins->elementType; @@ -698,6 +702,7 @@ if(options.compareArgs("INITIAL CONDITION", "BROWN-MINION") && if (ins->dim==3) { ins->wSolver = (elliptic_t*) calloc(1, sizeof(elliptic_t)); ins->wSolver->mesh = mesh; + ins->wSolver->profiler = ins->profiler; ins->wSolver->options = ins->vOptions; ins->wSolver->dim = ins->dim; ins->wSolver->elementType = ins->elementType; @@ -709,6 +714,7 @@ if(options.compareArgs("INITIAL CONDITION", "BROWN-MINION") && if (mesh->rank==0) printf("==================PRESSURE SOLVE SETUP=========================\n"); ins->pSolver = (elliptic_t*) calloc(1, sizeof(elliptic_t)); ins->pSolver->mesh = mesh; + ins->pSolver->profiler = ins->profiler; ins->pSolver->options = ins->pOptions; ins->pSolver->dim = ins->dim; ins->pSolver->elementType = ins->elementType;