diff --git a/Grid/qcd/hmc/integrators/Integrator.h b/Grid/qcd/hmc/integrators/Integrator.h index a70204fb..94d7cbdb 100644 --- a/Grid/qcd/hmc/integrators/Integrator.h +++ b/Grid/qcd/hmc/integrators/Integrator.h @@ -255,15 +255,19 @@ public: void refresh(Field& U, GridSerialRNG & sRNG, GridParallelRNG& pRNG) { assert(P.Grid() == U.Grid()); - std::cout << GridLogIntegrator << "Integrator refresh\n"; + std::cout << GridLogIntegrator << "Integrator refresh" << std::endl; + std::cout << GridLogIntegrator << "Generating momentum" << std::endl; FieldImplementation::generate_momenta(P, sRNG, pRNG); // Update the smeared fields, can be implemented as observer // necessary to keep the fields updated even after a reject // of the Metropolis + std::cout << GridLogIntegrator << "Updating smeared fields" << std::endl; Smearer.set_Field(U); // Set the (eventual) representations gauge fields + + std::cout << GridLogIntegrator << "Updating representations" << std::endl; Representations.update(U); // The Smearer is attached to a pointer of the gauge field @@ -273,6 +277,7 @@ public: for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) { // get gauge field from the SmearingPolicy and // based on the boolean is_smeared in actionID + std::cout << GridLogIntegrator << "Refreshing integrator level " << level << " index " << actionID << std::endl; Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared); as[level].actions.at(actionID)->refresh(Us, sRNG, pRNG); } diff --git a/Grid/threads/Accelerator.h b/Grid/threads/Accelerator.h index 52ac373a..75981e88 100644 --- a/Grid/threads/Accelerator.h +++ b/Grid/threads/Accelerator.h @@ -196,7 +196,8 @@ inline void *acceleratorAllocShared(size_t bytes) auto err = cudaMallocManaged((void **)&ptr,bytes); if( err != cudaSuccess ) { ptr = (void *) NULL; - printf(" cudaMallocManaged failed for %lu %s \n",bytes,cudaGetErrorString(err)); + printf(" cudaMallocManaged failed for %lu %s \n",bytes,cudaGetErrorString(err)); fflush(stdout); + if (acceleratorAbortOnGpuError) assert(err==cudaSuccess); } return ptr; }; @@ -206,16 +207,53 @@ inline void *acceleratorAllocDevice(size_t bytes) auto err = cudaMalloc((void **)&ptr,bytes); if( err != cudaSuccess ) { ptr = (void *) NULL; - printf(" cudaMalloc failed for %lu %s \n",bytes,cudaGetErrorString(err)); + printf(" cudaMalloc failed for %lu %s \n",bytes,cudaGetErrorString(err)); fflush(stdout); + if (acceleratorAbortOnGpuError) assert(err==cudaSuccess); } return ptr; }; -inline void acceleratorFreeShared(void *ptr){ cudaFree(ptr);}; -inline void acceleratorFreeDevice(void *ptr){ cudaFree(ptr);}; -inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { cudaMemcpy(to,from,bytes, cudaMemcpyHostToDevice);} -inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes) { cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToDevice);} -inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost);} -inline void acceleratorMemSet(void *base,int value,size_t bytes) { cudaMemset(base,value,bytes);} +inline void acceleratorFreeShared(void *ptr){ + auto err = cudaFree(ptr); + if( err != cudaSuccess ) { + printf(" cudaFree(Shared) failed %s \n",cudaGetErrorString(err)); fflush(stdout); + if (acceleratorAbortOnGpuError) assert(err==cudaSuccess); + } +}; +inline void acceleratorFreeDevice(void *ptr){ + auto err = cudaFree(ptr); + if( err != cudaSuccess ) { + printf(" cudaFree(Device) failed %s \n",cudaGetErrorString(err)); fflush(stdout); + if (acceleratorAbortOnGpuError) assert(err==cudaSuccess); + } +}; +inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { + auto err = cudaMemcpy(to,from,bytes, cudaMemcpyHostToDevice); + if( err != cudaSuccess ) { + printf(" cudaMemcpy(host->device) failed for %lu %s \n",bytes,cudaGetErrorString(err)); fflush(stdout); + if (acceleratorAbortOnGpuError) assert(err==cudaSuccess); + } +} +inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes) { + auto err = cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToDevice); + if( err != cudaSuccess ) { + printf(" cudaMemcpy(device->device) failed for %lu %s \n",bytes,cudaGetErrorString(err)); fflush(stdout); + if (acceleratorAbortOnGpuError) assert(err==cudaSuccess); + } +} +inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ + auto err = cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost); + if( err != cudaSuccess ) { + printf(" cudaMemcpy(device->host) failed for %lu %s \n",bytes,cudaGetErrorString(err)); fflush(stdout); + if (acceleratorAbortOnGpuError) assert(err==cudaSuccess); + } +} +inline void acceleratorMemSet(void *base,int value,size_t bytes) { + auto err = cudaMemset(base,value,bytes); + if( err != cudaSuccess ) { + printf(" cudaMemSet failed for %lu %s \n",bytes,cudaGetErrorString(err)); fflush(stdout); + if (acceleratorAbortOnGpuError) assert(err==cudaSuccess); + } +} inline int acceleratorIsCommunicable(void *ptr) { // int uvm=0; diff --git a/tests/lanczos/Test_dwf_lanczos.cc b/tests/lanczos/Test_dwf_lanczos.cc index 00d29ec0..1fe29bb2 100644 --- a/tests/lanczos/Test_dwf_lanczos.cc +++ b/tests/lanczos/Test_dwf_lanczos.cc @@ -31,14 +31,38 @@ using namespace std; using namespace Grid; ; -typedef typename GparityDomainWallFermionR::FermionField FermionField; +template +struct Setup{}; -RealD AllZero(RealD x){ return 0.;} +template<> +struct Setup{ + static GparityMobiusFermionR* getAction(LatticeGaugeField &Umu, + GridCartesian* FGrid, GridRedBlackCartesian* FrbGrid, GridCartesian* UGrid, GridRedBlackCartesian* UrbGrid){ + RealD mass=0.01; + RealD M5=1.8; + RealD mob_b=1.5; + GparityMobiusFermionD ::ImplParams params; + std::vector twists({1,1,1,0}); + params.twists = twists; + return new GparityMobiusFermionR(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params); + } +}; -int main (int argc, char ** argv) -{ - Grid_init(&argc,&argv); +template<> +struct Setup{ + static DomainWallFermionR* getAction(LatticeGaugeField &Umu, + GridCartesian* FGrid, GridRedBlackCartesian* FrbGrid, GridCartesian* UGrid, GridRedBlackCartesian* UrbGrid){ + RealD mass=0.01; + RealD M5=1.8; + return new DomainWallFermionR(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + } +}; + + +template +void run(){ + typedef typename Action::FermionField FermionField; const int Ls=8; GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); @@ -56,24 +80,10 @@ int main (int argc, char ** argv) LatticeGaugeField Umu(UGrid); SU::HotConfiguration(RNG4, Umu); - std::vector U(4,UGrid); - for(int mu=0;mu(Umu,mu); - } - - RealD mass=0.01; - RealD M5=1.8; - RealD mob_b=1.5; -// DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); - GparityMobiusFermionD ::ImplParams params; - std::vector twists({1,1,1,0}); - params.twists = twists; - GparityMobiusFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params); - -// MdagMLinearOperator HermOp(Ddwf); -// SchurDiagTwoOperator HermOp(Ddwf); - SchurDiagTwoOperator HermOp(Ddwf); -// SchurDiagMooeeOperator HermOp(Ddwf); + Action *action = Setup::getAction(Umu,FGrid,FrbGrid,UGrid,UrbGrid); + + //MdagMLinearOperator HermOp(Ddwf); + SchurDiagTwoOperator HermOp(*action); const int Nstop = 30; const int Nk = 40; @@ -90,8 +100,7 @@ int main (int argc, char ** argv) PlainHermOp Op (HermOp); ImplicitlyRestartedLanczos IRL(OpCheby,Op,Nstop,Nk,Nm,resid,MaxIt); - - + std::vector eval(Nm); FermionField src(FrbGrid); gaussian(RNG5rb,src); @@ -103,6 +112,28 @@ int main (int argc, char ** argv) int Nconv; IRL.calc(eval,evec,src,Nconv); + delete action; +} + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + std::string action = "GparityMobius"; + for(int i=1;i(); + }else if(action == "DWF"){ + run(); + }else{ + std::cout << "Unknown action" << std::endl; + exit(1); + } + Grid_finalize(); }