diff --git a/Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h b/Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h index 6752ea19..2c2402f8 100644 --- a/Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h +++ b/Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h @@ -53,6 +53,9 @@ NAMESPACE_BEGIN(Grid); MultiShiftFunction PowerQuarter; MultiShiftFunction PowerNegQuarter; + MultiShiftFunction MDPowerNegHalf; + MultiShiftFunction MDPowerQuarter; + private: FermionOperator & NumOp;// the basic operator @@ -81,11 +84,13 @@ NAMESPACE_BEGIN(Grid); remez.generateApprox(param.degree,1,2); PowerHalf.Init(remez,param.tolerance,false); PowerNegHalf.Init(remez,param.tolerance,true); + MDPowerNegHalf.Init(remez,param.mdtolerance,true); // MdagM^(+- 1/4) std::cout< MpvPhi_k (n_pv,NumOp.FermionRedBlackGrid()); std::vector MpvMfMpvPhi_k(n_pv,NumOp.FermionRedBlackGrid()); @@ -246,8 +251,8 @@ NAMESPACE_BEGIN(Grid); SchurDifferentiableOperator VdagV(NumOp); SchurDifferentiableOperator MdagM(DenOp); - ConjugateGradientMultiShift msCG_V(param.MaxIter,PowerQuarter); - ConjugateGradientMultiShift msCG_M(param.MaxIter,PowerNegHalf); + ConjugateGradientMultiShift msCG_V(param.MaxIter,MDPowerQuarter); + ConjugateGradientMultiShift msCG_M(param.MaxIter,MDPowerNegHalf); msCG_V(VdagV,PhiOdd,MpvPhi_k,MpvPhi); msCG_M(MdagM,MpvPhi,MfMpvPhi_k,MfMpvPhi); @@ -266,7 +271,7 @@ NAMESPACE_BEGIN(Grid); //(1) for(int k=0;k CG(StoppingCondition,MaxCGIterations); + ConjugateGradient MDCG(MDStoppingCondition,MaxCGIterations); //////////////////////////////////// // Collect actions //////////////////////////////////// ActionLevel Level1(1); ActionLevel Level2(4); - ActionLevel Level3(6); + ActionLevel Level3(8); //////////////////////////////////// // Strange action @@ -226,7 +228,7 @@ int main(int argc, char **argv) { Numerators.push_back (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, Params)); Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, Params)); if(h!=0) { - Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction(*Numerators[h],*Denominators[h],CG,CG)); + Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction(*Numerators[h],*Denominators[h],MDCG,CG)); } else { Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction(*Numerators[h],*Denominators[h],OFRp)); Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction(*Numerators[h],*Denominators[h],OFRp)); @@ -241,7 +243,7 @@ int main(int argc, char **argv) { for(int h=0;h dwf.64.64.64.256.8node +srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_dwf_fp32 $PARAMS > dwf.${vol}.8node.shm-mpi1 +done - -PARAMS=" --accelerator-threads 16 --grid 64.64.64.32 --mpi 4.4.4.1 --comms-overlap --shm 2048 --shm-mpi 1" +PARAMS=" --accelerator-threads 8 --grid 64.64.64.32 --mpi 2.2.2.8 --comms-overlap --shm 2048 --shm-mpi 1" echo $PARAMS -srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_dwf_fp32 $PARAMS > dwf.64.64.64.32.8node +srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_ITT $PARAMS > itt.8node -PARAMS=" --accelerator-threads 16 --grid 64.64.64.32 --mpi 4.4.4.1 --comms-overlap --shm 2048 --shm-mpi 0" +PARAMS=" --accelerator-threads 8 --grid 64.64.64.32 --mpi 2.2.2.8 --comms-overlap --shm 2048 --shm-mpi 0" echo $PARAMS -#srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_dwf_fp32 $PARAMS > dwf.64.64.64.32.8node.shm0 - -PARAMS=" --accelerator-threads 16 --grid 64.64.64.32 --mpi 2.2.2.8 --comms-overlap --shm 2048 --shm-mpi 1" -echo $PARAMS -#srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_ITT $PARAMS > itt.8node - -PARAMS=" --accelerator-threads 16 --grid 64.64.64.32 --mpi 2.2.2.8 --comms-overlap --shm 2048 --shm-mpi 0" -echo $PARAMS -#srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_ITT $PARAMS > itt.8node_shm0 - - +srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_ITT $PARAMS > itt.8node_shm0 diff --git a/systems/Crusher/sourceme.sh b/systems/Crusher/sourceme.sh index 83bfe57c..051014dc 100644 --- a/systems/Crusher/sourceme.sh +++ b/systems/Crusher/sourceme.sh @@ -1,6 +1,9 @@ module load PrgEnv-gnu -module load rocm/4.5.0 +module load rocm/5.1.0 +module load cray-mpich/8.1.15 module load gmp -module load cray-fftw +#module load cray-fftw module load craype-accel-amd-gfx90a export LD_LIBRARY_PATH=/opt/gcc/mpfr/3.1.4/lib:$LD_LIBRARY_PATH +#Hack for lib +export LD_LIBRARY_PATH=`pwd`:$LD_LIBRARY_PATH