Mirror of https://github.com/paboyle/Grid.git (synced 2025-06-19 16:27:05 +01:00)

Compare commits: 3 commits, c4fc972fec ... b50fb34e71
Commits: b50fb34e71, de84d730ff, c74d11e3d7
@@ -74,7 +74,7 @@ public:

  void operator() (const Field &src, Field &psi){

    psi=Zero();
    // psi=Zero();
    RealD cp, ssq,rsq;
    ssq=norm2(src);
    rsq=Tolerance*Tolerance*ssq;
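With psi=Zero() commented out above, the GCR solver now iterates from whatever initial guess the caller passes in rather than forcing a zero start; the subspace setup added further down relies on seeding the solve with a non-trivial guess in one of its variants. As a reference point, here is a minimal self-contained GCR iteration on a tiny non-symmetric system; the 3x3 matrix, tolerances and names are invented for illustration and this is not Grid's implementation:

// Minimal GCR sketch on a small dense non-symmetric system (illustrative only).
// Note the solver starts from the caller-supplied guess x, mirroring the change above.
#include <cstdio>
#include <cmath>
#include <vector>

using Vec = std::vector<double>;
static double dot(const Vec &a, const Vec &b){ double s=0; for(size_t i=0;i<a.size();i++) s+=a[i]*b[i]; return s; }
static void axpy(Vec &y, double a, const Vec &x){ for(size_t i=0;i<y.size();i++) y[i]+=a*x[i]; }

// Invented non-symmetric 3x3 test operator with positive-definite symmetric part
static Vec applyA(const Vec &x){
  return { 4*x[0]+1*x[1]        ,
           2*x[0]+5*x[1]+1*x[2] ,
                  1*x[1]+3*x[2] };
}

int main(){
  Vec b = {1,2,3};
  Vec x = {0.1,0.0,-0.1};            // caller-supplied initial guess (not zeroed)
  Vec r = applyA(x);
  for(int i=0;i<3;i++) r[i] = b[i]-r[i];

  std::vector<Vec> P, Q;             // search directions p_k and q_k = A p_k
  for(int k=0;k<10 && std::sqrt(dot(r,r))>1e-12;k++){
    Vec p = r;                       // a preconditioner would be applied here
    Vec q = applyA(p);
    for(size_t j=0;j<Q.size();j++){  // orthogonalise q against previous q_j
      double beta = dot(Q[j],q)/dot(Q[j],Q[j]);
      axpy(q,-beta,Q[j]); axpy(p,-beta,P[j]);
    }
    double alpha = dot(q,r)/dot(q,q);
    axpy(x, alpha, p);
    axpy(r,-alpha, q);
    P.push_back(p); Q.push_back(q);
    std::printf("iter %d residual %g\n", k, std::sqrt(dot(r,r)));
  }
  return 0;
}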
@@ -30,6 +30,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
/* END LEGAL */
#pragma once

#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h>

NAMESPACE_BEGIN(Grid);

inline RealD AggregatePowerLaw(RealD x)
@@ -124,6 +126,53 @@ public:
    }
  }

  virtual void CreateSubspaceGCR(GridParallelRNG &RNG,LinearOperatorBase<FineField> &DiracOp,int nn=nbasis)
  {
    RealD scale;

    TrivialPrecon<FineField> simple_fine;
    PrecGeneralisedConjugateResidualNonHermitian<FineField> GCR(0.001,30,DiracOp,simple_fine,12,12);
    FineField noise(FineGrid);
    FineField src(FineGrid);
    FineField guess(FineGrid);
    FineField Mn(FineGrid);

    for(int b=0;b<nn;b++){

      subspace[b] = Zero();
      gaussian(RNG,noise);
      scale = std::pow(norm2(noise),-0.5);
      noise=noise*scale;

      DiracOp.Op(noise,Mn); std::cout<<GridLogMessage << "noise ["<<b<<"] <n|Op|n> "<<innerProduct(noise,Mn)<<std::endl;

      for(int i=0;i<3;i++){
        // void operator() (const Field &src, Field &psi){
#if 1
        std::cout << GridLogMessage << " inverting on noise "<<std::endl;
        src = noise;
        guess=Zero();
        GCR(src,guess);
        subspace[b] = guess;
#else
        std::cout << GridLogMessage << " inverting on zero "<<std::endl;
        src=Zero();
        guess = noise;
        GCR(src,guess);
        subspace[b] = guess;
#endif
        noise = subspace[b];
        scale = std::pow(norm2(noise),-0.5);
        noise=noise*scale;
      }

      DiracOp.Op(noise,Mn); std::cout<<GridLogMessage << "filtered["<<b<<"] <f|Op|f> "<<innerProduct(noise,Mn)<<std::endl;
      subspace[b] = noise;

    }
  }

  ////////////////////////////////////////////////////////////////////////////////////////////////
  // World of possibilities here. But have tried quite a lot of experiments (250+ jobs run on Summit)
  // and this is the best I found
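The new CreateSubspaceGCR builds each basis vector by inverse iteration: start from normalised Gaussian noise, solve the non-Hermitian operator against it three times with a loosely converged GCR (tolerance 0.001, 30 iterations), renormalising after each solve, so the low-mode content of the noise is progressively amplified. A toy illustration of that principle, with a diagonal 4x4 stand-in for the operator and an exact solve standing in for the GCR (all values invented for illustration, not Grid code):

// Inverse iteration sketch: repeatedly apply an (approximate) inverse to a random
// vector and renormalise; the iterate aligns with the lowest mode of the operator.
#include <cstdio>
#include <cmath>
#include <array>

int main(){
  // Eigenvalues of the stand-in operator: the low mode is the first component.
  std::array<double,4> eval = {0.01, 1.0, 2.0, 5.0};
  std::array<double,4> v    = {0.3, -0.7, 0.55, 0.4};   // "noise" start vector

  for(int i=0;i<3;i++){                    // three sweeps, as in the loop above
    for(int d=0;d<4;d++) v[d] /= eval[d];  // solve A x = v (A is diagonal here)
    double n2=0; for(double c : v) n2 += c*c;
    double scale = 1.0/std::sqrt(n2);      // scale = pow(norm2(v),-0.5)
    for(double &c : v) c *= scale;
    std::printf("sweep %d: low-mode weight %.6f\n", i, std::fabs(v[0]));
  }
  return 0;
}

After a few such solves the iterate is dominated by the modes the operator inverts poorly, which is exactly the near-null space the coarse grid needs to represent.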
@@ -160,14 +209,21 @@ public:

    int b =0;
    {
      ComplexD ip;
      // Filter
      Chebyshev<FineField> Cheb(lo,hi,orderfilter);
      Cheb(hermop,noise,Mn);
      // normalise
      scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale;
      subspace[b] = Mn;
      hermop.Op(Mn,tmp);
      std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl;

      hermop.Op(Mn,tmp);
      ip= innerProduct(Mn,tmp);
      std::cout<<GridLogMessage << "filt ["<<b<<"] <n|Op|n> "<<norm2(tmp)<<" "<<ip<<std::endl;

      hermop.AdjOp(Mn,tmp);
      ip = innerProduct(Mn,tmp);
      std::cout<<GridLogMessage << "filt ["<<b<<"] <n|AdjOp|n> "<<norm2(tmp)<<" "<<ip<<std::endl;
      b++;
    }
@@ -213,8 +269,18 @@ public:
      Mn=*Tnp;
      scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale;
      subspace[b] = Mn;
      hermop.Op(Mn,tmp);
      std::cout<<GridLogMessage << n<<" filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl;

      ComplexD ip;

      hermop.Op(Mn,tmp);
      ip= innerProduct(Mn,tmp);
      std::cout<<GridLogMessage << "filt ["<<b<<"] <n|Op|n> "<<norm2(tmp)<<" "<<ip<<std::endl;

      hermop.AdjOp(Mn,tmp);
      ip = innerProduct(Mn,tmp);
      std::cout<<GridLogMessage << "filt ["<<b<<"] <n|AdjOp|n> "<<norm2(tmp)<<" "<<ip<<std::endl;

      b++;
    }
systems/Aurora/benchmarks/bench16.pbs (new file, 80 lines)
@@ -0,0 +1,80 @@
#!/bin/bash

##PBS -q LatticeQCD_aesp_CNDA
#PBS -q debug-scaling
##PBS -q prod
#PBS -l select=16
#PBS -l walltime=00:20:00
#PBS -A LatticeQCD_aesp_CNDA

cd $PBS_O_WORKDIR

source ../sourceme.sh

cp $PBS_NODEFILE nodefile

export OMP_NUM_THREADS=4
export MPICH_OFI_NIC_POLICY=GPU

#export MPIR_CVAR_CH4_OFI_ENABLE_GPU_PIPELINE=1
#unset MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE
#unset MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE
#unset MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE=0
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE=0
#export MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST=1
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_BUFFER_SZ=1048576
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_THRESHOLD=131072
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_NUM_BUFFERS_PER_CHUNK=16
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_MAX_NUM_BUFFERS=16

#
# Local vol 16.16.16.32
#

VOL 128.64.128.96
MPI 4.4.4.3
NPROC 192
mpiexec -np 192 -ppn 12 -envall ./gpu_tile.sh ./Benchmark_dwf_fp32 --mpi 4.4.4.3 --grid 128.64.128.96 --shm-mpi 0 --shm 2048 --device-mem 32000 --accelerator-threads 32 --comms-overlap

LX=32
LY=16
LZ=32
LT=32

NX=2
NY=2
NZ=4
NT=1

GX=2
GY=2
GZ=1
GT=3

PX=$((NX * GX ))
PY=$((NY * GY ))
PZ=$((NZ * GZ ))
PT=$((NT * GT ))

VX=$((PX * LX ))
VY=$((PY * LY ))
VZ=$((PZ * LZ ))
VT=$((PT * LT ))

NP=$((PX*PY*PZ*PT))
VOL=${VX}.${VY}.${VZ}.${VT}
AT=8
MPI=${PX}.${PY}.${PZ}.${PT}

CMD="mpiexec -np $NP -ppn 12 -envall \
        ./gpu_tile.sh ./Benchmark_dwf_fp32 --mpi $MPI --grid $VOL \
        --shm-mpi 0 --shm 2048 --device-mem 32000 --accelerator-threads $AT --comms-overlap "

echo VOL $VOL
echo MPI $MPI
echo NPROC $NP
echo $CMD
$CMD
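For reference, the arithmetic at the bottom of the script: ranks per dimension are P = N*G, the global lattice extent is V = P*L, and the total rank count is the product of the P's. With the values set above (L = 32.16.32.32, N = 2.2.4.1, G = 2.2.1.3) this reproduces the decomposition of the hard-coded run line earlier in the file: --mpi 4.4.4.3, --grid 128.64.128.96, 192 ranks. A small stand-alone check of the same arithmetic (written in C++ rather than bash, values copied from the script):

// Reproduce the bench16.pbs geometry arithmetic (values copied from the script).
#include <cstdio>

int main(){
  int L[4] = {32,16,32,32};   // per-rank local volume  LX LY LZ LT
  int N[4] = { 2, 2, 4, 1};   // NX NY NZ NT
  int G[4] = { 2, 2, 1, 3};   // GX GY GZ GT
  int P[4], V[4], NP = 1;
  for(int d=0;d<4;d++){
    P[d] = N[d]*G[d];         // ranks per dimension   -> 4 4 4 3
    V[d] = P[d]*L[d];         // global extent         -> 128 64 128 96
    NP  *= P[d];              // total ranks           -> 192
  }
  std::printf("MPI   %d.%d.%d.%d\n", P[0],P[1],P[2],P[3]);
  std::printf("VOL   %d.%d.%d.%d\n", V[0],V[1],V[2],V[3]);
  std::printf("NPROC %d\n", NP);
  return 0;
}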
@@ -30,8 +30,8 @@ echo "rank $PALS_RANKID ; local rank $PALS_LOCAL_RANKID ; ZE_AFFINITY_MASK=$ZE_A

if [ $PALS_RANKID = "0" ]
then
  numactl -p $NUMAP -N $NUMAP unitrace --chrome-kernel-logging --chrome-mpi-logging --chrome-sycl-logging --demangle "$@"
# numactl -p $NUMAP -N $NUMAP "$@"
# numactl -p $NUMAP -N $NUMAP unitrace --chrome-kernel-logging --chrome-mpi-logging --chrome-sycl-logging --demangle "$@"
  numactl -p $NUMAP -N $NUMAP "$@"
else
  numactl -p $NUMAP -N $NUMAP "$@"
fi
@@ -154,6 +154,8 @@ public:
    //  std::cout<<GridLogMessage << "Calling PreSmoother input residual "<<norm2(in) <<std::endl;
    double t;
    // Fine Smoother
    //  out = in;
    out = Zero();
    t=-usecond();
    _PreSmoother(in,out);
    t+=usecond();
@@ -172,6 +174,7 @@ public:

    // Coarse correction
    t=-usecond();
    Csol = Zero();
    _CoarseSolve(Csrc,Csol);
    //Csol=Zero();
    t+=usecond();
@@ -191,6 +194,8 @@ public:

    // Fine Smoother
    t=-usecond();
    // vec2=vec1;
    vec2=Zero();
    _PostSmoother(vec1,vec2);
    t+=usecond();
    std::cout<<GridLogMessage << "PostSmoother took "<< t/1000.0<< "ms" <<std::endl;
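Taken together, the three hunks above make every stage of the two-level cycle start from a zero initial guess: out = Zero() before the pre-smoother, Csol = Zero() before the coarse solve, and vec2 = Zero() before the post-smoother, so each stage acts only on the vector handed to it rather than warm-starting from a previous iterate. The toy two-level cycle below mirrors only that order of operations and the zeroed guesses; the 1D Laplacian, the aggregation-based restriction and prolongation, and the sweep counts are invented for the example and are not Grid's TwoLevelMG:

// Two-level cycle sketch: pre-smooth, coarse-grid correction, post-smooth,
// each stage starting from a zero initial guess as in the hunks above.
// Toy 1D Laplacian with pairwise aggregation (illustrative, not Grid code).
#include <cstdio>
#include <cmath>
#include <vector>

using Vec = std::vector<double>;
static const int NF = 8, NC = 4;

// Fine operator: 1D Laplacian, A = tridiag(-1, 2, -1)
static Vec applyA(const Vec &x){
  Vec y(NF);
  for(int i=0;i<NF;i++)
    y[i] = 2*x[i] - (i>0 ? x[i-1] : 0) - (i<NF-1 ? x[i+1] : 0);
  return y;
}
// Damped-Jacobi smoother for A x = b, starting from x as given
static void smooth(const Vec &b, Vec &x, int nsweeps){
  for(int s=0;s<nsweeps;s++){
    Vec Ax = applyA(x);
    for(int i=0;i<NF;i++) x[i] += (2.0/3.0) * (b[i]-Ax[i]) / 2.0;
  }
}

int main(){
  Vec b(NF,1.0), x(NF,0.0);                  // rhs; "out = Zero()"

  smooth(b,x,4);                             // pre-smooth from zero guess

  Vec r = applyA(x);                         // fine residual r = b - A x
  for(int i=0;i<NF;i++) r[i] = b[i]-r[i];

  Vec rc(NC,0.0), xc(NC,0.0);                // "Csol = Zero()"
  for(int i=0;i<NC;i++) rc[i] = r[2*i]+r[2*i+1];   // restrict (sum over pairs)
  // Galerkin coarse operator for this aggregation is again tridiag(-1,2,-1);
  // "solve" it with many Gauss-Seidel sweeps (effectively exact for 4 unknowns)
  for(int s=0;s<200;s++)
    for(int i=0;i<NC;i++)
      xc[i] = ( rc[i] + (i>0?xc[i-1]:0) + (i<NC-1?xc[i+1]:0) ) / 2.0;
  for(int i=0;i<NF;i++) x[i] += xc[i/2];     // prolong and add correction

  r = applyA(x);                             // new residual
  for(int i=0;i<NF;i++) r[i] = b[i]-r[i];

  Vec z(NF,0.0);                             // "vec2 = Zero()"
  smooth(r,z,4);                             // post-smooth the residual equation
  for(int i=0;i<NF;i++) x[i] += z[i];

  Vec Ax = applyA(x);
  double num=0, den=0;
  for(int i=0;i<NF;i++){ num += (b[i]-Ax[i])*(b[i]-Ax[i]); den += b[i]*b[i]; }
  std::printf("one cycle: |r|/|b| = %.3e\n", std::sqrt(num/den));
  return 0;
}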
@@ -215,7 +220,8 @@ int main (int argc, char ** argv)
  // Construct a coarsened grid
  Coordinate clatt = GridDefaultLatt();
  for(int d=0;d<clatt.size();d++){
    clatt[d] = clatt[d]/4;
    clatt[d] = clatt[d]/2;
    //  clatt[d] = clatt[d]/4;
  }
  GridCartesian *Coarse4d = SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
  GridCartesian *Coarse5d = SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
@@ -244,7 +250,7 @@ int main (int argc, char ** argv)
  DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
  DomainWallFermionD Dpv(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,1.0,M5);

  const int nbasis = 8;
  const int nbasis = 20;
  const int cb = 0 ;
  LatticeFermion prom(FGrid);
@@ -260,7 +266,25 @@ int main (int argc, char ** argv)
  typedef PVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> PVdagM_t;
  typedef ShiftedPVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> ShiftedPVdagM_t;
  PVdagM_t PVdagM(Ddwf,Dpv);
  ShiftedPVdagM_t ShiftedPVdagM(2.0,Ddwf,Dpv);
  //  ShiftedPVdagM_t ShiftedPVdagM(2.0,Ddwf,Dpv);  // 355
  //  ShiftedPVdagM_t ShiftedPVdagM(1.0,Ddwf,Dpv);  // 246
  //  ShiftedPVdagM_t ShiftedPVdagM(0.5,Ddwf,Dpv);  // 183
  //  ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // 145
  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv);  // 134
  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv);  // 127 -- NULL space via inverse iteration
  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv);  // 57 -- NULL space via inverse iteration; 3 iterations
  //  ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // 57 , tighter inversion
  //  ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // nbasis 20 -- 49 iters
  //  ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // nbasis 20 -- 70 iters; asymmetric
  //  ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // 58; Loosen coarse, tighten fine
  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv);  // 56 ...
  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv);  // 51 ... with 24 vecs
  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv);  // 31 ... with 24 vecs and 2^4 blocking
  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv);  // 43 ... with 16 vecs and 2^4 blocking, sloppier
  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv);  // 35 ... with 20 vecs and 2^4 blocking
  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv);  // 35 ... with 20 vecs and 2^4 blocking, looser coarse
  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv);  // 64 ... with 20 vecs, Christoph setup, and 2^4 blocking, looser coarse
  ShiftedPVdagM_t ShiftedPVdagM(0.01,Ddwf,Dpv); //


  // Run power method on HOA??
@@ -269,6 +293,7 @@ int main (int argc, char ** argv)
  // Warning: This routine calls PVdagM.Op, not PVdagM.HermOp
  typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
  Subspace AggregatesPD(Coarse5d,FGrid,cb);
  /*
  AggregatesPD.CreateSubspaceChebyshev(RNG5,
                                       PVdagM,
                                       nbasis,
@@ -278,6 +303,10 @@ int main (int argc, char ** argv)
                                       200,
                                       200,
                                       0.0);
  */
  AggregatesPD.CreateSubspaceGCR(RNG5,
                                 PVdagM,
                                 nbasis);

  LittleDiracOperator LittleDiracOpPV(geom,FGrid,Coarse5d);
  LittleDiracOpPV.CoarsenOperator(PVdagM,AggregatesPD);
@@ -334,12 +363,13 @@ int main (int argc, char ** argv)
  ///////////////////////////////////////

  std::cout<<GridLogMessage<<"******************* "<<std::endl;
  std::cout<<GridLogMessage<<" Coarse Grid Solve "<<std::endl;
  std::cout<<GridLogMessage<<" Coarse Grid Solve -- Level 3 "<<std::endl;
  std::cout<<GridLogMessage<<"******************* "<<std::endl;
  TrivialPrecon<CoarseVector> simple;
  NonHermitianLinearOperator<LittleDiracOperator,CoarseVector> LinOpCoarse(LittleDiracOpPV);
  PrecGeneralisedConjugateResidualNonHermitian<CoarseVector> L2PGCR(1.0e-8, 100, LinOpCoarse,simple,10,10);
  L2PGCR.Level(2);
  //  PrecGeneralisedConjugateResidualNonHermitian<CoarseVector> L2PGCR(1.0e-4, 100, LinOpCoarse,simple,10,10);
  PrecGeneralisedConjugateResidualNonHermitian<CoarseVector> L2PGCR(3.0e-2, 100, LinOpCoarse,simple,10,10);
  L2PGCR.Level(3);
  c_res=Zero();
  L2PGCR(c_src,c_res);
@@ -347,11 +377,12 @@ int main (int argc, char ** argv)
  // Fine grid smoother
  ////////////////////////////////////////
  std::cout<<GridLogMessage<<"******************* "<<std::endl;
  std::cout<<GridLogMessage<<" Fine Grid Smoother "<<std::endl;
  std::cout<<GridLogMessage<<" Fine Grid Smoother -- Level 2 "<<std::endl;
  std::cout<<GridLogMessage<<"******************* "<<std::endl;
  TrivialPrecon<LatticeFermionD> simple_fine;
  //  NonHermitianLinearOperator<PVdagM_t,LatticeFermionD> LinOpSmooth(PVdagM);
  PrecGeneralisedConjugateResidualNonHermitian<LatticeFermionD> SmootherGCR(0.01,10,ShiftedPVdagM,simple_fine,4,4);
  PrecGeneralisedConjugateResidualNonHermitian<LatticeFermionD> SmootherGCR(0.01,1,ShiftedPVdagM,simple_fine,16,16);
  SmootherGCR.Level(2);

  LatticeFermionD f_src(FGrid);
  LatticeFermionD f_res(FGrid);
@@ -364,12 +395,12 @@ int main (int argc, char ** argv)

  TwoLevelMG TwoLevelPrecon(AggregatesPD,
                            PVdagM,
                            SmootherGCR,
                            simple_fine,
                            SmootherGCR,
                            LinOpCoarse,
                            L2PGCR);

  PrecGeneralisedConjugateResidualNonHermitian<LatticeFermion> L1PGCR(1.0e-8,1000,PVdagM,TwoLevelPrecon,8,8);
  PrecGeneralisedConjugateResidualNonHermitian<LatticeFermion> L1PGCR(1.0e-8,1000,PVdagM,TwoLevelPrecon,16,16);
  L1PGCR.Level(1);

  f_res=Zero();