1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Merge branch 'develop' of https://github.com/paboyle/Grid into develop

This commit is contained in:
Peter Boyle 2021-03-12 09:31:50 -05:00
commit 226be84937
4 changed files with 99 additions and 78 deletions

View File

@ -43,7 +43,7 @@ inline void whereWolf(Lattice<vobj> &ret,const Lattice<iobj> &predicate,Lattice<
conformable(iftrue,predicate);
conformable(iftrue,ret);
GridBase *grid=iftrue._grid;
GridBase *grid=iftrue.Grid();
typedef typename vobj::scalar_object scalar_object;
typedef typename vobj::scalar_type scalar_type;
@ -52,22 +52,23 @@ inline void whereWolf(Lattice<vobj> &ret,const Lattice<iobj> &predicate,Lattice<
const int Nsimd = grid->Nsimd();
std::vector<Integer> mask(Nsimd);
std::vector<scalar_object> truevals (Nsimd);
std::vector<scalar_object> falsevals(Nsimd);
parallel_for(int ss=0;ss<iftrue._grid->oSites(); ss++){
extract(iftrue._odata[ss] ,truevals);
extract(iffalse._odata[ss] ,falsevals);
extract<vInteger,Integer>(TensorRemove(predicate._odata[ss]),mask);
for(int s=0;s<Nsimd;s++){
if (mask[s]) falsevals[s]=truevals[s];
autoView(iftrue_v,iftrue,CpuRead);
autoView(iffalse_v,iffalse,CpuRead);
autoView(predicate_v,predicate,CpuRead);
autoView(ret_v,ret,CpuWrite);
Integer NN= grid->oSites();
thread_for(ss,NN,{
Integer mask;
scalar_object trueval;
scalar_object falseval;
for(int l=0;l<Nsimd;l++){
trueval =extractLane(l,iftrue_v[ss]);
falseval=extractLane(l,iffalse_v[ss]);
mask =extractLane(l,predicate_v[ss]);
if (mask) falseval=trueval;
insertLane(l,ret_v[ss],falseval);
}
merge(ret._odata[ss],falsevals);
}
});
}
template<class vobj,class iobj>
@ -76,9 +77,9 @@ inline Lattice<vobj> whereWolf(const Lattice<iobj> &predicate,Lattice<vobj> &ift
conformable(iftrue,iffalse);
conformable(iftrue,predicate);
Lattice<vobj> ret(iftrue._grid);
Lattice<vobj> ret(iftrue.Grid());
where(ret,predicate,iftrue,iffalse);
whereWolf(ret,predicate,iftrue,iffalse);
return ret;
}

View File

@ -1,5 +1,16 @@
#pragma once
// Compile-time guard for NVCC builds: stop with an explicit #error on the
// compiler releases that the messages below report as broken on Ampere
// (tracked in Github issue 346). Only versions 11.0 and 11.1 are rejected
// here; every other NVCC version passes through this block unchanged.
#if defined(__NVCC__)
// NVCC 11.0
#if (__CUDACC_VER_MAJOR__ == 11) && (__CUDACC_VER_MINOR__ == 0)
#error "NVCC version 11.0 breaks on Ampere, see Github issue 346"
#endif
// NVCC 11.1
#if (__CUDACC_VER_MAJOR__ == 11) && (__CUDACC_VER_MINOR__ == 1)
#error "NVCC version 11.1 breaks on Ampere, see Github issue 346"
#endif
#endif
#if defined(__clang__)
#if __clang_major__ < 3

View File

@ -444,7 +444,7 @@ case ${ax_cv_cxx_compiler_vendor} in
SIMD_FLAGS='-mavx2 -mfma -mf16c';;
AVX512)
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';;
SIMD_FLAGS='-mavx512f -mavx512cd';;
SKL)
AC_DEFINE([AVX512],[1],[AVX512 intrinsics for SkyLake Xeon])
SIMD_FLAGS='-march=skylake-avx512';;
@ -497,6 +497,9 @@ case ${ax_cv_cxx_compiler_vendor} in
AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
SIMD_FLAGS='-march=core-avx2 -xcore-avx2';;
AVX512)
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
SIMD_FLAGS='-xcommon-avx512';;
SKL)
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
SIMD_FLAGS='-xcore-avx512';;
KNC)

View File

@ -51,87 +51,93 @@ int main (int argc, char ** argv)
}
GridCartesian GRID(latt_size,simd_layout,mpi_layout);
LatticeComplexD zz(&GRID);
LatticeInteger coor(&GRID);
LatticeComplexD rn(&GRID);
LatticeComplexD sl(&GRID);
zz = ComplexD(0.0,0.0);
GridParallelRNG RNG(&GRID);
RNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
gaussian(RNG,rn);
RealD nn=norm2(rn);
for(int mu=0;mu<nd;mu++){
RealD ns=0.0;
for(int t=0;t<latt_size[mu];t++){
LatticeCoordinate(coor,mu);
sl=where(coor==Integer(t),rn,zz);
std::cout <<GridLogMessage<< " sl " << sl<<std::endl;
std::cout <<GridLogMessage<<" slice "<<t<<" " << norm2(sl)<<std::endl;
ns=ns+norm2(sl);
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
std::cout<<GridLogMessage<<"== LatticeComplex =="<<std::endl;
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
{
LatticeComplexD zz(&GRID);
LatticeInteger coor(&GRID);
LatticeComplexD rn(&GRID);
LatticeComplexD sl(&GRID);
zz = ComplexD(0.0,0.0);
gaussian(RNG,rn);
RealD nn=norm2(rn);
for(int mu=0;mu<nd;mu++){
RealD ns=0.0;
for(int t=0;t<latt_size[mu];t++){
LatticeCoordinate(coor,mu);
sl=where(coor==Integer(t),rn,zz);
std::cout <<GridLogMessage<<" slice "<<t<<" " << norm2(sl)<<std::endl;
ns=ns+norm2(sl);
}
std::cout <<GridLogMessage <<" sliceNorm" <<mu<<" "<< nn <<" "<<ns<<" err " << nn-ns<<std::endl;
assert(abs(nn-ns) < 1.0e-10);
}
std::cout <<GridLogMessage <<" sliceNorm" <<mu<<" "<< nn <<" "<<ns<<" " << nn-ns<<std::endl;
}
unsigned int tmin = 3;
int Ls = 2;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
std::vector<int> seeds4({1,2,3,4});
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
LatticeInteger lcoor(UGrid); LatticeCoordinate(lcoor,Nd-1);
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
std::cout<<GridLogMessage<<"== LatticeFermion =="<<std::endl;
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
{
LatticeFermionD zz(&GRID);
LatticeInteger coor(&GRID);
LatticeFermionD rn(&GRID);
LatticeFermionD sl(&GRID);
LatticeFermion q_outF(FGrid); q_outF=0.0;
LatticeFermion tmpF(UGrid); random(RNG4,tmpF);
LatticeFermion tmp2F(UGrid);
LatticeFermion ZZF (UGrid); ZZF=0.0;
zz = ComplexD(0.0,0.0);
RealD nA=0.0;
RealD nB=0.0;
for(int s=0;s<Ls;s++){
nB = nB + norm2(tmpF);
tmp2F = where((lcoor>=tmin),tmpF,ZZF);
nA = nA + norm2(tmp2F);
InsertSlice(tmp2F, q_outF, s , 0);
gaussian(RNG,rn);
RealD nn=norm2(rn);
for(int mu=0;mu<nd;mu++){
RealD ns=0.0;
for(int t=0;t<latt_size[mu];t++){
LatticeCoordinate(coor,mu);
sl=where(coor==Integer(t),rn,zz);
std::cout <<GridLogMessage<<" slice "<<t<<" " << norm2(sl)<<std::endl;
ns=ns+norm2(sl);
}
std::cout <<GridLogMessage <<" sliceNorm" <<mu<<" "<< nn <<" "<<ns<<" err " << nn-ns<<std::endl;
assert(abs(nn-ns) < 1.0e-10);
}
}
RealD nQO=norm2(q_outF);
std::cout <<GridLogMessage << "norm_before_where: " << nB << std::endl;
std::cout <<GridLogMessage << "norm_after_where: " << nA << std::endl;
std::cout <<GridLogMessage << "norm_q_out: " << nQO << std::endl;
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
std::cout<<GridLogMessage<<"== LatticePropagator =="<<std::endl;
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
LatticePropagator q_outP(FGrid); q_outP=0.0;
LatticePropagator tmpP(UGrid); random(RNG4,tmpP);
LatticePropagator tmp2P(UGrid);
LatticePropagator ZZP (UGrid); ZZP=0.0;
{
LatticePropagatorD zz(&GRID);
LatticeInteger coor(&GRID);
LatticePropagatorD rn(&GRID);
LatticePropagatorD sl(&GRID);
nA=0.0;
nB=0.0;
for(int s=0;s<Ls;s++){
nB = nB + norm2(tmpP);
tmp2P = where((lcoor>=tmin),tmpP,ZZP);
nA = nA + norm2(tmp2P);
InsertSlice(tmp2P, q_outP, s , 0);
zz = ComplexD(0.0,0.0);
gaussian(RNG,rn);
RealD nn=norm2(rn);
for(int mu=0;mu<nd;mu++){
RealD ns=0.0;
for(int t=0;t<latt_size[mu];t++){
LatticeCoordinate(coor,mu);
sl=where(coor==Integer(t),rn,zz);
std::cout <<GridLogMessage<<" slice "<<t<<" " << norm2(sl)<<std::endl;
ns=ns+norm2(sl);
}
std::cout <<GridLogMessage <<" sliceNorm" <<mu<<" "<< nn <<" "<<ns<<" err " << nn-ns<<std::endl;
assert(abs(nn-ns) < 1.0e-10);
}
}
nQO=norm2(q_outP);
std::cout <<GridLogMessage << "norm_before_where: " << nB << std::endl;
std::cout <<GridLogMessage << "norm_after_where: " << nA << std::endl;
std::cout <<GridLogMessage << "norm_q_out: " << nQO << std::endl;
Grid_finalize();
}