mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
FTHMC compiled and merged to develop
This commit is contained in:
parent
07e4900218
commit
33097681b9
@ -30,7 +30,7 @@ int getNumBlocksAndThreads(const Iterator n, const size_t sizeofsobj, Iterator &
|
|||||||
cudaGetDevice(&device);
|
cudaGetDevice(&device);
|
||||||
#endif
|
#endif
|
||||||
#ifdef GRID_HIP
|
#ifdef GRID_HIP
|
||||||
hipGetDevice(&device);
|
auto r=hipGetDevice(&device);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Iterator warpSize = gpu_props[device].warpSize;
|
Iterator warpSize = gpu_props[device].warpSize;
|
||||||
|
@ -100,6 +100,9 @@ class GaugeGroup {
|
|||||||
using iGroupMatrix = iScalar<iScalar<iMatrix<vtype, ncolour> > >;
|
using iGroupMatrix = iScalar<iScalar<iMatrix<vtype, ncolour> > >;
|
||||||
template <typename vtype>
|
template <typename vtype>
|
||||||
using iAlgebraVector = iScalar<iScalar<iVector<vtype, AdjointDimension> > >;
|
using iAlgebraVector = iScalar<iScalar<iVector<vtype, AdjointDimension> > >;
|
||||||
|
// Alias template: an iMatrix with size parameter AdjointDimension over vtype,
// nested inside two iScalar levels (same nesting as iAlgebraVector, which uses
// iVector in place of iMatrix).
template <typename vtype>
|
||||||
|
using iSUnAlgebraMatrix =
|
||||||
|
iScalar<iScalar<iMatrix<vtype, AdjointDimension> > >;
|
||||||
// Zero-argument entry point: forwards to the one-argument su2subgroups overload
// selected by the type of the object returned from group_name().
static int su2subgroups(void) { return su2subgroups(group_name()); }
|
static int su2subgroups(void) { return su2subgroups(group_name()); }
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
@ -128,10 +131,19 @@ class GaugeGroup {
|
|||||||
typedef Lattice<vMatrix> LatticeMatrix;
|
typedef Lattice<vMatrix> LatticeMatrix;
|
||||||
typedef Lattice<vMatrixF> LatticeMatrixF;
|
typedef Lattice<vMatrixF> LatticeMatrixF;
|
||||||
typedef Lattice<vMatrixD> LatticeMatrixD;
|
typedef Lattice<vMatrixD> LatticeMatrixD;
|
||||||
|
|
||||||
typedef Lattice<vAlgebraVector> LatticeAlgebraVector;
|
typedef Lattice<vAlgebraVector> LatticeAlgebraVector;
|
||||||
typedef Lattice<vAlgebraVectorF> LatticeAlgebraVectorF;
|
typedef Lattice<vAlgebraVectorF> LatticeAlgebraVectorF;
|
||||||
typedef Lattice<vAlgebraVectorD> LatticeAlgebraVectorD;
|
typedef Lattice<vAlgebraVectorD> LatticeAlgebraVectorD;
|
||||||
|
|
||||||
|
// iSUnAlgebraMatrix instantiated over vComplex, vComplexF and vComplexD.
typedef iSUnAlgebraMatrix<vComplex> vAlgebraMatrix;
|
||||||
|
typedef iSUnAlgebraMatrix<vComplexF> vAlgebraMatrixF;
|
||||||
|
typedef iSUnAlgebraMatrix<vComplexD> vAlgebraMatrixD;
|
||||||
|
|
||||||
|
// Lattice<> wrappers of the three matrix types above; LatticeAlgebraMatrix is
// the output type taken by LieAlgebraProject.
typedef Lattice<vAlgebraMatrix> LatticeAlgebraMatrix;
|
||||||
|
typedef Lattice<vAlgebraMatrixF> LatticeAlgebraMatrixF;
|
||||||
|
typedef Lattice<vAlgebraMatrixD> LatticeAlgebraMatrixD;
|
||||||
|
|
||||||
|
|
||||||
typedef iSU2Matrix<Complex> SU2Matrix;
|
typedef iSU2Matrix<Complex> SU2Matrix;
|
||||||
typedef iSU2Matrix<ComplexF> SU2MatrixF;
|
typedef iSU2Matrix<ComplexF> SU2MatrixF;
|
||||||
@ -160,7 +172,7 @@ class GaugeGroup {
|
|||||||
return generator(lieIndex, ta, group_name());
|
return generator(lieIndex, ta, group_name());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Three-argument entry point: passes i1 and i2 (by reference) and su2_index on to
// the four-argument su2SubGroupIndex overload selected by group_name().
// The second signature line below carries accelerator_inline; LieAlgebraProject
// calls this function from its host-side loop, outside the accelerator_for body.
static void su2SubGroupIndex(int &i1, int &i2, int su2_index) {
|
static accelerator_inline void su2SubGroupIndex(int &i1, int &i2, int su2_index) {
|
||||||
return su2SubGroupIndex(i1, i2, su2_index, group_name());
|
return su2SubGroupIndex(i1, i2, su2_index, group_name());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -389,6 +401,52 @@ class GaugeGroup {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Ta are hermitian (?)
|
||||||
|
// Anti herm is i Ta basis
|
||||||
|
// LieAlgebraProject: for every outer site ss, assigns the entries (row, b) of
// out[ss] from the colour-matrix entries of in[ss], with N = ncolour:
//   row 2*s     : 0.5*( Re in(i2,i1) - Re in(i1,i2) )
//   row 2*s + 1 : 0.5*( Im in(i1,i2) + Im in(i2,i1) )
//       for s = 0 .. N(N-1)/2 - 1, with (i1,i2) supplied by su2SubGroupIndex(i1,i2,s)
//   row N(N-1) + d : Im( sum_{i<k} in(i,i) - k*in(k,k) ) / sqrt(2k(k+1)), k = d+1
//       for d = 0 .. N-2
// That is N*N - 1 rows in total. Only column b is written; the other entries of
// out are not touched here.
//   out : destination lattice of algebra matrices (column b is overwritten)
//   in  : source lattice of colour matrices (read only)
//   b   : column index of out to fill
// NOTE(review): assumes AdjointDimension >= N*N - 1 and 0 <= b < AdjointDimension;
// neither is checked in this function -- confirm against callers.
static void LieAlgebraProject(LatticeAlgebraMatrix &out,const LatticeMatrix &in, int b)
|
||||||
|
{
|
||||||
|
conformable(in, out);
|
||||||
|
GridBase *grid = out.Grid();
|
||||||
|
// NOTE(review): this lattice-sized `tmp` is never referenced; the `auto tmp`
// declared inside the second accelerator_for body is a separate variable.
LatticeComplex tmp(grid);
|
||||||
|
// NOTE(review): `ta` is declared but not used anywhere in this function.
Matrix ta;
|
||||||
|
// Using Luchang's projection convention
|
||||||
|
// 2 Tr{Ta Tb} A_b= 2/2 delta ab A_b = A_a
|
||||||
|
autoView(out_v,out,AcceleratorWrite);
|
||||||
|
autoView(in_v,in,AcceleratorRead);
|
||||||
|
int N = ncolour;
|
||||||
|
// NNm1 = N(N-1) is the first row index used by the diagonal loop below;
// hNNm1 = N(N-1)/2 is the trip count of the off-diagonal loop.
int NNm1 = N * (N - 1);
|
||||||
|
int hNNm1= NNm1/2;
|
||||||
|
// NOTE(review): `sqrt_2` and `ci` are declared but not used in this function.
RealD sqrt_2 = sqrt(2.0);
|
||||||
|
Complex ci(0.0,1.0);
|
||||||
|
// Off-diagonal part: each su2Index yields an index pair (i1,i2) and fills two
// consecutive rows (ax, ay) of column b.
for(int su2Index=0;su2Index<hNNm1;su2Index++){
|
||||||
|
int i1, i2;
|
||||||
|
su2SubGroupIndex(i1, i2, su2Index);
|
||||||
|
int ax = su2Index*2;
|
||||||
|
int ay = su2Index*2+1;
|
||||||
|
// One launch over all outer sites per su2Index; the third argument here is 1.
accelerator_for(ss,grid->oSites(),1,{
|
||||||
|
// in is traceless ANTI-hermitian whereas Grid generators are Hermitian.
|
||||||
|
// trace( Ta x Ci in)
|
||||||
|
// Bet I need to move to real part with mult by -i
|
||||||
|
out_v[ss]()()(ax,b) = 0.5*(real(in_v[ss]()()(i2,i1)) - real(in_v[ss]()()(i1,i2)));
|
||||||
|
out_v[ss]()()(ay,b) = 0.5*(imag(in_v[ss]()()(i1,i2)) + imag(in_v[ss]()()(i2,i1)));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
// Diagonal part: rows NNm1 .. NNm1+N-2 of column b.
for(int diagIndex=0;diagIndex<N-1;diagIndex++){
|
||||||
|
int k = diagIndex + 1; // diagIndex starts from 0
|
||||||
|
int a = NNm1+diagIndex;
|
||||||
|
RealD scale = 1.0/sqrt(2.0*k*(k+1));
|
||||||
|
// NOTE(review): the third argument is vComplex::Nsimd() here but 1 in the loop
// above, although both bodies index out_v[ss] / in_v[ss] directly -- confirm
// which value is intended.
accelerator_for(ss,grid->oSites(),vComplex::Nsimd(),{
|
||||||
|
// tmp accumulates in(0,0) + ... + in(k-1,k-1), then subtracts k*in(k,k).
auto tmp = in_v[ss]()()(0,0);
|
||||||
|
for(int i=1;i<k;i++){
|
||||||
|
tmp=tmp+in_v[ss]()()(i,i);
|
||||||
|
}
|
||||||
|
tmp = tmp - in_v[ss]()()(k,k)*k;
|
||||||
|
out_v[ss]()()(a,b) =imag(tmp) * scale;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template <int ncolour>
|
template <int ncolour>
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
// doesn't get found by the scripts/filelist during bootstrapping.
|
// doesn't get found by the scripts/filelist during bootstrapping.
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
// Overload taking an unnamed GroupName::SU argument; returns
// ncolour*(ncolour-1)/2 using integer arithmetic.
// NOTE(review): ONLY_IF_SU is defined outside this view; presumably it restricts
// this overload to SU groups -- confirm.
template <ONLY_IF_SU>
|
template <ONLY_IF_SU>
|
||||||
static int su2subgroups(GroupName::SU) { return (ncolour * (ncolour - 1)) / 2; }
|
static int su2subgroups(GroupName::SU) { return (ncolour * (ncolour - 1)) / 2; }
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
@ -576,3 +577,4 @@ static void RandomGaugeTransform(GridParallelRNG &pRNG, typename Gimpl::GaugeFie
|
|||||||
LieRandomize(pRNG,g,1.0);
|
LieRandomize(pRNG,g,1.0);
|
||||||
GaugeTransform<Gimpl>(Umu,g);
|
GaugeTransform<Gimpl>(Umu,g);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -147,7 +147,7 @@ void acceleratorInit(void)
|
|||||||
#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("AcceleratorHipInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory);
|
#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("AcceleratorHipInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory);
|
||||||
#define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d");
|
#define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d");
|
||||||
|
|
||||||
hipGetDeviceProperties(&gpu_props[i], i);
|
auto r=hipGetDeviceProperties(&gpu_props[i], i);
|
||||||
hipDeviceProp_t prop;
|
hipDeviceProp_t prop;
|
||||||
prop = gpu_props[i];
|
prop = gpu_props[i];
|
||||||
totalDeviceMem = prop.totalGlobalMem;
|
totalDeviceMem = prop.totalGlobalMem;
|
||||||
|
@ -405,7 +405,7 @@ void LambdaApply(uint64_t numx, uint64_t numy, uint64_t numz, lambda Lambda)
|
|||||||
|
|
||||||
#define accelerator_barrier(dummy) \
|
#define accelerator_barrier(dummy) \
|
||||||
{ \
|
{ \
|
||||||
hipStreamSynchronize(computeStream); \
|
auto r=hipStreamSynchronize(computeStream); \
|
||||||
auto err = hipGetLastError(); \
|
auto err = hipGetLastError(); \
|
||||||
if ( err != hipSuccess ) { \
|
if ( err != hipSuccess ) { \
|
||||||
printf("After hipDeviceSynchronize() : HIP error %s \n", hipGetErrorString( err )); \
|
printf("After hipDeviceSynchronize() : HIP error %s \n", hipGetErrorString( err )); \
|
||||||
@ -438,19 +438,19 @@ inline void *acceleratorAllocDevice(size_t bytes)
|
|||||||
return ptr;
|
return ptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Releases ptr by calling hipFree(ptr).
// NOTE(review): in the second form the value returned by hipFree (a hipError_t
// per the HIP API) is stored in `r` but never inspected, so a failed free goes
// unreported.
inline void acceleratorFreeShared(void *ptr){ hipFree(ptr);};
|
inline void acceleratorFreeShared(void *ptr){ auto r=hipFree(ptr);};
|
||||||
// Releases ptr by calling hipFree(ptr); the body is the same as acceleratorFreeShared.
// NOTE(review): in the second form the value returned by hipFree is stored in `r`
// but never inspected.
inline void acceleratorFreeDevice(void *ptr){ hipFree(ptr);};
|
inline void acceleratorFreeDevice(void *ptr){ auto r=hipFree(ptr);};
|
||||||
// Copies `bytes` bytes from `from` to `to` via hipMemcpy with hipMemcpyHostToDevice.
// The wrapper takes (from, to) but hipMemcpy is called with the destination first,
// hence the swapped argument order in the call.
// NOTE(review): in the second form the value returned by hipMemcpy is stored in
// `r` but never inspected.
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { hipMemcpy(to,from,bytes, hipMemcpyHostToDevice);}
|
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { auto r=hipMemcpy(to,from,bytes, hipMemcpyHostToDevice);}
|
||||||
// Copies `bytes` bytes from `from` to `to` via hipMemcpy with hipMemcpyDeviceToHost.
// The wrapper takes (from, to) but hipMemcpy is called with the destination first.
// NOTE(review): in the second form the value returned by hipMemcpy is stored in
// `r` but never inspected.
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ hipMemcpy(to,from,bytes, hipMemcpyDeviceToHost);}
|
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ auto r=hipMemcpy(to,from,bytes, hipMemcpyDeviceToHost);}
|
||||||
//inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { hipMemcpy(to,from,bytes, hipMemcpyDeviceToDevice);}
|
//inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { hipMemcpy(to,from,bytes, hipMemcpyDeviceToDevice);}
|
||||||
//inline void acceleratorCopySynchronise(void) { }
|
//inline void acceleratorCopySynchronise(void) { }
|
||||||
// Forwards base, value and bytes unchanged to hipMemset.
// NOTE(review): in the second form the value returned by hipMemset is stored in
// `r` but never inspected.
inline void acceleratorMemSet(void *base,int value,size_t bytes) { hipMemset(base,value,bytes);}
|
inline void acceleratorMemSet(void *base,int value,size_t bytes) { auto r=hipMemset(base,value,bytes);}
|
||||||
|
|
||||||
// Issues hipMemcpyDtoDAsync of `bytes` bytes from `from` to `to` on copyStream.
// This function does not wait on the stream itself; acceleratorCopySynchronise
// is the wrapper that synchronises copyStream.
// NOTE(review): in the second form the value returned by hipMemcpyDtoDAsync is
// stored in `r` but never inspected.
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch
|
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch
|
||||||
{
|
{
|
||||||
hipMemcpyDtoDAsync(to,from,bytes, copyStream);
|
auto r=hipMemcpyDtoDAsync(to,from,bytes, copyStream);
|
||||||
}
|
}
|
||||||
// Calls hipStreamSynchronize on copyStream, the stream used by
// acceleratorCopyDeviceToDeviceAsynch.
// NOTE(review): in the second form the value returned by hipStreamSynchronize is
// stored in `r` but never inspected.
inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream); };
|
inline void acceleratorCopySynchronise(void) { auto r=hipStreamSynchronize(copyStream); };
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -23,7 +23,7 @@ echo mpfr X$MPFR
|
|||||||
--disable-fermion-reps \
|
--disable-fermion-reps \
|
||||||
--disable-gparity \
|
--disable-gparity \
|
||||||
CXX=hipcc MPICXX=mpicxx \
|
CXX=hipcc MPICXX=mpicxx \
|
||||||
CXXFLAGS="-fPIC --offload-arch=gfx90a -I/opt/rocm/include/ -std=c++14 -I/opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1/include" \
|
CXXFLAGS="-fPIC --offload-arch=gfx90a -I/opt/rocm/include/ -std=c++17 -I/opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1/include" \
|
||||||
LDFLAGS="-L/opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1/lib -lmpi -L/opt/cray/pe/mpich/8.1.23/gtl/lib -lmpi_gtl_hsa -lamdhip64 -fopenmp"
|
LDFLAGS="-L/opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1/lib -lmpi -L/opt/cray/pe/mpich/8.1.23/gtl/lib -lmpi_gtl_hsa -lamdhip64 -fopenmp"
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user