mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-14 01:35:36 +00:00
offload more of mgrid to GPU
This commit is contained in:
parent
736b19485e
commit
d18994eddc
@ -253,27 +253,6 @@ public:
|
|||||||
Chebyshev<FineField> ChebFilt (0.03,64.0,500);
|
Chebyshev<FineField> ChebFilt (0.03,64.0,500);
|
||||||
Chebyshev<FineField> ChebDependent(0.01,64.0,200);
|
Chebyshev<FineField> ChebDependent(0.01,64.0,200);
|
||||||
|
|
||||||
#if 0
|
|
||||||
auto latt_size = FineGrid->GlobalDimensions();
|
|
||||||
Coordinate Fourier[dependent] = {
|
|
||||||
Coordinate({0, 0,0,0,0}),
|
|
||||||
Coordinate({0, 1,0,0,0}),
|
|
||||||
Coordinate({0,-1,0,0,0}),
|
|
||||||
Coordinate({0,0, 1,0,0}),
|
|
||||||
Coordinate({0,0,-1,0,0}),
|
|
||||||
Coordinate({0,0,0, 1,0}),
|
|
||||||
Coordinate({0,0,0,-1,0}),
|
|
||||||
Coordinate({0,0,0,0, 1}),
|
|
||||||
Coordinate({0,0,0,0,-1})
|
|
||||||
};
|
|
||||||
|
|
||||||
ComplexD ci(0.0,1.0);
|
|
||||||
Lattice<CComplex> C(FineGrid);
|
|
||||||
Lattice<CComplex> coor(FineGrid);
|
|
||||||
FineField save(FineGrid);
|
|
||||||
FineField tmp (FineGrid);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
FineField noise(FineGrid);
|
FineField noise(FineGrid);
|
||||||
FineField Mn(FineGrid);
|
FineField Mn(FineGrid);
|
||||||
|
|
||||||
@ -295,17 +274,6 @@ public:
|
|||||||
if(b==bb) {
|
if(b==bb) {
|
||||||
ChebFilt(hermop,noise,Mn);
|
ChebFilt(hermop,noise,Mn);
|
||||||
} else {
|
} else {
|
||||||
#if 0
|
|
||||||
C=Zero();
|
|
||||||
for(int mu=0;mu<5;mu++){
|
|
||||||
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
|
|
||||||
LatticeCoordinate(coor,mu);
|
|
||||||
C = C + (TwoPiL * Fourier[dep][mu]) * coor;
|
|
||||||
}
|
|
||||||
C = exp(C*ci); // Fourier phase
|
|
||||||
noise=C*save;
|
|
||||||
hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise ["<<b<<"] <n|MdagM|n> "<<norm2(Mn)<<std::endl;
|
|
||||||
#endif
|
|
||||||
ChebDependent(hermop,noise,Mn);
|
ChebDependent(hermop,noise,Mn);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -111,7 +111,10 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
|
|||||||
|
|
||||||
auto fineData_ = fineData.View();
|
auto fineData_ = fineData.View();
|
||||||
auto coarseData_ = coarseData.View();
|
auto coarseData_ = coarseData.View();
|
||||||
// Loop over coarse parallel, and then loop over fine associated with coarse.
|
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// To make this lock free, loop over coarse parallel, and then loop over fine associated with coarse.
|
||||||
|
// Otherwise do the fine inner product per site, and make the update atomic
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
thread_for( sf, fine->oSites(), {
|
thread_for( sf, fine->oSites(), {
|
||||||
int sc;
|
int sc;
|
||||||
Coordinate coor_c(_ndimension);
|
Coordinate coor_c(_ndimension);
|
||||||
@ -120,10 +123,11 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
|
|||||||
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
|
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
|
||||||
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
|
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
|
||||||
|
|
||||||
thread_critical {
|
|
||||||
for(int i=0;i<nbasis;i++) {
|
for(int i=0;i<nbasis;i++) {
|
||||||
auto Basis_ = Basis[i].View();
|
auto Basis_ = Basis[i].View();
|
||||||
coarseData_[sc](i)=coarseData_[sc](i) + innerProduct(Basis_[sf],fineData_[sf]);
|
auto ip = innerProduct(Basis_[sf],fineData_[sf]);
|
||||||
|
thread_critical {
|
||||||
|
coarseData_[sc](i)=coarseData_[sc](i) + ip;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
@ -160,7 +164,7 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
|
|||||||
auto fineY_ = fineY.View();
|
auto fineY_ = fineY.View();
|
||||||
auto coarseA_= coarseA.View();
|
auto coarseA_= coarseA.View();
|
||||||
|
|
||||||
thread_for(sf, fine->oSites(), {
|
accelerator_for(sf, fine->oSites(), 1, {
|
||||||
|
|
||||||
int sc;
|
int sc;
|
||||||
Coordinate coor_c(_ndimension);
|
Coordinate coor_c(_ndimension);
|
||||||
@ -196,7 +200,7 @@ inline void blockInnerProduct(Lattice<CComplex> &CoarseInner,
|
|||||||
|
|
||||||
fine_inner = localInnerProduct(fineX,fineY);
|
fine_inner = localInnerProduct(fineX,fineY);
|
||||||
blockSum(coarse_inner,fine_inner);
|
blockSum(coarse_inner,fine_inner);
|
||||||
thread_for(ss, coarse->oSites(),{
|
accelerator_for(ss, coarse->oSites(), 1, {
|
||||||
CoarseInner_[ss] = coarse_inner_[ss];
|
CoarseInner_[ss] = coarse_inner_[ss];
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -321,7 +325,7 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
|
|||||||
auto coarseData_ = coarseData.View();
|
auto coarseData_ = coarseData.View();
|
||||||
|
|
||||||
// Loop with a cache friendly loop ordering
|
// Loop with a cache friendly loop ordering
|
||||||
thread_for(sf,fine->oSites(),{
|
acceelerator_for(sf,fine->oSites(),1,{
|
||||||
int sc;
|
int sc;
|
||||||
Coordinate coor_c(_ndimension);
|
Coordinate coor_c(_ndimension);
|
||||||
Coordinate coor_f(_ndimension);
|
Coordinate coor_f(_ndimension);
|
||||||
|
Loading…
Reference in New Issue
Block a user