mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
Update to reduce memory footpring in benchmark test
This commit is contained in:
parent
5398b7e7e3
commit
2bf3b4d576
@ -388,6 +388,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
|
||||
// TODO : make a OMP loop on CPU, call threaded bcopy
|
||||
void *shm = (void *) this->ShmBufferTranslate(dest,recv);
|
||||
assert(shm!=NULL);
|
||||
std::cout <<"acceleratorCopyDeviceToDeviceAsynch"<< std::endl;
|
||||
acceleratorCopyDeviceToDeviceAsynch(xmit,shm,bytes);
|
||||
}
|
||||
|
||||
@ -399,12 +400,13 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
|
||||
}
|
||||
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &list,int dir)
|
||||
{
|
||||
acceleratorCopySynchronise(); std::cout << "Copy Synchronised\n"<<std::endl;
|
||||
|
||||
int nreq=list.size();
|
||||
|
||||
if (nreq==0) return;
|
||||
|
||||
std::vector<MPI_Status> status(nreq);
|
||||
acceleratorCopySynchronise();
|
||||
int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
|
||||
assert(ierr==0);
|
||||
list.resize(0);
|
||||
|
@ -306,7 +306,7 @@ inline void acceleratorFreeDevice(void *ptr){free(ptr,*theGridAccelerator);};
|
||||
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) {
|
||||
theGridAccelerator->memcpy(to,from,bytes);
|
||||
}
|
||||
inline void acceleratorCopySynchronise(void) { theGridAccelerator->wait(); }
|
||||
inline void acceleratorCopySynchronise(void) { theGridAccelerator->wait(); std::cout<<"acceleratorCopySynchronise() wait "<<std::endl; }
|
||||
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();}
|
||||
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();}
|
||||
inline void acceleratorMemSet(void *base,int value,size_t bytes) { theGridAccelerator->memset(base,value,bytes); theGridAccelerator->wait();}
|
||||
|
@ -126,19 +126,10 @@ int main (int argc, char ** argv)
|
||||
// Naive wilson implementation
|
||||
////////////////////////////////////
|
||||
// replicate across fifth dimension
|
||||
LatticeGaugeFieldF Umu5d(FGrid);
|
||||
std::vector<LatticeColourMatrixF> U(4,FGrid);
|
||||
{
|
||||
autoView( Umu5d_v, Umu5d, CpuWrite);
|
||||
autoView( Umu_v , Umu , CpuRead);
|
||||
for(int ss=0;ss<Umu.Grid()->oSites();ss++){
|
||||
for(int s=0;s<Ls;s++){
|
||||
Umu5d_v[Ls*ss+s] = Umu_v[ss];
|
||||
}
|
||||
}
|
||||
}
|
||||
// LatticeGaugeFieldF Umu5d(FGrid);
|
||||
std::vector<LatticeColourMatrixF> U(4,UGrid);
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu);
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||
}
|
||||
std::cout << GridLogMessage << "Setting up Cshift based reference " << std::endl;
|
||||
|
||||
@ -147,10 +138,28 @@ int main (int argc, char ** argv)
|
||||
ref = Zero();
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
||||
tmp = U[mu]*Cshift(src,mu+1,1);
|
||||
tmp = Cshift(src,mu+1,1);
|
||||
{
|
||||
autoView( tmp_v , tmp , CpuWrite);
|
||||
autoView( U_v , U[mu] , CpuRead);
|
||||
for(int ss=0;ss<U[mu].Grid()->oSites();ss++){
|
||||
for(int s=0;s<Ls;s++){
|
||||
tmp_v[Ls*ss+s] = U_v[ss]*tmp_v[Ls*ss+s];
|
||||
}
|
||||
}
|
||||
}
|
||||
ref=ref + tmp - Gamma(Gmu[mu])*tmp;
|
||||
|
||||
tmp =adj(U[mu])*src;
|
||||
{
|
||||
autoView( tmp_v , tmp , CpuWrite);
|
||||
autoView( U_v , U[mu] , CpuRead);
|
||||
autoView( src_v, src , CpuRead);
|
||||
for(int ss=0;ss<U[mu].Grid()->oSites();ss++){
|
||||
for(int s=0;s<Ls;s++){
|
||||
tmp_v[Ls*ss+s] = adj(U_v[ss])*src_v[Ls*ss+s];
|
||||
}
|
||||
}
|
||||
}
|
||||
tmp =Cshift(tmp,mu+1,-1);
|
||||
ref=ref + tmp + Gamma(Gmu[mu])*tmp;
|
||||
}
|
||||
@ -242,16 +251,30 @@ int main (int argc, char ** argv)
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
||||
// ref = src - Gamma(Gamma::Algebra::GammaX)* src ; // 1+gamma_x
|
||||
tmp = U[mu]*Cshift(src,mu+1,1);
|
||||
tmp = Cshift(src,mu+1,1);
|
||||
{
|
||||
autoView( ref_v, ref, CpuWrite);
|
||||
autoView( tmp_v, tmp, CpuRead);
|
||||
for(int i=0;i<ref_v.size();i++){
|
||||
ref_v[i]+= tmp_v[i] + Gamma(Gmu[mu])*tmp_v[i]; ;
|
||||
autoView( U_v , U[mu] , CpuRead);
|
||||
for(int ss=0;ss<U[mu].Grid()->oSites();ss++){
|
||||
for(int s=0;s<Ls;s++){
|
||||
int i=s+Ls*ss;
|
||||
ref_v[i]+= U_v[ss]*(tmp_v[i] + Gamma(Gmu[mu])*tmp_v[i]); ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tmp =adj(U[mu])*src;
|
||||
|
||||
{
|
||||
autoView( tmp_v , tmp , CpuWrite);
|
||||
autoView( U_v , U[mu] , CpuRead);
|
||||
autoView( src_v, src , CpuRead);
|
||||
for(int ss=0;ss<U[mu].Grid()->oSites();ss++){
|
||||
for(int s=0;s<Ls;s++){
|
||||
tmp_v[Ls*ss+s] = adj(U_v[ss])*src_v[Ls*ss+s];
|
||||
}
|
||||
}
|
||||
}
|
||||
// tmp =adj(U[mu])*src;
|
||||
tmp =Cshift(tmp,mu+1,-1);
|
||||
{
|
||||
autoView( ref_v, ref, CpuWrite);
|
||||
|
Loading…
Reference in New Issue
Block a user