1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-14 22:07:05 +01:00

Merge branch 'develop' into feature/staggered-comms-compute

Conflicts:
	lib/qcd/action/fermion/ImprovedStaggeredFermion.cc
This commit is contained in:
Azusa Yamaguchi
2018-05-21 13:07:29 +01:00
238 changed files with 18155 additions and 7613 deletions

View File

@ -66,6 +66,8 @@ void Gather_plane_simple_table (std::vector<std::pair<int,int> >& table,const La
parallel_for(int i=0;i<num;i++){
compress.Compress(&buffer[off],table[i].first,rhs._odata[so+table[i].second]);
}
// Further optimisation: i) streaming store the result
// ii) software prefetch the first element of the next table entry
}
///////////////////////////////////////////////////////////////////
@ -105,7 +107,6 @@ template<class vobj,class cobj>
class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal fill in.
public:
typedef CartesianCommunicator::CommsRequest_t CommsRequest_t;
typedef typename cobj::vector_type vector_type;
typedef typename cobj::scalar_type scalar_type;
typedef typename cobj::scalar_object scalar_object;
@ -511,25 +512,24 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
// Post-communication fix-up: first runs the decompressor's Exchange over
// every entry of `mm`, then its Decompress over every entry of `dd`.
//
//   decompress : object providing Exchange(...) and Decompress(...)
//   mm         : merge descriptors; each supplies mpointer, two vpointers,
//                a type and a buffer_size (Exchange is invoked for
//                o in [0, buffer_size/2))
//   dd         : decompress descriptors; each supplies kernel_p, mpi_p and a
//                buffer_size (Decompress is invoked for o in [0, buffer_size))
//
// Timing: `mergetime` and `decompresstime` each accumulate the usecond()
// interval of their whole phase exactly once. Starting and stopping the same
// accumulator again inside the per-entry loop would add every interval twice,
// so the timers bracket the loops only.
template<class decompressor>
void CommsMerge(decompressor decompress,std::vector<Merge> &mm,std::vector<Decompress> &dd) {

  // ---- Phase 1: merge / exchange ----
  mergetime-=usecond();
  for(int i=0;i<mm.size();i++){
    parallel_for(int o=0;o<mm[i].buffer_size/2;o++){
      decompress.Exchange(mm[i].mpointer,
                          mm[i].vpointers[0],
                          mm[i].vpointers[1],
                          mm[i].type,o);
    }
  }
  mergetime+=usecond();

  // ---- Phase 2: decompress ----
  decompresstime-=usecond();
  for(int i=0;i<dd.size();i++){
    parallel_for(int o=0;o<dd[i].buffer_size;o++){
      decompress.Decompress(dd[i].kernel_p,dd[i].mpi_p,o);
    }
  }
  decompresstime+=usecond();
}
////////////////////////////////////////
// Set up routines