1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-12 20:27:06 +01:00

Improvements to the assembler interface that let us move chunks of the

site and s loop into the kernels. This will save on function call overhead and
guarantee L2 prefetching strategy is right since OMP can't distribute the
sub-chunks of work.
This commit is contained in:
paboyle
2016-06-09 01:12:36 -07:00
parent d9408893b3
commit 55f65b81b5
10 changed files with 77 additions and 87 deletions

View File

@ -132,19 +132,21 @@ int main (int argc, char ** argv)
RealD NP = UGrid->_Nprocessors;
for(int doasm=0;doasm<1;doasm++){
for(int doasm=1;doasm<2;doasm++){
QCD::WilsonKernelsStatic::AsmOpt=doasm;
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,params);
std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
int ncall =50;
int ncall =10;
if (1) {
double t0=usecond();
for(int i=0;i<ncall;i++){
__SSC_START;
Dw.Dhop(src,result,0);
__SSC_STOP;
}
double t1=usecond();