mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 14:04:32 +00:00 
			
		
		
		
	Optimisation...
This commit is contained in:
		@@ -138,16 +138,8 @@ void WilsonMatrix::MooeeInvDag(const LatticeFermion &in, LatticeFermion &out)
 | 
			
		||||
  return ;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out,int dag)
 | 
			
		||||
void WilsonMatrix::DhopSite(int ss,const LatticeFermion &in, LatticeFermion &out)
 | 
			
		||||
{
 | 
			
		||||
  assert((dag==0) ||(dag==1));
 | 
			
		||||
 | 
			
		||||
  WilsonCompressor compressor(dag);
 | 
			
		||||
  Stencil.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(in,comm_buf,compressor);
 | 
			
		||||
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
  for(int sss=0;sss<grid->oSites();sss++){
 | 
			
		||||
 | 
			
		||||
    vHalfSpinColourVector  tmp;    
 | 
			
		||||
    vHalfSpinColourVector  chi;    
 | 
			
		||||
    vSpinColourVector result;
 | 
			
		||||
@@ -155,16 +147,14 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
    int offset,local,perm, ptype;
 | 
			
		||||
 | 
			
		||||
    //    int ss = Stencil._LebesgueReorder[sss];
 | 
			
		||||
    int ss = sss;
 | 
			
		||||
    int ssu= ss;
 | 
			
		||||
 | 
			
		||||
    // Xp
 | 
			
		||||
    int idx = (Xp+dag*4)%8;
 | 
			
		||||
    offset = Stencil._offsets [idx][ss];
 | 
			
		||||
    local  = Stencil._is_local[idx][ss];
 | 
			
		||||
    perm   = Stencil._permute[idx][ss];
 | 
			
		||||
    offset = Stencil._offsets [Xp][ss];
 | 
			
		||||
    local  = Stencil._is_local[Xp][ss];
 | 
			
		||||
    perm   = Stencil._permute[Xp][ss];
 | 
			
		||||
 | 
			
		||||
    ptype  = Stencil._permute_type[idx];
 | 
			
		||||
    ptype  = Stencil._permute_type[Xp];
 | 
			
		||||
    if ( local && perm ) {
 | 
			
		||||
      spProjXp(tmp,in._odata[offset]);
 | 
			
		||||
      permute(chi,tmp,ptype);
 | 
			
		||||
@@ -173,16 +163,14 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
    } else { 
 | 
			
		||||
      chi=comm_buf[offset];
 | 
			
		||||
    }
 | 
			
		||||
    mult(&Uchi(),&Umu._odata[ssu](idx),&chi());
 | 
			
		||||
    mult(&Uchi(),&Umu._odata[ssu](Xp),&chi());
 | 
			
		||||
    spReconXp(result,Uchi);
 | 
			
		||||
 | 
			
		||||
    // Yp
 | 
			
		||||
    idx = (Yp+dag*4)%8;
 | 
			
		||||
    offset = Stencil._offsets [idx][ss];
 | 
			
		||||
    local  = Stencil._is_local[idx][ss];
 | 
			
		||||
    perm   = Stencil._permute[idx][ss];
 | 
			
		||||
    ptype  = Stencil._permute_type[idx];
 | 
			
		||||
 | 
			
		||||
    offset = Stencil._offsets [Yp][ss];
 | 
			
		||||
    local  = Stencil._is_local[Yp][ss];
 | 
			
		||||
    perm   = Stencil._permute[Yp][ss];
 | 
			
		||||
    ptype  = Stencil._permute_type[Yp];
 | 
			
		||||
    if ( local && perm ) {
 | 
			
		||||
      spProjYp(tmp,in._odata[offset]);
 | 
			
		||||
      permute(chi,tmp,ptype);
 | 
			
		||||
@@ -191,15 +179,14 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
    } else { 
 | 
			
		||||
      chi=comm_buf[offset];
 | 
			
		||||
    }
 | 
			
		||||
    mult(&Uchi(),&Umu._odata[ssu](idx),&chi());
 | 
			
		||||
    mult(&Uchi(),&Umu._odata[ssu](Yp),&chi());
 | 
			
		||||
    accumReconYp(result,Uchi);
 | 
			
		||||
 | 
			
		||||
    // Zp
 | 
			
		||||
    idx = (Zp+dag*4)%8;
 | 
			
		||||
    offset = Stencil._offsets [idx][ss];
 | 
			
		||||
    local  = Stencil._is_local[idx][ss];
 | 
			
		||||
    perm   = Stencil._permute[idx][ss];
 | 
			
		||||
    ptype  = Stencil._permute_type[idx];
 | 
			
		||||
    offset = Stencil._offsets [Zp][ss];
 | 
			
		||||
    local  = Stencil._is_local[Zp][ss];
 | 
			
		||||
    perm   = Stencil._permute[Zp][ss];
 | 
			
		||||
    ptype  = Stencil._permute_type[Zp];
 | 
			
		||||
    if ( local && perm ) {
 | 
			
		||||
      spProjZp(tmp,in._odata[offset]);
 | 
			
		||||
      permute(chi,tmp,ptype);
 | 
			
		||||
@@ -208,15 +195,14 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
    } else { 
 | 
			
		||||
      chi=comm_buf[offset];
 | 
			
		||||
    }
 | 
			
		||||
    mult(&Uchi(),&Umu._odata[ssu](idx),&chi());
 | 
			
		||||
    mult(&Uchi(),&Umu._odata[ssu](Zp),&chi());
 | 
			
		||||
    accumReconZp(result,Uchi);
 | 
			
		||||
 | 
			
		||||
    // Tp
 | 
			
		||||
    idx = (Tp+dag*4)%8;
 | 
			
		||||
    offset = Stencil._offsets [idx][ss];
 | 
			
		||||
    local  = Stencil._is_local[idx][ss];
 | 
			
		||||
    perm   = Stencil._permute[idx][ss];
 | 
			
		||||
    ptype  = Stencil._permute_type[idx];
 | 
			
		||||
    offset = Stencil._offsets [Tp][ss];
 | 
			
		||||
    local  = Stencil._is_local[Tp][ss];
 | 
			
		||||
    perm   = Stencil._permute[Tp][ss];
 | 
			
		||||
    ptype  = Stencil._permute_type[Tp];
 | 
			
		||||
    if ( local && perm ) {
 | 
			
		||||
      spProjTp(tmp,in._odata[offset]);
 | 
			
		||||
      permute(chi,tmp,ptype);
 | 
			
		||||
@@ -225,15 +211,14 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
    } else { 
 | 
			
		||||
      chi=comm_buf[offset];
 | 
			
		||||
    }
 | 
			
		||||
    mult(&Uchi(),&Umu._odata[ssu](idx),&chi());
 | 
			
		||||
    mult(&Uchi(),&Umu._odata[ssu](Tp),&chi());
 | 
			
		||||
    accumReconTp(result,Uchi);
 | 
			
		||||
 | 
			
		||||
    // Xm
 | 
			
		||||
    idx = (Xm+dag*4)%8;
 | 
			
		||||
    offset = Stencil._offsets [idx][ss];
 | 
			
		||||
    local  = Stencil._is_local[idx][ss];
 | 
			
		||||
    perm   = Stencil._permute[idx][ss];
 | 
			
		||||
    ptype  = Stencil._permute_type[idx];
 | 
			
		||||
    offset = Stencil._offsets [Xm][ss];
 | 
			
		||||
    local  = Stencil._is_local[Xm][ss];
 | 
			
		||||
    perm   = Stencil._permute[Xm][ss];
 | 
			
		||||
    ptype  = Stencil._permute_type[Xm];
 | 
			
		||||
 | 
			
		||||
    if ( local && perm ) 
 | 
			
		||||
    {
 | 
			
		||||
@@ -244,16 +229,15 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
    } else { 
 | 
			
		||||
      chi=comm_buf[offset];
 | 
			
		||||
    }
 | 
			
		||||
    mult(&Uchi(),&Umu._odata[ssu](idx),&chi());
 | 
			
		||||
    mult(&Uchi(),&Umu._odata[ssu](Xm),&chi());
 | 
			
		||||
    accumReconXm(result,Uchi);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    // Ym
 | 
			
		||||
    idx = (Ym+dag*4)%8;
 | 
			
		||||
    offset = Stencil._offsets [idx][ss];
 | 
			
		||||
    local  = Stencil._is_local[idx][ss];
 | 
			
		||||
    perm   = Stencil._permute[idx][ss];
 | 
			
		||||
    ptype  = Stencil._permute_type[idx];
 | 
			
		||||
    offset = Stencil._offsets [Ym][ss];
 | 
			
		||||
    local  = Stencil._is_local[Ym][ss];
 | 
			
		||||
    perm   = Stencil._permute[Ym][ss];
 | 
			
		||||
    ptype  = Stencil._permute_type[Ym];
 | 
			
		||||
 | 
			
		||||
    if ( local && perm ) {
 | 
			
		||||
      spProjYm(tmp,in._odata[offset]);
 | 
			
		||||
@@ -263,15 +247,14 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
    } else { 
 | 
			
		||||
      chi=comm_buf[offset];
 | 
			
		||||
    }
 | 
			
		||||
    mult(&Uchi(),&Umu._odata[ssu](idx),&chi());
 | 
			
		||||
    mult(&Uchi(),&Umu._odata[ssu](Ym),&chi());
 | 
			
		||||
    accumReconYm(result,Uchi);
 | 
			
		||||
 | 
			
		||||
    // Zm
 | 
			
		||||
    idx = (Zm+dag*4)%8;
 | 
			
		||||
    offset = Stencil._offsets [idx][ss];
 | 
			
		||||
    local  = Stencil._is_local[idx][ss];
 | 
			
		||||
    perm   = Stencil._permute[idx][ss];
 | 
			
		||||
    ptype  = Stencil._permute_type[idx];
 | 
			
		||||
    offset = Stencil._offsets [Zm][ss];
 | 
			
		||||
    local  = Stencil._is_local[Zm][ss];
 | 
			
		||||
    perm   = Stencil._permute[Zm][ss];
 | 
			
		||||
    ptype  = Stencil._permute_type[Zm];
 | 
			
		||||
    if ( local && perm ) {
 | 
			
		||||
      spProjZm(tmp,in._odata[offset]);
 | 
			
		||||
      permute(chi,tmp,ptype);
 | 
			
		||||
@@ -280,15 +263,14 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
    } else { 
 | 
			
		||||
      chi=comm_buf[offset];
 | 
			
		||||
    }
 | 
			
		||||
    mult(&Uchi(),&Umu._odata[ssu](idx),&chi());
 | 
			
		||||
    mult(&Uchi(),&Umu._odata[ssu](Zm),&chi());
 | 
			
		||||
    accumReconZm(result,Uchi);
 | 
			
		||||
 | 
			
		||||
    // Tm
 | 
			
		||||
    idx = (Tm+dag*4)%8;
 | 
			
		||||
    offset = Stencil._offsets [idx][ss];
 | 
			
		||||
    local  = Stencil._is_local[idx][ss];
 | 
			
		||||
    perm   = Stencil._permute[idx][ss];
 | 
			
		||||
    ptype  = Stencil._permute_type[idx];
 | 
			
		||||
    offset = Stencil._offsets [Tm][ss];
 | 
			
		||||
    local  = Stencil._is_local[Tm][ss];
 | 
			
		||||
    perm   = Stencil._permute[Tm][ss];
 | 
			
		||||
    ptype  = Stencil._permute_type[Tm];
 | 
			
		||||
    if ( local && perm ) {
 | 
			
		||||
      spProjTm(tmp,in._odata[offset]);
 | 
			
		||||
      permute(chi,tmp,ptype);
 | 
			
		||||
@@ -297,12 +279,175 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
    } else { 
 | 
			
		||||
      chi=comm_buf[offset];
 | 
			
		||||
    }
 | 
			
		||||
    mult(&Uchi(),&Umu._odata[ssu](idx),&chi());
 | 
			
		||||
    mult(&Uchi(),&Umu._odata[ssu](Tm),&chi());
 | 
			
		||||
    accumReconTm(result,Uchi);
 | 
			
		||||
 | 
			
		||||
    vstream(out._odata[ss],result);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
void WilsonMatrix::DhopSiteDag(int ss,const LatticeFermion &in, LatticeFermion &out)
 | 
			
		||||
{
 | 
			
		||||
    vHalfSpinColourVector  tmp;    
 | 
			
		||||
    vHalfSpinColourVector  chi;    
 | 
			
		||||
    vSpinColourVector result;
 | 
			
		||||
    vHalfSpinColourVector Uchi;
 | 
			
		||||
    int offset,local,perm, ptype;
 | 
			
		||||
 | 
			
		||||
    int ssu= ss;
 | 
			
		||||
 | 
			
		||||
    // Xp
 | 
			
		||||
    offset = Stencil._offsets [Xm][ss];
 | 
			
		||||
    local  = Stencil._is_local[Xm][ss];
 | 
			
		||||
    perm   = Stencil._permute[Xm][ss];
 | 
			
		||||
 | 
			
		||||
    ptype  = Stencil._permute_type[Xm];
 | 
			
		||||
    if ( local && perm ) {
 | 
			
		||||
      spProjXp(tmp,in._odata[offset]);
 | 
			
		||||
      permute(chi,tmp,ptype);
 | 
			
		||||
    } else if ( local ) {
 | 
			
		||||
      spProjXp(chi,in._odata[offset]);
 | 
			
		||||
    } else { 
 | 
			
		||||
      chi=comm_buf[offset];
 | 
			
		||||
    }
 | 
			
		||||
    mult(&Uchi(),&Umu._odata[ssu](Xm),&chi());
 | 
			
		||||
    spReconXp(result,Uchi);
 | 
			
		||||
 | 
			
		||||
    // Yp
 | 
			
		||||
    offset = Stencil._offsets [Ym][ss];
 | 
			
		||||
    local  = Stencil._is_local[Ym][ss];
 | 
			
		||||
    perm   = Stencil._permute[Ym][ss];
 | 
			
		||||
    ptype  = Stencil._permute_type[Ym];
 | 
			
		||||
    if ( local && perm ) {
 | 
			
		||||
      spProjYp(tmp,in._odata[offset]);
 | 
			
		||||
      permute(chi,tmp,ptype);
 | 
			
		||||
    } else if ( local ) {
 | 
			
		||||
      spProjYp(chi,in._odata[offset]);
 | 
			
		||||
    } else { 
 | 
			
		||||
      chi=comm_buf[offset];
 | 
			
		||||
    }
 | 
			
		||||
    mult(&Uchi(),&Umu._odata[ssu](Ym),&chi());
 | 
			
		||||
    accumReconYp(result,Uchi);
 | 
			
		||||
 | 
			
		||||
    // Zp
 | 
			
		||||
    offset = Stencil._offsets [Zm][ss];
 | 
			
		||||
    local  = Stencil._is_local[Zm][ss];
 | 
			
		||||
    perm   = Stencil._permute[Zm][ss];
 | 
			
		||||
    ptype  = Stencil._permute_type[Zm];
 | 
			
		||||
    if ( local && perm ) {
 | 
			
		||||
      spProjZp(tmp,in._odata[offset]);
 | 
			
		||||
      permute(chi,tmp,ptype);
 | 
			
		||||
    } else if ( local ) {
 | 
			
		||||
      spProjZp(chi,in._odata[offset]);
 | 
			
		||||
    } else { 
 | 
			
		||||
      chi=comm_buf[offset];
 | 
			
		||||
    }
 | 
			
		||||
    mult(&Uchi(),&Umu._odata[ssu](Zm),&chi());
 | 
			
		||||
    accumReconZp(result,Uchi);
 | 
			
		||||
 | 
			
		||||
    // Tp
 | 
			
		||||
    offset = Stencil._offsets [Tm][ss];
 | 
			
		||||
    local  = Stencil._is_local[Tm][ss];
 | 
			
		||||
    perm   = Stencil._permute[Tm][ss];
 | 
			
		||||
    ptype  = Stencil._permute_type[Tm];
 | 
			
		||||
    if ( local && perm ) {
 | 
			
		||||
      spProjTp(tmp,in._odata[offset]);
 | 
			
		||||
      permute(chi,tmp,ptype);
 | 
			
		||||
    } else if ( local ) {
 | 
			
		||||
      spProjTp(chi,in._odata[offset]);
 | 
			
		||||
    } else { 
 | 
			
		||||
      chi=comm_buf[offset];
 | 
			
		||||
    }
 | 
			
		||||
    mult(&Uchi(),&Umu._odata[ssu](Tm),&chi());
 | 
			
		||||
    accumReconTp(result,Uchi);
 | 
			
		||||
 | 
			
		||||
    // Xm
 | 
			
		||||
    offset = Stencil._offsets [Xp][ss];
 | 
			
		||||
    local  = Stencil._is_local[Xp][ss];
 | 
			
		||||
    perm   = Stencil._permute[Xp][ss];
 | 
			
		||||
    ptype  = Stencil._permute_type[Xp];
 | 
			
		||||
 | 
			
		||||
    if ( local && perm ) 
 | 
			
		||||
    {
 | 
			
		||||
      spProjXm(tmp,in._odata[offset]);
 | 
			
		||||
      permute(chi,tmp,ptype);
 | 
			
		||||
    } else if ( local ) {
 | 
			
		||||
      spProjXm(chi,in._odata[offset]);
 | 
			
		||||
    } else { 
 | 
			
		||||
      chi=comm_buf[offset];
 | 
			
		||||
    }
 | 
			
		||||
    mult(&Uchi(),&Umu._odata[ssu](Xp),&chi());
 | 
			
		||||
    accumReconXm(result,Uchi);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    // Ym
 | 
			
		||||
    offset = Stencil._offsets [Yp][ss];
 | 
			
		||||
    local  = Stencil._is_local[Yp][ss];
 | 
			
		||||
    perm   = Stencil._permute[Yp][ss];
 | 
			
		||||
    ptype  = Stencil._permute_type[Yp];
 | 
			
		||||
 | 
			
		||||
    if ( local && perm ) {
 | 
			
		||||
      spProjYm(tmp,in._odata[offset]);
 | 
			
		||||
      permute(chi,tmp,ptype);
 | 
			
		||||
    } else if ( local ) {
 | 
			
		||||
      spProjYm(chi,in._odata[offset]);
 | 
			
		||||
    } else { 
 | 
			
		||||
      chi=comm_buf[offset];
 | 
			
		||||
    }
 | 
			
		||||
    mult(&Uchi(),&Umu._odata[ssu](Yp),&chi());
 | 
			
		||||
    accumReconYm(result,Uchi);
 | 
			
		||||
 | 
			
		||||
    // Zm
 | 
			
		||||
    offset = Stencil._offsets [Zp][ss];
 | 
			
		||||
    local  = Stencil._is_local[Zp][ss];
 | 
			
		||||
    perm   = Stencil._permute[Zp][ss];
 | 
			
		||||
    ptype  = Stencil._permute_type[Zp];
 | 
			
		||||
    if ( local && perm ) {
 | 
			
		||||
      spProjZm(tmp,in._odata[offset]);
 | 
			
		||||
      permute(chi,tmp,ptype);
 | 
			
		||||
    } else if ( local ) {
 | 
			
		||||
      spProjZm(chi,in._odata[offset]);
 | 
			
		||||
    } else { 
 | 
			
		||||
      chi=comm_buf[offset];
 | 
			
		||||
    }
 | 
			
		||||
    mult(&Uchi(),&Umu._odata[ssu](Zp),&chi());
 | 
			
		||||
    accumReconZm(result,Uchi);
 | 
			
		||||
 | 
			
		||||
    // Tm
 | 
			
		||||
    offset = Stencil._offsets [Tp][ss];
 | 
			
		||||
    local  = Stencil._is_local[Tp][ss];
 | 
			
		||||
    perm   = Stencil._permute[Tp][ss];
 | 
			
		||||
    ptype  = Stencil._permute_type[Tp];
 | 
			
		||||
    if ( local && perm ) {
 | 
			
		||||
      spProjTm(tmp,in._odata[offset]);
 | 
			
		||||
      permute(chi,tmp,ptype);
 | 
			
		||||
    } else if ( local ) {
 | 
			
		||||
      spProjTm(chi,in._odata[offset]);
 | 
			
		||||
    } else { 
 | 
			
		||||
      chi=comm_buf[offset];
 | 
			
		||||
    }
 | 
			
		||||
    mult(&Uchi(),&Umu._odata[ssu](Tp),&chi());
 | 
			
		||||
    accumReconTm(result,Uchi);
 | 
			
		||||
 | 
			
		||||
    vstream(out._odata[ss],result);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out,int dag)
 | 
			
		||||
{
 | 
			
		||||
  assert((dag==0) ||(dag==1));
 | 
			
		||||
 | 
			
		||||
  WilsonCompressor compressor(dag);
 | 
			
		||||
  Stencil.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(in,comm_buf,compressor);
 | 
			
		||||
 | 
			
		||||
  if ( dag ) {
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
    for(int sss=0;sss<grid->oSites();sss++){
 | 
			
		||||
      DhopSiteDag(sss,in,out);
 | 
			
		||||
    }
 | 
			
		||||
  } else {
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
    for(int sss=0;sss<grid->oSites();sss++){
 | 
			
		||||
      DhopSite(sss,in,out);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -46,6 +46,8 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
      // non-hermitian hopping term; half cb or both
 | 
			
		||||
      void Dhop(const LatticeFermion &in, LatticeFermion &out,int dag);
 | 
			
		||||
      void DhopSite   (int ss,const LatticeFermion &in, LatticeFermion &out);
 | 
			
		||||
      void DhopSiteDag(int ss,const LatticeFermion &in, LatticeFermion &out);
 | 
			
		||||
 | 
			
		||||
      typedef iScalar<iMatrix<vComplex, Nc> > matrix;
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user