// Direction tables.
// DIRn_PROJMEM(base) and DIRn_RECON (n = 0..7) select which projection and
// reconstruction macro is used for each of the eight legs.
//  - With KERNEL_DAG defined, legs 0-3 use the XP/YP/ZP/TP variants and legs
//    4-7 use the XM/YM/ZM/TM variants.
//  - Without it, the P and M roles are swapped.
// In both tables leg 0 uses the plain *_RECON form while legs 1-7 use the
// *_RECON_ACCUM form.
// Every DIRn_PROJMEM expansion already ends in a semicolon.
#ifdef KERNEL_DAG
#define DIR0_PROJMEM(base) XP_PROJMEM(base);
#define DIR1_PROJMEM(base) YP_PROJMEM(base);
#define DIR2_PROJMEM(base) ZP_PROJMEM(base);
#define DIR3_PROJMEM(base) TP_PROJMEM(base);
#define DIR4_PROJMEM(base) XM_PROJMEM(base);
#define DIR5_PROJMEM(base) YM_PROJMEM(base);
#define DIR6_PROJMEM(base) ZM_PROJMEM(base);
#define DIR7_PROJMEM(base) TM_PROJMEM(base);
#define DIR0_RECON XP_RECON
#define DIR1_RECON YP_RECON_ACCUM
#define DIR2_RECON ZP_RECON_ACCUM
#define DIR3_RECON TP_RECON_ACCUM
#define DIR4_RECON XM_RECON_ACCUM
#define DIR5_RECON YM_RECON_ACCUM
#define DIR6_RECON ZM_RECON_ACCUM
#define DIR7_RECON TM_RECON_ACCUM
#else
#define DIR0_PROJMEM(base) XM_PROJMEM(base);
#define DIR1_PROJMEM(base) YM_PROJMEM(base);
#define DIR2_PROJMEM(base) ZM_PROJMEM(base);
#define DIR3_PROJMEM(base) TM_PROJMEM(base);
#define DIR4_PROJMEM(base) XP_PROJMEM(base);
#define DIR5_PROJMEM(base) YP_PROJMEM(base);
#define DIR6_PROJMEM(base) ZP_PROJMEM(base);
#define DIR7_PROJMEM(base) TP_PROJMEM(base);
#define DIR0_RECON XM_RECON
#define DIR1_RECON YM_RECON_ACCUM
#define DIR2_RECON ZM_RECON_ACCUM
#define DIR3_RECON TM_RECON_ACCUM
#define DIR4_RECON XP_RECON_ACCUM
#define DIR5_RECON YP_RECON_ACCUM
#define DIR6_RECON ZP_RECON_ACCUM
#define DIR7_RECON TP_RECON_ACCUM
#endif

// The three sections below each define the same macro trio:
//   ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON)    - one leg
//   ASM_LEG_XP(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) - the first leg
//   RESULT(base,basep)                            - final store
// All of them read and write names that must be in scope where they are
// expanded: st, ent, nent, isigns, and the locals declared at the bottom of
// this span (nmu, local, perm, ptype, base, basep, plocal).
// NOTE(review): nothing visible here stops more than one of
// INTERIOR_AND_EXTERIOR / INTERIOR / EXTERIOR from being defined at once,
// which would redefine the trio - presumably the including file guarantees
// exactly one; confirm.

////////////////////////////////////////////////////////////////////////////////
// Comms then compute kernel
////////////////////////////////////////////////////////////////////////////////
#ifdef INTERIOR_AND_EXTERIOR

// One leg, computed unconditionally.
//  1. basep is taken from st.GetPFInfo(nent,plocal) and nent is advanced.
//  2. If `local` is set: LOAD64(%r10,isigns), PROJ(base), then
//     MAYBEPERM(PERMUTE_DIR,perm). Otherwise: LOAD_CHI(base).
//  3. base is re-pointed at the NxtDir entry via st.GetInfo (which also
//     receives ptype, local and perm), ent is advanced, and
//     PREFETCH_CHIMU(base) is issued before the multiply.
//  4. MULT_2SPIN_DIR_PF(Dir,basep), LOAD64(%r10,isigns), RECON.
// NOTE: the body ends with a line continuation. The blank line after it is
// what terminates the macro and must be kept.
#define ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
      basep = st.GetPFInfo(nent,plocal); nent++; \
      if ( local ) { \
        LOAD64(%r10,isigns); \
        PROJ(base); \
        MAYBEPERM(PERMUTE_DIR,perm); \
      } else { \
        LOAD_CHI(base); \
      } \
      base = st.GetInfo(ptype,local,perm,NxtDir,ent,plocal); ent++; \
      PREFETCH_CHIMU(base); \
      MULT_2SPIN_DIR_PF(Dir,basep); \
      LOAD64(%r10,isigns); \
      RECON; \

// First leg: looks up base for Dir itself (advancing ent), issues
// PF_GAUGE(Xp) and PREFETCH1_CHIMU(base), then runs ASM_LEG with the same
// arguments.
#define ASM_LEG_XP(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
  base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
  PF_GAUGE(Xp); \
  PREFETCH1_CHIMU(base); \
  ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON)

// Final store: forwards both arguments to SAVE_RESULT (expansion includes the
// trailing semicolon).
#define RESULT(base,basep) SAVE_RESULT(base,basep);

#endif

////////////////////////////////////////////////////////////////////////////////
// Pre comms kernel -- prefetch like normal because it is mostly right
////////////////////////////////////////////////////////////////////////////////
#ifdef INTERIOR

// One leg, computed only when `local` or st.same_node[Dir] is set.
//  1. basep is taken from st.GetPFInfo(nent,plocal) and nent is advanced.
//  2. If `local`: LOAD64(%r10,isigns), PROJ(base),
//     MAYBEPERM(PERMUTE_DIR,perm). Else if st.same_node[Dir]: LOAD_CHI(base).
//     If neither holds, nothing is loaded.
//  3. If `local` or st.same_node[Dir]: MULT_2SPIN_DIR_PF(Dir,basep),
//     LOAD64(%r10,isigns), RECON.
//  4. Always: base is re-pointed at the NxtDir entry via st.GetInfo, ent is
//     advanced, and PREFETCH_CHIMU(base) is issued.
// NOTE: the body ends with a line continuation. The blank line after it is
// what terminates the macro and must be kept.
#define ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
      basep = st.GetPFInfo(nent,plocal); nent++; \
      if ( local ) { \
        LOAD64(%r10,isigns); \
        PROJ(base); \
        MAYBEPERM(PERMUTE_DIR,perm); \
      }else if ( st.same_node[Dir] ) {LOAD_CHI(base);} \
      if ( local || st.same_node[Dir] ) { \
        MULT_2SPIN_DIR_PF(Dir,basep); \
        LOAD64(%r10,isigns); \
        RECON; \
      } \
      base = st.GetInfo(ptype,local,perm,NxtDir,ent,plocal); ent++; \
      PREFETCH_CHIMU(base); \

// First leg: same preamble as the comms-then-compute variant, plus ZERO_PSI
// before the leg. A leg may be skipped here, and only leg 0 uses the
// non-accumulating *_RECON form.
#define ASM_LEG_XP(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
  base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
  PF_GAUGE(Xp); \
  PREFETCH1_CHIMU(base); \
  { ZERO_PSI; } \
  ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON)

// Final store: forwards both arguments to SAVE_RESULT (expansion includes the
// trailing semicolon).
#define RESULT(base,basep) SAVE_RESULT(base,basep);

#endif

////////////////////////////////////////////////////////////////////////////////
// Post comms kernel
////////////////////////////////////////////////////////////////////////////////
#ifdef EXTERIOR

// One leg, computed only when `local` is clear and st.same_node[Dir] is clear
// (the complement of the condition used by the INTERIOR variant).
// base is always looked up for Dir and ent is always advanced. When the leg
// is computed, nmu is incremented.
// This variant does not call st.GetPFInfo or touch nent. It passes `base`
// (not basep) to MULT_2SPIN_DIR_PF. NxtDir, PERMUTE_DIR and PROJ are unused.
#define ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
  base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
  if((!local)&&(!st.same_node[Dir]) ) { \
    LOAD_CHI(base); \
    MULT_2SPIN_DIR_PF(Dir,base); \
    LOAD64(%r10,isigns); \
    RECON; \
    nmu++; \
  }

// First leg: resets nmu to 0 and runs ZERO_PSI, then performs the same steps
// as the EXTERIOR ASM_LEG above (written out inline rather than by invoking
// it).
#define ASM_LEG_XP(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
  nmu=0; \
  { ZERO_PSI;} \
  base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
  if((!local)&&(!st.same_node[Dir]) ) { \
    LOAD_CHI(base); \
    MULT_2SPIN_DIR_PF(Dir,base); \
    LOAD64(%r10,isigns); \
    RECON; \
    nmu++; \
  }

// Final store: does nothing unless at least one leg was computed (nmu != 0).
// When it does run, it calls ADD_RESULT with `base` for both arguments. The
// basep parameter is not used in this variant.
#define RESULT(base,basep) if (nmu){ ADD_RESULT(base,base);}

#endif

// Start of the kernel body scope; the rest of the block lies beyond this span.
// Declares the locals that the macros above assign and read.
// plocal is the address of in._odata[0] held as a 64-bit integer.
{
  int nmu;
  int local,perm, ptype;
  uint64_t base;
  uint64_t basep;
  const uint64_t plocal =(uint64_t) & in._odata[0];
COMPLEX_SIGNS(isigns); MASK_REGS; int nmax=U._grid->oSites(); for(int site=0;site=nmax) ssn=0; int sUn=lo.Reorder(ssn); LOCK_GAUGE(0); #else int sU =ssU; int ssn=ssU+1; if(ssn>=nmax) ssn=0; int sUn=ssn; #endif for(int s=0;s