/*
 * Fragment of a per-site kernel body built from project macros.
 *
 * The enclosing function or macro header is not part of this text, and the
 * loop preamble is cut off mid-expression (the `for(int site=0;site` statement
 * runs straight into a comment tail), so the loop headers and whatever
 * preceded the Yp section are missing and this block does not parse on its own.
 *
 * What the visible text does, per labelled section (Yp, Zp, Tp, Xm, Ym, Zm, Tm):
 *   1. Obtains the base address that a later section will consume, either via
 *      st.GetInfo(ptype, local, perm, dir, ent, plocal) followed by ent++, or,
 *      in the Tm section, by taking the address of out._odata[ss]; then issues
 *      PREFETCH_CHIMU on that address.
 *   2. Emits label(FX(..)) for the section.
 *   3. If the section's "local" flag is non-zero: LOAD64(%r10, isigns), the
 *      section's PROJMEM macro on its base, then MAYBEPERM with a PERMUTE_DIR
 *      constant and the matching "perm" flag. Otherwise: LOAD_CHI on the base.
 *   4. Runs the section's MULT_2SPIN_DIR_PF macro, reloads isigns with LOAD64,
 *      and runs the section's RECON_ACCUM macro.
 * The three variable sets (basea/locala/perma, baseb/..., basec/...) are used
 * in rotation: one is consumed by the current section while another is
 * refilled and prefetched for a later one.
 * After the Tm section, SAVE_RESULT(&out._odata[ss]) is invoked; ssU is
 * incremented after the enclosing brace closes.
 *
 * NOTE(review): the direction argument passed to st.GetInfo (Xp or Yp) does not
 * follow the section in which the returned base is consumed; e.g. the baseb
 * fetched in the Zp section is consumed by XP_PROJMEM in the Xm section.
 * Presumably the lookup is driven by `ent` - confirm against GetInfo.
 * NOTE(review): ptypea/ptypeb/ptypec are only passed to st.GetInfo and basex is
 * only mentioned inside a comment; none of them is read in the visible text.
 * NOTE(review): line breaks appear to have been lost. As written, each `//`
 * comment swallows the remainder of its physical line; the original line
 * structure must be restored before this can compile.
 */
{ int locala,perma, ptypea; int localb,permb, ptypeb; int localc,permc, ptypec; uint64_t basea, baseb, basec; uint64_t basex; const uint64_t plocal =(uint64_t) & in._odata[0]; // vComplexF isigns[2] = { signs[0], signs[1] }; vComplexF *isigns = &signs[0]; MASK_REGS; for(int site=0;site shuffle and xor the real part sign bit YM_PROJMEM(baseb); MAYBEPERM(PERMUTE_DIR2,permb); } else { LOAD_CHI(baseb); } { MULT_2SPIN_DIR_PFYP(Yp,basec); } LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit YM_RECON_ACCUM; //////////////////////////////// // Zp //////////////////////////////// baseb = st.GetInfo(ptypeb,localb,permb,Yp,ent,plocal); ent++; PREFETCH_CHIMU(baseb); label(FX(ZP) ); if ( localc ) { LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit ZM_PROJMEM(basec); MAYBEPERM(PERMUTE_DIR1,permc); } else { LOAD_CHI(basec); } { MULT_2SPIN_DIR_PFZP(Zp,basea); } LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit ZM_RECON_ACCUM; //////////////////////////////// // Tp //////////////////////////////// basec = st.GetInfo(ptypec,localc,permc,Xp,ent,plocal); ent++; PREFETCH_CHIMU(basec); label(FX(TP) ); if ( locala ) { LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit TM_PROJMEM(basea); MAYBEPERM(PERMUTE_DIR0,perma); } else { LOAD_CHI(basea); } { MULT_2SPIN_DIR_PFTP(Tp,baseb); } LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit TM_RECON_ACCUM; //////////////////////////////// // Xm //////////////////////////////// basea = st.GetInfo(ptypea,locala,perma,Yp,ent,plocal); ent++; PREFETCH_CHIMU(basea); label(FX(XM) ); if ( localb ) { LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit XP_PROJMEM(baseb); MAYBEPERM(PERMUTE_DIR3,permb); } else { LOAD_CHI(baseb); } { MULT_2SPIN_DIR_PFXM(Xm,basec); } LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit XP_RECON_ACCUM; //////////////////////////////// // Ym //////////////////////////////// 
baseb = st.GetInfo(ptypeb,localb,permb,Xp,ent,plocal); ent++; PREFETCH_CHIMU(baseb); label(FX(YM) ); if ( localc ) { LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit YP_PROJMEM(basec); MAYBEPERM(PERMUTE_DIR2,permc); } else { LOAD_CHI(basec); } { MULT_2SPIN_DIR_PFYM(Ym,basea); } LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit YP_RECON_ACCUM; //////////////////////////////// // Zm //////////////////////////////// basec = st.GetInfo(ptypec,localc,permc,Yp,ent,plocal); ent++; PREFETCH_CHIMU(basec); label(FX(ZM) ); if ( locala ) { LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit ZP_PROJMEM(basea); MAYBEPERM(PERMUTE_DIR1,perma); } else { LOAD_CHI(basea); } { MULT_2SPIN_DIR_PFZM(Zm,baseb); } LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit ZP_RECON_ACCUM; //////////////////////////////// // Tm //////////////////////////////// basea = (uint64_t)&out._odata[ss]; PREFETCH_CHIMU(basea); label(FX(TM) ); if ( localb ) { LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit TP_PROJMEM(baseb); MAYBEPERM(PERMUTE_DIR0,permb); } else { LOAD_CHI(baseb); } { MULT_2SPIN_DIR_PFTM(Tm,basec); } LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit TP_RECON_ACCUM; // PREFETCH_CHIMU(basex); label(FX(SAV) ); SAVE_RESULT(&out._odata[ss]); } ssU++; } }