From 6d0a907c5c2d7c69b2d46f475a222d686b2e2d13 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 12 Aug 2020 14:17:46 -0400 Subject: [PATCH] first try at A2A four quark offload --- Grid/qcd/utils/A2Autils.h | 85 +++++++++++++++++++++++---------------- 1 file changed, 51 insertions(+), 34 deletions(-) diff --git a/Grid/qcd/utils/A2Autils.h b/Grid/qcd/utils/A2Autils.h index b63d8571..328cb1cb 100644 --- a/Grid/qcd/utils/A2Autils.h +++ b/Grid/qcd/utils/A2Autils.h @@ -1119,33 +1119,39 @@ void A2Autils::ContractFourQuarkColourDiagonal(const PropagatorField &WWV assert(gamma0.size()==gamma1.size()); int Ng = gamma0.size(); + // Make device accessible copy + Vector Gamma0v (Ng); + Vector Gamma1v (Ng); + Gamma *Gamma0 = & Gamma0v[0]; + Gamma *Gamma1 = & Gamma1v[0]; + for(int g=0;goSites(),{ + typedef typename ComplexField::vector_object vobj; + autoView(WWVV0_v , WWVV0,AcceleratorRead); + autoView(WWVV1_v , WWVV1,AcceleratorRead); + autoView(O_trtr_v, O_trtr,AcceleratorWrite); + autoView(O_fig8_v, O_fig8,AcceleratorWrite); + accelerator_for(ss,grid->oSites(),vobj::Nsimd(),{ - typedef typename ComplexField::vector_object vobj; - - vobj v_trtr; - vobj v_fig8; - - auto VV0 = WWVV0_v[ss]; - auto VV1 = WWVV1_v[ss]; + auto VV0 = WWVV0_v(ss); + auto VV1 = WWVV1_v(ss); for(int g=0;g::ContractFourQuarkColourMix(const PropagatorField &WWVV0, GridBase *grid = WWVV0.Grid(); - autoView( WWVV0_v , WWVV0,CpuRead); - autoView( WWVV1_v , WWVV1,CpuRead); - autoView( O_trtr_v, O_trtr,CpuWrite); - autoView( O_fig8_v, O_fig8,CpuWrite); + // Make device accessible copy + Vector Gamma0v (Ng); + Vector Gamma1v (Ng); + Gamma *Gamma0 = & Gamma0v[0]; + Gamma *Gamma1 = & Gamma1v[0]; + for(int g=0;goSites(),{ + autoView( WWVV0_v , WWVV0,AcceleratorRead); + autoView( WWVV1_v , WWVV1,AcceleratorRead); + autoView( O_trtr_v, O_trtr,AcceleratorWrite); + autoView( O_fig8_v, O_fig8,AcceleratorWrite); - typedef typename ComplexField::vector_object vobj; + typedef typename ComplexField::vector_object vobj; 
+ accelerator_for(ss,grid->oSites(),vobj::Nsimd(),{ + + auto VV0 = WWVV0_v(ss); + auto VV1 = WWVV1_v(ss); + + typedef decltype(trace(VV0)) scalar; - auto VV0 = WWVV0_v[ss]; - auto VV1 = WWVV1_v[ss]; - for(int g=0;g::ContractFourQuarkColourMix(const PropagatorField &WWVV0, // Wick1 [ spin TR TR ] // // (VV0*G0)_ss,ba . (VV1*G1)_tt,ab - // + // // Wick2 [ spin fig8 ] // // (VV0*G0)_st,aa (VV1*G1)_ts,bb @@ -1234,11 +1251,11 @@ Bag [8,4] fig8 (-227.58,3.58808e-17) trtr (-32.5776,1.83286e-17) // - 1602 }}}} if ( g==0 ) { - O_trtr_v[ss] = v_trtr; - O_fig8_v[ss] = v_fig8; + coalescedWrite(O_trtr_v[ss] , v_trtr); + coalescedWrite(O_fig8_v[ss] , v_fig8); } else { - O_trtr_v[ss]+= v_trtr; - O_fig8_v[ss]+= v_fig8; + coalescedWrite(O_trtr_v[ss],O_trtr_v(ss) + v_trtr); + coalescedWrite(O_fig8_v[ss],O_fig8_v(ss) + v_fig8); } }