mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-24 12:45:56 +01:00
first try at A2A four quark offload
This commit is contained in:
parent
3276aa67dc
commit
6d0a907c5c
@ -1119,33 +1119,39 @@ void A2Autils<FImpl>::ContractFourQuarkColourDiagonal(const PropagatorField &WWV
|
|||||||
assert(gamma0.size()==gamma1.size());
|
assert(gamma0.size()==gamma1.size());
|
||||||
int Ng = gamma0.size();
|
int Ng = gamma0.size();
|
||||||
|
|
||||||
|
// Make device accessible copy
|
||||||
|
Vector<Gamma> Gamma0v (Ng);
|
||||||
|
Vector<Gamma> Gamma1v (Ng);
|
||||||
|
Gamma *Gamma0 = & Gamma0v[0];
|
||||||
|
Gamma *Gamma1 = & Gamma1v[0];
|
||||||
|
for(int g=0;g<Ng;g++) {
|
||||||
|
Gamma0[g]=gamma0[g];
|
||||||
|
Gamma1[g]=gamma1[g];
|
||||||
|
}
|
||||||
|
|
||||||
GridBase *grid = WWVV0.Grid();
|
GridBase *grid = WWVV0.Grid();
|
||||||
|
|
||||||
autoView(WWVV0_v , WWVV0,CpuRead);
|
typedef typename ComplexField::vector_object vobj;
|
||||||
autoView(WWVV1_v , WWVV1,CpuRead);
|
autoView(WWVV0_v , WWVV0,AcceleratorRead);
|
||||||
autoView(O_trtr_v, O_trtr,CpuWrite);
|
autoView(WWVV1_v , WWVV1,AcceleratorRead);
|
||||||
autoView(O_fig8_v, O_fig8,CpuWrite);
|
autoView(O_trtr_v, O_trtr,AcceleratorWrite);
|
||||||
thread_for(ss,grid->oSites(),{
|
autoView(O_fig8_v, O_fig8,AcceleratorWrite);
|
||||||
|
accelerator_for(ss,grid->oSites(),vobj::Nsimd(),{
|
||||||
|
|
||||||
typedef typename ComplexField::vector_object vobj;
|
auto VV0 = WWVV0_v(ss);
|
||||||
|
auto VV1 = WWVV1_v(ss);
|
||||||
vobj v_trtr;
|
|
||||||
vobj v_fig8;
|
|
||||||
|
|
||||||
auto VV0 = WWVV0_v[ss];
|
|
||||||
auto VV1 = WWVV1_v[ss];
|
|
||||||
|
|
||||||
for(int g=0;g<Ng;g++){
|
for(int g=0;g<Ng;g++){
|
||||||
|
|
||||||
v_trtr = trace(VV0 * gamma0[g])* trace(VV1*gamma1[g]);
|
auto v_trtr = trace(VV0 * gamma0[g])* trace(VV1*gamma1[g]);
|
||||||
v_fig8 = trace(VV0 * gamma0[g] * VV1 * gamma1[g]);
|
auto v_fig8 = trace(VV0 * gamma0[g] * VV1 * gamma1[g]);
|
||||||
|
|
||||||
if ( g==0 ) {
|
if ( g==0 ) {
|
||||||
O_trtr_v[ss] = v_trtr;
|
coalescedWrite(O_trtr_v[ss], v_trtr);
|
||||||
O_fig8_v[ss] = v_fig8;
|
coalescedWrite(O_fig8_v[ss], v_fig8);
|
||||||
} else {
|
} else {
|
||||||
O_trtr_v[ss]+= v_trtr;
|
coalescedWrite(O_trtr_v[ss], O_trtr_v(ss)+v_trtr);
|
||||||
O_fig8_v[ss]+= v_fig8;
|
coalescedWrite(O_fig8_v[ss], O_fig8_v(ss)+v_fig8);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -1165,25 +1171,36 @@ void A2Autils<FImpl>::ContractFourQuarkColourMix(const PropagatorField &WWVV0,
|
|||||||
|
|
||||||
GridBase *grid = WWVV0.Grid();
|
GridBase *grid = WWVV0.Grid();
|
||||||
|
|
||||||
autoView( WWVV0_v , WWVV0,CpuRead);
|
// Make device accessible copy
|
||||||
autoView( WWVV1_v , WWVV1,CpuRead);
|
Vector<Gamma> Gamma0v (Ng);
|
||||||
autoView( O_trtr_v, O_trtr,CpuWrite);
|
Vector<Gamma> Gamma1v (Ng);
|
||||||
autoView( O_fig8_v, O_fig8,CpuWrite);
|
Gamma *Gamma0 = & Gamma0v[0];
|
||||||
|
Gamma *Gamma1 = & Gamma1v[0];
|
||||||
|
for(int g=0;g<Ng;g++) {
|
||||||
|
Gamma0[g]=gamma0[g];
|
||||||
|
Gamma1[g]=gamma1[g];
|
||||||
|
}
|
||||||
|
|
||||||
thread_for(ss,grid->oSites(),{
|
autoView( WWVV0_v , WWVV0,AcceleratorRead);
|
||||||
|
autoView( WWVV1_v , WWVV1,AcceleratorRead);
|
||||||
|
autoView( O_trtr_v, O_trtr,AcceleratorWrite);
|
||||||
|
autoView( O_fig8_v, O_fig8,AcceleratorWrite);
|
||||||
|
|
||||||
typedef typename ComplexField::vector_object vobj;
|
typedef typename ComplexField::vector_object vobj;
|
||||||
|
accelerator_for(ss,grid->oSites(),vobj::Nsimd(),{
|
||||||
|
|
||||||
|
auto VV0 = WWVV0_v(ss);
|
||||||
|
auto VV1 = WWVV1_v(ss);
|
||||||
|
|
||||||
|
typedef decltype(trace(VV0)) scalar;
|
||||||
|
|
||||||
auto VV0 = WWVV0_v[ss];
|
|
||||||
auto VV1 = WWVV1_v[ss];
|
|
||||||
|
|
||||||
for(int g=0;g<Ng;g++){
|
for(int g=0;g<Ng;g++){
|
||||||
|
|
||||||
auto VV0G = VV0 * gamma0[g]; // Spin multiply
|
auto VV0G = VV0 * gamma0[g]; // Spin multiply
|
||||||
auto VV1G = VV1 * gamma1[g];
|
auto VV1G = VV1 * gamma1[g];
|
||||||
|
|
||||||
vobj v_trtr=Zero();
|
scalar v_trtr=Zero();
|
||||||
vobj v_fig8=Zero();
|
scalar v_fig8=Zero();
|
||||||
|
|
||||||
/////////////////////////////////////////
|
/////////////////////////////////////////
|
||||||
// Colour mixed
|
// Colour mixed
|
||||||
@ -1197,7 +1214,7 @@ void A2Autils<FImpl>::ContractFourQuarkColourMix(const PropagatorField &WWVV0,
|
|||||||
// Wick1 [ spin TR TR ]
|
// Wick1 [ spin TR TR ]
|
||||||
//
|
//
|
||||||
// (VV0*G0)_ss,ba . (VV1*G1)_tt,ab
|
// (VV0*G0)_ss,ba . (VV1*G1)_tt,ab
|
||||||
//
|
//
|
||||||
// Wick2 [ spin fig8 ]
|
// Wick2 [ spin fig8 ]
|
||||||
//
|
//
|
||||||
// (VV0*G0)_st,aa (VV1*G1)_ts,bb
|
// (VV0*G0)_st,aa (VV1*G1)_ts,bb
|
||||||
@ -1234,11 +1251,11 @@ Bag [8,4] fig8 (-227.58,3.58808e-17) trtr (-32.5776,1.83286e-17) // - 1602
|
|||||||
}}}}
|
}}}}
|
||||||
|
|
||||||
if ( g==0 ) {
|
if ( g==0 ) {
|
||||||
O_trtr_v[ss] = v_trtr;
|
coalescedWrite(O_trtr_v[ss] , v_trtr);
|
||||||
O_fig8_v[ss] = v_fig8;
|
coalescedWrite(O_fig8_v[ss] , v_fig8);
|
||||||
} else {
|
} else {
|
||||||
O_trtr_v[ss]+= v_trtr;
|
coalescedWrite(O_trtr_v[ss],O_trtr_v(ss) + v_trtr);
|
||||||
O_fig8_v[ss]+= v_fig8;
|
coalescedWrite(O_fig8_v[ss],O_fig8_v(ss) + v_fig8);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user