1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-10-25 02:04:48 +01:00

Compare commits

..

5 Commits

Author SHA1 Message Date
Peter Boyle
d8c0c0ba0a Fix and compiles 2020-08-12 14:35:08 -04:00
Peter Boyle
c6cf918d4c Typo 2020-08-12 14:24:39 -04:00
Peter Boyle
6d0a907c5c first try at A2A four quark offload 2020-08-12 14:17:46 -04:00
Peter Boyle
3276aa67dc Update 2020-08-12 14:15:53 -04:00
Peter Boyle
7cf7f11e1a Doc recompile 2020-07-22 14:44:11 -04:00
3 changed files with 52 additions and 35 deletions

View File

@@ -1119,33 +1119,39 @@ void A2Autils<FImpl>::ContractFourQuarkColourDiagonal(const PropagatorField &WWV
assert(gamma0.size()==gamma1.size()); assert(gamma0.size()==gamma1.size());
int Ng = gamma0.size(); int Ng = gamma0.size();
// Make device accessible copy
Vector<Gamma> Gamma0v (Ng);
Vector<Gamma> Gamma1v (Ng);
Gamma *Gamma0 = & Gamma0v[0];
Gamma *Gamma1 = & Gamma1v[0];
for(int g=0;g<Ng;g++) {
Gamma0[g]=gamma0[g];
Gamma1[g]=gamma1[g];
}
GridBase *grid = WWVV0.Grid(); GridBase *grid = WWVV0.Grid();
autoView(WWVV0_v , WWVV0,CpuRead); typedef typename ComplexField::vector_object vobj;
autoView(WWVV1_v , WWVV1,CpuRead); autoView(WWVV0_v , WWVV0,AcceleratorRead);
autoView(O_trtr_v, O_trtr,CpuWrite); autoView(WWVV1_v , WWVV1,AcceleratorRead);
autoView(O_fig8_v, O_fig8,CpuWrite); autoView(O_trtr_v, O_trtr,AcceleratorWrite);
thread_for(ss,grid->oSites(),{ autoView(O_fig8_v, O_fig8,AcceleratorWrite);
accelerator_for(ss,grid->oSites(),vobj::Nsimd(),{
typedef typename ComplexField::vector_object vobj; auto VV0 = WWVV0_v(ss);
auto VV1 = WWVV1_v(ss);
vobj v_trtr;
vobj v_fig8;
auto VV0 = WWVV0_v[ss];
auto VV1 = WWVV1_v[ss];
for(int g=0;g<Ng;g++){ for(int g=0;g<Ng;g++){
v_trtr = trace(VV0 * gamma0[g])* trace(VV1*gamma1[g]); auto v_trtr = trace(VV0 * gamma0[g])* trace(VV1*gamma1[g]);
v_fig8 = trace(VV0 * gamma0[g] * VV1 * gamma1[g]); auto v_fig8 = trace(VV0 * gamma0[g] * VV1 * gamma1[g]);
if ( g==0 ) { if ( g==0 ) {
O_trtr_v[ss] = v_trtr; coalescedWrite(O_trtr_v[ss], v_trtr);
O_fig8_v[ss] = v_fig8; coalescedWrite(O_fig8_v[ss], v_fig8);
} else { } else {
O_trtr_v[ss]+= v_trtr; coalescedWrite(O_trtr_v[ss], O_trtr_v(ss)+v_trtr);
O_fig8_v[ss]+= v_fig8; coalescedWrite(O_fig8_v[ss], O_fig8_v(ss)+v_fig8);
} }
} }
@@ -1165,25 +1171,36 @@ void A2Autils<FImpl>::ContractFourQuarkColourMix(const PropagatorField &WWVV0,
GridBase *grid = WWVV0.Grid(); GridBase *grid = WWVV0.Grid();
autoView( WWVV0_v , WWVV0,CpuRead); // Make device accessible copy
autoView( WWVV1_v , WWVV1,CpuRead); Vector<Gamma> Gamma0v (Ng);
autoView( O_trtr_v, O_trtr,CpuWrite); Vector<Gamma> Gamma1v (Ng);
autoView( O_fig8_v, O_fig8,CpuWrite); Gamma *Gamma0 = & Gamma0v[0];
Gamma *Gamma1 = & Gamma1v[0];
for(int g=0;g<Ng;g++) {
Gamma0[g]=gamma0[g];
Gamma1[g]=gamma1[g];
}
thread_for(ss,grid->oSites(),{ autoView( WWVV0_v , WWVV0,AcceleratorRead);
autoView( WWVV1_v , WWVV1,AcceleratorRead);
autoView( O_trtr_v, O_trtr,AcceleratorWrite);
autoView( O_fig8_v, O_fig8,AcceleratorWrite);
typedef typename ComplexField::vector_object vobj; typedef typename ComplexField::vector_object vobj;
accelerator_for(ss,grid->oSites(),vobj::Nsimd(),{
auto VV0 = WWVV0_v[ss]; auto VV0 = WWVV0_v(ss);
auto VV1 = WWVV1_v[ss]; auto VV1 = WWVV1_v(ss);
typedef decltype(trace(VV0)) scalar;
for(int g=0;g<Ng;g++){ for(int g=0;g<Ng;g++){
auto VV0G = VV0 * gamma0[g]; // Spin multiply auto VV0G = VV0 * gamma0[g]; // Spin multiply
auto VV1G = VV1 * gamma1[g]; auto VV1G = VV1 * gamma1[g];
vobj v_trtr=Zero(); scalar v_trtr=Zero();
vobj v_fig8=Zero(); scalar v_fig8=Zero();
///////////////////////////////////////// /////////////////////////////////////////
// Colour mixed // Colour mixed
@@ -1197,7 +1214,7 @@ void A2Autils<FImpl>::ContractFourQuarkColourMix(const PropagatorField &WWVV0,
// Wick1 [ spin TR TR ] // Wick1 [ spin TR TR ]
// //
// (VV0*G0)_ss,ba . (VV1*G1)_tt,ab // (VV0*G0)_ss,ba . (VV1*G1)_tt,ab
// //
// Wick2 [ spin fig8 ] // Wick2 [ spin fig8 ]
// //
// (VV0*G0)_st,aa (VV1*G1)_ts,bb // (VV0*G0)_st,aa (VV1*G1)_ts,bb
@@ -1234,11 +1251,11 @@ Bag [8,4] fig8 (-227.58,3.58808e-17) trtr (-32.5776,1.83286e-17) // - 1602
}}}} }}}}
if ( g==0 ) { if ( g==0 ) {
O_trtr_v[ss] = v_trtr; coalescedWrite(O_trtr_v[ss] , v_trtr);
O_fig8_v[ss] = v_fig8; coalescedWrite(O_fig8_v[ss] , v_fig8);
} else { } else {
O_trtr_v[ss]+= v_trtr; coalescedWrite(O_trtr_v[ss],O_trtr_v(ss) + v_trtr);
O_fig8_v[ss]+= v_fig8; coalescedWrite(O_fig8_v[ss],O_fig8_v(ss) + v_fig8);
} }
} }

Binary file not shown.

View File

@@ -20,7 +20,7 @@
# -- Project information ----------------------------------------------------- # -- Project information -----------------------------------------------------
project = 'Grid' project = 'Grid'
copyright = '2018, Peter Boyle, Guido Cossu, Antonin Portelli, Azusa Yamaguchi' copyright = '2019, Peter Boyle, Guido Cossu, Antonin Portelli, Azusa Yamaguchi'
author = 'Peter Boyle, Guido Cossu, Antonin Portelli, Azusa Yamaguchi' author = 'Peter Boyle, Guido Cossu, Antonin Portelli, Azusa Yamaguchi'
# The short X.Y version # The short X.Y version