1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-04 19:25:56 +01:00

Optimise lie algebra project

This commit is contained in:
Peter Boyle 2024-09-19 15:48:09 -04:00
parent da919949f9
commit bffd30abec
2 changed files with 22 additions and 22 deletions

View File

@ -418,32 +418,32 @@ static void LieAlgebraProject(LatticeAlgebraMatrix &out,const LatticeMatrix &in,
int hNNm1= NNm1/2; int hNNm1= NNm1/2;
RealD sqrt_2 = sqrt(2.0); RealD sqrt_2 = sqrt(2.0);
Complex ci(0.0,1.0); Complex ci(0.0,1.0);
for(int su2Index=0;su2Index<hNNm1;su2Index++){
int i1, i2; const int nsimd= Matrix::Nsimd();
su2SubGroupIndex(i1, i2, su2Index); accelerator_for(ss,grid->oSites(),nsimd,{
int ax = su2Index*2; for(int su2Index=0;su2Index<hNNm1;su2Index++){
int ay = su2Index*2+1; int i1, i2;
accelerator_for(ss,grid->oSites(),1,{ su2SubGroupIndex(i1, i2, su2Index);
int ax = su2Index*2;
int ay = su2Index*2+1;
// in is traceless ANTI-hermitian whereas Grid generators are Hermitian. // in is traceless ANTI-hermitian whereas Grid generators are Hermitian.
// trace( Ta x Ci in) // trace( Ta x Ci in)
// Bet I need to move to real part with mult by -i // Bet I need to move to real part with mult by -i
out_v[ss]()()(ax,b) = 0.5*(real(in_v[ss]()()(i2,i1)) - real(in_v[ss]()()(i1,i2))); coalescedWrite(out_v[ss]()()(ax,b),0.5*(real(in_v(ss)()()(i2,i1)) - real(in_v(ss)()()(i1,i2))));
out_v[ss]()()(ay,b) = 0.5*(imag(in_v[ss]()()(i1,i2)) + imag(in_v[ss]()()(i2,i1))); coalescedWrite(out_v[ss]()()(ay,b),0.5*(imag(in_v(ss)()()(i1,i2)) + imag(in_v(ss)()()(i2,i1))));
}); }
} for(int diagIndex=0;diagIndex<N-1;diagIndex++){
for(int diagIndex=0;diagIndex<N-1;diagIndex++){ int k = diagIndex + 1; // diagIndex starts from 0
int k = diagIndex + 1; // diagIndex starts from 0 int a = NNm1+diagIndex;
int a = NNm1+diagIndex; RealD scale = 1.0/sqrt(2.0*k*(k+1));
RealD scale = 1.0/sqrt(2.0*k*(k+1)); auto tmp = in_v(ss)()()(0,0);
accelerator_for(ss,grid->oSites(),vComplex::Nsimd(),{
auto tmp = in_v[ss]()()(0,0);
for(int i=1;i<k;i++){ for(int i=1;i<k;i++){
tmp=tmp+in_v[ss]()()(i,i); tmp=tmp+in_v(ss)()()(i,i);
} }
tmp = tmp - in_v[ss]()()(k,k)*k; tmp = tmp - in_v(ss)()()(k,k)*k;
out_v[ss]()()(a,b) =imag(tmp) * scale; coalescedWrite(out_v[ss]()()(a,b),imag(tmp) * scale);
}); }
} });
} }

View File

@ -118,7 +118,7 @@ static void generatorDiagonal(int diagIndex, iGroupMatrix<cplx> &ta) {
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// Map a su2 subgroup number to the pair of rows that are non zero // Map a su2 subgroup number to the pair of rows that are non zero
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
static void su2SubGroupIndex(int &i1, int &i2, int su2_index, GroupName::SU) { static accelerator_inline void su2SubGroupIndex(int &i1, int &i2, int su2_index, GroupName::SU) {
assert((su2_index >= 0) && (su2_index < (ncolour * (ncolour - 1)) / 2)); assert((su2_index >= 0) && (su2_index < (ncolour * (ncolour - 1)) / 2));
int spare = su2_index; int spare = su2_index;