mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
Merge branch 'develop' of https://github.com/paboyle/Grid into develop
This commit is contained in:
commit
3e0eff6468
@ -55,7 +55,7 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
|
||||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||||
uint64_t lmax=96;
|
uint64_t lmax=64;
|
||||||
#define NLOOP (10*lmax*lmax*lmax*lmax/vol)
|
#define NLOOP (10*lmax*lmax*lmax*lmax/vol)
|
||||||
for(int lat=8;lat<=lmax;lat+=8){
|
for(int lat=8;lat<=lmax;lat+=8){
|
||||||
|
|
||||||
|
@ -35,7 +35,7 @@ using namespace Grid::QCD;
|
|||||||
int main (int argc, char ** argv)
|
int main (int argc, char ** argv)
|
||||||
{
|
{
|
||||||
Grid_init(&argc,&argv);
|
Grid_init(&argc,&argv);
|
||||||
#define LMAX (16)
|
#define LMAX (32)
|
||||||
#define LMIN (16)
|
#define LMIN (16)
|
||||||
#define LINC (4)
|
#define LINC (4)
|
||||||
|
|
||||||
@ -46,14 +46,14 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
int64_t threads = GridThread::GetThreads();
|
int64_t threads = GridThread::GetThreads();
|
||||||
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||||
#if 1
|
|
||||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 x= x*y"<<std::endl;
|
std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 x= x*y"<<std::endl;
|
||||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
||||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||||
|
|
||||||
for(int lat=4;lat<=LMAX;lat+=LINC){
|
for(int lat=LMIN;lat<=LMAX;lat+=LINC){
|
||||||
|
|
||||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||||
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||||
@ -85,7 +85,7 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
||||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||||
|
|
||||||
for(int lat=2;lat<=LMAX;lat+=LINC){
|
for(int lat=LMIN;lat<=LMAX;lat+=LINC){
|
||||||
|
|
||||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||||
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||||
@ -116,7 +116,7 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
||||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||||
|
|
||||||
for(int lat=2;lat<=LMAX;lat+=LINC){
|
for(int lat=LMIN;lat<=LMAX;lat+=LINC){
|
||||||
|
|
||||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||||
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||||
@ -147,7 +147,7 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
||||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||||
|
|
||||||
for(int lat=2;lat<=LMAX;lat+=LINC){
|
for(int lat=LMIN;lat<=LMAX;lat+=LINC){
|
||||||
|
|
||||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||||
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||||
@ -171,8 +171,6 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 CovShiftForward(z,x,y)"<<std::endl;
|
std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 CovShiftForward(z,x,y)"<<std::endl;
|
||||||
@ -193,19 +191,63 @@ int main (int argc, char ** argv)
|
|||||||
LatticeColourMatrix y(&Grid); random(pRNG,y);
|
LatticeColourMatrix y(&Grid); random(pRNG,y);
|
||||||
|
|
||||||
for(int mu=0;mu<4;mu++){
|
for(int mu=0;mu<4;mu++){
|
||||||
|
double start=usecond();
|
||||||
|
for(int64_t i=0;i<Nloop;i++){
|
||||||
|
z = PeriodicBC::CovShiftForward(x,mu,y);
|
||||||
|
}
|
||||||
|
double stop=usecond();
|
||||||
|
double time = (stop-start)/Nloop*1000.0;
|
||||||
|
|
||||||
|
|
||||||
|
double bytes=3*vol*Nc*Nc*sizeof(Complex);
|
||||||
|
double flops=Nc*Nc*(6+8+8)*vol;
|
||||||
|
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#if 1
|
||||||
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 z= x * Cshift(y)"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||||
|
|
||||||
|
for(int lat=LMIN;lat<=LMAX;lat+=LINC){
|
||||||
|
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||||
|
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||||
|
|
||||||
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
|
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
||||||
|
|
||||||
|
LatticeColourMatrix z(&Grid); random(pRNG,z);
|
||||||
|
LatticeColourMatrix x(&Grid); random(pRNG,x);
|
||||||
|
LatticeColourMatrix y(&Grid); random(pRNG,y);
|
||||||
|
LatticeColourMatrix tmp(&Grid);
|
||||||
|
|
||||||
|
for(int mu=0;mu<4;mu++){
|
||||||
|
double tshift=0;
|
||||||
|
double tmult =0;
|
||||||
|
|
||||||
double start=usecond();
|
double start=usecond();
|
||||||
for(int64_t i=0;i<Nloop;i++){
|
for(int64_t i=0;i<Nloop;i++){
|
||||||
z = PeriodicBC::CovShiftForward(x,mu,y);
|
tshift-=usecond();
|
||||||
|
tmp = Cshift(y,mu,-1);
|
||||||
|
tshift+=usecond();
|
||||||
|
tmult-=usecond();
|
||||||
|
z = x*tmp;
|
||||||
|
tmult+=usecond();
|
||||||
}
|
}
|
||||||
double stop=usecond();
|
double stop=usecond();
|
||||||
double time = (stop-start)/Nloop*1000.0;
|
double time = (stop-start)/Nloop;
|
||||||
|
tshift = tshift/Nloop;
|
||||||
|
tmult = tmult /Nloop;
|
||||||
|
|
||||||
double bytes=3*vol*Nc*Nc*sizeof(Complex);
|
double bytes=3*vol*Nc*Nc*sizeof(Complex);
|
||||||
double flops=Nc*Nc*(6+8+8)*vol;
|
double flops=Nc*Nc*(6+8+8)*vol;
|
||||||
|
std::cout<<GridLogMessage<<std::setprecision(3) << "total us "<<time<<" shift "<<tshift <<" mult "<<tmult<<std::endl;
|
||||||
|
time = time * 1000; // convert to NS for GB/s
|
||||||
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
Grid_finalize();
|
Grid_finalize();
|
||||||
}
|
}
|
||||||
|
@ -277,7 +277,9 @@ public:
|
|||||||
uint8_t *cp = (uint8_t *)ptr;
|
uint8_t *cp = (uint8_t *)ptr;
|
||||||
if ( ptr ) {
|
if ( ptr ) {
|
||||||
// One touch per 4k page, static OMP loop to catch same loop order
|
// One touch per 4k page, static OMP loop to catch same loop order
|
||||||
|
#ifdef GRID_OMP
|
||||||
#pragma omp parallel for schedule(static)
|
#pragma omp parallel for schedule(static)
|
||||||
|
#endif
|
||||||
for(size_type n=0;n<bytes;n+=4096){
|
for(size_type n=0;n<bytes;n+=4096){
|
||||||
cp[n]=0;
|
cp[n]=0;
|
||||||
}
|
}
|
||||||
|
@ -45,31 +45,33 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer,int dimen
|
|||||||
int so=plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
int so=plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||||
int e1=rhs._grid->_slice_nblock[dimension];
|
int e1=rhs._grid->_slice_nblock[dimension];
|
||||||
int e2=rhs._grid->_slice_block[dimension];
|
int e2=rhs._grid->_slice_block[dimension];
|
||||||
|
int ent = 0;
|
||||||
|
|
||||||
|
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
|
||||||
|
|
||||||
int stride=rhs._grid->_slice_stride[dimension];
|
int stride=rhs._grid->_slice_stride[dimension];
|
||||||
if ( cbmask == 0x3 ) {
|
if ( cbmask == 0x3 ) {
|
||||||
parallel_for_nest2(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
int o = n*stride;
|
int o = n*stride;
|
||||||
int bo = n*e2;
|
int bo = n*e2;
|
||||||
buffer[off+bo+b]=rhs._odata[so+o+b];
|
table[ent++] = std::pair<int,int>(off+bo+b,so+o+b);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
int bo=0;
|
int bo=0;
|
||||||
std::vector<std::pair<int,int> > table;
|
|
||||||
for(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
int o = n*stride;
|
int o = n*stride;
|
||||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
|
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
|
||||||
if ( ocb &cbmask ) {
|
if ( ocb &cbmask ) {
|
||||||
table.push_back(std::pair<int,int> (bo++,o+b));
|
table[ent++]=std::pair<int,int> (off+bo++,so+o+b);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
parallel_for(int i=0;i<table.size();i++){
|
}
|
||||||
buffer[off+table[i].first]=rhs._odata[so+table[i].second];
|
parallel_for(int i=0;i<ent;i++){
|
||||||
}
|
buffer[table[i].first]=rhs._odata[table[i].second];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -140,31 +142,35 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vo
|
|||||||
int e1=rhs._grid->_slice_nblock[dimension];
|
int e1=rhs._grid->_slice_nblock[dimension];
|
||||||
int e2=rhs._grid->_slice_block[dimension];
|
int e2=rhs._grid->_slice_block[dimension];
|
||||||
int stride=rhs._grid->_slice_stride[dimension];
|
int stride=rhs._grid->_slice_stride[dimension];
|
||||||
|
|
||||||
|
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
|
||||||
|
int ent =0;
|
||||||
|
|
||||||
if ( cbmask ==0x3 ) {
|
if ( cbmask ==0x3 ) {
|
||||||
parallel_for_nest2(int n=0;n<e1;n++){
|
|
||||||
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
int o =n*rhs._grid->_slice_stride[dimension];
|
int o =n*rhs._grid->_slice_stride[dimension];
|
||||||
int bo =n*rhs._grid->_slice_block[dimension];
|
int bo =n*rhs._grid->_slice_block[dimension];
|
||||||
rhs._odata[so+o+b]=buffer[bo+b];
|
table[ent++] = std::pair<int,int>(so+o+b,bo);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
std::vector<std::pair<int,int> > table;
|
|
||||||
int bo=0;
|
int bo=0;
|
||||||
for(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
int o =n*rhs._grid->_slice_stride[dimension];
|
int o =n*rhs._grid->_slice_stride[dimension];
|
||||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
|
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
|
||||||
if ( ocb & cbmask ) {
|
if ( ocb & cbmask ) {
|
||||||
table.push_back(std::pair<int,int> (so+o+b,bo++));
|
table[ent++]=std::pair<int,int> (so+o+b,bo++);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
parallel_for(int i=0;i<table.size();i++){
|
}
|
||||||
// std::cout << "Rcv"<< table[i].first << " " << table[i].second << " " <<buffer[table[i].second]<<std::endl;
|
|
||||||
rhs._odata[table[i].first]=buffer[table[i].second];
|
parallel_for(int i=0;i<ent;i++){
|
||||||
}
|
rhs._odata[table[i].first]=buffer[table[i].second];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -228,29 +234,32 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs
|
|||||||
int e1=rhs._grid->_slice_nblock[dimension]; // clearly loop invariant for icpc
|
int e1=rhs._grid->_slice_nblock[dimension]; // clearly loop invariant for icpc
|
||||||
int e2=rhs._grid->_slice_block[dimension];
|
int e2=rhs._grid->_slice_block[dimension];
|
||||||
int stride = rhs._grid->_slice_stride[dimension];
|
int stride = rhs._grid->_slice_stride[dimension];
|
||||||
|
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
|
||||||
|
int ent=0;
|
||||||
|
|
||||||
if(cbmask == 0x3 ){
|
if(cbmask == 0x3 ){
|
||||||
parallel_for_nest2(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
|
|
||||||
int o =n*stride+b;
|
int o =n*stride+b;
|
||||||
//lhs._odata[lo+o]=rhs._odata[ro+o];
|
table[ent++] = std::pair<int,int>(lo+o,ro+o);
|
||||||
vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
parallel_for_nest2(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
|
|
||||||
int o =n*stride+b;
|
int o =n*stride+b;
|
||||||
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
|
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
|
||||||
if ( ocb&cbmask ) {
|
if ( ocb&cbmask ) {
|
||||||
//lhs._odata[lo+o]=rhs._odata[ro+o];
|
table[ent++] = std::pair<int,int>(lo+o,ro+o);
|
||||||
vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
parallel_for(int i=0;i<ent;i++){
|
||||||
|
lhs._odata[table[i].first]=rhs._odata[table[i].second];
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type)
|
template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type)
|
||||||
@ -269,16 +278,28 @@ template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vo
|
|||||||
int e2=rhs._grid->_slice_block [dimension];
|
int e2=rhs._grid->_slice_block [dimension];
|
||||||
int stride = rhs._grid->_slice_stride[dimension];
|
int stride = rhs._grid->_slice_stride[dimension];
|
||||||
|
|
||||||
parallel_for_nest2(int n=0;n<e1;n++){
|
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
|
||||||
for(int b=0;b<e2;b++){
|
int ent=0;
|
||||||
|
|
||||||
|
double t_tab,t_perm;
|
||||||
|
if ( cbmask == 0x3 ) {
|
||||||
|
for(int n=0;n<e1;n++){
|
||||||
|
for(int b=0;b<e2;b++){
|
||||||
|
int o =n*stride;
|
||||||
|
table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
|
||||||
|
}}
|
||||||
|
} else {
|
||||||
|
for(int n=0;n<e1;n++){
|
||||||
|
for(int b=0;b<e2;b++){
|
||||||
int o =n*stride;
|
int o =n*stride;
|
||||||
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o+b);
|
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o+b);
|
||||||
if ( ocb&cbmask ) {
|
if ( ocb&cbmask ) table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
|
||||||
permute(lhs._odata[lo+o+b],rhs._odata[ro+o+b],permute_type);
|
}}
|
||||||
}
|
}
|
||||||
|
|
||||||
}}
|
parallel_for(int i=0;i<ent;i++){
|
||||||
|
permute(lhs._odata[table[i].first],rhs._odata[table[i].second],permute_type);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
@ -291,6 +312,8 @@ template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &r
|
|||||||
sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
|
sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
|
||||||
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
|
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
|
||||||
|
|
||||||
|
double t_local;
|
||||||
|
|
||||||
if ( sshift[0] == sshift[1] ) {
|
if ( sshift[0] == sshift[1] ) {
|
||||||
Cshift_local(ret,rhs,dimension,shift,0x3);
|
Cshift_local(ret,rhs,dimension,shift,0x3);
|
||||||
} else {
|
} else {
|
||||||
@ -299,7 +322,7 @@ template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &r
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
|
template<class vobj> void Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
|
||||||
{
|
{
|
||||||
GridBase *grid = rhs._grid;
|
GridBase *grid = rhs._grid;
|
||||||
int fd = grid->_fdimensions[dimension];
|
int fd = grid->_fdimensions[dimension];
|
||||||
@ -325,11 +348,7 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
|
|||||||
|
|
||||||
int sshift = grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
|
int sshift = grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
|
||||||
int sx = (x+sshift)%rd;
|
int sx = (x+sshift)%rd;
|
||||||
|
|
||||||
// FIXME : This must change where we have a
|
|
||||||
// Rotate slice.
|
|
||||||
|
|
||||||
// Document how this works ; why didn't I do this when I first wrote it...
|
|
||||||
// wrap is whether sshift > rd.
|
// wrap is whether sshift > rd.
|
||||||
// num is sshift mod rd.
|
// num is sshift mod rd.
|
||||||
//
|
//
|
||||||
@ -365,10 +384,8 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
|
|||||||
|
|
||||||
if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type_dist);
|
if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type_dist);
|
||||||
else Copy_plane(ret,rhs,dimension,x,sx,cbmask);
|
else Copy_plane(ret,rhs,dimension,x,sx,cbmask);
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -256,13 +256,42 @@ public:
|
|||||||
_odata[ss]=r._odata[ss];
|
_odata[ss]=r._odata[ss];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Lattice(Lattice&& r){ // move constructor
|
Lattice(Lattice&& r){ // move constructor
|
||||||
_grid = r._grid;
|
_grid = r._grid;
|
||||||
checkerboard = r.checkerboard;
|
checkerboard = r.checkerboard;
|
||||||
_odata=std::move(r._odata);
|
_odata=std::move(r._odata);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline Lattice<vobj> & operator = (Lattice<vobj> && r)
|
||||||
|
{
|
||||||
|
_grid = r._grid;
|
||||||
|
checkerboard = r.checkerboard;
|
||||||
|
_odata =std::move(r._odata);
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
|
||||||
|
_grid = r._grid;
|
||||||
|
checkerboard = r.checkerboard;
|
||||||
|
_odata.resize(_grid->oSites());// essential
|
||||||
|
|
||||||
|
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||||
|
_odata[ss]=r._odata[ss];
|
||||||
|
}
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
|
||||||
|
this->checkerboard = r.checkerboard;
|
||||||
|
conformable(*this,r);
|
||||||
|
|
||||||
|
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||||
|
this->_odata[ss]=r._odata[ss];
|
||||||
|
}
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
virtual ~Lattice(void) = default;
|
virtual ~Lattice(void) = default;
|
||||||
|
|
||||||
void reset(GridBase* grid) {
|
void reset(GridBase* grid) {
|
||||||
@ -281,33 +310,6 @@ public:
|
|||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
|
|
||||||
this->checkerboard = r.checkerboard;
|
|
||||||
conformable(*this,r);
|
|
||||||
|
|
||||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
|
||||||
this->_odata[ss]=r._odata[ss];
|
|
||||||
}
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
strong_inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
|
|
||||||
_grid = r._grid;
|
|
||||||
checkerboard = r.checkerboard;
|
|
||||||
_odata.resize(_grid->oSites());// essential
|
|
||||||
|
|
||||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
|
||||||
_odata[ss]=r._odata[ss];
|
|
||||||
}
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
strong_inline Lattice<vobj> & operator = (Lattice<vobj> && r)
|
|
||||||
{
|
|
||||||
_grid = r._grid;
|
|
||||||
checkerboard = r.checkerboard;
|
|
||||||
_odata =std::move(r._odata);
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
// *=,+=,-= operators inherit behvour from correspond */+/- operation
|
// *=,+=,-= operators inherit behvour from correspond */+/- operation
|
||||||
template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) {
|
template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) {
|
||||||
|
@ -110,11 +110,11 @@ class BinaryIO {
|
|||||||
lsites = 1;
|
lsites = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
#pragma omp parallel
|
PARALLEL_REGION
|
||||||
{
|
{
|
||||||
uint32_t nersc_csum_thr = 0;
|
uint32_t nersc_csum_thr = 0;
|
||||||
|
|
||||||
#pragma omp for
|
PARALLEL_FOR_LOOP_INTERN
|
||||||
for (uint64_t local_site = 0; local_site < lsites; local_site++)
|
for (uint64_t local_site = 0; local_site < lsites; local_site++)
|
||||||
{
|
{
|
||||||
uint32_t *site_buf = (uint32_t *)&fbuf[local_site];
|
uint32_t *site_buf = (uint32_t *)&fbuf[local_site];
|
||||||
@ -124,7 +124,7 @@ class BinaryIO {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#pragma omp critical
|
PARALLEL_CRITICAL
|
||||||
{
|
{
|
||||||
nersc_csum += nersc_csum_thr;
|
nersc_csum += nersc_csum_thr;
|
||||||
}
|
}
|
||||||
@ -146,14 +146,14 @@ class BinaryIO {
|
|||||||
std::vector<int> local_start =grid->LocalStarts();
|
std::vector<int> local_start =grid->LocalStarts();
|
||||||
std::vector<int> global_vol =grid->FullDimensions();
|
std::vector<int> global_vol =grid->FullDimensions();
|
||||||
|
|
||||||
#pragma omp parallel
|
PARALLEL_REGION
|
||||||
{
|
{
|
||||||
std::vector<int> coor(nd);
|
std::vector<int> coor(nd);
|
||||||
uint32_t scidac_csuma_thr=0;
|
uint32_t scidac_csuma_thr=0;
|
||||||
uint32_t scidac_csumb_thr=0;
|
uint32_t scidac_csumb_thr=0;
|
||||||
uint32_t site_crc=0;
|
uint32_t site_crc=0;
|
||||||
|
|
||||||
#pragma omp for
|
PARALLEL_FOR_LOOP_INTERN
|
||||||
for(uint64_t local_site=0;local_site<lsites;local_site++){
|
for(uint64_t local_site=0;local_site<lsites;local_site++){
|
||||||
|
|
||||||
uint32_t * site_buf = (uint32_t *)&fbuf[local_site];
|
uint32_t * site_buf = (uint32_t *)&fbuf[local_site];
|
||||||
@ -183,7 +183,7 @@ class BinaryIO {
|
|||||||
scidac_csumb_thr ^= site_crc<<gsite31 | site_crc>>(32-gsite31);
|
scidac_csumb_thr ^= site_crc<<gsite31 | site_crc>>(32-gsite31);
|
||||||
}
|
}
|
||||||
|
|
||||||
#pragma omp critical
|
PARALLEL_CRITICAL
|
||||||
{
|
{
|
||||||
scidac_csuma^= scidac_csuma_thr;
|
scidac_csuma^= scidac_csuma_thr;
|
||||||
scidac_csumb^= scidac_csumb_thr;
|
scidac_csumb^= scidac_csumb_thr;
|
||||||
|
@ -114,18 +114,26 @@ class Integrator {
|
|||||||
// input U actually not used in the fundamental case
|
// input U actually not used in the fundamental case
|
||||||
// Fundamental updates, include smearing
|
// Fundamental updates, include smearing
|
||||||
|
|
||||||
for (int a = 0; a < as[level].actions.size(); ++a) {
|
for (int a = 0; a < as[level].actions.size(); ++a) {
|
||||||
|
double start_full = usecond();
|
||||||
Field force(U._grid);
|
Field force(U._grid);
|
||||||
conformable(U._grid, Mom._grid);
|
conformable(U._grid, Mom._grid);
|
||||||
|
|
||||||
Field& Us = Smearer.get_U(as[level].actions.at(a)->is_smeared);
|
Field& Us = Smearer.get_U(as[level].actions.at(a)->is_smeared);
|
||||||
|
double start_force = usecond();
|
||||||
as[level].actions.at(a)->deriv(Us, force); // deriv should NOT include Ta
|
as[level].actions.at(a)->deriv(Us, force); // deriv should NOT include Ta
|
||||||
|
|
||||||
std::cout << GridLogIntegrator << "Smearing (on/off): " << as[level].actions.at(a)->is_smeared << std::endl;
|
std::cout << GridLogIntegrator << "Smearing (on/off): " << as[level].actions.at(a)->is_smeared << std::endl;
|
||||||
if (as[level].actions.at(a)->is_smeared) Smearer.smeared_force(force);
|
if (as[level].actions.at(a)->is_smeared) Smearer.smeared_force(force);
|
||||||
force = FieldImplementation::projectForce(force); // Ta for gauge fields
|
force = FieldImplementation::projectForce(force); // Ta for gauge fields
|
||||||
|
double end_force = usecond();
|
||||||
Real force_abs = std::sqrt(norm2(force)/U._grid->gSites());
|
Real force_abs = std::sqrt(norm2(force)/U._grid->gSites());
|
||||||
std::cout << GridLogIntegrator << "Force average: " << force_abs << std::endl;
|
std::cout << GridLogIntegrator << "["<<level<<"]["<<a<<"] Force average: " << force_abs << std::endl;
|
||||||
Mom -= force * ep;
|
Mom -= force * ep;
|
||||||
|
double end_full = usecond();
|
||||||
|
double time_full = (end_full - start_full) / 1e3;
|
||||||
|
double time_force = (end_force - start_force) / 1e3;
|
||||||
|
std::cout << GridLogIntegrator << "["<<level<<"]["<<a<<"] P update elapsed time: " << time_full << " ms (force: " << time_force << " ms)" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Force from the other representations
|
// Force from the other representations
|
||||||
|
@ -6,30 +6,33 @@
|
|||||||
#ifndef GAUGE_CONFIG_
|
#ifndef GAUGE_CONFIG_
|
||||||
#define GAUGE_CONFIG_
|
#define GAUGE_CONFIG_
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid
|
||||||
|
{
|
||||||
|
|
||||||
namespace QCD {
|
namespace QCD
|
||||||
|
{
|
||||||
|
|
||||||
//trivial class for no smearing
|
//trivial class for no smearing
|
||||||
template< class Impl >
|
template <class Impl>
|
||||||
class NoSmearing {
|
class NoSmearing
|
||||||
|
{
|
||||||
public:
|
public:
|
||||||
INHERIT_FIELD_TYPES(Impl);
|
INHERIT_FIELD_TYPES(Impl);
|
||||||
|
|
||||||
Field* ThinField;
|
Field *ThinField;
|
||||||
|
|
||||||
NoSmearing(): ThinField(NULL) {}
|
NoSmearing() : ThinField(NULL) {}
|
||||||
|
|
||||||
void set_Field(Field& U) { ThinField = &U; }
|
void set_Field(Field &U) { ThinField = &U; }
|
||||||
|
|
||||||
void smeared_force(Field&) const {}
|
void smeared_force(Field &) const {}
|
||||||
|
|
||||||
Field& get_SmearedU() { return *ThinField; }
|
Field &get_SmearedU() { return *ThinField; }
|
||||||
|
|
||||||
Field& get_U(bool smeared = false) {
|
Field &get_U(bool smeared = false)
|
||||||
|
{
|
||||||
return *ThinField;
|
return *ThinField;
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
@ -44,32 +47,36 @@ public:
|
|||||||
It stores a list of smeared configurations.
|
It stores a list of smeared configurations.
|
||||||
*/
|
*/
|
||||||
template <class Gimpl>
|
template <class Gimpl>
|
||||||
class SmearedConfiguration {
|
class SmearedConfiguration
|
||||||
public:
|
{
|
||||||
|
public:
|
||||||
INHERIT_GIMPL_TYPES(Gimpl);
|
INHERIT_GIMPL_TYPES(Gimpl);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const unsigned int smearingLevels;
|
const unsigned int smearingLevels;
|
||||||
Smear_Stout<Gimpl> StoutSmearing;
|
Smear_Stout<Gimpl> StoutSmearing;
|
||||||
std::vector<GaugeField> SmearedSet;
|
std::vector<GaugeField> SmearedSet;
|
||||||
|
|
||||||
// Member functions
|
// Member functions
|
||||||
//====================================================================
|
//====================================================================
|
||||||
void fill_smearedSet(GaugeField& U) {
|
void fill_smearedSet(GaugeField &U)
|
||||||
ThinLinks = &U; // attach the smearing routine to the field U
|
{
|
||||||
|
ThinLinks = &U; // attach the smearing routine to the field U
|
||||||
|
|
||||||
// check the pointer is not null
|
// check the pointer is not null
|
||||||
if (ThinLinks == NULL)
|
if (ThinLinks == NULL)
|
||||||
std::cout << GridLogError
|
std::cout << GridLogError
|
||||||
<< "[SmearedConfiguration] Error in ThinLinks pointer\n";
|
<< "[SmearedConfiguration] Error in ThinLinks pointer\n";
|
||||||
|
|
||||||
if (smearingLevels > 0) {
|
if (smearingLevels > 0)
|
||||||
|
{
|
||||||
std::cout << GridLogDebug
|
std::cout << GridLogDebug
|
||||||
<< "[SmearedConfiguration] Filling SmearedSet\n";
|
<< "[SmearedConfiguration] Filling SmearedSet\n";
|
||||||
GaugeField previous_u(ThinLinks->_grid);
|
GaugeField previous_u(ThinLinks->_grid);
|
||||||
|
|
||||||
previous_u = *ThinLinks;
|
previous_u = *ThinLinks;
|
||||||
for (int smearLvl = 0; smearLvl < smearingLevels; ++smearLvl) {
|
for (int smearLvl = 0; smearLvl < smearingLevels; ++smearLvl)
|
||||||
|
{
|
||||||
StoutSmearing.smear(SmearedSet[smearLvl], previous_u);
|
StoutSmearing.smear(SmearedSet[smearLvl], previous_u);
|
||||||
previous_u = SmearedSet[smearLvl];
|
previous_u = SmearedSet[smearLvl];
|
||||||
|
|
||||||
@ -81,9 +88,10 @@ class SmearedConfiguration {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
//====================================================================
|
//====================================================================
|
||||||
GaugeField AnalyticSmearedForce(const GaugeField& SigmaKPrime,
|
GaugeField AnalyticSmearedForce(const GaugeField &SigmaKPrime,
|
||||||
const GaugeField& GaugeK) const {
|
const GaugeField &GaugeK) const
|
||||||
GridBase* grid = GaugeK._grid;
|
{
|
||||||
|
GridBase *grid = GaugeK._grid;
|
||||||
GaugeField C(grid), SigmaK(grid), iLambda(grid);
|
GaugeField C(grid), SigmaK(grid), iLambda(grid);
|
||||||
GaugeLinkField iLambda_mu(grid);
|
GaugeLinkField iLambda_mu(grid);
|
||||||
GaugeLinkField iQ(grid), e_iQ(grid);
|
GaugeLinkField iQ(grid), e_iQ(grid);
|
||||||
@ -94,7 +102,8 @@ class SmearedConfiguration {
|
|||||||
SigmaK = zero;
|
SigmaK = zero;
|
||||||
iLambda = zero;
|
iLambda = zero;
|
||||||
|
|
||||||
for (int mu = 0; mu < Nd; mu++) {
|
for (int mu = 0; mu < Nd; mu++)
|
||||||
|
{
|
||||||
Cmu = peekLorentz(C, mu);
|
Cmu = peekLorentz(C, mu);
|
||||||
GaugeKmu = peekLorentz(GaugeK, mu);
|
GaugeKmu = peekLorentz(GaugeK, mu);
|
||||||
SigmaKPrime_mu = peekLorentz(SigmaKPrime, mu);
|
SigmaKPrime_mu = peekLorentz(SigmaKPrime, mu);
|
||||||
@ -104,20 +113,22 @@ class SmearedConfiguration {
|
|||||||
pokeLorentz(iLambda, iLambda_mu, mu);
|
pokeLorentz(iLambda, iLambda_mu, mu);
|
||||||
}
|
}
|
||||||
StoutSmearing.derivative(SigmaK, iLambda,
|
StoutSmearing.derivative(SigmaK, iLambda,
|
||||||
GaugeK); // derivative of SmearBase
|
GaugeK); // derivative of SmearBase
|
||||||
return SigmaK;
|
return SigmaK;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*! @brief Returns smeared configuration at level 'Level' */
|
/*! @brief Returns smeared configuration at level 'Level' */
|
||||||
const GaugeField& get_smeared_conf(int Level) const {
|
const GaugeField &get_smeared_conf(int Level) const
|
||||||
|
{
|
||||||
return SmearedSet[Level];
|
return SmearedSet[Level];
|
||||||
}
|
}
|
||||||
|
|
||||||
//====================================================================
|
//====================================================================
|
||||||
void set_iLambda(GaugeLinkField& iLambda, GaugeLinkField& e_iQ,
|
void set_iLambda(GaugeLinkField &iLambda, GaugeLinkField &e_iQ,
|
||||||
const GaugeLinkField& iQ, const GaugeLinkField& Sigmap,
|
const GaugeLinkField &iQ, const GaugeLinkField &Sigmap,
|
||||||
const GaugeLinkField& GaugeK) const {
|
const GaugeLinkField &GaugeK) const
|
||||||
GridBase* grid = iQ._grid;
|
{
|
||||||
|
GridBase *grid = iQ._grid;
|
||||||
GaugeLinkField iQ2(grid), iQ3(grid), B1(grid), B2(grid), USigmap(grid);
|
GaugeLinkField iQ2(grid), iQ3(grid), B1(grid), B2(grid), USigmap(grid);
|
||||||
GaugeLinkField unity(grid);
|
GaugeLinkField unity(grid);
|
||||||
unity = 1.0;
|
unity = 1.0;
|
||||||
@ -206,15 +217,15 @@ class SmearedConfiguration {
|
|||||||
}
|
}
|
||||||
|
|
||||||
//====================================================================
|
//====================================================================
|
||||||
public:
|
public:
|
||||||
GaugeField*
|
GaugeField *
|
||||||
ThinLinks; /*!< @brief Pointer to the thin
|
ThinLinks; /* Pointer to the thin links configuration */
|
||||||
links configuration */
|
|
||||||
|
|
||||||
/*! @brief Standard constructor */
|
/* Standard constructor */
|
||||||
SmearedConfiguration(GridCartesian* UGrid, unsigned int Nsmear,
|
SmearedConfiguration(GridCartesian *UGrid, unsigned int Nsmear,
|
||||||
Smear_Stout<Gimpl>& Stout)
|
Smear_Stout<Gimpl> &Stout)
|
||||||
: smearingLevels(Nsmear), StoutSmearing(Stout), ThinLinks(NULL) {
|
: smearingLevels(Nsmear), StoutSmearing(Stout), ThinLinks(NULL)
|
||||||
|
{
|
||||||
for (unsigned int i = 0; i < smearingLevels; ++i)
|
for (unsigned int i = 0; i < smearingLevels; ++i)
|
||||||
SmearedSet.push_back(*(new GaugeField(UGrid)));
|
SmearedSet.push_back(*(new GaugeField(UGrid)));
|
||||||
}
|
}
|
||||||
@ -223,21 +234,29 @@ class SmearedConfiguration {
|
|||||||
SmearedConfiguration()
|
SmearedConfiguration()
|
||||||
: smearingLevels(0), StoutSmearing(), SmearedSet(), ThinLinks(NULL) {}
|
: smearingLevels(0), StoutSmearing(), SmearedSet(), ThinLinks(NULL) {}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// attach the smeared routines to the thin links U and fill the smeared set
|
// attach the smeared routines to the thin links U and fill the smeared set
|
||||||
void set_Field(GaugeField& U) { fill_smearedSet(U); }
|
void set_Field(GaugeField &U)
|
||||||
|
{
|
||||||
|
double start = usecond();
|
||||||
|
fill_smearedSet(U);
|
||||||
|
double end = usecond();
|
||||||
|
double time = (end - start)/ 1e3;
|
||||||
|
std::cout << GridLogMessage << "Smearing in " << time << " ms" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
//====================================================================
|
//====================================================================
|
||||||
void smeared_force(GaugeField& SigmaTilde) const {
|
void smeared_force(GaugeField &SigmaTilde) const
|
||||||
if (smearingLevels > 0) {
|
{
|
||||||
|
if (smearingLevels > 0)
|
||||||
|
{
|
||||||
|
double start = usecond();
|
||||||
GaugeField force = SigmaTilde; // actually = U*SigmaTilde
|
GaugeField force = SigmaTilde; // actually = U*SigmaTilde
|
||||||
GaugeLinkField tmp_mu(SigmaTilde._grid);
|
GaugeLinkField tmp_mu(SigmaTilde._grid);
|
||||||
|
|
||||||
for (int mu = 0; mu < Nd; mu++) {
|
for (int mu = 0; mu < Nd; mu++)
|
||||||
|
{
|
||||||
// to get just SigmaTilde
|
// to get just SigmaTilde
|
||||||
tmp_mu = adj(peekLorentz(SmearedSet[smearingLevels - 1], mu)) *
|
tmp_mu = adj(peekLorentz(SmearedSet[smearingLevels - 1], mu)) * peekLorentz(force, mu);
|
||||||
peekLorentz(force, mu);
|
|
||||||
pokeLorentz(force, tmp_mu, mu);
|
pokeLorentz(force, tmp_mu, mu);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -246,33 +265,43 @@ class SmearedConfiguration {
|
|||||||
|
|
||||||
force = AnalyticSmearedForce(force, *ThinLinks);
|
force = AnalyticSmearedForce(force, *ThinLinks);
|
||||||
|
|
||||||
for (int mu = 0; mu < Nd; mu++) {
|
for (int mu = 0; mu < Nd; mu++)
|
||||||
|
{
|
||||||
tmp_mu = peekLorentz(*ThinLinks, mu) * peekLorentz(force, mu);
|
tmp_mu = peekLorentz(*ThinLinks, mu) * peekLorentz(force, mu);
|
||||||
pokeLorentz(SigmaTilde, tmp_mu, mu);
|
pokeLorentz(SigmaTilde, tmp_mu, mu);
|
||||||
}
|
}
|
||||||
} // if smearingLevels = 0 do nothing
|
double end = usecond();
|
||||||
|
double time = (end - start)/ 1e3;
|
||||||
|
std::cout << GridLogMessage << "Smearing force in " << time << " ms" << std::endl;
|
||||||
|
} // if smearingLevels = 0 do nothing
|
||||||
}
|
}
|
||||||
//====================================================================
|
//====================================================================
|
||||||
|
|
||||||
GaugeField& get_SmearedU() { return SmearedSet[smearingLevels - 1]; }
|
GaugeField &get_SmearedU() { return SmearedSet[smearingLevels - 1]; }
|
||||||
|
|
||||||
GaugeField& get_U(bool smeared = false) {
|
GaugeField &get_U(bool smeared = false)
|
||||||
|
{
|
||||||
// get the config, thin links by default
|
// get the config, thin links by default
|
||||||
if (smeared) {
|
if (smeared)
|
||||||
if (smearingLevels) {
|
{
|
||||||
|
if (smearingLevels)
|
||||||
|
{
|
||||||
RealD impl_plaq =
|
RealD impl_plaq =
|
||||||
WilsonLoops<Gimpl>::avgPlaquette(SmearedSet[smearingLevels - 1]);
|
WilsonLoops<Gimpl>::avgPlaquette(SmearedSet[smearingLevels - 1]);
|
||||||
std::cout << GridLogDebug << "getting Usmr Plaq: " << impl_plaq
|
std::cout << GridLogDebug << "getting Usmr Plaq: " << impl_plaq
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
return get_SmearedU();
|
return get_SmearedU();
|
||||||
|
}
|
||||||
} else {
|
else
|
||||||
|
{
|
||||||
RealD impl_plaq = WilsonLoops<Gimpl>::avgPlaquette(*ThinLinks);
|
RealD impl_plaq = WilsonLoops<Gimpl>::avgPlaquette(*ThinLinks);
|
||||||
std::cout << GridLogDebug << "getting Thin Plaq: " << impl_plaq
|
std::cout << GridLogDebug << "getting Thin Plaq: " << impl_plaq
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
return *ThinLinks;
|
return *ThinLinks;
|
||||||
}
|
}
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
RealD impl_plaq = WilsonLoops<Gimpl>::avgPlaquette(*ThinLinks);
|
RealD impl_plaq = WilsonLoops<Gimpl>::avgPlaquette(*ThinLinks);
|
||||||
std::cout << GridLogDebug << "getting Thin Plaq: " << impl_plaq
|
std::cout << GridLogDebug << "getting Thin Plaq: " << impl_plaq
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
|
@ -49,6 +49,8 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
const int Ls=8;
|
const int Ls=8;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "::::: NB: to enable a quick bit reproducibility check use the --checksums flag. " << std::endl;
|
||||||
|
|
||||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
|
||||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||||
@ -90,24 +92,23 @@ int main (int argc, char ** argv)
|
|||||||
SchurDiagMooeeOperator<DomainWallFermionD,LatticeFermionD> HermOpEO(Ddwf);
|
SchurDiagMooeeOperator<DomainWallFermionD,LatticeFermionD> HermOpEO(Ddwf);
|
||||||
SchurDiagMooeeOperator<DomainWallFermionF,LatticeFermionF> HermOpEO_f(Ddwf_f);
|
SchurDiagMooeeOperator<DomainWallFermionF,LatticeFermionF> HermOpEO_f(Ddwf_f);
|
||||||
|
|
||||||
std::cout << "Starting mixed CG" << std::endl;
|
std::cout << GridLogMessage << "::::::::::::: Starting mixed CG" << std::endl;
|
||||||
MixedPrecisionConjugateGradient<LatticeFermionD,LatticeFermionF> mCG(1.0e-8, 10000, 50, FrbGrid_f, HermOpEO_f, HermOpEO);
|
MixedPrecisionConjugateGradient<LatticeFermionD,LatticeFermionF> mCG(1.0e-8, 10000, 50, FrbGrid_f, HermOpEO_f, HermOpEO);
|
||||||
mCG(src_o,result_o);
|
mCG(src_o,result_o);
|
||||||
|
|
||||||
std::cout << "Starting regular CG" << std::endl;
|
std::cout << GridLogMessage << "::::::::::::: Starting regular CG" << std::endl;
|
||||||
ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
|
ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
|
||||||
CG(HermOpEO,src_o,result_o_2);
|
CG(HermOpEO,src_o,result_o_2);
|
||||||
|
|
||||||
LatticeFermionD diff_o(FrbGrid);
|
LatticeFermionD diff_o(FrbGrid);
|
||||||
RealD diff = axpy_norm(diff_o, -1.0, result_o, result_o_2);
|
RealD diff = axpy_norm(diff_o, -1.0, result_o, result_o_2);
|
||||||
|
|
||||||
std::cout << "Diff between mixed and regular CG: " << diff << std::endl;
|
std::cout << GridLogMessage << "::::::::::::: Diff between mixed and regular CG: " << diff << std::endl;
|
||||||
|
|
||||||
#ifdef HAVE_LIME
|
#ifdef HAVE_LIME
|
||||||
if( GridCmdOptionExists(argv,argv+argc,"--checksums") ){
|
if( GridCmdOptionExists(argv,argv+argc,"--checksums") ){
|
||||||
|
|
||||||
std::string file1("./Propagator1");
|
std::string file1("./Propagator1");
|
||||||
std::string file2("./Propagator2");
|
|
||||||
emptyUserRecord record;
|
emptyUserRecord record;
|
||||||
uint32_t nersc_csum;
|
uint32_t nersc_csum;
|
||||||
uint32_t scidac_csuma;
|
uint32_t scidac_csuma;
|
||||||
@ -121,12 +122,12 @@ int main (int argc, char ** argv)
|
|||||||
BinaryIO::writeLatticeObject<vFermionD,FermionD>(result_o,file1,munge, 0, format,
|
BinaryIO::writeLatticeObject<vFermionD,FermionD>(result_o,file1,munge, 0, format,
|
||||||
nersc_csum,scidac_csuma,scidac_csumb);
|
nersc_csum,scidac_csuma,scidac_csumb);
|
||||||
|
|
||||||
std::cout << " Mixed checksums "<<std::hex << scidac_csuma << " "<<scidac_csumb<<std::endl;
|
std::cout << GridLogMessage << " Mixed checksums "<<std::hex << scidac_csuma << " "<<scidac_csumb<<std::endl;
|
||||||
|
|
||||||
BinaryIO::writeLatticeObject<vFermionD,FermionD>(result_o_2,file1,munge, 0, format,
|
BinaryIO::writeLatticeObject<vFermionD,FermionD>(result_o_2,file1,munge, 0, format,
|
||||||
nersc_csum,scidac_csuma,scidac_csumb);
|
nersc_csum,scidac_csuma,scidac_csumb);
|
||||||
|
|
||||||
std::cout << " CG checksums "<<std::hex << scidac_csuma << " "<<scidac_csumb<<std::endl;
|
std::cout << GridLogMessage << " CG checksums "<<std::hex << scidac_csuma << " "<<scidac_csumb<<std::endl;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user