mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
Merge branch 'develop' into hisq_fat_links
This commit is contained in:
commit
2c824c2641
@ -29,8 +29,27 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
extern Vector<std::pair<int,int> > Cshift_table;
|
extern std::vector<std::pair<int,int> > Cshift_table;
|
||||||
|
extern commVector<std::pair<int,int> > Cshift_table_device;
|
||||||
|
|
||||||
|
inline std::pair<int,int> *MapCshiftTable(void)
|
||||||
|
{
|
||||||
|
// GPU version
|
||||||
|
#ifdef ACCELERATOR_CSHIFT
|
||||||
|
uint64_t sz=Cshift_table.size();
|
||||||
|
if (Cshift_table_device.size()!=sz ) {
|
||||||
|
Cshift_table_device.resize(sz);
|
||||||
|
}
|
||||||
|
acceleratorCopyToDevice((void *)&Cshift_table[0],
|
||||||
|
(void *)&Cshift_table_device[0],
|
||||||
|
sizeof(Cshift_table[0])*sz);
|
||||||
|
|
||||||
|
return &Cshift_table_device[0];
|
||||||
|
#else
|
||||||
|
return &Cshift_table[0];
|
||||||
|
#endif
|
||||||
|
// CPU version use identify map
|
||||||
|
}
|
||||||
///////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////
|
||||||
// Gather for when there is no need to SIMD split
|
// Gather for when there is no need to SIMD split
|
||||||
///////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////
|
||||||
@ -74,8 +93,8 @@ Gather_plane_simple (const Lattice<vobj> &rhs,cshiftVector<vobj> &buffer,int dim
|
|||||||
}
|
}
|
||||||
{
|
{
|
||||||
auto buffer_p = & buffer[0];
|
auto buffer_p = & buffer[0];
|
||||||
auto table = &Cshift_table[0];
|
auto table = MapCshiftTable();
|
||||||
#ifdef ACCELERATOR_CSHIFT
|
#ifdef ACCELERATOR_CSHIFT
|
||||||
autoView(rhs_v , rhs, AcceleratorRead);
|
autoView(rhs_v , rhs, AcceleratorRead);
|
||||||
accelerator_for(i,ent,vobj::Nsimd(),{
|
accelerator_for(i,ent,vobj::Nsimd(),{
|
||||||
coalescedWrite(buffer_p[table[i].first],coalescedRead(rhs_v[table[i].second]));
|
coalescedWrite(buffer_p[table[i].first],coalescedRead(rhs_v[table[i].second]));
|
||||||
@ -225,7 +244,7 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,cshiftVector<
|
|||||||
|
|
||||||
{
|
{
|
||||||
auto buffer_p = & buffer[0];
|
auto buffer_p = & buffer[0];
|
||||||
auto table = &Cshift_table[0];
|
auto table = MapCshiftTable();
|
||||||
#ifdef ACCELERATOR_CSHIFT
|
#ifdef ACCELERATOR_CSHIFT
|
||||||
autoView( rhs_v, rhs, AcceleratorWrite);
|
autoView( rhs_v, rhs, AcceleratorWrite);
|
||||||
accelerator_for(i,ent,vobj::Nsimd(),{
|
accelerator_for(i,ent,vobj::Nsimd(),{
|
||||||
@ -297,30 +316,6 @@ template<class vobj> void Scatter_plane_merge(Lattice<vobj> &rhs,ExtractPointerA
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if (defined(GRID_CUDA) || defined(GRID_HIP)) && defined(ACCELERATOR_CSHIFT)
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
T iDivUp(T a, T b) // Round a / b to nearest higher integer value
|
|
||||||
{ return (a % b != 0) ? (a / b + 1) : (a / b); }
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
__global__ void populate_Cshift_table(T* vector, T lo, T ro, T e1, T e2, T stride)
|
|
||||||
{
|
|
||||||
int idx = blockIdx.x*blockDim.x + threadIdx.x;
|
|
||||||
if (idx >= e1*e2) return;
|
|
||||||
|
|
||||||
int n, b, o;
|
|
||||||
|
|
||||||
n = idx / e2;
|
|
||||||
b = idx % e2;
|
|
||||||
o = n*stride + b;
|
|
||||||
|
|
||||||
vector[2*idx + 0] = lo + o;
|
|
||||||
vector[2*idx + 1] = ro + o;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
// local to node block strided copies
|
// local to node block strided copies
|
||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
@ -345,20 +340,12 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs
|
|||||||
int ent=0;
|
int ent=0;
|
||||||
|
|
||||||
if(cbmask == 0x3 ){
|
if(cbmask == 0x3 ){
|
||||||
#if (defined(GRID_CUDA) || defined(GRID_HIP)) && defined(ACCELERATOR_CSHIFT)
|
|
||||||
ent = e1*e2;
|
|
||||||
dim3 blockSize(acceleratorThreads());
|
|
||||||
dim3 gridSize(iDivUp((unsigned int)ent, blockSize.x));
|
|
||||||
populate_Cshift_table<<<gridSize, blockSize>>>(&Cshift_table[0].first, lo, ro, e1, e2, stride);
|
|
||||||
accelerator_barrier();
|
|
||||||
#else
|
|
||||||
for(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
int o =n*stride+b;
|
int o =n*stride+b;
|
||||||
Cshift_table[ent++] = std::pair<int,int>(lo+o,ro+o);
|
Cshift_table[ent++] = std::pair<int,int>(lo+o,ro+o);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
} else {
|
} else {
|
||||||
for(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
@ -372,7 +359,7 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs
|
|||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
auto table = &Cshift_table[0];
|
auto table = MapCshiftTable();
|
||||||
#ifdef ACCELERATOR_CSHIFT
|
#ifdef ACCELERATOR_CSHIFT
|
||||||
autoView(rhs_v , rhs, AcceleratorRead);
|
autoView(rhs_v , rhs, AcceleratorRead);
|
||||||
autoView(lhs_v , lhs, AcceleratorWrite);
|
autoView(lhs_v , lhs, AcceleratorWrite);
|
||||||
@ -409,19 +396,11 @@ template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vo
|
|||||||
int ent=0;
|
int ent=0;
|
||||||
|
|
||||||
if ( cbmask == 0x3 ) {
|
if ( cbmask == 0x3 ) {
|
||||||
#if (defined(GRID_CUDA) || defined(GRID_HIP)) && defined(ACCELERATOR_CSHIFT)
|
|
||||||
ent = e1*e2;
|
|
||||||
dim3 blockSize(acceleratorThreads());
|
|
||||||
dim3 gridSize(iDivUp((unsigned int)ent, blockSize.x));
|
|
||||||
populate_Cshift_table<<<gridSize, blockSize>>>(&Cshift_table[0].first, lo, ro, e1, e2, stride);
|
|
||||||
accelerator_barrier();
|
|
||||||
#else
|
|
||||||
for(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
int o =n*stride;
|
int o =n*stride;
|
||||||
Cshift_table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
|
Cshift_table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
|
||||||
}}
|
}}
|
||||||
#endif
|
|
||||||
} else {
|
} else {
|
||||||
for(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
@ -432,7 +411,7 @@ template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vo
|
|||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
auto table = &Cshift_table[0];
|
auto table = MapCshiftTable();
|
||||||
#ifdef ACCELERATOR_CSHIFT
|
#ifdef ACCELERATOR_CSHIFT
|
||||||
autoView( rhs_v, rhs, AcceleratorRead);
|
autoView( rhs_v, rhs, AcceleratorRead);
|
||||||
autoView( lhs_v, lhs, AcceleratorWrite);
|
autoView( lhs_v, lhs, AcceleratorWrite);
|
||||||
|
@ -52,7 +52,8 @@ template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension
|
|||||||
int comm_dim = rhs.Grid()->_processors[dimension] >1 ;
|
int comm_dim = rhs.Grid()->_processors[dimension] >1 ;
|
||||||
int splice_dim = rhs.Grid()->_simd_layout[dimension]>1 && (comm_dim);
|
int splice_dim = rhs.Grid()->_simd_layout[dimension]>1 && (comm_dim);
|
||||||
|
|
||||||
|
RealD t1,t0;
|
||||||
|
t0=usecond();
|
||||||
if ( !comm_dim ) {
|
if ( !comm_dim ) {
|
||||||
//std::cout << "CSHIFT: Cshift_local" <<std::endl;
|
//std::cout << "CSHIFT: Cshift_local" <<std::endl;
|
||||||
Cshift_local(ret,rhs,dimension,shift); // Handles checkerboarding
|
Cshift_local(ret,rhs,dimension,shift); // Handles checkerboarding
|
||||||
@ -63,6 +64,8 @@ template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension
|
|||||||
//std::cout << "CSHIFT: Cshift_comms" <<std::endl;
|
//std::cout << "CSHIFT: Cshift_comms" <<std::endl;
|
||||||
Cshift_comms(ret,rhs,dimension,shift);
|
Cshift_comms(ret,rhs,dimension,shift);
|
||||||
}
|
}
|
||||||
|
t1=usecond();
|
||||||
|
// std::cout << GridLogPerformance << "Cshift took "<< (t1-t0)/1e3 << " ms"<<std::endl;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -127,16 +130,20 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
|
|||||||
|
|
||||||
int cb= (cbmask==0x2)? Odd : Even;
|
int cb= (cbmask==0x2)? Odd : Even;
|
||||||
int sshift= rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb);
|
int sshift= rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb);
|
||||||
|
RealD tcopy=0.0;
|
||||||
|
RealD tgather=0.0;
|
||||||
|
RealD tscatter=0.0;
|
||||||
|
RealD tcomms=0.0;
|
||||||
|
uint64_t xbytes=0;
|
||||||
for(int x=0;x<rd;x++){
|
for(int x=0;x<rd;x++){
|
||||||
|
|
||||||
int sx = (x+sshift)%rd;
|
int sx = (x+sshift)%rd;
|
||||||
int comm_proc = ((x+sshift)/rd)%pd;
|
int comm_proc = ((x+sshift)/rd)%pd;
|
||||||
|
|
||||||
if (comm_proc==0) {
|
if (comm_proc==0) {
|
||||||
|
tcopy-=usecond();
|
||||||
Copy_plane(ret,rhs,dimension,x,sx,cbmask);
|
Copy_plane(ret,rhs,dimension,x,sx,cbmask);
|
||||||
|
tcopy+=usecond();
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
int words = buffer_size;
|
int words = buffer_size;
|
||||||
@ -144,26 +151,39 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
|
|||||||
|
|
||||||
int bytes = words * sizeof(vobj);
|
int bytes = words * sizeof(vobj);
|
||||||
|
|
||||||
|
tgather-=usecond();
|
||||||
Gather_plane_simple (rhs,send_buf,dimension,sx,cbmask);
|
Gather_plane_simple (rhs,send_buf,dimension,sx,cbmask);
|
||||||
|
tgather+=usecond();
|
||||||
|
|
||||||
// int rank = grid->_processor;
|
// int rank = grid->_processor;
|
||||||
int recv_from_rank;
|
int recv_from_rank;
|
||||||
int xmit_to_rank;
|
int xmit_to_rank;
|
||||||
grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
|
grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
|
||||||
|
|
||||||
grid->Barrier();
|
tcomms-=usecond();
|
||||||
|
// grid->Barrier();
|
||||||
|
|
||||||
grid->SendToRecvFrom((void *)&send_buf[0],
|
grid->SendToRecvFrom((void *)&send_buf[0],
|
||||||
xmit_to_rank,
|
xmit_to_rank,
|
||||||
(void *)&recv_buf[0],
|
(void *)&recv_buf[0],
|
||||||
recv_from_rank,
|
recv_from_rank,
|
||||||
bytes);
|
bytes);
|
||||||
|
xbytes+=bytes;
|
||||||
|
// grid->Barrier();
|
||||||
|
tcomms+=usecond();
|
||||||
|
|
||||||
grid->Barrier();
|
tscatter-=usecond();
|
||||||
|
|
||||||
Scatter_plane_simple (ret,recv_buf,dimension,x,cbmask);
|
Scatter_plane_simple (ret,recv_buf,dimension,x,cbmask);
|
||||||
|
tscatter+=usecond();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
|
std::cout << GridLogPerformance << " Cshift copy "<<tcopy/1e3<<" ms"<<std::endl;
|
||||||
|
std::cout << GridLogPerformance << " Cshift gather "<<tgather/1e3<<" ms"<<std::endl;
|
||||||
|
std::cout << GridLogPerformance << " Cshift scatter "<<tscatter/1e3<<" ms"<<std::endl;
|
||||||
|
std::cout << GridLogPerformance << " Cshift comm "<<tcomms/1e3<<" ms"<<std::endl;
|
||||||
|
std::cout << GridLogPerformance << " Cshift BW "<<(2.0*xbytes)/tcomms<<" MB/s "<<2*xbytes<< " Bytes "<<std::endl;
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
|
template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
|
||||||
@ -190,6 +210,12 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
|
|||||||
assert(shift>=0);
|
assert(shift>=0);
|
||||||
assert(shift<fd);
|
assert(shift<fd);
|
||||||
|
|
||||||
|
RealD tcopy=0.0;
|
||||||
|
RealD tgather=0.0;
|
||||||
|
RealD tscatter=0.0;
|
||||||
|
RealD tcomms=0.0;
|
||||||
|
uint64_t xbytes=0;
|
||||||
|
|
||||||
int permute_type=grid->PermuteType(dimension);
|
int permute_type=grid->PermuteType(dimension);
|
||||||
|
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
@ -227,7 +253,9 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
|
|||||||
pointers[i] = &send_buf_extract[i][0];
|
pointers[i] = &send_buf_extract[i][0];
|
||||||
}
|
}
|
||||||
int sx = (x+sshift)%rd;
|
int sx = (x+sshift)%rd;
|
||||||
|
tgather-=usecond();
|
||||||
Gather_plane_extract(rhs,pointers,dimension,sx,cbmask);
|
Gather_plane_extract(rhs,pointers,dimension,sx,cbmask);
|
||||||
|
tgather+=usecond();
|
||||||
|
|
||||||
for(int i=0;i<Nsimd;i++){
|
for(int i=0;i<Nsimd;i++){
|
||||||
|
|
||||||
@ -252,7 +280,8 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
|
|||||||
if(nbr_proc){
|
if(nbr_proc){
|
||||||
grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
|
grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
|
||||||
|
|
||||||
grid->Barrier();
|
tcomms-=usecond();
|
||||||
|
// grid->Barrier();
|
||||||
|
|
||||||
send_buf_extract_mpi = &send_buf_extract[nbr_lane][0];
|
send_buf_extract_mpi = &send_buf_extract[nbr_lane][0];
|
||||||
recv_buf_extract_mpi = &recv_buf_extract[i][0];
|
recv_buf_extract_mpi = &recv_buf_extract[i][0];
|
||||||
@ -262,7 +291,9 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
|
|||||||
recv_from_rank,
|
recv_from_rank,
|
||||||
bytes);
|
bytes);
|
||||||
|
|
||||||
grid->Barrier();
|
xbytes+=bytes;
|
||||||
|
// grid->Barrier();
|
||||||
|
tcomms+=usecond();
|
||||||
|
|
||||||
rpointers[i] = &recv_buf_extract[i][0];
|
rpointers[i] = &recv_buf_extract[i][0];
|
||||||
} else {
|
} else {
|
||||||
@ -270,9 +301,17 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
tscatter-=usecond();
|
||||||
Scatter_plane_merge(ret,rpointers,dimension,x,cbmask);
|
Scatter_plane_merge(ret,rpointers,dimension,x,cbmask);
|
||||||
|
tscatter+=usecond();
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
|
std::cout << GridLogPerformance << " Cshift (s) copy "<<tcopy/1e3<<" ms"<<std::endl;
|
||||||
|
std::cout << GridLogPerformance << " Cshift (s) gather "<<tgather/1e3<<" ms"<<std::endl;
|
||||||
|
std::cout << GridLogPerformance << " Cshift (s) scatter "<<tscatter/1e3<<" ms"<<std::endl;
|
||||||
|
std::cout << GridLogPerformance << " Cshift (s) comm "<<tcomms/1e3<<" ms"<<std::endl;
|
||||||
|
std::cout << GridLogPerformance << " Cshift BW "<<(2.0*xbytes)/tcomms<<" MB/s "<<2*xbytes<< " Bytes "<<std::endl;
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
|
template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
|
||||||
@ -292,6 +331,11 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
|
|||||||
assert(comm_dim==1);
|
assert(comm_dim==1);
|
||||||
assert(shift>=0);
|
assert(shift>=0);
|
||||||
assert(shift<fd);
|
assert(shift<fd);
|
||||||
|
RealD tcopy=0.0;
|
||||||
|
RealD tgather=0.0;
|
||||||
|
RealD tscatter=0.0;
|
||||||
|
RealD tcomms=0.0;
|
||||||
|
uint64_t xbytes=0;
|
||||||
|
|
||||||
int buffer_size = rhs.Grid()->_slice_nblock[dimension]*rhs.Grid()->_slice_block[dimension];
|
int buffer_size = rhs.Grid()->_slice_nblock[dimension]*rhs.Grid()->_slice_block[dimension];
|
||||||
static cshiftVector<vobj> send_buf_v; send_buf_v.resize(buffer_size);
|
static cshiftVector<vobj> send_buf_v; send_buf_v.resize(buffer_size);
|
||||||
@ -315,7 +359,9 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
|
|||||||
|
|
||||||
if (comm_proc==0) {
|
if (comm_proc==0) {
|
||||||
|
|
||||||
|
tcopy-=usecond();
|
||||||
Copy_plane(ret,rhs,dimension,x,sx,cbmask);
|
Copy_plane(ret,rhs,dimension,x,sx,cbmask);
|
||||||
|
tcopy+=usecond();
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
@ -324,7 +370,9 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
|
|||||||
|
|
||||||
int bytes = words * sizeof(vobj);
|
int bytes = words * sizeof(vobj);
|
||||||
|
|
||||||
|
tgather-=usecond();
|
||||||
Gather_plane_simple (rhs,send_buf_v,dimension,sx,cbmask);
|
Gather_plane_simple (rhs,send_buf_v,dimension,sx,cbmask);
|
||||||
|
tgather+=usecond();
|
||||||
|
|
||||||
// int rank = grid->_processor;
|
// int rank = grid->_processor;
|
||||||
int recv_from_rank;
|
int recv_from_rank;
|
||||||
@ -332,7 +380,8 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
|
|||||||
grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
|
grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
|
||||||
|
|
||||||
|
|
||||||
grid->Barrier();
|
tcomms-=usecond();
|
||||||
|
// grid->Barrier();
|
||||||
|
|
||||||
acceleratorCopyDeviceToDevice((void *)&send_buf_v[0],(void *)&send_buf[0],bytes);
|
acceleratorCopyDeviceToDevice((void *)&send_buf_v[0],(void *)&send_buf[0],bytes);
|
||||||
grid->SendToRecvFrom((void *)&send_buf[0],
|
grid->SendToRecvFrom((void *)&send_buf[0],
|
||||||
@ -340,13 +389,24 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
|
|||||||
(void *)&recv_buf[0],
|
(void *)&recv_buf[0],
|
||||||
recv_from_rank,
|
recv_from_rank,
|
||||||
bytes);
|
bytes);
|
||||||
|
xbytes+=bytes;
|
||||||
acceleratorCopyDeviceToDevice((void *)&recv_buf[0],(void *)&recv_buf_v[0],bytes);
|
acceleratorCopyDeviceToDevice((void *)&recv_buf[0],(void *)&recv_buf_v[0],bytes);
|
||||||
|
|
||||||
grid->Barrier();
|
// grid->Barrier();
|
||||||
|
tcomms+=usecond();
|
||||||
|
|
||||||
|
tscatter-=usecond();
|
||||||
Scatter_plane_simple (ret,recv_buf_v,dimension,x,cbmask);
|
Scatter_plane_simple (ret,recv_buf_v,dimension,x,cbmask);
|
||||||
|
tscatter+=usecond();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
|
std::cout << GridLogPerformance << " Cshift copy "<<tcopy/1e3<<" ms"<<std::endl;
|
||||||
|
std::cout << GridLogPerformance << " Cshift gather "<<tgather/1e3<<" ms"<<std::endl;
|
||||||
|
std::cout << GridLogPerformance << " Cshift scatter "<<tscatter/1e3<<" ms"<<std::endl;
|
||||||
|
std::cout << GridLogPerformance << " Cshift comm "<<tcomms/1e3<<" ms"<<std::endl;
|
||||||
|
std::cout << GridLogPerformance << " Cshift BW "<<(2.0*xbytes)/tcomms<<" MB/s "<<2*xbytes<< " Bytes "<<std::endl;
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
|
template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
|
||||||
@ -372,6 +432,11 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
|
|||||||
assert(simd_layout==2);
|
assert(simd_layout==2);
|
||||||
assert(shift>=0);
|
assert(shift>=0);
|
||||||
assert(shift<fd);
|
assert(shift<fd);
|
||||||
|
RealD tcopy=0.0;
|
||||||
|
RealD tgather=0.0;
|
||||||
|
RealD tscatter=0.0;
|
||||||
|
RealD tcomms=0.0;
|
||||||
|
uint64_t xbytes=0;
|
||||||
|
|
||||||
int permute_type=grid->PermuteType(dimension);
|
int permute_type=grid->PermuteType(dimension);
|
||||||
|
|
||||||
@ -414,8 +479,10 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
|
|||||||
for(int i=0;i<Nsimd;i++){
|
for(int i=0;i<Nsimd;i++){
|
||||||
pointers[i] = &send_buf_extract[i][0];
|
pointers[i] = &send_buf_extract[i][0];
|
||||||
}
|
}
|
||||||
|
tgather-=usecond();
|
||||||
int sx = (x+sshift)%rd;
|
int sx = (x+sshift)%rd;
|
||||||
Gather_plane_extract(rhs,pointers,dimension,sx,cbmask);
|
Gather_plane_extract(rhs,pointers,dimension,sx,cbmask);
|
||||||
|
tgather+=usecond();
|
||||||
|
|
||||||
for(int i=0;i<Nsimd;i++){
|
for(int i=0;i<Nsimd;i++){
|
||||||
|
|
||||||
@ -440,7 +507,8 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
|
|||||||
if(nbr_proc){
|
if(nbr_proc){
|
||||||
grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
|
grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
|
||||||
|
|
||||||
grid->Barrier();
|
tcomms-=usecond();
|
||||||
|
// grid->Barrier();
|
||||||
|
|
||||||
acceleratorCopyDeviceToDevice((void *)&send_buf_extract[nbr_lane][0],(void *)send_buf_extract_mpi,bytes);
|
acceleratorCopyDeviceToDevice((void *)&send_buf_extract[nbr_lane][0],(void *)send_buf_extract_mpi,bytes);
|
||||||
grid->SendToRecvFrom((void *)send_buf_extract_mpi,
|
grid->SendToRecvFrom((void *)send_buf_extract_mpi,
|
||||||
@ -449,17 +517,28 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
|
|||||||
recv_from_rank,
|
recv_from_rank,
|
||||||
bytes);
|
bytes);
|
||||||
acceleratorCopyDeviceToDevice((void *)recv_buf_extract_mpi,(void *)&recv_buf_extract[i][0],bytes);
|
acceleratorCopyDeviceToDevice((void *)recv_buf_extract_mpi,(void *)&recv_buf_extract[i][0],bytes);
|
||||||
|
xbytes+=bytes;
|
||||||
|
|
||||||
grid->Barrier();
|
// grid->Barrier();
|
||||||
|
tcomms+=usecond();
|
||||||
rpointers[i] = &recv_buf_extract[i][0];
|
rpointers[i] = &recv_buf_extract[i][0];
|
||||||
} else {
|
} else {
|
||||||
rpointers[i] = &send_buf_extract[nbr_lane][0];
|
rpointers[i] = &send_buf_extract[nbr_lane][0];
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
tscatter-=usecond();
|
||||||
Scatter_plane_merge(ret,rpointers,dimension,x,cbmask);
|
Scatter_plane_merge(ret,rpointers,dimension,x,cbmask);
|
||||||
}
|
tscatter+=usecond();
|
||||||
|
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
std::cout << GridLogPerformance << " Cshift (s) copy "<<tcopy/1e3<<" ms"<<std::endl;
|
||||||
|
std::cout << GridLogPerformance << " Cshift (s) gather "<<tgather/1e3<<" ms"<<std::endl;
|
||||||
|
std::cout << GridLogPerformance << " Cshift (s) scatter "<<tscatter/1e3<<" ms"<<std::endl;
|
||||||
|
std::cout << GridLogPerformance << " Cshift (s) comm "<<tcomms/1e3<<" ms"<<std::endl;
|
||||||
|
std::cout << GridLogPerformance << " Cshift BW "<<(2.0*xbytes)/tcomms<<" MB/s"<<std::endl;
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
#include <Grid/GridCore.h>
|
#include <Grid/GridCore.h>
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
Vector<std::pair<int,int> > Cshift_table;
|
std::vector<std::pair<int,int> > Cshift_table;
|
||||||
|
commVector<std::pair<int,int> > Cshift_table_device;
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
@ -270,5 +270,42 @@ RealD axpby_norm(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const L
|
|||||||
return axpby_norm_fast(ret,a,b,x,y);
|
return axpby_norm_fast(ret,a,b,x,y);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Trace product
|
||||||
|
template<class obj> auto traceProduct(const Lattice<obj> &rhs_1,const Lattice<obj> &rhs_2)
|
||||||
|
-> Lattice<decltype(trace(obj()))>
|
||||||
|
{
|
||||||
|
typedef decltype(trace(obj())) robj;
|
||||||
|
Lattice<robj> ret_i(rhs_1.Grid());
|
||||||
|
autoView( rhs1 , rhs_1, AcceleratorRead);
|
||||||
|
autoView( rhs2 , rhs_2, AcceleratorRead);
|
||||||
|
autoView( ret , ret_i, AcceleratorWrite);
|
||||||
|
ret.Checkerboard() = rhs_1.Checkerboard();
|
||||||
|
accelerator_for(ss,rhs1.size(),obj::Nsimd(),{
|
||||||
|
coalescedWrite(ret[ss],traceProduct(rhs1(ss),rhs2(ss)));
|
||||||
|
});
|
||||||
|
return ret_i;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class obj1,class obj2> auto traceProduct(const Lattice<obj1> &rhs_1,const obj2 &rhs2)
|
||||||
|
-> Lattice<decltype(trace(obj1()))>
|
||||||
|
{
|
||||||
|
typedef decltype(trace(obj1())) robj;
|
||||||
|
Lattice<robj> ret_i(rhs_1.Grid());
|
||||||
|
autoView( rhs1 , rhs_1, AcceleratorRead);
|
||||||
|
autoView( ret , ret_i, AcceleratorWrite);
|
||||||
|
ret.Checkerboard() = rhs_1.Checkerboard();
|
||||||
|
accelerator_for(ss,rhs1.size(),obj1::Nsimd(),{
|
||||||
|
coalescedWrite(ret[ss],traceProduct(rhs1(ss),rhs2));
|
||||||
|
});
|
||||||
|
return ret_i;
|
||||||
|
}
|
||||||
|
template<class obj1,class obj2> auto traceProduct(const obj2 &rhs_2,const Lattice<obj1> &rhs_1)
|
||||||
|
-> Lattice<decltype(trace(obj1()))>
|
||||||
|
{
|
||||||
|
return traceProduct(rhs_1,rhs_2);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
#endif
|
#endif
|
||||||
|
@ -30,7 +30,7 @@ int getNumBlocksAndThreads(const Iterator n, const size_t sizeofsobj, Iterator &
|
|||||||
cudaGetDevice(&device);
|
cudaGetDevice(&device);
|
||||||
#endif
|
#endif
|
||||||
#ifdef GRID_HIP
|
#ifdef GRID_HIP
|
||||||
hipGetDevice(&device);
|
auto r=hipGetDevice(&device);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Iterator warpSize = gpu_props[device].warpSize;
|
Iterator warpSize = gpu_props[device].warpSize;
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
|
||||||
/*!
|
/*!
|
||||||
@file GaugeConfiguration.h
|
@file GaugeConfiguration.h
|
||||||
@brief Declares the GaugeConfiguration class
|
@brief Declares the GaugeConfiguration class
|
||||||
@ -6,6 +7,15 @@
|
|||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
|
|
||||||
|
template<class T> void Dump(const Lattice<T> & lat,
|
||||||
|
std::string s,
|
||||||
|
Coordinate site = Coordinate({0,0,0,0}))
|
||||||
|
{
|
||||||
|
typename T::scalar_object tmp;
|
||||||
|
peekSite(tmp,lat,site);
|
||||||
|
std::cout << " Dump "<<s<<" "<<tmp<<std::endl;
|
||||||
|
}
|
||||||
/*!
|
/*!
|
||||||
@brief Smeared configuration masked container
|
@brief Smeared configuration masked container
|
||||||
Modified for a multi-subset smearing (aka Luscher Flowed HMC)
|
Modified for a multi-subset smearing (aka Luscher Flowed HMC)
|
||||||
@ -28,6 +38,101 @@ private:
|
|||||||
typedef typename SU3Adjoint::LatticeAdjMatrix AdjMatrixField;
|
typedef typename SU3Adjoint::LatticeAdjMatrix AdjMatrixField;
|
||||||
typedef typename SU3Adjoint::LatticeAdjVector AdjVectorField;
|
typedef typename SU3Adjoint::LatticeAdjVector AdjVectorField;
|
||||||
|
|
||||||
|
// Accumulate into SigmaTerm the staple contributions to the derivative of
// S[fat links] with respect to the thin links, restricted to the terms
// that involve direction mmu.
//
// Reference
//   Morningstar, Peardon, Phys.Rev.D69,054501(2004), Equation 75
//
//  SigmaTerm : output, updated in place through Gimpl::AddLink
//  iLambda   : Lambda field, peeked per direction
//  U         : thin gauge links
//  mmu       : the direction whose smearing is being differentiated
//  rho       : smearing weight, used for both rho_munu and rho_numu
//
// Only (mu,nu) pairs with mu==mmu or nu==mmu contribute. For every other
// pair the original body built an all-zero temp_Sigma, shifted it and
// added it, so those pairs are now skipped before any field work is done.
void BaseSmearDerivative(GaugeField& SigmaTerm,
			 const GaugeField& iLambda,
			 const GaugeField& U,
			 int mmu, RealD rho)
{
  GridBase *grid = U.Grid();

  WilsonLoops<Gimpl> WL;
  GaugeLinkField staple(grid), u_tmp(grid);
  GaugeLinkField iLambda_mu(grid), iLambda_nu(grid);
  GaugeLinkField U_mu(grid), U_nu(grid);
  GaugeLinkField sh_field(grid), temp_Sigma(grid);
  Real rho_munu, rho_numu;

  rho_munu = rho;
  rho_numu = rho;
  for(int mu = 0; mu < Nd; ++mu){
    U_mu       = peekLorentz(      U, mu);
    iLambda_mu = peekLorentz(iLambda, mu);

    for(int nu = 0; nu < Nd; ++nu){
      if(nu==mu) continue;

      // Nd(nd-1) = 12 staples normally.
      // We must compute 6 of these
      // in FTHMC case
      // Pairs not touching mmu add exactly zero: skip them outright.
      if ( (mu!=mmu)&&(nu!=mmu) ) continue;

      U_nu = peekLorentz( U, nu);

      WL.StapleUpper(staple, U, mu, nu);

      if(nu==mmu) {
	iLambda_nu = peekLorentz(iLambda, nu);

	temp_Sigma = -rho_numu*staple*iLambda_nu;  //ok
	//-r_numu*U_nu(x+mu)*Udag_mu(x+nu)*Udag_nu(x)*Lambda_nu(x)
	Gimpl::AddLink(SigmaTerm, temp_Sigma, mu);

	sh_field = Cshift(iLambda_nu, mu, 1);// general also for Gparity?

	temp_Sigma = rho_numu*sh_field*staple; //ok
	//r_numu*Lambda_nu(mu)*U_nu(x+mu)*Udag_mu(x+nu)*Udag_nu(x)
	Gimpl::AddLink(SigmaTerm, temp_Sigma, mu);
      }

      if ( mu == mmu ) {
	sh_field = Cshift(iLambda_mu, nu, 1);

	temp_Sigma = -rho_munu*staple*U_nu*sh_field*adj(U_nu); //ok
	//-r_munu*U_nu(x+mu)*Udag_mu(x+nu)*Lambda_mu(x+nu)*Udag_nu(x)
	Gimpl::AddLink(SigmaTerm, temp_Sigma, mu);
      }

      // Lower-staple terms: assembled at x, then shifted back by one site in nu
      sh_field = Cshift(U_nu, mu, 1);

      temp_Sigma = Zero();

      if ( mu == mmu )
	temp_Sigma = -rho_munu*adj(sh_field)*adj(U_mu)*iLambda_mu*U_nu;

      if ( nu == mmu ) {
	temp_Sigma += rho_numu*adj(sh_field)*adj(U_mu)*iLambda_nu*U_nu;

	u_tmp = adj(U_nu)*iLambda_nu;
	sh_field = Cshift(u_tmp, mu, 1);
	temp_Sigma += -rho_numu*sh_field*adj(U_mu)*U_nu;
      }

      sh_field = Cshift(temp_Sigma, nu, -1);
      Gimpl::AddLink(SigmaTerm, sh_field, mu);

    }
  }
}
|
||||||
|
|
||||||
|
// Build the rho-weighted staple sum for a single direction mu.
//
//  Cup : output; zeroed, then accumulates adj(staple*rho) over all nu != mu
//  U   : gauge field the staples are computed from
//  mu  : link direction
//  rho : smearing weight applied to every staple
void BaseSmear(GaugeLinkField& Cup, const GaugeField& U,int mu,RealD rho) {
  GaugeLinkField tmp_stpl(U.Grid());
  WilsonLoops<Gimpl> WL;

  Cup = Zero();
  for(int nu=0; nu<Nd; ++nu){
    if (nu == mu) continue; // no staple in the link's own direction

    // get the staple in direction mu, nu
    WL.Staple(tmp_stpl, U, mu, nu);  //nb staple conventions of IroIro and Grid differ by a dagger
    Cup += adj(tmp_stpl*rho);
  }
}
|
||||||
// Adjoint vector to GaugeField force
|
// Adjoint vector to GaugeField force
|
||||||
void InsertForce(GaugeField &Fdet,AdjVectorField &Fdet_nu,int nu)
|
void InsertForce(GaugeField &Fdet,AdjVectorField &Fdet_nu,int nu)
|
||||||
{
|
{
|
||||||
@ -47,27 +152,54 @@ private:
|
|||||||
GaugeLinkField UtaU(PlaqL.Grid());
|
GaugeLinkField UtaU(PlaqL.Grid());
|
||||||
GaugeLinkField D(PlaqL.Grid());
|
GaugeLinkField D(PlaqL.Grid());
|
||||||
AdjMatrixField Dbc(PlaqL.Grid());
|
AdjMatrixField Dbc(PlaqL.Grid());
|
||||||
|
AdjMatrixField Dbc_opt(PlaqL.Grid());
|
||||||
LatticeComplex tmp(PlaqL.Grid());
|
LatticeComplex tmp(PlaqL.Grid());
|
||||||
const int Ngen = SU3Adjoint::Dimension;
|
const int Ngen = SU3Adjoint::Dimension;
|
||||||
Complex ci(0,1);
|
Complex ci(0,1);
|
||||||
ColourMatrix ta,tb,tc;
|
ColourMatrix ta,tb,tc;
|
||||||
|
RealD t=0;
|
||||||
|
RealD tp=0;
|
||||||
|
RealD tta=0;
|
||||||
|
RealD tpk=0;
|
||||||
|
t-=usecond();
|
||||||
for(int a=0;a<Ngen;a++) {
|
for(int a=0;a<Ngen;a++) {
|
||||||
|
tta-=usecond();
|
||||||
SU3::generator(a, ta);
|
SU3::generator(a, ta);
|
||||||
|
ta = 2.0 * ci * ta;
|
||||||
// Qlat Tb = 2i Tb^Grid
|
// Qlat Tb = 2i Tb^Grid
|
||||||
UtaU= 2.0*ci*adj(PlaqL)*ta*PlaqR;
|
UtaU= adj(PlaqL)*ta*PlaqR; // 6ms
|
||||||
|
tta+=usecond();
|
||||||
|
////////////////////////////////////////////
|
||||||
|
// Could add this entire C-loop to a projection routine
|
||||||
|
// for performance. Could also pick checkerboard on UtaU
|
||||||
|
// and set checkerboard on result for 2x perf
|
||||||
|
////////////////////////////////////////////
|
||||||
for(int c=0;c<Ngen;c++) {
|
for(int c=0;c<Ngen;c++) {
|
||||||
SU3::generator(c, tc);
|
SU3::generator(c, tc);
|
||||||
D = Ta( (2.0)*ci*tc *UtaU);
|
tc = 2.0*ci*tc;
|
||||||
|
tp-=usecond();
|
||||||
|
D = Ta( tc *UtaU); // 2ms
|
||||||
|
#if 1
|
||||||
|
SU3::LieAlgebraProject(Dbc_opt,D,c); // 5.5ms
|
||||||
|
#else
|
||||||
for(int b=0;b<Ngen;b++){
|
for(int b=0;b<Ngen;b++){
|
||||||
SU3::generator(b, tb);
|
SU3::generator(b, tb);
|
||||||
tmp =-trace(ci*tb*D);
|
tmp =-trace(ci*tb*D);
|
||||||
PokeIndex<ColourIndex>(Dbc,tmp,b,c); // Adjoint rep
|
PokeIndex<ColourIndex>(Dbc,tmp,b,c); // Adjoint rep
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
tp+=usecond();
|
||||||
}
|
}
|
||||||
tmp = trace(MpInvJx * Dbc);
|
// Dump(Dbc_opt,"Dbc_opt");
|
||||||
|
// Dump(Dbc,"Dbc");
|
||||||
|
tpk-=usecond();
|
||||||
|
tmp = trace(MpInvJx * Dbc_opt);
|
||||||
PokeIndex<ColourIndex>(Fdet2,tmp,a);
|
PokeIndex<ColourIndex>(Fdet2,tmp,a);
|
||||||
|
tpk+=usecond();
|
||||||
}
|
}
|
||||||
|
t+=usecond();
|
||||||
|
std::cout << GridLogPerformance << " Compute_MpInvJx_dNxxdSy " << t/1e3 << " ms proj "<<tp/1e3<< " ms"
|
||||||
|
<< " ta "<<tta/1e3<<" ms" << " poke "<<tpk/1e3<< " ms"<<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ComputeNxy(const GaugeLinkField &PlaqL,const GaugeLinkField &PlaqR,AdjMatrixField &NxAd)
|
void ComputeNxy(const GaugeLinkField &PlaqL,const GaugeLinkField &PlaqR,AdjMatrixField &NxAd)
|
||||||
@ -79,12 +211,17 @@ private:
|
|||||||
ColourMatrix tc;
|
ColourMatrix tc;
|
||||||
for(int b=0;b<Ngen;b++) {
|
for(int b=0;b<Ngen;b++) {
|
||||||
SU3::generator(b, tb);
|
SU3::generator(b, tb);
|
||||||
Nx = (2.0)*Ta( adj(PlaqL)*ci*tb * PlaqR );
|
tb = 2.0 * ci * tb;
|
||||||
|
Nx = Ta( adj(PlaqL)*tb * PlaqR );
|
||||||
|
#if 1
|
||||||
|
SU3::LieAlgebraProject(NxAd,Nx,b);
|
||||||
|
#else
|
||||||
for(int c=0;c<Ngen;c++) {
|
for(int c=0;c<Ngen;c++) {
|
||||||
SU3::generator(c, tc);
|
SU3::generator(c, tc);
|
||||||
auto tmp =closure( -trace(ci*tc*Nx));
|
auto tmp =closure( -trace(ci*tc*Nx));
|
||||||
PokeIndex<ColourIndex>(NxAd,tmp,c,b);
|
PokeIndex<ColourIndex>(NxAd,tmp,c,b);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void ApplyMask(GaugeField &U,int smr)
|
void ApplyMask(GaugeField &U,int smr)
|
||||||
@ -164,8 +301,7 @@ public:
|
|||||||
// Computes ALL the staples -- could compute one only and do it here
|
// Computes ALL the staples -- could compute one only and do it here
|
||||||
RealD time;
|
RealD time;
|
||||||
time=-usecond();
|
time=-usecond();
|
||||||
this->StoutSmearing->BaseSmear(C, U);
|
BaseSmear(Cmu, U,mu,rho);
|
||||||
Cmu = peekLorentz(C, mu);
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////
|
||||||
// Assemble Luscher exp diff map J matrix
|
// Assemble Luscher exp diff map J matrix
|
||||||
@ -209,6 +345,36 @@ public:
|
|||||||
// dJ(x)/dxe
|
// dJ(x)/dxe
|
||||||
//////////////////////////////////////
|
//////////////////////////////////////
|
||||||
time=-usecond();
|
time=-usecond();
|
||||||
|
#if 1
|
||||||
|
std::vector<AdjMatrixField> dJdX; dJdX.resize(8,grid);
|
||||||
|
std::vector<AdjMatrix> TRb_s; TRb_s.resize(8);
|
||||||
|
AdjMatrixField tbXn(grid);
|
||||||
|
AdjMatrixField sumXtbX(grid);
|
||||||
|
AdjMatrixField t2(grid);
|
||||||
|
AdjMatrixField dt2(grid);
|
||||||
|
AdjMatrixField t3(grid);
|
||||||
|
AdjMatrixField dt3(grid);
|
||||||
|
AdjMatrixField aunit(grid);
|
||||||
|
|
||||||
|
for(int b=0;b<8;b++){
|
||||||
|
SU3Adjoint::generator(b, TRb_s[b]);
|
||||||
|
dJdX[b] = TRb_s[b];
|
||||||
|
}
|
||||||
|
aunit = ComplexD(1.0);
|
||||||
|
// Could put into an accelerator_for
|
||||||
|
X = (-1.0)*ZxAd;
|
||||||
|
t2 = X;
|
||||||
|
for (int j = 12; j > 1; --j) {
|
||||||
|
t3 = t2*(1.0 / (j + 1)) + aunit;
|
||||||
|
t2 = X * t3;
|
||||||
|
for(int b=0;b<8;b++){
|
||||||
|
dJdX[b]= TRb_s[b] * t3 + X * dJdX[b]*(1.0 / (j + 1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for(int b=0;b<8;b++){
|
||||||
|
dJdX[b] = -dJdX[b];
|
||||||
|
}
|
||||||
|
#else
|
||||||
std::vector<AdjMatrixField> dJdX; dJdX.resize(8,grid);
|
std::vector<AdjMatrixField> dJdX; dJdX.resize(8,grid);
|
||||||
AdjMatrixField tbXn(grid);
|
AdjMatrixField tbXn(grid);
|
||||||
AdjMatrixField sumXtbX(grid);
|
AdjMatrixField sumXtbX(grid);
|
||||||
@ -224,14 +390,15 @@ public:
|
|||||||
X = (-1.0)*ZxAd;
|
X = (-1.0)*ZxAd;
|
||||||
t2 = X;
|
t2 = X;
|
||||||
dt2 = TRb;
|
dt2 = TRb;
|
||||||
for (int j = 20; j > 1; --j) {
|
for (int j = 12; j > 1; --j) {
|
||||||
t3 = t2*(1.0 / (j + 1)) + aunit;
|
t3 = t2*(1.0 / (j + 1)) + aunit;
|
||||||
dt3 = dt2*(1.0 / (j + 1));
|
dt3 = dt2*(1.0 / (j + 1));
|
||||||
t2 = X * t3;
|
t2 = X * t3;
|
||||||
dt2 = TRb * t3 + X * dt3;
|
dt2 = TRb * t3 + X * dt3;
|
||||||
}
|
}
|
||||||
dJdX[b] = -dt2;
|
dJdX[b] = -dt2;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
time+=usecond();
|
time+=usecond();
|
||||||
std::cout << GridLogMessage << "dJx took "<<time<< " us"<<std::endl;
|
std::cout << GridLogMessage << "dJx took "<<time<< " us"<<std::endl;
|
||||||
/////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////
|
||||||
@ -281,8 +448,8 @@ public:
|
|||||||
|
|
||||||
for(int e =0 ; e<8 ; e++){
|
for(int e =0 ; e<8 ; e++){
|
||||||
LatticeComplexD tr(grid);
|
LatticeComplexD tr(grid);
|
||||||
ColourMatrix te;
|
// ColourMatrix te;
|
||||||
SU3::generator(e, te);
|
// SU3::generator(e, te);
|
||||||
tr = trace(dJdX[e] * nMpInv);
|
tr = trace(dJdX[e] * nMpInv);
|
||||||
pokeColour(dJdXe_nMpInv,tr,e);
|
pokeColour(dJdXe_nMpInv,tr,e);
|
||||||
}
|
}
|
||||||
@ -493,20 +660,25 @@ public:
|
|||||||
//////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////
|
||||||
// Assemble the N matrix
|
// Assemble the N matrix
|
||||||
//////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////
|
||||||
// Computes ALL the staples -- could compute one only here
|
double rho=this->StoutSmearing->SmearRho[1];
|
||||||
this->StoutSmearing->BaseSmear(C, U);
|
BaseSmear(Cmu, U,mu,rho);
|
||||||
Cmu = peekLorentz(C, mu);
|
|
||||||
Umu = peekLorentz(U, mu);
|
Umu = peekLorentz(U, mu);
|
||||||
Complex ci(0,1);
|
Complex ci(0,1);
|
||||||
for(int b=0;b<Ngen;b++) {
|
for(int b=0;b<Ngen;b++) {
|
||||||
SU3::generator(b, Tb);
|
SU3::generator(b, Tb);
|
||||||
// Qlat Tb = 2i Tb^Grid
|
// Qlat Tb = 2i Tb^Grid
|
||||||
Nb = (2.0)*Ta( ci*Tb * Umu * adj(Cmu));
|
Nb = (2.0)*Ta( ci*Tb * Umu * adj(Cmu));
|
||||||
|
// FIXME -- replace this with LieAlgebraProject
|
||||||
|
#if 0
|
||||||
|
SU3::LieAlgebraProject(Ncb,tmp,b);
|
||||||
|
#else
|
||||||
for(int c=0;c<Ngen;c++) {
|
for(int c=0;c<Ngen;c++) {
|
||||||
SU3::generator(c, Tc);
|
SU3::generator(c, Tc);
|
||||||
auto tmp = -trace(ci*Tc*Nb); // Luchang's norm: (2Tc) (2Td) N^db = -2 delta cd N^db // - was important
|
auto tmp = -trace(ci*Tc*Nb); // Luchang's norm: (2Tc) (2Td) N^db = -2 delta cd N^db // - was important
|
||||||
PokeIndex<ColourIndex>(Ncb,tmp,c,b);
|
PokeIndex<ColourIndex>(Ncb,tmp,c,b);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////
|
||||||
@ -693,15 +865,19 @@ private:
|
|||||||
const GaugeField& GaugeK,int level)
|
const GaugeField& GaugeK,int level)
|
||||||
{
|
{
|
||||||
GridBase* grid = GaugeK.Grid();
|
GridBase* grid = GaugeK.Grid();
|
||||||
GaugeField C(grid), SigmaK(grid), iLambda(grid);
|
GaugeField SigmaK(grid), iLambda(grid);
|
||||||
GaugeField SigmaKPrimeA(grid);
|
GaugeField SigmaKPrimeA(grid);
|
||||||
GaugeField SigmaKPrimeB(grid);
|
GaugeField SigmaKPrimeB(grid);
|
||||||
GaugeLinkField iLambda_mu(grid);
|
GaugeLinkField iLambda_mu(grid);
|
||||||
GaugeLinkField iQ(grid), e_iQ(grid);
|
GaugeLinkField iQ(grid), e_iQ(grid);
|
||||||
GaugeLinkField SigmaKPrime_mu(grid);
|
GaugeLinkField SigmaKPrime_mu(grid);
|
||||||
GaugeLinkField GaugeKmu(grid), Cmu(grid);
|
GaugeLinkField GaugeKmu(grid), Cmu(grid);
|
||||||
|
|
||||||
this->StoutSmearing->BaseSmear(C, GaugeK);
|
int mmu= (level/2) %Nd;
|
||||||
|
int cb= (level%2);
|
||||||
|
double rho=this->StoutSmearing->SmearRho[1];
|
||||||
|
|
||||||
|
// Can override this to do one direction only.
|
||||||
SigmaK = Zero();
|
SigmaK = Zero();
|
||||||
iLambda = Zero();
|
iLambda = Zero();
|
||||||
|
|
||||||
@ -712,18 +888,38 @@ private:
|
|||||||
// Could get away with computing only one polarisation here
|
// Could get away with computing only one polarisation here
|
||||||
// int mu= (smr/2) %Nd;
|
// int mu= (smr/2) %Nd;
|
||||||
// SigmaKprime_A has only one component
|
// SigmaKprime_A has only one component
|
||||||
for (int mu = 0; mu < Nd; mu++)
|
#if 0
|
||||||
|
BaseSmear(Cmu, GaugeK,mu,rho);
|
||||||
|
GaugeKmu = peekLorentz(GaugeK, mu);
|
||||||
|
SigmaKPrime_mu = peekLorentz(SigmaKPrimeA, mu);
|
||||||
|
iQ = Ta(Cmu * adj(GaugeKmu));
|
||||||
|
this->set_iLambda(iLambda_mu, e_iQ, iQ, SigmaKPrime_mu, GaugeKmu);
|
||||||
|
pokeLorentz(SigmaK, SigmaKPrime_mu * e_iQ + adj(Cmu) * iLambda_mu, mu);
|
||||||
|
pokeLorentz(iLambda, iLambda_mu, mu);
|
||||||
|
BaseSmearDerivative(SigmaK, iLambda,GaugeK,mu,rho); // derivative of SmearBase
|
||||||
|
#else
|
||||||
|
// GaugeField C(grid);
|
||||||
|
// this->StoutSmearing->BaseSmear(C, GaugeK);
|
||||||
|
// for (int mu = 0; mu < Nd; mu++)
|
||||||
|
int mu =mmu;
|
||||||
|
BaseSmear(Cmu, GaugeK,mu,rho);
|
||||||
{
|
{
|
||||||
Cmu = peekLorentz(C, mu);
|
// Cmu = peekLorentz(C, mu);
|
||||||
GaugeKmu = peekLorentz(GaugeK, mu);
|
GaugeKmu = peekLorentz(GaugeK, mu);
|
||||||
SigmaKPrime_mu = peekLorentz(SigmaKPrimeA, mu);
|
SigmaKPrime_mu = peekLorentz(SigmaKPrimeA, mu);
|
||||||
iQ = Ta(Cmu * adj(GaugeKmu));
|
iQ = Ta(Cmu * adj(GaugeKmu));
|
||||||
this->set_iLambda(iLambda_mu, e_iQ, iQ, SigmaKPrime_mu, GaugeKmu);
|
this->set_iLambda(iLambda_mu, e_iQ, iQ, SigmaKPrime_mu, GaugeKmu);
|
||||||
pokeLorentz(SigmaK, SigmaKPrime_mu * e_iQ + adj(Cmu) * iLambda_mu, mu);
|
pokeLorentz(SigmaK, SigmaKPrime_mu * e_iQ + adj(Cmu) * iLambda_mu, mu);
|
||||||
pokeLorentz(iLambda, iLambda_mu, mu);
|
pokeLorentz(iLambda, iLambda_mu, mu);
|
||||||
|
std::cout << " mu "<<mu<<" SigmaKPrime_mu"<<norm2(SigmaKPrime_mu)<< " iLambda_mu " <<norm2(iLambda_mu)<<std::endl;
|
||||||
}
|
}
|
||||||
this->StoutSmearing->derivative(SigmaK, iLambda,GaugeK); // derivative of SmearBase
|
// GaugeField SigmaKcopy(grid);
|
||||||
|
// SigmaKcopy = SigmaK;
|
||||||
|
BaseSmearDerivative(SigmaK, iLambda,GaugeK,mu,rho); // derivative of SmearBase
|
||||||
|
// this->StoutSmearing->derivative(SigmaK, iLambda,GaugeK); // derivative of SmearBase
|
||||||
|
// SigmaKcopy = SigmaKcopy - SigmaK;
|
||||||
|
// std::cout << " BaseSmearDerivative fast path error" <<norm2(SigmaKcopy)<<std::endl;
|
||||||
|
#endif
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
// propagate the rest of the force as identity map, just add back
|
// propagate the rest of the force as identity map, just add back
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -100,6 +100,9 @@ class GaugeGroup {
|
|||||||
using iGroupMatrix = iScalar<iScalar<iMatrix<vtype, ncolour> > >;
|
using iGroupMatrix = iScalar<iScalar<iMatrix<vtype, ncolour> > >;
|
||||||
template <typename vtype>
|
template <typename vtype>
|
||||||
using iAlgebraVector = iScalar<iScalar<iVector<vtype, AdjointDimension> > >;
|
using iAlgebraVector = iScalar<iScalar<iVector<vtype, AdjointDimension> > >;
|
||||||
|
template <typename vtype>
|
||||||
|
using iSUnAlgebraMatrix =
|
||||||
|
iScalar<iScalar<iMatrix<vtype, AdjointDimension> > >;
|
||||||
static int su2subgroups(void) { return su2subgroups(group_name()); }
|
static int su2subgroups(void) { return su2subgroups(group_name()); }
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
@ -128,10 +131,19 @@ class GaugeGroup {
|
|||||||
typedef Lattice<vMatrix> LatticeMatrix;
|
typedef Lattice<vMatrix> LatticeMatrix;
|
||||||
typedef Lattice<vMatrixF> LatticeMatrixF;
|
typedef Lattice<vMatrixF> LatticeMatrixF;
|
||||||
typedef Lattice<vMatrixD> LatticeMatrixD;
|
typedef Lattice<vMatrixD> LatticeMatrixD;
|
||||||
|
|
||||||
typedef Lattice<vAlgebraVector> LatticeAlgebraVector;
|
typedef Lattice<vAlgebraVector> LatticeAlgebraVector;
|
||||||
typedef Lattice<vAlgebraVectorF> LatticeAlgebraVectorF;
|
typedef Lattice<vAlgebraVectorF> LatticeAlgebraVectorF;
|
||||||
typedef Lattice<vAlgebraVectorD> LatticeAlgebraVectorD;
|
typedef Lattice<vAlgebraVectorD> LatticeAlgebraVectorD;
|
||||||
|
|
||||||
|
typedef iSUnAlgebraMatrix<vComplex> vAlgebraMatrix;
|
||||||
|
typedef iSUnAlgebraMatrix<vComplexF> vAlgebraMatrixF;
|
||||||
|
typedef iSUnAlgebraMatrix<vComplexD> vAlgebraMatrixD;
|
||||||
|
|
||||||
|
typedef Lattice<vAlgebraMatrix> LatticeAlgebraMatrix;
|
||||||
|
typedef Lattice<vAlgebraMatrixF> LatticeAlgebraMatrixF;
|
||||||
|
typedef Lattice<vAlgebraMatrixD> LatticeAlgebraMatrixD;
|
||||||
|
|
||||||
|
|
||||||
typedef iSU2Matrix<Complex> SU2Matrix;
|
typedef iSU2Matrix<Complex> SU2Matrix;
|
||||||
typedef iSU2Matrix<ComplexF> SU2MatrixF;
|
typedef iSU2Matrix<ComplexF> SU2MatrixF;
|
||||||
@ -160,7 +172,7 @@ class GaugeGroup {
|
|||||||
return generator(lieIndex, ta, group_name());
|
return generator(lieIndex, ta, group_name());
|
||||||
}
|
}
|
||||||
|
|
||||||
static void su2SubGroupIndex(int &i1, int &i2, int su2_index) {
|
static accelerator_inline void su2SubGroupIndex(int &i1, int &i2, int su2_index) {
|
||||||
return su2SubGroupIndex(i1, i2, su2_index, group_name());
|
return su2SubGroupIndex(i1, i2, su2_index, group_name());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -389,6 +401,52 @@ class GaugeGroup {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Ta are hermitian (?)
|
||||||
|
// Anti herm is i Ta basis
|
||||||
|
// Project the colour-matrix field `in` onto the generator basis and store the
// resulting coefficients in column `b` of the adjoint-index matrix field `out`.
//
//   out : field of AdjointDimension x AdjointDimension matrices; only the
//         entries (a,b) for the given `b` are written here.
//   in  : field of ncolour x ncolour matrices; per the original author's note
//         it is expected to be traceless ANTI-hermitian (Grid generators are
//         Hermitian).
//   b   : column of `out` that receives the projected coefficients.
//
// Row layout written by this routine:
//   rows 2*s, 2*s+1        : the off-diagonal pair (i1,i2) of su2 subgroup s
//   rows N(N-1)+d          : the d-th diagonal generator
//
// Using Luchang's projection convention
//   2 Tr{Ta Tb} A_b= 2/2 delta ab A_b = A_a
static void LieAlgebraProject(LatticeAlgebraMatrix &out,const LatticeMatrix &in, int b)
{
  conformable(in, out);
  GridBase *grid = out.Grid();

  autoView(out_v,out,AcceleratorWrite);
  autoView(in_v,in,AcceleratorRead);

  int N     = ncolour;
  int NNm1  = N * (N - 1);
  int hNNm1 = NNm1/2;

  // Off-diagonal generators: two rows (ax, ay) per su2 subgroup.
  for(int su2Index=0;su2Index<hNNm1;su2Index++){
    int i1, i2;
    su2SubGroupIndex(i1, i2, su2Index);
    int ax = su2Index*2;
    int ay = su2Index*2+1;
    // NOTE(review): this kernel is launched with a lane count of 1 whereas the
    // diagonal kernel below uses vComplex::Nsimd() -- confirm which is intended.
    accelerator_for(ss,grid->oSites(),1,{
	// in is traceless ANTI-hermitian whereas Grid generators are Hermitian.
	// trace( Ta x Ci in)
	// Bet I need to move to real part with mult by -i
	out_v[ss]()()(ax,b) = 0.5*(real(in_v[ss]()()(i2,i1)) - real(in_v[ss]()()(i1,i2)));
	out_v[ss]()()(ay,b) = 0.5*(imag(in_v[ss]()()(i1,i2)) + imag(in_v[ss]()()(i2,i1)));
      });
  }

  // Diagonal generators: sum of the first k diagonal entries minus k times
  // entry (k,k); the imaginary part is scaled by 1/sqrt(2k(k+1)).
  for(int diagIndex=0;diagIndex<N-1;diagIndex++){
    int k = diagIndex + 1; // diagIndex starts from 0
    int a = NNm1+diagIndex;
    RealD scale = 1.0/sqrt(2.0*k*(k+1));
    accelerator_for(ss,grid->oSites(),vComplex::Nsimd(),{
	auto tmp = in_v[ss]()()(0,0);
	for(int i=1;i<k;i++){
	  tmp=tmp+in_v[ss]()()(i,i);
	}
	tmp = tmp - in_v[ss]()()(k,k)*k;
	out_v[ss]()()(a,b) =imag(tmp) * scale;
      });
  }
}
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template <int ncolour>
|
template <int ncolour>
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
// doesn't get found by the scripts/filelist during bootstrapping.
|
// doesn't get found by the scripts/filelist during bootstrapping.
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
template <ONLY_IF_SU>
|
template <ONLY_IF_SU>
|
||||||
static int su2subgroups(GroupName::SU) { return (ncolour * (ncolour - 1)) / 2; }
|
static int su2subgroups(GroupName::SU) { return (ncolour * (ncolour - 1)) / 2; }
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
@ -576,3 +577,4 @@ static void RandomGaugeTransform(GridParallelRNG &pRNG, typename Gimpl::GaugeFie
|
|||||||
LieRandomize(pRNG,g,1.0);
|
LieRandomize(pRNG,g,1.0);
|
||||||
GaugeTransform<Gimpl>(Umu,g);
|
GaugeTransform<Gimpl>(Umu,g);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -69,6 +69,35 @@ accelerator_inline auto trace(const iVector<vtype,N> &arg) -> iVector<decltype(t
|
|||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
////////////////////////////
|
||||||
|
// Fast path traceProduct
|
||||||
|
////////////////////////////
|
||||||
|
template<class S1 , class S2, IfNotGridTensor<S1> = 0, IfNotGridTensor<S2> = 0>
|
||||||
|
accelerator_inline auto traceProduct( const S1 &arg1,const S2 &arg2)
|
||||||
|
-> decltype(arg1*arg2)
|
||||||
|
{
|
||||||
|
return arg1*arg2;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class vtype,class rtype,int N >
|
||||||
|
accelerator_inline auto traceProduct(const iMatrix<vtype,N> &arg1,const iMatrix<rtype,N> &arg2) -> iScalar<decltype(trace(arg1._internal[0][0]*arg2._internal[0][0]))>
|
||||||
|
{
|
||||||
|
iScalar<decltype( trace(arg1._internal[0][0]*arg2._internal[0][0] )) > ret;
|
||||||
|
zeroit(ret._internal);
|
||||||
|
for(int i=0;i<N;i++){
|
||||||
|
for(int j=0;j<N;j++){
|
||||||
|
ret._internal=ret._internal+traceProduct(arg1._internal[i][j],arg2._internal[j][i]);
|
||||||
|
}}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class vtype,class rtype >
|
||||||
|
accelerator_inline auto traceProduct(const iScalar<vtype> &arg1,const iScalar<rtype> &arg2) -> iScalar<decltype(trace(arg1._internal*arg2._internal))>
|
||||||
|
{
|
||||||
|
iScalar<decltype(trace(arg1._internal*arg2._internal))> ret;
|
||||||
|
ret._internal=traceProduct(arg1._internal,arg2._internal);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
|
||||||
|
@ -34,9 +34,12 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
|
|
||||||
// These are the Grid tensors
|
// These are the Grid tensors
|
||||||
template<typename T> struct isGridTensor : public std::false_type { static constexpr bool notvalue = true; };
|
template<typename T> struct isGridTensor : public std::false_type { static constexpr bool notvalue = true; };
|
||||||
template<class T> struct isGridTensor<iScalar<T>> : public std::true_type { static constexpr bool notvalue = false; };
|
template<class T> struct isGridTensor<iScalar<T> > : public std::true_type { static constexpr bool notvalue = false; };
|
||||||
template<class T, int N> struct isGridTensor<iVector<T, N>> : public std::true_type { static constexpr bool notvalue = false; };
|
template<class T, int N> struct isGridTensor<iVector<T, N> >: public std::true_type { static constexpr bool notvalue = false; };
|
||||||
template<class T, int N> struct isGridTensor<iMatrix<T, N>> : public std::true_type { static constexpr bool notvalue = false; };
|
template<class T, int N> struct isGridTensor<iMatrix<T, N> >: public std::true_type { static constexpr bool notvalue = false; };
|
||||||
|
|
||||||
|
template <typename T> using IfGridTensor = Invoke<std::enable_if<isGridTensor<T>::value, int> >;
|
||||||
|
template <typename T> using IfNotGridTensor = Invoke<std::enable_if<!isGridTensor<T>::value, int> >;
|
||||||
|
|
||||||
// Traits to identify scalars
|
// Traits to identify scalars
|
||||||
template<typename T> struct isGridScalar : public std::false_type { static constexpr bool notvalue = true; };
|
template<typename T> struct isGridScalar : public std::false_type { static constexpr bool notvalue = true; };
|
||||||
|
@ -147,7 +147,7 @@ void acceleratorInit(void)
|
|||||||
#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("AcceleratorHipInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory);
|
#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("AcceleratorHipInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory);
|
||||||
#define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d");
|
#define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d");
|
||||||
|
|
||||||
hipGetDeviceProperties(&gpu_props[i], i);
|
auto r=hipGetDeviceProperties(&gpu_props[i], i);
|
||||||
hipDeviceProp_t prop;
|
hipDeviceProp_t prop;
|
||||||
prop = gpu_props[i];
|
prop = gpu_props[i];
|
||||||
totalDeviceMem = prop.totalGlobalMem;
|
totalDeviceMem = prop.totalGlobalMem;
|
||||||
|
@ -405,7 +405,7 @@ void LambdaApply(uint64_t numx, uint64_t numy, uint64_t numz, lambda Lambda)
|
|||||||
|
|
||||||
#define accelerator_barrier(dummy) \
|
#define accelerator_barrier(dummy) \
|
||||||
{ \
|
{ \
|
||||||
hipStreamSynchronize(computeStream); \
|
auto r=hipStreamSynchronize(computeStream); \
|
||||||
auto err = hipGetLastError(); \
|
auto err = hipGetLastError(); \
|
||||||
if ( err != hipSuccess ) { \
|
if ( err != hipSuccess ) { \
|
||||||
printf("After hipDeviceSynchronize() : HIP error %s \n", hipGetErrorString( err )); \
|
printf("After hipDeviceSynchronize() : HIP error %s \n", hipGetErrorString( err )); \
|
||||||
@ -438,19 +438,19 @@ inline void *acceleratorAllocDevice(size_t bytes)
|
|||||||
return ptr;
|
return ptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
inline void acceleratorFreeShared(void *ptr){ hipFree(ptr);};
|
inline void acceleratorFreeShared(void *ptr){ auto r=hipFree(ptr);};
|
||||||
inline void acceleratorFreeDevice(void *ptr){ hipFree(ptr);};
|
inline void acceleratorFreeDevice(void *ptr){ auto r=hipFree(ptr);};
|
||||||
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { hipMemcpy(to,from,bytes, hipMemcpyHostToDevice);}
|
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { auto r=hipMemcpy(to,from,bytes, hipMemcpyHostToDevice);}
|
||||||
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ hipMemcpy(to,from,bytes, hipMemcpyDeviceToHost);}
|
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ auto r=hipMemcpy(to,from,bytes, hipMemcpyDeviceToHost);}
|
||||||
//inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { hipMemcpy(to,from,bytes, hipMemcpyDeviceToDevice);}
|
//inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { hipMemcpy(to,from,bytes, hipMemcpyDeviceToDevice);}
|
||||||
//inline void acceleratorCopySynchronise(void) { }
|
//inline void acceleratorCopySynchronise(void) { }
|
||||||
inline void acceleratorMemSet(void *base,int value,size_t bytes) { hipMemset(base,value,bytes);}
|
inline void acceleratorMemSet(void *base,int value,size_t bytes) { auto r=hipMemset(base,value,bytes);}
|
||||||
|
|
||||||
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch
|
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch
|
||||||
{
|
{
|
||||||
hipMemcpyDtoDAsync(to,from,bytes, copyStream);
|
auto r=hipMemcpyDtoDAsync(to,from,bytes, copyStream);
|
||||||
}
|
}
|
||||||
inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream); };
|
inline void acceleratorCopySynchronise(void) { auto r=hipStreamSynchronize(copyStream); };
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -575,4 +575,11 @@ accelerator_inline void acceleratorFence(void)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes)
|
||||||
|
{
|
||||||
|
acceleratorCopyDeviceToDeviceAsynch(from,to,bytes);
|
||||||
|
acceleratorCopySynchronise();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
@ -54,15 +54,16 @@ int main(int argc, char **argv)
|
|||||||
// MD.name = std::string("Force Gradient");
|
// MD.name = std::string("Force Gradient");
|
||||||
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
|
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
|
||||||
MD.name = std::string("MinimumNorm2");
|
MD.name = std::string("MinimumNorm2");
|
||||||
MD.MDsteps = 12;
|
MD.MDsteps = 24;
|
||||||
MD.trajL = 1.0;
|
MD.trajL = 1.0;
|
||||||
|
|
||||||
HMCparameters HMCparams;
|
HMCparameters HMCparams;
|
||||||
HMCparams.StartTrajectory = 0;
|
HMCparams.StartTrajectory = 104;
|
||||||
HMCparams.Trajectories = 200;
|
HMCparams.Trajectories = 200;
|
||||||
HMCparams.NoMetropolisUntil= 20;
|
HMCparams.NoMetropolisUntil= 20;
|
||||||
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
|
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
|
||||||
HMCparams.StartingType =std::string("HotStart");
|
// HMCparams.StartingType =std::string("HotStart");
|
||||||
|
HMCparams.StartingType =std::string("CheckpointStart");
|
||||||
HMCparams.MD = MD;
|
HMCparams.MD = MD;
|
||||||
HMCWrapper TheHMC(HMCparams);
|
HMCWrapper TheHMC(HMCparams);
|
||||||
|
|
||||||
@ -87,6 +88,7 @@ int main(int argc, char **argv)
|
|||||||
// here there is too much indirection
|
// here there is too much indirection
|
||||||
typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
|
typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
|
||||||
TheHMC.Resources.AddObservable<PlaqObs>();
|
TheHMC.Resources.AddObservable<PlaqObs>();
|
||||||
|
|
||||||
//////////////////////////////////////////////
|
//////////////////////////////////////////////
|
||||||
|
|
||||||
const int Ls = 16;
|
const int Ls = 16;
|
||||||
@ -134,7 +136,6 @@ int main(int argc, char **argv)
|
|||||||
////////////////////////////////////
|
////////////////////////////////////
|
||||||
ActionLevel<HMCWrapper::Field> Level1(1);
|
ActionLevel<HMCWrapper::Field> Level1(1);
|
||||||
ActionLevel<HMCWrapper::Field> Level2(2);
|
ActionLevel<HMCWrapper::Field> Level2(2);
|
||||||
ActionLevel<HMCWrapper::Field> Level3(4);
|
|
||||||
|
|
||||||
////////////////////////////////////
|
////////////////////////////////////
|
||||||
// Strange action
|
// Strange action
|
||||||
@ -191,7 +192,7 @@ int main(int argc, char **argv)
|
|||||||
Smear_Stout<HMCWrapper::ImplPolicy> Stout(rho);
|
Smear_Stout<HMCWrapper::ImplPolicy> Stout(rho);
|
||||||
SmearedConfigurationMasked<HMCWrapper::ImplPolicy> SmearingPolicy(GridPtr, Nstep, Stout);
|
SmearedConfigurationMasked<HMCWrapper::ImplPolicy> SmearingPolicy(GridPtr, Nstep, Stout);
|
||||||
JacobianAction<HMCWrapper::ImplPolicy> Jacobian(&SmearingPolicy);
|
JacobianAction<HMCWrapper::ImplPolicy> Jacobian(&SmearingPolicy);
|
||||||
if( ApplySmearing ) Level2.push_back(&Jacobian);
|
if( ApplySmearing ) Level1.push_back(&Jacobian);
|
||||||
std::cout << GridLogMessage << " Built the Jacobian "<< std::endl;
|
std::cout << GridLogMessage << " Built the Jacobian "<< std::endl;
|
||||||
|
|
||||||
|
|
||||||
@ -200,7 +201,7 @@ int main(int argc, char **argv)
|
|||||||
/////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////
|
||||||
// GaugeAction.is_smeared = ApplySmearing;
|
// GaugeAction.is_smeared = ApplySmearing;
|
||||||
GaugeAction.is_smeared = true;
|
GaugeAction.is_smeared = true;
|
||||||
Level3.push_back(&GaugeAction);
|
Level2.push_back(&GaugeAction);
|
||||||
|
|
||||||
std::cout << GridLogMessage << " ************************************************"<< std::endl;
|
std::cout << GridLogMessage << " ************************************************"<< std::endl;
|
||||||
std::cout << GridLogMessage << " Action complete -- NO FERMIONS FOR NOW -- FIXME"<< std::endl;
|
std::cout << GridLogMessage << " Action complete -- NO FERMIONS FOR NOW -- FIXME"<< std::endl;
|
||||||
@ -210,10 +211,11 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
|
|
||||||
std::cout << GridLogMessage << " Running the FT HMC "<< std::endl;
|
std::cout << GridLogMessage << " Running the FT HMC "<< std::endl;
|
||||||
|
|
||||||
TheHMC.TheAction.push_back(Level1);
|
TheHMC.TheAction.push_back(Level1);
|
||||||
TheHMC.TheAction.push_back(Level2);
|
TheHMC.TheAction.push_back(Level2);
|
||||||
TheHMC.TheAction.push_back(Level3);
|
|
||||||
|
TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file
|
||||||
|
TheHMC.initializeGaugeFieldAndRNGs(U);
|
||||||
|
|
||||||
TheHMC.Run(SmearingPolicy); // for smearing
|
TheHMC.Run(SmearingPolicy); // for smearing
|
||||||
|
|
||||||
|
226
HMC/FTHMC2p1f_3GeV.cc
Normal file
226
HMC/FTHMC2p1f_3GeV.cc
Normal file
@ -0,0 +1,226 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Copyright (C) 2023
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
|
directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
#include <Grid/qcd/smearing/GaugeConfigurationMasked.h>
|
||||||
|
#include <Grid/qcd/smearing/JacobianAction.h>
|
||||||
|
|
||||||
|
using namespace Grid;
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
std::cout << std::setprecision(12);
|
||||||
|
|
||||||
|
Grid_init(&argc, &argv);
|
||||||
|
int threads = GridThread::GetThreads();
|
||||||
|
// here make a routine to print all the relevant information on the run
|
||||||
|
std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
|
||||||
|
|
||||||
|
// Typedefs to simplify notation
|
||||||
|
typedef WilsonImplR FermionImplPolicy;
|
||||||
|
typedef MobiusFermionD FermionAction;
|
||||||
|
typedef typename FermionAction::FermionField FermionField;
|
||||||
|
|
||||||
|
typedef Grid::XmlReader Serialiser;
|
||||||
|
|
||||||
|
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
|
||||||
|
IntegratorParameters MD;
|
||||||
|
// typedef GenericHMCRunner<LeapFrog> HMCWrapper;
|
||||||
|
// MD.name = std::string("Leap Frog");
|
||||||
|
// typedef GenericHMCRunner<ForceGradient> HMCWrapper;
|
||||||
|
// MD.name = std::string("Force Gradient");
|
||||||
|
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
|
||||||
|
MD.name = std::string("MinimumNorm2");
|
||||||
|
MD.MDsteps = 24;
|
||||||
|
MD.trajL = 1.0;
|
||||||
|
|
||||||
|
HMCparameters HMCparams;
|
||||||
|
HMCparams.StartTrajectory = 0;
|
||||||
|
HMCparams.Trajectories = 200;
|
||||||
|
HMCparams.NoMetropolisUntil= 20;
|
||||||
|
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
|
||||||
|
// HMCparams.StartingType =std::string("HotStart");
|
||||||
|
HMCparams.StartingType =std::string("ColdStart");
|
||||||
|
// HMCparams.StartingType =std::string("CheckpointStart");
|
||||||
|
HMCparams.MD = MD;
|
||||||
|
HMCWrapper TheHMC(HMCparams);
|
||||||
|
|
||||||
|
// Grid from the command line arguments --grid and --mpi
|
||||||
|
TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
|
||||||
|
|
||||||
|
CheckpointerParameters CPparams;
|
||||||
|
CPparams.config_prefix = "ckpoint_EODWF_lat";
|
||||||
|
CPparams.smeared_prefix = "ckpoint_EODWF_lat_smr";
|
||||||
|
CPparams.rng_prefix = "ckpoint_EODWF_rng";
|
||||||
|
CPparams.saveInterval = 1;
|
||||||
|
CPparams.saveSmeared = true;
|
||||||
|
CPparams.format = "IEEE64BIG";
|
||||||
|
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
|
||||||
|
|
||||||
|
RNGModuleParameters RNGpar;
|
||||||
|
RNGpar.serial_seeds = "1 2 3 4 5";
|
||||||
|
RNGpar.parallel_seeds = "6 7 8 9 10";
|
||||||
|
TheHMC.Resources.SetRNGSeeds(RNGpar);
|
||||||
|
|
||||||
|
// Construct observables
|
||||||
|
// here there is too much indirection
|
||||||
|
typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
|
||||||
|
TheHMC.Resources.AddObservable<PlaqObs>();
|
||||||
|
|
||||||
|
//////////////////////////////////////////////
|
||||||
|
|
||||||
|
const int Ls = 12;
|
||||||
|
Real beta = 2.37;
|
||||||
|
Real light_mass = 0.0047;
|
||||||
|
Real strange_mass = 0.0186;
|
||||||
|
Real pv_mass = 1.0;
|
||||||
|
RealD M5 = 1.8;
|
||||||
|
RealD b = 1.0; // Scale factor one, Shamir
|
||||||
|
RealD c = 0.0;
|
||||||
|
|
||||||
|
OneFlavourRationalParams OFRp;
|
||||||
|
OFRp.lo = 1.0e-2;
|
||||||
|
OFRp.hi = 64;
|
||||||
|
OFRp.MaxIter = 10000;
|
||||||
|
OFRp.tolerance= 1.0e-10;
|
||||||
|
OFRp.degree = 14;
|
||||||
|
OFRp.precision= 40;
|
||||||
|
|
||||||
|
std::vector<Real> hasenbusch({ 0.05, 0.1, 0.25, 0.5 });
|
||||||
|
|
||||||
|
auto GridPtr = TheHMC.Resources.GetCartesian();
|
||||||
|
auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
|
||||||
|
auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr);
|
||||||
|
auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr);
|
||||||
|
|
||||||
|
IwasakiGaugeActionR GaugeAction(beta);
|
||||||
|
|
||||||
|
// temporarily need a gauge field
|
||||||
|
LatticeGaugeField U(GridPtr);
|
||||||
|
LatticeGaugeField Uhot(GridPtr);
|
||||||
|
|
||||||
|
// These lines are unecessary if BC are all periodic
|
||||||
|
std::vector<Complex> boundary = {1,1,1,-1};
|
||||||
|
FermionAction::ImplParams Params(boundary);
|
||||||
|
|
||||||
|
double StoppingCondition = 1e-10;
|
||||||
|
double MaxCGIterations = 30000;
|
||||||
|
ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations);
|
||||||
|
|
||||||
|
bool ApplySmearing = true;
|
||||||
|
|
||||||
|
////////////////////////////////////
|
||||||
|
// Collect actions
|
||||||
|
////////////////////////////////////
|
||||||
|
ActionLevel<HMCWrapper::Field> Level1(1);
|
||||||
|
ActionLevel<HMCWrapper::Field> Level2(2);
|
||||||
|
|
||||||
|
////////////////////////////////////
|
||||||
|
// Strange action
|
||||||
|
////////////////////////////////////
|
||||||
|
|
||||||
|
MobiusEOFAFermionD Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);
|
||||||
|
MobiusEOFAFermionD Strange_Op_R (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c);
|
||||||
|
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy>
|
||||||
|
EOFA(Strange_Op_L, Strange_Op_R,
|
||||||
|
CG,
|
||||||
|
CG, CG,
|
||||||
|
CG, CG,
|
||||||
|
OFRp, false);
|
||||||
|
|
||||||
|
EOFA.is_smeared = ApplySmearing;
|
||||||
|
Level1.push_back(&EOFA);
|
||||||
|
|
||||||
|
////////////////////////////////////
|
||||||
|
// up down action
|
||||||
|
////////////////////////////////////
|
||||||
|
std::vector<Real> light_den;
|
||||||
|
std::vector<Real> light_num;
|
||||||
|
|
||||||
|
int n_hasenbusch = hasenbusch.size();
|
||||||
|
light_den.push_back(light_mass);
|
||||||
|
for(int h=0;h<n_hasenbusch;h++){
|
||||||
|
light_den.push_back(hasenbusch[h]);
|
||||||
|
light_num.push_back(hasenbusch[h]);
|
||||||
|
}
|
||||||
|
light_num.push_back(pv_mass);
|
||||||
|
|
||||||
|
std::vector<FermionAction *> Numerators;
|
||||||
|
std::vector<FermionAction *> Denominators;
|
||||||
|
std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
|
||||||
|
|
||||||
|
for(int h=0;h<n_hasenbusch+1;h++){
|
||||||
|
std::cout << GridLogMessage << " 2f quotient Action "<< light_num[h] << " / " << light_den[h]<< std::endl;
|
||||||
|
Numerators.push_back (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, Params));
|
||||||
|
Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, Params));
|
||||||
|
Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],CG,CG));
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int h=0;h<n_hasenbusch+1;h++){
|
||||||
|
Quotients[h]->is_smeared = ApplySmearing;
|
||||||
|
Level1.push_back(Quotients[h]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////
|
||||||
|
// lnDetJacobianAction
|
||||||
|
/////////////////////////////////////////////////////////////
|
||||||
|
double rho = 0.1; // smearing parameter
|
||||||
|
int Nsmear = 1; // number of smearing levels - must be multiple of 2Nd
|
||||||
|
int Nstep = 8*Nsmear; // number of smearing levels - must be multiple of 2Nd
|
||||||
|
Smear_Stout<HMCWrapper::ImplPolicy> Stout(rho);
|
||||||
|
SmearedConfigurationMasked<HMCWrapper::ImplPolicy> SmearingPolicy(GridPtr, Nstep, Stout);
|
||||||
|
JacobianAction<HMCWrapper::ImplPolicy> Jacobian(&SmearingPolicy);
|
||||||
|
if( ApplySmearing ) Level1.push_back(&Jacobian);
|
||||||
|
std::cout << GridLogMessage << " Built the Jacobian "<< std::endl;
|
||||||
|
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////
|
||||||
|
// Gauge action
|
||||||
|
/////////////////////////////////////////////////////////////
|
||||||
|
GaugeAction.is_smeared = ApplySmearing;
|
||||||
|
Level2.push_back(&GaugeAction);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << " ************************************************"<< std::endl;
|
||||||
|
std::cout << GridLogMessage << " Action complete -- NO FERMIONS FOR NOW -- FIXME"<< std::endl;
|
||||||
|
std::cout << GridLogMessage << " ************************************************"<< std::endl;
|
||||||
|
std::cout << GridLogMessage << std::endl;
|
||||||
|
std::cout << GridLogMessage << std::endl;
|
||||||
|
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << " Running the FT HMC "<< std::endl;
|
||||||
|
TheHMC.TheAction.push_back(Level1);
|
||||||
|
TheHMC.TheAction.push_back(Level2);
|
||||||
|
|
||||||
|
TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file
|
||||||
|
TheHMC.initializeGaugeFieldAndRNGs(U);
|
||||||
|
|
||||||
|
TheHMC.Run(SmearingPolicy); // for smearing
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
} // main
|
||||||
|
|
||||||
|
|
||||||
|
|
350
HMC/Mobius2p1f_DD_EOFA_96I_double.cc
Normal file
350
HMC/Mobius2p1f_DD_EOFA_96I_double.cc
Normal file
@ -0,0 +1,350 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./HMC/Mobius2p1f_DD_EOFA_96I_double.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015-2016
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
|
directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
using namespace Grid;
|
||||||
|
|
||||||
|
Grid_init(&argc, &argv);
|
||||||
|
|
||||||
|
CartesianCommunicator::BarrierWorld();
|
||||||
|
std::cout << GridLogMessage << " Clock skew check" <<std::endl;
|
||||||
|
|
||||||
|
int threads = GridThread::GetThreads();
|
||||||
|
|
||||||
|
// Typedefs to simplify notation
|
||||||
|
typedef WilsonImplD FermionImplPolicy;
|
||||||
|
typedef MobiusFermionD FermionAction;
|
||||||
|
typedef MobiusEOFAFermionD FermionEOFAAction;
|
||||||
|
typedef typename FermionAction::FermionField FermionField;
|
||||||
|
|
||||||
|
typedef Grid::XmlReader Serialiser;
|
||||||
|
|
||||||
|
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
|
||||||
|
IntegratorParameters MD;
|
||||||
|
// typedef GenericHMCRunner<LeapFrog> HMCWrapper;
|
||||||
|
// MD.name = std::string("Leap Frog");
|
||||||
|
typedef GenericHMCRunner<ForceGradient> HMCWrapper;
|
||||||
|
MD.name = std::string("Force Gradient");
|
||||||
|
//typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
|
||||||
|
// MD.name = std::string("MinimumNorm2");
|
||||||
|
// TrajL = 2
|
||||||
|
// 4/2 => 0.6 dH
|
||||||
|
// 3/3 => 0.8 dH .. depth 3, slower
|
||||||
|
//MD.MDsteps = 4;
|
||||||
|
MD.MDsteps = 3;
|
||||||
|
MD.trajL = 0.5;
|
||||||
|
|
||||||
|
HMCparameters HMCparams;
|
||||||
|
HMCparams.StartTrajectory = 1077;
|
||||||
|
HMCparams.Trajectories = 1;
|
||||||
|
HMCparams.NoMetropolisUntil= 0;
|
||||||
|
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
|
||||||
|
// HMCparams.StartingType =std::string("ColdStart");
|
||||||
|
HMCparams.StartingType =std::string("CheckpointStart");
|
||||||
|
HMCparams.MD = MD;
|
||||||
|
HMCWrapper TheHMC(HMCparams);
|
||||||
|
|
||||||
|
// Grid from the command line arguments --grid and --mpi
|
||||||
|
TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
|
||||||
|
|
||||||
|
CheckpointerParameters CPparams;
|
||||||
|
CPparams.config_prefix = "ckpoint_DDHMC_lat";
|
||||||
|
CPparams.rng_prefix = "ckpoint_DDHMC_rng";
|
||||||
|
CPparams.saveInterval = 1;
|
||||||
|
CPparams.format = "IEEE64BIG";
|
||||||
|
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
|
||||||
|
std::cout << "loaded NERSC checpointer"<<std::endl;
|
||||||
|
RNGModuleParameters RNGpar;
|
||||||
|
RNGpar.serial_seeds = "1 2 3 4 5";
|
||||||
|
RNGpar.parallel_seeds = "6 7 8 9 10";
|
||||||
|
TheHMC.Resources.SetRNGSeeds(RNGpar);
|
||||||
|
|
||||||
|
// Construct observables
|
||||||
|
// here there is too much indirection
|
||||||
|
typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
|
||||||
|
TheHMC.Resources.AddObservable<PlaqObs>();
|
||||||
|
//////////////////////////////////////////////
|
||||||
|
|
||||||
|
const int Ls = 12;
|
||||||
|
RealD M5 = 1.8;
|
||||||
|
RealD b = 1.5;
|
||||||
|
RealD c = 0.5;
|
||||||
|
Real beta = 2.13;
|
||||||
|
// Real light_mass = 5.4e-4;
|
||||||
|
Real light_mass = 7.8e-4;
|
||||||
|
Real light_mass_dir = 0.01;
|
||||||
|
Real strange_mass = 0.0362;
|
||||||
|
Real pv_mass = 1.0;
|
||||||
|
std::vector<Real> hasenbusch({ 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
|
||||||
|
// std::vector<Real> hasenbusch({ light_mass, 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
|
||||||
|
// std::vector<Real> hasenbusch({ light_mass, 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // Updated
|
||||||
|
// std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass });
|
||||||
|
|
||||||
|
int SP_iters=9000;
|
||||||
|
|
||||||
|
RationalActionParams OFRp; // Up/down
|
||||||
|
OFRp.lo = 6.0e-5;
|
||||||
|
OFRp.hi = 90.0;
|
||||||
|
OFRp.inv_pow = 2;
|
||||||
|
OFRp.MaxIter = SP_iters; // get most shifts by 2000, stop sharing space
|
||||||
|
OFRp.action_tolerance= 1.0e-8;
|
||||||
|
OFRp.action_degree = 18;
|
||||||
|
OFRp.md_tolerance= 1.0e-7;
|
||||||
|
OFRp.md_degree = 14;
|
||||||
|
// OFRp.degree = 20; converges
|
||||||
|
// OFRp.degree = 16;
|
||||||
|
OFRp.precision= 80;
|
||||||
|
OFRp.BoundsCheckFreq=0;
|
||||||
|
std::vector<RealD> ActionTolByPole({
|
||||||
|
// 1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
||||||
|
3.0e-7,1.0e-7,1.0e-8,1.0e-8,
|
||||||
|
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
||||||
|
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
||||||
|
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
||||||
|
1.0e-8,1.0e-8
|
||||||
|
});
|
||||||
|
std::vector<RealD> MDTolByPole({
|
||||||
|
// 1.6e-5,5.0e-6,1.0e-6,3.0e-7, // soften convergence more more
|
||||||
|
// 1.0e-6,3.0e-7,1.0e-7,1.0e-7,
|
||||||
|
1.0e-5,1.0e-6,1.0e-7,1.0e-7, // soften convergence
|
||||||
|
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
||||||
|
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
||||||
|
1.0e-8,1.0e-8
|
||||||
|
});
|
||||||
|
|
||||||
|
auto GridPtr = TheHMC.Resources.GetCartesian();
|
||||||
|
auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
|
||||||
|
|
||||||
|
typedef SchurDiagMooeeOperator<FermionAction ,FermionField > LinearOperatorD;
|
||||||
|
typedef SchurDiagMooeeOperator<FermionEOFAAction ,FermionField > LinearOperatorEOFAD;
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////
|
||||||
|
// Domain decomposed
|
||||||
|
////////////////////////////////////////////////////////////////
|
||||||
|
Coordinate latt4 = GridPtr->GlobalDimensions();
|
||||||
|
Coordinate mpi = GridPtr->ProcessorGrid();
|
||||||
|
Coordinate shm;
|
||||||
|
|
||||||
|
GlobalSharedMemory::GetShmDims(mpi,shm);
|
||||||
|
|
||||||
|
Coordinate CommDim(Nd);
|
||||||
|
for(int d=0;d<Nd;d++) CommDim[d]= (mpi[d]/shm[d])>1 ? 1 : 0;
|
||||||
|
|
||||||
|
Coordinate NonDirichlet(Nd+1,0);
|
||||||
|
Coordinate Dirichlet(Nd+1,0);
|
||||||
|
Dirichlet[1] = CommDim[0]*latt4[0]/mpi[0] * shm[0];
|
||||||
|
Dirichlet[2] = CommDim[1]*latt4[1]/mpi[1] * shm[1];
|
||||||
|
Dirichlet[3] = CommDim[2]*latt4[2]/mpi[2] * shm[2];
|
||||||
|
Dirichlet[4] = CommDim[3]*latt4[3]/mpi[3] * shm[3];
|
||||||
|
//Dirichlet[1] = 0;
|
||||||
|
//Dirichlet[2] = 0;
|
||||||
|
//Dirichlet[3] = 0;
|
||||||
|
|
||||||
|
//
|
||||||
|
Coordinate Block4(Nd);
|
||||||
|
Block4[0] = Dirichlet[1];
|
||||||
|
Block4[1] = Dirichlet[2];
|
||||||
|
Block4[2] = Dirichlet[3];
|
||||||
|
Block4[3] = Dirichlet[4];
|
||||||
|
|
||||||
|
int Width=4;
|
||||||
|
TheHMC.Resources.SetMomentumFilter(new DDHMCFilter<WilsonImplD::Field>(Block4,Width));
|
||||||
|
|
||||||
|
//////////////////////////
|
||||||
|
// Fermion Grids
|
||||||
|
//////////////////////////
|
||||||
|
auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr);
|
||||||
|
auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr);
|
||||||
|
|
||||||
|
IwasakiGaugeActionR GaugeAction(beta);
|
||||||
|
|
||||||
|
// temporarily need a gauge field
|
||||||
|
LatticeGaugeFieldD U(GridPtr); U=Zero();
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << " Running the HMC "<< std::endl;
|
||||||
|
TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file
|
||||||
|
TheHMC.initializeGaugeFieldAndRNGs(U);
|
||||||
|
std::cout << "loaded NERSC gauge field"<<std::endl;
|
||||||
|
|
||||||
|
// These lines are unecessary if BC are all periodic
|
||||||
|
std::vector<Complex> boundary = {1,1,1,-1};
|
||||||
|
FermionAction::ImplParams Params(boundary);
|
||||||
|
FermionAction::ImplParams ParamsDir(boundary);
|
||||||
|
|
||||||
|
Params.dirichlet=NonDirichlet;
|
||||||
|
ParamsDir.dirichlet=Dirichlet;
|
||||||
|
ParamsDir.partialDirichlet=0;
|
||||||
|
std::cout << GridLogMessage<< "Partial Dirichlet depth is "<<dwf_compressor_depth<<std::endl;
|
||||||
|
|
||||||
|
// double StoppingCondition = 1e-14;
|
||||||
|
// double MDStoppingCondition = 1e-9;
|
||||||
|
double StoppingCondition = 1e-8;
|
||||||
|
double MDStoppingCondition = 1e-8;
|
||||||
|
double MDStoppingConditionLoose = 1e-8;
|
||||||
|
double MDStoppingConditionStrange = 1e-8;
|
||||||
|
double MaxCGIterations = 300000;
|
||||||
|
ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations);
|
||||||
|
ConjugateGradient<FermionField> MDCG(MDStoppingCondition,MaxCGIterations);
|
||||||
|
|
||||||
|
////////////////////////////////////
|
||||||
|
// Collect actions
|
||||||
|
////////////////////////////////////
|
||||||
|
ActionLevel<HMCWrapper::Field> Level1(1);
|
||||||
|
ActionLevel<HMCWrapper::Field> Level2(3);
|
||||||
|
ActionLevel<HMCWrapper::Field> Level3(15);
|
||||||
|
|
||||||
|
////////////////////////////////////
|
||||||
|
// Strange action
|
||||||
|
////////////////////////////////////
|
||||||
|
FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params);
|
||||||
|
FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, Params);
|
||||||
|
|
||||||
|
// Probably dominates the force - back to EOFA.
|
||||||
|
OneFlavourRationalParams SFRp;
|
||||||
|
SFRp.lo = 0.1;
|
||||||
|
SFRp.hi = 25.0;
|
||||||
|
SFRp.MaxIter = 10000;
|
||||||
|
SFRp.tolerance= 1.0e-8;
|
||||||
|
SFRp.mdtolerance= 2.0e-6;
|
||||||
|
SFRp.degree = 12;
|
||||||
|
SFRp.precision= 50;
|
||||||
|
|
||||||
|
MobiusEOFAFermionD Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);
|
||||||
|
MobiusEOFAFermionD Strange_Op_R (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c);
|
||||||
|
ConjugateGradient<FermionField> ActionCG(StoppingCondition,MaxCGIterations);
|
||||||
|
ConjugateGradient<FermionField> DerivativeCG(MDStoppingCondition,MaxCGIterations);
|
||||||
|
LinearOperatorEOFAD Strange_LinOp_L (Strange_Op_L);
|
||||||
|
LinearOperatorEOFAD Strange_LinOp_R (Strange_Op_R);
|
||||||
|
|
||||||
|
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy>
|
||||||
|
EOFA(Strange_Op_L, Strange_Op_R,
|
||||||
|
ActionCG,
|
||||||
|
ActionCG, ActionCG,
|
||||||
|
DerivativeCG, DerivativeCG,
|
||||||
|
SFRp, true);
|
||||||
|
Level2.push_back(&EOFA);
|
||||||
|
|
||||||
|
////////////////////////////////////
|
||||||
|
// up down action
|
||||||
|
////////////////////////////////////
|
||||||
|
std::vector<Real> light_den;
|
||||||
|
std::vector<Real> light_num;
|
||||||
|
std::vector<int> dirichlet_den;
|
||||||
|
std::vector<int> dirichlet_num;
|
||||||
|
|
||||||
|
int n_hasenbusch = hasenbusch.size();
|
||||||
|
light_den.push_back(light_mass); dirichlet_den.push_back(0);
|
||||||
|
for(int h=0;h<n_hasenbusch;h++){
|
||||||
|
light_den.push_back(hasenbusch[h]); dirichlet_den.push_back(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int h=0;h<n_hasenbusch;h++){
|
||||||
|
light_num.push_back(hasenbusch[h]); dirichlet_num.push_back(1);
|
||||||
|
}
|
||||||
|
light_num.push_back(pv_mass); dirichlet_num.push_back(0);
|
||||||
|
|
||||||
|
std::vector<FermionAction *> Numerators;
|
||||||
|
std::vector<FermionAction *> Denominators;
|
||||||
|
std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
|
||||||
|
|
||||||
|
std::vector<GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> *> Bdys;
|
||||||
|
|
||||||
|
typedef SchurDiagMooeeOperator<FermionAction ,FermionField > LinearOperatorD;
|
||||||
|
std::vector<LinearOperatorD *> LinOpD;
|
||||||
|
|
||||||
|
for(int h=0;h<n_hasenbusch+1;h++){
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< " 2f quotient Action ";
|
||||||
|
std::cout << "det D("<<light_den[h]<<")";
|
||||||
|
if ( dirichlet_den[h] ) std::cout << "^dirichlet ";
|
||||||
|
std::cout << "/ det D("<<light_num[h]<<")";
|
||||||
|
if ( dirichlet_num[h] ) std::cout << "^dirichlet ";
|
||||||
|
std::cout << std::endl;
|
||||||
|
|
||||||
|
FermionAction::ImplParams ParamsNum(boundary);
|
||||||
|
FermionAction::ImplParams ParamsDen(boundary);
|
||||||
|
|
||||||
|
if ( dirichlet_num[h]==1) ParamsNum.dirichlet = Dirichlet;
|
||||||
|
else ParamsNum.dirichlet = NonDirichlet;
|
||||||
|
|
||||||
|
if ( dirichlet_den[h]==1) ParamsDen.dirichlet = Dirichlet;
|
||||||
|
else ParamsDen.dirichlet = NonDirichlet;
|
||||||
|
|
||||||
|
if ( dirichlet_num[h]==1) ParamsNum.partialDirichlet = 1;
|
||||||
|
else ParamsNum.partialDirichlet = 0;
|
||||||
|
|
||||||
|
if ( dirichlet_den[h]==1) ParamsDen.partialDirichlet = 1;
|
||||||
|
else ParamsDen.partialDirichlet = 0;
|
||||||
|
|
||||||
|
Numerators.push_back (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, ParamsNum));
|
||||||
|
Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, ParamsDen));
|
||||||
|
|
||||||
|
LinOpD.push_back(new LinearOperatorD(*Denominators[h]));
|
||||||
|
|
||||||
|
double conv = MDStoppingCondition;
|
||||||
|
if (h<3) conv= MDStoppingConditionLoose; // Relax on first two hasenbusch factors
|
||||||
|
|
||||||
|
if(h!=0) {
|
||||||
|
Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],MDCG,CG));
|
||||||
|
} else {
|
||||||
|
Bdys.push_back( new GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
|
||||||
|
Bdys.push_back( new GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for(int h=0;h<Bdys.size();h++){
|
||||||
|
Bdys[h]->SetTolerances(ActionTolByPole,MDTolByPole);
|
||||||
|
}
|
||||||
|
int nquo=Quotients.size();
|
||||||
|
Level1.push_back(Bdys[0]);
|
||||||
|
Level1.push_back(Bdys[1]);
|
||||||
|
Level2.push_back(Quotients[0]);
|
||||||
|
for(int h=1;h<nquo-1;h++){
|
||||||
|
Level2.push_back(Quotients[h]);
|
||||||
|
}
|
||||||
|
Level2.push_back(Quotients[nquo-1]);
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////
|
||||||
|
// Gauge action
|
||||||
|
/////////////////////////////////////////////////////////////
|
||||||
|
Level3.push_back(&GaugeAction);
|
||||||
|
TheHMC.TheAction.push_back(Level1);
|
||||||
|
TheHMC.TheAction.push_back(Level2);
|
||||||
|
TheHMC.TheAction.push_back(Level3);
|
||||||
|
std::cout << GridLogMessage << " Action complete "<< std::endl;
|
||||||
|
/////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
TheHMC.Run(); // no smearing
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
} // main
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -343,7 +343,7 @@ int main(int argc, char **argv) {
|
|||||||
// Probably dominates the force - back to EOFA.
|
// Probably dominates the force - back to EOFA.
|
||||||
OneFlavourRationalParams SFRp;
|
OneFlavourRationalParams SFRp;
|
||||||
SFRp.lo = 0.1;
|
SFRp.lo = 0.1;
|
||||||
SFRp.hi = 25.0;
|
SFRp.hi = 30.0;
|
||||||
SFRp.MaxIter = 10000;
|
SFRp.MaxIter = 10000;
|
||||||
SFRp.tolerance= 1.0e-5;
|
SFRp.tolerance= 1.0e-5;
|
||||||
SFRp.mdtolerance= 2.0e-4;
|
SFRp.mdtolerance= 2.0e-4;
|
||||||
|
@ -128,7 +128,7 @@ template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, c
|
|||||||
////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Make a mixed precision conjugate gradient
|
// Make a mixed precision conjugate gradient
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
#if 1
|
#if 0
|
||||||
RealD delta=1.e-4;
|
RealD delta=1.e-4;
|
||||||
std::cout << GridLogMessage << "Calling reliable update Conjugate Gradient" <<std::endl;
|
std::cout << GridLogMessage << "Calling reliable update Conjugate Gradient" <<std::endl;
|
||||||
ConjugateGradientReliableUpdate<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations*MaxOuterIterations,delta,SinglePrecGrid5,LinOpF,LinOpD);
|
ConjugateGradientReliableUpdate<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations*MaxOuterIterations,delta,SinglePrecGrid5,LinOpF,LinOpD);
|
||||||
@ -180,7 +180,7 @@ int main(int argc, char **argv) {
|
|||||||
// 4/2 => 0.6 dH
|
// 4/2 => 0.6 dH
|
||||||
// 3/3 => 0.8 dH .. depth 3, slower
|
// 3/3 => 0.8 dH .. depth 3, slower
|
||||||
//MD.MDsteps = 4;
|
//MD.MDsteps = 4;
|
||||||
MD.MDsteps = 14;
|
MD.MDsteps = 12;
|
||||||
MD.trajL = 0.5;
|
MD.trajL = 0.5;
|
||||||
|
|
||||||
HMCparameters HMCparams;
|
HMCparameters HMCparams;
|
||||||
@ -204,7 +204,7 @@ int main(int argc, char **argv) {
|
|||||||
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
|
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
|
||||||
std::cout << "loaded NERSC checpointer"<<std::endl;
|
std::cout << "loaded NERSC checpointer"<<std::endl;
|
||||||
RNGModuleParameters RNGpar;
|
RNGModuleParameters RNGpar;
|
||||||
RNGpar.serial_seeds = "1 2 3 4 5";
|
RNGpar.serial_seeds = "1 2 3 4 5 6 7 8 9 10";
|
||||||
RNGpar.parallel_seeds = "6 7 8 9 10";
|
RNGpar.parallel_seeds = "6 7 8 9 10";
|
||||||
TheHMC.Resources.SetRNGSeeds(RNGpar);
|
TheHMC.Resources.SetRNGSeeds(RNGpar);
|
||||||
|
|
||||||
@ -218,15 +218,14 @@ int main(int argc, char **argv) {
|
|||||||
RealD M5 = 1.8;
|
RealD M5 = 1.8;
|
||||||
RealD b = 1.5;
|
RealD b = 1.5;
|
||||||
RealD c = 0.5;
|
RealD c = 0.5;
|
||||||
Real beta = 2.13;
|
RealD beta = 2.13;
|
||||||
// Real light_mass = 5.4e-4;
|
// Real light_mass = 5.4e-4;
|
||||||
Real light_mass = 7.8e-4;
|
Real light_mass = 7.8e-4;
|
||||||
|
// Real light_mass = 7.8e-3;
|
||||||
Real strange_mass = 0.0362;
|
Real strange_mass = 0.0362;
|
||||||
Real pv_mass = 1.0;
|
Real pv_mass = 1.0;
|
||||||
// std::vector<Real> hasenbusch({ 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
|
std::vector<Real> hasenbusch({ 0.005, 0.0145, 0.045, 0.108, 0.25, 0.35 , 0.51, 0.6, 0.8 }); // Updated
|
||||||
// std::vector<Real> hasenbusch({ light_mass, 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
|
//std::vector<Real> hasenbusch({ 0.0145, 0.045, 0.108, 0.25, 0.35 , 0.51, 0.6, 0.8 }); // Updated
|
||||||
std::vector<Real> hasenbusch({ 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 }); // Updated
|
|
||||||
// std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass });
|
|
||||||
|
|
||||||
auto GridPtr = TheHMC.Resources.GetCartesian();
|
auto GridPtr = TheHMC.Resources.GetCartesian();
|
||||||
auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
|
auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
|
||||||
@ -277,20 +276,20 @@ int main(int argc, char **argv) {
|
|||||||
|
|
||||||
// double StoppingCondition = 1e-14;
|
// double StoppingCondition = 1e-14;
|
||||||
// double MDStoppingCondition = 1e-9;
|
// double MDStoppingCondition = 1e-9;
|
||||||
double StoppingCondition = 1e-9;
|
double StoppingCondition = 1e-14;
|
||||||
double MDStoppingCondition = 1e-8;
|
double MDStoppingCondition = 1e-9;
|
||||||
double MDStoppingConditionLoose = 1e-8;
|
double MDStoppingConditionLoose = 1e-9;
|
||||||
double MDStoppingConditionStrange = 1e-8;
|
double MDStoppingConditionStrange = 1e-9;
|
||||||
double MaxCGIterations = 300000;
|
double MaxCGIterations = 50000;
|
||||||
ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations);
|
ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations);
|
||||||
ConjugateGradient<FermionField> MDCG(MDStoppingCondition,MaxCGIterations);
|
ConjugateGradient<FermionField> MDCG(MDStoppingCondition,MaxCGIterations);
|
||||||
|
|
||||||
////////////////////////////////////
|
////////////////////////////////////
|
||||||
// Collect actions
|
// Collect actions
|
||||||
////////////////////////////////////
|
////////////////////////////////////
|
||||||
// ActionLevel<HMCWrapper::Field> Level1(1);
|
ActionLevel<HMCWrapper::Field> Level1(1);
|
||||||
ActionLevel<HMCWrapper::Field> Level2(1);
|
ActionLevel<HMCWrapper::Field> Level2(2);
|
||||||
ActionLevel<HMCWrapper::Field> Level3(15);
|
ActionLevel<HMCWrapper::Field> Level3(4);
|
||||||
|
|
||||||
////////////////////////////////////
|
////////////////////////////////////
|
||||||
// Strange action
|
// Strange action
|
||||||
@ -300,11 +299,11 @@ int main(int argc, char **argv) {
|
|||||||
|
|
||||||
// Probably dominates the force - back to EOFA.
|
// Probably dominates the force - back to EOFA.
|
||||||
OneFlavourRationalParams SFRp;
|
OneFlavourRationalParams SFRp;
|
||||||
SFRp.lo = 0.1;
|
SFRp.lo = 0.8;
|
||||||
SFRp.hi = 30.0;
|
SFRp.hi = 30.0;
|
||||||
SFRp.MaxIter = 10000;
|
SFRp.MaxIter = 10000;
|
||||||
SFRp.tolerance= 1.0e-8;
|
SFRp.tolerance= 1.0e-12;
|
||||||
SFRp.mdtolerance= 2.0e-6;
|
SFRp.mdtolerance= 1.0e-9;
|
||||||
SFRp.degree = 10;
|
SFRp.degree = 10;
|
||||||
SFRp.precision= 50;
|
SFRp.precision= 50;
|
||||||
|
|
||||||
@ -355,8 +354,10 @@ int main(int argc, char **argv) {
|
|||||||
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy>
|
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy>
|
||||||
EOFA(Strange_Op_L, Strange_Op_R,
|
EOFA(Strange_Op_L, Strange_Op_R,
|
||||||
ActionCG,
|
ActionCG,
|
||||||
ActionCGL, ActionCGR,
|
// ActionCGL, ActionCGR,
|
||||||
DerivativeCGL, DerivativeCGR,
|
// DerivativeCGL, DerivativeCGR,
|
||||||
|
ActionCG, ActionCG,
|
||||||
|
DerivativeCG, DerivativeCG,
|
||||||
SFRp, true);
|
SFRp, true);
|
||||||
Level2.push_back(&EOFA);
|
Level2.push_back(&EOFA);
|
||||||
|
|
||||||
@ -443,13 +444,14 @@ int main(int argc, char **argv) {
|
|||||||
}
|
}
|
||||||
int nquo=Quotients.size();
|
int nquo=Quotients.size();
|
||||||
for(int h=0;h<nquo;h++){
|
for(int h=0;h<nquo;h++){
|
||||||
Level2.push_back(Quotients[h]);
|
Level1.push_back(Quotients[h]);
|
||||||
}
|
}
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////
|
||||||
// Gauge action
|
// Gauge action
|
||||||
/////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////
|
||||||
Level3.push_back(&GaugeAction);
|
Level3.push_back(&GaugeAction);
|
||||||
|
TheHMC.TheAction.push_back(Level1);
|
||||||
TheHMC.TheAction.push_back(Level2);
|
TheHMC.TheAction.push_back(Level2);
|
||||||
TheHMC.TheAction.push_back(Level3);
|
TheHMC.TheAction.push_back(Level3);
|
||||||
std::cout << GridLogMessage << " Action complete "<< std::endl;
|
std::cout << GridLogMessage << " Action complete "<< std::endl;
|
||||||
|
268
HMC/Mobius2p1f_EOFA_96I_hmc_double.cc
Normal file
268
HMC/Mobius2p1f_EOFA_96I_hmc_double.cc
Normal file
@ -0,0 +1,268 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./HMC/Mobius2p1f_EOFA_96I_hmc_double.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015-2016
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
|
directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
using namespace Grid;
|
||||||
|
|
||||||
|
std::cout << " Grid Initialise "<<std::endl;
|
||||||
|
|
||||||
|
Grid_init(&argc, &argv);
|
||||||
|
|
||||||
|
CartesianCommunicator::BarrierWorld();
|
||||||
|
std::cout << GridLogMessage << " Clock skew check" <<std::endl;
|
||||||
|
|
||||||
|
int threads = GridThread::GetThreads();
|
||||||
|
|
||||||
|
// Typedefs to simplify notation
|
||||||
|
typedef WilsonImplD FermionImplPolicy;
|
||||||
|
typedef MobiusFermionD FermionAction;
|
||||||
|
typedef MobiusEOFAFermionD FermionEOFAAction;
|
||||||
|
typedef typename FermionAction::FermionField FermionField;
|
||||||
|
|
||||||
|
typedef WilsonImplF FermionImplPolicyF;
|
||||||
|
typedef MobiusFermionF FermionActionF;
|
||||||
|
typedef MobiusEOFAFermionF FermionEOFAActionF;
|
||||||
|
typedef typename FermionActionF::FermionField FermionFieldF;
|
||||||
|
|
||||||
|
typedef Grid::XmlReader Serialiser;
|
||||||
|
|
||||||
|
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
|
||||||
|
IntegratorParameters MD;
|
||||||
|
// typedef GenericHMCRunner<LeapFrog> HMCWrapper;
|
||||||
|
// MD.name = std::string("Leap Frog");
|
||||||
|
typedef GenericHMCRunner<ForceGradient> HMCWrapper;
|
||||||
|
MD.name = std::string("Force Gradient");
|
||||||
|
// typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
|
||||||
|
// MD.name = std::string("MinimumNorm2");
|
||||||
|
// TrajL = 2
|
||||||
|
// 4/2 => 0.6 dH
|
||||||
|
// 3/3 => 0.8 dH .. depth 3, slower
|
||||||
|
//MD.MDsteps = 4;
|
||||||
|
MD.MDsteps = 8;
|
||||||
|
MD.trajL = 0.5;
|
||||||
|
|
||||||
|
HMCparameters HMCparams;
|
||||||
|
HMCparams.StartTrajectory = 1077;
|
||||||
|
HMCparams.Trajectories = 20;
|
||||||
|
HMCparams.NoMetropolisUntil= 0;
|
||||||
|
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
|
||||||
|
HMCparams.StartingType =std::string("ColdStart");
|
||||||
|
// HMCparams.StartingType =std::string("CheckpointStart");
|
||||||
|
HMCparams.MD = MD;
|
||||||
|
HMCWrapper TheHMC(HMCparams);
|
||||||
|
|
||||||
|
// Grid from the command line arguments --grid and --mpi
|
||||||
|
TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
|
||||||
|
|
||||||
|
CheckpointerParameters CPparams;
|
||||||
|
CPparams.config_prefix = "ckpoint_HMC_lat";
|
||||||
|
CPparams.rng_prefix = "ckpoint_HMC_rng";
|
||||||
|
CPparams.saveInterval = 1;
|
||||||
|
CPparams.format = "IEEE64BIG";
|
||||||
|
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
|
||||||
|
std::cout << "loaded NERSC checpointer"<<std::endl;
|
||||||
|
RNGModuleParameters RNGpar;
|
||||||
|
RNGpar.serial_seeds = "1 2 3 4 5 6 7 8 9 10";
|
||||||
|
RNGpar.parallel_seeds = "6 7 8 9 10";
|
||||||
|
TheHMC.Resources.SetRNGSeeds(RNGpar);
|
||||||
|
|
||||||
|
// Construct observables
|
||||||
|
// here there is too much indirection
|
||||||
|
typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
|
||||||
|
TheHMC.Resources.AddObservable<PlaqObs>();
|
||||||
|
//////////////////////////////////////////////
|
||||||
|
|
||||||
|
const int Ls = 12;
|
||||||
|
RealD M5 = 1.8;
|
||||||
|
RealD b = 1.5;
|
||||||
|
RealD c = 0.5;
|
||||||
|
RealD beta = 2.13;
|
||||||
|
// Real light_mass = 5.4e-4;
|
||||||
|
Real light_mass = 7.8e-4;
|
||||||
|
// Real light_mass = 7.8e-3;
|
||||||
|
Real strange_mass = 0.0362;
|
||||||
|
Real pv_mass = 1.0;
|
||||||
|
std::vector<Real> hasenbusch({ 0.005, 0.0145, 0.045, 0.108, 0.25, 0.35 , 0.51, 0.6, 0.8 }); // Updated
|
||||||
|
//std::vector<Real> hasenbusch({ 0.0145, 0.045, 0.108, 0.25, 0.35 , 0.51, 0.6, 0.8 }); // Updated
|
||||||
|
|
||||||
|
auto GridPtr = TheHMC.Resources.GetCartesian();
|
||||||
|
auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
|
||||||
|
|
||||||
|
typedef SchurDiagMooeeOperator<FermionAction ,FermionField > LinearOperatorD;
|
||||||
|
typedef SchurDiagMooeeOperator<FermionEOFAAction ,FermionField > LinearOperatorEOFAD;
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////
|
||||||
|
// Domain decomposed
|
||||||
|
////////////////////////////////////////////////////////////////
|
||||||
|
Coordinate latt4 = GridPtr->GlobalDimensions();
|
||||||
|
Coordinate mpi = GridPtr->ProcessorGrid();
|
||||||
|
Coordinate shm;
|
||||||
|
|
||||||
|
GlobalSharedMemory::GetShmDims(mpi,shm);
|
||||||
|
|
||||||
|
//////////////////////////
|
||||||
|
// Fermion Grids
|
||||||
|
//////////////////////////
|
||||||
|
auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr);
|
||||||
|
auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr);
|
||||||
|
|
||||||
|
IwasakiGaugeActionR GaugeAction(beta);
|
||||||
|
|
||||||
|
// temporarily need a gauge field
|
||||||
|
LatticeGaugeFieldD U(GridPtr); U=Zero();
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << " Running the HMC "<< std::endl;
|
||||||
|
TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file
|
||||||
|
TheHMC.initializeGaugeFieldAndRNGs(U);
|
||||||
|
std::cout << "loaded NERSC gauge field"<<std::endl;
|
||||||
|
|
||||||
|
// These lines are unnecessary if BC are all periodic
|
||||||
|
std::vector<Complex> boundary = {1,1,1,-1};
|
||||||
|
FermionAction::ImplParams Params(boundary);
|
||||||
|
|
||||||
|
// double StoppingCondition = 1e-14;
|
||||||
|
// double MDStoppingCondition = 1e-9;
|
||||||
|
double StoppingCondition = 1e-14;
|
||||||
|
double MDStoppingCondition = 1e-9;
|
||||||
|
double MDStoppingConditionLoose = 1e-9;
|
||||||
|
double MDStoppingConditionStrange = 1e-9;
|
||||||
|
double MaxCGIterations = 50000;
|
||||||
|
ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations);
|
||||||
|
ConjugateGradient<FermionField> MDCG(MDStoppingCondition,MaxCGIterations);
|
||||||
|
|
||||||
|
////////////////////////////////////
|
||||||
|
// Collect actions
|
||||||
|
////////////////////////////////////
|
||||||
|
ActionLevel<HMCWrapper::Field> Level1(1);
|
||||||
|
ActionLevel<HMCWrapper::Field> Level2(2);
|
||||||
|
ActionLevel<HMCWrapper::Field> Level3(4);
|
||||||
|
|
||||||
|
////////////////////////////////////
|
||||||
|
// Strange action
|
||||||
|
////////////////////////////////////
|
||||||
|
FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params);
|
||||||
|
FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, Params);
|
||||||
|
|
||||||
|
// Probably dominates the force - back to EOFA.
|
||||||
|
OneFlavourRationalParams SFRp;
|
||||||
|
SFRp.lo = 0.8;
|
||||||
|
SFRp.hi = 30.0;
|
||||||
|
SFRp.MaxIter = 10000;
|
||||||
|
SFRp.tolerance= 1.0e-12;
|
||||||
|
SFRp.mdtolerance= 1.0e-9;
|
||||||
|
SFRp.degree = 10;
|
||||||
|
SFRp.precision= 50;
|
||||||
|
|
||||||
|
MobiusEOFAFermionD Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);
|
||||||
|
MobiusEOFAFermionD Strange_Op_R (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c);
|
||||||
|
ConjugateGradient<FermionField> ActionCG(StoppingCondition,MaxCGIterations);
|
||||||
|
ConjugateGradient<FermionField> DerivativeCG(MDStoppingCondition,MaxCGIterations);
|
||||||
|
LinearOperatorEOFAD Strange_LinOp_L (Strange_Op_L);
|
||||||
|
LinearOperatorEOFAD Strange_LinOp_R (Strange_Op_R);
|
||||||
|
|
||||||
|
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy>
|
||||||
|
EOFA(Strange_Op_L, Strange_Op_R,
|
||||||
|
ActionCG,
|
||||||
|
ActionCG, ActionCG,
|
||||||
|
DerivativeCG, DerivativeCG,
|
||||||
|
SFRp, true);
|
||||||
|
Level2.push_back(&EOFA);
|
||||||
|
|
||||||
|
////////////////////////////////////
|
||||||
|
// up down action
|
||||||
|
////////////////////////////////////
|
||||||
|
std::vector<Real> light_den;
|
||||||
|
std::vector<Real> light_num;
|
||||||
|
|
||||||
|
int n_hasenbusch = hasenbusch.size();
|
||||||
|
light_den.push_back(light_mass);
|
||||||
|
for(int h=0;h<n_hasenbusch;h++){
|
||||||
|
light_den.push_back(hasenbusch[h]);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int h=0;h<n_hasenbusch;h++){
|
||||||
|
light_num.push_back(hasenbusch[h]);
|
||||||
|
}
|
||||||
|
light_num.push_back(pv_mass);
|
||||||
|
|
||||||
|
std::vector<FermionAction *> Numerators;
|
||||||
|
std::vector<FermionAction *> Denominators;
|
||||||
|
std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
|
||||||
|
|
||||||
|
std::vector<OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> *> Bdys;
|
||||||
|
|
||||||
|
typedef SchurDiagMooeeOperator<FermionAction ,FermionField > LinearOperatorD;
|
||||||
|
std::vector<LinearOperatorD *> LinOpD;
|
||||||
|
|
||||||
|
for(int h=0;h<n_hasenbusch+1;h++){
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< " 2f quotient Action ";
|
||||||
|
std::cout << "det D("<<light_den[h]<<")";
|
||||||
|
std::cout << "/ det D("<<light_num[h]<<")";
|
||||||
|
std::cout << std::endl;
|
||||||
|
|
||||||
|
FermionAction::ImplParams ParamsNum(boundary);
|
||||||
|
FermionAction::ImplParams ParamsDen(boundary);
|
||||||
|
|
||||||
|
Numerators.push_back (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, ParamsNum));
|
||||||
|
Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, ParamsDen));
|
||||||
|
|
||||||
|
LinOpD.push_back(new LinearOperatorD(*Denominators[h]));
|
||||||
|
|
||||||
|
double conv = MDStoppingCondition;
|
||||||
|
if (h<3) conv= MDStoppingConditionLoose; // Relax on the first three quotients (h = 0,1,2)
|
||||||
|
|
||||||
|
Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],MDCG,CG,CG));
|
||||||
|
}
|
||||||
|
int nquo=Quotients.size();
|
||||||
|
for(int h=0;h<nquo;h++){
|
||||||
|
Level1.push_back(Quotients[h]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////
|
||||||
|
// Gauge action
|
||||||
|
/////////////////////////////////////////////////////////////
|
||||||
|
Level3.push_back(&GaugeAction);
|
||||||
|
TheHMC.TheAction.push_back(Level1);
|
||||||
|
TheHMC.TheAction.push_back(Level2);
|
||||||
|
TheHMC.TheAction.push_back(Level3);
|
||||||
|
std::cout << GridLogMessage << " Action complete "<< std::endl;
|
||||||
|
/////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
TheHMC.Run(); // no smearing
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
} // main
|
||||||
|
|
||||||
|
|
||||||
|
|
57
systems/Lumi/HMC/32cube/fthmc3gev.slurm
Normal file
57
systems/Lumi/HMC/32cube/fthmc3gev.slurm
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
#!/bin/bash -l
|
||||||
|
#SBATCH --job-name=fthmc3ge
|
||||||
|
#SBATCH --partition=small-g
|
||||||
|
#SBATCH --nodes=1
|
||||||
|
#SBATCH --ntasks-per-node=8
|
||||||
|
##SBATCH --cpus-per-task=8
|
||||||
|
#SBATCH --gpus-per-node=8
|
||||||
|
#SBATCH --time=2:00:00
|
||||||
|
#SBATCH --account=project_465000546
|
||||||
|
#SBATCH --gpu-bind=none
|
||||||
|
#SBATCH --exclusive
|
||||||
|
#SBATCH --mem=0
|
||||||
|
|
||||||
|
|
||||||
|
#sbatch --dependency=afterany:$SLURM_JOBID fthmc3gev.slurm
|
||||||
|
|
||||||
|
CPU_BIND="map_ldom:3,3,1,1,0,0,2,2"
|
||||||
|
MEM_BIND="map_mem:3,3,1,1,0,0,2,2"
|
||||||
|
echo $CPU_BIND
|
||||||
|
|
||||||
|
cat << EOF > ./select_gpu
|
||||||
|
#!/bin/bash
|
||||||
|
export GPU_MAP=(0 1 2 3 4 5 6 7)
|
||||||
|
export NUMA_MAP=(3 3 1 1 0 0 2 2)
|
||||||
|
export GPU=\${GPU_MAP[\$SLURM_LOCALID]}
|
||||||
|
export NUM=\${NUMA_MAP[\$SLURM_LOCALID]}
|
||||||
|
#export HIP_VISIBLE_DEVICES=\$GPU
|
||||||
|
export ROCR_VISIBLE_DEVICES=\$GPU
|
||||||
|
echo RANK \$SLURM_LOCALID using GPU \$GPU
|
||||||
|
echo NUMA \$SLURM_LOCALID using NUMA \${NUM}
|
||||||
|
echo numactl -m \$NUM -N \$NUM \$*
|
||||||
|
exec numactl -m \$NUM -N \$NUM \$*
|
||||||
|
EOF
|
||||||
|
cat ./select_gpu
|
||||||
|
|
||||||
|
chmod +x ./select_gpu
|
||||||
|
|
||||||
|
root=/scratch/project_465000546/boylepet/Grid/systems/Lumi
|
||||||
|
source ${root}/sourceme.sh
|
||||||
|
|
||||||
|
export OMP_NUM_THREADS=7
|
||||||
|
export MPICH_SMP_SINGLE_COPY_MODE=CMA
|
||||||
|
export MPICH_GPU_SUPPORT_ENABLED=1
|
||||||
|
|
||||||
|
#cfg=`ls -rt ckpoint_*lat* | tail -n 1 `
|
||||||
|
#traj="${cfg#*.}"
|
||||||
|
#cfg=`ls -rt ckpoint_*lat* | tail -n 1 `
|
||||||
|
traj=0
|
||||||
|
|
||||||
|
vol=32.32.32.64
|
||||||
|
mpi=1.2.2.2
|
||||||
|
PARAMS="--mpi $mpi --accelerator-threads 16 --comms-sequential --shm 2048 --shm-mpi 0 --grid $vol"
|
||||||
|
#HMCPARAMS="--StartingType CheckpointStart --StartingTrajectory $traj --Trajectories 200"
|
||||||
|
HMCPARAMS="--StartingType ColdStart --StartingTrajectory $traj --Trajectories 20"
|
||||||
|
|
||||||
|
srun ./select_gpu ../FTHMC2p1f_3GeV $HMCPARAMS $PARAMS
|
||||||
|
|
@ -23,7 +23,7 @@ echo mpfr X$MPFR
|
|||||||
--disable-fermion-reps \
|
--disable-fermion-reps \
|
||||||
--disable-gparity \
|
--disable-gparity \
|
||||||
CXX=hipcc MPICXX=mpicxx \
|
CXX=hipcc MPICXX=mpicxx \
|
||||||
CXXFLAGS="-fPIC --offload-arch=gfx90a -I/opt/rocm/include/ -std=c++14 -I/opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1/include" \
|
CXXFLAGS="-fPIC --offload-arch=gfx90a -I/opt/rocm/include/ -std=c++17 -I/opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1/include" \
|
||||||
LDFLAGS="-L/opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1/lib -lmpi -L/opt/cray/pe/mpich/8.1.23/gtl/lib -lmpi_gtl_hsa -lamdhip64 -fopenmp"
|
LDFLAGS="-L/opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1/lib -lmpi -L/opt/cray/pe/mpich/8.1.23/gtl/lib -lmpi_gtl_hsa -lamdhip64 -fopenmp"
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user