diff --git a/Grid/algorithms/FFT.h b/Grid/algorithms/FFT.h index dc972537..329d1d46 100644 --- a/Grid/algorithms/FFT.h +++ b/Grid/algorithms/FFT.h @@ -191,7 +191,7 @@ public: Lattice pgbuf(&pencil_g); autoView(pgbuf_v , pgbuf, CpuWrite); - std::cout << "CPU view" << std::endl; + //std::cout << "CPU view" << std::endl; typedef typename FFTW::FFTW_scalar FFTW_scalar; typedef typename FFTW::FFTW_plan FFTW_plan; @@ -215,7 +215,7 @@ public: else if ( sign == forward ) div = 1.0; else assert(0); - std::cout << GridLogPerformance<<"Making FFTW plan" << std::endl; + //std::cout << GridLogPerformance<<"Making FFTW plan" << std::endl; FFTW_plan p; { FFTW_scalar *in = (FFTW_scalar *)&pgbuf_v[0]; @@ -229,7 +229,7 @@ public: } // Barrel shift and collect global pencil - std::cout << GridLogPerformance<<"Making pencil" << std::endl; + //std::cout << GridLogPerformance<<"Making pencil" << std::endl; Coordinate lcoor(Nd), gcoor(Nd); result = source; int pc = processor_coor[dim]; @@ -251,7 +251,7 @@ public: } } - std::cout <::fftw_destroy_plan(p); #endif diff --git a/Grid/algorithms/LinearOperator.h b/Grid/algorithms/LinearOperator.h index 04cd6dcb..5c171ff2 100644 --- a/Grid/algorithms/LinearOperator.h +++ b/Grid/algorithms/LinearOperator.h @@ -277,6 +277,38 @@ public: assert(0); } }; +template +class ShiftedNonHermitianLinearOperator : public LinearOperatorBase { + Matrix &_Mat; + RealD shift; +public: + ShiftedNonHermitianLinearOperator(Matrix &Mat,RealD shft): _Mat(Mat),shift(shft){}; + // Support for coarsening to a multigrid + void OpDiag (const Field &in, Field &out) { + _Mat.Mdiag(in,out); + out = out + shift*in; + } + void OpDir (const Field &in, Field &out,int dir,int disp) { + _Mat.Mdir(in,out,dir,disp); + } + void OpDirAll (const Field &in, std::vector &out){ + _Mat.MdirAll(in,out); + }; + void Op (const Field &in, Field &out){ + _Mat.M(in,out); + out = out + shift * in; + } + void AdjOp (const Field &in, Field &out){ + _Mat.Mdag(in,out); + out = out + shift * in; + } + void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ + assert(0); + } + void HermOp(const Field &in, Field &out){ + assert(0); + } +}; ////////////////////////////////////////////////////////// // Even Odd Schur decomp operators; there are several diff --git a/Grid/algorithms/blas/BatchedBlas.h b/Grid/algorithms/blas/BatchedBlas.h index f4245319..3a7bbc44 100644 --- a/Grid/algorithms/blas/BatchedBlas.h +++ b/Grid/algorithms/blas/BatchedBlas.h @@ -208,8 +208,8 @@ public: assert(Bkn.size()==batchCount); assert(Cmn.size()==batchCount); - assert(OpA!=GridBLAS_OP_T); // Complex case expect no transpose - assert(OpB!=GridBLAS_OP_T); + //assert(OpA!=GridBLAS_OP_T); // Complex case expect no transpose + //assert(OpB!=GridBLAS_OP_T); int lda = m; // m x k column major int ldb = k; // k x n column major @@ -367,28 +367,67 @@ public: Eigen::Map eAmk(Amk[p],m,k); Eigen::Map eBkn(Bkn[p],k,n); Eigen::Map eCmn(Cmn[p],m,n); - eCmn = beta * eCmn + alpha * eAmk * eBkn ; + if (std::abs(beta) != 0.0) + eCmn = beta * eCmn + alpha * eAmk * eBkn ; + else + eCmn = alpha * eAmk * eBkn ; }); } else if ( (OpA == GridBLAS_OP_C ) && (OpB == GridBLAS_OP_N) ) { thread_for (p, batchCount, { Eigen::Map eAmk(Amk[p],k,m); Eigen::Map eBkn(Bkn[p],k,n); Eigen::Map eCmn(Cmn[p],m,n); - eCmn = beta * eCmn + alpha * eAmk.adjoint() * eBkn ; + if (std::abs(beta) != 0.0) + eCmn = beta * eCmn + alpha * eAmk.adjoint() * eBkn ; + else + eCmn = alpha * eAmk.adjoint() * eBkn ; + }); + } else if ( (OpA == GridBLAS_OP_T ) && (OpB == GridBLAS_OP_N) ) { + thread_for (p, batchCount, { + Eigen::Map eAmk(Amk[p],k,m); + Eigen::Map eBkn(Bkn[p],k,n); + Eigen::Map eCmn(Cmn[p],m,n); + if (std::abs(beta) != 0.0) + eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn ; + else + eCmn = alpha * eAmk.transpose() * eBkn ; }); } else if ( (OpA == GridBLAS_OP_N ) && (OpB == GridBLAS_OP_C) ) { thread_for (p, batchCount, { Eigen::Map eAmk(Amk[p],m,k); Eigen::Map eBkn(Bkn[p],n,k); Eigen::Map eCmn(Cmn[p],m,n); - eCmn = beta * eCmn + alpha * eAmk * eBkn.adjoint() ; + if (std::abs(beta) != 0.0) + eCmn = beta * eCmn + alpha * eAmk * eBkn.adjoint() ; + else + eCmn = alpha * eAmk * eBkn.adjoint() ; + }); + } else if ( (OpA == GridBLAS_OP_N ) && (OpB == GridBLAS_OP_T) ) { + thread_for (p, batchCount, { + Eigen::Map eAmk(Amk[p],m,k); + Eigen::Map eBkn(Bkn[p],n,k); + Eigen::Map eCmn(Cmn[p],m,n); + eCmn = beta * eCmn + alpha * eAmk * eBkn.transpose() ; }); } else if ( (OpA == GridBLAS_OP_C ) && (OpB == GridBLAS_OP_C) ) { thread_for (p, batchCount, { Eigen::Map eAmk(Amk[p],k,m); Eigen::Map eBkn(Bkn[p],n,k); Eigen::Map eCmn(Cmn[p],m,n); - eCmn = beta * eCmn + alpha * eAmk.adjoint() * eBkn.adjoint() ; + if (std::abs(beta) != 0.0) + eCmn = beta * eCmn + alpha * eAmk.adjoint() * eBkn.adjoint() ; + else + eCmn = alpha * eAmk.adjoint() * eBkn.adjoint() ; + } ); + } else if ( (OpA == GridBLAS_OP_T ) && (OpB == GridBLAS_OP_T) ) { + thread_for (p, batchCount, { + Eigen::Map eAmk(Amk[p],k,m); + Eigen::Map eBkn(Bkn[p],n,k); + Eigen::Map eCmn(Cmn[p],m,n); + if (std::abs(beta) != 0.0) + eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn.transpose() ; + else + eCmn = alpha * eAmk.transpose() * eBkn.transpose() ; } ); } else { assert(0); @@ -414,8 +453,8 @@ public: RealD t2=usecond(); int32_t batchCount = Amk.size(); - assert(OpA!=GridBLAS_OP_T); // Complex case expect no transpose - assert(OpB!=GridBLAS_OP_T); + //assert(OpA!=GridBLAS_OP_T); // Complex case expect no transpose + //assert(OpB!=GridBLAS_OP_T); int lda = m; // m x k column major int ldb = k; // k x n column major @@ -514,28 +553,70 @@ public: Eigen::Map eAmk(Amk[p],m,k); Eigen::Map eBkn(Bkn[p],k,n); Eigen::Map eCmn(Cmn[p],m,n); - eCmn = beta * eCmn + alpha * eAmk * eBkn ; + if (std::abs(beta) != 0.0) + eCmn = beta * eCmn + alpha * eAmk * eBkn ; + else + eCmn = alpha * eAmk * eBkn ; }); } else if ( (OpA == GridBLAS_OP_C ) && (OpB == GridBLAS_OP_N) ) { thread_for (p, batchCount, { Eigen::Map eAmk(Amk[p],k,m); Eigen::Map eBkn(Bkn[p],k,n); Eigen::Map eCmn(Cmn[p],m,n); - eCmn = beta * eCmn + alpha * eAmk.adjoint() * eBkn ; + if (std::abs(beta) != 0.0) + eCmn = beta * eCmn + alpha * eAmk.adjoint() * eBkn ; + else + eCmn = alpha * eAmk.adjoint() * eBkn ; + }); + } else if ( (OpA == GridBLAS_OP_T ) && (OpB == GridBLAS_OP_N) ) { + thread_for (p, batchCount, { + Eigen::Map eAmk(Amk[p],k,m); + Eigen::Map eBkn(Bkn[p],k,n); + Eigen::Map eCmn(Cmn[p],m,n); + if (std::abs(beta) != 0.0) + eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn ; + else + eCmn = alpha * eAmk.transpose() * eBkn ; }); } else if ( (OpA == GridBLAS_OP_N ) && (OpB == GridBLAS_OP_C) ) { thread_for (p, batchCount, { Eigen::Map eAmk(Amk[p],m,k); Eigen::Map eBkn(Bkn[p],n,k); Eigen::Map eCmn(Cmn[p],m,n); - eCmn = beta * eCmn + alpha * eAmk * eBkn.adjoint() ; + if (std::abs(beta) != 0.0) + eCmn = beta * eCmn + alpha * eAmk * eBkn.adjoint() ; + else + eCmn = alpha * eAmk * eBkn.adjoint() ; + }); + } else if ( (OpA == GridBLAS_OP_N ) && (OpB == GridBLAS_OP_T) ) { + thread_for (p, batchCount, { + Eigen::Map eAmk(Amk[p],m,k); + Eigen::Map eBkn(Bkn[p],n,k); + Eigen::Map eCmn(Cmn[p],m,n); + if (std::abs(beta) != 0.0) + eCmn = beta * eCmn + alpha * eAmk * eBkn.transpose() ; + else + eCmn = alpha * eAmk * eBkn.transpose() ; }); } else if ( (OpA == GridBLAS_OP_C ) && (OpB == GridBLAS_OP_C) ) { thread_for (p, batchCount, { Eigen::Map eAmk(Amk[p],k,m); Eigen::Map eBkn(Bkn[p],n,k); Eigen::Map eCmn(Cmn[p],m,n); - eCmn = beta * eCmn + alpha * eAmk.adjoint() * eBkn.adjoint() ; + if (std::abs(beta) != 0.0) + eCmn = beta * eCmn + alpha * eAmk.adjoint() * eBkn.adjoint() ; + else + eCmn = alpha * eAmk.adjoint() * eBkn.adjoint() ; + } ); + } else if ( (OpA == GridBLAS_OP_T ) && (OpB == GridBLAS_OP_T) ) { + thread_for (p, batchCount, { + Eigen::Map eAmk(Amk[p],k,m); + Eigen::Map eBkn(Bkn[p],n,k); + Eigen::Map eCmn(Cmn[p],m,n); + if (std::abs(beta) != 0.0) + eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn.transpose() ; + else + eCmn = alpha * eAmk.transpose() * eBkn.transpose() ; } ); } else { assert(0); @@ -661,29 +742,41 @@ public: Eigen::Map eAmk(Amk[p],m,k); Eigen::Map eBkn(Bkn[p],k,n); Eigen::Map eCmn(Cmn[p],m,n); - eCmn = beta * eCmn + alpha * eAmk * eBkn ; + if (std::abs(beta) != 0.0) + eCmn = beta * eCmn + alpha * eAmk * eBkn ; + else + eCmn = alpha * eAmk * eBkn ; }); } else if ( (OpA == GridBLAS_OP_T ) && (OpB == GridBLAS_OP_N) ) { thread_for (p, batchCount, { Eigen::Map eAmk(Amk[p],k,m); Eigen::Map eBkn(Bkn[p],k,n); Eigen::Map eCmn(Cmn[p],m,n); - eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn ; + if (std::abs(beta) != 0.0) + eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn ; + else + eCmn = alpha * eAmk.transpose() * eBkn ; }); } else if ( (OpA == GridBLAS_OP_N ) && (OpB == GridBLAS_OP_T) ) { thread_for (p, batchCount, { Eigen::Map eAmk(Amk[p],m,k); Eigen::Map eBkn(Bkn[p],n,k); Eigen::Map eCmn(Cmn[p],m,n); - eCmn = beta * eCmn + alpha * eAmk * eBkn.transpose() ; + if (std::abs(beta) != 0.0) + eCmn = beta * eCmn + alpha * eAmk * eBkn.transpose() ; + else + eCmn = alpha * eAmk * eBkn.transpose() ; }); } else if ( (OpA == GridBLAS_OP_T ) && (OpB == GridBLAS_OP_T) ) { thread_for (p, batchCount, { Eigen::Map eAmk(Amk[p],k,m); Eigen::Map eBkn(Bkn[p],n,k); Eigen::Map eCmn(Cmn[p],m,n); - eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn.transpose() ; - } ); + if (std::abs(beta) != 0.0) + eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn.transpose() ; + else + eCmn = alpha * eAmk.transpose() * eBkn.transpose() ; + }); } else { assert(0); } @@ -809,28 +902,40 @@ public: Eigen::Map eAmk(Amk[p],m,k); Eigen::Map eBkn(Bkn[p],k,n); Eigen::Map eCmn(Cmn[p],m,n); - eCmn = beta * eCmn + alpha * eAmk * eBkn ; + if (std::abs(beta) != 0.0) + eCmn = beta * eCmn + alpha * eAmk * eBkn ; + else + eCmn = alpha * eAmk * eBkn ; }); } else if ( (OpA == GridBLAS_OP_T ) && (OpB == GridBLAS_OP_N) ) { thread_for (p, batchCount, { Eigen::Map eAmk(Amk[p],k,m); Eigen::Map eBkn(Bkn[p],k,n); Eigen::Map eCmn(Cmn[p],m,n); - eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn ; + if (std::abs(beta) != 0.0) + eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn ; + else + eCmn = alpha * eAmk.transpose() * eBkn ; }); } else if ( (OpA == GridBLAS_OP_N ) && (OpB == GridBLAS_OP_T) ) { thread_for (p, batchCount, { Eigen::Map eAmk(Amk[p],m,k); Eigen::Map eBkn(Bkn[p],n,k); Eigen::Map eCmn(Cmn[p],m,n); - eCmn = beta * eCmn + alpha * eAmk * eBkn.transpose() ; + if (std::abs(beta) != 0.0) + eCmn = beta * eCmn + alpha * eAmk * eBkn.transpose() ; + else + eCmn = alpha * eAmk * eBkn.transpose() ; }); } else if ( (OpA == GridBLAS_OP_T ) && (OpB == GridBLAS_OP_T) ) { thread_for (p, batchCount, { Eigen::Map eAmk(Amk[p],k,m); Eigen::Map eBkn(Bkn[p],n,k); Eigen::Map eCmn(Cmn[p],m,n); - eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn.transpose() ; + if (std::abs(beta) != 0.0) + eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn.transpose() ; + else + eCmn = alpha * eAmk.transpose() * eBkn.transpose() ; }); } else { assert(0); diff --git a/Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h b/Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h index 37f36dda..eb0d0761 100644 --- a/Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ b/Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h @@ -245,9 +245,10 @@ until convergence _HermOp(src_n,tmp); // std::cout << GridLogMessage<< tmp< lme(Nm); std::vector lme2(Nm); diff --git a/Grid/algorithms/multigrid/Aggregates.h b/Grid/algorithms/multigrid/Aggregates.h index 953e3020..4a4c4816 100644 --- a/Grid/algorithms/multigrid/Aggregates.h +++ b/Grid/algorithms/multigrid/Aggregates.h @@ -97,7 +97,7 @@ public: RealD scale; - ConjugateGradient CG(1.0e-2,100,false); + ConjugateGradient CG(1.0e-3,400,false); FineField noise(FineGrid); FineField Mn(FineGrid); @@ -110,7 +110,7 @@ public: hermop.Op(noise,Mn); std::cout< "< "< > &linop, Aggregation & Subspace) + { + CoarsenOperator(linop,Subspace,Subspace); + } + ////////////////////////////////////////////////////////////////////// + // Petrov - Galerkin projection of matrix + ////////////////////////////////////////////////////////////////////// + void CoarsenOperator(LinearOperatorBase > &linop, + Aggregation & U, + Aggregation & V) { std::cout << GridLogMessage<< "GeneralCoarsenMatrix "<< std::endl; GridBase *grid = FineGrid(); @@ -458,11 +470,9 @@ public: // Orthogonalise the subblocks over the basis ///////////////////////////////////////////////////////////// CoarseScalar InnerProd(CoarseGrid()); - blockOrthogonalise(InnerProd,Subspace.subspace); + blockOrthogonalise(InnerProd,V.subspace); + blockOrthogonalise(InnerProd,U.subspace); - // for(int s=0;sGlobalDimensions(); @@ -542,7 +552,7 @@ public: std::cout << GridLogMessage<< "CoarsenMatrixColoured vec "<=DeviceMaxBytes) { + printf("EvictVictims bytes %ld DeviceMaxBytes %ld\n",bytes,DeviceMaxBytes); + } assert(bytes DeviceMaxBytes){ if ( DeviceLRUBytes > 0){ diff --git a/Grid/communicator/Communicator_base.h b/Grid/communicator/Communicator_base.h index 1db5bc59..62fcfa7b 100644 --- a/Grid/communicator/Communicator_base.h +++ b/Grid/communicator/Communicator_base.h @@ -149,7 +149,8 @@ public: sizeof(obj),d*100+p); } - CommsComplete(list); + if (!list.empty()) // avoid triggering assert in comms == none + CommsComplete(list); for(int p=1;p<_processors[d];p++){ accum = accum + column[p]; } diff --git a/Grid/communicator/Communicator_mpi3.cc b/Grid/communicator/Communicator_mpi3.cc index b667d32e..de7d81f8 100644 --- a/Grid/communicator/Communicator_mpi3.cc +++ b/Grid/communicator/Communicator_mpi3.cc @@ -438,8 +438,15 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vectorShmBufferTranslate(from,xmit); + assert(shm!=NULL); + acceleratorCopyDeviceToDeviceAsynch(shm,recv,rbytes); + } +#endif } - + // This is a NVLINK PUT if (dox) { if ( (gdest == MPI_UNDEFINED) || Stencil_force_mpi ) { tag= dir+_processor*32; @@ -448,9 +455,11 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vectorShmBufferTranslate(dest,recv); assert(shm!=NULL); acceleratorCopyDeviceToDeviceAsynch(xmit,shm,xbytes); +#endif } } return off_node_bytes; @@ -459,7 +468,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list,int dir) { int nreq=list.size(); - + /*finishes Get/Put*/ acceleratorCopySynchronise(); if (nreq==0) return; @@ -746,26 +755,31 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list,int dir) { - // int nreq=list.size(); + acceleratorCopySynchronise(); // Complete all pending copy transfers D2D - // if (nreq==0) return; - // std::vector status(nreq); - // std::vector MpiRequests(nreq); + std::vector status; + std::vector MpiRequests; + + for(int r=0;r0) { + status.resize(MpiRequests.size()); + int ierr = MPI_Waitall(MpiRequests.size(),&MpiRequests[0],&status[0]); // Sends are guaranteed in order. No harm in not completing. + assert(ierr==0); + } - // int ierr = MPI_Waitall(nreq,&MpiRequests[0],&status[0]); // Sends are guaranteed in order. No harm in not completing. - // assert(ierr==0); - // for(int r=0;rHostBufferFreeAll(); // Clean up the buffer allocs diff --git a/Grid/communicator/Communicator_none.cc b/Grid/communicator/Communicator_none.cc index f162a903..3dee8f4d 100644 --- a/Grid/communicator/Communicator_none.cc +++ b/Grid/communicator/Communicator_none.cc @@ -91,7 +91,7 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit, { assert(0); } -void CartesianCommunicator::CommsComplete(std::vector &list){ assert(0);} +void CartesianCommunicator::CommsComplete(std::vector &list){ assert(list.size()==0);} void CartesianCommunicator::SendToRecvFromBegin(std::vector &list, void *xmit, int dest, diff --git a/Grid/communicator/SharedMemory.h b/Grid/communicator/SharedMemory.h index f8099a1d..cdaae51c 100644 --- a/Grid/communicator/SharedMemory.h +++ b/Grid/communicator/SharedMemory.h @@ -137,7 +137,7 @@ public: /////////////////////////////////////////////////// static void SharedMemoryAllocate(uint64_t bytes, int flags); static void SharedMemoryFree(void); - static void SharedMemoryCopy(void *dest,void *src,size_t bytes); + // static void SharedMemoryCopy(void *dest,void *src,size_t bytes); static void SharedMemoryZero(void *dest,size_t bytes); }; diff --git a/Grid/communicator/SharedMemoryMPI.cc b/Grid/communicator/SharedMemoryMPI.cc index dc22aee0..7d500af9 100644 --- a/Grid/communicator/SharedMemoryMPI.cc +++ b/Grid/communicator/SharedMemoryMPI.cc @@ -542,12 +542,12 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) // Each MPI rank should allocate our own buffer /////////////////////////////////////////////////////////////////////////////////////////////////////////// #ifndef ACCELERATOR_AWARE_MPI - printf("Host buffer allocate for GPU non-aware MPI\n"); + // printf("Host buffer allocate for GPU non-aware MPI\n"); #if 0 HostCommBuf= acceleratorAllocHost(bytes); #else HostCommBuf= malloc(bytes); /// CHANGE THIS TO malloc_host -#ifdef HAVE_NUMAIF_H +#if 0 #warning "Moving host buffers to specific NUMA domain" int numa; char *numa_name=(char *)getenv("MPI_BUF_NUMA"); @@ -916,14 +916,14 @@ void GlobalSharedMemory::SharedMemoryZero(void *dest,size_t bytes) bzero(dest,bytes); #endif } -void GlobalSharedMemory::SharedMemoryCopy(void *dest,void *src,size_t bytes) -{ -#if defined(GRID_CUDA) || defined(GRID_HIP) || defined(GRID_SYCL) - acceleratorCopyToDevice(src,dest,bytes); -#else - bcopy(src,dest,bytes); -#endif -} +//void GlobalSharedMemory::SharedMemoryCopy(void *dest,void *src,size_t bytes) +//{ +//#if defined(GRID_CUDA) || defined(GRID_HIP) || defined(GRID_SYCL) +// acceleratorCopyToDevice(src,dest,bytes); +//#else +// bcopy(src,dest,bytes); +//#endif +//} //////////////////////////////////////////////////////// // Global shared functionality finished // Now move to per communicator functionality @@ -959,6 +959,7 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm) MPI_Allreduce(MPI_IN_PLACE,&wsr,1,MPI_UINT32_T,MPI_SUM,ShmComm); ShmCommBufs[r] = GlobalSharedMemory::WorldShmCommBufs[wsr]; + // std::cerr << " SetCommunicator rank "< void Cshift_comms(Lattice &ret,const Lattice &r static deviceVector send_buf; send_buf.resize(buffer_size); static deviceVector recv_buf; recv_buf.resize(buffer_size); #ifndef ACCELERATOR_AWARE_MPI - static hostVector hsend_buf; hsend_buf.resize(buffer_size); - static hostVector hrecv_buf; hrecv_buf.resize(buffer_size); + static hostVector hsend_buf; hsend_buf.resize(buffer_size); + static hostVector hrecv_buf; hrecv_buf.resize(buffer_size); #endif int cb= (cbmask==0x2)? Odd : Even; @@ -244,7 +244,6 @@ template void Cshift_comms_simd(Lattice &ret,const Lattice(acceleratorAllocDevice((rd+1)*sizeof(int))); //copy offsets to device - acceleratorCopyToDeviceAsync(&offsets[0],d_offsets,sizeof(int)*(rd+1),computeStream); + acceleratorCopyToDeviceAsynch(&offsets[0],d_offsets,sizeof(int)*(rd+1),computeStream); gpuError_t gpuErr = gpucub::DeviceSegmentedReduce::Reduce(temp_storage_array, temp_storage_bytes, rb_p,d_out, rd, d_offsets, d_offsets+1, ::gpucub::Sum(), zero_init, computeStream); @@ -88,7 +88,7 @@ inline void sliceSumReduction_cub_small(const vobj *Data, exit(EXIT_FAILURE); } - acceleratorCopyFromDeviceAsync(d_out,&lvSum[0],rd*sizeof(vobj),computeStream); + acceleratorCopyFromDeviceAsynch(d_out,&lvSum[0],rd*sizeof(vobj),computeStream); //sync after copy accelerator_barrier(); diff --git a/Grid/lattice/PaddedCell.h b/Grid/lattice/PaddedCell.h index bdb0301c..0340698c 100644 --- a/Grid/lattice/PaddedCell.h +++ b/Grid/lattice/PaddedCell.h @@ -510,7 +510,6 @@ public: grid->SendToRecvFromBegin(fwd_req, (void *)&hsend_buf[d*buffer_size], xmit_to_rank, (void *)&hrecv_buf[d*buffer_size], recv_from_rank, bytes, tag); - acceleratorCopyToDevice(&hrecv_buf[d*buffer_size],&recv_buf[d*buffer_size],bytes); #endif t_comms+=usecond()-t; } @@ -531,7 +530,6 @@ public: grid->SendToRecvFromBegin(bwd_req, (void *)&hsend_buf[(d+depth)*buffer_size], recv_from_rank, (void *)&hrecv_buf[(d+depth)*buffer_size], xmit_to_rank, bytes,tag); - acceleratorCopyToDevice(&hrecv_buf[(d+depth)*buffer_size],&recv_buf[(d+depth)*buffer_size],bytes); #endif t_comms+=usecond()-t; } @@ -555,8 +553,13 @@ public: t=usecond(); grid->CommsComplete(fwd_req); +#ifndef ACCELERATOR_AWARE_MPI + for ( int d=0;d < depth ; d ++ ) { + acceleratorCopyToDevice(&hrecv_buf[d*buffer_size],&recv_buf[d*buffer_size],bytes); + } +#endif t_comms+= usecond() - t; - + t=usecond(); for ( int d=0;d < depth ; d ++ ) { ScatterSlice(recv_buf,to,nld-depth+d,dimension,plane*buffer_size); plane++; @@ -565,6 +568,11 @@ public: t=usecond(); grid->CommsComplete(bwd_req); +#ifndef ACCELERATOR_AWARE_MPI + for ( int d=0;d < depth ; d ++ ) { + acceleratorCopyToDevice(&hrecv_buf[(d+depth)*buffer_size],&recv_buf[(d+depth)*buffer_size],bytes); + } +#endif t_comms+= usecond() - t; t=usecond(); diff --git a/Grid/qcd/action/fermion/CompactWilsonCloverFermion5D.h b/Grid/qcd/action/fermion/CompactWilsonCloverFermion5D.h new file mode 100644 index 00000000..2c6aa587 --- /dev/null +++ b/Grid/qcd/action/fermion/CompactWilsonCloverFermion5D.h @@ -0,0 +1,196 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/CompactWilsonCloverFermion5D.h + + Copyright (C) 2020 - 2025 + + Author: Daniel Richtmann + Author: Nils Meyer + Author: Christoph Lehner + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ +/* END LEGAL */ + +#pragma once + +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +// see Grid/qcd/action/fermion/CompactWilsonCloverFermion.h for description + +template +class CompactWilsonCloverFermion5D : public WilsonFermion5D, + public WilsonCloverHelpers, + public CompactWilsonCloverHelpers { + ///////////////////////////////////////////// + // Sizes + ///////////////////////////////////////////// + +public: + + INHERIT_COMPACT_CLOVER_SIZES(Impl); + + ///////////////////////////////////////////// + // Type definitions + ///////////////////////////////////////////// + +public: + + INHERIT_IMPL_TYPES(Impl); + INHERIT_CLOVER_TYPES(Impl); + INHERIT_COMPACT_CLOVER_TYPES(Impl); + + typedef WilsonFermion5D WilsonBase; + typedef WilsonCloverHelpers Helpers; + typedef CompactWilsonCloverHelpers CompactHelpers; + + ///////////////////////////////////////////// + // Constructors + ///////////////////////////////////////////// + +public: + + CompactWilsonCloverFermion5D(GaugeField& _Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + const RealD _mass, + const RealD _csw_r = 0.0, + const RealD _csw_t = 0.0, + const RealD _cF = 1.0, + const ImplParams& impl_p = ImplParams()); + + ///////////////////////////////////////////// + // Member functions (implementing interface) + ///////////////////////////////////////////// + +public: + + virtual void Instantiatable() {}; + int ConstEE() override { return 0; }; + int isTrivialEE() override { return 0; }; + + void Dhop(const FermionField& in, FermionField& out, int dag) override; + + void DhopOE(const FermionField& in, FermionField& out, int dag) override; + + void DhopEO(const FermionField& in, FermionField& out, int dag) override; + + void DhopDir(const FermionField& in, FermionField& out, int dir, int disp) override; + + void DhopDirAll(const FermionField& in, std::vector& out) /* override */; + + void M(const FermionField& in, FermionField& out) override; + + void Mdag(const FermionField& in, FermionField& out) override; + + void Meooe(const FermionField& in, FermionField& out) override; + + void MeooeDag(const FermionField& in, FermionField& out) override; + + void Mooee(const FermionField& in, FermionField& out) override; + + void MooeeDag(const FermionField& in, FermionField& out) override; + + void MooeeInv(const FermionField& in, FermionField& out) override; + + void MooeeInvDag(const FermionField& in, FermionField& out) override; + + void Mdir(const FermionField& in, FermionField& out, int dir, int disp) override; + + void MdirAll(const FermionField& in, std::vector& out) override; + + void MDeriv(GaugeField& force, const FermionField& X, const FermionField& Y, int dag) override; + + void MooDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) override; + + void MeeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) override; + + ///////////////////////////////////////////// + // Member functions (internals) + ///////////////////////////////////////////// + + void MooeeInternal(const FermionField& in, + FermionField& out, + const CloverDiagonalField& diagonal, + const CloverTriangleField& triangle); + + ///////////////////////////////////////////// + // Helpers + ///////////////////////////////////////////// + + void ImportGauge(const GaugeField& _Umu) override; + + ///////////////////////////////////////////// + // Helpers + ///////////////////////////////////////////// + +private: + + template + const MaskField* getCorrectMaskField(const Field &in) const { + if(in.Grid()->_isCheckerBoarded) { + if(in.Checkerboard() == Odd) { + return &this->BoundaryMaskOdd; + } else { + return &this->BoundaryMaskEven; + } + } else { + return &this->BoundaryMask; + } + } + + template + void ApplyBoundaryMask(Field& f) { + const MaskField* m = getCorrectMaskField(f); assert(m != nullptr); + assert(m != nullptr); + CompactHelpers::ApplyBoundaryMask(f, *m); + } + + ///////////////////////////////////////////// + // Member Data + ///////////////////////////////////////////// + +public: + + RealD csw_r; + RealD csw_t; + RealD cF; + int n_rhs; + + bool fixedBoundaries; + + CloverDiagonalField Diagonal, DiagonalEven, DiagonalOdd; + CloverDiagonalField DiagonalInv, DiagonalInvEven, DiagonalInvOdd; + + CloverTriangleField Triangle, TriangleEven, TriangleOdd; + CloverTriangleField TriangleInv, TriangleInvEven, TriangleInvOdd; + + FermionField Tmp; + + MaskField BoundaryMask, BoundaryMaskEven, BoundaryMaskOdd; +}; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/Fermion.h b/Grid/qcd/action/fermion/Fermion.h index 8d7b3fdc..a3d96d9b 100644 --- a/Grid/qcd/action/fermion/Fermion.h +++ b/Grid/qcd/action/fermion/Fermion.h @@ -55,6 +55,7 @@ NAMESPACE_CHECK(Wilson); NAMESPACE_CHECK(WilsonTM); #include // 4d wilson clover fermions #include // 4d compact wilson clover fermions +#include // 5d compact wilson clover fermions NAMESPACE_CHECK(WilsonClover); #include // 5d base used by all 5d overlap types NAMESPACE_CHECK(Wilson5D); @@ -164,12 +165,17 @@ typedef WilsonClover WilsonCloverTwoIndexAntiS // Compact Clover fermions template using CompactWilsonClover = CompactWilsonCloverFermion>; +template using CompactWilsonClover5D = CompactWilsonCloverFermion5D>; template using CompactWilsonExpClover = CompactWilsonCloverFermion>; typedef CompactWilsonClover CompactWilsonCloverFermionD2; typedef CompactWilsonClover CompactWilsonCloverFermionF; typedef CompactWilsonClover CompactWilsonCloverFermionD; +typedef CompactWilsonClover5D CompactWilsonCloverFermion5DD2; +typedef CompactWilsonClover5D CompactWilsonCloverFermion5DF; +typedef CompactWilsonClover5D CompactWilsonCloverFermion5DD; + typedef CompactWilsonExpClover CompactWilsonExpCloverFermionD2; typedef CompactWilsonExpClover CompactWilsonExpCloverFermionF; typedef CompactWilsonExpClover CompactWilsonExpCloverFermionD; diff --git a/Grid/qcd/action/fermion/WilsonCompressor.h b/Grid/qcd/action/fermion/WilsonCompressor.h index 1c6571e1..22f2d8e3 100644 --- a/Grid/qcd/action/fermion/WilsonCompressor.h +++ b/Grid/qcd/action/fermion/WilsonCompressor.h @@ -485,7 +485,6 @@ public: assert(this->u_comm_offset==this->_unified_buffer_size); accelerator_barrier(); #ifdef NVLINK_GET - #warning "NVLINK_GET" this->_grid->StencilBarrier(); // He can now get mu local gather, I can get his // Synch shared memory on a single nodes; could use an asynchronous barrier here and defer check // Or issue barrier AFTER the DMA is running diff --git a/Grid/qcd/action/fermion/WilsonFermion5D.h b/Grid/qcd/action/fermion/WilsonFermion5D.h index 40c1871f..9dadc866 100644 --- a/Grid/qcd/action/fermion/WilsonFermion5D.h +++ b/Grid/qcd/action/fermion/WilsonFermion5D.h @@ -91,13 +91,13 @@ public: virtual void Mdag (const FermionField &in, FermionField &out){assert(0);}; // half checkerboard operations; leave unimplemented as abstract for now - virtual void Meooe (const FermionField &in, FermionField &out){assert(0);}; - virtual void Mooee (const FermionField &in, FermionField &out){assert(0);}; - virtual void MooeeInv (const FermionField &in, FermionField &out){assert(0);}; + virtual void Meooe (const FermionField &in, FermionField &out); + virtual void Mooee (const FermionField &in, FermionField &out); + virtual void MooeeInv (const FermionField &in, FermionField &out); - virtual void MeooeDag (const FermionField &in, FermionField &out){assert(0);}; - virtual void MooeeDag (const FermionField &in, FermionField &out){assert(0);}; - virtual void MooeeInvDag (const FermionField &in, FermionField &out){assert(0);}; + virtual void MeooeDag (const FermionField &in, FermionField &out); + virtual void MooeeDag (const FermionField &in, FermionField &out); + virtual void MooeeInvDag (const FermionField &in, FermionField &out); virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac virtual void MdirAll(const FermionField &in, std::vector &out){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac diff --git a/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermion5DImplementation.h new file mode 100644 index 00000000..e65fb6d6 --- /dev/null +++ b/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermion5DImplementation.h @@ -0,0 +1,376 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/CompactWilsonCloverFermion5DImplementation.h + + Copyright (C) 2017 - 2025 + + Author: paboyle + Author: Guido Cossu + Author: Daniel Richtmann + Author: Christoph Lehner + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ +/* END LEGAL */ + +#include +#include +#include + + +NAMESPACE_BEGIN(Grid); +template +CompactWilsonCloverFermion5D::CompactWilsonCloverFermion5D(GaugeField& _Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + const RealD _mass, + const RealD _csw_r, + const RealD _csw_t, + const RealD _cF, + const ImplParams& impl_p) + : WilsonBase(_Umu, FiveDimGrid, FiveDimRedBlackGrid, FourDimGrid, FourDimRedBlackGrid, _mass, impl_p) + , csw_r(_csw_r) + , csw_t(_csw_t) + , cF(_cF) + , fixedBoundaries(impl_p.boundary_phases[Nd-1] == 0.0) + , Diagonal(&FourDimGrid), Triangle(&FourDimGrid) + , DiagonalEven(&FourDimRedBlackGrid), TriangleEven(&FourDimRedBlackGrid) + , DiagonalOdd(&FourDimRedBlackGrid), TriangleOdd(&FourDimRedBlackGrid) + , DiagonalInv(&FourDimGrid), TriangleInv(&FourDimGrid) + , DiagonalInvEven(&FourDimRedBlackGrid), TriangleInvEven(&FourDimRedBlackGrid) + , DiagonalInvOdd(&FourDimRedBlackGrid), TriangleInvOdd(&FourDimRedBlackGrid) + , Tmp(&FiveDimGrid) + , BoundaryMask(&FiveDimGrid) + , BoundaryMaskEven(&FiveDimRedBlackGrid), BoundaryMaskOdd(&FiveDimRedBlackGrid) +{ + assert(Nd == 4 && Nc == 3 && Ns == 4 && Impl::Dimension == 3); + + csw_r *= 0.5; + csw_t *= 0.5; + //if (clover_anisotropy.isAnisotropic) + // csw_r /= clover_anisotropy.xi_0; + + ImportGauge(_Umu); + if (fixedBoundaries) { + this->BoundaryMaskEven.Checkerboard() = Even; + this->BoundaryMaskOdd.Checkerboard() = Odd; + CompactHelpers::SetupMasks(this->BoundaryMask, this->BoundaryMaskEven, this->BoundaryMaskOdd); + } +} + +template +void CompactWilsonCloverFermion5D::Dhop(const FermionField& in, FermionField& out, int dag) { + WilsonBase::Dhop(in, out, dag); + if(fixedBoundaries) ApplyBoundaryMask(out); +} + +template +void CompactWilsonCloverFermion5D::DhopOE(const FermionField& in, FermionField& out, int dag) { + WilsonBase::DhopOE(in, out, dag); + if(fixedBoundaries) ApplyBoundaryMask(out); +} + +template +void CompactWilsonCloverFermion5D::DhopEO(const FermionField& in, FermionField& out, int dag) { + WilsonBase::DhopEO(in, out, dag); + if(fixedBoundaries) ApplyBoundaryMask(out); +} + +template +void CompactWilsonCloverFermion5D::DhopDir(const FermionField& in, FermionField& out, int dir, int disp) { + WilsonBase::DhopDir(in, out, dir, disp); + if(this->fixedBoundaries) ApplyBoundaryMask(out); +} + +template +void CompactWilsonCloverFermion5D::DhopDirAll(const FermionField& in, std::vector& out) { + WilsonBase::DhopDirAll(in, out); + if(this->fixedBoundaries) { + for(auto& o : out) ApplyBoundaryMask(o); + } +} + +template +void CompactWilsonCloverFermion5D::M(const FermionField& in, FermionField& out) { + out.Checkerboard() = in.Checkerboard(); + WilsonBase::Dhop(in, out, DaggerNo); // call base to save applying bc + Mooee(in, Tmp); + axpy(out, 1.0, out, Tmp); + if(fixedBoundaries) ApplyBoundaryMask(out); +} + +template +void CompactWilsonCloverFermion5D::Mdag(const FermionField& in, FermionField& out) { + out.Checkerboard() = in.Checkerboard(); + WilsonBase::Dhop(in, out, DaggerYes); // call base to save applying bc + MooeeDag(in, Tmp); + axpy(out, 1.0, out, Tmp); + if(fixedBoundaries) ApplyBoundaryMask(out); +} + +template +void CompactWilsonCloverFermion5D::Meooe(const FermionField& in, FermionField& out) { + WilsonBase::Meooe(in, out); + if(fixedBoundaries) ApplyBoundaryMask(out); +} + +template +void CompactWilsonCloverFermion5D::MeooeDag(const FermionField& in, FermionField& out) { + WilsonBase::MeooeDag(in, out); + if(fixedBoundaries) ApplyBoundaryMask(out); +} + +template +void CompactWilsonCloverFermion5D::Mooee(const FermionField& in, FermionField& out) { + if(in.Grid()->_isCheckerBoarded) { + if(in.Checkerboard() == Odd) { + MooeeInternal(in, out, DiagonalOdd, TriangleOdd); + } else { + MooeeInternal(in, out, DiagonalEven, TriangleEven); + } + } else { + MooeeInternal(in, out, Diagonal, Triangle); + } + if(fixedBoundaries) ApplyBoundaryMask(out); +} + +template +void CompactWilsonCloverFermion5D::MooeeDag(const FermionField& in, FermionField& out) { + Mooee(in, out); // blocks are hermitian +} + +template +void CompactWilsonCloverFermion5D::MooeeInv(const FermionField& in, FermionField& out) { + if(in.Grid()->_isCheckerBoarded) { + if(in.Checkerboard() == Odd) { + MooeeInternal(in, out, DiagonalInvOdd, TriangleInvOdd); + } else { + MooeeInternal(in, out, DiagonalInvEven, TriangleInvEven); + } + } else { + MooeeInternal(in, out, DiagonalInv, TriangleInv); + } + if(fixedBoundaries) ApplyBoundaryMask(out); +} + +template +void CompactWilsonCloverFermion5D::MooeeInvDag(const FermionField& in, FermionField& out) { + MooeeInv(in, out); // blocks are hermitian +} + +template +void CompactWilsonCloverFermion5D::Mdir(const FermionField& in, FermionField& out, int dir, int disp) { + DhopDir(in, out, dir, disp); +} + +template +void CompactWilsonCloverFermion5D::MdirAll(const FermionField& in, std::vector& out) { + DhopDirAll(in, out); +} + +template +void CompactWilsonCloverFermion5D::MDeriv(GaugeField& force, const FermionField& X, const FermionField& Y, int dag) { + assert(!fixedBoundaries); // TODO check for changes required for open bc + + // NOTE: code copied from original clover term + conformable(X.Grid(), Y.Grid()); + conformable(X.Grid(), force.Grid()); + GaugeLinkField force_mu(force.Grid()), lambda(force.Grid()); + GaugeField clover_force(force.Grid()); + PropagatorField Lambda(force.Grid()); + + // Guido: Here we are hitting some performance issues: + // need to extract the components of the DoubledGaugeField + // for each call + // Possible solution + // Create a vector object to store them? (cons: wasting space) + std::vector U(Nd, this->Umu.Grid()); + + Impl::extractLinkField(U, this->Umu); + + force = Zero(); + // Derivative of the Wilson hopping term + this->DhopDeriv(force, X, Y, dag); + + /////////////////////////////////////////////////////////// + // Clover term derivative + /////////////////////////////////////////////////////////// + Impl::outerProductImpl(Lambda, X, Y); + //std::cout << "Lambda:" << Lambda << std::endl; + + Gamma::Algebra sigma[] = { + Gamma::Algebra::SigmaXY, + Gamma::Algebra::SigmaXZ, + Gamma::Algebra::SigmaXT, + Gamma::Algebra::MinusSigmaXY, + Gamma::Algebra::SigmaYZ, + Gamma::Algebra::SigmaYT, + Gamma::Algebra::MinusSigmaXZ, + Gamma::Algebra::MinusSigmaYZ, + Gamma::Algebra::SigmaZT, + Gamma::Algebra::MinusSigmaXT, + Gamma::Algebra::MinusSigmaYT, + Gamma::Algebra::MinusSigmaZT}; + + /* + sigma_{\mu \nu}= + | 0 sigma[0] sigma[1] sigma[2] | + | sigma[3] 0 sigma[4] sigma[5] | + | sigma[6] sigma[7] 0 sigma[8] | + | sigma[9] sigma[10] sigma[11] 0 | + */ + + int count = 0; + clover_force = Zero(); + for (int mu = 0; mu < 4; mu++) + { + force_mu = Zero(); + for (int nu = 0; nu < 4; nu++) + { + if (mu == nu) + continue; + + RealD factor; + if (nu == 4 || mu == 4) + { + factor = 2.0 * csw_t; + } + else + { + factor = 2.0 * csw_r; + } + PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked + Impl::TraceSpinImpl(lambda, Slambda); // traceSpin ok + force_mu -= factor*CloverHelpers::Cmunu(U, lambda, mu, nu); // checked + count++; + } + + pokeLorentz(clover_force, U[mu] * force_mu, mu); + } + //clover_force *= csw; + force += clover_force; +} + +template +void CompactWilsonCloverFermion5D::MooDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) { + assert(0); +} + +template +void CompactWilsonCloverFermion5D::MeeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) { + assert(0); +} + +template +void CompactWilsonCloverFermion5D::MooeeInternal(const FermionField& in, + FermionField& out, + const CloverDiagonalField& diagonal, + const CloverTriangleField& triangle) { + assert(in.Checkerboard() == Odd || in.Checkerboard() == Even); + out.Checkerboard() = in.Checkerboard(); + conformable(in, out); + CompactHelpers::MooeeKernel(diagonal.oSites(), this->Ls, in, out, diagonal, triangle); +} + +template +void CompactWilsonCloverFermion5D::ImportGauge(const GaugeField& _Umu) { + // NOTE: parts copied from original implementation + + // Import gauge into base class + double t0 = usecond(); + WilsonBase::ImportGauge(_Umu); // NOTE: called here and in wilson constructor -> performed twice, but can't avoid that + + // Initialize temporary variables + double t1 = usecond(); + conformable(_Umu.Grid(), this->GaugeGrid()); + GridBase* grid = _Umu.Grid(); + typename Impl::GaugeLinkField Bx(grid), By(grid), Bz(grid), Ex(grid), Ey(grid), Ez(grid); + CloverField TmpOriginal(grid); + CloverField TmpInverse(grid); + + // Compute the field strength terms mu>nu + double t2 = usecond(); + WilsonLoops::FieldStrength(Bx, _Umu, Zdir, Ydir); + WilsonLoops::FieldStrength(By, _Umu, Zdir, Xdir); + WilsonLoops::FieldStrength(Bz, _Umu, Ydir, Xdir); + WilsonLoops::FieldStrength(Ex, _Umu, Tdir, Xdir); + WilsonLoops::FieldStrength(Ey, _Umu, Tdir, Ydir); + WilsonLoops::FieldStrength(Ez, _Umu, Tdir, Zdir); + + // Compute the Clover Operator acting on Colour and Spin + // multiply here by the clover coefficients for the anisotropy + double t3 = usecond(); + TmpOriginal = Helpers::fillCloverYZ(Bx) * csw_r; + TmpOriginal += Helpers::fillCloverXZ(By) * csw_r; + TmpOriginal += Helpers::fillCloverXY(Bz) * csw_r; + TmpOriginal += Helpers::fillCloverXT(Ex) * csw_t; + TmpOriginal += Helpers::fillCloverYT(Ey) * csw_t; + TmpOriginal += Helpers::fillCloverZT(Ez) * csw_t; + + // Instantiate the clover term + // - In case of the standard clover the mass term is added + // - In case of the exponential clover the clover term is exponentiated + double t4 = usecond(); + CloverHelpers::InstantiateClover(TmpOriginal, TmpInverse, csw_t, 4.0 + this->M5 /*this->diag_mass*/); + + // Convert the data layout of the clover term + double t5 = usecond(); + CompactHelpers::ConvertLayout(TmpOriginal, Diagonal, Triangle); + + // Modify the clover term at the temporal boundaries in case of open boundary conditions + double t6 = usecond(); + if(fixedBoundaries) CompactHelpers::ModifyBoundaries(Diagonal, Triangle, csw_t, cF, 4.0 + this->M5 /*this->diag_mass*/); + + // Invert the Clover term + // In case of the exponential clover with (anti-)periodic boundary conditions exp(-Clover) saved + // in TmpInverse can be used. In all other cases the clover term has to be explictly inverted. + // TODO: For now this inversion is explictly done on the CPU + double t7 = usecond(); + CloverHelpers::InvertClover(TmpInverse, Diagonal, Triangle, DiagonalInv, TriangleInv, fixedBoundaries); + + // Fill the remaining clover fields + double t8 = usecond(); + pickCheckerboard(Even, DiagonalEven, Diagonal); + pickCheckerboard(Even, TriangleEven, Triangle); + pickCheckerboard(Odd, DiagonalOdd, Diagonal); + pickCheckerboard(Odd, TriangleOdd, Triangle); + pickCheckerboard(Even, DiagonalInvEven, DiagonalInv); + pickCheckerboard(Even, TriangleInvEven, TriangleInv); + pickCheckerboard(Odd, DiagonalInvOdd, DiagonalInv); + pickCheckerboard(Odd, TriangleInvOdd, TriangleInv); + + // Report timings + double t9 = usecond(); + + std::cout << GridLogDebug << "CompactWilsonCloverFermion5D::ImportGauge timings:" << std::endl; + std::cout << GridLogDebug << "WilsonFermion::Importgauge = " << (t1 - t0) / 1e6 << std::endl; + std::cout << GridLogDebug << "allocations = " << (t2 - t1) / 1e6 << std::endl; + std::cout << GridLogDebug << "field strength = " << (t3 - t2) / 1e6 << std::endl; + std::cout << GridLogDebug << "fill clover = " << (t4 - t3) / 1e6 << std::endl; + std::cout << GridLogDebug << "instantiate clover = " << (t5 - t4) / 1e6 << std::endl; + std::cout << GridLogDebug << "convert layout = " << (t6 - t5) / 1e6 << std::endl; + std::cout << GridLogDebug << "modify boundaries = " << (t7 - t6) / 1e6 << std::endl; + std::cout << GridLogDebug << "invert clover = " << (t8 - t7) / 1e6 << std::endl; + std::cout << GridLogDebug << "pick cbs = " << (t9 - t8) / 1e6 << std::endl; + std::cout << GridLogDebug << "total = " << (t9 - t0) / 1e6 << std::endl; +} + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h index 3d4e5cc5..9598552f 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h @@ -14,6 +14,7 @@ Author: paboyle Author: Guido Cossu Author: Andrew Lawson Author: Vera Guelpers +Author: Christoph Lehner This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -484,6 +485,54 @@ void WilsonFermion5D::DW(const FermionField &in, FermionField &out,int dag Dhop(in,out,dag); // -0.5 is included axpy(out,4.0-M5,in,out); } +template +void WilsonFermion5D::Meooe(const FermionField &in, FermionField &out) +{ + if (in.Checkerboard() == Odd) { + DhopEO(in, out, DaggerNo); + } else { + DhopOE(in, out, DaggerNo); + } +} + +template +void WilsonFermion5D::MeooeDag(const FermionField &in, FermionField &out) +{ + if (in.Checkerboard() == Odd) { + DhopEO(in, out, DaggerYes); + } else { + DhopOE(in, out, DaggerYes); + } +} + +template +void WilsonFermion5D::Mooee(const FermionField &in, FermionField &out) +{ + out.Checkerboard() = in.Checkerboard(); + typename FermionField::scalar_type scal(4.0 + M5); + out = scal * in; +} + +template +void WilsonFermion5D::MooeeDag(const FermionField &in, FermionField &out) +{ + out.Checkerboard() = in.Checkerboard(); + Mooee(in, out); +} + +template +void WilsonFermion5D::MooeeInv(const FermionField &in, FermionField &out) +{ + out.Checkerboard() = in.Checkerboard(); + out = (1.0/(4.0 + M5))*in; +} + +template +void WilsonFermion5D::MooeeInvDag(const FermionField &in, FermionField &out) +{ + out.Checkerboard() = in.Checkerboard(); + MooeeInv(in,out); +} template void WilsonFermion5D::MomentumSpacePropagatorHt_5d(FermionField &out,const FermionField &in, RealD mass,std::vector twist) diff --git a/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h index 09d09afb..26e80838 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h @@ -63,7 +63,7 @@ accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip) } else { \ chi = coalescedRead(buf[SE->_offset],lane); \ } \ - acceleratorSynchronise(); \ + acceleratorSynchronise(); \ Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \ Recon(result, Uchi); @@ -504,7 +504,7 @@ void WilsonKernels::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField autoView(st_v , st,AcceleratorRead); if( interior && exterior ) { - // acceleratorFenceComputeStream(); + acceleratorFenceComputeStream(); if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSite); return;} if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSite); return;} #ifndef GRID_CUDA diff --git a/Grid/qcd/action/fermion/instantiation/CompactWilsonCloverFermion5DInstantiation.cc.master b/Grid/qcd/action/fermion/instantiation/CompactWilsonCloverFermion5DInstantiation.cc.master new file mode 100644 index 00000000..fd9c787b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/CompactWilsonCloverFermion5DInstantiation.cc.master @@ -0,0 +1,45 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/ qcd/action/fermion/instantiation/CompactWilsonCloverFermionInstantiation5D.cc.master + + Copyright (C) 2017 - 2025 + + Author: paboyle + Author: Guido Cossu + Author: Daniel Richtmann + Author: Mattia Bruno + Author: Christoph Lehner + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ +/* END LEGAL */ + +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class CompactWilsonCloverFermion5D>; +template class CompactWilsonCloverFermion5D>; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/CompactWilsonCloverFermion5DInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/CompactWilsonCloverFermion5DInstantiationWilsonImplD.cc new file mode 120000 index 00000000..ad6eba28 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplD/CompactWilsonCloverFermion5DInstantiationWilsonImplD.cc @@ -0,0 +1 @@ +../CompactWilsonCloverFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/CompactWilsonCloverFermion5DInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/CompactWilsonCloverFermion5DInstantiationWilsonImplF.cc new file mode 120000 index 00000000..ad6eba28 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplF/CompactWilsonCloverFermion5DInstantiationWilsonImplF.cc @@ -0,0 +1 @@ +../CompactWilsonCloverFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh b/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh index 728dc5e7..18194a23 100755 --- a/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh +++ b/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh @@ -62,7 +62,7 @@ do done done -CC_LIST="CompactWilsonCloverFermionInstantiation" +CC_LIST="CompactWilsonCloverFermionInstantiation CompactWilsonCloverFermion5DInstantiation" for impl in $COMPACT_WILSON_IMPL_LIST do diff --git a/Grid/qcd/action/gauge/PlaqPlusRectangleAction.h b/Grid/qcd/action/gauge/PlaqPlusRectangleAction.h index b7f31d0e..9116e78f 100644 --- a/Grid/qcd/action/gauge/PlaqPlusRectangleAction.h +++ b/Grid/qcd/action/gauge/PlaqPlusRectangleAction.h @@ -76,27 +76,27 @@ public: return action; }; - virtual void deriv(const GaugeField &Umu,GaugeField & dSdU) { + virtual void deriv(const GaugeField &U, GaugeField &dSdU) { //extend Ta to include Lorentz indexes RealD factor_p = c_plaq/RealD(Nc)*0.5; RealD factor_r = c_rect/RealD(Nc)*0.5; - GridBase *grid = Umu.Grid(); + GridBase *grid = U.Grid(); - std::vector U (Nd,grid); + std::vector Umu (Nd,grid); for(int mu=0;mu(Umu,mu); + Umu[mu] = PeekIndex(U,mu); } std::vector RectStaple(Nd,grid), Staple(Nd,grid); - WilsonLoops::StapleAndRectStapleAll(Staple, RectStaple, U, workspace); + WilsonLoops::StapleAndRectStapleAll(Staple, RectStaple, Umu, workspace); GaugeLinkField dSdU_mu(grid); GaugeLinkField staple(grid); for (int mu=0; mu < Nd; mu++){ - dSdU_mu = Ta(U[mu]*Staple[mu])*factor_p; - dSdU_mu = dSdU_mu + Ta(U[mu]*RectStaple[mu])*factor_r; - + dSdU_mu = Ta(Umu[mu]*Staple[mu])*factor_p; + dSdU_mu = dSdU_mu + Ta(Umu[mu]*RectStaple[mu])*factor_r; + PokeIndex(dSdU, dSdU_mu, mu); } diff --git a/Grid/qcd/action/gauge/WilsonGaugeAction.h b/Grid/qcd/action/gauge/WilsonGaugeAction.h index 22c792cc..b3c30416 100644 --- a/Grid/qcd/action/gauge/WilsonGaugeAction.h +++ b/Grid/qcd/action/gauge/WilsonGaugeAction.h @@ -73,20 +73,23 @@ public: // extend Ta to include Lorentz indexes RealD factor = 0.5 * beta / RealD(Nc); + GridBase *grid = U.Grid(); - GaugeLinkField Umu(U.Grid()); - GaugeLinkField dSdU_mu(U.Grid()); + GaugeLinkField dSdU_mu(grid); + std::vector Umu(Nd, grid); for (int mu = 0; mu < Nd; mu++) { + Umu[mu] = PeekIndex(U, mu); + } - Umu = PeekIndex(U, mu); - + for (int mu = 0; mu < Nd; mu++) { // Staple in direction mu - WilsonLoops::Staple(dSdU_mu, U, mu); - dSdU_mu = Ta(Umu * dSdU_mu) * factor; - + WilsonLoops::Staple(dSdU_mu, Umu, mu); + dSdU_mu = Ta(Umu[mu] * dSdU_mu) * factor; + PokeIndex(dSdU, dSdU_mu, mu); } } + private: RealD beta; }; diff --git a/Grid/qcd/hmc/checkpointers/BinaryCheckpointer.h b/Grid/qcd/hmc/checkpointers/BinaryCheckpointer.h index a91ec67f..17a1a181 100644 --- a/Grid/qcd/hmc/checkpointers/BinaryCheckpointer.h +++ b/Grid/qcd/hmc/checkpointers/BinaryCheckpointer.h @@ -111,8 +111,8 @@ public: }; void CheckpointRestore(int traj, Field &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG) { - std::string config, rng; - this->build_filenames(traj, Params, config, rng); + std::string config, rng, smr; + this->build_filenames(traj, Params, config, smr, rng); this->check_filename(rng); this->check_filename(config); diff --git a/Grid/qcd/hmc/checkpointers/ILDGCheckpointer.h b/Grid/qcd/hmc/checkpointers/ILDGCheckpointer.h index 12d391ef..50cca63d 100644 --- a/Grid/qcd/hmc/checkpointers/ILDGCheckpointer.h +++ b/Grid/qcd/hmc/checkpointers/ILDGCheckpointer.h @@ -75,7 +75,7 @@ public: GridParallelRNG &pRNG) { if ((traj % Params.saveInterval) == 0) { std::string config, rng, smr; - this->build_filenames(traj, Params, config, rng); + this->build_filenames(traj, Params, config, smr, rng); GridBase *grid = SmartConfig.get_U(false).Grid(); uint32_t nersc_csum,scidac_csuma,scidac_csumb; BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb); @@ -102,7 +102,7 @@ public: if ( Params.saveSmeared ) { IldgWriter _IldgWriter(grid->IsBoss()); _IldgWriter.open(smr); - _IldgWriter.writeConfiguration(SmartConfig.get_U(true), traj, config, config); + _IldgWriter.writeConfiguration(SmartConfig.get_U(true), traj, smr, smr); _IldgWriter.close(); std::cout << GridLogMessage << "Written ILDG Configuration on " << smr @@ -118,8 +118,8 @@ public: void CheckpointRestore(int traj, GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG) { - std::string config, rng; - this->build_filenames(traj, Params, config, rng); + std::string config, rng, smr; + this->build_filenames(traj, Params, config, smr, rng); this->check_filename(rng); this->check_filename(config); diff --git a/Grid/qcd/hmc/checkpointers/ScidacCheckpointer.h b/Grid/qcd/hmc/checkpointers/ScidacCheckpointer.h index 259a44b0..e3825972 100644 --- a/Grid/qcd/hmc/checkpointers/ScidacCheckpointer.h +++ b/Grid/qcd/hmc/checkpointers/ScidacCheckpointer.h @@ -107,8 +107,8 @@ class ScidacHmcCheckpointer : public BaseHmcCheckpointer { void CheckpointRestore(int traj, Field &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG) { - std::string config, rng; - this->build_filenames(traj, Params, config, rng); + std::string config, rng, smr; + this->build_filenames(traj, Params, config, smr, rng); this->check_filename(rng); this->check_filename(config); diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index dfd0a416..e98e9b87 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -62,15 +62,15 @@ accelerator_inline int stencilIndex(int mu, int nu) { /*! @brief structure holding the link treatment */ -struct SmearingParameters{ - SmearingParameters(){} +struct HISQSmearingParameters{ + HISQSmearingParameters(){} Real c_1; // 1 link Real c_naik; // Naik term Real c_3; // 3 link Real c_5; // 5 link Real c_7; // 7 link Real c_lp; // 5 link Lepage - SmearingParameters(Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp) + HISQSmearingParameters(Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp) : c_1(c1), c_naik(cnaik), c_3(c3), @@ -86,7 +86,7 @@ class Smear_HISQ : public Gimpl { private: GridCartesian* const _grid; - SmearingParameters _linkTreatment; + HISQSmearingParameters _linkTreatment; public: @@ -117,7 +117,7 @@ public: // IN--u_thin void smear(GF& u_smr, GF& u_naik, GF& u_thin) const { - SmearingParameters lt = this->_linkTreatment; + HISQSmearingParameters lt = this->_linkTreatment; auto grid = this->_grid; // Create a padded cell of extra padding depth=1 and fill the padding. diff --git a/Grid/qcd/smearing/WilsonFlow.h b/Grid/qcd/smearing/WilsonFlow.h index f169d02b..dc135823 100644 --- a/Grid/qcd/smearing/WilsonFlow.h +++ b/Grid/qcd/smearing/WilsonFlow.h @@ -207,11 +207,14 @@ std::vector WilsonFlowBase::flowMeasureEnergyDensityCloverleaf(con } template -void WilsonFlowBase::setDefaultMeasurements(int topq_meas_interval){ - addMeasurement(1, [](int step, RealD t, const typename Gimpl::GaugeField &U){ +void WilsonFlowBase::setDefaultMeasurements(int meas_interval){ + addMeasurement(meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){ std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " << step << " " << t << " " << energyDensityPlaquette(t,U) << std::endl; }); - addMeasurement(topq_meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){ + addMeasurement(meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){ + std::cout << GridLogMessage << "[WilsonFlow] Energy density (cloverleaf) : " << step << " " << t << " " << energyDensityCloverleaf(t,U) << std::endl; + }); + addMeasurement(meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){ std::cout << GridLogMessage << "[WilsonFlow] Top. charge : " << step << " " << WilsonLoops::TopologicalCharge(U) << std::endl; }); } diff --git a/Grid/qcd/utils/WilsonLoops.h b/Grid/qcd/utils/WilsonLoops.h index 851ba172..7466f4bf 100644 --- a/Grid/qcd/utils/WilsonLoops.h +++ b/Grid/qcd/utils/WilsonLoops.h @@ -292,19 +292,21 @@ public: ////////////////////////////////////////////////// // the sum over all nu-oriented staples for nu != mu on each site ////////////////////////////////////////////////// - static void Staple(GaugeMat &staple, const GaugeLorentz &Umu, int mu) { + static void Staple(GaugeMat &staple, const GaugeLorentz &U, int mu) { - GridBase *grid = Umu.Grid(); - - std::vector U(Nd, grid); + std::vector Umu(Nd, U.Grid()); for (int d = 0; d < Nd; d++) { - U[d] = PeekIndex(Umu, d); + Umu[d] = PeekIndex(U, d); } - Staple(staple, U, mu); + Staple(staple, Umu, mu); } - static void Staple(GaugeMat &staple, const std::vector &U, int mu) { - staple = Zero(); + static void Staple(GaugeMat &staple, const std::vector &Umu, int mu) { + + autoView(staple_v, staple, AcceleratorWrite); + accelerator_for(i, staple.Grid()->oSites(), Simd::Nsimd(), { + staple_v[i] = Zero(); + }); for (int nu = 0; nu < Nd; nu++) { @@ -318,12 +320,12 @@ public: // | // __| // - + staple += Gimpl::ShiftStaple( Gimpl::CovShiftForward( - U[nu], nu, + Umu[nu], nu, Gimpl::CovShiftBackward( - U[mu], mu, Gimpl::CovShiftIdentityBackward(U[nu], nu))), + Umu[mu], mu, Gimpl::CovShiftIdentityBackward(Umu[nu], nu))), mu); // __ @@ -333,8 +335,8 @@ public: // staple += Gimpl::ShiftStaple( - Gimpl::CovShiftBackward(U[nu], nu, - Gimpl::CovShiftBackward(U[mu], mu, U[nu])), mu); + Gimpl::CovShiftBackward(Umu[nu], nu, + Gimpl::CovShiftBackward(Umu[mu], mu, Umu[nu])), mu); } } } diff --git a/Grid/stencil/Stencil.h b/Grid/stencil/Stencil.h index 2a666a04..0f59f293 100644 --- a/Grid/stencil/Stencil.h +++ b/Grid/stencil/Stencil.h @@ -446,6 +446,7 @@ public: Communicate(); CommsMergeSHM(compress); CommsMerge(compress); + accelerator_barrier(); } template int HaloGatherDir(const Lattice &source,compressor &compress,int point,int & face_idx) @@ -518,7 +519,6 @@ public: } accelerator_barrier(); // All my local gathers are complete #ifdef NVLINK_GET - #warning "NVLINK_GET" _grid->StencilBarrier(); // He can now get mu local gather, I can get his // Synch shared memory on a single nodes; could use an asynchronous barrier here and defer check // Or issue barrier AFTER the DMA is running @@ -690,6 +690,7 @@ public: } } } + // std::cout << "BuildSurfaceList size is "< surface_list_host(surface_list_size); int32_t ss=0; @@ -709,7 +710,7 @@ public: } } acceleratorCopyToDevice(&surface_list_host[0],&surface_list[0],surface_list_size*sizeof(int)); - std::cout << GridLogMessage<<"BuildSurfaceList size is "<_entries_host_p = &_entries[0]; this->_entries_p = &_entries_device[0]; - std::cout << GridLogMessage << " Stencil object allocated for "<_osites - <<" sites table "<_entries_p<< " GridPtr "<<_grid<_osites + // <<" sites table "<_entries_p<< " GridPtr "<<_grid<memcpy(to,from,bytes); } inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(void *from,void *to,size_t bytes) { return theCopyAccelerator->memcpy(to,from,bytes); } -inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { theCopyAccelerator->memcpy(to,from,bytes); theCopyAccelerator->wait();} -inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ theCopyAccelerator->memcpy(to,from,bytes); theCopyAccelerator->wait();} +inline void acceleratorCopyToDevice(const void *from,void *to,size_t bytes) { theCopyAccelerator->memcpy(to,from,bytes); theCopyAccelerator->wait();} +inline void acceleratorCopyFromDevice(const void *from,void *to,size_t bytes){ theCopyAccelerator->memcpy(to,from,bytes); theCopyAccelerator->wait();} inline void acceleratorMemSet(void *base,int value,size_t bytes) { theCopyAccelerator->memset(base,value,bytes); theCopyAccelerator->wait();} inline int acceleratorIsCommunicable(void *ptr) @@ -478,7 +492,7 @@ void LambdaApply(uint64_t numx, uint64_t numy, uint64_t numz, lambda Lambda) inline void *acceleratorAllocHost(size_t bytes) { void *ptr=NULL; - auto err = hipMallocHost((void **)&ptr,bytes); + auto err = hipHostMalloc((void **)&ptr,bytes); if( err != hipSuccess ) { ptr = (void *) NULL; fprintf(stderr," hipMallocManaged failed for %ld %s \n",bytes,hipGetErrorString(err)); fflush(stderr); @@ -511,23 +525,35 @@ inline void *acceleratorAllocDevice(size_t bytes) inline void acceleratorFreeHost(void *ptr){ auto discard=hipFree(ptr);}; inline void acceleratorFreeShared(void *ptr){ auto discard=hipFree(ptr);}; inline void acceleratorFreeDevice(void *ptr){ auto discard=hipFree(ptr);}; -inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { auto discard=hipMemcpy(to,from,bytes, hipMemcpyHostToDevice);} -inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ auto discard=hipMemcpy(to,from,bytes, hipMemcpyDeviceToHost);} +inline void acceleratorCopyToDevice(const void *from,void *to,size_t bytes) { auto discard=hipMemcpy(to,from,bytes, hipMemcpyHostToDevice);} +inline void acceleratorCopyFromDevice(const void *from,void *to,size_t bytes){ auto discard=hipMemcpy(to,from,bytes, hipMemcpyDeviceToHost);} inline void acceleratorMemSet(void *base,int value,size_t bytes) { auto discard=hipMemset(base,value,bytes);} -inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch +typedef int acceleratorEvent_t; + +inline acceleratorEvent_t acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch { auto discard=hipMemcpyDtoDAsync(to,from,bytes, copyStream); + return 0; } -inline void acceleratorCopyToDeviceAsync(void *from, void *to, size_t bytes, hipStream_t stream = copyStream) { - auto r = hipMemcpyAsync(to,from,bytes, hipMemcpyHostToDevice, stream); +inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(void *from, void *to, size_t bytes, hipStream_t stream = copyStream) { + acceleratorCopyToDevice(from,to,bytes); + return 0; } -inline void acceleratorCopyFromDeviceAsync(void *from, void *to, size_t bytes, hipStream_t stream = copyStream) { - auto r = hipMemcpyAsync(to,from,bytes, hipMemcpyDeviceToHost, stream); +inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(void *from, void *to, size_t bytes, hipStream_t stream = copyStream) { + acceleratorCopyFromDevice(from,to,bytes); + return 0; } inline void acceleratorCopySynchronise(void) { auto discard=hipStreamSynchronize(copyStream); }; +inline void acceleratorEventWait(acceleratorEvent_t ev) +{ + // auto discard=hipStreamSynchronize(ev); +} +inline int acceleratorEventIsComplete(acceleratorEvent_t ev){ acceleratorEventWait(ev) ; return 1;} + + #endif inline void acceleratorPin(void *ptr,unsigned long bytes) @@ -564,6 +590,8 @@ inline void acceleratorPin(void *ptr,unsigned long bytes) #undef GRID_SIMT +typedef int acceleratorEvent_t; + inline void acceleratorMem(void) { /* @@ -584,8 +612,13 @@ inline void acceleratorMem(void) accelerator_inline int acceleratorSIMTlane(int Nsimd) { return 0; } // CUDA specific inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { thread_bcopy(from,to,bytes); } -inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ thread_bcopy(from,to,bytes);} -inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { thread_bcopy(from,to,bytes);} +inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes) { thread_bcopy(from,to,bytes); } +inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(void *from,void *to,size_t bytes) { acceleratorCopyToDevice(from,to,bytes); return 0; } +inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(void *from,void *to,size_t bytes) { acceleratorCopyFromDevice(from,to,bytes); return 0; } +inline void acceleratorEventWait(acceleratorEvent_t ev){} +inline int acceleratorEventIsComplete(acceleratorEvent_t ev){ acceleratorEventWait(ev); return 1;} +inline acceleratorEvent_t acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { thread_bcopy(from,to,bytes); return 0;} + inline void acceleratorCopySynchronise(void) {}; inline int acceleratorIsCommunicable(void *ptr){ return 1; } @@ -674,9 +707,9 @@ inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes) acceleratorCopySynchronise(); } -template void acceleratorPut(T& dev,T&host) +template void acceleratorPut(T& dev,const T&host) { - acceleratorCopyToDevice(&host,&dev,sizeof(T)); + acceleratorCopyToDevice((void *)&host,&dev,sizeof(T)); } template T acceleratorGet(T& dev) { diff --git a/Grid/threads/Threads.h b/Grid/threads/Threads.h index 6887134d..cdb4fa62 100644 --- a/Grid/threads/Threads.h +++ b/Grid/threads/Threads.h @@ -73,9 +73,9 @@ Author: paboyle #define thread_critical DO_PRAGMA(omp critical) #ifdef GRID_OMP -inline void thread_bcopy(void *from, void *to,size_t bytes) +inline void thread_bcopy(const void *from, void *to,size_t bytes) { - uint64_t *ufrom = (uint64_t *)from; + const uint64_t *ufrom = (const uint64_t *)from; uint64_t *uto = (uint64_t *)to; assert(bytes%8==0); uint64_t words=bytes/8; @@ -84,7 +84,7 @@ inline void thread_bcopy(void *from, void *to,size_t bytes) }); } #else -inline void thread_bcopy(void *from, void *to,size_t bytes) +inline void thread_bcopy(const void *from, void *to,size_t bytes) { bcopy(from,to,bytes); } diff --git a/Grid/util/Init.cc b/Grid/util/Init.cc index 1424667e..feb44645 100644 --- a/Grid/util/Init.cc +++ b/Grid/util/Init.cc @@ -509,7 +509,14 @@ void Grid_init(int *argc,char ***argv) Grid_default_latt, Grid_default_mpi); - + if( GridCmdOptionExists(*argv,*argv+*argc,"--flightrecorder") ){ + std::cout << GridLogMessage <<" Enabling flight recorder " <=2*1024*1024*1024LL ){ - std::cout << " IndexFromCoorReversed " << coor<<" index " << index64<< " dims "< + +#if Nc == 3 #include #include +#endif using namespace Grid; int main(int argc, char **argv) { +#if Nc != 3 +#warning FTHMC2p1f will not work for Nc != 3 + std::cout << "This program will currently only work for Nc == 3." << std::endl; +#else std::cout << std::setprecision(12); Grid_init(&argc, &argv); @@ -220,7 +227,6 @@ int main(int argc, char **argv) TheHMC.Run(SmearingPolicy); // for smearing Grid_finalize(); +#endif } // main - - diff --git a/HMC/FTHMC2p1f_3GeV.cc b/HMC/FTHMC2p1f_3GeV.cc index a8aa67f8..36d5caa3 100644 --- a/HMC/FTHMC2p1f_3GeV.cc +++ b/HMC/FTHMC2p1f_3GeV.cc @@ -24,14 +24,22 @@ See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ + #include + +#if Nc == 3 #include #include +#endif using namespace Grid; int main(int argc, char **argv) { +#if Nc != 3 +#warning FTHMC2p1f_3GeV will not work for Nc != 3 + std::cout << "This program will currently only work for Nc == 3." << std::endl; +#else std::cout << std::setprecision(12); Grid_init(&argc, &argv); @@ -220,6 +228,7 @@ int main(int argc, char **argv) TheHMC.Run(SmearingPolicy); // for smearing Grid_finalize(); +#endif } // main diff --git a/HMC/HMC2p1f_3GeV.cc b/HMC/HMC2p1f_3GeV.cc index 4bf088d7..199d4be8 100644 --- a/HMC/HMC2p1f_3GeV.cc +++ b/HMC/HMC2p1f_3GeV.cc @@ -25,13 +25,20 @@ directory *************************************************************************************/ /* END LEGAL */ #include + +#if Nc == 3 #include #include +#endif using namespace Grid; int main(int argc, char **argv) { +#if Nc != 3 +#warning HMC2p1f_3GeV will not work for Nc != 3 + std::cout << "This program will currently only work for Nc == 3." << std::endl; +#else std::cout << std::setprecision(12); Grid_init(&argc, &argv); @@ -220,6 +227,7 @@ int main(int argc, char **argv) TheHMC.Run(SmearingPolicy); // for smearing Grid_finalize(); +#endif } // main diff --git a/benchmarks/Benchmark_usqcd.cc b/benchmarks/Benchmark_usqcd.cc index da78b91e..1ca0a6ca 100644 --- a/benchmarks/Benchmark_usqcd.cc +++ b/benchmarks/Benchmark_usqcd.cc @@ -492,17 +492,18 @@ public: } FGrid->Barrier(); double t1=usecond(); - uint64_t ncall = 500; - - FGrid->Broadcast(0,&ncall,sizeof(ncall)); + uint64_t no = 50; + uint64_t ni = 100; // std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"< t_time(ncall); - for(uint64_t i=0;i t_time(no); + for(uint64_t i=0;iBarrier(); double t1=usecond(); - uint64_t ncall = 500; - FGrid->Broadcast(0,&ncall,sizeof(ncall)); + uint64_t no = 50; + uint64_t ni = 100; // std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"< t_time(ncall); - for(uint64_t i=0;i t_time(no); + for(uint64_t i=0;iBarrier(); - double t1=usecond(); - uint64_t ncall = 500; - - FGrid->Broadcast(0,&ncall,sizeof(ncall)); + uint64_t ni = 100; + uint64_t no = 50; // std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"< t_time(ncall); - for(uint64_t i=0;i t_time(no); + for(uint64_t i=0;iBarrier(); @@ -814,20 +818,21 @@ public: double mf_hi, mf_lo, mf_err; timestat.statistics(t_time); - mf_hi = flops/timestat.min; - mf_lo = flops/timestat.max; + mf_hi = flops/timestat.min*ni; + mf_lo = flops/timestat.max*ni; mf_err= flops/timestat.min * timestat.err/timestat.mean; - mflops = flops/timestat.mean; + mflops = flops/timestat.mean*ni; mflops_all.push_back(mflops); if ( mflops_best == 0 ) mflops_best = mflops; if ( mflops_worst== 0 ) mflops_worst= mflops; if ( mflops>mflops_best ) mflops_best = mflops; if ( mflops L_list({8,12,16,24}); + std::vector L_list({8,12,16,24,32}); int selm1=sel-1; std::vector clover; diff --git a/configure.ac b/configure.ac index e4b553bf..9664a675 100644 --- a/configure.ac +++ b/configure.ac @@ -151,7 +151,7 @@ AC_ARG_ENABLE([tracing], case ${ac_TRACING} in nvtx) AC_DEFINE([GRID_TRACING_NVTX],[1],[use NVTX]) - LIBS="${LIBS} -lnvToolsExt64_1" + LIBS="${LIBS} -lnvToolsExt" ;; roctx) AC_DEFINE([GRID_TRACING_ROCTX],[1],[use ROCTX]) diff --git a/examples/Example_Laplacian_smearing.cc b/examples/Example_Laplacian_smearing.cc index 9780e8a0..5c037461 100644 --- a/examples/Example_Laplacian_smearing.cc +++ b/examples/Example_Laplacian_smearing.cc @@ -93,10 +93,13 @@ int main(int argc, char ** argv) Real coeff = (width*width) / Real(4*Iterations); chi=kronecker; + // chi = (1-p^2/2N)^N kronecker for(int n = 0; n < Iterations; ++n) { Laplacian.M(chi,psi); chi = chi - coeff*psi; + RealD n2 = norm2(chi); + chi = chi * (1.0/std::sqrt(n2)); } std::cout << " Wuppertal smeared operator is chi = \n" << chi < log.shm0.ov.$vol -srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 1 --grid $vol > log.shm1.ov.$vol - -srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 0 --grid $vol > log.shm0.seq.$vol -srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 1 --grid $vol > log.shm1.seq.$vol +srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 0 --grid $vol -Ls 16 done diff --git a/systems/Frontier/config-command b/systems/Frontier/config-command index a1c113e4..8a3fdcfd 100644 --- a/systems/Frontier/config-command +++ b/systems/Frontier/config-command @@ -3,20 +3,19 @@ CLIME=`spack find --paths c-lime@2-3-9 | grep c-lime| cut -c 15-` --with-lime=$CLIME \ --enable-unified=no \ --enable-shm=nvlink \ ---enable-tracing=timer \ +--enable-tracing=none \ --enable-accelerator=hip \ --enable-gen-simd-width=64 \ --disable-gparity \ --disable-fermion-reps \ --enable-simd=GPU \ ---enable-accelerator-cshift \ --with-gmp=$OLCF_GMP_ROOT \ --with-fftw=$FFTW_DIR/.. \ --with-mpfr=/opt/cray/pe/gcc/mpfr/3.1.4/ \ --disable-fermion-reps \ CXX=hipcc MPICXX=mpicxx \ -CXXFLAGS="-fPIC -I{$ROCM_PATH}/include/ -I${MPICH_DIR}/include -L/lib64 " \ - LDFLAGS="-L/lib64 -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -lamdhip64 -lhipblas -lrocblas" +CXXFLAGS="-fPIC -I${ROCM_PATH}/include/ -I${MPICH_DIR}/include -L/lib64 " \ + LDFLAGS="-L/lib64 -L${ROCM_PATH}/lib -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -lhipblas -lrocblas" diff --git a/systems/Frontier/sourceme.sh b/systems/Frontier/sourceme.sh index 09619e2a..71028ff7 100644 --- a/systems/Frontier/sourceme.sh +++ b/systems/Frontier/sourceme.sh @@ -1,12 +1,25 @@ + +echo spack . /autofs/nccs-svm1_home1/paboyle/Crusher/Grid/spack/share/spack/setup-env.sh -spack load c-lime -module load emacs -module load PrgEnv-gnu -module load rocm/6.0.0 -module load cray-mpich -module load gmp + +module load cce/15.0.1 +module load rocm/5.3.0 module load cray-fftw module load craype-accel-amd-gfx90a + +#Ugly hacks to get down level software working on current system +export LD_LIBRARY_PATH=/opt/cray/libfabric/1.20.1/lib64/:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=/opt/gcc/mpfr/3.1.4/lib:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=`pwd`/:$LD_LIBRARY_PATH +ln -s /opt/rocm-6.0.0/lib/libamdhip64.so.6 . + +#echo spack load c-lime +#spack load c-lime +#module load emacs +##module load PrgEnv-gnu +##module load cray-mpich +##module load cray-fftw +##module load craype-accel-amd-gfx90a +##export LD_LIBRARY_PATH=/opt/gcc/mpfr/3.1.4/lib:$LD_LIBRARY_PATH #Hack for lib -#export LD_LIBRARY_PATH=`pwd`:$LD_LIBRARY_PATH +##export LD_LIBRARY_PATH=`pwd`/:$LD_LIBRARY_PATH diff --git a/systems/WorkArounds.txt b/systems/WorkArounds.txt new file mode 100644 index 00000000..7191b4ff --- /dev/null +++ b/systems/WorkArounds.txt @@ -0,0 +1,206 @@ +The purpose of this file is to collate all non-obvious known magic shell variables +and compiler flags required for either correctness or performance on various systems. + +A repository of work-arounds. + +Contents: +1. Interconnect + MPI +2. Compilation +3. Profiling + +************************ +* 1. INTERCONNECT + MPI +************************ + +-------------------------------------------------------------------- +MPI2-IO correctness: force OpenMPI to use the MPICH romio implementation for parallel I/O +-------------------------------------------------------------------- +export OMPI_MCA_io=romio321 + +-------------------------------------- +ROMIO fail with > 2GB per node read (32 bit issue) +-------------------------------------- + +Use later MPICH + +https://github.com/paboyle/Grid/issues/381 + +https://github.com/pmodels/mpich/commit/3a479ab0 + +-------------------------------------------------------------------- +Slingshot: Frontier and Perlmutter libfabric slow down +and physical memory fragmentation +-------------------------------------------------------------------- +export FI_MR_CACHE_MONITOR=disabled +or +export FI_MR_CACHE_MONITOR=kdreg2 + +-------------------------------------------------------------------- +Perlmutter +-------------------------------------------------------------------- + +export MPICH_RDMA_ENABLED_CUDA=1 +export MPICH_GPU_IPC_ENABLED=1 +export MPICH_GPU_EAGER_REGISTER_HOST_MEM=0 +export MPICH_GPU_NO_ASYNC_MEMCPY=0 + +-------------------------------------------------------------------- +Frontier/LumiG +-------------------------------------------------------------------- + +Hiding ROCR_VISIBLE_DEVICES triggers SDMA engines to be used for GPU-GPU + +cat << EOF > select_gpu +#!/bin/bash +export MPICH_GPU_SUPPORT_ENABLED=1 +export MPICH_SMP_SINGLE_COPY_MODE=XPMEM +export GPU_MAP=(0 1 2 3 7 6 5 4) +export NUMA_MAP=(3 3 1 1 2 2 0 0) +export GPU=\${GPU_MAP[\$SLURM_LOCALID]} +export NUMA=\${NUMA_MAP[\$SLURM_LOCALID]} +export HIP_VISIBLE_DEVICES=\$GPU +unset ROCR_VISIBLE_DEVICES +echo RANK \$SLURM_LOCALID using GPU \$GPU +exec numactl -m \$NUMA -N \$NUMA \$* +EOF +chmod +x ./select_gpu + +srun ./select_gpu BINARY + + +-------------------------------------------------------------------- +Mellanox performance with A100 GPU (Tursa, Booster, Leonardo) +-------------------------------------------------------------------- +export OMPI_MCA_btl=^uct,openib +export UCX_TLS=gdr_copy,rc,rc_x,sm,cuda_copy,cuda_ipc +export UCX_RNDV_SCHEME=put_zcopy +export UCX_RNDV_THRESH=16384 +export UCX_IB_GPU_DIRECT_RDMA=yes + +-------------------------------------------------------------------- +Mellanox + A100 correctness (Tursa, Booster, Leonardo) +-------------------------------------------------------------------- +export UCX_MEMTYPE_CACHE=n + +-------------------------------------------------------------------- +MPICH/Aurora/PVC correctness and performance +-------------------------------------------------------------------- + +https://github.com/pmodels/mpich/issues/7302 + +--enable-cuda-aware-mpi=no +--enable-unified=no + +Grid's internal D-H-H-D pipeline mode, avoid device memory in MPI +Do not use SVM + +Ideally use MPICH with fix to issue 7302: + +https://github.com/pmodels/mpich/pull/7312 + +Ideally: +MPIR_CVAR_CH4_IPC_GPU_HANDLE_CACHE=generic + +Alternatives: +export MPIR_CVAR_NOLOCAL=1 +export MPIR_CVAR_CH4_IPC_GPU_P2P_THRESHOLD=1000000000 + +-------------------------------------------------------------------- +MPICH/Aurora/PVC correctness and performance +-------------------------------------------------------------------- + +Broken: +export MPIR_CVAR_CH4_OFI_ENABLE_GPU_PIPELINE=1 + +This gives good peformance without requiring +--enable-cuda-aware-mpi=no + +But is an open issue reported by James Osborn +https://github.com/pmodels/mpich/issues/7139 + +Possibly resolved but unclear if in the installed software yet. + +************************ +* 2. COMPILATION +************************ + +-------------------------------------------------------------------- +G++ compiler breakage / graveyard +-------------------------------------------------------------------- + +9.3.0, 10.3.1, +https://github.com/paboyle/Grid/issues/290 +https://github.com/paboyle/Grid/issues/264 + +Working (-) Broken (X): + +4.9.0 - +4.9.1 - +5.1.0 X +5.2.0 X +5.3.0 X +5.4.0 X +6.1.0 X +6.2.0 X +6.3.0 - +7.1.0 - +8.0.0 (HEAD) - + +https://github.com/paboyle/Grid/issues/100 + +-------------------------------------------------------------------- +AMD GPU nodes : +-------------------------------------------------------------------- + +multiple ROCM versions broken; use 5.3.0 +manifests itself as wrong results in fp32 + +https://github.com/paboyle/Grid/issues/464 + +-------------------------------------------------------------------- +Aurora/PVC +-------------------------------------------------------------------- + +SYCL ahead of time compilation (fixes rare runtime JIT errors and faster runtime, PB) +SYCL slow link and relocatable code issues (Christoph Lehner) +Opt large register file required for good performance in fp64 + + +export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-opt-large-register-file" +export LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=spir64_gen -Xs -device -Xs pvc -fsycl-device-lib=all -lze_loader -L${MKLROOT}/lib -qmkl=parallel -fsycl -lsycl -fPIC -fsycl-max-parallel-link-jobs=16 -fno-sycl-rdc" +export CXXFLAGS="-O3 -fiopenmp -fsycl-unnamed-lambda -fsycl -Wno-tautological-compare -qmkl=parallel -fsycl -fno-exceptions -fPIC" + +-------------------------------------------------------------------- +Aurora/PVC useful extra options +-------------------------------------------------------------------- + +Host only sanitizer: +-Xarch_host -fsanitize=leak +-Xarch_host -fsanitize=address + +Deterministic MPI reduction: +export MPIR_CVAR_ALLREDUCE_DEVICE_COLLECTIVE=0 +export MPIR_CVAR_REDUCE_DEVICE_COLLECTIVE=0 +export MPIR_CVAR_ALLREDUCE_INTRA_ALGORITHM=recursive_doubling +unset MPIR_CVAR_CH4_COLL_SELECTION_TUNING_JSON_FILE +unset MPIR_CVAR_COLL_SELECTION_TUNING_JSON_FILE +unset MPIR_CVAR_CH4_POSIX_COLL_SELECTION_TUNING_JSON_FILE + + + +************************ +* 3. Visual profile tools +************************ + +-------------------------------------------------------------------- +Frontier/rocprof +-------------------------------------------------------------------- + +-------------------------------------------------------------------- +Aurora/unitrace +-------------------------------------------------------------------- + + +-------------------------------------------------------------------- +Tursa/nsight-sys +-------------------------------------------------------------------- diff --git a/systems/mac-arm/config-command-mpi b/systems/mac-arm/config-command-mpi index be4d23dc..34f4229b 100644 --- a/systems/mac-arm/config-command-mpi +++ b/systems/mac-arm/config-command-mpi @@ -1,2 +1,16 @@ -CXXFLAGS=-I/opt/local/include LDFLAGS=-L/opt/local/lib/ CXX=c++-13 MPICXX=mpicxx ../../configure --enable-simd=GEN --enable-comms=mpi-auto --enable-unified=yes --prefix $HOME/QCD/GridInstall --with-lime=/Users/peterboyle/QCD/SciDAC/install/ --with-openssl=$BREW --disable-fermion-reps --disable-gparity --disable-debug + + +CXX=mpicxx ../../configure \ + --enable-simd=GEN \ + --enable-comms=mpi-auto \ + --enable-Sp=yes \ + --enable-unified=yes \ + --prefix /Users/peterboyle/QCD/vtk/Grid/install \ + --with-lime=$CLIME \ + --with-hdf5=$HDF5 \ + --with-fftw=$FFTW \ + --with-openssl=$OPENSSL \ + --with-gmp=$GMP \ + --with-mpfr=$MPFR \ + --disable-debug diff --git a/systems/sdcc-genoa/bench.slurm b/systems/sdcc-genoa/bench.slurm new file mode 100644 index 00000000..2c7f6c32 --- /dev/null +++ b/systems/sdcc-genoa/bench.slurm @@ -0,0 +1,32 @@ +#!/bin/bash +#SBATCH --partition lqcd +#SBATCH --time=00:50:00 +#SBATCH -A lqcdtest +#SBATCH -q lqcd +#SBATCH --exclusive +#SBATCH --nodes=1 +#SBATCH -w genoahost001,genoahost003,genoahost050,genoahost054 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=64 +#SBATCH --qos lqcd + +source sourceme.sh + +export PLACES=(1:16:4 1:32:2 0:64:1); +export THR=(16 32 64) + +for t in 2 +do + +export OMP_NUM_THREADS=${THR[$t]} +export OMP_PLACES=${PLACES[$t]} +export thr=${THR[$t]} + +#for vol in 24.24.24.24 32.32.32.32 48.48.48.96 +for vol in 48.48.48.96 +do +srun -N1 -n1 ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid $vol --dslash-asm --shm 8192 > $vol.1node.thr$thr +done +#srun -N1 -n1 ./benchmarks/Benchmark_usqcd --mpi 1.1.1.1 --grid $vol > usqcd.1node.thr$thr +done + diff --git a/systems/sdcc-genoa/bench2.slurm b/systems/sdcc-genoa/bench2.slurm new file mode 100644 index 00000000..be21c816 --- /dev/null +++ b/systems/sdcc-genoa/bench2.slurm @@ -0,0 +1,36 @@ +#!/bin/bash +#SBATCH --partition lqcd +#SBATCH --time=00:50:00 +#SBATCH -A lqcdtest +#SBATCH -q lqcd +#SBATCH --exclusive +#SBATCH --nodes=2 +#SBATCH -w genoahost001,genoahost003,genoahost050,genoahost054 +#SBATCH --ntasks=2 +#SBATCH --cpus-per-task=64 +#SBATCH --qos lqcd + +source sourceme.sh + +export PLACES=(1:16:4 1:32:2 0:64:1); +export THR=(16 32 64) + +nodes=2 +mpi=1.1.1.2 + +for t in 2 +do + +export OMP_NUM_THREADS=${THR[$t]} +export OMP_PLACES=${PLACES[$t]} +export thr=${THR[$t]} + +#srun -N$nodes -n$nodes ./benchmarks/Benchmark_usqcd --mpi $mpi --grid 32.32.32.32 > usqcd.n$nodes.thr$thr + +for vol in 64.64.64.128 +do +srun -N$nodes -n$nodes ./benchmarks/Benchmark_dwf_fp32 --mpi $mpi --grid $vol --dslash-asm --comms-overlap --shm 8192 > $vol.n$nodes.overlap.thr$thr +done + +done + diff --git a/systems/sdcc-genoa/config-command b/systems/sdcc-genoa/config-command new file mode 100644 index 00000000..d992e1da --- /dev/null +++ b/systems/sdcc-genoa/config-command @@ -0,0 +1,16 @@ +../../configure \ +--enable-comms=mpi-auto \ +--enable-unified=yes \ +--enable-shm=shmopen \ +--enable-shm-fast-path=shmopen \ +--enable-accelerator=none \ +--enable-simd=AVX512 \ +--disable-accelerator-cshift \ +--disable-fermion-reps \ +--disable-gparity \ +CXX=clang++ \ +MPICXX=mpicxx \ +CXXFLAGS="-std=c++17" + + + diff --git a/systems/sdcc-genoa/sourceme.sh b/systems/sdcc-genoa/sourceme.sh new file mode 100644 index 00000000..4f37888c --- /dev/null +++ b/systems/sdcc-genoa/sourceme.sh @@ -0,0 +1,4 @@ +source $HOME/spack/share/spack/setup-env.sh +spack load llvm@17.0.4 +export LD_LIBRARY_PATH=/direct/sdcc+u/paboyle/spack/opt/spack/linux-almalinux8-icelake/gcc-8.5.0/llvm-17.0.4-laufdrcip63ivkadmtgoepwmj3dtztdu/lib:$LD_LIBRARY_PATH +module load openmpi diff --git a/systems/spack-linux/config-command b/systems/spack-linux/config-command new file mode 100644 index 00000000..06bcc205 --- /dev/null +++ b/systems/spack-linux/config-command @@ -0,0 +1,17 @@ +../../src/Grid/configure \ + --prefix /home/pab/NPR/install \ + --enable-comms=mpi-auto \ + --enable-simd=AVX2 \ + --enable-shm=none \ + --enable-debug \ + --with-lime=$CLIME \ + --with-hdf5=$HDF5 \ + --with-fftw=$FFTW \ + --with-gmp=$GMP \ + --with-mpfr=$MPFR \ + --disable-gparity \ + --disable-fermion-reps \ + CXX=clang++ \ + MPICXX=mpicxx \ + CXXFLAGS="-std=c++17 " + diff --git a/systems/spack-linux/sourceme.sh b/systems/spack-linux/sourceme.sh new file mode 100644 index 00000000..f4ff9c85 --- /dev/null +++ b/systems/spack-linux/sourceme.sh @@ -0,0 +1,28 @@ +source $HOME/spack/share/spack/setup-env.sh +spack load llvm@12 +spack load autoconf%clang@12.0.1 +spack load automake%clang@12.0.1 +spack load c-lime%clang@12.0.1 +spack load fftw%clang@12.0.1 +spack load gmp%clang@12.0.1 +spack load mpfr%clang@12.0.1 +spack load openmpi%clang@12.0.1 +spack load openssl%clang@12.0.1 +spack load hdf5+cxx%clang@12.0.1 +spack load cmake%clang@12.0.1 +export FFTW=`spack find --paths fftw%clang@12.0.1 | grep ^fftw | awk '{print $2}' ` +export HDF5=`spack find --paths hdf5+cxx%clang@12.0.1 | grep ^hdf5 | awk '{print $2}' ` +export CLIME=`spack find --paths c-lime%clang@12.0.1 | grep ^c-lime | awk '{print $2}' ` +export MPFR=`spack find --paths mpfr%clang@12.0.1 | grep ^mpfr | awk '{print $2}' ` +export LLVM=`spack find --paths llvm@12 | grep ^llvm | awk '{print $2}' ` +export OPENSSL=`spack find --paths openssl%clang@12.0.1 | grep openssl | awk '{print $2}' ` +export GMP=`spack find --paths gmp%clang@12.0.1 | grep ^gmp | awk '{print $2}' ` +export TCLAP=`spack find --paths tclap%clang@12.0.1 | grep ^tclap | awk '{print $2}' ` +export LD_LIBRARY_PATH=${TCLAP}/lib:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=$MPFR/lib:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=$GMP/lib:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=$FFTW/lib:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=$LLVM/lib:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=$LLVM/lib/x86_64-unknown-linux-gnu/:$LD_LIBRARY_PATH + +ulimit -s 81920 diff --git a/systems/spack-linux/spack-install b/systems/spack-linux/spack-install new file mode 100644 index 00000000..eaf73a49 --- /dev/null +++ b/systems/spack-linux/spack-install @@ -0,0 +1,19 @@ +cd +git clone https://github.com/spack/spack.git +source $HOME/spack/share/spack/setup-env.sh + +spack install llvm@12 + +spack install autoconf%clang@12.0.1 +spack install automake%clang@12.0.1 +spack install c-lime%clang@12.0.1 +spack install fftw%clang@12.0.1 +spack install gmp%clang@12.0.1 +spack install mpfr%clang@12.0.1 +spack install openmpi%clang@12.0.1 +spack install openssl%clang@12.0.1 +spack install hdf5+cxx%clang@12.0.1 +spack install cmake%clang@12.0.1 +spack install tclap%clang@12.0.1 +spack install emacs%clang@12.0.1 + diff --git a/tests/Test_dwf_dslash_repro.cc b/tests/Test_dwf_dslash_repro.cc index 1bf813d9..b0eac64b 100644 --- a/tests/Test_dwf_dslash_repro.cc +++ b/tests/Test_dwf_dslash_repro.cc @@ -62,7 +62,7 @@ int VerifyOnDevice(const FermionField &res, FermionField &ref) if (((random()&0xF)==0)&&injection) { uint64_t sF = random()%(NN); int lane=0; - printf("Error injection site %ld on rank %d\n",sF,res.Grid()->ThisRank()); + printf("Error injection site %ld on rank %d\n",(long)sF,res.Grid()->ThisRank()); auto vv = acceleratorGet(res_v[sF]); double *dd = (double *)&vv; *dd=M_PI; diff --git a/tests/debug/Test_general_coarse_hdcg.cc b/tests/debug/Test_general_coarse_hdcg.cc index 24918955..0d6b0a64 100644 --- a/tests/debug/Test_general_coarse_hdcg.cc +++ b/tests/debug/Test_general_coarse_hdcg.cc @@ -195,8 +195,8 @@ int main (int argc, char ** argv) int Nk=nrhs; int Nm=Nk*3; - int Nk=36; - int Nm=144; + // int Nk=36; + // int Nm=144; int Nstop=Nk; int Nconv_test_interval=1; diff --git a/tests/debug/Test_general_coarse_pvdagm.cc b/tests/debug/Test_general_coarse_pvdagm.cc index d382ea64..096574d0 100644 --- a/tests/debug/Test_general_coarse_pvdagm.cc +++ b/tests/debug/Test_general_coarse_pvdagm.cc @@ -47,20 +47,20 @@ public: void OpDir (const Field &in, Field &out,int dir,int disp) { assert(0); } void OpDirAll (const Field &in, std::vector &out){ assert(0); }; void Op (const Field &in, Field &out){ - std::cout << "Op: PVdag M "< &out){ assert(0); }; void Op (const Field &in, Field &out){ - std::cout << "Op: PVdag M "< = %1.15e\n", Meofa.S(Umu)); } - +#endif return 0; } diff --git a/tests/debug/Test_heatbath_mobius_eofa_gparity.cc b/tests/debug/Test_heatbath_mobius_eofa_gparity.cc index fd3d96f8..b6481731 100644 --- a/tests/debug/Test_heatbath_mobius_eofa_gparity.cc +++ b/tests/debug/Test_heatbath_mobius_eofa_gparity.cc @@ -56,6 +56,7 @@ const RealD M5 = 1.8; int main(int argc, char** argv) { +#ifdef ENABLE_GPARITY Grid_init(&argc, &argv); int threads = GridThread::GetThreads(); @@ -106,6 +107,6 @@ int main(int argc, char** argv) Meofa.refresh(Umu, sRNG, RNG5); printf(" = %1.15e\n", Meofa.S(Umu)); } - +#endif return 0; } diff --git a/tests/debug/Test_padded_cell.cc b/tests/debug/Test_padded_cell.cc index f110df46..593b3542 100644 --- a/tests/debug/Test_padded_cell.cc +++ b/tests/debug/Test_padded_cell.cc @@ -33,6 +33,7 @@ using namespace std; using namespace Grid; // This is to optimize the SIMD +/* template void gpermute(vobj & inout,int perm){ vobj tmp=inout; if (perm & 0x1 ) { permute(inout,tmp,0); tmp=inout;} @@ -40,7 +41,7 @@ template void gpermute(vobj & inout,int perm){ if (perm & 0x4 ) { permute(inout,tmp,2); tmp=inout;} if (perm & 0x8 ) { permute(inout,tmp,3); tmp=inout;} } - +*/ int main (int argc, char ** argv) { diff --git a/tests/debug/Test_padded_cell_staple.cc b/tests/debug/Test_padded_cell_staple.cc index 6d7bc8c9..326f8810 100644 --- a/tests/debug/Test_padded_cell_staple.cc +++ b/tests/debug/Test_padded_cell_staple.cc @@ -153,7 +153,7 @@ public: t=usecond(); { autoView( gStaple_v , gStaple, AcceleratorWrite); - auto gStencil_v = gStencil.View(); + auto gStencil_v = gStencil.View(AcceleratorRead); autoView( Ug_mu_v , Ug_mu, AcceleratorRead); autoView( Ug_nu_v , Ug_nu, AcceleratorRead); @@ -389,7 +389,7 @@ public: GeneralLocalStencil gStencil(ggrid,shifts); { autoView( gStaple_v , gStaple, AcceleratorWrite); - auto gStencil_v = gStencil.View(); + auto gStencil_v = gStencil.View(AcceleratorRead); typedef LatticeView GaugeViewType; size_t vsize = Nd*sizeof(GaugeViewType); diff --git a/tests/debug/Test_reweight_dwf_eofa_gparity.cc b/tests/debug/Test_reweight_dwf_eofa_gparity.cc index 70ae94aa..08d80971 100644 --- a/tests/debug/Test_reweight_dwf_eofa_gparity.cc +++ b/tests/debug/Test_reweight_dwf_eofa_gparity.cc @@ -83,6 +83,7 @@ std::vector jack_stats(const std::vector& data) int main(int argc, char **argv) { +#ifdef ENABLE_GPARITY Grid_init(&argc, &argv); // Initialize spacetime grid @@ -206,4 +207,5 @@ int main(int argc, char **argv) std::cout << std::endl << "EOFA: rw = " << eofa_result[0] << " +/- " << eofa_result[1] << std::endl; Grid_finalize(); +#endif } diff --git a/tests/debug/Test_reweight_mobius_eofa_gparity.cc b/tests/debug/Test_reweight_mobius_eofa_gparity.cc index e2a4fb47..cf6787b7 100644 --- a/tests/debug/Test_reweight_mobius_eofa_gparity.cc +++ b/tests/debug/Test_reweight_mobius_eofa_gparity.cc @@ -85,6 +85,7 @@ std::vector jack_stats(const std::vector& data) int main(int argc, char **argv) { +#ifdef ENABLE_GPARITY Grid_init(&argc, &argv); // Initialize spacetime grid @@ -215,4 +216,5 @@ int main(int argc, char **argv) std::cout << std::endl << "EOFA: rw = " << eofa_result[0] << " +/- " << eofa_result[1] << std::endl; Grid_finalize(); +#endif } diff --git a/tests/forces/Test_dwf_gpforce.cc b/tests/forces/Test_dwf_gpforce.cc index 72d30369..db61813e 100644 --- a/tests/forces/Test_dwf_gpforce.cc +++ b/tests/forces/Test_dwf_gpforce.cc @@ -35,6 +35,7 @@ using namespace Grid; int main (int argc, char ** argv) { +#ifdef ENABLE_GPARITY Grid_init(&argc,&argv); Coordinate latt_size = GridDefaultLatt(); @@ -244,4 +245,5 @@ int main (int argc, char ** argv) std::cout<< GridLogMessage << "Done" < +#ifdef ENABLE_GPARITY + using namespace std; using namespace Grid; - ; typedef GparityWilsonImplD FermionImplPolicyD; typedef GparityMobiusEOFAFermionD FermionActionD; @@ -231,3 +232,7 @@ int main (int argc, char** argv) std::cout << GridLogMessage << "Done" << std::endl; Grid_finalize(); } +#else +int main(int argc,char ** argv) { return 0;}; + +#endif diff --git a/tests/forces/Test_mobius_gpforce_eofa.cc b/tests/forces/Test_mobius_gpforce_eofa.cc index dd71b565..c0b7117a 100644 --- a/tests/forces/Test_mobius_gpforce_eofa.cc +++ b/tests/forces/Test_mobius_gpforce_eofa.cc @@ -31,14 +31,14 @@ See the full license in the file "LICENSE" in the top level distribution directo using namespace std; using namespace Grid; - ; + typedef GparityWilsonImplR FermionImplPolicy; typedef GparityMobiusEOFAFermionD FermionAction; typedef typename FermionAction::FermionField FermionField; - int main (int argc, char** argv) { +#ifdef ENABLE_GPARITY Grid_init(&argc, &argv); Coordinate latt_size = GridDefaultLatt(); @@ -171,4 +171,5 @@ int main (int argc, char** argv) std::cout << GridLogMessage << "Done" << std::endl; Grid_finalize(); +#endif } diff --git a/tests/hmc/Test_action_dwf_gparity2fvs1f.cc b/tests/hmc/Test_action_dwf_gparity2fvs1f.cc index 46f87d93..1c9d01fa 100644 --- a/tests/hmc/Test_action_dwf_gparity2fvs1f.cc +++ b/tests/hmc/Test_action_dwf_gparity2fvs1f.cc @@ -30,7 +30,7 @@ using namespace Grid; - +#ifdef ENABLE_GPARITY template void copy2fTo1fFermionField(FermionField1f &out, const FermionField2f &in, int gpdir){ @@ -255,3 +255,6 @@ int main(int argc, char **argv) { } // main +#else +int main(int argc, char **argv){}; +#endif diff --git a/tests/hmc/Test_hmc_EODWFRatio_Gparity.cc b/tests/hmc/Test_hmc_EODWFRatio_Gparity.cc index f98d0edc..9f5a7c9e 100644 --- a/tests/hmc/Test_hmc_EODWFRatio_Gparity.cc +++ b/tests/hmc/Test_hmc_EODWFRatio_Gparity.cc @@ -30,6 +30,7 @@ Author: paboyle int main(int argc, char **argv) { +#ifdef ENABLE_GPARITY using namespace Grid; ; @@ -139,7 +140,7 @@ int main(int argc, char **argv) { Grid_finalize(); - +#endif } // main diff --git a/tests/hmc/Test_hmc_EOMobiusRatio.cc b/tests/hmc/Test_hmc_EOMobiusRatio.cc index 0e0a6611..ca3774cd 100644 --- a/tests/hmc/Test_hmc_EOMobiusRatio.cc +++ b/tests/hmc/Test_hmc_EOMobiusRatio.cc @@ -55,13 +55,13 @@ namespace Grid{ }; - struct SmearingParameters: Serializable { - GRID_SERIALIZABLE_CLASS_MEMBERS(SmearingParameters, + struct HmcSmearingParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(HmcSmearingParameters, double, rho, Integer, Nsmear) template - SmearingParameters(Reader& Reader){ + HmcSmearingParameters(Reader& Reader){ read(Reader, "StoutSmearing", *this); } @@ -213,7 +213,7 @@ int main(int argc, char **argv) { // Reset performance counters if (ApplySmearing){ - SmearingParameters SmPar(Reader); + HmcSmearingParameters SmPar(Reader); //double rho = 0.1; // smearing parameter //int Nsmear = 3; // number of smearing levels Smear_Stout Stout(SmPar.rho); diff --git a/tests/lanczos/LanParams.xml b/tests/lanczos/LanParams.xml new file mode 100644 index 00000000..5328b46b --- /dev/null +++ b/tests/lanczos/LanParams.xml @@ -0,0 +1,14 @@ + + + + 0.00107 + 1.8 + 48 + 10 + 15 + 85 + 0.003 + 60 + 201 + + diff --git a/tests/lanczos/Test_compressed_lanczos_gparity.cc b/tests/lanczos/Test_compressed_lanczos_gparity.cc index ca353d61..d5e09c0b 100644 --- a/tests/lanczos/Test_compressed_lanczos_gparity.cc +++ b/tests/lanczos/Test_compressed_lanczos_gparity.cc @@ -35,6 +35,8 @@ Author: Peter Boyle #include #include +#ifdef ENABLE_GPARITY + using namespace std; using namespace Grid; @@ -378,7 +380,8 @@ void runTest(const Options &opt){ //Note: because we rely upon physical properties we must use a "real" gauge configuration -int main (int argc, char ** argv) { +int main (int argc, char ** argv) +{ Grid_init(&argc,&argv); GridLogIRL.TimingMode(1); @@ -482,4 +485,8 @@ int main (int argc, char ** argv) { Grid_finalize(); } +#else +int main(int argc, char **argv){}; + +#endif diff --git a/tests/lanczos/Test_dwf_G5R5.cc b/tests/lanczos/Test_dwf_G5R5.cc new file mode 100644 index 00000000..0fd1bc35 --- /dev/null +++ b/tests/lanczos/Test_dwf_G5R5.cc @@ -0,0 +1,346 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/Test_dwf_G5R5.cc + +Copyright (C) 2015 + +Author: Chulwoo Jung +From Duo and Bob's Chirality study + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +using namespace std; +using namespace Grid; + ; + +//typedef WilsonFermionD FermionOp; +typedef DomainWallFermionD FermionOp; +typedef typename DomainWallFermionD::FermionField FermionField; + + +RealD AllZero(RealD x) { return 0.; } + +namespace Grid { + +struct LanczosParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters, + RealD, mass , + RealD, M5 , + Integer, Ls, + Integer, Nstop, + Integer, Nk, + Integer, Np, + RealD, ChebyLow, + RealD, ChebyHigh, + Integer, ChebyOrder) +// Integer, StartTrajectory, +// Integer, Trajectories, /* @brief Number of sweeps in this run */ +// bool, MetropolisTest, +// Integer, NoMetropolisUntil, +// std::string, StartingType, +// Integer, SW, +// RealD, Kappa, +// IntegratorParameters, MD) + + LanczosParameters() { + ////////////////////////////// Default values + mass = 0; +// MetropolisTest = true; +// NoMetropolisUntil = 10; +// StartTrajectory = 0; +// SW = 2; +// Trajectories = 10; +// StartingType = "HotStart"; + ///////////////////////////////// + } + + template + LanczosParameters(Reader & TheReader){ + initialize(TheReader); + } + + template < class ReaderClass > + void initialize(Reader &TheReader){ +// std::cout << GridLogMessage << "Reading HMC\n"; + read(TheReader, "HMC", *this); + } + + + void print_parameters() const { +// std::cout << GridLogMessage << "[HMC parameters] Trajectories : " << Trajectories << "\n"; +// std::cout << GridLogMessage << "[HMC parameters] Start trajectory : " << StartTrajectory << "\n"; +// std::cout << GridLogMessage << "[HMC parameters] Metropolis test (on/off): " << std::boolalpha << MetropolisTest << "\n"; +// std::cout << GridLogMessage << "[HMC parameters] Thermalization trajs : " << NoMetropolisUntil << "\n"; +// std::cout << GridLogMessage << "[HMC parameters] Starting type : " << StartingType << "\n"; +// MD.print_parameters(); + } + +}; + +} + +int main(int argc, char** argv) { + Grid_init(&argc, &argv); + + LanczosParameters LanParams; +#if 1 + { + XmlReader HMCrd("LanParams.xml"); + read(HMCrd,"LanczosParameters",LanParams); + } +#else + { + LanParams.mass = mass; + } +#endif + std::cout << GridLogMessage<< LanParams < seeds4({1, 2, 3, 4}); + std::vector seeds5({5, 6, 7, 8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + GridParallelRNG RNG5rb(FrbGrid); RNG5.SeedFixedIntegers(seeds5); + + LatticeGaugeField Umu(UGrid); + + FieldMetaData header; + std::string file("./config"); + + int precision32 = 0; + int tworow = 0; + NerscIO::readConfiguration(Umu,header,file); + +/* + std::vector U(4, UGrid); + for (int mu = 0; mu < Nd; mu++) { + U[mu] = PeekIndex(Umu, mu); + } +*/ + + int Nstop = 10; + int Nk = 20; + int Np = 80; + Nstop=LanParams.Nstop; + Nk=LanParams.Nk; + Np=LanParams.Np; + + int Nm = Nk + Np; + int MaxIt = 10000; + RealD resid = 1.0e-5; + + +//while ( mass > - 5.0){ + FermionOp Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + MdagMLinearOperator HermOp(Ddwf); /// <----- +// Gamma5HermitianLinearOperator HermOp2(WilsonOperator); /// <----- + Gamma5R5HermitianLinearOperator G5R5Herm(Ddwf); +// Gamma5R5HermitianLinearOperator + std::vector Coeffs{0, 1.}; + Polynomial PolyX(Coeffs); + + Chebyshev Cheby(LanParams.ChebyLow,LanParams.ChebyHigh,LanParams.ChebyOrder); + + FunctionHermOp OpCheby(Cheby,HermOp); + PlainHermOp Op (HermOp); + PlainHermOp Op2 (G5R5Herm); + + ImplicitlyRestartedLanczos IRL(OpCheby, Op, Nstop, Nk, Nm, resid, MaxIt); + + std::vector eval(Nm); + FermionField src(FGrid); + gaussian(RNG5, src); + std::vector evec(Nm, FGrid); + for (int i = 0; i < 1; i++) { + std::cout << i << " / " << Nm << " grid pointer " << evec[i].Grid() + << std::endl; + }; + + int Nconv; + IRL.calc(eval, evec, src, Nconv); + + std::cout << mass <<" : " << eval << std::endl; + +#if 0 + Gamma g5(Gamma::Algebra::Gamma5) ; + ComplexD dot; + FermionField tmp(FGrid); +// RealD eMe,eMMe; + for (int i = 0; i < Nstop ; i++) { +// tmp = g5*evec[i]; + dot = innerProduct(evec[i],evec[i]); +// G5R5(tmp,evec[i]); + G5R5Herm.HermOpAndNorm(evec[i],tmp,eMe,eMMe); + std::cout <<"Norm "< G5R5Mevec(Nconv, FGrid); + vector finalevec(Nconv, FGrid); + vector eMe(Nconv), eMMe(Nconv); + for(int i = 0; i < Nconv; i++){ + G5R5Herm.HermOpAndNorm(evec[i], G5R5Mevec[i], eMe[i], eMMe[i]); + } + cout << "Re: " << endl; + cout << eMe << endl; + cout << "" << endl; + cout << eMMe << endl; + vector> VevecG5R5Mevec(Nconv); + Eigen::MatrixXcd evecG5R5Mevec = Eigen::MatrixXcd::Zero(Nconv, Nconv); + for(int i = 0; i < Nconv; i++){ + VevecG5R5Mevec[i].resize(Nconv); + for(int j = 0; j < Nconv; j++){ + VevecG5R5Mevec[i][j] = innerProduct(evec[i], G5R5Mevec[j]); + evecG5R5Mevec(i, j) = VevecG5R5Mevec[i][j]; + } + } + //calculate eigenvector + cout << "Eigen solver" << endl; + Eigen::SelfAdjointEigenSolver eigensolver(evecG5R5Mevec); + vector eigeneval(Nconv); + vector> eigenevec(Nconv); + for(int i = 0; i < Nconv; i++){ + eigeneval[i] = eigensolver.eigenvalues()[i]; + eigenevec[i].resize(Nconv); + for(int j = 0; j < Nconv; j++){ + eigenevec[i][j] = eigensolver.eigenvectors()(i, j); + } + } + //rotation + cout << "Do rotation" << endl; + for(int i = 0; i < Nconv; i++){ + finalevec[i] = finalevec[i] - finalevec[i]; + for(int j = 0; j < Nconv; j++){ + finalevec[i] = eigenevec[j][i]*evec[j] + finalevec[i]; + } + } + //normalize again; + for(int i = 0; i < Nconv; i++){ + RealD tmp_RealD = norm2(finalevec[i]); + tmp_RealD = 1./pow(tmp_RealD, 0.5); + finalevec[i] = finalevec[i]*tmp_RealD; + } + + //check + for(int i = 0; i < Nconv; i++){ + G5R5Herm.HermOpAndNorm(finalevec[i], G5R5Mevec[i], eMe[i], eMMe[i]); + } + + //********************************************************************** + //sort the eigenvectors + vector finalevec_copy(Nconv, FGrid); + for(int i = 0; i < Nconv; i++){ + finalevec_copy[i] = finalevec[i]; + } + vector eMe_copy(eMe); + for(int i = 0; i < Nconv; i++){ + eMe[i] = fabs(eMe[i]); + eMe_copy[i] = eMe[i]; + } + sort(eMe_copy.begin(), eMe_copy.end()); + for(int i = 0; i < Nconv; i++){ + for(int j = 0; j < Nconv; j++){ + if(eMe[j] == eMe_copy[i]){ + finalevec[i] = finalevec_copy[j]; + } + } + } + for(int i = 0; i < Nconv; i++){ + G5R5Herm.HermOpAndNorm(finalevec[i], G5R5Mevec[i], eMe[i], eMMe[i]); + } + cout << "Re: " << endl; + cout << eMe << endl; + cout << "" << endl; + cout << eMMe << endl; + + +// vector finalevec(Nconv, FGrid); +// temporary, until doing rotation +// for(int i = 0; i < Nconv; i++) +// finalevec[i]=evec[i]; + //********************************************************************** + //calculate chirality matrix + vector G5evec(Nconv, FGrid); + vector> chiral_matrix(Nconv); + vector> chiral_matrix_real(Nconv); + for(int i = 0; i < Nconv; i++){ +// G5evec[i] = G5evec[i] - G5evec[i]; + G5evec[i] = Zero(); + for(int j = 0; j < Ls/2; j++){ + axpby_ssp(G5evec[i], 1., finalevec[i], 0., G5evec[i], j, j); + } + for(int j = Ls/2; j < Ls; j++){ + axpby_ssp(G5evec[i], -1., finalevec[i], 0., G5evec[i], j, j); + } + } + for(int i = 0; i < Nconv; i++){ + chiral_matrix_real[i].resize(Nconv); + chiral_matrix[i].resize(Nconv); + for(int j = 0; j < Nconv; j++){ + chiral_matrix[i][j] = innerProduct(finalevec[i], G5evec[j]); + chiral_matrix_real[i][j] = abs(chiral_matrix[i][j]); + std::cout <<" chiral_matrix_real "< using namespace std; using namespace Grid; - ; template struct Setup{}; +#ifdef ENABLE_GPARITY template<> struct Setup{ static GparityMobiusFermionF* getAction(LatticeGaugeFieldF &Umu, @@ -47,16 +47,24 @@ struct Setup{ return new GparityMobiusFermionF(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params); } }; +#endif template<> struct Setup{ static DomainWallFermionF* getAction(LatticeGaugeFieldF &Umu, + GridCartesian* FGrid, GridRedBlackCartesian* FrbGrid, GridCartesian* UGrid, GridRedBlackCartesian* UrbGrid){ + RealD mass=0.00054; + RealD M5=1.8; + return new DomainWallFermionF(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + } +}; +template<> struct Setup{ static DomainWallFermionD* getAction(LatticeGaugeField &Umu, GridCartesian* FGrid, GridRedBlackCartesian* FrbGrid, GridCartesian* UGrid, GridRedBlackCartesian* UrbGrid){ RealD mass=0.00054; RealD M5=1.8; - return new DomainWallFermionF(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + return new DomainWallFermionD(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); } }; @@ -168,7 +176,9 @@ int main (int argc, char ** argv) } if(action == "GparityMobius"){ +#ifdef ENABLE_GPARITY run(); +#endif }else if(action == "DWF"){ run(); }else if(action == "Mobius"){ diff --git a/tests/lanczos/Test_evec_compression.cc b/tests/lanczos/Test_evec_compression.cc index 1636ea3a..5ba1597c 100644 --- a/tests/lanczos/Test_evec_compression.cc +++ b/tests/lanczos/Test_evec_compression.cc @@ -555,6 +555,7 @@ int main (int argc, char ** argv) { double c = (args.mobius_scale - bmc)/2.; // c = 1/2 [ (b+c) - (b-c) ] if(is_gparity){ +#ifdef ENABLE_GPARITY GparityWilsonImplD::ImplParams Params = setupGparityParams(args.GparityDirs); readConfiguration(Umu, config, args.is_cps_cfg); //Read the gauge field @@ -564,7 +565,10 @@ int main (int argc, char ** argv) { }else if(action_s == "Mobius"){ GparityMobiusFermionD action(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, args.mass, args.M5, b, c, Params); run(action, config, args); - } + } +#else + assert(0); +#endif }else{ WilsonImplD::ImplParams Params = setupParams(); readConfiguration(Umu, config, args.is_cps_cfg); //Read the gauge field diff --git a/tests/lanczos/Test_wilson_DWFKernel.cc b/tests/lanczos/Test_wilson_DWFKernel.cc new file mode 100644 index 00000000..30cbd1b3 --- /dev/null +++ b/tests/lanczos/Test_wilson_DWFKernel.cc @@ -0,0 +1,278 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/Test_dwf_lanczos.cc + +Copyright (C) 2015 + +Author: Chulwoo Jung + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +using namespace std; +using namespace Grid; + ; + +typedef WilsonFermionD FermionOp; +typedef typename WilsonFermionD::FermionField FermionField; + + +RealD AllZero(RealD x) { return 0.; } + +namespace Grid { + +#if 0 +template +class RationalHermOp : public LinearFunction { +public: + using LinearFunction::operator(); +// OperatorFunction & _poly; + LinearOperatorBase &_Linop; + RealD _massDen, _massNum; + + FunctionHermOp(LinearOperatorBase& linop, RealD massDen,RealD massNum) + : _Linop(linop) ,_massDen(massDen),_massNum(massNum) {}; + + void operator()(const Field& in, Field& out) { +// _poly(_Linop,in,out); + } +}; +#endif + +template +class InvG5LinearOperator : public LinearOperatorBase { + Matrix &_Mat; + RealD _num; + RealD _Tol; + Integer _MaxIt; + Gamma g5; + +public: + InvG5LinearOperator(Matrix &Mat,RealD num): _Mat(Mat),_num(num), _Tol(1e-12),_MaxIt(10000), g5(Gamma::Algebra::Gamma5) {}; + + // Support for coarsening to a multigrid + void OpDiag (const Field &in, Field &out) { + assert(0); + _Mat.Mdiag(in,out); + } + void OpDir (const Field &in, Field &out,int dir,int disp) { + assert(0); + _Mat.Mdir(in,out,dir,disp); + } + void OpDirAll (const Field &in, std::vector &out){ + assert(0); + _Mat.MdirAll(in,out); + }; + void Op (const Field &in, Field &out){ + assert(0); + _Mat.M(in,out); + } + void AdjOp (const Field &in, Field &out){ + assert(0); + _Mat.Mdag(in,out); + } + void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ + HermOp(in,out); + ComplexD dot = innerProduct(in,out); + n1=real(dot); + n2=norm2(out); + } + void HermOp(const Field &in, Field &out){ + Field tmp(in.Grid()); + MdagMLinearOperator denom(_Mat); + ConjugateGradient CG(_Tol,_MaxIt); + _Mat.M(in,tmp); + tmp += _num*in; + _Mat.Mdag(tmp,out); + CG(denom,out,tmp); + out = g5*tmp; + } +}; + + +struct LanczosParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters, + RealD, mass , + RealD, resid, + RealD, ChebyLow, + RealD, ChebyHigh, + Integer, ChebyOrder) +// Integer, StartTrajectory, +// Integer, Trajectories, /* @brief Number of sweeps in this run */ +// bool, MetropolisTest, +// Integer, NoMetropolisUntil, +// std::string, StartingType, +// Integer, SW, +// RealD, Kappa, +// IntegratorParameters, MD) + + LanczosParameters() { + ////////////////////////////// Default values + mass = 0; +// MetropolisTest = true; +// NoMetropolisUntil = 10; +// StartTrajectory = 0; +// SW = 2; +// Trajectories = 10; +// StartingType = "HotStart"; + ///////////////////////////////// + } + + template + LanczosParameters(Reader & TheReader){ + initialize(TheReader); + } + + template < class ReaderClass > + void initialize(Reader &TheReader){ +// std::cout << GridLogMessage << "Reading HMC\n"; + read(TheReader, "HMC", *this); + } + + + void print_parameters() const { +// std::cout << GridLogMessage << "[HMC parameters] Trajectories : " << Trajectories << "\n"; +// std::cout << GridLogMessage << "[HMC parameters] Start trajectory : " << StartTrajectory << "\n"; +// std::cout << GridLogMessage << "[HMC parameters] Metropolis test (on/off): " << std::boolalpha << MetropolisTest << "\n"; +// std::cout << GridLogMessage << "[HMC parameters] Thermalization trajs : " << NoMetropolisUntil << "\n"; +// std::cout << GridLogMessage << "[HMC parameters] Starting type : " << StartingType << "\n"; +// MD.print_parameters(); + } + +}; + +} + +int main(int argc, char** argv) { + Grid_init(&argc, &argv); + + GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid( + GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), + GridDefaultMpi()); + GridRedBlackCartesian* UrbGrid = + SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian* FGrid = UGrid; + GridRedBlackCartesian* FrbGrid = UrbGrid; +// printf("UGrid=%p UrbGrid=%p FGrid=%p FrbGrid=%p\n", UGrid, UrbGrid, FGrid, FrbGrid); + + std::vector seeds4({1, 2, 3, 4}); + std::vector seeds5({5, 6, 7, 8}); + GridParallelRNG RNG5(FGrid); + RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); + RNG4.SeedFixedIntegers(seeds4); + GridParallelRNG RNG5rb(FrbGrid); + RNG5.SeedFixedIntegers(seeds5); + + LatticeGaugeField Umu(UGrid); +// SU::HotConfiguration(RNG4, Umu); + + FieldMetaData header; + std::string file("./config"); + + int precision32 = 0; + int tworow = 0; +// NerscIO::writeConfiguration(Umu,file,tworow,precision32); + NerscIO::readConfiguration(Umu,header,file); + +/* + std::vector U(4, UGrid); + for (int mu = 0; mu < Nd; mu++) { + U[mu] = PeekIndex(Umu, mu); + } +*/ + + int Nstop = 5; + int Nk = 10; + int Np = 90; + int Nm = Nk + Np; + int MaxIt = 10000; + RealD resid = 1.0e-5; + + RealD mass = -1.0; + + LanczosParameters LanParams; +#if 1 + { + XmlReader HMCrd("LanParams.xml"); + read(HMCrd,"LanczosParameters",LanParams); + } +#else + { + LanParams.mass = mass; + } +#endif + std::cout << GridLogMessage<< LanParams < - 5.0){ + FermionOp WilsonOperator(Umu,*FGrid,*FrbGrid,2.+mass); + InvG5LinearOperator HermOp(WilsonOperator,-2.); /// <----- + //SchurDiagTwoOperator HermOp(WilsonOperator); +// Gamma5HermitianLinearOperator HermOp2(WilsonOperator); /// <----- + + std::vector Coeffs{0, 0, 1.}; + Polynomial PolyX(Coeffs); + Chebyshev Cheby(LanParams.ChebyLow,LanParams.ChebyHigh,LanParams.ChebyOrder); + + FunctionHermOp OpCheby(Cheby,HermOp); +// InvHermOp Op(WilsonOperator,HermOp); + PlainHermOp Op (HermOp); +// PlainHermOp Op2 (HermOp2); + + ImplicitlyRestartedLanczos IRL(OpCheby, Op, Nstop, Nk, Nm, resid, MaxIt); + + std::vector eval(Nm); + FermionField src(FGrid); + gaussian(RNG5, src); + std::vector evec(Nm, FGrid); + for (int i = 0; i < 1; i++) { + std::cout << i << " / " << Nm << " grid pointer " << evec[i].Grid() + << std::endl; + }; + + int Nconv; + IRL.calc(eval, evec, src, Nconv); + + std::cout << mass <<" : " << eval << std::endl; + + Gamma g5(Gamma::Algebra::Gamma5) ; + ComplexD dot; + FermionField tmp(FGrid); + for (int i = 0; i < Nstop ; i++) { + tmp = g5*evec[i]; + dot = innerProduct(tmp,evec[i]); + std::cout << mass << " : " << eval[i] << " " << real(dot) << " " << imag(dot) << std::endl ; + } + src = evec[0]+evec[1]+evec[2]; + mass += -0.1; +} + + Grid_finalize(); +} diff --git a/tests/lanczos/Test_wilson_specflow.cc b/tests/lanczos/Test_wilson_specflow.cc new file mode 100644 index 00000000..e9bd04df --- /dev/null +++ b/tests/lanczos/Test_wilson_specflow.cc @@ -0,0 +1,211 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/Test_dwf_lanczos.cc + +Copyright (C) 2015 + +Author: Chulwoo Jung + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +using namespace std; +using namespace Grid; + ; + +typedef WilsonFermionD FermionOp; +typedef typename WilsonFermionD::FermionField FermionField; + + +RealD AllZero(RealD x) { return 0.; } + +namespace Grid { + +struct LanczosParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters, + RealD, mass , + RealD, ChebyLow, + RealD, ChebyHigh, + Integer, ChebyOrder) +// Integer, StartTrajectory, +// Integer, Trajectories, /* @brief Number of sweeps in this run */ +// bool, MetropolisTest, +// Integer, NoMetropolisUntil, +// std::string, StartingType, +// Integer, SW, +// RealD, Kappa, +// IntegratorParameters, MD) + + LanczosParameters() { + ////////////////////////////// Default values + mass = 0; +// MetropolisTest = true; +// NoMetropolisUntil = 10; +// StartTrajectory = 0; +// SW = 2; +// Trajectories = 10; +// StartingType = "HotStart"; + ///////////////////////////////// + } + + template + LanczosParameters(Reader & TheReader){ + initialize(TheReader); + } + + template < class ReaderClass > + void initialize(Reader &TheReader){ +// std::cout << GridLogMessage << "Reading HMC\n"; + read(TheReader, "HMC", *this); + } + + + void print_parameters() const { +// std::cout << GridLogMessage << "[HMC parameters] Trajectories : " << Trajectories << "\n"; +// std::cout << GridLogMessage << "[HMC parameters] Start trajectory : " << StartTrajectory << "\n"; +// std::cout << GridLogMessage << "[HMC parameters] Metropolis test (on/off): " << std::boolalpha << MetropolisTest << "\n"; +// std::cout << GridLogMessage << "[HMC parameters] Thermalization trajs : " << NoMetropolisUntil << "\n"; +// std::cout << GridLogMessage << "[HMC parameters] Starting type : " << StartingType << "\n"; +// MD.print_parameters(); + } + +}; + +} + +int main(int argc, char** argv) { + Grid_init(&argc, &argv); + + GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid( + GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), + GridDefaultMpi()); + GridRedBlackCartesian* UrbGrid = + SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian* FGrid = UGrid; + GridRedBlackCartesian* FrbGrid = UrbGrid; +// printf("UGrid=%p UrbGrid=%p FGrid=%p FrbGrid=%p\n", UGrid, UrbGrid, FGrid, FrbGrid); + + std::vector seeds4({1, 2, 3, 4}); + std::vector seeds5({5, 6, 7, 8}); + GridParallelRNG RNG5(FGrid); + RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); + RNG4.SeedFixedIntegers(seeds4); + GridParallelRNG RNG5rb(FrbGrid); + RNG5.SeedFixedIntegers(seeds5); + + LatticeGaugeField Umu(UGrid); +// SU::HotConfiguration(RNG4, Umu); + + FieldMetaData header; + std::string file("./config"); + + int precision32 = 0; + int tworow = 0; +// NerscIO::writeConfiguration(Umu,file,tworow,precision32); + NerscIO::readConfiguration(Umu,header,file); + +/* + std::vector U(4, UGrid); + for (int mu = 0; mu < Nd; mu++) { + U[mu] = PeekIndex(Umu, mu); + } +*/ + + int Nstop = 10; + int Nk = 20; + int Np = 80; + int Nm = Nk + Np; + int MaxIt = 10000; + RealD resid = 1.0e-5; + + RealD mass = -1.0; + + LanczosParameters LanParams; +#if 1 + { + XmlReader HMCrd("LanParams.xml"); + read(HMCrd,"LanczosParameters",LanParams); + } +#else + { + LanParams.mass = mass; + } +#endif + std::cout << GridLogMessage<< LanParams < - 5.0){ + FermionOp WilsonOperator(Umu,*FGrid,*FrbGrid,mass); + MdagMLinearOperator HermOp(WilsonOperator); /// <----- + //SchurDiagTwoOperator HermOp(WilsonOperator); + Gamma5HermitianLinearOperator HermOp2(WilsonOperator); /// <----- + + std::vector Coeffs{0, 1.}; + Polynomial PolyX(Coeffs); +// Chebyshev Cheby(0.5, 60., 31); +// RealD, ChebyLow, +// RealD, ChebyHigh, +// Integer, ChebyOrder) + + Chebyshev Cheby(LanParams.ChebyLow,LanParams.ChebyHigh,LanParams.ChebyOrder); + + FunctionHermOp OpCheby(Cheby,HermOp); + PlainHermOp Op (HermOp); + PlainHermOp Op2 (HermOp2); + + ImplicitlyRestartedLanczos IRL(OpCheby, Op2, Nstop, Nk, Nm, resid, MaxIt); + + std::vector eval(Nm); + FermionField src(FGrid); + gaussian(RNG5, src); + std::vector evec(Nm, FGrid); + for (int i = 0; i < 1; i++) { + std::cout << i << " / " << Nm << " grid pointer " << evec[i].Grid() + << std::endl; + }; + + int Nconv; + IRL.calc(eval, evec, src, Nconv); + + std::cout << mass <<" : " << eval << std::endl; + + Gamma g5(Gamma::Algebra::Gamma5) ; + ComplexD dot; + FermionField tmp(FGrid); + for (int i = 0; i < Nstop ; i++) { + tmp = g5*evec[i]; + dot = innerProduct(tmp,evec[i]); + std::cout << mass << " : " << eval[i] << " " << real(dot) << " " << imag(dot) << std::endl ; + } + src = evec[0]+evec[1]+evec[2]; + mass += -0.1; +} + + Grid_finalize(); +} diff --git a/tests/smearing/Test_WilsonFlow.cc b/tests/smearing/Test_WilsonFlow.cc index e0726f87..4acd3b4f 100644 --- a/tests/smearing/Test_WilsonFlow.cc +++ b/tests/smearing/Test_WilsonFlow.cc @@ -33,8 +33,7 @@ namespace Grid{ GRID_SERIALIZABLE_CLASS_MEMBERS(WFParameters, int, steps, double, step_size, - int, meas_interval, - double, maxTau); // for the adaptive algorithm + int, meas_interval); template @@ -86,7 +85,7 @@ int main(int argc, char **argv) { WFParameters WFPar(Reader); ConfParameters CPar(Reader); CheckpointerParameters CPPar(CPar.conf_prefix, CPar.rng_prefix); - BinaryHmcCheckpointer CPBin(CPPar); + NerscHmcCheckpointer CPBin(CPPar); for (int conf = CPar.StartConfiguration; conf <= CPar.EndConfiguration; conf+= CPar.Skip){ @@ -96,19 +95,13 @@ int main(int argc, char **argv) { std::cout << GridLogMessage << "Initial plaquette: " << WilsonLoops::avgPlaquette(Umu) << std::endl; - int t=WFPar.maxTau; - WilsonFlowAdaptive WF(WFPar.step_size, WFPar.maxTau, - 1.0e-4, + WilsonFlow WF(WFPar.step_size, WFPar.steps, WFPar.meas_interval); WF.smear(Uflow, Umu); RealD WFlow_plaq = WilsonLoops::avgPlaquette(Uflow); - RealD WFlow_TC = WilsonLoops::TopologicalCharge(Uflow); - RealD WFlow_T0 = WF.energyDensityPlaquette(t,Uflow); std::cout << GridLogMessage << "Plaquette "<< conf << " " << WFlow_plaq << std::endl; - std::cout << GridLogMessage << "T0 "<< conf << " " << WFlow_T0 << std::endl; - std::cout << GridLogMessage << "TopologicalCharge "<< conf << " " << WFlow_TC << std::endl; std::cout<< GridLogMessage << " Admissibility check:\n"; const double sp_adm = 0.067; // admissible threshold diff --git a/tests/solver/Test_dwf_cg_prec.cc b/tests/solver/Test_dwf_cg_prec.cc index af3f4cf0..efbd7fc6 100644 --- a/tests/solver/Test_dwf_cg_prec.cc +++ b/tests/solver/Test_dwf_cg_prec.cc @@ -1,4 +1,4 @@ -************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid diff --git a/tests/solver/Test_dwf_multishift_mixedprec.cc b/tests/solver/Test_dwf_multishift_mixedprec.cc index bdede459..63ffe1c6 100644 --- a/tests/solver/Test_dwf_multishift_mixedprec.cc +++ b/tests/solver/Test_dwf_multishift_mixedprec.cc @@ -165,6 +165,7 @@ int main (int argc, char ** argv) } } if(gparity){ +#ifdef ENABLE_GPARITY std::cout << "Running test with G-parity BCs in " << gpdir << " direction" << std::endl; GparityWilsonImplParams params; params.twists[gpdir] = 1; @@ -174,6 +175,9 @@ int main (int argc, char ** argv) ConjugateGimplD::setDirections(conj_dirs); run_test(argc,argv,params); +#else + std::cout << " Gparity is not compiled "<= self.steps : + iren.DestroyTimer(self.timerId) + +def WriteImage(fileName, renWin): + ''' + ''' + + import os + + if fileName: + # Select the writer to use. + path, ext = os.path.splitext(fileName) + ext = ext.lower() + if not ext: + ext = '.png' + fileName = fileName + ext + elif ext == '.jpg': + writer = vtkJPEGWriter() + else: + writer = vtkPNGWriter() + + windowto_image_filter = vtkWindowToImageFilter() + windowto_image_filter.SetInput(renWin) + windowto_image_filter.SetScale(1) # image quality + windowto_image_filter.SetInputBufferTypeToRGBA() + + writer.SetFileName(fileName) + writer.SetInputConnection(windowto_image_filter.GetOutputPort()) + writer.Write() + else: + raise RuntimeError('Need a filename.') + + +def main(): + + colors = vtkNamedColors() + + file_name = get_program_parameters() + + colors.SetColor('InstantonColor', [240, 184, 160, 255]) + colors.SetColor('BackfaceColor', [255, 229, 200, 255]) + colors.SetColor('BkgColor', [51, 77, 102, 255]) + + # Create the renderer, the render window, and the interactor. The renderer + # draws into the render window, the interactor enables mouse- and + # keyboard-based interaction with the data within the render window. + # + a_renderer = vtkRenderer() + ren_win = vtkRenderWindow() + ren_win.AddRenderer(a_renderer) + + iren = vtkRenderWindowInteractor() + iren.SetRenderWindow(ren_win) + + # The following reader is used to read a series of 2D slices (images) + # that compose the volume. The slice dimensions are set, and the + # pixel spacing. The data Endianness must also be specified. The reader + # uses the FilePrefix in combination with the slice number to construct + # filenames using the format FilePrefix.%d. (In this case the FilePrefix + # is the root name of the file: quarter.) + imageData = vtkImageData() + imageData.SetDimensions(16, 16, 16) + imageData.AllocateScalars(VTK_DOUBLE, 1) + + dims = imageData.GetDimensions() + + # Fill every entry of the image data with '2.0' + # Set the input data + for z in range(dims[2]): + z0 = dims[2]/2 + for y in range(dims[1]): + y0 = dims[1]/2 + for x in range(dims[0]): + x0 = dims[0]/2 + imageData.SetScalarComponentFromDouble(x, y, z, 0, math.exp(-0.25*((x-x0)*(x-x0)+(y-y0)*(y-y0)+z*z)) - math.exp(-0.25*((x-x0)*(x-x0)+y*y+(z-z0)*(z-z0)))) + + instanton_extractor = vtkMarchingCubes() + instanton_extractor.SetInputData(imageData) + instanton_extractor.SetValue(0, 0.1) + + instanton_stripper = vtkStripper() + instanton_stripper.SetInputConnection(instanton_extractor.GetOutputPort()) + + instanton_mapper = vtkPolyDataMapper() + instanton_mapper.SetInputConnection(instanton_stripper.GetOutputPort()) + instanton_mapper.ScalarVisibilityOff() + + instanton = vtkActor() + instanton.SetMapper(instanton_mapper) + instanton.GetProperty().SetDiffuseColor(colors.GetColor3d('InstantonColor')) + instanton.GetProperty().SetSpecular(0.3) + instanton.GetProperty().SetSpecularPower(20) + instanton.GetProperty().SetOpacity(0.5) + + # The triangle stripper is used to create triangle strips from the + # isosurface these render much faster on may systems. + antiinstanton_extractor = vtkMarchingCubes() + antiinstanton_extractor.SetInputData(imageData) + antiinstanton_extractor.SetValue(0, -0.1) + + antiinstanton_stripper = vtkStripper() + antiinstanton_stripper.SetInputConnection(antiinstanton_extractor.GetOutputPort()) + + antiinstanton_mapper = vtkPolyDataMapper() + antiinstanton_mapper.SetInputConnection(antiinstanton_stripper.GetOutputPort()) + antiinstanton_mapper.ScalarVisibilityOff() + + antiinstanton = vtkActor() + antiinstanton.SetMapper(antiinstanton_mapper) + antiinstanton.GetProperty().SetDiffuseColor(colors.GetColor3d('Ivory')) + + # An outline provides box around the data. + outline_data = vtkOutlineFilter() + outline_data.SetInputData(imageData) + + map_outline = vtkPolyDataMapper() + map_outline.SetInputConnection(outline_data.GetOutputPort()) + + outline = vtkActor() + outline.SetMapper(map_outline) + outline.GetProperty().SetColor(colors.GetColor3d('Black')) + + # It is convenient to create an initial view of the data. The FocalPoint + # and Position form a vector direction. Later on (ResetCamera() method) + # this vector is used to position the camera to look at the data in + # this direction. + a_camera = vtkCamera() + a_camera.SetViewUp(0, 0, -1) + a_camera.SetPosition(0, -100, 0) + a_camera.SetFocalPoint(0, 0, 0) + a_camera.ComputeViewPlaneNormal() + a_camera.Azimuth(30.0) + a_camera.Elevation(30.0) + + # Actors are added to the renderer. An initial camera view is created. + # The Dolly() method moves the camera towards the FocalPoint, + # thereby enlarging the image. + a_renderer.AddActor(outline) + a_renderer.AddActor(instanton) + a_renderer.AddActor(antiinstanton) + a_renderer.SetActiveCamera(a_camera) + a_renderer.ResetCamera() + a_camera.Dolly(1.0) + + # Set a background color for the renderer and set the size of the + # render window (expressed in pixels). + a_renderer.SetBackground(colors.GetColor3d('BkgColor')) + ren_win.SetSize(1024, 1024) + ren_win.SetWindowName('ExpoDemo') + + # Note that when camera movement occurs (as it does in the Dolly() + # method), the clipping planes often need adjusting. Clipping planes + # consist of two planes: near and far along the view direction. The + # near plane clips out objects in front of the plane the far plane + # clips out objects behind the plane. This way only what is drawn + # between the planes is actually rendered. + a_renderer.ResetCameraClippingRange() + + # write image + # WriteImage('exp.jpg',ren_win) + + # Sign up to receive TimerEvent + cb = vtkTimerCallback(200, imageData, iren) + iren.AddObserver('TimerEvent', cb.execute) + cb.timerId = iren.CreateRepeatingTimer(50) + + # start the interaction and timer + ren_win.Render() + + # Initialize the event loop and then start it. + iren.Initialize() + iren.Start() + + +def get_program_parameters(): + import argparse + description = 'Simple lattice volumetric demo' + epilogue = ''' + Derived from VTK/Examples/Cxx/Medical2.cxx + ''' + parser = argparse.ArgumentParser(description=description, epilog=epilogue, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument('filename', help='FieldDensity.py') + args = parser.parse_args() + return args.filename + + +def vtk_version_ok(major, minor, build): + """ + Check the VTK version. + + :param major: Major version. + :param minor: Minor version. + :param build: Build version. + :return: True if the requested VTK version is greater or equal to the actual VTK version. + """ + needed_version = 10000000000 * int(major) + 100000000 * int(minor) + int(build) + try: + vtk_version_number = VTK_VERSION_NUMBER + except AttributeError: # as error: + ver = vtkVersion() + vtk_version_number = 10000000000 * ver.GetVTKMajorVersion() + 100000000 * ver.GetVTKMinorVersion() \ + + ver.GetVTKBuildVersion() + if vtk_version_number >= needed_version: + return True + else: + return False + + +if __name__ == '__main__': + main() diff --git a/visualisation/FieldDensityAnimate.cxx b/visualisation/FieldDensityAnimate.cxx new file mode 100644 index 00000000..825bb626 --- /dev/null +++ b/visualisation/FieldDensityAnimate.cxx @@ -0,0 +1,490 @@ +// Derived from VTK/Examples/Cxx/Medical2.cxx +// The example reads a volume dataset, extracts two isosurfaces that +// represent the skin and bone, and then displays them. +// +// Modified heavily by Peter Boyle to display lattice field theory data as movies and compare multiple files + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MPEG +#ifdef MPEG +#include +#endif + +#include +#include +#include +#include + +#include +#include + +#include + +#define USE_FLYING_EDGES +#ifdef USE_FLYING_EDGES +#include +typedef vtkFlyingEdges3D isosurface; +#else +#include +typedef vtkMarchingCubes isosurface; +#endif + +int mpeg = 0 ; +int xlate = 0 ; + +template void readFile(T& out, std::string const fname){ +#ifdef HAVE_LIME + Grid::emptyUserRecord record; + Grid::ScidacReader RD; + RD.open(fname); + RD.readScidacFieldRecord(out,record); + RD.close(); +#endif +} +using namespace Grid; + +class FrameUpdater : public vtkCallbackCommand +{ +public: + + FrameUpdater() { + TimerCount = 0; + xoff = 0; + t = 0; + imageData = nullptr; + grid_data = nullptr; + timerId = 0; + maxCount = -1; + } + + static FrameUpdater* New() + { + FrameUpdater* cb = new FrameUpdater; + cb->TimerCount = 0; + return cb; + } + + virtual void Execute(vtkObject* caller, unsigned long eventId,void* vtkNotUsed(callData)) + { + const int max=256; + char text_string[max]; + + if (this->TimerCount < this->maxCount) { + + if (vtkCommand::TimerEvent == eventId) + { + ++this->TimerCount; + + // Make a new frame + auto latt_size = grid_data->Grid()->GlobalDimensions(); + for(int xx=0;xxSetScalarComponentFromDouble(xx,yy,zz,0,value); + }}} + + if ( xlate ) { + xoff = (xoff + 1)%latt_size[0]; + if ( xoff== 0 ) t = (t+1)%latt_size[3]; + } else { + t = (t+1)%latt_size[3]; + if ( t== 0 ) xoff = (xoff + 1)%latt_size[0]; + } + + snprintf(text_string,max,"T=%d",t); + text->SetInput(text_string); + + std::cout << this->TimerCount<<"/"<Modified(); + + vtkRenderWindowInteractor* iren = dynamic_cast(caller); + iren->GetRenderWindow()->Render(); + + } + } + + if (this->TimerCount >= this->maxCount) { + vtkRenderWindowInteractor* iren = dynamic_cast(caller); + if (this->timerId > -1) + { + iren->DestroyTimer(this->timerId); + } + } + } + + +private: + int TimerCount; + int xoff; + int t; +public: + Grid::LatticeComplexD * grid_data; + vtkImageData* imageData = nullptr; + vtkTextActor* text = nullptr; + vtkFFMPEGWriter *writer = nullptr; + int timerId ; + int maxCount ; + double rms; + isosurface * posExtractor; + isosurface * negExtractor; +}; +class SliderCallback : public vtkCommand +{ +public: + static SliderCallback* New() + { + return new SliderCallback; + } + virtual void Execute(vtkObject* caller, unsigned long eventId, void* callData) + { + vtkSliderWidget *sliderWidget = vtkSliderWidget::SafeDownCast(caller); + if (sliderWidget) + { + contour = ((vtkSliderRepresentation *)sliderWidget->GetRepresentation())->GetValue(); + } + for(int i=0;iposExtractor->SetValue(0, SliderCallback::contour*fu_list[i]->rms); + fu_list[i]->negExtractor->SetValue(0, -SliderCallback::contour*fu_list[i]->rms); + fu_list[i]->posExtractor->Modified(); + fu_list[i]->negExtractor->Modified(); + } + } +public: + static double contour; + std::vector fu_list; +}; + + +double SliderCallback::contour; + +int main(int argc, char* argv[]) +{ + using namespace Grid; + + Grid_init(&argc, &argv); + GridLogLayout(); + + auto latt_size = GridDefaultLatt(); + auto simd_layout = GridDefaultSimd(Nd, vComplex::Nsimd()); + auto mpi_layout = GridDefaultMpi(); + GridCartesian Grid(latt_size, simd_layout, mpi_layout); + + + std::cout << argc << " command Line arguments "< file_list; + double default_contour = 1.0; + std::string arg; +#ifdef MPEG + if( GridCmdOptionExists(argv,argv+argc,"--mpeg") ){ + mpeg = 1; + } +#endif + + if( GridCmdOptionExists(argv,argv+argc,"--xlate") ){ + xlate = 1; + } + + if( GridCmdOptionExists(argv,argv+argc,"--isosurface") ){ + arg=GridCmdOptionPayload(argv,argv+argc,"--isosurface"); + GridCmdOptionFloat(arg,default_contour); + } + if( GridCmdOptionExists(argv,argv+argc,"--file1") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--file1"); + file_list.push_back(arg); + } + if( GridCmdOptionExists(argv,argv+argc,"--file2") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--file2"); + file_list.push_back(arg); + } + if( GridCmdOptionExists(argv,argv+argc,"--file3") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--file3"); + file_list.push_back(arg); + } + if( GridCmdOptionExists(argv,argv+argc,"--file4") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--file4"); + file_list.push_back(arg); + } + for(int c=0;c colors; + std::array posColor{{240, 184, 160, 255}}; colors->SetColor("posColor", posColor.data()); + std::array bkg{{51, 77, 102, 255}}; colors->SetColor("BkgColor", bkg.data()); + + // Create the renderer, the render window, and the interactor. The renderer + // draws into the render window, the interactor enables mouse- and + // keyboard-based interaction with the data within the render window. + // + vtkNew renWin; + vtkNew iren; + iren->SetRenderWindow(renWin); + + + std::vector data(file_list.size(),&Grid); + FieldMetaData header; + + + int frameCount; + if ( mpeg ) frameCount = latt_size[3]; + else frameCount = latt_size[0] * latt_size[3]; + + std::vector fu_list; + for (int f=0;f aCamera; + aCamera->SetViewUp(0, 0, -1); + aCamera->SetPosition(0, -1000, 0); + aCamera->SetFocalPoint(0, 0, 0); + aCamera->ComputeViewPlaneNormal(); + aCamera->Azimuth(30.0); + aCamera->Elevation(30.0); + + + vtkNew aRenderer; + renWin->AddRenderer(aRenderer); + + double vol = data[f].Grid()->gSites(); + + std::cout << "Reading "< imageData; + imageData->SetDimensions(latt_size[0],latt_size[1],latt_size[2]); + imageData->AllocateScalars(VTK_DOUBLE, 1); + for(int xx=0;xxSetScalarComponentFromDouble(xx,yy,zz,0,value); + }}} + + vtkNew posExtractor; + posExtractor->SetInputData(imageData); + posExtractor->SetValue(0, contour); + + vtkNew posStripper; + posStripper->SetInputConnection(posExtractor->GetOutputPort()); + + vtkNew posMapper; + posMapper->SetInputConnection(posStripper->GetOutputPort()); + posMapper->ScalarVisibilityOff(); + + vtkNew pos; + pos->SetMapper(posMapper); + pos->GetProperty()->SetDiffuseColor(colors->GetColor3d("posColor").GetData()); + pos->GetProperty()->SetSpecular(0.3); + pos->GetProperty()->SetSpecularPower(20); + pos->GetProperty()->SetOpacity(0.5); + + // An isosurface, or contour value is set + // The triangle stripper is used to create triangle strips from the + // isosurface; these render much faster on may systems. + vtkNew negExtractor; + negExtractor->SetInputData(imageData); + negExtractor->SetValue(0, -contour); + + vtkNew negStripper; + negStripper->SetInputConnection(negExtractor->GetOutputPort()); + + vtkNew negMapper; + negMapper->SetInputConnection(negStripper->GetOutputPort()); + negMapper->ScalarVisibilityOff(); + + vtkNew neg; + neg->SetMapper(negMapper); + neg->GetProperty()->SetDiffuseColor(colors->GetColor3d("Ivory").GetData()); + + // An outline provides context around the data. + vtkNew outlineData; + outlineData->SetInputData(imageData); + + vtkNew mapOutline; + mapOutline->SetInputConnection(outlineData->GetOutputPort()); + + vtkNew outline; + outline->SetMapper(mapOutline); + outline->GetProperty()->SetColor(colors->GetColor3d("Black").GetData()); + + vtkNew Text; + Text->SetInput(file_list[f].c_str()); + Text->SetPosition2(0,0); + Text->GetTextProperty()->SetFontSize(48); + Text->GetTextProperty()->SetColor(colors->GetColor3d("Gold").GetData()); + + vtkNew TextT; + TextT->SetInput("T=0"); + TextT->SetPosition(0,.9*1025); + TextT->GetTextProperty()->SetFontSize(48); + TextT->GetTextProperty()->SetColor(colors->GetColor3d("Gold").GetData()); + + + // Actors are added to the renderer. An initial camera view is created. + // The Dolly() method moves the camera towards the FocalPoint, + // thereby enlarging the image. + aRenderer->AddActor(Text); + aRenderer->AddActor(TextT); + aRenderer->AddActor(outline); + aRenderer->AddActor(pos); + aRenderer->AddActor(neg); + + // Sign up to receive TimerEvent + vtkNew fu; + fu->imageData = imageData; + fu->grid_data = &data[f]; + fu->text = TextT; + fu->maxCount = frameCount; + fu->posExtractor = posExtractor; + fu->negExtractor = negExtractor; + fu->rms = rms; + + iren->AddObserver(vtkCommand::TimerEvent, fu); + + aRenderer->SetActiveCamera(aCamera); + aRenderer->ResetCamera(); + aRenderer->SetBackground(colors->GetColor3d("BkgColor").GetData()); + aCamera->Dolly(1.0); + + double nf = file_list.size(); + std::cout << " Adding renderer " <SetViewport((1.0/nf)*f, 0.0,(1.0/nf)*(f+1) , 1.0); + + // Note that when camera movement occurs (as it does in the Dolly() + // method), the clipping planes often need adjusting. Clipping planes + // consist of two planes: near and far along the view direction. The + // near plane clips out objects in front of the plane; the far plane + // clips out objects behind the plane. This way only what is drawn + // between the planes is actually rendered. + aRenderer->ResetCameraClippingRange(); + + fu_list.push_back(fu); + } + + + // Set a background color for the renderer and set the size of the + // render window (expressed in pixels). + // Initialize the event loop and then start it. + renWin->SetSize(1024*file_list.size(), 1024); + renWin->SetWindowName("FieldDensity"); + renWin->Render(); + + iren->Initialize(); + + if ( mpeg ) { +#ifdef MPEG + vtkWindowToImageFilter *imageFilter = vtkWindowToImageFilter::New(); + imageFilter->SetInput( renWin ); + imageFilter->SetInputBufferTypeToRGB(); + + vtkFFMPEGWriter *writer = vtkFFMPEGWriter::New(); + writer->SetFileName("movie.avi"); + writer->SetRate(1); + writer->SetInputConnection(imageFilter->GetOutputPort()); + writer->Start(); + + for(int i=0;imaxCount;i++){ + for(int f=0;fExecute(iren,vtkCommand::TimerEvent,nullptr); + } + imageFilter->Modified(); + writer->Write(); + } + writer->End(); + writer->Delete(); +#else + assert(-1 && "MPEG support not compiled"); +#endif + } else { + + // Add control of contour threshold + // Create a slider widget + vtkSmartPointer sliderRep = vtkSmartPointer::New(); + sliderRep->SetMinimumValue(0.1); + sliderRep->SetMaximumValue(5.0); + sliderRep->SetValue(1.0); + sliderRep->SetTitleText("Fraction RMS"); + // Set color properties: + + // Change the color of the knob that slides + // sliderRep->GetSliderProperty()->SetColor(colors->GetColor3d("Green").GetData()); + sliderRep->GetTitleProperty()->SetColor(colors->GetColor3d("AliceBlue").GetData()); + sliderRep->GetLabelProperty()->SetColor(colors->GetColor3d("AliceBlue").GetData()); + sliderRep->GetSelectedProperty()->SetColor(colors->GetColor3d("DeepPink").GetData()); + + // Change the color of the bar + sliderRep->GetTubeProperty()->SetColor(colors->GetColor3d("MistyRose").GetData()); + sliderRep->GetCapProperty()->SetColor(colors->GetColor3d("Yellow").GetData()); + sliderRep->SetSliderLength(0.05); + sliderRep->SetSliderWidth(0.025); + sliderRep->SetEndCapLength(0.02); + + double nf = file_list.size(); + sliderRep->GetPoint1Coordinate()->SetCoordinateSystemToNormalizedDisplay(); + sliderRep->GetPoint1Coordinate()->SetValue(0.1, 0.1); + sliderRep->GetPoint2Coordinate()->SetCoordinateSystemToNormalizedDisplay(); + sliderRep->GetPoint2Coordinate()->SetValue(0.9/nf, 0.1); + + vtkSmartPointer sliderWidget = vtkSmartPointer::New(); + sliderWidget->SetInteractor(iren); + sliderWidget->SetRepresentation(sliderRep); + sliderWidget->SetAnimationModeToAnimate(); + sliderWidget->EnabledOn(); + + // Create the slider callback + vtkSmartPointer slidercallback = vtkSmartPointer::New(); + slidercallback->fu_list = fu_list; + sliderWidget->AddObserver(vtkCommand::InteractionEvent, slidercallback); + + int timerId = iren->CreateRepeatingTimer(300); + std::cout << "timerId: " << timerId << std::endl; + + // Start the interaction and timer + iren->Start(); + } + + Grid_finalize(); + + return EXIT_SUCCESS; +} diff --git a/visualisation/README b/visualisation/README new file mode 100644 index 00000000..b950e8eb --- /dev/null +++ b/visualisation/README @@ -0,0 +1,113 @@ +======================================== +Visualisation of Grid / SciDAC format density fields using VTK (visualisation toolkit). Peter Boyle, 2025. +======================================== + +Uses: + +https://vtk.org + +Files are, for example, those produced by + Grid/HMC/ComputeWilsonFlow.cc +and + Grid/HMC/site_plaquette.cc + +======================================== +Prerequisites: +======================================== + + +1) Install ffmpeg-7.0.2 (developer install, includes headers and libraries). + MacOS note: must install ffmpeg from source -- homebrew only installs binaries. + + https://ffmpeg.org/download.html#releases + + + Note: the latest ffmpeg (7.1.1) broke software compatibility with VTK. + + +2) Build and install VTK-9.4.2, with FFMEG support enabled. + + This is particularly involved on MacOS, so documented here. + + cd VTK-9.4.2 + mkdir build + cd build + ccmake .. + + +Using ccmake editor, set: + + FFMPEG_DIR /usr/local + +Toggle "advanced mode" (t) + +Set: + CMAKE_EXE_LINKER_FLAGS +CMAKE_MODULE_LINKER_FLAGS +CMAKE_SHARED_LINKER_FLAGS +each to: + + -framework Foundation -framework AudioToolbox -framework CoreAudio -liconv -lm -framework AVFoundation -framework CoreVideo -framework CoreMedia -framework CoreGraphics -framework AudioToolbox -framework OpenGL -framework OpenGL -framework VideoToolbox -framework CoreImage -framework AppKit -framework CoreFoundation -framework CoreServices -lz -lbz2 -Wl,-framework,CoreFoundation -Wl,-framework,Security -L/usr/local/lib -lavdevice -lavfilter -lavformat -lavcodec -lswresample -lswscale -lavutil + + Set paths for each of + FFMPEG_DIR /usr/local + FFMPEG_avcodec_INCLUDE_DIR /usr/local/include + FFMPEG_avcodec_LIBRARY /usr/local/lib/libavcodec.a + FFMPEG_avdevice_INCLUDE_DIR /usr/local/include + FFMPEG_avdevice_LIBRARY /usr/local/lib/libavdevice.a + FFMPEG_avfilter_INCLUDE_DIR /usr/local/include + FFMPEG_avfilter_LIBRARY /usr/local/lib/libavfilter.a + FFMPEG_avformat_INCLUDE_DIR /usr/local/include + FFMPEG_avformat_LIBRARY /usr/local/lib/libavformat.a + FFMPEG_avresample_INCLUDE_DIR /usr/local/include + FFMPEG_avresample_LIBRARY /usr/local/lib/libavresample.a + FFMPEG_avutil_INCLUDE_DIR /usr/local/include + FFMPEG_avutil_LIBRARY /usr/local/lib/libavutil.a + FFMPEG_swresample_INCLUDE_DIR /usr/local/include + FFMPEG_swresample_LIBRARY /usr/local/lib/libswresample.a + FFMPEG_swscale_INCLUDE_DIR /usr/local/include + FFMPEG_swscale_LIBRARY /usr/local/lib/libswscale.a + + VTK_MODULE_ENABLE_VTK_IOFFMPEG YES + + + VTK really should make it easier to pick up the flags required for FFMPEG linkage, especially as they are very quirky on MacOS. + + +======================================== +Grid: +======================================== + +3) Build and install a version of Grid + +4) Ensure "grid-config" is in your path. + +5) cd Grid/visualisation/ + libs=`grid-config --libs` + ldflags=`grid-config --ldflags` + cxxflags=`grid-config --cxxflags` + cxx=`grid-config --cxx` + + mkdir build + cd build + + LDFLAGS="$ldflags $libs " cmake .. -DCMAKE_CXX_COMPILER=$cxx -DCMAKE_CXX_FLAGS=$cxxflags + + make + +6) Invoke as: + + FieldDensityAnimate --isosurface --grid X.Y.Z.T --file1 SciDacDensityFile1 [--xlate] [--mpeg] + FieldDensityAnimate --isosurface --grid X.Y.Z.T --file1 SciDacDensityFile1 --file2 SciDacDensityFile2 [--xlate] [--mpeg] + +================================== +Extensions +================================== + +7) Direct calling from Grid ?: + + Not yet implemented, but could develop sufficient interface to write a Lattice scalar field into MPEG direct from running code. + +8) Example python code: FieldDensity.py . This is not interfaced to Grid. + + diff --git a/visualisation/Topo-vs-flowtime.avi b/visualisation/Topo-vs-flowtime.avi new file mode 100644 index 00000000..e575b222 Binary files /dev/null and b/visualisation/Topo-vs-flowtime.avi differ diff --git a/visualisation/cmake-command b/visualisation/cmake-command new file mode 100644 index 00000000..d19afbad --- /dev/null +++ b/visualisation/cmake-command @@ -0,0 +1,9 @@ +libs=`grid-config --libs` +ldflags=`grid-config --ldflags` +cxxflags=`grid-config --cxxflags` +cxx=`grid-config --cxx` + +mkdir build +cd build + +LDFLAGS="$ldflags $libs " cmake .. -DCMAKE_CXX_COMPILER=$cxx -DCMAKE_CXX_FLAGS=$cxxflags