mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
commit
65e6e7da6f
@ -120,6 +120,39 @@ public:
|
|||||||
blockPromote(CoarseVec,FineVec,subspace);
|
blockPromote(CoarseVec,FineVec,subspace);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fill subspace[0..nn) with filtered noise vectors.
//
// For each requested vector: draw Gaussian noise from RNG, scale it to unit norm2,
// run it through a loose conjugate-gradient solve against hermop, rescale the
// result to unit norm2 and store it in subspace[b]. norm2 of hermop.Op applied to
// the vector is logged before and after the filter step.
//
//   RNG    : parallel random number generator used to draw the noise
//   hermop : operator handed to both the CG solver and the diagnostic Op() calls
//   nn     : number of subspace vectors to generate (defaults to nbasis)
virtual void CreateSubspace(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) {

  // Number of CG filter applications per vector
  const int filterPasses = 1;

  // Rescale a field in place so that its norm2 becomes one
  auto normalise = [](FineField &v) {
    RealD invNorm = std::pow(norm2(v),-0.5);
    v = v*invNorm;
  };

  // Deliberately loose solver settings: tolerance 1.0e-2, at most 100 iterations.
  // NOTE(review): the trailing `false` presumably suppresses the failure on
  // non-convergence -- confirm against the ConjugateGradient constructor.
  ConjugateGradient<FineField> CG(1.0e-2,100,false);

  FineField vec(FineGrid);    // current working vector (noise, then filtered)
  FineField Mvec(FineGrid);   // hermop applied to vec, used for logging only

  for(int b=0;b<nn;b++){

    // Zero initial guess for the solve below
    subspace[b] = Zero();

    // Unit-norm Gaussian starting vector
    gaussian(RNG,vec);
    normalise(vec);

    hermop.Op(vec,Mvec);
    std::cout<<GridLogMessage << "noise ["<<b<<"] <n|MdagM|n> "<<norm2(Mvec)<<std::endl;

    // Filter: solve with vec as the source, then renormalise the solution
    for(int pass=0;pass<filterPasses;pass++){
      CG(hermop,vec,subspace[b]);
      vec = subspace[b];
      normalise(vec);
    }

    hermop.Op(vec,Mvec);
    std::cout<<GridLogMessage << "filtered["<<b<<"] <f|MdagM|f> "<<norm2(Mvec)<<std::endl;

    // Keep the normalised, filtered vector as basis element b
    subspace[b] = vec;
  }
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// World of possibilities here. But have tried quite a lot of experiments (250+ jobs run on Summit)
|
// World of possibilities here. But have tried quite a lot of experiments (250+ jobs run on Summit)
|
||||||
// and this is the best I found
|
// and this is the best I found
|
||||||
|
@ -43,7 +43,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
// Advise the LatticeAccelerator class
|
// Advise the LatticeAccelerator class
|
||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
enum ViewAdvise {
|
enum ViewAdvise {
|
||||||
AdviseDefault = 0x0, // Reegular data
|
AdviseDefault = 0x0, // Regular data
|
||||||
AdviseInfrequentUse = 0x1, // Advise that the data is used infrequently. This can
|
AdviseInfrequentUse = 0x1, // Advise that the data is used infrequently. This can
|
||||||
// significantly influence performance of bulk storage.
|
// significantly influence performance of bulk storage.
|
||||||
|
|
||||||
|
@ -170,17 +170,24 @@ void GlobalSharedMemory::GetShmDims(const Coordinate &WorldDims,Coordinate &ShmD
|
|||||||
std::vector<int> primes({2,3,5});
|
std::vector<int> primes({2,3,5});
|
||||||
|
|
||||||
int dim = 0;
|
int dim = 0;
|
||||||
|
int last_dim = ndimension - 1;
|
||||||
int AutoShmSize = 1;
|
int AutoShmSize = 1;
|
||||||
while(AutoShmSize != WorldShmSize) {
|
while(AutoShmSize != WorldShmSize) {
|
||||||
for(int p=0;p<primes.size();p++) {
|
int p;
|
||||||
|
for(p=0;p<primes.size();p++) {
|
||||||
int prime=primes[p];
|
int prime=primes[p];
|
||||||
if ( divides(prime,WorldDims[dim]/ShmDims[dim])
|
if ( divides(prime,WorldDims[dim]/ShmDims[dim])
|
||||||
&& divides(prime,WorldShmSize/AutoShmSize) ) {
|
&& divides(prime,WorldShmSize/AutoShmSize) ) {
|
||||||
AutoShmSize*=prime;
|
AutoShmSize*=prime;
|
||||||
ShmDims[dim]*=prime;
|
ShmDims[dim]*=prime;
|
||||||
|
last_dim = (dim + ndimension - 1) % ndimension;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (p == primes.size() && last_dim == dim) {
|
||||||
|
std::cerr << "GlobalSharedMemory::GetShmDims failed" << std::endl;
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
dim=(dim+1) %ndimension;
|
dim=(dim+1) %ndimension;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -73,13 +73,14 @@ private:
|
|||||||
dealloc();
|
dealloc();
|
||||||
|
|
||||||
this->_odata_size = size;
|
this->_odata_size = size;
|
||||||
if ( size )
|
if ( size )
|
||||||
this->_odata = alloc.allocate(this->_odata_size);
|
this->_odata = alloc.allocate(this->_odata_size);
|
||||||
else
|
else
|
||||||
this->_odata = nullptr;
|
this->_odata = nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
public:
|
public:
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////////
|
||||||
// Can use to make accelerator dirty without copy from host ; useful for temporaries "dont care" prev contents
|
// Can use to make accelerator dirty without copy from host ; useful for temporaries "dont care" prev contents
|
||||||
/////////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -164,7 +164,7 @@ accelerator_inline void convertType(Lattice<T1> & out, const Lattice<T2> & in) {
|
|||||||
////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
template<class vobj>
|
template<class vobj>
|
||||||
inline auto localInnerProductD(const Lattice<vobj> &lhs,const Lattice<vobj> &rhs)
|
inline auto localInnerProductD(const Lattice<vobj> &lhs,const Lattice<vobj> &rhs)
|
||||||
-> Lattice<iScalar<decltype(TensorRemove(innerProductD2(lhs.View()[0],rhs.View()[0])))>>
|
-> Lattice<iScalar<decltype(TensorRemove(innerProductD2(lhs.View(CpuRead)[0],rhs.View(CpuRead)[0])))>>
|
||||||
{
|
{
|
||||||
autoView( lhs_v , lhs, AcceleratorRead);
|
autoView( lhs_v , lhs, AcceleratorRead);
|
||||||
autoView( rhs_v , rhs, AcceleratorRead);
|
autoView( rhs_v , rhs, AcceleratorRead);
|
||||||
@ -283,7 +283,7 @@ template<class vobj,class CComplex>
|
|||||||
Lattice<dotp> coarse_inner(coarse);
|
Lattice<dotp> coarse_inner(coarse);
|
||||||
|
|
||||||
// Precision promotion
|
// Precision promotion
|
||||||
fine_inner = localInnerProductD(fineX,fineY);
|
fine_inner = localInnerProductD<vobj>(fineX,fineY);
|
||||||
blockSum(coarse_inner,fine_inner);
|
blockSum(coarse_inner,fine_inner);
|
||||||
{
|
{
|
||||||
autoView( CoarseInner_ , CoarseInner,AcceleratorWrite);
|
autoView( CoarseInner_ , CoarseInner,AcceleratorWrite);
|
||||||
@ -486,13 +486,14 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
|
|||||||
for(int i=0;i<nbasis;i++) {
|
for(int i=0;i<nbasis;i++) {
|
||||||
Lattice<iScalar<CComplex> > ip = PeekIndex<0>(coarseData,i);
|
Lattice<iScalar<CComplex> > ip = PeekIndex<0>(coarseData,i);
|
||||||
|
|
||||||
Lattice<CComplex> cip(coarse);
|
//Lattice<CComplex> cip(coarse);
|
||||||
autoView( cip_ , cip, AcceleratorWrite);
|
//autoView( cip_ , cip, AcceleratorWrite);
|
||||||
autoView( ip_ , ip, AcceleratorRead);
|
//autoView( ip_ , ip, AcceleratorRead);
|
||||||
accelerator_forNB(sc,coarse->oSites(),CComplex::Nsimd(),{
|
//accelerator_forNB(sc,coarse->oSites(),CComplex::Nsimd(),{
|
||||||
coalescedWrite(cip_[sc], ip_(sc)());
|
// coalescedWrite(cip_[sc], ip_(sc)());
|
||||||
});
|
// });
|
||||||
blockZAXPY<vobj,CComplex >(fineData,cip,Basis[i],fineData);
|
//blockZAXPY<vobj,CComplex >(fineData,cip,Basis[i],fineData);
|
||||||
|
blockZAXPY(fineData,ip,Basis[i],fineData);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -30,11 +30,14 @@ protected:
|
|||||||
int checkerboard;
|
int checkerboard;
|
||||||
vobj *_odata; // A managed pointer
|
vobj *_odata; // A managed pointer
|
||||||
uint64_t _odata_size;
|
uint64_t _odata_size;
|
||||||
|
ViewAdvise advise;
|
||||||
public:
|
public:
|
||||||
accelerator_inline LatticeAccelerator() : checkerboard(0), _odata(nullptr), _odata_size(0), _grid(nullptr) { };
|
accelerator_inline LatticeAccelerator() : checkerboard(0), _odata(nullptr), _odata_size(0), _grid(nullptr), advise(AdviseDefault) { };
|
||||||
accelerator_inline uint64_t oSites(void) const { return _odata_size; };
|
accelerator_inline uint64_t oSites(void) const { return _odata_size; };
|
||||||
accelerator_inline int Checkerboard(void) const { return checkerboard; };
|
accelerator_inline int Checkerboard(void) const { return checkerboard; };
|
||||||
accelerator_inline int &Checkerboard(void) { return this->checkerboard; }; // can assign checkerboard on a container, not a view
|
accelerator_inline int &Checkerboard(void) { return this->checkerboard; }; // can assign checkerboard on a container, not a view
|
||||||
|
accelerator_inline ViewAdvise Advise(void) const { return advise; };
|
||||||
|
accelerator_inline ViewAdvise &Advise(void) { return this->advise; }; // can assign advise on a container, not a view
|
||||||
accelerator_inline void Conformable(GridBase * &grid) const
|
accelerator_inline void Conformable(GridBase * &grid) const
|
||||||
{
|
{
|
||||||
if (grid) conformable(grid, _grid);
|
if (grid) conformable(grid, _grid);
|
||||||
@ -86,7 +89,7 @@ public:
|
|||||||
MemoryManager::ViewOpen(this->cpu_ptr,
|
MemoryManager::ViewOpen(this->cpu_ptr,
|
||||||
this->_odata_size*sizeof(vobj),
|
this->_odata_size*sizeof(vobj),
|
||||||
mode,
|
mode,
|
||||||
AdviseDefault);
|
this->advise);
|
||||||
}
|
}
|
||||||
void ViewClose(void)
|
void ViewClose(void)
|
||||||
{ // Inform the manager
|
{ // Inform the manager
|
||||||
|
Loading…
Reference in New Issue
Block a user