/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid Source file: ./lib/lattice/Lattice_basis.h Copyright (C) 2015 Author: Peter Boyle Author: paboyle Author: Christoph Lehner This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ #pragma once NAMESPACE_BEGIN(Grid); template void basisOrthogonalize(std::vector &basis,Field &w,int k) { // If assume basis[j] are already orthonormal, // can take all inner products in parallel saving 2x bandwidth // Save 3x bandwidth on the second line of loop. // perhaps 2.5x speed up. // 2x overall in Multigrid Lanczos for(int j=0; j void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm) { typedef decltype(basis[0]) Field; typedef decltype(basis[0].View(AcceleratorRead)) View; hostVector h_basis_v(basis.size()); deviceVector d_basis_v(basis.size()); typedef typename std::remove_reference::type vobj; typedef typename std::remove_reference::type Coeff_t; GridBase* grid = basis[0].Grid(); for(int k=0;koSites(); uint64_t siteBlock=(grid->oSites()+nrot-1)/nrot; // Maximum 1 additional vector overhead deviceVector Bt(siteBlock * nrot); auto Bp=&Bt[0]; // GPU readable copy of matrix hostVector h_Qt_jv(Nm*Nm); deviceVector Qt_jv(Nm*Nm); Coeff_t *Qt_p = & Qt_jv[0]; thread_for(i,Nm*Nm,{ int j = i/Nm; int k = i%Nm; h_Qt_jv[i]=Qt(j,k); }); acceleratorCopyToDevice(&h_Qt_jv[0],Qt_p,Nm*Nm*sizeof(Coeff_t)); // Block the loop to keep storage footprint down for(uint64_t s=0;s void basisRotateJ(Field &result,std::vector &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm) { typedef decltype(basis[0].View(AcceleratorRead)) View; typedef typename Field::vector_object vobj; GridBase* grid = basis[0].Grid(); result.Checkerboard() = basis[0].Checkerboard(); hostVector h_basis_v(basis.size()); deviceVector d_basis_v(basis.size()); for(int k=0;k Qt_jv(Nm); double * Qt_j = & Qt_jv[0]; for(int k=0;koSites(),vobj::Nsimd(),{ vobj zzz=Zero(); auto B=coalescedRead(zzz); for(int k=k0; k void basisReorderInPlace(std::vector &_v,std::vector& sort_vals, std::vector& idx) { int vlen = idx.size(); assert(vlen>=1); assert(vlen<=sort_vals.size()); assert(vlen<=_v.size()); for (size_t i=0;ii for which _vnew[j] = _vold[i], // track the move idx[j] => idx[i] // track the move idx[i] => i ////////////////////////////////////// size_t j; for (j=i;j i); assert(j!=idx.size()); assert(idx[j]==i); swap(_v[i],_v[idx[i]]); // should use vector move constructor, no data copy std::swap(sort_vals[i],sort_vals[idx[i]]); idx[j] = idx[i]; idx[i] = i; } } } inline std::vector basisSortGetIndex(std::vector& sort_vals) { std::vector idx(sort_vals.size()); std::iota(idx.begin(), idx.end(), 0); // sort indexes based on comparing values in v std::sort(idx.begin(), idx.end(), [&sort_vals](int i1, int i2) { return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]); }); return idx; } template void basisSortInPlace(std::vector & _v,std::vector& sort_vals, bool reverse) { std::vector idx = basisSortGetIndex(sort_vals); if (reverse) std::reverse(idx.begin(), idx.end()); basisReorderInPlace(_v,sort_vals,idx); } // PAB: faster to compute the inner products first then fuse loops. // If performance critical can improve. template void basisDeflate(const std::vector &_v,const std::vector& eval,const Field& src_orig,Field& result) { result = Zero(); assert(_v.size()==eval.size()); int N = (int)_v.size(); for (int i=0;i