1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 01:05:38 +01:00

Booster update

This commit is contained in:
Peter Boyle 2024-03-06 19:03:45 +01:00
parent 228bbb9d81
commit 7e5bd46dd3
4 changed files with 13 additions and 20 deletions

View File

@ -31,7 +31,7 @@ Author: Peter Boyle <pboyle@bnl.gov>
#include <hipblas/hipblas.h> #include <hipblas/hipblas.h>
#endif #endif
#ifdef GRID_CUDA #ifdef GRID_CUDA
#include <hipblas/hipblas.h> #include <cublas_v2.h>
#endif #endif
#ifdef GRID_SYCL #ifdef GRID_SYCL
#include <oneapi/mkl.hpp> #include <oneapi/mkl.hpp>
@ -51,7 +51,7 @@ NAMESPACE_BEGIN(Grid);
typedef hipblasHandle_t gridblasHandle_t; typedef hipblasHandle_t gridblasHandle_t;
#endif #endif
#ifdef GRID_CUDA #ifdef GRID_CUDA
typedef cudablasHandle_t gridblasHandle_t; typedef cublasHandle_t gridblasHandle_t;
#endif #endif
#ifdef GRID_SYCL #ifdef GRID_SYCL
typedef cl::sycl::queue *gridblasHandle_t; typedef cl::sycl::queue *gridblasHandle_t;
@ -78,6 +78,7 @@ public:
#ifdef GRID_CUDA #ifdef GRID_CUDA
std::cout << "cublasCreate"<<std::endl; std::cout << "cublasCreate"<<std::endl;
cublasCreate(&gridblasHandle); cublasCreate(&gridblasHandle);
cublasSetPointerMode(gridblasHandle, CUBLAS_POINTER_MODE_DEVICE);
#endif #endif
#ifdef GRID_HIP #ifdef GRID_HIP
std::cout << "hipblasCreate"<<std::endl; std::cout << "hipblasCreate"<<std::endl;

View File

@ -280,20 +280,16 @@ void StaggeredKernels<Impl>::DhopImproved(StencilImpl &st, LebesgueOrder &lo,
if( interior && exterior ) { if( interior && exterior ) {
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGeneric,1); return;} if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGeneric,1); return;}
#ifndef GRID_CUDA
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHand,1); return;} if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHand,1); return;}
#ifndef GRID_CUDA
if (Opt == OptInlineAsm ) { ASM_CALL(DhopSiteAsm); return;} if (Opt == OptInlineAsm ) { ASM_CALL(DhopSiteAsm); return;}
#endif #endif
} else if( interior ) { } else if( interior ) {
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericInt,1); return;} if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericInt,1); return;}
#ifndef GRID_CUDA
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandInt,1); return;} if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandInt,1); return;}
#endif
} else if( exterior ) { } else if( exterior ) {
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericExt,1); return;} if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericExt,1); return;}
#ifndef GRID_CUDA
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandExt,1); return;} if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandExt,1); return;}
#endif
} }
assert(0 && " Kernel optimisation case not covered "); assert(0 && " Kernel optimisation case not covered ");
} }
@ -322,19 +318,13 @@ void StaggeredKernels<Impl>::DhopNaive(StencilImpl &st, LebesgueOrder &lo,
if( interior && exterior ) { if( interior && exterior ) {
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGeneric,0); return;} if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGeneric,0); return;}
#ifndef GRID_CUDA
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHand,0); return;} if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHand,0); return;}
#endif
} else if( interior ) { } else if( interior ) {
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericInt,0); return;} if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericInt,0); return;}
#ifndef GRID_CUDA
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandInt,0); return;} if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandInt,0); return;}
#endif
} else if( exterior ) { } else if( exterior ) {
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericExt,0); return;} if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericExt,0); return;}
#ifndef GRID_CUDA
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandExt,0); return;} if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandExt,0); return;}
#endif
} }
} }

View File

@ -5,10 +5,12 @@ LIME=/p/home/jusers/boyle2/juwels/gm2dwf/boyle/
--enable-gen-simd-width=64 \ --enable-gen-simd-width=64 \
--enable-shm=nvlink \ --enable-shm=nvlink \
--enable-accelerator=cuda \ --enable-accelerator=cuda \
--disable-gparity \
--disable-fermion-reps \
--with-lime=$LIME \ --with-lime=$LIME \
--disable-accelerator-cshift \ --enable-accelerator-cshift \
--disable-unified \ --disable-unified \
CXX=nvcc \ CXX=nvcc \
LDFLAGS="-cudart shared " \ LDFLAGS="-cudart shared " \
CXXFLAGS="-ccbin mpicxx -gencode arch=compute_80,code=sm_80 -std=c++14 -cudart shared" CXXFLAGS="-ccbin mpicxx -gencode arch=compute_80,code=sm_80 -std=c++17 -cudart shared -lcublas"

View File

@ -1,5 +1,5 @@
module load GCC/9.3.0 module load GCC
module load GMP/6.2.0 module load GMP
module load MPFR/4.1.0 module load MPFR
module load OpenMPI/4.1.0rc1 module load OpenMPI
module load CUDA/11.3 module load CUDA