1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-18 15:57:05 +01:00

Merge branch 'develop' into feature/qed-fvol

This commit is contained in:
James Harrison
2018-02-07 10:11:00 +00:00
15 changed files with 186 additions and 38 deletions

View File

@ -182,6 +182,7 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,
#ifdef GRID_MPI3_SHMMMAP
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP implementation "<<std::endl;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -218,6 +219,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
assert(((uint64_t)ptr&0x3F)==0);
close(fd);
WorldShmCommBufs[r] =ptr;
std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
}
_ShmAlloc=1;
_ShmAllocBytes = bytes;
@ -232,6 +234,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
////////////////////////////////////////////////////////////////////////////////////////////
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
std::cout << "SharedMemoryAllocate "<< bytes<< " SHMOPEN implementation "<<std::endl;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
MPI_Barrier(WorldShmComm);
@ -259,7 +262,11 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
#endif
void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0);
if ( ptr == (void * )MAP_FAILED ) { perror("failed mmap"); assert(0); }
std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< size<< "bytes)"<<std::endl;
if ( ptr == (void * )MAP_FAILED ) {
perror("failed mmap");
assert(0);
}
assert(((uint64_t)ptr&0x3F)==0);
WorldShmCommBufs[r] =ptr;
@ -318,11 +325,12 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
heap_size = GlobalSharedMemory::ShmAllocBytes();
for(int r=0;r<ShmSize;r++){
uint32_t sr = (r==ShmRank) ? GlobalSharedMemory::WorldRank : 0 ;
uint32_t wsr = (r==ShmRank) ? GlobalSharedMemory::WorldShmRank : 0 ;
MPI_Allreduce(MPI_IN_PLACE,&sr,1,MPI_UINT32_T,MPI_SUM,comm);
MPI_Allreduce(MPI_IN_PLACE,&wsr,1,MPI_UINT32_T,MPI_SUM,ShmComm);
ShmCommBufs[r] = GlobalSharedMemory::WorldShmCommBufs[sr];
ShmCommBufs[r] = GlobalSharedMemory::WorldShmCommBufs[wsr];
// std::cout << "SetCommunicator ShmCommBufs ["<< r<< "] = "<< ShmCommBufs[r]<< " wsr = "<<wsr<<std::endl;
}
ShmBufferFreeAll();

View File

@ -469,7 +469,7 @@ void CayleyFermion5D<Impl>::MooeeInternalAsm(const FermionField &psi, FermionFie
}
a0 = a0+incr;
a1 = a1+incr;
a2 = a2+sizeof(Simd::scalar_type);
a2 = a2+sizeof(typename Simd::scalar_type);
}}
{
int lexa = s1+LLs*site;
@ -701,7 +701,7 @@ void CayleyFermion5D<Impl>::MooeeInternalZAsm(const FermionField &psi, FermionFi
}
a0 = a0+incr;
a1 = a1+incr;
a2 = a2+sizeof(Simd::scalar_type);
a2 = a2+sizeof(typename Simd::scalar_type);
}}
{
int lexa = s1+LLs*site;

View File

@ -475,7 +475,7 @@ namespace QCD {
}
a0 = a0 + incr;
a1 = a1 + incr;
a2 = a2 + sizeof(Simd::scalar_type);
a2 = a2 + sizeof(typename Simd::scalar_type);
}
}

View File

@ -853,7 +853,7 @@ namespace QCD {
a0 = a0 + incr;
a1 = a1 + incr;
a2 = a2 + sizeof(Simd::scalar_type);
a2 = a2 + sizeof(typename Simd::scalar_type);
}
}

View File

@ -556,7 +556,7 @@ namespace Optimization {
v3 = _mm256_add_epi32(v1, v2);
v1 = _mm256_hadd_epi32(v3, v3);
v2 = _mm256_hadd_epi32(v1, v1);
u1 = _mm256_castsi256_si128(v2) // upper half
u1 = _mm256_castsi256_si128(v2); // upper half
u2 = _mm256_extracti128_si256(v2, 1); // lower half
ret = _mm_add_epi32(u1, u2);
return _mm_cvtsi128_si32(ret);

View File

@ -79,7 +79,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define ZEND2f(Criir,Ciirr, tmp) "vshufps $0xb1," #Ciirr "," #Ciirr "," #tmp ";\n"\
"vsubps " #tmp "," #Ciirr "," #Criir"{%k7}" ";\n"
#define ZEND1d(Criir,Ciirr, tmp) "vshufpd $0x55," #Criir "," #Criir "," #tmp ";\n"\
#define ZEND1d(Criir,Ciirr, tmp) "vshufpd $0x55," #Criir "," #Criir "," #tmp ";\n"\
"vaddps " #tmp "," #Criir "," #Criir"{%k6}" ";\n"
#define ZEND2d(Criir,Ciirr, tmp) "vshufpd $0x55," #Ciirr "," #Ciirr "," #tmp ";\n"\

72
lib/util/Profiling.h Normal file
View File

@ -0,0 +1,72 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/util/Profiling.h
Copyright (C) 2018
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_PERF_PROFILING_H
#define GRID_PERF_PROFILING_H
#include <sstream>
#include <iostream>
#include <functional>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <unistd.h>
#include <signal.h>
struct System
{
static void profile(const std::string& name,std::function<void()> body) {
std::string filename = name.find(".data") == std::string::npos ? (name + ".data") : name;
// Launch profiler
pid_t pid;
std::stringstream s;
s << getpid();
pid = fork();
if (pid == 0) {
auto fd=open("/dev/null",O_RDWR);
dup2(fd,1);
dup2(fd,2);
exit(execl("/usr/bin/perf","perf","record","-o",filename.c_str(),"-p",s.str().c_str(),nullptr));
}
// Run body
body();
// Kill profiler
kill(pid,SIGINT);
waitpid(pid,nullptr,0);
}
static void profile(std::function<void()> body) {
profile("perf.data",body);
}
};
#endif // GRID_PERF_PROFILING_H