Grid/lib/communicator/Communicator_base.h

    /*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid 

    Source file: ./lib/communicator/Communicator_base.h

    Copyright (C) 2015

Author: Peter Boyle <paboyle@ph.ed.ac.uk>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
#ifndef GRID_COMMUNICATOR_BASE_H
#define GRID_COMMUNICATOR_BASE_H

///////////////////////////////////
// Processor layout information
///////////////////////////////////
#ifdef GRID_COMMS_MPI
#include <mpi.h>
#endif
#ifdef GRID_COMMS_SHMEM
#include <mpp/shmem.h>
#endif
namespace Grid {
/**
 * Communicator over a Cartesian grid of processors.
 *
 * Holds the processor-grid bookkeeping and declares the reduction,
 * point-to-point exchange, barrier and broadcast primitives.  Only the thin
 * inline wrappers are defined in this class body; every other member is
 * declared here and defined elsewhere (presumably once per comms backend
 * selected by the GRID_COMMS_* macros -- TODO confirm against the sources).
 */
class CartesianCommunicator {
  public:

  // Communicator should know nothing of the physics grid, only processor grid.

    int              _Nprocessors;     // How many processors in all
    std::vector<int> _processors;      // Processor grid: how the dimensions are laid out over processors
    int              _processor;       // Linear rank of this processor
    std::vector<int> _processor_coor;  // Coordinate of this processor in the processor grid
    unsigned long    _ndimension;      // presumably the number of processor-grid dimensions -- TODO confirm

    // Handle type used to track outstanding transfers started by
    // SendToRecvFromBegin(); a plain int placeholder when MPI is not in use.
#ifdef GRID_COMMS_MPI
    MPI_Comm communicator;
    typedef MPI_Request CommsRequest_t;
#else
    typedef int CommsRequest_t;
#endif

    // Constructor; takes the processor-grid dimensions.
    CartesianCommunicator(const std::vector<int> &pdimensions_in);

    // Wraps MPI_Cart routines
    // presumably: neighbour ranks for a shift along `dim`, and conversion
    // between a linear rank and a processor coordinate -- TODO confirm.
    void ShiftedRanks(int dim,int shift,int & source, int & dest);
    int  RankFromProcessorCoor(std::vector<int> &coor);
    void ProcessorCoorFromRank(int rank,std::vector<int> &coor);

    /////////////////////////////////
    // Grid information queries
    //
    // These only read state, so they are const-qualified and usable through
    // a const CartesianCommunicator reference.
    /////////////////////////////////
    int                      IsBoss(void)            const { return _processor==0; }
    int                      BossRank(void)          const { return 0; }
    int                      ThisRank(void)          const { return _processor; }
    const std::vector<int> & ThisProcessorCoor(void) const { return _processor_coor; }
    const std::vector<int> & ProcessorGrid(void)     const { return _processors; }
    int                      ProcessorCount(void)    const { return _Nprocessors; }

    ////////////////////////////////////////////////////////////
    // Reduction
    //
    // The scalar overloads are defined out of line.  The complex overloads
    // below forward to the real-valued vector reduction, treating each complex
    // number as two consecutive reals (assumes ComplexF/ComplexD are
    // layout-compatible with float[2]/double[2], as std::complex guarantees).
    ////////////////////////////////////////////////////////////
    void GlobalSum(RealF &);
    void GlobalSumVector(RealF *,int N);

    void GlobalSum(RealD &);
    void GlobalSumVector(RealD *,int N);

    void GlobalSum(uint32_t &);

    void GlobalSum(ComplexF &c)
    {
      GlobalSumVector(reinterpret_cast<float *>(&c),2);
    }
    void GlobalSumVector(ComplexF *c,int N)
    {
      GlobalSumVector(reinterpret_cast<float *>(c),2*N);
    }

    void GlobalSum(ComplexD &c)
    {
      GlobalSumVector(reinterpret_cast<double *>(&c),2);
    }
    void GlobalSumVector(ComplexD *c,int N)
    {
      GlobalSumVector(reinterpret_cast<double *>(c),2*N);
    }

    // Generic reduction: views `o` as a flat array of
    // sizeof(obj)/sizeof(obj::scalar_type) scalars and sums it element-wise
    // through the matching GlobalSumVector overload.
    template<class obj> void GlobalSum(obj &o){
      typedef typename obj::scalar_type scalar_type;
      int words = sizeof(obj)/sizeof(scalar_type);
      // C-style cast kept on purpose: it accepts exactly the same set of
      // `obj` types as before this header was tidied up.
      scalar_type * ptr = (scalar_type *)& o;
      GlobalSumVector(ptr,words);
    }
    ////////////////////////////////////////////////////////////
    // Face exchange, buffer swap in translational invariant way
    ////////////////////////////////////////////////////////////
    void SendToRecvFrom(void *xmit,
			int xmit_to_rank,
			void *recv,
			int recv_from_rank,
			int bytes);

    void RecvFrom(void *recv,
		  int recv_from_rank,
		  int bytes);
    void SendTo(void *xmit,
		int xmit_to_rank,
		int bytes);

    // Split-phase variant: Begin appends request handles to `list`, and
    // Complete takes such a list back (presumably waiting on every entry --
    // TODO confirm against the implementation).
    void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
			 void *xmit,
			 int xmit_to_rank,
			 void *recv,
			 int recv_from_rank,
			 int bytes);
    void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);

    ////////////////////////////////////////////////////////////
    // Barrier
    ////////////////////////////////////////////////////////////
    void Barrier(void);

    ////////////////////////////////////////////////////////////
    // Broadcast a buffer and composite larger
    ////////////////////////////////////////////////////////////
    void Broadcast(int root,void* data, int bytes);

    // Convenience wrapper: broadcasts the raw bytes of `data` (sizeof(data) of
    // them) from `root`.  NOTE(review): only meaningful for types that can be
    // copied byte-wise; objects owning heap memory would not survive this.
    template<class obj> void Broadcast(int root,obj &data)
    {
      Broadcast(root,(void *)&data,sizeof(data));
    }

    // Static: callable without a CartesianCommunicator instance.
    static void BroadcastWorld(int root,void* data, int bytes);

};
}

#endif
Global edit adding copyright and license info to every source file. 2016-01-02 14:51:32 +00:00			`/*************************************************************************************`

			`Grid physics library, www.github.com/paboyle/Grid`

			`Source file: ./lib/communicator/Communicator_base.h`

			`Copyright (C) 2015`

			`Author: Peter Boyle <paboyle@ph.ed.ac.uk>`

			`This program is free software; you can redistribute it and/or modify`
			`it under the terms of the GNU General Public License as published by`
			`the Free Software Foundation; either version 2 of the License, or`
			`(at your option) any later version.`

			`This program is distributed in the hope that it will be useful,`
			`but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`GNU General Public License for more details.`

			`You should have received a copy of the GNU General Public License along`
			`with this program; if not, write to the Free Software Foundation, Inc.,`
			`51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.`

			`See the full license in the file "LICENSE" in the top level distribution directory`
			`*************************************************************************************/`
			`/* END LEGAL */`
Moving some things around for pretty 2015-05-11 19:09:49 +01:00			`#ifndef GRID_COMMUNICATOR_BASE_H`
			`#define GRID_COMMUNICATOR_BASE_H`

			`///////////////////////////////////`
			`// Processor layout information`
			`///////////////////////////////////`
			`#ifdef GRID_COMMS_MPI`
			`#include <mpi.h>`
			`#endif`
Updates for shmem 2016-02-11 00:50:32 +00:00			`#ifdef GRID_COMMS_SHMEM`
			`#include <mpp/shmem.h>`
			`#endif`
Moving some things around for pretty 2015-05-11 19:09:49 +01:00			`namespace Grid {`
			`class CartesianCommunicator {`
			`public:`

			`// Communicator should know nothing of the physics grid, only processor grid.`

			`int _Nprocessors; // How many in all`
			`std::vector<int> _processors; // Which dimensions get relayed out over processors lanes.`
			`int _processor; // linear processor rank`
			`std::vector<int> _processor_coor; // linear processor coordinate`
			`unsigned long _ndimension;`

			`#ifdef GRID_COMMS_MPI`
			`MPI_Comm communicator;`
			`typedef MPI_Request CommsRequest_t;`
			`#else`
			`typedef int CommsRequest_t;`
			`#endif`

			`// Constructor`
Domain wall fermions now invert; have the basis set up for Tanh/Zolo * (Cayley/PartFrac/ContFrac) * (Mobius/Shamir/Wilson) Approx Representation Kernel. All are done with space-time taking part in checkerboarding, Ls uncheckerboarded. Have only so far tested the Domain Wall limit of mobius, and at that only checked that it i) Inverts ii) 5dim DW == Ls copies of 4dim D2 iii) MeeInv Mee == 1 iv) Meo+Mee+Moe+Moo == M unprec. v) MpcDagMpc is Hermitian vi) Mdag is the adjoint of M between stochastic vectors. That said, the RB schur solve, RB MpcDagMpc solve, Unprec solve all converge and the true residual becomes small; so pretty good tests. 2015-06-02 16:57:12 +01:00			`CartesianCommunicator(const std::vector<int> &pdimensions_in);`
Moving some things around for pretty 2015-05-11 19:09:49 +01:00
			`// Wraps MPI_Cart routines`
			`void ShiftedRanks(int dim,int shift,int & source, int & dest);`
			`int RankFromProcessorCoor(std::vector<int> &coor);`
			`void ProcessorCoorFromRank(int rank,std::vector<int> &coor);`

			`/////////////////////////////////`
			`// Grid information queries`
			`/////////////////////////////////`
			`int IsBoss(void) { return _processor==0; };`
			`int BossRank(void) { return 0; };`
			`int ThisRank(void) { return _processor; };`
			`const std::vector<int> & ThisProcessorCoor(void) { return _processor_coor; };`
			`const std::vector<int> & ProcessorGrid(void) { return _processors; };`
			`int ProcessorCount(void) { return _Nprocessors; };`

			`////////////////////////////////////////////////////////////`
			`// Reduction`
			`////////////////////////////////////////////////////////////`
			`void GlobalSum(RealF &);`
			`void GlobalSumVector(RealF *,int N);`

			`void GlobalSum(RealD &);`
			`void GlobalSumVector(RealD *,int N);`

			`void GlobalSum(uint32_t &);`

			`void GlobalSum(ComplexF &c)`
			`{`
			`GlobalSumVector((float *)&c,2);`
			`}`
			`void GlobalSumVector(ComplexF *c,int N)`
			`{`
			`GlobalSumVector((float *)c,2*N);`
			`}`

			`void GlobalSum(ComplexD &c)`
			`{`
			`GlobalSumVector((double *)&c,2);`
			`}`
			`void GlobalSumVector(ComplexD *c,int N)`
			`{`
			`GlobalSumVector((double *)c,2*N);`
			`}`

			`template<class obj> void GlobalSum(obj &o){`
			`typedef typename obj::scalar_type scalar_type;`
			`int words = sizeof(obj)/sizeof(scalar_type);`
			`scalar_type * ptr = (scalar_type *)& o;`
			`GlobalSumVector(ptr,words);`
			`}`
			`////////////////////////////////////////////////////////////`
			`// Face exchange, buffer swap in translational invariant way`
			`////////////////////////////////////////////////////////////`
			`void SendToRecvFrom(void *xmit,`
			`int xmit_to_rank,`
			`void *recv,`
			`int recv_from_rank,`
			`int bytes);`
Binary IO file for generic Grid array parallel I/O. Number of IO MPI tasks can be varied by selecting which dimensions use parallel IO and which dimensions use Serial send to boss I/O. Thus can neck down from, say 1024 nodes = 4x4x8x8 to {1,8,32,64,128,256,1024} nodes doing the I/O. Interpolates nicely between ALL nodes write their data, a single boss per time-plane in processor space [old UKQCD fortran code did this], and a single node doing all I/O. Not sure I have the transfer sizes big enough and am not overly convinced fstream is guaranteed to not give buffer inconsistencies unless I set streambuf size to zero. Practically it has worked on 8 tasks, 2x1x2x2 writing /cloning NERSC configurations on my MacOS + OpenMPI and Clang environment. It is VERY easy to switch to pwrite at a later date, and also easy to send x-strips around from each node in order to gather bigger chunks at the syscall level. That would push us up to the circa 8x 1848 == 4KB size write chunk, and by taking, say, x/y non parallel we get to 16MB contiguous chunks written in multi 4KB transactions per IOnode in 64^3 lattices for configuration I/O. I suspect this is fine for system performance. 2015-08-26 13:40:29 +01:00
			`void RecvFrom(void *recv,`
			`int recv_from_rank,`
			`int bytes);`
			`void SendTo(void *xmit,`
			`int xmit_to_rank,`
			`int bytes);`

Moving some things around for pretty 2015-05-11 19:09:49 +01:00			`void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,`
			`void *xmit,`
			`int xmit_to_rank,`
			`void *recv,`
			`int recv_from_rank,`
			`int bytes);`
			`void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);`

			`////////////////////////////////////////////////////////////`
			`// Barrier`
			`////////////////////////////////////////////////////////////`
			`void Barrier(void);`

			`////////////////////////////////////////////////////////////`
			`// Broadcast a buffer and composite larger`
			`////////////////////////////////////////////////////////////`
			`void Broadcast(int root,void* data, int bytes);`
			`template<class obj> void Broadcast(int root,obj &data)`
			`{`
			`Broadcast(root,(void *)&data,sizeof(data));`
			`};`

			`static void BroadcastWorld(int root,void* data, int bytes);`

			`};`
			`}`

			`#endif`