mirror of
https://github.com/paboyle/Grid.git
synced 2025-01-11 20:20:26 +00:00
dc814f30da
The number of MPI tasks doing I/O can be varied by selecting which dimensions use parallel I/O and which dimensions use serial send-to-boss I/O. Thus we can neck down from, say, 1024 nodes = 4x4x8x8 to {1,8,32,64,128,256,1024} nodes doing the I/O. This interpolates nicely between ALL nodes writing their own data, a single boss per time-plane in processor space [the old UKQCD Fortran code did this], and a single node doing all the I/O. I am not sure I have the transfer sizes big enough, and I am not overly convinced that fstream is guaranteed not to give buffer inconsistencies unless I set the streambuf size to zero. In practice it has worked on 8 tasks (2x1x2x2), writing/cloning NERSC configurations in my macOS + OpenMPI + Clang environment. It is VERY easy to switch to pwrite at a later date, and also easy to send x-strips around from each node in order to gather bigger chunks at the syscall level. That would push us up to a write chunk size of circa 8x 18*4*8 == 4KB, and by taking, say, x/y non-parallel we get to 16MB contiguous chunks written in multiple 4KB transactions per I/O node in 64^3 lattices for configuration I/O. I suspect this is fine for system performance.
55 lines
1.0 KiB
C++
55 lines
1.0 KiB
C++
//
|
|
// Grid.h
|
|
// simd
|
|
//
|
|
// Created by Peter Boyle on 09/05/2014.
|
|
// Copyright (c) 2014 University of Edinburgh. All rights reserved.
|
|
//
|
|
|
|
#ifndef GRID_H
|
|
#define GRID_H
|
|
|
|
///////////////////
|
|
// Std C++ dependencies
|
|
///////////////////
|
|
#include <cassert>
|
|
#include <complex>
|
|
#include <vector>
|
|
#include <iostream>
|
|
#include <iomanip>
|
|
#include <random>
|
|
#include <functional>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <signal.h>
|
|
#include <ctime>
|
|
#include <sys/time.h>
|
|
#include <chrono>
|
|
|
|
///////////////////
|
|
// Grid headers
|
|
///////////////////
|
|
#include <serialisation/Serialisation.h>
|
|
#include <Config.h>
|
|
#include <Timer.h>
|
|
#include <Log.h>
|
|
#include <AlignedAllocator.h>
|
|
#include <Simd.h>
|
|
#include <Threads.h>
|
|
#include <Communicator.h>
|
|
#include <Cartesian.h>
|
|
#include <Tensors.h>
|
|
#include <Lattice.h>
|
|
#include <Cshift.h>
|
|
#include <Stencil.h>
|
|
#include <Algorithms.h>
|
|
#include <qcd/QCD.h>
|
|
#include <parallelIO/BinaryIO.h>
|
|
#include <parallelIO/NerscIO.h>
|
|
|
|
#include <Init.h>
|
|
|
|
|
|
#endif
|