GeNN  4.9.0
GPU enhanced Neuronal Networks (GeNN)
cuda/backend.h
Go to the documentation of this file.
1 #pragma once
2 
3 // Standard C++ includes
4 #include <algorithm>
5 #include <array>
6 #include <functional>
7 #include <map>
8 #include <numeric>
9 #include <string>
10 
11 // Standard C includes
12 #include <cassert>
13 
14 // CUDA includes
15 #include <cuda.h>
16 #include <cuda_runtime.h>
17 
18 // GeNN includes
19 #include "backendExport.h"
20 
21 // GeNN code generator includes
25 
26 // Forward declarations
namespace filesystem
{
    // Forward declaration so this header need not pull in the full
    // filesystem library — only pointers/references to path are used here
    class path;
}
31 
32 //--------------------------------------------------------------------------
33 // CodeGenerator::CUDA::DeviceSelectMethod
34 //--------------------------------------------------------------------------
35 namespace CodeGenerator
36 {
37 namespace CUDA
38 {
//! Methods for selecting CUDA device
enum class DeviceSelect
{
    OPTIMAL,            //!< Pick optimal device based on how well kernels can be simultaneously simulated and occupancy
    MOST_MEMORY,        //!< Pick device with most global memory
    MANUAL,             //!< Use device specified by user
    MANUAL_OVERRIDE,    //!< Use device specified by user at runtime with allocateMem parameter (referenced by Preferences::updateHash)
};
47 
48 //--------------------------------------------------------------------------
49 // CodeGenerator::CUDA::BlockSizeSelect
50 //--------------------------------------------------------------------------
//! Methods for selecting CUDA kernel block size
enum class BlockSizeSelect
{
    OCCUPANCY,  //!< Pick optimal blocksize for each kernel based on occupancy
    MANUAL,     //!< Use block sizes specified manually via Preferences::manualBlockSizes
};
57 
58 //--------------------------------------------------------------------------
59 // CodeGenerator::CUDA::Preferences
60 //--------------------------------------------------------------------------
63 {
65  {
66  std::fill(manualBlockSizes.begin(), manualBlockSizes.end(), 32);
67  }
68 
70  bool showPtxInfo = false;
71 
73  bool generateLineInfo = false;
74 
77  bool selectGPUByDeviceID = false;
78 
80  bool enableNCCLReductions = false;
81 
85  bool generateSimpleErrorHandling = false;
86 
88  DeviceSelect deviceSelectMethod = DeviceSelect::OPTIMAL;
89 
91  unsigned int manualDeviceID = 0;
92 
94  BlockSizeSelect blockSizeSelectMethod = BlockSizeSelect::OCCUPANCY;
95 
98 
100 
103  size_t constantCacheOverhead = 72 * 5;
104 
106  std::string userNvccFlags = "";
107 
108  void updateHash(boost::uuids::detail::sha1 &hash) const
109  {
110  // Superclass
112 
113  // **NOTE** showPtxInfo, generateLineInfo and userNvccFlags only affect makefiles/msbuild
114  // **NOTE** block size optimization is also not relevant, the chosen block size is hashed in the backend
115  // **NOTE** while device selection is also not relevant as the chosen device is hashed in the backend, DeviceSelect::MANUAL_OVERRIDE is used in the backend
116 
118  Utils::updateHash(selectGPUByDeviceID, hash);
119  Utils::updateHash(deviceSelectMethod, hash);
120  Utils::updateHash(constantCacheOverhead, hash);
121  Utils::updateHash(enableNCCLReductions, hash);
122  Utils::updateHash(generateSimpleErrorHandling, hash);
123  }
124 };
125 
126 //--------------------------------------------------------------------------
127 // CodeGenerator::CUDA::Backend
128 //--------------------------------------------------------------------------
130 {
131 public:
132  Backend(const KernelBlockSize &kernelBlockSizes, const Preferences &preferences,
133  const std::string &scalarType, int device);
134 
135  //--------------------------------------------------------------------------
136  // CodeGenerator::BackendSIMT virtuals
137  //--------------------------------------------------------------------------
139  virtual bool areSharedMemAtomicsSlow() const override;
140 
142  virtual std::string getSharedPrefix() const override{ return "__shared__ "; }
143 
145  virtual std::string getThreadID(unsigned int axis = 0) const override;
146 
148  virtual std::string getBlockID(unsigned int axis = 0) const override;
149 
151  virtual std::string getCLZ() const override { return "__clz"; }
152 
154  virtual std::string getAtomic(const std::string &type, AtomicOperation op = AtomicOperation::ADD,
155  AtomicMemSpace memSpace = AtomicMemSpace::GLOBAL) const override;
156 
158  virtual void genSharedMemBarrier(CodeStream &os) const override;
159 
161  virtual void genPopulationRNGInit(CodeStream &os, const std::string &globalRNG, const std::string &seed, const std::string &sequence) const override;
162 
164  virtual void genPopulationRNGPreamble(CodeStream &os, Substitutions &subs, const std::string &globalRNG, const std::string &name = "rng") const override;
165 
167 
168  virtual void genPopulationRNGPostamble(CodeStream &os, const std::string &globalRNG) const override;
169 
171  virtual void genGlobalRNGSkipAhead(CodeStream &os, Substitutions &subs, const std::string &sequence, const std::string &name = "rng") const override;
172 
173  //--------------------------------------------------------------------------
174  // CodeGenerator::BackendBase virtuals
175  //--------------------------------------------------------------------------
176  virtual void genNeuronUpdate(CodeStream &os, const ModelSpecMerged &modelMerged,
177  HostHandler preambleHandler, HostHandler pushEGPHandler) const override;
178 
179  virtual void genSynapseUpdate(CodeStream &os, const ModelSpecMerged &modelMerged,
180  HostHandler preambleHandler, HostHandler pushEGPHandler) const override;
181 
182  virtual void genCustomUpdate(CodeStream &os, const ModelSpecMerged &modelMerged,
183  HostHandler preambleHandler, HostHandler pushEGPHandler) const override;
184 
185  virtual void genInit(CodeStream &os, const ModelSpecMerged &modelMerged,
186  HostHandler preambleHandler, HostHandler initPushEGPHandler, HostHandler initSparsePushEGPHandler) const override;
187 
188  virtual void genDefinitionsPreamble(CodeStream &os, const ModelSpecMerged &modelMerged) const override;
189  virtual void genDefinitionsInternalPreamble(CodeStream &os, const ModelSpecMerged &modelMerged) const override;
190  virtual void genRunnerPreamble(CodeStream &os, const ModelSpecMerged &modelMerged, const MemAlloc &memAlloc) const override;
191  virtual void genAllocateMemPreamble(CodeStream &os, const ModelSpecMerged &modelMerged, const MemAlloc &memAlloc) const override;
192  virtual void genFreeMemPreamble(CodeStream &os, const ModelSpecMerged &modelMerged) const override;
193  virtual void genStepTimeFinalisePreamble(CodeStream &os, const ModelSpecMerged &modelMerged) const override;
194 
195  virtual void genVariableDefinition(CodeStream &definitions, CodeStream &definitionsInternal, const std::string &type, const std::string &name, VarLocation loc) const override;
196  virtual void genVariableImplementation(CodeStream &os, const std::string &type, const std::string &name, VarLocation loc) const override;
197  virtual void genVariableAllocation(CodeStream &os, const std::string &type, const std::string &name, VarLocation loc, size_t count, MemAlloc &memAlloc) const override;
198  virtual void genVariableFree(CodeStream &os, const std::string &name, VarLocation loc) const override;
199 
200  virtual void genExtraGlobalParamDefinition(CodeStream &definitions, CodeStream &definitionsInternal, const std::string &type, const std::string &name, VarLocation loc) const override;
201  virtual void genExtraGlobalParamImplementation(CodeStream &os, const std::string &type, const std::string &name, VarLocation loc) const override;
202  virtual void genExtraGlobalParamAllocation(CodeStream &os, const std::string &type, const std::string &name,
203  VarLocation loc, const std::string &countVarName = "count", const std::string &prefix = "") const override;
204  virtual void genExtraGlobalParamPush(CodeStream &os, const std::string &type, const std::string &name,
205  VarLocation loc, const std::string &countVarName = "count", const std::string &prefix = "") const override;
206  virtual void genExtraGlobalParamPull(CodeStream &os, const std::string &type, const std::string &name,
207  VarLocation loc, const std::string &countVarName = "count", const std::string &prefix = "") const override;
208 
210  virtual void genMergedExtraGlobalParamPush(CodeStream &os, const std::string &suffix, size_t mergedGroupIdx,
211  const std::string &groupIdx, const std::string &fieldName,
212  const std::string &egpName) const override;
213 
215  virtual std::string getMergedGroupFieldHostType(const std::string &type) const override;
216 
218  virtual std::string getMergedGroupSimRNGType() const override { return "curandState"; }
219 
220  virtual void genVariablePush(CodeStream &os, const std::string &type, const std::string &name, VarLocation loc, bool autoInitialized, size_t count) const override;
221  virtual void genVariablePull(CodeStream &os, const std::string &type, const std::string &name, VarLocation loc, size_t count) const override;
222 
223  virtual void genCurrentVariablePush(CodeStream &os, const NeuronGroupInternal &ng, const std::string &type,
224  const std::string &name, VarLocation loc, unsigned int batchSize) const override;
225  virtual void genCurrentVariablePull(CodeStream &os, const NeuronGroupInternal &ng, const std::string &type,
226  const std::string &name, VarLocation loc, unsigned int batchSize) const override;
227 
228  virtual void genCurrentTrueSpikePush(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize) const override
229  {
230  genCurrentSpikePush(os, ng, batchSize, false);
231  }
232  virtual void genCurrentTrueSpikePull(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize) const override
233  {
234  genCurrentSpikePull(os, ng, batchSize, false);
235  }
236  virtual void genCurrentSpikeLikeEventPush(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize) const override
237  {
238  genCurrentSpikePush(os, ng, batchSize, true);
239  }
240  virtual void genCurrentSpikeLikeEventPull(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize) const override
241  {
242  genCurrentSpikePull(os, ng, batchSize, true);
243  }
244 
245  virtual void genGlobalDeviceRNG(CodeStream &definitions, CodeStream &definitionsInternal,
246  CodeStream &runner, CodeStream &allocations, CodeStream &free,
247  MemAlloc &memAlloc) const override;
248  virtual void genPopulationRNG(CodeStream &definitions, CodeStream &definitionsInternal,
249  CodeStream &runner, CodeStream &allocations, CodeStream &free,
250  const std::string &name, size_t count, MemAlloc &memAlloc) const override;
251  virtual void genTimer(CodeStream &definitions, CodeStream &definitionsInternal, CodeStream &runner,
252  CodeStream &allocations, CodeStream &free, CodeStream &stepTimeFinalise,
253  const std::string &name, bool updateInStepTime) const override;
254 
256  virtual void genReturnFreeDeviceMemoryBytes(CodeStream &os) const override;
257 
258  virtual void genMakefilePreamble(std::ostream &os) const override;
259  virtual void genMakefileLinkRule(std::ostream &os) const override;
260  virtual void genMakefileCompileRule(std::ostream &os) const override;
261 
262  virtual void genMSBuildConfigProperties(std::ostream &os) const override;
263  virtual void genMSBuildImportProps(std::ostream &os) const override;
264  virtual void genMSBuildItemDefinitions(std::ostream &os) const override;
265  virtual void genMSBuildCompileModule(const std::string &moduleName, std::ostream &os) const override;
266  virtual void genMSBuildImportTarget(std::ostream &os) const override;
267 
269  virtual std::string getAllocateMemParams(const ModelSpecMerged &) const override;
270 
272  virtual bool isPopulationRNGInitialisedOnDevice() const override { return true; }
273 
275  virtual bool isHostReductionRequired() const override { return getPreferences<Preferences>().enableNCCLReductions; }
276 
278  virtual size_t getDeviceMemoryBytes() const override{ return m_ChosenDevice.totalGlobalMem; }
279 
283  virtual MemorySpaces getMergedGroupMemorySpaces(const ModelSpecMerged &modelMerged) const override;
284 
285  virtual bool supportsNamespace() const override { return true; };
286 
288  virtual boost::uuids::detail::sha1::digest_type getHashDigest() const override;
289 
290  //--------------------------------------------------------------------------
291  // Public API
292  //--------------------------------------------------------------------------
293  const cudaDeviceProp &getChosenCUDADevice() const{ return m_ChosenDevice; }
294  int getChosenDeviceID() const{ return m_ChosenDeviceID; }
295  int getRuntimeVersion() const{ return m_RuntimeVersion; }
296  std::string getNVCCFlags() const;
297 
298 private:
299  //--------------------------------------------------------------------------
300  // Private methods
301  //--------------------------------------------------------------------------
302  template<typename T>
303  void genMergedStructArrayPush(CodeStream &os, const std::vector<T> &groups) const
304  {
305  // Loop through groups
306  for(const auto &g : groups) {
307  // Check that a memory space has been assigned
308  assert(!g.getMemorySpace().empty());
309 
310  // Implement merged group array in previously assigned memory space
311  os << g.getMemorySpace() << " Merged" << T::name << "Group" << g.getIndex() << " d_merged" << T::name << "Group" << g.getIndex() << "[" << g.getGroups().size() << "];" << std::endl;
312 
313  // Write function to update
314  os << "void pushMerged" << T::name << "Group" << g.getIndex() << "ToDevice(unsigned int idx, ";
315  g.generateStructFieldArgumentDefinitions(os, *this);
316  os << ")";
317  {
318  CodeStream::Scope b(os);
319 
320  // Loop through sorted fields and build struct on the stack
321  os << "Merged" << T::name << "Group" << g.getIndex() << " group = {";
322  const auto sortedFields = g.getSortedFields(*this);
323  for(const auto &f : sortedFields) {
324  os << std::get<1>(f) << ", ";
325  }
326  os << "};" << std::endl;
327 
328  // Push to device
329  os << "CHECK_CUDA_ERRORS(cudaMemcpyToSymbolAsync(d_merged" << T::name << "Group" << g.getIndex() << ", &group, ";
330  os << "sizeof(Merged" << T::name << "Group" << g.getIndex() << "), idx * sizeof(Merged" << T::name << "Group" << g.getIndex() << ")));" << std::endl;
331  }
332  }
333  }
334 
335 
337  size_t getChosenDeviceSafeConstMemBytes() const
338  {
339  return m_ChosenDevice.totalConstMem - getPreferences<Preferences>().constantCacheOverhead;
340  }
341 
342  void genCurrentSpikePush(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize, bool spikeEvent) const;
343  void genCurrentSpikePull(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize, bool spikeEvent) const;
344 
345  void genKernelDimensions(CodeStream &os, Kernel kernel, size_t numThreadsX, size_t batchSize, size_t numBlockThreadsY = 1) const;
346 
347  //--------------------------------------------------------------------------
348  // Members
349  //--------------------------------------------------------------------------
350  const int m_ChosenDeviceID;
351  cudaDeviceProp m_ChosenDevice;
352  int m_RuntimeVersion;
353 };
354 } // CUDA
355 } // CodeGenerator
Definition: neuronGroupInternal.h:9
Base class for Single Instruction Multiple Thread style backends.
Definition: backendSIMT.h:51
virtual std::string getCLZ() const override
Get the name of the count-leading-zeros function.
Definition: cuda/backend.h:151
VarLocation
< Flags defining which memory space variables should be allocated in
Definition: variableMode.h:10
virtual size_t getDeviceMemoryBytes() const override
How many bytes of memory does 'device' have.
Definition: cuda/backend.h:278
int getChosenDeviceID() const
Definition: cuda/backend.h:294
virtual bool supportsNamespace() const override
Does this backend support namespaces i.e. can C++ implementation of support functions be used...
Definition: cuda/backend.h:285
Pick device with most global memory.
virtual std::string getSharedPrefix() const override
Get the prefix to use for shared memory variables.
Definition: cuda/backend.h:142
Pick optimal device based on how well kernels can be simultaneously simulated and occupancy...
BlockSizeSelect
Methods for selecting CUDA kernel block size.
Definition: cuda/backend.h:52
Base class for backend preferences - can be accessed via a global in 'classic' C++ code generator...
Definition: backendBase.h:58
int getRuntimeVersion() const
Definition: cuda/backend.h:295
Definition: modelSpecMerged.h:31
Helper class for generating code - automatically inserts brackets, indents etc.
Definition: backendBase.h:30
virtual void genCurrentTrueSpikePush(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize) const override
Generate code for pushing true spikes emitted by a neuron group in the current timestep to the 'device'...
Definition: cuda/backend.h:228
Use device specified by user.
std::vector< std::pair< std::string, size_t > > MemorySpaces
Vector of prefixes required to allocate in memory space and size of memory space. ...
Definition: backendBase.h:190
Definition: codeStream.h:21
virtual bool isHostReductionRequired() const override
Backends which support batch-parallelism might require an additional host reduction phase after reduc...
Definition: cuda/backend.h:275
#define BACKEND_EXPORT
Definition: backendExport.h:13
Definition: backendBase.h:107
Use device specified by user at runtime with allocateMem parameter. Optimisation will be performed on...
virtual void genCurrentTrueSpikePull(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize) const override
Generate code for pulling true spikes emitted by a neuron group in the current timestep from the 'device'...
Definition: cuda/backend.h:232
Definition: substitutions.h:21
virtual std::string getMergedGroupSimRNGType() const override
When generating merged structures what type to use for simulation RNGs.
Definition: cuda/backend.h:218
virtual void genCurrentSpikeLikeEventPush(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize) const override
Generate code for pushing spike-like events emitted by a neuron group in the current timestep to the ...
Definition: cuda/backend.h:236
void updateHash(const T &value, boost::uuids::detail::sha1 &hash)
Hash arithmetic types and enums.
Definition: gennUtils.h:128
AtomicMemSpace
What memory space atomic operation is required.
Definition: backendSIMT.h:70
std::function< void(CodeStream &)> HostHandler
Definition: backendBase.h:182
Definition: cuda/backend.h:129
Preferences for CUDA backend.
Definition: cuda/backend.h:62
virtual void genCurrentSpikeLikeEventPull(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize) const override
Generate code for pulling spike-like events emitted by a neuron group in the current timestep from th...
Definition: cuda/backend.h:240
Kernel
Kernels generated by SIMT backends.
Definition: backendSIMT.h:24
AtomicOperation
What atomic operation is required.
Definition: backendSIMT.h:63
Preferences()
Definition: cuda/backend.h:64
Definition: generateModules.h:16
DeviceSelect
Methods for selecting CUDA device.
Definition: cuda/backend.h:40
Pick optimal blocksize for each kernel based on occupancy.
KernelBlockSize manualBlockSizes
If block size select method is set to BlockSizeSelect::MANUAL, block size to use for each kernel...
Definition: cuda/backend.h:97
const cudaDeviceProp & getChosenCUDADevice() const
Definition: cuda/backend.h:293
virtual bool isPopulationRNGInitialisedOnDevice() const override
Different backends seed RNGs in different ways. Does this one initialise population RNGS on device...
Definition: cuda/backend.h:272
std::array< size_t, KernelMax > KernelBlockSize
Array of block sizes for each kernel.
Definition: backendSIMT.h:44
void updateHash(boost::uuids::detail::sha1 &hash) const
Definition: cuda/backend.h:108
Definition: codeStream.h:94