GeNN  4.0.0
GPU enhanced Neuronal Networks (GeNN)
genn_cuda_backend/backend.h
Go to the documentation of this file.
1 #pragma once
2 
3 // Standard C++ includes
4 #include <algorithm>
5 #include <array>
6 #include <functional>
7 #include <map>
8 #include <string>
9 
10 // CUDA includes
11 #include <cuda.h>
12 #include <cuda_runtime.h>
13 
14 // GeNN includes
15 #include "backendExport.h"
16 
17 // GeNN code generator includes
21 
22 // Forward declarations
23 namespace filesystem
24 {
25  class path;
26 }
27 
28 //--------------------------------------------------------------------------
29 // CodeGenerator::CUDA::DeviceSelectMethod
30 //--------------------------------------------------------------------------
31 namespace CodeGenerator
32 {
33 namespace CUDA
34 {
//! Methods for selecting CUDA device.
36 enum class DeviceSelect
37 {
38  OPTIMAL,     //!< Pick optimal device based on how well kernels can be simultaneously simulated and occupancy.
39  MOST_MEMORY, //!< Pick device with most global memory.
40  MANUAL,      //!< Use device specified by user (see Preferences::manualDeviceID).
41 };
42 
43 //--------------------------------------------------------------------------
44 // CodeGenerator::CUDA::BlockSizeSelect
45 //--------------------------------------------------------------------------
//! Methods for selecting CUDA kernel block size.
47 enum class BlockSizeSelect
48 {
49  OCCUPANCY, //!< Pick optimal blocksize for each kernel based on occupancy.
50  MANUAL,    //!< Use block sizes specified via Preferences::manualBlockSizes.
51 };
52 
53 //--------------------------------------------------------------------------
54 // Kernel
55 //--------------------------------------------------------------------------
//! Kernels generated by CUDA backend.
// NOTE(review): the enumerator list (original lines 59-67) was lost in this
// extraction; it presumably ends with the KernelMax sentinel used by
// KernelBlockSize and KernelNames below -- confirm against the upstream
// genn_cuda_backend/backend.h.
57 enum Kernel
58 {
68 };
69 
70 //--------------------------------------------------------------------------
71 // Type definitions
72 //--------------------------------------------------------------------------
//! Array of block sizes for each kernel.
74 using KernelBlockSize = std::array<size_t, KernelMax>;
75 
76 //--------------------------------------------------------------------------
77 // CodeGenerator::CUDA::Preferences
78 //--------------------------------------------------------------------------
//! Preferences for CUDA backend.
// NOTE(review): the extraction lost several original lines here -- the struct
// declaration itself (line 80), the Preferences() constructor header (line 82)
// and the "KernelBlockSize manualBlockSizes" member (line 100) that the
// constructor body below initialises; confirm against the upstream backend.h.
81 {
83  {
// Default every kernel's manual block size to 32.
84  std::fill(manualBlockSizes.begin(), manualBlockSizes.end(), 32);
85  }
86 
//! Show PTX information during kernel compilation -- name-based reading;
//! TODO confirm how this is applied to the nvcc invocation.
88  bool showPtxInfo = false;
89 
//! How to select the CUDA device (see DeviceSelect).
91  DeviceSelect deviceSelectMethod = DeviceSelect::OPTIMAL;
92 
//! Device ID used when deviceSelectMethod is DeviceSelect::MANUAL --
//! presumably, by analogy with manualBlockSizes; confirm against Backend ctor.
94  unsigned int manualDeviceID = 0;
95 
//! How to select kernel block sizes (see BlockSizeSelect).
97  BlockSizeSelect blockSizeSelectMethod = BlockSizeSelect::OCCUPANCY;
98 
101 
//! User-supplied nvcc flags -- presumably consumed by Backend::getNVCCFlags();
//! confirm in backend.cc.
103  std::string userNvccFlags = "";
104 };
105 
106 //--------------------------------------------------------------------------
107 // CodeGenerator::CUDA::Backend
108 //--------------------------------------------------------------------------
110 {
111 public:
112  Backend(const KernelBlockSize &kernelBlockSizes, const Preferences &preferences, int localHostID, int device);
113 
114  //--------------------------------------------------------------------------
115  // CodeGenerator::Backends:: virtuals
116  //--------------------------------------------------------------------------
117  virtual void genNeuronUpdate(CodeStream &os, const ModelSpecInternal &model, NeuronGroupSimHandler simHandler, NeuronGroupHandler wuVarUpdateHandler) const override;
118 
119  virtual void genSynapseUpdate(CodeStream &os, const ModelSpecInternal &model,
120  SynapseGroupHandler wumThreshHandler, SynapseGroupHandler wumSimHandler, SynapseGroupHandler wumEventHandler,
121  SynapseGroupHandler postLearnHandler, SynapseGroupHandler synapseDynamicsHandler) const override;
122 
123  virtual void genInit(CodeStream &os, const ModelSpecInternal &model,
124  NeuronGroupHandler localNGHandler, NeuronGroupHandler remoteNGHandler,
125  SynapseGroupHandler sgDenseInitHandler, SynapseGroupHandler sgSparseConnectHandler,
126  SynapseGroupHandler sgSparseInitHandler) const override;
127 
128  virtual void genDefinitionsPreamble(CodeStream &os) const override;
129  virtual void genDefinitionsInternalPreamble(CodeStream &os) const override;
130  virtual void genRunnerPreamble(CodeStream &os) const override;
131  virtual void genAllocateMemPreamble(CodeStream &os, const ModelSpecInternal &model) const override;
132  virtual void genStepTimeFinalisePreamble(CodeStream &os, const ModelSpecInternal &model) const override;
133 
134  virtual void genVariableDefinition(CodeStream &definitions, CodeStream &definitionsInternal, const std::string &type, const std::string &name, VarLocation loc) const override;
135  virtual void genVariableImplementation(CodeStream &os, const std::string &type, const std::string &name, VarLocation loc) const override;
136  virtual void genVariableAllocation(CodeStream &os, const std::string &type, const std::string &name, VarLocation loc, size_t count) const override;
137  virtual void genVariableFree(CodeStream &os, const std::string &name, VarLocation loc) const override;
138 
139  virtual void genExtraGlobalParamDefinition(CodeStream &definitions, const std::string &type, const std::string &name, VarLocation loc) const override;
140  virtual void genExtraGlobalParamImplementation(CodeStream &os, const std::string &type, const std::string &name, VarLocation loc) const override;
141  virtual void genExtraGlobalParamAllocation(CodeStream &os, const std::string &type, const std::string &name, VarLocation loc) const override;
142  virtual void genExtraGlobalParamPush(CodeStream &os, const std::string &type, const std::string &name) const override;
143  virtual void genExtraGlobalParamPull(CodeStream &os, const std::string &type, const std::string &name) const override;
144 
145  virtual void genPopVariableInit(CodeStream &os, VarLocation loc, const Substitutions &kernelSubs, Handler handler) const override;
146  virtual void genVariableInit(CodeStream &os, VarLocation loc, size_t count, const std::string &indexVarName,
147  const Substitutions &kernelSubs, Handler handler) const override;
148  virtual void genSynapseVariableRowInit(CodeStream &os, VarLocation loc, const SynapseGroupInternal &sg,
149  const Substitutions &kernelSubs, Handler handler) const override;
150 
151  virtual void genVariablePush(CodeStream &os, const std::string &type, const std::string &name, bool autoInitialized, size_t count) const override;
152  virtual void genVariablePull(CodeStream &os, const std::string &type, const std::string &name, size_t count) const override;
//! Generate push of the group's current true spikes; delegates to
//! genCurrentSpikePush with spikeEvent = false.
153  virtual void genCurrentTrueSpikePush(CodeStream &os, const NeuronGroupInternal &ng) const override
154  {
155  genCurrentSpikePush(os, ng, false);
156  }
//! Generate pull of the group's current true spikes; delegates to
//! genCurrentSpikePull with spikeEvent = false.
157  virtual void genCurrentTrueSpikePull(CodeStream &os, const NeuronGroupInternal &ng) const override
158  {
159  genCurrentSpikePull(os, ng, false);
160  }
//! Generate push of the group's current spike-like events; delegates to
//! genCurrentSpikePush with spikeEvent = true.
161  virtual void genCurrentSpikeLikeEventPush(CodeStream &os, const NeuronGroupInternal &ng) const override
162  {
163  genCurrentSpikePush(os, ng, true);
164  }
//! Generate pull of the group's current spike-like events; delegates to
//! genCurrentSpikePull with spikeEvent = true.
165  virtual void genCurrentSpikeLikeEventPull(CodeStream &os, const NeuronGroupInternal &ng) const override
166  {
167  genCurrentSpikePull(os, ng, true);
168  }
169 
170  virtual void genGlobalRNG(CodeStream &definitions, CodeStream &definitionsInternal, CodeStream &runner, CodeStream &allocations, CodeStream &free, const ModelSpecInternal &model) const override;
171  virtual void genPopulationRNG(CodeStream &definitions, CodeStream &definitionsInternal, CodeStream &runner, CodeStream &allocations, CodeStream &free,
172  const std::string &name, size_t count) const override;
173  virtual void genTimer(CodeStream &definitions, CodeStream &definitionsInternal, CodeStream &runner, CodeStream &allocations, CodeStream &free,
174  CodeStream &stepTimeFinalise, const std::string &name, bool updateInStepTime) const override;
175 
176  virtual void genMakefilePreamble(std::ostream &os) const override;
177  virtual void genMakefileLinkRule(std::ostream &os) const override;
178  virtual void genMakefileCompileRule(std::ostream &os) const override;
179 
180  virtual void genMSBuildConfigProperties(std::ostream &os) const override;
181  virtual void genMSBuildImportProps(std::ostream &os) const override;
182  virtual void genMSBuildItemDefinitions(std::ostream &os) const override;
183  virtual void genMSBuildCompileModule(const std::string &moduleName, std::ostream &os) const override;
184  virtual void genMSBuildImportTarget(std::ostream &os) const override;
185 
//! Prefix for variable names in generated code; always "dd_" for this backend.
186  virtual std::string getVarPrefix() const override{ return "dd_"; }
187 
188  virtual bool isGlobalRNGRequired(const ModelSpecInternal &model) const override;
//! Always true for the CUDA backend.
189  virtual bool isSynRemapRequired() const override{ return true; }
//! Always true for the CUDA backend.
190  virtual bool isPostsynapticRemapRequired() const override{ return true; }
191 
192  //--------------------------------------------------------------------------
193  // Public API
194  //--------------------------------------------------------------------------
//! Returns the cudaDeviceProp of the device this backend was constructed for.
195  const cudaDeviceProp &getChosenCUDADevice() const{ return m_ChosenDevice; }
//! Returns the ID of the device this backend was constructed for.
196  int getChosenDeviceID() const{ return m_ChosenDeviceID; }
197  std::string getNVCCFlags() const;
198 
199  //--------------------------------------------------------------------------
200  // Static API
201  //--------------------------------------------------------------------------
202  static size_t getNumPresynapticUpdateThreads(const SynapseGroupInternal &sg);
203  static size_t getNumPostsynapticUpdateThreads(const SynapseGroupInternal &sg);
204  static size_t getNumSynapseDynamicsThreads(const SynapseGroupInternal &sg);
205 
206  //--------------------------------------------------------------------------
207  // Constants
208  //--------------------------------------------------------------------------
209  static const char *KernelNames[KernelMax];
210 
211 private:
212  //--------------------------------------------------------------------------
213  // Type definitions
214  //--------------------------------------------------------------------------
215  template<typename T>
216  using GetPaddedGroupSizeFunc = std::function<size_t(const T&)>;
217 
218  template<typename T>
219  using FilterGroupFunc = std::function<bool(const T&)>;
220 
221  //--------------------------------------------------------------------------
222  // Private methods
223  //--------------------------------------------------------------------------
//! Partition the kernel's flat thread-id range across a set of named groups.
//! Each group passing \p filter gets a contiguous slice of ids
//! [idStart, idStart + paddedSize), guarded by an emitted if-statement; for
//! every group after the first, a zero-based local id
//! ("lid" = id - idStart) is substituted for "id". \p idStart is advanced
//! past each emitted slice, so successive calls continue the partition.
224  template<typename T>
225  void genParallelGroup(CodeStream &os, const Substitutions &kernelSubs, const std::map<std::string, T> &groups, size_t &idStart,
226  GetPaddedGroupSizeFunc<T> getPaddedSizeFunc,
227  FilterGroupFunc<T> filter,
228  GroupHandler<T> handler) const
229  {
230  // Loop over all groups (generic over T)
231  for (const auto &g : groups) {
232  // Fresh substitution scope for this group, inheriting kernel-level substitutions
233  Substitutions popSubs(&kernelSubs);
234  if(filter(g.second)) {
235  const size_t paddedSize = getPaddedSizeFunc(g.second);
236 
// Emit a comment naming the group in the generated code
237  os << "// " << g.first << std::endl;
238 
239  // If this is the first group, its slice starts at 0 so "id" is used directly
240  if (idStart == 0) {
241  os << "if(id < " << paddedSize << ")" << CodeStream::OB(1);
242  popSubs.addVarSubstitution("id", "id");
243  }
// Otherwise guard on the slice bounds and define a zero-based local id
244  else {
245  os << "if(id >= " << idStart << " && id < " << idStart + paddedSize << ")" << CodeStream::OB(1);
246  os << "const unsigned int lid = id - " << idStart << ";" << std::endl;
247  popSubs.addVarSubstitution("id", "lid");
248  }
249 
250  handler(os, g.second, popSubs);
251 
// Advance past this group's slice and close the emitted if-block
252  idStart += paddedSize;
253  os << CodeStream::CB(1) << std::endl;
254  }
255  }
256  }
257 
//! Convenience overload of genParallelGroup that applies no filter:
//! every group in \p groups is processed.
258  template<typename T>
259  void genParallelGroup(CodeStream &os, const Substitutions &kernelSubs, const std::map<std::string, T> &groups, size_t &idStart,
260  GetPaddedGroupSizeFunc<T> getPaddedSizeFunc,
261  GroupHandler<T> handler) const
262  {
263  genParallelGroup<T>(os, kernelSubs, groups, idStart, getPaddedSizeFunc,
264  [](const T&){ return true; }, handler);
265  }
266 
267  void genEmitSpike(CodeStream &os, const Substitutions &subs, const std::string &suffix) const;
268 
269  void genCurrentSpikePush(CodeStream &os, const NeuronGroupInternal &ng, bool spikeEvent) const;
270  void genCurrentSpikePull(CodeStream &os, const NeuronGroupInternal &ng, bool spikeEvent) const;
271 
272  void genPresynapticUpdatePreSpan(CodeStream &os, const ModelSpecInternal &model, const SynapseGroupInternal &sg, const Substitutions &popSubs, bool trueSpike,
273  SynapseGroupHandler wumThreshHandler, SynapseGroupHandler wumSimHandler) const;
274  void genPresynapticUpdatePostSpan(CodeStream &os, const ModelSpecInternal &model, const SynapseGroupInternal &sg, const Substitutions &popSubs, bool trueSpike,
275  SynapseGroupHandler wumThreshHandler, SynapseGroupHandler wumSimHandler) const;
276 
277  void genKernelDimensions(CodeStream &os, Kernel kernel, size_t numThreads) const;
278 
279  bool shouldAccumulateInLinSyn(const SynapseGroupInternal &sg) const;
280 
281  bool shouldAccumulateInSharedMemory(const SynapseGroupInternal &sg) const;
282 
283  std::string getFloatAtomicAdd(const std::string &ftype) const;
284 
285  //--------------------------------------------------------------------------
286  // Members
287  //--------------------------------------------------------------------------
288  const KernelBlockSize m_KernelBlockSizes;
289  const Preferences m_Preferences;
290 
291  const int m_ChosenDeviceID;
292  cudaDeviceProp m_ChosenDevice;
293 
294  int m_RuntimeVersion;
295 };
296 } // CUDA
297 } // CodeGenerator
virtual void genCurrentSpikeLikeEventPush(CodeStream &os, const NeuronGroupInternal &ng) const override
Definition: genn_cuda_backend/backend.h:161
Definition: neuronGroupInternal.h:9
A close bracket marker.
Definition: codeStream.h:82
virtual void genCurrentTrueSpikePush(CodeStream &os, const NeuronGroupInternal &ng) const override
Definition: genn_cuda_backend/backend.h:153
VarLocation
Flags defining which memory space variables should be allocated in.
Definition: variableMode.h:10
Definition: genn_cuda_backend/backend.h:63
int getChosenDeviceID() const
Definition: genn_cuda_backend/backend.h:196
virtual std::string getVarPrefix() const override
Definition: genn_cuda_backend/backend.h:186
Pick device with most global memory.
Pick optimal device based on how well kernels can be simultaneously simulated and occupancy...
BlockSizeSelect
Methods for selecting CUDA kernel block size.
Definition: genn_cuda_backend/backend.h:47
An open bracket marker.
Definition: codeStream.h:69
Base class for backend preferences - can be accessed via a global in 'classic' C++ code generator...
Definition: backendBase.h:33
Definition: genn_cuda_backend/backend.h:65
std::function< void(CodeStream &, const NeuronGroupInternal &, Substitutions &, NeuronGroupHandler, NeuronGroupHandler)> NeuronGroupSimHandler
Callback function type for generation neuron group simulation code.
Definition: backendBase.h:74
GroupHandler< SynapseGroupInternal > SynapseGroupHandler
Standard callback type which provides a CodeStream to write platform-independent code for the specifi...
Definition: backendBase.h:69
std::array< size_t, KernelMax > KernelBlockSize
Array of block sizes for each kernel.
Definition: genn_cuda_backend/backend.h:74
Helper class for generating code - automatically inserts brackets, indents etc.
Definition: backendBase.h:22
GroupHandler< NeuronGroupInternal > NeuronGroupHandler
Standard callback type which provides a CodeStream to write platform-independent code for the specifi...
Definition: backendBase.h:66
Use device specified by user.
Definition: synapseGroupInternal.h:9
virtual void genCurrentSpikeLikeEventPull(CodeStream &os, const NeuronGroupInternal &ng) const override
Definition: genn_cuda_backend/backend.h:165
Definition: codeStream.h:19
#define BACKEND_EXPORT
Definition: backendExport.h:13
Definition: substitutions.h:19
void addVarSubstitution(const std::string &source, const std::string &destionation, bool allowOverride=false)
Definition: substitutions.h:39
Definition: genn_cuda_backend/backend.h:59
Definition: genn_cuda_backend/backend.h:60
Definition: backendBase.h:54
Definition: genn_cuda_backend/backend.h:66
Definition: genn_cuda_backend/backend.h:109
Definition: genn_cuda_backend/backend.h:61
Definition: genn_cuda_backend/backend.h:67
virtual bool isSynRemapRequired() const override
Definition: genn_cuda_backend/backend.h:189
Preferences for CUDA backend.
Definition: genn_cuda_backend/backend.h:80
Definition: genn_cuda_backend/backend.h:62
Definition: modelSpecInternal.h:10
Preferences()
Definition: genn_cuda_backend/backend.h:82
Definition: genn_cuda_backend/backend.h:64
Definition: generateAll.h:18
std::function< void(CodeStream &, const T &, Substitutions &)> GroupHandler
Definition: backendBase.h:63
DeviceSelect
Methods for selecting CUDA device.
Definition: genn_cuda_backend/backend.h:36
Kernel
Kernels generated by CUDA backend.
Definition: genn_cuda_backend/backend.h:57
Pick optimal blocksize for each kernel based on occupancy.
virtual bool isPostsynapticRemapRequired() const override
Definition: genn_cuda_backend/backend.h:190
KernelBlockSize manualBlockSizes
If block size select method is set to BlockSizeSelect::MANUAL, block size to use for each kernel...
Definition: genn_cuda_backend/backend.h:100
virtual void genCurrentTrueSpikePull(CodeStream &os, const NeuronGroupInternal &ng) const override
Definition: genn_cuda_backend/backend.h:157
const cudaDeviceProp & getChosenCUDADevice() const
Definition: genn_cuda_backend/backend.h:195
std::function< void(CodeStream &, Substitutions &)> Handler
Definition: backendBase.h:60