GeNN  4.9.0
GPU enhanced Neuronal Networks (GeNN)
backendSIMT.h
#pragma once

// Standard C++ includes
#include <array>
#include <numeric>
#include <unordered_set>

// GeNN includes
#include "gennExport.h"
#include "varAccess.h"

// GeNN code generator includes
#include "code_generator/backendBase.h"
#include "code_generator/codeStream.h"
#include "code_generator/presynapticUpdateStrategySIMT.h"
#include "code_generator/substitutions.h"

//--------------------------------------------------------------------------
// CodeGenerator::Kernel
//--------------------------------------------------------------------------
namespace CodeGenerator
{
//! Kernels generated by SIMT backends
enum Kernel
{
    KernelNeuronUpdate,
    KernelPresynapticUpdate,
    KernelPostsynapticUpdate,
    KernelSynapseDynamicsUpdate,
    KernelInitialize,
    KernelInitializeSparse,
    KernelNeuronSpikeQueueUpdate,
    KernelNeuronPrevSpikeTimeUpdate,
    KernelSynapseDendriticDelayUpdate,
    KernelCustomUpdate,
    KernelCustomTransposeUpdate,
    KernelMax
};

//--------------------------------------------------------------------------
// Type definitions
//--------------------------------------------------------------------------
//! Array of block sizes for each kernel
using KernelBlockSize = std::array<size_t, KernelMax>;
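// Illustrative sketch (not part of the original header): KernelBlockSize is a
// plain std::array indexed by the Kernel enum, so per-kernel block sizes can
// be configured along these lines before constructing a backend:
//
//     KernelBlockSize blockSizes;
//     blockSizes.fill(32);                        // default every kernel to 32 threads
//     blockSizes[KernelPresynapticUpdate] = 128;  // widen one kernel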

//--------------------------------------------------------------------------
// CodeGenerator::BackendSIMT
//--------------------------------------------------------------------------
//! Base class for Single Instruction Multiple Thread style backends
class GENN_EXPORT BackendSIMT : public BackendBase
{
public:
    BackendSIMT(const KernelBlockSize &kernelBlockSizes, const PreferencesBase &preferences,
                const std::string &scalarType)
    :   BackendBase(scalarType, preferences), m_KernelBlockSizes(kernelBlockSizes)
    {}

    //------------------------------------------------------------------------
    // Enumerations
    //------------------------------------------------------------------------
    //! What atomic operation is required
    enum class AtomicOperation
    {
        ADD,
        OR,
    };

    //! What memory space atomic operation is required
    enum class AtomicMemSpace
    {
        GLOBAL,
        SHARED,
    };

    //------------------------------------------------------------------------
    // Declared virtuals
    //------------------------------------------------------------------------
    //! On some older devices, shared memory atomics are actually slower than global memory atomics so should be avoided
    virtual bool areSharedMemAtomicsSlow() const = 0;

    //! Get the prefix to use for shared memory variables
    virtual std::string getSharedPrefix() const = 0;

    //! Get the ID of the current thread within the thread block
    virtual std::string getThreadID(unsigned int axis = 0) const = 0;

    //! Get the ID of the current thread block
    virtual std::string getBlockID(unsigned int axis = 0) const = 0;

    //! Get the name of the count-leading-zeros function
    virtual std::string getCLZ() const = 0;

    //! Get the name of the atomic operation to use for the given type, operation and memory space
    virtual std::string getAtomic(const std::string &type, AtomicOperation op = AtomicOperation::ADD,
                                  AtomicMemSpace memSpace = AtomicMemSpace::GLOBAL) const = 0;
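    // Illustrative sketch (not part of the original header): a CUDA-style
    // backend would typically resolve this to the matching intrinsic, e.g.
    //
    //     getAtomic("float")                               // -> "atomicAdd"
    //     getAtomic("unsigned int", AtomicOperation::OR)   // -> "atomicOr"
    //
    // so generated kernels can emit calls like "atomicAdd(&group->inSyn[i], value);".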

    //! Generate a shared memory barrier
    virtual void genSharedMemBarrier(CodeStream &os) const = 0;

    //! For SIMT backends which initialize RNGs on device, initialize population RNG with specified seed and sequence
    virtual void genPopulationRNGInit(CodeStream &os, const std::string &globalRNG, const std::string &seed, const std::string &sequence) const = 0;

    //! Generate a preamble to add substitution name for population RNG
    virtual void genPopulationRNGPreamble(CodeStream &os, Substitutions &subs, const std::string &globalRNG, const std::string &name = "rng") const = 0;

    //! If required, generate a postamble for population RNG
    /*! For example, in OpenCL, this is used to write local RNG state back to global memory */
    virtual void genPopulationRNGPostamble(CodeStream &os, const std::string &globalRNG) const = 0;

    //! Generate code to skip ahead local copy of global RNG
    virtual void genGlobalRNGSkipAhead(CodeStream &os, Substitutions &subs, const std::string &sequence, const std::string &name = "rng") const = 0;

    //------------------------------------------------------------------------
    // BackendBase virtuals
    //------------------------------------------------------------------------
    //! Gets the stride used to access synaptic matrix rows, taking into account sparse data structure, padding etc
    virtual size_t getSynapticMatrixRowStride(const SynapseGroupInternal &sg) const final;

    //! When backends require separate 'device' and 'host' versions of variables, they are identified with a prefix
    virtual std::string getDeviceVarPrefix() const final { return getPreferences().automaticCopy ? "" : "d_"; }
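    // Illustrative note (not part of the original header): with automaticCopy
    // disabled, a variable named "V" is therefore referred to in generated
    // device code as "d_V"; with it enabled, host and device share the
    // unprefixed name.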

    virtual void genPopVariableInit(CodeStream &os, const Substitutions &kernelSubs, Handler handler) const final;
    virtual void genVariableInit(CodeStream &os, const std::string &count, const std::string &indexVarName,
                                 const Substitutions &kernelSubs, Handler handler) const final;
    virtual void genSparseSynapseVariableRowInit(CodeStream &os, const Substitutions &kernelSubs, Handler handler) const final
    {
        genSynapseVariableRowInit(os, kernelSubs, handler);
    }

    virtual void genDenseSynapseVariableRowInit(CodeStream &os, const Substitutions &kernelSubs, Handler handler) const final
    {
        genSynapseVariableRowInit(os, kernelSubs, handler);
    }

    virtual void genKernelSynapseVariableInit(CodeStream &os, const SynapseInitGroupMerged &sg, const Substitutions &kernelSubs, Handler handler) const final;
    virtual void genKernelCustomUpdateVariableInit(CodeStream &os, const CustomWUUpdateInitGroupMerged &cu, const Substitutions &kernelSubs, Handler handler) const final;

    //! Should 'scalar' variables be implemented on device or can host variables be used directly?
    virtual bool isDeviceScalarRequired() const final { return true; }

    virtual bool isGlobalHostRNGRequired(const ModelSpecMerged &modelMerged) const final;
    virtual bool isGlobalDeviceRNGRequired(const ModelSpecMerged &modelMerged) const final;

    //! Different backends use different RNGs for different things. Does this one require population RNGs?
    virtual bool isPopulationRNGRequired() const final { return true; }

    //! Different backends may implement synaptic plasticity differently. Does this one require a postsynaptic remapping data structure?
    virtual bool isPostsynapticRemapRequired() const final { return true; }

    //------------------------------------------------------------------------
    // Public API
    //------------------------------------------------------------------------
    //! Get total number of RNG streams potentially used to initialise model
    /*! **NOTE** because the RNG supports 2^64 streams, this is overly conservative */
    size_t getNumInitialisationRNGStreams(const ModelSpecMerged &modelMerged) const;

    size_t getKernelBlockSize(Kernel kernel) const { return m_KernelBlockSizes.at(kernel); }

    size_t getPaddedNumCustomUpdateThreads(const CustomUpdateInternal &cg, unsigned int batchSize) const;
    size_t getPaddedNumCustomUpdateWUThreads(const CustomUpdateWUInternal &cg, unsigned int batchSize) const;
    size_t getPaddedNumCustomUpdateTransposeWUThreads(const CustomUpdateWUInternal &cg, unsigned int batchSize) const;

    //--------------------------------------------------------------------------
    // Static API
    //--------------------------------------------------------------------------
    static size_t getNumPresynapticUpdateThreads(const SynapseGroupInternal &sg, const PreferencesBase &preferences);
    static size_t getNumPostsynapticUpdateThreads(const SynapseGroupInternal &sg);
    static size_t getNumSynapseDynamicsThreads(const SynapseGroupInternal &sg);
    static size_t getNumConnectivityInitThreads(const SynapseGroupInternal &sg);
    static size_t getNumInitThreads(const SynapseGroupInternal &sg);
    static size_t getNumInitThreads(const CustomUpdateWUInternal &cg);

    //! Register a new presynaptic update strategy
    static void addPresynapticUpdateStrategy(PresynapticUpdateStrategySIMT::Base *strategy);

    //--------------------------------------------------------------------------
    // Constants
    //--------------------------------------------------------------------------
    static const char *KernelNames[KernelMax];

protected:
    //------------------------------------------------------------------------
    // Protected API
    //------------------------------------------------------------------------
    void genNeuronPrevSpikeTimeUpdateKernel(CodeStream &os, const Substitutions &kernelSubs, const ModelSpecMerged &modelMerged, size_t &idStart) const;
    void genNeuronSpikeQueueUpdateKernel(CodeStream &os, const ModelSpecMerged &modelMerged, size_t &idStart) const;

    void genNeuronUpdateKernel(CodeStream &os, const Substitutions &kernelSubs, const ModelSpecMerged &modelMerged, size_t &idStart) const;

    void genSynapseDendriticDelayUpdateKernel(CodeStream &os, const ModelSpecMerged &modelMerged, size_t &idStart) const;
    void genPresynapticUpdateKernel(CodeStream &os, const Substitutions &kernelSubs, const ModelSpecMerged &modelMerged, size_t &idStart) const;
    void genPostsynapticUpdateKernel(CodeStream &os, const Substitutions &kernelSubs, const ModelSpecMerged &modelMerged, size_t &idStart) const;
    void genSynapseDynamicsKernel(CodeStream &os, const Substitutions &kernelSubs, const ModelSpecMerged &modelMerged, size_t &idStart) const;

    void genCustomUpdateKernel(CodeStream &os, const Substitutions &kernelSubs, const ModelSpecMerged &modelMerged,
                               const std::string &updateGroup, size_t &idStart) const;

    void genCustomUpdateWUKernel(CodeStream &os, const Substitutions &kernelSubs, const ModelSpecMerged &modelMerged,
                                 const std::string &updateGroup, size_t &idStart) const;

    void genCustomTransposeUpdateWUKernel(CodeStream &os, const Substitutions &kernelSubs, const ModelSpecMerged &modelMerged,
                                          const std::string &updateGroup, size_t &idStart) const;

    void genInitializeKernel(CodeStream &os, const Substitutions &kernelSubs, const ModelSpecMerged &modelMerged, size_t &idStart) const;

    void genInitializeSparseKernel(CodeStream &os, const Substitutions &kernelSubs, const ModelSpecMerged &modelMerged,
                                   size_t numInitializeThreads, size_t &idStart) const;

    //! Register a type with given size - both in the backend's list of sized types and in its set of device-only types
    void addDeviceType(const std::string &type, size_t size, const std::string &maxValue = "");

    //! Is type a device-only type?
    bool isDeviceType(const std::string &type) const;

    //! Pad a size to a multiple of the block size of the given kernel
    size_t padKernelSize(size_t size, Kernel kernel) const;
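    // Illustrative sketch (not part of the original header), assuming the usual
    // round-up-to-a-whole-number-of-blocks semantics:
    //
    //     padKernelSize(1000, KernelNeuronUpdate);
    //     // == 1024 when that kernel's block size is 64,
    //     // since ceil(1000 / 64) * 64 == 16 * 64 == 1024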

    //! Get kernel block size
    const KernelBlockSize &getKernelBlockSize() const { return m_KernelBlockSizes; }

private:
    //--------------------------------------------------------------------------
    // Type definitions
    //--------------------------------------------------------------------------
    template<typename T>
    using GetPaddedGroupSizeFunc = std::function<size_t(const T &)>;

    //--------------------------------------------------------------------------
    // Private methods
    //--------------------------------------------------------------------------
    template<typename T, typename S, typename F>
    void genParallelGroup(CodeStream &os, const Substitutions &kernelSubs, const std::vector<T> &groups, size_t &idStart,
                          S getPaddedSizeFunc, F filter, GroupHandler<T> handler) const
    {
        // Loop through groups
        for(const auto &gMerge : groups) {
            if(filter(gMerge)) {
                // Sum padded sizes of each group within merged group
                const size_t paddedSize = std::accumulate(
                    gMerge.getGroups().cbegin(), gMerge.getGroups().cend(), size_t{0},
                    [getPaddedSizeFunc](size_t acc, std::reference_wrapper<const typename T::GroupInternal> g)
                    {
                        return (acc + getPaddedSizeFunc(g.get()));
                    });

                os << "// merged" << gMerge.getIndex() << std::endl;

                // If this is the first group
                if(idStart == 0) {
                    os << "if(id < " << paddedSize << ")";
                }
                else {
                    os << "if(id >= " << idStart << " && id < " << idStart + paddedSize << ")";
                }
                {
                    CodeStream::Scope b(os);
                    Substitutions popSubs(&kernelSubs);

                    if(gMerge.getGroups().size() == 1) {
                        os << getPointerPrefix() << "struct Merged" << T::name << "Group" << gMerge.getIndex() << " *group";
                        os << " = &d_merged" << T::name << "Group" << gMerge.getIndex() << "[0]; " << std::endl;
                        os << "const unsigned int lid = id - " << idStart << ";" << std::endl;

                        // Use the starting thread ID of the whole merged group as group_start_id
                        popSubs.addVarSubstitution("group_start_id", std::to_string(idStart));
                    }
                    else {
                        // Perform bisect operation to get index of merged struct
                        os << "unsigned int lo = 0;" << std::endl;
                        os << "unsigned int hi = " << gMerge.getGroups().size() << ";" << std::endl;
                        os << "while(lo < hi)" << std::endl;
                        {
                            CodeStream::Scope b(os);
                            os << "const unsigned int mid = (lo + hi) / 2;" << std::endl;

                            os << "if(id < d_merged" << T::name << "GroupStartID" << gMerge.getIndex() << "[mid])";
                            {
                                CodeStream::Scope b(os);
                                os << "hi = mid;" << std::endl;
                            }
                            os << "else";
                            {
                                CodeStream::Scope b(os);
                                os << "lo = mid + 1;" << std::endl;
                            }
                        }

                        // Use this to get reference to merged group structure
                        os << getPointerPrefix() << "struct Merged" << T::name << "Group" << gMerge.getIndex() << " *group";
                        os << " = &d_merged" << T::name << "Group" << gMerge.getIndex() << "[lo - 1]; " << std::endl;

                        // Get group start thread ID and use as group_start_id
                        os << "const unsigned int groupStartID = d_merged" << T::name << "GroupStartID" << gMerge.getIndex() << "[lo - 1];" << std::endl;
                        popSubs.addVarSubstitution("group_start_id", "groupStartID");

                        // Use this to calculate local id within group
                        os << "const unsigned int lid = id - groupStartID;" << std::endl;
                    }
                    popSubs.addVarSubstitution("id", "lid");

                    handler(os, gMerge, popSubs);

                    idStart += paddedSize;
                }
            }
        }
    }
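    // Illustrative sketch (not part of the original header): for a merged group
    // of three populations with padded sizes {128, 256, 64} starting at
    // idStart == 0, and taking "NeuronUpdate" as a stand-in for T::name, the
    // emitted kernel code is shaped roughly like:
    //
    //     if(id < 448) {
    //         unsigned int lo = 0;
    //         unsigned int hi = 3;
    //         while(lo < hi) {
    //             const unsigned int mid = (lo + hi) / 2;
    //             if(id < d_mergedNeuronUpdateGroupStartID0[mid]) {  // start IDs assumed {0, 128, 384}
    //                 hi = mid;
    //             }
    //             else {
    //                 lo = mid + 1;
    //             }
    //         }
    //         struct MergedNeuronUpdateGroup0 *group = &d_mergedNeuronUpdateGroup0[lo - 1];
    //         const unsigned int groupStartID = d_mergedNeuronUpdateGroupStartID0[lo - 1];
    //         const unsigned int lid = id - groupStartID;
    //         // ...handler output, with "id" substituted by "lid"...
    //     }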

    template<typename T, typename S>
    void genParallelGroup(CodeStream &os, const Substitutions &kernelSubs, const std::vector<T> &groups, size_t &idStart,
                          S getPaddedSizeFunc, GroupHandler<T> handler) const
    {
        genParallelGroup(os, kernelSubs, groups, idStart, getPaddedSizeFunc,
                         [](const T &) { return true; }, handler);
    }

    // Helper function to generate kernel code to initialise variables associated with a
    // synapse group or custom WU update with dense or kernel connectivity
    template<typename G>
    void genSynapseVarInit(CodeStream &os, const ModelSpecMerged &modelMerged, const G &g, Substitutions &popSubs,
                           bool initRNGRequired, bool kernel, size_t kernelDimensions) const
    {
        os << "if(" << popSubs["id"] << " < ";

        // If synapse group has kernel weights, check ID against product of kernel dimensions
        if (kernel) {
            // Loop through kernel dimensions and multiply together
            os << "(";
            for (size_t i = 0; i < kernelDimensions; i++) {
                os << g.getKernelSize(i);
                if (i != (kernelDimensions - 1)) {
                    os << " * ";
                }
            }
            os << ")";
        }
        // Otherwise, check against number of postsynaptic neurons
        else {
            os << "group->numTrgNeurons";
        }
        os << ")";
        {
            CodeStream::Scope b(os);

            // If an RNG is required for initialisation, make a copy of the
            // global Philox RNG and skip ahead by thread ID
            // **NOTE** not LOCAL id
            if(initRNGRequired) {
                genGlobalRNGSkipAhead(os, popSubs, "id");
            }

            // If synapse group has kernel weights
            if (kernel) {
                // Loop through kernel dimensions to generate separate indices
                for (size_t i = 0; i < kernelDimensions; i++) {
                    os << "const unsigned int kernelID" << i << " = (" << popSubs["id"];

                    // If this isn't the last dimension
                    if (i < (kernelDimensions - 1)) {
                        // Loop backwards through subsequent kernel dimensions and
                        // generate code to divide by their product
                        os << " / (";
                        for (size_t j = (kernelDimensions - 1); j > i; j--) {
                            os << g.getKernelSize(j);

                            if (j != (i + 1)) {
                                os << " * ";
                            }
                        }
                        os << ")";
                    }
                    os << ")";

                    // If this isn't the first dimension, take modulus of kernel size
                    if (i > 0) {
                        os << " % " << g.getKernelSize(i);
                    }

                    os << ";" << std::endl;

                    // Add substitution
                    popSubs.addVarSubstitution("id_kernel_" + std::to_string(i), "kernelID" + std::to_string(i));
                }
            }
            // Otherwise, just substitute postsynaptic index
            else {
                popSubs.addVarSubstitution("id_post", popSubs["id"]);
            }

            // Generate init code
            g.generateInit(*this, os, modelMerged, popSubs);
        }
    }
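    // Illustrative sketch (not part of the original header): for a hypothetical
    // 3-D kernel with sizes {5, 5, 8}, the loop above unflattens the thread ID
    // into one index per dimension:
    //
    //     const unsigned int kernelID0 = (id / (8 * 5));      // slowest-varying
    //     const unsigned int kernelID1 = (id / (8)) % 5;
    //     const unsigned int kernelID2 = (id) % 8;            // fastest-varying
    //
    // e.g. id == 173 yields kernelID0 == 4, kernelID1 == 1, kernelID2 == 5,
    // since (4 * 5 + 1) * 8 + 5 == 173.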

    // Helper function to generate kernel code to initialise variables associated with a
    // synapse group or custom WU update with sparse connectivity
    template<typename G>
    void genSparseSynapseVarInit(CodeStream &os, const ModelSpecMerged &modelMerged, const G &g, Substitutions &popSubs,
                                 bool varInitRequired, GroupHandler<G> handler) const
    {
        // Calculate the number of blocks rows need to be processed in
        // (so that each block's row lengths can be staged in shared memory)
        const size_t blockSize = getKernelBlockSize(KernelInitializeSparse);
        os << "const unsigned int numBlocks = (group->numSrcNeurons + " << blockSize << " - 1) / " << blockSize << ";" << std::endl;

        os << "unsigned int idx = " << popSubs["id"] << ";" << std::endl;

        // Loop through blocks
        os << "for(unsigned int r = 0; r < numBlocks; r++)";
        {
            CodeStream::Scope b(os);

            // Calculate number of rows to process in this block
            os << "const unsigned numRowsInBlock = (r == (numBlocks - 1))";
            os << " ? ((group->numSrcNeurons - 1) % " << blockSize << ") + 1";
            os << " : " << blockSize << ";" << std::endl;

            // Use threads to copy block of sparse structure into shared memory
            genSharedMemBarrier(os);
            os << "if (" << getThreadID() << " < numRowsInBlock)";
            {
                CodeStream::Scope b(os);
                os << "shRowLength[" << getThreadID() << "] = group->rowLength[(r * " << blockSize << ") + " << getThreadID() << "];" << std::endl;
            }
            genSharedMemBarrier(os);

            // Loop through rows
            os << "for(unsigned int i = 0; i < numRowsInBlock; i++)";
            {
                CodeStream::Scope b(os);

                // If there is a synapse for this thread to initialise
                os << "if(" << popSubs["id"] << " < shRowLength[i])";
                {
                    CodeStream::Scope b(os);

                    // Generate initialisation code
                    if(varInitRequired) {
                        popSubs.addVarSubstitution("id_pre", "((r * " + std::to_string(blockSize) + ") + i)");
                        popSubs.addVarSubstitution("id_post", "group->ind[idx]");
                        g.generateInit(*this, os, modelMerged, popSubs);
                    }

                    // Call handler
                    handler(os, g, popSubs);
                }

                // As the matrix is ragged, advance index to next row by adding stride
                os << "idx += group->rowStride;" << std::endl;
            }
        }
    }
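    // Illustrative worked example (not part of the original header): with a
    // KernelInitializeSparse block size of 256 and group->numSrcNeurons == 1000,
    // the generated loop processes rows in (1000 + 256 - 1) / 256 == 4 blocks;
    // the first three handle 256 rows each and the last handles
    // ((1000 - 1) % 256) + 1 == 232 rows, matching 3 * 256 + 232 == 1000.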

    void genEmitSpike(CodeStream &os, const Substitutions &subs, const std::string &suffix, bool recordingEnabled) const;

    void genRecordingSharedMemInit(CodeStream &os, const std::string &suffix) const;

    void genSynapseVariableRowInit(CodeStream &os, const Substitutions &kernelSubs, Handler handler) const;

    // Get appropriate presynaptic update strategy to use for this synapse group
    const PresynapticUpdateStrategySIMT::Base *getPresynapticUpdateStrategy(const SynapseGroupInternal &sg) const
    {
        return getPresynapticUpdateStrategy(sg, getPreferences());
    }

    //--------------------------------------------------------------------------
    // Private static methods
    //--------------------------------------------------------------------------
    // Get appropriate presynaptic update strategy to use for this synapse group
    static const PresynapticUpdateStrategySIMT::Base *getPresynapticUpdateStrategy(const SynapseGroupInternal &sg,
                                                                                   const PreferencesBase &preferences);

    //--------------------------------------------------------------------------
    // Members
    //--------------------------------------------------------------------------
    const KernelBlockSize m_KernelBlockSizes;

    //! Set of types that are only valid on device
    std::unordered_set<std::string> m_DeviceTypes;

    //--------------------------------------------------------------------------
    // Static members
    //--------------------------------------------------------------------------
    static std::vector<PresynapticUpdateStrategySIMT::Base *> s_PresynapticUpdateStrategies;
};

}   // namespace CodeGenerator