6 #include <unordered_set> 55 const std::string &scalarType)
56 :
BackendBase(scalarType, preferences), m_KernelBlockSizes(kernelBlockSizes)
//! Pure virtual query answered by each concrete backend.
//! NOTE(review): presumably reports whether atomic operations on shared memory are slow
//! on the target - inferred from the name only; confirm against the implementations.
80 virtual bool areSharedMemAtomicsSlow()
const = 0;
//! Pure virtual; returns a string supplied by the concrete backend.
//! NOTE(review): presumably the qualifier placed in front of shared-memory declarations
//! in generated code - inferred from the name; confirm against the implementations.
83 virtual std::string getSharedPrefix()
const = 0;
//! Pure virtual; returns a backend-specific expression as a string.
//! Elsewhere in this header the result (with the default axis) is written into generated
//! code, where it is compared against numRowsInBlock and used to index shRowLength.
//! \param axis which axis to query, defaults to 0
//!             (presumably the dimension of the thread index - confirm)
86 virtual std::string getThreadID(
unsigned int axis = 0)
const = 0;
//! Pure virtual; returns a backend-specific expression as a string.
//! NOTE(review): presumably the expression giving the index of the current thread block -
//! inferred from the name; confirm against the implementations.
//! \param axis which axis to query, defaults to 0
89 virtual std::string getBlockID(
unsigned int axis = 0)
const = 0;
//! Pure virtual; returns a string supplied by the concrete backend.
//! NOTE(review): presumably the name of the target's count-leading-zeros function -
//! inferred from the name; confirm against the implementations.
92 virtual std::string getCLZ()
const = 0;
95 virtual std::string getAtomic(
const std::string &type,
AtomicOperation op = AtomicOperation::ADD,
//! Pure virtual; writes backend-specific code to the given stream.
//! Elsewhere in this header it is called immediately before and after the generated code
//! that fills shRowLength (presumably a block-level synchronisation point - confirm).
//! \param os code stream that receives the generated code
99 virtual void genSharedMemBarrier(
CodeStream &os)
const = 0;
//! Pure virtual; writes backend-specific code to the given stream.
//! NOTE(review): presumably emits initialisation of a per-population RNG state from a seed
//! and sequence - inferred from the names; confirm against the implementations.
//! \param os        code stream that receives the generated code
//! \param globalRNG string identifying the RNG (exact meaning not visible here)
//! \param seed      string spliced into the generated code as the seed
//! \param sequence  string spliced into the generated code as the sequence
102 virtual void genPopulationRNGInit(
CodeStream &os,
const std::string &globalRNG,
const std::string &seed,
const std::string &sequence)
const = 0;
//! Pure virtual; writes backend-specific code to the given stream and may modify subs.
//! NOTE(review): presumably emitted before code that uses a population RNG and registers a
//! substitution called 'name' - inferred from the signature; confirm.
//! \param os        code stream that receives the generated code
//! \param subs      substitutions object, passed by non-const reference
//! \param globalRNG string identifying the RNG (exact meaning not visible here)
//! \param name      substitution name, defaults to "rng"
105 virtual void genPopulationRNGPreamble(
CodeStream &os,
Substitutions &subs,
const std::string &globalRNG,
const std::string &name =
"rng")
const = 0;
//! Pure virtual; writes backend-specific code to the given stream.
//! NOTE(review): presumably the counterpart of genPopulationRNGPreamble, emitted after the
//! code that used the RNG - inferred from the name; confirm.
//! \param os        code stream that receives the generated code
//! \param globalRNG string identifying the RNG (exact meaning not visible here)
109 virtual void genPopulationRNGPostamble(
CodeStream &os,
const std::string &globalRNG)
const = 0;
//! Pure virtual; writes backend-specific code to the given stream and may modify subs.
//! Elsewhere in this header it is called with sequence "id" when initRNGRequired is set.
//! NOTE(review): presumably emits a copy of the global RNG advanced by 'sequence' and
//! exposes it under 'name' - inferred from the names; confirm.
//! \param os       code stream that receives the generated code
//! \param subs     substitutions object, passed by non-const reference
//! \param sequence string spliced into the generated code as the skip-ahead amount
//! \param name     substitution name, defaults to "rng"
112 virtual void genGlobalRNGSkipAhead(
CodeStream &os,
Substitutions &subs,
const std::string &sequence,
const std::string &name =
"rng")
const = 0;
//! Returns the prefix for device variable names: an empty string when the
//! automaticCopy preference is set, otherwise "d_".
//! Marked final, so derived backends cannot override it.
122 virtual std::string
getDeviceVarPrefix() const final {
return getPreferences().automaticCopy ?
"" :
"d_"; }
125 virtual void genVariableInit(
CodeStream &os,
const std::string &count,
const std::string &indexVarName,
129 genSynapseVariableRowInit(os, kernelSubs, handler);
134 genSynapseVariableRowInit(os, kernelSubs, handler);
//! Declared final here; the definition is not in this header.
//! NOTE(review): presumably reports whether the given merged model needs a global RNG on
//! the host - inferred from the name; confirm in the implementation file.
//! \param modelMerged merged model to inspect
143 virtual bool isGlobalHostRNGRequired(
const ModelSpecMerged &modelMerged)
const final;
//! Declared final here; the definition is not in this header.
//! NOTE(review): presumably reports whether the given merged model needs a global RNG on
//! the device - inferred from the name; confirm in the implementation file.
//! \param modelMerged merged model to inspect
144 virtual bool isGlobalDeviceRNGRequired(
const ModelSpecMerged &modelMerged)
const final;
//! Returns a count computed from the merged model; the definition is not in this header.
//! NOTE(review): presumably the number of RNG streams the initialisation kernels need -
//! inferred from the name; confirm in the implementation file.
//! \param modelMerged merged model to inspect
154 size_t getNumInitialisationRNGStreams(
const ModelSpecMerged & modelMerged)
const;
//! Returns a thread count for a custom update; the definition is not in this header.
//! NOTE(review): presumably padded up to the relevant kernel block size - inferred from
//! the name; confirm in the implementation file.
//! \param cg        custom update group
//! \param batchSize batch size to account for
158 size_t getPaddedNumCustomUpdateThreads(
const CustomUpdateInternal &cg,
unsigned int batchSize)
const;
//! Returns a thread count for a transposing weight-update custom update; the definition
//! is not in this header.
//! NOTE(review): presumably padded up to the relevant kernel block size - inferred from
//! the name; confirm in the implementation file.
//! \param cg        weight-update custom update group
//! \param batchSize batch size to account for
160 size_t getPaddedNumCustomUpdateTransposeWUThreads(
const CustomUpdateWUInternal &cg,
unsigned int batchSize)
const;
196 const std::string &updateGroup,
size_t &idStart)
const;
199 const std::string &updateGroup,
size_t &idStart)
const;
202 const std::string &updateGroup,
size_t &idStart)
const;
207 size_t numInitializeThreads,
size_t &idStart)
const;
//! Non-const member; the definition is not in this header.
//! NOTE(review): presumably registers 'type' as a device-only type (see m_DeviceTypes) -
//! inferred from the name; confirm in the implementation file.
//! \param type     name of the type
//! \param size     size associated with the type (presumably in bytes - confirm)
//! \param maxValue optional string, defaults to empty
210 void addDeviceType(
const std::string &type,
size_t size,
const std::string &maxValue =
"");
//! Returns a flag for the given type name; the definition is not in this header.
//! NOTE(review): presumably true when 'type' was registered through addDeviceType -
//! inferred from the names; confirm in the implementation file.
//! \param type name of the type to look up
213 bool isDeviceType(
const std::string &type)
const;
//! Returns a size derived from 'size' and 'kernel'; the definition is not in this header.
//! NOTE(review): presumably rounds 'size' up to a multiple of the block size configured
//! for 'kernel' - inferred from the name; confirm in the implementation file.
//! \param size   unpadded size
//! \param kernel kernel whose configuration is used
216 size_t padKernelSize(
size_t size,
Kernel kernel)
const;
//! Alias for a callable that takes a const T reference and returns a size_t.
//! NOTE(review): T is introduced on a line not visible in this listing (presumably a
//! template header directly above) - confirm.
226 using GetPaddedGroupSizeFunc = std::function<size_t(const T &)>;
//! For each merged group in 'groups', emits generated code that tests whether the thread's
//! 'id' falls in that merged group's range, locates the individual group and its local
//! index 'lid', and then invokes the handler. idStart is advanced by each merged group's
//! padded size.
//! NOTE(review): several lines of this definition (remaining parameters, braces and the
//! conditions choosing between alternative emits) are not visible in this listing; the
//! comments below describe only the statements that are shown.
231 template<
typename T,
typename S,
typename F>
232 void genParallelGroup(
CodeStream &os,
const Substitutions &kernelSubs,
const std::vector<T> &groups,
size_t &idStart,
236 for(
const auto &gMerge : groups) {
// Sum getPaddedSizeFunc over every group contained in this merged group.
// NOTE(review): the lambda captures gMerge by value, which copies it, yet the visible
// lambda body does not use it - likely an unnecessary copy; confirm and drop the capture.
239 const size_t paddedSize = std::accumulate(
240 gMerge.getGroups().cbegin(), gMerge.getGroups().cend(),
size_t{0},
241 [gMerge, getPaddedSizeFunc](
size_t acc, std::reference_wrapper<const typename T::GroupInternal> g)
243 return (acc + getPaddedSizeFunc(g.get()));
// Emit a comment into the generated code naming the merged group's index.
246 os <<
"// merged" << gMerge.getIndex() << std::endl;
// Two alternative range guards follow; the condition selecting between them is not
// visible here (presumably the shorter form is used when idStart is zero - confirm).
250 os <<
"if(id < " << paddedSize <<
")";
253 os <<
"if(id >= " << idStart <<
" && id < " << idStart + paddedSize <<
")";
// Only one group: point 'group' at element 0 and compute lid relative to idStart.
259 if(gMerge.getGroups().size() == 1) {
260 os << getPointerPrefix() <<
"struct Merged" << T::name <<
"Group" << gMerge.getIndex() <<
" *group";
261 os <<
" = &d_merged" << T::name <<
"Group" << gMerge.getIndex() <<
"[0]; " << std::endl;
262 os <<
"const unsigned int lid = id - " << idStart <<
";" << std::endl;
// Otherwise emit a binary search over the d_merged<name>GroupStartID<index> array:
// lo/hi bound the search, hi shrinks when id is below the start ID at mid and lo moves
// past mid in the other emitted branch (the enclosing scopes are on lines not shown).
269 os <<
"unsigned int lo = 0;" << std::endl;
270 os <<
"unsigned int hi = " << gMerge.getGroups().size() <<
";" << std::endl;
271 os <<
"while(lo < hi)" << std::endl;
274 os <<
"const unsigned int mid = (lo + hi) / 2;" << std::endl;
276 os <<
"if(id < d_merged" << T::name <<
"GroupStartID" << gMerge.getIndex() <<
"[mid])";
279 os <<
"hi = mid;" << std::endl;
284 os <<
"lo = mid + 1;" << std::endl;
// After the search the generated code selects entry (lo - 1) as the group.
289 os << getPointerPrefix() <<
"struct Merged" << T::name <<
"Group" << gMerge.getIndex() <<
" *group";
290 os <<
" = &d_merged" << T::name <<
"Group" << gMerge.getIndex() <<
"[lo - 1]; " << std::endl;
// lid is the thread's offset from the selected group's start ID.
293 os <<
"const unsigned int groupStartID = d_merged" << T::name <<
"GroupStartID" << gMerge.getIndex() <<
"[lo - 1];" << std::endl;
297 os <<
"const unsigned int lid = id - groupStartID;" << std::endl;
// Hand over to the caller-supplied handler (popSubs is created on a line not shown).
301 handler(os, gMerge, popSubs);
// The next merged group starts after this one's padded range.
303 idStart += paddedSize;
//! Convenience overload: forwards to the three-template-parameter genParallelGroup,
//! inserting a lambda that returns true for every T between getPaddedSizeFunc and handler.
//! NOTE(review): the role of that extra argument is declared on lines not visible here
//! (presumably a per-group filter, so this overload accepts every group) - confirm.
310 template<
typename T,
typename S>
311 void genParallelGroup(
CodeStream &os,
const Substitutions &kernelSubs,
const std::vector<T> &groups,
size_t &idStart,
314 genParallelGroup(os, kernelSubs, groups, idStart, getPaddedSizeFunc,
315 [](
const T &) {
return true; }, handler);
321 bool initRNGRequired,
bool kernel,
size_t kernelDimensions)
const 323 os <<
"if(" << popSubs[
"id"] <<
" < ";
329 for (
size_t i = 0; i < kernelDimensions; i++) {
330 os << g.getKernelSize(i);
331 if (i != (kernelDimensions - 1)) {
339 os <<
"group->numTrgNeurons";
348 if(initRNGRequired) {
349 genGlobalRNGSkipAhead(os, popSubs,
"id");
355 for (
size_t i = 0; i < kernelDimensions; i++) {
356 os <<
"const unsigned int kernelID" << i <<
" = (" << popSubs[
"id"];
359 if (i < (kernelDimensions - 1)) {
362 for (
size_t j = (kernelDimensions - 1); j > i; j--) {
363 os << g.getKernelSize(j);
375 os <<
" % " << g.getKernelSize(i);
378 os <<
";" << std::endl;
381 popSubs.addVarSubstitution(
"id_kernel_" + std::to_string(i),
"kernelID" + std::to_string(i));
386 popSubs.addVarSubstitution(
"id_post", popSubs[
"id"]);
390 g.generateInit(*
this, os, modelMerged, popSubs);
401 os <<
"const unsigned int numBlocks = (group->numSrcNeurons + " << blockSize <<
" - 1) / " << blockSize <<
";" << std::endl;
403 os <<
"unsigned int idx = " << popSubs[
"id"] <<
";" << std::endl;
406 os <<
"for(unsigned int r = 0; r < numBlocks; r++)";
411 os <<
"const unsigned numRowsInBlock = (r == (numBlocks - 1))";
412 os <<
" ? ((group->numSrcNeurons - 1) % " << blockSize <<
") + 1";
413 os <<
" : " << blockSize <<
";" << std::endl;
416 genSharedMemBarrier(os);
417 os <<
"if (" << getThreadID() <<
" < numRowsInBlock)";
420 os <<
"shRowLength[" << getThreadID() <<
"] = group->rowLength[(r * " << blockSize <<
") + " << getThreadID() <<
"];" << std::endl;
422 genSharedMemBarrier(os);
425 os <<
"for(unsigned int i = 0; i < numRowsInBlock; i++)";
430 os <<
"if(" << popSubs[
"id"] <<
" < shRowLength[i])";
435 if(varInitRequired) {
436 popSubs.addVarSubstitution(
"id_pre",
"((r * " + std::to_string(blockSize) +
") + i)");
437 popSubs.addVarSubstitution(
"id_post",
"group->ind[idx]");
438 g.generateInit(*
this, os, modelMerged, popSubs);
442 handler(os, g, popSubs);
446 os <<
"idx += group->rowStride;" << std::endl;
//! Writes generated code to the given stream; the definition is not in this header.
//! NOTE(review): presumably emits the code that records a spike, with 'suffix' selecting
//! the spike variant - inferred from the names; confirm in the implementation file.
//! \param os               code stream that receives the generated code
//! \param subs             substitutions available to the emitted code
//! \param suffix           string appended to generated identifiers (exact use not visible)
//! \param recordingEnabled whether recording-related code should be included
451 void genEmitSpike(
CodeStream &os,
const Substitutions &subs,
const std::string &suffix,
bool recordingEnabled)
const;
//! Writes generated code to the given stream; the definition is not in this header.
//! NOTE(review): presumably emits initialisation of the shared memory used for recording -
//! inferred from the name; confirm in the implementation file.
//! \param os     code stream that receives the generated code
//! \param suffix string appended to generated identifiers (exact use not visible)
453 void genRecordingSharedMemInit(
CodeStream &os,
const std::string &suffix)
const;
460 return getPresynapticUpdateStrategy(sg, getPreferences());
//! Set of type-name strings.
//! NOTE(review): presumably filled by addDeviceType and queried by isDeviceType - confirm.
476 std::unordered_set<std::string> m_DeviceTypes;
//! Static list of presynaptic update strategies shared by all instances.
//! NOTE(review): elements are raw pointers; ownership and lifetime are not visible in
//! this listing - confirm who allocates and frees them.
481 static std::vector<PresynapticUpdateStrategySIMT::Base *> s_PresynapticUpdateStrategies;
Base class for Single Instruction Multiple Thread style backends.
Definition: backendSIMT.h:51
Definition: backendSIMT.h:28
const KernelBlockSize & getKernelBlockSize() const
Get kernel block size.
Definition: backendSIMT.h:219
Definition: backendSIMT.h:37
Definition: backendSIMT.h:31
#define GENN_EXPORT
Definition: gennExport.h:13
virtual void genDenseSynapseVariableRowInit(CodeStream &os, const Substitutions &kernelSubs, Handler handler) const final
Definition: backendSIMT.h:132
Base class for backend preferences - can be accessed via a global in 'classic' C++ code generator...
Definition: backendBase.h:58
Definition: backendSIMT.h:26
Definition: modelSpecMerged.h:31
Helper class for generating code - automatically inserts brackets, indents etc.
Definition: backendBase.h:30
Definition: synapseGroupInternal.h:9
Definition: codeStream.h:21
Definition: backendSIMT.h:34
virtual bool isPostsynapticRemapRequired() const final
Different backends may implement synaptic plasticity differently. Does this one require a postsynapti...
Definition: backendSIMT.h:147
Definition: substitutions.h:21
virtual bool isDeviceScalarRequired() const final
Should 'scalar' variables be implemented on device or can host variables be used directly?
Definition: backendSIMT.h:141
void addVarSubstitution(const std::string &source, const std::string &destionation, bool allowOverride=false)
Definition: substitutions.cc:25
AtomicMemSpace
What memory space atomic operation is required.
Definition: backendSIMT.h:70
Definition: backendBase.h:176
BackendSIMT(const KernelBlockSize &kernelBlockSizes, const PreferencesBase &preferences, const std::string &scalarType)
Definition: backendSIMT.h:54
Definition: initGroupMerged.h:329
Definition: customUpdateInternal.h:41
Definition: backendSIMT.h:29
Definition: initGroupMerged.h:100
Kernel
Kernels generated by SIMT backends.
Definition: backendSIMT.h:24
AtomicOperation
What atomic operation is required.
Definition: backendSIMT.h:63
std::function< void(CodeStream &, const T &, Substitutions &)> GroupHandler
Definition: backendBase.h:187
virtual bool isPopulationRNGRequired() const final
Different backends use different RNGs for different things. Does this one require population RNGs...
Definition: backendSIMT.h:145
Definition: presynapticUpdateStrategySIMT.h:22
Definition: backendSIMT.h:35
virtual std::string getDeviceVarPrefix() const final
Definition: backendSIMT.h:122
Definition: customUpdateInternal.h:9
virtual void genSparseSynapseVariableRowInit(CodeStream &os, const Substitutions &kernelSubs, Handler handler) const final
Definition: backendSIMT.h:127
Definition: backendSIMT.h:32
Definition: backendSIMT.h:27
std::function< void(CodeStream &, Substitutions &)> Handler
Definition: backendBase.h:184
Definition: backendSIMT.h:30
This variable should be duplicated in each batch.
std::array< size_t, KernelMax > KernelBlockSize
Array of block sizes for each kernel.
Definition: backendSIMT.h:44
Definition: backendSIMT.h:33
Definition: codeStream.h:94
size_t getKernelBlockSize(Kernel kernel) const
Definition: backendSIMT.h:156
Definition: backendSIMT.h:36