12 #include "../../../../share/genn/backends/opencl/cl2.hpp" 67 std::fill(manualWorkGroupSizes.begin(), manualWorkGroupSizes.end(), 32);
74 unsigned int manualPlatformID = 0;
80 unsigned int manualDeviceID = 0;
96 const std::string &scalarType,
unsigned int platformIndex,
unsigned int deviceIndex);
102 virtual bool areSharedMemAtomicsSlow()
const override;
108 virtual std::string
getThreadID(
unsigned int axis = 0)
const override{
return "get_local_id(" + std::to_string(axis) +
")"; }
111 virtual std::string
getBlockID(
unsigned int axis = 0)
const override{
return "get_group_id(" + std::to_string(axis) +
")"; }
114 virtual std::string
getCLZ()
const override {
return "clz"; }
117 virtual std::string getAtomic(
const std::string &type,
AtomicOperation op = AtomicOperation::ADD,
121 virtual void genSharedMemBarrier(
CodeStream &os)
const override;
124 virtual void genPopulationRNGInit(
CodeStream &os,
const std::string &globalRNG,
const std::string &seed,
const std::string &sequence)
const override;
127 virtual void genPopulationRNGPreamble(
CodeStream &os,
Substitutions &subs,
const std::string &globalRNG,
const std::string &name =
"rng")
const override;
131 virtual void genPopulationRNGPostamble(
CodeStream &os,
const std::string &globalRNG)
const override;
134 virtual void genGlobalRNGSkipAhead(
CodeStream &os,
Substitutions &subs,
const std::string &sequence,
const std::string &name =
"rng")
const override;
158 virtual void genVariableDefinition(
CodeStream &definitions,
CodeStream &definitionsInternal,
const std::string &type,
const std::string &name,
VarLocation loc)
const override;
159 virtual void genVariableImplementation(
CodeStream &os,
const std::string &type,
const std::string &name,
VarLocation loc)
const override;
160 virtual void genVariableAllocation(
CodeStream &os,
const std::string &type,
const std::string &name,
VarLocation loc,
size_t count,
MemAlloc &memAlloc)
const override;
161 virtual void genVariableFree(
CodeStream &os,
const std::string &name,
VarLocation loc)
const override;
163 virtual void genExtraGlobalParamDefinition(
CodeStream &definitions,
CodeStream &definitionsInternal,
const std::string &type,
const std::string &name,
VarLocation loc)
const override;
164 virtual void genExtraGlobalParamImplementation(
CodeStream &os,
const std::string &type,
const std::string &name,
VarLocation loc)
const override;
165 virtual void genExtraGlobalParamAllocation(
CodeStream &os,
const std::string &type,
const std::string &name,
166 VarLocation loc,
const std::string &countVarName =
"count",
const std::string &prefix =
"")
const override;
167 virtual void genExtraGlobalParamPush(
CodeStream &os,
const std::string &type,
const std::string &name,
168 VarLocation loc,
const std::string &countVarName =
"count",
const std::string &prefix =
"")
const override;
169 virtual void genExtraGlobalParamPull(
CodeStream &os,
const std::string &type,
const std::string &name,
170 VarLocation loc,
const std::string &countVarName =
"count",
const std::string &prefix =
"")
const override;
173 virtual void genMergedExtraGlobalParamPush(
CodeStream &os,
const std::string &suffix,
size_t mergedGroupIdx,
174 const std::string &groupIdx,
const std::string &fieldName,
175 const std::string &egpName)
const override;
178 virtual std::string getMergedGroupFieldHostType(
const std::string &type)
const override;
183 virtual void genVariablePush(
CodeStream &os,
const std::string &type,
const std::string &name,
VarLocation loc,
bool autoInitialized,
size_t count)
const override;
184 virtual void genVariablePull(
CodeStream &os,
const std::string &type,
const std::string &name,
VarLocation loc,
size_t count)
const override;
187 const std::string &name,
VarLocation loc,
unsigned int batchSize)
const override;
189 const std::string &name,
VarLocation loc,
unsigned int batchSize)
const override;
193 genCurrentSpikePush(os, ng, batchSize,
false);
197 genCurrentSpikePull(os, ng, batchSize,
false);
201 genCurrentSpikePush(os, ng, batchSize,
true);
205 genCurrentSpikePull(os, ng, batchSize,
true);
211 CodeStream &free,
const std::string &name,
size_t count,
MemAlloc &memAlloc)
const override;
214 const std::string &name,
bool updateInStepTime)
const override;
217 virtual void genReturnFreeDeviceMemoryBytes(
CodeStream &os)
const override;
219 virtual void genMakefilePreamble(std::ostream &os)
const override;
220 virtual void genMakefileLinkRule(std::ostream &os)
const override;
221 virtual void genMakefileCompileRule(std::ostream &os)
const override;
223 virtual void genMSBuildConfigProperties(std::ostream &os)
const override;
224 virtual void genMSBuildImportProps(std::ostream &os)
const override;
225 virtual void genMSBuildItemDefinitions(std::ostream &os)
const override;
226 virtual void genMSBuildCompileModule(
const std::string &moduleName, std::ostream &os)
const override;
227 virtual void genMSBuildImportTarget(std::ostream &os)
const override;
231 virtual std::vector<filesystem::path> getFilesToCopy(
const ModelSpecMerged &modelMerged)
const override;
246 virtual size_t getDeviceMemoryBytes()
const override {
return m_ChosenDevice.getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>(); }
256 virtual boost::uuids::detail::sha1::digest_type getHashDigest()
const override;
263 std::string getFloatAtomicAdd(
const std::string &ftype,
const char* memoryType =
"global")
const;
273 for(
const auto &g : groups) {
275 assert(g.getMemorySpace().empty());
278 const std::string buildKernelName =
"build" + T::name + std::to_string(g.getIndex()) +
"Kernel";
279 os <<
"cl::Kernel " << buildKernelName <<
";" << std::endl;
282 os <<
"cl::Buffer d_merged" << T::name <<
"Group" << g.getIndex() <<
";" << std::endl;
285 os <<
"void pushMerged" << T::name <<
"Group" << g.getIndex() <<
"ToDevice(unsigned int idx, ";
286 g.generateStructFieldArgumentDefinitions(os, *
this);
292 os <<
"CHECK_OPENCL_ERRORS(" << buildKernelName <<
".setArg(1, idx));" << std::endl;
295 const auto sortedFields = g.getSortedFields(*
this);
296 for(
size_t fieldIndex = 0; fieldIndex < sortedFields.size(); fieldIndex++) {
297 const auto &f = sortedFields[fieldIndex];
299 os <<
"CHECK_OPENCL_ERRORS(" << buildKernelName <<
".setArg(" << (2 + fieldIndex) <<
", " << std::get<1>(f) <<
"));" << std::endl;
303 os <<
"const cl::NDRange globalWorkSize(1, 1);" << std::endl;
304 os <<
"const cl::NDRange localWorkSize(1, 1);" << std::endl;
305 os <<
"CHECK_OPENCL_ERRORS(commandQueue.enqueueNDRangeKernel(" << buildKernelName <<
", cl::NullRange, globalWorkSize, localWorkSize));" << std::endl;
309 if(!groups.empty()) {
314 for(
auto f : mergedGroupFields) {
315 os <<
"cl::Kernel setMerged" << T::name << f.mergedGroupIndex << f.fieldName <<
"Kernel;" << std::endl;
321 void genMergedStructBuild(
CodeStream &os,
const ModelSpecMerged &modelMerged,
const std::vector<T> &groups,
const std::string &programName)
const 324 for(
const auto &g : groups) {
326 const std::string kernelName =
"build" + T::name + std::to_string(g.getIndex()) +
"Kernel";
327 os <<
"CHECK_OPENCL_ERRORS_POINTER(" << kernelName <<
" = cl::Kernel(" << programName <<
", \"" << kernelName <<
"\", &error));" << std::endl;
330 os <<
"CHECK_OPENCL_ERRORS_POINTER(d_merged" << T::name <<
"Group" << g.getIndex() <<
" = cl::Buffer(clContext, CL_MEM_READ_WRITE, size_t{" << g.getStructArraySize(*
this) <<
"}, nullptr, &error));" << std::endl;
333 os <<
"CHECK_OPENCL_ERRORS(" << kernelName <<
".setArg(0, d_merged" << T::name <<
"Group" << g.getIndex() <<
"));" << std::endl;
337 if(!groups.empty()) {
342 for(
auto f : mergedGroupFields) {
344 const std::string kernelName =
"setMerged" + T::name + std::to_string(f.mergedGroupIndex) + f.fieldName +
"Kernel";
345 os <<
"CHECK_OPENCL_ERRORS_POINTER(" << kernelName <<
" = cl::Kernel(" << programName <<
", \"" << kernelName <<
"\", &error));" << std::endl;
348 os <<
"CHECK_OPENCL_ERRORS(" << kernelName <<
".setArg(0, d_merged" << T::name <<
"Group" << f.mergedGroupIndex <<
"));" << std::endl;
358 for(
const auto &g : groups) {
360 os <<
"__kernel void build" << T::name << g.getIndex() <<
"Kernel(";
361 os <<
"__global struct Merged" << T::name <<
"Group" << g.getIndex() <<
" *group, unsigned int idx, ";
364 const auto sortedFields = g.getSortedFields(*
this);
365 for(
size_t fieldIndex = 0; fieldIndex < sortedFields.size(); fieldIndex++) {
366 const auto &f = sortedFields[fieldIndex];
370 os << std::get<0>(f) <<
" " << std::get<1>(f);
371 if(fieldIndex != (sortedFields.size() - 1)) {
380 for(
const auto &f : sortedFields) {
381 os <<
"group[idx]." << std::get<1>(f) <<
" = " << std::get<1>(f) <<
";" << std::endl;
387 if(!groups.empty()) {
392 for(
auto f : mergedGroupFields) {
394 os <<
"__kernel void setMerged" << T::name << f.mergedGroupIndex << f.fieldName <<
"Kernel(";
395 os <<
"__global struct Merged" << T::name <<
"Group" << f.mergedGroupIndex <<
" *group, unsigned int idx, ";
399 os << f.type <<
" " << f.fieldName <<
")";
402 os <<
"group[idx]." << f.fieldName <<
" = " << f.fieldName <<
";" << std::endl;
409 void genAtomicAddFloat(
CodeStream &os,
const std::string &memoryType)
const;
413 genCurrentSpikePushPull(os, ng, batchSize, spikeEvent,
true);
418 genCurrentSpikePushPull(os, ng, batchSize, spikeEvent,
false);
423 void genKernelDimensions(
CodeStream &os,
Kernel kernel,
size_t numThreadsX,
size_t batchSize,
size_t numBlockThreadsY = 1)
const;
428 void genBuildProgramFlagsString(
CodeStream &os)
const;
430 void divideKernelStreamInParts(
CodeStream &os,
const std::stringstream &kernelCode,
size_t partLength)
const;
433 bool isChosenDeviceAMD()
const;
436 bool isChosenDeviceNVIDIA()
const;
441 bool isChosenPlatformNVIDIA()
const;
445 bool shouldUseSubBufferAllocations()
const;
450 const unsigned int m_ChosenPlatformIndex;
451 const unsigned int m_ChosenDeviceIndex;
452 unsigned int m_AllocationAlignementBytes;
453 cl::Device m_ChosenDevice;
454 cl::Platform m_ChosenPlatform;
DeviceSelect
Methods for selecting OpenCL device.
Definition: opencl/backend.h:44
Definition: neuronGroupInternal.h:9
Base class for Single Instruction Multiple Thread style backends.
Definition: backendSIMT.h:51
virtual void genCurrentTrueSpikePull(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize) const override
Generate code for pulling true spikes emitted by a neuron group in the current timestep from the 'dev...
Definition: opencl/backend.h:195
VarLocation
< Flags defining which memory space variables should be allocated in
Definition: variableMode.h:10
virtual std::string getMergedGroupSimRNGType() const override
When generating merged structures what type to use for simulation RNGs.
Definition: opencl/backend.h:181
Definition: opencl/backend.h:92
virtual std::string getHostVarPrefix() const final
Definition: opencl/backend.h:235
virtual void genCurrentSpikeLikeEventPush(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize) const override
Generate code for pushing spike-like events emitted by a neuron group in the current timestep to the ...
Definition: opencl/backend.h:199
KernelBlockSize manualWorkGroupSizes
If block size select method is set to BlockSizeSelect::MANUAL, block size to use for each kernel...
Definition: opencl/backend.h:86
virtual void genCurrentTrueSpikePush(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize) const override
Generate code for pushing true spikes emitted by a neuron group in the current timestep to the 'devic...
Definition: opencl/backend.h:191
std::set< EGPField > getMergedGroupFields() const
Definition: modelSpecMerged.h:242
Base class for backend preferences - can be accessed via a global in 'classic' C++ code generator...
Definition: backendBase.h:58
virtual void genCurrentSpikeLikeEventPull(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize) const override
Generate code for pulling spike-like events emitted by a neuron group in the current timestep from th...
Definition: opencl/backend.h:203
virtual std::string getPointerPrefix() const override
Different backends may have different or no pointer prefix (e.g. __global for OpenCL) ...
Definition: opencl/backend.h:237
Definition: modelSpecMerged.h:31
Helper class for generating code - automatically inserts brackets, indents etc.
Definition: backendBase.h:30
virtual bool isPopulationRNGInitialisedOnDevice() const override
Different backends seed RNGs in different ways. Does this one initialise population RNGS on device...
Definition: opencl/backend.h:240
virtual std::string getThreadID(unsigned int axis=0) const override
Get the ID of the current thread within the threadblock.
Definition: opencl/backend.h:108
std::vector< std::pair< std::string, size_t > > MemorySpaces
Vector of prefixes required to allocate in memory space and size of memory space. ...
Definition: backendBase.h:190
WorkGroupSizeSelect
Methods for selecting OpenCL kernel workgroup size.
Definition: opencl/backend.h:54
Definition: codeStream.h:21
#define BACKEND_EXPORT
Definition: backendExport.h:13
Definition: backendBase.h:107
Definition: substitutions.h:21
virtual std::string getCLZ() const override
Get the name of the count-leading-zeros function.
Definition: opencl/backend.h:114
virtual std::string getBlockID(unsigned int axis=0) const override
Get the ID of the current thread block.
Definition: opencl/backend.h:111
AtomicMemSpace
What memory space atomic operation is required.
Definition: backendSIMT.h:70
GENN_EXPORT bool isTypePointer(const std::string &type)
Function to determine whether a string containing a type is a pointer.
Definition: gennUtils.cc:75
std::function< void(CodeStream &)> HostHandler
Definition: backendBase.h:182
virtual size_t getDeviceMemoryBytes() const override
How many bytes of memory does 'device' have.
Definition: opencl/backend.h:246
const cl::Device & getChosenOpenCLDevice() const
Definition: opencl/backend.h:261
Preferences()
Definition: opencl/backend.h:65
virtual bool supportsNamespace() const override
Does this backend support namespaces i.e. can C++ implementation of support functions be used...
Definition: opencl/backend.h:253
virtual std::string getSharedPrefix() const override
Get the prefix to use for shared memory variables.
Definition: opencl/backend.h:105
Kernel
Kernels generated by SIMT backends.
Definition: backendSIMT.h:24
AtomicOperation
What atomic operation is required.
Definition: backendSIMT.h:63
virtual bool isHostReductionRequired() const override
Backends which support batch-parallelism might require an additional host reduction phase after reduc...
Definition: opencl/backend.h:243
Definition: generateModules.h:16
Preferences for OpenCL backend.
Definition: opencl/backend.h:63
PlatformSelect
Methods for selecting OpenCL platform.
Definition: opencl/backend.h:38
std::array< size_t, KernelMax > KernelBlockSize
Array of block sizes for each kernel.
Definition: backendSIMT.h:44
Definition: codeStream.h:94