GeNN  4.9.0
GPU enhanced Neuronal Networks (GeNN)
opencl/backend.h
Go to the documentation of this file.
1 #pragma once
2 
3 // Standard C++ includes
4 #include <algorithm>
5 #include <array>
6 #include <functional>
7 #include <map>
8 #include <numeric>
9 #include <string>
10 
11 // OpenCL includes
12 #include "../../../../share/genn/backends/opencl/cl2.hpp"
13 
14 // GeNN includes
15 #include "backendExport.h"
16 #include "gennUtils.h"
17 
18 // GeNN code generator includes
23 
24 // Forward declarations
25 namespace filesystem
26 {
27  class path;
28 }
29 
30 //--------------------------------------------------------------------------
31 // CodeGenerator::OpenCL::DeviceSelectMethod
32 //--------------------------------------------------------------------------
33 namespace CodeGenerator
34 {
35 namespace OpenCL
36 {
38 enum class PlatformSelect
39 {
40  MANUAL,
41 };
42 
44 enum class DeviceSelect
45 {
46  MOST_MEMORY,
47  MANUAL,
48 };
49 
50 //--------------------------------------------------------------------------
51 // CodeGenerator::OpenCL::WorkGroupSizeSelect
52 //--------------------------------------------------------------------------
55 {
56  MANUAL,
57 };
58 
59 //--------------------------------------------------------------------------
60 // CodeGenerator::OpenCL::Preferences
61 //--------------------------------------------------------------------------
64 {
66  {
67  std::fill(manualWorkGroupSizes.begin(), manualWorkGroupSizes.end(), 32);
68  }
69 
71  PlatformSelect platformSelectMethod = PlatformSelect::MANUAL;
72 
74  unsigned int manualPlatformID = 0;
75 
77  DeviceSelect deviceSelectMethod = DeviceSelect::MOST_MEMORY;
78 
80  unsigned int manualDeviceID = 0;
81 
83  WorkGroupSizeSelect workGroupSizeSelectMethod = WorkGroupSizeSelect::MANUAL;
84 
87 };
88 
89 //--------------------------------------------------------------------------
90 // CodeGenerator::OpenCL::Backend
91 //--------------------------------------------------------------------------
93 {
94 public:
95  Backend(const KernelBlockSize &kernelWorkGroupSizes, const Preferences &preferences,
96  const std::string &scalarType, unsigned int platformIndex, unsigned int deviceIndex);
97 
98  //--------------------------------------------------------------------------
99  // CodeGenerator::BackendSIMT virtuals
100  //--------------------------------------------------------------------------
102  virtual bool areSharedMemAtomicsSlow() const override;
103 
105  virtual std::string getSharedPrefix() const override { return "__local "; }
106 
108  virtual std::string getThreadID(unsigned int axis = 0) const override{ return "get_local_id(" + std::to_string(axis) + ")"; }
109 
111  virtual std::string getBlockID(unsigned int axis = 0) const override{ return "get_group_id(" + std::to_string(axis) + ")"; }
112 
114  virtual std::string getCLZ() const override { return "clz"; }
115 
117  virtual std::string getAtomic(const std::string &type, AtomicOperation op = AtomicOperation::ADD,
118  AtomicMemSpace memSpace = AtomicMemSpace::GLOBAL) const override;
119 
121  virtual void genSharedMemBarrier(CodeStream &os) const override;
122 
124  virtual void genPopulationRNGInit(CodeStream &os, const std::string &globalRNG, const std::string &seed, const std::string &sequence) const override;
125 
127  virtual void genPopulationRNGPreamble(CodeStream &os, Substitutions &subs, const std::string &globalRNG, const std::string &name = "rng") const override;
128 
130 
131  virtual void genPopulationRNGPostamble(CodeStream &os, const std::string &globalRNG) const override;
132 
134  virtual void genGlobalRNGSkipAhead(CodeStream &os, Substitutions &subs, const std::string &sequence, const std::string &name = "rng") const override;
135 
136  //--------------------------------------------------------------------------
137  // CodeGenerator::BackendBase:: virtuals
138  //--------------------------------------------------------------------------
139  virtual void genNeuronUpdate(CodeStream &os, const ModelSpecMerged &modelMerged,
140  HostHandler preambleHandler, HostHandler pushEGPHandler) const override;
141 
142  virtual void genSynapseUpdate(CodeStream &os, const ModelSpecMerged &modelMerged,
143  HostHandler preambleHandler, HostHandler pushEGPHandler) const override;
144 
145  virtual void genCustomUpdate(CodeStream &os, const ModelSpecMerged &modelMerged,
146  HostHandler preambleHandler, HostHandler pushEGPHandler) const override;
147 
148  virtual void genInit(CodeStream &os, const ModelSpecMerged &modelMerged,
149  HostHandler preambleHandler, HostHandler initPushEGPHandler, HostHandler initSparsePushEGPHandler) const override;
150 
151  virtual void genDefinitionsPreamble(CodeStream &os, const ModelSpecMerged &modelMerged) const override;
152  virtual void genDefinitionsInternalPreamble(CodeStream &os, const ModelSpecMerged &modelMerged) const override;
153  virtual void genRunnerPreamble(CodeStream &os, const ModelSpecMerged &modelMerged, const MemAlloc &memAlloc) const override;
154  virtual void genAllocateMemPreamble(CodeStream &os, const ModelSpecMerged &modelMerged, const MemAlloc &allocations) const override;
155  virtual void genFreeMemPreamble(CodeStream &os, const ModelSpecMerged &modelMerged) const override;
156  virtual void genStepTimeFinalisePreamble(CodeStream &os, const ModelSpecMerged &modelMerged) const override;
157 
158  virtual void genVariableDefinition(CodeStream &definitions, CodeStream &definitionsInternal, const std::string &type, const std::string &name, VarLocation loc) const override;
159  virtual void genVariableImplementation(CodeStream &os, const std::string &type, const std::string &name, VarLocation loc) const override;
160  virtual void genVariableAllocation(CodeStream &os, const std::string &type, const std::string &name, VarLocation loc, size_t count, MemAlloc &memAlloc) const override;
161  virtual void genVariableFree(CodeStream &os, const std::string &name, VarLocation loc) const override;
162 
163  virtual void genExtraGlobalParamDefinition(CodeStream &definitions, CodeStream &definitionsInternal, const std::string &type, const std::string &name, VarLocation loc) const override;
164  virtual void genExtraGlobalParamImplementation(CodeStream &os, const std::string &type, const std::string &name, VarLocation loc) const override;
165  virtual void genExtraGlobalParamAllocation(CodeStream &os, const std::string &type, const std::string &name,
166  VarLocation loc, const std::string &countVarName = "count", const std::string &prefix = "") const override;
167  virtual void genExtraGlobalParamPush(CodeStream &os, const std::string &type, const std::string &name,
168  VarLocation loc, const std::string &countVarName = "count", const std::string &prefix = "") const override;
169  virtual void genExtraGlobalParamPull(CodeStream &os, const std::string &type, const std::string &name,
170  VarLocation loc, const std::string &countVarName = "count", const std::string &prefix = "") const override;
171 
173  virtual void genMergedExtraGlobalParamPush(CodeStream &os, const std::string &suffix, size_t mergedGroupIdx,
174  const std::string &groupIdx, const std::string &fieldName,
175  const std::string &egpName) const override;
176 
178  virtual std::string getMergedGroupFieldHostType(const std::string &type) const override;
179 
181  virtual std::string getMergedGroupSimRNGType() const override { return "clrngLfsr113HostStream"; }
182 
183  virtual void genVariablePush(CodeStream &os, const std::string &type, const std::string &name, VarLocation loc, bool autoInitialized, size_t count) const override;
184  virtual void genVariablePull(CodeStream &os, const std::string &type, const std::string &name, VarLocation loc, size_t count) const override;
185 
186  virtual void genCurrentVariablePush(CodeStream &os, const NeuronGroupInternal &ng, const std::string &type,
187  const std::string &name, VarLocation loc, unsigned int batchSize) const override;
188  virtual void genCurrentVariablePull(CodeStream &os, const NeuronGroupInternal &ng, const std::string &type,
189  const std::string &name, VarLocation loc, unsigned int batchSize) const override;
190 
191  virtual void genCurrentTrueSpikePush(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize) const override
192  {
193  genCurrentSpikePush(os, ng, batchSize, false);
194  }
195  virtual void genCurrentTrueSpikePull(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize) const override
196  {
197  genCurrentSpikePull(os, ng, batchSize, false);
198  }
199  virtual void genCurrentSpikeLikeEventPush(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize) const override
200  {
201  genCurrentSpikePush(os, ng, batchSize, true);
202  }
203  virtual void genCurrentSpikeLikeEventPull(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize) const override
204  {
205  genCurrentSpikePull(os, ng, batchSize, true);
206  }
207 
208  virtual void genGlobalDeviceRNG(CodeStream &definitions, CodeStream &definitionsInternal, CodeStream &runner,
209  CodeStream &allocations, CodeStream &free, MemAlloc &memAlloc) const override;
210  virtual void genPopulationRNG(CodeStream &definitions, CodeStream &definitionsInternal, CodeStream &runner, CodeStream &allocations,
211  CodeStream &free, const std::string &name, size_t count, MemAlloc &memAlloc) const override;
212  virtual void genTimer(CodeStream &definitions, CodeStream &definitionsInternal, CodeStream &runner,
213  CodeStream &allocations, CodeStream &free, CodeStream &stepTimeFinalise,
214  const std::string &name, bool updateInStepTime) const override;
215 
217  virtual void genReturnFreeDeviceMemoryBytes(CodeStream &os) const override;
218 
219  virtual void genMakefilePreamble(std::ostream &os) const override;
220  virtual void genMakefileLinkRule(std::ostream &os) const override;
221  virtual void genMakefileCompileRule(std::ostream &os) const override;
222 
223  virtual void genMSBuildConfigProperties(std::ostream &os) const override;
224  virtual void genMSBuildImportProps(std::ostream &os) const override;
225  virtual void genMSBuildItemDefinitions(std::ostream &os) const override;
226  virtual void genMSBuildCompileModule(const std::string &moduleName, std::ostream &os) const override;
227  virtual void genMSBuildImportTarget(std::ostream &os) const override;
228 
230 
231  virtual std::vector<filesystem::path> getFilesToCopy(const ModelSpecMerged &modelMerged) const override;
232 
235  virtual std::string getHostVarPrefix() const final { return "h_"; }
236 
237  virtual std::string getPointerPrefix() const override { return "__global "; };
238 
240  virtual bool isPopulationRNGInitialisedOnDevice() const override { return false; }
241 
243  virtual bool isHostReductionRequired() const override { return false; }
244 
246  virtual size_t getDeviceMemoryBytes() const override { return m_ChosenDevice.getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>(); }
247 
251  virtual MemorySpaces getMergedGroupMemorySpaces(const ModelSpecMerged &modelMerged) const override;
252 
253  virtual bool supportsNamespace() const override { return false; };
254 
256  virtual boost::uuids::detail::sha1::digest_type getHashDigest() const override;
257 
258  //--------------------------------------------------------------------------
259  // Public API
260  //--------------------------------------------------------------------------
261  const cl::Device &getChosenOpenCLDevice() const { return m_ChosenDevice; }
262 
263  std::string getFloatAtomicAdd(const std::string &ftype, const char* memoryType = "global") const;
264 
265 private:
266  //--------------------------------------------------------------------------
267  // Private methods
268  //--------------------------------------------------------------------------
269  template<typename T>
270  void genMergedStructPreamble(CodeStream &os, const ModelSpecMerged &modelMerged, const std::vector<T> &groups) const
271  {
272  // Loop through groups
273  for(const auto &g : groups) {
274  // Check there's no memory space assigned as OpenCL doesn't currently support them
275  assert(g.getMemorySpace().empty());
276 
277  // Declare build kernel
278  const std::string buildKernelName = "build" + T::name + std::to_string(g.getIndex()) + "Kernel";
279  os << "cl::Kernel " << buildKernelName << ";" << std::endl;
280 
281  // Declare buffer
282  os << "cl::Buffer d_merged" << T::name << "Group" << g.getIndex() << ";" << std::endl;
283 
284  // Write function to update
285  os << "void pushMerged" << T::name << "Group" << g.getIndex() << "ToDevice(unsigned int idx, ";
286  g.generateStructFieldArgumentDefinitions(os, *this);
287  os << ")";
288  {
289  CodeStream::Scope b(os);
290 
291  // Add idx parameter
292  os << "CHECK_OPENCL_ERRORS(" << buildKernelName << ".setArg(1, idx));" << std::endl;
293 
294  // Loop through sorted fields and add arguments
295  const auto sortedFields = g.getSortedFields(*this);
296  for(size_t fieldIndex = 0; fieldIndex < sortedFields.size(); fieldIndex++) {
297  const auto &f = sortedFields[fieldIndex];
298 
299  os << "CHECK_OPENCL_ERRORS(" << buildKernelName << ".setArg(" << (2 + fieldIndex) << ", " << std::get<1>(f) << "));" << std::endl;
300  }
301 
302  // Launch kernel
303  os << "const cl::NDRange globalWorkSize(1, 1);" << std::endl;
304  os << "const cl::NDRange localWorkSize(1, 1);" << std::endl;
305  os << "CHECK_OPENCL_ERRORS(commandQueue.enqueueNDRangeKernel(" << buildKernelName << ", cl::NullRange, globalWorkSize, localWorkSize));" << std::endl;
306  }
307  }
308 
309  if(!groups.empty()) {
310  // Get set of unique fields referenced in a merged group
311  const auto mergedGroupFields = modelMerged.getMergedGroupFields<T>();
312 
313  // Loop through resultant fields and declare kernel for setting EGP
314  for(auto f : mergedGroupFields) {
315  os << "cl::Kernel setMerged" << T::name << f.mergedGroupIndex << f.fieldName << "Kernel;" << std::endl;
316  }
317  }
318  }
319 
320  template<typename T>
321  void genMergedStructBuild(CodeStream &os, const ModelSpecMerged &modelMerged, const std::vector<T> &groups, const std::string &programName) const
322  {
323  // Loop through groups
324  for(const auto &g : groups) {
325  // Create kernel object
326  const std::string kernelName = "build" + T::name + std::to_string(g.getIndex()) + "Kernel";
327  os << "CHECK_OPENCL_ERRORS_POINTER(" << kernelName << " = cl::Kernel(" << programName << ", \"" << kernelName << "\", &error));" << std::endl;
328 
329  // Create group buffer
330  os << "CHECK_OPENCL_ERRORS_POINTER(d_merged" << T::name << "Group" << g.getIndex() << " = cl::Buffer(clContext, CL_MEM_READ_WRITE, size_t{" << g.getStructArraySize(*this) << "}, nullptr, &error));" << std::endl;
331 
332  // Set group buffer as first kernel argument
333  os << "CHECK_OPENCL_ERRORS(" << kernelName << ".setArg(0, d_merged" << T::name << "Group" << g.getIndex() << "));" << std::endl;
334  os << std::endl;
335  }
336 
337  if(!groups.empty()) {
338  // Get set of unique fields referenced in a merged group
339  const auto mergedGroupFields = modelMerged.getMergedGroupFields<T>();
340 
341  // Loop through resultant fields
342  for(auto f : mergedGroupFields) {
343  // Create kernel object
344  const std::string kernelName = "setMerged" + T::name + std::to_string(f.mergedGroupIndex) + f.fieldName + "Kernel";
345  os << "CHECK_OPENCL_ERRORS_POINTER(" << kernelName << " = cl::Kernel(" << programName << ", \"" << kernelName << "\", &error));" << std::endl;
346 
347  // Set group buffer as first kernel argument
348  os << "CHECK_OPENCL_ERRORS(" << kernelName << ".setArg(0, d_merged" << T::name << "Group" << f.mergedGroupIndex << "));" << std::endl;
349  os << std::endl;
350  }
351  }
352  }
353 
354  template<typename T>
355  void genMergedStructBuildKernels(CodeStream &os, const ModelSpecMerged &modelMerged, const std::vector<T> &groups) const
356  {
357  // Loop through groups
358  for(const auto &g : groups) {
359  // Generate kernel to build struct on device
360  os << "__kernel void build" << T::name << g.getIndex() << "Kernel(";
361  os << "__global struct Merged" << T::name << "Group" << g.getIndex() << " *group, unsigned int idx, ";
362 
363  // Loop through sorted struct fields
364  const auto sortedFields = g.getSortedFields(*this);
365  for(size_t fieldIndex = 0; fieldIndex < sortedFields.size(); fieldIndex++) {
366  const auto &f = sortedFields[fieldIndex];
367  if(::Utils::isTypePointer(std::get<0>(f))) {
368  os << "__global ";
369  }
370  os << std::get<0>(f) << " " << std::get<1>(f);
371  if(fieldIndex != (sortedFields.size() - 1)) {
372  os << ", ";
373  }
374  }
375  os << ")";
376  {
377  CodeStream::Scope b(os);
378 
379  // Assign all structure fields to values passed through parameters
380  for(const auto &f : sortedFields) {
381  os << "group[idx]." << std::get<1>(f) << " = " << std::get<1>(f) << ";" << std::endl;
382  }
383  }
384  os << std::endl;
385  }
386 
387  if(!groups.empty()) {
388  // Get set of unique fields referenced in a merged group
389  const auto mergedGroupFields = modelMerged.getMergedGroupFields<T>();
390 
391  // Loop through resultant fields and generate push function for pointer extra global parameters
392  for(auto f : mergedGroupFields) {
393 
394  os << "__kernel void setMerged" << T::name << f.mergedGroupIndex << f.fieldName << "Kernel(";
395  os << "__global struct Merged" << T::name << "Group" << f.mergedGroupIndex << " *group, unsigned int idx, ";
396  if(::Utils::isTypePointer(f.type)) {
397  os << "__global ";
398  }
399  os << f.type << " " << f.fieldName << ")";
400  {
401  CodeStream::Scope b(os);
402  os << "group[idx]." << f.fieldName << " = " << f.fieldName << ";" << std::endl;
403  }
404  os << std::endl;
405  }
406  }
407  }
408 
409  void genAtomicAddFloat(CodeStream &os, const std::string &memoryType) const;
410 
411  void genCurrentSpikePush(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize, bool spikeEvent) const
412  {
413  genCurrentSpikePushPull(os, ng, batchSize, spikeEvent, true);
414  }
415 
416  void genCurrentSpikePull(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize, bool spikeEvent) const
417  {
418  genCurrentSpikePushPull(os, ng, batchSize, spikeEvent, false);
419  }
420 
421  void genCurrentSpikePushPull(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize, bool spikeEvent, bool push) const;
422 
423  void genKernelDimensions(CodeStream &os, Kernel kernel, size_t numThreadsX, size_t batchSize, size_t numBlockThreadsY = 1) const;
424 
425  void genKernelPreamble(CodeStream &os, const ModelSpecMerged &modelMerged) const;
426 
428  void genBuildProgramFlagsString(CodeStream &os) const;
429 
430  void divideKernelStreamInParts(CodeStream &os, const std::stringstream &kernelCode, size_t partLength) const;
431 
433  bool isChosenDeviceAMD() const;
434 
436  bool isChosenDeviceNVIDIA() const;
437 
439 
441  bool isChosenPlatformNVIDIA() const;
442 
444 
445  bool shouldUseSubBufferAllocations() const;
446 
447  //--------------------------------------------------------------------------
448  // Members
449  //--------------------------------------------------------------------------
450  const unsigned int m_ChosenPlatformIndex;
451  const unsigned int m_ChosenDeviceIndex;
452  unsigned int m_AllocationAlignementBytes;
453  cl::Device m_ChosenDevice;
454  cl::Platform m_ChosenPlatform;
455 };
456 } // OpenCL
457 } // CodeGenerator
DeviceSelect
Methods for selecting OpenCL device.
Definition: opencl/backend.h:44
Definition: neuronGroupInternal.h:9
Base class for Single Instruction Multiple Thread style backends.
Definition: backendSIMT.h:51
virtual void genCurrentTrueSpikePull(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize) const override
Generate code for pulling true spikes emitted by a neuron group in the current timestep from the &#39;dev...
Definition: opencl/backend.h:195
VarLocation
< Flags defining which memory space variables should be allocated in
Definition: variableMode.h:10
virtual std::string getMergedGroupSimRNGType() const override
When generating merged structures what type to use for simulation RNGs.
Definition: opencl/backend.h:181
Definition: opencl/backend.h:92
virtual std::string getHostVarPrefix() const final
Definition: opencl/backend.h:235
virtual void genCurrentSpikeLikeEventPush(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize) const override
Generate code for pushing spike-like events emitted by a neuron group in the current timestep to the ...
Definition: opencl/backend.h:199
KernelBlockSize manualWorkGroupSizes
If block size select method is set to BlockSizeSelect::MANUAL, block size to use for each kernel...
Definition: opencl/backend.h:86
virtual void genCurrentTrueSpikePush(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize) const override
Generate code for pushing true spikes emitted by a neuron group in the current timestep to the &#39;devic...
Definition: opencl/backend.h:191
std::set< EGPField > getMergedGroupFields() const
Definition: modelSpecMerged.h:242
Base class for backend preferences - can be accessed via a global in &#39;classic&#39; C++ code generator...
Definition: backendBase.h:58
virtual void genCurrentSpikeLikeEventPull(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize) const override
Generate code for pulling spike-like events emitted by a neuron group in the current timestep from th...
Definition: opencl/backend.h:203
virtual std::string getPointerPrefix() const override
Different backends may have different or no pointer prefix (e.g. __global for OpenCL) ...
Definition: opencl/backend.h:237
Definition: modelSpecMerged.h:31
Helper class for generating code - automatically inserts brackets, indents etc.
Definition: backendBase.h:30
virtual bool isPopulationRNGInitialisedOnDevice() const override
Different backends seed RNGs in different ways. Does this one initialise population RNGS on device...
Definition: opencl/backend.h:240
virtual std::string getThreadID(unsigned int axis=0) const override
Get the ID of the current thread within the threadblock.
Definition: opencl/backend.h:108
std::vector< std::pair< std::string, size_t > > MemorySpaces
Vector of prefixes required to allocate in memory space and size of memory space. ...
Definition: backendBase.h:190
WorkGroupSizeSelect
Methods for selecting OpenCL kernel workgroup size.
Definition: opencl/backend.h:54
Definition: codeStream.h:21
#define BACKEND_EXPORT
Definition: backendExport.h:13
Definition: backendBase.h:107
Definition: substitutions.h:21
virtual std::string getCLZ() const override
Get the name of the count-leading-zeros function.
Definition: opencl/backend.h:114
virtual std::string getBlockID(unsigned int axis=0) const override
Get the ID of the current thread block.
Definition: opencl/backend.h:111
AtomicMemSpace
What memory space atomic operation is required.
Definition: backendSIMT.h:70
GENN_EXPORT bool isTypePointer(const std::string &type)
Function to determine whether a string containing a type is a pointer.
Definition: gennUtils.cc:75
std::function< void(CodeStream &)> HostHandler
Definition: backendBase.h:182
virtual size_t getDeviceMemoryBytes() const override
How many bytes of memory does &#39;device&#39; have.
Definition: opencl/backend.h:246
const cl::Device & getChosenOpenCLDevice() const
Definition: opencl/backend.h:261
Preferences()
Definition: opencl/backend.h:65
virtual bool supportsNamespace() const override
Does this backend support namespaces i.e. can C++ implementation of support functions be used...
Definition: opencl/backend.h:253
virtual std::string getSharedPrefix() const override
Get the prefix to use for shared memory variables.
Definition: opencl/backend.h:105
Kernel
Kernels generated by SIMT backends.
Definition: backendSIMT.h:24
AtomicOperation
What atomic operation is required.
Definition: backendSIMT.h:63
virtual bool isHostReductionRequired() const override
Backends which support batch-parallelism might require an additional host reduction phase after reduc...
Definition: opencl/backend.h:243
Definition: generateModules.h:16
Preferences for OpenCL backend.
Definition: opencl/backend.h:63
PlatformSelect
Methods for selecting OpenCL platform.
Definition: opencl/backend.h:38
std::array< size_t, KernelMax > KernelBlockSize
Array of block sizes for each kernel.
Definition: backendSIMT.h:44
Definition: codeStream.h:94