16 #include <cuda_runtime.h> 66 std::fill(manualBlockSizes.begin(), manualBlockSizes.end(), 32);
70 bool showPtxInfo =
false;
73 bool generateLineInfo =
false;
77 bool selectGPUByDeviceID =
false;
80 bool enableNCCLReductions =
false;
85 bool generateSimpleErrorHandling =
false;
91 unsigned int manualDeviceID = 0;
103 size_t constantCacheOverhead = 72 * 5;
106 std::string userNvccFlags =
"";
133 const std::string &scalarType,
int device);
139 virtual bool areSharedMemAtomicsSlow()
const override;
145 virtual std::string getThreadID(
unsigned int axis = 0)
const override;
148 virtual std::string getBlockID(
unsigned int axis = 0)
const override;
151 virtual std::string
getCLZ()
const override {
return "__clz"; }
154 virtual std::string getAtomic(
const std::string &type,
AtomicOperation op = AtomicOperation::ADD,
158 virtual void genSharedMemBarrier(
CodeStream &os)
const override;
161 virtual void genPopulationRNGInit(
CodeStream &os,
const std::string &globalRNG,
const std::string &seed,
const std::string &sequence)
const override;
164 virtual void genPopulationRNGPreamble(
CodeStream &os,
Substitutions &subs,
const std::string &globalRNG,
const std::string &name =
"rng")
const override;
168 virtual void genPopulationRNGPostamble(
CodeStream &os,
const std::string &globalRNG)
const override;
171 virtual void genGlobalRNGSkipAhead(
CodeStream &os,
Substitutions &subs,
const std::string &sequence,
const std::string &name =
"rng")
const override;
195 virtual void genVariableDefinition(
CodeStream &definitions,
CodeStream &definitionsInternal,
const std::string &type,
const std::string &name,
VarLocation loc)
const override;
196 virtual void genVariableImplementation(
CodeStream &os,
const std::string &type,
const std::string &name,
VarLocation loc)
const override;
197 virtual void genVariableAllocation(
CodeStream &os,
const std::string &type,
const std::string &name,
VarLocation loc,
size_t count,
MemAlloc &memAlloc)
const override;
198 virtual void genVariableFree(
CodeStream &os,
const std::string &name,
VarLocation loc)
const override;
200 virtual void genExtraGlobalParamDefinition(
CodeStream &definitions,
CodeStream &definitionsInternal,
const std::string &type,
const std::string &name,
VarLocation loc)
const override;
201 virtual void genExtraGlobalParamImplementation(
CodeStream &os,
const std::string &type,
const std::string &name,
VarLocation loc)
const override;
202 virtual void genExtraGlobalParamAllocation(
CodeStream &os,
const std::string &type,
const std::string &name,
203 VarLocation loc,
const std::string &countVarName =
"count",
const std::string &prefix =
"")
const override;
204 virtual void genExtraGlobalParamPush(
CodeStream &os,
const std::string &type,
const std::string &name,
205 VarLocation loc,
const std::string &countVarName =
"count",
const std::string &prefix =
"")
const override;
206 virtual void genExtraGlobalParamPull(
CodeStream &os,
const std::string &type,
const std::string &name,
207 VarLocation loc,
const std::string &countVarName =
"count",
const std::string &prefix =
"")
const override;
210 virtual void genMergedExtraGlobalParamPush(
CodeStream &os,
const std::string &suffix,
size_t mergedGroupIdx,
211 const std::string &groupIdx,
const std::string &fieldName,
212 const std::string &egpName)
const override;
215 virtual std::string getMergedGroupFieldHostType(
const std::string &type)
const override;
220 virtual void genVariablePush(
CodeStream &os,
const std::string &type,
const std::string &name,
VarLocation loc,
bool autoInitialized,
size_t count)
const override;
221 virtual void genVariablePull(
CodeStream &os,
const std::string &type,
const std::string &name,
VarLocation loc,
size_t count)
const override;
224 const std::string &name,
VarLocation loc,
unsigned int batchSize)
const override;
226 const std::string &name,
VarLocation loc,
unsigned int batchSize)
const override;
230 genCurrentSpikePush(os, ng, batchSize,
false);
234 genCurrentSpikePull(os, ng, batchSize,
false);
238 genCurrentSpikePush(os, ng, batchSize,
true);
242 genCurrentSpikePull(os, ng, batchSize,
true);
250 const std::string &name,
size_t count,
MemAlloc &memAlloc)
const override;
253 const std::string &name,
bool updateInStepTime)
const override;
256 virtual void genReturnFreeDeviceMemoryBytes(
CodeStream &os)
const override;
258 virtual void genMakefilePreamble(std::ostream &os)
const override;
259 virtual void genMakefileLinkRule(std::ostream &os)
const override;
260 virtual void genMakefileCompileRule(std::ostream &os)
const override;
262 virtual void genMSBuildConfigProperties(std::ostream &os)
const override;
263 virtual void genMSBuildImportProps(std::ostream &os)
const override;
264 virtual void genMSBuildItemDefinitions(std::ostream &os)
const override;
265 virtual void genMSBuildCompileModule(
const std::string &moduleName, std::ostream &os)
const override;
266 virtual void genMSBuildImportTarget(std::ostream &os)
const override;
269 virtual std::string getAllocateMemParams(
const ModelSpecMerged &)
const override;
288 virtual boost::uuids::detail::sha1::digest_type getHashDigest()
const override;
296 std::string getNVCCFlags()
const;
303 void genMergedStructArrayPush(
CodeStream &os,
const std::vector<T> &groups)
const 306 for(
const auto &g : groups) {
308 assert(!g.getMemorySpace().empty());
311 os << g.getMemorySpace() <<
" Merged" << T::name <<
"Group" << g.getIndex() <<
" d_merged" << T::name <<
"Group" << g.getIndex() <<
"[" << g.getGroups().size() <<
"];" << std::endl;
314 os <<
"void pushMerged" << T::name <<
"Group" << g.getIndex() <<
"ToDevice(unsigned int idx, ";
315 g.generateStructFieldArgumentDefinitions(os, *
this);
321 os <<
"Merged" << T::name <<
"Group" << g.getIndex() <<
" group = {";
322 const auto sortedFields = g.getSortedFields(*
this);
323 for(
const auto &f : sortedFields) {
324 os << std::get<1>(f) <<
", ";
326 os <<
"};" << std::endl;
329 os <<
"CHECK_CUDA_ERRORS(cudaMemcpyToSymbolAsync(d_merged" << T::name <<
"Group" << g.getIndex() <<
", &group, ";
330 os <<
"sizeof(Merged" << T::name <<
"Group" << g.getIndex() <<
"), idx * sizeof(Merged" << T::name <<
"Group" << g.getIndex() <<
")));" << std::endl;
337 size_t getChosenDeviceSafeConstMemBytes()
const 339 return m_ChosenDevice.totalConstMem - getPreferences<Preferences>().constantCacheOverhead;
345 void genKernelDimensions(
CodeStream &os,
Kernel kernel,
size_t numThreadsX,
size_t batchSize,
size_t numBlockThreadsY = 1)
const;
350 const int m_ChosenDeviceID;
351 cudaDeviceProp m_ChosenDevice;
352 int m_RuntimeVersion;
Definition: neuronGroupInternal.h:9
Base class for Single Instruction Multiple Thread style backends.
Definition: backendSIMT.h:51
virtual std::string getCLZ() const override
Get the name of the count-leading-zeros function.
Definition: cuda/backend.h:151
VarLocation
< Flags defining which memory space variables should be allocated in
Definition: variableMode.h:10
virtual size_t getDeviceMemoryBytes() const override
How many bytes of memory does 'device' have.
Definition: cuda/backend.h:278
int getChosenDeviceID() const
Definition: cuda/backend.h:294
virtual bool supportsNamespace() const override
Does this backend support namespaces i.e. can C++ implementation of support functions be used...
Definition: cuda/backend.h:285
Pick device with most global memory.
virtual std::string getSharedPrefix() const override
Get the prefix to use for shared memory variables.
Definition: cuda/backend.h:142
Pick optimal device based on how well kernels can be simultaneously simulated and occupancy...
BlockSizeSelect
Methods for selecting CUDA kernel block size.
Definition: cuda/backend.h:52
Base class for backend preferences - can be accessed via a global in 'classic' C++ code generator...
Definition: backendBase.h:58
int getRuntimeVersion() const
Definition: cuda/backend.h:295
Definition: modelSpecMerged.h:31
Helper class for generating code - automatically inserts brackets, indents etc.
Definition: backendBase.h:30
virtual void genCurrentTrueSpikePush(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize) const override
Generate code for pushing true spikes emitted by a neuron group in the current timestep to the 'devic...
Definition: cuda/backend.h:228
Use device specified by user.
std::vector< std::pair< std::string, size_t > > MemorySpaces
Vector of prefixes required to allocate in memory space and size of memory space. ...
Definition: backendBase.h:190
Definition: codeStream.h:21
virtual bool isHostReductionRequired() const override
Backends which support batch-parallelism might require an additional host reduction phase after reduc...
Definition: cuda/backend.h:275
#define BACKEND_EXPORT
Definition: backendExport.h:13
Definition: backendBase.h:107
Use device specified by user at runtime with allocateMem parameter. Optimisation will be performed on...
virtual void genCurrentTrueSpikePull(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize) const override
Generate code for pulling true spikes emitted by a neuron group in the current timestep from the 'dev...
Definition: cuda/backend.h:232
Definition: substitutions.h:21
virtual std::string getMergedGroupSimRNGType() const override
When generating merged structures what type to use for simulation RNGs.
Definition: cuda/backend.h:218
virtual void genCurrentSpikeLikeEventPush(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize) const override
Generate code for pushing spike-like events emitted by a neuron group in the current timestep to the ...
Definition: cuda/backend.h:236
void updateHash(const T &value, boost::uuids::detail::sha1 &hash)
Hash arithmetic types and enums.
Definition: gennUtils.h:128
AtomicMemSpace
What memory space atomic operation is required.
Definition: backendSIMT.h:70
std::function< void(CodeStream &)> HostHandler
Definition: backendBase.h:182
Definition: cuda/backend.h:129
Preferences for CUDA backend.
Definition: cuda/backend.h:62
virtual void genCurrentSpikeLikeEventPull(CodeStream &os, const NeuronGroupInternal &ng, unsigned int batchSize) const override
Generate code for pulling spike-like events emitted by a neuron group in the current timestep from th...
Definition: cuda/backend.h:240
Kernel
Kernels generated by SIMT backends.
Definition: backendSIMT.h:24
AtomicOperation
What atomic operation is required.
Definition: backendSIMT.h:63
Preferences()
Definition: cuda/backend.h:64
Definition: generateModules.h:16
DeviceSelect
Methods for selecting CUDA device.
Definition: cuda/backend.h:40
Pick optimal blocksize for each kernel based on occupancy.
KernelBlockSize manualBlockSizes
If block size select method is set to BlockSizeSelect::MANUAL, block size to use for each kernel...
Definition: cuda/backend.h:97
const cudaDeviceProp & getChosenCUDADevice() const
Definition: cuda/backend.h:293
virtual bool isPopulationRNGInitialisedOnDevice() const override
Different backends seed RNGs in different ways. Does this one initialise population RNGS on device...
Definition: cuda/backend.h:272
std::array< size_t, KernelMax > KernelBlockSize
Array of block sizes for each kernel.
Definition: backendSIMT.h:44
void updateHash(boost::uuids::detail::sha1 &hash) const
Definition: cuda/backend.h:108
Definition: codeStream.h:94