32 #ifndef _Random123_sse_dot_h__ 33 #define _Random123_sse_dot_h__ 37 #if R123_USE_X86INTRIN_H 38 #include <x86intrin.h> 40 #if R123_USE_IA32INTRIN_H 41 #include <ia32intrin.h> 43 #if R123_USE_XMMINTRIN_H 44 #include <xmmintrin.h> 46 #if R123_USE_EMMINTRIN_H 47 #include <emmintrin.h> 49 #if R123_USE_SMMINTRIN_H 50 #include <smmintrin.h> 52 #if R123_USE_WMMINTRIN_H 53 #include <wmmintrin.h> 68 unsigned int eax, ebx, ecx, edx;
69 __asm__ __volatile__ (
"cpuid":
"=a" (eax),
"=b" (ebx),
"=c" (ecx),
"=d" (edx) :
73 #elif R123_USE_CPUID_MSVC 77 return (CPUInfo[2]>>25)&1;
80 #warning "No R123_USE_CPUID_XXX method chosen. haveAESNI will always return false" 93 #if (defined(__ICC) && __ICC<1210) || (defined(_MSC_VER) && !defined(_WIN64)) 105 return _mm_set_epi32(u1.u32[1], u1.u32[0], u0.u32[1], u0.u32[0]);
118 #if !defined(__x86_64__) || defined(_MSC_VER) || defined(__OPEN64__) 124 _mm_store_si128(&u.m, si);
127 #elif defined(__llvm__) || defined(__ICC) 129 return (uint64_t)_mm_cvtsi128_si64(si);
136 return (uint64_t)_mm_cvtsi128_si64x(si);
139 #if defined(__GNUC__) && __GNUC__ < 4 150 #if R123_USE_CXX11_UNRESTRICTED_UNIONS 156 r123m128i() =
default;
157 r123m128i(__m128i _m): m(_m){}
159 r123m128i& operator=(
const __m128i& rhs){ m=rhs;
return *
this;}
160 r123m128i& operator=(R123_ULONG_LONG n){ m = _mm_set_epi64x(0, n);
return *
this;}
161 #if R123_USE_CXX11_EXPLICIT_CONVERSIONS 165 explicit operator bool()
const {
return _bool();}
169 operator const void*()
const{
return _bool()?
this:0;}
171 operator __m128i()
const {
return m;}
175 bool _bool()
const{
return !_mm_testz_si128(m,m); }
177 bool _bool()
const{
return 0xf != _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(m, _mm_setzero_si128()))); }
183 __m128i zeroone = _mm_set_epi64x(R123_64BIT(0), R123_64BIT(1));
184 c = _mm_add_epi64(c, zeroone);
187 __m128i zerofff = _mm_set_epi64x(0, ~(R123_64BIT(0)));
189 __m128i onezero = _mm_set_epi64x(R123_64BIT(1), R123_64BIT(0));
190 c = _mm_add_epi64(c, onezero);
193 unsigned mask = _mm_movemask_ps( _mm_castsi128_ps(_mm_cmpeq_epi32(c, _mm_setzero_si128())));
197 __m128i onezero = _mm_set_epi64x(1,0);
198 c = _mm_add_epi64(c, onezero);
206 __m128i incr128 = _mm_set_epi64x(0, n);
207 c = _mm_add_epi64(c, incr128);
210 int64_t lo64 = _mm_extract_lo64(c);
211 if((uint64_t)lo64 < n)
212 c = _mm_add_epi64(c, _mm_set_epi64x(1,0));
219 throw std::runtime_error(
"operator<=(unsigned long long, r123m128i) is unimplemented.");}
225 throw std::runtime_error(
"operator<(r123m128i, r123m128i) is unimplemented.");}
227 throw std::runtime_error(
"operator<=(r123m128i, r123m128i) is unimplemented.");}
229 throw std::runtime_error(
"operator>(r123m128i, r123m128i) is unimplemented.");}
231 throw std::runtime_error(
"operator>=(r123m128i, r123m128i) is unimplemented.");}
234 return 0xf==_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(lhs, rhs))); }
238 r123m128i LHS; LHS.m=_mm_set_epi64x(0, lhs);
return LHS == rhs; }
246 _mm_storeu_si128(&u.m, m.m);
247 return os << u.u64[0] <<
" " << u.u64[1];
252 is >> u64[0] >> u64[1];
253 m.m = _mm_set_epi64x(u64[1], u64[0]);
// Primary template: builds a value of type T from the 32-bit words that
// p32 points to. Only declared here; each supported T provides its own
// definition through an explicit specialization.
template <class T>
inline T assemble_from_u32(uint32_t* p32);
260 inline r123m128i assemble_from_u32<r123m128i>(uint32_t *p32){
262 ret.m = _mm_set_epi32(p32[3], p32[2], p32[1], p32[0]);
std::ostream & operator<<(std::ostream &out, const MemAlloc &m)
Definition: backendBase.h:159
std::istream & operator>>(std::istream &in, MemAlloc &m)
Definition: backendBase.h:165
#define R123_BUILTIN_EXPECT(expr, likely)
R123_STATIC_INLINE int haveAESNI()
Definition: sse.h:275
#define R123_STATIC_INLINE
m
Definition: genn_model.py:117