Random123
Loading...
Searching...
No Matches
sse.h
Go to the documentation of this file.
1/*
2Copyright 2010-2011, D. E. Shaw Research.
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are
7met:
8
9* Redistributions of source code must retain the above copyright
10 notice, this list of conditions, and the following disclaimer.
11
12* Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions, and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16* Neither the name of D. E. Shaw Research nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31*/
32#ifndef _Random123_sse_dot_h__
33#define _Random123_sse_dot_h__
34
35#if R123_USE_SSE
36
37#if R123_USE_X86INTRIN_H
38#include <x86intrin.h>
39#endif
40#if R123_USE_IA32INTRIN_H
41#include <ia32intrin.h>
42#endif
43#if R123_USE_XMMINTRIN_H
44#include <xmmintrin.h>
45#endif
46#if R123_USE_EMMINTRIN_H
47#include <emmintrin.h>
48#endif
49#if R123_USE_SMMINTRIN_H
50#include <smmintrin.h>
51#endif
52#if R123_USE_WMMINTRIN_H
53#include <wmmintrin.h>
54#endif
55#if R123_USE_INTRIN_H
56#include <intrin.h>
57#endif
58#ifdef __cplusplus
59#include <iostream>
60#include <limits>
61#include <stdexcept>
62#endif
63
64#if R123_USE_ASM_GNU
65
66/* bit25 of CX tells us whether AES is enabled. */
67R123_STATIC_INLINE int haveAESNI(){
68 unsigned int eax, ebx, ecx, edx;
69 __asm__ __volatile__ ("cpuid": "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) :
70 "a" (1));
71 return (ecx>>25) & 1;
72}
73#elif R123_USE_CPUID_MSVC
74R123_STATIC_INLINE int haveAESNI(){
75 int CPUInfo[4];
76 __cpuid(CPUInfo, 1);
77 return (CPUInfo[2]>>25)&1;
78}
79#else /* R123_USE_CPUID_??? */
80#warning "No R123_USE_CPUID_XXX method chosen. haveAESNI will always return false"
81R123_STATIC_INLINE int haveAESNI(){
82 return 0;
83}
84#endif /* R123_USE_ASM_GNU || R123_USE_CPUID_MSVC */
85
86// There is a lot of annoying and inexplicable variation in the
87// SSE intrinsics available in different compilation environments.
88// The details seem to depend on the compiler, the version and
89// the target architecture. Rather than insisting on
90// R123_USE_feature tests for each of these in each of the
91// compilerfeatures.h files we just keep the complexity localized
92// to here...
93#if (defined(__ICC) && __ICC<1210) || (defined(_MSC_VER) && !defined(_WIN64) && _MSC_VER < 1900)
94/* Is there an intrinsic to assemble an __m128i from two 64-bit words?
95 If not, use the 4x32-bit intrisic instead. N.B. It looks like Intel
96 added _mm_set_epi64x to icc version 12.1 in Jan 2012.
97*/
98R123_STATIC_INLINE __m128i _mm_set_epi64x(uint64_t v1, uint64_t v0){
99 union{
100 uint64_t u64;
101 uint32_t u32[2];
102 } u1, u0;
103 u1.u64 = v1;
104 u0.u64 = v0;
105 return _mm_set_epi32(u1.u32[1], u1.u32[0], u0.u32[1], u0.u32[0]);
106}
107#endif
108/* _mm_extract_lo64 abstracts the task of extracting the low 64-bit
109 word from an __m128i. The _mm_cvtsi128_si64 intrinsic does the job
110 on 64-bit platforms. Unfortunately, both MSVC and Open64 fail
111 assertions in ut_M128.cpp and ut_carray.cpp when we use the
112 _mm_cvtsi128_si64 intrinsic. (See
113 https://bugs.open64.net/show_bug.cgi?id=873 for the Open64 bug).
114 On 32-bit platforms, there's no MOVQ, so there's no intrinsic.
115 Finally, even if the intrinsic exists, it may be spelled with or
116 without the 'x'.
117*/
118#if !defined(__x86_64__) || defined(_MSC_VER) || defined(__OPEN64__)
119R123_STATIC_INLINE uint64_t _mm_extract_lo64(__m128i si){
120 union{
121 uint64_t u64[2];
122 __m128i m;
123 }u;
124 _mm_store_si128(&u.m, si);
125 return u.u64[0];
126}
127#elif defined(__llvm__) || defined(__ICC)
128R123_STATIC_INLINE uint64_t _mm_extract_lo64(__m128i si){
129 return (uint64_t)_mm_cvtsi128_si64(si);
130}
131#else /* GNUC, others */
132/* FWIW, gcc's emmintrin.h has had the 'x' spelling
133 since at least gcc-3.4.4. The no-'x' spelling showed up
134 around 4.2. */
135R123_STATIC_INLINE uint64_t _mm_extract_lo64(__m128i si){
136 return (uint64_t)_mm_cvtsi128_si64x(si);
137}
138#endif
139#if defined(__GNUC__) && __GNUC__ < 4
140/* the cast builtins showed up in gcc4. */
141R123_STATIC_INLINE __m128 _mm_castsi128_ps(__m128i si){
142 return (__m128)si;
143}
144#endif
145
146#ifdef __cplusplus
147
149 __m128i m;
150#if R123_USE_CXX11_UNRESTRICTED_UNIONS
151 // C++98 forbids a union member from having *any* constructors.
152 // C++11 relaxes this, and allows union members to have constructors
153 // as long as there is a "trivial" default constructor. So in C++11
154 // we can provide a r123m128i constructor with an __m128i argument, and still
155 // have the default (and hence trivial) default constructor.
156 r123m128i() = default;
157 r123m128i(__m128i _m): m(_m){}
158#endif
159 r123m128i& operator=(const __m128i& rhs){ m=rhs; return *this;}
160 r123m128i& operator=(R123_ULONG_LONG n){ m = _mm_set_epi64x(0, n); return *this;}
161#if R123_USE_CXX11_EXPLICIT_CONVERSIONS
162 // With C++11 we can attach explicit to the bool conversion operator
163 // to disambiguate undesired promotions. For g++, this works
164 // only in 4.5 and above.
165 explicit operator bool() const {return _bool();}
166#else
167 // Pre-C++11, we have to do something else. Google for the "safe bool"
168 // idiom for other ideas...
169 operator const void*() const{return _bool()?this:0;}
170#endif
171 operator __m128i() const {return m;}
172
173private:
174#if R123_USE_SSE4_1
175 bool _bool() const{ return !_mm_testz_si128(m,m); }
176#else
177 bool _bool() const{ return 0xf != _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(m, _mm_setzero_si128()))); }
178#endif
179};
180
181R123_STATIC_INLINE r123m128i& operator++(r123m128i& v){
182 __m128i& c = v.m;
183 __m128i zeroone = _mm_set_epi64x(R123_64BIT(0), R123_64BIT(1));
184 c = _mm_add_epi64(c, zeroone);
185 //return c;
186#if R123_USE_SSE4_1
187 __m128i zerofff = _mm_set_epi64x(0, ~(R123_64BIT(0)));
188 if( R123_BUILTIN_EXPECT(_mm_testz_si128(c,zerofff), 0) ){
189 __m128i onezero = _mm_set_epi64x(R123_64BIT(1), R123_64BIT(0));
190 c = _mm_add_epi64(c, onezero);
191 }
192#else
193 unsigned mask = _mm_movemask_ps( _mm_castsi128_ps(_mm_cmpeq_epi32(c, _mm_setzero_si128())));
194 // The low two bits of mask are 11 iff the low 64 bits of
195 // c are zero.
196 if( R123_BUILTIN_EXPECT((mask&0x3) == 0x3, 0) ){
197 __m128i onezero = _mm_set_epi64x(1,0);
198 c = _mm_add_epi64(c, onezero);
199 }
200#endif
201 return v;
202}
203
204R123_STATIC_INLINE r123m128i& operator+=(r123m128i& lhs, R123_ULONG_LONG n){
205 __m128i c = lhs.m;
206 __m128i incr128 = _mm_set_epi64x(0, n);
207 c = _mm_add_epi64(c, incr128);
208 // return c; // NO CARRY!
209
210 int64_t lo64 = _mm_extract_lo64(c);
211 if((uint64_t)lo64 < n)
212 c = _mm_add_epi64(c, _mm_set_epi64x(R123_64BIT(1),R123_64BIT(0)));
213 lhs.m = c;
214 return lhs;
215}
216
217// We need this one because it's present, but never used in r123array1xm128i::incr
218R123_STATIC_INLINE bool operator<=(R123_ULONG_LONG, const r123m128i &){
219 throw std::runtime_error("operator<=(unsigned long long, r123m128i) is unimplemented.");}
220
221// The comparisons aren't implemented, but if we leave them out, and
222// somebody writes, e.g., M1 < M2, the compiler will do an implicit
223// conversion through void*. Sigh...
224R123_STATIC_INLINE bool operator<(const r123m128i&, const r123m128i&){
225 throw std::runtime_error("operator<(r123m128i, r123m128i) is unimplemented.");}
226R123_STATIC_INLINE bool operator<=(const r123m128i&, const r123m128i&){
227 throw std::runtime_error("operator<=(r123m128i, r123m128i) is unimplemented.");}
228R123_STATIC_INLINE bool operator>(const r123m128i&, const r123m128i&){
229 throw std::runtime_error("operator>(r123m128i, r123m128i) is unimplemented.");}
230R123_STATIC_INLINE bool operator>=(const r123m128i&, const r123m128i&){
231 throw std::runtime_error("operator>=(r123m128i, r123m128i) is unimplemented.");}
232
233R123_STATIC_INLINE bool operator==(const r123m128i &lhs, const r123m128i &rhs){
234 return 0xf==_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(lhs, rhs))); }
235R123_STATIC_INLINE bool operator!=(const r123m128i &lhs, const r123m128i &rhs){
236 return !(lhs==rhs);}
237R123_STATIC_INLINE bool operator==(R123_ULONG_LONG lhs, const r123m128i &rhs){
238 r123m128i LHS; LHS.m=_mm_set_epi64x(0, lhs); return LHS == rhs; }
239R123_STATIC_INLINE bool operator!=(R123_ULONG_LONG lhs, const r123m128i &rhs){
240 return !(lhs==rhs);}
241R123_STATIC_INLINE std::ostream& operator<<(std::ostream& os, const r123m128i& m){
242 union{
243 uint64_t u64[2];
244 __m128i m;
245 }u;
246 _mm_storeu_si128(&u.m, m.m);
247 return os << u.u64[0] << " " << u.u64[1];
248}
249
250R123_STATIC_INLINE std::istream& operator>>(std::istream& is, r123m128i& m){
251 uint64_t u64[2];
252 is >> u64[0] >> u64[1];
253 m.m = _mm_set_epi64x(u64[1], u64[0]);
254 return is;
255}
256
257template<typename T> inline T assemble_from_u32(uint32_t *p32); // forward declaration
258
259template <>
261 r123m128i ret;
262 ret.m = _mm_set_epi32(p32[3], p32[2], p32[1], p32[0]);
263 return ret;
264}
265
266#else
267
268typedef struct {
269 __m128i m;
270} r123m128i;
271
272#endif /* __cplusplus */
273
274#else /* !R123_USE_SSE */
275R123_STATIC_INLINE int haveAESNI(){
276 return 0;
277}
278#endif /* R123_USE_SSE */
279
280#endif /* _Random123_sse_dot_h__ */
static r123m128i & operator++(r123m128i &v)
Definition sse.h:181
static int haveAESNI()
Definition sse.h:81
r123m128i assemble_from_u32< r123m128i >(uint32_t *p32)
Definition sse.h:260
static bool operator<=(R123_ULONG_LONG, const r123m128i &)
Definition sse.h:218
T assemble_from_u32(uint32_t *p32)
static bool operator!=(const r123m128i &lhs, const r123m128i &rhs)
Definition sse.h:235
static std::ostream & operator<<(std::ostream &os, const r123m128i &m)
Definition sse.h:241
static bool operator>=(const r123m128i &, const r123m128i &)
Definition sse.h:230
static bool operator==(const r123m128i &lhs, const r123m128i &rhs)
Definition sse.h:233
static std::istream & operator>>(std::istream &is, r123m128i &m)
Definition sse.h:250
static bool operator<(const r123m128i &, const r123m128i &)
Definition sse.h:224
static bool operator>(const r123m128i &, const r123m128i &)
Definition sse.h:228
static uint64_t _mm_extract_lo64(__m128i si)
Definition sse.h:119
static r123m128i & operator+=(r123m128i &lhs, R123_ULONG_LONG n)
Definition sse.h:204
Definition sse.h:148
r123m128i & operator=(R123_ULONG_LONG n)
Definition sse.h:160
r123m128i & operator=(const __m128i &rhs)
Definition sse.h:159
__m128i m
Definition sse.h:149