3 #include "device_functions.h" 7 int atomicCAS(
int* address,
int compare,
int val)
9 __atomic_compare_exchange_n(
10 address, &compare, val,
false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
// Atomic compare-and-swap on an unsigned int.
// If *address equals compare, val is stored to *address; otherwise *address
// is left unchanged. Relaxed ordering is used for both outcomes.
// Returns the value the operation observed at *address.
unsigned int atomicCAS(
    unsigned int* address, unsigned int compare, unsigned int val)
{
    // On failure the builtin writes the observed value into `compare`; on
    // success `compare` already equals it, so `compare` is the prior value.
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    // NOTE(review): return statement reconstructed; it was not visible.
    return compare;
}
// Atomic compare-and-swap on an unsigned long long.
// If *address equals compare, val is stored to *address; otherwise *address
// is left unchanged. Relaxed ordering is used for both outcomes.
// Returns the value the operation observed at *address.
unsigned long long atomicCAS(
    unsigned long long* address,
    unsigned long long compare,
    unsigned long long val)
{
    // On failure the builtin writes the observed value into `compare`; on
    // success `compare` already equals it, so `compare` is the prior value.
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    // NOTE(review): return statement reconstructed; it was not visible.
    return compare;
}
// Atomically adds val to the int at *address with relaxed ordering.
// Returns the value held at *address before the addition.
int atomicAdd(int* address, int val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
// Atomically adds val to the unsigned int at *address with relaxed ordering.
// Returns the value held at *address before the addition.
unsigned int atomicAdd(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
// Atomically adds val to the unsigned long long at *address with relaxed
// ordering. Returns the value held at *address before the addition.
unsigned long long atomicAdd(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
58 float atomicAdd(
float* address,
float val)
60 unsigned int* uaddr{
reinterpret_cast<unsigned int*
>(address)};
61 unsigned int old{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
66 old = atomicCAS(uaddr, r, __float_as_uint(val + __uint_as_float(r)));
69 return __uint_as_float(r);
73 double atomicAdd(
double* address,
double val)
75 unsigned long long* uaddr{
reinterpret_cast<unsigned long long*
>(address)};
76 unsigned long long old{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
82 uaddr, r, __double_as_longlong(val + __longlong_as_double(r)));
85 return __longlong_as_double(r);
// Atomically subtracts val from the int at *address with relaxed ordering.
// Returns the value held at *address before the subtraction.
int atomicSub(int* address, int val)
{
    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}
// Atomically subtracts val from the unsigned int at *address with relaxed
// ordering. Returns the value held at *address before the subtraction.
unsigned int atomicSub(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}
// Atomically stores val into the int at *address with relaxed ordering.
// Returns the value that was replaced.
int atomicExch(int* address, int val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
// Atomically stores val into the unsigned int at *address with relaxed
// ordering. Returns the value that was replaced.
unsigned int atomicExch(unsigned int* address, unsigned int val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
// Atomically stores val into the unsigned long long at *address with relaxed
// ordering. Returns the value that was replaced.
unsigned long long atomicExch(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
121 float atomicExch(
float* address,
float val)
123 return __uint_as_float(__atomic_exchange_n(
124 reinterpret_cast<unsigned int*>(address),
125 __float_as_uint(val),
131 int atomicMin(
int* address,
int val)
133 return __sync_fetch_and_min(address, val);
137 unsigned int atomicMin(
unsigned int* address,
unsigned int val)
139 return __sync_fetch_and_umin(address, val);
// Atomic minimum on the unsigned long long at *address (relaxed ordering).
// Stores val only while it is smaller than the current contents.
// Returns the value held at *address before this call's update, or the value
// observed when no update was needed.
unsigned long long atomicMin(
    unsigned long long* address, unsigned long long val)
{
    unsigned long long observed = __atomic_load_n(address, __ATOMIC_RELAXED);

    // A failed exchange refreshes `observed` with the current contents, so the
    // condition is re-tested against fresh data. A successful exchange leaves
    // `observed` holding the replaced value and ends the loop immediately,
    // which keeps the prior value available for the return.
    while (val < observed &&
           !__atomic_compare_exchange_n(
               address, &observed, val, false,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
    }

    return observed;
}
154 int atomicMax(
int* address,
int val)
156 return __sync_fetch_and_max(address, val);
160 unsigned int atomicMax(
unsigned int* address,
unsigned int val)
162 return __sync_fetch_and_umax(address, val);
// Atomic maximum on the unsigned long long at *address (relaxed ordering).
// Stores val only while it is larger than the current contents.
// Returns the value held at *address before this call's update, or the value
// observed when no update was needed.
unsigned long long atomicMax(
    unsigned long long* address, unsigned long long val)
{
    unsigned long long observed = __atomic_load_n(address, __ATOMIC_RELAXED);

    // A failed exchange refreshes `observed` with the current contents, so the
    // condition is re-tested against fresh data. A successful exchange leaves
    // `observed` holding the replaced value and ends the loop immediately,
    // which keeps the prior value available for the return.
    while (observed < val &&
           !__atomic_compare_exchange_n(
               address, &observed, val, false,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
    }

    return observed;
}
// Atomic increment of the unsigned int at *address, forwarded to the
// llvm.amdgcn.atomic.inc.i32.p0i32 intrinsic with relaxed ordering.
// Returns whatever the intrinsic returns. Presumably that is the prior value,
// with the stored value wrapping to 0 once it reaches val -- confirm against
// the intrinsic's documentation.
unsigned int atomicInc(unsigned int* address, unsigned int val)
{
    // Local declaration that binds this name to the intrinsic via an asm
    // label.
    // NOTE(review): only the trailing `bool` parameter was visible; the other
    // parameter types are reconstructed from the call below -- confirm.
    unsigned int __builtin_amdgcn_atomic_inc(
        unsigned int*,
        unsigned int,
        unsigned int,
        unsigned int,
        bool) __asm("llvm.amdgcn.atomic.inc.i32.p0i32");

    // The fourth argument (1) is presumably the synchronization scope and the
    // fifth the volatile flag -- confirm.
    return __builtin_amdgcn_atomic_inc(
        address, val, __ATOMIC_RELAXED, 1, false);
}
// Atomic decrement of the unsigned int at *address, forwarded to the
// llvm.amdgcn.atomic.dec.i32.p0i32 intrinsic with relaxed ordering.
// Returns whatever the intrinsic returns. Presumably that is the prior value,
// with the stored value wrapping to val when it is 0 or exceeds val --
// confirm against the intrinsic's documentation.
unsigned int atomicDec(unsigned int* address, unsigned int val)
{
    // Local declaration that binds this name to the intrinsic via an asm
    // label.
    // NOTE(review): only the trailing `bool` parameter was visible; the other
    // parameter types are reconstructed from the call below -- confirm.
    unsigned int __builtin_amdgcn_atomic_dec(
        unsigned int*,
        unsigned int,
        unsigned int,
        unsigned int,
        bool) __asm("llvm.amdgcn.atomic.dec.i32.p0i32");

    // The fourth argument (1) is presumably the synchronization scope and the
    // fifth the volatile flag -- confirm.
    return __builtin_amdgcn_atomic_dec(
        address, val, __ATOMIC_RELAXED, 1, false);
}
// Atomically ANDs val into the int at *address with relaxed ordering.
// Returns the value held at *address before the operation.
int atomicAnd(int* address, int val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
// Atomically ANDs val into the unsigned int at *address with relaxed
// ordering. Returns the value held at *address before the operation.
unsigned int atomicAnd(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
// Atomically ANDs val into the unsigned long long at *address with relaxed
// ordering. Returns the value held at *address before the operation.
unsigned long long atomicAnd(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
// Atomically ORs val into the int at *address with relaxed ordering.
// Returns the value held at *address before the operation.
int atomicOr(int* address, int val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
// Atomically ORs val into the unsigned int at *address with relaxed
// ordering. Returns the value held at *address before the operation.
unsigned int atomicOr(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
// Atomically ORs val into the unsigned long long at *address with relaxed
// ordering. Returns the value held at *address before the operation.
unsigned long long atomicOr(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
// Atomically XORs val into the int at *address with relaxed ordering.
// Returns the value held at *address before the operation.
int atomicXor(int* address, int val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
// Atomically XORs val into the unsigned int at *address with relaxed
// ordering. Returns the value held at *address before the operation.
unsigned int atomicXor(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
// Atomically XORs val into the unsigned long long at *address with relaxed
// ordering. Returns the value held at *address before the operation.
unsigned long long atomicXor(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}