Remove memory_barrier() calls from atomics.
These barriers imposed unnecessary overhead on all callers; users should insert memory_barrier() calls themselves where they are needed. Also added explanatory text to the documentation highlighting that memory_barrier() is only needed across HW threads/cores, not across program instances in a gang.
This commit is contained in:
@@ -3880,6 +3880,11 @@ code.
|
|||||||
|
|
||||||
void memory_barrier();
|
void memory_barrier();
|
||||||
|
|
||||||
|
Note that this barrier is *not* needed for coordinating reads and writes
|
||||||
|
among the program instances in a gang; it's only needed for coordinating memory accesses
|
||||||
|
between multiple hardware threads running on different cores. See the
|
||||||
|
section `Data Races Within a Gang`_ for the guarantees provided about
|
||||||
|
memory read/write ordering across a gang.
|
||||||
|
|
||||||
Prefetches
|
Prefetches
|
||||||
----------
|
----------
|
||||||
|
|||||||
29
stdlib.ispc
29
stdlib.ispc
@@ -1,6 +1,6 @@
|
|||||||
// -*- mode: c++ -*-
|
// -*- mode: c++ -*-
|
||||||
/*
|
/*
|
||||||
Copyright (c) 2010-2011, Intel Corporation
|
Copyright (c) 2010-2012, Intel Corporation
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@@ -1588,22 +1588,17 @@ static inline void memory_barrier() {
|
|||||||
|
|
||||||
#define DEFINE_ATOMIC_OP(TA,TB,OPA,OPB,MASKTYPE) \
|
#define DEFINE_ATOMIC_OP(TA,TB,OPA,OPB,MASKTYPE) \
|
||||||
static inline TA atomic_##OPA##_global(uniform TA * uniform ptr, TA value) { \
|
static inline TA atomic_##OPA##_global(uniform TA * uniform ptr, TA value) { \
|
||||||
memory_barrier(); \
|
|
||||||
TA ret = __atomic_##OPB##_##TB##_global(ptr, value, (MASKTYPE)__mask); \
|
TA ret = __atomic_##OPB##_##TB##_global(ptr, value, (MASKTYPE)__mask); \
|
||||||
memory_barrier(); \
|
|
||||||
return ret; \
|
return ret; \
|
||||||
} \
|
} \
|
||||||
static inline uniform TA atomic_##OPA##_global(uniform TA * uniform ptr, \
|
static inline uniform TA atomic_##OPA##_global(uniform TA * uniform ptr, \
|
||||||
uniform TA value) { \
|
uniform TA value) { \
|
||||||
memory_barrier(); \
|
|
||||||
uniform TA ret = __atomic_##OPB##_uniform_##TB##_global(ptr, value); \
|
uniform TA ret = __atomic_##OPB##_uniform_##TB##_global(ptr, value); \
|
||||||
memory_barrier(); \
|
|
||||||
return ret; \
|
return ret; \
|
||||||
} \
|
} \
|
||||||
static inline TA atomic_##OPA##_global(uniform TA * varying ptr, TA value) { \
|
static inline TA atomic_##OPA##_global(uniform TA * varying ptr, TA value) { \
|
||||||
uniform TA * uniform ptrArray[programCount]; \
|
uniform TA * uniform ptrArray[programCount]; \
|
||||||
ptrArray[programIndex] = ptr; \
|
ptrArray[programIndex] = ptr; \
|
||||||
memory_barrier(); \
|
|
||||||
TA ret; \
|
TA ret; \
|
||||||
__foreach_active (i) { \
|
__foreach_active (i) { \
|
||||||
uniform TA * uniform p = ptrArray[i]; \
|
uniform TA * uniform p = ptrArray[i]; \
|
||||||
@@ -1611,13 +1606,11 @@ static inline TA atomic_##OPA##_global(uniform TA * varying ptr, TA value) { \
|
|||||||
uniform TA r = __atomic_##OPB##_uniform_##TB##_global(p, v); \
|
uniform TA r = __atomic_##OPB##_uniform_##TB##_global(p, v); \
|
||||||
ret = insert(ret, i, r); \
|
ret = insert(ret, i, r); \
|
||||||
} \
|
} \
|
||||||
memory_barrier(); \
|
|
||||||
return ret; \
|
return ret; \
|
||||||
} \
|
} \
|
||||||
|
|
||||||
#define DEFINE_ATOMIC_SWAP(TA,TB) \
|
#define DEFINE_ATOMIC_SWAP(TA,TB) \
|
||||||
static inline TA atomic_swap_global(uniform TA * uniform ptr, TA value) { \
|
static inline TA atomic_swap_global(uniform TA * uniform ptr, TA value) { \
|
||||||
memory_barrier(); \
|
|
||||||
uniform int i = 0; \
|
uniform int i = 0; \
|
||||||
TA ret[programCount]; \
|
TA ret[programCount]; \
|
||||||
TA memVal; \
|
TA memVal; \
|
||||||
@@ -1647,20 +1640,16 @@ static inline TA atomic_swap_global(uniform TA * uniform ptr, TA value) { \
|
|||||||
/* And the last instance that wanted to swap gets the value we \
|
/* And the last instance that wanted to swap gets the value we \
|
||||||
originally got back from memory... */ \
|
originally got back from memory... */ \
|
||||||
ret[lastSwap] = memVal; \
|
ret[lastSwap] = memVal; \
|
||||||
memory_barrier(); \
|
|
||||||
return ret[programIndex]; \
|
return ret[programIndex]; \
|
||||||
} \
|
} \
|
||||||
static inline uniform TA atomic_swap_global(uniform TA * uniform ptr, \
|
static inline uniform TA atomic_swap_global(uniform TA * uniform ptr, \
|
||||||
uniform TA value) { \
|
uniform TA value) { \
|
||||||
memory_barrier(); \
|
|
||||||
uniform TA ret = __atomic_swap_uniform_##TB##_global(ptr, value); \
|
uniform TA ret = __atomic_swap_uniform_##TB##_global(ptr, value); \
|
||||||
memory_barrier(); \
|
|
||||||
return ret; \
|
return ret; \
|
||||||
} \
|
} \
|
||||||
static inline TA atomic_swap_global(uniform TA * varying ptr, TA value) { \
|
static inline TA atomic_swap_global(uniform TA * varying ptr, TA value) { \
|
||||||
uniform TA * uniform ptrArray[programCount]; \
|
uniform TA * uniform ptrArray[programCount]; \
|
||||||
ptrArray[programIndex] = ptr; \
|
ptrArray[programIndex] = ptr; \
|
||||||
memory_barrier(); \
|
|
||||||
TA ret; \
|
TA ret; \
|
||||||
__foreach_active (i) { \
|
__foreach_active (i) { \
|
||||||
uniform TA * uniform p = ptrArray[i]; \
|
uniform TA * uniform p = ptrArray[i]; \
|
||||||
@@ -1668,7 +1657,6 @@ static inline TA atomic_swap_global(uniform TA * varying ptr, TA value) { \
|
|||||||
uniform TA r = __atomic_swap_uniform_##TB##_global(p, v); \
|
uniform TA r = __atomic_swap_uniform_##TB##_global(p, v); \
|
||||||
ret = insert(ret, i, r); \
|
ret = insert(ret, i, r); \
|
||||||
} \
|
} \
|
||||||
memory_barrier(); \
|
|
||||||
return ret; \
|
return ret; \
|
||||||
} \
|
} \
|
||||||
|
|
||||||
@@ -1676,25 +1664,19 @@ static inline TA atomic_swap_global(uniform TA * varying ptr, TA value) { \
|
|||||||
static inline TA atomic_##OPA##_global(uniform TA * uniform ptr, TA value) { \
|
static inline TA atomic_##OPA##_global(uniform TA * uniform ptr, TA value) { \
|
||||||
uniform TA oneval = reduce_##OPA(value); \
|
uniform TA oneval = reduce_##OPA(value); \
|
||||||
TA ret; \
|
TA ret; \
|
||||||
if (lanemask() != 0) { \
|
if (lanemask() != 0) \
|
||||||
memory_barrier(); \
|
|
||||||
ret = __atomic_##OPB##_uniform_##TB##_global(ptr, oneval); \
|
ret = __atomic_##OPB##_uniform_##TB##_global(ptr, oneval); \
|
||||||
memory_barrier(); \
|
|
||||||
} \
|
|
||||||
return ret; \
|
return ret; \
|
||||||
} \
|
} \
|
||||||
static inline uniform TA atomic_##OPA##_global(uniform TA * uniform ptr, \
|
static inline uniform TA atomic_##OPA##_global(uniform TA * uniform ptr, \
|
||||||
uniform TA value) { \
|
uniform TA value) { \
|
||||||
memory_barrier(); \
|
|
||||||
uniform TA ret = __atomic_##OPB##_uniform_##TB##_global(ptr, value); \
|
uniform TA ret = __atomic_##OPB##_uniform_##TB##_global(ptr, value); \
|
||||||
memory_barrier(); \
|
|
||||||
return ret; \
|
return ret; \
|
||||||
} \
|
} \
|
||||||
static inline TA atomic_##OPA##_global(uniform TA * varying ptr, \
|
static inline TA atomic_##OPA##_global(uniform TA * varying ptr, \
|
||||||
TA value) { \
|
TA value) { \
|
||||||
uniform TA * uniform ptrArray[programCount]; \
|
uniform TA * uniform ptrArray[programCount]; \
|
||||||
ptrArray[programIndex] = ptr; \
|
ptrArray[programIndex] = ptr; \
|
||||||
memory_barrier(); \
|
|
||||||
TA ret; \
|
TA ret; \
|
||||||
__foreach_active (i) { \
|
__foreach_active (i) { \
|
||||||
uniform TA * uniform p = ptrArray[i]; \
|
uniform TA * uniform p = ptrArray[i]; \
|
||||||
@@ -1702,7 +1684,6 @@ static inline TA atomic_##OPA##_global(uniform TA * varying ptr, \
|
|||||||
uniform TA r = __atomic_##OPB##_uniform_##TB##_global(p, v); \
|
uniform TA r = __atomic_##OPB##_uniform_##TB##_global(p, v); \
|
||||||
ret = insert(ret, i, r); \
|
ret = insert(ret, i, r); \
|
||||||
} \
|
} \
|
||||||
memory_barrier(); \
|
|
||||||
return ret; \
|
return ret; \
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1757,25 +1738,20 @@ DEFINE_ATOMIC_SWAP(double,double)
|
|||||||
#define ATOMIC_DECL_CMPXCHG(TA, TB, MASKTYPE) \
|
#define ATOMIC_DECL_CMPXCHG(TA, TB, MASKTYPE) \
|
||||||
static inline uniform TA atomic_compare_exchange_global( \
|
static inline uniform TA atomic_compare_exchange_global( \
|
||||||
uniform TA * uniform ptr, uniform TA oldval, uniform TA newval) { \
|
uniform TA * uniform ptr, uniform TA oldval, uniform TA newval) { \
|
||||||
memory_barrier(); \
|
|
||||||
uniform TA ret = \
|
uniform TA ret = \
|
||||||
__atomic_compare_exchange_uniform_##TB##_global(ptr, oldval, newval); \
|
__atomic_compare_exchange_uniform_##TB##_global(ptr, oldval, newval); \
|
||||||
memory_barrier(); \
|
|
||||||
return ret; \
|
return ret; \
|
||||||
} \
|
} \
|
||||||
static inline TA atomic_compare_exchange_global( \
|
static inline TA atomic_compare_exchange_global( \
|
||||||
uniform TA * uniform ptr, TA oldval, TA newval) { \
|
uniform TA * uniform ptr, TA oldval, TA newval) { \
|
||||||
memory_barrier(); \
|
|
||||||
TA ret = __atomic_compare_exchange_##TB##_global(ptr, oldval, newval, \
|
TA ret = __atomic_compare_exchange_##TB##_global(ptr, oldval, newval, \
|
||||||
(MASKTYPE)__mask); \
|
(MASKTYPE)__mask); \
|
||||||
memory_barrier(); \
|
|
||||||
return ret; \
|
return ret; \
|
||||||
} \
|
} \
|
||||||
static inline TA atomic_compare_exchange_global( \
|
static inline TA atomic_compare_exchange_global( \
|
||||||
uniform TA * varying ptr, TA oldval, TA newval) { \
|
uniform TA * varying ptr, TA oldval, TA newval) { \
|
||||||
uniform TA * uniform ptrArray[programCount]; \
|
uniform TA * uniform ptrArray[programCount]; \
|
||||||
ptrArray[programIndex] = ptr; \
|
ptrArray[programIndex] = ptr; \
|
||||||
memory_barrier(); \
|
|
||||||
TA ret; \
|
TA ret; \
|
||||||
__foreach_active (i) { \
|
__foreach_active (i) { \
|
||||||
uniform TA r = \
|
uniform TA r = \
|
||||||
@@ -1784,7 +1760,6 @@ static inline TA atomic_compare_exchange_global( \
|
|||||||
extract(newval, i)); \
|
extract(newval, i)); \
|
||||||
ret = insert(ret, i, r); \
|
ret = insert(ret, i, r); \
|
||||||
} \
|
} \
|
||||||
memory_barrier(); \
|
|
||||||
return ret; \
|
return ret; \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user