diff --git a/builtins/util.m4 b/builtins/util.m4 index 2d2918db..cab53e0d 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -737,9 +737,7 @@ define <$1 x $3> @__atomic_$2_$4_global($3 * %ptr, <$1 x $3> %val, %eltvec`'i = insertelement <$1 x $3> %eltvec`'eval(i-1), $3 %red`'eval(i-1), i32 i') ; make the atomic call, passing it the final reduced value -ifelse(LLVM_VERSION, `LLVM_2_9',` - %final0 = call $3 @llvm.atomic.load.$2.$3.p0$3($3 * %ptr, $3 %red`'eval($1-1))', ` - %final0 = atomicrmw $2 $3 * %ptr, $3 %red`'eval($1-1) seq_cst') + %final0 = atomicrmw $2 $3 * %ptr, $3 %red`'eval($1-1) seq_cst ; now go back and compute the values to be returned for each program ; instance--this just involves smearing the old value returned from the @@ -768,38 +766,21 @@ ifelse(LLVM_VERSION, `LLVM_2_9',` ;; $4: return type of the LLVM atomic type, in ispc naming paralance (e.g. int32) define(`global_atomic_uniform', ` - -ifelse(LLVM_VERSION, `LLVM_2_9',` -declare $3 @llvm.atomic.load.$2.$3.p0$3($3 * %ptr, $3 %delta) - -define $3 @__atomic_$2_uniform_$4_global($3 * %ptr, $3 %val) nounwind alwaysinline { - %r = call $3 @llvm.atomic.load.$2.$3.p0$3($3 * %ptr, $3 %val) - ret $3 %r -} -', ` define $3 @__atomic_$2_uniform_$4_global($3 * %ptr, $3 %val) nounwind alwaysinline { %r = atomicrmw $2 $3 * %ptr, $3 %val seq_cst ret $3 %r } ') -') - ;; Macro to declare the function that implements the swap atomic. ;; Takes three parameters: ;; $1: vector width of the target ;; $2: llvm type of the vector elements (e.g. i32) ;; $3: ispc type of the elements (e.g. int32) -ifelse(LLVM_VERSION, `LLVM_2_9',` -declare i32 @llvm.atomic.swap.i32.p0i32(i32 * %ptr, i32 %val) -declare i64 @llvm.atomic.swap.i64.p0i64(i64 * %ptr, i64 %val)') - define(`global_swap', ` define $2 @__atomic_swap_uniform_$3_global($2* %ptr, $2 %val) nounwind alwaysinline { -ifelse(LLVM_VERSION, `LLVM_2_9',` - %r = call $2 @llvm.atomic.swap.$2.p0$2($2 * %ptr, $2 %val)', ` - %r = atomicrmw xchg $2 * %ptr, $2 %val seq_cst') + %r = atomicrmw xchg $2 * %ptr, $2 %val seq_cst ret $2 %r } ') @@ -813,9 +794,6 @@ ifelse(LLVM_VERSION, `LLVM_2_9',` define(`global_atomic_exchange', ` -ifelse(LLVM_VERSION, `LLVM_2_9',` -declare $2 @llvm.atomic.cmp.swap.$2.p0$2($2 * %ptr, $2 %cmp, $2 %val)') - define <$1 x $2> @__atomic_compare_exchange_$3_global($2* %ptr, <$1 x $2> %cmp, <$1 x $2> %val, <$1 x MASK> %mask) nounwind alwaysinline { %rptr = alloca <$1 x $2> @@ -824,10 +802,7 @@ define <$1 x $2> @__atomic_compare_exchange_$3_global($2* %ptr, <$1 x $2> %cmp, per_lane($1, <$1 x MASK> %mask, ` %cmp_LANE_ID = extractelement <$1 x $2> %cmp, i32 LANE %val_LANE_ID = extractelement <$1 x $2> %val, i32 LANE -ifelse(LLVM_VERSION, `LLVM_2_9',` - %r_LANE_ID = call $2 @llvm.atomic.cmp.swap.$2.p0$2($2 * %ptr, $2 %cmp_LANE_ID, - $2 %val_LANE_ID)', ` - %r_LANE_ID = cmpxchg $2 * %ptr, $2 %cmp_LANE_ID, $2 %val_LANE_ID seq_cst') + %r_LANE_ID = cmpxchg $2 * %ptr, $2 %cmp_LANE_ID, $2 %val_LANE_ID seq_cst %rp_LANE_ID = getelementptr $2 * %rptr32, i32 LANE store $2 %r_LANE_ID, $2 * %rp_LANE_ID') @@ -837,9 +812,7 @@ ifelse(LLVM_VERSION, `LLVM_2_9',` define $2 @__atomic_compare_exchange_uniform_$3_global($2* %ptr, $2 %cmp, $2 %val) nounwind alwaysinline { -ifelse(LLVM_VERSION, `LLVM_2_9',` - %r = call $2 @llvm.atomic.cmp.swap.$2.p0$2($2 * %ptr, $2 %cmp, $2 %val)', ` - %r = cmpxchg $2 * %ptr, $2 %cmp, $2 %val seq_cst') + %r = cmpxchg $2 * %ptr, $2 %cmp, $2 %val seq_cst ret $2 %r } ') @@ -873,30 +846,6 @@ define i64 @__count_leading_zeros_i64(i64) nounwind readnone alwaysinline { ;; prefetching define(`define_prefetches', ` -ifelse(LLVM_VERSION, `LLVM_2_9', -` -declare void @llvm.prefetch(i8* nocapture %ptr, i32 %readwrite, i32 %locality) - -define void @__prefetch_read_uniform_1(i8 *) alwaysinline { - call void @llvm.prefetch(i8 * %0, i32 0, i32 3) - ret void -} - -define void @__prefetch_read_uniform_2(i8 *) alwaysinline { - call void @llvm.prefetch(i8 * %0, i32 0, i32 2) - ret void -} - -define void @__prefetch_read_uniform_3(i8 *) alwaysinline { - call void @llvm.prefetch(i8 * %0, i32 0, i32 1) - ret void -} - -define void @__prefetch_read_uniform_nt(i8 *) alwaysinline { - call void @llvm.prefetch(i8 * %0, i32 0, i32 0) - ret void -} -', ` declare void @llvm.prefetch(i8* nocapture %ptr, i32 %readwrite, i32 %locality, i32 %cachetype) ; cachetype == 1 is dcache @@ -920,7 +869,6 @@ define void @__prefetch_read_uniform_nt(i8 *) alwaysinline { ret void } ') -') ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; AOS/SOA conversion primitives