diff --git a/builtins.cpp b/builtins.cpp index 81a9a64b..0193e8d3 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -487,8 +487,6 @@ lSetInternalFunctions(llvm::Module *module) { "__reduce_add_float", "__reduce_add_int32", "__reduce_add_int64", - "__reduce_add_uint32", - "__reduce_add_uint64", "__reduce_equal_double", "__reduce_equal_float", "__reduce_equal_int32", diff --git a/builtins/target-avx-x2.ll b/builtins/target-avx-x2.ll index 55a1c802..8c6b7753 100644 --- a/builtins/target-avx-x2.ll +++ b/builtins/target-avx-x2.ll @@ -272,7 +272,7 @@ reduce_equal(16) ;; horizontal int32 ops define <16 x i32> @__add_varying_int32(<16 x i32>, - <16 x i32>) nounwind readnone alwaysinline { + <16 x i32>) nounwind readnone alwaysinline { %s = add <16 x i32> %0, %1 ret <16 x i32> %s } @@ -300,11 +300,6 @@ define i32 @__reduce_max_int32(<16 x i32>) nounwind readnone alwaysinline { ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; horizontal uint32 ops -define i32 @__reduce_add_uint32(<16 x i32> %v) nounwind readnone alwaysinline { - %r = call i32 @__reduce_add_int32(<16 x i32> %v) - ret i32 %r -} - define i32 @__reduce_min_uint32(<16 x i32>) nounwind readnone alwaysinline { reduce16(i32, @__min_varying_uint32, @__min_uniform_uint32) } @@ -382,11 +377,6 @@ define i64 @__reduce_max_int64(<16 x i64>) nounwind readnone alwaysinline { ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; horizontal uint64 ops -define i64 @__reduce_add_uint64(<16 x i64> %v) nounwind readnone alwaysinline { - %r = call i64 @__reduce_add_int64(<16 x i64> %v) - ret i64 %r -} - define i64 @__reduce_min_uint64(<16 x i64>) nounwind readnone alwaysinline { reduce16(i64, @__min_varying_uint64, @__min_uniform_uint64) } diff --git a/builtins/target-avx.ll b/builtins/target-avx.ll index 3283376a..e6ab3a4b 100644 --- a/builtins/target-avx.ll +++ b/builtins/target-avx.ll @@ -261,11 +261,6 @@ define i32 @__reduce_max_int32(<8 x i32>) nounwind readnone alwaysinline { ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; horizontal uint32 ops -define i32 @__reduce_add_uint32(<8 x i32> %v) nounwind readnone alwaysinline { - %r = call i32 @__reduce_add_int32(<8 x i32> %v) - ret i32 %r -} - define i32 @__reduce_min_uint32(<8 x i32>) nounwind readnone alwaysinline { reduce8(i32, @__min_varying_uint32, @__min_uniform_uint32) } @@ -337,11 +332,6 @@ define i64 @__reduce_max_int64(<8 x i64>) nounwind readnone alwaysinline { ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; horizontal uint64 ops -define i64 @__reduce_add_uint64(<8 x i64> %v) nounwind readnone alwaysinline { - %r = call i64 @__reduce_add_int64(<8 x i64> %v) - ret i64 %r -} - define i64 @__reduce_min_uint64(<8 x i64>) nounwind readnone alwaysinline { reduce8(i64, @__min_varying_uint64, @__min_uniform_uint64) } diff --git a/builtins/target-generic-1.ll b/builtins/target-generic-1.ll index e18bc0ff..9b747e2e 100644 --- a/builtins/target-generic-1.ll +++ b/builtins/target-generic-1.ll @@ -502,11 +502,6 @@ define i32 @__reduce_max_int32(<1 x i32>) nounwind readnone { ret i32 %r } -define i32 @__reduce_add_uint32(<1 x i32> %v) nounwind readnone { - %r = call i32 @__reduce_add_int32(<1 x i32> %v) - ret i32 %r -} - define i32 @__reduce_min_uint32(<1 x i32>) nounwind readnone { %r = extractelement <1 x i32> %0, i32 0 ret i32 %r diff --git a/builtins/target-generic-common.ll b/builtins/target-generic-common.ll index fbce2531..25421893 100644 --- a/builtins/target-generic-common.ll +++ b/builtins/target-generic-common.ll @@ -234,7 +234,6 @@ declare i32 @__reduce_add_int32() nounwind readnone declare i32 @__reduce_min_int32() nounwind readnone declare i32 @__reduce_max_int32() nounwind readnone -declare i32 @__reduce_add_uint32() nounwind readnone declare i32 @__reduce_min_uint32() nounwind readnone declare i32 @__reduce_max_uint32() nounwind readnone @@ -246,7 +245,6 @@ declare i64 @__reduce_add_int64() nounwind readnone declare i64 @__reduce_min_int64() nounwind readnone declare i64 @__reduce_max_int64() nounwind readnone -declare i64 @__reduce_add_uint64() nounwind readnone declare i64 @__reduce_min_uint64() nounwind readnone declare i64 @__reduce_max_uint64() nounwind readnone diff --git a/builtins/target-sse2-x2.ll b/builtins/target-sse2-x2.ll index 3910dfdb..73361720 100644 --- a/builtins/target-sse2-x2.ll +++ b/builtins/target-sse2-x2.ll @@ -415,11 +415,6 @@ define i32 @__reduce_max_int32(<8 x i32>) nounwind readnone alwaysinline { reduce8(i32, @__max_varying_int32, @__max_uniform_int32) } -define i32 @__reduce_add_uint32(<8 x i32> %v) nounwind readnone alwaysinline { - %r = call i32 @__reduce_add_int32(<8 x i32> %v) - ret i32 %r -} - define i32 @__reduce_min_uint32(<8 x i32>) nounwind readnone alwaysinline { reduce8(i32, @__min_varying_uint32, @__min_uniform_uint32) } diff --git a/builtins/target-sse2.ll b/builtins/target-sse2.ll index 618a61d9..2bb06391 100644 --- a/builtins/target-sse2.ll +++ b/builtins/target-sse2.ll @@ -303,18 +303,13 @@ define i32 @__reduce_max_int32(<4 x i32>) nounwind readnone { reduce4(i32, @__max_varying_int32, @__max_uniform_int32) } -define i32 @__reduce_add_uint32(<4 x i32> %v) nounwind readnone { - %r = call i32 @__reduce_add_int32(<4 x i32> %v) - ret i32 %r -} - define i32 @__reduce_min_uint32(<4 x i32>) nounwind readnone { reduce4(i32, @__min_varying_uint32, @__min_uniform_uint32) } define i32 @__reduce_max_uint32(<4 x i32>) nounwind readnone { reduce4(i32, @__max_varying_uint32, @__max_uniform_uint32) - } +} define double @__reduce_add_double(<4 x double>) nounwind readnone { diff --git a/builtins/target-sse4-x2.ll b/builtins/target-sse4-x2.ll index 81efc5cb..ccae4d51 100644 --- a/builtins/target-sse4-x2.ll +++ b/builtins/target-sse4-x2.ll @@ -342,11 +342,6 @@ define i32 @__reduce_max_int32(<8 x i32>) nounwind readnone alwaysinline { reduce8by4(i32, @llvm.x86.sse41.pmaxsd, @__max_uniform_int32) } -define i32 @__reduce_add_uint32(<8 x i32> %v) nounwind readnone alwaysinline { - %r = call i32 @__reduce_add_int32(<8 x i32> %v) - ret i32 %r -} - define i32 @__reduce_min_uint32(<8 x i32>) nounwind readnone alwaysinline { reduce8by4(i32, @llvm.x86.sse41.pminud, @__min_uniform_uint32) } diff --git a/builtins/target-sse4.ll b/builtins/target-sse4.ll index f638d220..f622b839 100644 --- a/builtins/target-sse4.ll +++ b/builtins/target-sse4.ll @@ -334,18 +334,13 @@ define i32 @__reduce_max_int32(<4 x i32>) nounwind readnone { reduce4(i32, @__max_varying_int32, @__max_uniform_int32) } -define i32 @__reduce_add_uint32(<4 x i32> %v) nounwind readnone { - %r = call i32 @__reduce_add_int32(<4 x i32> %v) - ret i32 %r -} - define i32 @__reduce_min_uint32(<4 x i32>) nounwind readnone { reduce4(i32, @__min_varying_uint32, @__min_uniform_uint32) } define i32 @__reduce_max_uint32(<4 x i32>) nounwind readnone { reduce4(i32, @__max_varying_uint32, @__max_uniform_uint32) - } +} define double @__reduce_add_double(<4 x double>) nounwind readnone { diff --git a/stdlib.ispc b/stdlib.ispc index 3774c4a4..80f44e91 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -922,7 +922,7 @@ __declspec(safe) static inline uniform unsigned int reduce_add(unsigned int x) { // Set values for non-running lanes to zero so they don't affect the // result. - return __reduce_add_uint32(__mask ? x : 0); + return __reduce_add_int32(__mask ? x : 0); } __declspec(safe)