Add separate set of builtins for AVX2.

(I.e., stop simply reusing the AVX1 builtins for AVX2.) For now, the only difference is that the int/uint min/max functions call the new AVX2 intrinsics for those operations. Once gather support is available from LLVM, it will be added here as well.
2012-01-13 14:39:33 -08:00
parent 0f8eee9809
commit 58a0b4a20d
9 changed files with 374 additions and 101 deletions
--- a/builtins/target-avx-x2.ll
+++ b/builtins/target-avx-x2.ll
@@ -170,33 +170,6 @@ define <16 x float> @__min_varying_float(<16 x float>,
 }


-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; int min/max
-
-define <16 x i32> @__min_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
-  binary4to16(ret, i32, @llvm.x86.sse41.pminsd, %0, %1)
-  ret <16 x i32> %ret
-}
-
-define <16 x i32> @__max_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
-  binary4to16(ret, i32, @llvm.x86.sse41.pmaxsd, %0, %1)
-  ret <16 x i32> %ret
-}
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; unsigned int min/max
-
-define <16 x i32> @__min_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
-  binary4to16(ret, i32, @llvm.x86.sse41.pminud, %0, %1)
-  ret <16 x i32> %ret
-}
-
-define <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
-  binary4to16(ret, i32, @llvm.x86.sse41.pmaxud, %0, %1)
-  ret <16 x i32> %ret
-}
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; horizontal ops

@@ -622,12 +595,7 @@ define void @__masked_store_blend_64(<16 x i64>* nocapture %ptr, <16 x i64> %new
 }

 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; gather/scatter
-
-gen_gather(16, i8)
-gen_gather(16, i16)
-gen_gather(16, i32)
-gen_gather(16, i64)
+;; scatter

 gen_scatter(16, i8)
 gen_scatter(16, i16)