Add separate variants of memory built-ins for floats and doubles.

Previously, we'd bitcast e.g. a vector of floats to a vector of i32s and then use the i32 variant of masked_load/masked_store/gather/scatter. Now, we have separate float/double variants of each of those.
2012-06-07 14:29:17 -07:00
parent 1ac3e03171
commit 89a2566e01
17 changed files with 593 additions and 41 deletions
--- a/builtins/target-sse2.ll
+++ b/builtins/target-sse2.ll
@@ -401,6 +401,8 @@ define void @__masked_store_blend_i64(<4 x i64>* nocapture %ptr, <4 x i64> %new,
 }


+masked_store_float_double()
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; rcp

@@ -563,12 +565,16 @@ gen_masked_store(i64)
 load_and_broadcast(i8)
 load_and_broadcast(i16)
 load_and_broadcast(i32)
+load_and_broadcast(float)
 load_and_broadcast(i64)
+load_and_broadcast(double)

 masked_load(i8,  1)
 masked_load(i16, 2)
 masked_load(i32, 4)
+masked_load(float, 4)
 masked_load(i64, 8)
+masked_load(double, 8)

 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; gather/scatter
@@ -578,9 +584,13 @@ masked_load(i64, 8)
 gen_gather(i8)
 gen_gather(i16)
 gen_gather(i32)
+gen_gather(float)
 gen_gather(i64)
+gen_gather(double)

 gen_scatter(i8)
 gen_scatter(i16)
 gen_scatter(i32)
+gen_scatter(float)
 gen_scatter(i64)
+gen_scatter(double)