Improve naming of masked load/store instructions in builtins.
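The masked load/store builtins now use type-name suffixes (e.g. _i32 and _i64 rather than _32 and _64). Also cleaned up the m4 macros that generate these functions: they now use WIDTH to get the target vector width, so the explicit width and bit-size arguments are no longer passed (e.g. gen_masked_store(i8) instead of gen_masked_store(4, i8, 8)).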
This commit is contained in:
@@ -350,16 +350,16 @@ reduce_equal(4)
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; masked store
|
||||
|
||||
define void @__masked_store_blend_32(<4 x i32>* nocapture, <4 x i32>,
|
||||
<4 x i32> %mask) nounwind alwaysinline {
|
||||
define void @__masked_store_blend_i32(<4 x i32>* nocapture, <4 x i32>,
|
||||
<4 x i32> %mask) nounwind alwaysinline {
|
||||
%val = load <4 x i32> * %0, align 4
|
||||
%newval = call <4 x i32> @__vselect_i32(<4 x i32> %val, <4 x i32> %1, <4 x i32> %mask)
|
||||
store <4 x i32> %newval, <4 x i32> * %0, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @__masked_store_blend_64(<4 x i64>* nocapture %ptr, <4 x i64> %new,
|
||||
<4 x i32> %mask) nounwind alwaysinline {
|
||||
define void @__masked_store_blend_i64(<4 x i64>* nocapture %ptr, <4 x i64> %new,
|
||||
<4 x i32> %mask) nounwind alwaysinline {
|
||||
%oldValue = load <4 x i64>* %ptr, align 8
|
||||
|
||||
; Do 4x64-bit blends by doing two <4 x i32> blends, where the <4 x i32> values
|
||||
@@ -552,10 +552,10 @@ define <4 x double> @__max_varying_double(<4 x double>, <4 x double>) nounwind r
|
||||
|
||||
masked_store_blend_8_16_by_4()
|
||||
|
||||
gen_masked_store(4, i8, 8)
|
||||
gen_masked_store(4, i16, 16)
|
||||
gen_masked_store(4, i32, 32)
|
||||
gen_masked_store(4, i64, 64)
|
||||
gen_masked_store(i8)
|
||||
gen_masked_store(i16)
|
||||
gen_masked_store(i32)
|
||||
gen_masked_store(i64)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; unaligned loads/loads+broadcasts
|
||||
@@ -565,10 +565,10 @@ load_and_broadcast(i16)
|
||||
load_and_broadcast(i32)
|
||||
load_and_broadcast(i64)
|
||||
|
||||
masked_load(4, i8, 8, 1)
|
||||
masked_load(4, i16, 16, 2)
|
||||
masked_load(4, i32, 32, 4)
|
||||
masked_load(4, i64, 64, 8)
|
||||
masked_load(i8, 1)
|
||||
masked_load(i16, 2)
|
||||
masked_load(i32, 4)
|
||||
masked_load(i64, 8)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather/scatter
|
||||
|
||||
Reference in New Issue
Block a user