diff --git a/builtins/target-generic-common.ll b/builtins/target-generic-common.ll index 81127897..533def68 100644 --- a/builtins/target-generic-common.ll +++ b/builtins/target-generic-common.ll @@ -233,7 +233,7 @@ declare @__masked_load_32(i8 * nocapture, %mask) noun declare @__masked_load_64(i8 * nocapture, %mask) nounwind readonly declare void @__masked_store_8(* nocapture, , - ) nounwind + ) nounwind declare void @__masked_store_16(* nocapture, , ) nounwind declare void @__masked_store_32(* nocapture, , @@ -241,8 +241,9 @@ declare void @__masked_store_32(* nocapture, , declare void @__masked_store_64(* nocapture, , %mask) nounwind +ifelse(LLVM_VERSION, `LLVM_3_1svn',` define void @__masked_store_blend_8(* nocapture, , - ) nounwind { + ) nounwind alwaysinline { %v = load * %0 %v1 = select %2, %1, %v store %v1, * %0 @@ -250,7 +251,7 @@ define void @__masked_store_blend_8(* nocapture, , } define void @__masked_store_blend_16(* nocapture, , - ) nounwind { + ) nounwind alwaysinline { %v = load * %0 %v1 = select %2, %1, %v store %v1, * %0 @@ -258,7 +259,7 @@ define void @__masked_store_blend_16(* nocapture, , } define void @__masked_store_blend_32(* nocapture, , - ) nounwind { + ) nounwind alwaysinline { %v = load * %0 %v1 = select %2, %1, %v store %v1, * %0 @@ -266,12 +267,22 @@ define void @__masked_store_blend_32(* nocapture, , } define void @__masked_store_blend_64(* nocapture, - , ) nounwind { + , ) nounwind alwaysinline { %v = load * %0 %v1 = select %2, %1, %v store %v1, * %0 ret void } +',` +declare void @__masked_store_blend_8(* nocapture, , + ) nounwind +declare void @__masked_store_blend_16(* nocapture, , + ) nounwind +declare void @__masked_store_blend_32(* nocapture, , + ) nounwind +declare void @__masked_store_blend_64(* nocapture, , + %mask) nounwind +') ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; gather/scatter diff --git a/examples/intrinsics/generic-16.h b/examples/intrinsics/generic-16.h index 3ac77a41..c7600918 100644 --- a/examples/intrinsics/generic-16.h +++ b/examples/intrinsics/generic-16.h @@ -1010,6 +1010,26 @@ static FORCEINLINE void __masked_store_64(void *p, __vec16_i64 val, ptr[i] = val.v[i]; } +static FORCEINLINE void __masked_store_blend_8(void *p, __vec16_i8 val, + __vec16_i1 mask) { + __masked_store_8(p, val, mask); +} + +static FORCEINLINE void __masked_store_blend_16(void *p, __vec16_i16 val, + __vec16_i1 mask) { + __masked_store_16(p, val, mask); +} + +static FORCEINLINE void __masked_store_blend_32(void *p, __vec16_i32 val, + __vec16_i1 mask) { + __masked_store_32(p, val, mask); +} + +static FORCEINLINE void __masked_store_blend_64(void *p, __vec16_i64 val, + __vec16_i1 mask) { + __masked_store_64(p, val, mask); +} + /////////////////////////////////////////////////////////////////////////// // gather/scatter diff --git a/examples/intrinsics/sse4.h b/examples/intrinsics/sse4.h index 5e5131f3..81444ecb 100644 --- a/examples/intrinsics/sse4.h +++ b/examples/intrinsics/sse4.h @@ -2439,7 +2439,8 @@ static FORCEINLINE void __masked_store_8(void *p, __vec4_i8 val, ptr[3] = _mm_extract_epi8(val.v, 3); } -static FORCEINLINE void __masked_store_16(void *p, __vec4_i16 val, __vec4_i1 mask) { +static FORCEINLINE void __masked_store_16(void *p, __vec4_i16 val, + __vec4_i1 mask) { int16_t *ptr = (int16_t *)p; uint32_t m = _mm_extract_ps(mask.v, 0); @@ -2499,6 +2500,28 @@ static FORCEINLINE void __masked_store_64(void *p, __vec4_i64 val, ptr[3] = _mm_extract_epi64(val.v[1], 1); } +static FORCEINLINE void __masked_store_blend_8(void *p, __vec4_i8 val, + __vec4_i1 mask) { + __masked_store_8(p, val, mask); +} + +static FORCEINLINE void __masked_store_blend_16(void *p, __vec4_i16 val, + __vec4_i1 mask) { + __masked_store_16(p, val, mask); +} + +static FORCEINLINE void __masked_store_blend_32(void *p, __vec4_i32 val, + __vec4_i1 mask) { + // FIXME: do a load, blendvps, store here... + __masked_store_32(p, val, mask); +} + +static FORCEINLINE void __masked_store_blend_64(void *p, __vec4_i64 val, + __vec4_i1 mask) { + // FIXME: do a 2x (load, blendvps, store) here... + __masked_store_64(p, val, mask); +} + /////////////////////////////////////////////////////////////////////////// // gather/scatter // offsets * offsetScale is in bytes (for all of these)