Fix generic code emission when building with LLVM3.0/2.9.

Specifically, don't use vector select for masked store blend there,
but emit calls to undefined __masked_store_blend_*() functions instead.

Added implementations of these functions to sse4.h and generic-16.h
in examples/intrinsics.  (Calls to these will never be generated with
LLVM 3.1.)
This commit is contained in:
Matt Pharr
2012-01-17 18:52:56 -08:00
parent 642150095d
commit d14a2de168
3 changed files with 60 additions and 6 deletions

View File

@@ -2439,7 +2439,8 @@ static FORCEINLINE void __masked_store_8(void *p, __vec4_i8 val,
ptr[3] = _mm_extract_epi8(val.v, 3);
}
static FORCEINLINE void __masked_store_16(void *p, __vec4_i16 val, __vec4_i1 mask) {
static FORCEINLINE void __masked_store_16(void *p, __vec4_i16 val,
__vec4_i1 mask) {
int16_t *ptr = (int16_t *)p;
uint32_t m = _mm_extract_ps(mask.v, 0);
@@ -2499,6 +2500,28 @@ static FORCEINLINE void __masked_store_64(void *p, __vec4_i64 val,
ptr[3] = _mm_extract_epi64(val.v[1], 1);
}
// Blend variant of the 8-bit masked store.  This implementation performs no
// blending of its own: it forwards p, val, and mask unchanged to
// __masked_store_8().
static FORCEINLINE void __masked_store_blend_8(void *p, __vec4_i8 val,
__vec4_i1 mask) {
__masked_store_8(p, val, mask);
}
// Blend variant of the 16-bit masked store.  This implementation performs no
// blending of its own: it forwards p, val, and mask unchanged to
// __masked_store_16().
static FORCEINLINE void __masked_store_blend_16(void *p, __vec4_i16 val,
__vec4_i1 mask) {
__masked_store_16(p, val, mask);
}
// Blend variant of the 32-bit masked store.  For now it simply forwards p,
// val, and mask unchanged to __masked_store_32(); the FIXME below records
// the intended vectorized implementation.
static FORCEINLINE void __masked_store_blend_32(void *p, __vec4_i32 val,
__vec4_i1 mask) {
// FIXME: do a load, blendvps, store here...
__masked_store_32(p, val, mask);
}
// Blend variant of the 64-bit masked store.  For now it simply forwards p,
// val, and mask unchanged to __masked_store_64(); the FIXME below records
// the intended vectorized implementation.
static FORCEINLINE void __masked_store_blend_64(void *p, __vec4_i64 val,
__vec4_i1 mask) {
// FIXME: do a 2x (load, blendvps, store) here...
__masked_store_64(p, val, mask);
}
///////////////////////////////////////////////////////////////////////////
// gather/scatter
// offsets * offsetScale is in bytes (for all of these)