Merge pull request #321 from mmp/setzero

More varied support for constant vectors from C++ backend.
This commit is contained in:
Jean-Luc Duprat
2012-07-09 08:57:05 -07:00
7 changed files with 330 additions and 104 deletions

View File

@@ -477,10 +477,18 @@ template <int ALIGN> static FORCEINLINE void __store(__vec16_i1 *p, __vec16_i1 v
*ptr = v.m;
}
static FORCEINLINE __vec16_i1 __smear_i1(__vec16_i1, int i) {
// Builds a mask from a scalar flag: the low 16 bits are all set (0xFFFF)
// when i is non-zero, and all clear (0x0) when i is zero.
static FORCEINLINE __vec16_i1 __smear_i1(int i) {
    int bits = 0x0;
    if (i) {
        bits = 0xFFFF;
    }
    return bits;
}
// Returns a mask value converted from the integer literal 0.
// NOTE(review): presumably this means "all lanes off"; confirm against the
// __vec16_i1 definition, which is not visible here.
static FORCEINLINE __vec16_i1 __setzero_i1() {
return 0;
}
// Returns a default-constructed mask, standing in for an "undefined" mask
// value; callers should not rely on its contents.
// NOTE(review): what the default constructor leaves in the mask depends on
// the __vec16_i1 definition, which is not visible here.
static FORCEINLINE __vec16_i1 __undef_i1() {
return __vec16_i1(); // FIXME? __mm512_undef_mask();
}
///////////////////////////////////////////////////////////////////////////
// int8
@@ -686,10 +694,18 @@ static FORCEINLINE __vec16_i32 __select(bool cond, __vec16_i32 a, __vec16_i32 b)
// Reads one 32-bit lane out of the vector by viewing the by-value copy as
// an array of int32_t. The index is used as given; no bounds check or
// masking is applied here.
static FORCEINLINE int32_t __extract_element(__vec16_i32 v, int index) {
    int32_t *lanes = (int32_t *)&v;
    return lanes[index];
}
// Overwrites one 32-bit lane of *v in place by viewing the vector as an
// array of int32_t. The index is used as given; no bounds check or masking
// is applied here.
static FORCEINLINE void __insert_element(__vec16_i32 *v, int index, int32_t val) {
    int32_t *lanes = (int32_t *)v;
    lanes[index] = val;
}
static FORCEINLINE __vec16_i32 __smear_i32(__vec16_i32, int32_t i) {
// Produces a vector from a single scalar via _mm512_set_1to16_epi32, i.e.
// (going by the intrinsic's name) the value i replicated into each of the
// 16 int32 lanes.
static FORCEINLINE __vec16_i32 __smear_i32(int32_t i) {
    __vec16_i32 splat = _mm512_set_1to16_epi32(i);
    return splat;
}
// Returns the int32 vector produced by _mm512_setzero_epi32().
static FORCEINLINE __vec16_i32 __setzero_i32() {
    __vec16_i32 zeros = _mm512_setzero_epi32();
    return zeros;
}
// Returns the int32 vector produced by _mm512_undefined_epi32(); callers
// should not depend on the lane contents.
static FORCEINLINE __vec16_i32 __undef_i32() {
    __vec16_i32 unspecified = _mm512_undefined_epi32();
    return unspecified;
}
static FORCEINLINE __vec16_i32 __broadcast_i32(__vec16_i32 v, int index) {
int32_t val = __extract_element(v, index & 0xf);
return _mm512_set_1to16_epi32(val);
@@ -966,10 +982,18 @@ static FORCEINLINE void __insert_element(__vec16_f *v, int index, float val) {
((float *)v)[index] = val;
}
static FORCEINLINE __vec16_f __smear_float(__vec16_f, float f) {
// Produces a vector from a single scalar via _mm512_set_1to16_ps, i.e.
// (going by the intrinsic's name) the value f replicated into each of the
// 16 float lanes.
static FORCEINLINE __vec16_f __smear_float(float f) {
    __vec16_f splat = _mm512_set_1to16_ps(f);
    return splat;
}
// Returns the float vector produced by _mm512_setzero_ps().
static FORCEINLINE __vec16_f __setzero_float() {
    __vec16_f zeros = _mm512_setzero_ps();
    return zeros;
}
// Returns the float vector produced by _mm512_undefined_ps(); callers
// should not depend on the lane contents.
static FORCEINLINE __vec16_f __undef_float() {
    __vec16_f unspecified = _mm512_undefined_ps();
    return unspecified;
}
static FORCEINLINE __vec16_f __broadcast_float(__vec16_f v, int index) {
int32_t val = __extract_element(v, index & 0xf);
return _mm512_set_1to16_ps(val);
@@ -1116,13 +1140,27 @@ static FORCEINLINE void __insert_element(__vec16_d *v, int index, double val) {
((double *)v)[index] = val;
}
static FORCEINLINE __vec16_d __smear_double(__vec16_d, double d) {
// Builds a __vec16_d from a single scalar. The v1 member is loaded from the
// address of d with the 1X8 broadcast mode and no up-conversion, and v2 is
// then set to the same value as v1.
static FORCEINLINE __vec16_d __smear_double(double d) {
    __vec16_d splat;
    splat.v1 = _mm512_extload_pd(&d,
                                 _MM_UPCONV_PD_NONE,
                                 _MM_BROADCAST_1X8,
                                 _MM_HINT_NONE);
    splat.v2 = splat.v1;
    return splat;
}
// Builds a __vec16_d whose v1 member is the result of _mm512_setzero_pd()
// and whose v2 member is a copy of v1.
static FORCEINLINE __vec16_d __setzero_double() {
    __vec16_d zeros;
    zeros.v1 = _mm512_setzero_pd();
    zeros.v2 = zeros.v1;
    return zeros;
}
// Builds a __vec16_d whose v1 member is the result of _mm512_undefined_pd()
// and whose v2 member is a copy of v1; callers should not depend on the
// lane contents.
static FORCEINLINE __vec16_d __undef_double() {
    __vec16_d unspecified;
    unspecified.v1 = _mm512_undefined_pd();
    unspecified.v2 = unspecified.v1;
    return unspecified;
}
static FORCEINLINE __vec16_d __broadcast_double(__vec16_d v, int index) {
__vec16_d ret;
int32_t val = __extract_element(v, index & 0xf);