Merge pull request #1038 from Vsevolod-Livinskij/round_var_double

Add round, floor, and ceil implementations for varying double in knc.h and knl.h
This commit is contained in:
Dmitry Babokin
2015-05-14 18:19:15 +03:00
7 changed files with 99 additions and 0 deletions

View File

@@ -2675,6 +2675,27 @@ static FORCEINLINE __vec16_f __ceil_varying_float(__vec16_f v) {
return _mm512_ceil_ps(v);
}
static FORCEINLINE __vec16_d __round_varying_double(__vec16_d v) {
    // Call round() on each of the 16 elements and collect the results.
    double rounded[16];
    for (int lane = 0; lane < 16; ++lane) {
        rounded[lane] = round(v[lane]);
    }
    // Elements 8..15 are passed first and elements 0..7 last.
    // NOTE(review): presumably this mirrors the argument order expected by the
    // 16-value __vec16_d constructor -- confirm against its definition.
    return __vec16_d(rounded[8],  rounded[9],  rounded[10], rounded[11],
                     rounded[12], rounded[13], rounded[14], rounded[15],
                     rounded[0],  rounded[1],  rounded[2],  rounded[3],
                     rounded[4],  rounded[5],  rounded[6],  rounded[7]);
}
// Applies _mm512_floor_pd to the v1 and v2 members of v and packs both
// results into a new __vec16_d.
static FORCEINLINE __vec16_d __floor_varying_double(__vec16_d v) {
    return __vec16_d(_mm512_floor_pd(v.v1), _mm512_floor_pd(v.v2));
}
// Applies _mm512_ceil_pd to the v1 and v2 members of v and packs both
// results into a new __vec16_d.
static FORCEINLINE __vec16_d __ceil_varying_double(__vec16_d v) {
    return __vec16_d(_mm512_ceil_pd(v.v1), _mm512_ceil_pd(v.v2));
}
// min/max
// Scalar minimum: yields a when a < b holds, and b in every other case
// (including when the comparison is false because of a NaN operand).
static FORCEINLINE float __min_uniform_float(float a, float b) {
    if (a < b) {
        return a;
    }
    return b;
}

View File

@@ -2536,6 +2536,24 @@ static FORCEINLINE __vec16_f __ceil_varying_float(__vec16_f v) {
return _mm512_ceil_ps(v);
}
// Rounds every element of v to the nearest integral value (ties to even) and
// returns the result as doubles.
//
// The rounding is performed in the floating-point domain. A round trip
// through 32-bit integers (double -> int32 -> double) is only correct for
// |x| < 2^31: larger magnitudes, NaN and infinities all collapse to the
// integer-indefinite value, and the sign of -0.0 (or of small negative inputs
// that round to zero) is lost. _mm512_roundscale_pd with a scale of zero has
// none of these problems.
static FORCEINLINE __vec16_d __round_varying_double(__vec16_d v) {
    // Immediate: round to nearest, suppress the precision (inexact) exception.
    __m512d lo = _mm512_roundscale_pd(v.v_lo, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    __m512d hi = _mm512_roundscale_pd(v.v_hi, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    return __vec16_d(lo, hi);
}
// Applies _mm512_floor_pd to the v_lo and v_hi members of v and packs both
// results into a new __vec16_d.
static FORCEINLINE __vec16_d __floor_varying_double(__vec16_d v) {
    return __vec16_d(_mm512_floor_pd(v.v_lo), _mm512_floor_pd(v.v_hi));
}
// Applies _mm512_ceil_pd to the v_lo and v_hi members of v and packs both
// results into a new __vec16_d.
static FORCEINLINE __vec16_d __ceil_varying_double(__vec16_d v) {
    return __vec16_d(_mm512_ceil_pd(v.v_lo), _mm512_ceil_pd(v.v_hi));
}
// min/max
// Scalar minimum: yields a when a < b holds, and b in every other case
// (including when the comparison is false because of a NaN operand).
static FORCEINLINE float __min_uniform_float(float a, float b) {
    if (a < b) {
        return a;
    }
    return b;
}

12
tests/test-149.ispc Normal file
View File

@@ -0,0 +1,12 @@
export uniform int width() {
    // Returns the built-in programCount.
    return programCount;
}
export void f_f(uniform float RET[], uniform float aFOO[]) {
    // Read this program instance's input as a double, add an offset just
    // below one half, and store the double-precision round() of the sum.
    double value = aFOO[programIndex];
    double rounded = round(value + .49999);
    RET[programIndex] = rounded;
}
export void result(uniform float RET[]) {
    // Expected value for program instance i is i + 1.
    double expected = 1 + (double) programIndex;
    RET[programIndex] = expected;
}

12
tests/test-150.ispc Normal file
View File

@@ -0,0 +1,12 @@
export uniform int width() {
    // Returns the built-in programCount.
    return programCount;
}
export void f_f(uniform float RET[], uniform float aFOO[]) {
    // Read this program instance's input as a double, add an offset just
    // above one half, and store the double-precision round() of the sum.
    double value = aFOO[programIndex];
    double rounded = round(value + (0.50001d));
    RET[programIndex] = rounded;
}
export void result(uniform float RET[]) {
    // Expected value for program instance i is i + 2.
    int expected = programIndex + 2;
    RET[programIndex] = expected;
}

12
tests/test-151.ispc Normal file
View File

@@ -0,0 +1,12 @@
export uniform int width() {
    // Returns the built-in programCount.
    return programCount;
}
export void f_f(uniform float RET[], uniform float aFOO[]) {
    // Read this program instance's input as a double, add an offset just
    // below one, and store the double-precision floor() of the sum.
    double value = aFOO[programIndex];
    double floored = floor(value + .99999);
    RET[programIndex] = floored;
}
export void result(uniform float RET[]) {
    // Expected value for program instance i is i + 1.
    double expected = 1 +(double) programIndex;
    RET[programIndex] = expected;
}

12
tests/test-152.ispc Normal file
View File

@@ -0,0 +1,12 @@
export uniform int width() {
    // Returns the built-in programCount.
    return programCount;
}
export void f_f(uniform float RET[], uniform float aFOO[]) {
    // Read this program instance's input as a double, subtract a tiny
    // offset, and store the double-precision ceil() of the difference.
    double value = aFOO[programIndex];
    double ceiled = ceil(value - 1e-5);
    RET[programIndex] = ceiled;
}
export void result(uniform float RET[]) {
    // Expected value for program instance i is i + 1.
    double expected = 1 + (double) programIndex;
    RET[programIndex] = expected;
}

12
tests/test-153.ispc Normal file
View File

@@ -0,0 +1,12 @@
export uniform int width() {
    // Returns the built-in programCount.
    return programCount;
}
export void f_f(uniform float RET[], uniform float aFOO[]) {
    // Read this program instance's input as a double, negate it, add a tiny
    // offset, and store the double-precision ceil() of the result.
    double value = aFOO[programIndex];
    double ceiled = ceil(-value + 1e-5);
    RET[programIndex] = ceiled;
}
export void result(uniform float RET[]) {
    // Expected value for program instance i is -i.
    double expected = - (double) programIndex;
    RET[programIndex] = expected;
}