More AVX fixes

Fix RNG state initialization for 16-wide targets.
Fix a number of bugs in reduce_add builtin implementations for AVX.
Fix some tests that had incorrect expected results for the 16-wide
  case.
This commit is contained in:
Matt Pharr
2011-09-06 15:53:11 -07:00
parent c76ef7b174
commit 4f451bd041
8 changed files with 24 additions and 22 deletions

View File

@@ -232,8 +232,8 @@ define internal float @__reduce_add_float(<16 x float>) nounwind readonly always
%v1 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %va, <8 x float> %vb)
%v2 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %v1, <8 x float> %v1)
%v3 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %v2, <8 x float> %v2)
%scalar1 = extractelement <8 x float> %v2, i32 0
%scalar2 = extractelement <8 x float> %v2, i32 4
%scalar1 = extractelement <8 x float> %v3, i32 0
%scalar2 = extractelement <8 x float> %v3, i32 4
%sum = fadd float %scalar1, %scalar2
ret float %sum
}
@@ -316,7 +316,9 @@ define internal double @__reduce_add_double(<16 x double>) nounwind readonly alw
%sum0 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %vab, <4 x double> %vcd)
%sum1 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %sum0, <4 x double> %sum0)
%sum = extractelement <4 x double> %sum1, i32 0
%final0 = extractelement <4 x double> %sum1, i32 0
%final1 = extractelement <4 x double> %sum1, i32 2
%sum = fadd double %final0, %final1
ret double %sum
}

View File

@@ -294,10 +294,12 @@ define internal double @__reduce_add_double(<8 x double>) nounwind readonly alwa
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
%v1 = shufflevector <8 x double> %0, <8 x double> undef,
<4 x i32> <i32 4, i32 5, i32 6, i32 7>
%sum01 = fadd <4 x double> %v0, %v1
%red0 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %sum01, <4 x double> %sum01)
%red1 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %red0, <4 x double> %red0)
%sum = extractelement <4 x double> %red1, i32 0
%sum0 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %v0, <4 x double> %v1)
%sum1 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %sum0, <4 x double> %sum0)
%final0 = extractelement <4 x double> %sum1, i32 0
%final1 = extractelement <4 x double> %sum1, i32 2
%sum = fadd double %final0, %final1
ret double %sum
}

View File

@@ -2862,6 +2862,12 @@ static inline void seed_rng(reference uniform RNGState state, uniform unsigned i
seed = __seed4(state, 0, seed);
if (programCount == 8)
__seed4(state, 4, seed ^ 0xbeeff00d);
if (programCount == 16) {
__seed4(state, 4, seed ^ 0xbeeff00d);
__seed4(state, 8, ((seed & 0xffff) << 16) | (seed >> 16));
__seed4(state, 12, (((seed & 0xff) << 24) | ((seed & 0xff00) << 8) |
((seed & 0xff0000) >> 8) | (seed & 0xff000000) >> 24));
}
}
static inline void fastmath() {

View File

@@ -11,5 +11,5 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
export void result(uniform float RET[]) {
RET[programIndex] = 10;
RET[programIndex] = max(10, 1 + programIndex);
}

View File

@@ -9,7 +9,10 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
}
export void result(uniform float RET[]) {
uniform int pc[16] = { 1, 1, 2, 1, 2, 2, 3, 1, 1, 2, 2, 3, 2, 3, 3, 4 };
uniform int pc[16] = { 1, 1, 2, 1,
2, 2, 3, 1,
2, 2, 3, 2,
3, 3, 4, 1 };
RET[programIndex] = pc[programIndex];
}

View File

@@ -15,7 +15,7 @@ export void result(uniform float RET[]) {
uniform int x = -1234;
if (programCount == 4) x = 10;
else if (programCount == 8) x = 36;
else if (programCount == 16) x = 124;
else if (programCount == 16) x = 136;
RET[programIndex] = x;
}

View File

@@ -15,7 +15,7 @@ export void result(uniform float RET[]) {
uniform int x = -1234;
if (programCount == 4) x = 10;
else if (programCount == 8) x = 36;
else if (programCount == 16) x = 124;
else if (programCount == 16) x = 136;
RET[programIndex] = x;
}

View File

@@ -9,17 +9,6 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
uniform float<5> z = c ? x : y;
RET[programIndex] = z[programIndex];
}
/*CO return x[y];*/
/*CO int index = aFOO[programIndex];*/
/*CO index = min(index, 3);*/
/*CO return x[index];*/
/*CO return x << 1;*/
/*CO return c[0] ? 1 : 0;*/
/*CO x = b;*/
/*CO y = b;*/
/*CO return x+y;*/
}
export void result(uniform float RET[]) {