changing uniform_min and uniform_max implementations for avx targets

2014-03-05 20:18:36 +04:00
parent 9ab8f4e10e
commit 6738af0a0c
16 changed files with 183 additions and 56 deletions
--- a/builtins/target-avx-common.ll
+++ b/builtins/target-avx-common.ll
@@ -203,49 +203,51 @@ define void @__fastmath() nounwind alwaysinline {
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; float min/max

-declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
-declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
-
 define float @__max_uniform_float(float, float) nounwind readonly alwaysinline {
-  sse_binary_scalar(ret, 4, float, @llvm.x86.sse.max.ss, %0, %1)
+  %cmp = fcmp ogt float %1, %0
+  %ret = select i1 %cmp, float %1, float %0
  ret float %ret
 }

 define float @__min_uniform_float(float, float) nounwind readonly alwaysinline {
-  sse_binary_scalar(ret, 4, float, @llvm.x86.sse.min.ss, %0, %1)
+  %cmp = fcmp ogt float %1, %0
+  %ret = select i1 %cmp, float %0, float %1
  ret float %ret
 }

 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; double precision min/max

-declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
-declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
-
 define double @__min_uniform_double(double, double) nounwind readnone alwaysinline {
-  sse_binary_scalar(ret, 2, double, @llvm.x86.sse2.min.sd, %0, %1)
+  %cmp = fcmp ogt double %1, %0
+  %ret = select i1 %cmp, double %0, double %1
  ret double %ret
 }

 define double @__max_uniform_double(double, double) nounwind readnone alwaysinline {
-  sse_binary_scalar(ret, 2, double, @llvm.x86.sse2.max.sd, %0, %1)
+  %cmp = fcmp ogt double %1, %0
+  %ret = select i1 %cmp, double %1, double %0
  ret double %ret
 }

+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
+declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
+declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone

 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; int min/max

-declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
-declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
-
 define i32 @__min_uniform_int32(i32, i32) nounwind readonly alwaysinline {
-  sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pminsd, %0, %1)
+  %cmp = icmp sgt i32 %1, %0
+  %ret = select i1 %cmp, i32 %0, i32 %1
  ret i32 %ret
 }

 define i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinline {
-  sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pmaxsd, %0, %1)
+  %cmp = icmp sgt i32 %1, %0
+  %ret = select i1 %cmp, i32 %1, i32 %0
  ret i32 %ret
 }

@@ -253,16 +255,15 @@ define i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinline {
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; unsigned int min/max

-declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
-declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
-
 define i32 @__min_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
-  sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pminud, %0, %1)
+  %cmp = icmp ugt i32 %1, %0
+  %ret = select i1 %cmp, i32 %0, i32 %1
  ret i32 %ret
 }

 define i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
-  sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pmaxud, %0, %1)
+  %cmp = icmp ugt i32 %1, %0
+  %ret = select i1 %cmp, i32 %1, i32 %0
  ret i32 %ret
 }

--- a/tests/max-double-1.ispc
+++ b/tests/max-double-1.ispc
@@ -0,0 +1,19 @@
+
+export uniform int width() { return programCount; }
+
+
+
+export void f_du(uniform float RET[], uniform double aFOO[], uniform double b) {
+    double a = aFOO[programIndex];
+    RET[programIndex] = max(3 * a, (double)10.f);
+    RET[width()-1] = max(b, (double)100);
+}
+
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = 3 * (1+programIndex);
+    RET[0] = 10;
+    RET[1] = 10;
+    RET[2] = 10;
+    RET[programCount-1] = 100;
+}
--- a/tests/max-double-2.ispc
+++ b/tests/max-double-2.ispc
@@ -0,0 +1,18 @@
+
+export uniform int width() { return programCount; }
+
+
+
+export void f_du(uniform float RET[], uniform double aFOO[], uniform double b) {
+    double a = aFOO[programIndex];
+    RET[programIndex] = max(-10 * (a-3), (double).1f);
+    RET[width() - 1] = max(-10 * b, (double)2);
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = .1;
+    RET[0] = 20;
+    RET[1] = 10;
+    RET[programCount - 1] = 2;
+}
+
--- a/tests/max-float-1.ispc
+++ b/tests/max-float-1.ispc
@@ -3,9 +3,17 @@ export uniform int width() { return programCount; }



-export void f_f(uniform float RET[], uniform float aFOO[]) {
+export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    float a = aFOO[programIndex];
-    RET[programIndex] = max(10 * a, 10.f);
+    RET[programIndex] = max(3 * a, 10.f);
+    RET[width()-1] = max(b, 100);
 }

-export void result(uniform float RET[]) { RET[programIndex] = 10 * (1+programIndex); }
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = 3 * (1+programIndex);
+    RET[0] = 10;
+    RET[1] = 10;
+    RET[2] = 10;
+    RET[programCount-1] = 100;
+}
--- a/tests/max-float-2.ispc
+++ b/tests/max-float-2.ispc
@@ -3,10 +3,16 @@ export uniform int width() { return programCount; }



-export void f_f(uniform float RET[], uniform float aFOO[]) {
+export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    float a = aFOO[programIndex];
-    RET[programIndex] = max(-10 * a, 10.f);
+    RET[programIndex] = max(-10 * (a-3), .1f);
+    RET[width() - 1] = max(-10 * b, 2);
 }

-export void result(uniform float RET[]) { RET[programIndex] = 10.; }
+export void result(uniform float RET[]) {
+    RET[programIndex] = .1;
+    RET[0] = 20;
+    RET[1] = 10;
+    RET[programCount - 1] = 2;
+}

--- a/tests/max-int-1.ispc
+++ b/tests/max-int-1.ispc
@@ -3,11 +3,16 @@ export uniform int width() { return programCount; }



-export void f_f(uniform float RET[], uniform float aFOO[]) {
+export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    float a = aFOO[programIndex];
    int i = (int)a;
-    RET[programIndex] = max((int)200, i);
+    RET[programIndex] = max((int)2, i);
+    RET[width()-1] = max(10, (int)b);
 }

-export void result(uniform float RET[]) { RET[programIndex] = 200.; }
+export void result(uniform float RET[]) {
+    RET[programIndex] = programIndex + 1;
+    RET[0] = 2;
+    RET[programCount-1] = 10;
+}

--- a/tests/max-int.ispc
+++ b/tests/max-int.ispc
@@ -3,11 +3,15 @@ export uniform int width() { return programCount; }



-export void f_f(uniform float RET[], uniform float aFOO[]) {
+export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    float a = aFOO[programIndex];
    int i = (int)a;
-    RET[programIndex] = max((int)-20, i);
+    RET[programIndex] = max((int)-2, -1 * i);
+    RET[width() - 1] = max(-2, -1 * (int)b);
 }

-export void result(uniform float RET[]) { RET[programIndex] = 1+programIndex; }
+export void result(uniform float RET[]) {
+    RET[programIndex] = -2;
+    RET[0] = -1;
+}

--- a/tests/max-uint-1.ispc
+++ b/tests/max-uint-1.ispc
@@ -1,14 +1,16 @@

 export uniform int width() { return programCount; }

-export void f_f(uniform float r[], uniform float a[]) {
+export void f_fu(uniform float r[], uniform float a[], uniform float b) {
    unsigned int i = (unsigned int)a[programIndex];
    r[programIndex] = max((unsigned int)2, i);
+    r[width() - 1] = max((unsigned int)10, (unsigned int)b);
 }

 export void result(uniform float r[]) { 
    r[programIndex] = 1+programIndex;
    r[0] = 2;
+    r[programCount - 1] = 10;
 }


--- a/tests/min-double-1.ispc
+++ b/tests/min-double-1.ispc
@@ -0,0 +1,19 @@
+
+export uniform int width() { return programCount; }
+
+
+
+export void f_du(uniform float RET[], uniform double aFOO[], uniform double b) {
+    double a = aFOO[programIndex];
+    RET[programIndex] = min(3 * a, (double)10.f);
+    RET[width()-1] = min(b, (double)100);
+}
+
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = 10;
+    RET[0] = 3;
+    RET[1] = 6;
+    RET[2] = 9;
+    RET[programCount-1] = 5;
+}
--- a/tests/min-double-2.ispc
+++ b/tests/min-double-2.ispc
@@ -0,0 +1,18 @@
+
+export uniform int width() { return programCount; }
+
+
+
+export void f_du(uniform float RET[], uniform double aFOO[], uniform double b) {
+    double a = aFOO[programIndex];
+    RET[programIndex] = min(-10 * (a-3), (double).1f);
+    RET[width() - 1] = min(-10 * b, (double)2);
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = -10 * (programIndex - 2);
+    RET[0] = .1;
+    RET[1] = .1;
+    RET[programCount - 1] = -50;
+}
+
--- a/tests/min-float-1.ispc
+++ b/tests/min-float-1.ispc
@@ -3,9 +3,17 @@ export uniform int width() { return programCount; }



-export void f_f(uniform float RET[], uniform float aFOO[]) {
+export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    float a = aFOO[programIndex];
-    RET[programIndex] = min(10 * a, 10.f);
+    RET[programIndex] = min(3 * a, 10.f);
+    RET[width()-1] = min(b, 100);
 }

-export void result(uniform float RET[]) { RET[programIndex] = 10.; }
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = 10;
+    RET[0] = 3;
+    RET[1] = 6;
+    RET[2] = 9;
+    RET[programCount-1] = 5;
+}
--- a/tests/min-float-2.ispc
+++ b/tests/min-float-2.ispc
@@ -0,0 +1,18 @@
+
+export uniform int width() { return programCount; }
+
+
+
+export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
+    float a = aFOO[programIndex];
+    RET[programIndex] = min(-10 * (a-3), .1f);
+    RET[width() - 1] = min(-10 * b, 2);
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = -10 * (programIndex - 2);
+    RET[0] = .1;
+    RET[1] = .1;
+    RET[programCount - 1] = -50;
+}
+
--- a/tests/min-float.ispc
+++ b/tests/min-float.ispc
@@ -1,11 +0,0 @@
-
-export uniform int width() { return programCount; }
-
-
-
-export void f_f(uniform float RET[], uniform float aFOO[]) {
-    float a = aFOO[programIndex];
-    RET[programIndex] = min(a, 200.f);
-}
-
-export void result(uniform float RET[]) { RET[programIndex] = 1+programIndex; }
--- a/tests/min-int-1.ispc
+++ b/tests/min-int-1.ispc
@@ -3,11 +3,16 @@ export uniform int width() { return programCount; }



-export void f_f(uniform float RET[], uniform float aFOO[]) {
+export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    float a = aFOO[programIndex];
    int i = (int)a;
-    RET[programIndex] = min((int)-20, i);
+    RET[programIndex] = min((int)2, i);
+    RET[width()-1] = min(10, (int)b);
 }

-export void result(uniform float RET[]) { RET[programIndex] = -20; }
+export void result(uniform float RET[]) {
+    RET[programIndex] = 2;
+    RET[0] = 1;
+    RET[programCount-1] = 5;
+}

--- a/tests/min-int.ispc
+++ b/tests/min-int.ispc
@@ -3,11 +3,16 @@ export uniform int width() { return programCount; }



-export void f_f(uniform float RET[], uniform float aFOO[]) {
+export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    float a = aFOO[programIndex];
    int i = (int)a;
-    RET[programIndex] = min((int)200, i);
+    RET[programIndex] = min((int)-2, -1 * i);
+    RET[width() - 1] = min(-2, -1 * (int)b);
 }

-export void result(uniform float RET[]) { RET[programIndex] = 1+programIndex; }
+export void result(uniform float RET[]) {
+    RET[programIndex] = - programIndex - 1;
+    RET[0] = -2;
+    RET[programCount - 1] = -5;
+}

--- a/tests/min-uint-1.ispc
+++ b/tests/min-uint-1.ispc
@@ -1,14 +1,16 @@

 export uniform int width() { return programCount; }

-export void f_f(uniform float result[], uniform float aa[]) {
-    unsigned int i = (unsigned int)aa[programIndex];
-    result[programIndex] = min((unsigned int)2, i);
+export void f_fu(uniform float r[], uniform float a[], uniform float b) {
+    unsigned int i = (unsigned int)a[programIndex];
+    r[programIndex] = min((unsigned int)2, i);
+    r[width() - 1] = min((unsigned int)10, (unsigned int)b);
 }

 export void result(uniform float r[]) { 
    r[programIndex] = 2;
    r[0] = 1;
+    r[programCount - 1] = 5;
 }