From 6738af0a0c159e96cf766ab7aeda6c77e7056eb4 Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Wed, 5 Mar 2014 20:18:36 +0400 Subject: [PATCH] changing uniform_min and uniform_max implementations for avx targets --- builtins/target-avx-common.ll | 41 ++++++++++++++++++----------------- tests/max-double-1.ispc | 19 ++++++++++++++++ tests/max-double-2.ispc | 18 +++++++++++++++ tests/max-float-1.ispc | 14 +++++++++--- tests/max-float-2.ispc | 12 +++++++--- tests/max-int-1.ispc | 11 +++++++--- tests/max-int.ispc | 10 ++++++--- tests/max-uint-1.ispc | 4 +++- tests/min-double-1.ispc | 19 ++++++++++++++++ tests/min-double-2.ispc | 18 +++++++++++++++ tests/min-float-1.ispc | 14 +++++++++--- tests/min-float-2.ispc | 18 +++++++++++++++ tests/min-float.ispc | 11 ---------- tests/min-int-1.ispc | 11 +++++++--- tests/min-int.ispc | 11 +++++++--- tests/min-uint-1.ispc | 8 ++++--- 16 files changed, 183 insertions(+), 56 deletions(-) create mode 100644 tests/max-double-1.ispc create mode 100644 tests/max-double-2.ispc create mode 100644 tests/min-double-1.ispc create mode 100644 tests/min-double-2.ispc create mode 100644 tests/min-float-2.ispc delete mode 100644 tests/min-float.ispc diff --git a/builtins/target-avx-common.ll b/builtins/target-avx-common.ll index 1c467476..54656d9f 100644 --- a/builtins/target-avx-common.ll +++ b/builtins/target-avx-common.ll @@ -203,49 +203,51 @@ define void @__fastmath() nounwind alwaysinline { ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; float min/max -declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone -declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone - define float @__max_uniform_float(float, float) nounwind readonly alwaysinline { - sse_binary_scalar(ret, 4, float, @llvm.x86.sse.max.ss, %0, %1) + %cmp = fcmp ogt float %1, %0 + %ret = select i1 %cmp, float %1, float %0 ret float %ret } define float @__min_uniform_float(float, float) nounwind readonly alwaysinline { - sse_binary_scalar(ret, 4, float, @llvm.x86.sse.min.ss, %0, %1) + %cmp = fcmp ogt float %1, %0 + %ret = select i1 %cmp, float %0, float %1 ret float %ret } ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; double precision min/max -declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone -declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone - define double @__min_uniform_double(double, double) nounwind readnone alwaysinline { - sse_binary_scalar(ret, 2, double, @llvm.x86.sse2.min.sd, %0, %1) + %cmp = fcmp ogt double %1, %0 + %ret = select i1 %cmp, double %0, double %1 ret double %ret } define double @__max_uniform_double(double, double) nounwind readnone alwaysinline { - sse_binary_scalar(ret, 2, double, @llvm.x86.sse2.max.sd, %0, %1) + %cmp = fcmp ogt double %1, %0 + %ret = select i1 %cmp, double %1, double %0 ret double %ret } +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone +declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone +declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone +declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; int min/max -declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone -declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone - define i32 @__min_uniform_int32(i32, i32) nounwind readonly alwaysinline { - sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pminsd, %0, %1) + %cmp = icmp sgt i32 %1, %0 + %ret = select i1 %cmp, i32 %0, i32 %1 ret i32 %ret } define i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinline { - sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pmaxsd, %0, %1) + %cmp = icmp sgt i32 %1, %0 + %ret = select i1 %cmp, i32 %1, i32 %0 ret i32 %ret } @@ -253,16 +255,15 @@ define i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinline { ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; unsigned int min/max -declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone -declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone - define i32 @__min_uniform_uint32(i32, i32) nounwind readonly alwaysinline { - sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pminud, %0, %1) + %cmp = icmp ugt i32 %1, %0 + %ret = select i1 %cmp, i32 %0, i32 %1 ret i32 %ret } define i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline { - sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pmaxud, %0, %1) + %cmp = icmp ugt i32 %1, %0 + %ret = select i1 %cmp, i32 %1, i32 %0 ret i32 %ret } diff --git a/tests/max-double-1.ispc b/tests/max-double-1.ispc new file mode 100644 index 00000000..e9c4a6a3 --- /dev/null +++ b/tests/max-double-1.ispc @@ -0,0 +1,19 @@ + +export uniform int width() { return programCount; } + + + +export void f_du(uniform float RET[], uniform double aFOO[], uniform double b) { + double a = aFOO[programIndex]; + RET[programIndex] = max(3 * a, (double)10.f); + RET[width()-1] = max(b, (double)100); +} + + +export void result(uniform float RET[]) { + RET[programIndex] = 3 * (1+programIndex); + RET[0] = 10; + RET[1] = 10; + RET[2] = 10; + RET[programCount-1] = 100; +} diff --git a/tests/max-double-2.ispc b/tests/max-double-2.ispc new file mode 100644 index 00000000..5f4c854e --- /dev/null +++ b/tests/max-double-2.ispc @@ -0,0 +1,18 @@ + +export uniform int width() { return programCount; } + + + +export void f_du(uniform float RET[], uniform double aFOO[], uniform double b) { + double a = aFOO[programIndex]; + RET[programIndex] = max(-10 * (a-3), (double).1f); + RET[width() - 1] = max(-10 * b, (double)2); +} + +export void result(uniform float RET[]) { + RET[programIndex] = .1; + RET[0] = 20; + RET[1] = 10; + RET[programCount - 1] = 2; +} + diff --git a/tests/max-float-1.ispc b/tests/max-float-1.ispc index b77de7e3..24b9822d 100644 --- a/tests/max-float-1.ispc +++ b/tests/max-float-1.ispc @@ -3,9 +3,17 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - RET[programIndex] = max(10 * a, 10.f); + RET[programIndex] = max(3 * a, 10.f); + RET[width()-1] = max(b, 100); } -export void result(uniform float RET[]) { RET[programIndex] = 10 * (1+programIndex); } + +export void result(uniform float RET[]) { + RET[programIndex] = 3 * (1+programIndex); + RET[0] = 10; + RET[1] = 10; + RET[2] = 10; + RET[programCount-1] = 100; +} diff --git a/tests/max-float-2.ispc b/tests/max-float-2.ispc index ca025c2f..f990b102 100644 --- a/tests/max-float-2.ispc +++ b/tests/max-float-2.ispc @@ -3,10 +3,16 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - RET[programIndex] = max(-10 * a, 10.f); + RET[programIndex] = max(-10 * (a-3), .1f); + RET[width() - 1] = max(-10 * b, 2); } -export void result(uniform float RET[]) { RET[programIndex] = 10.; } +export void result(uniform float RET[]) { + RET[programIndex] = .1; + RET[0] = 20; + RET[1] = 10; + RET[programCount - 1] = 2; +} diff --git a/tests/max-int-1.ispc b/tests/max-int-1.ispc index f1492b8b..7a565d4c 100644 --- a/tests/max-int-1.ispc +++ b/tests/max-int-1.ispc @@ -3,11 +3,16 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; int i = (int)a; - RET[programIndex] = max((int)200, i); + RET[programIndex] = max((int)2, i); + RET[width()-1] = max(10, (int)b); } -export void result(uniform float RET[]) { RET[programIndex] = 200.; } +export void result(uniform float RET[]) { + RET[programIndex] = programIndex + 1; + RET[0] = 2; + RET[programCount-1] = 10; +} diff --git a/tests/max-int.ispc b/tests/max-int.ispc index 3a4bb641..783a9274 100644 --- a/tests/max-int.ispc +++ b/tests/max-int.ispc @@ -3,11 +3,15 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; int i = (int)a; - RET[programIndex] = max((int)-20, i); + RET[programIndex] = max((int)-2, -1 * i); + RET[width() - 1] = max(-2, -1 * (int)b); } -export void result(uniform float RET[]) { RET[programIndex] = 1+programIndex; } +export void result(uniform float RET[]) { + RET[programIndex] = -2; + RET[0] = -1; +} diff --git a/tests/max-uint-1.ispc b/tests/max-uint-1.ispc index d1143f5d..78a66625 100644 --- a/tests/max-uint-1.ispc +++ b/tests/max-uint-1.ispc @@ -1,14 +1,16 @@ export uniform int width() { return programCount; } -export void f_f(uniform float r[], uniform float a[]) { +export void f_fu(uniform float r[], uniform float a[], uniform float b) { unsigned int i = (unsigned int)a[programIndex]; r[programIndex] = max((unsigned int)2, i); + r[width() - 1] = max((unsigned int)10, (unsigned int)b); } export void result(uniform float r[]) { r[programIndex] = 1+programIndex; r[0] = 2; + r[programCount - 1] = 10; } diff --git a/tests/min-double-1.ispc b/tests/min-double-1.ispc new file mode 100644 index 00000000..813a99fe --- /dev/null +++ b/tests/min-double-1.ispc @@ -0,0 +1,19 @@ + +export uniform int width() { return programCount; } + + + +export void f_du(uniform float RET[], uniform double aFOO[], uniform double b) { + double a = aFOO[programIndex]; + RET[programIndex] = min(3 * a, (double)10.f); + RET[width()-1] = min(b, (double)100); +} + + +export void result(uniform float RET[]) { + RET[programIndex] = 10; + RET[0] = 3; + RET[1] = 6; + RET[2] = 9; + RET[programCount-1] = 5; +} diff --git a/tests/min-double-2.ispc b/tests/min-double-2.ispc new file mode 100644 index 00000000..26609b81 --- /dev/null +++ b/tests/min-double-2.ispc @@ -0,0 +1,18 @@ + +export uniform int width() { return programCount; } + + + +export void f_du(uniform float RET[], uniform double aFOO[], uniform double b) { + double a = aFOO[programIndex]; + RET[programIndex] = min(-10 * (a-3), (double).1f); + RET[width() - 1] = min(-10 * b, (double)2); +} + +export void result(uniform float RET[]) { + RET[programIndex] = -10 * (programIndex - 2); + RET[0] = .1; + RET[1] = .1; + RET[programCount - 1] = -50; +} + diff --git a/tests/min-float-1.ispc b/tests/min-float-1.ispc index 914ae994..5b62c5c5 100644 --- a/tests/min-float-1.ispc +++ b/tests/min-float-1.ispc @@ -3,9 +3,17 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - RET[programIndex] = min(10 * a, 10.f); + RET[programIndex] = min(3 * a, 10.f); + RET[width()-1] = min(b, 100); } -export void result(uniform float RET[]) { RET[programIndex] = 10.; } + +export void result(uniform float RET[]) { + RET[programIndex] = 10; + RET[0] = 3; + RET[1] = 6; + RET[2] = 9; + RET[programCount-1] = 5; +} diff --git a/tests/min-float-2.ispc b/tests/min-float-2.ispc new file mode 100644 index 00000000..85c226ca --- /dev/null +++ b/tests/min-float-2.ispc @@ -0,0 +1,18 @@ + +export uniform int width() { return programCount; } + + + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + float a = aFOO[programIndex]; + RET[programIndex] = min(-10 * (a-3), .1f); + RET[width() - 1] = min(-10 * b, 2); +} + +export void result(uniform float RET[]) { + RET[programIndex] = -10 * (programIndex - 2); + RET[0] = .1; + RET[1] = .1; + RET[programCount - 1] = -50; +} + diff --git a/tests/min-float.ispc b/tests/min-float.ispc deleted file mode 100644 index caedd962..00000000 --- a/tests/min-float.ispc +++ /dev/null @@ -1,11 +0,0 @@ - -export uniform int width() { return programCount; } - - - -export void f_f(uniform float RET[], uniform float aFOO[]) { - float a = aFOO[programIndex]; - RET[programIndex] = min(a, 200.f); -} - -export void result(uniform float RET[]) { RET[programIndex] = 1+programIndex; } diff --git a/tests/min-int-1.ispc b/tests/min-int-1.ispc index 1c81936f..86f0821d 100644 --- a/tests/min-int-1.ispc +++ b/tests/min-int-1.ispc @@ -3,11 +3,16 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; int i = (int)a; - RET[programIndex] = min((int)-20, i); + RET[programIndex] = min((int)2, i); + RET[width()-1] = min(10, (int)b); } -export void result(uniform float RET[]) { RET[programIndex] = -20; } +export void result(uniform float RET[]) { + RET[programIndex] = 2; + RET[0] = 1; + RET[programCount-1] = 5; +} diff --git a/tests/min-int.ispc b/tests/min-int.ispc index 483b9b41..7f97e28c 100644 --- a/tests/min-int.ispc +++ b/tests/min-int.ispc @@ -3,11 +3,16 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; int i = (int)a; - RET[programIndex] = min((int)200, i); + RET[programIndex] = min((int)-2, -1 * i); + RET[width() - 1] = min(-2, -1 * (int)b); } -export void result(uniform float RET[]) { RET[programIndex] = 1+programIndex; } +export void result(uniform float RET[]) { + RET[programIndex] = - programIndex - 1; + RET[0] = -2; + RET[programCount - 1] = -5; +} diff --git a/tests/min-uint-1.ispc b/tests/min-uint-1.ispc index d1cd4461..042382f0 100644 --- a/tests/min-uint-1.ispc +++ b/tests/min-uint-1.ispc @@ -1,14 +1,16 @@ export uniform int width() { return programCount; } -export void f_f(uniform float result[], uniform float aa[]) { - unsigned int i = (unsigned int)aa[programIndex]; - result[programIndex] = min((unsigned int)2, i); +export void f_fu(uniform float r[], uniform float a[], uniform float b) { + unsigned int i = (unsigned int)a[programIndex]; + r[programIndex] = min((unsigned int)2, i); + r[width() - 1] = min((unsigned int)10, (unsigned int)b); } export void result(uniform float r[]) { r[programIndex] = 2; r[0] = 1; + r[programCount - 1] = 5; }