Merge pull request #761 from ifilippov/max_min

Changing uniform_min and uniform_max implementations for avx targets
This commit is contained in:
Dmitry Babokin
2014-03-06 14:24:04 +03:00
16 changed files with 183 additions and 56 deletions

View File

@@ -203,49 +203,51 @@ define void @__fastmath() nounwind alwaysinline {
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; float min/max ;; float min/max
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
define float @__max_uniform_float(float, float) nounwind readonly alwaysinline { define float @__max_uniform_float(float, float) nounwind readonly alwaysinline {
sse_binary_scalar(ret, 4, float, @llvm.x86.sse.max.ss, %0, %1) %cmp = fcmp ogt float %1, %0
%ret = select i1 %cmp, float %1, float %0
ret float %ret ret float %ret
} }
define float @__min_uniform_float(float, float) nounwind readonly alwaysinline { define float @__min_uniform_float(float, float) nounwind readonly alwaysinline {
sse_binary_scalar(ret, 4, float, @llvm.x86.sse.min.ss, %0, %1) %cmp = fcmp ogt float %1, %0
%ret = select i1 %cmp, float %0, float %1
ret float %ret ret float %ret
} }
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; double precision min/max ;; double precision min/max
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
define double @__min_uniform_double(double, double) nounwind readnone alwaysinline { define double @__min_uniform_double(double, double) nounwind readnone alwaysinline {
sse_binary_scalar(ret, 2, double, @llvm.x86.sse2.min.sd, %0, %1) %cmp = fcmp ogt double %1, %0
%ret = select i1 %cmp, double %0, double %1
ret double %ret ret double %ret
} }
define double @__max_uniform_double(double, double) nounwind readnone alwaysinline { define double @__max_uniform_double(double, double) nounwind readnone alwaysinline {
sse_binary_scalar(ret, 2, double, @llvm.x86.sse2.max.sd, %0, %1) %cmp = fcmp ogt double %1, %0
%ret = select i1 %cmp, double %1, double %0
ret double %ret ret double %ret
} }
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; int min/max ;; int min/max
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
define i32 @__min_uniform_int32(i32, i32) nounwind readonly alwaysinline { define i32 @__min_uniform_int32(i32, i32) nounwind readonly alwaysinline {
sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pminsd, %0, %1) %cmp = icmp sgt i32 %1, %0
%ret = select i1 %cmp, i32 %0, i32 %1
ret i32 %ret ret i32 %ret
} }
define i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinline { define i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinline {
sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pmaxsd, %0, %1) %cmp = icmp sgt i32 %1, %0
%ret = select i1 %cmp, i32 %1, i32 %0
ret i32 %ret ret i32 %ret
} }
@@ -253,16 +255,15 @@ define i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinline {
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unsigned int min/max ;; unsigned int min/max
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
define i32 @__min_uniform_uint32(i32, i32) nounwind readonly alwaysinline { define i32 @__min_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pminud, %0, %1) %cmp = icmp ugt i32 %1, %0
%ret = select i1 %cmp, i32 %0, i32 %1
ret i32 %ret ret i32 %ret
} }
define i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline { define i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pmaxud, %0, %1) %cmp = icmp ugt i32 %1, %0
%ret = select i1 %cmp, i32 %1, i32 %0
ret i32 %ret ret i32 %ret
} }

19
tests/max-double-1.ispc Normal file
View File

@@ -0,0 +1,19 @@
// Reports the gang width (programCount) this test was compiled for.
export uniform int width() {
    return programCount;
}
// Test kernel for max() on doubles.
//   RET  - output array, one float per program instance
//   aFOO - input array of doubles, indexed by programIndex
//   b    - uniform double operand for the second max() call
// NOTE(review): the values written by result() imply aFOO[i] == i + 1 and
// b == 5; both are supplied by the test harness - confirm.
export void f_du(uniform float RET[], uniform double aFOO[], uniform double b) {
    // Per-instance case: max of a scaled input and the constant 10.
    const double tripled = 3 * aFOO[programIndex];
    RET[programIndex] = max(tripled, (double)10.f);

    // Both operands are uniform here; the result overwrites the last slot.
    const uniform int lastSlot = width() - 1;
    RET[lastSlot] = max(b, (double)100);
}
// Writes the expected output that f_du is compared against.
export void result(uniform float RET[]) {
    // Default: three times the one-based lane number.
    RET[programIndex] = 3 * (programIndex + 1);

    // The first three slots are pinned to 10.
    for (uniform int slot = 0; slot < 3; ++slot)
        RET[slot] = 10;

    // The last slot is written last, so it wins over the stores above.
    RET[programCount - 1] = 100;
}

18
tests/max-double-2.ispc Normal file
View File

@@ -0,0 +1,18 @@
// Reports the gang width (programCount) this test was compiled for.
export uniform int width() {
    return programCount;
}
// Test kernel for max() on doubles with negative scaling.
//   RET  - output array, one float per program instance
//   aFOO - input array of doubles, indexed by programIndex
//   b    - uniform double operand for the second max() call
// NOTE(review): the values written by result() imply aFOO[i] == i + 1 and
// b == 5; both are supplied by the test harness - confirm.
export void f_du(uniform float RET[], uniform double aFOO[], uniform double b) {
    // Per-instance case: shift the input by 3, scale by -10, clamp from below.
    const double shifted = aFOO[programIndex] - 3;
    RET[programIndex] = max(-10 * shifted, (double).1f);

    // Both operands are uniform here; the result overwrites the last slot.
    const uniform double scaledB = -10 * b;
    RET[width() - 1] = max(scaledB, (double)2);
}
// Writes the expected output that f_du is compared against.
export void result(uniform float RET[]) {
    const uniform int lastSlot = programCount - 1;

    // Default value for every lane.
    RET[programIndex] = .1;

    // Slots 0 and 1 hold explicit values.
    RET[0] = 20;
    RET[1] = 10;

    // The last slot is written last, so it wins over the stores above.
    RET[lastSlot] = 2;
}

View File

@@ -3,9 +3,17 @@ export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
RET[programIndex] = max(10 * a, 10.f); RET[programIndex] = max(3 * a, 10.f);
RET[width()-1] = max(b, 100);
} }
export void result(uniform float RET[]) { RET[programIndex] = 10 * (1+programIndex); }
export void result(uniform float RET[]) {
RET[programIndex] = 3 * (1+programIndex);
RET[0] = 10;
RET[1] = 10;
RET[2] = 10;
RET[programCount-1] = 100;
}

View File

@@ -3,10 +3,16 @@ export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
RET[programIndex] = max(-10 * a, 10.f); RET[programIndex] = max(-10 * (a-3), .1f);
RET[width() - 1] = max(-10 * b, 2);
} }
export void result(uniform float RET[]) { RET[programIndex] = 10.; } export void result(uniform float RET[]) {
RET[programIndex] = .1;
RET[0] = 20;
RET[1] = 10;
RET[programCount - 1] = 2;
}

View File

@@ -3,11 +3,16 @@ export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
int i = (int)a; int i = (int)a;
RET[programIndex] = max((int)200, i); RET[programIndex] = max((int)2, i);
RET[width()-1] = max(10, (int)b);
} }
export void result(uniform float RET[]) { RET[programIndex] = 200.; } export void result(uniform float RET[]) {
RET[programIndex] = programIndex + 1;
RET[0] = 2;
RET[programCount-1] = 10;
}

View File

@@ -3,11 +3,15 @@ export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
int i = (int)a; int i = (int)a;
RET[programIndex] = max((int)-20, i); RET[programIndex] = max((int)-2, -1 * i);
RET[width() - 1] = max(-2, -1 * (int)b);
} }
export void result(uniform float RET[]) { RET[programIndex] = 1+programIndex; } export void result(uniform float RET[]) {
RET[programIndex] = -2;
RET[0] = -1;
}

View File

@@ -1,14 +1,16 @@
export uniform int width() { return programCount; } export uniform int width() { return programCount; }
export void f_f(uniform float r[], uniform float a[]) { export void f_fu(uniform float r[], uniform float a[], uniform float b) {
unsigned int i = (unsigned int)a[programIndex]; unsigned int i = (unsigned int)a[programIndex];
r[programIndex] = max((unsigned int)2, i); r[programIndex] = max((unsigned int)2, i);
r[width() - 1] = max((unsigned int)10, (unsigned int)b);
} }
export void result(uniform float r[]) { export void result(uniform float r[]) {
r[programIndex] = 1+programIndex; r[programIndex] = 1+programIndex;
r[0] = 2; r[0] = 2;
r[programCount - 1] = 10;
} }

19
tests/min-double-1.ispc Normal file
View File

@@ -0,0 +1,19 @@
// Reports the gang width (programCount) this test was compiled for.
export uniform int width() {
    return programCount;
}
// Test kernel for min() on doubles.
//   RET  - output array, one float per program instance
//   aFOO - input array of doubles, indexed by programIndex
//   b    - uniform double operand for the second min() call
// NOTE(review): the values written by result() imply aFOO[i] == i + 1 and
// b == 5; both are supplied by the test harness - confirm.
export void f_du(uniform float RET[], uniform double aFOO[], uniform double b) {
    // Per-instance case: min of a scaled input and the constant 10.
    const double tripled = 3 * aFOO[programIndex];
    RET[programIndex] = min(tripled, (double)10.f);

    // Both operands are uniform here; the result overwrites the last slot.
    const uniform int lastSlot = width() - 1;
    RET[lastSlot] = min(b, (double)100);
}
// Writes the expected output that f_du is compared against.
export void result(uniform float RET[]) {
    // Default value for every lane.
    RET[programIndex] = 10;

    // The first three slots hold 3, 6 and 9.
    for (uniform int slot = 0; slot < 3; ++slot)
        RET[slot] = 3 * (slot + 1);

    // The last slot is written last, so it wins over the stores above.
    RET[programCount - 1] = 5;
}

18
tests/min-double-2.ispc Normal file
View File

@@ -0,0 +1,18 @@
// Reports the gang width (programCount) this test was compiled for.
export uniform int width() {
    return programCount;
}
// Test kernel for min() on doubles with negative scaling.
//   RET  - output array, one float per program instance
//   aFOO - input array of doubles, indexed by programIndex
//   b    - uniform double operand for the second min() call
// NOTE(review): the values written by result() imply aFOO[i] == i + 1 and
// b == 5; both are supplied by the test harness - confirm.
export void f_du(uniform float RET[], uniform double aFOO[], uniform double b) {
    // Per-instance case: shift the input by 3, scale by -10, clamp from above.
    const double shifted = aFOO[programIndex] - 3;
    RET[programIndex] = min(-10 * shifted, (double).1f);

    // Both operands are uniform here; the result overwrites the last slot.
    const uniform double scaledB = -10 * b;
    RET[width() - 1] = min(scaledB, (double)2);
}
// Writes the expected output that f_du is compared against.
export void result(uniform float RET[]) {
    // Default: -10 times the lane's distance from lane 2.
    const int fromLaneTwo = programIndex - 2;
    RET[programIndex] = -10 * fromLaneTwo;

    // Slots 0 and 1 are pinned to .1.
    for (uniform int slot = 0; slot < 2; ++slot)
        RET[slot] = .1;

    // The last slot is written last, so it wins over the stores above.
    RET[programCount - 1] = -50;
}

View File

@@ -3,9 +3,17 @@ export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
RET[programIndex] = min(10 * a, 10.f); RET[programIndex] = min(3 * a, 10.f);
RET[width()-1] = min(b, 100);
} }
export void result(uniform float RET[]) { RET[programIndex] = 10.; }
export void result(uniform float RET[]) {
RET[programIndex] = 10;
RET[0] = 3;
RET[1] = 6;
RET[2] = 9;
RET[programCount-1] = 5;
}

18
tests/min-float-2.ispc Normal file
View File

@@ -0,0 +1,18 @@
// Reports the gang width (programCount) this test was compiled for.
export uniform int width() {
    return programCount;
}
// Test kernel for min() on floats with negative scaling.
//   RET  - output array, one float per program instance
//   aFOO - input array of floats, indexed by programIndex
//   b    - uniform float operand for the second min() call
// NOTE(review): the values written by result() imply aFOO[i] == i + 1 and
// b == 5; both are supplied by the test harness - confirm.
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    // Per-instance case: shift the input by 3, scale by -10, clamp from above.
    const float shifted = aFOO[programIndex] - 3;
    RET[programIndex] = min(-10 * shifted, .1f);

    // The scaled uniform operand is compared against the literal 2; the
    // result overwrites the last slot.
    const uniform float scaledB = -10 * b;
    RET[width() - 1] = min(scaledB, 2);
}
// Writes the expected output that f_fu is compared against.
export void result(uniform float RET[]) {
    // Default: -10 times the lane's distance from lane 2.
    const int fromLaneTwo = programIndex - 2;
    RET[programIndex] = -10 * fromLaneTwo;

    // Slots 0 and 1 are pinned to .1.
    for (uniform int slot = 0; slot < 2; ++slot)
        RET[slot] = .1;

    // The last slot is written last, so it wins over the stores above.
    RET[programCount - 1] = -50;
}

View File

@@ -1,11 +0,0 @@
// Reports the gang width (programCount) this test was compiled for.
export uniform int width() {
    return programCount;
}
// Test kernel: stores min(input, 200) for each program instance.
//   RET  - output array, one float per program instance
//   aFOO - input array of floats, indexed by programIndex
export void f_f(uniform float RET[], uniform float aFOO[]) {
    RET[programIndex] = min(aFOO[programIndex], 200.f);
}
// Writes the expected output that f_f is compared against: the one-based
// lane number in every slot.
export void result(uniform float RET[]) {
    RET[programIndex] = programIndex + 1;
}

View File

@@ -3,11 +3,16 @@ export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
int i = (int)a; int i = (int)a;
RET[programIndex] = min((int)-20, i); RET[programIndex] = min((int)2, i);
RET[width()-1] = min(10, (int)b);
} }
export void result(uniform float RET[]) { RET[programIndex] = -20; } export void result(uniform float RET[]) {
RET[programIndex] = 2;
RET[0] = 1;
RET[programCount-1] = 5;
}

View File

@@ -3,11 +3,16 @@ export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
int i = (int)a; int i = (int)a;
RET[programIndex] = min((int)200, i); RET[programIndex] = min((int)-2, -1 * i);
RET[width() - 1] = min(-2, -1 * (int)b);
} }
export void result(uniform float RET[]) { RET[programIndex] = 1+programIndex; } export void result(uniform float RET[]) {
RET[programIndex] = - programIndex - 1;
RET[0] = -2;
RET[programCount - 1] = -5;
}

View File

@@ -1,14 +1,16 @@
export uniform int width() { return programCount; } export uniform int width() { return programCount; }
export void f_f(uniform float result[], uniform float aa[]) { export void f_fu(uniform float r[], uniform float a[], uniform float b) {
unsigned int i = (unsigned int)aa[programIndex]; unsigned int i = (unsigned int)a[programIndex];
result[programIndex] = min((unsigned int)2, i); r[programIndex] = min((unsigned int)2, i);
r[width() - 1] = min((unsigned int)10, (unsigned int)b);
} }
export void result(uniform float r[]) { export void result(uniform float r[]) {
r[programIndex] = 2; r[programIndex] = 2;
r[0] = 1; r[0] = 1;
r[programCount - 1] = 5;
} }