fixed doubles

This commit is contained in:
Evghenii
2014-01-25 15:31:56 +01:00
parent bd34729217
commit 805196a6a0

View File

@@ -349,27 +349,6 @@ rdrand_decls()
define_shuffles()
;; declare <WIDTH x float> @__smear_float(float) nounwind readnone
;; declare <WIDTH x double> @__smear_double(double) nounwind readnone
;; declare <WIDTH x i8> @__smear_i8(i8) nounwind readnone
;; declare <WIDTH x i16> @__smear_i16(i16) nounwind readnone
;; declare <WIDTH x i32> @__smear_i32(i32) nounwind readnone
;; declare <WIDTH x i64> @__smear_i64(i64) nounwind readnone
;; declare <WIDTH x float> @__setzero_float() nounwind readnone
;; declare <WIDTH x double> @__setzero_double() nounwind readnone
;; declare <WIDTH x i8> @__setzero_i8() nounwind readnone
;; declare <WIDTH x i16> @__setzero_i16() nounwind readnone
;; declare <WIDTH x i32> @__setzero_i32() nounwind readnone
;; declare <WIDTH x i64> @__setzero_i64() nounwind readnone
;; declare <WIDTH x float> @__undef_float() nounwind readnone
;; declare <WIDTH x double> @__undef_double() nounwind readnone
;; declare <WIDTH x i8> @__undef_i8() nounwind readnone
;; declare <WIDTH x i16> @__undef_i16() nounwind readnone
;; declare <WIDTH x i32> @__undef_i32() nounwind readnone
;; declare <WIDTH x i64> @__undef_i64() nounwind readnone
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aos/soa
@@ -377,52 +356,27 @@ define_shuffles()
aossoa()
;; dummy 1 wide vector ops
define void
declare void
@__aos_to_soa4_float1(<1 x float> %v0, <1 x float> %v1, <1 x float> %v2,
<1 x float> %v3, <1 x float> * noalias %out0,
<1 x float> * noalias %out1, <1 x float> * noalias %out2,
<1 x float> * noalias %out3) nounwind alwaysinline {
<1 x float> * noalias %out3) nounwind alwaysinline ;
store <1 x float> %v0, <1 x float > * %out0
store <1 x float> %v1, <1 x float > * %out1
store <1 x float> %v2, <1 x float > * %out2
store <1 x float> %v3, <1 x float > * %out3
ret void
}
define void
declare void
@__soa_to_aos4_float1(<1 x float> %v0, <1 x float> %v1, <1 x float> %v2,
<1 x float> %v3, <1 x float> * noalias %out0,
<1 x float> * noalias %out1, <1 x float> * noalias %out2,
<1 x float> * noalias %out3) nounwind alwaysinline {
call void @__aos_to_soa4_float1(<1 x float> %v0, <1 x float> %v1,
<1 x float> %v2, <1 x float> %v3, <1 x float> * %out0,
<1 x float> * %out1, <1 x float> * %out2, <1 x float> * %out3)
ret void
}
<1 x float> * noalias %out3) nounwind alwaysinline ;
define void
declare void
@__aos_to_soa3_float1(<1 x float> %v0, <1 x float> %v1,
<1 x float> %v2, <1 x float> * %out0, <1 x float> * %out1,
<1 x float> * %out2) {
store <1 x float> %v0, <1 x float > * %out0
store <1 x float> %v1, <1 x float > * %out1
store <1 x float> %v2, <1 x float > * %out2
<1 x float> * %out2);
ret void
}
define void
declare void
@__soa_to_aos3_float1(<1 x float> %v0, <1 x float> %v1,
<1 x float> %v2, <1 x float> * %out0, <1 x float> * %out1,
<1 x float> * %out2) {
call void @__aos_to_soa3_float1(<1 x float> %v0, <1 x float> %v1,
<1 x float> %v2, <1 x float> * %out0, <1 x float> * %out1,
<1 x float> * %out2)
ret void
}
<1 x float> * %out2);
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; half conversion routines
@@ -630,11 +584,30 @@ define i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
;; declare i64 @__min_uniform_int64(i64, i64) nounwind readnone
;; declare i64 @__max_uniform_int64(i64, i64) nounwind readnone
;; Signed 64-bit minimum of the two uniform arguments.
;; NOTE(review): the name carries a trailing X compared with the commented-out
;; declaration just before it; presumably intentional, confirm with callers.
define i64 @__min_uniform_int64X(i64, i64) nounwind readonly alwaysinline {
  ; true when the first argument is strictly below the second, signed compare
  %first_is_smaller = icmp sgt i64 %1, %0
  %smallest = select i1 %first_is_smaller, i64 %0, i64 %1
  ret i64 %smallest
}
;; Signed 64-bit maximum of the two uniform arguments.
define i64 @__max_uniform_int64X(i64, i64) nounwind readonly alwaysinline {
  ; true when the first argument is strictly above the second, signed compare
  %first_is_larger = icmp slt i64 %1, %0
  %largest = select i1 %first_is_larger, i64 %0, i64 %1
  ret i64 %largest
}
;; declare i64 @__min_uniform_uint64(i64, i64) nounwind readnone
;; declare i64 @__max_uniform_uint64(i64, i64) nounwind readnone
;; Unsigned 64-bit minimum of the two uniform arguments.
define i64 @__min_uniform_uint64X(i64, i64) nounwind readonly alwaysinline {
  ; true when the first argument is strictly below the second, unsigned compare
  %first_is_smaller = icmp ugt i64 %1, %0
  %smallest = select i1 %first_is_smaller, i64 %0, i64 %1
  ret i64 %smallest
}
;; Unsigned 64-bit maximum of the two uniform arguments.
define i64 @__max_uniform_uint64X(i64, i64) nounwind readonly alwaysinline {
  ; true when the first argument is strictly above the second, unsigned compare
  %first_is_larger = icmp ult i64 %1, %0
  %largest = select i1 %first_is_larger, i64 %0, i64 %1
  ret i64 %largest
}
;; declare double @__min_uniform_double(double, double) nounwind readnone
;; declare double @__max_uniform_double(double, double) nounwind readnone
define double @__max_uniform_double(double, double) nounwind readonly alwaysinline {
%d = fcmp ogt double %0, %1
%r = select i1 %d, double %0, double %1
@@ -648,57 +621,32 @@ define double @__min_uniform_double(double, double) nounwind readonly alwaysinl
;; min/max uniform
;; /* float */
define <1 x float> @__max_varying_float(<1 x float>, <1 x float>) nounwind readonly alwaysinline {
%a = extractelement <1 x float> %0, i32 0
%b = extractelement <1 x float> %1, i32 0
%r = call float @__max_uniform_float(float %a, float %b)
%rv = insertelement <1 x float> undef, float %r, i32 0
ret <1 x float> %rv
}
define <1 x float> @__min_varying_float(<1 x float>, <1 x float>) nounwind readonly alwaysinline {
%a = extractelement <1 x float> %0, i32 0
%b = extractelement <1 x float> %1, i32 0
%r = call float @__min_uniform_float(float %a, float %b)
%rv = insertelement <1 x float> undef, float %r, i32 0
ret <1 x float> %rv
define(`minmax_vy',`
define <1 x $2> @__$1_varying_$3(<1 x $2>, <1 x $2>) nounwind readnone alwaysinline
{
%v0 = extractelement <1 x $2> %0, i32 0
%v1 = extractelement <1 x $2> %1, i32 0
%r = call $2 @__$1_uniform_$3($2 %v0, $2 %v1)
%ret = insertelement <1 x $2> undef, $2 %r, i32 0
ret <1 x $2> %ret;
}
;; /* int32 */
define <1 x i32> @__max_varying_int32(<1 x i32>, <1 x i32>) nounwind readonly alwaysinline {
%a = extractelement <1 x i32> %0, i32 0
%b = extractelement <1 x i32> %1, i32 0
%r = call i32 @__max_uniform_int32(i32 %a, i32 %b)
%rv = insertelement <1 x i32> undef, i32 %r, i32 0
ret <1 x i32> %rv
}
define <1 x i32> @__min_varying_int32(<1 x i32>, <1 x i32>) nounwind readonly alwaysinline {
%a = extractelement <1 x i32> %0, i32 0
%b = extractelement <1 x i32> %1, i32 0
%r = call i32 @__min_uniform_int32(i32 %a, i32 %b)
%rv = insertelement <1 x i32> undef, i32 %r, i32 0
ret <1 x i32> %rv
}
;; /* uint32 */
declare <WIDTH x i32> @__min_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone
declare <WIDTH x i32> @__max_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone
;; declare <WIDTH x i64> @__min_varying_int64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone
;; declare <WIDTH x i64> @__max_varying_int64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone
;; declare <WIDTH x i64> @__min_varying_uint64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone
;; declare <WIDTH x i64> @__max_varying_uint64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone
declare <WIDTH x double> @__min_varying_double(<WIDTH x double>,
<WIDTH x double>) nounwind readnone
declare <WIDTH x double> @__max_varying_double(<WIDTH x double>,
<WIDTH x double>) nounwind readnone
')
minmax_vy(min, i32, int32)
minmax_vy(max, i32, int32)
minmax_vy(min, i32, uint32)
minmax_vy(max, i32, uint32)
minmax_vy(min, float, float)
minmax_vy(max, float, float)
minmax_vy(min, double, double)
minmax_vy(max, double, double)
;; sqrt/rsqrt/rcp
declare float @llvm.nvvm.rsqrt.approx.f(float %f) nounwind readonly alwaysinline
declare float @llvm.nvvm.sqrt.f(float %f) nounwind readonly alwaysinline
declare float @llvm.sqrt.f32(float %f) nounwind readonly alwaysinline
declare double @llvm.nvvm.rsqrt.approx.d(double %f) nounwind readonly alwaysinline
declare double @llvm.nvvm.sqrt.d(double %f) nounwind readonly alwaysinline
declare double @llvm.sqrt.f64(double %f) nounwind readonly alwaysinline
;; declare float @__rcp_uniform_float(float) nounwind readnone
define float @__rcp_uniform_float(float) nounwind readonly alwaysinline {
@@ -710,7 +658,7 @@ define float @__rcp_uniform_float(float) nounwind readonly alwaysinline {
}
;; declare float @__sqrt_uniform_float(float) nounwind readnone
;; Square root of one uniform float, computed with the generic llvm.sqrt.f32
;; intrinsic.
define float @__sqrt_uniform_float(float) nounwind readonly alwaysinline {
  ; Exactly one definition of the result is kept: SSA form forbids assigning
  ; the same value name twice inside a function.
  %ret = call float @llvm.sqrt.f32(float %0)
; %ret = tail call float asm sideeffect "sqrt.approx.ftz.f32 $0, $1;", "=f,f"(float %0) nounwind readnone alwaysinline
  ret float %ret
}
@@ -746,10 +694,16 @@ define <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone
;; declare double @__sqrt_uniform_double(double) nounwind readnone
;; Square root of one uniform double, computed with the generic llvm.sqrt.f64
;; intrinsic.
define double @__sqrt_uniform_double(double) nounwind readonly alwaysinline {
  ; Exactly one definition of the result is kept: SSA form forbids assigning
  ; the same value name twice inside a function.
  %ret = call double @llvm.sqrt.f64(double %0)
  ret double %ret
}
declare <WIDTH x double> @__sqrt_varying_double(<WIDTH x double>) nounwind readnone
;; Varying double square root for the one-lane case: unpack lane 0, forward it
;; to the uniform routine, and repack the result into a vector.
;; NOTE(review): the body spells the vector type as a literal 1-element vector
;; while the signature uses the width macro; this only type-checks when that
;; macro expands to 1. Confirm that is always the case for this target.
define <WIDTH x double> @__sqrt_varying_double(<WIDTH x double>) nounwind readnone alwaysinline {
  %lane0 = extractelement <1 x double> %0, i32 0
  %root = call double @__sqrt_uniform_double(double %lane0)
  %packed = insertelement <1 x double> undef, double %root, i32 0
  ret <WIDTH x double> %packed
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; population count