diff --git a/builtins/target-nvptx.ll b/builtins/target-nvptx.ll index fa537977..dd3cbb5c 100644 --- a/builtins/target-nvptx.ll +++ b/builtins/target-nvptx.ll @@ -349,27 +349,6 @@ rdrand_decls() define_shuffles() -;; declare <WIDTH x float> @__smear_float(float) nounwind readnone -;; declare <WIDTH x double> @__smear_double(double) nounwind readnone -;; declare <WIDTH x i8> @__smear_i8(i8) nounwind readnone -;; declare <WIDTH x i16> @__smear_i16(i16) nounwind readnone -;; declare <WIDTH x i32> @__smear_i32(i32) nounwind readnone -;; declare <WIDTH x i64> @__smear_i64(i64) nounwind readnone - -;; declare <WIDTH x float> @__setzero_float() nounwind readnone -;; declare <WIDTH x double> @__setzero_double() nounwind readnone -;; declare <WIDTH x i8> @__setzero_i8() nounwind readnone -;; declare <WIDTH x i16> @__setzero_i16() nounwind readnone -;; declare <WIDTH x i32> @__setzero_i32() nounwind readnone -;; declare <WIDTH x i64> @__setzero_i64() nounwind readnone - -;; declare <WIDTH x float> @__undef_float() nounwind readnone -;; declare <WIDTH x double> @__undef_double() nounwind readnone -;; declare <WIDTH x i8> @__undef_i8() nounwind readnone -;; declare <WIDTH x i16> @__undef_i16() nounwind readnone -;; declare <WIDTH x i32> @__undef_i32() nounwind readnone -;; declare <WIDTH x i64> @__undef_i64() nounwind readnone - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; aos/soa @@ -377,52 +356,27 @@ define_shuffles() aossoa() ;; dummy 1 wide vector ops -define void +declare void @__aos_to_soa4_float1(<1 x float> %v0, <1 x float> %v1, <1 x float> %v2, <1 x float> %v3, <1 x float> * noalias %out0, <1 x float> * noalias %out1, <1 x float> * noalias %out2, - <1 x float> * noalias %out3) nounwind alwaysinline { + <1 x float> * noalias %out3) nounwind alwaysinline ; - store <1 x float> %v0, <1 x float > * %out0 - store <1 x float> %v1, <1 x float > * %out1 - store <1 x float> %v2, <1 x float > * %out2 - store <1 x float> %v3, <1 x float > * %out3 - - ret void -} - -define void +declare void @__soa_to_aos4_float1(<1 x float> %v0, <1 x float> %v1, <1 x float> %v2, <1 x float> %v3, <1 x float> * noalias %out0, <1 x float> * noalias %out1, <1 x float> * noalias %out2, - <1 x float> * noalias %out3) nounwind 
alwaysinline { - call void @__aos_to_soa4_float1(<1 x float> %v0, <1 x float> %v1, - <1 x float> %v2, <1 x float> %v3, <1 x float> * %out0, - <1 x float> * %out1, <1 x float> * %out2, <1 x float> * %out3) - ret void -} + <1 x float> * noalias %out3) nounwind alwaysinline ; -define void +declare void @__aos_to_soa3_float1(<1 x float> %v0, <1 x float> %v1, <1 x float> %v2, <1 x float> * %out0, <1 x float> * %out1, - <1 x float> * %out2) { - store <1 x float> %v0, <1 x float > * %out0 - store <1 x float> %v1, <1 x float > * %out1 - store <1 x float> %v2, <1 x float > * %out2 + <1 x float> * %out2); - ret void -} - -define void +declare void @__soa_to_aos3_float1(<1 x float> %v0, <1 x float> %v1, <1 x float> %v2, <1 x float> * %out0, <1 x float> * %out1, - <1 x float> * %out2) { - call void @__aos_to_soa3_float1(<1 x float> %v0, <1 x float> %v1, - <1 x float> %v2, <1 x float> * %out0, <1 x float> * %out1, - <1 x float> * %out2) - ret void -} - + <1 x float> * %out2); ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; half conversion routines @@ -630,11 +584,30 @@ define i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline { ;; declare i64 @__min_uniform_int64(i64, i64) nounwind readnone ;; declare i64 @__max_uniform_int64(i64, i64) nounwind readnone +define i64 @__min_uniform_int64X(i64, i64) nounwind readonly alwaysinline { + %c = icmp slt i64 %0, %1 + %r = select i1 %c, i64 %0, i64 %1 + ret i64 %r +} +define i64 @__max_uniform_int64X(i64, i64) nounwind readonly alwaysinline { + %c = icmp sgt i64 %0, %1 + %r = select i1 %c, i64 %0, i64 %1 + ret i64 %r +} + ;; declare i64 @__min_uniform_uint64(i64, i64) nounwind readnone ;; declare i64 @__max_uniform_uint64(i64, i64) nounwind readnone +define i64 @__min_uniform_uint64X(i64, i64) nounwind readonly alwaysinline { + %c = icmp ult i64 %0, %1 + %r = select i1 %c, i64 %0, i64 %1 + ret i64 %r +} +define i64 @__max_uniform_uint64X(i64, i64) nounwind readonly alwaysinline { + %c = icmp 
ugt i64 %0, %1 + %r = select i1 %c, i64 %0, i64 %1 + ret i64 %r +} -;; declare double @__min_uniform_double(double, double) nounwind readnone -;; declare double @__max_uniform_double(double, double) nounwind readnone define double @__max_uniform_double(double, double) nounwind readonly alwaysinline { %d = fcmp ogt double %0, %1 %r = select i1 %d, double %0, double %1 @@ -648,57 +621,32 @@ define double @__min_uniform_double(double, double) nounwind readonly alwaysinl ;; min/max uniform -;; /* float */ -define <1 x float> @__max_varying_float(<1 x float>, <1 x float>) nounwind readonly alwaysinline { - %a = extractelement <1 x float> %0, i32 0 - %b = extractelement <1 x float> %1, i32 0 - %r = call float @__max_uniform_float(float %a, float %b) - %rv = insertelement <1 x float> undef, float %r, i32 0 - ret <1 x float> %rv -} -define <1 x float> @__min_varying_float(<1 x float>, <1 x float>) nounwind readonly alwaysinline { - %a = extractelement <1 x float> %0, i32 0 - %b = extractelement <1 x float> %1, i32 0 - %r = call float @__min_uniform_float(float %a, float %b) - %rv = insertelement <1 x float> undef, float %r, i32 0 - ret <1 x float> %rv +define(`minmax_vy',` +define <1 x $2> @__$1_varying_$3(<1 x $2>, <1 x $2>) nounwind readnone alwaysinline +{ + %v0 = extractelement <1 x $2> %0, i32 0 + %v1 = extractelement <1 x $2> %1, i32 0 + %r = call $2 @__$1_uniform_$3($2 %v0, $2 %v1) + %ret = insertelement <1 x $2> undef, $2 %r, i32 0 + ret <1 x $2> %ret; } - -;; /* int32 */ -define <1 x i32> @__max_varying_int32(<1 x i32>, <1 x i32>) nounwind readonly alwaysinline { - %a = extractelement <1 x i32> %0, i32 0 - %b = extractelement <1 x i32> %1, i32 0 - %r = call i32 @__max_uniform_int32(i32 %a, i32 %b) - %rv = insertelement <1 x i32> undef, i32 %r, i32 0 - ret <1 x i32> %rv -} -define <1 x i32> @__min_varying_int32(<1 x i32>, <1 x i32>) nounwind readonly alwaysinline { - %a = extractelement <1 x i32> %0, i32 0 - %b = extractelement <1 x i32> %1, i32 0 - %r = call i32 
@__min_uniform_int32(i32 %a, i32 %b) - %rv = insertelement <1 x i32> undef, i32 %r, i32 0 - ret <1 x i32> %rv -} - -;; /* uint32 */ -declare <WIDTH x i32> @__min_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone -declare <WIDTH x i32> @__max_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone -;; declare <WIDTH x i64> @__min_varying_int64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone -;; declare <WIDTH x i64> @__max_varying_int64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone -;; declare <WIDTH x i64> @__min_varying_uint64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone -;; declare <WIDTH x i64> @__max_varying_uint64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone -declare <WIDTH x double> @__min_varying_double(<WIDTH x double>, - <WIDTH x double>) nounwind readnone -declare <WIDTH x double> @__max_varying_double(<WIDTH x double>, - <WIDTH x double>) nounwind readnone +') +minmax_vy(min, i32, int32) +minmax_vy(max, i32, int32) +minmax_vy(min, i32, uint32) +minmax_vy(max, i32, uint32) +minmax_vy(min, float, float) +minmax_vy(max, float, float) +minmax_vy(min, double, double) +minmax_vy(max, double, double) ;; sqrt/rsqrt/rcp declare float @llvm.nvvm.rsqrt.approx.f(float %f) nounwind readonly alwaysinline -declare float @llvm.nvvm.sqrt.f(float %f) nounwind readonly alwaysinline +declare float @llvm.sqrt.f32(float %f) nounwind readonly alwaysinline declare double @llvm.nvvm.rsqrt.approx.d(double %f) nounwind readonly alwaysinline -declare double @llvm.nvvm.sqrt.d(double %f) nounwind readonly alwaysinline +declare double @llvm.sqrt.f64(double %f) nounwind readonly alwaysinline ;; declare float @__rcp_uniform_float(float) nounwind readnone define float @__rcp_uniform_float(float) nounwind readonly alwaysinline { @@ -710,7 +658,7 @@ define float @__rcp_uniform_float(float) nounwind readonly alwaysinline { } ;; declare float @__sqrt_uniform_float(float) nounwind readnone define float @__sqrt_uniform_float(float) nounwind readonly alwaysinline { - %ret = call float @llvm.nvvm.sqrt.f(float %0) + %ret = call float @llvm.sqrt.f32(float %0) ; %ret = tail call float asm sideeffect "sqrt.approx.ftz.f32 $0, $1;", "=f,f"(float %0) nounwind readnone alwaysinline ret float %ret } @@ -746,10 +694,16 @@ define <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone ;; declare double 
@__sqrt_uniform_double(double) nounwind readnone define double @__sqrt_uniform_double(double) nounwind readonly alwaysinline { - %ret = call double @llvm.nvvm.sqrt.d(double %0) + %ret = call double @llvm.sqrt.f64(double %0) ret double %ret } -declare <WIDTH x double> @__sqrt_varying_double(<WIDTH x double>) nounwind readnone +define <WIDTH x double> @__sqrt_varying_double(<WIDTH x double>) nounwind readnone alwaysinline +{ + %v = extractelement <1 x double> %0, i32 0 + %r = call double @__sqrt_uniform_double(double %v) + %rv = insertelement <1 x double> undef, double %r, i32 0 + ret <WIDTH x double> %rv +} ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; population count