Work around a change in linker behavior in LLVM 3.1
As of LLVM 3.1, the Linker::LinkModules() call no longer links in any functions marked as 'internal'. This is problematic, since we'd like just about all of the builtins to be marked as internal so that, when they are in fact used, they are eliminated after they've been inlined. This change removes all of the 'internal' qualifiers from the builtins and adds an lSetInternalFunctions() routine to builtins.cpp that marks the functions that need it as internal after they've been linked in by LinkModules().
This commit is contained in:
@@ -37,7 +37,7 @@
|
|||||||
|
|
||||||
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal float @__rcp_uniform_float(float) nounwind readonly alwaysinline {
|
define float @__rcp_uniform_float(float) nounwind readonly alwaysinline {
|
||||||
; uniform float iv = extract(__rcp_u(v), 0);
|
; uniform float iv = extract(__rcp_u(v), 0);
|
||||||
; return iv * (2. - v * iv);
|
; return iv * (2. - v * iv);
|
||||||
%vecval = insertelement <4 x float> undef, float %0, i32 0
|
%vecval = insertelement <4 x float> undef, float %0, i32 0
|
||||||
@@ -56,7 +56,7 @@ define internal float @__rcp_uniform_float(float) nounwind readonly alwaysinline
|
|||||||
|
|
||||||
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
|
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
|
||||||
|
|
||||||
define internal float @__round_uniform_float(float) nounwind readonly alwaysinline {
|
define float @__round_uniform_float(float) nounwind readonly alwaysinline {
|
||||||
; roundss, round mode nearest 0b00 | don't signal precision exceptions 0b1000 = 8
|
; roundss, round mode nearest 0b00 | don't signal precision exceptions 0b1000 = 8
|
||||||
; the roundss intrinsic is a total mess--docs say:
|
; the roundss intrinsic is a total mess--docs say:
|
||||||
;
|
;
|
||||||
@@ -79,7 +79,7 @@ define internal float @__round_uniform_float(float) nounwind readonly alwaysinli
|
|||||||
ret float %rs
|
ret float %rs
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal float @__floor_uniform_float(float) nounwind readonly alwaysinline {
|
define float @__floor_uniform_float(float) nounwind readonly alwaysinline {
|
||||||
; see above for round_ss instrinsic discussion...
|
; see above for round_ss instrinsic discussion...
|
||||||
%xi = insertelement <4 x float> undef, float %0, i32 0
|
%xi = insertelement <4 x float> undef, float %0, i32 0
|
||||||
; roundps, round down 0b01 | don't signal precision exceptions 0b1001 = 9
|
; roundps, round down 0b01 | don't signal precision exceptions 0b1001 = 9
|
||||||
@@ -88,7 +88,7 @@ define internal float @__floor_uniform_float(float) nounwind readonly alwaysinli
|
|||||||
ret float %rs
|
ret float %rs
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal float @__ceil_uniform_float(float) nounwind readonly alwaysinline {
|
define float @__ceil_uniform_float(float) nounwind readonly alwaysinline {
|
||||||
; see above for round_ss instrinsic discussion...
|
; see above for round_ss instrinsic discussion...
|
||||||
%xi = insertelement <4 x float> undef, float %0, i32 0
|
%xi = insertelement <4 x float> undef, float %0, i32 0
|
||||||
; roundps, round up 0b10 | don't signal precision exceptions 0b1010 = 10
|
; roundps, round up 0b10 | don't signal precision exceptions 0b1010 = 10
|
||||||
@@ -102,14 +102,14 @@ define internal float @__ceil_uniform_float(float) nounwind readonly alwaysinlin
|
|||||||
|
|
||||||
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
|
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
|
||||||
|
|
||||||
define internal double @__round_uniform_double(double) nounwind readonly alwaysinline {
|
define double @__round_uniform_double(double) nounwind readonly alwaysinline {
|
||||||
%xi = insertelement <2 x double> undef, double %0, i32 0
|
%xi = insertelement <2 x double> undef, double %0, i32 0
|
||||||
%xr = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %xi, <2 x double> %xi, i32 8)
|
%xr = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %xi, <2 x double> %xi, i32 8)
|
||||||
%rs = extractelement <2 x double> %xr, i32 0
|
%rs = extractelement <2 x double> %xr, i32 0
|
||||||
ret double %rs
|
ret double %rs
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__floor_uniform_double(double) nounwind readonly alwaysinline {
|
define double @__floor_uniform_double(double) nounwind readonly alwaysinline {
|
||||||
; see above for round_ss instrinsic discussion...
|
; see above for round_ss instrinsic discussion...
|
||||||
%xi = insertelement <2 x double> undef, double %0, i32 0
|
%xi = insertelement <2 x double> undef, double %0, i32 0
|
||||||
; roundpd, round down 0b01 | don't signal precision exceptions 0b1001 = 9
|
; roundpd, round down 0b01 | don't signal precision exceptions 0b1001 = 9
|
||||||
@@ -118,7 +118,7 @@ define internal double @__floor_uniform_double(double) nounwind readonly alwaysi
|
|||||||
ret double %rs
|
ret double %rs
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__ceil_uniform_double(double) nounwind readonly alwaysinline {
|
define double @__ceil_uniform_double(double) nounwind readonly alwaysinline {
|
||||||
; see above for round_ss instrinsic discussion...
|
; see above for round_ss instrinsic discussion...
|
||||||
%xi = insertelement <2 x double> undef, double %0, i32 0
|
%xi = insertelement <2 x double> undef, double %0, i32 0
|
||||||
; roundpd, round up 0b10 | don't signal precision exceptions 0b1010 = 10
|
; roundpd, round up 0b10 | don't signal precision exceptions 0b1010 = 10
|
||||||
@@ -133,7 +133,7 @@ define internal double @__ceil_uniform_double(double) nounwind readonly alwaysin
|
|||||||
|
|
||||||
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal float @__rsqrt_uniform_float(float) nounwind readonly alwaysinline {
|
define float @__rsqrt_uniform_float(float) nounwind readonly alwaysinline {
|
||||||
; uniform float is = extract(__rsqrt_u(v), 0);
|
; uniform float is = extract(__rsqrt_u(v), 0);
|
||||||
%v = insertelement <4 x float> undef, float %0, i32 0
|
%v = insertelement <4 x float> undef, float %0, i32 0
|
||||||
%vis = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %v)
|
%vis = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %v)
|
||||||
@@ -154,7 +154,7 @@ define internal float @__rsqrt_uniform_float(float) nounwind readonly alwaysinli
|
|||||||
|
|
||||||
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal float @__sqrt_uniform_float(float) nounwind readonly alwaysinline {
|
define float @__sqrt_uniform_float(float) nounwind readonly alwaysinline {
|
||||||
sse_unary_scalar(ret, 4, float, @llvm.x86.sse.sqrt.ss, %0)
|
sse_unary_scalar(ret, 4, float, @llvm.x86.sse.sqrt.ss, %0)
|
||||||
ret float %ret
|
ret float %ret
|
||||||
}
|
}
|
||||||
@@ -166,7 +166,7 @@ define internal float @__sqrt_uniform_float(float) nounwind readonly alwaysinlin
|
|||||||
declare void @llvm.x86.sse.stmxcsr(i8 *) nounwind
|
declare void @llvm.x86.sse.stmxcsr(i8 *) nounwind
|
||||||
declare void @llvm.x86.sse.ldmxcsr(i8 *) nounwind
|
declare void @llvm.x86.sse.ldmxcsr(i8 *) nounwind
|
||||||
|
|
||||||
define internal void @__fastmath() nounwind alwaysinline {
|
define void @__fastmath() nounwind alwaysinline {
|
||||||
%ptr = alloca i32
|
%ptr = alloca i32
|
||||||
%ptr8 = bitcast i32 * %ptr to i8 *
|
%ptr8 = bitcast i32 * %ptr to i8 *
|
||||||
call void @llvm.x86.sse.stmxcsr(i8 * %ptr8)
|
call void @llvm.x86.sse.stmxcsr(i8 * %ptr8)
|
||||||
@@ -185,12 +185,12 @@ define internal void @__fastmath() nounwind alwaysinline {
|
|||||||
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
|
||||||
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal float @__max_uniform_float(float, float) nounwind readonly alwaysinline {
|
define float @__max_uniform_float(float, float) nounwind readonly alwaysinline {
|
||||||
sse_binary_scalar(ret, 4, float, @llvm.x86.sse.max.ss, %0, %1)
|
sse_binary_scalar(ret, 4, float, @llvm.x86.sse.max.ss, %0, %1)
|
||||||
ret float %ret
|
ret float %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal float @__min_uniform_float(float, float) nounwind readonly alwaysinline {
|
define float @__min_uniform_float(float, float) nounwind readonly alwaysinline {
|
||||||
sse_binary_scalar(ret, 4, float, @llvm.x86.sse.min.ss, %0, %1)
|
sse_binary_scalar(ret, 4, float, @llvm.x86.sse.min.ss, %0, %1)
|
||||||
ret float %ret
|
ret float %ret
|
||||||
}
|
}
|
||||||
@@ -202,12 +202,12 @@ define internal float @__min_uniform_float(float, float) nounwind readonly alway
|
|||||||
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
|
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
|
||||||
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
|
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
|
||||||
|
|
||||||
define internal i32 @__min_uniform_int32(i32, i32) nounwind readonly alwaysinline {
|
define i32 @__min_uniform_int32(i32, i32) nounwind readonly alwaysinline {
|
||||||
sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pminsd, %0, %1)
|
sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pminsd, %0, %1)
|
||||||
ret i32 %ret
|
ret i32 %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinline {
|
define i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinline {
|
||||||
sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pmaxsd, %0, %1)
|
sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pmaxsd, %0, %1)
|
||||||
ret i32 %ret
|
ret i32 %ret
|
||||||
}
|
}
|
||||||
@@ -219,12 +219,12 @@ define internal i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinlin
|
|||||||
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
|
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
|
||||||
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
|
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
|
||||||
|
|
||||||
define internal i32 @__min_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
|
define i32 @__min_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
|
||||||
sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pminud, %0, %1)
|
sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pminud, %0, %1)
|
||||||
ret i32 %ret
|
ret i32 %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
|
define i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
|
||||||
sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pmaxud, %0, %1)
|
sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pmaxud, %0, %1)
|
||||||
ret i32 %ret
|
ret i32 %ret
|
||||||
}
|
}
|
||||||
@@ -234,14 +234,14 @@ define internal i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinli
|
|||||||
|
|
||||||
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
|
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
|
||||||
|
|
||||||
define internal i32 @__popcnt_int32(i32) nounwind readonly alwaysinline {
|
define i32 @__popcnt_int32(i32) nounwind readonly alwaysinline {
|
||||||
%call = call i32 @llvm.ctpop.i32(i32 %0)
|
%call = call i32 @llvm.ctpop.i32(i32 %0)
|
||||||
ret i32 %call
|
ret i32 %call
|
||||||
}
|
}
|
||||||
|
|
||||||
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
|
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
|
||||||
|
|
||||||
define internal i64 @__popcnt_int64(i64) nounwind readonly alwaysinline {
|
define i64 @__popcnt_int64(i64) nounwind readonly alwaysinline {
|
||||||
%call = call i64 @llvm.ctpop.i64(i64 %0)
|
%call = call i64 @llvm.ctpop.i64(i64 %0)
|
||||||
ret i64 %call
|
ret i64 %call
|
||||||
}
|
}
|
||||||
@@ -251,7 +251,7 @@ define internal i64 @__popcnt_int64(i64) nounwind readonly alwaysinline {
|
|||||||
|
|
||||||
declare <2 x double> @llvm.x86.sse.sqrt.sd(<2 x double>) nounwind readnone
|
declare <2 x double> @llvm.x86.sse.sqrt.sd(<2 x double>) nounwind readnone
|
||||||
|
|
||||||
define internal double @__sqrt_uniform_double(double) nounwind alwaysinline {
|
define double @__sqrt_uniform_double(double) nounwind alwaysinline {
|
||||||
sse_unary_scalar(ret, 2, double, @llvm.x86.sse.sqrt.sd, %0)
|
sse_unary_scalar(ret, 2, double, @llvm.x86.sse.sqrt.sd, %0)
|
||||||
ret double %ret
|
ret double %ret
|
||||||
}
|
}
|
||||||
@@ -263,12 +263,12 @@ define internal double @__sqrt_uniform_double(double) nounwind alwaysinline {
|
|||||||
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
|
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
|
||||||
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
|
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
|
||||||
|
|
||||||
define internal double @__min_uniform_double(double, double) nounwind readnone alwaysinline {
|
define double @__min_uniform_double(double, double) nounwind readnone alwaysinline {
|
||||||
sse_binary_scalar(ret, 2, double, @llvm.x86.sse2.min.sd, %0, %1)
|
sse_binary_scalar(ret, 2, double, @llvm.x86.sse2.min.sd, %0, %1)
|
||||||
ret double %ret
|
ret double %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__max_uniform_double(double, double) nounwind readnone alwaysinline {
|
define double @__max_uniform_double(double, double) nounwind readnone alwaysinline {
|
||||||
sse_binary_scalar(ret, 2, double, @llvm.x86.sse2.max.sd, %0, %1)
|
sse_binary_scalar(ret, 2, double, @llvm.x86.sse2.max.sd, %0, %1)
|
||||||
ret double %ret
|
ret double %ret
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ include(`builtins-avx-common.ll')
|
|||||||
|
|
||||||
declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
|
declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal <16 x float> @__rcp_varying_float(<16 x float>) nounwind readonly alwaysinline {
|
define <16 x float> @__rcp_varying_float(<16 x float>) nounwind readonly alwaysinline {
|
||||||
; float iv = __rcp_v(v);
|
; float iv = __rcp_v(v);
|
||||||
; return iv * (2. - v * iv);
|
; return iv * (2. - v * iv);
|
||||||
|
|
||||||
@@ -64,17 +64,17 @@ define internal <16 x float> @__rcp_varying_float(<16 x float>) nounwind readonl
|
|||||||
|
|
||||||
declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone
|
declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone
|
||||||
|
|
||||||
define internal <16 x float> @__round_varying_float(<16 x float>) nounwind readonly alwaysinline {
|
define <16 x float> @__round_varying_float(<16 x float>) nounwind readonly alwaysinline {
|
||||||
; roundps, round mode nearest 0b00 | don't signal precision exceptions 0b1000 = 8
|
; roundps, round mode nearest 0b00 | don't signal precision exceptions 0b1000 = 8
|
||||||
round8to16(%0, 8)
|
round8to16(%0, 8)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <16 x float> @__floor_varying_float(<16 x float>) nounwind readonly alwaysinline {
|
define <16 x float> @__floor_varying_float(<16 x float>) nounwind readonly alwaysinline {
|
||||||
; roundps, round down 0b01 | don't signal precision exceptions 0b1001 = 9
|
; roundps, round down 0b01 | don't signal precision exceptions 0b1001 = 9
|
||||||
round8to16(%0, 9)
|
round8to16(%0, 9)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <16 x float> @__ceil_varying_float(<16 x float>) nounwind readonly alwaysinline {
|
define <16 x float> @__ceil_varying_float(<16 x float>) nounwind readonly alwaysinline {
|
||||||
; roundps, round up 0b10 | don't signal precision exceptions 0b1010 = 10
|
; roundps, round up 0b10 | don't signal precision exceptions 0b1010 = 10
|
||||||
round8to16(%0, 10)
|
round8to16(%0, 10)
|
||||||
}
|
}
|
||||||
@@ -84,15 +84,15 @@ define internal <16 x float> @__ceil_varying_float(<16 x float>) nounwind readon
|
|||||||
|
|
||||||
declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone
|
declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone
|
||||||
|
|
||||||
define internal <16 x double> @__round_varying_double(<16 x double>) nounwind readonly alwaysinline {
|
define <16 x double> @__round_varying_double(<16 x double>) nounwind readonly alwaysinline {
|
||||||
round4to16double(%0, 8)
|
round4to16double(%0, 8)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <16 x double> @__floor_varying_double(<16 x double>) nounwind readonly alwaysinline {
|
define <16 x double> @__floor_varying_double(<16 x double>) nounwind readonly alwaysinline {
|
||||||
round4to16double(%0, 9)
|
round4to16double(%0, 9)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <16 x double> @__ceil_varying_double(<16 x double>) nounwind readonly alwaysinline {
|
define <16 x double> @__ceil_varying_double(<16 x double>) nounwind readonly alwaysinline {
|
||||||
round4to16double(%0, 10)
|
round4to16double(%0, 10)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -102,7 +102,7 @@ define internal <16 x double> @__ceil_varying_double(<16 x double>) nounwind rea
|
|||||||
|
|
||||||
declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
|
declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal <16 x float> @__rsqrt_varying_float(<16 x float> %v) nounwind readonly alwaysinline {
|
define <16 x float> @__rsqrt_varying_float(<16 x float> %v) nounwind readonly alwaysinline {
|
||||||
; float is = __rsqrt_v(v);
|
; float is = __rsqrt_v(v);
|
||||||
unary8to16(is, float, @llvm.x86.avx.rsqrt.ps.256, %v)
|
unary8to16(is, float, @llvm.x86.avx.rsqrt.ps.256, %v)
|
||||||
; return 0.5 * is * (3. - (v * is) * is);
|
; return 0.5 * is * (3. - (v * is) * is);
|
||||||
@@ -125,7 +125,7 @@ define internal <16 x float> @__rsqrt_varying_float(<16 x float> %v) nounwind re
|
|||||||
|
|
||||||
declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
|
declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal <16 x float> @__sqrt_varying_float(<16 x float>) nounwind readonly alwaysinline {
|
define <16 x float> @__sqrt_varying_float(<16 x float>) nounwind readonly alwaysinline {
|
||||||
unary8to16(call, float, @llvm.x86.avx.sqrt.ps.256, %0)
|
unary8to16(call, float, @llvm.x86.avx.sqrt.ps.256, %0)
|
||||||
ret <16 x float> %call
|
ret <16 x float> %call
|
||||||
}
|
}
|
||||||
@@ -153,13 +153,13 @@ declare <16 x float> @__svml_pow(<16 x float>, <16 x float>)
|
|||||||
declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
|
declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
|
||||||
declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
|
declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal <16 x float> @__max_varying_float(<16 x float>,
|
define <16 x float> @__max_varying_float(<16 x float>,
|
||||||
<16 x float>) nounwind readonly alwaysinline {
|
<16 x float>) nounwind readonly alwaysinline {
|
||||||
binary8to16(call, float, @llvm.x86.avx.max.ps.256, %0, %1)
|
binary8to16(call, float, @llvm.x86.avx.max.ps.256, %0, %1)
|
||||||
ret <16 x float> %call
|
ret <16 x float> %call
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <16 x float> @__min_varying_float(<16 x float>,
|
define <16 x float> @__min_varying_float(<16 x float>,
|
||||||
<16 x float>) nounwind readonly alwaysinline {
|
<16 x float>) nounwind readonly alwaysinline {
|
||||||
binary8to16(call, float, @llvm.x86.avx.min.ps.256, %0, %1)
|
binary8to16(call, float, @llvm.x86.avx.min.ps.256, %0, %1)
|
||||||
ret <16 x float> %call
|
ret <16 x float> %call
|
||||||
@@ -169,12 +169,12 @@ define internal <16 x float> @__min_varying_float(<16 x float>,
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; int min/max
|
;; int min/max
|
||||||
|
|
||||||
define internal <16 x i32> @__min_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
define <16 x i32> @__min_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||||
binary4to16(ret, i32, @llvm.x86.sse41.pminsd, %0, %1)
|
binary4to16(ret, i32, @llvm.x86.sse41.pminsd, %0, %1)
|
||||||
ret <16 x i32> %ret
|
ret <16 x i32> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <16 x i32> @__max_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
define <16 x i32> @__max_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||||
binary4to16(ret, i32, @llvm.x86.sse41.pmaxsd, %0, %1)
|
binary4to16(ret, i32, @llvm.x86.sse41.pmaxsd, %0, %1)
|
||||||
ret <16 x i32> %ret
|
ret <16 x i32> %ret
|
||||||
}
|
}
|
||||||
@@ -183,12 +183,12 @@ define internal <16 x i32> @__max_varying_int32(<16 x i32>, <16 x i32>) nounwind
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; unsigned int min/max
|
;; unsigned int min/max
|
||||||
|
|
||||||
define internal <16 x i32> @__min_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
define <16 x i32> @__min_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||||
binary4to16(ret, i32, @llvm.x86.sse41.pminud, %0, %1)
|
binary4to16(ret, i32, @llvm.x86.sse41.pminud, %0, %1)
|
||||||
ret <16 x i32> %ret
|
ret <16 x i32> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
define <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||||
binary4to16(ret, i32, @llvm.x86.sse41.pmaxud, %0, %1)
|
binary4to16(ret, i32, @llvm.x86.sse41.pmaxud, %0, %1)
|
||||||
ret <16 x i32> %ret
|
ret <16 x i32> %ret
|
||||||
}
|
}
|
||||||
@@ -198,7 +198,7 @@ define internal <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwin
|
|||||||
|
|
||||||
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
|
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal i32 @__movmsk(<16 x i32>) nounwind readnone alwaysinline {
|
define i32 @__movmsk(<16 x i32>) nounwind readnone alwaysinline {
|
||||||
%floatmask = bitcast <16 x i32> %0 to <16 x float>
|
%floatmask = bitcast <16 x i32> %0 to <16 x float>
|
||||||
%mask0 = shufflevector <16 x float> %floatmask, <16 x float> undef,
|
%mask0 = shufflevector <16 x float> %floatmask, <16 x float> undef,
|
||||||
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||||
@@ -217,7 +217,7 @@ define internal i32 @__movmsk(<16 x i32>) nounwind readnone alwaysinline {
|
|||||||
|
|
||||||
declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone
|
declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal float @__reduce_add_float(<16 x float>) nounwind readonly alwaysinline {
|
define float @__reduce_add_float(<16 x float>) nounwind readonly alwaysinline {
|
||||||
%va = shufflevector <16 x float> %0, <16 x float> undef,
|
%va = shufflevector <16 x float> %0, <16 x float> undef,
|
||||||
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||||
%vb = shufflevector <16 x float> %0, <16 x float> undef,
|
%vb = shufflevector <16 x float> %0, <16 x float> undef,
|
||||||
@@ -232,12 +232,12 @@ define internal float @__reduce_add_float(<16 x float>) nounwind readonly always
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal float @__reduce_min_float(<16 x float>) nounwind readnone alwaysinline {
|
define float @__reduce_min_float(<16 x float>) nounwind readnone alwaysinline {
|
||||||
reduce16(float, @__min_varying_float, @__min_uniform_float)
|
reduce16(float, @__min_varying_float, @__min_uniform_float)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal float @__reduce_max_float(<16 x float>) nounwind readnone alwaysinline {
|
define float @__reduce_max_float(<16 x float>) nounwind readnone alwaysinline {
|
||||||
reduce16(float, @__max_varying_float, @__max_uniform_float)
|
reduce16(float, @__max_varying_float, @__max_uniform_float)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -246,28 +246,28 @@ reduce_equal(16)
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; horizontal int32 ops
|
;; horizontal int32 ops
|
||||||
|
|
||||||
define internal <16 x i32> @__add_varying_int32(<16 x i32>,
|
define <16 x i32> @__add_varying_int32(<16 x i32>,
|
||||||
<16 x i32>) nounwind readnone alwaysinline {
|
<16 x i32>) nounwind readnone alwaysinline {
|
||||||
%s = add <16 x i32> %0, %1
|
%s = add <16 x i32> %0, %1
|
||||||
ret <16 x i32> %s
|
ret <16 x i32> %s
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__add_uniform_int32(i32, i32) nounwind readnone alwaysinline {
|
define i32 @__add_uniform_int32(i32, i32) nounwind readnone alwaysinline {
|
||||||
%s = add i32 %0, %1
|
%s = add i32 %0, %1
|
||||||
ret i32 %s
|
ret i32 %s
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__reduce_add_int32(<16 x i32>) nounwind readnone alwaysinline {
|
define i32 @__reduce_add_int32(<16 x i32>) nounwind readnone alwaysinline {
|
||||||
reduce16(i32, @__add_varying_int32, @__add_uniform_int32)
|
reduce16(i32, @__add_varying_int32, @__add_uniform_int32)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal i32 @__reduce_min_int32(<16 x i32>) nounwind readnone alwaysinline {
|
define i32 @__reduce_min_int32(<16 x i32>) nounwind readnone alwaysinline {
|
||||||
reduce16(i32, @__min_varying_int32, @__min_uniform_int32)
|
reduce16(i32, @__min_varying_int32, @__min_uniform_int32)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal i32 @__reduce_max_int32(<16 x i32>) nounwind readnone alwaysinline {
|
define i32 @__reduce_max_int32(<16 x i32>) nounwind readnone alwaysinline {
|
||||||
reduce16(i32, @__max_varying_int32, @__max_uniform_int32)
|
reduce16(i32, @__max_varying_int32, @__max_uniform_int32)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -275,17 +275,17 @@ define internal i32 @__reduce_max_int32(<16 x i32>) nounwind readnone alwaysinli
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;;; horizontal uint32 ops
|
;;; horizontal uint32 ops
|
||||||
|
|
||||||
define internal i32 @__reduce_add_uint32(<16 x i32> %v) nounwind readnone alwaysinline {
|
define i32 @__reduce_add_uint32(<16 x i32> %v) nounwind readnone alwaysinline {
|
||||||
%r = call i32 @__reduce_add_int32(<16 x i32> %v)
|
%r = call i32 @__reduce_add_int32(<16 x i32> %v)
|
||||||
ret i32 %r
|
ret i32 %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__reduce_min_uint32(<16 x i32>) nounwind readnone alwaysinline {
|
define i32 @__reduce_min_uint32(<16 x i32>) nounwind readnone alwaysinline {
|
||||||
reduce16(i32, @__min_varying_uint32, @__min_uniform_uint32)
|
reduce16(i32, @__min_varying_uint32, @__min_uniform_uint32)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal i32 @__reduce_max_uint32(<16 x i32>) nounwind readnone alwaysinline {
|
define i32 @__reduce_max_uint32(<16 x i32>) nounwind readnone alwaysinline {
|
||||||
reduce16(i32, @__max_varying_uint32, @__max_uniform_uint32)
|
reduce16(i32, @__max_varying_uint32, @__max_uniform_uint32)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -295,7 +295,7 @@ define internal i32 @__reduce_max_uint32(<16 x i32>) nounwind readnone alwaysinl
|
|||||||
|
|
||||||
declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone
|
declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone
|
||||||
|
|
||||||
define internal double @__reduce_add_double(<16 x double>) nounwind readonly alwaysinline {
|
define double @__reduce_add_double(<16 x double>) nounwind readonly alwaysinline {
|
||||||
%va = shufflevector <16 x double> %0, <16 x double> undef,
|
%va = shufflevector <16 x double> %0, <16 x double> undef,
|
||||||
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||||
%vb = shufflevector <16 x double> %0, <16 x double> undef,
|
%vb = shufflevector <16 x double> %0, <16 x double> undef,
|
||||||
@@ -315,12 +315,12 @@ define internal double @__reduce_add_double(<16 x double>) nounwind readonly alw
|
|||||||
ret double %sum
|
ret double %sum
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__reduce_min_double(<16 x double>) nounwind readnone alwaysinline {
|
define double @__reduce_min_double(<16 x double>) nounwind readnone alwaysinline {
|
||||||
reduce16(double, @__min_varying_double, @__min_uniform_double)
|
reduce16(double, @__min_varying_double, @__min_uniform_double)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal double @__reduce_max_double(<16 x double>) nounwind readnone alwaysinline {
|
define double @__reduce_max_double(<16 x double>) nounwind readnone alwaysinline {
|
||||||
reduce16(double, @__max_varying_double, @__max_uniform_double)
|
reduce16(double, @__max_varying_double, @__max_uniform_double)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -328,28 +328,28 @@ define internal double @__reduce_max_double(<16 x double>) nounwind readnone alw
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; horizontal int64 ops
|
;; horizontal int64 ops
|
||||||
|
|
||||||
define internal <16 x i64> @__add_varying_int64(<16 x i64>,
|
define <16 x i64> @__add_varying_int64(<16 x i64>,
|
||||||
<16 x i64>) nounwind readnone alwaysinline {
|
<16 x i64>) nounwind readnone alwaysinline {
|
||||||
%s = add <16 x i64> %0, %1
|
%s = add <16 x i64> %0, %1
|
||||||
ret <16 x i64> %s
|
ret <16 x i64> %s
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__add_uniform_int64(i64, i64) nounwind readnone alwaysinline {
|
define i64 @__add_uniform_int64(i64, i64) nounwind readnone alwaysinline {
|
||||||
%s = add i64 %0, %1
|
%s = add i64 %0, %1
|
||||||
ret i64 %s
|
ret i64 %s
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__reduce_add_int64(<16 x i64>) nounwind readnone alwaysinline {
|
define i64 @__reduce_add_int64(<16 x i64>) nounwind readnone alwaysinline {
|
||||||
reduce16(i64, @__add_varying_int64, @__add_uniform_int64)
|
reduce16(i64, @__add_varying_int64, @__add_uniform_int64)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal i64 @__reduce_min_int64(<16 x i64>) nounwind readnone alwaysinline {
|
define i64 @__reduce_min_int64(<16 x i64>) nounwind readnone alwaysinline {
|
||||||
reduce16(i64, @__min_varying_int64, @__min_uniform_int64)
|
reduce16(i64, @__min_varying_int64, @__min_uniform_int64)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal i64 @__reduce_max_int64(<16 x i64>) nounwind readnone alwaysinline {
|
define i64 @__reduce_max_int64(<16 x i64>) nounwind readnone alwaysinline {
|
||||||
reduce16(i64, @__max_varying_int64, @__max_uniform_int64)
|
reduce16(i64, @__max_varying_int64, @__max_uniform_int64)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -357,17 +357,17 @@ define internal i64 @__reduce_max_int64(<16 x i64>) nounwind readnone alwaysinli
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;;; horizontal uint64 ops
|
;;; horizontal uint64 ops
|
||||||
|
|
||||||
define internal i64 @__reduce_add_uint64(<16 x i64> %v) nounwind readnone alwaysinline {
|
define i64 @__reduce_add_uint64(<16 x i64> %v) nounwind readnone alwaysinline {
|
||||||
%r = call i64 @__reduce_add_int64(<16 x i64> %v)
|
%r = call i64 @__reduce_add_int64(<16 x i64> %v)
|
||||||
ret i64 %r
|
ret i64 %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__reduce_min_uint64(<16 x i64>) nounwind readnone alwaysinline {
|
define i64 @__reduce_min_uint64(<16 x i64>) nounwind readnone alwaysinline {
|
||||||
reduce16(i64, @__min_varying_uint64, @__min_uniform_uint64)
|
reduce16(i64, @__min_varying_uint64, @__min_uniform_uint64)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal i64 @__reduce_max_uint64(<16 x i64>) nounwind readnone alwaysinline {
|
define i64 @__reduce_max_uint64(<16 x i64>) nounwind readnone alwaysinline {
|
||||||
reduce16(i64, @__max_varying_uint64, @__max_uniform_uint64)
|
reduce16(i64, @__max_varying_uint64, @__max_uniform_uint64)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -635,7 +635,7 @@ gen_scatter(16, i64)
|
|||||||
|
|
||||||
declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
|
declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
|
||||||
|
|
||||||
define internal <16 x double> @__sqrt_varying_double(<16 x double>) nounwind alwaysinline {
|
define <16 x double> @__sqrt_varying_double(<16 x double>) nounwind alwaysinline {
|
||||||
unary4to16(ret, double, @llvm.x86.avx.sqrt.pd.256, %0)
|
unary4to16(ret, double, @llvm.x86.avx.sqrt.pd.256, %0)
|
||||||
ret <16 x double> %ret
|
ret <16 x double> %ret
|
||||||
}
|
}
|
||||||
@@ -647,12 +647,12 @@ define internal <16 x double> @__sqrt_varying_double(<16 x double>) nounwind alw
|
|||||||
declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
|
declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
|
||||||
declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone
|
declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone
|
||||||
|
|
||||||
define internal <16 x double> @__min_varying_double(<16 x double>, <16 x double>) nounwind readnone alwaysinline {
|
define <16 x double> @__min_varying_double(<16 x double>, <16 x double>) nounwind readnone alwaysinline {
|
||||||
binary4to16(ret, double, @llvm.x86.avx.min.pd.256, %0, %1)
|
binary4to16(ret, double, @llvm.x86.avx.min.pd.256, %0, %1)
|
||||||
ret <16 x double> %ret
|
ret <16 x double> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <16 x double> @__max_varying_double(<16 x double>, <16 x double>) nounwind readnone alwaysinline {
|
define <16 x double> @__max_varying_double(<16 x double>, <16 x double>) nounwind readnone alwaysinline {
|
||||||
binary4to16(ret, double, @llvm.x86.avx.max.pd.256, %0, %1)
|
binary4to16(ret, double, @llvm.x86.avx.max.pd.256, %0, %1)
|
||||||
ret <16 x double> %ret
|
ret <16 x double> %ret
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ include(`builtins-avx-common.ll')
|
|||||||
|
|
||||||
declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
|
declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal <8 x float> @__rcp_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
define <8 x float> @__rcp_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
||||||
; float iv = __rcp_v(v);
|
; float iv = __rcp_v(v);
|
||||||
; return iv * (2. - v * iv);
|
; return iv * (2. - v * iv);
|
||||||
|
|
||||||
@@ -62,19 +62,19 @@ define internal <8 x float> @__rcp_varying_float(<8 x float>) nounwind readonly
|
|||||||
|
|
||||||
declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone
|
declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone
|
||||||
|
|
||||||
define internal <8 x float> @__round_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
define <8 x float> @__round_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
||||||
; roundps, round mode nearest 0b00 | don't signal precision exceptions 0b1000 = 8
|
; roundps, round mode nearest 0b00 | don't signal precision exceptions 0b1000 = 8
|
||||||
%call = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %0, i32 8)
|
%call = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %0, i32 8)
|
||||||
ret <8 x float> %call
|
ret <8 x float> %call
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x float> @__floor_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
define <8 x float> @__floor_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
||||||
; roundps, round down 0b01 | don't signal precision exceptions 0b1001 = 9
|
; roundps, round down 0b01 | don't signal precision exceptions 0b1001 = 9
|
||||||
%call = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %0, i32 9)
|
%call = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %0, i32 9)
|
||||||
ret <8 x float> %call
|
ret <8 x float> %call
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x float> @__ceil_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
define <8 x float> @__ceil_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
||||||
; roundps, round up 0b10 | don't signal precision exceptions 0b1010 = 10
|
; roundps, round up 0b10 | don't signal precision exceptions 0b1010 = 10
|
||||||
%call = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %0, i32 10)
|
%call = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %0, i32 10)
|
||||||
ret <8 x float> %call
|
ret <8 x float> %call
|
||||||
@@ -85,17 +85,17 @@ define internal <8 x float> @__ceil_varying_float(<8 x float>) nounwind readonly
|
|||||||
|
|
||||||
declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone
|
declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone
|
||||||
|
|
||||||
define internal <8 x double> @__round_varying_double(<8 x double>) nounwind readonly alwaysinline {
|
define <8 x double> @__round_varying_double(<8 x double>) nounwind readonly alwaysinline {
|
||||||
round4to8double(%0, 8)
|
round4to8double(%0, 8)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x double> @__floor_varying_double(<8 x double>) nounwind readonly alwaysinline {
|
define <8 x double> @__floor_varying_double(<8 x double>) nounwind readonly alwaysinline {
|
||||||
; roundpd, round down 0b01 | don't signal precision exceptions 0b1000 = 9
|
; roundpd, round down 0b01 | don't signal precision exceptions 0b1000 = 9
|
||||||
round4to8double(%0, 9)
|
round4to8double(%0, 9)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal <8 x double> @__ceil_varying_double(<8 x double>) nounwind readonly alwaysinline {
|
define <8 x double> @__ceil_varying_double(<8 x double>) nounwind readonly alwaysinline {
|
||||||
; roundpd, round up 0b10 | don't signal precision exceptions 0b1000 = 10
|
; roundpd, round up 0b10 | don't signal precision exceptions 0b1000 = 10
|
||||||
round4to8double(%0, 10)
|
round4to8double(%0, 10)
|
||||||
}
|
}
|
||||||
@@ -106,7 +106,7 @@ define internal <8 x double> @__ceil_varying_double(<8 x double>) nounwind reado
|
|||||||
|
|
||||||
declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
|
declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal <8 x float> @__rsqrt_varying_float(<8 x float> %v) nounwind readonly alwaysinline {
|
define <8 x float> @__rsqrt_varying_float(<8 x float> %v) nounwind readonly alwaysinline {
|
||||||
; float is = __rsqrt_v(v);
|
; float is = __rsqrt_v(v);
|
||||||
%is = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %v)
|
%is = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %v)
|
||||||
; return 0.5 * is * (3. - (v * is) * is);
|
; return 0.5 * is * (3. - (v * is) * is);
|
||||||
@@ -125,7 +125,7 @@ define internal <8 x float> @__rsqrt_varying_float(<8 x float> %v) nounwind read
|
|||||||
|
|
||||||
declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
|
declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal <8 x float> @__sqrt_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
define <8 x float> @__sqrt_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
||||||
%call = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %0)
|
%call = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %0)
|
||||||
ret <8 x float> %call
|
ret <8 x float> %call
|
||||||
}
|
}
|
||||||
@@ -153,13 +153,13 @@ declare <8 x float> @__svml_pow(<8 x float>, <8 x float>)
|
|||||||
declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
|
declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
|
||||||
declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
|
declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal <8 x float> @__max_varying_float(<8 x float>,
|
define <8 x float> @__max_varying_float(<8 x float>,
|
||||||
<8 x float>) nounwind readonly alwaysinline {
|
<8 x float>) nounwind readonly alwaysinline {
|
||||||
%call = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %0, <8 x float> %1)
|
%call = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %0, <8 x float> %1)
|
||||||
ret <8 x float> %call
|
ret <8 x float> %call
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x float> @__min_varying_float(<8 x float>,
|
define <8 x float> @__min_varying_float(<8 x float>,
|
||||||
<8 x float>) nounwind readonly alwaysinline {
|
<8 x float>) nounwind readonly alwaysinline {
|
||||||
%call = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %0, <8 x float> %1)
|
%call = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %0, <8 x float> %1)
|
||||||
ret <8 x float> %call
|
ret <8 x float> %call
|
||||||
@@ -169,12 +169,12 @@ define internal <8 x float> @__min_varying_float(<8 x float>,
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; int min/max
|
;; int min/max
|
||||||
|
|
||||||
define internal <8 x i32> @__min_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
define <8 x i32> @__min_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||||
binary4to8(ret, i32, @llvm.x86.sse41.pminsd, %0, %1)
|
binary4to8(ret, i32, @llvm.x86.sse41.pminsd, %0, %1)
|
||||||
ret <8 x i32> %ret
|
ret <8 x i32> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x i32> @__max_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
define <8 x i32> @__max_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||||
binary4to8(ret, i32, @llvm.x86.sse41.pmaxsd, %0, %1)
|
binary4to8(ret, i32, @llvm.x86.sse41.pmaxsd, %0, %1)
|
||||||
ret <8 x i32> %ret
|
ret <8 x i32> %ret
|
||||||
}
|
}
|
||||||
@@ -183,12 +183,12 @@ define internal <8 x i32> @__max_varying_int32(<8 x i32>, <8 x i32>) nounwind re
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; unsigned int min/max
|
;; unsigned int min/max
|
||||||
|
|
||||||
define internal <8 x i32> @__min_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
define <8 x i32> @__min_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||||
binary4to8(ret, i32, @llvm.x86.sse41.pminud, %0, %1)
|
binary4to8(ret, i32, @llvm.x86.sse41.pminud, %0, %1)
|
||||||
ret <8 x i32> %ret
|
ret <8 x i32> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
define <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||||
binary4to8(ret, i32, @llvm.x86.sse41.pmaxud, %0, %1)
|
binary4to8(ret, i32, @llvm.x86.sse41.pmaxud, %0, %1)
|
||||||
ret <8 x i32> %ret
|
ret <8 x i32> %ret
|
||||||
}
|
}
|
||||||
@@ -198,7 +198,7 @@ define internal <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind r
|
|||||||
|
|
||||||
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
|
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
|
define i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
|
||||||
%floatmask = bitcast <8 x i32> %0 to <8 x float>
|
%floatmask = bitcast <8 x i32> %0 to <8 x float>
|
||||||
%v = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %floatmask) nounwind readnone
|
%v = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %floatmask) nounwind readnone
|
||||||
ret i32 %v
|
ret i32 %v
|
||||||
@@ -209,7 +209,7 @@ define internal i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
|
|||||||
|
|
||||||
declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone
|
declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal float @__reduce_add_float(<8 x float>) nounwind readonly alwaysinline {
|
define float @__reduce_add_float(<8 x float>) nounwind readonly alwaysinline {
|
||||||
%v1 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %0, <8 x float> %0)
|
%v1 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %0, <8 x float> %0)
|
||||||
%v2 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %v1, <8 x float> %v1)
|
%v2 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %v1, <8 x float> %v1)
|
||||||
%scalar1 = extractelement <8 x float> %v2, i32 0
|
%scalar1 = extractelement <8 x float> %v2, i32 0
|
||||||
@@ -219,12 +219,12 @@ define internal float @__reduce_add_float(<8 x float>) nounwind readonly alwaysi
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal float @__reduce_min_float(<8 x float>) nounwind readnone alwaysinline {
|
define float @__reduce_min_float(<8 x float>) nounwind readnone alwaysinline {
|
||||||
reduce8(float, @__min_varying_float, @__min_uniform_float)
|
reduce8(float, @__min_varying_float, @__min_uniform_float)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal float @__reduce_max_float(<8 x float>) nounwind readnone alwaysinline {
|
define float @__reduce_max_float(<8 x float>) nounwind readnone alwaysinline {
|
||||||
reduce8(float, @__max_varying_float, @__max_uniform_float)
|
reduce8(float, @__max_varying_float, @__max_uniform_float)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -233,28 +233,28 @@ reduce_equal(8)
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; horizontal int32 ops
|
;; horizontal int32 ops
|
||||||
|
|
||||||
define internal <8 x i32> @__add_varying_int32(<8 x i32>,
|
define <8 x i32> @__add_varying_int32(<8 x i32>,
|
||||||
<8 x i32>) nounwind readnone alwaysinline {
|
<8 x i32>) nounwind readnone alwaysinline {
|
||||||
%s = add <8 x i32> %0, %1
|
%s = add <8 x i32> %0, %1
|
||||||
ret <8 x i32> %s
|
ret <8 x i32> %s
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__add_uniform_int32(i32, i32) nounwind readnone alwaysinline {
|
define i32 @__add_uniform_int32(i32, i32) nounwind readnone alwaysinline {
|
||||||
%s = add i32 %0, %1
|
%s = add i32 %0, %1
|
||||||
ret i32 %s
|
ret i32 %s
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__reduce_add_int32(<8 x i32>) nounwind readnone alwaysinline {
|
define i32 @__reduce_add_int32(<8 x i32>) nounwind readnone alwaysinline {
|
||||||
reduce8(i32, @__add_varying_int32, @__add_uniform_int32)
|
reduce8(i32, @__add_varying_int32, @__add_uniform_int32)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal i32 @__reduce_min_int32(<8 x i32>) nounwind readnone alwaysinline {
|
define i32 @__reduce_min_int32(<8 x i32>) nounwind readnone alwaysinline {
|
||||||
reduce8(i32, @__min_varying_int32, @__min_uniform_int32)
|
reduce8(i32, @__min_varying_int32, @__min_uniform_int32)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal i32 @__reduce_max_int32(<8 x i32>) nounwind readnone alwaysinline {
|
define i32 @__reduce_max_int32(<8 x i32>) nounwind readnone alwaysinline {
|
||||||
reduce8(i32, @__max_varying_int32, @__max_uniform_int32)
|
reduce8(i32, @__max_varying_int32, @__max_uniform_int32)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -262,17 +262,17 @@ define internal i32 @__reduce_max_int32(<8 x i32>) nounwind readnone alwaysinlin
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;;; horizontal uint32 ops
|
;;; horizontal uint32 ops
|
||||||
|
|
||||||
define internal i32 @__reduce_add_uint32(<8 x i32> %v) nounwind readnone alwaysinline {
|
define i32 @__reduce_add_uint32(<8 x i32> %v) nounwind readnone alwaysinline {
|
||||||
%r = call i32 @__reduce_add_int32(<8 x i32> %v)
|
%r = call i32 @__reduce_add_int32(<8 x i32> %v)
|
||||||
ret i32 %r
|
ret i32 %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__reduce_min_uint32(<8 x i32>) nounwind readnone alwaysinline {
|
define i32 @__reduce_min_uint32(<8 x i32>) nounwind readnone alwaysinline {
|
||||||
reduce8(i32, @__min_varying_uint32, @__min_uniform_uint32)
|
reduce8(i32, @__min_varying_uint32, @__min_uniform_uint32)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal i32 @__reduce_max_uint32(<8 x i32>) nounwind readnone alwaysinline {
|
define i32 @__reduce_max_uint32(<8 x i32>) nounwind readnone alwaysinline {
|
||||||
reduce8(i32, @__max_varying_uint32, @__max_uniform_uint32)
|
reduce8(i32, @__max_varying_uint32, @__max_uniform_uint32)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -282,7 +282,7 @@ define internal i32 @__reduce_max_uint32(<8 x i32>) nounwind readnone alwaysinli
|
|||||||
|
|
||||||
declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone
|
declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone
|
||||||
|
|
||||||
define internal double @__reduce_add_double(<8 x double>) nounwind readonly alwaysinline {
|
define double @__reduce_add_double(<8 x double>) nounwind readonly alwaysinline {
|
||||||
%v0 = shufflevector <8 x double> %0, <8 x double> undef,
|
%v0 = shufflevector <8 x double> %0, <8 x double> undef,
|
||||||
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||||
%v1 = shufflevector <8 x double> %0, <8 x double> undef,
|
%v1 = shufflevector <8 x double> %0, <8 x double> undef,
|
||||||
@@ -296,12 +296,12 @@ define internal double @__reduce_add_double(<8 x double>) nounwind readonly alwa
|
|||||||
ret double %sum
|
ret double %sum
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__reduce_min_double(<8 x double>) nounwind readnone alwaysinline {
|
define double @__reduce_min_double(<8 x double>) nounwind readnone alwaysinline {
|
||||||
reduce8(double, @__min_varying_double, @__min_uniform_double)
|
reduce8(double, @__min_varying_double, @__min_uniform_double)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal double @__reduce_max_double(<8 x double>) nounwind readnone alwaysinline {
|
define double @__reduce_max_double(<8 x double>) nounwind readnone alwaysinline {
|
||||||
reduce8(double, @__max_varying_double, @__max_uniform_double)
|
reduce8(double, @__max_varying_double, @__max_uniform_double)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -309,28 +309,28 @@ define internal double @__reduce_max_double(<8 x double>) nounwind readnone alwa
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; horizontal int64 ops
|
;; horizontal int64 ops
|
||||||
|
|
||||||
define internal <8 x i64> @__add_varying_int64(<8 x i64>,
|
define <8 x i64> @__add_varying_int64(<8 x i64>,
|
||||||
<8 x i64>) nounwind readnone alwaysinline {
|
<8 x i64>) nounwind readnone alwaysinline {
|
||||||
%s = add <8 x i64> %0, %1
|
%s = add <8 x i64> %0, %1
|
||||||
ret <8 x i64> %s
|
ret <8 x i64> %s
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__add_uniform_int64(i64, i64) nounwind readnone alwaysinline {
|
define i64 @__add_uniform_int64(i64, i64) nounwind readnone alwaysinline {
|
||||||
%s = add i64 %0, %1
|
%s = add i64 %0, %1
|
||||||
ret i64 %s
|
ret i64 %s
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__reduce_add_int64(<8 x i64>) nounwind readnone alwaysinline {
|
define i64 @__reduce_add_int64(<8 x i64>) nounwind readnone alwaysinline {
|
||||||
reduce8(i64, @__add_varying_int64, @__add_uniform_int64)
|
reduce8(i64, @__add_varying_int64, @__add_uniform_int64)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal i64 @__reduce_min_int64(<8 x i64>) nounwind readnone alwaysinline {
|
define i64 @__reduce_min_int64(<8 x i64>) nounwind readnone alwaysinline {
|
||||||
reduce8(i64, @__min_varying_int64, @__min_uniform_int64)
|
reduce8(i64, @__min_varying_int64, @__min_uniform_int64)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal i64 @__reduce_max_int64(<8 x i64>) nounwind readnone alwaysinline {
|
define i64 @__reduce_max_int64(<8 x i64>) nounwind readnone alwaysinline {
|
||||||
reduce8(i64, @__max_varying_int64, @__max_uniform_int64)
|
reduce8(i64, @__max_varying_int64, @__max_uniform_int64)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -338,17 +338,17 @@ define internal i64 @__reduce_max_int64(<8 x i64>) nounwind readnone alwaysinlin
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;;; horizontal uint64 ops
|
;;; horizontal uint64 ops
|
||||||
|
|
||||||
define internal i64 @__reduce_add_uint64(<8 x i64> %v) nounwind readnone alwaysinline {
|
define i64 @__reduce_add_uint64(<8 x i64> %v) nounwind readnone alwaysinline {
|
||||||
%r = call i64 @__reduce_add_int64(<8 x i64> %v)
|
%r = call i64 @__reduce_add_int64(<8 x i64> %v)
|
||||||
ret i64 %r
|
ret i64 %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__reduce_min_uint64(<8 x i64>) nounwind readnone alwaysinline {
|
define i64 @__reduce_min_uint64(<8 x i64>) nounwind readnone alwaysinline {
|
||||||
reduce8(i64, @__min_varying_uint64, @__min_uniform_uint64)
|
reduce8(i64, @__min_varying_uint64, @__min_uniform_uint64)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal i64 @__reduce_max_uint64(<8 x i64>) nounwind readnone alwaysinline {
|
define i64 @__reduce_max_uint64(<8 x i64>) nounwind readnone alwaysinline {
|
||||||
reduce8(i64, @__max_varying_uint64, @__max_uniform_uint64)
|
reduce8(i64, @__max_varying_uint64, @__max_uniform_uint64)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -533,7 +533,7 @@ gen_scatter(8, i64)
|
|||||||
|
|
||||||
declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
|
declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
|
||||||
|
|
||||||
define internal <8 x double> @__sqrt_varying_double(<8 x double>) nounwind alwaysinline {
|
define <8 x double> @__sqrt_varying_double(<8 x double>) nounwind alwaysinline {
|
||||||
unary4to8(ret, double, @llvm.x86.avx.sqrt.pd.256, %0)
|
unary4to8(ret, double, @llvm.x86.avx.sqrt.pd.256, %0)
|
||||||
ret <8 x double> %ret
|
ret <8 x double> %ret
|
||||||
}
|
}
|
||||||
@@ -545,12 +545,12 @@ define internal <8 x double> @__sqrt_varying_double(<8 x double>) nounwind alway
|
|||||||
declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
|
declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
|
||||||
declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone
|
declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone
|
||||||
|
|
||||||
define internal <8 x double> @__min_varying_double(<8 x double>, <8 x double>) nounwind readnone alwaysinline {
|
define <8 x double> @__min_varying_double(<8 x double>, <8 x double>) nounwind readnone alwaysinline {
|
||||||
binary4to8(ret, double, @llvm.x86.avx.min.pd.256, %0, %1)
|
binary4to8(ret, double, @llvm.x86.avx.min.pd.256, %0, %1)
|
||||||
ret <8 x double> %ret
|
ret <8 x double> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x double> @__max_varying_double(<8 x double>, <8 x double>) nounwind readnone alwaysinline {
|
define <8 x double> @__max_varying_double(<8 x double>, <8 x double>) nounwind readnone alwaysinline {
|
||||||
binary4to8(ret, double, @llvm.x86.avx.max.pd.256, %0, %1)
|
binary4to8(ret, double, @llvm.x86.avx.max.pd.256, %0, %1)
|
||||||
ret <8 x double> %ret
|
ret <8 x double> %ret
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -75,7 +75,7 @@ declare void @abort() noreturn
|
|||||||
|
|
||||||
%0 = type { i32, i32, i32, i32 }
|
%0 = type { i32, i32, i32, i32 }
|
||||||
|
|
||||||
define internal i32 @__get_system_isa() nounwind ssp {
|
define i32 @__get_system_isa() nounwind ssp {
|
||||||
%1 = tail call %0 asm sideeffect "cpuid", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
|
%1 = tail call %0 asm sideeffect "cpuid", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
|
||||||
%2 = extractvalue %0 %1, 2
|
%2 = extractvalue %0 %1, 2
|
||||||
%3 = extractvalue %0 %1, 3
|
%3 = extractvalue %0 %1, 3
|
||||||
@@ -106,7 +106,7 @@ define internal i32 @__get_system_isa() nounwind ssp {
|
|||||||
;; This function is called by each of the dispatch functions we generate;
|
;; This function is called by each of the dispatch functions we generate;
|
||||||
;; it sets @__system_best_isa if it is unset.
|
;; it sets @__system_best_isa if it is unset.
|
||||||
|
|
||||||
define internal void @__set_system_isa() {
|
define void @__set_system_isa() {
|
||||||
entry:
|
entry:
|
||||||
%bi = load i32* @__system_best_isa
|
%bi = load i32* @__system_best_isa
|
||||||
%unset = icmp eq i32 %bi, -1
|
%unset = icmp eq i32 %bi, -1
|
||||||
|
|||||||
@@ -34,7 +34,7 @@
|
|||||||
|
|
||||||
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal float @__rcp_uniform_float(float) nounwind readonly alwaysinline {
|
define float @__rcp_uniform_float(float) nounwind readonly alwaysinline {
|
||||||
; do the rcpss call
|
; do the rcpss call
|
||||||
%vecval = insertelement <4 x float> undef, float %0, i32 0
|
%vecval = insertelement <4 x float> undef, float %0, i32 0
|
||||||
%call = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %vecval)
|
%call = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %vecval)
|
||||||
@@ -53,7 +53,7 @@ define internal float @__rcp_uniform_float(float) nounwind readonly alwaysinline
|
|||||||
|
|
||||||
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal float @__rsqrt_uniform_float(float) nounwind readonly alwaysinline {
|
define float @__rsqrt_uniform_float(float) nounwind readonly alwaysinline {
|
||||||
; uniform float is = extract(__rsqrt_u(v), 0);
|
; uniform float is = extract(__rsqrt_u(v), 0);
|
||||||
%v = insertelement <4 x float> undef, float %0, i32 0
|
%v = insertelement <4 x float> undef, float %0, i32 0
|
||||||
%vis = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %v)
|
%vis = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %v)
|
||||||
@@ -76,7 +76,7 @@ define internal float @__rsqrt_uniform_float(float) nounwind readonly alwaysinli
|
|||||||
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
define internal float @__sqrt_uniform_float(float) nounwind readonly alwaysinline {
|
define float @__sqrt_uniform_float(float) nounwind readonly alwaysinline {
|
||||||
sse_unary_scalar(ret, 4, float, @llvm.x86.sse.sqrt.ss, %0)
|
sse_unary_scalar(ret, 4, float, @llvm.x86.sse.sqrt.ss, %0)
|
||||||
ret float %ret
|
ret float %ret
|
||||||
}
|
}
|
||||||
@@ -87,7 +87,7 @@ define internal float @__sqrt_uniform_float(float) nounwind readonly alwaysinlin
|
|||||||
declare void @llvm.x86.sse.stmxcsr(i8 *) nounwind
|
declare void @llvm.x86.sse.stmxcsr(i8 *) nounwind
|
||||||
declare void @llvm.x86.sse.ldmxcsr(i8 *) nounwind
|
declare void @llvm.x86.sse.ldmxcsr(i8 *) nounwind
|
||||||
|
|
||||||
define internal void @__fastmath() nounwind alwaysinline {
|
define void @__fastmath() nounwind alwaysinline {
|
||||||
%ptr = alloca i32
|
%ptr = alloca i32
|
||||||
%ptr8 = bitcast i32 * %ptr to i8 *
|
%ptr8 = bitcast i32 * %ptr to i8 *
|
||||||
call void @llvm.x86.sse.stmxcsr(i8 * %ptr8)
|
call void @llvm.x86.sse.stmxcsr(i8 * %ptr8)
|
||||||
@@ -106,13 +106,13 @@ define internal void @__fastmath() nounwind alwaysinline {
|
|||||||
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
|
||||||
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal float @__max_uniform_float(float, float) nounwind readonly alwaysinline {
|
define float @__max_uniform_float(float, float) nounwind readonly alwaysinline {
|
||||||
sse_binary_scalar(ret, 4, float, @llvm.x86.sse.max.ss, %0, %1)
|
sse_binary_scalar(ret, 4, float, @llvm.x86.sse.max.ss, %0, %1)
|
||||||
ret float %ret
|
ret float %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal float @__min_uniform_float(float, float) nounwind readonly alwaysinline {
|
define float @__min_uniform_float(float, float) nounwind readonly alwaysinline {
|
||||||
sse_binary_scalar(ret, 4, float, @llvm.x86.sse.min.ss, %0, %1)
|
sse_binary_scalar(ret, 4, float, @llvm.x86.sse.min.ss, %0, %1)
|
||||||
ret float %ret
|
ret float %ret
|
||||||
}
|
}
|
||||||
@@ -122,7 +122,7 @@ define internal float @__min_uniform_float(float, float) nounwind readonly alway
|
|||||||
|
|
||||||
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
|
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
|
||||||
|
|
||||||
define internal double @__sqrt_uniform_double(double) nounwind alwaysinline {
|
define double @__sqrt_uniform_double(double) nounwind alwaysinline {
|
||||||
sse_unary_scalar(ret, 2, double, @llvm.x86.sse2.sqrt.sd, %0)
|
sse_unary_scalar(ret, 2, double, @llvm.x86.sse2.sqrt.sd, %0)
|
||||||
ret double %ret
|
ret double %ret
|
||||||
}
|
}
|
||||||
@@ -134,12 +134,12 @@ define internal double @__sqrt_uniform_double(double) nounwind alwaysinline {
|
|||||||
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
|
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
|
||||||
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
|
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
|
||||||
|
|
||||||
define internal double @__min_uniform_double(double, double) nounwind readnone {
|
define double @__min_uniform_double(double, double) nounwind readnone {
|
||||||
sse_binary_scalar(ret, 2, double, @llvm.x86.sse2.min.sd, %0, %1)
|
sse_binary_scalar(ret, 2, double, @llvm.x86.sse2.min.sd, %0, %1)
|
||||||
ret double %ret
|
ret double %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__max_uniform_double(double, double) nounwind readnone {
|
define double @__max_uniform_double(double, double) nounwind readnone {
|
||||||
sse_binary_scalar(ret, 2, double, @llvm.x86.sse2.max.sd, %0, %1)
|
sse_binary_scalar(ret, 2, double, @llvm.x86.sse2.max.sd, %0, %1)
|
||||||
ret double %ret
|
ret double %ret
|
||||||
}
|
}
|
||||||
@@ -166,7 +166,7 @@ define internal double @__max_uniform_double(double, double) nounwind readnone {
|
|||||||
; return x;
|
; return x;
|
||||||
;}
|
;}
|
||||||
|
|
||||||
define internal float @__round_uniform_float(float) nounwind readonly alwaysinline {
|
define float @__round_uniform_float(float) nounwind readonly alwaysinline {
|
||||||
%float_to_int_bitcast.i.i.i.i = bitcast float %0 to i32
|
%float_to_int_bitcast.i.i.i.i = bitcast float %0 to i32
|
||||||
%bitop.i.i = and i32 %float_to_int_bitcast.i.i.i.i, -2147483648
|
%bitop.i.i = and i32 %float_to_int_bitcast.i.i.i.i, -2147483648
|
||||||
%bitop.i = xor i32 %bitop.i.i, %float_to_int_bitcast.i.i.i.i
|
%bitop.i = xor i32 %bitop.i.i, %float_to_int_bitcast.i.i.i.i
|
||||||
@@ -192,7 +192,7 @@ define internal float @__round_uniform_float(float) nounwind readonly alwaysinli
|
|||||||
; return y + delta;
|
; return y + delta;
|
||||||
;}
|
;}
|
||||||
|
|
||||||
define internal float @__floor_uniform_float(float) nounwind readonly alwaysinline {
|
define float @__floor_uniform_float(float) nounwind readonly alwaysinline {
|
||||||
%calltmp.i = tail call float @__round_uniform_float(float %0) nounwind
|
%calltmp.i = tail call float @__round_uniform_float(float %0) nounwind
|
||||||
%bincmp.i = fcmp ogt float %calltmp.i, %0
|
%bincmp.i = fcmp ogt float %calltmp.i, %0
|
||||||
%selectexpr.i = sext i1 %bincmp.i to i32
|
%selectexpr.i = sext i1 %bincmp.i to i32
|
||||||
@@ -214,7 +214,7 @@ define internal float @__floor_uniform_float(float) nounwind readonly alwaysinli
|
|||||||
; return y + delta;
|
; return y + delta;
|
||||||
;}
|
;}
|
||||||
|
|
||||||
define internal float @__ceil_uniform_float(float) nounwind readonly alwaysinline {
|
define float @__ceil_uniform_float(float) nounwind readonly alwaysinline {
|
||||||
%calltmp.i = tail call float @__round_uniform_float(float %0) nounwind
|
%calltmp.i = tail call float @__round_uniform_float(float %0) nounwind
|
||||||
%bincmp.i = fcmp olt float %calltmp.i, %0
|
%bincmp.i = fcmp olt float %calltmp.i, %0
|
||||||
%selectexpr.i = sext i1 %bincmp.i to i32
|
%selectexpr.i = sext i1 %bincmp.i to i32
|
||||||
@@ -232,17 +232,17 @@ declare double @round(double)
|
|||||||
declare double @floor(double)
|
declare double @floor(double)
|
||||||
declare double @ceil(double)
|
declare double @ceil(double)
|
||||||
|
|
||||||
define internal double @__round_uniform_double(double) nounwind readonly alwaysinline {
|
define double @__round_uniform_double(double) nounwind readonly alwaysinline {
|
||||||
%r = call double @round(double %0)
|
%r = call double @round(double %0)
|
||||||
ret double %r
|
ret double %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__floor_uniform_double(double) nounwind readonly alwaysinline {
|
define double @__floor_uniform_double(double) nounwind readonly alwaysinline {
|
||||||
%r = call double @floor(double %0)
|
%r = call double @floor(double %0)
|
||||||
ret double %r
|
ret double %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__ceil_uniform_double(double) nounwind readonly alwaysinline {
|
define double @__ceil_uniform_double(double) nounwind readonly alwaysinline {
|
||||||
%r = call double @ceil(double %0)
|
%r = call double @ceil(double %0)
|
||||||
ret double %r
|
ret double %r
|
||||||
}
|
}
|
||||||
@@ -253,12 +253,12 @@ define internal double @__ceil_uniform_double(double) nounwind readonly alwaysin
|
|||||||
declare i32 @llvm.ctpop.i32(i32)
|
declare i32 @llvm.ctpop.i32(i32)
|
||||||
declare i64 @llvm.ctpop.i64(i64)
|
declare i64 @llvm.ctpop.i64(i64)
|
||||||
|
|
||||||
define internal i32 @__popcnt_int32(i32) nounwind readonly alwaysinline {
|
define i32 @__popcnt_int32(i32) nounwind readonly alwaysinline {
|
||||||
%val = call i32 @llvm.ctpop.i32(i32 %0)
|
%val = call i32 @llvm.ctpop.i32(i32 %0)
|
||||||
ret i32 %val
|
ret i32 %val
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__popcnt_int64(i64) nounwind readnone alwaysinline {
|
define i64 @__popcnt_int64(i64) nounwind readnone alwaysinline {
|
||||||
%val = call i64 @llvm.ctpop.i64(i64 %0)
|
%val = call i64 @llvm.ctpop.i64(i64 %0)
|
||||||
ret i64 %val
|
ret i64 %val
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -48,7 +48,7 @@ include(`builtins-sse2-common.ll')
|
|||||||
|
|
||||||
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal <8 x float> @__rcp_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
define <8 x float> @__rcp_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
||||||
; float iv = __rcp_v(v);
|
; float iv = __rcp_v(v);
|
||||||
; return iv * (2. - v * iv);
|
; return iv * (2. - v * iv);
|
||||||
|
|
||||||
@@ -66,7 +66,7 @@ define internal <8 x float> @__rcp_varying_float(<8 x float>) nounwind readonly
|
|||||||
|
|
||||||
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal <8 x float> @__rsqrt_varying_float(<8 x float> %v) nounwind readonly alwaysinline {
|
define <8 x float> @__rsqrt_varying_float(<8 x float> %v) nounwind readonly alwaysinline {
|
||||||
; float is = __rsqrt_v(v);
|
; float is = __rsqrt_v(v);
|
||||||
unary4to8(is, float, @llvm.x86.sse.rsqrt.ps, %v)
|
unary4to8(is, float, @llvm.x86.sse.rsqrt.ps, %v)
|
||||||
; return 0.5 * is * (3. - (v * is) * is);
|
; return 0.5 * is * (3. - (v * is) * is);
|
||||||
@@ -85,7 +85,7 @@ define internal <8 x float> @__rsqrt_varying_float(<8 x float> %v) nounwind read
|
|||||||
|
|
||||||
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal <8 x float> @__sqrt_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
define <8 x float> @__sqrt_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
||||||
unary4to8(call, float, @llvm.x86.sse.sqrt.ps, %0)
|
unary4to8(call, float, @llvm.x86.sse.sqrt.ps, %0)
|
||||||
ret <8 x float> %call
|
ret <8 x float> %call
|
||||||
}
|
}
|
||||||
@@ -104,17 +104,17 @@ declare <4 x float> @__svml_logf4(<4 x float>) nounwind readnone
|
|||||||
declare <4 x float> @__svml_powf4(<4 x float>, <4 x float>) nounwind readnone
|
declare <4 x float> @__svml_powf4(<4 x float>, <4 x float>) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
define internal <8 x float> @__svml_sin(<8 x float>) nounwind readnone alwaysinline {
|
define <8 x float> @__svml_sin(<8 x float>) nounwind readnone alwaysinline {
|
||||||
unary4to8(ret, float, @__svml_sinf4, %0)
|
unary4to8(ret, float, @__svml_sinf4, %0)
|
||||||
ret <8 x float> %ret
|
ret <8 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x float> @__svml_cos(<8 x float>) nounwind readnone alwaysinline {
|
define <8 x float> @__svml_cos(<8 x float>) nounwind readnone alwaysinline {
|
||||||
unary4to8(ret, float, @__svml_cosf4, %0)
|
unary4to8(ret, float, @__svml_cosf4, %0)
|
||||||
ret <8 x float> %ret
|
ret <8 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal void @__svml_sincos(<8 x float>, <8 x float> *,
|
define void @__svml_sincos(<8 x float>, <8 x float> *,
|
||||||
<8 x float> *) nounwind readnone alwaysinline {
|
<8 x float> *) nounwind readnone alwaysinline {
|
||||||
; call svml_sincosf4 two times with the two 4-wide sub-vectors
|
; call svml_sincosf4 two times with the two 4-wide sub-vectors
|
||||||
%a = shufflevector <8 x float> %0, <8 x float> undef,
|
%a = shufflevector <8 x float> %0, <8 x float> undef,
|
||||||
@@ -143,33 +143,33 @@ define internal void @__svml_sincos(<8 x float>, <8 x float> *,
|
|||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x float> @__svml_tan(<8 x float>) nounwind readnone alwaysinline {
|
define <8 x float> @__svml_tan(<8 x float>) nounwind readnone alwaysinline {
|
||||||
unary4to8(ret, float, @__svml_tanf4, %0)
|
unary4to8(ret, float, @__svml_tanf4, %0)
|
||||||
ret <8 x float> %ret
|
ret <8 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x float> @__svml_atan(<8 x float>) nounwind readnone alwaysinline {
|
define <8 x float> @__svml_atan(<8 x float>) nounwind readnone alwaysinline {
|
||||||
unary4to8(ret, float, @__svml_atanf4, %0)
|
unary4to8(ret, float, @__svml_atanf4, %0)
|
||||||
ret <8 x float> %ret
|
ret <8 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x float> @__svml_atan2(<8 x float>,
|
define <8 x float> @__svml_atan2(<8 x float>,
|
||||||
<8 x float>) nounwind readnone alwaysinline {
|
<8 x float>) nounwind readnone alwaysinline {
|
||||||
binary4to8(ret, float, @__svml_atan2f4, %0, %1)
|
binary4to8(ret, float, @__svml_atan2f4, %0, %1)
|
||||||
ret <8 x float> %ret
|
ret <8 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x float> @__svml_exp(<8 x float>) nounwind readnone alwaysinline {
|
define <8 x float> @__svml_exp(<8 x float>) nounwind readnone alwaysinline {
|
||||||
unary4to8(ret, float, @__svml_expf4, %0)
|
unary4to8(ret, float, @__svml_expf4, %0)
|
||||||
ret <8 x float> %ret
|
ret <8 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x float> @__svml_log(<8 x float>) nounwind readnone alwaysinline {
|
define <8 x float> @__svml_log(<8 x float>) nounwind readnone alwaysinline {
|
||||||
unary4to8(ret, float, @__svml_logf4, %0)
|
unary4to8(ret, float, @__svml_logf4, %0)
|
||||||
ret <8 x float> %ret
|
ret <8 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x float> @__svml_pow(<8 x float>,
|
define <8 x float> @__svml_pow(<8 x float>,
|
||||||
<8 x float>) nounwind readnone alwaysinline {
|
<8 x float>) nounwind readnone alwaysinline {
|
||||||
binary4to8(ret, float, @__svml_powf4, %0, %1)
|
binary4to8(ret, float, @__svml_powf4, %0, %1)
|
||||||
ret <8 x float> %ret
|
ret <8 x float> %ret
|
||||||
@@ -182,12 +182,12 @@ define internal <8 x float> @__svml_pow(<8 x float>,
|
|||||||
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
|
||||||
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal <8 x float> @__max_varying_float(<8 x float>, <8 x float>) nounwind readonly alwaysinline {
|
define <8 x float> @__max_varying_float(<8 x float>, <8 x float>) nounwind readonly alwaysinline {
|
||||||
binary4to8(call, float, @llvm.x86.sse.max.ps, %0, %1)
|
binary4to8(call, float, @llvm.x86.sse.max.ps, %0, %1)
|
||||||
ret <8 x float> %call
|
ret <8 x float> %call
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x float> @__min_varying_float(<8 x float>, <8 x float>) nounwind readonly alwaysinline {
|
define <8 x float> @__min_varying_float(<8 x float>, <8 x float>) nounwind readonly alwaysinline {
|
||||||
binary4to8(call, float, @llvm.x86.sse.min.ps, %0, %1)
|
binary4to8(call, float, @llvm.x86.sse.min.ps, %0, %1)
|
||||||
ret <8 x float> %call
|
ret <8 x float> %call
|
||||||
}
|
}
|
||||||
@@ -200,7 +200,7 @@ define internal <8 x float> @__min_varying_float(<8 x float>, <8 x float>) nounw
|
|||||||
; vector element, if the mask is on, we return the corresponding value
|
; vector element, if the mask is on, we return the corresponding value
|
||||||
; from %1, and otherwise return the value from %0.
|
; from %1, and otherwise return the value from %0.
|
||||||
|
|
||||||
define internal <8 x i32> @__vselect_i32(<8 x i32>, <8 x i32> ,
|
define <8 x i32> @__vselect_i32(<8 x i32>, <8 x i32> ,
|
||||||
<8 x i32> %mask) nounwind readnone alwaysinline {
|
<8 x i32> %mask) nounwind readnone alwaysinline {
|
||||||
%notmask = xor <8 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
|
%notmask = xor <8 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
|
||||||
%cleared_old = and <8 x i32> %0, %notmask
|
%cleared_old = and <8 x i32> %0, %notmask
|
||||||
@@ -209,7 +209,7 @@ define internal <8 x i32> @__vselect_i32(<8 x i32>, <8 x i32> ,
|
|||||||
ret <8 x i32> %new
|
ret <8 x i32> %new
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x float> @__vselect_float(<8 x float>, <8 x float>,
|
define <8 x float> @__vselect_float(<8 x float>, <8 x float>,
|
||||||
<8 x i32> %mask) nounwind readnone alwaysinline {
|
<8 x i32> %mask) nounwind readnone alwaysinline {
|
||||||
%v0 = bitcast <8 x float> %0 to <8 x i32>
|
%v0 = bitcast <8 x float> %0 to <8 x i32>
|
||||||
%v1 = bitcast <8 x float> %1 to <8 x i32>
|
%v1 = bitcast <8 x float> %1 to <8 x i32>
|
||||||
@@ -223,27 +223,27 @@ define internal <8 x float> @__vselect_float(<8 x float>, <8 x float>,
|
|||||||
; extend the i1 vector result to an i32 mask. The __vselect does the
|
; extend the i1 vector result to an i32 mask. The __vselect does the
|
||||||
; rest...
|
; rest...
|
||||||
|
|
||||||
define internal <8 x i32> @__min_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
define <8 x i32> @__min_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||||
%c = icmp slt <8 x i32> %0, %1
|
%c = icmp slt <8 x i32> %0, %1
|
||||||
%mask = sext <8 x i1> %c to <8 x i32>
|
%mask = sext <8 x i1> %c to <8 x i32>
|
||||||
%v = call <8 x i32> @__vselect_i32(<8 x i32> %1, <8 x i32> %0, <8 x i32> %mask)
|
%v = call <8 x i32> @__vselect_i32(<8 x i32> %1, <8 x i32> %0, <8 x i32> %mask)
|
||||||
ret <8 x i32> %v
|
ret <8 x i32> %v
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__min_uniform_int32(i32, i32) nounwind readonly alwaysinline {
|
define i32 @__min_uniform_int32(i32, i32) nounwind readonly alwaysinline {
|
||||||
%c = icmp slt i32 %0, %1
|
%c = icmp slt i32 %0, %1
|
||||||
%r = select i1 %c, i32 %0, i32 %1
|
%r = select i1 %c, i32 %0, i32 %1
|
||||||
ret i32 %r
|
ret i32 %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x i32> @__max_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
define <8 x i32> @__max_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||||
%c = icmp sgt <8 x i32> %0, %1
|
%c = icmp sgt <8 x i32> %0, %1
|
||||||
%mask = sext <8 x i1> %c to <8 x i32>
|
%mask = sext <8 x i1> %c to <8 x i32>
|
||||||
%v = call <8 x i32> @__vselect_i32(<8 x i32> %1, <8 x i32> %0, <8 x i32> %mask)
|
%v = call <8 x i32> @__vselect_i32(<8 x i32> %1, <8 x i32> %0, <8 x i32> %mask)
|
||||||
ret <8 x i32> %v
|
ret <8 x i32> %v
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinline {
|
define i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinline {
|
||||||
%c = icmp sgt i32 %0, %1
|
%c = icmp sgt i32 %0, %1
|
||||||
%r = select i1 %c, i32 %0, i32 %1
|
%r = select i1 %c, i32 %0, i32 %1
|
||||||
ret i32 %r
|
ret i32 %r
|
||||||
@@ -252,27 +252,27 @@ define internal i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinlin
|
|||||||
; The functions for unsigned ints are similar, just with unsigned
|
; The functions for unsigned ints are similar, just with unsigned
|
||||||
; comparison functions...
|
; comparison functions...
|
||||||
|
|
||||||
define internal <8 x i32> @__min_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
define <8 x i32> @__min_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||||
%c = icmp ult <8 x i32> %0, %1
|
%c = icmp ult <8 x i32> %0, %1
|
||||||
%mask = sext <8 x i1> %c to <8 x i32>
|
%mask = sext <8 x i1> %c to <8 x i32>
|
||||||
%v = call <8 x i32> @__vselect_i32(<8 x i32> %1, <8 x i32> %0, <8 x i32> %mask)
|
%v = call <8 x i32> @__vselect_i32(<8 x i32> %1, <8 x i32> %0, <8 x i32> %mask)
|
||||||
ret <8 x i32> %v
|
ret <8 x i32> %v
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__min_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
|
define i32 @__min_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
|
||||||
%c = icmp ult i32 %0, %1
|
%c = icmp ult i32 %0, %1
|
||||||
%r = select i1 %c, i32 %0, i32 %1
|
%r = select i1 %c, i32 %0, i32 %1
|
||||||
ret i32 %r
|
ret i32 %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
define <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||||
%c = icmp ugt <8 x i32> %0, %1
|
%c = icmp ugt <8 x i32> %0, %1
|
||||||
%mask = sext <8 x i1> %c to <8 x i32>
|
%mask = sext <8 x i1> %c to <8 x i32>
|
||||||
%v = call <8 x i32> @__vselect_i32(<8 x i32> %1, <8 x i32> %0, <8 x i32> %mask)
|
%v = call <8 x i32> @__vselect_i32(<8 x i32> %1, <8 x i32> %0, <8 x i32> %mask)
|
||||||
ret <8 x i32> %v
|
ret <8 x i32> %v
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
|
define i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
|
||||||
%c = icmp ugt i32 %0, %1
|
%c = icmp ugt i32 %0, %1
|
||||||
%r = select i1 %c, i32 %0, i32 %1
|
%r = select i1 %c, i32 %0, i32 %1
|
||||||
ret i32 %r
|
ret i32 %r
|
||||||
@@ -283,7 +283,7 @@ define internal i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinli
|
|||||||
|
|
||||||
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
|
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
|
define i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
|
||||||
; first do two 4-wide movmsk calls
|
; first do two 4-wide movmsk calls
|
||||||
%floatmask = bitcast <8 x i32> %0 to <8 x float>
|
%floatmask = bitcast <8 x i32> %0 to <8 x float>
|
||||||
%m0 = shufflevector <8 x float> %floatmask, <8 x float> undef,
|
%m0 = shufflevector <8 x float> %floatmask, <8 x float> undef,
|
||||||
@@ -300,118 +300,118 @@ define internal i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
|
|||||||
ret i32 %v
|
ret i32 %v
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x float> @__vec4_add_float(<4 x float> %v0,
|
define <4 x float> @__vec4_add_float(<4 x float> %v0,
|
||||||
<4 x float> %v1) nounwind readnone alwaysinline {
|
<4 x float> %v1) nounwind readnone alwaysinline {
|
||||||
%v = fadd <4 x float> %v0, %v1
|
%v = fadd <4 x float> %v0, %v1
|
||||||
ret <4 x float> %v
|
ret <4 x float> %v
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal float @__add_float(float, float) nounwind readnone alwaysinline {
|
define float @__add_float(float, float) nounwind readnone alwaysinline {
|
||||||
%v = fadd float %0, %1
|
%v = fadd float %0, %1
|
||||||
ret float %v
|
ret float %v
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal float @__reduce_add_float(<8 x float>) nounwind readnone alwaysinline {
|
define float @__reduce_add_float(<8 x float>) nounwind readnone alwaysinline {
|
||||||
reduce8by4(float, @__vec4_add_float, @__add_float)
|
reduce8by4(float, @__vec4_add_float, @__add_float)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal float @__reduce_min_float(<8 x float>) nounwind readnone alwaysinline {
|
define float @__reduce_min_float(<8 x float>) nounwind readnone alwaysinline {
|
||||||
reduce8(float, @__min_varying_float, @__min_uniform_float)
|
reduce8(float, @__min_varying_float, @__min_uniform_float)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal float @__reduce_max_float(<8 x float>) nounwind readnone alwaysinline {
|
define float @__reduce_max_float(<8 x float>) nounwind readnone alwaysinline {
|
||||||
reduce8(float, @__max_varying_float, @__max_uniform_float)
|
reduce8(float, @__max_varying_float, @__max_uniform_float)
|
||||||
}
|
}
|
||||||
|
|
||||||
; helper function for reduce_add_int32
|
; helper function for reduce_add_int32
|
||||||
define internal <4 x i32> @__vec4_add_int32(<4 x i32> %v0,
|
define <4 x i32> @__vec4_add_int32(<4 x i32> %v0,
|
||||||
<4 x i32> %v1) nounwind readnone alwaysinline {
|
<4 x i32> %v1) nounwind readnone alwaysinline {
|
||||||
%v = add <4 x i32> %v0, %v1
|
%v = add <4 x i32> %v0, %v1
|
||||||
ret <4 x i32> %v
|
ret <4 x i32> %v
|
||||||
}
|
}
|
||||||
|
|
||||||
; helper function for reduce_add_int32
|
; helper function for reduce_add_int32
|
||||||
define internal i32 @__add_int32(i32, i32) nounwind readnone alwaysinline {
|
define i32 @__add_int32(i32, i32) nounwind readnone alwaysinline {
|
||||||
%v = add i32 %0, %1
|
%v = add i32 %0, %1
|
||||||
ret i32 %v
|
ret i32 %v
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__reduce_add_int32(<8 x i32>) nounwind readnone alwaysinline {
|
define i32 @__reduce_add_int32(<8 x i32>) nounwind readnone alwaysinline {
|
||||||
reduce8by4(i32, @__vec4_add_int32, @__add_int32)
|
reduce8by4(i32, @__vec4_add_int32, @__add_int32)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__reduce_min_int32(<8 x i32>) nounwind readnone alwaysinline {
|
define i32 @__reduce_min_int32(<8 x i32>) nounwind readnone alwaysinline {
|
||||||
reduce8(i32, @__min_varying_int32, @__min_uniform_int32)
|
reduce8(i32, @__min_varying_int32, @__min_uniform_int32)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__reduce_max_int32(<8 x i32>) nounwind readnone alwaysinline {
|
define i32 @__reduce_max_int32(<8 x i32>) nounwind readnone alwaysinline {
|
||||||
reduce8(i32, @__max_varying_int32, @__max_uniform_int32)
|
reduce8(i32, @__max_varying_int32, @__max_uniform_int32)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__reduce_add_uint32(<8 x i32> %v) nounwind readnone alwaysinline {
|
define i32 @__reduce_add_uint32(<8 x i32> %v) nounwind readnone alwaysinline {
|
||||||
%r = call i32 @__reduce_add_int32(<8 x i32> %v)
|
%r = call i32 @__reduce_add_int32(<8 x i32> %v)
|
||||||
ret i32 %r
|
ret i32 %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__reduce_min_uint32(<8 x i32>) nounwind readnone alwaysinline {
|
define i32 @__reduce_min_uint32(<8 x i32>) nounwind readnone alwaysinline {
|
||||||
reduce8(i32, @__min_varying_uint32, @__min_uniform_uint32)
|
reduce8(i32, @__min_varying_uint32, @__min_uniform_uint32)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__reduce_max_uint32(<8 x i32>) nounwind readnone alwaysinline {
|
define i32 @__reduce_max_uint32(<8 x i32>) nounwind readnone alwaysinline {
|
||||||
reduce8(i32, @__max_varying_uint32, @__max_uniform_uint32)
|
reduce8(i32, @__max_varying_uint32, @__max_uniform_uint32)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x double> @__add_varying_double(<4 x double>,
|
define <4 x double> @__add_varying_double(<4 x double>,
|
||||||
<4 x double>) nounwind readnone alwaysinline {
|
<4 x double>) nounwind readnone alwaysinline {
|
||||||
%r = fadd <4 x double> %0, %1
|
%r = fadd <4 x double> %0, %1
|
||||||
ret <4 x double> %r
|
ret <4 x double> %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__add_uniform_double(double, double) nounwind readnone alwaysinline {
|
define double @__add_uniform_double(double, double) nounwind readnone alwaysinline {
|
||||||
%r = fadd double %0, %1
|
%r = fadd double %0, %1
|
||||||
ret double %r
|
ret double %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__reduce_add_double(<8 x double>) nounwind readnone {
|
define double @__reduce_add_double(<8 x double>) nounwind readnone {
|
||||||
reduce8by4(double, @__add_varying_double, @__add_uniform_double)
|
reduce8by4(double, @__add_varying_double, @__add_uniform_double)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__reduce_min_double(<8 x double>) nounwind readnone {
|
define double @__reduce_min_double(<8 x double>) nounwind readnone {
|
||||||
reduce8(double, @__min_varying_double, @__min_uniform_double)
|
reduce8(double, @__min_varying_double, @__min_uniform_double)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__reduce_max_double(<8 x double>) nounwind readnone {
|
define double @__reduce_max_double(<8 x double>) nounwind readnone {
|
||||||
reduce8(double, @__max_varying_double, @__max_uniform_double)
|
reduce8(double, @__max_varying_double, @__max_uniform_double)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x i64> @__add_varying_int64(<4 x i64>,
|
define <4 x i64> @__add_varying_int64(<4 x i64>,
|
||||||
<4 x i64>) nounwind readnone alwaysinline {
|
<4 x i64>) nounwind readnone alwaysinline {
|
||||||
%r = add <4 x i64> %0, %1
|
%r = add <4 x i64> %0, %1
|
||||||
ret <4 x i64> %r
|
ret <4 x i64> %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__add_uniform_int64(i64, i64) nounwind readnone alwaysinline {
|
define i64 @__add_uniform_int64(i64, i64) nounwind readnone alwaysinline {
|
||||||
%r = add i64 %0, %1
|
%r = add i64 %0, %1
|
||||||
ret i64 %r
|
ret i64 %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__reduce_add_int64(<8 x i64>) nounwind readnone {
|
define i64 @__reduce_add_int64(<8 x i64>) nounwind readnone {
|
||||||
reduce8by4(i64, @__add_varying_int64, @__add_uniform_int64)
|
reduce8by4(i64, @__add_varying_int64, @__add_uniform_int64)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__reduce_min_int64(<8 x i64>) nounwind readnone {
|
define i64 @__reduce_min_int64(<8 x i64>) nounwind readnone {
|
||||||
reduce8(i64, @__min_varying_int64, @__min_uniform_int64)
|
reduce8(i64, @__min_varying_int64, @__min_uniform_int64)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__reduce_max_int64(<8 x i64>) nounwind readnone {
|
define i64 @__reduce_max_int64(<8 x i64>) nounwind readnone {
|
||||||
reduce8(i64, @__max_varying_int64, @__max_uniform_int64)
|
reduce8(i64, @__max_varying_int64, @__max_uniform_int64)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__reduce_min_uint64(<8 x i64>) nounwind readnone {
|
define i64 @__reduce_min_uint64(<8 x i64>) nounwind readnone {
|
||||||
reduce8(i64, @__min_varying_uint64, @__min_uniform_uint64)
|
reduce8(i64, @__min_varying_uint64, @__min_uniform_uint64)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__reduce_max_uint64(<8 x i64>) nounwind readnone {
|
define i64 @__reduce_max_uint64(<8 x i64>) nounwind readnone {
|
||||||
reduce8(i64, @__max_varying_uint64, @__max_uniform_uint64)
|
reduce8(i64, @__max_varying_uint64, @__max_uniform_uint64)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -468,7 +468,7 @@ gen_scatter(8, i64)
|
|||||||
; return x;
|
; return x;
|
||||||
;}
|
;}
|
||||||
|
|
||||||
define internal <8 x float> @__round_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
define <8 x float> @__round_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
||||||
%float_to_int_bitcast.i.i.i.i = bitcast <8 x float> %0 to <8 x i32>
|
%float_to_int_bitcast.i.i.i.i = bitcast <8 x float> %0 to <8 x i32>
|
||||||
%bitop.i.i = and <8 x i32> %float_to_int_bitcast.i.i.i.i, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
|
%bitop.i.i = and <8 x i32> %float_to_int_bitcast.i.i.i.i, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
|
||||||
%bitop.i = xor <8 x i32> %float_to_int_bitcast.i.i.i.i, %bitop.i.i
|
%bitop.i = xor <8 x i32> %float_to_int_bitcast.i.i.i.i, %bitop.i.i
|
||||||
@@ -494,7 +494,7 @@ define internal <8 x float> @__round_varying_float(<8 x float>) nounwind readonl
|
|||||||
; return y + delta;
|
; return y + delta;
|
||||||
;}
|
;}
|
||||||
|
|
||||||
define internal <8 x float> @__floor_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
define <8 x float> @__floor_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
||||||
%calltmp.i = tail call <8 x float> @__round_varying_float(<8 x float> %0) nounwind
|
%calltmp.i = tail call <8 x float> @__round_varying_float(<8 x float> %0) nounwind
|
||||||
%bincmp.i = fcmp ogt <8 x float> %calltmp.i, %0
|
%bincmp.i = fcmp ogt <8 x float> %calltmp.i, %0
|
||||||
%val_to_boolvec32.i = sext <8 x i1> %bincmp.i to <8 x i32>
|
%val_to_boolvec32.i = sext <8 x i1> %bincmp.i to <8 x i32>
|
||||||
@@ -516,7 +516,7 @@ define internal <8 x float> @__floor_varying_float(<8 x float>) nounwind readonl
|
|||||||
; return y + delta;
|
; return y + delta;
|
||||||
;}
|
;}
|
||||||
|
|
||||||
define internal <8 x float> @__ceil_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
define <8 x float> @__ceil_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
||||||
%calltmp.i = tail call <8 x float> @__round_varying_float(<8 x float> %0) nounwind
|
%calltmp.i = tail call <8 x float> @__round_varying_float(<8 x float> %0) nounwind
|
||||||
%bincmp.i = fcmp olt <8 x float> %calltmp.i, %0
|
%bincmp.i = fcmp olt <8 x float> %calltmp.i, %0
|
||||||
%val_to_boolvec32.i = sext <8 x i1> %bincmp.i to <8 x i32>
|
%val_to_boolvec32.i = sext <8 x i1> %bincmp.i to <8 x i32>
|
||||||
@@ -530,15 +530,15 @@ define internal <8 x float> @__ceil_varying_float(<8 x float>) nounwind readonly
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; rounding doubles
|
;; rounding doubles
|
||||||
|
|
||||||
define internal <8 x double> @__round_varying_double(<8 x double>) nounwind readonly alwaysinline {
|
define <8 x double> @__round_varying_double(<8 x double>) nounwind readonly alwaysinline {
|
||||||
unary1to8(double, @round)
|
unary1to8(double, @round)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x double> @__floor_varying_double(<8 x double>) nounwind readonly alwaysinline {
|
define <8 x double> @__floor_varying_double(<8 x double>) nounwind readonly alwaysinline {
|
||||||
unary1to8(double, @floor)
|
unary1to8(double, @floor)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x double> @__ceil_varying_double(<8 x double>) nounwind readonly alwaysinline {
|
define <8 x double> @__ceil_varying_double(<8 x double>) nounwind readonly alwaysinline {
|
||||||
unary1to8(double, @ceil)
|
unary1to8(double, @ceil)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -609,7 +609,7 @@ define void @__masked_store_blend_64(<8 x i64>* nocapture %ptr, <8 x i64> %new,
|
|||||||
|
|
||||||
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
|
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
|
||||||
|
|
||||||
define internal <8 x double> @__sqrt_varying_double(<8 x double>) nounwind alwaysinline {
|
define <8 x double> @__sqrt_varying_double(<8 x double>) nounwind alwaysinline {
|
||||||
unary2to8(ret, double, @llvm.x86.sse2.sqrt.pd, %0)
|
unary2to8(ret, double, @llvm.x86.sse2.sqrt.pd, %0)
|
||||||
ret <8 x double> %ret
|
ret <8 x double> %ret
|
||||||
}
|
}
|
||||||
@@ -620,12 +620,12 @@ define internal <8 x double> @__sqrt_varying_double(<8 x double>) nounwind alway
|
|||||||
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
|
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
|
||||||
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
|
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
|
||||||
|
|
||||||
define internal <8 x double> @__min_varying_double(<8 x double>, <8 x double>) nounwind readnone alwaysinline {
|
define <8 x double> @__min_varying_double(<8 x double>, <8 x double>) nounwind readnone alwaysinline {
|
||||||
binary2to8(ret, double, @llvm.x86.sse2.min.pd, %0, %1)
|
binary2to8(ret, double, @llvm.x86.sse2.min.pd, %0, %1)
|
||||||
ret <8 x double> %ret
|
ret <8 x double> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x double> @__max_varying_double(<8 x double>, <8 x double>) nounwind readnone alwaysinline {
|
define <8 x double> @__max_varying_double(<8 x double>, <8 x double>) nounwind readnone alwaysinline {
|
||||||
binary2to8(ret, double, @llvm.x86.sse2.max.pd, %0, %1)
|
binary2to8(ret, double, @llvm.x86.sse2.max.pd, %0, %1)
|
||||||
ret <8 x double> %ret
|
ret <8 x double> %ret
|
||||||
}
|
}
|
||||||
|
|||||||
102
builtins-sse2.ll
102
builtins-sse2.ll
@@ -62,7 +62,7 @@ include(`builtins-sse2-common.ll')
|
|||||||
; return x;
|
; return x;
|
||||||
;}
|
;}
|
||||||
|
|
||||||
define internal <4 x float> @__round_varying_float(<4 x float>) nounwind readonly alwaysinline {
|
define <4 x float> @__round_varying_float(<4 x float>) nounwind readonly alwaysinline {
|
||||||
%float_to_int_bitcast.i.i.i.i = bitcast <4 x float> %0 to <4 x i32>
|
%float_to_int_bitcast.i.i.i.i = bitcast <4 x float> %0 to <4 x i32>
|
||||||
%bitop.i.i = and <4 x i32> %float_to_int_bitcast.i.i.i.i, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
|
%bitop.i.i = and <4 x i32> %float_to_int_bitcast.i.i.i.i, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
|
||||||
%bitop.i = xor <4 x i32> %float_to_int_bitcast.i.i.i.i, %bitop.i.i
|
%bitop.i = xor <4 x i32> %float_to_int_bitcast.i.i.i.i, %bitop.i.i
|
||||||
@@ -88,7 +88,7 @@ define internal <4 x float> @__round_varying_float(<4 x float>) nounwind readonl
|
|||||||
; return y + delta;
|
; return y + delta;
|
||||||
;}
|
;}
|
||||||
|
|
||||||
define internal <4 x float> @__floor_varying_float(<4 x float>) nounwind readonly alwaysinline {
|
define <4 x float> @__floor_varying_float(<4 x float>) nounwind readonly alwaysinline {
|
||||||
%calltmp.i = tail call <4 x float> @__round_varying_float(<4 x float> %0) nounwind
|
%calltmp.i = tail call <4 x float> @__round_varying_float(<4 x float> %0) nounwind
|
||||||
%bincmp.i = fcmp ogt <4 x float> %calltmp.i, %0
|
%bincmp.i = fcmp ogt <4 x float> %calltmp.i, %0
|
||||||
%val_to_boolvec32.i = sext <4 x i1> %bincmp.i to <4 x i32>
|
%val_to_boolvec32.i = sext <4 x i1> %bincmp.i to <4 x i32>
|
||||||
@@ -110,7 +110,7 @@ define internal <4 x float> @__floor_varying_float(<4 x float>) nounwind readonl
|
|||||||
; return y + delta;
|
; return y + delta;
|
||||||
;}
|
;}
|
||||||
|
|
||||||
define internal <4 x float> @__ceil_varying_float(<4 x float>) nounwind readonly alwaysinline {
|
define <4 x float> @__ceil_varying_float(<4 x float>) nounwind readonly alwaysinline {
|
||||||
%calltmp.i = tail call <4 x float> @__round_varying_float(<4 x float> %0) nounwind
|
%calltmp.i = tail call <4 x float> @__round_varying_float(<4 x float> %0) nounwind
|
||||||
%bincmp.i = fcmp olt <4 x float> %calltmp.i, %0
|
%bincmp.i = fcmp olt <4 x float> %calltmp.i, %0
|
||||||
%val_to_boolvec32.i = sext <4 x i1> %bincmp.i to <4 x i32>
|
%val_to_boolvec32.i = sext <4 x i1> %bincmp.i to <4 x i32>
|
||||||
@@ -123,15 +123,15 @@ define internal <4 x float> @__ceil_varying_float(<4 x float>) nounwind readonly
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; rounding doubles
|
;; rounding doubles
|
||||||
|
|
||||||
define internal <4 x double> @__round_varying_double(<4 x double>) nounwind readonly alwaysinline {
|
define <4 x double> @__round_varying_double(<4 x double>) nounwind readonly alwaysinline {
|
||||||
unary1to4(double, @round)
|
unary1to4(double, @round)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x double> @__floor_varying_double(<4 x double>) nounwind readonly alwaysinline {
|
define <4 x double> @__floor_varying_double(<4 x double>) nounwind readonly alwaysinline {
|
||||||
unary1to4(double, @floor)
|
unary1to4(double, @floor)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x double> @__ceil_varying_double(<4 x double>) nounwind readonly alwaysinline {
|
define <4 x double> @__ceil_varying_double(<4 x double>) nounwind readonly alwaysinline {
|
||||||
unary1to4(double, @ceil)
|
unary1to4(double, @ceil)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -143,7 +143,7 @@ define internal <4 x double> @__ceil_varying_double(<4 x double>) nounwind reado
|
|||||||
; vector element, if the mask is on, we return the corresponding value
|
; vector element, if the mask is on, we return the corresponding value
|
||||||
; from %1, and otherwise return the value from %0.
|
; from %1, and otherwise return the value from %0.
|
||||||
|
|
||||||
define internal <4 x i32> @__vselect_i32(<4 x i32>, <4 x i32> ,
|
define <4 x i32> @__vselect_i32(<4 x i32>, <4 x i32> ,
|
||||||
<4 x i32> %mask) nounwind readnone alwaysinline {
|
<4 x i32> %mask) nounwind readnone alwaysinline {
|
||||||
%notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
|
%notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||||
%cleared_old = and <4 x i32> %0, %notmask
|
%cleared_old = and <4 x i32> %0, %notmask
|
||||||
@@ -152,7 +152,7 @@ define internal <4 x i32> @__vselect_i32(<4 x i32>, <4 x i32> ,
|
|||||||
ret <4 x i32> %new
|
ret <4 x i32> %new
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x float> @__vselect_float(<4 x float>, <4 x float>,
|
define <4 x float> @__vselect_float(<4 x float>, <4 x float>,
|
||||||
<4 x i32> %mask) nounwind readnone alwaysinline {
|
<4 x i32> %mask) nounwind readnone alwaysinline {
|
||||||
%v0 = bitcast <4 x float> %0 to <4 x i32>
|
%v0 = bitcast <4 x float> %0 to <4 x i32>
|
||||||
%v1 = bitcast <4 x float> %1 to <4 x i32>
|
%v1 = bitcast <4 x float> %1 to <4 x i32>
|
||||||
@@ -166,27 +166,27 @@ define internal <4 x float> @__vselect_float(<4 x float>, <4 x float>,
|
|||||||
; extend the i1 vector result to an i32 mask. The __vselect does the
|
; extend the i1 vector result to an i32 mask. The __vselect does the
|
||||||
; rest...
|
; rest...
|
||||||
|
|
||||||
define internal <4 x i32> @__min_varying_int32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
|
define <4 x i32> @__min_varying_int32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
|
||||||
%c = icmp slt <4 x i32> %0, %1
|
%c = icmp slt <4 x i32> %0, %1
|
||||||
%mask = sext <4 x i1> %c to <4 x i32>
|
%mask = sext <4 x i1> %c to <4 x i32>
|
||||||
%v = call <4 x i32> @__vselect_i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> %mask)
|
%v = call <4 x i32> @__vselect_i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> %mask)
|
||||||
ret <4 x i32> %v
|
ret <4 x i32> %v
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__min_uniform_int32(i32, i32) nounwind readonly alwaysinline {
|
define i32 @__min_uniform_int32(i32, i32) nounwind readonly alwaysinline {
|
||||||
%c = icmp slt i32 %0, %1
|
%c = icmp slt i32 %0, %1
|
||||||
%r = select i1 %c, i32 %0, i32 %1
|
%r = select i1 %c, i32 %0, i32 %1
|
||||||
ret i32 %r
|
ret i32 %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x i32> @__max_varying_int32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
|
define <4 x i32> @__max_varying_int32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
|
||||||
%c = icmp sgt <4 x i32> %0, %1
|
%c = icmp sgt <4 x i32> %0, %1
|
||||||
%mask = sext <4 x i1> %c to <4 x i32>
|
%mask = sext <4 x i1> %c to <4 x i32>
|
||||||
%v = call <4 x i32> @__vselect_i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> %mask)
|
%v = call <4 x i32> @__vselect_i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> %mask)
|
||||||
ret <4 x i32> %v
|
ret <4 x i32> %v
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinline {
|
define i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinline {
|
||||||
%c = icmp sgt i32 %0, %1
|
%c = icmp sgt i32 %0, %1
|
||||||
%r = select i1 %c, i32 %0, i32 %1
|
%r = select i1 %c, i32 %0, i32 %1
|
||||||
ret i32 %r
|
ret i32 %r
|
||||||
@@ -195,27 +195,27 @@ define internal i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinlin
|
|||||||
; The functions for unsigned ints are similar, just with unsigned
|
; The functions for unsigned ints are similar, just with unsigned
|
||||||
; comparison functions...
|
; comparison functions...
|
||||||
|
|
||||||
define internal <4 x i32> @__min_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
|
define <4 x i32> @__min_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
|
||||||
%c = icmp ult <4 x i32> %0, %1
|
%c = icmp ult <4 x i32> %0, %1
|
||||||
%mask = sext <4 x i1> %c to <4 x i32>
|
%mask = sext <4 x i1> %c to <4 x i32>
|
||||||
%v = call <4 x i32> @__vselect_i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> %mask)
|
%v = call <4 x i32> @__vselect_i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> %mask)
|
||||||
ret <4 x i32> %v
|
ret <4 x i32> %v
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__min_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
|
define i32 @__min_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
|
||||||
%c = icmp ult i32 %0, %1
|
%c = icmp ult i32 %0, %1
|
||||||
%r = select i1 %c, i32 %0, i32 %1
|
%r = select i1 %c, i32 %0, i32 %1
|
||||||
ret i32 %r
|
ret i32 %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x i32> @__max_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
|
define <4 x i32> @__max_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
|
||||||
%c = icmp ugt <4 x i32> %0, %1
|
%c = icmp ugt <4 x i32> %0, %1
|
||||||
%mask = sext <4 x i1> %c to <4 x i32>
|
%mask = sext <4 x i1> %c to <4 x i32>
|
||||||
%v = call <4 x i32> @__vselect_i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> %mask)
|
%v = call <4 x i32> @__vselect_i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> %mask)
|
||||||
ret <4 x i32> %v
|
ret <4 x i32> %v
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
|
define i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
|
||||||
%c = icmp ugt i32 %0, %1
|
%c = icmp ugt i32 %0, %1
|
||||||
%r = select i1 %c, i32 %0, i32 %1
|
%r = select i1 %c, i32 %0, i32 %1
|
||||||
ret i32 %r
|
ret i32 %r
|
||||||
@@ -227,13 +227,13 @@ define internal i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinli
|
|||||||
|
|
||||||
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
|
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal i32 @__movmsk(<4 x i32>) nounwind readnone alwaysinline {
|
define i32 @__movmsk(<4 x i32>) nounwind readnone alwaysinline {
|
||||||
%floatmask = bitcast <4 x i32> %0 to <4 x float>
|
%floatmask = bitcast <4 x i32> %0 to <4 x float>
|
||||||
%v = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %floatmask) nounwind readnone
|
%v = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %floatmask) nounwind readnone
|
||||||
ret i32 %v
|
ret i32 %v
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal float @__reduce_add_float(<4 x float> %v) nounwind readonly alwaysinline {
|
define float @__reduce_add_float(<4 x float> %v) nounwind readonly alwaysinline {
|
||||||
%v1 = shufflevector <4 x float> %v, <4 x float> undef,
|
%v1 = shufflevector <4 x float> %v, <4 x float> undef,
|
||||||
<4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
<4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||||
%m1 = fadd <4 x float> %v1, %v
|
%m1 = fadd <4 x float> %v1, %v
|
||||||
@@ -243,15 +243,15 @@ define internal float @__reduce_add_float(<4 x float> %v) nounwind readonly alwa
|
|||||||
ret float %sum
|
ret float %sum
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal float @__reduce_min_float(<4 x float>) nounwind readnone {
|
define float @__reduce_min_float(<4 x float>) nounwind readnone {
|
||||||
reduce4(float, @__min_varying_float, @__min_uniform_float)
|
reduce4(float, @__min_varying_float, @__min_uniform_float)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal float @__reduce_max_float(<4 x float>) nounwind readnone {
|
define float @__reduce_max_float(<4 x float>) nounwind readnone {
|
||||||
reduce4(float, @__max_varying_float, @__max_uniform_float)
|
reduce4(float, @__max_varying_float, @__max_uniform_float)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__reduce_add_int32(<4 x i32> %v) nounwind readnone {
|
define i32 @__reduce_add_int32(<4 x i32> %v) nounwind readnone {
|
||||||
%v1 = shufflevector <4 x i32> %v, <4 x i32> undef,
|
%v1 = shufflevector <4 x i32> %v, <4 x i32> undef,
|
||||||
<4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
<4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||||
%m1 = add <4 x i32> %v1, %v
|
%m1 = add <4 x i32> %v1, %v
|
||||||
@@ -261,29 +261,29 @@ define internal i32 @__reduce_add_int32(<4 x i32> %v) nounwind readnone {
|
|||||||
ret i32 %sum
|
ret i32 %sum
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__reduce_min_int32(<4 x i32>) nounwind readnone {
|
define i32 @__reduce_min_int32(<4 x i32>) nounwind readnone {
|
||||||
reduce4(i32, @__min_varying_int32, @__min_uniform_int32)
|
reduce4(i32, @__min_varying_int32, @__min_uniform_int32)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__reduce_max_int32(<4 x i32>) nounwind readnone {
|
define i32 @__reduce_max_int32(<4 x i32>) nounwind readnone {
|
||||||
reduce4(i32, @__max_varying_int32, @__max_uniform_int32)
|
reduce4(i32, @__max_varying_int32, @__max_uniform_int32)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__reduce_add_uint32(<4 x i32> %v) nounwind readnone {
|
define i32 @__reduce_add_uint32(<4 x i32> %v) nounwind readnone {
|
||||||
%r = call i32 @__reduce_add_int32(<4 x i32> %v)
|
%r = call i32 @__reduce_add_int32(<4 x i32> %v)
|
||||||
ret i32 %r
|
ret i32 %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__reduce_min_uint32(<4 x i32>) nounwind readnone {
|
define i32 @__reduce_min_uint32(<4 x i32>) nounwind readnone {
|
||||||
reduce4(i32, @__min_varying_uint32, @__min_uniform_uint32)
|
reduce4(i32, @__min_varying_uint32, @__min_uniform_uint32)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__reduce_max_uint32(<4 x i32>) nounwind readnone {
|
define i32 @__reduce_max_uint32(<4 x i32>) nounwind readnone {
|
||||||
reduce4(i32, @__max_varying_uint32, @__max_uniform_uint32)
|
reduce4(i32, @__max_varying_uint32, @__max_uniform_uint32)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal double @__reduce_add_double(<4 x double>) nounwind readnone {
|
define double @__reduce_add_double(<4 x double>) nounwind readnone {
|
||||||
%v0 = shufflevector <4 x double> %0, <4 x double> undef,
|
%v0 = shufflevector <4 x double> %0, <4 x double> undef,
|
||||||
<2 x i32> <i32 0, i32 1>
|
<2 x i32> <i32 0, i32 1>
|
||||||
%v1 = shufflevector <4 x double> %0, <4 x double> undef,
|
%v1 = shufflevector <4 x double> %0, <4 x double> undef,
|
||||||
@@ -295,15 +295,15 @@ define internal double @__reduce_add_double(<4 x double>) nounwind readnone {
|
|||||||
ret double %m
|
ret double %m
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__reduce_min_double(<4 x double>) nounwind readnone {
|
define double @__reduce_min_double(<4 x double>) nounwind readnone {
|
||||||
reduce4(double, @__min_varying_double, @__min_uniform_double)
|
reduce4(double, @__min_varying_double, @__min_uniform_double)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__reduce_max_double(<4 x double>) nounwind readnone {
|
define double @__reduce_max_double(<4 x double>) nounwind readnone {
|
||||||
reduce4(double, @__max_varying_double, @__max_uniform_double)
|
reduce4(double, @__max_varying_double, @__max_uniform_double)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__reduce_add_int64(<4 x i64>) nounwind readnone {
|
define i64 @__reduce_add_int64(<4 x i64>) nounwind readnone {
|
||||||
%v0 = shufflevector <4 x i64> %0, <4 x i64> undef,
|
%v0 = shufflevector <4 x i64> %0, <4 x i64> undef,
|
||||||
<2 x i32> <i32 0, i32 1>
|
<2 x i32> <i32 0, i32 1>
|
||||||
%v1 = shufflevector <4 x i64> %0, <4 x i64> undef,
|
%v1 = shufflevector <4 x i64> %0, <4 x i64> undef,
|
||||||
@@ -315,19 +315,19 @@ define internal i64 @__reduce_add_int64(<4 x i64>) nounwind readnone {
|
|||||||
ret i64 %m
|
ret i64 %m
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__reduce_min_int64(<4 x i64>) nounwind readnone {
|
define i64 @__reduce_min_int64(<4 x i64>) nounwind readnone {
|
||||||
reduce4(i64, @__min_varying_int64, @__min_uniform_int64)
|
reduce4(i64, @__min_varying_int64, @__min_uniform_int64)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__reduce_max_int64(<4 x i64>) nounwind readnone {
|
define i64 @__reduce_max_int64(<4 x i64>) nounwind readnone {
|
||||||
reduce4(i64, @__max_varying_int64, @__max_uniform_int64)
|
reduce4(i64, @__max_varying_int64, @__max_uniform_int64)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__reduce_min_uint64(<4 x i64>) nounwind readnone {
|
define i64 @__reduce_min_uint64(<4 x i64>) nounwind readnone {
|
||||||
reduce4(i64, @__min_varying_uint64, @__min_uniform_uint64)
|
reduce4(i64, @__min_varying_uint64, @__min_uniform_uint64)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__reduce_max_uint64(<4 x i64>) nounwind readnone {
|
define i64 @__reduce_max_uint64(<4 x i64>) nounwind readnone {
|
||||||
reduce4(i64, @__max_varying_uint64, @__max_uniform_uint64)
|
reduce4(i64, @__max_varying_uint64, @__max_uniform_uint64)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -393,7 +393,7 @@ define void @__masked_store_blend_64(<4 x i64>* nocapture %ptr, <4 x i64> %new,
|
|||||||
|
|
||||||
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal <4 x float> @__rcp_varying_float(<4 x float>) nounwind readonly alwaysinline {
|
define <4 x float> @__rcp_varying_float(<4 x float>) nounwind readonly alwaysinline {
|
||||||
%call = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %0)
|
%call = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %0)
|
||||||
; do one N-R iteration to improve precision
|
; do one N-R iteration to improve precision
|
||||||
; float iv = __rcp_v(v);
|
; float iv = __rcp_v(v);
|
||||||
@@ -409,7 +409,7 @@ define internal <4 x float> @__rcp_varying_float(<4 x float>) nounwind readonly
|
|||||||
|
|
||||||
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal <4 x float> @__rsqrt_varying_float(<4 x float> %v) nounwind readonly alwaysinline {
|
define <4 x float> @__rsqrt_varying_float(<4 x float> %v) nounwind readonly alwaysinline {
|
||||||
; float is = __rsqrt_v(v);
|
; float is = __rsqrt_v(v);
|
||||||
%is = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %v)
|
%is = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %v)
|
||||||
; Newton-Raphson iteration to improve precision
|
; Newton-Raphson iteration to improve precision
|
||||||
@@ -427,7 +427,7 @@ define internal <4 x float> @__rsqrt_varying_float(<4 x float> %v) nounwind read
|
|||||||
|
|
||||||
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal <4 x float> @__sqrt_varying_float(<4 x float>) nounwind readonly alwaysinline {
|
define <4 x float> @__sqrt_varying_float(<4 x float>) nounwind readonly alwaysinline {
|
||||||
%call = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %0)
|
%call = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %0)
|
||||||
ret <4 x float> %call
|
ret <4 x float> %call
|
||||||
}
|
}
|
||||||
@@ -446,48 +446,48 @@ declare <4 x float> @__svml_logf4(<4 x float>) nounwind readnone
|
|||||||
declare <4 x float> @__svml_powf4(<4 x float>, <4 x float>) nounwind readnone
|
declare <4 x float> @__svml_powf4(<4 x float>, <4 x float>) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
define internal <4 x float> @__svml_sin(<4 x float>) nounwind readnone alwaysinline {
|
define <4 x float> @__svml_sin(<4 x float>) nounwind readnone alwaysinline {
|
||||||
%ret = call <4 x float> @__svml_sinf4(<4 x float> %0)
|
%ret = call <4 x float> @__svml_sinf4(<4 x float> %0)
|
||||||
ret <4 x float> %ret
|
ret <4 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x float> @__svml_cos(<4 x float>) nounwind readnone alwaysinline {
|
define <4 x float> @__svml_cos(<4 x float>) nounwind readnone alwaysinline {
|
||||||
%ret = call <4 x float> @__svml_cosf4(<4 x float> %0)
|
%ret = call <4 x float> @__svml_cosf4(<4 x float> %0)
|
||||||
ret <4 x float> %ret
|
ret <4 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal void @__svml_sincos(<4 x float>, <4 x float> *, <4 x float> *) nounwind readnone alwaysinline {
|
define void @__svml_sincos(<4 x float>, <4 x float> *, <4 x float> *) nounwind readnone alwaysinline {
|
||||||
%s = call <4 x float> @__svml_sincosf4(<4 x float> * %2, <4 x float> %0)
|
%s = call <4 x float> @__svml_sincosf4(<4 x float> * %2, <4 x float> %0)
|
||||||
store <4 x float> %s, <4 x float> * %1
|
store <4 x float> %s, <4 x float> * %1
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x float> @__svml_tan(<4 x float>) nounwind readnone alwaysinline {
|
define <4 x float> @__svml_tan(<4 x float>) nounwind readnone alwaysinline {
|
||||||
%ret = call <4 x float> @__svml_tanf4(<4 x float> %0)
|
%ret = call <4 x float> @__svml_tanf4(<4 x float> %0)
|
||||||
ret <4 x float> %ret
|
ret <4 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x float> @__svml_atan(<4 x float>) nounwind readnone alwaysinline {
|
define <4 x float> @__svml_atan(<4 x float>) nounwind readnone alwaysinline {
|
||||||
%ret = call <4 x float> @__svml_atanf4(<4 x float> %0)
|
%ret = call <4 x float> @__svml_atanf4(<4 x float> %0)
|
||||||
ret <4 x float> %ret
|
ret <4 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x float> @__svml_atan2(<4 x float>, <4 x float>) nounwind readnone alwaysinline {
|
define <4 x float> @__svml_atan2(<4 x float>, <4 x float>) nounwind readnone alwaysinline {
|
||||||
%ret = call <4 x float> @__svml_atan2f4(<4 x float> %0, <4 x float> %1)
|
%ret = call <4 x float> @__svml_atan2f4(<4 x float> %0, <4 x float> %1)
|
||||||
ret <4 x float> %ret
|
ret <4 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x float> @__svml_exp(<4 x float>) nounwind readnone alwaysinline {
|
define <4 x float> @__svml_exp(<4 x float>) nounwind readnone alwaysinline {
|
||||||
%ret = call <4 x float> @__svml_expf4(<4 x float> %0)
|
%ret = call <4 x float> @__svml_expf4(<4 x float> %0)
|
||||||
ret <4 x float> %ret
|
ret <4 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x float> @__svml_log(<4 x float>) nounwind readnone alwaysinline {
|
define <4 x float> @__svml_log(<4 x float>) nounwind readnone alwaysinline {
|
||||||
%ret = call <4 x float> @__svml_logf4(<4 x float> %0)
|
%ret = call <4 x float> @__svml_logf4(<4 x float> %0)
|
||||||
ret <4 x float> %ret
|
ret <4 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x float> @__svml_pow(<4 x float>, <4 x float>) nounwind readnone alwaysinline {
|
define <4 x float> @__svml_pow(<4 x float>, <4 x float>) nounwind readnone alwaysinline {
|
||||||
%ret = call <4 x float> @__svml_powf4(<4 x float> %0, <4 x float> %1)
|
%ret = call <4 x float> @__svml_powf4(<4 x float> %0, <4 x float> %1)
|
||||||
ret <4 x float> %ret
|
ret <4 x float> %ret
|
||||||
}
|
}
|
||||||
@@ -498,12 +498,12 @@ define internal <4 x float> @__svml_pow(<4 x float>, <4 x float>) nounwind readn
|
|||||||
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
|
||||||
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal <4 x float> @__max_varying_float(<4 x float>, <4 x float>) nounwind readonly alwaysinline {
|
define <4 x float> @__max_varying_float(<4 x float>, <4 x float>) nounwind readonly alwaysinline {
|
||||||
%call = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %0, <4 x float> %1)
|
%call = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %0, <4 x float> %1)
|
||||||
ret <4 x float> %call
|
ret <4 x float> %call
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x float> @__min_varying_float(<4 x float>, <4 x float>) nounwind readonly alwaysinline {
|
define <4 x float> @__min_varying_float(<4 x float>, <4 x float>) nounwind readonly alwaysinline {
|
||||||
%call = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %0, <4 x float> %1)
|
%call = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %0, <4 x float> %1)
|
||||||
ret <4 x float> %call
|
ret <4 x float> %call
|
||||||
}
|
}
|
||||||
@@ -513,7 +513,7 @@ define internal <4 x float> @__min_varying_float(<4 x float>, <4 x float>) nounw
|
|||||||
|
|
||||||
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
|
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
|
||||||
|
|
||||||
define internal <4 x double> @__sqrt_varying_double(<4 x double>) nounwind alwaysinline {
|
define <4 x double> @__sqrt_varying_double(<4 x double>) nounwind alwaysinline {
|
||||||
unary2to4(ret, double, @llvm.x86.sse2.sqrt.pd, %0)
|
unary2to4(ret, double, @llvm.x86.sse2.sqrt.pd, %0)
|
||||||
ret <4 x double> %ret
|
ret <4 x double> %ret
|
||||||
}
|
}
|
||||||
@@ -524,12 +524,12 @@ define internal <4 x double> @__sqrt_varying_double(<4 x double>) nounwind alway
|
|||||||
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
|
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
|
||||||
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
|
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
|
||||||
|
|
||||||
define internal <4 x double> @__min_varying_double(<4 x double>, <4 x double>) nounwind readnone {
|
define <4 x double> @__min_varying_double(<4 x double>, <4 x double>) nounwind readnone {
|
||||||
binary2to4(ret, double, @llvm.x86.sse2.min.pd, %0, %1)
|
binary2to4(ret, double, @llvm.x86.sse2.min.pd, %0, %1)
|
||||||
ret <4 x double> %ret
|
ret <4 x double> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x double> @__max_varying_double(<4 x double>, <4 x double>) nounwind readnone {
|
define <4 x double> @__max_varying_double(<4 x double>, <4 x double>) nounwind readnone {
|
||||||
binary2to4(ret, double, @llvm.x86.sse2.max.pd, %0, %1)
|
binary2to4(ret, double, @llvm.x86.sse2.max.pd, %0, %1)
|
||||||
ret <4 x double> %ret
|
ret <4 x double> %ret
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -34,7 +34,7 @@
|
|||||||
|
|
||||||
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
|
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
|
||||||
|
|
||||||
define internal float @__round_uniform_float(float) nounwind readonly alwaysinline {
|
define float @__round_uniform_float(float) nounwind readonly alwaysinline {
|
||||||
; roundss, round mode nearest 0b00 | don't signal precision exceptions 0b1000 = 8
|
; roundss, round mode nearest 0b00 | don't signal precision exceptions 0b1000 = 8
|
||||||
; the roundss intrinsic is a total mess--docs say:
|
; the roundss intrinsic is a total mess--docs say:
|
||||||
;
|
;
|
||||||
@@ -58,7 +58,7 @@ define internal float @__round_uniform_float(float) nounwind readonly alwaysinli
|
|||||||
ret float %rs
|
ret float %rs
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal float @__floor_uniform_float(float) nounwind readonly alwaysinline {
|
define float @__floor_uniform_float(float) nounwind readonly alwaysinline {
|
||||||
; see above for round_ss intrinsic discussion...
|
; see above for round_ss intrinsic discussion...
|
||||||
%xi = insertelement <4 x float> undef, float %0, i32 0
|
%xi = insertelement <4 x float> undef, float %0, i32 0
|
||||||
; roundps, round down 0b01 | don't signal precision exceptions 0b1001 = 9
|
; roundps, round down 0b01 | don't signal precision exceptions 0b1001 = 9
|
||||||
@@ -67,7 +67,7 @@ define internal float @__floor_uniform_float(float) nounwind readonly alwaysinli
|
|||||||
ret float %rs
|
ret float %rs
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal float @__ceil_uniform_float(float) nounwind readonly alwaysinline {
|
define float @__ceil_uniform_float(float) nounwind readonly alwaysinline {
|
||||||
; see above for round_ss intrinsic discussion...
|
; see above for round_ss intrinsic discussion...
|
||||||
%xi = insertelement <4 x float> undef, float %0, i32 0
|
%xi = insertelement <4 x float> undef, float %0, i32 0
|
||||||
; roundps, round up 0b10 | don't signal precision exceptions 0b1010 = 10
|
; roundps, round up 0b10 | don't signal precision exceptions 0b1010 = 10
|
||||||
@@ -81,14 +81,14 @@ define internal float @__ceil_uniform_float(float) nounwind readonly alwaysinlin
|
|||||||
|
|
||||||
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
|
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
|
||||||
|
|
||||||
define internal double @__round_uniform_double(double) nounwind readonly alwaysinline {
|
define double @__round_uniform_double(double) nounwind readonly alwaysinline {
|
||||||
%xi = insertelement <2 x double> undef, double %0, i32 0
|
%xi = insertelement <2 x double> undef, double %0, i32 0
|
||||||
%xr = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %xi, <2 x double> %xi, i32 8)
|
%xr = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %xi, <2 x double> %xi, i32 8)
|
||||||
%rs = extractelement <2 x double> %xr, i32 0
|
%rs = extractelement <2 x double> %xr, i32 0
|
||||||
ret double %rs
|
ret double %rs
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__floor_uniform_double(double) nounwind readonly alwaysinline {
|
define double @__floor_uniform_double(double) nounwind readonly alwaysinline {
|
||||||
; see above for round_ss intrinsic discussion...
|
; see above for round_ss intrinsic discussion...
|
||||||
%xi = insertelement <2 x double> undef, double %0, i32 0
|
%xi = insertelement <2 x double> undef, double %0, i32 0
|
||||||
; roundpd, round down 0b01 | don't signal precision exceptions 0b1001 = 9
|
; roundpd, round down 0b01 | don't signal precision exceptions 0b1001 = 9
|
||||||
@@ -97,7 +97,7 @@ define internal double @__floor_uniform_double(double) nounwind readonly alwaysi
|
|||||||
ret double %rs
|
ret double %rs
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__ceil_uniform_double(double) nounwind readonly alwaysinline {
|
define double @__ceil_uniform_double(double) nounwind readonly alwaysinline {
|
||||||
; see above for round_ss intrinsic discussion...
|
; see above for round_ss intrinsic discussion...
|
||||||
%xi = insertelement <2 x double> undef, double %0, i32 0
|
%xi = insertelement <2 x double> undef, double %0, i32 0
|
||||||
; roundsd, round up 0b10 | don't signal precision exceptions 0b1010 = 10
|
; roundsd, round up 0b10 | don't signal precision exceptions 0b1010 = 10
|
||||||
@@ -111,7 +111,7 @@ define internal double @__ceil_uniform_double(double) nounwind readonly alwaysin
|
|||||||
|
|
||||||
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal float @__rcp_uniform_float(float) nounwind readonly alwaysinline {
|
define float @__rcp_uniform_float(float) nounwind readonly alwaysinline {
|
||||||
; do the rcpss call
|
; do the rcpss call
|
||||||
%vecval = insertelement <4 x float> undef, float %0, i32 0
|
%vecval = insertelement <4 x float> undef, float %0, i32 0
|
||||||
%call = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %vecval)
|
%call = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %vecval)
|
||||||
@@ -130,7 +130,7 @@ define internal float @__rcp_uniform_float(float) nounwind readonly alwaysinline
|
|||||||
|
|
||||||
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal float @__rsqrt_uniform_float(float) nounwind readonly alwaysinline {
|
define float @__rsqrt_uniform_float(float) nounwind readonly alwaysinline {
|
||||||
; uniform float is = extract(__rsqrt_u(v), 0);
|
; uniform float is = extract(__rsqrt_u(v), 0);
|
||||||
%v = insertelement <4 x float> undef, float %0, i32 0
|
%v = insertelement <4 x float> undef, float %0, i32 0
|
||||||
%vis = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %v)
|
%vis = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %v)
|
||||||
@@ -152,7 +152,7 @@ define internal float @__rsqrt_uniform_float(float) nounwind readonly alwaysinli
|
|||||||
|
|
||||||
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal float @__sqrt_uniform_float(float) nounwind readonly alwaysinline {
|
define float @__sqrt_uniform_float(float) nounwind readonly alwaysinline {
|
||||||
sse_unary_scalar(ret, 4, float, @llvm.x86.sse.sqrt.ss, %0)
|
sse_unary_scalar(ret, 4, float, @llvm.x86.sse.sqrt.ss, %0)
|
||||||
ret float %ret
|
ret float %ret
|
||||||
}
|
}
|
||||||
@@ -163,7 +163,7 @@ define internal float @__sqrt_uniform_float(float) nounwind readonly alwaysinlin
|
|||||||
declare void @llvm.x86.sse.stmxcsr(i8 *) nounwind
|
declare void @llvm.x86.sse.stmxcsr(i8 *) nounwind
|
||||||
declare void @llvm.x86.sse.ldmxcsr(i8 *) nounwind
|
declare void @llvm.x86.sse.ldmxcsr(i8 *) nounwind
|
||||||
|
|
||||||
define internal void @__fastmath() nounwind alwaysinline {
|
define void @__fastmath() nounwind alwaysinline {
|
||||||
%ptr = alloca i32
|
%ptr = alloca i32
|
||||||
%ptr8 = bitcast i32 * %ptr to i8 *
|
%ptr8 = bitcast i32 * %ptr to i8 *
|
||||||
call void @llvm.x86.sse.stmxcsr(i8 * %ptr8)
|
call void @llvm.x86.sse.stmxcsr(i8 * %ptr8)
|
||||||
@@ -182,12 +182,12 @@ define internal void @__fastmath() nounwind alwaysinline {
|
|||||||
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
|
||||||
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal float @__max_uniform_float(float, float) nounwind readonly alwaysinline {
|
define float @__max_uniform_float(float, float) nounwind readonly alwaysinline {
|
||||||
sse_binary_scalar(ret, 4, float, @llvm.x86.sse.max.ss, %0, %1)
|
sse_binary_scalar(ret, 4, float, @llvm.x86.sse.max.ss, %0, %1)
|
||||||
ret float %ret
|
ret float %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal float @__min_uniform_float(float, float) nounwind readonly alwaysinline {
|
define float @__min_uniform_float(float, float) nounwind readonly alwaysinline {
|
||||||
sse_binary_scalar(ret, 4, float, @llvm.x86.sse.min.ss, %0, %1)
|
sse_binary_scalar(ret, 4, float, @llvm.x86.sse.min.ss, %0, %1)
|
||||||
ret float %ret
|
ret float %ret
|
||||||
}
|
}
|
||||||
@@ -197,7 +197,7 @@ define internal float @__min_uniform_float(float, float) nounwind readonly alway
|
|||||||
|
|
||||||
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
|
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
|
||||||
|
|
||||||
define internal double @__sqrt_uniform_double(double) nounwind alwaysinline {
|
define double @__sqrt_uniform_double(double) nounwind alwaysinline {
|
||||||
sse_unary_scalar(ret, 2, double, @llvm.x86.sse2.sqrt.sd, %0)
|
sse_unary_scalar(ret, 2, double, @llvm.x86.sse2.sqrt.sd, %0)
|
||||||
ret double %ret
|
ret double %ret
|
||||||
}
|
}
|
||||||
@@ -209,13 +209,13 @@ define internal double @__sqrt_uniform_double(double) nounwind alwaysinline {
|
|||||||
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
|
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
|
||||||
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
|
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
|
||||||
|
|
||||||
define internal double @__min_uniform_double(double, double) nounwind readnone {
|
define double @__min_uniform_double(double, double) nounwind readnone {
|
||||||
sse_binary_scalar(ret, 2, double, @llvm.x86.sse2.min.sd, %0, %1)
|
sse_binary_scalar(ret, 2, double, @llvm.x86.sse2.min.sd, %0, %1)
|
||||||
ret double %ret
|
ret double %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal double @__max_uniform_double(double, double) nounwind readnone {
|
define double @__max_uniform_double(double, double) nounwind readnone {
|
||||||
sse_binary_scalar(ret, 2, double, @llvm.x86.sse2.max.sd, %0, %1)
|
sse_binary_scalar(ret, 2, double, @llvm.x86.sse2.max.sd, %0, %1)
|
||||||
ret double %ret
|
ret double %ret
|
||||||
}
|
}
|
||||||
@@ -226,12 +226,12 @@ define internal double @__max_uniform_double(double, double) nounwind readnone {
|
|||||||
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
|
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
|
||||||
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
|
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
|
||||||
|
|
||||||
define internal i32 @__min_uniform_int32(i32, i32) nounwind readonly alwaysinline {
|
define i32 @__min_uniform_int32(i32, i32) nounwind readonly alwaysinline {
|
||||||
sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pminsd, %0, %1)
|
sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pminsd, %0, %1)
|
||||||
ret i32 %ret
|
ret i32 %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinline {
|
define i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinline {
|
||||||
sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pmaxsd, %0, %1)
|
sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pmaxsd, %0, %1)
|
||||||
ret i32 %ret
|
ret i32 %ret
|
||||||
}
|
}
|
||||||
@@ -242,12 +242,12 @@ define internal i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinlin
|
|||||||
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
|
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
|
||||||
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
|
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
|
||||||
|
|
||||||
define internal i32 @__min_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
|
define i32 @__min_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
|
||||||
sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pminud, %0, %1)
|
sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pminud, %0, %1)
|
||||||
ret i32 %ret
|
ret i32 %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
|
define i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
|
||||||
sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pmaxud, %0, %1)
|
sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pmaxud, %0, %1)
|
||||||
ret i32 %ret
|
ret i32 %ret
|
||||||
}
|
}
|
||||||
@@ -258,14 +258,14 @@ define internal i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinli
|
|||||||
|
|
||||||
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
|
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
|
||||||
|
|
||||||
define internal i32 @__popcnt_int32(i32) nounwind readonly alwaysinline {
|
define i32 @__popcnt_int32(i32) nounwind readonly alwaysinline {
|
||||||
%call = call i32 @llvm.ctpop.i32(i32 %0)
|
%call = call i32 @llvm.ctpop.i32(i32 %0)
|
||||||
ret i32 %call
|
ret i32 %call
|
||||||
}
|
}
|
||||||
|
|
||||||
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
|
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
|
||||||
|
|
||||||
define internal i64 @__popcnt_int64(i64) nounwind readonly alwaysinline {
|
define i64 @__popcnt_int64(i64) nounwind readonly alwaysinline {
|
||||||
%call = call i64 @llvm.ctpop.i64(i64 %0)
|
%call = call i64 @llvm.ctpop.i64(i64 %0)
|
||||||
ret i64 %call
|
ret i64 %call
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -48,7 +48,7 @@ include(`builtins-sse4-common.ll')
|
|||||||
|
|
||||||
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal <8 x float> @__rcp_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
define <8 x float> @__rcp_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
||||||
; float iv = __rcp_v(v);
|
; float iv = __rcp_v(v);
|
||||||
; return iv * (2. - v * iv);
|
; return iv * (2. - v * iv);
|
||||||
|
|
||||||
@@ -66,7 +66,7 @@ define internal <8 x float> @__rcp_varying_float(<8 x float>) nounwind readonly
|
|||||||
|
|
||||||
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal <8 x float> @__rsqrt_varying_float(<8 x float> %v) nounwind readonly alwaysinline {
|
define <8 x float> @__rsqrt_varying_float(<8 x float> %v) nounwind readonly alwaysinline {
|
||||||
; float is = __rsqrt_v(v);
|
; float is = __rsqrt_v(v);
|
||||||
unary4to8(is, float, @llvm.x86.sse.rsqrt.ps, %v)
|
unary4to8(is, float, @llvm.x86.sse.rsqrt.ps, %v)
|
||||||
; return 0.5 * is * (3. - (v * is) * is);
|
; return 0.5 * is * (3. - (v * is) * is);
|
||||||
@@ -85,7 +85,7 @@ define internal <8 x float> @__rsqrt_varying_float(<8 x float> %v) nounwind read
|
|||||||
|
|
||||||
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal <8 x float> @__sqrt_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
define <8 x float> @__sqrt_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
||||||
unary4to8(call, float, @llvm.x86.sse.sqrt.ps, %0)
|
unary4to8(call, float, @llvm.x86.sse.sqrt.ps, %0)
|
||||||
ret <8 x float> %call
|
ret <8 x float> %call
|
||||||
}
|
}
|
||||||
@@ -104,17 +104,17 @@ declare <4 x float> @__svml_logf4(<4 x float>) nounwind readnone
|
|||||||
declare <4 x float> @__svml_powf4(<4 x float>, <4 x float>) nounwind readnone
|
declare <4 x float> @__svml_powf4(<4 x float>, <4 x float>) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
define internal <8 x float> @__svml_sin(<8 x float>) nounwind readnone alwaysinline {
|
define <8 x float> @__svml_sin(<8 x float>) nounwind readnone alwaysinline {
|
||||||
unary4to8(ret, float, @__svml_sinf4, %0)
|
unary4to8(ret, float, @__svml_sinf4, %0)
|
||||||
ret <8 x float> %ret
|
ret <8 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x float> @__svml_cos(<8 x float>) nounwind readnone alwaysinline {
|
define <8 x float> @__svml_cos(<8 x float>) nounwind readnone alwaysinline {
|
||||||
unary4to8(ret, float, @__svml_cosf4, %0)
|
unary4to8(ret, float, @__svml_cosf4, %0)
|
||||||
ret <8 x float> %ret
|
ret <8 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal void @__svml_sincos(<8 x float>, <8 x float> *,
|
define void @__svml_sincos(<8 x float>, <8 x float> *,
|
||||||
<8 x float> *) nounwind readnone alwaysinline {
|
<8 x float> *) nounwind readnone alwaysinline {
|
||||||
; call svml_sincosf4 two times with the two 4-wide sub-vectors
|
; call svml_sincosf4 two times with the two 4-wide sub-vectors
|
||||||
%a = shufflevector <8 x float> %0, <8 x float> undef,
|
%a = shufflevector <8 x float> %0, <8 x float> undef,
|
||||||
@@ -143,33 +143,33 @@ define internal void @__svml_sincos(<8 x float>, <8 x float> *,
|
|||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x float> @__svml_tan(<8 x float>) nounwind readnone alwaysinline {
|
define <8 x float> @__svml_tan(<8 x float>) nounwind readnone alwaysinline {
|
||||||
unary4to8(ret, float, @__svml_tanf4, %0)
|
unary4to8(ret, float, @__svml_tanf4, %0)
|
||||||
ret <8 x float> %ret
|
ret <8 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x float> @__svml_atan(<8 x float>) nounwind readnone alwaysinline {
|
define <8 x float> @__svml_atan(<8 x float>) nounwind readnone alwaysinline {
|
||||||
unary4to8(ret, float, @__svml_atanf4, %0)
|
unary4to8(ret, float, @__svml_atanf4, %0)
|
||||||
ret <8 x float> %ret
|
ret <8 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x float> @__svml_atan2(<8 x float>,
|
define <8 x float> @__svml_atan2(<8 x float>,
|
||||||
<8 x float>) nounwind readnone alwaysinline {
|
<8 x float>) nounwind readnone alwaysinline {
|
||||||
binary4to8(ret, float, @__svml_atan2f4, %0, %1)
|
binary4to8(ret, float, @__svml_atan2f4, %0, %1)
|
||||||
ret <8 x float> %ret
|
ret <8 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x float> @__svml_exp(<8 x float>) nounwind readnone alwaysinline {
|
define <8 x float> @__svml_exp(<8 x float>) nounwind readnone alwaysinline {
|
||||||
unary4to8(ret, float, @__svml_expf4, %0)
|
unary4to8(ret, float, @__svml_expf4, %0)
|
||||||
ret <8 x float> %ret
|
ret <8 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x float> @__svml_log(<8 x float>) nounwind readnone alwaysinline {
|
define <8 x float> @__svml_log(<8 x float>) nounwind readnone alwaysinline {
|
||||||
unary4to8(ret, float, @__svml_logf4, %0)
|
unary4to8(ret, float, @__svml_logf4, %0)
|
||||||
ret <8 x float> %ret
|
ret <8 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x float> @__svml_pow(<8 x float>,
|
define <8 x float> @__svml_pow(<8 x float>,
|
||||||
<8 x float>) nounwind readnone alwaysinline {
|
<8 x float>) nounwind readnone alwaysinline {
|
||||||
binary4to8(ret, float, @__svml_powf4, %0, %1)
|
binary4to8(ret, float, @__svml_powf4, %0, %1)
|
||||||
ret <8 x float> %ret
|
ret <8 x float> %ret
|
||||||
@@ -182,12 +182,12 @@ define internal <8 x float> @__svml_pow(<8 x float>,
|
|||||||
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
|
||||||
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal <8 x float> @__max_varying_float(<8 x float>, <8 x float>) nounwind readonly alwaysinline {
|
define <8 x float> @__max_varying_float(<8 x float>, <8 x float>) nounwind readonly alwaysinline {
|
||||||
binary4to8(call, float, @llvm.x86.sse.max.ps, %0, %1)
|
binary4to8(call, float, @llvm.x86.sse.max.ps, %0, %1)
|
||||||
ret <8 x float> %call
|
ret <8 x float> %call
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x float> @__min_varying_float(<8 x float>, <8 x float>) nounwind readonly alwaysinline {
|
define <8 x float> @__min_varying_float(<8 x float>, <8 x float>) nounwind readonly alwaysinline {
|
||||||
binary4to8(call, float, @llvm.x86.sse.min.ps, %0, %1)
|
binary4to8(call, float, @llvm.x86.sse.min.ps, %0, %1)
|
||||||
ret <8 x float> %call
|
ret <8 x float> %call
|
||||||
}
|
}
|
||||||
@@ -195,12 +195,12 @@ define internal <8 x float> @__min_varying_float(<8 x float>, <8 x float>) nounw
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; int32 min/max
|
;; int32 min/max
|
||||||
|
|
||||||
define internal <8 x i32> @__min_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
define <8 x i32> @__min_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||||
binary4to8(call, i32, @llvm.x86.sse41.pminsd, %0, %1)
|
binary4to8(call, i32, @llvm.x86.sse41.pminsd, %0, %1)
|
||||||
ret <8 x i32> %call
|
ret <8 x i32> %call
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x i32> @__max_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
define <8 x i32> @__max_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||||
binary4to8(call, i32, @llvm.x86.sse41.pmaxsd, %0, %1)
|
binary4to8(call, i32, @llvm.x86.sse41.pmaxsd, %0, %1)
|
||||||
ret <8 x i32> %call
|
ret <8 x i32> %call
|
||||||
}
|
}
|
||||||
@@ -208,13 +208,13 @@ define internal <8 x i32> @__max_varying_int32(<8 x i32>, <8 x i32>) nounwind re
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
; unsigned int min/max
|
; unsigned int min/max
|
||||||
|
|
||||||
define internal <8 x i32> @__min_varying_uint32(<8 x i32>,
|
define <8 x i32> @__min_varying_uint32(<8 x i32>,
|
||||||
<8 x i32>) nounwind readonly alwaysinline {
|
<8 x i32>) nounwind readonly alwaysinline {
|
||||||
binary4to8(call, i32, @llvm.x86.sse41.pminud, %0, %1)
|
binary4to8(call, i32, @llvm.x86.sse41.pminud, %0, %1)
|
||||||
ret <8 x i32> %call
|
ret <8 x i32> %call
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x i32> @__max_varying_uint32(<8 x i32>,
|
define <8 x i32> @__max_varying_uint32(<8 x i32>,
|
||||||
<8 x i32>) nounwind readonly alwaysinline {
|
<8 x i32>) nounwind readonly alwaysinline {
|
||||||
binary4to8(call, i32, @llvm.x86.sse41.pmaxud, %0, %1)
|
binary4to8(call, i32, @llvm.x86.sse41.pmaxud, %0, %1)
|
||||||
ret <8 x i32> %call
|
ret <8 x i32> %call
|
||||||
@@ -225,7 +225,7 @@ define internal <8 x i32> @__max_varying_uint32(<8 x i32>,
|
|||||||
|
|
||||||
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
|
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
|
define i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
|
||||||
; first do two 4-wide movmsk calls
|
; first do two 4-wide movmsk calls
|
||||||
%floatmask = bitcast <8 x i32> %0 to <8 x float>
|
%floatmask = bitcast <8 x i32> %0 to <8 x float>
|
||||||
%m0 = shufflevector <8 x float> %floatmask, <8 x float> undef,
|
%m0 = shufflevector <8 x float> %floatmask, <8 x float> undef,
|
||||||
@@ -242,103 +242,103 @@ define internal i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
|
|||||||
ret i32 %v
|
ret i32 %v
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal float @__reduce_min_float(<8 x float>) nounwind readnone alwaysinline {
|
define float @__reduce_min_float(<8 x float>) nounwind readnone alwaysinline {
|
||||||
reduce8by4(float, @llvm.x86.sse.min.ps, @__min_uniform_float)
|
reduce8by4(float, @llvm.x86.sse.min.ps, @__min_uniform_float)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal float @__reduce_max_float(<8 x float>) nounwind readnone alwaysinline {
|
define float @__reduce_max_float(<8 x float>) nounwind readnone alwaysinline {
|
||||||
reduce8by4(float, @llvm.x86.sse.max.ps, @__max_uniform_float)
|
reduce8by4(float, @llvm.x86.sse.max.ps, @__max_uniform_float)
|
||||||
}
|
}
|
||||||
|
|
||||||
; helper function for reduce_add_int32
|
; helper function for reduce_add_int32
|
||||||
define internal <4 x i32> @__vec4_add_int32(<4 x i32> %v0,
|
define <4 x i32> @__vec4_add_int32(<4 x i32> %v0,
|
||||||
<4 x i32> %v1) nounwind readnone alwaysinline {
|
<4 x i32> %v1) nounwind readnone alwaysinline {
|
||||||
%v = add <4 x i32> %v0, %v1
|
%v = add <4 x i32> %v0, %v1
|
||||||
ret <4 x i32> %v
|
ret <4 x i32> %v
|
||||||
}
|
}
|
||||||
|
|
||||||
; helper function for reduce_add_int32
|
; helper function for reduce_add_int32
|
||||||
define internal i32 @__add_int32(i32, i32) nounwind readnone alwaysinline {
|
define i32 @__add_int32(i32, i32) nounwind readnone alwaysinline {
|
||||||
%v = add i32 %0, %1
|
%v = add i32 %0, %1
|
||||||
ret i32 %v
|
ret i32 %v
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__reduce_add_int32(<8 x i32>) nounwind readnone alwaysinline {
|
define i32 @__reduce_add_int32(<8 x i32>) nounwind readnone alwaysinline {
|
||||||
reduce8by4(i32, @__vec4_add_int32, @__add_int32)
|
reduce8by4(i32, @__vec4_add_int32, @__add_int32)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__reduce_min_int32(<8 x i32>) nounwind readnone alwaysinline {
|
define i32 @__reduce_min_int32(<8 x i32>) nounwind readnone alwaysinline {
|
||||||
reduce8by4(i32, @llvm.x86.sse41.pminsd, @__min_uniform_int32)
|
reduce8by4(i32, @llvm.x86.sse41.pminsd, @__min_uniform_int32)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__reduce_max_int32(<8 x i32>) nounwind readnone alwaysinline {
|
define i32 @__reduce_max_int32(<8 x i32>) nounwind readnone alwaysinline {
|
||||||
reduce8by4(i32, @llvm.x86.sse41.pmaxsd, @__max_uniform_int32)
|
reduce8by4(i32, @llvm.x86.sse41.pmaxsd, @__max_uniform_int32)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__reduce_add_uint32(<8 x i32> %v) nounwind readnone alwaysinline {
|
define i32 @__reduce_add_uint32(<8 x i32> %v) nounwind readnone alwaysinline {
|
||||||
%r = call i32 @__reduce_add_int32(<8 x i32> %v)
|
%r = call i32 @__reduce_add_int32(<8 x i32> %v)
|
||||||
ret i32 %r
|
ret i32 %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__reduce_min_uint32(<8 x i32>) nounwind readnone alwaysinline {
|
define i32 @__reduce_min_uint32(<8 x i32>) nounwind readnone alwaysinline {
|
||||||
reduce8by4(i32, @llvm.x86.sse41.pminud, @__min_uniform_uint32)
|
reduce8by4(i32, @llvm.x86.sse41.pminud, @__min_uniform_uint32)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__reduce_max_uint32(<8 x i32>) nounwind readnone alwaysinline {
|
define i32 @__reduce_max_uint32(<8 x i32>) nounwind readnone alwaysinline {
|
||||||
reduce8by4(i32, @llvm.x86.sse41.pmaxud, @__max_uniform_uint32)
|
reduce8by4(i32, @llvm.x86.sse41.pmaxud, @__max_uniform_uint32)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x double> @__add_varying_double(<4 x double>,
|
define <4 x double> @__add_varying_double(<4 x double>,
|
||||||
<4 x double>) nounwind readnone alwaysinline {
|
<4 x double>) nounwind readnone alwaysinline {
|
||||||
%r = fadd <4 x double> %0, %1
|
%r = fadd <4 x double> %0, %1
|
||||||
ret <4 x double> %r
|
ret <4 x double> %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__add_uniform_double(double, double) nounwind readnone alwaysinline {
|
define double @__add_uniform_double(double, double) nounwind readnone alwaysinline {
|
||||||
%r = fadd double %0, %1
|
%r = fadd double %0, %1
|
||||||
ret double %r
|
ret double %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__reduce_add_double(<8 x double>) nounwind readnone {
|
define double @__reduce_add_double(<8 x double>) nounwind readnone {
|
||||||
reduce8by4(double, @__add_varying_double, @__add_uniform_double)
|
reduce8by4(double, @__add_varying_double, @__add_uniform_double)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__reduce_min_double(<8 x double>) nounwind readnone {
|
define double @__reduce_min_double(<8 x double>) nounwind readnone {
|
||||||
reduce8(double, @__min_varying_double, @__min_uniform_double)
|
reduce8(double, @__min_varying_double, @__min_uniform_double)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__reduce_max_double(<8 x double>) nounwind readnone {
|
define double @__reduce_max_double(<8 x double>) nounwind readnone {
|
||||||
reduce8(double, @__max_varying_double, @__max_uniform_double)
|
reduce8(double, @__max_varying_double, @__max_uniform_double)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x i64> @__add_varying_int64(<4 x i64>,
|
define <4 x i64> @__add_varying_int64(<4 x i64>,
|
||||||
<4 x i64>) nounwind readnone alwaysinline {
|
<4 x i64>) nounwind readnone alwaysinline {
|
||||||
%r = add <4 x i64> %0, %1
|
%r = add <4 x i64> %0, %1
|
||||||
ret <4 x i64> %r
|
ret <4 x i64> %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__add_uniform_int64(i64, i64) nounwind readnone alwaysinline {
|
define i64 @__add_uniform_int64(i64, i64) nounwind readnone alwaysinline {
|
||||||
%r = add i64 %0, %1
|
%r = add i64 %0, %1
|
||||||
ret i64 %r
|
ret i64 %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__reduce_add_int64(<8 x i64>) nounwind readnone {
|
define i64 @__reduce_add_int64(<8 x i64>) nounwind readnone {
|
||||||
reduce8by4(i64, @__add_varying_int64, @__add_uniform_int64)
|
reduce8by4(i64, @__add_varying_int64, @__add_uniform_int64)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__reduce_min_int64(<8 x i64>) nounwind readnone {
|
define i64 @__reduce_min_int64(<8 x i64>) nounwind readnone {
|
||||||
reduce8(i64, @__min_varying_int64, @__min_uniform_int64)
|
reduce8(i64, @__min_varying_int64, @__min_uniform_int64)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__reduce_max_int64(<8 x i64>) nounwind readnone {
|
define i64 @__reduce_max_int64(<8 x i64>) nounwind readnone {
|
||||||
reduce8(i64, @__max_varying_int64, @__max_uniform_int64)
|
reduce8(i64, @__max_varying_int64, @__max_uniform_int64)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__reduce_min_uint64(<8 x i64>) nounwind readnone {
|
define i64 @__reduce_min_uint64(<8 x i64>) nounwind readnone {
|
||||||
reduce8(i64, @__min_varying_uint64, @__min_uniform_uint64)
|
reduce8(i64, @__min_varying_uint64, @__min_uniform_uint64)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__reduce_max_uint64(<8 x i64>) nounwind readnone {
|
define i64 @__reduce_max_uint64(<8 x i64>) nounwind readnone {
|
||||||
reduce8(i64, @__max_varying_uint64, @__max_uniform_uint64)
|
reduce8(i64, @__max_varying_uint64, @__max_uniform_uint64)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -375,17 +375,17 @@ gen_scatter(8, i64)
|
|||||||
|
|
||||||
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
|
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
|
||||||
|
|
||||||
define internal <8 x float> @__round_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
define <8 x float> @__round_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
||||||
; roundps, round mode nearest 0b00 | don't signal precision exceptions 0b1000 = 8
|
; roundps, round mode nearest 0b00 | don't signal precision exceptions 0b1000 = 8
|
||||||
round4to8(%0, 8)
|
round4to8(%0, 8)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x float> @__floor_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
define <8 x float> @__floor_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
||||||
; roundps, round down 0b01 | don't signal precision exceptions 0b1001 = 9
|
; roundps, round down 0b01 | don't signal precision exceptions 0b1001 = 9
|
||||||
round4to8(%0, 9)
|
round4to8(%0, 9)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x float> @__ceil_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
define <8 x float> @__ceil_varying_float(<8 x float>) nounwind readonly alwaysinline {
|
||||||
; roundps, round up 0b10 | don't signal precision exceptions 0b1010 = 10
|
; roundps, round up 0b10 | don't signal precision exceptions 0b1010 = 10
|
||||||
round4to8(%0, 10)
|
round4to8(%0, 10)
|
||||||
}
|
}
|
||||||
@@ -395,16 +395,16 @@ define internal <8 x float> @__ceil_varying_float(<8 x float>) nounwind readonly
|
|||||||
|
|
||||||
declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
|
declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
|
||||||
|
|
||||||
define internal <8 x double> @__round_varying_double(<8 x double>) nounwind readonly alwaysinline {
|
define <8 x double> @__round_varying_double(<8 x double>) nounwind readonly alwaysinline {
|
||||||
round2to8double(%0, 8)
|
round2to8double(%0, 8)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x double> @__floor_varying_double(<8 x double>) nounwind readonly alwaysinline {
|
define <8 x double> @__floor_varying_double(<8 x double>) nounwind readonly alwaysinline {
|
||||||
; roundpd, round down 0b01 | don't signal precision exceptions 0b1001 = 9
|
; roundpd, round down 0b01 | don't signal precision exceptions 0b1001 = 9
|
||||||
round2to8double(%0, 9)
|
round2to8double(%0, 9)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x double> @__ceil_varying_double(<8 x double>) nounwind readonly alwaysinline {
|
define <8 x double> @__ceil_varying_double(<8 x double>) nounwind readonly alwaysinline {
|
||||||
; roundpd, round up 0b10 | don't signal precision exceptions 0b1010 = 10
|
; roundpd, round up 0b10 | don't signal precision exceptions 0b1010 = 10
|
||||||
round2to8double(%0, 10)
|
round2to8double(%0, 10)
|
||||||
}
|
}
|
||||||
@@ -414,7 +414,7 @@ define internal <8 x double> @__ceil_varying_double(<8 x double>) nounwind reado
|
|||||||
|
|
||||||
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal float @__reduce_add_float(<8 x float>) nounwind readonly alwaysinline {
|
define float @__reduce_add_float(<8 x float>) nounwind readonly alwaysinline {
|
||||||
%a = shufflevector <8 x float> %0, <8 x float> undef,
|
%a = shufflevector <8 x float> %0, <8 x float> undef,
|
||||||
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||||
%b = shufflevector <8 x float> %0, <8 x float> undef,
|
%b = shufflevector <8 x float> %0, <8 x float> undef,
|
||||||
@@ -544,7 +544,7 @@ define void @__masked_store_blend_64(<8 x i64>* nocapture %ptr, <8 x i64> %new,
|
|||||||
|
|
||||||
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
|
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
|
||||||
|
|
||||||
define internal <8 x double> @__sqrt_varying_double(<8 x double>) nounwind alwaysinline {
|
define <8 x double> @__sqrt_varying_double(<8 x double>) nounwind alwaysinline {
|
||||||
unary2to8(ret, double, @llvm.x86.sse2.sqrt.pd, %0)
|
unary2to8(ret, double, @llvm.x86.sse2.sqrt.pd, %0)
|
||||||
ret <8 x double> %ret
|
ret <8 x double> %ret
|
||||||
}
|
}
|
||||||
@@ -555,12 +555,12 @@ define internal <8 x double> @__sqrt_varying_double(<8 x double>) nounwind alway
|
|||||||
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
|
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
|
||||||
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
|
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
|
||||||
|
|
||||||
define internal <8 x double> @__min_varying_double(<8 x double>, <8 x double>) nounwind readnone alwaysinline {
|
define <8 x double> @__min_varying_double(<8 x double>, <8 x double>) nounwind readnone alwaysinline {
|
||||||
binary2to8(ret, double, @llvm.x86.sse2.min.pd, %0, %1)
|
binary2to8(ret, double, @llvm.x86.sse2.min.pd, %0, %1)
|
||||||
ret <8 x double> %ret
|
ret <8 x double> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <8 x double> @__max_varying_double(<8 x double>, <8 x double>) nounwind readnone alwaysinline {
|
define <8 x double> @__max_varying_double(<8 x double>, <8 x double>) nounwind readnone alwaysinline {
|
||||||
binary2to8(ret, double, @llvm.x86.sse2.max.pd, %0, %1)
|
binary2to8(ret, double, @llvm.x86.sse2.max.pd, %0, %1)
|
||||||
ret <8 x double> %ret
|
ret <8 x double> %ret
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ include(`builtins-sse4-common.ll')
|
|||||||
|
|
||||||
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal <4 x float> @__rcp_varying_float(<4 x float>) nounwind readonly alwaysinline {
|
define <4 x float> @__rcp_varying_float(<4 x float>) nounwind readonly alwaysinline {
|
||||||
%call = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %0)
|
%call = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %0)
|
||||||
; do one N-R iteration to improve precision
|
; do one N-R iteration to improve precision
|
||||||
; float iv = __rcp_v(v);
|
; float iv = __rcp_v(v);
|
||||||
@@ -61,7 +61,7 @@ define internal <4 x float> @__rcp_varying_float(<4 x float>) nounwind readonly
|
|||||||
|
|
||||||
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal <4 x float> @__rsqrt_varying_float(<4 x float> %v) nounwind readonly alwaysinline {
|
define <4 x float> @__rsqrt_varying_float(<4 x float> %v) nounwind readonly alwaysinline {
|
||||||
; float is = __rsqrt_v(v);
|
; float is = __rsqrt_v(v);
|
||||||
%is = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %v)
|
%is = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %v)
|
||||||
; Newton-Raphson iteration to improve precision
|
; Newton-Raphson iteration to improve precision
|
||||||
@@ -79,7 +79,7 @@ define internal <4 x float> @__rsqrt_varying_float(<4 x float> %v) nounwind read
|
|||||||
|
|
||||||
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal <4 x float> @__sqrt_varying_float(<4 x float>) nounwind readonly alwaysinline {
|
define <4 x float> @__sqrt_varying_float(<4 x float>) nounwind readonly alwaysinline {
|
||||||
%call = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %0)
|
%call = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %0)
|
||||||
ret <4 x float> %call
|
ret <4 x float> %call
|
||||||
}
|
}
|
||||||
@@ -89,7 +89,7 @@ define internal <4 x float> @__sqrt_varying_float(<4 x float>) nounwind readonly
|
|||||||
|
|
||||||
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
|
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
|
||||||
|
|
||||||
define internal <4 x double> @__sqrt_varying_double(<4 x double>) nounwind alwaysinline {
|
define <4 x double> @__sqrt_varying_double(<4 x double>) nounwind alwaysinline {
|
||||||
unary2to4(ret, double, @llvm.x86.sse2.sqrt.pd, %0)
|
unary2to4(ret, double, @llvm.x86.sse2.sqrt.pd, %0)
|
||||||
ret <4 x double> %ret
|
ret <4 x double> %ret
|
||||||
}
|
}
|
||||||
@@ -99,19 +99,19 @@ define internal <4 x double> @__sqrt_varying_double(<4 x double>) nounwind alway
|
|||||||
|
|
||||||
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
|
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
|
||||||
|
|
||||||
define internal <4 x float> @__round_varying_float(<4 x float>) nounwind readonly alwaysinline {
|
define <4 x float> @__round_varying_float(<4 x float>) nounwind readonly alwaysinline {
|
||||||
; roundps, round mode nearest 0b00 | don't signal precision exceptions 0b1000 = 8
|
; roundps, round mode nearest 0b00 | don't signal precision exceptions 0b1000 = 8
|
||||||
%call = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %0, i32 8)
|
%call = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %0, i32 8)
|
||||||
ret <4 x float> %call
|
ret <4 x float> %call
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x float> @__floor_varying_float(<4 x float>) nounwind readonly alwaysinline {
|
define <4 x float> @__floor_varying_float(<4 x float>) nounwind readonly alwaysinline {
|
||||||
; roundps, round down 0b01 | don't signal precision exceptions 0b1001 = 9
|
; roundps, round down 0b01 | don't signal precision exceptions 0b1001 = 9
|
||||||
%call = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %0, i32 9)
|
%call = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %0, i32 9)
|
||||||
ret <4 x float> %call
|
ret <4 x float> %call
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x float> @__ceil_varying_float(<4 x float>) nounwind readonly alwaysinline {
|
define <4 x float> @__ceil_varying_float(<4 x float>) nounwind readonly alwaysinline {
|
||||||
; roundps, round up 0b10 | don't signal precision exceptions 0b1010 = 10
|
; roundps, round up 0b10 | don't signal precision exceptions 0b1010 = 10
|
||||||
%call = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %0, i32 10)
|
%call = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %0, i32 10)
|
||||||
ret <4 x float> %call
|
ret <4 x float> %call
|
||||||
@@ -122,16 +122,16 @@ define internal <4 x float> @__ceil_varying_float(<4 x float>) nounwind readonly
|
|||||||
|
|
||||||
declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
|
declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
|
||||||
|
|
||||||
define internal <4 x double> @__round_varying_double(<4 x double>) nounwind readonly alwaysinline {
|
define <4 x double> @__round_varying_double(<4 x double>) nounwind readonly alwaysinline {
|
||||||
round2to4double(%0, 8)
|
round2to4double(%0, 8)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x double> @__floor_varying_double(<4 x double>) nounwind readonly alwaysinline {
|
define <4 x double> @__floor_varying_double(<4 x double>) nounwind readonly alwaysinline {
|
||||||
; roundpd, round down 0b01 | don't signal precision exceptions 0b1001 = 9
|
; roundpd, round down 0b01 | don't signal precision exceptions 0b1001 = 9
|
||||||
round2to4double(%0, 9)
|
round2to4double(%0, 9)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x double> @__ceil_varying_double(<4 x double>) nounwind readonly alwaysinline {
|
define <4 x double> @__ceil_varying_double(<4 x double>) nounwind readonly alwaysinline {
|
||||||
; roundpd, round up 0b10 | don't signal precision exceptions 0b1010 = 10
|
; roundpd, round up 0b10 | don't signal precision exceptions 0b1010 = 10
|
||||||
round2to4double(%0, 10)
|
round2to4double(%0, 10)
|
||||||
}
|
}
|
||||||
@@ -142,12 +142,12 @@ define internal <4 x double> @__ceil_varying_double(<4 x double>) nounwind reado
|
|||||||
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
|
||||||
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal <4 x float> @__max_varying_float(<4 x float>, <4 x float>) nounwind readonly alwaysinline {
|
define <4 x float> @__max_varying_float(<4 x float>, <4 x float>) nounwind readonly alwaysinline {
|
||||||
%call = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %0, <4 x float> %1)
|
%call = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %0, <4 x float> %1)
|
||||||
ret <4 x float> %call
|
ret <4 x float> %call
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x float> @__min_varying_float(<4 x float>, <4 x float>) nounwind readonly alwaysinline {
|
define <4 x float> @__min_varying_float(<4 x float>, <4 x float>) nounwind readonly alwaysinline {
|
||||||
%call = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %0, <4 x float> %1)
|
%call = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %0, <4 x float> %1)
|
||||||
ret <4 x float> %call
|
ret <4 x float> %call
|
||||||
}
|
}
|
||||||
@@ -155,12 +155,12 @@ define internal <4 x float> @__min_varying_float(<4 x float>, <4 x float>) nounw
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; int32 min/max
|
;; int32 min/max
|
||||||
|
|
||||||
define internal <4 x i32> @__min_varying_int32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
|
define <4 x i32> @__min_varying_int32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
|
||||||
%call = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %0, <4 x i32> %1)
|
%call = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %0, <4 x i32> %1)
|
||||||
ret <4 x i32> %call
|
ret <4 x i32> %call
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x i32> @__max_varying_int32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
|
define <4 x i32> @__max_varying_int32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
|
||||||
%call = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %0, <4 x i32> %1)
|
%call = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %0, <4 x i32> %1)
|
||||||
ret <4 x i32> %call
|
ret <4 x i32> %call
|
||||||
}
|
}
|
||||||
@@ -168,12 +168,12 @@ define internal <4 x i32> @__max_varying_int32(<4 x i32>, <4 x i32>) nounwind re
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
; unsigned int min/max
|
; unsigned int min/max
|
||||||
|
|
||||||
define internal <4 x i32> @__min_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
|
define <4 x i32> @__min_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
|
||||||
%call = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %0, <4 x i32> %1)
|
%call = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %0, <4 x i32> %1)
|
||||||
ret <4 x i32> %call
|
ret <4 x i32> %call
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x i32> @__max_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
|
define <4 x i32> @__max_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
|
||||||
%call = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %0, <4 x i32> %1)
|
%call = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %0, <4 x i32> %1)
|
||||||
ret <4 x i32> %call
|
ret <4 x i32> %call
|
||||||
}
|
}
|
||||||
@@ -184,12 +184,12 @@ define internal <4 x i32> @__max_varying_uint32(<4 x i32>, <4 x i32>) nounwind r
|
|||||||
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
|
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
|
||||||
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
|
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
|
||||||
|
|
||||||
define internal <4 x double> @__min_varying_double(<4 x double>, <4 x double>) nounwind readnone {
|
define <4 x double> @__min_varying_double(<4 x double>, <4 x double>) nounwind readnone {
|
||||||
binary2to4(ret, double, @llvm.x86.sse2.min.pd, %0, %1)
|
binary2to4(ret, double, @llvm.x86.sse2.min.pd, %0, %1)
|
||||||
ret <4 x double> %ret
|
ret <4 x double> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x double> @__max_varying_double(<4 x double>, <4 x double>) nounwind readnone {
|
define <4 x double> @__max_varying_double(<4 x double>, <4 x double>) nounwind readnone {
|
||||||
binary2to4(ret, double, @llvm.x86.sse2.max.pd, %0, %1)
|
binary2to4(ret, double, @llvm.x86.sse2.max.pd, %0, %1)
|
||||||
ret <4 x double> %ret
|
ret <4 x double> %ret
|
||||||
}
|
}
|
||||||
@@ -208,48 +208,48 @@ declare <4 x float> @__svml_logf4(<4 x float>) nounwind readnone
|
|||||||
declare <4 x float> @__svml_powf4(<4 x float>, <4 x float>) nounwind readnone
|
declare <4 x float> @__svml_powf4(<4 x float>, <4 x float>) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
define internal <4 x float> @__svml_sin(<4 x float>) nounwind readnone alwaysinline {
|
define <4 x float> @__svml_sin(<4 x float>) nounwind readnone alwaysinline {
|
||||||
%ret = call <4 x float> @__svml_sinf4(<4 x float> %0)
|
%ret = call <4 x float> @__svml_sinf4(<4 x float> %0)
|
||||||
ret <4 x float> %ret
|
ret <4 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x float> @__svml_cos(<4 x float>) nounwind readnone alwaysinline {
|
define <4 x float> @__svml_cos(<4 x float>) nounwind readnone alwaysinline {
|
||||||
%ret = call <4 x float> @__svml_cosf4(<4 x float> %0)
|
%ret = call <4 x float> @__svml_cosf4(<4 x float> %0)
|
||||||
ret <4 x float> %ret
|
ret <4 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal void @__svml_sincos(<4 x float>, <4 x float> *, <4 x float> *) nounwind readnone alwaysinline {
|
define void @__svml_sincos(<4 x float>, <4 x float> *, <4 x float> *) nounwind readnone alwaysinline {
|
||||||
%s = call <4 x float> @__svml_sincosf4(<4 x float> * %2, <4 x float> %0)
|
%s = call <4 x float> @__svml_sincosf4(<4 x float> * %2, <4 x float> %0)
|
||||||
store <4 x float> %s, <4 x float> * %1
|
store <4 x float> %s, <4 x float> * %1
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x float> @__svml_tan(<4 x float>) nounwind readnone alwaysinline {
|
define <4 x float> @__svml_tan(<4 x float>) nounwind readnone alwaysinline {
|
||||||
%ret = call <4 x float> @__svml_tanf4(<4 x float> %0)
|
%ret = call <4 x float> @__svml_tanf4(<4 x float> %0)
|
||||||
ret <4 x float> %ret
|
ret <4 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x float> @__svml_atan(<4 x float>) nounwind readnone alwaysinline {
|
define <4 x float> @__svml_atan(<4 x float>) nounwind readnone alwaysinline {
|
||||||
%ret = call <4 x float> @__svml_atanf4(<4 x float> %0)
|
%ret = call <4 x float> @__svml_atanf4(<4 x float> %0)
|
||||||
ret <4 x float> %ret
|
ret <4 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x float> @__svml_atan2(<4 x float>, <4 x float>) nounwind readnone alwaysinline {
|
define <4 x float> @__svml_atan2(<4 x float>, <4 x float>) nounwind readnone alwaysinline {
|
||||||
%ret = call <4 x float> @__svml_atan2f4(<4 x float> %0, <4 x float> %1)
|
%ret = call <4 x float> @__svml_atan2f4(<4 x float> %0, <4 x float> %1)
|
||||||
ret <4 x float> %ret
|
ret <4 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x float> @__svml_exp(<4 x float>) nounwind readnone alwaysinline {
|
define <4 x float> @__svml_exp(<4 x float>) nounwind readnone alwaysinline {
|
||||||
%ret = call <4 x float> @__svml_expf4(<4 x float> %0)
|
%ret = call <4 x float> @__svml_expf4(<4 x float> %0)
|
||||||
ret <4 x float> %ret
|
ret <4 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x float> @__svml_log(<4 x float>) nounwind readnone alwaysinline {
|
define <4 x float> @__svml_log(<4 x float>) nounwind readnone alwaysinline {
|
||||||
%ret = call <4 x float> @__svml_logf4(<4 x float> %0)
|
%ret = call <4 x float> @__svml_logf4(<4 x float> %0)
|
||||||
ret <4 x float> %ret
|
ret <4 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <4 x float> @__svml_pow(<4 x float>, <4 x float>) nounwind readnone alwaysinline {
|
define <4 x float> @__svml_pow(<4 x float>, <4 x float>) nounwind readnone alwaysinline {
|
||||||
%ret = call <4 x float> @__svml_powf4(<4 x float> %0, <4 x float> %1)
|
%ret = call <4 x float> @__svml_powf4(<4 x float> %0, <4 x float> %1)
|
||||||
ret <4 x float> %ret
|
ret <4 x float> %ret
|
||||||
}
|
}
|
||||||
@@ -259,7 +259,7 @@ define internal <4 x float> @__svml_pow(<4 x float>, <4 x float>) nounwind readn
|
|||||||
|
|
||||||
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
|
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal i32 @__movmsk(<4 x i32>) nounwind readnone alwaysinline {
|
define i32 @__movmsk(<4 x i32>) nounwind readnone alwaysinline {
|
||||||
%floatmask = bitcast <4 x i32> %0 to <4 x float>
|
%floatmask = bitcast <4 x i32> %0 to <4 x float>
|
||||||
%v = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %floatmask) nounwind readnone
|
%v = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %floatmask) nounwind readnone
|
||||||
ret i32 %v
|
ret i32 %v
|
||||||
@@ -267,22 +267,22 @@ define internal i32 @__movmsk(<4 x i32>) nounwind readnone alwaysinline {
|
|||||||
|
|
||||||
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
|
||||||
|
|
||||||
define internal float @__reduce_add_float(<4 x float>) nounwind readonly alwaysinline {
|
define float @__reduce_add_float(<4 x float>) nounwind readonly alwaysinline {
|
||||||
%v1 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %0, <4 x float> %0)
|
%v1 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %0, <4 x float> %0)
|
||||||
%v2 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %v1, <4 x float> %v1)
|
%v2 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %v1, <4 x float> %v1)
|
||||||
%scalar = extractelement <4 x float> %v2, i32 0
|
%scalar = extractelement <4 x float> %v2, i32 0
|
||||||
ret float %scalar
|
ret float %scalar
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal float @__reduce_min_float(<4 x float>) nounwind readnone {
|
define float @__reduce_min_float(<4 x float>) nounwind readnone {
|
||||||
reduce4(float, @__min_varying_float, @__min_uniform_float)
|
reduce4(float, @__min_varying_float, @__min_uniform_float)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal float @__reduce_max_float(<4 x float>) nounwind readnone {
|
define float @__reduce_max_float(<4 x float>) nounwind readnone {
|
||||||
reduce4(float, @__max_varying_float, @__max_uniform_float)
|
reduce4(float, @__max_varying_float, @__max_uniform_float)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__reduce_add_int32(<4 x i32> %v) nounwind readnone {
|
define i32 @__reduce_add_int32(<4 x i32> %v) nounwind readnone {
|
||||||
%v1 = shufflevector <4 x i32> %v, <4 x i32> undef,
|
%v1 = shufflevector <4 x i32> %v, <4 x i32> undef,
|
||||||
<4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
<4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||||
%m1 = add <4 x i32> %v1, %v
|
%m1 = add <4 x i32> %v1, %v
|
||||||
@@ -292,29 +292,29 @@ define internal i32 @__reduce_add_int32(<4 x i32> %v) nounwind readnone {
|
|||||||
ret i32 %sum
|
ret i32 %sum
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__reduce_min_int32(<4 x i32>) nounwind readnone {
|
define i32 @__reduce_min_int32(<4 x i32>) nounwind readnone {
|
||||||
reduce4(i32, @__min_varying_int32, @__min_uniform_int32)
|
reduce4(i32, @__min_varying_int32, @__min_uniform_int32)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__reduce_max_int32(<4 x i32>) nounwind readnone {
|
define i32 @__reduce_max_int32(<4 x i32>) nounwind readnone {
|
||||||
reduce4(i32, @__max_varying_int32, @__max_uniform_int32)
|
reduce4(i32, @__max_varying_int32, @__max_uniform_int32)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__reduce_add_uint32(<4 x i32> %v) nounwind readnone {
|
define i32 @__reduce_add_uint32(<4 x i32> %v) nounwind readnone {
|
||||||
%r = call i32 @__reduce_add_int32(<4 x i32> %v)
|
%r = call i32 @__reduce_add_int32(<4 x i32> %v)
|
||||||
ret i32 %r
|
ret i32 %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__reduce_min_uint32(<4 x i32>) nounwind readnone {
|
define i32 @__reduce_min_uint32(<4 x i32>) nounwind readnone {
|
||||||
reduce4(i32, @__min_varying_uint32, @__min_uniform_uint32)
|
reduce4(i32, @__min_varying_uint32, @__min_uniform_uint32)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__reduce_max_uint32(<4 x i32>) nounwind readnone {
|
define i32 @__reduce_max_uint32(<4 x i32>) nounwind readnone {
|
||||||
reduce4(i32, @__max_varying_uint32, @__max_uniform_uint32)
|
reduce4(i32, @__max_varying_uint32, @__max_uniform_uint32)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal double @__reduce_add_double(<4 x double>) nounwind readnone {
|
define double @__reduce_add_double(<4 x double>) nounwind readnone {
|
||||||
%v0 = shufflevector <4 x double> %0, <4 x double> undef,
|
%v0 = shufflevector <4 x double> %0, <4 x double> undef,
|
||||||
<2 x i32> <i32 0, i32 1>
|
<2 x i32> <i32 0, i32 1>
|
||||||
%v1 = shufflevector <4 x double> %0, <4 x double> undef,
|
%v1 = shufflevector <4 x double> %0, <4 x double> undef,
|
||||||
@@ -326,15 +326,15 @@ define internal double @__reduce_add_double(<4 x double>) nounwind readnone {
|
|||||||
ret double %m
|
ret double %m
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__reduce_min_double(<4 x double>) nounwind readnone {
|
define double @__reduce_min_double(<4 x double>) nounwind readnone {
|
||||||
reduce4(double, @__min_varying_double, @__min_uniform_double)
|
reduce4(double, @__min_varying_double, @__min_uniform_double)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__reduce_max_double(<4 x double>) nounwind readnone {
|
define double @__reduce_max_double(<4 x double>) nounwind readnone {
|
||||||
reduce4(double, @__max_varying_double, @__max_uniform_double)
|
reduce4(double, @__max_varying_double, @__max_uniform_double)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__reduce_add_int64(<4 x i64>) nounwind readnone {
|
define i64 @__reduce_add_int64(<4 x i64>) nounwind readnone {
|
||||||
%v0 = shufflevector <4 x i64> %0, <4 x i64> undef,
|
%v0 = shufflevector <4 x i64> %0, <4 x i64> undef,
|
||||||
<2 x i32> <i32 0, i32 1>
|
<2 x i32> <i32 0, i32 1>
|
||||||
%v1 = shufflevector <4 x i64> %0, <4 x i64> undef,
|
%v1 = shufflevector <4 x i64> %0, <4 x i64> undef,
|
||||||
@@ -346,19 +346,19 @@ define internal i64 @__reduce_add_int64(<4 x i64>) nounwind readnone {
|
|||||||
ret i64 %m
|
ret i64 %m
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__reduce_min_int64(<4 x i64>) nounwind readnone {
|
define i64 @__reduce_min_int64(<4 x i64>) nounwind readnone {
|
||||||
reduce4(i64, @__min_varying_int64, @__min_uniform_int64)
|
reduce4(i64, @__min_varying_int64, @__min_uniform_int64)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__reduce_max_int64(<4 x i64>) nounwind readnone {
|
define i64 @__reduce_max_int64(<4 x i64>) nounwind readnone {
|
||||||
reduce4(i64, @__max_varying_int64, @__max_uniform_int64)
|
reduce4(i64, @__max_varying_int64, @__max_uniform_int64)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__reduce_min_uint64(<4 x i64>) nounwind readnone {
|
define i64 @__reduce_min_uint64(<4 x i64>) nounwind readnone {
|
||||||
reduce4(i64, @__min_varying_uint64, @__min_uniform_uint64)
|
reduce4(i64, @__min_varying_uint64, @__min_uniform_uint64)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__reduce_max_uint64(<4 x i64>) nounwind readnone {
|
define i64 @__reduce_max_uint64(<4 x i64>) nounwind readnone {
|
||||||
reduce4(i64, @__max_varying_uint64, @__max_uniform_uint64)
|
reduce4(i64, @__max_varying_uint64, @__max_uniform_uint64)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
318
builtins.cpp
318
builtins.cpp
@@ -210,6 +210,9 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
|
|||||||
if (name.size() < 3 || name[0] != '_' || name[1] != '_')
|
if (name.size() < 3 || name[0] != '_' || name[1] != '_')
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
Debug(SourcePos(), "Attempting to create ispc symbol for function \"%s\".",
|
||||||
|
name.c_str());
|
||||||
|
|
||||||
// An unfortunate hack: we want this builtin function to have the
|
// An unfortunate hack: we want this builtin function to have the
|
||||||
// signature "int __sext_varying_bool(bool)", but the ispc function
|
// signature "int __sext_varying_bool(bool)", but the ispc function
|
||||||
// symbol creation code below assumes that any LLVM vector of i32s is a
|
// symbol creation code below assumes that any LLVM vector of i32s is a
|
||||||
@@ -240,9 +243,11 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
|
|||||||
|
|
||||||
const Type *returnType = lLLVMTypeToISPCType(ftype->getReturnType(),
|
const Type *returnType = lLLVMTypeToISPCType(ftype->getReturnType(),
|
||||||
intAsUnsigned);
|
intAsUnsigned);
|
||||||
if (!returnType)
|
if (returnType == NULL) {
|
||||||
|
Debug(SourcePos(), "Failed: return type not representable.");
|
||||||
// return type not representable in ispc -> not callable from ispc
|
// return type not representable in ispc -> not callable from ispc
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
// Iterate over the arguments and try to find their equivalent ispc
|
// Iterate over the arguments and try to find their equivalent ispc
|
||||||
// types. Track if any of the arguments has an integer type.
|
// types. Track if any of the arguments has an integer type.
|
||||||
@@ -251,8 +256,10 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
|
|||||||
for (unsigned int j = 0; j < ftype->getNumParams(); ++j) {
|
for (unsigned int j = 0; j < ftype->getNumParams(); ++j) {
|
||||||
const llvm::Type *llvmArgType = ftype->getParamType(j);
|
const llvm::Type *llvmArgType = ftype->getParamType(j);
|
||||||
const Type *type = lLLVMTypeToISPCType(llvmArgType, intAsUnsigned);
|
const Type *type = lLLVMTypeToISPCType(llvmArgType, intAsUnsigned);
|
||||||
if (type == NULL)
|
if (type == NULL) {
|
||||||
|
Debug(SourcePos(), "Failed: type of parameter %d not representable", j);
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
anyIntArgs |=
|
anyIntArgs |=
|
||||||
(Type::Equal(type, lLLVMTypeToISPCType(llvmArgType, !intAsUnsigned)) == false);
|
(Type::Equal(type, lLLVMTypeToISPCType(llvmArgType, !intAsUnsigned)) == false);
|
||||||
anyReferenceArgs |= (dynamic_cast<const ReferenceType *>(type) != NULL);
|
anyReferenceArgs |= (dynamic_cast<const ReferenceType *>(type) != NULL);
|
||||||
@@ -329,6 +336,312 @@ lCheckModuleIntrinsics(llvm::Module *module) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** We'd like to have all of these functions declared as 'internal' in
|
||||||
|
their respective bitcode files so that if they aren't needed by the
|
||||||
|
user's program they are eliminated from the final output. However, if
|
||||||
|
we do so, then they aren't brought in by the LinkModules() call below
|
||||||
|
since they aren't yet used by anything in the module they're being
|
||||||
|
linked with (in LLVM 3.1, at least).
|
||||||
|
|
||||||
|
Therefore, we don't declare them as internal when we first define them,
|
||||||
|
but instead mark them as internal after they've been linked in. This
|
||||||
|
is admittedly a kludge.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
lSetInternalFunctions(llvm::Module *module) {
|
||||||
|
const char *names[] = {
|
||||||
|
"__add_uniform_int32",
|
||||||
|
"__add_uniform_int64",
|
||||||
|
"__add_varying_int32",
|
||||||
|
"__add_varying_int64",
|
||||||
|
"__aos_to_soa3_float",
|
||||||
|
"__aos_to_soa3_float16",
|
||||||
|
"__aos_to_soa3_float4",
|
||||||
|
"__aos_to_soa3_float8",
|
||||||
|
"__aos_to_soa3_int32",
|
||||||
|
"__aos_to_soa4_float",
|
||||||
|
"__aos_to_soa4_float16",
|
||||||
|
"__aos_to_soa4_float4",
|
||||||
|
"__aos_to_soa4_float8",
|
||||||
|
"__aos_to_soa4_int32",
|
||||||
|
"__atomic_add_int32_global",
|
||||||
|
"__atomic_add_int64_global",
|
||||||
|
"__atomic_add_uniform_int32_global",
|
||||||
|
"__atomic_add_uniform_int64_global",
|
||||||
|
"__atomic_and_int32_global",
|
||||||
|
"__atomic_and_int64_global",
|
||||||
|
"__atomic_and_uniform_int32_global",
|
||||||
|
"__atomic_and_uniform_int64_global",
|
||||||
|
"__atomic_compare_exchange_double_global",
|
||||||
|
"__atomic_compare_exchange_float_global",
|
||||||
|
"__atomic_compare_exchange_int32_global",
|
||||||
|
"__atomic_compare_exchange_int64_global",
|
||||||
|
"__atomic_compare_exchange_uniform_double_global",
|
||||||
|
"__atomic_compare_exchange_uniform_float_global",
|
||||||
|
"__atomic_compare_exchange_uniform_int32_global",
|
||||||
|
"__atomic_compare_exchange_uniform_int64_global",
|
||||||
|
"__atomic_max_uniform_int32_global",
|
||||||
|
"__atomic_max_uniform_int64_global",
|
||||||
|
"__atomic_min_uniform_int32_global",
|
||||||
|
"__atomic_min_uniform_int64_global",
|
||||||
|
"__atomic_or_int32_global",
|
||||||
|
"__atomic_or_int64_global",
|
||||||
|
"__atomic_or_uniform_int32_global",
|
||||||
|
"__atomic_or_uniform_int64_global",
|
||||||
|
"__atomic_sub_int32_global",
|
||||||
|
"__atomic_sub_int64_global",
|
||||||
|
"__atomic_sub_uniform_int32_global",
|
||||||
|
"__atomic_sub_uniform_int64_global",
|
||||||
|
"__atomic_swap_double_global",
|
||||||
|
"__atomic_swap_float_global",
|
||||||
|
"__atomic_swap_int32_global",
|
||||||
|
"__atomic_swap_int64_global",
|
||||||
|
"__atomic_swap_uniform_double_global",
|
||||||
|
"__atomic_swap_uniform_float_global",
|
||||||
|
"__atomic_swap_uniform_int32_global",
|
||||||
|
"__atomic_swap_uniform_int64_global",
|
||||||
|
"__atomic_umax_uniform_uint32_global",
|
||||||
|
"__atomic_umax_uniform_uint64_global",
|
||||||
|
"__atomic_umin_uniform_uint32_global",
|
||||||
|
"__atomic_umin_uniform_uint64_global",
|
||||||
|
"__atomic_xor_int32_global",
|
||||||
|
"__atomic_xor_int64_global",
|
||||||
|
"__atomic_xor_uniform_int32_global",
|
||||||
|
"__atomic_xor_uniform_int64_global",
|
||||||
|
"__broadcast_double",
|
||||||
|
"__broadcast_float",
|
||||||
|
"__broadcast_int16",
|
||||||
|
"__broadcast_int32",
|
||||||
|
"__broadcast_int64",
|
||||||
|
"__broadcast_int8",
|
||||||
|
"__ceil_uniform_double",
|
||||||
|
"__ceil_uniform_float",
|
||||||
|
"__ceil_varying_double",
|
||||||
|
"__ceil_varying_float",
|
||||||
|
"__count_trailing_zeros",
|
||||||
|
"__do_assert_uniform",
|
||||||
|
"__do_assert_varying",
|
||||||
|
"__do_print",
|
||||||
|
"__doublebits_uniform_int64",
|
||||||
|
"__doublebits_varying_int64",
|
||||||
|
"__exclusive_scan_add_double",
|
||||||
|
"__exclusive_scan_add_float",
|
||||||
|
"__exclusive_scan_add_i32",
|
||||||
|
"__exclusive_scan_add_i64",
|
||||||
|
"__exclusive_scan_and_i32",
|
||||||
|
"__exclusive_scan_and_i64",
|
||||||
|
"__exclusive_scan_or_i32",
|
||||||
|
"__exclusive_scan_or_i64",
|
||||||
|
"__extract_int16",
|
||||||
|
"__extract_int32",
|
||||||
|
"__extract_int64",
|
||||||
|
"__extract_int8",
|
||||||
|
"__fastmath",
|
||||||
|
"__floatbits_uniform_int32",
|
||||||
|
"__floatbits_varying_int32",
|
||||||
|
"__floor_uniform_double",
|
||||||
|
"__floor_uniform_float",
|
||||||
|
"__floor_varying_double",
|
||||||
|
"__floor_varying_float",
|
||||||
|
"__insert_int16",
|
||||||
|
"__insert_int32",
|
||||||
|
"__insert_int64",
|
||||||
|
"__insert_int8",
|
||||||
|
"__intbits_uniform_double",
|
||||||
|
"__intbits_uniform_float",
|
||||||
|
"__intbits_varying_double",
|
||||||
|
"__intbits_varying_float",
|
||||||
|
"__max_uniform_double",
|
||||||
|
"__max_uniform_float",
|
||||||
|
"__max_uniform_int32",
|
||||||
|
"__max_uniform_int64",
|
||||||
|
"__max_uniform_uint32",
|
||||||
|
"__max_uniform_uint64",
|
||||||
|
"__max_varying_double",
|
||||||
|
"__max_varying_float",
|
||||||
|
"__max_varying_int32",
|
||||||
|
"__max_varying_int64",
|
||||||
|
"__max_varying_uint32",
|
||||||
|
"__max_varying_uint64",
|
||||||
|
"__memory_barrier",
|
||||||
|
"__min_uniform_double",
|
||||||
|
"__min_uniform_float",
|
||||||
|
"__min_uniform_int32",
|
||||||
|
"__min_uniform_int64",
|
||||||
|
"__min_uniform_uint32",
|
||||||
|
"__min_uniform_uint64",
|
||||||
|
"__min_varying_double",
|
||||||
|
"__min_varying_float",
|
||||||
|
"__min_varying_int32",
|
||||||
|
"__min_varying_int64",
|
||||||
|
"__min_varying_uint32",
|
||||||
|
"__min_varying_uint64",
|
||||||
|
"__movmsk",
|
||||||
|
"__num_cores",
|
||||||
|
"__packed_load_active",
|
||||||
|
"__packed_store_active",
|
||||||
|
"__popcnt_int32",
|
||||||
|
"__popcnt_int64",
|
||||||
|
"__prefetch_read_1_uniform_bool",
|
||||||
|
"__prefetch_read_1_uniform_double",
|
||||||
|
"__prefetch_read_1_uniform_float",
|
||||||
|
"__prefetch_read_1_uniform_int16",
|
||||||
|
"__prefetch_read_1_uniform_int32",
|
||||||
|
"__prefetch_read_1_uniform_int64",
|
||||||
|
"__prefetch_read_1_uniform_int8",
|
||||||
|
"__prefetch_read_1_varying_bool",
|
||||||
|
"__prefetch_read_1_varying_double",
|
||||||
|
"__prefetch_read_1_varying_float",
|
||||||
|
"__prefetch_read_1_varying_int16",
|
||||||
|
"__prefetch_read_1_varying_int32",
|
||||||
|
"__prefetch_read_1_varying_int64",
|
||||||
|
"__prefetch_read_1_varying_int8",
|
||||||
|
"__prefetch_read_2_uniform_bool",
|
||||||
|
"__prefetch_read_2_uniform_double",
|
||||||
|
"__prefetch_read_2_uniform_float",
|
||||||
|
"__prefetch_read_2_uniform_int16",
|
||||||
|
"__prefetch_read_2_uniform_int32",
|
||||||
|
"__prefetch_read_2_uniform_int64",
|
||||||
|
"__prefetch_read_2_uniform_int8",
|
||||||
|
"__prefetch_read_2_varying_bool",
|
||||||
|
"__prefetch_read_2_varying_double",
|
||||||
|
"__prefetch_read_2_varying_float",
|
||||||
|
"__prefetch_read_2_varying_int16",
|
||||||
|
"__prefetch_read_2_varying_int32",
|
||||||
|
"__prefetch_read_2_varying_int64",
|
||||||
|
"__prefetch_read_2_varying_int8",
|
||||||
|
"__prefetch_read_3_uniform_bool",
|
||||||
|
"__prefetch_read_3_uniform_double",
|
||||||
|
"__prefetch_read_3_uniform_float",
|
||||||
|
"__prefetch_read_3_uniform_int16",
|
||||||
|
"__prefetch_read_3_uniform_int32",
|
||||||
|
"__prefetch_read_3_uniform_int64",
|
||||||
|
"__prefetch_read_3_uniform_int8",
|
||||||
|
"__prefetch_read_3_varying_bool",
|
||||||
|
"__prefetch_read_3_varying_double",
|
||||||
|
"__prefetch_read_3_varying_float",
|
||||||
|
"__prefetch_read_3_varying_int16",
|
||||||
|
"__prefetch_read_3_varying_int32",
|
||||||
|
"__prefetch_read_3_varying_int64",
|
||||||
|
"__prefetch_read_3_varying_int8",
|
||||||
|
"__prefetch_read_nt_uniform_bool",
|
||||||
|
"__prefetch_read_nt_uniform_double",
|
||||||
|
"__prefetch_read_nt_uniform_float",
|
||||||
|
"__prefetch_read_nt_uniform_int16",
|
||||||
|
"__prefetch_read_nt_uniform_int32",
|
||||||
|
"__prefetch_read_nt_uniform_int64",
|
||||||
|
"__prefetch_read_nt_uniform_int8",
|
||||||
|
"__prefetch_read_nt_varying_bool",
|
||||||
|
"__prefetch_read_nt_varying_double",
|
||||||
|
"__prefetch_read_nt_varying_float",
|
||||||
|
"__prefetch_read_nt_varying_int16",
|
||||||
|
"__prefetch_read_nt_varying_int32",
|
||||||
|
"__prefetch_read_nt_varying_int64",
|
||||||
|
"__prefetch_read_nt_varying_int8",
|
||||||
|
"__rcp_uniform_float",
|
||||||
|
"__rcp_varying_float",
|
||||||
|
"__reduce_add_double",
|
||||||
|
"__reduce_add_float",
|
||||||
|
"__reduce_add_int32",
|
||||||
|
"__reduce_add_int64",
|
||||||
|
"__reduce_add_uint32",
|
||||||
|
"__reduce_add_uint64",
|
||||||
|
"__reduce_equal_double",
|
||||||
|
"__reduce_equal_float",
|
||||||
|
"__reduce_equal_int32",
|
||||||
|
"__reduce_equal_int64",
|
||||||
|
"__reduce_max_double",
|
||||||
|
"__reduce_max_float",
|
||||||
|
"__reduce_max_int32",
|
||||||
|
"__reduce_max_int64",
|
||||||
|
"__reduce_max_uint32",
|
||||||
|
"__reduce_max_uint64",
|
||||||
|
"__reduce_min_double",
|
||||||
|
"__reduce_min_float",
|
||||||
|
"__reduce_min_int32",
|
||||||
|
"__reduce_min_int64",
|
||||||
|
"__reduce_min_uint32",
|
||||||
|
"__reduce_min_uint64",
|
||||||
|
"__rotate_double",
|
||||||
|
"__rotate_float",
|
||||||
|
"__rotate_int16",
|
||||||
|
"__rotate_int32",
|
||||||
|
"__rotate_int64",
|
||||||
|
"__rotate_int8",
|
||||||
|
"__round_uniform_double",
|
||||||
|
"__round_uniform_float",
|
||||||
|
"__round_varying_double",
|
||||||
|
"__round_varying_float",
|
||||||
|
"__rsqrt_uniform_float",
|
||||||
|
"__rsqrt_varying_float",
|
||||||
|
"__sext_uniform_bool",
|
||||||
|
"__sext_varying_bool",
|
||||||
|
"__shuffle2_double",
|
||||||
|
"__shuffle2_float",
|
||||||
|
"__shuffle2_int16",
|
||||||
|
"__shuffle2_int32",
|
||||||
|
"__shuffle2_int64",
|
||||||
|
"__shuffle2_int8",
|
||||||
|
"__shuffle_double",
|
||||||
|
"__shuffle_float",
|
||||||
|
"__shuffle_int16",
|
||||||
|
"__shuffle_int32",
|
||||||
|
"__shuffle_int64",
|
||||||
|
"__shuffle_int8",
|
||||||
|
"__soa_to_aos3_float",
|
||||||
|
"__soa_to_aos3_float16",
|
||||||
|
"__soa_to_aos3_float4",
|
||||||
|
"__soa_to_aos3_float8",
|
||||||
|
"__soa_to_aos3_int32",
|
||||||
|
"__soa_to_aos4_float",
|
||||||
|
"__soa_to_aos4_float16",
|
||||||
|
"__soa_to_aos4_float4",
|
||||||
|
"__soa_to_aos4_float8",
|
||||||
|
"__soa_to_aos4_int32",
|
||||||
|
"__sqrt_uniform_double",
|
||||||
|
"__sqrt_uniform_float",
|
||||||
|
"__sqrt_varying_double",
|
||||||
|
"__sqrt_varying_float",
|
||||||
|
"__stdlib_atan",
|
||||||
|
"__stdlib_atan2",
|
||||||
|
"__stdlib_atan2f",
|
||||||
|
"__stdlib_atanf",
|
||||||
|
"__stdlib_cos",
|
||||||
|
"__stdlib_cosf",
|
||||||
|
"__stdlib_exp",
|
||||||
|
"__stdlib_expf",
|
||||||
|
"__stdlib_log",
|
||||||
|
"__stdlib_logf",
|
||||||
|
"__stdlib_pow",
|
||||||
|
"__stdlib_powf",
|
||||||
|
"__stdlib_sin",
|
||||||
|
"__stdlib_sincos",
|
||||||
|
"__stdlib_sincosf",
|
||||||
|
"__stdlib_sinf",
|
||||||
|
"__stdlib_tan",
|
||||||
|
"__stdlib_tanf",
|
||||||
|
"__svml_sin",
|
||||||
|
"__svml_cos",
|
||||||
|
"__svml_sincos",
|
||||||
|
"__svml_tan",
|
||||||
|
"__svml_atan",
|
||||||
|
"__svml_atan2",
|
||||||
|
"__svml_exp",
|
||||||
|
"__svml_log",
|
||||||
|
"__svml_pow",
|
||||||
|
"__undef_uniform",
|
||||||
|
"__undef_varying",
|
||||||
|
};
|
||||||
|
|
||||||
|
int count = sizeof(names) / sizeof(names[0]);
|
||||||
|
for (int i = 0; i < count; ++i) {
|
||||||
|
llvm::Function *f = module->getFunction(names[i]);
|
||||||
|
if (f != NULL)
|
||||||
|
f->setLinkage(llvm::GlobalValue::InternalLinkage);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/** This utility function takes serialized binary LLVM bitcode and adds its
|
/** This utility function takes serialized binary LLVM bitcode and adds its
|
||||||
definitions to the given module. Functions in the bitcode that can be
|
definitions to the given module. Functions in the bitcode that can be
|
||||||
mapped to ispc functions are also added to the symbol table.
|
mapped to ispc functions are also added to the symbol table.
|
||||||
@@ -371,6 +684,7 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
|
|||||||
#endif // LLVM_3_0
|
#endif // LLVM_3_0
|
||||||
&linkError))
|
&linkError))
|
||||||
Error(SourcePos(), "Error linking stdlib bitcode: %s", linkError.c_str());
|
Error(SourcePos(), "Error linking stdlib bitcode: %s", linkError.c_str());
|
||||||
|
lSetInternalFunctions(module);
|
||||||
if (symbolTable != NULL)
|
if (symbolTable != NULL)
|
||||||
lAddModuleSymbols(module, symbolTable);
|
lAddModuleSymbols(module, symbolTable);
|
||||||
lCheckModuleIntrinsics(module);
|
lCheckModuleIntrinsics(module);
|
||||||
|
|||||||
180
builtins.m4
180
builtins.m4
@@ -555,7 +555,7 @@ divert`'dnl
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
define(`shuffles', `
|
define(`shuffles', `
|
||||||
define internal <$1 x $2> @__broadcast_$3(<$1 x $2>, i32) nounwind readnone alwaysinline {
|
define <$1 x $2> @__broadcast_$3(<$1 x $2>, i32) nounwind readnone alwaysinline {
|
||||||
%v = extractelement <$1 x $2> %0, i32 %1
|
%v = extractelement <$1 x $2> %0, i32 %1
|
||||||
%r_0 = insertelement <$1 x $2> undef, $2 %v, i32 0
|
%r_0 = insertelement <$1 x $2> undef, $2 %v, i32 0
|
||||||
forloop(i, 1, eval($1-1), ` %r_`'i = insertelement <$1 x $2> %r_`'eval(i-1), $2 %v, i32 i
|
forloop(i, 1, eval($1-1), ` %r_`'i = insertelement <$1 x $2> %r_`'eval(i-1), $2 %v, i32 i
|
||||||
@@ -563,7 +563,7 @@ forloop(i, 1, eval($1-1), ` %r_`'i = insertelement <$1 x $2> %r_`'eval(i-1), $2
|
|||||||
ret <$1 x $2> %r_`'eval($1-1)
|
ret <$1 x $2> %r_`'eval($1-1)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <$1 x $2> @__rotate_$3(<$1 x $2>, i32) nounwind readnone alwaysinline {
|
define <$1 x $2> @__rotate_$3(<$1 x $2>, i32) nounwind readnone alwaysinline {
|
||||||
%isc = call i1 @__is_compile_time_constant_uniform_int32(i32 %1)
|
%isc = call i1 @__is_compile_time_constant_uniform_int32(i32 %1)
|
||||||
br i1 %isc, label %is_const, label %not_const
|
br i1 %isc, label %is_const, label %not_const
|
||||||
|
|
||||||
@@ -596,7 +596,7 @@ not_const:
|
|||||||
ret <$1 x $2> %result
|
ret <$1 x $2> %result
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <$1 x $2> @__shuffle_$3(<$1 x $2>, <$1 x i32>) nounwind readnone alwaysinline {
|
define <$1 x $2> @__shuffle_$3(<$1 x $2>, <$1 x i32>) nounwind readnone alwaysinline {
|
||||||
forloop(i, 0, eval($1-1), `
|
forloop(i, 0, eval($1-1), `
|
||||||
%index_`'i = extractelement <$1 x i32> %1, i32 i')
|
%index_`'i = extractelement <$1 x i32> %1, i32 i')
|
||||||
forloop(i, 0, eval($1-1), `
|
forloop(i, 0, eval($1-1), `
|
||||||
@@ -608,7 +608,7 @@ forloop(i, 1, eval($1-1), ` %ret_`'i = insertelement <$1 x $2> %ret_`'eval(i-1)
|
|||||||
ret <$1 x $2> %ret_`'eval($1-1)
|
ret <$1 x $2> %ret_`'eval($1-1)
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <$1 x $2> @__shuffle2_$3(<$1 x $2>, <$1 x $2>, <$1 x i32>) nounwind readnone alwaysinline {
|
define <$1 x $2> @__shuffle2_$3(<$1 x $2>, <$1 x $2>, <$1 x i32>) nounwind readnone alwaysinline {
|
||||||
%v2 = shufflevector <$1 x $2> %0, <$1 x $2> %1, <eval(2*$1) x i32> <
|
%v2 = shufflevector <$1 x $2> %0, <$1 x $2> %1, <eval(2*$1) x i32> <
|
||||||
forloop(i, 0, eval(2*$1-2), `i32 i, ') i32 eval(2*$1-1)
|
forloop(i, 0, eval(2*$1-2), `i32 i, ') i32 eval(2*$1-1)
|
||||||
>
|
>
|
||||||
@@ -675,7 +675,7 @@ forloop(i, 1, eval($1-1), `
|
|||||||
|
|
||||||
define(`global_atomic_associative', `
|
define(`global_atomic_associative', `
|
||||||
|
|
||||||
define internal <$1 x $3> @__atomic_$2_$4_global($3 * %ptr, <$1 x $3> %val,
|
define <$1 x $3> @__atomic_$2_$4_global($3 * %ptr, <$1 x $3> %val,
|
||||||
<$1 x i32> %m) nounwind alwaysinline {
|
<$1 x i32> %m) nounwind alwaysinline {
|
||||||
; first, for any lanes where the mask is off, compute a vector where those lanes
|
; first, for any lanes where the mask is off, compute a vector where those lanes
|
||||||
; hold the identity value..
|
; hold the identity value..
|
||||||
@@ -747,7 +747,7 @@ define(`global_atomic_uniform', `
|
|||||||
|
|
||||||
declare $3 @llvm.atomic.load.$2.$3.p0$3($3 * %ptr, $3 %delta)
|
declare $3 @llvm.atomic.load.$2.$3.p0$3($3 * %ptr, $3 %delta)
|
||||||
|
|
||||||
define internal $3 @__atomic_$2_uniform_$4_global($3 * %ptr, $3 %val,
|
define $3 @__atomic_$2_uniform_$4_global($3 * %ptr, $3 %val,
|
||||||
<$1 x i32> %mask) nounwind alwaysinline {
|
<$1 x i32> %mask) nounwind alwaysinline {
|
||||||
%r = call $3 @llvm.atomic.load.$2.$3.p0$3($3 * %ptr, $3 %val)
|
%r = call $3 @llvm.atomic.load.$2.$3.p0$3($3 * %ptr, $3 %val)
|
||||||
ret $3 %r
|
ret $3 %r
|
||||||
@@ -765,7 +765,7 @@ declare i64 @llvm.atomic.swap.i64.p0i64(i64 * %ptr, i64 %val)
|
|||||||
|
|
||||||
define(`global_swap', `
|
define(`global_swap', `
|
||||||
|
|
||||||
define internal <$1 x $2> @__atomic_swap_$3_global($2* %ptr, <$1 x $2> %val,
|
define <$1 x $2> @__atomic_swap_$3_global($2* %ptr, <$1 x $2> %val,
|
||||||
<$1 x i32> %mask) nounwind alwaysinline {
|
<$1 x i32> %mask) nounwind alwaysinline {
|
||||||
%rptr = alloca <$1 x $2>
|
%rptr = alloca <$1 x $2>
|
||||||
%rptr32 = bitcast <$1 x $2> * %rptr to $2 *
|
%rptr32 = bitcast <$1 x $2> * %rptr to $2 *
|
||||||
@@ -780,7 +780,7 @@ define internal <$1 x $2> @__atomic_swap_$3_global($2* %ptr, <$1 x $2> %val,
|
|||||||
ret <$1 x $2> %r
|
ret <$1 x $2> %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal $2 @__atomic_swap_uniform_$3_global($2* %ptr, $2 %val,
|
define $2 @__atomic_swap_uniform_$3_global($2* %ptr, $2 %val,
|
||||||
<$1 x i32> %mask) nounwind alwaysinline {
|
<$1 x i32> %mask) nounwind alwaysinline {
|
||||||
%r = call $2 @llvm.atomic.swap.$2.p0$2($2 * %ptr, $2 %val)
|
%r = call $2 @llvm.atomic.swap.$2.p0$2($2 * %ptr, $2 %val)
|
||||||
ret $2 %r
|
ret $2 %r
|
||||||
@@ -798,7 +798,7 @@ define(`global_atomic_exchange', `
|
|||||||
|
|
||||||
declare $2 @llvm.atomic.cmp.swap.$2.p0$2($2 * %ptr, $2 %cmp, $2 %val)
|
declare $2 @llvm.atomic.cmp.swap.$2.p0$2($2 * %ptr, $2 %cmp, $2 %val)
|
||||||
|
|
||||||
define internal <$1 x $2> @__atomic_compare_exchange_$3_global($2* %ptr, <$1 x $2> %cmp,
|
define <$1 x $2> @__atomic_compare_exchange_$3_global($2* %ptr, <$1 x $2> %cmp,
|
||||||
<$1 x $2> %val, <$1 x i32> %mask) nounwind alwaysinline {
|
<$1 x $2> %val, <$1 x i32> %mask) nounwind alwaysinline {
|
||||||
%rptr = alloca <$1 x $2>
|
%rptr = alloca <$1 x $2>
|
||||||
%rptr32 = bitcast <$1 x $2> * %rptr to $2 *
|
%rptr32 = bitcast <$1 x $2> * %rptr to $2 *
|
||||||
@@ -815,7 +815,7 @@ define internal <$1 x $2> @__atomic_compare_exchange_$3_global($2* %ptr, <$1 x $
|
|||||||
ret <$1 x $2> %r
|
ret <$1 x $2> %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal $2 @__atomic_compare_exchange_uniform_$3_global($2* %ptr, $2 %cmp,
|
define $2 @__atomic_compare_exchange_uniform_$3_global($2* %ptr, $2 %cmp,
|
||||||
$2 %val, <$1 x i32> %mask) nounwind alwaysinline {
|
$2 %val, <$1 x i32> %mask) nounwind alwaysinline {
|
||||||
%r = call $2 @llvm.atomic.cmp.swap.$2.p0$2($2 * %ptr, $2 %cmp, $2 %val)
|
%r = call $2 @llvm.atomic.cmp.swap.$2.p0$2($2 * %ptr, $2 %cmp, $2 %val)
|
||||||
ret $2 %r
|
ret $2 %r
|
||||||
@@ -834,22 +834,22 @@ define internal $2 @__atomic_compare_exchange_uniform_$3_global($2* %ptr, $2 %cm
|
|||||||
declare void @llvm.prefetch(i8* nocapture %ptr, i32 %readwrite, i32 %locality)
|
declare void @llvm.prefetch(i8* nocapture %ptr, i32 %readwrite, i32 %locality)
|
||||||
|
|
||||||
define(`prefetch_read', `
|
define(`prefetch_read', `
|
||||||
define internal void @__prefetch_read_1_$1($2 *) alwaysinline {
|
define void @__prefetch_read_1_$1($2 *) alwaysinline {
|
||||||
%ptr8 = bitcast $2 * %0 to i8 *
|
%ptr8 = bitcast $2 * %0 to i8 *
|
||||||
call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 3)
|
call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 3)
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
define internal void @__prefetch_read_2_$1($2 *) alwaysinline {
|
define void @__prefetch_read_2_$1($2 *) alwaysinline {
|
||||||
%ptr8 = bitcast $2 * %0 to i8 *
|
%ptr8 = bitcast $2 * %0 to i8 *
|
||||||
call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 2)
|
call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 2)
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
define internal void @__prefetch_read_3_$1($2 *) alwaysinline {
|
define void @__prefetch_read_3_$1($2 *) alwaysinline {
|
||||||
%ptr8 = bitcast $2 * %0 to i8 *
|
%ptr8 = bitcast $2 * %0 to i8 *
|
||||||
call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 1)
|
call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 1)
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
define internal void @__prefetch_read_nt_$1($2 *) alwaysinline {
|
define void @__prefetch_read_nt_$1($2 *) alwaysinline {
|
||||||
%ptr8 = bitcast $2 * %0 to i8 *
|
%ptr8 = bitcast $2 * %0 to i8 *
|
||||||
call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 0)
|
call void @llvm.prefetch(i8 * %ptr8, i32 0, i32 0)
|
||||||
ret void
|
ret void
|
||||||
@@ -966,45 +966,45 @@ declare void @__pseudo_scatter_base_offsets_64(i8 * nocapture, <$1 x i32>,
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; vector ops
|
;; vector ops
|
||||||
|
|
||||||
define internal i8 @__extract_int8(<$1 x i8>, i32) nounwind readnone alwaysinline {
|
define i8 @__extract_int8(<$1 x i8>, i32) nounwind readnone alwaysinline {
|
||||||
%extract = extractelement <$1 x i8> %0, i32 %1
|
%extract = extractelement <$1 x i8> %0, i32 %1
|
||||||
ret i8 %extract
|
ret i8 %extract
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <$1 x i8> @__insert_int8(<$1 x i8>, i32,
|
define <$1 x i8> @__insert_int8(<$1 x i8>, i32,
|
||||||
i8) nounwind readnone alwaysinline {
|
i8) nounwind readnone alwaysinline {
|
||||||
%insert = insertelement <$1 x i8> %0, i8 %2, i32 %1
|
%insert = insertelement <$1 x i8> %0, i8 %2, i32 %1
|
||||||
ret <$1 x i8> %insert
|
ret <$1 x i8> %insert
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i16 @__extract_int16(<$1 x i16>, i32) nounwind readnone alwaysinline {
|
define i16 @__extract_int16(<$1 x i16>, i32) nounwind readnone alwaysinline {
|
||||||
%extract = extractelement <$1 x i16> %0, i32 %1
|
%extract = extractelement <$1 x i16> %0, i32 %1
|
||||||
ret i16 %extract
|
ret i16 %extract
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <$1 x i16> @__insert_int16(<$1 x i16>, i32,
|
define <$1 x i16> @__insert_int16(<$1 x i16>, i32,
|
||||||
i16) nounwind readnone alwaysinline {
|
i16) nounwind readnone alwaysinline {
|
||||||
%insert = insertelement <$1 x i16> %0, i16 %2, i32 %1
|
%insert = insertelement <$1 x i16> %0, i16 %2, i32 %1
|
||||||
ret <$1 x i16> %insert
|
ret <$1 x i16> %insert
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__extract_int32(<$1 x i32>, i32) nounwind readnone alwaysinline {
|
define i32 @__extract_int32(<$1 x i32>, i32) nounwind readnone alwaysinline {
|
||||||
%extract = extractelement <$1 x i32> %0, i32 %1
|
%extract = extractelement <$1 x i32> %0, i32 %1
|
||||||
ret i32 %extract
|
ret i32 %extract
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <$1 x i32> @__insert_int32(<$1 x i32>, i32,
|
define <$1 x i32> @__insert_int32(<$1 x i32>, i32,
|
||||||
i32) nounwind readnone alwaysinline {
|
i32) nounwind readnone alwaysinline {
|
||||||
%insert = insertelement <$1 x i32> %0, i32 %2, i32 %1
|
%insert = insertelement <$1 x i32> %0, i32 %2, i32 %1
|
||||||
ret <$1 x i32> %insert
|
ret <$1 x i32> %insert
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__extract_int64(<$1 x i64>, i32) nounwind readnone alwaysinline {
|
define i64 @__extract_int64(<$1 x i64>, i32) nounwind readnone alwaysinline {
|
||||||
%extract = extractelement <$1 x i64> %0, i32 %1
|
%extract = extractelement <$1 x i64> %0, i32 %1
|
||||||
ret i64 %extract
|
ret i64 %extract
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <$1 x i64> @__insert_int64(<$1 x i64>, i32,
|
define <$1 x i64> @__insert_int64(<$1 x i64>, i32,
|
||||||
i64) nounwind readnone alwaysinline {
|
i64) nounwind readnone alwaysinline {
|
||||||
%insert = insertelement <$1 x i64> %0, i64 %2, i32 %1
|
%insert = insertelement <$1 x i64> %0, i64 %2, i32 %1
|
||||||
ret <$1 x i64> %insert
|
ret <$1 x i64> %insert
|
||||||
@@ -1020,70 +1020,70 @@ shuffles($1, i64, int64, 8)
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; various bitcasts from one type to another
|
;; various bitcasts from one type to another
|
||||||
|
|
||||||
define internal <$1 x i32> @__intbits_varying_float(<$1 x float>) nounwind readnone alwaysinline {
|
define <$1 x i32> @__intbits_varying_float(<$1 x float>) nounwind readnone alwaysinline {
|
||||||
%float_to_int_bitcast = bitcast <$1 x float> %0 to <$1 x i32>
|
%float_to_int_bitcast = bitcast <$1 x float> %0 to <$1 x i32>
|
||||||
ret <$1 x i32> %float_to_int_bitcast
|
ret <$1 x i32> %float_to_int_bitcast
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__intbits_uniform_float(float) nounwind readnone alwaysinline {
|
define i32 @__intbits_uniform_float(float) nounwind readnone alwaysinline {
|
||||||
%float_to_int_bitcast = bitcast float %0 to i32
|
%float_to_int_bitcast = bitcast float %0 to i32
|
||||||
ret i32 %float_to_int_bitcast
|
ret i32 %float_to_int_bitcast
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <$1 x i64> @__intbits_varying_double(<$1 x double>) nounwind readnone alwaysinline {
|
define <$1 x i64> @__intbits_varying_double(<$1 x double>) nounwind readnone alwaysinline {
|
||||||
%double_to_int_bitcast = bitcast <$1 x double> %0 to <$1 x i64>
|
%double_to_int_bitcast = bitcast <$1 x double> %0 to <$1 x i64>
|
||||||
ret <$1 x i64> %double_to_int_bitcast
|
ret <$1 x i64> %double_to_int_bitcast
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i64 @__intbits_uniform_double(double) nounwind readnone alwaysinline {
|
define i64 @__intbits_uniform_double(double) nounwind readnone alwaysinline {
|
||||||
%double_to_int_bitcast = bitcast double %0 to i64
|
%double_to_int_bitcast = bitcast double %0 to i64
|
||||||
ret i64 %double_to_int_bitcast
|
ret i64 %double_to_int_bitcast
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <$1 x float> @__floatbits_varying_int32(<$1 x i32>) nounwind readnone alwaysinline {
|
define <$1 x float> @__floatbits_varying_int32(<$1 x i32>) nounwind readnone alwaysinline {
|
||||||
%int_to_float_bitcast = bitcast <$1 x i32> %0 to <$1 x float>
|
%int_to_float_bitcast = bitcast <$1 x i32> %0 to <$1 x float>
|
||||||
ret <$1 x float> %int_to_float_bitcast
|
ret <$1 x float> %int_to_float_bitcast
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal float @__floatbits_uniform_int32(i32) nounwind readnone alwaysinline {
|
define float @__floatbits_uniform_int32(i32) nounwind readnone alwaysinline {
|
||||||
%int_to_float_bitcast = bitcast i32 %0 to float
|
%int_to_float_bitcast = bitcast i32 %0 to float
|
||||||
ret float %int_to_float_bitcast
|
ret float %int_to_float_bitcast
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <$1 x double> @__doublebits_varying_int64(<$1 x i64>) nounwind readnone alwaysinline {
|
define <$1 x double> @__doublebits_varying_int64(<$1 x i64>) nounwind readnone alwaysinline {
|
||||||
%int_to_double_bitcast = bitcast <$1 x i64> %0 to <$1 x double>
|
%int_to_double_bitcast = bitcast <$1 x i64> %0 to <$1 x double>
|
||||||
ret <$1 x double> %int_to_double_bitcast
|
ret <$1 x double> %int_to_double_bitcast
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__doublebits_uniform_int64(i64) nounwind readnone alwaysinline {
|
define double @__doublebits_uniform_int64(i64) nounwind readnone alwaysinline {
|
||||||
%int_to_double_bitcast = bitcast i64 %0 to double
|
%int_to_double_bitcast = bitcast i64 %0 to double
|
||||||
ret double %int_to_double_bitcast
|
ret double %int_to_double_bitcast
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <$1 x float> @__undef_varying() nounwind readnone alwaysinline {
|
define <$1 x float> @__undef_varying() nounwind readnone alwaysinline {
|
||||||
ret <$1 x float> undef
|
ret <$1 x float> undef
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal float @__undef_uniform() nounwind readnone alwaysinline {
|
define float @__undef_uniform() nounwind readnone alwaysinline {
|
||||||
ret float undef
|
ret float undef
|
||||||
}
|
}
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; sign extension
|
;; sign extension
|
||||||
|
|
||||||
define internal i32 @__sext_uniform_bool(i1) nounwind readnone alwaysinline {
|
define i32 @__sext_uniform_bool(i1) nounwind readnone alwaysinline {
|
||||||
%r = sext i1 %0 to i32
|
%r = sext i1 %0 to i32
|
||||||
ret i32 %r
|
ret i32 %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <$1 x i32> @__sext_varying_bool(<$1 x i32>) nounwind readnone alwaysinline {
|
define <$1 x i32> @__sext_varying_bool(<$1 x i32>) nounwind readnone alwaysinline {
|
||||||
ret <$1 x i32> %0
|
ret <$1 x i32> %0
|
||||||
}
|
}
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; count trailing zeros
|
;; count trailing zeros
|
||||||
|
|
||||||
define internal i32 @__count_trailing_zeros(i32) nounwind readnone alwaysinline {
|
define i32 @__count_trailing_zeros(i32) nounwind readnone alwaysinline {
|
||||||
%c = call i32 @llvm.cttz.i32(i32 %0)
|
%c = call i32 @llvm.cttz.i32(i32 %0)
|
||||||
ret i32 %c
|
ret i32 %c
|
||||||
}
|
}
|
||||||
@@ -1094,7 +1094,7 @@ define internal i32 @__count_trailing_zeros(i32) nounwind readnone alwaysinline
|
|||||||
;; take 4 4-wide vectors laid out like <r0 g0 b0 a0> <r1 g1 b1 a1> ...
|
;; take 4 4-wide vectors laid out like <r0 g0 b0 a0> <r1 g1 b1 a1> ...
|
||||||
;; and reorder them to <r0 r1 r2 r3> <g0 g1 g2 g3> ...
|
;; and reorder them to <r0 r1 r2 r3> <g0 g1 g2 g3> ...
|
||||||
|
|
||||||
define internal void
|
define void
|
||||||
@__aos_to_soa4_float4(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2,
|
@__aos_to_soa4_float4(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2,
|
||||||
<4 x float> %v3, <4 x float> * noalias %out0,
|
<4 x float> %v3, <4 x float> * noalias %out0,
|
||||||
<4 x float> * noalias %out1, <4 x float> * noalias %out2,
|
<4 x float> * noalias %out1, <4 x float> * noalias %out2,
|
||||||
@@ -1129,7 +1129,7 @@ define internal void
|
|||||||
;; This is the exact same set of operations that __aos_to_soa4_float4 does
|
;; This is the exact same set of operations that __aos_to_soa4_float4 does
|
||||||
;; (a 4x4 transpose), so just call that...
|
;; (a 4x4 transpose), so just call that...
|
||||||
|
|
||||||
define internal void
|
define void
|
||||||
@__soa_to_aos4_float4(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2,
|
@__soa_to_aos4_float4(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2,
|
||||||
<4 x float> %v3, <4 x float> * noalias %out0,
|
<4 x float> %v3, <4 x float> * noalias %out0,
|
||||||
<4 x float> * noalias %out1, <4 x float> * noalias %out2,
|
<4 x float> * noalias %out1, <4 x float> * noalias %out2,
|
||||||
@@ -1145,7 +1145,7 @@ define internal void
|
|||||||
;; <x0 y0 z0 x1> <y1 z1 x2 y2> <z2 x3 y3 z3>, transpose to
|
;; <x0 y0 z0 x1> <y1 z1 x2 y2> <z2 x3 y3 z3>, transpose to
|
||||||
;; <x0 x1 x2 x3> <y0 y1 y2 y3> <z0 z1 z2 z3>.
|
;; <x0 x1 x2 x3> <y0 y1 y2 y3> <z0 z1 z2 z3>.
|
||||||
|
|
||||||
define internal void
|
define void
|
||||||
@__aos_to_soa3_float4(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2,
|
@__aos_to_soa3_float4(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2,
|
||||||
<4 x float> * noalias %out0, <4 x float> * noalias %out1,
|
<4 x float> * noalias %out0, <4 x float> * noalias %out1,
|
||||||
<4 x float> * noalias %out2) nounwind alwaysinline {
|
<4 x float> * noalias %out2) nounwind alwaysinline {
|
||||||
@@ -1176,7 +1176,7 @@ define internal void
|
|||||||
;; <x0 x1 x2 x3> <y0 y1 y2 y3> <z0 z1 z2 z3> to
|
;; <x0 x1 x2 x3> <y0 y1 y2 y3> <z0 z1 z2 z3> to
|
||||||
;; <x0 y0 z0 x1> <y1 z1 x2 y2> <z2 x3 y3 z3>.
|
;; <x0 y0 z0 x1> <y1 z1 x2 y2> <z2 x3 y3 z3>.
|
||||||
|
|
||||||
define internal void
|
define void
|
||||||
@__soa_to_aos3_float4(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2,
|
@__soa_to_aos3_float4(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2,
|
||||||
<4 x float> * noalias %out0, <4 x float> * noalias %out1,
|
<4 x float> * noalias %out0, <4 x float> * noalias %out1,
|
||||||
<4 x float> * noalias %out2) nounwind alwaysinline {
|
<4 x float> * noalias %out2) nounwind alwaysinline {
|
||||||
@@ -1206,7 +1206,7 @@ define internal void
|
|||||||
;; routines above. These implementations are all built on top of the 4-wide
|
;; routines above. These implementations are all built on top of the 4-wide
|
||||||
;; vector versions.
|
;; vector versions.
|
||||||
|
|
||||||
define internal void
|
define void
|
||||||
@__aos_to_soa4_float8(<8 x float> %v0, <8 x float> %v1, <8 x float> %v2,
|
@__aos_to_soa4_float8(<8 x float> %v0, <8 x float> %v1, <8 x float> %v2,
|
||||||
<8 x float> %v3, <8 x float> * noalias %out0,
|
<8 x float> %v3, <8 x float> * noalias %out0,
|
||||||
<8 x float> * noalias %out1, <8 x float> * noalias %out2,
|
<8 x float> * noalias %out1, <8 x float> * noalias %out2,
|
||||||
@@ -1256,7 +1256,7 @@ define internal void
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal void
|
define void
|
||||||
@__soa_to_aos4_float8(<8 x float> %v0, <8 x float> %v1, <8 x float> %v2,
|
@__soa_to_aos4_float8(<8 x float> %v0, <8 x float> %v1, <8 x float> %v2,
|
||||||
<8 x float> %v3, <8 x float> * noalias %out0,
|
<8 x float> %v3, <8 x float> * noalias %out0,
|
||||||
<8 x float> * noalias %out1, <8 x float> * noalias %out2,
|
<8 x float> * noalias %out1, <8 x float> * noalias %out2,
|
||||||
@@ -1305,7 +1305,7 @@ define internal void
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal void
|
define void
|
||||||
@__aos_to_soa3_float8(<8 x float> %v0, <8 x float> %v1, <8 x float> %v2,
|
@__aos_to_soa3_float8(<8 x float> %v0, <8 x float> %v1, <8 x float> %v2,
|
||||||
<8 x float> * noalias %out0, <8 x float> * noalias %out1,
|
<8 x float> * noalias %out0, <8 x float> * noalias %out1,
|
||||||
<8 x float> * noalias %out2) nounwind alwaysinline {
|
<8 x float> * noalias %out2) nounwind alwaysinline {
|
||||||
@@ -1339,7 +1339,7 @@ define internal void
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal void
|
define void
|
||||||
@__soa_to_aos3_float8(<8 x float> %v0, <8 x float> %v1, <8 x float> %v2,
|
@__soa_to_aos3_float8(<8 x float> %v0, <8 x float> %v1, <8 x float> %v2,
|
||||||
<8 x float> * noalias %out0, <8 x float> * noalias %out1,
|
<8 x float> * noalias %out0, <8 x float> * noalias %out1,
|
||||||
<8 x float> * noalias %out2) nounwind alwaysinline {
|
<8 x float> * noalias %out2) nounwind alwaysinline {
|
||||||
@@ -1374,7 +1374,7 @@ define internal void
|
|||||||
|
|
||||||
;; 16-wide
|
;; 16-wide
|
||||||
|
|
||||||
define internal void
|
define void
|
||||||
@__aos_to_soa4_float16(<16 x float> %v0, <16 x float> %v1, <16 x float> %v2,
|
@__aos_to_soa4_float16(<16 x float> %v0, <16 x float> %v1, <16 x float> %v2,
|
||||||
<16 x float> %v3, <16 x float> * noalias %out0,
|
<16 x float> %v3, <16 x float> * noalias %out0,
|
||||||
<16 x float> * noalias %out1, <16 x float> * noalias %out2,
|
<16 x float> * noalias %out1, <16 x float> * noalias %out2,
|
||||||
@@ -1445,7 +1445,7 @@ define internal void
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal void
|
define void
|
||||||
@__soa_to_aos4_float16(<16 x float> %v0, <16 x float> %v1, <16 x float> %v2,
|
@__soa_to_aos4_float16(<16 x float> %v0, <16 x float> %v1, <16 x float> %v2,
|
||||||
<16 x float> %v3, <16 x float> * noalias %out0,
|
<16 x float> %v3, <16 x float> * noalias %out0,
|
||||||
<16 x float> * noalias %out1, <16 x float> * noalias %out2,
|
<16 x float> * noalias %out1, <16 x float> * noalias %out2,
|
||||||
@@ -1516,7 +1516,7 @@ define internal void
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal void
|
define void
|
||||||
@__aos_to_soa3_float16(<16 x float> %v0, <16 x float> %v1, <16 x float> %v2,
|
@__aos_to_soa3_float16(<16 x float> %v0, <16 x float> %v1, <16 x float> %v2,
|
||||||
<16 x float> * noalias %out0, <16 x float> * noalias %out1,
|
<16 x float> * noalias %out0, <16 x float> * noalias %out1,
|
||||||
<16 x float> * noalias %out2) nounwind alwaysinline {
|
<16 x float> * noalias %out2) nounwind alwaysinline {
|
||||||
@@ -1574,7 +1574,7 @@ define internal void
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal void
|
define void
|
||||||
@__soa_to_aos3_float16(<16 x float> %v0, <16 x float> %v1, <16 x float> %v2,
|
@__soa_to_aos3_float16(<16 x float> %v0, <16 x float> %v1, <16 x float> %v2,
|
||||||
<16 x float> * noalias %out0, <16 x float> * noalias %out1,
|
<16 x float> * noalias %out0, <16 x float> * noalias %out1,
|
||||||
<16 x float> * noalias %out2) nounwind alwaysinline {
|
<16 x float> * noalias %out2) nounwind alwaysinline {
|
||||||
@@ -1633,7 +1633,7 @@ define internal void
|
|||||||
|
|
||||||
;; versions to be called from stdlib
|
;; versions to be called from stdlib
|
||||||
|
|
||||||
define internal void
|
define void
|
||||||
@__aos_to_soa4_float([0 x float] * noalias %base, i32 %offset,
|
@__aos_to_soa4_float([0 x float] * noalias %base, i32 %offset,
|
||||||
<$1 x float> * noalias %out0, <$1 x float> * noalias %out1,
|
<$1 x float> * noalias %out0, <$1 x float> * noalias %out1,
|
||||||
<$1 x float> * noalias %out2, <$1 x float> * noalias %out3)
|
<$1 x float> * noalias %out2, <$1 x float> * noalias %out3)
|
||||||
@@ -1655,7 +1655,7 @@ define internal void
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal void
|
define void
|
||||||
@__aos_to_soa4_int32([0 x i32] * noalias %base, i32 %offset,
|
@__aos_to_soa4_int32([0 x i32] * noalias %base, i32 %offset,
|
||||||
<$1 x i32> * noalias %out0, <$1 x i32> * noalias %out1,
|
<$1 x i32> * noalias %out0, <$1 x i32> * noalias %out1,
|
||||||
<$1 x i32> * noalias %out2, <$1 x i32> * noalias %out3)
|
<$1 x i32> * noalias %out2, <$1 x i32> * noalias %out3)
|
||||||
@@ -1672,7 +1672,7 @@ define internal void
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal void
|
define void
|
||||||
@__soa_to_aos4_float(<$1 x float> %v0, <$1 x float> %v1, <$1 x float> %v2,
|
@__soa_to_aos4_float(<$1 x float> %v0, <$1 x float> %v1, <$1 x float> %v2,
|
||||||
<$1 x float> %v3, [0 x float] * noalias %base,
|
<$1 x float> %v3, [0 x float] * noalias %base,
|
||||||
i32 %offset) nounwind alwaysinline {
|
i32 %offset) nounwind alwaysinline {
|
||||||
@@ -1689,7 +1689,7 @@ define internal void
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal void
|
define void
|
||||||
@__soa_to_aos4_int32(<$1 x i32> %v0, <$1 x i32> %v1, <$1 x i32> %v2,
|
@__soa_to_aos4_int32(<$1 x i32> %v0, <$1 x i32> %v1, <$1 x i32> %v2,
|
||||||
<$1 x i32> %v3, [0 x i32] * noalias %base,
|
<$1 x i32> %v3, [0 x i32] * noalias %base,
|
||||||
i32 %offset) nounwind alwaysinline {
|
i32 %offset) nounwind alwaysinline {
|
||||||
@@ -1705,7 +1705,7 @@ define internal void
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal void
|
define void
|
||||||
@__aos_to_soa3_float([0 x float] * noalias %base, i32 %offset,
|
@__aos_to_soa3_float([0 x float] * noalias %base, i32 %offset,
|
||||||
<$1 x float> * %out0, <$1 x float> * %out1,
|
<$1 x float> * %out0, <$1 x float> * %out1,
|
||||||
<$1 x float> * %out2) nounwind alwaysinline {
|
<$1 x float> * %out2) nounwind alwaysinline {
|
||||||
@@ -1724,7 +1724,7 @@ define internal void
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal void
|
define void
|
||||||
@__aos_to_soa3_int32([0 x i32] * noalias %base, i32 %offset,
|
@__aos_to_soa3_int32([0 x i32] * noalias %base, i32 %offset,
|
||||||
<$1 x i32> * noalias %out0, <$1 x i32> * noalias %out1,
|
<$1 x i32> * noalias %out0, <$1 x i32> * noalias %out1,
|
||||||
<$1 x i32> * noalias %out2) nounwind alwaysinline {
|
<$1 x i32> * noalias %out2) nounwind alwaysinline {
|
||||||
@@ -1738,7 +1738,7 @@ define internal void
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal void
|
define void
|
||||||
@__soa_to_aos3_float(<$1 x float> %v0, <$1 x float> %v1, <$1 x float> %v2,
|
@__soa_to_aos3_float(<$1 x float> %v0, <$1 x float> %v1, <$1 x float> %v2,
|
||||||
[0 x float] * noalias %base, i32 %offset) nounwind alwaysinline {
|
[0 x float] * noalias %base, i32 %offset) nounwind alwaysinline {
|
||||||
%pf = bitcast [0 x float] * %base to float *
|
%pf = bitcast [0 x float] * %base to float *
|
||||||
@@ -1753,7 +1753,7 @@ define internal void
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal void
|
define void
|
||||||
@__soa_to_aos3_int32(<$1 x i32> %v0, <$1 x i32> %v1, <$1 x i32> %v2,
|
@__soa_to_aos3_int32(<$1 x i32> %v0, <$1 x i32> %v1, <$1 x i32> %v2,
|
||||||
[0 x i32] * noalias %base, i32 %offset) nounwind alwaysinline {
|
[0 x i32] * noalias %base, i32 %offset) nounwind alwaysinline {
|
||||||
%fv0 = bitcast <$1 x i32> %v0 to <$1 x float>
|
%fv0 = bitcast <$1 x i32> %v0 to <$1 x float>
|
||||||
@@ -1791,7 +1791,7 @@ prefetch_read(varying_double, <$1 x double>)
|
|||||||
declare i32 @printf(i8*, ...)
|
declare i32 @printf(i8*, ...)
|
||||||
declare void @abort() noreturn
|
declare void @abort() noreturn
|
||||||
|
|
||||||
define internal void @__do_assert_uniform(i8 *%str, i1 %test, <$1 x i32> %mask) {
|
define void @__do_assert_uniform(i8 *%str, i1 %test, <$1 x i32> %mask) {
|
||||||
br i1 %test, label %ok, label %fail
|
br i1 %test, label %ok, label %fail
|
||||||
|
|
||||||
fail:
|
fail:
|
||||||
@@ -1804,7 +1804,7 @@ ok:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define internal void @__do_assert_varying(i8 *%str, <$1 x i32> %test,
|
define void @__do_assert_varying(i8 *%str, <$1 x i32> %test,
|
||||||
<$1 x i32> %mask) {
|
<$1 x i32> %mask) {
|
||||||
%nottest = xor <$1 x i32> %test,
|
%nottest = xor <$1 x i32> %test,
|
||||||
< forloop(i, 1, eval($1-1), `i32 -1, ') i32 -1 >
|
< forloop(i, 1, eval($1-1), `i32 -1, ') i32 -1 >
|
||||||
@@ -1839,47 +1839,47 @@ declare float @expf(float) nounwind readnone
|
|||||||
declare float @logf(float) nounwind readnone
|
declare float @logf(float) nounwind readnone
|
||||||
declare float @powf(float, float) nounwind readnone
|
declare float @powf(float, float) nounwind readnone
|
||||||
|
|
||||||
define internal float @__stdlib_sinf(float) nounwind readnone alwaysinline {
|
define float @__stdlib_sinf(float) nounwind readnone alwaysinline {
|
||||||
%r = call float @sinf(float %0)
|
%r = call float @sinf(float %0)
|
||||||
ret float %r
|
ret float %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal float @__stdlib_cosf(float) nounwind readnone alwaysinline {
|
define float @__stdlib_cosf(float) nounwind readnone alwaysinline {
|
||||||
%r = call float @cosf(float %0)
|
%r = call float @cosf(float %0)
|
||||||
ret float %r
|
ret float %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal void @__stdlib_sincosf(float, float *, float *) nounwind readnone alwaysinline {
|
define void @__stdlib_sincosf(float, float *, float *) nounwind readnone alwaysinline {
|
||||||
call void @sincosf(float %0, float *%1, float *%2)
|
call void @sincosf(float %0, float *%1, float *%2)
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal float @__stdlib_tanf(float) nounwind readnone alwaysinline {
|
define float @__stdlib_tanf(float) nounwind readnone alwaysinline {
|
||||||
%r = call float @tanf(float %0)
|
%r = call float @tanf(float %0)
|
||||||
ret float %r
|
ret float %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal float @__stdlib_atanf(float) nounwind readnone alwaysinline {
|
define float @__stdlib_atanf(float) nounwind readnone alwaysinline {
|
||||||
%r = call float @atanf(float %0)
|
%r = call float @atanf(float %0)
|
||||||
ret float %r
|
ret float %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal float @__stdlib_atan2f(float, float) nounwind readnone alwaysinline {
|
define float @__stdlib_atan2f(float, float) nounwind readnone alwaysinline {
|
||||||
%r = call float @atan2f(float %0, float %1)
|
%r = call float @atan2f(float %0, float %1)
|
||||||
ret float %r
|
ret float %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal float @__stdlib_logf(float) nounwind readnone alwaysinline {
|
define float @__stdlib_logf(float) nounwind readnone alwaysinline {
|
||||||
%r = call float @logf(float %0)
|
%r = call float @logf(float %0)
|
||||||
ret float %r
|
ret float %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal float @__stdlib_expf(float) nounwind readnone alwaysinline {
|
define float @__stdlib_expf(float) nounwind readnone alwaysinline {
|
||||||
%r = call float @expf(float %0)
|
%r = call float @expf(float %0)
|
||||||
ret float %r
|
ret float %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal float @__stdlib_powf(float, float) nounwind readnone alwaysinline {
|
define float @__stdlib_powf(float, float) nounwind readnone alwaysinline {
|
||||||
%r = call float @powf(float %0, float %1)
|
%r = call float @powf(float %0, float %1)
|
||||||
ret float %r
|
ret float %r
|
||||||
}
|
}
|
||||||
@@ -1894,47 +1894,47 @@ declare double @exp(double) nounwind readnone
|
|||||||
declare double @log(double) nounwind readnone
|
declare double @log(double) nounwind readnone
|
||||||
declare double @pow(double, double) nounwind readnone
|
declare double @pow(double, double) nounwind readnone
|
||||||
|
|
||||||
define internal double @__stdlib_sin(double) nounwind readnone alwaysinline {
|
define double @__stdlib_sin(double) nounwind readnone alwaysinline {
|
||||||
%r = call double @sin(double %0)
|
%r = call double @sin(double %0)
|
||||||
ret double %r
|
ret double %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__stdlib_cos(double) nounwind readnone alwaysinline {
|
define double @__stdlib_cos(double) nounwind readnone alwaysinline {
|
||||||
%r = call double @cos(double %0)
|
%r = call double @cos(double %0)
|
||||||
ret double %r
|
ret double %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal void @__stdlib_sincos(double, double *, double *) nounwind readnone alwaysinline {
|
define void @__stdlib_sincos(double, double *, double *) nounwind readnone alwaysinline {
|
||||||
call void @sincos(double %0, double *%1, double *%2)
|
call void @sincos(double %0, double *%1, double *%2)
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__stdlib_tan(double) nounwind readnone alwaysinline {
|
define double @__stdlib_tan(double) nounwind readnone alwaysinline {
|
||||||
%r = call double @tan(double %0)
|
%r = call double @tan(double %0)
|
||||||
ret double %r
|
ret double %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__stdlib_atan(double) nounwind readnone alwaysinline {
|
define double @__stdlib_atan(double) nounwind readnone alwaysinline {
|
||||||
%r = call double @atan(double %0)
|
%r = call double @atan(double %0)
|
||||||
ret double %r
|
ret double %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__stdlib_atan2(double, double) nounwind readnone alwaysinline {
|
define double @__stdlib_atan2(double, double) nounwind readnone alwaysinline {
|
||||||
%r = call double @atan2(double %0, double %1)
|
%r = call double @atan2(double %0, double %1)
|
||||||
ret double %r
|
ret double %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__stdlib_log(double) nounwind readnone alwaysinline {
|
define double @__stdlib_log(double) nounwind readnone alwaysinline {
|
||||||
%r = call double @log(double %0)
|
%r = call double @log(double %0)
|
||||||
ret double %r
|
ret double %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__stdlib_exp(double) nounwind readnone alwaysinline {
|
define double @__stdlib_exp(double) nounwind readnone alwaysinline {
|
||||||
%r = call double @exp(double %0)
|
%r = call double @exp(double %0)
|
||||||
ret double %r
|
ret double %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__stdlib_pow(double, double) nounwind readnone alwaysinline {
|
define double @__stdlib_pow(double, double) nounwind readnone alwaysinline {
|
||||||
%r = call double @pow(double %0, double %1)
|
%r = call double @pow(double %0, double %1)
|
||||||
ret double %r
|
ret double %r
|
||||||
}
|
}
|
||||||
@@ -1945,7 +1945,7 @@ define internal double @__stdlib_pow(double, double) nounwind readnone alwaysinl
|
|||||||
declare void @llvm.memory.barrier(i1 %loadload, i1 %loadstore, i1 %storeload,
|
declare void @llvm.memory.barrier(i1 %loadload, i1 %loadstore, i1 %storeload,
|
||||||
i1 %storestore, i1 %device)
|
i1 %storestore, i1 %device)
|
||||||
|
|
||||||
define internal void @__memory_barrier() nounwind readnone alwaysinline {
|
define void @__memory_barrier() nounwind readnone alwaysinline {
|
||||||
;; see http://llvm.org/bugs/show_bug.cgi?id=2829. It seems like we
|
;; see http://llvm.org/bugs/show_bug.cgi?id=2829. It seems like we
|
||||||
;; only get an MFENCE on x86 if "device" is true, but IMHO we should
|
;; only get an MFENCE on x86 if "device" is true, but IMHO we should
|
||||||
;; in the case where the first 4 args are true but it is false.
|
;; in the case where the first 4 args are true but it is false.
|
||||||
@@ -1987,7 +1987,7 @@ global_atomic_uniform($1, umax, i64, uint64)
|
|||||||
global_swap($1, i32, int32)
|
global_swap($1, i32, int32)
|
||||||
global_swap($1, i64, int64)
|
global_swap($1, i64, int64)
|
||||||
|
|
||||||
define internal <$1 x float> @__atomic_swap_float_global(float * %ptr, <$1 x float> %val,
|
define <$1 x float> @__atomic_swap_float_global(float * %ptr, <$1 x float> %val,
|
||||||
<$1 x i32> %mask) nounwind alwaysinline {
|
<$1 x i32> %mask) nounwind alwaysinline {
|
||||||
%iptr = bitcast float * %ptr to i32 *
|
%iptr = bitcast float * %ptr to i32 *
|
||||||
%ival = bitcast <$1 x float> %val to <$1 x i32>
|
%ival = bitcast <$1 x float> %val to <$1 x i32>
|
||||||
@@ -1996,7 +1996,7 @@ define internal <$1 x float> @__atomic_swap_float_global(float * %ptr, <$1 x flo
|
|||||||
ret <$1 x float> %ret
|
ret <$1 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <$1 x double> @__atomic_swap_double_global(double * %ptr, <$1 x double> %val,
|
define <$1 x double> @__atomic_swap_double_global(double * %ptr, <$1 x double> %val,
|
||||||
<$1 x i32> %mask) nounwind alwaysinline {
|
<$1 x i32> %mask) nounwind alwaysinline {
|
||||||
%iptr = bitcast double * %ptr to i64 *
|
%iptr = bitcast double * %ptr to i64 *
|
||||||
%ival = bitcast <$1 x double> %val to <$1 x i64>
|
%ival = bitcast <$1 x double> %val to <$1 x i64>
|
||||||
@@ -2005,7 +2005,7 @@ define internal <$1 x double> @__atomic_swap_double_global(double * %ptr, <$1 x
|
|||||||
ret <$1 x double> %ret
|
ret <$1 x double> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal float @__atomic_swap_uniform_float_global(float * %ptr, float %val,
|
define float @__atomic_swap_uniform_float_global(float * %ptr, float %val,
|
||||||
<$1 x i32> %mask) nounwind alwaysinline {
|
<$1 x i32> %mask) nounwind alwaysinline {
|
||||||
%iptr = bitcast float * %ptr to i32 *
|
%iptr = bitcast float * %ptr to i32 *
|
||||||
%ival = bitcast float %val to i32
|
%ival = bitcast float %val to i32
|
||||||
@@ -2014,7 +2014,7 @@ define internal float @__atomic_swap_uniform_float_global(float * %ptr, float %v
|
|||||||
ret float %ret
|
ret float %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__atomic_swap_uniform_double_global(double * %ptr, double %val,
|
define double @__atomic_swap_uniform_double_global(double * %ptr, double %val,
|
||||||
<$1 x i32> %mask) nounwind alwaysinline {
|
<$1 x i32> %mask) nounwind alwaysinline {
|
||||||
%iptr = bitcast double * %ptr to i64 *
|
%iptr = bitcast double * %ptr to i64 *
|
||||||
%ival = bitcast double %val to i64
|
%ival = bitcast double %val to i64
|
||||||
@@ -2026,7 +2026,7 @@ define internal double @__atomic_swap_uniform_double_global(double * %ptr, doubl
|
|||||||
global_atomic_exchange($1, i32, int32)
|
global_atomic_exchange($1, i32, int32)
|
||||||
global_atomic_exchange($1, i64, int64)
|
global_atomic_exchange($1, i64, int64)
|
||||||
|
|
||||||
define internal <$1 x float> @__atomic_compare_exchange_float_global(float * %ptr,
|
define <$1 x float> @__atomic_compare_exchange_float_global(float * %ptr,
|
||||||
<$1 x float> %cmp, <$1 x float> %val, <$1 x i32> %mask) nounwind alwaysinline {
|
<$1 x float> %cmp, <$1 x float> %val, <$1 x i32> %mask) nounwind alwaysinline {
|
||||||
%iptr = bitcast float * %ptr to i32 *
|
%iptr = bitcast float * %ptr to i32 *
|
||||||
%icmp = bitcast <$1 x float> %cmp to <$1 x i32>
|
%icmp = bitcast <$1 x float> %cmp to <$1 x i32>
|
||||||
@@ -2037,7 +2037,7 @@ define internal <$1 x float> @__atomic_compare_exchange_float_global(float * %pt
|
|||||||
ret <$1 x float> %ret
|
ret <$1 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <$1 x double> @__atomic_compare_exchange_double_global(double * %ptr,
|
define <$1 x double> @__atomic_compare_exchange_double_global(double * %ptr,
|
||||||
<$1 x double> %cmp, <$1 x double> %val, <$1 x i32> %mask) nounwind alwaysinline {
|
<$1 x double> %cmp, <$1 x double> %val, <$1 x i32> %mask) nounwind alwaysinline {
|
||||||
%iptr = bitcast double * %ptr to i64 *
|
%iptr = bitcast double * %ptr to i64 *
|
||||||
%icmp = bitcast <$1 x double> %cmp to <$1 x i64>
|
%icmp = bitcast <$1 x double> %cmp to <$1 x i64>
|
||||||
@@ -2048,7 +2048,7 @@ define internal <$1 x double> @__atomic_compare_exchange_double_global(double *
|
|||||||
ret <$1 x double> %ret
|
ret <$1 x double> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal float @__atomic_compare_exchange_uniform_float_global(float * %ptr, float %cmp, float %val,
|
define float @__atomic_compare_exchange_uniform_float_global(float * %ptr, float %cmp, float %val,
|
||||||
<$1 x i32> %mask) nounwind alwaysinline {
|
<$1 x i32> %mask) nounwind alwaysinline {
|
||||||
%iptr = bitcast float * %ptr to i32 *
|
%iptr = bitcast float * %ptr to i32 *
|
||||||
%icmp = bitcast float %cmp to i32
|
%icmp = bitcast float %cmp to i32
|
||||||
@@ -2059,7 +2059,7 @@ define internal float @__atomic_compare_exchange_uniform_float_global(float * %p
|
|||||||
ret float %ret
|
ret float %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal double @__atomic_compare_exchange_uniform_double_global(double * %ptr, double %cmp,
|
define double @__atomic_compare_exchange_uniform_double_global(double * %ptr, double %cmp,
|
||||||
double %val, <$1 x i32> %mask) nounwind alwaysinline {
|
double %val, <$1 x i32> %mask) nounwind alwaysinline {
|
||||||
%iptr = bitcast double * %ptr to i64 *
|
%iptr = bitcast double * %ptr to i64 *
|
||||||
%icmp = bitcast double %cmp to i64
|
%icmp = bitcast double %cmp to i64
|
||||||
@@ -2084,13 +2084,13 @@ define internal double @__atomic_compare_exchange_uniform_double_global(double *
|
|||||||
;; $4: {slt,sgt} comparison operator to used
|
;; $4: {slt,sgt} comparison operator to used
|
||||||
|
|
||||||
define(`i64minmax', `
|
define(`i64minmax', `
|
||||||
define internal i64 @__$2_uniform_$3(i64, i64) nounwind alwaysinline readnone {
|
define i64 @__$2_uniform_$3(i64, i64) nounwind alwaysinline readnone {
|
||||||
%c = icmp $4 i64 %0, %1
|
%c = icmp $4 i64 %0, %1
|
||||||
%r = select i1 %c, i64 %0, i64 %1
|
%r = select i1 %c, i64 %0, i64 %1
|
||||||
ret i64 %r
|
ret i64 %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal <$1 x i64> @__$2_varying_$3(<$1 x i64>, <$1 x i64>) nounwind alwaysinline readnone {
|
define <$1 x i64> @__$2_varying_$3(<$1 x i64>, <$1 x i64>) nounwind alwaysinline readnone {
|
||||||
%rptr = alloca <$1 x i64>
|
%rptr = alloca <$1 x i64>
|
||||||
%r64ptr = bitcast <$1 x i64> * %rptr to i64 *
|
%r64ptr = bitcast <$1 x i64> * %rptr to i64 *
|
||||||
|
|
||||||
@@ -2469,7 +2469,7 @@ done:
|
|||||||
declare i32 @llvm.cttz.i32(i32)
|
declare i32 @llvm.cttz.i32(i32)
|
||||||
|
|
||||||
define(`reduce_equal_aux', `
|
define(`reduce_equal_aux', `
|
||||||
define internal i1 @__reduce_equal_$3(<$1 x $2> %v, $2 * %samevalue,
|
define i1 @__reduce_equal_$3(<$1 x $2> %v, $2 * %samevalue,
|
||||||
<$1 x i32> %mask) nounwind alwaysinline {
|
<$1 x i32> %mask) nounwind alwaysinline {
|
||||||
entry:
|
entry:
|
||||||
%mm = call i32 @__movmsk(<$1 x i32> %mask)
|
%mm = call i32 @__movmsk(<$1 x i32> %mask)
|
||||||
@@ -2557,7 +2557,7 @@ reduce_equal_aux($1, double, double, i64, fcmp, 64)
|
|||||||
; $6: suffix for function (e.g. add_float)
|
; $6: suffix for function (e.g. add_float)
|
||||||
|
|
||||||
define(`exclusive_scan', `
|
define(`exclusive_scan', `
|
||||||
define internal <$1 x $2> @__exclusive_scan_$6(<$1 x $2> %v,
|
define <$1 x $2> @__exclusive_scan_$6(<$1 x $2> %v,
|
||||||
<$1 x i32> %mask) nounwind alwaysinline {
|
<$1 x i32> %mask) nounwind alwaysinline {
|
||||||
; first, set the value of any off lanes to the identity value
|
; first, set the value of any off lanes to the identity value
|
||||||
%ptr = alloca <$1 x $2>
|
%ptr = alloca <$1 x $2>
|
||||||
@@ -2686,7 +2686,7 @@ pl_done:
|
|||||||
define(`gen_gather', `
|
define(`gen_gather', `
|
||||||
;; Define the utility function to do the gather operation for a single element
|
;; Define the utility function to do the gather operation for a single element
|
||||||
;; of the type
|
;; of the type
|
||||||
define internal <$1 x $2> @__gather_elt_$2(i8 * %ptr, <$1 x i32> %offsets, <$1 x $2> %ret,
|
define <$1 x $2> @__gather_elt_$2(i8 * %ptr, <$1 x i32> %offsets, <$1 x $2> %ret,
|
||||||
i32 %lane) nounwind readonly alwaysinline {
|
i32 %lane) nounwind readonly alwaysinline {
|
||||||
; compute address for this one from the base
|
; compute address for this one from the base
|
||||||
%offset32 = extractelement <$1 x i32> %offsets, i32 %lane
|
%offset32 = extractelement <$1 x i32> %offsets, i32 %lane
|
||||||
@@ -2735,7 +2735,7 @@ define <$1 x $2> @__gather_base_offsets_$2(i8 * %ptr, <$1 x i32> %offsets,
|
|||||||
define(`gen_scatter', `
|
define(`gen_scatter', `
|
||||||
;; Define the function that descripes the work to do to scatter a single
|
;; Define the function that descripes the work to do to scatter a single
|
||||||
;; value
|
;; value
|
||||||
define internal void @__scatter_elt_$2(i64 %ptr64, <$1 x i32> %offsets, <$1 x $2> %values,
|
define void @__scatter_elt_$2(i64 %ptr64, <$1 x i32> %offsets, <$1 x $2> %values,
|
||||||
i32 %lane) nounwind alwaysinline {
|
i32 %lane) nounwind alwaysinline {
|
||||||
%offset32 = extractelement <$1 x i32> %offsets, i32 %lane
|
%offset32 = extractelement <$1 x i32> %offsets, i32 %lane
|
||||||
%offset64 = zext i32 %offset32 to i64
|
%offset64 = zext i32 %offset32 to i64
|
||||||
|
|||||||
11
opt.cpp
11
opt.cpp
@@ -2814,11 +2814,11 @@ llvm::RegisterPass<MakeInternalFuncsStaticPass>
|
|||||||
bool
|
bool
|
||||||
MakeInternalFuncsStaticPass::runOnModule(llvm::Module &module) {
|
MakeInternalFuncsStaticPass::runOnModule(llvm::Module &module) {
|
||||||
const char *names[] = {
|
const char *names[] = {
|
||||||
"__do_print", "__fast_masked_vload", "__num_cores",
|
"__fast_masked_vload",
|
||||||
"__gather_base_offsets_i8", "__gather_base_offsets_i16",
|
"__gather_base_offsets_i8", "__gather_base_offsets_i16",
|
||||||
"__gather_base_offsets_i32", "__gather_base_offsets_i64",
|
"__gather_base_offsets_i32", "__gather_base_offsets_i64",
|
||||||
"__gather_elt_8", "__gather_elt_16",
|
"__gather_elt_i8", "__gather_elt_i16",
|
||||||
"__gather_elt_32", "__gather_elt_64",
|
"__gather_elt_i32", "__gather_elt_i64",
|
||||||
"__load_and_broadcast_8", "__load_and_broadcast_16",
|
"__load_and_broadcast_8", "__load_and_broadcast_16",
|
||||||
"__load_and_broadcast_32", "__load_and_broadcast_64",
|
"__load_and_broadcast_32", "__load_and_broadcast_64",
|
||||||
"__load_masked_8", "__load_masked_16",
|
"__load_masked_8", "__load_masked_16",
|
||||||
@@ -2827,11 +2827,10 @@ MakeInternalFuncsStaticPass::runOnModule(llvm::Module &module) {
|
|||||||
"__masked_store_32", "__masked_store_64",
|
"__masked_store_32", "__masked_store_64",
|
||||||
"__masked_store_blend_8", "__masked_store_blend_16",
|
"__masked_store_blend_8", "__masked_store_blend_16",
|
||||||
"__masked_store_blend_32", "__masked_store_blend_64",
|
"__masked_store_blend_32", "__masked_store_blend_64",
|
||||||
"__packed_load_active", "__packed_store_active",
|
|
||||||
"__scatter_base_offsets_i8", "__scatter_base_offsets_i16",
|
"__scatter_base_offsets_i8", "__scatter_base_offsets_i16",
|
||||||
"__scatter_base_offsets_i32", "__scatter_base_offsets_i64",
|
"__scatter_base_offsets_i32", "__scatter_base_offsets_i64",
|
||||||
"__scatter_elt_8", "__scatter_elt_16",
|
"__scatter_elt_i8", "__scatter_elt_i16",
|
||||||
"__scatter_elt_32", "__scatter_elt_64",
|
"__scatter_elt_i32", "__scatter_elt_i64",
|
||||||
};
|
};
|
||||||
|
|
||||||
bool modifiedAny = false;
|
bool modifiedAny = false;
|
||||||
|
|||||||
Reference in New Issue
Block a user