Workaround change to linker behavior in LLVM 3.1

Now, the Linker::LinkModules() call doesn't link in any functions
marked as 'internal', which is problematic, since we'd like to have
just about all of the builtins marked as internal so that they are
eliminated after they've been inlined when they are in fact used.

This change removes all of the internal qualifiers in the builtins
and adds a lSetInternalFunctions() routine to builtins.cpp that
sets this property on the functions that need it after they've
been linked in by LinkModules().
This commit is contained in:
Matt Pharr
2011-11-05 16:57:26 -07:00
parent b0d476fcdc
commit cabe358c0a
13 changed files with 760 additions and 447 deletions

View File

@@ -62,7 +62,7 @@ include(`builtins-sse2-common.ll')
; return x;
;}
define internal <4 x float> @__round_varying_float(<4 x float>) nounwind readonly alwaysinline {
define <4 x float> @__round_varying_float(<4 x float>) nounwind readonly alwaysinline {
%float_to_int_bitcast.i.i.i.i = bitcast <4 x float> %0 to <4 x i32>
%bitop.i.i = and <4 x i32> %float_to_int_bitcast.i.i.i.i, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
%bitop.i = xor <4 x i32> %float_to_int_bitcast.i.i.i.i, %bitop.i.i
@@ -88,7 +88,7 @@ define internal <4 x float> @__round_varying_float(<4 x float>) nounwind readonl
; return y + delta;
;}
define internal <4 x float> @__floor_varying_float(<4 x float>) nounwind readonly alwaysinline {
define <4 x float> @__floor_varying_float(<4 x float>) nounwind readonly alwaysinline {
%calltmp.i = tail call <4 x float> @__round_varying_float(<4 x float> %0) nounwind
%bincmp.i = fcmp ogt <4 x float> %calltmp.i, %0
%val_to_boolvec32.i = sext <4 x i1> %bincmp.i to <4 x i32>
@@ -110,7 +110,7 @@ define internal <4 x float> @__floor_varying_float(<4 x float>) nounwind readonl
; return y + delta;
;}
define internal <4 x float> @__ceil_varying_float(<4 x float>) nounwind readonly alwaysinline {
define <4 x float> @__ceil_varying_float(<4 x float>) nounwind readonly alwaysinline {
%calltmp.i = tail call <4 x float> @__round_varying_float(<4 x float> %0) nounwind
%bincmp.i = fcmp olt <4 x float> %calltmp.i, %0
%val_to_boolvec32.i = sext <4 x i1> %bincmp.i to <4 x i32>
@@ -123,15 +123,15 @@ define internal <4 x float> @__ceil_varying_float(<4 x float>) nounwind readonly
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rounding doubles
define internal <4 x double> @__round_varying_double(<4 x double>) nounwind readonly alwaysinline {
define <4 x double> @__round_varying_double(<4 x double>) nounwind readonly alwaysinline {
unary1to4(double, @round)
}
define internal <4 x double> @__floor_varying_double(<4 x double>) nounwind readonly alwaysinline {
define <4 x double> @__floor_varying_double(<4 x double>) nounwind readonly alwaysinline {
unary1to4(double, @floor)
}
define internal <4 x double> @__ceil_varying_double(<4 x double>) nounwind readonly alwaysinline {
define <4 x double> @__ceil_varying_double(<4 x double>) nounwind readonly alwaysinline {
unary1to4(double, @ceil)
}
@@ -143,7 +143,7 @@ define internal <4 x double> @__ceil_varying_double(<4 x double>) nounwind reado
; vector element, if the mask is on, we return the corresponding value
; from %1, and otherwise return the value from %0.
define internal <4 x i32> @__vselect_i32(<4 x i32>, <4 x i32> ,
define <4 x i32> @__vselect_i32(<4 x i32>, <4 x i32> ,
<4 x i32> %mask) nounwind readnone alwaysinline {
%notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
%cleared_old = and <4 x i32> %0, %notmask
@@ -152,7 +152,7 @@ define internal <4 x i32> @__vselect_i32(<4 x i32>, <4 x i32> ,
ret <4 x i32> %new
}
define internal <4 x float> @__vselect_float(<4 x float>, <4 x float>,
define <4 x float> @__vselect_float(<4 x float>, <4 x float>,
<4 x i32> %mask) nounwind readnone alwaysinline {
%v0 = bitcast <4 x float> %0 to <4 x i32>
%v1 = bitcast <4 x float> %1 to <4 x i32>
@@ -166,27 +166,27 @@ define internal <4 x float> @__vselect_float(<4 x float>, <4 x float>,
; extend the i1 vector result to an i32 mask. The __vselect does the
; rest...
define internal <4 x i32> @__min_varying_int32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
define <4 x i32> @__min_varying_int32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
%c = icmp slt <4 x i32> %0, %1
%mask = sext <4 x i1> %c to <4 x i32>
%v = call <4 x i32> @__vselect_i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> %mask)
ret <4 x i32> %v
}
define internal i32 @__min_uniform_int32(i32, i32) nounwind readonly alwaysinline {
define i32 @__min_uniform_int32(i32, i32) nounwind readonly alwaysinline {
%c = icmp slt i32 %0, %1
%r = select i1 %c, i32 %0, i32 %1
ret i32 %r
}
define internal <4 x i32> @__max_varying_int32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
define <4 x i32> @__max_varying_int32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
%c = icmp sgt <4 x i32> %0, %1
%mask = sext <4 x i1> %c to <4 x i32>
%v = call <4 x i32> @__vselect_i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> %mask)
ret <4 x i32> %v
}
define internal i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinline {
define i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinline {
%c = icmp sgt i32 %0, %1
%r = select i1 %c, i32 %0, i32 %1
ret i32 %r
@@ -195,27 +195,27 @@ define internal i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinlin
; The functions for unsigned ints are similar, just with unsigned
; comparison functions...
define internal <4 x i32> @__min_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
define <4 x i32> @__min_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
%c = icmp ult <4 x i32> %0, %1
%mask = sext <4 x i1> %c to <4 x i32>
%v = call <4 x i32> @__vselect_i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> %mask)
ret <4 x i32> %v
}
define internal i32 @__min_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
define i32 @__min_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
%c = icmp ult i32 %0, %1
%r = select i1 %c, i32 %0, i32 %1
ret i32 %r
}
define internal <4 x i32> @__max_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
define <4 x i32> @__max_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
%c = icmp ugt <4 x i32> %0, %1
%mask = sext <4 x i1> %c to <4 x i32>
%v = call <4 x i32> @__vselect_i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> %mask)
ret <4 x i32> %v
}
define internal i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
define i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
%c = icmp ugt i32 %0, %1
%r = select i1 %c, i32 %0, i32 %1
ret i32 %r
@@ -227,13 +227,13 @@ define internal i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinli
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
define internal i32 @__movmsk(<4 x i32>) nounwind readnone alwaysinline {
define i32 @__movmsk(<4 x i32>) nounwind readnone alwaysinline {
%floatmask = bitcast <4 x i32> %0 to <4 x float>
%v = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %floatmask) nounwind readnone
ret i32 %v
}
define internal float @__reduce_add_float(<4 x float> %v) nounwind readonly alwaysinline {
define float @__reduce_add_float(<4 x float> %v) nounwind readonly alwaysinline {
%v1 = shufflevector <4 x float> %v, <4 x float> undef,
<4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%m1 = fadd <4 x float> %v1, %v
@@ -243,15 +243,15 @@ define internal float @__reduce_add_float(<4 x float> %v) nounwind readonly alwa
ret float %sum
}
define internal float @__reduce_min_float(<4 x float>) nounwind readnone {
define float @__reduce_min_float(<4 x float>) nounwind readnone {
reduce4(float, @__min_varying_float, @__min_uniform_float)
}
define internal float @__reduce_max_float(<4 x float>) nounwind readnone {
define float @__reduce_max_float(<4 x float>) nounwind readnone {
reduce4(float, @__max_varying_float, @__max_uniform_float)
}
define internal i32 @__reduce_add_int32(<4 x i32> %v) nounwind readnone {
define i32 @__reduce_add_int32(<4 x i32> %v) nounwind readnone {
%v1 = shufflevector <4 x i32> %v, <4 x i32> undef,
<4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%m1 = add <4 x i32> %v1, %v
@@ -261,29 +261,29 @@ define internal i32 @__reduce_add_int32(<4 x i32> %v) nounwind readnone {
ret i32 %sum
}
define internal i32 @__reduce_min_int32(<4 x i32>) nounwind readnone {
define i32 @__reduce_min_int32(<4 x i32>) nounwind readnone {
reduce4(i32, @__min_varying_int32, @__min_uniform_int32)
}
define internal i32 @__reduce_max_int32(<4 x i32>) nounwind readnone {
define i32 @__reduce_max_int32(<4 x i32>) nounwind readnone {
reduce4(i32, @__max_varying_int32, @__max_uniform_int32)
}
define internal i32 @__reduce_add_uint32(<4 x i32> %v) nounwind readnone {
define i32 @__reduce_add_uint32(<4 x i32> %v) nounwind readnone {
%r = call i32 @__reduce_add_int32(<4 x i32> %v)
ret i32 %r
}
define internal i32 @__reduce_min_uint32(<4 x i32>) nounwind readnone {
define i32 @__reduce_min_uint32(<4 x i32>) nounwind readnone {
reduce4(i32, @__min_varying_uint32, @__min_uniform_uint32)
}
define internal i32 @__reduce_max_uint32(<4 x i32>) nounwind readnone {
define i32 @__reduce_max_uint32(<4 x i32>) nounwind readnone {
reduce4(i32, @__max_varying_uint32, @__max_uniform_uint32)
}
define internal double @__reduce_add_double(<4 x double>) nounwind readnone {
define double @__reduce_add_double(<4 x double>) nounwind readnone {
%v0 = shufflevector <4 x double> %0, <4 x double> undef,
<2 x i32> <i32 0, i32 1>
%v1 = shufflevector <4 x double> %0, <4 x double> undef,
@@ -295,15 +295,15 @@ define internal double @__reduce_add_double(<4 x double>) nounwind readnone {
ret double %m
}
define internal double @__reduce_min_double(<4 x double>) nounwind readnone {
define double @__reduce_min_double(<4 x double>) nounwind readnone {
reduce4(double, @__min_varying_double, @__min_uniform_double)
}
define internal double @__reduce_max_double(<4 x double>) nounwind readnone {
define double @__reduce_max_double(<4 x double>) nounwind readnone {
reduce4(double, @__max_varying_double, @__max_uniform_double)
}
define internal i64 @__reduce_add_int64(<4 x i64>) nounwind readnone {
define i64 @__reduce_add_int64(<4 x i64>) nounwind readnone {
%v0 = shufflevector <4 x i64> %0, <4 x i64> undef,
<2 x i32> <i32 0, i32 1>
%v1 = shufflevector <4 x i64> %0, <4 x i64> undef,
@@ -315,19 +315,19 @@ define internal i64 @__reduce_add_int64(<4 x i64>) nounwind readnone {
ret i64 %m
}
define internal i64 @__reduce_min_int64(<4 x i64>) nounwind readnone {
define i64 @__reduce_min_int64(<4 x i64>) nounwind readnone {
reduce4(i64, @__min_varying_int64, @__min_uniform_int64)
}
define internal i64 @__reduce_max_int64(<4 x i64>) nounwind readnone {
define i64 @__reduce_max_int64(<4 x i64>) nounwind readnone {
reduce4(i64, @__max_varying_int64, @__max_uniform_int64)
}
define internal i64 @__reduce_min_uint64(<4 x i64>) nounwind readnone {
define i64 @__reduce_min_uint64(<4 x i64>) nounwind readnone {
reduce4(i64, @__min_varying_uint64, @__min_uniform_uint64)
}
define internal i64 @__reduce_max_uint64(<4 x i64>) nounwind readnone {
define i64 @__reduce_max_uint64(<4 x i64>) nounwind readnone {
reduce4(i64, @__max_varying_uint64, @__max_uniform_uint64)
}
@@ -393,7 +393,7 @@ define void @__masked_store_blend_64(<4 x i64>* nocapture %ptr, <4 x i64> %new,
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
define internal <4 x float> @__rcp_varying_float(<4 x float>) nounwind readonly alwaysinline {
define <4 x float> @__rcp_varying_float(<4 x float>) nounwind readonly alwaysinline {
%call = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %0)
; do one N-R iteration to improve precision
; float iv = __rcp_v(v);
@@ -409,7 +409,7 @@ define internal <4 x float> @__rcp_varying_float(<4 x float>) nounwind readonly
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
define internal <4 x float> @__rsqrt_varying_float(<4 x float> %v) nounwind readonly alwaysinline {
define <4 x float> @__rsqrt_varying_float(<4 x float> %v) nounwind readonly alwaysinline {
; float is = __rsqrt_v(v);
%is = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %v)
; Newton-Raphson iteration to improve precision
@@ -427,7 +427,7 @@ define internal <4 x float> @__rsqrt_varying_float(<4 x float> %v) nounwind read
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
define internal <4 x float> @__sqrt_varying_float(<4 x float>) nounwind readonly alwaysinline {
define <4 x float> @__sqrt_varying_float(<4 x float>) nounwind readonly alwaysinline {
%call = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %0)
ret <4 x float> %call
}
@@ -446,48 +446,48 @@ declare <4 x float> @__svml_logf4(<4 x float>) nounwind readnone
declare <4 x float> @__svml_powf4(<4 x float>, <4 x float>) nounwind readnone
define internal <4 x float> @__svml_sin(<4 x float>) nounwind readnone alwaysinline {
define <4 x float> @__svml_sin(<4 x float>) nounwind readnone alwaysinline {
%ret = call <4 x float> @__svml_sinf4(<4 x float> %0)
ret <4 x float> %ret
}
define internal <4 x float> @__svml_cos(<4 x float>) nounwind readnone alwaysinline {
define <4 x float> @__svml_cos(<4 x float>) nounwind readnone alwaysinline {
%ret = call <4 x float> @__svml_cosf4(<4 x float> %0)
ret <4 x float> %ret
}
define internal void @__svml_sincos(<4 x float>, <4 x float> *, <4 x float> *) nounwind readnone alwaysinline {
define void @__svml_sincos(<4 x float>, <4 x float> *, <4 x float> *) nounwind readnone alwaysinline {
%s = call <4 x float> @__svml_sincosf4(<4 x float> * %2, <4 x float> %0)
store <4 x float> %s, <4 x float> * %1
ret void
}
define internal <4 x float> @__svml_tan(<4 x float>) nounwind readnone alwaysinline {
define <4 x float> @__svml_tan(<4 x float>) nounwind readnone alwaysinline {
%ret = call <4 x float> @__svml_tanf4(<4 x float> %0)
ret <4 x float> %ret
}
define internal <4 x float> @__svml_atan(<4 x float>) nounwind readnone alwaysinline {
define <4 x float> @__svml_atan(<4 x float>) nounwind readnone alwaysinline {
%ret = call <4 x float> @__svml_atanf4(<4 x float> %0)
ret <4 x float> %ret
}
define internal <4 x float> @__svml_atan2(<4 x float>, <4 x float>) nounwind readnone alwaysinline {
define <4 x float> @__svml_atan2(<4 x float>, <4 x float>) nounwind readnone alwaysinline {
%ret = call <4 x float> @__svml_atan2f4(<4 x float> %0, <4 x float> %1)
ret <4 x float> %ret
}
define internal <4 x float> @__svml_exp(<4 x float>) nounwind readnone alwaysinline {
define <4 x float> @__svml_exp(<4 x float>) nounwind readnone alwaysinline {
%ret = call <4 x float> @__svml_expf4(<4 x float> %0)
ret <4 x float> %ret
}
define internal <4 x float> @__svml_log(<4 x float>) nounwind readnone alwaysinline {
define <4 x float> @__svml_log(<4 x float>) nounwind readnone alwaysinline {
%ret = call <4 x float> @__svml_logf4(<4 x float> %0)
ret <4 x float> %ret
}
define internal <4 x float> @__svml_pow(<4 x float>, <4 x float>) nounwind readnone alwaysinline {
define <4 x float> @__svml_pow(<4 x float>, <4 x float>) nounwind readnone alwaysinline {
%ret = call <4 x float> @__svml_powf4(<4 x float> %0, <4 x float> %1)
ret <4 x float> %ret
}
@@ -498,12 +498,12 @@ define internal <4 x float> @__svml_pow(<4 x float>, <4 x float>) nounwind readn
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
define internal <4 x float> @__max_varying_float(<4 x float>, <4 x float>) nounwind readonly alwaysinline {
define <4 x float> @__max_varying_float(<4 x float>, <4 x float>) nounwind readonly alwaysinline {
%call = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %0, <4 x float> %1)
ret <4 x float> %call
}
define internal <4 x float> @__min_varying_float(<4 x float>, <4 x float>) nounwind readonly alwaysinline {
define <4 x float> @__min_varying_float(<4 x float>, <4 x float>) nounwind readonly alwaysinline {
%call = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %0, <4 x float> %1)
ret <4 x float> %call
}
@@ -513,7 +513,7 @@ define internal <4 x float> @__min_varying_float(<4 x float>, <4 x float>) nounw
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
define internal <4 x double> @__sqrt_varying_double(<4 x double>) nounwind alwaysinline {
define <4 x double> @__sqrt_varying_double(<4 x double>) nounwind alwaysinline {
unary2to4(ret, double, @llvm.x86.sse2.sqrt.pd, %0)
ret <4 x double> %ret
}
@@ -524,12 +524,12 @@ define internal <4 x double> @__sqrt_varying_double(<4 x double>) nounwind alway
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
define internal <4 x double> @__min_varying_double(<4 x double>, <4 x double>) nounwind readnone {
define <4 x double> @__min_varying_double(<4 x double>, <4 x double>) nounwind readnone {
binary2to4(ret, double, @llvm.x86.sse2.min.pd, %0, %1)
ret <4 x double> %ret
}
define internal <4 x double> @__max_varying_double(<4 x double>, <4 x double>) nounwind readnone {
define <4 x double> @__max_varying_double(<4 x double>, <4 x double>) nounwind readnone {
binary2to4(ret, double, @llvm.x86.sse2.max.pd, %0, %1)
ret <4 x double> %ret
}