Work around a change in linker behavior in LLVM 3.1

Now, the Linker::LinkModules() call doesn't link in any functions
marked as 'internal'. This is problematic, since we'd like to have
just about all of the builtins marked as internal so that, when they
are in fact used, their definitions are eliminated once they've been
inlined.

This change removes all of the internal qualifiers in the builtins
and adds a lSetInternalFunctions() routine to builtins.cpp that
sets this property on the functions that need it after they've
been linked in by LinkModules().
This commit is contained in:
Matt Pharr
2011-11-05 16:57:26 -07:00
parent b0d476fcdc
commit cabe358c0a
13 changed files with 760 additions and 447 deletions

View File

@@ -210,6 +210,9 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
if (name.size() < 3 || name[0] != '_' || name[1] != '_')
return false;
Debug(SourcePos(), "Attempting to create ispc symbol for function \"%s\".",
name.c_str());
// An unfortunate hack: we want this builtin function to have the
// signature "int __sext_varying_bool(bool)", but the ispc function
// symbol creation code below assumes that any LLVM vector of i32s is a
@@ -240,9 +243,11 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
const Type *returnType = lLLVMTypeToISPCType(ftype->getReturnType(),
intAsUnsigned);
if (!returnType)
if (returnType == NULL) {
Debug(SourcePos(), "Failed: return type not representable.");
// return type not representable in ispc -> not callable from ispc
return false;
}
// Iterate over the arguments and try to find their equivalent ispc
// types. Track if any of the arguments has an integer type.
@@ -251,8 +256,10 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
for (unsigned int j = 0; j < ftype->getNumParams(); ++j) {
const llvm::Type *llvmArgType = ftype->getParamType(j);
const Type *type = lLLVMTypeToISPCType(llvmArgType, intAsUnsigned);
if (type == NULL)
if (type == NULL) {
Debug(SourcePos(), "Failed: type of parameter %d not representable", j);
return false;
}
anyIntArgs |=
(Type::Equal(type, lLLVMTypeToISPCType(llvmArgType, !intAsUnsigned)) == false);
anyReferenceArgs |= (dynamic_cast<const ReferenceType *>(type) != NULL);
@@ -329,6 +336,312 @@ lCheckModuleIntrinsics(llvm::Module *module) {
}
/** We'd like to have all of these functions declared as 'internal' in
    their respective bitcode files so that if they aren't needed by the
    user's program they are eliminated from the final output.  However, if
    we do so, then they aren't brought in by the LinkModules() call below
    since they aren't yet used by anything in the module they're being
    linked with (in LLVM 3.1, at least).

    Therefore, we don't declare them as internal when we first define them,
    but instead mark them as internal after they've been linked in.  This
    is admittedly a kludge.

    Names from the table that are not present in the module are skipped,
    as are functions that are present only as declarations (no body):
    LLVM only permits external / extern_weak linkage on declarations, so
    giving one internal linkage would produce an invalid module.

    @param module  Module (after the builtins bitcode has been linked in)
                   whose builtin function definitions should be given
                   internal linkage.
 */
static void
lSetInternalFunctions(llvm::Module *module) {
    // The table never changes, so build it once rather than on every call.
    static const char *const names[] = {
        "__add_uniform_int32",
        "__add_uniform_int64",
        "__add_varying_int32",
        "__add_varying_int64",
        "__aos_to_soa3_float",
        "__aos_to_soa3_float16",
        "__aos_to_soa3_float4",
        "__aos_to_soa3_float8",
        "__aos_to_soa3_int32",
        "__aos_to_soa4_float",
        "__aos_to_soa4_float16",
        "__aos_to_soa4_float4",
        "__aos_to_soa4_float8",
        "__aos_to_soa4_int32",
        "__atomic_add_int32_global",
        "__atomic_add_int64_global",
        "__atomic_add_uniform_int32_global",
        "__atomic_add_uniform_int64_global",
        "__atomic_and_int32_global",
        "__atomic_and_int64_global",
        "__atomic_and_uniform_int32_global",
        "__atomic_and_uniform_int64_global",
        "__atomic_compare_exchange_double_global",
        "__atomic_compare_exchange_float_global",
        "__atomic_compare_exchange_int32_global",
        "__atomic_compare_exchange_int64_global",
        "__atomic_compare_exchange_uniform_double_global",
        "__atomic_compare_exchange_uniform_float_global",
        "__atomic_compare_exchange_uniform_int32_global",
        "__atomic_compare_exchange_uniform_int64_global",
        "__atomic_max_uniform_int32_global",
        "__atomic_max_uniform_int64_global",
        "__atomic_min_uniform_int32_global",
        "__atomic_min_uniform_int64_global",
        "__atomic_or_int32_global",
        "__atomic_or_int64_global",
        "__atomic_or_uniform_int32_global",
        "__atomic_or_uniform_int64_global",
        "__atomic_sub_int32_global",
        "__atomic_sub_int64_global",
        "__atomic_sub_uniform_int32_global",
        "__atomic_sub_uniform_int64_global",
        "__atomic_swap_double_global",
        "__atomic_swap_float_global",
        "__atomic_swap_int32_global",
        "__atomic_swap_int64_global",
        "__atomic_swap_uniform_double_global",
        "__atomic_swap_uniform_float_global",
        "__atomic_swap_uniform_int32_global",
        "__atomic_swap_uniform_int64_global",
        "__atomic_umax_uniform_uint32_global",
        "__atomic_umax_uniform_uint64_global",
        "__atomic_umin_uniform_uint32_global",
        "__atomic_umin_uniform_uint64_global",
        "__atomic_xor_int32_global",
        "__atomic_xor_int64_global",
        "__atomic_xor_uniform_int32_global",
        "__atomic_xor_uniform_int64_global",
        "__broadcast_double",
        "__broadcast_float",
        "__broadcast_int16",
        "__broadcast_int32",
        "__broadcast_int64",
        "__broadcast_int8",
        "__ceil_uniform_double",
        "__ceil_uniform_float",
        "__ceil_varying_double",
        "__ceil_varying_float",
        "__count_trailing_zeros",
        "__do_assert_uniform",
        "__do_assert_varying",
        "__do_print",
        "__doublebits_uniform_int64",
        "__doublebits_varying_int64",
        "__exclusive_scan_add_double",
        "__exclusive_scan_add_float",
        "__exclusive_scan_add_i32",
        "__exclusive_scan_add_i64",
        "__exclusive_scan_and_i32",
        "__exclusive_scan_and_i64",
        "__exclusive_scan_or_i32",
        "__exclusive_scan_or_i64",
        "__extract_int16",
        "__extract_int32",
        "__extract_int64",
        "__extract_int8",
        "__fastmath",
        "__floatbits_uniform_int32",
        "__floatbits_varying_int32",
        "__floor_uniform_double",
        "__floor_uniform_float",
        "__floor_varying_double",
        "__floor_varying_float",
        "__insert_int16",
        "__insert_int32",
        "__insert_int64",
        "__insert_int8",
        "__intbits_uniform_double",
        "__intbits_uniform_float",
        "__intbits_varying_double",
        "__intbits_varying_float",
        "__max_uniform_double",
        "__max_uniform_float",
        "__max_uniform_int32",
        "__max_uniform_int64",
        "__max_uniform_uint32",
        "__max_uniform_uint64",
        "__max_varying_double",
        "__max_varying_float",
        "__max_varying_int32",
        "__max_varying_int64",
        "__max_varying_uint32",
        "__max_varying_uint64",
        "__memory_barrier",
        "__min_uniform_double",
        "__min_uniform_float",
        "__min_uniform_int32",
        "__min_uniform_int64",
        "__min_uniform_uint32",
        "__min_uniform_uint64",
        "__min_varying_double",
        "__min_varying_float",
        "__min_varying_int32",
        "__min_varying_int64",
        "__min_varying_uint32",
        "__min_varying_uint64",
        "__movmsk",
        "__num_cores",
        "__packed_load_active",
        "__packed_store_active",
        "__popcnt_int32",
        "__popcnt_int64",
        "__prefetch_read_1_uniform_bool",
        "__prefetch_read_1_uniform_double",
        "__prefetch_read_1_uniform_float",
        "__prefetch_read_1_uniform_int16",
        "__prefetch_read_1_uniform_int32",
        "__prefetch_read_1_uniform_int64",
        "__prefetch_read_1_uniform_int8",
        "__prefetch_read_1_varying_bool",
        "__prefetch_read_1_varying_double",
        "__prefetch_read_1_varying_float",
        "__prefetch_read_1_varying_int16",
        "__prefetch_read_1_varying_int32",
        "__prefetch_read_1_varying_int64",
        "__prefetch_read_1_varying_int8",
        "__prefetch_read_2_uniform_bool",
        "__prefetch_read_2_uniform_double",
        "__prefetch_read_2_uniform_float",
        "__prefetch_read_2_uniform_int16",
        "__prefetch_read_2_uniform_int32",
        "__prefetch_read_2_uniform_int64",
        "__prefetch_read_2_uniform_int8",
        "__prefetch_read_2_varying_bool",
        "__prefetch_read_2_varying_double",
        "__prefetch_read_2_varying_float",
        "__prefetch_read_2_varying_int16",
        "__prefetch_read_2_varying_int32",
        "__prefetch_read_2_varying_int64",
        "__prefetch_read_2_varying_int8",
        "__prefetch_read_3_uniform_bool",
        "__prefetch_read_3_uniform_double",
        "__prefetch_read_3_uniform_float",
        "__prefetch_read_3_uniform_int16",
        "__prefetch_read_3_uniform_int32",
        "__prefetch_read_3_uniform_int64",
        "__prefetch_read_3_uniform_int8",
        "__prefetch_read_3_varying_bool",
        "__prefetch_read_3_varying_double",
        "__prefetch_read_3_varying_float",
        "__prefetch_read_3_varying_int16",
        "__prefetch_read_3_varying_int32",
        "__prefetch_read_3_varying_int64",
        "__prefetch_read_3_varying_int8",
        "__prefetch_read_nt_uniform_bool",
        "__prefetch_read_nt_uniform_double",
        "__prefetch_read_nt_uniform_float",
        "__prefetch_read_nt_uniform_int16",
        "__prefetch_read_nt_uniform_int32",
        "__prefetch_read_nt_uniform_int64",
        "__prefetch_read_nt_uniform_int8",
        "__prefetch_read_nt_varying_bool",
        "__prefetch_read_nt_varying_double",
        "__prefetch_read_nt_varying_float",
        "__prefetch_read_nt_varying_int16",
        "__prefetch_read_nt_varying_int32",
        "__prefetch_read_nt_varying_int64",
        "__prefetch_read_nt_varying_int8",
        "__rcp_uniform_float",
        "__rcp_varying_float",
        "__reduce_add_double",
        "__reduce_add_float",
        "__reduce_add_int32",
        "__reduce_add_int64",
        "__reduce_add_uint32",
        "__reduce_add_uint64",
        "__reduce_equal_double",
        "__reduce_equal_float",
        "__reduce_equal_int32",
        "__reduce_equal_int64",
        "__reduce_max_double",
        "__reduce_max_float",
        "__reduce_max_int32",
        "__reduce_max_int64",
        "__reduce_max_uint32",
        "__reduce_max_uint64",
        "__reduce_min_double",
        "__reduce_min_float",
        "__reduce_min_int32",
        "__reduce_min_int64",
        "__reduce_min_uint32",
        "__reduce_min_uint64",
        "__rotate_double",
        "__rotate_float",
        "__rotate_int16",
        "__rotate_int32",
        "__rotate_int64",
        "__rotate_int8",
        "__round_uniform_double",
        "__round_uniform_float",
        "__round_varying_double",
        "__round_varying_float",
        "__rsqrt_uniform_float",
        "__rsqrt_varying_float",
        "__sext_uniform_bool",
        "__sext_varying_bool",
        "__shuffle2_double",
        "__shuffle2_float",
        "__shuffle2_int16",
        "__shuffle2_int32",
        "__shuffle2_int64",
        "__shuffle2_int8",
        "__shuffle_double",
        "__shuffle_float",
        "__shuffle_int16",
        "__shuffle_int32",
        "__shuffle_int64",
        "__shuffle_int8",
        "__soa_to_aos3_float",
        "__soa_to_aos3_float16",
        "__soa_to_aos3_float4",
        "__soa_to_aos3_float8",
        "__soa_to_aos3_int32",
        "__soa_to_aos4_float",
        "__soa_to_aos4_float16",
        "__soa_to_aos4_float4",
        "__soa_to_aos4_float8",
        "__soa_to_aos4_int32",
        "__sqrt_uniform_double",
        "__sqrt_uniform_float",
        "__sqrt_varying_double",
        "__sqrt_varying_float",
        "__stdlib_atan",
        "__stdlib_atan2",
        "__stdlib_atan2f",
        "__stdlib_atanf",
        "__stdlib_cos",
        "__stdlib_cosf",
        "__stdlib_exp",
        "__stdlib_expf",
        "__stdlib_log",
        "__stdlib_logf",
        "__stdlib_pow",
        "__stdlib_powf",
        "__stdlib_sin",
        "__stdlib_sincos",
        "__stdlib_sincosf",
        "__stdlib_sinf",
        "__stdlib_tan",
        "__stdlib_tanf",
        "__svml_sin",
        "__svml_cos",
        "__svml_sincos",
        "__svml_tan",
        "__svml_atan",
        "__svml_atan2",
        "__svml_exp",
        "__svml_log",
        "__svml_pow",
        "__undef_uniform",
        "__undef_varying",
    };

    const unsigned int count = sizeof(names) / sizeof(names[0]);
    for (unsigned int i = 0; i < count; ++i) {
        llvm::Function *f = module->getFunction(names[i]);
        // Not every name is necessarily present in the module; skip the
        // ones that aren't there.
        if (f == NULL)
            continue;
        // A declaration (no body) must keep external linkage; only
        // functions that were actually defined can be made internal.
        if (f->isDeclaration())
            continue;
        f->setLinkage(llvm::GlobalValue::InternalLinkage);
    }
}
/** This utility function takes serialized binary LLVM bitcode and adds its
definitions to the given module. Functions in the bitcode that can be
mapped to ispc functions are also added to the symbol table.
@@ -371,6 +684,7 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
#endif // LLVM_3_0
&linkError))
Error(SourcePos(), "Error linking stdlib bitcode: %s", linkError.c_str());
lSetInternalFunctions(module);
if (symbolTable != NULL)
lAddModuleSymbols(module, symbolTable);
lCheckModuleIntrinsics(module);