Work around change to linker behavior in LLVM 3.1

Now, the Linker::LinkModules() call doesn't link in any functions marked as 'internal', which is problematic, since we'd like to have just about all of the builtins marked as internal so that they are eliminated after they've been inlined when they are in fact used. This change removes all of the internal qualifiers in the builtins and adds a lSetInternalFunctions() routine to builtins.cpp that sets this property on the functions that need it after they've been linked in by LinkModules().
2011-11-05 16:57:26 -07:00
parent b0d476fcdc
commit cabe358c0a
13 changed files with 760 additions and 447 deletions
--- a/builtins.cpp
+++ b/builtins.cpp
@@ -210,6 +210,9 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
    if (name.size() < 3 || name[0] != '_' || name[1] != '_')
        return false;

+    Debug(SourcePos(), "Attempting to create ispc symbol for function \"%s\".",
+          name.c_str());
+
    // An unfortunate hack: we want this builtin function to have the
    // signature "int __sext_varying_bool(bool)", but the ispc function
    // symbol creation code below assumes that any LLVM vector of i32s is a
@@ -240,9 +243,11 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {

        const Type *returnType = lLLVMTypeToISPCType(ftype->getReturnType(),
                                                     intAsUnsigned);
-        if (!returnType)
+        if (returnType == NULL) {
+            Debug(SourcePos(), "Failed: return type not representable.");
            // return type not representable in ispc -> not callable from ispc
            return false;
+        }

        // Iterate over the arguments and try to find their equivalent ispc
        // types.  Track if any of the arguments has an integer type.
@@ -251,8 +256,10 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
        for (unsigned int j = 0; j < ftype->getNumParams(); ++j) {
            const llvm::Type *llvmArgType = ftype->getParamType(j);
            const Type *type = lLLVMTypeToISPCType(llvmArgType, intAsUnsigned);
-            if (type == NULL)
+            if (type == NULL) {
+                Debug(SourcePos(), "Failed: type of parameter %d not representable", j);
                return false;
+            }
            anyIntArgs |= 
                (Type::Equal(type, lLLVMTypeToISPCType(llvmArgType, !intAsUnsigned)) == false);
            anyReferenceArgs |= (dynamic_cast<const ReferenceType *>(type) != NULL);
@@ -329,6 +336,312 @@ lCheckModuleIntrinsics(llvm::Module *module) {
 }


+/** Marks the known builtin functions in the given module as 'internal'.
+    We'd like these to be declared 'internal' in their bitcode files so
+    that they are eliminated from the final output if the user's program
+    doesn't need them.  However, the LinkModules() call below (in LLVM
+    3.1, at least) doesn't bring in internal functions that aren't yet
+    used by anything in the module they're being linked with, so we set
+    the linkage here, after linking, instead.  This is admittedly a kludge.
+
+    Names that are absent from the module, or present only as a
+    declaration, are skipped: a declaration can't have internal linkage.
+ */
+static void
+lSetInternalFunctions(llvm::Module *module) {
+    static const char *const names[] = {
+        "__add_uniform_int32",
+        "__add_uniform_int64",
+        "__add_varying_int32",
+        "__add_varying_int64",
+        "__aos_to_soa3_float",
+        "__aos_to_soa3_float16",
+        "__aos_to_soa3_float4",
+        "__aos_to_soa3_float8",
+        "__aos_to_soa3_int32",
+        "__aos_to_soa4_float",
+        "__aos_to_soa4_float16",
+        "__aos_to_soa4_float4",
+        "__aos_to_soa4_float8",
+        "__aos_to_soa4_int32",
+        "__atomic_add_int32_global",
+        "__atomic_add_int64_global",
+        "__atomic_add_uniform_int32_global",
+        "__atomic_add_uniform_int64_global",
+        "__atomic_and_int32_global",
+        "__atomic_and_int64_global",
+        "__atomic_and_uniform_int32_global",
+        "__atomic_and_uniform_int64_global",
+        "__atomic_compare_exchange_double_global",
+        "__atomic_compare_exchange_float_global",
+        "__atomic_compare_exchange_int32_global",
+        "__atomic_compare_exchange_int64_global",
+        "__atomic_compare_exchange_uniform_double_global",
+        "__atomic_compare_exchange_uniform_float_global",
+        "__atomic_compare_exchange_uniform_int32_global",
+        "__atomic_compare_exchange_uniform_int64_global",
+        "__atomic_max_uniform_int32_global",
+        "__atomic_max_uniform_int64_global",
+        "__atomic_min_uniform_int32_global",
+        "__atomic_min_uniform_int64_global",
+        "__atomic_or_int32_global",
+        "__atomic_or_int64_global",
+        "__atomic_or_uniform_int32_global",
+        "__atomic_or_uniform_int64_global",
+        "__atomic_sub_int32_global",
+        "__atomic_sub_int64_global",
+        "__atomic_sub_uniform_int32_global",
+        "__atomic_sub_uniform_int64_global",
+        "__atomic_swap_double_global",
+        "__atomic_swap_float_global",
+        "__atomic_swap_int32_global",
+        "__atomic_swap_int64_global",
+        "__atomic_swap_uniform_double_global",
+        "__atomic_swap_uniform_float_global",
+        "__atomic_swap_uniform_int32_global",
+        "__atomic_swap_uniform_int64_global",
+        "__atomic_umax_uniform_uint32_global",
+        "__atomic_umax_uniform_uint64_global",
+        "__atomic_umin_uniform_uint32_global",
+        "__atomic_umin_uniform_uint64_global",
+        "__atomic_xor_int32_global",
+        "__atomic_xor_int64_global",
+        "__atomic_xor_uniform_int32_global",
+        "__atomic_xor_uniform_int64_global",
+        "__broadcast_double",
+        "__broadcast_float",
+        "__broadcast_int16",
+        "__broadcast_int32",
+        "__broadcast_int64",
+        "__broadcast_int8",
+        "__ceil_uniform_double",
+        "__ceil_uniform_float",
+        "__ceil_varying_double",
+        "__ceil_varying_float",
+        "__count_trailing_zeros",
+        "__do_assert_uniform",
+        "__do_assert_varying",
+        "__do_print",
+        "__doublebits_uniform_int64",
+        "__doublebits_varying_int64",
+        "__exclusive_scan_add_double",
+        "__exclusive_scan_add_float",
+        "__exclusive_scan_add_i32",
+        "__exclusive_scan_add_i64",
+        "__exclusive_scan_and_i32",
+        "__exclusive_scan_and_i64",
+        "__exclusive_scan_or_i32",
+        "__exclusive_scan_or_i64",
+        "__extract_int16",
+        "__extract_int32",
+        "__extract_int64",
+        "__extract_int8",
+        "__fastmath",
+        "__floatbits_uniform_int32",
+        "__floatbits_varying_int32",
+        "__floor_uniform_double",
+        "__floor_uniform_float",
+        "__floor_varying_double",
+        "__floor_varying_float",
+        "__insert_int16",
+        "__insert_int32",
+        "__insert_int64",
+        "__insert_int8",
+        "__intbits_uniform_double",
+        "__intbits_uniform_float",
+        "__intbits_varying_double",
+        "__intbits_varying_float",
+        "__max_uniform_double",
+        "__max_uniform_float",
+        "__max_uniform_int32",
+        "__max_uniform_int64",
+        "__max_uniform_uint32",
+        "__max_uniform_uint64",
+        "__max_varying_double",
+        "__max_varying_float",
+        "__max_varying_int32",
+        "__max_varying_int64",
+        "__max_varying_uint32",
+        "__max_varying_uint64",
+        "__memory_barrier",
+        "__min_uniform_double",
+        "__min_uniform_float",
+        "__min_uniform_int32",
+        "__min_uniform_int64",
+        "__min_uniform_uint32",
+        "__min_uniform_uint64",
+        "__min_varying_double",
+        "__min_varying_float",
+        "__min_varying_int32",
+        "__min_varying_int64",
+        "__min_varying_uint32",
+        "__min_varying_uint64",
+        "__movmsk",
+        "__num_cores",
+        "__packed_load_active",
+        "__packed_store_active",
+        "__popcnt_int32",
+        "__popcnt_int64",
+        "__prefetch_read_1_uniform_bool",
+        "__prefetch_read_1_uniform_double",
+        "__prefetch_read_1_uniform_float",
+        "__prefetch_read_1_uniform_int16",
+        "__prefetch_read_1_uniform_int32",
+        "__prefetch_read_1_uniform_int64",
+        "__prefetch_read_1_uniform_int8",
+        "__prefetch_read_1_varying_bool",
+        "__prefetch_read_1_varying_double",
+        "__prefetch_read_1_varying_float",
+        "__prefetch_read_1_varying_int16",
+        "__prefetch_read_1_varying_int32",
+        "__prefetch_read_1_varying_int64",
+        "__prefetch_read_1_varying_int8",
+        "__prefetch_read_2_uniform_bool",
+        "__prefetch_read_2_uniform_double",
+        "__prefetch_read_2_uniform_float",
+        "__prefetch_read_2_uniform_int16",
+        "__prefetch_read_2_uniform_int32",
+        "__prefetch_read_2_uniform_int64",
+        "__prefetch_read_2_uniform_int8",
+        "__prefetch_read_2_varying_bool",
+        "__prefetch_read_2_varying_double",
+        "__prefetch_read_2_varying_float",
+        "__prefetch_read_2_varying_int16",
+        "__prefetch_read_2_varying_int32",
+        "__prefetch_read_2_varying_int64",
+        "__prefetch_read_2_varying_int8",
+        "__prefetch_read_3_uniform_bool",
+        "__prefetch_read_3_uniform_double",
+        "__prefetch_read_3_uniform_float",
+        "__prefetch_read_3_uniform_int16",
+        "__prefetch_read_3_uniform_int32",
+        "__prefetch_read_3_uniform_int64",
+        "__prefetch_read_3_uniform_int8",
+        "__prefetch_read_3_varying_bool",
+        "__prefetch_read_3_varying_double",
+        "__prefetch_read_3_varying_float",
+        "__prefetch_read_3_varying_int16",
+        "__prefetch_read_3_varying_int32",
+        "__prefetch_read_3_varying_int64",
+        "__prefetch_read_3_varying_int8",
+        "__prefetch_read_nt_uniform_bool",
+        "__prefetch_read_nt_uniform_double",
+        "__prefetch_read_nt_uniform_float",
+        "__prefetch_read_nt_uniform_int16",
+        "__prefetch_read_nt_uniform_int32",
+        "__prefetch_read_nt_uniform_int64",
+        "__prefetch_read_nt_uniform_int8",
+        "__prefetch_read_nt_varying_bool",
+        "__prefetch_read_nt_varying_double",
+        "__prefetch_read_nt_varying_float",
+        "__prefetch_read_nt_varying_int16",
+        "__prefetch_read_nt_varying_int32",
+        "__prefetch_read_nt_varying_int64",
+        "__prefetch_read_nt_varying_int8",
+        "__rcp_uniform_float",
+        "__rcp_varying_float",
+        "__reduce_add_double",
+        "__reduce_add_float",
+        "__reduce_add_int32",
+        "__reduce_add_int64",
+        "__reduce_add_uint32",
+        "__reduce_add_uint64",
+        "__reduce_equal_double",
+        "__reduce_equal_float",
+        "__reduce_equal_int32",
+        "__reduce_equal_int64",
+        "__reduce_max_double",
+        "__reduce_max_float",
+        "__reduce_max_int32",
+        "__reduce_max_int64",
+        "__reduce_max_uint32",
+        "__reduce_max_uint64",
+        "__reduce_min_double",
+        "__reduce_min_float",
+        "__reduce_min_int32",
+        "__reduce_min_int64",
+        "__reduce_min_uint32",
+        "__reduce_min_uint64",
+        "__rotate_double",
+        "__rotate_float",
+        "__rotate_int16",
+        "__rotate_int32",
+        "__rotate_int64",
+        "__rotate_int8",
+        "__round_uniform_double",
+        "__round_uniform_float",
+        "__round_varying_double",
+        "__round_varying_float",
+        "__rsqrt_uniform_float",
+        "__rsqrt_varying_float",
+        "__sext_uniform_bool",
+        "__sext_varying_bool",
+        "__shuffle2_double",
+        "__shuffle2_float",
+        "__shuffle2_int16",
+        "__shuffle2_int32",
+        "__shuffle2_int64",
+        "__shuffle2_int8",
+        "__shuffle_double",
+        "__shuffle_float",
+        "__shuffle_int16",
+        "__shuffle_int32",
+        "__shuffle_int64",
+        "__shuffle_int8",
+        "__soa_to_aos3_float",
+        "__soa_to_aos3_float16",
+        "__soa_to_aos3_float4",
+        "__soa_to_aos3_float8",
+        "__soa_to_aos3_int32",
+        "__soa_to_aos4_float",
+        "__soa_to_aos4_float16",
+        "__soa_to_aos4_float4",
+        "__soa_to_aos4_float8",
+        "__soa_to_aos4_int32",
+        "__sqrt_uniform_double",
+        "__sqrt_uniform_float",
+        "__sqrt_varying_double",
+        "__sqrt_varying_float",
+        "__stdlib_atan",
+        "__stdlib_atan2",
+        "__stdlib_atan2f",
+        "__stdlib_atanf",
+        "__stdlib_cos",
+        "__stdlib_cosf",
+        "__stdlib_exp",
+        "__stdlib_expf",
+        "__stdlib_log",
+        "__stdlib_logf",
+        "__stdlib_pow",
+        "__stdlib_powf",
+        "__stdlib_sin",
+        "__stdlib_sincos",
+        "__stdlib_sincosf",
+        "__stdlib_sinf",
+        "__stdlib_tan",
+        "__stdlib_tanf",
+        "__svml_sin",
+        "__svml_cos",
+        "__svml_sincos",
+        "__svml_tan",
+        "__svml_atan",
+        "__svml_atan2",
+        "__svml_exp",
+        "__svml_log",
+        "__svml_pow",
+        "__undef_uniform",
+        "__undef_varying",
+    };
+
+    int count = sizeof(names) / sizeof(names[0]);
+    for (int i = 0; i < count; ++i) {
+        llvm::Function *f = module->getFunction(names[i]);
+        if (f != NULL && !f->isDeclaration())
+            f->setLinkage(llvm::GlobalValue::InternalLinkage);
+    }
+}
+
+
 /** This utility function takes serialized binary LLVM bitcode and adds its
    definitions to the given module.  Functions in the bitcode that can be
    mapped to ispc functions are also added to the symbol table.
@@ -371,6 +684,7 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
 #endif // LLVM_3_0
                                      &linkError))
            Error(SourcePos(), "Error linking stdlib bitcode: %s", linkError.c_str());
+        lSetInternalFunctions(module);
        if (symbolTable != NULL)
            lAddModuleSymbols(module, symbolTable);
        lCheckModuleIntrinsics(module);