merged with master
builtins.cpp
@@ -112,10 +112,7 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
return intAsUnsigned ? AtomicType::UniformUInt64 : AtomicType::UniformInt64;

// varying
if (LLVMTypes::MaskType != LLVMTypes::Int32VectorType &&
t == LLVMTypes::MaskType)
return AtomicType::VaryingBool;
else if (t == LLVMTypes::Int8VectorType)
if (t == LLVMTypes::Int8VectorType)
return intAsUnsigned ? AtomicType::VaryingUInt8 : AtomicType::VaryingInt8;
else if (t == LLVMTypes::Int16VectorType)
return intAsUnsigned ? AtomicType::VaryingUInt16 : AtomicType::VaryingInt16;
@@ -127,6 +124,8 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
return AtomicType::VaryingDouble;
else if (t == LLVMTypes::Int64VectorType)
return intAsUnsigned ? AtomicType::VaryingUInt64 : AtomicType::VaryingInt64;
else if (t == LLVMTypes::MaskType)
return AtomicType::VaryingBool;

// pointers to uniform
else if (t == LLVMTypes::Int8PointerType)
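The two hunks above move the MaskType check in lLLVMTypeToISPCType from the front of the chain (where it was guarded by MaskType != Int32VectorType) to the very end, after every concrete vector type. The sketch below uses invented stand-in names rather than the real LLVMTypes/AtomicType machinery; it only illustrates why that ordering matters once the mask may share its representation with one of the integer vector types.

    #include <cstdio>

    // Stand-ins for the real LLVM/ispc types; only the dispatch order is the point.
    enum Ty { kInt8Vec, kInt16Vec, kInt32Vec, kI1Vec };

    static const char *toISPC(Ty t, Ty maskTy) {
        // Concrete vector types are matched first, so a target whose mask is
        // represented as an integer vector still maps that type to an integer
        // varying type rather than to varying bool.
        if (t == kInt8Vec)  return "varying int8";
        if (t == kInt16Vec) return "varying int16";
        if (t == kInt32Vec) return "varying int32";
        // Only a mask representation that is distinct from every vector type
        // above falls through to the boolean mapping.
        if (t == maskTy)    return "varying bool";
        return "unknown";
    }

    int main() {
        printf("%s\n", toISPC(kInt8Vec, kInt8Vec)); // prints "varying int8"
        printf("%s\n", toISPC(kI1Vec, kI1Vec));     // prints "varying bool"
        return 0;
    }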
@@ -303,6 +302,7 @@ lCheckModuleIntrinsics(llvm::Module *module) {
// check the llvm.x86.* intrinsics for now...
if (!strncmp(funcName.c_str(), "llvm.x86.", 9)) {
llvm::Intrinsic::ID id = (llvm::Intrinsic::ID)func->getIntrinsicID();
if (id == 0) fprintf(stderr, "FATAL: intrinsic is not found: %s \n", funcName.c_str());
Assert(id != 0);
llvm::Type *intrinsicType =
llvm::Intrinsic::getType(*g->ctx, id);
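The new line in this hunk is the fprintf issued before the assertion, so a failed lookup reports which llvm.x86.* name was not recognized instead of dying with a bare Assert. Below is a minimal, self-contained sketch of the same diagnose-then-assert pattern; the lookup helper and the intrinsic name are placeholders, not real LLVM API.

    #include <cassert>
    #include <cstdio>
    #include <cstring>

    // Placeholder for llvm::Function::getIntrinsicID(): 0 means "not an intrinsic".
    static unsigned lookupIntrinsicId(const char *name) {
        return strcmp(name, "llvm.x86.some.intrinsic") == 0 ? 1u : 0u;
    }

    static void checkIntrinsic(const char *funcName) {
        if (!strncmp(funcName, "llvm.x86.", 9)) {
            unsigned id = lookupIntrinsicId(funcName);
            // Report the offending name first; the assertion alone would not
            // say which intrinsic the linked bitcode referred to.
            if (id == 0)
                fprintf(stderr, "FATAL: intrinsic is not found: %s\n", funcName);
            assert(id != 0);
        }
    }

    int main() {
        checkIntrinsic("llvm.x86.some.intrinsic"); // passes the check
        return 0;
    }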
@@ -488,7 +488,6 @@ lSetInternalFunctions(llvm::Module *module) {
"__num_cores",
"__packed_load_active",
"__packed_store_active",
"__pause",
"__popcnt_int32",
"__popcnt_int64",
"__prefetch_read_uniform_1",
@@ -502,6 +501,8 @@ lSetInternalFunctions(llvm::Module *module) {
"__rdrand_i64",
"__reduce_add_double",
"__reduce_add_float",
"__reduce_add_int8",
"__reduce_add_int16",
"__reduce_add_int32",
"__reduce_add_int64",
"__reduce_equal_double",
@@ -576,20 +577,34 @@ lSetInternalFunctions(llvm::Module *module) {
"__stdlib_pow",
"__stdlib_powf",
"__stdlib_sin",
"__stdlib_asin",
"__stdlib_sincos",
"__stdlib_sincosf",
"__stdlib_sinf",
"__stdlib_tan",
"__stdlib_tanf",
"__svml_sin",
"__svml_cos",
"__svml_sincos",
"__svml_tan",
"__svml_atan",
"__svml_atan2",
"__svml_exp",
"__svml_log",
"__svml_pow",
"__svml_sind",
"__svml_asind",
"__svml_cosd",
"__svml_acosd",
"__svml_sincosd",
"__svml_tand",
"__svml_atand",
"__svml_atan2d",
"__svml_expd",
"__svml_logd",
"__svml_powd",
"__svml_sinf",
"__svml_asinf",
"__svml_cosf",
"__svml_acosf",
"__svml_sincosf",
"__svml_tanf",
"__svml_atanf",
"__svml_atan2f",
"__svml_expf",
"__svml_logf",
"__svml_powf",
"__undef_uniform",
"__undef_varying",
"__vec4_add_float",
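The three hunks above only touch the table of builtin names that lSetInternalFunctions walks (the reduction entries gain int8/int16 variants, and the __svml_* entries become per-precision). As a hedged sketch of what a helper with this name and such a table presumably does, the loop below looks each name up in the just-linked module and marks it with internal linkage so unused builtins can be dropped and the rest inlined. It is illustrative, not the actual ispc code, though the LLVM calls used (Module::getFunction, GlobalValue::setLinkage) are real API.

    #include <llvm/IR/Function.h>
    #include <llvm/IR/GlobalValue.h>
    #include <llvm/IR/Module.h>

    // Give every listed builtin internal linkage inside the given module.
    static void markBuiltinsInternal(llvm::Module *module,
                                     const char *const names[], int count) {
        for (int i = 0; i < count; ++i) {
            if (llvm::Function *fn = module->getFunction(names[i]))
                fn->setLinkage(llvm::GlobalValue::InternalLinkage);
        }
    }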
@@ -640,7 +655,7 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
llvm::Triple bcTriple(bcModule->getTargetTriple());
Debug(SourcePos(), "module triple: %s\nbitcode triple: %s\n",
mTriple.str().c_str(), bcTriple.str().c_str());
#ifndef __arm__
#if defined(ISPC_ARM_ENABLED) && !defined(__arm__)
// FIXME: More ugly and dangerous stuff. We really haven't set up
// proper build and runtime infrastructure for ispc to do
// cross-compilation, yet it's at minimum useful to be able to emit
@@ -656,8 +671,12 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
// the values for an ARM target. This maybe won't cause problems
// in the generated code, since bulitins.c doesn't do anything too
// complex w.r.t. struct layouts, etc.
if (g->target->getISA() != Target::NEON &&
if (g->target->getISA() != Target::NEON32 &&
g->target->getISA() != Target::NEON16 &&
g->target->getISA() != Target::NEON8 &&
g->target->getISA() != Target::NVPTX64)
#else
if (g->target->getISA() != Target::NVPTX64)
#endif // !__arm__
{
Assert(bcTriple.getArch() == llvm::Triple::UnknownArch ||
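This hunk widens the guard around the target-triple check: the single Target::NEON case becomes NEON32/NEON16/NEON8, and the block is now conditional on ISPC_ARM_ENABLED rather than a bare __arm__ test. The stand-in below sketches the resulting predicate; the enum and helper name are invented, only the shape of the check is taken from the hunk.

    // Invented names; mirrors which ISAs skip the triple assertion.
    enum class Isa { SSE2, SSE4, AVX, NEON8, NEON16, NEON32, NVPTX64 };

    static bool tripleMustMatch(Isa isa) {
        switch (isa) {
        case Isa::NEON8:
        case Isa::NEON16:
        case Isa::NEON32:
        case Isa::NVPTX64:
            // Cross-compiled builtins: the bitcode triple is allowed to
            // differ from the module triple, so the assertion is skipped.
            return false;
        default:
            return true;
        }
    }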
@@ -831,15 +850,35 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
}
break;
};
case Target::NEON: {
#ifdef ISPC_ARM_ENABLED
case Target::NEON8: {
if (runtime32) {
EXPORT_MODULE(builtins_bitcode_neon_32bit);
EXPORT_MODULE(builtins_bitcode_neon_8_32bit);
}
else {
EXPORT_MODULE(builtins_bitcode_neon_64bit);
EXPORT_MODULE(builtins_bitcode_neon_8_64bit);
}
break;
}
case Target::NEON16: {
if (runtime32) {
EXPORT_MODULE(builtins_bitcode_neon_16_32bit);
}
else {
EXPORT_MODULE(builtins_bitcode_neon_16_64bit);
}
break;
}
case Target::NEON32: {
if (runtime32) {
EXPORT_MODULE(builtins_bitcode_neon_32_32bit);
}
else {
EXPORT_MODULE(builtins_bitcode_neon_32_64bit);
}
break;
}
#endif
case Target::SSE2: {
switch (g->target->getVectorWidth()) {
case 4:
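The NEON case in DefineStdlib is split into three variants, each pairing a NEON flavor with the 32- or 64-bit runtime and pulling in a different precompiled builtins module via EXPORT_MODULE. A purely illustrative stand-in for that selection follows; the enum, helper, and returned strings are invented, only the naming pattern comes from the hunk.

    #include <string>

    enum class NeonIsa { Neon8, Neon16, Neon32 };

    // Map (NEON variant, runtime pointer size) to the serialized builtins
    // module that the real code hands to EXPORT_MODULE.
    static std::string neonBuiltinsName(NeonIsa isa, bool runtime32) {
        std::string base = "builtins_bitcode_neon_";
        switch (isa) {
        case NeonIsa::Neon8:  base += "8_";  break;
        case NeonIsa::Neon16: base += "16_"; break;
        case NeonIsa::Neon32: base += "32_"; break;
        }
        return base + (runtime32 ? "32bit" : "64bit");
    }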
@@ -875,10 +914,31 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
break;
case 8:
if (runtime32) {
EXPORT_MODULE(builtins_bitcode_sse4_x2_32bit);
if (g->target->getMaskBitCount() == 16) {
EXPORT_MODULE(builtins_bitcode_sse4_16_32bit);
}
else {
Assert(g->target->getMaskBitCount() == 32);
EXPORT_MODULE(builtins_bitcode_sse4_x2_32bit);
}
}
else {
EXPORT_MODULE(builtins_bitcode_sse4_x2_64bit);
if (g->target->getMaskBitCount() == 16) {
EXPORT_MODULE(builtins_bitcode_sse4_16_64bit);
}
else {
Assert(g->target->getMaskBitCount() == 32);
EXPORT_MODULE(builtins_bitcode_sse4_x2_64bit);
}
}
break;
case 16:
Assert(g->target->getMaskBitCount() == 8);
if (runtime32) {
EXPORT_MODULE(builtins_bitcode_sse4_8_32bit);
}
else {
EXPORT_MODULE(builtins_bitcode_sse4_8_64bit);
}
break;
default:
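The SSE4 case now also dispatches on the mask width: the 8-wide target picks the 16-bit-mask module when getMaskBitCount() is 16, otherwise asserts a 32-bit mask and keeps the x2 module, while the new 16-wide case requires an 8-bit mask. A condensed stand-in of that decision follows; the function name and returned strings are invented, the width/mask pairs mirror the hunk.

    #include <cassert>
    #include <string>

    static std::string sse4BuiltinsName(int vectorWidth, int maskBits, bool runtime32) {
        const std::string suffix = runtime32 ? "_32bit" : "_64bit";
        switch (vectorWidth) {
        case 8:
            // 8-wide SSE4 exists with either a 16-bit or a 32-bit mask.
            if (maskBits == 16)
                return "builtins_bitcode_sse4_16" + suffix;
            assert(maskBits == 32);
            return "builtins_bitcode_sse4_x2" + suffix;
        case 16:
            // The 16-wide flavor only comes with an 8-bit mask.
            assert(maskBits == 8);
            return "builtins_bitcode_sse4_8" + suffix;
        default:
            assert(false && "vector width not covered by this sketch");
            return "";
        }
    }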
@@ -888,6 +948,14 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
}
case Target::AVX: {
switch (g->target->getVectorWidth()) {
case 4:
if (runtime32) {
EXPORT_MODULE(builtins_bitcode_avx1_i64x4_32bit);
}
else {
EXPORT_MODULE(builtins_bitcode_avx1_i64x4_64bit);
}
break;
case 8:
if (runtime32) {
EXPORT_MODULE(builtins_bitcode_avx1_32bit);
@@ -1050,16 +1118,33 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
// If the user wants the standard library to be included, parse the
// serialized version of the stdlib.ispc file to get its
// definitions added.
extern char stdlib_mask1_code[], stdlib_mask8_code[];
extern char stdlib_mask16_code[], stdlib_mask32_code[], stdlib_mask64_code[];
if (g->target->getISA() == Target::GENERIC &&
g->target->getVectorWidth() != 1) { // 1 wide uses x86 stdlib
extern char stdlib_generic_code[];
yy_scan_string(stdlib_generic_code);
yyparse();
g->target->getVectorWidth() == 1) { // 1 wide uses 32 stdlib
yy_scan_string(stdlib_mask32_code);
}
else {
extern char stdlib_x86_code[];
yy_scan_string(stdlib_x86_code);
yyparse();
switch (g->target->getMaskBitCount()) {
case 1:
yy_scan_string(stdlib_mask1_code);
break;
case 8:
yy_scan_string(stdlib_mask8_code);
break;
case 16:
yy_scan_string(stdlib_mask16_code);
break;
case 32:
yy_scan_string(stdlib_mask32_code);
break;
case 64:
yy_scan_string(stdlib_mask64_code);
break;
default:
FATAL("Unhandled mask bit size for stdlib.ispc");
}
}
yyparse();
}
}
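The tail of the diff replaces the old generic/x86 stdlib split with a per-mask-width choice: the serialized stdlib.ispc text matching getMaskBitCount() is fed to the lexer and then parsed once. Below is a condensed sketch of that selection; the helper name is invented, while the extern code arrays and the yy_scan_string/yyparse entry points are the ones the hunk itself uses, with prototypes written the way flex and bison normally emit them.

    #include <cstdio>

    extern char stdlib_mask1_code[], stdlib_mask8_code[];
    extern char stdlib_mask16_code[], stdlib_mask32_code[], stdlib_mask64_code[];

    struct yy_buffer_state;
    extern yy_buffer_state *yy_scan_string(const char *str);
    extern int yyparse();

    static void parseStdlibForMask(int maskBitCount) {
        char *code = nullptr;
        switch (maskBitCount) {
        case 1:  code = stdlib_mask1_code;  break;
        case 8:  code = stdlib_mask8_code;  break;
        case 16: code = stdlib_mask16_code; break;
        case 32: code = stdlib_mask32_code; break;
        case 64: code = stdlib_mask64_code; break;
        default:
            fprintf(stderr, "Unhandled mask bit size for stdlib.ispc\n");
            return;
        }
        yy_scan_string(code);   // point the lexer at the serialized stdlib text
        yyparse();              // parse it so its definitions are added
    }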