merged with master

This commit is contained in:
egaburov
2013-10-08 19:13:30 +02:00
162 changed files with 16148 additions and 1942 deletions

View File

@@ -112,10 +112,7 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
return intAsUnsigned ? AtomicType::UniformUInt64 : AtomicType::UniformInt64;
// varying
if (LLVMTypes::MaskType != LLVMTypes::Int32VectorType &&
t == LLVMTypes::MaskType)
return AtomicType::VaryingBool;
else if (t == LLVMTypes::Int8VectorType)
if (t == LLVMTypes::Int8VectorType)
return intAsUnsigned ? AtomicType::VaryingUInt8 : AtomicType::VaryingInt8;
else if (t == LLVMTypes::Int16VectorType)
return intAsUnsigned ? AtomicType::VaryingUInt16 : AtomicType::VaryingInt16;
@@ -127,6 +124,8 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
return AtomicType::VaryingDouble;
else if (t == LLVMTypes::Int64VectorType)
return intAsUnsigned ? AtomicType::VaryingUInt64 : AtomicType::VaryingInt64;
else if (t == LLVMTypes::MaskType)
return AtomicType::VaryingBool;
// pointers to uniform
else if (t == LLVMTypes::Int8PointerType)
@@ -303,6 +302,7 @@ lCheckModuleIntrinsics(llvm::Module *module) {
// check the llvm.x86.* intrinsics for now...
if (!strncmp(funcName.c_str(), "llvm.x86.", 9)) {
llvm::Intrinsic::ID id = (llvm::Intrinsic::ID)func->getIntrinsicID();
if (id == 0) fprintf(stderr, "FATAL: intrinsic is not found: %s \n", funcName.c_str());
Assert(id != 0);
llvm::Type *intrinsicType =
llvm::Intrinsic::getType(*g->ctx, id);
@@ -488,7 +488,6 @@ lSetInternalFunctions(llvm::Module *module) {
"__num_cores",
"__packed_load_active",
"__packed_store_active",
"__pause",
"__popcnt_int32",
"__popcnt_int64",
"__prefetch_read_uniform_1",
@@ -502,6 +501,8 @@ lSetInternalFunctions(llvm::Module *module) {
"__rdrand_i64",
"__reduce_add_double",
"__reduce_add_float",
"__reduce_add_int8",
"__reduce_add_int16",
"__reduce_add_int32",
"__reduce_add_int64",
"__reduce_equal_double",
@@ -576,20 +577,34 @@ lSetInternalFunctions(llvm::Module *module) {
"__stdlib_pow",
"__stdlib_powf",
"__stdlib_sin",
"__stdlib_asin",
"__stdlib_sincos",
"__stdlib_sincosf",
"__stdlib_sinf",
"__stdlib_tan",
"__stdlib_tanf",
"__svml_sin",
"__svml_cos",
"__svml_sincos",
"__svml_tan",
"__svml_atan",
"__svml_atan2",
"__svml_exp",
"__svml_log",
"__svml_pow",
"__svml_sind",
"__svml_asind",
"__svml_cosd",
"__svml_acosd",
"__svml_sincosd",
"__svml_tand",
"__svml_atand",
"__svml_atan2d",
"__svml_expd",
"__svml_logd",
"__svml_powd",
"__svml_sinf",
"__svml_asinf",
"__svml_cosf",
"__svml_acosf",
"__svml_sincosf",
"__svml_tanf",
"__svml_atanf",
"__svml_atan2f",
"__svml_expf",
"__svml_logf",
"__svml_powf",
"__undef_uniform",
"__undef_varying",
"__vec4_add_float",
@@ -640,7 +655,7 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
llvm::Triple bcTriple(bcModule->getTargetTriple());
Debug(SourcePos(), "module triple: %s\nbitcode triple: %s\n",
mTriple.str().c_str(), bcTriple.str().c_str());
#ifndef __arm__
#if defined(ISPC_ARM_ENABLED) && !defined(__arm__)
// FIXME: More ugly and dangerous stuff. We really haven't set up
// proper build and runtime infrastructure for ispc to do
// cross-compilation, yet it's at minimum useful to be able to emit
@@ -656,8 +671,12 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
// the values for an ARM target. This maybe won't cause problems
// in the generated code, since bulitins.c doesn't do anything too
// complex w.r.t. struct layouts, etc.
if (g->target->getISA() != Target::NEON &&
if (g->target->getISA() != Target::NEON32 &&
g->target->getISA() != Target::NEON16 &&
g->target->getISA() != Target::NEON8 &&
g->target->getISA() != Target::NVPTX64)
#else
if (g->target->getISA() != Target::NVPTX64)
#endif // !__arm__
{
Assert(bcTriple.getArch() == llvm::Triple::UnknownArch ||
@@ -831,15 +850,35 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
}
break;
};
case Target::NEON: {
#ifdef ISPC_ARM_ENABLED
case Target::NEON8: {
if (runtime32) {
EXPORT_MODULE(builtins_bitcode_neon_32bit);
EXPORT_MODULE(builtins_bitcode_neon_8_32bit);
}
else {
EXPORT_MODULE(builtins_bitcode_neon_64bit);
EXPORT_MODULE(builtins_bitcode_neon_8_64bit);
}
break;
}
case Target::NEON16: {
if (runtime32) {
EXPORT_MODULE(builtins_bitcode_neon_16_32bit);
}
else {
EXPORT_MODULE(builtins_bitcode_neon_16_64bit);
}
break;
}
case Target::NEON32: {
if (runtime32) {
EXPORT_MODULE(builtins_bitcode_neon_32_32bit);
}
else {
EXPORT_MODULE(builtins_bitcode_neon_32_64bit);
}
break;
}
#endif
case Target::SSE2: {
switch (g->target->getVectorWidth()) {
case 4:
@@ -875,10 +914,31 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
break;
case 8:
if (runtime32) {
EXPORT_MODULE(builtins_bitcode_sse4_x2_32bit);
if (g->target->getMaskBitCount() == 16) {
EXPORT_MODULE(builtins_bitcode_sse4_16_32bit);
}
else {
Assert(g->target->getMaskBitCount() == 32);
EXPORT_MODULE(builtins_bitcode_sse4_x2_32bit);
}
}
else {
EXPORT_MODULE(builtins_bitcode_sse4_x2_64bit);
if (g->target->getMaskBitCount() == 16) {
EXPORT_MODULE(builtins_bitcode_sse4_16_64bit);
}
else {
Assert(g->target->getMaskBitCount() == 32);
EXPORT_MODULE(builtins_bitcode_sse4_x2_64bit);
}
}
break;
case 16:
Assert(g->target->getMaskBitCount() == 8);
if (runtime32) {
EXPORT_MODULE(builtins_bitcode_sse4_8_32bit);
}
else {
EXPORT_MODULE(builtins_bitcode_sse4_8_64bit);
}
break;
default:
@@ -888,6 +948,14 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
}
case Target::AVX: {
switch (g->target->getVectorWidth()) {
case 4:
if (runtime32) {
EXPORT_MODULE(builtins_bitcode_avx1_i64x4_32bit);
}
else {
EXPORT_MODULE(builtins_bitcode_avx1_i64x4_64bit);
}
break;
case 8:
if (runtime32) {
EXPORT_MODULE(builtins_bitcode_avx1_32bit);
@@ -1050,16 +1118,33 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
// If the user wants the standard library to be included, parse the
// serialized version of the stdlib.ispc file to get its
// definitions added.
extern char stdlib_mask1_code[], stdlib_mask8_code[];
extern char stdlib_mask16_code[], stdlib_mask32_code[], stdlib_mask64_code[];
if (g->target->getISA() == Target::GENERIC &&
g->target->getVectorWidth() != 1) { // 1 wide uses x86 stdlib
extern char stdlib_generic_code[];
yy_scan_string(stdlib_generic_code);
yyparse();
g->target->getVectorWidth() == 1) { // 1 wide uses 32 stdlib
yy_scan_string(stdlib_mask32_code);
}
else {
extern char stdlib_x86_code[];
yy_scan_string(stdlib_x86_code);
yyparse();
switch (g->target->getMaskBitCount()) {
case 1:
yy_scan_string(stdlib_mask1_code);
break;
case 8:
yy_scan_string(stdlib_mask8_code);
break;
case 16:
yy_scan_string(stdlib_mask16_code);
break;
case 32:
yy_scan_string(stdlib_mask32_code);
break;
case 64:
yy_scan_string(stdlib_mask64_code);
break;
default:
FATAL("Unhandled mask bit size for stdlib.ispc");
}
}
yyparse();
}
}