Add SSE4-16 target.
Along the lines of sse4-8, this is an 8-wide target for SSE4 that uses 16-bit elements for the mask. In principle, it is therefore the best target for SIMD computation with 16-bit data types.
This commit is contained in:
16
builtins.cpp
16
builtins.cpp
@@ -862,10 +862,22 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
||||
break;
|
||||
case 8:
|
||||
if (runtime32) {
|
||||
EXPORT_MODULE(builtins_bitcode_sse4_x2_32bit);
|
||||
if (g->target->getMaskBitCount() == 16) {
|
||||
EXPORT_MODULE(builtins_bitcode_sse4_16_32bit);
|
||||
}
|
||||
else {
|
||||
Assert(g->target->getMaskBitCount() == 32);
|
||||
EXPORT_MODULE(builtins_bitcode_sse4_x2_32bit);
|
||||
}
|
||||
}
|
||||
else {
|
||||
EXPORT_MODULE(builtins_bitcode_sse4_x2_64bit);
|
||||
if (g->target->getMaskBitCount() == 16) {
|
||||
EXPORT_MODULE(builtins_bitcode_sse4_16_64bit);
|
||||
}
|
||||
else {
|
||||
Assert(g->target->getMaskBitCount() == 32);
|
||||
EXPORT_MODULE(builtins_bitcode_sse4_x2_64bit);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 16:
|
||||
|
||||
Reference in New Issue
Block a user