Add SSE4-16 target.

In the same spirit as the sse4-8 target, this adds an 8-wide target for
SSE4 that uses 16-bit elements for the mask.  It is therefore (in
principle) the best target for SIMD computation with 16-bit data types.
This commit is contained in:
Matt Pharr
2013-07-24 09:40:50 -07:00
parent 04d61afa23
commit 780b0dfe47
5 changed files with 463 additions and 7 deletions

View File

@@ -862,10 +862,22 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
break;
case 8:
if (runtime32) {
EXPORT_MODULE(builtins_bitcode_sse4_x2_32bit);
if (g->target->getMaskBitCount() == 16) {
EXPORT_MODULE(builtins_bitcode_sse4_16_32bit);
}
else {
Assert(g->target->getMaskBitCount() == 32);
EXPORT_MODULE(builtins_bitcode_sse4_x2_32bit);
}
}
else {
EXPORT_MODULE(builtins_bitcode_sse4_x2_64bit);
if (g->target->getMaskBitCount() == 16) {
EXPORT_MODULE(builtins_bitcode_sse4_16_64bit);
}
else {
Assert(g->target->getMaskBitCount() == 32);
EXPORT_MODULE(builtins_bitcode_sse4_x2_64bit);
}
}
break;
case 16: