Gather/scatter function improvements in builtins.

More naming consistency: type suffixes are now spelled _i32 rather than _32 (e.g. __pseudo_gather32_i32 instead of __pseudo_gather32_32).

Also improved the m4 macros that generate these sequences so that they
require fewer parameters.
This commit is contained in:
Matt Pharr
2012-06-07 14:18:06 -07:00
parent b86d40091a
commit 1ac3e03171
15 changed files with 420 additions and 423 deletions

171
opt.cpp
View File

@@ -1689,38 +1689,41 @@ DetectGSBaseOffsetsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
DEBUG_START_PASS("DetectGSBaseOffsets");
GSInfo gsFuncs[] = {
GSInfo("__pseudo_gather32_8", "__pseudo_gather_base_offsets32_8",
"__pseudo_gather_base_offsets32_8", true),
GSInfo("__pseudo_gather32_16", "__pseudo_gather_base_offsets32_16",
"__pseudo_gather_base_offsets32_16", true),
GSInfo("__pseudo_gather32_32", "__pseudo_gather_base_offsets32_32",
"__pseudo_gather_base_offsets32_32", true),
GSInfo("__pseudo_gather32_64", "__pseudo_gather_base_offsets32_64",
"__pseudo_gather_base_offsets32_64", true),
GSInfo("__pseudo_scatter32_8", "__pseudo_scatter_base_offsets32_8",
"__pseudo_scatter_base_offsets32_8", false),
GSInfo("__pseudo_scatter32_16", "__pseudo_scatter_base_offsets32_16",
"__pseudo_scatter_base_offsets32_16", false),
GSInfo("__pseudo_scatter32_32", "__pseudo_scatter_base_offsets32_32",
"__pseudo_scatter_base_offsets32_32", false),
GSInfo("__pseudo_scatter32_64", "__pseudo_scatter_base_offsets32_64",
"__pseudo_scatter_base_offsets32_64", false),
GSInfo("__pseudo_gather64_8", "__pseudo_gather_base_offsets64_8",
"__pseudo_gather_base_offsets32_8", true),
GSInfo("__pseudo_gather64_16", "__pseudo_gather_base_offsets64_16",
"__pseudo_gather_base_offsets32_16", true),
GSInfo("__pseudo_gather64_32", "__pseudo_gather_base_offsets64_32",
"__pseudo_gather_base_offsets32_32", true),
GSInfo("__pseudo_gather64_64", "__pseudo_gather_base_offsets64_64",
"__pseudo_gather_base_offsets32_64", true),
GSInfo("__pseudo_scatter64_8", "__pseudo_scatter_base_offsets64_8",
"__pseudo_scatter_base_offsets32_8", false),
GSInfo("__pseudo_scatter64_16", "__pseudo_scatter_base_offsets64_16",
"__pseudo_scatter_base_offsets32_16", false),
GSInfo("__pseudo_scatter64_32", "__pseudo_scatter_base_offsets64_32",
"__pseudo_scatter_base_offsets32_32", false),
GSInfo("__pseudo_scatter64_64", "__pseudo_scatter_base_offsets64_64",
"__pseudo_scatter_base_offsets32_64", false),
GSInfo("__pseudo_gather32_i8", "__pseudo_gather_base_offsets32_i8",
"__pseudo_gather_base_offsets32_i8", true),
GSInfo("__pseudo_gather32_i16", "__pseudo_gather_base_offsets32_i16",
"__pseudo_gather_base_offsets32_i16", true),
GSInfo("__pseudo_gather32_i32", "__pseudo_gather_base_offsets32_i32",
"__pseudo_gather_base_offsets32_i32", true),
GSInfo("__pseudo_gather32_i64", "__pseudo_gather_base_offsets32_i64",
"__pseudo_gather_base_offsets32_i64", true),
GSInfo("__pseudo_scatter32_i8", "__pseudo_scatter_base_offsets32_i8",
"__pseudo_scatter_base_offsets32_i8", false),
GSInfo("__pseudo_scatter32_i16", "__pseudo_scatter_base_offsets32_i16",
"__pseudo_scatter_base_offsets32_i16", false),
GSInfo("__pseudo_scatter32_i32", "__pseudo_scatter_base_offsets32_i32",
"__pseudo_scatter_base_offsets32_i32", false),
GSInfo("__pseudo_scatter32_i64", "__pseudo_scatter_base_offsets32_i64",
"__pseudo_scatter_base_offsets32_i64", false),
GSInfo("__pseudo_gather64_i8", "__pseudo_gather_base_offsets64_i8",
"__pseudo_gather_base_offsets32_i8", true),
GSInfo("__pseudo_gather64_i16", "__pseudo_gather_base_offsets64_i16",
"__pseudo_gather_base_offsets32_i16", true),
GSInfo("__pseudo_gather64_i32", "__pseudo_gather_base_offsets64_i32",
"__pseudo_gather_base_offsets32_i32", true),
GSInfo("__pseudo_gather64_i64", "__pseudo_gather_base_offsets64_i64",
"__pseudo_gather_base_offsets32_i64", true),
GSInfo("__pseudo_scatter64_i8", "__pseudo_scatter_base_offsets64_i8",
"__pseudo_scatter_base_offsets32_i8", false),
GSInfo("__pseudo_scatter64_i16", "__pseudo_scatter_base_offsets64_i16",
"__pseudo_scatter_base_offsets32_i16", false),
GSInfo("__pseudo_scatter64_i32", "__pseudo_scatter_base_offsets64_i32",
"__pseudo_scatter_base_offsets32_i32", false),
GSInfo("__pseudo_scatter64_i64", "__pseudo_scatter_base_offsets64_i64",
"__pseudo_scatter_base_offsets32_i64", false),
};
int numGSFuncs = sizeof(gsFuncs) / sizeof(gsFuncs[0]);
for (int i = 0; i < numGSFuncs; ++i)
@@ -2281,40 +2284,40 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
DEBUG_START_PASS("GSToLoadStorePass");
GatherImpInfo gInfo[] = {
GatherImpInfo("__pseudo_gather_base_offsets32_8", "__load_and_broadcast_i8",
GatherImpInfo("__pseudo_gather_base_offsets32_i8", "__load_and_broadcast_i8",
"__masked_load_i8", 1),
GatherImpInfo("__pseudo_gather_base_offsets32_16", "__load_and_broadcast_i16",
GatherImpInfo("__pseudo_gather_base_offsets32_i16", "__load_and_broadcast_i16",
"__masked_load_i16", 2),
GatherImpInfo("__pseudo_gather_base_offsets32_32", "__load_and_broadcast_i32",
GatherImpInfo("__pseudo_gather_base_offsets32_i32", "__load_and_broadcast_i32",
"__masked_load_i32", 4),
GatherImpInfo("__pseudo_gather_base_offsets32_64", "__load_and_broadcast_i64",
GatherImpInfo("__pseudo_gather_base_offsets32_i64", "__load_and_broadcast_i64",
"__masked_load_i64", 8),
GatherImpInfo("__pseudo_gather_base_offsets64_8", "__load_and_broadcast_i8",
GatherImpInfo("__pseudo_gather_base_offsets64_i8", "__load_and_broadcast_i8",
"__masked_load_i8", 1),
GatherImpInfo("__pseudo_gather_base_offsets64_16", "__load_and_broadcast_i16",
GatherImpInfo("__pseudo_gather_base_offsets64_i16", "__load_and_broadcast_i16",
"__masked_load_i16", 2),
GatherImpInfo("__pseudo_gather_base_offsets64_32", "__load_and_broadcast_i32",
GatherImpInfo("__pseudo_gather_base_offsets64_i32", "__load_and_broadcast_i32",
"__masked_load_i32", 4),
GatherImpInfo("__pseudo_gather_base_offsets64_64", "__load_and_broadcast_i64",
"__masked_load_i64", 8)
GatherImpInfo("__pseudo_gather_base_offsets64_i64", "__load_and_broadcast_i64",
"__masked_load_i64", 8),
};
ScatterImpInfo sInfo[] = {
ScatterImpInfo("__pseudo_scatter_base_offsets32_8", "__pseudo_masked_store_i8",
ScatterImpInfo("__pseudo_scatter_base_offsets32_i8", "__pseudo_masked_store_i8",
LLVMTypes::Int8VectorPointerType, 1),
ScatterImpInfo("__pseudo_scatter_base_offsets32_16", "__pseudo_masked_store_i16",
ScatterImpInfo("__pseudo_scatter_base_offsets32_i16", "__pseudo_masked_store_i16",
LLVMTypes::Int16VectorPointerType, 2),
ScatterImpInfo("__pseudo_scatter_base_offsets32_32", "__pseudo_masked_store_i32",
ScatterImpInfo("__pseudo_scatter_base_offsets32_i32", "__pseudo_masked_store_i32",
LLVMTypes::Int32VectorPointerType, 4),
ScatterImpInfo("__pseudo_scatter_base_offsets32_64", "__pseudo_masked_store_i64",
ScatterImpInfo("__pseudo_scatter_base_offsets32_i64", "__pseudo_masked_store_i64",
LLVMTypes::Int64VectorPointerType, 8),
ScatterImpInfo("__pseudo_scatter_base_offsets64_8", "__pseudo_masked_store_i8",
ScatterImpInfo("__pseudo_scatter_base_offsets64_i8", "__pseudo_masked_store_i8",
LLVMTypes::Int8VectorPointerType, 1),
ScatterImpInfo("__pseudo_scatter_base_offsets64_16", "__pseudo_masked_store_i16",
ScatterImpInfo("__pseudo_scatter_base_offsets64_i16", "__pseudo_masked_store_i16",
LLVMTypes::Int16VectorPointerType, 2),
ScatterImpInfo("__pseudo_scatter_base_offsets64_32", "__pseudo_masked_store_i32",
ScatterImpInfo("__pseudo_scatter_base_offsets64_i32", "__pseudo_masked_store_i32",
LLVMTypes::Int32VectorPointerType, 4),
ScatterImpInfo("__pseudo_scatter_base_offsets64_64", "__pseudo_masked_store_i64",
LLVMTypes::Int64VectorPointerType, 8)
ScatterImpInfo("__pseudo_scatter_base_offsets64_i64", "__pseudo_masked_store_i64",
LLVMTypes::Int64VectorPointerType, 8),
};
bool modifiedAny = false;
@@ -3387,8 +3390,8 @@ GatherCoalescePass::runOnBasicBlock(llvm::BasicBlock &bb) {
DEBUG_START_PASS("GatherCoalescePass");
llvm::Function *gatherFuncs[] = {
m->module->getFunction("__pseudo_gather_base_offsets32_32"),
m->module->getFunction("__pseudo_gather_base_offsets64_32"),
m->module->getFunction("__pseudo_gather_base_offsets32_i32"),
m->module->getFunction("__pseudo_gather_base_offsets64_i32"),
};
int nGatherFuncs = sizeof(gatherFuncs) / sizeof(gatherFuncs[0]);
@@ -3570,45 +3573,45 @@ PseudoGSToGSPass::runOnBasicBlock(llvm::BasicBlock &bb) {
DEBUG_START_PASS("PseudoGSToGSPass");
LowerGSInfo lgsInfo[] = {
LowerGSInfo("__pseudo_gather_base_offsets32_8", "__gather_base_offsets32_i8", true),
LowerGSInfo("__pseudo_gather_base_offsets32_16", "__gather_base_offsets32_i16", true),
LowerGSInfo("__pseudo_gather_base_offsets32_32", "__gather_base_offsets32_i32", true),
LowerGSInfo("__pseudo_gather_base_offsets32_64", "__gather_base_offsets32_i64", true),
LowerGSInfo("__pseudo_gather_base_offsets32_i8", "__gather_base_offsets32_i8", true),
LowerGSInfo("__pseudo_gather_base_offsets32_i16", "__gather_base_offsets32_i16", true),
LowerGSInfo("__pseudo_gather_base_offsets32_i32", "__gather_base_offsets32_i32", true),
LowerGSInfo("__pseudo_gather_base_offsets32_i64", "__gather_base_offsets32_i64", true),
LowerGSInfo("__pseudo_gather_base_offsets64_8", "__gather_base_offsets64_i8", true),
LowerGSInfo("__pseudo_gather_base_offsets64_16", "__gather_base_offsets64_i16", true),
LowerGSInfo("__pseudo_gather_base_offsets64_32", "__gather_base_offsets64_i32", true),
LowerGSInfo("__pseudo_gather_base_offsets64_64", "__gather_base_offsets64_i64", true),
LowerGSInfo("__pseudo_gather_base_offsets64_i8", "__gather_base_offsets64_i8", true),
LowerGSInfo("__pseudo_gather_base_offsets64_i16", "__gather_base_offsets64_i16", true),
LowerGSInfo("__pseudo_gather_base_offsets64_i32", "__gather_base_offsets64_i32", true),
LowerGSInfo("__pseudo_gather_base_offsets64_i64", "__gather_base_offsets64_i64", true),
LowerGSInfo("__pseudo_gather32_8", "__gather32_i8", true),
LowerGSInfo("__pseudo_gather32_16", "__gather32_i16", true),
LowerGSInfo("__pseudo_gather32_32", "__gather32_i32", true),
LowerGSInfo("__pseudo_gather32_64", "__gather32_i64", true),
LowerGSInfo("__pseudo_gather32_i8", "__gather32_i8", true),
LowerGSInfo("__pseudo_gather32_i16", "__gather32_i16", true),
LowerGSInfo("__pseudo_gather32_i32", "__gather32_i32", true),
LowerGSInfo("__pseudo_gather32_i64", "__gather32_i64", true),
LowerGSInfo("__pseudo_gather64_8", "__gather64_i8", true),
LowerGSInfo("__pseudo_gather64_16", "__gather64_i16", true),
LowerGSInfo("__pseudo_gather64_32", "__gather64_i32", true),
LowerGSInfo("__pseudo_gather64_64", "__gather64_i64", true),
LowerGSInfo("__pseudo_gather64_i8", "__gather64_i8", true),
LowerGSInfo("__pseudo_gather64_i16", "__gather64_i16", true),
LowerGSInfo("__pseudo_gather64_i32", "__gather64_i32", true),
LowerGSInfo("__pseudo_gather64_i64", "__gather64_i64", true),
LowerGSInfo("__pseudo_scatter_base_offsets32_8", "__scatter_base_offsets32_i8", false),
LowerGSInfo("__pseudo_scatter_base_offsets32_16", "__scatter_base_offsets32_i16", false),
LowerGSInfo("__pseudo_scatter_base_offsets32_32", "__scatter_base_offsets32_i32", false),
LowerGSInfo("__pseudo_scatter_base_offsets32_64", "__scatter_base_offsets32_i64", false),
LowerGSInfo("__pseudo_scatter_base_offsets32_i8", "__scatter_base_offsets32_i8", false),
LowerGSInfo("__pseudo_scatter_base_offsets32_i16", "__scatter_base_offsets32_i16", false),
LowerGSInfo("__pseudo_scatter_base_offsets32_i32", "__scatter_base_offsets32_i32", false),
LowerGSInfo("__pseudo_scatter_base_offsets32_i64", "__scatter_base_offsets32_i64", false),
LowerGSInfo("__pseudo_scatter_base_offsets64_8", "__scatter_base_offsets64_i8", false),
LowerGSInfo("__pseudo_scatter_base_offsets64_16", "__scatter_base_offsets64_i16", false),
LowerGSInfo("__pseudo_scatter_base_offsets64_32", "__scatter_base_offsets64_i32", false),
LowerGSInfo("__pseudo_scatter_base_offsets64_64", "__scatter_base_offsets64_i64", false),
LowerGSInfo("__pseudo_scatter_base_offsets64_i8", "__scatter_base_offsets64_i8", false),
LowerGSInfo("__pseudo_scatter_base_offsets64_i16", "__scatter_base_offsets64_i16", false),
LowerGSInfo("__pseudo_scatter_base_offsets64_i32", "__scatter_base_offsets64_i32", false),
LowerGSInfo("__pseudo_scatter_base_offsets64_i64", "__scatter_base_offsets64_i64", false),
LowerGSInfo("__pseudo_scatter32_8", "__scatter32_i8", false),
LowerGSInfo("__pseudo_scatter32_16", "__scatter32_i16", false),
LowerGSInfo("__pseudo_scatter32_32", "__scatter32_i32", false),
LowerGSInfo("__pseudo_scatter32_64", "__scatter32_i64", false),
LowerGSInfo("__pseudo_scatter32_i8", "__scatter32_i8", false),
LowerGSInfo("__pseudo_scatter32_i16", "__scatter32_i16", false),
LowerGSInfo("__pseudo_scatter32_i32", "__scatter32_i32", false),
LowerGSInfo("__pseudo_scatter32_i64", "__scatter32_i64", false),
LowerGSInfo("__pseudo_scatter64_8", "__scatter64_i8", false),
LowerGSInfo("__pseudo_scatter64_16", "__scatter64_i16", false),
LowerGSInfo("__pseudo_scatter64_32", "__scatter64_i32", false),
LowerGSInfo("__pseudo_scatter64_64", "__scatter64_i64", false),
LowerGSInfo("__pseudo_scatter64_i8", "__scatter64_i8", false),
LowerGSInfo("__pseudo_scatter64_i16", "__scatter64_i16", false),
LowerGSInfo("__pseudo_scatter64_i32", "__scatter64_i32", false),
LowerGSInfo("__pseudo_scatter64_i64", "__scatter64_i64", false),
};
bool modifiedAny = false;