Remove load_and_broadcast from built-ins.
Now that we never run with the mask all off, the load-and-broadcast logic no longer needs to live in a built-in function just so that it can check the mask. In the one place where it was used (turning gathers to the same location into a load and broadcast), we now just emit the code for that directly.
This commit is contained in:
@@ -353,13 +353,6 @@ define i64 @__reduce_max_uint64(<16 x i64>) nounwind readnone alwaysinline {
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; unaligned loads/loads+broadcasts
|
;; unaligned loads/loads+broadcasts
|
||||||
|
|
||||||
load_and_broadcast(i8)
|
|
||||||
load_and_broadcast(i16)
|
|
||||||
load_and_broadcast(i32)
|
|
||||||
load_and_broadcast(float)
|
|
||||||
load_and_broadcast(i64)
|
|
||||||
load_and_broadcast(double)
|
|
||||||
|
|
||||||
; no masked load instruction for i8 and i16 types??
|
; no masked load instruction for i8 and i16 types??
|
||||||
masked_load(i8, 1)
|
masked_load(i8, 1)
|
||||||
masked_load(i16, 2)
|
masked_load(i16, 2)
|
||||||
|
|||||||
@@ -334,12 +334,6 @@ define i64 @__reduce_max_uint64(<8 x i64>) nounwind readnone alwaysinline {
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; unaligned loads/loads+broadcasts
|
;; unaligned loads/loads+broadcasts
|
||||||
|
|
||||||
load_and_broadcast(i8)
|
|
||||||
load_and_broadcast(i16)
|
|
||||||
load_and_broadcast(i32)
|
|
||||||
load_and_broadcast(float)
|
|
||||||
load_and_broadcast(i64)
|
|
||||||
load_and_broadcast(double)
|
|
||||||
|
|
||||||
; no masked load instruction for i8 and i16 types??
|
; no masked load instruction for i8 and i16 types??
|
||||||
masked_load(i8, 1)
|
masked_load(i8, 1)
|
||||||
|
|||||||
@@ -21,12 +21,6 @@ gen_masked_store(i64)
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; unaligned loads/loads+broadcasts
|
;; unaligned loads/loads+broadcasts
|
||||||
|
|
||||||
load_and_broadcast(i8)
|
|
||||||
load_and_broadcast(i16)
|
|
||||||
load_and_broadcast(i32)
|
|
||||||
load_and_broadcast(float)
|
|
||||||
load_and_broadcast(i64)
|
|
||||||
load_and_broadcast(double)
|
|
||||||
|
|
||||||
masked_load(i8, 1)
|
masked_load(i8, 1)
|
||||||
masked_load(i16, 2)
|
masked_load(i16, 2)
|
||||||
|
|||||||
@@ -230,12 +230,6 @@ declare i64 @__reduce_max_uint64(<WIDTH x i64>) nounwind readnone
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; unaligned loads/loads+broadcasts
|
;; unaligned loads/loads+broadcasts
|
||||||
|
|
||||||
load_and_broadcast(i8)
|
|
||||||
load_and_broadcast(i16)
|
|
||||||
load_and_broadcast(i32)
|
|
||||||
load_and_broadcast(float)
|
|
||||||
load_and_broadcast(i64)
|
|
||||||
load_and_broadcast(double)
|
|
||||||
|
|
||||||
declare <WIDTH x i8> @__masked_load_i8(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
|
declare <WIDTH x i8> @__masked_load_i8(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
|
||||||
declare <WIDTH x i16> @__masked_load_i16(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
|
declare <WIDTH x i16> @__masked_load_i16(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
|
||||||
|
|||||||
@@ -433,12 +433,6 @@ reduce_equal(8)
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; unaligned loads/loads+broadcasts
|
;; unaligned loads/loads+broadcasts
|
||||||
|
|
||||||
load_and_broadcast(i8)
|
|
||||||
load_and_broadcast(i16)
|
|
||||||
load_and_broadcast(i32)
|
|
||||||
load_and_broadcast(float)
|
|
||||||
load_and_broadcast(i64)
|
|
||||||
load_and_broadcast(double)
|
|
||||||
|
|
||||||
masked_load(i8, 1)
|
masked_load(i8, 1)
|
||||||
masked_load(i16, 2)
|
masked_load(i16, 2)
|
||||||
|
|||||||
@@ -562,12 +562,6 @@ gen_masked_store(i64)
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; unaligned loads/loads+broadcasts
|
;; unaligned loads/loads+broadcasts
|
||||||
|
|
||||||
load_and_broadcast(i8)
|
|
||||||
load_and_broadcast(i16)
|
|
||||||
load_and_broadcast(i32)
|
|
||||||
load_and_broadcast(float)
|
|
||||||
load_and_broadcast(i64)
|
|
||||||
load_and_broadcast(double)
|
|
||||||
|
|
||||||
masked_load(i8, 1)
|
masked_load(i8, 1)
|
||||||
masked_load(i16, 2)
|
masked_load(i16, 2)
|
||||||
|
|||||||
@@ -360,12 +360,6 @@ reduce_equal(8)
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; unaligned loads/loads+broadcasts
|
;; unaligned loads/loads+broadcasts
|
||||||
|
|
||||||
load_and_broadcast(i8)
|
|
||||||
load_and_broadcast(i16)
|
|
||||||
load_and_broadcast(i32)
|
|
||||||
load_and_broadcast(float)
|
|
||||||
load_and_broadcast(i64)
|
|
||||||
load_and_broadcast(double)
|
|
||||||
|
|
||||||
masked_load(i8, 1)
|
masked_load(i8, 1)
|
||||||
masked_load(i16, 2)
|
masked_load(i16, 2)
|
||||||
|
|||||||
@@ -461,12 +461,6 @@ masked_store_float_double()
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; unaligned loads/loads+broadcasts
|
;; unaligned loads/loads+broadcasts
|
||||||
|
|
||||||
load_and_broadcast(i8)
|
|
||||||
load_and_broadcast(i16)
|
|
||||||
load_and_broadcast(i32)
|
|
||||||
load_and_broadcast(float)
|
|
||||||
load_and_broadcast(i64)
|
|
||||||
load_and_broadcast(double)
|
|
||||||
|
|
||||||
masked_load(i8, 1)
|
masked_load(i8, 1)
|
||||||
masked_load(i16, 2)
|
masked_load(i16, 2)
|
||||||
|
|||||||
@@ -1796,19 +1796,6 @@ define void @__keep_funcs_live(i8 * %ptr, <WIDTH x i8> %v8, <WIDTH x i16> %v16,
|
|||||||
%mld = call <WIDTH x double> @__masked_load_double(i8 * %ptr, <WIDTH x MASK> %mask)
|
%mld = call <WIDTH x double> @__masked_load_double(i8 * %ptr, <WIDTH x MASK> %mask)
|
||||||
call void @__usedouble(<WIDTH x double> %mld)
|
call void @__usedouble(<WIDTH x double> %mld)
|
||||||
|
|
||||||
%lb8 = call <WIDTH x i8> @__load_and_broadcast_i8(i8 * %ptr, <WIDTH x MASK> %mask)
|
|
||||||
call void @__use8(<WIDTH x i8> %lb8)
|
|
||||||
%lb16 = call <WIDTH x i16> @__load_and_broadcast_i16(i8 * %ptr, <WIDTH x MASK> %mask)
|
|
||||||
call void @__use16(<WIDTH x i16> %lb16)
|
|
||||||
%lb32 = call <WIDTH x i32> @__load_and_broadcast_i32(i8 * %ptr, <WIDTH x MASK> %mask)
|
|
||||||
call void @__use32(<WIDTH x i32> %lb32)
|
|
||||||
%lbf = call <WIDTH x float> @__load_and_broadcast_float(i8 * %ptr, <WIDTH x MASK> %mask)
|
|
||||||
call void @__usefloat(<WIDTH x float> %lbf)
|
|
||||||
%lb64 = call <WIDTH x i64> @__load_and_broadcast_i64(i8 * %ptr, <WIDTH x MASK> %mask)
|
|
||||||
call void @__use64(<WIDTH x i64> %lb64)
|
|
||||||
%lbd = call <WIDTH x double> @__load_and_broadcast_double(i8 * %ptr, <WIDTH x MASK> %mask)
|
|
||||||
call void @__usedouble(<WIDTH x double> %lbd)
|
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; stores
|
;; stores
|
||||||
%pv8 = bitcast i8 * %ptr to <WIDTH x i8> *
|
%pv8 = bitcast i8 * %ptr to <WIDTH x i8> *
|
||||||
@@ -2680,23 +2667,6 @@ i64minmax(WIDTH,min,uint64,ult)
|
|||||||
i64minmax(WIDTH,max,uint64,ugt)
|
i64minmax(WIDTH,max,uint64,ugt)
|
||||||
')
|
')
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
||||||
;; Emit code to safely load a scalar value and broadcast it across the
|
|
||||||
;; elements of a vector. Parameter:
|
|
||||||
;; $1: element type for which to emit the function (i32, i64, ...)
|
|
||||||
|
|
||||||
define(`load_and_broadcast', `
|
|
||||||
define <WIDTH x $1> @__load_and_broadcast_$1(i8 *, <WIDTH x MASK> %mask) nounwind alwaysinline {
|
|
||||||
%ptr = bitcast i8 * %0 to $1 *
|
|
||||||
%val = load $1 * %ptr
|
|
||||||
|
|
||||||
%ret0 = insertelement <WIDTH x $1> undef, $1 %val, i32 0
|
|
||||||
forloop(i, 1, eval(WIDTH-1), `
|
|
||||||
%ret`'i = insertelement <WIDTH x $1> %ret`'eval(i-1), $1 %val, i32 i')
|
|
||||||
ret <WIDTH x $1> %ret`'eval(WIDTH-1)
|
|
||||||
}
|
|
||||||
')
|
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; Emit general-purpose code to do a masked load for targets that dont have
|
;; Emit general-purpose code to do a masked load for targets that dont have
|
||||||
;; an instruction to do that. Parameters:
|
;; an instruction to do that. Parameters:
|
||||||
|
|||||||
86
opt.cpp
86
opt.cpp
@@ -2266,19 +2266,18 @@ char GSToLoadStorePass::ID = 0;
|
|||||||
|
|
||||||
|
|
||||||
struct GatherImpInfo {
|
struct GatherImpInfo {
|
||||||
GatherImpInfo(const char *pName, const char *lbName, const char *lmName,
|
GatherImpInfo(const char *pName, const char *lmName, llvm::Type *st,
|
||||||
int a)
|
int a)
|
||||||
: align(a) {
|
: align(a) {
|
||||||
pseudoFunc = m->module->getFunction(pName);
|
pseudoFunc = m->module->getFunction(pName);
|
||||||
loadBroadcastFunc = m->module->getFunction(lbName);
|
|
||||||
loadMaskedFunc = m->module->getFunction(lmName);
|
loadMaskedFunc = m->module->getFunction(lmName);
|
||||||
|
Assert(pseudoFunc != NULL && loadMaskedFunc != NULL);
|
||||||
Assert(pseudoFunc != NULL && loadBroadcastFunc != NULL &&
|
scalarType = st;
|
||||||
loadMaskedFunc != NULL);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
llvm::Function *pseudoFunc;
|
llvm::Function *pseudoFunc;
|
||||||
llvm::Function *loadBroadcastFunc;
|
|
||||||
llvm::Function *loadMaskedFunc;
|
llvm::Function *loadMaskedFunc;
|
||||||
|
llvm::Type *scalarType;
|
||||||
const int align;
|
const int align;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -2312,30 +2311,30 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
DEBUG_START_PASS("GSToLoadStorePass");
|
DEBUG_START_PASS("GSToLoadStorePass");
|
||||||
|
|
||||||
GatherImpInfo gInfo[] = {
|
GatherImpInfo gInfo[] = {
|
||||||
GatherImpInfo("__pseudo_gather_base_offsets32_i8", "__load_and_broadcast_i8",
|
GatherImpInfo("__pseudo_gather_base_offsets32_i8", "__masked_load_i8",
|
||||||
"__masked_load_i8", 1),
|
LLVMTypes::Int8Type, 1),
|
||||||
GatherImpInfo("__pseudo_gather_base_offsets32_i16", "__load_and_broadcast_i16",
|
GatherImpInfo("__pseudo_gather_base_offsets32_i16", "__masked_load_i16",
|
||||||
"__masked_load_i16", 2),
|
LLVMTypes::Int16Type, 2),
|
||||||
GatherImpInfo("__pseudo_gather_base_offsets32_i32", "__load_and_broadcast_i32",
|
GatherImpInfo("__pseudo_gather_base_offsets32_i32", "__masked_load_i32",
|
||||||
"__masked_load_i32", 4),
|
LLVMTypes::Int32Type, 4),
|
||||||
GatherImpInfo("__pseudo_gather_base_offsets32_float", "__load_and_broadcast_float",
|
GatherImpInfo("__pseudo_gather_base_offsets32_float", "__masked_load_float",
|
||||||
"__masked_load_float", 4),
|
LLVMTypes::FloatType, 4),
|
||||||
GatherImpInfo("__pseudo_gather_base_offsets32_i64", "__load_and_broadcast_i64",
|
GatherImpInfo("__pseudo_gather_base_offsets32_i64", "__masked_load_i64",
|
||||||
"__masked_load_i64", 8),
|
LLVMTypes::Int64Type, 8),
|
||||||
GatherImpInfo("__pseudo_gather_base_offsets32_double", "__load_and_broadcast_double",
|
GatherImpInfo("__pseudo_gather_base_offsets32_double", "__masked_load_double",
|
||||||
"__masked_load_double", 8),
|
LLVMTypes::DoubleType, 8),
|
||||||
GatherImpInfo("__pseudo_gather_base_offsets64_i8", "__load_and_broadcast_i8",
|
GatherImpInfo("__pseudo_gather_base_offsets64_i8", "__masked_load_i8",
|
||||||
"__masked_load_i8", 1),
|
LLVMTypes::Int8Type, 1),
|
||||||
GatherImpInfo("__pseudo_gather_base_offsets64_i16", "__load_and_broadcast_i16",
|
GatherImpInfo("__pseudo_gather_base_offsets64_i16", "__masked_load_i16",
|
||||||
"__masked_load_i16", 2),
|
LLVMTypes::Int16Type, 2),
|
||||||
GatherImpInfo("__pseudo_gather_base_offsets64_i32", "__load_and_broadcast_i32",
|
GatherImpInfo("__pseudo_gather_base_offsets64_i32", "__masked_load_i32",
|
||||||
"__masked_load_i32", 4),
|
LLVMTypes::Int32Type, 4),
|
||||||
GatherImpInfo("__pseudo_gather_base_offsets64_float", "__load_and_broadcast_float",
|
GatherImpInfo("__pseudo_gather_base_offsets64_float", "__masked_load_float",
|
||||||
"__masked_load_float", 4),
|
LLVMTypes::FloatType, 4),
|
||||||
GatherImpInfo("__pseudo_gather_base_offsets64_i64", "__load_and_broadcast_i64",
|
GatherImpInfo("__pseudo_gather_base_offsets64_i64", "__masked_load_i64",
|
||||||
"__masked_load_i64", 8),
|
LLVMTypes::Int64Type, 8),
|
||||||
GatherImpInfo("__pseudo_gather_base_offsets64_double", "__load_and_broadcast_double",
|
GatherImpInfo("__pseudo_gather_base_offsets64_double", "__masked_load_double",
|
||||||
"__masked_load_double", 8)
|
LLVMTypes::DoubleType, 8)
|
||||||
};
|
};
|
||||||
ScatterImpInfo sInfo[] = {
|
ScatterImpInfo sInfo[] = {
|
||||||
ScatterImpInfo("__pseudo_scatter_base_offsets32_i8", "__pseudo_masked_store_i8",
|
ScatterImpInfo("__pseudo_scatter_base_offsets32_i8", "__pseudo_masked_store_i8",
|
||||||
@@ -2443,17 +2442,23 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
if (gatherInfo != NULL) {
|
if (gatherInfo != NULL) {
|
||||||
// A gather with everyone going to the same location is
|
// A gather with everyone going to the same location is
|
||||||
// handled as a scalar load and broadcast across the lanes.
|
// handled as a scalar load and broadcast across the lanes.
|
||||||
// Note that we do still have to pass the mask to the
|
|
||||||
// __load_and_broadcast_* functions, since they shouldn't
|
|
||||||
// access memory if the mask is all off (the location may
|
|
||||||
// be invalid in that case).
|
|
||||||
Debug(pos, "Transformed gather to scalar load and broadcast!");
|
Debug(pos, "Transformed gather to scalar load and broadcast!");
|
||||||
llvm::Instruction *newCall =
|
|
||||||
lCallInst(gatherInfo->loadBroadcastFunc, ptr, mask,
|
|
||||||
LLVMGetName(callInst, "_broadcast"));
|
|
||||||
lCopyMetadata(newCall, callInst);
|
|
||||||
llvm::ReplaceInstWithInst(callInst, newCall);
|
|
||||||
|
|
||||||
|
ptr = new llvm::BitCastInst(ptr, llvm::PointerType::get(gatherInfo->scalarType, 0),
|
||||||
|
ptr->getName(), callInst);
|
||||||
|
llvm::Value *scalarValue = new llvm::LoadInst(ptr, callInst->getName(), callInst);
|
||||||
|
llvm::Value *vecValue = llvm::UndefValue::get(callInst->getType());
|
||||||
|
for (int i = 0; i < g->target.vectorWidth; ++i) {
|
||||||
|
if (i < g->target.vectorWidth - 1)
|
||||||
|
vecValue = llvm::InsertElementInst::Create(vecValue, scalarValue, LLVMInt32(i),
|
||||||
|
callInst->getName(), callInst);
|
||||||
|
else
|
||||||
|
vecValue = llvm::InsertElementInst::Create(vecValue, scalarValue, LLVMInt32(i),
|
||||||
|
callInst->getName());
|
||||||
|
}
|
||||||
|
lCopyMetadata(vecValue, callInst);
|
||||||
|
llvm::ReplaceInstWithInst(callInst,
|
||||||
|
llvm::dyn_cast<llvm::Instruction>(vecValue));
|
||||||
modifiedAny = true;
|
modifiedAny = true;
|
||||||
goto restart;
|
goto restart;
|
||||||
}
|
}
|
||||||
@@ -3894,9 +3899,6 @@ MakeInternalFuncsStaticPass::runOnModule(llvm::Module &module) {
|
|||||||
"__gather_elt64_i8", "__gather_elt64_i16",
|
"__gather_elt64_i8", "__gather_elt64_i16",
|
||||||
"__gather_elt64_i32", "__gather_elt64_i64",
|
"__gather_elt64_i32", "__gather_elt64_i64",
|
||||||
"__gather_elt64_float", "__gather_elt64_double",
|
"__gather_elt64_float", "__gather_elt64_double",
|
||||||
"__load_and_broadcast_i8", "__load_and_broadcast_i16",
|
|
||||||
"__load_and_broadcast_i32", "__load_and_broadcast_i64",
|
|
||||||
"__load_and_broadcast_float", "__load_and_broadcast_double",
|
|
||||||
"__masked_load_i8", "__masked_load_i16",
|
"__masked_load_i8", "__masked_load_i16",
|
||||||
"__masked_load_i32", "__masked_load_i64",
|
"__masked_load_i32", "__masked_load_i64",
|
||||||
"__masked_load_float", "__masked_load_double",
|
"__masked_load_float", "__masked_load_double",
|
||||||
|
|||||||
Reference in New Issue
Block a user