diff --git a/ctx.cpp b/ctx.cpp index 27e2f0f4..9c72fd3d 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -1644,7 +1644,8 @@ FunctionEmitContext::StoreInst(llvm::Value *rvalue, llvm::Value *lvalue, return; } - llvm::Instruction *inst = new llvm::StoreInst(rvalue, lvalue, name, bblock); + llvm::Instruction *inst = new llvm::StoreInst(rvalue, lvalue, false /* not volatile */, + 4, bblock); AddDebugPos(inst); } @@ -1662,7 +1663,8 @@ FunctionEmitContext::StoreInst(llvm::Value *rvalue, llvm::Value *lvalue, // Figure out what kind of store we're doing here if (rvalueType->IsUniformType()) { // The easy case; a regular store - llvm::Instruction *si = new llvm::StoreInst(rvalue, lvalue, name, bblock); + llvm::Instruction *si = new llvm::StoreInst(rvalue, lvalue, false /* not volatile */, + 4, bblock); AddDebugPos(si); } else if (llvm::isa(lvalue->getType())) @@ -1673,7 +1675,7 @@ FunctionEmitContext::StoreInst(llvm::Value *rvalue, llvm::Value *lvalue, // Otherwise it is a masked store unless we can determine that the // mask is all on... llvm::Instruction *si = - new llvm::StoreInst(rvalue, lvalue, name, bblock); + new llvm::StoreInst(rvalue, lvalue, false /*not volatile*/, 4, bblock); AddDebugPos(si); } else diff --git a/opt.cpp b/opt.cpp index 583e8324..69e75247 100644 --- a/opt.cpp +++ b/opt.cpp @@ -1131,10 +1131,17 @@ MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) { } else if (maskAsInt == allOnMask) { // The mask is all on, so turn this into a regular store - const llvm::Type *ptrType = llvm::PointerType::get(rvalue->getType(), 0); + const llvm::Type *rvalueType = rvalue->getType(); + const llvm::Type *ptrType = llvm::PointerType::get(rvalueType, 0); + // Need to update this when int8/int16 are added + int align = (called == pms32Func || called == pms64Func || + called == msb32Func) ? 4 : 8; + lvalue = new llvm::BitCastInst(lvalue, ptrType, "lvalue_to_ptr_type", callInst); lCopyMetadata(lvalue, callInst); - llvm::Instruction *store = new llvm::StoreInst(rvalue, lvalue); + llvm::Instruction *store = + new llvm::StoreInst(rvalue, lvalue, false /* not volatile */, + align); lCopyMetadata(store, callInst); llvm::ReplaceInstWithInst(callInst, store); diff --git a/stdlib-avx.ll b/stdlib-avx.ll index 3d125a7e..5ad79adf 100644 --- a/stdlib-avx.ll +++ b/stdlib-avx.ll @@ -520,7 +520,7 @@ define void @__masked_store_blend_32(<8 x i32>* nocapture, <8 x i32>, <8 x float> %newAsFloat, <8 x float> %mask_as_float) %blendAsInt = bitcast <8 x float> %blend to <8 x i32> - store <8 x i32> %blendAsInt, <8 x i32>* %0 + store <8 x i32> %blendAsInt, <8 x i32>* %0, align 4 ret void } diff --git a/stdlib-sse2.ll b/stdlib-sse2.ll index 654e81f1..a67584f9 100644 --- a/stdlib-sse2.ll +++ b/stdlib-sse2.ll @@ -280,7 +280,7 @@ define void @__masked_store_blend_32(<4 x i32>* nocapture, <4 x i32>, <4 x i32> %mask) nounwind alwaysinline { %val = load <4 x i32> * %0 %newval = call <4 x i32> @__vselect_i32(<4 x i32> %val, <4 x i32> %1, <4 x i32> %mask) - store <4 x i32> %newval, <4 x i32> * %0 + store <4 x i32> %newval, <4 x i32> * %0, align 4 ret void } @@ -322,7 +322,7 @@ define void @__masked_store_blend_64(<4 x i64>* nocapture %ptr, <4 x i64> %new, ; reconstruct the final <4 x i64> vector %final = shufflevector <2 x i64> %result01, <2 x i64> %result23, <4 x i32> - store <4 x i64> %final, <4 x i64> * %ptr + store <4 x i64> %final, <4 x i64> * %ptr, align 8 ret void } diff --git a/stdlib-sse4.ll b/stdlib-sse4.ll index f28dc35d..68b8dd90 100644 --- a/stdlib-sse4.ll +++ b/stdlib-sse4.ll @@ -195,7 +195,7 @@ define void @__masked_store_blend_32(<4 x i32>* nocapture, <4 x i32>, <4 x float> %newAsFloat, <4 x float> %mask_as_float) %blendAsInt = bitcast <4 x float> %blend to <4 x i32> - store <4 x i32> %blendAsInt, <4 x i32>* %0 + store <4 x i32> %blendAsInt, <4 x i32>* %0, align 4 ret void } @@ -243,6 +243,6 @@ define void @__masked_store_blend_64(<4 x i64>* nocapture %ptr, <4 x i64> %new, ; reconstruct the final <4 x i64> vector %final = shufflevector <2 x i64> %result01, <2 x i64> %result23, <4 x i32> - store <4 x i64> %final, <4 x i64> * %ptr + store <4 x i64> %final, <4 x i64> * %ptr, align 8 ret void } diff --git a/stdlib-sse4x2.ll b/stdlib-sse4x2.ll index c97fd8ce..39410eca 100644 --- a/stdlib-sse4x2.ll +++ b/stdlib-sse4x2.ll @@ -584,7 +584,7 @@ define void @__masked_store_blend_32(<8 x i32>* nocapture, <8 x i32>, %blend = shufflevector <4 x float> %blend_a, <4 x float> %blend_b, <8 x i32> %blendAsInt = bitcast <8 x float> %blend to <8 x i32> - store <8 x i32> %blendAsInt, <8 x i32>* %0 + store <8 x i32> %blendAsInt, <8 x i32>* %0, align 4 ret void } @@ -651,7 +651,7 @@ define void @__masked_store_blend_64(<8 x i64>* nocapture %ptr, <8 x i64> %new, <4 x i32> %final = shufflevector <4 x i64> %final0123, <4 x i64> %final4567, <8 x i32> - store <8 x i64> %final, <8 x i64> * %ptr + store <8 x i64> %final, <8 x i64> * %ptr, align 8 ret void } diff --git a/stdlib.m4 b/stdlib.m4 index b098e131..b437ec19 100644 --- a/stdlib.m4 +++ b/stdlib.m4 @@ -544,7 +544,7 @@ all_on: ;; vector load %vecptr = bitcast i32 *%startptr to <$1 x i32> * %vec_load = load <$1 x i32> *%vecptr, align 4 - store <$1 x i32> %vec_load, <$1 x i32> * %val_ptr + store <$1 x i32> %vec_load, <$1 x i32> * %val_ptr, align 4 ret i32 $1 not_all_on: