Fixing --opt=force-aligned-memory for LLVM 3.3+

This commit is contained in:
Dmitry Babokin
2013-12-04 19:00:02 +04:00
parent 4a53ed1201
commit 2d2d14744b
4 changed files with 72 additions and 9 deletions

30
ctx.cpp
View File

@@ -316,7 +316,11 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym,
llvm::BasicBlock *offBB = llvm::BasicBlock *offBB =
llvm::BasicBlock::Create(*g->ctx, "entry", llvm::BasicBlock::Create(*g->ctx, "entry",
(llvm::Function *)offFunc, 0); (llvm::Function *)offFunc, 0);
new llvm::StoreInst(LLVMMaskAllOff, globalAllOnMaskPtr, offBB); llvm::StoreInst *inst =
new llvm::StoreInst(LLVMMaskAllOff, globalAllOnMaskPtr, offBB);
if (g->opt.forceAlignedMemory) {
inst->setAlignment(g->target->getNativeVectorAlignment());
}
llvm::ReturnInst::Create(*g->ctx, offBB); llvm::ReturnInst::Create(*g->ctx, offBB);
} }
@@ -2437,7 +2441,13 @@ FunctionEmitContext::LoadInst(llvm::Value *ptr, const char *name) {
if (name == NULL) if (name == NULL)
name = LLVMGetName(ptr, "_load"); name = LLVMGetName(ptr, "_load");
llvm::Instruction *inst = new llvm::LoadInst(ptr, name, bblock); llvm::LoadInst *inst = new llvm::LoadInst(ptr, name, bblock);
if (g->opt.forceAlignedMemory &&
llvm::dyn_cast<llvm::VectorType>(pt->getElementType())) {
inst->setAlignment(g->target->getNativeVectorAlignment());
}
AddDebugPos(inst); AddDebugPos(inst);
return inst; return inst;
} }
@@ -2719,7 +2729,7 @@ FunctionEmitContext::AllocaInst(llvm::Type *llvmType,
inst = new llvm::AllocaInst(llvmType, name ? name : "", bblock); inst = new llvm::AllocaInst(llvmType, name ? name : "", bblock);
// If no alignment was specified but we have an array of a uniform // If no alignment was specified but we have an array of a uniform
// type, then align it to 4 * the native vector width; it's not // type, then align it to the native vector alignment; it's not
// unlikely that this array will be loaded into varying variables with // unlikely that this array will be loaded into varying variables with
// what will be aligned accesses if the uniform -> varying load is done // what will be aligned accesses if the uniform -> varying load is done
// in regular chunks. // in regular chunks.
@@ -2727,7 +2737,7 @@ FunctionEmitContext::AllocaInst(llvm::Type *llvmType,
llvm::dyn_cast<llvm::ArrayType>(llvmType); llvm::dyn_cast<llvm::ArrayType>(llvmType);
if (align == 0 && arrayType != NULL && if (align == 0 && arrayType != NULL &&
!llvm::isa<llvm::VectorType>(arrayType->getElementType())) !llvm::isa<llvm::VectorType>(arrayType->getElementType()))
align = 4 * g->target->getNativeVectorWidth(); align = g->target->getNativeVectorAlignment();
if (align != 0) if (align != 0)
inst->setAlignment(align); inst->setAlignment(align);
@@ -2986,7 +2996,17 @@ FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr) {
return; return;
} }
llvm::Instruction *inst = new llvm::StoreInst(value, ptr, bblock); llvm::PointerType *pt =
llvm::dyn_cast<llvm::PointerType>(ptr->getType());
AssertPos(currentPos, pt != NULL);
llvm::StoreInst *inst = new llvm::StoreInst(value, ptr, bblock);
if (g->opt.forceAlignedMemory &&
llvm::dyn_cast<llvm::VectorType>(pt->getElementType())) {
inst->setAlignment(g->target->getNativeVectorAlignment());
}
AddDebugPos(inst); AddDebugPos(inst);
} }

View File

@@ -191,6 +191,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
m_tf_attributes(NULL), m_tf_attributes(NULL),
#endif #endif
m_nativeVectorWidth(-1), m_nativeVectorWidth(-1),
m_nativeVectorAlignment(-1),
m_dataTypeWidth(-1), m_dataTypeWidth(-1),
m_vectorWidth(-1), m_vectorWidth(-1),
m_generatePIC(pic), m_generatePIC(pic),
@@ -309,6 +310,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
!strcasecmp(isa, "sse2-i32x4")) { !strcasecmp(isa, "sse2-i32x4")) {
this->m_isa = Target::SSE2; this->m_isa = Target::SSE2;
this->m_nativeVectorWidth = 4; this->m_nativeVectorWidth = 4;
this->m_nativeVectorAlignment = 16;
this->m_dataTypeWidth = 32; this->m_dataTypeWidth = 32;
this->m_vectorWidth = 4; this->m_vectorWidth = 4;
this->m_attributes = "+sse,+sse2,-sse3,-sse4a,-ssse3,-popcnt" this->m_attributes = "+sse,+sse2,-sse3,-sse4a,-ssse3,-popcnt"
@@ -325,6 +327,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
!strcasecmp(isa, "sse2-i32x8")) { !strcasecmp(isa, "sse2-i32x8")) {
this->m_isa = Target::SSE2; this->m_isa = Target::SSE2;
this->m_nativeVectorWidth = 4; this->m_nativeVectorWidth = 4;
this->m_nativeVectorAlignment = 16;
this->m_dataTypeWidth = 32; this->m_dataTypeWidth = 32;
this->m_vectorWidth = 8; this->m_vectorWidth = 8;
this->m_attributes = "+sse,+sse2,-sse3,-sse4a,-ssse3,-popcnt" this->m_attributes = "+sse,+sse2,-sse3,-sse4a,-ssse3,-popcnt"
@@ -341,6 +344,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
!strcasecmp(isa, "sse4-i32x4")) { !strcasecmp(isa, "sse4-i32x4")) {
this->m_isa = Target::SSE4; this->m_isa = Target::SSE4;
this->m_nativeVectorWidth = 4; this->m_nativeVectorWidth = 4;
this->m_nativeVectorAlignment = 16;
this->m_dataTypeWidth = 32; this->m_dataTypeWidth = 32;
this->m_vectorWidth = 4; this->m_vectorWidth = 4;
// TODO: why not sse42 and popcnt? // TODO: why not sse42 and popcnt?
@@ -359,6 +363,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
!strcasecmp(isa, "sse4-i32x8")) { !strcasecmp(isa, "sse4-i32x8")) {
this->m_isa = Target::SSE4; this->m_isa = Target::SSE4;
this->m_nativeVectorWidth = 4; this->m_nativeVectorWidth = 4;
this->m_nativeVectorAlignment = 16;
this->m_dataTypeWidth = 32; this->m_dataTypeWidth = 32;
this->m_vectorWidth = 8; this->m_vectorWidth = 8;
this->m_attributes = "+sse,+sse2,+sse3,-sse4a,+ssse3,-popcnt,+cmov" this->m_attributes = "+sse,+sse2,+sse3,-sse4a,+ssse3,-popcnt,+cmov"
@@ -374,6 +379,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
else if (!strcasecmp(isa, "sse4-i8x16")) { else if (!strcasecmp(isa, "sse4-i8x16")) {
this->m_isa = Target::SSE4; this->m_isa = Target::SSE4;
this->m_nativeVectorWidth = 16; this->m_nativeVectorWidth = 16;
this->m_nativeVectorAlignment = 16;
this->m_dataTypeWidth = 8; this->m_dataTypeWidth = 8;
this->m_vectorWidth = 16; this->m_vectorWidth = 16;
this->m_attributes = "+sse,+sse2,+sse3,-sse4a,+ssse3,-popcnt,+cmov" this->m_attributes = "+sse,+sse2,+sse3,-sse4a,+ssse3,-popcnt,+cmov"
@@ -389,6 +395,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
else if (!strcasecmp(isa, "sse4-i16x8")) { else if (!strcasecmp(isa, "sse4-i16x8")) {
this->m_isa = Target::SSE4; this->m_isa = Target::SSE4;
this->m_nativeVectorWidth = 8; this->m_nativeVectorWidth = 8;
this->m_nativeVectorAlignment = 16;
this->m_dataTypeWidth = 16; this->m_dataTypeWidth = 16;
this->m_vectorWidth = 8; this->m_vectorWidth = 8;
this->m_attributes = "+sse,+sse2,+sse3,-sse4a,+ssse3,-popcnt,+cmov" this->m_attributes = "+sse,+sse2,+sse3,-sse4a,+ssse3,-popcnt,+cmov"
@@ -405,6 +412,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
!strcasecmp(isa, "generic-x4")) { !strcasecmp(isa, "generic-x4")) {
this->m_isa = Target::GENERIC; this->m_isa = Target::GENERIC;
this->m_nativeVectorWidth = 4; this->m_nativeVectorWidth = 4;
this->m_nativeVectorAlignment = 16;
this->m_vectorWidth = 4; this->m_vectorWidth = 4;
this->m_maskingIsFree = true; this->m_maskingIsFree = true;
this->m_maskBitCount = 1; this->m_maskBitCount = 1;
@@ -416,6 +424,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
!strcasecmp(isa, "generic-x8")) { !strcasecmp(isa, "generic-x8")) {
this->m_isa = Target::GENERIC; this->m_isa = Target::GENERIC;
this->m_nativeVectorWidth = 8; this->m_nativeVectorWidth = 8;
this->m_nativeVectorAlignment = 32;
this->m_vectorWidth = 8; this->m_vectorWidth = 8;
this->m_maskingIsFree = true; this->m_maskingIsFree = true;
this->m_maskBitCount = 1; this->m_maskBitCount = 1;
@@ -427,6 +436,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
!strcasecmp(isa, "generic-x16")) { !strcasecmp(isa, "generic-x16")) {
this->m_isa = Target::GENERIC; this->m_isa = Target::GENERIC;
this->m_nativeVectorWidth = 16; this->m_nativeVectorWidth = 16;
this->m_nativeVectorAlignment = 64;
this->m_vectorWidth = 16; this->m_vectorWidth = 16;
this->m_maskingIsFree = true; this->m_maskingIsFree = true;
this->m_maskBitCount = 1; this->m_maskBitCount = 1;
@@ -438,6 +448,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
!strcasecmp(isa, "generic-x32")) { !strcasecmp(isa, "generic-x32")) {
this->m_isa = Target::GENERIC; this->m_isa = Target::GENERIC;
this->m_nativeVectorWidth = 32; this->m_nativeVectorWidth = 32;
this->m_nativeVectorAlignment = 64;
this->m_vectorWidth = 32; this->m_vectorWidth = 32;
this->m_maskingIsFree = true; this->m_maskingIsFree = true;
this->m_maskBitCount = 1; this->m_maskBitCount = 1;
@@ -449,6 +460,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
!strcasecmp(isa, "generic-x64")) { !strcasecmp(isa, "generic-x64")) {
this->m_isa = Target::GENERIC; this->m_isa = Target::GENERIC;
this->m_nativeVectorWidth = 64; this->m_nativeVectorWidth = 64;
this->m_nativeVectorAlignment = 64;
this->m_vectorWidth = 64; this->m_vectorWidth = 64;
this->m_maskingIsFree = true; this->m_maskingIsFree = true;
this->m_maskBitCount = 1; this->m_maskBitCount = 1;
@@ -460,6 +472,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
!strcasecmp(isa, "generic-x1")) { !strcasecmp(isa, "generic-x1")) {
this->m_isa = Target::GENERIC; this->m_isa = Target::GENERIC;
this->m_nativeVectorWidth = 1; this->m_nativeVectorWidth = 1;
this->m_nativeVectorAlignment = 16;
this->m_vectorWidth = 1; this->m_vectorWidth = 1;
this->m_maskingIsFree = false; this->m_maskingIsFree = false;
this->m_maskBitCount = 32; this->m_maskBitCount = 32;
@@ -467,6 +480,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
else if (!strcasecmp(isa, "avx1-i32x4")) { else if (!strcasecmp(isa, "avx1-i32x4")) {
this->m_isa = Target::AVX; this->m_isa = Target::AVX;
this->m_nativeVectorWidth = 8; this->m_nativeVectorWidth = 8;
this->m_nativeVectorAlignment = 32;
this->m_dataTypeWidth = 32; this->m_dataTypeWidth = 32;
this->m_vectorWidth = 4; this->m_vectorWidth = 4;
this->m_attributes = "+avx,+popcnt,+cmov"; this->m_attributes = "+avx,+popcnt,+cmov";
@@ -478,6 +492,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
!strcasecmp(isa, "avx1-i32x8")) { !strcasecmp(isa, "avx1-i32x8")) {
this->m_isa = Target::AVX; this->m_isa = Target::AVX;
this->m_nativeVectorWidth = 8; this->m_nativeVectorWidth = 8;
this->m_nativeVectorAlignment = 32;
this->m_dataTypeWidth = 32; this->m_dataTypeWidth = 32;
this->m_vectorWidth = 8; this->m_vectorWidth = 8;
this->m_attributes = "+avx,+popcnt,+cmov"; this->m_attributes = "+avx,+popcnt,+cmov";
@@ -488,6 +503,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
!strcasecmp(isa, "avx1-i64x4")) { !strcasecmp(isa, "avx1-i64x4")) {
this->m_isa = Target::AVX; this->m_isa = Target::AVX;
this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */ this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */
this->m_nativeVectorAlignment = 32;
this->m_dataTypeWidth = 64; this->m_dataTypeWidth = 64;
this->m_vectorWidth = 4; this->m_vectorWidth = 4;
this->m_attributes = "+avx,+popcnt,+cmov"; this->m_attributes = "+avx,+popcnt,+cmov";
@@ -499,6 +515,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
!strcasecmp(isa, "avx1-i32x16")) { !strcasecmp(isa, "avx1-i32x16")) {
this->m_isa = Target::AVX; this->m_isa = Target::AVX;
this->m_nativeVectorWidth = 8; this->m_nativeVectorWidth = 8;
this->m_nativeVectorAlignment = 32;
this->m_dataTypeWidth = 32; this->m_dataTypeWidth = 32;
this->m_vectorWidth = 16; this->m_vectorWidth = 16;
this->m_attributes = "+avx,+popcnt,+cmov"; this->m_attributes = "+avx,+popcnt,+cmov";
@@ -509,6 +526,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
!strcasecmp(isa, "avx1.1-i32x8")) { !strcasecmp(isa, "avx1.1-i32x8")) {
this->m_isa = Target::AVX11; this->m_isa = Target::AVX11;
this->m_nativeVectorWidth = 8; this->m_nativeVectorWidth = 8;
this->m_nativeVectorAlignment = 32;
this->m_dataTypeWidth = 32; this->m_dataTypeWidth = 32;
this->m_vectorWidth = 8; this->m_vectorWidth = 8;
this->m_attributes = "+avx,+popcnt,+cmov,+f16c" this->m_attributes = "+avx,+popcnt,+cmov,+f16c"
@@ -530,6 +548,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
!strcasecmp(isa, "avx1.1-i32x16")) { !strcasecmp(isa, "avx1.1-i32x16")) {
this->m_isa = Target::AVX11; this->m_isa = Target::AVX11;
this->m_nativeVectorWidth = 8; this->m_nativeVectorWidth = 8;
this->m_nativeVectorAlignment = 32;
this->m_dataTypeWidth = 32; this->m_dataTypeWidth = 32;
this->m_vectorWidth = 16; this->m_vectorWidth = 16;
this->m_attributes = "+avx,+popcnt,+cmov,+f16c" this->m_attributes = "+avx,+popcnt,+cmov,+f16c"
@@ -550,6 +569,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
else if (!strcasecmp(isa, "avx1.1-i64x4")) { else if (!strcasecmp(isa, "avx1.1-i64x4")) {
this->m_isa = Target::AVX11; this->m_isa = Target::AVX11;
this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */ this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */
this->m_nativeVectorAlignment = 32;
this->m_dataTypeWidth = 64; this->m_dataTypeWidth = 64;
this->m_vectorWidth = 4; this->m_vectorWidth = 4;
this->m_attributes = "+avx,+popcnt,+cmov,+f16c" this->m_attributes = "+avx,+popcnt,+cmov,+f16c"
@@ -571,6 +591,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
!strcasecmp(isa, "avx2-i32x8")) { !strcasecmp(isa, "avx2-i32x8")) {
this->m_isa = Target::AVX2; this->m_isa = Target::AVX2;
this->m_nativeVectorWidth = 8; this->m_nativeVectorWidth = 8;
this->m_nativeVectorAlignment = 32;
this->m_dataTypeWidth = 32; this->m_dataTypeWidth = 32;
this->m_vectorWidth = 8; this->m_vectorWidth = 8;
this->m_attributes = "+avx2,+popcnt,+cmov,+f16c" this->m_attributes = "+avx2,+popcnt,+cmov,+f16c"
@@ -596,6 +617,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
!strcasecmp(isa, "avx2-i32x16")) { !strcasecmp(isa, "avx2-i32x16")) {
this->m_isa = Target::AVX2; this->m_isa = Target::AVX2;
this->m_nativeVectorWidth = 16; this->m_nativeVectorWidth = 16;
this->m_nativeVectorAlignment = 32;
this->m_dataTypeWidth = 32; this->m_dataTypeWidth = 32;
this->m_vectorWidth = 16; this->m_vectorWidth = 16;
this->m_attributes = "+avx2,+popcnt,+cmov,+f16c" this->m_attributes = "+avx2,+popcnt,+cmov,+f16c"
@@ -620,6 +642,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
else if (!strcasecmp(isa, "avx2-i64x4")) { else if (!strcasecmp(isa, "avx2-i64x4")) {
this->m_isa = Target::AVX2; this->m_isa = Target::AVX2;
this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */ this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */
this->m_nativeVectorAlignment = 32;
this->m_dataTypeWidth = 64; this->m_dataTypeWidth = 64;
this->m_vectorWidth = 4; this->m_vectorWidth = 4;
this->m_attributes = "+avx2,+popcnt,+cmov,+f16c" this->m_attributes = "+avx2,+popcnt,+cmov,+f16c"
@@ -645,6 +668,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
else if (!strcasecmp(isa, "neon-i8x16")) { else if (!strcasecmp(isa, "neon-i8x16")) {
this->m_isa = Target::NEON8; this->m_isa = Target::NEON8;
this->m_nativeVectorWidth = 16; this->m_nativeVectorWidth = 16;
this->m_nativeVectorAlignment = 16;
this->m_dataTypeWidth = 8; this->m_dataTypeWidth = 8;
this->m_vectorWidth = 16; this->m_vectorWidth = 16;
this->m_attributes = "+neon,+fp16"; this->m_attributes = "+neon,+fp16";
@@ -655,6 +679,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
else if (!strcasecmp(isa, "neon-i16x8")) { else if (!strcasecmp(isa, "neon-i16x8")) {
this->m_isa = Target::NEON16; this->m_isa = Target::NEON16;
this->m_nativeVectorWidth = 8; this->m_nativeVectorWidth = 8;
this->m_nativeVectorAlignment = 16;
this->m_dataTypeWidth = 16; this->m_dataTypeWidth = 16;
this->m_vectorWidth = 8; this->m_vectorWidth = 8;
this->m_attributes = "+neon,+fp16"; this->m_attributes = "+neon,+fp16";
@@ -666,6 +691,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
!strcasecmp(isa, "neon-i32x4")) { !strcasecmp(isa, "neon-i32x4")) {
this->m_isa = Target::NEON32; this->m_isa = Target::NEON32;
this->m_nativeVectorWidth = 4; this->m_nativeVectorWidth = 4;
this->m_nativeVectorAlignment = 16;
this->m_dataTypeWidth = 32; this->m_dataTypeWidth = 32;
this->m_vectorWidth = 4; this->m_vectorWidth = 4;
this->m_attributes = "+neon,+fp16"; this->m_attributes = "+neon,+fp16";

9
ispc.h
View File

@@ -260,6 +260,8 @@ public:
int getNativeVectorWidth() const {return m_nativeVectorWidth;} int getNativeVectorWidth() const {return m_nativeVectorWidth;}
int getNativeVectorAlignment() const {return m_nativeVectorAlignment;}
int getDataTypeWidth() const {return m_dataTypeWidth;} int getDataTypeWidth() const {return m_dataTypeWidth;}
int getVectorWidth() const {return m_vectorWidth;} int getVectorWidth() const {return m_vectorWidth;}
@@ -332,6 +334,13 @@ private:
SSE, 8 for AVX, etc.) */ SSE, 8 for AVX, etc.) */
int m_nativeVectorWidth; int m_nativeVectorWidth;
/** Native vector alignment in bytes. Theoretically this may be derived
from the vector size, but it's better to manage the alignment directly.
It allows easier experimentation and better fine-tuning for a particular
platform. This information is primarily used when
--opt=force-aligned-memory is used. */
int m_nativeVectorAlignment;
/** Data type width in bits. Typically it's 32, but could be 8, 16 or 64. /** Data type width in bits. Typically it's 32, but could be 8, 16 or 64.
For generic it's -1, which means undefined. */ For generic it's -1, which means undefined. */
int m_dataTypeWidth; int m_dataTypeWidth;

16
opt.cpp
View File

@@ -904,7 +904,7 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
lCopyMetadata(castPtr, callInst); lCopyMetadata(castPtr, callInst);
int align; int align;
if (g->opt.forceAlignedMemory) if (g->opt.forceAlignedMemory)
align = 0; align = g->target->getNativeVectorAlignment();
else else
align = callInst->getCalledFunction() == avxMaskedLoad32 ? 4 : 8; align = callInst->getCalledFunction() == avxMaskedLoad32 ? 4 : 8;
name = LLVMGetName(callInst->getArgOperand(0), "_load"); name = LLVMGetName(callInst->getArgOperand(0), "_load");
@@ -946,7 +946,7 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
new llvm::StoreInst(rvalue, castPtr, (llvm::Instruction *)NULL); new llvm::StoreInst(rvalue, castPtr, (llvm::Instruction *)NULL);
int align; int align;
if (g->opt.forceAlignedMemory) if (g->opt.forceAlignedMemory)
align = 0; align = g->target->getNativeVectorAlignment();
else else
align = callInst->getCalledFunction() == avxMaskedStore32 ? 4 : 8; align = callInst->getCalledFunction() == avxMaskedStore32 ? 4 : 8;
storeInst->setAlignment(align); storeInst->setAlignment(align);
@@ -2758,7 +2758,8 @@ lImproveMaskedStore(llvm::CallInst *callInst) {
lCopyMetadata(lvalue, callInst); lCopyMetadata(lvalue, callInst);
llvm::Instruction *store = llvm::Instruction *store =
new llvm::StoreInst(rvalue, lvalue, false /* not volatile */, new llvm::StoreInst(rvalue, lvalue, false /* not volatile */,
g->opt.forceAlignedMemory ? 0 : info->align); g->opt.forceAlignedMemory ?
g->target->getNativeVectorAlignment() : info->align);
lCopyMetadata(store, callInst); lCopyMetadata(store, callInst);
llvm::ReplaceInstWithInst(callInst, store); llvm::ReplaceInstWithInst(callInst, store);
return true; return true;
@@ -2821,7 +2822,8 @@ lImproveMaskedLoad(llvm::CallInst *callInst,
callInst); callInst);
llvm::Instruction *load = llvm::Instruction *load =
new llvm::LoadInst(ptr, callInst->getName(), false /* not volatile */, new llvm::LoadInst(ptr, callInst->getName(), false /* not volatile */,
g->opt.forceAlignedMemory ? 0 : info->align, g->opt.forceAlignedMemory ?
g->target->getNativeVectorAlignment() : info->align,
(llvm::Instruction *)NULL); (llvm::Instruction *)NULL);
lCopyMetadata(load, callInst); lCopyMetadata(load, callInst);
llvm::ReplaceInstWithInst(callInst, load); llvm::ReplaceInstWithInst(callInst, load);
@@ -3226,6 +3228,9 @@ lEmitLoads(llvm::Value *basePtr, std::vector<CoalescedLoadOp> &loadOps,
} }
case 4: { case 4: {
// 4-wide vector load // 4-wide vector load
if (g->opt.forceAlignedMemory) {
align = g->target->getNativeVectorAlignment();
}
llvm::VectorType *vt = llvm::VectorType *vt =
llvm::VectorType::get(LLVMTypes::Int32Type, 4); llvm::VectorType::get(LLVMTypes::Int32Type, 4);
loadOps[i].load = lGEPAndLoad(basePtr, start, align, loadOps[i].load = lGEPAndLoad(basePtr, start, align,
@@ -3234,6 +3239,9 @@ lEmitLoads(llvm::Value *basePtr, std::vector<CoalescedLoadOp> &loadOps,
} }
case 8: { case 8: {
// 8-wide vector load // 8-wide vector load
if (g->opt.forceAlignedMemory) {
align = g->target->getNativeVectorAlignment();
}
llvm::VectorType *vt = llvm::VectorType *vt =
llvm::VectorType::get(LLVMTypes::Int32Type, 8); llvm::VectorType::get(LLVMTypes::Int32Type, 8);
loadOps[i].load = lGEPAndLoad(basePtr, start, align, loadOps[i].load = lGEPAndLoad(basePtr, start, align,