Fixing --opt=force-aligned-memory for LLVM 3.3+
This commit is contained in:
30
ctx.cpp
30
ctx.cpp
@@ -316,7 +316,11 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym,
|
||||
llvm::BasicBlock *offBB =
|
||||
llvm::BasicBlock::Create(*g->ctx, "entry",
|
||||
(llvm::Function *)offFunc, 0);
|
||||
new llvm::StoreInst(LLVMMaskAllOff, globalAllOnMaskPtr, offBB);
|
||||
llvm::StoreInst *inst =
|
||||
new llvm::StoreInst(LLVMMaskAllOff, globalAllOnMaskPtr, offBB);
|
||||
if (g->opt.forceAlignedMemory) {
|
||||
inst->setAlignment(g->target->getNativeVectorAlignment());
|
||||
}
|
||||
llvm::ReturnInst::Create(*g->ctx, offBB);
|
||||
}
|
||||
|
||||
@@ -2437,7 +2441,13 @@ FunctionEmitContext::LoadInst(llvm::Value *ptr, const char *name) {
|
||||
if (name == NULL)
|
||||
name = LLVMGetName(ptr, "_load");
|
||||
|
||||
llvm::Instruction *inst = new llvm::LoadInst(ptr, name, bblock);
|
||||
llvm::LoadInst *inst = new llvm::LoadInst(ptr, name, bblock);
|
||||
|
||||
if (g->opt.forceAlignedMemory &&
|
||||
llvm::dyn_cast<llvm::VectorType>(pt->getElementType())) {
|
||||
inst->setAlignment(g->target->getNativeVectorAlignment());
|
||||
}
|
||||
|
||||
AddDebugPos(inst);
|
||||
return inst;
|
||||
}
|
||||
@@ -2719,7 +2729,7 @@ FunctionEmitContext::AllocaInst(llvm::Type *llvmType,
|
||||
inst = new llvm::AllocaInst(llvmType, name ? name : "", bblock);
|
||||
|
||||
// If no alignment was specified but we have an array of a uniform
|
||||
// type, then align it to 4 * the native vector width; it's not
|
||||
// type, then align it to the native vector alignment; it's not
|
||||
// unlikely that this array will be loaded into varying variables with
|
||||
// what will be aligned accesses if the uniform -> varying load is done
|
||||
// in regular chunks.
|
||||
@@ -2727,7 +2737,7 @@ FunctionEmitContext::AllocaInst(llvm::Type *llvmType,
|
||||
llvm::dyn_cast<llvm::ArrayType>(llvmType);
|
||||
if (align == 0 && arrayType != NULL &&
|
||||
!llvm::isa<llvm::VectorType>(arrayType->getElementType()))
|
||||
align = 4 * g->target->getNativeVectorWidth();
|
||||
align = g->target->getNativeVectorAlignment();
|
||||
|
||||
if (align != 0)
|
||||
inst->setAlignment(align);
|
||||
@@ -2986,7 +2996,17 @@ FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
llvm::Instruction *inst = new llvm::StoreInst(value, ptr, bblock);
|
||||
llvm::PointerType *pt =
|
||||
llvm::dyn_cast<llvm::PointerType>(ptr->getType());
|
||||
AssertPos(currentPos, pt != NULL);
|
||||
|
||||
llvm::StoreInst *inst = new llvm::StoreInst(value, ptr, bblock);
|
||||
|
||||
if (g->opt.forceAlignedMemory &&
|
||||
llvm::dyn_cast<llvm::VectorType>(pt->getElementType())) {
|
||||
inst->setAlignment(g->target->getNativeVectorAlignment());
|
||||
}
|
||||
|
||||
AddDebugPos(inst);
|
||||
}
|
||||
|
||||
|
||||
26
ispc.cpp
26
ispc.cpp
@@ -191,6 +191,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
m_tf_attributes(NULL),
|
||||
#endif
|
||||
m_nativeVectorWidth(-1),
|
||||
m_nativeVectorAlignment(-1),
|
||||
m_dataTypeWidth(-1),
|
||||
m_vectorWidth(-1),
|
||||
m_generatePIC(pic),
|
||||
@@ -309,6 +310,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
!strcasecmp(isa, "sse2-i32x4")) {
|
||||
this->m_isa = Target::SSE2;
|
||||
this->m_nativeVectorWidth = 4;
|
||||
this->m_nativeVectorAlignment = 16;
|
||||
this->m_dataTypeWidth = 32;
|
||||
this->m_vectorWidth = 4;
|
||||
this->m_attributes = "+sse,+sse2,-sse3,-sse4a,-ssse3,-popcnt"
|
||||
@@ -325,6 +327,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
!strcasecmp(isa, "sse2-i32x8")) {
|
||||
this->m_isa = Target::SSE2;
|
||||
this->m_nativeVectorWidth = 4;
|
||||
this->m_nativeVectorAlignment = 16;
|
||||
this->m_dataTypeWidth = 32;
|
||||
this->m_vectorWidth = 8;
|
||||
this->m_attributes = "+sse,+sse2,-sse3,-sse4a,-ssse3,-popcnt"
|
||||
@@ -341,6 +344,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
!strcasecmp(isa, "sse4-i32x4")) {
|
||||
this->m_isa = Target::SSE4;
|
||||
this->m_nativeVectorWidth = 4;
|
||||
this->m_nativeVectorAlignment = 16;
|
||||
this->m_dataTypeWidth = 32;
|
||||
this->m_vectorWidth = 4;
|
||||
// TODO: why not sse42 and popcnt?
|
||||
@@ -359,6 +363,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
!strcasecmp(isa, "sse4-i32x8")) {
|
||||
this->m_isa = Target::SSE4;
|
||||
this->m_nativeVectorWidth = 4;
|
||||
this->m_nativeVectorAlignment = 16;
|
||||
this->m_dataTypeWidth = 32;
|
||||
this->m_vectorWidth = 8;
|
||||
this->m_attributes = "+sse,+sse2,+sse3,-sse4a,+ssse3,-popcnt,+cmov"
|
||||
@@ -374,6 +379,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
else if (!strcasecmp(isa, "sse4-i8x16")) {
|
||||
this->m_isa = Target::SSE4;
|
||||
this->m_nativeVectorWidth = 16;
|
||||
this->m_nativeVectorAlignment = 16;
|
||||
this->m_dataTypeWidth = 8;
|
||||
this->m_vectorWidth = 16;
|
||||
this->m_attributes = "+sse,+sse2,+sse3,-sse4a,+ssse3,-popcnt,+cmov"
|
||||
@@ -389,6 +395,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
else if (!strcasecmp(isa, "sse4-i16x8")) {
|
||||
this->m_isa = Target::SSE4;
|
||||
this->m_nativeVectorWidth = 8;
|
||||
this->m_nativeVectorAlignment = 16;
|
||||
this->m_dataTypeWidth = 16;
|
||||
this->m_vectorWidth = 8;
|
||||
this->m_attributes = "+sse,+sse2,+sse3,-sse4a,+ssse3,-popcnt,+cmov"
|
||||
@@ -405,6 +412,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
!strcasecmp(isa, "generic-x4")) {
|
||||
this->m_isa = Target::GENERIC;
|
||||
this->m_nativeVectorWidth = 4;
|
||||
this->m_nativeVectorAlignment = 16;
|
||||
this->m_vectorWidth = 4;
|
||||
this->m_maskingIsFree = true;
|
||||
this->m_maskBitCount = 1;
|
||||
@@ -416,6 +424,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
!strcasecmp(isa, "generic-x8")) {
|
||||
this->m_isa = Target::GENERIC;
|
||||
this->m_nativeVectorWidth = 8;
|
||||
this->m_nativeVectorAlignment = 32;
|
||||
this->m_vectorWidth = 8;
|
||||
this->m_maskingIsFree = true;
|
||||
this->m_maskBitCount = 1;
|
||||
@@ -427,6 +436,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
!strcasecmp(isa, "generic-x16")) {
|
||||
this->m_isa = Target::GENERIC;
|
||||
this->m_nativeVectorWidth = 16;
|
||||
this->m_nativeVectorAlignment = 64;
|
||||
this->m_vectorWidth = 16;
|
||||
this->m_maskingIsFree = true;
|
||||
this->m_maskBitCount = 1;
|
||||
@@ -438,6 +448,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
!strcasecmp(isa, "generic-x32")) {
|
||||
this->m_isa = Target::GENERIC;
|
||||
this->m_nativeVectorWidth = 32;
|
||||
this->m_nativeVectorAlignment = 64;
|
||||
this->m_vectorWidth = 32;
|
||||
this->m_maskingIsFree = true;
|
||||
this->m_maskBitCount = 1;
|
||||
@@ -449,6 +460,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
!strcasecmp(isa, "generic-x64")) {
|
||||
this->m_isa = Target::GENERIC;
|
||||
this->m_nativeVectorWidth = 64;
|
||||
this->m_nativeVectorAlignment = 64;
|
||||
this->m_vectorWidth = 64;
|
||||
this->m_maskingIsFree = true;
|
||||
this->m_maskBitCount = 1;
|
||||
@@ -460,6 +472,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
!strcasecmp(isa, "generic-x1")) {
|
||||
this->m_isa = Target::GENERIC;
|
||||
this->m_nativeVectorWidth = 1;
|
||||
this->m_nativeVectorAlignment = 16;
|
||||
this->m_vectorWidth = 1;
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 32;
|
||||
@@ -467,6 +480,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
else if (!strcasecmp(isa, "avx1-i32x4")) {
|
||||
this->m_isa = Target::AVX;
|
||||
this->m_nativeVectorWidth = 8;
|
||||
this->m_nativeVectorAlignment = 32;
|
||||
this->m_dataTypeWidth = 32;
|
||||
this->m_vectorWidth = 4;
|
||||
this->m_attributes = "+avx,+popcnt,+cmov";
|
||||
@@ -478,6 +492,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
!strcasecmp(isa, "avx1-i32x8")) {
|
||||
this->m_isa = Target::AVX;
|
||||
this->m_nativeVectorWidth = 8;
|
||||
this->m_nativeVectorAlignment = 32;
|
||||
this->m_dataTypeWidth = 32;
|
||||
this->m_vectorWidth = 8;
|
||||
this->m_attributes = "+avx,+popcnt,+cmov";
|
||||
@@ -488,6 +503,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
!strcasecmp(isa, "avx1-i64x4")) {
|
||||
this->m_isa = Target::AVX;
|
||||
this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */
|
||||
this->m_nativeVectorAlignment = 32;
|
||||
this->m_dataTypeWidth = 64;
|
||||
this->m_vectorWidth = 4;
|
||||
this->m_attributes = "+avx,+popcnt,+cmov";
|
||||
@@ -499,6 +515,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
!strcasecmp(isa, "avx1-i32x16")) {
|
||||
this->m_isa = Target::AVX;
|
||||
this->m_nativeVectorWidth = 8;
|
||||
this->m_nativeVectorAlignment = 32;
|
||||
this->m_dataTypeWidth = 32;
|
||||
this->m_vectorWidth = 16;
|
||||
this->m_attributes = "+avx,+popcnt,+cmov";
|
||||
@@ -509,6 +526,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
!strcasecmp(isa, "avx1.1-i32x8")) {
|
||||
this->m_isa = Target::AVX11;
|
||||
this->m_nativeVectorWidth = 8;
|
||||
this->m_nativeVectorAlignment = 32;
|
||||
this->m_dataTypeWidth = 32;
|
||||
this->m_vectorWidth = 8;
|
||||
this->m_attributes = "+avx,+popcnt,+cmov,+f16c"
|
||||
@@ -530,6 +548,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
!strcasecmp(isa, "avx1.1-i32x16")) {
|
||||
this->m_isa = Target::AVX11;
|
||||
this->m_nativeVectorWidth = 8;
|
||||
this->m_nativeVectorAlignment = 32;
|
||||
this->m_dataTypeWidth = 32;
|
||||
this->m_vectorWidth = 16;
|
||||
this->m_attributes = "+avx,+popcnt,+cmov,+f16c"
|
||||
@@ -550,6 +569,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
else if (!strcasecmp(isa, "avx1.1-i64x4")) {
|
||||
this->m_isa = Target::AVX11;
|
||||
this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */
|
||||
this->m_nativeVectorAlignment = 32;
|
||||
this->m_dataTypeWidth = 64;
|
||||
this->m_vectorWidth = 4;
|
||||
this->m_attributes = "+avx,+popcnt,+cmov,+f16c"
|
||||
@@ -571,6 +591,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
!strcasecmp(isa, "avx2-i32x8")) {
|
||||
this->m_isa = Target::AVX2;
|
||||
this->m_nativeVectorWidth = 8;
|
||||
this->m_nativeVectorAlignment = 32;
|
||||
this->m_dataTypeWidth = 32;
|
||||
this->m_vectorWidth = 8;
|
||||
this->m_attributes = "+avx2,+popcnt,+cmov,+f16c"
|
||||
@@ -596,6 +617,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
!strcasecmp(isa, "avx2-i32x16")) {
|
||||
this->m_isa = Target::AVX2;
|
||||
this->m_nativeVectorWidth = 16;
|
||||
this->m_nativeVectorAlignment = 32;
|
||||
this->m_dataTypeWidth = 32;
|
||||
this->m_vectorWidth = 16;
|
||||
this->m_attributes = "+avx2,+popcnt,+cmov,+f16c"
|
||||
@@ -620,6 +642,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
else if (!strcasecmp(isa, "avx2-i64x4")) {
|
||||
this->m_isa = Target::AVX2;
|
||||
this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */
|
||||
this->m_nativeVectorAlignment = 32;
|
||||
this->m_dataTypeWidth = 64;
|
||||
this->m_vectorWidth = 4;
|
||||
this->m_attributes = "+avx2,+popcnt,+cmov,+f16c"
|
||||
@@ -645,6 +668,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
else if (!strcasecmp(isa, "neon-i8x16")) {
|
||||
this->m_isa = Target::NEON8;
|
||||
this->m_nativeVectorWidth = 16;
|
||||
this->m_nativeVectorAlignment = 16;
|
||||
this->m_dataTypeWidth = 8;
|
||||
this->m_vectorWidth = 16;
|
||||
this->m_attributes = "+neon,+fp16";
|
||||
@@ -655,6 +679,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
else if (!strcasecmp(isa, "neon-i16x8")) {
|
||||
this->m_isa = Target::NEON16;
|
||||
this->m_nativeVectorWidth = 8;
|
||||
this->m_nativeVectorAlignment = 16;
|
||||
this->m_dataTypeWidth = 16;
|
||||
this->m_vectorWidth = 8;
|
||||
this->m_attributes = "+neon,+fp16";
|
||||
@@ -666,6 +691,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
!strcasecmp(isa, "neon-i32x4")) {
|
||||
this->m_isa = Target::NEON32;
|
||||
this->m_nativeVectorWidth = 4;
|
||||
this->m_nativeVectorAlignment = 16;
|
||||
this->m_dataTypeWidth = 32;
|
||||
this->m_vectorWidth = 4;
|
||||
this->m_attributes = "+neon,+fp16";
|
||||
|
||||
9
ispc.h
9
ispc.h
@@ -260,6 +260,8 @@ public:
|
||||
|
||||
int getNativeVectorWidth() const {return m_nativeVectorWidth;}
|
||||
|
||||
int getNativeVectorAlignment() const {return m_nativeVectorAlignment;}
|
||||
|
||||
int getDataTypeWidth() const {return m_dataTypeWidth;}
|
||||
|
||||
int getVectorWidth() const {return m_vectorWidth;}
|
||||
@@ -332,6 +334,13 @@ private:
|
||||
SSE, 8 for AVX, etc.) */
|
||||
int m_nativeVectorWidth;
|
||||
|
||||
/** Native vector alignment in bytes. Theoretically this may be derived
|
||||
from the vector size, but it's better to manage directly the alignement.
|
||||
It allows easier experimenting and better fine tuning for particular
|
||||
platform. This information is primatily used when
|
||||
--opt=force-aligned-memory is used. */
|
||||
int m_nativeVectorAlignment;
|
||||
|
||||
/** Data type with in bits. Typically it's 32, but could be 8, 16 or 64.
|
||||
For generic it's -1, which means undefined. */
|
||||
int m_dataTypeWidth;
|
||||
|
||||
16
opt.cpp
16
opt.cpp
@@ -904,7 +904,7 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
lCopyMetadata(castPtr, callInst);
|
||||
int align;
|
||||
if (g->opt.forceAlignedMemory)
|
||||
align = 0;
|
||||
align = g->target->getNativeVectorAlignment();
|
||||
else
|
||||
align = callInst->getCalledFunction() == avxMaskedLoad32 ? 4 : 8;
|
||||
name = LLVMGetName(callInst->getArgOperand(0), "_load");
|
||||
@@ -946,7 +946,7 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
new llvm::StoreInst(rvalue, castPtr, (llvm::Instruction *)NULL);
|
||||
int align;
|
||||
if (g->opt.forceAlignedMemory)
|
||||
align = 0;
|
||||
align = g->target->getNativeVectorAlignment();
|
||||
else
|
||||
align = callInst->getCalledFunction() == avxMaskedStore32 ? 4 : 8;
|
||||
storeInst->setAlignment(align);
|
||||
@@ -2758,7 +2758,8 @@ lImproveMaskedStore(llvm::CallInst *callInst) {
|
||||
lCopyMetadata(lvalue, callInst);
|
||||
llvm::Instruction *store =
|
||||
new llvm::StoreInst(rvalue, lvalue, false /* not volatile */,
|
||||
g->opt.forceAlignedMemory ? 0 : info->align);
|
||||
g->opt.forceAlignedMemory ?
|
||||
g->target->getNativeVectorAlignment() : info->align);
|
||||
lCopyMetadata(store, callInst);
|
||||
llvm::ReplaceInstWithInst(callInst, store);
|
||||
return true;
|
||||
@@ -2821,7 +2822,8 @@ lImproveMaskedLoad(llvm::CallInst *callInst,
|
||||
callInst);
|
||||
llvm::Instruction *load =
|
||||
new llvm::LoadInst(ptr, callInst->getName(), false /* not volatile */,
|
||||
g->opt.forceAlignedMemory ? 0 : info->align,
|
||||
g->opt.forceAlignedMemory ?
|
||||
g->target->getNativeVectorAlignment() : info->align,
|
||||
(llvm::Instruction *)NULL);
|
||||
lCopyMetadata(load, callInst);
|
||||
llvm::ReplaceInstWithInst(callInst, load);
|
||||
@@ -3226,6 +3228,9 @@ lEmitLoads(llvm::Value *basePtr, std::vector<CoalescedLoadOp> &loadOps,
|
||||
}
|
||||
case 4: {
|
||||
// 4-wide vector load
|
||||
if (g->opt.forceAlignedMemory) {
|
||||
align = g->target->getNativeVectorAlignment();
|
||||
}
|
||||
llvm::VectorType *vt =
|
||||
llvm::VectorType::get(LLVMTypes::Int32Type, 4);
|
||||
loadOps[i].load = lGEPAndLoad(basePtr, start, align,
|
||||
@@ -3234,6 +3239,9 @@ lEmitLoads(llvm::Value *basePtr, std::vector<CoalescedLoadOp> &loadOps,
|
||||
}
|
||||
case 8: {
|
||||
// 8-wide vector load
|
||||
if (g->opt.forceAlignedMemory) {
|
||||
align = g->target->getNativeVectorAlignment();
|
||||
}
|
||||
llvm::VectorType *vt =
|
||||
llvm::VectorType::get(LLVMTypes::Int32Type, 8);
|
||||
loadOps[i].load = lGEPAndLoad(basePtr, start, align,
|
||||
|
||||
Reference in New Issue
Block a user