This commit is contained in:
Jean-Luc Duprat
2012-09-14 14:12:49 -07:00
9 changed files with 44 additions and 29 deletions

View File

@@ -322,6 +322,8 @@ lSetInternalFunctions(llvm::Module *module) {
         "__add_varying_double",
         "__add_varying_int32",
         "__add_varying_int64",
+        "__all",
+        "__any",
         "__aos_to_soa3_float",
         "__aos_to_soa3_float16",
         "__aos_to_soa3_float4",
@@ -465,6 +467,7 @@ lSetInternalFunctions(llvm::Module *module) {
         "__new_uniform",
         "__new_varying32",
         "__new_varying64",
+        "__none",
         "__num_cores",
         "__packed_load_active",
         "__packed_store_active",

View File

@@ -2203,6 +2203,10 @@ bool CWriter::doInitialization(llvm::Module &M) {
         Out << "#undef ISPC_FAST_MATH\n";
     }
+    if (g->opt.forceAlignedMemory) {
+        Out << "#define ISPC_FORCE_ALIGNED_MEMORY\n";
+    }
     Out << "#include \"" << includeName << "\"\n";
     Out << "\n/* Basic Library Function Declarations */\n";

23
ctx.cpp
View File

@@ -2397,16 +2397,7 @@ FunctionEmitContext::LoadInst(llvm::Value *ptr, const char *name) {
     if (name == NULL)
         name = LLVMGetName(ptr, "_load");
-    // FIXME: it's not clear to me that we generate unaligned vector loads
-    // of varying stuff out of the front-end any more.  (Only by the
-    // optimization passes that lower gathers to vector loads, I think..)
-    // So remove this??
-    int align = 0;
-    if (llvm::isa<llvm::VectorType>(pt->getElementType()))
-        align = 1;
-    llvm::Instruction *inst = new llvm::LoadInst(ptr, name,
-                                                 false /* not volatile */,
-                                                 align, bblock);
+    llvm::Instruction *inst = new llvm::LoadInst(ptr, name, bblock);
     AddDebugPos(inst);
     return inst;
 }
@@ -2958,17 +2949,7 @@ FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr) {
         return;
     }
-    llvm::Instruction *inst;
-    if (llvm::isa<llvm::VectorType>(value->getType()))
-        // FIXME: same for load--do we still need/want this??
-        // Specify an unaligned store, since we don't know that the ptr
-        // will in fact be aligned to a vector width here.  (Actually
-        // should be aligned to the alignment of the vector elment type...)
-        inst = new llvm::StoreInst(value, ptr, false /* not volatile */,
-                                   1, bblock);
-    else
-        inst = new llvm::StoreInst(value, ptr, bblock);
+    llvm::Instruction *inst = new llvm::StoreInst(value, ptr, bblock);
     AddDebugPos(inst);
 }

View File

@@ -640,6 +640,7 @@ Opt::Opt() {
     unrollLoops = true;
     disableAsserts = false;
     disableFMA = false;
+    forceAlignedMemory = false;
    disableMaskAllOnOptimizations = false;
    disableHandlePseudoMemoryOps = false;
    disableBlendedMaskedStores = false;

6
ispc.h
View File

@@ -311,6 +311,12 @@ struct Opt {
         that support them). */
     bool disableFMA;
+    /** Always generate aligned vector load/store instructions; this
+        implies a guarantee that all dynamic access through pointers that
+        becomes a vector load/store will be a cache-aligned sequence of
+        locations. */
+    bool forceAlignedMemory;
     /** If enabled, disables the various optimizations that kick in when
         the execution mask can be determined to be "all on" at compile
         time. */

View File

@@ -119,6 +119,7 @@ usage(int ret) {
     printf("    disable-loop-unroll\t\tDisable loop unrolling.\n");
     printf("    fast-masked-vload\t\tFaster masked vector loads on SSE (may go past end of array)\n");
     printf("    fast-math\t\t\tPerform non-IEEE-compliant optimizations of numeric expressions\n");
+    printf("    force-aligned-memory\t\tAlways issue \"aligned\" vector load and store instructions\n");
 #ifndef ISPC_IS_WINDOWS
     printf("    [--pic]\t\t\t\tGenerate position-independent code\n");
 #endif // !ISPC_IS_WINDOWS
@@ -336,6 +337,8 @@ int main(int Argc, char *Argv[]) {
                 g->opt.unrollLoops = false;
             else if (!strcmp(opt, "disable-fma"))
                 g->opt.disableFMA = true;
+            else if (!strcmp(opt, "force-aligned-memory"))
+                g->opt.forceAlignedMemory = true;
             // These are only used for performance tests of specific
             // optimizations

View File

@@ -1783,6 +1783,8 @@ Module::execPreprocessor(const char* infilename, llvm::raw_string_ostream* ostream) {
     opts.addMacroDef("ISPC_TARGET_HAS_HALF");
     if (g->target.hasTranscendentals)
         opts.addMacroDef("ISPC_TARGET_HAS_TRANSCENDENTALS");
+    if (g->opt.forceAlignedMemory)
+        opts.addMacroDef("ISPC_FORCE_ALIGNED_MEMORY");
     opts.addMacroDef("ISPC_MAJOR_VERSION=1");
     opts.addMacroDef("ISPC_MINOR_VERSION=3");

17
opt.cpp
View File

@@ -791,7 +791,11 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
                 llvm::PointerType::get(returnType, 0),
                 name, callInst);
             lCopyMetadata(castPtr, callInst);
-            int align = callInst->getCalledFunction() == avxMaskedLoad32 ? 4 : 8;
+            int align;
+            if (g->opt.forceAlignedMemory)
+                align = 0;
+            else
+                align = callInst->getCalledFunction() == avxMaskedLoad32 ? 4 : 8;
             name = LLVMGetName(callInst->getArgOperand(0), "_load");
             llvm::Instruction *loadInst =
                 new llvm::LoadInst(castPtr, name, false /* not volatile */,
@@ -829,7 +833,11 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
             llvm::StoreInst *storeInst =
                 new llvm::StoreInst(rvalue, castPtr, (llvm::Instruction *)NULL);
-            int align = callInst->getCalledFunction() == avxMaskedStore32 ? 4 : 8;
+            int align;
+            if (g->opt.forceAlignedMemory)
+                align = 0;
+            else
+                align = callInst->getCalledFunction() == avxMaskedStore32 ? 4 : 8;
             storeInst->setAlignment(align);
             lCopyMetadata(storeInst, callInst);
             llvm::ReplaceInstWithInst(callInst, storeInst);
@@ -2553,7 +2561,7 @@ lImproveMaskedStore(llvm::CallInst *callInst) {
     lCopyMetadata(lvalue, callInst);
     llvm::Instruction *store =
         new llvm::StoreInst(rvalue, lvalue, false /* not volatile */,
-                            info->align);
+                            g->opt.forceAlignedMemory ? 0 : info->align);
     lCopyMetadata(store, callInst);
     llvm::ReplaceInstWithInst(callInst, store);
     return true;
@@ -2616,7 +2624,8 @@ lImproveMaskedLoad(llvm::CallInst *callInst,
                              callInst);
     llvm::Instruction *load =
         new llvm::LoadInst(ptr, callInst->getName(), false /* not volatile */,
-                           info->align, (llvm::Instruction *)NULL);
+                           g->opt.forceAlignedMemory ? 0 : info->align,
+                           (llvm::Instruction *)NULL);
     lCopyMetadata(load, callInst);
     llvm::ReplaceInstWithInst(callInst, load);
     return true;

View File

@@ -99,15 +99,21 @@ void *ISPCAlloc(void **handle, int64_t size, int32_t alignment) {
 }
+#if defined(_WIN32) || defined(_WIN64)
+#define ALIGN
+#else
+#define ALIGN __attribute__((aligned(64)))
+#endif
 int main(int argc, char *argv[]) {
     int w = width();
     assert(w <= 64);
-    float returned_result[64];
-    float vfloat[64];
-    double vdouble[64];
-    int vint[64], vint2[64];
+    float returned_result[64] ALIGN;
+    float vfloat[64] ALIGN;
+    double vdouble[64] ALIGN;
+    int vint[64] ALIGN;
+    int vint2[64] ALIGN;
     for (int i = 0; i < 64; ++i) {
         returned_result[i] = -1e20;