Merge branch 'master' of https://github.com/ispc/ispc
This commit is contained in:
@@ -322,6 +322,8 @@ lSetInternalFunctions(llvm::Module *module) {
|
|||||||
"__add_varying_double",
|
"__add_varying_double",
|
||||||
"__add_varying_int32",
|
"__add_varying_int32",
|
||||||
"__add_varying_int64",
|
"__add_varying_int64",
|
||||||
|
"__all",
|
||||||
|
"__any",
|
||||||
"__aos_to_soa3_float",
|
"__aos_to_soa3_float",
|
||||||
"__aos_to_soa3_float16",
|
"__aos_to_soa3_float16",
|
||||||
"__aos_to_soa3_float4",
|
"__aos_to_soa3_float4",
|
||||||
@@ -465,6 +467,7 @@ lSetInternalFunctions(llvm::Module *module) {
|
|||||||
"__new_uniform",
|
"__new_uniform",
|
||||||
"__new_varying32",
|
"__new_varying32",
|
||||||
"__new_varying64",
|
"__new_varying64",
|
||||||
|
"__none",
|
||||||
"__num_cores",
|
"__num_cores",
|
||||||
"__packed_load_active",
|
"__packed_load_active",
|
||||||
"__packed_store_active",
|
"__packed_store_active",
|
||||||
|
|||||||
@@ -2203,6 +2203,10 @@ bool CWriter::doInitialization(llvm::Module &M) {
|
|||||||
Out << "#undef ISPC_FAST_MATH\n";
|
Out << "#undef ISPC_FAST_MATH\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (g->opt.forceAlignedMemory) {
|
||||||
|
Out << "#define ISPC_FORCE_ALIGNED_MEMORY\n";
|
||||||
|
}
|
||||||
|
|
||||||
Out << "#include \"" << includeName << "\"\n";
|
Out << "#include \"" << includeName << "\"\n";
|
||||||
|
|
||||||
Out << "\n/* Basic Library Function Declarations */\n";
|
Out << "\n/* Basic Library Function Declarations */\n";
|
||||||
|
|||||||
23
ctx.cpp
23
ctx.cpp
@@ -2397,16 +2397,7 @@ FunctionEmitContext::LoadInst(llvm::Value *ptr, const char *name) {
|
|||||||
if (name == NULL)
|
if (name == NULL)
|
||||||
name = LLVMGetName(ptr, "_load");
|
name = LLVMGetName(ptr, "_load");
|
||||||
|
|
||||||
// FIXME: it's not clear to me that we generate unaligned vector loads
|
llvm::Instruction *inst = new llvm::LoadInst(ptr, name, bblock);
|
||||||
// of varying stuff out of the front-end any more. (Only by the
|
|
||||||
// optimization passes that lower gathers to vector loads, I think..)
|
|
||||||
// So remove this??
|
|
||||||
int align = 0;
|
|
||||||
if (llvm::isa<llvm::VectorType>(pt->getElementType()))
|
|
||||||
align = 1;
|
|
||||||
llvm::Instruction *inst = new llvm::LoadInst(ptr, name,
|
|
||||||
false /* not volatile */,
|
|
||||||
align, bblock);
|
|
||||||
AddDebugPos(inst);
|
AddDebugPos(inst);
|
||||||
return inst;
|
return inst;
|
||||||
}
|
}
|
||||||
@@ -2958,17 +2949,7 @@ FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
llvm::Instruction *inst;
|
llvm::Instruction *inst = new llvm::StoreInst(value, ptr, bblock);
|
||||||
if (llvm::isa<llvm::VectorType>(value->getType()))
|
|
||||||
// FIXME: same for load--do we still need/want this??
|
|
||||||
// Specify an unaligned store, since we don't know that the ptr
|
|
||||||
// will in fact be aligned to a vector width here. (Actually
|
|
||||||
// should be aligned to the alignment of the vector elment type...)
|
|
||||||
inst = new llvm::StoreInst(value, ptr, false /* not volatile */,
|
|
||||||
1, bblock);
|
|
||||||
else
|
|
||||||
inst = new llvm::StoreInst(value, ptr, bblock);
|
|
||||||
|
|
||||||
AddDebugPos(inst);
|
AddDebugPos(inst);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
1
ispc.cpp
1
ispc.cpp
@@ -640,6 +640,7 @@ Opt::Opt() {
|
|||||||
unrollLoops = true;
|
unrollLoops = true;
|
||||||
disableAsserts = false;
|
disableAsserts = false;
|
||||||
disableFMA = false;
|
disableFMA = false;
|
||||||
|
forceAlignedMemory = false;
|
||||||
disableMaskAllOnOptimizations = false;
|
disableMaskAllOnOptimizations = false;
|
||||||
disableHandlePseudoMemoryOps = false;
|
disableHandlePseudoMemoryOps = false;
|
||||||
disableBlendedMaskedStores = false;
|
disableBlendedMaskedStores = false;
|
||||||
|
|||||||
6
ispc.h
6
ispc.h
@@ -311,6 +311,12 @@ struct Opt {
|
|||||||
that support them). */
|
that support them). */
|
||||||
bool disableFMA;
|
bool disableFMA;
|
||||||
|
|
||||||
|
/** Always generate aligned vector load/store instructions; this
|
||||||
|
implies a guarantee that all dynamic access through pointers that
|
||||||
|
becomes a vector load/store will be a cache-aligned sequence of
|
||||||
|
locations. */
|
||||||
|
bool forceAlignedMemory;
|
||||||
|
|
||||||
/** If enabled, disables the various optimizations that kick in when
|
/** If enabled, disables the various optimizations that kick in when
|
||||||
the execution mask can be determined to be "all on" at compile
|
the execution mask can be determined to be "all on" at compile
|
||||||
time. */
|
time. */
|
||||||
|
|||||||
3
main.cpp
3
main.cpp
@@ -119,6 +119,7 @@ usage(int ret) {
|
|||||||
printf(" disable-loop-unroll\t\tDisable loop unrolling.\n");
|
printf(" disable-loop-unroll\t\tDisable loop unrolling.\n");
|
||||||
printf(" fast-masked-vload\t\tFaster masked vector loads on SSE (may go past end of array)\n");
|
printf(" fast-masked-vload\t\tFaster masked vector loads on SSE (may go past end of array)\n");
|
||||||
printf(" fast-math\t\t\tPerform non-IEEE-compliant optimizations of numeric expressions\n");
|
printf(" fast-math\t\t\tPerform non-IEEE-compliant optimizations of numeric expressions\n");
|
||||||
|
printf(" force-aligned-memory\t\tAlways issue \"aligned\" vector load and store instructions\n");
|
||||||
#ifndef ISPC_IS_WINDOWS
|
#ifndef ISPC_IS_WINDOWS
|
||||||
printf(" [--pic]\t\t\t\tGenerate position-independent code\n");
|
printf(" [--pic]\t\t\t\tGenerate position-independent code\n");
|
||||||
#endif // !ISPC_IS_WINDOWS
|
#endif // !ISPC_IS_WINDOWS
|
||||||
@@ -336,6 +337,8 @@ int main(int Argc, char *Argv[]) {
|
|||||||
g->opt.unrollLoops = false;
|
g->opt.unrollLoops = false;
|
||||||
else if (!strcmp(opt, "disable-fma"))
|
else if (!strcmp(opt, "disable-fma"))
|
||||||
g->opt.disableFMA = true;
|
g->opt.disableFMA = true;
|
||||||
|
else if (!strcmp(opt, "force-aligned-memory"))
|
||||||
|
g->opt.forceAlignedMemory = true;
|
||||||
|
|
||||||
// These are only used for performance tests of specific
|
// These are only used for performance tests of specific
|
||||||
// optimizations
|
// optimizations
|
||||||
|
|||||||
@@ -1783,6 +1783,8 @@ Module::execPreprocessor(const char* infilename, llvm::raw_string_ostream* ostre
|
|||||||
opts.addMacroDef("ISPC_TARGET_HAS_HALF");
|
opts.addMacroDef("ISPC_TARGET_HAS_HALF");
|
||||||
if (g->target.hasTranscendentals)
|
if (g->target.hasTranscendentals)
|
||||||
opts.addMacroDef("ISPC_TARGET_HAS_TRANSCENDENTALS");
|
opts.addMacroDef("ISPC_TARGET_HAS_TRANSCENDENTALS");
|
||||||
|
if (g->opt.forceAlignedMemory)
|
||||||
|
opts.addMacroDef("ISPC_FORCE_ALIGNED_MEMORY");
|
||||||
|
|
||||||
opts.addMacroDef("ISPC_MAJOR_VERSION=1");
|
opts.addMacroDef("ISPC_MAJOR_VERSION=1");
|
||||||
opts.addMacroDef("ISPC_MINOR_VERSION=3");
|
opts.addMacroDef("ISPC_MINOR_VERSION=3");
|
||||||
|
|||||||
17
opt.cpp
17
opt.cpp
@@ -791,7 +791,11 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
llvm::PointerType::get(returnType, 0),
|
llvm::PointerType::get(returnType, 0),
|
||||||
name, callInst);
|
name, callInst);
|
||||||
lCopyMetadata(castPtr, callInst);
|
lCopyMetadata(castPtr, callInst);
|
||||||
int align = callInst->getCalledFunction() == avxMaskedLoad32 ? 4 : 8;
|
int align;
|
||||||
|
if (g->opt.forceAlignedMemory)
|
||||||
|
align = 0;
|
||||||
|
else
|
||||||
|
align = callInst->getCalledFunction() == avxMaskedLoad32 ? 4 : 8;
|
||||||
name = LLVMGetName(callInst->getArgOperand(0), "_load");
|
name = LLVMGetName(callInst->getArgOperand(0), "_load");
|
||||||
llvm::Instruction *loadInst =
|
llvm::Instruction *loadInst =
|
||||||
new llvm::LoadInst(castPtr, name, false /* not volatile */,
|
new llvm::LoadInst(castPtr, name, false /* not volatile */,
|
||||||
@@ -829,7 +833,11 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
|
|
||||||
llvm::StoreInst *storeInst =
|
llvm::StoreInst *storeInst =
|
||||||
new llvm::StoreInst(rvalue, castPtr, (llvm::Instruction *)NULL);
|
new llvm::StoreInst(rvalue, castPtr, (llvm::Instruction *)NULL);
|
||||||
int align = callInst->getCalledFunction() == avxMaskedStore32 ? 4 : 8;
|
int align;
|
||||||
|
if (g->opt.forceAlignedMemory)
|
||||||
|
align = 0;
|
||||||
|
else
|
||||||
|
align = callInst->getCalledFunction() == avxMaskedStore32 ? 4 : 8;
|
||||||
storeInst->setAlignment(align);
|
storeInst->setAlignment(align);
|
||||||
lCopyMetadata(storeInst, callInst);
|
lCopyMetadata(storeInst, callInst);
|
||||||
llvm::ReplaceInstWithInst(callInst, storeInst);
|
llvm::ReplaceInstWithInst(callInst, storeInst);
|
||||||
@@ -2553,7 +2561,7 @@ lImproveMaskedStore(llvm::CallInst *callInst) {
|
|||||||
lCopyMetadata(lvalue, callInst);
|
lCopyMetadata(lvalue, callInst);
|
||||||
llvm::Instruction *store =
|
llvm::Instruction *store =
|
||||||
new llvm::StoreInst(rvalue, lvalue, false /* not volatile */,
|
new llvm::StoreInst(rvalue, lvalue, false /* not volatile */,
|
||||||
info->align);
|
g->opt.forceAlignedMemory ? 0 : info->align);
|
||||||
lCopyMetadata(store, callInst);
|
lCopyMetadata(store, callInst);
|
||||||
llvm::ReplaceInstWithInst(callInst, store);
|
llvm::ReplaceInstWithInst(callInst, store);
|
||||||
return true;
|
return true;
|
||||||
@@ -2616,7 +2624,8 @@ lImproveMaskedLoad(llvm::CallInst *callInst,
|
|||||||
callInst);
|
callInst);
|
||||||
llvm::Instruction *load =
|
llvm::Instruction *load =
|
||||||
new llvm::LoadInst(ptr, callInst->getName(), false /* not volatile */,
|
new llvm::LoadInst(ptr, callInst->getName(), false /* not volatile */,
|
||||||
info->align, (llvm::Instruction *)NULL);
|
g->opt.forceAlignedMemory ? 0 : info->align,
|
||||||
|
(llvm::Instruction *)NULL);
|
||||||
lCopyMetadata(load, callInst);
|
lCopyMetadata(load, callInst);
|
||||||
llvm::ReplaceInstWithInst(callInst, load);
|
llvm::ReplaceInstWithInst(callInst, load);
|
||||||
return true;
|
return true;
|
||||||
|
|||||||
@@ -99,15 +99,21 @@ void *ISPCAlloc(void **handle, int64_t size, int32_t alignment) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(_WIN32) || defined(_WIN64)
|
||||||
|
#define ALIGN
|
||||||
|
#else
|
||||||
|
#define ALIGN __attribute__((aligned(64)))
|
||||||
|
#endif
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
int w = width();
|
int w = width();
|
||||||
assert(w <= 64);
|
assert(w <= 64);
|
||||||
|
|
||||||
float returned_result[64];
|
float returned_result[64] ALIGN;
|
||||||
float vfloat[64];
|
float vfloat[64] ALIGN;
|
||||||
double vdouble[64];
|
double vdouble[64] ALIGN;
|
||||||
int vint[64], vint2[64];
|
int vint[64] ALIGN;
|
||||||
|
int vint2[64] ALIGN;
|
||||||
|
|
||||||
for (int i = 0; i < 64; ++i) {
|
for (int i = 0; i < 64; ++i) {
|
||||||
returned_result[i] = -1e20;
|
returned_result[i] = -1e20;
|
||||||
|
|||||||
Reference in New Issue
Block a user