Compare commits
43 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6f6e28077f | ||
|
|
0a9a7c939a | ||
|
|
f30a5dea79 | ||
|
|
018b547c40 | ||
|
|
e82a720223 | ||
|
|
8d1b77b235 | ||
|
|
b8987faeee | ||
|
|
17fdab2793 | ||
|
|
1fa6520cb6 | ||
|
|
b6af5c16c6 | ||
|
|
10ebe88abf | ||
|
|
c0b41ad6f5 | ||
|
|
9920b30318 | ||
|
|
07f218137a | ||
|
|
89a5248f4f | ||
|
|
891919074e | ||
|
|
4adf527a4d | ||
|
|
533b539780 | ||
|
|
6f26ae9801 | ||
|
|
ddcdfff3ae | ||
|
|
5b48354d9a | ||
|
|
46bfef3fce | ||
|
|
20536bb339 | ||
|
|
f6605ee465 | ||
|
|
034507a35b | ||
|
|
0b2febcec0 | ||
|
|
d2fa735ef1 | ||
|
|
20f34b67da | ||
|
|
03f3db1e89 | ||
|
|
9805b0742d | ||
|
|
6000c696b2 | ||
|
|
5a2edf723b | ||
|
|
aec7da740a | ||
|
|
a79bc75b72 | ||
|
|
eaaebf7928 | ||
|
|
198aa9620e | ||
|
|
27c53a3c25 | ||
|
|
bd70182369 | ||
|
|
04df63d955 | ||
|
|
d59131d670 | ||
|
|
9475e13d81 | ||
|
|
765d86076f | ||
|
|
e2b6ed3db8 |
32
Makefile
32
Makefile
@@ -5,20 +5,32 @@
|
||||
ARCH_OS = $(shell uname)
|
||||
ARCH_TYPE = $(shell arch)
|
||||
|
||||
ifeq ($(shell llvm-config --version), 3.1svn)
|
||||
LLVM_LIBS=-lLLVMAsmParser -lLLVMInstrumentation -lLLVMLinker \
|
||||
-lLLVMArchive -lLLVMBitReader -lLLVMDebugInfo -lLLVMJIT -lLLVMipo \
|
||||
-lLLVMBitWriter -lLLVMTableGen -lLLVMCBackendInfo \
|
||||
-lLLVMX86Disassembler -lLLVMX86CodeGen -lLLVMSelectionDAG \
|
||||
-lLLVMAsmPrinter -lLLVMX86AsmParser -lLLVMX86Desc -lLLVMX86Info \
|
||||
-lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCDisassembler -lLLVMMCParser \
|
||||
-lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMTransformUtils \
|
||||
-lLLVMipa -lLLVMAnalysis -lLLVMMCJIT -lLLVMRuntimeDyld \
|
||||
-lLLVMExecutionEngine -lLLVMTarget -lLLVMMC -lLLVMObject -lLLVMCore \
|
||||
-lLLVMSupport
|
||||
else
|
||||
LLVM_LIBS=$(shell llvm-config --libs)
|
||||
endif
|
||||
|
||||
CLANG=clang
|
||||
CLANG_LIBS = -lclangFrontend -lclangDriver \
|
||||
-lclangSerialization -lclangParse -lclangSema \
|
||||
-lclangAnalysis -lclangAST -lclangLex -lclangBasic
|
||||
|
||||
ISPC_LIBS=$(CLANG_LIBS) \
|
||||
$(shell llvm-config --ldflags --libs) \
|
||||
-lpthread -ldl
|
||||
ISPC_TEST_LIBS=$(shell llvm-config --ldflags --libs) \
|
||||
ISPC_LIBS=$(shell llvm-config --ldflags) $(CLANG_LIBS) $(LLVM_LIBS) \
|
||||
-lpthread -ldl
|
||||
|
||||
LLVM_CXXFLAGS=$(shell llvm-config --cppflags)
|
||||
LLVM_VERSION=$(shell llvm-config --version | sed s/\\./_/)
|
||||
LLVM_VERSION_DEF=-DLLVM_$(LLVM_VERSION)
|
||||
LLVM_VERSION=LLVM_$(shell llvm-config --version | sed s/\\./_/)
|
||||
LLVM_VERSION_DEF=-D$(LLVM_VERSION)
|
||||
|
||||
BUILD_DATE=$(shell date +%Y%m%d)
|
||||
BUILD_VERSION=$(shell git log --abbrev-commit --abbrev=16 | head -1)
|
||||
@@ -59,7 +71,7 @@ OBJS=$(addprefix objs/, $(CXX_SRC:.cpp=.o) $(BUILTINS_SRC:.ll=.o) \
|
||||
builtins-c-32.o builtins-c-64.o stdlib_ispc.o $(BISON_SRC:.yy=.o) \
|
||||
$(FLEX_SRC:.ll=.o))
|
||||
|
||||
default: ispc ispc_test
|
||||
default: ispc
|
||||
|
||||
.PHONY: dirs clean depend doxygen print_llvm_src
|
||||
.PRECIOUS: objs/builtins-%.cpp
|
||||
@@ -78,7 +90,7 @@ print_llvm_src:
|
||||
@echo Using LLVM `llvm-config --version` from `llvm-config --libdir`
|
||||
|
||||
clean:
|
||||
/bin/rm -rf objs ispc ispc_test
|
||||
/bin/rm -rf objs ispc
|
||||
|
||||
doxygen:
|
||||
/bin/rm -rf docs/doxygen
|
||||
@@ -88,10 +100,6 @@ ispc: print_llvm_src dirs $(OBJS)
|
||||
@echo Creating ispc executable
|
||||
@$(CXX) $(LDFLAGS) -o $@ $(OBJS) $(ISPC_LIBS)
|
||||
|
||||
ispc_test: dirs ispc_test.cpp
|
||||
@echo Creating ispc_test executable
|
||||
@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ ispc_test.cpp $(ISPC_TEST_LIBS)
|
||||
|
||||
objs/%.o: %.cpp
|
||||
@echo Compiling $<
|
||||
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||
|
||||
@@ -8,7 +8,6 @@ REM Both the LLVM binaries and python need to be in the path
|
||||
set path=%LLVM_INSTALL_DIR%\bin;%PATH%;c:\cygwin\bin
|
||||
|
||||
msbuild ispc.vcxproj /V:m /p:Platform=Win32 /p:Configuration=Release
|
||||
msbuild ispc_test.vcxproj /V:m /p:Platform=Win32 /p:Configuration=Release
|
||||
|
||||
msbuild examples\examples.sln /V:m /p:Platform=x64 /p:Configuration=Release /t:rebuild
|
||||
msbuild examples\examples.sln /V:m /p:Platform=x64 /p:Configuration=Debug /t:rebuild
|
||||
|
||||
@@ -301,7 +301,7 @@ define i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
|
||||
}
|
||||
|
||||
define <4 x float> @__vec4_add_float(<4 x float> %v0,
|
||||
<4 x float> %v1) nounwind readnone alwaysinline {
|
||||
<4 x float> %v1) nounwind readnone alwaysinline {
|
||||
%v = fadd <4 x float> %v0, %v1
|
||||
ret <4 x float> %v
|
||||
}
|
||||
@@ -325,7 +325,7 @@ define float @__reduce_max_float(<8 x float>) nounwind readnone alwaysinline {
|
||||
|
||||
; helper function for reduce_add_int32
|
||||
define <4 x i32> @__vec4_add_int32(<4 x i32> %v0,
|
||||
<4 x i32> %v1) nounwind readnone alwaysinline {
|
||||
<4 x i32> %v1) nounwind readnone alwaysinline {
|
||||
%v = add <4 x i32> %v0, %v1
|
||||
ret <4 x i32> %v
|
||||
}
|
||||
|
||||
@@ -144,7 +144,7 @@ define <4 x double> @__ceil_varying_double(<4 x double>) nounwind readonly alway
|
||||
; from %1, and otherwise return the value from %0.
|
||||
|
||||
define <4 x i32> @__vselect_i32(<4 x i32>, <4 x i32> ,
|
||||
<4 x i32> %mask) nounwind readnone alwaysinline {
|
||||
<4 x i32> %mask) nounwind readnone alwaysinline {
|
||||
%notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%cleared_old = and <4 x i32> %0, %notmask
|
||||
%masked_new = and <4 x i32> %1, %mask
|
||||
@@ -153,7 +153,7 @@ define <4 x i32> @__vselect_i32(<4 x i32>, <4 x i32> ,
|
||||
}
|
||||
|
||||
define <4 x float> @__vselect_float(<4 x float>, <4 x float>,
|
||||
<4 x i32> %mask) nounwind readnone alwaysinline {
|
||||
<4 x i32> %mask) nounwind readnone alwaysinline {
|
||||
%v0 = bitcast <4 x float> %0 to <4 x i32>
|
||||
%v1 = bitcast <4 x float> %1 to <4 x i32>
|
||||
%r = call <4 x i32> @__vselect_i32(<4 x i32> %v0, <4 x i32> %v1, <4 x i32> %mask)
|
||||
|
||||
@@ -252,7 +252,7 @@ define float @__reduce_max_float(<8 x float>) nounwind readnone alwaysinline {
|
||||
|
||||
; helper function for reduce_add_int32
|
||||
define <4 x i32> @__vec4_add_int32(<4 x i32> %v0,
|
||||
<4 x i32> %v1) nounwind readnone alwaysinline {
|
||||
<4 x i32> %v1) nounwind readnone alwaysinline {
|
||||
%v = add <4 x i32> %v0, %v1
|
||||
ret <4 x i32> %v
|
||||
}
|
||||
|
||||
21
builtins.cpp
21
builtins.cpp
@@ -257,7 +257,7 @@ static void
|
||||
lAddModuleSymbols(llvm::Module *module, SymbolTable *symbolTable) {
|
||||
#if 0
|
||||
// FIXME: handle globals?
|
||||
assert(module->global_empty());
|
||||
Assert(module->global_empty());
|
||||
#endif
|
||||
|
||||
llvm::Module::iterator iter;
|
||||
@@ -287,11 +287,11 @@ lCheckModuleIntrinsics(llvm::Module *module) {
|
||||
// check the llvm.x86.* intrinsics for now...
|
||||
if (!strncmp(funcName.c_str(), "llvm.x86.", 9)) {
|
||||
llvm::Intrinsic::ID id = (llvm::Intrinsic::ID)func->getIntrinsicID();
|
||||
assert(id != 0);
|
||||
Assert(id != 0);
|
||||
LLVM_TYPE_CONST llvm::Type *intrinsicType =
|
||||
llvm::Intrinsic::getType(*g->ctx, id);
|
||||
intrinsicType = llvm::PointerType::get(intrinsicType, 0);
|
||||
assert(func->getType() == intrinsicType);
|
||||
Assert(func->getType() == intrinsicType);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -311,8 +311,12 @@ lCheckModuleIntrinsics(llvm::Module *module) {
|
||||
static void
|
||||
lSetInternalFunctions(llvm::Module *module) {
|
||||
const char *names[] = {
|
||||
"__add_float",
|
||||
"__add_int32",
|
||||
"__add_uniform_double",
|
||||
"__add_uniform_int32",
|
||||
"__add_uniform_int64",
|
||||
"__add_varying_double",
|
||||
"__add_varying_int32",
|
||||
"__add_varying_int64",
|
||||
"__aos_to_soa3_float",
|
||||
@@ -543,6 +547,10 @@ lSetInternalFunctions(llvm::Module *module) {
|
||||
"__svml_pow",
|
||||
"__undef_uniform",
|
||||
"__undef_varying",
|
||||
"__vec4_add_float",
|
||||
"__vec4_add_int32",
|
||||
"__vselect_float",
|
||||
"__vselect_i32",
|
||||
};
|
||||
|
||||
int count = sizeof(names) / sizeof(names[0]);
|
||||
@@ -583,9 +591,9 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
|
||||
// linking together modules with incompatible target triples..
|
||||
llvm::Triple mTriple(m->module->getTargetTriple());
|
||||
llvm::Triple bcTriple(bcModule->getTargetTriple());
|
||||
assert(bcTriple.getArch() == llvm::Triple::UnknownArch ||
|
||||
Assert(bcTriple.getArch() == llvm::Triple::UnknownArch ||
|
||||
mTriple.getArch() == bcTriple.getArch());
|
||||
assert(bcTriple.getVendor() == llvm::Triple::UnknownVendor ||
|
||||
Assert(bcTriple.getVendor() == llvm::Triple::UnknownVendor ||
|
||||
mTriple.getVendor() == bcTriple.getVendor());
|
||||
bcModule->setTargetTriple(mTriple.str());
|
||||
|
||||
@@ -631,7 +639,7 @@ lDefineConstantIntFunc(const char *name, int val, llvm::Module *module,
|
||||
Symbol *sym = new Symbol(name, SourcePos(), ft, SC_STATIC);
|
||||
|
||||
llvm::Function *func = module->getFunction(name);
|
||||
assert(func != NULL); // it should be declared already...
|
||||
Assert(func != NULL); // it should be declared already...
|
||||
func->addFnAttr(llvm::Attribute::AlwaysInline);
|
||||
llvm::BasicBlock *bblock = llvm::BasicBlock::Create(*g->ctx, "entry", func, 0);
|
||||
llvm::ReturnInst::Create(*g->ctx, LLVMInt32(val), bblock);
|
||||
@@ -718,6 +726,7 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
||||
}
|
||||
break;
|
||||
case Target::AVX:
|
||||
case Target::AVX2:
|
||||
switch (g->target.vectorWidth) {
|
||||
case 8:
|
||||
extern unsigned char builtins_bitcode_avx[];
|
||||
|
||||
18
builtins.m4
18
builtins.m4
@@ -715,7 +715,7 @@ define <$1 x $3> @__atomic_$2_$4_global($3 * %ptr, <$1 x $3> %val,
|
||||
%eltvec`'i = insertelement <$1 x $3> %eltvec`'eval(i-1), $3 %red`'eval(i-1), i32 i')
|
||||
|
||||
; make the atomic call, passing it the final reduced value
|
||||
ifelse(`LLVM_VERSION', `LLVM_2_9',`
|
||||
ifelse(LLVM_VERSION, `LLVM_2_9',`
|
||||
%final0 = call $3 @llvm.atomic.load.$2.$3.p0$3($3 * %ptr, $3 %red`'eval($1-1))', `
|
||||
%final0 = atomicrmw $2 $3 * %ptr, $3 %red`'eval($1-1) seq_cst')
|
||||
|
||||
@@ -747,7 +747,7 @@ ifelse(`LLVM_VERSION', `LLVM_2_9',`
|
||||
|
||||
define(`global_atomic_uniform', `
|
||||
|
||||
ifelse(`LLVM_VERSION', `LLVM_2_9',`
|
||||
ifelse(LLVM_VERSION, `LLVM_2_9',`
|
||||
declare $3 @llvm.atomic.load.$2.$3.p0$3($3 * %ptr, $3 %delta)
|
||||
|
||||
define $3 @__atomic_$2_uniform_$4_global($3 * %ptr, $3 %val,
|
||||
@@ -771,7 +771,7 @@ define $3 @__atomic_$2_uniform_$4_global($3 * %ptr, $3 %val,
|
||||
;; $2: llvm type of the vector elements (e.g. i32)
|
||||
;; $3: ispc type of the elements (e.g. int32)
|
||||
|
||||
ifelse(`LLVM_VERSION', `LLVM_2_9',`
|
||||
ifelse(LLVM_VERSION, `LLVM_2_9',`
|
||||
declare i32 @llvm.atomic.swap.i32.p0i32(i32 * %ptr, i32 %val)
|
||||
declare i64 @llvm.atomic.swap.i64.p0i64(i64 * %ptr, i64 %val)')
|
||||
|
||||
@@ -784,7 +784,7 @@ define <$1 x $2> @__atomic_swap_$3_global($2* %ptr, <$1 x $2> %val,
|
||||
|
||||
per_lane($1, <$1 x i32> %mask, `
|
||||
%val_LANE_ID = extractelement <$1 x $2> %val, i32 LANE
|
||||
ifelse(`LLVM_VERSION', `LLVM_2_9',`
|
||||
ifelse(LLVM_VERSION, `LLVM_2_9',`
|
||||
%r_LANE_ID = call $2 @llvm.atomic.swap.$2.p0$2($2 * %ptr, $2 %val_LANE_ID)', `
|
||||
%r_LANE_ID = atomicrmw xchg $2 * %ptr, $2 %val_LANE_ID seq_cst')
|
||||
%rp_LANE_ID = getelementptr $2 * %rptr32, i32 LANE
|
||||
@@ -796,7 +796,7 @@ ifelse(`LLVM_VERSION', `LLVM_2_9',`
|
||||
|
||||
define $2 @__atomic_swap_uniform_$3_global($2* %ptr, $2 %val,
|
||||
<$1 x i32> %mask) nounwind alwaysinline {
|
||||
ifelse(`LLVM_VERSION', `LLVM_2_9',`
|
||||
ifelse(LLVM_VERSION, `LLVM_2_9',`
|
||||
%r = call $2 @llvm.atomic.swap.$2.p0$2($2 * %ptr, $2 %val)', `
|
||||
%r = atomicrmw xchg $2 * %ptr, $2 %val seq_cst')
|
||||
ret $2 %r
|
||||
@@ -812,7 +812,7 @@ ifelse(`LLVM_VERSION', `LLVM_2_9',`
|
||||
|
||||
define(`global_atomic_exchange', `
|
||||
|
||||
ifelse(`LLVM_VERSION', `LLVM_2_9',`
|
||||
ifelse(LLVM_VERSION, `LLVM_2_9',`
|
||||
declare $2 @llvm.atomic.cmp.swap.$2.p0$2($2 * %ptr, $2 %cmp, $2 %val)')
|
||||
|
||||
define <$1 x $2> @__atomic_compare_exchange_$3_global($2* %ptr, <$1 x $2> %cmp,
|
||||
@@ -823,7 +823,7 @@ define <$1 x $2> @__atomic_compare_exchange_$3_global($2* %ptr, <$1 x $2> %cmp,
|
||||
per_lane($1, <$1 x i32> %mask, `
|
||||
%cmp_LANE_ID = extractelement <$1 x $2> %cmp, i32 LANE
|
||||
%val_LANE_ID = extractelement <$1 x $2> %val, i32 LANE
|
||||
ifelse(`LLVM_VERSION', `LLVM_2_9',`
|
||||
ifelse(LLVM_VERSION, `LLVM_2_9',`
|
||||
%r_LANE_ID = call $2 @llvm.atomic.cmp.swap.$2.p0$2($2 * %ptr, $2 %cmp_LANE_ID,
|
||||
$2 %val_LANE_ID)', `
|
||||
%r_LANE_ID = cmpxchg $2 * %ptr, $2 %cmp_LANE_ID, $2 %val_LANE_ID seq_cst')
|
||||
@@ -836,7 +836,7 @@ ifelse(`LLVM_VERSION', `LLVM_2_9',`
|
||||
|
||||
define $2 @__atomic_compare_exchange_uniform_$3_global($2* %ptr, $2 %cmp,
|
||||
$2 %val, <$1 x i32> %mask) nounwind alwaysinline {
|
||||
ifelse(`LLVM_VERSION', `LLVM_2_9',`
|
||||
ifelse(LLVM_VERSION, `LLVM_2_9',`
|
||||
%r = call $2 @llvm.atomic.cmp.swap.$2.p0$2($2 * %ptr, $2 %cmp, $2 %val)', `
|
||||
%r = cmpxchg $2 * %ptr, $2 %cmp, $2 %val seq_cst')
|
||||
ret $2 %r
|
||||
@@ -1784,7 +1784,7 @@ define void
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; prefetching
|
||||
|
||||
ifelse(`LLVM_VERSION', `LLVM_2_9',
|
||||
ifelse(LLVM_VERSION, `LLVM_2_9',
|
||||
`
|
||||
declare void @llvm.prefetch(i8* nocapture %ptr, i32 %readwrite, i32 %locality)
|
||||
|
||||
|
||||
299
ctx.cpp
299
ctx.cpp
@@ -89,7 +89,7 @@ struct CFInfo {
|
||||
|
||||
private:
|
||||
CFInfo(CFType t, bool uniformIf, llvm::Value *sm) {
|
||||
assert(t == If);
|
||||
Assert(t == If);
|
||||
type = t;
|
||||
isUniform = uniformIf;
|
||||
savedBreakTarget = savedContinueTarget = NULL;
|
||||
@@ -99,7 +99,7 @@ private:
|
||||
CFInfo(CFType t, bool iu, llvm::BasicBlock *bt, llvm::BasicBlock *ct,
|
||||
llvm::Value *sb, llvm::Value *sc, llvm::Value *sm,
|
||||
llvm::Value *lm) {
|
||||
assert(t == Loop);
|
||||
Assert(t == Loop);
|
||||
type = t;
|
||||
isUniform = iu;
|
||||
savedBreakTarget = bt;
|
||||
@@ -112,7 +112,7 @@ private:
|
||||
CFInfo(CFType t, llvm::BasicBlock *bt, llvm::BasicBlock *ct,
|
||||
llvm::Value *sb, llvm::Value *sc, llvm::Value *sm,
|
||||
llvm::Value *lm) {
|
||||
assert(t == Foreach);
|
||||
Assert(t == Foreach);
|
||||
type = t;
|
||||
isUniform = false;
|
||||
savedBreakTarget = bt;
|
||||
@@ -171,8 +171,11 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym,
|
||||
|
||||
internalMaskPointer = AllocaInst(LLVMTypes::MaskType, "internal_mask_memory");
|
||||
StoreInst(LLVMMaskAllOn, internalMaskPointer);
|
||||
|
||||
functionMaskValue = LLVMMaskAllOn;
|
||||
fullMaskPointer = NULL;
|
||||
|
||||
fullMaskPointer = AllocaInst(LLVMTypes::MaskType, "full_mask_memory");
|
||||
StoreInst(LLVMMaskAllOn, fullMaskPointer);
|
||||
|
||||
loopMask = NULL;
|
||||
breakLanesPtr = continueLanesPtr = NULL;
|
||||
@@ -194,6 +197,47 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym,
|
||||
returnValuePtr = AllocaInst(ftype, "return_value_memory");
|
||||
}
|
||||
|
||||
if (g->opt.disableMaskAllOnOptimizations) {
|
||||
// This is really disgusting. We want to be able to fool the
|
||||
// compiler to not be able to reason that the mask is all on, but
|
||||
// we don't want to pay too much of a price at the start of each
|
||||
// function to do so.
|
||||
//
|
||||
// Therefore: first, we declare a module-static __all_on_mask
|
||||
// variable that will hold an "all on" mask value. At the start of
|
||||
// each function, we'll load its value and call SetInternalMaskAnd
|
||||
// with the result to set the current internal execution mask.
|
||||
// (This is a no-op at runtime.)
|
||||
//
|
||||
// Then, to fool the optimizer that maybe the value of
|
||||
// __all_on_mask can't be guaranteed to be "all on", we emit a
|
||||
// dummy function that sets __all_on_mask be "all off". (That
|
||||
// function is never actually called.)
|
||||
llvm::Value *globalAllOnMaskPtr =
|
||||
m->module->getNamedGlobal("__all_on_mask");
|
||||
if (globalAllOnMaskPtr == NULL) {
|
||||
globalAllOnMaskPtr =
|
||||
new llvm::GlobalVariable(*m->module, LLVMTypes::MaskType, false,
|
||||
llvm::GlobalValue::InternalLinkage,
|
||||
LLVMMaskAllOn, "__all_on_mask");
|
||||
|
||||
char buf[256];
|
||||
sprintf(buf, "__off_all_on_mask_%s", g->target.GetISAString());
|
||||
llvm::Constant *offFunc =
|
||||
m->module->getOrInsertFunction(buf, LLVMTypes::VoidType,
|
||||
NULL);
|
||||
Assert(llvm::isa<llvm::Function>(offFunc));
|
||||
llvm::BasicBlock *offBB =
|
||||
llvm::BasicBlock::Create(*g->ctx, "entry",
|
||||
(llvm::Function *)offFunc, 0);
|
||||
new llvm::StoreInst(LLVMMaskAllOff, globalAllOnMaskPtr, offBB);
|
||||
llvm::ReturnInst::Create(*g->ctx, offBB);
|
||||
}
|
||||
|
||||
llvm::Value *allOnMask = LoadInst(globalAllOnMaskPtr, "all_on_mask");
|
||||
SetInternalMaskAnd(LLVMMaskAllOn, allOnMask);
|
||||
}
|
||||
|
||||
if (m->diBuilder) {
|
||||
/* If debugging is enabled, tell the debug information emission
|
||||
code about this new function */
|
||||
@@ -216,7 +260,7 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym,
|
||||
|
||||
llvm::DIFile file = funcStartPos.GetDIFile();
|
||||
Symbol *programIndexSymbol = m->symbolTable->LookupVariable("programIndex");
|
||||
assert(programIndexSymbol && programIndexSymbol->storagePtr);
|
||||
Assert(programIndexSymbol && programIndexSymbol->storagePtr);
|
||||
m->diBuilder->createGlobalVariable(programIndexSymbol->name,
|
||||
file,
|
||||
funcStartPos.first_line,
|
||||
@@ -225,7 +269,7 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym,
|
||||
programIndexSymbol->storagePtr);
|
||||
|
||||
Symbol *programCountSymbol = m->symbolTable->LookupVariable("programCount");
|
||||
assert(programCountSymbol);
|
||||
Assert(programCountSymbol);
|
||||
m->diBuilder->createGlobalVariable(programCountSymbol->name,
|
||||
file,
|
||||
funcStartPos.first_line,
|
||||
@@ -237,8 +281,8 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym,
|
||||
|
||||
|
||||
FunctionEmitContext::~FunctionEmitContext() {
|
||||
assert(controlFlowInfo.size() == 0);
|
||||
assert(debugScopes.size() == (m->diBuilder ? 1 : 0));
|
||||
Assert(controlFlowInfo.size() == 0);
|
||||
Assert(debugScopes.size() == (m->diBuilder ? 1 : 0));
|
||||
}
|
||||
|
||||
|
||||
@@ -268,17 +312,15 @@ FunctionEmitContext::GetFunctionMask() {
|
||||
|
||||
llvm::Value *
|
||||
FunctionEmitContext::GetInternalMask() {
|
||||
if (VaryingCFDepth() == 0)
|
||||
return LLVMMaskAllOn;
|
||||
else
|
||||
return LoadInst(internalMaskPointer, "load_mask");
|
||||
return LoadInst(internalMaskPointer, "load_mask");
|
||||
}
|
||||
|
||||
|
||||
llvm::Value *
|
||||
FunctionEmitContext::GetFullMask() {
|
||||
llvm::Value *internalMask = GetInternalMask();
|
||||
if (internalMask == LLVMMaskAllOn && functionMaskValue == LLVMMaskAllOn)
|
||||
if (internalMask == LLVMMaskAllOn && functionMaskValue == LLVMMaskAllOn &&
|
||||
!g->opt.disableMaskAllOnOptimizations)
|
||||
return LLVMMaskAllOn;
|
||||
else
|
||||
return BinaryOperator(llvm::Instruction::And, GetInternalMask(),
|
||||
@@ -286,16 +328,17 @@ FunctionEmitContext::GetFullMask() {
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
FunctionEmitContext::SetMaskPointer(llvm::Value *p) {
|
||||
fullMaskPointer = p;
|
||||
llvm::Value *
|
||||
FunctionEmitContext::GetFullMaskPointer() {
|
||||
return fullMaskPointer;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
FunctionEmitContext::SetFunctionMask(llvm::Value *value) {
|
||||
functionMaskValue = value;
|
||||
StoreInst(GetFullMask(), fullMaskPointer);
|
||||
if (bblock != NULL)
|
||||
StoreInst(GetFullMask(), fullMaskPointer);
|
||||
}
|
||||
|
||||
|
||||
@@ -333,7 +376,7 @@ FunctionEmitContext::SetInternalMaskAndNot(llvm::Value *oldMask, llvm::Value *te
|
||||
|
||||
void
|
||||
FunctionEmitContext::BranchIfMaskAny(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) {
|
||||
assert(bblock != NULL);
|
||||
Assert(bblock != NULL);
|
||||
llvm::Value *any = Any(GetFullMask());
|
||||
BranchInst(btrue, bfalse, any);
|
||||
// It's illegal to add any additional instructions to the basic block
|
||||
@@ -344,7 +387,7 @@ FunctionEmitContext::BranchIfMaskAny(llvm::BasicBlock *btrue, llvm::BasicBlock *
|
||||
|
||||
void
|
||||
FunctionEmitContext::BranchIfMaskAll(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) {
|
||||
assert(bblock != NULL);
|
||||
Assert(bblock != NULL);
|
||||
llvm::Value *all = All(GetFullMask());
|
||||
BranchInst(btrue, bfalse, all);
|
||||
// It's illegal to add any additional instructions to the basic block
|
||||
@@ -355,7 +398,7 @@ FunctionEmitContext::BranchIfMaskAll(llvm::BasicBlock *btrue, llvm::BasicBlock *
|
||||
|
||||
void
|
||||
FunctionEmitContext::BranchIfMaskNone(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) {
|
||||
assert(bblock != NULL);
|
||||
Assert(bblock != NULL);
|
||||
// switch sense of true/false bblocks
|
||||
BranchIfMaskAny(bfalse, btrue);
|
||||
// It's illegal to add any additional instructions to the basic block
|
||||
@@ -379,7 +422,7 @@ FunctionEmitContext::StartVaryingIf(llvm::Value *oldMask) {
|
||||
void
|
||||
FunctionEmitContext::EndIf() {
|
||||
// Make sure we match up with a Start{Uniform,Varying}If().
|
||||
assert(controlFlowInfo.size() > 0 && controlFlowInfo.back()->IsIf());
|
||||
Assert(controlFlowInfo.size() > 0 && controlFlowInfo.back()->IsIf());
|
||||
CFInfo *ci = controlFlowInfo.back();
|
||||
controlFlowInfo.pop_back();
|
||||
|
||||
@@ -458,7 +501,7 @@ FunctionEmitContext::StartLoop(llvm::BasicBlock *bt, llvm::BasicBlock *ct,
|
||||
|
||||
void
|
||||
FunctionEmitContext::EndLoop() {
|
||||
assert(controlFlowInfo.size() && controlFlowInfo.back()->IsLoop());
|
||||
Assert(controlFlowInfo.size() && controlFlowInfo.back()->IsLoop());
|
||||
CFInfo *ci = controlFlowInfo.back();
|
||||
controlFlowInfo.pop_back();
|
||||
|
||||
@@ -501,7 +544,7 @@ FunctionEmitContext::StartForeach(llvm::BasicBlock *ct) {
|
||||
|
||||
void
|
||||
FunctionEmitContext::EndForeach() {
|
||||
assert(controlFlowInfo.size() && controlFlowInfo.back()->IsForeach());
|
||||
Assert(controlFlowInfo.size() && controlFlowInfo.back()->IsForeach());
|
||||
CFInfo *ci = controlFlowInfo.back();
|
||||
controlFlowInfo.pop_back();
|
||||
|
||||
@@ -555,7 +598,7 @@ FunctionEmitContext::Break(bool doCoherenceCheck) {
|
||||
// Otherwise we need to update the mask of the lanes that have
|
||||
// executed a 'break' statement:
|
||||
// breakLanes = breakLanes | mask
|
||||
assert(breakLanesPtr != NULL);
|
||||
Assert(breakLanesPtr != NULL);
|
||||
llvm::Value *mask = GetInternalMask();
|
||||
llvm::Value *breakMask = LoadInst(breakLanesPtr,
|
||||
"break_mask");
|
||||
@@ -605,7 +648,7 @@ FunctionEmitContext::Continue(bool doCoherenceCheck) {
|
||||
else {
|
||||
// Otherwise update the stored value of which lanes have 'continue'd.
|
||||
// continueLanes = continueLanes | mask
|
||||
assert(continueLanesPtr);
|
||||
Assert(continueLanesPtr);
|
||||
llvm::Value *mask = GetInternalMask();
|
||||
llvm::Value *continueMask =
|
||||
LoadInst(continueLanesPtr, "continue_mask");
|
||||
@@ -632,7 +675,7 @@ FunctionEmitContext::Continue(bool doCoherenceCheck) {
|
||||
*/
|
||||
bool
|
||||
FunctionEmitContext::ifsInLoopAllUniform() const {
|
||||
assert(controlFlowInfo.size() > 0);
|
||||
Assert(controlFlowInfo.size() > 0);
|
||||
// Go backwards through controlFlowInfo, since we add new nested scopes
|
||||
// to the back. Stop once we come to the first enclosing loop.
|
||||
int i = controlFlowInfo.size() - 1;
|
||||
@@ -642,7 +685,7 @@ FunctionEmitContext::ifsInLoopAllUniform() const {
|
||||
return false;
|
||||
--i;
|
||||
}
|
||||
assert(i >= 0); // else we didn't find a loop!
|
||||
Assert(i >= 0); // else we didn't find a loop!
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -650,7 +693,7 @@ FunctionEmitContext::ifsInLoopAllUniform() const {
|
||||
void
|
||||
FunctionEmitContext::jumpIfAllLoopLanesAreDone(llvm::BasicBlock *target) {
|
||||
llvm::Value *allDone = NULL;
|
||||
assert(continueLanesPtr != NULL);
|
||||
Assert(continueLanesPtr != NULL);
|
||||
if (breakLanesPtr == NULL) {
|
||||
// In a foreach loop, break and return are illegal, and
|
||||
// breakLanesPtr is NULL. In this case, the mask is guaranteed to
|
||||
@@ -752,13 +795,19 @@ FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) {
|
||||
expr = TypeConvertExpr(expr, returnType, "return statement");
|
||||
if (expr != NULL) {
|
||||
llvm::Value *retVal = expr->GetValue(this);
|
||||
if (retVal != NULL)
|
||||
// Use a masked store to store the value of the expression
|
||||
// in the return value memory; this preserves the return
|
||||
// values from other lanes that may have executed return
|
||||
// statements previously.
|
||||
StoreInst(retVal, returnValuePtr, GetInternalMask(),
|
||||
PointerType::GetUniform(returnType));
|
||||
if (retVal != NULL) {
|
||||
if (returnType->IsUniformType() ||
|
||||
dynamic_cast<const ReferenceType *>(returnType) != NULL)
|
||||
StoreInst(retVal, returnValuePtr);
|
||||
else {
|
||||
// Use a masked store to store the value of the expression
|
||||
// in the return value memory; this preserves the return
|
||||
// values from other lanes that may have executed return
|
||||
// statements previously.
|
||||
StoreInst(retVal, returnValuePtr, GetInternalMask(),
|
||||
PointerType::GetUniform(returnType));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -827,7 +876,7 @@ FunctionEmitContext::LaneMask(llvm::Value *v) {
|
||||
std::vector<Symbol *> mm;
|
||||
m->symbolTable->LookupFunction("__movmsk", &mm);
|
||||
// There should be one with signed int signature, one unsigned int.
|
||||
assert(mm.size() == 2);
|
||||
Assert(mm.size() == 2);
|
||||
// We can actually call either one, since both are i32s as far as
|
||||
// LLVM's type system is concerned...
|
||||
llvm::Function *fmm = mm[0]->function;
|
||||
@@ -876,7 +925,7 @@ FunctionEmitContext::CreateBasicBlock(const char *name) {
|
||||
llvm::Value *
|
||||
FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) {
|
||||
if (b == NULL) {
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -922,7 +971,7 @@ lGetStringAsValue(llvm::BasicBlock *bblock, const char *s) {
|
||||
|
||||
void
|
||||
FunctionEmitContext::AddInstrumentationPoint(const char *note) {
|
||||
assert(note != NULL);
|
||||
Assert(note != NULL);
|
||||
if (!g->emitInstrumentation)
|
||||
return;
|
||||
|
||||
@@ -990,7 +1039,7 @@ FunctionEmitContext::StartScope() {
|
||||
void
|
||||
FunctionEmitContext::EndScope() {
|
||||
if (m->diBuilder != NULL) {
|
||||
assert(debugScopes.size() > 0);
|
||||
Assert(debugScopes.size() > 0);
|
||||
debugScopes.pop_back();
|
||||
}
|
||||
}
|
||||
@@ -998,7 +1047,7 @@ FunctionEmitContext::EndScope() {
|
||||
|
||||
llvm::DIScope
|
||||
FunctionEmitContext::GetDIScope() const {
|
||||
assert(debugScopes.size() > 0);
|
||||
Assert(debugScopes.size() > 0);
|
||||
return debugScopes.back();
|
||||
}
|
||||
|
||||
@@ -1059,7 +1108,7 @@ lArrayVectorWidth(LLVM_TYPE_CONST llvm::Type *t) {
|
||||
// to things like FunctionEmitContext::BinaryOperator() as operands.
|
||||
LLVM_TYPE_CONST llvm::VectorType *vectorElementType =
|
||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(arrayType->getElementType());
|
||||
assert((vectorElementType != NULL &&
|
||||
Assert((vectorElementType != NULL &&
|
||||
(int)vectorElementType->getNumElements() == g->target.vectorWidth));
|
||||
|
||||
return (int)arrayType->getNumElements();
|
||||
@@ -1071,11 +1120,11 @@ FunctionEmitContext::BinaryOperator(llvm::Instruction::BinaryOps inst,
|
||||
llvm::Value *v0, llvm::Value *v1,
|
||||
const char *name) {
|
||||
if (v0 == NULL || v1 == NULL) {
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
assert(v0->getType() == v1->getType());
|
||||
Assert(v0->getType() == v1->getType());
|
||||
LLVM_TYPE_CONST llvm::Type *type = v0->getType();
|
||||
int arraySize = lArrayVectorWidth(type);
|
||||
if (arraySize == 0) {
|
||||
@@ -1103,7 +1152,7 @@ FunctionEmitContext::BinaryOperator(llvm::Instruction::BinaryOps inst,
|
||||
llvm::Value *
|
||||
FunctionEmitContext::NotOperator(llvm::Value *v, const char *name) {
|
||||
if (v == NULL) {
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -1139,12 +1188,12 @@ static LLVM_TYPE_CONST llvm::Type *
|
||||
lGetMatchingBoolVectorType(LLVM_TYPE_CONST llvm::Type *type) {
|
||||
LLVM_TYPE_CONST llvm::ArrayType *arrayType =
|
||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(type);
|
||||
assert(arrayType != NULL);
|
||||
Assert(arrayType != NULL);
|
||||
|
||||
LLVM_TYPE_CONST llvm::VectorType *vectorElementType =
|
||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(arrayType->getElementType());
|
||||
assert(vectorElementType != NULL);
|
||||
assert((int)vectorElementType->getNumElements() == g->target.vectorWidth);
|
||||
Assert(vectorElementType != NULL);
|
||||
Assert((int)vectorElementType->getNumElements() == g->target.vectorWidth);
|
||||
|
||||
LLVM_TYPE_CONST llvm::Type *base =
|
||||
llvm::VectorType::get(LLVMTypes::BoolType, g->target.vectorWidth);
|
||||
@@ -1158,11 +1207,11 @@ FunctionEmitContext::CmpInst(llvm::Instruction::OtherOps inst,
|
||||
llvm::Value *v0, llvm::Value *v1,
|
||||
const char *name) {
|
||||
if (v0 == NULL || v1 == NULL) {
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
assert(v0->getType() == v1->getType());
|
||||
Assert(v0->getType() == v1->getType());
|
||||
LLVM_TYPE_CONST llvm::Type *type = v0->getType();
|
||||
int arraySize = lArrayVectorWidth(type);
|
||||
if (arraySize == 0) {
|
||||
@@ -1189,7 +1238,7 @@ FunctionEmitContext::CmpInst(llvm::Instruction::OtherOps inst,
|
||||
llvm::Value *
|
||||
FunctionEmitContext::SmearUniform(llvm::Value *value, const char *name) {
|
||||
if (value == NULL) {
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -1224,7 +1273,7 @@ FunctionEmitContext::BitCastInst(llvm::Value *value,
|
||||
LLVM_TYPE_CONST llvm::Type *type,
|
||||
const char *name) {
|
||||
if (value == NULL) {
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -1238,7 +1287,7 @@ FunctionEmitContext::BitCastInst(llvm::Value *value,
|
||||
llvm::Value *
|
||||
FunctionEmitContext::PtrToIntInst(llvm::Value *value, const char *name) {
|
||||
if (value == NULL) {
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -1259,7 +1308,7 @@ FunctionEmitContext::PtrToIntInst(llvm::Value *value,
|
||||
LLVM_TYPE_CONST llvm::Type *toType,
|
||||
const char *name) {
|
||||
if (value == NULL) {
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -1272,7 +1321,7 @@ FunctionEmitContext::PtrToIntInst(llvm::Value *value,
|
||||
else if (fromType->getScalarSizeInBits() > toType->getScalarSizeInBits())
|
||||
return TruncInst(value, toType, "ptr_to_int");
|
||||
else {
|
||||
assert(fromType->getScalarSizeInBits() <
|
||||
Assert(fromType->getScalarSizeInBits() <
|
||||
toType->getScalarSizeInBits());
|
||||
return ZExtInst(value, toType, "ptr_to_int");
|
||||
}
|
||||
@@ -1290,7 +1339,7 @@ FunctionEmitContext::IntToPtrInst(llvm::Value *value,
|
||||
LLVM_TYPE_CONST llvm::Type *toType,
|
||||
const char *name) {
|
||||
if (value == NULL) {
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -1303,7 +1352,7 @@ FunctionEmitContext::IntToPtrInst(llvm::Value *value,
|
||||
else if (fromType->getScalarSizeInBits() > toType->getScalarSizeInBits())
|
||||
return TruncInst(value, toType, "int_to_ptr");
|
||||
else {
|
||||
assert(fromType->getScalarSizeInBits() <
|
||||
Assert(fromType->getScalarSizeInBits() <
|
||||
toType->getScalarSizeInBits());
|
||||
return ZExtInst(value, toType, "int_to_ptr");
|
||||
}
|
||||
@@ -1320,7 +1369,7 @@ llvm::Instruction *
|
||||
FunctionEmitContext::TruncInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
|
||||
const char *name) {
|
||||
if (value == NULL) {
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -1337,7 +1386,7 @@ llvm::Instruction *
|
||||
FunctionEmitContext::CastInst(llvm::Instruction::CastOps op, llvm::Value *value,
|
||||
LLVM_TYPE_CONST llvm::Type *type, const char *name) {
|
||||
if (value == NULL) {
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -1354,7 +1403,7 @@ llvm::Instruction *
|
||||
FunctionEmitContext::FPCastInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
|
||||
const char *name) {
|
||||
if (value == NULL) {
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -1371,7 +1420,7 @@ llvm::Instruction *
|
||||
FunctionEmitContext::SExtInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
|
||||
const char *name) {
|
||||
if (value == NULL) {
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -1388,7 +1437,7 @@ llvm::Instruction *
|
||||
FunctionEmitContext::ZExtInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
|
||||
const char *name) {
|
||||
if (value == NULL) {
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -1461,7 +1510,7 @@ FunctionEmitContext::applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index,
|
||||
// index must be varying for this method to be called.
|
||||
bool baseIsUniform =
|
||||
(llvm::isa<LLVM_TYPE_CONST llvm::PointerType>(basePtr->getType()));
|
||||
assert(baseIsUniform == false || indexIsVarying == true);
|
||||
Assert(baseIsUniform == false || indexIsVarying == true);
|
||||
llvm::Value *varyingPtr = baseIsUniform ?
|
||||
SmearUniform(basePtr, "ptr_smear") : basePtr;
|
||||
|
||||
@@ -1474,13 +1523,13 @@ llvm::Value *
|
||||
FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index,
|
||||
const Type *ptrType, const char *name) {
|
||||
if (basePtr == NULL || index == NULL) {
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (dynamic_cast<const ReferenceType *>(ptrType) != NULL)
|
||||
ptrType = PointerType::GetUniform(ptrType->GetReferenceTarget());
|
||||
assert(dynamic_cast<const PointerType *>(ptrType) != NULL);
|
||||
Assert(dynamic_cast<const PointerType *>(ptrType) != NULL);
|
||||
|
||||
bool indexIsVaryingType =
|
||||
llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(index->getType());
|
||||
@@ -1512,13 +1561,13 @@ FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0
|
||||
llvm::Value *index1, const Type *ptrType,
|
||||
const char *name) {
|
||||
if (basePtr == NULL || index0 == NULL || index1 == NULL) {
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (dynamic_cast<const ReferenceType *>(ptrType) != NULL)
|
||||
ptrType = PointerType::GetUniform(ptrType->GetReferenceTarget());
|
||||
assert(dynamic_cast<const PointerType *>(ptrType) != NULL);
|
||||
Assert(dynamic_cast<const PointerType *>(ptrType) != NULL);
|
||||
|
||||
bool index0IsVaryingType =
|
||||
llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(index0->getType());
|
||||
@@ -1551,7 +1600,7 @@ FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0
|
||||
// out the type of ptr0.
|
||||
const Type *baseType = ptrType->GetBaseType();
|
||||
const SequentialType *st = dynamic_cast<const SequentialType *>(baseType);
|
||||
assert(st != NULL);
|
||||
Assert(st != NULL);
|
||||
|
||||
bool ptr0IsUniform =
|
||||
llvm::isa<LLVM_TYPE_CONST llvm::PointerType>(ptr0->getType());
|
||||
@@ -1586,7 +1635,7 @@ FunctionEmitContext::AddElementOffset(llvm::Value *basePtr, int elementNum,
|
||||
|
||||
if (dynamic_cast<const ReferenceType *>(ptrType) != NULL)
|
||||
ptrType = PointerType::GetUniform(ptrType->GetReferenceTarget());
|
||||
assert(dynamic_cast<const PointerType *>(ptrType) != NULL);
|
||||
Assert(dynamic_cast<const PointerType *>(ptrType) != NULL);
|
||||
|
||||
// Otherwise do the math to find the offset and add it to the given
|
||||
// varying pointers
|
||||
@@ -1598,14 +1647,14 @@ FunctionEmitContext::AddElementOffset(llvm::Value *basePtr, int elementNum,
|
||||
// us the offset in bytes to the given element of the structure
|
||||
offset = g->target.StructOffset(st->LLVMType(g->ctx), elementNum);
|
||||
else {
|
||||
// Otherwise we should have a vector here and the offset is given
|
||||
// by the element number times the size of the element type of the
|
||||
// vector.
|
||||
const VectorType *vt =
|
||||
dynamic_cast<const VectorType *>(ptrType->GetBaseType());
|
||||
assert(vt != NULL);
|
||||
// Otherwise we should have a vector or array here and the offset
|
||||
// is given by the element number times the size of the element
|
||||
// type of the vector.
|
||||
const SequentialType *st =
|
||||
dynamic_cast<const SequentialType *>(ptrType->GetBaseType());
|
||||
Assert(st != NULL);
|
||||
llvm::Value *size =
|
||||
g->target.SizeOf(vt->GetElementType()->LLVMType(g->ctx));
|
||||
g->target.SizeOf(st->GetElementType()->LLVMType(g->ctx));
|
||||
llvm::Value *scale = (g->target.is32Bit || g->opt.force32BitAddressing) ?
|
||||
LLVMInt32(elementNum) : LLVMInt64(elementNum);
|
||||
offset = BinaryOperator(llvm::Instruction::Mul, size, scale);
|
||||
@@ -1627,13 +1676,13 @@ FunctionEmitContext::AddElementOffset(llvm::Value *basePtr, int elementNum,
|
||||
llvm::Value *
|
||||
FunctionEmitContext::LoadInst(llvm::Value *ptr, const char *name) {
|
||||
if (ptr == NULL) {
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
LLVM_TYPE_CONST llvm::PointerType *pt =
|
||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::PointerType>(ptr->getType());
|
||||
assert(pt != NULL);
|
||||
Assert(pt != NULL);
|
||||
|
||||
// FIXME: it's not clear to me that we generate unaligned vector loads
|
||||
// of varying stuff out of the front-end any more. (Only by the
|
||||
@@ -1654,16 +1703,16 @@ llvm::Value *
|
||||
FunctionEmitContext::LoadInst(llvm::Value *ptr, llvm::Value *mask,
|
||||
const Type *ptrType, const char *name) {
|
||||
if (ptr == NULL) {
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
assert(ptrType != NULL && mask != NULL);
|
||||
Assert(ptrType != NULL && mask != NULL);
|
||||
|
||||
if (dynamic_cast<const ReferenceType *>(ptrType) != NULL)
|
||||
ptrType = PointerType::GetUniform(ptrType->GetReferenceTarget());
|
||||
|
||||
assert(dynamic_cast<const PointerType *>(ptrType) != NULL);
|
||||
Assert(dynamic_cast<const PointerType *>(ptrType) != NULL);
|
||||
|
||||
if (ptrType->IsUniformType()) {
|
||||
// FIXME: same issue as above load inst regarding alignment...
|
||||
@@ -1691,7 +1740,7 @@ FunctionEmitContext::LoadInst(llvm::Value *ptr, llvm::Value *mask,
|
||||
else {
|
||||
// Otherwise we should have a varying ptr and it's time for a
|
||||
// gather.
|
||||
return gather(ptr, ptrType, mask, name);
|
||||
return gather(ptr, ptrType, GetFullMask(), name);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1700,7 +1749,7 @@ llvm::Value *
|
||||
FunctionEmitContext::gather(llvm::Value *ptr, const Type *ptrType,
|
||||
llvm::Value *mask, const char *name) {
|
||||
// We should have a varying lvalue if we get here...
|
||||
assert(ptrType->IsVaryingType() &&
|
||||
Assert(ptrType->IsVaryingType() &&
|
||||
ptr->getType() == LLVMTypes::VoidPointerVectorType);
|
||||
|
||||
const Type *returnType = ptrType->GetBaseType()->GetAsVaryingType();
|
||||
@@ -1749,13 +1798,13 @@ FunctionEmitContext::gather(llvm::Value *ptr, const Type *ptrType,
|
||||
funcName = g->target.is32Bit ? "__pseudo_gather32_16" :
|
||||
"__pseudo_gather64_16";
|
||||
else {
|
||||
assert(llvmReturnType == LLVMTypes::Int8VectorType);
|
||||
Assert(llvmReturnType == LLVMTypes::Int8VectorType);
|
||||
funcName = g->target.is32Bit ? "__pseudo_gather32_8" :
|
||||
"__pseudo_gather64_8";
|
||||
}
|
||||
|
||||
llvm::Function *gatherFunc = m->module->getFunction(funcName);
|
||||
assert(gatherFunc != NULL);
|
||||
Assert(gatherFunc != NULL);
|
||||
|
||||
llvm::Value *call = CallInst(gatherFunc, NULL, ptr, mask, name);
|
||||
|
||||
@@ -1804,12 +1853,17 @@ llvm::Value *
|
||||
FunctionEmitContext::AllocaInst(LLVM_TYPE_CONST llvm::Type *llvmType,
|
||||
const char *name, int align,
|
||||
bool atEntryBlock) {
|
||||
if (llvmType == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
llvm::AllocaInst *inst = NULL;
|
||||
if (atEntryBlock) {
|
||||
// We usually insert it right before the jump instruction at the
|
||||
// end of allocaBlock
|
||||
llvm::Instruction *retInst = allocaBlock->getTerminator();
|
||||
assert(retInst);
|
||||
Assert(retInst);
|
||||
inst = new llvm::AllocaInst(llvmType, name ? name : "", retInst);
|
||||
}
|
||||
else
|
||||
@@ -1845,12 +1899,12 @@ void
|
||||
FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr,
|
||||
const Type *ptrType, llvm::Value *mask) {
|
||||
if (value == NULL || ptr == NULL) {
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
return;
|
||||
}
|
||||
|
||||
assert(dynamic_cast<const PointerType *>(ptrType) != NULL);
|
||||
assert(ptrType->IsUniformType());
|
||||
Assert(dynamic_cast<const PointerType *>(ptrType) != NULL);
|
||||
Assert(ptrType->IsUniformType());
|
||||
|
||||
const Type *valueType = ptrType->GetBaseType();
|
||||
const CollectionType *collectionType =
|
||||
@@ -1872,7 +1926,7 @@ FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr,
|
||||
|
||||
// We must have a regular atomic, enumerator, or pointer type at this
|
||||
// point.
|
||||
assert(dynamic_cast<const AtomicType *>(valueType) != NULL ||
|
||||
Assert(dynamic_cast<const AtomicType *>(valueType) != NULL ||
|
||||
dynamic_cast<const EnumType *>(valueType) != NULL ||
|
||||
dynamic_cast<const PointerType *>(valueType) != NULL);
|
||||
valueType = valueType->GetAsNonConstType();
|
||||
@@ -1918,7 +1972,7 @@ FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr,
|
||||
ptr = BitCastInst(ptr, LLVMTypes::Int8VectorPointerType,
|
||||
"ptr_to_int8vecptr");
|
||||
}
|
||||
assert(maskedStoreFunc != NULL);
|
||||
Assert(maskedStoreFunc != NULL);
|
||||
|
||||
std::vector<llvm::Value *> args;
|
||||
args.push_back(ptr);
|
||||
@@ -1938,13 +1992,13 @@ FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr,
|
||||
void
|
||||
FunctionEmitContext::scatter(llvm::Value *value, llvm::Value *ptr,
|
||||
const Type *ptrType, llvm::Value *mask) {
|
||||
assert(dynamic_cast<const PointerType *>(ptrType) != NULL);
|
||||
assert(ptrType->IsVaryingType());
|
||||
Assert(dynamic_cast<const PointerType *>(ptrType) != NULL);
|
||||
Assert(ptrType->IsVaryingType());
|
||||
|
||||
const Type *valueType = ptrType->GetBaseType();
|
||||
|
||||
// I think this should be impossible
|
||||
assert(dynamic_cast<const ArrayType *>(valueType) == NULL);
|
||||
Assert(dynamic_cast<const ArrayType *>(valueType) == NULL);
|
||||
|
||||
const CollectionType *collectionType = dynamic_cast<const CollectionType *>(valueType);
|
||||
if (collectionType != NULL) {
|
||||
@@ -1963,7 +2017,7 @@ FunctionEmitContext::scatter(llvm::Value *value, llvm::Value *ptr,
|
||||
const PointerType *pt = dynamic_cast<const PointerType *>(valueType);
|
||||
|
||||
// And everything should be a pointer or atomic from here on out...
|
||||
assert(pt != NULL ||
|
||||
Assert(pt != NULL ||
|
||||
dynamic_cast<const AtomicType *>(valueType) != NULL);
|
||||
|
||||
LLVM_TYPE_CONST llvm::Type *type = value->getType();
|
||||
@@ -1991,7 +2045,7 @@ FunctionEmitContext::scatter(llvm::Value *value, llvm::Value *ptr,
|
||||
"__pseudo_scatter64_8";
|
||||
|
||||
llvm::Function *scatterFunc = m->module->getFunction(funcName);
|
||||
assert(scatterFunc != NULL);
|
||||
Assert(scatterFunc != NULL);
|
||||
|
||||
AddInstrumentationPoint("scatter");
|
||||
|
||||
@@ -2008,7 +2062,7 @@ void
|
||||
FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr) {
|
||||
if (value == NULL || ptr == NULL) {
|
||||
// may happen due to error elsewhere
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -2032,7 +2086,7 @@ FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr,
|
||||
llvm::Value *mask, const Type *ptrType) {
|
||||
if (value == NULL || ptr == NULL) {
|
||||
// may happen due to error elsewhere
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -2044,7 +2098,7 @@ FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr,
|
||||
if (ptrType->GetBaseType()->IsUniformType())
|
||||
// the easy case
|
||||
StoreInst(value, ptr);
|
||||
else if (mask == LLVMMaskAllOn)
|
||||
else if (mask == LLVMMaskAllOn && !g->opt.disableMaskAllOnOptimizations)
|
||||
// Otherwise it is a masked store unless we can determine that the
|
||||
// mask is all on... (Unclear if this check is actually useful.)
|
||||
StoreInst(value, ptr);
|
||||
@@ -2052,10 +2106,10 @@ FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr,
|
||||
maskedStore(value, ptr, ptrType, mask);
|
||||
}
|
||||
else {
|
||||
assert(ptrType->IsVaryingType());
|
||||
Assert(ptrType->IsVaryingType());
|
||||
// We have a varying ptr (an array of pointers), so it's time to
|
||||
// scatter
|
||||
scatter(value, ptr, ptrType, mask);
|
||||
scatter(value, ptr, ptrType, GetFullMask());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2072,7 +2126,7 @@ FunctionEmitContext::BranchInst(llvm::BasicBlock *trueBlock,
|
||||
llvm::BasicBlock *falseBlock,
|
||||
llvm::Value *test) {
|
||||
if (test == NULL) {
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -2085,7 +2139,7 @@ FunctionEmitContext::BranchInst(llvm::BasicBlock *trueBlock,
|
||||
llvm::Value *
|
||||
FunctionEmitContext::ExtractInst(llvm::Value *v, int elt, const char *name) {
|
||||
if (v == NULL) {
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -2105,7 +2159,7 @@ llvm::Value *
|
||||
FunctionEmitContext::InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt,
|
||||
const char *name) {
|
||||
if (v == NULL || eltVal == NULL) {
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -2138,7 +2192,7 @@ llvm::Instruction *
|
||||
FunctionEmitContext::SelectInst(llvm::Value *test, llvm::Value *val0,
|
||||
llvm::Value *val1, const char *name) {
|
||||
if (test == NULL || val0 == NULL || val1 == NULL) {
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -2169,7 +2223,7 @@ lCalleeArgCount(llvm::Value *callee, const FunctionType *funcType) {
|
||||
ft = llvm::dyn_cast<LLVM_TYPE_CONST llvm::FunctionType>(pt->getElementType());
|
||||
}
|
||||
|
||||
assert(ft != NULL);
|
||||
Assert(ft != NULL);
|
||||
return ft->getNumParams();
|
||||
}
|
||||
|
||||
@@ -2179,7 +2233,7 @@ FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
|
||||
const std::vector<llvm::Value *> &args,
|
||||
const char *name) {
|
||||
if (func == NULL) {
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -2188,7 +2242,7 @@ FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
|
||||
// isn't the case for things like intrinsics, builtins, and extern "C"
|
||||
// functions from the application. Add the mask if it's needed.
|
||||
unsigned int calleeArgCount = lCalleeArgCount(func, funcType);
|
||||
assert(argVals.size() + 1 == calleeArgCount ||
|
||||
Assert(argVals.size() + 1 == calleeArgCount ||
|
||||
argVals.size() == calleeArgCount);
|
||||
if (argVals.size() + 1 == calleeArgCount)
|
||||
argVals.push_back(GetFullMask());
|
||||
@@ -2259,7 +2313,7 @@ FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
|
||||
llvm::Value *currentMask = LoadInst(maskPtr);
|
||||
llvm::Function *cttz =
|
||||
m->module->getFunction("__count_trailing_zeros_i32");
|
||||
assert(cttz != NULL);
|
||||
Assert(cttz != NULL);
|
||||
llvm::Value *firstLane = CallInst(cttz, NULL, LaneMask(currentMask),
|
||||
"first_lane");
|
||||
|
||||
@@ -2306,12 +2360,12 @@ FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
|
||||
// Now, do a masked store into the memory allocated to
|
||||
// accumulate the result using the call mask.
|
||||
if (callResult != NULL) {
|
||||
assert(resultPtr != NULL);
|
||||
Assert(resultPtr != NULL);
|
||||
StoreInst(callResult, resultPtr, callMask,
|
||||
PointerType::GetUniform(returnType));
|
||||
}
|
||||
else
|
||||
assert(resultPtr == NULL);
|
||||
Assert(resultPtr == NULL);
|
||||
|
||||
// Update the mask to turn off the program instances for which
|
||||
// we just called the function.
|
||||
@@ -2371,7 +2425,7 @@ FunctionEmitContext::ReturnInst() {
|
||||
rinst = llvm::ReturnInst::Create(*g->ctx, retVal, bblock);
|
||||
}
|
||||
else {
|
||||
assert(function->GetReturnType() == AtomicType::Void);
|
||||
Assert(function->GetReturnType() == AtomicType::Void);
|
||||
rinst = llvm::ReturnInst::Create(*g->ctx, bblock);
|
||||
}
|
||||
|
||||
@@ -2386,25 +2440,25 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee,
|
||||
std::vector<llvm::Value *> &argVals,
|
||||
llvm::Value *launchCount) {
|
||||
if (callee == NULL) {
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
launchedTasks = true;
|
||||
|
||||
assert(llvm::isa<llvm::Function>(callee));
|
||||
Assert(llvm::isa<llvm::Function>(callee));
|
||||
LLVM_TYPE_CONST llvm::Type *argType =
|
||||
(llvm::dyn_cast<llvm::Function>(callee))->arg_begin()->getType();
|
||||
assert(llvm::PointerType::classof(argType));
|
||||
Assert(llvm::PointerType::classof(argType));
|
||||
LLVM_TYPE_CONST llvm::PointerType *pt =
|
||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::PointerType>(argType);
|
||||
assert(llvm::StructType::classof(pt->getElementType()));
|
||||
Assert(llvm::StructType::classof(pt->getElementType()));
|
||||
LLVM_TYPE_CONST llvm::StructType *argStructType =
|
||||
static_cast<LLVM_TYPE_CONST llvm::StructType *>(pt->getElementType());
|
||||
assert(argStructType->getNumElements() == argVals.size() + 1);
|
||||
Assert(argStructType->getNumElements() == argVals.size() + 1);
|
||||
|
||||
llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
|
||||
assert(falloc != NULL);
|
||||
Assert(falloc != NULL);
|
||||
llvm::Value *structSize = g->target.SizeOf(argStructType);
|
||||
if (structSize->getType() != LLVMTypes::Int64Type)
|
||||
// ISPCAlloc expects the size as an uint64_t, but on 32-bit
|
||||
@@ -2439,7 +2493,7 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee,
|
||||
// argument block we just filled in
|
||||
llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType);
|
||||
llvm::Function *flaunch = m->module->getFunction("ISPCLaunch");
|
||||
assert(flaunch != NULL);
|
||||
Assert(flaunch != NULL);
|
||||
std::vector<llvm::Value *> args;
|
||||
args.push_back(launchGroupHandlePtr);
|
||||
args.push_back(fptr);
|
||||
@@ -2488,12 +2542,13 @@ FunctionEmitContext::addVaryingOffsetsIfNeeded(llvm::Value *ptr,
|
||||
const Type *ptrType) {
|
||||
// This should only be called for varying pointers
|
||||
const PointerType *pt = dynamic_cast<const PointerType *>(ptrType);
|
||||
assert(pt && pt->IsVaryingType());
|
||||
Assert(pt && pt->IsVaryingType());
|
||||
|
||||
const Type *baseType = ptrType->GetBaseType();
|
||||
assert(dynamic_cast<const AtomicType *>(baseType) != NULL ||
|
||||
dynamic_cast<const EnumType *>(baseType) != NULL ||
|
||||
dynamic_cast<const PointerType *>(baseType));
|
||||
if (dynamic_cast<const AtomicType *>(baseType) == NULL &&
|
||||
dynamic_cast<const EnumType *>(baseType) == NULL &&
|
||||
dynamic_cast<const PointerType *>(baseType) == NULL)
|
||||
return ptr;
|
||||
if (baseType->IsUniformType())
|
||||
return ptr;
|
||||
|
||||
|
||||
6
ctx.h
6
ctx.h
@@ -98,9 +98,9 @@ public:
|
||||
the function entry mask and the internal mask. */
|
||||
llvm::Value *GetFullMask();
|
||||
|
||||
/** Provides the alloca'd pointer to memory to store the full function
|
||||
mask. This is only used to wire up the __mask builtin variable. */
|
||||
void SetMaskPointer(llvm::Value *p);
|
||||
/** Returns a pointer to storage in memory that stores the current full
|
||||
mask. */
|
||||
llvm::Value *GetFullMaskPointer();
|
||||
|
||||
/** Provides the value of the mask at function entry */
|
||||
void SetFunctionMask(llvm::Value *val);
|
||||
|
||||
14
decl.cpp
14
decl.cpp
@@ -216,7 +216,7 @@ Declarator::GetFunctionInfo(DeclSpecs *ds, std::vector<Symbol *> *funArgs) {
|
||||
return NULL;
|
||||
|
||||
Symbol *declSym = GetSymbol();
|
||||
assert(declSym != NULL);
|
||||
Assert(declSym != NULL);
|
||||
|
||||
// Get the symbol for the function from the symbol table. (It should
|
||||
// already have been added to the symbol table by AddGlobal() by the
|
||||
@@ -232,11 +232,11 @@ Declarator::GetFunctionInfo(DeclSpecs *ds, std::vector<Symbol *> *funArgs) {
|
||||
Declarator *d = this;
|
||||
while (d != NULL && d->kind != DK_FUNCTION)
|
||||
d = d->child;
|
||||
assert(d != NULL);
|
||||
Assert(d != NULL);
|
||||
|
||||
for (unsigned int i = 0; i < d->functionParams.size(); ++i) {
|
||||
Declaration *pdecl = d->functionParams[i];
|
||||
assert(pdecl->declarators.size() == 1);
|
||||
Assert(pdecl->declarators.size() == 1);
|
||||
funArgs->push_back(pdecl->declarators[0]->GetSymbol());
|
||||
}
|
||||
|
||||
@@ -263,8 +263,8 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
|
||||
case DK_BASE:
|
||||
// All of the type qualifiers should be in the DeclSpecs for the
|
||||
// base declarator
|
||||
assert(typeQualifiers == 0);
|
||||
assert(child == NULL);
|
||||
Assert(typeQualifiers == 0);
|
||||
Assert(child == NULL);
|
||||
return type;
|
||||
|
||||
case DK_POINTER:
|
||||
@@ -376,7 +376,7 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
|
||||
// it lives down to the base declarator.
|
||||
Declarator *decl = d->declarators[0];
|
||||
while (decl->child != NULL) {
|
||||
assert(decl->initExpr == NULL);
|
||||
Assert(decl->initExpr == NULL);
|
||||
decl = decl->child;
|
||||
}
|
||||
|
||||
@@ -485,7 +485,7 @@ Declaration::Declaration(DeclSpecs *ds, Declarator *d) {
|
||||
|
||||
std::vector<VariableDeclaration>
|
||||
Declaration::GetVariableDeclarations() const {
|
||||
assert(declSpecs->storageClass != SC_TYPEDEF);
|
||||
Assert(declSpecs->storageClass != SC_TYPEDEF);
|
||||
std::vector<VariableDeclaration> vars;
|
||||
|
||||
for (unsigned int i = 0; i < declarators.size(); ++i) {
|
||||
|
||||
@@ -1,3 +1,33 @@
|
||||
=== v1.1.1 === (15 December 2011)
|
||||
|
||||
This release doesn't include any significant new functionality, but does
|
||||
include small improvements in generated code and a number of bug fixes.
|
||||
|
||||
The one user-visible language change is that integer constants may be
|
||||
specified with 'u' and 'l' suffixes, like in C. For example, "1024llu"
|
||||
defines the constant with unsigned 64-bit type.
|
||||
|
||||
More informative and useful error messages are printed when function
|
||||
overload resolution fails.
|
||||
|
||||
Masking is avoided in additional cases when the mask can be
|
||||
statically-determined to be all on.
|
||||
|
||||
A number of small bugs have been fixed:
|
||||
- Under some circumstances, incorrect masks were used when assigning a
|
||||
value to a reference and when doing gathers/scatters.
|
||||
- Incorrect code could be generated in some cases when some instances
|
||||
returned part way through a function but others continued executing.
|
||||
- Type checking wasn't being performed for calls through function pointers;
|
||||
now an error is issued if the arguments don't match up, etc.
|
||||
- Incorrect code was being generated for gather/scatter to structs that had
|
||||
elements with varying short-vector types.
|
||||
- Typechecking wasn't being performed for "foreach" statements; this led to
|
||||
problems like function overload resolution not being performed if an
|
||||
overloaded function call was used to determine the iteration range.
|
||||
- A number of symbols would be multiply-defined when compiling to multiple
|
||||
targets and using the sse2-x2 target as one of them (issue #131).
|
||||
|
||||
=== v1.1.0 === (5 December 2011)
|
||||
|
||||
This is a major new release of the compiler, with significant additions to
|
||||
|
||||
@@ -273,10 +273,10 @@ Then four object files will be generated: ``foo_sse2.o``, ``foo_sse4.o``,
|
||||
``foo_avx.o``, and ``foo.o``.[#]_ Link all of these into your executable, and
|
||||
when you call a function in ``foo.ispc`` from your application code,
|
||||
``ispc`` will determine which instruction sets are supported by the CPU the
|
||||
code is running on and will call the most appropraite version of the
|
||||
code is running on and will call the most appropriate version of the
|
||||
function available.
|
||||
|
||||
.. [#] Similarly, if you choose to generate assembly langauage output or
|
||||
.. [#] Similarly, if you choose to generate assembly language output or
|
||||
LLVM bitcode output, multiple versions of those files will be created.
|
||||
|
||||
In general, the version of the function that runs will be the one in the
|
||||
|
||||
135
docs/ispc.txt
135
docs/ispc.txt
@@ -26,9 +26,9 @@ The main goals behind ``ispc`` are to:
|
||||
units without the extremely low-programmer-productivity activity of directly
|
||||
writing intrinsics.
|
||||
* Explore opportunities from close-coupling between C/C++ application code
|
||||
and SPMD ``ispc`` code running on the same processor--lightweight funcion
|
||||
calls betwen the two languages, sharing data directly via pointers without
|
||||
copying or reformating, etc.
|
||||
and SPMD ``ispc`` code running on the same processor--lightweight function
|
||||
calls between the two languages, sharing data directly via pointers without
|
||||
copying or reformatting, etc.
|
||||
|
||||
**We are very interested in your feedback and comments about ispc and
|
||||
in hearing your experiences using the system. We are especially interested
|
||||
@@ -146,8 +146,6 @@ Contents:
|
||||
+ `Restructuring Existing Programs to Use ISPC`_
|
||||
+ `Understanding How to Interoperate With the Application's Data`_
|
||||
|
||||
* `Related Languages`_
|
||||
|
||||
* `Disclaimer and Legal Information`_
|
||||
|
||||
* `Optimization Notice`_
|
||||
@@ -251,7 +249,7 @@ of the value.
|
||||
The first thing to notice in this program is the presence of the ``export``
|
||||
keyword in the function definition; this indicates that the function should
|
||||
be made available to be called from application code. The ``uniform``
|
||||
qualifiers on the parameters to ``simple`` indicate that the correpsonding
|
||||
qualifiers on the parameters to ``simple`` indicate that the corresponding
|
||||
variables are non-vector quantities--this concept is discussed in detail in the
|
||||
`"uniform" and "varying" Qualifiers`_ section.
|
||||
|
||||
@@ -323,7 +321,7 @@ When the executable ``simple`` runs, it generates the expected output:
|
||||
...
|
||||
|
||||
For a slightly more complex example of using ``ispc``, see the `Mandelbrot
|
||||
set example`_ page on the ``ispc`` website for a walkthrough of an ``ispc``
|
||||
set example`_ page on the ``ispc`` website for a walk-through of an ``ispc``
|
||||
implementation of that algorithm. After reading through that example, you
|
||||
may want to examine the source code of the various examples in the
|
||||
``examples/`` directory of the ``ispc`` distribution.
|
||||
@@ -374,7 +372,7 @@ Optimizations are on by default; they can be turned off with ``-O0``:
|
||||
On Mac\* and Linux\*, there is basic support for generating debugging
|
||||
symbols; this is enabled with the ``-g`` command-line flag. Using ``-g``
|
||||
causes optimizations to be disabled; to compile with debugging symbols and
|
||||
optimizaion, ``-O1`` should be provided as well as the ``-g`` flag.
|
||||
optimization, ``-O1`` should be provided as well as the ``-g`` flag.
|
||||
|
||||
The ``-h`` flag can also be used to direct ``ispc`` to generate a C/C++
|
||||
header file that includes C/C++ declarations of the C-callable ``ispc``
|
||||
@@ -402,7 +400,7 @@ which sets the target architecture, ``--cpu``, which sets the target CPU,
|
||||
and ``--target``, which sets the target instruction set.
|
||||
|
||||
By default, the ``ispc`` compiler generates code for the 64-bit x86-64
|
||||
architecture (i.e. ``--arch=x86-64`.) To compile to a 32-bit x86 target,
|
||||
architecture (i.e. ``--arch=x86-64``.) To compile to a 32-bit x86 target,
|
||||
supply ``--arch=x86`` on the command line:
|
||||
|
||||
::
|
||||
@@ -473,6 +471,9 @@ preprocessor runs:
|
||||
* - ISPC_TARGET_{SSE2,SSE4,AVX}
|
||||
- 1
|
||||
- One of these will be set, depending on the compilation target.
|
||||
* - ISPC_POINTER_SIZE
|
||||
- 32 or 64
|
||||
- Number of bits used to represent a pointer for the target architecture.
|
||||
* - ISPC_MAJOR_VERSION
|
||||
- 1
|
||||
- Major version of the ``ispc`` compiler/language
|
||||
@@ -525,8 +526,8 @@ Basic Concepts: Program Instances and Gangs of Program Instances
|
||||
Upon entry to an ``ispc`` function called from C/C++ code, the execution
|
||||
model switches from the application's serial model to ``ispc``'s execution
|
||||
model. Conceptually, a number of ``ispc`` *program instances* start
|
||||
running in concurrently. The group of running program instances is a
|
||||
called *gang* (harkening to "gang scheduling", since ``ispc`` provides
|
||||
running concurrently. The group of running program instances is
|
||||
called a *gang* (harkening to "gang scheduling", since ``ispc`` provides
|
||||
certain guarantees about the control flow coherence of program instances
|
||||
running in a gang, detailed in `Gang Convergence Guarantees`_.) An
|
||||
``ispc`` program instance is thus similar to a CUDA* "thread" or an OpenCL*
|
||||
@@ -609,7 +610,7 @@ side-effects.
|
||||
|
||||
Upon entry to an ``ispc`` function called by the application, the execution
|
||||
mask is "all on" and the program counter points at the first statement in
|
||||
the function. The following two statments describe the required behavior
|
||||
the function. The following two statements describe the required behavior
|
||||
of the program counter and the execution mask over the course of execution
|
||||
of an ``ispc`` function.
|
||||
|
||||
@@ -730,7 +731,7 @@ program instances is *maximally converged*. Maximal convergence means that
|
||||
if two program instances follow the same control path, they are guaranteed
|
||||
to execute each program statement concurrently. If two program instances
|
||||
follow diverging control paths, it is guaranteed that they will reconverge
|
||||
as soon as possible (if they do later reconverge). [#]_
|
||||
as soon as possible in the function (if they do later reconverge). [#]_
|
||||
|
||||
.. [#] This is another significant difference between the ``ispc``
|
||||
execution model and the one implemented by OpenCL* and CUDA*, which
|
||||
@@ -754,9 +755,25 @@ It is guaranteed that all program instances that were running before the
|
||||
for the gang of program instances, rather than the concept of a unique
|
||||
program counter for each program instance.)
|
||||
|
||||
Another implication of this property is that it is illegal to execute a
|
||||
function with an 8-wide gang by running it two times, with a 4-wide gang
|
||||
representing half of the original 8-wide gang each time.
|
||||
Another implication of this property is that it would be illegal for the
|
||||
``ispc`` implementation to execute a function with an 8-wide gang by
|
||||
running it two times, with a 4-wide gang representing half of the original
|
||||
8-wide gang each time.
|
||||
|
||||
It also follows that given the following program:
|
||||
|
||||
::
|
||||
|
||||
if (programIndex == 0) {
|
||||
while (true) // infinite loop
|
||||
;
|
||||
}
|
||||
print("hello, world\n");
|
||||
|
||||
the program will loop infinitely and the ``print`` statement will never be
|
||||
executed. (A different execution model that allowed gang divergence might
|
||||
execute the ``print`` statement since not all program instances were caught
|
||||
in the infinite loop in the example above.)
|
||||
|
||||
The way that "varying" function pointers are handled in ``ispc`` is also
|
||||
affected by this guarantee: if a function pointer is ``varying``, then it
|
||||
@@ -802,7 +819,7 @@ of control flow, will say that control flow based on ``varying``
|
||||
expressions is "varying" control flow.)
|
||||
|
||||
Consider for example an image filtering operation where the program loops
|
||||
over pixels adjacent to the given (x,y) coordiantes:
|
||||
over pixels adjacent to the given (x,y) coordinates:
|
||||
|
||||
::
|
||||
|
||||
@@ -902,7 +919,7 @@ for all program instances in the gang, it's possible that the "true" clause
|
||||
executed with an "all off" mask and ``b`` was modified there.
|
||||
|
||||
If it is important that code never be executed with an "all off" execution
|
||||
mask, then the ``cif`` statment (documented in the `"Coherent" Control Flow
|
||||
mask, then the ``cif`` statement (documented in the `"Coherent" Control Flow
|
||||
Statements: "cif" and Friends`_ section) can be used in place of a regular
|
||||
``if``, as it guarantees this property.
|
||||
|
||||
@@ -973,6 +990,20 @@ which of them will write their value of ``value`` to ``array[index]``.
|
||||
array[index] = value;
|
||||
}
|
||||
|
||||
As another example, if the values of the array indices ``i`` and ``j`` have
|
||||
the same values for some of the program instances, and an assignment like
|
||||
the following is performed:
|
||||
|
||||
::
|
||||
|
||||
int i = ..., j = ...;
|
||||
uniform int array[...] = { ... };
|
||||
array[i] = array[j];
|
||||
|
||||
|
||||
then the program's behavior is undefined, since there is no sequence point
|
||||
between the reads and writes to the same location.
|
||||
|
||||
While this rule that says that program instances can safely depend on
|
||||
side-effects from other program instances in their gang eliminates a
|
||||
class of synchronization requirements imposed by some other SPMD languages,
|
||||
@@ -1014,7 +1045,7 @@ completed.
|
||||
The ISPC Language
|
||||
=================
|
||||
|
||||
``ispc`` is an extended verion of the C programming language, providing a
|
||||
``ispc`` is an extended version of the C programming language, providing a
|
||||
number of new features that make it easy to write high-performance SPMD
|
||||
programs for the CPU. Note that between not only the few small syntactic
|
||||
differences between ``ispc`` and C code but more importantly ``ispc``'s
|
||||
@@ -1035,12 +1066,12 @@ This subsection summarizes the differences between ``ispc`` and C; if you
|
||||
are already familiar with C, you may find it most effective to focus on
|
||||
this subsection and just focus on the topics in the remainder of the section
|
||||
that introduce new language features. You may also find it helpful to
|
||||
comapre the ``ispc`` and C++ implementations of various algorithms in the
|
||||
compare the ``ispc`` and C++ implementations of various algorithms in the
|
||||
``ispc`` ``examples/`` directory to get a sense of the close relationship
|
||||
between ``ispc`` and C.
|
||||
|
||||
Specifically, C89 is used as the baseline for comparison in this subsection
|
||||
(this is also the verion of C described in the Second Edition of Kernighan
|
||||
(this is also the version of C described in the Second Edition of Kernighan
|
||||
and Ritchie's book). (``ispc`` adopts some features from C99 and from C++,
|
||||
which will be highlighted below.)
|
||||
|
||||
@@ -1068,7 +1099,7 @@ in C:
|
||||
statement itself (e.g. ``for (int i = 0; ...``)
|
||||
* The ``inline`` qualifier to indicate that a function should be inlined
|
||||
* Function overloading by parameter type
|
||||
* Hexidecimal floating-point constants
|
||||
* Hexadecimal floating-point constants
|
||||
|
||||
``ispc`` also adds a number of new features that aren't in C89, C99, or
|
||||
C++:
|
||||
@@ -1127,11 +1158,11 @@ The following reserved words from C89 are also reserved in ``ispc``:
|
||||
Lexical Structure
|
||||
-----------------
|
||||
|
||||
Tokens in ``ispc`` are delimted by white-space and comments. The
|
||||
Tokens in ``ispc`` are delimited by white-space and comments. The
|
||||
white-space characters are the usual set of spaces, tabs, and carriage
|
||||
returns/line feeds. Comments can be delinated with ``//``, which starts a
|
||||
returns/line feeds. Comments can be delineated with ``//``, which starts a
|
||||
comment that continues to the end of the line, or the start of a comment
|
||||
can be delinated with ``/*`` and the end with ``*/``. Like C/C++,
|
||||
can be delineated with ``/*`` and the end with ``*/``. Like C/C++,
|
||||
comments can't be nested.
|
||||
|
||||
Identifiers in ``ispc`` are sequences of characters that start with an
|
||||
@@ -1139,9 +1170,9 @@ underscore or an upper-case or lower-case letter, and then followed by
|
||||
zero or more letters, numbers, or underscores. Identifiers that start with
|
||||
two underscores are reserved for use by the compiler.
|
||||
|
||||
Integer numeric constants can be specified in base 10, hexidecimal, or
|
||||
Integer numeric constants can be specified in base 10, hexadecimal, or
|
||||
binary. (Octal integer constants aren't supported). Base 10 constants are
|
||||
given by a sequence of one or more digits from 0 to 9. Hexidecimal
|
||||
given by a sequence of one or more digits from 0 to 9. Hexadecimal
|
||||
constants are denoted by a leading ``0x`` and then one or more digits from
|
||||
0-9, a-f, or A-F. Finally, binary constants are denoted by a leading
|
||||
``0b`` and then a sequence of 1s and 0s.
|
||||
@@ -1163,11 +1194,11 @@ The second option is scientific notation, where a base value is specified
|
||||
as the first form of a floating-point constant but is then followed by an
|
||||
"e" or "E", then a plus sign or a minus sign, and then an exponent.
|
||||
|
||||
Finally, floating-point constants may be specified as hexidecimal
|
||||
Finally, floating-point constants may be specified as hexadecimal
|
||||
constants; this form can ensure a perfectly bit-accurate representation of
|
||||
a particular floating-point number. These are specified with an "0x"
|
||||
prefix, followed by a zero or a one, a period, and then the remainder of
|
||||
the mantissa in hexidecimal form, with digits from 0-9, a-f, or A-F. The
|
||||
the mantissa in hexadecimal form, with digits from 0-9, a-f, or A-F. The
|
||||
start of the exponent is denoted by a "p", which is then followed by an
|
||||
optional plus or minus sign and then digits from 0 to 9. For example:
|
||||
|
||||
@@ -1204,7 +1235,7 @@ to specify special characters. These sequences all start with an initial
|
||||
* - ``\n``
|
||||
- newline
|
||||
* - ``\r``
|
||||
- carriabe return
|
||||
- carriage return
|
||||
* - ``\t``
|
||||
- horizontal tab
|
||||
* - ``\v``
|
||||
@@ -1212,7 +1243,7 @@ to specify special characters. These sequences all start with an initial
|
||||
* - ``\`` followed by one or more digits from 0-7
|
||||
- ASCII character in octal notation
|
||||
* - ``\x``, followed by one or more digits from 0-9, a-f, A-F
|
||||
- ASCII character in hexidecimal notation
|
||||
- ASCII character in hexadecimal notation
|
||||
|
||||
``ispc`` doesn't support a string data type; string constants can be passed
|
||||
as the first argument to the ``print()`` statement, however. ``ispc`` also
|
||||
@@ -1367,7 +1398,7 @@ store are:
|
||||
uniform float bar[10];
|
||||
|
||||
The first declaration corresponds to 10 gang-wide ``float`` values in
|
||||
memory, while the second declaration corresonds to 10 ``float`` values.
|
||||
memory, while the second declaration corresponds to 10 ``float`` values.
|
||||
|
||||
|
||||
Defining New Names For Types
|
||||
@@ -1531,7 +1562,7 @@ instance in the gang has its own unique pointer value)
|
||||
|
||||
(The rationale for this limitation is that references must be represented
|
||||
as either a uniform pointer or a varying pointer internally. While
|
||||
choosing a varying pointer would provide maximum flexibilty and eliminate
|
||||
choosing a varying pointer would provide maximum flexibility and eliminate
|
||||
this restriction, it would reduce performance in the common case where a
|
||||
uniform pointer is all that's needed. As a work-around, a varying pointer
|
||||
can be used in cases where a varying lvalue reference would be desired.)
|
||||
@@ -1554,7 +1585,7 @@ and then a brace-delimited list of enumerators with optional values:
|
||||
|
||||
Each ``enum`` declaration defines a new type; an attempt to implicitly
|
||||
convert between enumerations of different types gives a compile-time error,
|
||||
but enuemrations of different types can be explicitly cast to one other.
|
||||
but enumerations of different types can be explicitly cast to one another.
|
||||
|
||||
::
|
||||
|
||||
@@ -1564,7 +1595,7 @@ Enumerators are implicitly converted to integer types, however, so they can
|
||||
be directly passed to routines that take integer parameters and can be used
|
||||
in expressions including integers, for example. However, the integer
|
||||
result of such an expression must be explicitly cast back to the enumerant
|
||||
type if it to be assigned to a variable with the enuemrant type.
|
||||
type if it is to be assigned to a variable with the enumerant type.
|
||||
|
||||
::
|
||||
|
||||
@@ -1815,7 +1846,7 @@ Structures can also be initialized by providing element values in braces:
|
||||
....
|
||||
Color d = { 0.5, .75, 1.0 }; // r = 0.5, ...
|
||||
|
||||
Arrays of structures and arrays inside structures can be initialzed with
|
||||
Arrays of structures and arrays inside structures can be initialized with
|
||||
the expected syntax:
|
||||
|
||||
::
|
||||
@@ -1849,7 +1880,7 @@ Structure member access and array indexing also work as in C.
|
||||
return foo.f[4] - foo.i;
|
||||
|
||||
|
||||
The address-of operator, pointer derefernce operator, and pointer member
|
||||
The address-of operator, pointer dereference operator, and pointer member
|
||||
operator also work as expected.
|
||||
|
||||
::
|
||||
@@ -1894,7 +1925,7 @@ Basic Iteration Statements: "for", "while", and "do"
|
||||
|
||||
``ispc`` supports ``for``, ``while``, and ``do`` loops, with the same
|
||||
specification as in C. Like C++, variables can be declared in the ``for``
|
||||
statment itself:
|
||||
statement itself:
|
||||
|
||||
::
|
||||
|
||||
@@ -1978,7 +2009,7 @@ nested inside a ``foreach`` loop.) ``continue`` statements are legal in
|
||||
a program instance that executes a ``continue`` statement effectively
|
||||
skips over the rest of the loop body for the current iteration.
|
||||
|
||||
As a specific example, consdier the following ``foreach`` statement:
|
||||
As a specific example, consider the following ``foreach`` statement:
|
||||
|
||||
::
|
||||
|
||||
@@ -2076,7 +2107,7 @@ some computation on an array of data.
|
||||
}
|
||||
|
||||
Here, we've written a loop that explicitly loops over the data in chunks of
|
||||
``programCount`` elements. In each loop iteraton, the running program
|
||||
``programCount`` elements. In each loop iteration, the running program
|
||||
instances effectively collude amongst themselves using ``programIndex`` to
|
||||
determine which elements to work on in a way that ensures that all of the
|
||||
data elements will be processed. In this particular case, a ``foreach``
|
||||
@@ -2282,7 +2313,7 @@ distributions.
|
||||
If you are implementing your own task system, the remainder of this section
|
||||
discusses the requirements for these calls. You will also likely want to
|
||||
review the example task systems in ``examples/tasksys.cpp`` for reference.
|
||||
If you are not implmenting your own task system, you can skip reading the
|
||||
If you are not implementing your own task system, you can skip reading the
|
||||
remainder of this section.
|
||||
|
||||
Here are the declarations of the three functions that must be provided to
|
||||
@@ -2302,7 +2333,7 @@ implementation can efficiently wait for completion on just the tasks
|
||||
launched from a single function.
|
||||
|
||||
The first time one of ``ISPCLaunch()`` or ``ISPCAlloc()`` is called in an
|
||||
``ispc`` functon, the ``void *`` pointed to by the ``handlePtr`` parameter
|
||||
``ispc`` function, the ``void *`` pointed to by the ``handlePtr`` parameter
|
||||
will be ``NULL``. The implementations of these functions should then
|
||||
initialize ``*handlePtr`` to a unique handle value of some sort. (For
|
||||
example, it might allocate a small structure to record which tasks were
|
||||
@@ -2318,14 +2349,14 @@ than a pointer to it, as in the other functions.
|
||||
|
||||
The ``ISPCAlloc()`` function is used to allocate small blocks of memory to
|
||||
store parameters passed to tasks. It should return a pointer to memory
|
||||
with the given aize and alignment. Note that there is no explicit
|
||||
with the given size and alignment. Note that there is no explicit
|
||||
``ISPCFree()`` call; instead, all memory allocated within an ``ispc``
|
||||
function should be freed when ``ISPCSync()`` is called.
|
||||
|
||||
``ISPCLaunch()`` is called to launch one or more asynchronous
|
||||
tasks. Each ``launch`` statement in ``ispc`` code causes a call to
|
||||
``ISPCLaunch()`` to be emitted in the generated code. The three parameters
|
||||
after the handle pointer to thie function are relatively straightforward;
|
||||
after the handle pointer to the function are relatively straightforward;
|
||||
the ``void *f`` parameter holds a pointer to a function to call to run the
|
||||
work for this task, ``data`` holds a pointer to data to pass to this
|
||||
function, and ``count`` is the number of instances of this function to
|
||||
@@ -2340,7 +2371,7 @@ The signature of the provided function pointer ``f`` is
|
||||
int taskIndex, int taskCount)
|
||||
|
||||
When this function pointer is called by one of the hardware threads managed
|
||||
bythe task system, the ``data`` pointer passed to ``ISPCLaunch()`` should
|
||||
by the task system, the ``data`` pointer passed to ``ISPCLaunch()`` should
|
||||
be passed to it for its first parameter; ``threadCount`` gives the total
|
||||
number of hardware threads that have been spawned to run tasks and
|
||||
``threadIndex`` should be an integer index between zero and ``threadCount``
|
||||
@@ -2659,7 +2690,7 @@ generates the following output on a four-wide compilation target:
|
||||
When a varying variable is printed, the values for program instances that
|
||||
aren't currently executing are printed inside double parentheses,
|
||||
indicating inactive program instances. The elements for inactive program
|
||||
instances may have garabge values, though in some circumstances it can be
|
||||
instances may have garbage values, though in some circumstances it can be
|
||||
useful to see their values.
|
||||
|
||||
Assertions
|
||||
@@ -2879,7 +2910,7 @@ If called when none of the program instances are running,
|
||||
There are also a number of functions to compute "scan"s of values across
|
||||
the program instances. For example, the ``exclusive_scan_add()`` function
|
||||
computes, for each program instance, the sum of the given value over all of
|
||||
the preceeding program instances. (The scans currently available in
|
||||
the preceding program instances. (The scans currently available in
|
||||
``ispc`` are all so-called "exclusive" scans, meaning that the value
|
||||
computed for a given element does not include the value provided for that
|
||||
element.) In C code, an exclusive add scan over an array might be
|
||||
@@ -3175,7 +3206,7 @@ rather than one per program instance.
|
||||
uniform int32 newval)
|
||||
|
||||
Be careful that you use the atomic function that you mean to; consider the
|
||||
folloiwng code:
|
||||
following code:
|
||||
|
||||
::
|
||||
|
||||
@@ -3532,7 +3563,7 @@ Restructuring Existing Programs to Use ISPC
|
||||
|
||||
``ispc`` is designed to enable you to incorporate
|
||||
SPMD parallelism into existing code with minimal modification; features
|
||||
like the ability to share memory and data structures betwen C/C++ and
|
||||
like the ability to share memory and data structures between C/C++ and
|
||||
``ispc`` code and the ability to directly call back and forth between
|
||||
``ispc`` and C/C++ are motivated by this. These features also make it
|
||||
easy to incrementally transform a program to use ``ispc``; the most
|
||||
@@ -3708,12 +3739,6 @@ elements to work with and then proceeds with the computation.
|
||||
}
|
||||
|
||||
|
||||
Related Languages
|
||||
=================
|
||||
|
||||
TODO: rsl, C*, IVL
|
||||
|
||||
|
||||
Disclaimer and Legal Information
|
||||
================================
|
||||
|
||||
|
||||
@@ -22,8 +22,8 @@ also included in the ``examples/`` directory.)
|
||||
- ``ispc``, 1 core
|
||||
- ``ispc``, 4 cores
|
||||
* - `AOBench`_ (512 x 512 resolution)
|
||||
- 3.99x
|
||||
- 19.32x
|
||||
- 6.19x
|
||||
- 28.06x
|
||||
* - `Binomial Options`_ (128k options)
|
||||
- 7.94x
|
||||
- 33.43x
|
||||
@@ -31,23 +31,23 @@ also included in the ``examples/`` directory.)
|
||||
- 8.45x
|
||||
- 32.48x
|
||||
* - `Deferred Shading`_ (1280p)
|
||||
- n/a
|
||||
- 5.02x
|
||||
- 23.06x
|
||||
* - `Mandelbrot Set`_
|
||||
- 6.21x
|
||||
- 19.90x
|
||||
- 20.28x
|
||||
* - `Perlin Noise Function`_
|
||||
- 5.37x
|
||||
- n/a
|
||||
* - `Ray Tracer`_ (Sponza dataset)
|
||||
- 3.99x
|
||||
- 19.32x
|
||||
- 4.31x
|
||||
- 20.29x
|
||||
* - `3D Stencil`_
|
||||
- 3.76x
|
||||
- 13.79x
|
||||
- 4.05x
|
||||
- 15.53x
|
||||
* - `Volume Rendering`_
|
||||
- 3.11x
|
||||
- 15.80x
|
||||
- 3.60x
|
||||
- 17.53x
|
||||
|
||||
|
||||
.. _AOBench: https://github.com/ispc/ispc/tree/master/examples/aobench
|
||||
|
||||
@@ -64,7 +64,7 @@ on each one:
|
||||
Depending on the specifics of the computation being performed, the code
|
||||
generated for this function could likely be improved by modifying the code
|
||||
so that the loop only goes as far through the data as is possible to pack
|
||||
an entire gang of program instances with computation each time thorugh the
|
||||
an entire gang of program instances with computation each time through the
|
||||
loop. Doing so enables the ``ispc`` compiler to generate more efficient
|
||||
code for cases where it knows that the execution mask is "all on". Then,
|
||||
an ``if`` statement at the end handles processing the ragged extra bits of
|
||||
@@ -153,7 +153,7 @@ processed, and so forth.
|
||||
|
||||
Performance benefit can come from using ``foreach_tiled`` in that it
|
||||
essentially optimizes for the benefit of iterating over *compact* regions
|
||||
of the domian (while ``foreach`` iterates over the domain in a way that
|
||||
of the domain (while ``foreach`` iterates over the domain in a way that
|
||||
generally allows linear memory access.) There are two benefits from
|
||||
processing compact regions of the domain.
|
||||
|
||||
@@ -215,7 +215,7 @@ Use "uniform" Whenever Appropriate
|
||||
----------------------------------
|
||||
|
||||
For any variable that will always have the same value across all of the
|
||||
program instances in a gang, declare the variable with the ``unfiorm``
|
||||
program instances in a gang, declare the variable with the ``uniform``
|
||||
qualifier. Doing so enables the ``ispc`` compiler to emit better code in
|
||||
many different ways.
|
||||
|
||||
@@ -229,7 +229,7 @@ number of iterations:
|
||||
|
||||
If this is written with ``i`` as a ``varying`` variable, as above, there's
|
||||
additional overhead in the code generated for the loop as the compiler
|
||||
emits instructions to handle the possibilty of not all program instances
|
||||
emits instructions to handle the possibility of not all program instances
|
||||
following the same control flow path (as might be the case if the loop
|
||||
limit, 10, was itself a ``varying`` value.)
|
||||
|
||||
@@ -568,7 +568,7 @@ mask of all lanes currently executing (assuming a four-wide gang size
|
||||
target machine).
|
||||
|
||||
For a fuller example of the utility of this functionality, see
|
||||
``examples/aobench_instrumented`` in the ``ispc`` distribution. Ths
|
||||
``examples/aobench_instrumented`` in the ``ispc`` distribution. This
|
||||
example includes an implementation of the ``ISPCInstrument()`` function
|
||||
that collects aggregate data about the program's execution behavior.
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ PROJECT_NAME = "Intel SPMD Program Compiler"
|
||||
# This could be handy for archiving the generated documentation or
|
||||
# if some version control system is used.
|
||||
|
||||
PROJECT_NUMBER = 1.1.0
|
||||
PROJECT_NUMBER = 1.1.1
|
||||
|
||||
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
|
||||
# base path where the generated documentation will be put.
|
||||
|
||||
@@ -49,17 +49,16 @@ mandel(float c_re, float c_im, int count) {
|
||||
}
|
||||
|
||||
|
||||
/* Task to compute the Mandelbrot iterations for a span of scanlines from
|
||||
[ystart,yend).
|
||||
/* Task to compute the Mandelbrot iterations for a span of scanlines.
|
||||
*/
|
||||
task void
|
||||
mandelbrot_scanlines(uniform int ybase, uniform int span,
|
||||
uniform float x0, uniform float dx,
|
||||
uniform float y0, uniform float dy,
|
||||
uniform int width, uniform int maxIterations,
|
||||
uniform int output[]) {
|
||||
uniform int ystart = ybase + taskIndex * span;
|
||||
uniform int yend = ystart + span;
|
||||
mandelbrot_scanline(uniform float x0, uniform float dx,
|
||||
uniform float y0, uniform float dy,
|
||||
uniform int width, uniform int height,
|
||||
uniform int span,
|
||||
uniform int maxIterations, uniform int output[]) {
|
||||
uniform int ystart = taskIndex * span;
|
||||
uniform int yend = min((taskIndex+1) * span, (unsigned int)height);
|
||||
|
||||
foreach (yi = ystart ... yend, xi = 0 ... width) {
|
||||
float x = x0 + xi * dx;
|
||||
@@ -71,20 +70,6 @@ mandelbrot_scanlines(uniform int ybase, uniform int span,
|
||||
}
|
||||
|
||||
|
||||
task void
|
||||
mandelbrot_chunk(uniform float x0, uniform float dx,
|
||||
uniform float y0, uniform float dy,
|
||||
uniform int width, uniform int height,
|
||||
uniform int maxIterations, uniform int output[]) {
|
||||
uniform int ystart = taskIndex * (height/taskCount);
|
||||
uniform int yend = (taskIndex+1) * (height/taskCount);
|
||||
uniform int span = 1;
|
||||
|
||||
launch[(yend-ystart)/span] < mandelbrot_scanlines(ystart, span, x0, dx, y0, dy,
|
||||
width, maxIterations, output) >;
|
||||
}
|
||||
|
||||
|
||||
export void
|
||||
mandelbrot_ispc(uniform float x0, uniform float y0,
|
||||
uniform float x1, uniform float y1,
|
||||
@@ -92,7 +77,8 @@ mandelbrot_ispc(uniform float x0, uniform float y0,
|
||||
uniform int maxIterations, uniform int output[]) {
|
||||
uniform float dx = (x1 - x0) / width;
|
||||
uniform float dy = (y1 - y0) / height;
|
||||
uniform int span = 4;
|
||||
|
||||
launch[32] < mandelbrot_chunk(x0, dx, y0, dy, width, height,
|
||||
maxIterations, output) >;
|
||||
launch[height/span] < mandelbrot_scanline(x0, dx, y0, dy, width, height, span,
|
||||
maxIterations, output) >;
|
||||
}
|
||||
|
||||
@@ -8,7 +8,7 @@ TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o)))
|
||||
CXX=g++
|
||||
CXXFLAGS=-Iobjs/ -O3 -Wall -m64
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx --arch=x86-64
|
||||
ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx-x2 --arch=x86-64
|
||||
|
||||
OBJS=objs/stencil.o objs/stencil_serial.o $(TASK_OBJ) objs/stencil_ispc.o \
|
||||
objs/stencil_ispc_sse2.o objs/stencil_ispc_sse4.o \
|
||||
|
||||
@@ -41,27 +41,23 @@ stencil_step(uniform int x0, uniform int x1,
|
||||
uniform const float Ain[], uniform float Aout[]) {
|
||||
const uniform int Nxy = Nx * Ny;
|
||||
|
||||
for (uniform int z = z0; z < z1; ++z) {
|
||||
for (uniform int y = y0; y < y1; ++y) {
|
||||
foreach (x = x0 ... x1) {
|
||||
int index = (z * Nxy) + (y * Nx) + x;
|
||||
foreach (z = z0 ... z1, y = y0 ... y1, x = x0 ... x1) {
|
||||
int index = (z * Nxy) + (y * Nx) + x;
|
||||
#define A_cur(x, y, z) Ain[index + (x) + ((y) * Nx) + ((z) * Nxy)]
|
||||
#define A_next(x, y, z) Aout[index + (x) + ((y) * Nx) + ((z) * Nxy)]
|
||||
float div = coef[0] * A_cur(0, 0, 0) +
|
||||
coef[1] * (A_cur(+1, 0, 0) + A_cur(-1, 0, 0) +
|
||||
A_cur(0, +1, 0) + A_cur(0, -1, 0) +
|
||||
A_cur(0, 0, +1) + A_cur(0, 0, -1)) +
|
||||
coef[2] * (A_cur(+2, 0, 0) + A_cur(-2, 0, 0) +
|
||||
A_cur(0, +2, 0) + A_cur(0, -2, 0) +
|
||||
A_cur(0, 0, +2) + A_cur(0, 0, -2)) +
|
||||
coef[3] * (A_cur(+3, 0, 0) + A_cur(-3, 0, 0) +
|
||||
A_cur(0, +3, 0) + A_cur(0, -3, 0) +
|
||||
A_cur(0, 0, +3) + A_cur(0, 0, -3));
|
||||
float div = coef[0] * A_cur(0, 0, 0) +
|
||||
coef[1] * (A_cur(+1, 0, 0) + A_cur(-1, 0, 0) +
|
||||
A_cur(0, +1, 0) + A_cur(0, -1, 0) +
|
||||
A_cur(0, 0, +1) + A_cur(0, 0, -1)) +
|
||||
coef[2] * (A_cur(+2, 0, 0) + A_cur(-2, 0, 0) +
|
||||
A_cur(0, +2, 0) + A_cur(0, -2, 0) +
|
||||
A_cur(0, 0, +2) + A_cur(0, 0, -2)) +
|
||||
coef[3] * (A_cur(+3, 0, 0) + A_cur(-3, 0, 0) +
|
||||
A_cur(0, +3, 0) + A_cur(0, -3, 0) +
|
||||
A_cur(0, 0, +3) + A_cur(0, 0, -3));
|
||||
|
||||
A_next(0, 0, 0) = 2 * A_cur(0, 0, 0) - A_next(0, 0, 0) +
|
||||
vsq[index] * div;
|
||||
}
|
||||
}
|
||||
A_next(0, 0, 0) = 2 * A_cur(0, 0, 0) - A_next(0, 0, 0) +
|
||||
vsq[index] * div;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -69,11 +65,12 @@ stencil_step(uniform int x0, uniform int x1,
|
||||
static task void
|
||||
stencil_step_task(uniform int x0, uniform int x1,
|
||||
uniform int y0, uniform int y1,
|
||||
uniform int z0, uniform int z1,
|
||||
uniform int z0,
|
||||
uniform int Nx, uniform int Ny, uniform int Nz,
|
||||
uniform const float coef[4], uniform const float vsq[],
|
||||
uniform const float Ain[], uniform float Aout[]) {
|
||||
stencil_step(x0, x1, y0, y1, z0, z1, Nx, Ny, Nz, coef, vsq, Ain, Aout);
|
||||
stencil_step(x0, x1, y0, y1, z0+taskIndex, z0+taskIndex+1,
|
||||
Nx, Ny, Nz, coef, vsq, Ain, Aout);
|
||||
}
|
||||
|
||||
|
||||
@@ -89,17 +86,14 @@ loop_stencil_ispc_tasks(uniform int t0, uniform int t1,
|
||||
{
|
||||
for (uniform int t = t0; t < t1; ++t) {
|
||||
// Parallelize across cores as well: each task will work on a slice
|
||||
// of "dz" in the z extent of the volume. (dz=1 seems to work
|
||||
// better than any larger values.)
|
||||
uniform int dz = 1;
|
||||
for (uniform int z = z0; z < z1; z += dz) {
|
||||
if ((t & 1) == 0)
|
||||
launch < stencil_step_task(x0, x1, y0, y1, z, z+dz, Nx, Ny, Nz,
|
||||
coef, vsq, Aeven, Aodd) >;
|
||||
else
|
||||
launch < stencil_step_task(x0, x1, y0, y1, z, z+dz, Nx, Ny, Nz,
|
||||
coef, vsq, Aodd, Aeven) >;
|
||||
}
|
||||
// of 1 in the z extent of the volume.
|
||||
if ((t & 1) == 0)
|
||||
launch[z1-z0] < stencil_step_task(x0, x1, y0, y1, z0, Nx, Ny, Nz,
|
||||
coef, vsq, Aeven, Aodd) >;
|
||||
else
|
||||
launch[z1-z0] < stencil_step_task(x0, x1, y0, y1, z0, Nx, Ny, Nz,
|
||||
coef, vsq, Aodd, Aeven) >;
|
||||
|
||||
// We need to wait for all of the launched tasks to finish before
|
||||
// starting the next iteration.
|
||||
sync;
|
||||
|
||||
@@ -8,10 +8,10 @@ TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o)))
|
||||
CXX=g++
|
||||
CXXFLAGS=-Iobjs/ -O3 -Wall -m64
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O2 --target=sse2,sse4-x2 --arch=x86-64
|
||||
ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx --arch=x86-64
|
||||
|
||||
OBJS=objs/volume.o objs/volume_serial.o $(TASK_OBJ) objs/volume_ispc.o \
|
||||
objs/volume_ispc_sse2.o objs/volume_ispc_sse4.o
|
||||
objs/volume_ispc_sse2.o objs/volume_ispc_sse4.o objs/volume_ispc_avx.o
|
||||
|
||||
default: volume
|
||||
|
||||
@@ -34,5 +34,5 @@ objs/%.o: ../%.cpp
|
||||
|
||||
objs/volume.o: objs/volume_ispc.h
|
||||
|
||||
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o: %.ispc
|
||||
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc
|
||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
|
||||
@@ -124,24 +124,13 @@ static inline float D(int x, int y, int z, uniform int nVoxels[3],
|
||||
}
|
||||
|
||||
|
||||
static inline float Du(uniform int x, uniform int y, uniform int z,
|
||||
uniform int nVoxels[3], uniform float density[]) {
|
||||
x = clamp(x, 0, nVoxels[0]-1);
|
||||
y = clamp(y, 0, nVoxels[1]-1);
|
||||
z = clamp(z, 0, nVoxels[2]-1);
|
||||
|
||||
return density[z*nVoxels[0]*nVoxels[1] + y*nVoxels[0] + x];
|
||||
}
|
||||
|
||||
|
||||
static inline float3 Offset(float3 p, float3 pMin, float3 pMax) {
|
||||
return (p - pMin) / (pMax - pMin);
|
||||
}
|
||||
|
||||
|
||||
static inline float Density(float3 Pobj, float3 pMin, float3 pMax,
|
||||
uniform float density[], uniform int nVoxels[3],
|
||||
uniform bool &checkForSameVoxel) {
|
||||
uniform float density[], uniform int nVoxels[3]) {
|
||||
if (!Inside(Pobj, pMin, pMax))
|
||||
return 0;
|
||||
// Compute voxel coordinates and offsets for _Pobj_
|
||||
@@ -153,39 +142,14 @@ static inline float Density(float3 Pobj, float3 pMin, float3 pMax,
|
||||
float dx = vox.x - vx, dy = vox.y - vy, dz = vox.z - vz;
|
||||
|
||||
// Trilinearly interpolate density values to compute local density
|
||||
float d00, d10, d01, d11;
|
||||
uniform int uvx, uvy, uvz;
|
||||
if (checkForSameVoxel && reduce_equal(vx, &uvx) && reduce_equal(vy, &uvy) &&
|
||||
reduce_equal(vz, &uvz)) {
|
||||
// If all of the program instances are inside the same voxel, then
|
||||
// we'll call the 'uniform' variant of the voxel density lookup
|
||||
// function, thus doing a single load for each value rather than a
|
||||
// gather.
|
||||
d00 = Lerp(dx, Du(uvx, uvy, uvz, nVoxels, density),
|
||||
Du(uvx+1, uvy, uvz, nVoxels, density));
|
||||
d10 = Lerp(dx, Du(uvx, uvy+1, uvz, nVoxels, density),
|
||||
Du(uvx+1, uvy+1, uvz, nVoxels, density));
|
||||
d01 = Lerp(dx, Du(uvx, uvy, uvz+1, nVoxels, density),
|
||||
Du(uvx+1, uvy, uvz+1, nVoxels, density));
|
||||
d11 = Lerp(dx, Du(uvx, uvy+1, uvz+1, nVoxels, density),
|
||||
Du(uvx+1, uvy+1, uvz+1, nVoxels, density));
|
||||
}
|
||||
else {
|
||||
// Otherwise, we have to do an actual gather in the more general
|
||||
// D() function. Once the reduce_equal tests above fail, we stop
|
||||
// checking in subsequent steps, since it's unlikely that this will
|
||||
// be true in the future once they've diverged into different
|
||||
// voxels.
|
||||
checkForSameVoxel = false;
|
||||
d00 = Lerp(dx, D(vx, vy, vz, nVoxels, density),
|
||||
D(vx+1, vy, vz, nVoxels, density));
|
||||
d10 = Lerp(dx, D(vx, vy+1, vz, nVoxels, density),
|
||||
D(vx+1, vy+1, vz, nVoxels, density));
|
||||
d01 = Lerp(dx, D(vx, vy, vz+1, nVoxels, density),
|
||||
D(vx+1, vy, vz+1, nVoxels, density));
|
||||
d11 = Lerp(dx, D(vx, vy+1, vz+1, nVoxels, density),
|
||||
D(vx+1, vy+1, vz+1, nVoxels, density));
|
||||
}
|
||||
float d00 = Lerp(dx, D(vx, vy, vz, nVoxels, density),
|
||||
D(vx+1, vy, vz, nVoxels, density));
|
||||
float d10 = Lerp(dx, D(vx, vy+1, vz, nVoxels, density),
|
||||
D(vx+1, vy+1, vz, nVoxels, density));
|
||||
float d01 = Lerp(dx, D(vx, vy, vz+1, nVoxels, density),
|
||||
D(vx+1, vy, vz+1, nVoxels, density));
|
||||
float d11 = Lerp(dx, D(vx, vy+1, vz+1, nVoxels, density),
|
||||
D(vx+1, vy+1, vz+1, nVoxels, density));
|
||||
float d0 = Lerp(dy, d00, d10);
|
||||
float d1 = Lerp(dy, d01, d11);
|
||||
return Lerp(dz, d0, d1);
|
||||
@@ -221,10 +185,8 @@ transmittance(uniform float3 p0, float3 p1, uniform float3 pMin,
|
||||
float t = rayT0;
|
||||
float3 pos = ray.origin + ray.dir * rayT0;
|
||||
float3 dirStep = ray.dir * stepT;
|
||||
uniform bool checkForSameVoxel = true;
|
||||
while (t < rayT1) {
|
||||
tau += stepDist * sigma_t * Density(pos, pMin, pMax, density, nVoxels,
|
||||
checkForSameVoxel);
|
||||
tau += stepDist * sigma_t * Density(pos, pMin, pMax, density, nVoxels);
|
||||
pos = pos + dirStep;
|
||||
t += stepT;
|
||||
}
|
||||
@@ -268,9 +230,8 @@ raymarch(uniform float density[], uniform int nVoxels[3], Ray ray) {
|
||||
float t = rayT0;
|
||||
float3 pos = ray.origin + ray.dir * rayT0;
|
||||
float3 dirStep = ray.dir * stepT;
|
||||
uniform bool checkForSameVoxel = true;
|
||||
cwhile (t < rayT1) {
|
||||
float d = Density(pos, pMin, pMax, density, nVoxels, checkForSameVoxel);
|
||||
float d = Density(pos, pMin, pMax, density, nVoxels);
|
||||
|
||||
// terminate once attenuation is high
|
||||
float atten = exp(-tau);
|
||||
|
||||
@@ -156,18 +156,18 @@
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="volume.ispc">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx
|
||||
</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx
|
||||
</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
|
||||
5
expr.h
5
expr.h
@@ -634,13 +634,14 @@ public:
|
||||
being done just given type information without the parameter
|
||||
argument expressions being available. It returns true on success.
|
||||
*/
|
||||
bool ResolveOverloads(const std::vector<const Type *> &argTypes,
|
||||
bool ResolveOverloads(SourcePos argPos,
|
||||
const std::vector<const Type *> &argTypes,
|
||||
const std::vector<bool> *argCouldBeNULL = NULL);
|
||||
Symbol *GetMatchingFunction();
|
||||
|
||||
private:
|
||||
bool tryResolve(int (*matchFunc)(const Type *, const Type *),
|
||||
const std::vector<const Type *> &argTypes,
|
||||
SourcePos argPos, const std::vector<const Type *> &argTypes,
|
||||
const std::vector<bool> *argCouldBeNULL);
|
||||
|
||||
/** Name of the function that is being called. */
|
||||
|
||||
111
func.cpp
111
func.cpp
@@ -72,7 +72,7 @@ Function::Function(Symbol *s, const std::vector<Symbol *> &a, Stmt *c) {
|
||||
code = c;
|
||||
|
||||
maskSymbol = m->symbolTable->LookupVariable("__mask");
|
||||
assert(maskSymbol != NULL);
|
||||
Assert(maskSymbol != NULL);
|
||||
|
||||
if (code != NULL) {
|
||||
if (g->debugPrint) {
|
||||
@@ -109,7 +109,7 @@ Function::Function(Symbol *s, const std::vector<Symbol *> &a, Stmt *c) {
|
||||
}
|
||||
|
||||
const FunctionType *type = dynamic_cast<const FunctionType *>(sym->type);
|
||||
assert(type != NULL);
|
||||
Assert(type != NULL);
|
||||
|
||||
for (unsigned int i = 0; i < args.size(); ++i)
|
||||
if (dynamic_cast<const ReferenceType *>(args[i]->type) == NULL)
|
||||
@@ -117,13 +117,13 @@ Function::Function(Symbol *s, const std::vector<Symbol *> &a, Stmt *c) {
|
||||
|
||||
if (type->isTask) {
|
||||
threadIndexSym = m->symbolTable->LookupVariable("threadIndex");
|
||||
assert(threadIndexSym);
|
||||
Assert(threadIndexSym);
|
||||
threadCountSym = m->symbolTable->LookupVariable("threadCount");
|
||||
assert(threadCountSym);
|
||||
Assert(threadCountSym);
|
||||
taskIndexSym = m->symbolTable->LookupVariable("taskIndex");
|
||||
assert(taskIndexSym);
|
||||
Assert(taskIndexSym);
|
||||
taskCountSym = m->symbolTable->LookupVariable("taskCount");
|
||||
assert(taskCountSym);
|
||||
Assert(taskCountSym);
|
||||
}
|
||||
else
|
||||
threadIndexSym = threadCountSym = taskIndexSym = taskCountSym = NULL;
|
||||
@@ -133,7 +133,7 @@ Function::Function(Symbol *s, const std::vector<Symbol *> &a, Stmt *c) {
|
||||
const Type *
|
||||
Function::GetReturnType() const {
|
||||
const FunctionType *type = dynamic_cast<const FunctionType *>(sym->type);
|
||||
assert(type != NULL);
|
||||
Assert(type != NULL);
|
||||
return type->GetReturnType();
|
||||
}
|
||||
|
||||
@@ -141,7 +141,7 @@ Function::GetReturnType() const {
|
||||
const FunctionType *
|
||||
Function::GetType() const {
|
||||
const FunctionType *type = dynamic_cast<const FunctionType *>(sym->type);
|
||||
assert(type != NULL);
|
||||
Assert(type != NULL);
|
||||
return type;
|
||||
}
|
||||
|
||||
@@ -157,9 +157,9 @@ lCopyInTaskParameter(int i, llvm::Value *structArgPtr, const std::vector<Symbol
|
||||
// We expect the argument structure to come in as a pointer to a
|
||||
// structure. Confirm and figure out its type here.
|
||||
const llvm::Type *structArgType = structArgPtr->getType();
|
||||
assert(llvm::isa<llvm::PointerType>(structArgType));
|
||||
Assert(llvm::isa<llvm::PointerType>(structArgType));
|
||||
const llvm::PointerType *pt = llvm::dyn_cast<const llvm::PointerType>(structArgType);
|
||||
assert(llvm::isa<llvm::StructType>(pt->getElementType()));
|
||||
Assert(llvm::isa<llvm::StructType>(pt->getElementType()));
|
||||
const llvm::StructType *argStructType =
|
||||
llvm::dyn_cast<const llvm::StructType>(pt->getElementType());
|
||||
|
||||
@@ -189,10 +189,9 @@ lCopyInTaskParameter(int i, llvm::Value *structArgPtr, const std::vector<Symbol
|
||||
void
|
||||
Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
||||
SourcePos firstStmtPos) {
|
||||
llvm::Value *maskPtr = ctx->AllocaInst(LLVMTypes::MaskType, "mask_memory");
|
||||
ctx->StoreInst(LLVMMaskAllOn, maskPtr);
|
||||
maskSymbol->storagePtr = maskPtr;
|
||||
ctx->SetMaskPointer(maskPtr);
|
||||
// Connect the __mask builtin to the location in memory that stores its
|
||||
// value
|
||||
maskSymbol->storagePtr = ctx->GetFullMaskPointer();
|
||||
|
||||
// add debugging info for __mask, programIndex, ...
|
||||
maskSymbol->pos = firstStmtPos;
|
||||
@@ -202,7 +201,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
||||
llvm::BasicBlock *entryBBlock = ctx->GetCurrentBasicBlock();
|
||||
#endif
|
||||
const FunctionType *type = dynamic_cast<const FunctionType *>(sym->type);
|
||||
assert(type != NULL);
|
||||
Assert(type != NULL);
|
||||
if (type->isTask == true) {
|
||||
// For tasks, there should always be three parameters: the
|
||||
// pointer to the structure that holds all of the arguments, the
|
||||
@@ -267,38 +266,74 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
||||
else {
|
||||
// Otherwise use the mask to set the entry mask value
|
||||
argIter->setName("__mask");
|
||||
assert(argIter->getType() == LLVMTypes::MaskType);
|
||||
Assert(argIter->getType() == LLVMTypes::MaskType);
|
||||
ctx->SetFunctionMask(argIter);
|
||||
assert(++argIter == function->arg_end());
|
||||
Assert(++argIter == function->arg_end());
|
||||
}
|
||||
}
|
||||
|
||||
// Finally, we can generate code for the function
|
||||
if (code != NULL) {
|
||||
ctx->SetDebugPos(code->pos);
|
||||
ctx->AddInstrumentationPoint("function entry");
|
||||
|
||||
int costEstimate = code->EstimateCost();
|
||||
Debug(code->pos, "Estimated cost for function \"%s\" = %d\n",
|
||||
sym->name.c_str(), costEstimate);
|
||||
|
||||
// If the body of the function is non-trivial, then we wrap the
|
||||
// entire thing inside code that tests to see if the mask is all
|
||||
// on, all off, or mixed. If this is a simple function, then this
|
||||
// isn't worth the code bloat / overhead.
|
||||
bool checkMask = (type->isTask == true) ||
|
||||
((function->hasFnAttr(llvm::Attribute::AlwaysInline) == false) &&
|
||||
costEstimate > CHECK_MASK_AT_FUNCTION_START_COST);
|
||||
Debug(code->pos, "Estimated cost for function \"%s\" = %d\n",
|
||||
sym->name.c_str(), costEstimate);
|
||||
// If the body of the function is non-trivial, then we wrap the
|
||||
// entire thing around a varying "cif (true)" test in order to reap
|
||||
// the side-effect benefit of checking to see if the execution mask
|
||||
// is all on and thence having a specialized code path for that
|
||||
// case. If this is a simple function, then this isn't worth the
|
||||
// code bloat / overhead.
|
||||
if (checkMask) {
|
||||
bool allTrue[ISPC_MAX_NVEC];
|
||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||
allTrue[i] = true;
|
||||
Expr *trueExpr = new ConstExpr(AtomicType::VaryingBool, allTrue,
|
||||
code->pos);
|
||||
code = new IfStmt(trueExpr, code, NULL, true, code->pos);
|
||||
}
|
||||
if (checkMask && g->opt.disableCoherentControlFlow == false) {
|
||||
llvm::Value *mask = ctx->GetFunctionMask();
|
||||
llvm::Value *allOn = ctx->All(mask);
|
||||
llvm::BasicBlock *bbAllOn = ctx->CreateBasicBlock("all_on");
|
||||
llvm::BasicBlock *bbNotAll = ctx->CreateBasicBlock("not_all_on");
|
||||
|
||||
ctx->SetDebugPos(code->pos);
|
||||
ctx->AddInstrumentationPoint("function entry");
|
||||
code->EmitCode(ctx);
|
||||
ctx->BranchInst(bbAllOn, bbNotAll, allOn);
|
||||
|
||||
// all on: we've determined dynamically that the mask is all
|
||||
// on. Set the current mask to "all on" explicitly so that
|
||||
// codegen for this path can be improved with this knowledge in
|
||||
// hand...
|
||||
ctx->SetCurrentBasicBlock(bbAllOn);
|
||||
if (!g->opt.disableMaskAllOnOptimizations)
|
||||
ctx->SetFunctionMask(LLVMMaskAllOn);
|
||||
code->EmitCode(ctx);
|
||||
if (ctx->GetCurrentBasicBlock())
|
||||
ctx->ReturnInst();
|
||||
|
||||
// not all on: figure out if no instances are running, or if
|
||||
// some of them are
|
||||
ctx->SetCurrentBasicBlock(bbNotAll);
|
||||
ctx->SetFunctionMask(mask);
|
||||
llvm::BasicBlock *bbNoneOn = ctx->CreateBasicBlock("none_on");
|
||||
llvm::BasicBlock *bbSomeOn = ctx->CreateBasicBlock("some_on");
|
||||
llvm::Value *anyOn = ctx->Any(mask);
|
||||
ctx->BranchInst(bbSomeOn, bbNoneOn, anyOn);
|
||||
|
||||
// Everyone is off; get out of here.
|
||||
ctx->SetCurrentBasicBlock(bbNoneOn);
|
||||
ctx->ReturnInst();
|
||||
|
||||
// some on: reset the mask to the value it had at function
|
||||
// entry and emit the code. Resetting the mask here is
|
||||
// important, due to the "all on" setting of it for the path
|
||||
// above
|
||||
ctx->SetCurrentBasicBlock(bbSomeOn);
|
||||
ctx->SetFunctionMask(mask);
|
||||
code->EmitCode(ctx);
|
||||
if (ctx->GetCurrentBasicBlock())
|
||||
ctx->ReturnInst();
|
||||
|
||||
}
|
||||
else
|
||||
// No check, just emit the code
|
||||
code->EmitCode(ctx);
|
||||
}
|
||||
|
||||
if (ctx->GetCurrentBasicBlock()) {
|
||||
@@ -337,7 +372,7 @@ Function::GenerateIR() {
|
||||
return;
|
||||
|
||||
llvm::Function *function = sym->function;
|
||||
assert(function != NULL);
|
||||
Assert(function != NULL);
|
||||
|
||||
// But if that function has a definition, we don't want to redefine it.
|
||||
if (function->empty() == false) {
|
||||
@@ -376,7 +411,7 @@ Function::GenerateIR() {
|
||||
// it without a mask parameter and without name mangling so that
|
||||
// the application can call it
|
||||
const FunctionType *type = dynamic_cast<const FunctionType *>(sym->type);
|
||||
assert(type != NULL);
|
||||
Assert(type != NULL);
|
||||
if (type->isExported) {
|
||||
if (!type->isTask) {
|
||||
LLVM_TYPE_CONST llvm::FunctionType *ftype =
|
||||
|
||||
65
ispc.cpp
65
ispc.cpp
@@ -161,7 +161,21 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
||||
t->vectorWidth = 16;
|
||||
t->attributes = "+avx,+popcnt,+cmov";
|
||||
}
|
||||
#endif // LLVM 3.0
|
||||
#endif // LLVM 3.0+
|
||||
#if defined(LLVM_3_1svn)
|
||||
else if (!strcasecmp(isa, "avx2")) {
|
||||
t->isa = Target::AVX2;
|
||||
t->nativeVectorWidth = 8;
|
||||
t->vectorWidth = 8;
|
||||
t->attributes = "+avx2,+popcnt,+cmov";
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx2-x2")) {
|
||||
t->isa = Target::AVX2;
|
||||
t->nativeVectorWidth = 16;
|
||||
t->vectorWidth = 16;
|
||||
t->attributes = "+avx2,+popcnt,+cmov";
|
||||
}
|
||||
#endif // LLVM 3.1
|
||||
else {
|
||||
fprintf(stderr, "Target ISA \"%s\" is unknown. Choices are: %s\n",
|
||||
isa, SupportedTargetISAs());
|
||||
@@ -201,9 +215,12 @@ Target::SupportedTargetArchs() {
|
||||
const char *
|
||||
Target::SupportedTargetISAs() {
|
||||
return "sse2, sse2-x2, sse4, sse4-x2"
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
#ifndef LLVM_2_9
|
||||
", avx, avx-x2"
|
||||
#endif
|
||||
#endif // !LLVM_2_9
|
||||
#ifdef LLVM_3_1svn
|
||||
", avx2, avx2-x2"
|
||||
#endif // LLVM_3_1svn
|
||||
;
|
||||
}
|
||||
|
||||
@@ -241,11 +258,19 @@ Target::GetTargetMachine() const {
|
||||
|
||||
llvm::Reloc::Model relocModel = generatePIC ? llvm::Reloc::PIC_ :
|
||||
llvm::Reloc::Default;
|
||||
#if defined(LLVM_3_0svn) || defined(LLVM_3_1svn) || defined(LLVM_3_0)
|
||||
#if defined(LLVM_3_1svn)
|
||||
std::string featuresString = attributes;
|
||||
llvm::TargetOptions options;
|
||||
if (g->opt.fastMath == true)
|
||||
options.UnsafeFPMath = 1;
|
||||
llvm::TargetMachine *targetMachine =
|
||||
target->createTargetMachine(triple, cpu, featuresString, options,
|
||||
relocModel);
|
||||
#elif defined(LLVM_3_0)
|
||||
std::string featuresString = attributes;
|
||||
llvm::TargetMachine *targetMachine =
|
||||
target->createTargetMachine(triple, cpu, featuresString, relocModel);
|
||||
#else
|
||||
#else // LLVM 2.9
|
||||
#ifdef ISPC_IS_APPLE
|
||||
relocModel = llvm::Reloc::PIC_;
|
||||
#endif // ISPC_IS_APPLE
|
||||
@@ -255,8 +280,9 @@ Target::GetTargetMachine() const {
|
||||
#ifndef ISPC_IS_WINDOWS
|
||||
targetMachine->setRelocationModel(relocModel);
|
||||
#endif // !ISPC_IS_WINDOWS
|
||||
#endif
|
||||
assert(targetMachine != NULL);
|
||||
#endif // LLVM_2_9
|
||||
|
||||
Assert(targetMachine != NULL);
|
||||
|
||||
targetMachine->setAsmVerbosityDefault(true);
|
||||
return targetMachine;
|
||||
@@ -272,7 +298,8 @@ Target::GetISAString() const {
|
||||
return "sse4";
|
||||
case Target::AVX:
|
||||
return "avx";
|
||||
break;
|
||||
case Target::AVX2:
|
||||
return "avx2";
|
||||
default:
|
||||
FATAL("Unhandled target in GetISAString()");
|
||||
}
|
||||
@@ -283,10 +310,10 @@ Target::GetISAString() const {
|
||||
llvm::Value *
|
||||
Target::SizeOf(LLVM_TYPE_CONST llvm::Type *type) {
|
||||
const llvm::TargetData *td = GetTargetMachine()->getTargetData();
|
||||
assert(td != NULL);
|
||||
Assert(td != NULL);
|
||||
uint64_t byteSize = td->getTypeSizeInBits(type) / 8;
|
||||
if (is32Bit || g->opt.force32BitAddressing)
|
||||
return LLVMInt32(byteSize);
|
||||
return LLVMInt32((int32_t)byteSize);
|
||||
else
|
||||
return LLVMInt64(byteSize);
|
||||
}
|
||||
@@ -295,16 +322,16 @@ Target::SizeOf(LLVM_TYPE_CONST llvm::Type *type) {
|
||||
llvm::Value *
|
||||
Target::StructOffset(LLVM_TYPE_CONST llvm::Type *type, int element) {
|
||||
const llvm::TargetData *td = GetTargetMachine()->getTargetData();
|
||||
assert(td != NULL);
|
||||
Assert(td != NULL);
|
||||
LLVM_TYPE_CONST llvm::StructType *structType =
|
||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::StructType>(type);
|
||||
assert(structType != NULL);
|
||||
Assert(structType != NULL);
|
||||
const llvm::StructLayout *sl = td->getStructLayout(structType);
|
||||
assert(sl != NULL);
|
||||
Assert(sl != NULL);
|
||||
|
||||
uint64_t offset = sl->getElementOffset(element);
|
||||
if (is32Bit || g->opt.force32BitAddressing)
|
||||
return LLVMInt32(offset);
|
||||
return LLVMInt32((int32_t)offset);
|
||||
else
|
||||
return LLVMInt64(offset);
|
||||
}
|
||||
@@ -320,6 +347,7 @@ Opt::Opt() {
|
||||
force32BitAddressing = true;
|
||||
unrollLoops = true;
|
||||
disableAsserts = false;
|
||||
disableMaskAllOnOptimizations = false;
|
||||
disableHandlePseudoMemoryOps = false;
|
||||
disableBlendedMaskedStores = false;
|
||||
disableCoherentControlFlow = false;
|
||||
@@ -328,7 +356,6 @@ Opt::Opt() {
|
||||
disableMaskedStoreToStore = false;
|
||||
disableGatherScatterFlattening = false;
|
||||
disableUniformMemoryOptimizations = false;
|
||||
disableMaskedStoreOptimizations = false;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
@@ -362,7 +389,13 @@ Globals::Globals() {
|
||||
// SourcePos
|
||||
|
||||
SourcePos::SourcePos(const char *n, int fl, int fc, int ll, int lc) {
|
||||
name = n ? n : m->module->getModuleIdentifier().c_str();
|
||||
name = n;
|
||||
if (name == NULL) {
|
||||
if (m != NULL)
|
||||
name = m->module->getModuleIdentifier().c_str();
|
||||
else
|
||||
name = "(unknown)";
|
||||
}
|
||||
first_line = fl;
|
||||
first_column = fc;
|
||||
last_line = ll != 0 ? ll : fl;
|
||||
|
||||
32
ispc.h
32
ispc.h
@@ -50,11 +50,22 @@
|
||||
#define ISPC_IS_APPLE
|
||||
#endif
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
/** Internal assertion macro.  Evaluates @p expr exactly once; if it is
    false, prints the failing expression together with the file and line
    and a request to file a bug report to stderr, then calls abort().
    There is no NDEBUG test in this definition, so the check is not
    compiled out by defining NDEBUG.  The whole macro is a void
    expression, so it can be used anywhere an expression statement is
    allowed.

    __Assert is the failure path and is only meant to be expanded by
    Assert().  __LINE__ is an int, so it is printed with %d. */
#define Assert(expr) \
    ((void)((expr) ? 0 : __Assert (#expr, __FILE__, __LINE__)))
#define __Assert(expr, file, line)         \
    ((void)fprintf(stderr, "%s:%d: Assertion failed: \"%s\"\n" \
                   "***\n*** Please file a bug report at " \
                   "https://github.com/ispc/ispc/issues\n*** (Including as much " \
                   "information as you can about how to reproduce this error).\n" \
                   "*** You have apparently encountered a bug in the compiler that " \
                   "we'd like to fix!\n***\n", file, line, expr), abort(), 0)
|
||||
|
||||
/** @def ISPC_MAX_NVEC maximum vector size of any of the compilation
|
||||
targets.
|
||||
*/
|
||||
@@ -182,7 +193,7 @@ struct Target {
|
||||
flexible/performant of them will appear last in the enumerant. Note
|
||||
also that __best_available_isa() needs to be updated if ISAs are
|
||||
added or the enumerant values are reordered. */
|
||||
enum ISA { SSE2, SSE4, AVX, NUM_ISAS };
|
||||
enum ISA { SSE2, SSE4, AVX, AVX2, NUM_ISAS };
|
||||
|
||||
/** Instruction set being compiled to. */
|
||||
ISA isa;
|
||||
@@ -247,10 +258,15 @@ struct Opt {
|
||||
*/
|
||||
bool force32BitAddressing;
|
||||
|
||||
/** Indicates whether assert() statements should be ignored (for
|
||||
/** Indicates whether Assert() statements should be ignored (for
|
||||
performance in the generated code). */
|
||||
bool disableAsserts;
|
||||
|
||||
|
||||
/** If enabled, disables the various optimizations that kick in when
|
||||
the execution mask can be determined to be "all on" at compile
|
||||
time. */
|
||||
bool disableMaskAllOnOptimizations;
|
||||
|
||||
/** If enabled, the various __pseudo* memory ops (gather/scatter,
|
||||
masked load/store) are left in their __pseudo* form, for better
|
||||
understanding of the structure of generated code when reading
|
||||
@@ -302,14 +318,6 @@ struct Opt {
|
||||
than gathers/scatters. This is likely only useful for measuring
|
||||
the impact of this optimization. */
|
||||
bool disableUniformMemoryOptimizations;
|
||||
|
||||
/** Disables optimizations for masked stores: masked stores with the
|
||||
mask all on are transformed to regular stores, and masked stores
|
||||
with the mask are all off are removed (which in turn can allow
|
||||
eliminating additional dead code related to computing the value
|
||||
stored). This is likely only useful for measuring the impact of
|
||||
this optimization. */
|
||||
bool disableMaskedStoreOptimizations;
|
||||
};
|
||||
|
||||
/** @brief This structure collects together a number of global variables.
|
||||
|
||||
5
ispc.sln
5
ispc.sln
@@ -3,8 +3,6 @@ Microsoft Visual Studio Solution File, Format Version 11.00
|
||||
# Visual Studio 2010
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ispc", "ispc.vcxproj", "{9861F490-F516-480C-B63C-D62A77AFA9D5}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ispc_test", "ispc_test.vcxproj", "{92547BA8-BE86-4E78-8799-1D72A70E5831}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Win32 = Debug|Win32
|
||||
@@ -15,9 +13,6 @@ Global
|
||||
{9861F490-F516-480C-B63C-D62A77AFA9D5}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{9861F490-F516-480C-B63C-D62A77AFA9D5}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{9861F490-F516-480C-B63C-D62A77AFA9D5}.Release|Win32.Build.0 = Release|Win32
|
||||
{92547BA8-BE86-4E78-8799-1D72A70E5831}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{92547BA8-BE86-4E78-8799-1D72A70E5831}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{92547BA8-BE86-4E78-8799-1D72A70E5831}.Release|Win32.ActiveCfg = Release|Win32
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
|
||||
379
ispc_test.cpp
379
ispc_test.cpp
@@ -1,379 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define ISPC_IS_WINDOWS
|
||||
#elif defined(__linux__)
|
||||
#define ISPC_IS_LINUX
|
||||
#elif defined(__APPLE__)
|
||||
#define ISPC_IS_APPLE
|
||||
#endif
|
||||
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
#define NOMINMAX
|
||||
#include <windows.h>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <memory.h>
|
||||
#ifdef ISPC_IS_LINUX
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
#ifdef ISPC_HAVE_SVML
|
||||
#include <xmmintrin.h>
|
||||
extern "C" {
|
||||
extern __m128 __svml_sinf4(__m128);
|
||||
extern __m128 __svml_cosf4(__m128);
|
||||
extern __m128 __svml_sincosf4(__m128 *,__m128);
|
||||
extern __m128 __svml_tanf4(__m128);
|
||||
extern __m128 __svml_atanf4(__m128);
|
||||
extern __m128 __svml_atan2f4(__m128, __m128);
|
||||
extern __m128 __svml_expf4(__m128);
|
||||
extern __m128 __svml_logf4(__m128);
|
||||
extern __m128 __svml_powf4(__m128, __m128);
|
||||
}
|
||||
#endif
|
||||
|
||||
#include <llvm/LLVMContext.h>
|
||||
#include <llvm/Module.h>
|
||||
#include <llvm/Type.h>
|
||||
#include <llvm/DerivedTypes.h>
|
||||
#include <llvm/Instructions.h>
|
||||
#include <llvm/ExecutionEngine/ExecutionEngine.h>
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
#include <llvm/Support/TargetRegistry.h>
|
||||
#include <llvm/Support/TargetSelect.h>
|
||||
#else
|
||||
#include <llvm/Target/TargetRegistry.h>
|
||||
#include <llvm/Target/TargetSelect.h>
|
||||
#endif
|
||||
#include <llvm/ExecutionEngine/JIT.h>
|
||||
#include <llvm/Target/TargetOptions.h>
|
||||
#include <llvm/Target/TargetData.h>
|
||||
#include <llvm/Transforms/Scalar.h>
|
||||
#include <llvm/Transforms/IPO.h>
|
||||
#include <llvm/PassManager.h>
|
||||
#include <llvm/Support/CFG.h>
|
||||
#include <llvm/Analysis/Verifier.h>
|
||||
#include <llvm/Assembly/PrintModulePass.h>
|
||||
#include <llvm/Support/raw_ostream.h>
|
||||
#include <llvm/Bitcode/ReaderWriter.h>
|
||||
#include <llvm/Support/MemoryBuffer.h>
|
||||
#include <llvm/Support/system_error.h>
|
||||
|
||||
bool shouldFail = false;
|
||||
|
||||
extern "C" {
|
||||
void ISPCLaunch(void **, void *, void *, int32_t);
|
||||
void ISPCSync(void *);
|
||||
void *ISPCAlloc(void **, int64_t size, int32_t alignment);
|
||||
}
|
||||
|
||||
/** Minimal task-launch hook for the test harness: runs all @p count tasks
    one after another on the calling thread.

    @param handle  Receives a dummy, non-NULL marker value (0xdeadbeef).
    @param func    Task entry point; called as
                   func(data, 0, 1, taskIndex, count).
    @param data    Opaque argument pointer passed through to every task.
    @param count   Number of tasks to run.
*/
void ISPCLaunch(void **handle, void *func, void *data, int32_t count) {
    *handle = (void *)0xdeadbeef;

    typedef void (*TaskEntry)(void *, int, int, int, int);
    TaskEntry runTask = (TaskEntry)(func);

    // A thread index of 0 and a thread count of 1 are always reported.
    int taskIndex = 0;
    while (taskIndex < count) {
        runTask(data, 0, 1, taskIndex, count);
        ++taskIndex;
    }
}
|
||||
|
||||
|
||||
/** Synchronization hook for the test harness.  The body is empty, so the
    call does nothing with its handle argument. */
void ISPCSync(void * /*handle*/) {
    // Intentionally empty.
}
|
||||
|
||||
|
||||
/** Aligned-allocation hook for the test harness.

    @param handle     Receives a dummy, non-NULL marker value (0xdeadbeef).
    @param size       Number of bytes requested.
    @param alignment  Required alignment in bytes; the manual fallback path
                      masks with (alignment - 1) and therefore assumes a
                      power of two.
    @return Pointer to the allocated memory, or NULL if the underlying
            allocation fails.  The memory is never freed by this harness.
*/
void *ISPCAlloc(void **handle, int64_t size, int32_t alignment) {
    *handle = (void *)0xdeadbeef;
    // leak time!
#if defined(ISPC_IS_WINDOWS)
    return _aligned_malloc((size_t)size, alignment);
#elif defined(ISPC_IS_LINUX)
    return memalign(alignment, size);
#else
    // Apple (and any platform without a dedicated path): over-allocate with
    // malloc and align by hand.  Room is reserved for one pointer in front
    // of the returned block to remember the address malloc handed out.
    const size_t align = (size_t)alignment;
    void *mem = malloc((size_t)size + (align - 1) + sizeof(void *));
    if (mem == NULL)
        return NULL;

    char *amem = ((char *)mem) + sizeof(void *);
    // Only advance when the pointer is actually misaligned; stepping a full
    // 'alignment' forward from an already aligned pointer would push the
    // end of the block one byte past the end of the allocation.
    const size_t misalignment =
        (size_t)(reinterpret_cast<uintptr_t>(amem) & (align - 1));
    if (misalignment != 0)
        amem += align - misalignment;

    ((void **)amem)[-1] = mem;
    return amem;
#endif
}
|
||||
|
||||
|
||||
/** Prints the command-line help text to stderr and terminates the process
    with exit code @p ret. */
static void usage(int ret) {
    static const char *const helpLines[] = {
        "usage: ispc_test\n",
        "\t[-h/--help]\tprint help\n",
        "\t[-f]\t\tindicates that test is expected to fail\n",
        "\t<files>\n",
    };
    const unsigned int lineCount = sizeof(helpLines) / sizeof(helpLines[0]);
    for (unsigned int i = 0; i < lineCount; ++i)
        fputs(helpLines[i], stderr);
    exit(ret);
}
|
||||
|
||||
/** Reports on stderr that an unavailable SVML function was called and
    terminates the process with exit code 1. */
static void svml_missing() {
    fputs("Program called unavailable SVML function!\n", stderr);
    exit(1);
}
|
||||
|
||||
// On Windows, sin() is an overloaded function, so we need an unambiguous
|
||||
// function we can take the address of when wiring up the external references
|
||||
// below.
|
||||
|
||||
/** Double-precision sine; forwards to sin(). */
double Sin(double x) {
    return sin(x);
}

/** Double-precision cosine; forwards to cos(). */
double Cos(double x) {
    return cos(x);
}

/** Double-precision tangent; forwards to tan(). */
double Tan(double x) {
    return tan(x);
}

/** Double-precision arc tangent; forwards to atan(). */
double Atan(double x) {
    return atan(x);
}

/** Double-precision two-argument arc tangent; forwards to atan2(y, x). */
double Atan2(double y, double x) {
    return atan2(y, x);
}

/** Raises a to the power b; forwards to pow(). */
double Pow(double a, double b) {
    return pow(a, b);
}

/** Double-precision exponential; forwards to exp(). */
double Exp(double x) {
    return exp(x);
}

/** Double-precision natural logarithm; forwards to log(). */
double Log(double x) {
    return log(x);
}
|
||||
|
||||
static bool lRunTest(const char *fn) {
|
||||
llvm::LLVMContext *ctx = new llvm::LLVMContext;
|
||||
|
||||
llvm::OwningPtr<llvm::MemoryBuffer> buf;
|
||||
llvm::error_code err = llvm::MemoryBuffer::getFileOrSTDIN(fn, buf);
|
||||
if (err) {
|
||||
fprintf(stderr, "Unable to open file \"%s\": %s\n", fn, err.message().c_str());
|
||||
delete ctx;
|
||||
return false;
|
||||
}
|
||||
std::string bcErr;
|
||||
llvm::Module *module = llvm::ParseBitcodeFile(buf.get(), *ctx, &bcErr);
|
||||
|
||||
if (!module) {
|
||||
fprintf(stderr, "Bitcode reader failed for \"%s\": %s\n", fn, bcErr.c_str());
|
||||
delete ctx;
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string eeError;
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
llvm::EngineBuilder engineBuilder(module);
|
||||
engineBuilder.setErrorStr(&eeError);
|
||||
engineBuilder.setEngineKind(llvm::EngineKind::JIT);
|
||||
#if 0
|
||||
std::vector<std::string> attributes;
|
||||
if (target != NULL && !strcmp(target, "avx"))
|
||||
attributes.push_back("+avx");
|
||||
engineBuilder.setMAttrs(attributes);
|
||||
engineBuilder.setUseMCJIT(true);
|
||||
#endif
|
||||
llvm::ExecutionEngine *ee = engineBuilder.create();
|
||||
#else
|
||||
llvm::ExecutionEngine *ee = llvm::ExecutionEngine::createJIT(module, &eeError);
|
||||
#endif
|
||||
if (!ee) {
|
||||
fprintf(stderr, "Unable to create ExecutionEngine: %s\n", eeError.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
llvm::Function *func;
|
||||
#define DO_FUNC(FUNC ,FUNCNAME) \
|
||||
if ((func = module->getFunction(FUNCNAME)) != NULL) \
|
||||
ee->addGlobalMapping(func, (void *)FUNC)
|
||||
DO_FUNC(ISPCLaunch, "ISPCLaunch");
|
||||
DO_FUNC(ISPCSync, "ISPCSync");
|
||||
DO_FUNC(ISPCAlloc, "ISPCAlloc");
|
||||
DO_FUNC(putchar, "putchar");
|
||||
DO_FUNC(printf, "printf");
|
||||
DO_FUNC(fflush, "fflush");
|
||||
DO_FUNC(sinf, "sinf");
|
||||
DO_FUNC(cosf, "cosf");
|
||||
DO_FUNC(tanf, "tanf");
|
||||
DO_FUNC(atanf, "atanf");
|
||||
DO_FUNC(atan2f, "atan2f");
|
||||
DO_FUNC(powf, "powf");
|
||||
DO_FUNC(expf, "expf");
|
||||
DO_FUNC(logf, "logf");
|
||||
DO_FUNC(Sin, "sin");
|
||||
DO_FUNC(Cos, "cos");
|
||||
DO_FUNC(Tan, "tan");
|
||||
DO_FUNC(Atan, "atan");
|
||||
DO_FUNC(Atan2, "atan2");
|
||||
DO_FUNC(Pow, "pow");
|
||||
DO_FUNC(Exp, "exp");
|
||||
DO_FUNC(Log, "log");
|
||||
DO_FUNC(memset, "memset");
|
||||
#ifdef ISPC_IS_APPLE
|
||||
DO_FUNC(memset_pattern4, "memset_pattern4");
|
||||
DO_FUNC(memset_pattern8, "memset_pattern8");
|
||||
DO_FUNC(memset_pattern16, "memset_pattern16");
|
||||
#endif
|
||||
|
||||
#ifdef ISPC_HAVE_SVML
|
||||
#define DO_SVML(FUNC ,FUNCNAME) \
|
||||
if ((func = module->getFunction(FUNCNAME)) != NULL) \
|
||||
ee->addGlobalMapping(func, (void *)FUNC)
|
||||
#else
|
||||
#define DO_SVML(FUNC, FUNCNAME) \
|
||||
if ((func = module->getFunction(FUNCNAME)) != NULL) \
|
||||
ee->addGlobalMapping(func, (void *)svml_missing)
|
||||
#endif
|
||||
|
||||
DO_SVML(__svml_sinf4, "__svml_sinf4");
|
||||
DO_SVML(__svml_cosf4, "__svml_cosf4");
|
||||
DO_SVML(__svml_sincosf4, "__svml_sincosf4");
|
||||
DO_SVML(__svml_tanf4, "__svml_tanf4");
|
||||
DO_SVML(__svml_atanf4, "__svml_atanf4");
|
||||
DO_SVML(__svml_atan2f4, "__svml_atan2f4");
|
||||
DO_SVML(__svml_expf4, "__svml_expf4");
|
||||
DO_SVML(__svml_logf4, "__svml_logf4");
|
||||
DO_SVML(__svml_powf4, "__svml_powf4");
|
||||
|
||||
// figure out the vector width in the compiled code
|
||||
func = module->getFunction("width");
|
||||
if (!func) {
|
||||
fprintf(stderr, "No width() function found!\n");
|
||||
return false;
|
||||
}
|
||||
int width;
|
||||
{
|
||||
typedef int (*PFN)();
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
width = pfn();
|
||||
assert(width == 4 || width == 8 || width == 12 || width == 16);
|
||||
}
|
||||
|
||||
// find the value that returns the desired result
|
||||
func = module->getFunction("result");
|
||||
bool foundResult = (func != NULL);
|
||||
float result[16];
|
||||
for (int i = 0; i < 16; ++i)
|
||||
result[i] = 0;
|
||||
if (foundResult) {
|
||||
typedef void (*PFN)(float *);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
pfn(result);
|
||||
}
|
||||
else
|
||||
fprintf(stderr, "Warning: no result() function found.\n");
|
||||
|
||||
// try to find a function to run
|
||||
float returned[16];
|
||||
for (int i = 0; i < 16; ++i)
|
||||
returned[i] = 0;
|
||||
float vfloat[16] = { 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
|
||||
double vdouble[16] = { 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
|
||||
int vint[16] = { 2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32 };
|
||||
int vint2[16] = { 5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20};
|
||||
|
||||
if ((func = module->getFunction("f_v")) != NULL) {
|
||||
typedef void (*PFN)(float *);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
pfn(returned);
|
||||
}
|
||||
else if ((func = module->getFunction("f_f")) != NULL) {
|
||||
typedef void (*PFN)(float *, float *);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
llvm::verifyFunction(*func);
|
||||
pfn(returned, vfloat);
|
||||
}
|
||||
else if ((func = module->getFunction("f_fu")) != NULL) {
|
||||
typedef void (*PFN)(float *, float *, float fu);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
llvm::verifyFunction(*func);
|
||||
pfn(returned, vfloat, 5.);
|
||||
}
|
||||
else if ((func = module->getFunction("f_fi")) != NULL) {
|
||||
typedef void (*PFN)(float *, float *, int *);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
pfn(returned, vfloat, vint);
|
||||
}
|
||||
else if ((func = module->getFunction("f_du")) != NULL) {
|
||||
typedef void (*PFN)(float *, double *, double);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
pfn(returned, vdouble, 5.);
|
||||
}
|
||||
else if ((func = module->getFunction("f_duf")) != NULL) {
|
||||
typedef void (*PFN)(float *, double *, float);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
pfn(returned, vdouble, 5.f);
|
||||
}
|
||||
else if ((func = module->getFunction("f_di")) != NULL) {
|
||||
typedef void (*PFN)(float *, double *, int *);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
pfn(returned, vdouble, vint2);
|
||||
}
|
||||
else {
|
||||
fprintf(stderr, "Unable to find runnable function in file \"%s\"\n", fn);
|
||||
return false;
|
||||
}
|
||||
|
||||
// see if we got the right result
|
||||
bool resultsMatch = true;
|
||||
if (foundResult) {
|
||||
for (int i = 0; i < width; ++i)
|
||||
if (returned[i] != result[i]) {
|
||||
resultsMatch = false;
|
||||
fprintf(stderr, "Test \"%s\" RETURNED %d: %g / %a EXPECTED %g / %a\n",
|
||||
fn, i, returned[i], returned[i], result[i], result[i]);
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (int i = 0; i < width; ++i)
|
||||
fprintf(stderr, "Test \"%s\" returned %d: %g / %a\n",
|
||||
fn, i, returned[i], returned[i]);
|
||||
}
|
||||
if (foundResult && shouldFail && resultsMatch)
|
||||
fprintf(stderr, "Test %s unexpectedly passed\n", fn);
|
||||
|
||||
delete ee;
|
||||
delete ctx;
|
||||
|
||||
return foundResult && resultsMatch;
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
llvm::InitializeNativeTarget();
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
LLVMLinkInJIT();
|
||||
#endif
|
||||
|
||||
const char *filename = NULL;
|
||||
for (int i = 1; i < argc; ++i) {
|
||||
if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h"))
|
||||
usage(0);
|
||||
if (!strcmp(argv[i], "-f"))
|
||||
shouldFail = true;
|
||||
else
|
||||
filename = argv[i];
|
||||
}
|
||||
|
||||
return (lRunTest(filename) == true) ? 0 : 1;
|
||||
}
|
||||
@@ -1,90 +0,0 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="ispc_test.cpp" />
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{92547BA8-BE86-4E78-8799-1D72A70E5831}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<RootNamespace>ispc_test</RootNamespace>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>LLVM_3_0;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)/include</AdditionalIncludeDirectories>
|
||||
<DisableSpecificWarnings>4146;4355;4800</DisableSpecificWarnings>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalLibraryDirectories>$(LLVM_INSTALL_DIR)/lib</AdditionalLibraryDirectories>
|
||||
<AdditionalDependencies>LLVMAnalysis.lib;LLVMArchive.lib;LLVMAsmPrinter.lib;LLVMBitReader.lib;LLVMBitWriter.lib;LLVMCodeGen.lib;LLVMCore.lib;LLVMExecutionEngine.lib;LLVMInstCombine.lib;LLVMInstrumentation.lib;LLVMipa.lib;LLVMipo.lib;LLVMJIT.lib;LLVMLinker.lib;LLVMMC.lib;LLVMMCParser.lib;LLVMObject.lib;LLVMScalarOpts.lib;LLVMSelectionDAG.lib;LLVMSupport.lib;LLVMTarget.lib;LLVMTransformUtils.lib;LLVMX86ASMPrinter.lib;LLVMX86ASMParser.lib;LLVMX86Utils.lib;LLVMX86CodeGen.lib;LLVMX86Disassembler.lib;LLVMX86Desc.lib;LLVMX86Info.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>LLVM_3_0;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)/include</AdditionalIncludeDirectories>
|
||||
<DisableSpecificWarnings>4146;4355;4800</DisableSpecificWarnings>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<AdditionalLibraryDirectories>$(LLVM_INSTALL_DIR)/lib</AdditionalLibraryDirectories>
|
||||
<AdditionalDependencies>LLVMAnalysis.lib;LLVMArchive.lib;LLVMAsmPrinter.lib;LLVMBitReader.lib;LLVMBitWriter.lib;LLVMCodeGen.lib;LLVMCore.lib;LLVMExecutionEngine.lib;LLVMInstCombine.lib;LLVMInstrumentation.lib;LLVMipa.lib;LLVMipo.lib;LLVMJIT.lib;LLVMLinker.lib;LLVMMC.lib;LLVMMCParser.lib;LLVMObject.lib;LLVMScalarOpts.lib;LLVMSelectionDAG.lib;LLVMSupport.lib;LLVMTarget.lib;LLVMTransformUtils.lib;LLVMX86ASMPrinter.lib;LLVMX86ASMParser.lib;LLVMX86Utils.lib;LLVMX86CodeGen.lib;LLVMX86Disassembler.lib;LLVMX86Desc.lib;LLVMX86Info.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
83
lex.ll
83
lex.ll
@@ -148,65 +148,48 @@ L?\"(\\.|[^\\"])*\" { lStringConst(yylval, yylloc); return TOKEN_STRING_LITERAL;
|
||||
return TOKEN_IDENTIFIER;
|
||||
}
|
||||
|
||||
{INT_NUMBER} {
|
||||
char *endPtr = NULL;
|
||||
int64_t val;
|
||||
{INT_NUMBER}+(u|U|l|L)*? {
|
||||
int ls = 0, us = 0;
|
||||
|
||||
if (yytext[0] == '0' && yytext[1] == 'b')
|
||||
val = lParseBinary(yytext+2, *yylloc);
|
||||
yylval->intVal = lParseBinary(yytext+2, *yylloc);
|
||||
else {
|
||||
char *endPtr = NULL;
|
||||
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
val = _strtoi64(yytext, &endPtr, 0);
|
||||
yylval->intVal = _strtoi64(yytext, &endPtr, 0);
|
||||
#else
|
||||
// FIXME: should use strtouq and then issue an error if we can't
|
||||
// fit into 64 bits...
|
||||
val = strtoull(yytext, &endPtr, 0);
|
||||
yylval->intVal = strtoull(yytext, &endPtr, 0);
|
||||
#endif
|
||||
for (; *endPtr; endPtr++) {
|
||||
if (*endPtr == 'l' || *endPtr == 'L')
|
||||
ls++;
|
||||
else if (*endPtr == 'u' || *endPtr == 'U')
|
||||
us++;
|
||||
}
|
||||
if (ls >= 2)
|
||||
return us ? TOKEN_UINT64_CONSTANT : TOKEN_INT64_CONSTANT;
|
||||
else if (ls == 1)
|
||||
return us ? TOKEN_UINT32_CONSTANT : TOKEN_INT32_CONSTANT;
|
||||
}
|
||||
|
||||
// See if we can fit this into a 32-bit integer...
|
||||
if ((val & 0xffffffff) == val) {
|
||||
yylval->int32Val = (int32_t)val;
|
||||
return TOKEN_INT32_CONSTANT;
|
||||
}
|
||||
else {
|
||||
yylval->int64Val = val;
|
||||
return TOKEN_INT64_CONSTANT;
|
||||
}
|
||||
if ((yylval->intVal & 0xffffffff) == yylval->intVal)
|
||||
return us ? TOKEN_UINT32_CONSTANT : TOKEN_INT32_CONSTANT;
|
||||
else
|
||||
return us ? TOKEN_UINT64_CONSTANT : TOKEN_INT64_CONSTANT;
|
||||
}
|
||||
|
||||
{INT_NUMBER}[uU] {
|
||||
char *endPtr = NULL;
|
||||
uint64_t val;
|
||||
|
||||
if (yytext[0] == '0' && yytext[1] == 'b')
|
||||
val = lParseBinary(yytext+2, *yylloc);
|
||||
else {
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
val = _strtoui64(yytext, &endPtr, 0);
|
||||
#else
|
||||
val = strtoull(yytext, &endPtr, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
if ((val & 0xffffffff) == val) {
|
||||
// we can represent it in a 32-bit value
|
||||
yylval->int32Val = (int32_t)val;
|
||||
return TOKEN_UINT32_CONSTANT;
|
||||
}
|
||||
else {
|
||||
yylval->int64Val = val;
|
||||
return TOKEN_UINT64_CONSTANT;
|
||||
}
|
||||
}
|
||||
|
||||
{FLOAT_NUMBER} {
|
||||
yylval->floatVal = atof(yytext);
|
||||
yylval->floatVal = (float)atof(yytext);
|
||||
return TOKEN_FLOAT_CONSTANT;
|
||||
}
|
||||
|
||||
{HEX_FLOAT_NUMBER} {
|
||||
yylval->floatVal = lParseHexFloat(yytext);
|
||||
yylval->floatVal = (float)lParseHexFloat(yytext);
|
||||
return TOKEN_FLOAT_CONSTANT;
|
||||
}
|
||||
|
||||
@@ -291,7 +274,7 @@ lParseBinary(const char *ptr, SourcePos pos) {
|
||||
|
||||
while (*ptr != '\0') {
|
||||
/* if this hits, the regexp for 0b... constants is broken */
|
||||
assert(*ptr == '0' || *ptr == '1');
|
||||
Assert(*ptr == '0' || *ptr == '1');
|
||||
|
||||
if ((val & (((int64_t)1)<<63)) && warned == false) {
|
||||
// We're about to shift out a set bit
|
||||
@@ -346,7 +329,7 @@ static void lHandleCppHash(SourcePos *pos) {
|
||||
char *ptr, *src;
|
||||
|
||||
// Advance past the opening stuff on the line.
|
||||
assert(yytext[0] == '#');
|
||||
Assert(yytext[0] == '#');
|
||||
if (yytext[1] == ' ')
|
||||
// On Linux/OSX, the preprocessor gives us lines like
|
||||
// # 1234 "foo.c"
|
||||
@@ -354,7 +337,7 @@ static void lHandleCppHash(SourcePos *pos) {
|
||||
else {
|
||||
// On windows, cl.exe's preprocessor gives us lines of the form:
|
||||
// #line 1234 "foo.c"
|
||||
assert(!strncmp(yytext+1, "line ", 5));
|
||||
Assert(!strncmp(yytext+1, "line ", 5));
|
||||
ptr = yytext + 6;
|
||||
}
|
||||
|
||||
@@ -364,13 +347,13 @@ static void lHandleCppHash(SourcePos *pos) {
|
||||
pos->last_column = 1;
|
||||
// Make sure that the character after the integer is a space and that
|
||||
// then we have open quotes
|
||||
assert(src != ptr && src[0] == ' ' && src[1] == '"');
|
||||
Assert(src != ptr && src[0] == ' ' && src[1] == '"');
|
||||
src += 2;
|
||||
|
||||
// And the filename is everything up until the closing quotes
|
||||
std::string filename;
|
||||
while (*src != '"') {
|
||||
assert(*src && *src != '\n');
|
||||
Assert(*src && *src != '\n');
|
||||
filename.push_back(*src);
|
||||
++src;
|
||||
}
|
||||
@@ -471,13 +454,13 @@ ipow2(int exponent) {
|
||||
*/
|
||||
static double
|
||||
lParseHexFloat(const char *ptr) {
|
||||
assert(ptr != NULL);
|
||||
Assert(ptr != NULL);
|
||||
|
||||
assert(ptr[0] == '0' && ptr[1] == 'x');
|
||||
Assert(ptr[0] == '0' && ptr[1] == 'x');
|
||||
ptr += 2;
|
||||
|
||||
// Start initializing the mantissa
|
||||
assert(*ptr == '0' || *ptr == '1');
|
||||
Assert(*ptr == '0' || *ptr == '1');
|
||||
double mantissa = (*ptr == '1') ? 1. : 0.;
|
||||
++ptr;
|
||||
|
||||
@@ -497,7 +480,7 @@ lParseHexFloat(const char *ptr) {
|
||||
else if (*ptr >= 'a' && *ptr <= 'f')
|
||||
digit = 10 + *ptr - 'a';
|
||||
else {
|
||||
assert(*ptr >= 'A' && *ptr <= 'F');
|
||||
Assert(*ptr >= 'A' && *ptr <= 'F');
|
||||
digit = 10 + *ptr - 'A';
|
||||
}
|
||||
|
||||
@@ -510,7 +493,7 @@ lParseHexFloat(const char *ptr) {
|
||||
else
|
||||
// If there's not a '.', then we better be going straight to the
|
||||
// exponent
|
||||
assert(*ptr == 'p');
|
||||
Assert(*ptr == 'p');
|
||||
|
||||
++ptr; // skip the 'p'
|
||||
|
||||
|
||||
@@ -424,7 +424,7 @@ LLVMBoolVector(bool b) {
|
||||
v = llvm::ConstantInt::get(LLVMTypes::Int32Type, b ? 0xffffffff : 0,
|
||||
false /*unsigned*/);
|
||||
else {
|
||||
assert(LLVMTypes::BoolVectorType->getElementType() ==
|
||||
Assert(LLVMTypes::BoolVectorType->getElementType() ==
|
||||
llvm::Type::getInt1Ty(*g->ctx));
|
||||
v = b ? LLVMTrue : LLVMFalse;
|
||||
}
|
||||
@@ -445,7 +445,7 @@ LLVMBoolVector(const bool *bvec) {
|
||||
v = llvm::ConstantInt::get(LLVMTypes::Int32Type, bvec[i] ? 0xffffffff : 0,
|
||||
false /*unsigned*/);
|
||||
else {
|
||||
assert(LLVMTypes::BoolVectorType->getElementType() ==
|
||||
Assert(LLVMTypes::BoolVectorType->getElementType() ==
|
||||
llvm::Type::getInt1Ty(*g->ctx));
|
||||
v = bvec[i] ? LLVMTrue : LLVMFalse;
|
||||
}
|
||||
|
||||
24
main.cpp
24
main.cpp
@@ -37,6 +37,7 @@
|
||||
|
||||
#include "ispc.h"
|
||||
#include "module.h"
|
||||
#include "util.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <llvm/Support/PrettyStackTrace.h>
|
||||
@@ -91,15 +92,15 @@ static void usage(int ret) {
|
||||
printf(" fast-masked-vload\t\tFaster masked vector loads on SSE (may go past end of array)\n");
|
||||
printf(" fast-math\t\t\tPerform non-IEEE-compliant optimizations of numeric expressions\n");
|
||||
#if 0
|
||||
printf(" disable-handle-pseudo-memory-ops\n");
|
||||
printf(" disable-all-on-optimizations\n");
|
||||
printf(" disable-blended-masked-stores\t\tScalarize masked stores on SSE (vs. using vblendps)\n");
|
||||
printf(" disable-coherent-control-flow\t\tDisable coherent control flow optimizations\n");
|
||||
printf(" disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n");
|
||||
printf(" disable-gather-scatter-optimizations\tDisable improvements to gather/scatter\n");
|
||||
printf(" disable-blending-removal\t\tDisable eliminating blend at same scope\n");
|
||||
printf(" disable-coherent-control-flow\t\tDisable coherent control flow optimizations\n");
|
||||
printf(" disable-gather-scatter-flattening\tDisable flattening when all lanes are on\n");
|
||||
printf(" disable-gather-scatter-optimizations\tDisable improvements to gather/scatter\n");
|
||||
printf(" disable-handle-pseudo-memory-ops\n");
|
||||
printf(" disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n");
|
||||
printf(" disable-uniform-memory-optimizations\tDisable uniform-based coherent memory access\n");
|
||||
printf(" disable-masked-store-optimizations\tDisable lowering to regular stores when possible\n");
|
||||
#endif
|
||||
#ifndef ISPC_IS_WINDOWS
|
||||
printf(" [--pic]\t\t\t\tGenerate position-independent code\n");
|
||||
@@ -166,10 +167,12 @@ int main(int Argc, char *Argv[]) {
|
||||
char *argv[128];
|
||||
lGetAllArgs(Argc, Argv, argc, argv);
|
||||
|
||||
#if 0
|
||||
// Use LLVM's little utility function to print out nice stack traces if
|
||||
// we crash
|
||||
llvm::sys::PrintStackTraceOnErrorSignal();
|
||||
llvm::PrettyStackTraceProgram X(argc, argv);
|
||||
#endif
|
||||
|
||||
// initialize available LLVM targets
|
||||
LLVMInitializeX86TargetInfo();
|
||||
@@ -203,7 +206,7 @@ int main(int Argc, char *Argv[]) {
|
||||
if (atoi(argv[i] + 13) == 64)
|
||||
g->opt.force32BitAddressing = false;
|
||||
else if (atoi(argv[i] + 13) == 32)
|
||||
g->opt.force32BitAddressing = 32;
|
||||
g->opt.force32BitAddressing = true;
|
||||
else {
|
||||
fprintf(stderr, "Addressing width \"%s\" invalid--only 32 and "
|
||||
"64 are allowed.\n", argv[i]+13);
|
||||
@@ -270,6 +273,8 @@ int main(int Argc, char *Argv[]) {
|
||||
|
||||
// These are only used for performance tests of specific
|
||||
// optimizations
|
||||
else if (!strcmp(opt, "disable-all-on-optimizations"))
|
||||
g->opt.disableMaskAllOnOptimizations = true;
|
||||
else if (!strcmp(opt, "disable-handle-pseudo-memory-ops"))
|
||||
g->opt.disableHandlePseudoMemoryOps = true;
|
||||
else if (!strcmp(opt, "disable-blended-masked-stores"))
|
||||
@@ -286,8 +291,6 @@ int main(int Argc, char *Argv[]) {
|
||||
g->opt.disableGatherScatterFlattening = true;
|
||||
else if (!strcmp(opt, "disable-uniform-memory-optimizations"))
|
||||
g->opt.disableUniformMemoryOptimizations = true;
|
||||
else if (!strcmp(opt, "disable-masked-store-optimizations"))
|
||||
g->opt.disableMaskedStoreOptimizations = true;
|
||||
else
|
||||
usage(1);
|
||||
}
|
||||
@@ -354,6 +357,11 @@ int main(int Argc, char *Argv[]) {
|
||||
if (debugSet && !optSet)
|
||||
g->opt.level = 0;
|
||||
|
||||
if (outFileName == NULL && headerFileName == NULL)
|
||||
Warning(SourcePos(), "No output file or header file name specified. "
|
||||
"Program will be compiled and warnings/errors will "
|
||||
"be issued, but no output will be generated.");
|
||||
|
||||
return Module::CompileAndOutput(file, arch, cpu, target, generatePIC,
|
||||
ot, outFileName, headerFileName);
|
||||
}
|
||||
|
||||
55
module.cpp
55
module.cpp
@@ -49,7 +49,6 @@
|
||||
#include "llvmutil.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <stdarg.h>
|
||||
#include <ctype.h>
|
||||
#include <sys/types.h>
|
||||
@@ -150,8 +149,10 @@ extern void yy_delete_buffer(YY_BUFFER_STATE);
|
||||
|
||||
int
|
||||
Module::CompileFile() {
|
||||
#ifndef LLVM_3_1svn
|
||||
if (g->opt.fastMath == true)
|
||||
llvm::UnsafeFPMath = true;
|
||||
#endif // !LLVM_3_1svn
|
||||
|
||||
// FIXME: it'd be nice to do this in the Module constructor, but this
|
||||
// function ends up calling into routines that expect the global
|
||||
@@ -222,7 +223,7 @@ Module::AddGlobalVariable(Symbol *sym, Expr *initExpr, bool isConst) {
|
||||
if (sym == NULL || sym->type == NULL) {
|
||||
// But if these are NULL and there haven't been any previous
|
||||
// errors, something surprising is going on
|
||||
assert(errorCount > 0);
|
||||
Assert(errorCount > 0);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -389,7 +390,7 @@ void
|
||||
Module::AddFunctionDeclaration(Symbol *funSym, bool isInline) {
|
||||
const FunctionType *functionType =
|
||||
dynamic_cast<const FunctionType *>(funSym->type);
|
||||
assert(functionType != NULL);
|
||||
Assert(functionType != NULL);
|
||||
|
||||
// If a global variable with the same name has already been declared
|
||||
// issue an error.
|
||||
@@ -416,7 +417,7 @@ Module::AddFunctionDeclaration(Symbol *funSym, bool isInline) {
|
||||
// allowed.
|
||||
const FunctionType *ofType =
|
||||
dynamic_cast<const FunctionType *>(overloadFunc->type);
|
||||
assert(ofType != NULL);
|
||||
Assert(ofType != NULL);
|
||||
if (ofType->GetNumParameters() == functionType->GetNumParameters()) {
|
||||
int i;
|
||||
for (i = 0; i < functionType->GetNumParameters(); ++i) {
|
||||
@@ -571,7 +572,7 @@ Module::AddFunctionDeclaration(Symbol *funSym, bool isInline) {
|
||||
// Finally, we know all is good and we can add the function to the
|
||||
// symbol table
|
||||
bool ok = symbolTable->AddFunction(funSym);
|
||||
assert(ok);
|
||||
Assert(ok);
|
||||
}
|
||||
|
||||
|
||||
@@ -729,7 +730,7 @@ static void
|
||||
lVisitNode(const StructType *structType,
|
||||
std::map<const StructType *, StructDAGNode *> &structToNode,
|
||||
std::vector<const StructType *> &sortedTypes) {
|
||||
assert(structToNode.find(structType) != structToNode.end());
|
||||
Assert(structToNode.find(structType) != structToNode.end());
|
||||
// Get the node that encodes the structs that this one is immediately
|
||||
// dependent on.
|
||||
StructDAGNode *node = structToNode[structType];
|
||||
@@ -793,7 +794,7 @@ lEmitStructDecls(std::vector<const StructType *> &structTypes, FILE *file) {
|
||||
if (hasIncomingEdges.find(structType) == hasIncomingEdges.end())
|
||||
lVisitNode(structType, structToNode, sortedTypes);
|
||||
}
|
||||
assert(sortedTypes.size() == structTypes.size());
|
||||
Assert(sortedTypes.size() == structTypes.size());
|
||||
|
||||
// And finally we can emit the struct declarations by going through the
|
||||
// sorted ones in order.
|
||||
@@ -828,10 +829,10 @@ lEmitEnumDecls(const std::vector<const EnumType *> &enumTypes, FILE *file) {
|
||||
// Print the individual enumerators
|
||||
for (int j = 0; j < enumTypes[i]->GetEnumeratorCount(); ++j) {
|
||||
const Symbol *e = enumTypes[i]->GetEnumerator(j);
|
||||
assert(e->constValue != NULL);
|
||||
Assert(e->constValue != NULL);
|
||||
unsigned int enumValue;
|
||||
int count = e->constValue->AsUInt32(&enumValue);
|
||||
assert(count == 1);
|
||||
Assert(count == 1);
|
||||
|
||||
// Always print an initializer to set the value. We could be
|
||||
// 'clever' here and detect whether the implicit value given by
|
||||
@@ -897,7 +898,7 @@ lAddTypeIfNew(const Type *type, std::vector<const T *> *exportedTypes) {
|
||||
return;
|
||||
|
||||
const T *castType = dynamic_cast<const T *>(type);
|
||||
assert(castType != NULL);
|
||||
Assert(castType != NULL);
|
||||
exportedTypes->push_back(castType);
|
||||
}
|
||||
|
||||
@@ -934,7 +935,7 @@ lGetExportedTypes(const Type *type,
|
||||
else if (dynamic_cast<const VectorType *>(type) != NULL)
|
||||
lAddTypeIfNew(type, exportedVectorTypes);
|
||||
else
|
||||
assert(dynamic_cast<const AtomicType *>(type) != NULL);
|
||||
Assert(dynamic_cast<const AtomicType *>(type) != NULL);
|
||||
}
|
||||
|
||||
|
||||
@@ -965,7 +966,7 @@ lPrintFunctionDeclarations(FILE *file, const std::vector<Symbol *> &funcs) {
|
||||
fprintf(file, "#ifdef __cplusplus\nextern \"C\" {\n#endif // __cplusplus\n");
|
||||
for (unsigned int i = 0; i < funcs.size(); ++i) {
|
||||
const FunctionType *ftype = dynamic_cast<const FunctionType *>(funcs[i]->type);
|
||||
assert(ftype);
|
||||
Assert(ftype);
|
||||
std::string decl = ftype->GetCDeclaration(funcs[i]->name);
|
||||
fprintf(file, " extern %s;\n", decl.c_str());
|
||||
}
|
||||
@@ -990,7 +991,7 @@ lPrintExternGlobals(FILE *file, const std::vector<Symbol *> &externGlobals) {
|
||||
static bool
|
||||
lIsExported(const Symbol *sym) {
|
||||
const FunctionType *ft = dynamic_cast<const FunctionType *>(sym->type);
|
||||
assert(ft);
|
||||
Assert(ft);
|
||||
return ft->isExported;
|
||||
}
|
||||
|
||||
@@ -998,7 +999,7 @@ lIsExported(const Symbol *sym) {
|
||||
static bool
|
||||
lIsExternC(const Symbol *sym) {
|
||||
const FunctionType *ft = dynamic_cast<const FunctionType *>(sym->type);
|
||||
assert(ft);
|
||||
Assert(ft);
|
||||
return ft->isExternC;
|
||||
}
|
||||
|
||||
@@ -1167,10 +1168,18 @@ Module::execPreprocessor(const char* infilename, llvm::raw_string_ostream* ostre
|
||||
case Target::AVX:
|
||||
opts.addMacroDef("ISPC_TARGET_AVX");
|
||||
break;
|
||||
case Target::AVX2:
|
||||
opts.addMacroDef("ISPC_TARGET_AVX2");
|
||||
break;
|
||||
default:
|
||||
FATAL("Unhandled target ISA in preprocessor symbol definition");
|
||||
}
|
||||
|
||||
if (g->target.is32Bit)
|
||||
opts.addMacroDef("ISPC_POINTER_SIZE=32");
|
||||
else
|
||||
opts.addMacroDef("ISPC_POINTER_SIZE=64");
|
||||
|
||||
opts.addMacroDef("ISPC_MAJOR_VERSION=1");
|
||||
opts.addMacroDef("ISPC_MINOR_VERSION=1");
|
||||
|
||||
@@ -1317,7 +1326,7 @@ lExtractAndRewriteGlobals(llvm::Module *module,
|
||||
|
||||
Symbol *sym =
|
||||
m->symbolTable->LookupVariable(gv->getName().str().c_str());
|
||||
assert(sym != NULL);
|
||||
Assert(sym != NULL);
|
||||
globals->push_back(RewriteGlobalInfo(gv, init, sym->pos));
|
||||
}
|
||||
}
|
||||
@@ -1366,9 +1375,9 @@ lAddExtractedGlobals(llvm::Module *module,
|
||||
if (globals[j].size() > 0) {
|
||||
// There should be the same number of globals in the other
|
||||
// vectors, in the same order.
|
||||
assert(globals[firstActive].size() == globals[j].size());
|
||||
Assert(globals[firstActive].size() == globals[j].size());
|
||||
llvm::GlobalVariable *gv2 = globals[j][i].gv;
|
||||
assert(gv2->getName() == gv->getName());
|
||||
Assert(gv2->getName() == gv->getName());
|
||||
|
||||
// It is possible that the types may not match, though--for
|
||||
// example, this happens with varying globals if we compile
|
||||
@@ -1422,7 +1431,7 @@ lCreateDispatchFunction(llvm::Module *module, llvm::Function *setISAFunc,
|
||||
|
||||
// Grab the type of the function as well.
|
||||
if (ftype != NULL)
|
||||
assert(ftype == funcs.func[i]->getFunctionType());
|
||||
Assert(ftype == funcs.func[i]->getFunctionType());
|
||||
else
|
||||
ftype = funcs.func[i]->getFunctionType();
|
||||
|
||||
@@ -1510,7 +1519,7 @@ lCreateDispatchFunction(llvm::Module *module, llvm::Function *setISAFunc,
|
||||
// or some such, but we don't want to start imposing too much of a
|
||||
// runtime library requirement either...
|
||||
llvm::Function *abortFunc = module->getFunction("abort");
|
||||
assert(abortFunc);
|
||||
Assert(abortFunc);
|
||||
llvm::CallInst::Create(abortFunc, "", bblock);
|
||||
|
||||
// Return an undef value from the function here; we won't get to this
|
||||
@@ -1542,10 +1551,10 @@ lCreateDispatchModule(std::map<std::string, FunctionTargetVariants> &functions)
|
||||
|
||||
// Get pointers to things we need below
|
||||
llvm::Function *setFunc = module->getFunction("__set_system_isa");
|
||||
assert(setFunc != NULL);
|
||||
Assert(setFunc != NULL);
|
||||
llvm::Value *systemBestISAPtr =
|
||||
module->getGlobalVariable("__system_best_isa", true);
|
||||
assert(systemBestISAPtr != NULL);
|
||||
Assert(systemBestISAPtr != NULL);
|
||||
|
||||
// For each exported function, create the dispatch function
|
||||
std::map<std::string, FunctionTargetVariants>::iterator iter;
|
||||
@@ -1591,7 +1600,7 @@ Module::CompileAndOutput(const char *srcFile, const char *arch, const char *cpu,
|
||||
else {
|
||||
// The user supplied multiple targets
|
||||
std::vector<std::string> targets = lExtractTargets(target);
|
||||
assert(targets.size() > 1);
|
||||
Assert(targets.size() > 1);
|
||||
|
||||
if (outFileName != NULL && strcmp(outFileName, "-") == 0) {
|
||||
Error(SourcePos(), "Multi-target compilation can't generate output "
|
||||
@@ -1668,7 +1677,7 @@ Module::CompileAndOutput(const char *srcFile, const char *arch, const char *cpu,
|
||||
int i = 1;
|
||||
while (i < Target::NUM_ISAS && firstTargetMachine == NULL)
|
||||
firstTargetMachine = targetMachines[i++];
|
||||
assert(firstTargetMachine != NULL);
|
||||
Assert(firstTargetMachine != NULL);
|
||||
|
||||
if (outFileName != NULL) {
|
||||
if (outputType == Bitcode)
|
||||
|
||||
87
opt.cpp
87
opt.cpp
@@ -152,19 +152,19 @@ lGetSourcePosFromMetadata(const llvm::Instruction *inst, SourcePos *pos) {
|
||||
// All of these asserts are things that FunctionEmitContext::addGSMetadata() is
|
||||
// expected to have done in its operation
|
||||
llvm::MDString *str = llvm::dyn_cast<llvm::MDString>(filename->getOperand(0));
|
||||
assert(str);
|
||||
Assert(str);
|
||||
llvm::ConstantInt *first_lnum =
|
||||
llvm::dyn_cast<llvm::ConstantInt>(first_line->getOperand(0));
|
||||
assert(first_lnum);
|
||||
Assert(first_lnum);
|
||||
llvm::ConstantInt *first_colnum =
|
||||
llvm::dyn_cast<llvm::ConstantInt>(first_column->getOperand(0));
|
||||
assert(first_column);
|
||||
Assert(first_column);
|
||||
llvm::ConstantInt *last_lnum =
|
||||
llvm::dyn_cast<llvm::ConstantInt>(last_line->getOperand(0));
|
||||
assert(last_lnum);
|
||||
Assert(last_lnum);
|
||||
llvm::ConstantInt *last_colnum =
|
||||
llvm::dyn_cast<llvm::ConstantInt>(last_column->getOperand(0));
|
||||
assert(last_column);
|
||||
Assert(last_column);
|
||||
|
||||
*pos = SourcePos(str->getString().data(), (int)first_lnum->getZExtValue(),
|
||||
(int)first_colnum->getZExtValue(), (int)last_lnum->getZExtValue(),
|
||||
@@ -250,7 +250,7 @@ Optimize(llvm::Module *module, int optLevel) {
|
||||
optPM.add(llvm::createReassociatePass());
|
||||
optPM.add(llvm::createConstantPropagationPass());
|
||||
|
||||
if (!g->opt.disableMaskedStoreOptimizations) {
|
||||
if (!g->opt.disableMaskAllOnOptimizations) {
|
||||
optPM.add(CreateIntrinsicsOptPass());
|
||||
optPM.add(CreateMaskedStoreOptPass());
|
||||
}
|
||||
@@ -287,7 +287,7 @@ Optimize(llvm::Module *module, int optLevel) {
|
||||
optPM.add(llvm::createInstructionCombiningPass());
|
||||
optPM.add(llvm::createTailCallEliminationPass());
|
||||
|
||||
if (!g->opt.disableMaskedStoreOptimizations) {
|
||||
if (!g->opt.disableMaskAllOnOptimizations) {
|
||||
optPM.add(CreateIntrinsicsOptPass());
|
||||
optPM.add(CreateMaskedStoreOptPass());
|
||||
}
|
||||
@@ -334,12 +334,16 @@ Optimize(llvm::Module *module, int optLevel) {
|
||||
builder.DisableUnrollLoops = true;
|
||||
builder.populateFunctionPassManager(funcPM);
|
||||
builder.populateModulePassManager(optPM);
|
||||
optPM.add(CreateIsCompileTimeConstantPass(true));
|
||||
|
||||
optPM.add(CreateIsCompileTimeConstantPass(false));
|
||||
optPM.add(CreateIntrinsicsOptPass());
|
||||
|
||||
builder.populateLTOPassManager(optPM, true /* internalize */,
|
||||
true /* inline once again */);
|
||||
|
||||
optPM.add(CreateIsCompileTimeConstantPass(true));
|
||||
optPM.add(CreateIntrinsicsOptPass());
|
||||
|
||||
builder.populateModulePassManager(optPM);
|
||||
#endif
|
||||
optPM.add(CreateMakeInternalFuncsStaticPass());
|
||||
@@ -436,7 +440,7 @@ IntrinsicsOpt::IntrinsicsOpt()
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
llvm::Function *avxMovmsk =
|
||||
llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_avx_movmsk_ps_256);
|
||||
assert(avxMovmsk != NULL);
|
||||
Assert(avxMovmsk != NULL);
|
||||
maskInstructions.push_back(avxMovmsk);
|
||||
#endif
|
||||
|
||||
@@ -482,7 +486,7 @@ lGetMask(llvm::Value *factor) {
|
||||
else {
|
||||
// Otherwise get it as an int
|
||||
llvm::ConstantInt *ci = llvm::dyn_cast<llvm::ConstantInt>(elements[i]);
|
||||
assert(ci != NULL); // vs return -1 if NULL?
|
||||
Assert(ci != NULL); // vs return -1 if NULL?
|
||||
intMaskValue = ci->getValue();
|
||||
}
|
||||
// Is the high-bit set? If so, OR in the appropriate bit in
|
||||
@@ -505,7 +509,7 @@ lGetMask(llvm::Value *factor) {
|
||||
factor = c;
|
||||
}
|
||||
// else we should be able to handle it above...
|
||||
assert(!llvm::isa<llvm::Constant>(factor));
|
||||
Assert(!llvm::isa<llvm::Constant>(factor));
|
||||
#endif
|
||||
return -1;
|
||||
}
|
||||
@@ -545,8 +549,8 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_avx_maskstore_ps_256);
|
||||
llvm::Function *avxMaskedStore64 =
|
||||
llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_avx_maskstore_pd_256);
|
||||
assert(avxMaskedLoad32 != NULL && avxMaskedStore32 != NULL);
|
||||
assert(avxMaskedLoad64 != NULL && avxMaskedStore64 != NULL);
|
||||
Assert(avxMaskedLoad32 != NULL && avxMaskedStore32 != NULL);
|
||||
Assert(avxMaskedLoad64 != NULL && avxMaskedStore64 != NULL);
|
||||
#endif
|
||||
|
||||
bool modifiedAny = false;
|
||||
@@ -627,7 +631,7 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
if (mask == 0) {
|
||||
// nothing being loaded, replace with undef value
|
||||
llvm::Type *returnType = callInst->getType();
|
||||
assert(llvm::isa<llvm::VectorType>(returnType));
|
||||
Assert(llvm::isa<llvm::VectorType>(returnType));
|
||||
llvm::Value *undefValue = llvm::UndefValue::get(returnType);
|
||||
llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
|
||||
iter, undefValue);
|
||||
@@ -637,7 +641,7 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
else if (mask == 0xff) {
|
||||
// all lanes active; replace with a regular load
|
||||
llvm::Type *returnType = callInst->getType();
|
||||
assert(llvm::isa<llvm::VectorType>(returnType));
|
||||
Assert(llvm::isa<llvm::VectorType>(returnType));
|
||||
// cast the i8 * to the appropriate type
|
||||
llvm::Value *castPtr =
|
||||
new llvm::BitCastInst(callInst->getArgOperand(0),
|
||||
@@ -751,7 +755,7 @@ llvm::RegisterPass<GatherScatterFlattenOpt> gsf("gs-flatten", "Gather/Scatter Fl
|
||||
static int64_t
|
||||
lGetIntValue(llvm::Value *offset) {
|
||||
llvm::ConstantInt *intOffset = llvm::dyn_cast<llvm::ConstantInt>(offset);
|
||||
assert(intOffset && (intOffset->getBitWidth() == 32 ||
|
||||
Assert(intOffset && (intOffset->getBitWidth() == 32 ||
|
||||
intOffset->getBitWidth() == 64));
|
||||
return intOffset->getSExtValue();
|
||||
}
|
||||
@@ -776,15 +780,15 @@ lFlattenInsertChain(llvm::InsertElementInst *ie, int vectorWidth,
|
||||
|
||||
while (ie != NULL) {
|
||||
int64_t iOffset = lGetIntValue(ie->getOperand(2));
|
||||
assert(iOffset >= 0 && iOffset < vectorWidth);
|
||||
assert(elements[iOffset] == NULL);
|
||||
Assert(iOffset >= 0 && iOffset < vectorWidth);
|
||||
Assert(elements[iOffset] == NULL);
|
||||
|
||||
elements[iOffset] = ie->getOperand(1);
|
||||
|
||||
llvm::Value *insertBase = ie->getOperand(0);
|
||||
ie = llvm::dyn_cast<llvm::InsertElementInst>(insertBase);
|
||||
if (ie == NULL)
|
||||
assert(llvm::isa<llvm::UndefValue>(insertBase));
|
||||
Assert(llvm::isa<llvm::UndefValue>(insertBase));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -950,7 +954,7 @@ lGetBasePtrAndOffsets(llvm::Value *ptrs, llvm::Value **offsets) {
|
||||
if (elementBase == NULL)
|
||||
return NULL;
|
||||
|
||||
assert(delta[i] != NULL);
|
||||
Assert(delta[i] != NULL);
|
||||
if (base == NULL)
|
||||
// The first time we've found a base pointer
|
||||
base = elementBase;
|
||||
@@ -960,10 +964,14 @@ lGetBasePtrAndOffsets(llvm::Value *ptrs, llvm::Value **offsets) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
assert(base != NULL);
|
||||
Assert(base != NULL);
|
||||
#ifdef LLVM_2_9
|
||||
*offsets = llvm::ConstantVector::get(delta);
|
||||
#else
|
||||
llvm::ArrayRef<llvm::Constant *> deltas(&delta[0],
|
||||
&delta[elements.size()]);
|
||||
*offsets = llvm::ConstantVector::get(deltas);
|
||||
#endif
|
||||
return base;
|
||||
}
|
||||
|
||||
@@ -1023,7 +1031,7 @@ GatherScatterFlattenOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
};
|
||||
int numGSFuncs = sizeof(gsFuncs) / sizeof(gsFuncs[0]);
|
||||
for (int i = 0; i < numGSFuncs; ++i)
|
||||
assert(gsFuncs[i].func != NULL && gsFuncs[i].baseOffsetsFunc != NULL &&
|
||||
Assert(gsFuncs[i].func != NULL && gsFuncs[i].baseOffsetsFunc != NULL &&
|
||||
gsFuncs[i].baseOffsets32Func != NULL);
|
||||
|
||||
bool modifiedAny = false;
|
||||
@@ -1169,7 +1177,7 @@ struct MSInfo {
|
||||
MSInfo(const char *name, const int a)
|
||||
: align(a) {
|
||||
func = m->module->getFunction(name);
|
||||
assert(func != NULL);
|
||||
Assert(func != NULL);
|
||||
}
|
||||
llvm::Function *func;
|
||||
const int align;
|
||||
@@ -1313,7 +1321,7 @@ struct LMSInfo {
|
||||
pseudoFunc = m->module->getFunction(pname);
|
||||
blendFunc = m->module->getFunction(bname);
|
||||
maskedStoreFunc = m->module->getFunction(msname);
|
||||
assert(pseudoFunc != NULL && blendFunc != NULL &&
|
||||
Assert(pseudoFunc != NULL && blendFunc != NULL &&
|
||||
maskedStoreFunc != NULL);
|
||||
}
|
||||
llvm::Function *pseudoFunc;
|
||||
@@ -1447,7 +1455,7 @@ lValuesAreEqual(llvm::Value *v0, llvm::Value *v1,
|
||||
if (v0 == v1)
|
||||
return true;
|
||||
|
||||
assert(seenPhi0.size() == seenPhi1.size());
|
||||
Assert(seenPhi0.size() == seenPhi1.size());
|
||||
for (unsigned int i = 0; i < seenPhi0.size(); ++i)
|
||||
if (v0 == seenPhi0[i] && v1 == seenPhi1[i])
|
||||
return true;
|
||||
@@ -1477,7 +1485,7 @@ lValuesAreEqual(llvm::Value *v0, llvm::Value *v1,
|
||||
// then we're good.
|
||||
bool anyFailure = false;
|
||||
for (unsigned int i = 0; i < numIncoming; ++i) {
|
||||
assert(phi0->getIncomingBlock(i) == phi1->getIncomingBlock(i));
|
||||
Assert(phi0->getIncomingBlock(i) == phi1->getIncomingBlock(i));
|
||||
if (!lValuesAreEqual(phi0->getIncomingValue(i),
|
||||
phi1->getIncomingValue(i), seenPhi0, seenPhi1)) {
|
||||
anyFailure = true;
|
||||
@@ -1531,7 +1539,7 @@ lVectorValuesAllEqual(llvm::Value *v, int vectorLength,
|
||||
// probably to just ignore undef elements and return true if
|
||||
// all of the other ones are equal, but it'd be nice to have
|
||||
// some test cases to verify this.
|
||||
assert(elements[i] != NULL && elements[i+1] != NULL);
|
||||
Assert(elements[i] != NULL && elements[i+1] != NULL);
|
||||
|
||||
std::vector<llvm::PHINode *> seenPhi0;
|
||||
std::vector<llvm::PHINode *> seenPhi1;
|
||||
@@ -1565,7 +1573,7 @@ lVectorValuesAllEqual(llvm::Value *v, int vectorLength,
|
||||
return true;
|
||||
}
|
||||
|
||||
assert(!llvm::isa<llvm::Constant>(v));
|
||||
Assert(!llvm::isa<llvm::Constant>(v));
|
||||
|
||||
if (llvm::isa<llvm::CallInst>(v) || llvm::isa<llvm::LoadInst>(v) ||
|
||||
!llvm::isa<llvm::Instruction>(v))
|
||||
@@ -1610,7 +1618,7 @@ lVectorIsLinearConstantInts(llvm::ConstantVector *cv, int vectorLength,
|
||||
// Flatten the vector out into the elements array
|
||||
llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
|
||||
cv->getVectorElements(elements);
|
||||
assert((int)elements.size() == vectorLength);
|
||||
Assert((int)elements.size() == vectorLength);
|
||||
|
||||
llvm::ConstantInt *ci = llvm::dyn_cast<llvm::ConstantInt>(elements[0]);
|
||||
if (ci == NULL)
|
||||
@@ -1665,7 +1673,8 @@ lCheckMulForLinear(llvm::Value *op0, llvm::Value *op1, int vectorLength,
|
||||
|
||||
// Check to see if the other operand is a linear vector with stride
|
||||
// given by stride/splatVal.
|
||||
return lVectorIsLinear(op1, vectorLength, stride / splatVal, seenPhis);
|
||||
return lVectorIsLinear(op1, vectorLength, (int)(stride / splatVal),
|
||||
seenPhis);
|
||||
}
|
||||
|
||||
|
||||
@@ -1784,7 +1793,7 @@ struct GatherImpInfo {
|
||||
loadBroadcastFunc = m->module->getFunction(lbName);
|
||||
loadMaskedFunc = m->module->getFunction(lmName);
|
||||
|
||||
assert(pseudoFunc != NULL && loadBroadcastFunc != NULL &&
|
||||
Assert(pseudoFunc != NULL && loadBroadcastFunc != NULL &&
|
||||
loadMaskedFunc != NULL);
|
||||
}
|
||||
llvm::Function *pseudoFunc;
|
||||
@@ -1801,7 +1810,7 @@ struct ScatterImpInfo {
|
||||
pseudoFunc = m->module->getFunction(pName);
|
||||
maskedStoreFunc = m->module->getFunction(msName);
|
||||
vecPtrType = vpt;
|
||||
assert(pseudoFunc != NULL && maskedStoreFunc != NULL);
|
||||
Assert(pseudoFunc != NULL && maskedStoreFunc != NULL);
|
||||
}
|
||||
llvm::Function *pseudoFunc;
|
||||
llvm::Function *maskedStoreFunc;
|
||||
@@ -1880,7 +1889,7 @@ GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
|
||||
SourcePos pos;
|
||||
bool ok = lGetSourcePosFromMetadata(callInst, &pos);
|
||||
assert(ok);
|
||||
Assert(ok);
|
||||
|
||||
llvm::Value *base = callInst->getArgOperand(0);
|
||||
llvm::Value *offsets = callInst->getArgOperand(1);
|
||||
@@ -2058,7 +2067,7 @@ struct LowerGSInfo {
|
||||
: isGather(ig) {
|
||||
pseudoFunc = m->module->getFunction(pName);
|
||||
actualFunc = m->module->getFunction(aName);
|
||||
assert(pseudoFunc != NULL && actualFunc != NULL);
|
||||
Assert(pseudoFunc != NULL && actualFunc != NULL);
|
||||
}
|
||||
llvm::Function *pseudoFunc;
|
||||
llvm::Function *actualFunc;
|
||||
@@ -2135,7 +2144,7 @@ LowerGSPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
// instruction so that we can issue PerformanceWarning()s below.
|
||||
SourcePos pos;
|
||||
bool ok = lGetSourcePosFromMetadata(callInst, &pos);
|
||||
assert(ok);
|
||||
Assert(ok);
|
||||
|
||||
callInst->setCalledFunction(info->actualFunc);
|
||||
if (info->isGather)
|
||||
@@ -2217,9 +2226,11 @@ IsCompileTimeConstantPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
// not a __is_compile_time_constant_* function
|
||||
continue;
|
||||
|
||||
// This optimization pass can be disabled with the (poorly named)
|
||||
// disableGatherScatterFlattening option.
|
||||
if (g->opt.disableGatherScatterFlattening) {
|
||||
// This optimization pass can be disabled with both the (poorly
|
||||
// named) disableGatherScatterFlattening option and
|
||||
// disableMaskAllOnOptimizations.
|
||||
if (g->opt.disableGatherScatterFlattening ||
|
||||
g->opt.disableMaskAllOnOptimizations) {
|
||||
llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i, LLVMFalse);
|
||||
modifiedAny = true;
|
||||
goto restart;
|
||||
|
||||
97
parse.yy
97
parse.yy
@@ -134,9 +134,8 @@ struct ForeachDimension {
|
||||
%}
|
||||
|
||||
%union {
|
||||
int32_t int32Val;
|
||||
double floatVal;
|
||||
int64_t int64Val;
|
||||
int64_t intVal;
|
||||
float floatVal;
|
||||
std::string *stringVal;
|
||||
const char *constCharPtr;
|
||||
|
||||
@@ -226,7 +225,7 @@ struct ForeachDimension {
|
||||
|
||||
%type <stringVal> string_constant
|
||||
%type <constCharPtr> struct_or_union_name enum_identifier
|
||||
%type <int32Val> int_constant soa_width_specifier
|
||||
%type <intVal> int_constant soa_width_specifier
|
||||
|
||||
%type <foreachDimension> foreach_dimension_specifier
|
||||
%type <foreachDimensionList> foreach_dimension_list
|
||||
@@ -259,16 +258,16 @@ primary_expression
|
||||
}
|
||||
}
|
||||
| TOKEN_INT32_CONSTANT {
|
||||
$$ = new ConstExpr(AtomicType::UniformConstInt32, yylval.int32Val, @1);
|
||||
$$ = new ConstExpr(AtomicType::UniformConstInt32, (int32_t)yylval.intVal, @1);
|
||||
}
|
||||
| TOKEN_UINT32_CONSTANT {
|
||||
$$ = new ConstExpr(AtomicType::UniformConstUInt32, (uint32_t)yylval.int32Val, @1);
|
||||
$$ = new ConstExpr(AtomicType::UniformConstUInt32, (uint32_t)yylval.intVal, @1);
|
||||
}
|
||||
| TOKEN_INT64_CONSTANT {
|
||||
$$ = new ConstExpr(AtomicType::UniformConstInt64, yylval.int64Val, @1);
|
||||
$$ = new ConstExpr(AtomicType::UniformConstInt64, (int64_t)yylval.intVal, @1);
|
||||
}
|
||||
| TOKEN_UINT64_CONSTANT {
|
||||
$$ = new ConstExpr(AtomicType::UniformConstUInt64, (uint64_t)yylval.int64Val, @1);
|
||||
$$ = new ConstExpr(AtomicType::UniformConstUInt64, (uint64_t)yylval.intVal, @1);
|
||||
}
|
||||
| TOKEN_FLOAT_CONSTANT {
|
||||
$$ = new ConstExpr(AtomicType::UniformConstFloat, (float)yylval.floatVal, @1);
|
||||
@@ -328,7 +327,7 @@ argument_expression_list
|
||||
| argument_expression_list ',' assignment_expression
|
||||
{
|
||||
ExprList *argList = dynamic_cast<ExprList *>($1);
|
||||
assert(argList != NULL);
|
||||
Assert(argList != NULL);
|
||||
argList->exprs.push_back($3);
|
||||
argList->pos = Union(argList->pos, @3);
|
||||
$$ = argList;
|
||||
@@ -545,7 +544,7 @@ declaration_specifiers
|
||||
| soa_width_specifier
|
||||
{
|
||||
DeclSpecs *ds = new DeclSpecs;
|
||||
ds->soaWidth = $1;
|
||||
ds->soaWidth = (int32_t)$1;
|
||||
$$ = ds;
|
||||
}
|
||||
| soa_width_specifier declaration_specifiers
|
||||
@@ -555,7 +554,7 @@ declaration_specifiers
|
||||
if (ds->soaWidth != 0)
|
||||
Error(@1, "soa<> qualifier supplied multiple times in declaration.");
|
||||
else
|
||||
ds->soaWidth = $1;
|
||||
ds->soaWidth = (int32_t)$1;
|
||||
}
|
||||
$$ = ds;
|
||||
}
|
||||
@@ -566,7 +565,7 @@ declaration_specifiers
|
||||
| type_specifier '<' int_constant '>'
|
||||
{
|
||||
DeclSpecs *ds = new DeclSpecs($1);
|
||||
ds->vectorSize = $3;
|
||||
ds->vectorSize = (int32_t)$3;
|
||||
$$ = ds;
|
||||
}
|
||||
| type_specifier declaration_specifiers
|
||||
@@ -630,7 +629,7 @@ type_specifier
|
||||
: atomic_var_type_specifier { $$ = $1; }
|
||||
| TOKEN_TYPE_NAME
|
||||
{ const Type *t = m->symbolTable->LookupType(yytext);
|
||||
assert(t != NULL);
|
||||
Assert(t != NULL);
|
||||
$$ = t;
|
||||
}
|
||||
| struct_or_union_specifier { $$ = $1; }
|
||||
@@ -652,7 +651,7 @@ short_vec_specifier
|
||||
: atomic_var_type_specifier '<' int_constant '>'
|
||||
{
|
||||
Type* vt =
|
||||
new VectorType($1, $3);
|
||||
new VectorType($1, (int32_t)$3);
|
||||
$$ = vt;
|
||||
}
|
||||
;
|
||||
@@ -930,7 +929,7 @@ declarator
|
||||
;
|
||||
|
||||
int_constant
|
||||
: TOKEN_INT32_CONSTANT { $$ = yylval.int32Val; }
|
||||
: TOKEN_INT32_CONSTANT { $$ = yylval.intVal; }
|
||||
;
|
||||
|
||||
direct_declarator
|
||||
@@ -948,10 +947,16 @@ direct_declarator
|
||||
{
|
||||
int size;
|
||||
if ($1 != NULL && lGetConstantInt($3, &size, @3, "Array dimension")) {
|
||||
Declarator *d = new Declarator(DK_ARRAY, Union(@1, @4));
|
||||
d->arraySize = size;
|
||||
d->child = $1;
|
||||
$$ = d;
|
||||
if (size < 0) {
|
||||
Error(@3, "Array dimension must be non-negative.");
|
||||
$$ = NULL;
|
||||
}
|
||||
else {
|
||||
Declarator *d = new Declarator(DK_ARRAY, Union(@1, @4));
|
||||
d->arraySize = size;
|
||||
d->child = $1;
|
||||
$$ = d;
|
||||
}
|
||||
}
|
||||
else
|
||||
$$ = NULL;
|
||||
@@ -1142,10 +1147,16 @@ direct_abstract_declarator
|
||||
| '[' constant_expression ']'
|
||||
{
|
||||
int size;
|
||||
if (lGetConstantInt($2, &size, @2, "Array dimension")) {
|
||||
Declarator *d = new Declarator(DK_ARRAY, Union(@1, @3));
|
||||
d->arraySize = size;
|
||||
$$ = d;
|
||||
if ($2 != NULL && lGetConstantInt($2, &size, @2, "Array dimension")) {
|
||||
if (size < 0) {
|
||||
Error(@2, "Array dimension must be non-negative.");
|
||||
$$ = NULL;
|
||||
}
|
||||
else {
|
||||
Declarator *d = new Declarator(DK_ARRAY, Union(@1, @3));
|
||||
d->arraySize = size;
|
||||
$$ = d;
|
||||
}
|
||||
}
|
||||
else
|
||||
$$ = NULL;
|
||||
@@ -1160,11 +1171,17 @@ direct_abstract_declarator
|
||||
| direct_abstract_declarator '[' constant_expression ']'
|
||||
{
|
||||
int size;
|
||||
if (lGetConstantInt($3, &size, @3, "Array dimension")) {
|
||||
Declarator *d = new Declarator(DK_ARRAY, Union(@1, @4));
|
||||
d->arraySize = size;
|
||||
d->child = $1;
|
||||
$$ = d;
|
||||
if ($3 != NULL && lGetConstantInt($3, &size, @3, "Array dimension")) {
|
||||
if (size < 0) {
|
||||
Error(@3, "Array dimension must be non-negative.");
|
||||
$$ = NULL;
|
||||
}
|
||||
else {
|
||||
Declarator *d = new Declarator(DK_ARRAY, Union(@1, @4));
|
||||
d->arraySize = size;
|
||||
d->child = $1;
|
||||
$$ = d;
|
||||
}
|
||||
}
|
||||
else
|
||||
$$ = NULL;
|
||||
@@ -1206,7 +1223,7 @@ initializer_list
|
||||
$$ = NULL;
|
||||
else {
|
||||
ExprList *exprList = dynamic_cast<ExprList *>($1);
|
||||
assert(exprList);
|
||||
Assert(exprList);
|
||||
exprList->exprs.push_back($3);
|
||||
exprList->pos = Union(exprList->pos, @3);
|
||||
$$ = exprList;
|
||||
@@ -1537,7 +1554,7 @@ lAddDeclaration(DeclSpecs *ds, Declarator *decl) {
|
||||
const FunctionType *ft = dynamic_cast<const FunctionType *>(t);
|
||||
if (ft != NULL) {
|
||||
Symbol *funSym = decl->GetSymbol();
|
||||
assert(funSym != NULL);
|
||||
Assert(funSym != NULL);
|
||||
funSym->type = ft;
|
||||
funSym->storageClass = ds->storageClass;
|
||||
|
||||
@@ -1561,19 +1578,21 @@ lAddFunctionParams(Declarator *decl) {
|
||||
// walk down to the declarator for the function itself
|
||||
while (decl->kind != DK_FUNCTION && decl->child != NULL)
|
||||
decl = decl->child;
|
||||
assert(decl->kind == DK_FUNCTION);
|
||||
Assert(decl->kind == DK_FUNCTION);
|
||||
|
||||
// now loop over its parameters and add them to the symbol table
|
||||
for (unsigned int i = 0; i < decl->functionParams.size(); ++i) {
|
||||
Declaration *pdecl = decl->functionParams[i];
|
||||
if (pdecl == NULL)
|
||||
if (pdecl == NULL || pdecl->declarators.size() == 0)
|
||||
// zero size declarators array corresponds to an anonymous
|
||||
// parameter
|
||||
continue;
|
||||
assert(pdecl->declarators.size() == 1);
|
||||
Assert(pdecl->declarators.size() == 1);
|
||||
Symbol *sym = pdecl->declarators[0]->GetSymbol();
|
||||
#ifndef NDEBUG
|
||||
bool ok = m->symbolTable->AddVariable(sym);
|
||||
if (ok == false)
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
#else
|
||||
m->symbolTable->AddVariable(sym);
|
||||
#endif
|
||||
@@ -1640,7 +1659,7 @@ lGetStorageClassString(StorageClass sc) {
|
||||
case SC_EXTERN_C:
|
||||
return "extern \"C\"";
|
||||
default:
|
||||
assert(!"logic error in lGetStorageClassString()");
|
||||
Assert(!"logic error in lGetStorageClassString()");
|
||||
return "";
|
||||
}
|
||||
}
|
||||
@@ -1673,6 +1692,10 @@ lGetConstantInt(Expr *expr, int *value, SourcePos pos, const char *usage) {
|
||||
Error(pos, "%s must be a compile-time integer constant.", usage);
|
||||
return false;
|
||||
}
|
||||
if ((int64_t)((int32_t)ci->getSExtValue()) != ci->getSExtValue()) {
|
||||
Error(pos, "%s must be representable with a 32-bit integer.", usage);
|
||||
return false;
|
||||
}
|
||||
*value = (int)ci->getZExtValue();
|
||||
return true;
|
||||
}
|
||||
@@ -1720,7 +1743,7 @@ lFinalizeEnumeratorSymbols(std::vector<Symbol *> &enums,
|
||||
if (enums[i]->constValue != NULL) {
|
||||
/* Already has a value, so first update nextVal with it. */
|
||||
int count = enums[i]->constValue->AsUInt32(&nextVal);
|
||||
assert(count == 1);
|
||||
Assert(count == 1);
|
||||
++nextVal;
|
||||
|
||||
/* When the source file as being parsed, the ConstExpr for any
|
||||
@@ -1733,7 +1756,7 @@ lFinalizeEnumeratorSymbols(std::vector<Symbol *> &enums,
|
||||
false, enums[i]->pos);
|
||||
castExpr = castExpr->Optimize();
|
||||
enums[i]->constValue = dynamic_cast<ConstExpr *>(castExpr);
|
||||
assert(enums[i]->constValue != NULL);
|
||||
Assert(enums[i]->constValue != NULL);
|
||||
}
|
||||
else {
|
||||
enums[i]->constValue = new ConstExpr(enumType, nextVal++,
|
||||
|
||||
303
run_tests.py
303
run_tests.py
@@ -2,9 +2,6 @@
|
||||
|
||||
# test-running driver for ispc
|
||||
|
||||
# TODO: windows support (mostly should be calling CL.exe rather than gcc
|
||||
# for static linking?)
|
||||
|
||||
from optparse import OptionParser
|
||||
import multiprocessing
|
||||
from ctypes import c_int
|
||||
@@ -23,9 +20,6 @@ import platform
|
||||
parser = OptionParser()
|
||||
parser.add_option("-r", "--random-shuffle", dest="random", help="Randomly order tests",
|
||||
default=False, action="store_true")
|
||||
parser.add_option("-s", "--static-exe", dest="static_exe",
|
||||
help="Create and run a regular executable for each test (rather than using the LLVM JIT).",
|
||||
default=False, action="store_true")
|
||||
parser.add_option('-t', '--target', dest='target',
|
||||
help='Set compilation target (sse2, sse2-x2, sse4, sse4-x2, avx, avx-x2)',
|
||||
default="sse4")
|
||||
@@ -52,7 +46,6 @@ if (options.random):
|
||||
|
||||
# counter
|
||||
total_tests = 0
|
||||
finished_tests_counter = multiprocessing.Value(c_int)
|
||||
|
||||
# We'd like to use the Lock class from the multiprocessing package to
|
||||
# serialize accesses to finished_tests_counter. Unfortunately, the version of
|
||||
@@ -60,7 +53,10 @@ finished_tests_counter = multiprocessing.Value(c_int)
|
||||
# http://bugs.python.org/issue5261. Therefore, we use the (deprecated but
|
||||
# still available) mutex class.
|
||||
#finished_tests_counter_lock = multiprocessing.Lock()
|
||||
finished_tests_mutex = mutex.mutex()
|
||||
if not (platform.system() == 'Windows' or
|
||||
'CYGWIN_NT' in platform.system()):
|
||||
finished_tests_mutex = mutex.mutex()
|
||||
finished_tests_counter = multiprocessing.Value(c_int)
|
||||
|
||||
# utility routine to print an update on the number of tests that have been
|
||||
# finished. Should be called with the mutex (or lock) held..
|
||||
@@ -79,21 +75,127 @@ fnull = open(os.devnull, 'w')
|
||||
|
||||
# run the commands in cmd_list
|
||||
def run_cmds(cmd_list, filename, expect_failure):
|
||||
output = ""
|
||||
for cmd in cmd_list:
|
||||
if expect_failure:
|
||||
failed = (subprocess.call(cmd, shell = True, stdout = fnull, stderr = fnull) != 0)
|
||||
else:
|
||||
failed = (os.system(cmd) != 0)
|
||||
sp = subprocess.Popen(shlex.split(cmd), stdin=None,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE)
|
||||
out = sp.communicate()
|
||||
output += out[0]
|
||||
output += out[1]
|
||||
failed = (sp.returncode != 0)
|
||||
if failed:
|
||||
break
|
||||
|
||||
surprise = ((expect_failure and not failed) or (not expect_failure and failed))
|
||||
surprise = ((expect_failure and not failed) or
|
||||
(not expect_failure and failed))
|
||||
if surprise == True:
|
||||
print "Test %s %s " % \
|
||||
(filename, "unexpectedly passed" if expect_failure else "failed")
|
||||
print "Test %s %s (return code %d) " % \
|
||||
(filename, "unexpectedly passed" if expect_failure else "failed",
|
||||
sp.returncode)
|
||||
if output != "":
|
||||
print "%s" % output
|
||||
return surprise
|
||||
|
||||
|
||||
def run_test(filename):
|
||||
# is this a test to make sure an error is issued?
|
||||
error_count = 0
|
||||
want_error = (filename.find("tests_errors") != -1)
|
||||
if want_error == True:
|
||||
ispc_cmd = "ispc --werror --nowrap %s --arch=%s --target=%s" % \
|
||||
(filename, options.arch, options.target)
|
||||
sp = subprocess.Popen(shlex.split(ispc_cmd), stdin=None,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE)
|
||||
out = sp.communicate()
|
||||
output = ""
|
||||
output += out[0]
|
||||
output += out[1]
|
||||
got_error = (sp.returncode != 0)
|
||||
|
||||
# figure out the error message we're expecting
|
||||
file = open(filename, 'r')
|
||||
firstline = file.readline()
|
||||
firstline = string.replace(firstline, "//", "")
|
||||
firstline = string.lstrip(firstline)
|
||||
firstline = string.rstrip(firstline)
|
||||
file.close()
|
||||
|
||||
if (output.find(firstline) == -1):
|
||||
print "OUT %s" % filename
|
||||
print "Didnt see expected error message %s from test %s.\nActual output:\n%s" % \
|
||||
(firstline, filename, output)
|
||||
error_count += 1
|
||||
elif got_error == False:
|
||||
print "Unexpectedly no errors issued from test %s" % filename
|
||||
error_count += 1
|
||||
else:
|
||||
# do we expect this test to fail?
|
||||
should_fail = (filename.find("failing_") != -1)
|
||||
|
||||
# We need to figure out the signature of the test
|
||||
# function that this test has.
|
||||
sig2def = { "f_v(" : 0, "f_f(" : 1, "f_fu(" : 2, "f_fi(" : 3,
|
||||
"f_du(" : 4, "f_duf(" : 5, "f_di(" : 6 }
|
||||
file = open(filename, 'r')
|
||||
match = -1
|
||||
for line in file:
|
||||
# look for lines with 'export'...
|
||||
if line.find("export") == -1:
|
||||
continue
|
||||
# one of them should have a function with one of the
|
||||
# declarations in sig2def
|
||||
for pattern, ident in sig2def.items():
|
||||
if line.find(pattern) != -1:
|
||||
match = ident
|
||||
break
|
||||
file.close()
|
||||
if match == -1:
|
||||
print "Fatal error: unable to find function signature " + \
|
||||
"in test %s" % filename
|
||||
error_count += 1
|
||||
else:
|
||||
if (platform.system() == 'Windows' or
|
||||
'CYGWIN_NT' in platform.system()):
|
||||
obj_name = "%s.obj" % filename
|
||||
exe_name = "%s.exe" % filename
|
||||
cc_cmd = "cl /nologo test_static.cpp /DTEST_SIG=%d %s.obj /Fe%s" % \
|
||||
(match, filename, exe_name)
|
||||
if should_fail:
|
||||
cc_cmd += " /DEXPECT_FAILURE"
|
||||
else:
|
||||
obj_name = "%s.o" % filename
|
||||
exe_name = "%s.run" % filename
|
||||
if options.arch == 'x86':
|
||||
gcc_arch = '-m32'
|
||||
else:
|
||||
gcc_arch = '-m64'
|
||||
cc_cmd = "g++ %s test_static.cpp -DTEST_SIG=%d %s.o -o %s" % \
|
||||
(gcc_arch, match, filename, exe_name)
|
||||
if platform.system() == 'Darwin':
|
||||
cc_cmd += ' -Wl,-no_pie'
|
||||
if should_fail:
|
||||
cc_cmd += " -DEXPECT_FAILURE"
|
||||
|
||||
ispc_cmd = "ispc --woff %s -o %s --arch=%s --target=%s" % \
|
||||
(filename, obj_name, options.arch, options.target)
|
||||
if options.no_opt:
|
||||
ispc_cmd += " -O0"
|
||||
|
||||
# compile the ispc code, make the executable, and run it...
|
||||
error_count += run_cmds([ispc_cmd, cc_cmd, exe_name], \
|
||||
filename, should_fail)
|
||||
|
||||
# clean up after running the test
|
||||
try:
|
||||
os.unlink(exe_name)
|
||||
os.unlink(obj_name)
|
||||
except:
|
||||
None
|
||||
|
||||
return error_count
|
||||
|
||||
# pull tests to run from the given queue and run them. Multiple copies of
|
||||
# this function will be running in parallel across all of the CPU cores of
|
||||
# the system.
|
||||
@@ -104,100 +206,7 @@ def run_tasks_from_queue(queue):
|
||||
if (filename == 'STOP'):
|
||||
sys.exit(error_count)
|
||||
|
||||
# is this a test to make sure an error is issued?
|
||||
want_error = (filename.find("tests_errors") != -1)
|
||||
if want_error == True:
|
||||
ispc_cmd = "ispc --werror --nowrap %s --arch=%s --target=%s" % \
|
||||
(filename, options.arch, options.target)
|
||||
sp = subprocess.Popen(shlex.split(ispc_cmd), stdin=None, stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE)
|
||||
output = sp.communicate()[1]
|
||||
got_error = (sp.returncode != 0)
|
||||
|
||||
# figure out the error message we're expecting
|
||||
file = open(filename, 'r')
|
||||
firstline = file.readline()
|
||||
firstline = string.replace(firstline, "//", "")
|
||||
firstline = string.lstrip(firstline)
|
||||
firstline = string.rstrip(firstline)
|
||||
file.close()
|
||||
|
||||
if (output.find(firstline) == -1):
|
||||
print "Didn't see expected error message \"%s\" from test %s.\nActual outout: %s" % \
|
||||
(firstline, filename, output)
|
||||
error_count += 1
|
||||
elif got_error == False:
|
||||
print "Unexpectedly no errors issued from test %s" % filename
|
||||
error_count += 1
|
||||
continue
|
||||
|
||||
# do we expect this test to fail?
|
||||
should_fail = (filename.find("failing_") != -1)
|
||||
|
||||
if options.static_exe == True:
|
||||
# if the user wants us to build a static executable to run for
|
||||
# this test, we need to figure out the signature of the test
|
||||
# function that this test has.
|
||||
sig2def = { "f_v(" : 0, "f_f(" : 1, "f_fu(" : 2, "f_fi(" : 3,
|
||||
"f_du(" : 4, "f_duf(" : 5, "f_di(" : 6 }
|
||||
file = open(filename, 'r')
|
||||
match = -1
|
||||
for line in file:
|
||||
# look for lines with 'export'...
|
||||
if line.find("export") == -1:
|
||||
continue
|
||||
# one of them should have a function with one of the
|
||||
# declarations in sig2def
|
||||
for pattern, ident in sig2def.items():
|
||||
if line.find(pattern) != -1:
|
||||
match = ident
|
||||
break
|
||||
file.close()
|
||||
if match == -1:
|
||||
print "Fatal error: unable to find function signature in test %s" % filename
|
||||
error_count += 1
|
||||
else:
|
||||
obj_name = "%s.o" % filename
|
||||
exe_name = "%s.run" % filename
|
||||
ispc_cmd = "ispc --woff %s -o %s --arch=%s --target=%s" % \
|
||||
(filename, obj_name, options.arch, options.target)
|
||||
if options.no_opt:
|
||||
ispc_cmd += " -O0"
|
||||
if options.arch == 'x86':
|
||||
gcc_arch = '-m32'
|
||||
else:
|
||||
gcc_arch = '-m64'
|
||||
gcc_cmd = "g++ %s test_static.cpp -DTEST_SIG=%d %s.o -o %s" % \
|
||||
(gcc_arch, match, filename, exe_name)
|
||||
if platform.system() == 'Darwin':
|
||||
gcc_cmd += ' -Wl,-no_pie'
|
||||
if should_fail:
|
||||
gcc_cmd += " -DEXPECT_FAILURE"
|
||||
|
||||
# compile the ispc code, make the executable, and run it...
|
||||
error_count += run_cmds([ispc_cmd, gcc_cmd, exe_name], filename, should_fail)
|
||||
|
||||
# clean up after running the test
|
||||
try:
|
||||
os.unlink(exe_name)
|
||||
os.unlink(obj_name)
|
||||
except:
|
||||
None
|
||||
else:
|
||||
# otherwise we'll use ispc_test + the LLVM JIT to run the test
|
||||
bitcode_file = "%s.bc" % filename
|
||||
compile_cmd = "ispc --woff --emit-llvm %s --target=%s -o %s" % \
|
||||
(filename, options.target, bitcode_file)
|
||||
if options.no_opt:
|
||||
compile_cmd += " -O0"
|
||||
test_cmd = "ispc_test %s" % bitcode_file
|
||||
|
||||
error_count += run_cmds([compile_cmd, test_cmd], filename, should_fail)
|
||||
|
||||
try:
|
||||
os.unlink(bitcode_file)
|
||||
except:
|
||||
None
|
||||
error_count += run_test(filename)
|
||||
|
||||
# If not for http://bugs.python.org/issue5261 on OSX, we'd like to do this:
|
||||
#with finished_tests_counter_lock:
|
||||
@@ -214,34 +223,58 @@ def sigint(signum, frame):
|
||||
sys.exit(1)
|
||||
|
||||
if __name__ == '__main__':
|
||||
nthreads = multiprocessing.cpu_count()
|
||||
total_tests = len(files)
|
||||
print "Found %d CPUs. Running %d tests." % (nthreads, total_tests)
|
||||
|
||||
# put each of the test filenames into a queue
|
||||
q = multiprocessing.Queue()
|
||||
for fn in files:
|
||||
q.put(fn)
|
||||
for x in range(nthreads):
|
||||
q.put('STOP')
|
||||
|
||||
# need to catch sigint so that we can terminate all of the tasks if
|
||||
# we're interrupted
|
||||
signal.signal(signal.SIGINT, sigint)
|
||||
|
||||
# launch jobs to run tests
|
||||
for x in range(nthreads):
|
||||
t = multiprocessing.Process(target=run_tasks_from_queue, args=(q,))
|
||||
task_threads.append(t)
|
||||
t.start()
|
||||
|
||||
# wait for them to all finish and then return the number that failed
|
||||
# (i.e. return 0 if all is ok)
|
||||
error_count = 0
|
||||
for t in task_threads:
|
||||
t.join()
|
||||
error_count += t.exitcode
|
||||
print
|
||||
|
||||
if (platform.system() == 'Windows' or
|
||||
'CYGWIN_NT' in platform.system()):
|
||||
# cl.exe gets itself all confused if we have multiple instances of
|
||||
# it running concurrently and operating on the same .cpp file
|
||||
# (test_static.cpp), even if we are generating a differently-named
|
||||
# exe in the end. So run serially. :-(
|
||||
nthreads = 1
|
||||
num_done = 0
|
||||
print "Running %d tests." % (total_tests)
|
||||
for fn in files:
|
||||
error_count += run_test(fn)
|
||||
|
||||
num_done += 1
|
||||
progress_str = " Done %d / %d [%s]" % (num_done, total_tests, fn)
|
||||
# spaces to clear out detritus from previous printing...
|
||||
for x in range(30):
|
||||
progress_str += ' '
|
||||
progress_str += '\r'
|
||||
sys.stdout.write(progress_str)
|
||||
sys.stdout.flush()
|
||||
else:
|
||||
nthreads = multiprocessing.cpu_count()
|
||||
print "Found %d CPUs. Running %d tests." % (nthreads, total_tests)
|
||||
|
||||
# put each of the test filenames into a queue
|
||||
q = multiprocessing.Queue()
|
||||
for fn in files:
|
||||
q.put(fn)
|
||||
for x in range(nthreads):
|
||||
q.put('STOP')
|
||||
|
||||
# need to catch sigint so that we can terminate all of the tasks if
|
||||
# we're interrupted
|
||||
signal.signal(signal.SIGINT, sigint)
|
||||
|
||||
# launch jobs to run tests
|
||||
for x in range(nthreads):
|
||||
t = multiprocessing.Process(target=run_tasks_from_queue, args=(q,))
|
||||
task_threads.append(t)
|
||||
t.start()
|
||||
|
||||
# wait for them to all finish and then return the number that failed
|
||||
# (i.e. return 0 if all is ok)
|
||||
error_count = 0
|
||||
for t in task_threads:
|
||||
t.join()
|
||||
error_count += t.exitcode
|
||||
print
|
||||
|
||||
if error_count > 0:
|
||||
print "%d / %d tests FAILED!" % (error_count, total_tests)
|
||||
sys.exit(error_count)
|
||||
|
||||
95
run_tests.sh
95
run_tests.sh
@@ -1,95 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
surprises=0
|
||||
verbose=false
|
||||
number=$(ls -1 tests/*.ispc|wc -l)
|
||||
counter=1
|
||||
target=sse4
|
||||
|
||||
while getopts ":vt:h" opt;do
|
||||
case $opt in
|
||||
v) verbose=true
|
||||
;;
|
||||
t) target=$OPTARG
|
||||
;;
|
||||
h) cat <<EOF
|
||||
usage: run_tests.sh [-v] [-t target] [filenames]
|
||||
-v # verbose output
|
||||
-t # specify compilation target (SSE4 is the default).
|
||||
[filenames] # (optional) files to run through testing infrastructure
|
||||
# if none are provided, all in tests/ will be run.
|
||||
EOF
|
||||
exit 1
|
||||
esac
|
||||
done
|
||||
|
||||
ISPC_ARCH=x86-64
|
||||
if [[ $OS == "Windows_NT" ]]; then
|
||||
ISPC_ARCH=x86
|
||||
fi
|
||||
ISPC_ARGS="--target=$target --arch=$ISPC_ARCH -O2 --woff"
|
||||
|
||||
shift $(( $OPTIND - 1 ))
|
||||
if [[ "$1" > 0 ]]; then
|
||||
while [[ "$1" > 0 ]]; do
|
||||
i=$1
|
||||
shift
|
||||
echo Running test $i
|
||||
|
||||
bc=${i%%ispc}bc
|
||||
ispc $ISPC_ARGS $i -o $bc --emit-llvm
|
||||
if [[ $? != 0 ]]; then
|
||||
surprises=1
|
||||
echo Test $i FAILED ispc compile
|
||||
echo
|
||||
else
|
||||
ispc_test $bc
|
||||
if [[ $? != 0 ]]; then
|
||||
surprises=1
|
||||
echo Test $i FAILED ispc_test
|
||||
echo
|
||||
fi
|
||||
fi
|
||||
/bin/rm -f $bc
|
||||
done
|
||||
else
|
||||
echo Running all correctness tests
|
||||
|
||||
for i in tests/*.ispc; do
|
||||
if $verbose; then
|
||||
echo -en "Running test $counter of $number.\r"
|
||||
fi
|
||||
(( counter++ ))
|
||||
bc=${i%%ispc}bc
|
||||
ispc $ISPC_ARGS $i -o $bc --emit-llvm
|
||||
if [[ $? != 0 ]]; then
|
||||
surprises=1
|
||||
echo Test $i FAILED ispc compile
|
||||
echo
|
||||
else
|
||||
ispc_test $bc
|
||||
if [[ $? != 0 ]]; then
|
||||
surprises=1
|
||||
echo Test $i FAILED ispc_test
|
||||
echo
|
||||
fi
|
||||
fi
|
||||
/bin/rm -f $bc
|
||||
done
|
||||
|
||||
echo -e "\nRunning failing tests"
|
||||
for i in failing_tests/*.ispc; do
|
||||
(ispc -O2 $i -woff -o - --emit-llvm | ispc_test -) 2>/dev/null 1>/dev/null
|
||||
if [[ $? == 0 ]]; then
|
||||
surprises=1
|
||||
echo Test $i UNEXPECTEDLY PASSED
|
||||
echo
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
if [[ $surprises == 0 ]]; then
|
||||
echo No surprises.
|
||||
fi
|
||||
|
||||
exit $surprises
|
||||
82
stmt.cpp
82
stmt.cpp
@@ -135,7 +135,7 @@ lPossiblyResolveFunctionOverloads(Expr *expr, const Type *type) {
|
||||
for (int i = 0; i < funcType->GetNumParameters(); ++i)
|
||||
paramTypes.push_back(funcType->GetParameterType(i));
|
||||
|
||||
if (fse->ResolveOverloads(paramTypes) == false)
|
||||
if (fse->ResolveOverloads(expr->pos, paramTypes) == false)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@@ -287,7 +287,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
|
||||
for (unsigned int i = 0; i < vars.size(); ++i) {
|
||||
Symbol *sym = vars[i].sym;
|
||||
assert(sym != NULL);
|
||||
Assert(sym != NULL);
|
||||
if (sym->type == NULL)
|
||||
continue;
|
||||
Expr *initExpr = vars[i].init;
|
||||
@@ -324,7 +324,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
|
||||
LLVM_TYPE_CONST llvm::Type *llvmType = sym->type->LLVMType(g->ctx);
|
||||
if (llvmType == NULL) {
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -491,7 +491,6 @@ IfStmt::IfStmt(Expr *t, Stmt *ts, Stmt *fs, bool checkCoherence, SourcePos p)
|
||||
: Stmt(p), test(t), trueStmts(ts), falseStmts(fs),
|
||||
doAllCheck(checkCoherence &&
|
||||
!g->opt.disableCoherentControlFlow) {
|
||||
// have to wait until after type checking to initialize doAnyCheck.
|
||||
}
|
||||
|
||||
|
||||
@@ -646,12 +645,12 @@ IfStmt::emitMaskedTrueAndFalse(FunctionEmitContext *ctx, llvm::Value *oldMask,
|
||||
lEmitIfStatements(ctx, trueStmts, "if: expr mixed, true statements");
|
||||
// under varying control flow, returns can't stop instruction
|
||||
// emission, so this better be non-NULL...
|
||||
assert(ctx->GetCurrentBasicBlock());
|
||||
Assert(ctx->GetCurrentBasicBlock());
|
||||
}
|
||||
if (falseStmts) {
|
||||
ctx->SetInternalMaskAndNot(oldMask, test);
|
||||
lEmitIfStatements(ctx, falseStmts, "if: expr mixed, false statements");
|
||||
assert(ctx->GetCurrentBasicBlock());
|
||||
Assert(ctx->GetCurrentBasicBlock());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -725,7 +724,7 @@ lSafeToRunWithAllLanesOff(Expr *expr) {
|
||||
|
||||
const SequentialType *seqType =
|
||||
dynamic_cast<const SequentialType *>(type);
|
||||
assert(seqType != NULL);
|
||||
Assert(seqType != NULL);
|
||||
int nElements = seqType->GetElementCount();
|
||||
if (nElements == 0)
|
||||
// Unsized array, so we can't be sure
|
||||
@@ -869,7 +868,9 @@ lSafeToRunWithAllLanesOff(Stmt *stmt) {
|
||||
void
|
||||
IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const {
|
||||
llvm::Value *oldMask = ctx->GetInternalMask();
|
||||
if (ctx->GetFullMask() == LLVMMaskAllOn) {
|
||||
if (ctx->GetFullMask() == LLVMMaskAllOn &&
|
||||
!g->opt.disableCoherentControlFlow &&
|
||||
!g->opt.disableMaskAllOnOptimizations) {
|
||||
// We can tell that the mask is on statically at compile time; just
|
||||
// emit code for the 'if test with the mask all on' path
|
||||
llvm::BasicBlock *bDone = ctx->CreateBasicBlock("cif_done");
|
||||
@@ -921,14 +922,15 @@ IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const {
|
||||
//
|
||||
// where our use of blend for conditional assignments doesn't check
|
||||
// for the 'all lanes' off case.
|
||||
bool costIsAcceptable = ((trueStmts ? trueStmts->EstimateCost() : 0) +
|
||||
(falseStmts ? falseStmts->EstimateCost() : 0)) <
|
||||
PREDICATE_SAFE_IF_STATEMENT_COST;
|
||||
if (lSafeToRunWithAllLanesOff(trueStmts) &&
|
||||
lSafeToRunWithAllLanesOff(falseStmts) &&
|
||||
(((trueStmts ? trueStmts->EstimateCost() : 0) +
|
||||
(falseStmts ? falseStmts->EstimateCost() : 0)) <
|
||||
PREDICATE_SAFE_IF_STATEMENT_COST)) {
|
||||
(costIsAcceptable || g->opt.disableCoherentControlFlow)) {
|
||||
ctx->StartVaryingIf(oldMask);
|
||||
emitMaskedTrueAndFalse(ctx, oldMask, ltest);
|
||||
assert(ctx->GetCurrentBasicBlock());
|
||||
Assert(ctx->GetCurrentBasicBlock());
|
||||
ctx->EndIf();
|
||||
}
|
||||
else {
|
||||
@@ -951,9 +953,12 @@ IfStmt::emitMaskAllOn(FunctionEmitContext *ctx, llvm::Value *ltest,
|
||||
// compiler see what's going on so that subsequent optimizations for
|
||||
// code emitted here can operate with the knowledge that the mask is
|
||||
// definitely all on (until it modifies the mask itself).
|
||||
ctx->SetInternalMask(LLVMMaskAllOn);
|
||||
Assert(!g->opt.disableCoherentControlFlow);
|
||||
if (!g->opt.disableMaskAllOnOptimizations)
|
||||
ctx->SetInternalMask(LLVMMaskAllOn);
|
||||
llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
|
||||
ctx->SetFunctionMask(LLVMMaskAllOn);
|
||||
if (!g->opt.disableMaskAllOnOptimizations)
|
||||
ctx->SetFunctionMask(LLVMMaskAllOn);
|
||||
|
||||
// First, check the value of the test. If it's all on, then we jump to
|
||||
// a basic block that will only have code for the true case.
|
||||
@@ -998,7 +1003,7 @@ IfStmt::emitMaskAllOn(FunctionEmitContext *ctx, llvm::Value *ltest,
|
||||
emitMaskedTrueAndFalse(ctx, LLVMMaskAllOn, ltest);
|
||||
// In this case, return/break/continue isn't allowed to jump and end
|
||||
// emission.
|
||||
assert(ctx->GetCurrentBasicBlock());
|
||||
Assert(ctx->GetCurrentBasicBlock());
|
||||
ctx->EndIf();
|
||||
ctx->BranchInst(bDone);
|
||||
|
||||
@@ -1027,7 +1032,7 @@ IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask,
|
||||
// Emit statements for true
|
||||
ctx->SetCurrentBasicBlock(bRunTrue);
|
||||
lEmitIfStatements(ctx, trueStmts, "if: expr mixed, true statements");
|
||||
assert(ctx->GetCurrentBasicBlock());
|
||||
Assert(ctx->GetCurrentBasicBlock());
|
||||
ctx->BranchInst(bNext);
|
||||
ctx->SetCurrentBasicBlock(bNext);
|
||||
}
|
||||
@@ -1044,7 +1049,7 @@ IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask,
|
||||
// Emit code for false
|
||||
ctx->SetCurrentBasicBlock(bRunFalse);
|
||||
lEmitIfStatements(ctx, falseStmts, "if: expr mixed, false statements");
|
||||
assert(ctx->GetCurrentBasicBlock());
|
||||
Assert(ctx->GetCurrentBasicBlock());
|
||||
ctx->BranchInst(bNext);
|
||||
ctx->SetCurrentBasicBlock(bNext);
|
||||
}
|
||||
@@ -1155,12 +1160,14 @@ void DoStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
// IfStmt::emitCoherentTests()), and then emit the code for the
|
||||
// loop body.
|
||||
ctx->SetCurrentBasicBlock(bAllOn);
|
||||
ctx->SetInternalMask(LLVMMaskAllOn);
|
||||
if (!g->opt.disableMaskAllOnOptimizations)
|
||||
ctx->SetInternalMask(LLVMMaskAllOn);
|
||||
llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
|
||||
ctx->SetFunctionMask(LLVMMaskAllOn);
|
||||
if (!g->opt.disableMaskAllOnOptimizations)
|
||||
ctx->SetFunctionMask(LLVMMaskAllOn);
|
||||
if (bodyStmts)
|
||||
bodyStmts->EmitCode(ctx);
|
||||
assert(ctx->GetCurrentBasicBlock());
|
||||
Assert(ctx->GetCurrentBasicBlock());
|
||||
ctx->SetFunctionMask(oldFunctionMask);
|
||||
ctx->BranchInst(btest);
|
||||
|
||||
@@ -1168,7 +1175,7 @@ void DoStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
ctx->SetCurrentBasicBlock(bMixed);
|
||||
if (bodyStmts)
|
||||
bodyStmts->EmitCode(ctx);
|
||||
assert(ctx->GetCurrentBasicBlock());
|
||||
Assert(ctx->GetCurrentBasicBlock());
|
||||
ctx->BranchInst(btest);
|
||||
}
|
||||
else {
|
||||
@@ -1321,7 +1328,7 @@ ForStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
// it and then jump into the loop test code. (Also start a new scope
|
||||
// since the initializer may be a declaration statement).
|
||||
if (init) {
|
||||
assert(dynamic_cast<StmtList *>(init) == NULL);
|
||||
Assert(dynamic_cast<StmtList *>(init) == NULL);
|
||||
ctx->StartScope();
|
||||
init->EmitCode(ctx);
|
||||
}
|
||||
@@ -1349,7 +1356,7 @@ ForStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
if (uniformTest) {
|
||||
if (doCoherentCheck)
|
||||
Warning(pos, "Uniform condition supplied to cfor/cwhile statement.");
|
||||
assert(ltest->getType() == LLVMTypes::BoolType);
|
||||
Assert(ltest->getType() == LLVMTypes::BoolType);
|
||||
ctx->BranchInst(bloop, bexit, ltest);
|
||||
}
|
||||
else {
|
||||
@@ -1378,12 +1385,14 @@ ForStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
// the runtime test has passed, make this fact clear for code
|
||||
// generation at compile time here.)
|
||||
ctx->SetCurrentBasicBlock(bAllOn);
|
||||
ctx->SetInternalMask(LLVMMaskAllOn);
|
||||
if (!g->opt.disableMaskAllOnOptimizations)
|
||||
ctx->SetInternalMask(LLVMMaskAllOn);
|
||||
llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
|
||||
ctx->SetFunctionMask(LLVMMaskAllOn);
|
||||
if (!g->opt.disableMaskAllOnOptimizations)
|
||||
ctx->SetFunctionMask(LLVMMaskAllOn);
|
||||
if (stmts)
|
||||
stmts->EmitCode(ctx);
|
||||
assert(ctx->GetCurrentBasicBlock());
|
||||
Assert(ctx->GetCurrentBasicBlock());
|
||||
ctx->SetFunctionMask(oldFunctionMask);
|
||||
ctx->BranchInst(bstep);
|
||||
|
||||
@@ -1732,7 +1741,7 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
ctx->StartScope();
|
||||
|
||||
// This should be caught during typechecking
|
||||
assert(startExprs.size() == dimVariables.size() &&
|
||||
Assert(startExprs.size() == dimVariables.size() &&
|
||||
endExprs.size() == dimVariables.size());
|
||||
int nDims = (int)dimVariables.size();
|
||||
|
||||
@@ -1914,7 +1923,7 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
ctx->SetInternalMask(LLVMMaskAllOn);
|
||||
ctx->AddInstrumentationPoint("foreach loop body");
|
||||
stmts->EmitCode(ctx);
|
||||
assert(ctx->GetCurrentBasicBlock() != NULL);
|
||||
Assert(ctx->GetCurrentBasicBlock() != NULL);
|
||||
ctx->BranchInst(bbStep[nDims-1]);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
@@ -1944,12 +1953,12 @@ ForeachStmt::Optimize() {
|
||||
bool anyErrors = false;
|
||||
for (unsigned int i = 0; i < startExprs.size(); ++i) {
|
||||
if (startExprs[i] != NULL)
|
||||
startExprs[i]->Optimize();
|
||||
startExprs[i] = startExprs[i]->Optimize();
|
||||
anyErrors |= (startExprs[i] == NULL);
|
||||
}
|
||||
for (unsigned int i = 0; i < endExprs.size(); ++i) {
|
||||
if (endExprs[i] != NULL)
|
||||
endExprs[i]->Optimize();
|
||||
endExprs[i] = endExprs[i]->Optimize();
|
||||
anyErrors |= (endExprs[i] == NULL);
|
||||
}
|
||||
|
||||
@@ -1965,20 +1974,21 @@ Stmt *
|
||||
ForeachStmt::TypeCheck() {
|
||||
bool anyErrors = false;
|
||||
for (unsigned int i = 0; i < startExprs.size(); ++i) {
|
||||
// Typecheck first, to resolve function overloads
|
||||
if (startExprs[i] != NULL)
|
||||
startExprs[i] = startExprs[i]->TypeCheck();
|
||||
if (startExprs[i] != NULL)
|
||||
startExprs[i] = TypeConvertExpr(startExprs[i],
|
||||
AtomicType::UniformInt32,
|
||||
"foreach starting value");
|
||||
if (startExprs[i] != NULL)
|
||||
startExprs[i]->TypeCheck();
|
||||
anyErrors |= (startExprs[i] == NULL);
|
||||
}
|
||||
for (unsigned int i = 0; i < endExprs.size(); ++i) {
|
||||
if (endExprs[i] != NULL)
|
||||
endExprs[i] = endExprs[i]->TypeCheck();
|
||||
if (endExprs[i] != NULL)
|
||||
endExprs[i] = TypeConvertExpr(endExprs[i], AtomicType::UniformInt32,
|
||||
"foreach ending value");
|
||||
if (endExprs[i] != NULL)
|
||||
endExprs[i]->TypeCheck();
|
||||
anyErrors |= (endExprs[i] == NULL);
|
||||
}
|
||||
|
||||
@@ -2341,7 +2351,7 @@ PrintStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
|
||||
// Now we can emit code to call __do_print()
|
||||
llvm::Function *printFunc = m->module->getFunction("__do_print");
|
||||
assert(printFunc);
|
||||
Assert(printFunc);
|
||||
|
||||
llvm::Value *mask = ctx->GetFullMask();
|
||||
// Set up the rest of the parameters to it
|
||||
@@ -2404,7 +2414,7 @@ AssertStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
llvm::Function *assertFunc =
|
||||
isUniform ? m->module->getFunction("__do_assert_uniform") :
|
||||
m->module->getFunction("__do_assert_varying");
|
||||
assert(assertFunc != NULL);
|
||||
Assert(assertFunc != NULL);
|
||||
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
char errorString[2048];
|
||||
|
||||
2
stmt.h
2
stmt.h
@@ -341,7 +341,7 @@ public:
|
||||
|
||||
Like print() above, since we don't have strings as first-class types in
|
||||
the language, we need to do some gymnastics to support it. Like
|
||||
assert() in C, assert checks the given condition and prints an error
|
||||
assert() in C, assert() checks the given condition and prints an error
|
||||
and calls abort if the condition fails. For varying conditions, the
|
||||
assert triggers if it's true for any of the program instances.
|
||||
*/
|
||||
|
||||
12
sym.cpp
12
sym.cpp
@@ -72,7 +72,7 @@ SymbolTable::SymbolTable() {
|
||||
|
||||
SymbolTable::~SymbolTable() {
|
||||
// Otherwise we have mismatched push/pop scopes
|
||||
assert(variables.size() == 1 && functions.size() == 1 &&
|
||||
Assert(variables.size() == 1 && functions.size() == 1 &&
|
||||
types.size() == 1);
|
||||
PopScope();
|
||||
}
|
||||
@@ -88,15 +88,15 @@ SymbolTable::PushScope() {
|
||||
|
||||
void
|
||||
SymbolTable::PopScope() {
|
||||
assert(variables.size() > 1);
|
||||
Assert(variables.size() > 1);
|
||||
delete variables.back();
|
||||
variables.pop_back();
|
||||
|
||||
assert(functions.size() > 1);
|
||||
Assert(functions.size() > 1);
|
||||
delete functions.back();
|
||||
functions.pop_back();
|
||||
|
||||
assert(types.size() > 1);
|
||||
Assert(types.size() > 1);
|
||||
delete types.back();
|
||||
types.pop_back();
|
||||
}
|
||||
@@ -104,7 +104,7 @@ SymbolTable::PopScope() {
|
||||
|
||||
bool
|
||||
SymbolTable::AddVariable(Symbol *symbol) {
|
||||
assert(symbol != NULL);
|
||||
Assert(symbol != NULL);
|
||||
|
||||
// Check to see if a symbol of the same name has already been declared.
|
||||
for (int i = (int)variables.size() - 1; i >= 0; --i) {
|
||||
@@ -154,7 +154,7 @@ SymbolTable::LookupVariable(const char *name) {
|
||||
bool
|
||||
SymbolTable::AddFunction(Symbol *symbol) {
|
||||
const FunctionType *ft = dynamic_cast<const FunctionType *>(symbol->type);
|
||||
assert(ft != NULL);
|
||||
Assert(ft != NULL);
|
||||
if (LookupFunction(symbol->name.c_str(), ft) != NULL)
|
||||
// A function of the same name and type has already been added to
|
||||
// the symbol table
|
||||
|
||||
@@ -39,6 +39,11 @@
|
||||
#define ISPC_IS_APPLE
|
||||
#endif
|
||||
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
#include <windows.h>
|
||||
#endif // ISPC_IS_WINDOWS
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
|
||||
18
tests/atomics-varyingptr-1.ispc
Normal file
18
tests/atomics-varyingptr-1.ispc
Normal file
@@ -0,0 +1,18 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform unsigned int32 s[programCount];
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float b = 0;
|
||||
float delta = 1;
|
||||
if (programIndex < 2)
|
||||
atomic_add_global(&s[programIndex], delta);
|
||||
RET[programIndex] = s[programIndex];
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 0;
|
||||
RET[0] = RET[1] = 1;
|
||||
}
|
||||
16
tests/atomics-varyingptr-2.ispc
Normal file
16
tests/atomics-varyingptr-2.ispc
Normal file
@@ -0,0 +1,16 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform unsigned int32 s[programCount];
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float b = 0;
|
||||
float delta = 1;
|
||||
atomic_add_global(&s[programCount-1-programIndex], programIndex);
|
||||
RET[programIndex] = s[programIndex];
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = programCount-1-programIndex;
|
||||
}
|
||||
18
tests/atomics-varyingptr-3.ispc
Normal file
18
tests/atomics-varyingptr-3.ispc
Normal file
@@ -0,0 +1,18 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform unsigned int32 s[programCount];
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
for (uniform int i = 0; i < programCount; ++i)
|
||||
s[i] = 1234;
|
||||
float a = aFOO[programIndex];
|
||||
float b = 0;
|
||||
float delta = 1;
|
||||
a = atomic_max_global(&s[programIndex], programIndex);
|
||||
RET[programIndex] = a;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1234;
|
||||
}
|
||||
15
tests/atomics-varyingptr-4.ispc
Normal file
15
tests/atomics-varyingptr-4.ispc
Normal file
@@ -0,0 +1,15 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform int32 s[programCount];
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
for (uniform int i = 0; i < programCount; ++i)
|
||||
s[i] = -1234;
|
||||
atomic_max_global(&s[programIndex], programIndex);
|
||||
RET[programIndex] = s[programIndex];
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = programIndex;
|
||||
}
|
||||
31
tests/gather-struct-vector.ispc
Normal file
31
tests/gather-struct-vector.ispc
Normal file
@@ -0,0 +1,31 @@
|
||||
|
||||
struct Ray {
|
||||
float<3> v;
|
||||
};
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
Ray r[programCount];
|
||||
for (uniform int i = 0; i < programCount; ++i) {
|
||||
r[i].v.x = 100*i + programIndex;
|
||||
r[i].v.y = 200*i + 2*programIndex;
|
||||
r[i].v.z = 300*i + 3*programIndex;
|
||||
}
|
||||
|
||||
Ray *rp = &r[programIndex/2];
|
||||
RET[programIndex] = rp->v.z;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
uniform int d0 = 0;
|
||||
uniform int d1 = 0;
|
||||
for (uniform int i = 0; i < programCount; i += 2) {
|
||||
RET[i] = d0+d1;
|
||||
d1 += 3;
|
||||
RET[i+1] = d0+d1;
|
||||
d0 += 300;
|
||||
d1 += 3;
|
||||
}
|
||||
}
|
||||
16
tests/ptr-int-null-1.ispc
Normal file
16
tests/ptr-int-null-1.ispc
Normal file
@@ -0,0 +1,16 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
bool foo(int *ptr) {
|
||||
return (ptr == NULL);
|
||||
}
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
uniform int a = 1;
|
||||
uniform int * uniform b = 0;
|
||||
RET[programIndex] = foo(0);
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1;
|
||||
}
|
||||
13
tests/ptr-int-null.ispc
Normal file
13
tests/ptr-int-null.ispc
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
uniform int a = 1;
|
||||
uniform int * uniform b = 0;
|
||||
RET[programIndex] = (b == NULL && b == 0 && 0 == b) ? 1 : 0;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1;
|
||||
}
|
||||
15
tests/scatter-mask-1.ispc
Normal file
15
tests/scatter-mask-1.ispc
Normal file
@@ -0,0 +1,15 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform float a[programCount];
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
int index = aFOO[programIndex]-1;
|
||||
if (index & 1)
|
||||
a[index] = 1;
|
||||
RET[programIndex] = a[programIndex];
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = (programIndex & 1) ? 1 : 0;
|
||||
}
|
||||
19
tests/scatter-mask-2.ispc
Normal file
19
tests/scatter-mask-2.ispc
Normal file
@@ -0,0 +1,19 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform float a[programCount];
|
||||
|
||||
static void foo(int index) {
|
||||
a[index] = 1;
|
||||
}
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
int index = aFOO[programIndex]-1;
|
||||
if (index & 1)
|
||||
foo(index);
|
||||
RET[programIndex] = a[programIndex];
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = (programIndex & 1) ? 1 : 0;
|
||||
}
|
||||
17
tests/vector-varying-scatter-2.ispc
Normal file
17
tests/vector-varying-scatter-2.ispc
Normal file
@@ -0,0 +1,17 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float aa = aFOO[programIndex];
|
||||
float<4> a = { -1, -2, -3, -4 };
|
||||
if (programIndex < 4)
|
||||
a[3-programIndex] = aa;
|
||||
//CO print("%\n%\n%\n%\n", a[0], a[1], a[2], a[3]);
|
||||
int i = clamp(3-programIndex, 0, 3);
|
||||
//CO print("%\n%\n", i, a[i]);
|
||||
RET[programIndex] = a[i];
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = (programIndex < 4) ? 1+programIndex : -1;
|
||||
}
|
||||
15
tests/vector-varying-scatter.ispc
Normal file
15
tests/vector-varying-scatter.ispc
Normal file
@@ -0,0 +1,15 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float aa = aFOO[programIndex];
|
||||
float<4> a = { -1, -2, -3, -4 };
|
||||
if (programIndex < 4)
|
||||
a[3-programIndex] = aa;
|
||||
RET[programIndex] = a[2];
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = -3;
|
||||
RET[1] = 2;
|
||||
}
|
||||
7
tests_errors/array-dim-huge.ispc
Normal file
7
tests_errors/array-dim-huge.ispc
Normal file
@@ -0,0 +1,7 @@
|
||||
// Array dimension must be representable with a 32-bit integer.
|
||||
|
||||
struct foo {
|
||||
int x[0xffffffffffff];
|
||||
};
|
||||
|
||||
|
||||
7
tests_errors/array-dim-negative.ispc
Normal file
7
tests_errors/array-dim-negative.ispc
Normal file
@@ -0,0 +1,7 @@
|
||||
// Array dimension must be non-negative
|
||||
|
||||
struct foo {
|
||||
int x[-1];
|
||||
};
|
||||
|
||||
|
||||
9
tests_errors/fptr-typecheck-1.ispc
Normal file
9
tests_errors/fptr-typecheck-1.ispc
Normal file
@@ -0,0 +1,9 @@
|
||||
// Too many parameter values provided in function call
|
||||
|
||||
float bar(float a, float b);
|
||||
|
||||
export uniform int foo(uniform int x[], uniform int i[]) {
|
||||
float (*fptr)(float, float) = bar;
|
||||
//CO bar(0,1,2);
|
||||
fptr(0., 1, 2);
|
||||
}
|
||||
9
tests_errors/fptr-typecheck-2.ispc
Normal file
9
tests_errors/fptr-typecheck-2.ispc
Normal file
@@ -0,0 +1,9 @@
|
||||
// Can't convert argument of type "void * const uniform" to type "float" for funcion call argument.
|
||||
|
||||
float bar(float a, float b);
|
||||
|
||||
export uniform int foo(uniform int x[], uniform int i[]) {
|
||||
float (*fptr)(float, float) = bar;
|
||||
//CO bar(0,1,2);
|
||||
fptr(NULL, 1);
|
||||
}
|
||||
8
tests_errors/fptr-typecheck-3.ispc
Normal file
8
tests_errors/fptr-typecheck-3.ispc
Normal file
@@ -0,0 +1,8 @@
|
||||
// Too few parameter values provided in function call (1 provided, 2 expected).
|
||||
|
||||
float bar(float a, float b);
|
||||
|
||||
export uniform int foo(uniform int x[], uniform int i[]) {
|
||||
float (*fptr)(float, float) = bar;
|
||||
fptr(1.);
|
||||
}
|
||||
8
tests_errors/lvalue-1.ispc
Normal file
8
tests_errors/lvalue-1.ispc
Normal file
@@ -0,0 +1,8 @@
|
||||
// Left hand side of assignment statement can't be assigned to
|
||||
|
||||
int foo() {return 2;}
|
||||
|
||||
int bar()
|
||||
{
|
||||
foo() = 2;
|
||||
}
|
||||
6
tests_errors/lvalue-2.ispc
Normal file
6
tests_errors/lvalue-2.ispc
Normal file
@@ -0,0 +1,6 @@
|
||||
// Left hand side of assignment statement can't be assigned to
|
||||
|
||||
int bar(){
|
||||
4 = 0;
|
||||
}
|
||||
|
||||
7
tests_errors/lvalue-3.ispc
Normal file
7
tests_errors/lvalue-3.ispc
Normal file
@@ -0,0 +1,7 @@
|
||||
// Left hand side of assignment statement can't be assigned to
|
||||
|
||||
int bar(){
|
||||
int x;
|
||||
4 = x;
|
||||
}
|
||||
|
||||
61
type.cpp
61
type.cpp
@@ -293,7 +293,7 @@ AtomicType::GetAsUniformType() const {
|
||||
|
||||
const Type *
|
||||
AtomicType::GetSOAType(int width) const {
|
||||
assert(width > 0);
|
||||
Assert(width > 0);
|
||||
return new ArrayType(this, width);
|
||||
}
|
||||
|
||||
@@ -353,7 +353,10 @@ AtomicType::Mangle() const {
|
||||
std::string
|
||||
AtomicType::GetCDeclaration(const std::string &name) const {
|
||||
std::string ret;
|
||||
assert(isUniform);
|
||||
if (isUniform == false) {
|
||||
Assert(m->errorCount > 0);
|
||||
return ret;
|
||||
}
|
||||
if (isConst) ret += "const ";
|
||||
|
||||
switch (basicType) {
|
||||
@@ -567,7 +570,7 @@ EnumType::GetAsUniformType() const {
|
||||
|
||||
const Type *
|
||||
EnumType::GetSOAType(int width) const {
|
||||
assert(width > 0);
|
||||
Assert(width > 0);
|
||||
return new ArrayType(this, width);
|
||||
}
|
||||
|
||||
@@ -641,9 +644,9 @@ EnumType::GetDIType(llvm::DIDescriptor scope) const {
|
||||
std::vector<llvm::Value *> enumeratorDescriptors;
|
||||
for (unsigned int i = 0; i < enumerators.size(); ++i) {
|
||||
unsigned int enumeratorValue;
|
||||
assert(enumerators[i]->constValue != NULL);
|
||||
Assert(enumerators[i]->constValue != NULL);
|
||||
int count = enumerators[i]->constValue->AsUInt32(&enumeratorValue);
|
||||
assert(count == 1);
|
||||
Assert(count == 1);
|
||||
|
||||
llvm::Value *descriptor =
|
||||
m->diBuilder->createEnumerator(enumerators[i]->name, enumeratorValue);
|
||||
@@ -935,7 +938,7 @@ const Type *SequentialType::GetElementType(int index) const {
|
||||
ArrayType::ArrayType(const Type *c, int a)
|
||||
: child(c), numElements(a) {
|
||||
// 0 -> unsized array.
|
||||
assert(numElements >= 0);
|
||||
Assert(numElements >= 0);
|
||||
}
|
||||
|
||||
|
||||
@@ -1134,7 +1137,7 @@ ArrayType::GetDIType(llvm::DIDescriptor scope) const {
|
||||
|
||||
ArrayType *
|
||||
ArrayType::GetSizedArray(int sz) const {
|
||||
assert(numElements == 0);
|
||||
Assert(numElements == 0);
|
||||
return new ArrayType(child, sz);
|
||||
}
|
||||
|
||||
@@ -1175,7 +1178,7 @@ ArrayType::SizeUnsizedArrays(const Type *type, Expr *initExpr) {
|
||||
for (unsigned int i = 1; i < exprList->exprs.size(); ++i) {
|
||||
if (exprList->exprs[i] == NULL) {
|
||||
// We should have seen an error earlier in this case.
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -1201,9 +1204,9 @@ ArrayType::SizeUnsizedArrays(const Type *type, Expr *initExpr) {
|
||||
|
||||
SOAArrayType::SOAArrayType(const StructType *eltType, int nElem, int sw)
|
||||
: ArrayType(eltType, nElem), soaWidth(sw) {
|
||||
assert(soaWidth > 0);
|
||||
Assert(soaWidth > 0);
|
||||
if (numElements > 0)
|
||||
assert((numElements % soaWidth) == 0);
|
||||
Assert((numElements % soaWidth) == 0);
|
||||
}
|
||||
|
||||
|
||||
@@ -1334,8 +1337,8 @@ SOAArrayType::soaType() const {
|
||||
|
||||
VectorType::VectorType(const AtomicType *b, int a)
|
||||
: base(b), numElements(a) {
|
||||
assert(numElements > 0);
|
||||
assert(base != NULL);
|
||||
Assert(numElements > 0);
|
||||
Assert(base != NULL);
|
||||
}
|
||||
|
||||
|
||||
@@ -1716,7 +1719,7 @@ StructType::GetDIType(llvm::DIDescriptor scope) const {
|
||||
// element starts at an offset that's the correct alignment.
|
||||
if (currentSize > 0 && (currentSize % eltAlign))
|
||||
currentSize += eltAlign - (currentSize % eltAlign);
|
||||
assert((currentSize == 0) || (currentSize % eltAlign) == 0);
|
||||
Assert((currentSize == 0) || (currentSize % eltAlign) == 0);
|
||||
|
||||
llvm::DIFile diFile = elementPositions[i].GetDIFile();
|
||||
int line = elementPositions[i].first_line;
|
||||
@@ -1755,7 +1758,7 @@ StructType::GetDIType(llvm::DIDescriptor scope) const {
|
||||
|
||||
const Type *
|
||||
StructType::GetElementType(int i) const {
|
||||
assert(i < (int)elementTypes.size());
|
||||
Assert(i < (int)elementTypes.size());
|
||||
// If the struct is uniform qualified, then each member comes out with
|
||||
// the same type as in the original source file. If it's varying, then
|
||||
// all members are promoted to varying.
|
||||
@@ -1955,7 +1958,7 @@ FunctionType::FunctionType(const Type *r, const std::vector<const Type *> &a,
|
||||
paramTypes(a), paramNames(std::vector<std::string>(a.size(), "")),
|
||||
paramDefaults(std::vector<ConstExpr *>(a.size(), NULL)),
|
||||
paramPositions(std::vector<SourcePos>(a.size(), p)) {
|
||||
assert(returnType != NULL);
|
||||
Assert(returnType != NULL);
|
||||
}
|
||||
|
||||
|
||||
@@ -1966,10 +1969,10 @@ FunctionType::FunctionType(const Type *r, const std::vector<const Type *> &a,
|
||||
bool it, bool is, bool ec)
|
||||
: isTask(it), isExported(is), isExternC(ec), returnType(r), paramTypes(a),
|
||||
paramNames(an), paramDefaults(ad), paramPositions(ap) {
|
||||
assert(paramTypes.size() == paramNames.size() &&
|
||||
Assert(paramTypes.size() == paramNames.size() &&
|
||||
paramNames.size() == paramDefaults.size() &&
|
||||
paramDefaults.size() == paramPositions.size());
|
||||
assert(returnType != NULL);
|
||||
Assert(returnType != NULL);
|
||||
}
|
||||
|
||||
|
||||
@@ -2124,14 +2127,14 @@ FunctionType::GetDIType(llvm::DIDescriptor scope) const {
|
||||
|
||||
LLVM_TYPE_CONST llvm::FunctionType *
|
||||
FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool includeMask) const {
|
||||
if (isTask == true) assert(includeMask == true);
|
||||
if (isTask == true) Assert(includeMask == true);
|
||||
|
||||
// Get the LLVM Type *s for the function arguments
|
||||
std::vector<LLVM_TYPE_CONST llvm::Type *> llvmArgTypes;
|
||||
for (unsigned int i = 0; i < paramTypes.size(); ++i) {
|
||||
if (!paramTypes[i])
|
||||
return NULL;
|
||||
assert(paramTypes[i] != AtomicType::Void);
|
||||
Assert(paramTypes[i] != AtomicType::Void);
|
||||
|
||||
LLVM_TYPE_CONST llvm::Type *t = paramTypes[i]->LLVMType(ctx);
|
||||
if (t == NULL)
|
||||
@@ -2167,28 +2170,28 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool includeMask) const {
|
||||
|
||||
const Type *
|
||||
FunctionType::GetParameterType(int i) const {
|
||||
assert(i < (int)paramTypes.size());
|
||||
Assert(i < (int)paramTypes.size());
|
||||
return paramTypes[i];
|
||||
}
|
||||
|
||||
|
||||
ConstExpr *
|
||||
FunctionType::GetParameterDefault(int i) const {
|
||||
assert(i < (int)paramDefaults.size());
|
||||
Assert(i < (int)paramDefaults.size());
|
||||
return paramDefaults[i];
|
||||
}
|
||||
|
||||
|
||||
const SourcePos &
|
||||
FunctionType::GetParameterSourcePos(int i) const {
|
||||
assert(i < (int)paramPositions.size());
|
||||
Assert(i < (int)paramPositions.size());
|
||||
return paramPositions[i];
|
||||
}
|
||||
|
||||
|
||||
const std::string &
|
||||
FunctionType::GetParameterName(int i) const {
|
||||
assert(i < (int)paramNames.size());
|
||||
Assert(i < (int)paramNames.size());
|
||||
return paramNames[i];
|
||||
}
|
||||
|
||||
@@ -2241,7 +2244,7 @@ lVectorConvert(const Type *type, SourcePos pos, const char *reason, int vecSize)
|
||||
const Type *
|
||||
Type::MoreGeneralType(const Type *t0, const Type *t1, SourcePos pos, const char *reason,
|
||||
bool forceVarying, int vecSize) {
|
||||
assert(reason != NULL);
|
||||
Assert(reason != NULL);
|
||||
|
||||
// First, if we need to go varying, promote both of the types to be
|
||||
// varying.
|
||||
@@ -2312,7 +2315,7 @@ Type::MoreGeneralType(const Type *t0, const Type *t1, SourcePos pos, const char
|
||||
// The 'more general' version of the two vector element types must
|
||||
// be an AtomicType (that's all that vectors can hold...)
|
||||
const AtomicType *at = dynamic_cast<const AtomicType *>(t);
|
||||
assert(at != NULL);
|
||||
Assert(at != NULL);
|
||||
|
||||
return new VectorType(at, vt0->GetElementCount());
|
||||
}
|
||||
@@ -2327,7 +2330,7 @@ Type::MoreGeneralType(const Type *t0, const Type *t1, SourcePos pos, const char
|
||||
return NULL;
|
||||
|
||||
const AtomicType *at = dynamic_cast<const AtomicType *>(t);
|
||||
assert(at != NULL);
|
||||
Assert(at != NULL);
|
||||
return new VectorType(at, vt0->GetElementCount());
|
||||
}
|
||||
else if (vt1) {
|
||||
@@ -2339,7 +2342,7 @@ Type::MoreGeneralType(const Type *t0, const Type *t1, SourcePos pos, const char
|
||||
return NULL;
|
||||
|
||||
const AtomicType *at = dynamic_cast<const AtomicType *>(t);
|
||||
assert(at != NULL);
|
||||
Assert(at != NULL);
|
||||
return new VectorType(at, vt1->GetElementCount());
|
||||
}
|
||||
|
||||
@@ -2352,7 +2355,7 @@ Type::MoreGeneralType(const Type *t0, const Type *t1, SourcePos pos, const char
|
||||
const EnumType *et1 = dynamic_cast<const EnumType *>(t1->GetReferenceTarget());
|
||||
if (et0 != NULL && et1 != NULL) {
|
||||
// Two different enum types -> make them uint32s...
|
||||
assert(et0->IsVaryingType() == et1->IsVaryingType());
|
||||
Assert(et0->IsVaryingType() == et1->IsVaryingType());
|
||||
return et0->IsVaryingType() ? AtomicType::VaryingUInt32 :
|
||||
AtomicType::UniformUInt32;
|
||||
}
|
||||
@@ -2383,7 +2386,7 @@ Type::MoreGeneralType(const Type *t0, const Type *t1, SourcePos pos, const char
|
||||
|
||||
// Now all we can do is promote atomic types...
|
||||
if (at0 == NULL || at1 == NULL) {
|
||||
assert(reason != NULL);
|
||||
Assert(reason != NULL);
|
||||
Error(pos, "Implicit conversion from type \"%s\" to \"%s\" for %s not possible.",
|
||||
t0->GetString().c_str(), t1->GetString().c_str(), reason);
|
||||
return NULL;
|
||||
|
||||
19
util.cpp
19
util.cpp
@@ -45,7 +45,6 @@
|
||||
#include <stdio.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <ctype.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdlib.h>
|
||||
@@ -147,7 +146,7 @@ lPrintWithWordBreaks(const char *buf, int columnWidth, FILE *out) {
|
||||
fputs(buf, out);
|
||||
#else
|
||||
int column = 0;
|
||||
assert(strchr(buf, ':') != NULL);
|
||||
Assert(strchr(buf, ':') != NULL);
|
||||
int indent = strchr(buf, ':') - buf + 2;
|
||||
int width = std::max(40, columnWidth - 2);
|
||||
|
||||
@@ -267,7 +266,7 @@ Error(SourcePos p, const char *fmt, ...) {
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
lPrint("Error", p, fmt, args);
|
||||
++m->errorCount;
|
||||
if (m != NULL) ++m->errorCount;
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
@@ -292,7 +291,7 @@ Warning(SourcePos p, const char *fmt, ...) {
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
lPrint(g->warningsAsErrors ? "Error" : "Warning", p, fmt, args);
|
||||
if (g->warningsAsErrors)
|
||||
if (g->warningsAsErrors && m != NULL)
|
||||
++m->errorCount;
|
||||
va_end(args);
|
||||
}
|
||||
@@ -313,6 +312,12 @@ PerformanceWarning(SourcePos p, const char *fmt, ...) {
|
||||
void
|
||||
FatalError(const char *file, int line, const char *message) {
|
||||
fprintf(stderr, "%s(%d): FATAL ERROR: %s\n", file, line, message);
|
||||
fprintf(stderr, "***\n"
|
||||
"*** Please file a bug report at https://github.com/ispc/ispc/issues\n"
|
||||
"*** (Including as much information as you can about how to "
|
||||
"reproduce this error).\n"
|
||||
"*** You have apparently encountered a bug in the compiler that we'd "
|
||||
"like to fix!\n***\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
@@ -392,7 +397,7 @@ GetDirectoryAndFileName(const std::string ¤tDirectory,
|
||||
char path[MAX_PATH];
|
||||
const char *combPath = PathCombine(path, currentDirectory.c_str(),
|
||||
relativeName.c_str());
|
||||
assert(combPath != NULL);
|
||||
Assert(combPath != NULL);
|
||||
const char *filenamePtr = PathFindFileName(combPath);
|
||||
*filename = filenamePtr;
|
||||
*directory = std::string(combPath, filenamePtr - combPath);
|
||||
@@ -414,9 +419,9 @@ GetDirectoryAndFileName(const std::string ¤tDirectory,
|
||||
// now, we need to separate it into the base name and the directory
|
||||
const char *fp = fullPath.c_str();
|
||||
const char *basenameStart = strrchr(fp, '/');
|
||||
assert(basenameStart != NULL);
|
||||
Assert(basenameStart != NULL);
|
||||
++basenameStart;
|
||||
assert(basenameStart != '\0');
|
||||
Assert(basenameStart != '\0');
|
||||
*filename = basenameStart;
|
||||
*directory = std::string(fp, basenameStart - fp);
|
||||
#endif // ISPC_IS_WINDOWS
|
||||
|
||||
Reference in New Issue
Block a user